Index: head/sys/cam/ctl/ctl_ha.c
===================================================================
--- head/sys/cam/ctl/ctl_ha.c	(revision 319721)
+++ head/sys/cam/ctl/ctl_ha.c	(revision 319722)
@@ -1,1030 +1,1006 @@
 /*-
  * Copyright (c) 2015 Alexander Motin <mav@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer,
  *    without modification, immediately at the beginning of the file.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/types.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/condvar.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
 #include <sys/conf.h>
 #include <sys/queue.h>
 #include <sys/sysctl.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/uio.h>
 #include <netinet/in.h>
 #include <netinet/tcp.h>
 #include <vm/uma.h>
 
 #include <cam/cam.h>
 #include <cam/scsi/scsi_all.h>
 #include <cam/scsi/scsi_da.h>
 #include <cam/ctl/ctl_io.h>
 #include <cam/ctl/ctl.h>
 #include <cam/ctl/ctl_frontend.h>
 #include <cam/ctl/ctl_util.h>
 #include <cam/ctl/ctl_backend.h>
 #include <cam/ctl/ctl_ioctl.h>
 #include <cam/ctl/ctl_ha.h>
 #include <cam/ctl/ctl_private.h>
 #include <cam/ctl/ctl_debug.h>
 #include <cam/ctl/ctl_error.h>
 
 #if (__FreeBSD_version < 1100000)
 /*
  * Minimal replacement for the native mbufq, compiled only when
  * __FreeBSD_version < 1100000: a FIFO of packets chained via m_nextpkt.
  */
 struct mbufq {
 	/* First packet in the queue; NULL when the queue is empty. */
 	struct mbuf *head;
 	/* Last packet in the queue; NULL when the queue is empty. */
 	struct mbuf *tail;
 };
 
 /*
  * Put a queue into the empty state.  The limit argument is accepted but
  * not used by this compatibility version.
  */
 static void
 mbufq_init(struct mbufq *q, int limit)
 {
 
 	q->tail = NULL;
 	q->head = NULL;
 }
 
 /*
  * Free every packet still sitting in the queue and leave it empty.
  */
 static void
 mbufq_drain(struct mbufq *q)
 {
 	struct mbuf *pkt, *following;
 
 	for (pkt = q->head; pkt != NULL; pkt = following) {
 		/* Unlink before freeing so the head never points at freed memory. */
 		following = pkt->m_nextpkt;
 		q->head = following;
 		m_freem(pkt);
 	}
 	q->tail = NULL;
 }
 
 /*
  * Detach and return the packet at the front of the queue, or NULL when
  * the queue is empty.  The returned packet has its m_nextpkt cleared.
  */
 static struct mbuf *
 mbufq_dequeue(struct mbufq *q)
 {
 	struct mbuf *first = q->head;
 
 	if (first == NULL)
 		return (NULL);
 	/* Removing the only element empties the tail as well. */
 	if (first == q->tail)
 		q->tail = NULL;
 	q->head = first->m_nextpkt;
 	first->m_nextpkt = NULL;
 	return (first);
 }
 
 /*
  * Append packet m at the back of the queue.
  */
 static void
 mbufq_enqueue(struct mbufq *q, struct mbuf *m)
 {
 	struct mbuf *last = q->tail;
 
 	m->m_nextpkt = NULL;
 	if (last == NULL)
 		q->head = m;		/* queue was empty */
 	else
 		last->m_nextpkt = m;
 	q->tail = m;
 }
 
 static u_int
 sbavail(struct sockbuf *sb)
 {
 	return (sb->sb_cc);
 }
 
 #if (__FreeBSD_version < 1000000)
 /* Yield a void pointer to offset o inside the data area of mbuf m. */
 #define	mtodo(m, o)	((void *)(((m)->m_data) + (o)))
 #endif
 #endif
 
 /*
  * Header placed in front of every message on the HA link.  The sender
  * writes it at the start of the mbuf chain; the receive thread reads it
  * first and then waits for `length` more bytes.
  */
 struct ha_msg_wire {
 	/* Channel whose handler gets the CTL_HA_EVT_MSG_RECV event. */
 	uint32_t	 channel;
 	/* Number of payload bytes following this header. */
 	uint32_t	 length;
 };
 
 /*
  * Data-transfer request as sent on CTL_HA_CHAN_DATA; filled from a
  * struct ctl_ha_dt_req by ctl_dt_single().
  */
 struct ha_dt_msg_wire {
 	/* CTL_HA_DT_CMD_READ or CTL_HA_DT_CMD_WRITE. */
 	ctl_ha_dt_cmd	command;
 	/* Length in bytes of the data buffer being transferred. */
 	uint32_t	size;
 	/*
 	 * Buffer addresses copied from the request's local/remote fields.
 	 * The receiver swaps them when it answers a READ with a WRITE.
 	 */
 	uint8_t		*local;
 	uint8_t		*remote;
 };
 
 /*
  * State of the single HA link instance (the global ha_softc).
  */
 struct ha_softc {
 	/* Owning CTL softc, recorded by ctl_ha_msg_init(). */
 	struct ctl_softc *ha_ctl_softc;
 	/* Per-channel event handlers; NULL when nothing is registered. */
 	ctl_evt_handler	 ha_handler[CTL_HA_CHAN_MAX];
 	/* Last accepted "ha_peer" sysctl string. */
 	char		 ha_peer[128];
 	/* Address parsed from ha_peer; used for both bind and connect. */
 	struct sockaddr_in  ha_peer_in;
 	/* Listening socket (listen mode), or NULL. */
 	struct socket	*ha_lso;
 	/* Data connection socket, or NULL. */
 	struct socket	*ha_so;
 	/* Messages queued for transmission. */
 	struct mbufq	 ha_sendq;
 	/* Message taken off ha_sendq and not yet handed to sosend(). */
 	struct mbuf	*ha_sending;
 	/* Mutex taken around ha_sendq, ha_dts and the wakeup/request flags. */
 	struct mtx	 ha_lock;
 	/* Nonzero when the sysctl selected "connect" mode. */
 	int		 ha_connect;
 	/* Nonzero when the sysctl selected "listen" mode. */
 	int		 ha_listen;
 	/* Nonzero while the link is considered up. */
 	int		 ha_connected;
 	/* Nonzero while the receive thread is running; also a sleep channel. */
 	int		 ha_receiving;
 	/* Pending-wakeup flag and sleep channel of the connection thread. */
 	int		 ha_wakeup;
 	/* Disconnect request: 1 drops the link, 2 also closes ha_lso. */
 	int		 ha_disconnect;
 	/* Shutdown state: 1 requested, 2 connection thread has finished. */
 	int		 ha_shutdown;
 	/* Tag of the shutdown_pre_sync event handler. */
 	eventhandler_tag ha_shutdown_eh;
 	/* READ transfer requests waiting for their callback. */
 	TAILQ_HEAD(, ctl_ha_dt_req) ha_dts;
 } ha_softc;
 
 /*
  * Flag a pending wakeup for the connection thread and wake it.
  */
 static void
 ctl_ha_conn_wake(struct ha_softc *softc)
 {
 	struct mtx *lock = &softc->ha_lock;
 
 	/* The flag is set under the lock; the wakeup itself is issued after. */
 	mtx_lock(lock);
 	softc->ha_wakeup = 1;
 	mtx_unlock(lock);
 	wakeup(&softc->ha_wakeup);
 }
 
 /*
  * Upcall for the listening socket: wake the connection thread.
  */
 static int
 ctl_ha_lupcall(struct socket *so, void *arg, int waitflag)
 {
 
 	ctl_ha_conn_wake((struct ha_softc *)arg);
 	return (SU_OK);
 }
 
 /*
  * Receive-side upcall for the data socket: wake whoever sleeps on
  * ha_receiving.
  */
 static int
 ctl_ha_rupcall(struct socket *so, void *arg, int waitflag)
 {
 	struct ha_softc *sc;
 
 	sc = arg;
 	wakeup(&sc->ha_receiving);
 	return (SU_OK);
 }
 
 /*
  * Send-side upcall for the data socket: wake the connection thread.
  */
 static int
 ctl_ha_supcall(struct socket *so, void *arg, int waitflag)
 {
 
 	ctl_ha_conn_wake((struct ha_softc *)arg);
 	return (SU_OK);
 }
 
 /*
  * Deliver an event to the handler registered for channel ch.  Any ch that
  * is not below CTL_HA_CHAN_MAX acts as a broadcast to every registered
  * handler.
  */
 static void
 ctl_ha_evt(struct ha_softc *softc, ctl_ha_channel ch, ctl_ha_event evt,
     int param)
 {
 	ctl_evt_handler fn;
 	int first, last, c;
 
 	if (ch < CTL_HA_CHAN_MAX) {
 		first = ch;
 		last = ch + 1;
 	} else {
 		first = 0;
 		last = CTL_HA_CHAN_MAX;
 	}
 	for (c = first; c < last; c++) {
 		fn = softc->ha_handler[c];
 		if (fn)
 			fn(c, evt, param);
 	}
 }
 
 /*
  * Tear down the data connection.
  *
  * Discards queued and in-progress output, removes the socket upcalls,
  * waits for the receive thread to clear ha_receiving, closes the socket
  * and, if the link was up or a disconnect had been requested, reports a
  * link change to all channels.
  */
 static void
 ctl_ha_close(struct ha_softc *softc)
 {
 	struct socket *so = softc->ha_so;
 	int report = 0;
 
 	if (softc->ha_connected || softc->ha_disconnect) {
 		softc->ha_connected = 0;
 		mbufq_drain(&softc->ha_sendq);
 		m_freem(softc->ha_sending);
 		softc->ha_sending = NULL;
 		report = 1;
 	}
 	if (so) {
 		SOCKBUF_LOCK(&so->so_rcv);
 		soupcall_clear(so, SO_RCV);
 		/*
 		 * The rx thread sleeps on ha_receiving with this sockbuf
 		 * mutex; wake it and sleep on the same channel until it has
 		 * reset the flag on its way out.
 		 */
 		while (softc->ha_receiving) {
 			wakeup(&softc->ha_receiving);
 			msleep(&softc->ha_receiving, SOCKBUF_MTX(&so->so_rcv),
 			    0, "ha_rx exit", 0);
 		}
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		SOCKBUF_LOCK(&so->so_snd);
 		soupcall_clear(so, SO_SND);
 		SOCKBUF_UNLOCK(&so->so_snd);
 		softc->ha_so = NULL;
 		/* In connect mode, wait half a second before closing. */
 		if (softc->ha_connect)
 			pause("reconnect", hz / 2);
 		soclose(so);
 	}
 	if (report) {
 		/*
 		 * CTL_HA_CHAN_MAX makes ctl_ha_evt() notify every handler.
 		 * The state is UNKNOWN while a mode is still configured and
 		 * OFFLINE otherwise.
 		 */
 		ctl_ha_evt(softc, CTL_HA_CHAN_MAX, CTL_HA_EVT_LINK_CHANGE,
 		    (softc->ha_connect || softc->ha_listen) ?
 		    CTL_HA_LINK_UNKNOWN : CTL_HA_LINK_OFFLINE);
 	}
 }
 
 /*
  * Close the listening socket, if there is one.
  *
  * ctl_ha_listen() installs the accept upcall with solisten_upcall_set()
  * under the listen lock once solisten() has succeeded, so the upcall is
  * removed here the same way rather than through the so_rcv sockbuf
  * interface.  A socket that never reached the listening state (sobind()
  * or solisten() failed) has no upcall installed and is just closed.
  */
 static void
 ctl_ha_lclose(struct ha_softc *softc)
 {
 	struct socket *lso = softc->ha_lso;
 
 	if (lso == NULL)
 		return;
 	if (SOLISTENING(lso)) {
 		SOLISTEN_LOCK(lso);
 		solisten_upcall_set(lso, NULL, NULL);
 		SOLISTEN_UNLOCK(lso);
 	}
 	soclose(lso);
 	softc->ha_lso = NULL;
 }
 
 /*
  * Receive thread body.
  *
  * Alternates between two steps: read a struct ha_msg_wire header from the
  * socket, then, once the announced payload has fully arrived, raise
  * CTL_HA_EVT_MSG_RECV on the header's channel with the payload length as
  * the parameter.  The loop ends on disconnect, socket error or end of
  * stream; the thread then clears ha_receiving, wakes the connection
  * thread and exits.
  */
 static void
 ctl_ha_rx_thread(void *arg)
 {
 	struct ha_softc *softc = arg;
 	struct socket *so = softc->ha_so;
 	struct ha_msg_wire wire_hdr;
 	struct uio uio;
 	struct iovec iov;
 	int error, flags, next;
 
 	bzero(&wire_hdr, sizeof(wire_hdr));
 	while (1) {
 		/*
 		 * Bytes needed before the next step: the payload if a header
 		 * is pending, otherwise a header.
 		 */
 		if (wire_hdr.length > 0)
 			next = wire_hdr.length;
 		else
 			next = sizeof(wire_hdr);
 		SOCKBUF_LOCK(&so->so_rcv);
 		while (sbavail(&so->so_rcv) < next || softc->ha_disconnect) {
 			if (softc->ha_connected == 0 || softc->ha_disconnect ||
 			    so->so_error ||
 			    (so->so_rcv.sb_state & SBS_CANTRCVMORE)) {
 				goto errout;
 			}
 			/* Raise the low-water mark to the amount awaited. */
 			so->so_rcv.sb_lowat = next;
 			msleep(&softc->ha_receiving, SOCKBUF_MTX(&so->so_rcv),
 			    0, "-", 0);
 		}
 		SOCKBUF_UNLOCK(&so->so_rcv);
 
 		if (wire_hdr.length == 0) {
 			/* Enough data for a header is queued; read it. */
 			iov.iov_base = &wire_hdr;
 			iov.iov_len = sizeof(wire_hdr);
 			uio.uio_iov = &iov;
 			uio.uio_iovcnt = 1;
 			uio.uio_rw = UIO_READ;
 			uio.uio_segflg = UIO_SYSSPACE;
 			uio.uio_td = curthread;
 			uio.uio_resid = sizeof(wire_hdr);
 			flags = MSG_DONTWAIT;
 			error = soreceive(softc->ha_so, NULL, &uio, NULL,
 			    NULL, &flags);
 			if (error != 0) {
 				printf("%s: header receive error %d\n",
 				    __func__, error);
 				/* errout expects the so_rcv lock to be held. */
 				SOCKBUF_LOCK(&so->so_rcv);
 				goto errout;
 			}
 		} else {
 			/*
 			 * The payload is not read here; the event only
 			 * announces it (the data channel handler in this
 			 * file fetches it with ctl_ha_msg_recv()).
 			 */
 			ctl_ha_evt(softc, wire_hdr.channel,
 			    CTL_HA_EVT_MSG_RECV, wire_hdr.length);
 			wire_hdr.length = 0;
 		}
 	}
 
 errout:
 	/* Reached with the so_rcv lock held. */
 	softc->ha_receiving = 0;
 	wakeup(&softc->ha_receiving);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	ctl_ha_conn_wake(softc);
 	kthread_exit();
 }
 
 /*
  * Push queued messages to the data socket for as long as each one fits
  * into the send buffer.  Called from the connection thread.
  */
 static void
 ctl_ha_send(struct ha_softc *softc)
 {
 	struct socket *so = softc->ha_so;
 	int error;
 
 	while (1) {
 		if (softc->ha_sending == NULL) {
 			mtx_lock(&softc->ha_lock);
 			softc->ha_sending = mbufq_dequeue(&softc->ha_sendq);
 			mtx_unlock(&softc->ha_lock);
 			if (softc->ha_sending == NULL) {
 				/*
 				 * Nothing left to send: set the low-water
 				 * mark one above the high-water mark.
 				 * NOTE(review): presumably this keeps the
 				 * send upcall from firing -- confirm.
 				 */
 				so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
 				break;
 			}
 		}
 		SOCKBUF_LOCK(&so->so_snd);
 		if (sbspace(&so->so_snd) < softc->ha_sending->m_pkthdr.len) {
 			/*
 			 * The whole message does not fit yet; keep it in
 			 * ha_sending and set the low-water mark to its size.
 			 */
 			so->so_snd.sb_lowat = softc->ha_sending->m_pkthdr.len;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			break;
 		}
 		SOCKBUF_UNLOCK(&so->so_snd);
 		error = sosend(softc->ha_so, NULL, NULL, softc->ha_sending,
 		    NULL, MSG_DONTWAIT, curthread);
 		/* The chain is not touched again after sosend(), pass or fail. */
 		softc->ha_sending = NULL;
 		if (error != 0) {
 			printf("%s: sosend() error %d\n", __func__, error);
 			return;
 		}
 	}
 }
 
 static void
 ctl_ha_sock_setup(struct ha_softc *softc)
 {
 	struct sockopt opt;
 	struct socket *so = softc->ha_so;
 	int error, val;
 
 	val = 1024 * 1024;
 	error = soreserve(so, val, val);
 	if (error)
 		printf("%s: soreserve failed %d\n", __func__, error);
 
 	SOCKBUF_LOCK(&so->so_rcv);
 	so->so_rcv.sb_lowat = sizeof(struct ha_msg_wire);
 	soupcall_set(so, SO_RCV, ctl_ha_rupcall, softc);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	SOCKBUF_LOCK(&so->so_snd);
 	so->so_snd.sb_lowat = sizeof(struct ha_msg_wire);
 	soupcall_set(so, SO_SND, ctl_ha_supcall, softc);
 	SOCKBUF_UNLOCK(&so->so_snd);
 
 	bzero(&opt, sizeof(struct sockopt));
 	opt.sopt_dir = SOPT_SET;
 	opt.sopt_level = SOL_SOCKET;
 	opt.sopt_name = SO_KEEPALIVE;
 	opt.sopt_val = &val;
 	opt.sopt_valsize = sizeof(val);
 	val = 1;
 	error = sosetopt(so, &opt);
 	if (error)
 		printf("%s: KEEPALIVE setting failed %d\n", __func__, error);
 
 	opt.sopt_level = IPPROTO_TCP;
 	opt.sopt_name = TCP_NODELAY;
 	val = 1;
 	error = sosetopt(so, &opt);
 	if (error)
 		printf("%s: NODELAY setting failed %d\n", __func__, error);
 
 	opt.sopt_name = TCP_KEEPINIT;
 	val = 3;
 	error = sosetopt(so, &opt);
 	if (error)
 		printf("%s: KEEPINIT setting failed %d\n", __func__, error);
 
 	opt.sopt_name = TCP_KEEPIDLE;
 	val = 1;
 	error = sosetopt(so, &opt);
 	if (error)
 		printf("%s: KEEPIDLE setting failed %d\n", __func__, error);
 
 	opt.sopt_name = TCP_KEEPINTVL;
 	val = 1;
 	error = sosetopt(so, &opt);
 	if (error)
 		printf("%s: KEEPINTVL setting failed %d\n", __func__, error);
 
 	opt.sopt_name = TCP_KEEPCNT;
 	val = 5;
 	error = sosetopt(so, &opt);
 	if (error)
 		printf("%s: KEEPCNT setting failed %d\n", __func__, error);
 }
 
 static int
 ctl_ha_connect(struct ha_softc *softc)
 {
 	struct thread *td = curthread;
 	struct sockaddr_in sa;
 	struct socket *so;
 	int error;
 
 	/* Create the socket */
 	error = socreate(PF_INET, &so, SOCK_STREAM,
 	    IPPROTO_TCP, td->td_ucred, td);
 	if (error != 0) {
 		printf("%s: socreate() error %d\n", __func__, error);
 		return (error);
 	}
 	softc->ha_so = so;
 	ctl_ha_sock_setup(softc);
 
 	memcpy(&sa, &softc->ha_peer_in, sizeof(sa));
 	error = soconnect(so, (struct sockaddr *)&sa, td);
 	if (error != 0) {
 		if (bootverbose)
 			printf("%s: soconnect() error %d\n", __func__, error);
 		goto out;
 	}
 	return (0);
 
 out:
 	ctl_ha_close(softc);
 	return (error);
 }
 
 /*
  * Try to take one connection off the listening socket and make it the
  * data socket.  Returns 0 on success, EWOULDBLOCK when the dequeue
  * reports that nothing is pending, or another errno value, in which case
  * the listening socket is closed through ctl_ha_lclose().
  */
 static int
 ctl_ha_accept(struct ha_softc *softc)
 {
-	struct socket *so;
+	struct socket *lso, *so;
 	struct sockaddr *sap;
 	int error;
 
-	ACCEPT_LOCK();
-	if (softc->ha_lso->so_rcv.sb_state & SBS_CANTRCVMORE)
-		softc->ha_lso->so_error = ECONNABORTED;
-	if (softc->ha_lso->so_error) {
-		error = softc->ha_lso->so_error;
-		softc->ha_lso->so_error = 0;
-		ACCEPT_UNLOCK();
 	/*
 	 * NOTE(review): no path below releases the listen lock taken here, so
 	 * solisten_dequeue() presumably drops it before returning -- confirm.
 	 */
+	lso = softc->ha_lso;
+	SOLISTEN_LOCK(lso);
+	error = solisten_dequeue(lso, &so, 0);
+	if (error == EWOULDBLOCK)
+		return (error);
+	if (error) {
 		printf("%s: socket error %d\n", __func__, error);
 		goto out;
 	}
-	so = TAILQ_FIRST(&softc->ha_lso->so_comp);
-	if (so == NULL) {
-		ACCEPT_UNLOCK();
-		return (EWOULDBLOCK);
-	}
-	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
-	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
 
-	/*
-	 * Before changing the flags on the socket, we have to bump the
-	 * reference count.  Otherwise, if the protocol calls sofree(),
-	 * the socket will be released due to a zero refcount.
-	 */
-	SOCK_LOCK(so);			/* soref() and so_state update */
-	soref(so);			/* file descriptor reference */
-
-	TAILQ_REMOVE(&softc->ha_lso->so_comp, so, so_list);
-	softc->ha_lso->so_qlen--;
-	so->so_state |= SS_NBIO;
-	so->so_qstate &= ~SQ_COMP;
-	so->so_head = NULL;
-
-	SOCK_UNLOCK(so);
-	ACCEPT_UNLOCK();
-
 	/* The peer address is not used; any sockaddr handed back is freed. */
 	sap = NULL;
 	error = soaccept(so, &sap);
 	if (error != 0) {
 		printf("%s: soaccept() error %d\n", __func__, error);
 		if (sap != NULL)
 			free(sap, M_SONAME);
 		goto out;
 	}
 	if (sap != NULL)
 		free(sap, M_SONAME);
 	softc->ha_so = so;
 	ctl_ha_sock_setup(softc);
 	return (0);
 
 out:
 	ctl_ha_lclose(softc);
 	return (error);
 }
 
 /*
  * Create the listening socket if it does not exist yet, bind it to the
  * configured address and start listening with a backlog of 1.  Returns 0
  * on success or an errno value; bind and listen failures close the socket
  * through ctl_ha_lclose().
  */
 static int
 ctl_ha_listen(struct ha_softc *softc)
 {
 	struct thread *td = curthread;
 	struct sockaddr_in sa;
 	struct sockopt opt;
 	int error, val;
 
 	/* Create the socket */
 	if (softc->ha_lso == NULL) {
 		error = socreate(PF_INET, &softc->ha_lso, SOCK_STREAM,
 		    IPPROTO_TCP, td->td_ucred, td);
 		if (error != 0) {
 			printf("%s: socreate() error %d\n", __func__, error);
 			return (error);
 		}
 		/* Option failures are logged and otherwise ignored. */
 		bzero(&opt, sizeof(struct sockopt));
 		opt.sopt_dir = SOPT_SET;
 		opt.sopt_level = SOL_SOCKET;
 		opt.sopt_name = SO_REUSEADDR;
 		opt.sopt_val = &val;
 		opt.sopt_valsize = sizeof(val);
 		val = 1;
 		error = sosetopt(softc->ha_lso, &opt);
 		if (error) {
 			printf("%s: REUSEADDR setting failed %d\n",
 			    __func__, error);
 		}
 		bzero(&opt, sizeof(struct sockopt));
 		opt.sopt_dir = SOPT_SET;
 		opt.sopt_level = SOL_SOCKET;
 		opt.sopt_name = SO_REUSEPORT;
 		opt.sopt_val = &val;
 		opt.sopt_valsize = sizeof(val);
 		val = 1;
 		error = sosetopt(softc->ha_lso, &opt);
 		if (error) {
 			printf("%s: REUSEPORT setting failed %d\n",
 			    __func__, error);
 		}
-		SOCKBUF_LOCK(&softc->ha_lso->so_rcv);
-		soupcall_set(softc->ha_lso, SO_RCV, ctl_ha_lupcall, softc);
-		SOCKBUF_UNLOCK(&softc->ha_lso->so_rcv);
 	}
 
 	memcpy(&sa, &softc->ha_peer_in, sizeof(sa));
 	error = sobind(softc->ha_lso, (struct sockaddr *)&sa, td);
 	if (error != 0) {
 		printf("%s: sobind() error %d\n", __func__, error);
 		goto out;
 	}
 	error = solisten(softc->ha_lso, 1, td);
 	if (error != 0) {
 		printf("%s: solisten() error %d\n", __func__, error);
 		goto out;
 	}
 	/*
 	 * Once listening, mark the socket non-blocking and install the
 	 * upcall that wakes the connection thread.
 	 */
+	SOLISTEN_LOCK(softc->ha_lso);
+	softc->ha_lso->so_state |= SS_NBIO;
+	solisten_upcall_set(softc->ha_lso, ctl_ha_lupcall, softc);
+	SOLISTEN_UNLOCK(softc->ha_lso);
 	return (0);
 
 out:
 	ctl_ha_lclose(softc);
 	return (error);
 }
 
 /*
  * Connection management thread body.
  *
  * Each pass through the loop: services disconnect and shutdown requests,
  * closes a data socket that has failed, establishes a link if there is
  * none (accept, listen or connect, depending on the configured mode),
  * starts the receive thread when a connection comes up, pushes queued
  * output, and then sleeps for at most hz ticks unless a wakeup is already
  * pending.  On shutdown it sets ha_shutdown to 2 and exits.
  */
 static void
 ctl_ha_conn_thread(void *arg)
 {
 	struct ha_softc *softc = arg;
 	int error;
 
 	while (1) {
 		if (softc->ha_disconnect || softc->ha_shutdown) {
 			ctl_ha_close(softc);
 			/* A value of 2 also closes the listening socket. */
 			if (softc->ha_disconnect == 2 || softc->ha_shutdown)
 				ctl_ha_lclose(softc);
 			softc->ha_disconnect = 0;
 			if (softc->ha_shutdown)
 				break;
 		} else if (softc->ha_so != NULL &&
 		    (softc->ha_so->so_error ||
 		     softc->ha_so->so_rcv.sb_state & SBS_CANTRCVMORE))
 			ctl_ha_close(softc);
 		if (softc->ha_so == NULL) {
 			/*
 			 * No data socket: accept on an existing listener,
 			 * else create one, else connect out.
 			 */
 			if (softc->ha_lso != NULL)
 				ctl_ha_accept(softc);
 			else if (softc->ha_listen)
 				ctl_ha_listen(softc);
 			else if (softc->ha_connect)
 				ctl_ha_connect(softc);
 		}
 		if (softc->ha_so != NULL) {
 			/* Socket is up and error-free: declare the link online. */
 			if (softc->ha_connected == 0 &&
 			    softc->ha_so->so_error == 0 &&
 			    (softc->ha_so->so_state & SS_ISCONNECTING) == 0) {
 				softc->ha_connected = 1;
 				ctl_ha_evt(softc, CTL_HA_CHAN_MAX,
 				    CTL_HA_EVT_LINK_CHANGE,
 				    CTL_HA_LINK_ONLINE);
 				/* Set before the thread starts; it clears it on exit. */
 				softc->ha_receiving = 1;
 				error = kproc_kthread_add(ctl_ha_rx_thread,
 				    softc, &softc->ha_ctl_softc->ctl_proc,
 				    NULL, 0, 0, "ctl", "ha_rx");
 				if (error != 0) {
 					printf("Error creating CTL HA rx thread!\n");
 					softc->ha_receiving = 0;
 					softc->ha_disconnect = 1;
 				}
 			}
 			ctl_ha_send(softc);
 		}
 		mtx_lock(&softc->ha_lock);
 		/* Skip the sleep when the data socket has already failed. */
 		if (softc->ha_so != NULL &&
 		    (softc->ha_so->so_error ||
 		     softc->ha_so->so_rcv.sb_state & SBS_CANTRCVMORE))
 			;
 		else if (!softc->ha_wakeup)
 			msleep(&softc->ha_wakeup, &softc->ha_lock, 0, "-", hz);
 		softc->ha_wakeup = 0;
 		mtx_unlock(&softc->ha_lock);
 	}
 	/* Tell ctl_ha_msg_shutdown() that this thread is done. */
 	mtx_lock(&softc->ha_lock);
 	softc->ha_shutdown = 2;
 	wakeup(&softc->ha_wakeup);
 	mtx_unlock(&softc->ha_lock);
 	kthread_exit();
 }
 
 static int
 ctl_ha_peer_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	struct ha_softc *softc = (struct ha_softc *)arg1;
 	struct sockaddr_in *sa;
 	int error, b1, b2, b3, b4, p, num;
 	char buf[128];
 
 	strlcpy(buf, softc->ha_peer, sizeof(buf));
 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 	if ((error != 0) || (req->newptr == NULL) ||
 	    strncmp(buf, softc->ha_peer, sizeof(buf)) == 0)
 		return (error);
 
 	sa = &softc->ha_peer_in;
 	mtx_lock(&softc->ha_lock);
 	if ((num = sscanf(buf, "connect %d.%d.%d.%d:%d",
 	    &b1, &b2, &b3, &b4, &p)) >= 4) {
 		softc->ha_connect = 1;
 		softc->ha_listen = 0;
 	} else if ((num = sscanf(buf, "listen %d.%d.%d.%d:%d",
 	    &b1, &b2, &b3, &b4, &p)) >= 4) {
 		softc->ha_connect = 0;
 		softc->ha_listen = 1;
 	} else {
 		softc->ha_connect = 0;
 		softc->ha_listen = 0;
 		if (buf[0] != 0) {
 			buf[0] = 0;
 			error = EINVAL;
 		}
 	}
 	strlcpy(softc->ha_peer, buf, sizeof(softc->ha_peer));
 	if (softc->ha_connect || softc->ha_listen) {
 		memset(sa, 0, sizeof(*sa));
 		sa->sin_len = sizeof(struct sockaddr_in);
 		sa->sin_family = AF_INET;
 		sa->sin_port = htons((num >= 5) ? p : 999);
 		sa->sin_addr.s_addr =
 		    htonl((b1 << 24) + (b2 << 16) + (b3 << 8) + b4);
 	}
 	softc->ha_disconnect = 2;
 	softc->ha_wakeup = 1;
 	mtx_unlock(&softc->ha_lock);
 	wakeup(&softc->ha_wakeup);
 	return (error);
 }
 
 /*
  * Install the event handler for a channel, replacing any previous one.
  * Always returns CTL_HA_STATUS_SUCCESS.
  */
 ctl_ha_status
 ctl_ha_msg_register(ctl_ha_channel channel, ctl_evt_handler handler)
 {
 
 	KASSERT(channel < CTL_HA_CHAN_MAX,
 	    ("Wrong CTL HA channel %d", channel));
 	ha_softc.ha_handler[channel] = handler;
 	return (CTL_HA_STATUS_SUCCESS);
 }
 
 /*
  * Remove the event handler of a channel.  Always returns
  * CTL_HA_STATUS_SUCCESS.
  */
 ctl_ha_status
 ctl_ha_msg_deregister(ctl_ha_channel channel)
 {
 
 	KASSERT(channel < CTL_HA_CHAN_MAX,
 	    ("Wrong CTL HA channel %d", channel));
 	ha_softc.ha_handler[channel] = NULL;
 	return (CTL_HA_STATUS_SUCCESS);
 }
 
 /*
  * Receive len bytes from the HA link into addr.  With wait zero the
  * socket read is non-blocking.  Returns CTL_HA_STATUS_DISCONNECT when the
  * link is down, and CTL_HA_STATUS_ERROR (after requesting a disconnect)
  * when the read fails.
  */
 ctl_ha_status
 ctl_ha_msg_recv(ctl_ha_channel channel, void *addr, size_t len,
 		int wait)
 {
 	struct ha_softc *sc = &ha_softc;
 	struct iovec vec;
 	struct uio io;
 	int rc, rflags;
 
 	if (sc->ha_connected == 0)
 		return (CTL_HA_STATUS_DISCONNECT);
 
 	vec.iov_base = addr;
 	vec.iov_len = len;
 	io.uio_iov = &vec;
 	io.uio_iovcnt = 1;
 	io.uio_rw = UIO_READ;
 	io.uio_segflg = UIO_SYSSPACE;
 	io.uio_td = curthread;
 	io.uio_resid = len;
 	rflags = MSG_DONTWAIT;
 	if (wait)
 		rflags = 0;
 	rc = soreceive(sc->ha_so, NULL, &io, NULL, NULL, &rflags);
 	if (rc != 0) {
 		/* Consider all errors fatal for HA sanity. */
 		mtx_lock(&sc->ha_lock);
 		if (sc->ha_connected) {
 			sc->ha_disconnect = 1;
 			sc->ha_wakeup = 1;
 			wakeup(&sc->ha_wakeup);
 		}
 		mtx_unlock(&sc->ha_lock);
 		return (CTL_HA_STATUS_ERROR);
 	}
 	return (CTL_HA_STATUS_SUCCESS);
 }
 
 /*
  * Queue one message built from a wire header followed by two buffers
  * (addr/len, then addr2/len2) for transmission on the given channel.
  *
  * The data is copied into a newly allocated mbuf chain, so the caller's
  * buffers are not referenced after return.  `wait` is passed to the mbuf
  * allocator.  Returns CTL_HA_STATUS_DISCONNECT when the link is down,
  * CTL_HA_STATUS_ERROR (after requesting a disconnect) when the chain
  * cannot be allocated, and CTL_HA_STATUS_SUCCESS once the message is on
  * the send queue.
  */
 ctl_ha_status
 ctl_ha_msg_send2(ctl_ha_channel channel, const void *addr, size_t len,
     const void *addr2, size_t len2, int wait)
 {
 	struct ha_softc *softc = &ha_softc;
 	struct mbuf *mb, *newmb;
 	struct ha_msg_wire hdr;
 	size_t copylen, off;
 
 	if (!softc->ha_connected)
 		return (CTL_HA_STATUS_DISCONNECT);
 
 	newmb = m_getm2(NULL, sizeof(hdr) + len + len2, wait, MT_DATA,
 	    M_PKTHDR);
 	if (newmb == NULL) {
 		/* Consider all errors fatal for HA sanity. */
 		mtx_lock(&softc->ha_lock);
 		if (softc->ha_connected) {
 			softc->ha_disconnect = 1;
 			softc->ha_wakeup = 1;
 			wakeup(&softc->ha_wakeup);
 		}
 		mtx_unlock(&softc->ha_lock);
 		printf("%s: Can't allocate mbuf chain\n", __func__);
 		return (CTL_HA_STATUS_ERROR);
 	}
 	hdr.channel = channel;
 	hdr.length = len + len2;
 	mb = newmb;
 	/* The header goes at the start of the first mbuf. */
 	memcpy(mtodo(mb, 0), &hdr, sizeof(hdr));
 	mb->m_len += sizeof(hdr);
 	off = 0;
 	for (; mb != NULL && off < len; mb = mb->m_next) {
 		copylen = min(M_TRAILINGSPACE(mb), len - off);
 		memcpy(mtodo(mb, mb->m_len), (const char *)addr + off, copylen);
 		mb->m_len += copylen;
 		off += copylen;
 		/*
 		 * Leave before the loop advances mb, so the second buffer
 		 * continues in the space left in this mbuf.
 		 */
 		if (off == len)
 			break;
 	}
 	KASSERT(off == len, ("%s: off (%zu) != len (%zu)", __func__,
 	    off, len));
 	off = 0;
 	/* Continue from the same mb cursor with the second buffer. */
 	for (; mb != NULL && off < len2; mb = mb->m_next) {
 		copylen = min(M_TRAILINGSPACE(mb), len2 - off);
 		memcpy(mtodo(mb, mb->m_len), (const char *)addr2 + off, copylen);
 		mb->m_len += copylen;
 		off += copylen;
 	}
 	KASSERT(off == len2, ("%s: off (%zu) != len2 (%zu)", __func__,
 	    off, len2));
 	newmb->m_pkthdr.len = sizeof(hdr) + len + len2;
 
 	mtx_lock(&softc->ha_lock);
 	/* Re-check under the lock: the link may have dropped meanwhile. */
 	if (!softc->ha_connected) {
 		mtx_unlock(&softc->ha_lock);
 		m_freem(newmb);
 		return (CTL_HA_STATUS_DISCONNECT);
 	}
 	mbufq_enqueue(&softc->ha_sendq, newmb);
 	softc->ha_wakeup = 1;
 	mtx_unlock(&softc->ha_lock);
 	wakeup(&softc->ha_wakeup);
 	return (CTL_HA_STATUS_SUCCESS);
 }
 
 /*
  * Single-buffer form of ctl_ha_msg_send2(): no second buffer is passed.
  */
 ctl_ha_status
 ctl_ha_msg_send(ctl_ha_channel channel, const void *addr, size_t len,
     int wait)
 {
 	ctl_ha_status status;
 
 	status = ctl_ha_msg_send2(channel, addr, len, NULL, 0, wait);
 	return (status);
 }
 
 /*
  * Request that the connection thread drop the current link.  Always
  * returns CTL_HA_STATUS_SUCCESS.
  */
 ctl_ha_status
 ctl_ha_msg_abort(ctl_ha_channel channel)
 {
 	struct ha_softc *sc = &ha_softc;
 	struct mtx *lock = &sc->ha_lock;
 
 	mtx_lock(lock);
 	sc->ha_disconnect = 1;
 	sc->ha_wakeup = 1;
 	mtx_unlock(lock);
 	wakeup(&sc->ha_wakeup);
 	return (CTL_HA_STATUS_SUCCESS);
 }
 
 /*
  * Return a zero-filled data transfer request; the allocation is made
  * with M_WAITOK.
  */
 struct ctl_ha_dt_req *
 ctl_dt_req_alloc(void)
 {
 	struct ctl_ha_dt_req *req;
 
 	req = malloc(sizeof(struct ctl_ha_dt_req), M_CTL, M_WAITOK | M_ZERO);
 	return (req);
 }
 
 /*
  * Release a request obtained from ctl_dt_req_alloc().
  */
 void
 ctl_dt_req_free(struct ctl_ha_dt_req *req)
 {
 	free(req, M_CTL);
 }
 
 /*
  * Issue a DMA request for a single buffer.
  */
 ctl_ha_status
 ctl_dt_single(struct ctl_ha_dt_req *req)
 {
 	struct ha_softc *softc = &ha_softc;
 	struct ha_dt_msg_wire wire_dt;
 	ctl_ha_status status;
 
 	wire_dt.command = req->command;
 	wire_dt.size = req->size;
 	wire_dt.local = req->local;
 	wire_dt.remote = req->remote;
 	if (req->command == CTL_HA_DT_CMD_READ && req->callback != NULL) {
 		mtx_lock(&softc->ha_lock);
 		TAILQ_INSERT_TAIL(&softc->ha_dts, req, links);
 		mtx_unlock(&softc->ha_lock);
 		ctl_ha_msg_send(CTL_HA_CHAN_DATA, &wire_dt, sizeof(wire_dt),
 		    M_WAITOK);
 		return (CTL_HA_STATUS_WAIT);
 	}
 	if (req->command == CTL_HA_DT_CMD_READ) {
 		status = ctl_ha_msg_send(CTL_HA_CHAN_DATA, &wire_dt,
 		    sizeof(wire_dt), M_WAITOK);
 	} else {
 		status = ctl_ha_msg_send2(CTL_HA_CHAN_DATA, &wire_dt,
 		    sizeof(wire_dt), req->local, req->size, M_WAITOK);
 	}
 	return (status);
 }
 
 /*
  * Event handler for CTL_HA_CHAN_DATA.
  *
  * CTL_HA_EVT_MSG_RECV: param is the payload length announced by the wire
  * header.  The transfer header is read first; a message too short to
  * hold one is drained and dropped.  A READ request is answered with a
  * WRITE carrying the requested data.  A WRITE has its data received into
  * the address named by the message, after which the matching pending
  * request, if any, is completed through its callback.
  *
  * CTL_HA_EVT_LINK_CHANGE: unless the new state is ONLINE, every pending
  * request is completed with CTL_HA_STATUS_DISCONNECT.
  */
 static void
 ctl_dt_event_handler(ctl_ha_channel channel, ctl_ha_event event, int param)
 {
 	struct ha_softc *softc = &ha_softc;
 	struct ctl_ha_dt_req *req;
 	ctl_ha_status isc_status;
 
 	if (event == CTL_HA_EVT_MSG_RECV) {
 		struct ha_dt_msg_wire wire_dt;
 		uint8_t *tmp;
 		int size;
 
 		size = min(sizeof(wire_dt), param);
 		isc_status = ctl_ha_msg_recv(CTL_HA_CHAN_DATA, &wire_dt,
 					     size, M_WAITOK);
 		if (isc_status != CTL_HA_STATUS_SUCCESS) {
 			printf("%s: Error receiving message: %d\n",
 			    __func__, isc_status);
 			return;
 		}
 		/*
 		 * A short message leaves part of wire_dt unset; do not act
 		 * on it.  Its bytes were consumed above, so the stream stays
 		 * aligned on the next wire header.
 		 */
 		if (size < (int)sizeof(wire_dt)) {
 			printf("%s: Short message received: %d\n",
 			    __func__, param);
 			return;
 		}
 
 		if (wire_dt.command == CTL_HA_DT_CMD_READ) {
 			/* Reply with a WRITE, seen from our side of the link. */
 			wire_dt.command = CTL_HA_DT_CMD_WRITE;
 			tmp = wire_dt.local;
 			wire_dt.local = wire_dt.remote;
 			wire_dt.remote = tmp;
 			ctl_ha_msg_send2(CTL_HA_CHAN_DATA, &wire_dt,
 			    sizeof(wire_dt), wire_dt.local, wire_dt.size,
 			    M_WAITOK);
 		} else if (wire_dt.command == CTL_HA_DT_CMD_WRITE) {
 			isc_status = ctl_ha_msg_recv(CTL_HA_CHAN_DATA,
 			    wire_dt.remote, wire_dt.size, M_WAITOK);
 			/* Find the request whose buffer was just filled. */
 			mtx_lock(&softc->ha_lock);
 			TAILQ_FOREACH(req, &softc->ha_dts, links) {
 				if (req->local == wire_dt.remote) {
 					TAILQ_REMOVE(&softc->ha_dts, req, links);
 					break;
 				}
 			}
 			mtx_unlock(&softc->ha_lock);
 			/* req is NULL here when no entry matched. */
 			if (req) {
 				req->ret = isc_status;
 				req->callback(req);
 			}
 		}
 	} else if (event == CTL_HA_EVT_LINK_CHANGE) {
 		CTL_DEBUG_PRINT(("%s: Link state change to %d\n", __func__,
 		    param));
 		if (param != CTL_HA_LINK_ONLINE) {
 			mtx_lock(&softc->ha_lock);
 			while ((req = TAILQ_FIRST(&softc->ha_dts)) != NULL) {
 				TAILQ_REMOVE(&softc->ha_dts, req, links);
 				/* The callback runs without the lock held. */
 				mtx_unlock(&softc->ha_lock);
 				req->ret = CTL_HA_STATUS_DISCONNECT;
 				req->callback(req);
 				mtx_lock(&softc->ha_lock);
 			}
 			mtx_unlock(&softc->ha_lock);
 		}
 	} else {
 		printf("%s: Unknown event %d\n", __func__, event);
 	}
 }
 
 
 /*
  * Initialize the HA link: set up the lock, send queue and pending list,
  * start the connection thread, register the shutdown hook and the
  * "ha_peer" sysctl, and install the data channel handler.
  *
  * Returns CTL_HA_STATUS_ERROR only when the connection thread cannot be
  * created; the lock is destroyed again in that case.
  */
 ctl_ha_status
 ctl_ha_msg_init(struct ctl_softc *ctl_softc)
 {
 	struct ha_softc *softc = &ha_softc;
 	int error;
 
 	softc->ha_ctl_softc = ctl_softc;
 	mtx_init(&softc->ha_lock, "CTL HA mutex", NULL, MTX_DEF);
 	mbufq_init(&softc->ha_sendq, INT_MAX);
 	TAILQ_INIT(&softc->ha_dts);
 	error = kproc_kthread_add(ctl_ha_conn_thread, softc,
 	    &ctl_softc->ctl_proc, NULL, 0, 0, "ctl", "ha_tx");
 	if (error != 0) {
 		printf("error creating CTL HA connection thread!\n");
 		mtx_destroy(&softc->ha_lock);
 		return (CTL_HA_STATUS_ERROR);
 	}
 	/* Shut the link down before the final sync at system shutdown. */
 	softc->ha_shutdown_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync,
 	    ctl_ha_msg_shutdown, ctl_softc, SHUTDOWN_PRI_FIRST);
 	SYSCTL_ADD_PROC(&ctl_softc->sysctl_ctx,
 	    SYSCTL_CHILDREN(ctl_softc->sysctl_tree),
 	    OID_AUTO, "ha_peer", CTLTYPE_STRING | CTLFLAG_RWTUN,
 	    softc, 0, ctl_ha_peer_sysctl, "A", "HA peer connection method");
 
 	if (ctl_ha_msg_register(CTL_HA_CHAN_DATA, ctl_dt_event_handler)
 	    != CTL_HA_STATUS_SUCCESS) {
 		printf("%s: ctl_ha_msg_register failed.\n", __func__);
 	}
 
 	return (CTL_HA_STATUS_SUCCESS);
 }
 
 /*
  * Ask the connection thread to disconnect and exit, then wait until it
  * reports completion by setting ha_shutdown to 2.  The wait is abandoned
  * once the scheduler has been stopped.  Calling this again after the
  * thread has finished is a no-op.
  */
 void
 ctl_ha_msg_shutdown(struct ctl_softc *ctl_softc)
 {
 	struct ha_softc *softc = &ha_softc;
 
 	/* Disconnect and shutdown threads. */
 	mtx_lock(&softc->ha_lock);
 	if (softc->ha_shutdown < 2) {
 		softc->ha_shutdown = 1;
 		softc->ha_wakeup = 1;
 		wakeup(&softc->ha_wakeup);
 		/* Re-check every hz ticks in case a wakeup is missed. */
 		while (softc->ha_shutdown < 2 && !SCHEDULER_STOPPED()) {
 			msleep(&softc->ha_wakeup, &softc->ha_lock, 0,
 			    "shutdown", hz);
 		}
 	}
 	mtx_unlock(&softc->ha_lock);
 }
 
 /*
  * Undo ctl_ha_msg_init(): remove the shutdown hook, stop the connection
  * thread, deregister the data channel handler and destroy the lock.
  * Always returns CTL_HA_STATUS_SUCCESS.
  */
 ctl_ha_status
 ctl_ha_msg_destroy(struct ctl_softc *ctl_softc)
 {
 	struct ha_softc *softc = &ha_softc;
 
 	if (softc->ha_shutdown_eh != NULL) {
 		EVENTHANDLER_DEREGISTER(shutdown_pre_sync,
 		    softc->ha_shutdown_eh);
 		softc->ha_shutdown_eh = NULL;
 	}
 
 	ctl_ha_msg_shutdown(ctl_softc);	/* Just in case. */
 
 	if (ctl_ha_msg_deregister(CTL_HA_CHAN_DATA) != CTL_HA_STATUS_SUCCESS)
 		printf("%s: ctl_ha_msg_deregister failed.\n", __func__);
 
 	mtx_destroy(&softc->ha_lock);
 	return (CTL_HA_STATUS_SUCCESS);
 }
Index: head/sys/dev/iscsi/icl_soft_proxy.c
===================================================================
--- head/sys/dev/iscsi/icl_soft_proxy.c	(revision 319721)
+++ head/sys/dev/iscsi/icl_soft_proxy.c	(revision 319722)
@@ -1,375 +1,343 @@
 /*-
  * Copyright (c) 2012 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed by Edward Tomasz Napierala under sponsorship
  * from the FreeBSD Foundation.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 /*-
  * Copyright (c) 1982, 1986, 1989, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * sendfile(2) and related extensions:
  * Copyright (c) 1998, David Greenman. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
  */
 
 /*
  * iSCSI Common Layer, kernel proxy part.
  */
 
 #ifdef ICL_KERNEL_PROXY
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/capsicum.h>
 #include <sys/condvar.h>
 #include <sys/conf.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/systm.h>
 #include <netinet/in.h>
 #include <netinet/tcp.h>
 
 #include <dev/iscsi/icl.h>
 
 /*
  * One listening socket belonging to an icl_listen.  Each entry is
  * serviced by its own icl_accept_thread().
  */
 struct icl_listen_sock {
 	/* Linkage on the owning listener's il_sockets list. */
 	TAILQ_ENTRY(icl_listen_sock)	ils_next;
 	/* Owning listener; its il_accept callback receives new sockets. */
 	struct icl_listen		*ils_listen;
 	/* The listening socket. */
 	struct socket			*ils_socket;
 	/* Set by the accept thread on entry, cleared just before it exits. */
 	bool				ils_running;
-	bool				ils_disconnecting;
 	/* Portal id passed through to il_accept. */
 	int				ils_id;
 };
 
 /*
  * A set of listening sockets that share one accept callback.
  */
 struct icl_listen	{
 	/* All icl_listen_sock entries added to this listener. */
 	TAILQ_HEAD(, icl_listen_sock)	il_sockets;
 	/* Held while il_sockets is modified or walked. */
 	struct sx			il_lock;
 	/* Called as il_accept(socket, address, portal id) per connection. */
 	void				(*il_accept)(struct socket *,
 					    struct sockaddr *, int);
 };
 
 /* malloc(9) type used for every allocation made in this file. */
 static MALLOC_DEFINE(M_ICL_PROXY, "ICL_PROXY", "iSCSI common layer proxy");
 
 /*
  * Open an outgoing connection on behalf of an ICL connection.
  *
  * Creates a socket with the given domain/type/protocol, binds it to
  * from_sa when that is non-NULL, connects it to to_sa, sleeps
  * (interruptibly) until the connect completes or fails, and finally hands
  * the socket to icl_soft_handoff_sock().
  *
  * Returns 0 on success or an errno value.  On every failure after the
  * socket was created the socket is closed before returning.
  */
 int
 icl_soft_proxy_connect(struct icl_conn *ic, int domain, int socktype,
     int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
 {
 	struct socket *so;
 	int error;
 
 	error = socreate(domain, &so, socktype, protocol,
 	    curthread->td_ucred, curthread);
 	if (error != 0)
 		return (error);
 
 	if (from_sa != NULL) {
 		error = sobind(so, from_sa, curthread);
 		if (error != 0)
 			goto fail;
 	}
 
 	error = soconnect(so, to_sa, curthread);
 	if (error != 0)
 		goto fail;
 
 	/*
 	 * Wait for the connect to finish.  A non-zero msleep() result
 	 * (including EINTR/ERESTART from PCATCH) is returned as-is; no
 	 * separate "interrupted" state is needed because the socket is
 	 * closed on any error.
 	 */
 	SOCK_LOCK(so);
 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
 		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
 		    "icl_connect", 0);
 		if (error != 0)
 			break;
 	}
 	if (error == 0) {
 		/* Pick up and clear any asynchronous connect error. */
 		error = so->so_error;
 		so->so_error = 0;
 	}
 	SOCK_UNLOCK(so);
 	if (error != 0)
 		goto fail;
 
 	error = icl_soft_handoff_sock(ic, so);
 	if (error != 0)
 		goto fail;
 
 	return (0);
 
 fail:
 	soclose(so);
 	return (error);
 }
 
 /*
  * Allocate an empty listener whose accept thread(s) will report new
  * connections through accept_cb.  The allocation uses M_WAITOK.
  */
 struct icl_listen *
 icl_listen_new(void (*accept_cb)(struct socket *, struct sockaddr *, int))
 {
 	struct icl_listen *listener;
 
 	listener = malloc(sizeof(*listener), M_ICL_PROXY, M_WAITOK | M_ZERO);
 
 	listener->il_accept = accept_cb;
 	sx_init(&listener->il_lock, "icl_listen");
 	TAILQ_INIT(&listener->il_sockets);
 
 	return (listener);
 }
 
 /*
  * Destroy a listener: for each socket on il_sockets, wait until its
  * accept thread has cleared ils_running, then unlink, close and free the
  * entry; finally free the listener itself.
  *
  * NOTE(review): il_lock is initialised with sx_init() in icl_listen_new()
  * but no sx_destroy() is visible here before the listener is freed.
  */
 void
 icl_listen_free(struct icl_listen *il)
 {
 	struct icl_listen_sock *ils;
 
 	sx_xlock(&il->il_lock);
 	while (!TAILQ_EMPTY(&il->il_sockets)) {
 		ils = TAILQ_FIRST(&il->il_sockets);
 		while (ils->ils_running) {
 			ICL_DEBUG("waiting for accept thread to terminate");
 			/*
 			 * il_lock is dropped around the termination request
 			 * and the pause below, then retaken.
 			 */
 			sx_xunlock(&il->il_lock);
-			ils->ils_disconnecting = true;
+			SOLISTEN_LOCK(ils->ils_socket);
+			ils->ils_socket->so_error = ENOTCONN;
+			SOLISTEN_UNLOCK(ils->ils_socket);
 			/* Wake whoever sleeps on so_timeo (the accept thread). */
 			wakeup(&ils->ils_socket->so_timeo);
 			pause("icl_unlisten", 1 * hz);
 			sx_xlock(&il->il_lock);
 		}
 	
 		TAILQ_REMOVE(&il->il_sockets, ils, ils_next);
 		soclose(ils->ils_socket);
 		free(ils, M_ICL_PROXY);
 	}
 	sx_xunlock(&il->il_lock);
 
 	free(il, M_ICL_PROXY);
 }
 
 /*
- * XXX: Doing accept in a separate thread in each socket might not be the best way
- * 	to do stuff, but it's pretty clean and debuggable - and you probably won't
- * 	have hundreds of listening sockets anyway.
+ * XXX: Doing accept in a separate thread in each socket might not be the
+ * best way to do stuff, but it's pretty clean and debuggable - and you
+ * probably won't have hundreds of listening sockets anyway.
  */
 /*
  * Kernel thread body: accept connections on one listening socket forever
  * and pass each accepted socket to the listener's il_accept callback.
  * arg is the struct icl_listen_sock being serviced.  The thread exits
  * once icl_listen_free() asks it to terminate.
  */
 static void
 icl_accept_thread(void *arg)
 {
 	struct icl_listen_sock *ils;
 	struct socket *head, *so;
 	struct sockaddr *sa;
 	int error;
 
 	ils = arg;
 	head = ils->ils_socket;
 
 	/* icl_listen_free() polls this flag to learn when we are gone. */
 	ils->ils_running = true;
 
 	for (;;) {
-		ACCEPT_LOCK();
-		while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0 && ils->ils_disconnecting == false) {
-			if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
-				head->so_error = ECONNABORTED;
-				break;
-			}
-			error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
-			    "accept", 0);
-			if (error) {
-				ACCEPT_UNLOCK();
-				ICL_WARN("msleep failed with error %d", error);
-				continue;
-			}
-			if (ils->ils_disconnecting) {
-				ACCEPT_UNLOCK();
-				ICL_DEBUG("terminating");
-				ils->ils_running = false;
-				kthread_exit();
-				return;
-			}
+		SOLISTEN_LOCK(head);
+		error = solisten_dequeue(head, &so, 0);
+		if (error == ENOTCONN) {
+			/*
+			 * XXXGL: ENOTCONN is our mark from icl_listen_free().
+			 * Neither socket code, nor msleep(9) may return it.
+			 */
+			ICL_DEBUG("terminating");
+			ils->ils_running = false;
+			kthread_exit();
+			return;
 		}
-		if (head->so_error) {
-			error = head->so_error;
-			head->so_error = 0;
-			ACCEPT_UNLOCK();
-			ICL_WARN("socket error %d", error);
+		if (error) {
+			ICL_WARN("solisten_dequeue error %d", error);
 			continue;
 		}
-		so = TAILQ_FIRST(&head->so_comp);
-		KASSERT(so != NULL, ("NULL so"));
-		KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
-		KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
-
-		/*
-		 * Before changing the flags on the socket, we have to bump the
-		 * reference count.  Otherwise, if the protocol calls sofree(),
-		 * the socket will be released due to a zero refcount.
-		 */
-		SOCK_LOCK(so);			/* soref() and so_state update */
-		soref(so);			/* file descriptor reference */
-
-		TAILQ_REMOVE(&head->so_comp, so, so_list);
-		head->so_qlen--;
-		so->so_state |= (head->so_state & SS_NBIO);
-		so->so_qstate &= ~SQ_COMP;
-		so->so_head = NULL;
-
-		SOCK_UNLOCK(so);
-		ACCEPT_UNLOCK();
 
 		/*
 		 * Complete the accept.  On failure release whatever address
 		 * soaccept() may have allocated, close the socket and go
 		 * back to waiting.
 		 */
 		sa = NULL;
 		error = soaccept(so, &sa);
 		if (error != 0) {
 			ICL_WARN("soaccept error %d", error);
 			if (sa != NULL)
 				free(sa, M_SONAME);
 			soclose(so);
 			continue;
 		}
 
 		/* The callback receives so and sa; neither is touched again here. */
 		(ils->ils_listen->il_accept)(so, sa, ils->ils_id);
 	}
 }
 
 static int
 icl_listen_add_tcp(struct icl_listen *il, int domain, int socktype,
     int protocol, struct sockaddr *sa, int portal_id)
 {
 	struct icl_listen_sock *ils;
 	struct socket *so;
 	struct sockopt sopt;
 	int error, one = 1;
 
 	error = socreate(domain, &so, socktype, protocol,
 	    curthread->td_ucred, curthread);
 	if (error != 0) {
 		ICL_WARN("socreate failed with error %d", error);
 		return (error);
 	}
 
 	sopt.sopt_dir = SOPT_SET;
 	sopt.sopt_level = SOL_SOCKET;
 	sopt.sopt_name = SO_REUSEADDR;
 	sopt.sopt_val = &one;
 	sopt.sopt_valsize = sizeof(one);
 	sopt.sopt_td = NULL;
 	error = sosetopt(so, &sopt);
 	if (error != 0) {
 		ICL_WARN("failed to set SO_REUSEADDR with error %d", error);
 		soclose(so);
 		return (error);
 	}
 
 	error = sobind(so, sa, curthread);
 	if (error != 0) {
 		ICL_WARN("sobind failed with error %d", error);
 		soclose(so);
 		return (error);
 	}
 
 	error = solisten(so, -1, curthread);
 	if (error != 0) {
 		ICL_WARN("solisten failed with error %d", error);
 		soclose(so);
 		return (error);
 	}
 
 	ils = malloc(sizeof(*ils), M_ICL_PROXY, M_ZERO | M_WAITOK);
 	ils->ils_listen = il;
 	ils->ils_socket = so;
 	ils->ils_id = portal_id;
 
 	error = kthread_add(icl_accept_thread, ils, NULL, NULL, 0, 0, "iclacc");
 	if (error != 0) {
 		ICL_WARN("kthread_add failed with error %d", error);
 		soclose(so);
 		free(ils, M_ICL_PROXY);
 
 		return (error);
 	}
 
 	sx_xlock(&il->il_lock);
 	TAILQ_INSERT_TAIL(&il->il_sockets, ils, ils_next);
 	sx_xunlock(&il->il_lock);
 
 	return (0);
 }
 
 /*
  * Add a listening address to il.  Only the TCP path is implemented: a
  * request with rdma set is rejected with EOPNOTSUPP, everything else is
  * forwarded unchanged to icl_listen_add_tcp().
  */
 int
 icl_listen_add(struct icl_listen *il, bool rdma, int domain, int socktype,
     int protocol, struct sockaddr *sa, int portal_id)
 {
 
 	if (!rdma) {
 		return (icl_listen_add_tcp(il, domain, socktype, protocol,
 		    sa, portal_id));
 	}
 
 	ICL_DEBUG("RDMA not supported");
 	return (EOPNOTSUPP);
 }
 
 /*
  * Judging by the name, meant to remove one listening address from a
  * listener.  Not implemented: the function uses neither argument and
  * always returns EOPNOTSUPP.
  */
 int
 icl_listen_remove(struct icl_listen *il, struct sockaddr *sa)
 {
 
 	/*
 	 * XXX
 	 */
 
 	return (EOPNOTSUPP);
 }
 
 #endif /* ICL_KERNEL_PROXY */
Index: head/sys/kern/sys_socket.c
===================================================================
--- head/sys/kern/sys_socket.c	(revision 319721)
+++ head/sys/kern/sys_socket.c	(revision 319722)
@@ -1,815 +1,818 @@
 /*-
  * Copyright (c) 1982, 1986, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)sys_socket.c	8.1 (Berkeley) 6/10/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/aio.h>
 #include <sys/domain.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/sigio.h>
 #include <sys/signal.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/filio.h>			/* XXX */
 #include <sys/sockio.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/taskqueue.h>
 #include <sys/uio.h>
 #include <sys/ucred.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
 #include <sys/user.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/pmap.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 
 /* Parent node (kern.ipc.aio) for the socket AIO counters and tunables. */
 static SYSCTL_NODE(_kern_ipc, OID_AUTO, aio, CTLFLAG_RD, NULL,
     "socket AIO stats");
 
 /*
  * Bumped in soaio_process_job() each time an EWOULDBLOCK result leads to
  * the job being retried or requeued.
  */
 static int empty_results;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, empty_results, CTLFLAG_RD, &empty_results,
     0, "socket operation returned EAGAIN");
 
 /*
  * Bumped in soaio_process_job() when, after such a result, the socket is
  * already ready again and the operation is retried immediately.
  */
 static int empty_retries;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, empty_retries, CTLFLAG_RD, &empty_retries,
     0, "socket operation retries");
 
 static fo_rdwr_t soo_read;
 static fo_rdwr_t soo_write;
 static fo_ioctl_t soo_ioctl;
 static fo_poll_t soo_poll;
 extern fo_kqfilter_t soo_kqfilter;
 static fo_stat_t soo_stat;
 static fo_close_t soo_close;
 static fo_fill_kinfo_t soo_fill_kinfo;
 static fo_aio_queue_t soo_aio_queue;
 
 static void	soo_aio_cancel(struct kaiocb *job);
 
 /*
  * File operations vector for sockets.  Read, write, ioctl, poll, kqfilter,
  * stat, close, fill_kinfo and aio_queue go to the soo_* handlers; truncate,
  * chmod, chown and sendfile are routed to the generic invfo_* handlers.
  */
 struct fileops	socketops = {
 	.fo_read = soo_read,
 	.fo_write = soo_write,
 	.fo_truncate = invfo_truncate,
 	.fo_ioctl = soo_ioctl,
 	.fo_poll = soo_poll,
 	.fo_kqfilter = soo_kqfilter,
 	.fo_stat = soo_stat,
 	.fo_close = soo_close,
 	.fo_chmod = invfo_chmod,
 	.fo_chown = invfo_chown,
 	.fo_sendfile = invfo_sendfile,
 	.fo_fill_kinfo = soo_fill_kinfo,
 	.fo_aio_queue = soo_aio_queue,
 	.fo_flags = DFLAG_PASSABLE
 };
 
 /*
  * fo_read handler: receive from the socket behind fp into uio.
  * With MAC compiled in, the receive is first checked against active_cred
  * and a denial is returned as-is.  The flags and td arguments are unused.
  */
 static int
 soo_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	struct socket *sock;
 #ifdef MAC
 	int mac_rc;
 #endif
 
 	sock = fp->f_data;
 #ifdef MAC
 	mac_rc = mac_socket_check_receive(active_cred, sock);
 	if (mac_rc != 0)
 		return (mac_rc);
 #endif
 	return (soreceive(sock, 0, uio, 0, 0, 0));
 }
 
 /*
  * fo_write handler: send the contents of uio on the socket behind fp.
  * With MAC compiled in, the send is first checked against active_cred.
  * If the send fails with EPIPE and SO_NOSIGPIPE is not set on the socket,
  * SIGPIPE is posted to the thread recorded in uio->uio_td.  The result of
  * sosend() is returned either way.
  */
 static int
 soo_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	struct socket *sock;
 	int rc;
 
 	sock = fp->f_data;
 #ifdef MAC
 	rc = mac_socket_check_send(active_cred, sock);
 	if (rc != 0)
 		return (rc);
 #endif
 	rc = sosend(sock, 0, uio, 0, 0, 0, uio->uio_td);
 	if (rc != EPIPE || (sock->so_options & SO_NOSIGPIPE) != 0)
 		return (rc);
 
 	/* Broken pipe with SIGPIPE not suppressed: signal the writer. */
 	PROC_LOCK(uio->uio_td->td_proc);
 	tdsignal(uio->uio_td, SIGPIPE);
 	PROC_UNLOCK(uio->uio_td->td_proc);
 
 	return (rc);
 }
 
 /*
  * fo_ioctl handler for sockets.
  *
  * Handles the generic file/socket requests (FIONBIO, FIOASYNC, FIONREAD,
  * FIONWRITE, FIONSPACE, FIOSETOWN/FIOGETOWN, SIOCSPGRP/SIOCGPGRP,
  * SIOCATMARK) directly.  Anything else is dispatched by ioctl group:
  * 'i' to ifioctl(), 'r' to rtioctl_fib(), the rest to the protocol's
  * pru_control method.  Returns 0 or an errno value.
  */
 static int
 soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
     struct thread *td)
 {
 	struct socket *so = fp->f_data;
 	int error = 0;
 
 	switch (cmd) {
 	case FIONBIO:
 		SOCK_LOCK(so);
 		if (*(int *)data)
 			so->so_state |= SS_NBIO;
 		else
 			so->so_state &= ~SS_NBIO;
 		SOCK_UNLOCK(so);
 		break;
 
 	case FIOASYNC:
-		/*
-		 * XXXRW: This code separately acquires SOCK_LOCK(so) and
-		 * SOCKBUF_LOCK(&so->so_rcv) even though they are the same
-		 * mutex to avoid introducing the assumption that they are
-		 * the same.
-		 */
 		/*
 		 * Besides SS_ASYNC on the socket, SB_ASYNC is mirrored either
 		 * into the listening-socket flag words or into both socket
 		 * buffers, depending on SOLISTENING().
 		 */
 		if (*(int *)data) {
 			SOCK_LOCK(so);
 			so->so_state |= SS_ASYNC;
+			if (SOLISTENING(so)) {
+				so->sol_sbrcv_flags |= SB_ASYNC;
+				so->sol_sbsnd_flags |= SB_ASYNC;
+			} else {
+				SOCKBUF_LOCK(&so->so_rcv);
+				so->so_rcv.sb_flags |= SB_ASYNC;
+				SOCKBUF_UNLOCK(&so->so_rcv);
+				SOCKBUF_LOCK(&so->so_snd);
+				so->so_snd.sb_flags |= SB_ASYNC;
+				SOCKBUF_UNLOCK(&so->so_snd);
+			}
 			SOCK_UNLOCK(so);
-			SOCKBUF_LOCK(&so->so_rcv);
-			so->so_rcv.sb_flags |= SB_ASYNC;
-			SOCKBUF_UNLOCK(&so->so_rcv);
-			SOCKBUF_LOCK(&so->so_snd);
-			so->so_snd.sb_flags |= SB_ASYNC;
-			SOCKBUF_UNLOCK(&so->so_snd);
 		} else {
 			SOCK_LOCK(so);
 			so->so_state &= ~SS_ASYNC;
+			if (SOLISTENING(so)) {
+				so->sol_sbrcv_flags &= ~SB_ASYNC;
+				so->sol_sbsnd_flags &= ~SB_ASYNC;
+			} else {
+				SOCKBUF_LOCK(&so->so_rcv);
+				so->so_rcv.sb_flags &= ~SB_ASYNC;
+				SOCKBUF_UNLOCK(&so->so_rcv);
+				SOCKBUF_LOCK(&so->so_snd);
+				so->so_snd.sb_flags &= ~SB_ASYNC;
+				SOCKBUF_UNLOCK(&so->so_snd);
+			}
 			SOCK_UNLOCK(so);
-			SOCKBUF_LOCK(&so->so_rcv);
-			so->so_rcv.sb_flags &= ~SB_ASYNC;
-			SOCKBUF_UNLOCK(&so->so_rcv);
-			SOCKBUF_LOCK(&so->so_snd);
-			so->so_snd.sb_flags &= ~SB_ASYNC;
-			SOCKBUF_UNLOCK(&so->so_snd);
 		}
 		break;
 
 	case FIONREAD:
 		/* Unlocked read. */
 		*(int *)data = sbavail(&so->so_rcv);
 		break;
 
 	case FIONWRITE:
 		/* Unlocked read. */
 		*(int *)data = sbavail(&so->so_snd);
 		break;
 
 	case FIONSPACE:
 		/* Unlocked read. */
 		/* Report 0 when the send buffer is over either of its limits. */
 		if ((so->so_snd.sb_hiwat < sbused(&so->so_snd)) ||
 		    (so->so_snd.sb_mbmax < so->so_snd.sb_mbcnt))
 			*(int *)data = 0;
 		else
 			*(int *)data = sbspace(&so->so_snd);
 		break;
 
 	case FIOSETOWN:
 		error = fsetown(*(int *)data, &so->so_sigio);
 		break;
 
 	case FIOGETOWN:
 		*(int *)data = fgetown(&so->so_sigio);
 		break;
 
 	/* The SIOC*PGRP forms pass the owner id with its sign flipped. */
 	case SIOCSPGRP:
 		error = fsetown(-(*(int *)data), &so->so_sigio);
 		break;
 
 	case SIOCGPGRP:
 		*(int *)data = -fgetown(&so->so_sigio);
 		break;
 
 	case SIOCATMARK:
 		/* Unlocked read. */
 		*(int *)data = (so->so_rcv.sb_state & SBS_RCVATMARK) != 0;
 		break;
 	default:
 		/*
 		 * Interface/routing/protocol specific ioctls: interface and
 		 * routing ioctls should have a different entry since a
 		 * socket is unnecessary.
 		 */
 		if (IOCGROUP(cmd) == 'i')
 			error = ifioctl(so, cmd, data, td);
 		else if (IOCGROUP(cmd) == 'r') {
 			CURVNET_SET(so->so_vnet);
 			error = rtioctl_fib(cmd, data, so->so_fibnum);
 			CURVNET_RESTORE();
 		} else {
 			CURVNET_SET(so->so_vnet);
 			error = ((*so->so_proto->pr_usrreqs->pru_control)
 			    (so, cmd, data, 0, td));
 			CURVNET_RESTORE();
 		}
 		break;
 	}
 	return (error);
 }
 
 /*
  * fo_poll handler: forward to sopoll() using the credentials stored in
  * the file.  With MAC compiled in, the poll is first checked against
  * active_cred and a denial is returned as-is.
  */
 static int
 soo_poll(struct file *fp, int events, struct ucred *active_cred,
     struct thread *td)
 {
 	struct socket *sock;
 #ifdef MAC
 	int mac_rc;
 #endif
 
 	sock = fp->f_data;
 #ifdef MAC
 	mac_rc = mac_socket_check_poll(active_cred, sock);
 	if (mac_rc != 0)
 		return (mac_rc);
 #endif
 	return (sopoll(sock, events, fp->f_cred, td));
 }
 
 static int
 soo_stat(struct file *fp, struct stat *ub, struct ucred *active_cred,
     struct thread *td)
 {
 	struct socket *so = fp->f_data;
 	struct sockbuf *sb;
 #ifdef MAC
 	int error;
 #endif
 
 	bzero((caddr_t)ub, sizeof (*ub));
 	ub->st_mode = S_IFSOCK;
 #ifdef MAC
 	error = mac_socket_check_stat(active_cred, so);
 	if (error)
 		return (error);
 #endif
 	/*
 	 * If SBS_CANTRCVMORE is set, but there's still data left in the
 	 * receive buffer, the socket is still readable.
 	 */
 	sb = &so->so_rcv;
 	SOCKBUF_LOCK(sb);
 	if ((sb->sb_state & SBS_CANTRCVMORE) == 0 || sbavail(sb))
 		ub->st_mode |= S_IRUSR | S_IRGRP | S_IROTH;
 	ub->st_size = sbavail(sb) - sb->sb_ctl;
 	SOCKBUF_UNLOCK(sb);
 
 	sb = &so->so_snd;
 	SOCKBUF_LOCK(sb);
 	if ((sb->sb_state & SBS_CANTSENDMORE) == 0)
 		ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH;
 	SOCKBUF_UNLOCK(sb);
 	ub->st_uid = so->so_cred->cr_uid;
 	ub->st_gid = so->so_cred->cr_gid;
 	return (*so->so_proto->pr_usrreqs->pru_sense)(so, ub);
 }
 
 /*
  * API socket close on file pointer.  We call soclose() to close the socket
  * (including initiating closing protocols).  soclose() will sorele() the
  * file reference but the actual socket will not go away until the socket's
  * ref count hits 0.
  */
 static int
 soo_close(struct file *fp, struct thread *td)
 {
 	struct socket *sock;
 
 	/* Detach the socket from the file before closing it. */
 	sock = fp->f_data;
 	fp->f_ops = &badfileops;
 	fp->f_data = NULL;
 
 	/* A file with no socket attached closes trivially. */
 	if (sock == NULL)
 		return (0);
 	return (soclose(sock));
 }
 
 /*
  * fo_fill_kinfo handler: describe the socket behind fp in *kif.
  *
  * Records domain, type, protocol and pcb pointer for every socket.  For
  * TCP over AF_INET/AF_INET6 with a pcb attached it adds the inp_ppcb
  * pointer and the send/receive queue sizes; for AF_UNIX sockets with a
  * connected peer it adds the peer pcb, both sockbuf state words and the
  * queue sizes.  Local and peer addresses are copied when the protocol
  * returns them and they fit their destination fields; kf_path receives
  * the domain name.  Always returns 0.  The fdp argument is unused.
  */
 static int
 soo_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
 {
 	struct sockaddr *sa;
 	struct inpcb *inpcb;
 	struct unpcb *unpcb;
 	struct socket *so;
 	int error;
 
 	kif->kf_type = KF_TYPE_SOCKET;
 	so = fp->f_data;
 	kif->kf_un.kf_sock.kf_sock_domain0 =
 	    so->so_proto->pr_domain->dom_family;
 	kif->kf_un.kf_sock.kf_sock_type0 = so->so_type;
 	kif->kf_un.kf_sock.kf_sock_protocol0 = so->so_proto->pr_protocol;
 	kif->kf_un.kf_sock.kf_sock_pcb = (uintptr_t)so->so_pcb;
 	switch (kif->kf_un.kf_sock.kf_sock_domain0) {
 	case AF_INET:
 	case AF_INET6:
 		if (kif->kf_un.kf_sock.kf_sock_protocol0 == IPPROTO_TCP) {
 			if (so->so_pcb != NULL) {
 				inpcb = (struct inpcb *)(so->so_pcb);
 				kif->kf_un.kf_sock.kf_sock_inpcb =
 				    (uintptr_t)inpcb->inp_ppcb;
 				kif->kf_un.kf_sock.kf_sock_sendq =
 				    sbused(&so->so_snd);
 				kif->kf_un.kf_sock.kf_sock_recvq =
 				    sbused(&so->so_rcv);
 			}
 		}
 		break;
 	case AF_UNIX:
 		if (so->so_pcb != NULL) {
 			unpcb = (struct unpcb *)(so->so_pcb);
 			if (unpcb->unp_conn) {
 				kif->kf_un.kf_sock.kf_sock_unpconn =
 				    (uintptr_t)unpcb->unp_conn;
 				kif->kf_un.kf_sock.kf_sock_rcv_sb_state =
 				    so->so_rcv.sb_state;
 				kif->kf_un.kf_sock.kf_sock_snd_sb_state =
 				    so->so_snd.sb_state;
 				kif->kf_un.kf_sock.kf_sock_sendq =
 				    sbused(&so->so_snd);
 				kif->kf_un.kf_sock.kf_sock_recvq =
 				    sbused(&so->so_rcv);
 			}
 		}
 		break;
 	}
 
 	/*
 	 * On success the protocol hands back an M_SONAME allocation.  It
 	 * must be released whether or not it fits the destination field;
 	 * an oversized address is simply not copied.
 	 */
 	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
 	if (error == 0) {
 		if (sa->sa_len <= sizeof(kif->kf_un.kf_sock.kf_sa_local))
 			bcopy(sa, &kif->kf_un.kf_sock.kf_sa_local,
 			    sa->sa_len);
 		free(sa, M_SONAME);
 	}
 	error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa);
 	if (error == 0) {
 		if (sa->sa_len <= sizeof(kif->kf_un.kf_sock.kf_sa_peer))
 			bcopy(sa, &kif->kf_un.kf_sock.kf_sa_peer,
 			    sa->sa_len);
 		free(sa, M_SONAME);
 	}
 	strncpy(kif->kf_path, so->so_proto->pr_domain->dom_name,
 	    sizeof(kif->kf_path));
 	return (0);
 }
 
 /*
  * Use the 'backend3' field in AIO jobs to store the amount of data
  * completed by the AIO job so far.
  */
 #define	aio_done	backend3
 
 /* Queue of socket AIO tasks waiting for a worker; see soaio_enqueue(). */
 static STAILQ_HEAD(, task) soaio_jobs;
 /* Held around accesses to the queue and to the counters below. */
 static struct mtx soaio_jobs_lock;
 /* Task that runs soaio_kproc_create() to spawn more workers. */
 static struct task soaio_kproc_task;
 /*
  * soaio_starting: workers created that have not yet entered their loop.
  * soaio_idle:     workers currently sleeping for work.
  * soaio_queued:   tasks currently sitting on soaio_jobs.
  */
 static int soaio_starting, soaio_idle, soaio_queued;
 /* Allocator for the numeric suffix of the "soaiod%d" process names. */
 static struct unrhdr *soaio_kproc_unr;
 
 static int soaio_max_procs = MAX_AIO_PROCS;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, max_procs, CTLFLAG_RW, &soaio_max_procs, 0,
     "Maximum number of kernel processes to use for async socket IO");
 
 static int soaio_num_procs;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, num_procs, CTLFLAG_RD, &soaio_num_procs, 0,
     "Number of active kernel processes for async socket IO");
 
 static int soaio_target_procs = TARGET_AIO_PROCS;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, target_procs, CTLFLAG_RD,
     &soaio_target_procs, 0,
     "Preferred number of ready kernel processes for async socket IO");
 
 /* Timeout passed to mtx_sleep() by an idle worker; set in soaio_init(). */
 static int soaio_lifetime;
 SYSCTL_INT(_kern_ipc_aio, OID_AUTO, lifetime, CTLFLAG_RW, &soaio_lifetime, 0,
     "Maximum lifetime for idle aiod");
 
 /*
  * Body of a socket AIO worker process.  arg carries the unit number that
  * soaio_kproc_create() allocated for this worker.
  *
  * The worker repeatedly drains soaio_jobs, running each task with
  * soaio_jobs_lock dropped, then sleeps for up to soaio_lifetime waiting
  * for more.  It exits when the sleep times out, the queue is still empty
  * and more workers exist than soaio_target_procs.
  */
 static void
 soaio_kproc_loop(void *arg)
 {
 	struct proc *p;
 	struct vmspace *myvm;
 	struct task *task;
 	int error, id, pending;
 
 	id = (intptr_t)arg;
 
 	/*
 	 * Grab an extra reference on the daemon's vmspace so that it
 	 * doesn't get freed by jobs that switch to a different
 	 * vmspace.
 	 */
 	p = curproc;
 	myvm = vmspace_acquire_ref(p);
 
 	mtx_lock(&soaio_jobs_lock);
 	/* Counterpart of the soaio_starting++ done by the creator. */
 	MPASS(soaio_starting > 0);
 	soaio_starting--;
 	for (;;) {
 		while (!STAILQ_EMPTY(&soaio_jobs)) {
 			task = STAILQ_FIRST(&soaio_jobs);
 			STAILQ_REMOVE_HEAD(&soaio_jobs, ta_link);
 			soaio_queued--;
 			/* Snapshot and reset ta_pending before unlocking. */
 			pending = task->ta_pending;
 			task->ta_pending = 0;
 			mtx_unlock(&soaio_jobs_lock);
 
 			task->ta_func(task->ta_context, pending);
 
 			mtx_lock(&soaio_jobs_lock);
 		}
 		MPASS(soaio_queued == 0);
 
 		/*
 		 * A job left us in another vmspace: switch back to our own,
 		 * then recheck the queue since the lock was dropped.
 		 */
 		if (p->p_vmspace != myvm) {
 			mtx_unlock(&soaio_jobs_lock);
 			vmspace_switch_aio(myvm);
 			mtx_lock(&soaio_jobs_lock);
 			continue;
 		}
 
 		soaio_idle++;
 		error = mtx_sleep(&soaio_idle, &soaio_jobs_lock, 0, "-",
 		    soaio_lifetime);
 		soaio_idle--;
 		/* Timed out with nothing to do and workers to spare: retire. */
 		if (error == EWOULDBLOCK && STAILQ_EMPTY(&soaio_jobs) &&
 		    soaio_num_procs > soaio_target_procs)
 			break;
 	}
 	soaio_num_procs--;
 	mtx_unlock(&soaio_jobs_lock);
 	free_unr(soaio_kproc_unr, id);
 	kproc_exit(0);
 }
 
 static void
 soaio_kproc_create(void *context, int pending)
 {
 	struct proc *p;
 	int error, id;
 
 	mtx_lock(&soaio_jobs_lock);
 	for (;;) {
 		if (soaio_num_procs < soaio_target_procs) {
 			/* Must create */
 		} else if (soaio_num_procs >= soaio_max_procs) {
 			/*
 			 * Hit the limit on kernel processes, don't
 			 * create another one.
 			 */
 			break;
 		} else if (soaio_queued <= soaio_idle + soaio_starting) {
 			/*
 			 * No more AIO jobs waiting for a process to be
 			 * created, so stop.
 			 */
 			break;
 		}
 		soaio_starting++;
 		mtx_unlock(&soaio_jobs_lock);
 
 		id = alloc_unr(soaio_kproc_unr);
 		error = kproc_create(soaio_kproc_loop, (void *)(intptr_t)id,
 		    &p, 0, 0, "soaiod%d", id);
 		if (error != 0) {
 			free_unr(soaio_kproc_unr, id);
 			mtx_lock(&soaio_jobs_lock);
 			soaio_starting--;
 			break;
 		}
 
 		mtx_lock(&soaio_jobs_lock);
 		soaio_num_procs++;
 	}
 	mtx_unlock(&soaio_jobs_lock);
 }
 
 /*
  * Queue a task for the socket AIO workers.  If enough idle workers exist
  * to cover the queue one of them is woken; otherwise, while the process
  * limit allows, the worker-creation task is scheduled.
  */
 void
 soaio_enqueue(struct task *task)
 {
 
 	mtx_lock(&soaio_jobs_lock);
 
 	MPASS(task->ta_pending == 0);
 	task->ta_pending++;
 	STAILQ_INSERT_TAIL(&soaio_jobs, task, ta_link);
 	soaio_queued++;
 
 	if (soaio_queued > soaio_idle) {
 		if (soaio_num_procs < soaio_max_procs) {
 			taskqueue_enqueue(taskqueue_thread,
 			    &soaio_kproc_task);
 		}
 	} else {
 		wakeup_one(&soaio_idle);
 	}
 
 	mtx_unlock(&soaio_jobs_lock);
 }
 
 /*
  * One-time setup of the socket AIO worker pool: job queue, its lock, the
  * unit-number allocator, the default idle lifetime and the creation task.
  * If a positive number of ready workers is wanted, creation is kicked off
  * right away.
  */
 static void
 soaio_init(void)
 {
 
 	mtx_init(&soaio_jobs_lock, "soaio jobs", NULL, MTX_DEF);
 	STAILQ_INIT(&soaio_jobs);
 	soaio_kproc_unr = new_unrhdr(1, INT_MAX, NULL);
 	soaio_lifetime = AIOD_LIFETIME_DEFAULT;
 	TASK_INIT(&soaio_kproc_task, 0, soaio_kproc_create, NULL);
 
 	if (soaio_target_procs <= 0)
 		return;
 	taskqueue_enqueue(taskqueue_thread, &soaio_kproc_task);
 }
 SYSINIT(soaio, SI_SUB_VFS, SI_ORDER_ANY, soaio_init, NULL);
 
 /*
  * Non-zero when the side of so that sb belongs to can make progress:
  * readable for the receive buffer, writable for anything else.
  */
 static __inline int
 soaio_ready(struct socket *so, struct sockbuf *sb)
 {
 
 	if (sb == &so->so_rcv)
 		return (soreadable(so));
 	return (sowriteable(so));
 }
 
 /*
  * Run one AIO job against socket buffer sb of socket so.
  *
  * Entered with sb locked; the lock is dropped for the duration of the
  * I/O and is held again on every return path.  The transfer resumes at
  * job->aio_done bytes into the user buffer and runs with MSG_NBIO under
  * the job's credentials.  Depending on the outcome the job is completed,
  * cancelled, retried at once, or put back at the head of sb's job queue.
  */
 static void
 soaio_process_job(struct socket *so, struct sockbuf *sb, struct kaiocb *job)
 {
 	struct ucred *td_savedcred;
 	struct thread *td;
 	struct file *fp;
 	struct uio uio;
 	struct iovec iov;
 	size_t cnt, done;
 	long ru_before;
 	int error, flags;
 
 	SOCKBUF_UNLOCK(sb);
 	aio_switch_vmspace(job);
 	td = curthread;
 	fp = job->fd_file;
 retry:
 	/* Borrow the job's credentials for the duration of the I/O. */
 	td_savedcred = td->td_ucred;
 	td->td_ucred = job->cred;
 
 	/* Describe the part of the user buffer not yet transferred. */
 	done = job->aio_done;
 	cnt = job->uaiocb.aio_nbytes - done;
 	iov.iov_base = (void *)((uintptr_t)job->uaiocb.aio_buf + done);
 	iov.iov_len = cnt;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = 0;
 	uio.uio_resid = cnt;
 	uio.uio_segflg = UIO_USERSPACE;
 	uio.uio_td = td;
 	flags = MSG_NBIO;
 
 	/*
 	 * For resource usage accounting, only count a completed request
 	 * as a single message to avoid counting multiple calls to
 	 * sosend/soreceive on a blocking socket.
 	 */
 
 	if (sb == &so->so_rcv) {
 		uio.uio_rw = UIO_READ;
 		ru_before = td->td_ru.ru_msgrcv;
 #ifdef MAC
 		error = mac_socket_check_receive(fp->f_cred, so);
 		if (error == 0)
 
 #endif
 			error = soreceive(so, NULL, &uio, NULL, NULL, &flags);
 		if (td->td_ru.ru_msgrcv != ru_before)
 			job->msgrcv = 1;
 	} else {
 		/* More writes are queued behind this one. */
 		if (!TAILQ_EMPTY(&sb->sb_aiojobq))
 			flags |= MSG_MORETOCOME;
 		uio.uio_rw = UIO_WRITE;
 		ru_before = td->td_ru.ru_msgsnd;
 #ifdef MAC
 		error = mac_socket_check_send(fp->f_cred, so);
 		if (error == 0)
 #endif
 			error = sosend(so, NULL, &uio, NULL, NULL, flags, td);
 		if (td->td_ru.ru_msgsnd != ru_before)
 			job->msgsnd = 1;
 		if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) {
 			PROC_LOCK(job->userproc);
 			kern_psignal(job->userproc, SIGPIPE);
 			PROC_UNLOCK(job->userproc);
 		}
 	}
 
 	/* Record progress and give the thread its own credentials back. */
 	done += cnt - uio.uio_resid;
 	job->aio_done = done;
 	td->td_ucred = td_savedcred;
 
 	if (error == EWOULDBLOCK) {
 		/*
 		 * The request was either partially completed or not
 		 * completed at all due to racing with a read() or
 		 * write() on the socket.  If the socket is
 		 * non-blocking, return with any partial completion.
 		 * If the socket is blocking or if no progress has
 		 * been made, requeue this request at the head of the
 		 * queue to try again when the socket is ready.
 		 */
 		MPASS(done != job->uaiocb.aio_nbytes);
 		SOCKBUF_LOCK(sb);
 		if (done == 0 || !(so->so_state & SS_NBIO)) {
 			empty_results++;
 			if (soaio_ready(so, sb)) {
 				empty_retries++;
 				SOCKBUF_UNLOCK(sb);
 				goto retry;
 			}
 			
 			/*
 			 * If the cancel routine cannot be installed, finish
 			 * the job here instead of requeueing it.
 			 */
 			if (!aio_set_cancel_function(job, soo_aio_cancel)) {
 				SOCKBUF_UNLOCK(sb);
 				if (done != 0)
 					aio_complete(job, done, 0);
 				else
 					aio_cancel(job);
 				SOCKBUF_LOCK(sb);
 			} else {
 				TAILQ_INSERT_HEAD(&sb->sb_aiojobq, job, list);
 			}
 			return;
 		}
 		SOCKBUF_UNLOCK(sb);
 	}		
 	/* A transient error after partial progress is reported as success. */
 	if (done != 0 && (error == ERESTART || error == EINTR ||
 	    error == EWOULDBLOCK))
 		error = 0;
 	if (error)
 		aio_complete(job, -1, error);
 	else
 		aio_complete(job, done, 0);
 	SOCKBUF_LOCK(sb);
 }
 
 /*
  * Worker-side handler for one socket buffer: run queued AIO jobs for as
  * long as the socket stays ready, then release a socket reference with
  * sorele() (presumably the one taken by sowakeup_aio() -- TODO confirm).
  */
 static void
 soaio_process_sb(struct socket *so, struct sockbuf *sb)
 {
 	struct kaiocb *job;
 
 	SOCKBUF_LOCK(sb);
 	while (!TAILQ_EMPTY(&sb->sb_aiojobq) && soaio_ready(so, sb)) {
 		job = TAILQ_FIRST(&sb->sb_aiojobq);
 		TAILQ_REMOVE(&sb->sb_aiojobq, job, list);
 		/* Skip a job whose cancel function could not be cleared. */
 		if (!aio_clear_cancel_function(job))
 			continue;
 
 		soaio_process_job(so, sb, job);
 	}
 
 	/*
 	 * If there are still pending requests, the socket must not be
 	 * ready so set SB_AIO to request a wakeup when the socket
 	 * becomes ready.
 	 */
 	if (!TAILQ_EMPTY(&sb->sb_aiojobq))
 		sb->sb_flags |= SB_AIO;
 	sb->sb_flags &= ~SB_AIO_RUNNING;
 	SOCKBUF_UNLOCK(sb);
 
-	ACCEPT_LOCK();
 	/* No SOCK_UNLOCK() follows; sorele() is the last use of so here. */
 	SOCK_LOCK(so);
 	sorele(so);
 }
 
 /*
  * Task entry point for the receive side: context is the socket whose
  * receive buffer should have its AIO jobs processed.  pending is unused.
  */
 void
 soaio_rcv(void *context, int pending)
 {
 	struct socket *sock = context;
 
 	soaio_process_sb(sock, &sock->so_rcv);
 }
 
 /*
  * Task entry point for the send side: context is the socket whose send
  * buffer should have its AIO jobs processed.  pending is unused.
  */
 void
 soaio_snd(void *context, int pending)
 {
 	struct socket *sock = context;
 
 	soaio_process_sb(sock, &sock->so_snd);
 }
 
 /*
  * Schedule AIO processing for sb, which the caller must hold locked.
  *
  * Clears SB_AIO; if processing is already marked running nothing more is
  * done.  Otherwise SB_AIO_RUNNING is set, a reference is taken on the
  * socket and sb's AIO task is queued for the workers.
  */
 void
 sowakeup_aio(struct socket *so, struct sockbuf *sb)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	sb->sb_flags &= ~SB_AIO;
 	if ((sb->sb_flags & SB_AIO_RUNNING) != 0)
 		return;
 	sb->sb_flags |= SB_AIO_RUNNING;
 
 	/*
 	 * Only the send side takes the socket lock around soref() here
 	 * (presumably the receive sockbuf lock already covers it --
 	 * TODO confirm).
 	 */
 	if (sb == &so->so_snd) {
 		SOCK_LOCK(so);
 		soref(so);
 		SOCK_UNLOCK(so);
 	} else {
 		soref(so);
 	}
 
 	soaio_enqueue(&sb->sb_aiotask);
 }
 
 /*
  * Cancel routine installed on queued socket AIO jobs.
  *
  * Unlinks the job from its socket buffer's queue (unless the cancel
  * state says it was already cleared), drops SB_AIO when the queue is
  * left empty, and then finishes the job: as a completion carrying the
  * bytes already transferred, or as a plain cancellation if there were
  * none.
  */
 static void
 soo_aio_cancel(struct kaiocb *job)
 {
 	struct socket *sock;
 	struct sockbuf *buf;
 	long xferred;
 	int op;
 
 	sock = job->fd_file->f_data;
 	op = job->uaiocb.aio_lio_opcode;
 	if (op != LIO_READ) {
 		MPASS(op == LIO_WRITE);
 		buf = &sock->so_snd;
 	} else {
 		buf = &sock->so_rcv;
 	}
 
 	SOCKBUF_LOCK(buf);
 	if (!aio_cancel_cleared(job))
 		TAILQ_REMOVE(&buf->sb_aiojobq, job, list);
 	if (TAILQ_EMPTY(&buf->sb_aiojobq))
 		buf->sb_flags &= ~SB_AIO;
 	SOCKBUF_UNLOCK(buf);
 
 	xferred = job->aio_done;
 	if (xferred == 0) {
 		aio_cancel(job);
 		return;
 	}
 	aio_complete(job, xferred, 0);
 }
 
 /*
  * fo_aio_queue handler for sockets.
  *
  * The protocol's pru_aio_queue method is tried first; a zero return means
  * it took the job.  Otherwise LIO_READ and LIO_WRITE jobs are appended to
  * the receive or send buffer's job queue (any other opcode yields EINVAL).
  * Unless processing is already running for that buffer, it is started now
  * if the socket is ready, or SB_AIO is set to request a later wakeup.
  */
 static int
 soo_aio_queue(struct file *fp, struct kaiocb *job)
 {
 	struct socket *sock;
 	struct sockbuf *buf;
 	int opcode;
 
 	sock = fp->f_data;
 	if ((*sock->so_proto->pr_usrreqs->pru_aio_queue)(sock, job) == 0)
 		return (0);
 
 	opcode = job->uaiocb.aio_lio_opcode;
 	if (opcode == LIO_READ)
 		buf = &sock->so_rcv;
 	else if (opcode == LIO_WRITE)
 		buf = &sock->so_snd;
 	else
 		return (EINVAL);
 
 	SOCKBUF_LOCK(buf);
 	if (!aio_set_cancel_function(job, soo_aio_cancel))
 		panic("new job was cancelled");
 	TAILQ_INSERT_TAIL(&buf->sb_aiojobq, job, list);
 	if ((buf->sb_flags & SB_AIO_RUNNING) == 0) {
 		if (soaio_ready(sock, buf))
 			sowakeup_aio(sock, buf);
 		else
 			buf->sb_flags |= SB_AIO;
 	}
 	SOCKBUF_UNLOCK(buf);
 
 	return (0);
 }
Index: head/sys/kern/uipc_accf.c
===================================================================
--- head/sys/kern/uipc_accf.c	(revision 319721)
+++ head/sys/kern/uipc_accf.c	(revision 319722)
@@ -1,292 +1,306 @@
 /*-
  * Copyright (c) 2000 Paycounter, Inc.
  * Copyright (c) 2005 Robert N. M. Watson
  * Author: Alfred Perlstein <alfred@paycounter.com>, <alfred@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #define ACCEPT_FILTER_MOD
 
 #include "opt_param.h"
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/domain.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/sysctl.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/queue.h>
 
 static struct mtx accept_filter_mtx;
 MTX_SYSINIT(accept_filter, &accept_filter_mtx, "accept_filter_mtx",
 	MTX_DEF);
 #define	ACCEPT_FILTER_LOCK()	mtx_lock(&accept_filter_mtx)
 #define	ACCEPT_FILTER_UNLOCK()	mtx_unlock(&accept_filter_mtx)
 
 static SLIST_HEAD(, accept_filter) accept_filtlsthd =
 	SLIST_HEAD_INITIALIZER(accept_filtlsthd);
 
 MALLOC_DEFINE(M_ACCF, "accf", "accept filter data");
 
 static int unloadable = 0;
 
 SYSCTL_NODE(_net, OID_AUTO, accf, CTLFLAG_RW, 0, "Accept filters");
 SYSCTL_INT(_net_accf, OID_AUTO, unloadable, CTLFLAG_RW, &unloadable, 0,
 	"Allow unload of accept filters (not recommended)");
 
 /*
  * Must be passed a malloc'd structure so we don't explode if the kld is
  * unloaded, we leak the struct on deallocation to deal with this, but if a
  * filter is loaded with the same name as a leaked one we re-use the entry.
  */
 int
 accept_filt_add(struct accept_filter *filt)
 {
 	struct accept_filter *p;
 
 	ACCEPT_FILTER_LOCK();
 	SLIST_FOREACH(p, &accept_filtlsthd, accf_next)
 		if (strcmp(p->accf_name, filt->accf_name) == 0)  {
 			if (p->accf_callback != NULL) {
 				ACCEPT_FILTER_UNLOCK();
 				return (EEXIST);
 			} else {
 				p->accf_callback = filt->accf_callback;
 				ACCEPT_FILTER_UNLOCK();
 				free(filt, M_ACCF);
 				return (0);
 			}
 		}
 				
 	if (p == NULL)
 		SLIST_INSERT_HEAD(&accept_filtlsthd, filt, accf_next);
 	ACCEPT_FILTER_UNLOCK();
 	return (0);
 }
 
 int
 accept_filt_del(char *name)
 {
 	struct accept_filter *p;
 
 	p = accept_filt_get(name);
 	if (p == NULL)
 		return (ENOENT);
 
 	p->accf_callback = NULL;
 	return (0);
 }
 
 struct accept_filter *
 accept_filt_get(char *name)
 {
 	struct accept_filter *p;
 
 	ACCEPT_FILTER_LOCK();
 	SLIST_FOREACH(p, &accept_filtlsthd, accf_next)
 		if (strcmp(p->accf_name, name) == 0)
 			break;
 	ACCEPT_FILTER_UNLOCK();
 
 	return (p);
 }
 
 int
 accept_filt_generic_mod_event(module_t mod, int event, void *data)
 {
 	struct accept_filter *p;
 	struct accept_filter *accfp = (struct accept_filter *) data;
 	int error;
 
 	switch (event) {
 	case MOD_LOAD:
 		p = malloc(sizeof(*p), M_ACCF, M_WAITOK);
 		bcopy(accfp, p, sizeof(*p));
 		error = accept_filt_add(p);
 		break;
 
 	case MOD_UNLOAD:
 		/*
 		 * Do not support unloading yet. we don't keep track of
 		 * refcounts and unloading an accept filter callback and then
 		 * having it called is a bad thing.  A simple fix would be to
 		 * track the refcount in the struct accept_filter.
 		 */
 		if (unloadable != 0) {
 			error = accept_filt_del(accfp->accf_name);
 		} else
 			error = EOPNOTSUPP;
 		break;
 
 	case MOD_SHUTDOWN:
 		error = 0;
 		break;
 
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 
 	return (error);
 }
 
 int
 accept_filt_getopt(struct socket *so, struct sockopt *sopt)
 {
 	struct accept_filter_arg *afap;
 	int error;
 
 	error = 0;
 	afap = malloc(sizeof(*afap), M_TEMP, M_WAITOK | M_ZERO);
 	SOCK_LOCK(so);
 	if ((so->so_options & SO_ACCEPTCONN) == 0) {
 		error = EINVAL;
 		goto out;
 	}
-	if ((so->so_options & SO_ACCEPTFILTER) == 0) {
+	if (so->sol_accept_filter == NULL) {
 		error = EINVAL;
 		goto out;
 	}
-	strcpy(afap->af_name, so->so_accf->so_accept_filter->accf_name);
-	if (so->so_accf->so_accept_filter_str != NULL)
-		strcpy(afap->af_arg, so->so_accf->so_accept_filter_str);
+	strcpy(afap->af_name, so->sol_accept_filter->accf_name);
+	if (so->sol_accept_filter_str != NULL)
+		strcpy(afap->af_arg, so->sol_accept_filter_str);
 out:
 	SOCK_UNLOCK(so);
 	if (error == 0)
 		error = sooptcopyout(sopt, afap, sizeof(*afap));
 	free(afap, M_TEMP);
 	return (error);
 }
 
 int
 accept_filt_setopt(struct socket *so, struct sockopt *sopt)
 {
 	struct accept_filter_arg *afap;
 	struct accept_filter *afp;
-	struct so_accf *newaf;
-	int error = 0;
+	char *accept_filter_str = NULL;
+	void *accept_filter_arg = NULL;
+	int error;
 
 	/*
 	 * Handle the simple delete case first.
 	 */
 	if (sopt == NULL || sopt->sopt_val == NULL) {
+		struct socket *sp, *sp1;
+		int wakeup;
+
 		SOCK_LOCK(so);
 		if ((so->so_options & SO_ACCEPTCONN) == 0) {
 			SOCK_UNLOCK(so);
 			return (EINVAL);
 		}
-		if (so->so_accf != NULL) {
-			struct so_accf *af = so->so_accf;
-			if (af->so_accept_filter != NULL &&
-				af->so_accept_filter->accf_destroy != NULL) {
-				af->so_accept_filter->accf_destroy(so);
-			}
-			if (af->so_accept_filter_str != NULL)
-				free(af->so_accept_filter_str, M_ACCF);
-			free(af, M_ACCF);
-			so->so_accf = NULL;
+		if (so->sol_accept_filter == NULL) {
+			SOCK_UNLOCK(so);
+			return (0);
 		}
+		if (so->sol_accept_filter->accf_destroy != NULL)
+			so->sol_accept_filter->accf_destroy(so);
+		if (so->sol_accept_filter_str != NULL)
+			free(so->sol_accept_filter_str, M_ACCF);
+		so->sol_accept_filter = NULL;
+		so->sol_accept_filter_arg = NULL;
+		so->sol_accept_filter_str = NULL;
 		so->so_options &= ~SO_ACCEPTFILTER;
-		SOCK_UNLOCK(so);
+
+		/*
+		 * Move from incomplete queue to complete only those
+		 * connections, that are blocked by us.
+		 */
+		wakeup = 0;
+		TAILQ_FOREACH_SAFE(sp, &so->sol_incomp, so_list, sp1) {
+			SOCK_LOCK(sp);
+			if (sp->so_options & SO_ACCEPTFILTER) {
+				TAILQ_REMOVE(&so->sol_incomp, sp, so_list);
+				TAILQ_INSERT_TAIL(&so->sol_comp, sp, so_list);
+				sp->so_qstate = SQ_COMP;
+				sp->so_options &= ~SO_ACCEPTFILTER;
+				so->sol_incqlen--;
+				so->sol_qlen++;
+				wakeup = 1;
+			}
+			SOCK_UNLOCK(sp);
+		}
+		if (wakeup)
+			solisten_wakeup(so);  /* unlocks */
+		else
+			SOLISTEN_UNLOCK(so);
 		return (0);
 	}
 
 	/*
 	 * Pre-allocate any memory we may need later to avoid blocking at
 	 * untimely moments.  This does not optimize for invalid arguments.
 	 */
 	afap = malloc(sizeof(*afap), M_TEMP, M_WAITOK);
 	error = sooptcopyin(sopt, afap, sizeof *afap, sizeof *afap);
 	afap->af_name[sizeof(afap->af_name)-1] = '\0';
 	afap->af_arg[sizeof(afap->af_arg)-1] = '\0';
 	if (error) {
 		free(afap, M_TEMP);
 		return (error);
 	}
 	afp = accept_filt_get(afap->af_name);
 	if (afp == NULL) {
 		free(afap, M_TEMP);
 		return (ENOENT);
 	}
-	/*
-	 * Allocate the new accept filter instance storage.  We may
-	 * have to free it again later if we fail to attach it.  If
-	 * attached properly, 'newaf' is NULLed to avoid a free()
-	 * while in use.
-	 */
-	newaf = malloc(sizeof(*newaf), M_ACCF, M_WAITOK | M_ZERO);
 	if (afp->accf_create != NULL && afap->af_name[0] != '\0') {
 		size_t len = strlen(afap->af_name) + 1;
-		newaf->so_accept_filter_str = malloc(len, M_ACCF, M_WAITOK);
-		strcpy(newaf->so_accept_filter_str, afap->af_name);
+		accept_filter_str = malloc(len, M_ACCF, M_WAITOK);
+		strcpy(accept_filter_str, afap->af_name);
 	}
 
 	/*
 	 * Require a listen socket; don't try to replace an existing filter
 	 * without first removing it.
 	 */
 	SOCK_LOCK(so);
-	if (((so->so_options & SO_ACCEPTCONN) == 0) ||
-	    (so->so_accf != NULL)) {
+	if ((so->so_options & SO_ACCEPTCONN) == 0 ||
+	    so->sol_accept_filter != NULL) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/*
 	 * Invoke the accf_create() method of the filter if required.  The
 	 * socket mutex is held over this call, so create methods for filters
 	 * can't block.
 	 */
 	if (afp->accf_create != NULL) {
-		newaf->so_accept_filter_arg =
-		    afp->accf_create(so, afap->af_arg);
-		if (newaf->so_accept_filter_arg == NULL) {
+		accept_filter_arg = afp->accf_create(so, afap->af_arg);
+		if (accept_filter_arg == NULL) {
 			error = EINVAL;
 			goto out;
 		}
 	}
-	newaf->so_accept_filter = afp;
-	so->so_accf = newaf;
+	so->sol_accept_filter = afp;
+	so->sol_accept_filter_arg = accept_filter_arg;
+	so->sol_accept_filter_str = accept_filter_str;
+	accept_filter_str = NULL;	/* socket owns it now; don't free below */
 	so->so_options |= SO_ACCEPTFILTER;
-	newaf = NULL;
 out:
 	SOCK_UNLOCK(so);
-	if (newaf != NULL) {
-		if (newaf->so_accept_filter_str != NULL)
-			free(newaf->so_accept_filter_str, M_ACCF);
-		free(newaf, M_ACCF);
-	}
-	if (afap != NULL)
-		free(afap, M_TEMP);
+	free(accept_filter_str, M_ACCF);	/* non-NULL only if attach failed */
+	free(afap, M_TEMP);
 	return (error);
 }
Index: head/sys/kern/uipc_debug.c
===================================================================
--- head/sys/kern/uipc_debug.c	(revision 319721)
+++ head/sys/kern/uipc_debug.c	(revision 319722)
@@ -1,532 +1,534 @@
 /*-
  * Copyright (c) 2007 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * Debugger routines relating to sockets, protocols, etc, for use in DDB.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/domain.h>
 #include <sys/kernel.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 
 static void
 db_print_sotype(short so_type)
 {
 
 	switch (so_type) {
 	case SOCK_STREAM:
 		db_printf("SOCK_STREAM");
 		break;
 
 	case SOCK_DGRAM:
 		db_printf("SOCK_DGRAM");
 		break;
 
 	case SOCK_RAW:
 		db_printf("SOCK_RAW");
 		break;
 
 	case SOCK_RDM:
 		db_printf("SOCK_RDM");
 		break;
 
 	case SOCK_SEQPACKET:
 		db_printf("SOCK_SEQPACKET");
 		break;
 
 	default:
 		db_printf("unknown");
 		break;
 	}
 }
 
 static void
 db_print_sooptions(short so_options)
 {
 	int comma;
 
 	comma = 0;
 	if (so_options & SO_DEBUG) {
 		db_printf("%sSO_DEBUG", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_ACCEPTCONN) {
 		db_printf("%sSO_ACCEPTCONN", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_REUSEADDR) {
 		db_printf("%sSO_REUSEADDR", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_KEEPALIVE) {
 		db_printf("%sSO_KEEPALIVE", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_DONTROUTE) {
 		db_printf("%sSO_DONTROUTE", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_BROADCAST) {
 		db_printf("%sSO_BROADCAST", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_USELOOPBACK) {
 		db_printf("%sSO_USELOOPBACK", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_LINGER) {
 		db_printf("%sSO_LINGER", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_OOBINLINE) {
 		db_printf("%sSO_OOBINLINE", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_REUSEPORT) {
 		db_printf("%sSO_REUSEPORT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_TIMESTAMP) {
 		db_printf("%sSO_TIMESTAMP", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_NOSIGPIPE) {
 		db_printf("%sSO_NOSIGPIPE", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_ACCEPTFILTER) {
 		db_printf("%sSO_ACCEPTFILTER", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_BINTIME) {
 		db_printf("%sSO_BINTIME", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_NO_OFFLOAD) {
 		db_printf("%sSO_NO_OFFLOAD", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_options & SO_NO_DDP) {
 		db_printf("%sSO_NO_DDP", comma ? ", " : "");
 		comma = 1;
 	}
 }
 
 static void
 db_print_sostate(short so_state)
 {
 	int comma;
 
 	comma = 0;
 	if (so_state & SS_NOFDREF) {
 		db_printf("%sSS_NOFDREF", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_state & SS_ISCONNECTED) {
 		db_printf("%sSS_ISCONNECTED", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_state & SS_ISCONNECTING) {
 		db_printf("%sSS_ISCONNECTING", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_state & SS_ISDISCONNECTING) {
 		db_printf("%sSS_ISDISCONNECTING", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_state & SS_NBIO) {
 		db_printf("%sSS_NBIO", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_state & SS_ASYNC) {
 		db_printf("%sSS_ASYNC", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_state & SS_ISCONFIRMING) {
 		db_printf("%sSS_ISCONFIRMING", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_state & SS_PROTOREF) {
 		db_printf("%sSS_PROTOREF", comma ? ", " : "");
 		comma = 1;
 	}
 }
 
 static void
 db_print_soqstate(int so_qstate)
 {
 	int comma;
 
 	comma = 0;
 	if (so_qstate & SQ_INCOMP) {
 		db_printf("%sSQ_INCOMP", comma ? ", " : "");
 		comma = 1;
 	}
 	if (so_qstate & SQ_COMP) {
 		db_printf("%sSQ_COMP", comma ? ", " : "");
 		comma = 1;
 	}
 }
 
 static void
 db_print_sbstate(short sb_state)
 {
 	int comma;
 
 	comma = 0;
 	if (sb_state & SBS_CANTSENDMORE) {
 		db_printf("%sSBS_CANTSENDMORE", comma ? ", " : "");
 		comma = 1;
 	}
 	if (sb_state & SBS_CANTRCVMORE) {
 		db_printf("%sSBS_CANTRCVMORE", comma ? ", " : "");
 		comma = 1;
 	}
 	if (sb_state & SBS_RCVATMARK) {
 		db_printf("%sSBS_RCVATMARK", comma ? ", " : "");
 		comma = 1;
 	}
 }
 
 static void
 db_print_indent(int indent)
 {
 	int i;
 
 	for (i = 0; i < indent; i++)
 		db_printf(" ");
 }
 
 static void
 db_print_domain(struct domain *d, const char *domain_name, int indent)
 {
 
 	db_print_indent(indent);
 	db_printf("%s at %p\n", domain_name, d);
 
 	indent += 2;
 
 	db_print_indent(indent);
 	db_printf("dom_family: %d   ", d->dom_family);
 	db_printf("dom_name: %s\n", d->dom_name);
 
 	db_print_indent(indent);
 	db_printf("dom_init: %p   ", d->dom_init);
 	db_printf("dom_externalize: %p   ", d->dom_externalize);
 	db_printf("dom_dispose: %p\n", d->dom_dispose);
 
 	db_print_indent(indent);
 	db_printf("dom_protosw: %p   ", d->dom_protosw);
 	db_printf("dom_next: %p\n", d->dom_next);
 
 	db_print_indent(indent);
 	db_printf("dom_rtattach: %p   ", d->dom_rtattach);
 
 	db_print_indent(indent);
 	db_printf("dom_ifattach: %p   ", d->dom_ifattach);
 	db_printf("dom_ifdetach: %p\n", d->dom_ifdetach);
 }
 
 static void
 db_print_prflags(short pr_flags)
 {
 	int comma;
 
 	comma = 0;
 	if (pr_flags & PR_ATOMIC) {
 		db_printf("%sPR_ATOMIC", comma ? ", " : "");
 		comma = 1;
 	}
 	if (pr_flags & PR_ADDR) {
 		db_printf("%sPR_ADDR", comma ? ", " : "");
 		comma = 1;
 	}
 	if (pr_flags & PR_CONNREQUIRED) {
 		db_printf("%sPR_CONNREQUIRED", comma ? ", " : "");
 		comma = 1;
 	}
 	if (pr_flags & PR_WANTRCVD) {
 		db_printf("%sPR_WANTRCVD", comma ? ", " : "");
 		comma = 1;
 	}
 	if (pr_flags & PR_RIGHTS) {
 		db_printf("%sPR_RIGHTS", comma ? ", " : "");
 		comma = 1;
 	}
 	if (pr_flags & PR_IMPLOPCL) {
 		db_printf("%sPR_IMPLOPCL", comma ? ", " : "");
 		comma = 1;
 	}
 	if (pr_flags & PR_LASTHDR) {
 		db_printf("%sPR_LASTHDR", comma ? ", " : "");
 		comma = 1;
 	}
 }
 
 static void
 db_print_protosw(struct protosw *pr, const char *prname, int indent)
 {
 
 	db_print_indent(indent);
 	db_printf("%s at %p\n", prname, pr);
 
 	indent += 2;
 
 	db_print_indent(indent);
 	db_printf("pr_type: %d   ", pr->pr_type);
 	db_printf("pr_domain: %p\n", pr->pr_domain);
 	if (pr->pr_domain != NULL)
 		db_print_domain(pr->pr_domain, "pr_domain", indent);
 
 	db_print_indent(indent);
 	db_printf("pr_protocol: %d\n", pr->pr_protocol);
 
 	db_print_indent(indent);
 	db_printf("pr_flags: %d (", pr->pr_flags);
 	db_print_prflags(pr->pr_flags);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("pr_input: %p   ", pr->pr_input);
 	db_printf("pr_output: %p   ", pr->pr_output);
 	db_printf("pr_ctlinput: %p\n", pr->pr_ctlinput);
 
 	db_print_indent(indent);
 	db_printf("pr_ctloutput: %p   ", pr->pr_ctloutput);
 	db_printf("pr_init: %p\n", pr->pr_init);
 
 	db_print_indent(indent);
 	db_printf("pr_fasttimo: %p   ", pr->pr_fasttimo);
 	db_printf("pr_slowtimo: %p   ", pr->pr_slowtimo);
 	db_printf("pr_drain: %p\n", pr->pr_drain);
 
 	db_print_indent(indent);
 }
 
 static void
 db_print_sbflags(short sb_flags)
 {
 	int comma;
 
 	comma = 0;
 	if (sb_flags & SB_WAIT) {
 		db_printf("%sSB_WAIT", comma ? ", " : "");
 		comma = 1;
 	}
 	if (sb_flags & SB_SEL) {
 		db_printf("%sSB_SEL", comma ? ", " : "");
 		comma = 1;
 	}
 	if (sb_flags & SB_ASYNC) {
 		db_printf("%sSB_ASYNC", comma ? ", " : "");
 		comma = 1;
 	}
 	if (sb_flags & SB_UPCALL) {
 		db_printf("%sSB_UPCALL", comma ? ", " : "");
 		comma = 1;
 	}
 	if (sb_flags & SB_NOINTR) {
 		db_printf("%sSB_NOINTR", comma ? ", " : "");
 		comma = 1;
 	}
 	if (sb_flags & SB_AIO) {
 		db_printf("%sSB_AIO", comma ? ", " : "");
 		comma = 1;
 	}
 	if (sb_flags & SB_KNOTE) {
 		db_printf("%sSB_KNOTE", comma ? ", " : "");
 		comma = 1;
 	}
 	if (sb_flags & SB_AUTOSIZE) {
 		db_printf("%sSB_AUTOSIZE", comma ? ", " : "");
 		comma = 1;
 	}
 }
 
 static void
 db_print_sockbuf(struct sockbuf *sb, const char *sockbufname, int indent)
 {
 
 	db_print_indent(indent);
 	db_printf("%s at %p\n", sockbufname, sb);
 
 	indent += 2;
 
 	db_print_indent(indent);
 	db_printf("sb_state: 0x%x (", sb->sb_state);
 	db_print_sbstate(sb->sb_state);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("sb_mb: %p   ", sb->sb_mb);
 	db_printf("sb_mbtail: %p   ", sb->sb_mbtail);
 	db_printf("sb_lastrecord: %p\n", sb->sb_lastrecord);
 
 	db_print_indent(indent);
 	db_printf("sb_sndptr: %p   ", sb->sb_sndptr);
 	db_printf("sb_sndptroff: %u\n", sb->sb_sndptroff);
 
 	db_print_indent(indent);
 	db_printf("sb_acc: %u   ", sb->sb_acc);
 	db_printf("sb_ccc: %u   ", sb->sb_ccc);
 	db_printf("sb_hiwat: %u   ", sb->sb_hiwat);
 	db_printf("sb_mbcnt: %u   ", sb->sb_mbcnt);
 	db_printf("sb_mbmax: %u\n", sb->sb_mbmax);
 
 	db_print_indent(indent);
 	db_printf("sb_ctl: %u   ", sb->sb_ctl);
 	db_printf("sb_lowat: %d   ", sb->sb_lowat);
 	db_printf("sb_timeo: %jd\n", sb->sb_timeo);
 
 	db_print_indent(indent);
 	db_printf("sb_flags: 0x%x (", sb->sb_flags);
 	db_print_sbflags(sb->sb_flags);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("sb_aiojobq first: %p\n", TAILQ_FIRST(&sb->sb_aiojobq));
 }
 
 static void
 db_print_socket(struct socket *so, const char *socketname, int indent)
 {
 
 	db_print_indent(indent);
 	db_printf("%s at %p\n", socketname, so);
 
 	indent += 2;
 
 	db_print_indent(indent);
 	db_printf("so_count: %d   ", so->so_count);
 	db_printf("so_type: %d (", so->so_type);
 	db_print_sotype(so->so_type);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("so_options: 0x%x (", so->so_options);
 	db_print_sooptions(so->so_options);
 	db_printf(")\n");
 
 	db_print_indent(indent);
 	db_printf("so_linger: %d   ", so->so_linger);
 	db_printf("so_state: 0x%x (", so->so_state);
 	db_print_sostate(so->so_state);
 	db_printf(")\n");
 
 	db_print_indent(indent);
-	db_printf("so_qstate: 0x%x (", so->so_qstate);
-	db_print_soqstate(so->so_qstate);
-	db_printf(")   ");
 	db_printf("so_pcb: %p   ", so->so_pcb);
 	db_printf("so_proto: %p\n", so->so_proto);
 
 	if (so->so_proto != NULL)
 		db_print_protosw(so->so_proto, "so_proto", indent);
 
 	db_print_indent(indent);
-	db_printf("so_head: %p   ", so->so_head);
-	db_printf("so_incomp first: %p   ", TAILQ_FIRST(&so->so_incomp));
-	db_printf("so_comp first: %p\n", TAILQ_FIRST(&so->so_comp));
+	if (so->so_options & SO_ACCEPTCONN) {
+		db_printf("sol_incomp first: %p   ",
+		    TAILQ_FIRST(&so->sol_incomp));
+		db_printf("sol_comp first: %p\n", TAILQ_FIRST(&so->sol_comp));
+		db_printf("sol_qlen: %d   ", so->sol_qlen);
+		db_printf("sol_incqlen: %d   ", so->sol_incqlen);
+		db_printf("sol_qlimit: %d\n", so->sol_qlimit);	/* last line of output */
+	} else {
+		db_printf("so_qstate: 0x%x (", so->so_qstate);
+		db_print_soqstate(so->so_qstate);
+		db_printf(")   ");	/* closes the so_qstate flag list */
+		db_printf("so_listen: %p   ", so->so_listen);
+		/* so_list skipped */
+		db_printf("so_timeo: %d   ", so->so_timeo);
+		db_printf("so_error: %d\n", so->so_error);
 
-	db_print_indent(indent);
-	/* so_list skipped */
-	db_printf("so_qlen: %u   ", so->so_qlen);
-	db_printf("so_incqlen: %u   ", so->so_incqlen);
-	db_printf("so_qlimit: %u   ", so->so_qlimit);
-	db_printf("so_timeo: %d   ", so->so_timeo);
-	db_printf("so_error: %d\n", so->so_error);
+		db_print_indent(indent);
+		db_printf("so_sigio: %p   ", so->so_sigio);
+		db_printf("so_oobmark: %lu   ", so->so_oobmark);
 
-	db_print_indent(indent);
-	db_printf("so_sigio: %p   ", so->so_sigio);
-	db_printf("so_oobmark: %lu   ", so->so_oobmark);
-
-	db_print_sockbuf(&so->so_rcv, "so_rcv", indent);
-	db_print_sockbuf(&so->so_snd, "so_snd", indent);
+		db_print_sockbuf(&so->so_rcv, "so_rcv", indent);
+		db_print_sockbuf(&so->so_snd, "so_snd", indent);
+	}
 }
 
 DB_SHOW_COMMAND(socket, db_show_socket)
 {
 	struct socket *so;
 
 	if (!have_addr) {
 		db_printf("usage: show socket <addr>\n");
 		return;
 	}
 	so = (struct socket *)addr;
 
 	db_print_socket(so, "socket", 0);
 }
 
 DB_SHOW_COMMAND(sockbuf, db_show_sockbuf)
 {
 	struct sockbuf *sb;
 
 	if (!have_addr) {
 		db_printf("usage: show sockbuf <addr>\n");
 		return;
 	}
 	sb = (struct sockbuf *)addr;
 
 	db_print_sockbuf(sb, "sockbuf", 0);
 }
 
 DB_SHOW_COMMAND(protosw, db_show_protosw)
 {
 	struct protosw *pr;
 
 	if (!have_addr) {
 		db_printf("usage: show protosw <addr>\n");
 		return;
 	}
 	pr = (struct protosw *)addr;
 
 	db_print_protosw(pr, "protosw", 0);
 }
 
 DB_SHOW_COMMAND(domain, db_show_domain)
 {
 	struct domain *d;
 
 	if (!have_addr) {
-		db_printf("usage: show protosw <addr>\n");
+		db_printf("usage: show domain <addr>\n");	/* was copy-pasted from "show protosw" */
 		return;
 	}
 	d = (struct domain *)addr;
 
 	db_print_domain(d, "domain", 0);
 }
 #endif
Index: head/sys/kern/uipc_sockbuf.c
===================================================================
--- head/sys/kern/uipc_sockbuf.c	(revision 319721)
+++ head/sys/kern/uipc_sockbuf.c	(revision 319722)
@@ -1,1349 +1,1349 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)uipc_socket2.c	8.1 (Berkeley) 6/10/93
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_param.h"
 
 #include <sys/param.h>
 #include <sys/aio.h> /* for aio_swake proto */
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 
 /*
  * Function pointer set by the AIO routines so that the socket buffer code
  * can call back into the AIO module if it is loaded.
  */
 void	(*aio_swake)(struct socket *, struct sockbuf *);
 
 /*
  * Primitive routines for operating on socket buffers
  */
 
 u_long	sb_max = SB_MAX;
 u_long sb_max_adj =
        (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */
 
 static	u_long sb_efficiency = 8;	/* parameter for sbreserve() */
 
 static struct mbuf	*sbcut_internal(struct sockbuf *sb, int len);
 static void	sbflush_internal(struct sockbuf *sb);
 
 /*
  * Our own version of m_clrprotoflags(), that can preserve M_NOTREADY.
  */
 static void
 sbm_clrprotoflags(struct mbuf *m, int flags)
 {
 	int mask;
 
 	mask = ~M_PROTOFLAGS;
 	if (flags & PRUS_NOTREADY)
 		mask |= M_NOTREADY;
 	while (m) {
 		m->m_flags &= mask;
 		m = m->m_next;
 	}
 }
 
 /*
  * Mark ready "count" mbufs starting with "m".
  */
 int
 sbready(struct sockbuf *sb, struct mbuf *m, int count)
 {
 	u_int blocker;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	KASSERT(sb->sb_fnrdy != NULL, ("%s: sb %p NULL fnrdy", __func__, sb));
 
 	blocker = (sb->sb_fnrdy == m) ? M_BLOCKED : 0;
 
 	for (int i = 0; i < count; i++, m = m->m_next) {
 		KASSERT(m->m_flags & M_NOTREADY,
 		    ("%s: m %p !M_NOTREADY", __func__, m));
 		m->m_flags &= ~(M_NOTREADY | blocker);
 		if (blocker)
 			sb->sb_acc += m->m_len;
 	}
 
 	if (!blocker)
 		return (EINPROGRESS);
 
 	/* This one was blocking all the queue. */
 	for (; m && (m->m_flags & M_NOTREADY) == 0; m = m->m_next) {
 		KASSERT(m->m_flags & M_BLOCKED,
 		    ("%s: m %p !M_BLOCKED", __func__, m));
 		m->m_flags &= ~M_BLOCKED;
 		sb->sb_acc += m->m_len;
 	}
 
 	sb->sb_fnrdy = m;
 
 	return (0);
 }
 
 /*
  * Adjust sockbuf state reflecting allocation of m.
  */
 void
 sballoc(struct sockbuf *sb, struct mbuf *m)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	sb->sb_ccc += m->m_len;
 
 	if (sb->sb_fnrdy == NULL) {
 		if (m->m_flags & M_NOTREADY)
 			sb->sb_fnrdy = m;
 		else
 			sb->sb_acc += m->m_len;
 	} else
 		m->m_flags |= M_BLOCKED;
 
 	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 		sb->sb_ctl += m->m_len;
 
 	sb->sb_mbcnt += MSIZE;
 	sb->sb_mcnt += 1;
 
 	if (m->m_flags & M_EXT) {
 		sb->sb_mbcnt += m->m_ext.ext_size;
 		sb->sb_ccnt += 1;
 	}
 }
 
 /*
  * Adjust sockbuf state reflecting freeing of m.
  */
 void
 sbfree(struct sockbuf *sb, struct mbuf *m)
 {
 
 #if 0	/* XXX: not yet: soclose() call path comes here w/o lock. */
 	SOCKBUF_LOCK_ASSERT(sb);
 #endif
 
 	sb->sb_ccc -= m->m_len;
 
 	if (!(m->m_flags & M_NOTAVAIL))
 		sb->sb_acc -= m->m_len;
 
 	if (m == sb->sb_fnrdy) {
 		struct mbuf *n;
 
 		KASSERT(m->m_flags & M_NOTREADY,
 		    ("%s: m %p !M_NOTREADY", __func__, m));
 
 		n = m->m_next;
 		while (n != NULL && !(n->m_flags & M_NOTREADY)) {
 			n->m_flags &= ~M_BLOCKED;
 			sb->sb_acc += n->m_len;
 			n = n->m_next;
 		}
 		sb->sb_fnrdy = n;
 	}
 
 	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 		sb->sb_ctl -= m->m_len;
 
 	sb->sb_mbcnt -= MSIZE;
 	sb->sb_mcnt -= 1;
 	if (m->m_flags & M_EXT) {
 		sb->sb_mbcnt -= m->m_ext.ext_size;
 		sb->sb_ccnt -= 1;
 	}
 
 	if (sb->sb_sndptr == m) {
 		sb->sb_sndptr = NULL;
 		sb->sb_sndptroff = 0;
 	}
 	if (sb->sb_sndptroff != 0)
 		sb->sb_sndptroff -= m->m_len;
 }
 
 /*
  * Socantsendmore indicates that no more data will be sent on the socket; it
  * would normally be applied to a socket when the user informs the system
  * that no more data is to be sent, by the protocol code (in case
  * PRU_SHUTDOWN).  Socantrcvmore indicates that no more data will be
  * received, and will normally be applied to the socket by a protocol when it
  * detects that the peer will send no more data.  Data queued for reading in
  * the socket may yet be read.
  */
 void
 socantsendmore_locked(struct socket *so)
 {
 
 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
 
 	so->so_snd.sb_state |= SBS_CANTSENDMORE;
 	sowwakeup_locked(so);
 	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
 }
 
 void
 socantsendmore(struct socket *so)
 {
 
 	SOCKBUF_LOCK(&so->so_snd);
 	socantsendmore_locked(so);
 	mtx_assert(SOCKBUF_MTX(&so->so_snd), MA_NOTOWNED);
 }
 
 void
 socantrcvmore_locked(struct socket *so)
 {
 
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	so->so_rcv.sb_state |= SBS_CANTRCVMORE;
 	sorwakeup_locked(so);
 	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
 }
 
 void
 socantrcvmore(struct socket *so)
 {
 
 	SOCKBUF_LOCK(&so->so_rcv);
 	socantrcvmore_locked(so);
 	mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
 }
 
 /*
  * Wait for data to arrive at/drain from a socket buffer.
  */
 int
 sbwait(struct sockbuf *sb)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	sb->sb_flags |= SB_WAIT;
 	return (msleep_sbt(&sb->sb_acc, &sb->sb_mtx,
 	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
 	    sb->sb_timeo, 0, 0));
 }
 
 int
 sblock(struct sockbuf *sb, int flags)
 {
 
 	KASSERT((flags & SBL_VALID) == flags,
 	    ("sblock: flags invalid (0x%x)", flags));
 
 	if (flags & SBL_WAIT) {
 		if ((sb->sb_flags & SB_NOINTR) ||
 		    (flags & SBL_NOINTR)) {
 			sx_xlock(&sb->sb_sx);
 			return (0);
 		}
 		return (sx_xlock_sig(&sb->sb_sx));
 	} else {
 		if (sx_try_xlock(&sb->sb_sx) == 0)
 			return (EWOULDBLOCK);
 		return (0);
 	}
 }
 
 void
 sbunlock(struct sockbuf *sb)
 {
 
 	sx_xunlock(&sb->sb_sx);
 }
 
 /*
  * Wakeup processes waiting on a socket buffer.  Do asynchronous notification
  * via SIGIO if the socket has the SS_ASYNC flag set.
  *
  * Called with the socket buffer lock held; will release the lock by the end
  * of the function.  This allows the caller to acquire the socket buffer lock
  * while testing for the need for various sorts of wakeup and hold it through
  * to the point where it's no longer required.  We currently hold the lock
  * through calls out to other subsystems (with the exception of kqueue), and
  * then release it to avoid lock order issues.  It's not clear that's
  * correct.
  */
 void
 sowakeup(struct socket *so, struct sockbuf *sb)
 {
 	int ret;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/*
 	 * Notify waiters registered on the buffer's selinfo, and clear
 	 * SB_SEL once SEL_WAITING() reports none remaining.
 	 */
-	selwakeuppri(&sb->sb_sel, PSOCK);
-	if (!SEL_WAITING(&sb->sb_sel))
+	selwakeuppri(sb->sb_sel, PSOCK);
+	if (!SEL_WAITING(sb->sb_sel))
 		sb->sb_flags &= ~SB_SEL;
 	/* Wake threads sleeping in sbwait(), which sleeps on &sb->sb_acc. */
 	if (sb->sb_flags & SB_WAIT) {
 		sb->sb_flags &= ~SB_WAIT;
 		wakeup(&sb->sb_acc);
 	}
-	KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
+	KNOTE_LOCKED(&sb->sb_sel->si_note, 0);
 	/*
 	 * Run the registered upcall unless the socket is flagged
 	 * SS_ISDISCONNECTED.  An SU_ISCONNECTED result is only accepted from
 	 * the receive buffer: the receive upcall is cleared here and the
 	 * soisconnected() call is deferred until the lock has been dropped.
 	 */
 	if (sb->sb_upcall != NULL && !(so->so_state & SS_ISDISCONNECTED)) {
 		ret = sb->sb_upcall(so, sb->sb_upcallarg, M_NOWAIT);
 		if (ret == SU_ISCONNECTED) {
 			KASSERT(sb == &so->so_rcv,
 			    ("SO_SND upcall returned SU_ISCONNECTED"));
 			soupcall_clear(so, SO_RCV);
 		}
 	} else
 		ret = SU_OK;
 	if (sb->sb_flags & SB_AIO)
 		sowakeup_aio(so, sb);
 	SOCKBUF_UNLOCK(sb);
 	/* Everything below runs without the socket buffer lock. */
 	if (ret == SU_ISCONNECTED)
 		soisconnected(so);
 	/* Deliver SIGIO when SS_ASYNC is set and a sigio target exists. */
 	if ((so->so_state & SS_ASYNC) && so->so_sigio != NULL)
 		pgsigio(&so->so_sigio, SIGIO, 0);
 	mtx_assert(SOCKBUF_MTX(sb), MA_NOTOWNED);
 }
 
 /*
  * Socket buffer (struct sockbuf) utility routines.
  *
  * Each socket contains two socket buffers: one for sending data and one for
  * receiving data.  Each buffer contains a queue of mbufs, information about
  * the number of mbufs and amount of data in the queue, and other fields
  * allowing select() statements and notification on data availability to be
  * implemented.
  *
  * Data stored in a socket buffer is maintained as a list of records.  Each
  * record is a list of mbufs chained together with the m_next field.  Records
  * are chained together with the m_nextpkt field. The upper level routine
  * soreceive() expects the following conventions to be observed when placing
  * information in the receive buffer:
  *
  * 1. If the protocol requires each message be preceded by the sender's name,
  *    then a record containing that name must be present before any
  *    associated data (mbuf's must be of type MT_SONAME).
  * 2. If the protocol supports the exchange of ``access rights'' (really just
  *    additional data associated with the message), and there are ``rights''
  *    to be received, then a record containing this data should be present
  *    (mbuf's must be of type MT_RIGHTS).
  * 3. If a name or rights record exists, then it must be followed by a data
  *    record, perhaps of zero length.
  *
  * Before using a new socket structure it is first necessary to reserve
  * buffer space to the socket, by calling sbreserve().  This should commit
  * some of the available buffer space in the system buffer pool for the
  * socket (currently, it does nothing but enforce limits).  The space should
  * be released by calling sbrelease() when the socket is destroyed.
  */
 int
 soreserve(struct socket *so, u_long sndcc, u_long rcvcc)
 {
 	struct sockbuf *snd = &so->so_snd;
 	struct sockbuf *rcv = &so->so_rcv;
 	struct thread *td = curthread;
 	int error = ENOBUFS;
 
 	/* Send buffer lock first, then receive; released in reverse order. */
 	SOCKBUF_LOCK(snd);
 	SOCKBUF_LOCK(rcv);
 	if (sbreserve_locked(snd, sndcc, so, td) != 0) {
 		if (sbreserve_locked(rcv, rcvcc, so, td) != 0) {
 			/* Both reservations succeeded: fix up low-water marks. */
 			if (rcv->sb_lowat == 0)
 				rcv->sb_lowat = 1;
 			if (snd->sb_lowat == 0)
 				snd->sb_lowat = MCLBYTES;
 			if (snd->sb_lowat > snd->sb_hiwat)
 				snd->sb_lowat = snd->sb_hiwat;
 			error = 0;
 		} else {
 			/* Receive side failed: undo the send reservation. */
 			sbrelease_locked(snd, so);
 		}
 	}
 	SOCKBUF_UNLOCK(rcv);
 	SOCKBUF_UNLOCK(snd);
 	return (error);
 }
 
 static int
 sysctl_handle_sb_max(SYSCTL_HANDLER_ARGS)
 {
 	int error = 0;
 	u_long tmp_sb_max = sb_max;
 
 	error = sysctl_handle_long(oidp, &tmp_sb_max, arg2, req);
 	if (error || !req->newptr)
 		return (error);
 	if (tmp_sb_max < MSIZE + MCLBYTES)
 		return (EINVAL);
 	sb_max = tmp_sb_max;
 	sb_max_adj = (u_quad_t)sb_max * MCLBYTES / (MSIZE + MCLBYTES);
 	return (0);
 }
 	
 /*
  * Allot mbufs to a sockbuf.  Attempt to scale mbmax so that mbcnt doesn't
  * become limiting if buffering efficiency is near the normal case.
  */
 int
 sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
     struct thread *td)
 {
 	rlim_t limit;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Requests above the adjusted global maximum are refused outright. */
 	if (cc > sb_max_adj)
 		return (0);
 
 	/*
 	 * With a thread supplied, its RLIMIT_SBSIZE limit bounds the
 	 * reservation.  A NULL thread (as passed from the TCP input path)
 	 * means no thread resource limits are available, so none is applied.
 	 */
 	limit = (td != NULL) ? lim_cur(td, RLIMIT_SBSIZE) : RLIM_INFINITY;
 	if (chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, cc, limit) == 0)
 		return (0);
 
 	sb->sb_mbmax = min(cc * sb_efficiency, sb_max);
 	/* The low-water mark may not exceed the new high-water mark. */
 	if (sb->sb_lowat > sb->sb_hiwat)
 		sb->sb_lowat = sb->sb_hiwat;
 	return (1);
 }
 
 int
 sbreserve(struct sockbuf *sb, u_long cc, struct socket *so,
     struct thread *td)
 {
 	int reserved;
 
 	/*
 	 * Locking wrapper: the result of sbreserve_locked() (1 on success,
 	 * 0 on failure) is passed through unchanged.
 	 */
 	SOCKBUF_LOCK(sb);
 	reserved = sbreserve_locked(sb, cc, so, td);
 	SOCKBUF_UNLOCK(sb);
 	return (reserved);
 }
 
 /*
  * Free mbufs held by a socket, and reserved mbuf space.
  */
 void
 sbrelease_internal(struct sockbuf *sb, struct socket *so)
 {
 
 	/* Discard every queued mbuf first. */
 	sbflush_internal(sb);
 	/*
 	 * Set the reservation tracked in sb_hiwat to zero against the
 	 * uidinfo of the socket's credential; the result is ignored.
 	 */
 	(void)chgsbsize(so->so_cred->cr_uidinfo, &sb->sb_hiwat, 0,
 	    RLIM_INFINITY);
 	sb->sb_mbmax = 0;
 }
 
 void
 sbrelease_locked(struct sockbuf *sb, struct socket *so)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Same work as sbrelease_internal(), plus the lock assertion above. */
 	sbrelease_internal(sb, so);
 }
 
 void
 sbrelease(struct sockbuf *sb, struct socket *so)
 {
 
 	/* Locking wrapper around sbrelease_locked(). */
 	SOCKBUF_LOCK(sb);
 	sbrelease_locked(sb, so);
 	SOCKBUF_UNLOCK(sb);
 }
 
 void
 sbdestroy(struct sockbuf *sb, struct socket *so)
 {
 
 	/*
 	 * Releases the buffer without taking or asserting the sockbuf lock.
 	 * NOTE(review): presumably only used at teardown when nothing else
 	 * can reach the buffer -- confirm against callers.
 	 */
 	sbrelease_internal(sb, so);
 }
 
 /*
  * Routines to add and remove data from an mbuf queue.
  *
  * The routines sbappend() or sbappendrecord() are normally called to append
  * new mbufs to a socket buffer, after checking that adequate space is
  * available, comparing the function sbspace() with the amount of data to be
  * added.  sbappendrecord() differs from sbappend() in that data supplied is
  * treated as the beginning of a new record.  To place a sender's address,
  * optional access rights, and data in a socket receive buffer,
  * sbappendaddr() should be used.  To place access rights and data in a
  * socket receive buffer, sbappendrights() should be used.  In either case,
  * the new data begins a new record.  Note that unlike sbappend() and
  * sbappendrecord(), these routines check for the caller that there will be
  * enough space to store the data.  Each fails if there is not enough space,
  * or if it cannot find mbufs to store additional information in.
  *
  * Reliable protocols may use the socket send buffer to hold data awaiting
  * acknowledgement.  Data is normally copied from a socket send buffer in a
  * protocol with m_copy for output to a peer, and then removing the data from
  * the socket buffer with sbdrop() or sbdroprecord() when the data is
  * acknowledged by the peer.
  */
 #ifdef SOCKBUF_DEBUG
 void
 sblastrecordchk(struct sockbuf *sb, const char *file, int line)
 {
 	struct mbuf *rec;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Follow m_nextpkt to the record that really is last in the queue. */
 	for (rec = sb->sb_mb; rec != NULL && rec->m_nextpkt != NULL;
 	    rec = rec->m_nextpkt)
 		continue;
 
 	/* The cached pointer agrees with the walk: nothing to report. */
 	if (rec == sb->sb_lastrecord)
 		return;
 
 	/* Mismatch: dump the record chain and panic with the call site. */
 	printf("%s: sb_mb %p sb_lastrecord %p last %p\n",
 	    __func__, sb->sb_mb, sb->sb_lastrecord, rec);
 	printf("packet chain:\n");
 	for (rec = sb->sb_mb; rec != NULL; rec = rec->m_nextpkt)
 		printf("\t%p\n", rec);
 	panic("%s from %s:%u", __func__, file, line);
 }
 
 void
 sblastmbufchk(struct sockbuf *sb, const char *file, int line)
 {
 	struct mbuf *last, *rec, *mb;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Locate the last record, then the last mbuf within that record. */
 	last = sb->sb_mb;
 	while (last != NULL && last->m_nextpkt != NULL)
 		last = last->m_nextpkt;
 	while (last != NULL && last->m_next != NULL)
 		last = last->m_next;
 
 	/* The cached tail agrees with the walk: nothing to report. */
 	if (last == sb->sb_mbtail)
 		return;
 
 	/* Mismatch: dump every record with its mbufs, then panic. */
 	printf("%s: sb_mb %p sb_mbtail %p last %p\n",
 	    __func__, sb->sb_mb, sb->sb_mbtail, last);
 	printf("packet tree:\n");
 	for (rec = sb->sb_mb; rec != NULL; rec = rec->m_nextpkt) {
 		printf("\t");
 		for (mb = rec; mb != NULL; mb = mb->m_next)
 			printf("%p ", mb);
 		printf("\n");
 	}
 	panic("%s from %s:%u", __func__, file, line);
 }
 #endif /* SOCKBUF_DEBUG */
 
 /*
  * Link record m0 at the end of sb's record queue: chain it after the
  * current sb_lastrecord through m_nextpkt, or make it sb_mb when there is
  * no last record, and then record m0 as the new sb_lastrecord.  sb_mbtail
  * is not touched here.
  */
 #define SBLINKRECORD(sb, m0) do {					\
 	SOCKBUF_LOCK_ASSERT(sb);					\
 	if ((sb)->sb_lastrecord != NULL)				\
 		(sb)->sb_lastrecord->m_nextpkt = (m0);			\
 	else								\
 		(sb)->sb_mb = (m0);					\
 	(sb)->sb_lastrecord = (m0);					\
 } while (/*CONSTCOND*/0)
 
 /*
  * Append mbuf chain m to the last record in the socket buffer sb.  The
  * additional space associated with the mbuf chain is recorded in sb.  Empty
  * mbufs are discarded and mbufs are compacted where possible.
  */
 void
 sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags)
 {
 	struct mbuf *n;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Nothing to append. */
 	if (m == NULL)
 		return;
 	sbm_clrprotoflags(m, flags);
 	SBLASTRECORDCHK(sb);
 	n = sb->sb_mb;
 	if (n) {
 		/* Advance to the last record in the queue. */
 		while (n->m_nextpkt)
 			n = n->m_nextpkt;
 		/*
 		 * Scan that record's mbufs.  If any of them carries M_EOR
 		 * the record is already complete, so m is queued as a new
 		 * record instead.  Otherwise n ends up on the record's last
 		 * mbuf.
 		 */
 		do {
 			if (n->m_flags & M_EOR) {
 				sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
 				return;
 			}
 		} while (n->m_next && (n = n->m_next));
 	} else {
 		/*
 		 * XXX Would like to simply use sb_mbtail here, but
 		 * XXX I need to verify that I won't miss an EOR that
 		 * XXX way.
 		 */
 		/* sb_mb is NULL here; repeat the EOR scan from sb_lastrecord. */
 		if ((n = sb->sb_lastrecord) != NULL) {
 			do {
 				if (n->m_flags & M_EOR) {
 					sbappendrecord_locked(sb, m); /* XXXXXX!!!! */
 					return;
 				}
 			} while (n->m_next && (n = n->m_next));
 		} else {
 			/*
 			 * If this is the first record in the socket buffer,
 			 * it's also the last record.
 			 */
 			sb->sb_lastrecord = m;
 		}
 	}
 	/* Append m after n (n == NULL when the buffer had no mbufs). */
 	sbcompress(sb, m, n);
 	SBLASTRECORDCHK(sb);
 }
 
 /*
  * Append mbuf chain m to the last record in the socket buffer sb.  The
  * additional space associated with the mbuf chain is recorded in sb.  Empty
  * mbufs are discarded and mbufs are compacted where possible.
  */
 void
 sbappend(struct sockbuf *sb, struct mbuf *m, int flags)
 {
 
 	/* Locking wrapper around sbappend_locked(). */
 	SOCKBUF_LOCK(sb);
 	sbappend_locked(sb, m, flags);
 	SOCKBUF_UNLOCK(sb);
 }
 
 /*
  * This version of sbappend() should only be used when the caller absolutely
  * knows that there will never be more than one record in the socket buffer,
  * that is, a stream protocol (such as TCP).
  */
 void
 sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags)
 {
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/*
 	 * m must be a single chain (no m_nextpkt), and the buffer may hold
 	 * at most one record (sb_mb and sb_lastrecord must coincide).
 	 */
 	KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
 	KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
 
 	SBLASTMBUFCHK(sb);
 
 	/* Remove all packet headers and mbuf tags to get a pure data chain. */
 	/* M_NOTREADY is passed as the third argument when PRUS_NOTREADY is set. */
 	m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0);
 
 	/* Append directly after the cached tail mbuf. */
 	sbcompress(sb, m, sb->sb_mbtail);
 
 	/* With a single record, the last record is the head of the queue. */
 	sb->sb_lastrecord = sb->sb_mb;
 	SBLASTRECORDCHK(sb);
 }
 
 /*
  * This version of sbappend() should only be used when the caller absolutely
  * knows that there will never be more than one record in the socket buffer,
  * that is, a stream protocol (such as TCP).
  */
 void
 sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags)
 {
 
 	/* Locking wrapper around sbappendstream_locked(). */
 	SOCKBUF_LOCK(sb);
 	sbappendstream_locked(sb, m, flags);
 	SOCKBUF_UNLOCK(sb);
 }
 
 #ifdef SOCKBUF_DEBUG
 /*
  * Debug-only consistency check: walk every mbuf of every record, recompute
  * the byte and mbuf accounting, and panic (reporting the caller's file and
  * line) if anything disagrees with the values cached in sb.
  */
 void
 sbcheck(struct sockbuf *sb, const char *file, int line)
 {
 	struct mbuf *m, *n, *fnrdy;
 	u_long acc, ccc, mbcnt;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	acc = ccc = mbcnt = 0;
 	fnrdy = NULL;
 
 	/* Outer loop: one record per pass; n remembers the next record. */
 	for (m = sb->sb_mb; m; m = n) {
 	    n = m->m_nextpkt;
 	    for (; m; m = m->m_next) {
 		/* A zero-length mbuf is reported as a failure. */
 		if (m->m_len == 0) {
 			printf("sb %p empty mbuf %p\n", sb, m);
 			goto fail;
 		}
 		/* The first M_NOTREADY mbuf met must be the cached sb_fnrdy. */
 		if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) {
 			if (m != sb->sb_fnrdy) {
 				printf("sb %p: fnrdy %p != m %p\n",
 				    sb, sb->sb_fnrdy, m);
 				goto fail;
 			}
 			fnrdy = m;
 		}
 		/*
 		 * From the first not-ready mbuf onwards every mbuf must have
 		 * an M_NOTAVAIL bit set; before it, bytes count towards acc.
 		 */
 		if (fnrdy) {
 			if (!(m->m_flags & M_NOTAVAIL)) {
 				printf("sb %p: fnrdy %p, m %p is avail\n",
 				    sb, sb->sb_fnrdy, m);
 				goto fail;
 			}
 		} else
 			acc += m->m_len;
 		/* ccc counts every byte; mbcnt counts mbuf storage. */
 		ccc += m->m_len;
 		mbcnt += MSIZE;
 		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
 			mbcnt += m->m_ext.ext_size;
 	    }
 	}
 	/* Compare the recomputed totals with the cached counters. */
 	if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
 		printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n",
 		    acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
 		goto fail;
 	}
 	return;
 fail:
 	panic("%s from %s:%u", __func__, file, line);
 }
 #endif
 
 /*
  * As above, except the mbuf chain begins a new record.
  */
 void
 sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0)
 {
 	struct mbuf *m;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Nothing to append. */
 	if (m0 == NULL)
 		return;
 	m_clrprotoflags(m0);
 	/*
 	 * Put the first mbuf on the queue.  Note this permits zero length
 	 * records.
 	 */
 	sballoc(sb, m0);
 	SBLASTRECORDCHK(sb);
 	SBLINKRECORD(sb, m0);
 	sb->sb_mbtail = m0;
 	/* Detach the rest of the chain; sbcompress() appends it after m0. */
 	m = m0->m_next;
 	m0->m_next = 0;
 	/* If more mbufs follow, move M_EOR from m0 to the next mbuf. */
 	if (m && (m0->m_flags & M_EOR)) {
 		m0->m_flags &= ~M_EOR;
 		m->m_flags |= M_EOR;
 	}
 	/* always call sbcompress() so it can do SBLASTMBUFCHK() */
 	sbcompress(sb, m, m0);
 }
 
 /*
  * As above, except the mbuf chain begins a new record.
  */
 void
 sbappendrecord(struct sockbuf *sb, struct mbuf *m0)
 {
 
 	/* Locking wrapper around sbappendrecord_locked(). */
 	SOCKBUF_LOCK(sb);
 	sbappendrecord_locked(sb, m0);
 	SOCKBUF_UNLOCK(sb);
 }
 
 /* Helper routine that appends data, control, and address to a sockbuf. */
 /*
  * Builds one record of the form address -> control -> data and links it at
  * the end of sb.  ctrl_last is the last mbuf of the control chain, or NULL,
  * in which case the data chain m0 follows the address mbuf directly.
  * Returns 1 on success and 0 if the address mbuf cannot be set up.  No
  * space check is made here.
  */
 static int
 sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa,
     struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last)
 {
 	struct mbuf *m, *n, *nlast;
 	/* For small MSIZE, refuse addresses longer than MLEN. */
 #if MSIZE <= 256
 	if (asa->sa_len > MLEN)
 		return (0);
 #endif
 	/* Non-sleeping allocation of the MT_SONAME mbuf for the address. */
 	m = m_get(M_NOWAIT, MT_SONAME);
 	if (m == NULL)
 		return (0);
 	m->m_len = asa->sa_len;
 	bcopy(asa, mtod(m, caddr_t), asa->sa_len);
 	if (m0) {
 		m_clrprotoflags(m0);
 		m_tag_delete_chain(m0, NULL);
 		/*
 		 * Clear some persistent info from pkthdr.
 		 * We don't use m_demote(), because some netgraph consumers
 		 * expect M_PKTHDR presence.
 		 */
 		m0->m_pkthdr.rcvif = NULL;
 		m0->m_pkthdr.flowid = 0;
 		m0->m_pkthdr.csum_flags = 0;
 		m0->m_pkthdr.fibnum = 0;
 		m0->m_pkthdr.rsstype = 0;
 	}
 	if (ctrl_last)
 		ctrl_last->m_next = m0;	/* concatenate data to control */
 	else
 		control = m0;
 	m->m_next = control;
 	/* Account for every mbuf of the record; n stops on the last one. */
 	for (n = m; n->m_next != NULL; n = n->m_next)
 		sballoc(sb, n);
 	sballoc(sb, n);
 	nlast = n;
 	SBLINKRECORD(sb, m);
 
 	sb->sb_mbtail = nlast;
 	SBLASTMBUFCHK(sb);
 
 	SBLASTRECORDCHK(sb);
 	return (1);
 }
 
 /*
  * Append address and data, and optionally, control (ancillary) data to the
  * receive queue of a socket.  If present, m0 must include a packet header
  * with total length.  Returns 0 if no space in sockbuf or insufficient
  * mbufs.
  */
 int
 sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
     struct mbuf *m0, struct mbuf *control)
 {
 	struct mbuf *ctrl_last;
 	int space = asa->sa_len;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	if (m0 && (m0->m_flags & M_PKTHDR) == 0)
 		panic("sbappendaddr_locked");
 	if (m0)
 		space += m0->m_pkthdr.len;
 	space += m_length(control, &ctrl_last);
 
 	if (space > sbspace(sb))
 		return (0);
 	return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
 }
 
 /*
  * Append address and data, and optionally, control (ancillary) data to the
  * receive queue of a socket.  If present, m0 must include a packet header
  * with total length.  Returns 0 if insufficient mbufs.  Does not validate space
  * on the receiving sockbuf.
  */
 int
 sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa,
     struct mbuf *m0, struct mbuf *control)
 {
 	struct mbuf *ctrl_last = NULL;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Only look up the last control mbuf when a control chain exists. */
 	if (control != NULL)
 		ctrl_last = m_last(control);
 	return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last));
 }
 
 /*
  * Append address and data, and optionally, control (ancillary) data to the
  * receive queue of a socket.  If present, m0 must include a packet header
  * with total length.  Returns 0 if no space in sockbuf or insufficient
  * mbufs.
  */
 int
 sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
     struct mbuf *m0, struct mbuf *control)
 {
 	int appended;
 
 	/* Locking wrapper; the locked variant's result is passed through. */
 	SOCKBUF_LOCK(sb);
 	appended = sbappendaddr_locked(sb, asa, m0, control);
 	SOCKBUF_UNLOCK(sb);
 	return (appended);
 }
 
 /*
  * Append control data followed by the data chain m0 as one new record.
  * control must not be NULL.  Returns 1 on success, or 0 when the combined
  * length does not fit in the buffer.
  */
 int
 sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
     struct mbuf *control)
 {
 	struct mbuf *m, *n, *mlast;
 	int space;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	if (control == NULL)
 		panic("sbappendcontrol_locked");
 	/* n receives the last mbuf of the control chain from m_length(). */
 	space = m_length(control, &n) + m_length(m0, NULL);
 
 	if (space > sbspace(sb))
 		return (0);
 	m_clrprotoflags(m0);
 	n->m_next = m0;			/* concatenate data to control */
 
 	SBLASTRECORDCHK(sb);
 
 	/* Account for every mbuf of the record; m stops on the last one. */
 	for (m = control; m->m_next; m = m->m_next)
 		sballoc(sb, m);
 	sballoc(sb, m);
 	mlast = m;
 	SBLINKRECORD(sb, control);
 
 	sb->sb_mbtail = mlast;
 	SBLASTMBUFCHK(sb);
 
 	SBLASTRECORDCHK(sb);
 	return (1);
 }
 
 int
 sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
 {
 	int appended;
 
 	/* Locking wrapper; the locked variant's result is passed through. */
 	SOCKBUF_LOCK(sb);
 	appended = sbappendcontrol_locked(sb, m0, control);
 	SOCKBUF_UNLOCK(sb);
 	return (appended);
 }
 
 /*
  * Append the data in mbuf chain (m) into the socket buffer sb following mbuf
  * (n).  If (n) is NULL, the buffer is presumed empty.
  *
  * When the data is compressed, mbufs in the chain may be handled in one of
  * three ways:
  *
  * (1) The mbuf may simply be dropped, if it contributes nothing (no data, no
  *     record boundary, and no change in data type).
  *
  * (2) The mbuf may be coalesced -- i.e., data in the mbuf may be copied into
  *     an mbuf already in the socket buffer.  This can occur if an
  *     appropriate mbuf exists, there is room, both mbufs are not marked as
  *     not ready, and no merging of data types will occur.
  *
  * (3) The mbuf may be appended to the end of the existing mbuf chain.
  *
  * If any of the new mbufs is marked as M_EOR, mark the last mbuf appended as
  * end-of-record.
  */
 void
 sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
 {
 	int eor = 0;
 	struct mbuf *o;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	/* Consume the incoming chain one mbuf at a time. */
 	while (m) {
 		/* Remember whether any incoming mbuf carried M_EOR. */
 		eor |= m->m_flags & M_EOR;
 		/*
 		 * Case (1): drop an empty mbuf, provided no M_EOR has been
 		 * seen so far, or a neighbour (the next incoming mbuf, else
 		 * n) has the same type.
 		 */
 		if (m->m_len == 0 &&
 		    (eor == 0 ||
 		     (((o = m->m_next) || (o = n)) &&
 		      o->m_type == m->m_type))) {
 			/* Keep sb_lastrecord off the mbuf about to be freed. */
 			if (sb->sb_lastrecord == m)
 				sb->sb_lastrecord = m->m_next;
 			m = m_free(m);
 			continue;
 		}
 		/*
 		 * Case (2): copy m's data into the trailing space of n.
 		 * Every condition below must hold for that to happen.
 		 */
 		if (n && (n->m_flags & M_EOR) == 0 &&
 		    M_WRITABLE(n) &&
 		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
 		    !(m->m_flags & M_NOTREADY) &&
 		    !(n->m_flags & M_NOTREADY) &&
 		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
 		    m->m_len <= M_TRAILINGSPACE(n) &&
 		    n->m_type == m->m_type) {
 			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
 			    (unsigned)m->m_len);
 			n->m_len += m->m_len;
 			/* Only byte counters change; no mbuf was added. */
 			sb->sb_ccc += m->m_len;
 			if (sb->sb_fnrdy == NULL)
 				sb->sb_acc += m->m_len;
 			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 				/* XXX: Probably don't need.*/
 				sb->sb_ctl += m->m_len;
 			m = m_free(m);
 			continue;
 		}
 		/*
 		 * Case (3): link m itself after n (or as the buffer head),
 		 * account for it, and make it the new tail.
 		 */
 		if (n)
 			n->m_next = m;
 		else
 			sb->sb_mb = m;
 		sb->sb_mbtail = m;
 		sballoc(sb, m);
 		n = m;
 		/* M_EOR is re-applied to the final mbuf after the loop. */
 		m->m_flags &= ~M_EOR;
 		m = m->m_next;
 		n->m_next = 0;
 	}
 	if (eor) {
 		KASSERT(n != NULL, ("sbcompress: eor && n == NULL"));
 		n->m_flags |= eor;
 	}
 	SBLASTMBUFCHK(sb);
 }
 
 /*
  * Free all mbufs in a sockbuf.  Check that all resources are reclaimed.
  */
 static void
 sbflush_internal(struct sockbuf *sb)
 {
 
 	/* Keep cutting until no mbuf storage is accounted to the buffer. */
 	while (sb->sb_mbcnt) {
 		/*
 		 * Don't call sbcut(sb, 0) if the leading mbuf is non-empty:
 		 * we would loop forever.  Leave the loop instead and let the
 		 * KASSERT below report the inconsistent buffer.
 		 */
 		if (sb->sb_ccc == 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len))
 			break;
 		/* Cut everything currently counted and free what comes back. */
 		m_freem(sbcut_internal(sb, (int)sb->sb_ccc));
 	}
 	KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
 	    ("%s: ccc %u mb %p mbcnt %u", __func__,
 	    sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
 }
 
 void
 sbflush_locked(struct sockbuf *sb)
 {
 
 	/* Same work as sbflush_internal(), plus the lock assertion. */
 	SOCKBUF_LOCK_ASSERT(sb);
 	sbflush_internal(sb);
 }
 
 void
 sbflush(struct sockbuf *sb)
 {
 
 	/* Locking wrapper around sbflush_locked(). */
 	SOCKBUF_LOCK(sb);
 	sbflush_locked(sb);
 	SOCKBUF_UNLOCK(sb);
 }
 
 /*
  * Cut data from (the front of) a sockbuf.
  */
 /*
  * Remove len bytes from the front of sb.  Fully consumed mbufs are
  * unlinked and returned as a chain linked through m_next for the caller to
  * dispose of; a partially consumed mbuf is trimmed in place.
  */
 static struct mbuf *
 sbcut_internal(struct sockbuf *sb, int len)
 {
 	struct mbuf *m, *next, *mfree;
 
 	KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0",
 	    __func__, len));
 	KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u",
 	    __func__, len, sb->sb_ccc));
 
 	/* m walks the current record; next is the record that follows it. */
 	next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
 	mfree = NULL;
 
 	while (len > 0) {
 		/* Current record exhausted: continue with the next one. */
 		if (m == NULL) {
 			KASSERT(next, ("%s: no next, len %d", __func__, len));
 			m = next;
 			next = m->m_nextpkt;
 		}
 		/* The cut ends inside this mbuf: trim its front and stop. */
 		if (m->m_len > len) {
 			KASSERT(!(m->m_flags & M_NOTAVAIL),
 			    ("%s: m %p M_NOTAVAIL", __func__, m));
 			m->m_len -= len;
 			m->m_data += len;
 			sb->sb_ccc -= len;
 			sb->sb_acc -= len;
 			if (sb->sb_sndptroff != 0)
 				sb->sb_sndptroff -= len;
 			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 				sb->sb_ctl -= len;
 			break;
 		}
 		/* The whole mbuf is consumed. */
 		len -= m->m_len;
 		sbfree(sb, m);
 		/*
 		 * Do not put M_NOTREADY buffers to the free list, they
 		 * are referenced from outside.
 		 */
 		if (m->m_flags & M_NOTREADY)
 			m = m->m_next;
 		else {
 			struct mbuf *n;
 
 			/* Push m onto the front of the returned chain. */
 			n = m->m_next;
 			m->m_next = mfree;
 			mfree = m;
 			m = n;
 		}
 	}
 	/*
 	 * Free any zero-length mbufs from the buffer.
 	 * For SOCK_DGRAM sockets such mbufs represent empty records.
 	 * XXX: For SOCK_STREAM sockets such mbufs can appear in the buffer,
 	 * when sosend_generic() needs to send only control data.
 	 */
 	while (m && m->m_len == 0) {
 		struct mbuf *n;
 
 		sbfree(sb, m);
 		n = m->m_next;
 		m->m_next = mfree;
 		mfree = m;
 		m = n;
 	}
 	/* Re-establish the head: the rest of this record, else the next. */
 	if (m) {
 		sb->sb_mb = m;
 		m->m_nextpkt = next;
 	} else
 		sb->sb_mb = next;
 	/*
 	 * First part is an inline SB_EMPTY_FIXUP().  Second part makes sure
 	 * sb_lastrecord is up-to-date if we dropped part of the last record.
 	 */
 	m = sb->sb_mb;
 	if (m == NULL) {
 		sb->sb_mbtail = NULL;
 		sb->sb_lastrecord = NULL;
 	} else if (m->m_nextpkt == NULL) {
 		sb->sb_lastrecord = m;
 	}
 
 	return (mfree);
 }
 
 /*
  * Drop data from (the front of) a sockbuf.
  */
 void
 sbdrop_locked(struct sockbuf *sb, int len)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	/* Cut len bytes and free the removed mbufs right away. */
 	m_freem(sbcut_internal(sb, len));
 }
 
 /*
  * Drop data from (the front of) a sockbuf,
  * and return it to caller.
  */
 struct mbuf *
 sbcut_locked(struct sockbuf *sb, int len)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	/* The removed chain is returned unfreed to the caller. */
 	return (sbcut_internal(sb, len));
 }
 
 void
 sbdrop(struct sockbuf *sb, int len)
 {
 	struct mbuf *removed;
 
 	/* Detach the data while holding the buffer lock ... */
 	SOCKBUF_LOCK(sb);
 	removed = sbcut_internal(sb, len);
 	SOCKBUF_UNLOCK(sb);
 
 	/* ... and free the detached chain only after the lock is dropped. */
 	m_freem(removed);
 }
 
 /*
  * Maintain a pointer and offset pair into the socket buffer mbuf chain to
  * avoid traversal of the entire socket buffer for larger offsets.
  */
 /*
  * Returns the mbuf from which a send at offset off should start and stores
  * in *moff the offset relative to that mbuf.  Except on the early-return
  * path, the cached sb_sndptr/sb_sndptroff pair is advanced using off and
  * len so that the next call starts closer to its target.
  */
 struct mbuf *
 sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff)
 {
 	struct mbuf *m, *ret;
 
 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
 	KASSERT(off + len <= sb->sb_acc, ("%s: beyond sb", __func__));
 	KASSERT(sb->sb_sndptroff <= sb->sb_acc, ("%s: sndptroff broken", __func__));
 
 	/*
 	 * Is off below stored offset? Happens on retransmits.
 	 * Just return, we can't help here.
 	 */
 	if (sb->sb_sndptroff > off) {
 		*moff = off;
 		return (sb->sb_mb);
 	}
 
 	/* Return closest mbuf in chain for current offset. */
 	*moff = off - sb->sb_sndptroff;
 	m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb;
 	/* Offset lands exactly at the end of that mbuf: step to the next. */
 	if (*moff == m->m_len) {
 		*moff = 0;
 		sb->sb_sndptroff += m->m_len;
 		m = ret = m->m_next;
 		KASSERT(ret->m_len > 0,
 		    ("mbuf %p in sockbuf %p chain has no valid data", ret, sb));
 	}
 
 	/* Advance by len to be as close as possible for the next transmit. */
 	/* off is reused here as the distance still to skip from m. */
 	for (off = off - sb->sb_sndptroff + len - 1;
 	     off > 0 && m != NULL && off >= m->m_len;
 	     m = m->m_next) {
 		sb->sb_sndptroff += m->m_len;
 		off -= m->m_len;
 	}
 	/* Ran off the end of the chain with distance left over. */
 	if (off > 0 && m == NULL)
 		panic("%s: sockbuf %p and mbuf %p clashing", __func__, sb, ret);
 	sb->sb_sndptr = m;
 
 	return (ret);
 }
 
 /*
  * Return the first mbuf and the mbuf data offset for the provided
  * send offset without changing the "sb_sndptroff" field.
  */
 struct mbuf *
 sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff)
 {
 	struct mbuf *cur;
 
 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
 
 	/*
 	 * Start from the head of the buffer by default.  The cached send
 	 * pointer is used only when it exists and does not lie beyond
 	 * "off" (which it does on retransmits); "off" then becomes
 	 * relative to the cached position.
 	 */
 	cur = sb->sb_mb;
 	if (sb->sb_sndptr != NULL && sb->sb_sndptroff <= off) {
 		cur = sb->sb_sndptr;
 		off -= sb->sb_sndptroff;
 	}
 
 	/* Skip whole mbufs until the remainder falls inside one. */
 	for (; off > 0 && cur != NULL && off >= cur->m_len; cur = cur->m_next)
 		off -= cur->m_len;
 
 	*moff = off;
 	return (cur);
 }
 
 /*
  * Drop a record off the front of a sockbuf and move the next record to the
  * front.
  */
 void
 sbdroprecord_locked(struct sockbuf *sb)
 {
 	struct mbuf *m;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	m = sb->sb_mb;
 	if (m) {
 		sb->sb_mb = m->m_nextpkt;
 		do {
 			sbfree(sb, m);
 			m = m_free(m);
 		} while (m);
 	}
 	SB_EMPTY_FIXUP(sb);
 }
 
 /*
  * Drop a record off the front of a sockbuf and move the next record to the
  * front.
  */
 void
 sbdroprecord(struct sockbuf *sb)
 {
 
 	/* Locking wrapper around sbdroprecord_locked(). */
 	SOCKBUF_LOCK(sb);
 	sbdroprecord_locked(sb);
 	SOCKBUF_UNLOCK(sb);
 }
 
 /*
  * Create a "control" mbuf containing the specified data with the specified
  * type for presentation on a socket buffer.
  */
 struct mbuf *
 sbcreatecontrol(caddr_t p, int size, int type, int level)
 {
 	struct cmsghdr *cp;
 	struct mbuf *m;
 	size_t space;
 
 	/*
 	 * p:     payload copied into the message data area, or NULL to
 	 *        leave the (zeroed) data area untouched.
 	 * size:  payload length in bytes.
 	 * type, level: stored in cmsg_type and cmsg_level.
 	 *
 	 * Returns the new MT_CONTROL mbuf, or NULL when size is negative,
 	 * when CMSG_SPACE(size) exceeds MCLBYTES, or when the non-sleeping
 	 * mbuf allocation fails.
 	 *
 	 * A negative size is rejected explicitly rather than relying on how
 	 * the unsigned cast happens to behave inside CMSG_SPACE(), so it can
 	 * never reach the memcpy() below.
 	 */
 	if (size < 0)
 		return (NULL);
 	space = CMSG_SPACE((u_int)size);
 	if (space > MCLBYTES)
 		return (NULL);
 	/* Use a cluster only when the message does not fit a plain mbuf. */
 	if (space > MLEN)
 		m = m_getcl(M_NOWAIT, MT_CONTROL, 0);
 	else
 		m = m_get(M_NOWAIT, MT_CONTROL);
 	if (m == NULL)
 		return (NULL);
 	cp = mtod(m, struct cmsghdr *);
 	m->m_len = 0;
 	KASSERT(space <= M_TRAILINGSPACE(m),
 	    ("sbcreatecontrol: short mbuf"));
 	/*
 	 * Don't leave the padding between the msg header and the
 	 * cmsg data and the padding after the cmsg data un-initialized.
 	 */
 	bzero(cp, space);
 	if (p != NULL)
 		(void)memcpy(CMSG_DATA(cp), p, size);
 	m->m_len = space;
 	cp->cmsg_len = CMSG_LEN(size);
 	cp->cmsg_level = level;
 	cp->cmsg_type = type;
 	return (m);
 }
 
 /*
  * This does the same for socket buffers that sotoxsocket does for sockets:
  * generate a user-format data structure describing the socket buffer.  Note
  * that the xsockbuf structure, since it is always embedded in a socket, does
  * not include a self pointer nor a length.  We make this entry point public
  * in case some other mechanism needs it.
  */
 void
 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
 {
 
 	/* Byte count: the exported sb_cc is taken from sb_ccc. */
 	xsb->sb_cc = sb->sb_ccc;
 
 	/* Water marks and mbuf storage limit. */
 	xsb->sb_hiwat = sb->sb_hiwat;
 	xsb->sb_lowat = sb->sb_lowat;
 	xsb->sb_mbmax = sb->sb_mbmax;
 
 	/* Storage counters. */
 	xsb->sb_mbcnt = sb->sb_mbcnt;
 	xsb->sb_mcnt = sb->sb_mcnt;
 	xsb->sb_ccnt = sb->sb_ccnt;
 
 	/* Flags and timeout. */
 	xsb->sb_flags = sb->sb_flags;
 	xsb->sb_timeo = sb->sb_timeo;
 }
 
 /* This takes the place of kern.maxsockbuf, which moved to kern.ipc. */
 /* Backing storage for the placeholder kern OID declared just below. */
 static int dummy;
 SYSCTL_INT(_kern, KERN_DUMMY, dummy, CTLFLAG_RW, &dummy, 0, "");
 /* kern.ipc.maxsockbuf: backed by sb_max, handled by sysctl_handle_sb_max(). */
 SYSCTL_OID(_kern_ipc, KIPC_MAXSOCKBUF, maxsockbuf, CTLTYPE_ULONG|CTLFLAG_RW,
     &sb_max, 0, sysctl_handle_sb_max, "LU", "Maximum socket buffer size");
 /* Read/write knob backed directly by sb_efficiency. */
 SYSCTL_ULONG(_kern_ipc, KIPC_SOCKBUF_WASTE, sockbuf_waste_factor, CTLFLAG_RW,
     &sb_efficiency, 0, "Socket buffer size waste factor");
Index: head/sys/kern/uipc_socket.c
===================================================================
--- head/sys/kern/uipc_socket.c	(revision 319721)
+++ head/sys/kern/uipc_socket.c	(revision 319722)
@@ -1,3777 +1,4124 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993
  *	The Regents of the University of California.
  * Copyright (c) 2004 The FreeBSD Foundation
  * Copyright (c) 2004-2008 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)uipc_socket.c	8.3 (Berkeley) 4/15/94
  */
 
 /*
  * Comments on the socket life cycle:
  *
  * soalloc() sets of socket layer state for a socket, called only by
  * socreate() and sonewconn().  Socket layer private.
  *
  * sodealloc() tears down socket layer state for a socket, called only by
  * sofree() and sonewconn().  Socket layer private.
  *
  * pru_attach() associates protocol layer state with an allocated socket;
  * called only once, may fail, aborting socket allocation.  This is called
  * from socreate() and sonewconn().  Socket layer private.
  *
  * pru_detach() disassociates protocol layer state from an attached socket,
  * and will be called exactly once for sockets in which pru_attach() has
  * been successfully called.  If pru_attach() returned an error,
  * pru_detach() will not be called.  Socket layer private.
  *
  * pru_abort() and pru_close() notify the protocol layer that the last
  * consumer of a socket is starting to tear down the socket, and that the
  * protocol should terminate the connection.  Historically, pru_abort() also
  * detached protocol state from the socket state, but this is no longer the
  * case.
  *
  * socreate() creates a socket and attaches protocol state.  This is a public
  * interface that may be used by socket layer consumers to create new
  * sockets.
  *
  * sonewconn() creates a socket and attaches protocol state.  This is a
  * public interface  that may be used by protocols to create new sockets when
  * a new connection is received and will be available for accept() on a
  * listen socket.
  *
  * soclose() destroys a socket after possibly waiting for it to disconnect.
  * This is a public interface that socket consumers should use to close and
  * release a socket when done with it.
  *
  * soabort() destroys a socket without waiting for it to disconnect (used
  * only for incoming connections that are already partially or fully
  * connected).  This is used internally by the socket layer when clearing
  * listen socket queues (due to overflow or close on the listen socket), but
  * is also a public interface protocols may use to abort connections in
  * their incomplete listen queues should they no longer be required.  Sockets
  * placed in completed connection listen queues should not be aborted for
  * reasons described in the comment above the soclose() implementation.  This
  * is not a general purpose close routine, and except in the specific
  * circumstances described here, should not be used.
  *
  * sofree() will free a socket and its protocol state if all references on
  * the socket have been released, and is the public interface to attempt to
  * free a socket when a reference is removed.  This is a socket layer private
  * interface.
  *
  * NOTE: In addition to socreate() and soclose(), which provide a single
  * socket reference to the consumer to be managed as required, there are two
  * calls to explicitly manage socket references, soref(), and sorele().
  * Currently, these are generally required only when transitioning a socket
  * from a listen queue to a file descriptor, in order to prevent garbage
  * collection of the socket at an untimely moment.  For a number of reasons,
  * these interfaces are not preferred, and should be avoided.
  *
  * NOTE: With regard to VNETs the general rule is that callers do not set
  * curvnet. Exceptions to this rule include soabort(), sodisconnect(),
  * sofree() (and with that sorele(), sotryfree()), as well as sonewconn()
  * and sorflush(), which are usually called from a pre-set VNET context.
  * sopoll() currently does not need a VNET context to be set.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_compat.h"
+#include "opt_sctp.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/fcntl.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/domain.h>
 #include <sys/file.h>			/* for struct knote */
 #include <sys/hhook.h>
 #include <sys/kernel.h>
 #include <sys/khelp.h>
 #include <sys/event.h>
 #include <sys/eventhandler.h>
 #include <sys/poll.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/resourcevar.h>
 #include <net/route.h>
 #include <sys/signalvar.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/uio.h>
 #include <sys/jail.h>
 #include <sys/syslog.h>
 #include <netinet/in.h>
 
 #include <net/vnet.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/uma.h>
 
 #ifdef COMPAT_FREEBSD32
 #include <sys/mount.h>
 #include <sys/sysent.h>
 #include <compat/freebsd32/freebsd32.h>
 #endif
 
 static int	soreceive_rcvoob(struct socket *so, struct uio *uio,
 		    int flags);
+static void	so_rdknl_lock(void *);
+static void	so_rdknl_unlock(void *);
+static void	so_rdknl_assert_locked(void *);
+static void	so_rdknl_assert_unlocked(void *);
+static void	so_wrknl_lock(void *);
+static void	so_wrknl_unlock(void *);
+static void	so_wrknl_assert_locked(void *);
+static void	so_wrknl_assert_unlocked(void *);
 
 static void	filt_sordetach(struct knote *kn);
 static int	filt_soread(struct knote *kn, long hint);
 static void	filt_sowdetach(struct knote *kn);
 static int	filt_sowrite(struct knote *kn, long hint);
-static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id);
 static int	filt_soempty(struct knote *kn, long hint);
+static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id);
 fo_kqfilter_t	soo_kqfilter;
 
 static struct filterops soread_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_sordetach,
 	.f_event = filt_soread,
 };
 static struct filterops sowrite_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_sowdetach,
 	.f_event = filt_sowrite,
 };
 static struct filterops soempty_filtops = {
 	.f_isfd = 1,
 	.f_detach = filt_sowdetach,
 	.f_event = filt_soempty,
 };
 
 so_gen_t	so_gencnt;	/* generation count for sockets */
 
 MALLOC_DEFINE(M_SONAME, "soname", "socket name");
 MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
 
 #define	VNET_SO_ASSERT(so)						\
 	VNET_ASSERT(curvnet != NULL,					\
 	    ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so)));
 
 VNET_DEFINE(struct hhook_head *, socket_hhh[HHOOK_SOCKET_LAST + 1]);
 #define	V_socket_hhh		VNET(socket_hhh)
 
 /*
  * Limit on the number of connections in the listen queue waiting
  * for accept(2).
  * NB: The original sysctl somaxconn is still available but hidden
  * to prevent confusion about the actual purpose of this number.
  */
 static u_int somaxconn = SOMAXCONN;
 
 static int
 sysctl_somaxconn(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	int val;
 
 	val = somaxconn;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	if (error || !req->newptr )
 		return (error);
 
 	/*
 	 * The purpose of the UINT_MAX / 3 limit, is so that the formula
 	 *   3 * so_qlimit / 2
 	 * below, will not overflow.
          */
 
 	if (val < 1 || val > UINT_MAX / 3)
 		return (EINVAL);
 
 	somaxconn = val;
 	return (0);
 }
 SYSCTL_PROC(_kern_ipc, OID_AUTO, soacceptqueue, CTLTYPE_UINT | CTLFLAG_RW,
     0, sizeof(int), sysctl_somaxconn, "I",
     "Maximum listen socket pending connection accept queue size");
 SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn,
     CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_SKIP,
     0, sizeof(int), sysctl_somaxconn, "I",
     "Maximum listen socket pending connection accept queue size (compat)");
 
 static int numopensockets;
 SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD,
     &numopensockets, 0, "Number of open sockets");
 
 /*
  * accept_mtx locks down per-socket fields relating to accept queues.  See
  * socketvar.h for an annotation of the protected fields of struct socket.
  */
 struct mtx accept_mtx;
 MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF);
 
 /*
  * so_global_mtx protects so_gencnt, numopensockets, and the per-socket
  * so_gencnt field.
  */
 static struct mtx so_global_mtx;
 MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF);
 
 /*
  * General IPC sysctl name space, used by sockets and a variety of other IPC
  * types.
  */
 SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC");
 
 /*
  * Initialize the socket subsystem and set up the socket
  * memory allocator.
  */
 static uma_zone_t socket_zone;
 int	maxsockets;
 
 static void
 socket_zone_change(void *tag)
 {
 
 	maxsockets = uma_zone_set_max(socket_zone, maxsockets);
 }
 
 static void
 socket_hhook_register(int subtype)
 {
 	
 	if (hhook_head_register(HHOOK_TYPE_SOCKET, subtype,
 	    &V_socket_hhh[subtype],
 	    HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
 		printf("%s: WARNING: unable to register hook\n", __func__);
 }
 
 static void
 socket_hhook_deregister(int subtype)
 {
 	
 	if (hhook_head_deregister(V_socket_hhh[subtype]) != 0)
 		printf("%s: WARNING: unable to deregister hook\n", __func__);
 }
 
 static void
 socket_init(void *tag)
 {
 
 	socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	maxsockets = uma_zone_set_max(socket_zone, maxsockets);
 	uma_zone_set_warning(socket_zone, "kern.ipc.maxsockets limit reached");
 	EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL,
 	    EVENTHANDLER_PRI_FIRST);
 }
 SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL);
 
 static void
 socket_vnet_init(const void *unused __unused)
 {
 	int i;
 
 	/* We expect a contiguous range */
 	for (i = 0; i <= HHOOK_SOCKET_LAST; i++)
 		socket_hhook_register(i);
 }
 VNET_SYSINIT(socket_vnet_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY,
     socket_vnet_init, NULL);
 
 static void
 socket_vnet_uninit(const void *unused __unused)
 {
 	int i;
 
 	for (i = 0; i <= HHOOK_SOCKET_LAST; i++)
 		socket_hhook_deregister(i);
 }
 VNET_SYSUNINIT(socket_vnet_uninit, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY,
     socket_vnet_uninit, NULL);
 
 /*
  * Initialise maxsockets.  This SYSINIT must be run after
  * tunable_mbinit().
  */
 static void
 init_maxsockets(void *ignored)
 {
 
 	TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets);
 	maxsockets = imax(maxsockets, maxfiles);
 }
 SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL);
 
 /*
  * Sysctl to get and set the maximum global sockets limit.  Notify protocols
  * of the change so that they can update their dependent limits as required.
  */
 static int
 sysctl_maxsockets(SYSCTL_HANDLER_ARGS)
 {
 	int error, newmaxsockets;
 
 	newmaxsockets = maxsockets;
 	error = sysctl_handle_int(oidp, &newmaxsockets, 0, req);
 	if (error == 0 && req->newptr) {
 		if (newmaxsockets > maxsockets &&
 		    newmaxsockets <= maxfiles) {
 			maxsockets = newmaxsockets;
 			EVENTHANDLER_INVOKE(maxsockets_change);
 		} else
 			error = EINVAL;
 	}
 	return (error);
 }
 SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW,
     &maxsockets, 0, sysctl_maxsockets, "IU",
     "Maximum number of sockets available");
 
 /*
  * Socket operation routines.  These routines are called by the routines in
  * sys_socket.c or from a system process, and implement the semantics of
  * socket operations by switching out to the protocol specific routines.
  */
 
 /*
  * Get a socket structure from our zone, and initialize it.  Note that it
  * would probably be better to allocate socket and PCB at the same time, but
  * I'm not convinced that all the protocols can be easily modified to do
  * this.
  *
  * soalloc() returns a socket with a ref count of 0.
  */
 static struct socket *
 soalloc(struct vnet *vnet)
 {
 	struct socket *so;
 
 	so = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO);
 	if (so == NULL)
 		return (NULL);
 #ifdef MAC
 	if (mac_socket_init(so, M_NOWAIT) != 0) {
 		uma_zfree(socket_zone, so);
 		return (NULL);
 	}
 #endif
 	if (khelp_init_osd(HELPER_CLASS_SOCKET, &so->osd)) {
 		uma_zfree(socket_zone, so);
 		return (NULL);
 	}
 
+	/*
+	 * The socket locking protocol allows locking 2 sockets at a time;
+	 * however, the first one must be a listening socket.  WITNESS lacks
+	 * a feature to change the class of an existing lock, so we use DUPOK.
+	 */
+	mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
 	SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
 	SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
+	so->so_rcv.sb_sel = &so->so_rdsel;
+	so->so_snd.sb_sel = &so->so_wrsel;
 	sx_init(&so->so_snd.sb_sx, "so_snd_sx");
 	sx_init(&so->so_rcv.sb_sx, "so_rcv_sx");
 	TAILQ_INIT(&so->so_snd.sb_aiojobq);
 	TAILQ_INIT(&so->so_rcv.sb_aiojobq);
 	TASK_INIT(&so->so_snd.sb_aiotask, 0, soaio_snd, so);
 	TASK_INIT(&so->so_rcv.sb_aiotask, 0, soaio_rcv, so);
 #ifdef VIMAGE
 	VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p",
 	    __func__, __LINE__, so));
 	so->so_vnet = vnet;
 #endif
 	/* We shouldn't need the so_global_mtx */
 	if (hhook_run_socket(so, NULL, HHOOK_SOCKET_CREATE)) {
 		/* Do we need more comprehensive error returns? */
 		uma_zfree(socket_zone, so);
 		return (NULL);
 	}
 	mtx_lock(&so_global_mtx);
 	so->so_gencnt = ++so_gencnt;
 	++numopensockets;
 #ifdef VIMAGE
 	vnet->vnet_sockcnt++;
 #endif
 	mtx_unlock(&so_global_mtx);
 
 	return (so);
 }
 
 /*
  * Free the storage associated with a socket at the socket layer, tear down
  * locks, labels, etc.  All protocol state is assumed already to have been
  * torn down (and possibly never set up) by the caller.
  */
 static void
 sodealloc(struct socket *so)
 {
 
 	KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count));
 	KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL"));
 
 	mtx_lock(&so_global_mtx);
 	so->so_gencnt = ++so_gencnt;
 	--numopensockets;	/* Could be below, but faster here. */
 #ifdef VIMAGE
 	VNET_ASSERT(so->so_vnet != NULL, ("%s:%d so_vnet is NULL, so=%p",
 	    __func__, __LINE__, so));
 	so->so_vnet->vnet_sockcnt--;
 #endif
 	mtx_unlock(&so_global_mtx);
 	if (so->so_rcv.sb_hiwat)
 		(void)chgsbsize(so->so_cred->cr_uidinfo,
 		    &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY);
 	if (so->so_snd.sb_hiwat)
 		(void)chgsbsize(so->so_cred->cr_uidinfo,
 		    &so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
-	/* remove accept filter if one is present. */
-	if (so->so_accf != NULL)
-		accept_filt_setopt(so, NULL);
 #ifdef MAC
 	mac_socket_destroy(so);
 #endif
 	hhook_run_socket(so, NULL, HHOOK_SOCKET_CLOSE);
 
 	crfree(so->so_cred);
 	khelp_destroy_osd(&so->osd);
-	sx_destroy(&so->so_snd.sb_sx);
-	sx_destroy(&so->so_rcv.sb_sx);
-	SOCKBUF_LOCK_DESTROY(&so->so_snd);
-	SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+	if (SOLISTENING(so)) {
+		if (so->sol_accept_filter != NULL)
+			accept_filt_setopt(so, NULL);
+	} else {
+		sx_destroy(&so->so_snd.sb_sx);
+		sx_destroy(&so->so_rcv.sb_sx);
+		SOCKBUF_LOCK_DESTROY(&so->so_snd);
+		SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+	}
+	mtx_destroy(&so->so_lock);
 	uma_zfree(socket_zone, so);
 }
 
 /*
  * socreate returns a socket with a ref count of 1.  The socket should be
  * closed with soclose().
  */
 int
 socreate(int dom, struct socket **aso, int type, int proto,
     struct ucred *cred, struct thread *td)
 {
 	struct protosw *prp;
 	struct socket *so;
 	int error;
 
 	if (proto)
 		prp = pffindproto(dom, proto, type);
 	else
 		prp = pffindtype(dom, type);
 
 	if (prp == NULL) {
 		/* No support for domain. */
 		if (pffinddomain(dom) == NULL)
 			return (EAFNOSUPPORT);
 		/* No support for socket type. */
 		if (proto == 0 && type != 0)
 			return (EPROTOTYPE);
 		return (EPROTONOSUPPORT);
 	}
 	if (prp->pr_usrreqs->pru_attach == NULL ||
 	    prp->pr_usrreqs->pru_attach == pru_attach_notsupp)
 		return (EPROTONOSUPPORT);
 
 	if (prison_check_af(cred, prp->pr_domain->dom_family) != 0)
 		return (EPROTONOSUPPORT);
 
 	if (prp->pr_type != type)
 		return (EPROTOTYPE);
 	so = soalloc(CRED_TO_VNET(cred));
 	if (so == NULL)
 		return (ENOBUFS);
 
-	TAILQ_INIT(&so->so_incomp);
-	TAILQ_INIT(&so->so_comp);
 	so->so_type = type;
 	so->so_cred = crhold(cred);
 	if ((prp->pr_domain->dom_family == PF_INET) ||
 	    (prp->pr_domain->dom_family == PF_INET6) ||
 	    (prp->pr_domain->dom_family == PF_ROUTE))
 		so->so_fibnum = td->td_proc->p_fibnum;
 	else
 		so->so_fibnum = 0;
 	so->so_proto = prp;
 #ifdef MAC
 	mac_socket_create(cred, so);
 #endif
-	knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
-	knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
-	so->so_count = 1;
+	knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+	    so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+	knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+	    so_wrknl_assert_locked, so_wrknl_assert_unlocked);
 	/*
 	 * Auto-sizing of socket buffers is managed by the protocols and
 	 * the appropriate flags must be set in the pru_attach function.
 	 */
 	CURVNET_SET(so->so_vnet);
 	error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
 	CURVNET_RESTORE();
 	if (error) {
-		KASSERT(so->so_count == 1, ("socreate: so_count %d",
-		    so->so_count));
-		so->so_count = 0;
 		sodealloc(so);
 		return (error);
 	}
+	soref(so);
 	*aso = so;
 	return (0);
 }
 
 #ifdef REGRESSION
 static int regression_sonewconn_earlytest = 1;
 SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW,
     &regression_sonewconn_earlytest, 0, "Perform early sonewconn limit test");
 #endif
 
 /*
  * When an attempt at a new connection is noted on a socket which accepts
  * connections, sonewconn is called.  If the connection is possible (subject
  * to space constraints, etc.) then we allocate a new structure, properly
  * linked into the data structure of the original socket, and return this.
  * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
  *
  * Note: the ref count on the socket is 0 on return.
  */
 struct socket *
 sonewconn(struct socket *head, int connstatus)
 {
 	static struct timeval lastover;
 	static struct timeval overinterval = { 60, 0 };
 	static int overcount;
 
 	struct socket *so;
-	int over;
+	u_int over;
 
-	ACCEPT_LOCK();
-	over = (head->so_qlen > 3 * head->so_qlimit / 2);
-	ACCEPT_UNLOCK();
+	SOLISTEN_LOCK(head);
+	over = (head->sol_qlen > 3 * head->sol_qlimit / 2);
+	SOLISTEN_UNLOCK(head);
 #ifdef REGRESSION
 	if (regression_sonewconn_earlytest && over) {
 #else
 	if (over) {
 #endif
 		overcount++;
 
 		if (ratecheck(&lastover, &overinterval)) {
 			log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: "
 			    "%i already in queue awaiting acceptance "
 			    "(%d occurrences)\n",
-			    __func__, head->so_pcb, head->so_qlen, overcount);
+			    __func__, head->so_pcb, head->sol_qlen, overcount);
 
 			overcount = 0;
 		}
 
 		return (NULL);
 	}
-	VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
-	    __func__, __LINE__, head));
+	VNET_ASSERT(head->so_vnet != NULL, ("%s: so %p vnet is NULL",
+	    __func__, head));
 	so = soalloc(head->so_vnet);
 	if (so == NULL) {
 		log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
 		    "limit reached or out of memory\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
-	if ((head->so_options & SO_ACCEPTFILTER) != 0)
-		connstatus = 0;
-	so->so_head = head;
+	so->so_listen = head;
 	so->so_type = head->so_type;
-	so->so_options = head->so_options &~ SO_ACCEPTCONN;
 	so->so_linger = head->so_linger;
 	so->so_state = head->so_state | SS_NOFDREF;
 	so->so_fibnum = head->so_fibnum;
 	so->so_proto = head->so_proto;
 	so->so_cred = crhold(head->so_cred);
 #ifdef MAC
 	mac_socket_newconn(head, so);
 #endif
-	knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
-	knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
+	knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+	    so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+	knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+	    so_wrknl_assert_locked, so_wrknl_assert_unlocked);
 	VNET_SO_ASSERT(head);
-	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
+	if (soreserve(so, head->sol_sbsnd_hiwat, head->sol_sbrcv_hiwat)) {
 		sodealloc(so);
 		log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
 	if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
 		sodealloc(so);
 		log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
 		    __func__, head->so_pcb);
 		return (NULL);
 	}
-	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
-	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
-	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
-	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
-	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
-	so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
+	so->so_rcv.sb_lowat = head->sol_sbrcv_lowat;
+	so->so_snd.sb_lowat = head->sol_sbsnd_lowat;
+	so->so_rcv.sb_timeo = head->sol_sbrcv_timeo;
+	so->so_snd.sb_timeo = head->sol_sbsnd_timeo;
+	so->so_rcv.sb_flags |= head->sol_sbrcv_flags & SB_AUTOSIZE;
+	so->so_snd.sb_flags |= head->sol_sbsnd_flags & SB_AUTOSIZE;
+
+	SOLISTEN_LOCK(head);
+	if (head->sol_accept_filter != NULL)
+		connstatus = 0;
 	so->so_state |= connstatus;
-	ACCEPT_LOCK();
-	/*
-	 * The accept socket may be tearing down but we just
-	 * won a race on the ACCEPT_LOCK.
-	 * However, if sctp_peeloff() is called on a 1-to-many
-	 * style socket, the SO_ACCEPTCONN doesn't need to be set.
-	 */
-	if (!(head->so_options & SO_ACCEPTCONN) &&
-	    ((head->so_proto->pr_protocol != IPPROTO_SCTP) ||
-	     (head->so_type != SOCK_SEQPACKET))) {
-		SOCK_LOCK(so);
-		so->so_head = NULL;
-		sofree(so);		/* NB: returns ACCEPT_UNLOCK'ed. */
-		return (NULL);
-	}
+	so->so_options = head->so_options & ~SO_ACCEPTCONN;
+	soref(head); /* A socket on (in)complete queue refs head. */
 	if (connstatus) {
-		TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
-		so->so_qstate |= SQ_COMP;
-		head->so_qlen++;
+		TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
+		so->so_qstate = SQ_COMP;
+		head->sol_qlen++;
+		solisten_wakeup(head);	/* unlocks */
 	} else {
 		/*
 		 * Keep removing sockets from the head until there's room for
 		 * us to insert on the tail.  In pre-locking revisions, this
 		 * was a simple if(), but as we could be racing with other
 		 * threads and soabort() requires dropping locks, we must
 		 * loop waiting for the condition to be true.
 		 */
-		while (head->so_incqlen > head->so_qlimit) {
+		while (head->sol_incqlen > head->sol_qlimit) {
 			struct socket *sp;
-			sp = TAILQ_FIRST(&head->so_incomp);
-			TAILQ_REMOVE(&head->so_incomp, sp, so_list);
-			head->so_incqlen--;
-			sp->so_qstate &= ~SQ_INCOMP;
-			sp->so_head = NULL;
-			ACCEPT_UNLOCK();
+
+			sp = TAILQ_FIRST(&head->sol_incomp);
+			TAILQ_REMOVE(&head->sol_incomp, sp, so_list);
+			head->sol_incqlen--;
+			SOCK_LOCK(sp);
+			sp->so_qstate = SQ_NONE;
+			sp->so_listen = NULL;
+			SOCK_UNLOCK(sp);
+			sorele(head);	/* does SOLISTEN_UNLOCK, head stays */
 			soabort(sp);
-			ACCEPT_LOCK();
+			SOLISTEN_LOCK(head);
 		}
-		TAILQ_INSERT_TAIL(&head->so_incomp, so, so_list);
-		so->so_qstate |= SQ_INCOMP;
-		head->so_incqlen++;
+		TAILQ_INSERT_TAIL(&head->sol_incomp, so, so_list);
+		so->so_qstate = SQ_INCOMP;
+		head->sol_incqlen++;
+		SOLISTEN_UNLOCK(head);
 	}
-	ACCEPT_UNLOCK();
-	if (connstatus) {
-		sorwakeup(head);
-		wakeup_one(&head->so_timeo);
+	return (so);
+}
+
+#ifdef SCTP
+/*
+ * Socket part of sctp_peeloff().  Detach a new socket from an
+ * association.  The new socket is returned with a reference.
+ */
+struct socket *
+sopeeloff(struct socket *head)
+{
+	struct socket *so;
+
+	VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
+	    __func__, __LINE__, head));
+	so = soalloc(head->so_vnet);
+	if (so == NULL) {
+		log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: "
+		    "limit reached or out of memory\n",
+		    __func__, head->so_pcb);
+		return (NULL);
 	}
+	so->so_type = head->so_type;
+	so->so_options = head->so_options;
+	so->so_linger = head->so_linger;
+	so->so_state = (head->so_state & SS_NBIO) | SS_ISCONNECTED;
+	so->so_fibnum = head->so_fibnum;
+	so->so_proto = head->so_proto;
+	so->so_cred = crhold(head->so_cred);
+#ifdef MAC
+	mac_socket_newconn(head, so);
+#endif
+	knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock,
+	    so_rdknl_assert_locked, so_rdknl_assert_unlocked);
+	knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock,
+	    so_wrknl_assert_locked, so_wrknl_assert_unlocked);
+	VNET_SO_ASSERT(head);
+	if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) {
+		sodealloc(so);
+		log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n",
+		    __func__, head->so_pcb);
+		return (NULL);
+	}
+	if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
+		sodealloc(so);
+		log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n",
+		    __func__, head->so_pcb);
+		return (NULL);
+	}
+	so->so_rcv.sb_lowat = head->so_rcv.sb_lowat;
+	so->so_snd.sb_lowat = head->so_snd.sb_lowat;
+	so->so_rcv.sb_timeo = head->so_rcv.sb_timeo;
+	so->so_snd.sb_timeo = head->so_snd.sb_timeo;
+	so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE;
+	so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE;
+
+	soref(so);
+
 	return (so);
 }
+#endif	/* SCTP */
 
 int
 sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error;
 
 	CURVNET_SET(so->so_vnet);
 	error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td);
 	CURVNET_RESTORE();
 	return (error);
 }
 
 int
 sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error;
 
 	CURVNET_SET(so->so_vnet);
 	error = (*so->so_proto->pr_usrreqs->pru_bindat)(fd, so, nam, td);
 	CURVNET_RESTORE();
 	return (error);
 }
 
 /*
  * solisten() transitions a socket from a non-listening state to a listening
  * state, but can also be used to update the listen queue depth on an
  * existing listen socket.  The protocol will call back into the sockets
  * layer using solisten_proto_check() and solisten_proto() to check and set
  * socket-layer listen state.  Call backs are used so that the protocol can
  * acquire both protocol and socket layer locks in whatever order is required
  * by the protocol.
  *
  * Protocol implementors are advised to hold the socket lock across the
  * socket-layer test and set to avoid races at the socket layer.
  */
 int
 solisten(struct socket *so, int backlog, struct thread *td)
 {
 	int error;
 
 	CURVNET_SET(so->so_vnet);
 	error = (*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td);
 	CURVNET_RESTORE();
 	return (error);
 }
 
 int
 solisten_proto_check(struct socket *so)
 {
 
 	SOCK_LOCK_ASSERT(so);
 
 	if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
 	    SS_ISDISCONNECTING))
 		return (EINVAL);
 	return (0);
 }
 
 void
 solisten_proto(struct socket *so, int backlog)
 {
+	int sbrcv_lowat, sbsnd_lowat;
+	u_int sbrcv_hiwat, sbsnd_hiwat;
+	short sbrcv_flags, sbsnd_flags;
+	sbintime_t sbrcv_timeo, sbsnd_timeo;
 
 	SOCK_LOCK_ASSERT(so);
 
+	if (SOLISTENING(so))
+		goto listening;
+
+	/*
+	 * Change this socket to listening state.
+	 */
+	sbrcv_lowat = so->so_rcv.sb_lowat;
+	sbsnd_lowat = so->so_snd.sb_lowat;
+	sbrcv_hiwat = so->so_rcv.sb_hiwat;
+	sbsnd_hiwat = so->so_snd.sb_hiwat;
+	sbrcv_flags = so->so_rcv.sb_flags;
+	sbsnd_flags = so->so_snd.sb_flags;
+	sbrcv_timeo = so->so_rcv.sb_timeo;
+	sbsnd_timeo = so->so_snd.sb_timeo;
+
+	sbdestroy(&so->so_snd, so);
+	sbdestroy(&so->so_rcv, so);
+	sx_destroy(&so->so_snd.sb_sx);
+	sx_destroy(&so->so_rcv.sb_sx);
+	SOCKBUF_LOCK_DESTROY(&so->so_snd);
+	SOCKBUF_LOCK_DESTROY(&so->so_rcv);
+
+#ifdef INVARIANTS
+	bzero(&so->so_rcv,
+	    sizeof(struct socket) - offsetof(struct socket, so_rcv));
+#endif
+
+	so->sol_sbrcv_lowat = sbrcv_lowat;
+	so->sol_sbsnd_lowat = sbsnd_lowat;
+	so->sol_sbrcv_hiwat = sbrcv_hiwat;
+	so->sol_sbsnd_hiwat = sbsnd_hiwat;
+	so->sol_sbrcv_flags = sbrcv_flags;
+	so->sol_sbsnd_flags = sbsnd_flags;
+	so->sol_sbrcv_timeo = sbrcv_timeo;
+	so->sol_sbsnd_timeo = sbsnd_timeo;
+
+	so->sol_qlen = so->sol_incqlen = 0;
+	TAILQ_INIT(&so->sol_incomp);
+	TAILQ_INIT(&so->sol_comp);
+
+	so->sol_accept_filter = NULL;
+	so->sol_accept_filter_arg = NULL;
+	so->sol_accept_filter_str = NULL;
+
+	so->so_options |= SO_ACCEPTCONN;
+
+listening:
 	if (backlog < 0 || backlog > somaxconn)
 		backlog = somaxconn;
-	so->so_qlimit = backlog;
-	so->so_options |= SO_ACCEPTCONN;
+	so->sol_qlimit = backlog;
 }
 
 /*
+ * Wake up listeners/subsystems once we have a complete connection.
+ * Enters with the listening socket locked, returns with it unlocked.
+ */
+void
+solisten_wakeup(struct socket *sol)
+{
+
+	if (sol->sol_upcall != NULL)
+		(void )sol->sol_upcall(sol, sol->sol_upcallarg, M_NOWAIT);
+	else {
+		selwakeuppri(&sol->so_rdsel, PSOCK);
+		KNOTE_LOCKED(&sol->so_rdsel.si_note, 0);
+	}
+	SOLISTEN_UNLOCK(sol);
+	wakeup_one(&sol->sol_comp);
+}
+
+/*
+ * Return a single connection off a listening socket's queue.  The main
+ * consumer of the function is kern_accept4().  Some modules that do their
+ * own accept management also use the function.
+ *
+ * The listening socket must be locked on entry and is unlocked on
+ * return.
+ * The flags argument is a set of accept4(2) flags and ACCEPT4_INHERIT.
+ */
+int
+solisten_dequeue(struct socket *head, struct socket **ret, int flags)
+{
+	struct socket *so;
+	int error;
+
+	SOLISTEN_LOCK_ASSERT(head);
+
+	while (!(head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp) &&
+	    head->so_error == 0) {
+		error = msleep(&head->sol_comp, &head->so_lock, PSOCK | PCATCH,
+		    "accept", 0);
+		if (error != 0) {
+			SOLISTEN_UNLOCK(head);
+			return (error);
+		}
+	}
+	if (head->so_error) {
+		error = head->so_error;
+		head->so_error = 0;
+		SOLISTEN_UNLOCK(head);
+		return (error);
+        }
+	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp)) {
+		SOLISTEN_UNLOCK(head);
+		return (EWOULDBLOCK);
+	}
+	so = TAILQ_FIRST(&head->sol_comp);
+	SOCK_LOCK(so);
+	KASSERT(so->so_qstate == SQ_COMP,
+	    ("%s: so %p not SQ_COMP", __func__, so));
+	soref(so);
+	head->sol_qlen--;
+	so->so_qstate = SQ_NONE;
+	so->so_listen = NULL;
+	TAILQ_REMOVE(&head->sol_comp, so, so_list);
+	if (flags & ACCEPT4_INHERIT)
+		so->so_state |= (head->so_state & SS_NBIO);
+	else
+		so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
+	SOCK_UNLOCK(so);
+	sorele(head);
+
+	*ret = so;
+	return (0);
+}
+
+/*
  * Evaluate the reference count and named references on a socket; if no
  * references remain, free it.  This should be called whenever a reference is
  * released, such as in sorele(), but also when named reference flags are
  * cleared in socket or protocol code.
  *
  * sofree() will free the socket if:
  *
  * - There are no outstanding file descriptor references or related consumers
  *   (so_count == 0).
  *
  * - The socket has been closed by user space, if ever open (SS_NOFDREF).
  *
  * - The protocol does not have an outstanding strong reference on the socket
  *   (SS_PROTOREF).
  *
  * - The socket is not in a completed connection queue, so a process has been
  *   notified that it is present.  If it is removed, the user process may
  *   block in accept() despite select() saying the socket was ready.
  */
 void
 sofree(struct socket *so)
 {
 	struct protosw *pr = so->so_proto;
-	struct socket *head;
 
-	ACCEPT_LOCK_ASSERT();
 	SOCK_LOCK_ASSERT(so);
 
 	if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 ||
-	    (so->so_state & SS_PROTOREF) || (so->so_qstate & SQ_COMP)) {
+	    (so->so_state & SS_PROTOREF) || (so->so_qstate == SQ_COMP)) {
 		SOCK_UNLOCK(so);
-		ACCEPT_UNLOCK();
 		return;
 	}
 
-	head = so->so_head;
-	if (head != NULL) {
-		KASSERT((so->so_qstate & SQ_COMP) != 0 ||
-		    (so->so_qstate & SQ_INCOMP) != 0,
-		    ("sofree: so_head != NULL, but neither SQ_COMP nor "
-		    "SQ_INCOMP"));
-		KASSERT((so->so_qstate & SQ_COMP) == 0 ||
-		    (so->so_qstate & SQ_INCOMP) == 0,
-		    ("sofree: so->so_qstate is SQ_COMP and also SQ_INCOMP"));
-		TAILQ_REMOVE(&head->so_incomp, so, so_list);
-		head->so_incqlen--;
-		so->so_qstate &= ~SQ_INCOMP;
-		so->so_head = NULL;
+	if (!SOLISTENING(so) && so->so_qstate == SQ_INCOMP) {
+		struct socket *sol;
+
+		sol = so->so_listen;
+		KASSERT(sol, ("%s: so %p on incomp of NULL", __func__, so));
+
+		/*
+		 * To solve the race between close of a listening socket and
+		 * a socket on its incomplete queue, we need to lock both.
+		 * The order is first the listening socket, then the regular.
+		 * Since so has neither a file descriptor reference nor
+		 * SS_PROTOREF, this function and the listening socket hold
+		 * the only pointers to so.  To preserve so and sol, we
+		 * reference both and then relock.
+		 * After relock the socket cannot move to sol_comp, since
+		 * it no longer has a PCB, but it may have been removed
+		 * from sol_incomp.  If that happens, we share responsibility
+		 * for freeing the socket, but soclose() has already removed
+		 * it from the queue.
+		 */
+		soref(sol);
+		soref(so);
+		SOCK_UNLOCK(so);
+		SOLISTEN_LOCK(sol);
+		SOCK_LOCK(so);
+		if (so->so_qstate == SQ_INCOMP) {
+			KASSERT(so->so_listen == sol,
+			    ("%s: so %p migrated out of sol %p",
+			    __func__, so, sol));
+			TAILQ_REMOVE(&sol->sol_incomp, so, so_list);
+			sol->sol_incqlen--;
+			/* This is guaranteed not to be the last. */
+			refcount_release(&sol->so_count);
+			so->so_qstate = SQ_NONE;
+			so->so_listen = NULL;
+		} else
+			KASSERT(so->so_listen == NULL,
+			    ("%s: so %p not on (in)comp with so_listen",
+			    __func__, so));
+		sorele(sol);
+		KASSERT(so->so_count == 1,
+		    ("%s: so %p count %u", __func__, so, so->so_count));
+		so->so_count = 0;	/* drop the temporary soref() above */
 	}
-	KASSERT((so->so_qstate & SQ_COMP) == 0 &&
-	    (so->so_qstate & SQ_INCOMP) == 0,
-	    ("sofree: so_head == NULL, but still SQ_COMP(%d) or SQ_INCOMP(%d)",
-	    so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP));
-	if (so->so_options & SO_ACCEPTCONN) {
-		KASSERT((TAILQ_EMPTY(&so->so_comp)),
-		    ("sofree: so_comp populated"));
-		KASSERT((TAILQ_EMPTY(&so->so_incomp)),
-		    ("sofree: so_incomp populated"));
-	}
+	if (SOLISTENING(so))
+		so->so_error = ECONNABORTED;
 	SOCK_UNLOCK(so);
-	ACCEPT_UNLOCK();
 
 	VNET_SO_ASSERT(so);
 	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
 		(*pr->pr_domain->dom_dispose)(so);
 	if (pr->pr_usrreqs->pru_detach != NULL)
 		(*pr->pr_usrreqs->pru_detach)(so);
 
 	/*
 	 * From this point on, we assume that no other references to this
 	 * socket exist anywhere else in the stack.  Therefore, no locks need
 	 * to be acquired or held.
 	 *
 	 * We used to do a lot of socket buffer and socket locking here, as
 	 * well as invoke sorflush() and perform wakeups.  The direct call to
 	 * dom_dispose() and sbrelease_internal() are an inlining of what was
 	 * necessary from sorflush().
 	 *
 	 * Notice that the socket buffer and kqueue state are torn down
 	 * before calling pru_detach.  This means that protocols shold not
 	 * assume they can perform socket wakeups, etc, in their detach code.
 	 */
-	sbdestroy(&so->so_snd, so);
-	sbdestroy(&so->so_rcv, so);
-	seldrain(&so->so_snd.sb_sel);
-	seldrain(&so->so_rcv.sb_sel);
-	knlist_destroy(&so->so_rcv.sb_sel.si_note);
-	knlist_destroy(&so->so_snd.sb_sel.si_note);
+	if (!SOLISTENING(so)) {
+		sbdestroy(&so->so_snd, so);
+		sbdestroy(&so->so_rcv, so);
+	}
+	seldrain(&so->so_rdsel);
+	seldrain(&so->so_wrsel);
+	knlist_destroy(&so->so_rdsel.si_note);
+	knlist_destroy(&so->so_wrsel.si_note);
 	sodealloc(so);
 }
 
 /*
  * Close a socket on last file table reference removal.  Initiate disconnect
  * if connected.  Free socket when disconnect complete.
  *
  * This function will sorele() the socket.  Note that soclose() may be called
  * prior to the ref count reaching zero.  The actual socket structure will
  * not be freed until the ref count reaches zero.
  */
 int
 soclose(struct socket *so)
 {
+	struct accept_queue lqueue;
+	bool listening;
 	int error = 0;
 
 	KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter"));
 
 	CURVNET_SET(so->so_vnet);
 	funsetown(&so->so_sigio);
 	if (so->so_state & SS_ISCONNECTED) {
 		if ((so->so_state & SS_ISDISCONNECTING) == 0) {
 			error = sodisconnect(so);
 			if (error) {
 				if (error == ENOTCONN)
 					error = 0;
 				goto drop;
 			}
 		}
 		if (so->so_options & SO_LINGER) {
 			if ((so->so_state & SS_ISDISCONNECTING) &&
 			    (so->so_state & SS_NBIO))
 				goto drop;
 			while (so->so_state & SS_ISCONNECTED) {
 				error = tsleep(&so->so_timeo,
 				    PSOCK | PCATCH, "soclos",
 				    so->so_linger * hz);
 				if (error)
 					break;
 			}
 		}
 	}
 
 drop:
 	if (so->so_proto->pr_usrreqs->pru_close != NULL)
 		(*so->so_proto->pr_usrreqs->pru_close)(so);
-	ACCEPT_LOCK();
-	if (so->so_options & SO_ACCEPTCONN) {
+
+	SOCK_LOCK(so);
+	if ((listening = (so->so_options & SO_ACCEPTCONN))) {
 		struct socket *sp;
-		/*
-		 * Prevent new additions to the accept queues due
-		 * to ACCEPT_LOCK races while we are draining them.
-		 */
-		so->so_options &= ~SO_ACCEPTCONN;
-		while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) {
-			TAILQ_REMOVE(&so->so_incomp, sp, so_list);
-			so->so_incqlen--;
-			sp->so_qstate &= ~SQ_INCOMP;
-			sp->so_head = NULL;
-			ACCEPT_UNLOCK();
-			soabort(sp);
-			ACCEPT_LOCK();
+
+		TAILQ_INIT(&lqueue);	/* private list of both queues */
+		TAILQ_SWAP(&lqueue, &so->sol_incomp, socket, so_list);
+		TAILQ_CONCAT(&lqueue, &so->sol_comp, so_list);
+
+		so->sol_qlen = so->sol_incqlen = 0;
+
+		TAILQ_FOREACH(sp, &lqueue, so_list) {
+			SOCK_LOCK(sp);
+			sp->so_qstate = SQ_NONE;
+			sp->so_listen = NULL;
+			SOCK_UNLOCK(sp);
+			/* Guaranteed not to be the last. */
+			refcount_release(&so->so_count);
 		}
-		while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) {
-			TAILQ_REMOVE(&so->so_comp, sp, so_list);
-			so->so_qlen--;
-			sp->so_qstate &= ~SQ_COMP;
-			sp->so_head = NULL;
-			ACCEPT_UNLOCK();
-			soabort(sp);
-			ACCEPT_LOCK();
-		}
-		KASSERT((TAILQ_EMPTY(&so->so_comp)),
-		    ("%s: so_comp populated", __func__));
-		KASSERT((TAILQ_EMPTY(&so->so_incomp)),
-		    ("%s: so_incomp populated", __func__));
 	}
-	SOCK_LOCK(so);
 	KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
 	so->so_state |= SS_NOFDREF;
-	sorele(so);			/* NB: Returns with ACCEPT_UNLOCK(). */
+	sorele(so);
+	if (listening) {	/* abort the children collected above */
+		struct socket *sp, *tsp;	/* tsp: saved next element */
+
+		TAILQ_FOREACH_SAFE(sp, &lqueue, so_list, tsp) {
+			SOCK_LOCK(sp);
+			if (sp->so_count == 0) {
+				SOCK_UNLOCK(sp);
+				soabort(sp);	/* ends in sofree(sp) */
+			} else
+				/* sp is now in sofree() */
+				SOCK_UNLOCK(sp);
+		}
+	}
 	CURVNET_RESTORE();
 	return (error);
 }
 
 /*
  * soabort() is used to abruptly tear down a connection, such as when a
  * resource limit is reached (listen queue depth exceeded), or if a listen
  * socket is closed while there are sockets waiting to be accepted.
  *
  * This interface is tricky, because it is called on an unreferenced socket,
  * and must be called only by a thread that has actually removed the socket
  * from the listen queue it was on, or races with other threads are risked.
  *
  * This interface will call into the protocol code, so must not be called
  * with any socket locks held.  Protocols do call it while holding their own
  * recursible protocol mutexes, but this is something that should be subject
  * to review in the future.
  */
 void
 soabort(struct socket *so)
 {
 
 	/*
 	 * In as much as is possible, assert that no references to this
 	 * socket are held.  This is not quite the same as asserting that the
 	 * current thread is responsible for arranging for no references, but
 	 * is as close as we can get for now.
 	 */
 	KASSERT(so->so_count == 0, ("soabort: so_count"));
 	KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF"));
 	KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF"));
-	KASSERT((so->so_state & SQ_COMP) == 0, ("soabort: SQ_COMP"));
-	KASSERT((so->so_state & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP"));
+	KASSERT(so->so_qstate == SQ_NONE, ("soabort: !SQ_NONE"));
 	VNET_SO_ASSERT(so);
 
 	if (so->so_proto->pr_usrreqs->pru_abort != NULL)
 		(*so->so_proto->pr_usrreqs->pru_abort)(so);
-	ACCEPT_LOCK();
 	SOCK_LOCK(so);
 	sofree(so);
 }
 
 int
 soaccept(struct socket *so, struct sockaddr **nam)
 {
 	int error;
 
 	SOCK_LOCK(so);
 	KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF"));
 	so->so_state &= ~SS_NOFDREF;
 	SOCK_UNLOCK(so);
 
 	CURVNET_SET(so->so_vnet);
 	error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
 	CURVNET_RESTORE();
 	return (error);
 }
 
 int
 soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (soconnectat(AT_FDCWD, so, nam, td));
 }
 
 int
 soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error;
 
 	if (so->so_options & SO_ACCEPTCONN)
 		return (EOPNOTSUPP);
 
 	CURVNET_SET(so->so_vnet);
 	/*
 	 * If protocol is connection-based, can only connect once.
 	 * Otherwise, if connected, try to disconnect first.  This allows
 	 * user to disconnect by connecting to, e.g., a null address.
 	 */
 	if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) &&
 	    ((so->so_proto->pr_flags & PR_CONNREQUIRED) ||
 	    (error = sodisconnect(so)))) {
 		error = EISCONN;
 	} else {
 		/*
 		 * Prevent accumulated error from previous connection from
 		 * biting us.
 		 */
 		so->so_error = 0;
 		if (fd == AT_FDCWD) {
 			error = (*so->so_proto->pr_usrreqs->pru_connect)(so,
 			    nam, td);
 		} else {
 			error = (*so->so_proto->pr_usrreqs->pru_connectat)(fd,
 			    so, nam, td);
 		}
 	}
 	CURVNET_RESTORE();
 
 	return (error);
 }
 
 int
 soconnect2(struct socket *so1, struct socket *so2)
 {
 	int error;
 
 	CURVNET_SET(so1->so_vnet);
 	error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
 	CURVNET_RESTORE();
 	return (error);
 }
 
 int
 sodisconnect(struct socket *so)
 {
 	int error;
 
 	if ((so->so_state & SS_ISCONNECTED) == 0)
 		return (ENOTCONN);
 	if (so->so_state & SS_ISDISCONNECTING)
 		return (EALREADY);
 	VNET_SO_ASSERT(so);
 	error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
 	return (error);
 }
 
 #define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
 
 int
 sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	long space;
 	ssize_t resid;
 	int clen = 0, error, dontroute;
 
 	KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM"));
 	KASSERT(so->so_proto->pr_flags & PR_ATOMIC,
 	    ("sosend_dgram: !PR_ATOMIC"));
 
 	if (uio != NULL)
 		resid = uio->uio_resid;
 	else
 		resid = top->m_pkthdr.len;
 	/*
 	 * In theory resid should be unsigned.  However, space must be
 	 * signed, as it might be less than 0 if we over-committed, and we
 	 * must use a signed comparison of space and resid.  On the other
 	 * hand, a negative resid causes us to loop sending 0-length
 	 * segments to the protocol.
 	 */
 	if (resid < 0) {
 		error = EINVAL;
 		goto out;
 	}
 
 	dontroute =
 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0;
 	if (td != NULL)
 		td->td_ru.ru_msgsnd++;
 	if (control != NULL)
 		clen = control->m_len;
 
 	SOCKBUF_LOCK(&so->so_snd);
 	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 		SOCKBUF_UNLOCK(&so->so_snd);
 		error = EPIPE;
 		goto out;
 	}
 	if (so->so_error) {
 		error = so->so_error;
 		so->so_error = 0;
 		SOCKBUF_UNLOCK(&so->so_snd);
 		goto out;
 	}
 	if ((so->so_state & SS_ISCONNECTED) == 0) {
 		/*
 		 * `sendto' and `sendmsg' is allowed on a connection-based
 		 * socket if it supports implied connect.  Return ENOTCONN if
 		 * not connected and no address is supplied.
 		 */
 		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
 		    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
 			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
 			    !(resid == 0 && clen != 0)) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				error = ENOTCONN;
 				goto out;
 			}
 		} else if (addr == NULL) {
 			if (so->so_proto->pr_flags & PR_CONNREQUIRED)
 				error = ENOTCONN;
 			else
 				error = EDESTADDRREQ;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			goto out;
 		}
 	}
 
 	/*
 	 * Do we need MSG_OOB support in SOCK_DGRAM?  Signs here may be a
 	 * problem and need fixing.
 	 */
 	space = sbspace(&so->so_snd);
 	if (flags & MSG_OOB)
 		space += 1024;
 	space -= clen;
 	SOCKBUF_UNLOCK(&so->so_snd);
 	if (resid > space) {
 		error = EMSGSIZE;
 		goto out;
 	}
 	if (uio == NULL) {
 		resid = 0;
 		if (flags & MSG_EOR)
 			top->m_flags |= M_EOR;
 	} else {
 		/*
 		 * Copy the data from userland into a mbuf chain.
 		 * If no data is to be copied in, a single empty mbuf
 		 * is returned.
 		 */
 		top = m_uiotombuf(uio, M_WAITOK, space, max_hdr,
 		    (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0)));
 		if (top == NULL) {
 			error = EFAULT;	/* only possible error */
 			goto out;
 		}
 		space -= resid - uio->uio_resid;
 		resid = uio->uio_resid;
 	}
 	KASSERT(resid == 0, ("sosend_dgram: resid != 0"));
 	/*
 	 * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock
 	 * than with.
 	 */
 	if (dontroute) {
 		SOCK_LOCK(so);
 		so->so_options |= SO_DONTROUTE;
 		SOCK_UNLOCK(so);
 	}
 	/*
 	 * XXX all the SBS_CANTSENDMORE checks previously done could be out
 	 * of date.  We could have received a reset packet in an interrupt or
 	 * maybe we slept while doing page faults in uiomove() etc.  We could
 	 * probably recheck again inside the locking protection here, but
 	 * there are probably other places that this also happens.  We must
 	 * rethink this.
 	 */
 	VNET_SO_ASSERT(so);
 	error = (*so->so_proto->pr_usrreqs->pru_send)(so,
 	    (flags & MSG_OOB) ? PRUS_OOB :
 	/*
 	 * If the user set MSG_EOF, the protocol understands this flag and
 	 * nothing left to send then use PRU_SEND_EOF instead of PRU_SEND.
 	 */
 	    ((flags & MSG_EOF) &&
 	     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
 	     (resid <= 0)) ?
 		PRUS_EOF :
 		/* If there is more to send set PRUS_MORETOCOME */
 		(flags & MSG_MORETOCOME) ||
 		(resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
 		top, addr, control, td);
 	if (dontroute) {
 		SOCK_LOCK(so);
 		so->so_options &= ~SO_DONTROUTE;
 		SOCK_UNLOCK(so);
 	}
 	clen = 0;
 	control = NULL;
 	top = NULL;
 out:
 	if (top != NULL)
 		m_freem(top);
 	if (control != NULL)
 		m_freem(control);
 	return (error);
 }
 
 /*
  * Send on a socket.  If send must go all at once and message is larger than
  * send buffering, then hard error.  Lock against other senders.  If must go
  * all at once and not enough room now, then inform user that this would
  * block and do nothing.  Otherwise, if nonblocking, send as much as
  * possible.  The data to be sent is described by "uio" if nonzero, otherwise
  * by the mbuf chain "top" (which must be null if uio is not).  Data provided
  * in mbuf chain must be small enough to send all at once.
  *
  * Returns nonzero on error, timeout or signal; callers must check for short
  * counts if EINTR/ERESTART are returned.  Data and control buffers are freed
  * on return.
  */
 int
 sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	long space;
 	ssize_t resid;
 	int clen = 0, error, dontroute;
 	int atomic = sosendallatonce(so) || top;
 
 	if (uio != NULL)
 		resid = uio->uio_resid;
 	else
 		resid = top->m_pkthdr.len;
 	/*
 	 * In theory resid should be unsigned.  However, space must be
 	 * signed, as it might be less than 0 if we over-committed, and we
 	 * must use a signed comparison of space and resid.  On the other
 	 * hand, a negative resid causes us to loop sending 0-length
 	 * segments to the protocol.
 	 *
 	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
 	 * type sockets since that's an error.
 	 */
 	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
 		error = EINVAL;
 		goto out;
 	}
 
 	dontroute =
 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
 	    (so->so_proto->pr_flags & PR_ATOMIC);
 	if (td != NULL)
 		td->td_ru.ru_msgsnd++;
 	if (control != NULL)
 		clen = control->m_len;
 
 	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
 	if (error)
 		goto out;
 
 restart:
 	do {
 		SOCKBUF_LOCK(&so->so_snd);
 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = EPIPE;
 			goto release;
 		}
 		if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			goto release;
 		}
 		if ((so->so_state & SS_ISCONNECTED) == 0) {
 			/*
 			 * `sendto' and `sendmsg' is allowed on a connection-
 			 * based socket if it supports implied connect.
 			 * Return ENOTCONN if not connected and no address is
 			 * supplied.
 			 */
 			if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
 			    (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
 				if ((so->so_state & SS_ISCONFIRMING) == 0 &&
 				    !(resid == 0 && clen != 0)) {
 					SOCKBUF_UNLOCK(&so->so_snd);
 					error = ENOTCONN;
 					goto release;
 				}
 			} else if (addr == NULL) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				if (so->so_proto->pr_flags & PR_CONNREQUIRED)
 					error = ENOTCONN;
 				else
 					error = EDESTADDRREQ;
 				goto release;
 			}
 		}
 		space = sbspace(&so->so_snd);
 		if (flags & MSG_OOB)
 			space += 1024;
 		if ((atomic && resid > so->so_snd.sb_hiwat) ||
 		    clen > so->so_snd.sb_hiwat) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = EMSGSIZE;
 			goto release;
 		}
 		if (space < resid + clen &&
 		    (atomic || space < so->so_snd.sb_lowat || space < clen)) {
 			if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				error = EWOULDBLOCK;
 				goto release;
 			}
 			error = sbwait(&so->so_snd);
 			SOCKBUF_UNLOCK(&so->so_snd);
 			if (error)
 				goto release;
 			goto restart;
 		}
 		SOCKBUF_UNLOCK(&so->so_snd);
 		space -= clen;
 		do {
 			if (uio == NULL) {
 				resid = 0;
 				if (flags & MSG_EOR)
 					top->m_flags |= M_EOR;
 			} else {
 				/*
 				 * Copy the data from userland into a mbuf
 				 * chain.  If resid is 0, which can happen
 				 * only if we have control to send, then
 				 * a single empty mbuf is returned.  This
 				 * is a workaround to prevent protocol send
 				 * methods to panic.
 				 */
 				top = m_uiotombuf(uio, M_WAITOK, space,
 				    (atomic ? max_hdr : 0),
 				    (atomic ? M_PKTHDR : 0) |
 				    ((flags & MSG_EOR) ? M_EOR : 0));
 				if (top == NULL) {
 					error = EFAULT; /* only possible error */
 					goto release;
 				}
 				space -= resid - uio->uio_resid;
 				resid = uio->uio_resid;
 			}
 			if (dontroute) {
 				SOCK_LOCK(so);
 				so->so_options |= SO_DONTROUTE;
 				SOCK_UNLOCK(so);
 			}
 			/*
 			 * XXX all the SBS_CANTSENDMORE checks previously
 			 * done could be out of date.  We could have received
 			 * a reset packet in an interrupt or maybe we slept
 			 * while doing page faults in uiomove() etc.  We
 			 * could probably recheck again inside the locking
 			 * protection here, but there are probably other
 			 * places that this also happens.  We must rethink
 			 * this.
 			 */
 			VNET_SO_ASSERT(so);
 			error = (*so->so_proto->pr_usrreqs->pru_send)(so,
 			    (flags & MSG_OOB) ? PRUS_OOB :
 			/*
 			 * If the user set MSG_EOF, the protocol understands
 			 * this flag and nothing left to send then use
 			 * PRU_SEND_EOF instead of PRU_SEND.
 			 */
 			    ((flags & MSG_EOF) &&
 			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
 			     (resid <= 0)) ?
 				PRUS_EOF :
 			/* If there is more to send set PRUS_MORETOCOME. */
 			    (flags & MSG_MORETOCOME) ||
 			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
 			    top, addr, control, td);
 			if (dontroute) {
 				SOCK_LOCK(so);
 				so->so_options &= ~SO_DONTROUTE;
 				SOCK_UNLOCK(so);
 			}
 			clen = 0;
 			control = NULL;
 			top = NULL;
 			if (error)
 				goto release;
 		} while (resid && space > 0);
 	} while (resid);
 
 release:
 	sbunlock(&so->so_snd);
 out:
 	if (top != NULL)
 		m_freem(top);
 	if (control != NULL)
 		m_freem(control);
 	return (error);
 }
 
 int
 sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	int error;
 
 	CURVNET_SET(so->so_vnet);
 	error = so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top,
 	    control, flags, td);
 	CURVNET_RESTORE();
 	return (error);
 }
 
 /*
  * The part of soreceive() that implements reading non-inline out-of-band
  * data from a socket.  For more complete comments, see soreceive(), from
  * which this code originated.
  *
  * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
  * unable to return an mbuf chain to the caller.
  */
 static int
 soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
 {
 	struct protosw *pr = so->so_proto;
 	struct mbuf *m;
 	int error;
 
 	KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
 	VNET_SO_ASSERT(so);
 
 	m = m_get(M_WAITOK, MT_DATA);
 	error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
 	if (error)
 		goto bad;
 	do {
 		error = uiomove(mtod(m, void *),
 		    (int) min(uio->uio_resid, m->m_len), uio);
 		m = m_free(m);
 	} while (uio->uio_resid && error == 0 && m);
 bad:
 	if (m != NULL)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * Following replacement or removal of the first mbuf on the first mbuf chain
  * of a socket buffer, push necessary state changes back into the socket
  * buffer so that other consumers see the values consistently.  'nextrecord'
  * is the callers locally stored value of the original value of
  * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes.
  * NOTE: 'nextrecord' may be NULL.
  */
 static __inline void
 sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord)
 {
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	/*
 	 * First, update for the new value of nextrecord.  If necessary, make
 	 * it the first record.
 	 */
 	if (sb->sb_mb != NULL)
 		sb->sb_mb->m_nextpkt = nextrecord;
 	else
 		sb->sb_mb = nextrecord;
 
 	/*
 	 * Now update any dependent socket buffer fields to reflect the new
 	 * state.  This is an expanded inline of SB_EMPTY_FIXUP(), with the
 	 * addition of a second clause that takes care of the case where
 	 * sb_mb has been updated, but remains the last record.
 	 */
 	if (sb->sb_mb == NULL) {
 		sb->sb_mbtail = NULL;
 		sb->sb_lastrecord = NULL;
 	} else if (sb->sb_mb->m_nextpkt == NULL)
 		sb->sb_lastrecord = sb->sb_mb;
 }
 
 /*
  * Implement receive operations on a socket.  We depend on the way that
  * records are added to the sockbuf by sbappend.  In particular, each record
  * (mbufs linked through m_next) must begin with an address if the protocol
  * so specifies, followed by an optional mbuf or mbufs containing ancillary
  * data, and then zero or more mbufs of data.  In order to allow parallelism
  * between network receive and copying to user space, as well as avoid
  * sleeping with a mutex held, we release the socket buffer mutex during the
  * user space copy.  Although the sockbuf is locked, new data may still be
  * appended, and thus we must maintain consistency of the sockbuf during that
  * time.
  *
  * The caller may receive the data as a single mbuf chain by supplying an
  * mbuf **mp0 for use in returning the chain.  The uio is then used only for
  * the count in uio_resid.
  */
 int
 soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	struct mbuf *m, **mp;
 	int flags, error, offset;
 	ssize_t len;
 	struct protosw *pr = so->so_proto;
 	struct mbuf *nextrecord;
 	int moff, type = 0;
 	ssize_t orig_resid = uio->uio_resid;
 
 	mp = mp0;
 	if (psa != NULL)
 		*psa = NULL;
 	if (controlp != NULL)
 		*controlp = NULL;
 	if (flagsp != NULL)
 		flags = *flagsp &~ MSG_EOR;
 	else
 		flags = 0;
 	if (flags & MSG_OOB)
 		return (soreceive_rcvoob(so, uio, flags));
 	if (mp != NULL)
 		*mp = NULL;
 	if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING)
 	    && uio->uio_resid) {
 		VNET_SO_ASSERT(so);
 		(*pr->pr_usrreqs->pru_rcvd)(so, 0);
 	}
 
 	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
 	if (error)
 		return (error);
 
 restart:
 	SOCKBUF_LOCK(&so->so_rcv);
 	m = so->so_rcv.sb_mb;
 	/*
 	 * If we have less data than requested, block awaiting more (subject
 	 * to any timeout) if:
 	 *   1. the current count is less than the low water mark, or
 	 *   2. MSG_DONTWAIT is not set
 	 */
 	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
 	    sbavail(&so->so_rcv) < uio->uio_resid) &&
 	    sbavail(&so->so_rcv) < so->so_rcv.sb_lowat &&
 	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
 		KASSERT(m != NULL || !sbavail(&so->so_rcv),
 		    ("receive: m == %p sbavail == %u",
 		    m, sbavail(&so->so_rcv)));
 		if (so->so_error) {
 			if (m != NULL)
 				goto dontblock;
 			error = so->so_error;
 			if ((flags & MSG_PEEK) == 0)
 				so->so_error = 0;
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			goto release;
 		}
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 			if (m == NULL) {
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				goto release;
 			} else
 				goto dontblock;
 		}
 		for (; m != NULL; m = m->m_next)
 			if (m->m_type == MT_OOBDATA  || (m->m_flags & M_EOR)) {
 				m = so->so_rcv.sb_mb;
 				goto dontblock;
 			}
 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
 		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			error = ENOTCONN;
 			goto release;
 		}
 		if (uio->uio_resid == 0) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			goto release;
 		}
 		if ((so->so_state & SS_NBIO) ||
 		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			error = EWOULDBLOCK;
 			goto release;
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
 		error = sbwait(&so->so_rcv);
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		if (error)
 			goto release;
 		goto restart;
 	}
 dontblock:
 	/*
 	 * From this point onward, we maintain 'nextrecord' as a cache of the
 	 * pointer to the next record in the socket buffer.  We must keep the
 	 * various socket buffer pointers and local stack versions of the
 	 * pointers in sync, pushing out modifications before dropping the
 	 * socket buffer mutex, and re-reading them when picking it up.
 	 *
 	 * Otherwise, we will race with the network stack appending new data
 	 * or records onto the socket buffer by using inconsistent/stale
 	 * versions of the field, possibly resulting in socket buffer
 	 * corruption.
 	 *
 	 * By holding the high-level sblock(), we prevent simultaneous
 	 * readers from pulling off the front of the socket buffer.
 	 */
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (uio->uio_td)
 		uio->uio_td->td_ru.ru_msgrcv++;
 	KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb"));
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 	nextrecord = m->m_nextpkt;
 	if (pr->pr_flags & PR_ADDR) {
 		KASSERT(m->m_type == MT_SONAME,
 		    ("m->m_type == %d", m->m_type));
 		orig_resid = 0;
 		if (psa != NULL)
 			*psa = sodupsockaddr(mtod(m, struct sockaddr *),
 			    M_NOWAIT);
 		if (flags & MSG_PEEK) {
 			m = m->m_next;
 		} else {
 			sbfree(&so->so_rcv, m);
 			so->so_rcv.sb_mb = m_free(m);
 			m = so->so_rcv.sb_mb;
 			sockbuf_pushsync(&so->so_rcv, nextrecord);
 		}
 	}
 
 	/*
 	 * Process one or more MT_CONTROL mbufs present before any data mbufs
 	 * in the first mbuf chain on the socket buffer.  If MSG_PEEK, we
 	 * just copy the data; if !MSG_PEEK, we call into the protocol to
 	 * perform externalization (or freeing if controlp == NULL).
 	 */
 	if (m != NULL && m->m_type == MT_CONTROL) {
 		struct mbuf *cm = NULL, *cmn;
 		struct mbuf **cme = &cm;
 
 		do {
 			if (flags & MSG_PEEK) {
 				if (controlp != NULL) {
 					*controlp = m_copym(m, 0, m->m_len,
 					    M_NOWAIT);
 					controlp = &(*controlp)->m_next;
 				}
 				m = m->m_next;
 			} else {
 				sbfree(&so->so_rcv, m);
 				so->so_rcv.sb_mb = m->m_next;
 				m->m_next = NULL;
 				*cme = m;
 				cme = &(*cme)->m_next;
 				m = so->so_rcv.sb_mb;
 			}
 		} while (m != NULL && m->m_type == MT_CONTROL);
 		if ((flags & MSG_PEEK) == 0)
 			sockbuf_pushsync(&so->so_rcv, nextrecord);
 		while (cm != NULL) {
 			cmn = cm->m_next;
 			cm->m_next = NULL;
 			if (pr->pr_domain->dom_externalize != NULL) {
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				VNET_SO_ASSERT(so);
 				error = (*pr->pr_domain->dom_externalize)
 				    (cm, controlp, flags);
 				SOCKBUF_LOCK(&so->so_rcv);
 			} else if (controlp != NULL)
 				*controlp = cm;
 			else
 				m_freem(cm);
 			if (controlp != NULL) {
 				orig_resid = 0;
 				while (*controlp != NULL)
 					controlp = &(*controlp)->m_next;
 			}
 			cm = cmn;
 		}
 		if (m != NULL)
 			nextrecord = so->so_rcv.sb_mb->m_nextpkt;
 		else
 			nextrecord = so->so_rcv.sb_mb;
 		orig_resid = 0;
 	}
 	if (m != NULL) {
 		if ((flags & MSG_PEEK) == 0) {
 			KASSERT(m->m_nextpkt == nextrecord,
 			    ("soreceive: post-control, nextrecord !sync"));
 			if (nextrecord == NULL) {
 				KASSERT(so->so_rcv.sb_mb == m,
 				    ("soreceive: post-control, sb_mb!=m"));
 				KASSERT(so->so_rcv.sb_lastrecord == m,
 				    ("soreceive: post-control, lastrecord!=m"));
 			}
 		}
 		type = m->m_type;
 		if (type == MT_OOBDATA)
 			flags |= MSG_OOB;
 	} else {
 		if ((flags & MSG_PEEK) == 0) {
 			KASSERT(so->so_rcv.sb_mb == nextrecord,
 			    ("soreceive: sb_mb != nextrecord"));
 			if (so->so_rcv.sb_mb == NULL) {
 				KASSERT(so->so_rcv.sb_lastrecord == NULL,
 				    ("soreceive: sb_lastercord != NULL"));
 			}
 		}
 	}
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 
 	/*
 	 * Now continue to read any data mbufs off of the head of the socket
 	 * buffer until the read request is satisfied.  Note that 'type' is
 	 * used to store the type of any mbuf reads that have happened so far
 	 * such that soreceive() can stop reading if the type changes, which
 	 * causes soreceive() to return only one of regular data and inline
 	 * out-of-band data in a single socket receive operation.
 	 */
 	moff = 0;
 	offset = 0;
 	while (m != NULL && !(m->m_flags & M_NOTAVAIL) && uio->uio_resid > 0
 	    && error == 0) {
 		/*
 		 * If the type of mbuf has changed since the last mbuf
 		 * examined ('type'), end the receive operation.
 		 */
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) {
 			if (type != m->m_type)
 				break;
 		} else if (type == MT_OOBDATA)
 			break;
 		else
 		    KASSERT(m->m_type == MT_DATA,
 			("m->m_type == %d", m->m_type));
 		so->so_rcv.sb_state &= ~SBS_RCVATMARK;
 		len = uio->uio_resid;
 		if (so->so_oobmark && len > so->so_oobmark - offset)
 			len = so->so_oobmark - offset;
 		if (len > m->m_len - moff)
 			len = m->m_len - moff;
 		/*
 		 * If mp is set, just pass back the mbufs.  Otherwise copy
 		 * them out via the uio, then free.  Sockbuf must be
 		 * consistent here (points to current mbuf, it points to next
 		 * record) when we drop priority; we must note any additions
 		 * to the sockbuf when we block interrupts again.
 		 */
 		if (mp == NULL) {
 			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			error = uiomove(mtod(m, char *) + moff, (int)len, uio);
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (error) {
 				/*
 				 * The MT_SONAME mbuf has already been removed
 				 * from the record, so it is necessary to
 				 * remove the data mbufs, if any, to preserve
 				 * the invariant in the case of PR_ADDR that
 				 * requires MT_SONAME mbufs at the head of
 				 * each record.
 				 */
 				if (pr->pr_flags & PR_ATOMIC &&
 				    ((flags & MSG_PEEK) == 0))
 					(void)sbdroprecord_locked(&so->so_rcv);
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				goto release;
 			}
 		} else
 			uio->uio_resid -= len;
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (len == m->m_len - moff) {
 			if (m->m_flags & M_EOR)
 				flags |= MSG_EOR;
 			if (flags & MSG_PEEK) {
 				m = m->m_next;
 				moff = 0;
 			} else {
 				nextrecord = m->m_nextpkt;
 				sbfree(&so->so_rcv, m);
 				if (mp != NULL) {
 					m->m_nextpkt = NULL;
 					*mp = m;
 					mp = &m->m_next;
 					so->so_rcv.sb_mb = m = m->m_next;
 					*mp = NULL;
 				} else {
 					so->so_rcv.sb_mb = m_free(m);
 					m = so->so_rcv.sb_mb;
 				}
 				sockbuf_pushsync(&so->so_rcv, nextrecord);
 				SBLASTRECORDCHK(&so->so_rcv);
 				SBLASTMBUFCHK(&so->so_rcv);
 			}
 		} else {
 			if (flags & MSG_PEEK)
 				moff += len;
 			else {
 				if (mp != NULL) {
 					if (flags & MSG_DONTWAIT) {
 						*mp = m_copym(m, 0, len,
 						    M_NOWAIT);
 						if (*mp == NULL) {
 							/*
 							 * m_copym() couldn't
 							 * allocate an mbuf.
 							 * Adjust uio_resid back
 							 * (it was adjusted
 							 * down by len bytes,
 							 * which we didn't end
 							 * up "copying" over).
 							 */
 							uio->uio_resid += len;
 							break;
 						}
 					} else {
 						SOCKBUF_UNLOCK(&so->so_rcv);
 						*mp = m_copym(m, 0, len,
 						    M_WAITOK);
 						SOCKBUF_LOCK(&so->so_rcv);
 					}
 				}
 				sbcut_locked(&so->so_rcv, len);
 			}
 		}
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 		if (so->so_oobmark) {
 			if ((flags & MSG_PEEK) == 0) {
 				so->so_oobmark -= len;
 				if (so->so_oobmark == 0) {
 					so->so_rcv.sb_state |= SBS_RCVATMARK;
 					break;
 				}
 			} else {
 				offset += len;
 				if (offset == so->so_oobmark)
 					break;
 			}
 		}
 		if (flags & MSG_EOR)
 			break;
 		/*
 		 * If the MSG_WAITALL flag is set (for non-atomic socket), we
 		 * must not quit until "uio->uio_resid == 0" or an error
 		 * termination.  If a signal/timeout occurs, return with a
 		 * short count but without error.  Keep sockbuf locked
 		 * against other readers.
 		 */
 		while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 &&
 		    !sosendallatonce(so) && nextrecord == NULL) {
 			SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 			if (so->so_error ||
 			    so->so_rcv.sb_state & SBS_CANTRCVMORE)
 				break;
 			/*
 			 * Notify the protocol that some data has been
 			 * drained before blocking.
 			 */
 			if (pr->pr_flags & PR_WANTRCVD) {
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				VNET_SO_ASSERT(so);
 				(*pr->pr_usrreqs->pru_rcvd)(so, flags);
 				SOCKBUF_LOCK(&so->so_rcv);
 			}
 			SBLASTRECORDCHK(&so->so_rcv);
 			SBLASTMBUFCHK(&so->so_rcv);
 			/*
 			 * We could receive some data while was notifying
 			 * the protocol. Skip blocking in this case.
 			 */
 			if (so->so_rcv.sb_mb == NULL) {
 				error = sbwait(&so->so_rcv);
 				if (error) {
 					SOCKBUF_UNLOCK(&so->so_rcv);
 					goto release;
 				}
 			}
 			m = so->so_rcv.sb_mb;
 			if (m != NULL)
 				nextrecord = m->m_nextpkt;
 		}
 	}
 
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (m != NULL && pr->pr_flags & PR_ATOMIC) {
 		flags |= MSG_TRUNC;
 		if ((flags & MSG_PEEK) == 0)
 			(void) sbdroprecord_locked(&so->so_rcv);
 	}
 	if ((flags & MSG_PEEK) == 0) {
 		if (m == NULL) {
 			/*
 			 * First part is an inline SB_EMPTY_FIXUP().  Second
 			 * part makes sure sb_lastrecord is up-to-date if
 			 * there is still data in the socket buffer.
 			 */
 			so->so_rcv.sb_mb = nextrecord;
 			if (so->so_rcv.sb_mb == NULL) {
 				so->so_rcv.sb_mbtail = NULL;
 				so->so_rcv.sb_lastrecord = NULL;
 			} else if (nextrecord->m_nextpkt == NULL)
 				so->so_rcv.sb_lastrecord = nextrecord;
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
 		/*
 		 * If soreceive() is being done from the socket callback,
 		 * then don't need to generate ACK to peer to update window,
 		 * since ACK will be generated on return to TCP.
 		 */
 		if (!(flags & MSG_SOCALLBCK) &&
 		    (pr->pr_flags & PR_WANTRCVD)) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			VNET_SO_ASSERT(so);
 			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
 			SOCKBUF_LOCK(&so->so_rcv);
 		}
 	}
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (orig_resid == uio->uio_resid && orig_resid &&
 	    (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		goto restart;
 	}
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	if (flagsp != NULL)
 		*flagsp |= flags;
 release:
 	sbunlock(&so->so_rcv);
 	return (error);
 }
 
 /*
  * Optimized version of soreceive() for stream (TCP) sockets.
  * XXXAO: (MSG_WAITALL | MSG_PEEK) isn't properly handled.
  *
  * so		socket to read from; must be SOCK_STREAM, else EINVAL.
  * psa		if non-NULL, *psa is set to NULL (no address is returned).
  * uio		destination and byte budget; uio_resid == 0 yields EINVAL.
  * mp0		if non-NULL, data is handed back as an mbuf chain in *mp0
  *		instead of being copied out through the uio.
  * controlp	must be NULL; control data is not supported (EINVAL).
  * flagsp	optional MSG_* flags on input (MSG_EOR is masked off).
  *
  * Returns 0 or an errno value.  On every path that gets past sblock() the
  * sockbuf mutex and the sockbuf I/O lock are both released at 'out'.
  */
 int
 soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	int len = 0, error = 0, flags, oresid;
 	struct sockbuf *sb;
 	struct mbuf *m, *n = NULL;
 
 	/* We only do stream sockets. */
 	if (so->so_type != SOCK_STREAM)
 		return (EINVAL);
 	if (psa != NULL)
 		*psa = NULL;
 	if (controlp != NULL)
 		return (EINVAL);
 	if (flagsp != NULL)
 		flags = *flagsp &~ MSG_EOR;
 	else
 		flags = 0;
 	if (flags & MSG_OOB)
 		return (soreceive_rcvoob(so, uio, flags));
 	if (mp0 != NULL)
 		*mp0 = NULL;
 
 	sb = &so->so_rcv;
 
 	/*
 	 * Prevent other readers from entering the socket.
 	 *
 	 * If sblock() fails we hold neither the I/O lock nor the sockbuf
 	 * mutex, so we must not go through 'out', which releases both.
 	 */
 	error = sblock(sb, SBLOCKWAIT(flags));
 	if (error)
 		return (error);
 	SOCKBUF_LOCK(sb);
 
 	/* Easy one, no space to copyout anything. */
 	if (uio->uio_resid == 0) {
 		error = EINVAL;
 		goto out;
 	}
 	oresid = uio->uio_resid;
 
 	/* We will never ever get anything unless we are or were connected. */
 	if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
 		error = ENOTCONN;
 		goto out;
 	}
 
 restart:
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	/*
 	 * Abort if socket has reported problems.  Buffered data is still
 	 * delivered first, and a partial read already performed in this call
 	 * is returned without consuming the error.
 	 */
 	if (so->so_error) {
 		if (sbavail(sb) > 0)
 			goto deliver;
 		if (oresid > uio->uio_resid)
 			goto out;
 		error = so->so_error;
 		if (!(flags & MSG_PEEK))
 			so->so_error = 0;
 		goto out;
 	}
 
 	/* Door is closed.  Deliver what is left, if any. */
 	if (sb->sb_state & SBS_CANTRCVMORE) {
 		if (sbavail(sb) > 0)
 			goto deliver;
 		else
 			goto out;
 	}
 
 	/* Socket buffer is empty and we shall not block. */
 	if (sbavail(sb) == 0 &&
 	    ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
 		error = EAGAIN;
 		goto out;
 	}
 
 	/* Socket buffer got some data that we shall deliver now. */
 	if (sbavail(sb) > 0 && !(flags & MSG_WAITALL) &&
 	    ((so->so_state & SS_NBIO) ||
 	     (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
 	     sbavail(sb) >= sb->sb_lowat ||
 	     sbavail(sb) >= uio->uio_resid ||
 	     sbavail(sb) >= sb->sb_hiwat) ) {
 		goto deliver;
 	}
 
 	/* On MSG_WAITALL we must wait until all data or error arrives. */
 	if ((flags & MSG_WAITALL) &&
 	    (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat))
 		goto deliver;
 
 	/*
 	 * Wait and block until (more) data comes in.
 	 * NB: Drops the sockbuf lock during wait.
 	 */
 	error = sbwait(sb);
 	if (error)
 		goto out;
 	goto restart;
 
 deliver:
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	KASSERT(sbavail(sb) > 0, ("%s: sockbuf empty", __func__));
 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
 
 	/* Statistics. */
 	if (uio->uio_td)
 		uio->uio_td->td_ru.ru_msgrcv++;
 
 	/* Fill uio until full or current end of socket buffer is reached. */
 	len = min(uio->uio_resid, sbavail(sb));
 	if (mp0 != NULL) {
 		/* Dequeue as many mbufs as possible. */
 		if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
 			if (*mp0 == NULL)
 				*mp0 = sb->sb_mb;
 			else
 				m_cat(*mp0, sb->sb_mb);
 			/*
 			 * The guard above guarantees at least one iteration,
 			 * so 'n' is non-NULL when the loop ends.
 			 */
 			for (m = sb->sb_mb;
 			     m != NULL && m->m_len <= len;
 			     m = m->m_next) {
 				KASSERT(!(m->m_flags & M_NOTAVAIL),
 				    ("%s: m %p not available", __func__, m));
 				len -= m->m_len;
 				uio->uio_resid -= m->m_len;
 				sbfree(sb, m);
 				n = m;
 			}
 			/* Cut the dequeued run loose from what stays queued. */
 			n->m_next = NULL;
 			sb->sb_mb = m;
 			sb->sb_lastrecord = sb->sb_mb;
 			if (sb->sb_mb == NULL)
 				SB_EMPTY_FIXUP(sb);
 		}
 		/* Copy the remainder. */
 		if (len > 0) {
 			KASSERT(sb->sb_mb != NULL,
 			    ("%s: len > 0 && sb->sb_mb empty", __func__));
 
 			m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
 			if (m == NULL)
 				len = 0;	/* Don't flush data from sockbuf. */
 			else
 				uio->uio_resid -= len;
 			if (*mp0 != NULL)
 				m_cat(*mp0, m);
 			else
 				*mp0 = m;
 			/* Nothing dequeued and the copy failed: report it. */
 			if (*mp0 == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 		}
 	} else {
 		/* NB: Must unlock socket buffer as uiomove may sleep. */
 		SOCKBUF_UNLOCK(sb);
 		error = m_mbuftouio(uio, sb->sb_mb, len);
 		SOCKBUF_LOCK(sb);
 		if (error)
 			goto out;
 	}
 	SBLASTRECORDCHK(sb);
 	SBLASTMBUFCHK(sb);
 
 	/*
 	 * Remove the delivered data from the socket buffer unless we
 	 * were only peeking.
 	 */
 	if (!(flags & MSG_PEEK)) {
 		if (len > 0)
 			sbdrop_locked(sb, len);
 
 		/* Notify protocol that we drained some data. */
 		if ((so->so_proto->pr_flags & PR_WANTRCVD) &&
 		    (((flags & MSG_WAITALL) && uio->uio_resid > 0) ||
 		     !(flags & MSG_SOCALLBCK))) {
 			SOCKBUF_UNLOCK(sb);
 			VNET_SO_ASSERT(so);
 			(*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);
 			SOCKBUF_LOCK(sb);
 		}
 	}
 
 	/*
 	 * For MSG_WAITALL we may have to loop again and wait for
 	 * more data to come in.
 	 */
 	if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
 		goto restart;
 out:
 	/* Reached only with both the sockbuf mutex and the I/O lock held. */
 	SOCKBUF_LOCK_ASSERT(sb);
 	SBLASTRECORDCHK(sb);
 	SBLASTMBUFCHK(sb);
 	SOCKBUF_UNLOCK(sb);
 	sbunlock(sb);
 	return (error);
 }
 
 /*
  * Optimized version of soreceive() for simple datagram cases from userspace.
  * Unlike in the stream case, we're able to drop a datagram if copyout()
  * fails, and because we handle datagrams atomically, we don't need to use a
  * sleep lock to prevent I/O interlacing.
  *
  * so		socket to read from.
  * psa		if non-NULL, receives a copy of the sender address when the
  *		protocol has PR_ADDR set; otherwise left NULL.
  * uio		destination for the datagram payload.
  * mp0		a non-NULL value diverts the call to soreceive_generic().
  * controlp	if non-NULL, receives any control mbufs of the datagram.
  * flagsp	optional MSG_* flags in; MSG_TRUNC is OR-ed in on output when
  *		the datagram did not fit into the uio.
  *
  * Returns 0 or an errno value.  A whole record is always removed from the
  * socket buffer, even when only part of it is copied out.
  */
 int
 soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	struct mbuf *m, *m2;
 	int flags, error;
 	ssize_t len;
 	struct protosw *pr = so->so_proto;
 	struct mbuf *nextrecord;
 
 	if (psa != NULL)
 		*psa = NULL;
 	if (controlp != NULL)
 		*controlp = NULL;
 	if (flagsp != NULL)
 		flags = *flagsp &~ MSG_EOR;
 	else
 		flags = 0;
 
 	/*
 	 * For any complicated cases, fall back to the full
 	 * soreceive_generic().
 	 */
 	if (mp0 != NULL || (flags & MSG_PEEK) || (flags & MSG_OOB))
 		return (soreceive_generic(so, psa, uio, mp0, controlp,
 		    flagsp));
 
 	/*
 	 * Enforce restrictions on use.
 	 */
 	KASSERT((pr->pr_flags & PR_WANTRCVD) == 0,
 	    ("soreceive_dgram: wantrcvd"));
 	KASSERT(pr->pr_flags & PR_ATOMIC, ("soreceive_dgram: !atomic"));
 	KASSERT((so->so_rcv.sb_state & SBS_RCVATMARK) == 0,
 	    ("soreceive_dgram: SBS_RCVATMARK"));
 	KASSERT((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0,
 	    ("soreceive_dgram: P_CONNREQUIRED"));
 
 	/*
 	 * Loop blocking while waiting for a datagram.
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	while ((m = so->so_rcv.sb_mb) == NULL) {
 		KASSERT(sbavail(&so->so_rcv) == 0,
 		    ("soreceive_dgram: sb_mb NULL but sbavail %u",
 		    sbavail(&so->so_rcv)));
 		/* A pending socket error is reported once and then cleared. */
 		if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			return (error);
 		}
 		/* Nothing more will arrive, or the caller asked for 0 bytes. */
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE ||
 		    uio->uio_resid == 0) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			return (0);
 		}
 		if ((so->so_state & SS_NBIO) ||
 		    (flags & (MSG_DONTWAIT|MSG_NBIO))) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			return (EWOULDBLOCK);
 		}
 		SBLASTRECORDCHK(&so->so_rcv);
 		SBLASTMBUFCHK(&so->so_rcv);
 		error = sbwait(&so->so_rcv);
 		if (error) {
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			return (error);
 		}
 	}
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	if (uio->uio_td)
 		uio->uio_td->td_ru.ru_msgrcv++;
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 	nextrecord = m->m_nextpkt;
 	if (nextrecord == NULL) {
 		KASSERT(so->so_rcv.sb_lastrecord == m,
 		    ("soreceive_dgram: lastrecord != m"));
 	}
 
 	KASSERT(so->so_rcv.sb_mb->m_nextpkt == nextrecord,
 	    ("soreceive_dgram: m_nextpkt != nextrecord"));
 
 	/*
 	 * Pull 'm' and its chain off the front of the packet queue.
 	 */
 	so->so_rcv.sb_mb = NULL;
 	sockbuf_pushsync(&so->so_rcv, nextrecord);
 
 	/*
 	 * Walk 'm's chain and free that many bytes from the socket buffer.
 	 */
 	for (m2 = m; m2 != NULL; m2 = m2->m_next)
 		sbfree(&so->so_rcv, m2);
 
 	/*
 	 * Do a few last checks before we let go of the lock.
 	 */
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	/*
 	 * From here on the record is private to this thread; the sockbuf
 	 * lock is not taken again.
 	 */
 	if (pr->pr_flags & PR_ADDR) {
 		KASSERT(m->m_type == MT_SONAME,
 		    ("m->m_type == %d", m->m_type));
 		/* M_NOWAIT: *psa may end up NULL if the allocation fails. */
 		if (psa != NULL)
 			*psa = sodupsockaddr(mtod(m, struct sockaddr *),
 			    M_NOWAIT);
 		m = m_free(m);
 	}
 	if (m == NULL) {
 		/* XXXRW: Can this happen? */
 		return (0);
 	}
 
 	/*
 	 * Packet to copyout() is now in 'm' and it is disconnected from the
 	 * queue.
 	 *
 	 * Process one or more MT_CONTROL mbufs present before any data mbufs
 	 * in the first mbuf chain on the socket buffer.  We call into the
 	 * protocol to perform externalization (or freeing if controlp ==
 	 * NULL). In some cases there can be only MT_CONTROL mbufs without
 	 * MT_DATA mbufs.
 	 */
 	if (m->m_type == MT_CONTROL) {
 		struct mbuf *cm = NULL, *cmn;
 		struct mbuf **cme = &cm;
 
 		/* Split the leading run of control mbufs off into 'cm'. */
 		do {
 			m2 = m->m_next;
 			m->m_next = NULL;
 			*cme = m;
 			cme = &(*cme)->m_next;
 			m = m2;
 		} while (m != NULL && m->m_type == MT_CONTROL);
 		/* Hand each control mbuf over individually. */
 		while (cm != NULL) {
 			cmn = cm->m_next;
 			cm->m_next = NULL;
 			/*
 			 * NOTE(review): 'error' set here is never examined;
 			 * it is overwritten by uiomove() below or dropped by
 			 * the final return (0).
 			 */
 			if (pr->pr_domain->dom_externalize != NULL) {
 				error = (*pr->pr_domain->dom_externalize)
 				    (cm, controlp, flags);
 			} else if (controlp != NULL)
 				*controlp = cm;
 			else
 				m_freem(cm);
 			/* Advance controlp to the tail for the next append. */
 			if (controlp != NULL) {
 				while (*controlp != NULL)
 					controlp = &(*controlp)->m_next;
 			}
 			cm = cmn;
 		}
 	}
 	KASSERT(m == NULL || m->m_type == MT_DATA,
 	    ("soreceive_dgram: !data"));
 	/* Copy out data mbufs until the uio is full or the chain ends. */
 	while (m != NULL && uio->uio_resid > 0) {
 		len = uio->uio_resid;
 		if (len > m->m_len)
 			len = m->m_len;
 		error = uiomove(mtod(m, char *), (int)len, uio);
 		if (error) {
 			/* The rest of the datagram is dropped on failure. */
 			m_freem(m);
 			return (error);
 		}
 		if (len == m->m_len)
 			m = m_free(m);
 		else {
 			m->m_data += len;
 			m->m_len -= len;
 		}
 	}
 	/* Leftover data did not fit: flag truncation and discard it. */
 	if (m != NULL) {
 		flags |= MSG_TRUNC;
 		m_freem(m);
 	}
 	if (flagsp != NULL)
 		*flagsp |= flags;
 	return (0);
 }
 
 /*
  * Receive entry point: switch to the socket's vnet and dispatch to the
  * protocol's pru_soreceive method, passing every argument through unchanged.
  * Returns whatever the protocol method returns.
  */
 int
 soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	int rv;
 
 	CURVNET_SET(so->so_vnet);
 	rv = so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0,
 	    controlp, flagsp);
 	CURVNET_RESTORE();
 
 	return (rv);
 }
 
 /*
  * Shut down one or both directions of a socket.
  *
  * 'how' must be SHUT_RD, SHUT_WR or SHUT_RDWR, otherwise EINVAL is returned.
  * Returns ENOTCONN for a socket that is neither connected nor in the middle
  * of connecting/disconnecting; for datagram sockets the shutdown work is
  * still carried out in that case (see below).  Otherwise returns the result
  * of the protocol's pru_shutdown, or 0 when only the read side is shut down.
  */
 int
 soshutdown(struct socket *so, int how)
 {
 	struct protosw *proto = so->so_proto;
 	int force_enotconn = 0;
 	int rv = 0;
 
 	switch (how) {
 	case SHUT_RD:
 	case SHUT_WR:
 	case SHUT_RDWR:
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	if ((so->so_state &
 	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
 		/*
 		 * POSIX wants ENOTCONN from shutdown(2) on an unconnected
 		 * datagram socket, yet historically the socket was torn
 		 * down anyway, and some applications depend on that to wake
 		 * another process blocked in recvXXX(2) on a shared socket.
 		 * Satisfy both: remember to return ENOTCONN, but still let
 		 * the protocol do its shutdown work.  Every other socket
 		 * type fails right away.
 		 */
 		if (so->so_type != SOCK_DGRAM)
 			return (ENOTCONN);
 		force_enotconn = 1;
 	}
 
 	CURVNET_SET(so->so_vnet);
 	if (proto->pr_usrreqs->pru_flush != NULL)
 		(*proto->pr_usrreqs->pru_flush)(so, how);
 	if (how != SHUT_WR)
 		sorflush(so);
 	if (how != SHUT_RD)
 		rv = (*proto->pr_usrreqs->pru_shutdown)(so);
 	wakeup(&so->so_timeo);
 	CURVNET_RESTORE();
 
 	/* A real protocol error takes precedence over the forced ENOTCONN. */
 	if (rv == 0 && force_enotconn)
 		rv = ENOTCONN;
 	return (rv);
 }
 
 void
 sorflush(struct socket *so)
 {
 	struct sockbuf *sb = &so->so_rcv;
 	struct protosw *pr = so->so_proto;
 	struct socket aso;
 
 	VNET_SO_ASSERT(so);
 
 	/*
 	 * In order to avoid calling dom_dispose with the socket buffer mutex
 	 * held, and in order to generally avoid holding the lock for a long
 	 * time, we make a copy of the socket buffer and clear the original
 	 * (except locks, state).  The new socket buffer copy won't have
 	 * initialized locks so we can only call routines that won't use or
 	 * assert those locks.
 	 *
 	 * Dislodge threads currently blocked in receive and wait to acquire
 	 * a lock against other simultaneous readers before clearing the
 	 * socket buffer.  Don't let our acquire be interrupted by a signal
 	 * despite any existing socket disposition on interruptable waiting.
 	 */
 	socantrcvmore(so);
 	(void) sblock(sb, SBL_WAIT | SBL_NOINTR);
 
 	/*
 	 * Invalidate/clear most of the sockbuf structure, but leave selinfo
 	 * and mutex data unchanged.
 	 */
 	SOCKBUF_LOCK(sb);
 	bzero(&aso, sizeof(aso));
 	aso.so_pcb = so->so_pcb;
 	bcopy(&sb->sb_startzero, &aso.so_rcv.sb_startzero,
 	    sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
 	bzero(&sb->sb_startzero,
 	    sizeof(*sb) - offsetof(struct sockbuf, sb_startzero));
 	SOCKBUF_UNLOCK(sb);
 	sbunlock(sb);
 
 	/*
 	 * Dispose of special rights and flush the copied socket.  Don't call
 	 * any unsafe routines (that rely on locks being initialized) on aso.
 	 */
 	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
 		(*pr->pr_domain->dom_dispose)(&aso);
 	sbrelease_internal(&aso.so_rcv, so);
 }
 
 /*
  * Run the socket helper hooks registered for hook point 'h_id'.
  * 'so' is the socket, 'hctx' the context of the hook point.  Returns the
  * status value left in the hook data after the hooks have run.
  */
 static int inline
 hhook_run_socket(struct socket *so, void *hctx, int32_t h_id)
 {
 	/* Members not named here (m, status) start out zeroed. */
 	struct socket_hhook_data data = {
 		.so = so,
 		.hctx = hctx,
 	};
 
 	CURVNET_SET(so->so_vnet);
 	HHOOKS_RUN_IF(V_socket_hhh[h_id], &data, &so->osd);
 	CURVNET_RESTORE();
 
 	/* hhooks return void for now, so the result travels in the data. */
 	return (data.status);
 }
 
 /*
  * Fetch a socket option value described by 'sopt' into 'buf'.
  *
  * At most 'len' bytes are taken; a value shorter than 'minlen' is rejected
  * with EINVAL.  On the copy paths sopt->sopt_valsize ends up holding the
  * number of bytes actually fetched.
  *
  * This routine and sooptcopyout() might deserve a variant for options whose
  * value is "some integer" of unspecified width.  Both are also used by the
  * protocol-level pr_ctloutput() routines.
  */
 int
 sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen)
 {
 	size_t	nbytes = sopt->sopt_valsize;
 
 	/* Caller supplied less than the minimum we can work with. */
 	if (nbytes < minlen)
 		return EINVAL;
 
 	/* Caller supplied more than we want: ignore the excess. */
 	if (nbytes > len) {
 		nbytes = len;
 		sopt->sopt_valsize = nbytes;
 	}
 
 	/*
 	 * Without a thread attached (as in requests built by
 	 * so_setsockopt()) a plain bcopy is used instead of copyin.
 	 */
 	if (sopt->sopt_td == NULL) {
 		bcopy(sopt->sopt_val, buf, nbytes);
 		return (0);
 	}
 	return (copyin(sopt->sopt_val, buf, nbytes));
 }
 
 /*
  * Kernel version of setsockopt(2): build a SOPT_SET request with no thread
  * attached around 'optval'/'optlen' and pass it to sosetopt().
  *
  * XXX: optlen is size_t, not socklen_t
  */
 int
 so_setsockopt(struct socket *so, int level, int optname, void *optval,
     size_t optlen)
 {
 	struct sockopt req;
 
 	req.sopt_dir = SOPT_SET;
 	req.sopt_level = level;
 	req.sopt_name = optname;
 	req.sopt_val = optval;
 	req.sopt_valsize = optlen;
 	req.sopt_td = NULL;
 
 	return (sosetopt(so, &req));
 }
 
 /*
  * Set a socket option.
  *
  * Requests whose level is not SOL_SOCKET are handed to the protocol's
  * pr_ctloutput (ENOPROTOOPT if the protocol has none).  SOL_SOCKET options
  * are handled in the switch below; when one of them succeeds, the protocol's
  * pr_ctloutput is also called and its result ignored.
  *
  * Returns 0 or an errno value.
  */
 int
 sosetopt(struct socket *so, struct sockopt *sopt)
 {
 	int	error, optval;
 	struct	linger l;
 	struct	timeval tv;
 	sbintime_t val;
 	uint32_t val32;
 #ifdef MAC
 	struct mac extmac;
 #endif
 
 	CURVNET_SET(so->so_vnet);
 	error = 0;
 	if (sopt->sopt_level != SOL_SOCKET) {
 		if (so->so_proto->pr_ctloutput != NULL) {
 			error = (*so->so_proto->pr_ctloutput)(so, sopt);
 			CURVNET_RESTORE();
 			return (error);
 		}
 		error = ENOPROTOOPT;
 	} else {
 		switch (sopt->sopt_name) {
 		case SO_ACCEPTFILTER:
 			error = accept_filt_setopt(so, sopt);
 			if (error)
 				goto bad;
 			break;
 
 		case SO_LINGER:
 			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
 			if (error)
 				goto bad;
 
 			SOCK_LOCK(so);
 			so->so_linger = l.l_linger;
 			if (l.l_onoff)
 				so->so_options |= SO_LINGER;
 			else
 				so->so_options &= ~SO_LINGER;
 			SOCK_UNLOCK(so);
 			break;
 
 		/*
 		 * Boolean options: the option name doubles as the bit that
 		 * is set or cleared in so_options.
 		 */
 		case SO_DEBUG:
 		case SO_KEEPALIVE:
 		case SO_DONTROUTE:
 		case SO_USELOOPBACK:
 		case SO_BROADCAST:
 		case SO_REUSEADDR:
 		case SO_REUSEPORT:
 		case SO_OOBINLINE:
 		case SO_TIMESTAMP:
 		case SO_BINTIME:
 		case SO_NOSIGPIPE:
 		case SO_NO_DDP:
 		case SO_NO_OFFLOAD:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				goto bad;
 			SOCK_LOCK(so);
 			if (optval)
 				so->so_options |= sopt->sopt_name;
 			else
 				so->so_options &= ~sopt->sopt_name;
 			SOCK_UNLOCK(so);
 			break;
 
 		case SO_SETFIB:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				goto bad;
 
 			if (optval < 0 || optval >= rt_numfibs) {
 				error = EINVAL;
 				goto bad;
 			}
 			/*
 			 * Only INET, INET6 and ROUTE sockets keep the
 			 * requested FIB; all others are reset to FIB 0.
 			 */
 			if (((so->so_proto->pr_domain->dom_family == PF_INET) ||
 			   (so->so_proto->pr_domain->dom_family == PF_INET6) ||
 			   (so->so_proto->pr_domain->dom_family == PF_ROUTE)))
 				so->so_fibnum = optval;
 			else
 				so->so_fibnum = 0;
 			break;
 
 		case SO_USER_COOKIE:
 			error = sooptcopyin(sopt, &val32, sizeof val32,
 			    sizeof val32);
 			if (error)
 				goto bad;
 			so->so_user_cookie = val32;
 			break;
 
 		case SO_SNDBUF:
 		case SO_RCVBUF:
 		case SO_SNDLOWAT:
 		case SO_RCVLOWAT:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				goto bad;
 
 			/*
 			 * Values < 1 make no sense for any of these options,
 			 * so disallow them.
 			 */
 			if (optval < 1) {
 				error = EINVAL;
 				goto bad;
 			}
 
 			switch (sopt->sopt_name) {
 			case SO_SNDBUF:
 			case SO_RCVBUF:
 				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
 				    &so->so_snd : &so->so_rcv, (u_long)optval,
 				    so, curthread) == 0) {
 					error = ENOBUFS;
 					goto bad;
 				}
 				/* An explicit size turns off SB_AUTOSIZE. */
 				(sopt->sopt_name == SO_SNDBUF ? &so->so_snd :
 				    &so->so_rcv)->sb_flags &= ~SB_AUTOSIZE;
 				break;
 
 			/*
 			 * Make sure the low-water is never greater than the
 			 * high-water.
 			 */
 			case SO_SNDLOWAT:
 				SOCKBUF_LOCK(&so->so_snd);
 				so->so_snd.sb_lowat =
 				    (optval > so->so_snd.sb_hiwat) ?
 				    so->so_snd.sb_hiwat : optval;
 				SOCKBUF_UNLOCK(&so->so_snd);
 				break;
 			case SO_RCVLOWAT:
 				SOCKBUF_LOCK(&so->so_rcv);
 				so->so_rcv.sb_lowat =
 				    (optval > so->so_rcv.sb_hiwat) ?
 				    so->so_rcv.sb_hiwat : optval;
 				SOCKBUF_UNLOCK(&so->so_rcv);
 				break;
 			}
 			break;
 
 		case SO_SNDTIMEO:
 		case SO_RCVTIMEO:
 #ifdef COMPAT_FREEBSD32
 			if (SV_CURPROC_FLAG(SV_ILP32)) {
 				struct timeval32 tv32;
 
 				error = sooptcopyin(sopt, &tv32, sizeof tv32,
 				    sizeof tv32);
 				/*
 				 * NOTE(review): tv32 is copied into tv before
 				 * 'error' is checked; on failure the values
 				 * go unused because of the goto bad below.
 				 */
 				CP(tv32, tv, tv_sec);
 				CP(tv32, tv, tv_usec);
 			} else
 #endif
 				error = sooptcopyin(sopt, &tv, sizeof tv,
 				    sizeof tv);
 			if (error)
 				goto bad;
 			if (tv.tv_sec < 0 || tv.tv_usec < 0 ||
 			    tv.tv_usec >= 1000000) {
 				error = EDOM;
 				goto bad;
 			}
 			/* Seconds beyond INT32_MAX are clamped to SBT_MAX. */
 			if (tv.tv_sec > INT32_MAX)
 				val = SBT_MAX;
 			else
 				val = tvtosbt(tv);
 			switch (sopt->sopt_name) {
 			case SO_SNDTIMEO:
 				so->so_snd.sb_timeo = val;
 				break;
 			case SO_RCVTIMEO:
 				so->so_rcv.sb_timeo = val;
 				break;
 			}
 			break;
 
 		case SO_LABEL:
 #ifdef MAC
 			error = sooptcopyin(sopt, &extmac, sizeof extmac,
 			    sizeof extmac);
 			if (error)
 				goto bad;
 			/*
 			 * NOTE(review): sopt_td is dereferenced without a
 			 * NULL check here, while so_setsockopt() builds
 			 * requests with sopt_td == NULL -- confirm this case
 			 * cannot be reached that way.
 			 */
 			error = mac_setsockopt_label(sopt->sopt_td->td_ucred,
 			    so, &extmac);
 #else
 			error = EOPNOTSUPP;
 #endif
 			break;
 
 		case SO_TS_CLOCK:
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				goto bad;
 			if (optval < 0 || optval > SO_TS_CLOCK_MAX) {
 				error = EINVAL;
 				goto bad;
 			}
 			so->so_ts_clock = optval;
 			break;
 
 		case SO_MAX_PACING_RATE:
 			error = sooptcopyin(sopt, &val32, sizeof(val32),
 			    sizeof(val32));
 			if (error)
 				goto bad;
 			so->so_max_pacing_rate = val32;
 			break;
 
 		default:
 			/* Unknown option: run any registered helper hooks. */
 			if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
 				error = hhook_run_socket(so, sopt,
 				    HHOOK_SOCKET_OPT);
 			else
 				error = ENOPROTOOPT;
 			break;
 		}
 		/*
 		 * On success the protocol is also called with the same
 		 * request; its return value is deliberately discarded.
 		 */
 		if (error == 0 && so->so_proto->pr_ctloutput != NULL)
 			(void)(*so->so_proto->pr_ctloutput)(so, sopt);
 	}
 bad:
 	CURVNET_RESTORE();
 	return (error);
 }
 
 /*
  * Helper routine for getsockopt: hand 'len' bytes at 'buf' back through
  * 'sopt'.
  *
  * A value is always returned, truncated if necessary to the size of the
  * caller's buffer, and sopt->sopt_valsize is set to the number of bytes
  * copied rather than to the total that was available.  The interface is not
  * idempotent: the complete answer has to exist before this is called.
  *
  * Returns 0, or the error from copyout().
  */
 int
 sooptcopyout(struct sockopt *sopt, const void *buf, size_t len)
 {
 	size_t	nbytes;
 
 	nbytes = min(len, sopt->sopt_valsize);
 	sopt->sopt_valsize = nbytes;
 
 	/* No destination buffer: only the size is reported. */
 	if (sopt->sopt_val == NULL)
 		return (0);
 
 	/* Without a thread attached a plain bcopy is used. */
 	if (sopt->sopt_td == NULL) {
 		bcopy(buf, sopt->sopt_val, nbytes);
 		return (0);
 	}
 	return (copyout(buf, sopt->sopt_val, nbytes));
 }
 
 /*
  * Get a socket option.
  *
  * Requests whose level is not SOL_SOCKET are handed to the protocol's
  * pr_ctloutput (ENOPROTOOPT if the protocol has none).  SOL_SOCKET options
  * are answered here; most integer-valued ones load 'optval' and jump to the
  * shared 'integer' label, which copies it out.
  *
  * Returns 0 or an errno value.
  */
 int
 sogetopt(struct socket *so, struct sockopt *sopt)
 {
 	int	error, optval;
 	struct	linger l;
 	struct	timeval tv;
 #ifdef MAC
 	struct mac extmac;
 #endif
 
 	CURVNET_SET(so->so_vnet);
 	error = 0;
 	if (sopt->sopt_level != SOL_SOCKET) {
 		if (so->so_proto->pr_ctloutput != NULL)
 			error = (*so->so_proto->pr_ctloutput)(so, sopt);
 		else
 			error = ENOPROTOOPT;
 		CURVNET_RESTORE();
 		return (error);
 	} else {
 		switch (sopt->sopt_name) {
 		case SO_ACCEPTFILTER:
 			error = accept_filt_getopt(so, sopt);
 			break;
 
 		case SO_LINGER:
 			SOCK_LOCK(so);
 			l.l_onoff = so->so_options & SO_LINGER;
 			l.l_linger = so->so_linger;
 			SOCK_UNLOCK(so);
 			error = sooptcopyout(sopt, &l, sizeof l);
 			break;
 
 		/*
 		 * Flag options: the result is the masked so_options bit
 		 * itself (zero or the option's bit value), not 0/1.
 		 */
 		case SO_USELOOPBACK:
 		case SO_DONTROUTE:
 		case SO_DEBUG:
 		case SO_KEEPALIVE:
 		case SO_REUSEADDR:
 		case SO_REUSEPORT:
 		case SO_BROADCAST:
 		case SO_OOBINLINE:
 		case SO_ACCEPTCONN:
 		case SO_TIMESTAMP:
 		case SO_BINTIME:
 		case SO_NOSIGPIPE:
 			optval = so->so_options & sopt->sopt_name;
 integer:
 			/* Shared exit for every case that produced 'optval'. */
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 
 		case SO_TYPE:
 			optval = so->so_type;
 			goto integer;
 
 		case SO_PROTOCOL:
 			optval = so->so_proto->pr_protocol;
 			goto integer;
 
 		case SO_ERROR:
 			/* Reading the pending error also clears it. */
 			SOCK_LOCK(so);
 			optval = so->so_error;
 			so->so_error = 0;
 			SOCK_UNLOCK(so);
 			goto integer;
 
 		case SO_SNDBUF:
 			optval = so->so_snd.sb_hiwat;
 			goto integer;
 
 		case SO_RCVBUF:
 			optval = so->so_rcv.sb_hiwat;
 			goto integer;
 
 		case SO_SNDLOWAT:
 			optval = so->so_snd.sb_lowat;
 			goto integer;
 
 		case SO_RCVLOWAT:
 			optval = so->so_rcv.sb_lowat;
 			goto integer;
 
 		case SO_SNDTIMEO:
 		case SO_RCVTIMEO:
 			tv = sbttotv(sopt->sopt_name == SO_SNDTIMEO ?
 			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);
 #ifdef COMPAT_FREEBSD32
 			if (SV_CURPROC_FLAG(SV_ILP32)) {
 				struct timeval32 tv32;
 
 				CP(tv, tv32, tv_sec);
 				CP(tv, tv32, tv_usec);
 				error = sooptcopyout(sopt, &tv32, sizeof tv32);
 			} else
 #endif
 				error = sooptcopyout(sopt, &tv, sizeof tv);
 			break;
 
 		case SO_LABEL:
 #ifdef MAC
 			/*
 			 * The caller's struct mac is copied in first, then
 			 * passed to the MAC framework and copied back out.
 			 */
 			error = sooptcopyin(sopt, &extmac, sizeof(extmac),
 			    sizeof(extmac));
 			if (error)
 				goto bad;
 			error = mac_getsockopt_label(sopt->sopt_td->td_ucred,
 			    so, &extmac);
 			if (error)
 				goto bad;
 			error = sooptcopyout(sopt, &extmac, sizeof extmac);
 #else
 			error = EOPNOTSUPP;
 #endif
 			break;
 
 		case SO_PEERLABEL:
 #ifdef MAC
 			error = sooptcopyin(sopt, &extmac, sizeof(extmac),
 			    sizeof(extmac));
 			if (error)
 				goto bad;
 			error = mac_getsockopt_peerlabel(
 			    sopt->sopt_td->td_ucred, so, &extmac);
 			if (error)
 				goto bad;
 			error = sooptcopyout(sopt, &extmac, sizeof extmac);
 #else
 			error = EOPNOTSUPP;
 #endif
 			break;
 
 		/*
 		 * Listen queue statistics: in the new version of these lines
 		 * a socket that is not listening reports 0.
 		 */
 		case SO_LISTENQLIMIT:
-			optval = so->so_qlimit;
+			optval = SOLISTENING(so) ? so->sol_qlimit : 0;
 			goto integer;
 
 		case SO_LISTENQLEN:
-			optval = so->so_qlen;
+			optval = SOLISTENING(so) ? so->sol_qlen : 0;
 			goto integer;
 
 		case SO_LISTENINCQLEN:
-			optval = so->so_incqlen;
+			optval = SOLISTENING(so) ? so->sol_incqlen : 0;
 			goto integer;
 
 		case SO_TS_CLOCK:
 			optval = so->so_ts_clock;
 			goto integer;
 
 		case SO_MAX_PACING_RATE:
 			optval = so->so_max_pacing_rate;
 			goto integer;
 
 		default:
 			/* Unknown option: run any registered helper hooks. */
 			if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0)
 				error = hhook_run_socket(so, sopt,
 				    HHOOK_SOCKET_OPT);
 			else
 				error = ENOPROTOOPT;
 			break;
 		}
 	}
 	/* The label only exists under MAC, the sole user of 'goto bad'. */
 #ifdef MAC
 bad:
 #endif
 	CURVNET_RESTORE();
 	return (error);
 }
 
 /*
  * Allocate an mbuf chain sized to hold sopt->sopt_valsize bytes and store
  * its head in *mp.  Each mbuf gets a cluster when more than MLEN bytes are
  * still needed, and its m_len is preset to the share it will carry.
  * Allocations may sleep only when the request has a thread attached.
  *
  * Returns 0 on success or ENOBUFS.  On failure everything allocated so far
  * has been freed; *mp must not be used.
  */
 int
 soopt_getm(struct sockopt *sopt, struct mbuf **mp)
 {
 	struct mbuf *m, *tail = NULL;
 	int resid = sopt->sopt_valsize;
 	const int how = sopt->sopt_td != NULL ? M_WAITOK : M_NOWAIT;
 
 	/* At least one mbuf is always allocated, even for a zero size. */
 	do {
 		MGET(m, how, MT_DATA);
 		if (m == NULL) {
 			if (tail != NULL)
 				m_freem(*mp);
 			return ENOBUFS;
 		}
 		if (resid > MLEN) {
 			MCLGET(m, how);
 			if ((m->m_flags & M_EXT) == 0) {
 				/* No cluster: undo this mbuf and the chain. */
 				if (tail == NULL)
 					m_free(m);
 				else {
 					m_freem(m);
 					m_freem(*mp);
 				}
 				return ENOBUFS;
 			}
 			m->m_len = min(MCLBYTES, resid);
 		} else {
 			m->m_len = min(MLEN, resid);
 		}
 		resid -= m->m_len;
 
 		/* First mbuf becomes the head, later ones are appended. */
 		if (tail == NULL)
 			*mp = m;
 		else
 			tail->m_next = m;
 		tail = m;
 	} while (resid);
 
 	return (0);
 }
 
 /*
  * Fill the mbuf chain 'm' from the option value in 'sopt', one mbuf's
  * m_len at a time, advancing sopt_val and shrinking sopt_valsize as it
  * goes.  Does nothing when sopt_val is NULL.
  *
  * Returns 0, or the copyin() error, in which case the whole chain has been
  * freed.  Panics if the value runs out before the chain does.
  */
 int
 soopt_mcopyin(struct sockopt *sopt, struct mbuf *m)
 {
 	struct mbuf *cur;
 	int rv;
 
 	if (sopt->sopt_val == NULL)
 		return (0);
 
 	for (cur = m; cur != NULL && sopt->sopt_valsize >= cur->m_len;
 	    cur = cur->m_next) {
 		if (sopt->sopt_td == NULL) {
 			bcopy(sopt->sopt_val, mtod(cur, char *), cur->m_len);
 		} else {
 			rv = copyin(sopt->sopt_val, mtod(cur, char *),
 			    cur->m_len);
 			if (rv != 0) {
 				m_freem(m);
 				return(rv);
 			}
 		}
 		sopt->sopt_valsize -= cur->m_len;
 		sopt->sopt_val = (char *)sopt->sopt_val + cur->m_len;
 	}
 
 	/* The chain should have been sized to fit, see ip6_sooptmcopyin(). */
 	if (cur != NULL)
 		panic("ip6_sooptmcopyin");
 	return (0);
 }
 
 int
 soopt_mcopyout(struct sockopt *sopt, struct mbuf *m)
 {
 	struct mbuf *m0 = m;
 	size_t valsize = 0;
 
 	if (sopt->sopt_val == NULL)
 		return (0);
 	while (m != NULL && sopt->sopt_valsize >= m->m_len) {
 		if (sopt->sopt_td != NULL) {
 			int error;
 
 			error = copyout(mtod(m, char *), sopt->sopt_val,
 			    m->m_len);
 			if (error != 0) {
 				m_freem(m0);
 				return(error);
 			}
 		} else
 			bcopy(mtod(m, char *), sopt->sopt_val, m->m_len);
 		sopt->sopt_valsize -= m->m_len;
 		sopt->sopt_val = (char *)sopt->sopt_val + m->m_len;
 		valsize += m->m_len;
 		m = m->m_next;
 	}
 	if (m != NULL) {
 		/* enough soopt buffer should be given from user-land */
 		m_freem(m0);
 		return(EINVAL);
 	}
 	sopt->sopt_valsize = valsize;
 	return (0);
 }
 
 /*
  * sohasoutofband(): protocol notifies socket layer of the arrival of new
  * out-of-band data, which will then notify socket consumers.
  */
 void
 sohasoutofband(struct socket *so)
 {
 
 	if (so->so_sigio != NULL)
 		pgsigio(&so->so_sigio, SIGURG, 0);
-	selwakeuppri(&so->so_rcv.sb_sel, PSOCK);
+	selwakeuppri(&so->so_rdsel, PSOCK);
 }
 
 int
 sopoll(struct socket *so, int events, struct ucred *active_cred,
     struct thread *td)
 {
 
 	/*
 	 * We do not need to set or assert curvnet as long as everyone uses
 	 * sopoll_generic().
 	 */
 	return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred,
 	    td));
 }
 
 int
 sopoll_generic(struct socket *so, int events, struct ucred *active_cred,
     struct thread *td)
 {
-	int revents = 0;
+	int revents;
 
-	SOCKBUF_LOCK(&so->so_snd);
-	SOCKBUF_LOCK(&so->so_rcv);
-	if (events & (POLLIN | POLLRDNORM))
-		if (soreadabledata(so))
-			revents |= events & (POLLIN | POLLRDNORM);
-
-	if (events & (POLLOUT | POLLWRNORM))
-		if (sowriteable(so))
-			revents |= events & (POLLOUT | POLLWRNORM);
-
-	if (events & (POLLPRI | POLLRDBAND))
-		if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK))
-			revents |= events & (POLLPRI | POLLRDBAND);
-
-	if ((events & POLLINIGNEOF) == 0) {
-		if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
-			revents |= events & (POLLIN | POLLRDNORM);
-			if (so->so_snd.sb_state & SBS_CANTSENDMORE)
-				revents |= POLLHUP;
+	SOCK_LOCK(so);
+	if (SOLISTENING(so)) {
+		if (!(events & (POLLIN | POLLRDNORM)))
+			revents = 0;
+		else if (!TAILQ_EMPTY(&so->sol_comp))
+			revents = events & (POLLIN | POLLRDNORM);
+		else {
+			selrecord(td, &so->so_rdsel);
+			revents = 0;
 		}
-	}
-
-	if (revents == 0) {
-		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
-			selrecord(td, &so->so_rcv.sb_sel);
-			so->so_rcv.sb_flags |= SB_SEL;
+	} else {
+		revents = 0;
+		SOCKBUF_LOCK(&so->so_snd);
+		SOCKBUF_LOCK(&so->so_rcv);
+		if (events & (POLLIN | POLLRDNORM))
+			if (soreadabledata(so))
+				revents |= events & (POLLIN | POLLRDNORM);
+		if (events & (POLLOUT | POLLWRNORM))
+			if (sowriteable(so))
+				revents |= events & (POLLOUT | POLLWRNORM);
+		if (events & (POLLPRI | POLLRDBAND))
+			if (so->so_oobmark ||
+			    (so->so_rcv.sb_state & SBS_RCVATMARK))
+				revents |= events & (POLLPRI | POLLRDBAND);
+		if ((events & POLLINIGNEOF) == 0) {
+			if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
+				revents |= events & (POLLIN | POLLRDNORM);
+				if (so->so_snd.sb_state & SBS_CANTSENDMORE)
+					revents |= POLLHUP;
+			}
 		}
-
-		if (events & (POLLOUT | POLLWRNORM)) {
-			selrecord(td, &so->so_snd.sb_sel);
-			so->so_snd.sb_flags |= SB_SEL;
+		if (revents == 0) {
+			if (events &
+			    (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
+				selrecord(td, &so->so_rdsel);
+				so->so_rcv.sb_flags |= SB_SEL;
+			}
+			if (events & (POLLOUT | POLLWRNORM)) {
+				selrecord(td, &so->so_wrsel);
+				so->so_snd.sb_flags |= SB_SEL;
+			}
 		}
+		SOCKBUF_UNLOCK(&so->so_rcv);
+		SOCKBUF_UNLOCK(&so->so_snd);
 	}
-
-	SOCKBUF_UNLOCK(&so->so_rcv);
-	SOCKBUF_UNLOCK(&so->so_snd);
+	SOCK_UNLOCK(so);
 	return (revents);
 }
 
 int
 soo_kqfilter(struct file *fp, struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
 	struct sockbuf *sb;
+	struct knlist *knl;
 
 	switch (kn->kn_filter) {
 	case EVFILT_READ:
 		kn->kn_fop = &soread_filtops;
+		knl = &so->so_rdsel.si_note;
 		sb = &so->so_rcv;
 		break;
 	case EVFILT_WRITE:
 		kn->kn_fop = &sowrite_filtops;
+		knl = &so->so_wrsel.si_note;
 		sb = &so->so_snd;
 		break;
 	case EVFILT_EMPTY:
 		kn->kn_fop = &soempty_filtops;
+		knl = &so->so_wrsel.si_note;
 		sb = &so->so_snd;
 		break;
 	default:
 		return (EINVAL);
 	}
 
-	SOCKBUF_LOCK(sb);
-	knlist_add(&sb->sb_sel.si_note, kn, 1);
-	sb->sb_flags |= SB_KNOTE;
-	SOCKBUF_UNLOCK(sb);
+	SOCK_LOCK(so);
+	if (SOLISTENING(so)) {
+		knlist_add(knl, kn, 1);
+	} else {
+		SOCKBUF_LOCK(sb);
+		knlist_add(knl, kn, 1);
+		sb->sb_flags |= SB_KNOTE;
+		SOCKBUF_UNLOCK(sb);
+	}
+	SOCK_UNLOCK(so);
 	return (0);
 }
 
 /*
  * Some routines that return EOPNOTSUPP for entry points that are not
  * supported by a protocol.  Fill in as needed.
  */
 int
 pru_accept_notsupp(struct socket *so, struct sockaddr **nam)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_aio_queue_notsupp(struct socket *so, struct kaiocb *job)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_attach_notsupp(struct socket *so, int proto, struct thread *td)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_connect2_notsupp(struct socket *so1, struct socket *so2)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data,
     struct ifnet *ifp, struct thread *td)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_disconnect_notsupp(struct socket *so)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_listen_notsupp(struct socket *so, int backlog, struct thread *td)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_rcvd_notsupp(struct socket *so, int flags)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_send_notsupp(struct socket *so, int flags, struct mbuf *m,
     struct sockaddr *addr, struct mbuf *control, struct thread *td)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_ready_notsupp(struct socket *so, struct mbuf *m, int count)
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * This isn't really a ``null'' operation, but it's the default one and
  * doesn't do anything destructive.
  */
 int
 pru_sense_null(struct socket *so, struct stat *sb)
 {
 
 	sb->st_blksize = so->so_snd.sb_hiwat;
 	return 0;
 }
 
 int
 pru_shutdown_notsupp(struct socket *so)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr,
     struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 
 	return EOPNOTSUPP;
 }
 
 int
 pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred,
     struct thread *td)
 {
 
 	return EOPNOTSUPP;
 }
 
 static void
 filt_sordetach(struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
 
-	SOCKBUF_LOCK(&so->so_rcv);
-	knlist_remove(&so->so_rcv.sb_sel.si_note, kn, 1);
-	if (knlist_empty(&so->so_rcv.sb_sel.si_note))
+	so_rdknl_lock(so);
+	knlist_remove(&so->so_rdsel.si_note, kn, 1);
+	if (!SOLISTENING(so) && knlist_empty(&so->so_rdsel.si_note))
 		so->so_rcv.sb_flags &= ~SB_KNOTE;
-	SOCKBUF_UNLOCK(&so->so_rcv);
+	so_rdknl_unlock(so);
 }
 
 /*ARGSUSED*/
 static int
 filt_soread(struct knote *kn, long hint)
 {
 	struct socket *so;
 
 	so = kn->kn_fp->f_data;
-	if (so->so_options & SO_ACCEPTCONN) {
-		kn->kn_data = so->so_qlen;
-		return (!TAILQ_EMPTY(&so->so_comp));
 
+	if (SOLISTENING(so)) {
+		SOCK_LOCK_ASSERT(so);
+		kn->kn_data = so->sol_qlen;
+		return (!TAILQ_EMPTY(&so->sol_comp));
 	}
+
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl;
 	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
 		return (1);
 	} else if (so->so_error)	/* temporary udp error */
 		return (1);
 
 	if (kn->kn_sfflags & NOTE_LOWAT) {
 		if (kn->kn_data >= kn->kn_sdata)
 			return (1);
 	} else if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat)
 		return (1);
 
 	/* This hook returning non-zero indicates an event, not error */
 	return (hhook_run_socket(so, NULL, HHOOK_FILT_SOREAD));
 }
 
 static void
 filt_sowdetach(struct knote *kn)
 {
 	struct socket *so = kn->kn_fp->f_data;
 
-	SOCKBUF_LOCK(&so->so_snd);
-	knlist_remove(&so->so_snd.sb_sel.si_note, kn, 1);
-	if (knlist_empty(&so->so_snd.sb_sel.si_note))
+	so_wrknl_lock(so);
+	knlist_remove(&so->so_wrsel.si_note, kn, 1);
+	if (!SOLISTENING(so) && knlist_empty(&so->so_wrsel.si_note))
 		so->so_snd.sb_flags &= ~SB_KNOTE;
-	SOCKBUF_UNLOCK(&so->so_snd);
+	so_wrknl_unlock(so);
 }
 
 /*ARGSUSED*/
 static int
 filt_sowrite(struct knote *kn, long hint)
 {
 	struct socket *so;
 
 	so = kn->kn_fp->f_data;
+
+	if (SOLISTENING(so))
+		return (0);
+
 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
 	kn->kn_data = sbspace(&so->so_snd);
 
 	hhook_run_socket(so, kn, HHOOK_FILT_SOWRITE);
 
 	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
 		return (1);
 	} else if (so->so_error)	/* temporary udp error */
 		return (1);
 	else if (((so->so_state & SS_ISCONNECTED) == 0) &&
 	    (so->so_proto->pr_flags & PR_CONNREQUIRED))
 		return (0);
 	else if (kn->kn_sfflags & NOTE_LOWAT)
 		return (kn->kn_data >= kn->kn_sdata);
 	else
 		return (kn->kn_data >= so->so_snd.sb_lowat);
 }
 
 static int
 filt_soempty(struct knote *kn, long hint)
 {
 	struct socket *so;
 
 	so = kn->kn_fp->f_data;
+
+	if (SOLISTENING(so))
+		return (1);
+
 	SOCKBUF_LOCK_ASSERT(&so->so_snd);
 	kn->kn_data = sbused(&so->so_snd);
 
 	if (kn->kn_data == 0)
 		return (1);
 	else
 		return (0);
 }
 
 int
 socheckuid(struct socket *so, uid_t uid)
 {
 
 	if (so == NULL)
 		return (EPERM);
 	if (so->so_cred->cr_uid != uid)
 		return (EPERM);
 	return (0);
 }
 
 /*
  * These functions are used by protocols to notify the socket layer (and its
  * consumers) of state changes in the sockets driven by protocol-side events.
  */
 
 /*
  * Procedures to manipulate state flags of socket and do appropriate wakeups.
  *
  * Normal sequence from the active (originating) side is that
  * soisconnecting() is called during processing of connect() call, resulting
  * in an eventual call to soisconnected() if/when the connection is
  * established.  When the connection is torn down soisdisconnecting() is
  * called during processing of disconnect() call, and soisdisconnected() is
  * called when the connection to the peer is totally severed.  The semantics
  * of these routines are such that connectionless protocols can call
  * soisconnected() and soisdisconnected() only, bypassing the in-progress
  * calls when setting up a ``connection'' takes no time.
  *
  * From the passive side, a socket is created with two queues of sockets:
  * so_incomp for connections in progress and so_comp for connections already
  * made and awaiting user acceptance.  As a protocol is preparing incoming
  * connections, it creates a socket structure queued on so_incomp by calling
  * sonewconn().  When the connection is established, soisconnected() is
  * called, and transfers the socket structure to so_comp, making it available
  * to accept().
  *
  * If a socket is closed with sockets on either so_incomp or so_comp, these
  * sockets are dropped.
  *
  * If higher-level protocols are implemented in the kernel, the wakeups done
  * here will sometimes cause software-interrupt process scheduling.
  */
 void
 soisconnecting(struct socket *so)
 {
 
 	SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
 	so->so_state |= SS_ISCONNECTING;
 	SOCK_UNLOCK(so);
 }
 
 void
 soisconnected(struct socket *so)
 {
 	struct socket *head;
 	int ret;
 
+	/*
+	 * XXXGL: this is the only place where we acquire socket locks
+	 * in reverse order: first child, then listening socket.  To avoid
+	 * a possible lock order reversal (LOR), use try-lock semantics.
+	 */
 restart:
-	ACCEPT_LOCK();
 	SOCK_LOCK(so);
+	if ((head = so->so_listen) != NULL &&
+	    __predict_false(SOLISTEN_TRYLOCK(head) == 0)) {
+		SOCK_UNLOCK(so);
+		goto restart;
+	}
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
 	so->so_state |= SS_ISCONNECTED;
-	head = so->so_head;
-	if (head != NULL && (so->so_qstate & SQ_INCOMP)) {
+	if (head != NULL && (so->so_qstate == SQ_INCOMP)) {
+again:
 		if ((so->so_options & SO_ACCEPTFILTER) == 0) {
+			TAILQ_REMOVE(&head->sol_incomp, so, so_list);
+			head->sol_incqlen--;
+			TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list);
+			head->sol_qlen++;
+			so->so_qstate = SQ_COMP;
 			SOCK_UNLOCK(so);
-			TAILQ_REMOVE(&head->so_incomp, so, so_list);
-			head->so_incqlen--;
-			so->so_qstate &= ~SQ_INCOMP;
-			TAILQ_INSERT_TAIL(&head->so_comp, so, so_list);
-			head->so_qlen++;
-			so->so_qstate |= SQ_COMP;
-			ACCEPT_UNLOCK();
-			sorwakeup(head);
-			wakeup_one(&head->so_timeo);
+			solisten_wakeup(head);	/* unlocks */
 		} else {
-			ACCEPT_UNLOCK();
+			SOCKBUF_LOCK(&so->so_rcv);
 			soupcall_set(so, SO_RCV,
-			    head->so_accf->so_accept_filter->accf_callback,
-			    head->so_accf->so_accept_filter_arg);
+			    head->sol_accept_filter->accf_callback,
+			    head->sol_accept_filter_arg);
 			so->so_options &= ~SO_ACCEPTFILTER;
-			ret = head->so_accf->so_accept_filter->accf_callback(so,
-			    head->so_accf->so_accept_filter_arg, M_NOWAIT);
-			if (ret == SU_ISCONNECTED)
+			ret = head->sol_accept_filter->accf_callback(so,
+			    head->sol_accept_filter_arg, M_NOWAIT);
+			if (ret == SU_ISCONNECTED) {
 				soupcall_clear(so, SO_RCV);
+				SOCKBUF_UNLOCK(&so->so_rcv);
+				goto again;
+			}
+			SOCKBUF_UNLOCK(&so->so_rcv);
 			SOCK_UNLOCK(so);
-			if (ret == SU_ISCONNECTED)
-				goto restart;
+			SOLISTEN_UNLOCK(head);
 		}
 		return;
 	}
+	if (head != NULL)
+		SOLISTEN_UNLOCK(head);
 	SOCK_UNLOCK(so);
-	ACCEPT_UNLOCK();
 	wakeup(&so->so_timeo);
 	sorwakeup(so);
 	sowwakeup(so);
 }
 
 void
 soisdisconnecting(struct socket *so)
 {
 
-	/*
-	 * Note: This code assumes that SOCK_LOCK(so) and
-	 * SOCKBUF_LOCK(&so->so_rcv) are the same.
-	 */
-	SOCKBUF_LOCK(&so->so_rcv);
+	SOCK_LOCK(so);
 	so->so_state &= ~SS_ISCONNECTING;
 	so->so_state |= SS_ISDISCONNECTING;
-	socantrcvmore_locked(so);
-	SOCKBUF_LOCK(&so->so_snd);
-	socantsendmore_locked(so);
+
+	if (!SOLISTENING(so)) {
+		SOCKBUF_LOCK(&so->so_rcv);
+		socantrcvmore_locked(so);
+		SOCKBUF_LOCK(&so->so_snd);
+		socantsendmore_locked(so);
+	}
+	SOCK_UNLOCK(so);
 	wakeup(&so->so_timeo);
 }
 
 void
 soisdisconnected(struct socket *so)
 {
 
-	/*
-	 * Note: This code assumes that SOCK_LOCK(so) and
-	 * SOCKBUF_LOCK(&so->so_rcv) are the same.
-	 */
-	SOCKBUF_LOCK(&so->so_rcv);
+	SOCK_LOCK(so);
 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
 	so->so_state |= SS_ISDISCONNECTED;
-	socantrcvmore_locked(so);
-	SOCKBUF_LOCK(&so->so_snd);
-	sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
-	socantsendmore_locked(so);
+
+	if (!SOLISTENING(so)) {
+		SOCKBUF_LOCK(&so->so_rcv);
+		socantrcvmore_locked(so);
+		SOCKBUF_LOCK(&so->so_snd);
+		sbdrop_locked(&so->so_snd, sbused(&so->so_snd));
+		socantsendmore_locked(so);
+	}
+	SOCK_UNLOCK(so);
 	wakeup(&so->so_timeo);
 }
 
 /*
  * Make a copy of a sockaddr in a malloced buffer of type M_SONAME.
  */
 struct sockaddr *
 sodupsockaddr(const struct sockaddr *sa, int mflags)
 {
 	struct sockaddr *sa2;
 
 	sa2 = malloc(sa->sa_len, M_SONAME, mflags);
 	if (sa2)
 		bcopy(sa, sa2, sa->sa_len);
 	return sa2;
 }
 
 /*
  * Register per-socket buffer upcalls.
  */
 void
 soupcall_set(struct socket *so, int which, so_upcall_t func, void *arg)
 {
 	struct sockbuf *sb;
 
+	KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so));
+
 	switch (which) {
 	case SO_RCV:
 		sb = &so->so_rcv;
 		break;
 	case SO_SND:
 		sb = &so->so_snd;
 		break;
 	default:
 		panic("soupcall_set: bad which");
 	}
 	SOCKBUF_LOCK_ASSERT(sb);
 	sb->sb_upcall = func;
 	sb->sb_upcallarg = arg;
 	sb->sb_flags |= SB_UPCALL;
 }
 
 void
 soupcall_clear(struct socket *so, int which)
 {
 	struct sockbuf *sb;
 
+	KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so));
+
 	switch (which) {
 	case SO_RCV:
 		sb = &so->so_rcv;
 		break;
 	case SO_SND:
 		sb = &so->so_snd;
 		break;
 	default:
 		panic("soupcall_clear: bad which");
 	}
 	SOCKBUF_LOCK_ASSERT(sb);
-	KASSERT(sb->sb_upcall != NULL, ("soupcall_clear: no upcall to clear"));
+	KASSERT(sb->sb_upcall != NULL,
+	    ("%s: so %p no upcall to clear", __func__, so));
 	sb->sb_upcall = NULL;
 	sb->sb_upcallarg = NULL;
 	sb->sb_flags &= ~SB_UPCALL;
 }
 
+void
+solisten_upcall_set(struct socket *so, so_upcall_t func, void *arg)
+{
+
+	SOLISTEN_LOCK_ASSERT(so);
+	so->sol_upcall = func;
+	so->sol_upcallarg = arg;
+}
+
+static void
+so_rdknl_lock(void *arg)
+{
+	struct socket *so = arg;
+
+	if (SOLISTENING(so))
+		SOCK_LOCK(so);
+	else
+		SOCKBUF_LOCK(&so->so_rcv);
+}
+
+static void
+so_rdknl_unlock(void *arg)
+{
+	struct socket *so = arg;
+
+	if (SOLISTENING(so))
+		SOCK_UNLOCK(so);
+	else
+		SOCKBUF_UNLOCK(&so->so_rcv);
+}
+
+static void
+so_rdknl_assert_locked(void *arg)
+{
+	struct socket *so = arg;
+
+	if (SOLISTENING(so))
+		SOCK_LOCK_ASSERT(so);
+	else
+		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
+}
+
+static void
+so_rdknl_assert_unlocked(void *arg)
+{
+	struct socket *so = arg;
+
+	if (SOLISTENING(so))
+		SOCK_UNLOCK_ASSERT(so);
+	else
+		SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
+}
+
+static void
+so_wrknl_lock(void *arg)
+{
+	struct socket *so = arg;
+
+	if (SOLISTENING(so))
+		SOCK_LOCK(so);
+	else
+		SOCKBUF_LOCK(&so->so_snd);
+}
+
+static void
+so_wrknl_unlock(void *arg)
+{
+	struct socket *so = arg;
+
+	if (SOLISTENING(so))
+		SOCK_UNLOCK(so);
+	else
+		SOCKBUF_UNLOCK(&so->so_snd);
+}
+
+static void
+so_wrknl_assert_locked(void *arg)
+{
+	struct socket *so = arg;
+
+	if (SOLISTENING(so))
+		SOCK_LOCK_ASSERT(so);
+	else
+		SOCKBUF_LOCK_ASSERT(&so->so_snd);
+}
+
+static void
+so_wrknl_assert_unlocked(void *arg)
+{
+	struct socket *so = arg;
+
+	if (SOLISTENING(so))
+		SOCK_UNLOCK_ASSERT(so);
+	else
+		SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
+}
+
 /*
  * Create an external-format (``xsocket'') structure using the information in
  * the kernel-format socket structure pointed to by so.  This is done to
  * reduce the spew of irrelevant information over this interface, to isolate
  * user code from changes in the kernel structure, and potentially to provide
  * information-hiding if we decide that some of this information should be
  * hidden from users.
  */
 void
 sotoxsocket(struct socket *so, struct xsocket *xso)
 {
 
 	xso->xso_len = sizeof *xso;
 	xso->xso_so = so;
 	xso->so_type = so->so_type;
 	xso->so_options = so->so_options;
 	xso->so_linger = so->so_linger;
 	xso->so_state = so->so_state;
 	xso->so_pcb = so->so_pcb;
 	xso->xso_protocol = so->so_proto->pr_protocol;
 	xso->xso_family = so->so_proto->pr_domain->dom_family;
-	xso->so_qlen = so->so_qlen;
-	xso->so_incqlen = so->so_incqlen;
-	xso->so_qlimit = so->so_qlimit;
 	xso->so_timeo = so->so_timeo;
 	xso->so_error = so->so_error;
-	xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
-	xso->so_oobmark = so->so_oobmark;
-	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
-	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
 	xso->so_uid = so->so_cred->cr_uid;
-}
-
-
-/*
- * Socket accessor functions to provide external consumers with
- * a safe interface to socket state
- *
- */
-
-void
-so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *),
-    void *arg)
-{
-
-	TAILQ_FOREACH(so, &so->so_comp, so_list)
-		func(so, arg);
+	xso->so_pgid = so->so_sigio ? so->so_sigio->sio_pgid : 0;
+	if (SOLISTENING(so)) {
+		xso->so_qlen = so->sol_qlen;
+		xso->so_incqlen = so->sol_incqlen;
+		xso->so_qlimit = so->sol_qlimit;
+		xso->so_oobmark = 0;
+		bzero(&xso->so_snd, sizeof(xso->so_snd));
+		bzero(&xso->so_rcv, sizeof(xso->so_rcv));
+	} else {
+		xso->so_state |= so->so_qstate;
+		xso->so_qlen = xso->so_incqlen = xso->so_qlimit = 0;
+		xso->so_oobmark = so->so_oobmark;
+		sbtoxsockbuf(&so->so_snd, &xso->so_snd);
+		sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+	}
 }
 
 struct sockbuf *
 so_sockbuf_rcv(struct socket *so)
 {
 
 	return (&so->so_rcv);
 }
 
 struct sockbuf *
 so_sockbuf_snd(struct socket *so)
 {
 
 	return (&so->so_snd);
 }
 
 int
 so_state_get(const struct socket *so)
 {
 
 	return (so->so_state);
 }
 
 void
 so_state_set(struct socket *so, int val)
 {
 
 	so->so_state = val;
 }
 
 int
 so_options_get(const struct socket *so)
 {
 
 	return (so->so_options);
 }
 
 void
 so_options_set(struct socket *so, int val)
 {
 
 	so->so_options = val;
 }
 
 int
 so_error_get(const struct socket *so)
 {
 
 	return (so->so_error);
 }
 
 void
 so_error_set(struct socket *so, int val)
 {
 
 	so->so_error = val;
 }
 
 int
 so_linger_get(const struct socket *so)
 {
 
 	return (so->so_linger);
 }
 
 void
 so_linger_set(struct socket *so, int val)
 {
 
 	so->so_linger = val;
 }
 
 struct protosw *
 so_protosw_get(const struct socket *so)
 {
 
 	return (so->so_proto);
 }
 
 void
 so_protosw_set(struct socket *so, struct protosw *val)
 {
 
 	so->so_proto = val;
 }
 
 void
 so_sorwakeup(struct socket *so)
 {
 
 	sorwakeup(so);
 }
 
 void
 so_sowwakeup(struct socket *so)
 {
 
 	sowwakeup(so);
 }
 
 void
 so_sorwakeup_locked(struct socket *so)
 {
 
 	sorwakeup_locked(so);
 }
 
 void
 so_sowwakeup_locked(struct socket *so)
 {
 
 	sowwakeup_locked(so);
 }
 
 void
 so_lock(struct socket *so)
 {
 
 	SOCK_LOCK(so);
 }
 
 void
 so_unlock(struct socket *so)
 {
 
 	SOCK_UNLOCK(so);
 }
Index: head/sys/kern/uipc_syscalls.c
===================================================================
--- head/sys/kern/uipc_syscalls.c	(revision 319721)
+++ head/sys/kern/uipc_syscalls.c	(revision 319722)
@@ -1,1607 +1,1562 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)uipc_syscalls.c	8.4 (Berkeley) 2/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_compat.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/capsicum.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sysproto.h>
 #include <sys/malloc.h>
 #include <sys/filedesc.h>
 #include <sys/proc.h>
 #include <sys/filio.h>
 #include <sys/jail.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/syscallsubr.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 #ifdef COMPAT_FREEBSD32
 #include <compat/freebsd32/freebsd32_util.h>
 #endif
 
 #include <net/vnet.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
-/*
- * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC
- * and SOCK_NONBLOCK.
- */
-#define	ACCEPT4_INHERIT	0x1
-#define	ACCEPT4_COMPAT	0x2
-
 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
 
 static int accept1(struct thread *td, int s, struct sockaddr *uname,
 		   socklen_t *anamelen, int flags);
 static int getsockname1(struct thread *td, struct getsockname_args *uap,
 			int compat);
 static int getpeername1(struct thread *td, struct getpeername_args *uap,
 			int compat);
 static int sockargs(struct mbuf **, char *, socklen_t, int);
 
 /*
  * Convert a user file descriptor to a kernel file entry and check if required
  * capability rights are present.
  * If required copy of current set of capability rights is returned.
  * A reference on the file entry is held upon returning.
  */
 int
 getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp,
     struct file **fpp, u_int *fflagp, struct filecaps *havecapsp)
 {
 	struct file *fp;
 	int error;
 
 	error = fget_cap(td, fd, rightsp, &fp, havecapsp);
 	if (error != 0)
 		return (error);
 	if (fp->f_type != DTYPE_SOCKET) {
 		fdrop(fp, td);
 		if (havecapsp != NULL)
 			filecaps_free(havecapsp);
 		return (ENOTSOCK);
 	}
 	if (fflagp != NULL)
 		*fflagp = fp->f_flag;
 	*fpp = fp;
 	return (0);
 }
 
 /*
  * System call interface to the socket abstraction.
  */
 #if defined(COMPAT_43)
 #define COMPAT_OLDSOCK
 #endif
 
 int
 sys_socket(struct thread *td, struct socket_args *uap)
 {
 
 	return (kern_socket(td, uap->domain, uap->type, uap->protocol));
 }
 
 int
 kern_socket(struct thread *td, int domain, int type, int protocol)
 {
 	struct socket *so;
 	struct file *fp;
 	int fd, error, oflag, fflag;
 
 	AUDIT_ARG_SOCKET(domain, type, protocol);
 
 	oflag = 0;
 	fflag = 0;
 	if ((type & SOCK_CLOEXEC) != 0) {
 		type &= ~SOCK_CLOEXEC;
 		oflag |= O_CLOEXEC;
 	}
 	if ((type & SOCK_NONBLOCK) != 0) {
 		type &= ~SOCK_NONBLOCK;
 		fflag |= FNONBLOCK;
 	}
 
 #ifdef MAC
 	error = mac_socket_check_create(td->td_ucred, domain, type, protocol);
 	if (error != 0)
 		return (error);
 #endif
 	error = falloc(td, &fp, &fd, oflag);
 	if (error != 0)
 		return (error);
 	/* An extra reference on `fp' has been held for us by falloc(). */
 	error = socreate(domain, &so, type, protocol, td->td_ucred, td);
 	if (error != 0) {
 		fdclose(td, fp, fd);
 	} else {
 		finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops);
 		if ((fflag & FNONBLOCK) != 0)
 			(void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td);
 		td->td_retval[0] = fd;
 	}
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 sys_bind(struct thread *td, struct bind_args *uap)
 {
 	struct sockaddr *sa;
 	int error;
 
 	error = getsockaddr(&sa, uap->name, uap->namelen);
 	if (error == 0) {
 		error = kern_bindat(td, AT_FDCWD, uap->s, sa);
 		free(sa, M_SONAME);
 	}
 	return (error);
 }
 
 int
 kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
 {
 	struct socket *so;
 	struct file *fp;
 	cap_rights_t rights;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 	AUDIT_ARG_SOCKADDR(td, dirfd, sa);
 	error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_BIND),
 	    &fp, NULL, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(sa);
 #endif
 #ifdef MAC
 	error = mac_socket_check_bind(td->td_ucred, so, sa);
 	if (error == 0) {
 #endif
 		if (dirfd == AT_FDCWD)
 			error = sobind(so, sa, td);
 		else
 			error = sobindat(dirfd, so, sa, td);
 #ifdef MAC
 	}
 #endif
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 sys_bindat(struct thread *td, struct bindat_args *uap)
 {
 	struct sockaddr *sa;
 	int error;
 
 	error = getsockaddr(&sa, uap->name, uap->namelen);
 	if (error == 0) {
 		error = kern_bindat(td, uap->fd, uap->s, sa);
 		free(sa, M_SONAME);
 	}
 	return (error);
 }
 
 int
 sys_listen(struct thread *td, struct listen_args *uap)
 {
 
 	return (kern_listen(td, uap->s, uap->backlog));
 }
 
 int
 kern_listen(struct thread *td, int s, int backlog)
 {
 	struct socket *so;
 	struct file *fp;
 	cap_rights_t rights;
 	int error;
 
 	AUDIT_ARG_FD(s);
 	error = getsock_cap(td, s, cap_rights_init(&rights, CAP_LISTEN),
 	    &fp, NULL, NULL);
 	if (error == 0) {
 		so = fp->f_data;
 #ifdef MAC
 		error = mac_socket_check_listen(td->td_ucred, so);
 		if (error == 0)
 #endif
 			error = solisten(so, backlog, td);
 		fdrop(fp, td);
 	}
 	return (error);
 }
 
 /*
  * accept1()
  */
 static int
 accept1(td, s, uname, anamelen, flags)
 	struct thread *td;
 	int s;
 	struct sockaddr *uname;
 	socklen_t *anamelen;
 	int flags;
 {
 	struct sockaddr *name;
 	socklen_t namelen;
 	struct file *fp;
 	int error;
 
 	if (uname == NULL)
 		return (kern_accept4(td, s, NULL, NULL, flags, NULL));
 
 	error = copyin(anamelen, &namelen, sizeof (namelen));
 	if (error != 0)
 		return (error);
 
 	error = kern_accept4(td, s, &name, &namelen, flags, &fp);
 
 	if (error != 0)
 		return (error);
 
 	if (error == 0 && uname != NULL) {
 #ifdef COMPAT_OLDSOCK
 		if (flags & ACCEPT4_COMPAT)
 			((struct osockaddr *)name)->sa_family =
 			    name->sa_family;
 #endif
 		error = copyout(name, uname, namelen);
 	}
 	if (error == 0)
 		error = copyout(&namelen, anamelen,
 		    sizeof(namelen));
 	if (error != 0)
 		fdclose(td, fp, td->td_retval[0]);
 	fdrop(fp, td);
 	free(name, M_SONAME);
 	return (error);
 }
 
 int
 kern_accept(struct thread *td, int s, struct sockaddr **name,
     socklen_t *namelen, struct file **fp)
 {
 	return (kern_accept4(td, s, name, namelen, ACCEPT4_INHERIT, fp));
 }
 
 /*
  * Accept one connection from the listening socket behind descriptor s and
  * install it in a newly allocated descriptor.
  *
  * td       calling thread; td->td_retval[0] is set to the new descriptor.
  * s        descriptor of the listening socket (needs CAP_ACCEPT).
  * name     if not NULL, receives the peer address (M_SONAME allocation the
  *          caller must free), or stays NULL when soaccept() yields none.
  * namelen  in: caller's buffer size; out: clamped to sa_len, or 0 when no
  *          address is available.  Only used when name is not NULL.
  * flags    SOCK_CLOEXEC marks the descriptor close-on-exec.  With
  *          ACCEPT4_INHERIT the SIGIO owner and the file flags of the
  *          listening descriptor are carried over; otherwise FNONBLOCK and
  *          FASYNC are cleared and FNONBLOCK is set only for SOCK_NONBLOCK.
  * fp       if not NULL, receives a referenced struct file for the new
  *          descriptor on success and NULL on failure.
  *
  * Returns 0 or an errno value; EINVAL if the socket is not accepting
  * connections.
  *
  * NOTE(review): after the SOLISTENING() check the listening socket is
  * still locked and this function never unlocks it on either outcome of
  * solisten_dequeue(), so that routine presumably drops the lock - confirm.
  */
 int
 kern_accept4(struct thread *td, int s, struct sockaddr **name,
     socklen_t *namelen, int flags, struct file **fp)
 {
 	struct file *headfp, *nfp = NULL;
 	struct sockaddr *sa = NULL;
 	struct socket *head, *so;
 	struct filecaps fcaps;
 	cap_rights_t rights;
 	u_int fflag;
 	pid_t pgid;
 	int error, fd, tmp;
 
 	if (name != NULL)
 		*name = NULL;
 
 	AUDIT_ARG_FD(s);
 	error = getsock_cap(td, s, cap_rights_init(&rights, CAP_ACCEPT),
 	    &headfp, &fflag, &fcaps);
 	if (error != 0)
 		return (error);
 	head = headfp->f_data;
 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
 		error = EINVAL;
 		goto done;
 	}
 #ifdef MAC
 	error = mac_socket_check_accept(td->td_ucred, head);
 	if (error != 0)
 		goto done;
 #endif
 	error = falloc_caps(td, &nfp, &fd,
 	    (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0, &fcaps);
 	if (error != 0)
 		goto done;
-	ACCEPT_LOCK();
-	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
-		ACCEPT_UNLOCK();
-		error = EWOULDBLOCK;
+	SOCK_LOCK(head);
+	if (!SOLISTENING(head)) {
+		SOCK_UNLOCK(head);
+		error = EINVAL;
 		goto noconnection;
 	}
-	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
-		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
-			head->so_error = ECONNABORTED;
-			break;
-		}
-		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
-		    "accept", 0);
-		if (error != 0) {
-			ACCEPT_UNLOCK();
-			goto noconnection;
-		}
-	}
-	if (head->so_error) {
-		error = head->so_error;
-		head->so_error = 0;
-		ACCEPT_UNLOCK();
+
+	error = solisten_dequeue(head, &so, flags);
+	if (error != 0)
 		goto noconnection;
-	}
-	so = TAILQ_FIRST(&head->so_comp);
-	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
-	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
 
-	/*
-	 * Before changing the flags on the socket, we have to bump the
-	 * reference count.  Otherwise, if the protocol calls sofree(),
-	 * the socket will be released due to a zero refcount.
-	 */
-	SOCK_LOCK(so);			/* soref() and so_state update */
-	soref(so);			/* file descriptor reference */
-
-	TAILQ_REMOVE(&head->so_comp, so, so_list);
-	head->so_qlen--;
-	if (flags & ACCEPT4_INHERIT)
-		so->so_state |= (head->so_state & SS_NBIO);
-	else
-		so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0;
-	so->so_qstate &= ~SQ_COMP;
-	so->so_head = NULL;
-
-	SOCK_UNLOCK(so);
-	ACCEPT_UNLOCK();
-
 	/* An extra reference on `nfp' has been held for us by falloc(). */
 	td->td_retval[0] = fd;
 
-	/* connection has been removed from the listen queue */
-	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
+	/* Connection has been removed from the listen queue. */
+	KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
 
 	if (flags & ACCEPT4_INHERIT) {
 		pgid = fgetown(&head->so_sigio);
 		if (pgid != 0)
 			fsetown(pgid, &so->so_sigio);
 	} else {
 		fflag &= ~(FNONBLOCK | FASYNC);
 		if (flags & SOCK_NONBLOCK)
 			fflag |= FNONBLOCK;
 	}
 
 	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
 	/* Sync socket nonblocking/async state with file flags */
 	tmp = fflag & FNONBLOCK;
 	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
 	tmp = fflag & FASYNC;
 	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
 	error = soaccept(so, &sa);
 	if (error != 0)
 		goto noconnection;
 	if (sa == NULL) {
 		if (name)
 			*namelen = 0;
 		goto done;
 	}
 	AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa);
 	if (name) {
 		/* check sa_len before it is destroyed */
 		if (*namelen > sa->sa_len)
 			*namelen = sa->sa_len;
 #ifdef KTRACE
 		if (KTRPOINT(td, KTR_STRUCT))
 			ktrsockaddr(sa);
 #endif
 		/* Ownership of the address moves to the caller. */
 		*name = sa;
 		sa = NULL;
 	}
 noconnection:
 	free(sa, M_SONAME);
 
 	/*
 	 * close the new descriptor, assuming someone hasn't ripped it
 	 * out from under us.
 	 */
 	if (error != 0)
 		fdclose(td, nfp, fd);
 
 	/*
 	 * Release explicitly held references before returning.  We return
 	 * a reference on nfp to the caller on success if they request it.
 	 */
 done:
 	/* nfp is still NULL only if falloc_caps() never succeeded. */
 	if (nfp == NULL)
 		filecaps_free(&fcaps);
 	if (fp != NULL) {
 		if (error == 0) {
 			*fp = nfp;
 			nfp = NULL;
 		} else
 			*fp = NULL;
 	}
 	if (nfp != NULL)
 		fdrop(nfp, td);
 	fdrop(headfp, td);
 	return (error);
 }
 
 /*
  * accept(2) system call entry point.  Passes ACCEPT4_INHERIT to accept1()
  * and returns its result (0 or an errno value).
  */
 int
 sys_accept(struct thread *td, struct accept_args *uap)
 {
 
 	return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT));
 }
 
 /*
  * accept4(2) system call entry point.  Only SOCK_CLOEXEC and SOCK_NONBLOCK
  * may be set in uap->flags; any other bit yields EINVAL.  Otherwise the
  * flags are handed to accept1() unchanged.
  */
 int
 sys_accept4(struct thread *td, struct accept4_args *uap)
 {
 
 	if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
 		return (EINVAL);
 
 	return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags));
 }
 
 #ifdef COMPAT_OLDSOCK
 /*
  * Old-socket-API accept.  Same as sys_accept() but additionally passes
  * ACCEPT4_COMPAT, which makes accept1() rewrite the returned address
  * header through struct osockaddr before copying it out.
  */
 int
 oaccept(struct thread *td, struct accept_args *uap)
 {
 
 	return (accept1(td, uap->s, uap->name, uap->anamelen,
 	    ACCEPT4_INHERIT | ACCEPT4_COMPAT));
 }
 #endif /* COMPAT_OLDSOCK */
 
 int
 sys_connect(struct thread *td, struct connect_args *uap)
 {
 	struct sockaddr *sa;
 	int error;
 
 	error = getsockaddr(&sa, uap->name, uap->namelen);
 	if (error == 0) {
 		error = kern_connectat(td, AT_FDCWD, uap->s, sa);
 		free(sa, M_SONAME);
 	}
 	return (error);
 }
 
 /*
  * Connect the socket behind descriptor fd to the kernel-space address sa.
  *
  * dirfd selects the entry point: AT_FDCWD uses soconnect(), anything else
  * uses soconnectat().  The descriptor needs CAP_CONNECT.
  *
  * Returns 0 or an errno value:
  *   EALREADY     a connect is already in progress on the socket;
  *   EINPROGRESS  the socket is non-blocking and still connecting;
  *   EINTR        the wait was interrupted (ERESTART is mapped to EINTR).
  * Otherwise the thread sleeps until the socket leaves SS_ISCONNECTING or
  * so_error is set, and so_error is returned and cleared.
  */
 int
 kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
 {
 	struct socket *so;
 	struct file *fp;
 	cap_rights_t rights;
 	int error, interrupted = 0;
 
 	AUDIT_ARG_FD(fd);
 	AUDIT_ARG_SOCKADDR(td, dirfd, sa);
 	error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_CONNECT),
 	    &fp, NULL, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 	if (so->so_state & SS_ISCONNECTING) {
 		error = EALREADY;
 		goto done1;
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(sa);
 #endif
 #ifdef MAC
 	error = mac_socket_check_connect(td->td_ucred, so, sa);
 	if (error != 0)
 		goto bad;
 #endif
 	if (dirfd == AT_FDCWD)
 		error = soconnect(so, sa, td);
 	else
 		error = soconnectat(dirfd, so, sa, td);
 	if (error != 0)
 		goto bad;
 	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
 		error = EINPROGRESS;
 		goto done1;
 	}
 	SOCK_LOCK(so);
 	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
-		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
+		error = msleep(&so->so_timeo, &so->so_lock, PSOCK | PCATCH,
 		    "connec", 0);
 		if (error != 0) {
 			if (error == EINTR || error == ERESTART)
 				interrupted = 1;
 			break;
 		}
 	}
 	if (error == 0) {
 		error = so->so_error;
 		so->so_error = 0;
 	}
 	SOCK_UNLOCK(so);
 bad:
 	/* An interrupted wait leaves SS_ISCONNECTING set on the socket. */
 	if (!interrupted)
 		so->so_state &= ~SS_ISCONNECTING;
 	if (error == ERESTART)
 		error = EINTR;
 done1:
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 sys_connectat(struct thread *td, struct connectat_args *uap)
 {
 	struct sockaddr *sa;
 	int error;
 
 	error = getsockaddr(&sa, uap->name, uap->namelen);
 	if (error == 0) {
 		error = kern_connectat(td, uap->fd, uap->s, sa);
 		free(sa, M_SONAME);
 	}
 	return (error);
 }
 
 /*
  * Create a pair of connected sockets and install each one in a new file
  * descriptor.
  *
  * domain, type, protocol  passed to socreate() for both sockets.  The
  *     SOCK_CLOEXEC and SOCK_NONBLOCK bits are stripped from type first and
  *     applied as O_CLOEXEC on the descriptors / FNONBLOCK on the files.
  * rsv  kernel array of two ints; rsv[0] and rsv[1] receive the descriptors.
  *
  * Returns 0 or an errno value.  On failure the free4..free1 labels undo,
  * in reverse order, whatever had been set up so far.
  */
 int
 kern_socketpair(struct thread *td, int domain, int type, int protocol,
     int *rsv)
 {
 	struct file *fp1, *fp2;
 	struct socket *so1, *so2;
 	int fd, error, oflag, fflag;
 
 	AUDIT_ARG_SOCKET(domain, type, protocol);
 
 	oflag = 0;
 	fflag = 0;
 	if ((type & SOCK_CLOEXEC) != 0) {
 		type &= ~SOCK_CLOEXEC;
 		oflag |= O_CLOEXEC;
 	}
 	if ((type & SOCK_NONBLOCK) != 0) {
 		type &= ~SOCK_NONBLOCK;
 		fflag |= FNONBLOCK;
 	}
 #ifdef MAC
 	/* We might want to have a separate check for socket pairs. */
 	error = mac_socket_check_create(td->td_ucred, domain, type,
 	    protocol);
 	if (error != 0)
 		return (error);
 #endif
 	error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
 	if (error != 0)
 		return (error);
 	error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
 	if (error != 0)
 		goto free1;
 	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
 	error = falloc(td, &fp1, &fd, oflag);
 	if (error != 0)
 		goto free2;
 	rsv[0] = fd;
 	fp1->f_data = so1;	/* so1 already has ref count */
 	error = falloc(td, &fp2, &fd, oflag);
 	if (error != 0)
 		goto free3;
 	fp2->f_data = so2;	/* so2 already has ref count */
 	rsv[1] = fd;
 	error = soconnect2(so1, so2);
 	if (error != 0)
 		goto free4;
 	if (type == SOCK_DGRAM) {
 		/*
 		 * Datagram socket connection is asymmetric.
 		 */
 		 error = soconnect2(so2, so1);
 		 if (error != 0)
 			goto free4;
 	}
 	finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data,
 	    &socketops);
 	finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data,
 	    &socketops);
 	if ((fflag & FNONBLOCK) != 0) {
 		(void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td);
 		(void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td);
 	}
 	/* Drop the extra references taken by falloc(). */
 	fdrop(fp1, td);
 	fdrop(fp2, td);
 	return (0);
 free4:
 	fdclose(td, fp2, rsv[1]);
 	fdrop(fp2, td);
 free3:
 	fdclose(td, fp1, rsv[0]);
 	fdrop(fp1, td);
 free2:
 	if (so2 != NULL)
 		(void)soclose(so2);
 free1:
 	if (so1 != NULL)
 		(void)soclose(so1);
 	return (error);
 }
 
 int
 sys_socketpair(struct thread *td, struct socketpair_args *uap)
 {
 	int error, sv[2];
 
 	error = kern_socketpair(td, uap->domain, uap->type,
 	    uap->protocol, sv);
 	if (error != 0)
 		return (error);
 	error = copyout(sv, uap->rsv, 2 * sizeof(int));
 	if (error != 0) {
 		(void)kern_close(td, sv[0]);
 		(void)kern_close(td, sv[1]);
 	}
 	return (error);
 }
 
 /*
  * Userspace front end for kern_sendit(): copy the destination address and
  * control data described by mp in from userspace, then send.
  *
  * td     calling thread.
  * s      socket descriptor.
  * mp     kernel copy of the message header whose msg_name and msg_control
  *        still point into userspace.  mp->msg_name is overwritten with the
  *        kernel copy of the address.
  * flags  send flags, passed through to kern_sendit().
  *
  * Returns 0 or an errno value.  With CAPABILITY_MODE compiled in, a thread
  * in capability mode that supplies a destination address gets ECAPMODE.
  * The kernel copy of the address is freed before returning.
  */
 static int
 sendit(struct thread *td, int s, struct msghdr *mp, int flags)
 {
 	struct mbuf *control;
 	struct sockaddr *to;
 	int error;
 
 #ifdef CAPABILITY_MODE
 	if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL))
 		return (ECAPMODE);
 #endif
 
 	if (mp->msg_name != NULL) {
 		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
 		if (error != 0) {
 			/* Nothing was allocated; keep free() below harmless. */
 			to = NULL;
 			goto bad;
 		}
 		mp->msg_name = to;
 	} else {
 		to = NULL;
 	}
 
 	if (mp->msg_control) {
 		/*
 		 * Control data shorter than a cmsghdr is rejected, except
 		 * for MSG_COMPAT callers when COMPAT_OLDSOCK is compiled in.
 		 */
 		if (mp->msg_controllen < sizeof(struct cmsghdr)
 #ifdef COMPAT_OLDSOCK
 		    && mp->msg_flags != MSG_COMPAT
 #endif
 		) {
 			error = EINVAL;
 			goto bad;
 		}
 		error = sockargs(&control, mp->msg_control,
 		    mp->msg_controllen, MT_CONTROL);
 		if (error != 0)
 			goto bad;
 #ifdef COMPAT_OLDSOCK
 		if (mp->msg_flags == MSG_COMPAT) {
 			struct cmsghdr *cm;
 
 			/* Prepend a SOL_SOCKET/SCM_RIGHTS header. */
 			M_PREPEND(control, sizeof(*cm), M_WAITOK);
 			cm = mtod(control, struct cmsghdr *);
 			cm->cmsg_len = control->m_len;
 			cm->cmsg_level = SOL_SOCKET;
 			cm->cmsg_type = SCM_RIGHTS;
 		}
 #endif
 	} else {
 		control = NULL;
 	}
 
 	error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
 
 bad:
 	free(to, M_SONAME);
 	return (error);
 }
 
 /*
  * Send the data described by mp->msg_iov on the socket behind descriptor s.
  *
  * td       calling thread; td->td_retval[0] is set to the byte count sent.
  * mp       message header; msg_name, if set, is a kernel-space address.
  *          A destination address additionally requires CAP_CONNECT.
  * flags    passed to sosend(); MSG_NOSIGNAL suppresses SIGPIPE.
  * control  control mbuf chain or NULL.  It is freed here on every error
  *          path taken before sosend() is called.
  * segflg   address space of the iovec data.
  *
  * Returns 0 or an errno value; EINVAL if the summed iovec lengths go
  * negative.  A transfer cut short by ERESTART, EINTR or EWOULDBLOCK
  * after some data moved is reported as success with the partial count.
  */
 int
 kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
     struct mbuf *control, enum uio_seg segflg)
 {
 	struct file *fp;
 	struct uio auio;
 	struct iovec *iov;
 	struct socket *so;
 	cap_rights_t rights;
 #ifdef KTRACE
 	struct uio *ktruio = NULL;
 #endif
 	ssize_t len;
 	int i, error;
 
 	AUDIT_ARG_FD(s);
 	cap_rights_init(&rights, CAP_SEND);
 	if (mp->msg_name != NULL) {
 		AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name);
 		cap_rights_set(&rights, CAP_CONNECT);
 	}
 	error = getsock_cap(td, s, &rights, &fp, NULL, NULL);
 	if (error != 0) {
 		m_freem(control);
 		return (error);
 	}
 	so = (struct socket *)fp->f_data;
 
 #ifdef KTRACE
 	if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(mp->msg_name);
 #endif
 #ifdef MAC
 	if (mp->msg_name != NULL) {
 		error = mac_socket_check_connect(td->td_ucred, so,
 		    mp->msg_name);
 		if (error != 0) {
 			m_freem(control);
 			goto bad;
 		}
 	}
 	error = mac_socket_check_send(td->td_ucred, so);
 	if (error != 0) {
 		m_freem(control);
 		goto bad;
 	}
 #endif
 
 	auio.uio_iov = mp->msg_iov;
 	auio.uio_iovcnt = mp->msg_iovlen;
 	auio.uio_segflg = segflg;
 	auio.uio_rw = UIO_WRITE;
 	auio.uio_td = td;
 	auio.uio_offset = 0;			/* XXX */
 	auio.uio_resid = 0;
 	iov = mp->msg_iov;
 	/* Total up the request size, rejecting a sum that goes negative. */
 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
 		if ((auio.uio_resid += iov->iov_len) < 0) {
 			error = EINVAL;
 			m_freem(control);
 			goto bad;
 		}
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_GENIO))
 		ktruio = cloneuio(&auio);
 #endif
 	len = auio.uio_resid;
 	error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
 	if (error != 0) {
 		/* Some data moved before the interruption: not an error. */
 		if (auio.uio_resid != len && (error == ERESTART ||
 		    error == EINTR || error == EWOULDBLOCK))
 			error = 0;
 		/* Generation of SIGPIPE can be controlled per socket */
 		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
 		    !(flags & MSG_NOSIGNAL)) {
 			PROC_LOCK(td->td_proc);
 			tdsignal(td, SIGPIPE);
 			PROC_UNLOCK(td->td_proc);
 		}
 	}
 	if (error == 0)
 		td->td_retval[0] = len - auio.uio_resid;
 #ifdef KTRACE
 	if (ktruio != NULL) {
 		ktruio->uio_resid = td->td_retval[0];
 		ktrgenio(s, UIO_WRITE, ktruio, error);
 	}
 #endif
 bad:
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 sys_sendto(struct thread *td, struct sendto_args *uap)
 {
 	struct msghdr msg;
 	struct iovec aiov;
 
 	msg.msg_name = uap->to;
 	msg.msg_namelen = uap->tolen;
 	msg.msg_iov = &aiov;
 	msg.msg_iovlen = 1;
 	msg.msg_control = 0;
 #ifdef COMPAT_OLDSOCK
 	msg.msg_flags = 0;
 #endif
 	aiov.iov_base = uap->buf;
 	aiov.iov_len = uap->len;
 	return (sendit(td, uap->s, &msg, uap->flags));
 }
 
 #ifdef COMPAT_OLDSOCK
 int
 osend(struct thread *td, struct osend_args *uap)
 {
 	struct msghdr msg;
 	struct iovec aiov;
 
 	msg.msg_name = 0;
 	msg.msg_namelen = 0;
 	msg.msg_iov = &aiov;
 	msg.msg_iovlen = 1;
 	aiov.iov_base = uap->buf;
 	aiov.iov_len = uap->len;
 	msg.msg_control = 0;
 	msg.msg_flags = 0;
 	return (sendit(td, uap->s, &msg, uap->flags));
 }
 
 int
 osendmsg(struct thread *td, struct osendmsg_args *uap)
 {
 	struct msghdr msg;
 	struct iovec *iov;
 	int error;
 
 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 	if (error != 0)
 		return (error);
 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 	if (error != 0)
 		return (error);
 	msg.msg_iov = iov;
 	msg.msg_flags = MSG_COMPAT;
 	error = sendit(td, uap->s, &msg, uap->flags);
 	free(iov, M_IOV);
 	return (error);
 }
 #endif
 
 int
 sys_sendmsg(struct thread *td, struct sendmsg_args *uap)
 {
 	struct msghdr msg;
 	struct iovec *iov;
 	int error;
 
 	error = copyin(uap->msg, &msg, sizeof (msg));
 	if (error != 0)
 		return (error);
 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 	if (error != 0)
 		return (error);
 	msg.msg_iov = iov;
 #ifdef COMPAT_OLDSOCK
 	msg.msg_flags = 0;
 #endif
 	error = sendit(td, uap->s, &msg, uap->flags);
 	free(iov, M_IOV);
 	return (error);
 }
 
 /*
  * Receive into the buffers described by mp->msg_iov from the socket behind
  * descriptor s.
  *
  * td        calling thread; td->td_retval[0] is set to the bytes received.
  * mp        message header.  msg_flags is passed by reference to
  *           soreceive(); msg_namelen and msg_controllen are updated to
  *           the amounts actually delivered.
  * fromseg   address space of mp->msg_name only: the source address is
  *           delivered with copyout() for UIO_USERSPACE and bcopy()
  *           otherwise.  The data uio is always set up as UIO_USERSPACE.
  * controlp  if not NULL, receives the control mbuf chain on success (the
  *           caller then owns it); if NULL and mp->msg_control is set, the
  *           control data is copied out to mp->msg_control instead, with
  *           MSG_CTRUNC set when it does not fit.
  *
  * Returns 0 or an errno value; EINVAL if the summed iovec lengths go
  * negative.  A transfer cut short by ERESTART, EINTR or EWOULDBLOCK
  * after some data moved is reported as success.
  */
 int
 kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg,
     struct mbuf **controlp)
 {
 	struct uio auio;
 	struct iovec *iov;
 	struct mbuf *m, *control = NULL;
 	caddr_t ctlbuf;
 	struct file *fp;
 	struct socket *so;
 	struct sockaddr *fromsa = NULL;
 	cap_rights_t rights;
 #ifdef KTRACE
 	struct uio *ktruio = NULL;
 #endif
 	ssize_t len;
 	int error, i;
 
 	if (controlp != NULL)
 		*controlp = NULL;
 
 	AUDIT_ARG_FD(s);
 	error = getsock_cap(td, s, cap_rights_init(&rights, CAP_RECV),
 	    &fp, NULL, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 
 #ifdef MAC
 	error = mac_socket_check_receive(td->td_ucred, so);
 	if (error != 0) {
 		fdrop(fp, td);
 		return (error);
 	}
 #endif
 
 	auio.uio_iov = mp->msg_iov;
 	auio.uio_iovcnt = mp->msg_iovlen;
 	auio.uio_segflg = UIO_USERSPACE;
 	auio.uio_rw = UIO_READ;
 	auio.uio_td = td;
 	auio.uio_offset = 0;			/* XXX */
 	auio.uio_resid = 0;
 	iov = mp->msg_iov;
 	/* Total up the request size, rejecting a sum that goes negative. */
 	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
 		if ((auio.uio_resid += iov->iov_len) < 0) {
 			fdrop(fp, td);
 			return (EINVAL);
 		}
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_GENIO))
 		ktruio = cloneuio(&auio);
 #endif
 	len = auio.uio_resid;
 	error = soreceive(so, &fromsa, &auio, NULL,
 	    (mp->msg_control || controlp) ? &control : NULL,
 	    &mp->msg_flags);
 	if (error != 0) {
 		/* Some data moved before the interruption: not an error. */
 		if (auio.uio_resid != len && (error == ERESTART ||
 		    error == EINTR || error == EWOULDBLOCK))
 			error = 0;
 	}
 	if (fromsa != NULL)
 		AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa);
 #ifdef KTRACE
 	if (ktruio != NULL) {
 		ktruio->uio_resid = len - auio.uio_resid;
 		ktrgenio(s, UIO_READ, ktruio, error);
 	}
 #endif
 	if (error != 0)
 		goto out;
 	td->td_retval[0] = len - auio.uio_resid;
 	if (mp->msg_name) {
 		/* From here on len is the address length, not the data size. */
 		len = mp->msg_namelen;
 		if (len <= 0 || fromsa == NULL)
 			len = 0;
 		else {
 			/* save sa_len before it is destroyed by MSG_COMPAT */
 			len = MIN(len, fromsa->sa_len);
 #ifdef COMPAT_OLDSOCK
 			if (mp->msg_flags & MSG_COMPAT)
 				((struct osockaddr *)fromsa)->sa_family =
 				    fromsa->sa_family;
 #endif
 			if (fromseg == UIO_USERSPACE) {
 				error = copyout(fromsa, mp->msg_name,
 				    (unsigned)len);
 				if (error != 0)
 					goto out;
 			} else
 				bcopy(fromsa, mp->msg_name, len);
 		}
 		mp->msg_namelen = len;
 	}
 	if (mp->msg_control && controlp == NULL) {
 #ifdef COMPAT_OLDSOCK
 		/*
 		 * We assume that old recvmsg calls won't receive access
 		 * rights and other control info, esp. as control info
 		 * is always optional and those options didn't exist in 4.3.
 		 * If we receive rights, trim the cmsghdr; anything else
 		 * is tossed.
 		 */
 		if (control && mp->msg_flags & MSG_COMPAT) {
 			if (mtod(control, struct cmsghdr *)->cmsg_level !=
 			    SOL_SOCKET ||
 			    mtod(control, struct cmsghdr *)->cmsg_type !=
 			    SCM_RIGHTS) {
 				mp->msg_controllen = 0;
 				goto out;
 			}
 			control->m_len -= sizeof (struct cmsghdr);
 			control->m_data += sizeof (struct cmsghdr);
 		}
 #endif
 		/* len now tracks the space left in the user control buffer. */
 		len = mp->msg_controllen;
 		m = control;
 		mp->msg_controllen = 0;
 		ctlbuf = mp->msg_control;
 
 		while (m && len > 0) {
 			unsigned int tocopy;
 
 			if (len >= m->m_len)
 				tocopy = m->m_len;
 			else {
 				mp->msg_flags |= MSG_CTRUNC;
 				tocopy = len;
 			}
 
 			if ((error = copyout(mtod(m, caddr_t),
 					ctlbuf, tocopy)) != 0)
 				goto out;
 
 			ctlbuf += tocopy;
 			len -= tocopy;
 			m = m->m_next;
 		}
 		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
 	}
 out:
 	fdrop(fp, td);
 #ifdef KTRACE
 	if (fromsa && KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(fromsa);
 #endif
 	free(fromsa, M_SONAME);
 
 	/* Hand the control chain to the caller, or release it. */
 	if (error == 0 && controlp != NULL)
 		*controlp = control;
 	else  if (control)
 		m_freem(control);
 
 	return (error);
 }
 
 /*
  * Userspace front end for kern_recvit().  After a successful receive the
  * resulting address length is copied out to namelenp when that pointer is
  * not NULL.  For MSG_COMPAT requests a failure of that copy-out is
  * ignored.
  */
 static int
 recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp)
 {
 	int error;
 
 	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
 	if (error != 0 || namelenp == NULL)
 		return (error);
 
 	error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
 #ifdef COMPAT_OLDSOCK
 	if (mp->msg_flags & MSG_COMPAT)
 		error = 0;	/* old recvfrom didn't check */
 #endif
 	return (error);
 }
 
 int
 sys_recvfrom(struct thread *td, struct recvfrom_args *uap)
 {
 	struct msghdr msg;
 	struct iovec aiov;
 	int error;
 
 	if (uap->fromlenaddr) {
 		error = copyin(uap->fromlenaddr,
 		    &msg.msg_namelen, sizeof (msg.msg_namelen));
 		if (error != 0)
 			goto done2;
 	} else {
 		msg.msg_namelen = 0;
 	}
 	msg.msg_name = uap->from;
 	msg.msg_iov = &aiov;
 	msg.msg_iovlen = 1;
 	aiov.iov_base = uap->buf;
 	aiov.iov_len = uap->len;
 	msg.msg_control = 0;
 	msg.msg_flags = uap->flags;
 	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
 done2:
 	return (error);
 }
 
 #ifdef COMPAT_OLDSOCK
 int
 orecvfrom(struct thread *td, struct recvfrom_args *uap)
 {
 
 	uap->flags |= MSG_COMPAT;
 	return (sys_recvfrom(td, uap));
 }
 #endif
 
 #ifdef COMPAT_OLDSOCK
 int
 orecv(struct thread *td, struct orecv_args *uap)
 {
 	struct msghdr msg;
 	struct iovec aiov;
 
 	msg.msg_name = 0;
 	msg.msg_namelen = 0;
 	msg.msg_iov = &aiov;
 	msg.msg_iovlen = 1;
 	aiov.iov_base = uap->buf;
 	aiov.iov_len = uap->len;
 	msg.msg_control = 0;
 	msg.msg_flags = uap->flags;
 	return (recvit(td, uap->s, &msg, NULL));
 }
 
 /*
  * Old recvmsg.  This code takes advantage of the fact that the old msghdr
  * overlays the new one, missing only the flags, and with the (old) access
  * rights where the control fields are now.
  *
  * Only sizeof(struct omsghdr) bytes of the header are read from userspace;
  * msg_flags is then filled in from uap->flags with MSG_COMPAT added.
  * After a successful receive the address length is written back to the
  * user's msg_namelen (by recvit()) and, if any control data was delivered,
  * its length is written to the user's msg_accrightslen.
  *
  * Returns 0 or an errno value.
  */
 int
 orecvmsg(struct thread *td, struct orecvmsg_args *uap)
 {
 	struct msghdr msg;
 	struct iovec *iov;
 	int error;
 
 	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
 	if (error != 0)
 		return (error);
 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 	if (error != 0)
 		return (error);
 	msg.msg_flags = uap->flags | MSG_COMPAT;
 	msg.msg_iov = iov;
 	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
 	if (msg.msg_controllen && error == 0)
 		error = copyout(&msg.msg_controllen,
 		    &uap->msg->msg_accrightslen, sizeof (int));
 	free(iov, M_IOV);
 	return (error);
 }
 #endif
 
 int
 sys_recvmsg(struct thread *td, struct recvmsg_args *uap)
 {
 	struct msghdr msg;
 	struct iovec *uiov, *iov;
 	int error;
 
 	error = copyin(uap->msg, &msg, sizeof (msg));
 	if (error != 0)
 		return (error);
 	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
 	if (error != 0)
 		return (error);
 	msg.msg_flags = uap->flags;
 #ifdef COMPAT_OLDSOCK
 	msg.msg_flags &= ~MSG_COMPAT;
 #endif
 	uiov = msg.msg_iov;
 	msg.msg_iov = iov;
 	error = recvit(td, uap->s, &msg, NULL);
 	if (error == 0) {
 		msg.msg_iov = uiov;
 		error = copyout(&msg, uap->msg, sizeof(msg));
 	}
 	free(iov, M_IOV);
 	return (error);
 }
 
 int
 sys_shutdown(struct thread *td, struct shutdown_args *uap)
 {
 
 	return (kern_shutdown(td, uap->s, uap->how));
 }
 
 int
 kern_shutdown(struct thread *td, int s, int how)
 {
 	struct socket *so;
 	struct file *fp;
 	cap_rights_t rights;
 	int error;
 
 	AUDIT_ARG_FD(s);
 	error = getsock_cap(td, s, cap_rights_init(&rights, CAP_SHUTDOWN),
 	    &fp, NULL, NULL);
 	if (error == 0) {
 		so = fp->f_data;
 		error = soshutdown(so, how);
 		/*
 		 * Previous versions did not return ENOTCONN, but 0 in
 		 * case the socket was not connected. Some important
 		 * programs like syslogd up to r279016, 2015-02-19,
 		 * still depend on this behavior.
 		 */
 		if (error == ENOTCONN &&
 		    td->td_proc->p_osrel < P_OSREL_SHUTDOWN_ENOTCONN)
 			error = 0;
 		fdrop(fp, td);
 	}
 	return (error);
 }
 
 int
 sys_setsockopt(struct thread *td, struct setsockopt_args *uap)
 {
 
 	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
 	    uap->val, UIO_USERSPACE, uap->valsize));
 }
 
 /*
  * Kernel version of setsockopt.
  *
  * val may be a userspace or a kernel pointer; valseg says which
  * (UIO_USERSPACE or UIO_SYSSPACE, anything else panics).  Returns EFAULT
  * for a NULL val with a non-zero size, EINVAL for a size that is negative
  * when viewed as int, otherwise the result of sosetopt().  The descriptor
  * needs CAP_SETSOCKOPT.
  */
 int
 kern_setsockopt(struct thread *td, int s, int level, int name, void *val,
     enum uio_seg valseg, socklen_t valsize)
 {
 	struct sockopt sopt;
 	struct file *fp;
 	cap_rights_t rights;
 	int error;
 
 	if (val == NULL && valsize != 0)
 		return (EFAULT);
 	if ((int)valsize < 0)
 		return (EINVAL);
 
 	/* sopt_td is the calling thread for user buffers, NULL for kernel. */
 	if (valseg == UIO_USERSPACE)
 		sopt.sopt_td = td;
 	else if (valseg == UIO_SYSSPACE)
 		sopt.sopt_td = NULL;
 	else
 		panic("kern_setsockopt called with bad valseg");
 	sopt.sopt_dir = SOPT_SET;
 	sopt.sopt_level = level;
 	sopt.sopt_name = name;
 	sopt.sopt_val = val;
 	sopt.sopt_valsize = valsize;
 
 	AUDIT_ARG_FD(s);
 	error = getsock_cap(td, s, cap_rights_init(&rights, CAP_SETSOCKOPT),
 	    &fp, NULL, NULL);
 	if (error != 0)
 		return(error);
 
 	error = sosetopt(fp->f_data, &sopt);
 	fdrop(fp, td);
 	return(error);
 }
 
 int
 sys_getsockopt(struct thread *td, struct getsockopt_args *uap)
 {
 	socklen_t valsize;
 	int error;
 
 	if (uap->val) {
 		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
 		if (error != 0)
 			return (error);
 	}
 
 	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
 	    uap->val, UIO_USERSPACE, &valsize);
 
 	if (error == 0)
 		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
 	return (error);
 }
 
 /*
  * Kernel version of getsockopt.
  *
  * val may be a userspace or a kernel pointer; valseg says which
  * (UIO_USERSPACE or UIO_SYSSPACE, anything else panics).  valsize is
  * always a kernel pointer: on entry it holds the buffer size (forced to 0
  * when val is NULL), and once the socket has been looked up it is updated
  * from the size left in the sockopt by sogetopt().  Returns EINVAL for a
  * size that is negative when viewed as int.  The descriptor needs
  * CAP_GETSOCKOPT.
  */
 int
 kern_getsockopt(struct thread *td, int s, int level, int name, void *val,
     enum uio_seg valseg, socklen_t *valsize)
 {
 	struct sockopt sopt;
 	struct file *fp;
 	cap_rights_t rights;
 	int error;
 
 	if (val == NULL)
 		*valsize = 0;
 	if ((int)*valsize < 0)
 		return (EINVAL);
 
 	/* sopt_td is the calling thread for user buffers, NULL for kernel. */
 	if (valseg == UIO_USERSPACE)
 		sopt.sopt_td = td;
 	else if (valseg == UIO_SYSSPACE)
 		sopt.sopt_td = NULL;
 	else
 		panic("kern_getsockopt called with bad valseg");
 	sopt.sopt_dir = SOPT_GET;
 	sopt.sopt_level = level;
 	sopt.sopt_name = name;
 	sopt.sopt_val = val;
 	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
 
 	AUDIT_ARG_FD(s);
 	error = getsock_cap(td, s, cap_rights_init(&rights, CAP_GETSOCKOPT),
 	    &fp, NULL, NULL);
 	if (error != 0)
 		return (error);
 
 	error = sogetopt(fp->f_data, &sopt);
 	*valsize = sopt.sopt_valsize;
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * getsockname1() - Get socket name.
  */
 static int
 getsockname1(struct thread *td, struct getsockname_args *uap, int compat)
 {
 	struct sockaddr *sa;
 	socklen_t len;
 	int error;
 
 	error = copyin(uap->alen, &len, sizeof(len));
 	if (error != 0)
 		return (error);
 
 	error = kern_getsockname(td, uap->fdes, &sa, &len);
 	if (error != 0)
 		return (error);
 
 	if (len != 0) {
 #ifdef COMPAT_OLDSOCK
 		if (compat)
 			((struct osockaddr *)sa)->sa_family = sa->sa_family;
 #endif
 		error = copyout(sa, uap->asa, (u_int)len);
 	}
 	free(sa, M_SONAME);
 	if (error == 0)
 		error = copyout(&len, uap->alen, sizeof(len));
 	return (error);
 }
 
 /*
  * Fetch the local address of the socket behind descriptor fd.
  *
  * sa    receives the address returned by the protocol's pru_sockaddr
  *       method (NULL if it supplies none); the caller frees it with
  *       M_SONAME.  On error *sa is freed here and reset to NULL.
  * alen  in: caller's buffer size; out: MIN(*alen, sa_len), or 0 when no
  *       address was returned.
  *
  * Returns 0 or an errno value.  The descriptor needs CAP_GETSOCKNAME.
  */
 int
 kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
     socklen_t *alen)
 {
 	struct socket *so;
 	struct file *fp;
 	cap_rights_t rights;
 	socklen_t len;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 	error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_GETSOCKNAME),
 	    &fp, NULL, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 	*sa = NULL;
 	/* The protocol call runs with the socket's vnet as current. */
 	CURVNET_SET(so->so_vnet);
 	error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
 	CURVNET_RESTORE();
 	if (error != 0)
 		goto bad;
 	if (*sa == NULL)
 		len = 0;
 	else
 		len = MIN(*alen, (*sa)->sa_len);
 	*alen = len;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(*sa);
 #endif
 bad:
 	fdrop(fp, td);
 	if (error != 0 && *sa != NULL) {
 		free(*sa, M_SONAME);
 		*sa = NULL;
 	}
 	return (error);
 }
 
 /*
  * getsockname(2) system call entry point (no old-API address rewrite).
  */
 int
 sys_getsockname(struct thread *td, struct getsockname_args *uap)
 {
 	int error;
 
 	error = getsockname1(td, uap, 0);
 	return (error);
 }
 
 #ifdef COMPAT_OLDSOCK
 /*
  * Old getsockname(): same as sys_getsockname() but asks getsockname1()
  * for the old-API address rewrite.
  */
 int
 ogetsockname(struct thread *td, struct getsockname_args *uap)
 {
 	int error;
 
 	error = getsockname1(td, uap, 1);
 	return (error);
 }
 #endif /* COMPAT_OLDSOCK */
 
 /*
  * getpeername1() - Get name of peer for connected socket.
  */
 static int
 getpeername1(struct thread *td, struct getpeername_args *uap, int compat)
 {
 	struct sockaddr *sa;
 	socklen_t len;
 	int error;
 
 	error = copyin(uap->alen, &len, sizeof (len));
 	if (error != 0)
 		return (error);
 
 	error = kern_getpeername(td, uap->fdes, &sa, &len);
 	if (error != 0)
 		return (error);
 
 	if (len != 0) {
 #ifdef COMPAT_OLDSOCK
 		if (compat)
 			((struct osockaddr *)sa)->sa_family = sa->sa_family;
 #endif
 		error = copyout(sa, uap->asa, (u_int)len);
 	}
 	free(sa, M_SONAME);
 	if (error == 0)
 		error = copyout(&len, uap->alen, sizeof(len));
 	return (error);
 }
 
 /*
  * Fetch the peer address of the socket behind descriptor fd.
  *
  * sa    receives the address returned by the protocol's pru_peeraddr
  *       method (NULL if it supplies none); the caller frees it with
  *       M_SONAME.  On a protocol error *sa is freed here and reset to
  *       NULL.  On the ENOTCONN path *sa is not written at all.
  * alen  in: caller's buffer size; out: MIN(*alen, sa_len), or 0 when no
  *       address was returned.
  *
  * Returns 0 or an errno value; ENOTCONN if the socket is neither
  * SS_ISCONNECTED nor SS_ISCONFIRMING.  The descriptor needs
  * CAP_GETPEERNAME.
  */
 int
 kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
     socklen_t *alen)
 {
 	struct socket *so;
 	struct file *fp;
 	cap_rights_t rights;
 	socklen_t len;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 	error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_GETPEERNAME),
 	    &fp, NULL, NULL);
 	if (error != 0)
 		return (error);
 	so = fp->f_data;
 	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
 		error = ENOTCONN;
 		goto done;
 	}
 	*sa = NULL;
 	/* The protocol call runs with the socket's vnet as current. */
 	CURVNET_SET(so->so_vnet);
 	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
 	CURVNET_RESTORE();
 	if (error != 0)
 		goto bad;
 	if (*sa == NULL)
 		len = 0;
 	else
 		len = MIN(*alen, (*sa)->sa_len);
 	*alen = len;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(*sa);
 #endif
 bad:
 	if (error != 0 && *sa != NULL) {
 		free(*sa, M_SONAME);
 		*sa = NULL;
 	}
 done:
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * getpeername(2) system call entry point (no old-API address rewrite).
  */
 int
 sys_getpeername(struct thread *td, struct getpeername_args *uap)
 {
 	int error;
 
 	error = getpeername1(td, uap, 0);
 	return (error);
 }
 
 #ifdef COMPAT_OLDSOCK
 /*
  * Old getpeername(): same as sys_getpeername() but asks getpeername1()
  * for the old-API address rewrite.
  */
 int
 ogetpeername(struct thread *td, struct ogetpeername_args *uap)
 {
 	int error;
 
 	/* XXX uap should have type `getpeername_args *' to begin with. */
 	error = getpeername1(td, (struct getpeername_args *)uap, 1);
 	return (error);
 }
 #endif /* COMPAT_OLDSOCK */
 
 /*
  * Copy buflen bytes from the user address buf into a newly allocated mbuf
  * of the given type and return it in *mp.
  *
  * Returns 0 or an errno value.  A buflen above MCLBYTES yields EINVAL.
  * With COMPAT_OLDSOCK, an MT_SONAME request longer than MLEN but at most
  * 112 bytes is not rejected; it is truncated to MLEN.  On a copyin()
  * failure the mbuf is freed and *mp is left untouched.  For MT_SONAME the
  * sa_len field of the copied address is overwritten with buflen.
  */
 static int
 sockargs(struct mbuf **mp, char *buf, socklen_t buflen, int type)
 {
 	struct sockaddr *sa;
 	struct mbuf *m;
 	int error;
 
 	if (buflen > MLEN) {
 #ifdef COMPAT_OLDSOCK
 		if (type == MT_SONAME && buflen <= 112)
 			buflen = MLEN;		/* unix domain compat. hack */
 		else
 #endif
 			if (buflen > MCLBYTES)
 				return (EINVAL);
 	}
 	m = m_get2(buflen, M_WAITOK, type, 0);
 	m->m_len = buflen;
 	error = copyin(buf, mtod(m, void *), buflen);
 	if (error != 0)
 		(void) m_free(m);
 	else {
 		*mp = m;
 		if (type == MT_SONAME) {
 			sa = mtod(m, struct sockaddr *);
 
 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 			/*
 			 * NOTE(review): presumably repairs an old-layout
 			 * sockaddr whose family landed in the sa_len byte on
 			 * little-endian machines - confirm.
 			 */
 			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
 				sa->sa_family = sa->sa_len;
 #endif
 			sa->sa_len = buflen;
 		}
 	}
 	return (error);
 }
 
 /*
  * Copy a socket address of len bytes in from the user address uaddr.
  *
  * On success *namp points to an M_SONAME allocation, owned by the caller,
  * whose sa_len field has been set to len.  Returns ENAMETOOLONG if len
  * exceeds SOCK_MAXADDRLEN, EINVAL if len is smaller than the offset of
  * sa_data within struct sockaddr, or the copyin() error, in which case
  * nothing is stored in *namp.
  */
 int
 getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len)
 {
 	struct sockaddr *addr;
 	int error;
 
 	if (len > SOCK_MAXADDRLEN)
 		return (ENAMETOOLONG);
 	if (len < offsetof(struct sockaddr, sa_data[0]))
 		return (EINVAL);
 
 	addr = malloc(len, M_SONAME, M_WAITOK);
 	error = copyin(uaddr, addr, len);
 	if (error != 0) {
 		free(addr, M_SONAME);
 		return (error);
 	}
 
 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
 	if (addr->sa_family == 0 && addr->sa_len < AF_MAX)
 		addr->sa_family = addr->sa_len;
 #endif
 	addr->sa_len = len;
 	*namp = addr;
 	return (error);
 }
Index: head/sys/kern/uipc_usrreq.c
===================================================================
--- head/sys/kern/uipc_usrreq.c	(revision 319721)
+++ head/sys/kern/uipc_usrreq.c	(revision 319722)
@@ -1,2586 +1,2580 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1991, 1993
  *	The Regents of the University of California.
  * Copyright (c) 2004-2009 Robert N. M. Watson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
  */
 
 /*
  * UNIX Domain (Local) Sockets
  *
  * This is an implementation of UNIX (local) domain sockets.  Each socket has
  * an associated struct unpcb (UNIX protocol control block).  Stream sockets
  * may be connected to 0 or 1 other socket.  Datagram sockets may be
  * connected to 0, 1, or many other sockets.  Sockets may be created and
  * connected in pairs (socketpair(2)), or bound/connected to using the file
  * system name space.  For most purposes, only the receive socket buffer is
  * used, as sending on one socket delivers directly to the receive socket
  * buffer of a second socket.
  *
  * The implementation is substantially complicated by the fact that
  * "ancillary data", such as file descriptors or credentials, may be passed
  * across UNIX domain sockets.  The potential for passing UNIX domain sockets
  * over other UNIX domain sockets requires the implementation of a simple
  * garbage collector to find and tear down cycles of disconnected sockets.
  *
  * TODO:
  *	RDM
  *	rethink name space problems
  *	need a proper out-of-band
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ddb.h"
 
 #include <sys/param.h>
 #include <sys/capsicum.h>
 #include <sys/domain.h>
 #include <sys/fcntl.h>
 #include <sys/malloc.h>		/* XXX must be before <sys/file.h> */
 #include <sys/eventhandler.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mbuf.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/signalvar.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/taskqueue.h>
 #include <sys/un.h>
 #include <sys/unpcb.h>
 #include <sys/vnode.h>
 
 #include <net/vnet.h>
 
 #ifdef DDB
 #include <ddb/ddb.h>
 #endif
 
 #include <security/mac/mac_framework.h>
 
 #include <vm/uma.h>
 
 MALLOC_DECLARE(M_FILECAPS);
 
 /*
  * Locking key:
  * (l)	Locked using list lock
  * (g)	Locked using linkage lock
  */
 
 static uma_zone_t	unp_zone;
 static unp_gen_t	unp_gencnt;	/* (l) */
 static u_int		unp_count;	/* (l) Count of local sockets. */
 static ino_t		unp_ino;	/* Prototype for fake inode numbers. */
 static int		unp_rights;	/* (g) File descriptors in flight. */
 static struct unp_head	unp_shead;	/* (l) List of stream sockets. */
 static struct unp_head	unp_dhead;	/* (l) List of datagram sockets. */
 static struct unp_head	unp_sphead;	/* (l) List of seqpacket sockets. */
 
 /*
  * One entry on the unp_defers list: a file whose close has been handed to
  * the taskqueue (see the unp_defer_task comment below).
  */
 struct unp_defer {
 	SLIST_ENTRY(unp_defer) ud_link;	/* Linkage on unp_defers. */
 	struct file *ud_fp;		/* The deferred file. */
 };
 static SLIST_HEAD(, unp_defer) unp_defers;
 /* Exported read-only as the net.local.deferred sysctl. */
 static int unp_defers_count;
 
 static const struct sockaddr	sun_noname = { sizeof(sun_noname), AF_LOCAL };
 
 /*
  * Garbage collection of cyclic file descriptor/socket references occurs
  * asynchronously in a taskqueue context in order to avoid recursion and
  * reentrance in the UNIX domain socket, file descriptor, and socket layer
  * code.  See unp_gc() for a full description.
  */
 static struct timeout_task unp_gc_task;
 
 /*
  * The close of unix domain sockets attached as SCM_RIGHTS is
  * postponed to the taskqueue, to avoid arbitrary recursion depth.
  * The attached sockets might themselves have other sockets attached.
  */
 static struct task	unp_defer_task;
 
 /*
  * Both send and receive buffers are allocated PIPSIZ bytes of buffering for
  * stream sockets, although the total for sender and receiver is actually
  * only PIPSIZ.
  *
  * Datagram sockets really use the sendspace as the maximum datagram size,
  * and don't really want to reserve the sendspace.  Their recvspace should be
  * large enough for at least one max-size datagram plus address.
  */
 #ifndef PIPSIZ
 #define	PIPSIZ	8192
 #endif
 static u_long	unpst_sendspace = PIPSIZ;
 static u_long	unpst_recvspace = PIPSIZ;
 static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
 static u_long	unpdg_recvspace = 4*1024;
 static u_long	unpsp_sendspace = PIPSIZ;	/* really max datagram size */
 static u_long	unpsp_recvspace = PIPSIZ;
 
 static SYSCTL_NODE(_net, PF_LOCAL, local, CTLFLAG_RW, 0, "Local domain");
 static SYSCTL_NODE(_net_local, SOCK_STREAM, stream, CTLFLAG_RW, 0,
     "SOCK_STREAM");
 static SYSCTL_NODE(_net_local, SOCK_DGRAM, dgram, CTLFLAG_RW, 0, "SOCK_DGRAM");
 static SYSCTL_NODE(_net_local, SOCK_SEQPACKET, seqpacket, CTLFLAG_RW, 0,
     "SOCK_SEQPACKET");
 
 SYSCTL_ULONG(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
 	   &unpst_sendspace, 0, "Default stream send space.");
 SYSCTL_ULONG(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpst_recvspace, 0, "Default stream receive space.");
 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
 	   &unpdg_sendspace, 0, "Default datagram send space.");
 SYSCTL_ULONG(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpdg_recvspace, 0, "Default datagram receive space.");
 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW,
 	   &unpsp_sendspace, 0, "Default seqpacket send space.");
 SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW,
 	   &unpsp_recvspace, 0, "Default seqpacket receive space.");
 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0,
     "File descriptors in flight.");
 SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD,
     &unp_defers_count, 0,
     "File descriptors deferred to taskqueue for close.");
 
 /*
  * Locking and synchronization:
  *
- * Three types of locks exit in the local domain socket implementation: a
- * global list mutex, a global linkage rwlock, and per-unpcb mutexes.  Of the
- * global locks, the list lock protects the socket count, global generation
- * number, and stream/datagram global lists.  The linkage lock protects the
+ * Two types of locks exist in the local domain socket implementation: a
+ * global linkage rwlock and per-unpcb mutexes.  The linkage lock protects
+ * the socket count, global generation number, stream/datagram global lists and
  * interconnection of unpcbs, the v_socket and unp_vnode pointers, and can be
  * held exclusively over the acquisition of multiple unpcb locks to prevent
  * deadlock.
  *
  * UNIX domain sockets each have an unpcb hung off of their so_pcb pointer,
  * allocated in pru_attach() and freed in pru_detach().  The validity of that
  * pointer is an invariant, so no lock is required to dereference the so_pcb
  * pointer if a valid socket reference is held by the caller.  In practice,
  * this is always true during operations performed on a socket.  Each unpcb
  * has a back-pointer to its socket, unp_socket, which will be stable under
  * the same circumstances.
  *
  * This pointer may only be safely dereferenced as long as a valid reference
  * to the unpcb is held.  Typically, this reference will be from the socket,
  * or from another unpcb when the referring unpcb's lock is held (in order
  * that the reference not be invalidated during use).  For example, to follow
  * unp->unp_conn->unp_socket, you need to hold the lock on unp, not unp_conn,
  * as unp_socket remains valid as long as the reference to unp_conn is valid.
  *
  * Fields of unpcbs are locked using a per-unpcb lock, unp_mtx.  Individual
  * atomic reads without the lock may be performed "lockless", but more
  * complex reads and read-modify-writes require the mutex to be held.  No
  * lock order is defined between unpcb locks -- multiple unpcb locks may be
  * acquired at the same time only when holding the linkage rwlock
  * exclusively, which prevents deadlocks.
  *
  * Blocking with UNIX domain sockets is a tricky issue: unlike most network
  * protocols, bind() is a non-atomic operation, and connect() requires
  * potential sleeping in the protocol, due to potentially waiting on local or
  * distributed file systems.  We try to separate "lookup" operations, which
  * may sleep, and the IPC operations themselves, which typically can occur
  * with relative atomicity as locks can be held over the entire operation.
  *
  * Another tricky issue is simultaneous multi-threaded or multi-process
  * access to a single UNIX domain socket.  These are handled by the flags
  * UNP_CONNECTING and UNP_BINDING, which prevent concurrent connecting or
  * binding, both of which involve dropping UNIX domain socket locks in order
  * to perform namei() and other file system operations.
  */
 static struct rwlock	unp_link_rwlock;
-static struct mtx	unp_list_lock;
 static struct mtx	unp_defers_lock;
 
 #define	UNP_LINK_LOCK_INIT()		rw_init(&unp_link_rwlock,	\
 					    "unp_link_rwlock")
 
 #define	UNP_LINK_LOCK_ASSERT()	rw_assert(&unp_link_rwlock,	\
 					    RA_LOCKED)
 #define	UNP_LINK_UNLOCK_ASSERT()	rw_assert(&unp_link_rwlock,	\
 					    RA_UNLOCKED)
 
 #define	UNP_LINK_RLOCK()		rw_rlock(&unp_link_rwlock)
 #define	UNP_LINK_RUNLOCK()		rw_runlock(&unp_link_rwlock)
 #define	UNP_LINK_WLOCK()		rw_wlock(&unp_link_rwlock)
 #define	UNP_LINK_WUNLOCK()		rw_wunlock(&unp_link_rwlock)
 #define	UNP_LINK_WLOCK_ASSERT()		rw_assert(&unp_link_rwlock,	\
 					    RA_WLOCKED)
+#define	UNP_LINK_WOWNED()		rw_wowned(&unp_link_rwlock)
 
-#define	UNP_LIST_LOCK_INIT()		mtx_init(&unp_list_lock,	\
-					    "unp_list_lock", NULL, MTX_DEF)
-#define	UNP_LIST_LOCK()			mtx_lock(&unp_list_lock)
-#define	UNP_LIST_UNLOCK()		mtx_unlock(&unp_list_lock)
-
 #define	UNP_DEFERRED_LOCK_INIT()	mtx_init(&unp_defers_lock, \
 					    "unp_defer", NULL, MTX_DEF)
 #define	UNP_DEFERRED_LOCK()		mtx_lock(&unp_defers_lock)
 #define	UNP_DEFERRED_UNLOCK()		mtx_unlock(&unp_defers_lock)
 
 #define UNP_PCB_LOCK_INIT(unp)		mtx_init(&(unp)->unp_mtx,	\
 					    "unp_mtx", "unp_mtx",	\
 					    MTX_DUPOK|MTX_DEF|MTX_RECURSE)
 #define	UNP_PCB_LOCK_DESTROY(unp)	mtx_destroy(&(unp)->unp_mtx)
 #define	UNP_PCB_LOCK(unp)		mtx_lock(&(unp)->unp_mtx)
 #define	UNP_PCB_UNLOCK(unp)		mtx_unlock(&(unp)->unp_mtx)
 #define	UNP_PCB_LOCK_ASSERT(unp)	mtx_assert(&(unp)->unp_mtx, MA_OWNED)
 
 static int	uipc_connect2(struct socket *, struct socket *);
 static int	uipc_ctloutput(struct socket *, struct sockopt *);
 static int	unp_connect(struct socket *, struct sockaddr *,
 		    struct thread *);
 static int	unp_connectat(int, struct socket *, struct sockaddr *,
 		    struct thread *);
 static int	unp_connect2(struct socket *so, struct socket *so2, int);
 static void	unp_disconnect(struct unpcb *unp, struct unpcb *unp2);
 static void	unp_dispose(struct socket *so);
 static void	unp_dispose_mbuf(struct mbuf *);
 static void	unp_shutdown(struct unpcb *);
 static void	unp_drop(struct unpcb *);
 static void	unp_gc(__unused void *, int);
 static void	unp_scan(struct mbuf *, void (*)(struct filedescent **, int));
 static void	unp_discard(struct file *);
 static void	unp_freerights(struct filedescent **, int);
 static void	unp_init(void);
 static int	unp_internalize(struct mbuf **, struct thread *);
 static void	unp_internalize_fp(struct file *);
 static int	unp_externalize(struct mbuf *, struct mbuf **, int);
 static int	unp_externalize_fp(struct file *);
 static struct mbuf	*unp_addsockcred(struct thread *, struct mbuf *);
 static void	unp_process_defers(void * __unused, int);
 
 /*
  * Definitions of protocols supported in the LOCAL domain.
  */
 static struct domain localdomain;
 static struct pr_usrreqs uipc_usrreqs_dgram, uipc_usrreqs_stream;
 static struct pr_usrreqs uipc_usrreqs_seqpacket;
 /*
  * One protosw entry per supported socket type (stream, datagram,
  * seqpacket).  All three share uipc_ctloutput and carry PR_RIGHTS; they
  * differ in their remaining flags and in the pr_usrreqs table they use.
  */
 static struct protosw localsw[] = {
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&localdomain,
 	.pr_flags =		PR_CONNREQUIRED|PR_WANTRCVD|PR_RIGHTS,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_usrreqs =		&uipc_usrreqs_stream
 },
 {
 	.pr_type =		SOCK_DGRAM,
 	.pr_domain =		&localdomain,
 	.pr_flags =		PR_ATOMIC|PR_ADDR|PR_RIGHTS,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_usrreqs =		&uipc_usrreqs_dgram
 },
 {
 	.pr_type =		SOCK_SEQPACKET,
 	.pr_domain =		&localdomain,
 
 	/*
 	 * XXXRW: For now, PR_ADDR because soreceive will bump into them
 	 * due to our use of sbappendaddr.  A new sbappend variant is needed
 	 * that supports both atomic record writes and control data.
 	 */
 	.pr_flags =		PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|PR_WANTRCVD|
 				    PR_RIGHTS,
 	.pr_ctloutput =		&uipc_ctloutput,
 	.pr_usrreqs =		&uipc_usrreqs_seqpacket,
 },
 };
 
 /*
  * The AF_LOCAL domain descriptor: ties the protocol table above to the
  * domain's init, externalize and dispose hooks.
  */
 static struct domain localdomain = {
 	.dom_family =		AF_LOCAL,
 	.dom_name =		"local",
 	.dom_init =		unp_init,
 	.dom_externalize =	unp_externalize,
 	.dom_dispose =		unp_dispose,
 	.dom_protosw =		localsw,
 	/* One past the last element of localsw. */
 	.dom_protoswNPROTOSW =	&localsw[nitems(localsw)]
 };
 DOMAIN_SET(local);
 
 /*
  * Abort handler for a local socket: if the socket has a connected peer,
  * call unp_drop() on that peer.  Nothing is done when unp_conn is NULL.
  */
 static void
 uipc_abort(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_abort: unp == NULL"));
 
 	/* Linkage lock held exclusively, so two pcb locks may be taken. */
 	UNP_LINK_WLOCK();
 	UNP_PCB_LOCK(unp);
 	unp2 = unp->unp_conn;
 	if (unp2 != NULL) {
 		UNP_PCB_LOCK(unp2);
 		unp_drop(unp2);
 		UNP_PCB_UNLOCK(unp2);
 	}
 	UNP_PCB_UNLOCK(unp);
 	UNP_LINK_WUNLOCK();
 }
 
 /*
  * Report the address of the socket connected to 'so' through 'nam'.
  *
  * A sockaddr_un-sized buffer is allocated from M_SONAME and filled with
  * the peer's bound address when there is a peer and it has one; otherwise
  * (the peer may already have closed, or was never bound) the anonymous
  * sun_noname address is copied instead.  Always returns 0.
  */
 static int
 uipc_accept(struct socket *so, struct sockaddr **nam)
 {
 	const struct sockaddr *src;
 	struct unpcb *unp, *peer;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_accept: unp == NULL"));
 
 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	UNP_LINK_RLOCK();
 	peer = unp->unp_conn;
 	if (peer == NULL || peer->unp_addr == NULL) {
 		/* No peer, or an unbound one: hand back the nameless address. */
 		src = &sun_noname;
 		bcopy(src, *nam, src->sa_len);
 	} else {
 		UNP_PCB_LOCK(peer);
 		src = (struct sockaddr *) peer->unp_addr;
 		bcopy(src, *nam, src->sa_len);
 		UNP_PCB_UNLOCK(peer);
 	}
 	UNP_LINK_RUNLOCK();
 	return (0);
 }
 
 /*
  * Attach a new unpcb to a local socket.
  *
  * If either socket buffer has no high-water mark yet, buffer space is
  * reserved using the per-type defaults.  A zeroed unpcb is then allocated,
  * linked to the socket, and inserted on the global list matching the
  * socket type.  Returns the soreserve() error, ENOBUFS if the unpcb cannot
  * be allocated, or 0.  An unknown socket type panics.
  */
 static int
 uipc_attach(struct socket *so, int proto, struct thread *td)
 {
 	u_long sendspace, recvspace;
 	struct unpcb *unp;
 	int error;
+	bool locked;
 
 	KASSERT(so->so_pcb == NULL, ("uipc_attach: so_pcb != NULL"));
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 		switch (so->so_type) {
 		case SOCK_STREAM:
 			sendspace = unpst_sendspace;
 			recvspace = unpst_recvspace;
 			break;
 
 		case SOCK_DGRAM:
 			sendspace = unpdg_sendspace;
 			recvspace = unpdg_recvspace;
 			break;
 
 		case SOCK_SEQPACKET:
 			sendspace = unpsp_sendspace;
 			recvspace = unpsp_recvspace;
 			break;
 
 		default:
 			panic("uipc_attach");
 		}
 		error = soreserve(so, sendspace, recvspace);
 		if (error)
 			return (error);
 	}
 	/* M_NOWAIT: allocation may fail, reported as ENOBUFS. */
 	unp = uma_zalloc(unp_zone, M_NOWAIT | M_ZERO);
 	if (unp == NULL)
 		return (ENOBUFS);
 	LIST_INIT(&unp->unp_refs);
 	UNP_PCB_LOCK_INIT(unp);
 	unp->unp_socket = so;
 	so->so_pcb = unp;
 	unp->unp_refcount = 1;
-	if (so->so_head != NULL)
+	if (so->so_listen != NULL)
 		unp->unp_flags |= UNP_NASCENT;
 
-	UNP_LIST_LOCK();
 	/*
 	 * The linkage lock may already be write-held by this thread; only
 	 * acquire (and later release) it here when it is not.
 	 */
+	if ((locked = UNP_LINK_WOWNED()) == false)
+		UNP_LINK_WLOCK();
+
 	unp->unp_gencnt = ++unp_gencnt;
 	unp_count++;
 	switch (so->so_type) {
 	case SOCK_STREAM:
 		LIST_INSERT_HEAD(&unp_shead, unp, unp_link);
 		break;
 
 	case SOCK_DGRAM:
 		LIST_INSERT_HEAD(&unp_dhead, unp, unp_link);
 		break;
 
 	case SOCK_SEQPACKET:
 		LIST_INSERT_HEAD(&unp_sphead, unp, unp_link);
 		break;
 
 	default:
 		panic("uipc_attach");
 	}
-	UNP_LIST_UNLOCK();
 
+	if (locked == false)
+		UNP_LINK_WUNLOCK();
+
 	return (0);
 }
 
 /*
  * Bind a local socket to a file system name, looked up relative to the
  * descriptor 'fd'.
  *
  * Returns EAFNOSUPPORT for a non-AF_UNIX address, EINVAL for a bad length
  * or an already bound socket, EALREADY if another bind is in progress,
  * EADDRINUSE if the name already exists, or an error from namei(),
  * vn_start_write(), the MAC check or VOP_CREATE().  On success the new
  * vnode and a copy of the address are recorded in the unpcb and 0 is
  * returned.  UNP_BINDING is set for the duration and cleared on every
  * exit path taken after it was set.
  */
 static int
 uipc_bindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 	struct vattr vattr;
 	int error, namelen;
 	struct nameidata nd;
 	struct unpcb *unp;
 	struct vnode *vp;
 	struct mount *mp;
 	cap_rights_t rights;
 	char *buf;
 
 	if (nam->sa_family != AF_UNIX)
 		return (EAFNOSUPPORT);
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_bind: unp == NULL"));
 
 	if (soun->sun_len > sizeof(struct sockaddr_un))
 		return (EINVAL);
 	/* Length of the path portion only; an empty path is rejected. */
 	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
 	if (namelen <= 0)
 		return (EINVAL);
 
 	/*
 	 * We don't allow simultaneous bind() calls on a single UNIX domain
 	 * socket, so flag in-progress operations, and return an error if an
 	 * operation is already in progress.
 	 *
 	 * Historically, we have not allowed a socket to be rebound, so this
 	 * also returns an error.  Not allowing re-binding simplifies the
 	 * implementation and avoids a great many possible failure modes.
 	 */
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_vnode != NULL) {
 		UNP_PCB_UNLOCK(unp);
 		return (EINVAL);
 	}
 	if (unp->unp_flags & UNP_BINDING) {
 		UNP_PCB_UNLOCK(unp);
 		return (EALREADY);
 	}
 	unp->unp_flags |= UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 
 	/* NUL-terminated private copy of the path for namei(). */
 	buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
 	bcopy(soun->sun_path, buf, namelen);
 	buf[namelen] = 0;
 
 restart:
 	NDINIT_ATRIGHTS(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME | NOCACHE,
 	    UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_BINDAT), td);
 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
 	error = namei(&nd);
 	if (error)
 		goto error;
 	vp = nd.ni_vp;
 	/*
 	 * Either the name already exists, or a write could not be started
 	 * without sleeping.  Release the lookup state, then fail with
 	 * EADDRINUSE in the first case or wait and redo the lookup in the
 	 * second.
 	 */
 	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		if (nd.ni_dvp == vp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		if (vp != NULL) {
 			vrele(vp);
 			error = EADDRINUSE;
 			goto error;
 		}
 		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
 		if (error)
 			goto error;
 		goto restart;
 	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VSOCK;
 	vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
 #ifdef MAC
 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 	    &vattr);
 #endif
 	/* error is 0 here from namei() unless the MAC check just set it. */
 	if (error == 0)
 		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
 	if (error) {
 		vn_finished_write(mp);
 		goto error;
 	}
 	vp = nd.ni_vp;
 	ASSERT_VOP_ELOCKED(vp, "uipc_bind");
 	soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
 
 	/* Publish the binding: vnode and address are set under both locks. */
 	UNP_LINK_WLOCK();
 	UNP_PCB_LOCK(unp);
 	VOP_UNP_BIND(vp, unp);
 	unp->unp_vnode = vp;
 	unp->unp_addr = soun;
 	unp->unp_flags &= ~UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 	UNP_LINK_WUNLOCK();
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 	free(buf, M_TEMP);
 	return (0);
 
 error:
 	/* Failure: drop the in-progress flag and the path copy. */
 	UNP_PCB_LOCK(unp);
 	unp->unp_flags &= ~UNP_BINDING;
 	UNP_PCB_UNLOCK(unp);
 	free(buf, M_TEMP);
 	return (error);
 }
 
 /*
  * Bind relative to the current working directory: uipc_bindat() with
  * AT_FDCWD as the directory descriptor.
  */
 static int
 uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (uipc_bindat(AT_FDCWD, so, nam, td));
 }
 
 /*
  * Connect 'so' to the address 'nam' by calling unp_connect() with the
  * linkage lock held exclusively.  Returns unp_connect()'s result.
  */
 static int
 uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error;
 
 	KASSERT(td == curthread, ("uipc_connect: td != curthread"));
 	UNP_LINK_WLOCK();
 	error = unp_connect(so, nam, td);
 	UNP_LINK_WUNLOCK();
 	return (error);
 }
 
 /*
  * Like uipc_connect(), but passes the descriptor 'fd' through to
  * unp_connectat().  The linkage lock is held exclusively across the call.
  */
 static int
 uipc_connectat(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 	int error;
 
 	KASSERT(td == curthread, ("uipc_connectat: td != curthread"));
 	UNP_LINK_WLOCK();
 	error = unp_connectat(fd, so, nam, td);
 	UNP_LINK_WUNLOCK();
 	return (error);
 }
 
 /*
  * Close handler: disconnect from the peer, if any.  For a listening
  * socket that is bound to a vnode, also detach from the vnode and release
  * the vnode reference once the locks have been dropped.
  */
 static void
 uipc_close(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
+	struct vnode *vp = NULL;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_close: unp == NULL"));
 
 	UNP_LINK_WLOCK();
 	UNP_PCB_LOCK(unp);
 	unp2 = unp->unp_conn;
 	if (unp2 != NULL) {
 		UNP_PCB_LOCK(unp2);
 		unp_disconnect(unp, unp2);
 		UNP_PCB_UNLOCK(unp2);
 	}
+	if (SOLISTENING(so) && ((vp = unp->unp_vnode) != NULL)) {
+		VOP_UNP_DETACH(vp);
+		unp->unp_vnode = NULL;
+	}
 	UNP_PCB_UNLOCK(unp);
 	UNP_LINK_WUNLOCK();
 	/* vrele() is called only after both locks are released. */
+	if (vp)
+		vrele(vp);
 }
 
 /*
  * Connect two sockets directly to each other via unp_connect2() with the
  * PRU_CONNECT2 request.  Both pcb locks are taken under the exclusive
  * linkage lock.  Returns unp_connect2()'s result.
  */
 static int
 uipc_connect2(struct socket *so1, struct socket *so2)
 {
 	struct unpcb *unp, *unp2;
 	int error;
 
 	UNP_LINK_WLOCK();
 	unp = so1->so_pcb;
 	KASSERT(unp != NULL, ("uipc_connect2: unp == NULL"));
 	UNP_PCB_LOCK(unp);
 	unp2 = so2->so_pcb;
 	KASSERT(unp2 != NULL, ("uipc_connect2: unp2 == NULL"));
 	UNP_PCB_LOCK(unp2);
 	error = unp_connect2(so1, so2, PRU_CONNECT2);
 	UNP_PCB_UNLOCK(unp2);
 	UNP_PCB_UNLOCK(unp);
 	UNP_LINK_WUNLOCK();
 	return (error);
 }
 
 /*
  * Detach and tear down the unpcb of a socket.
  *
  * The unpcb is removed from its global list and the generation and socket
  * counters are updated.  Unless the unpcb is still UNP_NASCENT, it is then
  * detached from its vnode, disconnected from its peer, and every unpcb on
  * its unp_refs list is dropped.  Finally the socket/pcb link is cleared,
  * the saved address is freed, and the unpcb itself is freed once its
  * reference count reaches zero.  If any rights were in flight
  * (unp_rights != 0) the garbage collection task is scheduled.
  */
 static void
 uipc_detach(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 	struct sockaddr_un *saved_unp_addr;
 	struct vnode *vp;
 	int freeunp, local_unp_rights;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_detach: unp == NULL"));
 
 	/* Defaults used when the nascent path jumps straight to teardown. */
 	vp = NULL;
 	local_unp_rights = 0;
 
-	UNP_LIST_LOCK();
+	UNP_LINK_WLOCK();
 	LIST_REMOVE(unp, unp_link);
 	unp->unp_gencnt = ++unp_gencnt;
 	--unp_count;
-	UNP_LIST_UNLOCK();
-
-	if ((unp->unp_flags & UNP_NASCENT) != 0) {
-		UNP_PCB_LOCK(unp);
-		goto teardown;
-	}
-	UNP_LINK_WLOCK();
 	UNP_PCB_LOCK(unp);
+	if ((unp->unp_flags & UNP_NASCENT) != 0)
+		goto teardown;
 
 	if ((vp = unp->unp_vnode) != NULL) {
 		VOP_UNP_DETACH(vp);
 		unp->unp_vnode = NULL;
 	}
 	unp2 = unp->unp_conn;
 	if (unp2 != NULL) {
 		UNP_PCB_LOCK(unp2);
 		unp_disconnect(unp, unp2);
 		UNP_PCB_UNLOCK(unp2);
 	}
 
 	/*
 	 * We hold the linkage lock exclusively, so it's OK to acquire
 	 * multiple pcb locks at a time.
 	 */
 	while (!LIST_EMPTY(&unp->unp_refs)) {
 		struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
 
 		UNP_PCB_LOCK(ref);
 		unp_drop(ref);
 		UNP_PCB_UNLOCK(ref);
 	}
 	/* Snapshot taken while the linkage lock is still held. */
 	local_unp_rights = unp_rights;
-	UNP_LINK_WUNLOCK();
 teardown:
+	UNP_LINK_WUNLOCK();
 	unp->unp_socket->so_pcb = NULL;
 	saved_unp_addr = unp->unp_addr;
 	unp->unp_addr = NULL;
 	unp->unp_refcount--;
 	freeunp = (unp->unp_refcount == 0);
 	if (saved_unp_addr != NULL)
 		free(saved_unp_addr, M_SONAME);
 	if (freeunp) {
 		/* Last reference: the lock is destroyed rather than dropped. */
 		UNP_PCB_LOCK_DESTROY(unp);
 		uma_zfree(unp_zone, unp);
 	} else
 		UNP_PCB_UNLOCK(unp);
 	if (vp)
 		vrele(vp);
 	if (local_unp_rights)
 		taskqueue_enqueue_timeout(taskqueue_thread, &unp_gc_task, -1);
 }
 
 /*
  * Disconnect 'so' from its peer, if it has one, using unp_disconnect()
  * with both pcb locks held under the exclusive linkage lock.  Always
  * returns 0, including when the socket was not connected.
  */
 static int
 uipc_disconnect(struct socket *so)
 {
 	struct unpcb *unp, *unp2;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_disconnect: unp == NULL"));
 
 	UNP_LINK_WLOCK();
 	UNP_PCB_LOCK(unp);
 	unp2 = unp->unp_conn;
 	if (unp2 != NULL) {
 		UNP_PCB_LOCK(unp2);
 		unp_disconnect(unp, unp2);
 		UNP_PCB_UNLOCK(unp2);
 	}
 	UNP_PCB_UNLOCK(unp);
 	UNP_LINK_WUNLOCK();
 	return (0);
 }
 
 /*
  * Put a stream or seqpacket socket into the listening state.
  *
  * Returns EOPNOTSUPP for other socket types.  A socket without a bound
  * vnode yields EINVAL if it is connected and EDESTADDRREQ otherwise.
  * Otherwise returns the result of solisten_proto_check(); when that is 0
  * the caller's credentials are recorded in unp_peercred and
  * solisten_proto() is called with the requested backlog.
  */
 static int
 uipc_listen(struct socket *so, int backlog, struct thread *td)
 {
 	struct unpcb *unp;
 	int error;
 
 	if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET)
 		return (EOPNOTSUPP);
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_listen: unp == NULL"));
 
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_vnode == NULL) {
 		/* Already connected or not bound to an address. */
 		error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ;
 		UNP_PCB_UNLOCK(unp);
 		return (error);
 	}
 
 	/* The socket lock is taken inside the pcb lock. */
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	if (error == 0) {
 		cru2x(td->td_ucred, &unp->unp_peercred);
 		solisten_proto(so, backlog);
 	}
 	SOCK_UNLOCK(so);
 	UNP_PCB_UNLOCK(unp);
 	return (error);
 }
 
 /*
  * Return the address of the peer of 'so' through 'nam'.
  *
  * A sockaddr_un-sized buffer is allocated from M_SONAME.  It receives the
  * peer's bound address if there is a peer with one, and the anonymous
  * sun_noname address in every other case.  Always returns 0.
  */
 static int
 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	const struct sockaddr *src;
 	struct unpcb *unp, *peer;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_peeraddr: unp == NULL"));
 
 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	UNP_LINK_RLOCK();
 	/*
 	 * XXX: Historical note kept from the original code: the connected
 	 * test reportedly always failed even with an established
 	 * connection, so the unconnected branch exists as a workaround that
 	 * returns a PF_LOCAL sockaddr.
 	 */
 	peer = unp->unp_conn;
 	if (peer == NULL) {
 		src = &sun_noname;
 		bcopy(src, *nam, src->sa_len);
 	} else {
 		UNP_PCB_LOCK(peer);
 		src = (peer->unp_addr != NULL) ?
 		    (const struct sockaddr *) peer->unp_addr : &sun_noname;
 		bcopy(src, *nam, src->sa_len);
 		UNP_PCB_UNLOCK(peer);
 	}
 	UNP_LINK_RUNLOCK();
 	return (0);
 }
 
 /*
  * Called after data has been received on a stream or seqpacket socket:
  * re-evaluate backpressure on the connected sender and wake up any writer
  * waiting on it.  Returns 0 in all cases, including when there is no peer.
  */
 static int
 uipc_rcvd(struct socket *so, int flags)
 {
 	struct unpcb *unp, *unp2;
 	struct socket *so2;
 	u_int mbcnt, sbcc;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
 	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
 	    ("%s: socktype %d", __func__, so->so_type));
 
 	/*
 	 * Adjust backpressure on sender and wakeup any waiting to write.
 	 *
 	 * The unp lock is acquired to maintain the validity of the unp_conn
 	 * pointer; no lock on unp2 is required as unp2->unp_socket will be
 	 * static as long as we don't permit unp2 to disconnect from unp,
 	 * which is prevented by the lock on unp.  We cache values from
 	 * so_rcv to avoid holding the so_rcv lock over the entire
 	 * transaction on the remote so_snd.
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	mbcnt = so->so_rcv.sb_mbcnt;
 	sbcc = sbavail(&so->so_rcv);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	/*
 	 * There is a benign race condition at this point.  If we're planning to
 	 * clear SB_STOP, but uipc_send is called on the connected socket at
 	 * this instant, it might add data to the sockbuf and set SB_STOP.  Then
 	 * we would erroneously clear SB_STOP below, even though the sockbuf is
 	 * full.  The race is benign because the only ill effect is to allow the
 	 * sockbuf to exceed its size limit, and the size limits are not
 	 * strictly guaranteed anyway.
 	 */
 	UNP_PCB_LOCK(unp);
 	unp2 = unp->unp_conn;
 	if (unp2 == NULL) {
 		UNP_PCB_UNLOCK(unp);
 		return (0);
 	}
 	so2 = unp2->unp_socket;
 	SOCKBUF_LOCK(&so2->so_snd);
 	/* Our receive buffer is back under the sender's limits: unblock it. */
 	if (sbcc < so2->so_snd.sb_hiwat && mbcnt < so2->so_snd.sb_mbmax)
 		so2->so_snd.sb_flags &= ~SB_STOP;
 	/*
 	 * No explicit SOCKBUF_UNLOCK here; presumably sowwakeup_locked()
 	 * releases the so_snd lock -- confirm against its definition.
 	 */
 	sowwakeup_locked(so2);
 	UNP_PCB_UNLOCK(unp);
 	return (0);
 }
 
 /*
  * Send data (and optional control data) on a local socket by appending it
  * directly to the receive buffer of the connected peer.
  *
  * 'nam', when non-NULL, names a destination to connect to for this send;
  * datagram sockets are disconnected again afterwards.  PRUS_OOB is not
  * supported (EOPNOTSUPP).  PRUS_EOF additionally shuts down the sending
  * side after the data has been queued.  Whatever remains in 'm' and
  * 'control' on exit is freed here.  Returns 0 or an errno value.
  */
 static int
 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
     struct mbuf *control, struct thread *td)
 {
 	struct unpcb *unp, *unp2;
 	struct socket *so2;
 	u_int mbcnt, sbcc;
 	int error = 0;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
 	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM ||
 	    so->so_type == SOCK_SEQPACKET,
 	    ("%s: socktype %d", __func__, so->so_type));
 
 	if (flags & PRUS_OOB) {
 		error = EOPNOTSUPP;
 		goto release;
 	}
 	if (control != NULL && (error = unp_internalize(&control, td)))
 		goto release;
 	/*
 	 * Sends that may connect/disconnect (nam != NULL) or shut down
 	 * (PRUS_EOF) take the linkage lock exclusively; plain sends only
 	 * need it shared.  The matching unlock below uses the same test.
 	 */
 	if ((nam != NULL) || (flags & PRUS_EOF))
 		UNP_LINK_WLOCK();
 	else
 		UNP_LINK_RLOCK();
 	switch (so->so_type) {
 	case SOCK_DGRAM:
 	{
 		const struct sockaddr *from;
 
 		unp2 = unp->unp_conn;
 		if (nam != NULL) {
 			UNP_LINK_WLOCK_ASSERT();
 			if (unp2 != NULL) {
 				error = EISCONN;
 				break;
 			}
 			error = unp_connect(so, nam, td);
 			if (error)
 				break;
 			unp2 = unp->unp_conn;
 		}
 
 		/*
 		 * Because connect() and send() are non-atomic in a sendto()
 		 * with a target address, it's possible that the socket will
 		 * have disconnected before the send() can run.  In that case
 		 * return the slightly counter-intuitive but otherwise
 		 * correct error that the socket is not connected.
 		 */
 		if (unp2 == NULL) {
 			error = ENOTCONN;
 			break;
 		}
 		/* Lockless read. */
 		if (unp2->unp_flags & UNP_WANTCRED)
 			control = unp_addsockcred(td, control);
 		UNP_PCB_LOCK(unp);
 		if (unp->unp_addr != NULL)
 			from = (struct sockaddr *)unp->unp_addr;
 		else
 			from = &sun_noname;
 		so2 = unp2->unp_socket;
 		SOCKBUF_LOCK(&so2->so_rcv);
 		if (sbappendaddr_locked(&so2->so_rcv, from, m,
 		    control)) {
 			/* Queued: the receive buffer now owns m and control. */
 			sorwakeup_locked(so2);
 			m = NULL;
 			control = NULL;
 		} else {
 			SOCKBUF_UNLOCK(&so2->so_rcv);
 			error = ENOBUFS;
 		}
 		/* Undo the temporary connection made for this send. */
 		if (nam != NULL) {
 			UNP_LINK_WLOCK_ASSERT();
 			UNP_PCB_LOCK(unp2);
 			unp_disconnect(unp, unp2);
 			UNP_PCB_UNLOCK(unp2);
 		}
 		UNP_PCB_UNLOCK(unp);
 		break;
 	}
 
 	case SOCK_SEQPACKET:
 	case SOCK_STREAM:
 		if ((so->so_state & SS_ISCONNECTED) == 0) {
 			if (nam != NULL) {
 				UNP_LINK_WLOCK_ASSERT();
 				error = unp_connect(so, nam, td);
 				if (error)
 					break;	/* XXX */
 			} else {
 				error = ENOTCONN;
 				break;
 			}
 		}
 
 		/* Lockless read. */
 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			error = EPIPE;
 			break;
 		}
 
 		/*
 		 * Because connect() and send() are non-atomic in a sendto()
 		 * with a target address, it's possible that the socket will
 		 * have disconnected before the send() can run.  In that case
 		 * return the slightly counter-intuitive but otherwise
 		 * correct error that the socket is not connected.
 		 *
 		 * Locking here must be done carefully: the linkage lock
 		 * prevents interconnections between unpcbs from changing, so
 		 * we can traverse from unp to unp2 without acquiring unp's
 		 * lock.  Socket buffer locks follow unpcb locks, so we can
 		 * acquire both remote and local socket buffer locks.
 		 */
 		unp2 = unp->unp_conn;
 		if (unp2 == NULL) {
 			error = ENOTCONN;
 			break;
 		}
 		so2 = unp2->unp_socket;
 		UNP_PCB_LOCK(unp2);
 		SOCKBUF_LOCK(&so2->so_rcv);
 		if (unp2->unp_flags & UNP_WANTCRED) {
 			/*
 			 * Credentials are passed only once on SOCK_STREAM
 			 * and SOCK_SEQPACKET.
 			 */
 			unp2->unp_flags &= ~UNP_WANTCRED;
 			control = unp_addsockcred(td, control);
 		}
 		/*
 		 * Send to paired receive port, and then reduce send buffer
 		 * hiwater marks to maintain backpressure.  Wake up readers.
 		 */
 		switch (so->so_type) {
 		case SOCK_STREAM:
 			if (control != NULL) {
 				if (sbappendcontrol_locked(&so2->so_rcv, m,
 				    control))
 					control = NULL;
 			} else
 				sbappend_locked(&so2->so_rcv, m, flags);
 			break;
 
 		case SOCK_SEQPACKET: {
 			const struct sockaddr *from;
 
 			from = &sun_noname;
 			/*
 			 * Don't check for space available in so2->so_rcv.
 			 * Unix domain sockets only check for space in the
 			 * sending sockbuf, and that check is performed one
 			 * level up the stack.
 			 */
 			if (sbappendaddr_nospacecheck_locked(&so2->so_rcv,
 				from, m, control))
 				control = NULL;
 			break;
 			}
 		}
 
 		/* Sample the receiver's usage while so_rcv is still locked. */
 		mbcnt = so2->so_rcv.sb_mbcnt;
 		sbcc = sbavail(&so2->so_rcv);
 		if (sbcc)
 			sorwakeup_locked(so2);
 		else
 			SOCKBUF_UNLOCK(&so2->so_rcv);
 
 		/*
 		 * The PCB lock on unp2 protects the SB_STOP flag.  Without it,
 		 * it would be possible for uipc_rcvd to be called at this
 		 * point, drain the receiving sockbuf, clear SB_STOP, and then
 		 * we would set SB_STOP below.  That could lead to an empty
 		 * sockbuf having SB_STOP set.
 		 */
 		SOCKBUF_LOCK(&so->so_snd);
 		if (sbcc >= so->so_snd.sb_hiwat || mbcnt >= so->so_snd.sb_mbmax)
 			so->so_snd.sb_flags |= SB_STOP;
 		SOCKBUF_UNLOCK(&so->so_snd);
 		UNP_PCB_UNLOCK(unp2);
 		/* m is cleared unconditionally on this path. */
 		m = NULL;
 		break;
 	}
 
 	/*
 	 * PRUS_EOF is equivalent to pru_send followed by pru_shutdown.
 	 */
 	if (flags & PRUS_EOF) {
 		UNP_PCB_LOCK(unp);
 		socantsendmore(so);
 		unp_shutdown(unp);
 		UNP_PCB_UNLOCK(unp);
 	}
 
 	if ((nam != NULL) || (flags & PRUS_EOF))
 		UNP_LINK_WUNLOCK();
 	else
 		UNP_LINK_RUNLOCK();
 
 	/* Failed with control data still ours: dispose of it before freeing. */
 	if (control != NULL && error != 0)
 		unp_dispose_mbuf(control);
 
 release:
 	if (control != NULL)
 		m_freem(control);
 	if (m != NULL)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * pru_ready handler.  Passes (m, count) to sbready() on the receive buffer
  * of the socket connected to 'so' and, if sbready() returns 0, wakes up that
  * socket's readers.  Returns the sbready() result.
  *
  * NOTE(review): unp->unp_conn is locked and dereferenced without a NULL
  * check, whereas the send path in this file returns ENOTCONN when unp_conn
  * is NULL -- confirm that callers can only get here with a connected peer.
  */
 static int
 uipc_ready(struct socket *so, struct mbuf *m, int count)
 {
 	struct unpcb *unp, *unp2;
 	struct socket *so2;
 	int error;
 
 	unp = sotounpcb(so);
 
 	/* The link read lock is held across the unp -> unp2 traversal. */
 	UNP_LINK_RLOCK();
 	unp2 = unp->unp_conn;
 	UNP_PCB_LOCK(unp2);
 	so2 = unp2->unp_socket;
 
 	SOCKBUF_LOCK(&so2->so_rcv);
 	/*
 	 * Only the failure branch unlocks the sockbuf explicitly;
 	 * sorwakeup_locked() presumably releases it on the success branch.
 	 */
 	if ((error = sbready(&so2->so_rcv, m, count)) == 0)
 		sorwakeup_locked(so2);
 	else
 		SOCKBUF_UNLOCK(&so2->so_rcv);
 
 	UNP_PCB_UNLOCK(unp2);
 	UNP_LINK_RUNLOCK();
 
 	return (error);
 }
 
 /*
  * pru_sense handler: fill in the stat fields for a local socket.
  * st_blksize is the send buffer high-water mark, st_dev is NODEV and
  * st_ino is a per-pcb number assigned on first use from the global unp_ino
  * counter.  Always returns 0.
  */
 static int
 uipc_sense(struct socket *so, struct stat *sb)
 {
 	struct unpcb *unp;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_sense: unp == NULL"));
 
 	sb->st_blksize = so->so_snd.sb_hiwat;
 	UNP_PCB_LOCK(unp);
 	sb->st_dev = NODEV;
 	/*
 	 * An unp_ino of 0 means "not assigned yet", so the counter skips 0
 	 * when it wraps around.
 	 */
 	if (unp->unp_ino == 0)
 		unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
 	sb->st_ino = unp->unp_ino;
 	UNP_PCB_UNLOCK(unp);
 	return (0);
 }
 
 /*
  * pru_shutdown handler: with the link write lock and the pcb lock held,
  * mark 'so' as unable to send any more and let unp_shutdown() notify the
  * connected peer, if any.  Always returns 0.
  */
 static int
 uipc_shutdown(struct socket *so)
 {
 	struct unpcb *unp;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_shutdown: unp == NULL"));
 
 	/* unp_shutdown() asserts both of these locks. */
 	UNP_LINK_WLOCK();
 	UNP_PCB_LOCK(unp);
 	socantsendmore(so);
 	unp_shutdown(unp);
 	UNP_PCB_UNLOCK(unp);
 	UNP_LINK_WUNLOCK();
 	return (0);
 }
 
 /*
  * pru_sockaddr handler: return in *nam a freshly allocated (M_SONAME) copy
  * of the address stored in the pcb, or of sun_noname when unp_addr is NULL.
  * Always returns 0.
  */
 static int
 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct unpcb *unp;
 	const struct sockaddr *sa;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_sockaddr: unp == NULL"));
 
 	/* The M_WAITOK allocation is done before the pcb lock is taken. */
 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_addr != NULL)
 		sa = (struct sockaddr *) unp->unp_addr;
 	else
 		sa = &sun_noname;
 	/* Only sa_len bytes of the sockaddr_un-sized buffer are written. */
 	bcopy(sa, *nam, sa->sa_len);
 	UNP_PCB_UNLOCK(unp);
 	return (0);
 }
 
 /*
  * pr_usrreqs method table for the datagram variant.  It differs from the
  * other two tables in this file by using soreceive_dgram for receive; like
  * the seqpacket table it provides no pru_ready method.
  */
 static struct pr_usrreqs uipc_usrreqs_dgram = {
 	.pru_abort = 		uipc_abort,
 	.pru_accept =		uipc_accept,
 	.pru_attach =		uipc_attach,
 	.pru_bind =		uipc_bind,
 	.pru_bindat =		uipc_bindat,
 	.pru_connect =		uipc_connect,
 	.pru_connectat =	uipc_connectat,
 	.pru_connect2 =		uipc_connect2,
 	.pru_detach =		uipc_detach,
 	.pru_disconnect =	uipc_disconnect,
 	.pru_listen =		uipc_listen,
 	.pru_peeraddr =		uipc_peeraddr,
 	.pru_rcvd =		uipc_rcvd,
 	.pru_send =		uipc_send,
 	.pru_sense =		uipc_sense,
 	.pru_shutdown =		uipc_shutdown,
 	.pru_sockaddr =		uipc_sockaddr,
 	.pru_soreceive =	soreceive_dgram,
 	.pru_close =		uipc_close,
 };
 
 /*
  * pr_usrreqs method table for the seqpacket variant.  Receive goes through
  * soreceive_generic and no pru_ready method is provided.
  */
 static struct pr_usrreqs uipc_usrreqs_seqpacket = {
 	.pru_abort =		uipc_abort,
 	.pru_accept =		uipc_accept,
 	.pru_attach =		uipc_attach,
 	.pru_bind =		uipc_bind,
 	.pru_bindat =		uipc_bindat,
 	.pru_connect =		uipc_connect,
 	.pru_connectat =	uipc_connectat,
 	.pru_connect2 =		uipc_connect2,
 	.pru_detach =		uipc_detach,
 	.pru_disconnect =	uipc_disconnect,
 	.pru_listen =		uipc_listen,
 	.pru_peeraddr =		uipc_peeraddr,
 	.pru_rcvd =		uipc_rcvd,
 	.pru_send =		uipc_send,
 	.pru_sense =		uipc_sense,
 	.pru_shutdown =		uipc_shutdown,
 	.pru_sockaddr =		uipc_sockaddr,
 	.pru_soreceive =	soreceive_generic,	/* XXX: or...? */
 	.pru_close =		uipc_close,
 };
 
 /*
  * pr_usrreqs method table for the stream variant.  This is the only one of
  * the three tables in this file that wires up pru_ready (uipc_ready).
  */
 static struct pr_usrreqs uipc_usrreqs_stream = {
 	.pru_abort = 		uipc_abort,
 	.pru_accept =		uipc_accept,
 	.pru_attach =		uipc_attach,
 	.pru_bind =		uipc_bind,
 	.pru_bindat =		uipc_bindat,
 	.pru_connect =		uipc_connect,
 	.pru_connectat =	uipc_connectat,
 	.pru_connect2 =		uipc_connect2,
 	.pru_detach =		uipc_detach,
 	.pru_disconnect =	uipc_disconnect,
 	.pru_listen =		uipc_listen,
 	.pru_peeraddr =		uipc_peeraddr,
 	.pru_rcvd =		uipc_rcvd,
 	.pru_send =		uipc_send,
 	.pru_ready =		uipc_ready,
 	.pru_sense =		uipc_sense,
 	.pru_shutdown =		uipc_shutdown,
 	.pru_sockaddr =		uipc_sockaddr,
 	.pru_soreceive =	soreceive_generic,
 	.pru_close =		uipc_close,
 };
 
 /*
  * Socket option handler for local sockets (option level 0 only).
  *
  * SOPT_GET supports LOCAL_PEERCRED (copies out the cached peer credentials),
  * LOCAL_CREDS and LOCAL_CONNWAIT (copy out 1 or 0 depending on the matching
  * pcb flag).  SOPT_SET supports LOCAL_CREDS and LOCAL_CONNWAIT, setting or
  * clearing the matching pcb flag according to the integer option value.
  *
  * Returns 0 on success, EINVAL for a non-zero option level, EOPNOTSUPP for
  * an unknown SOPT_GET option or an unknown direction, ENOPROTOOPT for an
  * unknown SOPT_SET option, or the error from sooptcopyin()/sooptcopyout().
  */
 static int
 uipc_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	struct unpcb *unp;
 	struct xucred xu;
 	int error, optval;
 
 	if (sopt->sopt_level != 0)
 		return (EINVAL);
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("uipc_ctloutput: unp == NULL"));
 	error = 0;
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case LOCAL_PEERCRED:
 			/*
 			 * Snapshot the credentials under the pcb lock and
 			 * copy them out only after the lock is dropped.
 			 * Without UNP_HAVEPC the error is ENOTCONN for
 			 * SOCK_STREAM and EINVAL for every other type.
 			 */
 			UNP_PCB_LOCK(unp);
 			if (unp->unp_flags & UNP_HAVEPC)
 				xu = unp->unp_peercred;
 			else {
 				if (so->so_type == SOCK_STREAM)
 					error = ENOTCONN;
 				else
 					error = EINVAL;
 			}
 			UNP_PCB_UNLOCK(unp);
 			if (error == 0)
 				error = sooptcopyout(sopt, &xu, sizeof(xu));
 			break;
 
 		case LOCAL_CREDS:
 			/* Unlocked read. */
 			optval = unp->unp_flags & UNP_WANTCRED ? 1 : 0;
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 
 		case LOCAL_CONNWAIT:
 			/* Unlocked read. */
 			optval = unp->unp_flags & UNP_CONNWAIT ? 1 : 0;
 			error = sooptcopyout(sopt, &optval, sizeof(optval));
 			break;
 
 		default:
 			error = EOPNOTSUPP;
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case LOCAL_CREDS:
 		case LOCAL_CONNWAIT:
 			/* Both options take a single int as their value. */
 			error = sooptcopyin(sopt, &optval, sizeof(optval),
 					    sizeof(optval));
 			if (error)
 				break;
 
 /*
  * OPTSET(bit): under the pcb lock, set 'bit' in unp_flags when optval is
  * non-zero and clear it otherwise.
  */
 #define	OPTSET(bit) do {						\
 	UNP_PCB_LOCK(unp);						\
 	if (optval)							\
 		unp->unp_flags |= bit;					\
 	else								\
 		unp->unp_flags &= ~bit;					\
 	UNP_PCB_UNLOCK(unp);						\
 } while (0)
 
 			switch (sopt->sopt_name) {
 			case LOCAL_CREDS:
 				OPTSET(UNP_WANTCRED);
 				break;
 
 			case LOCAL_CONNWAIT:
 				OPTSET(UNP_CONNWAIT);
 				break;
 
 			default:
 				break;
 			}
 			break;
 #undef	OPTSET
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 	return (error);
 }
 
 /*
  * Connect 'so' to the address 'nam'.  Thin wrapper that calls
  * unp_connectat() with AT_FDCWD as the directory descriptor and returns its
  * result.
  */
 static int
 unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 
 	return (unp_connectat(AT_FDCWD, so, nam, td));
 }
 
 /*
  * Connect 'so' to the local socket bound at the path carried in 'nam',
  * looked up relative to descriptor 'fd'.
  *
  * Locking: entered with the link write lock held (asserted below).  The
  * lock is dropped around the namei() lookup, with UNP_CONNECTING set in the
  * pcb flags so that a concurrent connect on the same pcb gets EALREADY, and
  * is re-taken before returning on every path that reaches the "bad" label.
  *
  * Returns 0 on success, or EAFNOSUPPORT, EINVAL, EALREADY, ENOTSOCK,
  * ECONNREFUSED, EPROTOTYPE, or an error from namei(), the MAC check,
  * VOP_ACCESS() or unp_connect2().
  */
 static int
 unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
     struct thread *td)
 {
 	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
 	struct vnode *vp;
-	struct socket *so2, *so3;
+	struct socket *so2;
 	struct unpcb *unp, *unp2, *unp3;
 	struct nameidata nd;
 	char buf[SOCK_MAXADDRLEN];
 	struct sockaddr *sa;
 	cap_rights_t rights;
 	int error, len;
 
 	if (nam->sa_family != AF_UNIX)
 		return (EAFNOSUPPORT);
 
 	UNP_LINK_WLOCK_ASSERT();
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
 
 	/* Copy the path out of the sockaddr and NUL-terminate it. */
 	if (nam->sa_len > sizeof(struct sockaddr_un))
 		return (EINVAL);
 	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
 	if (len <= 0)
 		return (EINVAL);
 	bcopy(soun->sun_path, buf, len);
 	buf[len] = 0;
 
 	UNP_PCB_LOCK(unp);
 	if (unp->unp_flags & UNP_CONNECTING) {
 		UNP_PCB_UNLOCK(unp);
 		return (EALREADY);
 	}
 	UNP_LINK_WUNLOCK();
 	unp->unp_flags |= UNP_CONNECTING;
 	UNP_PCB_UNLOCK(unp);
 
 	/*
 	 * Pre-allocate the address buffer that an accepted socket may take
 	 * over below; whatever is left in 'sa' is freed at "bad".
 	 */
 	sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF,
 	    UIO_SYSSPACE, buf, fd, cap_rights_init(&rights, CAP_CONNECTAT), td);
 	error = namei(&nd);
 	if (error)
 		vp = NULL;
 	else
 		vp = nd.ni_vp;
 	ASSERT_VOP_LOCKED(vp, "unp_connect");
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	if (error)
 		goto bad;
 
 	if (vp->v_type != VSOCK) {
 		error = ENOTSOCK;
 		goto bad;
 	}
 #ifdef MAC
 	error = mac_vnode_check_open(td->td_ucred, vp, VWRITE | VREAD);
 	if (error)
 		goto bad;
 #endif
 	error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
 	if (error)
 		goto bad;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
 
 	/*
 	 * Lock linkage lock for two reasons: make sure v_socket is stable,
 	 * and to protect simultaneous locking of multiple pcbs.
 	 */
 	UNP_LINK_WLOCK();
 	VOP_UNP_CONNECT(vp, &unp2);
 	if (unp2 == NULL) {
 		error = ECONNREFUSED;
 		goto bad2;
 	}
 	so2 = unp2->unp_socket;
 	if (so->so_type != so2->so_type) {
 		error = EPROTOTYPE;
 		goto bad2;
 	}
+	UNP_PCB_LOCK(unp);
+	UNP_PCB_LOCK(unp2);
 	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
 		if (so2->so_options & SO_ACCEPTCONN) {
 			CURVNET_SET(so2->so_vnet);
-			so3 = sonewconn(so2, 0);
+			so2 = sonewconn(so2, 0);
 			CURVNET_RESTORE();
 		} else
-			so3 = NULL;
-		if (so3 == NULL) {
+			so2 = NULL;
+		if (so2 == NULL) {
 			error = ECONNREFUSED;
-			goto bad2;
+			goto bad3;
 		}
-		unp = sotounpcb(so);
-		unp2 = sotounpcb(so2);
-		unp3 = sotounpcb(so3);
-		UNP_PCB_LOCK(unp);
-		UNP_PCB_LOCK(unp2);
+		unp3 = sotounpcb(so2);
 		UNP_PCB_LOCK(unp3);
 		if (unp2->unp_addr != NULL) {
 			bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
 			unp3->unp_addr = (struct sockaddr_un *) sa;
 			sa = NULL;
 		}
 
 		/*
 		 * The connector's (client's) credentials are copied from its
 		 * process structure at the time of connect() (which is now).
 		 */
 		cru2x(td->td_ucred, &unp3->unp_peercred);
 		unp3->unp_flags |= UNP_HAVEPC;
 
 		/*
 		 * The receiver's (server's) credentials are copied from the
 		 * unp_peercred member of socket on which the former called
 		 * listen(); uipc_listen() cached that process's credentials
 		 * at that time so we can use them now.
 		 */
 		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
 		    sizeof(unp->unp_peercred));
 		unp->unp_flags |= UNP_HAVEPC;
 		if (unp2->unp_flags & UNP_WANTCRED)
 			unp3->unp_flags |= UNP_WANTCRED;
-		UNP_PCB_UNLOCK(unp3);
 		UNP_PCB_UNLOCK(unp2);
-		UNP_PCB_UNLOCK(unp);
+		unp2 = unp3;
 #ifdef MAC
-		mac_socketpeer_set_from_socket(so, so3);
-		mac_socketpeer_set_from_socket(so3, so);
+		mac_socketpeer_set_from_socket(so, so2);
+		mac_socketpeer_set_from_socket(so2, so);
 #endif
-
-		so2 = so3;
 	}
-	unp = sotounpcb(so);
-	KASSERT(unp != NULL, ("unp_connect: unp == NULL"));
-	unp2 = sotounpcb(so2);
-	KASSERT(unp2 != NULL, ("unp_connect: unp2 == NULL"));
-	UNP_PCB_LOCK(unp);
-	UNP_PCB_LOCK(unp2);
+
+	KASSERT(unp2 != NULL && so2 != NULL && unp2->unp_socket == so2 &&
+	    sotounpcb(so2) == unp2,
+	    ("%s: unp2 %p so2 %p", __func__, unp2, so2));
 	error = unp_connect2(so, so2, PRU_CONNECT);
+bad3:
 	UNP_PCB_UNLOCK(unp2);
 	UNP_PCB_UNLOCK(unp);
 bad2:
 	UNP_LINK_WUNLOCK();
 bad:
 	if (vp != NULL)
 		vput(vp);
 	free(sa, M_SONAME);
 	/* Re-take the link write lock and clear the in-progress marker. */
 	UNP_LINK_WLOCK();
 	UNP_PCB_LOCK(unp);
 	unp->unp_flags &= ~UNP_CONNECTING;
 	UNP_PCB_UNLOCK(unp);
 	return (error);
 }
 
 /*
  * Link the pcb of 'so' to the pcb of 'so2'.  The caller must hold the link
  * write lock and both pcb locks (asserted).
  *
  * For SOCK_DGRAM only so's side is linked: unp_conn points at the peer and
  * the pcb is put on the peer's unp_refs list.  For SOCK_STREAM and
  * SOCK_SEQPACKET both pcbs point at each other and so2 is marked connected;
  * 'so' is marked connecting instead of connected when req is PRU_CONNECT
  * and either pcb has UNP_CONNWAIT set.
  *
  * Returns EPROTOTYPE if the socket types differ, 0 otherwise; panics on any
  * other socket type.
  */
 static int
 unp_connect2(struct socket *so, struct socket *so2, int req)
 {
 	struct unpcb *unp;
 	struct unpcb *unp2;
 
 	unp = sotounpcb(so);
 	KASSERT(unp != NULL, ("unp_connect2: unp == NULL"));
 	unp2 = sotounpcb(so2);
 	KASSERT(unp2 != NULL, ("unp_connect2: unp2 == NULL"));
 
 	UNP_LINK_WLOCK_ASSERT();
 	UNP_PCB_LOCK_ASSERT(unp);
 	UNP_PCB_LOCK_ASSERT(unp2);
 
 	if (so2->so_type != so->so_type)
 		return (EPROTOTYPE);
 	unp2->unp_flags &= ~UNP_NASCENT;
 	unp->unp_conn = unp2;
 
 	switch (so->so_type) {
 	case SOCK_DGRAM:
 		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
 		soisconnected(so);
 		break;
 
 	case SOCK_STREAM:
 	case SOCK_SEQPACKET:
 		unp2->unp_conn = unp;
 		if (req == PRU_CONNECT &&
 		    ((unp->unp_flags | unp2->unp_flags) & UNP_CONNWAIT))
 			soisconnecting(so);
 		else
 			soisconnected(so);
 		soisconnected(so2);
 		break;
 
 	default:
 		panic("unp_connect2");
 	}
 	return (0);
 }
 
 /*
  * Undo the link between 'unp' and its peer 'unp2'.  The caller must hold
  * the link write lock and both pcb locks (asserted).
  *
  * For SOCK_DGRAM only unp's side is torn down: it is removed from the
  * peer's reference list and SS_ISCONNECTED is cleared on its socket.  For
  * SOCK_STREAM and SOCK_SEQPACKET both pcbs are unlinked and both sockets
  * are marked disconnected.
  */
 static void
 unp_disconnect(struct unpcb *unp, struct unpcb *unp2)
 {
 	struct socket *so;
 
 	KASSERT(unp2 != NULL, ("unp_disconnect: unp2 == NULL"));
 
 	UNP_LINK_WLOCK_ASSERT();
 	UNP_PCB_LOCK_ASSERT(unp);
 	UNP_PCB_LOCK_ASSERT(unp2);
 
 	unp->unp_conn = NULL;
 	switch (unp->unp_socket->so_type) {
 	case SOCK_DGRAM:
 		LIST_REMOVE(unp, unp_reflink);
 		so = unp->unp_socket;
 		SOCK_LOCK(so);
 		so->so_state &= ~SS_ISCONNECTED;
 		SOCK_UNLOCK(so);
 		break;
 
 	case SOCK_STREAM:
 	case SOCK_SEQPACKET:
 		soisdisconnected(unp->unp_socket);
 		unp2->unp_conn = NULL;
 		soisdisconnected(unp2->unp_socket);
 		break;
 	}
 }
 
 /*
  * unp_pcblist() walks the global list of struct unpcb's to generate a
  * pointer list, bumping the refcount on each unpcb.  It then copies them out
  * sequentially, validating the generation number on each to see if it has
  * been detached.  All of this is necessary because copyout() may sleep on
  * disk I/O.
  *
  * arg1 selects the list to walk (SOCK_STREAM, SOCK_DGRAM or SOCK_SEQPACKET);
  * any other value panics.  The output is an xunpgen header, one xunpcb per
  * exported pcb, and a trailing xunpgen carrying the counters as they are at
  * the end.  Returns 0, EPERM for a write request, or a SYSCTL_OUT() error.
  */
 static int
 unp_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	int error, i, n;
 	int freeunp;
 	struct unpcb *unp, **unp_list;
 	unp_gen_t gencnt;
 	struct xunpgen *xug;
 	struct unp_head *head;
 	struct xunpcb *xu;
 
 	switch ((intptr_t)arg1) {
 	case SOCK_STREAM:
 		head = &unp_shead;
 		break;
 
 	case SOCK_DGRAM:
 		head = &unp_dhead;
 		break;
 
 	case SOCK_SEQPACKET:
 		head = &unp_sphead;
 		break;
 
 	default:
 		panic("unp_pcblist: arg1 %d", (int)(intptr_t)arg1);
 	}
 
 	/*
 	 * The process of preparing the PCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == NULL) {
 		/*
 		 * Size probe: report room for two headers plus the current
 		 * pcb count padded by one eighth, without walking the list.
 		 */
 		n = unp_count;
 		req->oldidx = 2 * (sizeof *xug)
 			+ (n + n/8) * sizeof(struct xunpcb);
 		return (0);
 	}
 
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
 	/* Snapshot the generation and pcb counts used for the whole pass. */
-	UNP_LIST_LOCK();
+	UNP_LINK_RLOCK();
 	gencnt = unp_gencnt;
 	n = unp_count;
-	UNP_LIST_UNLOCK();
+	UNP_LINK_RUNLOCK();
 
 	xug->xug_len = sizeof *xug;
 	xug->xug_count = n;
 	xug->xug_gen = gencnt;
 	xug->xug_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, xug, sizeof *xug);
 	if (error) {
 		free(xug, M_TEMP);
 		return (error);
 	}
 
 	unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
 
 	/*
 	 * Collect up to n pcbs that are no newer than the snapshot and for
 	 * which cr_cansee() returns 0, taking a reference on each.
 	 */
-	UNP_LIST_LOCK();
+	UNP_LINK_RLOCK();
 	for (unp = LIST_FIRST(head), i = 0; unp && i < n;
 	     unp = LIST_NEXT(unp, unp_link)) {
 		UNP_PCB_LOCK(unp);
 		if (unp->unp_gencnt <= gencnt) {
 			if (cr_cansee(req->td->td_ucred,
 			    unp->unp_socket->so_cred)) {
 				UNP_PCB_UNLOCK(unp);
 				continue;
 			}
 			unp_list[i++] = unp;
 			unp->unp_refcount++;
 		}
 		UNP_PCB_UNLOCK(unp);
 	}
-	UNP_LIST_UNLOCK();
+	UNP_LINK_RUNLOCK();
 	n = i;			/* In case we lost some during malloc. */
 
 	error = 0;
 	xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK | M_ZERO);
 	/*
 	 * Drop the reference taken above on every collected pcb.  A pcb that
 	 * still has other references and is no newer than the snapshot is
 	 * exported; one whose count reached zero is destroyed and freed
 	 * here.
 	 */
 	for (i = 0; i < n; i++) {
 		unp = unp_list[i];
 		UNP_PCB_LOCK(unp);
 		unp->unp_refcount--;
 	        if (unp->unp_refcount != 0 && unp->unp_gencnt <= gencnt) {
 			xu->xu_len = sizeof *xu;
 			xu->xu_unpp = unp;
 			/*
 			 * XXX - need more locking here to protect against
 			 * connect/disconnect races for SMP.
 			 */
 			if (unp->unp_addr != NULL)
 				bcopy(unp->unp_addr, &xu->xu_addr,
 				      unp->unp_addr->sun_len);
 			if (unp->unp_conn != NULL &&
 			    unp->unp_conn->unp_addr != NULL)
 				bcopy(unp->unp_conn->unp_addr,
 				      &xu->xu_caddr,
 				      unp->unp_conn->unp_addr->sun_len);
 			bcopy(unp, &xu->xu_unp, sizeof *unp);
 			sotoxsocket(unp->unp_socket, &xu->xu_socket);
 			/* The copy-out happens after the pcb lock is dropped. */
 			UNP_PCB_UNLOCK(unp);
 			error = SYSCTL_OUT(req, xu, sizeof *xu);
 		} else {
 			freeunp = (unp->unp_refcount == 0);
 			UNP_PCB_UNLOCK(unp);
 			if (freeunp) {
 				UNP_PCB_LOCK_DESTROY(unp);
 				uma_zfree(unp_zone, unp);
 			}
 		}
 	}
 	free(xu, M_TEMP);
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.  If the
 		 * generation differs from what we told her before, she knows
 		 * that something happened while we were processing this
 		 * request, and it might be necessary to retry.
 		 */
 		xug->xug_gen = unp_gencnt;
 		xug->xug_sogen = so_gencnt;
 		xug->xug_count = unp_count;
 		error = SYSCTL_OUT(req, xug, sizeof *xug);
 	}
 	free(unp_list, M_TEMP);
 	free(xug, M_TEMP);
 	return (error);
 }
 
 /*
  * Read-only opaque "pcblist" sysctl nodes, one per socket type.  Each one
  * is served by unp_pcblist(), with the socket type passed as arg1 so the
  * handler can pick the matching pcb list.
  */
 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD,
     (void *)(intptr_t)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
     "List of active local datagram sockets");
 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLTYPE_OPAQUE | CTLFLAG_RD,
     (void *)(intptr_t)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
     "List of active local stream sockets");
 SYSCTL_PROC(_net_local_seqpacket, OID_AUTO, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD,
     (void *)(intptr_t)SOCK_SEQPACKET, 0, unp_pcblist, "S,xunpcb",
     "List of active local seqpacket sockets");
 
 /*
  * Tell the connected peer of a stream or seqpacket socket that no more data
  * will arrive.  Does nothing for other socket types or when there is no
  * peer.  The caller holds the link write lock and the pcb lock.
  */
 static void
 unp_shutdown(struct unpcb *unp)
 {
 	struct unpcb *peer;
 	struct socket *peer_so;
 
 	UNP_LINK_WLOCK_ASSERT();
 	UNP_PCB_LOCK_ASSERT(unp);
 
 	peer = unp->unp_conn;
 
 	/* Only connection-oriented types propagate the shutdown. */
 	switch (unp->unp_socket->so_type) {
 	case SOCK_STREAM:
 	case SOCK_SEQPACKET:
 		break;
 	default:
 		return;
 	}
 	if (peer == NULL)
 		return;
 
 	peer_so = peer->unp_socket;
 	if (peer_so != NULL)
 		socantrcvmore(peer_so);
 }
 
 /*
  * Record ECONNRESET on the socket behind 'unp' and, if it has a peer,
  * disconnect the two under the peer's pcb lock.  The caller must hold the
  * link write lock and unp's pcb lock (asserted).
  */
 static void
 unp_drop(struct unpcb *unp)
 {
 	struct socket *so = unp->unp_socket;
 	struct unpcb *unp2;
 
 	UNP_LINK_WLOCK_ASSERT();
 	UNP_PCB_LOCK_ASSERT(unp);
 
 	/*
 	 * Regardless of whether the socket's peer dropped the connection
 	 * with this socket by aborting or disconnecting, POSIX requires
 	 * that ECONNRESET is returned.
 	 */
 	so->so_error = ECONNRESET;
 	unp2 = unp->unp_conn;
 	if (unp2 == NULL)
 		return;
 	UNP_PCB_LOCK(unp2);
 	unp_disconnect(unp, unp2);
 	UNP_PCB_UNLOCK(unp2);
 }
 
 /*
  * Release an array of 'fdcount' in-flight descriptor entries: free each
  * entry's capability data, discard its file, and finally free the backing
  * storage, which is addressed through the first element.
  */
 static void
 unp_freerights(struct filedescent **fdep, int fdcount)
 {
 	struct filedescent *entry;
 	struct file *file;
 	int idx;
 
 	KASSERT(fdcount > 0, ("%s: fdcount %d", __func__, fdcount));
 
 	idx = 0;
 	while (idx < fdcount) {
 		entry = fdep[idx];
 		file = entry->fde_file;
 		filecaps_free(&entry->fde_caps);
 		unp_discard(file);
 		idx++;
 	}
 	free(fdep[0], M_FILECAPS);
 }
 
 /*
  * Convert the control messages in 'control' from their in-kernel form to
  * the form handed to the receiving thread (curthread).  SCM_RIGHTS payloads
  * hold struct filedescent pointers; each is replaced by a descriptor number
  * newly allocated in the receiver's descriptor table.  Every other message
  * is copied as is.
  *
  * When controlp is NULL, or once an error has been recorded, nothing
  * further is produced: remaining SCM_RIGHTS payloads are released with
  * unp_freerights() and other messages are skipped.  'control' is always
  * freed before returning.  The link lock must not be held (asserted).
  *
  * Returns 0, or EINVAL (malformed message), E2BIG / ENOBUFS (no mbuf for
  * the output message), EMSGSIZE (descriptor allocation failed).
  */
 static int
 unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
 {
 	struct thread *td = curthread;		/* XXX */
 	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 	int i;
 	int *fdp;
 	struct filedesc *fdesc = td->td_proc->p_fd;
 	struct filedescent **fdep;
 	void *data;
 	socklen_t clen = control->m_len, datalen;
 	int error, newfds;
 	u_int newlen;
 
 	UNP_LINK_UNLOCK_ASSERT();
 
 	error = 0;
 	if (controlp != NULL) /* controlp == NULL => free control messages */
 		*controlp = NULL;
 	while (cm != NULL) {
 		if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
 			error = EINVAL;
 			break;
 		}
 		data = CMSG_DATA(cm);
 		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 		if (cm->cmsg_level == SOL_SOCKET
 		    && cm->cmsg_type == SCM_RIGHTS) {
 			newfds = datalen / sizeof(*fdep);
 			if (newfds == 0)
 				goto next;
 			fdep = data;
 
 			/* If we're not outputting the descriptors free them. */
 			if (error || controlp == NULL) {
 				unp_freerights(fdep, newfds);
 				goto next;
 			}
 			FILEDESC_XLOCK(fdesc);
 
 			/*
 			 * Now change each pointer to an fd in the global
 			 * table to an integer that is the index to the local
 			 * fd table entry that we set up to point to the
 			 * global one we are transferring.
 			 */
 			newlen = newfds * sizeof(int);
 			*controlp = sbcreatecontrol(NULL, newlen,
 			    SCM_RIGHTS, SOL_SOCKET);
 			if (*controlp == NULL) {
 				FILEDESC_XUNLOCK(fdesc);
 				error = E2BIG;
 				unp_freerights(fdep, newfds);
 				goto next;
 			}
 
 			fdp = (int *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			if (fdallocn(td, 0, fdp, newfds) != 0) {
 				FILEDESC_XUNLOCK(fdesc);
 				error = EMSGSIZE;
 				unp_freerights(fdep, newfds);
 				m_freem(*controlp);
 				*controlp = NULL;
 				goto next;
 			}
 			/*
 			 * Install each file at its new descriptor, setting
 			 * UF_EXCLOSE when MSG_CMSG_CLOEXEC was requested,
 			 * and drop its in-flight accounting.
 			 */
 			for (i = 0; i < newfds; i++, fdp++) {
 				_finstall(fdesc, fdep[i]->fde_file, *fdp,
 				    (flags & MSG_CMSG_CLOEXEC) != 0 ? UF_EXCLOSE : 0,
 				    &fdep[i]->fde_caps);
 				unp_externalize_fp(fdep[i]->fde_file);
 			}
 			FILEDESC_XUNLOCK(fdesc);
 			/* The entries share one allocation, freed via fdep[0]. */
 			free(fdep[0], M_FILECAPS);
 		} else {
 			/* We can just copy anything else across. */
 			if (error || controlp == NULL)
 				goto next;
 			*controlp = sbcreatecontrol(NULL, datalen,
 			    cm->cmsg_type, cm->cmsg_level);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto next;
 			}
 			bcopy(data,
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
 			    datalen);
 		}
 		/* Reached only when an output mbuf was produced above. */
 		controlp = &(*controlp)->m_next;
 
 next:
 		/* Step to the next message, or stop when none remains. */
 		if (CMSG_SPACE(datalen) < clen) {
 			clen -= CMSG_SPACE(datalen);
 			cm = (struct cmsghdr *)
 			    ((caddr_t)cm + CMSG_SPACE(datalen));
 		} else {
 			clen = 0;
 			cm = NULL;
 		}
 	}
 
 	m_freem(control);
 	return (error);
 }
 
 /*
  * Event handler registered for maxsockets_change in unp_init(): re-apply
  * the current maxsockets value as the limit of the unpcb zone.  'tag' is
  * unused.
  */
 static void
 unp_zone_change(void *tag)
 {
 
 	uma_zone_set_max(unp_zone, maxsockets);
 }
 
 /*
  * One-time initialization of the local-socket layer: create the unpcb zone
  * capped at maxsockets, hook maxsockets changes, and set up the pcb lists,
  * the deferred-close list, the gc and defer tasks and the global locks.
  * With VIMAGE this runs only for the default vnet.
  */
 static void
 unp_init(void)
 {
 
 #ifdef VIMAGE
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 #endif
 	unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, 0);
 	if (unp_zone == NULL)
 		panic("unp_init");
 	uma_zone_set_max(unp_zone, maxsockets);
 	uma_zone_set_warning(unp_zone, "kern.ipc.maxsockets limit reached");
 	EVENTHANDLER_REGISTER(maxsockets_change, unp_zone_change,
 	    NULL, EVENTHANDLER_PRI_ANY);
 	LIST_INIT(&unp_dhead);
 	LIST_INIT(&unp_shead);
 	LIST_INIT(&unp_sphead);
 	SLIST_INIT(&unp_defers);
 	TIMEOUT_TASK_INIT(taskqueue_thread, &unp_gc_task, 0, unp_gc, NULL);
 	TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL);
 	UNP_LINK_LOCK_INIT();
-	UNP_LIST_LOCK_INIT();
 	UNP_DEFERRED_LOCK_INIT();
 }
 
 /*
  * Convert the control messages supplied by the sending thread 'td' into
  * their in-kernel form.  On entry *controlp is the sender's control mbuf;
  * it is always freed, and *controlp is replaced by a chain holding one new
  * mbuf per internalized message (NULL if none was produced).
  *
  * SCM_CREDS is filled in from the sender's credentials, SCM_RIGHTS
  * descriptor numbers are replaced by struct filedescent pointers (taking a
  * reference on each file), and the SCM_TIMESTAMP / SCM_BINTIME /
  * SCM_REALTIME / SCM_MONOTONIC messages are filled in with the current
  * time.  An SCM_RIGHTS message carrying no descriptors is dropped.
  *
  * The link lock must not be held (asserted).  Returns 0, or EINVAL
  * (malformed or unsupported message), EBADF / EOPNOTSUPP (bad or
  * non-passable descriptor), ENOBUFS / E2BIG (no mbuf for the new message).
  */
 static int
 unp_internalize(struct mbuf **controlp, struct thread *td)
 {
 	struct mbuf *control = *controlp;
 	struct proc *p = td->td_proc;
 	struct filedesc *fdesc = p->p_fd;
 	struct bintime *bt;
 	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
 	struct cmsgcred *cmcred;
 	struct filedescent *fde, **fdep, *fdev;
 	struct file *fp;
 	struct timeval *tv;
 	struct timespec *ts;
 	int i, *fdp;
 	void *data;
 	socklen_t clen = control->m_len, datalen;
 	int error, oldfds;
 	u_int newlen;
 
 	UNP_LINK_UNLOCK_ASSERT();
 
 	error = 0;
 	*controlp = NULL;
 	while (cm != NULL) {
 		if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
 		    || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) {
 			error = EINVAL;
 			goto out;
 		}
 		data = CMSG_DATA(cm);
 		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
 
 		switch (cm->cmsg_type) {
 		/*
 		 * Fill in credential information.
 		 */
 		case SCM_CREDS:
 			*controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
 			    SCM_CREDS, SOL_SOCKET);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 			cmcred = (struct cmsgcred *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			cmcred->cmcred_pid = p->p_pid;
 			cmcred->cmcred_uid = td->td_ucred->cr_ruid;
 			cmcred->cmcred_gid = td->td_ucred->cr_rgid;
 			cmcred->cmcred_euid = td->td_ucred->cr_uid;
 			cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
 			    CMGROUP_MAX);
 			for (i = 0; i < cmcred->cmcred_ngroups; i++)
 				cmcred->cmcred_groups[i] =
 				    td->td_ucred->cr_groups[i];
 			break;
 
 		case SCM_RIGHTS:
 			oldfds = datalen / sizeof (int);
 			/* An empty message produces no output mbuf. */
 			if (oldfds == 0)
 				break;
 			/*
 			 * Check that all the FDs passed in refer to legal
 			 * files.  If not, reject the entire operation.
 			 */
 			fdp = data;
 			FILEDESC_SLOCK(fdesc);
 			for (i = 0; i < oldfds; i++, fdp++) {
 				fp = fget_locked(fdesc, *fdp);
 				if (fp == NULL) {
 					FILEDESC_SUNLOCK(fdesc);
 					error = EBADF;
 					goto out;
 				}
 				if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
 					FILEDESC_SUNLOCK(fdesc);
 					error = EOPNOTSUPP;
 					goto out;
 				}
 
 			}
 
 			/*
 			 * Now replace the integer FDs with pointers to the
 			 * file structure and capability rights.
 			 */
 			newlen = oldfds * sizeof(fdep[0]);
 			*controlp = sbcreatecontrol(NULL, newlen,
 			    SCM_RIGHTS, SOL_SOCKET);
 			if (*controlp == NULL) {
 				FILEDESC_SUNLOCK(fdesc);
 				error = E2BIG;
 				goto out;
 			}
 			fdp = data;
 			fdep = (struct filedescent **)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			/*
 			 * All entries live in one allocation; the message
 			 * payload stores a pointer to each of them.
 			 */
 			fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS,
 			    M_WAITOK);
 			for (i = 0; i < oldfds; i++, fdev++, fdp++) {
 				fde = &fdesc->fd_ofiles[*fdp];
 				fdep[i] = fdev;
 				fdep[i]->fde_file = fde->fde_file;
 				filecaps_copy(&fde->fde_caps,
 				    &fdep[i]->fde_caps, true);
 				unp_internalize_fp(fdep[i]->fde_file);
 			}
 			FILEDESC_SUNLOCK(fdesc);
 			break;
 
 		case SCM_TIMESTAMP:
 			*controlp = sbcreatecontrol(NULL, sizeof(*tv),
 			    SCM_TIMESTAMP, SOL_SOCKET);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 			tv = (struct timeval *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			microtime(tv);
 			break;
 
 		case SCM_BINTIME:
 			*controlp = sbcreatecontrol(NULL, sizeof(*bt),
 			    SCM_BINTIME, SOL_SOCKET);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 			bt = (struct bintime *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			bintime(bt);
 			break;
 
 		case SCM_REALTIME:
 			*controlp = sbcreatecontrol(NULL, sizeof(*ts),
 			    SCM_REALTIME, SOL_SOCKET);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 			ts = (struct timespec *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			nanotime(ts);
 			break;
 
 		case SCM_MONOTONIC:
 			*controlp = sbcreatecontrol(NULL, sizeof(*ts),
 			    SCM_MONOTONIC, SOL_SOCKET);
 			if (*controlp == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 			ts = (struct timespec *)
 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
 			nanouptime(ts);
 			break;
 
 		default:
 			error = EINVAL;
 			goto out;
 		}
 
 		/*
 		 * Advance the tail of the output chain only if this message
 		 * produced an mbuf.  An empty SCM_RIGHTS message leaves
 		 * *controlp NULL; advancing through it would make the next
 		 * message store through a pointer derived from NULL.
 		 */
 		if (*controlp != NULL)
 			controlp = &(*controlp)->m_next;
 		if (CMSG_SPACE(datalen) < clen) {
 			clen -= CMSG_SPACE(datalen);
 			cm = (struct cmsghdr *)
 			    ((caddr_t)cm + CMSG_SPACE(datalen));
 		} else {
 			clen = 0;
 			cm = NULL;
 		}
 	}
 
 out:
 	m_freem(control);
 	return (error);
 }
 
 /*
  * Build an SCM_CREDS control message (struct sockcred) from the
  * credentials of 'td' and prepend it to the 'control' chain, after removing
  * any SCM_CREDS messages already present in that chain.  Returns the new
  * head of the chain, or 'control' unchanged if no mbuf could be allocated
  * for the new message.
  */
 static struct mbuf *
 unp_addsockcred(struct thread *td, struct mbuf *control)
 {
 	struct mbuf *m, *n, *n_prev;
 	struct sockcred *sc;
 	const struct cmsghdr *cm;
 	int ngroups;
 	int i;
 
 	/* The number of groups reported is capped at CMGROUP_MAX. */
 	ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX);
 	m = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS, SOL_SOCKET);
 	if (m == NULL)
 		return (control);
 
 	sc = (struct sockcred *) CMSG_DATA(mtod(m, struct cmsghdr *));
 	sc->sc_uid = td->td_ucred->cr_ruid;
 	sc->sc_euid = td->td_ucred->cr_uid;
 	sc->sc_gid = td->td_ucred->cr_rgid;
 	sc->sc_egid = td->td_ucred->cr_gid;
 	sc->sc_ngroups = ngroups;
 	for (i = 0; i < sc->sc_ngroups; i++)
 		sc->sc_groups[i] = td->td_ucred->cr_groups[i];
 
 	/*
 	 * Unlink SCM_CREDS control messages (struct cmsgcred), since just
 	 * created SCM_CREDS control message (struct sockcred) has another
 	 * format.
 	 */
 	if (control != NULL)
 		for (n = control, n_prev = NULL; n != NULL;) {
 			cm = mtod(n, struct cmsghdr *);
     			if (cm->cmsg_level == SOL_SOCKET &&
 			    cm->cmsg_type == SCM_CREDS) {
     				if (n_prev == NULL)
 					control = n->m_next;
 				else
 					n_prev->m_next = n->m_next;
 				/* The m_free() result is used as the next mbuf. */
 				n = m_free(n);
 			} else {
 				n_prev = n;
 				n = n->m_next;
 			}
 		}
 
 	/* Prepend it to the head. */
 	m->m_next = control;
 	return (m);
 }
 
 /*
  * Map a file to the unpcb of the local-domain socket behind it.  Returns
  * NULL when the file is not a socket, has no socket attached, or the socket
  * belongs to a different protocol domain.
  */
 static struct unpcb *
 fptounp(struct file *fp)
 {
 	struct socket *sock;
 
 	if (fp->f_type == DTYPE_SOCKET) {
 		sock = fp->f_data;
 		if (sock != NULL &&
 		    sock->so_proto->pr_domain == &localdomain)
 			return (sotounpcb(sock));
 	}
 	return (NULL);
 }
 
 /*
  * Give up one in-flight reference to 'fp'.  unp_externalize_fp() undoes
  * the in-flight accounting and reports whether fp is a local socket.  For
  * a local socket the close is queued on unp_defers and left to
  * unp_defer_task; any other file is closed right here with closef().
  */
 static void
 unp_discard(struct file *fp)
 {
 	struct unp_defer *dr;
 
 	if (unp_externalize_fp(fp)) {
 		dr = malloc(sizeof(*dr), M_TEMP, M_WAITOK);
 		dr->ud_fp = fp;
 		UNP_DEFERRED_LOCK();
 		SLIST_INSERT_HEAD(&unp_defers, dr, ud_link);
 		UNP_DEFERRED_UNLOCK();
 		atomic_add_int(&unp_defers_count, 1);
 		taskqueue_enqueue(taskqueue_thread, &unp_defer_task);
 	} else
 		(void) closef(fp, (struct thread *)NULL);
 }
 
 /*
  * Task handler for unp_defer_task.  Repeatedly moves the whole global
  * unp_defers list onto a local list under the deferred lock, then closes
  * each queued file and frees its record with the lock released, and
  * finally subtracts the number processed from unp_defers_count.  Stops
  * when the global list is found empty.  'pending' is not used.
  */
 static void
 unp_process_defers(void *arg __unused, int pending)
 {
 	struct unp_defer *dr;
 	SLIST_HEAD(, unp_defer) drl;
 	int count;
 
 	SLIST_INIT(&drl);
 	for (;;) {
 		UNP_DEFERRED_LOCK();
 		if (SLIST_FIRST(&unp_defers) == NULL) {
 			UNP_DEFERRED_UNLOCK();
 			break;
 		}
 		/* drl is empty here, so the swap leaves unp_defers empty. */
 		SLIST_SWAP(&unp_defers, &drl, unp_defer);
 		UNP_DEFERRED_UNLOCK();
 		count = 0;
 		while ((dr = SLIST_FIRST(&drl)) != NULL) {
 			SLIST_REMOVE_HEAD(&drl, ud_link);
 			closef(dr->ud_fp, NULL);
 			free(dr, M_TEMP);
 			count++;
 		}
 		atomic_add_int(&unp_defers_count, -count);
 	}
 }
 
 /*
  * Account for 'fp' being placed in flight inside a control message.  Under
  * the link write lock: if fp is a local socket, remember fp in its pcb and
  * bump the pcb's unp_msgcount; in every case take a file reference and bump
  * the global unp_rights counter.
  */
 static void
 unp_internalize_fp(struct file *fp)
 {
 	struct unpcb *unp;
 
 	UNP_LINK_WLOCK();
 	if ((unp = fptounp(fp)) != NULL) {
 		unp->unp_file = fp;
 		unp->unp_msgcount++;
 	}
 	fhold(fp);
 	unp_rights++;
 	UNP_LINK_WUNLOCK();
 }
 
 /*
  * Undo the in-flight accounting for 'fp': under the link write lock, drop
  * the pcb's unp_msgcount if fp is a local socket and decrement the global
  * unp_rights counter.  Returns 1 if fp is a local socket, 0 otherwise.
  */
 static int
 unp_externalize_fp(struct file *fp)
 {
 	struct unpcb *pcb;
 	int is_local;
 
 	UNP_LINK_WLOCK();
 	pcb = fptounp(fp);
 	is_local = (pcb != NULL);
 	if (is_local)
 		pcb->unp_msgcount--;
 	unp_rights--;
 	UNP_LINK_WUNLOCK();
 	return (is_local);
 }
 
 /*
  * Counters shared between unp_gc() and its helpers during a collection
  * pass.  unp_marked is bumped by unp_accessable() each time it newly flags
  * a pcb with UNPGC_REF; unp_unreachable is bumped by unp_gc_process() each
  * time it flags a pcb with UNPGC_DEAD.  unp_gc() zeroes both before every
  * scan iteration.
  *
  * NOTE(review): the earlier wording called this state thread local and not
  * in need of explicit synchronization.  These are plain file-scope statics,
  * so that presumably relies on unp_gc() only ever running from its single
  * task -- confirm.
  */
 static int	unp_marked;
 static int	unp_unreachable;
 
 /*
  * Scan callback used by the garbage collector: every local socket found in
  * the descriptor array that is not yet flagged UNPGC_REF gets UNPGC_DEAD
  * cleared and UNPGC_REF set, and unp_marked is incremented for it.
  */
 static void
 unp_accessable(struct filedescent **fdep, int fdcount)
 {
 	struct unpcb *pcb;
 	int idx;
 
 	for (idx = 0; idx < fdcount; idx++) {
 		pcb = fptounp(fdep[idx]->fde_file);
 		if (pcb != NULL && (pcb->unp_gcflag & UNPGC_REF) == 0) {
 			pcb->unp_gcflag &= ~UNPGC_DEAD;
 			pcb->unp_gcflag |= UNPGC_REF;
 			unp_marked++;
 		}
 	}
 }
 
 /*
  * Per-pcb step of the garbage collector's marking scan.  A pcb already
  * flagged UNPGC_SCANNED is skipped.  A pcb that is not flagged UNPGC_REF
  * and whose file is referenced only by in-flight messages is flagged
  * UNPGC_DEAD and counted in unp_unreachable.  Otherwise the rights queued
  * on the socket (or, for a listening socket, on the sockets in its
  * completed-connection queue) are scanned with unp_accessable() and the pcb
  * is flagged UNPGC_SCANNED.
  */
 static void
 unp_gc_process(struct unpcb *unp)
 {
-	struct socket *soa;
-	struct socket *so;
+	struct socket *so, *soa;
 	struct file *fp;
 
 	/* Already processed. */
 	if (unp->unp_gcflag & UNPGC_SCANNED)
 		return;
 	fp = unp->unp_file;
 
 	/*
 	 * Check for a socket potentially in a cycle.  It must be in a
 	 * queue as indicated by msgcount, and this must equal the file
 	 * reference count.  Note that when msgcount is 0 the file is NULL.
 	 */
 	if ((unp->unp_gcflag & UNPGC_REF) == 0 && fp &&
 	    unp->unp_msgcount != 0 && fp->f_count == unp->unp_msgcount) {
 		unp->unp_gcflag |= UNPGC_DEAD;
 		unp_unreachable++;
 		return;
 	}
 
-	/*
-	 * Mark all sockets we reference with RIGHTS.
-	 */
 	so = unp->unp_socket;
-	if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
-		SOCKBUF_LOCK(&so->so_rcv);
-		unp_scan(so->so_rcv.sb_mb, unp_accessable);
-		SOCKBUF_UNLOCK(&so->so_rcv);
+	SOCK_LOCK(so);
+	if (SOLISTENING(so)) {
+		/*
+		 * Mark all sockets in our accept queue.
+		 */
+		TAILQ_FOREACH(soa, &so->sol_comp, so_list) {
+			if (sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS)
+				continue;
+			SOCKBUF_LOCK(&soa->so_rcv);
+			unp_scan(soa->so_rcv.sb_mb, unp_accessable);
+			SOCKBUF_UNLOCK(&soa->so_rcv);
+		}
+	} else {
+		/*
+		 * Mark all sockets we reference with RIGHTS.
+		 */
+		if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) {
+			SOCKBUF_LOCK(&so->so_rcv);
+			unp_scan(so->so_rcv.sb_mb, unp_accessable);
+			SOCKBUF_UNLOCK(&so->so_rcv);
+		}
 	}
-
-	/*
-	 * Mark all sockets in our accept queue.
-	 */
-	ACCEPT_LOCK();
-	TAILQ_FOREACH(soa, &so->so_comp, so_list) {
-		if ((sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) != 0)
-			continue;
-		SOCKBUF_LOCK(&soa->so_rcv);
-		unp_scan(soa->so_rcv.sb_mb, unp_accessable);
-		SOCKBUF_UNLOCK(&soa->so_rcv);
-	}
-	ACCEPT_UNLOCK();
+	SOCK_UNLOCK(so);
 	unp->unp_gcflag |= UNPGC_SCANNED;
 }
 
 /*
  * Garbage collector statistics exported read-only under net.local.
  * unp_gc() adds the number of files it released to unp_recycled and
  * increments unp_taskcount once per run.
  */
 static int unp_recycled;
 SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, 
     "Number of unreachable sockets claimed by the garbage collector.");
 
 static int unp_taskcount;
 SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, 
     "Number of times the garbage collector has run.");
 
 static void
 unp_gc(__unused void *arg, int pending)
 {
 	struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead,
 				    NULL };
 	struct unp_head **head;
 	struct file *f, **unref;
 	struct unpcb *unp;
 	int i, total;
 
 	unp_taskcount++;
-	UNP_LIST_LOCK();
+	UNP_LINK_RLOCK();
 	/*
 	 * First clear all gc flags from previous runs, apart from
 	 * UNPGC_IGNORE_RIGHTS.
 	 */
 	for (head = heads; *head != NULL; head++)
 		LIST_FOREACH(unp, *head, unp_link)
 			unp->unp_gcflag =
 			    (unp->unp_gcflag & UNPGC_IGNORE_RIGHTS);
 
 	/*
 	 * Scan marking all reachable sockets with UNPGC_REF.  Once a socket
 	 * is reachable all of the sockets it references are reachable.
 	 * Stop the scan once we do a complete loop without discovering
 	 * a new reachable socket.
 	 */
 	do {
 		unp_unreachable = 0;
 		unp_marked = 0;
 		for (head = heads; *head != NULL; head++)
 			LIST_FOREACH(unp, *head, unp_link)
 				unp_gc_process(unp);
 	} while (unp_marked);
-	UNP_LIST_UNLOCK();
+	UNP_LINK_RUNLOCK();
 	if (unp_unreachable == 0)
 		return;
 
 	/*
 	 * Allocate space for a local list of dead unpcbs.
 	 */
 	unref = malloc(unp_unreachable * sizeof(struct file *),
 	    M_TEMP, M_WAITOK);
 
 	/*
 	 * Iterate looking for sockets which have been specifically marked
 	 * as unreachable and store them locally.
 	 */
 	UNP_LINK_RLOCK();
-	UNP_LIST_LOCK();
 	for (total = 0, head = heads; *head != NULL; head++)
 		LIST_FOREACH(unp, *head, unp_link)
 			if ((unp->unp_gcflag & UNPGC_DEAD) != 0) {
 				f = unp->unp_file;
 				if (unp->unp_msgcount == 0 || f == NULL ||
 				    f->f_count != unp->unp_msgcount)
 					continue;
 				unref[total++] = f;
 				fhold(f);
 				KASSERT(total <= unp_unreachable,
 				    ("unp_gc: incorrect unreachable count."));
 			}
-	UNP_LIST_UNLOCK();
 	UNP_LINK_RUNLOCK();
 
 	/*
 	 * Now flush all sockets, free'ing rights.  This will free the
 	 * struct files associated with these sockets but leave each socket
 	 * with one remaining ref.
 	 */
 	for (i = 0; i < total; i++) {
 		struct socket *so;
 
 		so = unref[i]->f_data;
 		CURVNET_SET(so->so_vnet);
 		sorflush(so);
 		CURVNET_RESTORE();
 	}
 
 	/*
 	 * And finally release the sockets so they can be reclaimed.
 	 */
 	for (i = 0; i < total; i++)
 		fdrop(unref[i], NULL);
 	unp_recycled += total;
 	free(unref, M_TEMP);
 }
 
 static void
 unp_dispose_mbuf(struct mbuf *m)
 {
 
 	if (m)
 		unp_scan(m, unp_freerights);
 }
 
 /*
  * Synchronize against unp_gc, which can trip over data as we are freeing it.
  */
 static void
 unp_dispose(struct socket *so)
 {
 	struct unpcb *unp;
 
 	unp = sotounpcb(so);
-	UNP_LIST_LOCK();
+	UNP_LINK_WLOCK();
 	unp->unp_gcflag |= UNPGC_IGNORE_RIGHTS;
-	UNP_LIST_UNLOCK();
-	unp_dispose_mbuf(so->so_rcv.sb_mb);
+	UNP_LINK_WUNLOCK();
+	if (!SOLISTENING(so))
+		unp_dispose_mbuf(so->so_rcv.sb_mb);
 }
 
 /*
  * Walk a queue of packets and invoke 'op' on every SCM_RIGHTS payload.
  *
  * m0 - first packet; packets are linked through m_nextpkt and the mbufs of
  *      one packet through m_next.
  * op - callback receiving a pointer to the control message data and the
  *      number of 'struct filedescent *' slots that fit in that data.
  *
  * Only mbufs of type MT_CONTROL are inspected; within each, only control
  * messages with level SOL_SOCKET and type SCM_RIGHTS are passed to 'op'.
  */
 static void
 unp_scan(struct mbuf *m0, void (*op)(struct filedescent **, int))
 {
 	struct mbuf *m;
 	struct cmsghdr *cm;
 	void *data;
 	socklen_t clen, datalen;
 
 	/* Outer loop: one iteration per packet in the queue. */
 	while (m0 != NULL) {
 		/* Inner loop: every mbuf belonging to the current packet. */
 		for (m = m0; m; m = m->m_next) {
 			if (m->m_type != MT_CONTROL)
 				continue;
 
 			/* clen tracks the bytes of this mbuf not yet parsed. */
 			cm = mtod(m, struct cmsghdr *);
 			clen = m->m_len;
 
 			while (cm != NULL) {
 				/*
 				 * Stop when the remaining bytes cannot hold a
 				 * header or the message claims more than is left.
 				 */
 				if (sizeof(*cm) > clen || cm->cmsg_len > clen)
 					break;
 
 				/*
 				 * Payload length is the message length minus
 				 * the distance from the header to its data.
 				 * NOTE(review): no lower bound on cmsg_len is
 				 * checked here; presumably the chain is always
 				 * kernel-built and well-formed -- confirm.
 				 */
 				data = CMSG_DATA(cm);
 				datalen = (caddr_t)cm + cm->cmsg_len
 				    - (caddr_t)data;
 
 				if (cm->cmsg_level == SOL_SOCKET &&
 				    cm->cmsg_type == SCM_RIGHTS) {
 					(*op)(data, datalen /
 					    sizeof(struct filedescent *));
 				}
 
 				/*
 				 * Advance to the next control message if one
 				 * can still follow, otherwise end the walk of
 				 * this mbuf.
 				 */
 				if (CMSG_SPACE(datalen) < clen) {
 					clen -= CMSG_SPACE(datalen);
 					cm = (struct cmsghdr *)
 					    ((caddr_t)cm + CMSG_SPACE(datalen));
 				} else {
 					clen = 0;
 					cm = NULL;
 				}
 			}
 		}
 		m0 = m0->m_nextpkt;
 	}
 }
 
 /*
  * Helper invoked by VFS before a socket-type vnode is reclaimed.  If the
  * vnode is still attached to a unix-domain pcb, the pcb's unp_vnode pointer
  * is cleared and the vnode use count held on its behalf is dropped.
  */
 void
 vfs_unp_reclaim(struct vnode *vp)
 {
 	struct unpcb *unp;
 	int detached;
 
 	ASSERT_VOP_ELOCKED(vp, "vfs_unp_reclaim");
 	KASSERT(vp->v_type == VSOCK,
 	    ("vfs_unp_reclaim: vp->v_type != VSOCK"));
 
 	detached = 0;
 	UNP_LINK_WLOCK();
 	VOP_UNP_CONNECT(vp, &unp);
 	if (unp != NULL) {
 		UNP_PCB_LOCK(unp);
 		if (unp->unp_vnode == vp) {
 			VOP_UNP_DETACH(vp);
 			unp->unp_vnode = NULL;
 			detached = 1;
 		}
 		UNP_PCB_UNLOCK(unp);
 	}
 	UNP_LINK_WUNLOCK();
 
 	/* The reference is released only after the link lock is dropped. */
 	if (detached != 0)
 		vunref(vp);
 }
 
 #ifdef DDB
 /*
  * Emit 'indent' spaces on the debugger console; non-positive values print
  * nothing.
  */
 static void
 db_print_indent(int indent)
 {
 
 	for (; indent > 0; indent--)
 		db_printf(" ");
 }
 
 /*
  * Print the symbolic names of the bits set in 'unp_flags' as a
  * comma-separated list, in a fixed order.
  */
 static void
 db_print_unpflags(int unp_flags)
 {
 	const char *sep;
 
 	/* Empty before the first name, ", " before every later one. */
 	sep = "";
 	if ((unp_flags & UNP_HAVEPC) != 0) {
 		db_printf("%sUNP_HAVEPC", sep);
 		sep = ", ";
 	}
 	if ((unp_flags & UNP_WANTCRED) != 0) {
 		db_printf("%sUNP_WANTCRED", sep);
 		sep = ", ";
 	}
 	if ((unp_flags & UNP_CONNWAIT) != 0) {
 		db_printf("%sUNP_CONNWAIT", sep);
 		sep = ", ";
 	}
 	if ((unp_flags & UNP_CONNECTING) != 0) {
 		db_printf("%sUNP_CONNECTING", sep);
 		sep = ", ";
 	}
 	if ((unp_flags & UNP_BINDING) != 0) {
 		db_printf("%sUNP_BINDING", sep);
 		sep = ", ";
 	}
 }
 
 /*
  * Print the contents of 'xu' on two lines, each indented by 'indent'
  * spaces: the scalar fields first, then the group list.
  */
 static void
 db_print_xucred(int indent, struct xucred *xu)
 {
 	const char *sep;
 	int idx;
 
 	db_print_indent(indent);
 	db_printf("cr_version: %u   cr_uid: %u   cr_ngroups: %d\n",
 	    xu->cr_version, xu->cr_uid, xu->cr_ngroups);
 
 	db_print_indent(indent);
 	db_printf("cr_groups: ");
 	sep = "";
 	for (idx = 0; idx < xu->cr_ngroups; idx++) {
 		db_printf("%s%u", sep, xu->cr_groups[idx]);
 		sep = ", ";
 	}
 	db_printf("\n");
 }
 
 /*
  * Print the address of every pcb on the list 'uh' (linked through
  * unp_reflink), four per line, each line indented by 'indent' spaces.
  */
 static void
 db_print_unprefs(int indent, struct unp_head *uh)
 {
 	struct unpcb *unp;
 	int col;
 
 	col = 0;
 	LIST_FOREACH(unp, uh, unp_reflink) {
 		if (col == 0)
 			db_print_indent(indent);
 		db_printf("%p  ", unp);
 		if (++col == 4) {
 			db_printf("\n");
 			col = 0;
 		}
 	}
 
 	/* Terminate a partially filled last line. */
 	if (col != 0)
 		db_printf("\n");
 }
 
 /*
  * Debugger command "show unpcb <addr>": dump the fields of the unpcb
  * located at the given address.
  * NOTE(review): 'have_addr' and 'addr' are not declared here; presumably
  * they are parameters supplied by the DB_SHOW_COMMAND macro -- confirm.
  */
 DB_SHOW_COMMAND(unpcb, db_show_unpcb)
 {
 	struct unpcb *unp;
 
         if (!have_addr) {
                 db_printf("usage: show unpcb <addr>\n");
                 return;
         }
         /* The address is trusted as-is; nothing validates it here. */
         unp = (struct unpcb *)addr;
 
 	db_printf("unp_socket: %p   unp_vnode: %p\n", unp->unp_socket,
 	    unp->unp_vnode);
 
 	db_printf("unp_ino: %ju   unp_conn: %p\n", (uintmax_t)unp->unp_ino,
 	    unp->unp_conn);
 
 	/* Addresses of the pcbs linked on unp_refs, four per line. */
 	db_printf("unp_refs:\n");
 	db_print_unprefs(2, &unp->unp_refs);
 
 	/* XXXRW: Would be nice to print the full address, if any. */
 	db_printf("unp_addr: %p\n", unp->unp_addr);
 
 	db_printf("unp_gencnt: %llu\n",
 	    (unsigned long long)unp->unp_gencnt);
 
 	/* Raw hex value followed by the symbolic flag names. */
 	db_printf("unp_flags: %x (", unp->unp_flags);
 	db_print_unpflags(unp->unp_flags);
 	db_printf(")\n");
 
 	db_printf("unp_peercred:\n");
 	db_print_xucred(2, &unp->unp_peercred);
 
 	db_printf("unp_refcount: %u\n", unp->unp_refcount);
 }
 #endif
Index: head/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c
===================================================================
--- head/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c	(revision 319721)
+++ head/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c	(revision 319722)
@@ -1,2979 +1,2971 @@
 /*
  * ng_btsocket_l2cap.c
  */
 
 /*-
  * Copyright (c) 2001-2002 Maksim Yevmenkin <m_evmenkin@yahoo.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $Id: ng_btsocket_l2cap.c,v 1.16 2003/09/14 23:29:06 max Exp $
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bitstring.h>
 #include <sys/domain.h>
 #include <sys/endian.h>
 #include <sys/errno.h>
 #include <sys/filedesc.h>
 #include <sys/ioccom.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 
 #include <net/vnet.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 #include <netgraph/bluetooth/include/ng_bluetooth.h>
 #include <netgraph/bluetooth/include/ng_hci.h>
 #include <netgraph/bluetooth/include/ng_l2cap.h>
 #include <netgraph/bluetooth/include/ng_btsocket.h>
 #include <netgraph/bluetooth/include/ng_btsocket_l2cap.h>
 
 /* MALLOC define */
 #ifdef NG_SEPARATE_MALLOC
 static MALLOC_DEFINE(M_NETGRAPH_BTSOCKET_L2CAP, "netgraph_btsocks_l2cap",
 		"Netgraph Bluetooth L2CAP sockets");
 #else
 #define M_NETGRAPH_BTSOCKET_L2CAP M_NETGRAPH
 #endif /* NG_SEPARATE_MALLOC */
 
 /* Netgraph node methods */
 static ng_constructor_t	ng_btsocket_l2cap_node_constructor;
 static ng_rcvmsg_t	ng_btsocket_l2cap_node_rcvmsg;
 static ng_shutdown_t	ng_btsocket_l2cap_node_shutdown;
 static ng_newhook_t	ng_btsocket_l2cap_node_newhook;
 static ng_connect_t	ng_btsocket_l2cap_node_connect;
 static ng_rcvdata_t	ng_btsocket_l2cap_node_rcvdata;
 static ng_disconnect_t	ng_btsocket_l2cap_node_disconnect;
 
 static void		ng_btsocket_l2cap_input   (void *, int);
 static void		ng_btsocket_l2cap_rtclean (void *, int);
 
 /*
  * Netgraph type descriptor: names the node type and binds each Netgraph
  * node method to the corresponding ng_btsocket_l2cap_node_* handler
  * declared above.
  */
 static struct ng_type	typestruct = {
 	.version =	NG_ABI_VERSION,
 	.name =		NG_BTSOCKET_L2CAP_NODE_TYPE,
 	.constructor =	ng_btsocket_l2cap_node_constructor,
 	.rcvmsg =	ng_btsocket_l2cap_node_rcvmsg,
 	.shutdown =	ng_btsocket_l2cap_node_shutdown,
 	.newhook =	ng_btsocket_l2cap_node_newhook,
 	.connect =	ng_btsocket_l2cap_node_connect,
 	.rcvdata =	ng_btsocket_l2cap_node_rcvdata,
 	.disconnect =	ng_btsocket_l2cap_node_disconnect,
 };
 
 /* Globals */
 extern int					ifqmaxlen;
 static u_int32_t				ng_btsocket_l2cap_debug_level;
 static node_p					ng_btsocket_l2cap_node;
 static struct ng_bt_itemq			ng_btsocket_l2cap_queue;
 static struct mtx				ng_btsocket_l2cap_queue_mtx;
 static struct task				ng_btsocket_l2cap_queue_task;
 static LIST_HEAD(, ng_btsocket_l2cap_pcb)	ng_btsocket_l2cap_sockets;
 static struct mtx				ng_btsocket_l2cap_sockets_mtx;
 static LIST_HEAD(, ng_btsocket_l2cap_rtentry)	ng_btsocket_l2cap_rt;
 static struct mtx				ng_btsocket_l2cap_rt_mtx;
 static struct task				ng_btsocket_l2cap_rt_task;
 static struct timeval				ng_btsocket_l2cap_lasttime;
 static int					ng_btsocket_l2cap_curpps;
 
 /*
  * Sysctl tree: a "seq" node below net.bluetooth.l2cap.sockets.
  * debug_level is writable and starts at NG_BTSOCKET_WARN_LEVEL; the
  * queue_* entries are read-only views of the len, maxlen and drops fields
  * of the input queue ng_btsocket_l2cap_queue.
  */
 SYSCTL_DECL(_net_bluetooth_l2cap_sockets);
 static SYSCTL_NODE(_net_bluetooth_l2cap_sockets, OID_AUTO, seq, CTLFLAG_RW,
 	0, "Bluetooth SEQPACKET L2CAP sockets family");
 SYSCTL_UINT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, debug_level,
 	CTLFLAG_RW,
 	&ng_btsocket_l2cap_debug_level, NG_BTSOCKET_WARN_LEVEL,
 	"Bluetooth SEQPACKET L2CAP sockets debug level");
 SYSCTL_UINT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, queue_len,
 	CTLFLAG_RD,
 	&ng_btsocket_l2cap_queue.len, 0,
 	"Bluetooth SEQPACKET L2CAP sockets input queue length");
 SYSCTL_UINT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, queue_maxlen,
 	CTLFLAG_RD,
 	&ng_btsocket_l2cap_queue.maxlen, 0,
 	"Bluetooth SEQPACKET L2CAP sockets input queue max. length");
 SYSCTL_UINT(_net_bluetooth_l2cap_sockets_seq, OID_AUTO, queue_drops,
 	CTLFLAG_RD,
 	&ng_btsocket_l2cap_queue.drops, 0,
 	"Bluetooth SEQPACKET L2CAP sockets input queue drops");
 
 /*
  * Debug output helpers.  Each macro expands to an unterminated
  * "if (level enabled && ppsratecheck(...)) printf", so the argument list
  * written after the macro name becomes the printf() call.  All four share
  * ng_btsocket_l2cap_lasttime / ng_btsocket_l2cap_curpps as the
  * ppsratecheck() state, with a limit argument of 1.
  *
  * NOTE(review): because the expansion is a bare "if", using one of these
  * as the unbraced body of an if that has an "else" would re-bind the
  * "else" to the macro's own "if".  Always wrap uses in braces.
  */
 #define NG_BTSOCKET_L2CAP_INFO \
 	if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_INFO_LEVEL && \
 	    ppsratecheck(&ng_btsocket_l2cap_lasttime, &ng_btsocket_l2cap_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_L2CAP_WARN \
 	if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_WARN_LEVEL && \
 	    ppsratecheck(&ng_btsocket_l2cap_lasttime, &ng_btsocket_l2cap_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_L2CAP_ERR \
 	if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_ERR_LEVEL && \
 	    ppsratecheck(&ng_btsocket_l2cap_lasttime, &ng_btsocket_l2cap_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_L2CAP_ALERT \
 	if (ng_btsocket_l2cap_debug_level >= NG_BTSOCKET_ALERT_LEVEL && \
 	    ppsratecheck(&ng_btsocket_l2cap_lasttime, &ng_btsocket_l2cap_curpps, 1)) \
 		printf
 
 /* 
  * Netgraph message processing routines
  */
 
 static int ng_btsocket_l2cap_process_l2ca_con_req_rsp
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 static int ng_btsocket_l2cap_process_l2ca_con_rsp_rsp
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 static int ng_btsocket_l2cap_process_l2ca_con_ind
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 
 static int ng_btsocket_l2cap_process_l2ca_cfg_req_rsp
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 static int ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 static int ng_btsocket_l2cap_process_l2ca_cfg_ind
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 
 static int ng_btsocket_l2cap_process_l2ca_discon_rsp
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 static int ng_btsocket_l2cap_process_l2ca_discon_ind
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 
 static int ng_btsocket_l2cap_process_l2ca_write_rsp
 	(struct ng_mesg *, ng_btsocket_l2cap_rtentry_p);
 
 /*
  * Send L2CA_xxx messages to the lower layer
  */
 
 static int  ng_btsocket_l2cap_send_l2ca_con_req
 	(ng_btsocket_l2cap_pcb_p);
 static int  ng_btsocket_l2cap_send_l2ca_con_rsp_req
 	(u_int32_t, ng_btsocket_l2cap_rtentry_p, bdaddr_p, int, int, int, int);
 static int  ng_btsocket_l2cap_send_l2ca_cfg_req
 	(ng_btsocket_l2cap_pcb_p);
 static int  ng_btsocket_l2cap_send_l2ca_cfg_rsp
 	(ng_btsocket_l2cap_pcb_p);
 static int  ng_btsocket_l2cap_send_l2ca_discon_req
 	(u_int32_t, ng_btsocket_l2cap_pcb_p);
 
 static int ng_btsocket_l2cap_send2
 	(ng_btsocket_l2cap_pcb_p);
 
 /* 
  * Timeout processing routines
  */
 
 static void ng_btsocket_l2cap_timeout         (ng_btsocket_l2cap_pcb_p);
 static void ng_btsocket_l2cap_untimeout       (ng_btsocket_l2cap_pcb_p);
 static void ng_btsocket_l2cap_process_timeout (void *);
 
 /* 
  * Other stuff 
  */
 
 static ng_btsocket_l2cap_pcb_p     ng_btsocket_l2cap_pcb_by_addr(bdaddr_p, int);
 static ng_btsocket_l2cap_pcb_p     ng_btsocket_l2cap_pcb_by_token(u_int32_t);
 static ng_btsocket_l2cap_pcb_p     ng_btsocket_l2cap_pcb_by_cid (bdaddr_p, int,int);
 static int                         ng_btsocket_l2cap_result2errno(int);
 
 static int ng_btsock_l2cap_addrtype_to_linktype(int addrtype);
 
 /*
  * Schedule the input-queue task / the routing-table cleanup task on
  * taskqueue_swi_giant.  Each macro evaluates to the return value of
  * taskqueue_enqueue(), which callers in this file pass on as their own
  * error code.
  */
 #define ng_btsocket_l2cap_wakeup_input_task() \
 	taskqueue_enqueue(taskqueue_swi_giant, &ng_btsocket_l2cap_queue_task)
 
 #define ng_btsocket_l2cap_wakeup_route_task() \
 	taskqueue_enqueue(taskqueue_swi_giant, &ng_btsocket_l2cap_rt_task)
 
 
 
 /*
  * Translate a Bluetooth address type into the matching HCI link type.
  *
  * BDADDR_LE_PUBLIC and BDADDR_LE_RANDOM map to their LE link types; every
  * other value maps to NG_HCI_LINK_ACL.
  *
  * The storage class is spelled out so the definition agrees with the
  * "static" forward declaration earlier in the file.
  */
 static int
 ng_btsock_l2cap_addrtype_to_linktype(int addrtype)
 {
 	switch (addrtype) {
 	case BDADDR_LE_PUBLIC:
 		return (NG_HCI_LINK_LE_PUBLIC);
 	case BDADDR_LE_RANDOM:
 		return (NG_HCI_LINK_LE_RANDOM);
 	default:
 		return (NG_HCI_LINK_ACL);
 	}
 }
 
 
 /*****************************************************************************
  *****************************************************************************
  **                        Netgraph node interface
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Netgraph node constructor.  Creating nodes of this type on request is
  * not allowed: the argument is ignored and EINVAL is always returned.
  */
 
 static int
 ng_btsocket_l2cap_node_constructor(node_p node)
 {
 	return (EINVAL);
 } /* ng_btsocket_l2cap_node_constructor */
 
 /*
  * Local shutdown processing: drop the reference on the old node, then
  * create and name a replacement stored in ng_btsocket_l2cap_node.  On any
  * failure ng_btsocket_l2cap_node is left NULL and the error is returned.
  */
 
 static int
 ng_btsocket_l2cap_node_shutdown(node_p node)
 {
 	int	rc;
 
 	NG_NODE_UNREF(node);
 
 	/* Create new node */
 	rc = ng_make_node_common(&typestruct, &ng_btsocket_l2cap_node);
 	if (rc != 0) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Could not create Netgraph node, error=%d\n", __func__, rc);
 
 		ng_btsocket_l2cap_node = NULL;
 
 		return (rc);
 	}
 
 	/* Give the new node its well-known name */
 	rc = ng_name_node(ng_btsocket_l2cap_node,
 				NG_BTSOCKET_L2CAP_NODE_TYPE);
 	if (rc == 0)
 		return (0);
 
 	/* Naming failed: release the node that was just created */
 	{
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Could not name Netgraph node, error=%d\n", __func__, rc);
 	}
 
 	NG_NODE_UNREF(ng_btsocket_l2cap_node);
 	ng_btsocket_l2cap_node = NULL;
 
 	return (rc);
 } /* ng_btsocket_l2cap_node_shutdown */
 
 /*
  * New hook handler.  Any hook may be connected to the node: none of the
  * arguments is examined and 0 is always returned.
  */
 
 static int
 ng_btsocket_l2cap_node_newhook(node_p node, hook_p hook, char const *name)
 {
 	return (0);
 } /* ng_btsocket_l2cap_node_newhook */
 
 /*
  * Hook connect handler.  The connection is always accepted: the hook's
  * private pointer is reset to NULL and an extra reference is taken on the
  * hook.  Always returns 0.
  */
 
 static int
 ng_btsocket_l2cap_node_connect(hook_p hook)
 {
 	NG_HOOK_SET_PRIVATE(hook, NULL);
 	NG_HOOK_REF(hook); /* Keep extra reference to the hook */
 
 	/* Forced queueing on both ends of the hook is compiled out. */
 #if 0
 	NG_HOOK_FORCE_QUEUE(NG_HOOK_PEER(hook));
 	NG_HOOK_FORCE_QUEUE(hook);
 #endif
 
 	return (0);
 } /* ng_btsocket_l2cap_node_connect */
 
 /*
  * Hook disconnection.  Either drop the extra hook reference right away or
  * schedule the route cleanup task, depending on whether the hook carries
  * private information.
  */
 
 static int
 ng_btsocket_l2cap_node_disconnect(hook_p hook)
 {
 	/*
 	 * A hook without private information was connected but never
 	 * produced a "hook_info" message, so it was never added to the
 	 * routing table and it is safe to just release it here.
 	 */
 
 	if (NG_HOOK_PRIVATE(hook) == NULL) {
 		NG_HOOK_UNREF(hook); /* Remove extra reference */
 
 		return (0);
 	}
 
 	/*
 	 * Otherwise the hook must be in the routing table, and cleaning of
 	 * the routing table has to be scheduled.
 	 */
 
 	return (ng_btsocket_l2cap_wakeup_route_task());
 } /* ng_btsocket_l2cap_node_disconnect */
 
 /*
  * Process incoming messages.  Items carrying a message with the L2CAP type
  * cookie are placed on the input queue and the input task is scheduled;
  * anything else is freed and rejected with EINVAL.  A full queue costs the
  * item (ENOBUFS).
  */
 
 static int
 ng_btsocket_l2cap_node_rcvmsg(node_p node, item_p item, hook_p hook)
 {
 	struct ng_mesg	*mesg;
 	int		 rc;
 
 	mesg = NGI_MSG(item); /* item still has message */
 	if (mesg == NULL || mesg->header.typecookie != NGM_L2CAP_COOKIE) {
 		NG_FREE_ITEM(item);
 
 		return (EINVAL);
 	}
 
 	mtx_lock(&ng_btsocket_l2cap_queue_mtx);
 	if (!NG_BT_ITEMQ_FULL(&ng_btsocket_l2cap_queue)) {
 		/* Record the arrival hook, holding a reference for it */
 		if (hook != NULL) {
 			NG_HOOK_REF(hook);
 			NGI_SET_HOOK(item, hook);
 		}
 
 		NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_l2cap_queue, item);
 		rc = ng_btsocket_l2cap_wakeup_input_task();
 	} else {
 		NG_BTSOCKET_L2CAP_ERR(
 "%s: Input queue is full (msg)\n", __func__);
 
 		NG_BT_ITEMQ_DROP(&ng_btsocket_l2cap_queue);
 		NG_FREE_ITEM(item);
 		rc = ENOBUFS;
 	}
 	mtx_unlock(&ng_btsocket_l2cap_queue_mtx);
 
 	return (rc);
 } /* ng_btsocket_l2cap_node_rcvmsg */
 
 /*
  * Receive data on a hook.  The item is tagged with a referenced hook and
  * queued for the input task; if the input queue is full the item is
  * dropped and ENOBUFS is returned.
  */
 
 static int
 ng_btsocket_l2cap_node_rcvdata(hook_p hook, item_p item)
 {
 	int	rc;
 
 	mtx_lock(&ng_btsocket_l2cap_queue_mtx);
 	if (!NG_BT_ITEMQ_FULL(&ng_btsocket_l2cap_queue)) {
 		NG_HOOK_REF(hook);
 		NGI_SET_HOOK(item, hook);
 
 		NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_l2cap_queue, item);
 		rc = ng_btsocket_l2cap_wakeup_input_task();
 	} else {
 		NG_BTSOCKET_L2CAP_ERR(
 "%s: Input queue is full (data)\n", __func__);
 
 		NG_BT_ITEMQ_DROP(&ng_btsocket_l2cap_queue);
 		NG_FREE_ITEM(item);
 		rc = ENOBUFS;
 	}
 	mtx_unlock(&ng_btsocket_l2cap_queue_mtx);
 
 	return (rc);
 } /* ng_btsocket_l2cap_node_rcvdata */
 
 /*
  * Process L2CA_Connect response.  The socket layer must have initiated the
  * connection, so a socket associated with the message token has to exist.
  *
  * Returns EMSGSIZE for a wrongly sized message, ENOENT when no socket
  * matches the token or the socket is not in the CONNECTING state, the
  * error from sending the configuration request, or 0.
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_con_req_rsp(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_con_op	*rsp;
 	ng_btsocket_l2cap_pcb_t	*pcb;
 	int			 error;
 
 	if (msg->header.arglen != sizeof(*rsp))
 		return (EMSGSIZE);
 
 	rsp = (ng_l2cap_l2ca_con_op *)(msg->data);
 	error = 0;
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/* Look for the socket with the token */
 	pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token);
 	if (pcb == NULL) {
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 		return (ENOENT);
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_Connect response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, status=%d, " \
 "state=%d\n",	__func__, msg->header.token,
 		pcb->src.b[5], pcb->src.b[4], pcb->src.b[3],
 		pcb->src.b[2], pcb->src.b[1], pcb->src.b[0],
 		pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3],
 		pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0],
 		pcb->psm, rsp->lcid, rsp->result, rsp->status,
 		pcb->state);
 
 	if (pcb->state != NG_BTSOCKET_L2CAP_CONNECTING) {
 		error = ENOENT;
 		goto unlock;
 	}
 
 	ng_btsocket_l2cap_untimeout(pcb);
 
 	if (rsp->result == NG_L2CAP_PENDING) {
 		/* Still in progress: re-arm the timeout and keep waiting */
 		ng_btsocket_l2cap_timeout(pcb);
 	} else if (rsp->result == NG_L2CAP_SUCCESS) {
 		/* Channel is now open: record channel ID and encryption */
 		pcb->cid = rsp->lcid;
 		pcb->encryption = rsp->encryption;
 
 		if (pcb->idtype == NG_L2CAP_L2CA_IDTYPE_ATT ||
 		    pcb->idtype == NG_L2CAP_L2CA_IDTYPE_SMP) {
 			/*
 			 * ATT and SMP channels skip configuration. Either
 			 * wait for the required encryption or open at once.
 			 */
 
 			if (pcb->need_encrypt && !(pcb->encryption)) {
 				ng_btsocket_l2cap_timeout(pcb);
 				pcb->state = NG_BTSOCKET_L2CAP_W4_ENC_CHANGE;
 			} else {
 				pcb->state = NG_BTSOCKET_L2CAP_OPEN;
 				soisconnected(pcb->so);
 			}
 		} else {
 			/*
 			 * Start configuration process. Source and destination
 			 * addresses as well as route must be already set.
 			 */
 
 			error = ng_btsocket_l2cap_send_l2ca_cfg_req(pcb);
 			if (error == 0) {
 				pcb->cfg_state = NG_BTSOCKET_L2CAP_CFG_IN_SENT;
 				pcb->state = NG_BTSOCKET_L2CAP_CONFIGURING;
 
 				ng_btsocket_l2cap_timeout(pcb);
 			} else {
 				/* Send disconnect request with "zero" token */
 				ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb);
 
 				/* ... and close the socket */
 				pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 				soisdisconnected(pcb->so);
 			}
 		}
 	} else {
 		/*
 		 * We have failed to open connection, so convert result
 		 * code to "errno" code and disconnect the socket. Channel
 		 * already has been closed.
 		 */
 
 		pcb->so->so_error = ng_btsocket_l2cap_result2errno(rsp->result);
 		pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 		soisdisconnected(pcb->so);
 	}
 
 unlock:
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_process_l2ca_con_req_rsp */
 
 /*
  * Process L2CA_ConnectRsp response.
  *
  * Returns EMSGSIZE for a wrongly sized message, ENOENT when no socket
  * matches the token or the socket is not in the CONNECTING state, and 0
  * otherwise (including when the peer reported a failure).
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_con_rsp_rsp(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_con_rsp_op	*rsp;
 	ng_btsocket_l2cap_pcb_t		*pcb;
 	int				 error;
 
 	if (msg->header.arglen != sizeof(*rsp))
 		return (EMSGSIZE);
 
 	rsp = (ng_l2cap_l2ca_con_rsp_op *)(msg->data);
 	error = 0;
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/* Look for the socket with the token */
 	pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token);
 	if (pcb == NULL) {
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 		return (ENOENT);
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_ConnectRsp response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d\n",
 		__func__, msg->header.token,
 		pcb->src.b[5], pcb->src.b[4], pcb->src.b[3],
 		pcb->src.b[2], pcb->src.b[1], pcb->src.b[0],
 		pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3],
 		pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0],
 		pcb->psm, pcb->cid, rsp->result, pcb->state);
 
 	if (pcb->state != NG_BTSOCKET_L2CAP_CONNECTING) {
 		error = ENOENT;
 		goto unlock;
 	}
 
 	ng_btsocket_l2cap_untimeout(pcb);
 
 	if (rsp->result == NG_L2CAP_SUCCESS) {
 		/* Move to CONFIGURING state and wait for CONFIG_IND */
 		pcb->cfg_state = 0;
 		pcb->state = NG_BTSOCKET_L2CAP_CONFIGURING;
 		ng_btsocket_l2cap_timeout(pcb);
 	} else {
 		/* Failure: close the socket - channel already closed */
 		pcb->so->so_error = ng_btsocket_l2cap_result2errno(rsp->result);
 		pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 		soisdisconnected(pcb->so);
 	}
 
 unlock:
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_process_l2ca_con_rsp_rsp */
 
 /*
  * Process L2CA_Connect indicator. Find socket that listens on address 
  * and PSM. Find exact or closest match. Create new socket and initiate 
  * connection.
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_con_ind(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_con_ind_ip	*ip = NULL;
 	ng_btsocket_l2cap_pcb_t		*pcb = NULL, *pcb1 = NULL;
 	int				 error = 0;
 	u_int32_t			 token = 0;
 	u_int16_t			 result = 0;
 
 	if (msg->header.arglen != sizeof(*ip))
 		return (EMSGSIZE);
 
 	ip = (ng_l2cap_l2ca_con_ind_ip *)(msg->data);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_Connect indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, ident=%d\n",
 		__func__,
 		rt->src.b[5], rt->src.b[4], rt->src.b[3],
 		rt->src.b[2], rt->src.b[1], rt->src.b[0],
 		ip->bdaddr.b[5], ip->bdaddr.b[4], ip->bdaddr.b[3],
 		ip->bdaddr.b[2], ip->bdaddr.b[1], ip->bdaddr.b[0],
 		ip->psm, ip->lcid, ip->ident);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 	
 	pcb = ng_btsocket_l2cap_pcb_by_addr(&rt->src, ip->psm);
 	if (pcb != NULL) {
-		struct socket	*so1 = NULL;
+		struct socket *so1;
 
 		mtx_lock(&pcb->pcb_mtx);
 
-		/*
-		 * First check the pending connections queue and if we have
-		 * space then create new socket and set proper source address.
-		 */
-
-		if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
-			CURVNET_SET(pcb->so->so_vnet);
-			so1 = sonewconn(pcb->so, 0);
-			CURVNET_RESTORE();
-		}
-
+		CURVNET_SET(pcb->so->so_vnet);
+		so1 = sonewconn(pcb->so, 0);
+		CURVNET_RESTORE();
 		if (so1 == NULL) {
 			result = NG_L2CAP_NO_RESOURCES;
 			goto respond;
 		}
 
 		/*
 		 * If we got here then we have created a new socket, so complete
 		 * the connection. If we were listening on a specific address then
 		 * copy the source address from the listening socket, otherwise copy
 		 * the source address from the hook's routing information.
 		 */
 
 		pcb1 = so2l2cap_pcb(so1);
 		KASSERT((pcb1 != NULL),
 ("%s: pcb1 == NULL\n", __func__));
 
  		mtx_lock(&pcb1->pcb_mtx);
 
 		if (bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(pcb->src)) != 0)
 			bcopy(&pcb->src, &pcb1->src, sizeof(pcb1->src));
 		else
 			bcopy(&rt->src, &pcb1->src, sizeof(pcb1->src));
 
 		pcb1->flags &= ~NG_BTSOCKET_L2CAP_CLIENT;
 
 		bcopy(&ip->bdaddr, &pcb1->dst, sizeof(pcb1->dst));
 		pcb1->psm = ip->psm;
 		pcb1->cid = ip->lcid;
 		pcb1->rt = rt;
 
 		/* Copy socket settings */
 		pcb1->imtu = pcb->imtu;
 		bcopy(&pcb->oflow, &pcb1->oflow, sizeof(pcb1->oflow));
 		pcb1->flush_timo = pcb->flush_timo;
 
 		token = pcb1->token;
 	} else
 		/* Nobody listens on requested BDADDR/PSM */
 		result = NG_L2CAP_PSM_NOT_SUPPORTED;
 
 respond:
 	error = ng_btsocket_l2cap_send_l2ca_con_rsp_req(token, rt,
 							&ip->bdaddr,
 							ip->ident, ip->lcid,
 							result,ip->linktype);
 	if (pcb1 != NULL) {
 		if (error != 0) {
 			pcb1->so->so_error = error;
 			pcb1->state = NG_BTSOCKET_L2CAP_CLOSED;
 			soisdisconnected(pcb1->so);
 		} else {
 			pcb1->state = NG_BTSOCKET_L2CAP_CONNECTING;
 			soisconnecting(pcb1->so);
 
 			ng_btsocket_l2cap_timeout(pcb1);
 		}
 
 		mtx_unlock(&pcb1->pcb_mtx);
 	}
 
 	if (pcb != NULL)
 		mtx_unlock(&pcb->pcb_mtx);
 
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_process_l2ca_con_ind */
 /*
  * Process an encryption change notification.  The socket is looked up by
  * source address, local channel ID and ID type, and its encryption state
  * is updated from the message.  When the socket requires encryption and is
  * waiting for this change, it is opened on success or disconnected with
  * EPERM on failure.
  *
  * Returns EMSGSIZE for a wrongly sized message, ENOENT when no matching
  * socket exists, and 0 otherwise.
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_enc_change(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_enc_chg_op	*op = NULL;
 	ng_btsocket_l2cap_pcb_t		*pcb = NULL;
 
 	if (msg->header.arglen != sizeof(*op))
 		return (EMSGSIZE);
 
 	op = (ng_l2cap_l2ca_enc_chg_op *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	pcb = ng_btsocket_l2cap_pcb_by_cid(&rt->src, op->lcid,
 					   op->idtype);
 	if (pcb == NULL) {
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 		return (ENOENT);
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 	pcb->encryption = op->result;
 
 	if (pcb->need_encrypt) {
 		ng_btsocket_l2cap_untimeout(pcb);
 
 		/*
 		 * The braces around the warning are required: the debug
 		 * macro expands to a bare "if", which would otherwise
 		 * capture the "else" branches below.
 		 */
 		if (pcb->state != NG_BTSOCKET_L2CAP_W4_ENC_CHANGE) {
 			NG_BTSOCKET_L2CAP_WARN("%s: Invalid pcb status %d\n",
 					       __func__, pcb->state);
 		} else if (pcb->encryption) {
 			/* Link is encrypted now - mark socket as open */
 			pcb->state = NG_BTSOCKET_L2CAP_OPEN;
 			soisconnected(pcb->so);
 		} else {
 			/* Encryption failed: disconnect with "zero" token */
 			pcb->so->so_error = EPERM;
 			ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb);
 			pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 			soisdisconnected(pcb->so);
 		}
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (0);
 } /* ng_btsocket_l2cap_process_l2ca_enc_change */
 /*
  * Process L2CA_Config response.
  *
  * Returns EMSGSIZE for a wrongly sized message, ENOENT when no socket
  * matches the token or the socket is not in the CONFIGURING state, and 0
  * otherwise (including when the configuration was rejected).
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_cfg_req_rsp(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_cfg_op	*cfg;
 	ng_btsocket_l2cap_pcb_p	 pcb;
 	int			 error;
 
 	if (msg->header.arglen != sizeof(*cfg))
 		return (EMSGSIZE);
 
 	cfg = (ng_l2cap_l2ca_cfg_op *)(msg->data);
 	error = 0;
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/*
 	 * Socket must have issued a Configure request, so we must have a
 	 * socket that wants to be configured. Use Netgraph message token
 	 * to find it
 	 */
 
 	pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token);
 	if (pcb == NULL) {
 		/*
 		 * XXX FIXME what to do here? We could not find a
 		 * socket with requested token. We even can not send
 		 * Disconnect, because we do not know channel ID
 		 */
 
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 		return (ENOENT);
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_Config response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d, " \
 "cfg_state=%x\n",
 		__func__, msg->header.token,
 		pcb->src.b[5], pcb->src.b[4], pcb->src.b[3],
 		pcb->src.b[2], pcb->src.b[1], pcb->src.b[0],
 		pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3],
 		pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0],
 		pcb->psm, pcb->cid, cfg->result, pcb->state, pcb->cfg_state);
 
 	if (pcb->state != NG_BTSOCKET_L2CAP_CONFIGURING) {
 		error = ENOENT;
 		goto unlock;
 	}
 
 	if (cfg->result != NG_L2CAP_SUCCESS) {
 		/*
 		 * Something went wrong. Could be unacceptable parameters,
 		 * reject or unknown option. That's too bad, but we will
 		 * not negotiate. Send Disconnect and close the channel.
 		 */
 
 		ng_btsocket_l2cap_untimeout(pcb);
 
 		if (cfg->result == NG_L2CAP_UNACCEPTABLE_PARAMS ||
 		    cfg->result == NG_L2CAP_UNKNOWN_OPTION)
 			pcb->so->so_error = EINVAL;
 		else
 			pcb->so->so_error = ECONNRESET;
 
 		/* Send disconnect with "zero" token */
 		ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb);
 
 		/* ... and close the socket */
 		pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 		soisdisconnected(pcb->so);
 
 		goto unlock;
 	}
 
 	/*
 	 * XXX FIXME Actually set flush and link timeout.
 	 * Set QoS here if required. Resolve conflicts (flush_timo).
 	 * Save incoming MTU (peer's outgoing MTU) and outgoing flow
 	 * spec.
 	 */
 
 	pcb->imtu = cfg->imtu;
 	bcopy(&cfg->oflow, &pcb->oflow, sizeof(pcb->oflow));
 	pcb->flush_timo = cfg->flush_timo;
 
 	/*
 	 * We have configured incoming side, so record it and check
 	 * if configuration is complete. If complete then mark socket
 	 * as connected, otherwise wait for the peer.
 	 */
 
 	pcb->cfg_state &= ~NG_BTSOCKET_L2CAP_CFG_IN_SENT;
 	pcb->cfg_state |= NG_BTSOCKET_L2CAP_CFG_IN;
 
 	if (pcb->cfg_state == NG_BTSOCKET_L2CAP_CFG_BOTH) {
 		/* Configuration complete - mark socket as open */
 		ng_btsocket_l2cap_untimeout(pcb);
 		pcb->state = NG_BTSOCKET_L2CAP_OPEN;
 		soisconnected(pcb->so);
 	}
 
 unlock:
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_process_l2ca_cfg_req_rsp */
 
 /*
  * Process L2CA_ConfigRsp response
  *
  * The socket is located by the token stored in the Netgraph message header.
  * A successful result marks the outgoing side as configured; the socket is
  * then either opened (both directions are configured) or an L2CA_Config
  * request is sent for the incoming path if that has not happened yet.
  * An unsuccessful result, or a failure to send the L2CA_Config request,
  * sends L2CA_Disconnect and closes the socket.
  *
  * Returns EMSGSIZE if the argument size does not match the response
  * structure, ENOENT if no socket owns the token or the socket is not in
  * the CONFIGURING state, otherwise 0 or the error reported while sending
  * the L2CA_Config request. Note that an unsuccessful result from the peer
  * still returns 0 and leaves so_error untouched.
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_cfg_rsp_op	*rsp;
 	ng_btsocket_l2cap_pcb_t		*pcb;
 	int				 error = 0;
 
 	if (msg->header.arglen != sizeof(*rsp))
 		return (EMSGSIZE);
 
 	rsp = (ng_l2cap_l2ca_cfg_rsp_op *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/* The token identifies the socket that issued the request */
 	pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token);
 	if (pcb == NULL) {
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 		return (ENOENT);
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_ConfigRsp response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d, " \
 "cfg_state=%x\n",
 		__func__, msg->header.token,
 		pcb->src.b[5], pcb->src.b[4], pcb->src.b[3],
 		pcb->src.b[2], pcb->src.b[1], pcb->src.b[0],
 		pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3],
 		pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0],
 		pcb->psm, pcb->cid, rsp->result, pcb->state, pcb->cfg_state);
 
 	/* Only a socket that is being configured may consume this message */
 	if (pcb->state != NG_BTSOCKET_L2CAP_CONFIGURING) {
 		error = ENOENT;
 		goto out;
 	}
 
 	/* Anything but success tears the channel down */
 	if (rsp->result != NG_L2CAP_SUCCESS)
 		goto disconnect;
 
 	/*
 	 * The remote side is configured now: replace the "response sent"
 	 * mark with the "outgoing side done" mark.
 	 */
 
 	pcb->cfg_state =
 	    (pcb->cfg_state & ~NG_BTSOCKET_L2CAP_CFG_OUT_SENT) |
 	    NG_BTSOCKET_L2CAP_CFG_OUT;
 
 	if (pcb->cfg_state == NG_BTSOCKET_L2CAP_CFG_BOTH) {
 		/* Both directions are configured - the socket is open */
 		ng_btsocket_l2cap_untimeout(pcb);
 		pcb->state = NG_BTSOCKET_L2CAP_OPEN;
 		soisconnected(pcb->so);
 	} else if ((pcb->cfg_state & NG_BTSOCKET_L2CAP_CFG_IN_SENT) == 0) {
 		/* Local side not requested yet - send L2CA_Config request */
 		error = ng_btsocket_l2cap_send_l2ca_cfg_req(pcb);
 		if (error != 0)
 			goto disconnect;
 
 		pcb->cfg_state |= NG_BTSOCKET_L2CAP_CFG_IN_SENT;
 	}
 
 	goto out;
 
 disconnect:
 	ng_btsocket_l2cap_untimeout(pcb);
 
 	/* L2CA_Disconnect goes out with a "zero" token */
 	ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb);
 
 	/* The socket is closed regardless of the outcome of the send */
 	pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 	soisdisconnected(pcb->so);
 
 out:
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp */
 
 /*
  * Process L2CA_Config indicator
  *
  * The socket is looked up by the source address of the route entry and the
  * local channel ID from the message (BR/EDR ID type). While the socket is
  * in the CONFIGURING state the peer's parameters (outgoing MTU, incoming
  * flow spec, flush timeout) are recorded and, unless it was already done,
  * an L2CA_Config response is sent. If that send fails the socket is
  * disconnected and closed with so_error set to the send error.
  *
  * Returns EMSGSIZE on argument size mismatch, ENOENT if there is no
  * matching socket or it is not in the CONFIGURING state, otherwise 0 or
  * the error from sending the L2CA_Config response.
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_cfg_ind(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_cfg_ind_ip	*ind;
 	ng_btsocket_l2cap_pcb_t		*pcb;
 	int				 error = 0;
 
 	if (msg->header.arglen != sizeof(*ind))
 		return (EMSGSIZE);
 
 	ind = (ng_l2cap_l2ca_cfg_ind_ip *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/* Find the socket that owns the given channel ID */
 	pcb = ng_btsocket_l2cap_pcb_by_cid(&rt->src, ind->lcid,
 					   NG_L2CAP_L2CA_IDTYPE_BREDR);
 	if (pcb == NULL) {
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 		return (ENOENT);
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_Config indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, state=%d, cfg_state=%x\n",
 		__func__,
 		pcb->src.b[5], pcb->src.b[4], pcb->src.b[3],
 		pcb->src.b[2], pcb->src.b[1], pcb->src.b[0],
 		pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3],
 		pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0],
 		pcb->psm, pcb->cid, pcb->state, pcb->cfg_state);
 
 	/* XXX FIXME re-configuration on open socket */
 	if (pcb->state != NG_BTSOCKET_L2CAP_CONFIGURING) {
 		error = ENOENT;
 		goto out;
 	}
 
 	/*
 	 * XXX FIXME Actually set flush and link timeout. Set QoS here if
 	 * required. Resolve conflicts (flush_timo). For now just remember
 	 * outgoing MTU (peer's incoming MTU) and incoming flow spec.
 	 */
 
 	pcb->omtu = ind->omtu;
 	bcopy(&ind->iflow, &pcb->iflow, sizeof(pcb->iflow));
 	pcb->flush_timo = ind->flush_timo;
 
 	/* Nothing more to do if the response has been sent already */
 	if (pcb->cfg_state & NG_BTSOCKET_L2CAP_CFG_OUT_SENT)
 		goto out;
 
 	/* Answer the peer; on success just note that the response is out */
 	error = ng_btsocket_l2cap_send_l2ca_cfg_rsp(pcb);
 	if (error == 0) {
 		pcb->cfg_state |= NG_BTSOCKET_L2CAP_CFG_OUT_SENT;
 		goto out;
 	}
 
 	/* Could not respond: report the error and close the channel */
 	ng_btsocket_l2cap_untimeout(pcb);
 
 	pcb->so->so_error = error;
 
 	/* L2CA_Disconnect goes out with a "zero" token */
 	ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb);
 
 	pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 	soisdisconnected(pcb->so);
 
 out:
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_process_l2ca_cfg_ind */
 
 /*
  * Process L2CA_Disconnect response
  *
  * The socket that issued the L2CA_Disconnect request is located by the
  * Netgraph message token. Unless it is closed already, its timeout is
  * cancelled and the socket is marked closed and disconnected, whatever
  * the result in the response says.
  *
  * Returns EMSGSIZE on argument size mismatch and 0 otherwise (including
  * the case when no socket owns the token).
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_discon_rsp(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_discon_op	*rsp;
 	ng_btsocket_l2cap_pcb_t	*pcb;
 
 	/* Validate the message size first */
 	if (msg->header.arglen != sizeof(*rsp))
 		return (EMSGSIZE);
 
 	rsp = (ng_l2cap_l2ca_discon_op *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/*
 	 * The request came from the socket layer, so some socket should be
 	 * waiting for this response. The token tells which one.
 	 */
 
 	pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token);
 	if (pcb == NULL) {
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 		return (0);
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	/* A socket that is closed already needs no further work */
 	if (pcb->state == NG_BTSOCKET_L2CAP_CLOSED)
 		goto out;
 
 	/* XXX Close socket no matter what rsp->result says */
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_Disconnect response, token=%d, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, state=%d\n",
 		__func__, msg->header.token,
 		pcb->src.b[5], pcb->src.b[4], pcb->src.b[3],
 		pcb->src.b[2], pcb->src.b[1], pcb->src.b[0],
 		pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3],
 		pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0],
 		pcb->psm, pcb->cid, rsp->result, pcb->state);
 
 	ng_btsocket_l2cap_untimeout(pcb);
 
 	pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 	soisdisconnected(pcb->so);
 
 out:
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (0);
 } /* ng_btsocket_l2cap_process_l2ca_discon_rsp */
 
 /*
  * Process L2CA_Disconnect indicator
  *
  * The socket is looked up by the source address of the route entry plus
  * the channel ID and ID type carried in the message. A pending timeout is
  * cancelled and the socket is marked closed and disconnected.
  *
  * Returns EMSGSIZE on argument size mismatch and 0 otherwise (including
  * the case when no socket matches).
  */
 
 static int
 ng_btsocket_l2cap_process_l2ca_discon_ind(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_discon_ind_ip	*ind;
 	ng_btsocket_l2cap_pcb_t		*pcb;
 
 	/* Validate the message size first */
 	if (msg->header.arglen != sizeof(*ind))
 		return (EMSGSIZE);
 
 	ind = (ng_l2cap_l2ca_discon_ind_ip *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/* Which socket owns this channel? */
 	pcb = ng_btsocket_l2cap_pcb_by_cid(&rt->src, ind->lcid,
 					   ind->idtype);
 	if (pcb == NULL) {
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 		return (0);
 	}
 
 	/*
 	 * The channel is gone already, so all that is left is to disconnect
 	 * the socket. Any request that was still pending can not be helped
 	 * at this point.
 	 */
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_Disconnect indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, state=%d\n",
 		__func__,
 		pcb->src.b[5], pcb->src.b[4], pcb->src.b[3],
 		pcb->src.b[2], pcb->src.b[1], pcb->src.b[0],
 		pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3],
 		pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0],
 		pcb->psm, pcb->cid, pcb->state);
 
 	/* Cancel the timeout only when one is actually armed */
 	if ((pcb->flags & NG_BTSOCKET_L2CAP_TIMO) != 0)
 		ng_btsocket_l2cap_untimeout(pcb);
 
 	pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 	soisdisconnected(pcb->so);
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (0);
 } /* ng_btsocket_l2cap_process_l2ca_discon_ind */
 
 /*
  * Process L2CA_Write response
  *
  * The socket is located by the Netgraph message token and must be in the
  * OPEN state. The write timeout is cancelled, the record at the head of
  * the send buffer is dropped and, if more data is queued, the next send is
  * started. The result from the response is translated to an errno and
  * stored in so_error, and the writer is woken up.
  *
  * Returns EMSGSIZE on argument size mismatch, ENOENT if no socket owns
  * the token or the socket is not open, and 0 otherwise.
  */
 
 static int 
 ng_btsocket_l2cap_process_l2ca_write_rsp(struct ng_mesg *msg,
 		ng_btsocket_l2cap_rtentry_p rt)
 {
 	ng_l2cap_l2ca_write_op	*rsp;
 	ng_btsocket_l2cap_pcb_t	*pcb;
 	int			 error = 0;
 
 	/* Validate the message size first */
 	if (msg->header.arglen != sizeof(*rsp))
 		return (EMSGSIZE);
 
 	rsp = (ng_l2cap_l2ca_write_op *)(msg->data);
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/* The token identifies the socket that issued the write */
 	pcb = ng_btsocket_l2cap_pcb_by_token(msg->header.token);
 	if (pcb == NULL) {
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 		return (ENOENT);
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CA_Write response, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, lcid=%d, result=%d, length=%d, " \
 "state=%d\n",		__func__,
 			pcb->src.b[5], pcb->src.b[4], pcb->src.b[3],
 			pcb->src.b[2], pcb->src.b[1], pcb->src.b[0],
 			pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3],
 			pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0],
 			pcb->psm, pcb->cid, rsp->result, rsp->length,
 			pcb->state);
 
 	if (pcb->state != NG_BTSOCKET_L2CAP_OPEN) {
 		error = ENOENT;
 		goto out;
 	}
 
 	ng_btsocket_l2cap_untimeout(pcb);
 
 	/*
 	 * The record that was just written is done with. If anything else
 	 * is queued start sending it; a record that can not be sent is
 	 * dropped as well (XXX).
 	 */
 
 	sbdroprecord(&pcb->so->so_snd);
 	if (sbavail(&pcb->so->so_snd) > 0) {
 		if (ng_btsocket_l2cap_send2(pcb) != 0)
 			sbdroprecord(&pcb->so->so_snd); /* XXX */
 		else
 			ng_btsocket_l2cap_timeout(pcb);
 	}
 
 	/* Publish the result of the write and wake up the sender */
 	pcb->so->so_error = ng_btsocket_l2cap_result2errno(rsp->result);
 	sowwakeup(pcb->so);
 
 out:
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_process_l2ca_write_rsp */
 
 /*
  * Send L2CA_Connect request
  *
  * Builds an NGM_L2CAP_L2CA_CON message from the PCB (destination address,
  * PSM, link type derived from the destination address type, ID type),
  * tags it with the PCB token and sends it over the hook of the PCB route.
  * The PCB mutex must be held by the caller.
  *
  * Returns ENETDOWN if the PCB has no route or the route hook is missing
  * or invalid, ENOMEM if the message can not be allocated, otherwise the
  * status reported by NG_SEND_MSG_HOOK().
  */
 
 static int
 ng_btsocket_l2cap_send_l2ca_con_req(ng_btsocket_l2cap_pcb_p pcb)
 {
 	ng_l2cap_l2ca_con_ip	*req;
 	struct ng_mesg		*msg = NULL;
 	int			 error = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* Without a usable hook there is nowhere to send the request */
 	if (pcb->rt == NULL)
 		return (ENETDOWN);
 	if (pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CON,
 		sizeof(*req), M_NOWAIT);
 	if (msg == NULL)
 		return (ENOMEM);
 
 	/* The token lets the response be matched back to this socket */
 	msg->header.token = pcb->token;
 
 	req = (ng_l2cap_l2ca_con_ip *)(msg->data);
 	req->psm = pcb->psm;
 	req->idtype = pcb->idtype;
 	req->linktype = ng_btsock_l2cap_addrtype_to_linktype(pcb->dsttype);
 	bcopy(&pcb->dst, &req->bdaddr, sizeof(req->bdaddr));
 
 	NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg, pcb->rt->hook, 0);
 
 	return (error);
 } /* ng_btsocket_l2cap_send_l2ca_con_req */
 
 /*
  * Send L2CA_Connect response
  *
  * Builds an NGM_L2CAP_L2CA_CON_RSP message from the arguments (peer
  * address, identifier, local channel ID, link type, result; status is
  * always zero), tags it with the given token and sends it over the hook
  * of the given route entry.
  *
  * Returns ENETDOWN if the route entry or its hook is missing or invalid,
  * ENOMEM if the message can not be allocated, otherwise the status
  * reported by NG_SEND_MSG_HOOK().
  */
 
 static int
 ng_btsocket_l2cap_send_l2ca_con_rsp_req(u_int32_t token,
 		ng_btsocket_l2cap_rtentry_p rt, bdaddr_p dst, int ident, 
 					int lcid, int result, int linktype)
 {
 	ng_l2cap_l2ca_con_rsp_ip	*rsp;
 	struct ng_mesg			*msg = NULL;
 	int				 error = 0;
 
 	/* Without a usable hook there is nowhere to send the response */
 	if (rt == NULL)
 		return (ENETDOWN);
 	if (rt->hook == NULL || NG_HOOK_NOT_VALID(rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CON_RSP,
 		sizeof(*rsp), M_NOWAIT);
 	if (msg == NULL)
 		return (ENOMEM);
 
 	msg->header.token = token;
 
 	rsp = (ng_l2cap_l2ca_con_rsp_ip *)(msg->data);
 	rsp->ident = ident;
 	rsp->lcid = lcid;
 	rsp->linktype = linktype;
 	rsp->result = result;
 	rsp->status = 0;
 	bcopy(dst, &rsp->bdaddr, sizeof(rsp->bdaddr));
 
 	NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg, rt->hook, 0);
 
 	return (error);
 } /* ng_btsocket_l2cap_send_l2ca_con_rsp_req */
 
 /*
  * Send L2CA_Config request
  *
  * Builds an NGM_L2CAP_L2CA_CFG message from the PCB (channel ID, incoming
  * MTU, outgoing flow spec, flush and link timeouts), tags it with the PCB
  * token and sends it over the hook of the PCB route. The PCB mutex must be
  * held by the caller.
  *
  * Returns ENETDOWN if the PCB has no route or the route hook is missing
  * or invalid, ENOMEM if the message can not be allocated, otherwise the
  * status reported by NG_SEND_MSG_HOOK().
  */
 
 static int
 ng_btsocket_l2cap_send_l2ca_cfg_req(ng_btsocket_l2cap_pcb_p pcb)
 {
 	ng_l2cap_l2ca_cfg_ip	*req;
 	struct ng_mesg		*msg = NULL;
 	int			 error = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* Without a usable hook there is nowhere to send the request */
 	if (pcb->rt == NULL)
 		return (ENETDOWN);
 	if (pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CFG,
 		sizeof(*req), M_NOWAIT);
 	if (msg == NULL)
 		return (ENOMEM);
 
 	/* The token lets the response be matched back to this socket */
 	msg->header.token = pcb->token;
 
 	req = (ng_l2cap_l2ca_cfg_ip *)(msg->data);
 	req->lcid = pcb->cid;
 	req->imtu = pcb->imtu;
 	req->flush_timo = pcb->flush_timo;
 	req->link_timo = pcb->link_timo;
 	bcopy(&pcb->oflow, &req->oflow, sizeof(req->oflow));
 
 	NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg, pcb->rt->hook, 0);
 
 	return (error);
 } /* ng_btsocket_l2cap_send_l2ca_cfg_req */
 
 /*
  * Send L2CA_Config response
  *
  * Builds an NGM_L2CAP_L2CA_CFG_RSP message from the PCB (channel ID,
  * outgoing MTU, incoming flow spec), tags it with the PCB token and sends
  * it over the hook of the PCB route. The PCB mutex must be held by the
  * caller.
  *
  * Returns ENETDOWN if the PCB has no route or the route hook is missing
  * or invalid, ENOMEM if the message can not be allocated, otherwise the
  * status reported by NG_SEND_MSG_HOOK().
  */
 
 static int
 ng_btsocket_l2cap_send_l2ca_cfg_rsp(ng_btsocket_l2cap_pcb_p pcb)
 {
 	ng_l2cap_l2ca_cfg_rsp_ip	*rsp;
 	struct ng_mesg			*msg = NULL;
 	int				 error = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* Without a usable hook there is nowhere to send the response */
 	if (pcb->rt == NULL)
 		return (ENETDOWN);
 	if (pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_CFG_RSP,
 		sizeof(*rsp), M_NOWAIT);
 	if (msg == NULL)
 		return (ENOMEM);
 
 	/* The token lets the reply be matched back to this socket */
 	msg->header.token = pcb->token;
 
 	rsp = (ng_l2cap_l2ca_cfg_rsp_ip *)(msg->data);
 	rsp->lcid = pcb->cid;
 	rsp->omtu = pcb->omtu;
 	bcopy(&pcb->iflow, &rsp->iflow, sizeof(rsp->iflow));
 
 	NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg, pcb->rt->hook, 0);
 
 	return (error);
 } /* ng_btsocket_l2cap_send_l2ca_cfg_rsp */
 
 /*
  * Send L2CA_Disconnect request
  *
  * Builds an NGM_L2CAP_L2CA_DISCON message for the channel of the PCB
  * (channel ID and ID type), tags it with the token supplied by the caller
  * and sends it over the hook of the PCB route. The PCB mutex must be held
  * by the caller.
  *
  * Returns ENETDOWN if the PCB has no route or the route hook is missing
  * or invalid, ENOMEM if the message can not be allocated, otherwise the
  * status reported by NG_SEND_MSG_HOOK().
  */
 
 static int
 ng_btsocket_l2cap_send_l2ca_discon_req(u_int32_t token,
 		ng_btsocket_l2cap_pcb_p pcb)
 {
 	ng_l2cap_l2ca_discon_ip	*req;
 	struct ng_mesg		*msg = NULL;
 	int			 error = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* Without a usable hook there is nowhere to send the request */
 	if (pcb->rt == NULL)
 		return (ENETDOWN);
 	if (pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(msg, NGM_L2CAP_COOKIE, NGM_L2CAP_L2CA_DISCON,
 		sizeof(*req), M_NOWAIT);
 	if (msg == NULL)
 		return (ENOMEM);
 
 	/* Callers choose the token (the PCB token is not used here) */
 	msg->header.token = token;
 
 	req = (ng_l2cap_l2ca_discon_ip *)(msg->data);
 	req->idtype = pcb->idtype;
 	req->lcid = pcb->cid;
 
 	NG_SEND_MSG_HOOK(error, ng_btsocket_l2cap_node, msg, pcb->rt->hook, 0);
 
 	return (error);
 } /* ng_btsocket_l2cap_send_l2ca_discon_req */
 
 /*****************************************************************************
  *****************************************************************************
  **                              Socket interface
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * L2CAP sockets data input routine
  *
  * Consumes the mbuf chain "m" received on "hook". The packet starts with
  * a 16-bit ID type, followed by the L2CAP header and the payload. The
  * packet is either appended to the receive queue of the matching open
  * socket (regular channels, ATT and SMP), or duplicated to every open
  * socket that is bound to the source address and PSM and has SO_BROADCAST
  * set (connectionless channel). In all other cases, and on any error, the
  * packet is dropped. The mbuf is always consumed.
  */
 
 static void
 ng_btsocket_l2cap_data_input(struct mbuf *m, hook_p hook)
 {
 	ng_l2cap_hdr_t			*hdr = NULL;
 	ng_l2cap_clt_hdr_t		*clt_hdr = NULL;
 	ng_btsocket_l2cap_pcb_t		*pcb = NULL;
 	ng_btsocket_l2cap_rtentry_t	*rt = NULL;
 	uint16_t			 idtype, dcid, length;
 
 	if (hook == NULL) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Invalid source hook for L2CAP data packet\n", __func__);
 		goto drop;
 	}
 
 	rt = (ng_btsocket_l2cap_rtentry_t *) NG_HOOK_PRIVATE(hook);
 	if (rt == NULL) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Could not find out source bdaddr for L2CAP data packet\n", __func__);
 		goto drop;
 	}
 
 	/*
 	 * Strip the ID type that precedes the L2CAP header. m_pullup()
 	 * returns NULL (and has released the chain) if it fails, so the
 	 * result must be checked before it is dereferenced.
 	 */
 	m = m_pullup(m, sizeof(uint16_t));
 	if (m == NULL)
 		goto drop;
 
 	idtype = *mtod(m, uint16_t *);
 	m_adj(m, sizeof(uint16_t));
 	
 	/* Make sure we can access header */
 	if (m->m_pkthdr.len < sizeof(*hdr)) {
 		NG_BTSOCKET_L2CAP_ERR(
 "%s: L2CAP data packet too small, len=%d\n", __func__, m->m_pkthdr.len);
 		goto drop;
 	}
 
 	if (m->m_len < sizeof(*hdr)) { 
 		m = m_pullup(m, sizeof(*hdr));
 		if (m == NULL)
 			goto drop;
 	}
 
 	/*
 	 * Strip L2CAP packet header and verify packet length. The header
 	 * fields are copied out right away: "hdr" points into the mbuf and
 	 * a later m_pullup() (connectionless path) may replace the leading
 	 * mbuf, which would leave the pointer dangling.
 	 */
 	hdr = mtod(m, ng_l2cap_hdr_t *);
 	dcid = hdr->dcid;
 	length = hdr->length;
 	m_adj(m, sizeof(*hdr));
 
 	if (length != m->m_pkthdr.len) {
 		NG_BTSOCKET_L2CAP_ERR(
 "%s: Bad L2CAP data packet length, len=%d, length=%d\n",
 			__func__, m->m_pkthdr.len, length);
 		goto drop;
 	}
 
 	/*
 	 * Now process packet. Two cases:
 	 *
 	 * 1) Normal packet (cid != 2) then find connected socket and append
 	 *    mbuf to the socket queue. Wakeup socket.
 	 *
 	 * 2) Broadcast packet (cid == 2) then find all sockets that connected
 	 *    to the given PSM and have SO_BROADCAST bit set and append mbuf
 	 *    to the socket queue. Wakeup socket.
 	 */
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Received L2CAP data packet: src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dcid=%d, length=%d\n",
 		__func__, 
 		rt->src.b[5], rt->src.b[4], rt->src.b[3],
 		rt->src.b[2], rt->src.b[1], rt->src.b[0],
 		dcid, length);
 
 	if ((dcid >= NG_L2CAP_FIRST_CID) ||
 	    (idtype == NG_L2CAP_L2CA_IDTYPE_ATT)||
 	    (idtype == NG_L2CAP_L2CA_IDTYPE_SMP)
 	    ){
 
 		mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 		/* Normal packet: find connected socket */
 		pcb = ng_btsocket_l2cap_pcb_by_cid(&rt->src, dcid, idtype);
 		if (pcb == NULL) {
 			mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 			goto drop;
 		}
 
 		mtx_lock(&pcb->pcb_mtx);
 
 		if (pcb->state != NG_BTSOCKET_L2CAP_OPEN) {
 			NG_BTSOCKET_L2CAP_ERR(
 "%s: No connected socket found, src bdaddr=%x:%x:%x:%x:%x:%x, dcid=%d, " \
 "state=%d\n",			__func__,
 				rt->src.b[5], rt->src.b[4], rt->src.b[3],
 				rt->src.b[2], rt->src.b[1], rt->src.b[0],
 				dcid, pcb->state);
 
 			mtx_unlock(&pcb->pcb_mtx);
 			mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 			goto drop;
 		}
 
 		/* Check packet size against socket's incoming MTU */
 		if (length > pcb->imtu) {
 			NG_BTSOCKET_L2CAP_ERR(
 "%s: L2CAP data packet too big, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dcid=%d, length=%d, imtu=%d\n",
 				__func__, 
 				rt->src.b[5], rt->src.b[4], rt->src.b[3],
 				rt->src.b[2], rt->src.b[1], rt->src.b[0],
 				dcid, length, pcb->imtu);
 
 			mtx_unlock(&pcb->pcb_mtx);
 			mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 			goto drop;
 		}
 
 		/* Check if we have enough space in socket receive queue */
 		if (m->m_pkthdr.len > sbspace(&pcb->so->so_rcv)) {
 
 			/* 
 			 * This is really bad. Receive queue on socket does
 			 * not have enough space for the packet. We do not 
 			 * have any other choice but drop the packet. L2CAP 
 			 * does not provide any flow control.
 			 */
 
 			NG_BTSOCKET_L2CAP_ERR(
 "%s: Not enough space in socket receive queue. Dropping L2CAP data packet, " \
 "src bdaddr=%x:%x:%x:%x:%x:%x, dcid=%d, len=%d, space=%ld\n",
 				__func__,
 				rt->src.b[5], rt->src.b[4], rt->src.b[3],
 				rt->src.b[2], rt->src.b[1], rt->src.b[0],
 				dcid, m->m_pkthdr.len,
 				sbspace(&pcb->so->so_rcv));
 
 			mtx_unlock(&pcb->pcb_mtx);
 			mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 			goto drop;
 		}
 
 		/*
 		 * Append packet to the socket receive queue and wakeup.
 		 * The socket buffer owns the mbuf from here on, so clear
 		 * "m" to keep the common exit path from freeing it.
 		 */
 		sbappendrecord(&pcb->so->so_rcv, m);
 		m = NULL;
 
 		sorwakeup(pcb->so);
 
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 	} else if (dcid == NG_L2CAP_CLT_CID) {
 		/* Broadcast packet: give packet to all sockets  */
 
 		/* Check packet size against connectionless MTU */
 		if (length > NG_L2CAP_MTU_DEFAULT) {
 			NG_BTSOCKET_L2CAP_ERR(
 "%s: Connectionless L2CAP data packet too big, " \
 "src bdaddr=%x:%x:%x:%x:%x:%x, length=%d\n",
 				__func__,
 				rt->src.b[5], rt->src.b[4], rt->src.b[3],
 				rt->src.b[2], rt->src.b[1], rt->src.b[0],
 				length);
 			goto drop;
 		}
 
 		/* Make sure we can access connectionless header */
 		if (m->m_pkthdr.len < sizeof(*clt_hdr)) {
 			NG_BTSOCKET_L2CAP_ERR(
 "%s: Can not get L2CAP connectionless packet header, " \
 "src bdaddr=%x:%x:%x:%x:%x:%x, length=%d\n",
 				__func__,
 				rt->src.b[5], rt->src.b[4], rt->src.b[3],
 				rt->src.b[2], rt->src.b[1], rt->src.b[0],
 				length);
 			goto drop;
 		}
 
 		if (m->m_len < sizeof(*clt_hdr)) {
 			m = m_pullup(m, sizeof(*clt_hdr));
 			if (m == NULL)
 				goto drop;
 		}
 
 		/* Strip connectionless header and deliver packet */
 		clt_hdr = mtod(m, ng_l2cap_clt_hdr_t *);
 		m_adj(m, sizeof(*clt_hdr));
 
 		NG_BTSOCKET_L2CAP_INFO(
 "%s: Got L2CAP connectionless data packet, " \
 "src bdaddr=%x:%x:%x:%x:%x:%x, psm=%d, length=%d\n",
 			__func__,
 			rt->src.b[5], rt->src.b[4], rt->src.b[3],
 			rt->src.b[2], rt->src.b[1], rt->src.b[0],
 			clt_hdr->psm, length);
 
 		mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 		LIST_FOREACH(pcb, &ng_btsocket_l2cap_sockets, next) {
 			struct mbuf	*copy = NULL;
 
 			mtx_lock(&pcb->pcb_mtx);
 
 			/*
 			 * Skip sockets that are bound elsewhere, are not
 			 * open, did not ask for broadcasts or have no room
 			 * for the packet.
 			 */
 			if (bcmp(&rt->src, &pcb->src, sizeof(pcb->src)) != 0 ||
 			    pcb->psm != clt_hdr->psm || 
 			    pcb->state != NG_BTSOCKET_L2CAP_OPEN || 
 			    (pcb->so->so_options & SO_BROADCAST) == 0 || 
 			    m->m_pkthdr.len > sbspace(&pcb->so->so_rcv))
 				goto next;
 
 			/*
 			 * Create a copy of the packet and append it to the 
 			 * socket's queue. If m_dup() failed - no big deal
 			 * it is a broadcast traffic after all
 			 */
 
 			copy = m_dup(m, M_NOWAIT);
 			if (copy != NULL) {
 				sbappendrecord(&pcb->so->so_rcv, copy);
 				sorwakeup(pcb->so);
 			}
 next:
 			mtx_unlock(&pcb->pcb_mtx);
 		}
 
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 	}
 drop:
 	NG_FREE_M(m); /* checks for m != NULL */
 } /* ng_btsocket_l2cap_data_input */
 
 /*
  * L2CAP sockets default message input routine
  *
  * Handles NGM_L2CAP_NODE_HOOK_INFO: the address carried in the message is
  * stored in the route entry attached to the hook. The entry is allocated,
  * linked into the routing table and attached to the hook first if the
  * hook does not have one yet. Messages without a hook, with a wrong
  * argument size or with the "any" address are ignored. Every other
  * command is only logged. The message is always freed.
  */
 
 static void
 ng_btsocket_l2cap_default_msg_input(struct ng_mesg *msg, hook_p hook)
 {
 	ng_btsocket_l2cap_rtentry_t	*rt;
 	ng_l2cap_node_hook_info_ep	*ep;
 
 	if (msg->header.cmd != NGM_L2CAP_NODE_HOOK_INFO) {
 		NG_BTSOCKET_L2CAP_WARN(
 "%s: Unknown message, cmd=%d\n", __func__, msg->header.cmd);
 		goto done;
 	}
 
 	ep = (ng_l2cap_node_hook_info_ep *)msg->data;
 
 	/* The message is only usable with a hook and a well-formed body */
 	if (hook == NULL || msg->header.arglen != sizeof(*ep))
 		goto done;
 
 	/* The "any" address carries no routing information */
 	if (bcmp(&ep->addr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0)
 		goto done;
 
 	mtx_lock(&ng_btsocket_l2cap_rt_mtx);
 
 	rt = (ng_btsocket_l2cap_rtentry_t *) NG_HOOK_PRIVATE(hook);
 	if (rt == NULL) {
 		/* First message on this hook: create its route entry */
 		rt = malloc(sizeof(*rt),
 			M_NETGRAPH_BTSOCKET_L2CAP, M_NOWAIT|M_ZERO);
 		if (rt == NULL) {
 			mtx_unlock(&ng_btsocket_l2cap_rt_mtx);
 			goto done;
 		}
 
 		LIST_INSERT_HEAD(&ng_btsocket_l2cap_rt, rt, next);
 
 		NG_HOOK_SET_PRIVATE(hook, rt);
 	}
 
 	/* New or existing, the entry now describes this hook */
 	bcopy(&ep->addr, &rt->src, sizeof(rt->src));
 	rt->hook = hook;
 
 	mtx_unlock(&ng_btsocket_l2cap_rt_mtx);
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Updating hook \"%s\", src bdaddr=%x:%x:%x:%x:%x:%x\n",
 		__func__, NG_HOOK_NAME(hook), 
 		rt->src.b[5], rt->src.b[4], rt->src.b[3], 
 		rt->src.b[2], rt->src.b[1], rt->src.b[0]);
 
 done:
 	NG_FREE_MSG(msg); /* Checks for msg != NULL */
 } /* ng_btsocket_l2cap_default_msg_input */
 
 /*
  * L2CAP sockets L2CA message input routine
  *
  * Dispatches an L2CA message to the handler that matches its command,
  * passing along the route entry attached to the source hook. Messages
  * that arrive without a hook, or on a hook without a route entry, are
  * dropped. The value returned by the handler is ignored. The message is
  * always freed.
  */
 
 static void
 ng_btsocket_l2cap_l2ca_msg_input(struct ng_mesg *msg, hook_p hook)
 {
 	ng_btsocket_l2cap_rtentry_p	rt;
 
 	if (hook == NULL) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Invalid source hook for L2CA message\n", __func__);
 		NG_FREE_MSG(msg);
 		return;
 	}
 
 	rt = (ng_btsocket_l2cap_rtentry_p) NG_HOOK_PRIVATE(hook);
 	if (rt == NULL) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Could not find out source bdaddr for L2CA message\n", __func__);
 		NG_FREE_MSG(msg);
 		return;
 	}
 
 	switch (msg->header.cmd) {
 	/* L2CA_Connect response */
 	case NGM_L2CAP_L2CA_CON:
 		(void) ng_btsocket_l2cap_process_l2ca_con_req_rsp(msg, rt);
 		break;
 
 	/* L2CA_ConnectRsp response */
 	case NGM_L2CAP_L2CA_CON_RSP:
 		(void) ng_btsocket_l2cap_process_l2ca_con_rsp_rsp(msg, rt);
 		break;
 
 	/* L2CA_Connect indicator */
 	case NGM_L2CAP_L2CA_CON_IND:
 		(void) ng_btsocket_l2cap_process_l2ca_con_ind(msg, rt);
 		break;
 
 	/* L2CA_Config response */
 	case NGM_L2CAP_L2CA_CFG:
 		(void) ng_btsocket_l2cap_process_l2ca_cfg_req_rsp(msg, rt);
 		break;
 
 	/* L2CA_ConfigRsp response */
 	case NGM_L2CAP_L2CA_CFG_RSP:
 		(void) ng_btsocket_l2cap_process_l2ca_cfg_rsp_rsp(msg, rt);
 		break;
 
 	/* L2CA_Config indicator */
 	case NGM_L2CAP_L2CA_CFG_IND:
 		(void) ng_btsocket_l2cap_process_l2ca_cfg_ind(msg, rt);
 		break;
 
 	/* L2CA_Disconnect response */
 	case NGM_L2CAP_L2CA_DISCON:
 		(void) ng_btsocket_l2cap_process_l2ca_discon_rsp(msg, rt);
 		break;
 
 	/* L2CA_Disconnect indicator */
 	case NGM_L2CAP_L2CA_DISCON_IND:
 		(void) ng_btsocket_l2cap_process_l2ca_discon_ind(msg, rt);
 		break;
 
 	/* L2CA_Write response */
 	case NGM_L2CAP_L2CA_WRITE:
 		(void) ng_btsocket_l2cap_process_l2ca_write_rsp(msg, rt);
 		break;
 
 	/* Encryption change notification */
 	case NGM_L2CAP_L2CA_ENC_CHANGE:
 		(void) ng_btsocket_l2cap_process_l2ca_enc_change(msg, rt);
 		break;
 
 	/* XXX FIXME add other L2CA messages */
 
 	default:
 		NG_BTSOCKET_L2CAP_WARN(
 "%s: Unknown L2CA message, cmd=%d\n", __func__, msg->header.cmd);
 		break;
 	}
 
 	NG_FREE_MSG(msg);
 } /* ng_btsocket_l2cap_l2ca_msg_input */
 
 /*
  * L2CAP sockets input routine
  *
  * Task handler that drains the input queue. Every item is taken off the
  * queue under the queue mutex and, unless it arrived on a hook that is no
  * longer valid, handed to the data input routine (data items) or to the
  * L2CA / default message input routine (message items, selected by the
  * message command). The hook reference carried by the item is dropped and
  * the item is freed afterwards.
  */
 
 static void
 ng_btsocket_l2cap_input(void *context, int pending)
 {
 	struct ng_mesg	*msg;
 	struct mbuf	*m;
 	item_p		 item = NULL;
 	hook_p		 hook = NULL;
 
 	for (;;) {
 		mtx_lock(&ng_btsocket_l2cap_queue_mtx);
 		NG_BT_ITEMQ_DEQUEUE(&ng_btsocket_l2cap_queue, item);
 		mtx_unlock(&ng_btsocket_l2cap_queue_mtx);
 
 		/* Queue is empty - all done */
 		if (item == NULL)
 			return;
 
 		NGI_GET_HOOK(item, hook);
 
 		/* Items from a hook that became invalid are only released */
 		if (hook == NULL || !NG_HOOK_NOT_VALID(hook)) {
 			switch(item->el_flags & NGQF_TYPE) {
 			case NGQF_DATA:
 				m = NULL;
 
 				NGI_GET_M(item, m);
 				ng_btsocket_l2cap_data_input(m, hook);
 				break;
 
 			case NGQF_MESG:
 				msg = NULL;
 
 				NGI_GET_MSG(item, msg);
 
 				switch (msg->header.cmd) {
 				case NGM_L2CAP_L2CA_CON:
 				case NGM_L2CAP_L2CA_CON_RSP:
 				case NGM_L2CAP_L2CA_CON_IND:
 				case NGM_L2CAP_L2CA_CFG:
 				case NGM_L2CAP_L2CA_CFG_RSP:
 				case NGM_L2CAP_L2CA_CFG_IND: 
 				case NGM_L2CAP_L2CA_DISCON:
 				case NGM_L2CAP_L2CA_DISCON_IND:
 				case NGM_L2CAP_L2CA_WRITE:
 				case NGM_L2CAP_L2CA_ENC_CHANGE:
 				/* XXX FIXME add other L2CA messages */
 					ng_btsocket_l2cap_l2ca_msg_input(msg,
 					    hook);
 					break;
 
 				default:
 					ng_btsocket_l2cap_default_msg_input(
 					    msg, hook);
 					break;
 				}
 				break;
 
 			default:
 				KASSERT(0,
 ("%s: invalid item type=%ld\n", __func__, (item->el_flags & NGQF_TYPE)));
 				break;
 			}
 		}
 
 		/* Release the hook reference that came with the item */
 		if (hook != NULL)
 			NG_HOOK_UNREF(hook);
 
 		NG_FREE_ITEM(item);
 	}
 } /* ng_btsocket_l2cap_input */
 
 /*
  * Route cleanup task. Gets scheduled when hook is disconnected. Here we 
  * will find all sockets that use "invalid" hook and disconnect them.
  */
 
 static void
 ng_btsocket_l2cap_rtclean(void *context, int pending)
 {
 	ng_btsocket_l2cap_pcb_p		pcb = NULL, pcb_next = NULL;
 	ng_btsocket_l2cap_rtentry_p	rt = NULL;
 
 	mtx_lock(&ng_btsocket_l2cap_rt_mtx);
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/*
 	 * First disconnect all sockets that use "invalid" hook
 	 */
 
 	for (pcb = LIST_FIRST(&ng_btsocket_l2cap_sockets); pcb != NULL; ) {
 		mtx_lock(&pcb->pcb_mtx);
 		pcb_next = LIST_NEXT(pcb, next);
 
 		if (pcb->rt != NULL &&
 		    pcb->rt->hook != NULL && NG_HOOK_NOT_VALID(pcb->rt->hook)) {
 			if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO)
 				ng_btsocket_l2cap_untimeout(pcb);
 
 			pcb->so->so_error = ENETDOWN;
 			pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 			soisdisconnected(pcb->so);
 
 			pcb->token = 0;
 			pcb->cid = 0;
 			pcb->rt = NULL;
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 		pcb = pcb_next;
 	}
 
 	/*
 	 * Now cleanup routing table
 	 */
 
 	for (rt = LIST_FIRST(&ng_btsocket_l2cap_rt); rt != NULL; ) {
 		ng_btsocket_l2cap_rtentry_p	rt_next = LIST_NEXT(rt, next);
 
 		if (rt->hook != NULL && NG_HOOK_NOT_VALID(rt->hook)) {
 			LIST_REMOVE(rt, next);
 
 			NG_HOOK_SET_PRIVATE(rt->hook, NULL);
 			NG_HOOK_UNREF(rt->hook); /* Remove extra reference */
 
 			bzero(rt, sizeof(*rt));
 			free(rt, M_NETGRAPH_BTSOCKET_L2CAP);
 		}
 
 		rt = rt_next;
 	}
 
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_rt_mtx);
 } /* ng_btsocket_l2cap_rtclean */
 
 /*
  * Initialize everything
  *
  * Only acts for the default vnet. Registers the Netgraph node type,
  * creates and names the socket node and then sets up the input queue, the
  * list of sockets and the routing table together with their mutexes and
  * tasks. If the node can not be created or named the global node pointer
  * is left NULL and nothing else is initialized.
  */
 
 void
 ng_btsocket_l2cap_init(void)
 {
 	int	error;
 
 	/* Globals are set up once, by the default vnet instance only */
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 
 	ng_btsocket_l2cap_node = NULL;
 	ng_btsocket_l2cap_debug_level = NG_BTSOCKET_WARN_LEVEL;
 
 	/* Register Netgraph node type */
 	error = ng_newtype(&typestruct);
 	if (error != 0) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Could not register Netgraph node type, error=%d\n", __func__, error);
 
 		return;
 	}
 
 	/* Create Netgraph node */
 	error = ng_make_node_common(&typestruct, &ng_btsocket_l2cap_node);
 	if (error != 0) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Could not create Netgraph node, error=%d\n", __func__, error);
 
 		goto fail;
 	}
 
 	/* Give the node its well-known name */
 	error = ng_name_node(ng_btsocket_l2cap_node,
 				NG_BTSOCKET_L2CAP_NODE_TYPE);
 	if (error != 0) {
 		NG_BTSOCKET_L2CAP_ALERT(
 "%s: Could not name Netgraph node, error=%d\n", __func__, error);
 
 		NG_NODE_UNREF(ng_btsocket_l2cap_node);
 
 		goto fail;
 	}
 
 	/* Create input queue */
 	NG_BT_ITEMQ_INIT(&ng_btsocket_l2cap_queue, ifqmaxlen);
 	mtx_init(&ng_btsocket_l2cap_queue_mtx,
 		"btsocks_l2cap_queue_mtx", NULL, MTX_DEF);
 	TASK_INIT(&ng_btsocket_l2cap_queue_task, 0,
 		ng_btsocket_l2cap_input, NULL);
 
 	/* Create list of sockets */
 	LIST_INIT(&ng_btsocket_l2cap_sockets);
 	mtx_init(&ng_btsocket_l2cap_sockets_mtx,
 		"btsocks_l2cap_sockets_mtx", NULL, MTX_DEF);
 
 	/* Routing table */
 	LIST_INIT(&ng_btsocket_l2cap_rt);
 	mtx_init(&ng_btsocket_l2cap_rt_mtx,
 		"btsocks_l2cap_rt_mtx", NULL, MTX_DEF);
 	TASK_INIT(&ng_btsocket_l2cap_rt_task, 0,
 		ng_btsocket_l2cap_rtclean, NULL);
 
 	return;
 
 fail:
 	/* No usable node: leave the global pointer cleared */
 	ng_btsocket_l2cap_node = NULL;
 } /* ng_btsocket_l2cap_init */
 
 /*
  * Abort connection on socket
  *
  * Records ECONNABORTED in so_error and then disconnects the socket. The
  * value returned by ng_btsocket_l2cap_disconnect() is deliberately
  * ignored.
  */
 
 void
 ng_btsocket_l2cap_abort(struct socket *so)
 {
 	so->so_error = ECONNABORTED;
 
 	(void)ng_btsocket_l2cap_disconnect(so);
 } /* ng_btsocket_l2cap_abort */
 
 /*
  * Close socket
  *
  * Disconnects the socket. Unlike ng_btsocket_l2cap_abort() no error is
  * recorded in so_error. The value returned by
  * ng_btsocket_l2cap_disconnect() is deliberately ignored.
  */
 void
 ng_btsocket_l2cap_close(struct socket *so)
 {
 
 	(void)ng_btsocket_l2cap_disconnect(so);
 } /* ng_btsocket_l2cap_close */
 
 /*
  * Accept connection on socket. Nothing to do here, socket must be connected
  * and ready, so just return peer address and be done with it.
  */
 
 int
 ng_btsocket_l2cap_accept(struct socket *so, struct sockaddr **nam)
 {
 	if (ng_btsocket_l2cap_node == NULL) 
 		return (EINVAL);
 
 	return (ng_btsocket_l2cap_peeraddr(so, nam));
 } /* ng_btsocket_l2cap_accept */
 
 /*
  * Create and attach new socket
  */
 
 int
 ng_btsocket_l2cap_attach(struct socket *so, int proto, struct thread *td)
 {
 	static u_int32_t	token = 0;
 	ng_btsocket_l2cap_pcb_p	pcb = so2l2cap_pcb(so);
 	int			error;
 
 	/* Check socket and protocol */
 	if (ng_btsocket_l2cap_node == NULL) 
 		return (EPROTONOSUPPORT);
 	if (so->so_type != SOCK_SEQPACKET)
 		return (ESOCKTNOSUPPORT);
 
 #if 0 /* XXX sonewconn() calls "pru_attach" with proto == 0 */
 	if (proto != 0) 
 		if (proto != BLUETOOTH_PROTO_L2CAP)
 			return (EPROTONOSUPPORT);
 #endif /* XXX */
 
 	if (pcb != NULL)
 		return (EISCONN);
 
 	/* Reserve send and receive space if it is not reserved yet */
 	if ((so->so_snd.sb_hiwat == 0) || (so->so_rcv.sb_hiwat == 0)) {
 		error = soreserve(so, NG_BTSOCKET_L2CAP_SENDSPACE,
 					NG_BTSOCKET_L2CAP_RECVSPACE);
 		if (error != 0)
 			return (error);
 	}
 
 	/* Allocate the PCB */
         pcb = malloc(sizeof(*pcb),
 		M_NETGRAPH_BTSOCKET_L2CAP, M_NOWAIT | M_ZERO);
         if (pcb == NULL)
                 return (ENOMEM);
 
 	/* Link the PCB and the socket */
 	so->so_pcb = (caddr_t) pcb;
 	pcb->so = so;
 	pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 
 	/* Initialize PCB */
 	pcb->imtu = pcb->omtu = NG_L2CAP_MTU_DEFAULT;
 
 	/* Default flow */
 	pcb->iflow.flags = 0x0;
 	pcb->iflow.service_type = NG_HCI_SERVICE_TYPE_BEST_EFFORT;
 	pcb->iflow.token_rate = 0xffffffff; /* maximum */
 	pcb->iflow.token_bucket_size = 0xffffffff; /* maximum */
 	pcb->iflow.peak_bandwidth = 0x00000000; /* maximum */
 	pcb->iflow.latency = 0xffffffff; /* don't care */
 	pcb->iflow.delay_variation = 0xffffffff; /* don't care */
 
 	bcopy(&pcb->iflow, &pcb->oflow, sizeof(pcb->oflow));
 
 	pcb->flush_timo = NG_L2CAP_FLUSH_TIMO_DEFAULT;
 	pcb->link_timo = NG_L2CAP_LINK_TIMO_DEFAULT;
 
 	/*
 	 * XXX Mark PCB mutex as DUPOK to prevent "duplicated lock of
 	 * the same type" message. When accepting new L2CAP connection 
 	 * ng_btsocket_l2cap_process_l2ca_con_ind() holds both PCB mutexes 
 	 * for "old" (accepting) PCB and "new" (created) PCB.
 	 */
 		
 	mtx_init(&pcb->pcb_mtx, "btsocks_l2cap_pcb_mtx", NULL,
 		MTX_DEF|MTX_DUPOK);
 	callout_init_mtx(&pcb->timo, &pcb->pcb_mtx, 0);
 
         /*
 	 * Add the PCB to the list
 	 * 
 	 * XXX FIXME VERY IMPORTANT!
 	 *
 	 * This is totally FUBAR. We could get here in two cases:
 	 *
 	 * 1) When user calls socket()
 	 * 2) When we need to accept new incoming connection and call 
 	 *    sonewconn()
 	 *
 	 * In the first case we must acquire ng_btsocket_l2cap_sockets_mtx.
 	 * In the second case we hold ng_btsocket_l2cap_sockets_mtx already.
 	 * So we now need to distinguish between these cases. From reading
 	 * /sys/kern/uipc_socket.c we can find out that sonewconn() calls
 	 * pru_attach with proto == 0 and td == NULL. For now use this fact
 	 * to figure out if we were called from socket() or from sonewconn().
 	 */
 
 	if (td != NULL)
 		mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 	else
 		mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED);
 	
 	/* Set PCB token. Use ng_btsocket_l2cap_sockets_mtx for protection */
 	if (++ token == 0)
 		token ++;
 
 	pcb->token = token;
 
 	LIST_INSERT_HEAD(&ng_btsocket_l2cap_sockets, pcb, next);
 
 	if (td != NULL)
 		mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
         return (0);
 } /* ng_btsocket_l2cap_attach */
 
 /*
  * Bind socket
  */
 
 int
 ng_btsocket_l2cap_bind(struct socket *so, struct sockaddr *nam, 
 		struct thread *td)
 {
 	ng_btsocket_l2cap_pcb_t	*pcb = NULL;
 	struct sockaddr_l2cap	*sa = (struct sockaddr_l2cap *) nam;
 	int			 psm, error = 0;
 
 	if (ng_btsocket_l2cap_node == NULL) 
 		return (EINVAL);
 
 	/* Verify address */
 	if (sa == NULL)
 		return (EINVAL);
 	if (sa->l2cap_family != AF_BLUETOOTH)
 		return (EAFNOSUPPORT);
 	/*For the time being, Not support LE binding.*/
 	if ((sa->l2cap_len != sizeof(*sa))&&
 	    (sa->l2cap_len != sizeof(struct sockaddr_l2cap_compat)))
 		return (EINVAL);
 
 	psm = le16toh(sa->l2cap_psm);
 
 	/* 
 	 * Check if other socket has this address already (look for exact
 	 * match PSM and bdaddr) and assign socket address if it's available.
 	 *
 	 * Note: socket can be bound to ANY PSM (zero) thus allowing several
 	 * channels with the same PSM between the same pair of BD_ADDR'es.
 	 */
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 
 	LIST_FOREACH(pcb, &ng_btsocket_l2cap_sockets, next)
 		if (psm != 0 && psm == pcb->psm &&
 		    bcmp(&pcb->src, &sa->l2cap_bdaddr, sizeof(bdaddr_t)) == 0)
 			break;
 
 	if (pcb == NULL) {
 		/* Set socket address */
 		pcb = so2l2cap_pcb(so);
 		if (pcb != NULL) {
 			bcopy(&sa->l2cap_bdaddr, &pcb->src, sizeof(pcb->src));
 			pcb->psm = psm;
 		} else
 			error = EINVAL;
 	} else
 		error = EADDRINUSE;
 
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_bind */
 
 /*
  * Connect socket
  */
 
 int
 ng_btsocket_l2cap_connect(struct socket *so, struct sockaddr *nam, 
 		struct thread *td)
 {
 	ng_btsocket_l2cap_pcb_t		*pcb = so2l2cap_pcb(so);
 	struct sockaddr_l2cap_compat	*sal = (struct sockaddr_l2cap_compat *) nam;
 	struct sockaddr_l2cap *sa  = (struct sockaddr_l2cap *)nam;
 	struct sockaddr_l2cap  ba;
 	ng_btsocket_l2cap_rtentry_t	*rt = NULL;
 	int				 have_src, error = 0;
 	int idtype = NG_L2CAP_L2CA_IDTYPE_BREDR;
 	/* Check socket */
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_l2cap_node == NULL) 
 		return (EINVAL);
 	if (pcb->state == NG_BTSOCKET_L2CAP_CONNECTING)
 		return (EINPROGRESS);
 
 	/* Verify address */
 	if (sa == NULL)
 		return (EINVAL);
 	if (sa->l2cap_family != AF_BLUETOOTH)
 		return (EAFNOSUPPORT);
 	if (sa->l2cap_len == sizeof(*sal)){
 		bcopy(sal, &ba, sizeof(*sal));
 		sa = &ba;
 		sa->l2cap_len = sizeof(*sa);
 		sa->l2cap_bdaddr_type = BDADDR_BREDR;
 	}
 	if (sa->l2cap_len != sizeof(*sa))
 		return (EINVAL);
 	if ((sa->l2cap_psm &&  sa->l2cap_cid))
 		return EINVAL;
 	if (bcmp(&sa->l2cap_bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0)
 		return (EDESTADDRREQ);
 	if((sa->l2cap_bdaddr_type == BDADDR_BREDR)&&
 	   (sa->l2cap_psm == 0))
 		return EDESTADDRREQ;
 	if(sa->l2cap_bdaddr_type != BDADDR_BREDR){
 		if(sa->l2cap_cid == NG_L2CAP_ATT_CID){
 			idtype = NG_L2CAP_L2CA_IDTYPE_ATT;
 		}else if (sa->l2cap_cid == NG_L2CAP_SMP_CID){
 			idtype =NG_L2CAP_L2CA_IDTYPE_SMP;
 		}else{
 			//if cid == 0 idtype = NG_L2CAP_L2CA_IDTYPE_LE;
 			// Not supported yet
 			return EINVAL;
 		}
 	}
 	if (pcb->psm != 0 && pcb->psm != le16toh(sa->l2cap_psm))
 		return (EINVAL);
 	/*
 	 * Routing. Socket should be bound to some source address. The source
 	 * address can be ANY. Destination address must be set and it must not
 	 * be ANY. If source address is ANY then find first rtentry that has
 	 * src != dst.
 	 */
 
 	mtx_lock(&ng_btsocket_l2cap_rt_mtx);
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 	mtx_lock(&pcb->pcb_mtx);
 
 	/* Send destination address and PSM */
 	bcopy(&sa->l2cap_bdaddr, &pcb->dst, sizeof(pcb->dst));
 	pcb->psm = le16toh(sa->l2cap_psm);
 	pcb->dsttype = sa->l2cap_bdaddr_type;
 	pcb->cid = 0;
 	pcb->idtype = idtype;
 	pcb->rt = NULL;
 	have_src = bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(pcb->src));
 
 	LIST_FOREACH(rt, &ng_btsocket_l2cap_rt, next) {
 		if (rt->hook == NULL || NG_HOOK_NOT_VALID(rt->hook))
 			continue;
 
 		/* Match src and dst */
 		if (have_src) {
 			if (bcmp(&pcb->src, &rt->src, sizeof(rt->src)) == 0)
 				break;
 		} else {
 			if (bcmp(&pcb->dst, &rt->src, sizeof(rt->src)) != 0)
 				break;
 		}
 	}
 
 	if (rt != NULL) {
 		pcb->rt = rt;
 
 		if (!have_src){
 			bcopy(&rt->src, &pcb->src, sizeof(pcb->src));
 			pcb->srctype =
 			  (sa->l2cap_bdaddr_type == BDADDR_BREDR)?
 			  BDADDR_BREDR : BDADDR_LE_PUBLIC;
 		}
 	} else
 		error = EHOSTUNREACH;
 
 	/*
 	 * Send L2CA_Connect request 
 	 */
 
 	if (error == 0) {	
 		error = ng_btsocket_l2cap_send_l2ca_con_req(pcb);
 		if (error == 0) {
 			pcb->flags |= NG_BTSOCKET_L2CAP_CLIENT;
 			pcb->state = NG_BTSOCKET_L2CAP_CONNECTING;
 			soisconnecting(pcb->so);
 
 			ng_btsocket_l2cap_timeout(pcb);
 		}
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_rt_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_connect */
 
 /*
  * Process ioctl's calls on socket. No ioctl is implemented here: every
  * request is answered with EINVAL and none of the arguments is examined.
  */
 
 int
 ng_btsocket_l2cap_control(struct socket *so, u_long cmd, caddr_t data,
 		struct ifnet *ifp, struct thread *td)
 {
 	/* Nothing is supported */
 	return (EINVAL);
 } /* ng_btsocket_l2cap_control */
 
 /*
  * Process getsockopt/setsockopt system calls.
  *
  * Only options at level SOL_L2CAP are handled; any other level is silently
  * accepted (returns 0). The PCB mutex is held while an option is processed.
  *
  * Returns EINVAL if the socket has no PCB, the L2CAP sockets node does not
  * exist or the direction is neither SOPT_GET nor SOPT_SET; ENOPROTOOPT for
  * an unknown option name; EACCES when trying to set an option on a socket
  * that is not in the CLOSED state; otherwise the result of
  * sooptcopyin()/sooptcopyout().
  */
 
 int
 ng_btsocket_l2cap_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = so2l2cap_pcb(so);
 	ng_l2cap_cfg_opt_val_t	v;
 	int			error = 0;
 
 	if (pcb == NULL || ng_btsocket_l2cap_node == NULL)
 		return (EINVAL);
 
 	if (sopt->sopt_level != SOL_L2CAP)
 		return (0);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (sopt->sopt_dir == SOPT_GET) {
 		switch (sopt->sopt_name) {
 		case SO_L2CAP_IMTU: /* get incoming MTU */
 			error = sooptcopyout(sopt, &pcb->imtu,
 			    sizeof(pcb->imtu));
 			break;
 
 		case SO_L2CAP_OMTU: /* get outgoing (peer incoming) MTU */
 			error = sooptcopyout(sopt, &pcb->omtu,
 			    sizeof(pcb->omtu));
 			break;
 
 		case SO_L2CAP_IFLOW: /* get incoming flow spec. */
 			error = sooptcopyout(sopt, &pcb->iflow,
 			    sizeof(pcb->iflow));
 			break;
 
 		case SO_L2CAP_OFLOW: /* get outgoing flow spec. */
 			error = sooptcopyout(sopt, &pcb->oflow,
 			    sizeof(pcb->oflow));
 			break;
 
 		case SO_L2CAP_FLUSH: /* get flush timeout */
 			error = sooptcopyout(sopt, &pcb->flush_timo,
 			    sizeof(pcb->flush_timo));
 			break;
 
 		case SO_L2CAP_ENCRYPTED: /* get encrypt required */
 			error = sooptcopyout(sopt, &pcb->need_encrypt,
 			    sizeof(pcb->need_encrypt));
 			break;
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 	} else if (sopt->sopt_dir == SOPT_SET) {
 		/*
 		 * XXX
 		 * Parameters may not be changed while the socket is connected
 		 * or a connection is being created. Maybe this should trigger
 		 * re-configuration of the open channel instead?
 		 */
 
 		if (pcb->state != NG_BTSOCKET_L2CAP_CLOSED) {
 			error = EACCES;
 			goto out;
 		}
 
 		switch (sopt->sopt_name) {
 		case SO_L2CAP_IMTU: /* set incoming MTU */
 			error = sooptcopyin(sopt, &v, sizeof(v),
 			    sizeof(v.mtu));
 			if (error == 0)
 				pcb->imtu = v.mtu;
 			break;
 
 		case SO_L2CAP_OFLOW: /* set outgoing flow spec. */
 			error = sooptcopyin(sopt, &v, sizeof(v),
 			    sizeof(v.flow));
 			if (error == 0)
 				bcopy(&v.flow, &pcb->oflow,
 				    sizeof(pcb->oflow));
 			break;
 
 		case SO_L2CAP_FLUSH: /* set flush timeout */
 			error = sooptcopyin(sopt, &v, sizeof(v),
 			    sizeof(v.flush_timo));
 			if (error == 0)
 				pcb->flush_timo = v.flush_timo;
 			break;
 
 		case SO_L2CAP_ENCRYPTED: /* set connect encryption opt */
 			/*
 			 * NOTE(review): the state was already required to be
 			 * CLOSED above, so this extra state test looks
 			 * redundant; it is kept as is.
 			 */
 			if (pcb->state == NG_BTSOCKET_L2CAP_OPEN ||
 			    pcb->state == NG_BTSOCKET_L2CAP_W4_ENC_CHANGE) {
 				error = EINVAL;
 				break;
 			}
 
 			error = sooptcopyin(sopt, &v, sizeof(v),
 			    sizeof(v.encryption));
 			if (error == 0)
 				pcb->need_encrypt = (v.encryption) ? 1 : 0;
 			break;
 
 		default:
 			error = ENOPROTOOPT;
 			break;
 		}
 	} else
 		error = EINVAL;
 
 out:
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_ctloutput */
 
 /*
  * Detach and destroy socket.
  *
  * Stops a pending timeout, sends a disconnect request (with "zero" token)
  * unless the channel is already closed or disconnecting, unlinks the PCB
  * from the global list and releases it. Does nothing if the L2CAP sockets
  * node does not exist.
  */
 
 void
 ng_btsocket_l2cap_detach(struct socket *so)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = so2l2cap_pcb(so);
 
 	KASSERT(pcb != NULL, ("ng_btsocket_l2cap_detach: pcb == NULL"));
 
 	if (ng_btsocket_l2cap_node == NULL)
 		return;
 
 	mtx_lock(&ng_btsocket_l2cap_sockets_mtx);
 	mtx_lock(&pcb->pcb_mtx);
 
 	/* XXX what to do with pending request? */
 	if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO)
 		ng_btsocket_l2cap_untimeout(pcb);
 
 	/* Send disconnect request with "zero" token if channel is still up */
 	if (!(pcb->state == NG_BTSOCKET_L2CAP_CLOSED ||
 	    pcb->state == NG_BTSOCKET_L2CAP_DISCONNECTING))
 		ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb);
 
 	pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 
 	LIST_REMOVE(pcb, next);
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_l2cap_sockets_mtx);
 
 	/* The PCB is off the list now: wipe and release it */
 	mtx_destroy(&pcb->pcb_mtx);
 	bzero(pcb, sizeof(*pcb));
 	free(pcb, M_NETGRAPH_BTSOCKET_L2CAP);
 
 	soisdisconnected(so);
 	so->so_pcb = NULL;
 } /* ng_btsocket_l2cap_detach */
 
 /*
  * Disconnect socket
  */
 
 int
 ng_btsocket_l2cap_disconnect(struct socket *so)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = so2l2cap_pcb(so);
 	int			error = 0;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_l2cap_node == NULL) 
 		return (EINVAL);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->state == NG_BTSOCKET_L2CAP_DISCONNECTING) {
 		mtx_unlock(&pcb->pcb_mtx);
 		return (EINPROGRESS);
 	}
 
 	if (pcb->state != NG_BTSOCKET_L2CAP_CLOSED) {
 		/* XXX FIXME what to do with pending request? */
 		if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO)
 			ng_btsocket_l2cap_untimeout(pcb);
 
 		error = ng_btsocket_l2cap_send_l2ca_discon_req(pcb->token, pcb);
 		if (error == 0) {
 			pcb->state = NG_BTSOCKET_L2CAP_DISCONNECTING;
 			soisdisconnecting(so);
 
 			ng_btsocket_l2cap_timeout(pcb);
 		}
 
 		/* XXX FIXME what to do if error != 0 */
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (error);
 } /* ng_btsocket_l2cap_disconnect */
 
 /*
  * Listen on socket
  */
 
 int
 ng_btsocket_l2cap_listen(struct socket *so, int backlog, struct thread *td)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = so2l2cap_pcb(so);
 	int error;
 
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	if (error != 0)
 		goto out;
 	if (pcb == NULL) {
 		error = EINVAL;
 		goto out;
 	}
 	if (ng_btsocket_l2cap_node == NULL) {
 		error = EINVAL;
 		goto out;
 	}
 	if (pcb->psm == 0) {
 		error = EADDRNOTAVAIL;
 		goto out;
 	}
 	solisten_proto(so, backlog);
 out:
 	SOCK_UNLOCK(so);
 	return (error);
 } /* ng_btsocket_listen */
 
 /*
  * Get peer address.
  *
  * Builds a sockaddr_l2cap from the PCB destination (bdaddr, PSM, fixed CID
  * derived from the channel id type, bdaddr type) and returns a copy
  * allocated by sodupsockaddr() in *nam.
  *
  * Returns 0 on success, EINVAL if the socket has no PCB or the L2CAP sockets
  * node does not exist, ENOMEM if the copy cannot be allocated.
  */
 
 int
 ng_btsocket_l2cap_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = so2l2cap_pcb(so);
 	struct sockaddr_l2cap	sa;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_l2cap_node == NULL)
 		return (EINVAL);
 
 	/*
 	 * Start from an all-zero structure: the whole thing is duplicated
 	 * below, so any byte not explicitly assigned here (e.g. compiler
 	 * inserted padding) would otherwise carry stale stack contents.
 	 */
 	bzero(&sa, sizeof(sa));
 
 	bcopy(&pcb->dst, &sa.l2cap_bdaddr, sizeof(sa.l2cap_bdaddr));
 	sa.l2cap_psm = htole16(pcb->psm);
 	sa.l2cap_len = sizeof(sa);
 	sa.l2cap_family = AF_BLUETOOTH;
 
 	/* Fixed channels report their well-known CID, everything else 0 */
 	switch (pcb->idtype) {
 	case NG_L2CAP_L2CA_IDTYPE_ATT:
 		sa.l2cap_cid = NG_L2CAP_ATT_CID;
 		break;
 
 	case NG_L2CAP_L2CA_IDTYPE_SMP:
 		sa.l2cap_cid = NG_L2CAP_SMP_CID;
 		break;
 
 	default:
 		sa.l2cap_cid = 0;
 		break;
 	}
 
 	sa.l2cap_bdaddr_type = pcb->dsttype;
 
 	*nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT);
 
 	return ((*nam == NULL)? ENOMEM : 0);
 } /* ng_btsocket_l2cap_peeraddr */
 
 /*
  * Send data to socket.
  *
  * The packet is appended to the socket send queue; it is pushed to the L2CAP
  * layer right away only if no timeout (i.e. no outstanding request) is
  * pending. Both "m" and "control" are always consumed.
  *
  * Returns 0 on success, ENETDOWN if the L2CAP sockets node or the route is
  * gone, EINVAL on bad arguments (no PCB, no data, or control data present),
  * ENOTCONN if the socket is not open, EMSGSIZE if the packet exceeds the
  * outgoing MTU, or the error from ng_btsocket_l2cap_send2().
  */
 
 int
 ng_btsocket_l2cap_send(struct socket *so, int flags, struct mbuf *m,
 		struct sockaddr *nam, struct mbuf *control, struct thread *td)
 {
 	ng_btsocket_l2cap_pcb_t	*pcb = so2l2cap_pcb(so);
 	int			 error = 0;
 
 	if (ng_btsocket_l2cap_node == NULL) {
 		error = ENETDOWN;
 		goto drop;
 	}
 
 	/* Check socket and input */
 	if (pcb == NULL || m == NULL || control != NULL) {
 		error = EINVAL;
 		goto drop;
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	/* Make sure socket is connected */
 	if (pcb->state != NG_BTSOCKET_L2CAP_OPEN) {
 		error = ENOTCONN;
 		goto unlock;
 	}
 
 	/* Check route */
 	if (pcb->rt == NULL ||
 	    pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) {
 		error = ENETDOWN;
 		goto unlock;
 	}
 
 	/* Check packet size against outgoing (peer's incoming) MTU */
 	if (m->m_pkthdr.len > pcb->omtu) {
 		NG_BTSOCKET_L2CAP_ERR(
 "%s: Packet too big, len=%d, omtu=%d\n", __func__, m->m_pkthdr.len, pcb->omtu);
 
 		error = EMSGSIZE;
 		goto unlock;
 	}
 
 	/*
 	 * Queue the packet on the socket send queue first. If no timeout is
 	 * pending, send the packet now and schedule the timeout; otherwise
 	 * leave it queued and wait for L2CA_WRITE_RSP.
 	 */
 
 	sbappendrecord(&pcb->so->so_snd, m);
 	m = NULL;	/* now owned by the send queue */
 
 	if (!(pcb->flags & NG_BTSOCKET_L2CAP_TIMO)) {
 		error = ng_btsocket_l2cap_send2(pcb);
 		if (error != 0)
 			sbdroprecord(&pcb->so->so_snd); /* XXX */
 		else
 			ng_btsocket_l2cap_timeout(pcb);
 	}
 
 unlock:
 	mtx_unlock(&pcb->pcb_mtx);
 drop:
 	NG_FREE_M(m); /* checks for != NULL */
 	NG_FREE_M(control);
 
 	return (error);
 } /* ng_btsocket_l2cap_send */
 
 /*
  * Send first packet in the socket queue to the L2CAP layer.
  *
  * A duplicate of the mbuf chain at the head of the send buffer is prefixed
  * with an L2CA header and handed to the route hook; the queued original is
  * left in place. Caller must hold the PCB mutex.
  *
  * Returns EINVAL if the send buffer is empty, ENOBUFS if the duplicate or
  * the header cannot be allocated, otherwise the result of the send.
  */
 
 static int
 ng_btsocket_l2cap_send2(ng_btsocket_l2cap_pcb_p pcb)
 {
 	ng_l2cap_l2ca_hdr_t	*hdr = NULL;
 	struct mbuf		*m = NULL;
 	int			 error = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	if (sbavail(&pcb->so->so_snd) == 0)
 		return (EINVAL); /* XXX */
 
 	m = m_dup(pcb->so->so_snd.sb_mb, M_NOWAIT);
 	if (m == NULL)
 		return (ENOBUFS);
 
 	/* Make room for the L2CA packet header, contiguous in first mbuf */
 	M_PREPEND(m, sizeof(*hdr), M_NOWAIT);
 	if (m != NULL && m->m_len < sizeof(*hdr))
 		m = m_pullup(m, sizeof(*hdr));
 
 	if (m == NULL) {
 		NG_BTSOCKET_L2CAP_ERR(
 "%s: Failed to create L2CA packet header\n", __func__);
 
 		return (ENOBUFS);
 	}
 
 	/* Fill in the header; length covers the payload only */
 	hdr = mtod(m, ng_l2cap_l2ca_hdr_t *);
 	hdr->token = pcb->token;
 	hdr->length = m->m_pkthdr.len - sizeof(*hdr);
 	hdr->lcid = pcb->cid;
 	hdr->idtype = pcb->idtype;
 
 	NG_BTSOCKET_L2CAP_INFO(
 "%s: Sending packet: len=%d, length=%d, lcid=%d, token=%d, state=%d\n",
 		__func__, m->m_pkthdr.len, hdr->length, hdr->lcid,
 		hdr->token, pcb->state);
 
 	/*
 	 * The new L2CAP data packet has been created successfully, so it can
 	 * now be passed to the L2CAP layer.
 	 */
 
 	NG_SEND_DATA_ONLY(error, pcb->rt->hook, m);
 
 	return (error);
 } /* ng_btsocket_l2cap_send2 */
 
 /*
  * Get socket address.
  *
  * Builds a sockaddr_l2cap from the PCB source (bdaddr, PSM, bdaddr type;
  * the CID is always reported as 0) and returns a copy allocated by
  * sodupsockaddr() in *nam.
  *
  * Returns 0 on success, EINVAL if the socket has no PCB or the L2CAP sockets
  * node does not exist, ENOMEM if the copy cannot be allocated.
  */
 
 int
 ng_btsocket_l2cap_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = so2l2cap_pcb(so);
 	struct sockaddr_l2cap	sa;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_l2cap_node == NULL)
 		return (EINVAL);
 
 	/*
 	 * Start from an all-zero structure: the whole thing is duplicated
 	 * below, so any byte not explicitly assigned here (e.g. compiler
 	 * inserted padding) would otherwise carry stale stack contents.
 	 */
 	bzero(&sa, sizeof(sa));
 
 	bcopy(&pcb->src, &sa.l2cap_bdaddr, sizeof(sa.l2cap_bdaddr));
 	sa.l2cap_psm = htole16(pcb->psm);
 	sa.l2cap_len = sizeof(sa);
 	sa.l2cap_family = AF_BLUETOOTH;
 	sa.l2cap_cid = 0;
 	sa.l2cap_bdaddr_type = pcb->srctype;
 
 	*nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT);
 
 	return ((*nam == NULL)? ENOMEM : 0);
 } /* ng_btsocket_l2cap_sockaddr */
 
 /*****************************************************************************
  *****************************************************************************
  **                              Misc. functions
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Look for the socket that listens on given PSM and bdaddr. A socket bound
  * to exactly "bdaddr" wins; failing that, the last socket on the list that
  * is bound to ANY address is returned. Returns NULL if there is neither.
  * Caller must hold ng_btsocket_l2cap_sockets_mtx.
  */
 
 static ng_btsocket_l2cap_pcb_p
 ng_btsocket_l2cap_pcb_by_addr(bdaddr_p bdaddr, int psm)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = NULL, wildcard = NULL;
 
 	mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED);
 
 	LIST_FOREACH(pcb, &ng_btsocket_l2cap_sockets, next) {
 		/* Only listening sockets on the requested PSM qualify */
 		if (pcb->so == NULL)
 			continue;
 		if (!(pcb->so->so_options & SO_ACCEPTCONN))
 			continue;
 		if (pcb->psm != psm)
 			continue;
 
 		/* Exact match: done */
 		if (bcmp(&pcb->src, bdaddr, sizeof(pcb->src)) == 0)
 			return (pcb);
 
 		/* Close match: remember it and keep looking */
 		if (bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(pcb->src)) == 0)
 			wildcard = pcb;
 	}
 
 	return (wildcard);
 } /* ng_btsocket_l2cap_pcb_by_addr */
 
 /*
  * Look for the socket that has given token. Token zero never matches.
  * Returns NULL if no socket is found.
  * Caller must hold ng_btsocket_l2cap_sockets_mtx.
  */
 
 static ng_btsocket_l2cap_pcb_p
 ng_btsocket_l2cap_pcb_by_token(u_int32_t token)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = NULL;
 
 	/* Zero is the "no token" value */
 	if (token == 0)
 		return (NULL);
 
 	mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED);
 
 	LIST_FOREACH(pcb, &ng_btsocket_l2cap_sockets, next) {
 		if (pcb->token == token)
 			return (pcb);
 	}
 
 	return (NULL);
 } /* ng_btsocket_l2cap_pcb_by_token */
 
 /*
  * Look for the socket that is assigned to given source address, channel ID
  * and channel ID type. Returns NULL if no socket is found.
  * Caller must hold ng_btsocket_l2cap_sockets_mtx
  */
 
 static ng_btsocket_l2cap_pcb_p
 ng_btsocket_l2cap_pcb_by_cid(bdaddr_p src, int cid, int idtype)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = NULL;
 
 	mtx_assert(&ng_btsocket_l2cap_sockets_mtx, MA_OWNED);
 
 	LIST_FOREACH(pcb, &ng_btsocket_l2cap_sockets, next) {
 		if (pcb->cid != cid)
 			continue;
 		if (bcmp(src, &pcb->src, sizeof(pcb->src)) != 0)
 			continue;
 		if (pcb->idtype == idtype)
 			return (pcb);
 	}
 
 	return (NULL);
 } /* ng_btsocket_l2cap_pcb_by_cid */
 
 /*
  * Set timeout on socket: arm the PCB callout so that
  * ng_btsocket_l2cap_process_timeout() runs after the ERTX timeout, and mark
  * the PCB as having a timeout pending. Setting a timeout twice is a bug.
  * Caller must hold the PCB mutex.
  */
 
 static void
 ng_btsocket_l2cap_timeout(ng_btsocket_l2cap_pcb_p pcb)
 {
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	if (pcb->flags & NG_BTSOCKET_L2CAP_TIMO) {
 		KASSERT(0,
 ("%s: Duplicated socket timeout?!\n", __func__));
 		return;
 	}
 
 	pcb->flags |= NG_BTSOCKET_L2CAP_TIMO;
 	callout_reset(&pcb->timo, bluetooth_l2cap_ertx_timeout(),
 	    ng_btsocket_l2cap_process_timeout, pcb);
 } /* ng_btsocket_l2cap_timeout */
 
 /*
  * Unset timeout on socket: stop the PCB callout and clear the "timeout
  * pending" mark. Unsetting a timeout that was never set is a bug.
  * Caller must hold the PCB mutex.
  */
 
 static void
 ng_btsocket_l2cap_untimeout(ng_btsocket_l2cap_pcb_p pcb)
 {
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	if (!(pcb->flags & NG_BTSOCKET_L2CAP_TIMO)) {
 		KASSERT(0,
 ("%s: No socket timeout?!\n", __func__));
 		return;
 	}
 
 	callout_stop(&pcb->timo);
 	pcb->flags &= ~NG_BTSOCKET_L2CAP_TIMO;
 } /* ng_btsocket_l2cap_untimeout */
 
 /*
  * Process timeout on socket. This is the callout handler armed by
  * ng_btsocket_l2cap_timeout(); "xpcb" is the PCB and its mutex is expected
  * to be held on entry.
  *
  * The socket error is set to ETIMEDOUT. What happens next depends on the
  * state: a connection that is still being set up is torn down, an open
  * connection drops the record at the head of the send queue and wakes up
  * the writer, and a pending disconnect is simply completed.
  */
 
 static void
 ng_btsocket_l2cap_process_timeout(void *xpcb)
 {
 	ng_btsocket_l2cap_pcb_p	pcb = (ng_btsocket_l2cap_pcb_p) xpcb;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* The timeout has fired, so it is no longer pending */
 	pcb->flags &= ~NG_BTSOCKET_L2CAP_TIMO;
 	pcb->so->so_error = ETIMEDOUT;
 
 	switch (pcb->state) {
 	case NG_BTSOCKET_L2CAP_OPEN:
 		/* Send timeout - drop packet and wakeup sender */
 		sbdroprecord(&pcb->so->so_snd);
 		sowwakeup(pcb->so);
 		break;
 
 	case NG_BTSOCKET_L2CAP_CONNECTING:
 	case NG_BTSOCKET_L2CAP_CONFIGURING:
 	case NG_BTSOCKET_L2CAP_W4_ENC_CHANGE:
 		/* Send disconnect request with "zero" token */
 		if (pcb->cid != 0)
 			ng_btsocket_l2cap_send_l2ca_discon_req(0, pcb);
 
 		/* ... and close the socket */
 		/* FALLTHROUGH */
 
 	case NG_BTSOCKET_L2CAP_DISCONNECTING:
 		/* Disconnect timeout - disconnect the socket anyway */
 		pcb->state = NG_BTSOCKET_L2CAP_CLOSED;
 		soisdisconnected(pcb->so);
 		break;
 
 	default:
 		NG_BTSOCKET_L2CAP_ERR(
 "%s: Invalid socket state=%d\n", __func__, pcb->state);
 		break;
 	}
 } /* ng_btsocket_l2cap_process_timeout */
 
 /*
  * Translate HCI/L2CAP error code into "errno" code. Codes without a mapping
  * yield ENOSYS.
  * XXX Note: Some L2CAP and HCI error codes have the same value, but
  *     different meaning
  */
 
 static int
 ng_btsocket_l2cap_result2errno(int result)
 {
 	int	error;
 
 	switch (result) {
 	case 0x00: /* No error */
 		error = 0;
 		break;
 
 	case 0x01: /* Unknown HCI command */
 		error = ENODEV;
 		break;
 
 	case 0x02: /* No connection */
 		error = ENOTCONN;
 		break;
 
 	case 0x03: /* Hardware failure */
 		error = EIO;
 		break;
 
 	case 0x04: /* Page timeout */
 		error = EHOSTDOWN;
 		break;
 
 	case 0x05: /* Authentication failure */
 	case 0x06: /* Key missing */
 	case 0x18: /* Pairing not allowed */
 	case 0x21: /* Role change not allowed */
 	case 0x24: /* LMP PSU not allowed */
 	case 0x25: /* Encryption mode not acceptable */
 	case 0x26: /* Unit key used */
 		error = EACCES;
 		break;
 
 	case 0x07: /* Memory full */
 		error = ENOMEM;
 		break;
 
 	case 0x08:   /* Connection timeout */
 	case 0x10:   /* Host timeout */
 	case 0x22:   /* LMP response timeout */
 	case 0xee:   /* HCI timeout */
 	case 0xeeee: /* L2CAP timeout */
 		error = ETIMEDOUT;
 		break;
 
 	case 0x09: /* Max number of connections */
 	case 0x0a: /* Max number of SCO connections to a unit */
 		error = EMLINK;
 		break;
 
 	case 0x0b: /* ACL connection already exists */
 		error = EEXIST;
 		break;
 
 	case 0x0c: /* Command disallowed */
 		error = EBUSY;
 		break;
 
 	case 0x0d: /* Host rejected due to limited resources */
 	case 0x0e: /* Host rejected due to security reasons */
 	case 0x0f: /* Host rejected due to remote unit is a personal unit */
 	case 0x1b: /* SCO offset rejected */
 	case 0x1c: /* SCO interval rejected */
 	case 0x1d: /* SCO air mode rejected */
 		error = ECONNREFUSED;
 		break;
 
 	case 0x11: /* Unsupported feature or parameter value */
 	case 0x19: /* Unknown LMP PDU */
 	case 0x1a: /* Unsupported remote feature */
 	case 0x20: /* Unsupported LMP parameter value */
 	case 0x27: /* QoS is not supported */
 	case 0x29: /* Pairing with unit key not supported */
 		error = EOPNOTSUPP;
 		break;
 
 	case 0x12: /* Invalid HCI command parameter */
 	case 0x1e: /* Invalid LMP parameters */
 		error = EINVAL;
 		break;
 
 	case 0x13: /* Other end terminated connection: User ended connection */
 	case 0x14: /* Other end terminated connection: Low resources */
 	case 0x15: /* Other end terminated connection: About to power off */
 		error = ECONNRESET;
 		break;
 
 	case 0x16: /* Connection terminated by local host */
 		error = ECONNABORTED;
 		break;
 
 	/*
 	 * XXX not yet - the following codes have no mapping of their own
 	 * and end up in the default case together with unknown codes:
 	 *
 	 *	0x17	Repeated attempts
 	 *	0x1f	Unspecified error
 	 *	0x23	LMP error transaction collision
 	 *	0x28	Instant passed
 	 */
 	default:
 		error = ENOSYS;
 		break;
 	}
 
 	return (error);
 } /* ng_btsocket_l2cap_result2errno */
 
Index: head/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c
===================================================================
--- head/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c	(revision 319721)
+++ head/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c	(revision 319722)
@@ -1,3582 +1,3558 @@
 /*
  * ng_btsocket_rfcomm.c
  */
 
 /*-
  * Copyright (c) 2001-2003 Maksim Yevmenkin <m_evmenkin@yahoo.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $Id: ng_btsocket_rfcomm.c,v 1.28 2003/09/14 23:29:06 max Exp $
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bitstring.h>
 #include <sys/domain.h>
 #include <sys/endian.h>
 #include <sys/errno.h>
 #include <sys/filedesc.h>
 #include <sys/ioccom.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 #include <sys/uio.h>
 
 #include <net/vnet.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 #include <netgraph/bluetooth/include/ng_bluetooth.h>
 #include <netgraph/bluetooth/include/ng_hci.h>
 #include <netgraph/bluetooth/include/ng_l2cap.h>
 #include <netgraph/bluetooth/include/ng_btsocket.h>
 #include <netgraph/bluetooth/include/ng_btsocket_l2cap.h>
 #include <netgraph/bluetooth/include/ng_btsocket_rfcomm.h>
 
 /* MALLOC define */
 #ifdef NG_SEPARATE_MALLOC
 static MALLOC_DEFINE(M_NETGRAPH_BTSOCKET_RFCOMM, "netgraph_btsocks_rfcomm",
 		"Netgraph Bluetooth RFCOMM sockets");
 #else
 #define M_NETGRAPH_BTSOCKET_RFCOMM M_NETGRAPH
 #endif /* NG_SEPARATE_MALLOC */
 
 /*
  * Debug
  *
  * Each macro below expands to "if (<condition>) printf", so writing
  * NG_BTSOCKET_RFCOMM_WARN("fmt", args) yields a conditional printf() call.
  * The message is printed only when ng_btsocket_rfcomm_debug_level is at
  * least the macro's level and ppsratecheck() allows it.  All four macros
  * share the same ng_btsocket_rfcomm_lasttime / ng_btsocket_rfcomm_curpps
  * state and pass 1 as the last ppsratecheck() argument.
  *
  * Because the expansion is an unbraced "if", do not follow a use of these
  * macros with an "else" in the caller: it would bind to the hidden "if".
  */
 #define NG_BTSOCKET_RFCOMM_INFO \
 	if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_INFO_LEVEL && \
 	    ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_RFCOMM_WARN \
 	if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_WARN_LEVEL && \
 	    ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_RFCOMM_ERR \
 	if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_ERR_LEVEL && \
 	    ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_RFCOMM_ALERT \
 	if (ng_btsocket_rfcomm_debug_level >= NG_BTSOCKET_ALERT_LEVEL && \
 	    ppsratecheck(&ng_btsocket_rfcomm_lasttime, &ng_btsocket_rfcomm_curpps, 1)) \
 		printf
 
 #define	ALOT	0x7fff
 
 /* Local prototypes */
 static int ng_btsocket_rfcomm_upcall
 	(struct socket *so, void *arg, int waitflag);
 static void ng_btsocket_rfcomm_sessions_task
 	(void *ctx, int pending);
 static void ng_btsocket_rfcomm_session_task
 	(ng_btsocket_rfcomm_session_p s);
 #define ng_btsocket_rfcomm_task_wakeup() \
 	taskqueue_enqueue(taskqueue_swi_giant, &ng_btsocket_rfcomm_task)
 
 static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_connect_ind
 	(ng_btsocket_rfcomm_session_p s, int channel);
 static void ng_btsocket_rfcomm_connect_cfm
 	(ng_btsocket_rfcomm_session_p s);
 
 static int ng_btsocket_rfcomm_session_create
 	(ng_btsocket_rfcomm_session_p *sp, struct socket *l2so,
 	 bdaddr_p src, bdaddr_p dst, struct thread *td);
 static int ng_btsocket_rfcomm_session_accept
 	(ng_btsocket_rfcomm_session_p s0);
 static int ng_btsocket_rfcomm_session_connect
 	(ng_btsocket_rfcomm_session_p s);
 static int ng_btsocket_rfcomm_session_receive
 	(ng_btsocket_rfcomm_session_p s);
 static int ng_btsocket_rfcomm_session_send
 	(ng_btsocket_rfcomm_session_p s);
 static void ng_btsocket_rfcomm_session_clean
 	(ng_btsocket_rfcomm_session_p s);
 static void ng_btsocket_rfcomm_session_process_pcb
 	(ng_btsocket_rfcomm_session_p s);
 static ng_btsocket_rfcomm_session_p ng_btsocket_rfcomm_session_by_addr
 	(bdaddr_p src, bdaddr_p dst);
 
 static int ng_btsocket_rfcomm_receive_frame
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_sabm
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static int ng_btsocket_rfcomm_receive_disc
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static int ng_btsocket_rfcomm_receive_ua
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static int ng_btsocket_rfcomm_receive_dm
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static int ng_btsocket_rfcomm_receive_uih
 	(ng_btsocket_rfcomm_session_p s, int dlci, int pf, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_mcc
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_test
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_fc
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_msc
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_rpn
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_rls
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static int ng_btsocket_rfcomm_receive_pn
 	(ng_btsocket_rfcomm_session_p s, struct mbuf *m0);
 static void ng_btsocket_rfcomm_set_pn
 	(ng_btsocket_rfcomm_pcb_p pcb, u_int8_t cr, u_int8_t flow_control, 
 	 u_int8_t credits, u_int16_t mtu);
 
 static int ng_btsocket_rfcomm_send_command
 	(ng_btsocket_rfcomm_session_p s, u_int8_t type, u_int8_t dlci);
 static int ng_btsocket_rfcomm_send_uih
 	(ng_btsocket_rfcomm_session_p s, u_int8_t address, u_int8_t pf, 
 	 u_int8_t credits, struct mbuf *data);
 static int ng_btsocket_rfcomm_send_msc
 	(ng_btsocket_rfcomm_pcb_p pcb);
 static int ng_btsocket_rfcomm_send_pn
 	(ng_btsocket_rfcomm_pcb_p pcb);
 static int ng_btsocket_rfcomm_send_credits
 	(ng_btsocket_rfcomm_pcb_p pcb);
 
 static int ng_btsocket_rfcomm_pcb_send
 	(ng_btsocket_rfcomm_pcb_p pcb, int limit);
 static void ng_btsocket_rfcomm_pcb_kill
 	(ng_btsocket_rfcomm_pcb_p pcb, int error);
 static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_pcb_by_dlci
 	(ng_btsocket_rfcomm_session_p s, int dlci);
 static ng_btsocket_rfcomm_pcb_p ng_btsocket_rfcomm_pcb_listener
 	(bdaddr_p src, int channel);
 
 static void ng_btsocket_rfcomm_timeout
 	(ng_btsocket_rfcomm_pcb_p pcb);
 static void ng_btsocket_rfcomm_untimeout
 	(ng_btsocket_rfcomm_pcb_p pcb);
 static void ng_btsocket_rfcomm_process_timeout
 	(void *xpcb);
 
 static struct mbuf * ng_btsocket_rfcomm_prepare_packet
 	(struct sockbuf *sb, int length);
 
 /* Globals */
 extern int					ifqmaxlen;
 /* Threshold compared against NG_BTSOCKET_*_LEVEL by the debug macros */
 static u_int32_t				ng_btsocket_rfcomm_debug_level;
 /* Timeout knob; set to 60 in ng_btsocket_rfcomm_init() (presumably seconds) */
 static u_int32_t				ng_btsocket_rfcomm_timo;
 /* Task enqueued by ng_btsocket_rfcomm_task_wakeup() */
 struct task					ng_btsocket_rfcomm_task;
 /* List of RFCOMM sessions and the mutex taken around accesses to it */
 static LIST_HEAD(, ng_btsocket_rfcomm_session)	ng_btsocket_rfcomm_sessions;
 static struct mtx				ng_btsocket_rfcomm_sessions_mtx;
 /* List of RFCOMM socket PCBs and the mutex taken around accesses to it */
 static LIST_HEAD(, ng_btsocket_rfcomm_pcb)	ng_btsocket_rfcomm_sockets;
 static struct mtx				ng_btsocket_rfcomm_sockets_mtx;
 /* Rate-limit state handed to ppsratecheck() by the debug macros */
 static struct timeval				ng_btsocket_rfcomm_lasttime;
 static int					ng_btsocket_rfcomm_curpps;
 
 /*
  * Sysctl tree
  *
  * Exposes the debug level and the timeout as read/write knobs.  Note that
  * ng_btsocket_rfcomm_init() assigns both variables explicitly
  * (NG_BTSOCKET_WARN_LEVEL and 60).
  */
 SYSCTL_DECL(_net_bluetooth_rfcomm_sockets);
 static SYSCTL_NODE(_net_bluetooth_rfcomm_sockets, OID_AUTO, stream, CTLFLAG_RW,
 	0, "Bluetooth STREAM RFCOMM sockets family");
 SYSCTL_UINT(_net_bluetooth_rfcomm_sockets_stream, OID_AUTO, debug_level,
 	CTLFLAG_RW,
 	&ng_btsocket_rfcomm_debug_level, NG_BTSOCKET_INFO_LEVEL,
 	"Bluetooth STREAM RFCOMM sockets debug level");
 SYSCTL_UINT(_net_bluetooth_rfcomm_sockets_stream, OID_AUTO, timeout,
 	CTLFLAG_RW,
 	&ng_btsocket_rfcomm_timo, 60,
 	"Bluetooth STREAM RFCOMM sockets timeout");
 
 /*****************************************************************************
  *****************************************************************************
  **                              RFCOMM CRC
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Byte-wise lookup table used to compute the RFCOMM frame check sequence.
  * The table is never modified, so it is declared const.
  */
 static const u_int8_t	ng_btsocket_rfcomm_crc_table[256] = {
 	0x00, 0x91, 0xe3, 0x72, 0x07, 0x96, 0xe4, 0x75,
 	0x0e, 0x9f, 0xed, 0x7c, 0x09, 0x98, 0xea, 0x7b,
 	0x1c, 0x8d, 0xff, 0x6e, 0x1b, 0x8a, 0xf8, 0x69,
 	0x12, 0x83, 0xf1, 0x60, 0x15, 0x84, 0xf6, 0x67,
 
 	0x38, 0xa9, 0xdb, 0x4a, 0x3f, 0xae, 0xdc, 0x4d,
 	0x36, 0xa7, 0xd5, 0x44, 0x31, 0xa0, 0xd2, 0x43,
 	0x24, 0xb5, 0xc7, 0x56, 0x23, 0xb2, 0xc0, 0x51,
 	0x2a, 0xbb, 0xc9, 0x58, 0x2d, 0xbc, 0xce, 0x5f,
 
 	0x70, 0xe1, 0x93, 0x02, 0x77, 0xe6, 0x94, 0x05,
 	0x7e, 0xef, 0x9d, 0x0c, 0x79, 0xe8, 0x9a, 0x0b,
 	0x6c, 0xfd, 0x8f, 0x1e, 0x6b, 0xfa, 0x88, 0x19,
 	0x62, 0xf3, 0x81, 0x10, 0x65, 0xf4, 0x86, 0x17,
 
 	0x48, 0xd9, 0xab, 0x3a, 0x4f, 0xde, 0xac, 0x3d,
 	0x46, 0xd7, 0xa5, 0x34, 0x41, 0xd0, 0xa2, 0x33,
 	0x54, 0xc5, 0xb7, 0x26, 0x53, 0xc2, 0xb0, 0x21,
 	0x5a, 0xcb, 0xb9, 0x28, 0x5d, 0xcc, 0xbe, 0x2f,
 
 	0xe0, 0x71, 0x03, 0x92, 0xe7, 0x76, 0x04, 0x95,
 	0xee, 0x7f, 0x0d, 0x9c, 0xe9, 0x78, 0x0a, 0x9b,
 	0xfc, 0x6d, 0x1f, 0x8e, 0xfb, 0x6a, 0x18, 0x89,
 	0xf2, 0x63, 0x11, 0x80, 0xf5, 0x64, 0x16, 0x87,
 
 	0xd8, 0x49, 0x3b, 0xaa, 0xdf, 0x4e, 0x3c, 0xad,
 	0xd6, 0x47, 0x35, 0xa4, 0xd1, 0x40, 0x32, 0xa3,
 	0xc4, 0x55, 0x27, 0xb6, 0xc3, 0x52, 0x20, 0xb1,
 	0xca, 0x5b, 0x29, 0xb8, 0xcd, 0x5c, 0x2e, 0xbf,
 
 	0x90, 0x01, 0x73, 0xe2, 0x97, 0x06, 0x74, 0xe5,
 	0x9e, 0x0f, 0x7d, 0xec, 0x99, 0x08, 0x7a, 0xeb,
 	0x8c, 0x1d, 0x6f, 0xfe, 0x8b, 0x1a, 0x68, 0xf9,
 	0x82, 0x13, 0x61, 0xf0, 0x85, 0x14, 0x66, 0xf7,
 
 	0xa8, 0x39, 0x4b, 0xda, 0xaf, 0x3e, 0x4c, 0xdd,
 	0xa6, 0x37, 0x45, 0xd4, 0xa1, 0x30, 0x42, 0xd3,
 	0xb4, 0x25, 0x57, 0xc6, 0xb3, 0x22, 0x50, 0xc1,
 	0xba, 0x2b, 0x59, 0xc8, 0xbd, 0x2c, 0x5e, 0xcf
 };
 
 /*
  * CRC
  *
  * Folds "length" bytes starting at "data" through the lookup table,
  * beginning with the initial value 0xff, and returns the resulting CRC.
  * A zero or negative length reads nothing and returns 0xff.
  */
 static u_int8_t
 ng_btsocket_rfcomm_crc(const u_int8_t *data, int length)
 {
 	u_int8_t	crc = 0xff;
 
 	/* "> 0" keeps a negative length from walking past the buffer */
 	while (length-- > 0)
 		crc = ng_btsocket_rfcomm_crc_table[crc ^ *data++];
 
 	return (crc);
 } /* ng_btsocket_rfcomm_crc */
 
 /* FCS on 2 bytes: ones' complement of the CRC over data[0..1] */
 static u_int8_t
 ng_btsocket_rfcomm_fcs2(const u_int8_t *data)
 {
 	return (0xff - ng_btsocket_rfcomm_crc(data, 2));
 } /* ng_btsocket_rfcomm_fcs2 */
 
 /* FCS on 3 bytes: ones' complement of the CRC over data[0..2] */
 static u_int8_t
 ng_btsocket_rfcomm_fcs3(const u_int8_t *data)
 {
 	return (0xff - ng_btsocket_rfcomm_crc(data, 3));
 } /* ng_btsocket_rfcomm_fcs3 */
 
 /* 
  * Check FCS
  *
  * From Bluetooth spec
  *
  * "... In 07.10, the frame check sequence (FCS) is calculated on different 
  * sets of fields for different frame types. These are the fields that the 
  * FCS are calculated on:
  *
  * For SABM, DISC, UA, DM frames: on Address, Control and length field.
  * For UIH frames: on Address and Control field.
  *
  * (This is stated here for clarification, and to set the standard for RFCOMM;
  * the fields included in FCS calculation have actually changed in version
  * 7.0.0 of TS 07.10, but RFCOMM will not change the FCS calculation scheme
  * from the one above.) ..."
  */
 
 static int
 ng_btsocket_rfcomm_check_fcs(u_int8_t *data, int type, u_int8_t fcs)
 {
 	u_int8_t	expected;
 
 	/* UIH frames are checked over 2 bytes, every other type over 3 */
 	if (type == RFCOMM_FRAME_UIH)
 		expected = ng_btsocket_rfcomm_fcs2(data);
 	else
 		expected = ng_btsocket_rfcomm_fcs3(data);
 
 	/* Non-zero result means the supplied FCS does not match */
 	return (expected != fcs);
 } /* ng_btsocket_rfcomm_check_fcs */
 
 /*****************************************************************************
  *****************************************************************************
  **                              Socket interface
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Initialize everything.
  *
  * The state set up here is global, so nothing is done when called for
  * a vnet other than the default one.
  */
 
 void
 ng_btsocket_rfcomm_init(void)
 {
 	if (IS_DEFAULT_VNET(curvnet)) {
 		/* Default values for the tunables */
 		ng_btsocket_rfcomm_timo = 60;
 		ng_btsocket_rfcomm_debug_level = NG_BTSOCKET_WARN_LEVEL;
 
 		/* Task that runs ng_btsocket_rfcomm_sessions_task() */
 		TASK_INIT(&ng_btsocket_rfcomm_task, 0,
 			ng_btsocket_rfcomm_sessions_task, NULL);
 
 		/* Sessions list and its mutex */
 		mtx_init(&ng_btsocket_rfcomm_sessions_mtx,
 			"btsocks_rfcomm_sessions_mtx", NULL, MTX_DEF);
 		LIST_INIT(&ng_btsocket_rfcomm_sessions);
 
 		/* Sockets list and its mutex */
 		mtx_init(&ng_btsocket_rfcomm_sockets_mtx,
 			"btsocks_rfcomm_sockets_mtx", NULL, MTX_DEF);
 		LIST_INIT(&ng_btsocket_rfcomm_sockets);
 	}
 } /* ng_btsocket_rfcomm_init */
 
 /*
  * Abort connection on socket: record ECONNABORTED as the socket error
  * and start a disconnect.  The disconnect result is deliberately ignored.
  */
 
 void
 ng_btsocket_rfcomm_abort(struct socket *so)
 {
 	int	rc;
 
 	so->so_error = ECONNABORTED;
 
 	rc = ng_btsocket_rfcomm_disconnect(so);
 	(void)rc;
 } /* ng_btsocket_rfcomm_abort */
 
 /* Close socket: start a disconnect and ignore its result */
 void
 ng_btsocket_rfcomm_close(struct socket *so)
 {
 	int	rc;
 
 	rc = ng_btsocket_rfcomm_disconnect(so);
 	(void)rc;
 } /* ng_btsocket_rfcomm_close */
 
 /*
  * Accept connection on socket. The socket is expected to be connected
  * already, so the only work is to hand back the peer address via "nam".
  * Returns whatever ng_btsocket_rfcomm_peeraddr() returns.
  */
 
 int
 ng_btsocket_rfcomm_accept(struct socket *so, struct sockaddr **nam)
 {
 	int	error;
 
 	error = ng_btsocket_rfcomm_peeraddr(so, nam);
 
 	return (error);
 } /* ng_btsocket_rfcomm_accept */
 
 /*
  * Create and attach new socket.
  *
  * Allocates a zeroed PCB for a SOCK_STREAM socket, links it with the
  * socket, fills in the default DLC parameters and puts it on the global
  * sockets list.
  *
  * Returns 0 on success, ESOCKTNOSUPPORT for a non-stream socket, EISCONN
  * if the socket already has a PCB, ENOMEM if the PCB cannot be allocated,
  * or the error from soreserve().
  */
 
 int
 ng_btsocket_rfcomm_attach(struct socket *so, int proto, struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = so2rfcomm_pcb(so);
 	int				error;
 
 	/* Only stream sockets are supported */
 	if (so->so_type != SOCK_STREAM)
 		return (ESOCKTNOSUPPORT);
 
 #if 0 /* XXX sonewconn() calls "pru_attach" with proto == 0 */
 	if (proto != 0) 
 		if (proto != BLUETOOTH_PROTO_RFCOMM)
 			return (EPROTONOSUPPORT);
 #endif /* XXX */
 
 	/* A socket that already carries a PCB cannot be attached again */
 	if (pcb != NULL)
 		return (EISCONN);
 
 	/* Reserve buffer space unless both high-water marks are already set */
 	if (!(so->so_snd.sb_hiwat != 0 && so->so_rcv.sb_hiwat != 0)) {
 		error = soreserve(so, NG_BTSOCKET_RFCOMM_SENDSPACE,
 					NG_BTSOCKET_RFCOMM_RECVSPACE);
 		if (error != 0)
 			return (error);
 	}
 
 	/* Get a zeroed PCB without sleeping */
 	pcb = malloc(sizeof(*pcb),
 		M_NETGRAPH_BTSOCKET_RFCOMM, M_NOWAIT | M_ZERO);
 	if (pcb == NULL)
 		return (ENOMEM);
 
 	/* Cross-link the PCB and the socket */
 	pcb->so = so;
 	so->so_pcb = (caddr_t) pcb;
 
 	/* Default DLC state: closed, credit based flow control enabled */
 	pcb->state = NG_BTSOCKET_RFCOMM_DLC_CLOSED;
 	pcb->flags = NG_BTSOCKET_RFCOMM_DLC_CFC;
 
 	/* Both modem signal sets start out as RTC | RTR | DV */
 	pcb->rmodem = (RFCOMM_MODEM_RTC | RFCOMM_MODEM_RTR | RFCOMM_MODEM_DV);
 	pcb->lmodem = pcb->rmodem;
 
 	/* Default MTU and credits */
 	pcb->mtu = RFCOMM_DEFAULT_MTU;
 	pcb->rx_cred = RFCOMM_DEFAULT_CREDITS;
 	pcb->tx_cred = 0;
 
 	/* The timeout callout is tied to the PCB mutex */
 	mtx_init(&pcb->pcb_mtx, "btsocks_rfcomm_pcb_mtx", NULL, MTX_DEF);
 	callout_init_mtx(&pcb->timo, &pcb->pcb_mtx, 0);
 
 	/* Publish the PCB on the global sockets list */
 	mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 	LIST_INSERT_HEAD(&ng_btsocket_rfcomm_sockets, pcb, next);
 	mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 
 	return (0);
 } /* ng_btsocket_rfcomm_attach */
 
 /*
  * Bind socket.
  *
  * Records the local address and channel from "nam" in the PCB. A non-zero
  * channel is refused with EADDRINUSE when some PCB on the sockets list
  * already has the same channel and source address.
  *
  * Returns 0 on success, EINVAL for a missing PCB, NULL address, wrong
  * address length or channel above 30, and EAFNOSUPPORT for a family other
  * than AF_BLUETOOTH.
  */
 
 int
 ng_btsocket_rfcomm_bind(struct socket *so, struct sockaddr *nam, 
 		struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_t	*pcb = so2rfcomm_pcb(so), *other;
 	struct sockaddr_rfcomm		*sa = (struct sockaddr_rfcomm *) nam;
 	int				 error = 0;
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/* Validate the address before touching any state */
 	if (sa == NULL)
 		return (EINVAL);
 	if (sa->rfcomm_family != AF_BLUETOOTH)
 		return (EAFNOSUPPORT);
 	if (sa->rfcomm_len != sizeof(*sa))
 		return (EINVAL);
 	if (sa->rfcomm_channel > 30)
 		return (EINVAL);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	/* Channel 0 is never checked for collisions */
 	if (sa->rfcomm_channel != 0) {
 		mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 
 		LIST_FOREACH(other, &ng_btsocket_rfcomm_sockets, next) {
 			if (other->channel != sa->rfcomm_channel)
 				continue;
 			if (bcmp(&other->src, &sa->rfcomm_bdaddr,
 					sizeof(other->src)) != 0)
 				continue;
 
 			error = EADDRINUSE;
 			break;
 		}
 
 		mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 	}
 
 	/* Only commit the new address when no collision was found */
 	if (error == 0) {
 		bcopy(&sa->rfcomm_bdaddr, &pcb->src, sizeof(pcb->src));
 		pcb->channel = sa->rfcomm_channel;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (error);
 } /* ng_btsocket_rfcomm_bind */
 
 /*
  * Connect socket.
  *
  * Validates the RFCOMM address in "nam", finds (or creates) the RFCOMM
  * session between pcb->src and the requested peer and attaches this DLC
  * to it. If the session is already open the PN command is sent right
  * away; otherwise the DLC is left waiting for the session to connect.
  *
  * Returns 0 on success or an errno value:
  *   EINVAL       - no PCB, NULL address, wrong address length, channel > 30
  *   EAFNOSUPPORT - address family is not AF_BLUETOOTH
  *   EDESTADDRREQ - channel 0 or the ANY bdaddr was given
  *   EBUSY        - the DLCI is already in use on the session
  *   ECONNRESET   - the session state does not accept new DLCs
  * or the error reported by socreate() (only when a new session is needed),
  * by session creation, by sending PN or by enqueueing the RFCOMM task.
  */
 
 int
 ng_btsocket_rfcomm_connect(struct socket *so, struct sockaddr *nam, 
 		struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_t	*pcb = so2rfcomm_pcb(so);
 	struct sockaddr_rfcomm		*sa = (struct sockaddr_rfcomm *) nam;
 	ng_btsocket_rfcomm_session_t	*s = NULL;
 	struct socket			*l2so = NULL;
 	int				 dlci, error = 0, socreate_error;
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/* Verify address */
 	if (sa == NULL)
 		return (EINVAL);
 	if (sa->rfcomm_family != AF_BLUETOOTH)
 		return (EAFNOSUPPORT);
 	if (sa->rfcomm_len != sizeof(*sa))
 		return (EINVAL);
 	if (sa->rfcomm_channel > 30)
 		return (EINVAL);
 	if (sa->rfcomm_channel == 0 ||
 	    bcmp(&sa->rfcomm_bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0)
 		return (EDESTADDRREQ);
 
 	/*
 	 * Note that we will not check for errors in socreate() because
 	 * if we failed to create L2CAP socket at this point we still
 	 * might have already open session.
 	 *
 	 * The result is kept in its own variable (as in
 	 * ng_btsocket_rfcomm_listen()) so that a failed socreate() cannot
 	 * leak into the return value when an existing session is reused.
 	 */
 
 	socreate_error = socreate(PF_BLUETOOTH, &l2so, SOCK_SEQPACKET,
 			BLUETOOTH_PROTO_L2CAP, td->td_ucred, td);
 
 	/* 
 	 * Look for session between "pcb->src" and "sa->rfcomm_bdaddr" (dst)
 	 */
 
 	mtx_lock(&ng_btsocket_rfcomm_sessions_mtx);
 
 	s = ng_btsocket_rfcomm_session_by_addr(&pcb->src, &sa->rfcomm_bdaddr);
 	if (s == NULL) {
 		/*
 		 * We need to create new RFCOMM session. Check if we have L2CAP
 		 * socket. If l2so == NULL then socreate_error has the error
 		 * code from socreate()
 		 */
 
 		if (l2so == NULL) {
 			mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 			return (socreate_error);
 		}
 
 		error = ng_btsocket_rfcomm_session_create(&s, l2so,
 				&pcb->src, &sa->rfcomm_bdaddr, td);
 		if (error != 0) {
 			mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 			soclose(l2so);
 
 			return (error);
 		}
 	} else if (l2so != NULL)
 		soclose(l2so); /* we don't need new L2CAP socket */
 
 	/*
 	 * Check if we already have the same DLCI the same session
 	 */
 
 	mtx_lock(&s->session_mtx);
 	mtx_lock(&pcb->pcb_mtx);
 
 	dlci = RFCOMM_MKDLCI(!INITIATOR(s), sa->rfcomm_channel);
 
 	if (ng_btsocket_rfcomm_pcb_by_dlci(s, dlci) != NULL) {
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&s->session_mtx);
 		mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 
 		return (EBUSY);
 	}
 
 	/*
 	 * Check session state and if its not acceptable then refuse connection
 	 */
 
 	switch (s->state) {
 	case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING:
 	case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED:
 	case NG_BTSOCKET_RFCOMM_SESSION_OPEN:
 		/*
 		 * Update destination address and channel and attach 
 		 * DLC to the session
 		 */
 
 		bcopy(&sa->rfcomm_bdaddr, &pcb->dst, sizeof(pcb->dst));
 		pcb->channel = sa->rfcomm_channel;
 		pcb->dlci = dlci;
 
 		LIST_INSERT_HEAD(&s->dlcs, pcb, session_next);
 		pcb->session = s;
 
 		ng_btsocket_rfcomm_timeout(pcb);
 		soisconnecting(pcb->so);
 
 		if (s->state == NG_BTSOCKET_RFCOMM_SESSION_OPEN) {
 			/* Session is up: negotiate DLC parameters now */
 			pcb->mtu = s->mtu;
 			bcopy(&so2l2cap_pcb(s->l2so)->src, &pcb->src,
 				sizeof(pcb->src));
 
 			pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONFIGURING;
 
 			error = ng_btsocket_rfcomm_send_pn(pcb);
 			if (error == 0)
 				error = ng_btsocket_rfcomm_task_wakeup();
 		} else
 			pcb->state = NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT;
 		break;
 
 	default:
 		error = ECONNRESET;
 		break;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&s->session_mtx);
 	mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 
 	return (error);
 } /* ng_btsocket_rfcomm_connect */
 
 /*
  * Process ioctl's calls on socket. No ioctl is supported at the moment,
  * so every request is rejected with EINVAL.
  * XXX FIXME this should provide interface to the RFCOMM multiplexor channel
  */
 
 int
 ng_btsocket_rfcomm_control(struct socket *so, u_long cmd, caddr_t data,
 		struct ifnet *ifp, struct thread *td)
 {
 	int	error = EINVAL;
 
 	return (error);
 } /* ng_btsocket_rfcomm_control */
 
 /*
  * Process getsockopt/setsockopt system calls.
  *
  * Only SOL_RFCOMM level options are handled; requests for any other level
  * return 0 without doing anything. Supported "get" options are
  * SO_RFCOMM_MTU and SO_RFCOMM_FC_INFO; there are no "set" options.
  *
  * Returns EINVAL for a missing PCB or unknown direction, ENOPROTOOPT for
  * an unknown option, otherwise the result of sooptcopyout().
  */
 
 int
 ng_btsocket_rfcomm_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	ng_btsocket_rfcomm_pcb_p		pcb = so2rfcomm_pcb(so);
 	struct ng_btsocket_rfcomm_fc_info	fcinfo;
 	int					error = 0;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (sopt->sopt_level != SOL_RFCOMM)
 		return (0);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (sopt->sopt_dir == SOPT_GET) {
 		if (sopt->sopt_name == SO_RFCOMM_MTU) {
 			error = sooptcopyout(sopt, &pcb->mtu, sizeof(pcb->mtu));
 		} else if (sopt->sopt_name == SO_RFCOMM_FC_INFO) {
 			/* Snapshot the flow control state of the DLC */
 			fcinfo.lmodem = pcb->lmodem;
 			fcinfo.rmodem = pcb->rmodem;
 			fcinfo.tx_cred = pcb->tx_cred;
 			fcinfo.rx_cred = pcb->rx_cred;
 			if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)
 				fcinfo.cfc = 1;
 			else
 				fcinfo.cfc = 0;
 			fcinfo.reserved = 0;
 
 			error = sooptcopyout(sopt, &fcinfo, sizeof(fcinfo));
 		} else {
 			error = ENOPROTOOPT;
 		}
 	} else if (sopt->sopt_dir == SOPT_SET) {
 		/* No settable options yet */
 		error = ENOPROTOOPT;
 	} else {
 		error = EINVAL;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (error);
 } /* ng_btsocket_rfcomm_ctloutput */
 
 /*
  * Detach and destroy socket.
  *
  * Asks the RFCOMM task to shut the DLC down (if it is not closed yet),
  * sleeps until the DLC state becomes CLOSED, then unlinks the PCB from
  * the global sockets list and frees it. Panics if the closed DLC is still
  * attached to a session or still has its timeout flag set.
  */
 
 void
 ng_btsocket_rfcomm_detach(struct socket *so)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = so2rfcomm_pcb(so);
 
 	KASSERT(pcb != NULL, ("ng_btsocket_rfcomm_detach: pcb == NULL"));
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	switch (pcb->state) {
 	case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT:
 	case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING:
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTING:
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTED:
 		/* XXX What to do with pending request? */
 		if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)
 			ng_btsocket_rfcomm_untimeout(pcb);
 
 		/*
 		 * A DLC still waiting for its session is only flagged as
 		 * detached; any other active DLC is moved to DISCONNECTING.
 		 */
 		if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT)
 			pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_DETACHED;
 		else
 			pcb->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING;
 
 		ng_btsocket_rfcomm_task_wakeup();
 		break;
 
 	case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING:
 		/* Disconnect already in progress: just kick the task */
 		ng_btsocket_rfcomm_task_wakeup();
 		break;
 	}
 	
 	/* Sleep (dropping pcb_mtx while asleep) until the DLC is closed */
 	while (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CLOSED)
 		msleep(&pcb->state, &pcb->pcb_mtx, PZERO, "rf_det", 0);
 
 	if (pcb->session != NULL)
 		panic("%s: pcb->session != NULL\n", __func__);
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)
 		panic("%s: timeout on closed DLC, flags=%#x\n",
 			__func__, pcb->flags);
 
 	/* Unlink from the global sockets list */
 	mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 	LIST_REMOVE(pcb, next);
 	mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	/* Scrub the PCB before returning the memory */
 	mtx_destroy(&pcb->pcb_mtx);
 	bzero(pcb, sizeof(*pcb));
 	free(pcb, M_NETGRAPH_BTSOCKET_RFCOMM);
 
 	soisdisconnected(so);
 	so->so_pcb = NULL;
 } /* ng_btsocket_rfcomm_detach */
 
 /*
  * Disconnect socket
  */
 
 int
 ng_btsocket_rfcomm_disconnect(struct socket *so)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = so2rfcomm_pcb(so);
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING) {
 		mtx_unlock(&pcb->pcb_mtx);
 		return (EINPROGRESS);
 	}
 
 	/* XXX What to do with pending request? */
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)
 		ng_btsocket_rfcomm_untimeout(pcb);
 
 	switch (pcb->state) {
 	case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING: /* XXX can we get here? */
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTING: /* XXX can we get here? */
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTED:
 
 		/*
 		 * Just change DLC state and enqueue RFCOMM task. It will
 		 * queue and send DISC on the DLC.
 		 */ 
 
 		pcb->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING;
 		soisdisconnecting(so);
 
 		ng_btsocket_rfcomm_task_wakeup();
 		break;
 
 	case NG_BTSOCKET_RFCOMM_DLC_CLOSED:
 	case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT:
 		break;
 
 	default:
 		panic("%s: Invalid DLC state=%d, flags=%#x\n",
 			__func__, pcb->state, pcb->flags);
 		break;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (0);
 } /* ng_btsocket_rfcomm_disconnect */
 
 /*
  * Listen on socket. First call to listen() will create listening RFCOMM session
  *
  * If the socket has no channel yet (channel 0), the highest channel in
  * 1..30 not used by another PCB with the same source address is assigned.
  *
  * Returns 0 on success, EINVAL if the socket has no PCB, EADDRNOTAVAIL if
  * the channel is above 30 or no free channel exists, or the error from
  * solisten_proto_check(), socreate() or session creation.
  */
 
 int
 ng_btsocket_rfcomm_listen(struct socket *so, int backlog, struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_p	 pcb = so2rfcomm_pcb(so), pcb1;
 	ng_btsocket_rfcomm_session_p	 s = NULL;
 	struct socket			*l2so = NULL;
 	int				 error, socreate_error, usedchannels;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (pcb->channel > 30)
 		return (EADDRNOTAVAIL);
 
 	/* Bit (n - 1) set means channel n is taken for our source address */
 	usedchannels = 0;
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->channel == 0) {
 		mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 
 		LIST_FOREACH(pcb1, &ng_btsocket_rfcomm_sockets, next)
 			if (pcb1->channel != 0 &&
 			    bcmp(&pcb1->src, &pcb->src, sizeof(pcb->src)) == 0)
 				usedchannels |= (1 << (pcb1->channel - 1));
 
 		/* Scan downwards from 30 for the first unused channel */
 		for (pcb->channel = 30; pcb->channel > 0; pcb->channel --)
 			if (!(usedchannels & (1 << (pcb->channel - 1))))
 				break;
 
 		/* The loop ran down to 0: every channel is in use */
 		if (pcb->channel == 0) {
 			mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 			mtx_unlock(&pcb->pcb_mtx);
 
 			return (EADDRNOTAVAIL);
 		}
 
 		mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	/*
 	 * Note that we will not check for errors in socreate() because
 	 * if we failed to create L2CAP socket at this point we still
 	 * might have already open session.
 	 */
 
 	socreate_error = socreate(PF_BLUETOOTH, &l2so, SOCK_SEQPACKET,
 			BLUETOOTH_PROTO_L2CAP, td->td_ucred, td);
 
 	/*
 	 * Transition the socket and session into the LISTENING state.  Check
 	 * for collisions first, as there can only be one.
 	 */
 	mtx_lock(&ng_btsocket_rfcomm_sessions_mtx);
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	SOCK_UNLOCK(so);
 	if (error != 0)
 		goto out;
 
 	/* Look for an already existing listening session */
 	LIST_FOREACH(s, &ng_btsocket_rfcomm_sessions, next)
 		if (s->state == NG_BTSOCKET_RFCOMM_SESSION_LISTENING)
 			break;
 
 	if (s == NULL) {
 		/*
 		 * We need to create default RFCOMM session. Check if we have 
 		 * L2CAP socket. If l2so == NULL then socreate_error has the
 		 * error code from socreate()
 		 */
 		if (l2so == NULL) {
 			error = socreate_error;
 			goto out;
 		}
 
 		/* 
 		 * Create default listen RFCOMM session. The default RFCOMM 
 		 * session will listen on ANY address.
 		 *
 		 * XXX FIXME Note that currently there is no way to adjust MTU
 		 * for the default session.
 		 */
 		error = ng_btsocket_rfcomm_session_create(&s, l2so,
 					NG_HCI_BDADDR_ANY, NULL, td);
 		if (error != 0)
 			goto out;
 		/* Clear l2so so the cleanup below does not close it */
 		l2so = NULL;
 	}
 	SOCK_LOCK(so);
 	solisten_proto(so, backlog);
 	SOCK_UNLOCK(so);
 out:
 	mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 	/*
 	 * If we still have an l2so reference here, it's unneeded, so release
 	 * it.
 	 */
 	if (l2so != NULL)
 		soclose(l2so);
 	return (error);
 } /* ng_btsocket_rfcomm_listen */
 
 /*
  * Get peer address.
  *
  * Builds a sockaddr_rfcomm from the PCB destination address and channel
  * and returns a freshly allocated copy in "*nam".
  * Returns 0, EINVAL if the socket has no PCB, or ENOMEM if the copy could
  * not be allocated.
  */
 
 int
 ng_btsocket_rfcomm_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = so2rfcomm_pcb(so);
 	struct sockaddr_rfcomm		sa;
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/* Fill in the address on the stack first */
 	sa.rfcomm_len = sizeof(sa);
 	sa.rfcomm_family = AF_BLUETOOTH;
 	sa.rfcomm_channel = pcb->channel;
 	bcopy(&pcb->dst, &sa.rfcomm_bdaddr, sizeof(sa.rfcomm_bdaddr));
 
 	/* Hand a heap copy to the caller; allocation must not sleep */
 	*nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT);
 	if (*nam == NULL)
 		return (ENOMEM);
 
 	return (0);
 } /* ng_btsocket_rfcomm_peeraddr */
 
 /*
  * Send data to socket.
  *
  * Appends "m" to the socket send buffer and, unless the DLC is already
  * marked as sending, marks it and wakes up the RFCOMM task. Control data
  * is not supported. Any mbuf not consumed is freed before returning.
  *
  * Returns 0, EINVAL for a missing PCB, missing data or non-NULL control,
  * ENOTCONN if the DLC is not connected, or the task enqueue error.
  */
 
 int
 ng_btsocket_rfcomm_send(struct socket *so, int flags, struct mbuf *m,
 		struct sockaddr *nam, struct mbuf *control, struct thread *td)
 {
 	ng_btsocket_rfcomm_pcb_t	*pcb = so2rfcomm_pcb(so);
 	int				 error = 0;
 
 	if (pcb != NULL && m != NULL && control == NULL) {
 		mtx_lock(&pcb->pcb_mtx);
 
 		if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED) {
 			/* Queue the data; the send buffer now owns the mbuf */
 			sbappend(&pcb->so->so_snd, m, flags);
 			m = NULL;
 
 			/* Kick the RFCOMM task only once per burst */
 			if ((pcb->flags & NG_BTSOCKET_RFCOMM_DLC_SENDING) == 0) {
 				pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_SENDING;
 				error = ng_btsocket_rfcomm_task_wakeup();
 			}
 		} else {
 			/* DLC must be connected before data can be sent */
 			error = ENOTCONN;
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 	} else {
 		/* Bad socket or input */
 		error = EINVAL;
 	}
 
 	NG_FREE_M(m); /* checks for != NULL */
 	NG_FREE_M(control);
 
 	return (error);
 } /* ng_btsocket_rfcomm_send */
 
 /*
  * Get socket address.
  *
  * Builds a sockaddr_rfcomm from the PCB source address and channel and
  * returns a freshly allocated copy in "*nam".
  * Returns 0, EINVAL if the socket has no PCB, or ENOMEM if the copy could
  * not be allocated.
  */
 
 int
 ng_btsocket_rfcomm_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = so2rfcomm_pcb(so);
 	struct sockaddr_rfcomm		sa;
 
 	if (pcb == NULL)
 		return (EINVAL);
 
 	/* Fill in the address on the stack first */
 	sa.rfcomm_len = sizeof(sa);
 	sa.rfcomm_family = AF_BLUETOOTH;
 	sa.rfcomm_channel = pcb->channel;
 	bcopy(&pcb->src, &sa.rfcomm_bdaddr, sizeof(sa.rfcomm_bdaddr));
 
 	/* Hand a heap copy to the caller; allocation must not sleep */
 	*nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT);
 	if (*nam == NULL)
 		return (ENOMEM);
 
 	return (0);
 } /* ng_btsocket_rfcomm_sockaddr */
 
 /*
  * Upcall function for L2CAP sockets. Enqueue RFCOMM task.
  */
 
 static int
 ng_btsocket_rfcomm_upcall(struct socket *so, void *arg, int waitflag)
 {
 	int	error;
 
 	if (so == NULL)
 		panic("%s: so == NULL\n", __func__);
 
 	if ((error = ng_btsocket_rfcomm_task_wakeup()) != 0)
 		NG_BTSOCKET_RFCOMM_ALERT(
 "%s: Could not enqueue RFCOMM task, error=%d\n", __func__, error);
 	return (SU_OK);
 } /* ng_btsocket_rfcomm_upcall */
 
 /*
  * RFCOMM task. Will handle all RFCOMM sessions in one pass.
  * XXX FIXME does not scale very well
  *
  * Walks the global sessions list with the list mutex held, runs the
  * per-session handler on each entry and tears down every session that
  * ends up in the CLOSED state.
  */
 
 static void
 ng_btsocket_rfcomm_sessions_task(void *ctx, int pending)
 {
 	ng_btsocket_rfcomm_session_p	s = NULL, s_next = NULL;
 
 	mtx_lock(&ng_btsocket_rfcomm_sessions_mtx);
 
 	for (s = LIST_FIRST(&ng_btsocket_rfcomm_sessions); s != NULL; ) {
 		mtx_lock(&s->session_mtx);
 		/* Remember the successor now: "s" may be freed below */
 		s_next = LIST_NEXT(s, next);
 
 		ng_btsocket_rfcomm_session_task(s);
 
 		if (s->state == NG_BTSOCKET_RFCOMM_SESSION_CLOSED) {
 			/* Unlink and clean the session */
 			LIST_REMOVE(s, next);
 
 			NG_BT_MBUFQ_DRAIN(&s->outq);
 			if (!LIST_EMPTY(&s->dlcs))
 				panic("%s: DLC list is not empty\n", __func__);
 
 			/* Close L2CAP socket */
 			/* Remove both upcalls first, then close the socket */
 			SOCKBUF_LOCK(&s->l2so->so_rcv);
 			soupcall_clear(s->l2so, SO_RCV);
 			SOCKBUF_UNLOCK(&s->l2so->so_rcv);
 			SOCKBUF_LOCK(&s->l2so->so_snd);
 			soupcall_clear(s->l2so, SO_SND);
 			SOCKBUF_UNLOCK(&s->l2so->so_snd);
 			soclose(s->l2so);
 
 			mtx_unlock(&s->session_mtx);
 
 			/* Scrub the session before returning the memory */
 			mtx_destroy(&s->session_mtx);
 			bzero(s, sizeof(*s));
 			free(s, M_NETGRAPH_BTSOCKET_RFCOMM);
 		} else
 			mtx_unlock(&s->session_mtx);
 
 		s = s_next;
 	}
 
 	mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
 } /* ng_btsocket_rfcomm_sessions_task */
 
 /*
  * Process RFCOMM session. Will handle all RFCOMM sockets in one pass.
  *
  * The caller must hold s->session_mtx. If the underlying L2CAP socket can
  * no longer receive, or any processing step fails, the session is marked
  * CLOSED and cleaned. Panics on an unknown session state.
  */
 
 static void
 ng_btsocket_rfcomm_session_task(ng_btsocket_rfcomm_session_p s)
 {
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/* The L2CAP connection is gone: close the session first */
 	if ((s->l2so->so_rcv.sb_state & SBS_CANTRCVMORE) != 0) {
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: L2CAP connection has been terminated, so=%p, so_state=%#x, so_count=%d, " \
 "state=%d, flags=%#x\n", __func__, s->l2so, s->l2so->so_state, 
 			s->l2so->so_count, s->state, s->flags);
 
 		s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 		ng_btsocket_rfcomm_session_clean(s);
 	}
 
 	/* Dispatch on the (possibly just updated) session state */
 	switch (s->state) {
 	case NG_BTSOCKET_RFCOMM_SESSION_CLOSED:
 		/* Nothing left to do */
 		break;
 
 	case NG_BTSOCKET_RFCOMM_SESSION_LISTENING:
 		/* Keep accepting L2CAP connections until accept fails */
 		while (ng_btsocket_rfcomm_session_accept(s) == 0)
 			continue;
 		break;
 
 	case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING:
 		/* Handle the DLCs, then the result of the L2CAP connect */
 		ng_btsocket_rfcomm_session_process_pcb(s);
 
 		if (ng_btsocket_rfcomm_session_connect(s) != 0) {
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 			ng_btsocket_rfcomm_session_clean(s);
 		}
 		break;
 
 	case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED:
 	case NG_BTSOCKET_RFCOMM_SESSION_OPEN:
 	case NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING:
 		/* Handle the DLCs, then receive and send more data */
 		ng_btsocket_rfcomm_session_process_pcb(s);
 
 		/* Send is attempted only when receive succeeded */
 		if (ng_btsocket_rfcomm_session_receive(s) != 0 ||
 		    ng_btsocket_rfcomm_session_send(s) != 0) {
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 			ng_btsocket_rfcomm_session_clean(s);
 		}
 		break;
 
 	default:
 		panic("%s: Invalid session state=%d, flags=%#x\n",
 			__func__, s->state, s->flags);
 		break;
 	}
 } /* ng_btsocket_rfcomm_session_task */
 
 /*
  * Process RFCOMM connection indicator. Caller must hold s->session_mtx
  * (asserted below).
  *
  * Looks up a listening RFCOMM socket that matches the source address of the
  * session's L2CAP socket and the given channel, asks sonewconn() to create
  * a new socket on that listener, fills in source/destination address and
  * channel of the new pcb and links the new pcb (DLC) to the session.
  *
  * Returns the new pcb (unlocked), or NULL if there is no matching listener
  * or no new socket could be created.
  */
 
 static ng_btsocket_rfcomm_pcb_p
 ng_btsocket_rfcomm_connect_ind(ng_btsocket_rfcomm_session_p s, int channel)
 {
 	ng_btsocket_rfcomm_pcb_p	 pcb = NULL, pcb1 = NULL;
 	ng_btsocket_l2cap_pcb_p		 l2pcb = NULL;
-	struct socket			*so1 = NULL;
+	struct socket			*so1;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * Try to find RFCOMM socket that listens on given source address 
 	 * and channel. This will return the best possible match.
 	 */
 
 	l2pcb = so2l2cap_pcb(s->l2so);
 	pcb = ng_btsocket_rfcomm_pcb_listener(&l2pcb->src, channel);
 	if (pcb == NULL)
 		return (NULL);
 
 	/*
 	 * Check the pending connections queue and if we have space then 
 	 * create new socket and set proper source and destination address,
 	 * and channel. The listener pcb is kept locked while the new socket
 	 * is being created in the listener's vnet context.
 	 */
 
 	mtx_lock(&pcb->pcb_mtx);
 
-	if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
-		CURVNET_SET(pcb->so->so_vnet);
-		so1 = sonewconn(pcb->so, 0);
-		CURVNET_RESTORE();
-	}
+	CURVNET_SET(pcb->so->so_vnet);
+	so1 = sonewconn(pcb->so, 0);
+	CURVNET_RESTORE();
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	/* No new socket: the caller gets NULL and must refuse the connection */
 	if (so1 == NULL)
 		return (NULL);
 
 	/*
 	 * If we got here then we have created new socket. So complete the 
 	 * connection. Set source and destination address from the session.
 	 */
 
 	pcb1 = so2rfcomm_pcb(so1);
 	if (pcb1 == NULL)
 		panic("%s: pcb1 == NULL\n", __func__);
 
 	mtx_lock(&pcb1->pcb_mtx);
 
 	bcopy(&l2pcb->src, &pcb1->src, sizeof(pcb1->src));
 	bcopy(&l2pcb->dst, &pcb1->dst, sizeof(pcb1->dst));
 	pcb1->channel = channel;
 
 	/* Link new DLC to the session. We already hold s->session_mtx */
 	LIST_INSERT_HEAD(&s->dlcs, pcb1, session_next);
 	pcb1->session = s;
 			
 	mtx_unlock(&pcb1->pcb_mtx);
 
 	return (pcb1);
 } /* ng_btsocket_rfcomm_connect_ind */
 
 /*
  * Process RFCOMM connect confirmation. Caller must hold s->session_mtx
  * (asserted below).
  *
  * Walks all DLCs of the session. Every DLC that is still waiting for the
  * session to connect (W4_CONNECT) inherits the session MTU and the source
  * address of the session's L2CAP socket and gets a PN request sent. On
  * success the DLC moves to CONFIGURING, otherwise it is killed with the
  * error returned by the send. DLCs in other states are left untouched.
  */
 
 static void
 ng_btsocket_rfcomm_connect_cfm(ng_btsocket_rfcomm_session_p s)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL, pcb_next = NULL;
 	int				error;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * Wake up all waiting sockets and send PN request for each of them. 
 	 * Note that timeout already been set in ng_btsocket_rfcomm_connect()
 	 *
 	 * Note: cannot use LIST_FOREACH because ng_btsocket_rfcomm_pcb_kill
 	 * will unlink DLC from the session
 	 */
 
 	for (pcb = LIST_FIRST(&s->dlcs); pcb != NULL; ) {
 		mtx_lock(&pcb->pcb_mtx);
 		/* Remember the successor before pcb can be unlinked */
 		pcb_next = LIST_NEXT(pcb, session_next);
 
 		if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT) {
 			pcb->mtu = s->mtu;
 			bcopy(&so2l2cap_pcb(s->l2so)->src, &pcb->src,
 				sizeof(pcb->src));
 
 			error = ng_btsocket_rfcomm_send_pn(pcb);
 			if (error == 0)
 				pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONFIGURING;
 			else
 				ng_btsocket_rfcomm_pcb_kill(pcb, error);
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 		pcb = pcb_next;
 	}
 } /* ng_btsocket_rfcomm_connect_cfm */
 
 /*****************************************************************************
  *****************************************************************************
  **                              RFCOMM sessions
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Create new RFCOMM session. That function WILL NOT take ownership over l2so.
  * Caller MUST free l2so if function failed.
  * Caller must hold ng_btsocket_rfcomm_sessions_mtx (asserted below).
  *
  *	sp	where to store the pointer to the new session on success
  *	l2so	L2CAP socket the session will run on
  *	src	local address to bind to, or NULL
  *	dst	remote address to connect to, or NULL
  *	td	thread passed to sobind()/solisten()/soconnect()
  *
  * Depending on "src" and "dst" the function does the following:
  *
  *	both NULL	only allocate and set up the session (state CLOSED);
  *			the caller must do the rest
  *	dst == NULL	bind to "src" on the RFCOMM PSM and listen
  *			(state LISTENING)
  *	dst != NULL	bind to "src" and start connect to "dst" on the
  *			RFCOMM PSM (state CONNECTING, INITIATOR flag set)
  *
  * On success the session is inserted at the head of the global sessions
  * list, "*sp" is set and 0 is returned; the session mutex is not held on
  * return. On failure returns ENOMEM if the session could not be allocated,
  * or the error from sosetopt()/sobind()/solisten()/soconnect(); in the
  * latter case socket upcalls and SS_NBIO are cleared and the session is
  * freed.
  */
 
 static int
 ng_btsocket_rfcomm_session_create(ng_btsocket_rfcomm_session_p *sp,
 		struct socket *l2so, bdaddr_p src, bdaddr_p dst,
 		struct thread *td)
 {
 	ng_btsocket_rfcomm_session_p	s = NULL;
 	struct sockaddr_l2cap		l2sa;
 	struct sockopt			l2sopt;
 	int				error;
 	u_int16_t			mtu;
 
 	mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED);
 
 	/* Allocate the RFCOMM session (zeroed, without sleeping) */
         s = malloc(sizeof(*s),
 		M_NETGRAPH_BTSOCKET_RFCOMM, M_NOWAIT | M_ZERO);
         if (s == NULL)
                 return (ENOMEM);
 
 	/* Set defaults */
 	s->mtu = RFCOMM_DEFAULT_MTU;
 	s->flags = 0;
 	s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 	NG_BT_MBUFQ_INIT(&s->outq, ifqmaxlen);
 
 	/*
 	 * XXX Mark session mutex as DUPOK to prevent "duplicated lock of 
 	 * the same type" message. When accepting new L2CAP connection
 	 * ng_btsocket_rfcomm_session_accept() holds both session mutexes 
 	 * for "old" (accepting) session and "new" (created) session.
 	 */
 
 	mtx_init(&s->session_mtx, "btsocks_rfcomm_session_mtx", NULL,
 		MTX_DEF|MTX_DUPOK);
 
 	LIST_INIT(&s->dlcs);
 
 	/*
 	 * Prepare L2CAP socket: install the RFCOMM upcall on both socket
 	 * buffers and switch the socket to non-blocking mode.
 	 */
 	SOCKBUF_LOCK(&l2so->so_rcv);
 	soupcall_set(l2so, SO_RCV, ng_btsocket_rfcomm_upcall, NULL);
 	SOCKBUF_UNLOCK(&l2so->so_rcv);
 	SOCKBUF_LOCK(&l2so->so_snd);
 	soupcall_set(l2so, SO_SND, ng_btsocket_rfcomm_upcall, NULL);
 	SOCKBUF_UNLOCK(&l2so->so_snd);
 	l2so->so_state |= SS_NBIO;
 	s->l2so = l2so;
 
 	mtx_lock(&s->session_mtx);
 
 	/*
 	 * "src" == NULL and "dst" == NULL means just create session.
 	 * caller must do the rest
 	 */
 
 	if (src == NULL && dst == NULL)
 		goto done;
 
 	/*
 	 * Set incoming MTU on L2CAP socket. It is RFCOMM session default MTU 
 	 * plus 5 bytes: RFCOMM frame header, one extra byte for length and one
 	 * extra byte for credits.
 	 */
 
 	mtu = s->mtu + sizeof(struct rfcomm_frame_hdr) + 1 + 1;
 
 	l2sopt.sopt_dir = SOPT_SET;
 	l2sopt.sopt_level = SOL_L2CAP;
 	l2sopt.sopt_name = SO_L2CAP_IMTU;
 	l2sopt.sopt_val = (void *) &mtu;
 	l2sopt.sopt_valsize = sizeof(mtu);
 	l2sopt.sopt_td = NULL;
 
 	error = sosetopt(s->l2so, &l2sopt);
 	if (error != 0)
 		goto bad;
 
 	/*
 	 * Bind socket to "src" address. A listening session binds to the
 	 * RFCOMM PSM, a connecting one binds with PSM 0.
 	 */
 	l2sa.l2cap_len = sizeof(l2sa);
 	l2sa.l2cap_family = AF_BLUETOOTH;
 	l2sa.l2cap_psm = (dst == NULL)? htole16(NG_L2CAP_PSM_RFCOMM) : 0;
 	bcopy(src, &l2sa.l2cap_bdaddr, sizeof(l2sa.l2cap_bdaddr));
 	l2sa.l2cap_cid = 0;
 	l2sa.l2cap_bdaddr_type = BDADDR_BREDR;
 
 	error = sobind(s->l2so, (struct sockaddr *) &l2sa, td);
 	if (error != 0)
 		goto bad;
 
 	/* If "dst" is not NULL then initiate connect(), otherwise listen() */
 	if (dst == NULL) {
 		s->flags = 0;
 		s->state = NG_BTSOCKET_RFCOMM_SESSION_LISTENING;
 
 		error = solisten(s->l2so, 10, td);
 		if (error != 0)
 			goto bad;
 	} else {
 		s->flags = NG_BTSOCKET_RFCOMM_SESSION_INITIATOR;
 		s->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTING;
 
 		/* Reuse l2sa for the remote address on the RFCOMM PSM */
 		l2sa.l2cap_len = sizeof(l2sa);   
 		l2sa.l2cap_family = AF_BLUETOOTH;
 		l2sa.l2cap_psm = htole16(NG_L2CAP_PSM_RFCOMM);
 	        bcopy(dst, &l2sa.l2cap_bdaddr, sizeof(l2sa.l2cap_bdaddr));
 		l2sa.l2cap_cid = 0;
 		l2sa.l2cap_bdaddr_type = BDADDR_BREDR;
 
 		error = soconnect(s->l2so, (struct sockaddr *) &l2sa, td);
 		if (error != 0)
 			goto bad;
 	}
 
 done:
 	/* Publish the session on the global list and hand it to the caller */
 	LIST_INSERT_HEAD(&ng_btsocket_rfcomm_sessions, s, next);
 	*sp = s;
 
 	mtx_unlock(&s->session_mtx);
 
 	return (0);
 
 bad:
 	mtx_unlock(&s->session_mtx);
 
 	/*
 	 * Return L2CAP socket back to its original state.
 	 * NOTE(review): SS_NBIO is cleared unconditionally, even if it was
 	 * already set before this function was called - confirm that this
 	 * is fine for all callers.
 	 */
 	SOCKBUF_LOCK(&l2so->so_rcv);
 	soupcall_clear(s->l2so, SO_RCV);
 	SOCKBUF_UNLOCK(&l2so->so_rcv);
 	SOCKBUF_LOCK(&l2so->so_snd);
 	soupcall_clear(s->l2so, SO_SND);
 	SOCKBUF_UNLOCK(&l2so->so_snd);
 	l2so->so_state &= ~SS_NBIO;
 
 	/* The session was never put on the list, so just destroy it */
 	mtx_destroy(&s->session_mtx);
 	bzero(s, sizeof(*s));
 	free(s, M_NETGRAPH_BTSOCKET_RFCOMM);
 
 	return (error);
 } /* ng_btsocket_rfcomm_session_create */
 
 /*
  * Process accept() on RFCOMM session
  * XXX FIXME locking for "l2so"?
  *
  * Caller must hold both ng_btsocket_rfcomm_sessions_mtx and the session
  * mutex of the listening session "s0" (both asserted below).
  *
  * Takes one completed L2CAP connection from the listening socket of "s0"
  * and, unless an RFCOMM session between the same pair of addresses already
  * exists, creates a new RFCOMM session in CONNECTED state on top of it.
  *
  * Returns 0 if a new session was created. Returns EWOULDBLOCK (without
  * logging) when the dequeue reports it, EBUSY if a session between the two
  * devices already exists, or the error from dequeue, soaccept() or session
  * creation. In every failure case after the dequeue the accepted socket
  * is closed here. Any non-zero return ends the accept loop in
  * ng_btsocket_rfcomm_session_task().
  */
 
 static int
 ng_btsocket_rfcomm_session_accept(ng_btsocket_rfcomm_session_p s0)
 {
-	struct socket			*l2so = NULL;
+	struct socket			*l2so;
 	struct sockaddr_l2cap		*l2sa = NULL;
 	ng_btsocket_l2cap_pcb_t		*l2pcb = NULL;
 	ng_btsocket_rfcomm_session_p	 s = NULL;
-	int				 error = 0;
+	int				 error;
 
 	mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED);
 	mtx_assert(&s0->session_mtx, MA_OWNED);
 
 	/*
 	 * Get the next completed L2CAP connection, if any.
 	 * NOTE(review): in the new code SOLISTEN_LOCK() has no matching
 	 * unlock in this function; presumably solisten_dequeue() drops the
 	 * lock on every return path - confirm against its documentation.
 	 */
-	/* Check if there is a complete L2CAP connection in the queue */
-	if ((error = s0->l2so->so_error) != 0) {
+	SOLISTEN_LOCK(s0->l2so);
+	error = solisten_dequeue(s0->l2so, &l2so, 0);
+	if (error == EWOULDBLOCK)
+		return (error);
+	if (error) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not accept connection on L2CAP socket, error=%d\n", __func__, error);
-		s0->l2so->so_error = 0;
-
 		return (error);
 	}
-
-	ACCEPT_LOCK();
-	if (TAILQ_EMPTY(&s0->l2so->so_comp)) {
-		ACCEPT_UNLOCK();
-		if (s0->l2so->so_rcv.sb_state & SBS_CANTRCVMORE)
-			return (ECONNABORTED);
-		return (EWOULDBLOCK);
-	}
-
-	/* Accept incoming L2CAP connection */
-	l2so = TAILQ_FIRST(&s0->l2so->so_comp);
-	if (l2so == NULL)
-		panic("%s: l2so == NULL\n", __func__);
-
-	TAILQ_REMOVE(&s0->l2so->so_comp, l2so, so_list);
-	s0->l2so->so_qlen --;
-	l2so->so_qstate &= ~SQ_COMP;
-	l2so->so_head = NULL;
-	SOCK_LOCK(l2so);
-	soref(l2so);
-	l2so->so_state |= SS_NBIO;
-	SOCK_UNLOCK(l2so);
-	ACCEPT_UNLOCK();
 
 	/* Complete the accept; l2sa receives the peer address (unused here) */
 	error = soaccept(l2so, (struct sockaddr **) &l2sa);
 	if (error != 0) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: soaccept() on L2CAP socket failed, error=%d\n", __func__, error);
 		soclose(l2so);
 
 		return (error);
 	}
 
 	/*
 	 * Check if there is already active RFCOMM session between two devices.
 	 * If so then close L2CAP connection. We only support one RFCOMM session
 	 * between each pair of devices. Note that here we assume session in any
 	 * state. The session even could be in the middle of disconnecting.
 	 */
 
 	l2pcb = so2l2cap_pcb(l2so);
 	s = ng_btsocket_rfcomm_session_by_addr(&l2pcb->src, &l2pcb->dst);
 	if (s == NULL) {
 		/*
 		 * Create a new RFCOMM session. NULL "src" and "dst" mean
 		 * "just create the session"; the rest is done below.
 		 */
 		error = ng_btsocket_rfcomm_session_create(&s, l2so, NULL, NULL,
 				curthread /* XXX */);
 		if (error == 0) {
 			/* Both s0->session_mtx and s->session_mtx held now */
 			mtx_lock(&s->session_mtx);
 
 			s->flags = 0;
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTED;
 
 			/*
 			 * Adjust MTU on incoming connection. Reserve 5 bytes:
 			 * RFCOMM frame header, one extra byte for length and 
 			 * one extra byte for credits.
 			 */
 
 			s->mtu = min(l2pcb->imtu, l2pcb->omtu) -
 					sizeof(struct rfcomm_frame_hdr) - 1 - 1;
 
 			mtx_unlock(&s->session_mtx);
 		} else {
 			NG_BTSOCKET_RFCOMM_ALERT(
 "%s: Failed to create new RFCOMM session, error=%d\n", __func__, error);
 
 			/* Session create does not own l2so on failure */
 			soclose(l2so);
 		}
 	} else {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Rejecting duplicating RFCOMM session between src=%x:%x:%x:%x:%x:%x and " \
 "dst=%x:%x:%x:%x:%x:%x, state=%d, flags=%#x\n",	__func__,
 			l2pcb->src.b[5], l2pcb->src.b[4], l2pcb->src.b[3],
 			l2pcb->src.b[2], l2pcb->src.b[1], l2pcb->src.b[0],
 			l2pcb->dst.b[5], l2pcb->dst.b[4], l2pcb->dst.b[3],
 			l2pcb->dst.b[2], l2pcb->dst.b[1], l2pcb->dst.b[0],
 			s->state, s->flags);
 
 		error = EBUSY;
 		soclose(l2so);
 	}
 
 	return (error);
 } /* ng_btsocket_rfcomm_session_accept */
 
 /*
  * Process connect() on RFCOMM session
  * XXX FIXME locking for "l2so"?
  *
  * Caller must hold s->session_mtx (asserted below).
  *
  * Checks the progress of the L2CAP connect started for the session:
  *
  *  - a pending socket error is cleared, logged and returned;
  *  - if the socket is still connecting, 0 is returned and nothing changes;
  *  - otherwise the session moves to CONNECTED, the session MTU is derived
  *    from the L2CAP MTUs, SABM is sent on DLCI 0 and the RFCOMM task is
  *    woken up.
  *
  * Returns 0 or the error from the socket, from sending SABM or from the
  * task wakeup. The caller closes the session on a non-zero return.
  */
 
 static int
 ng_btsocket_rfcomm_session_connect(ng_btsocket_rfcomm_session_p s)
 {
 	ng_btsocket_l2cap_pcb_p	l2pcb = so2l2cap_pcb(s->l2so);
 	int			error;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/* First check if connection has failed */
 	if ((error = s->l2so->so_error) != 0) {
 		s->l2so->so_error = 0;
 
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not connect RFCOMM session, error=%d, state=%d, flags=%#x\n",
 			__func__, error, s->state, s->flags);
 
 		return (error);
 	}
 
 	/* Is connection still in progress? */
 	if (s->l2so->so_state & SS_ISCONNECTING)
 		return (0); 
 
 	/* 
 	 * If we got here then we are connected. Send SABM on DLCI 0 to 
 	 * open multiplexor channel.
 	 *
 	 * Note: "error" is always 0 at this point (a non-zero value was
 	 * returned above), so the test below is always true.
 	 */
 
 	if (error == 0) {
 		s->state = NG_BTSOCKET_RFCOMM_SESSION_CONNECTED;
 
 		/*
 		 * Adjust MTU on outgoing connection. Reserve 5 bytes: RFCOMM 
 		 * frame header, one extra byte for length and one extra byte 
 		 * for credits.
 		 */
 
 		s->mtu = min(l2pcb->imtu, l2pcb->omtu) -
 				sizeof(struct rfcomm_frame_hdr) - 1 - 1;
 
 		error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_SABM,0);
 		if (error == 0)
 			error = ng_btsocket_rfcomm_task_wakeup();
 	}
 
 	return (error);
 }/* ng_btsocket_rfcomm_session_connect */
 
 /*
  * Receive data on RFCOMM session
  * XXX FIXME locking for "l2so"?
  *
  * Caller must hold s->session_mtx (asserted below).
  *
  * If the L2CAP socket is readable, reads packets from it without blocking
  * and hands every packet to ng_btsocket_rfcomm_receive_frame().
  *
  * Returns 0 if the socket is not readable, if all packets were read, or if
  * soreceive() reported EWOULDBLOCK. Returns the pending socket error (after
  * clearing it) or any other soreceive() error otherwise. Errors returned
  * by ng_btsocket_rfcomm_receive_frame() are ignored.
  */
 
 static int
 ng_btsocket_rfcomm_session_receive(ng_btsocket_rfcomm_session_p s)
 {
 	struct mbuf	*m = NULL;
 	struct uio	 uio;
 	int		 more, flags, error;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/* Can we read from the L2CAP socket? */
 	if (!soreadable(s->l2so))
 		return (0);
 
 	/* First check for error on L2CAP socket */
 	if ((error = s->l2so->so_error) != 0) {
 		s->l2so->so_error = 0;
 
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not receive data from L2CAP socket, error=%d, state=%d, flags=%#x\n",
 			__func__, error, s->state, s->flags);
 
 		return (error);
 	}
 
 	/*
 	 * Read all packets from the L2CAP socket. 
 	 * XXX FIXME/VERIFY is that correct? For now use m->m_nextpkt as
 	 * indication that there is more packets on the socket's buffer.
 	 * Also what should we use in uio.uio_resid?
 	 * May be s->mtu + sizeof(struct rfcomm_frame_hdr) + 1 + 1?
 	 */
 
 	for (more = 1; more; ) {
 		/* Try to get next packet from socket */
 		bzero(&uio, sizeof(uio));
 /*		uio.uio_td = NULL; */
 		uio.uio_resid = 1000000000;
 		flags = MSG_DONTWAIT;
 
 		m = NULL;
 		error = soreceive(s->l2so, NULL, &uio, &m,
 		    (struct mbuf **) NULL, &flags);
 		if (error != 0) {
 			if (error == EWOULDBLOCK)
 				return (0); /* XXX can happen? */
 
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not receive data from L2CAP socket, error=%d\n", __func__, error);
 
 			return (error);
 		}
 	
 		/*
 		 * NOTE(review): "m" is dereferenced without a NULL check;
 		 * this assumes soreceive() always hands back an mbuf when
 		 * it returns 0 - confirm (e.g. for a socket at EOF).
 		 */
 		more = (m->m_nextpkt != NULL);
 		m->m_nextpkt = NULL;
 
 		/* Return value is intentionally (?) not checked here */
 		ng_btsocket_rfcomm_receive_frame(s, m);
 	}
 
 	return (0);
 } /* ng_btsocket_rfcomm_session_receive */
 
 /*
  * Send data on RFCOMM session
  * XXX FIXME locking for "l2so"?
  *
  * Caller must hold s->session_mtx (asserted below).
  *
  * While the L2CAP socket is writeable, takes packets from the session's
  * outgoing queue one at a time and passes them to the socket's protocol
  * send routine.
  *
  * Returns 0 when the queue is empty or the socket is no longer writeable.
  * Returns the pending socket error (after clearing it) or the error from
  * the send routine otherwise.
  */
 
 static int
 ng_btsocket_rfcomm_session_send(ng_btsocket_rfcomm_session_p s)
 {
 	struct mbuf	*m = NULL;
 	int		 error;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/* Send as much as we can from the session queue */
 	while (sowriteable(s->l2so)) {
 		/* Check if socket still OK */
 		if ((error = s->l2so->so_error) != 0) {
 			s->l2so->so_error = 0;
 
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Detected error=%d on L2CAP socket, state=%d, flags=%#x\n",
 				__func__, error, s->state, s->flags);
 
 			return (error);
 		}
 
 		NG_BT_MBUFQ_DEQUEUE(&s->outq, m);
 		if (m == NULL)
 			return (0); /* we are done */
 
 		/* Call send function on the L2CAP socket */
 		error = (*s->l2so->so_proto->pr_usrreqs->pru_send)(s->l2so,
 				0, m, NULL, NULL, curthread /* XXX */);
 		if (error != 0) {
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not send data to L2CAP socket, error=%d\n", __func__, error);
 
 			return (error);
 		}
 	}
 
 	return (0);
 } /* ng_btsocket_rfcomm_session_send */
 
 /*
  * Close and disconnect all DLCs for the given session. Caller must hold 
  * s->session_mtx (asserted below). Will wakeup session (presumably as a
  * side effect of ng_btsocket_rfcomm_pcb_kill(); nothing in this function
  * does it directly).
  *
  * Every DLC is killed with ECONNRESET if it was in CONNECTED state and
  * with ECONNREFUSED otherwise. The session state itself is not changed
  * here; callers set it before calling this function.
  */
 
 static void
 ng_btsocket_rfcomm_session_clean(ng_btsocket_rfcomm_session_p s)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL, pcb_next = NULL;
 	int				error;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * Note: cannot use LIST_FOREACH because ng_btsocket_rfcomm_pcb_kill
 	 * will unlink DLC from the session
 	 */
 
 	for (pcb = LIST_FIRST(&s->dlcs); pcb != NULL; ) {
 		mtx_lock(&pcb->pcb_mtx);
 		/* Remember the successor before pcb is unlinked */
 		pcb_next = LIST_NEXT(pcb, session_next);
 
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Disconnecting dlci=%d, state=%d, flags=%#x\n",
 			__func__, pcb->dlci, pcb->state, pcb->flags);
 
 		/* An established DLC is "reset", anything else is "refused" */
 		if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED)
 			error = ECONNRESET;
 		else
 			error = ECONNREFUSED;
 
 		ng_btsocket_rfcomm_pcb_kill(pcb, error);
 
 		mtx_unlock(&pcb->pcb_mtx);
 		pcb = pcb_next;
 	}
 } /* ng_btsocket_rfcomm_session_clean */
 
 /*
  * Process all DLCs on the session. Caller MUST hold s->session_mtx
  * (asserted below).
  *
  * Each DLC is locked and handled according to its state:
  *
  *	W4_CONNECT	killed if detached (error 0) or timed out (ETIMEDOUT)
  *	CONFIGURING,
  *	CONNECTING	killed with ETIMEDOUT if timed out
  *	CONNECTED	pending data is sent; killed on send error
  *	DISCONNECTING	DISC is sent and a timeout is started unless one is
  *			already set; killed on send error or on timeout
  *
  * Any other DLC state panics.
  */
 
 static void
 ng_btsocket_rfcomm_session_process_pcb(ng_btsocket_rfcomm_session_p s)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL, pcb_next = NULL;
 	int				error;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * Note: cannot use LIST_FOREACH because ng_btsocket_rfcomm_pcb_kill
 	 * will unlink DLC from the session
 	 */
 
 	for (pcb = LIST_FIRST(&s->dlcs); pcb != NULL; ) {
 		mtx_lock(&pcb->pcb_mtx);
 		/* Remember the successor before pcb can be unlinked */
 		pcb_next = LIST_NEXT(pcb, session_next);
 
 		switch (pcb->state) {
 
 		/*
 		 * If DLC in W4_CONNECT state then we should check for both
 		 * timeout and detach. Detach is checked first.
 		 */
 
 		case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT:
 			if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_DETACHED)
 				ng_btsocket_rfcomm_pcb_kill(pcb, 0);
 			else if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT)
 				ng_btsocket_rfcomm_pcb_kill(pcb, ETIMEDOUT);
 			break;
 
 		/*
 		 * If DLC in CONFIGURING or CONNECTING state then we only
 		 * should check for timeout. If detach() was called then
 		 * DLC will be moved into DISCONNECTING state.
 		 */
 
 		case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING:
 		case NG_BTSOCKET_RFCOMM_DLC_CONNECTING:
 			if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT)
 				ng_btsocket_rfcomm_pcb_kill(pcb, ETIMEDOUT);
 			break;
 
 		/*
 		 * If DLC in CONNECTED state then we need to send data (if any)
 		 * from the socket's send queue. Note that we will send data
 		 * from either all sockets or none. This may overload session's
 		 * outgoing queue (but we do not check for that).
 		 *
  		 * XXX FIXME need scheduler for RFCOMM sockets
 		 */
 
 		case NG_BTSOCKET_RFCOMM_DLC_CONNECTED:
 			error = ng_btsocket_rfcomm_pcb_send(pcb, ALOT);
 			if (error != 0)
 				ng_btsocket_rfcomm_pcb_kill(pcb, error);
 			break;
 
 		/*
 		 * If DLC in DISCONNECTING state then we must send DISC frame.
 		 * Note that if DLC has timeout set then we do not need to 
 		 * resend DISC frame; in that case we only check whether the
 		 * timeout has already fired.
 		 *
 		 * XXX FIXME need to drain all data from the socket's queue
 		 * if LINGER option was set
 		 */
 
 		case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING:
 			if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)) {
 				error = ng_btsocket_rfcomm_send_command(
 						pcb->session, RFCOMM_FRAME_DISC,
 						pcb->dlci);
 				if (error == 0)
 					ng_btsocket_rfcomm_timeout(pcb);
 				else
 					ng_btsocket_rfcomm_pcb_kill(pcb, error);
 			} else if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT)
 				ng_btsocket_rfcomm_pcb_kill(pcb, ETIMEDOUT);
 			break;
 		
 /*		case NG_BTSOCKET_RFCOMM_DLC_CLOSED: */
 		default:
 			panic("%s: Invalid DLC state=%d, flags=%#x\n",
 				__func__, pcb->state, pcb->flags);
 			break;
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 		pcb = pcb_next;
 	}
 } /* ng_btsocket_rfcomm_session_process_pcb */
 
 /*
  * Find RFCOMM session between "src" and "dst".
  * Caller MUST hold ng_btsocket_rfcomm_sessions_mtx (asserted below).
  *
  * Addresses are compared against the source and destination addresses of
  * each session's L2CAP socket. If "src" equals NG_HCI_BDADDR_ANY then only
  * the destination address has to match.
  *
  * Returns the first matching session on the list, or NULL if the whole
  * list was walked without a match.
  */
 
 static ng_btsocket_rfcomm_session_p
 ng_btsocket_rfcomm_session_by_addr(bdaddr_p src, bdaddr_p dst)
 {
 	ng_btsocket_rfcomm_session_p	s = NULL;
 	ng_btsocket_l2cap_pcb_p		l2pcb = NULL;
 	int				any_src;
 
 	mtx_assert(&ng_btsocket_rfcomm_sessions_mtx, MA_OWNED);
 
 	/* Non-zero if the caller does not care about the source address */
 	any_src = (bcmp(src, NG_HCI_BDADDR_ANY, sizeof(*src)) == 0);
 
 	LIST_FOREACH(s, &ng_btsocket_rfcomm_sessions, next) {
 		l2pcb = so2l2cap_pcb(s->l2so);
 
 		if ((any_src || bcmp(&l2pcb->src, src, sizeof(*src)) == 0) &&
 		    bcmp(&l2pcb->dst, dst, sizeof(*dst)) == 0)
 			break;
 	}
 
 	/* "s" is NULL here if the loop ran off the end of the list */
 	return (s);
 } /* ng_btsocket_rfcomm_session_by_addr */
 
 /*****************************************************************************
  *****************************************************************************
  **                                  RFCOMM 
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Process incoming RFCOMM frame. Caller must hold s->session_mtx
  * (asserted below).
  * XXX FIXME check frame length
  *
  * Parses the RFCOMM frame header at the start of "m0", strips the header
  * and the trailing FCS byte, verifies the FCS and dispatches the frame by
  * type to the SABM/DISC/UA/DM/UIH handlers.
  *
  * Ownership of "m0": for UIH frames the mbuf is passed on to the MCC or
  * UIH handler and is not freed here; in every other case (including a bad
  * FCS and an unknown frame type) it is freed here. If m_pullup() fails
  * ENOBUFS is returned without touching the mbuf again.
  *
  * Returns ENOBUFS on m_pullup() failure, EINVAL on bad FCS or unknown
  * frame type, otherwise the result of the frame handler (0 for SABM, DISC
  * and UA frames that are discarded because the P/F bit is not set).
  */
 
 static int
 ng_btsocket_rfcomm_receive_frame(ng_btsocket_rfcomm_session_p s,
 		struct mbuf *m0)
 {
 	struct rfcomm_frame_hdr	*hdr = NULL;
 	struct mbuf		*m = NULL;
 	u_int16_t		 length;
 	u_int8_t		 dlci, type;
 	int			 error = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * Pullup as much as we can into first mbuf (for direct access).
 	 * NOTE(review): there is no check that the packet is at least as
 	 * long as the frame header plus FCS (see the FIXME above) - the
 	 * header fields below are read regardless of m0->m_pkthdr.len.
 	 */
 	length = min(m0->m_pkthdr.len, MHLEN);
 	if (m0->m_len < length) {
 		if ((m0 = m_pullup(m0, length)) == NULL) {
 			NG_BTSOCKET_RFCOMM_ALERT(
 "%s: m_pullup(%d) failed\n", __func__, length);
 
 			return (ENOBUFS);
 		}
 	}
 
 	hdr = mtod(m0, struct rfcomm_frame_hdr *);
 	dlci = RFCOMM_DLCI(hdr->address);
 	type = RFCOMM_TYPE(hdr->control);
 
 	/*
 	 * Test EA bit in length. If not set then we have 2 bytes of length.
 	 * From here on "length" holds the frame length from the header; it
 	 * is only used for logging below. "hdr" keeps pointing at the
 	 * original header bytes after the mbuf has been trimmed.
 	 */
 	if (!RFCOMM_EA(hdr->length)) {
 		bcopy(&hdr->length, &length, sizeof(length));
 		length = le16toh(length) >> 1;
 		m_adj(m0, sizeof(*hdr) + 1);
 	} else {
 		length = hdr->length >> 1;
 		m_adj(m0, sizeof(*hdr));
 	}
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got frame type=%#x, dlci=%d, length=%d, cr=%d, pf=%d, len=%d\n",
 		__func__, type, dlci, length, RFCOMM_CR(hdr->address),
 		RFCOMM_PF(hdr->control), m0->m_pkthdr.len);
 
 	/*
 	 * Get FCS (the last byte in the frame)
 	 * XXX this will not work if mbuf chain ends with empty mbuf.
 	 * XXX let's hope it never happens :)
 	 */
 
 	/* Walk to the last mbuf of the chain */
 	for (m = m0; m->m_next != NULL; m = m->m_next)
 		;
 	if (m->m_len <= 0)
 		panic("%s: Empty mbuf at the end of the chain, len=%d\n",
 			__func__, m->m_len);
 
 	/*
 	 * Check FCS. We only need to calculate FCS on first 2 or 3 bytes
 	 * and already m_pullup'ed mbuf chain, so it should be safe.
 	 * A non-zero result from the check means the FCS is bad.
 	 */
 
 	if (ng_btsocket_rfcomm_check_fcs((u_int8_t *) hdr, type, m->m_data[m->m_len - 1])) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Invalid RFCOMM packet. Bad checksum\n", __func__);
 		NG_FREE_M(m0);
 
 		return (EINVAL);
 	}
 
 	m_adj(m0, -1); /* Trim FCS byte */
 
 	/*
 	 * Process RFCOMM frame.
 	 *
 	 * From TS 07.10 spec
 	 * 
 	 * "... In the case where a SABM or DISC command with the P bit set
 	 * to 0 is received then the received frame shall be discarded..."
  	 *
 	 * "... If a unsolicited DM response is received then the frame shall
 	 * be processed irrespective of the P/F setting... "
 	 *
 	 * "... The station may transmit response frames with the F bit set 
 	 * to 0 at any opportunity on an asynchronous basis. However, in the 
 	 * case where a UA response is received with the F bit set to 0 then 
 	 * the received frame shall be discarded..."
 	 *
 	 * From Bluetooth spec
 	 *
 	 * "... When credit based flow control is being used, the meaning of
 	 * the P/F bit in the control field of the RFCOMM header is redefined
 	 * for UIH frames..."
 	 */
 
 	switch (type) {
 	case RFCOMM_FRAME_SABM:
 		if (RFCOMM_PF(hdr->control))
 			error = ng_btsocket_rfcomm_receive_sabm(s, dlci);
 		break;
 
 	case RFCOMM_FRAME_DISC:
 		if (RFCOMM_PF(hdr->control))
 			error = ng_btsocket_rfcomm_receive_disc(s, dlci);
 		break;
 
 	case RFCOMM_FRAME_UA:
 		if (RFCOMM_PF(hdr->control))
 			error = ng_btsocket_rfcomm_receive_ua(s, dlci);
 		break;
 
 	case RFCOMM_FRAME_DM:
 		error = ng_btsocket_rfcomm_receive_dm(s, dlci);
 		break;
 
 	case RFCOMM_FRAME_UIH:
 		/* DLCI 0 carries multiplexor control, others carry data */
 		if (dlci == 0)
 			error = ng_btsocket_rfcomm_receive_mcc(s, m0);
 		else
 			error = ng_btsocket_rfcomm_receive_uih(s, dlci,
 					RFCOMM_PF(hdr->control), m0);
 
 		/* m0 was handed to the handler, do not free it below */
 		return (error);
 		/* NOT REACHED */
 
 	default:
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Invalid RFCOMM packet. Unknown type=%#x\n", __func__, type);
 		error = EINVAL;
 		break;
 	}
 
 	NG_FREE_M(m0);
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_frame */
 
 /*
  * Process RFCOMM SABM frame. Caller must hold s->session_mtx
  * (asserted below).
  *
  * DLCI 0 opens the multiplexor channel: for a session in CONNECTED or OPEN
  * state UA is sent back and the session becomes OPEN (waiting DLCs are then
  * confirmed); if UA cannot be sent the session is closed and cleaned. In
  * any other session state EINVAL is returned.
  *
  * For any other DLCI the multiplexor channel must be OPEN (EINVAL if not):
  *
  *  - if the DLC already exists it must be in CONNECTING state (ENOENT if
  *    not); UA and MSC are sent and the DLC becomes CONNECTED, or is killed
  *    if sending failed;
  *  - otherwise the frame is treated as an incoming connection: if a
  *    listener accepts it the new DLC is answered with UA and MSC as above,
  *    if not DM is sent back.
  *
  * Returns 0 on success or an error as described above.
  */
 
 static int
 ng_btsocket_rfcomm_receive_sabm(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL;
 	int				error = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got SABM, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci);
 
 	/* DLCI == 0 means open multiplexor channel */
 	if (dlci == 0) {
 		switch (s->state) {
 		case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED:
 		case NG_BTSOCKET_RFCOMM_SESSION_OPEN:
 			error = ng_btsocket_rfcomm_send_command(s,
 					RFCOMM_FRAME_UA, dlci);
 			if (error == 0) {
 				s->state = NG_BTSOCKET_RFCOMM_SESSION_OPEN;
 				ng_btsocket_rfcomm_connect_cfm(s);
 			} else {
 				s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 				ng_btsocket_rfcomm_session_clean(s);
 			}
 			break;
 
 		default:
 			NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got SABM for session in invalid state state=%d, flags=%#x\n",
 				__func__, s->state, s->flags);
 			error = EINVAL;
 			break;
 		}
 
 		return (error);
 	}
 
 	/* Make sure multiplexor channel is open */
 	if (s->state != NG_BTSOCKET_RFCOMM_SESSION_OPEN) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got SABM for dlci=%d with mulitplexor channel closed, state=%d, " \
 "flags=%#x\n",		__func__, dlci, s->state, s->flags);
 
 		return (EINVAL);
 	}
 
 	/*
 	 * Check if we have this DLCI. This might happen when remote
 	 * peer uses PN command before actual open (SABM) happens.
 	 */
 
 	pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 	if (pcb != NULL) {
 		mtx_lock(&pcb->pcb_mtx);
 
 		if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTING) {
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got SABM for dlci=%d in invalid state=%d, flags=%#x\n",
 				__func__, dlci, pcb->state, pcb->flags);
 			mtx_unlock(&pcb->pcb_mtx);
 
 			return (ENOENT);
 		}
 
 		/* The peer has answered, stop the DLC timeout */
 		ng_btsocket_rfcomm_untimeout(pcb);
 
 		/* Acknowledge with UA, then send MSC */
 		error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_UA,dlci);
 		if (error == 0)
 			error = ng_btsocket_rfcomm_send_msc(pcb);
 
 		if (error == 0) {
 			pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTED;
 			soisconnected(pcb->so);
 		} else
 			ng_btsocket_rfcomm_pcb_kill(pcb, error);
 
 		mtx_unlock(&pcb->pcb_mtx);
 
 		return (error);
 	}
 
 	/*
 	 * We do not have requested DLCI, so it must be an incoming connection
 	 * with default parameters. Try to accept it.
 	 */ 
 
 	pcb = ng_btsocket_rfcomm_connect_ind(s, RFCOMM_SRVCHANNEL(dlci));
 	if (pcb != NULL) {
 		mtx_lock(&pcb->pcb_mtx);
 
 		pcb->dlci = dlci;
 
 		/* Acknowledge with UA, then send MSC */
 		error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_UA,dlci);
 		if (error == 0)
 			error = ng_btsocket_rfcomm_send_msc(pcb);
 
 		if (error == 0) {
 			pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTED;
 			soisconnected(pcb->so);
 		} else
 			ng_btsocket_rfcomm_pcb_kill(pcb, error);
 
 		mtx_unlock(&pcb->pcb_mtx);
 	} else
 		/* Nobody is listen()ing on the requested DLCI */
 		error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_DM,dlci);
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_sabm */
 
 /*
  * Process RFCOMM DISC frame. Caller must hold s->session_mtx
  * (asserted below).
  *
  * DLCI 0 closes the multiplexor channel: UA is sent back, the session
  * moves to DISCONNECTING (or to CLOSED if it already was DISCONNECTING or
  * if UA could not be sent) and all DLCs of the session are cleaned.
  *
  * For any other DLCI: if the DLC exists UA is sent back and the DLC is
  * killed (with 0 if it was CONNECTED, with ECONNREFUSED otherwise); if it
  * does not exist DM is sent back.
  *
  * Returns the result of sending the UA or DM response.
  */
 
 static int
 ng_btsocket_rfcomm_receive_disc(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL;
 	int				error = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got DISC, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci);
 
 	/* DLCI == 0 means close multiplexor channel */
 	if (dlci == 0) {
 		/* XXX FIXME assume that remote side will close the socket */
 		error = ng_btsocket_rfcomm_send_command(s, RFCOMM_FRAME_UA, 0);
 		if (error == 0) {
 			if (s->state == NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING)
 				s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; /* XXX */
 			else
 				s->state = NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING;
 		} else
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED; /* XXX */
 
 		/* DLCs are dropped regardless of the UA result */
 		ng_btsocket_rfcomm_session_clean(s);
 	} else {
 		pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 		if (pcb != NULL) {
 			/* Error code for the DLC, separate from return value */
 			int	err;
 
 			mtx_lock(&pcb->pcb_mtx);
 
 			NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got DISC for dlci=%d, state=%d, flags=%#x\n",
 				__func__, dlci, pcb->state, pcb->flags);
 
 			error = ng_btsocket_rfcomm_send_command(s,
 					RFCOMM_FRAME_UA, dlci);
 
 			if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED)
 				err = 0;
 			else
 				err = ECONNREFUSED;
 
 			/* The DLC is killed even if UA could not be sent */
 			ng_btsocket_rfcomm_pcb_kill(pcb, err);
 
 			mtx_unlock(&pcb->pcb_mtx);
 		} else {
 			NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got DISC for non-existing dlci=%d\n", __func__, dlci);
 
 			error = ng_btsocket_rfcomm_send_command(s,
 					RFCOMM_FRAME_DM, dlci);
 		}
 	}
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_disc */
 
 /*
  * Process RFCOMM UA frame. Caller must hold s->session_mtx
  * (asserted below).
  *
  * On DLCI 0 (multiplexor channel) a session in CONNECTED state becomes
  * OPEN and its waiting DLCs are confirmed; a session in DISCONNECTING
  * state becomes CLOSED and its DLCs are cleaned; any other session state
  * yields ENOENT.
  *
  * On any other DLCI: a DLC in CONNECTING state gets MSC sent and becomes
  * CONNECTED; a DLC in DISCONNECTING state is killed; any other DLC state
  * yields ENOENT. If the DLC does not exist DM is sent back.
  *
  * Returns 0 on success, ENOENT as described above, or the error from
  * sending MSC or DM.
  */
 
 static int
 ng_btsocket_rfcomm_receive_ua(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL;
 	int				error = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got UA, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci);
 
 	/* dlci == 0 means multiplexor channel */
 	if (dlci == 0) {
 		switch (s->state) {
 		/* Our SABM on DLCI 0 was acknowledged */
 		case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED:
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_OPEN;
 			ng_btsocket_rfcomm_connect_cfm(s);
 			break;
 
 		/* Disconnect of the multiplexor channel was acknowledged */
 		case NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING:
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 			ng_btsocket_rfcomm_session_clean(s);
 			break;
 
 		default:
 			NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UA for session in invalid state=%d(%d), flags=%#x, mtu=%d\n",
 				__func__, s->state, INITIATOR(s), s->flags,
 				s->mtu);
 			error = ENOENT;
 			break;
 		}
 
 		return (error);
 	}
 
 	/* Check if we have this DLCI */
 	pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 	if (pcb != NULL) {
 		mtx_lock(&pcb->pcb_mtx);
 
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got UA for dlci=%d, state=%d, flags=%#x\n",
 			__func__, dlci, pcb->state, pcb->flags);
 
 		switch (pcb->state) {
 		case NG_BTSOCKET_RFCOMM_DLC_CONNECTING:
 			ng_btsocket_rfcomm_untimeout(pcb);
 
 			/*
 			 * NOTE(review): unlike the SABM path, a failed MSC
 			 * does not kill the DLC here; it stays in CONNECTING
 			 * state with its timeout removed - confirm that
 			 * this is intended.
 			 */
 			error = ng_btsocket_rfcomm_send_msc(pcb);
 			if (error == 0) {
 				pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTED;
 				soisconnected(pcb->so);
 			}
 			break;
 
 		case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING:
 			ng_btsocket_rfcomm_pcb_kill(pcb, 0);
 			break;
 
 		default:
 			NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UA for dlci=%d in invalid state=%d, flags=%#x\n",
 				__func__, dlci, pcb->state, pcb->flags);
 			error = ENOENT;
 			break;
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 	} else {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UA for non-existing dlci=%d\n", __func__, dlci);
 
 		error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_DM,dlci);
 	}
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_ua */
 
 /*
  * Process RFCOMM DM frame. Caller must hold s->session_mtx
  * (asserted below).
  *
  * On DLCI 0 (multiplexor channel) the session is moved to CLOSED and all
  * of its DLCs are cleaned. On any other DLCI the matching DLC, if any, is
  * killed with ECONNRESET if it was CONNECTED and with ECONNREFUSED
  * otherwise; a DM for an unknown DLCI is only logged.
  *
  * Always returns 0.
  */
 
 static int
 ng_btsocket_rfcomm_receive_dm(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL;
 	/* Only used as the error code handed to the killed DLC */
 	int				error;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got DM, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci);
 
 	/* DLCI == 0 means multiplexor channel */
 	if (dlci == 0) {
 		/* Disconnect all dlc's on the session */
 		s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 		ng_btsocket_rfcomm_session_clean(s);
 	} else {
 		pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 		if (pcb != NULL) {
 			mtx_lock(&pcb->pcb_mtx);
 
 			NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got DM for dlci=%d, state=%d, flags=%#x\n",
 				__func__, dlci, pcb->state, pcb->flags);
 
 			/* Established DLC is "reset", otherwise "refused" */
 			if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONNECTED)
 				error = ECONNRESET;
 			else
 				error = ECONNREFUSED;
 
 			ng_btsocket_rfcomm_pcb_kill(pcb, error);
 
 			mtx_unlock(&pcb->pcb_mtx);
 		} else
 			NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got DM for non-existing dlci=%d\n", __func__, dlci);
 	}
 
 	return (0);
 } /* ng_btsocket_rfcomm_receive_dm */
 
 /*
  * Process RFCOMM UIH frame (data). Caller must hold s->session_mtx.
  *
  * The frame is dropped (and m0 freed) when session flow control is asserted,
  * when the dlci is unknown (a DM is sent back), when the DLC is not in
  * CONNECTED state, or when DLC flow control is asserted. Otherwise an
  * optional leading credits byte is consumed and the remaining payload, if
  * any, is appended to the socket receive buffer.
  *
  * m0 is always consumed: it is either handed to the socket receive buffer
  * or freed before returning.
  *
  * Returns 0, or EINVAL (DLC in wrong state), EMSGSIZE (payload larger than
  * DLC mtu), ENOBUFS (no room in socket receive buffer), or the result of
  * sending DM for an unknown dlci.
  */
 
 static int
 ng_btsocket_rfcomm_receive_uih(ng_btsocket_rfcomm_session_p s, int dlci,
 		int pf, struct mbuf *m0)
 {
 	ng_btsocket_rfcomm_pcb_p	pcb = NULL;
 	int				error = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got UIH, session state=%d, flags=%#x, mtu=%d, dlci=%d, pf=%d, len=%d\n",
 		__func__, s->state, s->flags, s->mtu, dlci, pf,
 		m0->m_pkthdr.len);
 
 	/* XXX should we do it here? Check for session flow control */
 	if (s->flags & NG_BTSOCKET_RFCOMM_SESSION_LFC) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UIH with session flow control asserted, state=%d, flags=%#x\n",
 			__func__, s->state, s->flags);
 		goto drop;
 	}
 
 	/* Check if we have this dlci; if not, tell the peer with DM */
 	pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 	if (pcb == NULL) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UIH for non-existing dlci=%d\n", __func__, dlci);
 		error = ng_btsocket_rfcomm_send_command(s,RFCOMM_FRAME_DM,dlci);
 		goto drop;
 	}
 
 	/* From here on every exit must go through drop1 to release the lock */
 	mtx_lock(&pcb->pcb_mtx);
 
 	/* Check dlci state */
 	if (pcb->state != NG_BTSOCKET_RFCOMM_DLC_CONNECTED) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got UIH for dlci=%d in invalid state=%d, flags=%#x\n",
 			__func__, dlci, pcb->state, pcb->flags);
 		error = EINVAL;
 		goto drop1;
 	}
 
 	/*
 	 * Check dlci flow control. The frame is dropped but no error is
 	 * reported to the caller (error is still 0 here).
 	 */
 	if (((pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) && pcb->rx_cred <= 0) ||
 	     (pcb->lmodem & RFCOMM_MODEM_FC)) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got UIH for dlci=%d with asserted flow control, state=%d, " \
 "flags=%#x, rx_cred=%d, lmodem=%#x\n",
 			__func__, dlci, pcb->state, pcb->flags,
 			pcb->rx_cred, pcb->lmodem);
 		goto drop1;
 	}
 
 	/*
 	 * Did we get any credits? With credit based flow control and pf set
 	 * the first payload byte is the number of credits granted to us.
 	 * NOTE(review): the byte is read without checking that m0 is not
 	 * empty - presumably the caller has validated the frame length;
 	 * confirm.
 	 */
 	if ((pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) && pf) {
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got %d more credits for dlci=%d, state=%d, flags=%#x, " \
 "rx_cred=%d, tx_cred=%d\n",
 			__func__, *mtod(m0, u_int8_t *), dlci, pcb->state,
 			pcb->flags, pcb->rx_cred, pcb->tx_cred);
 
 		pcb->tx_cred += *mtod(m0, u_int8_t *);
 		m_adj(m0, 1); /* strip the credits byte */
 
 		/* Send more from the DLC. XXX check for errors? */
 		ng_btsocket_rfcomm_pcb_send(pcb, ALOT);
 	}
 
 	/* OK, the rest of the mbuf is the data */
 	if (m0->m_pkthdr.len > 0) {
 		/* If we are using credit flow control decrease rx_cred here */
 		if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) {
 			/* Give remote peer more credits (if needed) */
 			if (-- pcb->rx_cred <= RFCOMM_MAX_CREDITS / 2)
 				ng_btsocket_rfcomm_send_credits(pcb);
 			else
 				NG_BTSOCKET_RFCOMM_INFO(
 "%s: Remote side still has credits, dlci=%d, state=%d, flags=%#x, " \
 "rx_cred=%d, tx_cred=%d\n",		__func__, dlci, pcb->state, pcb->flags,
 					pcb->rx_cred, pcb->tx_cred);
 		}
 
 		/* Check packet against mtu on dlci */
 		if (m0->m_pkthdr.len > pcb->mtu) {
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got oversized UIH for dlci=%d, state=%d, flags=%#x, mtu=%d, len=%d\n",
 				__func__, dlci, pcb->state, pcb->flags,
 				pcb->mtu, m0->m_pkthdr.len);
 
 			error = EMSGSIZE;
 		} else if (m0->m_pkthdr.len > sbspace(&pcb->so->so_rcv)) {
 
 			/*
 			 * This is really bad. Receive queue on socket does
 			 * not have enough space for the packet. We do not
 			 * have any other choice but drop the packet.
 			 */
 
 			NG_BTSOCKET_RFCOMM_ERR(
 "%s: Not enough space in socket receive queue. Dropping UIH for dlci=%d, " \
 "state=%d, flags=%#x, len=%d, space=%ld\n",
 				__func__, dlci, pcb->state, pcb->flags,
 				m0->m_pkthdr.len, sbspace(&pcb->so->so_rcv));
 
 			error = ENOBUFS;
 		} else {
 			/*
 			 * Append packet to the socket receive queue. The
 			 * socket buffer now owns the mbuf, so clear m0 to
 			 * keep the cleanup below from freeing it.
 			 */
 			sbappend(&pcb->so->so_rcv, m0, 0);
 			m0 = NULL;
 
 			sorwakeup(pcb->so);
 		}
 	}
 drop1:
 	mtx_unlock(&pcb->pcb_mtx);
 drop:
 	NG_FREE_M(m0); /* checks for != NULL */
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_uih */
 
 /*
  * Process RFCOMM MCC command (Multiplexor)
  * 
  * From TS 07.10 spec
  *
  * "5.4.3.1 Information Data
  * 
  *  ...The frames (UIH) sent by the initiating station have the C/R bit set 
  *  to 1 and those sent by the responding station have the C/R bit set to 0..."
  *
  * "5.4.6.2 Operating procedures
  *
  *  Messages always exist in pairs; a command message and a corresponding 
  *  response message. If the C/R bit is set to 1 the message is a command, 
  *  if it is set to 0 the message is a response...
  *
  *  ...
  * 
  *  NOTE: Notice that when UIH frames are used to convey information on DLCI 0
  *  there are at least two different fields that contain a C/R bit, and the 
  *  bits are set of different form. The C/R bit in the Type field shall be set
  *  as it is stated above, while the C/R bit in the Address field (see subclause
  *  5.2.1.2) shall be set as it is described in subclause 5.4.3.1."
  */
 
 static int
 ng_btsocket_rfcomm_receive_mcc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*mcc;
 	u_int8_t		 mcc_cr, mcc_type, mcc_len;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * The MCC header is read straight from the first mbuf: the chain was
 	 * m_pullup()'ed in ng_btsocket_rfcomm_receive_frame(), and all MCC
 	 * commands (except probably TEST) should fit into a single mbuf.
 	 */
 
 	mcc = mtod(m0, struct rfcomm_mcc_hdr *);
 	mcc_cr = RFCOMM_CR(mcc->type);
 	mcc_type = RFCOMM_MCC_TYPE(mcc->type);
 	mcc_len = RFCOMM_MCC_LENGTH(mcc->length);
 
 	/* Declared MCC length must account for the whole packet */
 	if (m0->m_pkthdr.len != sizeof(*mcc) + mcc_len) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Invalid MCC frame length=%d, len=%d\n",
 			__func__, mcc_len, m0->m_pkthdr.len);
 		NG_FREE_M(m0);
 
 		return (EMSGSIZE);
 	}
 
 	/* Each handler below takes ownership of m0 */
 	switch (mcc_type) {
 	case RFCOMM_MCC_TEST:
 		return (ng_btsocket_rfcomm_receive_test(s, m0));
 
 	case RFCOMM_MCC_FCON:
 	case RFCOMM_MCC_FCOFF:
 		return (ng_btsocket_rfcomm_receive_fc(s, m0));
 
 	case RFCOMM_MCC_MSC:
 		return (ng_btsocket_rfcomm_receive_msc(s, m0));
 
 	case RFCOMM_MCC_RPN:
 		return (ng_btsocket_rfcomm_receive_rpn(s, m0));
 
 	case RFCOMM_MCC_RLS:
 		return (ng_btsocket_rfcomm_receive_rls(s, m0));
 
 	case RFCOMM_MCC_PN:
 		return (ng_btsocket_rfcomm_receive_pn(s, m0));
 
 	case RFCOMM_MCC_NSC:
 		/* Peer did not like one of our commands. Log and drop */
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got MCC NSC, type=%#x, cr=%d, length=%d, session state=%d, flags=%#x, " \
 "mtu=%d, len=%d\n",	__func__, RFCOMM_MCC_TYPE(*((u_int8_t *)(mcc + 1))), mcc_cr,
 			 mcc_len, s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 		NG_FREE_M(m0);
 
 		return (0);
 
 	default:
 		break;
 	}
 
 	NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got unknown MCC, type=%#x, cr=%d, length=%d, session state=%d, " \
 "flags=%#x, mtu=%d, len=%d\n",
 		__func__, mcc_type, mcc_cr, mcc_len, s->state, s->flags,
 		s->mtu, m0->m_pkthdr.len);
 
 	/*
 	 * Reuse the mbuf to answer with NSC: MCC header followed by a single
 	 * byte that carries the command type we did not like.
 	 */
 	mcc->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_NSC);
 	mcc->length = RFCOMM_MKLEN8(1);
 	m0->m_data[sizeof(*mcc)] = RFCOMM_MKMCC_TYPE(mcc_cr, mcc_type);
 	m0->m_pkthdr.len = m0->m_len = sizeof(*mcc) + 1;
 
 	/* Send UIH frame */
 	return (ng_btsocket_rfcomm_send_uih(s,
 			RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0));
 } /* ng_btsocket_rfcomm_receive_mcc */
 
 /*
  * Receive RFCOMM TEST MCC command. Caller must hold s->session_mtx.
  *
  * A TEST command is echoed back as a response in the same mbuf; a TEST
  * response is dropped. m0 is consumed either way.
  */
 
 static int
 ng_btsocket_rfcomm_receive_test(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*mcc = mtod(m0, struct rfcomm_mcc_hdr *);
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC TEST, cr=%d, length=%d, session state=%d, flags=%#x, mtu=%d, " \
 "len=%d\n",	__func__, RFCOMM_CR(mcc->type), RFCOMM_MCC_LENGTH(mcc->length),
 		s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 
 	if (!RFCOMM_CR(mcc->type)) {
 		NG_FREE_M(m0); /* XXX ignore response */
 
 		return (0);
 	}
 
 	/* Turn the command into a response and send it back */
 	mcc->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_TEST);
 
 	return (ng_btsocket_rfcomm_send_uih(s,
 			RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0));
 } /* ng_btsocket_rfcomm_receive_test */
 
 /*
  * Receive RFCOMM FCON/FCOFF MCC command. Caller must hold s->session_mtx.
  *
  * A command updates the session's remote flow control flag and is answered
  * in the same mbuf; a response is dropped. m0 is consumed either way.
  */
 
 static int
 ng_btsocket_rfcomm_receive_fc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*mcc = mtod(m0, struct rfcomm_mcc_hdr *);
 	u_int8_t		 fc_type = RFCOMM_MCC_TYPE(mcc->type);
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC FC%s, cr=%d, length=%d, session state=%d, flags=%#x, mtu=%d, " \
 "len=%d\n",	__func__, (fc_type == RFCOMM_MCC_FCON)? "ON" : "OFF",
 		RFCOMM_CR(mcc->type), RFCOMM_MCC_LENGTH(mcc->length),
 		s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 
 	if (!RFCOMM_CR(mcc->type)) {
 		NG_FREE_M(m0); /* XXX ignore response */
 
 		return (0);
 	}
 
 	/*
 	 * Turn ON/OFF aggregate flow on the entire session. While the remote
 	 * peer has flow control asserted no transmission shall occur except
 	 * on dlci 0 (control channel).
 	 */
 
 	if (fc_type != RFCOMM_MCC_FCON)
 		s->flags |= NG_BTSOCKET_RFCOMM_SESSION_RFC;
 	else
 		s->flags &= ~NG_BTSOCKET_RFCOMM_SESSION_RFC;
 
 	/* Answer with the same type, C/R bit cleared */
 	mcc->type = RFCOMM_MKMCC_TYPE(0, fc_type);
 
 	return (ng_btsocket_rfcomm_send_uih(s,
 			RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0));
 } /* ng_btsocket_rfcomm_receive_fc  */
 
 /*
  * Receive RFCOMM MSC MCC command. Caller must hold s->session_mtx.
  *
  * An MSC command records the remote modem signals on the addressed DLC and
  * is answered in the same mbuf. Responses are dropped. m0 is consumed on
  * every path.
  *
  * Returns ENOENT if the dlci is unknown, EINVAL if the DLC is neither
  * CONNECTING nor CONNECTED, otherwise the result of sending the response.
  */
 
 static int
 ng_btsocket_rfcomm_receive_msc(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr		*mcc = mtod(m0, struct rfcomm_mcc_hdr*);
 	struct rfcomm_mcc_msc		*body = (struct rfcomm_mcc_msc *)(mcc+1);
 	ng_btsocket_rfcomm_pcb_t	*dlc;
 	int				 dlci, error;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	dlci = RFCOMM_DLCI(body->address);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC MSC, dlci=%d, cr=%d, length=%d, session state=%d, flags=%#x, " \
 "mtu=%d, len=%d\n",
 		__func__,  dlci, RFCOMM_CR(mcc->type),
 		RFCOMM_MCC_LENGTH(mcc->length), s->state, s->flags,
 		s->mtu, m0->m_pkthdr.len);
 
 	if (!RFCOMM_CR(mcc->type)) {
 		NG_FREE_M(m0); /* XXX ignore response */
 
 		return (0);
 	}
 
 	dlc = ng_btsocket_rfcomm_pcb_by_dlci(s, dlci);
 	if (dlc == NULL) {
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got MSC command for non-existing dlci=%d\n",
 			__func__, dlci);
 		NG_FREE_M(m0);
 
 		return (ENOENT);
 	}
 
 	mtx_lock(&dlc->pcb_mtx);
 
 	switch (dlc->state) {
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTING:
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTED:
 		break;
 
 	default:
 		NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got MSC on dlci=%d in invalid state=%d\n",
 			__func__, dlci,
 			dlc->state);
 
 		mtx_unlock(&dlc->pcb_mtx);
 		NG_FREE_M(m0);
 
 		return (EINVAL);
 	}
 
 	/* Update remote port signals */
 	dlc->rmodem = body->modem;
 
 	/* Reply in place: same payload, C/R bit cleared */
 	mcc->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_MSC);
 	error = ng_btsocket_rfcomm_send_uih(s,
 			RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0);
 
 #if 0 /* YYY */
 	/* Send more data from DLC. XXX check for errors? */
 	if (!(dlc->rmodem & RFCOMM_MODEM_FC) &&
 	    !(dlc->flags & NG_BTSOCKET_RFCOMM_DLC_CFC))
 		ng_btsocket_rfcomm_pcb_send(dlc, ALOT);
 #endif /* YYY */
 
 	mtx_unlock(&dlc->pcb_mtx);
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_msc */
 
 /*
  * Receive RFCOMM RPN MCC command. Caller must hold s->session_mtx.
  * XXX FIXME do we need htole16/le16toh for RPN param_mask?
  *
  * An RPN command is answered in the same mbuf. A one byte command is a
  * request for current settings and gets the defaults back. A longer command
  * proposes settings: for every parameter the peer marked in its param_mask
  * that differs from what we support (8 data bits, 1 stop bit, no parity, no
  * flow control, default XON/XOFF) the supported value is returned and the
  * matching bit is cleared in the response param_mask. The bit rate is
  * accepted as is.
  *
  * RPN responses are dropped. m0 is consumed on every path. Returns the
  * result of sending the response, or 0.
  */
 
 static int
 ng_btsocket_rfcomm_receive_rpn(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*hdr = mtod(m0, struct rfcomm_mcc_hdr *);
 	struct rfcomm_mcc_rpn	*rpn = (struct rfcomm_mcc_rpn *)(hdr + 1);
 	int			 error = 0;
 	u_int16_t		 param_mask;
 	u_int8_t		 bit_rate, data_bits, stop_bits, parity,
 				 flow_control, xon_char, xoff_char;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC RPN, dlci=%d, cr=%d, length=%d, session state=%d, flags=%#x, " \
 "mtu=%d, len=%d\n",
 		__func__, RFCOMM_DLCI(rpn->dlci), RFCOMM_CR(hdr->type),
 		RFCOMM_MCC_LENGTH(hdr->length), s->state, s->flags,
 		s->mtu, m0->m_pkthdr.len);
 
 	if (RFCOMM_CR(hdr->type)) {
 		/* Start with everything accepted, clear bits as we reject */
 		param_mask = RFCOMM_RPN_PM_ALL;
 
 		if (RFCOMM_MCC_LENGTH(hdr->length) == 1) {
 			/* Request - return default setting */
 			bit_rate = RFCOMM_RPN_BR_115200;
 			data_bits = RFCOMM_RPN_DATA_8;
 			stop_bits = RFCOMM_RPN_STOP_1;
 			parity = RFCOMM_RPN_PARITY_NONE;
 			flow_control = RFCOMM_RPN_FLOW_NONE;
 			xon_char = RFCOMM_RPN_XON_CHAR;
 			xoff_char = RFCOMM_RPN_XOFF_CHAR;
 		} else {
 			/*
 			 * Ignore/accept bit_rate, 8 bits, 1 stop bit, no
 			 * parity, no flow control lines, default XON/XOFF
 			 * chars.
 			 */
 
 			bit_rate = rpn->bit_rate;
 			rpn->param_mask = le16toh(rpn->param_mask); /* XXX */
 
 			data_bits = RFCOMM_RPN_DATA_BITS(rpn->line_settings);
 			if (rpn->param_mask & RFCOMM_RPN_PM_DATA &&
 			    data_bits != RFCOMM_RPN_DATA_8) {
 				data_bits = RFCOMM_RPN_DATA_8;
 				param_mask ^= RFCOMM_RPN_PM_DATA;
 			}
 
 			stop_bits = RFCOMM_RPN_STOP_BITS(rpn->line_settings);
 			if (rpn->param_mask & RFCOMM_RPN_PM_STOP &&
 			    stop_bits != RFCOMM_RPN_STOP_1) {
 				stop_bits = RFCOMM_RPN_STOP_1;
 				param_mask ^= RFCOMM_RPN_PM_STOP;
 			}
 
 			parity = RFCOMM_RPN_PARITY(rpn->line_settings);
 			if (rpn->param_mask & RFCOMM_RPN_PM_PARITY &&
 			    parity != RFCOMM_RPN_PARITY_NONE) {
 				parity = RFCOMM_RPN_PARITY_NONE;
 				param_mask ^= RFCOMM_RPN_PM_PARITY;
 			}
 
 			flow_control = rpn->flow_control;
 			if (rpn->param_mask & RFCOMM_RPN_PM_FLOW &&
 			    flow_control != RFCOMM_RPN_FLOW_NONE) {
 				flow_control = RFCOMM_RPN_FLOW_NONE;
 				param_mask ^= RFCOMM_RPN_PM_FLOW;
 			}
 
 			xon_char = rpn->xon_char;
 			if (rpn->param_mask & RFCOMM_RPN_PM_XON &&
 			    xon_char != RFCOMM_RPN_XON_CHAR) {
 				xon_char = RFCOMM_RPN_XON_CHAR;
 				param_mask ^= RFCOMM_RPN_PM_XON;
 			}
 
 			xoff_char = rpn->xoff_char;
 			if (rpn->param_mask & RFCOMM_RPN_PM_XOFF &&
 			    xoff_char != RFCOMM_RPN_XOFF_CHAR) {
 				xoff_char = RFCOMM_RPN_XOFF_CHAR;
 				param_mask ^= RFCOMM_RPN_PM_XOFF;
 			}
 		}
 
 		/*
 		 * Build the response in place.
 		 * NOTE(review): for a one byte request this writes past the
 		 * received payload; it relies on the first mbuf having room
 		 * for a full struct rfcomm_mcc_rpn - confirm.
 		 */
 		rpn->bit_rate = bit_rate;
 		rpn->line_settings = RFCOMM_MKRPN_LINE_SETTINGS(data_bits,
 						stop_bits, parity);
 		rpn->flow_control = flow_control;
 		rpn->xon_char = xon_char;
 		rpn->xoff_char = xoff_char;
 		rpn->param_mask = htole16(param_mask); /* XXX */
 
 		m0->m_pkthdr.len = m0->m_len = sizeof(*hdr) + sizeof(*rpn);
 
 		/*
 		 * The response always carries the complete RPN structure, so
 		 * the MCC length field must say so. Without this a one byte
 		 * request would be answered with length=1 followed by
 		 * sizeof(*rpn) bytes of payload, which fails the MCC length
 		 * check in ng_btsocket_rfcomm_receive_mcc().
 		 */
 		hdr->length = RFCOMM_MKLEN8(sizeof(*rpn));
 
 		hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_RPN);
 		error = ng_btsocket_rfcomm_send_uih(s,
 				RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0);
 	} else
 		NG_FREE_M(m0); /* XXX ignore response */
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_rpn */
 
 /*
  * Receive RFCOMM RLS MCC command. Caller must hold s->session_mtx.
  *
  * An RLS command is logged and echoed back as a response in the same mbuf;
  * an RLS response is dropped. m0 is consumed either way.
  */
 
 static int
 ng_btsocket_rfcomm_receive_rls(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr	*mcc = mtod(m0, struct rfcomm_mcc_hdr *);
 	struct rfcomm_mcc_rls	*line = (struct rfcomm_mcc_rls *)(mcc + 1);
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/*
 	 * XXX FIXME Do we have to do anything else here? Remote peer tries to
 	 * tell us something about DLCI. Just report what we have received and
 	 * return back received values as required by TS 07.10 spec.
 	 */
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC RLS, dlci=%d, status=%#x, cr=%d, length=%d, session state=%d, " \
 "flags=%#x, mtu=%d, len=%d\n",
 		__func__, RFCOMM_DLCI(line->address), line->status,
 		RFCOMM_CR(mcc->type), RFCOMM_MCC_LENGTH(mcc->length),
 		s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 
 	if (!RFCOMM_CR(mcc->type)) {
 		NG_FREE_M(m0); /* XXX ignore responses */
 
 		return (0);
 	}
 
 	/* Low status bit set - the remaining bits are logged as the error */
 	if (line->status & 0x1) {
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Got RLS dlci=%d, error=%#x\n", __func__, RFCOMM_DLCI(line->address),
 			line->status >> 1);
 	}
 
 	mcc->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_RLS);
 
 	return (ng_btsocket_rfcomm_send_uih(s,
 			RFCOMM_MKADDRESS(INITIATOR(s), 0), 0, 0, m0));
 } /* ng_btsocket_rfcomm_receive_rls */
 
 /*
  * Receive RFCOMM PN MCC command. Caller must hold s->session_mtx.
  *
  * Three cases are handled:
  *
  *  - PN request for an existing dlci: apply the parameters to the DLC and
  *    answer in the same mbuf;
  *  - PN response for an existing dlci: if the DLC is in CONFIGURING state
  *    apply the parameters, move it to CONNECTING and send SABM;
  *  - PN request for an unknown dlci: treat it as an incoming connection. If
  *    ng_btsocket_rfcomm_connect_ind() provides a pcb, answer the PN and
  *    move the new DLC to CONNECTING, otherwise send DM.
  *
  * A PN with dlci 0 is rejected with EINVAL and a PN response for an unknown
  * dlci is dropped. m0 is consumed on every path: it is either reused for
  * the response or freed.
  */
 
 static int
 ng_btsocket_rfcomm_receive_pn(ng_btsocket_rfcomm_session_p s, struct mbuf *m0)
 {
 	struct rfcomm_mcc_hdr		*hdr = mtod(m0, struct rfcomm_mcc_hdr*);
 	struct rfcomm_mcc_pn		*pn = (struct rfcomm_mcc_pn *)(hdr+1);
 	ng_btsocket_rfcomm_pcb_t	*pcb = NULL;
 	int				 error = 0;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Got MCC PN, dlci=%d, cr=%d, length=%d, flow_control=%#x, priority=%d, " \
 "ack_timer=%d, mtu=%d, max_retrans=%d, credits=%d, session state=%d, " \
 "flags=%#x, session mtu=%d, len=%d\n",
 		__func__, pn->dlci, RFCOMM_CR(hdr->type),
 		RFCOMM_MCC_LENGTH(hdr->length), pn->flow_control, pn->priority,
 		pn->ack_timer, le16toh(pn->mtu), pn->max_retrans, pn->credits,
 		s->state, s->flags, s->mtu, m0->m_pkthdr.len);
 
 	if (pn->dlci == 0) {
 		NG_BTSOCKET_RFCOMM_ERR("%s: Zero dlci in MCC PN\n", __func__);
 		NG_FREE_M(m0);
 
 		return (EINVAL);
 	}
 
 	/* Check if we have this dlci */
 	pcb = ng_btsocket_rfcomm_pcb_by_dlci(s, pn->dlci);
 	if (pcb != NULL) {
 		mtx_lock(&pcb->pcb_mtx);
 
 		if (RFCOMM_CR(hdr->type)) {
 			/*
 			 * PN Request. pn->mtu is passed as received;
 			 * ng_btsocket_rfcomm_set_pn() does the le16toh().
 			 */
 			ng_btsocket_rfcomm_set_pn(pcb, 1, pn->flow_control,
 				pn->credits, pn->mtu);
 
 			/* Fill in our answer depending on what was agreed */
 			if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) {
 				pn->flow_control = 0xe0;
 				pn->credits = RFCOMM_DEFAULT_CREDITS;
 			} else {
 				pn->flow_control = 0;
 				pn->credits = 0;
 			}
 
 			/* Reply in the received mbuf; send_uih owns m0 now */
 			hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_PN);
 			error = ng_btsocket_rfcomm_send_uih(s,
 					RFCOMM_MKADDRESS(INITIATOR(s), 0),
 					0, 0, m0);
 		} else {
 			/* PN Response - proceed with SABM. Timeout still set */
 			if (pcb->state == NG_BTSOCKET_RFCOMM_DLC_CONFIGURING) {
 				ng_btsocket_rfcomm_set_pn(pcb, 0,
 					pn->flow_control, pn->credits, pn->mtu);
 
 				pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTING;
 				error = ng_btsocket_rfcomm_send_command(s,
 						RFCOMM_FRAME_SABM, pn->dlci);
 			} else
 				NG_BTSOCKET_RFCOMM_WARN(
 "%s: Got PN response for dlci=%d in invalid state=%d\n",
 					__func__, pn->dlci, pcb->state);
 
 			/* The response itself is not reused */
 			NG_FREE_M(m0);
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 	} else if (RFCOMM_CR(hdr->type)) {
 		/* PN request to non-existing dlci - incoming connection */
 		pcb = ng_btsocket_rfcomm_connect_ind(s,
 				RFCOMM_SRVCHANNEL(pn->dlci));
 		if (pcb != NULL) {
 			mtx_lock(&pcb->pcb_mtx);
 
 			pcb->dlci = pn->dlci;
 
 			ng_btsocket_rfcomm_set_pn(pcb, 1, pn->flow_control,
 				pn->credits, pn->mtu);
 
 			if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC) {
 				pn->flow_control = 0xe0;
 				pn->credits = RFCOMM_DEFAULT_CREDITS;
 			} else {
 				pn->flow_control = 0;
 				pn->credits = 0;
 			}
 
 			hdr->type = RFCOMM_MKMCC_TYPE(0, RFCOMM_MCC_PN);
 			error = ng_btsocket_rfcomm_send_uih(s,
 					RFCOMM_MKADDRESS(INITIATOR(s), 0),
 					0, 0, m0);
 
 			/*
 			 * Arm the DLC timeout and start connecting only if
 			 * the PN response was sent; otherwise tear the new
 			 * DLC down right away.
 			 */
 			if (error == 0) {
 				ng_btsocket_rfcomm_timeout(pcb);
 				pcb->state = NG_BTSOCKET_RFCOMM_DLC_CONNECTING;
 				soisconnecting(pcb->so);
 			} else
 				ng_btsocket_rfcomm_pcb_kill(pcb, error);
 
 			mtx_unlock(&pcb->pcb_mtx);
 		} else {
 			/* Nobody is listen()ing on this channel */
 			error = ng_btsocket_rfcomm_send_command(s,
 					RFCOMM_FRAME_DM, pn->dlci);
 			NG_FREE_M(m0);
 		}
 	} else
 		NG_FREE_M(m0); /* XXX ignore response to non-existing dlci */
 
 	return (error);
 } /* ng_btsocket_rfcomm_receive_pn */
 
 /*
  * Set PN parameters for dlci. Caller must hold pcb->pcb_mtx.
  * 
  * From Bluetooth spec.
  * 
  * "... The CL1 - CL4 field is completely redefined. (In TS07.10 this defines 
  *  the convergence layer to use, which is not applicable to RFCOMM. In RFCOMM,
  *  in Bluetooth versions up to 1.0B, this field was forced to 0).
  *
  *  In the PN request sent prior to a DLC establishment, this field must contain
  *  the value 15 (0xF), indicating support of credit based flow control in the 
  *  sender. See Table 5.3 below. If the PN response contains any other value 
  *  than 14 (0xE) in this field, it is inferred that the peer RFCOMM entity is 
  *  not supporting the credit based flow control feature. (This is only possible
  *  if the peer RFCOMM implementation is only conforming to Bluetooth version 
  *  1.0B.) If a PN request is sent on an already open DLC, then this field must
  *  contain the value zero; it is not possible to set initial credits  more 
  *  than once per DLC activation. A responding implementation must set this 
  *  field in the PN response to 14 (0xE), if (and only if) the value in the PN 
  *  request was 15..."
  */
 
 static void
 ng_btsocket_rfcomm_set_pn(ng_btsocket_rfcomm_pcb_p pcb, u_int8_t cr,
 		u_int8_t flow_control, u_int8_t credits, u_int16_t mtu)
 {
 	/*
 	 * Value of the flow control field that turns on credit based flow
 	 * control: 0xf0 when handling a PN request (cr != 0), 0xe0 when
 	 * handling a PN response.
 	 */
 	u_int8_t	cfc_marker = cr? 0xf0 : 0xe0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* mtu is passed in little-endian order, as received */
 	pcb->mtu = le16toh(mtu);
 
 	if (flow_control != cfc_marker) {
 		/* No credit based flow control on this DLC */
 		pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_CFC;
 		pcb->tx_cred = 0;
 	} else {
 		/* Credit based flow control, take the initial credits */
 		pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_CFC;
 		pcb->tx_cred = credits;
 	}
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: cr=%d, dlci=%d, state=%d, flags=%#x, mtu=%d, rx_cred=%d, tx_cred=%d\n",
 		__func__, cr, pcb->dlci, pcb->state, pcb->flags, pcb->mtu,
 		pcb->rx_cred, pcb->tx_cred);
 } /* ng_btsocket_rfcomm_set_pn */
 
 /*
  * Send RFCOMM SABM/DISC/UA/DM frames. Caller must hold s->session_mtx
  *
  * Builds a command frame with zero length payload for the given dlci and
  * puts it on the session output queue. Returns 0, or ENOBUFS if no mbuf
  * could be allocated. Any other frame type is a programming error and
  * panics.
  */
 
 static int
 ng_btsocket_rfcomm_send_command(ng_btsocket_rfcomm_session_p s,
 		u_int8_t type, u_int8_t dlci)
 {
 	struct rfcomm_cmd_hdr	*cmd;
 	struct mbuf		*frame;
 	int			 cr;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending command type %#x, session state=%d, flags=%#x, mtu=%d, dlci=%d\n",
 		__func__, type, s->state, s->flags, s->mtu, dlci);
 
 	/* SABM and DISC are commands, UA and DM are responses */
 	if (type == RFCOMM_FRAME_SABM || type == RFCOMM_FRAME_DISC)
 		cr = INITIATOR(s);
 	else if (type == RFCOMM_FRAME_UA || type == RFCOMM_FRAME_DM)
 		cr = !INITIATOR(s);
 	else {
 		panic("%s: Invalid frame type=%#x\n", __func__, type);
 		return (EINVAL);
 		/* NOT REACHED */
 	}
 
 	MGETHDR(frame, M_NOWAIT, MT_DATA);
 	if (frame == NULL)
 		return (ENOBUFS);
 
 	frame->m_len = sizeof(*cmd);
 	frame->m_pkthdr.len = frame->m_len;
 
 	/* FCS goes last, after the rest of the header is filled in */
 	cmd = mtod(frame, struct rfcomm_cmd_hdr *);
 	cmd->address = RFCOMM_MKADDRESS(cr, dlci);
 	cmd->control = RFCOMM_MKCONTROL(type, 1);
 	cmd->length = RFCOMM_MKLEN8(0);
 	cmd->fcs = ng_btsocket_rfcomm_fcs3((u_int8_t *) cmd);
 
 	NG_BT_MBUFQ_ENQUEUE(&s->outq, frame);
 
 	return (0);
 } /* ng_btsocket_rfcomm_send_command */
 
 /*
  * Send RFCOMM UIH frame. Caller must hold s->session_mtx
  *
  * The frame is assembled from a freshly allocated header mbuf, an optional
  * credits byte (only when pf is set), the payload in `data' (may be NULL)
  * and a one byte FCS kept in its own mbuf, and is then put on the session
  * output queue.
  *
  * `data' is always consumed: it becomes part of the queued frame on success
  * and is freed on failure. Returns 0, or ENOBUFS if an mbuf could not be
  * allocated.
  */
 
 static int
 ng_btsocket_rfcomm_send_uih(ng_btsocket_rfcomm_session_p s, u_int8_t address,
 		u_int8_t pf, u_int8_t credits, struct mbuf *data)
 {
 	struct rfcomm_frame_hdr	*hdr = NULL;
 	struct mbuf		*m = NULL, *mcrc = NULL;
 	u_int16_t		 length;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	MGETHDR(m, M_NOWAIT, MT_DATA);
 	if (m == NULL) {
 		NG_FREE_M(data);
 		return (ENOBUFS);
 	}
 	m->m_pkthdr.len = m->m_len = sizeof(*hdr);
 
 	MGET(mcrc, M_NOWAIT, MT_DATA);
 	if (mcrc == NULL) {
 		/* Do not leak the header mbuf allocated just above */
 		NG_FREE_M(m);
 		NG_FREE_M(data);
 		return (ENOBUFS);
 	}
 	mcrc->m_len = 1;
 
 	/* Fill UIH frame header */
 	hdr = mtod(m, struct rfcomm_frame_hdr *);
 	hdr->address = address;
 	hdr->control = RFCOMM_MKCONTROL(RFCOMM_FRAME_UIH, pf);
 
 	/* Calculate FCS */
 	mcrc->m_data[0] = ng_btsocket_rfcomm_fcs2((u_int8_t *) hdr);
 
 	/*
 	 * Put length back. Payloads longer than 127 bytes need the two byte
 	 * form of the length field, which makes the header one byte longer.
 	 */
 	length = (data != NULL)? data->m_pkthdr.len : 0;
 	if (length > 127) {
 		u_int16_t	l = htole16(RFCOMM_MKLEN16(length));
 
 		bcopy(&l, &hdr->length, sizeof(l));
 		m->m_pkthdr.len ++;
 		m->m_len ++;
 	} else
 		hdr->length = RFCOMM_MKLEN8(length);
 
 	/* With pf set the credits byte follows the header */
 	if (pf) {
 		m->m_data[m->m_len] = credits;
 		m->m_pkthdr.len ++;
 		m->m_len ++;
 	}
 
 	/* Add payload. Packet length is accounted for by hand */
 	if (data != NULL) {
 		m_cat(m, data);
 		m->m_pkthdr.len += length;
 	}
 
 	/* Put FCS back */
 	m_cat(m, mcrc);
 	m->m_pkthdr.len ++;
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending UIH state=%d, flags=%#x, address=%d, length=%d, pf=%d, " \
 "credits=%d, len=%d\n",
 		__func__, s->state, s->flags, address, length, pf, credits,
 		m->m_pkthdr.len);
 
 	NG_BT_MBUFQ_ENQUEUE(&s->outq, m);
 
 	return (0);
 } /* ng_btsocket_rfcomm_send_uih */
 
 /*
  * Send MSC request. Caller must hold pcb->pcb_mtx and pcb->session->session_mtx
  *
  * Builds an MSC command carrying the DLC's local modem signals and sends it
  * as UIH frame on the multiplexor channel. Returns ENOBUFS if no mbuf could
  * be allocated, otherwise the result of ng_btsocket_rfcomm_send_uih().
  */
 
 static int
 ng_btsocket_rfcomm_send_msc(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	struct mbuf		*frame;
 	struct rfcomm_mcc_hdr	*mcc;
 	struct rfcomm_mcc_msc	*body;
 
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	MGETHDR(frame, M_NOWAIT, MT_DATA);
 	if (frame == NULL)
 		return (ENOBUFS);
 
 	/* MCC header immediately followed by MSC body */
 	mcc = mtod(frame, struct rfcomm_mcc_hdr *);
 	body = (struct rfcomm_mcc_msc *)(mcc + 1);
 
 	frame->m_len = sizeof(*mcc) + sizeof(*body);
 	frame->m_pkthdr.len = frame->m_len;
 
 	mcc->type = RFCOMM_MKMCC_TYPE(1, RFCOMM_MCC_MSC);
 	mcc->length = RFCOMM_MKLEN8(sizeof(*body));
 
 	body->address = RFCOMM_MKADDRESS(1, pcb->dlci);
 	body->modem = pcb->lmodem;
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending MSC dlci=%d, state=%d, flags=%#x, address=%d, modem=%#x\n",
 		__func__, pcb->dlci, pcb->state, pcb->flags, body->address,
 		body->modem);
 
 	/* MCC commands travel on dlci 0 */
 	return (ng_btsocket_rfcomm_send_uih(pcb->session,
 			RFCOMM_MKADDRESS(INITIATOR(pcb->session), 0), 0, 0,
 			frame));
 } /* ng_btsocket_rfcomm_send_msc */
 
 /*
  * Send PN request. Caller must hold pcb->pcb_mtx and pcb->session->session_mtx
  *
  * Builds a PN command from the DLC's dlci, mtu and flow control settings
  * and sends it as UIH frame on the multiplexor channel. Returns ENOBUFS if
  * no mbuf could be allocated, otherwise the result of
  * ng_btsocket_rfcomm_send_uih().
  */
 
 static int
 ng_btsocket_rfcomm_send_pn(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	struct mbuf		*frame;
 	struct rfcomm_mcc_hdr	*mcc;
 	struct rfcomm_mcc_pn	*params;
 
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	MGETHDR(frame, M_NOWAIT, MT_DATA);
 	if (frame == NULL)
 		return (ENOBUFS);
 
 	/* MCC header immediately followed by PN body */
 	mcc = mtod(frame, struct rfcomm_mcc_hdr *);
 	params = (struct rfcomm_mcc_pn *)(mcc + 1);
 
 	frame->m_len = sizeof(*mcc) + sizeof(*params);
 	frame->m_pkthdr.len = frame->m_len;
 
 	mcc->type = RFCOMM_MKMCC_TYPE(1, RFCOMM_MCC_PN);
 	mcc->length = RFCOMM_MKLEN8(sizeof(*params));
 
 	params->dlci = pcb->dlci;
 
 	/*
 	 * Set default DLCI priority as described in GSM 07.10
 	 * (ETSI TS 101 369) clause 5.6 page 42
 	 */
 
 	if (pcb->dlci < 56)
 		params->priority = ((pcb->dlci >> 3) << 3) + 7;
 	else
 		params->priority = 61;
 
 	params->ack_timer = 0;
 	params->mtu = htole16(pcb->mtu);
 	params->max_retrans = 0;
 
 	/* Offer credit based flow control only if the DLC wants it */
 	if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)) {
 		params->flow_control = 0;
 		params->credits = 0;
 	} else {
 		params->flow_control = 0xf0;
 		params->credits = pcb->rx_cred;
 	}
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending PN dlci=%d, state=%d, flags=%#x, mtu=%d, flow_control=%#x, " \
 "credits=%d\n",	__func__, pcb->dlci, pcb->state, pcb->flags, pcb->mtu,
 		params->flow_control, params->credits);
 
 	/* MCC commands travel on dlci 0 */
 	return (ng_btsocket_rfcomm_send_uih(pcb->session,
 			RFCOMM_MKADDRESS(INITIATOR(pcb->session), 0), 0, 0,
 			frame));
 } /* ng_btsocket_rfcomm_send_pn */
 
 /*
  * Calculate and send credits based on available space in receive buffer.
  * Caller must hold pcb->pcb_mtx and pcb->session->session_mtx.
  *
  * The peer is granted one credit per mtu-sized chunk of free space in the
  * socket receive buffer, limited so that the credits it holds never exceed
  * RFCOMM_MAX_CREDITS. Nothing is sent when there is nothing to grant (no
  * free space, DLC mtu of zero, or the peer already holds the maximum).
  *
  * The number of credits is computed in a wide type and clamped before it
  * is narrowed to the single byte that goes on the wire; narrowing first
  * would turn e.g. 256 available credits into zero.
  *
  * Returns 0, or the error from ng_btsocket_rfcomm_send_uih().
  */
 
 static int
 ng_btsocket_rfcomm_send_credits(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	int		error = 0;
 	long		space, grant;
 	u_int8_t	credits;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Sending more credits, dlci=%d, state=%d, flags=%#x, mtu=%d, " \
 "space=%ld, tx_cred=%d, rx_cred=%d\n",
 		__func__, pcb->dlci, pcb->state, pcb->flags, pcb->mtu,
 		sbspace(&pcb->so->so_rcv), pcb->tx_cred, pcb->rx_cred);
 
 	/* The mtu comes from the peer's PN; never divide by zero */
 	space = sbspace(&pcb->so->so_rcv);
 	if (pcb->mtu == 0 || space <= 0)
 		return (0);
 
 	grant = space / pcb->mtu;
 	if (pcb->rx_cred + grant > RFCOMM_MAX_CREDITS)
 		grant = (long) RFCOMM_MAX_CREDITS - pcb->rx_cred;
 	if (grant <= 0)
 		return (0);
 
 	/* 0 < grant <= RFCOMM_MAX_CREDITS here; credits is a wire byte */
 	credits = (u_int8_t) grant;
 
 	/* Credits travel in a UIH frame with pf set and no payload */
 	error = ng_btsocket_rfcomm_send_uih(
 			pcb->session,
 			RFCOMM_MKADDRESS(INITIATOR(pcb->session),
 				pcb->dlci), 1, credits, NULL);
 	if (error == 0) {
 		pcb->rx_cred += credits;
 
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Gave remote side %d more credits, dlci=%d, state=%d, flags=%#x, " \
 "rx_cred=%d, tx_cred=%d\n",	__func__, credits, pcb->dlci, pcb->state,
 			pcb->flags, pcb->rx_cred, pcb->tx_cred);
 	} else
 		NG_BTSOCKET_RFCOMM_ERR(
 "%s: Could not send credits, error=%d, dlci=%d, state=%d, flags=%#x, " \
 "mtu=%d, space=%ld, tx_cred=%d, rx_cred=%d\n",
 			__func__, error, pcb->dlci, pcb->state,
 			pcb->flags, pcb->mtu, sbspace(&pcb->so->so_rcv),
 			pcb->tx_cred, pcb->rx_cred);
 
 	return (error);
 } /* ng_btsocket_rfcomm_send_credits */
 
 /*****************************************************************************
  *****************************************************************************
  **                              RFCOMM DLCs
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Send data from socket send buffer
  * Caller must hold pcb->pcb_mtx and pcb->session->session_mtx
  *
  * Sends at most `limit' UIH frames of up to pcb->mtu bytes each. The limit
  * is lowered further by the available tx credits (credit based flow
  * control) or RFCOMM_MAX_CREDITS (otherwise), and is zero while the remote
  * side has flow control asserted. Returns 0 or the first error hit.
  */
 
 static int
 ng_btsocket_rfcomm_pcb_send(ng_btsocket_rfcomm_pcb_p pcb, int limit)
 {
 	struct mbuf	*chunk;
 	int		 sent = 0, error = 0, length;
 
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* Work out how many frames we are allowed to send */
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)
 		limit = min(limit, pcb->tx_cred);
 	else if (pcb->rmodem & RFCOMM_MODEM_FC)
 		limit = 0;
 	else
 		limit = min(limit, RFCOMM_MAX_CREDITS); /* XXX ??? */
 
 	if (limit == 0) {
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Could not send - remote flow control asserted, dlci=%d, flags=%#x, " \
 "rmodem=%#x, tx_cred=%d\n",
 			__func__, pcb->dlci, pcb->flags, pcb->rmodem,
 			pcb->tx_cred);
 
 		return (0);
 	}
 
 	while (sent < limit) {
 		length = min(pcb->mtu, sbavail(&pcb->so->so_snd));
 		if (length == 0)
 			break;
 
 		/* Get the chunk from the socket's send buffer */
 		chunk = ng_btsocket_rfcomm_prepare_packet(&pcb->so->so_snd,
 				length);
 		if (chunk == NULL) {
 			error = ENOBUFS;
 			break;
 		}
 
 		sbdrop(&pcb->so->so_snd, length);
 
 		error = ng_btsocket_rfcomm_send_uih(pcb->session,
 				RFCOMM_MKADDRESS(INITIATOR(pcb->session),
 					pcb->dlci), 0, 0, chunk);
 		if (error != 0)
 			break;
 
 		/* Only frames that were actually queued are counted */
 		sent ++;
 	}
 
 	/* Every frame sent used up one credit */
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_CFC)
 		pcb->tx_cred -= sent;
 
 	if (sent > 0 && error == 0) {
 		pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_SENDING;
 		sowwakeup(pcb->so);
 	}
 
 	return (error);
 } /* ng_btsocket_rfcomm_pcb_send */
 
 /*
  * Unlink and disconnect DLC: detach it from its session, mark it CLOSED,
  * record `error' on its socket and wake up everybody waiting on it. This
  * function does not return a value.
  *
  * If that was the last DLC on a session we have initiated, the session is
  * shut down as well: a DISC is sent on the multiplexor channel if the
  * session is OPEN, otherwise (or if DISC could not be sent) the session is
  * marked CLOSED, and the RFCOMM task is woken up.
  *
  * Caller must hold pcb->pcb_mtx and pcb->session->session_mtx
  */
 
 static void
 ng_btsocket_rfcomm_pcb_kill(ng_btsocket_rfcomm_pcb_p pcb, int error)
 {
 	/* Keep the session pointer, pcb->session is cleared below */
 	ng_btsocket_rfcomm_session_p	s = pcb->session;
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Killing DLC, so=%p, dlci=%d, state=%d, flags=%#x, error=%d\n",
 		__func__, pcb->so, pcb->dlci, pcb->state, pcb->flags, error);
 
 	if (pcb->session == NULL)
 		panic("%s: DLC without session, pcb=%p, state=%d, flags=%#x\n",
 			__func__, pcb, pcb->state, pcb->flags);
 
 	mtx_assert(&pcb->session->session_mtx, MA_OWNED);
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* Cancel pending DLC timeout, if any */
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)
 		ng_btsocket_rfcomm_untimeout(pcb);
 
 	/* Detach DLC from the session. Does not matter which state DLC in */
 	LIST_REMOVE(pcb, session_next);
 	pcb->session = NULL;
 
 	/* Change DLC state and wakeup all sleepers */
 	pcb->state = NG_BTSOCKET_RFCOMM_DLC_CLOSED;
 	pcb->so->so_error = error;
 	soisdisconnected(pcb->so);
 	wakeup(&pcb->state);
 
 	/* Check if we have any DLCs left on the session */
 	if (LIST_EMPTY(&s->dlcs) && INITIATOR(s)) {
 		NG_BTSOCKET_RFCOMM_INFO(
 "%s: Disconnecting session, state=%d, flags=%#x, mtu=%d\n",
 			__func__, s->state, s->flags, s->mtu);
 
 		switch (s->state) {
 		case NG_BTSOCKET_RFCOMM_SESSION_CLOSED:
 		case NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING:
 			/*
 			 * Do not have to do anything here. We can get here
 			 * when L2CAP connection was terminated or we have
 			 * received DISC on multiplexor channel
 			 */
 			break;
 
 		case NG_BTSOCKET_RFCOMM_SESSION_OPEN:
 			/*
 			 * Send DISC on multiplexor channel. `error' is
 			 * reused here; the caller's value has already been
 			 * stored in so_error above.
 			 */
 			error = ng_btsocket_rfcomm_send_command(s,
 					RFCOMM_FRAME_DISC, 0);
 			if (error == 0) {
 				s->state = NG_BTSOCKET_RFCOMM_SESSION_DISCONNECTING;
 				break;
 			}
 			/* FALL THROUGH - could not send DISC, just close */
 
 		case NG_BTSOCKET_RFCOMM_SESSION_CONNECTING:
 		case NG_BTSOCKET_RFCOMM_SESSION_CONNECTED:
 			s->state = NG_BTSOCKET_RFCOMM_SESSION_CLOSED;
 			break;
 
 /*		case NG_BTSOCKET_RFCOMM_SESSION_LISTENING: */
 		default:
 			panic("%s: Invalid session state=%d, flags=%#x\n",
 				__func__, s->state, s->flags);
 			break;
 		}
 
 		/* Let the RFCOMM task act on the new session state */
 		ng_btsocket_rfcomm_task_wakeup();
 	}
 } /* ng_btsocket_rfcomm_pcb_kill */
 
 /*
  * Find the DLC with the given dlci on the given RFCOMM session. Returns NULL
  * when the session has no such DLC. Caller must hold s->session_mtx
  */
 
 static ng_btsocket_rfcomm_pcb_p
 ng_btsocket_rfcomm_pcb_by_dlci(ng_btsocket_rfcomm_session_p s, int dlci)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc = NULL;
 
 	mtx_assert(&s->session_mtx, MA_OWNED);
 
 	/* Walk the session's DLC list and stop at the first match */
 	for (dlc = LIST_FIRST(&s->dlcs); dlc != NULL;
 	    dlc = LIST_NEXT(dlc, session_next)) {
 		if (dlc->dlci == dlci)
 			return (dlc);
 	}
 
 	return (NULL);
 } /* ng_btsocket_rfcomm_pcb_by_dlci */
 
 /*
  * Look for a listening socket on the given channel. A socket bound to
  * exactly the given src address wins; otherwise the last matching socket
  * bound to NG_HCI_BDADDR_ANY is returned. Returns NULL if nobody listens.
  */
 
 static ng_btsocket_rfcomm_pcb_p
 ng_btsocket_rfcomm_pcb_listener(bdaddr_p src, int channel)
 {
 	ng_btsocket_rfcomm_pcb_p	cur = NULL;
 	ng_btsocket_rfcomm_pcb_p	exact = NULL, wildcard = NULL;
 
 	mtx_lock(&ng_btsocket_rfcomm_sockets_mtx);
 
 	LIST_FOREACH(cur, &ng_btsocket_rfcomm_sockets, next) {
 		/* Only listening sockets on the requested channel qualify */
 		if (cur->channel == channel &&
 		    (cur->so->so_options & SO_ACCEPTCONN) != 0) {
 			if (bcmp(&cur->src, src, sizeof(*src)) == 0) {
 				exact = cur;
 				break;
 			}
 
 			if (bcmp(&cur->src, NG_HCI_BDADDR_ANY,
 			    sizeof(bdaddr_t)) == 0)
 				wildcard = cur;
 		}
 	}
 
 	mtx_unlock(&ng_btsocket_rfcomm_sockets_mtx);
 
 	if (exact != NULL)
 		return (exact);
 
 	return (wildcard);
 } /* ng_btsocket_rfcomm_pcb_listener */
 
 /*****************************************************************************
  *****************************************************************************
  **                              Misc. functions 
  *****************************************************************************
  *****************************************************************************/
 
 /*
  *  Arm the pcb timeout for ng_btsocket_rfcomm_timo seconds. Panics if a
  *  timeout is already pending. Caller MUST hold pcb_mtx
  */
 
 static void
 ng_btsocket_rfcomm_timeout(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* Only one timeout per DLC may be outstanding */
 	if (pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO)
 		panic("%s: Duplicated socket timeout?!\n", __func__);
 
 	pcb->flags |= NG_BTSOCKET_RFCOMM_DLC_TIMO;
 	pcb->flags &= ~NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT;
 	callout_reset(&pcb->timo, ng_btsocket_rfcomm_timo * hz,
 	    ng_btsocket_rfcomm_process_timeout, pcb);
 } /* ng_btsocket_rfcomm_timeout */
 
 /*
  *  Cancel the pending pcb timeout and clear both timeout flags. Panics if
  *  no timeout is pending. Caller MUST hold pcb_mtx
  */
 
 static void
 ng_btsocket_rfcomm_untimeout(ng_btsocket_rfcomm_pcb_p pcb)
 {
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	if (!(pcb->flags & NG_BTSOCKET_RFCOMM_DLC_TIMO))
 		panic("%s: No socket timeout?!\n", __func__);
 
 	callout_stop(&pcb->timo);
 	pcb->flags &= ~(NG_BTSOCKET_RFCOMM_DLC_TIMO |
 	    NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT);
 } /* ng_btsocket_rfcomm_untimeout */
 
 /*
  * Callout handler for the pcb timeout: flag the DLC as timed out, push a DLC
  * that was still being set up into the disconnecting state and wake up the
  * RFCOMM task. Asserts that pcb_mtx is owned on entry.
  */
 
 static void
 ng_btsocket_rfcomm_process_timeout(void *xpcb)
 {
 	ng_btsocket_rfcomm_pcb_p	dlc = (ng_btsocket_rfcomm_pcb_p) xpcb;
 
 	mtx_assert(&dlc->pcb_mtx, MA_OWNED);
 
 	NG_BTSOCKET_RFCOMM_INFO(
 "%s: Timeout, so=%p, dlci=%d, state=%d, flags=%#x\n",
 		__func__, dlc->so, dlc->dlci, dlc->state, dlc->flags);
 
 	/* The timeout has fired: it is no longer pending */
 	dlc->flags = (dlc->flags & ~NG_BTSOCKET_RFCOMM_DLC_TIMO) |
 	    NG_BTSOCKET_RFCOMM_DLC_TIMEDOUT;
 
 	switch (dlc->state) {
 	case NG_BTSOCKET_RFCOMM_DLC_W4_CONNECT:
 	case NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING:
 		/* State stays as it is */
 		break;
 
 	case NG_BTSOCKET_RFCOMM_DLC_CONFIGURING:
 	case NG_BTSOCKET_RFCOMM_DLC_CONNECTING:
 		dlc->state = NG_BTSOCKET_RFCOMM_DLC_DISCONNECTING;
 		break;
 
 	default:
 		panic(
 "%s: DLC timeout in invalid state, dlci=%d, state=%d, flags=%#x\n",
 			__func__, dlc->dlci, dlc->state, dlc->flags);
 		break;
 	}
 
 	ng_btsocket_rfcomm_task_wakeup();
 } /* ng_btsocket_rfcomm_process_timeout */
 
 /*
  * Get up to length bytes from the socket buffer.
  *
  * The data is copied into a newly allocated mbuf chain (packet header mbuf
  * first); nothing in this function modifies the socket buffer itself.
  * Returns the new chain, or NULL if an mbuf could not be allocated.
  * Panics if the socket buffer holds fewer than length bytes.
  */
 
 static struct mbuf *
 ng_btsocket_rfcomm_prepare_packet(struct sockbuf *sb, int length)
 {
 	struct mbuf	*top = NULL, *m = NULL, *n = NULL, *nextpkt = NULL;
 	int		 mlen, noff, len;
 
 	MGETHDR(top, M_NOWAIT, MT_DATA);
 	if (top == NULL)
 		return (NULL);
 
 	/* The packet length is fixed up front; the loop must copy it all */
 	top->m_pkthdr.len = length;
 	top->m_len = 0;
 	mlen = MHLEN;		/* capacity of the current destination mbuf */
 
 	m = top;		/* destination mbuf being filled */
 	n = sb->sb_mb;		/* source mbuf being read */
 	/*
 	 * NOTE(review): sb->sb_mb is dereferenced without a NULL check, so
 	 * this presumably relies on callers passing a non-empty socket
 	 * buffer - confirm against the caller.
 	 */
 	nextpkt = n->m_nextpkt;
 	noff = 0;		/* read offset inside the source mbuf */
 
 	while (length > 0 && n != NULL) {
 		/* Copy what fits in the destination and is left in the source */
 		len = min(mlen - m->m_len, n->m_len - noff);
 		if (len > length)
 			len = length;
 
 		bcopy(mtod(n, caddr_t)+noff, mtod(m, caddr_t)+m->m_len, len);
 		m->m_len += len;
 		noff += len;
 		length -= len;
 
 		/* Destination mbuf is full and there is more to copy */
 		if (length > 0 && m->m_len == mlen) {
 			MGET(m->m_next, M_NOWAIT, MT_DATA);
 			if (m->m_next == NULL) {
 				NG_FREE_M(top);
 				return (NULL);
 			}
 
 			m = m->m_next;
 			m->m_len = 0;
 			mlen = MLEN;
 		}
 
 		/* Source mbuf is used up: move on, crossing record boundaries */
 		if (noff == n->m_len) {
 			noff = 0;
 			n = n->m_next;
 
 			if (n == NULL)
 				n = nextpkt;
 
 			nextpkt = (n != NULL)? n->m_nextpkt : NULL;
 		}
 	}
 
 	if (length < 0)
 		panic("%s: length=%d\n", __func__, length);
 	if (length > 0 && n == NULL)
 		panic("%s: bogus length=%d, n=%p\n", __func__, length, n);
 
 	return (top);
 } /* ng_btsocket_rfcomm_prepare_packet */
 
Index: head/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c
===================================================================
--- head/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c	(revision 319721)
+++ head/sys/netgraph/bluetooth/socket/ng_btsocket_sco.c	(revision 319722)
@@ -1,1987 +1,1980 @@
 /*
  * ng_btsocket_sco.c
  */
 
 /*-
  * Copyright (c) 2001-2002 Maksim Yevmenkin <m_evmenkin@yahoo.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $Id: ng_btsocket_sco.c,v 1.2 2005/10/31 18:08:51 max Exp $
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bitstring.h>
 #include <sys/domain.h>
 #include <sys/endian.h>
 #include <sys/errno.h>
 #include <sys/filedesc.h>
 #include <sys/ioccom.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 
 #include <net/vnet.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 #include <netgraph/bluetooth/include/ng_bluetooth.h>
 #include <netgraph/bluetooth/include/ng_hci.h>
 #include <netgraph/bluetooth/include/ng_l2cap.h>
 #include <netgraph/bluetooth/include/ng_btsocket.h>
 #include <netgraph/bluetooth/include/ng_btsocket_sco.h>
 
 /*
  * MALLOC define. With NG_SEPARATE_MALLOC the SCO socket code gets its own
  * malloc type; otherwise M_NETGRAPH_BTSOCKET_SCO is an alias for M_NETGRAPH.
  */
 #ifdef NG_SEPARATE_MALLOC
 static MALLOC_DEFINE(M_NETGRAPH_BTSOCKET_SCO, "netgraph_btsocks_sco",
 		"Netgraph Bluetooth SCO sockets");
 #else
 #define M_NETGRAPH_BTSOCKET_SCO M_NETGRAPH
 #endif /* NG_SEPARATE_MALLOC */
 
 /* Netgraph node methods (all defined later in this file) */
 static ng_constructor_t	ng_btsocket_sco_node_constructor;
 static ng_rcvmsg_t	ng_btsocket_sco_node_rcvmsg;
 static ng_shutdown_t	ng_btsocket_sco_node_shutdown;
 static ng_newhook_t	ng_btsocket_sco_node_newhook;
 static ng_connect_t	ng_btsocket_sco_node_connect;
 static ng_rcvdata_t	ng_btsocket_sco_node_rcvdata;
 static ng_disconnect_t	ng_btsocket_sco_node_disconnect;
 
 /* Task handlers: input queue processing and routing table cleanup */
 static void		ng_btsocket_sco_input   (void *, int);
 static void		ng_btsocket_sco_rtclean (void *, int);
 
 /* Netgraph type descriptor: binds the node methods above to the type name */
 static struct ng_type	typestruct = {
 	.version =	NG_ABI_VERSION,
 	.name =		NG_BTSOCKET_SCO_NODE_TYPE,
 	.constructor =	ng_btsocket_sco_node_constructor,
 	.rcvmsg =	ng_btsocket_sco_node_rcvmsg,
 	.shutdown =	ng_btsocket_sco_node_shutdown,
 	.newhook =	ng_btsocket_sco_node_newhook,
 	.connect =	ng_btsocket_sco_node_connect,
 	.rcvdata =	ng_btsocket_sco_node_rcvdata,
 	.disconnect =	ng_btsocket_sco_node_disconnect,
 };
 
 /*
  * Globals.
  *
  * ng_btsocket_sco_queue is accessed under ng_btsocket_sco_queue_mtx and
  * drained by ng_btsocket_sco_queue_task. The routing table
  * ng_btsocket_sco_rt is modified under ng_btsocket_sco_rt_mtx.
  * ng_btsocket_sco_sockets_mtx is held around socket (pcb) lookups.
  * ng_btsocket_sco_lasttime and ng_btsocket_sco_curpps are the rate limiting
  * state handed to ppsratecheck() by the debug macros below.
  */
 static u_int32_t				ng_btsocket_sco_debug_level;
 static node_p					ng_btsocket_sco_node;
 static struct ng_bt_itemq			ng_btsocket_sco_queue;
 static struct mtx				ng_btsocket_sco_queue_mtx;
 static struct task				ng_btsocket_sco_queue_task;
 static struct mtx				ng_btsocket_sco_sockets_mtx;
 static LIST_HEAD(, ng_btsocket_sco_pcb)		ng_btsocket_sco_sockets;
 static LIST_HEAD(, ng_btsocket_sco_rtentry)	ng_btsocket_sco_rt;
 static struct mtx				ng_btsocket_sco_rt_mtx;
 static struct task				ng_btsocket_sco_rt_task;
 static struct timeval				ng_btsocket_sco_lasttime;
 static int					ng_btsocket_sco_curpps;
 
 /*
  * Sysctl tree: the "seq" node under _net_bluetooth_sco_sockets. debug_level
  * is read-write; the input queue counters (len, maxlen, drops) are read-only.
  * NOTE(review): the NG_BTSOCKET_WARN_LEVEL argument of the debug_level entry
  * is presumably not an initializer for ng_btsocket_sco_debug_level - verify
  * where the default level is actually assigned.
  */
 SYSCTL_DECL(_net_bluetooth_sco_sockets);
 static SYSCTL_NODE(_net_bluetooth_sco_sockets, OID_AUTO, seq, CTLFLAG_RW,
 	0, "Bluetooth SEQPACKET SCO sockets family");
 SYSCTL_UINT(_net_bluetooth_sco_sockets_seq, OID_AUTO, debug_level,
 	CTLFLAG_RW,
 	&ng_btsocket_sco_debug_level, NG_BTSOCKET_WARN_LEVEL,
 	"Bluetooth SEQPACKET SCO sockets debug level");
 SYSCTL_UINT(_net_bluetooth_sco_sockets_seq, OID_AUTO, queue_len,
 	CTLFLAG_RD,
 	&ng_btsocket_sco_queue.len, 0,
 	"Bluetooth SEQPACKET SCO sockets input queue length");
 SYSCTL_UINT(_net_bluetooth_sco_sockets_seq, OID_AUTO, queue_maxlen,
 	CTLFLAG_RD,
 	&ng_btsocket_sco_queue.maxlen, 0,
 	"Bluetooth SEQPACKET SCO sockets input queue max. length");
 SYSCTL_UINT(_net_bluetooth_sco_sockets_seq, OID_AUTO, queue_drops,
 	CTLFLAG_RD,
 	&ng_btsocket_sco_queue.drops, 0,
 	"Bluetooth SEQPACKET SCO sockets input queue drops");
 
 /*
  * Debug.
  *
  * Each macro expands to "if (level enabled && rate check passes) printf", so
  * the caller's argument list becomes the printf() call. Messages from all
  * four levels share one ppsratecheck() state with a limit of 1.
  *
  * Because the expansion is an unbraced "if", never use these macros as the
  * unbraced body of an "if" that has an "else": the "else" would bind to the
  * macro's hidden "if". Wrap the call in braces instead.
  */
 #define NG_BTSOCKET_SCO_INFO \
 	if (ng_btsocket_sco_debug_level >= NG_BTSOCKET_INFO_LEVEL && \
 	    ppsratecheck(&ng_btsocket_sco_lasttime, &ng_btsocket_sco_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_SCO_WARN \
 	if (ng_btsocket_sco_debug_level >= NG_BTSOCKET_WARN_LEVEL && \
 	    ppsratecheck(&ng_btsocket_sco_lasttime, &ng_btsocket_sco_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_SCO_ERR \
 	if (ng_btsocket_sco_debug_level >= NG_BTSOCKET_ERR_LEVEL && \
 	    ppsratecheck(&ng_btsocket_sco_lasttime, &ng_btsocket_sco_curpps, 1)) \
 		printf
 
 #define NG_BTSOCKET_SCO_ALERT \
 	if (ng_btsocket_sco_debug_level >= NG_BTSOCKET_ALERT_LEVEL && \
 	    ppsratecheck(&ng_btsocket_sco_lasttime, &ng_btsocket_sco_curpps, 1)) \
 		printf
 
 /*
  * Netgraph message processing routines: handlers for LP events received
  * from the lower layer
  */
 
 static int ng_btsocket_sco_process_lp_con_cfm
 	(struct ng_mesg *, ng_btsocket_sco_rtentry_p);
 static int ng_btsocket_sco_process_lp_con_ind
 	(struct ng_mesg *, ng_btsocket_sco_rtentry_p);
 static int ng_btsocket_sco_process_lp_discon_ind
 	(struct ng_mesg *, ng_btsocket_sco_rtentry_p);
 
 /*
  * Send LP messages to the lower layer
  */
 
 static int  ng_btsocket_sco_send_lp_con_req
 	(ng_btsocket_sco_pcb_p);
 static int  ng_btsocket_sco_send_lp_con_rsp
 	(ng_btsocket_sco_rtentry_p, bdaddr_p, int);
 static int  ng_btsocket_sco_send_lp_discon_req
 	(ng_btsocket_sco_pcb_p);
 
 static int ng_btsocket_sco_send2
 	(ng_btsocket_sco_pcb_p);
 
 /*
  * Timeout processing routines
  */
 
 static void ng_btsocket_sco_timeout         (ng_btsocket_sco_pcb_p);
 static void ng_btsocket_sco_untimeout       (ng_btsocket_sco_pcb_p);
 static void ng_btsocket_sco_process_timeout (void *);
 
 /*
  * Other stuff: pcb lookup helpers. Callers in this file treat a non-NULL
  * result as a locked pcb and unlock pcb->pcb_mtx when done.
  */
 
 static ng_btsocket_sco_pcb_p	ng_btsocket_sco_pcb_by_addr(bdaddr_p);
 static ng_btsocket_sco_pcb_p	ng_btsocket_sco_pcb_by_handle(bdaddr_p, int);
 static ng_btsocket_sco_pcb_p	ng_btsocket_sco_pcb_by_addrs(bdaddr_p, bdaddr_p);
 
 /* Schedule the input queue task on taskqueue_swi */
 #define ng_btsocket_sco_wakeup_input_task() \
 	taskqueue_enqueue(taskqueue_swi, &ng_btsocket_sco_queue_task)
 
 /* Schedule the routing table cleanup task on taskqueue_swi */
 #define ng_btsocket_sco_wakeup_route_task() \
 	taskqueue_enqueue(taskqueue_swi, &ng_btsocket_sco_rt_task)
 
 /*****************************************************************************
  *****************************************************************************
  **                        Netgraph node interface
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Netgraph node constructor. Nodes of this type cannot be created through
  * the constructor: it ignores its argument and always fails with EINVAL.
  */
 
 static int
 ng_btsocket_sco_node_constructor(node_p node)
 {
 	return (EINVAL);
 } /* ng_btsocket_sco_node_constructor */
 
 /*
  * Do local shutdown processing: release the old node, then create and name
  * a fresh one. On failure ng_btsocket_sco_node is left NULL and the error
  * is returned; on success 0 is returned.
  */
 
 static int
 ng_btsocket_sco_node_shutdown(node_p node)
 {
 	int	rc;
 
 	/* Let the old node go */
 	NG_NODE_UNREF(node);
 
 	/* Create new node */
 	rc = ng_make_node_common(&typestruct, &ng_btsocket_sco_node);
 	if (rc != 0) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Could not create Netgraph node, error=%d\n", __func__, rc);
 
 		goto fail;
 	}
 
 	rc = ng_name_node(ng_btsocket_sco_node, NG_BTSOCKET_SCO_NODE_TYPE);
 	if (rc != 0) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Could not name Netgraph node, error=%d\n", __func__, rc);
 
 		/* Unnamed node is of no use: drop it again */
 		NG_NODE_UNREF(ng_btsocket_sco_node);
 		goto fail;
 	}
 
 	return (0);
 
 fail:
 	ng_btsocket_sco_node = NULL;
 
 	return (rc);
 } /* ng_btsocket_sco_node_shutdown */
 
 /*
  * We allow any hook to be connected to the node: the arguments are not
  * inspected and the function always returns 0.
  */
 
 static int
 ng_btsocket_sco_node_newhook(node_p node, hook_p hook, char const *name)
 {
 	return (0);
 } /* ng_btsocket_sco_node_newhook */
 
 /*
  * Hook connection: just say "YEP, that's OK by me!". The hook starts with
  * no private data (no routing entry yet) and gets one extra reference.
  * Always returns 0.
  */
 
 static int
 ng_btsocket_sco_node_connect(hook_p hook)
 {
 	NG_HOOK_SET_PRIVATE(hook, NULL);
 	NG_HOOK_REF(hook); /* Keep extra reference to the hook */
 
 	/* Forced queueing is compiled out */
 #if 0
 	NG_HOOK_FORCE_QUEUE(NG_HOOK_PEER(hook));
 	NG_HOOK_FORCE_QUEUE(hook);
 #endif
 
 	return (0);
 } /* ng_btsocket_sco_node_connect */
 
 /*
  * Hook disconnection. Either drop the extra hook reference right away or
  * schedule the route cleanup task.
  */
 
 static int
 ng_btsocket_sco_node_disconnect(hook_p hook)
 {
 	/*
 	 * A hook without private information was connected but we never got
 	 * the "hook_info" message for it, so it was never added to the
 	 * routing table and it is safe to just drop our extra reference.
 	 */
 
 	if (NG_HOOK_PRIVATE(hook) == NULL) {
 		NG_HOOK_UNREF(hook); /* Remove extra reference */
 
 		return (0);
 	}
 
 	/*
 	 * Otherwise the hook must be in the routing table, so schedule
 	 * cleaning of the routing table.
 	 */
 
 	return (ng_btsocket_sco_wakeup_route_task());
 } /* ng_btsocket_sco_node_disconnect */
 
 /*
  * Process incoming messages: HCI messages are queued for the input task,
  * everything else is freed and rejected with EINVAL. Returns ENOBUFS when
  * the input queue is full.
  */
 
 static int
 ng_btsocket_sco_node_rcvmsg(node_p node, item_p item, hook_p hook)
 {
 	struct ng_mesg	*msg = NGI_MSG(item); /* item still has message */
 	int		 rc;
 
 	/* Only messages with the HCI cookie are accepted */
 	if (msg == NULL || msg->header.typecookie != NGM_HCI_COOKIE) {
 		NG_FREE_ITEM(item);
 
 		return (EINVAL);
 	}
 
 	mtx_lock(&ng_btsocket_sco_queue_mtx);
 	if (!(NG_BT_ITEMQ_FULL(&ng_btsocket_sco_queue))) {
 		/* Remember (and reference) the hook the message came from */
 		if (hook != NULL) {
 			NG_HOOK_REF(hook);
 			NGI_SET_HOOK(item, hook);
 		}
 
 		NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_sco_queue, item);
 		rc = ng_btsocket_sco_wakeup_input_task();
 	} else {
 		NG_BTSOCKET_SCO_ERR(
 "%s: Input queue is full (msg)\n", __func__);
 
 		NG_BT_ITEMQ_DROP(&ng_btsocket_sco_queue);
 		NG_FREE_ITEM(item);
 		rc = ENOBUFS;
 	}
 	mtx_unlock(&ng_btsocket_sco_queue_mtx);
 
 	return (rc);
 } /* ng_btsocket_sco_node_rcvmsg */
 
 /*
  * Receive data on a hook: queue the item (with a reference to the hook) for
  * the input task. Returns ENOBUFS and frees the item if the queue is full.
  */
 
 static int
 ng_btsocket_sco_node_rcvdata(hook_p hook, item_p item)
 {
 	int	rc;
 
 	mtx_lock(&ng_btsocket_sco_queue_mtx);
 	if (!(NG_BT_ITEMQ_FULL(&ng_btsocket_sco_queue))) {
 		NG_HOOK_REF(hook);
 		NGI_SET_HOOK(item, hook);
 
 		NG_BT_ITEMQ_ENQUEUE(&ng_btsocket_sco_queue, item);
 		rc = ng_btsocket_sco_wakeup_input_task();
 	} else {
 		NG_BTSOCKET_SCO_ERR(
 "%s: Input queue is full (data)\n", __func__);
 
 		NG_BT_ITEMQ_DROP(&ng_btsocket_sco_queue);
 		NG_FREE_ITEM(item);
 		rc = ENOBUFS;
 	}
 	mtx_unlock(&ng_btsocket_sco_queue_mtx);
 
 	return (rc);
 } /* ng_btsocket_sco_node_rcvdata */
 
 /*
  * Process LP_ConnectCfm event from the lower layer protocol. Returns
  * EMSGSIZE for a malformed message, ENOENT if there is no socket in the
  * connecting state for the address pair and 0 otherwise.
  */
 
 static int
 ng_btsocket_sco_process_lp_con_cfm(struct ng_mesg *msg,
 		ng_btsocket_sco_rtentry_p rt)
 {
 	ng_hci_lp_con_cfm_ep	*cfm = NULL;
 	ng_btsocket_sco_pcb_t	*pcb = NULL;
 	int			 rc = ENOENT;
 
 	if (msg->header.arglen != sizeof(*cfm))
 		return (EMSGSIZE);
 
 	cfm = (ng_hci_lp_con_cfm_ep *)(msg->data);
 
 	mtx_lock(&ng_btsocket_sco_sockets_mtx);
 
 	/* Look for the socket with matching source/destination addresses */
 	pcb = ng_btsocket_sco_pcb_by_addrs(&rt->src, &cfm->bdaddr);
 	if (pcb == NULL)
 		goto out;
 
 	/* pcb is locked */
 
 	NG_BTSOCKET_SCO_INFO(
 "%s: Got LP_ConnectCfm response, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, status=%d, handle=%d, state=%d\n",
 		__func__,
 		pcb->src.b[5], pcb->src.b[4], pcb->src.b[3],
 		pcb->src.b[2], pcb->src.b[1], pcb->src.b[0],
 		pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3],
 		pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0],
 		cfm->status, cfm->con_handle, pcb->state);
 
 	if (pcb->state == NG_BTSOCKET_SCO_CONNECTING) {
 		ng_btsocket_sco_untimeout(pcb);
 
 		if (cfm->status != 0) {
 			/*
 			 * We have failed to open connection, so disconnect
 			 * the socket
 			 */
 
 			pcb->so->so_error = ECONNREFUSED; /* XXX convert status ??? */
 			pcb->state = NG_BTSOCKET_SCO_CLOSED;
 			soisdisconnected(pcb->so);
 		} else {
 			/*
 			 * Connection is open. Update connection handle and
 			 * socket state
 			 */
 
 			pcb->con_handle = cfm->con_handle;
 			pcb->state = NG_BTSOCKET_SCO_OPEN;
 			soisconnected(pcb->so);
 		}
 
 		rc = 0;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 out:
 	mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 
 	return (rc);
 } /* ng_btsocket_sco_process_lp_con_cfm */
 
 /*
  * Process LP_ConnectInd indicator. Find the socket that listens on the
  * address of the hook the indication arrived on, create a new socket for
  * the incoming connection and answer with LP_ConnectRsp.
  * Returns EMSGSIZE for a malformed message, otherwise the result of sending
  * the response.
  */
 
 static int
 ng_btsocket_sco_process_lp_con_ind(struct ng_mesg *msg,
 		ng_btsocket_sco_rtentry_p rt)
 {
 	ng_hci_lp_con_ind_ep	*ep = NULL;
 	ng_btsocket_sco_pcb_t	*pcb = NULL, *pcb1 = NULL;
 	int			 error = 0;
 	u_int16_t		 status = 0;	/* status for LP_ConnectRsp */
 
 	if (msg->header.arglen != sizeof(*ep))
 		return (EMSGSIZE);
 
 	ep = (ng_hci_lp_con_ind_ep *)(msg->data);
 
 	NG_BTSOCKET_SCO_INFO(
 "%s: Got LP_ConnectInd indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x\n",
 		__func__,
 		rt->src.b[5], rt->src.b[4], rt->src.b[3],
 		rt->src.b[2], rt->src.b[1], rt->src.b[0],
 		ep->bdaddr.b[5], ep->bdaddr.b[4], ep->bdaddr.b[3],
 		ep->bdaddr.b[2], ep->bdaddr.b[1], ep->bdaddr.b[0]);
 
 	mtx_lock(&ng_btsocket_sco_sockets_mtx);
 
 	/* pcb is the listening socket, pcb1 (below) the new connection */
 	pcb = ng_btsocket_sco_pcb_by_addr(&rt->src);
 	if (pcb != NULL) {
-		struct socket	*so1 = NULL;
+		struct socket *so1;
 
 		/* pcb is locked */
 
-		/*
-		 * First check the pending connections queue and if we have
-		 * space then create new socket and set proper source address.
-		 */
-
-		if (pcb->so->so_qlen <= pcb->so->so_qlimit) {
-			CURVNET_SET(pcb->so->so_vnet);
-			so1 = sonewconn(pcb->so, 0);
-			CURVNET_RESTORE();
-		}
+		CURVNET_SET(pcb->so->so_vnet);
+		so1 = sonewconn(pcb->so, 0);
+		CURVNET_RESTORE();
 
 		if (so1 == NULL) {
 			status = 0x0d; /* Rejected due to limited resources */
 			goto respond;
 		}
 
 		/*
 		 * If we got here then we have created a new socket. So
 		 * complete the connection. If we were listening on a specific
 		 * address then copy source address from the listening socket,
 		 * otherwise copy source address from hook's routing
 		 * information.
 		 */
 
 		pcb1 = so2sco_pcb(so1);
 		KASSERT((pcb1 != NULL),
 ("%s: pcb1 == NULL\n", __func__));
 
  		mtx_lock(&pcb1->pcb_mtx);
 
 		if (bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(pcb->src)) != 0)
 			bcopy(&pcb->src, &pcb1->src, sizeof(pcb1->src));
 		else
 			bcopy(&rt->src, &pcb1->src, sizeof(pcb1->src));
 
 		pcb1->flags &= ~NG_BTSOCKET_SCO_CLIENT;
 
 		bcopy(&ep->bdaddr, &pcb1->dst, sizeof(pcb1->dst));
 		pcb1->rt = rt;
 	} else
 		/* Nobody listens on requested BDADDR */
 		status = 0x1f; /* Unspecified Error */
 
 respond:
 	/* Always answer the indication, with status 0 or the reject code */
 	error = ng_btsocket_sco_send_lp_con_rsp(rt, &ep->bdaddr, status);
 	if (pcb1 != NULL) {
 		if (error != 0) {
 			/* Could not send the response: give up on new socket */
 			pcb1->so->so_error = error;
 			pcb1->state = NG_BTSOCKET_SCO_CLOSED;
 			soisdisconnected(pcb1->so);
 		} else {
 			/* Response is out: start the connect timeout */
 			pcb1->state = NG_BTSOCKET_SCO_CONNECTING;
 			soisconnecting(pcb1->so);
 
 			ng_btsocket_sco_timeout(pcb1);
 		}
 
 		mtx_unlock(&pcb1->pcb_mtx);
 	}
 
 	if (pcb != NULL)
 		mtx_unlock(&pcb->pcb_mtx);
 
 	mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_sco_process_lp_con_ind */
 
 /*
  * Process LP_DisconnectInd indicator: close the socket that owns the
  * connection handle, if there is one. Returns EMSGSIZE for a malformed
  * message and 0 otherwise.
  */
 
 static int
 ng_btsocket_sco_process_lp_discon_ind(struct ng_mesg *msg,
 		ng_btsocket_sco_rtentry_p rt)
 {
 	ng_hci_lp_discon_ind_ep	*ind = NULL;
 	ng_btsocket_sco_pcb_t	*pcb = NULL;
 
 	/* Check message */
 	if (msg->header.arglen != sizeof(*ind))
 		return (EMSGSIZE);
 
 	ind = (ng_hci_lp_discon_ind_ep *)(msg->data);
 
 	mtx_lock(&ng_btsocket_sco_sockets_mtx);
 
 	/* Look for the socket with given connection handle */
 	pcb = ng_btsocket_sco_pcb_by_handle(&rt->src, ind->con_handle);
 	if (pcb != NULL) {
 		/*
 		 * pcb is locked. Disconnect the socket. If there was any
 		 * pending request we can not do anything here anyway.
 		 */
 
 		NG_BTSOCKET_SCO_INFO(
 "%s: Got LP_DisconnectInd indicator, src bdaddr=%x:%x:%x:%x:%x:%x, " \
 "dst bdaddr=%x:%x:%x:%x:%x:%x, handle=%d, state=%d\n",
 			__func__,
 			pcb->src.b[5], pcb->src.b[4], pcb->src.b[3],
 			pcb->src.b[2], pcb->src.b[1], pcb->src.b[0],
 			pcb->dst.b[5], pcb->dst.b[4], pcb->dst.b[3],
 			pcb->dst.b[2], pcb->dst.b[1], pcb->dst.b[0],
 			pcb->con_handle, pcb->state);
 
 		if (pcb->flags & NG_BTSOCKET_SCO_TIMO)
 			ng_btsocket_sco_untimeout(pcb);
 
 		pcb->state = NG_BTSOCKET_SCO_CLOSED;
 		soisdisconnected(pcb->so);
 
 		mtx_unlock(&pcb->pcb_mtx);
 	}
 
 	mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 
 	return (0);
 } /* ng_btsocket_sco_process_lp_discon_ind */
 
 /*
  * Send LP_ConnectReq request for pcb->dst over the pcb's route hook.
  * Returns ENETDOWN without a usable hook, ENOMEM if the message cannot be
  * allocated, otherwise the result of sending. Caller must hold pcb_mtx
  */
 
 static int
 ng_btsocket_sco_send_lp_con_req(ng_btsocket_sco_pcb_p pcb)
 {
 	struct ng_mesg		*msg = NULL;
 	ng_hci_lp_con_req_ep	*req = NULL;
 	int			 rc = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* Need a route with a valid hook to talk to the lower layer */
 	if (pcb->rt == NULL)
 		return (ENETDOWN);
 	if (pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(msg, NGM_HCI_COOKIE, NGM_HCI_LP_CON_REQ,
 		sizeof(*req), M_NOWAIT);
 	if (msg == NULL)
 		return (ENOMEM);
 
 	req = (ng_hci_lp_con_req_ep *)(msg->data);
 	bcopy(&pcb->dst, &req->bdaddr, sizeof(req->bdaddr));
 	req->link_type = NG_HCI_LINK_SCO;
 
 	NG_SEND_MSG_HOOK(rc, ng_btsocket_sco_node, msg, pcb->rt->hook, 0);
 
 	return (rc);
 } /* ng_btsocket_sco_send_lp_con_req */
 
 /*
  * Send LP_ConnectRsp response with the given status for the remote address
  * dst over the route's hook. Returns ENETDOWN without a usable hook, ENOMEM
  * if the message cannot be allocated, otherwise the result of sending.
  */
 
 static int
 ng_btsocket_sco_send_lp_con_rsp(ng_btsocket_sco_rtentry_p rt, bdaddr_p dst, int status)
 {
 	struct ng_mesg		*msg = NULL;
 	ng_hci_lp_con_rsp_ep	*rsp = NULL;
 	int			 rc = 0;
 
 	/* Need a route with a valid hook to talk to the lower layer */
 	if (rt == NULL)
 		return (ENETDOWN);
 	if (rt->hook == NULL || NG_HOOK_NOT_VALID(rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(msg, NGM_HCI_COOKIE, NGM_HCI_LP_CON_RSP,
 		sizeof(*rsp), M_NOWAIT);
 	if (msg == NULL)
 		return (ENOMEM);
 
 	rsp = (ng_hci_lp_con_rsp_ep *)(msg->data);
 	bcopy(dst, &rsp->bdaddr, sizeof(rsp->bdaddr));
 	rsp->link_type = NG_HCI_LINK_SCO;
 	rsp->status = status;
 
 	NG_SEND_MSG_HOOK(rc, ng_btsocket_sco_node, msg, rt->hook, 0);
 
 	return (rc);
 } /* ng_btsocket_sco_send_lp_con_rsp */
 
 /*
  * Send LP_DisconReq request for the pcb's connection handle over the pcb's
  * route hook. Returns ENETDOWN without a usable hook, ENOMEM if the message
  * cannot be allocated, otherwise the result of sending.
  * Caller must hold pcb_mtx
  */
 
 static int
 ng_btsocket_sco_send_lp_discon_req(ng_btsocket_sco_pcb_p pcb)
 {
 	struct ng_mesg		*msg = NULL;
 	ng_hci_lp_discon_req_ep	*req = NULL;
 	int			 rc = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/* Need a route with a valid hook to talk to the lower layer */
 	if (pcb->rt == NULL)
 		return (ENETDOWN);
 	if (pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook))
 		return (ENETDOWN);
 
 	NG_MKMESSAGE(msg, NGM_HCI_COOKIE, NGM_HCI_LP_DISCON_REQ,
 		sizeof(*req), M_NOWAIT);
 	if (msg == NULL)
 		return (ENOMEM);
 
 	req = (ng_hci_lp_discon_req_ep *)(msg->data);
 	req->reason = 0x13; /* User Ended Connection */
 	req->con_handle = pcb->con_handle;
 
 	NG_SEND_MSG_HOOK(rc, ng_btsocket_sco_node, msg, pcb->rt->hook, 0);
 
 	return (rc);
 } /* ng_btsocket_sco_send_lp_discon_req */
 
 /*****************************************************************************
  *****************************************************************************
  **                              Socket interface
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * SCO sockets data input routine.
  *
  * Validates an incoming SCO data packet, strips its header and appends the
  * payload as one record to the receive buffer of the open socket that owns
  * the connection handle. The mbuf is consumed in every case: it is either
  * handed to the socket buffer or freed at "drop".
  */
 
 static void
 ng_btsocket_sco_data_input(struct mbuf *m, hook_p hook)
 {
 	ng_hci_scodata_pkt_t		*hdr = NULL;
 	ng_btsocket_sco_pcb_t		*pcb = NULL;
 	ng_btsocket_sco_rtentry_t	*rt = NULL;
 	u_int16_t			 con_handle;
 
 	if (hook == NULL) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Invalid source hook for SCO data packet\n", __func__);
 		goto drop;
 	}
 
 	/* The routing entry of the hook tells us the local (source) bdaddr */
 	rt = (ng_btsocket_sco_rtentry_t *) NG_HOOK_PRIVATE(hook);
 	if (rt == NULL) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Could not find out source bdaddr for SCO data packet\n", __func__);
 		goto drop;
 	}
 
 	/* Make sure we can access header */
 	if (m->m_pkthdr.len < sizeof(*hdr)) {
 		NG_BTSOCKET_SCO_ERR(
 "%s: SCO data packet too small, len=%d\n", __func__, m->m_pkthdr.len);
 		goto drop;
 	}
 
 	/* Header must be contiguous; on failure m is NULL at "drop" */
 	if (m->m_len < sizeof(*hdr)) { 
 		m = m_pullup(m, sizeof(*hdr));
 		if (m == NULL)
 			goto drop;
 	}
 
 	/*
 	 * Strip SCO packet header and verify packet length.
 	 * NOTE(review): hdr is still dereferenced after m_adj(); this
 	 * presumably relies on m_adj() leaving the trimmed bytes in place -
 	 * confirm.
 	 */
 	hdr = mtod(m, ng_hci_scodata_pkt_t *);
 	m_adj(m, sizeof(*hdr));
 
 	if (hdr->length != m->m_pkthdr.len) {
 		NG_BTSOCKET_SCO_ERR(
 "%s: Bad SCO data packet length, len=%d, length=%d\n",
 			__func__, m->m_pkthdr.len, hdr->length);
 		goto drop;
 	}
 
 	/*
 	 * Now process packet
 	 */
 
 	con_handle = NG_HCI_CON_HANDLE(le16toh(hdr->con_handle));
 
 	NG_BTSOCKET_SCO_INFO(
 "%s: Received SCO data packet: src bdaddr=%x:%x:%x:%x:%x:%x, handle=%d, " \
 "length=%d\n",	__func__,
 		rt->src.b[5], rt->src.b[4], rt->src.b[3],
 		rt->src.b[2], rt->src.b[1], rt->src.b[0],
 		con_handle, hdr->length);
 
 	mtx_lock(&ng_btsocket_sco_sockets_mtx);
 
 	/* Find socket */
 	pcb = ng_btsocket_sco_pcb_by_handle(&rt->src, con_handle);
 	if (pcb == NULL) {
 		mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 		goto drop;
 	}
 
 	/* pcb is locked */
 
 	/* Data is only accepted on a socket in the open state */
 	if (pcb->state != NG_BTSOCKET_SCO_OPEN) {
 		NG_BTSOCKET_SCO_ERR(
 "%s: No connected socket found, src bdaddr=%x:%x:%x:%x:%x:%x, state=%d\n",
 			__func__,
 			rt->src.b[5], rt->src.b[4], rt->src.b[3],
 			rt->src.b[2], rt->src.b[1], rt->src.b[0],
 			pcb->state);
 
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 		goto drop;
 	}
 
 	/* Check if we have enough space in socket receive queue */
 	if (m->m_pkthdr.len > sbspace(&pcb->so->so_rcv)) {
 		NG_BTSOCKET_SCO_ERR(
 "%s: Not enough space in socket receive queue. Dropping SCO data packet, " \
 "src bdaddr=%x:%x:%x:%x:%x:%x, len=%d, space=%ld\n",
 			__func__,
 			rt->src.b[5], rt->src.b[4], rt->src.b[3],
 			rt->src.b[2], rt->src.b[1], rt->src.b[0],
 			m->m_pkthdr.len,
 			sbspace(&pcb->so->so_rcv));
 
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 		goto drop;
 	}
 
 	/* Append packet to the socket receive queue and wakeup */
 	sbappendrecord(&pcb->so->so_rcv, m);
 	m = NULL;	/* now owned by the socket buffer, do not free below */
 
 	sorwakeup(pcb->so);
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 drop:
 	NG_FREE_M(m); /* checks for m != NULL */
 } /* ng_btsocket_sco_data_input */
 
 /*
  * SCO sockets default message input routine.
  *
  * Handles the non-LP HCI messages:
  *  - NGM_HCI_NODE_UP: create or refresh the routing entry attached to the
  *    hook (source bdaddr, SCO packet size and packet count);
  *  - NGM_HCI_SYNC_CON_QUEUE: account for completed packets, drop them from
  *    the send queue of the owning socket and send more data if there is any.
  *
  * The message is always consumed: every path, including the one taken for
  * a missing or invalid hook, ends up freeing it.
  */
 
 static void
 ng_btsocket_sco_default_msg_input(struct ng_mesg *msg, hook_p hook)
 {
 	ng_btsocket_sco_rtentry_t	*rt = NULL;
 
 	/* Nothing to do without a valid hook, but still free the message */
 	if (hook == NULL || NG_HOOK_NOT_VALID(hook))
 		goto drop;
 
 	/* NULL until the first NGM_HCI_NODE_UP was seen on this hook */
 	rt = (ng_btsocket_sco_rtentry_t *) NG_HOOK_PRIVATE(hook);
 
 	switch (msg->header.cmd) {
 	case NGM_HCI_NODE_UP: {
 		ng_hci_node_up_ep	*ep = NULL;
 
 		if (msg->header.arglen != sizeof(*ep))
 			break;
 
 		/* Ignore nodes that do not have an address yet */
 		ep = (ng_hci_node_up_ep *)(msg->data);
 		if (bcmp(&ep->bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0)
 			break;
 
 		if (rt == NULL) {
 			rt = malloc(sizeof(*rt),
 				M_NETGRAPH_BTSOCKET_SCO, M_NOWAIT|M_ZERO);
 			if (rt == NULL)
 				break;
 
 			NG_HOOK_SET_PRIVATE(hook, rt);
 
 			mtx_lock(&ng_btsocket_sco_rt_mtx);
 
 			LIST_INSERT_HEAD(&ng_btsocket_sco_rt, rt, next);
 		} else
 			mtx_lock(&ng_btsocket_sco_rt_mtx);
 
 		/* A reported packet size of 0 is replaced by 60 */
 		bcopy(&ep->bdaddr, &rt->src, sizeof(rt->src));
 		rt->pkt_size = (ep->pkt_size == 0)? 60 : ep->pkt_size;
 		rt->num_pkts = ep->num_pkts;
 		rt->hook = hook;
 
 		mtx_unlock(&ng_btsocket_sco_rt_mtx);
 
 		NG_BTSOCKET_SCO_INFO(
 "%s: Updating hook \"%s\", src bdaddr=%x:%x:%x:%x:%x:%x, pkt_size=%d, " \
 "num_pkts=%d\n",	__func__, NG_HOOK_NAME(hook), 
 			rt->src.b[5], rt->src.b[4], rt->src.b[3], 
 			rt->src.b[2], rt->src.b[1], rt->src.b[0],
 			rt->pkt_size, rt->num_pkts);
 	} break;
 
 	case NGM_HCI_SYNC_CON_QUEUE: {
 		ng_hci_sync_con_queue_ep	*ep = NULL;
 		ng_btsocket_sco_pcb_t		*pcb = NULL;
 
 		if (rt == NULL || msg->header.arglen != sizeof(*ep))
 			break;
 
 		ep = (ng_hci_sync_con_queue_ep *)(msg->data);
 
 		/* Completed packets are no longer pending on this route */
 		rt->pending -= ep->completed;
 		if (rt->pending < 0) {
 			NG_BTSOCKET_SCO_WARN(
 "%s: Pending packet counter is out of sync! bdaddr=%x:%x:%x:%x:%x:%x, " \
 "handle=%d, pending=%d, completed=%d\n",
 				__func__,
 				rt->src.b[5], rt->src.b[4], rt->src.b[3],
 				rt->src.b[2], rt->src.b[1], rt->src.b[0],
 				ep->con_handle, rt->pending,
 				ep->completed);
 
 			rt->pending = 0;
 		}
 
 		mtx_lock(&ng_btsocket_sco_sockets_mtx);
 
 		/* Find socket */
 		pcb = ng_btsocket_sco_pcb_by_handle(&rt->src, ep->con_handle);
 		if (pcb == NULL) {
 			mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 			break;
 		}
 
 		/* pcb is locked */
 
 		/* Check state */
 		if (pcb->state == NG_BTSOCKET_SCO_OPEN) {
 			/* Remove timeout */
 			ng_btsocket_sco_untimeout(pcb);
 			
 			/* Drop completed packets from the send queue */
 			for (; ep->completed > 0; ep->completed --)
 				sbdroprecord(&pcb->so->so_snd);
 
 			/* Send more if we have any */
 			if (sbavail(&pcb->so->so_snd) > 0)
 				if (ng_btsocket_sco_send2(pcb) == 0)
 					ng_btsocket_sco_timeout(pcb);
 
 			/* Wake up writers */
 			sowwakeup(pcb->so);
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 	} break;
 
 	default:
 		NG_BTSOCKET_SCO_WARN(
 "%s: Unknown message, cmd=%d\n", __func__, msg->header.cmd);
 		break;
 	}
 drop:
 	NG_FREE_MSG(msg); /* Checks for msg != NULL */
 } /* ng_btsocket_sco_default_msg_input */
 
 /*
  * SCO sockets LP message input routine: dispatch an LP event to its
  * handler, using the routing entry attached to the hook it arrived on.
  * The message is freed here in every case.
  */
 
 static void
 ng_btsocket_sco_lp_msg_input(struct ng_mesg *msg, hook_p hook)
 {
 	ng_btsocket_sco_rtentry_p	 route = NULL;
 
 	if (hook == NULL) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Invalid source hook for LP message\n", __func__);
 		goto drop;
 	}
 
 	route = (ng_btsocket_sco_rtentry_p) NG_HOOK_PRIVATE(hook);
 	if (route == NULL) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Could not find out source bdaddr for LP message\n", __func__);
 		goto drop;
 	}
 
 	/* Handler results are intentionally not propagated */
 	switch (msg->header.cmd) {
 	case NGM_HCI_LP_CON_CFM: /* Connection Confirmation Event */
 		(void) ng_btsocket_sco_process_lp_con_cfm(msg, route);
 		break;
 
 	case NGM_HCI_LP_CON_IND: /* Connection Indication Event */
 		(void) ng_btsocket_sco_process_lp_con_ind(msg, route);
 		break;
 
 	case NGM_HCI_LP_DISCON_IND: /* Disconnection Indication Event */
 		(void) ng_btsocket_sco_process_lp_discon_ind(msg, route);
 		break;
 
 	/* XXX FIXME add other LP messages */
 
 	default:
 		NG_BTSOCKET_SCO_WARN(
 "%s: Unknown LP message, cmd=%d\n", __func__, msg->header.cmd);
 		break;
 	}
 drop:
 	NG_FREE_MSG(msg);
 } /* ng_btsocket_sco_lp_msg_input */
 
 /*
  * SCO sockets input routine (task handler).
  *
  * Drains ng_btsocket_sco_queue one item at a time. Data items go to
  * ng_btsocket_sco_data_input(), LP messages go to
  * ng_btsocket_sco_lp_msg_input() and every other message goes to
  * ng_btsocket_sco_default_msg_input(). Items whose source hook is no
  * longer valid are released without being processed.
  */
 
 static void
 ng_btsocket_sco_input(void *context, int pending)
 {
 	item_p	item = NULL;
 	hook_p	hook = NULL;
 
 	while (1) {
 		/* The queue lock is held only while taking one item off */
 		mtx_lock(&ng_btsocket_sco_queue_mtx);
 		NG_BT_ITEMQ_DEQUEUE(&ng_btsocket_sco_queue, item);
 		mtx_unlock(&ng_btsocket_sco_queue_mtx);
 
 		if (item == NULL)
 			return;
 
 		NGI_GET_HOOK(item, hook);
 
 		if (hook == NULL || !NG_HOOK_NOT_VALID(hook)) {
 			if ((item->el_flags & NGQF_TYPE) == NGQF_DATA) {
 				struct mbuf     *m = NULL;
 
 				NGI_GET_M(item, m);
 				ng_btsocket_sco_data_input(m, hook);
 			} else if ((item->el_flags & NGQF_TYPE) == NGQF_MESG) {
 				struct ng_mesg  *msg = NULL;
 
 				NGI_GET_MSG(item, msg);
 
 				/* XXX FIXME add other LP messages */
 				if (msg->header.cmd == NGM_HCI_LP_CON_CFM ||
 				    msg->header.cmd == NGM_HCI_LP_CON_IND ||
 				    msg->header.cmd == NGM_HCI_LP_DISCON_IND)
 					ng_btsocket_sco_lp_msg_input(msg, hook);
 				else
 					ng_btsocket_sco_default_msg_input(msg,
 						hook);
 			} else {
 				KASSERT(0,
 ("%s: invalid item type=%ld\n", __func__, (item->el_flags & NGQF_TYPE)));
 			}
 		}
 
 		/* Drop the hook reference taken from the item, then the item */
 		if (hook != NULL)
 			NG_HOOK_UNREF(hook);
 
 		NG_FREE_ITEM(item);
 	}
 } /* ng_btsocket_sco_input */
 
 /*
  * Route cleanup task. Gets scheduled when hook is disconnected. Here we 
  * will find all sockets that use "invalid" hook and disconnect them.
  */
 
 static void
 ng_btsocket_sco_rtclean(void *context, int pending)
 {
 	ng_btsocket_sco_pcb_p		pcb = NULL, pcb_next = NULL;
 	ng_btsocket_sco_rtentry_p	rt = NULL;
 
 	/*
 	 * First disconnect all sockets that use "invalid" hook
 	 */
 
 	mtx_lock(&ng_btsocket_sco_sockets_mtx);
 
 	for(pcb = LIST_FIRST(&ng_btsocket_sco_sockets); pcb != NULL; ) {
 		mtx_lock(&pcb->pcb_mtx);
 		pcb_next = LIST_NEXT(pcb, next);
 
 		if (pcb->rt != NULL &&
 		    pcb->rt->hook != NULL && NG_HOOK_NOT_VALID(pcb->rt->hook)) {
 			if (pcb->flags & NG_BTSOCKET_SCO_TIMO)
 				ng_btsocket_sco_untimeout(pcb);
 
 			pcb->rt = NULL;
 			pcb->so->so_error = ENETDOWN;
 			pcb->state = NG_BTSOCKET_SCO_CLOSED;
 			soisdisconnected(pcb->so);
 		}
 
 		mtx_unlock(&pcb->pcb_mtx);
 		pcb = pcb_next;
 	}
 
 	mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 
 	/*
 	 * Now cleanup routing table
 	 */
 
 	mtx_lock(&ng_btsocket_sco_rt_mtx);
 
 	for (rt = LIST_FIRST(&ng_btsocket_sco_rt); rt != NULL; ) {
 		ng_btsocket_sco_rtentry_p	rt_next = LIST_NEXT(rt, next);
 
 		if (rt->hook != NULL && NG_HOOK_NOT_VALID(rt->hook)) {
 			LIST_REMOVE(rt, next);
 
 			NG_HOOK_SET_PRIVATE(rt->hook, NULL);
 			NG_HOOK_UNREF(rt->hook); /* Remove extra reference */
 
 			bzero(rt, sizeof(*rt));
 			free(rt, M_NETGRAPH_BTSOCKET_SCO);
 		}
 
 		rt = rt_next;
 	}
 
 	mtx_unlock(&ng_btsocket_sco_rt_mtx);
 } /* ng_btsocket_sco_rtclean */
 
 /*
  * Initialize everything
  */
 
 void
 ng_btsocket_sco_init(void)
 {
 	int	error = 0;
 
 	/* Skip initialization of globals for non-default instances. */
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 
 	ng_btsocket_sco_node = NULL;
 	ng_btsocket_sco_debug_level = NG_BTSOCKET_WARN_LEVEL;
 
 	/* Register Netgraph node type */
 	error = ng_newtype(&typestruct);
 	if (error != 0) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Could not register Netgraph node type, error=%d\n", __func__, error);
 
                 return;
 	}
 
 	/* Create Netgrapg node */
 	error = ng_make_node_common(&typestruct, &ng_btsocket_sco_node);
 	if (error != 0) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Could not create Netgraph node, error=%d\n", __func__, error);
 
 		ng_btsocket_sco_node = NULL;
 
 		return;
 	}
 
 	error = ng_name_node(ng_btsocket_sco_node, NG_BTSOCKET_SCO_NODE_TYPE);
 	if (error != 0) {
 		NG_BTSOCKET_SCO_ALERT(
 "%s: Could not name Netgraph node, error=%d\n", __func__, error);
 
 		NG_NODE_UNREF(ng_btsocket_sco_node);
 		ng_btsocket_sco_node = NULL;
 
 		return;
 	}
 
 	/* Create input queue */
 	NG_BT_ITEMQ_INIT(&ng_btsocket_sco_queue, 300);
 	mtx_init(&ng_btsocket_sco_queue_mtx,
 		"btsocks_sco_queue_mtx", NULL, MTX_DEF);
 	TASK_INIT(&ng_btsocket_sco_queue_task, 0,
 		ng_btsocket_sco_input, NULL);
 
 	/* Create list of sockets */
 	LIST_INIT(&ng_btsocket_sco_sockets);
 	mtx_init(&ng_btsocket_sco_sockets_mtx,
 		"btsocks_sco_sockets_mtx", NULL, MTX_DEF);
 
 	/* Routing table */
 	LIST_INIT(&ng_btsocket_sco_rt);
 	mtx_init(&ng_btsocket_sco_rt_mtx,
 		"btsocks_sco_rt_mtx", NULL, MTX_DEF);
 	TASK_INIT(&ng_btsocket_sco_rt_task, 0,
 		ng_btsocket_sco_rtclean, NULL);
 } /* ng_btsocket_sco_init */
 
 /*
  * Abort connection on socket.
  *
  * Records ECONNABORTED as the socket error and then runs the regular
  * disconnect path; the result of the disconnect is ignored.
  */
 
 void
 ng_btsocket_sco_abort(struct socket *so)
 {
 	so->so_error = ECONNABORTED;
 	(void) ng_btsocket_sco_disconnect(so);
 } /* ng_btsocket_sco_abort */
 
 void
 ng_btsocket_sco_close(struct socket *so)
 {
 	/* Close is a plain disconnect; its result is ignored */
 	(void) ng_btsocket_sco_disconnect(so);
 } /* ng_btsocket_sco_close */
 
 /*
  * Accept connection on socket. The socket is expected to be connected
  * already, so the only work is to hand back the peer address.
  * Returns EINVAL when the SCO node does not exist.
  */
 
 int
 ng_btsocket_sco_accept(struct socket *so, struct sockaddr **nam)
 {
 	return ((ng_btsocket_sco_node != NULL)?
 		ng_btsocket_sco_peeraddr(so, nam) : EINVAL);
 } /* ng_btsocket_sco_accept */
 
 /*
  * Create and attach new socket
  */
 
 int
 ng_btsocket_sco_attach(struct socket *so, int proto, struct thread *td)
 {
 	ng_btsocket_sco_pcb_p	pcb = so2sco_pcb(so);
 	int			error;
 
 	/* Check socket and protocol */
 	if (ng_btsocket_sco_node == NULL) 
 		return (EPROTONOSUPPORT);
 	if (so->so_type != SOCK_SEQPACKET)
 		return (ESOCKTNOSUPPORT);
 
 #if 0 /* XXX sonewconn() calls "pru_attach" with proto == 0 */
 	if (proto != 0) 
 		if (proto != BLUETOOTH_PROTO_SCO)
 			return (EPROTONOSUPPORT);
 #endif /* XXX */
 
 	if (pcb != NULL)
 		return (EISCONN);
 
 	/* Reserve send and receive space if it is not reserved yet */
 	if ((so->so_snd.sb_hiwat == 0) || (so->so_rcv.sb_hiwat == 0)) {
 		error = soreserve(so, NG_BTSOCKET_SCO_SENDSPACE,
 					NG_BTSOCKET_SCO_RECVSPACE);
 		if (error != 0)
 			return (error);
 	}
 
 	/* Allocate the PCB */
         pcb = malloc(sizeof(*pcb),
 		M_NETGRAPH_BTSOCKET_SCO, M_NOWAIT | M_ZERO);
         if (pcb == NULL)
                 return (ENOMEM);
 
 	/* Link the PCB and the socket */
 	so->so_pcb = (caddr_t) pcb;
 	pcb->so = so;
 	pcb->state = NG_BTSOCKET_SCO_CLOSED;
 
 	callout_init(&pcb->timo, 1);
 
 	/*
 	 * Mark PCB mutex as DUPOK to prevent "duplicated lock of
 	 * the same type" message. When accepting new SCO connection 
 	 * ng_btsocket_sco_process_lp_con_ind() holds both PCB mutexes 
 	 * for "old" (accepting) PCB and "new" (created) PCB.
 	 */
 		
 	mtx_init(&pcb->pcb_mtx, "btsocks_sco_pcb_mtx", NULL,
 		MTX_DEF|MTX_DUPOK);
 
 	/*
 	 * Add the PCB to the list
 	 *
 	 * XXX FIXME VERY IMPORTANT!
 	 *
 	 * This is totally FUBAR. We could get here in two cases:
 	 *
 	 * 1) When user calls socket()
 	 * 2) When we need to accept new incoming connection and call
 	 *    sonewconn()
 	 *
 	 * In the first case we must acquire ng_btsocket_sco_sockets_mtx.
 	 * In the second case we hold ng_btsocket_sco_sockets_mtx already.
 	 * So we now need to distinguish between these cases. From reading
 	 * /sys/kern/uipc_socket2.c we can find out that sonewconn() calls
 	 * pru_attach with proto == 0 and td == NULL. For now use this fact
 	 * to figure out if we were called from socket() or from sonewconn().
 	 */
 
 	if (td != NULL)
 		mtx_lock(&ng_btsocket_sco_sockets_mtx);
 	else
 		mtx_assert(&ng_btsocket_sco_sockets_mtx, MA_OWNED);
 
 	LIST_INSERT_HEAD(&ng_btsocket_sco_sockets, pcb, next);
 
 	if (td != NULL)
 		mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 
         return (0);
 } /* ng_btsocket_sco_attach */
 
 /*
  * Bind socket.
  *
  * Stores the requested bdaddr as the source address of the socket. Unless
  * the address is ANY, the bind is refused when any socket on the list
  * already has exactly that source address.
  *
  * Returns 0 on success, EINVAL for a missing node, address or PCB or a bad
  * address length, EAFNOSUPPORT for a non-Bluetooth address family and
  * EADDRINUSE when the address is taken.
  */
 
 int
 ng_btsocket_sco_bind(struct socket *so, struct sockaddr *nam, 
 		struct thread *td)
 {
 	struct sockaddr_sco	*sa = (struct sockaddr_sco *) nam;
 	ng_btsocket_sco_pcb_t	*pcb = NULL, *other = NULL;
 	int			 taken, error = 0;
 
 	if (ng_btsocket_sco_node == NULL)
 		return (EINVAL);
 
 	/* Verify address */
 	if (sa == NULL)
 		return (EINVAL);
 	if (sa->sco_family != AF_BLUETOOTH)
 		return (EAFNOSUPPORT);
 	if (sa->sco_len != sizeof(*sa))
 		return (EINVAL);
 
 	mtx_lock(&ng_btsocket_sco_sockets_mtx);
 
 	/*
 	 * A specific (non-ANY) address must not be in use by any socket on
 	 * the list; only an exact bdaddr match counts.
 	 */
 
 	if (bcmp(&sa->sco_bdaddr, NG_HCI_BDADDR_ANY, sizeof(sa->sco_bdaddr)) != 0) {
 		LIST_FOREACH(other, &ng_btsocket_sco_sockets, next) {
 			mtx_lock(&other->pcb_mtx);
 			taken = (bcmp(&other->src, &sa->sco_bdaddr,
 					sizeof(bdaddr_t)) == 0);
 			mtx_unlock(&other->pcb_mtx);
 
 			if (taken) {
 				error = EADDRINUSE;
 				goto out;
 			}
 		}
 	}
 
 	pcb = so2sco_pcb(so);
 	if (pcb == NULL) {
 		error = EINVAL;
 		goto out;
 	}
 
 	/* Address is available - assign it */
 	mtx_lock(&pcb->pcb_mtx);
 	bcopy(&sa->sco_bdaddr, &pcb->src, sizeof(pcb->src));
 	mtx_unlock(&pcb->pcb_mtx);
 out:
 	mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 
 	return (error);
 } /* ng_btsocket_sco_bind */
 
 /*
  * Connect socket
  */
 
 int
 ng_btsocket_sco_connect(struct socket *so, struct sockaddr *nam, 
 		struct thread *td)
 {
 	ng_btsocket_sco_pcb_t		*pcb = so2sco_pcb(so);
 	struct sockaddr_sco		*sa = (struct sockaddr_sco *) nam;
 	ng_btsocket_sco_rtentry_t	*rt = NULL;
 	int				 have_src, error = 0;
 
 	/* Check socket */
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_sco_node == NULL) 
 		return (EINVAL);
 
 	/* Verify address */
 	if (sa == NULL)
 		return (EINVAL);
 	if (sa->sco_family != AF_BLUETOOTH)
 		return (EAFNOSUPPORT);
 	if (sa->sco_len != sizeof(*sa))
 		return (EINVAL);
 	if (bcmp(&sa->sco_bdaddr, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0)
 		return (EDESTADDRREQ);
 
 	/*
 	 * Routing. Socket should be bound to some source address. The source
 	 * address can be ANY. Destination address must be set and it must not
 	 * be ANY. If source address is ANY then find first rtentry that has
 	 * src != dst.
 	 */
 
 	mtx_lock(&ng_btsocket_sco_rt_mtx);
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->state == NG_BTSOCKET_SCO_CONNECTING) {
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&ng_btsocket_sco_rt_mtx);
 
 		return (EINPROGRESS);
 	}
 
 	if (bcmp(&sa->sco_bdaddr, &pcb->src, sizeof(pcb->src)) == 0) {
 		mtx_unlock(&pcb->pcb_mtx);
 		mtx_unlock(&ng_btsocket_sco_rt_mtx);
 
 		return (EINVAL);
 	}
 
 	/* Send destination address and PSM */
 	bcopy(&sa->sco_bdaddr, &pcb->dst, sizeof(pcb->dst));
 
 	pcb->rt = NULL;
 	have_src = bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(pcb->src));
 
 	LIST_FOREACH(rt, &ng_btsocket_sco_rt, next) {
 		if (rt->hook == NULL || NG_HOOK_NOT_VALID(rt->hook))
 			continue;
 
 		/* Match src and dst */
 		if (have_src) {
 			if (bcmp(&pcb->src, &rt->src, sizeof(rt->src)) == 0)
 				break;
 		} else {
 			if (bcmp(&pcb->dst, &rt->src, sizeof(rt->src)) != 0)
 				break;
 		}
 	}
 
 	if (rt != NULL) {
 		pcb->rt = rt;
 
 		if (!have_src)
 			bcopy(&rt->src, &pcb->src, sizeof(pcb->src));
 	} else
 		error = EHOSTUNREACH;
 
 	/*
 	 * Send LP_Connect request 
 	 */
 
 	if (error == 0) {	
 		error = ng_btsocket_sco_send_lp_con_req(pcb);
 		if (error == 0) {
 			pcb->flags |= NG_BTSOCKET_SCO_CLIENT;
 			pcb->state = NG_BTSOCKET_SCO_CONNECTING;
 			soisconnecting(pcb->so);
 
 			ng_btsocket_sco_timeout(pcb);
 		}
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_sco_rt_mtx);
 
 	return (error);
 } /* ng_btsocket_sco_connect */
 
 /*
  * Process ioctl calls on socket. No ioctl is handled here: every request
  * is answered with EINVAL.
  */
 
 int
 ng_btsocket_sco_control(struct socket *so, u_long cmd, caddr_t data,
 		struct ifnet *ifp, struct thread *td)
 {
 	/* Arguments are intentionally unused */
 	return (EINVAL);
 } /* ng_btsocket_sco_control */
 
 /*
  * Process getsockopt/setsockopt system calls
  */
 
 int
 ng_btsocket_sco_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	ng_btsocket_sco_pcb_p	pcb = so2sco_pcb(so);
         int			error, tmp;
 
 	if (ng_btsocket_sco_node == NULL) 
 		return (EINVAL);
 	if (pcb == NULL)
 		return (EINVAL);
 
 	if (sopt->sopt_level != SOL_SCO)
 		return (0);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	switch (sopt->sopt_dir) {
 	case SOPT_GET:
 		if (pcb->state != NG_BTSOCKET_SCO_OPEN) {
 			error = ENOTCONN;
 			break;
 		}
 		
 		switch (sopt->sopt_name) {
 		case SO_SCO_MTU:
 			tmp = pcb->rt->pkt_size;
 			error = sooptcopyout(sopt, &tmp, sizeof(tmp));
 			break;
 
 		case SO_SCO_CONNINFO:
 			tmp = pcb->con_handle;
 			error = sooptcopyout(sopt, &tmp, sizeof(tmp));
 			break;
 
 		default:
 			error = EINVAL;
 			break;
 		}
 		break;
 
 	case SOPT_SET:
 		error = ENOPROTOOPT;
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 	
 	return (error);
 } /* ng_btsocket_sco_ctloutput */
 
 /*
  * Detach and destroy socket.
  *
  * Cancels a pending socket timeout, sends an LP disconnect request if the
  * socket is still OPEN, unlinks the PCB from the global socket list and
  * then destroys, zeroes and frees the PCB. Finally the socket is marked
  * disconnected and its so_pcb pointer is cleared.
  *
  * If the SCO node does not exist the function returns at once and the PCB
  * is left untouched.
  */
 
 void
 ng_btsocket_sco_detach(struct socket *so)
 {
 	ng_btsocket_sco_pcb_p	pcb = so2sco_pcb(so);
 
 	KASSERT(pcb != NULL, ("ng_btsocket_sco_detach: pcb == NULL"));
 
 	if (ng_btsocket_sco_node == NULL) 
 		return;
 
 	/* List mutex first, then the PCB mutex */
 	mtx_lock(&ng_btsocket_sco_sockets_mtx);
 	mtx_lock(&pcb->pcb_mtx);
 
 	/*
 	 * NOTE(review): ng_btsocket_sco_untimeout() only calls
 	 * callout_stop(); presumably that does not wait for a handler that
 	 * is already running - confirm the PCB cannot be freed below while
 	 * ng_btsocket_sco_process_timeout() is waiting for pcb_mtx.
 	 */
 	if (pcb->flags & NG_BTSOCKET_SCO_TIMO)
 		ng_btsocket_sco_untimeout(pcb);
 
 	/* Only an OPEN socket sends an LP disconnect request */
 	if (pcb->state == NG_BTSOCKET_SCO_OPEN)
 		ng_btsocket_sco_send_lp_discon_req(pcb);
 
 	pcb->state = NG_BTSOCKET_SCO_CLOSED;
 
 	LIST_REMOVE(pcb, next);
 
 	mtx_unlock(&pcb->pcb_mtx);
 	mtx_unlock(&ng_btsocket_sco_sockets_mtx);
 
 	/* The PCB is off the list now: destroy its mutex, wipe and free it */
 	mtx_destroy(&pcb->pcb_mtx);
 	bzero(pcb, sizeof(*pcb));
 	free(pcb, M_NETGRAPH_BTSOCKET_SCO);
 
 	/* pcb is gone; from here on only the socket itself is touched */
 	soisdisconnected(so);
 	so->so_pcb = NULL;
 } /* ng_btsocket_sco_detach */
 
 /*
  * Disconnect socket
  */
 
 int
 ng_btsocket_sco_disconnect(struct socket *so)
 {
 	ng_btsocket_sco_pcb_p	pcb = so2sco_pcb(so);
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_sco_node == NULL) 
 		return (EINVAL);
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->state == NG_BTSOCKET_SCO_DISCONNECTING) {
 		mtx_unlock(&pcb->pcb_mtx);
 
 		return (EINPROGRESS);
 	}
 
 	if (pcb->flags & NG_BTSOCKET_SCO_TIMO)
 		ng_btsocket_sco_untimeout(pcb);
 
 	if (pcb->state == NG_BTSOCKET_SCO_OPEN) {
 		ng_btsocket_sco_send_lp_discon_req(pcb);
 
 		pcb->state = NG_BTSOCKET_SCO_DISCONNECTING;
 		soisdisconnecting(so);
 
 		ng_btsocket_sco_timeout(pcb);
 	} else {
 		pcb->state = NG_BTSOCKET_SCO_CLOSED;
 		soisdisconnected(so);
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 
 	return (0);
 } /* ng_btsocket_sco_disconnect */
 
 /*
  * Listen on socket.
  *
  * With the socket lock and the PCB mutex held, runs the generic
  * solisten_proto_check() and, if it passes, solisten_proto().
  *
  * Returns 0 on success, EINVAL for a missing PCB or node, or the error
  * from solisten_proto_check().
  */
 
 int
 ng_btsocket_sco_listen(struct socket *so, int backlog, struct thread *td)
 {
 	ng_btsocket_sco_pcb_p	pcb = so2sco_pcb(so);
 	int			error;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_sco_node == NULL)
 		return (EINVAL);
 
 	SOCK_LOCK(so);
 	mtx_lock(&pcb->pcb_mtx);
 
 	error = solisten_proto_check(so);
 #if 0
 	/* Disabled: would refuse to listen on an unbound (ANY) address */
 	if (error == 0 &&
 	    bcmp(&pcb->src, NG_HCI_BDADDR_ANY, sizeof(bdaddr_t)) == 0)
 		error = EDESTADDRREQ;
 #endif
 	if (error == 0)
 		solisten_proto(so, backlog);
 
 	mtx_unlock(&pcb->pcb_mtx);
 	SOCK_UNLOCK(so);
 
 	return (error);
 } /* ng_btsocket_sco_listen */
 
 /*
  * Get peer address
  */
 
 int
 ng_btsocket_sco_peeraddr(struct socket *so, struct sockaddr **nam)
 {
 	ng_btsocket_sco_pcb_p	pcb = so2sco_pcb(so);
 	struct sockaddr_sco	sa;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_sco_node == NULL) 
 		return (EINVAL);
 
 	mtx_lock(&pcb->pcb_mtx);
 	bcopy(&pcb->dst, &sa.sco_bdaddr, sizeof(sa.sco_bdaddr));
 	mtx_unlock(&pcb->pcb_mtx);
 
 	sa.sco_len = sizeof(sa);
 	sa.sco_family = AF_BLUETOOTH;
 
 	*nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT);
 
 	return ((*nam == NULL)? ENOMEM : 0);
 } /* ng_btsocket_sco_peeraddr */
 
 /*
  * Send data to socket.
  *
  * Appends the packet to the socket send queue as one record. If no socket
  * timeout is pending, transmission is started right away with
  * ng_btsocket_sco_send2() and the timeout is armed; otherwise the packet
  * waits for the next NGM_HCI_SYNC_CON_QUEUE message.
  *
  * Both m and control are released here on every path where they were not
  * handed to the send queue.
  *
  * Returns 0 on success, ENETDOWN for a missing node or an unusable route,
  * EINVAL for a missing PCB or packet or when control data is supplied,
  * ENOTCONN when the socket is not OPEN, EMSGSIZE when the packet is larger
  * than the route's packet size, or the error from ng_btsocket_sco_send2().
  */
 
 int
 ng_btsocket_sco_send(struct socket *so, int flags, struct mbuf *m,
 		struct sockaddr *nam, struct mbuf *control, struct thread *td)
 {
 	ng_btsocket_sco_pcb_t	*pcb = so2sco_pcb(so);
 	int			 error = 0;
 
 	if (ng_btsocket_sco_node == NULL) {
 		error = ENETDOWN;
 		goto drop;
 	}
 
 	/* Check socket and input */
 	if (pcb == NULL || m == NULL || control != NULL) {
 		error = EINVAL;
 		goto drop;
 	}
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	if (pcb->state != NG_BTSOCKET_SCO_OPEN) {
 		/* Socket is not connected */
 		error = ENOTCONN;
 	} else if (pcb->rt == NULL ||
 	    pcb->rt->hook == NULL || NG_HOOK_NOT_VALID(pcb->rt->hook)) {
 		/* No usable route */
 		error = ENETDOWN;
 	} else if (m->m_pkthdr.len > pcb->rt->pkt_size) {
 		NG_BTSOCKET_SCO_ERR(
 "%s: Packet too big, len=%d, pkt_size=%d\n",
 			__func__, m->m_pkthdr.len, pcb->rt->pkt_size);
 
 		error = EMSGSIZE;
 	} else {
 		/*
 		 * Queue the packet first; from here on the send queue owns
 		 * it. Transmission is started only when no timeout is
 		 * pending, otherwise the NGM_HCI_SYNC_CON_QUEUE handler
 		 * will pick the packet up.
 		 */
 
 		sbappendrecord(&pcb->so->so_snd, m);
 		m = NULL;
 
 		if ((pcb->flags & NG_BTSOCKET_SCO_TIMO) == 0) {
 			error = ng_btsocket_sco_send2(pcb);
 			if (error != 0)
 				sbdroprecord(&pcb->so->so_snd); /* XXX */
 			else
 				ng_btsocket_sco_timeout(pcb);
 		}
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 drop:
 	NG_FREE_M(m); /* checks for != NULL */
 	NG_FREE_M(control);
 
 	return (error);
 } /* ng_btsocket_sco_send */
 
 /*
  * Send first packet in the socket queue to the SCO layer.
  *
  * Caller must hold the PCB mutex. While the route has fewer than num_pkts
  * packets pending and the send queue is not empty, a copy of the head of
  * the send queue is prefixed with an SCO data packet header and sent out
  * on the route's hook; each successful send increments the route's pending
  * counter. The queued original is not removed here.
  *
  * Returns 0 when the route has at least one packet pending on exit,
  * otherwise the last error (ENOBUFS on mbuf shortage, or the send error).
  */
 
 static int
 ng_btsocket_sco_send2(ng_btsocket_sco_pcb_p pcb)
 {
 	struct  mbuf		*m = NULL;
 	ng_hci_scodata_pkt_t	*hdr = NULL;
 	int			 error = 0;
 
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	/*
 	 * NOTE(review): nothing in this loop advances past so_snd.sb_mb, so
 	 * every iteration duplicates the same head record - confirm this is
 	 * intended when num_pkts allows more than one packet in flight.
 	 */
 	while (pcb->rt->pending < pcb->rt->num_pkts &&
 	       sbavail(&pcb->so->so_snd) > 0) {
 		/* Get a copy of the first packet on send queue */
 		m = m_dup(pcb->so->so_snd.sb_mb, M_NOWAIT);
 		if (m == NULL) {
 			error = ENOBUFS;
 			break;
 		}
 
 		/* Create SCO packet header */
 		M_PREPEND(m, sizeof(*hdr), M_NOWAIT);
 		/* Make sure the whole header sits in the first mbuf */
 		if (m != NULL)
 			if (m->m_len < sizeof(*hdr))
 				m = m_pullup(m, sizeof(*hdr));
 
 		/* Either M_PREPEND or m_pullup may have left m NULL */
 		if (m == NULL) {
 			error = ENOBUFS;
 			break;
 		}
 
 		/* Fill in the header */
 		hdr = mtod(m, ng_hci_scodata_pkt_t *);
 		hdr->type = NG_HCI_SCO_DATA_PKT;
 		hdr->con_handle = htole16(NG_HCI_MK_CON_HANDLE(pcb->con_handle, 0, 0));
 		/* Payload length: packet length without the header itself */
 		hdr->length = m->m_pkthdr.len - sizeof(*hdr);
 
 		/* Send packet */
 		NG_SEND_DATA_ONLY(error, pcb->rt->hook, m);
 		if (error != 0)
 			break;
 
 		pcb->rt->pending ++;
 	}
 
 	/* An error is reported only when nothing at all is in flight */
 	return ((pcb->rt->pending > 0)? 0 : error);
 } /* ng_btsocket_sco_send2 */
 
 /*
  * Get socket address
  */
 
 int
 ng_btsocket_sco_sockaddr(struct socket *so, struct sockaddr **nam)
 {
 	ng_btsocket_sco_pcb_p	pcb = so2sco_pcb(so);
 	struct sockaddr_sco	sa;
 
 	if (pcb == NULL)
 		return (EINVAL);
 	if (ng_btsocket_sco_node == NULL) 
 		return (EINVAL);
 
 	mtx_lock(&pcb->pcb_mtx);
 	bcopy(&pcb->src, &sa.sco_bdaddr, sizeof(sa.sco_bdaddr));
 	mtx_unlock(&pcb->pcb_mtx);
 
 	sa.sco_len = sizeof(sa);
 	sa.sco_family = AF_BLUETOOTH;
 
 	*nam = sodupsockaddr((struct sockaddr *) &sa, M_NOWAIT);
 
 	return ((*nam == NULL)? ENOMEM : 0);
 } /* ng_btsocket_sco_sockaddr */
 
 /*****************************************************************************
  *****************************************************************************
  **                              Misc. functions
  *****************************************************************************
  *****************************************************************************/
 
 /*
  * Look for the socket that listens on the given bdaddr.
  *
  * Only sockets with SO_ACCEPTCONN set are considered. A socket bound to
  * exactly the given address wins; otherwise the last listening socket
  * bound to ANY is used. Caller must hold ng_btsocket_sco_sockets_mtx.
  * Returns the pcb locked, or NULL when nothing matches.
  */
 
 static ng_btsocket_sco_pcb_p
 ng_btsocket_sco_pcb_by_addr(bdaddr_p bdaddr)
 {
 	ng_btsocket_sco_pcb_p	cur = NULL, wildcard = NULL;
 
 	mtx_assert(&ng_btsocket_sco_sockets_mtx, MA_OWNED);
 
 	LIST_FOREACH(cur, &ng_btsocket_sco_sockets, next) {
 		mtx_lock(&cur->pcb_mtx);
 
 		if (cur->so != NULL && (cur->so->so_options & SO_ACCEPTCONN)) {
 			/* Exact match: return with locked pcb */
 			if (bcmp(&cur->src, bdaddr, sizeof(cur->src)) == 0)
 				return (cur);
 
 			/* Remember a listener on ANY as the fallback */
 			if (bcmp(&cur->src, NG_HCI_BDADDR_ANY,
 					sizeof(cur->src)) == 0)
 				wildcard = cur;
 		}
 
 		mtx_unlock(&cur->pcb_mtx);
 	}
 
 	/* The fallback was unlocked inside the loop - lock it again */
 	if (wildcard != NULL)
 		mtx_lock(&wildcard->pcb_mtx);
 
 	return (wildcard);
 } /* ng_btsocket_sco_pcb_by_addr */
 
 /*
  * Look for the socket that is assigned to the given source address and
  * connection handle. Caller must hold ng_btsocket_sco_sockets_mtx.
  * Returns the pcb locked, or NULL when nothing matches.
  */
 
 static ng_btsocket_sco_pcb_p
 ng_btsocket_sco_pcb_by_handle(bdaddr_p src, int con_handle)
 {
 	ng_btsocket_sco_pcb_p	cur = NULL;
 
 	mtx_assert(&ng_btsocket_sco_sockets_mtx, MA_OWNED);
 
 	LIST_FOREACH(cur, &ng_btsocket_sco_sockets, next) {
 		mtx_lock(&cur->pcb_mtx);
 
 		/* On a match leave the loop with the pcb still locked */
 		if (cur->con_handle == con_handle &&
 		    bcmp(src, &cur->src, sizeof(cur->src)) == 0)
 			break;
 
 		mtx_unlock(&cur->pcb_mtx);
 	}
 
 	/* cur is NULL here when the whole list was walked without a match */
 	return (cur);
 } /* ng_btsocket_sco_pcb_by_handle */
 
 /*
  * Look for the socket in CONNECTING state with the given source and
  * destination addresses. Caller must hold ng_btsocket_sco_sockets_mtx.
  * Returns the pcb locked, or NULL when nothing matches.
  */
 
 static ng_btsocket_sco_pcb_p
 ng_btsocket_sco_pcb_by_addrs(bdaddr_p src, bdaddr_p dst)
 {
 	ng_btsocket_sco_pcb_p	cur = NULL;
 
 	mtx_assert(&ng_btsocket_sco_sockets_mtx, MA_OWNED);
 
 	LIST_FOREACH(cur, &ng_btsocket_sco_sockets, next) {
 		mtx_lock(&cur->pcb_mtx);
 
 		/* On a match leave the loop with the pcb still locked */
 		if (cur->state == NG_BTSOCKET_SCO_CONNECTING &&
 		    bcmp(src, &cur->src, sizeof(cur->src)) == 0 &&
 		    bcmp(dst, &cur->dst, sizeof(cur->dst)) == 0)
 			break;
 
 		mtx_unlock(&cur->pcb_mtx);
 	}
 
 	/* cur is NULL here when the whole list was walked without a match */
 	return (cur);
 } /* ng_btsocket_sco_pcb_by_addrs */
 
 /*
  * Set timeout on socket. Caller must hold the PCB mutex.
  *
  * Marks the PCB as having a timeout and schedules
  * ng_btsocket_sco_process_timeout(). Asking for a second timeout while one
  * is already set trips an assertion and is otherwise ignored.
  */
 
 static void
 ng_btsocket_sco_timeout(ng_btsocket_sco_pcb_p pcb)
 {
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	if (pcb->flags & NG_BTSOCKET_SCO_TIMO) {
 		KASSERT(0,
 ("%s: Duplicated socket timeout?!\n", __func__));
 		return;
 	}
 
 	pcb->flags |= NG_BTSOCKET_SCO_TIMO;
 	callout_reset(&pcb->timo, bluetooth_sco_rtx_timeout(),
 				ng_btsocket_sco_process_timeout, pcb);
 } /* ng_btsocket_sco_timeout */
 
 /*
  * Unset timeout on socket. Caller must hold the PCB mutex.
  *
  * Stops the callout and clears the timeout mark on the PCB. Calling this
  * when no timeout is set trips an assertion and is otherwise ignored.
  */
 
 static void
 ng_btsocket_sco_untimeout(ng_btsocket_sco_pcb_p pcb)
 {
 	mtx_assert(&pcb->pcb_mtx, MA_OWNED);
 
 	if ((pcb->flags & NG_BTSOCKET_SCO_TIMO) == 0) {
 		KASSERT(0,
 ("%s: No socket timeout?!\n", __func__));
 		return;
 	}
 
 	callout_stop(&pcb->timo);
 	pcb->flags &= ~NG_BTSOCKET_SCO_TIMO;
 } /* ng_btsocket_sco_untimeout */
 
 /*
  * Process timeout on socket (callout handler).
  *
  * Clears the timeout mark, sets the socket error to ETIMEDOUT and then
  * acts on the socket state: a connect or disconnect timeout closes the
  * socket, a send timeout drops the first record of the send queue and
  * wakes up writers. Any other state is only logged.
  */
 
 static void
 ng_btsocket_sco_process_timeout(void *xpcb)
 {
 	ng_btsocket_sco_pcb_p	 pcb = (ng_btsocket_sco_pcb_p) xpcb;
 
 	mtx_lock(&pcb->pcb_mtx);
 
 	pcb->flags &= ~NG_BTSOCKET_SCO_TIMO;
 	pcb->so->so_error = ETIMEDOUT;
 
 	switch (pcb->state) {
 	case NG_BTSOCKET_SCO_CONNECTING:
 		/* Connect timeout - close the socket */
 	case NG_BTSOCKET_SCO_DISCONNECTING:
 		/* Disconnect timeout - disconnect the socket anyway */
 		pcb->state = NG_BTSOCKET_SCO_CLOSED;
 		soisdisconnected(pcb->so);
 		break;
 
 	case NG_BTSOCKET_SCO_OPEN:
 		/* Send timeout - did not get NGM_HCI_SYNC_CON_QUEUE */
 		sbdroprecord(&pcb->so->so_snd);
 		sowwakeup(pcb->so);
 		/* XXX FIXME what to do with pcb->rt->pending??? */
 		break;
 
 	default:
 		NG_BTSOCKET_SCO_ERR(
 "%s: Invalid socket state=%d\n", __func__, pcb->state);
 		break;
 	}
 
 	mtx_unlock(&pcb->pcb_mtx);
 } /* ng_btsocket_sco_process_timeout */
 
Index: head/sys/netgraph/ng_ksocket.c
===================================================================
--- head/sys/netgraph/ng_ksocket.c	(revision 319721)
+++ head/sys/netgraph/ng_ksocket.c	(revision 319722)
@@ -1,1316 +1,1273 @@
 /*
  * ng_ksocket.c
  */
 
 /*-
  * Copyright (c) 1996-1999 Whistle Communications, Inc.
  * All rights reserved.
  * 
  * Subject to the following obligations and disclaimer of warranty, use and
  * redistribution of this software, in source or object code forms, with or
  * without modifications are expressly permitted by Whistle Communications;
  * provided, however, that:
  * 1. Any and all reproductions of the source or object code must include the
  *    copyright notice above and the following disclaimer of warranties; and
  * 2. No rights are granted, in any manner or form, to use Whistle
  *    Communications, Inc. trademarks, including the mark "WHISTLE
  *    COMMUNICATIONS" on advertising, endorsements, or otherwise except as
  *    such appears in the above copyright notice or in the software.
  * 
  * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND
  * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO
  * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE,
  * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT.
  * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY
  * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS
  * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE.
  * IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES
  * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING
  * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR
  * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY
  * OF SUCH DAMAGE.
  *
  * Author: Archie Cobbs <archie@freebsd.org>
  *
  * $FreeBSD$
  * $Whistle: ng_ksocket.c,v 1.1 1999/11/16 20:04:40 archie Exp $
  */
 
 /*
  * Kernel socket node type.  This node type is basically a kernel-mode
  * version of a socket... kind of like the reverse of the socket node type.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>
 #include <sys/malloc.h>
 #include <sys/ctype.h>
 #include <sys/protosw.h>
 #include <sys/errno.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/uio.h>
 #include <sys/un.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/netgraph.h>
 #include <netgraph/ng_parse.h>
 #include <netgraph/ng_ksocket.h>
 
 #include <netinet/in.h>
 #include <netinet/ip.h>
 
 #ifdef NG_SEPARATE_MALLOC
 static MALLOC_DEFINE(M_NETGRAPH_KSOCKET, "netgraph_ksock",
     "netgraph ksock node");
 #else
 #define M_NETGRAPH_KSOCKET M_NETGRAPH
 #endif
 
 /*
  * OFFSETOF(s, e): byte offset of member 'e' inside type 's', computed as
  * the difference between the member's address and the base address of a
  * null 's' pointer.
  * SADATA_OFFSET: offset of the sa_data member within struct sockaddr.
  */
 #define OFFSETOF(s, e) ((char *)&((s *)0)->e - (char *)((s *)0))
 #define SADATA_OFFSET	(OFFSETOF(struct sockaddr, sa_data))
 
 /* Node private data */
 struct ng_ksocket_private {
 	/* Back pointer to the owning netgraph node (set by the constructor) */
 	node_p		node;
 	/* The node's single hook; NULL until ng_ksocket_newhook() accepts one */
 	hook_p		hook;
 	/* The kernel socket behind this node; NULL when there is none */
 	struct socket	*so;
 	int		fn_sent;	/* FN call on incoming event was sent */
 	/* Embryonic child nodes; they are removed when this node shuts down */
 	LIST_HEAD(, ng_ksocket_private)	embryos;
 	/* Linkage in the parent's 'embryos' list while KSF_EMBRYONIC is set */
 	LIST_ENTRY(ng_ksocket_private)	siblings;
 	/* KSF_* flags */
 	u_int32_t	flags;
 	/* Token of the pending connect/accept request, echoed in the reply */
 	u_int32_t	response_token;
 	/* Return address the asynchronous connect/accept reply is sent to */
 	ng_ID_t		response_addr;
 };
 /* Pointer to a node's private data */
 typedef struct ng_ksocket_private *priv_p;
 
 /* Flags for priv_p */
 #define	KSF_CONNECTING	0x00000001	/* Waiting for connection complete */
 #define	KSF_ACCEPTING	0x00000002	/* Waiting for accept complete */
 #define	KSF_EOFSEEN	0x00000004	/* Have sent 0-length EOF mbuf */
 #define	KSF_CLONED	0x00000008	/* Cloned from an accepting socket */
 #define	KSF_EMBRYONIC	0x00000010	/* Cloned node with no hooks yet */
 
 /* Netgraph node methods */
 static ng_constructor_t	ng_ksocket_constructor;
 static ng_rcvmsg_t	ng_ksocket_rcvmsg;
 static ng_shutdown_t	ng_ksocket_shutdown;
 static ng_newhook_t	ng_ksocket_newhook;
 static ng_rcvdata_t	ng_ksocket_rcvdata;
 static ng_connect_t	ng_ksocket_connect;
 static ng_disconnect_t	ng_ksocket_disconnect;
 
 /* Alias structure */
 struct ng_ksocket_alias {
 	/* Textual alias; a NULL name terminates each alias table */
 	const char	*name;
 	/* Numeric constant the alias stands for (-1 in the terminator) */
 	const int	value;
 	/*
 	 * Protocol family the alias belongs to.  Only the protocol table
 	 * initializes it; it is left zero in the family and type tables.
 	 */
 	const int	family;
 };
 
 /* Protocol family aliases */
 static const struct ng_ksocket_alias ng_ksocket_families[] = {
 	{ "local",	PF_LOCAL	},
 	{ "inet",	PF_INET		},
 	{ "inet6",	PF_INET6	},
 	{ "atm",	PF_ATM		},
 	{ NULL,		-1		},
 };
 
 /* Socket type aliases */
 static const struct ng_ksocket_alias ng_ksocket_types[] = {
 	{ "stream",	SOCK_STREAM	},
 	{ "dgram",	SOCK_DGRAM	},
 	{ "raw",	SOCK_RAW	},
 	{ "rdm",	SOCK_RDM	},
 	{ "seqpacket",	SOCK_SEQPACKET	},
 	{ NULL,		-1		},
 };
 
 /* Protocol aliases */
 static const struct ng_ksocket_alias ng_ksocket_protos[] = {
 	{ "ip",		IPPROTO_IP,		PF_INET		},
 	{ "raw",	IPPROTO_RAW,		PF_INET		},
 	{ "icmp",	IPPROTO_ICMP,		PF_INET		},
 	{ "igmp",	IPPROTO_IGMP,		PF_INET		},
 	{ "tcp",	IPPROTO_TCP,		PF_INET		},
 	{ "udp",	IPPROTO_UDP,		PF_INET		},
 	{ "gre",	IPPROTO_GRE,		PF_INET		},
 	{ "esp",	IPPROTO_ESP,		PF_INET		},
 	{ "ah",		IPPROTO_AH,		PF_INET		},
 	{ "swipe",	IPPROTO_SWIPE,		PF_INET		},
 	{ "encap",	IPPROTO_ENCAP,		PF_INET		},
 	{ "divert",	IPPROTO_DIVERT,		PF_INET		},
 	{ "pim",	IPPROTO_PIM,		PF_INET		},
 	{ NULL,		-1					},
 };
 
 /* Helper functions */
-static int	ng_ksocket_check_accept(priv_p);
-static void	ng_ksocket_finish_accept(priv_p);
+static int	ng_ksocket_accept(priv_p);
 static int	ng_ksocket_incoming(struct socket *so, void *arg, int waitflag);
 static int	ng_ksocket_parse(const struct ng_ksocket_alias *aliases,
 			const char *s, int family);
 static void	ng_ksocket_incoming2(node_p node, hook_p hook,
 			void *arg1, int arg2);
 
 /************************************************************************
 			STRUCT SOCKADDR PARSE TYPE
  ************************************************************************/
 
 /* Get the length of the data portion of a generic struct sockaddr */
 static int
 ng_parse_generic_sockdata_getLength(const struct ng_parse_type *type,
 	const u_char *start, const u_char *buf)
 {
 	/* 'buf' points at sa_data; step back to the enclosing sockaddr */
 	const struct sockaddr *const addr =
 	    (const struct sockaddr *)(buf - SADATA_OFFSET);
 
 	/* A length that does not even cover the header means "no data" */
 	if (addr->sa_len < SADATA_OFFSET)
 		return (0);
 	return (addr->sa_len - SADATA_OFFSET);
 }
 
 /* Type for the variable length data portion of a generic struct sockaddr */
 static const struct ng_parse_type ng_ksocket_generic_sockdata_type = {
 	&ng_parse_bytearray_type,
 	&ng_parse_generic_sockdata_getLength
 };
 
 /* Type for a generic struct sockaddr */
 static const struct ng_parse_struct_field
     ng_parse_generic_sockaddr_type_fields[] = {
 	  { "len",	&ng_parse_uint8_type			},
 	  { "family",	&ng_parse_uint8_type			},
 	  { "data",	&ng_ksocket_generic_sockdata_type	},
 	  { NULL }
 };
 static const struct ng_parse_type ng_ksocket_generic_sockaddr_type = {
 	&ng_parse_struct_type,
 	&ng_parse_generic_sockaddr_type_fields
 };
 
 /* Convert a struct sockaddr from ASCII to binary.  If it's a protocol
    family that we specially handle, do that, otherwise defer to the
    generic parse type ng_ksocket_generic_sockaddr_type. */
 /*
  * Arguments:
  *   s, off	 - input string and the current offset into it; *off is
  *		   advanced past the input that was consumed
  *   buf, buflen - output buffer and its size; on success *buflen is set
  *		   to the length of the sockaddr that was written
  *
  * Accepted forms are a generic "{ ... }" structure, "local/<string>" and
  * "inet/<a.b.c.d>[:port]".
  *
  * Returns 0 on success, EINVAL for malformed or unsupported input, E2BIG
  * if a local pathname does not fit in a sockaddr, and ERANGE if the output
  * buffer is too small.
  */
 static int
 ng_ksocket_sockaddr_parse(const struct ng_parse_type *type,
 	const char *s, int *off, const u_char *const start,
 	u_char *const buf, int *buflen)
 {
 	struct sockaddr *const sa = (struct sockaddr *)buf;
 	enum ng_parse_token tok;
 	char fambuf[32];
 	int family, len;
 	char *t;
 
 	/* If next token is a left curly brace, use generic parse type */
 	if ((tok = ng_parse_get_token(s, off, &len)) == T_LBRACE) {
 		return (*ng_ksocket_generic_sockaddr_type.supertype->parse)
 		    (&ng_ksocket_generic_sockaddr_type,
 		    s, off, start, buf, buflen);
 	}
 
 	/* Get socket address family followed by a slash */
 	while (isspace(s[*off]))
 		(*off)++;
 	if ((t = strchr(s + *off, '/')) == NULL)
 		return (EINVAL);
 	if ((len = t - (s + *off)) > sizeof(fambuf) - 1)
 		return (EINVAL);
 	strncpy(fambuf, s + *off, len);
 	fambuf[len] = '\0';
 	*off += len + 1;
 	if ((family = ng_ksocket_parse(ng_ksocket_families, fambuf, 0)) == -1)
 		return (EINVAL);
 
 	/* Set family */
 	if (*buflen < SADATA_OFFSET)
 		return (ERANGE);
 	sa->sa_family = family;
 
 	/* Set family-specific data and length */
 	switch (sa->sa_family) {
 	case PF_LOCAL:		/* Get pathname */
 	    {
 		const int pathoff = OFFSETOF(struct sockaddr_un, sun_path);
 		struct sockaddr_un *const sun = (struct sockaddr_un *)sa;
 		int toklen, pathlen;
 		char *path;
 
 		if ((path = ng_get_string_token(s, off, &toklen, NULL)) == NULL)
 			return (EINVAL);
 		pathlen = strlen(path);
 		/*
 		 * The complete address length (header plus path) is stored
 		 * in the 8-bit sun_len field, so bound the whole sockaddr
 		 * rather than the path alone; otherwise sun_len would wrap.
 		 */
 		if (pathoff + pathlen > SOCK_MAXADDRLEN) {
 			free(path, M_NETGRAPH_KSOCKET);
 			return (E2BIG);
 		}
 		if (*buflen < pathoff + pathlen) {
 			free(path, M_NETGRAPH_KSOCKET);
 			return (ERANGE);
 		}
 		*off += toklen;
 		bcopy(path, sun->sun_path, pathlen);
 		sun->sun_len = pathoff + pathlen;
 		free(path, M_NETGRAPH_KSOCKET);
 		break;
 	    }
 
 	case PF_INET:		/* Get an IP address with optional port */
 	    {
 		struct sockaddr_in *const sin = (struct sockaddr_in *)sa;
 		int i;
 
 		/* The whole sockaddr_in is written below; make sure it fits */
 		if (*buflen < (int)sizeof(*sin))
 			return (ERANGE);
 
 		/* Parse this: <ipaddress>[:port] */
 		for (i = 0; i < 4; i++) {
 			u_long val;
 			char *eptr;
 
 			val = strtoul(s + *off, &eptr, 10);
 			if (val > 0xff || eptr == s + *off)
 				return (EINVAL);
 			*off += (eptr - (s + *off));
 			((u_char *)&sin->sin_addr)[i] = (u_char)val;
 			if (i < 3) {
 				if (s[*off] != '.')
 					return (EINVAL);
 				(*off)++;
 			} else if (s[*off] == ':') {
 				/* Optional port follows the last octet */
 				(*off)++;
 				val = strtoul(s + *off, &eptr, 10);
 				if (val > 0xffff || eptr == s + *off)
 					return (EINVAL);
 				*off += (eptr - (s + *off));
 				sin->sin_port = htons(val);
 			} else
 				sin->sin_port = 0;
 		}
 		bzero(&sin->sin_zero, sizeof(sin->sin_zero));
 		sin->sin_len = sizeof(*sin);
 		break;
 	    }
 
 #if 0
 	case PF_INET6:	/* XXX implement this someday */
 #endif
 
 	default:
 		return (EINVAL);
 	}
 
 	/* Done */
 	*buflen = sa->sa_len;
 	return (0);
 }
 
 /* Convert a struct sockaddr from binary to ASCII */
 static int
 ng_ksocket_sockaddr_unparse(const struct ng_parse_type *type,
 	const u_char *data, int *off, char *cbuf, int cbuflen)
 {
 	const struct sockaddr *sa = (const struct sockaddr *)(data + *off);
 	int slen = 0;
 
 	/* Output socket address, either in special or generic format */
 	switch (sa->sa_family) {
 	case PF_LOCAL:
 	    {
 		const int pathoff = OFFSETOF(struct sockaddr_un, sun_path);
 		const struct sockaddr_un *sun = (const struct sockaddr_un *)sa;
 		const int pathlen = sun->sun_len - pathoff;
 		char pathbuf[SOCK_MAXADDRLEN + 1];
 		char *pathtoken;
 
 		bcopy(sun->sun_path, pathbuf, pathlen);
 		if ((pathtoken = ng_encode_string(pathbuf, pathlen)) == NULL)
 			return (ENOMEM);
 		slen += snprintf(cbuf, cbuflen, "local/%s", pathtoken);
 		free(pathtoken, M_NETGRAPH_KSOCKET);
 		if (slen >= cbuflen)
 			return (ERANGE);
 		*off += sun->sun_len;
 		return (0);
 	    }
 
 	case PF_INET:
 	    {
 		const struct sockaddr_in *sin = (const struct sockaddr_in *)sa;
 
 		slen += snprintf(cbuf, cbuflen, "inet/%d.%d.%d.%d",
 		  ((const u_char *)&sin->sin_addr)[0],
 		  ((const u_char *)&sin->sin_addr)[1],
 		  ((const u_char *)&sin->sin_addr)[2],
 		  ((const u_char *)&sin->sin_addr)[3]);
 		if (sin->sin_port != 0) {
 			slen += snprintf(cbuf + strlen(cbuf),
 			    cbuflen - strlen(cbuf), ":%d",
 			    (u_int)ntohs(sin->sin_port));
 		}
 		if (slen >= cbuflen)
 			return (ERANGE);
 		*off += sizeof(*sin);
 		return(0);
 	    }
 
 #if 0
 	case PF_INET6:	/* XXX implement this someday */
 #endif
 
 	default:
 		return (*ng_ksocket_generic_sockaddr_type.supertype->unparse)
 		    (&ng_ksocket_generic_sockaddr_type,
 		    data, off, cbuf, cbuflen);
 	}
 }
 
 /* Parse type for struct sockaddr */
 static const struct ng_parse_type ng_ksocket_sockaddr_type = {
 	NULL,
 	NULL,
 	NULL,
 	&ng_ksocket_sockaddr_parse,
 	&ng_ksocket_sockaddr_unparse,
 	NULL		/* no such thing as a default struct sockaddr */
 };
 
 /************************************************************************
 		STRUCT NG_KSOCKET_SOCKOPT PARSE TYPE
  ************************************************************************/
 
 /* Get length of the struct ng_ksocket_sockopt value field, which is
    just the excess of the message argument portion over the length of
    the struct ng_ksocket_sockopt. */
 static int
 ng_parse_sockoptval_getLength(const struct ng_parse_type *type,
 	const u_char *start, const u_char *buf)
 {
 	const int valoff = OFFSETOF(struct ng_ksocket_sockopt, value);
 	const struct ng_ksocket_sockopt *opt;
 	const struct ng_mesg *hdr;
 
 	/* 'buf' points at the value field; back up to the enclosing struct */
 	opt = (const struct ng_ksocket_sockopt *)(buf - valoff);
 
 	/* The struct is the message argument, so the header precedes it */
 	hdr = (const struct ng_mesg *)((const u_char *)opt - sizeof(*hdr));
 
 	return hdr->header.arglen - sizeof(*opt);
 }
 
 /* Parse type for the option value part of a struct ng_ksocket_sockopt
    XXX Eventually, we should handle the different socket options specially.
    XXX This would avoid byte order problems, eg an integer value of 1 is
    XXX going to be "[1]" for little endian or "[3=1]" for big endian. */
 static const struct ng_parse_type ng_ksocket_sockoptval_type = {
 	&ng_parse_bytearray_type,
 	&ng_parse_sockoptval_getLength
 };
 
 /* Parse type for struct ng_ksocket_sockopt */
 static const struct ng_parse_struct_field ng_ksocket_sockopt_type_fields[]
 	= NG_KSOCKET_SOCKOPT_INFO(&ng_ksocket_sockoptval_type);
 static const struct ng_parse_type ng_ksocket_sockopt_type = {
 	&ng_parse_struct_type,
 	&ng_ksocket_sockopt_type_fields
 };
 
 /* Parse type for struct ng_ksocket_accept */
 static const struct ng_parse_struct_field ng_ksocket_accept_type_fields[]
 	= NGM_KSOCKET_ACCEPT_INFO;
 static const struct ng_parse_type ng_ksocket_accept_type = {
 	&ng_parse_struct_type,
 	&ng_ksocket_accept_type_fields
 };
 
 /* List of commands and how to convert arguments to/from ASCII */
 static const struct ng_cmdlist ng_ksocket_cmds[] = {
 	{
 	  NGM_KSOCKET_COOKIE,
 	  NGM_KSOCKET_BIND,
 	  "bind",
 	  &ng_ksocket_sockaddr_type,
 	  NULL
 	},
 	{
 	  NGM_KSOCKET_COOKIE,
 	  NGM_KSOCKET_LISTEN,
 	  "listen",
 	  &ng_parse_int32_type,
 	  NULL
 	},
 	{
 	  NGM_KSOCKET_COOKIE,
 	  NGM_KSOCKET_ACCEPT,
 	  "accept",
 	  NULL,
 	  &ng_ksocket_accept_type
 	},
 	{
 	  NGM_KSOCKET_COOKIE,
 	  NGM_KSOCKET_CONNECT,
 	  "connect",
 	  &ng_ksocket_sockaddr_type,
 	  &ng_parse_int32_type
 	},
 	{
 	  NGM_KSOCKET_COOKIE,
 	  NGM_KSOCKET_GETNAME,
 	  "getname",
 	  NULL,
 	  &ng_ksocket_sockaddr_type
 	},
 	{
 	  NGM_KSOCKET_COOKIE,
 	  NGM_KSOCKET_GETPEERNAME,
 	  "getpeername",
 	  NULL,
 	  &ng_ksocket_sockaddr_type
 	},
 	{
 	  NGM_KSOCKET_COOKIE,
 	  NGM_KSOCKET_SETOPT,
 	  "setopt",
 	  &ng_ksocket_sockopt_type,
 	  NULL
 	},
 	{
 	  NGM_KSOCKET_COOKIE,
 	  NGM_KSOCKET_GETOPT,
 	  "getopt",
 	  &ng_ksocket_sockopt_type,
 	  &ng_ksocket_sockopt_type
 	},
 	{ 0 }
 };
 
 /* Node type descriptor */
 static struct ng_type ng_ksocket_typestruct = {
 	.version =	NG_ABI_VERSION,
 	.name =		NG_KSOCKET_NODE_TYPE,
 	.constructor =	ng_ksocket_constructor,
 	.rcvmsg =	ng_ksocket_rcvmsg,
 	.shutdown =	ng_ksocket_shutdown,
 	.newhook =	ng_ksocket_newhook,
 	.connect =	ng_ksocket_connect,
 	.rcvdata =	ng_ksocket_rcvdata,
 	.disconnect =	ng_ksocket_disconnect,
 	.cmdlist =	ng_ksocket_cmds,
 };
 NETGRAPH_INIT(ksocket, &ng_ksocket_typestruct);
 
 #define ERROUT(x)	do { error = (x); goto done; } while (0)
 
 /************************************************************************
 			NETGRAPH NODE STUFF
  ************************************************************************/
 
 /*
  * Node type constructor
  * The NODE part is assumed to be all set up.
  * There is already a reference to the node for us.
  */
 static int
 ng_ksocket_constructor(node_p node)
 {
 	priv_p sc;
 
 	/* Get a zeroed private area; fail without sleeping if memory is short */
 	sc = malloc(sizeof(*sc), M_NETGRAPH_KSOCKET, M_NOWAIT | M_ZERO);
 	if (sc == NULL)
 		return (ENOMEM);
 
 	/* Link node and private data to each other */
 	sc->node = node;
 	NG_NODE_SET_PRIVATE(node, sc);
 
 	/* No embryonic children yet */
 	LIST_INIT(&sc->embryos);
 
 	return (0);
 }
 
 /*
  * Give our OK for a hook to be added. The hook name is of the
  * form "<family>/<type>/<proto>" where the three components may
  * be decimal numbers or else aliases from the above lists.
  *
  * Connecting a hook amounts to opening the socket.  Disconnecting
  * the hook closes the socket and destroys the node as well.
  */
 static int
 ng_ksocket_newhook(node_p node, hook_p hook, const char *name0)
 {
 	struct thread *td = curthread;	/* XXX broken */
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	char name[NG_HOOKSIZ];
 	char *famstr, *typestr, *protostr;
 	int family, type, protocol, error;
 
 	/* Only one hook per node is allowed */
 	if (priv->hook != NULL)
 		return (EISCONN);
 
 	if ((priv->flags & KSF_CLONED) == 0) {
 		/*
 		 * Ordinary node: split a private copy of the hook name
 		 * "<family>/<type>/<proto>" at the slashes and translate
 		 * each component.
 		 */
 		snprintf(name, sizeof(name), "%s", name0);
 
 		famstr = name;
 		typestr = strchr(famstr, '/');
 		if (typestr == NULL)
 			return (EINVAL);
 		*typestr++ = '\0';
 		family = ng_ksocket_parse(ng_ksocket_families, famstr, 0);
 		if (family == -1)
 			return (EINVAL);
 
 		protostr = strchr(typestr, '/');
 		if (protostr == NULL)
 			return (EINVAL);
 		*protostr++ = '\0';
 		type = ng_ksocket_parse(ng_ksocket_types, typestr, 0);
 		if (type == -1)
 			return (EINVAL);
 
 		protocol = ng_ksocket_parse(ng_ksocket_protos, protostr,
 		    family);
 		if (protocol == -1)
 			return (EINVAL);
 
 		/* Open the socket described by the hook name */
 		error = socreate(family, &priv->so, type, protocol,
 		    td->td_ucred, td);
 		if (error != 0)
 			return (error);
 
 		/* XXX call soreserve() ? */
 	} else if ((priv->flags & KSF_EMBRYONIC) != 0) {
 		/*
 		 * Cloned node getting its first hook: it stops being an
 		 * embryo, so unlink it from the parent's embryo list.
 		 */
 		LIST_REMOVE(priv, siblings);
 		priv->flags &= ~KSF_EMBRYONIC;
 	}
 
 	/* Accept the hook */
 	priv->hook = hook;
 
 	/*
 	 * With misconfigured routing a packet may re-enter the ksocket
 	 * node recursively.  Force queueing on the hook to decouple the
 	 * stack and avoid panics about sleeping with locks held.
 	 */
 	NG_HOOK_FORCE_QUEUE(hook);
 
 	return (0);
 }
 
 static int
 ng_ksocket_connect(hook_p hook)
 {
 	const node_p node = NG_HOOK_NODE(hook);
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	struct socket *const so = priv->so;
 
 	/* Register our upcall for events on the receive buffer ... */
 	SOCKBUF_LOCK(&so->so_rcv);
 	soupcall_set(so, SO_RCV, ng_ksocket_incoming, node);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	/* ... and on the send buffer */
 	SOCKBUF_LOCK(&so->so_snd);
 	soupcall_set(so, SO_SND, ng_ksocket_incoming, node);
 	SOCKBUF_UNLOCK(&so->so_snd);
 
 	/* Mark the socket non-blocking */
 	SOCK_LOCK(so);
 	so->so_state |= SS_NBIO;
 	SOCK_UNLOCK(so);
 
 	/*
 	 * A cloned socket may already have received upcalls that could
 	 * not be handled while there was no hook.  The upcall function
 	 * cannot be called directly from here, because the hook is not
 	 * connected until this function has returned, so a function call
 	 * is sent to ourselves instead.  As a netgraph operation we hold
 	 * a lock on this node, which forces that request to be queued
 	 * and run a bit later.
 	 *
 	 * XXX This is dubious.  Upcalls that arrived (on another
 	 * processor) between hook creation and now sit earlier in the
 	 * queue than the request that finalises the hook; by the time
 	 * the hook is finalised they will have run and been discarded
 	 * for lack of a hook.  This is a byproduct of hooks being
 	 * created by three daisy-chained asynchronous events.  Also,
 	 * queued operations are run immediately after this one, so if
 	 * the other end of the hook has not been finalised yet those
 	 * operations will fail.
 	 */
 	if ((priv->flags & KSF_CLONED) != 0)
 		ng_send_fn(node, NULL, &ng_ksocket_incoming2, so, M_NOWAIT);
 
 	return (0);
 }
 
 /*
  * Receive a control message
  */
 static int
 ng_ksocket_rcvmsg(node_p node, item_p item, hook_p lasthook)
 {
 	struct thread *td = curthread;	/* XXX broken */
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	struct socket *const so = priv->so;
 	struct ng_mesg *resp = NULL;
 	int error = 0;
 	struct ng_mesg *msg;
 	ng_ID_t raddr;
 
 	/*
 	 * Take the message out of the item.  Every path below ends at the
 	 * 'done' label, where the message is freed.  Only commands carrying
 	 * NGM_KSOCKET_COOKIE are understood; anything else gives EINVAL.
 	 */
 	NGI_GET_MSG(item, msg);
 	switch (msg->header.typecookie) {
 	case NGM_KSOCKET_COOKIE:
 		switch (msg->header.cmd) {
 		case NGM_KSOCKET_BIND:
 		    {
 			struct sockaddr *const sa
 			    = (struct sockaddr *)msg->data;
 
 			/* Sanity check */
 			if (msg->header.arglen < SADATA_OFFSET
 			    || msg->header.arglen < sa->sa_len)
 				ERROUT(EINVAL);
 			if (so == NULL)
 				ERROUT(ENXIO);
 
 			/* Bind */
 			error = sobind(so, sa, td);
 			break;
 		    }
 		case NGM_KSOCKET_LISTEN:
 		    {
 			/* Sanity check */
 			if (msg->header.arglen != sizeof(int32_t))
 				ERROUT(EINVAL);
 			if (so == NULL)
 				ERROUT(ENXIO);
 
 			/* Listen */
+			so->so_state |= SS_NBIO;
 			error = solisten(so, *((int32_t *)msg->data), td);
 			break;
 		    }
 
 		case NGM_KSOCKET_ACCEPT:
 		    {
 			/* Sanity check */
 			if (msg->header.arglen != 0)
 				ERROUT(EINVAL);
 			if (so == NULL)
 				ERROUT(ENXIO);
 
 			/* Make sure the socket is capable of accepting */
 			if (!(so->so_options & SO_ACCEPTCONN))
 				ERROUT(EINVAL);
 			if (priv->flags & KSF_ACCEPTING)
 				ERROUT(EALREADY);
 
-			error = ng_ksocket_check_accept(priv);
-			if (error != 0 && error != EWOULDBLOCK)
-				ERROUT(error);
-
 			/*
 			 * If a connection is already complete, take it.
 			 * Otherwise let the upcall function deal with
 			 * the connection when it comes in.
 			 */
+			error = ng_ksocket_accept(priv);
+			if (error != 0 && error != EWOULDBLOCK)
+				ERROUT(error);
 			priv->response_token = msg->header.token;
 			raddr = priv->response_addr = NGI_RETADDR(item);
-			if (error == 0) {
-				ng_ksocket_finish_accept(priv);
-			} else
-				priv->flags |= KSF_ACCEPTING;
 			break;
 		    }
 
 		case NGM_KSOCKET_CONNECT:
 		    {
 			struct sockaddr *const sa
 			    = (struct sockaddr *)msg->data;
 
 			/* Sanity check */
 			if (msg->header.arglen < SADATA_OFFSET
 			    || msg->header.arglen < sa->sa_len)
 				ERROUT(EINVAL);
 			if (so == NULL)
 				ERROUT(ENXIO);
 
 			/* Do connect */
 			if ((so->so_state & SS_ISCONNECTING) != 0)
 				ERROUT(EALREADY);
 			if ((error = soconnect(so, sa, td)) != 0) {
 				so->so_state &= ~SS_ISCONNECTING;
 				ERROUT(error);
 			}
 			/*
 			 * Still connecting: remember who asked and return
 			 * EINPROGRESS now.  The final result is sent from
 			 * ng_ksocket_incoming2() while KSF_CONNECTING is set.
 			 */
 			if ((so->so_state & SS_ISCONNECTING) != 0) {
 				/* We will notify the sender when we connect */
 				priv->response_token = msg->header.token;
 				raddr = priv->response_addr = NGI_RETADDR(item);
 				priv->flags |= KSF_CONNECTING;
 				ERROUT(EINPROGRESS);
 			}
 			break;
 		    }
 
 		case NGM_KSOCKET_GETNAME:
 		case NGM_KSOCKET_GETPEERNAME:
 		    {
 			int (*func)(struct socket *so, struct sockaddr **nam);
 			struct sockaddr *sa = NULL;
 			int len;
 
 			/* Sanity check */
 			if (msg->header.arglen != 0)
 				ERROUT(EINVAL);
 			if (so == NULL)
 				ERROUT(ENXIO);
 
 			/* Get function */
 			if (msg->header.cmd == NGM_KSOCKET_GETPEERNAME) {
 				if ((so->so_state
 				    & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
 					ERROUT(ENOTCONN);
 				func = so->so_proto->pr_usrreqs->pru_peeraddr;
 			} else
 				func = so->so_proto->pr_usrreqs->pru_sockaddr;
 
 			/* Get local or peer address */
 			if ((error = (*func)(so, &sa)) != 0)
 				goto bail;
 			len = (sa == NULL) ? 0 : sa->sa_len;
 
 			/* Send it back in a response */
 			NG_MKRESPONSE(resp, msg, len, M_NOWAIT);
 			if (resp == NULL) {
 				error = ENOMEM;
 				goto bail;
 			}
 			bcopy(sa, resp->data, len);
 
 		bail:
 			/* Cleanup */
 			if (sa != NULL)
 				free(sa, M_SONAME);
 			break;
 		    }
 
 		case NGM_KSOCKET_GETOPT:
 		    {
 			struct ng_ksocket_sockopt *ksopt =
 			    (struct ng_ksocket_sockopt *)msg->data;
 			struct sockopt sopt;
 
 			/* Sanity check */
 			if (msg->header.arglen != sizeof(*ksopt))
 				ERROUT(EINVAL);
 			if (so == NULL)
 				ERROUT(ENXIO);
 
 			/* Get response with room for option value */
 			NG_MKRESPONSE(resp, msg, sizeof(*ksopt)
 			    + NG_KSOCKET_MAX_OPTLEN, M_NOWAIT);
 			if (resp == NULL)
 				ERROUT(ENOMEM);
 
 			/* Get socket option, and put value in the response */
 			sopt.sopt_dir = SOPT_GET;
 			sopt.sopt_level = ksopt->level;
 			sopt.sopt_name = ksopt->name;
 			sopt.sopt_td = NULL;
 			sopt.sopt_valsize = NG_KSOCKET_MAX_OPTLEN;
 			/* From here on ksopt refers to the response, not the request */
 			ksopt = (struct ng_ksocket_sockopt *)resp->data;
 			sopt.sopt_val = ksopt->value;
 			if ((error = sogetopt(so, &sopt)) != 0) {
 				NG_FREE_MSG(resp);
 				break;
 			}
 
 			/* Set actual value length */
 			resp->header.arglen = sizeof(*ksopt)
 			    + sopt.sopt_valsize;
 			break;
 		    }
 
 		case NGM_KSOCKET_SETOPT:
 		    {
 			struct ng_ksocket_sockopt *const ksopt =
 			    (struct ng_ksocket_sockopt *)msg->data;
 			/* Argument bytes left over after the fixed-size header */
 			const int valsize = msg->header.arglen - sizeof(*ksopt);
 			struct sockopt sopt;
 
 			/* Sanity check */
 			if (valsize < 0)
 				ERROUT(EINVAL);
 			if (so == NULL)
 				ERROUT(ENXIO);
 
 			/* Set socket option */
 			sopt.sopt_dir = SOPT_SET;
 			sopt.sopt_level = ksopt->level;
 			sopt.sopt_name = ksopt->name;
 			sopt.sopt_val = ksopt->value;
 			sopt.sopt_valsize = valsize;
 			sopt.sopt_td = NULL;
 			error = sosetopt(so, &sopt);
 			break;
 		    }
 
 		default:
 			error = EINVAL;
 			break;
 		}
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	/*
 	 * Common exit, also reached through ERROUT(): hand 'error' and the
 	 * response (NULL if none was built) to NG_RESPOND_MSG, then free the
 	 * request message.
 	 */
 done:
 	NG_RESPOND_MSG(error, node, item, resp);
 	NG_FREE_MSG(msg);
 	return (error);
 }
 
 /*
  * Receive incoming data on our hook.  Send it out the socket.
  */
 static int
 ng_ksocket_rcvdata(hook_p hook, item_p item)
 {
 	struct thread *td = curthread;	/* XXX broken */
 	const node_p node = NG_HOOK_NODE(hook);
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	struct socket *const so = priv->so;
 	struct sockaddr *dst = NULL;
 	struct sa_tag *tag;
 	struct mbuf *m;
 #ifdef ALIGNED_POINTER
 	struct mbuf *packed;
 #endif /* ALIGNED_POINTER */
 
 	/* Pull the mbuf chain out of the item, then release the item */
 	NGI_GET_M(item, m);
 	NG_FREE_ITEM(item);
 
 #ifdef ALIGNED_POINTER
 	/*
 	 * If the data fails the platform's uint32_t alignment check,
 	 * defragment the chain; drop the packet if that is not possible.
 	 */
 	if (!ALIGNED_POINTER(mtod(m, caddr_t), uint32_t)) {
 		packed = m_defrag(m, M_NOWAIT);
 		if (packed == NULL) {
 			m_freem(m);
 			return (ENOBUFS);
 		}
 		m = packed;
 	}
 #endif /* ALIGNED_POINTER */
 
 	/*
 	 * A destination address may travel with the packet as a tag.  It
 	 * is honoured when the tag was written by this node or carries a
 	 * zero id (provided by a third party).
 	 */
 	tag = (struct sa_tag *)m_tag_locate(m, NGM_KSOCKET_COOKIE,
 	    NG_KSOCKET_TAG_SOCKADDR, NULL);
 	if (tag != NULL && (tag->id == 0 || tag->id == NG_NODE_ID(node)))
 		dst = &tag->sa;
 
 	/* Clear broadcast/multicast flags to prevent addressing problems */
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 
 	/* Hand the packet to the socket and report its verdict */
 	return (sosend(so, dst, 0, m, 0, 0, td));
 }
 
 /*
  * Destroy node
  */
 static int
 ng_ksocket_shutdown(node_p node)
 {
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	struct socket *const so = priv->so;
 	priv_p child;
 
 	/* Detach both upcalls and close the socket, if there is one */
 	if (so != NULL) {
 		SOCKBUF_LOCK(&so->so_rcv);
 		soupcall_clear(so, SO_RCV);
 		SOCKBUF_UNLOCK(&so->so_rcv);
 
 		SOCKBUF_LOCK(&so->so_snd);
 		soupcall_clear(so, SO_SND);
 		SOCKBUF_UNLOCK(&so->so_snd);
 
 		soclose(so);
 		priv->so = NULL;
 	}
 
 	/* An embryo is still on its parent's list; unlink it */
 	if ((priv->flags & KSF_EMBRYONIC) != 0) {
 		LIST_REMOVE(priv, siblings);
 		priv->flags &= ~KSF_EMBRYONIC;
 	}
 
 	/* Ask each of our own embryonic children to go away */
 	while ((child = LIST_FIRST(&priv->embryos)) != NULL)
 		ng_rmnode_self(child->node);
 
 	/* Wipe and release the private data, then drop the node reference */
 	bzero(priv, sizeof(*priv));
 	free(priv, M_NETGRAPH_KSOCKET);
 	NG_NODE_SET_PRIVATE(node, NULL);
 	NG_NODE_UNREF(node);		/* let the node escape */
 	return (0);
 }
 
 /*
  * Hook disconnection
  */
 static int
 ng_ksocket_disconnect(hook_p hook)
 {
 	const node_p node = NG_HOOK_NODE(hook);
 
 	/* The node has a single hook, so none may be left at this point */
 	KASSERT(NG_NODE_NUMHOOKS(node) == 0,
 	    ("%s: numhooks=%d?", __func__, NG_NODE_NUMHOOKS(node)));
 
 	/* Losing the hook takes the node down, unless it is already invalid */
 	if (NG_NODE_IS_VALID(node))
 		ng_rmnode_self(node);
 
 	return (0);
 }
 
 /************************************************************************
 			HELPER STUFF
  ************************************************************************/
 /*
  * You should not "just call" a netgraph node function from an external
  * asynchronous event. This is because in doing so you are ignoring the
  * locking on the netgraph nodes. Instead call your function via ng_send_fn().
  * This will call the function you chose, but will first do all the
  * locking rigmarole. Your function MAY only be called at some distant future
  * time (several millisecs away) so don't give it any arguments
  * that may be revoked soon (e.g. on your stack).
  *
  * To decouple stack, we use queue version of ng_send_fn().
  */
 
 static int
 ng_ksocket_incoming(struct socket *so, void *arg, int waitflag)
 {
 	const node_p node = arg;
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	int sendflags = NG_QUEUE;
 
 	/* Always queue; additionally allow waiting if the caller permits it */
 	if ((waitflag & M_WAITOK) != 0)
 		sendflags |= NG_WAITOK;
 
 	/*
 	 * The node is not locked here, but once we are called we assume
 	 * that it exists and that its private area is valid, so it can be
 	 * accessed with some care.  fn_sent records that an incoming event
 	 * has already been sent to the node, to avoid needless queue
 	 * thrashing: if it is already set there is nothing more to do.
 	 */
 	if (!atomic_cmpset_int(&priv->fn_sent, 0, 1))
 		return (SU_OK);
 
 	/* Sending failed: clear the mark so a later event can try again */
 	if (ng_send_fn1(node, NULL, &ng_ksocket_incoming2, so, 0, sendflags))
 		atomic_store_rel_int(&priv->fn_sent, 0);
 
 	return (SU_OK);
 }
 
 
 /*
  * When incoming data is appended to the socket, we get notified here.
  * This is also called whenever a significant event occurs for the socket.
  * Our original caller may have queued this event some time ago and
  * we cannot trust that he even still exists. The node however is being
  * held with a reference by the queueing code and guaranteed to be valid.
  */
 static void
 ng_ksocket_incoming2(node_p node, hook_p hook, void *arg1, int arg2)
 {
 	struct socket *so = arg1;
 	const priv_p priv = NG_NODE_PRIVATE(node);
 	struct ng_mesg *response;
 	int error;
 
 	KASSERT(so == priv->so, ("%s: wrong socket", __func__));
 
 	/* Allow next incoming event to be queued. */
 	atomic_store_rel_int(&priv->fn_sent, 0);
 
 	/* Check whether a pending connect operation has completed */
 	if (priv->flags & KSF_CONNECTING) {
 		/*
 		 * A pending socket error ends the connect attempt; it is
 		 * consumed here and reported in the response below.
 		 */
 		if ((error = so->so_error) != 0) {
 			so->so_error = 0;
 			so->so_state &= ~SS_ISCONNECTING;
 		}
 		if (!(so->so_state & SS_ISCONNECTING)) {
 			NG_MKMESSAGE(response, NGM_KSOCKET_COOKIE,
 			    NGM_KSOCKET_CONNECT, sizeof(int32_t), M_NOWAIT);
 			if (response != NULL) {
 				response->header.flags |= NGF_RESP;
 				response->header.token = priv->response_token;
 				*(int32_t *)response->data = error;
 				/*
 				 * send an async "response" message
 				 * to the node that set us up
 				 * (if it still exists)
 				 */
 				NG_SEND_MSG_ID(error, node,
 				    response, priv->response_addr, 0);
 			}
 			priv->flags &= ~KSF_CONNECTING;
 		}
 	}
 
 	/* Check whether a pending accept operation has completed */
-	if (priv->flags & KSF_ACCEPTING) {
-		error = ng_ksocket_check_accept(priv);
-		if (error != EWOULDBLOCK)
-			priv->flags &= ~KSF_ACCEPTING;
-		if (error == 0)
-			ng_ksocket_finish_accept(priv);
-	}
+	if (priv->flags & KSF_ACCEPTING)
+		(void )ng_ksocket_accept(priv);
 
 	/*
 	 * If we don't have a hook, we must handle data events later.  When
 	 * the hook gets created and is connected, this upcall function
 	 * will be called again.
 	 */
 	if (priv->hook == NULL)
 		return;
 
 	/* Read and forward available mbufs. */
 	while (1) {
 		struct uio uio;
 		struct sockaddr *sa;
 		struct mbuf *m;
 		int flags;
 
 		/* Try to get next packet from socket. */
 		uio.uio_td = NULL;
 		uio.uio_resid = IP_MAXPACKET;
 		flags = MSG_DONTWAIT;
 		sa = NULL;
 		/* The peer address is requested only while not connected */
 		if ((error = soreceive(so, (so->so_state & SS_ISCONNECTED) ?
 		    NULL : &sa, &uio, &m, NULL, &flags)) != 0)
 			break;
 
 		/* See if we got anything. */
 		/* Data flagged as truncated is discarded */
 		if (flags & MSG_TRUNC) {
 			m_freem(m);
 			m = NULL;
 		}
 		if (m == NULL) {
 			if (sa != NULL)
 				free(sa, M_SONAME);
 			break;
 		}
 
 		KASSERT(m->m_nextpkt == NULL, ("%s: nextpkt", __func__));
 
 		/*
 		 * Stream sockets do not have packet boundaries, so
 		 * we have to allocate a header mbuf and attach the
 		 * stream of data to it.
 		 */
 		if (so->so_type == SOCK_STREAM) {
 			struct mbuf *mh;
 
 			mh = m_gethdr(M_NOWAIT, MT_DATA);
 			if (mh == NULL) {
 				m_freem(m);
 				if (sa != NULL)
 					free(sa, M_SONAME);
 				break;
 			}
 
 			/* Chain the data behind the header and total its length */
 			mh->m_next = m;
 			for (; m; m = m->m_next)
 				mh->m_pkthdr.len += m->m_len;
 			m = mh;
 		}
 
 		/* Put peer's socket address (if any) into a tag */
 		if (sa != NULL) {
 			struct sa_tag	*stag;
 
 			stag = (struct sa_tag *)m_tag_alloc(NGM_KSOCKET_COOKIE,
 			    NG_KSOCKET_TAG_SOCKADDR, sizeof(ng_ID_t) +
 			    sa->sa_len, M_NOWAIT);
 			/* No memory for the tag: forward the data without it */
 			if (stag == NULL) {
 				free(sa, M_SONAME);
 				goto sendit;
 			}
 			bcopy(sa, &stag->sa, sa->sa_len);
 			free(sa, M_SONAME);
 			stag->id = NG_NODE_ID(node);
 			m_tag_prepend(m, &stag->tag);
 		}
 
 sendit:		/* Forward data with optional peer sockaddr as packet tag */
 		NG_SEND_DATA_ONLY(error, priv->hook, m);
 	}
 
 	/*
 	 * If the peer has closed the connection, forward a 0-length mbuf
 	 * to indicate end-of-file.
 	 */
 	if (so->so_rcv.sb_state & SBS_CANTRCVMORE &&
 	    !(priv->flags & KSF_EOFSEEN)) {
 		struct mbuf *m;
 
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m != NULL)
 			NG_SEND_DATA_ONLY(error, priv->hook, m);
 		/* EOF is marked as seen even if no mbuf could be allocated */
 		priv->flags |= KSF_EOFSEEN;
 	}
 }
 
-/*
- * Check for a completed incoming connection and return 0 if one is found.
- * Otherwise return the appropriate error code.
- */
 static int
-ng_ksocket_check_accept(priv_p priv)
+ng_ksocket_accept(priv_p priv)
 {
 	struct socket *const head = priv->so;
-	int error;
-
-	if ((error = head->so_error) != 0) {
-		head->so_error = 0;
-		return error;
-	}
-	/* Unlocked read. */
-	if (TAILQ_EMPTY(&head->so_comp)) {
-		if (head->so_rcv.sb_state & SBS_CANTRCVMORE)
-			return ECONNABORTED;
-		return EWOULDBLOCK;
-	}
-	return 0;
-}
-
-/*
- * Handle the first completed incoming connection, assumed to be already
- * on the socket's so_comp queue.
- */
-static void
-ng_ksocket_finish_accept(priv_p priv)
-{
-	struct socket *const head = priv->so;
 	struct socket *so;
 	struct sockaddr *sa = NULL;
 	struct ng_mesg *resp;
 	struct ng_ksocket_accept *resp_data;
 	node_p node;
 	priv_p priv2;
 	int len;
 	int error;
 
-	ACCEPT_LOCK();
-	so = TAILQ_FIRST(&head->so_comp);
-	if (so == NULL) {	/* Should never happen */
-		ACCEPT_UNLOCK();
-		return;
+	SOLISTEN_LOCK(head);
+	error = solisten_dequeue(head, &so, SOCK_NONBLOCK);
+	if (error == EWOULDBLOCK) {
+		priv->flags |= KSF_ACCEPTING;
+		return (error);
 	}
-	TAILQ_REMOVE(&head->so_comp, so, so_list);
-	head->so_qlen--;
-	so->so_qstate &= ~SQ_COMP;
-	so->so_head = NULL;
-	SOCK_LOCK(so);
-	soref(so);
-	so->so_state |= SS_NBIO;
-	SOCK_UNLOCK(so);
-	ACCEPT_UNLOCK();
+	priv->flags &= ~KSF_ACCEPTING;
+	if (error)
+		return (error);
 
-	/* XXX KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); */
-
 	soaccept(so, &sa);
 
 	len = OFFSETOF(struct ng_ksocket_accept, addr);
 	if (sa != NULL)
 		len += sa->sa_len;
 
 	NG_MKMESSAGE(resp, NGM_KSOCKET_COOKIE, NGM_KSOCKET_ACCEPT, len,
 	    M_NOWAIT);
 	if (resp == NULL) {
 		soclose(so);
 		goto out;
 	}
 	resp->header.flags |= NGF_RESP;
 	resp->header.token = priv->response_token;
 
 	/* Clone a ksocket node to wrap the new socket */
 	error = ng_make_node_common(&ng_ksocket_typestruct, &node);
 	if (error) {
 		free(resp, M_NETGRAPH);
 		soclose(so);
 		goto out;
 	}
 
 	if (ng_ksocket_constructor(node) != 0) {
 		NG_NODE_UNREF(node);
 		free(resp, M_NETGRAPH);
 		soclose(so);
 		goto out;
 	}
 
 	priv2 = NG_NODE_PRIVATE(node);
 	priv2->so = so;
 	priv2->flags |= KSF_CLONED | KSF_EMBRYONIC;
 
 	/*
 	 * Insert the cloned node into a list of embryonic children
 	 * on the parent node.  When a hook is created on the cloned
 	 * node it will be removed from this list.  When the parent
 	 * is destroyed it will destroy any embryonic children it has.
 	 */
 	LIST_INSERT_HEAD(&priv->embryos, priv2, siblings);
 
 	SOCKBUF_LOCK(&so->so_rcv);
 	soupcall_set(so, SO_RCV, ng_ksocket_incoming, node);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	SOCKBUF_LOCK(&so->so_snd);
 	soupcall_set(so, SO_SND, ng_ksocket_incoming, node);
 	SOCKBUF_UNLOCK(&so->so_snd);
 
 	/* Fill in the response data and send it or return it to the caller */
 	resp_data = (struct ng_ksocket_accept *)resp->data;
 	resp_data->nodeid = NG_NODE_ID(node);
 	if (sa != NULL)
 		bcopy(sa, &resp_data->addr, sa->sa_len);
 	NG_SEND_MSG_ID(error, node, resp, priv->response_addr, 0);
 
 out:
 	if (sa != NULL)
 		free(sa, M_SONAME);
+
+	return (0);
 }
 
 /*
  * Parse out either an integer value or an alias.
  */
 static int
 ng_ksocket_parse(const struct ng_ksocket_alias *aliases,
 	const char *s, int family)
 {
 	int k, val;
 	char *eptr;
 
 	/* Try aliases */
 	for (k = 0; aliases[k].name != NULL; k++) {
 		if (strcmp(s, aliases[k].name) == 0
 		    && aliases[k].family == family)
 			return aliases[k].value;
 	}
 
 	/* Try parsing as a number */
 	val = (int)strtoul(s, &eptr, 10);
 	if (val < 0 || *eptr != '\0')
 		return (-1);
 	return (val);
 }
 
Index: head/sys/netinet/sctp_input.c
===================================================================
--- head/sys/netinet/sctp_input.c	(revision 319721)
+++ head/sys/netinet/sctp_input.c	(revision 319722)
@@ -1,6208 +1,6218 @@
 /*-
  * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
  * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * a) Redistributions of source code must retain the above copyright notice,
  *    this list of conditions and the following disclaimer.
  *
  * b) Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the distribution.
  *
  * c) Neither the name of Cisco Systems, Inc. nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp_var.h>
 #include <netinet/sctp_sysctl.h>
 #include <netinet/sctp_pcb.h>
 #include <netinet/sctp_header.h>
 #include <netinet/sctputil.h>
 #include <netinet/sctp_output.h>
 #include <netinet/sctp_input.h>
 #include <netinet/sctp_auth.h>
 #include <netinet/sctp_indata.h>
 #include <netinet/sctp_asconf.h>
 #include <netinet/sctp_bsd_addr.h>
 #include <netinet/sctp_timer.h>
 #include <netinet/sctp_crc32.h>
 #if defined(INET) || defined(INET6)
 #include <netinet/udp.h>
 #endif
 #include <sys/smp.h>
 
 
 
 static void
 sctp_stop_all_cookie_timers(struct sctp_tcb *stcb)
 {
 	struct sctp_nets *net;
 
 	/*
 	 * This now not only stops all cookie timers it also stops any INIT
 	 * timers as well. This will make sure that the timers are stopped
 	 * in all collision cases.
 	 */
 	SCTP_TCB_LOCK_ASSERT(stcb);
 	TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 		if (net->rxt_timer.type == SCTP_TIMER_TYPE_COOKIE) {
 			sctp_timer_stop(SCTP_TIMER_TYPE_COOKIE,
 			    stcb->sctp_ep,
 			    stcb,
 			    net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_1);
 		} else if (net->rxt_timer.type == SCTP_TIMER_TYPE_INIT) {
 			sctp_timer_stop(SCTP_TIMER_TYPE_INIT,
 			    stcb->sctp_ep,
 			    stcb,
 			    net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_2);
 		}
 	}
 }
 
 /* INIT handler */
 static void
 sctp_handle_init(struct mbuf *m, int iphlen, int offset,
     struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
     struct sctp_init_chunk *cp, struct sctp_inpcb *inp,
     struct sctp_tcb *stcb, struct sctp_nets *net, int *abort_no_unlock,
     uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	struct sctp_init *init;
 	struct mbuf *op_err;
 
 	SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_init: handling INIT tcb:%p\n",
 	    (void *)stcb);
 	if (stcb == NULL) {
 		SCTP_INP_RLOCK(inp);
 	}
 	/* validate length */
 	if (ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_chunk)) {
 		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
 		sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
 		    mflowtype, mflowid,
 		    vrf_id, port);
 		if (stcb)
 			*abort_no_unlock = 1;
 		goto outnow;
 	}
 	/* validate parameters */
 	init = &cp->init;
 	if (init->initiate_tag == 0) {
 		/* protocol error... send abort */
 		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
 		sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
 		    mflowtype, mflowid,
 		    vrf_id, port);
 		if (stcb)
 			*abort_no_unlock = 1;
 		goto outnow;
 	}
 	if (ntohl(init->a_rwnd) < SCTP_MIN_RWND) {
 		/* invalid parameter... send abort */
 		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
 		sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
 		    mflowtype, mflowid,
 		    vrf_id, port);
 		if (stcb)
 			*abort_no_unlock = 1;
 		goto outnow;
 	}
 	if (init->num_inbound_streams == 0) {
 		/* protocol error... send abort */
 		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
 		sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
 		    mflowtype, mflowid,
 		    vrf_id, port);
 		if (stcb)
 			*abort_no_unlock = 1;
 		goto outnow;
 	}
 	if (init->num_outbound_streams == 0) {
 		/* protocol error... send abort */
 		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
 		sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
 		    mflowtype, mflowid,
 		    vrf_id, port);
 		if (stcb)
 			*abort_no_unlock = 1;
 		goto outnow;
 	}
 	if (sctp_validate_init_auth_params(m, offset + sizeof(*cp),
 	    offset + ntohs(cp->ch.chunk_length))) {
 		/* auth parameter(s) error... send abort */
 		op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 		    "Problem with AUTH parameters");
 		sctp_abort_association(inp, stcb, m, iphlen, src, dst, sh, op_err,
 		    mflowtype, mflowid,
 		    vrf_id, port);
 		if (stcb)
 			*abort_no_unlock = 1;
 		goto outnow;
 	}
 	/* We are only accepting if we have a listening socket. */
 	if ((stcb == NULL) &&
 	    ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
 	    (!SCTP_IS_LISTENING(inp)))) {
 		/*
 		 * FIX ME ?? What about TCP model and we have a
 		 * match/restart case? Actually no fix is needed. the lookup
 		 * will always find the existing assoc so stcb would not be
 		 * NULL. It may be questionable to do this since we COULD
 		 * just send back the INIT-ACK and hope that the app did
 		 * accept()'s by the time the COOKIE was sent. But there is
 		 * a price to pay for COOKIE generation and I don't want to
 		 * pay it on the chance that the app will actually do some
 		 * accepts(). The App just looses and should NOT be in this
 		 * state :-)
 		 */
 		if (SCTP_BASE_SYSCTL(sctp_blackhole) == 0) {
 			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 			    "No listener");
 			sctp_send_abort(m, iphlen, src, dst, sh, 0, op_err,
 			    mflowtype, mflowid, inp->fibnum,
 			    vrf_id, port);
 		}
 		goto outnow;
 	}
 	if ((stcb != NULL) &&
 	    (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT)) {
 		SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending SHUTDOWN-ACK\n");
 		sctp_send_shutdown_ack(stcb, NULL);
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
 	} else {
 		SCTPDBG(SCTP_DEBUG_INPUT3, "sctp_handle_init: sending INIT-ACK\n");
 		sctp_send_initiate_ack(inp, stcb, net, m, iphlen, offset,
 		    src, dst, sh, cp,
 		    mflowtype, mflowid,
 		    vrf_id, port,
 		    ((stcb == NULL) ? SCTP_HOLDS_LOCK : SCTP_NOT_LOCKED));
 	}
 outnow:
 	if (stcb == NULL) {
 		SCTP_INP_RUNLOCK(inp);
 	}
 }
 
 /*
  * process peer "INIT/INIT-ACK" chunk returns value < 0 on error
  */
 
 int
 sctp_is_there_unsent_data(struct sctp_tcb *stcb, int so_locked
 #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING)
     SCTP_UNUSED
 #endif
 )
 {
 	int unsent_data;
 	unsigned int i;
 	struct sctp_stream_queue_pending *sp;
 	struct sctp_association *asoc;
 
 	/*
 	 * This function returns if any stream has true unsent data on it.
 	 * Note that as it looks through it will clean up any places that
 	 * have old data that has been sent but left at top of stream queue.
 	 */
 	asoc = &stcb->asoc;
 	unsent_data = 0;
 	SCTP_TCB_SEND_LOCK(stcb);
 	if (!stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc)) {
 		/* Check to see if some data queued */
 		for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
 			/* sa_ignore FREED_MEMORY */
 			sp = TAILQ_FIRST(&stcb->asoc.strmout[i].outqueue);
 			if (sp == NULL) {
 				continue;
 			}
 			if ((sp->msg_is_complete) &&
 			    (sp->length == 0) &&
 			    (sp->sender_all_done)) {
 				/*
 				 * We are doing differed cleanup. Last time
 				 * through when we took all the data the
 				 * sender_all_done was not set.
 				 */
 				if (sp->put_last_out == 0) {
 					SCTP_PRINTF("Gak, put out entire msg with NO end!-1\n");
 					SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d\n",
 					    sp->sender_all_done,
 					    sp->length,
 					    sp->msg_is_complete,
 					    sp->put_last_out);
 				}
 				atomic_subtract_int(&stcb->asoc.stream_queue_cnt, 1);
 				TAILQ_REMOVE(&stcb->asoc.strmout[i].outqueue, sp, next);
 				stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, &asoc->strmout[i], sp, 1);
 				if (sp->net) {
 					sctp_free_remote_addr(sp->net);
 					sp->net = NULL;
 				}
 				if (sp->data) {
 					sctp_m_freem(sp->data);
 					sp->data = NULL;
 				}
 				sctp_free_a_strmoq(stcb, sp, so_locked);
 				if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
 					unsent_data++;
 				}
 			} else {
 				unsent_data++;
 			}
 			if (unsent_data > 0) {
 				break;
 			}
 		}
 	}
 	SCTP_TCB_SEND_UNLOCK(stcb);
 	return (unsent_data);
 }
 
 static int
 sctp_process_init(struct sctp_init_chunk *cp, struct sctp_tcb *stcb)
 {
 	struct sctp_init *init;
 	struct sctp_association *asoc;
 	struct sctp_nets *lnet;
 	unsigned int i;
 
 	init = &cp->init;
 	asoc = &stcb->asoc;
 	/* save off parameters */
 	asoc->peer_vtag = ntohl(init->initiate_tag);
 	asoc->peers_rwnd = ntohl(init->a_rwnd);
 	/* init tsn's */
 	asoc->highest_tsn_inside_map = asoc->asconf_seq_in = ntohl(init->initial_tsn) - 1;
 
 	if (!TAILQ_EMPTY(&asoc->nets)) {
 		/* update any ssthresh's that may have a default */
 		TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) {
 			lnet->ssthresh = asoc->peers_rwnd;
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
 				sctp_log_cwnd(stcb, lnet, 0, SCTP_CWND_INITIALIZATION);
 			}
 		}
 	}
 	SCTP_TCB_SEND_LOCK(stcb);
 	if (asoc->pre_open_streams > ntohs(init->num_inbound_streams)) {
 		unsigned int newcnt;
 		struct sctp_stream_out *outs;
 		struct sctp_stream_queue_pending *sp, *nsp;
 		struct sctp_tmit_chunk *chk, *nchk;
 
 		/* abandon the upper streams */
 		newcnt = ntohs(init->num_inbound_streams);
 		TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) {
 			if (chk->rec.data.sid >= newcnt) {
 				TAILQ_REMOVE(&asoc->send_queue, chk, sctp_next);
 				asoc->send_queue_cnt--;
 				if (asoc->strmout[chk->rec.data.sid].chunks_on_queues > 0) {
 					asoc->strmout[chk->rec.data.sid].chunks_on_queues--;
 #ifdef INVARIANTS
 				} else {
 					panic("No chunks on the queues for sid %u.", chk->rec.data.sid);
 #endif
 				}
 				if (chk->data != NULL) {
 					sctp_free_bufspace(stcb, asoc, chk, 1);
 					sctp_ulp_notify(SCTP_NOTIFY_UNSENT_DG_FAIL, stcb,
 					    0, chk, SCTP_SO_NOT_LOCKED);
 					if (chk->data) {
 						sctp_m_freem(chk->data);
 						chk->data = NULL;
 					}
 				}
 				sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 				/* sa_ignore FREED_MEMORY */
 			}
 		}
 		if (asoc->strmout) {
 			for (i = newcnt; i < asoc->pre_open_streams; i++) {
 				outs = &asoc->strmout[i];
 				TAILQ_FOREACH_SAFE(sp, &outs->outqueue, next, nsp) {
 					atomic_subtract_int(&stcb->asoc.stream_queue_cnt, 1);
 					TAILQ_REMOVE(&outs->outqueue, sp, next);
 					stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, outs, sp, 1);
 					sctp_ulp_notify(SCTP_NOTIFY_SPECIAL_SP_FAIL,
 					    stcb, 0, sp, SCTP_SO_NOT_LOCKED);
 					if (sp->data) {
 						sctp_m_freem(sp->data);
 						sp->data = NULL;
 					}
 					if (sp->net) {
 						sctp_free_remote_addr(sp->net);
 						sp->net = NULL;
 					}
 					/* Free the chunk */
 					sctp_free_a_strmoq(stcb, sp, SCTP_SO_NOT_LOCKED);
 					/* sa_ignore FREED_MEMORY */
 				}
 				outs->state = SCTP_STREAM_CLOSED;
 			}
 		}
 		/* cut back the count */
 		asoc->pre_open_streams = newcnt;
 	}
 	SCTP_TCB_SEND_UNLOCK(stcb);
 	asoc->streamoutcnt = asoc->pre_open_streams;
 	if (asoc->strmout) {
 		for (i = 0; i < asoc->streamoutcnt; i++) {
 			asoc->strmout[i].state = SCTP_STREAM_OPEN;
 		}
 	}
 	/* EY - nr_sack: initialize highest tsn in nr_mapping_array */
 	asoc->highest_tsn_inside_nr_map = asoc->highest_tsn_inside_map;
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
 		sctp_log_map(0, 5, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
 	}
 	/* This is the next one we expect */
 	asoc->str_reset_seq_in = asoc->asconf_seq_in + 1;
 
 	asoc->mapping_array_base_tsn = ntohl(init->initial_tsn);
 	asoc->tsn_last_delivered = asoc->cumulative_tsn = asoc->asconf_seq_in;
 
 	asoc->advanced_peer_ack_point = asoc->last_acked_seq;
 	/* open the requested streams */
 
 	if (asoc->strmin != NULL) {
 		/* Free the old ones */
 		for (i = 0; i < asoc->streamincnt; i++) {
 			sctp_clean_up_stream(stcb, &asoc->strmin[i].inqueue);
 			sctp_clean_up_stream(stcb, &asoc->strmin[i].uno_inqueue);
 		}
 		SCTP_FREE(asoc->strmin, SCTP_M_STRMI);
 	}
 	if (asoc->max_inbound_streams > ntohs(init->num_outbound_streams)) {
 		asoc->streamincnt = ntohs(init->num_outbound_streams);
 	} else {
 		asoc->streamincnt = asoc->max_inbound_streams;
 	}
 	SCTP_MALLOC(asoc->strmin, struct sctp_stream_in *, asoc->streamincnt *
 	    sizeof(struct sctp_stream_in), SCTP_M_STRMI);
 	if (asoc->strmin == NULL) {
 		/* we didn't get memory for the streams! */
 		SCTPDBG(SCTP_DEBUG_INPUT2, "process_init: couldn't get memory for the streams!\n");
 		return (-1);
 	}
 	for (i = 0; i < asoc->streamincnt; i++) {
 		asoc->strmin[i].sid = i;
 		asoc->strmin[i].last_mid_delivered = 0xffffffff;
 		TAILQ_INIT(&asoc->strmin[i].inqueue);
 		TAILQ_INIT(&asoc->strmin[i].uno_inqueue);
 		asoc->strmin[i].pd_api_started = 0;
 		asoc->strmin[i].delivery_started = 0;
 	}
 	/*
 	 * load_address_from_init will put the addresses into the
 	 * association when the COOKIE is processed or the INIT-ACK is
 	 * processed. Both types of COOKIE's existing and new call this
 	 * routine. It will remove addresses that are no longer in the
 	 * association (for the restarting case where addresses are
 	 * removed). Up front when the INIT arrives we will discard it if it
 	 * is a restart and new addresses have been added.
 	 */
 	/* sa_ignore MEMLEAK */
 	return (0);
 }
 
 /*
  * INIT-ACK message processing/consumption returns value < 0 on error
  */
 static int
 sctp_process_init_ack(struct mbuf *m, int iphlen, int offset,
     struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
     struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
     struct sctp_nets *net, int *abort_no_unlock,
     uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id)
 {
 	struct sctp_association *asoc;
 	struct mbuf *op_err;
 	int retval, abort_flag;
 	uint32_t initack_limit;
 	int nat_friendly = 0;
 
 	/* First verify that we have no illegal param's */
 	abort_flag = 0;
 
 	op_err = sctp_arethere_unrecognized_parameters(m,
 	    (offset + sizeof(struct sctp_init_chunk)),
 	    &abort_flag, (struct sctp_chunkhdr *)cp, &nat_friendly);
 	if (abort_flag) {
 		/* Send an abort and notify peer */
 		sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED);
 		*abort_no_unlock = 1;
 		return (-1);
 	}
 	asoc = &stcb->asoc;
 	asoc->peer_supports_nat = (uint8_t)nat_friendly;
 	/* process the peer's parameters in the INIT-ACK */
 	retval = sctp_process_init((struct sctp_init_chunk *)cp, stcb);
 	if (retval < 0) {
 		return (retval);
 	}
 	initack_limit = offset + ntohs(cp->ch.chunk_length);
 	/* load all addresses */
 	if ((retval = sctp_load_addresses_from_init(stcb, m,
 	    (offset + sizeof(struct sctp_init_chunk)), initack_limit,
 	    src, dst, NULL, stcb->asoc.port))) {
 		op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 		    "Problem with address parameters");
 		SCTPDBG(SCTP_DEBUG_INPUT1,
 		    "Load addresses from INIT causes an abort %d\n",
 		    retval);
 		sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
 		    src, dst, sh, op_err,
 		    mflowtype, mflowid,
 		    vrf_id, net->port);
 		*abort_no_unlock = 1;
 		return (-1);
 	}
 	/* if the peer doesn't support asconf, flush the asconf queue */
 	if (asoc->asconf_supported == 0) {
 		struct sctp_asconf_addr *param, *nparam;
 
 		TAILQ_FOREACH_SAFE(param, &asoc->asconf_queue, next, nparam) {
 			TAILQ_REMOVE(&asoc->asconf_queue, param, next);
 			SCTP_FREE(param, SCTP_M_ASC_ADDR);
 		}
 	}
 	stcb->asoc.peer_hmac_id = sctp_negotiate_hmacid(stcb->asoc.peer_hmacs,
 	    stcb->asoc.local_hmacs);
 	if (op_err) {
 		sctp_queue_op_err(stcb, op_err);
 		/* queuing will steal away the mbuf chain to the out queue */
 		op_err = NULL;
 	}
 	/* extract the cookie and queue it to "echo" it back... */
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 		sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 		    stcb->asoc.overall_error_count,
 		    0,
 		    SCTP_FROM_SCTP_INPUT,
 		    __LINE__);
 	}
 	stcb->asoc.overall_error_count = 0;
 	net->error_count = 0;
 
 	/*
 	 * Cancel the INIT timer, We do this first before queueing the
 	 * cookie. We always cancel at the primary to assue that we are
 	 * canceling the timer started by the INIT which always goes to the
 	 * primary.
 	 */
 	sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep, stcb,
 	    asoc->primary_destination, SCTP_FROM_SCTP_INPUT + SCTP_LOC_3);
 
 	/* calculate the RTO */
 	net->RTO = sctp_calculate_rto(stcb, asoc, net, &asoc->time_entered, sctp_align_safe_nocopy,
 	    SCTP_RTT_FROM_NON_DATA);
 	retval = sctp_send_cookie_echo(m, offset, stcb, net);
 	if (retval < 0) {
 		/*
 		 * No cookie, we probably should send a op error. But in any
 		 * case if there is no cookie in the INIT-ACK, we can
 		 * abandon the peer, its broke.
 		 */
 		if (retval == -3) {
 			uint16_t len;
 
 			len = (uint16_t)(sizeof(struct sctp_error_missing_param) + sizeof(uint16_t));
 			/* We abort with an error of missing mandatory param */
 			op_err = sctp_get_mbuf_for_msg(len, 0, M_NOWAIT, 1, MT_DATA);
 			if (op_err != NULL) {
 				struct sctp_error_missing_param *cause;
 
 				SCTP_BUF_LEN(op_err) = len;
 				cause = mtod(op_err, struct sctp_error_missing_param *);
 				/* Subtract the reserved param */
 				cause->cause.code = htons(SCTP_CAUSE_MISSING_PARAM);
 				cause->cause.length = htons(len);
 				cause->num_missing_params = htonl(1);
 				cause->type[0] = htons(SCTP_STATE_COOKIE);
 			}
 			sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
 			    src, dst, sh, op_err,
 			    mflowtype, mflowid,
 			    vrf_id, net->port);
 			*abort_no_unlock = 1;
 		}
 		return (retval);
 	}
 	return (0);
 }
 
 static void
 sctp_handle_heartbeat_ack(struct sctp_heartbeat_chunk *cp,
     struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	union sctp_sockstore store;
 	struct sctp_nets *r_net, *f_net;
 	struct timeval tv;
 	int req_prim = 0;
 	uint16_t old_error_counter;
 
 	if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_heartbeat_chunk)) {
 		/* Invalid length */
 		return;
 	}
 	memset(&store, 0, sizeof(store));
 	switch (cp->heartbeat.hb_info.addr_family) {
 #ifdef INET
 	case AF_INET:
 		if (cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in)) {
 			store.sin.sin_family = cp->heartbeat.hb_info.addr_family;
 			store.sin.sin_len = cp->heartbeat.hb_info.addr_len;
 			store.sin.sin_port = stcb->rport;
 			memcpy(&store.sin.sin_addr, cp->heartbeat.hb_info.address,
 			    sizeof(store.sin.sin_addr));
 		} else {
 			return;
 		}
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		if (cp->heartbeat.hb_info.addr_len == sizeof(struct sockaddr_in6)) {
 			store.sin6.sin6_family = cp->heartbeat.hb_info.addr_family;
 			store.sin6.sin6_len = cp->heartbeat.hb_info.addr_len;
 			store.sin6.sin6_port = stcb->rport;
 			memcpy(&store.sin6.sin6_addr, cp->heartbeat.hb_info.address, sizeof(struct in6_addr));
 		} else {
 			return;
 		}
 		break;
 #endif
 	default:
 		return;
 	}
 	r_net = sctp_findnet(stcb, &store.sa);
 	if (r_net == NULL) {
 		SCTPDBG(SCTP_DEBUG_INPUT1, "Huh? I can't find the address I sent it to, discard\n");
 		return;
 	}
 	if ((r_net && (r_net->dest_state & SCTP_ADDR_UNCONFIRMED)) &&
 	    (r_net->heartbeat_random1 == cp->heartbeat.hb_info.random_value1) &&
 	    (r_net->heartbeat_random2 == cp->heartbeat.hb_info.random_value2)) {
 		/*
 		 * If the its a HB and it's random value is correct when can
 		 * confirm the destination.
 		 */
 		r_net->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
 		if (r_net->dest_state & SCTP_ADDR_REQ_PRIMARY) {
 			stcb->asoc.primary_destination = r_net;
 			r_net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY;
 			f_net = TAILQ_FIRST(&stcb->asoc.nets);
 			if (f_net != r_net) {
 				/*
 				 * first one on the list is NOT the primary
 				 * sctp_cmpaddr() is much more efficient if
 				 * the primary is the first on the list,
 				 * make it so.
 				 */
 				TAILQ_REMOVE(&stcb->asoc.nets, r_net, sctp_next);
 				TAILQ_INSERT_HEAD(&stcb->asoc.nets, r_net, sctp_next);
 			}
 			req_prim = 1;
 		}
 		sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
 		    stcb, 0, (void *)r_net, SCTP_SO_NOT_LOCKED);
 		sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb,
 		    r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_4);
 		sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net);
 	}
 	old_error_counter = r_net->error_count;
 	r_net->error_count = 0;
 	r_net->hb_responded = 1;
 	tv.tv_sec = cp->heartbeat.hb_info.time_value_1;
 	tv.tv_usec = cp->heartbeat.hb_info.time_value_2;
 	/* Now lets do a RTO with this */
 	r_net->RTO = sctp_calculate_rto(stcb, &stcb->asoc, r_net, &tv, sctp_align_safe_nocopy,
 	    SCTP_RTT_FROM_NON_DATA);
 	if (!(r_net->dest_state & SCTP_ADDR_REACHABLE)) {
 		r_net->dest_state |= SCTP_ADDR_REACHABLE;
 		sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
 		    0, (void *)r_net, SCTP_SO_NOT_LOCKED);
 	}
 	if (r_net->dest_state & SCTP_ADDR_PF) {
 		r_net->dest_state &= ~SCTP_ADDR_PF;
 		stcb->asoc.cc_functions.sctp_cwnd_update_exit_pf(stcb, net);
 	}
 	if (old_error_counter > 0) {
 		sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
 		    stcb, r_net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_5);
 		sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, r_net);
 	}
 	if (r_net == stcb->asoc.primary_destination) {
 		if (stcb->asoc.alternate) {
 			/* release the alternate, primary is good */
 			sctp_free_remote_addr(stcb->asoc.alternate);
 			stcb->asoc.alternate = NULL;
 		}
 	}
 	/* Mobility adaptation */
 	if (req_prim) {
 		if ((sctp_is_mobility_feature_on(stcb->sctp_ep,
 		    SCTP_MOBILITY_BASE) ||
 		    sctp_is_mobility_feature_on(stcb->sctp_ep,
 		    SCTP_MOBILITY_FASTHANDOFF)) &&
 		    sctp_is_mobility_feature_on(stcb->sctp_ep,
 		    SCTP_MOBILITY_PRIM_DELETED)) {
 
 			sctp_timer_stop(SCTP_TIMER_TYPE_PRIM_DELETED,
 			    stcb->sctp_ep, stcb, NULL,
 			    SCTP_FROM_SCTP_INPUT + SCTP_LOC_6);
 			if (sctp_is_mobility_feature_on(stcb->sctp_ep,
 			    SCTP_MOBILITY_FASTHANDOFF)) {
 				sctp_assoc_immediate_retrans(stcb,
 				    stcb->asoc.primary_destination);
 			}
 			if (sctp_is_mobility_feature_on(stcb->sctp_ep,
 			    SCTP_MOBILITY_BASE)) {
 				sctp_move_chunks_from_net(stcb,
 				    stcb->asoc.deleted_primary);
 			}
 			sctp_delete_prim_timer(stcb->sctp_ep, stcb,
 			    stcb->asoc.deleted_primary);
 		}
 	}
 }
 
 static int
 sctp_handle_nat_colliding_state(struct sctp_tcb *stcb)
 {
 	/*
 	 * return 0 means we want you to proceed with the abort non-zero
 	 * means no abort processing
 	 */
 	struct sctpasochead *head;
 
 	if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_WAIT) {
 		/* generate a new vtag and send init */
 		LIST_REMOVE(stcb, sctp_asocs);
 		stcb->asoc.my_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
 		head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))];
 		/*
 		 * put it in the bucket in the vtag hash of assoc's for the
 		 * system
 		 */
 		LIST_INSERT_HEAD(head, stcb, sctp_asocs);
 		sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
 		return (1);
 	}
 	if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
 		/*
 		 * treat like a case where the cookie expired i.e.: - dump
 		 * current cookie. - generate a new vtag. - resend init.
 		 */
 		/* generate a new vtag and send init */
 		LIST_REMOVE(stcb, sctp_asocs);
 		stcb->asoc.state &= ~SCTP_STATE_COOKIE_ECHOED;
 		stcb->asoc.state |= SCTP_STATE_COOKIE_WAIT;
 		sctp_stop_all_cookie_timers(stcb);
 		sctp_toss_old_cookies(stcb, &stcb->asoc);
 		stcb->asoc.my_vtag = sctp_select_a_tag(stcb->sctp_ep, stcb->sctp_ep->sctp_lport, stcb->rport, 1);
 		head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag, SCTP_BASE_INFO(hashasocmark))];
 		/*
 		 * put it in the bucket in the vtag hash of assoc's for the
 		 * system
 		 */
 		LIST_INSERT_HEAD(head, stcb, sctp_asocs);
 		sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
 		return (1);
 	}
 	return (0);
 }
 
 static int
 sctp_handle_nat_missing_state(struct sctp_tcb *stcb,
     struct sctp_nets *net)
 {
 	/*
 	 * return 0 means we want you to proceed with the abort non-zero
 	 * means no abort processing
 	 */
 	if (stcb->asoc.auth_supported == 0) {
 		SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_nat_missing_state: Peer does not support AUTH, cannot send an asconf\n");
 		return (0);
 	}
 	sctp_asconf_send_nat_state_update(stcb, net);
 	return (1);
 }
 
 
 static void
 sctp_handle_abort(struct sctp_abort_chunk *abort,
     struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 #endif
 	uint16_t len;
 	uint16_t error;
 
 	SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: handling ABORT\n");
 	if (stcb == NULL)
 		return;
 
 	len = ntohs(abort->ch.chunk_length);
 	if (len > sizeof(struct sctp_chunkhdr)) {
 		/*
 		 * Need to check the cause codes for our two magic nat
 		 * aborts which don't kill the assoc necessarily.
 		 */
 		struct sctp_gen_error_cause *cause;
 
 		cause = (struct sctp_gen_error_cause *)(abort + 1);
 		error = ntohs(cause->code);
 		if (error == SCTP_CAUSE_NAT_COLLIDING_STATE) {
 			SCTPDBG(SCTP_DEBUG_INPUT2, "Received Colliding state abort flags:%x\n",
 			    abort->ch.chunk_flags);
 			if (sctp_handle_nat_colliding_state(stcb)) {
 				return;
 			}
 		} else if (error == SCTP_CAUSE_NAT_MISSING_STATE) {
 			SCTPDBG(SCTP_DEBUG_INPUT2, "Received missing state abort flags:%x\n",
 			    abort->ch.chunk_flags);
 			if (sctp_handle_nat_missing_state(stcb, net)) {
 				return;
 			}
 		}
 	} else {
 		error = 0;
 	}
 	/* stop any receive timers */
 	sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, net,
 	    SCTP_FROM_SCTP_INPUT + SCTP_LOC_7);
 	/* notify user of the abort and clean up... */
 	sctp_abort_notification(stcb, 1, error, abort, SCTP_SO_NOT_LOCKED);
 	/* free the tcb */
 	SCTP_STAT_INCR_COUNTER32(sctps_aborted);
 	if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
 	    (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 		SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 	}
 #ifdef SCTP_ASOCLOG_OF_TSNS
 	sctp_print_out_track_log(stcb);
 #endif
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	so = SCTP_INP_SO(stcb->sctp_ep);
 	atomic_add_int(&stcb->asoc.refcnt, 1);
 	SCTP_TCB_UNLOCK(stcb);
 	SCTP_SOCKET_LOCK(so, 1);
 	SCTP_TCB_LOCK(stcb);
 	atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
 	stcb->asoc.state |= SCTP_STATE_WAS_ABORTED;
 	(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
 	    SCTP_FROM_SCTP_INPUT + SCTP_LOC_8);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 	SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_handle_abort: finished\n");
 }
 
 /*
  * Start the per-destination timers of an association.
  *
  * Every destination gets a path-MTU-raise timer and a heartbeat timer.
  * Destinations still marked unconfirmed are also sent a heartbeat, up to
  * the sctp_hb_maxburst sysctl limit per call.  If at least one heartbeat
  * was sent, the output routine is run once at the end.
  */
 static void
 sctp_start_net_timers(struct sctp_tcb *stcb)
 {
 	struct sctp_nets *dest;
 	uint32_t hb_burst = 0;
 
 	TAILQ_FOREACH(dest, &stcb->asoc.nets, sctp_next) {
 		sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, stcb->sctp_ep, stcb, dest);
 		sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, dest);
 		/* Confirmed destinations need no extra heartbeat. */
 		if ((dest->dest_state & SCTP_ADDR_UNCONFIRMED) == 0) {
 			continue;
 		}
 		/* Burst limit reached: leave the rest to the HB timer. */
 		if (hb_burst >= SCTP_BASE_SYSCTL(sctp_hb_maxburst)) {
 			continue;
 		}
 		sctp_send_hb(stcb, dest, SCTP_SO_NOT_LOCKED);
 		hb_burst++;
 	}
 	if (hb_burst != 0) {
 		sctp_chunk_output(stcb->sctp_ep, stcb,
 		    SCTP_OUTPUT_FROM_COOKIE_ACK,
 		    SCTP_SO_NOT_LOCKED);
 	}
 }
 
 
 /*
  * Process a SHUTDOWN chunk received for an association.
  *
  * cp         - the received SHUTDOWN chunk
  * stcb       - association control block; nothing is done when it is NULL
  * net        - destination passed to the timer and SHUTDOWN-ACK calls
  * abort_flag - handed to sctp_update_acked(); processing stops as soon as
  *              it comes back non-zero
  *
  * The chunk is ignored in the COOKIE-WAIT and COOKIE-ECHOED states and when
  * its length is not exactly sizeof(struct sctp_shutdown_chunk).
  */
 static void
 sctp_handle_shutdown(struct sctp_shutdown_chunk *cp,
     struct sctp_tcb *stcb, struct sctp_nets *net, int *abort_flag)
 {
 	struct sctp_association *asoc;
 	int some_on_streamwheel;
 	int old_state;
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 #endif
 
 	SCTPDBG(SCTP_DEBUG_INPUT2,
 	    "sctp_handle_shutdown: handling SHUTDOWN\n");
 	if (stcb == NULL)
 		return;
 	asoc = &stcb->asoc;
 	if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
 	    (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
 		return;
 	}
 	if (ntohs(cp->ch.chunk_length) != sizeof(struct sctp_shutdown_chunk)) {
 		/* Shutdown NOT the expected size */
 		return;
 	}
 	/*
 	 * Remember the state on entry; it decides at the end whether a
 	 * SHUTDOWN-ACK has to be sent again.
 	 */
 	old_state = SCTP_GET_STATE(asoc);
 	sctp_update_acked(stcb, cp, abort_flag);
 	if (*abort_flag) {
 		return;
 	}
 	if (asoc->control_pdapi) {
 		/*
 		 * With a normal shutdown we assume the end of last record.
 		 */
 		SCTP_INP_READ_LOCK(stcb->sctp_ep);
 		if (asoc->control_pdapi->on_strm_q) {
 			struct sctp_stream_in *strm;
 
 			/* Take the entry off the stream queue it is linked on. */
 			strm = &asoc->strmin[asoc->control_pdapi->sinfo_stream];
 			if (asoc->control_pdapi->on_strm_q == SCTP_ON_UNORDERED) {
 				/* Unordered */
 				TAILQ_REMOVE(&strm->uno_inqueue, asoc->control_pdapi, next_instrm);
 				asoc->control_pdapi->on_strm_q = 0;
 			} else if (asoc->control_pdapi->on_strm_q == SCTP_ON_ORDERED) {
 				/* Ordered */
 				TAILQ_REMOVE(&strm->inqueue, asoc->control_pdapi, next_instrm);
 				asoc->control_pdapi->on_strm_q = 0;
 #ifdef INVARIANTS
 			} else {
 				panic("Unknown state on ctrl:%p on_strm_q:%d",
 				    asoc->control_pdapi,
 				    asoc->control_pdapi->on_strm_q);
 #endif
 			}
 		}
 		/* Mark the entry as ended and aborted, then forget it. */
 		asoc->control_pdapi->end_added = 1;
 		asoc->control_pdapi->pdapi_aborted = 1;
 		asoc->control_pdapi = NULL;
 		SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		/*
 		 * Take the socket lock before the TCB lock; the extra
 		 * reference is held while the TCB lock is dropped.
 		 */
 		so = SCTP_INP_SO(stcb->sctp_ep);
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_SOCKET_LOCK(so, 1);
 		SCTP_TCB_LOCK(stcb);
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 		if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
 			/* assoc was freed while we were unlocked */
 			SCTP_SOCKET_UNLOCK(so, 1);
 			return;
 		}
 #endif
 		if (stcb->sctp_socket) {
 			sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
 		}
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 	}
 	/* goto SHUTDOWN_RECEIVED state to block new requests */
 	if (stcb->sctp_socket) {
 		if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
 		    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) &&
 		    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT)) {
 			SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_RECEIVED);
 			SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
 			/*
 			 * notify upper layer that peer has initiated a
 			 * shutdown
 			 */
 			sctp_ulp_notify(SCTP_NOTIFY_PEER_SHUTDOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
 
 			/* reset time */
 			(void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
 		}
 	}
 	if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_SENT) {
 		/*
 		 * stop the shutdown timer, since we WILL move to
 		 * SHUTDOWN-ACK-SENT.
 		 */
 		sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb,
 		    net, SCTP_FROM_SCTP_INPUT + SCTP_LOC_9);
 	}
 	/* Now is there unsent data on a stream somewhere? */
 	some_on_streamwheel = sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED);
 
 	if (!TAILQ_EMPTY(&asoc->send_queue) ||
 	    !TAILQ_EMPTY(&asoc->sent_queue) ||
 	    some_on_streamwheel) {
 		/* By returning we will push more data out */
 		return;
 	} else {
 		/* no outstanding data to send, so move on... */
 		/* send SHUTDOWN-ACK */
 		/* move to SHUTDOWN-ACK-SENT state */
 		if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
 		    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 			SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 		}
 		SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
 		if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
 			SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_ACK_SENT);
 			sctp_stop_timers_for_shutdown(stcb);
 			sctp_send_shutdown_ack(stcb, net);
 			sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK,
 			    stcb->sctp_ep, stcb, net);
 		} else if (old_state == SCTP_STATE_SHUTDOWN_ACK_SENT) {
 			/*
 			 * Already in SHUTDOWN-ACK-SENT when the chunk
 			 * arrived: only send the SHUTDOWN-ACK again.
 			 */
 			sctp_send_shutdown_ack(stcb, net);
 		}
 	}
 }
 
 /*
  * Process a SHUTDOWN-ACK chunk received for an association.
  *
  * cp   - the received chunk (unused)
  * stcb - association control block; nothing is done when it is NULL
  * net  - destination passed to the timer stop and SHUTDOWN-COMPLETE calls
  *
  * In COOKIE-WAIT/COOKIE-ECHOED a SHUTDOWN-COMPLETE is sent back; in any
  * state other than SHUTDOWN-SENT/SHUTDOWN-ACK-SENT the chunk is ignored.
  * On both of these early exits the TCB lock is released here.  Otherwise
  * the shutdown is completed and the association is released through
  * sctp_free_assoc().
  */
 static void
 sctp_handle_shutdown_ack(struct sctp_shutdown_ack_chunk *cp SCTP_UNUSED,
     struct sctp_tcb *stcb,
     struct sctp_nets *net)
 {
 	struct sctp_association *asoc;
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 #endif
 
 	SCTPDBG(SCTP_DEBUG_INPUT2,
 	    "sctp_handle_shutdown_ack: handling SHUTDOWN ACK\n");
 	if (stcb == NULL)
 		return;
 
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	/* Look the socket up only once stcb is known to be non-NULL. */
 	so = SCTP_INP_SO(stcb->sctp_ep);
 #endif
 	asoc = &stcb->asoc;
 	/* process according to association state */
 	if ((SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_WAIT) ||
 	    (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)) {
 		/* unexpected SHUTDOWN-ACK... do OOTB handling... */
 		sctp_send_shutdown_complete(stcb, net, 1);
 		SCTP_TCB_UNLOCK(stcb);
 		return;
 	}
 	if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
 	    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
 		/* unexpected SHUTDOWN-ACK... so ignore... */
 		SCTP_TCB_UNLOCK(stcb);
 		return;
 	}
 	if (asoc->control_pdapi) {
 		/*
 		 * With a normal shutdown we assume the end of last record.
 		 */
 		SCTP_INP_READ_LOCK(stcb->sctp_ep);
 		asoc->control_pdapi->end_added = 1;
 		asoc->control_pdapi->pdapi_aborted = 1;
 		asoc->control_pdapi = NULL;
 		SCTP_INP_READ_UNLOCK(stcb->sctp_ep);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		/*
 		 * Take the socket lock before the TCB lock; the extra
 		 * reference is held while the TCB lock is dropped.
 		 */
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_SOCKET_LOCK(so, 1);
 		SCTP_TCB_LOCK(stcb);
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 		if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
 			/* assoc was freed while we were unlocked */
 			SCTP_SOCKET_UNLOCK(so, 1);
 			return;
 		}
 #endif
 		/*
 		 * Only wake a reader when a socket is still attached, as
 		 * the SHUTDOWN handler does and as the notification code
 		 * below already assumes.
 		 */
 		if (stcb->sctp_socket) {
 			sctp_sorwakeup(stcb->sctp_ep, stcb->sctp_socket);
 		}
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 	}
 #ifdef INVARIANTS
 	if (!TAILQ_EMPTY(&asoc->send_queue) ||
 	    !TAILQ_EMPTY(&asoc->sent_queue) ||
 	    sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED)) {
 		panic("Queues are not empty when handling SHUTDOWN-ACK");
 	}
 #endif
 	/* stop the timer */
 	sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net,
 	    SCTP_FROM_SCTP_INPUT + SCTP_LOC_10);
 	/* send SHUTDOWN-COMPLETE */
 	sctp_send_shutdown_complete(stcb, net, 0);
 	/* notify upper layer protocol */
 	if (stcb->sctp_socket) {
 		if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 		    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
 			stcb->sctp_socket->so_snd.sb_cc = 0;
 		}
 		sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
 	}
 	SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
 	/* free the TCB but first save off the ep */
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	atomic_add_int(&stcb->asoc.refcnt, 1);
 	SCTP_TCB_UNLOCK(stcb);
 	SCTP_SOCKET_LOCK(so, 1);
 	SCTP_TCB_LOCK(stcb);
 	atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
 	(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
 	    SCTP_FROM_SCTP_INPUT + SCTP_LOC_11);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 }
 
 /*
  * Skip past the param header and then we will find the chunk that caused the
  * problem. There are two possibilities ASCONF or FWD-TSN other than that and
  * our peer must be broken.
  */
 static void
 sctp_process_unrecog_chunk(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr,
     struct sctp_nets *net)
 {
 	struct sctp_chunkhdr *chk;
 
 	chk = (struct sctp_chunkhdr *)((caddr_t)phdr + sizeof(*phdr));
 	switch (chk->chunk_type) {
 	case SCTP_ASCONF_ACK:
 	case SCTP_ASCONF:
 		sctp_asconf_cleanup(stcb, net);
 		break;
 	case SCTP_IFORWARD_CUM_TSN:
 	case SCTP_FORWARD_CUM_TSN:
 		stcb->asoc.prsctp_supported = 0;
 		break;
 	default:
 		SCTPDBG(SCTP_DEBUG_INPUT2,
 		    "Peer does not support chunk type %d(%x)??\n",
 		    chk->chunk_type, (uint32_t)chk->chunk_type);
 		break;
 	}
 }
 
 /*
  * Skip past the param header and then we will find the param that caused the
  * problem.  There are a number of param's in a ASCONF OR the prsctp param
  * these will turn of specific features.
  * XXX: Is this the right thing to do?
  */
 static void
 sctp_process_unrecog_param(struct sctp_tcb *stcb, struct sctp_paramhdr *phdr)
 {
 	struct sctp_paramhdr *pbad;
 
 	pbad = phdr + 1;
 	switch (ntohs(pbad->param_type)) {
 		/* pr-sctp draft */
 	case SCTP_PRSCTP_SUPPORTED:
 		stcb->asoc.prsctp_supported = 0;
 		break;
 	case SCTP_SUPPORTED_CHUNK_EXT:
 		break;
 		/* draft-ietf-tsvwg-addip-sctp */
 	case SCTP_HAS_NAT_SUPPORT:
 		stcb->asoc.peer_supports_nat = 0;
 		break;
 	case SCTP_ADD_IP_ADDRESS:
 	case SCTP_DEL_IP_ADDRESS:
 	case SCTP_SET_PRIM_ADDR:
 		stcb->asoc.asconf_supported = 0;
 		break;
 	case SCTP_SUCCESS_REPORT:
 	case SCTP_ERROR_CAUSE_IND:
 		SCTPDBG(SCTP_DEBUG_INPUT2, "Huh, the peer does not support success? or error cause?\n");
 		SCTPDBG(SCTP_DEBUG_INPUT2,
 		    "Turning off ASCONF to this strange peer\n");
 		stcb->asoc.asconf_supported = 0;
 		break;
 	default:
 		SCTPDBG(SCTP_DEBUG_INPUT2,
 		    "Peer does not support param type %d(%x)??\n",
 		    pbad->param_type, (uint32_t)pbad->param_type);
 		break;
 	}
 }
 
 static int
 sctp_handle_error(struct sctp_chunkhdr *ch,
     struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	int chklen;
 	struct sctp_paramhdr *phdr;
 	uint16_t error, error_type;
 	uint16_t error_len;
 	struct sctp_association *asoc;
 	int adjust;
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 #endif
 
 	/* parse through all of the errors and process */
 	asoc = &stcb->asoc;
 	phdr = (struct sctp_paramhdr *)((caddr_t)ch +
 	    sizeof(struct sctp_chunkhdr));
 	chklen = ntohs(ch->chunk_length) - sizeof(struct sctp_chunkhdr);
 	error = 0;
 	while ((size_t)chklen >= sizeof(struct sctp_paramhdr)) {
 		/* Process an Error Cause */
 		error_type = ntohs(phdr->param_type);
 		error_len = ntohs(phdr->param_length);
 		if ((error_len > chklen) || (error_len == 0)) {
 			/* invalid param length for this param */
 			SCTPDBG(SCTP_DEBUG_INPUT1, "Bogus length in error param- chunk left:%d errorlen:%d\n",
 			    chklen, error_len);
 			return (0);
 		}
 		if (error == 0) {
 			/* report the first error cause */
 			error = error_type;
 		}
 		switch (error_type) {
 		case SCTP_CAUSE_INVALID_STREAM:
 		case SCTP_CAUSE_MISSING_PARAM:
 		case SCTP_CAUSE_INVALID_PARAM:
 		case SCTP_CAUSE_NO_USER_DATA:
 			SCTPDBG(SCTP_DEBUG_INPUT1, "Software error we got a %d back? We have a bug :/ (or do they?)\n",
 			    error_type);
 			break;
 		case SCTP_CAUSE_NAT_COLLIDING_STATE:
 			SCTPDBG(SCTP_DEBUG_INPUT2, "Received Colliding state abort flags:%x\n",
 			    ch->chunk_flags);
 			if (sctp_handle_nat_colliding_state(stcb)) {
 				return (0);
 			}
 			break;
 		case SCTP_CAUSE_NAT_MISSING_STATE:
 			SCTPDBG(SCTP_DEBUG_INPUT2, "Received missing state abort flags:%x\n",
 			    ch->chunk_flags);
 			if (sctp_handle_nat_missing_state(stcb, net)) {
 				return (0);
 			}
 			break;
 		case SCTP_CAUSE_STALE_COOKIE:
 			/*
 			 * We only act if we have echoed a cookie and are
 			 * waiting.
 			 */
 			if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) {
 				int *p;
 
 				p = (int *)((caddr_t)phdr + sizeof(*phdr));
 				/* Save the time doubled */
 				asoc->cookie_preserve_req = ntohl(*p) << 1;
 				asoc->stale_cookie_count++;
 				if (asoc->stale_cookie_count >
 				    asoc->max_init_times) {
 					sctp_abort_notification(stcb, 0, 0, NULL, SCTP_SO_NOT_LOCKED);
 					/* now free the asoc */
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 					so = SCTP_INP_SO(stcb->sctp_ep);
 					atomic_add_int(&stcb->asoc.refcnt, 1);
 					SCTP_TCB_UNLOCK(stcb);
 					SCTP_SOCKET_LOCK(so, 1);
 					SCTP_TCB_LOCK(stcb);
 					atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
 					(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
 					    SCTP_FROM_SCTP_INPUT + SCTP_LOC_12);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 					SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 					return (-1);
 				}
 				/* blast back to INIT state */
 				sctp_toss_old_cookies(stcb, &stcb->asoc);
 				asoc->state &= ~SCTP_STATE_COOKIE_ECHOED;
 				asoc->state |= SCTP_STATE_COOKIE_WAIT;
 				sctp_stop_all_cookie_timers(stcb);
 				sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
 			}
 			break;
 		case SCTP_CAUSE_UNRESOLVABLE_ADDR:
 			/*
 			 * Nothing we can do here, we don't do hostname
 			 * addresses so if the peer does not like my IPv6
 			 * (or IPv4 for that matter) it does not matter. If
 			 * they don't support that type of address, they can
 			 * NOT possibly get that packet type... i.e. with no
 			 * IPv6 you can't receive a IPv6 packet. so we can
 			 * safely ignore this one. If we ever added support
 			 * for HOSTNAME Addresses, then we would need to do
 			 * something here.
 			 */
 			break;
 		case SCTP_CAUSE_UNRECOG_CHUNK:
 			sctp_process_unrecog_chunk(stcb, phdr, net);
 			break;
 		case SCTP_CAUSE_UNRECOG_PARAM:
 			sctp_process_unrecog_param(stcb, phdr);
 			break;
 		case SCTP_CAUSE_COOKIE_IN_SHUTDOWN:
 			/*
 			 * We ignore this since the timer will drive out a
 			 * new cookie anyway and there timer will drive us
 			 * to send a SHUTDOWN_COMPLETE. We can't send one
 			 * here since we don't have their tag.
 			 */
 			break;
 		case SCTP_CAUSE_DELETING_LAST_ADDR:
 		case SCTP_CAUSE_RESOURCE_SHORTAGE:
 		case SCTP_CAUSE_DELETING_SRC_ADDR:
 			/*
 			 * We should NOT get these here, but in a
 			 * ASCONF-ACK.
 			 */
 			SCTPDBG(SCTP_DEBUG_INPUT2, "Peer sends ASCONF errors in a Operational Error?<%d>?\n",
 			    error_type);
 			break;
 		case SCTP_CAUSE_OUT_OF_RESC:
 			/*
 			 * And what, pray tell do we do with the fact that
 			 * the peer is out of resources? Not really sure we
 			 * could do anything but abort. I suspect this
 			 * should have came WITH an abort instead of in a
 			 * OP-ERROR.
 			 */
 			break;
 		default:
 			SCTPDBG(SCTP_DEBUG_INPUT1, "sctp_handle_error: unknown error type = 0x%xh\n",
 			    error_type);
 			break;
 		}
 		adjust = SCTP_SIZE32(error_len);
 		chklen -= adjust;
 		phdr = (struct sctp_paramhdr *)((caddr_t)phdr + adjust);
 	}
 	sctp_ulp_notify(SCTP_NOTIFY_REMOTE_ERROR, stcb, error, ch, SCTP_SO_NOT_LOCKED);
 	return (0);
 }
 
 /*
  * Handle an INIT-ACK chunk.
  *
  * The chunk is rejected, and the association aborted with an "invalid
  * parameter" cause, when it is shorter than an INIT-ACK chunk or carries a
  * zero initiate tag, an a_rwnd below SCTP_MIN_RWND, or a zero inbound or
  * outbound stream count.  In that case *abort_no_unlock is set to 1.
  *
  * In the COOKIE-WAIT state the parameters are processed and the
  * association moves to COOKIE-ECHOED.  In SHUTDOWN-SENT, COOKIE-ECHOED and
  * OPEN the chunk is discarded.
  *
  * Returns 0 when the chunk was accepted or discarded, -1 when stcb is
  * NULL, validation or parameter processing failed, or the association is
  * in any other state.
  */
 static int
 sctp_handle_init_ack(struct mbuf *m, int iphlen, int offset,
     struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh,
     struct sctp_init_ack_chunk *cp, struct sctp_tcb *stcb,
     struct sctp_nets *net, int *abort_no_unlock,
     uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id)
 {
 	struct sctp_association *asoc;
 	struct sctp_init_ack *ack;
 	struct sctp_nets *primary;
 	struct mbuf *op_err;
 
 	SCTPDBG(SCTP_DEBUG_INPUT2,
 	    "sctp_handle_init_ack: handling INIT-ACK\n");
 
 	if (stcb == NULL) {
 		SCTPDBG(SCTP_DEBUG_INPUT2,
 		    "sctp_handle_init_ack: TCB is null\n");
 		return (-1);
 	}
 	asoc = &stcb->asoc;
 	ack = &cp->init;
 	/*
 	 * Length first, then the fixed fields: the short-circuit order
 	 * keeps the fields from being read on a truncated chunk.  Every
 	 * violation is a protocol error answered with the same abort.
 	 */
 	if ((ntohs(cp->ch.chunk_length) < sizeof(struct sctp_init_ack_chunk)) ||
 	    (ack->initiate_tag == 0) ||
 	    (ntohl(ack->a_rwnd) < SCTP_MIN_RWND) ||
 	    (ack->num_inbound_streams == 0) ||
 	    (ack->num_outbound_streams == 0)) {
 		op_err = sctp_generate_cause(SCTP_CAUSE_INVALID_PARAM, "");
 		sctp_abort_association(stcb->sctp_ep, stcb, m, iphlen,
 		    src, dst, sh, op_err,
 		    mflowtype, mflowid,
 		    vrf_id, net->port);
 		*abort_no_unlock = 1;
 		return (-1);
 	}
 	/* process according to association state... */
 	switch (asoc->state & SCTP_STATE_MASK) {
 	case SCTP_STATE_COOKIE_WAIT:
 		/* the only state in which an INIT-ACK is expected */
 		primary = asoc->primary_destination;
 		if (primary->dest_state & SCTP_ADDR_UNCONFIRMED) {
 			/*
 			 * The INIT went to the primary, so an INIT-ACK
 			 * coming back confirms it.  This has to happen
 			 * before the addresses are loaded.
 			 */
 			primary->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
 			sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
 			    stcb, 0, (void *)primary, SCTP_SO_NOT_LOCKED);
 		}
 		if (sctp_process_init_ack(m, iphlen, offset, src, dst, sh, cp, stcb,
 		    net, abort_no_unlock,
 		    mflowtype, mflowid,
 		    vrf_id) < 0) {
 			/* the parameters could not be parsed */
 			return (-1);
 		}
 		SCTPDBG(SCTP_DEBUG_INPUT2, "moving to COOKIE-ECHOED state\n");
 		SCTP_SET_STATE(asoc, SCTP_STATE_COOKIE_ECHOED);
 
 		/* start the error accounting over */
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 			sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 			    asoc->overall_error_count,
 			    0,
 			    SCTP_FROM_SCTP_INPUT,
 			    __LINE__);
 		}
 		asoc->overall_error_count = 0;
 		(void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
 		/*
 		 * Start the cookie timer; this collapses the init timer
 		 * back in case of an exponential backoff.  The cookie
 		 * itself goes out with the send at the end of inbound
 		 * data processing.
 		 */
 		sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, stcb->sctp_ep,
 		    stcb, net);
 		break;
 	case SCTP_STATE_SHUTDOWN_SENT:
 	case SCTP_STATE_COOKIE_ECHOED:
 	case SCTP_STATE_OPEN:
 		/* not the expected state: discard the chunk */
 		break;
 	case SCTP_STATE_EMPTY:
 	case SCTP_STATE_INUSE:
 	default:
 		/* not the expected state: discard and report failure */
 		return (-1);
 	}
 	SCTPDBG(SCTP_DEBUG_INPUT1, "Leaving handle-init-ack end\n");
 	return (0);
 }
 
 /*
  * Forward declaration of sctp_process_cookie_new(), so that code placed
  * ahead of its definition can refer to it.
  */
 static struct sctp_tcb *
 sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
     struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
     struct sctp_inpcb *inp, struct sctp_nets **netp,
     struct sockaddr *init_src, int *notification,
     int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
     uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port);
 
 
 /*
  * handle a state cookie for an existing association m: input packet mbuf
  * chain-- assumes a pullup on IP/SCTP/COOKIE-ECHO chunk note: this is a
  * "split" mbuf and the cookie signature does not exist offset: offset into
  * mbuf to the cookie-echo chunk
  */
 static struct sctp_tcb *
 sctp_process_cookie_existing(struct mbuf *m, int iphlen, int offset,
     struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
     struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets **netp,
     struct sockaddr *init_src, int *notification,
     int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
     uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	struct sctp_association *asoc;
 	struct sctp_init_chunk *init_cp, init_buf;
 	struct sctp_init_ack_chunk *initack_cp, initack_buf;
 	struct sctp_nets *net;
 	struct mbuf *op_err;
 	int init_offset, initack_offset, i;
 	int retval;
 	int spec_flag = 0;
 	uint32_t how_indx;
 #if defined(SCTP_DETAILED_STR_STATS)
 	int j;
 #endif
 
 	net = *netp;
 	/* I know that the TCB is non-NULL from the caller */
 	asoc = &stcb->asoc;
 	for (how_indx = 0; how_indx < sizeof(asoc->cookie_how); how_indx++) {
 		if (asoc->cookie_how[how_indx] == 0)
 			break;
 	}
 	if (how_indx < sizeof(asoc->cookie_how)) {
 		asoc->cookie_how[how_indx] = 1;
 	}
 	if (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
 		/* SHUTDOWN came in after sending INIT-ACK */
 		sctp_send_shutdown_ack(stcb, stcb->asoc.primary_destination);
 		op_err = sctp_generate_cause(SCTP_CAUSE_COOKIE_IN_SHUTDOWN, "");
 		sctp_send_operr_to(src, dst, sh, cookie->peers_vtag, op_err,
 		    mflowtype, mflowid, inp->fibnum,
 		    vrf_id, net->port);
 		if (how_indx < sizeof(asoc->cookie_how))
 			asoc->cookie_how[how_indx] = 2;
 		return (NULL);
 	}
 	/*
 	 * find and validate the INIT chunk in the cookie (peer's info) the
 	 * INIT should start after the cookie-echo header struct (chunk
 	 * header, state cookie header struct)
 	 */
 	init_offset = offset += sizeof(struct sctp_cookie_echo_chunk);
 
 	init_cp = (struct sctp_init_chunk *)
 	    sctp_m_getptr(m, init_offset, sizeof(struct sctp_init_chunk),
 	    (uint8_t *)&init_buf);
 	if (init_cp == NULL) {
 		/* could not pull a INIT chunk in cookie */
 		return (NULL);
 	}
 	if (init_cp->ch.chunk_type != SCTP_INITIATION) {
 		return (NULL);
 	}
 	/*
 	 * find and validate the INIT-ACK chunk in the cookie (my info) the
 	 * INIT-ACK follows the INIT chunk
 	 */
 	initack_offset = init_offset + SCTP_SIZE32(ntohs(init_cp->ch.chunk_length));
 	initack_cp = (struct sctp_init_ack_chunk *)
 	    sctp_m_getptr(m, initack_offset, sizeof(struct sctp_init_ack_chunk),
 	    (uint8_t *)&initack_buf);
 	if (initack_cp == NULL) {
 		/* could not pull INIT-ACK chunk in cookie */
 		return (NULL);
 	}
 	if (initack_cp->ch.chunk_type != SCTP_INITIATION_ACK) {
 		return (NULL);
 	}
 	if ((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) &&
 	    (ntohl(init_cp->init.initiate_tag) == asoc->peer_vtag)) {
 		/*
 		 * case D in Section 5.2.4 Table 2: MMAA process accordingly
 		 * to get into the OPEN state
 		 */
 		if (ntohl(initack_cp->init.initial_tsn) != asoc->init_seq_number) {
 			/*-
 			 * Opps, this means that we somehow generated two vtag's
 			 * the same. I.e. we did:
 			 *  Us               Peer
 			 *   <---INIT(tag=a)------
 			 *   ----INIT-ACK(tag=t)-->
 			 *   ----INIT(tag=t)------> *1
 			 *   <---INIT-ACK(tag=a)---
                          *   <----CE(tag=t)------------- *2
 			 *
 			 * At point *1 we should be generating a different
 			 * tag t'. Which means we would throw away the CE and send
 			 * ours instead. Basically this is case C (throw away side).
 			 */
 			if (how_indx < sizeof(asoc->cookie_how))
 				asoc->cookie_how[how_indx] = 17;
 			return (NULL);
 
 		}
 		switch (SCTP_GET_STATE(asoc)) {
 		case SCTP_STATE_COOKIE_WAIT:
 		case SCTP_STATE_COOKIE_ECHOED:
 			/*
 			 * INIT was sent but got a COOKIE_ECHO with the
 			 * correct tags... just accept it...but we must
 			 * process the init so that we can make sure we have
 			 * the right seq no's.
 			 */
 			/* First we must process the INIT !! */
 			retval = sctp_process_init(init_cp, stcb);
 			if (retval < 0) {
 				if (how_indx < sizeof(asoc->cookie_how))
 					asoc->cookie_how[how_indx] = 3;
 				return (NULL);
 			}
 			/* we have already processed the INIT so no problem */
 			sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp,
 			    stcb, net,
 			    SCTP_FROM_SCTP_INPUT + SCTP_LOC_13);
 			sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp,
 			    stcb, net,
 			    SCTP_FROM_SCTP_INPUT + SCTP_LOC_14);
 			/* update current state */
 			if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
 				SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
 			else
 				SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
 
 			SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
 			if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
 				sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
 				    stcb->sctp_ep, stcb, asoc->primary_destination);
 			}
 			SCTP_STAT_INCR_GAUGE32(sctps_currestab);
 			sctp_stop_all_cookie_timers(stcb);
 			if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 			    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
 			    (!SCTP_IS_LISTENING(inp))) {
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				struct socket *so;
 #endif
 				/*
 				 * Here is where collision would go if we
 				 * did a connect() and instead got a
 				 * init/init-ack/cookie done before the
 				 * init-ack came back..
 				 */
 				stcb->sctp_ep->sctp_flags |=
 				    SCTP_PCB_FLAGS_CONNECTED;
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				so = SCTP_INP_SO(stcb->sctp_ep);
 				atomic_add_int(&stcb->asoc.refcnt, 1);
 				SCTP_TCB_UNLOCK(stcb);
 				SCTP_SOCKET_LOCK(so, 1);
 				SCTP_TCB_LOCK(stcb);
 				atomic_add_int(&stcb->asoc.refcnt, -1);
 				if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
 					SCTP_SOCKET_UNLOCK(so, 1);
 					return (NULL);
 				}
 #endif
 				soisconnected(stcb->sctp_socket);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 			}
 			/* notify upper layer */
 			*notification = SCTP_NOTIFY_ASSOC_UP;
 			/*
 			 * since we did not send a HB make sure we don't
 			 * double things
 			 */
 			net->hb_responded = 1;
 			net->RTO = sctp_calculate_rto(stcb, asoc, net,
 			    &cookie->time_entered,
 			    sctp_align_unsafe_makecopy,
 			    SCTP_RTT_FROM_NON_DATA);
 
 			if (stcb->asoc.sctp_autoclose_ticks &&
 			    (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE))) {
 				sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE,
 				    inp, stcb, NULL);
 			}
 			break;
 		default:
 			/*
 			 * we're in the OPEN state (or beyond), so peer must
 			 * have simply lost the COOKIE-ACK
 			 */
 			break;
 		}		/* end switch */
 		sctp_stop_all_cookie_timers(stcb);
 		/*
 		 * We ignore the return code here.. not sure if we should
 		 * somehow abort.. but we do have an existing asoc. This
 		 * really should not fail.
 		 */
 		if (sctp_load_addresses_from_init(stcb, m,
 		    init_offset + sizeof(struct sctp_init_chunk),
 		    initack_offset, src, dst, init_src, stcb->asoc.port)) {
 			if (how_indx < sizeof(asoc->cookie_how))
 				asoc->cookie_how[how_indx] = 4;
 			return (NULL);
 		}
 		/* respond with a COOKIE-ACK */
 		sctp_toss_old_cookies(stcb, asoc);
 		sctp_send_cookie_ack(stcb);
 		if (how_indx < sizeof(asoc->cookie_how))
 			asoc->cookie_how[how_indx] = 5;
 		return (stcb);
 	}
 	if (ntohl(initack_cp->init.initiate_tag) != asoc->my_vtag &&
 	    ntohl(init_cp->init.initiate_tag) == asoc->peer_vtag &&
 	    cookie->tie_tag_my_vtag == 0 &&
 	    cookie->tie_tag_peer_vtag == 0) {
 		/*
 		 * case C in Section 5.2.4 Table 2: XMOO silently discard
 		 */
 		if (how_indx < sizeof(asoc->cookie_how))
 			asoc->cookie_how[how_indx] = 6;
 		return (NULL);
 	}
 	/*
 	 * If nat support, and the below and stcb is established, send back
 	 * a ABORT(colliding state) if we are established.
 	 */
 	if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) &&
 	    (asoc->peer_supports_nat) &&
 	    ((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) &&
 	    ((ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) ||
 	    (asoc->peer_vtag == 0)))) {
 		/*
 		 * Special case - Peer's support nat. We may have two init's
 		 * that we gave out the same tag on since one was not
 		 * established.. i.e. we get INIT from host-1 behind the nat
 		 * and we respond tag-a, we get a INIT from host-2 behind
 		 * the nat and we get tag-a again. Then we bring up host-1
 		 * (or 2's) assoc, Then comes the cookie from hsot-2 (or 1).
 		 * Now we have colliding state. We must send an abort here
 		 * with colliding state indication.
 		 */
 		op_err = sctp_generate_cause(SCTP_CAUSE_NAT_COLLIDING_STATE, "");
 		sctp_send_abort(m, iphlen, src, dst, sh, 0, op_err,
 		    mflowtype, mflowid, inp->fibnum,
 		    vrf_id, port);
 		return (NULL);
 	}
 	if ((ntohl(initack_cp->init.initiate_tag) == asoc->my_vtag) &&
 	    ((ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) ||
 	    (asoc->peer_vtag == 0))) {
 		/*
 		 * case B in Section 5.2.4 Table 2: MXAA or MOAA my info
 		 * should be ok, re-accept peer info
 		 */
 		if (ntohl(initack_cp->init.initial_tsn) != asoc->init_seq_number) {
 			/*
 			 * Extension of case C. If we hit this, then the
 			 * random number generator returned the same vtag
 			 * when we first sent our INIT-ACK and when we later
 			 * sent our INIT. The side with the seq numbers that
 			 * are different will be the one that normally
 			 * would have hit case C. This in effect "extends"
 			 * our vtags in this collision case to be 64 bits.
 			 * The same collision could occur aka you get both
 			 * vtag and seq number the same twice in a row.. but
 			 * is much less likely. If it did happen then we
 			 * would proceed through and bring up the assoc.. we
 			 * may end up with the wrong stream setup however..
 			 * which would be bad.. but there is no way to
 			 * tell.. until we send on a stream that does not
 			 * exist :-)
 			 */
 			if (how_indx < sizeof(asoc->cookie_how))
 				asoc->cookie_how[how_indx] = 7;
 
 			return (NULL);
 		}
 		if (how_indx < sizeof(asoc->cookie_how))
 			asoc->cookie_how[how_indx] = 8;
 		sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
 		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_15);
 		sctp_stop_all_cookie_timers(stcb);
 		/*
 		 * since we did not send a HB make sure we don't double
 		 * things
 		 */
 		net->hb_responded = 1;
 		if (stcb->asoc.sctp_autoclose_ticks &&
 		    sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
 			sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb,
 			    NULL);
 		}
 		asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd);
 		asoc->pre_open_streams = ntohs(initack_cp->init.num_outbound_streams);
 
 		if (ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) {
 			/*
 			 * Ok the peer probably discarded our data (if we
 			 * echoed a cookie+data). So anything on the
 			 * sent_queue should be marked for retransmit, we
 			 * may not get something to kick us so it COULD
 			 * still take a timeout to move these.. but it can't
 			 * hurt to mark them.
 			 */
 			struct sctp_tmit_chunk *chk;
 
 			TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
 				if (chk->sent < SCTP_DATAGRAM_RESEND) {
 					chk->sent = SCTP_DATAGRAM_RESEND;
 					sctp_flight_size_decrease(chk);
 					sctp_total_flight_decrease(stcb, chk);
 					sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
 					spec_flag++;
 				}
 			}
 
 		}
 		/* process the INIT info (peer's info) */
 		retval = sctp_process_init(init_cp, stcb);
 		if (retval < 0) {
 			if (how_indx < sizeof(asoc->cookie_how))
 				asoc->cookie_how[how_indx] = 9;
 			return (NULL);
 		}
 		if (sctp_load_addresses_from_init(stcb, m,
 		    init_offset + sizeof(struct sctp_init_chunk),
 		    initack_offset, src, dst, init_src, stcb->asoc.port)) {
 			if (how_indx < sizeof(asoc->cookie_how))
 				asoc->cookie_how[how_indx] = 10;
 			return (NULL);
 		}
 		if ((asoc->state & SCTP_STATE_COOKIE_WAIT) ||
 		    (asoc->state & SCTP_STATE_COOKIE_ECHOED)) {
 			*notification = SCTP_NOTIFY_ASSOC_UP;
 
 			if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 			    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
 			    (!SCTP_IS_LISTENING(inp))) {
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				struct socket *so;
 #endif
 				stcb->sctp_ep->sctp_flags |=
 				    SCTP_PCB_FLAGS_CONNECTED;
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				so = SCTP_INP_SO(stcb->sctp_ep);
 				atomic_add_int(&stcb->asoc.refcnt, 1);
 				SCTP_TCB_UNLOCK(stcb);
 				SCTP_SOCKET_LOCK(so, 1);
 				SCTP_TCB_LOCK(stcb);
 				atomic_add_int(&stcb->asoc.refcnt, -1);
 				if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
 					SCTP_SOCKET_UNLOCK(so, 1);
 					return (NULL);
 				}
 #endif
 				soisconnected(stcb->sctp_socket);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 			}
 			if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED)
 				SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
 			else
 				SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
 			SCTP_STAT_INCR_GAUGE32(sctps_currestab);
 		} else if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
 			SCTP_STAT_INCR_COUNTER32(sctps_restartestab);
 		} else {
 			SCTP_STAT_INCR_COUNTER32(sctps_collisionestab);
 		}
 		SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
 		if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
 			sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
 			    stcb->sctp_ep, stcb, asoc->primary_destination);
 		}
 		sctp_stop_all_cookie_timers(stcb);
 		sctp_toss_old_cookies(stcb, asoc);
 		sctp_send_cookie_ack(stcb);
 		if (spec_flag) {
 			/*
 			 * only if we have retrans set do we do this. What
 			 * this call does is get only the COOKIE-ACK out and
 			 * then when we return the normal call to
 			 * sctp_chunk_output will get the retrans out behind
 			 * this.
 			 */
 			sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_COOKIE_ACK, SCTP_SO_NOT_LOCKED);
 		}
 		if (how_indx < sizeof(asoc->cookie_how))
 			asoc->cookie_how[how_indx] = 11;
 
 		return (stcb);
 	}
 	if ((ntohl(initack_cp->init.initiate_tag) != asoc->my_vtag &&
 	    ntohl(init_cp->init.initiate_tag) != asoc->peer_vtag) &&
 	    cookie->tie_tag_my_vtag == asoc->my_vtag_nonce &&
 	    cookie->tie_tag_peer_vtag == asoc->peer_vtag_nonce &&
 	    cookie->tie_tag_peer_vtag != 0) {
 		struct sctpasochead *head;
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		struct socket *so;
 #endif
 
 		if (asoc->peer_supports_nat) {
 			/*
 			 * This is a gross gross hack. Just call the
 			 * cookie_new code since we are allowing a duplicate
 			 * association. I hope this works...
 			 */
 			return (sctp_process_cookie_new(m, iphlen, offset, src, dst,
 			    sh, cookie, cookie_len,
 			    inp, netp, init_src, notification,
 			    auth_skipped, auth_offset, auth_len,
 			    mflowtype, mflowid,
 			    vrf_id, port));
 		}
 		/*
 		 * case A in Section 5.2.4 Table 2: XXMM (peer restarted)
 		 */
 		/* temp code */
 		if (how_indx < sizeof(asoc->cookie_how))
 			asoc->cookie_how[how_indx] = 12;
 		sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb, net,
 		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_16);
 		sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
 		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_17);
 
 		/* notify upper layer */
 		*notification = SCTP_NOTIFY_ASSOC_RESTART;
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		if ((SCTP_GET_STATE(asoc) != SCTP_STATE_OPEN) &&
 		    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_RECEIVED) &&
 		    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT)) {
 			SCTP_STAT_INCR_GAUGE32(sctps_currestab);
 		}
 		if (SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) {
 			SCTP_STAT_INCR_GAUGE32(sctps_restartestab);
 		} else if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
 			SCTP_STAT_INCR_GAUGE32(sctps_collisionestab);
 		}
 		if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
 			SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
 			sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
 			    stcb->sctp_ep, stcb, asoc->primary_destination);
 
 		} else if (!(asoc->state & SCTP_STATE_SHUTDOWN_SENT)) {
 			/* move to OPEN state, if not in SHUTDOWN_SENT */
 			SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
 		}
 		asoc->pre_open_streams =
 		    ntohs(initack_cp->init.num_outbound_streams);
 		asoc->init_seq_number = ntohl(initack_cp->init.initial_tsn);
 		asoc->sending_seq = asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number;
 		asoc->asconf_seq_out_acked = asoc->asconf_seq_out - 1;
 
 		asoc->asconf_seq_in = asoc->last_acked_seq = asoc->init_seq_number - 1;
 
 		asoc->str_reset_seq_in = asoc->init_seq_number;
 
 		asoc->advanced_peer_ack_point = asoc->last_acked_seq;
 		if (asoc->mapping_array) {
 			memset(asoc->mapping_array, 0,
 			    asoc->mapping_array_size);
 		}
 		if (asoc->nr_mapping_array) {
 			memset(asoc->nr_mapping_array, 0,
 			    asoc->mapping_array_size);
 		}
 		SCTP_TCB_UNLOCK(stcb);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		so = SCTP_INP_SO(stcb->sctp_ep);
 		SCTP_SOCKET_LOCK(so, 1);
 #endif
 		SCTP_INP_INFO_WLOCK();
 		SCTP_INP_WLOCK(stcb->sctp_ep);
 		SCTP_TCB_LOCK(stcb);
 		atomic_add_int(&stcb->asoc.refcnt, -1);
 		/* send up all the data */
 		SCTP_TCB_SEND_LOCK(stcb);
 
 		sctp_report_all_outbound(stcb, 0, 1, SCTP_SO_LOCKED);
 		for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
 			stcb->asoc.strmout[i].chunks_on_queues = 0;
 #if defined(SCTP_DETAILED_STR_STATS)
 			for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) {
 				asoc->strmout[i].abandoned_sent[j] = 0;
 				asoc->strmout[i].abandoned_unsent[j] = 0;
 			}
 #else
 			asoc->strmout[i].abandoned_sent[0] = 0;
 			asoc->strmout[i].abandoned_unsent[0] = 0;
 #endif
 			stcb->asoc.strmout[i].sid = i;
 			stcb->asoc.strmout[i].next_mid_ordered = 0;
 			stcb->asoc.strmout[i].next_mid_unordered = 0;
 			stcb->asoc.strmout[i].last_msg_incomplete = 0;
 		}
 		/* process the INIT-ACK info (my info) */
 		asoc->my_vtag = ntohl(initack_cp->init.initiate_tag);
 		asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd);
 
 		/* pull from vtag hash */
 		LIST_REMOVE(stcb, sctp_asocs);
 		/* re-insert to new vtag position */
 		head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(stcb->asoc.my_vtag,
 		    SCTP_BASE_INFO(hashasocmark))];
 		/*
 		 * put it in the bucket in the vtag hash of assoc's for the
 		 * system
 		 */
 		LIST_INSERT_HEAD(head, stcb, sctp_asocs);
 
 		SCTP_TCB_SEND_UNLOCK(stcb);
 		SCTP_INP_WUNLOCK(stcb->sctp_ep);
 		SCTP_INP_INFO_WUNLOCK();
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 		asoc->total_flight = 0;
 		asoc->total_flight_count = 0;
 		/* process the INIT info (peer's info) */
 		retval = sctp_process_init(init_cp, stcb);
 		if (retval < 0) {
 			if (how_indx < sizeof(asoc->cookie_how))
 				asoc->cookie_how[how_indx] = 13;
 
 			return (NULL);
 		}
 		/*
 		 * since we did not send a HB make sure we don't double
 		 * things
 		 */
 		net->hb_responded = 1;
 
 		if (sctp_load_addresses_from_init(stcb, m,
 		    init_offset + sizeof(struct sctp_init_chunk),
 		    initack_offset, src, dst, init_src, stcb->asoc.port)) {
 			if (how_indx < sizeof(asoc->cookie_how))
 				asoc->cookie_how[how_indx] = 14;
 
 			return (NULL);
 		}
 		/* respond with a COOKIE-ACK */
 		sctp_stop_all_cookie_timers(stcb);
 		sctp_toss_old_cookies(stcb, asoc);
 		sctp_send_cookie_ack(stcb);
 		if (how_indx < sizeof(asoc->cookie_how))
 			asoc->cookie_how[how_indx] = 15;
 
 		return (stcb);
 	}
 	if (how_indx < sizeof(asoc->cookie_how))
 		asoc->cookie_how[how_indx] = 16;
 	/* all other cases... */
 	return (NULL);
 }
 
 
 /*
  * Handle a state cookie for a new association: validate the INIT and
  * INIT-ACK embedded in the cookie, allocate a TCB, load it from those
  * chunks and the cookie, move it to OPEN and answer with a COOKIE-ACK.
  *
  * m:            input packet mbuf chain. The original note states that a
  *               pullup on the IP/SCTP/COOKIE-ECHO chunk is assumed and that
  *               this is a "split" mbuf without the cookie signature.
  * iphlen, src, dst, sh, mflowtype, mflowid, vrf_id:
  *               handed to sctp_abort_association() on the abort paths;
  *               src/dst also go to sctp_load_addresses_from_init() and
  *               vrf_id to sctp_aloc_assoc().
  * offset:       offset into the mbuf chain of the COOKIE-ECHO chunk.
  * cookie:       the state cookie; scope flags, address legality flags,
  *               the local address and time_entered are read from it.
  * cookie_len:   length of the cookie chunk; offset + cookie_len bounds the
  *               INIT-ACK parameters.
  * inp:          endpoint the new association is created on.
  * netp:         if non-NULL, *netp is set to the sctp_nets for init_src.
  * init_src:     where the INIT was from; used as the peer address.
  * notification: set to SCTP_NOTIFY_ASSOC_UP once the association is up.
  * auth_skipped, auth_offset, auth_len:
  *               if auth_skipped is non-zero, the AUTH chunk at auth_offset
  *               (auth_len bytes) is verified against the new TCB.
  * port:         passed to sctp_aloc_assoc(), sctp_abort_association() and
  *               sctp_load_addresses_from_init().
  *
  * Returns the new TCB, or NULL on any failure. On failures after the TCB
  * has been allocated it is handed to sctp_free_assoc() before returning.
  */
 static struct sctp_tcb *
 sctp_process_cookie_new(struct mbuf *m, int iphlen, int offset,
     struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_state_cookie *cookie, int cookie_len,
     struct sctp_inpcb *inp, struct sctp_nets **netp,
     struct sockaddr *init_src, int *notification,
     int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
     uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	struct sctp_tcb *stcb;
 	struct sctp_init_chunk *init_cp, init_buf;
 	struct sctp_init_ack_chunk *initack_cp, initack_buf;
 	union sctp_sockstore store;
 	struct sctp_association *asoc;
 	int init_offset, initack_offset, initack_limit;
 	int retval;
 	int error = 0;
 	uint8_t auth_chunk_buf[SCTP_PARAM_BUFFER_SIZE];
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 
 	so = SCTP_INP_SO(inp);
 #endif
 
 	/*
 	 * find and validate the INIT chunk in the cookie (peer's info) the
 	 * INIT should start after the cookie-echo header struct (chunk
 	 * header, state cookie header struct)
 	 */
 	init_offset = offset + sizeof(struct sctp_cookie_echo_chunk);
 	/* init_buf is the scratch buffer handed to sctp_m_getptr() */
 	init_cp = (struct sctp_init_chunk *)
 	    sctp_m_getptr(m, init_offset, sizeof(struct sctp_init_chunk),
 	    (uint8_t *)&init_buf);
 	if (init_cp == NULL) {
 		/* could not pull a INIT chunk in cookie */
 		SCTPDBG(SCTP_DEBUG_INPUT1,
 		    "process_cookie_new: could not pull INIT chunk hdr\n");
 		return (NULL);
 	}
 	if (init_cp->ch.chunk_type != SCTP_INITIATION) {
 		SCTPDBG(SCTP_DEBUG_INPUT1, "HUH? process_cookie_new: could not find INIT chunk!\n");
 		return (NULL);
 	}
 	/*
 	 * The INIT-ACK starts right after the INIT; SCTP_SIZE32() presumably
 	 * rounds the INIT length up to a 4-byte boundary -- confirm against
 	 * the macro definition.
 	 */
 	initack_offset = init_offset + SCTP_SIZE32(ntohs(init_cp->ch.chunk_length));
 	/*
 	 * find and validate the INIT-ACK chunk in the cookie (my info) the
 	 * INIT-ACK follows the INIT chunk
 	 */
 	initack_cp = (struct sctp_init_ack_chunk *)
 	    sctp_m_getptr(m, initack_offset, sizeof(struct sctp_init_ack_chunk),
 	    (uint8_t *)&initack_buf);
 	if (initack_cp == NULL) {
 		/* could not pull INIT-ACK chunk in cookie */
 		SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: could not pull INIT-ACK chunk hdr\n");
 		return (NULL);
 	}
 	if (initack_cp->ch.chunk_type != SCTP_INITIATION_ACK) {
 		/* not an INIT-ACK where one is expected; drop silently */
 		return (NULL);
 	}
 	/*
 	 * NOTE: We can't use the INIT_ACK's chk_length to determine the
 	 * "initack_limit" value.  This is because the chk_length field
 	 * includes the length of the cookie, but the cookie is omitted when
 	 * the INIT and INIT_ACK are tacked onto the cookie...
 	 */
 	initack_limit = offset + cookie_len;
 
 	/*
 	 * now that we know the INIT/INIT-ACK are in place, create a new TCB
 	 * and populate
 	 */
 
 	/*
 	 * Here we do a trick, we set in NULL for the proc/thread argument.
 	 * We do this since in effect we only use the p argument when the
 	 * socket is unbound and we must do an implicit bind. Since we are
 	 * getting a cookie, we cannot be unbound.
 	 */
 	/* our tag and outbound stream count come from the INIT-ACK we sent */
 	stcb = sctp_aloc_assoc(inp, init_src, &error,
 	    ntohl(initack_cp->init.initiate_tag), vrf_id,
 	    ntohs(initack_cp->init.num_outbound_streams),
 	    port,
 	    (struct thread *)NULL
 	    );
 	if (stcb == NULL) {
 		struct mbuf *op_err;
 
 		/* memory problem? */
 		SCTPDBG(SCTP_DEBUG_INPUT1,
 		    "process_cookie_new: no room for another TCB!\n");
 		/* no TCB exists, so the abort is sent with a NULL stcb */
 		op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
 		sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
 		    src, dst, sh, op_err,
 		    mflowtype, mflowid,
 		    vrf_id, port);
 		return (NULL);
 	}
 	/* get the correct sctp_nets */
 	if (netp)
 		*netp = sctp_findnet(stcb, init_src);
 
 	asoc = &stcb->asoc;
 	/* get scope variables out of cookie */
 	asoc->scope.ipv4_local_scope = cookie->ipv4_scope;
 	asoc->scope.site_scope = cookie->site_scope;
 	asoc->scope.local_scope = cookie->local_scope;
 	asoc->scope.loopback_scope = cookie->loopback_scope;
 
 	/*
 	 * The address-family legality recorded in the cookie must still match
 	 * what the freshly allocated association has.
 	 */
 	if ((asoc->scope.ipv4_addr_legal != cookie->ipv4_addr_legal) ||
 	    (asoc->scope.ipv6_addr_legal != cookie->ipv6_addr_legal)) {
 		struct mbuf *op_err;
 
 		/*
 		 * Houston we have a problem. The EP changed while the
 		 * cookie was in flight. Only recourse is to abort the
 		 * association.
 		 */
 		/*
 		 * Hold a reference across the abort/free sequence.
 		 * NOTE(review): the abort is sent with a NULL stcb and the
 		 * TCB is then released separately via sctp_free_assoc();
 		 * stcb->asoc.refcnt is decremented after that call, which
 		 * presumably relies on the held reference deferring the
 		 * actual free -- confirm against sctp_free_assoc().
 		 */
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
 		sctp_abort_association(inp, (struct sctp_tcb *)NULL, m, iphlen,
 		    src, dst, sh, op_err,
 		    mflowtype, mflowid,
 		    vrf_id, port);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		/* drop the TCB lock, take the socket lock, retake the TCB lock */
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_SOCKET_LOCK(so, 1);
 		SCTP_TCB_LOCK(stcb);
 #endif
 		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_18);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 		return (NULL);
 	}
 	/* process the INIT-ACK info (my info) */
 	asoc->my_vtag = ntohl(initack_cp->init.initiate_tag);
 	asoc->my_rwnd = ntohl(initack_cp->init.a_rwnd);
 	asoc->pre_open_streams = ntohs(initack_cp->init.num_outbound_streams);
 	asoc->init_seq_number = ntohl(initack_cp->init.initial_tsn);
 	/* all outgoing sequence spaces start at our initial TSN */
 	asoc->sending_seq = asoc->asconf_seq_out = asoc->str_reset_seq_out = asoc->init_seq_number;
 	asoc->asconf_seq_out_acked = asoc->asconf_seq_out - 1;
 	asoc->asconf_seq_in = asoc->last_acked_seq = asoc->init_seq_number - 1;
 	asoc->str_reset_seq_in = asoc->init_seq_number;
 
 	asoc->advanced_peer_ack_point = asoc->last_acked_seq;
 
 	/* process the INIT info (peer's info) */
 	/*
 	 * NOTE(review): when netp is NULL the INIT is not processed at all
 	 * and retval is forced to 0 -- confirm that callers never pass a
 	 * NULL netp, or that skipping is intended.
 	 */
 	if (netp)
 		retval = sctp_process_init(init_cp, stcb);
 	else
 		retval = 0;
 	if (retval < 0) {
 		/* same hold / lock shuffle / free / release sequence as above */
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_SOCKET_LOCK(so, 1);
 		SCTP_TCB_LOCK(stcb);
 #endif
 		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_19);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 		return (NULL);
 	}
 	/* load all addresses */
 	/* a non-zero return is treated as failure and tears the TCB down */
 	if (sctp_load_addresses_from_init(stcb, m,
 	    init_offset + sizeof(struct sctp_init_chunk), initack_offset,
 	    src, dst, init_src, port)) {
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_SOCKET_LOCK(so, 1);
 		SCTP_TCB_LOCK(stcb);
 #endif
 		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_20);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 		return (NULL);
 	}
 	/*
 	 * verify any preceding AUTH chunk that was skipped
 	 */
 	/* pull the local authentication parameters from the cookie/init-ack */
 	/* the parameter area runs from the end of the INIT-ACK header to initack_limit */
 	sctp_auth_get_cookie_params(stcb, m,
 	    initack_offset + sizeof(struct sctp_init_ack_chunk),
 	    initack_limit - (initack_offset + sizeof(struct sctp_init_ack_chunk)));
 	if (auth_skipped) {
 		struct sctp_auth_chunk *auth;
 
 		/* auth_chunk_buf is the scratch buffer handed to sctp_m_getptr() */
 		auth = (struct sctp_auth_chunk *)
 		    sctp_m_getptr(m, auth_offset, auth_len, auth_chunk_buf);
 		/* a missing chunk or a non-zero sctp_handle_auth() result is a failure */
 		if ((auth == NULL) || sctp_handle_auth(stcb, auth, m, auth_offset)) {
 			/* auth HMAC failed, dump the assoc and packet */
 			SCTPDBG(SCTP_DEBUG_AUTH1,
 			    "COOKIE-ECHO: AUTH failed\n");
 			atomic_add_int(&stcb->asoc.refcnt, 1);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			SCTP_TCB_UNLOCK(stcb);
 			SCTP_SOCKET_LOCK(so, 1);
 			SCTP_TCB_LOCK(stcb);
 #endif
 			(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 			    SCTP_FROM_SCTP_INPUT + SCTP_LOC_21);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 			atomic_subtract_int(&stcb->asoc.refcnt, 1);
 			return (NULL);
 		} else {
 			/* remaining chunks checked... good to go */
 			stcb->asoc.authenticated = 1;
 		}
 	}
 	/* update current state */
 	SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n");
 	SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
 	/*
 	 * The SHUTDOWN_PENDING flag is tested after the state was set to
 	 * OPEN; presumably SCTP_SET_STATE() preserves such sub-state flags
 	 * -- confirm against the macro.
 	 */
 	if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
 		sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
 		    stcb->sctp_ep, stcb, asoc->primary_destination);
 	}
 	sctp_stop_all_cookie_timers(stcb);
 	SCTP_STAT_INCR_COUNTER32(sctps_passiveestab);
 	SCTP_STAT_INCR_GAUGE32(sctps_currestab);
 
 	/*
 	 * if we're doing ASCONFs, check to see if we have any new local
 	 * addresses that need to get added to the peer (eg. addresses
 	 * changed while cookie echo in flight).  This needs to be done
 	 * after we go to the OPEN state to do the correct asconf
 	 * processing. else, make sure we have the correct addresses in our
 	 * lists
 	 */
 
 	/* build the sockaddr in store; it is used by sctp_check_address_list() below */
 	/* pull in local_address (our "from" address) */
 	switch (cookie->laddr_type) {
 #ifdef INET
 	case SCTP_IPV4_ADDRESS:
 		/* source addr is IPv4 */
 		memset(&store.sin, 0, sizeof(struct sockaddr_in));
 		store.sin.sin_family = AF_INET;
 		store.sin.sin_len = sizeof(struct sockaddr_in);
 		store.sin.sin_addr.s_addr = cookie->laddress[0];
 		break;
 #endif
 #ifdef INET6
 	case SCTP_IPV6_ADDRESS:
 		/* source addr is IPv6 */
 		memset(&store.sin6, 0, sizeof(struct sockaddr_in6));
 		store.sin6.sin6_family = AF_INET6;
 		store.sin6.sin6_len = sizeof(struct sockaddr_in6);
 		store.sin6.sin6_scope_id = cookie->scope_id;
 		memcpy(&store.sin6.sin6_addr, cookie->laddress, sizeof(struct in6_addr));
 		break;
 #endif
 	default:
 		/*
 		 * Unknown (or compiled-out) local address type: tear the
 		 * association down. NOTE(review): this runs after the state
 		 * was set to OPEN and sctps_passiveestab/sctps_currestab
 		 * were incremented above -- confirm that sctp_free_assoc()
 		 * rebalances the gauge.
 		 */
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_SOCKET_LOCK(so, 1);
 		SCTP_TCB_LOCK(stcb);
 #endif
 		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 		    SCTP_FROM_SCTP_INPUT + SCTP_LOC_22);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 		return (NULL);
 	}
 
 	/* set up to notify upper layer */
 	*notification = SCTP_NOTIFY_ASSOC_UP;
 	if (((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 	    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) &&
 	    (!SCTP_IS_LISTENING(inp))) {
 		/*
 		 * This is an endpoint that called connect() how it got a
 		 * cookie that is NEW is a bit of a mystery. It must be that
 		 * the INIT was sent, but before it got there.. a complete
 		 * INIT/INIT-ACK/COOKIE arrived. But of course then it
 		 * should have went to the other code.. not here.. oh well..
 		 * a bit of protection is worth having..
 		 */
 		stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		/*
 		 * Take the socket lock: hold a reference while the TCB lock
 		 * is dropped and retaken, then bail out if the socket was
 		 * closed in the meantime.
 		 */
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_SOCKET_LOCK(so, 1);
 		SCTP_TCB_LOCK(stcb);
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 		if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
 			SCTP_SOCKET_UNLOCK(so, 1);
 			return (NULL);
 		}
 #endif
 		soisconnected(stcb->sctp_socket);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 	} else if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
 	    (SCTP_IS_LISTENING(inp))) {
 		/*
 		 * We don't want to do anything with this one. Since it is
 		 * the listening guy. The timer will get started for
 		 * accepted connections in the caller.
 		 */
 		;
 	}
 	/* since we did not send a HB make sure we don't double things */
 	if ((netp) && (*netp))
 		(*netp)->hb_responded = 1;
 
 	/* arm the autoclose timer only if it is configured and enabled on inp */
 	if (stcb->asoc.sctp_autoclose_ticks &&
 	    sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
 		sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb, NULL);
 	}
 	/* record when the association was entered */
 	(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
 	if ((netp != NULL) && (*netp != NULL)) {
 		/*
 		 * derive the RTO for this net from the cookie's time_entered
 		 * stamp (the encaps port is not touched here)
 		 */
 		(*netp)->RTO = sctp_calculate_rto(stcb, asoc, *netp,
 		    &cookie->time_entered, sctp_align_unsafe_makecopy,
 		    SCTP_RTT_FROM_NON_DATA);
 	}
 	/* respond with a COOKIE-ACK */
 	sctp_send_cookie_ack(stcb);
 
 	/*
 	 * check the address lists for any ASCONFs that need to be sent
 	 * AFTER the cookie-ack is sent
 	 */
 	sctp_check_address_list(stcb, m,
 	    initack_offset + sizeof(struct sctp_init_ack_chunk),
 	    initack_limit - (initack_offset + sizeof(struct sctp_init_ack_chunk)),
 	    &store.sa, cookie->local_scope, cookie->site_scope,
 	    cookie->ipv4_scope, cookie->loopback_scope);
 
 
 	return (stcb);
 }
 
 /*
  * CODE LIKE THIS NEEDS TO RUN IF the peer supports the NAT extension, i.e
  * we NEED to make sure we are not already using the vtag. If so we
  * need to send back an ABORT-TRY-AGAIN-WITH-NEW-TAG No middle box bit!
 	head = &SCTP_BASE_INFO(sctp_asochash)[SCTP_PCBHASH_ASOC(tag,
 							    SCTP_BASE_INFO(hashasocmark))];
 	LIST_FOREACH(stcb, head, sctp_asocs) {
 	        if ((stcb->asoc.my_vtag == tag) && (stcb->rport == rport) && (inp == stcb->sctp_ep))  {
 		       -- SEND ABORT - TRY AGAIN --
 		}
 	}
 */
 
 /*
  * handles a COOKIE-ECHO message stcb: modified to either a new or left as
  * existing (non-NULL) TCB
  */
 static struct mbuf *
 sctp_handle_cookie_echo(struct mbuf *m, int iphlen, int offset,
     struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_cookie_echo_chunk *cp,
     struct sctp_inpcb **inp_p, struct sctp_tcb **stcb, struct sctp_nets **netp,
     int auth_skipped, uint32_t auth_offset, uint32_t auth_len,
     struct sctp_tcb **locked_tcb,
     uint8_t mflowtype, uint32_t mflowid,
     uint32_t vrf_id, uint16_t port)
 {
 	struct sctp_state_cookie *cookie;
 	struct sctp_tcb *l_stcb = *stcb;
 	struct sctp_inpcb *l_inp;
 	struct sockaddr *to;
 	struct sctp_pcb *ep;
 	struct mbuf *m_sig;
 	uint8_t calc_sig[SCTP_SIGNATURE_SIZE], tmp_sig[SCTP_SIGNATURE_SIZE];
 	uint8_t *sig;
 	uint8_t cookie_ok = 0;
 	unsigned int sig_offset, cookie_offset;
 	unsigned int cookie_len;
 	struct timeval now;
 	struct timeval time_expires;
 	int notification = 0;
 	struct sctp_nets *netl;
 	int had_a_existing_tcb = 0;
 	int send_int_conf = 0;
 #ifdef INET
 	struct sockaddr_in sin;
 #endif
 #ifdef INET6
 	struct sockaddr_in6 sin6;
 #endif
 
 	SCTPDBG(SCTP_DEBUG_INPUT2,
 	    "sctp_handle_cookie: handling COOKIE-ECHO\n");
 
 	if (inp_p == NULL) {
 		return (NULL);
 	}
 	cookie = &cp->cookie;
 	cookie_offset = offset + sizeof(struct sctp_chunkhdr);
 	cookie_len = ntohs(cp->ch.chunk_length);
 
 	if ((cookie->peerport != sh->src_port) ||
 	    (cookie->myport != sh->dest_port) ||
 	    (cookie->my_vtag != sh->v_tag)) {
 		/*
 		 * invalid ports or bad tag.  Note that we always leave the
 		 * v_tag in the header in network order and when we stored
 		 * it in the my_vtag slot we also left it in network order.
 		 * This maintains the match even though it may be in the
 		 * opposite byte order of the machine :->
 		 */
 		return (NULL);
 	}
 	if (cookie_len < sizeof(struct sctp_cookie_echo_chunk) +
 	    sizeof(struct sctp_init_chunk) +
 	    sizeof(struct sctp_init_ack_chunk) + SCTP_SIGNATURE_SIZE) {
 		/* cookie too small */
 		return (NULL);
 	}
 	/*
 	 * split off the signature into its own mbuf (since it should not be
 	 * calculated in the sctp_hmac_m() call).
 	 */
 	sig_offset = offset + cookie_len - SCTP_SIGNATURE_SIZE;
 	m_sig = m_split(m, sig_offset, M_NOWAIT);
 	if (m_sig == NULL) {
 		/* out of memory or ?? */
 		return (NULL);
 	}
 #ifdef SCTP_MBUF_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 		sctp_log_mbc(m_sig, SCTP_MBUF_SPLIT);
 	}
 #endif
 
 	/*
 	 * compute the signature/digest for the cookie
 	 */
 	ep = &(*inp_p)->sctp_ep;
 	l_inp = *inp_p;
 	if (l_stcb) {
 		SCTP_TCB_UNLOCK(l_stcb);
 	}
 	SCTP_INP_RLOCK(l_inp);
 	if (l_stcb) {
 		SCTP_TCB_LOCK(l_stcb);
 	}
 	/* which cookie is it? */
 	if ((cookie->time_entered.tv_sec < (long)ep->time_of_secret_change) &&
 	    (ep->current_secret_number != ep->last_secret_number)) {
 		/* it's the old cookie */
 		(void)sctp_hmac_m(SCTP_HMAC,
 		    (uint8_t *)ep->secret_key[(int)ep->last_secret_number],
 		    SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0);
 	} else {
 		/* it's the current cookie */
 		(void)sctp_hmac_m(SCTP_HMAC,
 		    (uint8_t *)ep->secret_key[(int)ep->current_secret_number],
 		    SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0);
 	}
 	/* get the signature */
 	SCTP_INP_RUNLOCK(l_inp);
 	sig = (uint8_t *)sctp_m_getptr(m_sig, 0, SCTP_SIGNATURE_SIZE, (uint8_t *)&tmp_sig);
 	if (sig == NULL) {
 		/* couldn't find signature */
 		sctp_m_freem(m_sig);
 		return (NULL);
 	}
 	/* compare the received digest with the computed digest */
 	if (memcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) != 0) {
 		/* try the old cookie? */
 		if ((cookie->time_entered.tv_sec == (long)ep->time_of_secret_change) &&
 		    (ep->current_secret_number != ep->last_secret_number)) {
 			/* compute digest with old */
 			(void)sctp_hmac_m(SCTP_HMAC,
 			    (uint8_t *)ep->secret_key[(int)ep->last_secret_number],
 			    SCTP_SECRET_SIZE, m, cookie_offset, calc_sig, 0);
 			/* compare */
 			if (memcmp(calc_sig, sig, SCTP_SIGNATURE_SIZE) == 0)
 				cookie_ok = 1;
 		}
 	} else {
 		cookie_ok = 1;
 	}
 
 	/*
 	 * Now before we continue we must reconstruct our mbuf so that
 	 * normal processing of any other chunks will work.
 	 */
 	{
 		struct mbuf *m_at;
 
 		m_at = m;
 		while (SCTP_BUF_NEXT(m_at) != NULL) {
 			m_at = SCTP_BUF_NEXT(m_at);
 		}
 		SCTP_BUF_NEXT(m_at) = m_sig;
 	}
 
 	if (cookie_ok == 0) {
 		SCTPDBG(SCTP_DEBUG_INPUT2, "handle_cookie_echo: cookie signature validation failed!\n");
 		SCTPDBG(SCTP_DEBUG_INPUT2,
 		    "offset = %u, cookie_offset = %u, sig_offset = %u\n",
 		    (uint32_t)offset, cookie_offset, sig_offset);
 		return (NULL);
 	}
 	/*
 	 * check the cookie timestamps to be sure it's not stale
 	 */
 	(void)SCTP_GETTIME_TIMEVAL(&now);
 	/* Expire time is in Ticks, so we convert to seconds */
 	time_expires.tv_sec = cookie->time_entered.tv_sec + TICKS_TO_SEC(cookie->cookie_life);
 	time_expires.tv_usec = cookie->time_entered.tv_usec;
 	/*
 	 * TODO sctp_constants.h needs alternative time macros when _KERNEL
 	 * is undefined.
 	 */
 	if (timevalcmp(&now, &time_expires, >)) {
 		/* cookie is stale! */
 		struct mbuf *op_err;
 		struct sctp_error_stale_cookie *cause;
 		uint32_t tim;
 
 		op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_error_stale_cookie),
 		    0, M_NOWAIT, 1, MT_DATA);
 		if (op_err == NULL) {
 			/* FOOBAR */
 			return (NULL);
 		}
 		/* Set the len */
 		SCTP_BUF_LEN(op_err) = sizeof(struct sctp_error_stale_cookie);
 		cause = mtod(op_err, struct sctp_error_stale_cookie *);
 		cause->cause.code = htons(SCTP_CAUSE_STALE_COOKIE);
 		cause->cause.length = htons((sizeof(struct sctp_paramhdr) +
 		    (sizeof(uint32_t))));
 		/* seconds to usec */
 		tim = (now.tv_sec - time_expires.tv_sec) * 1000000;
 		/* add in usec */
 		if (tim == 0)
 			tim = now.tv_usec - cookie->time_entered.tv_usec;
 		cause->stale_time = htonl(tim);
 		sctp_send_operr_to(src, dst, sh, cookie->peers_vtag, op_err,
 		    mflowtype, mflowid, l_inp->fibnum,
 		    vrf_id, port);
 		return (NULL);
 	}
 	/*
 	 * Now we must see with the lookup address if we have an existing
 	 * asoc. This will only happen if we were in the COOKIE-WAIT state
 	 * and a INIT collided with us and somewhere the peer sent the
 	 * cookie on another address besides the single address our assoc
 	 * had for him. In this case we will have one of the tie-tags set at
 	 * least AND the address field in the cookie can be used to look it
 	 * up.
 	 */
 	to = NULL;
 	switch (cookie->addr_type) {
 #ifdef INET6
 	case SCTP_IPV6_ADDRESS:
 		memset(&sin6, 0, sizeof(sin6));
 		sin6.sin6_family = AF_INET6;
 		sin6.sin6_len = sizeof(sin6);
 		sin6.sin6_port = sh->src_port;
 		sin6.sin6_scope_id = cookie->scope_id;
 		memcpy(&sin6.sin6_addr.s6_addr, cookie->address,
 		    sizeof(sin6.sin6_addr.s6_addr));
 		to = (struct sockaddr *)&sin6;
 		break;
 #endif
 #ifdef INET
 	case SCTP_IPV4_ADDRESS:
 		memset(&sin, 0, sizeof(sin));
 		sin.sin_family = AF_INET;
 		sin.sin_len = sizeof(sin);
 		sin.sin_port = sh->src_port;
 		sin.sin_addr.s_addr = cookie->address[0];
 		to = (struct sockaddr *)&sin;
 		break;
 #endif
 	default:
 		/* This should not happen */
 		return (NULL);
 	}
 	if (*stcb == NULL) {
 		/* Yep, lets check */
 		*stcb = sctp_findassociation_ep_addr(inp_p, to, netp, dst, NULL);
 		if (*stcb == NULL) {
 			/*
 			 * We should have only got back the same inp. If we
 			 * got back a different ep we have a problem. The
 			 * original findep got back l_inp, and now the
 			 * lookup by address has returned a different one.
 			 */
 			if (l_inp != *inp_p) {
 				SCTP_PRINTF("Bad problem find_ep got a diff inp then special_locate?\n");
 			}
 		} else {
 			if (*locked_tcb == NULL) {
 				/*
 				 * In this case we found the assoc only
 				 * after we locked the create lock. This
 				 * means we are in a colliding case and we
 				 * must make sure that we unlock the tcb if
 				 * its one of the cases where we throw away
 				 * the incoming packets.
 				 */
 				*locked_tcb = *stcb;
 
 				/*
 				 * We must also increment the inp ref count
 				 * since the ref_count flags was set when we
 				 * did not find the TCB, now we found it
 				 * which reduces the refcount.. we must
 				 * raise it back out to balance it all :-)
 				 */
 				SCTP_INP_INCR_REF((*stcb)->sctp_ep);
 				if ((*stcb)->sctp_ep != l_inp) {
 					SCTP_PRINTF("Huh? ep:%p diff then l_inp:%p?\n",
 					    (void *)(*stcb)->sctp_ep, (void *)l_inp);
 				}
 			}
 		}
 	}
 	cookie_len -= SCTP_SIGNATURE_SIZE;
 	if (*stcb == NULL) {
 		/* this is the "normal" case... get a new TCB */
 		*stcb = sctp_process_cookie_new(m, iphlen, offset, src, dst, sh,
 		    cookie, cookie_len, *inp_p,
 		    netp, to, &notification,
 		    auth_skipped, auth_offset, auth_len,
 		    mflowtype, mflowid,
 		    vrf_id, port);
 	} else {
 		/* this is abnormal... cookie-echo on existing TCB */
 		had_a_existing_tcb = 1;
 		*stcb = sctp_process_cookie_existing(m, iphlen, offset,
 		    src, dst, sh,
 		    cookie, cookie_len, *inp_p, *stcb, netp, to,
 		    &notification, auth_skipped, auth_offset, auth_len,
 		    mflowtype, mflowid,
 		    vrf_id, port);
 	}
 
 	if (*stcb == NULL) {
 		/* still no TCB... must be bad cookie-echo */
 		return (NULL);
 	}
 	if (*netp != NULL) {
 		(*netp)->flowtype = mflowtype;
 		(*netp)->flowid = mflowid;
 	}
 	/*
 	 * Ok, we built an association so confirm the address we sent the
 	 * INIT-ACK to.
 	 */
 	netl = sctp_findnet(*stcb, to);
 	/*
 	 * This code should in theory NOT run but
 	 */
 	if (netl == NULL) {
 		/* TSNH! Huh, why do I need to add this address here? */
 		if (sctp_add_remote_addr(*stcb, to, NULL, port,
 		    SCTP_DONOT_SETSCOPE, SCTP_IN_COOKIE_PROC)) {
 			return (NULL);
 		}
 		netl = sctp_findnet(*stcb, to);
 	}
 	if (netl) {
 		if (netl->dest_state & SCTP_ADDR_UNCONFIRMED) {
 			netl->dest_state &= ~SCTP_ADDR_UNCONFIRMED;
 			(void)sctp_set_primary_addr((*stcb), (struct sockaddr *)NULL,
 			    netl);
 			send_int_conf = 1;
 		}
 	}
 	sctp_start_net_timers(*stcb);
 	if ((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
 		if (!had_a_existing_tcb ||
 		    (((*inp_p)->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0)) {
 			/*
 			 * If we have a NEW cookie or the connect never
 			 * reached the connected state during collision we
 			 * must do the TCP accept thing.
 			 */
 			struct socket *so, *oso;
 			struct sctp_inpcb *inp;
 
 			if (notification == SCTP_NOTIFY_ASSOC_RESTART) {
 				/*
 				 * For a restart we will keep the same
 				 * socket, no need to do anything. I THINK!!
 				 */
 				sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
 				if (send_int_conf) {
 					sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
 					    (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED);
 				}
 				return (m);
 			}
 			oso = (*inp_p)->sctp_socket;
 			atomic_add_int(&(*stcb)->asoc.refcnt, 1);
 			SCTP_TCB_UNLOCK((*stcb));
 			CURVNET_SET(oso->so_vnet);
 			so = sonewconn(oso, 0
 			    );
 			CURVNET_RESTORE();
 			SCTP_TCB_LOCK((*stcb));
 			atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
 
 			if (so == NULL) {
 				struct mbuf *op_err;
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				struct socket *pcb_so;
 #endif
 				/* Too many sockets */
 				SCTPDBG(SCTP_DEBUG_INPUT1, "process_cookie_new: no room for another socket!\n");
 				op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
 				sctp_abort_association(*inp_p, NULL, m, iphlen,
 				    src, dst, sh, op_err,
 				    mflowtype, mflowid,
 				    vrf_id, port);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				pcb_so = SCTP_INP_SO(*inp_p);
 				atomic_add_int(&(*stcb)->asoc.refcnt, 1);
 				SCTP_TCB_UNLOCK((*stcb));
 				SCTP_SOCKET_LOCK(pcb_so, 1);
 				SCTP_TCB_LOCK((*stcb));
 				atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
 #endif
 				(void)sctp_free_assoc(*inp_p, *stcb, SCTP_NORMAL_PROC,
 				    SCTP_FROM_SCTP_INPUT + SCTP_LOC_23);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 				SCTP_SOCKET_UNLOCK(pcb_so, 1);
 #endif
 				return (NULL);
 			}
 			inp = (struct sctp_inpcb *)so->so_pcb;
 			SCTP_INP_INCR_REF(inp);
 			/*
 			 * We add the unbound flag here so that if we get an
 			 * soabort() before we get the move_pcb done, we
 			 * will properly cleanup.
 			 */
 			inp->sctp_flags = (SCTP_PCB_FLAGS_TCPTYPE |
 			    SCTP_PCB_FLAGS_CONNECTED |
 			    SCTP_PCB_FLAGS_IN_TCPPOOL |
 			    SCTP_PCB_FLAGS_UNBOUND |
 			    (SCTP_PCB_COPY_FLAGS & (*inp_p)->sctp_flags) |
 			    SCTP_PCB_FLAGS_DONT_WAKE);
 			inp->sctp_features = (*inp_p)->sctp_features;
 			inp->sctp_mobility_features = (*inp_p)->sctp_mobility_features;
 			inp->sctp_socket = so;
 			inp->sctp_frag_point = (*inp_p)->sctp_frag_point;
 			inp->max_cwnd = (*inp_p)->max_cwnd;
 			inp->sctp_cmt_on_off = (*inp_p)->sctp_cmt_on_off;
 			inp->ecn_supported = (*inp_p)->ecn_supported;
 			inp->prsctp_supported = (*inp_p)->prsctp_supported;
 			inp->auth_supported = (*inp_p)->auth_supported;
 			inp->asconf_supported = (*inp_p)->asconf_supported;
 			inp->reconfig_supported = (*inp_p)->reconfig_supported;
 			inp->nrsack_supported = (*inp_p)->nrsack_supported;
 			inp->pktdrop_supported = (*inp_p)->pktdrop_supported;
 			inp->partial_delivery_point = (*inp_p)->partial_delivery_point;
 			inp->sctp_context = (*inp_p)->sctp_context;
 			inp->local_strreset_support = (*inp_p)->local_strreset_support;
 			inp->fibnum = (*inp_p)->fibnum;
 			inp->inp_starting_point_for_iterator = NULL;
 			/*
 			 * copy in the authentication parameters from the
 			 * original endpoint
 			 */
 			if (inp->sctp_ep.local_hmacs)
 				sctp_free_hmaclist(inp->sctp_ep.local_hmacs);
 			inp->sctp_ep.local_hmacs =
 			    sctp_copy_hmaclist((*inp_p)->sctp_ep.local_hmacs);
 			if (inp->sctp_ep.local_auth_chunks)
 				sctp_free_chunklist(inp->sctp_ep.local_auth_chunks);
 			inp->sctp_ep.local_auth_chunks =
 			    sctp_copy_chunklist((*inp_p)->sctp_ep.local_auth_chunks);
 
 			/*
 			 * Now we must move it from one hash table to
 			 * another and get the tcb in the right place.
 			 */
 
 			/*
 			 * This is where the one-2-one socket is put into
 			 * the accept state waiting for the accept!
 			 */
 			if (*stcb) {
 				(*stcb)->asoc.state |= SCTP_STATE_IN_ACCEPT_QUEUE;
 			}
 			sctp_move_pcb_and_assoc(*inp_p, inp, *stcb);
 
 			atomic_add_int(&(*stcb)->asoc.refcnt, 1);
 			SCTP_TCB_UNLOCK((*stcb));
 
 			sctp_pull_off_control_to_new_inp((*inp_p), inp, *stcb,
 			    0);
 			SCTP_TCB_LOCK((*stcb));
 			atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
 
 
 			/*
 			 * now we must check to see if we were aborted while
 			 * the move was going on and the lock/unlock
 			 * happened.
 			 */
 			if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
 				/*
 				 * yep it was, we leave the assoc attached
 				 * to the socket since the sctp_inpcb_free()
 				 * call will send an abort for us.
 				 */
 				SCTP_INP_DECR_REF(inp);
 				return (NULL);
 			}
 			SCTP_INP_DECR_REF(inp);
 			/* Switch over to the new guy */
 			*inp_p = inp;
 			sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
 			if (send_int_conf) {
 				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
 				    (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED);
 			}
 			/*
 			 * Pull it from the incomplete queue and wake the
 			 * guy
 			 */
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			atomic_add_int(&(*stcb)->asoc.refcnt, 1);
 			SCTP_TCB_UNLOCK((*stcb));
 			SCTP_SOCKET_LOCK(so, 1);
 #endif
 			soisconnected(so);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			SCTP_TCB_LOCK((*stcb));
 			atomic_subtract_int(&(*stcb)->asoc.refcnt, 1);
 			SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 			return (m);
 		}
 	}
 	if (notification) {
 		sctp_ulp_notify(notification, *stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
 	}
 	if (send_int_conf) {
 		sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_CONFIRMED,
 		    (*stcb), 0, (void *)netl, SCTP_SO_NOT_LOCKED);
 	}
 	return (m);
 }
 
 /*
  * Handle a COOKIE-ACK for an existing association.
  *
  * All cookie timers are stopped unconditionally.  When the association is
  * in the COOKIE-ECHOED state it is moved to OPEN: the net timers are
  * started, the establishment statistics are bumped, the RTO of 'net' is
  * recalculated (only when overall_error_count is zero), the ULP is told the
  * association is up and, for TCP-model endpoints, the socket is marked
  * connected.  In every state the old cookies are tossed and the send timer
  * is restarted when chunks remain on the sent queue.
  *
  * cp is never dereferenced.  The function returns immediately when either
  * stcb or net is NULL.
  */
 static void
 sctp_handle_cookie_ack(struct sctp_cookie_ack_chunk *cp SCTP_UNUSED,
     struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	/* cp must not be used, others call this without a c-ack :-) */
 	struct sctp_association *asoc;
 
 	SCTPDBG(SCTP_DEBUG_INPUT2,
 	    "sctp_handle_cookie_ack: handling COOKIE-ACK\n");
 	if ((stcb == NULL) || (net == NULL)) {
 		return;
 	}
 	asoc = &stcb->asoc;
 
 	sctp_stop_all_cookie_timers(stcb);
 	/* process according to association state */
 	if (SCTP_GET_STATE(asoc) == SCTP_STATE_COOKIE_ECHOED) {
 		/* state change only needed when I am in right state */
 		SCTPDBG(SCTP_DEBUG_INPUT2, "moving to OPEN state\n");
 		SCTP_SET_STATE(asoc, SCTP_STATE_OPEN);
 		sctp_start_net_timers(stcb);
 		/* A shutdown was already requested: arm the shutdown guard. */
 		if (asoc->state & SCTP_STATE_SHUTDOWN_PENDING) {
 			sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
 			    stcb->sctp_ep, stcb, asoc->primary_destination);
 
 		}
 		/* count the newly established association, then update RTO */
 		SCTP_STAT_INCR_COUNTER32(sctps_activeestab);
 		SCTP_STAT_INCR_GAUGE32(sctps_currestab);
 		if (asoc->overall_error_count == 0) {
 			net->RTO = sctp_calculate_rto(stcb, asoc, net,
 			    &asoc->time_entered, sctp_align_safe_nocopy,
 			    SCTP_RTT_FROM_NON_DATA);
 		}
 		/* time_entered now records when the OPEN state was reached */
 		(void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered);
 		sctp_ulp_notify(SCTP_NOTIFY_ASSOC_UP, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
 		if ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 		    (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			struct socket *so;
 
 #endif
 			stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			/*
 			 * Take the socket lock: the TCB lock is released
 			 * first and re-taken afterwards, with refcnt raised
 			 * across that window (presumably to keep the TCB
 			 * alive while it is unlocked).
 			 */
 			so = SCTP_INP_SO(stcb->sctp_ep);
 			atomic_add_int(&stcb->asoc.refcnt, 1);
 			SCTP_TCB_UNLOCK(stcb);
 			SCTP_SOCKET_LOCK(so, 1);
 			SCTP_TCB_LOCK(stcb);
 			atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
 			/* only wake the socket if it has not been closed */
 			if ((stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) == 0) {
 				soisconnected(stcb->sctp_socket);
 			}
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 			SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 		}
 		/*
 		 * since we did not send a HB make sure we don't double
 		 * things
 		 */
 		net->hb_responded = 1;
 
 		if (stcb->asoc.state & SCTP_STATE_CLOSED_SOCKET) {
 			/*
 			 * We don't need to do the asconf thing, nor hb or
 			 * autoclose if the socket is closed.
 			 */
 			goto closed_socket;
 		}
 		sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep,
 		    stcb, net);
 
 
 		/* autoclose needs both a tick value and the feature enabled */
 		if (stcb->asoc.sctp_autoclose_ticks &&
 		    sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_AUTOCLOSE)) {
 			sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE,
 			    stcb->sctp_ep, stcb, NULL);
 		}
 		/*
 		 * send ASCONF if parameters are pending and ASCONFs are
 		 * allowed (eg. addresses changed when init/cookie echo were
 		 * in flight)
 		 */
 		if ((sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_DO_ASCONF)) &&
 		    (stcb->asoc.asconf_supported == 1) &&
 		    (!TAILQ_EMPTY(&stcb->asoc.asconf_queue))) {
 #ifdef SCTP_TIMER_BASED_ASCONF
 			sctp_timer_start(SCTP_TIMER_TYPE_ASCONF,
 			    stcb->sctp_ep, stcb,
 			    stcb->asoc.primary_destination);
 #else
 			sctp_send_asconf(stcb, stcb->asoc.primary_destination,
 			    SCTP_ADDR_NOT_LOCKED);
 #endif
 		}
 	}
 	/* Reached in every association state, not only from the goto above. */
 closed_socket:
 	/* Toss the cookie if I can */
 	sctp_toss_old_cookies(stcb, asoc);
 	if (!TAILQ_EMPTY(&asoc->sent_queue)) {
 		/* Restart the timer if we have pending data */
 		struct sctp_tmit_chunk *chk;
 
 		chk = TAILQ_FIRST(&asoc->sent_queue);
 		sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, chk->whoTo);
 	}
 }
 
 static void
 sctp_handle_ecn_echo(struct sctp_ecne_chunk *cp,
     struct sctp_tcb *stcb)
 {
 	struct sctp_nets *net;
 	struct sctp_tmit_chunk *lchk;
 	struct sctp_ecne_chunk bkup;
 	uint8_t override_bit;
 	uint32_t tsn, window_data_tsn;
 	int len;
 	unsigned int pkt_cnt;
 
 	len = ntohs(cp->ch.chunk_length);
 	if ((len != sizeof(struct sctp_ecne_chunk)) &&
 	    (len != sizeof(struct old_sctp_ecne_chunk))) {
 		return;
 	}
 	if (len == sizeof(struct old_sctp_ecne_chunk)) {
 		/* Its the old format */
 		memcpy(&bkup, cp, sizeof(struct old_sctp_ecne_chunk));
 		bkup.num_pkts_since_cwr = htonl(1);
 		cp = &bkup;
 	}
 	SCTP_STAT_INCR(sctps_recvecne);
 	tsn = ntohl(cp->tsn);
 	pkt_cnt = ntohl(cp->num_pkts_since_cwr);
 	lchk = TAILQ_LAST(&stcb->asoc.send_queue, sctpchunk_listhead);
 	if (lchk == NULL) {
 		window_data_tsn = stcb->asoc.sending_seq - 1;
 	} else {
 		window_data_tsn = lchk->rec.data.tsn;
 	}
 
 	/* Find where it was sent to if possible. */
 	net = NULL;
 	TAILQ_FOREACH(lchk, &stcb->asoc.sent_queue, sctp_next) {
 		if (lchk->rec.data.tsn == tsn) {
 			net = lchk->whoTo;
 			net->ecn_prev_cwnd = lchk->rec.data.cwnd_at_send;
 			break;
 		}
 		if (SCTP_TSN_GT(lchk->rec.data.tsn, tsn)) {
 			break;
 		}
 	}
 	if (net == NULL) {
 		/*
 		 * What to do. A previous send of a CWR was possibly lost.
 		 * See how old it is, we may have it marked on the actual
 		 * net.
 		 */
 		TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 			if (tsn == net->last_cwr_tsn) {
 				/* Found him, send it off */
 				break;
 			}
 		}
 		if (net == NULL) {
 			/*
 			 * If we reach here, we need to send a special CWR
 			 * that says hey, we did this a long time ago and
 			 * you lost the response.
 			 */
 			net = TAILQ_FIRST(&stcb->asoc.nets);
 			if (net == NULL) {
 				/* TSNH */
 				return;
 			}
 			override_bit = SCTP_CWR_REDUCE_OVERRIDE;
 		} else {
 			override_bit = 0;
 		}
 	} else {
 		override_bit = 0;
 	}
 	if (SCTP_TSN_GT(tsn, net->cwr_window_tsn) &&
 	    ((override_bit & SCTP_CWR_REDUCE_OVERRIDE) == 0)) {
 		/*
 		 * JRS - Use the congestion control given in the pluggable
 		 * CC module
 		 */
 		stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo(stcb, net, 0, pkt_cnt);
 		/*
 		 * We reduce once every RTT. So we will only lower cwnd at
 		 * the next sending seq i.e. the window_data_tsn
 		 */
 		net->cwr_window_tsn = window_data_tsn;
 		net->ecn_ce_pkt_cnt += pkt_cnt;
 		net->lost_cnt = pkt_cnt;
 		net->last_cwr_tsn = tsn;
 	} else {
 		override_bit |= SCTP_CWR_IN_SAME_WINDOW;
 		if (SCTP_TSN_GT(tsn, net->last_cwr_tsn) &&
 		    ((override_bit & SCTP_CWR_REDUCE_OVERRIDE) == 0)) {
 			/*
 			 * Another loss in the same window update how many
 			 * marks/packets lost we have had.
 			 */
 			int cnt = 1;
 
 			if (pkt_cnt > net->lost_cnt) {
 				/* Should be the case */
 				cnt = (pkt_cnt - net->lost_cnt);
 				net->ecn_ce_pkt_cnt += cnt;
 			}
 			net->lost_cnt = pkt_cnt;
 			net->last_cwr_tsn = tsn;
 			/*
 			 * Most CC functions will ignore this call, since we
 			 * are in-window yet of the initial CE the peer saw.
 			 */
 			stcb->asoc.cc_functions.sctp_cwnd_update_after_ecn_echo(stcb, net, 1, cnt);
 		}
 	}
 	/*
 	 * We always send a CWR this way if our previous one was lost our
 	 * peer will get an update, or if it is not time again to reduce we
 	 * still get the cwr to the peer. Note we set the override when we
 	 * could not find the TSN on the chunk or the destination network.
 	 */
 	sctp_send_cwr(stcb, net, net->last_cwr_tsn, override_bit);
 }
 
 /*
  * Handle a CWR chunk from the peer.
  *
  * Walks the control send queue and drops every queued ECN-ECHO that the
  * CWR covers, i.e. whose TSN is not greater than the TSN carried in the
  * CWR.  Without the SCTP_CWR_REDUCE_OVERRIDE flag only ECN-ECHOs queued for
  * 'net' are considered and the walk stops after the first removal; with the
  * flag set every covered ECN-ECHO on the queue is removed.
  *
  * The safe iterator is required: in the override case the walk continues
  * after the current chunk has been unlinked and freed.
  */
 static void
 sctp_handle_ecn_cwr(struct sctp_cwr_chunk *cp, struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	struct sctp_tmit_chunk *chk, *nchk;
 	struct sctp_ecne_chunk *ecne;
 	int override;
 	uint32_t cwr_tsn;
 
 	cwr_tsn = ntohl(cp->tsn);
 	override = cp->ch.chunk_flags & SCTP_CWR_REDUCE_OVERRIDE;
 	TAILQ_FOREACH_SAFE(chk, &stcb->asoc.control_send_queue, sctp_next, nchk) {
 		if (chk->rec.chunk_id.id != SCTP_ECN_ECHO) {
 			continue;
 		}
 		if ((override == 0) && (chk->whoTo != net)) {
 			/* Must be from the right src unless override is set */
 			continue;
 		}
 		ecne = mtod(chk->data, struct sctp_ecne_chunk *);
 		if (SCTP_TSN_GE(cwr_tsn, ntohl(ecne->tsn))) {
 			/* this covers this ECNE, we can remove it */
 			stcb->asoc.ecn_echo_cnt_onq--;
 			TAILQ_REMOVE(&stcb->asoc.control_send_queue, chk,
 			    sctp_next);
 			sctp_m_freem(chk->data);
 			chk->data = NULL;
 			stcb->asoc.ctrl_queue_cnt--;
 			sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 			/* chk must not be touched past this point */
 			if (override == 0) {
 				break;
 			}
 		}
 	}
 }
 
 /*
  * Handle a SHUTDOWN-COMPLETE chunk.
  *
  * Nothing happens when stcb is NULL.  When the association is not in the
  * SHUTDOWN-ACK-SENT state the chunk is ignored and the TCB lock is
  * released before returning.  Otherwise the ULP is notified that the
  * association is down (if a socket is still attached), the SHUTDOWN-ACK
  * timer is stopped and the association is handed to sctp_free_assoc().
  *
  * cp is never dereferenced.
  */
 static void
 sctp_handle_shutdown_complete(struct sctp_shutdown_complete_chunk *cp SCTP_UNUSED,
     struct sctp_tcb *stcb, struct sctp_nets *net)
 {
 	struct sctp_association *asoc;
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 #endif
 
 	SCTPDBG(SCTP_DEBUG_INPUT2,
 	    "sctp_handle_shutdown_complete: handling SHUTDOWN-COMPLETE\n");
 	if (stcb == NULL)
 		return;
 
 	asoc = &stcb->asoc;
 	/* process according to association state */
 	if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT) {
 		/* unexpected SHUTDOWN-COMPLETE... so ignore... */
 		SCTPDBG(SCTP_DEBUG_INPUT2,
 		    "sctp_handle_shutdown_complete: not in SCTP_STATE_SHUTDOWN_ACK_SENT --- ignore\n");
 		/*
 		 * NOTE(review): this path returns with the TCB unlocked;
 		 * callers presumably must not touch stcb afterwards -
 		 * confirm against the call site.
 		 */
 		SCTP_TCB_UNLOCK(stcb);
 		return;
 	}
 	/* notify upper layer protocol */
 	if (stcb->sctp_socket) {
 		sctp_ulp_notify(SCTP_NOTIFY_ASSOC_DOWN, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
 	}
 #ifdef INVARIANTS
 	/* Debug kernels insist that nothing is left to send at this point. */
 	if (!TAILQ_EMPTY(&asoc->send_queue) ||
 	    !TAILQ_EMPTY(&asoc->sent_queue) ||
 	    sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED)) {
 		panic("Queues are not empty when handling SHUTDOWN-COMPLETE");
 	}
 #endif
 	/* stop the timer */
 	sctp_timer_stop(SCTP_TIMER_TYPE_SHUTDOWNACK, stcb->sctp_ep, stcb, net,
 	    SCTP_FROM_SCTP_INPUT + SCTP_LOC_24);
 	SCTP_STAT_INCR_COUNTER32(sctps_shutdown);
 	/* free the TCB */
 	SCTPDBG(SCTP_DEBUG_INPUT2,
 	    "sctp_handle_shutdown_complete: calls free-asoc\n");
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	/*
 	 * Take the socket lock around the free: the TCB lock is released
 	 * first and re-taken afterwards, with refcnt raised across that
 	 * window.
 	 */
 	so = SCTP_INP_SO(stcb->sctp_ep);
 	atomic_add_int(&stcb->asoc.refcnt, 1);
 	SCTP_TCB_UNLOCK(stcb);
 	SCTP_SOCKET_LOCK(so, 1);
 	SCTP_TCB_LOCK(stcb);
 	atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
 	(void)sctp_free_assoc(stcb->sctp_ep, stcb, SCTP_NORMAL_PROC,
 	    SCTP_FROM_SCTP_INPUT + SCTP_LOC_25);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 	return;
 }
 
 /*
  * React to one chunk that 'desc' reports as having been dropped.
  *
  * The action depends on desc->chunk_type:
  *  - DATA: locate the TSN on the sent queue, verify that the reported
  *    payload bytes match the stored chunk, then mark the chunk for
  *    retransmission, restart the send timer and fix the flight counters.
  *    The retransmit counter is audited afterwards.
  *  - ASCONF / COOKIE-ECHO: mark the queued control chunk for resend.
  *  - INIT, SACK, HEARTBEAT, SHUTDOWN, SHUTDOWN-ACK, COOKIE-ACK, ASCONF-ACK,
  *    FORWARD-TSN: send the chunk again right away (INIT and HEARTBEAT only
  *    within their retry limits).
  *  - everything else is ignored.
  *
  * 'flg' carries the SCTP_BADCRC / SCTP_FROM_MIDDLE_BOX bits of the report.
  *
  * Returns 0 normally and -1 when the payload of the reported DATA chunk
  * does not match the chunk found on the sent queue.
  */
 static int
 process_chunk_drop(struct sctp_tcb *stcb, struct sctp_chunk_desc *desc,
     struct sctp_nets *net, uint8_t flg)
 {
 	switch (desc->chunk_type) {
 	case SCTP_DATA:
 		/* find the TSN to resend (possibly) */
 		{
 			uint32_t tsn;
 			struct sctp_tmit_chunk *tp1;
 
 			tsn = ntohl(desc->tsn_ifany);
 			/* First pass relies on the queue being in TSN order. */
 			TAILQ_FOREACH(tp1, &stcb->asoc.sent_queue, sctp_next) {
 				if (tp1->rec.data.tsn == tsn) {
 					/* found it */
 					break;
 				}
 				if (SCTP_TSN_GT(tp1->rec.data.tsn, tsn)) {
 					/* not found */
 					tp1 = NULL;
 					break;
 				}
 			}
 			if (tp1 == NULL) {
 				/*
 				 * Do it the other way, aka without paying
 				 * attention to queue seq order.
 				 */
 				SCTP_STAT_INCR(sctps_pdrpdnfnd);
 				TAILQ_FOREACH(tp1, &stcb->asoc.sent_queue, sctp_next) {
 					if (tp1->rec.data.tsn == tsn) {
 						/* found it */
 						break;
 					}
 				}
 			}
 			if (tp1 == NULL) {
 				SCTP_STAT_INCR(sctps_pdrptsnnf);
 			}
 			if ((tp1 != NULL) && (tp1->sent < SCTP_DATAGRAM_ACKED)) {
 				uint8_t *ddp;
 
 				/* Only bad-CRC or middle-box reports are acted on. */
 				if (((flg & SCTP_BADCRC) == 0) &&
 				    ((flg & SCTP_FROM_MIDDLE_BOX) == 0)) {
 					return (0);
 				}
 				/* With a closed peer window nothing is resent. */
 				if ((stcb->asoc.peers_rwnd == 0) &&
 				    ((flg & SCTP_FROM_MIDDLE_BOX) == 0)) {
 					SCTP_STAT_INCR(sctps_pdrpdiwnp);
 					return (0);
 				}
 				if (stcb->asoc.peers_rwnd == 0 &&
 				    (flg & SCTP_FROM_MIDDLE_BOX)) {
 					SCTP_STAT_INCR(sctps_pdrpdizrw);
 					return (0);
 				}
 				/*
 				 * The comparison below reads
 				 * sizeof(desc->data_bytes) bytes behind the
 				 * DATA chunk header.  Make sure the first
 				 * mbuf really holds that much; a shorter
 				 * chunk cannot be the one reported.
 				 */
 				if ((uint32_t)SCTP_BUF_LEN(tp1->data) <
 				    sizeof(struct sctp_data_chunk) +
 				    sizeof(desc->data_bytes)) {
 					/* Payload not matching. */
 					SCTP_STAT_INCR(sctps_pdrpbadd);
 					return (-1);
 				}
 				ddp = (uint8_t *)(mtod(tp1->data, caddr_t)+
 				    sizeof(struct sctp_data_chunk));
 				{
 					unsigned int iii;
 
 					for (iii = 0; iii < sizeof(desc->data_bytes);
 					    iii++) {
 						if (ddp[iii] != desc->data_bytes[iii]) {
 							SCTP_STAT_INCR(sctps_pdrpbadd);
 							return (-1);
 						}
 					}
 				}
 
 				if (tp1->do_rtt) {
 					/*
 					 * this guy had a RTO calculation
 					 * pending on it, cancel it
 					 */
 					if (tp1->whoTo->rto_needed == 0) {
 						tp1->whoTo->rto_needed = 1;
 					}
 					tp1->do_rtt = 0;
 				}
 				SCTP_STAT_INCR(sctps_pdrpmark);
 				if (tp1->sent != SCTP_DATAGRAM_RESEND)
 					sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
 				/*
 				 * mark it as if we were doing a FR, since
 				 * we will be getting gap ack reports behind
 				 * the info from the router.
 				 */
 				tp1->rec.data.doing_fast_retransmit = 1;
 				/*
 				 * mark the tsn with what sequences can
 				 * cause a new FR.
 				 */
 				if (TAILQ_EMPTY(&stcb->asoc.send_queue)) {
 					tp1->rec.data.fast_retran_tsn = stcb->asoc.sending_seq;
 				} else {
 					tp1->rec.data.fast_retran_tsn = (TAILQ_FIRST(&stcb->asoc.send_queue))->rec.data.tsn;
 				}
 
 				/* restart the timer */
 				sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
 				    stcb, tp1->whoTo,
 				    SCTP_FROM_SCTP_INPUT + SCTP_LOC_26);
 				sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
 				    stcb, tp1->whoTo);
 
 				/* fix counts and things */
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
 					sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PDRP,
 					    tp1->whoTo->flight_size,
 					    tp1->book_size,
 					    (uint32_t)(uintptr_t)stcb,
 					    tp1->rec.data.tsn);
 				}
 				/* Only chunks still in flight leave the flight size. */
 				if (tp1->sent < SCTP_DATAGRAM_RESEND) {
 					sctp_flight_size_decrease(tp1);
 					sctp_total_flight_decrease(stcb, tp1);
 				}
 				tp1->sent = SCTP_DATAGRAM_RESEND;
 			}
 			{
 				/*
 				 * audit code: recount the chunks marked for
 				 * resend on both queues and compare with the
 				 * association's counter.
 				 */
 				unsigned int audit;
 
 				audit = 0;
 				TAILQ_FOREACH(tp1, &stcb->asoc.sent_queue, sctp_next) {
 					if (tp1->sent == SCTP_DATAGRAM_RESEND)
 						audit++;
 				}
 				TAILQ_FOREACH(tp1, &stcb->asoc.control_send_queue,
 				    sctp_next) {
 					if (tp1->sent == SCTP_DATAGRAM_RESEND)
 						audit++;
 				}
 				if (audit != stcb->asoc.sent_queue_retran_cnt) {
 					SCTP_PRINTF("**Local Audit finds cnt:%d asoc cnt:%d\n",
 					    audit, stcb->asoc.sent_queue_retran_cnt);
 #ifndef SCTP_AUDITING_ENABLED
 					stcb->asoc.sent_queue_retran_cnt = audit;
 #endif
 				}
 			}
 		}
 		break;
 	case SCTP_ASCONF:
 		{
 			struct sctp_tmit_chunk *asconf;
 
 			/* Mark the first queued ASCONF for resend. */
 			TAILQ_FOREACH(asconf, &stcb->asoc.control_send_queue,
 			    sctp_next) {
 				if (asconf->rec.chunk_id.id == SCTP_ASCONF) {
 					break;
 				}
 			}
 			if (asconf) {
 				if (asconf->sent != SCTP_DATAGRAM_RESEND)
 					sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
 				asconf->sent = SCTP_DATAGRAM_RESEND;
 				asconf->snd_count--;
 			}
 		}
 		break;
 	case SCTP_INITIATION:
 		/* resend the INIT */
 		stcb->asoc.dropped_special_cnt++;
 		if (stcb->asoc.dropped_special_cnt < SCTP_RETRY_DROPPED_THRESH) {
 			/*
 			 * If we can get it in, in a few attempts we do
 			 * this, otherwise we let the timer fire.
 			 */
 			sctp_timer_stop(SCTP_TIMER_TYPE_INIT, stcb->sctp_ep,
 			    stcb, net,
 			    SCTP_FROM_SCTP_INPUT + SCTP_LOC_27);
 			sctp_send_initiate(stcb->sctp_ep, stcb, SCTP_SO_NOT_LOCKED);
 		}
 		break;
 	case SCTP_SELECTIVE_ACK:
 	case SCTP_NR_SELECTIVE_ACK:
 		/* resend the sack */
 		sctp_send_sack(stcb, SCTP_SO_NOT_LOCKED);
 		break;
 	case SCTP_HEARTBEAT_REQUEST:
 		/* resend a demand HB */
 		if ((stcb->asoc.overall_error_count + 3) < stcb->asoc.max_send_times) {
 			/*
 			 * Only retransmit if we KNOW we wont destroy the
 			 * tcb
 			 */
 			sctp_send_hb(stcb, net, SCTP_SO_NOT_LOCKED);
 		}
 		break;
 	case SCTP_SHUTDOWN:
 		sctp_send_shutdown(stcb, net);
 		break;
 	case SCTP_SHUTDOWN_ACK:
 		sctp_send_shutdown_ack(stcb, net);
 		break;
 	case SCTP_COOKIE_ECHO:
 		{
 			struct sctp_tmit_chunk *cookie;
 
 			/* Mark the first queued COOKIE-ECHO for resend. */
 			cookie = NULL;
 			TAILQ_FOREACH(cookie, &stcb->asoc.control_send_queue,
 			    sctp_next) {
 				if (cookie->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
 					break;
 				}
 			}
 			if (cookie) {
 				if (cookie->sent != SCTP_DATAGRAM_RESEND)
 					sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
 				cookie->sent = SCTP_DATAGRAM_RESEND;
 				sctp_stop_all_cookie_timers(stcb);
 			}
 		}
 		break;
 	case SCTP_COOKIE_ACK:
 		sctp_send_cookie_ack(stcb);
 		break;
 	case SCTP_ASCONF_ACK:
 		/* resend last asconf ack */
 		sctp_send_asconf_ack(stcb);
 		break;
 	case SCTP_IFORWARD_CUM_TSN:
 	case SCTP_FORWARD_CUM_TSN:
 		send_forward_tsn(stcb, &stcb->asoc);
 		break;
 		/* can't do anything with these */
 	case SCTP_PACKET_DROPPED:
 	case SCTP_INITIATION_ACK:	/* this should not happen */
 	case SCTP_HEARTBEAT_ACK:
 	case SCTP_ABORT_ASSOCIATION:
 	case SCTP_OPERATION_ERROR:
 	case SCTP_SHUTDOWN_COMPLETE:
 	case SCTP_ECN_ECHO:
 	case SCTP_ECN_CWR:
 	default:
 		break;
 	}
 	return (0);
 }
 
 /*
  * Reset the delivery state of incoming streams and tell the ULP.
  *
  * 'list' holds 'number_entries' stream ids in network byte order; ids that
  * are not below streamincnt are skipped.  A count of zero resets every
  * incoming stream, and the ULP is then handed a NULL list.
  *
  * last_mid_delivered is set to 0xffffffff because it is the last delivered
  * sequence and the peer will start sending at 0 after the reset.
  */
 void
 sctp_reset_in_stream(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t *list)
 {
 	struct sctp_association *asoc = &stcb->asoc;
 	uint32_t idx;
 	uint16_t sid;
 
 	if (number_entries == 0) {
 		/* No explicit list: every incoming stream is reset. */
 		for (idx = 0; idx < asoc->streamincnt; idx++) {
 			asoc->strmin[idx].last_mid_delivered = 0xffffffff;
 		}
 		list = NULL;
 	} else {
 		for (idx = 0; idx < number_entries; idx++) {
 			sid = ntohs(list[idx]);
 			if (sid < asoc->streamincnt) {
 				asoc->strmin[sid].last_mid_delivered = 0xffffffff;
 			}
 		}
 	}
 	sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_RECV, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
 }
 
 /*
  * Restart the message id counters of outgoing streams and tell the ULP.
  *
  * 'list' holds 'number_entries' stream ids in network byte order; ids that
  * are not below streamoutcnt are skipped.  A count of zero resets every
  * outgoing stream.  'list' is passed on to the notification unchanged.
  */
 static void
 sctp_reset_out_streams(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t *list)
 {
 	struct sctp_association *asoc = &stcb->asoc;
 	struct sctp_stream_out *strm;
 	uint32_t idx;
 	uint16_t sid;
 
 	if (number_entries == 0) {
 		/* No explicit list: every outgoing stream is reset. */
 		for (idx = 0; idx < asoc->streamoutcnt; idx++) {
 			strm = &asoc->strmout[idx];
 			strm->next_mid_ordered = 0;
 			strm->next_mid_unordered = 0;
 		}
 	} else {
 		for (idx = 0; idx < number_entries; idx++) {
 			sid = ntohs(list[idx]);
 			if (sid < asoc->streamoutcnt) {
 				strm = &asoc->strmout[sid];
 				strm->next_mid_ordered = 0;
 				strm->next_mid_unordered = 0;
 			}
 			/* otherwise: no such stream */
 		}
 	}
 	sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_SEND, stcb, number_entries, (void *)list, SCTP_SO_NOT_LOCKED);
 }
 
 /*
  * Put outgoing streams back into the SCTP_STREAM_OPEN state.
  *
  * 'list' holds 'number_entries' stream ids in network byte order; ids that
  * are not below streamoutcnt are skipped.  A count of zero applies the
  * change to every outgoing stream.
  */
 static void
 sctp_reset_clear_pending(struct sctp_tcb *stcb, uint32_t number_entries, uint16_t *list)
 {
 	struct sctp_association *asoc = &stcb->asoc;
 	uint32_t idx;
 	uint16_t sid;
 
 	if (number_entries == 0) {
 		/* No explicit list: open every outgoing stream. */
 		for (idx = 0; idx < asoc->streamoutcnt; idx++) {
 			asoc->strmout[idx].state = SCTP_STREAM_OPEN;
 		}
 		return;
 	}
 	for (idx = 0; idx < number_entries; idx++) {
 		sid = ntohs(list[idx]);
 		if (sid < asoc->streamoutcnt) {
 			asoc->strmout[sid].state = SCTP_STREAM_OPEN;
 		}
 		/* otherwise: no such stream */
 	}
 }
 
 
 /*
  * Look up the outstanding stream reset request with sequence number 'seq'.
  *
  * The request is searched for in the chunk referenced by asoc.str_reset,
  * which can carry at most two request parameters.  When the control send
  * queue is empty or no reset chunk is recorded, stream_reset_outstanding is
  * cleared and NULL is returned.
  *
  * If 'bchk' is not NULL and the reset chunk still has data, *bchk receives
  * the chunk pointer, whether or not the sequence number matches.
  *
  * Returns the matching request parameter, or NULL when there is none.
  */
 struct sctp_stream_reset_request *
 sctp_find_stream_reset(struct sctp_tcb *stcb, uint32_t seq, struct sctp_tmit_chunk **bchk)
 {
 	struct sctp_association *asoc = &stcb->asoc;
 	struct sctp_tmit_chunk *reset_chk;
 	struct sctp_chunkhdr *hdr;
 	struct sctp_stream_reset_request *first, *second;
 	int chunk_len, first_len;
 
 	if (TAILQ_EMPTY(&asoc->control_send_queue) ||
 	    (asoc->str_reset == NULL)) {
 		/* Nothing can be outstanding any more. */
 		asoc->stream_reset_outstanding = 0;
 		return (NULL);
 	}
 	reset_chk = asoc->str_reset;
 	if (reset_chk->data == NULL) {
 		return (NULL);
 	}
 	if (bchk != NULL) {
 		/* he wants a copy of the chk pointer */
 		*bchk = reset_chk;
 	}
 	chunk_len = reset_chk->send_size;
 	hdr = mtod(reset_chk->data, struct sctp_chunkhdr *);
 
 	/* The first request sits right behind the chunk header. */
 	first = (struct sctp_stream_reset_request *)(hdr + 1);
 	if (ntohl(first->request_seq) == seq) {
 		return (first);
 	}
 	first_len = SCTP_SIZE32(ntohs(first->ph.param_length));
 	if (chunk_len <= (first_len + (int)sizeof(struct sctp_chunkhdr))) {
 		/* No room for a second request: that seq is not here. */
 		return (NULL);
 	}
 	/* move to the next one, there can only be a max of two */
 	second = (struct sctp_stream_reset_request *)((caddr_t)first + first_len);
 	if (ntohl(second->request_seq) == seq) {
 		return (second);
 	}
 	return (NULL);
 }
 
 /*
  * Discard the outstanding stream reset chunk, if there is one.
  *
  * Stops the stream reset timer, unlinks the chunk from the control send
  * queue, frees its data and the chunk itself, and clears asoc.str_reset.
  */
 static void
 sctp_clean_up_stream_reset(struct sctp_tcb *stcb)
 {
 	struct sctp_association *asoc = &stcb->asoc;
 	struct sctp_tmit_chunk *reset_chk = asoc->str_reset;
 
 	if (reset_chk == NULL) {
 		/* Nothing outstanding. */
 		return;
 	}
 	sctp_timer_stop(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb,
 	    reset_chk->whoTo, SCTP_FROM_SCTP_INPUT + SCTP_LOC_28);
 	TAILQ_REMOVE(&asoc->control_send_queue, reset_chk, sctp_next);
 	if (reset_chk->data != NULL) {
 		sctp_m_freem(reset_chk->data);
 		reset_chk->data = NULL;
 	}
 	asoc->ctrl_queue_cnt--;
 	sctp_free_a_chunk(stcb, reset_chk, SCTP_SO_NOT_LOCKED);
 	/* sa_ignore NO_NULL_CHK */
 	asoc->str_reset = NULL;
 }
 
 
 static int
 sctp_handle_stream_reset_response(struct sctp_tcb *stcb,
     uint32_t seq, uint32_t action,
     struct sctp_stream_reset_response *respin)
 {
 	uint16_t type;
 	int lparm_len;
 	struct sctp_association *asoc = &stcb->asoc;
 	struct sctp_tmit_chunk *chk;
 	struct sctp_stream_reset_request *req_param;
 	struct sctp_stream_reset_out_request *req_out_param;
 	struct sctp_stream_reset_in_request *req_in_param;
 	uint32_t number_entries;
 
 	if (asoc->stream_reset_outstanding == 0) {
 		/* duplicate */
 		return (0);
 	}
 	if (seq == stcb->asoc.str_reset_seq_out) {
 		req_param = sctp_find_stream_reset(stcb, seq, &chk);
 		if (req_param != NULL) {
 			stcb->asoc.str_reset_seq_out++;
 			type = ntohs(req_param->ph.param_type);
 			lparm_len = ntohs(req_param->ph.param_length);
 			if (type == SCTP_STR_RESET_OUT_REQUEST) {
 				int no_clear = 0;
 
 				req_out_param = (struct sctp_stream_reset_out_request *)req_param;
 				number_entries = (lparm_len - sizeof(struct sctp_stream_reset_out_request)) / sizeof(uint16_t);
 				asoc->stream_reset_out_is_outstanding = 0;
 				if (asoc->stream_reset_outstanding)
 					asoc->stream_reset_outstanding--;
 				if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) {
 					/* do it */
 					sctp_reset_out_streams(stcb, number_entries, req_out_param->list_of_streams);
 				} else if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
 					sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_OUT, stcb, number_entries, req_out_param->list_of_streams, SCTP_SO_NOT_LOCKED);
 				} else if (action == SCTP_STREAM_RESET_RESULT_IN_PROGRESS) {
 					/*
 					 * Set it up so we don't stop
 					 * retransmitting
 					 */
 					asoc->stream_reset_outstanding++;
 					stcb->asoc.str_reset_seq_out--;
 					asoc->stream_reset_out_is_outstanding = 1;
 					no_clear = 1;
 				} else {
 					sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_OUT, stcb, number_entries, req_out_param->list_of_streams, SCTP_SO_NOT_LOCKED);
 				}
 				if (no_clear == 0) {
 					sctp_reset_clear_pending(stcb, number_entries, req_out_param->list_of_streams);
 				}
 			} else if (type == SCTP_STR_RESET_IN_REQUEST) {
 				req_in_param = (struct sctp_stream_reset_in_request *)req_param;
 				number_entries = (lparm_len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t);
 				if (asoc->stream_reset_outstanding)
 					asoc->stream_reset_outstanding--;
 				if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
 					sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_DENIED_IN, stcb,
 					    number_entries, req_in_param->list_of_streams, SCTP_SO_NOT_LOCKED);
 				} else if (action != SCTP_STREAM_RESET_RESULT_PERFORMED) {
 					sctp_ulp_notify(SCTP_NOTIFY_STR_RESET_FAILED_IN, stcb,
 					    number_entries, req_in_param->list_of_streams, SCTP_SO_NOT_LOCKED);
 				}
 			} else if (type == SCTP_STR_RESET_ADD_OUT_STREAMS) {
 				/* Ok we now may have more streams */
 				int num_stream;
 
 				num_stream = stcb->asoc.strm_pending_add_size;
 				if (num_stream > (stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt)) {
 					/* TSNH */
 					num_stream = stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt;
 				}
 				stcb->asoc.strm_pending_add_size = 0;
 				if (asoc->stream_reset_outstanding)
 					asoc->stream_reset_outstanding--;
 				if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) {
 					/* Put the new streams into effect */
 					int i;
 
 					for (i = asoc->streamoutcnt; i < (asoc->streamoutcnt + num_stream); i++) {
 						asoc->strmout[i].state = SCTP_STREAM_OPEN;
 					}
 					asoc->streamoutcnt += num_stream;
 					sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, 0);
 				} else if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
 					sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt,
 					    SCTP_STREAM_CHANGE_DENIED);
 				} else {
 					sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt,
 					    SCTP_STREAM_CHANGE_FAILED);
 				}
 			} else if (type == SCTP_STR_RESET_ADD_IN_STREAMS) {
 				if (asoc->stream_reset_outstanding)
 					asoc->stream_reset_outstanding--;
 				if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
 					sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt,
 					    SCTP_STREAM_CHANGE_DENIED);
 				} else if (action != SCTP_STREAM_RESET_RESULT_PERFORMED) {
 					sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt,
 					    SCTP_STREAM_CHANGE_FAILED);
 				}
 			} else if (type == SCTP_STR_RESET_TSN_REQUEST) {
 				/**
 				 * a) Adopt the new in tsn.
 				 * b) reset the map
 				 * c) Adopt the new out-tsn
 				 */
 				struct sctp_stream_reset_response_tsn *resp;
 				struct sctp_forward_tsn_chunk fwdtsn;
 				int abort_flag = 0;
 
 				if (respin == NULL) {
 					/* huh ? */
 					return (0);
 				}
 				if (ntohs(respin->ph.param_length) < sizeof(struct sctp_stream_reset_response_tsn)) {
 					return (0);
 				}
 				if (action == SCTP_STREAM_RESET_RESULT_PERFORMED) {
 					resp = (struct sctp_stream_reset_response_tsn *)respin;
 					asoc->stream_reset_outstanding--;
 					fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk));
 					fwdtsn.ch.chunk_type = SCTP_FORWARD_CUM_TSN;
 					fwdtsn.new_cumulative_tsn = htonl(ntohl(resp->senders_next_tsn) - 1);
 					sctp_handle_forward_tsn(stcb, &fwdtsn, &abort_flag, NULL, 0);
 					if (abort_flag) {
 						return (1);
 					}
 					stcb->asoc.highest_tsn_inside_map = (ntohl(resp->senders_next_tsn) - 1);
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
 						sctp_log_map(0, 7, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
 					}
 					stcb->asoc.tsn_last_delivered = stcb->asoc.cumulative_tsn = stcb->asoc.highest_tsn_inside_map;
 					stcb->asoc.mapping_array_base_tsn = ntohl(resp->senders_next_tsn);
 					memset(stcb->asoc.mapping_array, 0, stcb->asoc.mapping_array_size);
 
 					stcb->asoc.highest_tsn_inside_nr_map = stcb->asoc.highest_tsn_inside_map;
 					memset(stcb->asoc.nr_mapping_array, 0, stcb->asoc.mapping_array_size);
 
 					stcb->asoc.sending_seq = ntohl(resp->receivers_next_tsn);
 					stcb->asoc.last_acked_seq = stcb->asoc.cumulative_tsn;
 
 					sctp_reset_out_streams(stcb, 0, (uint16_t *)NULL);
 					sctp_reset_in_stream(stcb, 0, (uint16_t *)NULL);
 					sctp_notify_stream_reset_tsn(stcb, stcb->asoc.sending_seq, (stcb->asoc.mapping_array_base_tsn + 1), 0);
 				} else if (action == SCTP_STREAM_RESET_RESULT_DENIED) {
 					sctp_notify_stream_reset_tsn(stcb, stcb->asoc.sending_seq, (stcb->asoc.mapping_array_base_tsn + 1),
 					    SCTP_ASSOC_RESET_DENIED);
 				} else {
 					sctp_notify_stream_reset_tsn(stcb, stcb->asoc.sending_seq, (stcb->asoc.mapping_array_base_tsn + 1),
 					    SCTP_ASSOC_RESET_FAILED);
 				}
 			}
 			/* get rid of the request and get the request flags */
 			if (asoc->stream_reset_outstanding == 0) {
 				sctp_clean_up_stream_reset(stcb);
 			}
 		}
 	}
 	if (asoc->stream_reset_outstanding == 0) {
 		sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_NOT_LOCKED);
 	}
 	return (0);
 }
 
 static void
 sctp_handle_str_reset_request_in(struct sctp_tcb *stcb,
     struct sctp_tmit_chunk *chk,
     struct sctp_stream_reset_in_request *req, int trunc)
 {
 	uint32_t seq;
 	int len, i;
 	int number_entries;
 	uint16_t temp;
 
 	/*
 	 * peer wants me to send a str-reset to him for my outgoing seq's if
 	 * seq_in is right.
 	 */
 	struct sctp_association *asoc = &stcb->asoc;
 
 	seq = ntohl(req->request_seq);
 	if (asoc->str_reset_seq_in == seq) {
 		asoc->last_reset_action[1] = asoc->last_reset_action[0];
 		if (!(asoc->local_strreset_support & SCTP_ENABLE_RESET_STREAM_REQ)) {
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
 		} else if (trunc) {
 			/* Can't do it, since they exceeded our buffer size  */
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
 		} else if (stcb->asoc.stream_reset_out_is_outstanding == 0) {
 			len = ntohs(req->ph.param_length);
 			number_entries = ((len - sizeof(struct sctp_stream_reset_in_request)) / sizeof(uint16_t));
 			if (number_entries) {
 				for (i = 0; i < number_entries; i++) {
 					temp = ntohs(req->list_of_streams[i]);
 					if (temp >= stcb->asoc.streamoutcnt) {
 						asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
 						goto bad_boy;
 					}
 					req->list_of_streams[i] = temp;
 				}
 				for (i = 0; i < number_entries; i++) {
 					if (stcb->asoc.strmout[req->list_of_streams[i]].state == SCTP_STREAM_OPEN) {
 						stcb->asoc.strmout[req->list_of_streams[i]].state = SCTP_STREAM_RESET_PENDING;
 					}
 				}
 			} else {
 				/* Its all */
 				for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
 					if (stcb->asoc.strmout[i].state == SCTP_STREAM_OPEN)
 						stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_PENDING;
 				}
 			}
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
 		} else {
 			/* Can't do it, since we have sent one out */
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_ERR_IN_PROGRESS;
 		}
 bad_boy:
 		sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
 		asoc->str_reset_seq_in++;
 	} else if (asoc->str_reset_seq_in - 1 == seq) {
 		sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
 	} else if (asoc->str_reset_seq_in - 2 == seq) {
 		sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
 	} else {
 		sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO);
 	}
 	sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_NOT_LOCKED);
 }
 
 static int
 sctp_handle_str_reset_request_tsn(struct sctp_tcb *stcb,
     struct sctp_tmit_chunk *chk,
     struct sctp_stream_reset_tsn_request *req)
 {
 	/* reset all in and out and update the tsn */
 	/*
 	 * A) reset my str-seq's on in and out. B) Select a receive next,
 	 * and set cum-ack to it. Also process this selected number as a
 	 * fwd-tsn as well. C) set in the response my next sending seq.
 	 */
 	struct sctp_forward_tsn_chunk fwdtsn;
 	struct sctp_association *asoc = &stcb->asoc;
 	int abort_flag = 0;
 	uint32_t seq;
 
 	seq = ntohl(req->request_seq);
 	if (asoc->str_reset_seq_in == seq) {
 		asoc->last_reset_action[1] = stcb->asoc.last_reset_action[0];
 		if (!(asoc->local_strreset_support & SCTP_ENABLE_CHANGE_ASSOC_REQ)) {
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
 		} else {
 			fwdtsn.ch.chunk_length = htons(sizeof(struct sctp_forward_tsn_chunk));
 			fwdtsn.ch.chunk_type = SCTP_FORWARD_CUM_TSN;
 			fwdtsn.ch.chunk_flags = 0;
 			fwdtsn.new_cumulative_tsn = htonl(stcb->asoc.highest_tsn_inside_map + 1);
 			sctp_handle_forward_tsn(stcb, &fwdtsn, &abort_flag, NULL, 0);
 			if (abort_flag) {
 				return (1);
 			}
 			asoc->highest_tsn_inside_map += SCTP_STREAM_RESET_TSN_DELTA;
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MAP_LOGGING_ENABLE) {
 				sctp_log_map(0, 10, asoc->highest_tsn_inside_map, SCTP_MAP_SLIDE_RESULT);
 			}
 			asoc->tsn_last_delivered = asoc->cumulative_tsn = asoc->highest_tsn_inside_map;
 			asoc->mapping_array_base_tsn = asoc->highest_tsn_inside_map + 1;
 			memset(asoc->mapping_array, 0, asoc->mapping_array_size);
 			asoc->highest_tsn_inside_nr_map = asoc->highest_tsn_inside_map;
 			memset(asoc->nr_mapping_array, 0, asoc->mapping_array_size);
 			atomic_add_int(&asoc->sending_seq, 1);
 			/* save off historical data for retrans */
 			asoc->last_sending_seq[1] = asoc->last_sending_seq[0];
 			asoc->last_sending_seq[0] = asoc->sending_seq;
 			asoc->last_base_tsnsent[1] = asoc->last_base_tsnsent[0];
 			asoc->last_base_tsnsent[0] = asoc->mapping_array_base_tsn;
 			sctp_reset_out_streams(stcb, 0, (uint16_t *)NULL);
 			sctp_reset_in_stream(stcb, 0, (uint16_t *)NULL);
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
 			sctp_notify_stream_reset_tsn(stcb, asoc->sending_seq, (asoc->mapping_array_base_tsn + 1), 0);
 		}
 		sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[0],
 		    asoc->last_sending_seq[0], asoc->last_base_tsnsent[0]);
 		asoc->str_reset_seq_in++;
 	} else if (asoc->str_reset_seq_in - 1 == seq) {
 		sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[0],
 		    asoc->last_sending_seq[0], asoc->last_base_tsnsent[0]);
 	} else if (asoc->str_reset_seq_in - 2 == seq) {
 		sctp_add_stream_reset_result_tsn(chk, seq, asoc->last_reset_action[1],
 		    asoc->last_sending_seq[1], asoc->last_base_tsnsent[1]);
 	} else {
 		sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO);
 	}
 	return (0);
 }
 
 static void
 sctp_handle_str_reset_request_out(struct sctp_tcb *stcb,
     struct sctp_tmit_chunk *chk,
     struct sctp_stream_reset_out_request *req, int trunc)
 {
 	uint32_t seq, tsn;
 	int number_entries, len;
 	struct sctp_association *asoc = &stcb->asoc;
 
 	seq = ntohl(req->request_seq);
 
 	/* now if its not a duplicate we process it */
 	if (asoc->str_reset_seq_in == seq) {
 		len = ntohs(req->ph.param_length);
 		number_entries = ((len - sizeof(struct sctp_stream_reset_out_request)) / sizeof(uint16_t));
 		/*
 		 * the sender is resetting, handle the list issue.. we must
 		 * a) verify if we can do the reset, if so no problem b) If
 		 * we can't do the reset we must copy the request. c) queue
 		 * it, and setup the data in processor to trigger it off
 		 * when needed and dequeue all the queued data.
 		 */
 		tsn = ntohl(req->send_reset_at_tsn);
 
 		/* move the reset action back one */
 		asoc->last_reset_action[1] = asoc->last_reset_action[0];
 		if (!(asoc->local_strreset_support & SCTP_ENABLE_RESET_STREAM_REQ)) {
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
 		} else if (trunc) {
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
 		} else if (SCTP_TSN_GE(asoc->cumulative_tsn, tsn)) {
 			/* we can do it now */
 			sctp_reset_in_stream(stcb, number_entries, req->list_of_streams);
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
 		} else {
 			/*
 			 * we must queue it up and thus wait for the TSN's
 			 * to arrive that are at or before tsn
 			 */
 			struct sctp_stream_reset_list *liste;
 			int siz;
 
 			siz = sizeof(struct sctp_stream_reset_list) + (number_entries * sizeof(uint16_t));
 			SCTP_MALLOC(liste, struct sctp_stream_reset_list *,
 			    siz, SCTP_M_STRESET);
 			if (liste == NULL) {
 				/* gak out of memory */
 				asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
 				sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
 				return;
 			}
 			liste->seq = seq;
 			liste->tsn = tsn;
 			liste->number_entries = number_entries;
 			memcpy(&liste->list_of_streams, req->list_of_streams, number_entries * sizeof(uint16_t));
 			TAILQ_INSERT_TAIL(&asoc->resetHead, liste, next_resp);
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_IN_PROGRESS;
 		}
 		sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
 		asoc->str_reset_seq_in++;
 	} else if ((asoc->str_reset_seq_in - 1) == seq) {
 		/*
 		 * one seq back, just echo back last action since my
 		 * response was lost.
 		 */
 		sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
 	} else if ((asoc->str_reset_seq_in - 2) == seq) {
 		/*
 		 * two seq back, just echo back last action since my
 		 * response was lost.
 		 */
 		sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
 	} else {
 		sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO);
 	}
 }
 
 static void
 sctp_handle_str_reset_add_strm(struct sctp_tcb *stcb, struct sctp_tmit_chunk *chk,
     struct sctp_stream_reset_add_strm *str_add)
 {
 	/*
 	 * Peer is requesting to add more streams. If its within our
 	 * max-streams we will allow it.
 	 */
 	uint32_t num_stream, i;
 	uint32_t seq;
 	struct sctp_association *asoc = &stcb->asoc;
 	struct sctp_queued_to_read *ctl, *nctl;
 
 	/* Get the number. */
 	seq = ntohl(str_add->request_seq);
 	num_stream = ntohs(str_add->number_of_streams);
 	/* Now what would be the new total? */
 	if (asoc->str_reset_seq_in == seq) {
 		num_stream += stcb->asoc.streamincnt;
 		stcb->asoc.last_reset_action[1] = stcb->asoc.last_reset_action[0];
 		if (!(asoc->local_strreset_support & SCTP_ENABLE_CHANGE_ASSOC_REQ)) {
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
 		} else if ((num_stream > stcb->asoc.max_inbound_streams) ||
 		    (num_stream > 0xffff)) {
 			/* We must reject it they ask for to many */
 	denied:
 			stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
 		} else {
 			/* Ok, we can do that :-) */
 			struct sctp_stream_in *oldstrm;
 
 			/* save off the old */
 			oldstrm = stcb->asoc.strmin;
 			SCTP_MALLOC(stcb->asoc.strmin, struct sctp_stream_in *,
 			    (num_stream * sizeof(struct sctp_stream_in)),
 			    SCTP_M_STRMI);
 			if (stcb->asoc.strmin == NULL) {
 				stcb->asoc.strmin = oldstrm;
 				goto denied;
 			}
 			/* copy off the old data */
 			for (i = 0; i < stcb->asoc.streamincnt; i++) {
 				TAILQ_INIT(&stcb->asoc.strmin[i].inqueue);
 				TAILQ_INIT(&stcb->asoc.strmin[i].uno_inqueue);
 				stcb->asoc.strmin[i].sid = i;
 				stcb->asoc.strmin[i].last_mid_delivered = oldstrm[i].last_mid_delivered;
 				stcb->asoc.strmin[i].delivery_started = oldstrm[i].delivery_started;
 				stcb->asoc.strmin[i].pd_api_started = oldstrm[i].pd_api_started;
 				/* now anything on those queues? */
 				TAILQ_FOREACH_SAFE(ctl, &oldstrm[i].inqueue, next_instrm, nctl) {
 					TAILQ_REMOVE(&oldstrm[i].inqueue, ctl, next_instrm);
 					TAILQ_INSERT_TAIL(&stcb->asoc.strmin[i].inqueue, ctl, next_instrm);
 				}
 				TAILQ_FOREACH_SAFE(ctl, &oldstrm[i].uno_inqueue, next_instrm, nctl) {
 					TAILQ_REMOVE(&oldstrm[i].uno_inqueue, ctl, next_instrm);
 					TAILQ_INSERT_TAIL(&stcb->asoc.strmin[i].uno_inqueue, ctl, next_instrm);
 				}
 			}
 			/* Init the new streams */
 			for (i = stcb->asoc.streamincnt; i < num_stream; i++) {
 				TAILQ_INIT(&stcb->asoc.strmin[i].inqueue);
 				TAILQ_INIT(&stcb->asoc.strmin[i].uno_inqueue);
 				stcb->asoc.strmin[i].sid = i;
 				stcb->asoc.strmin[i].last_mid_delivered = 0xffffffff;
 				stcb->asoc.strmin[i].pd_api_started = 0;
 				stcb->asoc.strmin[i].delivery_started = 0;
 			}
 			SCTP_FREE(oldstrm, SCTP_M_STRMI);
 			/* update the size */
 			stcb->asoc.streamincnt = num_stream;
 			stcb->asoc.last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
 			sctp_notify_stream_reset_add(stcb, stcb->asoc.streamincnt, stcb->asoc.streamoutcnt, 0);
 		}
 		sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
 		asoc->str_reset_seq_in++;
 	} else if ((asoc->str_reset_seq_in - 1) == seq) {
 		/*
 		 * one seq back, just echo back last action since my
 		 * response was lost.
 		 */
 		sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
 	} else if ((asoc->str_reset_seq_in - 2) == seq) {
 		/*
 		 * two seq back, just echo back last action since my
 		 * response was lost.
 		 */
 		sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
 	} else {
 		sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO);
 
 	}
 }
 
 /*
  * The peer asks us to add outgoing streams on our side.
  *
  * A new request is refused when the feature is disabled locally, reported
  * as "in progress" when we still have a reset outstanding, and denied when
  * the new total would not fit into 16 bits.  Otherwise we try to send our
  * own add-streams request via sctp_send_str_reset_req(); a non-zero return
  * from that call turns the result into "denied".  Requests one or two
  * sequence numbers old are answered with the remembered result.
  */
 static void
 sctp_handle_str_reset_add_out_strm(struct sctp_tcb *stcb, struct sctp_tmit_chunk *chk,
     struct sctp_stream_reset_add_strm *str_add)
 {
 	struct sctp_association *asoc = &stcb->asoc;
 	uint32_t seq = ntohl(str_add->request_seq);
 	uint16_t num_stream = ntohs(str_add->number_of_streams);
 	int new_total;
 
 	if (seq != asoc->str_reset_seq_in) {
 		/* Our response was lost, or the sequence number is bogus. */
 		if (seq == (asoc->str_reset_seq_in - 1)) {
 			sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
 		} else if (seq == (asoc->str_reset_seq_in - 2)) {
 			sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[1]);
 		} else {
 			sctp_add_stream_reset_result(chk, seq, SCTP_STREAM_RESET_RESULT_ERR_BAD_SEQNO);
 		}
 		return;
 	}
 
 	asoc->last_reset_action[1] = asoc->last_reset_action[0];
 	if ((asoc->local_strreset_support & SCTP_ENABLE_CHANGE_ASSOC_REQ) == 0) {
 		asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
 	} else if (asoc->stream_reset_outstanding) {
 		/* Something of ours is still pending. */
 		asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_ERR_IN_PROGRESS;
 	} else {
 		new_total = asoc->streamoutcnt;
 		new_total += num_stream;
 		if (new_total >= 0x10000) {
 			/* Stream identifiers are 16 bits wide. */
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
 		} else {
 			asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_PERFORMED;
 			if (sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, 1, num_stream, 0, 1)) {
 				asoc->last_reset_action[0] = SCTP_STREAM_RESET_RESULT_DENIED;
 			}
 		}
 	}
 	sctp_add_stream_reset_result(chk, seq, asoc->last_reset_action[0]);
 	asoc->str_reset_seq_in++;
 }
 
 /*
  * Process one received STREAM-RESET chunk.
  *
  * The parameters inside the chunk (located at 'offset' in mbuf chain 'm',
  * header given by 'ch_req') are walked one by one and dispatched to the
  * matching request/response handler.  The request handlers append their
  * answers to a response chunk that is allocated up front; if at least one
  * request parameter was seen, that chunk is appended to the association's
  * control_send_queue, otherwise it is released again.
  *
  * Returns 1 when the TSN-request handler or the response handler returned
  * non-zero, otherwise 0 (also when the response chunk or its mbuf could not
  * be allocated).
  */
 #ifdef __GNUC__
 __attribute__((noinline))
 #endif
 static int
 sctp_handle_stream_reset(struct sctp_tcb *stcb, struct mbuf *m, int offset,
     struct sctp_chunkhdr *ch_req)
 {
 	uint16_t remaining_length, param_len, ptype;
 	struct sctp_paramhdr pstore;
 	uint8_t cstore[SCTP_CHUNK_BUFFER_SIZE];
 	uint32_t seq = 0;
 	int num_req = 0;
 	int trunc = 0;
 	struct sctp_tmit_chunk *chk;
 	struct sctp_chunkhdr *ch;
 	struct sctp_paramhdr *ph;
 	int ret_code = 0;
 	int num_param = 0;
 
 	/* now it may be a reset or a reset-response */
 	remaining_length = ntohs(ch_req->chunk_length) - sizeof(struct sctp_chunkhdr);
 
 	/* setup for adding the response */
 	sctp_alloc_a_chunk(stcb, chk);
 	if (chk == NULL) {
 		return (ret_code);
 	}
 	chk->copy_by_ref = 0;
 	chk->rec.chunk_id.id = SCTP_STREAM_RESET;
 	chk->rec.chunk_id.can_take_data = 0;
 	chk->flags = 0;
 	chk->asoc = &stcb->asoc;
 	chk->no_fr_allowed = 0;
 	chk->book_size = chk->send_size = sizeof(struct sctp_chunkhdr);
 	chk->book_size_scale = 0;
 	chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA);
 	if (chk->data == NULL) {
 		/*
 		 * Shared cleanup path: besides the allocation failure above,
 		 * the code further down jumps to this label to drop the
 		 * response chunk.  Those jumps arrive with chk->data set,
 		 * which is why it is tested again before being freed.
 		 */
 strres_nochunk:
 		if (chk->data) {
 			sctp_m_freem(chk->data);
 			chk->data = NULL;
 		}
 		sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED);
 		return (ret_code);
 	}
 	SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD);
 
 	/* setup chunk parameters */
 	chk->sent = SCTP_DATAGRAM_UNSENT;
 	chk->snd_count = 0;
 	chk->whoTo = NULL;
 
 	/* The response starts out as a bare chunk header. */
 	ch = mtod(chk->data, struct sctp_chunkhdr *);
 	ch->chunk_type = SCTP_STREAM_RESET;
 	ch->chunk_flags = 0;
 	ch->chunk_length = htons(chk->send_size);
 	SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size);
 	/* Step over the received chunk header to its first parameter. */
 	offset += sizeof(struct sctp_chunkhdr);
 	while (remaining_length >= sizeof(struct sctp_paramhdr)) {
 		/* First look at the parameter header only, to learn the length. */
 		ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, sizeof(pstore), (uint8_t *)&pstore);
 		if (ph == NULL) {
 			/* TSNH */
 			break;
 		}
 		param_len = ntohs(ph->param_length);
 		if ((param_len > remaining_length) ||
 		    (param_len < (sizeof(struct sctp_paramhdr) + sizeof(uint32_t)))) {
 			/* bad parameter length */
 			break;
 		}
 		/*
 		 * Now fetch the parameter itself, but never more than fits
 		 * into cstore.
 		 */
 		ph = (struct sctp_paramhdr *)sctp_m_getptr(m, offset, min(param_len, sizeof(cstore)),
 		    (uint8_t *)&cstore);
 		if (ph == NULL) {
 			/* TSNH */
 			break;
 		}
 		ptype = ntohs(ph->param_type);
 		num_param++;
 		/*
 		 * Tell the handlers when only a part of the parameter could
 		 * be fetched.
 		 */
 		if (param_len > sizeof(cstore)) {
 			trunc = 1;
 		} else {
 			trunc = 0;
 		}
 		if (num_param > SCTP_MAX_RESET_PARAMS) {
 			/* hit the max of parameters already sorry.. */
 			break;
 		}
 		if (ptype == SCTP_STR_RESET_OUT_REQUEST) {
 			struct sctp_stream_reset_out_request *req_out;
 
 			if (param_len < sizeof(struct sctp_stream_reset_out_request)) {
 				break;
 			}
 			req_out = (struct sctp_stream_reset_out_request *)ph;
 			num_req++;
 			/*
 			 * While we have a request outstanding, a matching
 			 * response_seq in the peer's request counts as the
 			 * answer to it.
 			 */
 			if (stcb->asoc.stream_reset_outstanding) {
 				seq = ntohl(req_out->response_seq);
 				if (seq == stcb->asoc.str_reset_seq_out) {
 					/* implicit ack */
 					(void)sctp_handle_stream_reset_response(stcb, seq, SCTP_STREAM_RESET_RESULT_PERFORMED, NULL);
 				}
 			}
 			sctp_handle_str_reset_request_out(stcb, chk, req_out, trunc);
 		} else if (ptype == SCTP_STR_RESET_ADD_OUT_STREAMS) {
 			struct sctp_stream_reset_add_strm *str_add;
 
 			if (param_len < sizeof(struct sctp_stream_reset_add_strm)) {
 				break;
 			}
 			str_add = (struct sctp_stream_reset_add_strm *)ph;
 			num_req++;
 			/*
 			 * The peer adds outgoing streams; the handler called
 			 * here is the one that grows our incoming streams.
 			 */
 			sctp_handle_str_reset_add_strm(stcb, chk, str_add);
 		} else if (ptype == SCTP_STR_RESET_ADD_IN_STREAMS) {
 			struct sctp_stream_reset_add_strm *str_add;
 
 			if (param_len < sizeof(struct sctp_stream_reset_add_strm)) {
 				break;
 			}
 			str_add = (struct sctp_stream_reset_add_strm *)ph;
 			num_req++;
 			/*
 			 * The peer wants more incoming streams; the handler
 			 * called here deals with our outgoing side.
 			 */
 			sctp_handle_str_reset_add_out_strm(stcb, chk, str_add);
 		} else if (ptype == SCTP_STR_RESET_IN_REQUEST) {
 			struct sctp_stream_reset_in_request *req_in;
 
 			num_req++;
 			req_in = (struct sctp_stream_reset_in_request *)ph;
 			sctp_handle_str_reset_request_in(stcb, chk, req_in, trunc);
 		} else if (ptype == SCTP_STR_RESET_TSN_REQUEST) {
 			struct sctp_stream_reset_tsn_request *req_tsn;
 
 			num_req++;
 			req_tsn = (struct sctp_stream_reset_tsn_request *)ph;
 			/* A non-zero return is passed up and no response is queued. */
 			if (sctp_handle_str_reset_request_tsn(stcb, chk, req_tsn)) {
 				ret_code = 1;
 				goto strres_nochunk;
 			}
 			/* no more */
 			break;
 		} else if (ptype == SCTP_STR_RESET_RESPONSE) {
 			struct sctp_stream_reset_response *resp;
 			uint32_t result;
 
 			if (param_len < sizeof(struct sctp_stream_reset_response)) {
 				break;
 			}
 			resp = (struct sctp_stream_reset_response *)ph;
 			seq = ntohl(resp->response_seq);
 			result = ntohl(resp->result);
 			/* A non-zero return is passed up and no response is queued. */
 			if (sctp_handle_stream_reset_response(stcb, seq, result, resp)) {
 				ret_code = 1;
 				goto strres_nochunk;
 			}
 		} else {
 			/* Unknown parameter type: stop parsing here. */
 			break;
 		}
 		/* Advance by the padded parameter length. */
 		offset += SCTP_SIZE32(param_len);
 		if (remaining_length >= SCTP_SIZE32(param_len)) {
 			remaining_length -= SCTP_SIZE32(param_len);
 		} else {
 			remaining_length = 0;
 		}
 	}
 	if (num_req == 0) {
 		/* we have no response free the stuff */
 		goto strres_nochunk;
 	}
 	/* ok we have a chunk to link in */
 	TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue,
 	    chk,
 	    sctp_next);
 	stcb->asoc.ctrl_queue_cnt++;
 	return (ret_code);
 }
 
 /*
  * Handle a router's or endpoint's report of a packet loss.  There are two
  * ways to handle this: either we get the whole packet and must dissect it
  * ourselves (possibly with truncation and/or corruption), or it is a summary
  * from a middle box that did the dissecting for us.
  *
  * cp    - the received PACKET-DROPPED chunk.  cp->data is taken to start
  *         with the SCTP common header of the dropped packet, followed by
  *         its chunks.
  * stcb  - association the report was received on.
  * net   - passed through to process_chunk_drop() and to the congestion
  *         control hook.
  * limit - upper bound applied to the truncation length found in the chunk.
  *
  * Every chunk found in the reported packet is summarised in a
  * sctp_chunk_desc and handed to process_chunk_drop().  Afterwards a report
  * coming from the peer (not a middle box) may update peers_rwnd, and a
  * report from a middle box on a satellite network is forwarded to the
  * congestion control module.
  *
  * A chunk shorter than its own fixed header is counted as corrupt and
  * ignored; a report that carries only part of a common header is counted as
  * corrupt and its embedded chunks are not examined.
  */
 static void
 sctp_handle_packet_dropped(struct sctp_pktdrop_chunk *cp,
     struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t limit)
 {
 	/*
 	 * Only assigned on the "from peer" path below, but their addresses
 	 * are handed to the cwnd hook on the "from middle box" path, so give
 	 * them a defined value.
 	 */
 	uint32_t bottle_bw = 0, on_queue = 0;
 	uint16_t trunc_len;
 	unsigned int chlen;
 	unsigned int at;
 	struct sctp_chunk_desc desc;
 	struct sctp_chunkhdr *ch;
 
 	chlen = ntohs(cp->ch.chunk_length);
 	if (chlen < sizeof(struct sctp_pktdrop_chunk)) {
 		/*
 		 * Not even the fixed part is covered by the chunk length;
 		 * subtracting below would wrap around.
 		 */
 		SCTP_STAT_INCR(sctps_pdrpcrupt);
 		return;
 	}
 	chlen -= sizeof(struct sctp_pktdrop_chunk);
 	if (chlen == 0) {
 		ch = NULL;
 		if (cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX)
 			SCTP_STAT_INCR(sctps_pdrpbwrpt);
 	} else if (chlen < sizeof(struct sctphdr)) {
 		/*
 		 * Only a partial common header was reported.  There are no
 		 * chunks to look at, and subtracting the header size would
 		 * wrap chlen around.
 		 */
 		ch = NULL;
 		SCTP_STAT_INCR(sctps_pdrpcrupt);
 	} else {
 		ch = (struct sctp_chunkhdr *)(cp->data + sizeof(struct sctphdr));
 		chlen -= sizeof(struct sctphdr);
 		memset(&desc, 0, sizeof(desc));
 	}
 	trunc_len = (uint16_t)ntohs(cp->trunc_len);
 	if (trunc_len > limit) {
 		trunc_len = limit;
 	}
 	/* now the chunks themselves */
 	while ((ch != NULL) && (chlen >= sizeof(struct sctp_chunkhdr))) {
 		desc.chunk_type = ch->chunk_type;
 		/* get amount we need to move */
 		at = ntohs(ch->chunk_length);
 		if (at < sizeof(struct sctp_chunkhdr)) {
 			/* corrupt chunk, maybe at the end? */
 			SCTP_STAT_INCR(sctps_pdrpcrupt);
 			break;
 		}
 		if (trunc_len == 0) {
 			/* we are supposed to have all of it */
 			if (at > chlen) {
 				/* corrupt skip it */
 				SCTP_STAT_INCR(sctps_pdrpcrupt);
 				break;
 			}
 		} else {
 			/* is there enough of it left ? */
 			if (desc.chunk_type == SCTP_DATA) {
 				if (chlen < (sizeof(struct sctp_data_chunk) +
 				    sizeof(desc.data_bytes))) {
 					break;
 				}
 			} else {
 				if (chlen < sizeof(struct sctp_chunkhdr)) {
 					break;
 				}
 			}
 		}
 		if (desc.chunk_type == SCTP_DATA) {
 			/* can we get out the tsn? */
 			if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX))
 				SCTP_STAT_INCR(sctps_pdrpmbda);
 
 			/*
 			 * The copy below reads sizeof(desc.data_bytes) bytes
 			 * of payload, so require exactly that much.
 			 */
 			if (chlen >= (sizeof(struct sctp_data_chunk) + sizeof(desc.data_bytes))) {
 				/* yep */
 				struct sctp_data_chunk *dcp;
 				uint8_t *ddp;
 				unsigned int iii;
 
 				dcp = (struct sctp_data_chunk *)ch;
 				ddp = (uint8_t *)(dcp + 1);
 				for (iii = 0; iii < sizeof(desc.data_bytes); iii++) {
 					desc.data_bytes[iii] = ddp[iii];
 				}
 				desc.tsn_ifany = dcp->dp.tsn;
 			} else {
 				/* nope we are done. */
 				SCTP_STAT_INCR(sctps_pdrpnedat);
 				break;
 			}
 		} else {
 			if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX))
 				SCTP_STAT_INCR(sctps_pdrpmbct);
 		}
 
 		if (process_chunk_drop(stcb, &desc, net, cp->ch.chunk_flags)) {
 			SCTP_STAT_INCR(sctps_pdrppdbrk);
 			break;
 		}
 		/* Move on to the next chunk, using the padded length. */
 		if (SCTP_SIZE32(at) > chlen) {
 			break;
 		}
 		chlen -= SCTP_SIZE32(at);
 		if (chlen < sizeof(struct sctp_chunkhdr)) {
 			/* done, none left */
 			break;
 		}
 		ch = (struct sctp_chunkhdr *)((caddr_t)ch + SCTP_SIZE32(at));
 	}
 	/* Now update any rwnd --- possibly */
 	if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX) == 0) {
 		/* From a peer, we get a rwnd report */
 		uint32_t a_rwnd;
 
 		SCTP_STAT_INCR(sctps_pdrpfehos);
 
 		bottle_bw = ntohl(cp->bottle_bw);
 		on_queue = ntohl(cp->current_onq);
 		if (bottle_bw && on_queue) {
 			/* a rwnd report is in here */
 			if (bottle_bw > on_queue)
 				a_rwnd = bottle_bw - on_queue;
 			else
 				a_rwnd = 0;
 
 			if (a_rwnd == 0)
 				stcb->asoc.peers_rwnd = 0;
 			else {
 				if (a_rwnd > stcb->asoc.total_flight) {
 					stcb->asoc.peers_rwnd =
 					    a_rwnd - stcb->asoc.total_flight;
 				} else {
 					stcb->asoc.peers_rwnd = 0;
 				}
 				if (stcb->asoc.peers_rwnd <
 				    stcb->sctp_ep->sctp_ep.sctp_sws_sender) {
 					/* SWS sender side engages */
 					stcb->asoc.peers_rwnd = 0;
 				}
 			}
 		}
 	} else {
 		SCTP_STAT_INCR(sctps_pdrpfmbox);
 	}
 
 	/* now middle boxes in sat networks get a cwnd bump */
 	if ((cp->ch.chunk_flags & SCTP_FROM_MIDDLE_BOX) &&
 	    (stcb->asoc.sat_t3_loss_recovery == 0) &&
 	    (stcb->asoc.sat_network)) {
 		/*
 		 * This is debatable but for sat networks it makes sense.
 		 * Note if a T3 timer has gone off, we will prohibit any
 		 * changes to cwnd until we exit the t3 loss recovery.
 		 */
 		stcb->asoc.cc_functions.sctp_cwnd_update_after_packet_dropped(stcb,
 		    net, cp, &bottle_bw, &on_queue);
 	}
 }
 
 /*
  * handles all control chunks in a packet inputs: - m: mbuf chain, assumed to
  * still contain IP/SCTP header - stcb: is the tcb found for this packet -
  * offset: offset into the mbuf chain to first chunkhdr - length: is the
  * length of the complete packet outputs: - length: modified to remaining
  * length after control processing - netp: modified to new sctp_nets after
  * cookie-echo processing - return NULL to discard the packet (ie. no asoc,
  * bad packet,...) otherwise return the tcb for this packet
  */
 #ifdef __GNUC__
 __attribute__((noinline))
 #endif
 static struct sctp_tcb *
 sctp_process_control(struct mbuf *m, int iphlen, int *offset, int length,
     struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_chunkhdr *ch, struct sctp_inpcb *inp,
     struct sctp_tcb *stcb, struct sctp_nets **netp, int *fwd_tsn_seen,
     uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
     uint32_t vrf_id, uint16_t port)
 {
 	struct sctp_association *asoc;
 	struct mbuf *op_err;
 	char msg[SCTP_DIAG_INFO_LEN];
 	uint32_t vtag_in;
 	int num_chunks = 0;	/* number of control chunks processed */
 	uint32_t chk_length;
 	int ret;
 	int abort_no_unlock = 0;
 	int ecne_seen = 0;
 
 	/*
 	 * How big should this be, and should it be alloc'd? Lets try the
 	 * d-mtu-ceiling for now (2k) and that should hopefully work ...
 	 * until we get into jumbo grams and such..
 	 */
 	uint8_t chunk_buf[SCTP_CHUNK_BUFFER_SIZE];
 	struct sctp_tcb *locked_tcb = stcb;
 	int got_auth = 0;
 	uint32_t auth_offset = 0, auth_len = 0;
 	int auth_skipped = 0;
 	int asconf_cnt = 0;
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 #endif
 
 	SCTPDBG(SCTP_DEBUG_INPUT1, "sctp_process_control: iphlen=%u, offset=%u, length=%u stcb:%p\n",
 	    iphlen, *offset, length, (void *)stcb);
 
 	/* validate chunk header length... */
 	if (ntohs(ch->chunk_length) < sizeof(*ch)) {
 		SCTPDBG(SCTP_DEBUG_INPUT1, "Invalid header length %d\n",
 		    ntohs(ch->chunk_length));
 		if (locked_tcb) {
 			SCTP_TCB_UNLOCK(locked_tcb);
 		}
 		return (NULL);
 	}
 	/*
 	 * validate the verification tag
 	 */
 	vtag_in = ntohl(sh->v_tag);
 
 	if (locked_tcb) {
 		SCTP_TCB_LOCK_ASSERT(locked_tcb);
 	}
 	if (ch->chunk_type == SCTP_INITIATION) {
 		SCTPDBG(SCTP_DEBUG_INPUT1, "Its an INIT of len:%d vtag:%x\n",
 		    ntohs(ch->chunk_length), vtag_in);
 		if (vtag_in != 0) {
 			/* protocol error- silently discard... */
 			SCTP_STAT_INCR(sctps_badvtag);
 			if (locked_tcb) {
 				SCTP_TCB_UNLOCK(locked_tcb);
 			}
 			return (NULL);
 		}
 	} else if (ch->chunk_type != SCTP_COOKIE_ECHO) {
 		/*
 		 * If there is no stcb, skip the AUTH chunk and process
 		 * later after a stcb is found (to validate the lookup was
 		 * valid.
 		 */
 		if ((ch->chunk_type == SCTP_AUTHENTICATION) &&
 		    (stcb == NULL) &&
 		    (inp->auth_supported == 1)) {
 			/* save this chunk for later processing */
 			auth_skipped = 1;
 			auth_offset = *offset;
 			auth_len = ntohs(ch->chunk_length);
 
 			/* (temporarily) move past this chunk */
 			*offset += SCTP_SIZE32(auth_len);
 			if (*offset >= length) {
 				/* no more data left in the mbuf chain */
 				*offset = length;
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				return (NULL);
 			}
 			ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
 			    sizeof(struct sctp_chunkhdr), chunk_buf);
 		}
 		if (ch == NULL) {
 			/* Help */
 			*offset = length;
 			if (locked_tcb) {
 				SCTP_TCB_UNLOCK(locked_tcb);
 			}
 			return (NULL);
 		}
 		if (ch->chunk_type == SCTP_COOKIE_ECHO) {
 			goto process_control_chunks;
 		}
 		/*
 		 * first check if it's an ASCONF with an unknown src addr we
 		 * need to look inside to find the association
 		 */
 		if (ch->chunk_type == SCTP_ASCONF && stcb == NULL) {
 			struct sctp_chunkhdr *asconf_ch = ch;
 			uint32_t asconf_offset = 0, asconf_len = 0;
 
 			/* inp's refcount may be reduced */
 			SCTP_INP_INCR_REF(inp);
 
 			asconf_offset = *offset;
 			do {
 				asconf_len = ntohs(asconf_ch->chunk_length);
 				if (asconf_len < sizeof(struct sctp_asconf_paramhdr))
 					break;
 				stcb = sctp_findassociation_ep_asconf(m,
 				    *offset,
 				    dst,
 				    sh, &inp, netp, vrf_id);
 				if (stcb != NULL)
 					break;
 				asconf_offset += SCTP_SIZE32(asconf_len);
 				asconf_ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, asconf_offset,
 				    sizeof(struct sctp_chunkhdr), chunk_buf);
 			} while (asconf_ch != NULL && asconf_ch->chunk_type == SCTP_ASCONF);
 			if (stcb == NULL) {
 				/*
 				 * reduce inp's refcount if not reduced in
 				 * sctp_findassociation_ep_asconf().
 				 */
 				SCTP_INP_DECR_REF(inp);
 			} else {
 				locked_tcb = stcb;
 			}
 
 			/* now go back and verify any auth chunk to be sure */
 			if (auth_skipped && (stcb != NULL)) {
 				struct sctp_auth_chunk *auth;
 
 				auth = (struct sctp_auth_chunk *)
 				    sctp_m_getptr(m, auth_offset,
 				    auth_len, chunk_buf);
 				got_auth = 1;
 				auth_skipped = 0;
 				if ((auth == NULL) || sctp_handle_auth(stcb, auth, m,
 				    auth_offset)) {
 					/* auth HMAC failed so dump it */
 					*offset = length;
 					if (locked_tcb) {
 						SCTP_TCB_UNLOCK(locked_tcb);
 					}
 					return (NULL);
 				} else {
 					/* remaining chunks are HMAC checked */
 					stcb->asoc.authenticated = 1;
 				}
 			}
 		}
 		if (stcb == NULL) {
 			snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
 			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 			    msg);
 			/* no association, so it's out of the blue... */
 			sctp_handle_ootb(m, iphlen, *offset, src, dst, sh, inp, op_err,
 			    mflowtype, mflowid, inp->fibnum,
 			    vrf_id, port);
 			*offset = length;
 			if (locked_tcb) {
 				SCTP_TCB_UNLOCK(locked_tcb);
 			}
 			return (NULL);
 		}
 		asoc = &stcb->asoc;
 		/* ABORT and SHUTDOWN can use either v_tag... */
 		if ((ch->chunk_type == SCTP_ABORT_ASSOCIATION) ||
 		    (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) ||
 		    (ch->chunk_type == SCTP_PACKET_DROPPED)) {
 			/* Take the T-bit always into account. */
 			if ((((ch->chunk_flags & SCTP_HAD_NO_TCB) == 0) &&
 			    (vtag_in == asoc->my_vtag)) ||
 			    (((ch->chunk_flags & SCTP_HAD_NO_TCB) == SCTP_HAD_NO_TCB) &&
 			    (vtag_in == asoc->peer_vtag))) {
 				/* this is valid */
 			} else {
 				/* drop this packet... */
 				SCTP_STAT_INCR(sctps_badvtag);
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				return (NULL);
 			}
 		} else if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
 			if (vtag_in != asoc->my_vtag) {
 				/*
 				 * this could be a stale SHUTDOWN-ACK or the
 				 * peer never got the SHUTDOWN-COMPLETE and
 				 * is still hung; we have started a new asoc
 				 * but it won't complete until the shutdown
 				 * is completed
 				 */
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
 				op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 				    msg);
 				sctp_handle_ootb(m, iphlen, *offset, src, dst,
 				    sh, inp, op_err,
 				    mflowtype, mflowid, fibnum,
 				    vrf_id, port);
 				return (NULL);
 			}
 		} else {
 			/* for all other chunks, vtag must match */
 			if (vtag_in != asoc->my_vtag) {
 				/* invalid vtag... */
 				SCTPDBG(SCTP_DEBUG_INPUT3,
 				    "invalid vtag: %xh, expect %xh\n",
 				    vtag_in, asoc->my_vtag);
 				SCTP_STAT_INCR(sctps_badvtag);
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				*offset = length;
 				return (NULL);
 			}
 		}
 	}			/* end if !SCTP_COOKIE_ECHO */
 	/*
 	 * process all control chunks...
 	 */
 	if (((ch->chunk_type == SCTP_SELECTIVE_ACK) ||
 	    (ch->chunk_type == SCTP_NR_SELECTIVE_ACK) ||
 	    (ch->chunk_type == SCTP_HEARTBEAT_REQUEST)) &&
 	    (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED)) {
 		/* implied cookie-ack.. we must have lost the ack */
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 			sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 			    stcb->asoc.overall_error_count,
 			    0,
 			    SCTP_FROM_SCTP_INPUT,
 			    __LINE__);
 		}
 		stcb->asoc.overall_error_count = 0;
 		sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb,
 		    *netp);
 	}
 process_control_chunks:
 	while (IS_SCTP_CONTROL(ch)) {
 		/* validate chunk length */
 		chk_length = ntohs(ch->chunk_length);
 		SCTPDBG(SCTP_DEBUG_INPUT2, "sctp_process_control: processing a chunk type=%u, len=%u\n",
 		    ch->chunk_type, chk_length);
 		SCTP_LTRACE_CHK(inp, stcb, ch->chunk_type, chk_length);
 		if (chk_length < sizeof(*ch) ||
 		    (*offset + (int)chk_length) > length) {
 			*offset = length;
 			if (locked_tcb) {
 				SCTP_TCB_UNLOCK(locked_tcb);
 			}
 			return (NULL);
 		}
 		SCTP_STAT_INCR_COUNTER64(sctps_incontrolchunks);
 		/*
 		 * INIT-ACK only gets the init ack "header" portion only
 		 * because we don't have to process the peer's COOKIE. All
 		 * others get a complete chunk.
 		 */
 		if ((ch->chunk_type == SCTP_INITIATION_ACK) ||
 		    (ch->chunk_type == SCTP_INITIATION)) {
 			/* get an init-ack chunk */
 			ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
 			    sizeof(struct sctp_init_ack_chunk), chunk_buf);
 			if (ch == NULL) {
 				*offset = length;
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				return (NULL);
 			}
 		} else {
 			/* For cookies and all other chunks. */
 			if (chk_length > sizeof(chunk_buf)) {
 				/*
 				 * use just the size of the chunk buffer so
 				 * the front part of our chunks fit in
 				 * contiguous space up to the chunk buffer
 				 * size (508 bytes). For chunks that need to
 				 * get more than that they must use the
 				 * sctp_m_getptr() function or other means
 				 * (e.g. know how to parse mbuf chains).
 				 * Cookies do this already.
 				 */
 				ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
 				    (sizeof(chunk_buf) - 4),
 				    chunk_buf);
 				if (ch == NULL) {
 					*offset = length;
 					if (locked_tcb) {
 						SCTP_TCB_UNLOCK(locked_tcb);
 					}
 					return (NULL);
 				}
 			} else {
 				/* We can fit it all */
 				ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
 				    chk_length, chunk_buf);
 				if (ch == NULL) {
 					SCTP_PRINTF("sctp_process_control: Can't get the all data....\n");
 					*offset = length;
 					if (locked_tcb) {
 						SCTP_TCB_UNLOCK(locked_tcb);
 					}
 					return (NULL);
 				}
 			}
 		}
 		num_chunks++;
 		/* Save off the last place we got a control from */
 		if (stcb != NULL) {
 			if (((netp != NULL) && (*netp != NULL)) || (ch->chunk_type == SCTP_ASCONF)) {
 				/*
 				 * allow last_control to be NULL if
 				 * ASCONF... ASCONF processing will find the
 				 * right net later
 				 */
 				if ((netp != NULL) && (*netp != NULL))
 					stcb->asoc.last_control_chunk_from = *netp;
 			}
 		}
 #ifdef SCTP_AUDITING_ENABLED
 		sctp_audit_log(0xB0, ch->chunk_type);
 #endif
 
 		/* check to see if this chunk required auth, but isn't */
 		if ((stcb != NULL) &&
 		    (stcb->asoc.auth_supported == 1) &&
 		    sctp_auth_is_required_chunk(ch->chunk_type, stcb->asoc.local_auth_chunks) &&
 		    !stcb->asoc.authenticated) {
 			/* "silently" ignore */
 			SCTP_STAT_INCR(sctps_recvauthmissing);
 			goto next_chunk;
 		}
 		switch (ch->chunk_type) {
 		case SCTP_INITIATION:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_INIT\n");
 			/* The INIT chunk must be the only chunk. */
 			if ((num_chunks > 1) ||
 			    (length - *offset > (int)SCTP_SIZE32(chk_length))) {
 				/* RFC 4960 requires that no ABORT is sent */
 				*offset = length;
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				return (NULL);
 			}
 			/* Honor our resource limit. */
 			if (chk_length > SCTP_LARGEST_INIT_ACCEPTED) {
 				op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
 				sctp_abort_association(inp, stcb, m, iphlen,
 				    src, dst, sh, op_err,
 				    mflowtype, mflowid,
 				    vrf_id, port);
 				*offset = length;
 				return (NULL);
 			}
 			sctp_handle_init(m, iphlen, *offset, src, dst, sh,
 			    (struct sctp_init_chunk *)ch, inp,
 			    stcb, *netp, &abort_no_unlock,
 			    mflowtype, mflowid,
 			    vrf_id, port);
 			*offset = length;
 			if ((!abort_no_unlock) && (locked_tcb)) {
 				SCTP_TCB_UNLOCK(locked_tcb);
 			}
 			return (NULL);
 			break;
 		case SCTP_PAD_CHUNK:
 			break;
 		case SCTP_INITIATION_ACK:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_INIT-ACK\n");
 			if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
 				/* We are not interested anymore */
 				if ((stcb) && (stcb->asoc.total_output_queue_size)) {
 					;
 				} else {
 					if ((locked_tcb != NULL) && (locked_tcb != stcb)) {
 						/* Very unlikely */
 						SCTP_TCB_UNLOCK(locked_tcb);
 					}
 					*offset = length;
 					if (stcb) {
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 						so = SCTP_INP_SO(inp);
 						atomic_add_int(&stcb->asoc.refcnt, 1);
 						SCTP_TCB_UNLOCK(stcb);
 						SCTP_SOCKET_LOCK(so, 1);
 						SCTP_TCB_LOCK(stcb);
 						atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
 						(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 						    SCTP_FROM_SCTP_INPUT + SCTP_LOC_29);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 						SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 					}
 					return (NULL);
 				}
 			}
 			/* The INIT-ACK chunk must be the only chunk. */
 			if ((num_chunks > 1) ||
 			    (length - *offset > (int)SCTP_SIZE32(chk_length))) {
 				*offset = length;
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				return (NULL);
 			}
 			if ((netp) && (*netp)) {
 				ret = sctp_handle_init_ack(m, iphlen, *offset,
 				    src, dst, sh,
 				    (struct sctp_init_ack_chunk *)ch,
 				    stcb, *netp,
 				    &abort_no_unlock,
 				    mflowtype, mflowid,
 				    vrf_id);
 			} else {
 				ret = -1;
 			}
 			*offset = length;
 			if (abort_no_unlock) {
 				return (NULL);
 			}
 			/*
 			 * Special case, I must call the output routine to
 			 * get the cookie echoed
 			 */
 			if ((stcb != NULL) && (ret == 0)) {
 				sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
 			}
 			if (locked_tcb) {
 				SCTP_TCB_UNLOCK(locked_tcb);
 			}
 			return (NULL);
 			break;
 		case SCTP_SELECTIVE_ACK:
 			{
 				struct sctp_sack_chunk *sack;
 				int abort_now = 0;
 				uint32_t a_rwnd, cum_ack;
 				uint16_t num_seg, num_dup;
 				uint8_t flags;
 				int offset_seg, offset_dup;
 
 				SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SACK\n");
 				SCTP_STAT_INCR(sctps_recvsacks);
 				if (stcb == NULL) {
 					SCTPDBG(SCTP_DEBUG_INDATA1, "No stcb when processing SACK chunk\n");
 					break;
 				}
 				if (chk_length < sizeof(struct sctp_sack_chunk)) {
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size on SACK chunk, too small\n");
 					break;
 				}
 				if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
 					/*-
 					 * If we have sent a shutdown-ack, we will pay no
 					 * attention to a sack sent in to us since
 					 * we don't care anymore.
 					 */
 					break;
 				}
 				sack = (struct sctp_sack_chunk *)ch;
 				flags = ch->chunk_flags;
 				cum_ack = ntohl(sack->sack.cum_tsn_ack);
 				num_seg = ntohs(sack->sack.num_gap_ack_blks);
 				num_dup = ntohs(sack->sack.num_dup_tsns);
 				a_rwnd = (uint32_t)ntohl(sack->sack.a_rwnd);
 				if (sizeof(struct sctp_sack_chunk) +
 				    num_seg * sizeof(struct sctp_gap_ack_block) +
 				    num_dup * sizeof(uint32_t) != chk_length) {
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size of SACK chunk\n");
 					break;
 				}
 				offset_seg = *offset + sizeof(struct sctp_sack_chunk);
 				offset_dup = offset_seg + num_seg * sizeof(struct sctp_gap_ack_block);
 				SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SACK process cum_ack:%x num_seg:%d a_rwnd:%d\n",
 				    cum_ack, num_seg, a_rwnd);
 				stcb->asoc.seen_a_sack_this_pkt = 1;
 				if ((stcb->asoc.pr_sctp_cnt == 0) &&
 				    (num_seg == 0) &&
 				    SCTP_TSN_GE(cum_ack, stcb->asoc.last_acked_seq) &&
 				    (stcb->asoc.saw_sack_with_frags == 0) &&
 				    (stcb->asoc.saw_sack_with_nr_frags == 0) &&
 				    (!TAILQ_EMPTY(&stcb->asoc.sent_queue))
 				    ) {
 					/*
 					 * We have a SIMPLE sack having no
 					 * prior segments and data on sent
 					 * queue to be acked.. Use the
 					 * faster path sack processing. We
 					 * also allow window update sacks
 					 * with no missing segments to go
 					 * this way too.
 					 */
 					sctp_express_handle_sack(stcb, cum_ack, a_rwnd, &abort_now, ecne_seen);
 				} else {
 					if (netp && *netp)
 						sctp_handle_sack(m, offset_seg, offset_dup, stcb,
 						    num_seg, 0, num_dup, &abort_now, flags,
 						    cum_ack, a_rwnd, ecne_seen);
 				}
 				if (abort_now) {
 					/* ABORT signal from sack processing */
 					*offset = length;
 					return (NULL);
 				}
 				if (TAILQ_EMPTY(&stcb->asoc.send_queue) &&
 				    TAILQ_EMPTY(&stcb->asoc.sent_queue) &&
 				    (stcb->asoc.stream_queue_cnt == 0)) {
 					sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
 				}
 			}
 			break;
 			/*
 			 * EY - nr_sack:  If the received chunk is an
 			 * nr_sack chunk
 			 */
 		case SCTP_NR_SELECTIVE_ACK:
 			{
 				struct sctp_nr_sack_chunk *nr_sack;
 				int abort_now = 0;
 				uint32_t a_rwnd, cum_ack;
 				uint16_t num_seg, num_nr_seg, num_dup;
 				uint8_t flags;
 				int offset_seg, offset_dup;
 
 				SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_NR_SACK\n");
 				SCTP_STAT_INCR(sctps_recvsacks);
 				if (stcb == NULL) {
 					SCTPDBG(SCTP_DEBUG_INDATA1, "No stcb when processing NR-SACK chunk\n");
 					break;
 				}
 				if (stcb->asoc.nrsack_supported == 0) {
 					goto unknown_chunk;
 				}
 				if (chk_length < sizeof(struct sctp_nr_sack_chunk)) {
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size on NR-SACK chunk, too small\n");
 					break;
 				}
 				if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_ACK_SENT) {
 					/*-
 					 * If we have sent a shutdown-ack, we will pay no
 					 * attention to a sack sent in to us since
 					 * we don't care anymore.
 					 */
 					break;
 				}
 				nr_sack = (struct sctp_nr_sack_chunk *)ch;
 				flags = ch->chunk_flags;
 				cum_ack = ntohl(nr_sack->nr_sack.cum_tsn_ack);
 				num_seg = ntohs(nr_sack->nr_sack.num_gap_ack_blks);
 				num_nr_seg = ntohs(nr_sack->nr_sack.num_nr_gap_ack_blks);
 				num_dup = ntohs(nr_sack->nr_sack.num_dup_tsns);
 				a_rwnd = (uint32_t)ntohl(nr_sack->nr_sack.a_rwnd);
 				if (sizeof(struct sctp_nr_sack_chunk) +
 				    (num_seg + num_nr_seg) * sizeof(struct sctp_gap_ack_block) +
 				    num_dup * sizeof(uint32_t) != chk_length) {
 					SCTPDBG(SCTP_DEBUG_INDATA1, "Bad size of NR_SACK chunk\n");
 					break;
 				}
 				offset_seg = *offset + sizeof(struct sctp_nr_sack_chunk);
 				offset_dup = offset_seg + num_seg * sizeof(struct sctp_gap_ack_block);
 				SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_NR_SACK process cum_ack:%x num_seg:%d a_rwnd:%d\n",
 				    cum_ack, num_seg, a_rwnd);
 				stcb->asoc.seen_a_sack_this_pkt = 1;
 				if ((stcb->asoc.pr_sctp_cnt == 0) &&
 				    (num_seg == 0) && (num_nr_seg == 0) &&
 				    SCTP_TSN_GE(cum_ack, stcb->asoc.last_acked_seq) &&
 				    (stcb->asoc.saw_sack_with_frags == 0) &&
 				    (stcb->asoc.saw_sack_with_nr_frags == 0) &&
 				    (!TAILQ_EMPTY(&stcb->asoc.sent_queue))) {
 					/*
 					 * We have a SIMPLE sack having no
 					 * prior segments and data on sent
 					 * queue to be acked. Use the faster
 					 * path sack processing. We also
 					 * allow window update sacks with no
 					 * missing segments to go this way
 					 * too.
 					 */
 					sctp_express_handle_sack(stcb, cum_ack, a_rwnd,
 					    &abort_now, ecne_seen);
 				} else {
 					if (netp && *netp)
 						sctp_handle_sack(m, offset_seg, offset_dup, stcb,
 						    num_seg, num_nr_seg, num_dup, &abort_now, flags,
 						    cum_ack, a_rwnd, ecne_seen);
 				}
 				if (abort_now) {
 					/* ABORT signal from sack processing */
 					*offset = length;
 					return (NULL);
 				}
 				if (TAILQ_EMPTY(&stcb->asoc.send_queue) &&
 				    TAILQ_EMPTY(&stcb->asoc.sent_queue) &&
 				    (stcb->asoc.stream_queue_cnt == 0)) {
 					sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_NOT_LOCKED);
 				}
 			}
 			break;
 
 		case SCTP_HEARTBEAT_REQUEST:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_HEARTBEAT\n");
 			if ((stcb) && netp && *netp) {
 				SCTP_STAT_INCR(sctps_recvheartbeat);
 				sctp_send_heartbeat_ack(stcb, m, *offset,
 				    chk_length, *netp);
 
 				/* He's alive so give him credit */
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 					sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 					    stcb->asoc.overall_error_count,
 					    0,
 					    SCTP_FROM_SCTP_INPUT,
 					    __LINE__);
 				}
 				stcb->asoc.overall_error_count = 0;
 			}
 			break;
 		case SCTP_HEARTBEAT_ACK:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_HEARTBEAT-ACK\n");
 			if ((stcb == NULL) || (chk_length != sizeof(struct sctp_heartbeat_chunk))) {
 				/* Its not ours */
 				*offset = length;
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				return (NULL);
 			}
 			/* He's alive so give him credit */
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 				sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 				    stcb->asoc.overall_error_count,
 				    0,
 				    SCTP_FROM_SCTP_INPUT,
 				    __LINE__);
 			}
 			stcb->asoc.overall_error_count = 0;
 			SCTP_STAT_INCR(sctps_recvheartbeatack);
 			if (netp && *netp)
 				sctp_handle_heartbeat_ack((struct sctp_heartbeat_chunk *)ch,
 				    stcb, *netp);
 			break;
 		case SCTP_ABORT_ASSOCIATION:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ABORT, stcb %p\n",
 			    (void *)stcb);
 			if ((stcb) && netp && *netp)
 				sctp_handle_abort((struct sctp_abort_chunk *)ch,
 				    stcb, *netp);
 			*offset = length;
 			return (NULL);
 			break;
 		case SCTP_SHUTDOWN:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN, stcb %p\n",
 			    (void *)stcb);
 			if ((stcb == NULL) || (chk_length != sizeof(struct sctp_shutdown_chunk))) {
 				*offset = length;
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				return (NULL);
 			}
 			if (netp && *netp) {
 				int abort_flag = 0;
 
 				sctp_handle_shutdown((struct sctp_shutdown_chunk *)ch,
 				    stcb, *netp, &abort_flag);
 				if (abort_flag) {
 					*offset = length;
 					return (NULL);
 				}
 			}
 			break;
 		case SCTP_SHUTDOWN_ACK:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-ACK, stcb %p\n", (void *)stcb);
 			if ((stcb) && (netp) && (*netp))
 				sctp_handle_shutdown_ack((struct sctp_shutdown_ack_chunk *)ch, stcb, *netp);
 			*offset = length;
 			return (NULL);
 			break;
 
 		case SCTP_OPERATION_ERROR:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_OP-ERR\n");
 			if ((stcb) && netp && *netp && sctp_handle_error(ch, stcb, *netp) < 0) {
 				*offset = length;
 				return (NULL);
 			}
 			break;
 		case SCTP_COOKIE_ECHO:
 			SCTPDBG(SCTP_DEBUG_INPUT3,
 			    "SCTP_COOKIE-ECHO, stcb %p\n", (void *)stcb);
 			if ((stcb) && (stcb->asoc.total_output_queue_size)) {
 				;
 			} else {
 				if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
 					/* We are not interested anymore */
 			abend:
 					if (stcb) {
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					*offset = length;
 					return (NULL);
 				}
 			}
 			/*
 			 * First are we accepting? We do this again here
 			 * since it is possible that a previous endpoint WAS
 			 * listening responded to a INIT-ACK and then
 			 * closed. We opened and bound.. and are now no
 			 * longer listening.
+			 *
+			 * XXXGL: notes on checking listen queue length.
+			 * 1) SCTP_IS_LISTENING() doesn't necessarily mean
+			 *    SOLISTENING(), because a listening "UDP type"
+			 *    socket isn't listening in terms of the socket
+			 *    layer.  It is a normal data flow socket, that
+			 *    can fork off new connections.  Thus, we should
+			 *    look into sol_qlen only in case we are !UDP.
+			 * 2) Checking sol_qlen in general requires locking
+			 *    the socket, and this code lacks that.
 			 */
-
 			if ((stcb == NULL) &&
 			    (!SCTP_IS_LISTENING(inp) ||
-			    inp->sctp_socket->so_qlen >= inp->sctp_socket->so_qlimit)) {
+			    (!(inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) &&
+			    inp->sctp_socket->sol_qlen >= inp->sctp_socket->sol_qlimit))) {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
 				    (SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit))) {
 					op_err = sctp_generate_cause(SCTP_CAUSE_OUT_OF_RESC, "");
 					sctp_abort_association(inp, stcb, m, iphlen,
 					    src, dst, sh, op_err,
 					    mflowtype, mflowid,
 					    vrf_id, port);
 				}
 				*offset = length;
 				return (NULL);
 			} else {
 				struct mbuf *ret_buf;
 				struct sctp_inpcb *linp;
 
 				if (stcb) {
 					linp = NULL;
 				} else {
 					linp = inp;
 				}
 
 				if (linp) {
 					SCTP_ASOC_CREATE_LOCK(linp);
 					if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) ||
 					    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) {
 						SCTP_ASOC_CREATE_UNLOCK(linp);
 						goto abend;
 					}
 				}
 				if (netp) {
 					ret_buf =
 					    sctp_handle_cookie_echo(m, iphlen,
 					    *offset,
 					    src, dst,
 					    sh,
 					    (struct sctp_cookie_echo_chunk *)ch,
 					    &inp, &stcb, netp,
 					    auth_skipped,
 					    auth_offset,
 					    auth_len,
 					    &locked_tcb,
 					    mflowtype,
 					    mflowid,
 					    vrf_id,
 					    port);
 				} else {
 					ret_buf = NULL;
 				}
 				if (linp) {
 					SCTP_ASOC_CREATE_UNLOCK(linp);
 				}
 				if (ret_buf == NULL) {
 					if (locked_tcb) {
 						SCTP_TCB_UNLOCK(locked_tcb);
 					}
 					SCTPDBG(SCTP_DEBUG_INPUT3,
 					    "GAK, null buffer\n");
 					*offset = length;
 					return (NULL);
 				}
 				/* if AUTH skipped, see if it verified... */
 				if (auth_skipped) {
 					got_auth = 1;
 					auth_skipped = 0;
 				}
 				if (!TAILQ_EMPTY(&stcb->asoc.sent_queue)) {
 					/*
 					 * Restart the timer if we have
 					 * pending data
 					 */
 					struct sctp_tmit_chunk *chk;
 
 					chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
 					sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, chk->whoTo);
 				}
 			}
 			break;
 		case SCTP_COOKIE_ACK:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_COOKIE-ACK, stcb %p\n", (void *)stcb);
 			if ((stcb == NULL) || chk_length != sizeof(struct sctp_cookie_ack_chunk)) {
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				return (NULL);
 			}
 			if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
 				/* We are not interested anymore */
 				if ((stcb) && (stcb->asoc.total_output_queue_size)) {
 					;
 				} else if (stcb) {
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 					so = SCTP_INP_SO(inp);
 					atomic_add_int(&stcb->asoc.refcnt, 1);
 					SCTP_TCB_UNLOCK(stcb);
 					SCTP_SOCKET_LOCK(so, 1);
 					SCTP_TCB_LOCK(stcb);
 					atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
 					(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 					    SCTP_FROM_SCTP_INPUT + SCTP_LOC_30);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 					SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 					*offset = length;
 					return (NULL);
 				}
 			}
 			/* He's alive so give him credit */
 			if ((stcb) && netp && *netp) {
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 					sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 					    stcb->asoc.overall_error_count,
 					    0,
 					    SCTP_FROM_SCTP_INPUT,
 					    __LINE__);
 				}
 				stcb->asoc.overall_error_count = 0;
 				sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb, *netp);
 			}
 			break;
 		case SCTP_ECN_ECHO:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ECN-ECHO\n");
 			/* He's alive so give him credit */
 			if ((stcb == NULL) || (chk_length != sizeof(struct sctp_ecne_chunk))) {
 				/* Its not ours */
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				*offset = length;
 				return (NULL);
 			}
 			if (stcb) {
 				if (stcb->asoc.ecn_supported == 0) {
 					goto unknown_chunk;
 				}
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 					sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 					    stcb->asoc.overall_error_count,
 					    0,
 					    SCTP_FROM_SCTP_INPUT,
 					    __LINE__);
 				}
 				stcb->asoc.overall_error_count = 0;
 				sctp_handle_ecn_echo((struct sctp_ecne_chunk *)ch,
 				    stcb);
 				ecne_seen = 1;
 			}
 			break;
 		case SCTP_ECN_CWR:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ECN-CWR\n");
 			/* He's alive so give him credit */
 			if ((stcb == NULL) || (chk_length != sizeof(struct sctp_cwr_chunk))) {
 				/* Its not ours */
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				*offset = length;
 				return (NULL);
 			}
 			if (stcb) {
 				if (stcb->asoc.ecn_supported == 0) {
 					goto unknown_chunk;
 				}
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 					sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 					    stcb->asoc.overall_error_count,
 					    0,
 					    SCTP_FROM_SCTP_INPUT,
 					    __LINE__);
 				}
 				stcb->asoc.overall_error_count = 0;
 				sctp_handle_ecn_cwr((struct sctp_cwr_chunk *)ch, stcb, *netp);
 			}
 			break;
 		case SCTP_SHUTDOWN_COMPLETE:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_SHUTDOWN-COMPLETE, stcb %p\n", (void *)stcb);
 			/* must be first and only chunk */
 			if ((num_chunks > 1) ||
 			    (length - *offset > (int)SCTP_SIZE32(chk_length))) {
 				*offset = length;
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				return (NULL);
 			}
 			if ((stcb) && netp && *netp) {
 				sctp_handle_shutdown_complete((struct sctp_shutdown_complete_chunk *)ch,
 				    stcb, *netp);
 			}
 			*offset = length;
 			return (NULL);
 			break;
 		case SCTP_ASCONF:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ASCONF\n");
 			/* He's alive so give him credit */
 			if (stcb) {
 				if (stcb->asoc.asconf_supported == 0) {
 					goto unknown_chunk;
 				}
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 					sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 					    stcb->asoc.overall_error_count,
 					    0,
 					    SCTP_FROM_SCTP_INPUT,
 					    __LINE__);
 				}
 				stcb->asoc.overall_error_count = 0;
 				sctp_handle_asconf(m, *offset, src,
 				    (struct sctp_asconf_chunk *)ch, stcb, asconf_cnt == 0);
 				asconf_cnt++;
 			}
 			break;
 		case SCTP_ASCONF_ACK:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_ASCONF-ACK\n");
 			if (chk_length < sizeof(struct sctp_asconf_ack_chunk)) {
 				/* Its not ours */
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				*offset = length;
 				return (NULL);
 			}
 			if ((stcb) && netp && *netp) {
 				if (stcb->asoc.asconf_supported == 0) {
 					goto unknown_chunk;
 				}
 				/* He's alive so give him credit */
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 					sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 					    stcb->asoc.overall_error_count,
 					    0,
 					    SCTP_FROM_SCTP_INPUT,
 					    __LINE__);
 				}
 				stcb->asoc.overall_error_count = 0;
 				sctp_handle_asconf_ack(m, *offset,
 				    (struct sctp_asconf_ack_chunk *)ch, stcb, *netp, &abort_no_unlock);
 				if (abort_no_unlock)
 					return (NULL);
 			}
 			break;
 		case SCTP_FORWARD_CUM_TSN:
 		case SCTP_IFORWARD_CUM_TSN:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_FWD-TSN\n");
 			if (chk_length < sizeof(struct sctp_forward_tsn_chunk)) {
 				/* Its not ours */
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				*offset = length;
 				return (NULL);
 			}
 			/* He's alive so give him credit */
 			if (stcb) {
 				int abort_flag = 0;
 
 				if (stcb->asoc.prsctp_supported == 0) {
 					goto unknown_chunk;
 				}
 				stcb->asoc.overall_error_count = 0;
 				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 					sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 					    stcb->asoc.overall_error_count,
 					    0,
 					    SCTP_FROM_SCTP_INPUT,
 					    __LINE__);
 				}
 				*fwd_tsn_seen = 1;
 				if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) {
 					/* We are not interested anymore */
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 					so = SCTP_INP_SO(inp);
 					atomic_add_int(&stcb->asoc.refcnt, 1);
 					SCTP_TCB_UNLOCK(stcb);
 					SCTP_SOCKET_LOCK(so, 1);
 					SCTP_TCB_LOCK(stcb);
 					atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
 					(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 					    SCTP_FROM_SCTP_INPUT + SCTP_LOC_31);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 					SCTP_SOCKET_UNLOCK(so, 1);
 #endif
 					*offset = length;
 					return (NULL);
 				}
 				/*
 				 * For sending a SACK this looks like DATA
 				 * chunks.
 				 */
 				stcb->asoc.last_data_chunk_from = stcb->asoc.last_control_chunk_from;
 				sctp_handle_forward_tsn(stcb,
 				    (struct sctp_forward_tsn_chunk *)ch, &abort_flag, m, *offset);
 				if (abort_flag) {
 					*offset = length;
 					return (NULL);
 				} else {
 					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 						sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 						    stcb->asoc.overall_error_count,
 						    0,
 						    SCTP_FROM_SCTP_INPUT,
 						    __LINE__);
 					}
 					stcb->asoc.overall_error_count = 0;
 				}
 
 			}
 			break;
 		case SCTP_STREAM_RESET:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_STREAM_RESET\n");
 			if (((stcb == NULL) || (ch == NULL) || (chk_length < sizeof(struct sctp_stream_reset_tsn_req)))) {
 				/* Its not ours */
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				*offset = length;
 				return (NULL);
 			}
 			if (stcb->asoc.reconfig_supported == 0) {
 				goto unknown_chunk;
 			}
 			if (sctp_handle_stream_reset(stcb, m, *offset, ch)) {
 				/* stop processing */
 				*offset = length;
 				return (NULL);
 			}
 			break;
 		case SCTP_PACKET_DROPPED:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_PACKET_DROPPED\n");
 			/* re-get it all please */
 			if (chk_length < sizeof(struct sctp_pktdrop_chunk)) {
 				/* Its not ours */
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				*offset = length;
 				return (NULL);
 			}
 			if (ch && (stcb) && netp && (*netp)) {
 				if (stcb->asoc.pktdrop_supported == 0) {
 					goto unknown_chunk;
 				}
 				sctp_handle_packet_dropped((struct sctp_pktdrop_chunk *)ch,
 				    stcb, *netp,
 				    min(chk_length, (sizeof(chunk_buf) - 4)));
 
 			}
 			break;
 		case SCTP_AUTHENTICATION:
 			SCTPDBG(SCTP_DEBUG_INPUT3, "SCTP_AUTHENTICATION\n");
 			if (stcb == NULL) {
 				/* save the first AUTH for later processing */
 				if (auth_skipped == 0) {
 					auth_offset = *offset;
 					auth_len = chk_length;
 					auth_skipped = 1;
 				}
 				/* skip this chunk (temporarily) */
 				goto next_chunk;
 			}
 			if (stcb->asoc.auth_supported == 0) {
 				goto unknown_chunk;
 			}
 			if ((chk_length < (sizeof(struct sctp_auth_chunk))) ||
 			    (chk_length > (sizeof(struct sctp_auth_chunk) +
 			    SCTP_AUTH_DIGEST_LEN_MAX))) {
 				/* Its not ours */
 				if (locked_tcb) {
 					SCTP_TCB_UNLOCK(locked_tcb);
 				}
 				*offset = length;
 				return (NULL);
 			}
 			if (got_auth == 1) {
 				/* skip this chunk... it's already auth'd */
 				goto next_chunk;
 			}
 			got_auth = 1;
 			if ((ch == NULL) || sctp_handle_auth(stcb, (struct sctp_auth_chunk *)ch,
 			    m, *offset)) {
 				/* auth HMAC failed so dump the packet */
 				*offset = length;
 				return (stcb);
 			} else {
 				/* remaining chunks are HMAC checked */
 				stcb->asoc.authenticated = 1;
 			}
 			break;
 
 		default:
 	unknown_chunk:
 			/* it's an unknown chunk! */
 			if ((ch->chunk_type & 0x40) && (stcb != NULL)) {
 				struct sctp_gen_error_cause *cause;
 				int len;
 
 				op_err = sctp_get_mbuf_for_msg(sizeof(struct sctp_gen_error_cause),
 				    0, M_NOWAIT, 1, MT_DATA);
 				if (op_err != NULL) {
 					len = min(SCTP_SIZE32(chk_length), (uint32_t)(length - *offset));
 					cause = mtod(op_err, struct sctp_gen_error_cause *);
 					cause->code = htons(SCTP_CAUSE_UNRECOG_CHUNK);
 					cause->length = htons((uint16_t)(len + sizeof(struct sctp_gen_error_cause)));
 					SCTP_BUF_LEN(op_err) = sizeof(struct sctp_gen_error_cause);
 					SCTP_BUF_NEXT(op_err) = SCTP_M_COPYM(m, *offset, len, M_NOWAIT);
 					if (SCTP_BUF_NEXT(op_err) != NULL) {
 #ifdef SCTP_MBUF_LOGGING
 						if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 							sctp_log_mbc(SCTP_BUF_NEXT(op_err), SCTP_MBUF_ICOPY);
 						}
 #endif
 						sctp_queue_op_err(stcb, op_err);
 					} else {
 						sctp_m_freem(op_err);
 					}
 				}
 			}
 			if ((ch->chunk_type & 0x80) == 0) {
 				/* discard this packet */
 				*offset = length;
 				return (stcb);
 			}	/* else skip this bad chunk and continue... */
 			break;
 		}		/* switch (ch->chunk_type) */
 
 
 next_chunk:
 		/* get the next chunk */
 		*offset += SCTP_SIZE32(chk_length);
 		if (*offset >= length) {
 			/* no more data left in the mbuf chain */
 			break;
 		}
 		ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, *offset,
 		    sizeof(struct sctp_chunkhdr), chunk_buf);
 		if (ch == NULL) {
 			if (locked_tcb) {
 				SCTP_TCB_UNLOCK(locked_tcb);
 			}
 			*offset = length;
 			return (NULL);
 		}
 	}			/* while */
 
 	if (asconf_cnt > 0 && stcb != NULL) {
 		sctp_send_asconf_ack(stcb);
 	}
 	return (stcb);
 }
 
 
 /*
  * common input chunk processing (v4 and v6)
  *
  * Entry point shared by the address-family specific input routines once
  * the IP/SCTP/first-chunk headers have been located.  The function:
  *   - verifies the packet checksum when compute_crc is non-zero,
  *   - drops packets whose destination port is 0,
  *   - looks up the endpoint (inp) and association (stcb) with
  *     sctp_findassociation_addr(),
  *   - tracks changes of the UDP encapsulation port on the matched path,
  *   - hands control chunks to sctp_process_control() and any remaining
  *     DATA chunks to sctp_process_data(),
  *   - finally calls sctp_chunk_output() if something is waiting to go out.
  *
  * mm       - address of the mbuf chain pointer (passed on to
  *            sctp_process_data()).
  * iphlen   - length of the IP header in front of the SCTP common header.
  * offset   - offset of the first chunk within the packet.
  * length   - total packet length; chunks are processed while
  *            length > offset.
  * src/dst  - source and destination addresses of the packet.
  * sh/ch    - SCTP common header and first chunk header.
  * compute_crc - non-zero if the checksum must be verified in software.
  * ecn_bits - IP ECN/TOS bits, checked against SCTP_CE_BITS.
  * mflowtype/mflowid - flow information recorded on the matched net.
  * fibnum, vrf_id - routing context handed to the reply/lookup routines.
  * port     - UDP encapsulation port the packet arrived with (0 is treated
  *            as "not encapsulated" by the MTU adjustments below).
  *
  * Nothing is returned.  All exits go through the "out" label, which drops
  * the TCB lock if stcb is still set and releases the endpoint reference
  * remembered in inp_decr.  The mbuf chain is not freed here.
  */
 void
 sctp_common_input_processing(struct mbuf **mm, int iphlen, int offset, int length,
     struct sockaddr *src, struct sockaddr *dst,
     struct sctphdr *sh, struct sctp_chunkhdr *ch,
 #if !defined(SCTP_WITH_NO_CSUM)
     uint8_t compute_crc,
 #endif
     uint8_t ecn_bits,
     uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum,
     uint32_t vrf_id, uint16_t port)
 {
 	uint32_t high_tsn;
 	int fwd_tsn_seen = 0, data_processed = 0;
 	struct mbuf *m = *mm, *op_err;
 	char msg[SCTP_DIAG_INFO_LEN];
 	int un_sent;
 	int cnt_ctrl_ready = 0;
 	struct sctp_inpcb *inp = NULL, *inp_decr = NULL;
 	struct sctp_tcb *stcb = NULL;
 	struct sctp_nets *net = NULL;
 
 	SCTP_STAT_INCR(sctps_recvdatagrams);
 #ifdef SCTP_AUDITING_ENABLED
 	sctp_audit_log(0xE0, 1);
 	sctp_auditing(0, inp, stcb, net);
 #endif
 #if !defined(SCTP_WITH_NO_CSUM)
 	if (compute_crc != 0) {
 		uint32_t check, calc_check;
 
 		/*
 		 * The checksum is calculated with the checksum field set to
 		 * zero; the received value is put back afterwards.
 		 */
 		check = sh->checksum;
 		sh->checksum = 0;
 		calc_check = sctp_calculate_cksum(m, iphlen);
 		sh->checksum = check;
 		if (calc_check != check) {
 			SCTPDBG(SCTP_DEBUG_INPUT1, "Bad CSUM on SCTP packet calc_check:%x check:%x  m:%p mlen:%d iphlen:%d\n",
 			    calc_check, check, (void *)m, length, iphlen);
 			/*
 			 * Look the association up anyway so that
 			 * sctp_send_packet_dropped() can report the damaged
 			 * packet to a known peer.
 			 */
 			stcb = sctp_findassociation_addr(m, offset, src, dst,
 			    sh, ch, &inp, &net, vrf_id);
 #if defined(INET) || defined(INET6)
 			if ((ch->chunk_type != SCTP_INITIATION) &&
 			    (net != NULL) && (net->port != port)) {
 				if (net->port == 0) {
 					/* UDP encapsulation turned on. */
 					net->mtu -= sizeof(struct udphdr);
 					if (stcb->asoc.smallest_mtu > net->mtu) {
 						sctp_pathmtu_adjustment(stcb, net->mtu);
 					}
 				} else if (port == 0) {
 					/* UDP encapsulation turned off. */
 					net->mtu += sizeof(struct udphdr);
 					/* XXX Update smallest_mtu */
 				}
 				net->port = port;
 			}
 #endif
 			if (net != NULL) {
 				net->flowtype = mflowtype;
 				net->flowid = mflowid;
 			}
 			if ((inp != NULL) && (stcb != NULL)) {
 				sctp_send_packet_dropped(stcb, net, m, length, iphlen, 1);
 				sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_INPUT_ERROR, SCTP_SO_NOT_LOCKED);
 			} else if ((inp != NULL) && (stcb == NULL)) {
 				/* Endpoint only: release its reference at "out". */
 				inp_decr = inp;
 			}
 			SCTP_STAT_INCR(sctps_badsum);
 			SCTP_STAT_INCR_COUNTER32(sctps_checksumerrors);
 			goto out;
 		}
 	}
 #endif
 	/* Destination port of 0 is illegal, based on RFC4960. */
 	if (sh->dest_port == 0) {
 		SCTP_STAT_INCR(sctps_hdrops);
 		goto out;
 	}
 	stcb = sctp_findassociation_addr(m, offset, src, dst,
 	    sh, ch, &inp, &net, vrf_id);
 #if defined(INET) || defined(INET6)
 	if ((ch->chunk_type != SCTP_INITIATION) &&
 	    (net != NULL) && (net->port != port)) {
 		if (net->port == 0) {
 			/* UDP encapsulation turned on. */
 			net->mtu -= sizeof(struct udphdr);
 			if (stcb->asoc.smallest_mtu > net->mtu) {
 				sctp_pathmtu_adjustment(stcb, net->mtu);
 			}
 		} else if (port == 0) {
 			/* UDP encapsulation turned off. */
 			net->mtu += sizeof(struct udphdr);
 			/* XXX Update smallest_mtu */
 		}
 		net->port = port;
 	}
 #endif
 	if (net != NULL) {
 		net->flowtype = mflowtype;
 		net->flowid = mflowid;
 	}
 	if (inp == NULL) {
 		/*
 		 * No endpoint matched.  Depending on the first chunk type and
 		 * the sctp_blackhole sysctl either stay silent, answer with
 		 * SHUTDOWN-COMPLETE, or send an ABORT.
 		 */
 		SCTP_STAT_INCR(sctps_noport);
 		if (badport_bandlim(BANDLIM_SCTP_OOTB) < 0) {
 			goto out;
 		}
 		if (ch->chunk_type == SCTP_SHUTDOWN_ACK) {
 			sctp_send_shutdown_complete2(src, dst, sh,
 			    mflowtype, mflowid, fibnum,
 			    vrf_id, port);
 			goto out;
 		}
 		if (ch->chunk_type == SCTP_SHUTDOWN_COMPLETE) {
 			goto out;
 		}
 		if (ch->chunk_type != SCTP_ABORT_ASSOCIATION) {
 			if ((SCTP_BASE_SYSCTL(sctp_blackhole) == 0) ||
 			    ((SCTP_BASE_SYSCTL(sctp_blackhole) == 1) &&
 			    (ch->chunk_type != SCTP_INIT))) {
 				op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 				    "Out of the blue");
 				sctp_send_abort(m, iphlen, src, dst,
 				    sh, 0, op_err,
 				    mflowtype, mflowid, fibnum,
 				    vrf_id, port);
 			}
 		}
 		goto out;
 	} else if (stcb == NULL) {
 		/* Endpoint but no association: release its reference at "out". */
 		inp_decr = inp;
 	}
 	SCTPDBG(SCTP_DEBUG_INPUT1, "Ok, Common input processing called, m:%p iphlen:%d offset:%d length:%d stcb:%p\n",
 	    (void *)m, iphlen, offset, length, (void *)stcb);
 	if (stcb) {
 		/* always clear this before beginning a packet */
 		stcb->asoc.authenticated = 0;
 		stcb->asoc.seen_a_sack_this_pkt = 0;
 		SCTPDBG(SCTP_DEBUG_INPUT1, "stcb:%p state:%x\n",
 		    (void *)stcb, stcb->asoc.state);
 
 		if ((stcb->asoc.state & SCTP_STATE_WAS_ABORTED) ||
 		    (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED)) {
 			/*-
 			 * If we hit here, we had a ref count
 			 * up when the assoc was aborted and the
 			 * timer is clearing out the assoc, we should
 			 * NOT respond to any packet.. its OOTB.
 			 */
 			SCTP_TCB_UNLOCK(stcb);
 			stcb = NULL;
 			snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
 			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 			    msg);
 			sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err,
 			    mflowtype, mflowid, inp->fibnum,
 			    vrf_id, port);
 			goto out;
 		}
 	}
 	if (IS_SCTP_CONTROL(ch)) {
 		/* process the control portion of the SCTP packet */
 		/* sa_ignore NO_NULL_CHK */
 		stcb = sctp_process_control(m, iphlen, &offset, length,
 		    src, dst, sh, ch,
 		    inp, stcb, &net, &fwd_tsn_seen,
 		    mflowtype, mflowid, fibnum,
 		    vrf_id, port);
 		if (stcb) {
 			/*
 			 * This covers us if the cookie-echo was there and
 			 * it changes our INP.
 			 */
 			inp = stcb->sctp_ep;
 #if defined(INET) || defined(INET6)
 			if ((ch->chunk_type != SCTP_INITIATION) &&
 			    (net != NULL) && (net->port != port)) {
 				if (net->port == 0) {
 					/* UDP encapsulation turned on. */
 					net->mtu -= sizeof(struct udphdr);
 					if (stcb->asoc.smallest_mtu > net->mtu) {
 						sctp_pathmtu_adjustment(stcb, net->mtu);
 					}
 				} else if (port == 0) {
 					/* UDP encapsulation turned off. */
 					net->mtu += sizeof(struct udphdr);
 					/* XXX Update smallest_mtu */
 				}
 				net->port = port;
 			}
 #endif
 		}
 	} else {
 		/*
 		 * no control chunks, so pre-process DATA chunks (these
 		 * checks are taken care of by control processing)
 		 */
 
 		/*
 		 * if DATA only packet, and auth is required, then punt...
 		 * can't have authenticated without any AUTH (control)
 		 * chunks
 		 */
 		if ((stcb != NULL) &&
 		    (stcb->asoc.auth_supported == 1) &&
 		    sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks)) {
 			/* "silently" ignore */
 			SCTP_STAT_INCR(sctps_recvauthmissing);
 			goto out;
 		}
 		if (stcb == NULL) {
 			/* out of the blue DATA chunk */
 			snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
 			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 			    msg);
 			sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err,
 			    mflowtype, mflowid, fibnum,
 			    vrf_id, port);
 			goto out;
 		}
 		if (stcb->asoc.my_vtag != ntohl(sh->v_tag)) {
 			/* v_tag mismatch! */
 			SCTP_STAT_INCR(sctps_badvtag);
 			goto out;
 		}
 	}
 
 	if (stcb == NULL) {
 		/*
 		 * no valid TCB for this packet, or we found it's a bad
 		 * packet while processing control, or we're done with this
 		 * packet (done or skip rest of data), so we drop it...
 		 */
 		goto out;
 	}
 	/*
 	 * DATA chunk processing
 	 */
 	/* plow through the data chunks while length > offset */
 
 	/*
 	 * Rest should be DATA only.  Check authentication state if AUTH for
 	 * DATA is required.
 	 */
 	if ((length > offset) &&
 	    (stcb != NULL) &&
 	    (stcb->asoc.auth_supported == 1) &&
 	    sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.local_auth_chunks) &&
 	    !stcb->asoc.authenticated) {
 		/* "silently" ignore */
 		SCTP_STAT_INCR(sctps_recvauthmissing);
 		SCTPDBG(SCTP_DEBUG_AUTH1,
 		    "Data chunk requires AUTH, skipped\n");
 		goto trigger_send;
 	}
 	if (length > offset) {
 		int retval;
 
 		/*
 		 * First check to make sure our state is correct. We would
 		 * not get here unless we really did have a tag, so we don't
 		 * abort if this happens, just dump the chunk silently.
 		 */
 		switch (SCTP_GET_STATE(&stcb->asoc)) {
 		case SCTP_STATE_COOKIE_ECHOED:
 			/*
 			 * we consider data with valid tags in this state
 			 * shows us the cookie-ack was lost. Imply it was
 			 * there.
 			 */
 			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
 				sctp_misc_ints(SCTP_THRESHOLD_CLEAR,
 				    stcb->asoc.overall_error_count,
 				    0,
 				    SCTP_FROM_SCTP_INPUT,
 				    __LINE__);
 			}
 			stcb->asoc.overall_error_count = 0;
 			sctp_handle_cookie_ack((struct sctp_cookie_ack_chunk *)ch, stcb, net);
 			break;
 		case SCTP_STATE_COOKIE_WAIT:
 			/*
 			 * We consider OOTB any data sent during asoc setup.
 			 */
 			snprintf(msg, sizeof(msg), "OOTB, %s:%d at %s", __FILE__, __LINE__, __func__);
 			op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code),
 			    msg);
 			sctp_handle_ootb(m, iphlen, offset, src, dst, sh, inp, op_err,
 			    mflowtype, mflowid, inp->fibnum,
 			    vrf_id, port);
 			goto out;
 			/* sa_ignore NOTREACHED */
 			break;
 		case SCTP_STATE_EMPTY:	/* should not happen */
 		case SCTP_STATE_INUSE:	/* should not happen */
 		case SCTP_STATE_SHUTDOWN_RECEIVED:	/* This is a peer error */
 		case SCTP_STATE_SHUTDOWN_ACK_SENT:
 		default:
 			goto out;
 			/* sa_ignore NOTREACHED */
 			break;
 		case SCTP_STATE_OPEN:
 		case SCTP_STATE_SHUTDOWN_SENT:
 			break;
 		}
 		/* plow through the data chunks while length > offset */
 		retval = sctp_process_data(mm, iphlen, &offset, length,
 		    inp, stcb, net, &high_tsn);
 		if (retval == 2) {
 			/*
 			 * The association aborted, NO UNLOCK needed since
 			 * the association is destroyed.
 			 */
 			stcb = NULL;
 			goto out;
 		}
 		data_processed = 1;
 		/*
 		 * Anything important needs to have been m_copy'ed in
 		 * process_data
 		 */
 	}
 	/* take care of ecn */
 	if ((data_processed == 1) &&
 	    (stcb->asoc.ecn_supported == 1) &&
 	    ((ecn_bits & SCTP_CE_BITS) == SCTP_CE_BITS)) {
 		/* Yep, we need to add a ECNE */
 		sctp_send_ecn_echo(stcb, net, high_tsn);
 	}
 	/*
 	 * A FORWARD-TSN was seen by control processing: always request a
 	 * SACK, and run the SACK check here if no DATA was processed in
 	 * this packet.
 	 */
 	if ((data_processed == 0) && (fwd_tsn_seen)) {
 		int was_a_gap;
 		uint32_t highest_tsn;
 
 		if (SCTP_TSN_GT(stcb->asoc.highest_tsn_inside_nr_map, stcb->asoc.highest_tsn_inside_map)) {
 			highest_tsn = stcb->asoc.highest_tsn_inside_nr_map;
 		} else {
 			highest_tsn = stcb->asoc.highest_tsn_inside_map;
 		}
 		was_a_gap = SCTP_TSN_GT(highest_tsn, stcb->asoc.cumulative_tsn);
 		stcb->asoc.send_sack = 1;
 		sctp_sack_check(stcb, was_a_gap);
 	} else if (fwd_tsn_seen) {
 		stcb->asoc.send_sack = 1;
 	}
 	/* trigger send of any chunks in queue... */
 trigger_send:
 #ifdef SCTP_AUDITING_ENABLED
 	sctp_audit_log(0xE0, 2);
 	sctp_auditing(1, inp, stcb, net);
 #endif
 	SCTPDBG(SCTP_DEBUG_INPUT1,
 	    "Check for chunk output prw:%d tqe:%d tf=%d\n",
 	    stcb->asoc.peers_rwnd,
 	    TAILQ_EMPTY(&stcb->asoc.control_send_queue),
 	    stcb->asoc.total_flight);
 	/* Queued output size minus what is already in flight. */
 	un_sent = (stcb->asoc.total_output_queue_size - stcb->asoc.total_flight);
 	if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue)) {
 		/* ECN echo chunks on the queue are not counted as "ready". */
 		cnt_ctrl_ready = stcb->asoc.ctrl_queue_cnt - stcb->asoc.ecn_echo_cnt_onq;
 	}
 	if (!TAILQ_EMPTY(&stcb->asoc.asconf_send_queue) ||
 	    cnt_ctrl_ready ||
 	    stcb->asoc.trigger_reset ||
 	    ((un_sent) &&
 	    (stcb->asoc.peers_rwnd > 0 ||
 	    (stcb->asoc.peers_rwnd <= 0 && stcb->asoc.total_flight == 0)))) {
 		SCTPDBG(SCTP_DEBUG_INPUT3, "Calling chunk OUTPUT\n");
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CONTROL_PROC, SCTP_SO_NOT_LOCKED);
 		SCTPDBG(SCTP_DEBUG_INPUT3, "chunk OUTPUT returns\n");
 	}
 #ifdef SCTP_AUDITING_ENABLED
 	sctp_audit_log(0xE0, 3);
 	sctp_auditing(2, inp, stcb, net);
 #endif
 out:
 	/*
 	 * Common exit: drop the TCB lock if an association is still held,
 	 * then release the endpoint reference noted in inp_decr.
 	 */
 	if (stcb != NULL) {
 		SCTP_TCB_UNLOCK(stcb);
 	}
 	if (inp_decr != NULL) {
 		/* reduce ref-count */
 		SCTP_INP_WLOCK(inp_decr);
 		SCTP_INP_DECR_REF(inp_decr);
 		SCTP_INP_WUNLOCK(inp_decr);
 	}
 	return;
 }
 
 #ifdef INET
 /*
  * IPv4 input path for SCTP.
  *
  * i_pak is the received packet, off the length of the IP header and port
  * the UDP encapsulation port handed through to the common input code.
  * The routine pulls the IP, SCTP and first chunk headers into the first
  * mbuf, builds source/destination sockaddrs, sanity checks the packet
  * and calls sctp_common_input_processing().  Whatever mbuf chain is left
  * afterwards is freed here.
  */
 void
 sctp_input_with_port(struct mbuf *i_pak, int off, uint16_t port)
 {
 	struct sockaddr_in src, dst;
 	struct sctp_chunkhdr *ch;
 	struct sctphdr *sh;
 	struct mbuf *m;
 	struct ip *ip;
 	uint32_t vrf_id = 0;
 	uint32_t mflowid;
 	uint16_t fibnum;
 	uint8_t mflowtype;
 	uint8_t ecn_bits;
 #if !defined(SCTP_WITH_NO_CSUM)
 	uint8_t compute_crc;
 #endif
 	int hdrs_len, chunk_off, pkt_len;
 
 	if (SCTP_GET_PKT_VRFID(i_pak, vrf_id)) {
 		SCTP_RELEASE_PKT(i_pak);
 		return;
 	}
 	m = SCTP_HEADER_TO_CHAIN(i_pak);
 #ifdef SCTP_MBUF_LOGGING
 	/* Record the incoming mbufs when mbuf logging is switched on. */
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) {
 		sctp_log_mbc(m, SCTP_MBUF_INPUT);
 	}
 #endif
 #ifdef SCTP_PACKET_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) {
 		sctp_packet_log(m);
 	}
 #endif
 	SCTPDBG(SCTP_DEBUG_CRCOFFLOAD,
 	    "sctp_input(): Packet of length %d received on %s with csum_flags 0x%b.\n",
 	    m->m_pkthdr.len,
 	    if_name(m->m_pkthdr.rcvif),
 	    (int)m->m_pkthdr.csum_flags, CSUM_BITS);
 	mflowid = m->m_pkthdr.flowid;
 	mflowtype = M_HASHTYPE_GET(m);
 	fibnum = M_GETFIB(m);
 	SCTP_STAT_INCR(sctps_recvpackets);
 	SCTP_STAT_INCR_COUNTER64(sctps_inpackets);
 
 	/*
 	 * The IP header, the SCTP common header and the first chunk header
 	 * must be contiguous in the first mbuf; the first chunk starts
 	 * right after the SCTP common header.
 	 */
 	chunk_off = off + sizeof(struct sctphdr);
 	hdrs_len = off + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr);
 	if (SCTP_BUF_LEN(m) < hdrs_len &&
 	    (m = m_pullup(m, hdrs_len)) == NULL) {
 		SCTP_STAT_INCR(sctps_hdrops);
 		return;
 	}
 	ip = mtod(m, struct ip *);
 	sh = (struct sctphdr *)((caddr_t)ip + off);
 	ch = (struct sctp_chunkhdr *)((caddr_t)sh + sizeof(struct sctphdr));
 
 	/* Destination and source addresses in sockaddr form. */
 	memset(&dst, 0, sizeof(struct sockaddr_in));
 	dst.sin_len = sizeof(struct sockaddr_in);
 	dst.sin_family = AF_INET;
 	dst.sin_addr = ip->ip_dst;
 	dst.sin_port = sh->dest_port;
 	memset(&src, 0, sizeof(struct sockaddr_in));
 	src.sin_len = sizeof(struct sockaddr_in);
 	src.sin_family = AF_INET;
 	src.sin_addr = ip->ip_src;
 	src.sin_port = sh->src_port;
 
 	/* The mbuf chain must be exactly as long as the IP header claims. */
 	pkt_len = ntohs(ip->ip_len);
 	if (SCTP_HEADER_LEN(m) != pkt_len) {
 		SCTPDBG(SCTP_DEBUG_INPUT1,
 		    "sctp_input() length:%d reported length:%d\n", pkt_len, SCTP_HEADER_LEN(m));
 		SCTP_STAT_INCR(sctps_hdrops);
 		goto out;
 	}
 	/* SCTP accepts neither multicast nor broadcast destinations. */
 	if (IN_MULTICAST(ntohl(dst.sin_addr.s_addr)) ||
 	    SCTP_IS_IT_BROADCAST(dst.sin_addr, m)) {
 		goto out;
 	}
 	ecn_bits = ip->ip_tos;
 #if defined(SCTP_WITH_NO_CSUM)
 	SCTP_STAT_INCR(sctps_recvnocrc);
 #else
 	/* Only verify the checksum in software if nobody did it for us. */
 	if ((m->m_pkthdr.csum_flags & CSUM_SCTP_VALID) == 0) {
 		SCTP_STAT_INCR(sctps_recvswcrc);
 		compute_crc = 1;
 	} else {
 		SCTP_STAT_INCR(sctps_recvhwcrc);
 		compute_crc = 0;
 	}
 #endif
 	sctp_common_input_processing(&m, off, chunk_off, pkt_len,
 	    (struct sockaddr *)&src,
 	    (struct sockaddr *)&dst,
 	    sh, ch,
 #if !defined(SCTP_WITH_NO_CSUM)
 	    compute_crc,
 #endif
 	    ecn_bits,
 	    mflowtype, mflowid, fibnum,
 	    vrf_id, port);
 out:
 	if (m != NULL) {
 		sctp_m_freem(m);
 	}
 	return;
 }
 
 #if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP)
 extern int *sctp_cpuarry;
 #endif
 
 /*
  * Protocol input hook for SCTP over IPv4.
  *
  * With SCTP_MCORE_INPUT on an SMP FreeBSD kernel and more than one CPU,
  * the packet is queued to a CPU chosen from its flow id (one is derived
  * from the verification tag and ports when the lower layers supplied
  * none).  Otherwise the packet is processed directly with no UDP
  * encapsulation port.  Always returns IPPROTO_DONE.
  */
 int
 sctp_input(struct mbuf **mp, int *offp, int proto SCTP_UNUSED)
 {
 	struct mbuf *m = *mp;
 	int off = *offp;
 
 #if defined(__FreeBSD__) && defined(SCTP_MCORE_INPUT) && defined(SMP)
 	if (mp_ncpus > 1) {
 		uint32_t flowid;
 
 		if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE) {
 			struct sctphdr *sh;
 			uint32_t tag;
 			int need;
 
 			/*
 			 * The lower layers did not provide a flow id, so
 			 * build one from the SCTP common header.
 			 */
 			need = off + sizeof(struct sctphdr);
 			if (SCTP_BUF_LEN(m) < need &&
 			    (m = m_pullup(m, need)) == NULL) {
 				SCTP_STAT_INCR(sctps_hdrops);
 				return (IPPROTO_DONE);
 			}
 			sh = (struct sctphdr *)(mtod(m, caddr_t) + off);
 			tag = htonl(sh->v_tag);
 			flowid = tag ^ ntohs(sh->dest_port) ^ ntohs(sh->src_port);
 			m->m_pkthdr.flowid = flowid;
 			M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE_HASH);
 		} else {
 			flowid = m->m_pkthdr.flowid;
 		}
 		sctp_queue_to_mcore(m, off, sctp_cpuarry[flowid % mp_ncpus]);
 		return (IPPROTO_DONE);
 	}
 #endif
 	sctp_input_with_port(m, off, 0);
 	return (IPPROTO_DONE);
 }
 #endif
Index: head/sys/netinet/sctp_syscalls.c
===================================================================
--- head/sys/netinet/sctp_syscalls.c	(revision 319721)
+++ head/sys/netinet/sctp_syscalls.c	(revision 319722)
@@ -1,597 +1,579 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_sctp.h"
 #include "opt_compat.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/capsicum.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sysproto.h>
 #include <sys/malloc.h>
 #include <sys/filedesc.h>
 #include <sys/event.h>
 #include <sys/proc.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filio.h>
 #include <sys/jail.h>
 #include <sys/mount.h>
 #include <sys/mbuf.h>
 #include <sys/protosw.h>
 #include <sys/sf_buf.h>
 #include <sys/sysent.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/signalvar.h>
 #include <sys/syscall.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/uio.h>
 #include <sys/vnode.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 #ifdef COMPAT_FREEBSD32
 #include <compat/freebsd32/freebsd32_util.h>
 #endif
 
 #include <net/vnet.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <netinet/sctp.h>
 #include <netinet/sctp_peeloff.h>
 
 /* System calls provided by SCTP, registered at boot by sctp_syscalls_init(). */
 static struct syscall_helper_data sctp_syscalls[] = {
 	SYSCALL_INIT_HELPER(sctp_peeloff),
 	SYSCALL_INIT_HELPER(sctp_generic_sendmsg),
 	SYSCALL_INIT_HELPER(sctp_generic_sendmsg_iov),
 	SYSCALL_INIT_HELPER(sctp_generic_recvmsg),
 	SYSCALL_INIT_LAST
 };
 
 /*
  * SYSINIT handler: register the SCTP system calls, and their 32-bit
  * compat counterparts when COMPAT_FREEBSD32 is configured.  A failed
  * registration is only caught by KASSERT.
  */
 static void
 sctp_syscalls_init(void *unused __unused)
 {
 	int rc;
 
 	rc = syscall_helper_register(sctp_syscalls, SY_THR_STATIC);
 	KASSERT(rc == 0,
 	    ("%s: syscall_helper_register failed for sctp syscalls", __func__));
 #ifdef COMPAT_FREEBSD32
 	rc = syscall32_helper_register(sctp_syscalls, SY_THR_STATIC);
 	KASSERT(rc == 0,
 	    ("%s: syscall32_helper_register failed for sctp syscalls",
 	    __func__));
 #endif
 }
 SYSINIT(sctp_syscalls, SI_SUB_SYSCALLS, SI_ORDER_ANY, sctp_syscalls_init, NULL);
 
 /*
  * SCTP syscalls.
  * Functionality only compiled in if SCTP is defined in the kernel Makefile,
  * otherwise all return EOPNOTSUPP.
  * XXX: We should make this loadable one day.
  */
 /*
  * sctp_peeloff system call.
  *
  * Checks that descriptor uap->sd refers to an SCTP socket and that the
  * association identified by uap->name can be peeled off, allocates a new
  * file descriptor and socket, and moves the association onto the new
  * socket with sctp_do_peeloff().  On success the new descriptor is
  * returned in td->td_retval[0].
  *
  * Returns 0 or an errno value; EOPNOTSUPP if the socket is not an SCTP
  * socket or the kernel was built without SCTP, ENOMEM if no new socket
  * could be created.
  */
 int
 sys_sctp_peeloff(td, uap)
 	struct thread *td;
 	struct sctp_peeloff_args /* {
 		int	sd;
 		caddr_t	name;
 	} */ *uap;
 {
 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 	struct file *headfp, *nfp = NULL;
 	struct socket *head, *so;
 	cap_rights_t rights;
 	u_int fflag;
 	int error, fd;
 
 	AUDIT_ARG_FD(uap->sd);
 	error = getsock_cap(td, uap->sd, cap_rights_init(&rights, CAP_PEELOFF),
 	    &headfp, &fflag, NULL);
 	if (error != 0)
 		goto done2;
 	head = headfp->f_data;
 	if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
 		error = EOPNOTSUPP;
 		goto done;
 	}
 	error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
 	if (error != 0)
 		goto done;
 	/*
 	 * At this point we know we do have an assoc to pull, so
 	 * we proceed to get the fd set up. This may block
 	 * but that is ok.
 	 */
 
 	error = falloc(td, &nfp, &fd, 0);
 	if (error != 0)
 		goto done;
 	/* Stored now; the descriptor is closed again below on any error. */
 	td->td_retval[0] = fd;
 
 	CURVNET_SET(head->so_vnet);
-	so = sonewconn(head, SS_ISCONNECTED);
+	so = sopeeloff(head);
 	if (so == NULL) {
 		error = ENOMEM;
 		goto noconnection;
 	}
-	/*
-	 * Before changing the flags on the socket, we have to bump the
-	 * reference count.  Otherwise, if the protocol calls sofree(),
-	 * the socket will be released due to a zero refcount.
-	 */
-        SOCK_LOCK(so);
-        soref(so);                      /* file descriptor reference */
-        SOCK_UNLOCK(so);
-
-	ACCEPT_LOCK();
-
-	TAILQ_REMOVE(&head->so_comp, so, so_list);
-	head->so_qlen--;
-	so->so_state |= (head->so_state & SS_NBIO);
-	so->so_state &= ~SS_NOFDREF;
-	so->so_qstate &= ~SQ_COMP;
-	so->so_head = NULL;
-	ACCEPT_UNLOCK();
 	/* Attach the new socket to the new file before moving the assoc. */
 	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
 	error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
 	if (error != 0)
 		goto noconnection;
 	/* Carry the SIGIO owner of the original socket over, if any. */
 	if (head->so_sigio != NULL)
 		fsetown(fgetown(&head->so_sigio), &so->so_sigio);
 
 noconnection:
 	/*
 	 * close the new descriptor, assuming someone hasn't ripped it
 	 * out from under us.
 	 */
 	if (error != 0)
 		fdclose(td, nfp, fd);
 
 	/*
 	 * Release explicitly held references before returning.
 	 */
 	CURVNET_RESTORE();
 done:
 	if (nfp != NULL)
 		fdrop(nfp, td);
 	fdrop(headfp, td);
 done2:
 	return (error);
 #else  /* SCTP */
 	return (EOPNOTSUPP);
 #endif /* SCTP */
 }
 
 /*
  * sctp_generic_sendmsg system call: send one contiguous user buffer
  * (uap->msg, uap->mlen) on SCTP socket uap->sd, optionally to address
  * uap->to and with the send information in uap->sinfo.
  *
  * On success the number of bytes accepted is stored in td->td_retval[0].
  * Returns 0 or an errno value; EOPNOTSUPP if the descriptor is not an
  * SCTP socket or the kernel was built without SCTP.
  */
 int
 sys_sctp_generic_sendmsg (td, uap)
 	struct thread *td;
 	struct sctp_generic_sendmsg_args /* {
 		int sd,
 		caddr_t msg,
 		int mlen,
 		caddr_t to,
 		__socklen_t tolen,
 		struct sctp_sndrcvinfo *sinfo,
 		int flags
 	} */ *uap;
 {
 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 	struct sctp_sndrcvinfo info_copy, *info = NULL;
 	struct sockaddr *dest = NULL;
 	struct file *sockfp = NULL;
 	struct socket *sock;
 	struct iovec vec;
 	struct uio io;
 	cap_rights_t rights;
 #ifdef KTRACE
 	struct uio *trace_uio = NULL;
 #endif
 	int error = 0, total;
 
 	/* Fetch the optional send information from user space. */
 	if (uap->sinfo != NULL) {
 		error = copyin(uap->sinfo, &info_copy, sizeof (info_copy));
 		if (error != 0)
 			return (error);
 		info = &info_copy;
 	}
 
 	/* An explicit destination additionally needs CAP_CONNECT. */
 	cap_rights_init(&rights, CAP_SEND);
 	if (uap->tolen != 0) {
 		error = getsockaddr(&dest, uap->to, uap->tolen);
 		if (error != 0) {
 			dest = NULL;
 			goto free_addr;
 		}
 		cap_rights_set(&rights, CAP_CONNECT);
 	}
 
 	AUDIT_ARG_FD(uap->sd);
 	error = getsock_cap(td, uap->sd, &rights, &sockfp, NULL, NULL);
 	if (error != 0)
 		goto drop_file;
 #ifdef KTRACE
 	if (dest != NULL && KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(dest);
 #endif
 
 	sock = (struct socket *)sockfp->f_data;
 	if (sock->so_proto->pr_protocol != IPPROTO_SCTP) {
 		error = EOPNOTSUPP;
 		goto drop_file;
 	}
 #ifdef MAC
 	error = mac_socket_check_send(td->td_ucred, sock);
 	if (error != 0)
 		goto drop_file;
 #endif /* MAC */
 
 	/* Describe the single user buffer as a one-element uio. */
 	vec.iov_base = uap->msg;
 	vec.iov_len = uap->mlen;
 	io.uio_iov = &vec;
 	io.uio_iovcnt = 1;
 	io.uio_segflg = UIO_USERSPACE;
 	io.uio_rw = UIO_WRITE;
 	io.uio_td = td;
 	io.uio_offset = 0;			/* XXX */
 	io.uio_resid = 0;
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_GENIO))
 		trace_uio = cloneuio(&io);
 #endif /* KTRACE */
 	total = uap->mlen;
 	io.uio_resid = total;
 	CURVNET_SET(sock->so_vnet);
 	error = sctp_lower_sosend(sock, dest, &io, (struct mbuf *)NULL,
 	    (struct mbuf *)NULL, uap->flags, info, td);
 	CURVNET_RESTORE();
 	switch (error) {
 	case ERESTART:
 	case EINTR:
 	case EWOULDBLOCK:
 		/* An interrupted send that moved some data is a success. */
 		if (io.uio_resid != total)
 			error = 0;
 		break;
 	case EPIPE:
 		/* Generation of SIGPIPE can be controlled per socket. */
 		if (!(sock->so_options & SO_NOSIGPIPE) &&
 		    !(uap->flags & MSG_NOSIGNAL)) {
 			PROC_LOCK(td->td_proc);
 			tdsignal(td, SIGPIPE);
 			PROC_UNLOCK(td->td_proc);
 		}
 		break;
 	default:
 		break;
 	}
 	if (error == 0)
 		td->td_retval[0] = total - io.uio_resid;
 #ifdef KTRACE
 	if (trace_uio != NULL) {
 		trace_uio->uio_resid = td->td_retval[0];
 		ktrgenio(uap->sd, UIO_WRITE, trace_uio, error);
 	}
 #endif /* KTRACE */
 drop_file:
 	if (sockfp != NULL)
 		fdrop(sockfp, td);
 free_addr:
 	free(dest, M_SONAME);
 	return (error);
 #else  /* SCTP */
 	return (EOPNOTSUPP);
 #endif /* SCTP */
 }
 
 /*
  * sctp_generic_sendmsg_iov system call: like sctp_generic_sendmsg, but
  * the data is described by a user iovec array (uap->iov, uap->iovlen).
  *
  * On success the number of bytes accepted is stored in td->td_retval[0].
  * Returns 0 or an errno value; EINVAL if the summed iovec lengths go
  * negative, EOPNOTSUPP if the descriptor is not an SCTP socket or the
  * kernel was built without SCTP.
  */
 int
 sys_sctp_generic_sendmsg_iov(td, uap)
 	struct thread *td;
 	struct sctp_generic_sendmsg_iov_args /* {
 		int sd,
 		struct iovec *iov,
 		int iovlen,
 		caddr_t to,
 		__socklen_t tolen,
 		struct sctp_sndrcvinfo *sinfo,
 		int flags
 	} */ *uap;
 {
 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 	struct sctp_sndrcvinfo info_copy, *info = NULL;
 	struct sockaddr *dest = NULL;
 	struct file *sockfp = NULL;
 	struct socket *sock;
 	struct iovec *vecs;
 	struct uio io;
 	cap_rights_t rights;
 #ifdef KTRACE
 	struct uio *trace_uio = NULL;
 #endif
 	ssize_t total;
 	int error, idx;
 
 	/* Fetch the optional send information from user space. */
 	if (uap->sinfo != NULL) {
 		error = copyin(uap->sinfo, &info_copy, sizeof (info_copy));
 		if (error != 0)
 			return (error);
 		info = &info_copy;
 	}
 	/* An explicit destination additionally needs CAP_CONNECT. */
 	cap_rights_init(&rights, CAP_SEND);
 	if (uap->tolen != 0) {
 		error = getsockaddr(&dest, uap->to, uap->tolen);
 		if (error != 0) {
 			dest = NULL;
 			goto free_addr;
 		}
 		cap_rights_set(&rights, CAP_CONNECT);
 	}
 
 	AUDIT_ARG_FD(uap->sd);
 	error = getsock_cap(td, uap->sd, &rights, &sockfp, NULL, NULL);
 	if (error != 0)
 		goto drop_file;
 
 	/* Copy the iovec array in, honouring the 32-bit layout if needed. */
 #ifdef COMPAT_FREEBSD32
 	if (SV_CURPROC_FLAG(SV_ILP32))
 		error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
 		    uap->iovlen, &vecs, EMSGSIZE);
 	else
 #endif
 		error = copyiniov(uap->iov, uap->iovlen, &vecs, EMSGSIZE);
 	if (error != 0)
 		goto drop_file;
 #ifdef KTRACE
 	if (dest != NULL && KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(dest);
 #endif
 
 	sock = (struct socket *)sockfp->f_data;
 	if (sock->so_proto->pr_protocol != IPPROTO_SCTP) {
 		error = EOPNOTSUPP;
 		goto free_iov;
 	}
 #ifdef MAC
 	error = mac_socket_check_send(td->td_ucred, sock);
 	if (error != 0)
 		goto free_iov;
 #endif /* MAC */
 
 	io.uio_iov = vecs;
 	io.uio_iovcnt = uap->iovlen;
 	io.uio_segflg = UIO_USERSPACE;
 	io.uio_rw = UIO_WRITE;
 	io.uio_td = td;
 	io.uio_offset = 0;			/* XXX */
 	io.uio_resid = 0;
 	/* Total the segment lengths, rejecting a sum that turns negative. */
 	for (idx = 0; idx < uap->iovlen; idx++) {
 		io.uio_resid += vecs[idx].iov_len;
 		if (io.uio_resid < 0) {
 			error = EINVAL;
 			goto free_iov;
 		}
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_GENIO))
 		trace_uio = cloneuio(&io);
 #endif /* KTRACE */
 	total = io.uio_resid;
 	CURVNET_SET(sock->so_vnet);
 	error = sctp_lower_sosend(sock, dest, &io,
 		    (struct mbuf *)NULL, (struct mbuf *)NULL,
 		    uap->flags, info, td);
 	CURVNET_RESTORE();
 	switch (error) {
 	case ERESTART:
 	case EINTR:
 	case EWOULDBLOCK:
 		/* An interrupted send that moved some data is a success. */
 		if (io.uio_resid != total)
 			error = 0;
 		break;
 	case EPIPE:
 		/* Generation of SIGPIPE can be controlled per socket */
 		if (!(sock->so_options & SO_NOSIGPIPE) &&
 		    !(uap->flags & MSG_NOSIGNAL)) {
 			PROC_LOCK(td->td_proc);
 			tdsignal(td, SIGPIPE);
 			PROC_UNLOCK(td->td_proc);
 		}
 		break;
 	default:
 		break;
 	}
 	if (error == 0)
 		td->td_retval[0] = total - io.uio_resid;
 #ifdef KTRACE
 	if (trace_uio != NULL) {
 		trace_uio->uio_resid = td->td_retval[0];
 		ktrgenio(uap->sd, UIO_WRITE, trace_uio, error);
 	}
 #endif /* KTRACE */
 free_iov:
 	free(vecs, M_IOV);
 drop_file:
 	if (sockfp != NULL)
 		fdrop(sockfp, td);
 free_addr:
 	free(dest, M_SONAME);
 	return (error);
 #else  /* SCTP */
 	return (EOPNOTSUPP);
 #endif /* SCTP */
 }
 
 /*
  * sctp_generic_recvmsg system call: receive a message from SCTP socket
  * uap->sd into the user iovec array (uap->iov, uap->iovlen).
  *
  * Optional outputs, each written only when the matching pointer is set:
  *   uap->from / uap->fromlenaddr - peer address and its length; the
  *       value at fromlenaddr is read first as the size of the caller's
  *       address buffer,
  *   uap->sinfo     - receive information filled in by sctp_sorecvmsg(),
  *   uap->msg_flags - message flags (read as input, written as output).
  *
  * On success the number of bytes received is stored in
  * td->td_retval[0].  Returns 0 or an errno value; EINVAL if the summed
  * iovec lengths go negative, EOPNOTSUPP if the descriptor is not an
  * SCTP socket or the kernel was built without SCTP.
  */
 int
 sys_sctp_generic_recvmsg(td, uap)
 	struct thread *td;
 	struct sctp_generic_recvmsg_args /* {
 		int sd,
 		struct iovec *iov,
 		int iovlen,
 		struct sockaddr *from,
 		__socklen_t *fromlenaddr,
 		struct sctp_sndrcvinfo *sinfo,
 		int *msg_flags
 	} */ *uap;
 {
 #if (defined(INET) || defined(INET6)) && defined(SCTP)
 	uint8_t sockbufstore[256];
 	struct uio auio;
 	struct iovec *iov, *tiov;
 	struct sctp_sndrcvinfo sinfo;
 	struct socket *so;
 	struct file *fp = NULL;
 	struct sockaddr *fromsa;
 	cap_rights_t rights;
 #ifdef KTRACE
 	struct uio *ktruio = NULL;
 #endif
 	ssize_t len;
 	int error, fromlen, i, msg_flags;
 
 	AUDIT_ARG_FD(uap->sd);
 	error = getsock_cap(td, uap->sd, cap_rights_init(&rights, CAP_RECV),
 	    &fp, NULL, NULL);
 	if (error != 0)
 		return (error);
 	/* Copy the iovec array in, honouring the 32-bit layout if needed. */
 #ifdef COMPAT_FREEBSD32
 	if (SV_CURPROC_FLAG(SV_ILP32))
 		error = freebsd32_copyiniov((struct iovec32 *)uap->iov,
 		    uap->iovlen, &iov, EMSGSIZE);
 	else
 #endif
 		error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE);
 	if (error != 0)
 		goto out1;
 
 	so = fp->f_data;
 	if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
 		error = EOPNOTSUPP;
 		goto out;
 	}
 #ifdef MAC
 	error = mac_socket_check_receive(td->td_ucred, so);
 	if (error != 0)
 		goto out;
 #endif /* MAC */
 
 	if (uap->fromlenaddr != NULL) {
 		error = copyin(uap->fromlenaddr, &fromlen, sizeof (fromlen));
 		if (error != 0)
 			goto out;
 	} else {
 		fromlen = 0;
 	}
 	if (uap->msg_flags) {
 		error = copyin(uap->msg_flags, &msg_flags, sizeof (int));
 		if (error != 0)
 			goto out;
 	} else {
 		msg_flags = 0;
 	}
 	auio.uio_iov = iov;
 	auio.uio_iovcnt = uap->iovlen;
 	auio.uio_segflg = UIO_USERSPACE;
 	auio.uio_rw = UIO_READ;
 	auio.uio_td = td;
 	auio.uio_offset = 0;			/* XXX */
 	auio.uio_resid = 0;
 	/* Total the segment lengths, rejecting a sum that turns negative. */
 	tiov = iov;
 	for (i = 0; i <uap->iovlen; i++, tiov++) {
 		if ((auio.uio_resid += tiov->iov_len) < 0) {
 			error = EINVAL;
 			goto out;
 		}
 	}
 	len = auio.uio_resid;
 	/*
 	 * Start from an all-zero address buffer.  Its sa_len is read and
 	 * its contents are copied out to user space below, so it must not
 	 * hold stale kernel stack data should sctp_sorecvmsg() return
 	 * without storing an address.
 	 */
 	memset(sockbufstore, 0, sizeof(sockbufstore));
 	fromsa = (struct sockaddr *)sockbufstore;
 
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_GENIO))
 		ktruio = cloneuio(&auio);
 #endif /* KTRACE */
 	memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo));
 	CURVNET_SET(so->so_vnet);
 	error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL,
 		    fromsa, fromlen, &msg_flags,
 		    (struct sctp_sndrcvinfo *)&sinfo, 1);
 	CURVNET_RESTORE();
 	if (error != 0) {
 		/* An interrupted receive that moved some data is a success. */
 		if (auio.uio_resid != len && (error == ERESTART ||
 		    error == EINTR || error == EWOULDBLOCK))
 			error = 0;
 	} else {
 		if (uap->sinfo)
 			error = copyout(&sinfo, uap->sinfo, sizeof (sinfo));
 	}
 #ifdef KTRACE
 	if (ktruio != NULL) {
 		ktruio->uio_resid = len - auio.uio_resid;
 		ktrgenio(uap->sd, UIO_READ, ktruio, error);
 	}
 #endif /* KTRACE */
 	if (error != 0)
 		goto out;
 	td->td_retval[0] = len - auio.uio_resid;
 
 	if (fromlen && uap->from) {
 		/*
 		 * The length is handed back through a socklen_t object so
 		 * that exactly the bytes of the value are copied out; taking
 		 * sizeof (socklen_t) bytes from a wider ssize_t would return
 		 * the wrong half on big-endian 64-bit machines.
 		 */
 		socklen_t salen;
 
 		if (fromlen <= 0 || fromsa == NULL)
 			salen = 0;
 		else {
 			salen = MIN(fromlen, fromsa->sa_len);
 			error = copyout(fromsa, uap->from, (size_t)salen);
 			if (error != 0)
 				goto out;
 		}
 		error = copyout(&salen, uap->fromlenaddr, sizeof (salen));
 		if (error != 0)
 			goto out;
 	}
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktrsockaddr(fromsa);
 #endif
 	if (uap->msg_flags) {
 		error = copyout(&msg_flags, uap->msg_flags, sizeof (int));
 		if (error != 0)
 			goto out;
 	}
 out:
 	free(iov, M_IOV);
 out1:
 	if (fp != NULL)
 		fdrop(fp, td);
 
 	return (error);
 #else  /* SCTP */
 	return (EOPNOTSUPP);
 #endif /* SCTP */
 }
Index: head/sys/netinet/sctp_sysctl.c
===================================================================
--- head/sys/netinet/sctp_sysctl.c	(revision 319721)
+++ head/sys/netinet/sctp_sysctl.c	(revision 319722)
@@ -1,945 +1,945 @@
 /*-
  * Copyright (c) 2007, by Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
  * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * a) Redistributions of source code must retain the above copyright notice,
  *    this list of conditions and the following disclaimer.
  *
  * b) Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the distribution.
  *
  * c) Neither the name of Cisco Systems, Inc. nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <netinet/sctp_os.h>
 #include <netinet/sctp.h>
 #include <netinet/sctp_constants.h>
 #include <netinet/sctp_sysctl.h>
 #include <netinet/sctp_pcb.h>
 #include <netinet/sctputil.h>
 #include <netinet/sctp_output.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 
 FEATURE(sctp, "Stream Control Transmission Protocol");
 
 /*
  * sysctl tunable variables
  */
 
 /*
  * Set the SCTP sysctl tunables listed below to their
  * SCTPCTL_*_DEFAULT values.  When SCTP_LOCAL_TRACE_BUF is defined the
  * trace log buffer is zeroed as well.
  *
  * The function takes no arguments; it is declared with (void) so that the
  * definition also serves as a prototype.
  */
 void
 sctp_init_sysctls(void)
 {
 	SCTP_BASE_SYSCTL(sctp_sendspace) = SCTPCTL_MAXDGRAM_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_recvspace) = SCTPCTL_RECVSPACE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_auto_asconf) = SCTPCTL_AUTOASCONF_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_multiple_asconfs) = SCTPCTL_MULTIPLEASCONFS_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_ecn_enable) = SCTPCTL_ECN_ENABLE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_pr_enable) = SCTPCTL_PR_ENABLE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_auth_enable) = SCTPCTL_AUTH_ENABLE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_asconf_enable) = SCTPCTL_ASCONF_ENABLE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_reconfig_enable) = SCTPCTL_RECONFIG_ENABLE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_nrsack_enable) = SCTPCTL_NRSACK_ENABLE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_pktdrop_enable) = SCTPCTL_PKTDROP_ENABLE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_peer_chunk_oh) = SCTPCTL_PEER_CHKOH_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_max_burst_default) = SCTPCTL_MAXBURST_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_fr_max_burst_default) = SCTPCTL_FRMAXBURST_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue) = SCTPCTL_MAXCHUNKS_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_hashtblsize) = SCTPCTL_TCBHASHSIZE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_pcbtblsize) = SCTPCTL_PCBHASHSIZE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_min_split_point) = SCTPCTL_MIN_SPLIT_POINT_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_chunkscale) = SCTPCTL_CHUNKSCALE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_delayed_sack_time_default) = SCTPCTL_DELAYED_SACK_TIME_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_sack_freq_default) = SCTPCTL_SACK_FREQ_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_system_free_resc_limit) = SCTPCTL_SYS_RESOURCE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_asoc_free_resc_limit) = SCTPCTL_ASOC_RESOURCE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_heartbeat_interval_default) = SCTPCTL_HEARTBEAT_INTERVAL_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_pmtu_raise_time_default) = SCTPCTL_PMTU_RAISE_TIME_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_shutdown_guard_time_default) = SCTPCTL_SHUTDOWN_GUARD_TIME_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_secret_lifetime_default) = SCTPCTL_SECRET_LIFETIME_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_rto_max_default) = SCTPCTL_RTO_MAX_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_rto_min_default) = SCTPCTL_RTO_MIN_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_rto_initial_default) = SCTPCTL_RTO_INITIAL_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_init_rto_max_default) = SCTPCTL_INIT_RTO_MAX_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_valid_cookie_life_default) = SCTPCTL_VALID_COOKIE_LIFE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_init_rtx_max_default) = SCTPCTL_INIT_RTX_MAX_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_assoc_rtx_max_default) = SCTPCTL_ASSOC_RTX_MAX_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_path_rtx_max_default) = SCTPCTL_PATH_RTX_MAX_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_path_pf_threshold) = SCTPCTL_PATH_PF_THRESHOLD_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_add_more_threshold) = SCTPCTL_ADD_MORE_ON_OUTPUT_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_nr_incoming_streams_default) = SCTPCTL_INCOMING_STREAMS_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_nr_outgoing_streams_default) = SCTPCTL_OUTGOING_STREAMS_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_cmt_on_off) = SCTPCTL_CMT_ON_OFF_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_cmt_use_dac) = SCTPCTL_CMT_USE_DAC_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) = SCTPCTL_CWND_MAXBURST_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_nat_friendly) = SCTPCTL_NAT_FRIENDLY_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_L2_abc_variable) = SCTPCTL_ABC_L_VAR_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count) = SCTPCTL_MAX_CHAINED_MBUFS_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_do_drain) = SCTPCTL_DO_SCTP_DRAIN_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_hb_maxburst) = SCTPCTL_HB_MAX_BURST_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_abort_if_one_2_one_hits_limit) = SCTPCTL_ABORT_AT_LIMIT_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_min_residual) = SCTPCTL_MIN_RESIDUAL_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_max_retran_chunk) = SCTPCTL_MAX_RETRAN_CHUNK_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_logging_level) = SCTPCTL_LOGGING_LEVEL_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_default_cc_module) = SCTPCTL_DEFAULT_CC_MODULE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_default_ss_module) = SCTPCTL_DEFAULT_SS_MODULE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_default_frag_interleave) = SCTPCTL_DEFAULT_FRAG_INTERLEAVE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_mobility_base) = SCTPCTL_MOBILITY_BASE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_mobility_fasthandoff) = SCTPCTL_MOBILITY_FASTHANDOFF_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_vtag_time_wait) = SCTPCTL_TIME_WAIT_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_buffer_splitting) = SCTPCTL_BUFFER_SPLITTING_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_initial_cwnd) = SCTPCTL_INITIAL_CWND_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_rttvar_bw) = SCTPCTL_RTTVAR_BW_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_rttvar_rtt) = SCTPCTL_RTTVAR_RTT_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_rttvar_eqret) = SCTPCTL_RTTVAR_EQRET_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_steady_step) = SCTPCTL_RTTVAR_STEADYS_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_use_dccc_ecn) = SCTPCTL_RTTVAR_DCCCECN_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_blackhole) = SCTPCTL_BLACKHOLE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_diag_info_code) = SCTPCTL_DIAG_INFO_CODE_DEFAULT;
 #if defined(SCTP_LOCAL_TRACE_BUF)
 	/* Start with an empty trace log. */
 	memset(&SCTP_BASE_SYSCTL(sctp_log), 0, sizeof(struct sctp_log));
 #endif
 	SCTP_BASE_SYSCTL(sctp_udp_tunneling_port) = SCTPCTL_UDP_TUNNELING_PORT_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) = SCTPCTL_SACK_IMMEDIATELY_ENABLE_DEFAULT;
 	SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly) = SCTPCTL_NAT_FRIENDLY_INITS_DEFAULT;
 #if defined(SCTP_DEBUG)
 	SCTP_BASE_SYSCTL(sctp_debug_on) = SCTPCTL_DEBUG_DEFAULT;
 #endif
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	SCTP_BASE_SYSCTL(sctp_output_unlocked) = SCTPCTL_OUTPUT_UNLOCKED_DEFAULT;
 #endif
 }
 
 
 /*
  * Count the local addresses that may be reported for an endpoint.
  * The result is only an upper limit: addresses are counted by address
  * family alone, no further filtering is done here.
  */
 static unsigned int
 sctp_sysctl_number_of_addresses(struct sctp_inpcb *inp)
 {
 	struct sctp_vrf *vrf;
 	struct sctp_ifn *ifn;
 	struct sctp_ifa *ifa;
 	struct sctp_laddr *la;
 	unsigned int total = 0;

 	/* neither Mac OS X nor FreeBSD support multiple routing functions */
 	vrf = sctp_find_vrf(inp->def_vrf_id);
 	if (vrf == NULL) {
 		return (0);
 	}
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
 		/* Bound-all flag not set: walk the endpoint's own list. */
 		LIST_FOREACH(la, &inp->sctp_addr_list, sctp_nxt_addr) {
 			switch (la->ifa->address.sa.sa_family) {
 #ifdef INET
 			case AF_INET:
 #endif
 #ifdef INET6
 			case AF_INET6:
 #endif
 				total++;
 				break;
 			default:
 				break;
 			}
 		}
 		return (total);
 	}
 	/* Bound-all flag set: walk every address of every interface. */
 	LIST_FOREACH(ifn, &vrf->ifnlist, next_ifn) {
 		LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) {
 			switch (ifa->address.sa.sa_family) {
 #ifdef INET
 			case AF_INET:
 #endif
 #ifdef INET6
 			case AF_INET6:
 #endif
 				total++;
 				break;
 			default:
 				break;
 			}
 		}
 	}
 	return (total);
 }
 
 /*
  * Stream the local addresses of an endpoint (stcb == NULL) or of one
  * association (stcb != NULL) to the sysctl request as a sequence of
  * struct xsctp_laddr records, followed by one terminating record whose
  * 'last' field is 1.
  *
  * Locking, as visible in this function: it expects to be entered with the
  * INP-info read lock and the inp read lock held.  Both are released around
  * every SYSCTL_OUT() call and re-acquired only if the copy-out succeeded.
  * Consequently:
  *   - return 0:     both locks are held again;
  *   - return != 0:  both locks have been released (this includes the -1
  *                   returned when the VRF cannot be found).
  */
 static int
 sctp_sysctl_copy_out_local_addresses(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sysctl_req *req)
 {
 	struct sctp_ifn *sctp_ifn;
 	struct sctp_ifa *sctp_ifa;
 	int loopback_scope, ipv4_local_scope, local_scope, site_scope;
 	int ipv4_addr_legal, ipv6_addr_legal;
 	struct sctp_vrf *vrf;
 	struct xsctp_laddr xladdr;
 	struct sctp_laddr *laddr;
 	int error;

 	/* Turn on all the appropriate scope */
 	if (stcb) {
 		/* use association specific values */
 		loopback_scope = stcb->asoc.scope.loopback_scope;
 		ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope;
 		local_scope = stcb->asoc.scope.local_scope;
 		site_scope = stcb->asoc.scope.site_scope;
 		ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal;
 		ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal;
 	} else {
 		/* Use generic values for endpoints. */
 		loopback_scope = 1;
 		ipv4_local_scope = 1;
 		local_scope = 1;
 		site_scope = 1;
 		if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 			ipv6_addr_legal = 1;
 			if (SCTP_IPV6_V6ONLY(inp)) {
 				ipv4_addr_legal = 0;
 			} else {
 				ipv4_addr_legal = 1;
 			}
 		} else {
 			ipv6_addr_legal = 0;
 			ipv4_addr_legal = 1;
 		}
 	}

 	/* neither Mac OS X nor FreeBSD support multiple routing functions */
 	if ((vrf = sctp_find_vrf(inp->def_vrf_id)) == NULL) {
 		/* Error exit: leave with both locks released. */
 		SCTP_INP_RUNLOCK(inp);
 		SCTP_INP_INFO_RUNLOCK();
 		return (-1);
 	}
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 		/*
 		 * Bound-all flag set: walk every address of every interface
 		 * in the VRF and apply the scope / legality filters computed
 		 * above.
 		 */
 		LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
 			if ((loopback_scope == 0) && SCTP_IFN_IS_IFT_LOOP(sctp_ifn))
 				/* Skip loopback if loopback_scope not set */
 				continue;
 			LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 				if (stcb) {
 					/*
 					 * ignore if blacklisted at
 					 * association level
 					 */
 					if (sctp_is_addr_restricted(stcb, sctp_ifa))
 						continue;
 				}
 				switch (sctp_ifa->address.sa.sa_family) {
 #ifdef INET
 				case AF_INET:
 					if (ipv4_addr_legal) {
 						struct sockaddr_in *sin;

 						sin = &sctp_ifa->address.sin;
 						/* Skip the all-zero address. */
 						if (sin->sin_addr.s_addr == 0)
 							continue;
 						if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 						    &sin->sin_addr) != 0) {
 							continue;
 						}
 						if ((ipv4_local_scope == 0) && (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr)))
 							continue;
 					} else {
 						continue;
 					}
 					break;
 #endif
 #ifdef INET6
 				case AF_INET6:
 					if (ipv6_addr_legal) {
 						struct sockaddr_in6 *sin6;

 						sin6 = &sctp_ifa->address.sin6;
 						if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
 							continue;
 						if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 						    &sin6->sin6_addr) != 0) {
 							continue;
 						}
 						if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
 							if (local_scope == 0)
 								continue;
 						}
 						if ((site_scope == 0) && (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr)))
 							continue;
 					} else {
 						continue;
 					}
 					break;
 #endif
 				default:
 					continue;
 				}
 				/*
 				 * Address accepted: build the record.  Only
 				 * the address is filled in on this path, so
 				 * start_time stays zero.
 				 */
 				memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr));
 				memcpy((void *)&xladdr.address, (const void *)&sctp_ifa->address, sizeof(union sctp_sockstore));
 				/* Drop both locks for the duration of the copy-out. */
 				SCTP_INP_RUNLOCK(inp);
 				SCTP_INP_INFO_RUNLOCK();
 				error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr));
 				if (error) {
 					return (error);
 				} else {
 					SCTP_INP_INFO_RLOCK();
 					SCTP_INP_RLOCK(inp);
 				}
 			}
 		}
 	} else {
 		/*
 		 * Bound-all flag not set: walk the endpoint's own address
 		 * list.  Only the association-level restriction is checked
 		 * here; the scope / legality values are not consulted.
 		 */
 		LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 			/* ignore if blacklisted at association level */
 			if (stcb && sctp_is_addr_restricted(stcb, laddr->ifa))
 				continue;
 			memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr));
 			memcpy((void *)&xladdr.address, (const void *)&laddr->ifa->address, sizeof(union sctp_sockstore));
 			xladdr.start_time.tv_sec = (uint32_t)laddr->start_time.tv_sec;
 			xladdr.start_time.tv_usec = (uint32_t)laddr->start_time.tv_usec;
 			/* Drop both locks for the duration of the copy-out. */
 			SCTP_INP_RUNLOCK(inp);
 			SCTP_INP_INFO_RUNLOCK();
 			error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr));
 			if (error) {
 				return (error);
 			} else {
 				SCTP_INP_INFO_RLOCK();
 				SCTP_INP_RLOCK(inp);
 			}
 		}
 	}
 	/* Emit the terminating record (all zero except last = 1). */
 	memset((void *)&xladdr, 0, sizeof(struct xsctp_laddr));
 	xladdr.last = 1;
 	SCTP_INP_RUNLOCK(inp);
 	SCTP_INP_INFO_RUNLOCK();
 	error = SYSCTL_OUT(req, &xladdr, sizeof(struct xsctp_laddr));

 	if (error) {
 		return (error);
 	} else {
 		/* Success: hand the locks back to the caller as found. */
 		SCTP_INP_INFO_RLOCK();
 		SCTP_INP_RLOCK(inp);
 		return (0);
 	}
 }
 
 /*
  * sysctl functions
  */
 /*
  * Sysctl handler that dumps all SCTP endpoints and their associations.
  *
  * - req->oldptr == NULL: only an estimate of the required buffer size is
  *   computed and stored in req->oldidx (with n/8 extra slack); returns 0.
  * - req->newptr != NULL: writing is refused with EPERM.
  * - otherwise the following record stream is copied out:
  *     for each endpoint:
  *       xsctp_inpcb, local xsctp_laddr list (self-terminated),
  *       for each association:
  *         xsctp_tcb, local xsctp_laddr list (self-terminated),
  *         one xsctp_raddr per remote address, xsctp_raddr with last = 1
  *       xsctp_tcb with last = 1
  *     xsctp_inpcb with last = 1
  *
  * The INP-info and inp read locks are dropped around every SYSCTL_OUT()
  * and re-taken afterwards; an inp reference (and, while inside an
  * association, an asoc refcnt) is held across those windows.
  *
  * Returns 0 on success or the first error reported by a copy-out / helper.
  */
 static int
 sctp_sysctl_handle_assoclist(SYSCTL_HANDLER_ARGS)
 {
 	unsigned int number_of_endpoints;
 	unsigned int number_of_local_addresses;
 	unsigned int number_of_associations;
 	unsigned int number_of_remote_addresses;
 	unsigned int n;
 	int error;
 	struct sctp_inpcb *inp;
 	struct sctp_tcb *stcb;
 	struct sctp_nets *net;
 	struct xsctp_inpcb xinpcb;
 	struct xsctp_tcb xstcb;
 	struct xsctp_raddr xraddr;
 	struct socket *so;

 	number_of_endpoints = 0;
 	number_of_local_addresses = 0;
 	number_of_associations = 0;
 	number_of_remote_addresses = 0;

 	SCTP_INP_INFO_RLOCK();
 	if (req->oldptr == NULL) {
 		/* Size probe: count everything, copy nothing. */
 		LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) {
 			SCTP_INP_RLOCK(inp);
 			number_of_endpoints++;
 			number_of_local_addresses += sctp_sysctl_number_of_addresses(inp);
 			LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 				number_of_associations++;
 				/* The endpoint's upper limit is counted once more per association. */
 				number_of_local_addresses += sctp_sysctl_number_of_addresses(inp);
 				TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 					number_of_remote_addresses++;
 				}
 			}
 			SCTP_INP_RUNLOCK(inp);
 		}
 		SCTP_INP_INFO_RUNLOCK();
 		/* The extra terms account for the terminating records of each list. */
 		n = (number_of_endpoints + 1) * sizeof(struct xsctp_inpcb) +
 		    (number_of_local_addresses + number_of_endpoints + number_of_associations) * sizeof(struct xsctp_laddr) +
 		    (number_of_associations + number_of_endpoints) * sizeof(struct xsctp_tcb) +
 		    (number_of_remote_addresses + number_of_associations) * sizeof(struct xsctp_raddr);

 		/* request some more memory than needed */
 		req->oldidx = (n + n / 8);
 		return (0);
 	}
 	if (req->newptr != NULL) {
 		SCTP_INP_INFO_RUNLOCK();
 		SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_SYSCTL, EPERM);
 		return (EPERM);
 	}
 	LIST_FOREACH(inp, &SCTP_BASE_INFO(listhead), sctp_list) {
 		SCTP_INP_RLOCK(inp);
 		if (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) {
 			/* if its allgone it is being freed - skip it  */
 			/*
 			 * NOTE(review): this jumps to 'skip' with the inp read
 			 * lock and the INP-info read lock still held; see the
 			 * note at the label.
 			 */
 			goto skip;
 		}
 		/*
 		 * NOTE(review): xinpcb is filled field by field without being
 		 * cleared first, so any field not assigned below (and any
 		 * padding) is copied out as found on the stack.  For example
 		 * qlen_old / maxqlen_old are not assigned in the first branch
 		 * of the listening check.  Confirm whether a memset is wanted.
 		 */
 		xinpcb.last = 0;
 		xinpcb.local_port = ntohs(inp->sctp_lport);
 		xinpcb.flags = inp->sctp_flags;
 		xinpcb.features = inp->sctp_features;
 		xinpcb.total_sends = inp->total_sends;
 		xinpcb.total_recvs = inp->total_recvs;
 		xinpcb.total_nospaces = inp->total_nospaces;
 		xinpcb.fragmentation_point = inp->sctp_frag_point;
 		xinpcb.socket = inp->sctp_socket;
 		so = inp->sctp_socket;
 		/* Queue lengths are only reported for a live, listening socket. */
 		if ((so == NULL) ||
 		    (!SCTP_IS_LISTENING(inp)) ||
 		    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
 			xinpcb.qlen = 0;
 			xinpcb.maxqlen = 0;
 		} else {
-			xinpcb.qlen = so->so_qlen;
-			xinpcb.qlen_old = so->so_qlen > USHRT_MAX ?
-			    USHRT_MAX : (uint16_t)so->so_qlen;
-			xinpcb.maxqlen = so->so_qlimit;
-			xinpcb.maxqlen_old = so->so_qlimit > USHRT_MAX ?
-			    USHRT_MAX : (uint16_t)so->so_qlimit;
+			xinpcb.qlen = so->sol_qlen;
+			xinpcb.qlen_old = so->sol_qlen > USHRT_MAX ?
+			    USHRT_MAX : (uint16_t)so->sol_qlen;
+			xinpcb.maxqlen = so->sol_qlimit;
+			xinpcb.maxqlen_old = so->sol_qlimit > USHRT_MAX ?
+			    USHRT_MAX : (uint16_t)so->sol_qlimit;
 		}
 		/* Hold a reference on inp while the locks are dropped. */
 		SCTP_INP_INCR_REF(inp);
 		SCTP_INP_RUNLOCK(inp);
 		SCTP_INP_INFO_RUNLOCK();
 		error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb));
 		if (error) {
 			SCTP_INP_DECR_REF(inp);
 			return (error);
 		}
 		SCTP_INP_INFO_RLOCK();
 		SCTP_INP_RLOCK(inp);
 		/* On error the helper has already released both locks. */
 		error = sctp_sysctl_copy_out_local_addresses(inp, NULL, req);
 		if (error) {
 			SCTP_INP_DECR_REF(inp);
 			return (error);
 		}
 		LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 			/* Take an asoc reference under the TCB lock. */
 			SCTP_TCB_LOCK(stcb);
 			atomic_add_int(&stcb->asoc.refcnt, 1);
 			SCTP_TCB_UNLOCK(stcb);
 			/*
 			 * NOTE(review): xstcb is not cleared per association;
 			 * primary_addr keeps whatever it held before when
 			 * primary_destination is NULL.
 			 */
 			xstcb.last = 0;
 			xstcb.local_port = ntohs(inp->sctp_lport);
 			xstcb.remote_port = ntohs(stcb->rport);
 			if (stcb->asoc.primary_destination != NULL)
 				xstcb.primary_addr = stcb->asoc.primary_destination->ro._l_addr;
 			xstcb.heartbeat_interval = stcb->asoc.heart_beat_delay;
 			xstcb.state = (uint32_t)sctp_map_assoc_state(stcb->asoc.state);
 			/* 7.0 does not support these */
 			xstcb.assoc_id = sctp_get_associd(stcb);
 			xstcb.peers_rwnd = stcb->asoc.peers_rwnd;
 			xstcb.in_streams = stcb->asoc.streamincnt;
 			xstcb.out_streams = stcb->asoc.streamoutcnt;
 			xstcb.max_nr_retrans = stcb->asoc.overall_error_count;
 			xstcb.primary_process = 0;	/* not really supported
 							 * yet */
 			xstcb.T1_expireries = stcb->asoc.timoinit + stcb->asoc.timocookie;
 			xstcb.T2_expireries = stcb->asoc.timoshutdown + stcb->asoc.timoshutdownack;
 			xstcb.retransmitted_tsns = stcb->asoc.marked_retrans;
 			xstcb.start_time.tv_sec = (uint32_t)stcb->asoc.start_time.tv_sec;
 			xstcb.start_time.tv_usec = (uint32_t)stcb->asoc.start_time.tv_usec;
 			xstcb.discontinuity_time.tv_sec = (uint32_t)stcb->asoc.discontinuity_time.tv_sec;
 			xstcb.discontinuity_time.tv_usec = (uint32_t)stcb->asoc.discontinuity_time.tv_usec;
 			xstcb.total_sends = stcb->total_sends;
 			xstcb.total_recvs = stcb->total_recvs;
 			xstcb.local_tag = stcb->asoc.my_vtag;
 			xstcb.remote_tag = stcb->asoc.peer_vtag;
 			xstcb.initial_tsn = stcb->asoc.init_seq_number;
 			xstcb.highest_tsn = stcb->asoc.sending_seq - 1;
 			xstcb.cumulative_tsn = stcb->asoc.last_acked_seq;
 			xstcb.cumulative_tsn_ack = stcb->asoc.cumulative_tsn;
 			xstcb.mtu = stcb->asoc.smallest_mtu;
 			xstcb.refcnt = stcb->asoc.refcnt;
 			SCTP_INP_RUNLOCK(inp);
 			SCTP_INP_INFO_RUNLOCK();
 			error = SYSCTL_OUT(req, &xstcb, sizeof(struct xsctp_tcb));
 			if (error) {
 				SCTP_INP_DECR_REF(inp);
 				atomic_subtract_int(&stcb->asoc.refcnt, 1);
 				return (error);
 			}
 			SCTP_INP_INFO_RLOCK();
 			SCTP_INP_RLOCK(inp);
 			/* On error the helper has already released both locks. */
 			error = sctp_sysctl_copy_out_local_addresses(inp, stcb, req);
 			if (error) {
 				SCTP_INP_DECR_REF(inp);
 				atomic_subtract_int(&stcb->asoc.refcnt, 1);
 				return (error);
 			}
 			/*
 			 * One record per remote address.
 			 * NOTE(review): xraddr is likewise filled without a
 			 * preceding memset inside this loop.
 			 */
 			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 				xraddr.last = 0;
 				xraddr.address = net->ro._l_addr;
 				xraddr.active = ((net->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE);
 				xraddr.confirmed = ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0);
 				xraddr.heartbeat_enabled = ((net->dest_state & SCTP_ADDR_NOHB) == 0);
 				xraddr.potentially_failed = ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF);
 				xraddr.rto = net->RTO;
 				xraddr.max_path_rtx = net->failure_threshold;
 				xraddr.rtx = net->marked_retrans;
 				xraddr.error_counter = net->error_count;
 				xraddr.cwnd = net->cwnd;
 				xraddr.flight_size = net->flight_size;
 				xraddr.mtu = net->mtu;
 				xraddr.rtt = net->rtt / 1000;
 				xraddr.heartbeat_interval = net->heart_beat_delay;
 				xraddr.ssthresh = net->ssthresh;
 				xraddr.start_time.tv_sec = (uint32_t)net->start_time.tv_sec;
 				xraddr.start_time.tv_usec = (uint32_t)net->start_time.tv_usec;
 				SCTP_INP_RUNLOCK(inp);
 				SCTP_INP_INFO_RUNLOCK();
 				error = SYSCTL_OUT(req, &xraddr, sizeof(struct xsctp_raddr));
 				if (error) {
 					SCTP_INP_DECR_REF(inp);
 					atomic_subtract_int(&stcb->asoc.refcnt, 1);
 					return (error);
 				}
 				SCTP_INP_INFO_RLOCK();
 				SCTP_INP_RLOCK(inp);
 			}
 			/* Done with this association: drop its reference and terminate the raddr list. */
 			atomic_subtract_int(&stcb->asoc.refcnt, 1);
 			memset((void *)&xraddr, 0, sizeof(struct xsctp_raddr));
 			xraddr.last = 1;
 			SCTP_INP_RUNLOCK(inp);
 			SCTP_INP_INFO_RUNLOCK();
 			error = SYSCTL_OUT(req, &xraddr, sizeof(struct xsctp_raddr));
 			if (error) {
 				SCTP_INP_DECR_REF(inp);
 				return (error);
 			}
 			SCTP_INP_INFO_RLOCK();
 			SCTP_INP_RLOCK(inp);
 		}
 		/* Done with this endpoint: release it and terminate the tcb list. */
 		SCTP_INP_DECR_REF(inp);
 		SCTP_INP_RUNLOCK(inp);
 		SCTP_INP_INFO_RUNLOCK();
 		memset((void *)&xstcb, 0, sizeof(struct xsctp_tcb));
 		xstcb.last = 1;
 		error = SYSCTL_OUT(req, &xstcb, sizeof(struct xsctp_tcb));
 		if (error) {
 			return (error);
 		}
 		/*
 		 * NOTE(review): the normal path reaches this label with no
 		 * locks held and re-takes the INP-info read lock for the next
 		 * iteration.  The 'goto skip' path above arrives with the
 		 * INP-info read lock already held and the inp read lock still
 		 * held, so it appears to acquire the former a second time and
 		 * never release the latter.  Confirm and fix separately.
 		 */
 skip:
 		SCTP_INP_INFO_RLOCK();
 	}
 	SCTP_INP_INFO_RUNLOCK();

 	/* Terminate the endpoint list. */
 	memset((void *)&xinpcb, 0, sizeof(struct xsctp_inpcb));
 	xinpcb.last = 1;
 	error = SYSCTL_OUT(req, &xinpcb, sizeof(struct xsctp_inpcb));
 	return (error);
 }
 
 static int
 sctp_sysctl_handle_udp_tunneling(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	uint32_t old, new;
 
 	SCTP_INP_INFO_RLOCK();
 	old = SCTP_BASE_SYSCTL(sctp_udp_tunneling_port);
 	SCTP_INP_INFO_RUNLOCK();
 	new = old;
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if ((error == 0) &&
 	    (req->newptr != NULL)) {
 #if (SCTPCTL_UDP_TUNNELING_PORT_MIN == 0)
 		if (new > SCTPCTL_UDP_TUNNELING_PORT_MAX) {
 #else
 		if ((new < SCTPCTL_UDP_TUNNELING_PORT_MIN) ||
 		    (new > SCTPCTL_UDP_TUNNELING_PORT_MAX)) {
 #endif
 			error = EINVAL;
 		} else {
 			SCTP_INP_INFO_WLOCK();
 			SCTP_BASE_SYSCTL(sctp_udp_tunneling_port) = new;
 			if (old != 0) {
 				sctp_over_udp_stop();
 			}
 			if (new != 0) {
 				error = sctp_over_udp_start();
 			}
 			SCTP_INP_INFO_WUNLOCK();
 		}
 	}
 	return (error);
 }
 
 
 static int
 sctp_sysctl_handle_auth(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	uint32_t new;
 
 	new = SCTP_BASE_SYSCTL(sctp_auth_enable);
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if ((error == 0) &&
 	    (req->newptr != NULL)) {
 #if (SCTPCTL_AUTH_ENABLE_MIN == 0)
 		if ((new > SCTPCTL_AUTH_ENABLE_MAX) ||
 		    ((new == 0) && (SCTP_BASE_SYSCTL(sctp_asconf_enable) == 1))) {
 #else
 		if ((new < SCTPCTL_AUTH_ENABLE_MIN) ||
 		    (new > SCTPCTL_AUTH_ENABLE_MAX) ||
 		    ((new == 0) && (SCTP_BASE_SYSCTL(sctp_asconf_enable) == 1))) {
 #endif
 			error = EINVAL;
 		} else {
 			SCTP_BASE_SYSCTL(sctp_auth_enable) = new;
 		}
 	}
 	return (error);
 }
 
 static int
 sctp_sysctl_handle_asconf(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	uint32_t new;
 
 	new = SCTP_BASE_SYSCTL(sctp_asconf_enable);
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if ((error == 0) &&
 	    (req->newptr != NULL)) {
 #if (SCTPCTL_ASCONF_ENABLE_MIN == 0)
 		if ((new > SCTPCTL_ASCONF_ENABLE_MAX) ||
 		    ((new == 1) && (SCTP_BASE_SYSCTL(sctp_auth_enable) == 0))) {
 #else
 		if ((new < SCTPCTL_ASCONF_ENABLE_MIN) ||
 		    (new > SCTPCTL_ASCONF_ENABLE_MAX) ||
 		    ((new == 1) && (SCTP_BASE_SYSCTL(sctp_auth_enable) == 0))) {
 #endif
 			error = EINVAL;
 		} else {
 			SCTP_BASE_SYSCTL(sctp_asconf_enable) = new;
 		}
 	}
 	return (error);
 }
 
 /*
  * Sysctl handler for the SCTP statistics (struct sctpstat).
  *
  * A read copies one struct sctpstat out to the requester.  A write is
  * accepted only when exactly sizeof(struct sctpstat) bytes are supplied
  * (EINVAL otherwise); the supplied structure then replaces the kernel's
  * counters.
  *
  * With SMP and SCTP_USE_PERCPU_STAT the per-CPU slots are summed into a
  * single structure for the copy-out, and on a write every visited slot is
  * overwritten with the supplied structure.  Without it, the single global
  * structure is copied out first and overwritten afterwards.
  */
 static int
 sctp_sysctl_handle_stats(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 #if defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
 	struct sctpstat *sarry;
 	struct sctpstat sb;
 	int cpu;
 #endif
 	struct sctpstat sb_temp;

 	if ((req->newptr != NULL) &&
 	    (req->newlen != sizeof(struct sctpstat))) {
 		return (EINVAL);
 	}
 	memset(&sb_temp, 0, sizeof(struct sctpstat));

 	/* On a write, fetch the replacement values before touching anything. */
 	if (req->newptr != NULL) {
 		error = SYSCTL_IN(req, &sb_temp, sizeof(struct sctpstat));
 		if (error != 0) {
 			return (error);
 		}
 	}
 #if defined(SMP) && defined(SCTP_USE_PERCPU_STAT)
 	memset(&sb, 0, sizeof(sb));
 	/*
 	 * NOTE(review): the loop stops at cpu < mp_maxid.  If mp_maxid is the
 	 * highest valid CPU id rather than a count, the last slot is neither
 	 * summed nor reset — confirm against the allocation of the array.
 	 */
 	for (cpu = 0; cpu < mp_maxid; cpu++) {
 		sarry = &SCTP_BASE_STATS[cpu];
 		/* Report the discontinuity time with the largest tv_sec. */
 		if (sarry->sctps_discontinuitytime.tv_sec > sb.sctps_discontinuitytime.tv_sec) {
 			sb.sctps_discontinuitytime.tv_sec = sarry->sctps_discontinuitytime.tv_sec;
 			sb.sctps_discontinuitytime.tv_usec = sarry->sctps_discontinuitytime.tv_usec;
 		}
 		sb.sctps_currestab += sarry->sctps_currestab;
 		sb.sctps_activeestab += sarry->sctps_activeestab;
 		sb.sctps_restartestab += sarry->sctps_restartestab;
 		sb.sctps_collisionestab += sarry->sctps_collisionestab;
 		sb.sctps_passiveestab += sarry->sctps_passiveestab;
 		sb.sctps_aborted += sarry->sctps_aborted;
 		sb.sctps_shutdown += sarry->sctps_shutdown;
 		sb.sctps_outoftheblue += sarry->sctps_outoftheblue;
 		sb.sctps_checksumerrors += sarry->sctps_checksumerrors;
 		sb.sctps_outcontrolchunks += sarry->sctps_outcontrolchunks;
 		sb.sctps_outorderchunks += sarry->sctps_outorderchunks;
 		sb.sctps_outunorderchunks += sarry->sctps_outunorderchunks;
 		sb.sctps_incontrolchunks += sarry->sctps_incontrolchunks;
 		sb.sctps_inorderchunks += sarry->sctps_inorderchunks;
 		sb.sctps_inunorderchunks += sarry->sctps_inunorderchunks;
 		sb.sctps_fragusrmsgs += sarry->sctps_fragusrmsgs;
 		sb.sctps_reasmusrmsgs += sarry->sctps_reasmusrmsgs;
 		sb.sctps_outpackets += sarry->sctps_outpackets;
 		sb.sctps_inpackets += sarry->sctps_inpackets;
 		sb.sctps_recvpackets += sarry->sctps_recvpackets;
 		sb.sctps_recvdatagrams += sarry->sctps_recvdatagrams;
 		sb.sctps_recvpktwithdata += sarry->sctps_recvpktwithdata;
 		sb.sctps_recvsacks += sarry->sctps_recvsacks;
 		sb.sctps_recvdata += sarry->sctps_recvdata;
 		sb.sctps_recvdupdata += sarry->sctps_recvdupdata;
 		sb.sctps_recvheartbeat += sarry->sctps_recvheartbeat;
 		sb.sctps_recvheartbeatack += sarry->sctps_recvheartbeatack;
 		sb.sctps_recvecne += sarry->sctps_recvecne;
 		sb.sctps_recvauth += sarry->sctps_recvauth;
 		sb.sctps_recvauthmissing += sarry->sctps_recvauthmissing;
 		sb.sctps_recvivalhmacid += sarry->sctps_recvivalhmacid;
 		sb.sctps_recvivalkeyid += sarry->sctps_recvivalkeyid;
 		sb.sctps_recvauthfailed += sarry->sctps_recvauthfailed;
 		sb.sctps_recvexpress += sarry->sctps_recvexpress;
 		sb.sctps_recvexpressm += sarry->sctps_recvexpressm;
 		sb.sctps_recvnocrc += sarry->sctps_recvnocrc;
 		sb.sctps_recvswcrc += sarry->sctps_recvswcrc;
 		sb.sctps_recvhwcrc += sarry->sctps_recvhwcrc;
 		sb.sctps_sendpackets += sarry->sctps_sendpackets;
 		sb.sctps_sendsacks += sarry->sctps_sendsacks;
 		sb.sctps_senddata += sarry->sctps_senddata;
 		sb.sctps_sendretransdata += sarry->sctps_sendretransdata;
 		sb.sctps_sendfastretrans += sarry->sctps_sendfastretrans;
 		sb.sctps_sendmultfastretrans += sarry->sctps_sendmultfastretrans;
 		sb.sctps_sendheartbeat += sarry->sctps_sendheartbeat;
 		sb.sctps_sendecne += sarry->sctps_sendecne;
 		sb.sctps_sendauth += sarry->sctps_sendauth;
 		sb.sctps_senderrors += sarry->sctps_senderrors;
 		sb.sctps_sendnocrc += sarry->sctps_sendnocrc;
 		sb.sctps_sendswcrc += sarry->sctps_sendswcrc;
 		sb.sctps_sendhwcrc += sarry->sctps_sendhwcrc;
 		sb.sctps_pdrpfmbox += sarry->sctps_pdrpfmbox;
 		sb.sctps_pdrpfehos += sarry->sctps_pdrpfehos;
 		sb.sctps_pdrpmbda += sarry->sctps_pdrpmbda;
 		sb.sctps_pdrpmbct += sarry->sctps_pdrpmbct;
 		sb.sctps_pdrpbwrpt += sarry->sctps_pdrpbwrpt;
 		sb.sctps_pdrpcrupt += sarry->sctps_pdrpcrupt;
 		sb.sctps_pdrpnedat += sarry->sctps_pdrpnedat;
 		sb.sctps_pdrppdbrk += sarry->sctps_pdrppdbrk;
 		sb.sctps_pdrptsnnf += sarry->sctps_pdrptsnnf;
 		sb.sctps_pdrpdnfnd += sarry->sctps_pdrpdnfnd;
 		sb.sctps_pdrpdiwnp += sarry->sctps_pdrpdiwnp;
 		sb.sctps_pdrpdizrw += sarry->sctps_pdrpdizrw;
 		sb.sctps_pdrpbadd += sarry->sctps_pdrpbadd;
 		sb.sctps_pdrpmark += sarry->sctps_pdrpmark;
 		sb.sctps_timoiterator += sarry->sctps_timoiterator;
 		sb.sctps_timodata += sarry->sctps_timodata;
 		sb.sctps_timowindowprobe += sarry->sctps_timowindowprobe;
 		sb.sctps_timoinit += sarry->sctps_timoinit;
 		sb.sctps_timosack += sarry->sctps_timosack;
 		sb.sctps_timoshutdown += sarry->sctps_timoshutdown;
 		sb.sctps_timoheartbeat += sarry->sctps_timoheartbeat;
 		sb.sctps_timocookie += sarry->sctps_timocookie;
 		sb.sctps_timosecret += sarry->sctps_timosecret;
 		sb.sctps_timopathmtu += sarry->sctps_timopathmtu;
 		sb.sctps_timoshutdownack += sarry->sctps_timoshutdownack;
 		sb.sctps_timoshutdownguard += sarry->sctps_timoshutdownguard;
 		sb.sctps_timostrmrst += sarry->sctps_timostrmrst;
 		sb.sctps_timoearlyfr += sarry->sctps_timoearlyfr;
 		sb.sctps_timoasconf += sarry->sctps_timoasconf;
 		sb.sctps_timodelprim += sarry->sctps_timodelprim;
 		sb.sctps_timoautoclose += sarry->sctps_timoautoclose;
 		sb.sctps_timoassockill += sarry->sctps_timoassockill;
 		sb.sctps_timoinpkill += sarry->sctps_timoinpkill;
 		sb.sctps_hdrops += sarry->sctps_hdrops;
 		sb.sctps_badsum += sarry->sctps_badsum;
 		sb.sctps_noport += sarry->sctps_noport;
 		sb.sctps_badvtag += sarry->sctps_badvtag;
 		sb.sctps_badsid += sarry->sctps_badsid;
 		sb.sctps_nomem += sarry->sctps_nomem;
 		sb.sctps_fastretransinrtt += sarry->sctps_fastretransinrtt;
 		sb.sctps_markedretrans += sarry->sctps_markedretrans;
 		sb.sctps_naglesent += sarry->sctps_naglesent;
 		sb.sctps_naglequeued += sarry->sctps_naglequeued;
 		sb.sctps_maxburstqueued += sarry->sctps_maxburstqueued;
 		sb.sctps_ifnomemqueued += sarry->sctps_ifnomemqueued;
 		sb.sctps_windowprobed += sarry->sctps_windowprobed;
 		sb.sctps_lowlevelerr += sarry->sctps_lowlevelerr;
 		sb.sctps_lowlevelerrusr += sarry->sctps_lowlevelerrusr;
 		sb.sctps_datadropchklmt += sarry->sctps_datadropchklmt;
 		sb.sctps_datadroprwnd += sarry->sctps_datadroprwnd;
 		sb.sctps_ecnereducedcwnd += sarry->sctps_ecnereducedcwnd;
 		sb.sctps_vtagexpress += sarry->sctps_vtagexpress;
 		sb.sctps_vtagbogus += sarry->sctps_vtagbogus;
 		sb.sctps_primary_randry += sarry->sctps_primary_randry;
 		sb.sctps_cmt_randry += sarry->sctps_cmt_randry;
 		sb.sctps_slowpath_sack += sarry->sctps_slowpath_sack;
 		sb.sctps_wu_sacks_sent += sarry->sctps_wu_sacks_sent;
 		sb.sctps_sends_with_flags += sarry->sctps_sends_with_flags;
 		sb.sctps_sends_with_unord += sarry->sctps_sends_with_unord;
 		sb.sctps_sends_with_eof += sarry->sctps_sends_with_eof;
 		sb.sctps_sends_with_abort += sarry->sctps_sends_with_abort;
 		sb.sctps_protocol_drain_calls += sarry->sctps_protocol_drain_calls;
 		sb.sctps_protocol_drains_done += sarry->sctps_protocol_drains_done;
 		sb.sctps_read_peeks += sarry->sctps_read_peeks;
 		sb.sctps_cached_chk += sarry->sctps_cached_chk;
 		sb.sctps_cached_strmoq += sarry->sctps_cached_strmoq;
 		sb.sctps_left_abandon += sarry->sctps_left_abandon;
 		sb.sctps_send_burst_avoid += sarry->sctps_send_burst_avoid;
 		sb.sctps_send_cwnd_avoid += sarry->sctps_send_cwnd_avoid;
 		sb.sctps_fwdtsn_map_over += sarry->sctps_fwdtsn_map_over;
 		/* On a write, replace this slot after it has been summed. */
 		if (req->newptr != NULL) {
 			memcpy(sarry, &sb_temp, sizeof(struct sctpstat));
 		}
 	}
 	/* The slots have already been overwritten by the time this can fail. */
 	error = SYSCTL_OUT(req, &sb, sizeof(struct sctpstat));
 #else
 	/* Copy the old values out first; only overwrite if that succeeded. */
 	error = SYSCTL_OUT(req, &SCTP_BASE_STATS, sizeof(struct sctpstat));
 	if (error != 0) {
 		return (error);
 	}
 	if (req->newptr != NULL) {
 		memcpy(&SCTP_BASE_STATS, &sb_temp, sizeof(struct sctpstat));
 	}
 #endif
 	return (error);
 }
 
 #if defined(SCTP_LOCAL_TRACE_BUF)
 /*
  * Sysctl handler that copies the whole SCTP trace log structure out to
  * the requester and returns the copy-out status.
  */
 static int
 sctp_sysctl_handle_trace_log(SYSCTL_HANDLER_ARGS)
 {
 	return (SYSCTL_OUT(req, &SCTP_BASE_SYSCTL(sctp_log), sizeof(struct sctp_log)));
 }
 
 /*
  * Sysctl handler that zeroes the SCTP trace log structure.  It always
  * returns 0.
  */
 static int
 sctp_sysctl_handle_trace_log_clear(SYSCTL_HANDLER_ARGS)
 {
 	memset(&SCTP_BASE_SYSCTL(sctp_log), 0, sizeof(struct sctp_log));
 	return (0);
 }
 #endif
 
 /*
  * SCTP_UINT_SYSCTL(mib_name, var_name, prefix)
  *
  * Expands to a handler function sctp_sysctl_handle_<mib_name>() and the
  * SYSCTL_PROC() registration of <mib_name> under the _net_inet_sctp node.
  *
  *   mib_name  name of the sysctl leaf and suffix of the handler name
  *   var_name  member accessed through SCTP_BASE_SYSCTL()
  *   prefix    constant prefix; <prefix>_MIN, <prefix>_MAX and
  *             <prefix>_DESC must exist
  *
  * The generated handler passes the current value through
  * sysctl_handle_int(); on a successful write the new value is stored only
  * if it lies within [<prefix>_MIN, <prefix>_MAX], otherwise EINVAL is
  * returned.
  *
  * NOTE(review): 'new' is unsigned, so 'new < <prefix>_MIN' can never be
  * true when the minimum is 0; the hand-written handlers in this file
  * avoid that comparison with #if.
  * NOTE(review): the format string here is "UI" while the explicit
  * SYSCTL_PROC() registrations in this file use "IU" — confirm which one
  * is intended.
  */
 #define SCTP_UINT_SYSCTL(mib_name, var_name, prefix)			\
 	static int							\
 	sctp_sysctl_handle_##mib_name(SYSCTL_HANDLER_ARGS)		\
 	{								\
 		int error;						\
 		uint32_t new;						\
 									\
 		new = SCTP_BASE_SYSCTL(var_name);			\
 		error = sysctl_handle_int(oidp, &new, 0, req);		\
 		if ((error == 0) && (req->newptr != NULL)) {		\
 			if ((new < prefix##_MIN) ||			\
 			    (new > prefix##_MAX)) {			\
 				error = EINVAL;				\
 			} else {					\
 				SCTP_BASE_SYSCTL(var_name) = new;	\
 			}						\
 		}							\
 		return (error);						\
 	}								\
 	SYSCTL_PROC(_net_inet_sctp, OID_AUTO, mib_name,			\
 	                 CTLFLAG_VNET|CTLTYPE_UINT|CTLFLAG_RW, NULL, 0,	\
 	                 sctp_sysctl_handle_##mib_name, "UI", prefix##_DESC);
 
 /*
  * sysctl definitions
  */
 
 /*
  * Each SCTP_UINT_SYSCTL(mib, var, PREFIX) line below expands to a
  * range-checked handler for SCTP_BASE_SYSCTL(var) plus its SYSCTL_PROC()
  * registration (bounds PREFIX_MIN/PREFIX_MAX, description PREFIX_DESC).
  * Nodes that use a dedicated handler instead of the generated one are
  * registered explicitly with SYSCTL_PROC().
  */
 SCTP_UINT_SYSCTL(sendspace, sctp_sendspace, SCTPCTL_MAXDGRAM)
 SCTP_UINT_SYSCTL(recvspace, sctp_recvspace, SCTPCTL_RECVSPACE)
 SCTP_UINT_SYSCTL(auto_asconf, sctp_auto_asconf, SCTPCTL_AUTOASCONF)
 SCTP_UINT_SYSCTL(ecn_enable, sctp_ecn_enable, SCTPCTL_ECN_ENABLE)
 SCTP_UINT_SYSCTL(pr_enable, sctp_pr_enable, SCTPCTL_PR_ENABLE)
 /* auth_enable and asconf_enable use their own handlers. */
 SYSCTL_PROC(_net_inet_sctp, OID_AUTO, auth_enable, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
     NULL, 0, sctp_sysctl_handle_auth, "IU", SCTPCTL_AUTH_ENABLE_DESC);
 SYSCTL_PROC(_net_inet_sctp, OID_AUTO, asconf_enable, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
     NULL, 0, sctp_sysctl_handle_asconf, "IU", SCTPCTL_ASCONF_ENABLE_DESC);
 SCTP_UINT_SYSCTL(reconfig_enable, sctp_reconfig_enable, SCTPCTL_RECONFIG_ENABLE)
 SCTP_UINT_SYSCTL(nrsack_enable, sctp_nrsack_enable, SCTPCTL_NRSACK_ENABLE)
 SCTP_UINT_SYSCTL(pktdrop_enable, sctp_pktdrop_enable, SCTPCTL_PKTDROP_ENABLE)
 SCTP_UINT_SYSCTL(peer_chkoh, sctp_peer_chunk_oh, SCTPCTL_PEER_CHKOH)
 SCTP_UINT_SYSCTL(maxburst, sctp_max_burst_default, SCTPCTL_MAXBURST)
 SCTP_UINT_SYSCTL(fr_maxburst, sctp_fr_max_burst_default, SCTPCTL_FRMAXBURST)
 SCTP_UINT_SYSCTL(maxchunks, sctp_max_chunks_on_queue, SCTPCTL_MAXCHUNKS)
 SCTP_UINT_SYSCTL(tcbhashsize, sctp_hashtblsize, SCTPCTL_TCBHASHSIZE)
 SCTP_UINT_SYSCTL(pcbhashsize, sctp_pcbtblsize, SCTPCTL_PCBHASHSIZE)
 SCTP_UINT_SYSCTL(min_split_point, sctp_min_split_point, SCTPCTL_MIN_SPLIT_POINT)
 SCTP_UINT_SYSCTL(chunkscale, sctp_chunkscale, SCTPCTL_CHUNKSCALE)
 SCTP_UINT_SYSCTL(delayed_sack_time, sctp_delayed_sack_time_default, SCTPCTL_DELAYED_SACK_TIME)
 SCTP_UINT_SYSCTL(sack_freq, sctp_sack_freq_default, SCTPCTL_SACK_FREQ)
 SCTP_UINT_SYSCTL(sys_resource, sctp_system_free_resc_limit, SCTPCTL_SYS_RESOURCE)
 SCTP_UINT_SYSCTL(asoc_resource, sctp_asoc_free_resc_limit, SCTPCTL_ASOC_RESOURCE)
 SCTP_UINT_SYSCTL(heartbeat_interval, sctp_heartbeat_interval_default, SCTPCTL_HEARTBEAT_INTERVAL)
 SCTP_UINT_SYSCTL(pmtu_raise_time, sctp_pmtu_raise_time_default, SCTPCTL_PMTU_RAISE_TIME)
 SCTP_UINT_SYSCTL(shutdown_guard_time, sctp_shutdown_guard_time_default, SCTPCTL_SHUTDOWN_GUARD_TIME)
 SCTP_UINT_SYSCTL(secret_lifetime, sctp_secret_lifetime_default, SCTPCTL_SECRET_LIFETIME)
 SCTP_UINT_SYSCTL(rto_max, sctp_rto_max_default, SCTPCTL_RTO_MAX)
 SCTP_UINT_SYSCTL(rto_min, sctp_rto_min_default, SCTPCTL_RTO_MIN)
 SCTP_UINT_SYSCTL(rto_initial, sctp_rto_initial_default, SCTPCTL_RTO_INITIAL)
 SCTP_UINT_SYSCTL(init_rto_max, sctp_init_rto_max_default, SCTPCTL_INIT_RTO_MAX)
 SCTP_UINT_SYSCTL(valid_cookie_life, sctp_valid_cookie_life_default, SCTPCTL_VALID_COOKIE_LIFE)
 SCTP_UINT_SYSCTL(init_rtx_max, sctp_init_rtx_max_default, SCTPCTL_INIT_RTX_MAX)
 SCTP_UINT_SYSCTL(assoc_rtx_max, sctp_assoc_rtx_max_default, SCTPCTL_ASSOC_RTX_MAX)
 SCTP_UINT_SYSCTL(path_rtx_max, sctp_path_rtx_max_default, SCTPCTL_PATH_RTX_MAX)
 SCTP_UINT_SYSCTL(path_pf_threshold, sctp_path_pf_threshold, SCTPCTL_PATH_PF_THRESHOLD)
 SCTP_UINT_SYSCTL(add_more_on_output, sctp_add_more_threshold, SCTPCTL_ADD_MORE_ON_OUTPUT)
 SCTP_UINT_SYSCTL(incoming_streams, sctp_nr_incoming_streams_default, SCTPCTL_INCOMING_STREAMS)
 SCTP_UINT_SYSCTL(outgoing_streams, sctp_nr_outgoing_streams_default, SCTPCTL_OUTGOING_STREAMS)
 SCTP_UINT_SYSCTL(cmt_on_off, sctp_cmt_on_off, SCTPCTL_CMT_ON_OFF)
 SCTP_UINT_SYSCTL(cmt_use_dac, sctp_cmt_use_dac, SCTPCTL_CMT_USE_DAC)
 SCTP_UINT_SYSCTL(cwnd_maxburst, sctp_use_cwnd_based_maxburst, SCTPCTL_CWND_MAXBURST)
 SCTP_UINT_SYSCTL(nat_friendly, sctp_nat_friendly, SCTPCTL_NAT_FRIENDLY)
 SCTP_UINT_SYSCTL(abc_l_var, sctp_L2_abc_variable, SCTPCTL_ABC_L_VAR)
 SCTP_UINT_SYSCTL(max_chained_mbufs, sctp_mbuf_threshold_count, SCTPCTL_MAX_CHAINED_MBUFS)
 SCTP_UINT_SYSCTL(do_sctp_drain, sctp_do_drain, SCTPCTL_DO_SCTP_DRAIN)
 SCTP_UINT_SYSCTL(hb_max_burst, sctp_hb_maxburst, SCTPCTL_HB_MAX_BURST)
 SCTP_UINT_SYSCTL(abort_at_limit, sctp_abort_if_one_2_one_hits_limit, SCTPCTL_ABORT_AT_LIMIT)
 SCTP_UINT_SYSCTL(min_residual, sctp_min_residual, SCTPCTL_MIN_RESIDUAL)
 SCTP_UINT_SYSCTL(max_retran_chunk, sctp_max_retran_chunk, SCTPCTL_MAX_RETRAN_CHUNK)
 SCTP_UINT_SYSCTL(log_level, sctp_logging_level, SCTPCTL_LOGGING_LEVEL)
 SCTP_UINT_SYSCTL(default_cc_module, sctp_default_cc_module, SCTPCTL_DEFAULT_CC_MODULE)
 SCTP_UINT_SYSCTL(default_ss_module, sctp_default_ss_module, SCTPCTL_DEFAULT_SS_MODULE)
 SCTP_UINT_SYSCTL(default_frag_interleave, sctp_default_frag_interleave, SCTPCTL_DEFAULT_FRAG_INTERLEAVE)
 SCTP_UINT_SYSCTL(mobility_base, sctp_mobility_base, SCTPCTL_MOBILITY_BASE)
 SCTP_UINT_SYSCTL(mobility_fasthandoff, sctp_mobility_fasthandoff, SCTPCTL_MOBILITY_FASTHANDOFF)
 /* Trace-log export and clear nodes exist only with SCTP_LOCAL_TRACE_BUF. */
 #if defined(SCTP_LOCAL_TRACE_BUF)
 SYSCTL_PROC(_net_inet_sctp, OID_AUTO, log, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RD,
     NULL, 0, sctp_sysctl_handle_trace_log, "S,sctplog", "SCTP logging (struct sctp_log)");
 SYSCTL_PROC(_net_inet_sctp, OID_AUTO, clear_trace, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
     NULL, 0, sctp_sysctl_handle_trace_log_clear, "IU", "Clear SCTP Logging buffer");
 #endif
 /* udp_tunneling_port uses its own handler. */
 SYSCTL_PROC(_net_inet_sctp, OID_AUTO, udp_tunneling_port, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW,
     NULL, 0, sctp_sysctl_handle_udp_tunneling, "IU", SCTPCTL_UDP_TUNNELING_PORT_DESC);
 SCTP_UINT_SYSCTL(enable_sack_immediately, sctp_enable_sack_immediately, SCTPCTL_SACK_IMMEDIATELY_ENABLE)
 SCTP_UINT_SYSCTL(nat_friendly_init, sctp_inits_include_nat_friendly, SCTPCTL_NAT_FRIENDLY_INITS)
 SCTP_UINT_SYSCTL(vtag_time_wait, sctp_vtag_time_wait, SCTPCTL_TIME_WAIT)
 SCTP_UINT_SYSCTL(buffer_splitting, sctp_buffer_splitting, SCTPCTL_BUFFER_SPLITTING)
 SCTP_UINT_SYSCTL(initial_cwnd, sctp_initial_cwnd, SCTPCTL_INITIAL_CWND)
 SCTP_UINT_SYSCTL(rttvar_bw, sctp_rttvar_bw, SCTPCTL_RTTVAR_BW)
 SCTP_UINT_SYSCTL(rttvar_rtt, sctp_rttvar_rtt, SCTPCTL_RTTVAR_RTT)
 SCTP_UINT_SYSCTL(rttvar_eqret, sctp_rttvar_eqret, SCTPCTL_RTTVAR_EQRET)
 SCTP_UINT_SYSCTL(rttvar_steady_step, sctp_steady_step, SCTPCTL_RTTVAR_STEADYS)
 SCTP_UINT_SYSCTL(use_dcccecn, sctp_use_dccc_ecn, SCTPCTL_RTTVAR_DCCCECN)
 SCTP_UINT_SYSCTL(blackhole, sctp_blackhole, SCTPCTL_BLACKHOLE)
 SCTP_UINT_SYSCTL(diag_info_code, sctp_diag_info_code, SCTPCTL_DIAG_INFO_CODE)
 /* The debug knob is only compiled in for SCTP_DEBUG builds. */
 #ifdef SCTP_DEBUG
 SCTP_UINT_SYSCTL(debug, sctp_debug_on, SCTPCTL_DEBUG)
 #endif
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 SCTP_UINT_SYSCTL(output_unlocked, sctp_output_unlocked, SCTPCTL_OUTPUT_UNLOCKED)
 #endif
 /* Structured nodes: statistics (read/write) and association list (read-only). */
 SYSCTL_PROC(_net_inet_sctp, OID_AUTO, stats, CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_RW,
     NULL, 0, sctp_sysctl_handle_stats, "S,sctpstat", "SCTP statistics (struct sctp_stat)");
 SYSCTL_PROC(_net_inet_sctp, OID_AUTO, assoclist, CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_RD,
     NULL, 0, sctp_sysctl_handle_assoclist, "S,xassoc", "List of active SCTP associations");
Index: head/sys/netinet/sctp_usrreq.c
===================================================================
--- head/sys/netinet/sctp_usrreq.c	(revision 319721)
+++ head/sys/netinet/sctp_usrreq.c	(revision 319722)
@@ -1,7450 +1,7443 @@
 /*-
  * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved.
  * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * a) Redistributions of source code must retain the above copyright notice,
  *    this list of conditions and the following disclaimer.
  *
  * b) Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
  *    the documentation and/or other materials provided with the distribution.
  *
  * c) Neither the name of Cisco Systems, Inc. nor the names of its
  *    contributors may be used to endorse or promote products derived
  *    from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  * THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <netinet/sctp_os.h>
 #include <sys/proc.h>
 #include <netinet/sctp_pcb.h>
 #include <netinet/sctp_header.h>
 #include <netinet/sctp_var.h>
 #ifdef INET6
 #include <netinet6/sctp6_var.h>
 #endif
 #include <netinet/sctp_sysctl.h>
 #include <netinet/sctp_output.h>
 #include <netinet/sctp_uio.h>
 #include <netinet/sctp_asconf.h>
 #include <netinet/sctputil.h>
 #include <netinet/sctp_indata.h>
 #include <netinet/sctp_timer.h>
 #include <netinet/sctp_auth.h>
 #include <netinet/sctp_bsd_addr.h>
 #include <netinet/udp.h>
 
 
 
 extern const struct sctp_cc_functions sctp_cc_functions[];
 extern const struct sctp_ss_functions sctp_ss_functions[];
 
 /*
  * Protocol initialization for SCTP.
  *
  * Loads the sysctl defaults, scales a few of them to the mbuf cluster
  * budget of this system, resets the base bookkeeping variables and then
  * initializes the PCB layer via sctp_pcb_init().
  */
 void
 sctp_init(void)
 {
 	u_long send_cap;
 
 	/* Start from the compiled-in sysctl defaults. */
 	sctp_init_sysctls();
 
 	/* Raise the chunk queue limit when the cluster pool allows it. */
 	if ((nmbclusters / 8) > SCTP_ASOC_MAX_CHUNKS_ON_QUEUE) {
 		SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue) = (nmbclusters / 8);
 	}
 
 	/*
 	 * Send window: the smaller of the SB_MAX-derived bound and half of
 	 * the clusters (expressed in default-sized segments).
 	 */
 	send_cap = (u_long)((u_quad_t)(SB_MAX) * MCLBYTES / (MSIZE + MCLBYTES));
 	SCTP_BASE_SYSCTL(sctp_sendspace) = min(send_cap,
 	    (((uint32_t)nmbclusters / 2) * SCTP_DEFAULT_MAXSEGMENT));
 
 	/* Receive window: for now simply mirror the send window. */
 	SCTP_BASE_SYSCTL(sctp_recvspace) = SCTP_BASE_SYSCTL(sctp_sendspace);
 
 	SCTP_BASE_VAR(first_time) = 0;
 	SCTP_BASE_VAR(sctp_pcb_initialized) = 0;
 	sctp_pcb_init();
 #if defined(SCTP_PACKET_LOGGING)
 	/* Start with an empty packet log. */
 	SCTP_BASE_VAR(packet_log_writers) = 0;
 	SCTP_BASE_VAR(packet_log_end) = 0;
 	bzero(&SCTP_BASE_VAR(packet_log_buffer), SCTP_PACKET_LOG_SIZE);
 #endif
 }
 
 #ifdef VIMAGE
 /*
  * VIMAGE teardown hook: calls sctp_pcb_finish().  The argument is unused.
  */
 static void
 sctp_finish(void *unused __unused)
 {
 	sctp_pcb_finish();
 }
 
 /* Register sctp_finish() as a VNET uninit handler (SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH). */
 VNET_SYSUNINIT(sctp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, sctp_finish, NULL);
 #endif
 
 /*
  * Apply a reduced path MTU to an association.
  *
  * Records nxtsz as the association's smallest MTU, then walks the send and
  * sent queues.  Any chunk whose size plus header overhead (IP + SCTP common
  * header, plus the AUTH chunk when DATA must be authenticated) exceeds
  * nxtsz is flagged CHUNK_FLAGS_FRAGMENT_OK.  Oversized chunks on the sent
  * queue that are not yet marked for resend are additionally taken out of
  * flight and marked SCTP_DATAGRAM_RESEND.
  */
 void
 sctp_pathmtu_adjustment(struct sctp_tcb *stcb, uint16_t nxtsz)
 {
 	struct sctp_tmit_chunk *chk;
 	uint16_t overhead;
 
 	stcb->asoc.smallest_mtu = nxtsz;
 
 	/* Per-packet overhead that has to fit next to the chunk. */
 	overhead = IP_HDR_SIZE + sizeof(struct sctphdr);
 	if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) {
 		overhead += sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id);
 	}
 
 	/* Not yet sent: just allow fragmentation of what no longer fits. */
 	TAILQ_FOREACH(chk, &stcb->asoc.send_queue, sctp_next) {
 		if ((chk->send_size + overhead) <= nxtsz) {
 			continue;
 		}
 		chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
 	}
 
 	/* Already sent: also schedule an immediate resend, the chunk was too big. */
 	TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
 		if ((chk->send_size + overhead) <= nxtsz) {
 			continue;
 		}
 		chk->flags |= CHUNK_FLAGS_FRAGMENT_OK;
 		if (chk->sent >= SCTP_DATAGRAM_RESEND) {
 			/* Already queued for resend (or beyond); leave it alone. */
 			continue;
 		}
 		sctp_flight_size_decrease(chk);
 		sctp_total_flight_decrease(stcb, chk);
 		chk->sent = SCTP_DATAGRAM_RESEND;
 		sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
 		chk->rec.data.doing_fast_retransmit = 0;
 		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
 			sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_PMTU,
 			    chk->whoTo->flight_size,
 			    chk->book_size,
 			    (uint32_t)(uintptr_t)chk->whoTo,
 			    chk->rec.data.tsn);
 		}
 		/* A retransmitted chunk must not be used for RTT measurement. */
 		chk->do_rtt = 0;
 	}
 }
 
 #ifdef INET
 /*
  * React to an ICMP error that has been matched to an association.
  *
  * Only ICMP_UNREACH is acted upon; the reaction depends on icmp_code:
  *  - net/host unreachable or prohibited codes: mark 'net' as not reachable
  *    and notify the ULP (SCTP_NOTIFY_INTERFACE_DOWN);
  *  - protocol/port unreachable: treat like an ABORT and free the
  *    association;
  *  - fragmentation needed: lower the path MTU of 'net' (and of the
  *    association if required), using next_mtu or, when that is 0, a guess
  *    derived from ip_len.
  *
  * Every path either calls SCTP_TCB_UNLOCK(stcb) or frees the association,
  * so the caller must not touch stcb afterwards (presumably the caller
  * holds the TCB lock on entry -- see sctp_ctlinput()).
  */
 void
 sctp_notify(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     struct sctp_nets *net,
     uint8_t icmp_type,
     uint8_t icmp_code,
     uint16_t ip_len,
     uint32_t next_mtu)
 {
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 	struct socket *so;
 #endif
 	int timer_stopped;
 
 	if (icmp_type != ICMP_UNREACH) {
 		/* We only care about unreachable */
 		SCTP_TCB_UNLOCK(stcb);
 		return;
 	}
 	if ((icmp_code == ICMP_UNREACH_NET) ||
 	    (icmp_code == ICMP_UNREACH_HOST) ||
 	    (icmp_code == ICMP_UNREACH_NET_UNKNOWN) ||
 	    (icmp_code == ICMP_UNREACH_HOST_UNKNOWN) ||
 	    (icmp_code == ICMP_UNREACH_ISOLATED) ||
 	    (icmp_code == ICMP_UNREACH_NET_PROHIB) ||
 	    (icmp_code == ICMP_UNREACH_HOST_PROHIB) ||
 	    (icmp_code == ICMP_UNREACH_FILTER_PROHIB)) {
 		/* Mark the net unreachable. */
 		if (net->dest_state & SCTP_ADDR_REACHABLE) {
 			/* OK, that destination is NOT reachable. */
 			net->dest_state &= ~SCTP_ADDR_REACHABLE;
 			net->dest_state &= ~SCTP_ADDR_PF;
 			sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
 			    stcb, 0,
 			    (void *)net, SCTP_SO_NOT_LOCKED);
 		}
 		SCTP_TCB_UNLOCK(stcb);
 	} else if ((icmp_code == ICMP_UNREACH_PROTOCOL) ||
 	    (icmp_code == ICMP_UNREACH_PORT)) {
 		/* Treat it like an ABORT. */
 		sctp_abort_notification(stcb, 1, 0, NULL, SCTP_SO_NOT_LOCKED);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		/*
 		 * Take the socket lock before the TCB lock: hold a reference
 		 * on the association while the TCB lock is dropped and
 		 * re-acquired.
 		 */
 		so = SCTP_INP_SO(inp);
 		atomic_add_int(&stcb->asoc.refcnt, 1);
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_SOCKET_LOCK(so, 1);
 		SCTP_TCB_LOCK(stcb);
 		atomic_subtract_int(&stcb->asoc.refcnt, 1);
 #endif
 		(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 		    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_2);
 #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING)
 		SCTP_SOCKET_UNLOCK(so, 1);
 		/* SCTP_TCB_UNLOCK(stcb); MT: I think this is not needed. */
 #endif
 		/* no need to unlock here, since the TCB is gone */
 	} else if (icmp_code == ICMP_UNREACH_NEEDFRAG) {
 		/* Find the next (smaller) MTU */
 		if (next_mtu == 0) {
 			/*
 			 * Old type of router that does not tell us what the
 			 * next MTU is. We will have to guess (in an
 			 * educated fashion, of course).
 			 */
 			next_mtu = sctp_get_prev_mtu(ip_len);
 		}
 		/* Stop the PMTU timer. */
 		if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
 			timer_stopped = 1;
 			sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
 			    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_1);
 		} else {
 			timer_stopped = 0;
 		}
 		/* Update the path MTU. */
 		if (net->port) {
 			/* A non-zero net->port means the UDP header counts against the MTU. */
 			next_mtu -= sizeof(struct udphdr);
 		}
 		if (net->mtu > next_mtu) {
 			net->mtu = next_mtu;
 			/* The host cache gets the MTU including the UDP header, if any. */
 			if (net->port) {
 				sctp_hc_set_mtu(&net->ro._l_addr, inp->fibnum, next_mtu + sizeof(struct udphdr));
 			} else {
 				sctp_hc_set_mtu(&net->ro._l_addr, inp->fibnum, next_mtu);
 			}
 		}
 		/* Update the association MTU */
 		if (stcb->asoc.smallest_mtu > next_mtu) {
 			sctp_pathmtu_adjustment(stcb, next_mtu);
 		}
 		/* Finally, start the PMTU timer if it was running before. */
 		if (timer_stopped) {
 			sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
 		}
 		SCTP_TCB_UNLOCK(stcb);
 	} else {
 		/* Any other unreachable code is ignored. */
 		SCTP_TCB_UNLOCK(stcb);
 	}
 }
 
 /*
  * Control input for SCTP over IPv4.
  *
  * 'sa' must be a non-wildcard AF_INET address and 'cmd' a known PRC_*
  * command with an inetctlerrmap entry; redirects are ignored (vip is
  * cleared).  'vip' points at the IP header of the offending packet that is
  * embedded in the ICMP message.  The association the packet belongs to is
  * looked up, the verification tag (or, for a zero tag, the INIT chunk's
  * initiate tag) is checked, and on a match the event is handed to
  * sctp_notify(), which takes over the locked TCB.
  */
 void
 sctp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 	struct ip *outer_ip;
 	struct ip *inner_ip;
 	struct sctphdr *sh;
 	struct icmp *icmp;
 	struct sctp_inpcb *inp;
 	struct sctp_tcb *stcb;
 	struct sctp_nets *net;
 	struct sctp_init_chunk *ch;
 	struct sockaddr_in src, dst;
 
 	if (sa->sa_family != AF_INET ||
 	    ((struct sockaddr_in *)sa)->sin_addr.s_addr == INADDR_ANY) {
 		return;
 	}
 	if (PRC_IS_REDIRECT(cmd)) {
 		vip = NULL;
 	} else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0) {
 		return;
 	}
 	if (vip != NULL) {
 		/*
 		 * Walk backwards from the embedded (inner) IP header to the
 		 * ICMP header in front of it and to the outer IP header in
 		 * front of that; the SCTP common header follows the inner
 		 * IP header.
 		 */
 		inner_ip = (struct ip *)vip;
 		icmp = (struct icmp *)((caddr_t)inner_ip -
 		    (sizeof(struct icmp) - sizeof(struct ip)));
 		outer_ip = (struct ip *)((caddr_t)icmp - sizeof(struct ip));
 		sh = (struct sctphdr *)((caddr_t)inner_ip + (inner_ip->ip_hl << 2));
 		memset(&src, 0, sizeof(struct sockaddr_in));
 		src.sin_family = AF_INET;
 		src.sin_len = sizeof(struct sockaddr_in);
 		src.sin_port = sh->src_port;
 		src.sin_addr = inner_ip->ip_src;
 		memset(&dst, 0, sizeof(struct sockaddr_in));
 		dst.sin_family = AF_INET;
 		dst.sin_len = sizeof(struct sockaddr_in);
 		dst.sin_port = sh->dest_port;
 		dst.sin_addr = inner_ip->ip_dst;
 		/*
 		 * 'dst' holds the dest of the packet that failed to be
 		 * sent. 'src' holds our local endpoint address. Thus we
 		 * reverse the dst and the src in the lookup.
 		 */
 		inp = NULL;
 		net = NULL;
 		stcb = sctp_findassociation_addr_sa((struct sockaddr *)&dst,
 		    (struct sockaddr *)&src,
 		    &inp, &net, 1,
 		    SCTP_DEFAULT_VRFID);
 		if ((stcb != NULL) &&
 		    (net != NULL) &&
 		    (inp != NULL)) {
 			/* Check the verification tag */
 			if (ntohl(sh->v_tag) != 0) {
 				/*
 				 * This must be the verification tag used
 				 * for sending out packets. We don't
 				 * consider packets reflecting the
 				 * verification tag.
 				 */
 				if (ntohl(sh->v_tag) != stcb->asoc.peer_vtag) {
 					SCTP_TCB_UNLOCK(stcb);
 					return;
 				}
 			} else {
 				/*
 				 * Zero tag: only acceptable when enough of
 				 * the original packet was returned (8 bytes
 				 * of ICMP header, the inner IP header and
 				 * 20 further bytes) to inspect the first
 				 * chunk.
 				 */
 				if (ntohs(outer_ip->ip_len) >=
 				    sizeof(struct ip) +
 				    8 + (inner_ip->ip_hl << 2) + 20) {
 					/*
 					 * In this case we can check if we
 					 * got an INIT chunk and if the
 					 * initiate tag matches.
 					 */
 					ch = (struct sctp_init_chunk *)(sh + 1);
 					if ((ch->ch.chunk_type != SCTP_INITIATION) ||
 					    (ntohl(ch->init.initiate_tag) != stcb->asoc.my_vtag)) {
 						SCTP_TCB_UNLOCK(stcb);
 						return;
 					}
 				} else {
 					SCTP_TCB_UNLOCK(stcb);
 					return;
 				}
 			}
 			/* sctp_notify() unlocks or frees the TCB on every path. */
 			sctp_notify(inp, stcb, net,
 			    icmp->icmp_type,
 			    icmp->icmp_code,
 			    ntohs(inner_ip->ip_len),
 			    (uint32_t)ntohs(icmp->icmp_nextmtu));
 		} else {
 			/* Incomplete match: undo whatever the lookup left us holding. */
 			if ((stcb == NULL) && (inp != NULL)) {
 				/* reduce ref-count */
 				SCTP_INP_WLOCK(inp);
 				SCTP_INP_DECR_REF(inp);
 				SCTP_INP_WUNLOCK(inp);
 			}
 			if (stcb) {
 				SCTP_TCB_UNLOCK(stcb);
 			}
 		}
 	}
 	return;
 }
 #endif
 
 /*
  * Sysctl handler that reports the credentials (struct xucred) of the socket
  * owning an SCTP association or endpoint.
  *
  * The caller needs PRIV_NETINET_GETCRED and must pass in two
  * struct sockaddr_in values; addrs[1] and addrs[0] are handed to
  * sctp_findassociation_addr_sa() in that order (presumably remote and local
  * address respectively -- confirm against the lookup's contract).
  *
  * Returns 0 and copies out the xucred on success, ENOENT when no usable
  * association/endpoint is found, or the error from priv_check(),
  * SYSCTL_IN(), cr_canseesocket() or SYSCTL_OUT().
  */
 static int
 sctp_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct xucred xuc;
 	struct sockaddr_in addrs[2];
 	struct sctp_inpcb *inp;
 	struct sctp_nets *net;
 	struct sctp_tcb *stcb;
 	int error;
 	uint32_t vrf_id;
 
 	/* FIX, for non-bsd is this right? */
 	vrf_id = SCTP_DEFAULT_VRFID;
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 
 	if (error)
 		return (error);
 
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error)
 		return (error);
 
 	/*
 	 * Start from known values, as sctp_ctlinput() does, so the checks
 	 * below never read an indeterminate pointer if the lookup leaves
 	 * the out-parameters untouched.
 	 */
 	inp = NULL;
 	net = NULL;
 	stcb = sctp_findassociation_addr_sa(sintosa(&addrs[1]),
 	    sintosa(&addrs[0]),
 	    &inp, &net, 1, vrf_id);
 	if (stcb == NULL || inp == NULL || inp->sctp_socket == NULL) {
 		if ((inp != NULL) && (stcb == NULL)) {
 			/* reduce ref-count */
 			SCTP_INP_WLOCK(inp);
 			SCTP_INP_DECR_REF(inp);
 			/*
 			 * NOTE(review): inp->sctp_socket is not checked on
 			 * this path before it is used below -- confirm it
 			 * cannot be NULL for an endpoint-only match.
 			 */
 			goto cred_can_cont;
 		}
 		/*
 		 * An association was returned (locked) but cannot be used,
 		 * e.g. because its socket is gone: drop the TCB lock before
 		 * bailing out instead of leaking it.
 		 */
 		if (stcb != NULL) {
 			SCTP_TCB_UNLOCK(stcb);
 		}
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
 		error = ENOENT;
 		goto out;
 	}
 	SCTP_TCB_UNLOCK(stcb);
 	/*
 	 * We use the write lock here, only since in the error leg we need
 	 * it. If we used RLOCK, then we would have to
 	 * wlock/decr/unlock/rlock. Which in theory could create a hole.
 	 * Better to use higher wlock.
 	 */
 	SCTP_INP_WLOCK(inp);
 cred_can_cont:
 	/* Both ways of getting here hold the INP write lock. */
 	error = cr_canseesocket(req->td->td_ucred, inp->sctp_socket);
 	if (error) {
 		SCTP_INP_WUNLOCK(inp);
 		goto out;
 	}
 	cru2x(inp->sctp_socket->so_cred, &xuc);
 	SCTP_INP_WUNLOCK(inp);
 	error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
 out:
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_sctp, OID_AUTO, getcred, CTLTYPE_OPAQUE | CTLFLAG_RW,
     0, 0, sctp_getcred, "S,ucred", "Get the ucred of a SCTP connection");
 
 
 #ifdef INET
 /*
  * Abort handler for an IPv4 SCTP socket.
  *
  * Atomically marks the endpoint SOCKET_GONE | CLOSE_IP.  The thread that
  * wins that transition frees the endpoint in abortive mode, clears both
  * socket buffers and detaches the PCB from the socket.  If the flags
  * changed underneath us the attempt is repeated until either we win or
  * SOCKET_GONE turns out to be set already, in which case nothing is done.
  */
 static void
 sctp_abort(struct socket *so)
 {
 	struct sctp_inpcb *inp;
 	uint32_t flags;
 
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		return;
 	}
 	for (;;) {
 		flags = inp->sctp_flags;
 #ifdef SCTP_LOG_CLOSING
 		sctp_log_closing(inp, NULL, 17);
 #endif
 		if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
 		    (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
 			/* We performed the transition; go tear things down. */
 			break;
 		}
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) != 0) {
 			/* The socket is already marked gone; nothing left to do. */
 			return;
 		}
 		/* Lost a race on an unrelated flag change; try again. */
 	}
 #ifdef SCTP_LOG_CLOSING
 	sctp_log_closing(inp, NULL, 16);
 #endif
 	sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
 	    SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
 	SOCK_LOCK(so);
 	SCTP_SB_CLEAR(so->so_snd);
 	/*
 	 * The receive buffer only exists for accounting/select, clear it
 	 * the same way.
 	 */
 	SCTP_SB_CLEAR(so->so_rcv);
 
 	/* Drop the PCB reference: the socket is now fully detached. */
 	so->so_pcb = NULL;
 	SOCK_UNLOCK(so);
 }
 
 /*
  * Attach a new SCTP endpoint to an IPv4 socket.
  *
  * Fails with EINVAL if the socket already has a PCB.  Reserves the default
  * send/receive buffer space when either high-water mark is still zero,
  * allocates the endpoint in the default VRF and marks it as IPv4 with the
  * default TTL.  Returns 0 or the error of the failing step.  'proto' and
  * 'p' are unused.
  */
 static int
 sctp_attach(struct socket *so, int proto SCTP_UNUSED, struct thread *p SCTP_UNUSED)
 {
 	struct sctp_inpcb *inp;
 	struct inpcb *ip_inp;
 	int error;
 
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp != NULL) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		return (EINVAL);
 	}
 	if ((so->so_snd.sb_hiwat == 0) || (so->so_rcv.sb_hiwat == 0)) {
 		error = SCTP_SORESERVE(so, SCTP_BASE_SYSCTL(sctp_sendspace), SCTP_BASE_SYSCTL(sctp_recvspace));
 		if (error != 0) {
 			return (error);
 		}
 	}
 	error = sctp_inpcb_alloc(so, SCTP_DEFAULT_VRFID);
 	if (error != 0) {
 		return (error);
 	}
 	/* sctp_inpcb_alloc() has hooked the new endpoint into so->so_pcb. */
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	ip_inp = &inp->ip_inp.inp;
 	SCTP_INP_WLOCK(inp);
 	/* This is the IPv4 attach path, so the endpoint is not v6-bound. */
 	inp->sctp_flags &= ~SCTP_PCB_FLAGS_BOUND_V6;
 	ip_inp->inp_vflag |= INP_IPV4;
 	ip_inp->inp_ip_ttl = MODULE_GLOBAL(ip_defttl);
 	SCTP_INP_WUNLOCK(inp);
 	return (0);
 }
 
 /*
  * Bind an IPv4 SCTP socket.
  *
  * Returns EINVAL when the socket has no PCB, or when an address is given
  * that is not an AF_INET sockaddr of exactly sizeof(struct sockaddr_in).
  * Otherwise the request (including a NULL address) is passed on to
  * sctp_inpcb_bind() and its result is returned.
  */
 static int
 sctp_bind(struct socket *so, struct sockaddr *addr, struct thread *p)
 {
 	struct sctp_inpcb *inp;
 
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if ((inp == NULL) ||
 	    ((addr != NULL) &&
 	    ((addr->sa_family != AF_INET) ||
 	    (addr->sa_len != sizeof(struct sockaddr_in))))) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		return (EINVAL);
 	}
 	return (sctp_inpcb_bind(so, addr, NULL, p));
 }
 
 #endif
 /*
  * Close handler for an SCTP socket.
  *
  * Atomically marks the endpoint SOCKET_GONE | CLOSE_IP and, if this thread
  * performed the transition, frees the endpoint: abortively when SO_LINGER
  * is set with a zero linger time or unread data is still queued,
  * gracefully otherwise.  Afterwards both socket buffers are cleared and
  * the PCB is detached from the socket.  If the flags changed underneath
  * us the attempt is repeated until either we win or SOCKET_GONE is found
  * to be set already, in which case nothing is done.
  */
 void
 sctp_close(struct socket *so)
 {
 	struct sctp_inpcb *inp;
 	uint32_t flags;
 
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		return;
 	}
 	/* Inform all the lower layer associations that we are done. */
 	for (;;) {
 		flags = inp->sctp_flags;
 #ifdef SCTP_LOG_CLOSING
 		sctp_log_closing(inp, NULL, 17);
 #endif
 		if (((flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
 		    (atomic_cmpset_int(&inp->sctp_flags, flags, (flags | SCTP_PCB_FLAGS_SOCKET_GONE | SCTP_PCB_FLAGS_CLOSE_IP)))) {
 			/* We performed the transition; go tear things down. */
 			break;
 		}
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) != 0) {
 			/* The socket is already marked gone; nothing left to do. */
 			return;
 		}
 		/* Lost a race on an unrelated flag change; try again. */
 	}
 	if (((so->so_options & SO_LINGER) && (so->so_linger == 0)) ||
 	    (so->so_rcv.sb_cc > 0)) {
 #ifdef SCTP_LOG_CLOSING
 		sctp_log_closing(inp, NULL, 13);
 #endif
 		sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_ABORT,
 		    SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
 	} else {
 #ifdef SCTP_LOG_CLOSING
 		sctp_log_closing(inp, NULL, 14);
 #endif
 		sctp_inpcb_free(inp, SCTP_FREE_SHOULD_USE_GRACEFUL_CLOSE,
 		    SCTP_CALLED_AFTER_CMPSET_OFCLOSE);
 	}
 	/*
 	 * From here on the socket is detached, whatever state the SCTP
 	 * association is in.
 	 */
 	SOCK_LOCK(so);
 	SCTP_SB_CLEAR(so->so_snd);
 	/*
 	 * The receive buffer only exists for accounting/select, clear it
 	 * the same way.
 	 */
 	SCTP_SB_CLEAR(so->so_rcv);
 
 	/* Drop the PCB reference: the socket is now fully detached. */
 	so->so_pcb = NULL;
 	SOCK_UNLOCK(so);
 }
 
 
 int
 sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
     struct mbuf *control, struct thread *p);
 
 
 /*
  * Queue an mbuf chain (and optional control mbuf) for sending on an IPv4
  * SCTP socket.
  *
  * The data is appended to the endpoint's pending packet.  Unless
  * PRUS_MORETOCOME is set in 'flags', the accumulated packet and control
  * are then handed to sctp_output() and its result is returned; otherwise
  * 0 is returned and the data stays queued.
  *
  * On the error paths both 'm' and 'control' are freed here:
  *  - EINVAL when the socket has no PCB;
  *  - EDESTADDRREQ when no address is given and the endpoint is neither
  *    connected nor TCP-style, or (INET6 kernels) when the address is not
  *    AF_INET.
  */
 int
 sctp_sendm(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr,
     struct mbuf *control, struct thread *p)
 {
 	struct sctp_inpcb *inp;
 	int error;
 	int ret;
 
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		if (control != NULL) {
 			sctp_m_freem(control);
 			control = NULL;
 		}
 		SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		sctp_m_freem(m);
 		return (EINVAL);
 	}
 	if (addr == NULL) {
 		/* A destination is mandatory unless this is a connected socket. */
 		if ((inp->sctp_flags &
 		    (SCTP_PCB_FLAGS_CONNECTED | SCTP_PCB_FLAGS_TCPTYPE)) == 0) {
 			SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EDESTADDRREQ);
 			error = EDESTADDRREQ;
 			sctp_m_freem(m);
 			if (control != NULL) {
 				sctp_m_freem(control);
 				control = NULL;
 			}
 			return (error);
 		}
 	} else {
 #ifdef INET6
 		if (addr->sa_family != AF_INET) {
 			/* Only v4 destinations are accepted on this path. */
 			SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EDESTADDRREQ);
 			sctp_m_freem(m);
 			if (control != NULL) {
 				sctp_m_freem(control);
 				control = NULL;
 			}
 			error = EDESTADDRREQ;
 			return (error);
 		}
 #endif				/* INET6 */
 	}
 	/* Remember the control mbuf, replacing a stale one if present. */
 	if (control != NULL) {
 		if (inp->control != NULL) {
 			SCTP_PRINTF("huh? control set?\n");
 			sctp_m_freem(inp->control);
 			inp->control = NULL;
 		}
 		inp->control = control;
 	}
 	/* Append the data to the pending packet. */
 	if (inp->pkt == NULL) {
 		inp->pkt = m;
 	} else {
 		SCTP_BUF_NEXT(inp->pkt_last) = m;
 	}
 	inp->pkt_last = m;
 
 	/* FreeBSD passes a flag telling us whether more data will follow. */
 	if ((flags & PRUS_MORETOCOME) != 0) {
 		return (0);
 	}
 	/*
 	 * Note: with the current version this path is only used by OpenBSD;
 	 * NetBSD, FreeBSD and MacOS redefine sosend to use sctp_sosend.
 	 * Switching back to this code (by changing the definitions back) is
 	 * possible but not advisable.  FreeBSD still gets here when a file
 	 * is sent with sendfile(), though.
 	 */
 	ret = sctp_output(inp, inp->pkt, addr, inp->control, p, flags);
 	inp->pkt = NULL;
 	inp->control = NULL;
 	return (ret);
 }
 
 /*
  * Disconnect handler for an SCTP socket.
  *
  * Only endpoints flagged TCPTYPE or IN_TCPPOOL support this; for any other
  * endpoint EOPNOTSUPP is returned, and ENOTCONN when the socket has no
  * PCB.  For a supported endpoint the first association on the list is
  * handled as follows:
  *  - SO_LINGER with zero linger time, or unread receive data: send an
  *    ABORT (unless still in COOKIE_WAIT) and free the association;
  *  - nothing queued to send: send SHUTDOWN and start the shutdown timers
  *    (unless a SHUTDOWN/SHUTDOWN-ACK was already sent), or abort if a user
  *    message is still incomplete;
  *  - data still queued: mark SHUTDOWN_PENDING, start the shutdown guard
  *    timer and push output, or abort if only a partial message is left.
  *
  * The INP read lock is held while the association is examined; every exit
  * path releases it, and releases the TCB lock unless the association was
  * freed.
  */
 int
 sctp_disconnect(struct socket *so)
 {
 	struct sctp_inpcb *inp;
 
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
 		return (ENOTCONN);
 	}
 	SCTP_INP_RLOCK(inp);
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
 		if (LIST_EMPTY(&inp->sctp_asoc_list)) {
 			/* No connection */
 			SCTP_INP_RUNLOCK(inp);
 			return (0);
 		} else {
 			struct sctp_association *asoc;
 			struct sctp_tcb *stcb;
 
 			stcb = LIST_FIRST(&inp->sctp_asoc_list);
 			if (stcb == NULL) {
 				SCTP_INP_RUNLOCK(inp);
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				return (EINVAL);
 			}
 			SCTP_TCB_LOCK(stcb);
 			asoc = &stcb->asoc;
 			if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 				/* We are about to be freed, out of here */
 				SCTP_TCB_UNLOCK(stcb);
 				SCTP_INP_RUNLOCK(inp);
 				return (0);
 			}
 			/* Abortive close: zero linger time or unread data left behind. */
 			if (((so->so_options & SO_LINGER) &&
 			    (so->so_linger == 0)) ||
 			    (so->so_rcv.sb_cc > 0)) {
 				if (SCTP_GET_STATE(asoc) !=
 				    SCTP_STATE_COOKIE_WAIT) {
 					/* Left with Data unread */
 					struct mbuf *err;
 
 					err = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_NOWAIT, 1, MT_DATA);
 					if (err) {
 						/*
 						 * Fill in the user
 						 * initiated abort
 						 */
 						struct sctp_paramhdr *ph;
 
 						ph = mtod(err, struct sctp_paramhdr *);
 						SCTP_BUF_LEN(err) = sizeof(struct sctp_paramhdr);
 						ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT);
 						ph->param_length = htons(SCTP_BUF_LEN(err));
 					}
 					/* The ABORT is sent even if no cause mbuf could be allocated. */
 					sctp_send_abort_tcb(stcb, err, SCTP_SO_LOCKED);
 					SCTP_STAT_INCR_COUNTER32(sctps_aborted);
 				}
 				SCTP_INP_RUNLOCK(inp);
 				if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
 				    (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 					SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 				}
 				(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 				    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_3);
 				/* No unlock tcb assoc is gone */
 				return (0);
 			}
 			if (TAILQ_EMPTY(&asoc->send_queue) &&
 			    TAILQ_EMPTY(&asoc->sent_queue) &&
 			    (asoc->stream_queue_cnt == 0)) {
 				/* there is nothing queued to send, so done */
 				if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
 					/* Jumps into the abort code of the else branch below. */
 					goto abort_anyway;
 				}
 				if ((SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) &&
 				    (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_ACK_SENT)) {
 					/* only send SHUTDOWN 1st time thru */
 					struct sctp_nets *netp;
 
 					if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
 					    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 					}
 					SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
 					SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
 					sctp_stop_timers_for_shutdown(stcb);
 					/* Prefer the alternate destination when one is set. */
 					if (stcb->asoc.alternate) {
 						netp = stcb->asoc.alternate;
 					} else {
 						netp = stcb->asoc.primary_destination;
 					}
 					sctp_send_shutdown(stcb, netp);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
 					    stcb->sctp_ep, stcb, netp);
 					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
 					    stcb->sctp_ep, stcb, netp);
 					sctp_chunk_output(stcb->sctp_ep, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_LOCKED);
 				}
 			} else {
 				/*
 				 * we still got (or just got) data to send,
 				 * so set SHUTDOWN_PENDING
 				 */
 				/*
 				 * XXX sockets draft says that SCTP_EOF
 				 * should be sent with no data. currently,
 				 * we will allow user data to be sent first
 				 * and move to SHUTDOWN-PENDING
 				 */
 				struct sctp_nets *netp;
 
 				if (stcb->asoc.alternate) {
 					netp = stcb->asoc.alternate;
 				} else {
 					netp = stcb->asoc.primary_destination;
 				}
 
 				asoc->state |= SCTP_STATE_SHUTDOWN_PENDING;
 				sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb,
 				    netp);
 				if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
 					asoc->state |= SCTP_STATE_PARTIAL_MSG_LEFT;
 				}
 				if (TAILQ_EMPTY(&asoc->send_queue) &&
 				    TAILQ_EMPTY(&asoc->sent_queue) &&
 				    (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
 					struct mbuf *op_err;
 
 					/*
 					 * Also entered via goto from the
 					 * "nothing queued" branch above.
 					 */
 			abort_anyway:
 					op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
 					stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_4;
 					sctp_send_abort_tcb(stcb, op_err, SCTP_SO_LOCKED);
 					SCTP_STAT_INCR_COUNTER32(sctps_aborted);
 					if ((SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_OPEN) ||
 					    (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
 						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 					}
 					SCTP_INP_RUNLOCK(inp);
 					/* The association is freed, so the TCB is not unlocked. */
 					(void)sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 					    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_5);
 					return (0);
 				} else {
 					sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
 				}
 			}
 			/* Graceful paths: tell the socket layer and drop both locks. */
 			soisdisconnecting(so);
 			SCTP_TCB_UNLOCK(stcb);
 			SCTP_INP_RUNLOCK(inp);
 			return (0);
 		}
 		/* not reached */
 	} else {
 		/* UDP model does not support this */
 		SCTP_INP_RUNLOCK(inp);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
 		return (EOPNOTSUPP);
 	}
 }
 
 int
 sctp_flush(struct socket *so, int how)
 {
 	/*
 	 * Nothing is released here: the socket buffer bookkeeping is merely
 	 * zeroed and a subsequent close is left to clear out any data. After
 	 * a shutdown(SHUT_RD) the socket layer already keeps the user from
 	 * reading, so dropping the counters is sufficient.
 	 */
 	struct sctp_inpcb *inp;
 	int one_to_many;
 	int flush_rd;
 	int flush_wr;
 
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		return (EINVAL);
 	}
 	/* Sample the socket model under the read lock. */
 	SCTP_INP_RLOCK(inp);
 	one_to_many = ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) != 0);
 	SCTP_INP_RUNLOCK(inp);
 	if (one_to_many) {
 		/* For the 1 to many model this does nothing */
 		return (0);
 	}
 	flush_rd = ((how == PRU_FLUSH_RD) || (how == PRU_FLUSH_RDWR));
 	flush_wr = ((how == PRU_FLUSH_WR) || (how == PRU_FLUSH_RDWR));
 
 	if (flush_rd) {
 		/*
 		 * Mark the endpoint unreadable, then reset the receive
 		 * buffer fields; only the count is possibly used.
 		 */
 		SCTP_INP_WLOCK(inp);
 		SCTP_INP_READ_LOCK(inp);
 		inp->sctp_flags |= SCTP_PCB_FLAGS_SOCKET_CANT_READ;
 		SCTP_INP_READ_UNLOCK(inp);
 		SCTP_INP_WUNLOCK(inp);
 		so->so_rcv.sb_cc = 0;
 		so->so_rcv.sb_mbcnt = 0;
 		so->so_rcv.sb_mb = NULL;
 	}
 	if (flush_wr) {
 		/* Same reset for the send buffer fields. */
 		so->so_snd.sb_cc = 0;
 		so->so_snd.sb_mbcnt = 0;
 		so->so_snd.sb_mb = NULL;
 	}
 	return (0);
 }
 
 /*
  * Handle a shutdown request for the write side of an SCTP socket.
  *
  * Return values, as established by the code below:
  *   EINVAL      - the socket has no PCB attached
  *   EOPNOTSUPP  - the endpoint is neither SCTP_PCB_FLAGS_TCPTYPE nor
  *                 SCTP_PCB_FLAGS_IN_TCPPOOL
  *   ENOTCONN    - the socket is not connected, connecting or disconnecting
  *   0           - every other path, including the ones where no protocol
  *                 action is taken and the one where the association is
  *                 aborted
  *
  * The INP read lock is taken on entry and released on every return path.
  */
 int
 sctp_shutdown(struct socket *so)
 {
 	struct sctp_inpcb *inp;
 
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		return (EINVAL);
 	}
 	SCTP_INP_RLOCK(inp);
 	/* For the UDP model this is an invalid call */
 	if (!((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) {
 		/* Restore the flags that the soshutdown took away. */
 		SOCKBUF_LOCK(&so->so_rcv);
 		so->so_rcv.sb_state &= ~SBS_CANTRCVMORE;
 		SOCKBUF_UNLOCK(&so->so_rcv);
 		/* This proc will wakeup for read and do nothing (I hope) */
 		SCTP_INP_RUNLOCK(inp);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
 		return (EOPNOTSUPP);
 	} else {
 		/*
 		 * Ok, if we reach here it's the TCP model and it is either a
 		 * SHUT_WR or SHUT_RDWR. This means we put the shutdown flag
 		 * against it.
 		 */
 		struct sctp_tcb *stcb;
 		struct sctp_association *asoc;
 		struct sctp_nets *netp;
 
 		if ((so->so_state &
 		    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
 			SCTP_INP_RUNLOCK(inp);
 			return (ENOTCONN);
 		}
 		socantsendmore(so);
 
 		/* Only the first association on the endpoint is considered. */
 		stcb = LIST_FIRST(&inp->sctp_asoc_list);
 		if (stcb == NULL) {
 			/*
 			 * Ok, we hit the case that the shutdown call was
 			 * made after an abort or something. Nothing to do
 			 * now.
 			 */
 			SCTP_INP_RUNLOCK(inp);
 			return (0);
 		}
 		SCTP_TCB_LOCK(stcb);
 		asoc = &stcb->asoc;
 		/* An association that is already being torn down is left alone. */
 		if (asoc->state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 			SCTP_TCB_UNLOCK(stcb);
 			SCTP_INP_RUNLOCK(inp);
 			return (0);
 		}
 		if ((SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_WAIT) &&
 		    (SCTP_GET_STATE(asoc) != SCTP_STATE_COOKIE_ECHOED) &&
 		    (SCTP_GET_STATE(asoc) != SCTP_STATE_OPEN)) {
 			/*
 			 * If we are not in or before ESTABLISHED, there is
 			 * no protocol action required.
 			 */
 			SCTP_TCB_UNLOCK(stcb);
 			SCTP_INP_RUNLOCK(inp);
 			return (0);
 		}
 		/* Prefer the alternate destination when one is set. */
 		if (stcb->asoc.alternate) {
 			netp = stcb->asoc.alternate;
 		} else {
 			netp = stcb->asoc.primary_destination;
 		}
 		if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) &&
 		    TAILQ_EMPTY(&asoc->send_queue) &&
 		    TAILQ_EMPTY(&asoc->sent_queue) &&
 		    (asoc->stream_queue_cnt == 0)) {
 			/*
 			 * Queues are empty, but the stream scheduler may
 			 * still report an incomplete user message; that case
 			 * is handled by aborting (label is in the else arm
 			 * below).
 			 */
 			if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
 				goto abort_anyway;
 			}
 			/* there is nothing queued to send, so I'm done... */
 			SCTP_STAT_DECR_GAUGE32(sctps_currestab);
 			SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
 			SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
 			sctp_stop_timers_for_shutdown(stcb);
 			sctp_send_shutdown(stcb, netp);
 			sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
 			    stcb->sctp_ep, stcb, netp);
 		} else {
 			/*
 			 * We still got (or just got) data to send, so set
 			 * SHUTDOWN_PENDING.
 			 */
 			SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
 			if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) {
 				SCTP_ADD_SUBSTATE(asoc, SCTP_STATE_PARTIAL_MSG_LEFT);
 			}
 			if (TAILQ_EMPTY(&asoc->send_queue) &&
 			    TAILQ_EMPTY(&asoc->sent_queue) &&
 			    (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) {
 				struct mbuf *op_err;
 
 		abort_anyway:
 				/*
 				 * Abort with a user-initiated cause. Only the
 				 * INP lock is released here; there is no
 				 * SCTP_TCB_UNLOCK on this path.
 				 * NOTE(review): presumably
 				 * sctp_abort_an_association() disposes of the
 				 * locked stcb - confirm against its
 				 * definition.
 				 */
 				op_err = sctp_generate_cause(SCTP_CAUSE_USER_INITIATED_ABT, "");
 				stcb->sctp_ep->last_abort_code = SCTP_FROM_SCTP_USRREQ + SCTP_LOC_6;
 				sctp_abort_an_association(stcb->sctp_ep, stcb,
 				    op_err, SCTP_SO_LOCKED);
 				SCTP_INP_RUNLOCK(inp);
 				return (0);
 			}
 		}
 		/* Both non-abort arms arm the shutdown guard timer. */
 		sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, netp);
 		/*
 		 * XXX: Why do this in the case where we have still data
 		 * queued?
 		 */
 		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_CLOSING, SCTP_SO_LOCKED);
 		SCTP_TCB_UNLOCK(stcb);
 		SCTP_INP_RUNLOCK(inp);
 		return (0);
 	}
 }
 
 /*
  * Copy a "user" presentable form of sa into ss. With INET6 the address is
  * first passed through sctp_recover_scope(), using a local scratch
  * sockaddr_in6, and the result is what gets copied. sa->sa_len bytes are
  * copied.
  *
  * The return value is meant as 0 on success / 1 on error; this
  * implementation has no failure path and always returns 0.
  */
 static uint32_t
 sctp_fill_user_address(struct sockaddr_storage *ss, struct sockaddr *sa)
 {
 #ifdef INET6
 	struct sockaddr_in6 scope_scratch;
 #endif
 
 #ifdef INET6
 	sa = (struct sockaddr *)sctp_recover_scope((struct sockaddr_in6 *)sa,
 	    &scope_scratch);
 #endif
 	memcpy(ss, sa, sa->sa_len);
 	return (0);
 }
 
 
 
 /*
  * Copy the local addresses of an endpoint (optionally narrowed to one
  * association) into a caller supplied buffer as a packed sequence of
  * sockaddr_in / sockaddr_in6 structures, each with the port set to
  * inp->sctp_lport.
  *
  *   inp    - endpoint whose addresses are listed
  *   stcb   - optional association; when non-NULL its scope settings are
  *            used and addresses restricted for it are skipped
  *   limit  - size in bytes of the buffer at sas
  *   sas    - output buffer
  *   vrf_id - VRF whose interface list is walked for bound-all endpoints
  *
  * Returns the number of bytes written (0 when limit is 0 or the VRF is not
  * found). An address is only written when it fits completely into the
  * space that is left, so the function never writes past sas + limit; the
  * listing simply stops at the first address that does not fit.
  *
  * NOTE: assumes addr lock is held
  */
 static size_t
 sctp_fill_up_addresses_vrf(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     size_t limit,
     struct sockaddr_storage *sas,
     uint32_t vrf_id)
 {
 	struct sctp_ifn *sctp_ifn;
 	struct sctp_ifa *sctp_ifa;
 	size_t actual;
 	int loopback_scope;
 #if defined(INET)
 	int ipv4_local_scope, ipv4_addr_legal;
 #endif
 #if defined(INET6)
 	int local_scope, site_scope, ipv6_addr_legal;
 #endif
 	struct sctp_vrf *vrf;
 
 	actual = 0;
 	if (limit == 0)
 		return (actual);
 
 	if (stcb) {
 		/* Turn on all the appropriate scope */
 		loopback_scope = stcb->asoc.scope.loopback_scope;
 #if defined(INET)
 		ipv4_local_scope = stcb->asoc.scope.ipv4_local_scope;
 		ipv4_addr_legal = stcb->asoc.scope.ipv4_addr_legal;
 #endif
 #if defined(INET6)
 		local_scope = stcb->asoc.scope.local_scope;
 		site_scope = stcb->asoc.scope.site_scope;
 		ipv6_addr_legal = stcb->asoc.scope.ipv6_addr_legal;
 #endif
 	} else {
 		/* Use generic values for endpoints. */
 		loopback_scope = 1;
 #if defined(INET)
 		ipv4_local_scope = 1;
 #endif
 #if defined(INET6)
 		local_scope = 1;
 		site_scope = 1;
 #endif
 		if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 #if defined(INET6)
 			ipv6_addr_legal = 1;
 #endif
 #if defined(INET)
 			if (SCTP_IPV6_V6ONLY(inp)) {
 				ipv4_addr_legal = 0;
 			} else {
 				ipv4_addr_legal = 1;
 			}
 #endif
 		} else {
 #if defined(INET6)
 			ipv6_addr_legal = 0;
 #endif
 #if defined(INET)
 			ipv4_addr_legal = 1;
 #endif
 		}
 	}
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL) {
 		return (0);
 	}
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 		LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
 			if ((loopback_scope == 0) &&
 			    SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) {
 				/* Skip loopback if loopback_scope not set */
 				continue;
 			}
 			LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 				if (stcb) {
 					/*
 					 * For the BOUND-ALL case, the list
 					 * associated with a TCB is always
 					 * considered a reverse list, i.e.
 					 * it lists addresses that are NOT
 					 * part of the association. If this
 					 * is one of those we must skip it.
 					 */
 					if (sctp_is_addr_restricted(stcb,
 					    sctp_ifa)) {
 						continue;
 					}
 				}
 				switch (sctp_ifa->address.sa.sa_family) {
 #ifdef INET
 				case AF_INET:
 					if (ipv4_addr_legal) {
 						struct sockaddr_in *sin;
 
 						sin = &sctp_ifa->address.sin;
 						if (sin->sin_addr.s_addr == 0) {
 							/*
 							 * we skip
 							 * unspecified
 							 * addresses
 							 */
 							continue;
 						}
 						if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 						    &sin->sin_addr) != 0) {
 							continue;
 						}
 						if ((ipv4_local_scope == 0) &&
 						    (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) {
 							continue;
 						}
 #ifdef INET6
 						if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
 							/* Stop before writing past the buffer. */
 							if (limit - actual < sizeof(struct sockaddr_in6)) {
 								return (actual);
 							}
 							in6_sin_2_v4mapsin6(sin, (struct sockaddr_in6 *)sas);
 							((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
 							sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(struct sockaddr_in6));
 							actual += sizeof(struct sockaddr_in6);
 						} else {
 #endif
 							/* Stop before writing past the buffer. */
 							if (limit - actual < sizeof(*sin)) {
 								return (actual);
 							}
 							memcpy(sas, sin, sizeof(*sin));
 							((struct sockaddr_in *)sas)->sin_port = inp->sctp_lport;
 							sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(*sin));
 							actual += sizeof(*sin);
 #ifdef INET6
 						}
 #endif
 						if (actual >= limit) {
 							return (actual);
 						}
 					} else {
 						continue;
 					}
 					break;
 #endif
 #ifdef INET6
 				case AF_INET6:
 					if (ipv6_addr_legal) {
 						struct sockaddr_in6 *sin6;
 
 						sin6 = &sctp_ifa->address.sin6;
 						if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 							/*
 							 * we skip
 							 * unspecified
 							 * addresses
 							 */
 							continue;
 						}
 						if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 						    &sin6->sin6_addr) != 0) {
 							continue;
 						}
 						if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
 							if (local_scope == 0)
 								continue;
 							if (sin6->sin6_scope_id == 0) {
 								if (sa6_recoverscope(sin6) != 0) {
 									/* bad link local address */
 									continue;
 								}
 							}
 						}
 						if ((site_scope == 0) &&
 						    (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) {
 							continue;
 						}
 						/* Stop before writing past the buffer. */
 						if (limit - actual < sizeof(*sin6)) {
 							return (actual);
 						}
 						memcpy(sas, sin6, sizeof(*sin6));
 						((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
 						sas = (struct sockaddr_storage *)((caddr_t)sas + sizeof(*sin6));
 						actual += sizeof(*sin6);
 						if (actual >= limit) {
 							return (actual);
 						}
 					} else {
 						continue;
 					}
 					break;
 #endif
 				default:
 					/* TSNH */
 					break;
 				}
 			}
 		}
 	} else {
 		struct sctp_laddr *laddr;
 		size_t sa_len;
 
 		LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 			if (stcb) {
 				if (sctp_is_addr_restricted(stcb, laddr->ifa)) {
 					continue;
 				}
 			}
 			/*
 			 * sctp_fill_user_address() copies sa_len bytes, so
 			 * make sure they fit before calling it.
 			 */
 			sa_len = laddr->ifa->address.sa.sa_len;
 			if (limit - actual < sa_len) {
 				return (actual);
 			}
 			if (sctp_fill_user_address(sas, &laddr->ifa->address.sa))
 				continue;
 			switch (laddr->ifa->address.sa.sa_family) {
 #ifdef INET
 			case AF_INET:
 				((struct sockaddr_in *)sas)->sin_port = inp->sctp_lport;
 				break;
 #endif
 #ifdef INET6
 			case AF_INET6:
 				((struct sockaddr_in6 *)sas)->sin6_port = inp->sctp_lport;
 				break;
 #endif
 			default:
 				/* TSNH */
 				break;
 			}
 			sas = (struct sockaddr_storage *)((caddr_t)sas + sa_len);
 			actual += sa_len;
 			if (actual >= limit) {
 				return (actual);
 			}
 		}
 	}
 	return (actual);
 }
 
 /*
  * Locked front end for sctp_fill_up_addresses_vrf(): takes the address
  * read lock, fills sas for the endpoint's default VRF and returns the
  * byte count reported by the helper.
  */
 static size_t
 sctp_fill_up_addresses(struct sctp_inpcb *inp,
     struct sctp_tcb *stcb,
     size_t limit,
     struct sockaddr_storage *sas)
 {
 	size_t filled;
 
 	SCTP_IPI_ADDR_RLOCK();
 	filled = sctp_fill_up_addresses_vrf(inp, stcb, limit, sas,
 	    inp->def_vrf_id);
 	SCTP_IPI_ADDR_RUNLOCK();
 
 	return (filled);
 }
 
 /*
  * Return the MAXIMUM number of bytes the addresses of an endpoint could
  * occupy when listed, for both the bound-all and the sub-set bound case.
  * The real listing may be smaller: a sub-set bound endpoint may have an
  * exclusion list for a given TCB, and in the bound-all case a TCB may not
  * include the loopback or other addresses. Returns 0 if the VRF is not
  * found.
  *
  * NOTE: assumes addr lock is held
  */
 static int
 sctp_count_max_addresses_vrf(struct sctp_inpcb *inp, uint32_t vrf_id)
 {
 	struct sctp_vrf *vrf;
 	struct sctp_ifn *sctp_ifn;
 	struct sctp_ifa *sctp_ifa;
 	struct sctp_laddr *laddr;
 	int total;
 
 	vrf = sctp_find_vrf(vrf_id);
 	if (vrf == NULL) {
 		return (0);
 	}
 	total = 0;
 
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
 		/* Sub-set bound: walk the endpoint's own address list. */
 		LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 			switch (laddr->ifa->address.sa.sa_family) {
 #ifdef INET
 			case AF_INET:
 #ifdef INET6
 				/* A mapped IPv4 address takes a sockaddr_in6. */
 				if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
 					total += sizeof(struct sockaddr_in6);
 					break;
 				}
 #endif
 				total += sizeof(struct sockaddr_in);
 				break;
 #endif
 #ifdef INET6
 			case AF_INET6:
 				total += sizeof(struct sockaddr_in6);
 				break;
 #endif
 			default:
 				break;
 			}
 		}
 		return (total);
 	}
 	/* Bound-all: walk every address of every interface in the VRF. */
 	LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) {
 		LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) {
 			/* Count them if they are the right type */
 			switch (sctp_ifa->address.sa.sa_family) {
 #ifdef INET
 			case AF_INET:
 #ifdef INET6
 				/* A mapped IPv4 address takes a sockaddr_in6. */
 				if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
 					total += sizeof(struct sockaddr_in6);
 					break;
 				}
 #endif
 				total += sizeof(struct sockaddr_in);
 				break;
 #endif
 #ifdef INET6
 			case AF_INET6:
 				total += sizeof(struct sockaddr_in6);
 				break;
 #endif
 			default:
 				break;
 			}
 		}
 	}
 	return (total);
 }
 
 /*
  * Locked front end for sctp_count_max_addresses_vrf(): takes the address
  * read lock and returns the count for the endpoint's default VRF.
  */
 static int
 sctp_count_max_addresses(struct sctp_inpcb *inp)
 {
 	int max_bytes;
 
 	SCTP_IPI_ADDR_RLOCK();
 	max_bytes = sctp_count_max_addresses_vrf(inp, inp->def_vrf_id);
 	SCTP_IPI_ADDR_RUNLOCK();
 
 	return (max_bytes);
 }
 
 /*
  * Set up a new association towards a list of peer addresses.
  *
  *   so      - socket the request arrived on
  *   inp     - SCTP endpoint of that socket
  *   optval  - on entry: an unsigned int address count immediately followed
  *             by the packed sockaddrs; on success the association id is
  *             written back to the start of this buffer
  *   optsize - size of optval in bytes; (optsize - sizeof(int)) is handed to
  *             sctp_connectx_helper_find() as the size of the address area
  *   p       - passed to sctp_inpcb_bind() and, cast to struct thread *, to
  *             sctp_aloc_assoc()
  *   delay   - non-zero: mark the association as a delayed connection and
  *             only start the INIT timer; zero: send the INIT right away
  *
  * Returns 0 on success or an errno value (EADDRINUSE, EINVAL, EALREADY,
  * EFAULT, or whatever the helpers store in error).
  *
  * NOTE(review): optval is dereferenced without checking optsize against
  * sizeof(int) here - presumably the caller validates the size; confirm.
  */
 static int
 sctp_do_connect_x(struct socket *so, struct sctp_inpcb *inp, void *optval,
     size_t optsize, void *p, int delay)
 {
 	int error = 0;
 	int creat_lock_on = 0;
 	struct sctp_tcb *stcb = NULL;
 	struct sockaddr *sa;
 	unsigned int num_v6 = 0, num_v4 = 0, *totaddrp, totaddr;
 	uint32_t vrf_id;
 	int bad_addresses = 0;
 	sctp_assoc_t *a_id;
 
 	SCTPDBG(SCTP_DEBUG_PCB1, "Connectx called\n");
 
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
 		/* We are already connected AND the TCP model */
 		SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
 		return (EADDRINUSE);
 	}
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) &&
 	    (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE))) {
 		SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		return (EINVAL);
 	}
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
 		SCTP_INP_RLOCK(inp);
 		stcb = LIST_FIRST(&inp->sctp_asoc_list);
 		SCTP_INP_RUNLOCK(inp);
 	}
 	if (stcb) {
 		SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
 		return (EALREADY);
 	}
 	/*
 	 * From here on every exit goes through out_now, which drops the
 	 * reference taken below and, if creat_lock_on is still set, the
 	 * association create lock.
 	 */
 	SCTP_INP_INCR_REF(inp);
 	SCTP_ASOC_CREATE_LOCK(inp);
 	creat_lock_on = 1;
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
 		SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EFAULT);
 		error = EFAULT;
 		goto out_now;
 	}
 	/* The buffer starts with the address count; the addresses follow it. */
 	totaddrp = (unsigned int *)optval;
 	totaddr = *totaddrp;
 	sa = (struct sockaddr *)(totaddrp + 1);
 	stcb = sctp_connectx_helper_find(inp, sa, &totaddr, &num_v4, &num_v6, &error, (unsigned int)(optsize - sizeof(int)), &bad_addresses);
 	if ((stcb != NULL) || bad_addresses) {
 		/* Already have or am bringing up an association */
 		SCTP_ASOC_CREATE_UNLOCK(inp);
 		creat_lock_on = 0;
 		if (stcb)
 			SCTP_TCB_UNLOCK(stcb);
 		/*
 		 * With bad addresses, error is left as the helper set it;
 		 * otherwise an association was found, so report EALREADY.
 		 */
 		if (bad_addresses == 0) {
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
 			error = EALREADY;
 		}
 		goto out_now;
 	}
 #ifdef INET6
 	/* IPv6 addresses were supplied but the endpoint is not bound V6. */
 	if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) &&
 	    (num_v6 > 0)) {
 		error = EINVAL;
 		goto out_now;
 	}
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
 	    (num_v4 > 0)) {
 		struct in6pcb *inp6;
 
 		inp6 = (struct in6pcb *)inp;
 		if (SCTP_IPV6_V6ONLY(inp6)) {
 			/*
 			 * if IPV6_V6ONLY flag, ignore connections destined
 			 * to a v4 addr or v4-mapped addr
 			 */
 			SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 			error = EINVAL;
 			goto out_now;
 		}
 	}
 #endif				/* INET6 */
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) ==
 	    SCTP_PCB_FLAGS_UNBOUND) {
 		/* Bind an ephemeral port */
 		error = sctp_inpcb_bind(so, NULL, NULL, p);
 		if (error) {
 			goto out_now;
 		}
 	}
 	/* FIX ME: do we want to pass in a vrf on the connect call? */
 	vrf_id = inp->def_vrf_id;
 
 
 	/* We are GOOD to go */
 	stcb = sctp_aloc_assoc(inp, sa, &error, 0, vrf_id,
 	    inp->sctp_ep.pre_open_stream_count,
 	    inp->sctp_ep.port,
 	    (struct thread *)p
 	    );
 	if (stcb == NULL) {
 		/* Gak! no memory */
 		goto out_now;
 	}
 	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
 		stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
 		/* Set the connected flag so we can queue data */
 		soisconnecting(so);
 	}
 	SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
 	/*
 	 * Move to the second address: the first one was consumed by
 	 * sctp_aloc_assoc() above, so step over it by its family's size.
 	 */
 	switch (sa->sa_family) {
 #ifdef INET
 	case AF_INET:
 		sa = (struct sockaddr *)((caddr_t)sa + sizeof(struct sockaddr_in));
 		break;
 #endif
 #ifdef INET6
 	case AF_INET6:
 		sa = (struct sockaddr *)((caddr_t)sa + sizeof(struct sockaddr_in6));
 		break;
 #endif
 	default:
 		break;
 	}
 
 	error = 0;
 	/* Add the remaining (totaddr - 1) addresses to the association. */
 	sctp_connectx_helper_add(stcb, sa, (totaddr - 1), &error);
 	if (error) {
 		/* Adding an address failed: tear the new association down. */
 		(void)sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE,
 		    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_7);
 		goto out_now;
 	}
 	/* Fill in the return id; it overwrites the start of optval. */
 	a_id = (sctp_assoc_t *)optval;
 	*a_id = sctp_get_associd(stcb);
 
 	/* initialize authentication parameters for the assoc */
 	sctp_initialize_auth_params(inp, stcb);
 
 	if (delay) {
 		/* doing delayed connection */
 		stcb->asoc.delayed_connection = 1;
 		sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, stcb->asoc.primary_destination);
 	} else {
 		(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
 		sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
 	}
 	SCTP_TCB_UNLOCK(stcb);
 out_now:
 	if (creat_lock_on) {
 		SCTP_ASOC_CREATE_UNLOCK(inp);
 	}
 	SCTP_INP_DECR_REF(inp);
 	return (error);
 }
 
 /*
  * Look up the association an option applies to and store it in stcb.
  *
  *  - TCP-type / TCP-pool endpoints: the first association on the endpoint
  *    (or NULL); when found it is TCB-locked before the INP lock is dropped.
  *  - otherwise, assoc_id > SCTP_ALL_ASSOC: the result of
  *    sctp_findassociation_ep_asocid(inp, assoc_id, 1). If that is NULL the
  *    macro sets error = ENOENT and executes `break'.
  *  - otherwise stcb is set to NULL.
  *
  * Because of the embedded `break' the macro must be expanded directly
  * inside a switch case or loop body (it therefore cannot be wrapped in
  * do { } while (0)), and a variable named `error' must be in scope.
  * All macro arguments are parenthesized so that non-trivial expressions
  * can be passed safely.
  */
 #define SCTP_FIND_STCB(inp, stcb, assoc_id) { \
 	if (((inp)->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||\
 	    ((inp)->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { \
 		SCTP_INP_RLOCK(inp); \
 		(stcb) = LIST_FIRST(&(inp)->sctp_asoc_list); \
 		if (stcb) { \
 			SCTP_TCB_LOCK(stcb); \
 		} \
 		SCTP_INP_RUNLOCK(inp); \
 	} else if ((assoc_id) > SCTP_ALL_ASSOC) { \
 		(stcb) = sctp_findassociation_ep_asocid((inp), (assoc_id), 1); \
 		if ((stcb) == NULL) { \
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT); \
 			error = ENOENT; \
 			break; \
 		} \
 	} else { \
 		(stcb) = NULL; \
 	} \
 }
 
 
 /*
  * Validate the size of an option buffer and cast it.
  *
  * If size is smaller than sizeof(type) the macro sets error = EINVAL and
  * executes `break'; otherwise destp is assigned srcp cast to (type *).
  *
  * Because of the embedded `break' the macro must be expanded directly
  * inside a switch case or loop body (it therefore cannot be wrapped in
  * do { } while (0)). Variables named `error' and `inp' must be in scope;
  * `inp' is only handed to SCTP_LTRACE_ERR_RET. The size, srcp and destp
  * arguments are parenthesized so that compound expressions (for example
  * a conditional or pointer arithmetic) are evaluated as a unit.
  */
 #define SCTP_CHECK_AND_CAST(destp, srcp, type, size) {\
 	if ((size) < sizeof(type)) { \
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL); \
 		error = EINVAL; \
 		break; \
 	} else { \
 		(destp) = (type *)(srcp); \
 	} \
 }
 
 static int
 sctp_getopt(struct socket *so, int optname, void *optval, size_t *optsize,
     void *p)
 {
 	struct sctp_inpcb *inp = NULL;
 	int error, val = 0;
 	struct sctp_tcb *stcb = NULL;
 
 	if (optval == NULL) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		return (EINVAL);
 	}
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		return EINVAL;
 	}
 	error = 0;
 
 	switch (optname) {
 	case SCTP_NODELAY:
 	case SCTP_AUTOCLOSE:
 	case SCTP_EXPLICIT_EOR:
 	case SCTP_AUTO_ASCONF:
 	case SCTP_DISABLE_FRAGMENTS:
 	case SCTP_I_WANT_MAPPED_V4_ADDR:
 	case SCTP_USE_EXT_RCVINFO:
 		SCTP_INP_RLOCK(inp);
 		switch (optname) {
 		case SCTP_DISABLE_FRAGMENTS:
 			val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NO_FRAGMENT);
 			break;
 		case SCTP_I_WANT_MAPPED_V4_ADDR:
 			val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4);
 			break;
 		case SCTP_AUTO_ASCONF:
 			if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 				/* only valid for bound all sockets */
 				val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTO_ASCONF);
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				goto flags_out;
 			}
 			break;
 		case SCTP_EXPLICIT_EOR:
 			val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR);
 			break;
 		case SCTP_NODELAY:
 			val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY);
 			break;
 		case SCTP_USE_EXT_RCVINFO:
 			val = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXT_RCVINFO);
 			break;
 		case SCTP_AUTOCLOSE:
 			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE))
 				val = TICKS_TO_SEC(inp->sctp_ep.auto_close_time);
 			else
 				val = 0;
 			break;
 
 		default:
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
 			error = ENOPROTOOPT;
 		}		/* end switch (sopt->sopt_name) */
 		if (*optsize < sizeof(val)) {
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 			error = EINVAL;
 		}
 flags_out:
 		SCTP_INP_RUNLOCK(inp);
 		if (error == 0) {
 			/* return the option value */
 			*(int *)optval = val;
 			*optsize = sizeof(val);
 		}
 		break;
 	case SCTP_GET_PACKET_LOG:
 		{
 #ifdef  SCTP_PACKET_LOGGING
 			uint8_t *target;
 			int ret;
 
 			SCTP_CHECK_AND_CAST(target, optval, uint8_t, *optsize);
 			ret = sctp_copy_out_packet_log(target, (int)*optsize);
 			*optsize = ret;
 #else
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
 			error = EOPNOTSUPP;
 #endif
 			break;
 		}
 	case SCTP_REUSE_PORT:
 		{
 			uint32_t *value;
 
 			if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)) {
 				/* Can't do this for a 1-m socket */
 				error = EINVAL;
 				break;
 			}
 			SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
 			*value = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE);
 			*optsize = sizeof(uint32_t);
 			break;
 		}
 	case SCTP_PARTIAL_DELIVERY_POINT:
 		{
 			uint32_t *value;
 
 			SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
 			*value = inp->partial_delivery_point;
 			*optsize = sizeof(uint32_t);
 			break;
 		}
 	case SCTP_FRAGMENT_INTERLEAVE:
 		{
 			uint32_t *value;
 
 			SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
 			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE)) {
 				if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS)) {
 					*value = SCTP_FRAG_LEVEL_2;
 				} else {
 					*value = SCTP_FRAG_LEVEL_1;
 				}
 			} else {
 				*value = SCTP_FRAG_LEVEL_0;
 			}
 			*optsize = sizeof(uint32_t);
 			break;
 		}
 	case SCTP_INTERLEAVING_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				av->assoc_value = stcb->asoc.idata_supported;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					if (inp->idata_supported) {
 						av->assoc_value = 1;
 					} else {
 						av->assoc_value = 0;
 					}
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_CMT_ON_OFF:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 			if (stcb) {
 				av->assoc_value = stcb->asoc.sctp_cmt_on_off;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					av->assoc_value = inp->sctp_cmt_on_off;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_PLUGGABLE_CC:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 			if (stcb) {
 				av->assoc_value = stcb->asoc.congestion_control_module;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					av->assoc_value = inp->sctp_ep.sctp_default_cc_module;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_CC_OPTION:
 		{
 			struct sctp_cc_option *cc_opt;
 
 			SCTP_CHECK_AND_CAST(cc_opt, optval, struct sctp_cc_option, *optsize);
 			SCTP_FIND_STCB(inp, stcb, cc_opt->aid_value.assoc_id);
 			if (stcb == NULL) {
 				error = EINVAL;
 			} else {
 				if (stcb->asoc.cc_functions.sctp_cwnd_socket_option == NULL) {
 					error = ENOTSUP;
 				} else {
 					error = (*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 0, cc_opt);
 					*optsize = sizeof(struct sctp_cc_option);
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			}
 			break;
 		}
 	case SCTP_PLUGGABLE_SS:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 			if (stcb) {
 				av->assoc_value = stcb->asoc.stream_scheduling_module;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					av->assoc_value = inp->sctp_ep.sctp_default_ss_module;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_SS_VALUE:
 		{
 			struct sctp_stream_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_stream_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 			if (stcb) {
 				if ((av->stream_id >= stcb->asoc.streamoutcnt) ||
 				    (stcb->asoc.ss_functions.sctp_ss_get_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id],
 				    &av->stream_value) < 0)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				} else {
 					*optsize = sizeof(struct sctp_stream_value);
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				/*
 				 * Can't get stream value without
 				 * association
 				 */
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 			}
 			break;
 		}
 	case SCTP_GET_ADDR_LEN:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			error = EINVAL;
 #ifdef INET
 			if (av->assoc_value == AF_INET) {
 				av->assoc_value = sizeof(struct sockaddr_in);
 				error = 0;
 			}
 #endif
 #ifdef INET6
 			if (av->assoc_value == AF_INET6) {
 				av->assoc_value = sizeof(struct sockaddr_in6);
 				error = 0;
 			}
 #endif
 			if (error) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 			} else {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_GET_ASSOC_NUMBER:
 		{
 			uint32_t *value, cnt;
 
 			SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
 			SCTP_INP_RLOCK(inp);
 			if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 			    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
 				/* Can't do this for a 1-1 socket */
 				error = EINVAL;
 				SCTP_INP_RUNLOCK(inp);
 				break;
 			}
 			cnt = 0;
 			LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 				cnt++;
 			}
 			SCTP_INP_RUNLOCK(inp);
 			*value = cnt;
 			*optsize = sizeof(uint32_t);
 			break;
 		}
 	case SCTP_GET_ASSOC_ID_LIST:
 		{
 			struct sctp_assoc_ids *ids;
 			uint32_t at;
 			size_t limit;
 
 			SCTP_CHECK_AND_CAST(ids, optval, struct sctp_assoc_ids, *optsize);
 			SCTP_INP_RLOCK(inp);
 			if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 			    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
 				/* Can't do this for a 1-1 socket */
 				error = EINVAL;
 				SCTP_INP_RUNLOCK(inp);
 				break;
 			}
 			at = 0;
 			limit = (*optsize - sizeof(uint32_t)) / sizeof(sctp_assoc_t);
 			LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 				if (at < limit) {
 					ids->gaids_assoc_id[at++] = sctp_get_associd(stcb);
 					if (at == 0) {
 						error = EINVAL;
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 						break;
 					}
 				} else {
 					error = EINVAL;
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 					break;
 				}
 			}
 			SCTP_INP_RUNLOCK(inp);
 			if (error == 0) {
 				ids->gaids_number_of_ids = at;
 				*optsize = ((at * sizeof(sctp_assoc_t)) + sizeof(uint32_t));
 			}
 			break;
 		}
 	case SCTP_CONTEXT:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				av->assoc_value = stcb->asoc.context;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					av->assoc_value = inp->sctp_context;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_VRF_ID:
 		{
 			uint32_t *default_vrfid;
 
 			SCTP_CHECK_AND_CAST(default_vrfid, optval, uint32_t, *optsize);
 			*default_vrfid = inp->def_vrf_id;
 			*optsize = sizeof(uint32_t);
 			break;
 		}
 	case SCTP_GET_ASOC_VRF:
 		{
 			struct sctp_assoc_value *id;
 
 			SCTP_CHECK_AND_CAST(id, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, id->assoc_id);
 			if (stcb == NULL) {
 				error = EINVAL;
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 			} else {
 				id->assoc_value = stcb->asoc.vrf_id;
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_GET_VRF_IDS:
 		{
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
 			error = EOPNOTSUPP;
 			break;
 		}
 	case SCTP_GET_NONCE_VALUES:
 		{
 			struct sctp_get_nonce_values *gnv;
 
 			SCTP_CHECK_AND_CAST(gnv, optval, struct sctp_get_nonce_values, *optsize);
 			SCTP_FIND_STCB(inp, stcb, gnv->gn_assoc_id);
 
 			if (stcb) {
 				gnv->gn_peers_tag = stcb->asoc.peer_vtag;
 				gnv->gn_local_tag = stcb->asoc.my_vtag;
 				SCTP_TCB_UNLOCK(stcb);
 				*optsize = sizeof(struct sctp_get_nonce_values);
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
 				error = ENOTCONN;
 			}
 			break;
 		}
 	case SCTP_DELAYED_SACK:
 		{
 			struct sctp_sack_info *sack;
 
 			SCTP_CHECK_AND_CAST(sack, optval, struct sctp_sack_info, *optsize);
 			SCTP_FIND_STCB(inp, stcb, sack->sack_assoc_id);
 			if (stcb) {
 				sack->sack_delay = stcb->asoc.delayed_ack;
 				sack->sack_freq = stcb->asoc.sack_freq;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (sack->sack_assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					sack->sack_delay = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV]);
 					sack->sack_freq = inp->sctp_ep.sctp_sack_freq;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_sack_info);
 			}
 			break;
 		}
 	case SCTP_GET_SNDBUF_USE:
 		{
 			struct sctp_sockstat *ss;
 
 			SCTP_CHECK_AND_CAST(ss, optval, struct sctp_sockstat, *optsize);
 			SCTP_FIND_STCB(inp, stcb, ss->ss_assoc_id);
 
 			if (stcb) {
 				ss->ss_total_sndbuf = stcb->asoc.total_output_queue_size;
 				ss->ss_total_recv_buf = (stcb->asoc.size_on_reasm_queue +
 				    stcb->asoc.size_on_all_streams);
 				SCTP_TCB_UNLOCK(stcb);
 				*optsize = sizeof(struct sctp_sockstat);
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
 				error = ENOTCONN;
 			}
 			break;
 		}
 	case SCTP_MAX_BURST:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				av->assoc_value = stcb->asoc.max_burst;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					av->assoc_value = inp->sctp_ep.max_burst;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_MAXSEG:
 		{
 			struct sctp_assoc_value *av;
 			int ovh;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				av->assoc_value = sctp_get_frag_point(stcb, &stcb->asoc);
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 						ovh = SCTP_MED_OVERHEAD;
 					} else {
 						ovh = SCTP_MED_V4_OVERHEAD;
 					}
 					if (inp->sctp_frag_point >= SCTP_DEFAULT_MAXSEGMENT)
 						av->assoc_value = 0;
 					else
 						av->assoc_value = inp->sctp_frag_point - ovh;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_GET_STAT_LOG:
 		error = sctp_fill_stat_log(optval, optsize);
 		break;
 	case SCTP_EVENTS:
 		{
 			struct sctp_event_subscribe *events;
 
 			SCTP_CHECK_AND_CAST(events, optval, struct sctp_event_subscribe, *optsize);
 			memset(events, 0, sizeof(struct sctp_event_subscribe));
 			SCTP_INP_RLOCK(inp);
 			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT))
 				events->sctp_data_io_event = 1;
 
 			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT))
 				events->sctp_association_event = 1;
 
 			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVPADDREVNT))
 				events->sctp_address_event = 1;
 
 			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT))
 				events->sctp_send_failure_event = 1;
 
 			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVPEERERR))
 				events->sctp_peer_error_event = 1;
 
 			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT))
 				events->sctp_shutdown_event = 1;
 
 			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT))
 				events->sctp_partial_delivery_event = 1;
 
 			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT))
 				events->sctp_adaptation_layer_event = 1;
 
 			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTHEVNT))
 				events->sctp_authentication_event = 1;
 
 			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_DRYEVNT))
 				events->sctp_sender_dry_event = 1;
 
 			if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT))
 				events->sctp_stream_reset_event = 1;
 			SCTP_INP_RUNLOCK(inp);
 			*optsize = sizeof(struct sctp_event_subscribe);
 			break;
 		}
 	case SCTP_ADAPTATION_LAYER:
 		{
 			uint32_t *value;
 
 			SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
 
 			SCTP_INP_RLOCK(inp);
 			*value = inp->sctp_ep.adaptation_layer_indicator;
 			SCTP_INP_RUNLOCK(inp);
 			*optsize = sizeof(uint32_t);
 			break;
 		}
 	case SCTP_SET_INITIAL_DBG_SEQ:
 		{
 			uint32_t *value;
 
 			SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
 			SCTP_INP_RLOCK(inp);
 			*value = inp->sctp_ep.initial_sequence_debug;
 			SCTP_INP_RUNLOCK(inp);
 			*optsize = sizeof(uint32_t);
 			break;
 		}
 	case SCTP_GET_LOCAL_ADDR_SIZE:
 		{
 			uint32_t *value;
 
 			SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
 			SCTP_INP_RLOCK(inp);
 			*value = sctp_count_max_addresses(inp);
 			SCTP_INP_RUNLOCK(inp);
 			*optsize = sizeof(uint32_t);
 			break;
 		}
 	case SCTP_GET_REMOTE_ADDR_SIZE:
 		{
 			uint32_t *value;
 			size_t size;
 			struct sctp_nets *net;
 
 			SCTP_CHECK_AND_CAST(value, optval, uint32_t, *optsize);
 			/* FIXME MT: change to sctp_assoc_value? */
 			SCTP_FIND_STCB(inp, stcb, (sctp_assoc_t)*value);
 
 			if (stcb) {
 				size = 0;
 				/* Count the sizes */
 				TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 					switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 					case AF_INET:
 #ifdef INET6
 						if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
 							size += sizeof(struct sockaddr_in6);
 						} else {
 							size += sizeof(struct sockaddr_in);
 						}
 #else
 						size += sizeof(struct sockaddr_in);
 #endif
 						break;
 #endif
 #ifdef INET6
 					case AF_INET6:
 						size += sizeof(struct sockaddr_in6);
 						break;
 #endif
 					default:
 						break;
 					}
 				}
 				SCTP_TCB_UNLOCK(stcb);
 				*value = (uint32_t)size;
 				*optsize = sizeof(uint32_t);
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
 				error = ENOTCONN;
 			}
 			break;
 		}
 	case SCTP_GET_PEER_ADDRESSES:
 		/*
 		 * Get the peer address information: the caller passes in
 		 * an array and we pack the addresses into it.
 		 */
 		{
 			size_t cpsz, left;
 			struct sockaddr_storage *sas;
 			struct sctp_nets *net;
 			struct sctp_getaddresses *saddr;
 
 			SCTP_CHECK_AND_CAST(saddr, optval, struct sctp_getaddresses, *optsize);
 			SCTP_FIND_STCB(inp, stcb, saddr->sget_assoc_id);
 
 			if (stcb) {
 				left = (*optsize) - sizeof(struct sctp_getaddresses);
 				*optsize = sizeof(struct sctp_getaddresses);
 				sas = (struct sockaddr_storage *)&saddr->addr[0];
 
 				TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 					switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 					case AF_INET:
 #ifdef INET6
 						if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
 							cpsz = sizeof(struct sockaddr_in6);
 						} else {
 							cpsz = sizeof(struct sockaddr_in);
 						}
 #else
 						cpsz = sizeof(struct sockaddr_in);
 #endif
 						break;
 #endif
 #ifdef INET6
 					case AF_INET6:
 						cpsz = sizeof(struct sockaddr_in6);
 						break;
 #endif
 					default:
 						cpsz = 0;
 						break;
 					}
 					if (cpsz == 0) {
 						break;
 					}
 					if (left < cpsz) {
 						/* not enough room. */
 						break;
 					}
 #if defined(INET) && defined(INET6)
 					if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) &&
 					    (net->ro._l_addr.sa.sa_family == AF_INET)) {
 						/* Must map the address */
 						in6_sin_2_v4mapsin6(&net->ro._l_addr.sin,
 						    (struct sockaddr_in6 *)sas);
 					} else {
 						memcpy(sas, &net->ro._l_addr, cpsz);
 					}
 #else
 					memcpy(sas, &net->ro._l_addr, cpsz);
 #endif
 					((struct sockaddr_in *)sas)->sin_port = stcb->rport;
 
 					sas = (struct sockaddr_storage *)((caddr_t)sas + cpsz);
 					left -= cpsz;
 					*optsize += cpsz;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
 				error = ENOENT;
 			}
 			break;
 		}
 	case SCTP_GET_LOCAL_ADDRESSES:
 		{
 			/*
 			 * Pack the local addresses into the caller's
 			 * struct sctp_getaddresses, starting at addr[0], and
 			 * report how many bytes of the buffer are in use.
 			 */
 			size_t limit, actual, hdrlen;
 			struct sockaddr_storage *sas;
 			struct sctp_getaddresses *saddr;
 
 			SCTP_CHECK_AND_CAST(saddr, optval, struct sctp_getaddresses, *optsize);
 			SCTP_FIND_STCB(inp, stcb, saddr->sget_assoc_id);
 
 			sas = (struct sockaddr_storage *)&saddr->addr[0];
 			/* Number of bytes that precede addr[0] in the buffer. */
 			hdrlen = (size_t)((caddr_t)sas - (caddr_t)saddr);
 			limit = *optsize - hdrlen;
 			actual = sctp_fill_up_addresses(inp, stcb, limit, sas);
 			if (stcb) {
 				SCTP_TCB_UNLOCK(stcb);
 			}
 			/*
 			 * Report the header plus the bytes actually written.
 			 * The former sizeof(struct sockaddr_storage) term was
 			 * unrelated to the buffer layout and over-reported
 			 * the length.
 			 */
 			*optsize = hdrlen + actual;
 			break;
 		}
 	case SCTP_PEER_ADDR_PARAMS:
 		{
 			/*
 			 * Report heartbeat interval, path max retransmits,
 			 * path MTU, DSCP and IPv6 flow label together with the
 			 * matching SPP_* flags.  Values come from the given
 			 * peer address when it resolves to a destination, else
 			 * from the association, else from the endpoint
 			 * defaults.
 			 */
 			struct sctp_paddrparams *paddrp;
 			struct sctp_nets *net;
 			struct sockaddr *addr;
 #if defined(INET) && defined(INET6)
 			struct sockaddr_in sin_store;
 #endif
 
 			SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, *optsize);
 			SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id);
 
 #if defined(INET) && defined(INET6)
 			/* Convert a V4-mapped IPv6 address to plain IPv4 for lookup. */
 			if (paddrp->spp_address.ss_family == AF_INET6) {
 				struct sockaddr_in6 *sin6;
 
 				sin6 = (struct sockaddr_in6 *)&paddrp->spp_address;
 				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 					in6_sin6_2_sin(&sin_store, sin6);
 					addr = (struct sockaddr *)&sin_store;
 				} else {
 					addr = (struct sockaddr *)&paddrp->spp_address;
 				}
 			} else {
 				addr = (struct sockaddr *)&paddrp->spp_address;
 			}
 #else
 			addr = (struct sockaddr *)&paddrp->spp_address;
 #endif
 			if (stcb != NULL) {
 				net = sctp_findnet(stcb, addr);
 			} else {
 				/*
 				 * We increment here since
 				 * sctp_findassociation_ep_addr() will do a
 				 * decrement if it finds the stcb as long as
 				 * the locked tcb (last argument) is NOT a
 				 * TCB, i.e. NULL.
 				 */
 				net = NULL;
 				SCTP_INP_INCR_REF(inp);
 				stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
 			/*
 			 * An association without a matching destination is
 			 * only acceptable when the caller passed the wildcard
 			 * address of a supported family.
 			 */
 			if ((stcb != NULL) && (net == NULL)) {
 #ifdef INET
 				if (addr->sa_family == AF_INET) {
 					struct sockaddr_in *sin;
 
 					sin = (struct sockaddr_in *)addr;
 					if (sin->sin_addr.s_addr != INADDR_ANY) {
 						error = EINVAL;
 						SCTP_TCB_UNLOCK(stcb);
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 						break;
 					}
 				} else
 #endif
 #ifdef INET6
 				if (addr->sa_family == AF_INET6) {
 					struct sockaddr_in6 *sin6;
 
 					sin6 = (struct sockaddr_in6 *)addr;
 					if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 						error = EINVAL;
 						SCTP_TCB_UNLOCK(stcb);
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 						break;
 					}
 				} else
 #endif
 				{
 					error = EAFNOSUPPORT;
 					SCTP_TCB_UNLOCK(stcb);
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 					break;
 				}
 			}
 			if (stcb != NULL) {
 				/* Applies to the specific association */
 				paddrp->spp_flags = 0;
 				if (net != NULL) {
 					paddrp->spp_hbinterval = net->heart_beat_delay;
 					paddrp->spp_pathmaxrxt = net->failure_threshold;
 					paddrp->spp_pathmtu = net->mtu;
 					switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 					case AF_INET:
 						paddrp->spp_pathmtu -= SCTP_MIN_V4_OVERHEAD;
 						break;
 #endif
 #ifdef INET6
 					case AF_INET6:
 						/*
 						 * IPv6 paths use the IPv6
 						 * overhead, matching the
 						 * spinfo_mtu computation for
 						 * AF_INET6 elsewhere in this
 						 * switch.
 						 */
 						paddrp->spp_pathmtu -= SCTP_MIN_OVERHEAD;
 						break;
 #endif
 					default:
 						break;
 					}
 					/* get flags for HB */
 					if (net->dest_state & SCTP_ADDR_NOHB) {
 						paddrp->spp_flags |= SPP_HB_DISABLE;
 					} else {
 						paddrp->spp_flags |= SPP_HB_ENABLE;
 					}
 					/* get flags for PMTU */
 					if (net->dest_state & SCTP_ADDR_NO_PMTUD) {
 						paddrp->spp_flags |= SPP_PMTUD_DISABLE;
 					} else {
 						paddrp->spp_flags |= SPP_PMTUD_ENABLE;
 					}
 					if (net->dscp & 0x01) {
 						paddrp->spp_dscp = net->dscp & 0xfc;
 						paddrp->spp_flags |= SPP_DSCP;
 					}
 #ifdef INET6
 					if ((net->ro._l_addr.sa.sa_family == AF_INET6) &&
 					    (net->flowlabel & 0x80000000)) {
 						paddrp->spp_ipv6_flowlabel = net->flowlabel & 0x000fffff;
 						paddrp->spp_flags |= SPP_IPV6_FLOWLABEL;
 					}
 #endif
 				} else {
 					/*
 					 * No destination so return default
 					 * value
 					 */
 					paddrp->spp_pathmaxrxt = stcb->asoc.def_net_failure;
 					paddrp->spp_pathmtu = 0;
 					if (stcb->asoc.default_dscp & 0x01) {
 						paddrp->spp_dscp = stcb->asoc.default_dscp & 0xfc;
 						paddrp->spp_flags |= SPP_DSCP;
 					}
 #ifdef INET6
 					if (stcb->asoc.default_flowlabel & 0x80000000) {
 						paddrp->spp_ipv6_flowlabel = stcb->asoc.default_flowlabel & 0x000fffff;
 						paddrp->spp_flags |= SPP_IPV6_FLOWLABEL;
 					}
 #endif
 					/* default settings should be these */
 					if (sctp_stcb_is_feature_on(inp, stcb, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) {
 						paddrp->spp_flags |= SPP_HB_DISABLE;
 					} else {
 						paddrp->spp_flags |= SPP_HB_ENABLE;
 					}
 					if (sctp_stcb_is_feature_on(inp, stcb, SCTP_PCB_FLAGS_DO_NOT_PMTUD)) {
 						paddrp->spp_flags |= SPP_PMTUD_DISABLE;
 					} else {
 						paddrp->spp_flags |= SPP_PMTUD_ENABLE;
 					}
 					paddrp->spp_hbinterval = stcb->asoc.heart_beat_delay;
 				}
 				paddrp->spp_assoc_id = sctp_get_associd(stcb);
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (paddrp->spp_assoc_id == SCTP_FUTURE_ASSOC)) {
 					/* Use endpoint defaults */
 					SCTP_INP_RLOCK(inp);
 					/*
 					 * Start from a clean flag word, as the
 					 * association branch does, so bits
 					 * supplied by the caller do not leak
 					 * into the reported state.
 					 */
 					paddrp->spp_flags = 0;
 					paddrp->spp_pathmaxrxt = inp->sctp_ep.def_net_failure;
 					paddrp->spp_hbinterval = TICKS_TO_MSEC(inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT]);
 					paddrp->spp_assoc_id = SCTP_FUTURE_ASSOC;
 					/* get inp's default */
 					if (inp->sctp_ep.default_dscp & 0x01) {
 						paddrp->spp_dscp = inp->sctp_ep.default_dscp & 0xfc;
 						paddrp->spp_flags |= SPP_DSCP;
 					}
 #ifdef INET6
 					if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) &&
 					    (inp->sctp_ep.default_flowlabel & 0x80000000)) {
 						paddrp->spp_ipv6_flowlabel = inp->sctp_ep.default_flowlabel & 0x000fffff;
 						paddrp->spp_flags |= SPP_IPV6_FLOWLABEL;
 					}
 #endif
 					/* can't return this */
 					paddrp->spp_pathmtu = 0;
 
 					if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT)) {
 						paddrp->spp_flags |= SPP_HB_ENABLE;
 					} else {
 						paddrp->spp_flags |= SPP_HB_DISABLE;
 					}
 					if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_DO_NOT_PMTUD)) {
 						paddrp->spp_flags |= SPP_PMTUD_ENABLE;
 					} else {
 						paddrp->spp_flags |= SPP_PMTUD_DISABLE;
 					}
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_paddrparams);
 			}
 			break;
 		}
 	case SCTP_GET_PEER_ADDR_INFO:
 		{
 			struct sctp_paddrinfo *paddri;
 			struct sctp_nets *net;
 			struct sockaddr *addr;
 #if defined(INET) && defined(INET6)
 			struct sockaddr_in sin_store;
 #endif
 
 			SCTP_CHECK_AND_CAST(paddri, optval, struct sctp_paddrinfo, *optsize);
 			SCTP_FIND_STCB(inp, stcb, paddri->spinfo_assoc_id);
 
 #if defined(INET) && defined(INET6)
 			if (paddri->spinfo_address.ss_family == AF_INET6) {
 				struct sockaddr_in6 *sin6;
 
 				sin6 = (struct sockaddr_in6 *)&paddri->spinfo_address;
 				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 					in6_sin6_2_sin(&sin_store, sin6);
 					addr = (struct sockaddr *)&sin_store;
 				} else {
 					addr = (struct sockaddr *)&paddri->spinfo_address;
 				}
 			} else {
 				addr = (struct sockaddr *)&paddri->spinfo_address;
 			}
 #else
 			addr = (struct sockaddr *)&paddri->spinfo_address;
 #endif
 			if (stcb != NULL) {
 				net = sctp_findnet(stcb, addr);
 			} else {
 				/*
 				 * We increment here since
 				 * sctp_findassociation_ep_addr() will do a
 				 * decrement if it finds the stcb as long as
 				 * the locked tcb (last argument) is NOT a
 				 * TCB, i.e. NULL.
 				 */
 				net = NULL;
 				SCTP_INP_INCR_REF(inp);
 				stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
 
 			if ((stcb != NULL) && (net != NULL)) {
 				if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
 					/* It's unconfirmed */
 					paddri->spinfo_state = SCTP_UNCONFIRMED;
 				} else if (net->dest_state & SCTP_ADDR_REACHABLE) {
 					/* It's active */
 					paddri->spinfo_state = SCTP_ACTIVE;
 				} else {
 					/* It's inactive */
 					paddri->spinfo_state = SCTP_INACTIVE;
 				}
 				paddri->spinfo_cwnd = net->cwnd;
 				paddri->spinfo_srtt = net->lastsa >> SCTP_RTT_SHIFT;
 				paddri->spinfo_rto = net->RTO;
 				paddri->spinfo_assoc_id = sctp_get_associd(stcb);
 				paddri->spinfo_mtu = net->mtu;
 				switch (addr->sa_family) {
 #if defined(INET)
 				case AF_INET:
 					paddri->spinfo_mtu -= SCTP_MIN_V4_OVERHEAD;
 					break;
 #endif
 #if defined(INET6)
 				case AF_INET6:
 					paddri->spinfo_mtu -= SCTP_MIN_OVERHEAD;
 					break;
 #endif
 				default:
 					break;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 				*optsize = sizeof(struct sctp_paddrinfo);
 			} else {
 				if (stcb != NULL) {
 					SCTP_TCB_UNLOCK(stcb);
 				}
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
 				error = ENOENT;
 			}
 			break;
 		}
 	case SCTP_PCB_STATUS:
 		{
 			struct sctp_pcbinfo *spcb;
 
 			SCTP_CHECK_AND_CAST(spcb, optval, struct sctp_pcbinfo, *optsize);
 			sctp_fill_pcbinfo(spcb);
 			*optsize = sizeof(struct sctp_pcbinfo);
 			break;
 		}
 	case SCTP_STATUS:
 		{
 			struct sctp_nets *net;
 			struct sctp_status *sstat;
 
 			SCTP_CHECK_AND_CAST(sstat, optval, struct sctp_status, *optsize);
 			SCTP_FIND_STCB(inp, stcb, sstat->sstat_assoc_id);
 
 			if (stcb == NULL) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				break;
 			}
 			sstat->sstat_state = sctp_map_assoc_state(stcb->asoc.state);
 			sstat->sstat_assoc_id = sctp_get_associd(stcb);
 			sstat->sstat_rwnd = stcb->asoc.peers_rwnd;
 			sstat->sstat_unackdata = stcb->asoc.sent_queue_cnt;
 			/*
 			 * We can't include chunks that have been passed to
 			 * the socket layer. Only things in queue.
 			 */
 			sstat->sstat_penddata = (stcb->asoc.cnt_on_reasm_queue +
 			    stcb->asoc.cnt_on_all_streams);
 
 
 			sstat->sstat_instrms = stcb->asoc.streamincnt;
 			sstat->sstat_outstrms = stcb->asoc.streamoutcnt;
 			sstat->sstat_fragmentation_point = sctp_get_frag_point(stcb, &stcb->asoc);
 			memcpy(&sstat->sstat_primary.spinfo_address,
 			    &stcb->asoc.primary_destination->ro._l_addr,
 			    ((struct sockaddr *)(&stcb->asoc.primary_destination->ro._l_addr))->sa_len);
 			net = stcb->asoc.primary_destination;
 			((struct sockaddr_in *)&sstat->sstat_primary.spinfo_address)->sin_port = stcb->rport;
 			/*
 			 * Again the user can get info from sctp_constants.h
 			 * for what the state of the network is.
 			 */
 			if (net->dest_state & SCTP_ADDR_UNCONFIRMED) {
 				/* It's unconfirmed */
 				sstat->sstat_primary.spinfo_state = SCTP_UNCONFIRMED;
 			} else if (net->dest_state & SCTP_ADDR_REACHABLE) {
 				/* It's active */
 				sstat->sstat_primary.spinfo_state = SCTP_ACTIVE;
 			} else {
 				/* It's inactive */
 				sstat->sstat_primary.spinfo_state = SCTP_INACTIVE;
 			}
 			sstat->sstat_primary.spinfo_cwnd = net->cwnd;
 			sstat->sstat_primary.spinfo_srtt = net->lastsa >> SCTP_RTT_SHIFT;
 			sstat->sstat_primary.spinfo_rto = net->RTO;
 			sstat->sstat_primary.spinfo_mtu = net->mtu;
 			switch (stcb->asoc.primary_destination->ro._l_addr.sa.sa_family) {
 #if defined(INET)
 			case AF_INET:
 				sstat->sstat_primary.spinfo_mtu -= SCTP_MIN_V4_OVERHEAD;
 				break;
 #endif
 #if defined(INET6)
 			case AF_INET6:
 				sstat->sstat_primary.spinfo_mtu -= SCTP_MIN_OVERHEAD;
 				break;
 #endif
 			default:
 				break;
 			}
 			sstat->sstat_primary.spinfo_assoc_id = sctp_get_associd(stcb);
 			SCTP_TCB_UNLOCK(stcb);
 			*optsize = sizeof(struct sctp_status);
 			break;
 		}
 	case SCTP_RTOINFO:
 		{
 			struct sctp_rtoinfo *srto;
 
 			SCTP_CHECK_AND_CAST(srto, optval, struct sctp_rtoinfo, *optsize);
 			SCTP_FIND_STCB(inp, stcb, srto->srto_assoc_id);
 
 			if (stcb) {
 				srto->srto_initial = stcb->asoc.initial_rto;
 				srto->srto_max = stcb->asoc.maxrto;
 				srto->srto_min = stcb->asoc.minrto;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (srto->srto_assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					srto->srto_initial = inp->sctp_ep.initial_rto;
 					srto->srto_max = inp->sctp_ep.sctp_maxrto;
 					srto->srto_min = inp->sctp_ep.sctp_minrto;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_rtoinfo);
 			}
 			break;
 		}
 	case SCTP_TIMEOUTS:
 		{
 			struct sctp_timeouts *stimo;
 
 			SCTP_CHECK_AND_CAST(stimo, optval, struct sctp_timeouts, *optsize);
 			SCTP_FIND_STCB(inp, stcb, stimo->stimo_assoc_id);
 
 			if (stcb) {
 				stimo->stimo_init = stcb->asoc.timoinit;
 				stimo->stimo_data = stcb->asoc.timodata;
 				stimo->stimo_sack = stcb->asoc.timosack;
 				stimo->stimo_shutdown = stcb->asoc.timoshutdown;
 				stimo->stimo_heartbeat = stcb->asoc.timoheartbeat;
 				stimo->stimo_cookie = stcb->asoc.timocookie;
 				stimo->stimo_shutdownack = stcb->asoc.timoshutdownack;
 				SCTP_TCB_UNLOCK(stcb);
 				*optsize = sizeof(struct sctp_timeouts);
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 			}
 			break;
 		}
 	case SCTP_ASSOCINFO:
 		{
 			struct sctp_assocparams *sasoc;
 
 			SCTP_CHECK_AND_CAST(sasoc, optval, struct sctp_assocparams, *optsize);
 			SCTP_FIND_STCB(inp, stcb, sasoc->sasoc_assoc_id);
 
 			if (stcb) {
 				sasoc->sasoc_cookie_life = TICKS_TO_MSEC(stcb->asoc.cookie_life);
 				sasoc->sasoc_asocmaxrxt = stcb->asoc.max_send_times;
 				sasoc->sasoc_number_peer_destinations = stcb->asoc.numnets;
 				sasoc->sasoc_peer_rwnd = stcb->asoc.peers_rwnd;
 				sasoc->sasoc_local_rwnd = stcb->asoc.my_rwnd;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (sasoc->sasoc_assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					sasoc->sasoc_cookie_life = TICKS_TO_MSEC(inp->sctp_ep.def_cookie_life);
 					sasoc->sasoc_asocmaxrxt = inp->sctp_ep.max_send_times;
 					sasoc->sasoc_number_peer_destinations = 0;
 					sasoc->sasoc_peer_rwnd = 0;
 					sasoc->sasoc_local_rwnd = sbspace(&inp->sctp_socket->so_rcv);
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assocparams);
 			}
 			break;
 		}
 	case SCTP_DEFAULT_SEND_PARAM:
 		{
 			struct sctp_sndrcvinfo *s_info;
 
 			SCTP_CHECK_AND_CAST(s_info, optval, struct sctp_sndrcvinfo, *optsize);
 			SCTP_FIND_STCB(inp, stcb, s_info->sinfo_assoc_id);
 
 			if (stcb) {
 				memcpy(s_info, &stcb->asoc.def_send, sizeof(stcb->asoc.def_send));
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (s_info->sinfo_assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					memcpy(s_info, &inp->def_send, sizeof(inp->def_send));
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_sndrcvinfo);
 			}
 			break;
 		}
 	case SCTP_INITMSG:
 		{
 			struct sctp_initmsg *sinit;
 
 			SCTP_CHECK_AND_CAST(sinit, optval, struct sctp_initmsg, *optsize);
 			SCTP_INP_RLOCK(inp);
 			sinit->sinit_num_ostreams = inp->sctp_ep.pre_open_stream_count;
 			sinit->sinit_max_instreams = inp->sctp_ep.max_open_streams_intome;
 			sinit->sinit_max_attempts = inp->sctp_ep.max_init_times;
 			sinit->sinit_max_init_timeo = inp->sctp_ep.initial_init_rto_max;
 			SCTP_INP_RUNLOCK(inp);
 			*optsize = sizeof(struct sctp_initmsg);
 			break;
 		}
 	case SCTP_PRIMARY_ADDR:
 		/* we allow a "get" operation on this */
 		{
 			struct sctp_setprim *ssp;
 
 			SCTP_CHECK_AND_CAST(ssp, optval, struct sctp_setprim, *optsize);
 			SCTP_FIND_STCB(inp, stcb, ssp->ssp_assoc_id);
 
 			if (stcb) {
 				union sctp_sockstore *addr;
 
 				addr = &stcb->asoc.primary_destination->ro._l_addr;
 				switch (addr->sa.sa_family) {
 #ifdef INET
 				case AF_INET:
 #ifdef INET6
 					if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NEEDS_MAPPED_V4)) {
 						in6_sin_2_v4mapsin6(&addr->sin,
 						    (struct sockaddr_in6 *)&ssp->ssp_addr);
 					} else {
 						memcpy(&ssp->ssp_addr, &addr->sin, sizeof(struct sockaddr_in));
 					}
 #else
 					memcpy(&ssp->ssp_addr, &addr->sin, sizeof(struct sockaddr_in));
 #endif
 					break;
 #endif
 #ifdef INET6
 				case AF_INET6:
 					memcpy(&ssp->ssp_addr, &addr->sin6, sizeof(struct sockaddr_in6));
 					break;
 #endif
 				default:
 					break;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 				*optsize = sizeof(struct sctp_setprim);
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 			}
 			break;
 		}
 	case SCTP_HMAC_IDENT:
 		{
 			struct sctp_hmacalgo *shmac;
 			sctp_hmaclist_t *hmaclist;
 			uint32_t size;
 			int i;
 
 			SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, *optsize);
 
 			SCTP_INP_RLOCK(inp);
 			hmaclist = inp->sctp_ep.local_hmacs;
 			if (hmaclist == NULL) {
 				/* no HMACs to return */
 				*optsize = sizeof(*shmac);
 				SCTP_INP_RUNLOCK(inp);
 				break;
 			}
 			/* is there room for all of the hmac ids? */
 			size = sizeof(*shmac) + (hmaclist->num_algo *
 			    sizeof(shmac->shmac_idents[0]));
 			if ((size_t)(*optsize) < size) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				SCTP_INP_RUNLOCK(inp);
 				break;
 			}
 			/* copy in the list */
 			shmac->shmac_number_of_idents = hmaclist->num_algo;
 			for (i = 0; i < hmaclist->num_algo; i++) {
 				shmac->shmac_idents[i] = hmaclist->hmac[i];
 			}
 			SCTP_INP_RUNLOCK(inp);
 			*optsize = size;
 			break;
 		}
 	case SCTP_AUTH_ACTIVE_KEY:
 		{
 			struct sctp_authkeyid *scact;
 
 			SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid, *optsize);
 			SCTP_FIND_STCB(inp, stcb, scact->scact_assoc_id);
 
 			if (stcb) {
 				/* get the active key on the assoc */
 				scact->scact_keynumber = stcb->asoc.authinfo.active_keyid;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (scact->scact_assoc_id == SCTP_FUTURE_ASSOC)) {
 					/* get the endpoint active key */
 					SCTP_INP_RLOCK(inp);
 					scact->scact_keynumber = inp->sctp_ep.default_keyid;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_authkeyid);
 			}
 			break;
 		}
 	case SCTP_LOCAL_AUTH_CHUNKS:
 		{
 			struct sctp_authchunks *sac;
 			sctp_auth_chklist_t *chklist = NULL;
 			size_t size = 0;
 
 			SCTP_CHECK_AND_CAST(sac, optval, struct sctp_authchunks, *optsize);
 			SCTP_FIND_STCB(inp, stcb, sac->gauth_assoc_id);
 
 			if (stcb) {
 				/* get off the assoc */
 				chklist = stcb->asoc.local_auth_chunks;
 				/* is there enough space? */
 				size = sctp_auth_get_chklist_size(chklist);
 				if (*optsize < (sizeof(struct sctp_authchunks) + size)) {
 					error = EINVAL;
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 				} else {
 					/* copy in the chunks */
 					(void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
 					sac->gauth_number_of_chunks = (uint32_t)size;
 					*optsize = sizeof(struct sctp_authchunks) + size;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (sac->gauth_assoc_id == SCTP_FUTURE_ASSOC)) {
 					/* get off the endpoint */
 					SCTP_INP_RLOCK(inp);
 					chklist = inp->sctp_ep.local_auth_chunks;
 					/* is there enough space? */
 					size = sctp_auth_get_chklist_size(chklist);
 					if (*optsize < (sizeof(struct sctp_authchunks) + size)) {
 						error = EINVAL;
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 					} else {
 						/* copy in the chunks */
 						(void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
 						sac->gauth_number_of_chunks = (uint32_t)size;
 						*optsize = sizeof(struct sctp_authchunks) + size;
 					}
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	case SCTP_PEER_AUTH_CHUNKS:
 		{
 			struct sctp_authchunks *sac;
 			sctp_auth_chklist_t *chklist = NULL;
 			size_t size = 0;
 
 			SCTP_CHECK_AND_CAST(sac, optval, struct sctp_authchunks, *optsize);
 			SCTP_FIND_STCB(inp, stcb, sac->gauth_assoc_id);
 
 			if (stcb) {
 				/* get off the assoc */
 				chklist = stcb->asoc.peer_auth_chunks;
 				/* is there enough space? */
 				size = sctp_auth_get_chklist_size(chklist);
 				if (*optsize < (sizeof(struct sctp_authchunks) + size)) {
 					error = EINVAL;
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 				} else {
 					/* copy in the chunks */
 					(void)sctp_serialize_auth_chunks(chklist, sac->gauth_chunks);
 					sac->gauth_number_of_chunks = (uint32_t)size;
 					*optsize = sizeof(struct sctp_authchunks) + size;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
 				error = ENOENT;
 			}
 			break;
 		}
 	case SCTP_EVENT:
 		{
 			/*
 			 * Report in se_on whether the notification named by
 			 * se_type is enabled, for the association when one is
 			 * found, otherwise for the endpoint.  Fails with
 			 * ENOTSUP for SCTP_NOTIFICATIONS_STOPPED_EVENT and
 			 * EINVAL for unknown event types.
 			 */
 			struct sctp_event *event;
 			uint32_t event_type;
 
 			SCTP_CHECK_AND_CAST(event, optval, struct sctp_event, *optsize);
 			SCTP_FIND_STCB(inp, stcb, event->se_assoc_id);
 
 			/* Map the API event type onto the PCB feature flag. */
 			switch (event->se_type) {
 			case SCTP_ASSOC_CHANGE:
 				event_type = SCTP_PCB_FLAGS_RECVASSOCEVNT;
 				break;
 			case SCTP_PEER_ADDR_CHANGE:
 				event_type = SCTP_PCB_FLAGS_RECVPADDREVNT;
 				break;
 			case SCTP_REMOTE_ERROR:
 				event_type = SCTP_PCB_FLAGS_RECVPEERERR;
 				break;
 			case SCTP_SEND_FAILED:
 				event_type = SCTP_PCB_FLAGS_RECVSENDFAILEVNT;
 				break;
 			case SCTP_SHUTDOWN_EVENT:
 				event_type = SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT;
 				break;
 			case SCTP_ADAPTATION_INDICATION:
 				event_type = SCTP_PCB_FLAGS_ADAPTATIONEVNT;
 				break;
 			case SCTP_PARTIAL_DELIVERY_EVENT:
 				event_type = SCTP_PCB_FLAGS_PDAPIEVNT;
 				break;
 			case SCTP_AUTHENTICATION_EVENT:
 				event_type = SCTP_PCB_FLAGS_AUTHEVNT;
 				break;
 			case SCTP_STREAM_RESET_EVENT:
 				event_type = SCTP_PCB_FLAGS_STREAM_RESETEVNT;
 				break;
 			case SCTP_SENDER_DRY_EVENT:
 				event_type = SCTP_PCB_FLAGS_DRYEVNT;
 				break;
 			case SCTP_NOTIFICATIONS_STOPPED_EVENT:
 				event_type = 0;
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP);
 				error = ENOTSUP;
 				break;
 			case SCTP_ASSOC_RESET_EVENT:
 				event_type = SCTP_PCB_FLAGS_ASSOC_RESETEVNT;
 				break;
 			case SCTP_STREAM_CHANGE_EVENT:
 				event_type = SCTP_PCB_FLAGS_STREAM_CHANGEEVNT;
 				break;
 			case SCTP_SEND_FAILED_EVENT:
 				event_type = SCTP_PCB_FLAGS_RECVNSENDFAILEVNT;
 				break;
 			default:
 				event_type = 0;
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				break;
 			}
 			if (event_type > 0) {
 				if (stcb) {
 					event->se_on = sctp_stcb_is_feature_on(inp, stcb, event_type);
 				} else {
 					if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 					    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 					    (event->se_assoc_id == SCTP_FUTURE_ASSOC)) {
 						SCTP_INP_RLOCK(inp);
 						event->se_on = sctp_is_feature_on(inp, event_type);
 						SCTP_INP_RUNLOCK(inp);
 					} else {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						error = EINVAL;
 					}
 				}
 			}
 			/*
 			 * Release the association on every path, including the
 			 * unsupported/unknown event types above; previously
 			 * those paths skipped the unlock.
 			 */
 			if (stcb != NULL) {
 				SCTP_TCB_UNLOCK(stcb);
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_event);
 			}
 			break;
 		}
 	case SCTP_RECVRCVINFO:
 		{
 			int onoff;
 
 			if (*optsize < sizeof(int)) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 			} else {
 				SCTP_INP_RLOCK(inp);
 				onoff = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO);
 				SCTP_INP_RUNLOCK(inp);
 			}
 			if (error == 0) {
 				/* return the option value */
 				*(int *)optval = onoff;
 				*optsize = sizeof(int);
 			}
 			break;
 		}
 	case SCTP_RECVNXTINFO:
 		{
 			int onoff;
 
 			if (*optsize < sizeof(int)) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 			} else {
 				SCTP_INP_RLOCK(inp);
 				onoff = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO);
 				SCTP_INP_RUNLOCK(inp);
 			}
 			if (error == 0) {
 				/* return the option value */
 				*(int *)optval = onoff;
 				*optsize = sizeof(int);
 			}
 			break;
 		}
 	case SCTP_DEFAULT_SNDINFO:
 		{
 			struct sctp_sndinfo *info;
 
 			SCTP_CHECK_AND_CAST(info, optval, struct sctp_sndinfo, *optsize);
 			SCTP_FIND_STCB(inp, stcb, info->snd_assoc_id);
 
 			if (stcb) {
 				info->snd_sid = stcb->asoc.def_send.sinfo_stream;
 				info->snd_flags = stcb->asoc.def_send.sinfo_flags;
 				info->snd_flags &= 0xfff0;
 				info->snd_ppid = stcb->asoc.def_send.sinfo_ppid;
 				info->snd_context = stcb->asoc.def_send.sinfo_context;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (info->snd_assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					info->snd_sid = inp->def_send.sinfo_stream;
 					info->snd_flags = inp->def_send.sinfo_flags;
 					info->snd_flags &= 0xfff0;
 					info->snd_ppid = inp->def_send.sinfo_ppid;
 					info->snd_context = inp->def_send.sinfo_context;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_sndinfo);
 			}
 			break;
 		}
 	case SCTP_DEFAULT_PRINFO:
 		{
 			struct sctp_default_prinfo *info;
 
 			SCTP_CHECK_AND_CAST(info, optval, struct sctp_default_prinfo, *optsize);
 			SCTP_FIND_STCB(inp, stcb, info->pr_assoc_id);
 
 			if (stcb) {
 				info->pr_policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags);
 				info->pr_value = stcb->asoc.def_send.sinfo_timetolive;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (info->pr_assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					info->pr_policy = PR_SCTP_POLICY(inp->def_send.sinfo_flags);
 					info->pr_value = inp->def_send.sinfo_timetolive;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_default_prinfo);
 			}
 			break;
 		}
 	case SCTP_PEER_ADDR_THLDS:
 		{
 			struct sctp_paddrthlds *thlds;
 			struct sctp_nets *net;
 			struct sockaddr *addr;
 #if defined(INET) && defined(INET6)
 			struct sockaddr_in sin_store;
 #endif
 
 			SCTP_CHECK_AND_CAST(thlds, optval, struct sctp_paddrthlds, *optsize);
 			SCTP_FIND_STCB(inp, stcb, thlds->spt_assoc_id);
 
 #if defined(INET) && defined(INET6)
 			if (thlds->spt_address.ss_family == AF_INET6) {
 				struct sockaddr_in6 *sin6;
 
 				sin6 = (struct sockaddr_in6 *)&thlds->spt_address;
 				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 					in6_sin6_2_sin(&sin_store, sin6);
 					addr = (struct sockaddr *)&sin_store;
 				} else {
 					addr = (struct sockaddr *)&thlds->spt_address;
 				}
 			} else {
 				addr = (struct sockaddr *)&thlds->spt_address;
 			}
 #else
 			addr = (struct sockaddr *)&thlds->spt_address;
 #endif
 			if (stcb != NULL) {
 				net = sctp_findnet(stcb, addr);
 			} else {
 				/*
 				 * Take an extra reference on the endpoint
 				 * first: sctp_findassociation_ep_addr() will
 				 * drop one reference if it finds the stcb,
 				 * as long as the locked TCB (last argument)
 				 * is NOT a TCB, i.e. is NULL.
 				 */
 				net = NULL;
 				SCTP_INP_INCR_REF(inp);
 				stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
 			if ((stcb != NULL) && (net == NULL)) {
 #ifdef INET
 				if (addr->sa_family == AF_INET) {
 					struct sockaddr_in *sin;
 
 					sin = (struct sockaddr_in *)addr;
 					if (sin->sin_addr.s_addr != INADDR_ANY) {
 						error = EINVAL;
 						SCTP_TCB_UNLOCK(stcb);
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 						break;
 					}
 				} else
 #endif
 #ifdef INET6
 				if (addr->sa_family == AF_INET6) {
 					struct sockaddr_in6 *sin6;
 
 					sin6 = (struct sockaddr_in6 *)addr;
 					if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 						error = EINVAL;
 						SCTP_TCB_UNLOCK(stcb);
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 						break;
 					}
 				} else
 #endif
 				{
 					error = EAFNOSUPPORT;
 					SCTP_TCB_UNLOCK(stcb);
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 					break;
 				}
 			}
 			if (stcb != NULL) {
 				if (net != NULL) {
 					thlds->spt_pathmaxrxt = net->failure_threshold;
 					thlds->spt_pathpfthld = net->pf_threshold;
 					thlds->spt_pathcpthld = 0xffff;
 				} else {
 					thlds->spt_pathmaxrxt = stcb->asoc.def_net_failure;
 					thlds->spt_pathpfthld = stcb->asoc.def_net_pf_threshold;
 					thlds->spt_pathcpthld = 0xffff;
 				}
 				thlds->spt_assoc_id = sctp_get_associd(stcb);
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (thlds->spt_assoc_id == SCTP_FUTURE_ASSOC)) {
 					/* Use endpoint defaults */
 					SCTP_INP_RLOCK(inp);
 					thlds->spt_pathmaxrxt = inp->sctp_ep.def_net_failure;
 					thlds->spt_pathpfthld = inp->sctp_ep.def_net_pf_threshold;
 					thlds->spt_pathcpthld = 0xffff;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_paddrthlds);
 			}
 			break;
 		}
 	case SCTP_REMOTE_UDP_ENCAPS_PORT:
 		{
 			struct sctp_udpencaps *encaps;
 			struct sctp_nets *net;
 			struct sockaddr *addr;
 #if defined(INET) && defined(INET6)
 			struct sockaddr_in sin_store;
 #endif
 
 			SCTP_CHECK_AND_CAST(encaps, optval, struct sctp_udpencaps, *optsize);
 			SCTP_FIND_STCB(inp, stcb, encaps->sue_assoc_id);
 
 #if defined(INET) && defined(INET6)
 			if (encaps->sue_address.ss_family == AF_INET6) {
 				struct sockaddr_in6 *sin6;
 
 				sin6 = (struct sockaddr_in6 *)&encaps->sue_address;
 				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 					in6_sin6_2_sin(&sin_store, sin6);
 					addr = (struct sockaddr *)&sin_store;
 				} else {
 					addr = (struct sockaddr *)&encaps->sue_address;
 				}
 			} else {
 				addr = (struct sockaddr *)&encaps->sue_address;
 			}
 #else
 			addr = (struct sockaddr *)&encaps->sue_address;
 #endif
 			if (stcb) {
 				net = sctp_findnet(stcb, addr);
 			} else {
 				/*
 				 * Take an extra reference on the endpoint
 				 * first: sctp_findassociation_ep_addr() will
 				 * drop one reference if it finds the stcb,
 				 * as long as the locked TCB (last argument)
 				 * is NOT a TCB, i.e. is NULL.
 				 */
 				net = NULL;
 				SCTP_INP_INCR_REF(inp);
 				stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
 			if ((stcb != NULL) && (net == NULL)) {
 #ifdef INET
 				if (addr->sa_family == AF_INET) {
 					struct sockaddr_in *sin;
 
 					sin = (struct sockaddr_in *)addr;
 					if (sin->sin_addr.s_addr != INADDR_ANY) {
 						error = EINVAL;
 						SCTP_TCB_UNLOCK(stcb);
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 						break;
 					}
 				} else
 #endif
 #ifdef INET6
 				if (addr->sa_family == AF_INET6) {
 					struct sockaddr_in6 *sin6;
 
 					sin6 = (struct sockaddr_in6 *)addr;
 					if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 						error = EINVAL;
 						SCTP_TCB_UNLOCK(stcb);
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 						break;
 					}
 				} else
 #endif
 				{
 					error = EAFNOSUPPORT;
 					SCTP_TCB_UNLOCK(stcb);
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 					break;
 				}
 			}
 			if (stcb != NULL) {
 				if (net) {
 					encaps->sue_port = net->port;
 				} else {
 					encaps->sue_port = stcb->asoc.port;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (encaps->sue_assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					encaps->sue_port = inp->sctp_ep.port;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_udpencaps);
 			}
 			break;
 		}
 	case SCTP_ECN_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				av->assoc_value = stcb->asoc.ecn_supported;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					av->assoc_value = inp->ecn_supported;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_PR_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				av->assoc_value = stcb->asoc.prsctp_supported;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					av->assoc_value = inp->prsctp_supported;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_AUTH_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				av->assoc_value = stcb->asoc.auth_supported;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					av->assoc_value = inp->auth_supported;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_ASCONF_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				av->assoc_value = stcb->asoc.asconf_supported;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					av->assoc_value = inp->asconf_supported;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_RECONFIG_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				av->assoc_value = stcb->asoc.reconfig_supported;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					av->assoc_value = inp->reconfig_supported;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_NRSACK_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				av->assoc_value = stcb->asoc.nrsack_supported;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					av->assoc_value = inp->nrsack_supported;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_PKTDROP_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				av->assoc_value = stcb->asoc.pktdrop_supported;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					av->assoc_value = inp->pktdrop_supported;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_ENABLE_STREAM_RESET:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				av->assoc_value = (uint32_t)stcb->asoc.local_strreset_support;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					av->assoc_value = (uint32_t)inp->local_strreset_support;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	case SCTP_PR_STREAM_STATUS:
 		{
 			/*
 			 * Return the abandoned-message counters of one
 			 * outgoing stream (sprstat_sid) of the association
 			 * selected by sprstat_assoc_id.  Without
 			 * SCTP_DETAILED_STR_STATS only SCTP_PR_SCTP_ALL is
 			 * accepted as policy; with it, a specific policy up
 			 * to SCTP_PR_SCTP_MAX is accepted as well.  An
 			 * association is required; anything else is EINVAL.
 			 */
 			struct sctp_prstatus *sprstat;
 			uint16_t sid;
 			uint16_t policy;
 
 			SCTP_CHECK_AND_CAST(sprstat, optval, struct sctp_prstatus, *optsize);
 			SCTP_FIND_STCB(inp, stcb, sprstat->sprstat_assoc_id);
 
 			sid = sprstat->sprstat_sid;
 			policy = sprstat->sprstat_policy;
 #if defined(SCTP_DETAILED_STR_STATS)
 			if ((stcb != NULL) &&
 			    (sid < stcb->asoc.streamoutcnt) &&
 			    (policy != SCTP_PR_SCTP_NONE) &&
 			    ((policy <= SCTP_PR_SCTP_MAX) ||
 			    (policy == SCTP_PR_SCTP_ALL))) {
 				if (policy == SCTP_PR_SCTP_ALL) {
 					/* Index 0 is read for the "all policies" request. */
 					sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[0];
 					sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[0];
 				} else {
 					sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[policy];
 					sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[policy];
 				}
 #else
 			if ((stcb != NULL) &&
 			    (sid < stcb->asoc.streamoutcnt) &&
 			    (policy == SCTP_PR_SCTP_ALL)) {
 				sprstat->sprstat_abandoned_unsent = stcb->asoc.strmout[sid].abandoned_unsent[0];
 				sprstat->sprstat_abandoned_sent = stcb->asoc.strmout[sid].abandoned_sent[0];
 #endif
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 			}
 			/*
 			 * Unlock on the error path too: an association may
 			 * have been found even though sid or policy was
 			 * rejected, and the TCB must not stay locked.
 			 */
 			if (stcb != NULL) {
 				SCTP_TCB_UNLOCK(stcb);
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_prstatus);
 			}
 			break;
 		}
 	case SCTP_PR_ASSOC_STATUS:
 		{
 			/*
 			 * Return the association-wide abandoned-message
 			 * counters for the association selected by
 			 * sprstat_assoc_id.  policy must be SCTP_PR_SCTP_ALL
 			 * or a specific policy up to SCTP_PR_SCTP_MAX (but
 			 * not SCTP_PR_SCTP_NONE).  An association is
 			 * required; anything else is EINVAL.
 			 */
 			struct sctp_prstatus *sprstat;
 			uint16_t policy;
 
 			SCTP_CHECK_AND_CAST(sprstat, optval, struct sctp_prstatus, *optsize);
 			SCTP_FIND_STCB(inp, stcb, sprstat->sprstat_assoc_id);
 
 			policy = sprstat->sprstat_policy;
 			if ((stcb != NULL) &&
 			    (policy != SCTP_PR_SCTP_NONE) &&
 			    ((policy <= SCTP_PR_SCTP_MAX) ||
 			    (policy == SCTP_PR_SCTP_ALL))) {
 				if (policy == SCTP_PR_SCTP_ALL) {
 					/* Index 0 is read for the "all policies" request. */
 					sprstat->sprstat_abandoned_unsent = stcb->asoc.abandoned_unsent[0];
 					sprstat->sprstat_abandoned_sent = stcb->asoc.abandoned_sent[0];
 				} else {
 					sprstat->sprstat_abandoned_unsent = stcb->asoc.abandoned_unsent[policy];
 					sprstat->sprstat_abandoned_sent = stcb->asoc.abandoned_sent[policy];
 				}
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 			}
 			/*
 			 * Unlock on the error path too: an association may
 			 * have been found even though policy was rejected,
 			 * and the TCB must not stay locked.
 			 */
 			if (stcb != NULL) {
 				SCTP_TCB_UNLOCK(stcb);
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_prstatus);
 			}
 			break;
 		}
 	case SCTP_MAX_CWND:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, *optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				av->assoc_value = stcb->asoc.max_cwnd;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					av->assoc_value = inp->max_cwnd;
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			if (error == 0) {
 				*optsize = sizeof(struct sctp_assoc_value);
 			}
 			break;
 		}
 	default:
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
 		error = ENOPROTOOPT;
 		break;
 	}			/* end switch (sopt->sopt_name) */
 	if (error) {
 		*optsize = 0;
 	}
 	return (error);
 }
 
 static int
 sctp_setopt(struct socket *so, int optname, void *optval, size_t optsize,
     void *p)
 {
 	int error, set_opt;
 	uint32_t *mopt;
 	struct sctp_tcb *stcb = NULL;
 	struct sctp_inpcb *inp = NULL;
 	uint32_t vrf_id;
 
 	if (optval == NULL) {
 		SCTP_PRINTF("optval is NULL\n");
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		return (EINVAL);
 	}
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		SCTP_PRINTF("inp is NULL?\n");
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		return (EINVAL);
 	}
 	vrf_id = inp->def_vrf_id;
 
 	error = 0;
 	switch (optname) {
 	case SCTP_NODELAY:
 	case SCTP_AUTOCLOSE:
 	case SCTP_AUTO_ASCONF:
 	case SCTP_EXPLICIT_EOR:
 	case SCTP_DISABLE_FRAGMENTS:
 	case SCTP_USE_EXT_RCVINFO:
 	case SCTP_I_WANT_MAPPED_V4_ADDR:
 		/* copy in the option value */
 		SCTP_CHECK_AND_CAST(mopt, optval, uint32_t, optsize);
 		set_opt = 0;
 		if (error)
 			break;
 		switch (optname) {
 		case SCTP_DISABLE_FRAGMENTS:
 			set_opt = SCTP_PCB_FLAGS_NO_FRAGMENT;
 			break;
 		case SCTP_AUTO_ASCONF:
 			/*
 			 * NOTE: we don't really support this flag
 			 */
 			if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 				/* only valid for bound all sockets */
 				if ((SCTP_BASE_SYSCTL(sctp_auto_asconf) == 0) &&
 				    (*mopt != 0)) {
 					/* forbidden by admin */
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EPERM);
 					return (EPERM);
 				}
 				set_opt = SCTP_PCB_FLAGS_AUTO_ASCONF;
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				return (EINVAL);
 			}
 			break;
 		case SCTP_EXPLICIT_EOR:
 			set_opt = SCTP_PCB_FLAGS_EXPLICIT_EOR;
 			break;
 		case SCTP_USE_EXT_RCVINFO:
 			set_opt = SCTP_PCB_FLAGS_EXT_RCVINFO;
 			break;
 		case SCTP_I_WANT_MAPPED_V4_ADDR:
 			if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 				set_opt = SCTP_PCB_FLAGS_NEEDS_MAPPED_V4;
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				return (EINVAL);
 			}
 			break;
 		case SCTP_NODELAY:
 			set_opt = SCTP_PCB_FLAGS_NODELAY;
 			break;
 		case SCTP_AUTOCLOSE:
 			if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 			    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				return (EINVAL);
 			}
 			set_opt = SCTP_PCB_FLAGS_AUTOCLOSE;
 			/*
 			 * The value is in ticks. Note this does not affect
 			 * existing associations, only new ones.
 			 */
 			inp->sctp_ep.auto_close_time = SEC_TO_TICKS(*mopt);
 			break;
 		}
 		SCTP_INP_WLOCK(inp);
 		if (*mopt != 0) {
 			sctp_feature_on(inp, set_opt);
 		} else {
 			sctp_feature_off(inp, set_opt);
 		}
 		SCTP_INP_WUNLOCK(inp);
 		break;
 	case SCTP_REUSE_PORT:
 		{
 			/*
 			 * Enable or disable port reuse on the endpoint
 			 * according to the uint32_t option value.  Rejected
 			 * with EINVAL once the socket is bound and on
 			 * 1-to-many (UDP-type) sockets.
 			 */
 			SCTP_CHECK_AND_CAST(mopt, optval, uint32_t, optsize);
 			if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) == 0) {
 				/* Can't set it after we are bound */
 				error = EINVAL;
 				break;
 			}
 			if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE)) {
 				/* Can't do this for a 1-m socket */
 				error = EINVAL;
 				break;
 			}
 			/*
 			 * Test the value, not the pointer: optval was already
 			 * rejected when NULL at function entry, so testing it
 			 * here always enabled the feature and made the
 			 * "off" branch unreachable.
 			 */
 			if (*mopt != 0) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE);
 			} else {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE);
 			}
 			break;
 		}
 	case SCTP_PARTIAL_DELIVERY_POINT:
 		{
 			uint32_t *value;
 
 			SCTP_CHECK_AND_CAST(value, optval, uint32_t, optsize);
 			if (*value > SCTP_SB_LIMIT_RCV(so)) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				break;
 			}
 			inp->partial_delivery_point = *value;
 			break;
 		}
 	case SCTP_FRAGMENT_INTERLEAVE:
 		/* not yet until we re-write sctp_recvmsg() */
 		{
 			uint32_t *level;
 
 			SCTP_CHECK_AND_CAST(level, optval, uint32_t, optsize);
 			if (*level == SCTP_FRAG_LEVEL_2) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
 			} else if (*level == SCTP_FRAG_LEVEL_1) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
 			} else if (*level == SCTP_FRAG_LEVEL_0) {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE);
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS);
 
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 			}
 			break;
 		}
 	case SCTP_INTERLEAVING_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					if (av->assoc_value == 0) {
 						inp->idata_supported = 0;
 					} else {
 						if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_FRAG_INTERLEAVE)) &&
 						    (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_INTERLEAVE_STRMS))) {
 							inp->idata_supported = 1;
 						} else {
 							/*
 							 * Must have Frag
 							 * interleave and
 							 * stream interleave
 							 * on
 							 */
 							SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 							error = EINVAL;
 						}
 					}
 					SCTP_INP_WUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	case SCTP_CMT_ON_OFF:
 		if (SCTP_BASE_SYSCTL(sctp_cmt_on_off)) {
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			if (av->assoc_value > SCTP_CMT_MAX) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				break;
 			}
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 			if (stcb) {
 				stcb->asoc.sctp_cmt_on_off = av->assoc_value;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC) ||
 				    (av->assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					inp->sctp_cmt_on_off = av->assoc_value;
 					SCTP_INP_WUNLOCK(inp);
 				}
 				if ((av->assoc_id == SCTP_CURRENT_ASSOC) ||
 				    (av->assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						stcb->asoc.sctp_cmt_on_off = av->assoc_value;
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				}
 			}
 		} else {
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
 			error = ENOPROTOOPT;
 		}
 		break;
 	case SCTP_PLUGGABLE_CC:
 		{
 			struct sctp_assoc_value *av;
 			struct sctp_nets *net;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			if ((av->assoc_value != SCTP_CC_RFC2581) &&
 			    (av->assoc_value != SCTP_CC_HSTCP) &&
 			    (av->assoc_value != SCTP_CC_HTCP) &&
 			    (av->assoc_value != SCTP_CC_RTCC)) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				break;
 			}
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 			if (stcb) {
 				stcb->asoc.cc_functions = sctp_cc_functions[av->assoc_value];
 				stcb->asoc.congestion_control_module = av->assoc_value;
 				if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) {
 					TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 						stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
 					}
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC) ||
 				    (av->assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					inp->sctp_ep.sctp_default_cc_module = av->assoc_value;
 					SCTP_INP_WUNLOCK(inp);
 				}
 				if ((av->assoc_id == SCTP_CURRENT_ASSOC) ||
 				    (av->assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						stcb->asoc.cc_functions = sctp_cc_functions[av->assoc_value];
 						stcb->asoc.congestion_control_module = av->assoc_value;
 						if (stcb->asoc.cc_functions.sctp_set_initial_cc_param != NULL) {
 							TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 								stcb->asoc.cc_functions.sctp_set_initial_cc_param(stcb, net);
 							}
 						}
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				}
 			}
 			break;
 		}
 	case SCTP_CC_OPTION:
 		{
 			struct sctp_cc_option *cc_opt;
 
 			SCTP_CHECK_AND_CAST(cc_opt, optval, struct sctp_cc_option, optsize);
 			SCTP_FIND_STCB(inp, stcb, cc_opt->aid_value.assoc_id);
 			if (stcb == NULL) {
 				if (cc_opt->aid_value.assoc_id == SCTP_CURRENT_ASSOC) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						if (stcb->asoc.cc_functions.sctp_cwnd_socket_option) {
 							(*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 1, cc_opt);
 						}
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					error = EINVAL;
 				}
 			} else {
 				if (stcb->asoc.cc_functions.sctp_cwnd_socket_option == NULL) {
 					error = ENOTSUP;
 				} else {
 					error = (*stcb->asoc.cc_functions.sctp_cwnd_socket_option) (stcb, 1,
 					    cc_opt);
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			}
 			break;
 		}
 	case SCTP_PLUGGABLE_SS:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			if ((av->assoc_value != SCTP_SS_DEFAULT) &&
 			    (av->assoc_value != SCTP_SS_ROUND_ROBIN) &&
 			    (av->assoc_value != SCTP_SS_ROUND_ROBIN_PACKET) &&
 			    (av->assoc_value != SCTP_SS_PRIORITY) &&
 			    (av->assoc_value != SCTP_SS_FAIR_BANDWITH) &&
 			    (av->assoc_value != SCTP_SS_FIRST_COME)) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				break;
 			}
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 			if (stcb) {
 				stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 1, 1);
 				stcb->asoc.ss_functions = sctp_ss_functions[av->assoc_value];
 				stcb->asoc.stream_scheduling_module = av->assoc_value;
 				stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1);
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC) ||
 				    (av->assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					inp->sctp_ep.sctp_default_ss_module = av->assoc_value;
 					SCTP_INP_WUNLOCK(inp);
 				}
 				if ((av->assoc_id == SCTP_CURRENT_ASSOC) ||
 				    (av->assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 1, 1);
 						stcb->asoc.ss_functions = sctp_ss_functions[av->assoc_value];
 						stcb->asoc.stream_scheduling_module = av->assoc_value;
 						stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1);
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				}
 			}
 			break;
 		}
 	/*
 	 * SCTP_SS_VALUE: pass a per-stream value to the association's stream
 	 * scheduler through its sctp_ss_set_value() hook.
 	 *
 	 * With a specific association, an out-of-range stream id or a negative
 	 * return from the hook yields EINVAL.  With SCTP_CURRENT_ASSOC the value
 	 * is applied to every association of the endpoint; associations with
 	 * too few outgoing streams are skipped and the hook's return value is
 	 * ignored.  Any other id without an association yields EINVAL.
 	 */
 	case SCTP_SS_VALUE:
 		{
 			struct sctp_stream_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_stream_value, optsize);
 			/*
 			 * NOTE(review): stcb is unlocked below without a visible
 			 * lock, so SCTP_FIND_STCB presumably returns it locked --
 			 * confirm against the macro definition.
 			 */
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 			if (stcb) {
 				/*
 				 * The range check is evaluated first, so strmout[]
 				 * is only indexed with a valid stream id.
 				 */
 				if ((av->stream_id >= stcb->asoc.streamoutcnt) ||
 				    (stcb->asoc.ss_functions.sctp_ss_set_value(stcb, &stcb->asoc, &stcb->asoc.strmout[av->stream_id],
 				    av->stream_value) < 0)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if (av->assoc_id == SCTP_CURRENT_ASSOC) {
 					/* Walk all associations under the endpoint read lock. */
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						/* Best effort: skip associations lacking this stream. */
 						if (av->stream_id < stcb->asoc.streamoutcnt) {
 							stcb->asoc.ss_functions.sctp_ss_set_value(stcb,
 							    &stcb->asoc,
 							    &stcb->asoc.strmout[av->stream_id],
 							    av->stream_value);
 						}
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				} else {
 					/*
 					 * Can't set stream value without
 					 * association
 					 */
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	/* SCTP_CLR_STAT_LOG: not supported on this path; always EOPNOTSUPP. */
 	case SCTP_CLR_STAT_LOG:
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
 		error = EOPNOTSUPP;
 		break;
 	/*
 	 * SCTP_CONTEXT: store av->assoc_value as the context value.
 	 *
 	 * A found association gets the value directly.  Otherwise the endpoint
 	 * default (inp->sctp_context) is updated for 1-to-1 style sockets and
 	 * for SCTP_FUTURE_ASSOC / SCTP_ALL_ASSOC, and every existing
 	 * association is updated for SCTP_CURRENT_ASSOC / SCTP_ALL_ASSOC.
 	 */
 	case SCTP_CONTEXT:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			/*
 			 * NOTE(review): stcb is unlocked below without a visible
 			 * lock; SCTP_FIND_STCB presumably returns it locked.
 			 */
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				stcb->asoc.context = av->assoc_value;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				/* Endpoint default, used for future associations. */
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC) ||
 				    (av->assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					inp->sctp_context = av->assoc_value;
 					SCTP_INP_WUNLOCK(inp);
 				}
 				/* All associations that currently exist. */
 				if ((av->assoc_id == SCTP_CURRENT_ASSOC) ||
 				    (av->assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						stcb->asoc.context = av->assoc_value;
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				}
 			}
 			break;
 		}
 	/*
 	 * SCTP_VRF_ID: set the endpoint's default VRF id.  Values above
 	 * SCTP_MAX_VRF_ID are rejected with EINVAL.
 	 */
 	case SCTP_VRF_ID:
 		{
 			uint32_t *default_vrfid;
 
 			SCTP_CHECK_AND_CAST(default_vrfid, optval, uint32_t, optsize);
 			if (*default_vrfid > SCTP_MAX_VRF_ID) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				break;
 			}
 			/* Written without taking the endpoint lock. */
 			inp->def_vrf_id = *default_vrfid;
 			break;
 		}
 	/* SCTP_DEL_VRF_ID: not supported; always fails with EOPNOTSUPP. */
 	case SCTP_DEL_VRF_ID:
 		{
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
 			error = EOPNOTSUPP;
 			break;
 		}
 	/* SCTP_ADD_VRF_ID: not supported; always fails with EOPNOTSUPP. */
 	case SCTP_ADD_VRF_ID:
 		{
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
 			error = EOPNOTSUPP;
 			break;
 		}
 	/*
 	 * SCTP_DELAYED_SACK: set the delayed-SACK delay and/or SACK frequency.
 	 *
 	 * A zero sack_delay or sack_freq leaves the corresponding setting
 	 * untouched.  A non-zero delay is first normalized in place in the
 	 * caller-supplied buffer: capped at SCTP_MAX_SACK_DELAY and raised so
 	 * that it converts to at least one tick.
 	 *
 	 * Target selection follows the usual pattern: a found association,
 	 * else the endpoint defaults (1-to-1 style, SCTP_FUTURE_ASSOC,
 	 * SCTP_ALL_ASSOC) and/or all existing associations
 	 * (SCTP_CURRENT_ASSOC, SCTP_ALL_ASSOC).
 	 */
 	case SCTP_DELAYED_SACK:
 		{
 			struct sctp_sack_info *sack;
 
 			SCTP_CHECK_AND_CAST(sack, optval, struct sctp_sack_info, optsize);
 			/*
 			 * NOTE(review): stcb is unlocked below without a visible
 			 * lock; SCTP_FIND_STCB presumably returns it locked.
 			 */
 			SCTP_FIND_STCB(inp, stcb, sack->sack_assoc_id);
 			if (sack->sack_delay) {
 				if (sack->sack_delay > SCTP_MAX_SACK_DELAY)
 					sack->sack_delay = SCTP_MAX_SACK_DELAY;
 				/* Never let a non-zero request round down to zero ticks. */
 				if (MSEC_TO_TICKS(sack->sack_delay) < 1) {
 					sack->sack_delay = TICKS_TO_MSEC(1);
 				}
 			}
 			if (stcb) {
 				/* The association keeps the delay as given (not in ticks). */
 				if (sack->sack_delay) {
 					stcb->asoc.delayed_ack = sack->sack_delay;
 				}
 				if (sack->sack_freq) {
 					stcb->asoc.sack_freq = sack->sack_freq;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (sack->sack_assoc_id == SCTP_FUTURE_ASSOC) ||
 				    (sack->sack_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					/* The endpoint default is stored converted to ticks. */
 					if (sack->sack_delay) {
 						inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_RECV] = MSEC_TO_TICKS(sack->sack_delay);
 					}
 					if (sack->sack_freq) {
 						inp->sctp_ep.sctp_sack_freq = sack->sack_freq;
 					}
 					SCTP_INP_WUNLOCK(inp);
 				}
 				if ((sack->sack_assoc_id == SCTP_CURRENT_ASSOC) ||
 				    (sack->sack_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						if (sack->sack_delay) {
 							stcb->asoc.delayed_ack = sack->sack_delay;
 						}
 						if (sack->sack_freq) {
 							stcb->asoc.sack_freq = sack->sack_freq;
 						}
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				}
 			}
 			break;
 		}
 	/*
 	 * SCTP_AUTH_CHUNK: add sauth->sauth_chunk to the endpoint's list of
 	 * local chunks requiring authentication.  A non-zero return from
 	 * sctp_auth_add_chunk() is reported as EINVAL.
 	 */
 	case SCTP_AUTH_CHUNK:
 		{
 			struct sctp_authchunk *sauth;
 
 			SCTP_CHECK_AND_CAST(sauth, optval, struct sctp_authchunk, optsize);
 
 			/* The endpoint write lock covers the list update. */
 			SCTP_INP_WLOCK(inp);
 			if (sctp_auth_add_chunk(sauth->sauth_chunk, inp->sctp_ep.local_auth_chunks)) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 			}
 			SCTP_INP_WUNLOCK(inp);
 			break;
 		}
 	/*
 	 * SCTP_AUTH_KEY: install (insert or replace) a shared authentication
 	 * key identified by sca->sca_keynumber.
 	 *
 	 * Key length: if sca_keylength is zero, the key is everything in the
 	 * option buffer after the fixed header; otherwise sca_keylength must
 	 * fit inside the buffer or the request fails with EINVAL.  A resulting
 	 * size of zero installs a shared key entry whose key pointer is NULL.
 	 *
 	 * Targets: a found association; else the endpoint key list (1-to-1
 	 * style, SCTP_FUTURE_ASSOC, SCTP_ALL_ASSOC) and/or every existing
 	 * association (SCTP_CURRENT_ASSOC, SCTP_ALL_ASSOC).  A fresh key copy
 	 * and shared-key object are allocated per target.
 	 *
 	 * Note that "error" is overwritten by each sctp_insert_sharedkey()
 	 * call, so when several targets are updated only the last result is
 	 * left in it.
 	 */
 	case SCTP_AUTH_KEY:
 		{
 			struct sctp_authkey *sca;
 			struct sctp_keyhead *shared_keys;
 			sctp_sharedkey_t *shared_key;
 			sctp_key_t *key = NULL;
 			size_t size;
 
 			SCTP_CHECK_AND_CAST(sca, optval, struct sctp_authkey, optsize);
 			if (sca->sca_keylength == 0) {
 				/*
 				 * Implicit length: whatever follows the header.
 				 * NOTE(review): relies on SCTP_CHECK_AND_CAST
 				 * having ensured optsize >= sizeof(*sca) --
 				 * confirm against the macro definition.
 				 */
 				size = optsize - sizeof(struct sctp_authkey);
 			} else {
 				if (sca->sca_keylength + sizeof(struct sctp_authkey) <= optsize) {
 					size = sca->sca_keylength;
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 					break;
 				}
 			}
 			/*
 			 * NOTE(review): stcb is unlocked below without a visible
 			 * lock; SCTP_FIND_STCB presumably returns it locked.
 			 */
 			SCTP_FIND_STCB(inp, stcb, sca->sca_assoc_id);
 
 			if (stcb) {
 				shared_keys = &stcb->asoc.shared_keys;
 				/* clear the cached keys for this key id */
 				sctp_clear_cachedkeys(stcb, sca->sca_keynumber);
 				/*
 				 * create the new shared key and
 				 * insert/replace it
 				 */
 				if (size > 0) {
 					key = sctp_set_key(sca->sca_key, (uint32_t)size);
 					if (key == NULL) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
 						error = ENOMEM;
 						SCTP_TCB_UNLOCK(stcb);
 						break;
 					}
 				}
 				shared_key = sctp_alloc_sharedkey();
 				if (shared_key == NULL) {
 					/* key may still be NULL here when size == 0. */
 					sctp_free_key(key);
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
 					error = ENOMEM;
 					SCTP_TCB_UNLOCK(stcb);
 					break;
 				}
 				shared_key->key = key;
 				shared_key->keyid = sca->sca_keynumber;
 				error = sctp_insert_sharedkey(shared_keys, shared_key);
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (sca->sca_assoc_id == SCTP_FUTURE_ASSOC) ||
 				    (sca->sca_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					shared_keys = &inp->sctp_ep.shared_keys;
 					/*
 					 * clear the cached keys on all
 					 * assocs for this key id
 					 */
 					sctp_clear_cachedkeys_ep(inp, sca->sca_keynumber);
 					/*
 					 * create the new shared key and
 					 * insert/replace it
 					 */
 					if (size > 0) {
 						key = sctp_set_key(sca->sca_key, (uint32_t)size);
 						if (key == NULL) {
 							SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
 							error = ENOMEM;
 							SCTP_INP_WUNLOCK(inp);
 							break;
 						}
 					}
 					shared_key = sctp_alloc_sharedkey();
 					if (shared_key == NULL) {
 						sctp_free_key(key);
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
 						error = ENOMEM;
 						SCTP_INP_WUNLOCK(inp);
 						break;
 					}
 					shared_key->key = key;
 					shared_key->keyid = sca->sca_keynumber;
 					error = sctp_insert_sharedkey(shared_keys, shared_key);
 					SCTP_INP_WUNLOCK(inp);
 				}
 				if ((sca->sca_assoc_id == SCTP_CURRENT_ASSOC) ||
 				    (sca->sca_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						shared_keys = &stcb->asoc.shared_keys;
 						/*
 						 * clear the cached keys for
 						 * this key id
 						 */
 						sctp_clear_cachedkeys(stcb, sca->sca_keynumber);
 						/*
 						 * create the new shared key
 						 * and insert/replace it
 						 */
 						if (size > 0) {
 							key = sctp_set_key(sca->sca_key, (uint32_t)size);
 							if (key == NULL) {
 								/*
 								 * Allocation failures are
 								 * not reported in this
 								 * loop; the association
 								 * is simply skipped.
 								 */
 								SCTP_TCB_UNLOCK(stcb);
 								continue;
 							}
 						}
 						shared_key = sctp_alloc_sharedkey();
 						if (shared_key == NULL) {
 							sctp_free_key(key);
 							SCTP_TCB_UNLOCK(stcb);
 							continue;
 						}
 						shared_key->key = key;
 						shared_key->keyid = sca->sca_keynumber;
 						error = sctp_insert_sharedkey(shared_keys, shared_key);
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				}
 			}
 			break;
 		}
 	/*
 	 * SCTP_HMAC_IDENT: replace the endpoint's list of local HMAC
 	 * identifiers with the list supplied by the caller.
 	 *
 	 * Fails with EINVAL if the option buffer is too small for the stated
 	 * number of identifiers, if that number exceeds 0xffff, if
 	 * sctp_auth_add_hmacid() rejects an identifier, or if the resulting
 	 * list does not contain SCTP_AUTH_HMAC_ID_SHA1.  Fails with ENOMEM if
 	 * the list cannot be allocated.  The existing list is only replaced
 	 * after all checks have passed.
 	 */
 	case SCTP_HMAC_IDENT:
 		{
 			struct sctp_hmacalgo *shmac;
 			sctp_hmaclist_t *hmaclist;
 			uint16_t hmacid;
 			uint32_t i;
 
 			SCTP_CHECK_AND_CAST(shmac, optval, struct sctp_hmacalgo, optsize);
 			/* Bound the count before it is narrowed to uint16_t below. */
 			if ((optsize < sizeof(struct sctp_hmacalgo) + shmac->shmac_number_of_idents * sizeof(uint16_t)) ||
 			    (shmac->shmac_number_of_idents > 0xffff)) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				break;
 			}
 			hmaclist = sctp_alloc_hmaclist((uint16_t)shmac->shmac_number_of_idents);
 			if (hmaclist == NULL) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
 				error = ENOMEM;
 				break;
 			}
 			for (i = 0; i < shmac->shmac_number_of_idents; i++) {
 				hmacid = shmac->shmac_idents[i];
 				if (sctp_auth_add_hmacid(hmaclist, hmacid)) {
 					 /* invalid HMACs were found */ ;
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 					sctp_free_hmaclist(hmaclist);
 					/*
 					 * A plain break would only leave this for
 					 * loop, hence the jump to the case exit.
 					 */
 					goto sctp_set_hmac_done;
 				}
 			}
 			/* The new list must include SHA1. */
 			for (i = 0; i < hmaclist->num_algo; i++) {
 				if (hmaclist->hmac[i] == SCTP_AUTH_HMAC_ID_SHA1) {
 					/* already in list */
 					break;
 				}
 			}
 			if (i == hmaclist->num_algo) {
 				/* not found in list */
 				sctp_free_hmaclist(hmaclist);
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				break;
 			}
 			/* set it on the endpoint */
 			SCTP_INP_WLOCK(inp);
 			if (inp->sctp_ep.local_hmacs)
 				sctp_free_hmaclist(inp->sctp_ep.local_hmacs);
 			inp->sctp_ep.local_hmacs = hmaclist;
 			SCTP_INP_WUNLOCK(inp);
 	sctp_set_hmac_done:
 			break;
 		}
 	/*
 	 * SCTP_AUTH_ACTIVE_KEY: select scact->scact_keynumber as the active
 	 * authentication key.
 	 *
 	 * A failure on a specific association or on the endpoint is reported
 	 * as EINVAL.  When iterating over all existing associations
 	 * (SCTP_CURRENT_ASSOC / SCTP_ALL_ASSOC) the per-association result is
 	 * ignored.
 	 */
 	case SCTP_AUTH_ACTIVE_KEY:
 		{
 			struct sctp_authkeyid *scact;
 
 			SCTP_CHECK_AND_CAST(scact, optval, struct sctp_authkeyid, optsize);
 			/*
 			 * NOTE(review): stcb is unlocked below without a visible
 			 * lock; SCTP_FIND_STCB presumably returns it locked.
 			 */
 			SCTP_FIND_STCB(inp, stcb, scact->scact_assoc_id);
 
 			/* set the active key on the right place */
 			if (stcb) {
 				/* set the active key on the assoc */
 				if (sctp_auth_setactivekey(stcb,
 				    scact->scact_keynumber)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL,
 					    SCTP_FROM_SCTP_USRREQ,
 					    EINVAL);
 					error = EINVAL;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				/* Endpoint-level active key. */
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (scact->scact_assoc_id == SCTP_FUTURE_ASSOC) ||
 				    (scact->scact_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					if (sctp_auth_setactivekey_ep(inp, scact->scact_keynumber)) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						error = EINVAL;
 					}
 					SCTP_INP_WUNLOCK(inp);
 				}
 				/* Existing associations; return values are not checked. */
 				if ((scact->scact_assoc_id == SCTP_CURRENT_ASSOC) ||
 				    (scact->scact_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						sctp_auth_setactivekey(stcb, scact->scact_keynumber);
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				}
 			}
 			break;
 		}
 	/*
 	 * SCTP_AUTH_DELETE_KEY: delete the shared key scdel->scact_keynumber.
 	 *
 	 * A failure on a specific association or on the endpoint is reported
 	 * as EINVAL.  When iterating over all existing associations
 	 * (SCTP_CURRENT_ASSOC / SCTP_ALL_ASSOC) the per-association result is
 	 * ignored.
 	 */
 	case SCTP_AUTH_DELETE_KEY:
 		{
 			struct sctp_authkeyid *scdel;
 
 			SCTP_CHECK_AND_CAST(scdel, optval, struct sctp_authkeyid, optsize);
 			/*
 			 * NOTE(review): stcb is unlocked below without a visible
 			 * lock; SCTP_FIND_STCB presumably returns it locked.
 			 */
 			SCTP_FIND_STCB(inp, stcb, scdel->scact_assoc_id);
 
 			/* delete the key from the right place */
 			if (stcb) {
 				if (sctp_delete_sharedkey(stcb, scdel->scact_keynumber)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				/* Endpoint-level key list. */
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (scdel->scact_assoc_id == SCTP_FUTURE_ASSOC) ||
 				    (scdel->scact_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					if (sctp_delete_sharedkey_ep(inp, scdel->scact_keynumber)) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						error = EINVAL;
 					}
 					SCTP_INP_WUNLOCK(inp);
 				}
 				/* Existing associations; return values are not checked. */
 				if ((scdel->scact_assoc_id == SCTP_CURRENT_ASSOC) ||
 				    (scdel->scact_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						sctp_delete_sharedkey(stcb, scdel->scact_keynumber);
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				}
 			}
 			break;
 		}
 	/*
 	 * SCTP_AUTH_DEACTIVATE_KEY: deactivate the shared key
 	 * keyid->scact_keynumber.
 	 *
 	 * A failure on a specific association or on the endpoint is reported
 	 * as EINVAL.  When iterating over all existing associations
 	 * (SCTP_CURRENT_ASSOC / SCTP_ALL_ASSOC) the per-association result is
 	 * ignored.
 	 */
 	case SCTP_AUTH_DEACTIVATE_KEY:
 		{
 			struct sctp_authkeyid *keyid;
 
 			SCTP_CHECK_AND_CAST(keyid, optval, struct sctp_authkeyid, optsize);
 			/*
 			 * NOTE(review): stcb is unlocked below without a visible
 			 * lock; SCTP_FIND_STCB presumably returns it locked.
 			 */
 			SCTP_FIND_STCB(inp, stcb, keyid->scact_assoc_id);
 
 			/* deactivate the key from the right place */
 			if (stcb) {
 				if (sctp_deact_sharedkey(stcb, keyid->scact_keynumber)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				/* Endpoint-level key list. */
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (keyid->scact_assoc_id == SCTP_FUTURE_ASSOC) ||
 				    (keyid->scact_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					if (sctp_deact_sharedkey_ep(inp, keyid->scact_keynumber)) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						error = EINVAL;
 					}
 					SCTP_INP_WUNLOCK(inp);
 				}
 				/* Existing associations; return values are not checked. */
 				if ((keyid->scact_assoc_id == SCTP_CURRENT_ASSOC) ||
 				    (keyid->scact_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						sctp_deact_sharedkey(stcb, keyid->scact_keynumber);
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				}
 			}
 			break;
 		}
 	/*
 	 * SCTP_ENABLE_STREAM_RESET: set the local stream-reset support bits.
 	 *
 	 * Any bit outside SCTP_ENABLE_VALUE_MASK is rejected with EINVAL
 	 * before an association lookup is attempted.  The value is stored
 	 * truncated to uint8_t on the found association, else on the endpoint
 	 * (1-to-1 style, SCTP_FUTURE_ASSOC, SCTP_ALL_ASSOC) and/or on every
 	 * existing association (SCTP_CURRENT_ASSOC, SCTP_ALL_ASSOC).
 	 */
 	case SCTP_ENABLE_STREAM_RESET:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			if (av->assoc_value & (~SCTP_ENABLE_VALUE_MASK)) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				break;
 			}
 			/*
 			 * NOTE(review): stcb is unlocked below without a visible
 			 * lock; SCTP_FIND_STCB presumably returns it locked.
 			 */
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 			if (stcb) {
 				stcb->asoc.local_strreset_support = (uint8_t)av->assoc_value;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				/* Endpoint default. */
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC) ||
 				    (av->assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					inp->local_strreset_support = (uint8_t)av->assoc_value;
 					SCTP_INP_WUNLOCK(inp);
 				}
 				/* All associations that currently exist. */
 				if ((av->assoc_id == SCTP_CURRENT_ASSOC) ||
 				    (av->assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						stcb->asoc.local_strreset_support = (uint8_t)av->assoc_value;
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				}
 			}
 			break;
 		}
 	/*
 	 * SCTP_RESET_STREAMS: request a reset of incoming and/or outgoing
 	 * streams on a single association.
 	 *
 	 * srs_flags selects the direction(s); srs_stream_list holds
 	 * srs_number_streams stream ids, and an empty list means "all
 	 * streams".
 	 *
 	 * Errors:
 	 *   ENOENT     - no association found.
 	 *   EOPNOTSUPP - the association has reconfig_supported == 0.
 	 *   EINVAL     - option buffer too small for the stated list, no
 	 *                direction flag given, or a listed stream id is not a
 	 *                valid stream of the requested direction.
 	 *   EALREADY   - an incoming reset was requested while a stream reset
 	 *                is still outstanding.
 	 *   ENOMEM     - an incoming reset lists more than
 	 *                SCTP_MAX_STREAMS_AT_ONCE_RESET streams.
 	 *
 	 * Errors from actually queueing the request are not reported to the
 	 * caller (see the comment near the end).
 	 */
 	case SCTP_RESET_STREAMS:
 		{
 			struct sctp_reset_streams *strrst;
 			int i, send_out = 0;
 			int send_in = 0;
 
 			SCTP_CHECK_AND_CAST(strrst, optval, struct sctp_reset_streams, optsize);
 			/*
 			 * NOTE(review): stcb is unlocked on every exit path below
 			 * without a visible lock; SCTP_FIND_STCB presumably
 			 * returns it locked.
 			 */
 			SCTP_FIND_STCB(inp, stcb, strrst->srs_assoc_id);
 			if (stcb == NULL) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
 				error = ENOENT;
 				break;
 			}
 			if (stcb->asoc.reconfig_supported == 0) {
 				/*
 				 * Peer does not support the chunk type.
 				 */
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
 				error = EOPNOTSUPP;
 				SCTP_TCB_UNLOCK(stcb);
 				break;
 			}
 			/* The stream list must fit inside the supplied buffer. */
 			if (sizeof(struct sctp_reset_streams) +
 			    strrst->srs_number_streams * sizeof(uint16_t) > optsize) {
 				error = EINVAL;
 				SCTP_TCB_UNLOCK(stcb);
 				break;
 			}
 			if (strrst->srs_flags & SCTP_STREAM_RESET_INCOMING) {
 				send_in = 1;
 				if (stcb->asoc.stream_reset_outstanding) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
 					error = EALREADY;
 					SCTP_TCB_UNLOCK(stcb);
 					break;
 				}
 			}
 			if (strrst->srs_flags & SCTP_STREAM_RESET_OUTGOING) {
 				send_out = 1;
 			}
 			if ((strrst->srs_number_streams > SCTP_MAX_STREAMS_AT_ONCE_RESET) && send_in) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOMEM);
 				error = ENOMEM;
 				SCTP_TCB_UNLOCK(stcb);
 				break;
 			}
 			if ((send_in == 0) && (send_out == 0)) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				SCTP_TCB_UNLOCK(stcb);
 				break;
 			}
 			/*
 			 * Validate every listed stream id.  Valid ids are
 			 * 0 .. count - 1, so an id equal to the count must be
 			 * rejected as well; otherwise strmout[] is indexed one
 			 * past its last valid stream further down.
 			 */
 			for (i = 0; i < strrst->srs_number_streams; i++) {
 				if ((send_in) &&
 				    (strrst->srs_stream_list[i] >= stcb->asoc.streamincnt)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 					break;
 				}
 				if ((send_out) &&
 				    (strrst->srs_stream_list[i] >= stcb->asoc.streamoutcnt)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 					break;
 				}
 			}
 			/* The breaks above only leave the for loop. */
 			if (error) {
 				SCTP_TCB_UNLOCK(stcb);
 				break;
 			}
 			if (send_out) {
 				uint16_t strm;
 
 				/* Mark the affected open outgoing streams as reset-pending. */
 				if (strrst->srs_number_streams) {
 					for (i = 0; i < strrst->srs_number_streams; i++) {
 						strm = strrst->srs_stream_list[i];
 						if (stcb->asoc.strmout[strm].state == SCTP_STREAM_OPEN) {
 							stcb->asoc.strmout[strm].state = SCTP_STREAM_RESET_PENDING;
 						}
 					}
 				} else {
 					/* It's all of them */
 					for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
 						if (stcb->asoc.strmout[i].state == SCTP_STREAM_OPEN) {
 							stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_PENDING;
 						}
 					}
 				}
 			}
 			if (send_in) {
 				error = sctp_send_str_reset_req(stcb, strrst->srs_number_streams,
 				    strrst->srs_stream_list,
 				    send_in, 0, 0, 0, 0, 0);
 			} else {
 				error = sctp_send_stream_reset_out_if_possible(stcb, SCTP_SO_LOCKED);
 			}
 			if (error == 0) {
 				sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
 			} else {
 				/*
 				 * For outgoing streams don't report any
 				 * problems in sending the request to the
 				 * application. XXX: Double check resetting
 				 * incoming streams.
 				 */
 				error = 0;
 			}
 			SCTP_TCB_UNLOCK(stcb);
 			break;
 		}
 	/*
 	 * SCTP_ADD_STREAMS: request additional outgoing and/or incoming
 	 * streams on a single association.
 	 *
 	 * addstream is built as a bit mask: bit 0 (value 1) is set when
 	 * outgoing streams are requested, bit 1 (value 2) when incoming
 	 * streams are requested.
 	 *
 	 * Errors:
 	 *   ENOENT     - no association found.
 	 *   EOPNOTSUPP - the association has reconfig_supported == 0.
 	 *   EALREADY   - a stream reset is still outstanding.
 	 *   EINVAL     - nothing requested, a resulting stream count would
 	 *                exceed 0xffff, or the resulting incoming count would
 	 *                exceed max_inbound_streams.  These EINVAL paths do
 	 *                not go through SCTP_LTRACE_ERR_RET.
 	 * Otherwise the result of sctp_send_str_reset_req() is returned.
 	 */
 	case SCTP_ADD_STREAMS:
 		{
 			struct sctp_add_streams *stradd;
 			uint8_t addstream = 0;
 			uint16_t add_o_strmcnt = 0;
 			uint16_t add_i_strmcnt = 0;
 
 			SCTP_CHECK_AND_CAST(stradd, optval, struct sctp_add_streams, optsize);
 			/*
 			 * NOTE(review): stcb is unlocked on every exit path below
 			 * without a visible lock; SCTP_FIND_STCB presumably
 			 * returns it locked.
 			 */
 			SCTP_FIND_STCB(inp, stcb, stradd->sas_assoc_id);
 			if (stcb == NULL) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
 				error = ENOENT;
 				break;
 			}
 			if (stcb->asoc.reconfig_supported == 0) {
 				/*
 				 * Peer does not support the chunk type.
 				 */
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
 				error = EOPNOTSUPP;
 				SCTP_TCB_UNLOCK(stcb);
 				break;
 			}
 			if (stcb->asoc.stream_reset_outstanding) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
 				error = EALREADY;
 				SCTP_TCB_UNLOCK(stcb);
 				break;
 			}
 			if ((stradd->sas_outstrms == 0) &&
 			    (stradd->sas_instrms == 0)) {
 				error = EINVAL;
 				goto skip_stuff;
 			}
 			if (stradd->sas_outstrms) {
 				addstream = 1;
 				/* We allocate here */
 				add_o_strmcnt = stradd->sas_outstrms;
 				/* Sum in int so the 16-bit counters cannot wrap. */
 				if ((((int)add_o_strmcnt) + ((int)stcb->asoc.streamoutcnt)) > 0x0000ffff) {
 					/* You can't have more than 64k */
 					error = EINVAL;
 					goto skip_stuff;
 				}
 			}
 			if (stradd->sas_instrms) {
 				int cnt;
 
 				addstream |= 2;
 				/*
 				 * We allocate inside
 				 * sctp_send_str_reset_req()
 				 */
 				add_i_strmcnt = stradd->sas_instrms;
 				cnt = add_i_strmcnt;
 				cnt += stcb->asoc.streamincnt;
 				if (cnt > 0x0000ffff) {
 					/* You can't have more than 64k */
 					error = EINVAL;
 					goto skip_stuff;
 				}
 				if (cnt > (int)stcb->asoc.max_inbound_streams) {
 					/* More than you are allowed */
 					error = EINVAL;
 					goto skip_stuff;
 				}
 			}
 			error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 0, addstream, add_o_strmcnt, add_i_strmcnt, 0);
 			/* Output is kicked regardless of the result stored in error. */
 			sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
 	skip_stuff:
 			/* Common exit: drop the association lock. */
 			SCTP_TCB_UNLOCK(stcb);
 			break;
 		}
 	/*
 	 * SCTP_RESET_ASSOC: request an association-level reset.  The option
 	 * value is a uint32_t holding the association id.
 	 *
 	 * Errors:
 	 *   ENOENT     - no association found.
 	 *   EOPNOTSUPP - the association has reconfig_supported == 0.
 	 *   EALREADY   - a stream reset is still outstanding.
 	 *   EBUSY      - data is still queued in the send queue, the sent
 	 *                queue, or any outgoing stream queue.
 	 * Otherwise the result of sctp_send_str_reset_req() is returned.
 	 */
 	case SCTP_RESET_ASSOC:
 		{
 			int i;
 			uint32_t *value;
 
 			SCTP_CHECK_AND_CAST(value, optval, uint32_t, optsize);
 			/*
 			 * NOTE(review): stcb is unlocked on every exit path below
 			 * without a visible lock; SCTP_FIND_STCB presumably
 			 * returns it locked.
 			 */
 			SCTP_FIND_STCB(inp, stcb, (sctp_assoc_t)*value);
 			if (stcb == NULL) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
 				error = ENOENT;
 				break;
 			}
 			if (stcb->asoc.reconfig_supported == 0) {
 				/*
 				 * Peer does not support the chunk type.
 				 */
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
 				error = EOPNOTSUPP;
 				SCTP_TCB_UNLOCK(stcb);
 				break;
 			}
 			if (stcb->asoc.stream_reset_outstanding) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
 				error = EALREADY;
 				SCTP_TCB_UNLOCK(stcb);
 				break;
 			}
 			/*
 			 * Is there any data pending in the send or sent
 			 * queues?
 			 */
 			if (!TAILQ_EMPTY(&stcb->asoc.send_queue) ||
 			    !TAILQ_EMPTY(&stcb->asoc.sent_queue)) {
 				/*
 				 * busy_out is also the target of the goto in the
 				 * per-stream check below, so both checks share
 				 * this EBUSY exit.
 				 */
 		busy_out:
 				error = EBUSY;
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 				SCTP_TCB_UNLOCK(stcb);
 				break;
 			}
 			/* Do any streams have data queued? */
 			for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
 				if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
 					goto busy_out;
 				}
 			}
 			error = sctp_send_str_reset_req(stcb, 0, NULL, 0, 1, 0, 0, 0, 0);
 			/* Output is kicked regardless of the result stored in error. */
 			sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_STRRST_REQ, SCTP_SO_LOCKED);
 			SCTP_TCB_UNLOCK(stcb);
 			break;
 		}
 	/*
 	 * SCTP_CONNECT_X: hand the option buffer to sctp_do_connect_x() with
 	 * a final argument of 0.  The buffer must be at least an int plus one
 	 * struct sockaddr_in in size, else EINVAL.
 	 * NOTE(review): the final argument is presumably the "delayed" flag
 	 * (the SCTP_CONNECT_X_DELAYED case passes 1) -- confirm against
 	 * sctp_do_connect_x().
 	 */
 	case SCTP_CONNECT_X:
 		if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) {
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 			error = EINVAL;
 			break;
 		}
 		error = sctp_do_connect_x(so, inp, optval, optsize, p, 0);
 		break;
 	/*
 	 * SCTP_CONNECT_X_DELAYED: same as SCTP_CONNECT_X but calls
 	 * sctp_do_connect_x() with a final argument of 1.  The buffer must be
 	 * at least an int plus one struct sockaddr_in in size, else EINVAL.
 	 * NOTE(review): the final argument is presumably the "delayed" flag
 	 * -- confirm against sctp_do_connect_x().
 	 */
 	case SCTP_CONNECT_X_DELAYED:
 		if (optsize < (sizeof(int) + sizeof(struct sockaddr_in))) {
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 			error = EINVAL;
 			break;
 		}
 		error = sctp_do_connect_x(so, inp, optval, optsize, p, 1);
 		break;
 	/*
 	 * SCTP_CONNECT_X_COMPLETE: complete a delayed connect.  The option
 	 * value is a socket address used to locate the association when the
 	 * endpoint is not flagged SCTP_PCB_FLAGS_CONNECTED.
 	 *
 	 * If the association has delayed_connection set, the flag is cleared,
 	 * the INIT timer is stopped and sctp_send_initiate() is called.
 	 * Otherwise the request fails with EALREADY.  ENOENT is returned when
 	 * no association is found.
 	 */
 	case SCTP_CONNECT_X_COMPLETE:
 		{
 			struct sockaddr *sa;
 
 			/* FIXME MT: check correct? */
 			SCTP_CHECK_AND_CAST(sa, optval, struct sockaddr, optsize);
 
 			/* find tcb */
 			if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
 				/* Connected endpoint: use the first association. */
 				SCTP_INP_RLOCK(inp);
 				stcb = LIST_FIRST(&inp->sctp_asoc_list);
 				if (stcb) {
 					SCTP_TCB_LOCK(stcb);
 				}
 				SCTP_INP_RUNLOCK(inp);
 			} else {
 				/*
 				 * We increment here since
 				 * sctp_findassociation_ep_addr() will do a
 				 * decrement if it finds the stcb as long as
 				 * the locked tcb (last argument) is NOT a
 				 * TCB.. aka NULL.
 				 */
 				SCTP_INP_INCR_REF(inp);
 				stcb = sctp_findassociation_ep_addr(&inp, sa, NULL, NULL, NULL);
 				if (stcb == NULL) {
 					/* Not found: undo our own reference. */
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
 
 			if (stcb == NULL) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
 				error = ENOENT;
 				break;
 			}
 			if (stcb->asoc.delayed_connection == 1) {
 				stcb->asoc.delayed_connection = 0;
 				(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
 				sctp_timer_stop(SCTP_TIMER_TYPE_INIT, inp, stcb,
 				    stcb->asoc.primary_destination,
 				    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_8);
 				sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
 			} else {
 				/*
 				 * already expired or did not use delayed
 				 * connectx
 				 */
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
 				error = EALREADY;
 			}
 			SCTP_TCB_UNLOCK(stcb);
 			break;
 		}
 	/*
 	 * SCTP_MAX_BURST: store av->assoc_value as the max-burst setting.
 	 *
 	 * A found association gets the value directly.  Otherwise the endpoint
 	 * default is updated for 1-to-1 style sockets and for
 	 * SCTP_FUTURE_ASSOC / SCTP_ALL_ASSOC, and every existing association
 	 * is updated for SCTP_CURRENT_ASSOC / SCTP_ALL_ASSOC.  The value is
 	 * not range-checked here.
 	 */
 	case SCTP_MAX_BURST:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			/*
 			 * NOTE(review): stcb is unlocked below without a visible
 			 * lock; SCTP_FIND_STCB presumably returns it locked.
 			 */
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				stcb->asoc.max_burst = av->assoc_value;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				/* Endpoint default. */
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC) ||
 				    (av->assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					inp->sctp_ep.max_burst = av->assoc_value;
 					SCTP_INP_WUNLOCK(inp);
 				}
 				/* All associations that currently exist. */
 				if ((av->assoc_id == SCTP_CURRENT_ASSOC) ||
 				    (av->assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						stcb->asoc.max_burst = av->assoc_value;
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				}
 			}
 			break;
 		}
 	/*
 	 * SCTP_MAXSEG: set the fragmentation point.
 	 *
 	 * A non-zero av->assoc_value is stored with a per-address-family
 	 * overhead (ovh) added; zero selects SCTP_DEFAULT_MAXSEGMENT.
 	 *
 	 * Unlike most sibling options, there is no loop over existing
 	 * associations: without a found association only 1-to-1 style
 	 * sockets and SCTP_FUTURE_ASSOC are accepted (endpoint default);
 	 * everything else, including SCTP_CURRENT_ASSOC and SCTP_ALL_ASSOC,
 	 * yields EINVAL.
 	 */
 	case SCTP_MAXSEG:
 		{
 			struct sctp_assoc_value *av;
 			int ovh;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			/*
 			 * NOTE(review): stcb is unlocked below without a visible
 			 * lock; SCTP_FIND_STCB presumably returns it locked.
 			 */
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			/* Overhead depends on whether the endpoint is bound for IPv6. */
 			if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 				ovh = SCTP_MED_OVERHEAD;
 			} else {
 				ovh = SCTP_MED_V4_OVERHEAD;
 			}
 			if (stcb) {
 				if (av->assoc_value) {
 					stcb->asoc.sctp_frag_point = (av->assoc_value + ovh);
 				} else {
 					stcb->asoc.sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					/*
 					 * FIXME MT: I think this is not in
 					 * tune with the API ID
 					 */
 					if (av->assoc_value) {
 						inp->sctp_frag_point = (av->assoc_value + ovh);
 					} else {
 						inp->sctp_frag_point = SCTP_DEFAULT_MAXSEGMENT;
 					}
 					SCTP_INP_WUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	/*
 	 * SCTP_EVENTS: apply a full struct sctp_event_subscribe.
 	 *
 	 * Every field of the structure is treated as a boolean: non-zero
 	 * turns the matching feature flag on, zero turns it off.  The flags
 	 * are first applied to the endpoint under its write lock, then to
 	 * every existing association under the endpoint read lock.
 	 *
 	 * sctp_data_io_event is only applied to the endpoint; the
 	 * per-association pass below has no counterpart for it.
 	 *
 	 * Finally, if the sender-dry event was requested on a 1-to-1 style
 	 * socket whose first association currently has nothing queued, a
 	 * SCTP_NOTIFY_SENDER_DRY notification is raised right away.
 	 */
 	case SCTP_EVENTS:
 		{
 			struct sctp_event_subscribe *events;
 
 			SCTP_CHECK_AND_CAST(events, optval, struct sctp_event_subscribe, optsize);
 
 			/* Pass 1: endpoint-level feature flags. */
 			SCTP_INP_WLOCK(inp);
 			if (events->sctp_data_io_event) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT);
 			} else {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVDATAIOEVNT);
 			}
 
 			if (events->sctp_association_event) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT);
 			} else {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVASSOCEVNT);
 			}
 
 			if (events->sctp_address_event) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVPADDREVNT);
 			} else {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVPADDREVNT);
 			}
 
 			if (events->sctp_send_failure_event) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT);
 			} else {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVSENDFAILEVNT);
 			}
 
 			if (events->sctp_peer_error_event) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVPEERERR);
 			} else {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVPEERERR);
 			}
 
 			if (events->sctp_shutdown_event) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT);
 			} else {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT);
 			}
 
 			if (events->sctp_partial_delivery_event) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_PDAPIEVNT);
 			} else {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_PDAPIEVNT);
 			}
 
 			if (events->sctp_adaptation_layer_event) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT);
 			} else {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_ADAPTATIONEVNT);
 			}
 
 			if (events->sctp_authentication_event) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_AUTHEVNT);
 			} else {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_AUTHEVNT);
 			}
 
 			if (events->sctp_sender_dry_event) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_DRYEVNT);
 			} else {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_DRYEVNT);
 			}
 
 			if (events->sctp_stream_reset_event) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT);
 			} else {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_STREAM_RESETEVNT);
 			}
 			SCTP_INP_WUNLOCK(inp);
 
 			/*
 			 * Pass 2: mirror the flags onto every existing
 			 * association.  The read lock taken here is held until
 			 * the end of the case.
 			 */
 			SCTP_INP_RLOCK(inp);
 			LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 				SCTP_TCB_LOCK(stcb);
 				if (events->sctp_association_event) {
 					sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT);
 				} else {
 					sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVASSOCEVNT);
 				}
 				if (events->sctp_address_event) {
 					sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVPADDREVNT);
 				} else {
 					sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVPADDREVNT);
 				}
 				if (events->sctp_send_failure_event) {
 					sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT);
 				} else {
 					sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVSENDFAILEVNT);
 				}
 				if (events->sctp_peer_error_event) {
 					sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVPEERERR);
 				} else {
 					sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVPEERERR);
 				}
 				if (events->sctp_shutdown_event) {
 					sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT);
 				} else {
 					sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT);
 				}
 				if (events->sctp_partial_delivery_event) {
 					sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_PDAPIEVNT);
 				} else {
 					sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_PDAPIEVNT);
 				}
 				if (events->sctp_adaptation_layer_event) {
 					sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_ADAPTATIONEVNT);
 				} else {
 					sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_ADAPTATIONEVNT);
 				}
 				if (events->sctp_authentication_event) {
 					sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_AUTHEVNT);
 				} else {
 					sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_AUTHEVNT);
 				}
 				if (events->sctp_sender_dry_event) {
 					sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_DRYEVNT);
 				} else {
 					sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_DRYEVNT);
 				}
 				if (events->sctp_stream_reset_event) {
 					sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT);
 				} else {
 					sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_STREAM_RESETEVNT);
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			}
 			/*
 			 * Send up the sender dry event only for 1-to-1
 			 * style sockets.
 			 */
 			if (events->sctp_sender_dry_event) {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
 					stcb = LIST_FIRST(&inp->sctp_asoc_list);
 					if (stcb) {
 						SCTP_TCB_LOCK(stcb);
 						/* "Dry" means nothing queued anywhere. */
 						if (TAILQ_EMPTY(&stcb->asoc.send_queue) &&
 						    TAILQ_EMPTY(&stcb->asoc.sent_queue) &&
 						    (stcb->asoc.stream_queue_cnt == 0)) {
 							sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_LOCKED);
 						}
 						SCTP_TCB_UNLOCK(stcb);
 					}
 				}
 			}
 			SCTP_INP_RUNLOCK(inp);
 			break;
 		}
 	/*
 	 * SCTP_ADAPTATION_LAYER: store the adaptation layer indication on the
 	 * endpoint and record that one has been provided.
 	 */
 	case SCTP_ADAPTATION_LAYER:
 		{
 			struct sctp_setadaptation *adap_bits;
 
 			SCTP_CHECK_AND_CAST(adap_bits, optval, struct sctp_setadaptation, optsize);
 			SCTP_INP_WLOCK(inp);
 			inp->sctp_ep.adaptation_layer_indicator = adap_bits->ssb_adaptation_ind;
 			/* Distinguishes "explicitly set" from the initial state. */
 			inp->sctp_ep.adaptation_layer_indicator_provided = 1;
 			SCTP_INP_WUNLOCK(inp);
 			break;
 		}
 #ifdef SCTP_DEBUG
 	/*
 	 * SCTP_SET_INITIAL_DBG_SEQ (only compiled with SCTP_DEBUG): store the
 	 * supplied uint32_t in the endpoint's initial_sequence_debug field.
 	 */
 	case SCTP_SET_INITIAL_DBG_SEQ:
 		{
 			uint32_t *vvv;
 
 			SCTP_CHECK_AND_CAST(vvv, optval, uint32_t, optsize);
 			SCTP_INP_WLOCK(inp);
 			inp->sctp_ep.initial_sequence_debug = *vvv;
 			SCTP_INP_WUNLOCK(inp);
 			break;
 		}
 #endif
 	case SCTP_DEFAULT_SEND_PARAM:
 		{
 			struct sctp_sndrcvinfo *s_info;
 
 			SCTP_CHECK_AND_CAST(s_info, optval, struct sctp_sndrcvinfo, optsize);
 			SCTP_FIND_STCB(inp, stcb, s_info->sinfo_assoc_id);
 
 			if (stcb) {
 				if (s_info->sinfo_stream < stcb->asoc.streamoutcnt) {
 					memcpy(&stcb->asoc.def_send, s_info, min(optsize, sizeof(stcb->asoc.def_send)));
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (s_info->sinfo_assoc_id == SCTP_FUTURE_ASSOC) ||
 				    (s_info->sinfo_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					memcpy(&inp->def_send, s_info, min(optsize, sizeof(inp->def_send)));
 					SCTP_INP_WUNLOCK(inp);
 				}
 				if ((s_info->sinfo_assoc_id == SCTP_CURRENT_ASSOC) ||
 				    (s_info->sinfo_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						if (s_info->sinfo_stream < stcb->asoc.streamoutcnt) {
 							memcpy(&stcb->asoc.def_send, s_info, min(optsize, sizeof(stcb->asoc.def_send)));
 						}
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				}
 			}
 			break;
 		}
 	case SCTP_PEER_ADDR_PARAMS:
 		{
 			struct sctp_paddrparams *paddrp;
 			struct sctp_nets *net;
 			struct sockaddr *addr;
 #if defined(INET) && defined(INET6)
 			struct sockaddr_in sin_store;
 #endif
 
 			SCTP_CHECK_AND_CAST(paddrp, optval, struct sctp_paddrparams, optsize);
 			SCTP_FIND_STCB(inp, stcb, paddrp->spp_assoc_id);
 
 #if defined(INET) && defined(INET6)
 			if (paddrp->spp_address.ss_family == AF_INET6) {
 				struct sockaddr_in6 *sin6;
 
 				sin6 = (struct sockaddr_in6 *)&paddrp->spp_address;
 				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 					in6_sin6_2_sin(&sin_store, sin6);
 					addr = (struct sockaddr *)&sin_store;
 				} else {
 					addr = (struct sockaddr *)&paddrp->spp_address;
 				}
 			} else {
 				addr = (struct sockaddr *)&paddrp->spp_address;
 			}
 #else
 			addr = (struct sockaddr *)&paddrp->spp_address;
 #endif
 			if (stcb != NULL) {
 				net = sctp_findnet(stcb, addr);
 			} else {
 				/*
 				 * We increment here since
 				 * sctp_findassociation_ep_addr() wil do a
 				 * decrement if it finds the stcb as long as
 				 * the locked tcb (last argument) is NOT a
 				 * TCB.. aka NULL.
 				 */
 				net = NULL;
 				SCTP_INP_INCR_REF(inp);
 				stcb = sctp_findassociation_ep_addr(&inp, addr,
 				    &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
 			if ((stcb != NULL) && (net == NULL)) {
 #ifdef INET
 				if (addr->sa_family == AF_INET) {
 
 					struct sockaddr_in *sin;
 
 					sin = (struct sockaddr_in *)addr;
 					if (sin->sin_addr.s_addr != INADDR_ANY) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						SCTP_TCB_UNLOCK(stcb);
 						error = EINVAL;
 						break;
 					}
 				} else
 #endif
 #ifdef INET6
 				if (addr->sa_family == AF_INET6) {
 					struct sockaddr_in6 *sin6;
 
 					sin6 = (struct sockaddr_in6 *)addr;
 					if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						SCTP_TCB_UNLOCK(stcb);
 						error = EINVAL;
 						break;
 					}
 				} else
 #endif
 				{
 					error = EAFNOSUPPORT;
 					SCTP_TCB_UNLOCK(stcb);
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 					break;
 				}
 			}
 			/* sanity checks */
 			if ((paddrp->spp_flags & SPP_HB_ENABLE) && (paddrp->spp_flags & SPP_HB_DISABLE)) {
 				if (stcb)
 					SCTP_TCB_UNLOCK(stcb);
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				return (EINVAL);
 			}
 			if ((paddrp->spp_flags & SPP_PMTUD_ENABLE) && (paddrp->spp_flags & SPP_PMTUD_DISABLE)) {
 				if (stcb)
 					SCTP_TCB_UNLOCK(stcb);
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				return (EINVAL);
 			}
 			if (stcb != NULL) {
 				/************************TCB SPECIFIC SET ******************/
 				if (net != NULL) {
 					/************************NET SPECIFIC SET ******************/
 					if (paddrp->spp_flags & SPP_HB_DISABLE) {
 						if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED) &&
 						    !(net->dest_state & SCTP_ADDR_NOHB)) {
 							sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
 							    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_9);
 						}
 						net->dest_state |= SCTP_ADDR_NOHB;
 					}
 					if (paddrp->spp_flags & SPP_HB_ENABLE) {
 						if (paddrp->spp_hbinterval) {
 							net->heart_beat_delay = paddrp->spp_hbinterval;
 						} else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) {
 							net->heart_beat_delay = 0;
 						}
 						sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
 						    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_10);
 						sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
 						net->dest_state &= ~SCTP_ADDR_NOHB;
 					}
 					if (paddrp->spp_flags & SPP_HB_DEMAND) {
 						/* on demand HB */
 						sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
 						sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SOCKOPT, SCTP_SO_LOCKED);
 						sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
 					}
 					if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu >= SCTP_SMALLEST_PMTU)) {
 						if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
 							sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
 							    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_11);
 						}
 						net->dest_state |= SCTP_ADDR_NO_PMTUD;
 						net->mtu = paddrp->spp_pathmtu;
 						switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 						case AF_INET:
 							net->mtu += SCTP_MIN_V4_OVERHEAD;
 							break;
 #endif
 #ifdef INET6
 						case AF_INET6:
 							net->mtu += SCTP_MIN_OVERHEAD;
 							break;
 #endif
 						default:
 							break;
 						}
 						if (net->mtu < stcb->asoc.smallest_mtu) {
 							sctp_pathmtu_adjustment(stcb, net->mtu);
 						}
 					}
 					if (paddrp->spp_flags & SPP_PMTUD_ENABLE) {
 						if (!SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
 							sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
 						}
 						net->dest_state &= ~SCTP_ADDR_NO_PMTUD;
 					}
 					if (paddrp->spp_pathmaxrxt) {
 						if (net->dest_state & SCTP_ADDR_PF) {
 							if (net->error_count > paddrp->spp_pathmaxrxt) {
 								net->dest_state &= ~SCTP_ADDR_PF;
 							}
 						} else {
 							if ((net->error_count <= paddrp->spp_pathmaxrxt) &&
 							    (net->error_count > net->pf_threshold)) {
 								net->dest_state |= SCTP_ADDR_PF;
 								sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
 								sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
 								    stcb->sctp_ep, stcb, net,
 								    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_12);
 								sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
 							}
 						}
 						if (net->dest_state & SCTP_ADDR_REACHABLE) {
 							if (net->error_count > paddrp->spp_pathmaxrxt) {
 								net->dest_state &= ~SCTP_ADDR_REACHABLE;
 								sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED);
 							}
 						} else {
 							if (net->error_count <= paddrp->spp_pathmaxrxt) {
 								net->dest_state |= SCTP_ADDR_REACHABLE;
 								sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED);
 							}
 						}
 						net->failure_threshold = paddrp->spp_pathmaxrxt;
 					}
 					if (paddrp->spp_flags & SPP_DSCP) {
 						net->dscp = paddrp->spp_dscp & 0xfc;
 						net->dscp |= 0x01;
 					}
 #ifdef INET6
 					if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) {
 						if (net->ro._l_addr.sa.sa_family == AF_INET6) {
 							net->flowlabel = paddrp->spp_ipv6_flowlabel & 0x000fffff;
 							net->flowlabel |= 0x80000000;
 						}
 					}
 #endif
 				} else {
 					/************************ASSOC ONLY -- NO NET SPECIFIC SET ******************/
 					if (paddrp->spp_pathmaxrxt != 0) {
 						stcb->asoc.def_net_failure = paddrp->spp_pathmaxrxt;
 						TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 							if (net->dest_state & SCTP_ADDR_PF) {
 								if (net->error_count > paddrp->spp_pathmaxrxt) {
 									net->dest_state &= ~SCTP_ADDR_PF;
 								}
 							} else {
 								if ((net->error_count <= paddrp->spp_pathmaxrxt) &&
 								    (net->error_count > net->pf_threshold)) {
 									net->dest_state |= SCTP_ADDR_PF;
 									sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
 									sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
 									    stcb->sctp_ep, stcb, net,
 									    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_13);
 									sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
 								}
 							}
 							if (net->dest_state & SCTP_ADDR_REACHABLE) {
 								if (net->error_count > paddrp->spp_pathmaxrxt) {
 									net->dest_state &= ~SCTP_ADDR_REACHABLE;
 									sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED);
 								}
 							} else {
 								if (net->error_count <= paddrp->spp_pathmaxrxt) {
 									net->dest_state |= SCTP_ADDR_REACHABLE;
 									sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED);
 								}
 							}
 							net->failure_threshold = paddrp->spp_pathmaxrxt;
 						}
 					}
 					if (paddrp->spp_flags & SPP_HB_ENABLE) {
 						if (paddrp->spp_hbinterval != 0) {
 							stcb->asoc.heart_beat_delay = paddrp->spp_hbinterval;
 						} else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) {
 							stcb->asoc.heart_beat_delay = 0;
 						}
 						/* Turn back on the timer */
 						TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 							if (paddrp->spp_hbinterval != 0) {
 								net->heart_beat_delay = paddrp->spp_hbinterval;
 							} else if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) {
 								net->heart_beat_delay = 0;
 							}
 							if (net->dest_state & SCTP_ADDR_NOHB) {
 								net->dest_state &= ~SCTP_ADDR_NOHB;
 							}
 							sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net,
 							    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_14);
 							sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, inp, stcb, net);
 						}
 						sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
 					}
 					if (paddrp->spp_flags & SPP_HB_DISABLE) {
 						TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 							if (!(net->dest_state & SCTP_ADDR_NOHB)) {
 								net->dest_state |= SCTP_ADDR_NOHB;
 								if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED)) {
 									sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
 									    inp, stcb, net,
 									    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_15);
 								}
 							}
 						}
 						sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
 					}
 					if ((paddrp->spp_flags & SPP_PMTUD_DISABLE) && (paddrp->spp_pathmtu >= SCTP_SMALLEST_PMTU)) {
 						TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 							if (SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
 								sctp_timer_stop(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net,
 								    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_16);
 							}
 							net->dest_state |= SCTP_ADDR_NO_PMTUD;
 							net->mtu = paddrp->spp_pathmtu;
 							switch (net->ro._l_addr.sa.sa_family) {
 #ifdef INET
 							case AF_INET:
 								net->mtu += SCTP_MIN_V4_OVERHEAD;
 								break;
 #endif
 #ifdef INET6
 							case AF_INET6:
 								net->mtu += SCTP_MIN_OVERHEAD;
 								break;
 #endif
 							default:
 								break;
 							}
 							if (net->mtu < stcb->asoc.smallest_mtu) {
 								sctp_pathmtu_adjustment(stcb, net->mtu);
 							}
 						}
 						sctp_stcb_feature_on(inp, stcb, SCTP_PCB_FLAGS_DO_NOT_PMTUD);
 					}
 					if (paddrp->spp_flags & SPP_PMTUD_ENABLE) {
 						TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 							if (!SCTP_OS_TIMER_PENDING(&net->pmtu_timer.timer)) {
 								sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
 							}
 							net->dest_state &= ~SCTP_ADDR_NO_PMTUD;
 						}
 						sctp_stcb_feature_off(inp, stcb, SCTP_PCB_FLAGS_DO_NOT_PMTUD);
 					}
 					if (paddrp->spp_flags & SPP_DSCP) {
 						TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 							net->dscp = paddrp->spp_dscp & 0xfc;
 							net->dscp |= 0x01;
 						}
 						stcb->asoc.default_dscp = paddrp->spp_dscp & 0xfc;
 						stcb->asoc.default_dscp |= 0x01;
 					}
 #ifdef INET6
 					if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) {
 						TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 							if (net->ro._l_addr.sa.sa_family == AF_INET6) {
 								net->flowlabel = paddrp->spp_ipv6_flowlabel & 0x000fffff;
 								net->flowlabel |= 0x80000000;
 							}
 						}
 						stcb->asoc.default_flowlabel = paddrp->spp_ipv6_flowlabel & 0x000fffff;
 						stcb->asoc.default_flowlabel |= 0x80000000;
 					}
 #endif
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				/************************NO TCB, SET TO default stuff ******************/
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (paddrp->spp_assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					/*
 					 * For the TOS/FLOWLABEL stuff you
 					 * set it with the options on the
 					 * socket
 					 */
 					if (paddrp->spp_pathmaxrxt != 0) {
 						inp->sctp_ep.def_net_failure = paddrp->spp_pathmaxrxt;
 					}
 					if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO)
 						inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0;
 					else if (paddrp->spp_hbinterval != 0) {
 						if (paddrp->spp_hbinterval > SCTP_MAX_HB_INTERVAL)
 							paddrp->spp_hbinterval = SCTP_MAX_HB_INTERVAL;
 						inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval);
 					}
 					if (paddrp->spp_flags & SPP_HB_ENABLE) {
 						if (paddrp->spp_flags & SPP_HB_TIME_IS_ZERO) {
 							inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = 0;
 						} else if (paddrp->spp_hbinterval) {
 							inp->sctp_ep.sctp_timeoutticks[SCTP_TIMER_HEARTBEAT] = MSEC_TO_TICKS(paddrp->spp_hbinterval);
 						}
 						sctp_feature_off(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
 					} else if (paddrp->spp_flags & SPP_HB_DISABLE) {
 						sctp_feature_on(inp, SCTP_PCB_FLAGS_DONOT_HEARTBEAT);
 					}
 					if (paddrp->spp_flags & SPP_PMTUD_ENABLE) {
 						sctp_feature_off(inp, SCTP_PCB_FLAGS_DO_NOT_PMTUD);
 					} else if (paddrp->spp_flags & SPP_PMTUD_DISABLE) {
 						sctp_feature_on(inp, SCTP_PCB_FLAGS_DO_NOT_PMTUD);
 					}
 					if (paddrp->spp_flags & SPP_DSCP) {
 						inp->sctp_ep.default_dscp = paddrp->spp_dscp & 0xfc;
 						inp->sctp_ep.default_dscp |= 0x01;
 					}
 #ifdef INET6
 					if (paddrp->spp_flags & SPP_IPV6_FLOWLABEL) {
 						if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 							inp->sctp_ep.default_flowlabel = paddrp->spp_ipv6_flowlabel & 0x000fffff;
 							inp->sctp_ep.default_flowlabel |= 0x80000000;
 						}
 					}
 #endif
 					SCTP_INP_WUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	case SCTP_RTOINFO:
 		{
 			struct sctp_rtoinfo *srto;
 			uint32_t new_init, new_min, new_max;
 
 			SCTP_CHECK_AND_CAST(srto, optval, struct sctp_rtoinfo, optsize);
 			SCTP_FIND_STCB(inp, stcb, srto->srto_assoc_id);
 
 			if (stcb) {
 				if (srto->srto_initial)
 					new_init = srto->srto_initial;
 				else
 					new_init = stcb->asoc.initial_rto;
 				if (srto->srto_max)
 					new_max = srto->srto_max;
 				else
 					new_max = stcb->asoc.maxrto;
 				if (srto->srto_min)
 					new_min = srto->srto_min;
 				else
 					new_min = stcb->asoc.minrto;
 				if ((new_min <= new_init) && (new_init <= new_max)) {
 					stcb->asoc.initial_rto = new_init;
 					stcb->asoc.maxrto = new_max;
 					stcb->asoc.minrto = new_min;
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (srto->srto_assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					if (srto->srto_initial)
 						new_init = srto->srto_initial;
 					else
 						new_init = inp->sctp_ep.initial_rto;
 					if (srto->srto_max)
 						new_max = srto->srto_max;
 					else
 						new_max = inp->sctp_ep.sctp_maxrto;
 					if (srto->srto_min)
 						new_min = srto->srto_min;
 					else
 						new_min = inp->sctp_ep.sctp_minrto;
 					if ((new_min <= new_init) && (new_init <= new_max)) {
 						inp->sctp_ep.initial_rto = new_init;
 						inp->sctp_ep.sctp_maxrto = new_max;
 						inp->sctp_ep.sctp_minrto = new_min;
 					} else {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						error = EINVAL;
 					}
 					SCTP_INP_WUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	case SCTP_ASSOCINFO:
 		{
 			struct sctp_assocparams *sasoc;
 
 			SCTP_CHECK_AND_CAST(sasoc, optval, struct sctp_assocparams, optsize);
 			SCTP_FIND_STCB(inp, stcb, sasoc->sasoc_assoc_id);
 			if (sasoc->sasoc_cookie_life) {
 				/* boundary check the cookie life */
 				if (sasoc->sasoc_cookie_life < 1000)
 					sasoc->sasoc_cookie_life = 1000;
 				if (sasoc->sasoc_cookie_life > SCTP_MAX_COOKIE_LIFE) {
 					sasoc->sasoc_cookie_life = SCTP_MAX_COOKIE_LIFE;
 				}
 			}
 			if (stcb) {
 				if (sasoc->sasoc_asocmaxrxt)
 					stcb->asoc.max_send_times = sasoc->sasoc_asocmaxrxt;
 				if (sasoc->sasoc_cookie_life) {
 					stcb->asoc.cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life);
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (sasoc->sasoc_assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					if (sasoc->sasoc_asocmaxrxt)
 						inp->sctp_ep.max_send_times = sasoc->sasoc_asocmaxrxt;
 					if (sasoc->sasoc_cookie_life) {
 						inp->sctp_ep.def_cookie_life = MSEC_TO_TICKS(sasoc->sasoc_cookie_life);
 					}
 					SCTP_INP_WUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	case SCTP_INITMSG:
 		{
 			struct sctp_initmsg *sinit;
 
 			SCTP_CHECK_AND_CAST(sinit, optval, struct sctp_initmsg, optsize);
 			SCTP_INP_WLOCK(inp);
 			if (sinit->sinit_num_ostreams)
 				inp->sctp_ep.pre_open_stream_count = sinit->sinit_num_ostreams;
 
 			if (sinit->sinit_max_instreams)
 				inp->sctp_ep.max_open_streams_intome = sinit->sinit_max_instreams;
 
 			if (sinit->sinit_max_attempts)
 				inp->sctp_ep.max_init_times = sinit->sinit_max_attempts;
 
 			if (sinit->sinit_max_init_timeo)
 				inp->sctp_ep.initial_init_rto_max = sinit->sinit_max_init_timeo;
 			SCTP_INP_WUNLOCK(inp);
 			break;
 		}
 	case SCTP_PRIMARY_ADDR:
 		{
 			struct sctp_setprim *spa;
 			struct sctp_nets *net;
 			struct sockaddr *addr;
 #if defined(INET) && defined(INET6)
 			struct sockaddr_in sin_store;
 #endif
 
 			SCTP_CHECK_AND_CAST(spa, optval, struct sctp_setprim, optsize);
 			SCTP_FIND_STCB(inp, stcb, spa->ssp_assoc_id);
 
 #if defined(INET) && defined(INET6)
 			if (spa->ssp_addr.ss_family == AF_INET6) {
 				struct sockaddr_in6 *sin6;
 
 				sin6 = (struct sockaddr_in6 *)&spa->ssp_addr;
 				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 					in6_sin6_2_sin(&sin_store, sin6);
 					addr = (struct sockaddr *)&sin_store;
 				} else {
 					addr = (struct sockaddr *)&spa->ssp_addr;
 				}
 			} else {
 				addr = (struct sockaddr *)&spa->ssp_addr;
 			}
 #else
 			addr = (struct sockaddr *)&spa->ssp_addr;
 #endif
 			if (stcb != NULL) {
 				net = sctp_findnet(stcb, addr);
 			} else {
 				/*
 				 * We increment here since
 				 * sctp_findassociation_ep_addr() wil do a
 				 * decrement if it finds the stcb as long as
 				 * the locked tcb (last argument) is NOT a
 				 * TCB.. aka NULL.
 				 */
 				net = NULL;
 				SCTP_INP_INCR_REF(inp);
 				stcb = sctp_findassociation_ep_addr(&inp, addr,
 				    &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
 
 			if ((stcb != NULL) && (net != NULL)) {
 				if (net != stcb->asoc.primary_destination) {
 					if (!(net->dest_state & SCTP_ADDR_UNCONFIRMED)) {
 						/* Ok we need to set it */
 						if (sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net) == 0) {
 							if ((stcb->asoc.alternate) &&
 							    (!(net->dest_state & SCTP_ADDR_PF)) &&
 							    (net->dest_state & SCTP_ADDR_REACHABLE)) {
 								sctp_free_remote_addr(stcb->asoc.alternate);
 								stcb->asoc.alternate = NULL;
 							}
 						} else {
 							SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 							error = EINVAL;
 						}
 					} else {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						error = EINVAL;
 					}
 				}
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 			}
 			if (stcb != NULL) {
 				SCTP_TCB_UNLOCK(stcb);
 			}
 			break;
 		}
 	case SCTP_SET_DYNAMIC_PRIMARY:
 		{
 			union sctp_sockstore *ss;
 
 			error = priv_check(curthread,
 			    PRIV_NETINET_RESERVEDPORT);
 			if (error)
 				break;
 
 			SCTP_CHECK_AND_CAST(ss, optval, union sctp_sockstore, optsize);
 			/* SUPER USER CHECK? */
 			error = sctp_dynamic_set_primary(&ss->sa, vrf_id);
 			break;
 		}
 	case SCTP_SET_PEER_PRIMARY_ADDR:
 		{
 			struct sctp_setpeerprim *sspp;
 			struct sockaddr *addr;
 #if defined(INET) && defined(INET6)
 			struct sockaddr_in sin_store;
 #endif
 
 			SCTP_CHECK_AND_CAST(sspp, optval, struct sctp_setpeerprim, optsize);
 			SCTP_FIND_STCB(inp, stcb, sspp->sspp_assoc_id);
 			if (stcb != NULL) {
 				struct sctp_ifa *ifa;
 
 #if defined(INET) && defined(INET6)
 				if (sspp->sspp_addr.ss_family == AF_INET6) {
 					struct sockaddr_in6 *sin6;
 
 					sin6 = (struct sockaddr_in6 *)&sspp->sspp_addr;
 					if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 						in6_sin6_2_sin(&sin_store, sin6);
 						addr = (struct sockaddr *)&sin_store;
 					} else {
 						addr = (struct sockaddr *)&sspp->sspp_addr;
 					}
 				} else {
 					addr = (struct sockaddr *)&sspp->sspp_addr;
 				}
 #else
 				addr = (struct sockaddr *)&sspp->sspp_addr;
 #endif
 				ifa = sctp_find_ifa_by_addr(addr, stcb->asoc.vrf_id, SCTP_ADDR_NOT_LOCKED);
 				if (ifa == NULL) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 					goto out_of_it;
 				}
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
 					/*
 					 * Must validate the ifa found is in
 					 * our ep
 					 */
 					struct sctp_laddr *laddr;
 					int found = 0;
 
 					LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 						if (laddr->ifa == NULL) {
 							SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n",
 							    __func__);
 							continue;
 						}
 						if ((sctp_is_addr_restricted(stcb, laddr->ifa)) &&
 						    (!sctp_is_addr_pending(stcb, laddr->ifa))) {
 							continue;
 						}
 						if (laddr->ifa == ifa) {
 							found = 1;
 							break;
 						}
 					}
 					if (!found) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						error = EINVAL;
 						goto out_of_it;
 					}
 				} else {
 					switch (addr->sa_family) {
 #ifdef INET
 					case AF_INET:
 						{
 							struct sockaddr_in *sin;
 
 							sin = (struct sockaddr_in *)addr;
 							if (prison_check_ip4(inp->ip_inp.inp.inp_cred,
 							    &sin->sin_addr) != 0) {
 								SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 								error = EINVAL;
 								goto out_of_it;
 							}
 							break;
 						}
 #endif
 #ifdef INET6
 					case AF_INET6:
 						{
 							struct sockaddr_in6 *sin6;
 
 							sin6 = (struct sockaddr_in6 *)addr;
 							if (prison_check_ip6(inp->ip_inp.inp.inp_cred,
 							    &sin6->sin6_addr) != 0) {
 								SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 								error = EINVAL;
 								goto out_of_it;
 							}
 							break;
 						}
 #endif
 					default:
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						error = EINVAL;
 						goto out_of_it;
 					}
 				}
 				if (sctp_set_primary_ip_address_sa(stcb, addr) != 0) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 				sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_SOCKOPT, SCTP_SO_LOCKED);
 		out_of_it:
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 			}
 			break;
 		}
 	case SCTP_BINDX_ADD_ADDR:
 		{
 			struct sctp_getaddresses *addrs;
 			struct thread *td;
 
 			td = (struct thread *)p;
 			SCTP_CHECK_AND_CAST(addrs, optval, struct sctp_getaddresses,
 			    optsize);
 #ifdef INET
 			if (addrs->addr->sa_family == AF_INET) {
 				if (optsize < sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 					break;
 				}
 				if (td != NULL && (error = prison_local_ip4(td->td_ucred, &(((struct sockaddr_in *)(addrs->addr))->sin_addr)))) {
 					SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error);
 					break;
 				}
 			} else
 #endif
 #ifdef INET6
 			if (addrs->addr->sa_family == AF_INET6) {
 				if (optsize < sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 					break;
 				}
 				if (td != NULL && (error = prison_local_ip6(td->td_ucred, &(((struct sockaddr_in6 *)(addrs->addr))->sin6_addr),
 				    (SCTP_IPV6_V6ONLY(inp) != 0))) != 0) {
 					SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error);
 					break;
 				}
 			} else
 #endif
 			{
 				error = EAFNOSUPPORT;
 				break;
 			}
 			sctp_bindx_add_address(so, inp, addrs->addr,
 			    addrs->sget_assoc_id, vrf_id,
 			    &error, p);
 			break;
 		}
 	case SCTP_BINDX_REM_ADDR:
 		{
 			struct sctp_getaddresses *addrs;
 			struct thread *td;
 
 			td = (struct thread *)p;
 
 			SCTP_CHECK_AND_CAST(addrs, optval, struct sctp_getaddresses, optsize);
 #ifdef INET
 			if (addrs->addr->sa_family == AF_INET) {
 				if (optsize < sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 					break;
 				}
 				if (td != NULL && (error = prison_local_ip4(td->td_ucred, &(((struct sockaddr_in *)(addrs->addr))->sin_addr)))) {
 					SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error);
 					break;
 				}
 			} else
 #endif
 #ifdef INET6
 			if (addrs->addr->sa_family == AF_INET6) {
 				if (optsize < sizeof(struct sctp_getaddresses) - sizeof(struct sockaddr) + sizeof(struct sockaddr_in6)) {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 					break;
 				}
 				if (td != NULL &&
 				    (error = prison_local_ip6(td->td_ucred,
 				    &(((struct sockaddr_in6 *)(addrs->addr))->sin6_addr),
 				    (SCTP_IPV6_V6ONLY(inp) != 0))) != 0) {
 					SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, error);
 					break;
 				}
 			} else
 #endif
 			{
 				error = EAFNOSUPPORT;
 				break;
 			}
 			sctp_bindx_delete_address(inp, addrs->addr,
 			    addrs->sget_assoc_id, vrf_id,
 			    &error);
 			break;
 		}
 	case SCTP_EVENT:
 		{
 			struct sctp_event *event;
 			uint32_t event_type;
 
 			SCTP_CHECK_AND_CAST(event, optval, struct sctp_event, optsize);
 			SCTP_FIND_STCB(inp, stcb, event->se_assoc_id);
 			switch (event->se_type) {
 			case SCTP_ASSOC_CHANGE:
 				event_type = SCTP_PCB_FLAGS_RECVASSOCEVNT;
 				break;
 			case SCTP_PEER_ADDR_CHANGE:
 				event_type = SCTP_PCB_FLAGS_RECVPADDREVNT;
 				break;
 			case SCTP_REMOTE_ERROR:
 				event_type = SCTP_PCB_FLAGS_RECVPEERERR;
 				break;
 			case SCTP_SEND_FAILED:
 				event_type = SCTP_PCB_FLAGS_RECVSENDFAILEVNT;
 				break;
 			case SCTP_SHUTDOWN_EVENT:
 				event_type = SCTP_PCB_FLAGS_RECVSHUTDOWNEVNT;
 				break;
 			case SCTP_ADAPTATION_INDICATION:
 				event_type = SCTP_PCB_FLAGS_ADAPTATIONEVNT;
 				break;
 			case SCTP_PARTIAL_DELIVERY_EVENT:
 				event_type = SCTP_PCB_FLAGS_PDAPIEVNT;
 				break;
 			case SCTP_AUTHENTICATION_EVENT:
 				event_type = SCTP_PCB_FLAGS_AUTHEVNT;
 				break;
 			case SCTP_STREAM_RESET_EVENT:
 				event_type = SCTP_PCB_FLAGS_STREAM_RESETEVNT;
 				break;
 			case SCTP_SENDER_DRY_EVENT:
 				event_type = SCTP_PCB_FLAGS_DRYEVNT;
 				break;
 			case SCTP_NOTIFICATIONS_STOPPED_EVENT:
 				event_type = 0;
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP);
 				error = ENOTSUP;
 				break;
 			case SCTP_ASSOC_RESET_EVENT:
 				event_type = SCTP_PCB_FLAGS_ASSOC_RESETEVNT;
 				break;
 			case SCTP_STREAM_CHANGE_EVENT:
 				event_type = SCTP_PCB_FLAGS_STREAM_CHANGEEVNT;
 				break;
 			case SCTP_SEND_FAILED_EVENT:
 				event_type = SCTP_PCB_FLAGS_RECVNSENDFAILEVNT;
 				break;
 			default:
 				event_type = 0;
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				break;
 			}
 			if (event_type > 0) {
 				if (stcb) {
 					if (event->se_on) {
 						sctp_stcb_feature_on(inp, stcb, event_type);
 						if (event_type == SCTP_PCB_FLAGS_DRYEVNT) {
 							if (TAILQ_EMPTY(&stcb->asoc.send_queue) &&
 							    TAILQ_EMPTY(&stcb->asoc.sent_queue) &&
 							    (stcb->asoc.stream_queue_cnt == 0)) {
 								sctp_ulp_notify(SCTP_NOTIFY_SENDER_DRY, stcb, 0, NULL, SCTP_SO_LOCKED);
 							}
 						}
 					} else {
 						sctp_stcb_feature_off(inp, stcb, event_type);
 					}
 					SCTP_TCB_UNLOCK(stcb);
 				} else {
 					/*
 					 * We don't want to send up a storm
 					 * of events, so return an error for
 					 * sender dry events
 					 */
 					if ((event_type == SCTP_PCB_FLAGS_DRYEVNT) &&
 					    ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) == 0) &&
 					    ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) == 0) &&
 					    ((event->se_assoc_id == SCTP_ALL_ASSOC) ||
 					    (event->se_assoc_id == SCTP_CURRENT_ASSOC))) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTSUP);
 						error = ENOTSUP;
 						break;
 					}
 					if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 					    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 					    (event->se_assoc_id == SCTP_FUTURE_ASSOC) ||
 					    (event->se_assoc_id == SCTP_ALL_ASSOC)) {
 						SCTP_INP_WLOCK(inp);
 						if (event->se_on) {
 							sctp_feature_on(inp, event_type);
 						} else {
 							sctp_feature_off(inp, event_type);
 						}
 						SCTP_INP_WUNLOCK(inp);
 					}
 					if ((event->se_assoc_id == SCTP_CURRENT_ASSOC) ||
 					    (event->se_assoc_id == SCTP_ALL_ASSOC)) {
 						SCTP_INP_RLOCK(inp);
 						LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 							SCTP_TCB_LOCK(stcb);
 							if (event->se_on) {
 								sctp_stcb_feature_on(inp, stcb, event_type);
 							} else {
 								sctp_stcb_feature_off(inp, stcb, event_type);
 							}
 							SCTP_TCB_UNLOCK(stcb);
 						}
 						SCTP_INP_RUNLOCK(inp);
 					}
 				}
 			}
 			break;
 		}
 	case SCTP_RECVRCVINFO:
 		{
 			int *onoff;
 
 			SCTP_CHECK_AND_CAST(onoff, optval, int, optsize);
 			SCTP_INP_WLOCK(inp);
 			if (*onoff != 0) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVRCVINFO);
 			} else {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVRCVINFO);
 			}
 			SCTP_INP_WUNLOCK(inp);
 			break;
 		}
 	case SCTP_RECVNXTINFO:
 		{
 			int *onoff;
 
 			SCTP_CHECK_AND_CAST(onoff, optval, int, optsize);
 			SCTP_INP_WLOCK(inp);
 			if (*onoff != 0) {
 				sctp_feature_on(inp, SCTP_PCB_FLAGS_RECVNXTINFO);
 			} else {
 				sctp_feature_off(inp, SCTP_PCB_FLAGS_RECVNXTINFO);
 			}
 			SCTP_INP_WUNLOCK(inp);
 			break;
 		}
 	case SCTP_DEFAULT_SNDINFO:
 		{
 			struct sctp_sndinfo *info;
 			uint16_t policy;
 
 			SCTP_CHECK_AND_CAST(info, optval, struct sctp_sndinfo, optsize);
 			SCTP_FIND_STCB(inp, stcb, info->snd_assoc_id);
 
 			if (stcb) {
 				if (info->snd_sid < stcb->asoc.streamoutcnt) {
 					stcb->asoc.def_send.sinfo_stream = info->snd_sid;
 					policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags);
 					stcb->asoc.def_send.sinfo_flags = info->snd_flags;
 					stcb->asoc.def_send.sinfo_flags |= policy;
 					stcb->asoc.def_send.sinfo_ppid = info->snd_ppid;
 					stcb->asoc.def_send.sinfo_context = info->snd_context;
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (info->snd_assoc_id == SCTP_FUTURE_ASSOC) ||
 				    (info->snd_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					inp->def_send.sinfo_stream = info->snd_sid;
 					policy = PR_SCTP_POLICY(inp->def_send.sinfo_flags);
 					inp->def_send.sinfo_flags = info->snd_flags;
 					inp->def_send.sinfo_flags |= policy;
 					inp->def_send.sinfo_ppid = info->snd_ppid;
 					inp->def_send.sinfo_context = info->snd_context;
 					SCTP_INP_WUNLOCK(inp);
 				}
 				if ((info->snd_assoc_id == SCTP_CURRENT_ASSOC) ||
 				    (info->snd_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						if (info->snd_sid < stcb->asoc.streamoutcnt) {
 							stcb->asoc.def_send.sinfo_stream = info->snd_sid;
 							policy = PR_SCTP_POLICY(stcb->asoc.def_send.sinfo_flags);
 							stcb->asoc.def_send.sinfo_flags = info->snd_flags;
 							stcb->asoc.def_send.sinfo_flags |= policy;
 							stcb->asoc.def_send.sinfo_ppid = info->snd_ppid;
 							stcb->asoc.def_send.sinfo_context = info->snd_context;
 						}
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				}
 			}
 			break;
 		}
 	case SCTP_DEFAULT_PRINFO:
 		{
 			struct sctp_default_prinfo *info;
 
 			SCTP_CHECK_AND_CAST(info, optval, struct sctp_default_prinfo, optsize);
 			SCTP_FIND_STCB(inp, stcb, info->pr_assoc_id);
 
 			if (info->pr_policy > SCTP_PR_SCTP_MAX) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				break;
 			}
 			if (stcb) {
 				stcb->asoc.def_send.sinfo_flags &= 0xfff0;
 				stcb->asoc.def_send.sinfo_flags |= info->pr_policy;
 				stcb->asoc.def_send.sinfo_timetolive = info->pr_value;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (info->pr_assoc_id == SCTP_FUTURE_ASSOC) ||
 				    (info->pr_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					inp->def_send.sinfo_flags &= 0xfff0;
 					inp->def_send.sinfo_flags |= info->pr_policy;
 					inp->def_send.sinfo_timetolive = info->pr_value;
 					SCTP_INP_WUNLOCK(inp);
 				}
 				if ((info->pr_assoc_id == SCTP_CURRENT_ASSOC) ||
 				    (info->pr_assoc_id == SCTP_ALL_ASSOC)) {
 					SCTP_INP_RLOCK(inp);
 					LIST_FOREACH(stcb, &inp->sctp_asoc_list, sctp_tcblist) {
 						SCTP_TCB_LOCK(stcb);
 						stcb->asoc.def_send.sinfo_flags &= 0xfff0;
 						stcb->asoc.def_send.sinfo_flags |= info->pr_policy;
 						stcb->asoc.def_send.sinfo_timetolive = info->pr_value;
 						SCTP_TCB_UNLOCK(stcb);
 					}
 					SCTP_INP_RUNLOCK(inp);
 				}
 			}
 			break;
 		}
 	case SCTP_PEER_ADDR_THLDS:
 		/* Applies to the specific association */
 		{
 			struct sctp_paddrthlds *thlds;
 			struct sctp_nets *net;
 			struct sockaddr *addr;
 #if defined(INET) && defined(INET6)
 			struct sockaddr_in sin_store;
 #endif
 
 			SCTP_CHECK_AND_CAST(thlds, optval, struct sctp_paddrthlds, optsize);
 			SCTP_FIND_STCB(inp, stcb, thlds->spt_assoc_id);
 
 #if defined(INET) && defined(INET6)
 			if (thlds->spt_address.ss_family == AF_INET6) {
 				struct sockaddr_in6 *sin6;
 
 				sin6 = (struct sockaddr_in6 *)&thlds->spt_address;
 				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 					in6_sin6_2_sin(&sin_store, sin6);
 					addr = (struct sockaddr *)&sin_store;
 				} else {
 					addr = (struct sockaddr *)&thlds->spt_address;
 				}
 			} else {
 				addr = (struct sockaddr *)&thlds->spt_address;
 			}
 #else
 			addr = (struct sockaddr *)&thlds->spt_address;
 #endif
 			if (stcb != NULL) {
 				net = sctp_findnet(stcb, addr);
 			} else {
 				/*
 				 * We increment here since
 				 * sctp_findassociation_ep_addr() will do a
 				 * decrement if it finds the stcb as long as
 				 * the locked tcb (last argument) is NOT a
 				 * TCB.. aka NULL.
 				 */
 				net = NULL;
 				SCTP_INP_INCR_REF(inp);
 				stcb = sctp_findassociation_ep_addr(&inp, addr,
 				    &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
 			if ((stcb != NULL) && (net == NULL)) {
 #ifdef INET
 				if (addr->sa_family == AF_INET) {
 
 					struct sockaddr_in *sin;
 
 					sin = (struct sockaddr_in *)addr;
 					if (sin->sin_addr.s_addr != INADDR_ANY) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						SCTP_TCB_UNLOCK(stcb);
 						error = EINVAL;
 						break;
 					}
 				} else
 #endif
 #ifdef INET6
 				if (addr->sa_family == AF_INET6) {
 					struct sockaddr_in6 *sin6;
 
 					sin6 = (struct sockaddr_in6 *)addr;
 					if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						SCTP_TCB_UNLOCK(stcb);
 						error = EINVAL;
 						break;
 					}
 				} else
 #endif
 				{
 					error = EAFNOSUPPORT;
 					SCTP_TCB_UNLOCK(stcb);
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 					break;
 				}
 			}
 			if (thlds->spt_pathcpthld != 0xffff) {
 				error = EINVAL;
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 				break;
 			}
 			if (stcb != NULL) {
 				if (net != NULL) {
 					net->failure_threshold = thlds->spt_pathmaxrxt;
 					net->pf_threshold = thlds->spt_pathpfthld;
 					if (net->dest_state & SCTP_ADDR_PF) {
 						if ((net->error_count > net->failure_threshold) ||
 						    (net->error_count <= net->pf_threshold)) {
 							net->dest_state &= ~SCTP_ADDR_PF;
 						}
 					} else {
 						if ((net->error_count > net->pf_threshold) &&
 						    (net->error_count <= net->failure_threshold)) {
 							net->dest_state |= SCTP_ADDR_PF;
 							sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
 							sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
 							    stcb->sctp_ep, stcb, net,
 							    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_17);
 							sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
 						}
 					}
 					if (net->dest_state & SCTP_ADDR_REACHABLE) {
 						if (net->error_count > net->failure_threshold) {
 							net->dest_state &= ~SCTP_ADDR_REACHABLE;
 							sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED);
 						}
 					} else {
 						if (net->error_count <= net->failure_threshold) {
 							net->dest_state |= SCTP_ADDR_REACHABLE;
 							sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED);
 						}
 					}
 				} else {
 					TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 						net->failure_threshold = thlds->spt_pathmaxrxt;
 						net->pf_threshold = thlds->spt_pathpfthld;
 						if (net->dest_state & SCTP_ADDR_PF) {
 							if ((net->error_count > net->failure_threshold) ||
 							    (net->error_count <= net->pf_threshold)) {
 								net->dest_state &= ~SCTP_ADDR_PF;
 							}
 						} else {
 							if ((net->error_count > net->pf_threshold) &&
 							    (net->error_count <= net->failure_threshold)) {
 								net->dest_state |= SCTP_ADDR_PF;
 								sctp_send_hb(stcb, net, SCTP_SO_LOCKED);
 								sctp_timer_stop(SCTP_TIMER_TYPE_HEARTBEAT,
 								    stcb->sctp_ep, stcb, net,
 								    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_18);
 								sctp_timer_start(SCTP_TIMER_TYPE_HEARTBEAT, stcb->sctp_ep, stcb, net);
 							}
 						}
 						if (net->dest_state & SCTP_ADDR_REACHABLE) {
 							if (net->error_count > net->failure_threshold) {
 								net->dest_state &= ~SCTP_ADDR_REACHABLE;
 								sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, net, SCTP_SO_LOCKED);
 							}
 						} else {
 							if (net->error_count <= net->failure_threshold) {
 								net->dest_state |= SCTP_ADDR_REACHABLE;
 								sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb, 0, net, SCTP_SO_LOCKED);
 							}
 						}
 					}
 					stcb->asoc.def_net_failure = thlds->spt_pathmaxrxt;
 					stcb->asoc.def_net_pf_threshold = thlds->spt_pathpfthld;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (thlds->spt_assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					inp->sctp_ep.def_net_failure = thlds->spt_pathmaxrxt;
 					inp->sctp_ep.def_net_pf_threshold = thlds->spt_pathpfthld;
 					SCTP_INP_WUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	case SCTP_REMOTE_UDP_ENCAPS_PORT:
 		{
 			struct sctp_udpencaps *encaps;
 			struct sctp_nets *net;
 			struct sockaddr *addr;
 #if defined(INET) && defined(INET6)
 			struct sockaddr_in sin_store;
 #endif
 
 			SCTP_CHECK_AND_CAST(encaps, optval, struct sctp_udpencaps, optsize);
 			SCTP_FIND_STCB(inp, stcb, encaps->sue_assoc_id);
 
 #if defined(INET) && defined(INET6)
 			if (encaps->sue_address.ss_family == AF_INET6) {
 				struct sockaddr_in6 *sin6;
 
 				sin6 = (struct sockaddr_in6 *)&encaps->sue_address;
 				if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
 					in6_sin6_2_sin(&sin_store, sin6);
 					addr = (struct sockaddr *)&sin_store;
 				} else {
 					addr = (struct sockaddr *)&encaps->sue_address;
 				}
 			} else {
 				addr = (struct sockaddr *)&encaps->sue_address;
 			}
 #else
 			addr = (struct sockaddr *)&encaps->sue_address;
 #endif
 			if (stcb != NULL) {
 				net = sctp_findnet(stcb, addr);
 			} else {
 				/*
 				 * We increment here since
 				 * sctp_findassociation_ep_addr() will do a
 				 * decrement if it finds the stcb as long as
 				 * the locked tcb (last argument) is NOT a
 				 * TCB.. aka NULL.
 				 */
 				net = NULL;
 				SCTP_INP_INCR_REF(inp);
 				stcb = sctp_findassociation_ep_addr(&inp, addr, &net, NULL, NULL);
 				if (stcb == NULL) {
 					SCTP_INP_DECR_REF(inp);
 				}
 			}
 			if ((stcb != NULL) && (net == NULL)) {
 #ifdef INET
 				if (addr->sa_family == AF_INET) {
 
 					struct sockaddr_in *sin;
 
 					sin = (struct sockaddr_in *)addr;
 					if (sin->sin_addr.s_addr != INADDR_ANY) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						SCTP_TCB_UNLOCK(stcb);
 						error = EINVAL;
 						break;
 					}
 				} else
 #endif
 #ifdef INET6
 				if (addr->sa_family == AF_INET6) {
 					struct sockaddr_in6 *sin6;
 
 					sin6 = (struct sockaddr_in6 *)addr;
 					if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						SCTP_TCB_UNLOCK(stcb);
 						error = EINVAL;
 						break;
 					}
 				} else
 #endif
 				{
 					error = EAFNOSUPPORT;
 					SCTP_TCB_UNLOCK(stcb);
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 					break;
 				}
 			}
 			if (stcb != NULL) {
 				if (net != NULL) {
 					net->port = encaps->sue_port;
 				} else {
 					stcb->asoc.port = encaps->sue_port;
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (encaps->sue_assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					inp->sctp_ep.port = encaps->sue_port;
 					SCTP_INP_WUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	case SCTP_ECN_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					if (av->assoc_value == 0) {
 						inp->ecn_supported = 0;
 					} else {
 						inp->ecn_supported = 1;
 					}
 					SCTP_INP_WUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	case SCTP_PR_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					if (av->assoc_value == 0) {
 						inp->prsctp_supported = 0;
 					} else {
 						inp->prsctp_supported = 1;
 					}
 					SCTP_INP_WUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	case SCTP_AUTH_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					if ((av->assoc_value == 0) &&
 					    (inp->asconf_supported == 1)) {
 						/*
 						 * AUTH is required for
 						 * ASCONF
 						 */
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						error = EINVAL;
 					} else {
 						SCTP_INP_WLOCK(inp);
 						if (av->assoc_value == 0) {
 							inp->auth_supported = 0;
 						} else {
 							inp->auth_supported = 1;
 						}
 						SCTP_INP_WUNLOCK(inp);
 					}
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	case SCTP_ASCONF_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					if ((av->assoc_value != 0) &&
 					    (inp->auth_supported == 0)) {
 						/*
 						 * AUTH is required for
 						 * ASCONF
 						 */
 						SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 						error = EINVAL;
 					} else {
 						SCTP_INP_WLOCK(inp);
 						if (av->assoc_value == 0) {
 							inp->asconf_supported = 0;
 							sctp_auth_delete_chunk(SCTP_ASCONF,
 							    inp->sctp_ep.local_auth_chunks);
 							sctp_auth_delete_chunk(SCTP_ASCONF_ACK,
 							    inp->sctp_ep.local_auth_chunks);
 						} else {
 							inp->asconf_supported = 1;
 							sctp_auth_add_chunk(SCTP_ASCONF,
 							    inp->sctp_ep.local_auth_chunks);
 							sctp_auth_add_chunk(SCTP_ASCONF_ACK,
 							    inp->sctp_ep.local_auth_chunks);
 						}
 						SCTP_INP_WUNLOCK(inp);
 					}
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	case SCTP_RECONFIG_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					if (av->assoc_value == 0) {
 						inp->reconfig_supported = 0;
 					} else {
 						inp->reconfig_supported = 1;
 					}
 					SCTP_INP_WUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	case SCTP_NRSACK_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					if (av->assoc_value == 0) {
 						inp->nrsack_supported = 0;
 					} else {
 						inp->nrsack_supported = 1;
 					}
 					SCTP_INP_WUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	case SCTP_PKTDROP_SUPPORTED:
 		{
 			struct sctp_assoc_value *av;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				error = EINVAL;
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					if (av->assoc_value == 0) {
 						inp->pktdrop_supported = 0;
 					} else {
 						inp->pktdrop_supported = 1;
 					}
 					SCTP_INP_WUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	case SCTP_MAX_CWND:
 		{
 			struct sctp_assoc_value *av;
 			struct sctp_nets *net;
 
 			SCTP_CHECK_AND_CAST(av, optval, struct sctp_assoc_value, optsize);
 			SCTP_FIND_STCB(inp, stcb, av->assoc_id);
 
 			if (stcb) {
 				stcb->asoc.max_cwnd = av->assoc_value;
 				if (stcb->asoc.max_cwnd > 0) {
 					TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 						if ((net->cwnd > stcb->asoc.max_cwnd) &&
 						    (net->cwnd > (net->mtu - sizeof(struct sctphdr)))) {
 							net->cwnd = stcb->asoc.max_cwnd;
 							if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
 								net->cwnd = net->mtu - sizeof(struct sctphdr);
 							}
 						}
 					}
 				}
 				SCTP_TCB_UNLOCK(stcb);
 			} else {
 				if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) ||
 				    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) ||
 				    (av->assoc_id == SCTP_FUTURE_ASSOC)) {
 					SCTP_INP_WLOCK(inp);
 					inp->max_cwnd = av->assoc_value;
 					SCTP_INP_WUNLOCK(inp);
 				} else {
 					SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 					error = EINVAL;
 				}
 			}
 			break;
 		}
 	default:
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOPROTOOPT);
 		error = ENOPROTOOPT;
 		break;
 	}			/* end switch (opt) */
 	return (error);
 }
 
 int
 sctp_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	void *optval = NULL;
 	size_t optsize = 0;
 	void *p;
 	int error = 0;
 	struct sctp_inpcb *inp;
 
 	if ((sopt->sopt_level == SOL_SOCKET) &&
 	    (sopt->sopt_name == SO_SETFIB)) {
 		inp = (struct sctp_inpcb *)so->so_pcb;
 		if (inp == NULL) {
 			SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS);
 			return (EINVAL);
 		}
 		SCTP_INP_WLOCK(inp);
 		inp->fibnum = so->so_fibnum;
 		SCTP_INP_WUNLOCK(inp);
 		return (0);
 	}
 	if (sopt->sopt_level != IPPROTO_SCTP) {
 		/* wrong proto level... send back up to IP */
 #ifdef INET6
 		if (INP_CHECK_SOCKAF(so, AF_INET6))
 			error = ip6_ctloutput(so, sopt);
 #endif				/* INET6 */
 #if defined(INET) && defined(INET6)
 		else
 #endif
 #ifdef INET
 			error = ip_ctloutput(so, sopt);
 #endif
 		return (error);
 	}
 	optsize = sopt->sopt_valsize;
 	if (optsize) {
 		SCTP_MALLOC(optval, void *, optsize, SCTP_M_SOCKOPT);
 		if (optval == NULL) {
 			SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOBUFS);
 			return (ENOBUFS);
 		}
 		error = sooptcopyin(sopt, optval, optsize, optsize);
 		if (error) {
 			SCTP_FREE(optval, SCTP_M_SOCKOPT);
 			goto out;
 		}
 	}
 	p = (void *)sopt->sopt_td;
 	if (sopt->sopt_dir == SOPT_SET) {
 		error = sctp_setopt(so, sopt->sopt_name, optval, optsize, p);
 	} else if (sopt->sopt_dir == SOPT_GET) {
 		error = sctp_getopt(so, sopt->sopt_name, optval, &optsize, p);
 	} else {
 		SCTP_LTRACE_ERR_RET(so->so_pcb, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		error = EINVAL;
 	}
 	if ((error == 0) && (optval != NULL)) {
 		error = sooptcopyout(sopt, optval, optsize);
 		SCTP_FREE(optval, SCTP_M_SOCKOPT);
 	} else if (optval != NULL) {
 		SCTP_FREE(optval, SCTP_M_SOCKOPT);
 	}
 out:
 	return (error);
 }
 
 #ifdef INET
 /*
  * Start an association from this socket to the peer address 'addr'.
  *
  * The address is checked (family, length, and the prison_remote_ip*()
  * check when a thread is supplied), the endpoint is bound via
  * sctp_inpcb_bind() when it is still unbound, and a new association is
  * allocated with sctp_aloc_assoc(), put into SCTP_STATE_COOKIE_WAIT and
  * handed to sctp_send_initiate().
  *
  * Returns 0 on success, ECONNRESET when the socket has no PCB, EINVAL or
  * EAFNOSUPPORT for an unusable address or endpoint state, EFAULT when the
  * socket is marked gone, EADDRINUSE when a TCP-model socket is already
  * connected, EALREADY when an association already exists, or the error
  * reported by the prison check, sctp_inpcb_bind() or sctp_aloc_assoc().
  */
 static int
 sctp_connect(struct socket *so, struct sockaddr *addr, struct thread *p)
 {
 	int error = 0;
 	int create_lock_on = 0;
 	uint32_t vrf_id;
 	struct sctp_inpcb *inp;
 	struct sctp_tcb *stcb = NULL;
 
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		/* I made the same as TCP since we are not setup? */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ECONNRESET);
 		return (ECONNRESET);
 	}
 	if (addr == NULL) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		return (EINVAL);
 	}
 	/* Validate the destination before taking any lock or reference. */
 	switch (addr->sa_family) {
 #ifdef INET6
 	case AF_INET6:
 		{
 			struct sockaddr_in6 *sin6p;
 
 			if (addr->sa_len != sizeof(struct sockaddr_in6)) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				return (EINVAL);
 			}
 			sin6p = (struct sockaddr_in6 *)addr;
 			if (p != NULL && (error = prison_remote_ip6(p->td_ucred, &sin6p->sin6_addr)) != 0) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 				return (error);
 			}
 			break;
 		}
 #endif
 #ifdef INET
 	case AF_INET:
 		{
 			struct sockaddr_in *sinp;
 
 			if (addr->sa_len != sizeof(struct sockaddr_in)) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 				return (EINVAL);
 			}
 			sinp = (struct sockaddr_in *)addr;
 			if (p != NULL && (error = prison_remote_ip4(p->td_ucred, &sinp->sin_addr)) != 0) {
 				SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
 				return (error);
 			}
 			break;
 		}
 #endif
 	default:
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EAFNOSUPPORT);
 		return (EAFNOSUPPORT);
 	}
 	/*
 	 * From here on every exit goes through out_now, which drops the
 	 * create lock and the reference taken below.
 	 */
 	SCTP_INP_INCR_REF(inp);
 	SCTP_ASOC_CREATE_LOCK(inp);
 	create_lock_on = 1;
 
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) ||
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE)) {
 		/* Should I really unlock ? */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EFAULT);
 		error = EFAULT;
 		goto out_now;
 	}
 #ifdef INET6
 	if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) &&
 	    (addr->sa_family == AF_INET6)) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		error = EINVAL;
 		goto out_now;
 	}
 #endif
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) ==
 	    SCTP_PCB_FLAGS_UNBOUND) {
 		/* Bind an ephemeral port */
 		error = sctp_inpcb_bind(so, NULL, NULL, p);
 		if (error) {
 			goto out_now;
 		}
 	}
 	/* Now do we connect? */
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL) &&
 	    (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_PORTREUSE))) {
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		error = EINVAL;
 		goto out_now;
 	}
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
 		/* We are already connected AND the TCP model */
 		SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
 		error = EADDRINUSE;
 		goto out_now;
 	}
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
 		SCTP_INP_RLOCK(inp);
 		stcb = LIST_FIRST(&inp->sctp_asoc_list);
 		SCTP_INP_RUNLOCK(inp);
 	} else {
 		/*
 		 * We increment here since sctp_findassociation_ep_addr()
 		 * will do a decrement if it finds the stcb as long as the
 		 * locked tcb (last argument) is NOT a TCB.. aka NULL.
 		 */
 		SCTP_INP_INCR_REF(inp);
 		stcb = sctp_findassociation_ep_addr(&inp, addr, NULL, NULL, NULL);
 		if (stcb == NULL) {
 			SCTP_INP_DECR_REF(inp);
 		} else {
 			SCTP_TCB_UNLOCK(stcb);
 		}
 	}
 	if (stcb != NULL) {
 		/* Already have or am bringing up an association */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EALREADY);
 		error = EALREADY;
 		goto out_now;
 	}
 	vrf_id = inp->def_vrf_id;
 	/* We are GOOD to go */
 	stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id,
 	    inp->sctp_ep.pre_open_stream_count,
 	    inp->sctp_ep.port, p);
 	if (stcb == NULL) {
 		/* Gak! no memory */
 		goto out_now;
 	}
 	if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) {
 		stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED;
 		/* Set the connected flag so we can queue data */
 		soisconnecting(so);
 	}
 	SCTP_SET_STATE(&stcb->asoc, SCTP_STATE_COOKIE_WAIT);
 	(void)SCTP_GETTIME_TIMEVAL(&stcb->asoc.time_entered);
 
 	/* initialize authentication parameters for the assoc */
 	sctp_initialize_auth_params(inp, stcb);
 
 	sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED);
 	SCTP_TCB_UNLOCK(stcb);
 out_now:
 	if (create_lock_on) {
 		SCTP_ASOC_CREATE_UNLOCK(inp);
 	}
 	SCTP_INP_DECR_REF(inp);
 	return (error);
 }
 #endif
 
 /*
  * Handle listen(2) for an SCTP socket.
  *
  * Returns ECONNRESET when the socket has no protocol control block and
  * EADDRINUSE when, with port reuse enabled, another endpoint is already
  * listening on the same address/port, when sctp_swap_inpcb_for_listen()
  * fails, or when a TCP-model socket is already connected.  Errors from
  * solisten_proto_check() and sctp_inpcb_bind() are passed through.  On
  * success the listen state is recorded under the inp write lock
  * (solisten_proto() and SCTP_PCB_FLAGS_ACCEPTING).
  */
 int
 sctp_listen(struct socket *so, int backlog, struct thread *p)
 {
 	/*
 	 * Note this module depends on the protocol processing being called
 	 * AFTER any socket level flags and backlog are applied to the
 	 * socket. The traditional way that the socket flags are applied is
 	 * AFTER protocol processing. We have made a change to the
 	 * sys/kern/uipc_socket.c module to reverse this but this MUST be in
 	 * place if the socket API for SCTP is to work properly.
 	 */
 
 	int error = 0;
 	struct sctp_inpcb *inp;
 
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (inp == NULL) {
 		/* I made the same as TCP since we are not setup? */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EINVAL);
 		return (ECONNRESET);
 	}
 	if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) {
 		/* See if we have a listener */
 		struct sctp_inpcb *tinp;
 		union sctp_sockstore store;
 
 		if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) == 0) {
 			/* not bound all */
 			struct sctp_laddr *laddr;
 
 			LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 				/* Build a lookup key: this local address plus our local port. */
 				memcpy(&store, &laddr->ifa->address, sizeof(store));
 				switch (store.sa.sa_family) {
 #ifdef INET
 				case AF_INET:
 					store.sin.sin_port = inp->sctp_lport;
 					break;
 #endif
 #ifdef INET6
 				case AF_INET6:
 					store.sin6.sin6_port = inp->sctp_lport;
 					break;
 #endif
 				default:
 					break;
 				}
 				/* A non-NULL tinp is released with SCTP_INP_DECR_REF() on both paths below. */
 				tinp = sctp_pcb_findep(&store.sa, 0, 0, inp->def_vrf_id);
 				if (tinp && (tinp != inp) &&
 				    ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) &&
 				    ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
 				    (SCTP_IS_LISTENING(tinp))) {
 					/*
 					 * we have a listener already and
 					 * its not this inp.
 					 */
 					SCTP_INP_DECR_REF(tinp);
 					return (EADDRINUSE);
 				} else if (tinp) {
 					SCTP_INP_DECR_REF(tinp);
 				}
 			}
 		} else {
 			/* Setup a local addr bound all */
 			memset(&store, 0, sizeof(store));
 #ifdef INET6
 			if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) {
 				store.sa.sa_family = AF_INET6;
 				store.sa.sa_len = sizeof(struct sockaddr_in6);
 			}
 #endif
 #ifdef INET
 			if ((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) {
 				store.sa.sa_family = AF_INET;
 				store.sa.sa_len = sizeof(struct sockaddr_in);
 			}
 #endif
 			switch (store.sa.sa_family) {
 #ifdef INET
 			case AF_INET:
 				store.sin.sin_port = inp->sctp_lport;
 				break;
 #endif
 #ifdef INET6
 			case AF_INET6:
 				store.sin6.sin6_port = inp->sctp_lport;
 				break;
 #endif
 			default:
 				break;
 			}
 			/* Same lookup as above, using the zeroed (wildcard) address with our port. */
 			tinp = sctp_pcb_findep(&store.sa, 0, 0, inp->def_vrf_id);
 			if (tinp && (tinp != inp) &&
 			    ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE) == 0) &&
 			    ((tinp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) == 0) &&
 			    (SCTP_IS_LISTENING(tinp))) {
 				/*
 				 * we have a listener already and its not
 				 * this inp.
 				 */
 				SCTP_INP_DECR_REF(tinp);
 				return (EADDRINUSE);
 			} else if (tinp) {
 				SCTP_INP_DECR_REF(tinp);
 			}
 		}
 	}
 	SCTP_INP_RLOCK(inp);
 #ifdef SCTP_LOCK_LOGGING
 	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) {
 		sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK);
 	}
 #endif
 	/* Ask the socket layer first; bail out with its error if it refuses. */
 	SOCK_LOCK(so);
 	error = solisten_proto_check(so);
 	SOCK_UNLOCK(so);
 	if (error) {
 		SCTP_INP_RUNLOCK(inp);
 		return (error);
 	}
 	if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) &&
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
 		/*
 		 * The unlucky case - We are in the tcp pool with this guy.
 		 * - Someone else is in the main inp slot. - We must move
 		 * this guy (the listener) to the main slot - We must then
 		 * move the guy that was listener to the TCP Pool.
 		 */
 		if (sctp_swap_inpcb_for_listen(inp)) {
 			SCTP_INP_RUNLOCK(inp);
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
 			return (EADDRINUSE);
 		}
 	}
 	if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
 	    (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
 		/* We are already connected AND the TCP model */
 		SCTP_INP_RUNLOCK(inp);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
 		return (EADDRINUSE);
 	}
 	/* The read lock is dropped before the bind call below. */
 	SCTP_INP_RUNLOCK(inp);
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
 		/* We must do a bind. */
 		if ((error = sctp_inpcb_bind(so, NULL, NULL, p))) {
 			/* bind error, probably perm */
 			return (error);
 		}
 	}
 	SCTP_INP_WLOCK(inp);
-	SOCK_LOCK(so);
-	/* It appears for 7.0 and on, we must always call this. */
-	solisten_proto(so, backlog);
-	if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
-		/* remove the ACCEPTCONN flag for one-to-many sockets */
-		so->so_options &= ~SO_ACCEPTCONN;
+	if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) == 0) {
+		SOCK_LOCK(so);
+		solisten_proto(so, backlog);
+		SOCK_UNLOCK(so);
 	}
-	if (backlog > 0) {
-		inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING;
-	} else {
-		inp->sctp_flags &= ~SCTP_PCB_FLAGS_ACCEPTING;
-	}
-	SOCK_UNLOCK(so);
+	inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING;
 	SCTP_INP_WUNLOCK(inp);
 	return (error);
 }
 
 /* NOTE(review): presumably a count of deferred socket wakeups; confirm against its users. */
 static int sctp_defered_wakeup_cnt = 0;
 
 /*
  * pru_accept handler for SCTP sockets.
  *
  * Takes the first association on the endpoint's association list, clears its
  * SCTP_STATE_IN_ACCEPT_QUEUE state bit and stores a newly allocated copy of
  * the association's primary destination address in *addr.  Wakeups that were
  * deferred while SCTP_PCB_FLAGS_DONT_WAKE was set are delivered before
  * returning.  *addr is left untouched when the address family is neither
  * AF_INET nor AF_INET6.
  *
  * Returns 0 on success, ECONNRESET when the socket has no PCB or no
  * association, EOPNOTSUPP for SCTP_PCB_FLAGS_UDPTYPE endpoints, ECONNABORTED
  * when the socket is marked SS_ISDISCONNECTED, ENOMEM when the address cannot
  * be allocated, or the error reported by sa6_recoverscope().
  */
 int
 sctp_accept(struct socket *so, struct sockaddr **addr)
 {
 	struct sctp_tcb *stcb;
 	struct sctp_inpcb *inp;
 	union sctp_sockstore store;
 #ifdef INET6
 	int error;
 #endif
 	inp = (struct sctp_inpcb *)so->so_pcb;
 
 	if (inp == NULL) {
 		/* Trace the same errno that is handed back to the caller. */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ECONNRESET);
 		return (ECONNRESET);
 	}
 	SCTP_INP_RLOCK(inp);
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) {
 		SCTP_INP_RUNLOCK(inp);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EOPNOTSUPP);
 		return (EOPNOTSUPP);
 	}
 	if (so->so_state & SS_ISDISCONNECTED) {
 		SCTP_INP_RUNLOCK(inp);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ECONNABORTED);
 		return (ECONNABORTED);
 	}
 	stcb = LIST_FIRST(&inp->sctp_asoc_list);
 	if (stcb == NULL) {
 		SCTP_INP_RUNLOCK(inp);
 		/* Trace the same errno that is handed back to the caller. */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ECONNRESET);
 		return (ECONNRESET);
 	}
 	/* Take the TCB lock before dropping the INP lock. */
 	SCTP_TCB_LOCK(stcb);
 	SCTP_INP_RUNLOCK(inp);
 	/* Snapshot the peer address so it can be copied out without the lock. */
 	store = stcb->asoc.primary_destination->ro._l_addr;
 	stcb->asoc.state &= ~SCTP_STATE_IN_ACCEPT_QUEUE;
 	SCTP_TCB_UNLOCK(stcb);
 	switch (store.sa.sa_family) {
 #ifdef INET
 	case AF_INET:
 		{
 			struct sockaddr_in *sin;
 
 			SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin);
 			if (sin == NULL)
 				return (ENOMEM);
 			sin->sin_family = AF_INET;
 			sin->sin_len = sizeof(*sin);
 			sin->sin_port = store.sin.sin_port;
 			sin->sin_addr = store.sin.sin_addr;
 			*addr = (struct sockaddr *)sin;
 			break;
 		}
 #endif
 #ifdef INET6
 	case AF_INET6:
 		{
 			struct sockaddr_in6 *sin6;
 
 			SCTP_MALLOC_SONAME(sin6, struct sockaddr_in6 *, sizeof *sin6);
 			if (sin6 == NULL)
 				return (ENOMEM);
 			sin6->sin6_family = AF_INET6;
 			sin6->sin6_len = sizeof(*sin6);
 			sin6->sin6_port = store.sin6.sin6_port;
 			sin6->sin6_addr = store.sin6.sin6_addr;
 			if ((error = sa6_recoverscope(sin6)) != 0) {
 				SCTP_FREE_SONAME(sin6);
 				return (error);
 			}
 			*addr = (struct sockaddr *)sin6;
 			break;
 		}
 #endif
 	default:
 		/* TSNH */
 		break;
 	}
 	/* Wake any delayed sleep action */
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_DONT_WAKE) {
 		SCTP_INP_WLOCK(inp);
 		inp->sctp_flags &= ~SCTP_PCB_FLAGS_DONT_WAKE;
 		if (inp->sctp_flags & SCTP_PCB_FLAGS_WAKEOUTPUT) {
 			inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEOUTPUT;
 			/*
 			 * The INP write lock is released around the socket
 			 * buffer wakeup and re-taken afterwards.
 			 */
 			SCTP_INP_WUNLOCK(inp);
 			SOCKBUF_LOCK(&inp->sctp_socket->so_snd);
 			if (sowriteable(inp->sctp_socket)) {
 				sowwakeup_locked(inp->sctp_socket);
 			} else {
 				SOCKBUF_UNLOCK(&inp->sctp_socket->so_snd);
 			}
 			SCTP_INP_WLOCK(inp);
 		}
 		if (inp->sctp_flags & SCTP_PCB_FLAGS_WAKEINPUT) {
 			inp->sctp_flags &= ~SCTP_PCB_FLAGS_WAKEINPUT;
 			SCTP_INP_WUNLOCK(inp);
 			SOCKBUF_LOCK(&inp->sctp_socket->so_rcv);
 			if (soreadable(inp->sctp_socket)) {
 				sctp_defered_wakeup_cnt++;
 				sorwakeup_locked(inp->sctp_socket);
 			} else {
 				SOCKBUF_UNLOCK(&inp->sctp_socket->so_rcv);
 			}
 			SCTP_INP_WLOCK(inp);
 		}
 		SCTP_INP_WUNLOCK(inp);
 	}
 	/*
 	 * NOTE(review): stcb is examined here after its lock was dropped
 	 * above; confirm the association cannot go away in between.
 	 */
 	if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) {
 		SCTP_TCB_LOCK(stcb);
 		sctp_free_assoc(inp, stcb, SCTP_NORMAL_PROC,
 		    SCTP_FROM_SCTP_USRREQ + SCTP_LOC_19);
 	}
 	return (0);
 }
 
 #ifdef INET
 /*
  * pru_sockaddr handler: report the local IPv4 address of an SCTP socket.
  *
  * The port is always the endpoint's sctp_lport.  For an endpoint flagged
  * SCTP_PCB_FLAGS_BOUNDALL the address is 0 unless the endpoint is connected
  * and its first association has an AF_INET destination, in which case the
  * address chosen by sctp_source_address_selection() for that destination is
  * used (if one is returned).  Otherwise the first AF_INET entry of the
  * endpoint's bound address list is reported.
  *
  * On success a newly allocated sockaddr_in is stored in *addr and 0 is
  * returned.  Returns ENOMEM if the allocation fails, ECONNRESET if the socket
  * has no PCB, and ENOENT if a specifically-bound endpoint has no IPv4 address.
  */
 int
 sctp_ingetaddr(struct socket *so, struct sockaddr **addr)
 {
 	struct sockaddr_in *sin;
 	uint32_t vrf_id;
 	struct sctp_inpcb *inp;
 	struct sctp_ifa *sctp_ifa;
 
 	/*
 	 * Do the malloc first in case it blocks.
 	 */
 	SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin);
 	if (sin == NULL)
 		return (ENOMEM);
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(*sin);
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if (!inp) {
 		SCTP_FREE_SONAME(sin);
 		/* Trace the same errno that is handed back to the caller. */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ECONNRESET);
 		return (ECONNRESET);
 	}
 	SCTP_INP_RLOCK(inp);
 	sin->sin_port = inp->sctp_lport;
 	if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) {
 		if (inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) {
 			struct sctp_tcb *stcb;
 			struct sockaddr_in *sin_a;
 			struct sctp_nets *net;
 			int fnd;
 
 			stcb = LIST_FIRST(&inp->sctp_asoc_list);
 			if (stcb == NULL) {
 				goto notConn;
 			}
 			fnd = 0;
 			sin_a = NULL;
 			SCTP_TCB_LOCK(stcb);
 			/* Find the first IPv4 destination of the association. */
 			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 				sin_a = (struct sockaddr_in *)&net->ro._l_addr;
 				if (sin_a == NULL)
 					/* this will make coverity happy */
 					continue;
 
 				if (sin_a->sin_family == AF_INET) {
 					fnd = 1;
 					break;
 				}
 			}
 			if ((!fnd) || (sin_a == NULL)) {
 				/* punt */
 				SCTP_TCB_UNLOCK(stcb);
 				goto notConn;
 			}
 			vrf_id = inp->def_vrf_id;
 			sctp_ifa = sctp_source_address_selection(inp,
 			    stcb,
 			    (sctp_route_t *)&net->ro,
 			    net, 0, vrf_id);
 			if (sctp_ifa) {
 				sin->sin_addr = sctp_ifa->address.sin.sin_addr;
 				/* Drop the ifa obtained from the selection call. */
 				sctp_free_ifa(sctp_ifa);
 			}
 			SCTP_TCB_UNLOCK(stcb);
 		} else {
 			/* For the bound all case you get back 0 */
 	notConn:
 			sin->sin_addr.s_addr = 0;
 		}
 
 	} else {
 		/* Take the first IPv4 address in the list */
 		struct sctp_laddr *laddr;
 		int fnd = 0;
 
 		LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) {
 			if (laddr->ifa->address.sa.sa_family == AF_INET) {
 				struct sockaddr_in *sin_a;
 
 				sin_a = &laddr->ifa->address.sin;
 				sin->sin_addr = sin_a->sin_addr;
 				fnd = 1;
 				break;
 			}
 		}
 		if (!fnd) {
 			SCTP_FREE_SONAME(sin);
 			SCTP_INP_RUNLOCK(inp);
 			SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
 			return (ENOENT);
 		}
 	}
 	SCTP_INP_RUNLOCK(inp);
 	(*addr) = (struct sockaddr *)sin;
 	return (0);
 }
 
 /*
  * pru_peeraddr handler: report the peer's IPv4 address of a connected SCTP
  * socket.
  *
  * Uses the first association on the endpoint and the first AF_INET entry in
  * that association's destination list; the port is the association's rport.
  *
  * On success a newly allocated sockaddr_in is stored in *addr and 0 is
  * returned.  Returns ENOMEM if the allocation fails, ENOTCONN if the socket
  * has no PCB or is not flagged SCTP_PCB_FLAGS_CONNECTED, ECONNRESET if the
  * endpoint has no association, and ENOENT if the peer has no IPv4 address.
  */
 int
 sctp_peeraddr(struct socket *so, struct sockaddr **addr)
 {
 	struct sockaddr_in *sin;
 	int fnd;
 	struct sockaddr_in *sin_a;
 	struct sctp_inpcb *inp;
 	struct sctp_tcb *stcb;
 	struct sctp_nets *net;
 
 	/* Do the malloc first in case it blocks. */
 	SCTP_MALLOC_SONAME(sin, struct sockaddr_in *, sizeof *sin);
 	if (sin == NULL)
 		return (ENOMEM);
 	sin->sin_family = AF_INET;
 	sin->sin_len = sizeof(*sin);
 
 	inp = (struct sctp_inpcb *)so->so_pcb;
 	if ((inp == NULL) ||
 	    ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) == 0)) {
 		/* UDP type and listeners will drop out here */
 		SCTP_FREE_SONAME(sin);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOTCONN);
 		return (ENOTCONN);
 	}
 	SCTP_INP_RLOCK(inp);
 	stcb = LIST_FIRST(&inp->sctp_asoc_list);
 	if (stcb) {
 		/* Take the TCB lock before the INP lock is dropped. */
 		SCTP_TCB_LOCK(stcb);
 	}
 	SCTP_INP_RUNLOCK(inp);
 	if (stcb == NULL) {
 		SCTP_FREE_SONAME(sin);
 		/* Trace the same errno that is handed back to the caller. */
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ECONNRESET);
 		return (ECONNRESET);
 	}
 	fnd = 0;
 	TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
 		sin_a = (struct sockaddr_in *)&net->ro._l_addr;
 		if (sin_a->sin_family == AF_INET) {
 			fnd = 1;
 			sin->sin_port = stcb->rport;
 			sin->sin_addr = sin_a->sin_addr;
 			break;
 		}
 	}
 	SCTP_TCB_UNLOCK(stcb);
 	if (!fnd) {
 		/* No IPv4 address */
 		SCTP_FREE_SONAME(sin);
 		SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, ENOENT);
 		return (ENOENT);
 	}
 	(*addr) = (struct sockaddr *)sin;
 	return (0);
 }
 
 /*
  * User-request dispatch table for SCTP sockets (built only when INET is
  * defined).  Close and detach both map to sctp_close(); interface control
  * and poll use the generic in_control() and sopoll_generic() routines.
  */
 struct pr_usrreqs sctp_usrreqs = {
 	.pru_abort = sctp_abort,
 	.pru_accept = sctp_accept,
 	.pru_attach = sctp_attach,
 	.pru_bind = sctp_bind,
 	.pru_connect = sctp_connect,
 	.pru_control = in_control,
 	.pru_close = sctp_close,
 	.pru_detach = sctp_close,
 	.pru_sopoll = sopoll_generic,
 	.pru_flush = sctp_flush,
 	.pru_disconnect = sctp_disconnect,
 	.pru_listen = sctp_listen,
 	.pru_peeraddr = sctp_peeraddr,
 	.pru_send = sctp_sendm,
 	.pru_shutdown = sctp_shutdown,
 	.pru_sockaddr = sctp_ingetaddr,
 	.pru_sosend = sctp_sosend,
 	.pru_soreceive = sctp_soreceive
 };
 #endif
Index: head/sys/netinet/tcp_subr.c
===================================================================
--- head/sys/netinet/tcp_subr.c	(revision 319721)
+++ head/sys/netinet/tcp_subr.c	(revision 319722)
@@ -1,2888 +1,2887 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/callout.h>
 #include <sys/eventhandler.h>
 #ifdef TCP_HHOOK
 #include <sys/hhook.h>
 #endif
 #include <sys/kernel.h>
 #ifdef TCP_HHOOK
 #include <sys/khelp.h>
 #endif
 #include <sys/sysctl.h>
 #include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/refcount.h>
 #include <sys/mbuf.h>
 #ifdef INET6
 #include <sys/domain.h>
 #endif
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/sdt.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/random.h>
 
 #include <vm/uma.h>
 
 #include <net/route.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_fib.h>
 #include <netinet/in_kdtrace.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_var.h>
 #ifdef INET6
 #include <netinet/icmp6.h>
 #include <netinet/ip6.h>
 #include <netinet6/in6_fib.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
 #endif
 
 #ifdef TCP_RFC7413
 #include <netinet/tcp_fastopen.h>
 #endif
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
 #include <netinet/cc/cc.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
 #include <netinet/tcpip.h>
 #ifdef TCPPCAP
 #include <netinet/tcp_pcap.h>
 #endif
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
 #ifdef INET6
 #include <netinet6/ip6protosw.h>
 #endif
 #ifdef TCP_OFFLOAD
 #include <netinet/tcp_offload.h>
 #endif
 
 #include <netipsec/ipsec_support.h>
 
 #include <machine/in_cksum.h>
 #include <sys/md5.h>
 
 #include <security/mac/mac_framework.h>
 
 VNET_DEFINE(int, tcp_mssdflt) = TCP_MSS;
 #ifdef INET6
 VNET_DEFINE(int, tcp_v6mssdflt) = TCP6_MSS;
 #endif
 
 struct rwlock tcp_function_lock;
 
 static int
 sysctl_net_inet_tcp_mss_check(SYSCTL_HANDLER_ARGS)
 {
 	int error, new;
 
 	new = V_tcp_mssdflt;
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if (error == 0 && req->newptr) {
 		if (new < TCP_MINMSS)
 			error = EINVAL;
 		else
 			V_tcp_mssdflt = new;
 	}
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_MSSDFLT, mssdflt,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_mssdflt), 0,
     &sysctl_net_inet_tcp_mss_check, "I",
     "Default TCP Maximum Segment Size");
 
 #ifdef INET6
 static int
 sysctl_net_inet_tcp_mss_v6_check(SYSCTL_HANDLER_ARGS)
 {
 	int error, new;
 
 	new = V_tcp_v6mssdflt;
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if (error == 0 && req->newptr) {
 		if (new < TCP_MINMSS)
 			error = EINVAL;
 		else
 			V_tcp_v6mssdflt = new;
 	}
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_V6MSSDFLT, v6mssdflt,
     CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, &VNET_NAME(tcp_v6mssdflt), 0,
     &sysctl_net_inet_tcp_mss_v6_check, "I",
    "Default TCP Maximum Segment Size for IPv6");
 #endif /* INET6 */
 
 /*
  * Minimum MSS we accept and use. This prevents DoS attacks where
  * we are forced to a ridiculous low MSS like 20 and send hundreds
  * of packets instead of one. The effect scales with the available
  * bandwidth and quickly saturates the CPU and network interface
  * with packet generation and sending. Set to zero to disable MINMSS
  * checking. This setting prevents us from sending too small packets.
  */
 VNET_DEFINE(int, tcp_minmss) = TCP_MINMSS;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, minmss, CTLFLAG_VNET | CTLFLAG_RW,
      &VNET_NAME(tcp_minmss), 0,
     "Minimum TCP Maximum Segment Size");
 
 VNET_DEFINE(int, tcp_do_rfc1323) = 1;
 SYSCTL_INT(_net_inet_tcp, TCPCTL_DO_RFC1323, rfc1323, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_rfc1323), 0,
     "Enable rfc1323 (high performance TCP) extensions");
 
 static int	tcp_log_debug = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, log_debug, CTLFLAG_RW,
     &tcp_log_debug, 0, "Log errors caused by incoming TCP segments");
 
 static int	tcp_tcbhashsize;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, tcbhashsize, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
     &tcp_tcbhashsize, 0, "Size of TCP control-block hashtable");
 
 static int	do_tcpdrain = 1;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_tcpdrain, CTLFLAG_RW, &do_tcpdrain, 0,
     "Enable tcp_drain routine for extra help when low on mbufs");
 
 SYSCTL_UINT(_net_inet_tcp, OID_AUTO, pcbcount, CTLFLAG_VNET | CTLFLAG_RD,
     &VNET_NAME(tcbinfo.ipi_count), 0, "Number of active PCBs");
 
 static VNET_DEFINE(int, icmp_may_rst) = 1;
 #define	V_icmp_may_rst			VNET(icmp_may_rst)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, icmp_may_rst, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(icmp_may_rst), 0,
     "Certain ICMP unreachable messages may abort connections in SYN_SENT");
 
 static VNET_DEFINE(int, tcp_isn_reseed_interval) = 0;
 #define	V_tcp_isn_reseed_interval	VNET(tcp_isn_reseed_interval)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, isn_reseed_interval, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_isn_reseed_interval), 0,
     "Seconds between reseeding of ISN secret");
 
 static int	tcp_soreceive_stream;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, soreceive_stream, CTLFLAG_RDTUN,
     &tcp_soreceive_stream, 0, "Using soreceive_stream for TCP sockets");
 
 VNET_DEFINE(uma_zone_t, sack_hole_zone);
 #define	V_sack_hole_zone		VNET(sack_hole_zone)
 
 #ifdef TCP_HHOOK
 VNET_DEFINE(struct hhook_head *, tcp_hhh[HHOOK_TCP_LAST+1]);
 #endif
 
 static struct inpcb *tcp_notify(struct inpcb *, int);
 static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int);
 static void tcp_mtudisc(struct inpcb *, int);
 static char *	tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
 		    void *ip4hdr, const void *ip6hdr);
 
 
 /*
  * Built-in "default" TCP function block.  It is the initial value of
  * tcp_func_set_ptr and is registered by tcp_init().  The initializer is
  * positional: the name, then tcp_output, tcp_do_segment and
  * tcp_default_ctloutput, with every remaining member NULL or 0.
  * NOTE(review): member order comes from struct tcp_function_block, which is
  * declared elsewhere -- confirm the positions when that struct changes.
  */
 static struct tcp_function_block tcp_def_funcblk = {
 	"default",
 	tcp_output,
 	tcp_do_segment,
 	tcp_default_ctloutput,
 	NULL,
 	NULL,	
 	NULL,
 	NULL,
 	NULL,
 	NULL,
 	0,
 	0
 };
 
 int t_functions_inited = 0;
 struct tcp_funchead t_functions;
 static struct tcp_function_block *tcp_func_set_ptr = &tcp_def_funcblk;
 
 /*
  * One-time setup of the registered-function list and its rwlock.  Later
  * calls are no-ops once t_functions_inited has been set.
  */
 static void
 init_tcp_functions(void)
 {
 	if (t_functions_inited != 0)
 		return;
 
 	TAILQ_INIT(&t_functions);
 	rw_init_flags(&tcp_function_lock, "tcp_func_lock" , 0);
 	t_functions_inited = 1;
 }
 
 /*
  * Walk t_functions for an entry whose registered name equals
  * fs->function_set_name and return its function block, or NULL when no
  * entry matches.  The "_locked" suffix suggests the caller already holds
  * tcp_function_lock; this function takes no lock itself.
  */
 static struct tcp_function_block *
 find_tcp_functions_locked(struct tcp_function_set *fs)
 {
 	struct tcp_function *entry;
 
 	TAILQ_FOREACH(entry, &t_functions, tf_next) {
 		if (strcmp(entry->tf_name, fs->function_set_name) == 0)
 			return (entry->tf_fb);
 	}
 	return (NULL);
 }
 
 /*
  * Check whether blk is referenced by any entry on t_functions.  Returns blk
  * when it is, NULL otherwise.  When s is non-NULL the first matching list
  * entry is stored in *s; *s is not written when nothing matches.  This
  * function takes no lock itself.
  */
 static struct tcp_function_block *
 find_tcp_fb_locked(struct tcp_function_block *blk, struct tcp_function **s)
 {
 	struct tcp_function *entry;
 
 	TAILQ_FOREACH(entry, &t_functions, tf_next) {
 		if (entry->tf_fb != blk)
 			continue;
 		if (s != NULL)
 			*s = entry;
 		return (blk);
 	}
 	return (NULL);
 }
 
 struct tcp_function_block *
 find_and_ref_tcp_functions(struct tcp_function_set *fs)
 {
 	struct tcp_function_block *blk;
 	
 	rw_rlock(&tcp_function_lock);	
 	blk = find_tcp_functions_locked(fs);
 	if (blk)
 		refcount_acquire(&blk->tfb_refcnt); 
 	rw_runlock(&tcp_function_lock);
 	return(blk);
 }
 
 /*
  * Take a reference on blk if it is still present on the registered-function
  * list.  Lookup and reference are both done under the read side of
  * tcp_function_lock.  Returns blk on success, NULL when it is not registered.
  */
 struct tcp_function_block *
 find_and_ref_tcp_fb(struct tcp_function_block *blk)
 {
 	struct tcp_function_block *found;
 
 	rw_rlock(&tcp_function_lock);
 	found = find_tcp_fb_locked(blk, NULL);
 	if (found != NULL)
 		refcount_acquire(&found->tfb_refcnt);
 	rw_runlock(&tcp_function_lock);
 	return (found);
 }
 
 
 static int
 sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS)
 {
 	int error=ENOENT;
 	struct tcp_function_set fs;
 	struct tcp_function_block *blk;
 
 	memset(&fs, 0, sizeof(fs));
 	rw_rlock(&tcp_function_lock);
 	blk = find_tcp_fb_locked(tcp_func_set_ptr, NULL);
 	if (blk) {
 		/* Found him */
 		strcpy(fs.function_set_name, blk->tfb_tcp_block_name);
 		fs.pcbcnt = blk->tfb_refcnt;
 	}
 	rw_runlock(&tcp_function_lock);	
 	error = sysctl_handle_string(oidp, fs.function_set_name,
 				     sizeof(fs.function_set_name), req);
 
 	/* Check for error or no change */
 	if (error != 0 || req->newptr == NULL)
 		return(error);
 
 	rw_wlock(&tcp_function_lock);
 	blk = find_tcp_functions_locked(&fs);
 	if ((blk == NULL) ||
 	    (blk->tfb_flags & TCP_FUNC_BEING_REMOVED)) { 
 		error = ENOENT; 
 		goto done;
 	}
 	tcp_func_set_ptr = blk;
 done:
 	rw_wunlock(&tcp_function_lock);
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_default,
 	    CTLTYPE_STRING | CTLFLAG_RW,
 	    NULL, 0, sysctl_net_inet_default_tcp_functions, "A",
 	    "Set/get the default TCP functions");
 
 /*
  * Read-only sysctl handler that renders the registered-function list as a
  * text table with one row per list entry: block name, a '*' marking the
  * current default, the alias (or "-" when the entry's name is the block's
  * own name buffer) and the block's reference count.
  *
  * Returns EOVERFLOW when a row does not fit in the buffer sized from the
  * first pass over the list, otherwise the result of sysctl_handle_string().
  */
 static int
 sysctl_net_inet_list_available(SYSCTL_HANDLER_ARGS)
 {
 	int error, cnt, linesz;
 	struct tcp_function *f;
 	char *buffer, *cp;
 	size_t bufsz, outsz;
 	bool alias;
 
 	/* First pass: count the entries so the buffer can be sized. */
 	cnt = 0;
 	rw_rlock(&tcp_function_lock);
 	TAILQ_FOREACH(f, &t_functions, tf_next) {
 		cnt++;
 	}
 	rw_runlock(&tcp_function_lock);
 
 	/*
 	 * The lock is dropped around the M_WAITOK allocation, so the list
 	 * may differ on the second pass; the per-row bound check below
 	 * covers that case.
 	 */
 	bufsz = (cnt+2) * ((TCP_FUNCTION_NAME_LEN_MAX * 2) + 13) + 1;
 	buffer = malloc(bufsz, M_TEMP, M_WAITOK);
 
 	error = 0;
 	cp = buffer;
 
 	/* Header row. */
 	linesz = snprintf(cp, bufsz, "\n%-32s%c %-32s %s\n", "Stack", 'D',
 	    "Alias", "PCB count");
 	cp += linesz;
 	bufsz -= linesz;
 	outsz = linesz;
 
 	/* Second pass: one row per entry, appended at cp. */
 	rw_rlock(&tcp_function_lock);	
 	TAILQ_FOREACH(f, &t_functions, tf_next) {
 		/* Pointer comparison, not a string comparison. */
 		alias = (f->tf_name != f->tf_fb->tfb_tcp_block_name);
 		linesz = snprintf(cp, bufsz, "%-32s%c %-32s %u\n",
 		    f->tf_fb->tfb_tcp_block_name,
 		    (f->tf_fb == tcp_func_set_ptr) ? '*' : ' ',
 		    alias ? f->tf_name : "-",
 		    f->tf_fb->tfb_refcnt);
 		/* The row was truncated: give up rather than emit a partial table. */
 		if (linesz >= bufsz) {
 			error = EOVERFLOW;
 			break;
 		}
 		cp += linesz;
 		bufsz -= linesz;
 		outsz += linesz;
 	}
 	rw_runlock(&tcp_function_lock);
 	/* outsz + 1 includes the terminating NUL. */
 	if (error == 0)
 		error = sysctl_handle_string(oidp, buffer, outsz + 1, req);
 	free(buffer, M_TEMP);
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available,
 	    CTLTYPE_STRING|CTLFLAG_RD,
 	    NULL, 0, sysctl_net_inet_list_available, "A",
 	    "list available TCP Function sets");
 
 /*
  * Target size of TCP PCB hash tables. Must be a power of two.
  *
  * Note that this can be overridden by the kernel environment
  * variable net.inet.tcp.tcbhashsize
  */
 #ifndef TCBHASHSIZE
 #define TCBHASHSIZE	0
 #endif
 
 /*
  * XXX
  * Callouts should be moved into struct tcp directly.  They are currently
  * separate because the tcpcb structure is exported to userland for sysctl
  * parsing purposes, which do not know about callouts.
  */
 /*
  * Allocation unit of the "tcpcb" UMA zone created in tcp_init(): the tcpcb
  * itself followed by its timer and congestion-control state (and helper
  * OSD when TCP_HHOOK is built).
  */
 struct tcpcb_mem {
 	struct	tcpcb		tcb;
 	struct	tcp_timer	tt;
 	struct	cc_var		ccv;
 #ifdef TCP_HHOOK
 	struct	osd		osd;
 #endif
 };
 
 static VNET_DEFINE(uma_zone_t, tcpcb_zone);
 #define	V_tcpcb_zone			VNET(tcpcb_zone)
 
 MALLOC_DEFINE(M_TCPLOG, "tcplog", "TCP address and flags print buffers");
 MALLOC_DEFINE(M_TCPFUNCTIONS, "tcpfunc", "TCP function set memory");
 
 static struct mtx isn_mtx;
 
 #define	ISN_LOCK_INIT()	mtx_init(&isn_mtx, "isn_mtx", NULL, MTX_DEF)
 #define	ISN_LOCK()	mtx_lock(&isn_mtx)
 #define	ISN_UNLOCK()	mtx_unlock(&isn_mtx)
 
 /*
  * TCP initialization.
  */
 /*
  * maxsockets_change event handler (registered in tcp_init()): re-apply the
  * current maxsockets limit to the inpcb and tcpcb zones and let the
  * time-wait code adjust its own zone.  The tag argument is unused.
  */
 static void
 tcp_zone_change(void *tag)
 {
 
 	uma_zone_set_max(V_tcbinfo.ipi_zone, maxsockets);
 	uma_zone_set_max(V_tcpcb_zone, maxsockets);
 	tcp_tw_zone_change();
 }
 
 /*
  * Per-item init callback handed to in_pcbinfo_init() by tcp_init():
  * initializes the lock of the inpcb at mem.  size and flags are unused.
  * Always returns 0.
  */
 static int
 tcp_inpcb_init(void *mem, int size, int flags)
 {
 	struct inpcb *inp = mem;
 
 	INP_LOCK_INIT(inp, "inp", "tcpinp");
 	return (0);
 }
 
 /*
  * Return the smallest power of two strictly greater than size, capped at
  * 2^30 so that the result always fits in a positive int.  Sizes <= 0
  * yield 1.  Used to size the tcp_inpcb hash buckets.
  *
  * The value is built with bounded unsigned shifts: shifting a signed 1 into
  * or past the sign bit (as a plain "1 << fls(size)" does for sizes >= 2^30
  * or negative sizes) is undefined behaviour in C.
  */
 static int
 maketcp_hashsize(int size)
 {
 	unsigned int hashsize;
 
 	/* Zero or negative requests get the smallest table. */
 	if (size <= 0)
 		return (1);
 
 	/*
 	 * Double until we pass size; stop at 2^30, i.e. go one power of
 	 * two smaller instead of overflowing.
 	 */
 	hashsize = 1;
 	while (hashsize <= (unsigned int)size && hashsize < (1U << 30))
 		hashsize <<= 1;
 	return ((int)hashsize);
 }
 
 /*
  * Register a TCP function block with the name provided in the names
  * array.  (Note that this function does NOT automatically register
  * blk->tfb_tcp_block_name as a stack name.  Therefore, you should
  * explicitly include blk->tfb_tcp_block_name in the list of names if
  * you wish to register the stack with that name.)
  *
  * Either all name registrations will succeed or all will fail.  If
  * a name registration fails, the function will update the num_names
  * argument to point to the array index of the name that encountered
  * the failure.
  *
  * Returns 0 on success, or an error code on failure.
  */
 int
 register_tcp_functions_as_names(struct tcp_function_block *blk, int wait,
     const char *names[], int *num_names)
 {
 	struct tcp_function *n;
 	struct tcp_function_set fs;
 	int error, i;
 
 	KASSERT(names != NULL && *num_names > 0,
 	    ("%s: Called with 0-length name list", __func__));
 	KASSERT(names != NULL, ("%s: Called with NULL name list", __func__));
 
 	/* Allow registration before tcp_init() has set up the list. */
 	if (t_functions_inited == 0) {
 		init_tcp_functions();
 	}
 	if ((blk->tfb_tcp_output == NULL) ||
 	    (blk->tfb_tcp_do_segment == NULL) ||
 	    (blk->tfb_tcp_ctloutput == NULL) ||
 	    (strlen(blk->tfb_tcp_block_name) == 0)) {
 		/* 
 		 * These functions are required and you
 		 * need a name.
 		 */
 		*num_names = 0;
 		return (EINVAL);
 	}
 	if (blk->tfb_tcp_timer_stop_all ||
 	    blk->tfb_tcp_timer_activate ||
 	    blk->tfb_tcp_timer_active ||
 	    blk->tfb_tcp_timer_stop) {
 		/*
 		 * If you define one timer function you 
 		 * must have them all.
 		 */
 		if ((blk->tfb_tcp_timer_stop_all == NULL) ||
 		    (blk->tfb_tcp_timer_activate == NULL) ||
 		    (blk->tfb_tcp_timer_active == NULL) ||
 		    (blk->tfb_tcp_timer_stop == NULL)) {
 			*num_names = 0;
 			return (EINVAL);
 		}
 	}
 
 	/* The block starts out unreferenced and with no flags set. */
 	refcount_init(&blk->tfb_refcnt, 0);
 	blk->tfb_flags = 0;
 	for (i = 0; i < *num_names; i++) {
 		/* Allocate before taking the lock; 'wait' is the malloc flag. */
 		n = malloc(sizeof(struct tcp_function), M_TCPFUNCTIONS, wait);
 		if (n == NULL) {
 			error = ENOMEM;
 			goto cleanup;
 		}
 		n->tf_fb = blk;
 
 		/* Build a lookup key truncated the same way tf_name will be. */
 		(void)strncpy(fs.function_set_name, names[i],
 		    TCP_FUNCTION_NAME_LEN_MAX);
 		fs.function_set_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0';
 		rw_wlock(&tcp_function_lock);
 		if (find_tcp_functions_locked(&fs) != NULL) {
 			/* Duplicate name space not allowed */
 			rw_wunlock(&tcp_function_lock);
 			free(n, M_TCPFUNCTIONS);
 			error = EALREADY;
 			goto cleanup;
 		}
 		(void)strncpy(n->tf_name, names[i], TCP_FUNCTION_NAME_LEN_MAX);
 		n->tf_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0';
 		TAILQ_INSERT_TAIL(&t_functions, n, tf_next);
 		rw_wunlock(&tcp_function_lock);
 	}
 	return(0);
 
 cleanup:
 	/*
 	 * Deregister the names we just added. Because registration failed
 	 * for names[i], we don't need to deregister that name.
 	 */
 	*num_names = i;
 	rw_wlock(&tcp_function_lock);
 	/* Walk back over names[i-1] .. names[0], removing each entry by name. */
 	while (--i >= 0) {
 		TAILQ_FOREACH(n, &t_functions, tf_next) {
 			if (!strncmp(n->tf_name, names[i],
 			    TCP_FUNCTION_NAME_LEN_MAX)) {
 				TAILQ_REMOVE(&t_functions, n, tf_next);
 				n->tf_fb = NULL;
 				free(n, M_TCPFUNCTIONS);
 				break;
 			}
 		}
 	}
 	rw_wunlock(&tcp_function_lock);
 	return (error);
 }
 
 /*
  * Register a TCP function block using the name provided in the name
  * argument.
  *
  * Returns 0 on success, or an error code on failure.
  */
 int
 register_tcp_functions_as_name(struct tcp_function_block *blk, const char *name,
     int wait)
 {
 	const char *single[1];
 	int count = 1;
 
 	/* A NULL name means "register under the block's own name". */
 	single[0] = (name != NULL) ? name : blk->tfb_tcp_block_name;
 	return (register_tcp_functions_as_names(blk, wait, single, &count));
 }
 
 /*
  * Register a TCP function block using the name defined in
  * blk->tfb_tcp_block_name.
  *
  * Returns 0 on success, or an error code on failure.
  */
 int
 register_tcp_functions(struct tcp_function_block *blk, int wait)
 {
 
 	/* A NULL name selects blk->tfb_tcp_block_name in the callee. */
 	return (register_tcp_functions_as_name(blk, NULL, wait));
 }
 
 /*
  * Remove every registered name that refers to blk.
  *
  * Returns 0 when at least one entry was removed, ENOENT when blk was not
  * registered, EPERM for the block named "default", and EBUSY when blk is the
  * current default or still has references.  In the still-referenced case
  * the block is also flagged TCP_FUNC_BEING_REMOVED.
  */
 int
 deregister_tcp_functions(struct tcp_function_block *blk)
 {
 	struct tcp_function *f;
 	int error=ENOENT;
 	
 	if (strcmp(blk->tfb_tcp_block_name, "default") == 0) {
 		/* You can't un-register the default */
 		return (EPERM);
 	}
 	rw_wlock(&tcp_function_lock);
 	if (blk == tcp_func_set_ptr) {
 		/* You can't free the current default */
 		rw_wunlock(&tcp_function_lock);
 		return (EBUSY);
 	}
 	if (blk->tfb_refcnt) {
 		/* Still tcb attached, mark it. */
 		blk->tfb_flags |= TCP_FUNC_BEING_REMOVED;
 		rw_wunlock(&tcp_function_lock);		
 		return (EBUSY);
 	}
 	/* A block may be registered under several names; drop them all. */
 	while (find_tcp_fb_locked(blk, &f) != NULL) {
 		/* Found */
 		TAILQ_REMOVE(&t_functions, f, tf_next);
 		f->tf_fb = NULL;
 		free(f, M_TCPFUNCTIONS);
 		error = 0;
 	}
 	rw_wunlock(&tcp_function_lock);
 	return (error);
 }
 
 /*
  * TCP protocol initialization.  Everything up to the IS_DEFAULT_VNET()
  * check runs for every vnet (helper hooks, PCB hash and zones, time-wait,
  * syncache, host cache, SACK zone, fast open); the remainder sets up
  * globals and runs only for the default vnet.
  */
 void
 tcp_init(void)
 {
 	const char *tcbhash_tuneable;
 	int hashsize;
 
 	tcbhash_tuneable = "net.inet.tcp.tcbhashsize";
 
 #ifdef TCP_HHOOK
 	if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN,
 	    &V_tcp_hhh[HHOOK_TCP_EST_IN], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
 		printf("%s: WARNING: unable to register helper hook\n", __func__);
 	if (hhook_head_register(HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT,
 	    &V_tcp_hhh[HHOOK_TCP_EST_OUT], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0)
 		printf("%s: WARNING: unable to register helper hook\n", __func__);
 #endif
 	/* Compile-time default, overridable by the loader tunable. */
 	hashsize = TCBHASHSIZE;
 	TUNABLE_INT_FETCH(tcbhash_tuneable, &hashsize);
 	if (hashsize == 0) {
 		/*
 		 * Auto tune the hash size based on maxsockets.
 		 * A perfect hash would have a 1:1 mapping
 		 * (hashsize = maxsockets) however it's been
 		 * suggested that O(2) average is better.
 		 */
 		hashsize = maketcp_hashsize(maxsockets / 4);
 		/*
 		 * Our historical default is 512,
 		 * do not autotune lower than this.
 		 */
 		if (hashsize < 512)
 			hashsize = 512;
 		if (bootverbose && IS_DEFAULT_VNET(curvnet))
 			printf("%s: %s auto tuned to %d\n", __func__,
 			    tcbhash_tuneable, hashsize);
 	}
 	/*
 	 * We require a hashsize to be a power of two.
 	 * Previously if it was not a power of two we would just reset it
 	 * back to 512, which could be a nasty surprise if you did not notice
 	 * the error message.
 	 * Instead we pass it through maketcp_hashsize(), which rounds it up
 	 * to the next power of two (and only drops to the next lower power
 	 * of two when rounding up would overflow).
 	 */
 	if (!powerof2(hashsize)) {
 		int oldhashsize = hashsize;
 
 		hashsize = maketcp_hashsize(hashsize);
 		/* prevent absurdly low value */
 		if (hashsize < 16)
 			hashsize = 16;
 		printf("%s: WARNING: TCB hash size not a power of 2, "
 		    "clipped from %d to %d.\n", __func__, oldhashsize,
 		    hashsize);
 	}
 	in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
 	    "tcp_inpcb", tcp_inpcb_init, IPI_HASHFIELDS_4TUPLE);
 
 	/*
 	 * These have to be type stable for the benefit of the timers.
 	 */
 	V_tcpcb_zone = uma_zcreate("tcpcb", sizeof(struct tcpcb_mem),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	uma_zone_set_max(V_tcpcb_zone, maxsockets);
 	uma_zone_set_warning(V_tcpcb_zone, "kern.ipc.maxsockets limit reached");
 
 	tcp_tw_init();
 	syncache_init();
 	tcp_hc_init();
 
 	TUNABLE_INT_FETCH("net.inet.tcp.sack.enable", &V_tcp_do_sack);
 	V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 
 #ifdef TCP_RFC7413
 	tcp_fastopen_init();
 #endif
 
 	/* Skip initialization of globals for non-default instances. */
 	if (!IS_DEFAULT_VNET(curvnet))
 		return;
 
 	tcp_reass_global_init();
 
 	/* XXX virtualize those below? */
 	tcp_delacktime = TCPTV_DELACK;
 	tcp_keepinit = TCPTV_KEEP_INIT;
 	tcp_keepidle = TCPTV_KEEP_IDLE;
 	tcp_keepintvl = TCPTV_KEEPINTVL;
 	tcp_maxpersistidle = TCPTV_KEEP_IDLE;
 	tcp_msl = TCPTV_MSL;
 	tcp_rexmit_min = TCPTV_MIN;
 	if (tcp_rexmit_min < 1)
 		tcp_rexmit_min = 1;
 	tcp_persmin = TCPTV_PERSMIN;
 	tcp_persmax = TCPTV_PERSMAX;
 	tcp_rexmit_slop = TCPTV_CPU_VAR;
 	tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT;
 	/* Export the hash size actually used through the read-only sysctl. */
 	tcp_tcbhashsize = hashsize;
 	/* Setup the tcp function block list */
 	init_tcp_functions();
 	register_tcp_functions(&tcp_def_funcblk, M_WAITOK);
 
 	/* Optionally switch both protocol tables to soreceive_stream(). */
 	if (tcp_soreceive_stream) {
 #ifdef INET
 		tcp_usrreqs.pru_soreceive = soreceive_stream;
 #endif
 #ifdef INET6
 		tcp6_usrreqs.pru_soreceive = soreceive_stream;
 #endif /* INET6 */
 	}
 
 	/*
 	 * Make sure max_protohdr covers the largest TCP/IP header pair and
 	 * that link plus protocol headers fit in a single mbuf header (MHLEN).
 	 */
 #ifdef INET6
 #define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
 #else /* INET6 */
 #define TCP_MINPROTOHDR (sizeof(struct tcpiphdr))
 #endif /* INET6 */
 	if (max_protohdr < TCP_MINPROTOHDR)
 		max_protohdr = TCP_MINPROTOHDR;
 	if (max_linkhdr + TCP_MINPROTOHDR > MHLEN)
 		panic("tcp_init");
 #undef TCP_MINPROTOHDR
 
 	ISN_LOCK_INIT();
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, tcp_fini, NULL,
 		SHUTDOWN_PRI_DEFAULT);
 	EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
 		EVENTHANDLER_PRI_ANY);
 #ifdef TCPPCAP
 	tcp_pcap_init();
 #endif
 }
 
 #ifdef VIMAGE
 /*
  * Per-vnet TCP teardown, registered through VNET_SYSUNINIT (VIMAGE kernels
  * only).  Waits until the vnet's PCB count reaches zero, then destroys the
  * host cache, syncache, time-wait state, pcbinfo and UMA zones, and finally
  * deregisters the helper hooks registered by tcp_init().
  */
 static void
 tcp_destroy(void *unused __unused)
 {
 	int n;
 #ifdef TCP_HHOOK
 	int error;
 #endif
 
 	/*
 	 * All our processes are gone, all our sockets should be cleaned
 	 * up, which means, we should be past the tcp_discardcb() calls.
 	 * Sleep to let all tcpcb timers really disappear and cleanup.
 	 */
 	for (;;) {
 		INP_LIST_RLOCK(&V_tcbinfo);
 		n = V_tcbinfo.ipi_count;
 		INP_LIST_RUNLOCK(&V_tcbinfo);
 		if (n == 0)
 			break;
 		/* Poll again after hz / 10 ticks. */
 		pause("tcpdes", hz / 10);
 	}
 	tcp_hc_destroy();
 	syncache_destroy();
 	tcp_tw_destroy();
 	in_pcbinfo_destroy(&V_tcbinfo);
 	/* tcp_discardcb() clears the sack_holes up. */
 	uma_zdestroy(V_sack_hole_zone);
 	uma_zdestroy(V_tcpcb_zone);
 
 #ifdef TCP_RFC7413
 	/*
 	 * Cannot free the zone until all tcpcbs are released as we attach
 	 * the allocations to them.
 	 */
 	tcp_fastopen_destroy();
 #endif
 
 #ifdef TCP_HHOOK
 	/* Failures here are only reported; teardown continues regardless. */
 	error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_IN]);
 	if (error != 0) {
 		printf("%s: WARNING: unable to deregister helper hook "
 		    "type=%d, id=%d: error %d returned\n", __func__,
 		    HHOOK_TYPE_TCP, HHOOK_TCP_EST_IN, error);
 	}
 	error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_OUT]);
 	if (error != 0) {
 		printf("%s: WARNING: unable to deregister helper hook "
 		    "type=%d, id=%d: error %d returned\n", __func__,
 		    HHOOK_TYPE_TCP, HHOOK_TCP_EST_OUT, error);
 	}
 #endif
 }
 VNET_SYSUNINIT(tcp, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, tcp_destroy, NULL);
 #endif
 
 /*
  * Shutdown event handler for TCP.  There is currently nothing to tear
  * down here, so the argument is ignored and the function returns at once.
  */
 void
 tcp_fini(void *xtp)
 {
 
 	return;
 }
 
 /*
  * Write skeleton IP and TCP headers for the connection described by inp
  * into the caller-supplied buffers ip_ptr and tcp_ptr.
  *
  * The IP header flavour follows the inpcb: IPv6 when INP_IPV6 is set in
  * inp_vflag, IPv4 otherwise.  Addresses, ports, TOS/TTL (IPv4) and the
  * flow label (IPv6) come from the inpcb; the remaining fields are
  * zeroed or set to fixed values.  The TCP checksum is left at zero.
  * The inpcb must be write-locked by the caller.
  */
 void
 tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr)
 {
 	struct tcphdr *tcph;
 
 	INP_WLOCK_ASSERT(inp);
 
 	tcph = (struct tcphdr *)tcp_ptr;
 
 #ifdef INET6
 	if ((inp->inp_vflag & INP_IPV6) != 0) {
 		struct ip6_hdr *ip6h = (struct ip6_hdr *)ip_ptr;
 
 		/*
 		 * Flow word first, version byte second: this order is kept
 		 * deliberately, do not swap the two stores.
 		 */
 		ip6h->ip6_flow = (ip6h->ip6_flow & ~IPV6_FLOWINFO_MASK) |
 		    (inp->inp_flow & IPV6_FLOWINFO_MASK);
 		ip6h->ip6_vfc = (ip6h->ip6_vfc & ~IPV6_VERSION_MASK) |
 		    (IPV6_VERSION & IPV6_VERSION_MASK);
 		ip6h->ip6_plen = htons(sizeof(struct tcphdr));
 		ip6h->ip6_nxt = IPPROTO_TCP;
 		ip6h->ip6_dst = inp->in6p_faddr;
 		ip6h->ip6_src = inp->in6p_laddr;
 	}
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		struct ip *ip4h = (struct ip *)ip_ptr;
 
 		/* Fixed 20-byte header, no options. */
 		ip4h->ip_v = IPVERSION;
 		ip4h->ip_hl = 5;
 		ip4h->ip_p = IPPROTO_TCP;
 		/* Per-connection values. */
 		ip4h->ip_tos = inp->inp_ip_tos;
 		ip4h->ip_ttl = inp->inp_ip_ttl;
 		ip4h->ip_src = inp->inp_laddr;
 		ip4h->ip_dst = inp->inp_faddr;
 		/* Everything else starts out as zero. */
 		ip4h->ip_len = 0;
 		ip4h->ip_id = 0;
 		ip4h->ip_off = 0;
 		ip4h->ip_sum = 0;
 	}
 #endif /* INET */
 
 	/* TCP header: ports from the inpcb, 20-byte header, rest cleared. */
 	tcph->th_sport = inp->inp_lport;
 	tcph->th_dport = inp->inp_fport;
 	tcph->th_off = 5;
 	tcph->th_x2 = 0;
 	tcph->th_seq = 0;
 	tcph->th_ack = 0;
 	tcph->th_flags = 0;
 	tcph->th_win = 0;
 	tcph->th_urp = 0;
 	tcph->th_sum = 0;	/* in_pseudo() is called later for ipv4 */
 }
 
 /*
  * Build a header template for a connection.
  *
  * Allocates a struct tcptemp from M_TEMP without sleeping and fills in
  * skeletal IP and TCP headers via tcpip_fillheaders().  Returns NULL if
  * the allocation fails.  According to the original note, the only
  * consumer is the keepalive code, which hands the template to
  * tcp_respond().
  */
 struct tcptemp *
 tcpip_maketemplate(struct inpcb *inp)
 {
 	struct tcptemp *tmpl;
 
 	tmpl = malloc(sizeof(*tmpl), M_TEMP, M_NOWAIT);
 	if (tmpl != NULL) {
 		tcpip_fillheaders(inp, (void *)&tmpl->tt_ipgen,
 		    (void *)&tmpl->tt_t);
 	}
 	return (tmpl);
 }
 
 /*
  * Send a single message to the TCP at address specified by
  * the given TCP/IP header.  If m == NULL, then we make a copy
  * of the tcpiphdr at th and send directly to the addressed host.
  * This is used to force keep alive messages out using the TCP
  * template for a connection.  If flags are given then we send
  * a message back to the TCP which originated the segment th,
  * and discard the mbuf containing it and any other attached mbufs.
  *
  * In any case the ack and sequence number of the transmitted
  * segment are as specified by the parameters.
  *
  * NOTE: If m != NULL, then th must point to *inside* the mbuf.
  */
 void
 tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m,
     tcp_seq ack, tcp_seq seq, int flags)
 {
 	struct tcpopt to;
 	struct inpcb *inp;
 	struct ip *ip;
 	struct mbuf *optm;
 	struct tcphdr *nth;
 	u_char *optp;
 #ifdef INET6
 	struct ip6_hdr *ip6;
 	int isipv6;
 #endif /* INET6 */
 	int optlen, tlen, win;
 	bool incl_opts;
 
 	/* At least one of the tcpcb and the incoming mbuf must be given. */
 	KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
 
 	/*
 	 * The address family is taken from the version nibble of the
 	 * header at ipgen; ip and ip6 both alias that same header.
 	 */
 #ifdef INET6
 	isipv6 = ((struct ip *)ipgen)->ip_v == (IPV6_VERSION >> 4);
 	ip6 = ipgen;
 #endif /* INET6 */
 	ip = ipgen;
 
 	if (tp != NULL) {
 		inp = tp->t_inpcb;
 		KASSERT(inp != NULL, ("tcp control block w/o inpcb"));
 		INP_WLOCK_ASSERT(inp);
 	} else
 		inp = NULL;
 
 	/*
 	 * With a tcpcb and no RST flag, advertise the free space of the
 	 * socket receive buffer, clamped to TCP_MAXWIN << rcv_scale;
 	 * otherwise the window stays 0.  TCP options are only considered
 	 * when a tcpcb is present and TF_NOOPT is clear.
 	 */
 	incl_opts = false;
 	win = 0;
 	if (tp != NULL) {
 		if (!(flags & TH_RST)) {
 			win = sbspace(&inp->inp_socket->so_rcv);
 			if (win > TCP_MAXWIN << tp->rcv_scale)
 				win = TCP_MAXWIN << tp->rcv_scale;
 		}
 		if ((tp->t_flags & TF_NOOPT) == 0)
 			incl_opts = true;
 	}
 	if (m == NULL) {
 		/*
 		 * No incoming packet: allocate a fresh header mbuf, leave
 		 * max_linkhdr bytes in front, and copy the IP header from
 		 * ipgen and the TCP header from th.  Addresses and ports
 		 * are used as given (no swap) and the caller's flags are
 		 * replaced by TH_ACK.
 		 */
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL)
 			return;
 		m->m_data += max_linkhdr;
 #ifdef INET6
 		if (isipv6) {
 			bcopy((caddr_t)ip6, mtod(m, caddr_t),
 			      sizeof(struct ip6_hdr));
 			ip6 = mtod(m, struct ip6_hdr *);
 			nth = (struct tcphdr *)(ip6 + 1);
 		} else
 #endif /* INET6 */
 		{
 			bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
 			ip = mtod(m, struct ip *);
 			nth = (struct tcphdr *)(ip + 1);
 		}
 		bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
 		flags = TH_ACK;
 	} else if (!M_WRITABLE(m)) {
 		struct mbuf *n;
 
 		/* Can't reuse 'm', allocate a new mbuf. */
 		n = m_gethdr(M_NOWAIT, MT_DATA);
 		if (n == NULL) {
 			m_freem(m);
 			return;
 		}
 
 		/* Carry the packet header over; on failure drop both. */
 		if (!m_dup_pkthdr(n, m, M_NOWAIT)) {
 			m_freem(m);
 			m_freem(n);
 			return;
 		}
 
 		n->m_data += max_linkhdr;
 		/* m_len is set later */
 		/* xchg swaps two lvalues of the given type via a temporary. */
 #define xchg(a,b,type) { type t; t=a; a=b; b=t; }
 #ifdef INET6
 		if (isipv6) {
 			bcopy((caddr_t)ip6, mtod(n, caddr_t),
 			      sizeof(struct ip6_hdr));
 			ip6 = mtod(n, struct ip6_hdr *);
 			xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
 			nth = (struct tcphdr *)(ip6 + 1);
 		} else
 #endif /* INET6 */
 		{
 			bcopy((caddr_t)ip, mtod(n, caddr_t), sizeof(struct ip));
 			ip = mtod(n, struct ip *);
 			xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
 			nth = (struct tcphdr *)(ip + 1);
 		}
 		bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr));
 		xchg(nth->th_dport, nth->th_sport, uint16_t);
 		/* From here on th refers to the copy; the original is freed. */
 		th = nth;
 		m_freem(m);
 		m = n;
 	} else {
 		/*
 		 * Reuse the mbuf: free everything chained behind it and
 		 * point its data at the IP header (ipgen).
 		 * XXX MRT We inherit the FIB, which is lucky.
 		 */
 		m_freem(m->m_next);
 		m->m_next = NULL;
 		m->m_data = (caddr_t)ipgen;
 		/* m_len is set later */
 #ifdef INET6
 		if (isipv6) {
 			xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr);
 			nth = (struct tcphdr *)(ip6 + 1);
 		} else
 #endif /* INET6 */
 		{
 			xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t);
 			nth = (struct tcphdr *)(ip + 1);
 		}
 		if (th != nth) {
 			/*
 			 * this is usually a case when an extension header
 			 * exists between the IPv6 header and the
 			 * TCP header.
 			 */
 			nth->th_sport = th->th_sport;
 			nth->th_dport = th->th_dport;
 		}
 		xchg(nth->th_dport, nth->th_sport, uint16_t);
 #undef xchg
 	}
 	/* Base length: IP header plus option-less TCP header. */
 	tlen = 0;
 #ifdef INET6
 	if (isipv6)
 		tlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 		tlen = sizeof (struct tcpiphdr);
 #endif
 #ifdef INVARIANTS
 	/* m_len is zeroed so that M_TRAILINGSPACE() counts from m_data. */
 	m->m_len = 0;
 	KASSERT(M_TRAILINGSPACE(m) >= tlen,
 	    ("Not enough trailing space for message (m=%p, need=%d, have=%ld)",
 	    m, tlen, (long)M_TRAILINGSPACE(m)));
 #endif
 	m->m_len = tlen;
 	to.to_flags = 0;
 	if (incl_opts) {
 		/*
 		 * Make sure we have room.  If fewer than TCP_MAXOLEN bytes
 		 * remain in m, the options go into a second, chained mbuf;
 		 * if that cannot be allocated, options are dropped.
 		 */
 		if (M_TRAILINGSPACE(m) < TCP_MAXOLEN) {
 			m->m_next = m_get(M_NOWAIT, MT_DATA);
 			if (m->m_next) {
 				optp = mtod(m->m_next, u_char *);
 				optm = m->m_next;
 			} else
 				incl_opts = false;
 		} else {
 			optp = (u_char *) (nth + 1);
 			optm = m;
 		}
 	}
 	if (incl_opts) {
 		/* Timestamps. */
 		if (tp->t_flags & TF_RCVD_TSTMP) {
 			to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
 			to.to_tsecr = tp->ts_recent;
 			to.to_flags |= TOF_TS;
 		}
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		/* TCP-MD5 (RFC2385). */
 		if (tp->t_flags & TF_SIGNATURE)
 			to.to_flags |= TOF_SIGNATURE;
 #endif
 		/* Add the options. */
 		tlen += optlen = tcp_addoptions(&to, optp);
 
 		/* Update m_len in the correct mbuf. */
 		optm->m_len += optlen;
 	} else
 		optlen = 0;
 	/* Finish the IP header now that the total length is known. */
 #ifdef INET6
 	if (isipv6) {
 		ip6->ip6_flow = 0;
 		ip6->ip6_vfc = IPV6_VERSION;
 		ip6->ip6_nxt = IPPROTO_TCP;
 		ip6->ip6_plen = htons(tlen - sizeof(*ip6));
 	}
 #endif
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		ip->ip_len = htons(tlen);
 		ip->ip_ttl = V_ip_defttl;
 		/* IP_DF is OR-ed in; any bits already in ip_off are kept. */
 		if (V_path_mtu_discovery)
 			ip->ip_off |= htons(IP_DF);
 	}
 #endif
 	m->m_pkthdr.len = tlen;
 	m->m_pkthdr.rcvif = NULL;
 #ifdef MAC
 	if (inp != NULL) {
 		/*
 		 * Packet is associated with a socket, so allow the
 		 * label of the response to reflect the socket label.
 		 */
 		INP_WLOCK_ASSERT(inp);
 		mac_inpcb_create_mbuf(inp, m);
 	} else {
 		/*
 		 * Packet is not associated with a socket, so possibly
 		 * update the label in place.
 		 */
 		mac_netinet_tcp_reply(m);
 	}
 #endif
 	/* Fill in the TCP header proper. */
 	nth->th_seq = htonl(seq);
 	nth->th_ack = htonl(ack);
 	nth->th_x2 = 0;
 	nth->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
 	nth->th_flags = flags;
 	/* With a tcpcb the window is scaled down by rcv_scale. */
 	if (tp != NULL)
 		nth->th_win = htons((u_short) (win >> tp->rcv_scale));
 	else
 		nth->th_win = htons((u_short)win);
 	nth->th_urp = 0;
 
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 	/* A requested signature that cannot be produced drops the packet. */
 	if (to.to_flags & TOF_SIGNATURE) {
 		if (!TCPMD5_ENABLED() ||
 		    TCPMD5_OUTPUT(m, nth, to.to_signature) != 0) {
 			m_freem(m);
 			return;
 		}
 	}
 #endif
 
 	/*
 	 * Only the pseudo-header sum is stored in th_sum; csum_flags and
 	 * csum_data mark the packet so the TCP checksum can be completed
 	 * later (presumably by the output path or the NIC -- confirm).
 	 */
 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 #ifdef INET6
 	if (isipv6) {
 		m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
 		nth->th_sum = in6_cksum_pseudo(ip6,
 		    tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0);
 		ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb :
 		    NULL, NULL);
 	}
 #endif /* INET6 */
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		m->m_pkthdr.csum_flags = CSUM_TCP;
 		nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 		    htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p)));
 	}
 #endif /* INET */
 #ifdef TCPDEBUG
 	if (tp == NULL || (inp->inp_socket->so_options & SO_DEBUG))
 		tcp_trace(TA_OUTPUT, 0, tp, mtod(m, void *), th, 0);
 #endif
 	TCP_PROBE3(debug__output, tp, th, m);
 	if (flags & TH_RST)
 		TCP_PROBE5(accept__refused, NULL, NULL, m, tp, nth);
 
 	TCP_PROBE5(send, NULL, tp, m, tp, nth);
 	/* Hand the packet to the IP layer; the output result is ignored. */
 #ifdef INET6
 	if (isipv6)
 		(void) ip6_output(m, NULL, NULL, 0, NULL, NULL, inp);
 #endif /* INET6 */
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 		(void) ip_output(m, NULL, NULL, 0, NULL, inp);
 #endif
 }
 
 /*
  * Create a new TCP control block, making an
  * empty reassembly queue and hooking it to the argument
  * protocol control block.  The `inp' parameter must have
  * come from the zone allocator set up in tcp_init().
  *
  * The tcpcb is allocated zeroed from V_tcpcb_zone without sleeping.  It
  * takes a reference on the current default TCP function block and on
  * inp, is attached to the default congestion control algorithm, and has
  * its timers and RTT state initialised.
  *
  * Returns the new tcpcb, or NULL if the allocation, the congestion
  * control cb_init hook, or (with TCP_HHOOK) khelp_init_osd() fails.  On
  * failure everything acquired so far is released again.
  */
 struct tcpcb *
 tcp_newtcpcb(struct inpcb *inp)
 {
 	struct tcpcb_mem *tm;
 	struct tcpcb *tp;
 #ifdef INET6
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
 
 	tm = uma_zalloc(V_tcpcb_zone, M_NOWAIT | M_ZERO);
 	if (tm == NULL)
 		return (NULL);
 	tp = &tm->tcb;
 
 	/* Initialise cc_var struct for this tcpcb. */
 	tp->ccv = &tm->ccv;
 	tp->ccv->type = IPPROTO_TCP;
 	tp->ccv->ccvc.tcp = tp;
 	/* Bind to the current default function block and reference it. */
 	rw_rlock(&tcp_function_lock);
 	tp->t_fb = tcp_func_set_ptr;
 	refcount_acquire(&tp->t_fb->tfb_refcnt);
 	rw_runlock(&tcp_function_lock);
 	/*
 	 * Use the current system default CC algorithm.
 	 */
 	CC_LIST_RLOCK();
 	KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!"));
 	CC_ALGO(tp) = CC_DEFAULT();
 	CC_LIST_RUNLOCK();
 
 	if (CC_ALGO(tp)->cb_init != NULL)
 		if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) {
 			if (tp->t_fb->tfb_tcp_fb_fini)
 				(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
 			refcount_release(&tp->t_fb->tfb_refcnt);
 			uma_zfree(V_tcpcb_zone, tm);
 			return (NULL);
 		}
 
 #ifdef TCP_HHOOK
 	tp->osd = &tm->osd;
 	if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) {
 		/*
 		 * cb_init has already succeeded at this point, so give the
 		 * CC algorithm the chance to release its per-connection
 		 * state, exactly as tcp_discardcb() does on normal teardown.
 		 * Without this the state set up by cb_init would be leaked.
 		 */
 		if (CC_ALGO(tp)->cb_destroy != NULL)
 			CC_ALGO(tp)->cb_destroy(tp->ccv);
 		if (tp->t_fb->tfb_tcp_fb_fini)
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		uma_zfree(V_tcpcb_zone, tm);
 		return (NULL);
 	}
 #endif
 
 #ifdef VIMAGE
 	tp->t_vnet = inp->inp_vnet;
 #endif
 	tp->t_timers = &tm->tt;
 	/*	LIST_INIT(&tp->t_segq); */	/* XXX covered by M_ZERO */
 	/* Start from the default MSS of the connection's address family. */
 	tp->t_maxseg =
 #ifdef INET6
 		isipv6 ? V_tcp_v6mssdflt :
 #endif /* INET6 */
 		V_tcp_mssdflt;
 
 	/* Set up our timeouts. */
 	callout_init(&tp->t_timers->tt_rexmt, 1);
 	callout_init(&tp->t_timers->tt_persist, 1);
 	callout_init(&tp->t_timers->tt_keep, 1);
 	callout_init(&tp->t_timers->tt_2msl, 1);
 	callout_init(&tp->t_timers->tt_delack, 1);
 
 	/* t_flags starts at zero (M_ZERO), so '=' below loses nothing. */
 	if (V_tcp_do_rfc1323)
 		tp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
 	if (V_tcp_do_sack)
 		tp->t_flags |= TF_SACK_PERMIT;
 	TAILQ_INIT(&tp->snd_holes);
 	/*
 	 * The tcpcb will hold a reference on its inpcb until tcp_discardcb()
 	 * is called.
 	 */
 	in_pcbref(inp);	/* Reference for tcpcb */
 	tp->t_inpcb = inp;
 
 	/*
 	 * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no
 	 * rtt estimate.  Set rttvar so that srtt + 4 * rttvar gives
 	 * reasonable initial retransmit time.
 	 */
 	tp->t_srtt = TCPTV_SRTTBASE;
 	tp->t_rttvar = ((TCPTV_RTOBASE - TCPTV_SRTTBASE) << TCP_RTTVAR_SHIFT) / 4;
 	tp->t_rttmin = tcp_rexmit_min;
 	tp->t_rxtcur = TCPTV_RTOBASE;
 	tp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
 	tp->t_rcvtime = ticks;
 	/*
 	 * IPv4 TTL initialization is necessary for an IPv6 socket as well,
 	 * because the socket may be bound to an IPv6 wildcard address,
 	 * which may match an IPv4-mapped IPv6 address.
 	 */
 	inp->inp_ip_ttl = V_ip_defttl;
 	inp->inp_ppcb = tp;
 #ifdef TCPPCAP
 	/*
 	 * Init the TCP PCAP queues.
 	 */
 	tcp_pcap_tcpcb_init(tp);
 #endif
 	/* Let the function block do its own per-connection setup last. */
 	if (tp->t_fb->tfb_tcp_fb_init) {
 		(*tp->t_fb->tfb_tcp_fb_init)(tp);
 	}
 	return (tp);		/* XXX */
 }
 
 /*
  * Called when a congestion control algorithm is about to be unloaded.
  * Every active control block in every network stack that still uses
  * "unload_algo" is moved over to NewReno, and the departing algorithm's
  * cb_destroy hook (if any) is run for it, so that no dangling pointers
  * into the module remain.
  *
  * A non-zero return would tell the CC framework that the unload is not
  * safe; this implementation cannot fail and always returns zero.
  */
 int
 tcp_ccalgounload(struct cc_algo *unload_algo)
 {
 	struct cc_algo *old_algo;
 	struct tcpcb *tp;
 	struct inpcb *inp;
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	VNET_LIST_RLOCK();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		/*
 		 * Connections that are still being initialised with the
 		 * departing algorithm hold INP_INFO_WLOCK while doing so,
 		 * so taking it here means the list is stable before we
 		 * start walking it.
 		 */
 		INP_INFO_WLOCK(&V_tcbinfo);
 		LIST_FOREACH(inp, &V_tcb, inp_list) {
 			INP_WLOCK(inp);
 			/* Time-wait entries carry a tcptw, not a tcpcb. */
 			tp = NULL;
 			if ((inp->inp_flags & INP_TIMEWAIT) == 0)
 				tp = intotcpcb(inp);
 			/*
 			 * Holding INP_WLOCK guarantees the connection is
 			 * not executing inside the CC module right now, so
 			 * the switch is safe.  NewReno needs no init call.
 			 */
 			if (tp != NULL && CC_ALGO(tp) == unload_algo) {
 				old_algo = CC_ALGO(tp);
 				CC_ALGO(tp) = &newreno_cc_algo;
 				if (old_algo->cb_destroy != NULL)
 					old_algo->cb_destroy(tp->ccv);
 			}
 			INP_WUNLOCK(inp);
 		}
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK();
 
 	return (0);
 }
 
 /*
  * Drop a TCP connection, reporting
  * the specified error.  If connection is synchronized,
  * then send a RST to peer.
  */
 struct tcpcb *
 tcp_drop(struct tcpcb *tp, int errno)
 {
 	struct socket *so = tp->t_inpcb->inp_socket;
 
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	if (TCPS_HAVERCVDSYN(tp->t_state)) {
 		tcp_state_change(tp, TCPS_CLOSED);
 		(void) tp->t_fb->tfb_tcp_output(tp);
 		TCPSTAT_INC(tcps_drops);
 	} else
 		TCPSTAT_INC(tcps_conndrops);
 	if (errno == ETIMEDOUT && tp->t_softerror)
 		errno = tp->t_softerror;
 	so->so_error = errno;
 	return (tcp_close(tp));
 }
 
 /*
  * Tear down a tcpcb: stop its timers, save RTT/ssthresh metrics to the
  * host cache when enough samples exist, release the reassembly queue,
  * SACK holes, capture queues, congestion control and helper state, and
  * detach it from its inpcb.  The tcpcb memory itself is freed here only
  * if no timer callout is still draining; otherwise the last
  * tcp_timer_discard() call frees it.  The inpcb must be write-locked.
  */
 void
 tcp_discardcb(struct tcpcb *tp)
 {
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so = inp->inp_socket;
 #ifdef INET6
 	int isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
 #endif /* INET6 */
 	int released;
 
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * Make sure that all of our timers are stopped before we delete the
 	 * PCB.
 	 *
 	 * If stopping a timer fails, we schedule a discard function in same
 	 * callout, and the last discard function called will take care of
 	 * deleting the tcpcb.
 	 */
 	tp->t_timers->tt_draincnt = 0;
 	tcp_timer_stop(tp, TT_REXMT);
 	tcp_timer_stop(tp, TT_PERSIST);
 	tcp_timer_stop(tp, TT_KEEP);
 	tcp_timer_stop(tp, TT_2MSL);
 	tcp_timer_stop(tp, TT_DELACK);
 	if (tp->t_fb->tfb_tcp_timer_stop_all) {
 		/*
 		 * Call the stop-all function of the methods,
 		 * this function should call the tcp_timer_stop()
 		 * method with each of the function specific timeouts.
 		 * That stop will be called via the tfb_tcp_timer_stop()
 		 * which should use the async drain function of the
 		 * callout system (see tcp_var.h).
 		 */
 		tp->t_fb->tfb_tcp_timer_stop_all(tp);
 	}
 
 	/*
 	 * If we got enough samples through the srtt filter,
 	 * save the rtt and rttvar in the routing entry.
 	 * 'Enough' is arbitrarily defined as 4 rtt samples.
 	 * 4 samples is enough for the srtt filter to converge
 	 * to within enough % of the correct value; fewer samples
 	 * and we could save a bogus rtt. The danger is not high
 	 * as tcp quickly recovers from everything.
 	 * XXX: Works very well but needs some more statistics!
 	 */
 	if (tp->t_rttupdated >= 4) {
 		struct hc_metrics_lite metrics;
 		uint32_t ssthresh;
 
 		bzero(&metrics, sizeof(metrics));
 		/*
 		 * Update the ssthresh always when the conditions below
 		 * are satisfied. This gives us better new start value
 		 * for the congestion avoidance for new connections.
 		 * ssthresh is only set if packet loss occurred on a session.
 		 *
 		 * XXXRW: 'so' may be NULL here, and/or socket buffer may be
 		 * being torn down.  Ideally this code would not use 'so'.
 		 */
 		ssthresh = tp->snd_ssthresh;
 		if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) {
 			/*
 			 * convert the limit from user data bytes to
 			 * packets then to packet data bytes.
 			 */
 			ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg;
 			if (ssthresh < 2)
 				ssthresh = 2;
 			/*
 			 * Per-packet size is the MSS plus the IP and TCP
 			 * header sizes of the connection's address family.
 			 */
 			ssthresh *= (tp->t_maxseg +
 #ifdef INET6
 			    (isipv6 ? sizeof (struct ip6_hdr) +
 				sizeof (struct tcphdr) :
 #endif
 				sizeof (struct tcpiphdr)
 #ifdef INET6
 			    )
 #endif
 			    );
 		} else
 			ssthresh = 0;
 		metrics.rmx_ssthresh = ssthresh;
 
 		metrics.rmx_rtt = tp->t_srtt;
 		metrics.rmx_rttvar = tp->t_rttvar;
 		metrics.rmx_cwnd = tp->snd_cwnd;
 		metrics.rmx_sendpipe = 0;
 		metrics.rmx_recvpipe = 0;
 
 		tcp_hc_update(&inp->inp_inc, &metrics);
 	}
 
 	/* free the reassembly queue, if any */
 	tcp_reass_flush(tp);
 
 #ifdef TCP_OFFLOAD
 	/* Disconnect offload device, if any. */
 	if (tp->t_flags & TF_TOE)
 		tcp_offload_detach(tp);
 #endif
 		
 	tcp_free_sackholes(tp);
 
 #ifdef TCPPCAP
 	/* Free the TCP PCAP queues. */
 	tcp_pcap_drain(&(tp->t_inpkts));
 	tcp_pcap_drain(&(tp->t_outpkts));
 #endif
 
 	/* Allow the CC algorithm to clean up after itself. */
 	if (CC_ALGO(tp)->cb_destroy != NULL)
 		CC_ALGO(tp)->cb_destroy(tp->ccv);
 
 #ifdef TCP_HHOOK
 	khelp_destroy_osd(tp->osd);
 #endif
 
 	/* Detach from the CC algorithm and from the inpcb. */
 	CC_ALGO(tp) = NULL;
 	inp->inp_ppcb = NULL;
 	/*
 	 * tt_draincnt was reset to 0 above; a non-zero value here means at
 	 * least one callout is still pending and tcp_timer_discard() will
 	 * do the final free instead.
 	 */
 	if (tp->t_timers->tt_draincnt == 0) {
 		/* We own the last reference on tcpcb, let's free it. */
 		TCPSTATES_DEC(tp->t_state);
 		if (tp->t_fb->tfb_tcp_fb_fini)
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		tp->t_inpcb = NULL;
 		uma_zfree(V_tcpcb_zone, tp);
 		/* Drop the tcpcb's inpcb reference; it must not be the last. */
 		released = in_pcbrele_wlocked(inp);
 		KASSERT(!released, ("%s: inp %p should not have been released "
 			"here", __func__, inp));
 	}
 }
 
 /*
  * Callout handler run for a timer of an already discarded tcpcb (passed
  * as ptp).  Each call decrements the drain count; the call that brings
  * it to zero frees the tcpcb and drops the tcpcb's reference on the
  * inpcb.  The inpcb is unlocked here unless in_pcbrele_wlocked()
  * reports that it released the inpcb.
  */
 void
 tcp_timer_discard(void *ptp)
 {
 	struct tcpcb *tp;
 	struct inpcb *inp;
 	int inp_released;
 
 	tp = (struct tcpcb *)ptp;
 	inp_released = 0;
 	CURVNET_SET(tp->t_vnet);
 	INP_INFO_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL",
 		__func__, tp));
 	INP_WLOCK(inp);
 	KASSERT((tp->t_timers->tt_flags & TT_STOPPED) != 0,
 		("%s: tcpcb has to be stopped here", __func__));
 
 	if (--tp->t_timers->tt_draincnt == 0) {
 		/* Last pending callout: the tcpcb can go away now. */
 		TCPSTATES_DEC(tp->t_state);
 		if (tp->t_fb->tfb_tcp_fb_fini != NULL)
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		tp->t_inpcb = NULL;
 		uma_zfree(V_tcpcb_zone, tp);
 		inp_released = in_pcbrele_wlocked(inp);
 	}
 
 	/* Only unlock the inpcb if it was not released above. */
 	if (!inp_released)
 		INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
 /*
  * Attempt to close a TCP control block, marking it as dropped, and freeing
  * the socket if we hold the only reference.
  */
 struct tcpcb *
 tcp_close(struct tcpcb *tp)
 {
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so;
 
 	/* Caller holds the pcbinfo lock and the inpcb write lock. */
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 #ifdef TCP_OFFLOAD
 	if (tp->t_state == TCPS_LISTEN)
 		tcp_offload_listen_stop(tp);
 #endif
 #ifdef TCP_RFC7413
 	/*
 	 * This releases the TFO pending counter resource for TFO listen
 	 * sockets as well as passively-created TFO sockets that transition
 	 * from SYN_RECEIVED to CLOSED.
 	 */
 	if (tp->t_tfo_pending) {
 		tcp_fastopen_decrement_counter(tp->t_tfo_pending);
 		tp->t_tfo_pending = NULL;
 	}
 #endif
 	/* Mark the inpcb dropped and force the connection into CLOSED. */
 	in_pcbdrop(inp);
 	TCPSTAT_INC(tcps_closed);
 	if (tp->t_state != TCPS_CLOSED)
 		tcp_state_change(tp, TCPS_CLOSED);
 	KASSERT(inp->inp_socket != NULL, ("tcp_close: inp_socket NULL"));
 	so = inp->inp_socket;
 	soisdisconnected(so);
 	/*
 	 * If the inpcb holds the socket reference (INP_SOCKREF), give it up:
 	 * unlock the inpcb, clear SS_PROTOREF under the socket lock and call
 	 * sofree().  NULL is returned in that case; otherwise the tcpcb is
 	 * handed back to the caller with the inpcb still locked.
 	 */
 	if (inp->inp_flags & INP_SOCKREF) {
 		KASSERT(so->so_state & SS_PROTOREF,
 		    ("tcp_close: !SS_PROTOREF"));
 		inp->inp_flags &= ~INP_SOCKREF;
 		INP_WUNLOCK(inp);
-		ACCEPT_LOCK();
 		SOCK_LOCK(so);
 		so->so_state &= ~SS_PROTOREF;
 		sofree(so);
 		return (NULL);
 	}
 	return (tp);
 }
 
 /*
  * Release memory held on behalf of TCP connections.  Does nothing
  * unless do_tcpdrain is set.  For every network stack, each connection
  * that is not in time-wait and still has a tcpcb gets its reassembly
  * queue flushed and its SACK report cleaned; with TCPPCAP and
  * tcp_pcap_aggressive_free, the capture queues are drained as well.
  *
  * XXX: The "Net/3" implementation doesn't imply that the TCP reassembly
  *      queue should be flushed, but in a situation where we're really
  *      low on mbufs, this is potentially useful.
  */
 void
 tcp_drain(void)
 {
 	struct inpcb *inpb;
 	struct tcpcb *tcpb;
 	VNET_ITERATOR_DECL(vnet_iter);
 
 	if (!do_tcpdrain)
 		return;
 
 	VNET_LIST_RLOCK_NOSLEEP();
 	VNET_FOREACH(vnet_iter) {
 		CURVNET_SET(vnet_iter);
 		INP_INFO_WLOCK(&V_tcbinfo);
 		LIST_FOREACH(inpb, V_tcbinfo.ipi_listhead, inp_list) {
 			/* Time-wait entries are skipped without locking. */
 			if ((inpb->inp_flags & INP_TIMEWAIT) != 0)
 				continue;
 			INP_WLOCK(inpb);
 			tcpb = intotcpcb(inpb);
 			if (tcpb != NULL) {
 				tcp_reass_flush(tcpb);
 				tcp_clean_sackreport(tcpb);
 #ifdef TCPPCAP
 				if (tcp_pcap_aggressive_free) {
 					/* Free the TCP PCAP queues. */
 					tcp_pcap_drain(&(tcpb->t_inpkts));
 					tcp_pcap_drain(&(tcpb->t_outpkts));
 				}
 #endif
 			}
 			INP_WUNLOCK(inpb);
 		}
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 	}
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
 /*
  * Notify a tcp user of an asynchronous error;
  * store error as soft error, but wake up user
  * (for now, won't do anything until can select for soft error).
  *
  * Do not wake up user since there currently is no mechanism for
  * reporting soft errors (yet - a kqueue filter may be added).
  */
 static struct inpcb *
 tcp_notify(struct inpcb *inp, int error)
 {
 	struct tcpcb *tp;
 
 	INP_INFO_LOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	if ((inp->inp_flags & INP_TIMEWAIT) ||
 	    (inp->inp_flags & INP_DROPPED))
 		return (inp);
 
 	tp = intotcpcb(inp);
 	KASSERT(tp != NULL, ("tcp_notify: tp == NULL"));
 
 	/*
 	 * Ignore some errors if we are hooked up.
 	 * If connection hasn't completed, has retransmitted several times,
 	 * and receives a second error, give up now.  This is better
 	 * than waiting a long time to establish a connection that
 	 * can never complete.
 	 */
 	if (tp->t_state == TCPS_ESTABLISHED &&
 	    (error == EHOSTUNREACH || error == ENETUNREACH ||
 	     error == EHOSTDOWN)) {
 		if (inp->inp_route.ro_rt) {
 			RTFREE(inp->inp_route.ro_rt);
 			inp->inp_route.ro_rt = (struct rtentry *)NULL;
 		}
 		return (inp);
 	} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
 	    tp->t_softerror) {
 		tp = tcp_drop(tp, error);
 		if (tp != NULL)
 			return (inp);
 		else
 			return (NULL);
 	} else {
 		tp->t_softerror = error;
 		return (inp);
 	}
 #if 0
 	wakeup( &so->so_timeo);
 	sorwakeup(so);
 	sowwakeup(so);
 #endif
 }
 
 /*
  * Sysctl handler that exports the list of TCP connections.
  *
  * With no output buffer (req->oldptr == NULL) only a size estimate is
  * stored in req->oldidx.  Writing to the node is refused with EPERM.
  * Otherwise the output consists of a leading struct xinpgen, whatever
  * syncache_pcblist() emits, one struct xtcpcb for every inpcb the
  * caller is allowed to see, and a trailing struct xinpgen carrying the
  * then-current generation counts so the caller can detect changes.
  *
  * Returns 0 on success or the first error reported while wiring the
  * buffer or copying data out.
  */
 static int
 tcp_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	int error, i, m, n, pcb_count;
 	struct inpcb *inp, **inp_list;
 	inp_gen_t gencnt;
 	struct xinpgen xig;
 
 	/*
 	 * The process of preparing the TCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == NULL) {
 		n = V_tcbinfo.ipi_count +
 		    counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
 		/* Leave slack for connections created in the meantime. */
 		n += imax(n / 8, 10);
 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
 		return (0);
 	}
 
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	INP_LIST_RLOCK(&V_tcbinfo);
 	gencnt = V_tcbinfo.ipi_gencnt;
 	n = V_tcbinfo.ipi_count;
 	INP_LIST_RUNLOCK(&V_tcbinfo);
 
 	m = counter_u64_fetch(V_tcps_states[TCPS_SYN_RECEIVED]);
 
 	error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
 		+ (n + m) * sizeof(struct xtcpcb));
 	if (error != 0)
 		return (error);
 
 	/*
 	 * xig lives on the kernel stack and is copied out in full, so clear
 	 * it first: otherwise padding and any members not assigned below
 	 * would disclose stale stack contents to userland.
 	 */
 	bzero(&xig, sizeof(xig));
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n + m;
 	xig.xig_gen = gencnt;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return (error);
 
 	error = syncache_pcblist(req, m, &pcb_count);
 	if (error)
 		return (error);
 
 	inp_list = malloc(n * sizeof *inp_list, M_TEMP, M_WAITOK);
 
 	/*
 	 * Pass 1: under the pcbinfo lock, collect (and reference) up to n
 	 * inpcbs that are no newer than the sampled generation and that the
 	 * requesting credential may see.
 	 */
 	INP_INFO_WLOCK(&V_tcbinfo);
 	for (inp = LIST_FIRST(V_tcbinfo.ipi_listhead), i = 0;
 	    inp != NULL && i < n; inp = LIST_NEXT(inp, inp_list)) {
 		INP_WLOCK(inp);
 		if (inp->inp_gencnt <= gencnt) {
 			/*
 			 * XXX: This use of cr_cansee(), introduced with
 			 * TCP state changes, is not quite right, but for
 			 * now, better than nothing.
 			 */
 			if (inp->inp_flags & INP_TIMEWAIT) {
 				if (intotw(inp) != NULL)
 					error = cr_cansee(req->td->td_ucred,
 					    intotw(inp)->tw_cred);
 				else
 					error = EINVAL;	/* Skip this inp. */
 			} else
 				error = cr_canseeinpcb(req->td->td_ucred, inp);
 			if (error == 0) {
 				in_pcbref(inp);
 				inp_list[i++] = inp;
 			}
 		}
 		INP_WUNLOCK(inp);
 	}
 	INP_INFO_WUNLOCK(&V_tcbinfo);
 	n = i;
 
 	/*
 	 * Pass 2: copy the collected connections out.  Stop at the first
 	 * copy-out failure so that it is the error reported to the caller
 	 * and cannot be overwritten by a later, successful copy.  The
 	 * references are still dropped for all n entries in pass 3.
 	 */
 	error = 0;
 	for (i = 0; i < n && error == 0; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (inp->inp_gencnt <= gencnt) {
 			struct xtcpcb xt;
 
 			/*
 			 * NOTE(review): assumes tcp_inptoxtp() initialises
 			 * every byte of xt before it is copied out -- confirm.
 			 */
 			tcp_inptoxtp(inp, &xt);
 			INP_RUNLOCK(inp);
 			error = SYSCTL_OUT(req, &xt, sizeof xt);
 		} else
 			INP_RUNLOCK(inp);
 	}
 
 	/* Pass 3: drop the references taken in pass 1. */
 	INP_INFO_RLOCK(&V_tcbinfo);
 	for (i = 0; i < n; i++) {
 		inp = inp_list[i];
 		INP_RLOCK(inp);
 		if (!in_pcbrele_rlocked(inp))
 			INP_RUNLOCK(inp);
 	}
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.
 		 * If the generation differs from what we told
 		 * her before, she knows that something happened
 		 * while we were processing this request, and it
 		 * might be necessary to retry.
 		 */
 		INP_LIST_RLOCK(&V_tcbinfo);
 		xig.xig_gen = V_tcbinfo.ipi_gencnt;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = V_tcbinfo.ipi_count + pcb_count;
 		INP_LIST_RUNLOCK(&V_tcbinfo);
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 	free(inp_list, M_TEMP);
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_PCBLIST, pcblist,
     CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0,
     tcp_pcblist, "S,xtcpcb", "List of active TCP connections");
 
 #ifdef INET
 static int
 tcp_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct xucred xuc;
 	struct sockaddr_in addrs[2];
 	struct inpcb *inp;
 	int error;
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error)
 		return (error);
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error)
 		return (error);
 	inp = in_pcblookup(&V_tcbinfo, addrs[1].sin_addr, addrs[1].sin_port,
 	    addrs[0].sin_addr, addrs[0].sin_port, INPLOOKUP_RLOCKPCB, NULL);
 	if (inp != NULL) {
 		if (inp->inp_socket == NULL)
 			error = ENOENT;
 		if (error == 0)
 			error = cr_canseeinpcb(req->td->td_ucred, inp);
 		if (error == 0)
 			cru2x(inp->inp_cred, &xuc);
 		INP_RUNLOCK(inp);
 	} else
 		error = ENOENT;
 	if (error == 0)
 		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, getcred,
     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
     tcp_getcred, "S,xucred", "Get the xucred of a TCP connection");
 #endif /* INET */
 
 #ifdef INET6
 static int
 tcp6_getcred(SYSCTL_HANDLER_ARGS)
 {
 	struct xucred xuc;
 	struct sockaddr_in6 addrs[2];
 	struct inpcb *inp;
 	int error;
 #ifdef INET
 	int mapped = 0;
 #endif
 
 	error = priv_check(req->td, PRIV_NETINET_GETCRED);
 	if (error)
 		return (error);
 	error = SYSCTL_IN(req, addrs, sizeof(addrs));
 	if (error)
 		return (error);
 	if ((error = sa6_embedscope(&addrs[0], V_ip6_use_defzone)) != 0 ||
 	    (error = sa6_embedscope(&addrs[1], V_ip6_use_defzone)) != 0) {
 		return (error);
 	}
 	if (IN6_IS_ADDR_V4MAPPED(&addrs[0].sin6_addr)) {
 #ifdef INET
 		if (IN6_IS_ADDR_V4MAPPED(&addrs[1].sin6_addr))
 			mapped = 1;
 		else
 #endif
 			return (EINVAL);
 	}
 
 #ifdef INET
 	if (mapped == 1)
 		inp = in_pcblookup(&V_tcbinfo,
 			*(struct in_addr *)&addrs[1].sin6_addr.s6_addr[12],
 			addrs[1].sin6_port,
 			*(struct in_addr *)&addrs[0].sin6_addr.s6_addr[12],
 			addrs[0].sin6_port, INPLOOKUP_RLOCKPCB, NULL);
 	else
 #endif
 		inp = in6_pcblookup(&V_tcbinfo,
 			&addrs[1].sin6_addr, addrs[1].sin6_port,
 			&addrs[0].sin6_addr, addrs[0].sin6_port,
 			INPLOOKUP_RLOCKPCB, NULL);
 	if (inp != NULL) {
 		if (inp->inp_socket == NULL)
 			error = ENOENT;
 		if (error == 0)
 			error = cr_canseeinpcb(req->td->td_ucred, inp);
 		if (error == 0)
 			cru2x(inp->inp_cred, &xuc);
 		INP_RUNLOCK(inp);
 	} else
 		error = ENOENT;
 	if (error == 0)
 		error = SYSCTL_OUT(req, &xuc, sizeof(struct xucred));
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred,
     CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0,
     tcp6_getcred, "S,xucred", "Get the xucred of a TCP6 connection");
 #endif /* INET6 */
 
 
 #ifdef INET
 /*
  * IPv4 control input for TCP: react to a protocol control request `cmd'
  * concerning the destination in `sa'.  `vip', when non-NULL, points at
  * the IP header of the offending packet quoted inside an ICMP message;
  * the TCP header following it is used to find the affected connection.
  * Depending on cmd the connection is notified of a soft error, has its
  * MTU lowered, or is dropped; with no matching inpcb the syncache is
  * informed instead.
  */
 void
 tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 	struct ip *ip = vip;
 	struct tcphdr *th;
 	struct in_addr faddr;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
 	struct icmp *icp;
 	struct in_conninfo inc;
 	tcp_seq icmp_tcp_seq;
 	int mtu;
 
 	/* Only IPv4 destinations other than INADDR_ANY are handled. */
 	faddr = ((struct sockaddr_in *)sa)->sin_addr;
 	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
 		return;
 
 	/*
 	 * Pick the per-connection callback: MTU discovery for PRC_MSGSIZE,
 	 * tcp_drop_syn_sent for the "may reset" class of errors when
 	 * V_icmp_may_rst is set and a quoted header is available, and
 	 * tcp_notify otherwise.
 	 */
 	if (cmd == PRC_MSGSIZE)
 		notify = tcp_mtudisc_notify;
 	else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
 		cmd == PRC_UNREACH_PORT || cmd == PRC_UNREACH_PROTOCOL || 
 		cmd == PRC_TIMXCEED_INTRANS) && ip)
 		notify = tcp_drop_syn_sent;
 
 	/*
 	 * Hostdead is ugly because it goes linearly through all PCBs.
 	 * XXX: We never get this from ICMP, otherwise it makes an
 	 * excellent DoS attack on machines with many connections.
 	 */
 	else if (cmd == PRC_HOSTDEAD)
 		ip = NULL;
 	/* Unknown commands, or ones mapping to no error, are ignored. */
 	else if ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0)
 		return;
 
 	/* Without a quoted header, notify every PCB talking to faddr. */
 	if (ip == NULL) {
 		in_pcbnotifyall(&V_tcbinfo, faddr, inetctlerrmap[cmd], notify);
 		return;
 	}
 
 	/*
 	 * ip is the icmp_ip member of the enclosing ICMP message, so step
 	 * back to the ICMP header; the quoted TCP header follows the quoted
 	 * IP header, whose length is ip_hl 32-bit words.
 	 */
 	icp = (struct icmp *)((caddr_t)ip - offsetof(struct icmp, icmp_ip));
 	th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
 	INP_INFO_RLOCK(&V_tcbinfo);
 	/* A matching inpcb is returned write-locked. */
 	inp = in_pcblookup(&V_tcbinfo, faddr, th->th_dport, ip->ip_src,
 	    th->th_sport, INPLOOKUP_WLOCKPCB, NULL);
 	if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
 		/* signal EHOSTDOWN, as it flushes the cached route */
 		inp = (*notify)(inp, EHOSTDOWN);
 		goto out;
 	}
 	icmp_tcp_seq = th->th_seq;
 	if (inp != NULL)  {
 		if (!(inp->inp_flags & INP_TIMEWAIT) &&
 		    !(inp->inp_flags & INP_DROPPED) &&
 		    !(inp->inp_socket == NULL)) {
 			tp = intotcpcb(inp);
 			/*
 			 * Act only if the quoted sequence number lies in
 			 * [snd_una, snd_max); anything else is ignored.
 			 */
 			if (SEQ_GEQ(ntohl(icmp_tcp_seq), tp->snd_una) &&
 			    SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) {
 				if (cmd == PRC_MSGSIZE) {
 					/*
 					 * MTU discovery:
 					 * If we got a needfrag set the MTU
 					 * in the route to the suggested new
 					 * value (if given) and then notify.
 					 */
 					mtu = ntohs(icp->icmp_nextmtu);
 					/*
 					 * If no alternative MTU was
 					 * proposed, try the next smaller
 					 * one.
 					 */
 					if (!mtu)
 						mtu = ip_next_mtu(
 						    ntohs(ip->ip_len), 1);
 					/* Never go below minmss + headers. */
 					if (mtu < V_tcp_minmss +
 					    sizeof(struct tcpiphdr))
 						mtu = V_tcp_minmss +
 						    sizeof(struct tcpiphdr);
 					/*
 					 * Only process the offered MTU if it
 					 * is smaller than the current one.
 					 */
 					if (mtu < tp->t_maxseg +
 					    sizeof(struct tcpiphdr)) {
 						bzero(&inc, sizeof(inc));
 						inc.inc_faddr = faddr;
 						inc.inc_fibnum =
 						    inp->inp_inc.inc_fibnum;
 						tcp_hc_updatemtu(&inc, mtu);
 						tcp_mtudisc(inp, mtu);
 					}
 				} else
 					/* May return NULL; see "out" below. */
 					inp = (*notify)(inp,
 					    inetctlerrmap[cmd]);
 			}
 		}
 	} else {
 		/* No inpcb: let the syncache look for a matching entry. */
 		bzero(&inc, sizeof(inc));
 		inc.inc_fport = th->th_dport;
 		inc.inc_lport = th->th_sport;
 		inc.inc_faddr = faddr;
 		inc.inc_laddr = ip->ip_src;
 		syncache_unreach(&inc, icmp_tcp_seq);
 	}
 out:
 	/* inp is NULL here if nothing matched or notify returned NULL. */
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 }
 #endif /* INET */
 
 #ifdef INET6
 /*
  * Process a PRC_* control request for IPv6 TCP.
  *
  * cmd is the PRC_* code and sa the address the request is about.  When d
  * is non-NULL it is a struct ip6ctlparam supplied by icmp6, carrying the
  * mbuf, the quoted IPv6 header, the offset of the TCP header in the mbuf
  * and the source/final destination addresses.  Without a quoted IPv6
  * header the request is broadcast via in6_pcbnotify(); otherwise the
  * matching connection is notified / has its MTU lowered, or the syncache
  * is informed when no inpcb matches.
  */
 void
 tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d)
 {
 	struct in6_addr *dst;
 	struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify;
 	struct ip6_hdr *ip6;
 	struct mbuf *m;
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct icmp6_hdr *icmp6;
 	struct ip6ctlparam *ip6cp = NULL;
 	const struct sockaddr_in6 *sa6_src = NULL;
 	struct in_conninfo inc;
 	/* Only the first two fields of the TCP header are copied out. */
 	struct tcp_ports {
 		uint16_t th_sport;
 		uint16_t th_dport;
 	} t_ports;
 	tcp_seq icmp_tcp_seq;
 	unsigned int mtu;
 	unsigned int off;
 
 	if (sa->sa_family != AF_INET6 ||
 	    sa->sa_len != sizeof(struct sockaddr_in6))
 		return;
 
 	/* if the parameter is from icmp6, decode it. */
 	if (d != NULL) {
 		ip6cp = (struct ip6ctlparam *)d;
 		icmp6 = ip6cp->ip6c_icmp6;
 		m = ip6cp->ip6c_m;
 		ip6 = ip6cp->ip6c_ip6;
 		off = ip6cp->ip6c_off;
 		sa6_src = ip6cp->ip6c_src;
 		dst = ip6cp->ip6c_finaldst;
 	} else {
 		m = NULL;
 		ip6 = NULL;
 		off = 0;	/* fool gcc */
 		sa6_src = &sa6_any;
 		dst = NULL;
 	}
 
 	/* Pick the per-connection notification routine for this command. */
 	if (cmd == PRC_MSGSIZE)
 		notify = tcp_mtudisc_notify;
 	else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
 		cmd == PRC_UNREACH_PORT || cmd == PRC_UNREACH_PROTOCOL || 
 		cmd == PRC_TIMXCEED_INTRANS) && ip6 != NULL)
 		notify = tcp_drop_syn_sent;
 
 	/*
 	 * Hostdead is ugly because it goes linearly through all PCBs.
 	 * XXX: We never get this from ICMP, otherwise it makes an
 	 * excellent DoS attack on machines with many connections.
 	 */
 	else if (cmd == PRC_HOSTDEAD)
 		ip6 = NULL;
 	else if ((unsigned)cmd >= PRC_NCMDS || inet6ctlerrmap[cmd] == 0)
 		return;
 
 	/* No quoted header: notify through in6_pcbnotify() and stop. */
 	if (ip6 == NULL) {
 		in6_pcbnotify(&V_tcbinfo, sa, 0,
 			      (const struct sockaddr *)sa6_src,
 			      0, cmd, NULL, notify);
 		return;
 	}
 
 	/* Check if we can safely get the ports from the tcp hdr */
 	if (m == NULL ||
 	    (m->m_pkthdr.len <
 		(int32_t) (off + sizeof(struct tcp_ports)))) {
 		return;
 	}
 	bzero(&t_ports, sizeof(struct tcp_ports));
 	m_copydata(m, off, sizeof(struct tcp_ports), (caddr_t)&t_ports);
 	INP_INFO_RLOCK(&V_tcbinfo);
 	/* On success the inpcb is returned write-locked (INPLOOKUP_WLOCKPCB). */
 	inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_dst, t_ports.th_dport,
 	    &ip6->ip6_src, t_ports.th_sport, INPLOOKUP_WLOCKPCB, NULL);
 	if (inp != NULL && PRC_IS_REDIRECT(cmd)) {
 		/* signal EHOSTDOWN, as it flushes the cached route */
 		inp = (*notify)(inp, EHOSTDOWN);
 		goto out;
 	}
 	/*
 	 * The sequence number is read from right behind the two port
 	 * fields; give up if the mbuf does not contain it.
 	 */
 	off += sizeof(struct tcp_ports);
 	if (m->m_pkthdr.len < (int32_t) (off + sizeof(tcp_seq))) {
 		goto out;
 	}
 	m_copydata(m, off, sizeof(tcp_seq), (caddr_t)&icmp_tcp_seq);
 	if (inp != NULL)  {
 		if (!(inp->inp_flags & INP_TIMEWAIT) &&
 		    !(inp->inp_flags & INP_DROPPED) &&
 		    !(inp->inp_socket == NULL)) {
 			tp = intotcpcb(inp);
 			/*
 			 * Act only if the quoted sequence number falls in
 			 * [snd_una, snd_max), i.e. refers to data that was
 			 * sent and is not yet acknowledged.
 			 */
 			if (SEQ_GEQ(ntohl(icmp_tcp_seq), tp->snd_una) &&
 			    SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) {
 				if (cmd == PRC_MSGSIZE) {
 					/*
 					 * MTU discovery:
 					 * If we got a needfrag set the MTU
 					 * in the route to the suggested new
 					 * value (if given) and then notify.
 					 */
 					mtu = ntohl(icmp6->icmp6_mtu);
 					/*
 					 * If no alternative MTU was
 					 * proposed, or the proposed
 					 * MTU was too small, set to
 					 * the min.
 					 */
 					/*
 					 * NOTE(review): the value used is
 					 * IPV6_MMTU - 8, not IPV6_MMTU; the
 					 * 8 bytes presumably leave room for
 					 * a fragment header -- confirm.
 					 */
 					if (mtu < IPV6_MMTU)
 						mtu = IPV6_MMTU - 8;
 					bzero(&inc, sizeof(inc));
 					inc.inc_fibnum = M_GETFIB(m);
 					inc.inc_flags |= INC_ISIPV6;
 					inc.inc6_faddr = *dst;
 					if (in6_setscope(&inc.inc6_faddr,
 						m->m_pkthdr.rcvif, NULL))
 						goto out;
 					/*
 					 * Only process the offered MTU if it
 					 * is smaller than the current one.
 					 */
 					if (mtu < tp->t_maxseg +
 					    sizeof (struct tcphdr) +
 					    sizeof (struct ip6_hdr)) {
 						tcp_hc_updatemtu(&inc, mtu);
 						tcp_mtudisc(inp, mtu);
 						ICMP6STAT_INC(icp6s_pmtuchg);
 					}
 				} else
 					inp = (*notify)(inp,
 					    inet6ctlerrmap[cmd]);
 			}
 		}
 	} else {
 		/*
 		 * No inpcb matched: hand the connection identity and the
 		 * quoted sequence number to the syncache instead.
 		 */
 		bzero(&inc, sizeof(inc));
 		inc.inc_fibnum = M_GETFIB(m);
 		inc.inc_flags |= INC_ISIPV6;
 		inc.inc_fport = t_ports.th_dport;
 		inc.inc_lport = t_ports.th_sport;
 		inc.inc6_faddr = *dst;
 		inc.inc6_laddr = ip6->ip6_src;
 		syncache_unreach(&inc, icmp_tcp_seq);
 	}
 out:
 	/* The notify routine may have replaced inp with NULL. */
 	if (inp != NULL)
 		INP_WUNLOCK(inp);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 }
 #endif /* INET6 */
 
 
 /*
  * Following is where TCP initial sequence number generation occurs.
  *
  * There are two places where we must use initial sequence numbers:
  * 1.  In SYN-ACK packets.
  * 2.  In SYN packets.
  *
  * All ISNs for SYN-ACK packets are generated by the syncache.  See
  * tcp_syncache.c for details.
  *
  * The ISNs in SYN packets must be monotonic; TIME_WAIT recycling
  * depends on this property.  In addition, these ISNs should be
  * unguessable so as to prevent connection hijacking.  To satisfy
  * the requirements of this situation, the algorithm outlined in
  * RFC 1948 is used, with only small modifications.
  *
  * Implementation details:
  *
  * Time is based off the system timer, and is corrected so that it
  * increases by one megabyte per second.  This allows for proper
  * recycling on high speed LANs while still leaving over an hour
  * before rollover.
  *
  * As reading the *exact* system time is too expensive to be done
  * whenever setting up a TCP connection, we increment the time
  * offset in two ways.  First, a small random positive increment
  * is added to isn_offset for each connection that is set up.
  * Second, the function tcp_isn_tick fires once per clock tick
  * and increments isn_offset as necessary so that sequence numbers
  * are incremented at approximately ISN_BYTES_PER_SECOND.  The
  * random positive increments serve only to ensure that the same
  * exact sequence number is never sent out twice (as could otherwise
  * happen when a port is recycled in less than the system tick
  * interval.)
  *
  * net.inet.tcp.isn_reseed_interval controls the number of seconds
  * between seeding of isn_secret.  This is normally set to zero,
  * as reseeding should not be necessary.
  *
  * Locking of the per-VNET variables isn_secret, isn_last, isn_last_reseed,
  * isn_offset, and isn_offset_old is performed with ISN_LOCK()/ISN_UNLOCK()
  * in tcp_new_isn().  The MD5 context is local to that function.
  */
 
 /* Minimum growth rate enforced on isn_offset, per second of elapsed ticks. */
 #define ISN_BYTES_PER_SECOND 1048576
 /* Fixed amount added to isn_offset for every ISN handed out. */
 #define ISN_STATIC_INCREMENT 4096
 /* Mask applied to arc4random() for the additional per-ISN increment. */
 #define ISN_RANDOM_INCREMENT (4096 - 1)
 
 /* Secret mixed into the MD5 hash of the connection's ports and addresses. */
 static VNET_DEFINE(u_char, isn_secret[32]);
 /* Value of ticks when isn_offset was last corrected for elapsed time. */
 static VNET_DEFINE(int, isn_last);
 /* Value of ticks when isn_secret was last (re)seeded; 0 means never. */
 static VNET_DEFINE(int, isn_last_reseed);
 /* Running offset that is added to the hash to form the ISN. */
 static VNET_DEFINE(u_int32_t, isn_offset);
 /* Copy of isn_offset taken at the last time correction. */
 static VNET_DEFINE(u_int32_t, isn_offset_old);
 
 #define	V_isn_secret			VNET(isn_secret)
 #define	V_isn_last			VNET(isn_last)
 #define	V_isn_last_reseed		VNET(isn_last_reseed)
 #define	V_isn_offset			VNET(isn_offset)
 #define	V_isn_offset_old		VNET(isn_offset_old)
 
 /*
  * Generate the initial sequence number for the connection described by tp.
  *
  * The result is the first 32-bit word of
  *	MD5(foreign port, local port, foreign addr, local addr, isn_secret)
  * plus the running isn_offset.  The caller must hold the inpcb write
  * lock (asserted below); the ISN state itself is accessed between
  * ISN_LOCK() and ISN_UNLOCK().
  */
 tcp_seq
 tcp_new_isn(struct tcpcb *tp)
 {
 	MD5_CTX isn_ctx;
 	u_int32_t md5_buffer[4];
 	tcp_seq new_isn;
 	u_int32_t projected_offset;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
 	ISN_LOCK();
 	/* Seed if this is the first use, reseed if requested. */
 	if ((V_isn_last_reseed == 0) || ((V_tcp_isn_reseed_interval > 0) &&
 	     (((u_int)V_isn_last_reseed + (u_int)V_tcp_isn_reseed_interval*hz)
 		< (u_int)ticks))) {
 		read_random(&V_isn_secret, sizeof(V_isn_secret));
 		V_isn_last_reseed = ticks;
 	}
 
 	/* Compute the md5 hash and return the ISN. */
 	MD5Init(&isn_ctx);
 	MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_fport, sizeof(u_short));
 	MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_lport, sizeof(u_short));
 #ifdef INET6
 	if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) {
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_faddr,
 			  sizeof(struct in6_addr));
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->in6p_laddr,
 			  sizeof(struct in6_addr));
 	} else
 #endif
 	{
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_faddr,
 			  sizeof(struct in_addr));
 		MD5Update(&isn_ctx, (u_char *) &tp->t_inpcb->inp_laddr,
 			  sizeof(struct in_addr));
 	}
 	MD5Update(&isn_ctx, (u_char *) &V_isn_secret, sizeof(V_isn_secret));
 	MD5Final((u_char *) &md5_buffer, &isn_ctx);
 	/* Only the first 32-bit word of the digest is used. */
 	new_isn = (tcp_seq) md5_buffer[0];
 	/* Advance the offset by the fixed step plus a masked random value. */
 	V_isn_offset += ISN_STATIC_INCREMENT +
 		(arc4random() & ISN_RANDOM_INCREMENT);
 	/*
 	 * If ticks moved since the last correction, make sure the offset
 	 * has grown by at least ISN_BYTES_PER_SECOND / hz per elapsed tick
 	 * relative to the value recorded at that correction.
 	 */
 	if (ticks != V_isn_last) {
 		projected_offset = V_isn_offset_old +
 		    ISN_BYTES_PER_SECOND / hz * (ticks - V_isn_last);
 		if (SEQ_GT(projected_offset, V_isn_offset))
 			V_isn_offset = projected_offset;
 		V_isn_offset_old = V_isn_offset;
 		V_isn_last = ticks;
 	}
 	new_isn += V_isn_offset;
 	ISN_UNLOCK();
 	return (new_isn);
 }
 
 /*
  * Notification routine used for selected ICMP unreachable messages (see
  * the icmp_may_rst sysctl): a connection that is still in SYN-SENT is
  * dropped with the supplied error, any other connection is left alone.
  *
  * Returns inp when the inpcb is still usable by the caller, NULL when
  * tcp_drop() reported that the tcpcb is gone.
  */
 struct inpcb *
 tcp_drop_syn_sent(struct inpcb *inp, int errno)
 {
 	struct tcpcb *tcb;
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	/* Time-wait and already dropped inpcbs are not touched. */
 	if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0)
 		return (inp);
 
 	tcb = intotcpcb(inp);
 	if (tcb->t_state != TCPS_SYN_SENT)
 		return (inp);
 
 	/* Propagate a NULL from tcp_drop() to our caller. */
 	return (tcp_drop(tcb, errno) != NULL ? inp : NULL);
 }
 
 /*
  * When `need fragmentation' ICMP is received, update our idea of the MSS
  * based on the new value. Also nudge TCP to send something, since we
  * know the packet we just sent was dropped.
  * This duplicates some code in the tcp_mss() function in tcp_input.c.
  *
  * tcp_mtudisc_notify() adapts tcp_mtudisc() to the notify-callback
  * signature: the error argument is ignored, -1 is passed as the MTU
  * offer, and inp is always returned.
  */
 static struct inpcb *
 tcp_mtudisc_notify(struct inpcb *inp, int error)
 {
 
 	tcp_mtudisc(inp, -1);
 	return (inp);
 }
 
 static void
 tcp_mtudisc(struct inpcb *inp, int mtuoffer)
 {
 	struct tcpcb *tp;
 	struct socket *so;
 
 	INP_WLOCK_ASSERT(inp);
 	if ((inp->inp_flags & INP_TIMEWAIT) ||
 	    (inp->inp_flags & INP_DROPPED))
 		return;
 
 	tp = intotcpcb(inp);
 	KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL"));
 
 	tcp_mss_update(tp, -1, mtuoffer, NULL, NULL);
   
 	so = inp->inp_socket;
 	SOCKBUF_LOCK(&so->so_snd);
 	/* If the mss is larger than the socket buffer, decrease the mss. */
 	if (so->so_snd.sb_hiwat < tp->t_maxseg)
 		tp->t_maxseg = so->so_snd.sb_hiwat;
 	SOCKBUF_UNLOCK(&so->so_snd);
 
 	TCPSTAT_INC(tcps_mturesent);
 	tp->t_rtttime = 0;
 	tp->snd_nxt = tp->snd_una;
 	tcp_free_sackholes(tp);
 	tp->snd_recover = tp->snd_max;
 	if (tp->t_flags & TF_SACK_PERMIT)
 		EXIT_FASTRECOVERY(tp->t_flags);
 	tp->t_fb->tfb_tcp_output(tp);
 }
 
 #ifdef INET
 /*
  * Look-up the routing entry to the peer of this inpcb.  If no route
  * is found and it cannot be allocated, then return 0.  This routine
  * is called by TCP routines that access the rmx structure and by
  * tcp_mss_update to get the peer/interface MTU.
  */
 uint32_t
 tcp_maxmtu(struct in_conninfo *inc, struct tcp_ifcap *cap)
 {
 	struct nhop4_extended nh4;
 	struct ifnet *ifp;
 	uint32_t maxmtu = 0;
 
 	KASSERT(inc != NULL, ("tcp_maxmtu with NULL in_conninfo pointer"));
 
 	if (inc->inc_faddr.s_addr != INADDR_ANY) {
 
 		if (fib4_lookup_nh_ext(inc->inc_fibnum, inc->inc_faddr,
 		    NHR_REF, 0, &nh4) != 0)
 			return (0);
 
 		ifp = nh4.nh_ifp;
 		maxmtu = nh4.nh_mtu;
 
 		/* Report additional interface capabilities. */
 		if (cap != NULL) {
 			if (ifp->if_capenable & IFCAP_TSO4 &&
 			    ifp->if_hwassist & CSUM_TSO) {
 				cap->ifcap |= CSUM_TSO;
 				cap->tsomax = ifp->if_hw_tsomax;
 				cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
 				cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
 			}
 		}
 		fib4_free_nh_ext(inc->inc_fibnum, &nh4);
 	}
 	return (maxmtu);
 }
 #endif /* INET */
 
 #ifdef INET6
 uint32_t
 tcp_maxmtu6(struct in_conninfo *inc, struct tcp_ifcap *cap)
 {
 	struct nhop6_extended nh6;
 	struct in6_addr dst6;
 	uint32_t scopeid;
 	struct ifnet *ifp;
 	uint32_t maxmtu = 0;
 
 	KASSERT(inc != NULL, ("tcp_maxmtu6 with NULL in_conninfo pointer"));
 
 	if (!IN6_IS_ADDR_UNSPECIFIED(&inc->inc6_faddr)) {
 		in6_splitscope(&inc->inc6_faddr, &dst6, &scopeid);
 		if (fib6_lookup_nh_ext(inc->inc_fibnum, &dst6, scopeid, 0,
 		    0, &nh6) != 0)
 			return (0);
 
 		ifp = nh6.nh_ifp;
 		maxmtu = nh6.nh_mtu;
 
 		/* Report additional interface capabilities. */
 		if (cap != NULL) {
 			if (ifp->if_capenable & IFCAP_TSO6 &&
 			    ifp->if_hwassist & CSUM_TSO) {
 				cap->ifcap |= CSUM_TSO;
 				cap->tsomax = ifp->if_hw_tsomax;
 				cap->tsomaxsegcount = ifp->if_hw_tsomaxsegcount;
 				cap->tsomaxsegsize = ifp->if_hw_tsomaxsegsize;
 			}
 		}
 		fib6_free_nh_ext(inc->inc_fibnum, &nh6);
 	}
 
 	return (maxmtu);
 }
 #endif /* INET6 */
 
 /*
  * Compute the effective SMSS (in the RFC5681 sense) of a connection in
  * its current state: t_maxseg minus an estimate of the TCP option bytes
  * carried by each segment, capped at TCP_MAXOLEN.
  */
 u_int
 tcp_maxseg(const struct tcpcb *tp)
 {
 	u_int optbytes;
 
 	/* With TF_NOOPT set nothing is subtracted. */
 	if ((tp->t_flags & TF_NOOPT) != 0)
 		return (tp->t_maxseg);
 
 	/*
 	 * What follows is a cut-down version of tcp_addoptions(): there is
 	 * no real option loop and most padding is hardcoded.  Padding may
 	 * be off in some edge cases, which is harmless because the result
 	 * only feeds cwnd and ssthresh estimations.
 	 */
 #define	PAD(len)	((((len) / 4) + !!((len) % 4)) * 4)
 	if (!TCPS_HAVEESTABLISHED(tp->t_state)) {
 		/* Not yet established: go by the options we requested. */
 		optbytes = (tp->t_flags & TF_REQ_TSTMP) != 0 ?
 		    TCPOLEN_TSTAMP_APPA : PAD(TCPOLEN_MAXSEG);
 		if ((tp->t_flags & TF_REQ_SCALE) != 0)
 			optbytes += PAD(TCPOLEN_WINDOW);
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		if ((tp->t_flags & TF_SIGNATURE) != 0)
 			optbytes += PAD(TCPOLEN_SIGNATURE);
 #endif
 		if ((tp->t_flags & TF_SACK_PERMIT) != 0)
 			optbytes += PAD(TCPOLEN_SACK_PERMITTED);
 	} else {
 		/* Established: timestamps, signature and pending SACKs. */
 		optbytes = (tp->t_flags & TF_RCVD_TSTMP) != 0 ?
 		    TCPOLEN_TSTAMP_APPA : 0;
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		if ((tp->t_flags & TF_SIGNATURE) != 0)
 			optbytes += PAD(TCPOLEN_SIGNATURE);
 #endif
 		if ((tp->t_flags & TF_SACK_PERMIT) != 0 &&
 		    tp->rcv_numsacks > 0) {
 			optbytes += TCPOLEN_SACKHDR;
 			optbytes += tp->rcv_numsacks * TCPOLEN_SACK;
 			optbytes = PAD(optbytes);
 		}
 	}
 #undef PAD
 	optbytes = min(optbytes, TCP_MAXOLEN);
 
 	return (tp->t_maxseg - optbytes);
 }
 
 /*
  * Sysctl handler that drops the TCP connection identified by a pair of
  * socket addresses written by the caller (foreign address first, local
  * address second, each in a struct sockaddr_storage).
  *
  * Returns EINVAL if an old-value buffer was supplied, the address family
  * is unsupported or the sockaddr lengths are wrong; EPERM if no new value
  * was supplied; ENOMEM if the new value is too short; ESRCH if no inpcb
  * matches; any error from SYSCTL_IN() or sa6_embedscope(); 0 otherwise.
  */
 static int
 sysctl_drop(SYSCTL_HANDLER_ARGS)
 {
 	/* addrs[0] is a foreign socket, addrs[1] is a local one. */
 	struct sockaddr_storage addrs[2];
 	struct inpcb *inp;
 	struct tcpcb *tp;
 	struct tcptw *tw;
 	struct sockaddr_in *fin, *lin;
 #ifdef INET6
 	struct sockaddr_in6 *fin6, *lin6;
 #endif
 	int error;
 
 	inp = NULL;
 	fin = lin = NULL;
 #ifdef INET6
 	fin6 = lin6 = NULL;
 #endif
 	error = 0;
 
 	/* This node is write-only: reject reads, require a full new value. */
 	if (req->oldptr != NULL || req->oldlen != 0)
 		return (EINVAL);
 	if (req->newptr == NULL)
 		return (EPERM);
 	if (req->newlen < sizeof(addrs))
 		return (ENOMEM);
 	error = SYSCTL_IN(req, &addrs, sizeof(addrs));
 	if (error)
 		return (error);
 
 	/* Validate the addresses according to the foreign one's family. */
 	switch (addrs[0].ss_family) {
 #ifdef INET6
 	case AF_INET6:
 		fin6 = (struct sockaddr_in6 *)&addrs[0];
 		lin6 = (struct sockaddr_in6 *)&addrs[1];
 		if (fin6->sin6_len != sizeof(struct sockaddr_in6) ||
 		    lin6->sin6_len != sizeof(struct sockaddr_in6))
 			return (EINVAL);
 		/*
 		 * A v4-mapped pair is converted in place to sockaddr_in;
 		 * the lookup switch below then takes the branch matching
 		 * the family stored by the conversion.
 		 */
 		if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) {
 			if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr))
 				return (EINVAL);
 			in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]);
 			in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]);
 			fin = (struct sockaddr_in *)&addrs[0];
 			lin = (struct sockaddr_in *)&addrs[1];
 			break;
 		}
 		error = sa6_embedscope(fin6, V_ip6_use_defzone);
 		if (error)
 			return (error);
 		error = sa6_embedscope(lin6, V_ip6_use_defzone);
 		if (error)
 			return (error);
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		fin = (struct sockaddr_in *)&addrs[0];
 		lin = (struct sockaddr_in *)&addrs[1];
 		if (fin->sin_len != sizeof(struct sockaddr_in) ||
 		    lin->sin_len != sizeof(struct sockaddr_in))
 			return (EINVAL);
 		break;
 #endif
 	default:
 		return (EINVAL);
 	}
 	INP_INFO_RLOCK(&V_tcbinfo);
 	/* On success the inpcb is returned write-locked (INPLOOKUP_WLOCKPCB). */
 	switch (addrs[0].ss_family) {
 #ifdef INET6
 	case AF_INET6:
 		inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr,
 		    fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port,
 		    INPLOOKUP_WLOCKPCB, NULL);
 		break;
 #endif
 #ifdef INET
 	case AF_INET:
 		inp = in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port,
 		    lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL);
 		break;
 #endif
 	}
 	/*
 	 * NOTE(review): the inpcb is not unlocked here after tcp_twclose(),
 	 * nor after tcp_drop() returns NULL; presumably those paths release
 	 * the inpcb lock themselves -- confirm against their definitions.
 	 */
 	if (inp != NULL) {
 		if (inp->inp_flags & INP_TIMEWAIT) {
 			/*
 			 * XXXRW: There currently exists a state where an
 			 * inpcb is present, but its timewait state has been
 			 * discarded.  For now, don't allow dropping of this
 			 * type of inpcb.
 			 */
 			tw = intotw(inp);
 			if (tw != NULL)
 				tcp_twclose(tw, 0);
 			else
 				INP_WUNLOCK(inp);
 		} else if (!(inp->inp_flags & INP_DROPPED) &&
 			   !(inp->inp_socket->so_options & SO_ACCEPTCONN)) {
 			/* Neither dropped nor listening: abort it. */
 			tp = intotcpcb(inp);
 			tp = tcp_drop(tp, ECONNABORTED);
 			if (tp != NULL)
 				INP_WUNLOCK(inp);
 		} else
 			INP_WUNLOCK(inp);
 	} else
 		error = ESRCH;
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 	return (error);
 }
 
 /*
  * Register sysctl_drop() as the handler of the per-VNET, write-only
  * "drop" node (id TCPCTL_DROP) under _net_inet_tcp.
  */
 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DROP, drop,
     CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL,
     0, sysctl_drop, "", "Drop TCP connection");
 
 /*
  * Generate a standardized TCP log line for use throughout the
  * tcp subsystem.  Memory allocation is done with M_NOWAIT to
  * allow use in the interrupt context.
  *
  * NB: The caller MUST free(s, M_TCPLOG) the returned string.
  * NB: The function may return NULL if memory allocation failed.
  *
  * Due to header inclusion and ordering limitations the struct ip
  * and ip6_hdr pointers have to be passed as void pointers.
  */
 char *
 tcp_log_vain(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
     const void *ip6hdr)
 {
 
 	/* A log line is produced only while tcp_log_in_vain is non-zero. */
 	if (tcp_log_in_vain != 0)
 		return (tcp_log_addr(inc, th, ip4hdr, ip6hdr));
 
 	return (NULL);
 }
 
 /*
  * Same as tcp_log_vain(), but gated by tcp_log_debug: returns NULL when
  * that knob is zero, otherwise the string built by tcp_log_addr().
  */
 char *
 tcp_log_addrs(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
     const void *ip6hdr)
 {
 
 	/* A log line is produced only while tcp_log_debug is non-zero. */
 	if (tcp_log_debug != 0)
 		return (tcp_log_addr(inc, th, ip4hdr, ip6hdr));
 
 	return (NULL);
 }
 
 /*
  * Build the log line shared by tcp_log_vain() and tcp_log_addrs().
  *
  * The addresses and ports come from inc when it is non-NULL (IPv4 or
  * IPv6 depending on INC_ISIPV6); otherwise from ip6hdr plus th, or from
  * ip4hdr plus th.  When th is non-NULL the TCP flags are appended.
  *
  * Returns a string allocated from M_TCPLOG with M_NOWAIT, or NULL if the
  * allocation fails or no usable address source was passed in.
  */
 static char *
 tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr,
     const void *ip6hdr)
 {
 	char *s, *sp;
 	size_t size;
 	struct ip *ip;
 #ifdef INET6
 	const struct ip6_hdr *ip6;
 
 	ip6 = (const struct ip6_hdr *)ip6hdr;
 #endif /* INET6 */
 	ip = (struct ip *)ip4hdr;
 
 	/*
 	 * The log line looks like this:
 	 * "TCP: [1.2.3.4]:50332 to [1.2.3.4]:80 tcpflags 0x2<SYN>"
 	 */
 	size = sizeof("TCP: []:12345 to []:12345 tcpflags 0x2<>") +
 	    sizeof(PRINT_TH_FLAGS) + 1 +
 #ifdef INET6
 	    2 * INET6_ADDRSTRLEN;
 #else
 	    2 * INET_ADDRSTRLEN;
 #endif /* INET6 */
 
 	/* M_ZERO: the buffer starts out as an empty, NUL-filled string. */
 	s = malloc(size, M_TCPLOG, M_ZERO|M_NOWAIT);
 	if (s == NULL)
 		return (NULL);
 
 	strcat(s, "TCP: [");
 	sp = s + strlen(s);
 
 	/* Each branch appends "<src>]:<port> to [<dst>]:<port>". */
 	if (inc && ((inc->inc_flags & INC_ISIPV6) == 0)) {
 		inet_ntoa_r(inc->inc_faddr, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
 		sp = s + strlen(s);
 		inet_ntoa_r(inc->inc_laddr, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(inc->inc_lport));
 #ifdef INET6
 	} else if (inc) {
 		ip6_sprintf(sp, &inc->inc6_faddr);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(inc->inc_fport));
 		sp = s + strlen(s);
 		ip6_sprintf(sp, &inc->inc6_laddr);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(inc->inc_lport));
 	} else if (ip6 && th) {
 		ip6_sprintf(sp, &ip6->ip6_src);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(th->th_sport));
 		sp = s + strlen(s);
 		ip6_sprintf(sp, &ip6->ip6_dst);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(th->th_dport));
 #endif /* INET6 */
 #ifdef INET
 	} else if (ip && th) {
 		inet_ntoa_r(ip->ip_src, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i to [", ntohs(th->th_sport));
 		sp = s + strlen(s);
 		inet_ntoa_r(ip->ip_dst, sp);
 		sp = s + strlen(s);
 		sprintf(sp, "]:%i", ntohs(th->th_dport));
 #endif /* INET */
 	} else {
 		/* No usable address source was supplied. */
 		free(s, M_TCPLOG);
 		return (NULL);
 	}
 	sp = s + strlen(s);
 	if (th)
 		sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS);
 	/*
 	 * Overflow check after the fact: the appends above are unbounded,
 	 * so the last byte of the zeroed buffer must still be NUL.
 	 */
 	if (*(s + size - 1) != '\0')
 		panic("%s: string too long", __func__);
 	return (s);
 }
 
 /*
  * A subroutine which makes it easy to track TCP state changes with DTrace.
  * This function shouldn't be called for t_state initializations that don't
  * correspond to actual TCP state transitions.
  */
 void
 tcp_state_change(struct tcpcb *tp, int newstate)
 {
 #if defined(KDTRACE_HOOKS)
 	int pstate = tp->t_state;
 #endif
 
 	TCPSTATES_DEC(tp->t_state);
 	TCPSTATES_INC(newstate);
 	tp->t_state = newstate;
 	TCP_PROBE6(state__change, NULL, tp, NULL, tp, NULL, pstate);
 }
 
 /*
  * Create an external-format (``xtcpcb'') structure using the information in
  * the kernel-format tcpcb structure pointed to by tp.  This is done to
  * reduce the spew of irrelevant information over this interface, to isolate
  * user code from changes in the kernel structure, and potentially to provide
  * information-hiding if we decide that some of this information should be
  * hidden from users.
  *
  * inp is the inpcb to export and xt the caller-provided destination.  The
  * whole of *xt is zeroed before any field is filled in, so fields that
  * are not explicitly set below (and any padding) never carry whatever
  * the caller's buffer happened to contain.  For an inpcb in time-wait
  * only t_state (TCPS_TIME_WAIT), xt_len and xt_inp are filled in.
  */
 void
 tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *xt)
 {
 	struct tcpcb *tp = intotcpcb(inp);
 	sbintime_t now;
 
 	/*
 	 * Zero unconditionally: previously only the time-wait branch did
 	 * this, leaving unassigned bytes uninitialized for every other
 	 * connection.
 	 */
 	bzero(xt, sizeof(*xt));
 	if (inp->inp_flags & INP_TIMEWAIT) {
 		xt->t_state = TCPS_TIME_WAIT;
 	} else {
 		xt->t_state = tp->t_state;
 		xt->t_flags = tp->t_flags;
 		xt->t_sndzerowin = tp->t_sndzerowin;
 		xt->t_sndrexmitpack = tp->t_sndrexmitpack;
 		xt->t_rcvoopack = tp->t_rcvoopack;
 
 		/*
 		 * Export each timer as the time left until it fires, in
 		 * milliseconds; an inactive timer is reported as 0.
 		 */
 		now = getsbinuptime();
 #define	COPYTIMER(ttt)	do {						\
 		if (callout_active(&tp->t_timers->ttt))			\
 			xt->ttt = (tp->t_timers->ttt.c_time - now) /	\
 			    SBT_1MS;					\
 		else							\
 			xt->ttt = 0;					\
 } while (0)
 		COPYTIMER(tt_delack);
 		COPYTIMER(tt_rexmt);
 		COPYTIMER(tt_persist);
 		COPYTIMER(tt_keep);
 		COPYTIMER(tt_2msl);
 #undef COPYTIMER
 		/* Ticks elapsed since t_rcvtime, converted to ms. */
 		xt->t_rcvtime = 1000 * (ticks - tp->t_rcvtime) / hz;
 
 		bcopy(tp->t_fb->tfb_tcp_block_name, xt->xt_stack,
 		    TCP_FUNCTION_NAME_LEN_MAX);
 	}
 
 	xt->xt_len = sizeof(struct xtcpcb);
 	in_pcbtoxinpcb(inp, &xt->xt_inp);
 	/* Without a socket, set the exported protocol explicitly. */
 	if (inp->inp_socket == NULL)
 		xt->xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
 }
Index: head/sys/netinet/tcp_syncache.c
===================================================================
--- head/sys/netinet/tcp_syncache.c	(revision 319721)
+++ head/sys/netinet/tcp_syncache.c	(revision 319722)
@@ -1,2256 +1,2257 @@
 /*-
  * Copyright (c) 2001 McAfee, Inc.
  * Copyright (c) 2006,2013 Andre Oppermann, Internet Business Solutions AG
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Jonathan Lemon
  * and McAfee Research, the Security Research Division of McAfee, Inc. under
  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
  * DARPA CHATS research program. [2001 McAfee, Inc.]
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
 #include "opt_pcbgroup.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/hash.h>
 #include <sys/refcount.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/proc.h>		/* for proc0 declaration */
 #include <sys/random.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/syslog.h>
 #include <sys/ucred.h>
 
 #include <sys/md5.h>
 #include <crypto/siphash/siphash.h>
 
 #include <vm/uma.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/route.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/in_var.h>
 #include <netinet/in_pcb.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet/icmp6.h>
 #include <netinet6/nd6.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/in6_pcb.h>
 #endif
 #include <netinet/tcp.h>
 #ifdef TCP_RFC7413
 #include <netinet/tcp_fastopen.h>
 #endif
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/tcp_syncache.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
 #ifdef TCP_OFFLOAD
 #include <netinet/toecore.h>
 #endif
 
 #include <netipsec/ipsec_support.h>
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 /* net.inet.tcp.syncookies: per-VNET, read-write, enabled (1) by default. */
 static VNET_DEFINE(int, tcp_syncookies) = 1;
 #define	V_tcp_syncookies		VNET(tcp_syncookies)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_syncookies), 0,
     "Use TCP SYN cookies if the syncache overflows");
 
 /* net.inet.tcp.syncookies_only: per-VNET, read-write, off (0) by default. */
 static VNET_DEFINE(int, tcp_syncookiesonly) = 0;
 #define	V_tcp_syncookiesonly		VNET(tcp_syncookiesonly)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, syncookies_only, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_syncookiesonly), 0,
     "Use only TCP SYN cookies");
 
 /*
  * net.inet.tcp.functions_inherit_listen_socket_stack: per-VNET,
  * read-write, enabled (1) by default.
  */
 static VNET_DEFINE(int, functions_inherit_listen_socket_stack) = 1;
 #define V_functions_inherit_listen_socket_stack \
     VNET(functions_inherit_listen_socket_stack)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, functions_inherit_listen_socket_stack,
     CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(functions_inherit_listen_socket_stack), 0,
     "Inherit listen socket's stack");
 
 #ifdef TCP_OFFLOAD
 #define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL)
 #endif
 
 static void	 syncache_drop(struct syncache *, struct syncache_head *);
 static void	 syncache_free(struct syncache *);
 static void	 syncache_insert(struct syncache *, struct syncache_head *);
 static int	 syncache_respond(struct syncache *, struct syncache_head *, int,
 		    const struct mbuf *);
 static struct	 socket *syncache_socket(struct syncache *, struct socket *,
 		    struct mbuf *m);
 static void	 syncache_timeout(struct syncache *sc, struct syncache_head *sch,
 		    int docallout);
 static void	 syncache_timer(void *);
 
 static uint32_t	 syncookie_mac(struct in_conninfo *, tcp_seq, uint8_t,
 		    uint8_t *, uintptr_t);
 static tcp_seq	 syncookie_generate(struct syncache_head *, struct syncache *);
 static struct syncache
 		*syncookie_lookup(struct in_conninfo *, struct syncache_head *,
 		    struct syncache *, struct tcphdr *, struct tcpopt *,
 		    struct socket *);
 static void	 syncookie_reseed(void *);
 #ifdef INVARIANTS
 static int	 syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
 		    struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
 		    struct socket *lso);
 #endif
 
 /*
  * Transmit the SYN,ACK fewer times than TCP_MAXRXTSHIFT specifies.
  * 3 retransmits corresponds to a timeout of 3 * (1 + 2 + 4 + 8) == 45 seconds,
  * the odds are that the user has given up attempting to connect by then.
  */
 #define SYNCACHE_MAXREXMTS		3
 
 /* Arbitrary values */
 #define TCP_SYNCACHE_HASHSIZE		512
 #define TCP_SYNCACHE_BUCKETLIMIT	30
 
 static VNET_DEFINE(struct tcp_syncache, tcp_syncache);
 #define	V_tcp_syncache			VNET(tcp_syncache)
 
 static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, syncache, CTLFLAG_RW, 0,
     "TCP SYN cache");
 
 SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, bucketlimit, CTLFLAG_VNET | CTLFLAG_RDTUN,
     &VNET_NAME(tcp_syncache.bucket_limit), 0,
     "Per-bucket hash limit for syncache");
 
 SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, cachelimit, CTLFLAG_VNET | CTLFLAG_RDTUN,
     &VNET_NAME(tcp_syncache.cache_limit), 0,
     "Overall entry limit for syncache");
 
 SYSCTL_UMA_CUR(_net_inet_tcp_syncache, OID_AUTO, count, CTLFLAG_VNET,
     &VNET_NAME(tcp_syncache.zone), "Current number of entries in syncache");
 
 SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, hashsize, CTLFLAG_VNET | CTLFLAG_RDTUN,
     &VNET_NAME(tcp_syncache.hashsize), 0,
     "Size of TCP syncache hashtable");
 
 SYSCTL_UINT(_net_inet_tcp_syncache, OID_AUTO, rexmtlimit, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_syncache.rexmt_limit), 0,
     "Limit on SYN/ACK retransmissions");
 
 VNET_DEFINE(int, tcp_sc_rst_sock_fail) = 1;
 SYSCTL_INT(_net_inet_tcp_syncache, OID_AUTO, rst_on_sock_fail,
     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_sc_rst_sock_fail), 0,
     "Send reset on socket allocation failure");
 
 static MALLOC_DEFINE(M_SYNCACHE, "syncache", "TCP syncache");
 
 /*
  * Helpers for the per-bucket mutex (sch_mtx) of a struct syncache_head:
  * acquire, release, and assert that the current thread owns it.
  */
 #define	SCH_LOCK(sch)		mtx_lock(&(sch)->sch_mtx)
 #define	SCH_UNLOCK(sch)		mtx_unlock(&(sch)->sch_mtx)
 #define	SCH_LOCK_ASSERT(sch)	mtx_assert(&(sch)->sch_mtx, MA_OWNED)
 
 /*
  * Requires the syncache entry to be already removed from the bucket list.
  */
 static void
 syncache_free(struct syncache *sc)
 {
 
 	if (sc->sc_ipopts)
 		(void) m_free(sc->sc_ipopts);
 	if (sc->sc_cred)
 		crfree(sc->sc_cred);
 #ifdef MAC
 	mac_syncache_destroy(&sc->sc_label);
 #endif
 
 	uma_zfree(V_tcp_syncache.zone, sc);
 }
 
 void
 syncache_init(void)
 {
 	int i;
 
 	V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
 	V_tcp_syncache.bucket_limit = TCP_SYNCACHE_BUCKETLIMIT;
 	V_tcp_syncache.rexmt_limit = SYNCACHE_MAXREXMTS;
 	V_tcp_syncache.hash_secret = arc4random();
 
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.hashsize",
 	    &V_tcp_syncache.hashsize);
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.bucketlimit",
 	    &V_tcp_syncache.bucket_limit);
 	if (!powerof2(V_tcp_syncache.hashsize) ||
 	    V_tcp_syncache.hashsize == 0) {
 		printf("WARNING: syncache hash size is not a power of 2.\n");
 		V_tcp_syncache.hashsize = TCP_SYNCACHE_HASHSIZE;
 	}
 	V_tcp_syncache.hashmask = V_tcp_syncache.hashsize - 1;
 
 	/* Set limits. */
 	V_tcp_syncache.cache_limit =
 	    V_tcp_syncache.hashsize * V_tcp_syncache.bucket_limit;
 	TUNABLE_INT_FETCH("net.inet.tcp.syncache.cachelimit",
 	    &V_tcp_syncache.cache_limit);
 
 	/* Allocate the hash table. */
 	V_tcp_syncache.hashbase = malloc(V_tcp_syncache.hashsize *
 	    sizeof(struct syncache_head), M_SYNCACHE, M_WAITOK | M_ZERO);
 
 #ifdef VIMAGE
 	V_tcp_syncache.vnet = curvnet;
 #endif
 
 	/* Initialize the hash buckets. */
 	for (i = 0; i < V_tcp_syncache.hashsize; i++) {
 		TAILQ_INIT(&V_tcp_syncache.hashbase[i].sch_bucket);
 		mtx_init(&V_tcp_syncache.hashbase[i].sch_mtx, "tcp_sc_head",
 			 NULL, MTX_DEF);
 		callout_init_mtx(&V_tcp_syncache.hashbase[i].sch_timer,
 			 &V_tcp_syncache.hashbase[i].sch_mtx, 0);
 		V_tcp_syncache.hashbase[i].sch_length = 0;
 		V_tcp_syncache.hashbase[i].sch_sc = &V_tcp_syncache;
 		V_tcp_syncache.hashbase[i].sch_last_overflow =
 		    -(SYNCOOKIE_LIFETIME + 1);
 	}
 
 	/* Create the syncache entry zone. */
 	V_tcp_syncache.zone = uma_zcreate("syncache", sizeof(struct syncache),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	V_tcp_syncache.cache_limit = uma_zone_set_max(V_tcp_syncache.zone,
 	    V_tcp_syncache.cache_limit);
 
 	/* Start the SYN cookie reseeder callout. */
 	callout_init(&V_tcp_syncache.secret.reseed, 1);
 	arc4rand(V_tcp_syncache.secret.key[0], SYNCOOKIE_SECRET_SIZE, 0);
 	arc4rand(V_tcp_syncache.secret.key[1], SYNCOOKIE_SECRET_SIZE, 0);
 	callout_reset(&V_tcp_syncache.secret.reseed, SYNCOOKIE_LIFETIME * hz,
 	    syncookie_reseed, &V_tcp_syncache);
 }
 
 #ifdef VIMAGE
 /*
  * Tear down the syncache: stop all timers, drop every remaining entry,
  * destroy the per-row locks and release the zone and the hash table.
  */
 void
 syncache_destroy(void)
 {
 	struct syncache_head *sch;
 	struct syncache *sc;
 	int i;
 
 	/*
 	 * The reseed timer goes first so that it cannot be scheduled
 	 * again while the rest is being freed.
 	 */
 	callout_drain(&V_tcp_syncache.secret.reseed);
 
 	/* Per row: stop the timer, empty the bucket, destroy the lock. */
 	for (i = 0; i < V_tcp_syncache.hashsize; i++) {
 		sch = &V_tcp_syncache.hashbase[i];
 
 		callout_drain(&sch->sch_timer);
 
 		SCH_LOCK(sch);
 		/* syncache_drop() unlinks the entry, so keep taking the head. */
 		while ((sc = TAILQ_FIRST(&sch->sch_bucket)) != NULL)
 			syncache_drop(sc, sch);
 		SCH_UNLOCK(sch);
 
 		KASSERT(TAILQ_EMPTY(&sch->sch_bucket),
 		    ("%s: sch->sch_bucket not empty", __func__));
 		KASSERT(sch->sch_length == 0, ("%s: sch->sch_length %d not 0",
 		    __func__, sch->sch_length));
 		mtx_destroy(&sch->sch_mtx);
 	}
 
 	KASSERT(uma_zone_get_cur(V_tcp_syncache.zone) == 0,
 	    ("%s: cache_count not 0", __func__));
 
 	/* Finally release the zone and the bucket array. */
 	uma_zdestroy(V_tcp_syncache.zone);
 	free(V_tcp_syncache.hashbase, M_SYNCACHE);
 }
 #endif
 
 /*
  * Add a syncache entry at the head of the given bucket row and start
  * its retransmit timeout.  If the row is already at its limit, the
  * entry at the tail (the oldest one) is dropped first.
  *
  * Takes and releases the row lock itself.
  */
 static void
 syncache_insert(struct syncache *sc, struct syncache_head *sch)
 {
 	struct syncache *victim;
 
 	SCH_LOCK(sch);
 
 	/* Enforce the per-bucket limit by evicting the oldest element. */
 	if (sch->sch_length >= V_tcp_syncache.bucket_limit) {
 		KASSERT(!TAILQ_EMPTY(&sch->sch_bucket),
 		    ("sch->sch_length incorrect"));
 		sch->sch_last_overflow = time_uptime;
 		victim = TAILQ_LAST(&sch->sch_bucket, sch_head);
 		syncache_drop(victim, sch);
 		TCPSTAT_INC(tcps_sc_bucketoverflow);
 	}
 
 	/* Link the new entry in. */
 	TAILQ_INSERT_HEAD(&sch->sch_bucket, sc, sc_hash);
 	sch->sch_length++;
 
 #ifdef TCP_OFFLOAD
 	/* Tell the offload device about entries it created. */
 	if (ADDED_BY_TOE(sc))
 		sc->sc_tod->tod_syncache_added(sc->sc_tod, sc->sc_todctx);
 #endif
 
 	/*
 	 * A row that just became non-empty has no meaningful deadline yet;
 	 * push it far out so that the new entry's timeout takes effect.
 	 */
 	if (sch->sch_length == 1)
 		sch->sch_nextc = ticks + INT_MAX;
 	syncache_timeout(sc, sch, 1);
 
 	SCH_UNLOCK(sch);
 
 	TCPSTATES_INC(TCPS_SYN_RECEIVED);
 	TCPSTAT_INC(tcps_sc_added);
 }
 
 /*
  * Unlink an entry from its bucket row and free it.
  * The row lock must be held by the caller.
  */
 static void
 syncache_drop(struct syncache *sc, struct syncache_head *sch)
 {
 
 	SCH_LOCK_ASSERT(sch);
 
 	TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
 	sch->sch_length--;
 	TCPSTATES_DEC(TCPS_SYN_RECEIVED);
 
 #ifdef TCP_OFFLOAD
 	/* Tell the offload device that its entry is going away. */
 	if (ADDED_BY_TOE(sc))
 		sc->sc_tod->tod_syncache_removed(sc->sc_tod, sc->sc_todctx);
 #endif
 
 	syncache_free(sc);
 }
 
 /*
  * Compute the next retransmit time for an entry and count the
  * transmission.  If that time is earlier than the row's current
  * deadline, the deadline is moved up and, when docallout is non-zero,
  * the row timer is rescheduled accordingly.
  */
 static void
 syncache_timeout(struct syncache *sc, struct syncache_head *sch, int docallout)
 {
 
 	sc->sc_rxttime = ticks +
 	    TCPTV_RTOBASE * (tcp_syn_backoff[sc->sc_rxmits]);
 	sc->sc_rxmits++;
 
 	/* Nothing more to do unless this entry is now the earliest. */
 	if (!TSTMP_LT(sc->sc_rxttime, sch->sch_nextc))
 		return;
 
 	sch->sch_nextc = sc->sc_rxttime;
 	if (docallout)
 		callout_reset(&sch->sch_timer, sch->sch_nextc - ticks,
 		    syncache_timer, (void *)sch);
 }
 
 /*
  * Walk the timer queues, looking for SYN,ACKs that need to be retransmitted.
  * If we have retransmitted an entry the maximum number of times, expire it.
  * One separate timer for each bucket row.
  */
 static void
 syncache_timer(void *xsch)
 {
 	struct syncache_head *sch = (struct syncache_head *)xsch;
 	struct syncache *sc, *nsc;
 	int tick = ticks;
 	char *s;
 
 	CURVNET_SET(sch->sch_sc->vnet);
 
 	/* NB: syncache_head has already been locked by the callout. */
 	SCH_LOCK_ASSERT(sch);
 
 	/*
 	 * In the following cycle we may remove some entries and/or
 	 * advance some timeouts, so re-initialize the bucket timer.
 	 */
 	sch->sch_nextc = tick + INT_MAX;
 
 	TAILQ_FOREACH_SAFE(sc, &sch->sch_bucket, sc_hash, nsc) {
 		/*
 		 * We do not check if the listen socket still exists
 		 * and accept the case where the listen socket may be
 		 * gone by the time we resend the SYN/ACK.  We do
 		 * not expect this to happens often. If it does,
 		 * then the RST will be sent by the time the remote
 		 * host does the SYN/ACK->ACK.
 		 */
 		if (TSTMP_GT(sc->sc_rxttime, tick)) {
 			if (TSTMP_LT(sc->sc_rxttime, sch->sch_nextc))
 				sch->sch_nextc = sc->sc_rxttime;
 			continue;
 		}
 		if (sc->sc_rxmits > V_tcp_syncache.rexmt_limit) {
 			if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: Retransmits exhausted, "
 				    "giving up and removing syncache entry\n",
 				    s, __func__);
 				free(s, M_TCPLOG);
 			}
 			syncache_drop(sc, sch);
 			TCPSTAT_INC(tcps_sc_stale);
 			continue;
 		}
 		if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Response timeout, "
 			    "retransmitting (%u) SYN|ACK\n",
 			    s, __func__, sc->sc_rxmits);
 			free(s, M_TCPLOG);
 		}
 
 		syncache_respond(sc, sch, 1, NULL);
 		TCPSTAT_INC(tcps_sc_retransmitted);
 		syncache_timeout(sc, sch, 0);
 	}
 	if (!TAILQ_EMPTY(&(sch)->sch_bucket))
 		callout_reset(&(sch)->sch_timer, (sch)->sch_nextc - tick,
 			syncache_timer, (void *)(sch));
 	CURVNET_RESTORE();
 }
 
 /*
  * Find an entry in the syncache.
  * Returns always with locked syncache_head plus a matching entry or NULL.
  */
 static struct syncache *
 syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp)
 {
 	struct syncache *sc;
 	struct syncache_head *sch;
 	uint32_t hash;
 
 	/*
 	 * The hash is built on foreign port + local port + foreign address.
 	 * We rely on the fact that struct in_conninfo starts with 16 bits
 	 * of foreign port, then 16 bits of local port then followed by 128
 	 * bits of foreign address.  In case of IPv4 address, the first 3
 	 * 32-bit words of the address always are zeroes.
 	 */
 	hash = jenkins_hash32((uint32_t *)&inc->inc_ie, 5,
 	    V_tcp_syncache.hash_secret) & V_tcp_syncache.hashmask;
 
 	sch = &V_tcp_syncache.hashbase[hash];
 	*schp = sch;
 	SCH_LOCK(sch);
 
 	/* Circle through bucket row to find matching entry. */
 	TAILQ_FOREACH(sc, &sch->sch_bucket, sc_hash)
 		if (bcmp(&inc->inc_ie, &sc->sc_inc.inc_ie,
 		    sizeof(struct in_endpoints)) == 0)
 			break;
 
 	return (sc);	/* Always returns with locked sch. */
 }
 
 /*
  * Called when a RST arrives for a non-existent connection, to see
  * whether the connection is still sitting in the syncache.  If it is
  * and the RST is acceptable, the entry is removed.
  */
 void
 syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th)
 {
 	struct syncache_head *sch;
 	struct syncache *sc;
 	char *s = NULL;
 
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 
 	if (th->th_flags & (TH_ACK|TH_SYN|TH_FIN)) {
 		/*
 		 * A RST answering our SYN|ACK must not carry ACK, SYN or
 		 * FIN.  See RFC 793 page 65, section SEGMENT ARRIVES.
 		 */
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: Spurious RST with ACK, SYN or "
 			    "FIN flag set, segment ignored\n", s, __func__);
 		TCPSTAT_INC(tcps_badrst);
 	} else if (sc == NULL) {
 		/*
 		 * No matching syncache entry.  With syncookies enabled
 		 * (possibly exclusively), or under memory pressure, a
 		 * valid RST may legitimately find nothing here; in that
 		 * case there is nothing to retransmit anyway.  Otherwise
 		 * the RST was misdirected or spoofed.
 		 */
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: Spurious RST without matching "
 			    "syncache entry (possibly syncookie only), "
 			    "segment ignored\n", s, __func__);
 		TCPSTAT_INC(tcps_badrst);
 	} else if (SEQ_GEQ(th->th_seq, sc->sc_irs) &&
 	    SEQ_LEQ(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
 		/*
 		 * RFC 793 page 37: in all states except SYN-SENT, a reset
 		 * is validated by its SEQ field and is valid if the
 		 * sequence number is in the window.
 		 *
 		 * The sequence number is normally an echo of our outgoing
 		 * acknowledgement number, but some hosts send a reset with
 		 * the sequence number at the rightmost edge of our receive
 		 * window, which is accepted as well.
 		 */
 		syncache_drop(sc, sch);
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: Our SYN|ACK was rejected, "
 			    "connection attempt aborted by remote endpoint\n",
 			    s, __func__);
 		TCPSTAT_INC(tcps_sc_reset);
 	} else {
 		/* Sequence number outside the window: leave the entry alone. */
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: RST with invalid SEQ %u != "
 			    "IRS %u (+WND %u), segment ignored\n",
 			    s, __func__, th->th_seq, sc->sc_irs, sc->sc_wnd);
 		TCPSTAT_INC(tcps_badrst);
 	}
 
 	if (s != NULL)
 		free(s, M_TCPLOG);
 	SCH_UNLOCK(sch);
 }
 
 /*
  * Drop the syncache entry matching the given connection info, if there
  * is one, and count it as a bad ACK.
  */
 void
 syncache_badack(struct in_conninfo *inc)
 {
 	struct syncache_head *sch;
 	struct syncache *sc;
 
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 
 	if (sc == NULL) {
 		SCH_UNLOCK(sch);
 		return;
 	}
 
 	syncache_drop(sc, sch);
 	TCPSTAT_INC(tcps_sc_badack);
 	SCH_UNLOCK(sch);
 }
 
 /*
  * Handle an unreachable indication for a connection that may be in the
  * syncache.  th_seq is the sequence number from the offending segment
  * and is converted with ntohl() before being compared with sc_iss.
  */
 void
 syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq)
 {
 	struct syncache_head *sch;
 	struct syncache *sc;
 
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 
 	/* A sequence number other than sc_iss means a bogus ICMP message. */
 	if (sc != NULL && ntohl(th_seq) == sc->sc_iss) {
 		/*
 		 * The entry is removed only if we have retransmitted 3
 		 * times and this is our second error.  Otherwise the
 		 * error is merely recorded and the entry lives on, which
 		 * keeps a spurious network outage from nuking it.
 		 *
 		 * See tcp_notify().
 		 */
 		if ((sc->sc_flags & SCF_UNREACH) == 0 ||
 		    sc->sc_rxmits < 3 + 1) {
 			sc->sc_flags |= SCF_UNREACH;
 		} else {
 			syncache_drop(sc, sch);
 			TCPSTAT_INC(tcps_sc_unreach);
 		}
 	}
 
 	SCH_UNLOCK(sch);
 }
 
 /*
  * Build a new TCP socket structure from a syncache entry.
  *
  * sc  - syncache entry describing the connection being completed.
  * lso - listening socket the new socket is created from.
  * m   - mbuf of the triggering segment; may be NULL (it is checked
  *       before use for the flowid and source route).
  *
  * On success return the newly created socket with its underlying inp locked.
  * On failure the partially set up socket (if any) is aborted with
  * soabort() and NULL is returned.
  */
 static struct socket *
 syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
 {
 	struct tcp_function_block *blk;
 	struct inpcb *inp = NULL;
 	struct socket *so;
 	struct tcpcb *tp;
 	int error;
 	char *s;
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 	/*
 	 * Ok, create the full blown connection, and set things up
 	 * as they would have been set up if we had created the
 	 * connection when the SYN arrived.  If we can't create
 	 * the connection, abort it.
 	 */
 	so = sonewconn(lso, 0);
 	if (so == NULL) {
 		/*
 		 * Drop the connection; we will either send a RST or
 		 * have the peer retransmit its SYN again after its
 		 * RTO and try again.
 		 */
 		TCPSTAT_INC(tcps_listendrop);
 		if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Socket create failed "
 			    "due to limits or memory shortage\n",
 			    s, __func__);
 			free(s, M_TCPLOG);
 		}
 		goto abort2;
 	}
 #ifdef MAC
 	mac_socketpeer_set_from_mbuf(m, so);
 #endif
 
 	inp = sotoinpcb(so);
 	inp->inp_inc.inc_fibnum = so->so_fibnum;
 	INP_WLOCK(inp);
 	/*
 	 * Exclusive pcbinfo lock is not required in syncache socket case even
 	 * if two inpcb locks can be acquired simultaneously:
 	 *  - the inpcb in LISTEN state,
 	 *  - the newly created inp.
 	 *
 	 * In this case, an inp cannot be at same time in LISTEN state and
 	 * just created by an accept() call.
 	 */
 	INP_HASH_WLOCK(&V_tcbinfo);
 
 	/* Insert new socket into PCB hash list. */
 	inp->inp_inc.inc_flags = sc->sc_inc.inc_flags;
 #ifdef INET6
 	if (sc->sc_inc.inc_flags & INC_ISIPV6) {
 		inp->in6p_laddr = sc->sc_inc.inc6_laddr;
 	} else {
 		inp->inp_vflag &= ~INP_IPV6;
 		inp->inp_vflag |= INP_IPV4;
 #endif
 		inp->inp_laddr = sc->sc_inc.inc_laddr;
 #ifdef INET6
 	}
 #endif
 
 	/*
 	 * If there's an mbuf and it has a flowid, then let's initialise the
 	 * inp with that particular flowid.
 	 */
 	if (m != NULL && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
 		inp->inp_flowid = m->m_pkthdr.flowid;
 		inp->inp_flowtype = M_HASHTYPE_GET(m);
 	}
 
 	/*
 	 * Install in the reservation hash table for now, but don't yet
 	 * install a connection group since the full 4-tuple isn't yet
 	 * configured.
 	 */
 	inp->inp_lport = sc->sc_inc.inc_lport;
 	if ((error = in_pcbinshash_nopcbgroup(inp)) != 0) {
 		/*
 		 * Undo the assignments above if we failed to
 		 * put the PCB on the hash lists.
 		 */
 #ifdef INET6
 		if (sc->sc_inc.inc_flags & INC_ISIPV6)
 			inp->in6p_laddr = in6addr_any;
 		else
 #endif
 			inp->inp_laddr.s_addr = INADDR_ANY;
 		inp->inp_lport = 0;
 		if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: in_pcbinshash failed "
 			    "with error %i\n",
 			    s, __func__, error);
 			free(s, M_TCPLOG);
 		}
 		INP_HASH_WUNLOCK(&V_tcbinfo);
 		goto abort;
 	}
 #ifdef INET6
 	if (sc->sc_inc.inc_flags & INC_ISIPV6) {
 		struct inpcb *oinp = sotoinpcb(lso);
 		struct in6_addr laddr6;
 		struct sockaddr_in6 sin6;
 		/*
 		 * Inherit socket options from the listening socket.
 		 * Note that in6p_inputopts are not (and should not be)
 		 * copied, since it stores previously received options and is
 		 * used to detect if each new option is different than the
 		 * previous one and hence should be passed to a user.
 		 * If we copied in6p_inputopts, a user would not be able to
 		 * receive options just after calling the accept system call.
 		 */
 		inp->inp_flags |= oinp->inp_flags & INP_CONTROLOPTS;
 		if (oinp->in6p_outputopts)
 			inp->in6p_outputopts =
 			    ip6_copypktopts(oinp->in6p_outputopts, M_NOWAIT);
 
 		/* Connect to the peer recorded in the syncache entry. */
 		sin6.sin6_family = AF_INET6;
 		sin6.sin6_len = sizeof(sin6);
 		sin6.sin6_addr = sc->sc_inc.inc6_faddr;
 		sin6.sin6_port = sc->sc_inc.inc_fport;
 		sin6.sin6_flowinfo = sin6.sin6_scope_id = 0;
 		/* Saved so the local address can be restored on failure. */
 		laddr6 = inp->in6p_laddr;
 		if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
 			inp->in6p_laddr = sc->sc_inc.inc6_laddr;
 		if ((error = in6_pcbconnect_mbuf(inp, (struct sockaddr *)&sin6,
 		    thread0.td_ucred, m)) != 0) {
 			inp->in6p_laddr = laddr6;
 			if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: in6_pcbconnect failed "
 				    "with error %i\n",
 				    s, __func__, error);
 				free(s, M_TCPLOG);
 			}
 			INP_HASH_WUNLOCK(&V_tcbinfo);
 			goto abort;
 		}
 		/* Override flowlabel from in6_pcbconnect. */
 		inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
 		inp->inp_flow |= sc->sc_flowlabel;
 	}
 #endif /* INET6 */
 #if defined(INET) && defined(INET6)
 	else
 #endif
 #ifdef INET
 	{
 		struct in_addr laddr;
 		struct sockaddr_in sin;
 
 		/*
 		 * Prefer a source route taken from the segment; otherwise
 		 * take over the options saved in the syncache entry (the
 		 * entry's pointer is cleared so they are not freed twice).
 		 */
 		inp->inp_options = (m) ? ip_srcroute(m) : NULL;
 		
 		if (inp->inp_options == NULL) {
 			inp->inp_options = sc->sc_ipopts;
 			sc->sc_ipopts = NULL;
 		}
 
 		/* Connect to the peer recorded in the syncache entry. */
 		sin.sin_family = AF_INET;
 		sin.sin_len = sizeof(sin);
 		sin.sin_addr = sc->sc_inc.inc_faddr;
 		sin.sin_port = sc->sc_inc.inc_fport;
 		bzero((caddr_t)sin.sin_zero, sizeof(sin.sin_zero));
 		/* Saved so the local address can be restored on failure. */
 		laddr = inp->inp_laddr;
 		if (inp->inp_laddr.s_addr == INADDR_ANY)
 			inp->inp_laddr = sc->sc_inc.inc_laddr;
 		if ((error = in_pcbconnect_mbuf(inp, (struct sockaddr *)&sin,
 		    thread0.td_ucred, m)) != 0) {
 			inp->inp_laddr = laddr;
 			if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: in_pcbconnect failed "
 				    "with error %i\n",
 				    s, __func__, error);
 				free(s, M_TCPLOG);
 			}
 			INP_HASH_WUNLOCK(&V_tcbinfo);
 			goto abort;
 		}
 	}
 #endif /* INET */
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	/* Copy old policy into new socket's. */
 	if (ipsec_copy_pcbpolicy(sotoinpcb(lso), inp) != 0)
 		printf("syncache_socket: could not copy policy\n");
 #endif
 	INP_HASH_WUNLOCK(&V_tcbinfo);
 	/* Seed the new tcpcb with the sequence state from the entry. */
 	tp = intotcpcb(inp);
 	tcp_state_change(tp, TCPS_SYN_RECEIVED);
 	tp->iss = sc->sc_iss;
 	tp->irs = sc->sc_irs;
 	tcp_rcvseqinit(tp);
 	tcp_sendseqinit(tp);
 	blk = sototcpcb(lso)->t_fb;
 	if (V_functions_inherit_listen_socket_stack && blk != tp->t_fb) {
 		/*
 		 * Our parents t_fb was not the default,
 		 * we need to release our ref on tp->t_fb and 
 		 * pickup one on the new entry.
 		 */
 		struct tcp_function_block *rblk;
 		
 		rblk = find_and_ref_tcp_fb(blk);
 		KASSERT(rblk != NULL,
 		    ("cannot find blk %p out of syncache?", blk));
 		if (tp->t_fb->tfb_tcp_fb_fini)
 			(*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
 		refcount_release(&tp->t_fb->tfb_refcnt);
 		tp->t_fb = rblk;
 		if (tp->t_fb->tfb_tcp_fb_init) {
 			(*tp->t_fb->tfb_tcp_fb_init)(tp);
 		}
 	}		
 	tp->snd_wl1 = sc->sc_irs;
 	tp->snd_max = tp->iss + 1;
 	tp->snd_nxt = tp->iss + 1;
 	tp->rcv_up = sc->sc_irs + 1;
 	tp->rcv_wnd = sc->sc_wnd;
 	tp->rcv_adv += tp->rcv_wnd;
 	tp->last_ack_sent = tp->rcv_nxt;
 
 	/*
 	 * Only TF_NOPUSH and TF_NODELAY are taken from the listener; the
 	 * option-related flags below come from the syncache entry.
 	 */
 	tp->t_flags = sototcpcb(lso)->t_flags & (TF_NOPUSH|TF_NODELAY);
 	if (sc->sc_flags & SCF_NOOPT)
 		tp->t_flags |= TF_NOOPT;
 	else {
 		if (sc->sc_flags & SCF_WINSCALE) {
 			tp->t_flags |= TF_REQ_SCALE|TF_RCVD_SCALE;
 			tp->snd_scale = sc->sc_requested_s_scale;
 			tp->request_r_scale = sc->sc_requested_r_scale;
 		}
 		if (sc->sc_flags & SCF_TIMESTAMP) {
 			tp->t_flags |= TF_REQ_TSTMP|TF_RCVD_TSTMP;
 			tp->ts_recent = sc->sc_tsreflect;
 			tp->ts_recent_age = tcp_ts_getticks();
 			tp->ts_offset = sc->sc_tsoff;
 		}
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		if (sc->sc_flags & SCF_SIGNATURE)
 			tp->t_flags |= TF_SIGNATURE;
 #endif
 		if (sc->sc_flags & SCF_SACK)
 			tp->t_flags |= TF_SACK_PERMIT;
 	}
 
 	if (sc->sc_flags & SCF_ECN)
 		tp->t_flags |= TF_ECN_PERMIT;
 
 	/*
 	 * Set up MSS and get cached values from tcp_hostcache.
 	 * This might overwrite some of the defaults we just set.
 	 */
 	tcp_mss(tp, sc->sc_peer_mss);
 
 	/*
 	 * If the SYN,ACK was retransmitted, indicate that CWND to be
 	 * limited to one segment in cc_conn_init().
 	 * NB: sc_rxmits counts all SYN,ACK transmits, not just retransmits.
 	 */
 	if (sc->sc_rxmits > 1)
 		tp->snd_cwnd = 1;
 
 #ifdef TCP_OFFLOAD
 	/*
 	 * Allow a TOE driver to install its hooks.  Note that we hold the
 	 * pcbinfo lock too and that prevents tcp_usr_accept from accepting a
 	 * new connection before the TOE driver has done its thing.
 	 */
 	if (ADDED_BY_TOE(sc)) {
 		struct toedev *tod = sc->sc_tod;
 
 		tod->tod_offload_socket(tod, sc->sc_todctx, so);
 	}
 #endif
 	/*
 	 * Copy and activate timers.
 	 */
 	tp->t_keepinit = sototcpcb(lso)->t_keepinit;
 	tp->t_keepidle = sototcpcb(lso)->t_keepidle;
 	tp->t_keepintvl = sototcpcb(lso)->t_keepintvl;
 	tp->t_keepcnt = sototcpcb(lso)->t_keepcnt;
 	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
 
 	TCPSTAT_INC(tcps_accepts);
 	/* Success: the new inp is still write-locked here. */
 	return (so);
 
 	/*
 	 * abort:  reached with the new inp write-locked (hash lock already
 	 *         released by the failing path).
 	 * abort2: reached when sonewconn() failed, before any inp existed.
 	 */
 abort:
 	INP_WUNLOCK(inp);
 abort2:
 	if (so != NULL)
 		soabort(so);
 	return (NULL);
 }
 
 /*
  * This function gets called when we receive an ACK for a
  * socket in the LISTEN state.  We look up the connection
  * in the syncache, and if its there, we pull it out of
  * the cache and turn it into a full-blown connection in
  * the SYN-RECEIVED state.
  *
  * On syncache_socket() success the newly created socket
  * has its underlying inp locked.
  *
  * Return values, as implemented below:
  *    1  the segment passed validation and syncache_socket() was called;
  *       *lsop is the new socket, or NULL if socket creation failed.
  *    0  the segment was rejected; *lsop is set to NULL.
  *   -1  an MD5 signature check failed; *lsop is left untouched and,
  *       per the comments at those sites, no RST is to be sent.
  */
 int
 syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
     struct socket **lsop, struct mbuf *m)
 {
 	struct syncache *sc;
 	struct syncache_head *sch;
 	struct syncache scs;	/* on-stack entry rebuilt from a syncookie */
 	char *s;
 
 	/*
 	 * Global TCP locks are held because we manipulate the PCB lists
 	 * and create a new socket.
 	 */
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_ACK,
 	    ("%s: can handle only ACK", __func__));
 
 	sc = syncache_lookup(inc, &sch);	/* returns locked sch */
 	SCH_LOCK_ASSERT(sch);
 
 #ifdef INVARIANTS
 	/*
 	 * Test code for syncookies comparing the syncache stored
 	 * values with the reconstructed values from the cookie.
 	 */
 	if (sc != NULL)
 		syncookie_cmp(inc, sch, sc, th, to, *lsop);
 #endif
 
 	if (sc == NULL) {
 		/*
 		 * There is no syncache entry, so see if this ACK is
 		 * a returning syncookie.  To do this, first:
 		 *  A. Check if syncookies are used in case of syncache
 		 *     overflows
 		 *  B. See if this socket has had a syncache entry dropped in
 		 *     the recent past. We don't want to accept a bogus
 		 *     syncookie if we've never received a SYN or accept it
 		 *     twice.
 		 *  C. check that the syncookie is valid.  If it is, then
 		 *     cobble up a fake syncache entry, and return.
 		 */
 		if (!V_tcp_syncookies) {
 			SCH_UNLOCK(sch);
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Spurious ACK, "
 				    "segment rejected (syncookies disabled)\n",
 				    s, __func__);
 			goto failed;
 		}
 		if (!V_tcp_syncookiesonly &&
 		    sch->sch_last_overflow < time_uptime - SYNCOOKIE_LIFETIME) {
 			SCH_UNLOCK(sch);
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Spurious ACK, "
 				    "segment rejected (no syncache entry)\n",
 				    s, __func__);
 			goto failed;
 		}
 		bzero(&scs, sizeof(scs));
 		sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop);
 		SCH_UNLOCK(sch);
 		if (sc == NULL) {
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 				log(LOG_DEBUG, "%s; %s: Segment failed "
 				    "SYNCOOKIE authentication, segment rejected "
 				    "(probably spoofed)\n", s, __func__);
 			goto failed;
 		}
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		/* If received ACK has MD5 signature, check it. */
 		if ((to->to_flags & TOF_SIGNATURE) != 0 &&
 		    (!TCPMD5_ENABLED() ||
 		    TCPMD5_INPUT(m, th, to->to_signature) != 0)) {
 			/* Drop the ACK. */
 			if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 				log(LOG_DEBUG, "%s; %s: Segment rejected, "
 				    "MD5 signature doesn't match.\n",
 				    s, __func__);
 				free(s, M_TCPLOG);
 			}
 			TCPSTAT_INC(tcps_sig_err_sigopt);
 			return (-1); /* Do not send RST */
 		}
 #endif /* TCP_SIGNATURE */
 	} else {
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		/*
 		 * If listening socket requested TCP digests, check that
 		 * received ACK has signature and it is correct.
 		 * If not, drop the ACK and leave sc entry in th cache,
 		 * because SYN was received with correct signature.
 		 */
 		if (sc->sc_flags & SCF_SIGNATURE) {
 			if ((to->to_flags & TOF_SIGNATURE) == 0) {
 				/* No signature */
 				TCPSTAT_INC(tcps_sig_err_nosigopt);
 				SCH_UNLOCK(sch);
 				if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 					log(LOG_DEBUG, "%s; %s: Segment "
 					    "rejected, MD5 signature wasn't "
 					    "provided.\n", s, __func__);
 					free(s, M_TCPLOG);
 				}
 				return (-1); /* Do not send RST */
 			}
 			if (!TCPMD5_ENABLED() ||
 			    TCPMD5_INPUT(m, th, to->to_signature) != 0) {
 				/* Doesn't match or no SA */
 				SCH_UNLOCK(sch);
 				if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 					log(LOG_DEBUG, "%s; %s: Segment "
 					    "rejected, MD5 signature doesn't "
 					    "match.\n", s, __func__);
 					free(s, M_TCPLOG);
 				}
 				return (-1); /* Do not send RST */
 			}
 		}
 #endif /* TCP_SIGNATURE */
 		/*
 		 * Pull out the entry to unlock the bucket row.
 		 * 
 		 * NOTE: We must decrease TCPS_SYN_RECEIVED count here, not
 		 * tcp_state_change().  The tcpcb is not existent at this
 		 * moment.  A new one will be allocated via syncache_socket->
 		 * sonewconn->tcp_usr_attach in TCPS_CLOSED state, then
 		 * syncache_socket() will change it to TCPS_SYN_RECEIVED.
 		 */
 		TCPSTATES_DEC(TCPS_SYN_RECEIVED);
 		TAILQ_REMOVE(&sch->sch_bucket, sc, sc_hash);
 		sch->sch_length--;
 #ifdef TCP_OFFLOAD
 		if (ADDED_BY_TOE(sc)) {
 			struct toedev *tod = sc->sc_tod;
 
 			tod->tod_syncache_removed(tod, sc->sc_todctx);
 		}
 #endif
 		SCH_UNLOCK(sch);
 	}
 
 	/*
 	 * From here on the bucket row is unlocked and sc is either an
 	 * entry already unlinked from the cache or the on-stack scs.
 	 */
 
 	/*
 	 * Segment validation:
 	 * ACK must match our initial sequence number + 1 (the SYN|ACK).
 	 *
 	 * NOTE(review): the message labels the second value "ISS+1" but
 	 * the argument passed is sc->sc_iss.
 	 */
 	if (th->th_ack != sc->sc_iss + 1) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: ACK %u != ISS+1 %u, segment "
 			    "rejected\n", s, __func__, th->th_ack, sc->sc_iss);
 		goto failed;
 	}
 
 	/*
 	 * The SEQ must fall in the window starting at the received
 	 * initial receive sequence number + 1 (the SYN).
 	 *
 	 * NOTE(review): the message labels the second value "IRS+1" but
 	 * the argument passed is sc->sc_irs.
 	 */
 	if (SEQ_LEQ(th->th_seq, sc->sc_irs) ||
 	    SEQ_GT(th->th_seq, sc->sc_irs + sc->sc_wnd)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: SEQ %u != IRS+1 %u, segment "
 			    "rejected\n", s, __func__, th->th_seq, sc->sc_irs);
 		goto failed;
 	}
 
 	/*
 	 * If timestamps were not negotiated during SYN/ACK they
 	 * must not appear on any segment during this session.
 	 */
 	if (!(sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: Timestamp not expected, "
 			    "segment rejected\n", s, __func__);
 		goto failed;
 	}
 
 	/*
 	 * If timestamps were negotiated during SYN/ACK they should
 	 * appear on every segment during this session.
 	 * XXXAO: This is only informal as there have been unverified
 	 * reports of non-compliants stacks.
 	 */
 	if ((sc->sc_flags & SCF_TIMESTAMP) && !(to->to_flags & TOF_TS)) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Timestamp missing, "
 			    "no action\n", s, __func__);
 			free(s, M_TCPLOG);
 			s = NULL;
 		}
 	}
 
 	/*
 	 * If timestamps were negotiated, the reflected timestamp
 	 * must be equal to what we actually sent in the SYN|ACK
 	 * except in the case of 0. Some boxes are known for sending
 	 * broken timestamp replies during the 3whs (and potentially
 	 * during the connection also).
 	 *
 	 * Accept the final ACK of 3whs with reflected timestamp of 0
 	 * instead of sending a RST and deleting the syncache entry.
 	 */
 	if ((to->to_flags & TOF_TS) && to->to_tsecr &&
 	    to->to_tsecr != sc->sc_ts) {
 		if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
 			log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, "
 			    "segment rejected\n",
 			    s, __func__, to->to_tsecr, sc->sc_ts);
 		goto failed;
 	}
 
 	*lsop = syncache_socket(sc, *lsop, m);
 
 	if (*lsop == NULL)
 		TCPSTAT_INC(tcps_sc_aborted);
 	else
 		TCPSTAT_INC(tcps_sc_completed);
 
 /* how do we find the inp for the new socket? */
 	/* The on-stack cookie entry must not be handed to the zone. */
 	if (sc != &scs)
 		syncache_free(sc);
 	return (1);
 	/*
 	 * Every "goto failed" above is preceded by an assignment to s, so
 	 * s is either NULL or a log string that still has to be freed.
 	 */
 failed:
 	if (sc != NULL && sc != &scs)
 		syncache_free(sc);
 	if (s != NULL)
 		free(s, M_TCPLOG);
 	*lsop = NULL;
 	return (0);
 }
 
 #ifdef TCP_RFC7413
 /*
  * Create a socket from a syncache entry for a TCP Fast Open connection.
  *
  * *lsop is the listening socket on entry and is replaced by the result
  * of syncache_socket().  On failure the listener's pending TFO counter
  * is decremented; on success the counter pointer and response_cookie
  * are recorded in the new tcpcb, which is flagged TF_FASTOPEN and has
  * snd_max/snd_nxt reset to iss.
  */
 static void
 syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m,
     uint64_t response_cookie)
 {
 	unsigned int *pending;
 	struct tcpcb *tp;
 
 	/*
 	 * Global TCP locks are held because the PCB lists are manipulated
 	 * and a new socket is created.
 	 */
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 	/* Grab the listener's counter before *lsop is overwritten. */
 	pending = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending;
 
 	*lsop = syncache_socket(sc, *lsop, m);
 	if (*lsop == NULL) {
 		TCPSTAT_INC(tcps_sc_aborted);
 		atomic_subtract_int(pending, 1);
 		return;
 	}
 
 	tp = intotcpcb(sotoinpcb(*lsop));
 	tp->t_flags |= TF_FASTOPEN;
 	tp->t_tfo_cookie = response_cookie;
 	tp->snd_max = tp->iss;
 	tp->snd_nxt = tp->iss;
 	tp->t_tfo_pending = pending;
 	TCPSTAT_INC(tcps_sc_completed);
 }
 #endif /* TCP_RFC7413 */
 
 /*
  * Given a LISTEN socket and an inbound SYN request, add
  * this to the syn cache, and send back a segment:
  *	<SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
  * to the source.
  *
  * IMPORTANT NOTE: We do _NOT_ ACK data that might accompany the SYN.
  * Doing so would require that we hold onto the data and deliver it
  * to the application.  However, if we are the target of a SYN-flood
  * DoS attack, an attacker could send data which would eventually
  * consume all available buffer space if it were ACKed.  By not ACKing
  * the data, we avoid this DoS scenario.
  *
  * The exception to the above is when a SYN with a valid TCP Fast Open (TFO)
  * cookie is processed and a new socket is created.  In this case, any data
  * accompanying the SYN will be queued to the socket by tcp_input() and will
  * be ACKed either when the application sends response data or the delayed
  * ACK timer expires, whichever comes first.
  */
 int
 syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
     struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
     void *todctx)
 {
 	struct tcpcb *tp;
 	struct socket *so;
 	struct syncache *sc = NULL;
 	struct syncache_head *sch;
 	struct mbuf *ipopts = NULL;
 	u_int ltflags;
 	int win, ip_ttl, ip_tos;
 	char *s;
 	int rv = 0;
 #ifdef INET6
 	int autoflowlabel = 0;
 #endif
 #ifdef MAC
 	struct label *maclabel;
 #endif
 	struct syncache scs;
 	struct ucred *cred;
 #ifdef TCP_RFC7413
 	uint64_t tfo_response_cookie;
 	unsigned int *tfo_pending = NULL;
 	int tfo_cookie_valid = 0;
 	int tfo_response_cookie_valid = 0;
 #endif
 
 	INP_WLOCK_ASSERT(inp);			/* listen socket */
 	KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
 	    ("%s: unexpected tcp flags", __func__));
 
 	/*
 	 * Combine all so/tp operations very early to drop the INP lock as
 	 * soon as possible.
 	 */
 	so = *lsop;
+	KASSERT(SOLISTENING(so), ("%s: %p not listening", __func__, so));
 	tp = sototcpcb(so);
 	/*
 	 * Hold a reference on the listener's credentials.  It is either
 	 * handed over to the syncache entry (sc_cred) further down or
 	 * dropped with crfree() on the way out.
 	 */
 	cred = crhold(so->so_cred);
 
 #ifdef INET6
 	if ((inc->inc_flags & INC_ISIPV6) &&
 	    (inp->inp_flags & IN6P_AUTOFLOWLABEL))
 		autoflowlabel = 1;
 #endif
 	ip_ttl = inp->inp_ip_ttl;
 	ip_tos = inp->inp_ip_tos;
-	win = sbspace(&so->so_rcv);
+	win = so->sol_sbrcv_hiwat;
 	ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE));
 
 #ifdef TCP_RFC7413
 	if (V_tcp_fastopen_enabled && IS_FASTOPEN(tp->t_flags) &&
 	    (tp->t_tfo_pending != NULL) && (to->to_flags & TOF_FASTOPEN)) {
 		/*
 		 * Limit the number of pending TFO connections to
 		 * approximately half of the queue limit.  This prevents TFO
 		 * SYN floods from starving the service by filling the
 		 * listen queue with bogus TFO connections.
 		 */
 		if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <=
-		    (so->so_qlimit / 2)) {
+		    (so->sol_qlimit / 2)) {
 			int result;
 
 			result = tcp_fastopen_check_cookie(inc,
 			    to->to_tfo_cookie, to->to_tfo_len,
 			    &tfo_response_cookie);
 			tfo_cookie_valid = (result > 0);
 			tfo_response_cookie_valid = (result >= 0);
 		}
 
 		/*
 		 * Remember the TFO pending counter as it will have to be
 		 * decremented below if we don't make it to syncache_tfo_expand().
 		 */
 		tfo_pending = tp->t_tfo_pending;
 	}
 #endif
 
 	/* By the time we drop the lock these should no longer be used. */
 	so = NULL;
 	tp = NULL;
 
 #ifdef MAC
 	if (mac_syncache_init(&maclabel) != 0) {
 		INP_WUNLOCK(inp);
 		goto done;
 	} else
 		mac_syncache_create(maclabel, inp);
 #endif
 	/*
 	 * With a valid TFO cookie the listener's inpcb is kept locked past
 	 * this point: it is unlocked on the duplicate-SYN path below, and
 	 * otherwise left to the caller after syncache_tfo_expand().
 	 */
 #ifdef TCP_RFC7413
 	if (!tfo_cookie_valid)
 #endif
 		INP_WUNLOCK(inp);
 
 	/*
 	 * Remember the IP options, if any.
 	 */
 #ifdef INET6
 	if (!(inc->inc_flags & INC_ISIPV6))
 #endif
 #ifdef INET
 		ipopts = (m) ? ip_srcroute(m) : NULL;
 #else
 		ipopts = NULL;
 #endif
 
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 	/*
 	 * If listening socket requested TCP digests, check that received
 	 * SYN has signature and it is correct. If signature doesn't match
 	 * or TCP_SIGNATURE support isn't enabled, drop the packet.
 	 */
 	/*
 	 * NOTE(review): the "goto done" exits below run after ipopts was
 	 * captured and, when tfo_cookie_valid is set, while inp is still
 	 * locked; the done: path neither frees ipopts nor unlocks inp.
 	 * Confirm whether this leaks the mbuf / the lock.
 	 */
 	if (ltflags & TF_SIGNATURE) {
 		if ((to->to_flags & TOF_SIGNATURE) == 0) {
 			TCPSTAT_INC(tcps_sig_err_nosigopt);
 			goto done;
 		}
 		if (!TCPMD5_ENABLED() ||
 		    TCPMD5_INPUT(m, th, to->to_signature) != 0)
 			goto done;
 	}
 #endif	/* TCP_SIGNATURE */
 	/*
 	 * See if we already have an entry for this connection.
 	 * If we do, resend the SYN,ACK, and reset the retransmit timer.
 	 *
 	 * XXX: should the syncache be re-initialized with the contents
 	 * of the new SYN here (which may have different options?)
 	 *
 	 * XXX: We do not check the sequence number to see if this is a
 	 * real retransmit or a new connection attempt.  The question is
 	 * how to handle such a case; either ignore it as spoofed, or
 	 * drop the current entry and create a new one?
 	 */
 	sc = syncache_lookup(inc, &sch);	/* returns locked entry */
 	SCH_LOCK_ASSERT(sch);
 	if (sc != NULL) {
 #ifdef TCP_RFC7413
 		if (tfo_cookie_valid)
 			INP_WUNLOCK(inp);
 #endif
 		TCPSTAT_INC(tcps_sc_dupsyn);
 		if (ipopts) {
 			/*
 			 * If we were remembering a previous source route,
 			 * forget it and use the new one we've been given.
 			 */
 			if (sc->sc_ipopts)
 				(void) m_free(sc->sc_ipopts);
 			sc->sc_ipopts = ipopts;
 		}
 		/*
 		 * Update timestamp if present.
 		 */
 		if ((sc->sc_flags & SCF_TIMESTAMP) && (to->to_flags & TOF_TS))
 			sc->sc_tsreflect = to->to_tsval;
 		else
 			sc->sc_flags &= ~SCF_TIMESTAMP;
 #ifdef MAC
 		/*
 		 * Since we have already unconditionally allocated label
 		 * storage, free it up.  The syncache entry will already
 		 * have an initialized label we can use.
 		 */
 		mac_syncache_destroy(&maclabel);
 #endif
 		/* Retransmit SYN|ACK and reset retransmit count. */
 		if ((s = tcp_log_addrs(&sc->sc_inc, th, NULL, NULL))) {
 			log(LOG_DEBUG, "%s; %s: Received duplicate SYN, "
 			    "resetting timer and retransmitting SYN|ACK\n",
 			    s, __func__);
 			free(s, M_TCPLOG);
 		}
 		if (syncache_respond(sc, sch, 1, m) == 0) {
 			sc->sc_rxmits = 0;
 			syncache_timeout(sc, sch, 1);
 			TCPSTAT_INC(tcps_sndacks);
 			TCPSTAT_INC(tcps_sndtotal);
 		}
 		SCH_UNLOCK(sch);
 		goto done;
 	}
 
 #ifdef TCP_RFC7413
 	if (tfo_cookie_valid) {
 		/* Build the entry on the stack; no zone allocation is made. */
 		bzero(&scs, sizeof(scs));
 		sc = &scs;
 		goto skip_alloc;
 	}
 #endif
 
 	sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
 	if (sc == NULL) {
 		/*
 		 * The zone allocator couldn't provide more entries.
 		 * Treat this as if the cache was full; drop the oldest
 		 * entry and insert the new one.
 		 */
 		TCPSTAT_INC(tcps_sc_zonefail);
 		if ((sc = TAILQ_LAST(&sch->sch_bucket, sch_head)) != NULL) {
 			sch->sch_last_overflow = time_uptime;
 			syncache_drop(sc, sch);
 		}
 		sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO);
 		if (sc == NULL) {
 			if (V_tcp_syncookies) {
 				/*
 				 * Fall back to the on-stack entry.  It is
 				 * used to send the SYN|ACK but is never
 				 * inserted into the cache (see the
 				 * sc != &scs checks below).
 				 */
 				bzero(&scs, sizeof(scs));
 				sc = &scs;
 			} else {
 				SCH_UNLOCK(sch);
 				if (ipopts)
 					(void) m_free(ipopts);
 				goto done;
 			}
 		}
 	}
 
 #ifdef TCP_RFC7413
 skip_alloc:
 	if (!tfo_cookie_valid && tfo_response_cookie_valid)
 		sc->sc_tfo_cookie = &tfo_response_cookie;
 #endif
 
 	/*
 	 * Fill in the syncache values.
 	 */
 #ifdef MAC
 	sc->sc_label = maclabel;
 #endif
 	/* The entry now owns the credential reference taken above. */
 	sc->sc_cred = cred;
 	cred = NULL;
 	sc->sc_ipopts = ipopts;
 	bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
 #ifdef INET6
 	if (!(inc->inc_flags & INC_ISIPV6))
 #endif
 	{
 		sc->sc_ip_tos = ip_tos;
 		sc->sc_ip_ttl = ip_ttl;
 	}
 #ifdef TCP_OFFLOAD
 	sc->sc_tod = tod;
 	sc->sc_todctx = todctx;
 #endif
 	sc->sc_irs = th->th_seq;
 	sc->sc_iss = arc4random();
 	sc->sc_flags = 0;
 	sc->sc_flowlabel = 0;
 
 	/*
 	 * Initial receive window: clip sbspace to [0 .. TCP_MAXWIN].
 	 * win was derived from socket earlier in the function.
 	 */
 	win = imax(win, 0);
 	win = imin(win, TCP_MAXWIN);
 	sc->sc_wnd = win;
 
 	if (V_tcp_do_rfc1323) {
 		/*
 		 * A timestamp received in a SYN makes
 		 * it ok to send timestamp requests and replies.
 		 */
 		if (to->to_flags & TOF_TS) {
 			sc->sc_tsreflect = to->to_tsval;
 			sc->sc_ts = tcp_ts_getticks();
 			sc->sc_flags |= SCF_TIMESTAMP;
 		}
 		if (to->to_flags & TOF_SCALE) {
 			int wscale = 0;
 
 			/*
 			 * Pick the smallest possible scaling factor that
 			 * will still allow us to scale up to sb_max, aka
 			 * kern.ipc.maxsockbuf.
 			 *
 			 * We do this because there are broken firewalls that
 			 * will corrupt the window scale option, leading to
 			 * the other endpoint believing that our advertised
 			 * window is unscaled.  At scale factors larger than
 			 * 5 the unscaled window will drop below 1500 bytes,
 			 * leading to serious problems when traversing these
 			 * broken firewalls.
 			 *
 			 * With the default maxsockbuf of 256K, a scale factor
 			 * of 3 will be chosen by this algorithm.  Those who
 			 * choose a larger maxsockbuf should watch out
 			 * for the compatibility problems mentioned above.
 			 *
 			 * RFC1323: The Window field in a SYN (i.e., a <SYN>
 			 * or <SYN,ACK>) segment itself is never scaled.
 			 */
 			while (wscale < TCP_MAX_WINSHIFT &&
 			    (TCP_MAXWIN << wscale) < sb_max)
 				wscale++;
 			sc->sc_requested_r_scale = wscale;
 			sc->sc_requested_s_scale = to->to_wscale;
 			sc->sc_flags |= SCF_WINSCALE;
 		}
 	}
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 	/*
 	 * If listening socket requested TCP digests, flag this in the
 	 * syncache so that syncache_respond() will do the right thing
 	 * with the SYN+ACK.
 	 */
 	if (ltflags & TF_SIGNATURE)
 		sc->sc_flags |= SCF_SIGNATURE;
 #endif	/* TCP_SIGNATURE */
 	if (to->to_flags & TOF_SACKPERM)
 		sc->sc_flags |= SCF_SACK;
 	if (to->to_flags & TOF_MSS)
 		sc->sc_peer_mss = to->to_mss;	/* peer mss may be zero */
 	if (ltflags & TF_NOOPT)
 		sc->sc_flags |= SCF_NOOPT;
 	if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn)
 		sc->sc_flags |= SCF_ECN;
 
 	/* With syncookies enabled the random ISS is replaced by a cookie. */
 	if (V_tcp_syncookies)
 		sc->sc_iss = syncookie_generate(sch, sc);
 #ifdef INET6
 	if (autoflowlabel) {
 		if (V_tcp_syncookies)
 			sc->sc_flowlabel = sc->sc_iss;
 		else
 			sc->sc_flowlabel = ip6_randomflowlabel();
 		sc->sc_flowlabel = htonl(sc->sc_flowlabel) & IPV6_FLOWLABEL_MASK;
 	}
 #endif
 	SCH_UNLOCK(sch);
 
 #ifdef TCP_RFC7413
 	if (tfo_cookie_valid) {
 		syncache_tfo_expand(sc, lsop, m, tfo_response_cookie);
 		/* INP_WUNLOCK(inp) will be performed by the caller */
 		rv = 1;
 		goto tfo_expanded;
 	}
 #endif
 
 	/*
 	 * Do a standard 3-way handshake.
 	 */
 	if (syncache_respond(sc, sch, 0, m) == 0) {
 		/*
 		 * In cookies-only mode an allocated entry is released
 		 * rather than inserted; an on-stack entry is left alone.
 		 */
 		if (V_tcp_syncookies && V_tcp_syncookiesonly && sc != &scs)
 			syncache_free(sc);
 		else if (sc != &scs)
 			syncache_insert(sc, sch);   /* locks and unlocks sch */
 		TCPSTAT_INC(tcps_sndacks);
 		TCPSTAT_INC(tcps_sndtotal);
 	} else {
 		if (sc != &scs)
 			syncache_free(sc);
 		TCPSTAT_INC(tcps_sc_dropped);
 	}
 
 done:
 	/* A non-NULL mbuf is freed here and *lsop is cleared. */
 	if (m) {
 		*lsop = NULL;
 		m_freem(m);
 	}
 #ifdef TCP_RFC7413
 	/*
 	 * If tfo_pending is not NULL here, then a TFO SYN that did not
 	 * result in a new socket was processed and the associated pending
 	 * counter has not yet been decremented.  All such TFO processing paths
 	 * transit this point.
 	 */
 	if (tfo_pending != NULL)
 		tcp_fastopen_decrement_counter(tfo_pending);
 
 tfo_expanded:
 #endif
 	/* Still non-NULL only if no entry took over the reference. */
 	if (cred != NULL)
 		crfree(cred);
 #ifdef MAC
 	/* An on-stack entry is not inserted by this function; drop its label. */
 	if (sc == &scs)
 		mac_syncache_destroy(&maclabel);
 #endif
 	/* rv is 1 only when the TFO path expanded the connection, else 0. */
 	return (rv);
 }
 
 /*
  * Send SYN|ACK to the peer.  Either in response to the peer's SYN,
  * i.e. m0 != NULL, or upon 3WHS ACK timeout, i.e. m0 == NULL.
  *
  * The IP and TCP headers are built from scratch in a fresh header mbuf
  * using the state recorded in sc.  m0 is only consulted to copy the flowid
  * and hash type of the peer's SYN onto the reply.  The sch and locked
  * arguments are not referenced by this function body.
  *
  * Returns ENOBUFS if no mbuf could be allocated, EACCES if the TCP-MD5
  * signature could not be produced, and otherwise the result of
  * ip_output(), ip6_output() or the TOE device's tod_syncache_respond().
  */
 static int
 syncache_respond(struct syncache *sc, struct syncache_head *sch, int locked,
     const struct mbuf *m0)
 {
 	struct ip *ip = NULL;
 	struct mbuf *m;
 	struct tcphdr *th = NULL;
 	int optlen, error = 0;	/* Make compiler happy */
 	u_int16_t hlen, tlen, mssopt;
 	struct tcpopt to;
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 #endif
 	/* hlen: IP header size for the address family; tlen adds the TCP header. */
 	hlen =
 #ifdef INET6
 	       (sc->sc_inc.inc_flags & INC_ISIPV6) ? sizeof(struct ip6_hdr) :
 #endif
 		sizeof(struct ip);
 	tlen = hlen + sizeof(struct tcphdr);
 
 	/* Determine MSS we advertize to other end of connection. */
 	mssopt = tcp_mssopt(&sc->sc_inc);
 	/* Cap at the peer's MSS, but never go below V_tcp_minmss. */
 	if (sc->sc_peer_mss)
 		mssopt = max( min(sc->sc_peer_mss, mssopt), V_tcp_minmss);
 
 	/* XXX: Assume that the entire packet will fit in a header mbuf. */
 	KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN,
 	    ("syncache: mbuf too small"));
 
 	/* Create the IP+TCP header from scratch. */
 	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (ENOBUFS);
 #ifdef MAC
 	mac_syncache_create_mbuf(sc->sc_label, m);
 #endif
 	/* Leave max_linkhdr bytes of headroom in front of the IP header. */
 	m->m_data += max_linkhdr;
 	m->m_len = tlen;
 	m->m_pkthdr.len = tlen;
 	m->m_pkthdr.rcvif = NULL;
 
 #ifdef INET6
 	if (sc->sc_inc.inc_flags & INC_ISIPV6) {
 		ip6 = mtod(m, struct ip6_hdr *);
 		ip6->ip6_vfc = IPV6_VERSION;
 		ip6->ip6_nxt = IPPROTO_TCP;
 		ip6->ip6_src = sc->sc_inc.inc6_laddr;
 		ip6->ip6_dst = sc->sc_inc.inc6_faddr;
 		ip6->ip6_plen = htons(tlen - hlen);
 		/* ip6_hlim is set after checksum */
 		ip6->ip6_flow &= ~IPV6_FLOWLABEL_MASK;
 		ip6->ip6_flow |= sc->sc_flowlabel;
 
 		th = (struct tcphdr *)(ip6 + 1);
 	}
 #endif
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		ip = mtod(m, struct ip *);
 		ip->ip_v = IPVERSION;
 		ip->ip_hl = sizeof(struct ip) >> 2;
 		ip->ip_len = htons(tlen);
 		ip->ip_id = 0;
 		ip->ip_off = 0;
 		ip->ip_sum = 0;
 		ip->ip_p = IPPROTO_TCP;
 		ip->ip_src = sc->sc_inc.inc_laddr;
 		ip->ip_dst = sc->sc_inc.inc_faddr;
 		ip->ip_ttl = sc->sc_ip_ttl;
 		ip->ip_tos = sc->sc_ip_tos;
 
 		/*
 		 * See if we should do MTU discovery.  Route lookups are
 		 * expensive, so we will only unset the DF bit if:
 		 *
 		 *	1) path_mtu_discovery is disabled
 		 *	2) the SCF_UNREACH flag has been set
 		 */
 		if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0))
 		       ip->ip_off |= htons(IP_DF);
 
 		th = (struct tcphdr *)(ip + 1);
 	}
 #endif /* INET */
 	th->th_sport = sc->sc_inc.inc_lport;
 	th->th_dport = sc->sc_inc.inc_fport;
 
 	th->th_seq = htonl(sc->sc_iss);
 	th->th_ack = htonl(sc->sc_irs + 1);
 	th->th_off = sizeof(struct tcphdr) >> 2;
 	th->th_x2 = 0;
 	th->th_flags = TH_SYN|TH_ACK;
 	th->th_win = htons(sc->sc_wnd);
 	th->th_urp = 0;
 
 	if (sc->sc_flags & SCF_ECN) {
 		th->th_flags |= TH_ECE;
 		TCPSTAT_INC(tcps_ecn_shs);
 	}
 
 	/* Tack on the TCP options. */
 	if ((sc->sc_flags & SCF_NOOPT) == 0) {
 		/* This store is overwritten by the TOF_MSS assignment below. */
 		to.to_flags = 0;
 
 		to.to_mss = mssopt;
 		to.to_flags = TOF_MSS;
 		if (sc->sc_flags & SCF_WINSCALE) {
 			to.to_wscale = sc->sc_requested_r_scale;
 			to.to_flags |= TOF_SCALE;
 		}
 		if (sc->sc_flags & SCF_TIMESTAMP) {
 			/* Virgin timestamp or TCP cookie enhanced one. */
 			to.to_tsval = sc->sc_ts;
 			to.to_tsecr = sc->sc_tsreflect;
 			to.to_flags |= TOF_TS;
 		}
 		if (sc->sc_flags & SCF_SACK)
 			to.to_flags |= TOF_SACKPERM;
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		if (sc->sc_flags & SCF_SIGNATURE)
 			to.to_flags |= TOF_SIGNATURE;
 #endif
 #ifdef TCP_RFC7413
 		if (sc->sc_tfo_cookie) {
 			to.to_flags |= TOF_FASTOPEN;
 			to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
 			to.to_tfo_cookie = sc->sc_tfo_cookie;
 			/* don't send cookie again when retransmitting response */
 			sc->sc_tfo_cookie = NULL;
 		}
 #endif
 		/* Options are written directly behind the TCP header. */
 		optlen = tcp_addoptions(&to, (u_char *)(th + 1));
 
 		/* Adjust headers by option size. */
 		th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
 		m->m_len += optlen;
 		m->m_pkthdr.len += optlen;
 #ifdef INET6
 		if (sc->sc_inc.inc_flags & INC_ISIPV6)
 			ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) + optlen);
 		else
 #endif
 			ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
 		if (sc->sc_flags & SCF_SIGNATURE) {
 			KASSERT(to.to_flags & TOF_SIGNATURE,
 			    ("tcp_addoptions() didn't set tcp_signature"));
 
 			/* NOTE: to.to_signature is inside of mbuf */
 			if (!TCPMD5_ENABLED() ||
 			    TCPMD5_OUTPUT(m, th, to.to_signature) != 0) {
 				m_freem(m);
 				return (EACCES);
 			}
 		}
 #endif
 	} else
 		optlen = 0;
 
 	M_SETFIB(m, sc->sc_inc.inc_fibnum);
 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 	/*
 	 * If we have peer's SYN and it has a flowid, then let's assign it to
 	 * our SYN|ACK.  ip6_output() and ip_output() will not assign flowid
 	 * to SYN|ACK due to lack of inp here.
 	 */
 	if (m0 != NULL && M_HASHTYPE_GET(m0) != M_HASHTYPE_NONE) {
 		m->m_pkthdr.flowid = m0->m_pkthdr.flowid;
 		M_HASHTYPE_SET(m, M_HASHTYPE_GET(m0));
 	}
 #ifdef INET6
 	if (sc->sc_inc.inc_flags & INC_ISIPV6) {
 		/* Only the pseudo-header sum is stored; csum_flags requests the rest. */
 		m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
 		th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen,
 		    IPPROTO_TCP, 0);
 		ip6->ip6_hlim = in6_selecthlim(NULL, NULL);
 #ifdef TCP_OFFLOAD
 		/* Entries added by a TOE device are answered through that device. */
 		if (ADDED_BY_TOE(sc)) {
 			struct toedev *tod = sc->sc_tod;
 
 			error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
 
 			return (error);
 		}
 #endif
 		error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
 	}
 #endif
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		/* Only the pseudo-header sum is stored; csum_flags requests the rest. */
 		m->m_pkthdr.csum_flags = CSUM_TCP;
 		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 		    htons(tlen + optlen - hlen + IPPROTO_TCP));
 #ifdef TCP_OFFLOAD
 		/* Entries added by a TOE device are answered through that device. */
 		if (ADDED_BY_TOE(sc)) {
 			struct toedev *tod = sc->sc_tod;
 
 			error = tod->tod_syncache_respond(tod, sc->sc_todctx, m);
 
 			return (error);
 		}
 #endif
 		error = ip_output(m, sc->sc_ipopts, NULL, 0, NULL, NULL);
 	}
 #endif
 	return (error);
 }
 
 /*
  * The purpose of syncookies is to handle spoofed SYN flooding DoS attacks
  * that exceed the capacity of the syncache by avoiding the storage of any
  * of the SYNs we receive.  Syncookies defend against blind SYN flooding
  * attacks where the attacker does not have access to our responses.
  *
  * Syncookies encode and include all necessary information about the
  * connection setup within the SYN|ACK that we send back.  That way we
  * can avoid keeping any local state until the ACK to our SYN|ACK returns
  * (if ever).  Normally the syncache and syncookies are running in parallel
  * with the latter taking over when the former is exhausted.  When matching
  * syncache entry is found the syncookie is ignored.
  *
  * The only reliable information persisting the 3WHS is our initial sequence
  * number ISS of 32 bits.  Syncookies embed a cryptographically sufficiently
  * strong hash (MAC) value and a few bits of TCP SYN options in the ISS
  * of our SYN|ACK.  The MAC can be recomputed when the ACK to our SYN|ACK
  * returns and signifies a legitimate connection if it matches the ACK.
  *
  * The available space of 32 bits to store the hash and to encode the SYN
  * option information is very tight and we should have at least 24 bits for
  * the MAC to keep the number of guesses by blind spoofing reasonably high.
  *
  * SYN option information we have to encode to fully restore a connection:
  * MSS: is important to choose an optimal segment size to avoid IP level
  *   fragmentation along the path.  The common MSS values can be encoded
  *   in a 3-bit table.  Uncommon values are captured by the next lower value
  *   in the table leading to a slight increase in packetization overhead.
  * WSCALE: is necessary to allow large windows to be used for high delay-
  *   bandwidth product links.  Not scaling the window when it was initially
  *   negotiated is bad for performance as lack of scaling further decreases
  *   the apparent available send window.  We only need to encode the WSCALE
  *   we received from the remote end.  Our end can be recalculated at any
  *   time.  The common WSCALE values can be encoded in a 3-bit table.
  *   Uncommon values are captured by the next lower value in the table
  *   making us under-estimate the available window size halving our
  *   theoretically possible maximum throughput for that connection.
  * SACK: Greatly assists in packet loss recovery and requires 1 bit.
  * TIMESTAMP and SIGNATURE are not encoded because they are permanent options
  *   that are included in all segments on a connection.  We enable them when
  *   the ACK has them.
  *
  * Security of syncookies and attack vectors:
  *
  * The MAC is computed over (faddr||laddr||fport||lport||irs||flags||secmod)
  * together with the global secret to make it unique per connection attempt.
  * Thus any change of any of those parameters results in a different MAC output
  * in an unpredictable way unless a collision is encountered.  24 bits of the
  * MAC are embedded into the ISS.
  *
  * To prevent replay attacks two rotating global secrets are updated with a
  * new random value every 15 seconds.  The life-time of a syncookie is thus
  * 15-30 seconds.
  *
  * Vector 1: Attacking the secret.  This requires finding a weakness in the
  * MAC itself or the way it is used here.  The attacker can do a chosen plain
  * text attack by varying and testing all the parameters under his control.
  * The strength depends on the size and randomness of the secret, and the
  * cryptographic security of the MAC function.  Due to the constant updating
  * of the secret the attacker has at most 29.999 seconds to find the secret
  * and launch spoofed connections.  After that he has to start all over again.
  *
  * Vector 2: Collision attack on the MAC of a single ACK.  With a 24 bit MAC
  * size an average of 4,823 attempts are required for a 50% chance of success
  * to spoof a single syncookie (birthday collision paradox).  However the
  * attacker is blind and doesn't know if one of his attempts succeeded unless
  * he has a side channel to infer success from.  A single connection setup
  * success average of 90% requires 8,790 packets, 99.99% requires 17,578 packets.
  * This many attempts are required for each one blind spoofed connection.  For
  * every additional spoofed connection he has to launch another N attempts.
  * Thus for a sustained rate 100 spoofed connections per second approximately
  * 1,800,000 packets per second would have to be sent.
  *
  * NB: The MAC function should be fast so that it doesn't become a CPU
  * exhaustion attack vector itself.
  *
  * References:
  *  RFC4987 TCP SYN Flooding Attacks and Common Mitigations
  *  SYN cookies were first proposed by cryptographer Dan J. Bernstein in 1996
  *   http://cr.yp.to/syncookies.html    (overview)
  *   http://cr.yp.to/syncookies/archive (details)
  *
  *
  * Schematic construction of a syncookie enabled Initial Sequence Number:
  *  0        1         2         3
  *  12345678901234567890123456789012
  * |xxxxxxxxxxxxxxxxxxxxxxxxWWWMMMSP|
  *
  *  x 24 MAC (truncated)
  *  W  3 Send Window Scale index
  *  M  3 MSS index
  *  S  1 SACK permitted
  *  P  1 Odd/even secret
  */
 
 /*
  * MSS values representable in a syncookie, in ascending order.  The
  * cookie's mss_idx field selects an entry: syncookie_generate() picks the
  * highest entry not exceeding the computed MSS and syncookie_lookup() maps
  * the index back to the value stored here.
  *
  * Distribution and probability of certain MSS values.  Those in between are
  * rounded down to the next lower one.
  * [An Analysis of TCP Maximum Segment Sizes, S. Alcock and R. Nelson, 2011]
  *                            .2%  .3%   5%    7%    7%    20%   15%   45%
  */
 static int tcp_sc_msstab[] = { 216, 536, 1200, 1360, 1400, 1440, 1452, 1460 };
 
 /*
  * Send window scale values representable in a syncookie, in ascending
  * order and selected by the cookie's wscale_idx field.
  *
  * Distribution and probability of certain WSCALE values.  We have to map the
  * (send) window scale (shift) option with a range of 0-14 from 4 bits into 3
  * bits based on prevalence of certain values.  Values for which we don't
  * have an exact match are rounded down to the next lower one, letting us
  * under-estimate the true available window.  At the moment this would
  * happen only for the very uncommon values 3, 5 and those above 8 (more
  * than 16MB socket buffer and window size).  The absence of the WSCALE
  * option (no scaling in either direction) is encoded with index zero.
  * [WSCALE values histograms, Allman, 2012]
  *                            X 10 10 35  5  6 14 10%   by host
  *                            X 11  4  5  5 18 49  3%   by connections
  */
 static int tcp_sc_wstab[] = { 0, 0, 1, 2, 4, 6, 7, 8 };
 
 /*
  * Compute the MAC for a SYN cookie.  SipHash-2-4, keyed with secbits, is
  * run over the foreign and local address, the foreign and local port, irs,
  * flags and secmod (in that order); it was picked for its speed and good
  * cryptographic properties.  The two 32-bit words of the digest are folded
  * into the return value with XOR.
  */
 static uint32_t
 syncookie_mac(struct in_conninfo *inc, tcp_seq irs, uint8_t flags,
     uint8_t *secbits, uintptr_t secmod)
 {
 	uint32_t digest[2];
 	SIPHASH_CTX sip;
 
 	SipHash24_Init(&sip);
 	SipHash_SetKey(&sip, secbits);
 
 	/* Which address pair is hashed depends on the address family. */
 	switch (inc->inc_flags & INC_ISIPV6) {
 #ifdef INET
 	case 0:
 		SipHash_Update(&sip, &inc->inc_faddr, sizeof(inc->inc_faddr));
 		SipHash_Update(&sip, &inc->inc_laddr, sizeof(inc->inc_laddr));
 		break;
 #endif
 #ifdef INET6
 	case INC_ISIPV6:
 		SipHash_Update(&sip, &inc->inc6_faddr, sizeof(inc->inc6_faddr));
 		SipHash_Update(&sip, &inc->inc6_laddr, sizeof(inc->inc6_laddr));
 		break;
 #endif
 	}
 
 	SipHash_Update(&sip, &inc->inc_fport, sizeof(inc->inc_fport));
 	SipHash_Update(&sip, &inc->inc_lport, sizeof(inc->inc_lport));
 
 	SipHash_Update(&sip, &irs, sizeof(irs));
 	SipHash_Update(&sip, &flags, sizeof(flags));
 	SipHash_Update(&sip, &secmod, sizeof(secmod));
 
 	SipHash_Final((u_int8_t *)digest, &sip);
 
 	return (digest[0] ^ digest[1]);
 }
 
 static tcp_seq
 syncookie_generate(struct syncache_head *sch, struct syncache *sc)
 {
 	u_int i, mss, secbit, wscale;
 	uint32_t iss, hash;
 	uint8_t *secbits;
 	union syncookie cookie;
 
 	SCH_LOCK_ASSERT(sch);
 
 	cookie.cookie = 0;
 
 	/* Map our computed MSS into the 3-bit index. */
 	mss = min(tcp_mssopt(&sc->sc_inc), max(sc->sc_peer_mss, V_tcp_minmss));
 	for (i = nitems(tcp_sc_msstab) - 1; tcp_sc_msstab[i] > mss && i > 0;
 	     i--)
 		;
 	cookie.flags.mss_idx = i;
 
 	/*
 	 * Map the send window scale into the 3-bit index but only if
 	 * the wscale option was received.
 	 */
 	if (sc->sc_flags & SCF_WINSCALE) {
 		wscale = sc->sc_requested_s_scale;
 		for (i = nitems(tcp_sc_wstab) - 1;
 		    tcp_sc_wstab[i] > wscale && i > 0;
 		     i--)
 			;
 		cookie.flags.wscale_idx = i;
 	}
 
 	/* Can we do SACK? */
 	if (sc->sc_flags & SCF_SACK)
 		cookie.flags.sack_ok = 1;
 
 	/* Which of the two secrets to use. */
 	secbit = sch->sch_sc->secret.oddeven & 0x1;
 	cookie.flags.odd_even = secbit;
 
 	secbits = sch->sch_sc->secret.key[secbit];
 	hash = syncookie_mac(&sc->sc_inc, sc->sc_irs, cookie.cookie, secbits,
 	    (uintptr_t)sch);
 
 	/*
 	 * Put the flags into the hash and XOR them to get better ISS number
 	 * variance.  This doesn't enhance the cryptographic strength and is
 	 * done to prevent the 8 cookie bits from showing up directly on the
 	 * wire.
 	 */
 	iss = hash & ~0xff;
 	iss |= cookie.cookie ^ (hash >> 24);
 
 	/* Randomize the timestamp. */
 	if (sc->sc_flags & SCF_TIMESTAMP) {
 		sc->sc_ts = arc4random();
 		sc->sc_tsoff = sc->sc_ts - tcp_ts_getticks();
 	}
 
 	TCPSTAT_INC(tcps_sc_sendcookie);
 	return (iss);
 }
 
 /*
  * Validate the syncookie echoed in the ACK of a three-way handshake and,
  * when the recomputed MAC matches, rebuild the connection parameters into
  * the caller-supplied sc.  Returns sc on success and NULL when the MAC
  * does not match.  The receive window and, depending on the address
  * family, TTL/TOS or the flow label setting are taken from the listening
  * socket lso.  The bucket lock must be held.
  */
 static struct syncache *
 syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, 
     struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
     struct socket *lso)
 {
 	uint32_t hash;
 	uint8_t *secbits;
 	tcp_seq ack, seq;
 	int wnd, wscale = 0;
 	union syncookie cookie;
 
 	SCH_LOCK_ASSERT(sch);
 
 	/*
 	 * Pull information out of SYN-ACK/ACK and revert sequence number
 	 * advances.
 	 */
 	ack = th->th_ack - 1;
 	seq = th->th_seq - 1;
 
 	/*
 	 * Unpack the flags containing enough information to restore the
 	 * connection.
 	 */
 	/* Undoes the XOR with the MAC's top byte applied when generating. */
 	cookie.cookie = (ack & 0xff) ^ (ack >> 24);
 
 	/* Which of the two secrets to use. */
 	secbits = sch->sch_sc->secret.key[cookie.flags.odd_even];
 
 	hash = syncookie_mac(inc, seq, cookie.cookie, secbits, (uintptr_t)sch);
 
 	/* The recomputed hash matches the ACK if this was a genuine cookie. */
 	if ((ack & ~0xff) != (hash & ~0xff))
 		return (NULL);
 
 	/* Fill in the syncache values. */
 	sc->sc_flags = 0;
 	bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo));
 	sc->sc_ipopts = NULL;
 	
 	sc->sc_irs = seq;
 	sc->sc_iss = ack;
 
 	switch (inc->inc_flags & INC_ISIPV6) {
 #ifdef INET
 	case 0:
 		sc->sc_ip_ttl = sotoinpcb(lso)->inp_ip_ttl;
 		sc->sc_ip_tos = sotoinpcb(lso)->inp_ip_tos;
 		break;
 #endif
 #ifdef INET6
 	case INC_ISIPV6:
 		/*
 		 * NOTE(review): syncache_add() derives the flow label as
 		 * htonl(iss) & IPV6_FLOWLABEL_MASK, whereas no htonl() is
 		 * applied here -- confirm both yield the same label.
 		 */
 		if (sotoinpcb(lso)->inp_flags & IN6P_AUTOFLOWLABEL)
 			sc->sc_flowlabel = sc->sc_iss & IPV6_FLOWLABEL_MASK;
 		break;
 #endif
 	}
 
 	sc->sc_peer_mss = tcp_sc_msstab[cookie.flags.mss_idx];
 
 	/* We can simply recompute receive window scale we sent earlier. */
 	while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < sb_max)
 		wscale++;
 
 	/* Only use wscale if it was enabled in the original SYN. */
 	if (cookie.flags.wscale_idx > 0) {
 		sc->sc_requested_r_scale = wscale;
 		sc->sc_requested_s_scale = tcp_sc_wstab[cookie.flags.wscale_idx];
 		sc->sc_flags |= SCF_WINSCALE;
 	}
 
-	wnd = sbspace(&lso->so_rcv);
+	wnd = lso->sol_sbrcv_hiwat;
 	wnd = imax(wnd, 0);
 	wnd = imin(wnd, TCP_MAXWIN);
 	sc->sc_wnd = wnd;
 
 	if (cookie.flags.sack_ok)
 		sc->sc_flags |= SCF_SACK;
 
 	/* Timestamp and signature are not in the cookie; take them from the ACK. */
 	if (to->to_flags & TOF_TS) {
 		sc->sc_flags |= SCF_TIMESTAMP;
 		sc->sc_tsreflect = to->to_tsval;
 		sc->sc_ts = to->to_tsecr;
 		sc->sc_tsoff = to->to_tsecr - tcp_ts_getticks();
 	}
 
 	if (to->to_flags & TOF_SIGNATURE)
 		sc->sc_flags |= SCF_SIGNATURE;
 
 	sc->sc_rxmits = 0;
 
 	TCPSTAT_INC(tcps_sc_recvcookie);
 	return (sc);
 }
 
 #ifdef INVARIANTS
 /*
  * Debugging cross-check: decode the syncookie from the ACK into a scratch
  * entry and log every negotiated parameter (peer MSS, both window scales,
  * SACK) that differs from the syncache entry sc.  Always returns 0.
  */
 static int
 syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch,
     struct syncache *sc, struct tcphdr *th, struct tcpopt *to,
     struct socket *lso)
 {
 	struct syncache scratch, *decoded;
 	char *addrs;
 
 	bzero(&scratch, sizeof(scratch));
 	decoded = syncookie_lookup(inc, sch, &scratch, th, to, lso);
 
 	/* Without an address string there is nothing we could log. */
 	addrs = tcp_log_addrs(inc, th, NULL, NULL);
 	if (addrs == NULL)
 		return (0);
 
 	/* The cookie did not validate, so there is nothing to compare. */
 	if (decoded == NULL)
 		goto out;
 
 	if (sc->sc_peer_mss != decoded->sc_peer_mss) {
 		log(LOG_DEBUG, "%s; %s: mss different %i vs %i\n",
 		    addrs, __func__, sc->sc_peer_mss, decoded->sc_peer_mss);
 	}
 
 	if (sc->sc_requested_r_scale != decoded->sc_requested_r_scale) {
 		log(LOG_DEBUG, "%s; %s: rwscale different %i vs %i\n",
 		    addrs, __func__, sc->sc_requested_r_scale,
 		    decoded->sc_requested_r_scale);
 	}
 
 	if (sc->sc_requested_s_scale != decoded->sc_requested_s_scale) {
 		log(LOG_DEBUG, "%s; %s: swscale different %i vs %i\n",
 		    addrs, __func__, sc->sc_requested_s_scale,
 		    decoded->sc_requested_s_scale);
 	}
 
 	if ((sc->sc_flags & SCF_SACK) != (decoded->sc_flags & SCF_SACK)) {
 		log(LOG_DEBUG, "%s; %s: SACK different\n", addrs, __func__);
 	}
 
 out:
 	free(addrs, M_TCPLOG);
 	return (0);
 }
 #endif /* INVARIANTS */
 
 /*
  * Refill the secret that is not currently selected by secret.oddeven with
  * fresh random bytes, bump oddeven so that the refilled secret becomes the
  * selected one, and schedule the next run SYNCOOKIE_LIFETIME seconds out.
  */
 static void
 syncookie_reseed(void *arg)
 {
 	struct tcp_syncache *tsc;
 	int idle;
 
 	tsc = arg;
 
 	/*
 	 * No lock is taken for reseeding.  All that is required is that
 	 * the new random bytes are visible to every CPU of an SMP system
 	 * before the counter changes, which the atomic add with release
 	 * semantics provides.
 	 */
 	idle = ((tsc->secret.oddeven & 0x1) == 0);
 	arc4rand(tsc->secret.key[idle], SYNCOOKIE_SECRET_SIZE, 0);
 	atomic_add_rel_int(&tsc->secret.oddeven, 1);
 
 	/* Arm the callout for the next reseed. */
 	callout_schedule(&tsc->secret.reseed, SYNCOOKIE_LIFETIME * hz);
 }
 
 /*
  * Export the syncache entries to userland so that netstat can show them
  * next to the regular sockets.  Intended to be called only from
  * tcp_pcblist.
  *
  * Every hash bucket is walked under its lock and one xtcpcb is copied out
  * per entry the requesting thread's credentials are allowed to see.  The
  * walk stops after max_pcbs entries or on the first copy-out error.  The
  * number of exported entries is stored in *pcbs_exported; the return
  * value is 0 or the SYSCTL_OUT() error.
  *
  * On an active system the number of pcbs exported may bear no relation
  * to max_pcbs, which merely states how much space the caller set aside.
  */
 int
 syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported)
 {
 	struct xtcpcb xt;
 	struct syncache_head *bucket;
 	struct syncache *ent;
 	int error, idx, nexported;
 
 	error = 0;
 	nexported = 0;
 	for (idx = 0; idx < V_tcp_syncache.hashsize; idx++) {
 		bucket = &V_tcp_syncache.hashbase[idx];
 		SCH_LOCK(bucket);
 		TAILQ_FOREACH(ent, &bucket->sch_bucket, sc_hash) {
 			/* The caller's buffer is full. */
 			if (nexported >= max_pcbs) {
 				SCH_UNLOCK(bucket);
 				goto exit;
 			}
 			/* Skip entries the requester may not see. */
 			if (cr_cansee(req->td->td_ucred, ent->sc_cred) != 0)
 				continue;
 
 			bzero(&xt, sizeof(xt));
 			xt.xt_len = sizeof(xt);
 			xt.t_state = TCPS_SYN_RECEIVED;
 			xt.xt_inp.inp_vflag =
 			    (ent->sc_inc.inc_flags & INC_ISIPV6) ?
 			    INP_IPV6 : INP_IPV4;
 			bcopy(&ent->sc_inc, &xt.xt_inp.inp_inc,
 			    sizeof(struct in_conninfo));
 			xt.xt_inp.xi_socket.xso_len = sizeof(struct xsocket);
 			xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
 			xt.xt_inp.xi_socket.so_type = SOCK_STREAM;
 			xt.xt_inp.xi_socket.so_state = SS_ISCONNECTING;
 
 			error = SYSCTL_OUT(req, &xt, sizeof(xt));
 			if (error != 0) {
 				SCH_UNLOCK(bucket);
 				goto exit;
 			}
 			nexported++;
 		}
 		SCH_UNLOCK(bucket);
 	}
 exit:
 	*pcbs_exported = nexported;
 	return (error);
 }
Index: head/sys/netinet/tcp_timewait.c
===================================================================
--- head/sys/netinet/tcp_timewait.c	(revision 319721)
+++ head/sys/netinet/tcp_timewait.c	(revision 319722)
@@ -1,741 +1,739 @@
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)tcp_subr.c	8.2 (Berkeley) 5/24/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_tcpdebug.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/callout.h>
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/protosw.h>
 #include <sys/random.h>
 
 #include <vm/uma.h>
 
 #include <net/route.h>
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/vnet.h>
 
 #include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/ip_var.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #include <netinet6/in6_pcb.h>
 #include <netinet6/ip6_var.h>
 #include <netinet6/scope6_var.h>
 #include <netinet6/nd6.h>
 #endif
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #ifdef INET6
 #include <netinet6/tcp6_var.h>
 #endif
 #include <netinet/tcpip.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
 #endif
 #ifdef INET6
 #include <netinet6/ip6protosw.h>
 #endif
 
 #include <machine/in_cksum.h>
 
 #include <security/mac/mac_framework.h>
 
 static VNET_DEFINE(uma_zone_t, tcptw_zone);
 #define	V_tcptw_zone		VNET(tcptw_zone)
 static int	maxtcptw;
 
 /*
  * The timed wait queue contains references to each of the TCP sessions
  * currently in the TIME_WAIT state.  The queue pointers, including the
  * queue pointers in each tcptw structure, are protected using the global
  * timewait lock, which must be held over queue iteration and modification.
  *
  * Rules on tcptw usage:
  *  - an inpcb is always freed _after_ its tcptw
  *  - a tcptw relies on its inpcb reference counting for memory stability
  *  - a tcptw is dereferenceable only while its inpcb is locked
  */
 static VNET_DEFINE(TAILQ_HEAD(, tcptw), twq_2msl);
 #define	V_twq_2msl		VNET(twq_2msl)
 
 /* Global timewait lock */
 static VNET_DEFINE(struct rwlock, tw_lock);
 #define	V_tw_lock		VNET(tw_lock)
 
 #define	TW_LOCK_INIT(tw, d)	rw_init_flags(&(tw), (d), 0)
 #define	TW_LOCK_DESTROY(tw)	rw_destroy(&(tw))
 #define	TW_RLOCK(tw)		rw_rlock(&(tw))
 #define	TW_WLOCK(tw)		rw_wlock(&(tw))
 #define	TW_RUNLOCK(tw)		rw_runlock(&(tw))
 #define	TW_WUNLOCK(tw)		rw_wunlock(&(tw))
 #define	TW_LOCK_ASSERT(tw)	rw_assert(&(tw), RA_LOCKED)
 #define	TW_RLOCK_ASSERT(tw)	rw_assert(&(tw), RA_RLOCKED)
 #define	TW_WLOCK_ASSERT(tw)	rw_assert(&(tw), RA_WLOCKED)
 #define	TW_UNLOCK_ASSERT(tw)	rw_assert(&(tw), RA_UNLOCKED)
 
 static void	tcp_tw_2msl_reset(struct tcptw *, int);
 static void	tcp_tw_2msl_stop(struct tcptw *, int);
 static int	tcp_twrespond(struct tcptw *, int);
 
 /*
  * Compute the automatic limit on compressed TIME_WAIT entries from the
  * ephemeral port range and the socket limit.
  */
 static int
 tcptw_auto_size(void)
 {
 	int span;
 
 	/*
 	 * Width of the ephemeral port range, whichever of the two bounds
 	 * happens to be the larger one.
 	 */
 	span = (V_ipport_lastauto > V_ipport_firstauto) ?
 	    V_ipport_lastauto - V_ipport_firstauto :
 	    V_ipport_firstauto - V_ipport_lastauto;
 
 	/*
 	 * Use half of the range so TIME_WAIT sockets do not tie up too many
 	 * ephemeral ports, never less than 32 (goofy small ranges), and
 	 * cap the result at a fifth of maxsockets.
 	 */
 	return (imin(imax(span / 2, 32), maxsockets / 5));
 }
 
 static int
 sysctl_maxtcptw(SYSCTL_HANDLER_ARGS)
 {
 	int error, new;
 
 	if (maxtcptw == 0)
 		new = tcptw_auto_size();
 	else
 		new = maxtcptw;
 	error = sysctl_handle_int(oidp, &new, 0, req);
 	if (error == 0 && req->newptr)
 		if (new >= 32) {
 			maxtcptw = new;
 			uma_zone_set_max(V_tcptw_zone, maxtcptw);
 		}
 	return (error);
 }
 
 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxtcptw, CTLTYPE_INT|CTLFLAG_RW,
     &maxtcptw, 0, sysctl_maxtcptw, "IU",
     "Maximum number of compressed TCP TIME_WAIT entries");
 
 VNET_DEFINE(int, nolocaltimewait) = 0;
 #define	V_nolocaltimewait	VNET(nolocaltimewait)
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(nolocaltimewait), 0,
     "Do not create compressed TCP TIME_WAIT entries for local connections");
 
 /*
  * Re-apply the automatic TIME_WAIT zone limit.  Does nothing when an
  * explicit maxtcptw value is configured.
  */
 void
 tcp_tw_zone_change(void)
 {
 
 	if (maxtcptw != 0)
 		return;
 	uma_zone_set_max(V_tcptw_zone, tcptw_auto_size());
 }
 
 /*
  * Set up TIME_WAIT state: the tcptw UMA zone and its limit, the 2MSL
  * queue and the lock protecting that queue.
  */
 void
 tcp_tw_init(void)
 {
 	int limit;
 
 	V_tcptw_zone = uma_zcreate("tcptw", sizeof(struct tcptw),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 
 	/* The tunable may override maxtcptw; 0 selects auto-sizing. */
 	TUNABLE_INT_FETCH("net.inet.tcp.maxtcptw", &maxtcptw);
 	limit = (maxtcptw == 0) ? tcptw_auto_size() : maxtcptw;
 	uma_zone_set_max(V_tcptw_zone, limit);
 
 	TAILQ_INIT(&V_twq_2msl);
 	TW_LOCK_INIT(V_tw_lock, "tcptw");
 }
 
 #ifdef VIMAGE
 /*
  * Tear down TIME_WAIT state: close every entry still on the 2MSL queue,
  * then destroy the queue lock and the tcptw zone.
  */
 void
 tcp_tw_destroy(void)
 {
 	struct tcptw *entry;
 
 	INP_INFO_RLOCK(&V_tcbinfo);
 	/* Always re-read the queue head after each close. */
 	for (entry = TAILQ_FIRST(&V_twq_2msl); entry != NULL;
 	    entry = TAILQ_FIRST(&V_twq_2msl))
 		tcp_twclose(entry, 0);
 	INP_INFO_RUNLOCK(&V_tcbinfo);
 
 	TW_LOCK_DESTROY(V_tw_lock);
 	uma_zdestroy(V_tcptw_zone);
 }
 #endif
 
 /*
  * Move a TCP connection into TIME_WAIT state.
  *    tcbinfo is locked.
  *    inp is locked, and is unlocked before returning.
  *
  * The tcpcb is replaced by a smaller tcptw that keeps only the fields
  * copied below.  If no tcptw can be obtained, or the peer is local and
  * nolocaltimewait is set, the connection is closed with tcp_close()
  * instead.
  */
 void
 tcp_twstart(struct tcpcb *tp)
 {
 	struct tcptw *tw;
 	struct inpcb *inp = tp->t_inpcb;
 	int acknow;
 	struct socket *so;
 #ifdef INET6
 	int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
 #endif
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	/* A dropped inp should never transition to TIME_WAIT state. */
 	KASSERT((inp->inp_flags & INP_DROPPED) == 0, ("tcp_twstart: "
 	    "(inp->inp_flags & INP_DROPPED) != 0"));
 
 	/*
 	 * With nolocaltimewait set, connections whose foreign address is
 	 * local skip TIME_WAIT entirely and are closed right away.
 	 */
 	if (V_nolocaltimewait) {
 		int error = 0;
 #ifdef INET6
 		if (isipv6)
 			error = in6_localaddr(&inp->in6p_faddr);
 #endif
 #if defined(INET6) && defined(INET)
 		else
 #endif
 #ifdef INET
 			error = in_localip(inp->inp_faddr);
 #endif
 		if (error) {
 			tp = tcp_close(tp);
 			/* Unlock only if tcp_close() left the tcpcb in place. */
 			if (tp != NULL)
 				INP_WUNLOCK(inp);
 			return;
 		}
 	}
 
 
 	/*
 	 * For use only by DTrace.  We do not reference the state
 	 * after this point so modifying it in place is not a problem.
 	 */
 	tcp_state_change(tp, TCPS_TIME_WAIT);
 
 	tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);
 	if (tw == NULL) {
 		/*
 		 * Reached limit on total number of TIMEWAIT connections
 		 * allowed. Remove a connection from TIMEWAIT queue in LRU
 		 * fashion to make room for this connection.
 		 *
 		 * XXX:  Check if it possible to always have enough room
 		 * in advance based on guarantees provided by uma_zalloc().
 		 */
 		tw = tcp_tw_2msl_scan(1);
 		if (tw == NULL) {
 			tp = tcp_close(tp);
 			if (tp != NULL)
 				INP_WUNLOCK(inp);
 			return;
 		}
 	}
 	/*
 	 * The tcptw will hold a reference on its inpcb until tcp_twclose
 	 * is called
 	 */
 	tw->tw_inpcb = inp;
 	in_pcbref(inp);	/* Reference from tw */
 
 	/*
 	 * Recover last window size sent.
 	 */
 	if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt))
 		tw->last_win = (tp->rcv_adv - tp->rcv_nxt) >> tp->rcv_scale;
 	else
 		tw->last_win = 0;
 
 	/*
 	 * Set t_recent if timestamps are used on the connection.
 	 */
 	if ((tp->t_flags & (TF_REQ_TSTMP|TF_RCVD_TSTMP|TF_NOOPT)) ==
 	    (TF_REQ_TSTMP|TF_RCVD_TSTMP)) {
 		tw->t_recent = tp->ts_recent;
 		tw->ts_offset = tp->ts_offset;
 	} else {
 		tw->t_recent = 0;
 		tw->ts_offset = 0;
 	}
 
 	/* Copy the sequence state that tcp_twcheck()/tcp_twrespond() use. */
 	tw->snd_nxt = tp->snd_nxt;
 	tw->rcv_nxt = tp->rcv_nxt;
 	tw->iss     = tp->iss;
 	tw->irs     = tp->irs;
 	tw->t_starttime = tp->t_starttime;
 	tw->tw_time = 0;
 
 /* XXX
  * If this code will
  * be used for fin-wait-2 state also, then we may need
  * a ts_recent from the last segment.
  */
 	/* Must be sampled before tcp_discardcb() frees the tcpcb. */
 	acknow = tp->t_flags & TF_ACKNOW;
 
 	/*
 	 * First, discard tcpcb state, which includes stopping its timers and
 	 * freeing it.  tcp_discardcb() used to also release the inpcb, but
 	 * that work is now done in the caller.
 	 *
 	 * Note: soisdisconnected() call used to be made in tcp_discardcb(),
 	 * and might not be needed here any longer.
 	 */
 	tcp_discardcb(tp);
 	so = inp->inp_socket;
 	soisdisconnected(so);
 	tw->tw_cred = crhold(so->so_cred);
 	SOCK_LOCK(so);
 	tw->tw_so_options = so->so_options;
 	SOCK_UNLOCK(so);
 	if (acknow)
 		tcp_twrespond(tw, TH_ACK);
 	/* From here on the inpcb's protocol block is the tcptw. */
 	inp->inp_ppcb = tw;
 	inp->inp_flags |= INP_TIMEWAIT;
 	TCPSTATES_INC(TCPS_TIME_WAIT);
 	tcp_tw_2msl_reset(tw, 0);
 
 	/*
 	 * If the inpcb owns the sole reference to the socket, then we can
 	 * detach and free the socket as it is not needed in time wait.
 	 */
 	if (inp->inp_flags & INP_SOCKREF) {
 		KASSERT(so->so_state & SS_PROTOREF,
 		    ("tcp_twstart: !SS_PROTOREF"));
 		inp->inp_flags &= ~INP_SOCKREF;
 		INP_WUNLOCK(inp);
-		ACCEPT_LOCK();
 		SOCK_LOCK(so);
 		so->so_state &= ~SS_PROTOREF;
 		sofree(so);
 	} else
 		INP_WUNLOCK(inp);
 }
 
 /*
  * Returns 1 if the TIME_WAIT state was killed and we should start over,
  * looking for a pcb in the listen state.  Returns 0 otherwise.
  *
  * On the 0 return path the inpcb is unlocked and the mbuf is freed here;
  * on the 1 return path this function frees neither and only calls
  * tcp_twclose().
  */
 int
 tcp_twcheck(struct inpcb *inp, struct tcpopt *to __unused, struct tcphdr *th,
     struct mbuf *m, int tlen)
 {
 	struct tcptw *tw;
 	int thflags;
 	tcp_seq seq;
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(inp);
 
 	/*
 	 * XXXRW: Time wait state for inpcb has been recycled, but inpcb is
 	 * still present.  This is undesirable, but temporarily necessary
 	 * until we work out how to handle inpcb's whose timewait state has
 	 * been removed.
 	 */
 	tw = intotw(inp);
 	if (tw == NULL)
 		goto drop;
 
 	thflags = th->th_flags;
 
 	/*
 	 * NOTE: for FIN_WAIT_2 (to be added later),
 	 * must validate sequence number before accepting RST
 	 */
 
 	/*
 	 * If the segment contains RST:
 	 *	Drop the segment - see Stevens, vol. 2, p. 964 and
 	 *      RFC 1337.
 	 */
 	if (thflags & TH_RST)
 		goto drop;
 
 #if 0
 /* PAWS not needed at the moment */
 	/*
 	 * RFC 1323 PAWS: If we have a timestamp reply on this segment
 	 * and it's less than ts_recent, drop it.
 	 */
 	if ((to.to_flags & TOF_TS) != 0 && tp->ts_recent &&
 	    TSTMP_LT(to.to_tsval, tp->ts_recent)) {
 		if ((thflags & TH_ACK) == 0)
 			goto drop;
 		goto ack;
 	}
 	/*
 	 * ts_recent is never updated because we never accept new segments.
 	 */
 #endif
 
 	/*
 	 * If a new connection request is received
 	 * while in TIME_WAIT, drop the old connection
 	 * and start over if the sequence numbers
 	 * are above the previous ones.
 	 */
 	if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) {
 		tcp_twclose(tw, 0);
 		return (1);
 	}
 
 	/*
 	 * Drop the segment if it does not contain an ACK.
 	 */
 	if ((thflags & TH_ACK) == 0)
 		goto drop;
 
 	/*
 	 * Reset the 2MSL timer if this is a duplicate FIN.
 	 */
 	if (thflags & TH_FIN) {
 		/* Sequence number of the FIN: start + data (+1 for a SYN). */
 		seq = th->th_seq + tlen + (thflags & TH_SYN ? 1 : 0);
 		if (seq + 1 == tw->rcv_nxt)
 			tcp_tw_2msl_reset(tw, 1);
 	}
 
 	/*
 	 * Acknowledge the segment if it has data or is not a duplicate ACK.
 	 */
 	if (thflags != TH_ACK || tlen != 0 ||
 	    th->th_seq != tw->rcv_nxt || th->th_ack != tw->snd_nxt)
 		tcp_twrespond(tw, TH_ACK);
 drop:
 	INP_WUNLOCK(inp);
 	m_freem(m);
 	return (0);
 }
 
 /*
  * Tear down the TIME_WAIT state 'tw' and drop its inpcb.  The caller holds
  * the tcbinfo read lock and the inpcb write lock (both asserted below).
  * When 'reuse' is non-zero, tcp_tw_2msl_stop() does not return the tcptw
  * to its zone, so the caller may recycle it.
  */
 void
 tcp_twclose(struct tcptw *tw, int reuse)
 {
 	struct socket *so;
 	struct inpcb *inp;
 
 	/*
 	 * At this point, we are in one of two situations:
 	 *
 	 * (1) We have no socket, just an inpcb<->twtcp pair.  We can free
 	 *     all state.
 	 *
 	 * (2) We have a socket -- if we own a reference, release it and
 	 *     notify the socket layer.
 	 */
 	inp = tw->tw_inpcb;
 	KASSERT((inp->inp_flags & INP_TIMEWAIT), ("tcp_twclose: !timewait"));
 	KASSERT(intotw(inp) == tw, ("tcp_twclose: inp_ppcb != tw"));
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);	/* in_pcbfree() */
 	INP_WLOCK_ASSERT(inp);
 
 	/* Unlink from the 2MSL queue and detach the tcptw from the inpcb. */
 	tcp_tw_2msl_stop(tw, reuse);
 	inp->inp_ppcb = NULL;
 	in_pcbdrop(inp);
 
 	so = inp->inp_socket;
 	if (so != NULL) {
 		/*
 		 * If there's a socket, handle two cases: first, we own a
 		 * strong reference, which we will now release, or we don't
 		 * in which case another reference exists (XXXRW: think
 		 * about this more), and we don't need to take action.
 		 */
 		if (inp->inp_flags & INP_SOCKREF) {
 			inp->inp_flags &= ~INP_SOCKREF;
 			INP_WUNLOCK(inp);
-			ACCEPT_LOCK();
 			SOCK_LOCK(so);
 			KASSERT(so->so_state & SS_PROTOREF,
 			    ("tcp_twclose: INP_SOCKREF && !SS_PROTOREF"));
 			so->so_state &= ~SS_PROTOREF;
 			sofree(so);
 		} else {
 			/*
 			 * If we don't own the only reference, the socket and
 			 * inpcb need to be left around to be handled by
 			 * tcp_usr_detach() later.
 			 */
 			INP_WUNLOCK(inp);
 		}
 	} else {
 		/*
 		 * The socket has been already cleaned-up for us, only free the
 		 * inpcb.
 		 */
 		in_pcbfree(inp);
 	}
 	TCPSTAT_INC(tcps_closed);
 }
 
 /*
  * Build and transmit a bare TCP segment carrying 'flags' on behalf of the
  * TIME_WAIT entry 'tw', using the sequence numbers and window saved in it.
  * Returns ENOBUFS if no mbuf is available, otherwise the result of
  * ip_output()/ip6_output().  The inpcb must be write-locked.
  */
 static int
 tcp_twrespond(struct tcptw *tw, int flags)
 {
 	struct inpcb *inp = tw->tw_inpcb;
 #if defined(INET6) || defined(INET)
 	struct tcphdr *th = NULL;
 #endif
 	struct mbuf *m;
 #ifdef INET
 	struct ip *ip = NULL;
 #endif
 	u_int hdrlen, optlen;
 	int error = 0;			/* Keep compiler happy */
 	struct tcpopt to;
 #ifdef INET6
 	struct ip6_hdr *ip6 = NULL;
 	int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
 #endif
 	hdrlen = 0;                     /* Keep compiler happy */
 
 	INP_WLOCK_ASSERT(inp);
 
 	m = m_gethdr(M_NOWAIT, MT_DATA);
 	if (m == NULL)
 		return (ENOBUFS);
 	/* Leave room in front of the packet for a link-layer header. */
 	m->m_data += max_linkhdr;
 
 #ifdef MAC
 	mac_inpcb_create_mbuf(inp, m);
 #endif
 
 	/* Lay out the IPv6 or IPv4 header followed by the TCP header. */
 #ifdef INET6
 	if (isipv6) {
 		hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
 		ip6 = mtod(m, struct ip6_hdr *);
 		th = (struct tcphdr *)(ip6 + 1);
 		tcpip_fillheaders(inp, ip6, th);
 	}
 #endif
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		hdrlen = sizeof(struct tcpiphdr);
 		ip = mtod(m, struct ip *);
 		th = (struct tcphdr *)(ip + 1);
 		tcpip_fillheaders(inp, ip, th);
 	}
 #endif
 	to.to_flags = 0;
 
 	/*
 	 * Send a timestamp and echo-reply if both our side and our peer
 	 * have sent timestamps in our SYN's and this is not a RST.
 	 */
 	if (tw->t_recent && flags == TH_ACK) {
 		to.to_flags |= TOF_TS;
 		to.to_tsval = tcp_ts_getticks() + tw->ts_offset;
 		to.to_tsecr = tw->t_recent;
 	}
 	/* Options are written directly after the TCP header. */
 	optlen = tcp_addoptions(&to, (u_char *)(th + 1));
 
 	m->m_len = hdrlen + optlen;
 	m->m_pkthdr.len = m->m_len;
 
 	KASSERT(max_linkhdr + m->m_len <= MHLEN, ("tcptw: mbuf too small"));
 
 	th->th_seq = htonl(tw->snd_nxt);
 	th->th_ack = htonl(tw->rcv_nxt);
 	/* Data offset is expressed in 32-bit words. */
 	th->th_off = (sizeof(struct tcphdr) + optlen) >> 2;
 	th->th_flags = flags;
 	th->th_win = htons(tw->last_win);
 
 	/*
 	 * Only the pseudo-header sum is stored in th_sum; csum_flags and
 	 * csum_data record that the TCP checksum is still to be completed.
 	 */
 	m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
 #ifdef INET6
 	if (isipv6) {
 		m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
 		th->th_sum = in6_cksum_pseudo(ip6,
 		    sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0);
 		ip6->ip6_hlim = in6_selecthlim(inp, NULL);
 		error = ip6_output(m, inp->in6p_outputopts, NULL,
 		    (tw->tw_so_options & SO_DONTROUTE), NULL, NULL, inp);
 	}
 #endif
 #if defined(INET6) && defined(INET)
 	else
 #endif
 #ifdef INET
 	{
 		m->m_pkthdr.csum_flags = CSUM_TCP;
 		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
 		    htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP));
 		ip->ip_len = htons(m->m_pkthdr.len);
 		if (V_path_mtu_discovery)
 			ip->ip_off |= htons(IP_DF);
 		error = ip_output(m, inp->inp_options, NULL,
 		    ((tw->tw_so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
 		    NULL, inp);
 	}
 #endif
 	/* Statistics are bumped regardless of the output result. */
 	if (flags & TH_ACK)
 		TCPSTAT_INC(tcps_sndacks);
 	else
 		TCPSTAT_INC(tcps_sndctrl);
 	TCPSTAT_INC(tcps_sndtotal);
 	return (error);
 }
 
 /*
  * (Re)start the 2MSL timer of 'tw': stamp its expiry time and place it at
  * the tail of the 2MSL queue.  With 'rearm' set the entry is already
  * queued and is unlinked first.
  */
 static void
 tcp_tw_2msl_reset(struct tcptw *tw, int rearm)
 {
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	INP_WLOCK_ASSERT(tw->tw_inpcb);
 
 	TW_WLOCK(V_tw_lock);
 	tw->tw_time = ticks + 2 * tcp_msl;
 	if (rearm != 0)
 		TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
 	TAILQ_INSERT_TAIL(&V_twq_2msl, tw, tw_2msl);
 	TW_WUNLOCK(V_tw_lock);
 }
 
 /*
  * Remove 'tw' from the 2MSL queue and release what it holds: its
  * credential reference and its reference on the inpcb.  Unless 'reuse' is
  * set, the tcptw itself is returned to its zone.
  */
 static void
 tcp_tw_2msl_stop(struct tcptw *tw, int reuse)
 {
 	struct ucred *cred;
 	struct inpcb *inp;
 	int released;
 
 	INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 
 	/* Detach the inpcb and credential pointers under the queue lock. */
 	TW_WLOCK(V_tw_lock);
 	inp = tw->tw_inpcb;
 	tw->tw_inpcb = NULL;
 
 	TAILQ_REMOVE(&V_twq_2msl, tw, tw_2msl);
 	cred = tw->tw_cred;
 	tw->tw_cred = NULL;
 	TW_WUNLOCK(V_tw_lock);
 
 	/* The credential is freed only after the queue lock is dropped. */
 	if (cred != NULL)
 		crfree(cred);
 
 	/*
 	 * Keep the release call outside of KASSERT(): the reference must be
 	 * dropped even if the assertion is compiled out.
 	 */
 	released = in_pcbrele_wlocked(inp);
 	KASSERT(!released, ("%s: inp should not be released here", __func__));
 
 	if (!reuse)
 		uma_zfree(V_tcptw_zone, tw);
 	TCPSTATES_DEC(TCPS_TIME_WAIT);
 }
 
 /*
  * Walk the 2MSL queue from its head and close entries.
  *
  * With 'reuse' == 0, entries are closed until the queue is empty or the
  * head entry has not yet expired; NULL is returned.
  * With 'reuse' != 0, the first entry that can be closed is closed
  * regardless of its expiry time and returned to the caller for recycling.
  *
  * In both modes the scan stops early, returning NULL, when the tcbinfo
  * read lock cannot be taken without blocking.
  */
 struct tcptw *
 tcp_tw_2msl_scan(int reuse)
 {
 	struct tcptw *tw;
 	struct inpcb *inp;
 
 #ifdef INVARIANTS
 	if (reuse) {
 		/*
 		 * Exclusive pcbinfo lock is not required in reuse case even if
 		 * two inpcb locks can be acquired simultaneously:
 		 *  - the inpcb transitioning to TIME_WAIT state in
 		 *    tcp_twstart(),
 		 *  - the inpcb closed by tcp_twclose().
 		 *
 		 * It is because only inpcbs in FIN_WAIT2 or CLOSING states can
 		 * transition in TIME_WAIT state.  Then an inpcb cannot be in
 		 * TIME_WAIT list and transitioning to TIME_WAIT state at same
 		 * time.
 		 */
 		INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
 	}
 #endif
 
 	for (;;) {
 		TW_RLOCK(V_tw_lock);
 		tw = TAILQ_FIRST(&V_twq_2msl);
 		/* Stop on an empty queue or, unless reusing, an unexpired head. */
 		if (tw == NULL || (!reuse && (tw->tw_time - ticks) > 0)) {
 			TW_RUNLOCK(V_tw_lock);
 			break;
 		}
 		KASSERT(tw->tw_inpcb != NULL, ("%s: tw->tw_inpcb == NULL",
 		    __func__));
 
 		/*
 		 * Take a reference on the inpcb before dropping the queue
 		 * lock, so it can be locked below.
 		 */
 		inp = tw->tw_inpcb;
 		in_pcbref(inp);
 		TW_RUNLOCK(V_tw_lock);
 
 		if (INP_INFO_TRY_RLOCK(&V_tcbinfo)) {
 
 			INP_WLOCK(inp);
 			/* Re-read the tcptw now that the inpcb is locked. */
 			tw = intotw(inp);
 			if (in_pcbrele_wlocked(inp)) {
 				KASSERT(tw == NULL, ("%s: held last inp "
 				    "reference but tw not NULL", __func__));
 				INP_INFO_RUNLOCK(&V_tcbinfo);
 				continue;
 			}
 
 			if (tw == NULL) {
 				/* tcp_twclose() has already been called */
 				INP_WUNLOCK(inp);
 				INP_INFO_RUNLOCK(&V_tcbinfo);
 				continue;
 			}
 
 			tcp_twclose(tw, reuse);
 			INP_INFO_RUNLOCK(&V_tcbinfo);
 			if (reuse)
 			    return tw;
 		} else {
 			/* INP_INFO lock is busy, continue later. */
 			INP_WLOCK(inp);
 			/* Drop the reference taken above before giving up. */
 			if (!in_pcbrele_wlocked(inp))
 				INP_WUNLOCK(inp);
 			break;
 		}
 	}
 
 	return NULL;
 }
Index: head/sys/ofed/drivers/infiniband/core/iwcm.c
===================================================================
--- head/sys/ofed/drivers/infiniband/core/iwcm.c	(revision 319721)
+++ head/sys/ofed/drivers/infiniband/core/iwcm.c	(revision 319722)
@@ -1,1316 +1,1285 @@
 /*
  * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
  * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
  * Copyright (c) 2016 Chelsio Communications.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
  * General Public License (GPL) Version 2, available from the file
  * COPYING in the main directory of this source tree, or the
  * OpenIB.org BSD license below:
  *
  *     Redistribution and use in source and binary forms, with or
  *     without modification, are permitted provided that the following
  *     conditions are met:
  *
  *      - Redistributions of source code must retain the above
  *        copyright notice, this list of conditions and the following
  *        disclaimer.
  *
  *      - Redistributions in binary form must reproduce the above
  *        copyright notice, this list of conditions and the following
  *        disclaimer in the documentation and/or other materials
  *        provided with the distribution.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
  */
 #include "opt_inet.h"
 
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/idr.h>
 #include <linux/interrupt.h>
 #include <linux/rbtree.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
 #include <linux/completion.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/string.h>
 #include <netinet/tcp.h>
 #include <sys/mutex.h>
 
 #include <rdma/rdma_cm.h>
 #include <rdma/iw_cm.h>
 #include <rdma/ib_addr.h>
 
 #include "iwcm.h"
 
 MODULE_AUTHOR("Tom Tucker");
 MODULE_DESCRIPTION("iWARP CM");
 MODULE_LICENSE("Dual BSD/GPL");
 
 static struct workqueue_struct *iwcm_wq;
 struct iwcm_work {
 	struct work_struct work;
 	struct iwcm_id_private *cm_id;
 	struct list_head list;
 	struct iw_cm_event event;
 	struct list_head free_list;
 };
 struct iwcm_listen_work {
 	struct work_struct work;
 	struct iw_cm_id *cm_id;
 };
 
 static LIST_HEAD(listen_port_list);
 
 static DEFINE_MUTEX(listen_port_mutex);
 
 struct listen_port_info {
 	struct list_head list;
 	uint16_t port_num;
 	uint32_t refcnt;
 };
 
 /*
  * Take a listener reference on 'port', creating its list entry on first
  * use.
  *
  * Returns the port's new reference count (always > 0) on success, or
  * -ENOMEM if a new entry could not be allocated.
  */
 static int32_t
 add_port_to_listenlist(uint16_t port)
 {
 	struct listen_port_info *port_info;
 	int32_t ret;
 
 	mutex_lock(&listen_port_mutex);
 
 	list_for_each_entry(port_info, &listen_port_list, list)
 		if (port_info->port_num == port)
 			goto found_port;
 
 	port_info = kmalloc(sizeof(*port_info), GFP_KERNEL);
 	if (!port_info) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
 	port_info->port_num = port;
 	port_info->refcnt    = 0;
 
 	list_add(&port_info->list, &listen_port_list);
 
 found_port:
 	/*
 	 * Read the count back while the mutex is still held: once it is
 	 * dropped, rem_port_from_listenlist() may free the entry.
 	 */
 	ret = ++(port_info->refcnt);
 out:
 	mutex_unlock(&listen_port_mutex);
 	return ret;
 }
 
 /*
  * Drop a listener reference on 'port'.
  *
  * Returns the remaining reference count, or -EINVAL if the port is not
  * on the list.  The entry is unlinked and freed when the count reaches 0.
  */
 static int32_t
 rem_port_from_listenlist(uint16_t port)
 {
 	struct listen_port_info *port_info;
 	int ret = -EINVAL;
 
 	mutex_lock(&listen_port_mutex);
 
 	list_for_each_entry(port_info, &listen_port_list, list) {
 		if (port_info->port_num != port)
 			continue;
 
 		ret = --(port_info->refcnt);
 		if (ret == 0) {
 			/* Remove this entry from the list as there are no
 			 * more listeners for this port_num.
 			 */
 			list_del(&port_info->list);
 			kfree(port_info);
 		}
 		/* The entry may be gone; do not keep iterating. */
 		break;
 	}
 
 	mutex_unlock(&listen_port_mutex);
 	return ret;
 }
 
 /*
  * The following services provide a mechanism for pre-allocating iwcm_work
  * elements.  The design pre-allocates them  based on the cm_id type:
  *	LISTENING IDS: 	Get enough elements preallocated to handle the
  *			listen backlog.
  *	ACTIVE IDS:	4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
  *	PASSIVE IDS:	3: ESTABLISHED, DISCONNECT, CLOSE
  *
  * Allocating them in connect and listen avoids having to deal
  * with allocation failures on the event upcall from the provider (which
  * is called in the interrupt context).
  *
  * One exception is when creating the cm_id for incoming connection requests.
  * There are two cases:
  * 1) in the event upcall, cm_event_handler(), for a listening cm_id.  If
  *    the backlog is exceeded, then no more connection request events will
  *    be processed.  cm_event_handler() returns -ENOMEM in this case.  It's up
  *    to the provider to reject the connection request.
  * 2) in the connection request workqueue handler, cm_conn_req_handler().
  *    If work elements cannot be allocated for the new connect request cm_id,
  *    then IWCM will call the provider reject method.  This is ok since
  *    cm_conn_req_handler() runs in the workqueue thread context.
  */
 
 /*
  * Take one pre-allocated work element off the cm_id's free list.
  * Returns NULL when the free list is empty.
  */
 static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
 {
 	struct list_head *first;
 	struct iwcm_work *work = NULL;
 
 	if (!list_empty(&cm_id_priv->work_free_list)) {
 		first = cm_id_priv->work_free_list.next;
 		work = list_entry(first, struct iwcm_work, free_list);
 		list_del_init(&work->free_list);
 	}
 	return work;
 }
 
 static void put_work(struct iwcm_work *work)
 {
 	list_add(&work->free_list, &work->cm_id->work_free_list);
 }
 
 static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
 {
 	struct list_head *e, *tmp;
 
 	list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
 		kfree(list_entry(e, struct iwcm_work, free_list));
 }
 
 static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
 {
 	struct iwcm_work *work;
 
 	BUG_ON(!list_empty(&cm_id_priv->work_free_list));
 	while (count--) {
 		work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
 		if (!work) {
 			dealloc_work_entries(cm_id_priv);
 			return -ENOMEM;
 		}
 		work->cm_id = cm_id_priv;
 		INIT_LIST_HEAD(&work->list);
 		put_work(work);
 	}
 	return 0;
 }
 
 /*
  * Save private data from incoming connection requests to
  * iw_cm_event, so the low level driver doesn't have to. Adjust
  * the event ptr to point to the local copy.
  */
 static int copy_private_data(struct iw_cm_event *event)
 {
 	void *p;
 
 	p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC);
 	if (!p)
 		return -ENOMEM;
 	event->private_data = p;
 	return 0;
 }
 
 /* Release a cm_id: first its pre-allocated work elements, then itself. */
 static void free_cm_id(struct iwcm_id_private *cm_id_priv)
 {
 	/* The work elements hang off cm_id_priv, so they must go first. */
 	dealloc_work_entries(cm_id_priv);
 
 	kfree(cm_id_priv);
 }
 
 /*
  * Release a reference on cm_id. If the last reference is being
  * released, enable the waiting thread (in iw_destroy_cm_id) to
  * get woken up, and return 1 if a thread is already waiting.
  */
 static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
 {
 	BUG_ON(atomic_read(&cm_id_priv->refcount) == 0);
 
 	/* Other references remain; nothing to signal. */
 	if (!atomic_dec_and_test(&cm_id_priv->refcount))
 		return 0;
 
 	/* Last reference gone: no work may be pending any more. */
 	BUG_ON(!list_empty(&cm_id_priv->work_list));
 	complete(&cm_id_priv->destroy_comp);
 	return 1;
 }
 
 /* Take an additional reference on the private state behind 'cm_id'. */
 static void add_ref(struct iw_cm_id *cm_id)
 {
 	struct iwcm_id_private *cm_id_priv =
 	    container_of(cm_id, struct iwcm_id_private, id);
 
 	atomic_inc(&cm_id_priv->refcount);
 }
 
 /*
  * Drop a reference on the private state behind 'cm_id'.  If that was the
  * last reference and IWCM_F_CALLBACK_DESTROY was set, the cm_id is freed
  * here.
  */
 static void rem_ref(struct iw_cm_id *cm_id)
 {
 	struct iwcm_id_private *cm_id_priv =
 	    container_of(cm_id, struct iwcm_id_private, id);
 	int destroy_on_last;
 
 	/*
 	 * Sample the flag before dropping the reference: afterwards the
 	 * cm_id may already have been freed on another thread.
 	 */
 	destroy_on_last = test_bit(IWCM_F_CALLBACK_DESTROY,
 				   &cm_id_priv->flags);
 
 	if (!iwcm_deref_id(cm_id_priv))
 		return;
 	if (!destroy_on_last)
 		return;
 
 	BUG_ON(!list_empty(&cm_id_priv->work_list));
 	free_cm_id(cm_id_priv);
 }
 
 static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
 
 /*
  * Allocate and initialise a new cm_id in the IDLE state, holding one
  * reference.  Returns the embedded iw_cm_id, or ERR_PTR(-ENOMEM) if the
  * allocation fails.
  */
 struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
 				 struct socket *so,
 				 iw_cm_handler cm_handler,
 				 void *context)
 {
 	struct iwcm_id_private *cm_id_priv;
 	struct iw_cm_id *id;
 
 	cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
 	if (cm_id_priv == NULL)
 		return ERR_PTR(-ENOMEM);
 
 	/* Caller-visible part of the cm_id. */
 	id = &cm_id_priv->id;
 	id->device = device;
 	id->so = so;
 	id->cm_handler = cm_handler;
 	id->context = context;
 	id->event_handler = cm_event_handler;
 	id->add_ref = add_ref;
 	id->rem_ref = rem_ref;
 
 	/* Private bookkeeping. */
 	cm_id_priv->state = IW_CM_STATE_IDLE;
 	spin_lock_init(&cm_id_priv->lock);
 	atomic_set(&cm_id_priv->refcount, 1);
 	init_waitqueue_head(&cm_id_priv->connect_wait);
 	init_completion(&cm_id_priv->destroy_comp);
 	INIT_LIST_HEAD(&cm_id_priv->work_list);
 	INIT_LIST_HEAD(&cm_id_priv->work_free_list);
 
 	return id;
 }
 EXPORT_SYMBOL(iw_create_cm_id);
 
 
 static int iwcm_modify_qp_err(struct ib_qp *qp)
 {
 	struct ib_qp_attr qp_attr;
 
 	if (!qp)
 		return -EINVAL;
 
 	qp_attr.qp_state = IB_QPS_ERR;
 	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
 }
 
 /*
  * This is really the RDMAC CLOSING state. It is most similar to the
  * IB SQD QP state.
  */
 static int iwcm_modify_qp_sqd(struct ib_qp *qp)
 {
 	struct ib_qp_attr qp_attr;
 
 	/* Unlike the ERR variant, a NULL qp is a programming error here. */
 	BUG_ON(!qp);
 
 	qp_attr.qp_state = IB_QPS_SQD;
 	return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
 }
 
 /*
  * CM_ID <-- CLOSING
  *
  * Block if a passive or active connection is currently being processed. Then
  * process the event as follows:
  * - If we are ESTABLISHED, move to CLOSING and modify the QP state
  *   based on the abrupt flag
  * - If the connection is already in the CLOSING or IDLE state, the peer is
  *   disconnecting concurrently with us and we've already seen the
  *   DISCONNECT event -- ignore the request and return 0
  * - Disconnect on a listening endpoint returns -EINVAL
  */
 int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
 {
 	struct iwcm_id_private *cm_id_priv;
 	unsigned long flags;
 	int ret = 0;
 	struct ib_qp *qp = NULL;
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 	/* Wait if we're currently in a connect or accept downcall */
 	wait_event(cm_id_priv->connect_wait,
 		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
 
 	/*
 	 * Decide what to do under the lock; the QP itself is modified only
 	 * after the lock has been dropped.
 	 */
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	switch (cm_id_priv->state) {
 	case IW_CM_STATE_ESTABLISHED:
 		cm_id_priv->state = IW_CM_STATE_CLOSING;
 
 		/* QP could be <nul> for user-mode client */
 		if (cm_id_priv->qp)
 			qp = cm_id_priv->qp;
 		else
 			ret = -EINVAL;
 		break;
 	case IW_CM_STATE_LISTEN:
 		ret = -EINVAL;
 		break;
 	case IW_CM_STATE_CLOSING:
 		/* remote peer closed first */
 	case IW_CM_STATE_IDLE:
 		/* accept or connect returned !0 */
 		break;
 	case IW_CM_STATE_CONN_RECV:
 		/*
 		 * App called disconnect before/without calling accept after
 		 * connect_request event delivered.
 		 */
 		break;
 	case IW_CM_STATE_CONN_SENT:
 		/* Can only get here if wait above fails */
 	default:
 		BUG();
 	}
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
 	/* qp is non-NULL only for the ESTABLISHED case with a QP attached. */
 	if (qp) {
 		if (abrupt)
 			ret = iwcm_modify_qp_err(qp);
 		else
 			ret = iwcm_modify_qp_sqd(qp);
 
 		/*
 		 * If both sides are disconnecting the QP could
 		 * already be in ERR or SQD states
 		 */
 		/* The modify result is therefore deliberately discarded. */
 		ret = 0;
 	}
 
 	return ret;
 }
 EXPORT_SYMBOL(iw_cm_disconnect);
 
 /*
  * Take one completed connection off the listening socket 'head', accept
  * it, and return the new socket.  Returns NULL when there is nothing to
  * dequeue.  The peer address produced by soaccept() is not needed and is
  * freed immediately.
  *
  * NOTE(review): in the new code only EWOULDBLOCK from solisten_dequeue()
  * is treated as "nothing to dequeue"; confirm it cannot fail in another
  * way that leaves 'so' unset.
  */
 static struct socket *
 dequeue_socket(struct socket *head)
 {
 	struct socket *so;
 	struct sockaddr_in *remote;
+	int error;
 
-	ACCEPT_LOCK();
-	so = TAILQ_FIRST(&head->so_comp);
-	if (!so) {
-		ACCEPT_UNLOCK();
-		return NULL;
-	}
-
-	SOCK_LOCK(so);
-	/*
-	 * Before changing the flags on the socket, we have to bump the
-	 * reference count.  Otherwise, if the protocol calls sofree(),
-	 * the socket will be released due to a zero refcount.
-	 */
-	soref(so);
-	TAILQ_REMOVE(&head->so_comp, so, so_list);
-	head->so_qlen--;
-	so->so_qstate &= ~SQ_COMP;
-	so->so_head = NULL;
-	so->so_state |= SS_NBIO;
-	SOCK_UNLOCK(so);
-	ACCEPT_UNLOCK();
+	SOLISTEN_LOCK(head);
+	error = solisten_dequeue(head, &so, SOCK_NONBLOCK);
+	if (error == EWOULDBLOCK)
+		return (NULL);
 	remote = NULL;
 	soaccept(so, (struct sockaddr **)&remote);
 
 	free(remote, M_SONAME);
 	return so;
 }
+
 static void
 iw_so_event_handler(struct work_struct *_work)
 {
 #ifdef INET
 	struct	iwcm_listen_work *work = container_of(_work,
 						struct iwcm_listen_work, work);
 	struct	iw_cm_id *listen_cm_id = work->cm_id;
 	struct	iwcm_id_private *cm_id_priv;
 	struct	iw_cm_id *real_cm_id;
 	struct	sockaddr_in *local;
 	struct	socket *so;
 
 	cm_id_priv = container_of(listen_cm_id, struct iwcm_id_private, id);
 
 	if (cm_id_priv->state != IW_CM_STATE_LISTEN) {
 		kfree(work);
 		return;
 	}
 
 	/* Dequeue & process  all new 'so' connection requests for this cmid */
 	while ((so = dequeue_socket(work->cm_id->so)) != NULL) {
 		if (rdma_cma_any_addr((struct sockaddr *)
 					&listen_cm_id->local_addr)) {
 			in_getsockaddr(so, (struct sockaddr **)&local);
 			if (rdma_find_cmid_laddr(local, ARPHRD_ETHER,
 					(void **) &real_cm_id)) {
 				free(local, M_SONAME);
 				goto err;
 			}
 			free(local, M_SONAME);
 
 			real_cm_id->device->iwcm->newconn(real_cm_id, so);
 		} else {
 			listen_cm_id->device->iwcm->newconn(listen_cm_id, so);
 		}
 	}
 err:
 	kfree(work);
 #endif
 	return;
 }
+
 static int
 iw_so_upcall(struct socket *parent_so, void *arg, int waitflag)
 {
 	struct iwcm_listen_work *work;
-	struct socket *so;
 	struct iw_cm_id *cm_id = arg;
 
 	/* check whether iw_so_event_handler() already dequeued this 'so' */
-	so = TAILQ_FIRST(&parent_so->so_comp);
-	if (!so)
+	if (TAILQ_EMPTY(&parent_so->sol_comp))
 		return SU_OK;
-	work = kzalloc(sizeof(*work), M_NOWAIT);
+	work = kzalloc(sizeof(*work), waitflag);
 	if (!work)
 		return -ENOMEM;
 	work->cm_id = cm_id;
 
 	INIT_WORK(&work->work, iw_so_event_handler);
 	queue_work(iwcm_wq, &work->work);
 
 	return SU_OK;
 }
 
-static void
-iw_init_sock(struct iw_cm_id *cm_id)
+static int
+iw_create_listen(struct iw_cm_id *cm_id, int backlog)
 {
 	struct sockopt sopt;
 	struct socket *so = cm_id->so;
 	int on = 1;
+	int rc;
 
-	SOCK_LOCK(so);
-	soupcall_set(so, SO_RCV, iw_so_upcall, cm_id);
+	rc = -solisten(cm_id->so, backlog, curthread);
+	if (rc != 0)
+		return (rc);
+	SOLISTEN_LOCK(so);
+	solisten_upcall_set(so, iw_so_upcall, cm_id);
 	so->so_state |= SS_NBIO;
-	SOCK_UNLOCK(so);
+	SOLISTEN_UNLOCK(so);
 	sopt.sopt_dir = SOPT_SET;
 	sopt.sopt_level = IPPROTO_TCP;
 	sopt.sopt_name = TCP_NODELAY;
 	sopt.sopt_val = (caddr_t)&on;
 	sopt.sopt_valsize = sizeof(on);
 	sopt.sopt_td = NULL;
 	sosetopt(so, &sopt);
-}
-
-static int
-iw_uninit_socket(struct iw_cm_id *cm_id)
-{
-	struct socket *so = cm_id->so;
-
-	SOCK_LOCK(so);
-	soupcall_clear(so, SO_RCV);
-	SOCK_UNLOCK(so);
-
 	return (0);
 }
 
 static int
-iw_create_listen(struct iw_cm_id *cm_id, int backlog)
-{
-	int rc;
-
-	iw_init_sock(cm_id);
-	rc = -solisten(cm_id->so, backlog, curthread);
-	if (rc != 0)
-		iw_uninit_socket(cm_id);
-	return (rc);
-}
-
-static int
 iw_destroy_listen(struct iw_cm_id *cm_id)
 {
+	struct socket *so = cm_id->so;
 
-	return (iw_uninit_socket(cm_id));
+	SOLISTEN_LOCK(so);
+	solisten_upcall_set(so, NULL, NULL);
+	SOLISTEN_UNLOCK(so);
+	return (0);
 }
 
 
 /*
  * CM_ID <-- DESTROYING
  *
  * Clean up all resources associated with the connection and release
  * the initial reference taken by iw_create_cm_id.
  */
 static void destroy_cm_id(struct iw_cm_id *cm_id)
 {
 	struct iwcm_id_private *cm_id_priv;
 	unsigned long flags;
 	int ret = 0, refcnt;
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 	/*
 	 * Wait if we're currently in a connect or accept downcall. A
 	 * listening endpoint should never block here.
 	 */
 	wait_event(cm_id_priv->connect_wait,
 		   !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	switch (cm_id_priv->state) {
 	case IW_CM_STATE_LISTEN:
 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		if (rdma_cma_any_addr((struct sockaddr *)&cm_id->local_addr)) {
 			refcnt =
 			  rem_port_from_listenlist(cm_id->local_addr.sin_port);
 
 			if (refcnt == 0)
 				ret = iw_destroy_listen(cm_id);
 
 			cm_id->device->iwcm->destroy_listen_ep(cm_id);
 		} else {
 			ret = iw_destroy_listen(cm_id);
 			cm_id->device->iwcm->destroy_listen_ep(cm_id);
 		}
 		spin_lock_irqsave(&cm_id_priv->lock, flags);
 		break;
 	case IW_CM_STATE_ESTABLISHED:
 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		/* Abrupt close of the connection */
 		(void)iwcm_modify_qp_err(cm_id_priv->qp);
 		spin_lock_irqsave(&cm_id_priv->lock, flags);
 		break;
 	case IW_CM_STATE_IDLE:
 	case IW_CM_STATE_CLOSING:
 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
 		break;
 	case IW_CM_STATE_CONN_RECV:
 		/*
 		 * App called destroy before/without calling accept after
 		 * receiving connection request event notification or
 		 * returned non zero from the event callback function.
 		 * In either case, must tell the provider to reject.
 		 */
 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_id->device->iwcm->reject(cm_id, NULL, 0);
 		spin_lock_irqsave(&cm_id_priv->lock, flags);
 		break;
 	case IW_CM_STATE_CONN_SENT:
 	case IW_CM_STATE_DESTROYING:
 	default:
 		BUG();
 		break;
 	}
 	if (cm_id_priv->qp) {
 		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
 		cm_id_priv->qp = NULL;
 	}
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
 	(void)iwcm_deref_id(cm_id_priv);
 }
 
 /*
  * This function is only called by the application thread and cannot
  * be called by the event thread. The function will wait for all
  * references to be released on the cm_id and then kfree the cm_id
  * object.
  */
 void iw_destroy_cm_id(struct iw_cm_id *cm_id)
 {
 	struct iwcm_id_private *cm_id_priv;
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 	BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
 
 	destroy_cm_id(cm_id);
 
 	wait_for_completion(&cm_id_priv->destroy_comp);
 
 	if (cm_id->so)
 		sock_release(cm_id->so);
 
 	free_cm_id(cm_id_priv);
 }
 EXPORT_SYMBOL(iw_destroy_cm_id);
 
 /*
  * CM_ID <-- LISTEN
  *
  * Start listening for connect requests. Generates one CONNECT_REQUEST
  * event for each inbound connect request.
  */
 int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
 {
 	struct iwcm_id_private *cm_id_priv;
 	unsigned long flags;
 	int ret, refcnt;
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 
 	ret = alloc_work_entries(cm_id_priv, backlog);
 	if (ret)
 		return ret;
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	switch (cm_id_priv->state) {
 	case IW_CM_STATE_IDLE:
 		cm_id_priv->state = IW_CM_STATE_LISTEN;
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
 		if (rdma_cma_any_addr((struct sockaddr *)&cm_id->local_addr)) {
 			refcnt =
 			  add_port_to_listenlist(cm_id->local_addr.sin_port);
 
 			if (refcnt == 1) {
 				ret = iw_create_listen(cm_id, backlog);
 			} else if (refcnt <= 0) {
 				ret = -EINVAL;
 			} else {
 				/* if refcnt > 1, a socket listener created
 				 * already. And we need not create socket
 				 * listener on other rdma devices/listen cm_id's
 				 * due to TOE. That is when a socket listener is
 				 * created with INADDR_ANY all registered TOE
 				 * devices will get a call to start
 				 * hardware listeners.
 				 */
 			}
 		} else {
 			ret = iw_create_listen(cm_id, backlog);
 		}
 		if (!ret)
 			cm_id->device->iwcm->create_listen_ep(cm_id, backlog);
 		else
 			cm_id_priv->state = IW_CM_STATE_IDLE;
 
 		spin_lock_irqsave(&cm_id_priv->lock, flags);
 		break;
 	default:
 		ret = -EINVAL;
 	}
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
 	return ret;
 }
 EXPORT_SYMBOL(iw_cm_listen);
 
 /*
  * CM_ID <-- IDLE
  *
  * Rejects an inbound connection request. No events are generated.
  */
 int iw_cm_reject(struct iw_cm_id *cm_id,
 		 const void *private_data,
 		 u8 private_data_len)
 {
 	struct iwcm_id_private *cm_id_priv;
 	unsigned long flags;
 	int ret;
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 		wake_up_all(&cm_id_priv->connect_wait);
 		return -EINVAL;
 	}
 	cm_id_priv->state = IW_CM_STATE_IDLE;
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
 	ret = cm_id->device->iwcm->reject(cm_id, private_data,
 					  private_data_len);
 
 	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 	wake_up_all(&cm_id_priv->connect_wait);
 
 	return ret;
 }
 EXPORT_SYMBOL(iw_cm_reject);
 
 /*
  * CM_ID <-- ESTABLISHED
  *
  * Accepts an inbound connection request and generates an ESTABLISHED
  * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
  * until the ESTABLISHED event is received from the provider.
  */
 int iw_cm_accept(struct iw_cm_id *cm_id,
 		 struct iw_cm_conn_param *iw_param)
 {
 	struct iwcm_id_private *cm_id_priv;
 	struct ib_qp *qp;
 	unsigned long flags;
 	int ret;
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 		wake_up_all(&cm_id_priv->connect_wait);
 		return -EINVAL;
 	}
 	/* Get the ib_qp given the QPN */
 	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
 	if (!qp) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 		wake_up_all(&cm_id_priv->connect_wait);
 		return -EINVAL;
 	}
 	cm_id->device->iwcm->add_ref(qp);
 	cm_id_priv->qp = qp;
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
 	ret = cm_id->device->iwcm->accept(cm_id, iw_param);
 	if (ret) {
 		/* An error on accept precludes provider events */
 		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
 		cm_id_priv->state = IW_CM_STATE_IDLE;
 		spin_lock_irqsave(&cm_id_priv->lock, flags);
 		if (cm_id_priv->qp) {
 			cm_id->device->iwcm->rem_ref(qp);
 			cm_id_priv->qp = NULL;
 		}
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 		wake_up_all(&cm_id_priv->connect_wait);
 	}
 
 	return ret;
 }
 EXPORT_SYMBOL(iw_cm_accept);
 
 /*
  * Active Side: CM_ID <-- CONN_SENT
  *
  * If successful, results in the generation of a CONNECT_REPLY
  * event. iw_cm_disconnect and iw_cm_destroy will block until the
  * CONNECT_REPLY event is received from the provider.
  */
 int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
 {
 	struct iwcm_id_private *cm_id_priv;
 	int ret;
 	unsigned long flags;
 	struct ib_qp *qp;
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 
 	ret = alloc_work_entries(cm_id_priv, 4);
 	if (ret)
 		return ret;
 
 	set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 
 	if (cm_id_priv->state != IW_CM_STATE_IDLE) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 		wake_up_all(&cm_id_priv->connect_wait);
 		return -EINVAL;
 	}
 
 	/* Get the ib_qp given the QPN */
 	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
 	if (!qp) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 		wake_up_all(&cm_id_priv->connect_wait);
 		return -EINVAL;
 	}
 	cm_id->device->iwcm->add_ref(qp);
 	cm_id_priv->qp = qp;
 	cm_id_priv->state = IW_CM_STATE_CONN_SENT;
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
 	ret = cm_id->device->iwcm->connect(cm_id, iw_param);
 	if (ret) {
 		spin_lock_irqsave(&cm_id_priv->lock, flags);
 		if (cm_id_priv->qp) {
 			cm_id->device->iwcm->rem_ref(qp);
 			cm_id_priv->qp = NULL;
 		}
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
 		cm_id_priv->state = IW_CM_STATE_IDLE;
 		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 		wake_up_all(&cm_id_priv->connect_wait);
 	}
 
 	return ret;
 }
 EXPORT_SYMBOL(iw_cm_connect);
 
 /*
  * Passive Side: new CM_ID <-- CONN_RECV
  *
  * Handles an inbound connect request. The function creates a new
  * iw_cm_id to represent the new connection and inherits the client
  * callback function and other attributes from the listening parent.
  *
  * The work item contains a pointer to the listen_cm_id and the event. The
  * listen_cm_id contains the client cm_handler, context and
  * device. These are copied when the device is cloned. The event
  * contains the new four tuple.
  *
  * An error on the child should not affect the parent, so this
  * function does not return a value.
  */
 static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
 				struct iw_cm_event *iw_event)
 {
 	unsigned long flags;
 	struct iw_cm_id *cm_id;
 	struct iwcm_id_private *cm_id_priv;
 	int ret;
 
 	/*
 	 * The provider should never generate a connection request
 	 * event with a bad status.
 	 */
 	BUG_ON(iw_event->status);
 
 	cm_id = iw_create_cm_id(listen_id_priv->id.device,
 				iw_event->so,
 				listen_id_priv->id.cm_handler,
 				listen_id_priv->id.context);
 	/* If the cm_id could not be created, ignore the request */
 	if (IS_ERR(cm_id))
 		goto out;
 
 	cm_id->provider_data = iw_event->provider_data;
 	cm_id->local_addr = iw_event->local_addr;
 	cm_id->remote_addr = iw_event->remote_addr;
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 	cm_id_priv->state = IW_CM_STATE_CONN_RECV;
 
 	/*
 	 * We could be destroying the listening id. If so, ignore this
 	 * upcall.
 	 */
 	spin_lock_irqsave(&listen_id_priv->lock, flags);
 	if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
 		spin_unlock_irqrestore(&listen_id_priv->lock, flags);
 		iw_cm_reject(cm_id, NULL, 0);
 		iw_destroy_cm_id(cm_id);
 		goto out;
 	}
 	spin_unlock_irqrestore(&listen_id_priv->lock, flags);
 
 	ret = alloc_work_entries(cm_id_priv, 3);
 	if (ret) {
 		iw_cm_reject(cm_id, NULL, 0);
 		iw_destroy_cm_id(cm_id);
 		goto out;
 	}
 
 	/* Call the client CM handler */
 	ret = cm_id->cm_handler(cm_id, iw_event);
 	if (ret) {
 		iw_cm_reject(cm_id, NULL, 0);
 		set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
 		destroy_cm_id(cm_id);
 		if (atomic_read(&cm_id_priv->refcount)==0)
 			free_cm_id(cm_id_priv);
 	}
 
 out:
 	if (iw_event->private_data_len)
 		kfree(iw_event->private_data);
 }
 
 /*
  * Passive Side: CM_ID <-- ESTABLISHED
  *
  * The provider generated an ESTABLISHED event which means that
  * the MPA negotion has completed successfully and we are now in MPA
  * FPDU mode.
  *
  * This event can only be received in the CONN_RECV state. If the
  * remote peer closed, the ESTABLISHED event would be received followed
  * by the CLOSE event. If the app closes, it will block until we wake
  * it up after processing this event.
  */
 static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
 			       struct iw_cm_event *iw_event)
 {
 	unsigned long flags;
 	int ret;
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 
 	/*
 	 * We clear the CONNECT_WAIT bit here to allow the callback
 	 * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
 	 * from a callback handler is not allowed.
 	 */
 	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
 	cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
 	wake_up_all(&cm_id_priv->connect_wait);
 
 	return ret;
 }
 
 /*
  * Active Side: CM_ID <-- ESTABLISHED
  *
  * The app has called connect and is waiting for the established event to
  * post it's requests to the server. This event will wake up anyone
  * blocked in iw_cm_disconnect or iw_destroy_id.
  */
 static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
 			       struct iw_cm_event *iw_event)
 {
 	unsigned long flags;
 	int ret;
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	/*
 	 * Clear the connect wait bit so a callback function calling
 	 * iw_cm_disconnect will not wait and deadlock this thread
 	 */
 	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
 	if (iw_event->status == 0) {
 		cm_id_priv->id.local_addr = iw_event->local_addr;
 		cm_id_priv->id.remote_addr = iw_event->remote_addr;
 		cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
 	} else {
 		/* REJECTED or RESET */
 		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
 		cm_id_priv->qp = NULL;
 		cm_id_priv->state = IW_CM_STATE_IDLE;
 	}
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 	ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
 
 	if (iw_event->private_data_len)
 		kfree(iw_event->private_data);
 
 	/* Wake up waiters on connect complete */
 	wake_up_all(&cm_id_priv->connect_wait);
 
 	return ret;
 }
 
 /*
  * CM_ID <-- CLOSING
  *
  * If in the ESTABLISHED state, move to CLOSING.
  */
 static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
 				  struct iw_cm_event *iw_event)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
 		cm_id_priv->state = IW_CM_STATE_CLOSING;
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 }
 
 /*
  * CM_ID <-- IDLE
  *
  * If in the ESTBLISHED or CLOSING states, the QP will have have been
  * moved by the provider to the ERR state. Disassociate the CM_ID from
  * the QP,  move to IDLE, and remove the 'connected' reference.
  *
  * If in some other state, the cm_id was destroyed asynchronously.
  * This is the last reference that will result in waking up
  * the app thread blocked in iw_destroy_cm_id.
  */
 static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
 				  struct iw_cm_event *iw_event)
 {
 	unsigned long flags;
 	int ret = 0;
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 
 	if (cm_id_priv->qp) {
 		cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
 		cm_id_priv->qp = NULL;
 	}
 	switch (cm_id_priv->state) {
 	case IW_CM_STATE_ESTABLISHED:
 	case IW_CM_STATE_CLOSING:
 		cm_id_priv->state = IW_CM_STATE_IDLE;
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
 		spin_lock_irqsave(&cm_id_priv->lock, flags);
 		break;
 	case IW_CM_STATE_DESTROYING:
 		break;
 	default:
 		BUG();
 	}
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
 	return ret;
 }
 
 static int process_event(struct iwcm_id_private *cm_id_priv,
 			 struct iw_cm_event *iw_event)
 {
 	int ret = 0;
 
 	switch (iw_event->event) {
 	case IW_CM_EVENT_CONNECT_REQUEST:
 		cm_conn_req_handler(cm_id_priv, iw_event);
 		break;
 	case IW_CM_EVENT_CONNECT_REPLY:
 		ret = cm_conn_rep_handler(cm_id_priv, iw_event);
 		break;
 	case IW_CM_EVENT_ESTABLISHED:
 		ret = cm_conn_est_handler(cm_id_priv, iw_event);
 		break;
 	case IW_CM_EVENT_DISCONNECT:
 		cm_disconnect_handler(cm_id_priv, iw_event);
 		break;
 	case IW_CM_EVENT_CLOSE:
 		ret = cm_close_handler(cm_id_priv, iw_event);
 		break;
 	default:
 		BUG();
 	}
 
 	return ret;
 }
 
 /*
  * Process events on the work_list for the cm_id. If the callback
  * function requests that the cm_id be deleted, a flag is set in the
  * cm_id flags to indicate that when the last reference is
  * removed, the cm_id is to be destroyed. This is necessary to
  * distinguish between an object that will be destroyed by the app
  * thread asleep on the destroy_comp list vs. an object destroyed
  * here synchronously when the last reference is removed.
  */
 static void cm_work_handler(struct work_struct *_work)
 {
 	struct iwcm_work *work = container_of(_work, struct iwcm_work, work);
 	struct iw_cm_event levent;
 	struct iwcm_id_private *cm_id_priv = work->cm_id;
 	unsigned long flags;
 	int empty;
 	int ret = 0;
 	int destroy_id;
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	empty = list_empty(&cm_id_priv->work_list);
 	while (!empty) {
 		work = list_entry(cm_id_priv->work_list.next,
 				  struct iwcm_work, list);
 		list_del_init(&work->list);
 		empty = list_empty(&cm_id_priv->work_list);
 		levent = work->event;
 		put_work(work);
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
 		ret = process_event(cm_id_priv, &levent);
 		if (ret) {
 			set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
 			destroy_cm_id(&cm_id_priv->id);
 		}
 		BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
 		destroy_id = test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
 		if (iwcm_deref_id(cm_id_priv)) {
 			if (destroy_id) {
 				BUG_ON(!list_empty(&cm_id_priv->work_list));
 				free_cm_id(cm_id_priv);
 			}
 			return;
 		}
 		if (empty)
 			return;
 		spin_lock_irqsave(&cm_id_priv->lock, flags);
 	}
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 }
 
 /*
  * This function is called on interrupt context. Schedule events on
  * the iwcm_wq thread to allow callback functions to downcall into
  * the CM and/or block.  Events are queued to a per-CM_ID
  * work_list. If this is the first event on the work_list, the work
  * element is also queued on the iwcm_wq thread.
  *
  * Each event holds a reference on the cm_id. Until the last posted
  * event has been delivered and processed, the cm_id cannot be
  * deleted.
  *
  * Returns:
  * 	      0	- the event was handled.
  *	-ENOMEM	- the event was not handled due to lack of resources.
  */
 static int cm_event_handler(struct iw_cm_id *cm_id,
 			     struct iw_cm_event *iw_event)
 {
 	struct iwcm_work *work;
 	struct iwcm_id_private *cm_id_priv;
 	unsigned long flags;
 	int ret = 0;
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	work = get_work(cm_id_priv);
 	if (!work) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
 	INIT_WORK(&work->work, cm_work_handler);
 	work->cm_id = cm_id_priv;
 	work->event = *iw_event;
 
 	if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
 	     work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
 	    work->event.private_data_len) {
 		ret = copy_private_data(&work->event);
 		if (ret) {
 			put_work(work);
 			goto out;
 		}
 	}
 
 	atomic_inc(&cm_id_priv->refcount);
 	if (list_empty(&cm_id_priv->work_list)) {
 		list_add_tail(&work->list, &cm_id_priv->work_list);
 		queue_work(iwcm_wq, &work->work);
 	} else
 		list_add_tail(&work->list, &cm_id_priv->work_list);
 out:
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 	return ret;
 }
 
 static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
 				  struct ib_qp_attr *qp_attr,
 				  int *qp_attr_mask)
 {
 	unsigned long flags;
 	int ret;
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	switch (cm_id_priv->state) {
 	case IW_CM_STATE_IDLE:
 	case IW_CM_STATE_CONN_SENT:
 	case IW_CM_STATE_CONN_RECV:
 	case IW_CM_STATE_ESTABLISHED:
 		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
 		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE|
 					   IB_ACCESS_REMOTE_READ;
 		ret = 0;
 		break;
 	default:
 		ret = -EINVAL;
 		break;
 	}
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 	return ret;
 }
 
 static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
 				  struct ib_qp_attr *qp_attr,
 				  int *qp_attr_mask)
 {
 	unsigned long flags;
 	int ret;
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	switch (cm_id_priv->state) {
 	case IW_CM_STATE_IDLE:
 	case IW_CM_STATE_CONN_SENT:
 	case IW_CM_STATE_CONN_RECV:
 	case IW_CM_STATE_ESTABLISHED:
 		*qp_attr_mask = 0;
 		ret = 0;
 		break;
 	default:
 		ret = -EINVAL;
 		break;
 	}
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 	return ret;
 }
 
 int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
 		       struct ib_qp_attr *qp_attr,
 		       int *qp_attr_mask)
 {
 	struct iwcm_id_private *cm_id_priv;
 	int ret;
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 	switch (qp_attr->qp_state) {
 	case IB_QPS_INIT:
 	case IB_QPS_RTR:
 		ret = iwcm_init_qp_init_attr(cm_id_priv,
 					     qp_attr, qp_attr_mask);
 		break;
 	case IB_QPS_RTS:
 		ret = iwcm_init_qp_rts_attr(cm_id_priv,
 					    qp_attr, qp_attr_mask);
 		break;
 	default:
 		ret = -EINVAL;
 		break;
 	}
 	return ret;
 }
 EXPORT_SYMBOL(iw_cm_init_qp_attr);
 
 static int __init iw_cm_init(void)
 {
 	iwcm_wq = create_singlethread_workqueue("iw_cm_wq");
 	if (!iwcm_wq)
 		return -ENOMEM;
 
 	return 0;
 }
 
 static void __exit iw_cm_cleanup(void)
 {
 	destroy_workqueue(iwcm_wq);
 }
 
 module_init(iw_cm_init);
 module_exit(iw_cm_cleanup);
Index: head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c
===================================================================
--- head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c	(revision 319721)
+++ head/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c	(revision 319722)
@@ -1,1973 +1,1972 @@
 
 /*-
  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
  *      The Regents of the University of California.  All rights reserved.
  * Copyright (c) 2004 The FreeBSD Foundation.  All rights reserved.
  * Copyright (c) 2004-2008 Robert N. M. Watson.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Excerpts taken from tcp_subr.c, tcp_usrreq.c, uipc_socket.c
  */
 
 /*
  *
  * Copyright (c) 2010 Isilon Systems, Inc.
  * Copyright (c) 2010 iX Systems, Inc.
  * Copyright (c) 2010 Panasas, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 
 #include "sdp.h"
 
 #include <net/if.h>
 #include <net/route.h>
 #include <net/vnet.h>
 #include <sys/sysctl.h>
 
 /* UMA zone that sdp_sock structures are allocated from and freed to. */
 uma_zone_t	sdp_zone;
 /* Guards sdp_list and sdp_count; taken through the SDP_LIST_* macros. */
 struct rwlock	sdp_lock;
 /* Every attached sdp_sock, linked through its 'list' member. */
 LIST_HEAD(, sdp_sock) sdp_list;
 
 /* NOTE(review): presumably the RX completion workqueue; not used in this part of the file. */
 struct workqueue_struct *rx_comp_wq;
 
 RW_SYSINIT(sdplockinit, &sdp_lock, "SDP lock");
 /* Lock, unlock and assertion wrappers for the global socket-list lock. */
 #define	SDP_LIST_WLOCK()	rw_wlock(&sdp_lock)
 #define	SDP_LIST_RLOCK()	rw_rlock(&sdp_lock)
 #define	SDP_LIST_WUNLOCK()	rw_wunlock(&sdp_lock)
 #define	SDP_LIST_RUNLOCK()	rw_runlock(&sdp_lock)
 #define	SDP_LIST_WLOCK_ASSERT()	rw_assert(&sdp_lock, RW_WLOCKED)
 #define	SDP_LIST_RLOCK_ASSERT()	rw_assert(&sdp_lock, RW_RLOCKED)
 #define	SDP_LIST_LOCK_ASSERT()	rw_assert(&sdp_lock, RW_LOCKED)
 
 MALLOC_DEFINE(M_SDP, "sdp", "Sockets Direct Protocol");
 
 /* Defined later in the file; needed by sdp_start_disconnect(). */
 static void sdp_stop_keepalive_timer(struct socket *so);
 
 /*
  * SDP protocol interface to socket abstraction.
  */
 /*
  * sdp_sendspace and sdp_recvspace are the default send and receive window
  * sizes, respectively.
  */
 u_long	sdp_sendspace = 1024*32;
 u_long	sdp_recvspace = 1024*64;
 
 /* Number of entries on sdp_list; modified under SDP_LIST_WLOCK(). */
 static int sdp_count;
 
 /*
  * Tear down the RDMA CM id attached to a socket that is going away, which
  * stops further asynchronous CMA events for it.  A socket without an id
  * is left untouched.
  */
 static void
 sdp_destroy_cma(struct sdp_sock *ssk)
 {
 
 	if (ssk->id != NULL) {
 		rdma_destroy_id(ssk->id);
 		ssk->id = NULL;
 	}
 }
 
 /*
  * Bind the socket's RDMA CM id to a local address.
  *
  * Called with the ssk write lock held; the lock is dropped around the
  * rdma_* calls and held again on every return path.  A NULL nam binds to
  * INADDR_ANY with port 0.  On success the address and port chosen by
  * the CM are copied into ssk->laddr / ssk->lport.  The cred argument is
  * not used by this function.
  *
  * Returns 0, EINVAL if the socket is already bound, ENOMEM if no CM id
  * could be created, or the (negated) rdma_bind_addr() error.
  */
 static int
 sdp_pcbbind(struct sdp_sock *ssk, struct sockaddr *nam, struct ucred *cred)
 {
 	struct sockaddr_in *sin;
 	struct sockaddr_in null;
 	int error;
 
 	SDP_WLOCK_ASSERT(ssk);
 
 	if (ssk->lport != 0 || ssk->laddr != INADDR_ANY)
 		return (EINVAL);
 	/* rdma_bind_addr handles bind races.  */
 	SDP_WUNLOCK(ssk);
 	if (ssk->id == NULL)
 		ssk->id = rdma_create_id(sdp_cma_handler, ssk, RDMA_PS_SDP, IB_QPT_RC);
 	if (ssk->id == NULL) {
 		SDP_WLOCK(ssk);
 		return (ENOMEM);
 	}
 	if (nam == NULL) {
 		/* No address supplied: build a wildcard AF_INET address. */
 		null.sin_family = AF_INET;
 		null.sin_len = sizeof(null);
 		null.sin_addr.s_addr = INADDR_ANY;
 		null.sin_port = 0;
 		bzero(&null.sin_zero, sizeof(null.sin_zero));
 		nam = (struct sockaddr *)&null;
 	}
 	/* rdma_bind_addr() result is negated to get a positive errno. */
 	error = -rdma_bind_addr(ssk->id, nam);
 	SDP_WLOCK(ssk);
 	if (error == 0) {
 		sin = (struct sockaddr_in *)&ssk->id->route.addr.src_addr;
 		ssk->laddr = sin->sin_addr.s_addr;
 		ssk->lport = sin->sin_port;
 	} else
 		sdp_destroy_cma(ssk);
 	return (error);
 }
 
 /*
  * Release an sdp_sock and everything it owns.
  *
  * Entered with the ssk write lock held and the socket already detached
  * (ssk->socket == NULL).  The lock is dropped here and never reacquired;
  * the ssk must not be referenced by the caller after this returns.
  */
 static void
 sdp_pcbfree(struct sdp_sock *ssk)
 {
 
 	KASSERT(ssk->socket == NULL, ("ssk %p socket still attached", ssk));
 	KASSERT((ssk->flags & SDP_DESTROY) == 0,
 	    ("ssk %p already destroyed", ssk));
 
 	sdp_dbg(ssk->socket, "Freeing pcb");
 	SDP_WLOCK_ASSERT(ssk);
 	/* Mark the pcb as being destroyed before letting go of its lock. */
 	ssk->flags |= SDP_DESTROY;
 	SDP_WUNLOCK(ssk);
 	/* Unlink from the global list under the list lock. */
 	SDP_LIST_WLOCK();
 	sdp_count--;
 	LIST_REMOVE(ssk, list);
 	SDP_LIST_WUNLOCK();
 	crfree(ssk->cred);
 	ssk->qp_active = 0;
 	if (ssk->qp) {
 		ib_destroy_qp(ssk->qp);
 		ssk->qp = NULL;
 	}
 	sdp_tx_ring_destroy(ssk);
 	sdp_rx_ring_destroy(ssk);
 	sdp_destroy_cma(ssk);
 	rw_destroy(&ssk->rx_ring.destroyed_lock);
 	rw_destroy(&ssk->lock);
 	uma_zfree(sdp_zone, ssk);
 }
 
 /*
  * Build a freshly allocated AF_INET sockaddr (type M_SONAME) holding the
  * given port and address.  The allocation uses M_WAITOK | M_ZERO and the
  * result is handed back to the caller, who owns it.
  */
 static struct sockaddr *
 sdp_sockaddr(in_port_t port, struct in_addr *addr_p)
 {
 	struct sockaddr_in *sa4;
 
 	sa4 = malloc(sizeof(*sa4), M_SONAME, M_WAITOK | M_ZERO);
 	sa4->sin_len = sizeof(*sa4);
 	sa4->sin_family = AF_INET;
 	sa4->sin_port = port;
 	sa4->sin_addr = *addr_p;
 
 	return ((struct sockaddr *)sa4);
 }
 
 static int
 sdp_getsockaddr(struct socket *so, struct sockaddr **nam)
 {
 	struct sdp_sock *ssk;
 	struct in_addr addr;
 	in_port_t port;
 
 	ssk = sdp_sk(so);
 	SDP_RLOCK(ssk);
 	port = ssk->lport;
 	addr.s_addr = ssk->laddr;
 	SDP_RUNLOCK(ssk);
 
 	*nam = sdp_sockaddr(port, &addr);
 	return 0;
 }
 
 static int
 sdp_getpeeraddr(struct socket *so, struct sockaddr **nam)
 {
 	struct sdp_sock *ssk;
 	struct in_addr addr;
 	in_port_t port;
 
 	ssk = sdp_sk(so);
 	SDP_RLOCK(ssk);
 	port = ssk->fport;
 	addr.s_addr = ssk->faddr;
 	SDP_RUNLOCK(ssk);
 
 	*nam = sdp_sockaddr(port, &addr);
 	return 0;
 }
 
 /*
  * Call notify(ssk, errno) on every SDP socket whose foreign address is
  * faddr and which still has a socket attached.
  *
  * The list write lock is held for the whole walk, and each ssk is write
  * locked before it is examined.  The callback is entered with the ssk
  * lock held; it returns the ssk (still locked) or NULL when it has
  * already released the lock itself, as sdp_closed() does.  Entries that
  * do not match, or that are already marked SDP_DESTROY, are unlocked and
  * skipped so that no path through the loop leaves an ssk locked.
  */
 static void
 sdp_pcbnotifyall(struct in_addr faddr, int errno,
     struct sdp_sock *(*notify)(struct sdp_sock *, int))
 {
 	struct sdp_sock *ssk, *ssk_temp;
 
 	SDP_LIST_WLOCK();
 	LIST_FOREACH_SAFE(ssk, &sdp_list, list, ssk_temp) {
 		SDP_WLOCK(ssk);
 		if (ssk->faddr != faddr.s_addr || ssk->socket == NULL ||
 		    (ssk->flags & SDP_DESTROY) != 0) {
 			/* Not a target (or being torn down): just unlock. */
 			SDP_WUNLOCK(ssk);
 			continue;
 		}
 		/* A NULL return means the callback dropped the lock. */
 		if ((*notify)(ssk, errno) != NULL)
 			SDP_WUNLOCK(ssk);
 	}
 	SDP_LIST_WUNLOCK();
 }
 
 #if 0
 /*
  * Compiled out.  Would call func(ssk, arg) on every socket in sdp_list,
  * holding the list read lock for the walk and each ssk's write lock
  * around the call.
  */
 static void
 sdp_apply_all(void (*func)(struct sdp_sock *, void *), void *arg)
 {
 	struct sdp_sock *ssk;
 
 	SDP_LIST_RLOCK();
 	LIST_FOREACH(ssk, &sdp_list, list) {
 		SDP_WLOCK(ssk);
 		func(ssk, arg);
 		SDP_WUNLOCK(ssk);
 	}
 	SDP_LIST_RUNLOCK();
 }
 #endif
 
 static void
 sdp_output_reset(struct sdp_sock *ssk)
 {
 	struct rdma_cm_id *id;
 
 	SDP_WLOCK_ASSERT(ssk);
 	if (ssk->id) {
 		id = ssk->id;
 		ssk->qp_active = 0;
 		SDP_WUNLOCK(ssk);
 		rdma_disconnect(id);
 		SDP_WLOCK(ssk);
 	}
 	ssk->state = TCPS_CLOSED;
 }
 
 /*
  * Attempt to close a SDP socket, marking it as dropped, and freeing
  * the socket if we hold the only reference.
  *
  * Called with the ssk write lock held.  Returns the ssk, still locked,
  * when the protocol held no socket reference (SDP_SOCKREF clear).  When
  * SDP_SOCKREF was set, the ssk lock is released, the protocol reference is
  * dropped via sofree(), and NULL is returned; the caller must not touch
  * the ssk or unlock it in that case.
  */
 static struct sdp_sock *
 sdp_closed(struct sdp_sock *ssk)
 {
 	struct socket *so;
 
 	SDP_WLOCK_ASSERT(ssk);
 
 	ssk->flags |= SDP_DROPPED;
 	so = ssk->socket;
 	soisdisconnected(so);
 	if (ssk->flags & SDP_SOCKREF) {
 		KASSERT(so->so_state & SS_PROTOREF,
 		    ("sdp_closed: !SS_PROTOREF"));
 		ssk->flags &= ~SDP_SOCKREF;
 		/* Drop the pcb lock before taking the socket lock. */
 		SDP_WUNLOCK(ssk);
-		ACCEPT_LOCK();
 		SOCK_LOCK(so);
 		so->so_state &= ~SS_PROTOREF;
 		sofree(so);
 		return (NULL);
 	}
 	return (ssk);
 }
 
 /*
  * Perform timer based shutdowns which can not operate in
  * callout context.
  *
  * Taskqueue handler; 'data' is the sdp_sock and 'pending' is unused.  It
  * takes the ssk write lock itself, and every exit path leaves the ssk
  * either unlocked or freed.
  */
 static void
 sdp_shutdown_task(void *data, int pending)
 {
 	struct sdp_sock *ssk;
 
 	ssk = data;
 	SDP_WLOCK(ssk);
 	/*
 	 * I don't think this can race with another call to pcbfree()
 	 * because SDP_TIMEWAIT protects it.  SDP_DESTROY may be redundant.
 	 */
 	if (ssk->flags & SDP_DESTROY)
 		panic("sdp_shutdown_task: Racing with pcbfree for ssk %p",
 		    ssk);
 	if (ssk->flags & SDP_DISCON)
 		sdp_output_reset(ssk);
 	/* We have to clear this so sdp_detach() will call pcbfree(). */
 	ssk->flags &= ~(SDP_TIMEWAIT | SDP_DREQWAIT);
 	/* sdp_closed() returning NULL means it already dropped the lock. */
 	if ((ssk->flags & SDP_DROPPED) == 0 &&
 	    sdp_closed(ssk) == NULL)
 		return;
 	/* Socket already detached: nothing else will free the pcb. */
 	if (ssk->socket == NULL) {
 		sdp_pcbfree(ssk);
 		return;
 	}
 	SDP_WUNLOCK(ssk);
 }
 
 /*
  * The 2msl timer has expired: hand the real work to the shutdown task.
  *
  * The keep2msl callout is set up with CALLOUT_RETURNUNLOCKED, so this
  * handler releases the ssk lock itself before returning.  Nothing is
  * scheduled if the callout was canceled or SDP_TIMEWAIT is no longer set.
  */
 static void
 sdp_2msl_timeout(void *data)
 {
 	struct sdp_sock *ssk = data;
 
 	/* An inactive callout means it was canceled. */
 	if (callout_active(&ssk->keep2msl)) {
 		callout_deactivate(&ssk->keep2msl);
 		/* TIMEWAIT should always be set here; checked defensively. */
 		if ((ssk->flags & SDP_TIMEWAIT) != 0)
 			taskqueue_enqueue(taskqueue_thread,
 			    &ssk->shutdown_task);
 	}
 	SDP_WUNLOCK(ssk);
 }
 
 /*
  * Schedule the 2msl wait timer.
  *
  * Called with the ssk write lock held.  Marks the socket SDP_TIMEWAIT /
  * TCPS_TIME_WAIT, reports it disconnected, and (re)arms the keep2msl
  * callout to run sdp_2msl_timeout() after TCPTV_MSL.
  */
 static void
 sdp_2msl_wait(struct sdp_sock *ssk)
 {
 
 	SDP_WLOCK_ASSERT(ssk);
 	ssk->flags |= SDP_TIMEWAIT;
 	ssk->state = TCPS_TIME_WAIT;
 	soisdisconnected(ssk->socket);
 	callout_reset(&ssk->keep2msl, TCPTV_MSL, sdp_2msl_timeout, ssk);
 }
 
 /*
  * Timed out waiting for the final fin/ack from rdma_disconnect().
  *
  * Runs from the keep2msl callout, which is set up with
  * CALLOUT_RETURNUNLOCKED, so the ssk lock is released here before
  * returning.  If the socket is still waiting for the disconnect reply it
  * is flagged SDP_DISCON and moved into the 2msl wait.
  */
 static void
 sdp_dreq_timeout(void *data)
 {
 	struct sdp_sock *ssk = data;
 
 	/*
 	 * Do nothing if the callout was canceled, or if it has been
 	 * rescheduled (probably as a different timer).
 	 */
 	if (!callout_active(&ssk->keep2msl) ||
 	    callout_pending(&ssk->keep2msl))
 		goto unlock;
 	callout_deactivate(&ssk->keep2msl);
 
 	if ((ssk->state == TCPS_FIN_WAIT_1 || ssk->state == TCPS_LAST_ACK) &&
 	    (ssk->flags & SDP_DREQWAIT) != 0) {
 		ssk->flags &= ~SDP_DREQWAIT;
 		ssk->flags |= SDP_DISCON;
 		sdp_2msl_wait(ssk);
 		ssk->qp_active = 0;
 	}
 unlock:
 	SDP_WUNLOCK(ssk);
 }
 
 /*
  * Received the final fin/ack.  Stop waiting for the disconnect reply and
  * enter the 2msl wait instead: SDP_DREQWAIT is cleared and sdp_2msl_wait()
  * re-arms the keep2msl callout with the 2msl handler.
  *
  * NOTE(review): sdp_2msl_wait() asserts the ssk write lock, so callers
  * must hold it.
  */
 void
 sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk)
 {
 	sdp_dbg(ssk->socket, "cancelling dreq wait timeout\n");
 	ssk->flags &= ~SDP_DREQWAIT;
 	sdp_2msl_wait(ssk);
 }
 
 /*
  * Initialize the per-socket timer, shutdown task and ring state of the
  * sdp_sock attached to sk.  Always returns 0.
  */
 static int
 sdp_init_sock(struct socket *sk)
 {
 	struct sdp_sock *ssk = sdp_sk(sk);
 
 	sdp_dbg(sk, "%s\n", __func__);
 
 	/*
 	 * The callout is tied to the ssk lock; CALLOUT_RETURNUNLOCKED means
 	 * the handlers release that lock themselves.
 	 */
 	callout_init_rw(&ssk->keep2msl, &ssk->lock, CALLOUT_RETURNUNLOCKED);
 	TASK_INIT(&ssk->shutdown_task, 0, sdp_shutdown_task, ssk);
 #ifdef SDP_ZCOPY
 	INIT_DELAYED_WORK(&ssk->srcavail_cancel_work, srcavail_cancel_timeout);
 	ssk->zcopy_thresh = -1; /* use global sdp_zcopy_thresh */
 	ssk->tx_ring.rdma_inflight = NULL;
 #endif
 	atomic_set(&ssk->mseq_ack, 0);
 	sdp_rx_ring_init(ssk);
 	ssk->tx_ring.buffer = NULL;
 
 	return 0;
 }
 
 /*
  * Allocate an sdp_sock for the socket and reserve socket buffer space.
  *
  * Returns 0 on success, the soreserve() error if buffer space could not
  * be reserved, or ENOBUFS if the sdp_sock allocation failed.  The proto
  * and td arguments are not used.
  */
 static int
 sdp_attach(struct socket *so, int proto, struct thread *td)
 {
 	struct sdp_sock *ssk;
 	int error;
 
 	ssk = sdp_sk(so);
 	KASSERT(ssk == NULL, ("sdp_attach: ssk already set on so %p", so));
 	/* Only reserve default buffer space if none has been set yet. */
 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
 		error = soreserve(so, sdp_sendspace, sdp_recvspace);
 		if (error)
 			return (error);
 	}
 	so->so_rcv.sb_flags |= SB_AUTOSIZE;
 	so->so_snd.sb_flags |= SB_AUTOSIZE;
 	ssk = uma_zalloc(sdp_zone, M_NOWAIT | M_ZERO);
 	if (ssk == NULL)
 		return (ENOBUFS);
 	/* The lock must exist before sdp_init_sock() ties the callout to it. */
 	rw_init(&ssk->lock, "sdpsock");
 	ssk->socket = so;
 	ssk->cred = crhold(so->so_cred);
 	so->so_pcb = (caddr_t)ssk;
 	sdp_init_sock(so);
 	ssk->flags = 0;
 	ssk->qp_active = 0;
 	ssk->state = TCPS_CLOSED;
 	mbufq_init(&ssk->rxctlq, INT_MAX);
 	/* Publish the new pcb on the global list. */
 	SDP_LIST_WLOCK();
 	LIST_INSERT_HEAD(&sdp_list, ssk, list);
 	sdp_count++;
 	SDP_LIST_WUNLOCK();
 	if ((so->so_options & SO_LINGER) && so->so_linger == 0)
 		so->so_linger = TCP_LINGERTIME;
 
 	return (0);
 }
 
 /*
  * Detach SDP from the socket, potentially leaving it around for the
  * timewait to expire.
  *
  * The socket and pcb are unlinked from each other.  If a timewait or
  * disconnect-wait timer is still pending, the pcb is only unlocked and
  * left alive.  If the pcb was dropped or never got as far as SYN_SENT it
  * is freed here.  Any other state is treated as a fatal inconsistency.
  */
 static void
 sdp_detach(struct socket *so)
 {
 	struct sdp_sock *ssk;
 
 	ssk = sdp_sk(so);
 	SDP_WLOCK(ssk);
 	KASSERT(ssk->socket != NULL, ("sdp_detach: socket is NULL"));
 	ssk->socket->so_pcb = NULL;
 	ssk->socket = NULL;
 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DREQWAIT))
 		SDP_WUNLOCK(ssk);
 	else if (ssk->flags & SDP_DROPPED || ssk->state < TCPS_SYN_SENT)
 		sdp_pcbfree(ssk);	/* drops the ssk lock and frees the pcb */
 	else
 		panic("sdp_detach: Unexpected state, ssk %p.\n", ssk);
 }
 
 /*
  * Bind the socket to the local address in nam.
  *
  * Only AF_INET addresses of exactly sizeof(struct sockaddr_in) are
  * accepted (EINVAL otherwise); multicast addresses give EAFNOSUPPORT.  A
  * socket already in timewait or dropped yields EINVAL.  Otherwise the
  * result of sdp_pcbbind() is returned.
  */
 static int
 sdp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in *sin = (struct sockaddr_in *)nam;
 	struct sdp_sock *ssk;
 	int error;
 
 	if (nam->sa_len != sizeof(*sin) || sin->sin_family != AF_INET)
 		return (EINVAL);
 	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 		return (EAFNOSUPPORT);
 
 	ssk = sdp_sk(so);
 	SDP_WLOCK(ssk);
 	if ((ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) != 0)
 		error = EINVAL;
 	else
 		error = sdp_pcbbind(ssk, nam, td->td_ucred);
 	SDP_WUNLOCK(ssk);
 
 	return (error);
 }
 
 /*
  * Prepare to accept connections.
  *
  * Binds to a wildcard address first if no local port is set, switches the
  * socket into the listening state, and finally calls rdma_listen() on the
  * CM id after the ssk lock has been released.  Returns EINVAL for a
  * socket in timewait or already dropped, otherwise the first error hit.
  */
 static int
 sdp_listen(struct socket *so, int backlog, struct thread *td)
 {
 	int error = 0;
 	struct sdp_sock *ssk;
 
 	ssk = sdp_sk(so);
 	SDP_WLOCK(ssk);
 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
 		error = EINVAL;
 		goto out;
 	}
 	if (error == 0 && ssk->lport == 0)
 		error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
 	SOCK_LOCK(so);
 	if (error == 0)
 		error = solisten_proto_check(so);
 	if (error == 0) {
 		solisten_proto(so, backlog);
 		ssk->state = TCPS_LISTEN;
 	}
 	SOCK_UNLOCK(so);
 
 out:
 	SDP_WUNLOCK(ssk);
 	/*
 	 * NOTE(review): a failure from rdma_listen() is returned after the
 	 * socket has already been moved to TCPS_LISTEN above; confirm
 	 * callers cope with that.
 	 */
 	if (error == 0)
 		error = -rdma_listen(ssk->id, backlog);
 	return (error);
 }
 
 /*
  * Initiate a SDP connection to nam.
  *
  * Called with the ssk write lock held; the lock is dropped around
  * rdma_resolve_addr() and held again on return.  If the socket has no
  * local port yet it is first bound to a wildcard address.  On success the
  * socket moves to TCPS_SYN_SENT.
  *
  * Returns 0 on success, the sdp_pcbbind() error, or the (negated)
  * rdma_resolve_addr() error.  The latter is reported to the caller
  * instead of being discarded, so a failed address resolution no longer
  * looks like a connect in progress.
  */
 static int
 sdp_start_connect(struct sdp_sock *ssk, struct sockaddr *nam, struct thread *td)
 {
 	struct sockaddr_in src;
 	struct socket *so;
 	int error;
 
 	so = ssk->socket;
 
 	SDP_WLOCK_ASSERT(ssk);
 	if (ssk->lport == 0) {
 		error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
 		if (error)
 			return (error);
 	}
 	/* Source address for the resolver: whatever we are bound to. */
 	src.sin_family = AF_INET;
 	src.sin_len = sizeof(src);
 	bzero(&src.sin_zero, sizeof(src.sin_zero));
 	src.sin_port = ssk->lport;
 	src.sin_addr.s_addr = ssk->laddr;
 	soisconnecting(so);
 	SDP_WUNLOCK(ssk);
 	error = -rdma_resolve_addr(ssk->id, (struct sockaddr *)&src, nam,
 	    SDP_RESOLVE_TIMEOUT);
 	SDP_WLOCK(ssk);
 	if (error == 0)
 		ssk->state = TCPS_SYN_SENT;
 
 	return (error);
 }
 
 /*
  * Initiate SDP connection.
  */
 static int
 sdp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
 {
 	int error = 0;
 	struct sdp_sock *ssk;
 	struct sockaddr_in *sin;
 
 	sin = (struct sockaddr_in *)nam;
 	if (nam->sa_len != sizeof (*sin))
 		return (EINVAL);
 	if (sin->sin_family != AF_INET)
 		return (EINVAL);
 	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
 		return (EAFNOSUPPORT);
 	if ((error = prison_remote_ip4(td->td_ucred, &sin->sin_addr)) != 0)
 		return (error);
 	ssk = sdp_sk(so);
 	SDP_WLOCK(ssk);
 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED))
 		error = EINVAL;
 	else
 		error = sdp_start_connect(ssk, nam, td);
 	SDP_WUNLOCK(ssk);
 	return (error);
 }
 
 /*
  * Drop a SDP socket, reporting
  * the specified error.  If connection is synchronized,
  * then send a RST to peer.
  *
  * Called with the ssk write lock held.  Returns what sdp_closed()
  * returns: the ssk still locked, or NULL if the lock was released and the
  * protocol's socket reference dropped.
  */
 static struct sdp_sock *
 sdp_drop(struct sdp_sock *ssk, int errno)
 {
 	struct socket *so;
 
 	SDP_WLOCK_ASSERT(ssk);
 	/* Read the socket pointer before sdp_output_reset() may drop the lock. */
 	so = ssk->socket;
 	if (TCPS_HAVERCVDSYN(ssk->state))
 		sdp_output_reset(ssk);
 	/* Prefer a recorded soft error over a plain timeout. */
 	if (errno == ETIMEDOUT && ssk->softerror)
 		errno = ssk->softerror;
 	so->so_error = errno;
 	return (sdp_closed(ssk));
 }
 
 /*
  * User issued close, and wish to trail through shutdown states:
  * if never received SYN, just forget it.  If got a SYN from peer,
  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
  * If already got a FIN from peer, then almost done; go to LAST_ACK
  * state.  In all other cases, have already sent FIN to peer (e.g.
  * after PRU_SHUTDOWN), and just have to play tedious game waiting
  * for peer to send FIN or not respond to keep-alives, etc.
  * We can let the user exit from the close as soon as the FIN is acked.
  *
  * Called with the ssk write lock held; in the TCPS_LISTEN case the lock
  * is dropped around sdp_destroy_cma() and then retaken.
  */
 static void
 sdp_usrclosed(struct sdp_sock *ssk)
 {
 
 	SDP_WLOCK_ASSERT(ssk);
 
 	/* States not listed below are left unchanged by the switch. */
 	switch (ssk->state) {
 	case TCPS_LISTEN:
 		ssk->state = TCPS_CLOSED;
 		SDP_WUNLOCK(ssk);
 		sdp_destroy_cma(ssk);
 		SDP_WLOCK(ssk);
 		/* FALLTHROUGH */
 	case TCPS_CLOSED:
 		ssk = sdp_closed(ssk);
 		/*
 		 * sdp_closed() should never return NULL here as the socket is
 		 * still open.
 		 */
 		KASSERT(ssk != NULL,
 		    ("sdp_usrclosed: sdp_closed() returned NULL"));
 		break;
 
 	case TCPS_SYN_SENT:
 		/* FALLTHROUGH */
 	case TCPS_SYN_RECEIVED:
 		ssk->flags |= SDP_NEEDFIN;
 		break;
 
 	case TCPS_ESTABLISHED:
 		ssk->flags |= SDP_NEEDFIN;
 		ssk->state = TCPS_FIN_WAIT_1;
 		break;
 
 	case TCPS_CLOSE_WAIT:
 		ssk->state = TCPS_LAST_ACK;
 		break;
 	}
 	if (ssk->state >= TCPS_FIN_WAIT_2) {
 		/* Prevent the connection hanging in FIN_WAIT_2 forever. */
 		if (ssk->state == TCPS_FIN_WAIT_2)
 			sdp_2msl_wait(ssk);
 		else
 			soisdisconnected(ssk->socket);
 	}
 }
 
 /*
  * Start an orderly disconnect: arm the keep2msl callout with
  * sdp_dreq_timeout() after SDP_FIN_WAIT_TIMEOUT, flag the socket as
  * needing a FIN and waiting for the disconnect reply, then push pending
  * sends.  Called with the ssk write lock held.
  */
 static void
 sdp_output_disconnect(struct sdp_sock *ssk)
 {
 
 	SDP_WLOCK_ASSERT(ssk);
 	callout_reset(&ssk->keep2msl, SDP_FIN_WAIT_TIMEOUT,
 	    sdp_dreq_timeout, ssk);
 	ssk->flags |= SDP_NEEDFIN | SDP_DREQWAIT;
 	sdp_post_sends(ssk, M_NOWAIT);
 }
 
 /*
  * Initiate or continue a disconnect.
  * If embryonic state, just send reset (once).
  * If in ``let data drain'' option and linger null, just drop.
  * Otherwise (hard), mark socket disconnecting and drop
  * current input data; switch states based on user close, and
  * send segment to peer (with FIN).
  *
  * Called with the ssk write lock held.
  */
 static void
 sdp_start_disconnect(struct sdp_sock *ssk)
 {
 	struct socket *so;
 	int unread;
 
 	so = ssk->socket;
 	SDP_WLOCK_ASSERT(ssk);
 	sdp_stop_keepalive_timer(so);
 	/*
 	 * Neither sdp_closed() nor sdp_drop() should return NULL, as the
 	 * socket is still open.
 	 */
 	if (ssk->state < TCPS_ESTABLISHED) {
 		ssk = sdp_closed(ssk);
 		KASSERT(ssk != NULL,
 		    ("sdp_start_disconnect: sdp_close() returned NULL"));
 	} else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
 		ssk = sdp_drop(ssk, 0);
 		KASSERT(ssk != NULL,
 		    ("sdp_start_disconnect: sdp_drop() returned NULL"));
 	} else {
 		soisdisconnecting(so);
 		/* Remember whether unread data is being thrown away. */
 		unread = sbused(&so->so_rcv);
 		sbflush(&so->so_rcv);
 		sdp_usrclosed(ssk);
 		if (!(ssk->flags & SDP_DROPPED)) {
 			/* Discarded input: reset rather than a clean close. */
 			if (unread)
 				sdp_output_reset(ssk);
 			else
 				sdp_output_disconnect(ssk);
 		}
 	}
 }
 
 /*
  * User initiated disconnect.
  *
  * Returns ECONNRESET if the socket is in timewait or already dropped;
  * otherwise starts the disconnect and returns 0.
  */
 static int
 sdp_disconnect(struct socket *so)
 {
 	struct sdp_sock *ssk = sdp_sk(so);
 	int error;
 
 	SDP_WLOCK(ssk);
 	if ((ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) != 0) {
 		error = ECONNRESET;
 	} else {
 		sdp_start_disconnect(ssk);
 		error = 0;
 	}
 	SDP_WUNLOCK(ssk);
 
 	return (error);
 }
 
 /*
  * Accept a connection.  Essentially all the work is done at higher levels;
  * just return the address of the peer, storing through addr.
  *
  *
  * XXX This is broken XXX
  * 
  * The rationale for acquiring the sdp lock here is somewhat complicated,
  * and is described in detail in the commit log entry for r175612.  Acquiring
  * it delays an accept(2) racing with sonewconn(), which inserts the socket
  * before the address/port fields are initialized.  A better fix would
  * prevent the socket from being placed in the listen queue until all fields
  * are fully initialized.
  */
 static int
 sdp_accept(struct socket *so, struct sockaddr **nam)
 {
 	struct sdp_sock *ssk = NULL;
 	struct in_addr addr;
 	in_port_t port;
 	int error;
 
 	if (so->so_state & SS_ISDISCONNECTED)
 		return (ECONNABORTED);
 
 	port = 0;
 	addr.s_addr = 0;
 	error = 0;
 	ssk = sdp_sk(so);
 	SDP_WLOCK(ssk);
 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
 		error = ECONNABORTED;
 		goto out;
 	}
 	port = ssk->fport;
 	addr.s_addr = ssk->faddr;
 out:
 	SDP_WUNLOCK(ssk);
 	if (error == 0)
 		*nam = sdp_sockaddr(port, &addr);
 	return error;
 }
 
 /*
  * Mark the connection as being incapable of further output.
  *
  * Returns ECONNRESET if the socket is in timewait or already dropped.
  * Otherwise the send side is shut, the state machine is advanced and,
  * unless that dropped the socket, a disconnect is sent; returns 0.
  */
 static int
 sdp_shutdown(struct socket *so)
 {
 	struct sdp_sock *ssk = sdp_sk(so);
 	int error = 0;
 
 	SDP_WLOCK(ssk);
 	if ((ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) != 0) {
 		error = ECONNRESET;
 	} else {
 		socantsendmore(so);
 		sdp_usrclosed(ssk);
 		if ((ssk->flags & SDP_DROPPED) == 0)
 			sdp_output_disconnect(ssk);
 	}
 	SDP_WUNLOCK(ssk);
 
 	return (error);
 }
 
 /*
  * Append the packet 'mb' (which must carry a packet header) to socket
  * buffer 'sb', merging it into the last record when possible.
  *
  * 'cnt' is the caller's count of mbufs in the mb chain; it is only used to
  * decide whether the merged chain stays below SDP_MAX_SEND_SGES.  The
  * socket buffer lock must be held.
  */
 static void
 sdp_append(struct sdp_sock *ssk, struct sockbuf *sb, struct mbuf *mb, int cnt)
 {
 	struct mbuf *n;
 	int ncnt;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 	SBLASTRECORDCHK(sb);
 	KASSERT(mb->m_flags & M_PKTHDR,
 		("sdp_append: %p Missing packet header.\n", mb));
 	n = sb->sb_lastrecord;
 	/*
 	 * If the queue is empty just set all pointers and proceed.
 	 */
 	if (n == NULL) {
 		sb->sb_lastrecord = sb->sb_mb = sb->sb_sndptr = mb;
 		for (; mb; mb = mb->m_next) {
 	                sb->sb_mbtail = mb;
 			sballoc(sb, mb);
 		}
 		return;
 	}
 	/*
 	 * Count the number of mbufs in the current tail.
 	 */
 	for (ncnt = 0; n->m_next; n = n->m_next)
 		ncnt++;
 	n = sb->sb_lastrecord;
 	/*
 	 * If the two chains can fit in a single sdp packet and
 	 * the last record has not been sent yet (WRITABLE) coalesce
 	 * them.  The lastrecord remains the same but we must strip the
 	 * packet header and then let sbcompress do the hard part.
 	 */
 	if (M_WRITABLE(n) && ncnt + cnt < SDP_MAX_SEND_SGES &&
 	    n->m_pkthdr.len + mb->m_pkthdr.len - SDP_HEAD_SIZE <
 	    ssk->xmit_size_goal) {
 		/* Drop the new packet's SDP header; the record keeps its own. */
 		m_adj(mb, SDP_HEAD_SIZE);
 		n->m_pkthdr.len += mb->m_pkthdr.len;
 		n->m_flags |= mb->m_flags & (M_PUSH | M_URG);
 		m_demote(mb, 1, 0);
 		sbcompress(sb, mb, sb->sb_mbtail);
 		return;
 	}
 	/*
 	 * Not compressible, just append to the end and adjust counters.
 	 */
 	sb->sb_lastrecord->m_flags |= M_PUSH;
 	sb->sb_lastrecord->m_nextpkt = mb;
 	sb->sb_lastrecord = mb;
 	if (sb->sb_sndptr == NULL)
 		sb->sb_sndptr = mb;
 	for (; mb; mb = mb->m_next) {
 		sb->sb_mbtail = mb;
 		sballoc(sb, mb);
 	}
 }
 
 /*
  * Do a send by putting data in output queue and updating urgent
  * marker if URG set.  Possibly send more data.  Unlike the other
  * pru_*() routines, the mbuf chains are our responsibility.  We
  * must either enqueue them or free them.  The other pru_* routines
  * generally are caller-frees.
  *
  * This comes from sendfile, normal sends will come from sdp_sosend().
  *
  * An SDP header is prepended to m, and the chain is collapsed if it has
  * more than SDP_MAX_SEND_SGES links.  Control messages are not
  * supported: a non-empty 'control' fails the call with EINVAL and an
  * empty one is simply freed.  If 'nam' is given and the socket has not
  * started connecting, an implied connect is performed.
  *
  * Returns 0 on success, EMSGSIZE if the chain cannot be collapsed,
  * ECONNRESET for a socket in timewait or dropped, EINVAL for real
  * control data, ENOBUFS when out-of-band data does not fit, or the
  * error from the implied connect.  Both m and control are consumed on
  * every path, including the EMSGSIZE one.
  */
 static int
 sdp_send(struct socket *so, int flags, struct mbuf *m,
     struct sockaddr *nam, struct mbuf *control, struct thread *td)
 {
 	struct sdp_sock *ssk;
 	struct mbuf *n;
 	int error;
 	int cnt;
 
 	error = 0;
 	ssk = sdp_sk(so);
 	KASSERT(m->m_flags & M_PKTHDR,
 	    ("sdp_send: %p no packet header", m));
 	M_PREPEND(m, SDP_HEAD_SIZE, M_WAITOK);
 	mtod(m, struct sdp_bsdh *)->mid = SDP_MID_DATA;
 	/* cnt counts the links in the chain (number of mbufs minus one). */
 	for (n = m, cnt = 0; n->m_next; n = n->m_next)
 		cnt++;
 	if (cnt > SDP_MAX_SEND_SGES) {
 		n = m_collapse(m, M_WAITOK, SDP_MAX_SEND_SGES);
 		if (n == NULL) {
 			/* We own both chains: free control too, not just m. */
 			if (control)
 				m_freem(control);
 			m_freem(m);
 			return (EMSGSIZE);
 		}
 		m = n;
 		for (cnt = 0; n->m_next; n = n->m_next)
 			cnt++;
 	}
 	SDP_WLOCK(ssk);
 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
 		if (control)
 			m_freem(control);
 		if (m)
 			m_freem(m);
 		error = ECONNRESET;
 		goto out;
 	}
 	if (control) {
 		/* SDP doesn't support control messages. */
 		if (control->m_len) {
 			m_freem(control);
 			if (m)
 				m_freem(m);
 			error = EINVAL;
 			goto out;
 		}
 		m_freem(control);	/* empty control, just free it */
 	}
 	if (!(flags & PRUS_OOB)) {
 		SOCKBUF_LOCK(&so->so_snd);
 		sdp_append(ssk, &so->so_snd, m, cnt);
 		SOCKBUF_UNLOCK(&so->so_snd);
 		if (nam && ssk->state < TCPS_SYN_SENT) {
 			/*
 			 * Do implied connect if not yet connected.
 			 */
 			error = sdp_start_connect(ssk, nam, td);
 			if (error)
 				goto out;
 		}
 		if (flags & PRUS_EOF) {
 			/*
 			 * Close the send side of the connection after
 			 * the data is sent.
 			 */
 			socantsendmore(so);
 			sdp_usrclosed(ssk);
 			if (!(ssk->flags & SDP_DROPPED))
 				sdp_output_disconnect(ssk);
 		} else if (!(ssk->flags & SDP_DROPPED) &&
 		    !(flags & PRUS_MORETOCOME))
 			sdp_post_sends(ssk, M_NOWAIT);
 		SDP_WUNLOCK(ssk);
 		return (0);
 	} else {
 		SOCKBUF_LOCK(&so->so_snd);
 		if (sbspace(&so->so_snd) < -512) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			m_freem(m);
 			error = ENOBUFS;
 			goto out;
 		}
 		/*
 		 * According to RFC961 (Assigned Protocols),
 		 * the urgent pointer points to the last octet
 		 * of urgent data.  We continue, however,
 		 * to consider it to indicate the first octet
 		 * of data past the urgent section.
 		 * Otherwise, snd_up should be one lower.
 		 */
 		m->m_flags |= M_URG | M_PUSH;
 		sdp_append(ssk, &so->so_snd, m, cnt);
 		SOCKBUF_UNLOCK(&so->so_snd);
 		if (nam && ssk->state < TCPS_SYN_SENT) {
 			/*
 			 * Do implied connect if not yet connected.
 			 */
 			error = sdp_start_connect(ssk, nam, td);
 			if (error)
 				goto out;
 		}
 		sdp_post_sends(ssk, M_NOWAIT);
 		SDP_WUNLOCK(ssk);
 		return (0);
 	}
 out:
 	SDP_WUNLOCK(ssk);
 	return (error);
 }
 
 /* sblock() flags for a send/receive: SBL_WAIT unless MSG_DONTWAIT is set. */
 #define	SBLOCKWAIT(f)	(((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT)
 
 /*
  * Send on a socket.  If send must go all at once and message is larger than
  * send buffering, then hard error.  Lock against other senders.  If must go
  * all at once and not enough room now, then inform user that this would
  * block and do nothing.  Otherwise, if nonblocking, send as much as
  * possible.  The data to be sent is described by "uio" if nonzero, otherwise
  * by the mbuf chain "top" (which must be null if uio is not).  Data provided
  * in mbuf chain must be small enough to send all at once.
  *
  * Returns nonzero on error, timeout or signal; callers must check for short
  * counts if EINTR/ERESTART are returned.  Data and control buffers are freed
  * on return.
  *
  * Control data is not supported: a non-empty control chain fails the call
  * with EINVAL before anything else is done.
  */
 static int
 sdp_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
 {
 	struct sdp_sock *ssk;
 	long space, resid;
 	int atomic;
 	int error;
 	int copy;
 
 	if (uio != NULL)
 		resid = uio->uio_resid;
 	else
 		resid = top->m_pkthdr.len;
 	/* A caller-supplied mbuf chain has to be sent in one piece. */
 	atomic = top != NULL;
 	if (control != NULL) {
 		if (control->m_len) {
 			m_freem(control);
 			if (top)
 				m_freem(top);
 			return (EINVAL);
 		}
 		m_freem(control);
 		control = NULL;
 	}
 	/*
 	 * In theory resid should be unsigned.  However, space must be
 	 * signed, as it might be less than 0 if we over-committed, and we
 	 * must use a signed comparison of space and resid.  On the other
 	 * hand, a negative resid causes us to loop sending 0-length
 	 * segments to the protocol.
 	 *
 	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
 	 * type sockets since that's an error.
 	 */
 	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
 		error = EINVAL;
 		goto out;
 	}
 	if (td != NULL)
 		td->td_ru.ru_msgsnd++;
 
 	ssk = sdp_sk(so);
 	/* Serialize against other senders on this socket. */
 	error = sblock(&so->so_snd, SBLOCKWAIT(flags));
 	if (error)
 		goto out;
 
 restart:
 	do {
 		SOCKBUF_LOCK(&so->so_snd);
 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = EPIPE;
 			goto release;
 		}
 		if (so->so_error) {
 			/* Report the pending error once and clear it. */
 			error = so->so_error;
 			so->so_error = 0;
 			SOCKBUF_UNLOCK(&so->so_snd);
 			goto release;
 		}
 		if ((so->so_state & SS_ISCONNECTED) == 0 && addr == NULL) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = ENOTCONN;
 			goto release;
 		}
 		space = sbspace(&so->so_snd);
 		if (flags & MSG_OOB)
 			space += 1024;
 		if (atomic && resid > ssk->xmit_size_goal - SDP_HEAD_SIZE) {
 			SOCKBUF_UNLOCK(&so->so_snd);
 			error = EMSGSIZE;
 			goto release;
 		}
 		/* Not enough room: fail if non-blocking, else wait and retry. */
 		if (space < resid &&
 		    (atomic || space < so->so_snd.sb_lowat)) {
 			if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO)) {
 				SOCKBUF_UNLOCK(&so->so_snd);
 				error = EWOULDBLOCK;
 				goto release;
 			}
 			error = sbwait(&so->so_snd);
 			SOCKBUF_UNLOCK(&so->so_snd);
 			if (error)
 				goto release;
 			goto restart;
 		}
 		SOCKBUF_UNLOCK(&so->so_snd);
 		do {
 			if (uio == NULL) {
 				resid = 0;
 				if (flags & MSG_EOR)
 					top->m_flags |= M_EOR;
 			} else {
 				/*
 				 * Copy the data from userland into a mbuf
 				 * chain.  If no data is to be copied in,
 				 * a single empty mbuf is returned.
 				 */
 				copy = min(space,
 				    ssk->xmit_size_goal - SDP_HEAD_SIZE);
 				top = m_uiotombuf(uio, M_WAITOK, copy,
 				    0, M_PKTHDR |
 				    ((flags & MSG_EOR) ? M_EOR : 0));
 				if (top == NULL) {
 					/* only possible error */
 					error = EFAULT;
 					goto release;
 				}
 				/* Charge what was just copied against space. */
 				space -= resid - uio->uio_resid;
 				resid = uio->uio_resid;
 			}
 			/*
 			 * XXX all the SBS_CANTSENDMORE checks previously
 			 * done could be out of date after dropping the
 			 * socket lock.
 			 */
 			error = sdp_send(so, (flags & MSG_OOB) ? PRUS_OOB :
 			/*
 			 * Set EOF on the last send if the user specified
 			 * MSG_EOF.
 			 */
 			    ((flags & MSG_EOF) && (resid <= 0)) ? PRUS_EOF :
 			/* If there is more to send set PRUS_MORETOCOME. */
 			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
 			    top, addr, NULL, td);
 			/* sdp_send() consumed the chain whatever the outcome. */
 			top = NULL;
 			if (error)
 				goto release;
 		} while (resid && space > 0);
 	} while (resid);
 
 release:
 	sbunlock(&so->so_snd);
 out:
 	if (top != NULL)
 		m_freem(top);
 	return (error);
 }
 
 /*
  * The part of soreceive() that implements reading non-inline out-of-band
  * data from a socket.  For more complete comments, see soreceive(), from
  * which this code originated.
  *
  * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
  * unable to return an mbuf chain to the caller.
  */
 static int
 soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
 {
 	struct protosw *pr = so->so_proto;
 	struct mbuf *m;
 	int error;
 
 	KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
 
 	m = m_get(M_WAITOK, MT_DATA);
 	error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
 	if (error)
 		goto bad;
 	do {
 		error = uiomove(mtod(m, void *),
 		    (int) min(uio->uio_resid, m->m_len), uio);
 		m = m_free(m);
 	} while (uio->uio_resid && error == 0 && m);
 bad:
 	if (m != NULL)
 		m_freem(m);
 	return (error);
 }
 
 /*
  * Receive data from an SDP stream socket.  Derived from the optimized
  * stream (TCP) variant of soreceive().
  *
  * so       - socket to read from; anything but SOCK_STREAM gets EINVAL.
  * psa      - if non-NULL, *psa is set to NULL (no address is returned).
  * uio      - uio_resid gives the number of bytes wanted.
  * mp0      - if non-NULL, data is returned as an mbuf chain in *mp0
  *            instead of being copied out through uio.
  * controlp - must be NULL; control data is not supported (EINVAL).
  * flagsp   - optional MSG_* flags; MSG_EOR is masked off.
  *
  * Returns 0 or an errno value.
  */
 static int
 sdp_sorecv(struct socket *so, struct sockaddr **psa, struct uio *uio,
     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
 	int len = 0, error = 0, flags, oresid;
 	struct sockbuf *sb;
 	struct mbuf *m, *n = NULL;
 	struct sdp_sock *ssk;
 
 	/* We only do stream sockets. */
 	if (so->so_type != SOCK_STREAM)
 		return (EINVAL);
 	if (psa != NULL)
 		*psa = NULL;
 	if (controlp != NULL)
 		return (EINVAL);
 	if (flagsp != NULL)
 		flags = *flagsp &~ MSG_EOR;
 	else
 		flags = 0;
 	if (flags & MSG_OOB)
 		return (soreceive_rcvoob(so, uio, flags));
 	if (mp0 != NULL)
 		*mp0 = NULL;
 
 	sb = &so->so_rcv;
 	ssk = sdp_sk(so);
 
 	/*
 	 * Prevent other readers from entering the socket.  If this fails
 	 * nothing is locked yet, so return directly: the "out" label below
 	 * unlocks both the sockbuf mutex and the sblock() lock.
 	 */
 	error = sblock(sb, SBLOCKWAIT(flags));
 	if (error)
 		return (error);
 	SOCKBUF_LOCK(sb);
 
 	/* Easy one, no space to copyout anything. */
 	if (uio->uio_resid == 0) {
 		error = EINVAL;
 		goto out;
 	}
 	oresid = uio->uio_resid;
 
 	/* We will never ever get anything unless we are connected. */
 	if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
 		/* When disconnecting there may be still some data left. */
 		if (sbavail(sb))
 			goto deliver;
 		if (!(so->so_state & SS_ISDISCONNECTED))
 			error = ENOTCONN;
 		goto out;
 	}
 
 	/* Socket buffer is empty and we shall not block. */
 	if (sbavail(sb) == 0 &&
 	    ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
 		error = EAGAIN;
 		goto out;
 	}
 
 restart:
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
 	/* Abort if socket has reported problems. */
 	if (so->so_error) {
 		if (sbavail(sb))
 			goto deliver;
 		/* Data was already delivered; report the error next time. */
 		if (oresid > uio->uio_resid)
 			goto out;
 		error = so->so_error;
 		if (!(flags & MSG_PEEK))
 			so->so_error = 0;
 		goto out;
 	}
 
 	/* Door is closed.  Deliver what is left, if any. */
 	if (sb->sb_state & SBS_CANTRCVMORE) {
 		if (sbavail(sb))
 			goto deliver;
 		else
 			goto out;
 	}
 
 	/* Socket buffer got some data that we shall deliver now. */
 	if (sbavail(sb) && !(flags & MSG_WAITALL) &&
 	    ((so->so_state & SS_NBIO) ||
 	     (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
 	     sbavail(sb) >= sb->sb_lowat ||
 	     sbavail(sb) >= uio->uio_resid ||
 	     sbavail(sb) >= sb->sb_hiwat) ) {
 		goto deliver;
 	}
 
 	/* On MSG_WAITALL we must wait until all data or error arrives. */
 	if ((flags & MSG_WAITALL) &&
 	    (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_lowat))
 		goto deliver;
 
 	/*
 	 * Wait and block until (more) data comes in.
 	 * NB: Drops the sockbuf lock during wait.
 	 */
 	error = sbwait(sb);
 	if (error)
 		goto out;
 	goto restart;
 
 deliver:
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	KASSERT(sbavail(sb), ("%s: sockbuf empty", __func__));
 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
 
 	/* Statistics. */
 	if (uio->uio_td)
 		uio->uio_td->td_ru.ru_msgrcv++;
 
 	/* Fill uio until full or current end of socket buffer is reached. */
 	len = min(uio->uio_resid, sbavail(sb));
 	if (mp0 != NULL) {
 		/* Dequeue as many mbufs as possible. */
 		if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
 			/*
 			 * The guard above means the loop body runs at least
 			 * once, so n is non-NULL when the chain is cut below.
 			 */
 			for (*mp0 = m = sb->sb_mb;
 			     m != NULL && m->m_len <= len;
 			     m = m->m_next) {
 				len -= m->m_len;
 				uio->uio_resid -= m->m_len;
 				sbfree(sb, m);
 				n = m;
 			}
 			sb->sb_mb = m;
 			if (sb->sb_mb == NULL)
 				SB_EMPTY_FIXUP(sb);
 			n->m_next = NULL;
 		}
 		/* Copy the remainder. */
 		if (len > 0) {
 			KASSERT(sb->sb_mb != NULL,
 			    ("%s: len > 0 && sb->sb_mb empty", __func__));
 
 			m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
 			if (m == NULL)
 				len = 0;	/* Don't flush data from sockbuf. */
 			else {
 				/*
 				 * The copy holds len bytes and may span
 				 * several mbufs, so charge the whole length
 				 * rather than the first mbuf's m_len.  This
 				 * keeps uio_resid in step with the
 				 * sbdrop_locked(sb, len) below.
 				 */
 				uio->uio_resid -= len;
 			}
 			if (*mp0 != NULL)
 				n->m_next = m;
 			else
 				*mp0 = m;
 			if (*mp0 == NULL) {
 				error = ENOBUFS;
 				goto out;
 			}
 		}
 	} else {
 		/* NB: Must unlock socket buffer as uiomove may sleep. */
 		SOCKBUF_UNLOCK(sb);
 		error = m_mbuftouio(uio, sb->sb_mb, len);
 		SOCKBUF_LOCK(sb);
 		if (error)
 			goto out;
 	}
 	SBLASTRECORDCHK(sb);
 	SBLASTMBUFCHK(sb);
 
 	/*
 	 * Remove the delivered data from the socket buffer unless we
 	 * were only peeking.
 	 */
 	if (!(flags & MSG_PEEK)) {
 		if (len > 0)
 			sbdrop_locked(sb, len);
 
 		/* Notify protocol that we drained some data. */
 		SOCKBUF_UNLOCK(sb);
 		SDP_WLOCK(ssk);
 		sdp_do_posts(ssk);
 		SDP_WUNLOCK(ssk);
 		SOCKBUF_LOCK(sb);
 	}
 
 	/*
 	 * For MSG_WAITALL we may have to loop again and wait for
 	 * more data to come in.
 	 */
 	if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
 		goto restart;
 out:
 	/* Reached only with both the sockbuf mutex and sblock() held. */
 	SOCKBUF_LOCK_ASSERT(sb);
 	SBLASTRECORDCHK(sb);
 	SBLASTMBUFCHK(sb);
 	SOCKBUF_UNLOCK(sb);
 	sbunlock(sb);
 	return (error);
 }
 
 /*
  * Tear a connection down abruptly.  Typically used for sockets that are
  * still sitting in the accept queue.
  */
 void
 sdp_abort(struct socket *so)
 {
 	struct sdp_sock *sk = sdp_sk(so);
 
 	SDP_WLOCK(sk);
 	/* Drop the connection now unless it is in time-wait or already dropped. */
 	if ((sk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) == 0)
 		sdp_drop(sk, ECONNABORTED);
 	KASSERT(sk->flags & SDP_DROPPED, ("sdp_abort: %p not dropped 0x%X",
 	    sk, sk->flags));
 	SDP_WUNLOCK(sk);
 }
 
 /*
  * Close an SDP socket, starting an orderly disconnect when the
  * connection is still alive.
  */
 static void
 sdp_close(struct socket *so)
 {
 	struct sdp_sock *sk = sdp_sk(so);
 
 	SDP_WLOCK(sk);
 
 	/* Begin the disconnect unless in time-wait or already dropped. */
 	if ((sk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) == 0)
 		sdp_start_disconnect(sk);
 
 	/*
 	 * Still not dropped: tell the socket layer that the protocol keeps
 	 * a reference on the socket and pcb for a while longer.
 	 */
 	if ((sk->flags & SDP_DROPPED) == 0) {
 		SOCK_LOCK(so);
 		so->so_state |= SS_PROTOREF;
 		SOCK_UNLOCK(so);
 		sk->flags |= SDP_SOCKREF;
 	}
 
 	SDP_WUNLOCK(sk);
 }
 
 /*
  * Hand the pending out-of-band byte to the caller.
  *
  * On success the byte is stored in m (m_len is set to 1).  Unless
  * MSG_PEEK is given, the HAVEOOB/HADOOB flags are toggled so the byte is
  * marked as consumed.  Returns ECONNRESET when the rx ring cannot be
  * locked or the connection is gone, EINVAL when no OOB data can be read
  * this way, and EWOULDBLOCK when the byte has not arrived yet.
  */
 static int
 sdp_rcvoob(struct socket *so, struct mbuf *m, int flags)
 {
 	struct sdp_sock *sk = sdp_sk(so);
 	int rc;
 
 	SDP_WLOCK(sk);
 	if (!rx_ring_trylock(&sk->rx_ring)) {
 		SDP_WUNLOCK(sk);
 		return (ECONNRESET);
 	}
 
 	if (sk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
 		rc = ECONNRESET;
 	} else if ((so->so_oobmark == 0 &&
 	    (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
 	    (so->so_options & SO_OOBINLINE) ||
 	    (sk->oobflags & SDP_HADOOB)) {
 		rc = EINVAL;
 	} else if ((sk->oobflags & SDP_HAVEOOB) == 0) {
 		rc = EWOULDBLOCK;
 	} else {
 		m->m_len = 1;
 		*mtod(m, caddr_t) = sk->iobc;
 		if ((flags & MSG_PEEK) == 0)
 			sk->oobflags ^= (SDP_HAVEOOB | SDP_HADOOB);
 		rc = 0;
 	}
 
 	rx_ring_unlock(&sk->rx_ring);
 	SDP_WUNLOCK(sk);
 	return (rc);
 }
 
 /*
  * Record the arrival of urgent data carried by the packet mb.
  *
  * Sets the socket's out-of-band mark to the last byte of mb, signals the
  * socket, and resets the OOB flags.  Unless SO_OOBINLINE is set, the
  * final byte of the chain is moved into ssk->iobc and trimmed from the
  * packet.
  */
 void
 sdp_urg(struct sdp_sock *ssk, struct mbuf *mb)
 {
 	struct socket *sock = ssk->socket;
 	struct mbuf *last;
 
 	if (sock == NULL)
 		return;
 
 	sock->so_oobmark = sbused(&sock->so_rcv) + mb->m_pkthdr.len - 1;
 	sohasoutofband(sock);
 	ssk->oobflags &= ~(SDP_HAVEOOB | SDP_HADOOB);
 	if (sock->so_options & SO_OOBINLINE)
 		return;
 
 	/* Walk to the final mbuf of the chain; its last byte is the OOB byte. */
 	last = mb;
 	while (last->m_next != NULL)
 		last = last->m_next;
 	ssk->iobc = *(mtod(last, char *) + last->m_len - 1);
 	ssk->oobflags |= SDP_HAVEOOB;
 	last->m_len--;
 	mb->m_pkthdr.len--;
 }
 
 /*
  * Notify a sdp socket of an asynchronous error.
  *
  * Do not wake up user since there currently is no mechanism for
  * reporting soft errors (yet - a kqueue filter may be added).
  */
 struct sdp_sock *
 sdp_notify(struct sdp_sock *ssk, int error)
 {
 
 	SDP_WLOCK_ASSERT(ssk);
 
 	if ((ssk->flags & SDP_TIMEWAIT) ||
 	    (ssk->flags & SDP_DROPPED))
 		return (ssk);
 
 	/*
 	 * Ignore some errors if we are hooked up.
 	 */
 	if (ssk->state == TCPS_ESTABLISHED &&
 	    (error == EHOSTUNREACH || error == ENETUNREACH ||
 	     error == EHOSTDOWN))
 		return (ssk);
 	ssk->softerror = error;
 	return sdp_drop(ssk, error);
 }
 
 /*
  * Control-input hook: forward a control event for the IPv4 address in
  * sa to every matching SDP pcb, using sdp_notify() as the callback.
  *
  * Events for non-AF_INET addresses or for INADDR_ANY are ignored.
  */
 static void
 sdp_ctlinput(int cmd, struct sockaddr *sa, void *vip)
 {
 	struct in_addr faddr;
 
 	/* Check the family before treating sa as a sockaddr_in. */
 	if (sa->sa_family != AF_INET)
 		return;
 	faddr = ((struct sockaddr_in *)sa)->sin_addr;
 	if (faddr.s_addr == INADDR_ANY)
 		return;
 
 	/* NOTE(review): cmd indexes inetctlerrmap[] unchecked - confirm callers bound it. */
 	sdp_pcbnotifyall(faddr, inetctlerrmap[cmd], sdp_notify);
 }
 
 /*
  * pru_control handler: no control requests are supported, so every
  * request is rejected with EOPNOTSUPP.
  */
 static int
 sdp_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
     struct thread *td)
 {
 	return (EOPNOTSUPP);
 }
 
 /*
  * Callout handler for the keepalive timer (ssk->keep2msl).
  *
  * data is the struct sdp_sock the callout was armed for.  While the
  * socket is not dropped and SO_KEEPALIVE is still set, a keepalive is
  * posted and the timer re-armed for SDP_KEEPALIVE_TIME.
  *
  * Every exit releases the socket write lock.
  * NOTE(review): this assumes the callout runs with the ssk lock held and
  * must return with it released - confirm against the callout_init call.
  */
 static void
 sdp_keepalive_timeout(void *data)
 {
 	struct sdp_sock *ssk;
 
 	ssk = data;
 	/*
 	 * Callout canceled.  Leave through "out" like every other path so
 	 * that the lock is not leaked.
 	 */
 	if (!callout_active(&ssk->keep2msl))
 		goto out;
 	/* Callout rescheduled as a different kind of timer. */
 	if (callout_pending(&ssk->keep2msl))
 		goto out;
 	callout_deactivate(&ssk->keep2msl);
 	if (ssk->flags & SDP_DROPPED ||
 	    (ssk->socket->so_options & SO_KEEPALIVE) == 0)
 		goto out;
 	sdp_post_keepalive(ssk);
 	callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
 	    sdp_keepalive_timeout, ssk);
 out:
 	SDP_WUNLOCK(ssk);
 }
 
 
 /*
  * Arm the keepalive timer for so unless it is already pending.
  */
 void
 sdp_start_keepalive_timer(struct socket *so)
 {
 	struct sdp_sock *sk = sdp_sk(so);
 
 	if (callout_pending(&sk->keep2msl))
 		return;
 	callout_reset(&sk->keep2msl, SDP_KEEPALIVE_TIME,
 	    sdp_keepalive_timeout, sk);
 }
 
 /*
  * Stop the keepalive timer of so.
  */
 static void
 sdp_stop_keepalive_timer(struct socket *so)
 {
 	struct sdp_sock *sk = sdp_sk(so);
 
 	callout_stop(&sk->keep2msl);
 }
 
 /*
  * sdp_ctloutput() must drop the inpcb lock before performing copyin on
  * socket option arguments.  When it re-acquires the lock after the copy, it
  * has to revalidate that the connection is still valid for the socket
  * option.
  */
 #define SDP_WLOCK_RECHECK(inp) do {					\
 	SDP_WLOCK(ssk);							\
 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {		\
 		SDP_WUNLOCK(ssk);					\
 		return (ECONNRESET);					\
 	}								\
 } while(0)
 
 static int
 sdp_ctloutput(struct socket *so, struct sockopt *sopt)
 {
 	int	error, opt, optval;
 	struct sdp_sock *ssk;
 
 	error = 0;
 	ssk = sdp_sk(so);
 	if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_KEEPALIVE) {
 		SDP_WLOCK(ssk);
 		if (so->so_options & SO_KEEPALIVE)
 			sdp_start_keepalive_timer(so);
 		else
 			sdp_stop_keepalive_timer(so);
 		SDP_WUNLOCK(ssk);
 	}
 	if (sopt->sopt_level != IPPROTO_TCP)
 		return (error);
 
 	SDP_WLOCK(ssk);
 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
 		SDP_WUNLOCK(ssk);
 		return (ECONNRESET);
 	}
 
 	switch (sopt->sopt_dir) {
 	case SOPT_SET:
 		switch (sopt->sopt_name) {
 		case TCP_NODELAY:
 			SDP_WUNLOCK(ssk);
 			error = sooptcopyin(sopt, &optval, sizeof optval,
 			    sizeof optval);
 			if (error)
 				return (error);
 
 			SDP_WLOCK_RECHECK(ssk);
 			opt = SDP_NODELAY;
 			if (optval)
 				ssk->flags |= opt;
 			else
 				ssk->flags &= ~opt;
 			sdp_do_posts(ssk);
 			SDP_WUNLOCK(ssk);
 			break;
 
 		default:
 			SDP_WUNLOCK(ssk);
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 
 	case SOPT_GET:
 		switch (sopt->sopt_name) {
 		case TCP_NODELAY:
 			optval = ssk->flags & SDP_NODELAY;
 			SDP_WUNLOCK(ssk);
 			error = sooptcopyout(sopt, &optval, sizeof optval);
 			break;
 		default:
 			SDP_WUNLOCK(ssk);
 			error = ENOPROTOOPT;
 			break;
 		}
 		break;
 	}
 	return (error);
 }
 #undef SDP_WLOCK_RECHECK
 
 int sdp_mod_count = 0;
 int sdp_mod_usec = 0;
 
 void
 sdp_set_default_moderation(struct sdp_sock *ssk)
 {
 	struct ib_cq_attr attr;
 	if (sdp_mod_count <= 0 || sdp_mod_usec <= 0)
 		return;
 	memset(&attr, 0, sizeof(attr));
 	attr.moderation.cq_count = sdp_mod_count;
 	attr.moderation.cq_period = sdp_mod_usec;
 
 	ib_modify_cq(ssk->rx_ring.cq, &attr, IB_CQ_MODERATION);
 }
 
 static void
 sdp_dev_add(struct ib_device *device)
 {
 	struct ib_fmr_pool_param param;
 	struct sdp_device *sdp_dev;
 
 	sdp_dev = malloc(sizeof(*sdp_dev), M_SDP, M_WAITOK | M_ZERO);
 	sdp_dev->pd = ib_alloc_pd(device);
 	if (IS_ERR(sdp_dev->pd))
 		goto out_pd;
         sdp_dev->mr = ib_get_dma_mr(sdp_dev->pd, IB_ACCESS_LOCAL_WRITE);
         if (IS_ERR(sdp_dev->mr))
 		goto out_mr;
 	memset(&param, 0, sizeof param);
 	param.max_pages_per_fmr = SDP_FMR_SIZE;
 	param.page_shift = PAGE_SHIFT;
 	param.access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ);
 	param.pool_size = SDP_FMR_POOL_SIZE;
 	param.dirty_watermark = SDP_FMR_DIRTY_SIZE;
 	param.cache = 1;
 	sdp_dev->fmr_pool = ib_create_fmr_pool(sdp_dev->pd, &param);
 	if (IS_ERR(sdp_dev->fmr_pool))
 		goto out_fmr;
 	ib_set_client_data(device, &sdp_client, sdp_dev);
 	return;
 
 out_fmr:
 	ib_dereg_mr(sdp_dev->mr);
 out_mr:
 	ib_dealloc_pd(sdp_dev->pd);
 out_pd:
 	free(sdp_dev, M_SDP);
 }
 
 /*
  * IB client "remove" callback: notify every SDP socket bound to the
  * departing device with ECONNRESET, then release the per-device state
  * set up by sdp_dev_add().
  */
 static void
 sdp_dev_rem(struct ib_device *device)
 {
 	struct sdp_device *sdp_dev;
 	struct sdp_sock *ssk, *next, *locked;
 
 	SDP_LIST_WLOCK();
 	/*
 	 * sdp_notify() may return NULL (the result is NULL-checked below),
 	 * so keep its result apart from the iterator and fetch the next
 	 * element before calling it.
 	 */
 	LIST_FOREACH_SAFE(ssk, &sdp_list, list, next) {
 		if (ssk->ib_device != device)
 			continue;
 		SDP_WLOCK(ssk);
 		locked = ssk;
 		if ((ssk->flags & SDP_DESTROY) == 0)
 			locked = sdp_notify(ssk, ECONNRESET);
 		if (locked != NULL)
 			SDP_WUNLOCK(locked);
 	}
 	SDP_LIST_WUNLOCK();
 	/*
 	 * XXX Do I need to wait between these two?
 	 */
 	sdp_dev = ib_get_client_data(device, &sdp_client);
 	if (!sdp_dev)
 		return;
 	ib_flush_fmr_pool(sdp_dev->fmr_pool);
 	ib_destroy_fmr_pool(sdp_dev->fmr_pool);
 	ib_dereg_mr(sdp_dev->mr);
 	ib_dealloc_pd(sdp_dev->pd);
 	free(sdp_dev, M_SDP);
 }
 
 /* IB client descriptor for SDP: device add/remove callbacks (passed to ib_register_client() in sdp_init()). */
 struct ib_client sdp_client =
     { .name = "sdp", .add = sdp_dev_add, .remove = sdp_dev_rem };
 
 
 /*
  * Sysctl handler that exports the list of SDP connections as a sequence
  * of struct xtcpcb records, bracketed by a leading and a trailing
  * struct xinpgen.
  *
  * With no output buffer (oldptr == NULL) only a size estimate is
  * returned.  The node is read-only: supplying a new value yields EPERM.
  * Sockets the requesting credential may not see are silently skipped.
  */
 static int
 sdp_pcblist(SYSCTL_HANDLER_ARGS)
 {
 	int error, n, i;
 	struct sdp_sock *ssk;
 	struct xinpgen xig;
 
 	/*
 	 * The process of preparing the TCB list is too time-consuming and
 	 * resource-intensive to repeat twice on every request.
 	 */
 	if (req->oldptr == NULL) {
 		/* Pad the estimate so the list may grow before the real call. */
 		n = sdp_count;
 		n += imax(n / 8, 10);
 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
 		return (0);
 	}
 
 	if (req->newptr != NULL)
 		return (EPERM);
 
 	/*
 	 * OK, now we're committed to doing something.
 	 */
 	SDP_LIST_RLOCK();
 	n = sdp_count;
 	SDP_LIST_RUNLOCK();
 
 	error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
 		+ n * sizeof(struct xtcpcb));
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Zero the header before filling it in, so that any field or
 	 * padding not set below is not copied out as stale stack contents.
 	 */
 	bzero(&xig, sizeof(xig));
 	xig.xig_len = sizeof xig;
 	xig.xig_count = n;
 	xig.xig_gen = 0;
 	xig.xig_sogen = so_gencnt;
 	error = SYSCTL_OUT(req, &xig, sizeof xig);
 	if (error)
 		return (error);
 
 	SDP_LIST_RLOCK();
 	/* i counts exported entries only; at most n are written. */
 	for (ssk = LIST_FIRST(&sdp_list), i = 0;
 	    ssk != NULL && i < n; ssk = LIST_NEXT(ssk, list)) {
 		struct xtcpcb xt;
 
 		SDP_RLOCK(ssk);
 		if (ssk->flags & SDP_TIMEWAIT) {
 			if (ssk->cred != NULL)
 				error = cr_cansee(req->td->td_ucred,
 				    ssk->cred);
 			else
 				error = EINVAL;	/* Skip this inp. */
 		} else if (ssk->socket)
 			error = cr_canseesocket(req->td->td_ucred,
 			    ssk->socket);
 		else
 			error = EINVAL;
 		if (error) {
 			/* Not visible to the caller: skip without failing. */
 			error = 0;
 			goto next;
 		}
 
 		bzero(&xt, sizeof(xt));
 		xt.xt_len = sizeof xt;
 		xt.xt_inp.inp_gencnt = 0;
 		xt.xt_inp.inp_vflag = INP_IPV4;
 		memcpy(&xt.xt_inp.inp_laddr, &ssk->laddr, sizeof(ssk->laddr));
 		xt.xt_inp.inp_lport = ssk->lport;
 		memcpy(&xt.xt_inp.inp_faddr, &ssk->faddr, sizeof(ssk->faddr));
 		xt.xt_inp.inp_fport = ssk->fport;
 		xt.t_state = ssk->state;
 		if (ssk->socket != NULL)
 			sotoxsocket(ssk->socket, &xt.xt_inp.xi_socket);
 		xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
 		SDP_RUNLOCK(ssk);
 		error = SYSCTL_OUT(req, &xt, sizeof xt);
 		if (error)
 			break;
 		i++;
 		continue;
 next:
 		SDP_RUNLOCK(ssk);
 	}
 	if (!error) {
 		/*
 		 * Give the user an updated idea of our state.
 		 * If the generation differs from what we told
 		 * her before, she knows that something happened
 		 * while we were processing this request, and it
 		 * might be necessary to retry.
 		 */
 		xig.xig_gen = 0;
 		xig.xig_sogen = so_gencnt;
 		xig.xig_count = sdp_count;
 		error = SYSCTL_OUT(req, &xig, sizeof xig);
 	}
 	SDP_LIST_RUNLOCK();
 	return (error);
 }
 
 /* Sysctl subtree "sdp" under _net_inet. */
 static SYSCTL_NODE(_net_inet, -1,  sdp,    CTLFLAG_RW, 0,  "SDP");
 
 /* Read-only "pcblist" node in that subtree, served by sdp_pcblist(). */
 SYSCTL_PROC(_net_inet_sdp, TCPCTL_PCBLIST, pcblist,
     CTLFLAG_RD | CTLTYPE_STRUCT, 0, 0, sdp_pcblist, "S,xtcpcb",
     "List of active SDP connections");
 
 /*
  * maxsockets_change event handler (registered in sdp_init()): re-apply
  * the current maxsockets value as the limit of the sdp_sock UMA zone.
  */
 static void
 sdp_zone_change(void *tag)
 {
 
 	uma_zone_set_max(sdp_zone, maxsockets);
 }
 
 /*
  * Domain initialisation hook (installed as .dom_init of sdpdomain).
  *
  * Sets up the global socket list and the sdp_sock UMA zone (capped at
  * maxsockets and kept in sync through the maxsockets_change event),
  * creates the receive-completion workqueue and registers SDP as an IB
  * client.
  */
 static void
 sdp_init(void)
 {
 
 	LIST_INIT(&sdp_list);
 	sdp_zone = uma_zcreate("sdp_sock", sizeof(struct sdp_sock),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uma_zone_set_max(sdp_zone, maxsockets);
 	EVENTHANDLER_REGISTER(maxsockets_change, sdp_zone_change, NULL,
 		EVENTHANDLER_PRI_ANY);
 	rx_comp_wq = create_singlethread_workqueue("rx_comp_wq");
 	ib_register_client(&sdp_client);
 }
 
 extern struct domain sdpdomain;
 
 /*
  * User-request dispatch table for SDP sockets.  Send and receive go
  * through the SDP-specific sdp_sosend()/sdp_sorecv().
  */
 struct pr_usrreqs sdp_usrreqs = {
 	.pru_abort =		sdp_abort,
 	.pru_accept =		sdp_accept,
 	.pru_attach =		sdp_attach,
 	.pru_bind =		sdp_bind,
 	.pru_connect =		sdp_connect,
 	.pru_control =		sdp_control,
 	.pru_detach =		sdp_detach,
 	.pru_disconnect =	sdp_disconnect,
 	.pru_listen =		sdp_listen,
 	.pru_peeraddr =		sdp_getpeeraddr,
 	.pru_rcvoob =		sdp_rcvoob,
 	.pru_send =		sdp_send,
 	.pru_sosend =		sdp_sosend,
 	.pru_soreceive =	sdp_sorecv,
 	.pru_shutdown =		sdp_shutdown,
 	.pru_sockaddr =		sdp_getsockaddr,
 	.pru_close =		sdp_close,
 };
 
 /*
  * Protocol switch for the SDP domain.  The same stream protocol is
  * registered twice, once as IPPROTO_IP and once as IPPROTO_TCP; the two
  * entries are otherwise identical.
  */
 struct protosw sdpsw[] = {
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&sdpdomain,
 	.pr_protocol =		IPPROTO_IP,
 	.pr_flags =		PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
 	.pr_ctlinput =		sdp_ctlinput,
 	.pr_ctloutput =		sdp_ctloutput,
 	.pr_usrreqs =		&sdp_usrreqs
 },
 {
 	.pr_type =		SOCK_STREAM,
 	.pr_domain =		&sdpdomain,
 	.pr_protocol =		IPPROTO_TCP,
 	.pr_flags =		PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,
 	.pr_ctlinput =		sdp_ctlinput,
 	.pr_ctloutput =		sdp_ctloutput,
 	.pr_usrreqs =		&sdp_usrreqs
 },
 };
 
 /* Domain descriptor for AF_INET_SDP; initialised by sdp_init(). */
 struct domain sdpdomain = {
 	.dom_family =		AF_INET_SDP,
 	.dom_name =		"SDP",
 	.dom_init =		sdp_init,
 	.dom_protosw =		sdpsw,
 	/* One past the last sdpsw[] entry. */
 	.dom_protoswNPROTOSW =	&sdpsw[sizeof(sdpsw)/sizeof(sdpsw[0])],
 };
 
 /* Register sdpdomain with the kernel's domain list. */
 DOMAIN_SET(sdp);
 
 /* Debug verbosity settings; presumably read by the SDP debug macros - verify. */
 int sdp_debug_level = 1;
 int sdp_data_debug_level = 0;
Index: head/sys/rpc/svc_vc.c
===================================================================
--- head/sys/rpc/svc_vc.c	(revision 319721)
+++ head/sys/rpc/svc_vc.c	(revision 319722)
@@ -1,983 +1,992 @@
 /*	$NetBSD: svc_vc.c,v 1.7 2000/08/03 00:01:53 fvdl Exp $	*/
 
 /*-
  * Copyright (c) 2009, Sun Microsystems, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without 
  * modification, are permitted provided that the following conditions are met:
  * - Redistributions of source code must retain the above copyright notice, 
  *   this list of conditions and the following disclaimer.
  * - Redistributions in binary form must reproduce the above copyright notice, 
  *   this list of conditions and the following disclaimer in the documentation 
  *   and/or other materials provided with the distribution.
  * - Neither the name of Sun Microsystems, Inc. nor the names of its 
  *   contributors may be used to endorse or promote products derived 
  *   from this software without specific prior written permission.
  * 
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #if defined(LIBC_SCCS) && !defined(lint)
 static char *sccsid2 = "@(#)svc_tcp.c 1.21 87/08/11 Copyr 1984 Sun Micro";
 static char *sccsid = "@(#)svc_tcp.c	2.2 88/08/01 4.0 RPCSRC";
 #endif
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * svc_vc.c, Server side for Connection Oriented based RPC. 
  *
  * Actually implements two flavors of transporter -
  * a tcp rendezvouser (a listener and connection establisher)
  * and a record/tcp stream.
  */
 
 #include <sys/param.h>
 #include <sys/lock.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sx.h>
 #include <sys/systm.h>
 #include <sys/uio.h>
 
 #include <net/vnet.h>
 
 #include <netinet/tcp.h>
 
 #include <rpc/rpc.h>
 
 #include <rpc/krpc.h>
 #include <rpc/rpc_com.h>
 
 #include <security/mac/mac_framework.h>
 
 static bool_t svc_vc_rendezvous_recv(SVCXPRT *, struct rpc_msg *,
     struct sockaddr **, struct mbuf **);
 static enum xprt_stat svc_vc_rendezvous_stat(SVCXPRT *);
 static void svc_vc_rendezvous_destroy(SVCXPRT *);
 static bool_t svc_vc_null(void);
 static void svc_vc_destroy(SVCXPRT *);
 static enum xprt_stat svc_vc_stat(SVCXPRT *);
 static bool_t svc_vc_ack(SVCXPRT *, uint32_t *);
 static bool_t svc_vc_recv(SVCXPRT *, struct rpc_msg *,
     struct sockaddr **, struct mbuf **);
 static bool_t svc_vc_reply(SVCXPRT *, struct rpc_msg *,
     struct sockaddr *, struct mbuf *, uint32_t *seq);
 static bool_t svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in);
 static bool_t svc_vc_rendezvous_control (SVCXPRT *xprt, const u_int rq,
     void *in);
 static void svc_vc_backchannel_destroy(SVCXPRT *);
 static enum xprt_stat svc_vc_backchannel_stat(SVCXPRT *);
 static bool_t svc_vc_backchannel_recv(SVCXPRT *, struct rpc_msg *,
     struct sockaddr **, struct mbuf **);
 static bool_t svc_vc_backchannel_reply(SVCXPRT *, struct rpc_msg *,
     struct sockaddr *, struct mbuf *, uint32_t *);
 static bool_t svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq,
     void *in);
 static SVCXPRT *svc_vc_create_conn(SVCPOOL *pool, struct socket *so,
     struct sockaddr *raddr);
 static int svc_vc_accept(struct socket *head, struct socket **sop);
 static int svc_vc_soupcall(struct socket *so, void *arg, int waitflag);
+static int svc_vc_rendezvous_soupcall(struct socket *, void *, int);
 
 /*
  * Transport operations for the listening (rendezvous) transport.  It
  * never sends replies, so xp_reply is the svc_vc_null stub cast to the
  * reply signature.
  */
 static struct xp_ops svc_vc_rendezvous_ops = {
 	.xp_recv =	svc_vc_rendezvous_recv,
 	.xp_stat =	svc_vc_rendezvous_stat,
 	.xp_reply =	(bool_t (*)(SVCXPRT *, struct rpc_msg *,
 		struct sockaddr *, struct mbuf *, uint32_t *))svc_vc_null,
 	.xp_destroy =	svc_vc_rendezvous_destroy,
 	.xp_control =	svc_vc_rendezvous_control
 };
 
 /* Transport operations for an established connection. */
 static struct xp_ops svc_vc_ops = {
 	.xp_recv =	svc_vc_recv,
 	.xp_stat =	svc_vc_stat,
 	.xp_ack =	svc_vc_ack,
 	.xp_reply =	svc_vc_reply,
 	.xp_destroy =	svc_vc_destroy,
 	.xp_control =	svc_vc_control
 };
 
 /* Transport operations for a backchannel transport (see svc_vc_create_backchannel()). */
 static struct xp_ops svc_vc_backchannel_ops = {
 	.xp_recv =	svc_vc_backchannel_recv,
 	.xp_stat =	svc_vc_backchannel_stat,
 	.xp_reply =	svc_vc_backchannel_reply,
 	.xp_destroy =	svc_vc_backchannel_destroy,
 	.xp_control =	svc_vc_backchannel_control
 };
 
 /*
  * Usage:
  *	xprt = svc_vc_create(sock, send_buf_size, recv_buf_size);
  *
  * Creates, registers, and returns a (rpc) tcp based transporter.
  * Once *xprt is initialized, it is registered as a transporter
  * see (svc.h, xprt_register).  This routine returns
  * a NULL if a problem occurred.
  *
  * The filedescriptor passed in is expected to refer to a bound, but
  * not yet connected socket.
  *
  * Since streams do buffered io similar to stdio, the caller can specify
  * how big the send and receive buffers are via the second and third parms;
  * 0 => use the system default.
  */
 SVCXPRT *
 svc_vc_create(SVCPOOL *pool, struct socket *so, size_t sendsize,
     size_t recvsize)
 {
 	SVCXPRT *xprt;
 	struct sockaddr* sa;
 	int error;
 
 	/*
 	 * A socket that is (or was) connected gets a connection transport
 	 * for its peer instead of a rendezvous transport.
 	 */
 	SOCK_LOCK(so);
 	if (so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED)) {
 		SOCK_UNLOCK(so);
 		CURVNET_SET(so->so_vnet);
 		error = so->so_proto->pr_usrreqs->pru_peeraddr(so, &sa);
 		CURVNET_RESTORE();
 		if (error)
 			return (NULL);
 		xprt = svc_vc_create_conn(pool, so, sa);
 		free(sa, M_SONAME);
 		return (xprt);
 	}
 	SOCK_UNLOCK(so);
 
 	/* Unconnected socket: build the rendezvous (listening) transport. */
 	xprt = svc_xprt_alloc();
 	sx_init(&xprt->xp_lock, "xprt->xp_lock");
 	xprt->xp_pool = pool;
 	xprt->xp_socket = so;
 	xprt->xp_p1 = NULL;
 	xprt->xp_p2 = NULL;
 	xprt->xp_ops = &svc_vc_rendezvous_ops;
 
 	CURVNET_SET(so->so_vnet);
 	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
 	CURVNET_RESTORE();
 	if (error) {
 		goto cleanup_svc_vc_create;
 	}
 
 	memcpy(&xprt->xp_ltaddr, sa, sa->sa_len);
 	free(sa, M_SONAME);
 
 	xprt_register(xprt);
 
 	/* NOTE(review): the result of solisten() is ignored - confirm that is intended. */
 	solisten(so, -1, curthread);
 
 	/*
 	 * Install the upcall on the listening socket.  The revised lines
 	 * use the listen lock and the dedicated rendezvous upcall instead
 	 * of the receive-buffer upcall.
 	 */
-	SOCKBUF_LOCK(&so->so_rcv);
+	SOLISTEN_LOCK(so);
 	xprt->xp_upcallset = 1;
-	soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
-	SOCKBUF_UNLOCK(&so->so_rcv);
+	solisten_upcall_set(so, svc_vc_rendezvous_soupcall, xprt);
+	SOLISTEN_UNLOCK(so);
 
 	return (xprt);
 
 cleanup_svc_vc_create:
 	sx_destroy(&xprt->xp_lock);
 	svc_xprt_free(xprt);
 
 	return (NULL);
 }
 
 /*
  * Create a new transport for a socket optained via soaccept().
  */
 SVCXPRT *
 svc_vc_create_conn(SVCPOOL *pool, struct socket *so, struct sockaddr *raddr)
 {
 	SVCXPRT *xprt;
 	struct cf_conn *cd;
 	struct sockaddr* sa = NULL;
 	struct sockopt opt;
 	int one = 1;
 	int error;
 
 	bzero(&opt, sizeof(struct sockopt));
 	opt.sopt_dir = SOPT_SET;
 	opt.sopt_level = SOL_SOCKET;
 	opt.sopt_name = SO_KEEPALIVE;
 	opt.sopt_val = &one;
 	opt.sopt_valsize = sizeof(one);
 	error = sosetopt(so, &opt);
 	if (error) {
 		return (NULL);
 	}
 
 	if (so->so_proto->pr_protocol == IPPROTO_TCP) {
 		bzero(&opt, sizeof(struct sockopt));
 		opt.sopt_dir = SOPT_SET;
 		opt.sopt_level = IPPROTO_TCP;
 		opt.sopt_name = TCP_NODELAY;
 		opt.sopt_val = &one;
 		opt.sopt_valsize = sizeof(one);
 		error = sosetopt(so, &opt);
 		if (error) {
 			return (NULL);
 		}
 	}
 
 	cd = mem_alloc(sizeof(*cd));
 	cd->strm_stat = XPRT_IDLE;
 
 	xprt = svc_xprt_alloc();
 	sx_init(&xprt->xp_lock, "xprt->xp_lock");
 	xprt->xp_pool = pool;
 	xprt->xp_socket = so;
 	xprt->xp_p1 = cd;
 	xprt->xp_p2 = NULL;
 	xprt->xp_ops = &svc_vc_ops;
 
 	/*
 	 * See http://www.connectathon.org/talks96/nfstcp.pdf - client
 	 * has a 5 minute timer, server has a 6 minute timer.
 	 */
 	xprt->xp_idletimeout = 6 * 60;
 
 	memcpy(&xprt->xp_rtaddr, raddr, raddr->sa_len);
 
 	CURVNET_SET(so->so_vnet);
 	error = so->so_proto->pr_usrreqs->pru_sockaddr(so, &sa);
 	CURVNET_RESTORE();
 	if (error)
 		goto cleanup_svc_vc_create;
 
 	memcpy(&xprt->xp_ltaddr, sa, sa->sa_len);
 	free(sa, M_SONAME);
 
 	xprt_register(xprt);
 
 	SOCKBUF_LOCK(&so->so_rcv);
 	xprt->xp_upcallset = 1;
 	soupcall_set(so, SO_RCV, svc_vc_soupcall, xprt);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 
 	/*
 	 * Throw the transport into the active list in case it already
 	 * has some data buffered.
 	 */
 	sx_xlock(&xprt->xp_lock);
 	xprt_active(xprt);
 	sx_xunlock(&xprt->xp_lock);
 
 	return (xprt);
 cleanup_svc_vc_create:
 	sx_destroy(&xprt->xp_lock);
 	svc_xprt_free(xprt);
 	mem_free(cd, sizeof(*cd));
 
 	return (NULL);
 }
 
 /*
  * Create a new transport for a backchannel on a clnt_vc socket.
  *
  * The transport starts without a socket (xp_socket is NULL) and uses the
  * backchannel operations table.
  */
 SVCXPRT *
 svc_vc_create_backchannel(SVCPOOL *pool)
 {
 	struct cf_conn *conn;
 	SVCXPRT *bc_xprt;
 
 	conn = mem_alloc(sizeof(*conn));
 	conn->strm_stat = XPRT_IDLE;
 
 	bc_xprt = svc_xprt_alloc();
 	sx_init(&bc_xprt->xp_lock, "xprt->xp_lock");
 	bc_xprt->xp_pool = pool;
 	bc_xprt->xp_socket = NULL;
 	bc_xprt->xp_p1 = conn;
 	bc_xprt->xp_p2 = NULL;
 	bc_xprt->xp_ops = &svc_vc_backchannel_ops;
 
 	return (bc_xprt);
 }
 
 /*
  * This does all of the accept except the final call to soaccept. The
  * caller will call soaccept after dropping its locks (soaccept may
  * call malloc).
  *
  * On success the dequeued connection is stored in *sop and 0 is
  * returned; otherwise an errno value is returned and *sop is untouched.
  */
 int
 svc_vc_accept(struct socket *head, struct socket **sop)
 {
-	int error = 0;
 	struct socket *so;
+	int error = 0;
+	short nbio;
 
+	/* XXXGL: shouldn't that be an assertion? */
 	if ((head->so_options & SO_ACCEPTCONN) == 0) {
 		error = EINVAL;
 		goto done;
 	}
 #ifdef MAC
 	error = mac_socket_check_accept(curthread->td_ucred, head);
 	if (error != 0)
 		goto done;
 #endif
-	ACCEPT_LOCK();
-	if (TAILQ_EMPTY(&head->so_comp)) {
-		ACCEPT_UNLOCK();
-		error = EWOULDBLOCK;
-		goto done;
-	}
-	so = TAILQ_FIRST(&head->so_comp);
-	KASSERT(!(so->so_qstate & SQ_INCOMP), ("svc_vc_accept: so SQ_INCOMP"));
-	KASSERT(so->so_qstate & SQ_COMP, ("svc_vc_accept: so not SQ_COMP"));
-
 	/*
-	 * Before changing the flags on the socket, we have to bump the
-	 * reference count.  Otherwise, if the protocol calls sofree(),
-	 * the socket will be released due to a zero refcount.
-	 * XXX might not need soref() since this is simpler than kern_accept.
+	 * XXXGL: we want non-blocking semantics.  The socket could be a
+	 * socket created by kernel as well as socket shared with userland,
+	 * so we can't be sure about presence of SS_NBIO.  We also shall not
+	 * toggle it on the socket, since that may surprise userland.  So we
+	 * set SS_NBIO only temporarily.
 	 */
-	SOCK_LOCK(so);			/* soref() and so_state update */
-	soref(so);			/* file descriptor reference */
+	SOLISTEN_LOCK(head);
+	nbio = head->so_state & SS_NBIO;
+	head->so_state |= SS_NBIO;
+	error = solisten_dequeue(head, &so, 0);
+	/*
+	 * Restore the caller's SS_NBIO setting and leave every other state
+	 * bit alone.  The mask must be (nbio | ~SS_NBIO); (nbio & ~SS_NBIO)
+	 * is always 0 and would wipe all of so_state.
+	 * NOTE(review): confirm the listen lock is still held here; if
+	 * solisten_dequeue() drops it, this update runs unlocked.
+	 */
+	head->so_state &= (nbio | ~SS_NBIO);
+	if (error)
+		goto done;
 
-	TAILQ_REMOVE(&head->so_comp, so, so_list);
-	head->so_qlen--;
-	so->so_state |= (head->so_state & SS_NBIO);
-	so->so_qstate &= ~SQ_COMP;
-	so->so_head = NULL;
-
-	SOCK_UNLOCK(so);
-	ACCEPT_UNLOCK();
-
+	so->so_state |= nbio;
 	*sop = so;
 
 	/* connection has been removed from the listen queue */
-	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
+	KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
 done:
 	return (error);
 }
 
 /*
  * xp_recv for the rendezvous transport: accept one pending connection
  * and wrap it in a new connection transport.  Always returns FALSE,
  * because a listening socket never yields an RPC message itself.
  */
 /*ARGSUSED*/
 static bool_t
 svc_vc_rendezvous_recv(SVCXPRT *xprt, struct rpc_msg *msg,
     struct sockaddr **addrp, struct mbuf **mp)
 {
 	struct socket *so = NULL;
 	struct sockaddr *sa = NULL;
 	int error;
 	SVCXPRT *new_xprt;
 
 	/*
 	 * The socket upcall calls xprt_active() which will eventually
 	 * cause the server to call us here. We attempt to accept a
 	 * connection from the socket and turn it into a new
 	 * transport. If the accept fails, we have drained all pending
 	 * connections so we call xprt_inactive().
 	 */
 	sx_xlock(&xprt->xp_lock);
 
 	error = svc_vc_accept(xprt->xp_socket, &so);
 
 	if (error == EWOULDBLOCK) {
 		/*
 		 * We must re-test for new connections after taking
 		 * the lock to protect us in the case where a new
 		 * connection arrives after our call to accept fails
 		 * with EWOULDBLOCK.
 		 */
-		ACCEPT_LOCK();
-		if (TAILQ_EMPTY(&xprt->xp_socket->so_comp))
+		SOLISTEN_LOCK(xprt->xp_socket);
+		if (TAILQ_EMPTY(&xprt->xp_socket->sol_comp))
 			xprt_inactive_self(xprt);
-		ACCEPT_UNLOCK();
+		SOLISTEN_UNLOCK(xprt->xp_socket);
 		sx_xunlock(&xprt->xp_lock);
 		return (FALSE);
 	}
 
 	if (error) {
-		SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
+		SOLISTEN_LOCK(xprt->xp_socket);
 		if (xprt->xp_upcallset) {
 			xprt->xp_upcallset = 0;
-			soupcall_clear(xprt->xp_socket, SO_RCV);
+			/*
+			 * The upcall was installed with solisten_upcall_set()
+			 * in svc_vc_create(), so clear it the same way, as
+			 * svc_vc_rendezvous_destroy() does.
+			 */
+			solisten_upcall_set(xprt->xp_socket, NULL, NULL);
 		}
-		SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
+		SOLISTEN_UNLOCK(xprt->xp_socket);
 		xprt_inactive_self(xprt);
 		sx_xunlock(&xprt->xp_lock);
 		return (FALSE);
 	}
 
 	sx_xunlock(&xprt->xp_lock);
 
 	sa = NULL;
 	error = soaccept(so, &sa);
 
 	if (error) {
 		/*
 		 * XXX not sure if I need to call sofree or soclose here.
 		 * NOTE(review): so is not released on this path; it looks
 		 * like a leaked socket - confirm and close it if so.
 		 */
 		if (sa)
 			free(sa, M_SONAME);
 		return (FALSE);
 	}
 
 	/*
 	 * svc_vc_create_conn will call xprt_register - we don't need
 	 * to do anything with the new connection except dereference it.
 	 */
 	new_xprt = svc_vc_create_conn(xprt->xp_pool, so, sa);
 	if (!new_xprt) {
 		soclose(so);
 	} else {
 		SVC_RELEASE(new_xprt);
 	}
 
 	free(sa, M_SONAME);
 
 	return (FALSE); /* there is never an rpc msg to be processed */
 }
 
 /*
  * xp_stat for the rendezvous transport: always reports XPRT_IDLE.
  */
 /*ARGSUSED*/
 static enum xprt_stat
 svc_vc_rendezvous_stat(SVCXPRT *xprt)
 {
 
 	return (XPRT_IDLE);
 }
 
 /*
  * Teardown shared by the rendezvous and connection destructors: close
  * the socket (if any), free the netid string (if any) and free the
  * transport itself.  The removed lines show that clearing the socket
  * upcall used to happen here; after this change each destructor clears
  * its own upcall before calling this function.
  */
 static void
 svc_vc_destroy_common(SVCXPRT *xprt)
 {
-	SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
-	if (xprt->xp_upcallset) {
-		xprt->xp_upcallset = 0;
-		soupcall_clear(xprt->xp_socket, SO_RCV);
-	}
-	SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
 
 	if (xprt->xp_socket)
 		(void)soclose(xprt->xp_socket);
 
 	if (xprt->xp_netid)
 		(void) mem_free(xprt->xp_netid, strlen(xprt->xp_netid) + 1);
 	svc_xprt_free(xprt);
 }
 
 /*
  * Destroy a rendezvous transport.  With SOLISTEN_LOCK held, a registered
  * upcall (xp_upcallset != 0) is detached by passing NULL handlers to
  * solisten_upcall_set(); svc_vc_destroy_common() then finishes teardown.
  */
 static void
 svc_vc_rendezvous_destroy(SVCXPRT *xprt)
 {
 
+	SOLISTEN_LOCK(xprt->xp_socket);
+	if (xprt->xp_upcallset) {
+		xprt->xp_upcallset = 0;
+		solisten_upcall_set(xprt->xp_socket, NULL, NULL);
+	}
+	SOLISTEN_UNLOCK(xprt->xp_socket);
+
 	svc_vc_destroy_common(xprt);
 }
 
 /*
  * Destroy a connection transport.  With the receive sockbuf locked, a
  * registered receive upcall is cleared; svc_vc_destroy_common() then
  * runs, after which the buffered mbuf chains (mreq, mpending) and the
  * cf_conn itself are freed.
  */
 static void
 svc_vc_destroy(SVCXPRT *xprt)
 {
 	/* Read xp_p1 up front: svc_vc_destroy_common() frees xprt. */
 	struct cf_conn *cd = (struct cf_conn *)xprt->xp_p1;
 
+	SOCKBUF_LOCK(&xprt->xp_socket->so_rcv);
+	if (xprt->xp_upcallset) {
+		xprt->xp_upcallset = 0;
+		soupcall_clear(xprt->xp_socket, SO_RCV);
+	}
+	SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
+
 	svc_vc_destroy_common(xprt);
 
 	if (cd->mreq)
 		m_freem(cd->mreq);
 	if (cd->mpending)
 		m_freem(cd->mpending);
 	mem_free(cd, sizeof(*cd));
 }
 
 /*
  * Destroy a backchannel transport: release the xprt, free every packet
  * queued on the cf_conn's mreq list (linked through m_nextpkt), then
  * free the cf_conn itself.
  */
 static void
 svc_vc_backchannel_destroy(SVCXPRT *xprt)
 {
 	struct cf_conn *conn;
 	struct mbuf *pkt, *next;
 
 	/* Fetch the private data first; xprt is released right after. */
 	conn = (struct cf_conn *)xprt->xp_p1;
 	svc_xprt_free(xprt);
 
 	for (pkt = conn->mreq; pkt != NULL; pkt = next) {
 		/* Remember the successor before this packet is freed. */
 		next = pkt->m_nextpkt;
 		m_freem(pkt);
 	}
 	mem_free(conn, sizeof(*conn));
 }
 
 /*
  * Control hook for a connection transport.  No request is handled: the
  * arguments are ignored and FALSE is always returned.
  */
 /*ARGSUSED*/
 static bool_t
 svc_vc_control(SVCXPRT *xprt, const u_int rq, void *in)
 {
 	return (FALSE);
 }
 
 /*
  * Control hook for a rendezvous transport.  No request is handled: the
  * arguments are ignored and FALSE is always returned.
  */
 static bool_t
 svc_vc_rendezvous_control(SVCXPRT *xprt, const u_int rq, void *in)
 {
 
 	return (FALSE);
 }
 
 /*
  * Control hook for a backchannel transport.  No request is handled: the
  * arguments are ignored and FALSE is always returned.
  */
 static bool_t
 svc_vc_backchannel_control(SVCXPRT *xprt, const u_int rq, void *in)
 {
 
 	return (FALSE);
 }
 
 /*
  * Report the state of a connection transport:
  *   XPRT_DIED     - cd->strm_stat has been set to XPRT_DIED;
  *   XPRT_MOREREQS - a complete record sits in cd->mreq, or the socket
  *                   is readable;
  *   XPRT_IDLE     - none of the above.
  */
 static enum xprt_stat
 svc_vc_stat(SVCXPRT *xprt)
 {
 	struct cf_conn *conn = (struct cf_conn *)(xprt->xp_p1);
 	int have_record;
 
 	if (conn->strm_stat == XPRT_DIED)
 		return (XPRT_DIED);
 
 	/* Complete record: data present, no bytes outstanding, eor set. */
 	have_record = (conn->mreq != NULL && conn->resid == 0 && conn->eor);
 	if (have_record || soreadable(xprt->xp_socket))
 		return (XPRT_MOREREQS);
 
 	return (XPRT_IDLE);
 }
 
 /*
  * Store in *ack the acquire-loaded value of xp_snt_cnt minus the byte
  * count sbused() reports for the socket's send buffer.  Always returns
  * TRUE.
  */
 static bool_t
 svc_vc_ack(SVCXPRT *xprt, uint32_t *ack)
 {
 	uint32_t sent, queued;
 
 	sent = atomic_load_acq_32(&xprt->xp_snt_cnt);
 	queued = sbused(&xprt->xp_socket->so_snd);
 	*ack = sent - queued;
 	return (TRUE);
 }
 
 /*
  * Report the state of a backchannel transport: XPRT_MOREREQS while at
  * least one packet is queued on cd->mreq, XPRT_IDLE otherwise.
  */
 static enum xprt_stat
 svc_vc_backchannel_stat(SVCXPRT *xprt)
 {
 	struct cf_conn *conn = (struct cf_conn *)(xprt->xp_p1);
 
 	return (conn->mreq != NULL ? XPRT_MOREREQS : XPRT_IDLE);
 }
 
 /*
  * If we have an mbuf chain in cd->mpending, try to parse a record from it,
  * leaving the result in cd->mreq. If we don't have a complete record, leave
  * the partial result in cd->mreq.  This function does not read from the
  * socket itself; it only adjusts so_rcv.sb_lowat to describe how much more
  * data is wanted.
  *
  * Returns FALSE only when a record marker is expected but fewer than
  * sizeof(uint32_t) bytes are buffered in cd->mpending; sb_lowat is then
  * set to the number of marker bytes still missing.  Returns TRUE in every
  * other case.
  */
 static int
 svc_vc_process_pending(SVCXPRT *xprt)
 {
 	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
 	struct socket *so = xprt->xp_socket;
 	struct mbuf *m;
 
 	/*
 	 * If cd->resid is non-zero, we have part of the
 	 * record already, otherwise we are expecting a record
 	 * marker.
 	 */
 	if (!cd->resid && cd->mpending) {
 		/*
 		 * See if there is enough data buffered to
 		 * make up a record marker. Make sure we can
 		 * handle the case where the record marker is
 		 * split across more than one mbuf.
 		 */
 		size_t n = 0;
 		uint32_t header;
 
 		m = cd->mpending;
 		while (n < sizeof(uint32_t) && m) {
 			n += m->m_len;
 			m = m->m_next;
 		}
 		if (n < sizeof(uint32_t)) {
 			so->so_rcv.sb_lowat = sizeof(uint32_t) - n;
 			return (FALSE);
 		}
 		m_copydata(cd->mpending, 0, sizeof(header),
 		    (char *)&header);
 		/*
 		 * The marker is in network byte order: the top bit is
 		 * kept in cd->eor, the low 31 bits become cd->resid.
 		 */
 		header = ntohl(header);
 		cd->eor = (header & 0x80000000) != 0;
 		cd->resid = header & 0x7fffffff;
 		m_adj(cd->mpending, sizeof(uint32_t));
 	}
 
 	/*
 	 * Start pulling off mbufs from cd->mpending
 	 * until we either have a complete record or
 	 * we run out of data. We use m_split to pull
 	 * data - it will pull as much as possible and
 	 * split the last mbuf if necessary.
 	 */
 	while (cd->mpending && cd->resid) {
 		m = cd->mpending;
 		if (cd->mpending->m_next
 		    || cd->mpending->m_len > cd->resid)
 			cd->mpending = m_split(cd->mpending,
 			    cd->resid, M_WAITOK);
 		else
 			cd->mpending = NULL;
 		/* Append the piece just taken to the record in cd->mreq. */
 		if (cd->mreq)
 			m_last(cd->mreq)->m_next = m;
 		else
 			cd->mreq = m;
 		/* Charge every mbuf of that piece against cd->resid. */
 		while (m) {
 			cd->resid -= m->m_len;
 			m = m->m_next;
 		}
 	}
 
 	/*
 	 * Block receive upcalls if we have more data pending,
 	 * otherwise report our need.
 	 */
 	if (cd->mpending)
 		so->so_rcv.sb_lowat = INT_MAX;
 	else
 		so->so_rcv.sb_lowat =
 		    imax(1, imin(cd->resid, so->so_rcv.sb_hiwat / 2));
 	return (TRUE);
 }
 
 /*
  * Receive one RPC call on a connection transport.
  *
  * Loops with xp_lock held.  Records are parsed out of cd->mpending into
  * cd->mreq.  A complete record whose second word is REPLY (checked only
  * when xp_p2 is non-NULL) is handed to clnt_bck_svccall() and the loop
  * continues; any other complete record is decoded with xdr_callmsg().
  * When no complete record is available, more data is pulled from the
  * socket with a MSG_DONTWAIT soreceive() and appended to cd->mpending.
  *
  * Returns TRUE with *msg decoded, *addrp set to NULL and *mp set to the
  * result of xdrmbuf_getall().  Returns FALSE when xdr_callmsg() fails,
  * when soreceive() reports EWOULDBLOCK, and on any other soreceive()
  * error or EOF (those last two also set cd->strm_stat to XPRT_DIED).
  */
 static bool_t
 svc_vc_recv(SVCXPRT *xprt, struct rpc_msg *msg,
     struct sockaddr **addrp, struct mbuf **mp)
 {
 	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
 	struct uio uio;
 	struct mbuf *m;
 	struct socket* so = xprt->xp_socket;
 	XDR xdrs;
 	int error, rcvflag;
 	uint32_t xid_plus_direction[2];
 
 	/*
 	 * Serialise access to the socket and our own record parsing
 	 * state.
 	 */
 	sx_xlock(&xprt->xp_lock);
 
 	for (;;) {
 		/* If we have no request ready, check pending queue. */
 		while (cd->mpending &&
 		    (cd->mreq == NULL || cd->resid != 0 || !cd->eor)) {
 			if (!svc_vc_process_pending(xprt))
 				break;
 		}
 
 		/* Process and return complete request in cd->mreq. */
 		if (cd->mreq != NULL && cd->resid == 0 && cd->eor) {
 
 			/*
 			 * Now, check for a backchannel reply.
 			 * The XID is in the first uint32_t of the reply
 			 * and the message direction is the second one.
 			 */
 			if ((cd->mreq->m_len >= sizeof(xid_plus_direction) ||
 			    m_length(cd->mreq, NULL) >=
 			    sizeof(xid_plus_direction)) &&
 			    xprt->xp_p2 != NULL) {
 				m_copydata(cd->mreq, 0,
 				    sizeof(xid_plus_direction),
 				    (char *)xid_plus_direction);
 				xid_plus_direction[0] =
 				    ntohl(xid_plus_direction[0]);
 				xid_plus_direction[1] =
 				    ntohl(xid_plus_direction[1]);
 				/* Check message direction. */
 				if (xid_plus_direction[1] == REPLY) {
 					clnt_bck_svccall(xprt->xp_p2,
 					    cd->mreq,
 					    xid_plus_direction[0]);
 					cd->mreq = NULL;
 					continue;
 				}
 			}
 
 			/* The XDR stream now holds the record's mbufs. */
 			xdrmbuf_create(&xdrs, cd->mreq, XDR_DECODE);
 			cd->mreq = NULL;
 
 			/* Check for next request in a pending queue. */
 			svc_vc_process_pending(xprt);
 			if (cd->mreq == NULL || cd->resid != 0) {
 				SOCKBUF_LOCK(&so->so_rcv);
 				if (!soreadable(so))
 					xprt_inactive_self(xprt);
 				SOCKBUF_UNLOCK(&so->so_rcv);
 			}
 
 			/* Decoding below runs without xp_lock held. */
 			sx_xunlock(&xprt->xp_lock);
 
 			if (! xdr_callmsg(&xdrs, msg)) {
 				XDR_DESTROY(&xdrs);
 				return (FALSE);
 			}
 
 			*addrp = NULL;
 			*mp = xdrmbuf_getall(&xdrs);
 			XDR_DESTROY(&xdrs);
 
 			return (TRUE);
 		}
 
 		/*
 		 * The socket upcall calls xprt_active() which will eventually
 		 * cause the server to call us here. We attempt to
 		 * read as much as possible from the socket and put
 		 * the result in cd->mpending. If the read fails,
 		 * we have drained both cd->mpending and the socket so
 		 * we can call xprt_inactive().
 		 */
 		uio.uio_resid = 1000000000;
 		uio.uio_td = curthread;
 		m = NULL;
 		rcvflag = MSG_DONTWAIT;
 		error = soreceive(so, NULL, &uio, &m, NULL, &rcvflag);
 
 		if (error == EWOULDBLOCK) {
 			/*
 			 * We must re-test for readability after
 			 * taking the lock to protect us in the case
 			 * where a new packet arrives on the socket
 			 * after our call to soreceive fails with
 			 * EWOULDBLOCK.
 			 */
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (!soreadable(so))
 				xprt_inactive_self(xprt);
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			sx_xunlock(&xprt->xp_lock);
 			return (FALSE);
 		}
 
 		if (error) {
 			/* Hard error: drop the upcall and mark the stream dead. */
 			SOCKBUF_LOCK(&so->so_rcv);
 			if (xprt->xp_upcallset) {
 				xprt->xp_upcallset = 0;
 				soupcall_clear(so, SO_RCV);
 			}
 			SOCKBUF_UNLOCK(&so->so_rcv);
 			xprt_inactive_self(xprt);
 			cd->strm_stat = XPRT_DIED;
 			sx_xunlock(&xprt->xp_lock);
 			return (FALSE);
 		}
 
 		if (!m) {
 			/*
 			 * EOF - the other end has closed the socket.
 			 */
 			xprt_inactive_self(xprt);
 			cd->strm_stat = XPRT_DIED;
 			sx_xunlock(&xprt->xp_lock);
 			return (FALSE);
 		}
 
 		/* Queue the new data behind anything already pending. */
 		if (cd->mpending)
 			m_last(cd->mpending)->m_next = m;
 		else
 			cd->mpending = m;
 	}
 }
 
 /*
  * Receive one call queued on a backchannel transport.
  *
  * With xp_lock held, returns FALSE if xp_p2 (the ct_data) is NULL.
  * Otherwise, under ct_lock, the first packet is unlinked from cd->mreq
  * (the queue is chained through m_nextpkt); an empty queue marks the
  * transport inactive and returns FALSE.  The packet is decoded with
  * xdr_callmsg() after both locks have been dropped.
  *
  * Returns TRUE with *msg decoded, *addrp set to NULL and *mp set to the
  * result of xdrmbuf_getall(); FALSE otherwise.
  */
 static bool_t
 svc_vc_backchannel_recv(SVCXPRT *xprt, struct rpc_msg *msg,
     struct sockaddr **addrp, struct mbuf **mp)
 {
 	struct cf_conn *cd = (struct cf_conn *) xprt->xp_p1;
 	struct ct_data *ct;
 	struct mbuf *m;
 	XDR xdrs;
 
 	sx_xlock(&xprt->xp_lock);
 	ct = (struct ct_data *)xprt->xp_p2;
 	if (ct == NULL) {
 		sx_xunlock(&xprt->xp_lock);
 		return (FALSE);
 	}
 	mtx_lock(&ct->ct_lock);
 	m = cd->mreq;
 	if (m == NULL) {
 		xprt_inactive_self(xprt);
 		mtx_unlock(&ct->ct_lock);
 		sx_xunlock(&xprt->xp_lock);
 		return (FALSE);
 	}
 	/* Dequeue the head packet; the rest stays on cd->mreq. */
 	cd->mreq = m->m_nextpkt;
 	mtx_unlock(&ct->ct_lock);
 	sx_xunlock(&xprt->xp_lock);
 
 	xdrmbuf_create(&xdrs, m, XDR_DECODE);
 	if (! xdr_callmsg(&xdrs, msg)) {
 		XDR_DESTROY(&xdrs);
 		return (FALSE);
 	}
 	*addrp = NULL;
 	*mp = xdrmbuf_getall(&xdrs);
 	XDR_DESTROY(&xdrs);
 	return (TRUE);
 }
 
 /*
  * Encode and send an RPC reply on a connection transport.
  *
  * The reply header is XDR-encoded into a fresh mbuf whose data pointer
  * is advanced to leave room for the record mark.  For an accepted,
  * successful reply the caller's body mbuf m is appended with
  * xdrmbuf_append(); otherwise only xdr_replymsg() is run.  A 4-byte
  * record mark (0x80000000 | payload length) is then prepended and the
  * chain is passed to sosend().  The addr argument is not used.
  *
  * xp_snd_cnt is increased by the record length before the send and
  * decreased again if sosend() fails; xp_snt_cnt is increased only on
  * success.  When seq is non-NULL it receives xp_snd_cnt after a
  * successful send.
  *
  * Returns FALSE when xdr_replymsg() fails.
  * NOTE(review): stat is still TRUE when sosend() returns an error, so a
  * failed send is reported to the caller as success - confirm intended.
  */
 static bool_t
 svc_vc_reply(SVCXPRT *xprt, struct rpc_msg *msg,
     struct sockaddr *addr, struct mbuf *m, uint32_t *seq)
 {
 	XDR xdrs;
 	struct mbuf *mrep;
 	bool_t stat = TRUE;
 	int error, len;
 
 	/*
 	 * Leave space for record mark.
 	 */
 	mrep = m_gethdr(M_WAITOK, MT_DATA);
 	mrep->m_data += sizeof(uint32_t);
 
 	xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
 
 	if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
 	    msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
 		if (!xdr_replymsg(&xdrs, msg))
 			stat = FALSE;
 		else
 			xdrmbuf_append(&xdrs, m);
 	} else {
 		stat = xdr_replymsg(&xdrs, msg);
 	}
 
 	if (stat) {
 		m_fixhdr(mrep);
 
 		/*
 		 * Prepend a record marker containing the reply length.
 		 */
 		M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
 		len = mrep->m_pkthdr.len;
 		*mtod(mrep, uint32_t *) =
 			htonl(0x80000000 | (len - sizeof(uint32_t)));
 		/* Count the bytes as queued before handing them to sosend(). */
 		atomic_add_32(&xprt->xp_snd_cnt, len);
 		error = sosend(xprt->xp_socket, NULL, NULL, mrep, NULL,
 		    0, curthread);
 		if (!error) {
 			atomic_add_rel_32(&xprt->xp_snt_cnt, len);
 			if (seq)
 				*seq = xprt->xp_snd_cnt;
 			stat = TRUE;
 		} else
 			atomic_subtract_32(&xprt->xp_snd_cnt, len);
 	} else {
 		m_freem(mrep);
 	}
 
 	XDR_DESTROY(&xdrs);
 
 	return (stat);
 }
 
 /*
  * Encode and send an RPC reply on a backchannel transport.
  *
  * The reply header is XDR-encoded into a fresh mbuf whose data pointer
  * is advanced to leave room for the record mark.  For an accepted,
  * successful reply the caller's body mbuf m is appended with
  * xdrmbuf_append(); otherwise only xdr_replymsg() is run.  A 4-byte
  * record mark (0x80000000 | payload length) is then prepended and, with
  * xp_lock held, the chain is sent on the socket of the ct_data found in
  * xp_p2.  The addr and seq arguments are not used.
  *
  * If xp_p2 is NULL there is nothing to send on: the reply chain is freed
  * here (it was never handed to sosend()) and the error is EPIPE.
  *
  * Returns FALSE when xdr_replymsg() fails; otherwise the value of stat,
  * which is TRUE.
  * NOTE(review): as written, a send error (including the EPIPE case) does
  * not clear stat - confirm callers expect TRUE here.
  */
 static bool_t
 svc_vc_backchannel_reply(SVCXPRT *xprt, struct rpc_msg *msg,
     struct sockaddr *addr, struct mbuf *m, uint32_t *seq)
 {
 	struct ct_data *ct;
 	XDR xdrs;
 	struct mbuf *mrep;
 	bool_t stat = TRUE;
 	int error;
 
 	/*
 	 * Leave space for record mark.
 	 */
 	mrep = m_gethdr(M_WAITOK, MT_DATA);
 	mrep->m_data += sizeof(uint32_t);
 
 	xdrmbuf_create(&xdrs, mrep, XDR_ENCODE);
 
 	if (msg->rm_reply.rp_stat == MSG_ACCEPTED &&
 	    msg->rm_reply.rp_acpt.ar_stat == SUCCESS) {
 		if (!xdr_replymsg(&xdrs, msg))
 			stat = FALSE;
 		else
 			xdrmbuf_append(&xdrs, m);
 	} else {
 		stat = xdr_replymsg(&xdrs, msg);
 	}
 
 	if (stat) {
 		m_fixhdr(mrep);
 
 		/*
 		 * Prepend a record marker containing the reply length.
 		 */
 		M_PREPEND(mrep, sizeof(uint32_t), M_WAITOK);
 		*mtod(mrep, uint32_t *) =
 			htonl(0x80000000 | (mrep->m_pkthdr.len
 				- sizeof(uint32_t)));
 		sx_xlock(&xprt->xp_lock);
 		ct = (struct ct_data *)xprt->xp_p2;
 		if (ct != NULL) {
 			error = sosend(ct->ct_socket, NULL, NULL, mrep, NULL,
 			    0, curthread);
 		} else {
 			/*
 			 * No client handle to send through.  The chain was
 			 * not passed to sosend(), so release it here instead
 			 * of leaking it.
 			 */
 			m_freem(mrep);
 			error = EPIPE;
 		}
 		sx_xunlock(&xprt->xp_lock);
 		if (!error) {
 			stat = TRUE;
 		}
 	} else {
 		m_freem(mrep);
 	}
 
 	XDR_DESTROY(&xdrs);
 
 	return (stat);
 }
 
 /*
  * Placeholder operation: takes no declared arguments and always returns
  * FALSE.
  */
 static bool_t
 svc_vc_null()
 {
 
 	return (FALSE);
 }
 
 /*
  * Socket upcalls; arg is the SVCXPRT.  In the new (+) layout there are
  * two of them:
  *   svc_vc_soupcall()            - marks the transport active when its
  *                                  socket is readable;
  *   svc_vc_rendezvous_soupcall() - marks the transport active when the
  *                                  socket's sol_comp queue is non-empty.
  * Both ignore waitflag and return SU_OK.
  */
 static int
 svc_vc_soupcall(struct socket *so, void *arg, int waitflag)
 {
 	SVCXPRT *xprt = (SVCXPRT *) arg;
 
 	if (soreadable(xprt->xp_socket))
+		xprt_active(xprt);
+	return (SU_OK);
+}
+
+static int
+svc_vc_rendezvous_soupcall(struct socket *head, void *arg, int waitflag)
+{
+	SVCXPRT *xprt = (SVCXPRT *) arg;
+
+	if (!TAILQ_EMPTY(&head->sol_comp))
 		xprt_active(xprt);
 	return (SU_OK);
 }
 
 /* Disabled: everything up to the matching #endif is compiled out. */
 #if 0
 /*
  * Get the effective UID of the sending process. Used by rpcbind, keyserv
  * and rpc.yppasswdd on AF_LOCAL.
  *
  * Returns the getpeereid() result (storing the peer's euid in *uid when
  * that result is 0) for an AF_LOCAL remote address, and -1 for any other
  * address family.
  */
 int
 __rpc_get_local_uid(SVCXPRT *transp, uid_t *uid) {
 	int sock, ret;
 	gid_t egid;
 	uid_t euid;
 	struct sockaddr *sa;
 
 	sock = transp->xp_fd;
 	sa = (struct sockaddr *)transp->xp_rtaddr;
 	if (sa->sa_family == AF_LOCAL) {
 		ret = getpeereid(sock, &euid, &egid);
 		if (ret == 0)
 			*uid = euid;
 		return (ret);
 	} else
 		return (-1);
 }
 #endif
Index: head/sys/sys/sockbuf.h
===================================================================
--- head/sys/sys/sockbuf.h	(revision 319721)
+++ head/sys/sys/sockbuf.h	(revision 319722)
@@ -1,259 +1,259 @@
 /*-
  * Copyright (c) 1982, 1986, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)socketvar.h	8.3 (Berkeley) 2/19/95
  *
  * $FreeBSD$
  */
 #ifndef _SYS_SOCKBUF_H_
 #define _SYS_SOCKBUF_H_
-#include <sys/selinfo.h>		/* for struct selinfo */
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_sx.h>
 #include <sys/_task.h>
 
 #define	SB_MAX		(2*1024*1024)	/* default for max chars in sockbuf */
 
 /*
  * Constants for sb_flags field of struct sockbuf.
  */
 #define	SB_WAIT		0x04		/* someone is waiting for data/space */
 #define	SB_SEL		0x08		/* someone is selecting */
 #define	SB_ASYNC	0x10		/* ASYNC I/O, need signals */
 #define	SB_UPCALL	0x20		/* someone wants an upcall */
 #define	SB_NOINTR	0x40		/* operations not interruptible */
 #define	SB_AIO		0x80		/* AIO operations queued */
 #define	SB_KNOTE	0x100		/* kernel note attached */
 #define	SB_NOCOALESCE	0x200		/* don't coalesce new data into existing mbufs */
 #define	SB_IN_TOE	0x400		/* socket buffer is in the middle of an operation */
 #define	SB_AUTOSIZE	0x800		/* automatically size socket buffer */
 #define	SB_STOP		0x1000		/* backpressure indicator */
 #define	SB_AIO_RUNNING	0x2000		/* AIO operation running */
 
 #define	SBS_CANTSENDMORE	0x0010	/* can't send more data to peer */
 #define	SBS_CANTRCVMORE		0x0020	/* can't receive more data from peer */
 #define	SBS_RCVATMARK		0x0040	/* at mark on input */
 
 struct mbuf;
 struct sockaddr;
 struct socket;
 struct thread;
+struct selinfo;
 
 /*
  * Plain-integer structure whose member names largely mirror the counters
  * and limits of struct sockbuf.
  * NOTE(review): presumably the export form filled in by sbtoxsockbuf(),
  * which is declared later in this header - confirm.
  */
 struct	xsockbuf {
 	u_int	sb_cc;
 	u_int	sb_hiwat;
 	u_int	sb_mbcnt;
 	u_int   sb_mcnt;
 	u_int   sb_ccnt;
 	u_int	sb_mbmax;
 	int	sb_lowat;
 	int	sb_timeo;
 	short	sb_flags;
 };
 
 /*
  * Variables for socket buffering.
  *
  * Locking key to struct sockbuf:
  * (a) locked by SOCKBUF_LOCK().
  *
  * In the new (+) layout sb_sel is a pointer to struct selinfo rather
  * than an embedded structure, so a forward declaration of struct selinfo
  * is all this header needs.
  */
 struct	sockbuf {
-	struct	selinfo sb_sel;	/* process selecting read/write */
-	struct	mtx sb_mtx;	/* sockbuf lock */
-	struct	sx sb_sx;	/* prevent I/O interlacing */
+	struct	mtx sb_mtx;		/* sockbuf lock */
+	struct	sx sb_sx;		/* prevent I/O interlacing */
+	struct	selinfo *sb_sel;	/* process selecting read/write */
 	short	sb_state;	/* (a) socket state on sockbuf */
 #define	sb_startzero	sb_mb
 	struct	mbuf *sb_mb;	/* (a) the mbuf chain */
 	struct	mbuf *sb_mbtail; /* (a) the last mbuf in the chain */
 	struct	mbuf *sb_lastrecord;	/* (a) first mbuf of last
 					 * record in socket buffer */
 	struct	mbuf *sb_sndptr; /* (a) pointer into mbuf chain */
 	struct	mbuf *sb_fnrdy;	/* (a) pointer to first not ready buffer */
 	u_int	sb_sndptroff;	/* (a) byte offset of ptr into chain */
 	u_int	sb_acc;		/* (a) available chars in buffer */
 	u_int	sb_ccc;		/* (a) claimed chars in buffer */
 	u_int	sb_hiwat;	/* (a) max actual char count */
 	u_int	sb_mbcnt;	/* (a) chars of mbufs used */
 	u_int   sb_mcnt;        /* (a) number of mbufs in buffer */
 	u_int   sb_ccnt;        /* (a) number of clusters in buffer */
 	u_int	sb_mbmax;	/* (a) max chars of mbufs to use */
 	u_int	sb_ctl;		/* (a) non-data chars in buffer */
 	int	sb_lowat;	/* (a) low water mark */
 	sbintime_t	sb_timeo;	/* (a) timeout for read/write */
 	short	sb_flags;	/* (a) flags, see below */
 	int	(*sb_upcall)(struct socket *, void *, int); /* (a) */
 	void	*sb_upcallarg;	/* (a) */
 	TAILQ_HEAD(, kaiocb) sb_aiojobq; /* (a) pending AIO ops */
 	struct	task sb_aiotask; /* AIO task */
 };
 
 #ifdef _KERNEL
 
 /*
  * Per-socket buffer mutex used to protect most fields in the socket
  * buffer.
  */
 #define	SOCKBUF_MTX(_sb)		(&(_sb)->sb_mtx)
 #define	SOCKBUF_LOCK_INIT(_sb, _name) \
 	mtx_init(SOCKBUF_MTX(_sb), _name, NULL, MTX_DEF)
 #define	SOCKBUF_LOCK_DESTROY(_sb)	mtx_destroy(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_LOCK(_sb)		mtx_lock(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_OWNED(_sb)		mtx_owned(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_UNLOCK(_sb)		mtx_unlock(SOCKBUF_MTX(_sb))
 #define	SOCKBUF_LOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
 #define	SOCKBUF_UNLOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)
 
 /*
  * Socket buffer private mbuf(9) flags.
  */
 #define	M_NOTREADY	M_PROTO1	/* m_data not populated yet */
 #define	M_BLOCKED	M_PROTO2	/* M_NOTREADY in front of m */
 #define	M_NOTAVAIL	(M_NOTREADY | M_BLOCKED)
 
 void	sbappend(struct sockbuf *sb, struct mbuf *m, int flags);
 void	sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags);
 void	sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags);
 void	sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags);
 int	sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
 	    struct mbuf *m0, struct mbuf *control);
 int	sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
 	    struct mbuf *m0, struct mbuf *control);
 int	sbappendaddr_nospacecheck_locked(struct sockbuf *sb,
 	    const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control);
 int	sbappendcontrol(struct sockbuf *sb, struct mbuf *m0,
 	    struct mbuf *control);
 int	sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
 	    struct mbuf *control);
 void	sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
 void	sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
 void	sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
 struct mbuf *
 	sbcreatecontrol(caddr_t p, int size, int type, int level);
 void	sbdestroy(struct sockbuf *sb, struct socket *so);
 void	sbdrop(struct sockbuf *sb, int len);
 void	sbdrop_locked(struct sockbuf *sb, int len);
 struct mbuf *
 	sbcut_locked(struct sockbuf *sb, int len);
 void	sbdroprecord(struct sockbuf *sb);
 void	sbdroprecord_locked(struct sockbuf *sb);
 void	sbflush(struct sockbuf *sb);
 void	sbflush_locked(struct sockbuf *sb);
 void	sbrelease(struct sockbuf *sb, struct socket *so);
 void	sbrelease_internal(struct sockbuf *sb, struct socket *so);
 void	sbrelease_locked(struct sockbuf *sb, struct socket *so);
 int	sbreserve(struct sockbuf *sb, u_long cc, struct socket *so,
 	    struct thread *td);
 int	sbreserve_locked(struct sockbuf *sb, u_long cc, struct socket *so,
 	    struct thread *td);
 struct mbuf *
 	sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff);
 struct mbuf *
 	sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff);
 void	sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb);
 int	sbwait(struct sockbuf *sb);
 int	sblock(struct sockbuf *sb, int flags);
 void	sbunlock(struct sockbuf *sb);
 void	sballoc(struct sockbuf *, struct mbuf *);
 void	sbfree(struct sockbuf *, struct mbuf *);
 int	sbready(struct sockbuf *, struct mbuf *, int);
 
 /*
  * Return how much data is available to be taken out of socket
  * buffer right now.
  *
  * This is the sb_acc counter.  The lock assertion below is compiled out
  * (#if 0), so the read is not checked against the sockbuf lock.
  */
 static inline u_int
 sbavail(struct sockbuf *sb)
 {
 
 #if 0
 	SOCKBUF_LOCK_ASSERT(sb);
 #endif
 	return (sb->sb_acc);
 }
 
 /*
  * Return how much data sits there in the socket buffer
  * It might be that some data is not yet ready to be read.
  *
  * This is the sb_ccc counter.  The lock assertion below is compiled out
  * (#if 0), so the read is not checked against the sockbuf lock.
  */
 static inline u_int
 sbused(struct sockbuf *sb)
 {
 
 #if 0
 	SOCKBUF_LOCK_ASSERT(sb);
 #endif
 	return (sb->sb_ccc);
 }
 
 /*
  * Space remaining in a socket buffer (so->so_snd or so->so_rcv).
  *
  * Returns 0 when SB_STOP is set.  Otherwise returns the smaller of the
  * byte headroom (sb_hiwat - sb_ccc) and the mbuf-storage headroom
  * (sb_mbmax - sb_mbcnt).  The sockbuf fields are unsigned but either
  * headroom may legitimately be negative (ccc > hiwat or mbcnt > mbmax),
  * which is why the differences are held in signed ints.
  */
 static inline long
 sbspace(struct sockbuf *sb)
 {
 	int byte_room, mbuf_room;	/* width should match sockbuf fields */
 
 #if 0
 	SOCKBUF_LOCK_ASSERT(sb);
 #endif
 
 	if ((sb->sb_flags & SB_STOP) != 0)
 		return (0);
 
 	byte_room = sb->sb_hiwat - sb->sb_ccc;
 	mbuf_room = sb->sb_mbmax - sb->sb_mbcnt;
 
 	if (mbuf_room <= byte_room)
 		return (mbuf_room);
 	return (byte_room);
 }
 
 /*
  * When the mbuf chain is empty (sb_mb == NULL), also reset the cached
  * sb_mbtail and sb_lastrecord pointers to NULL.
  */
 #define SB_EMPTY_FIXUP(sb) do {						\
 	if ((sb)->sb_mb == NULL) {					\
 		(sb)->sb_mbtail = NULL;					\
 		(sb)->sb_lastrecord = NULL;				\
 	}								\
 } while (/*CONSTCOND*/0)
 
 #ifdef SOCKBUF_DEBUG
 void	sblastrecordchk(struct sockbuf *, const char *, int);
 void	sblastmbufchk(struct sockbuf *, const char *, int);
 void	sbcheck(struct sockbuf *, const char *, int);
 #define	SBLASTRECORDCHK(sb)	sblastrecordchk((sb), __FILE__, __LINE__)
 #define	SBLASTMBUFCHK(sb)	sblastmbufchk((sb), __FILE__, __LINE__)
 #define	SBCHECK(sb)		sbcheck((sb), __FILE__, __LINE__)
 #else
 #define	SBLASTRECORDCHK(sb)	do {} while (0)
 #define	SBLASTMBUFCHK(sb)	do {} while (0)
 #define	SBCHECK(sb)		do {} while (0)
 #endif /* SOCKBUF_DEBUG */
 
 #endif /* _KERNEL */
 
 #endif /* _SYS_SOCKBUF_H_ */
Index: head/sys/sys/socket.h
===================================================================
--- head/sys/sys/socket.h	(revision 319721)
+++ head/sys/sys/socket.h	(revision 319722)
@@ -1,712 +1,716 @@
 /*-
  * Copyright (c) 1982, 1985, 1986, 1988, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)socket.h	8.4 (Berkeley) 2/21/94
  * $FreeBSD$
  */
 
 #ifndef _SYS_SOCKET_H_
 #define	_SYS_SOCKET_H_
 
 #include <sys/cdefs.h>
 #include <sys/_types.h>
 #include <sys/_iovec.h>
 #include <machine/_align.h>
 
 /*
  * Definitions related to sockets: types, address families, options.
  */
 
 /*
  * Data types.
  */
 #if __BSD_VISIBLE
 #ifndef _GID_T_DECLARED
 typedef	__gid_t		gid_t;
 #define	_GID_T_DECLARED
 #endif
 
 #ifndef _OFF_T_DECLARED
 typedef	__off_t		off_t;
 #define	_OFF_T_DECLARED
 #endif
 
 #ifndef _PID_T_DECLARED
 typedef	__pid_t		pid_t;
 #define	_PID_T_DECLARED
 #endif
 #endif
 
 #ifndef _SA_FAMILY_T_DECLARED
 typedef	__sa_family_t	sa_family_t;
 #define	_SA_FAMILY_T_DECLARED
 #endif
 
 #ifndef _SOCKLEN_T_DECLARED
 typedef	__socklen_t	socklen_t;
 #define	_SOCKLEN_T_DECLARED
 #endif
  
 #ifndef _SSIZE_T_DECLARED
 typedef	__ssize_t	ssize_t;
 #define	_SSIZE_T_DECLARED
 #endif
 
 #if __BSD_VISIBLE 
 #ifndef _UID_T_DECLARED
 typedef	__uid_t		uid_t;
 #define	_UID_T_DECLARED
 #endif
 #endif
 
 #ifndef _UINT32_T_DECLARED
 typedef	__uint32_t	uint32_t;
 #define	_UINT32_T_DECLARED
 #endif
 
 #ifndef _UINTPTR_T_DECLARED
 typedef	__uintptr_t	uintptr_t;
 #define	_UINTPTR_T_DECLARED
 #endif
 
 /*
  * Types
  */
 #define	SOCK_STREAM	1		/* stream socket */
 #define	SOCK_DGRAM	2		/* datagram socket */
 #define	SOCK_RAW	3		/* raw-protocol interface */
 #if __BSD_VISIBLE
 #define	SOCK_RDM	4		/* reliably-delivered message */
 #endif
 #define	SOCK_SEQPACKET	5		/* sequenced packet stream */
 
 #if __BSD_VISIBLE
 /*
  * Creation flags, OR'ed into socket() and socketpair() type argument.
  */
 #define	SOCK_CLOEXEC	0x10000000
 #define	SOCK_NONBLOCK	0x20000000
-#endif
+#ifdef _KERNEL
+/*
+ * Flags for accept1(), kern_accept4() and solisten_dequeue, in addition
+ * to SOCK_CLOEXEC and SOCK_NONBLOCK.
+ */
+#define ACCEPT4_INHERIT 0x1
+#define ACCEPT4_COMPAT  0x2
+#endif	/* _KERNEL */
+#endif	/* __BSD_VISIBLE */
 
 /*
  * Option flags per-socket.
  */
 #define	SO_DEBUG	0x0001		/* turn on debugging info recording */
 #define	SO_ACCEPTCONN	0x0002		/* socket has had listen() */
 #define	SO_REUSEADDR	0x0004		/* allow local address reuse */
 #define	SO_KEEPALIVE	0x0008		/* keep connections alive */
 #define	SO_DONTROUTE	0x0010		/* just use interface addresses */
 #define	SO_BROADCAST	0x0020		/* permit sending of broadcast msgs */
 #if __BSD_VISIBLE
 #define	SO_USELOOPBACK	0x0040		/* bypass hardware when possible */
 #endif
 #define	SO_LINGER	0x0080		/* linger on close if data present */
 #define	SO_OOBINLINE	0x0100		/* leave received OOB data in line */
 #if __BSD_VISIBLE
 #define	SO_REUSEPORT	0x0200		/* allow local address & port reuse */
 #define	SO_TIMESTAMP	0x0400		/* timestamp received dgram traffic */
 #define	SO_NOSIGPIPE	0x0800		/* no SIGPIPE from EPIPE */
 #define	SO_ACCEPTFILTER	0x1000		/* there is an accept filter */
 #define	SO_BINTIME	0x2000		/* timestamp received dgram traffic */
 #endif
 #define	SO_NO_OFFLOAD	0x4000		/* socket cannot be offloaded */
 #define	SO_NO_DDP	0x8000		/* disable direct data placement */
 
 /*
  * Additional options, not kept in so_options.
  */
 #define	SO_SNDBUF	0x1001		/* send buffer size */
 #define	SO_RCVBUF	0x1002		/* receive buffer size */
 #define	SO_SNDLOWAT	0x1003		/* send low-water mark */
 #define	SO_RCVLOWAT	0x1004		/* receive low-water mark */
 #define	SO_SNDTIMEO	0x1005		/* send timeout */
 #define	SO_RCVTIMEO	0x1006		/* receive timeout */
 #define	SO_ERROR	0x1007		/* get error status and clear */
 #define	SO_TYPE		0x1008		/* get socket type */
 #if __BSD_VISIBLE
 #define	SO_LABEL	0x1009		/* socket's MAC label */
 #define	SO_PEERLABEL	0x1010		/* socket's peer's MAC label */
 #define	SO_LISTENQLIMIT	0x1011		/* socket's backlog limit */
 #define	SO_LISTENQLEN	0x1012		/* socket's complete queue length */
 #define	SO_LISTENINCQLEN	0x1013	/* socket's incomplete queue length */
 #define	SO_SETFIB	0x1014		/* use this FIB to route */
 #define	SO_USER_COOKIE	0x1015		/* user cookie (dummynet etc.) */
 #define	SO_PROTOCOL	0x1016		/* get socket protocol (Linux name) */
 #define	SO_PROTOTYPE	SO_PROTOCOL	/* alias for SO_PROTOCOL (SunOS name) */
 #define	SO_TS_CLOCK	0x1017		/* clock type used for SO_TIMESTAMP */
 #define	SO_MAX_PACING_RATE	0x1018	/* socket's max TX pacing rate (Linux name) */
 #endif
 
 #if __BSD_VISIBLE
 #define	SO_TS_REALTIME_MICRO	0	/* microsecond resolution, realtime */
 #define	SO_TS_BINTIME		1	/* sub-nanosecond resolution, realtime */
 #define	SO_TS_REALTIME		2	/* nanosecond resolution, realtime */
 #define	SO_TS_MONOTONIC		3	/* nanosecond resolution, monotonic */
 #define	SO_TS_DEFAULT		SO_TS_REALTIME_MICRO
 #define	SO_TS_CLOCK_MAX		SO_TS_MONOTONIC
 #endif
 
 /*
  * Space reserved for new socket options added by third-party vendors.
  * This range applies to all socket option levels.  New socket options
  * in FreeBSD should always use an option value less than SO_VENDOR.
  */
 #if __BSD_VISIBLE
 #define	SO_VENDOR	0x80000000
 #endif
 
 /*
  * Structure used for manipulating linger option.
  */
 struct linger {
 	int	l_onoff;		/* option on/off */
 	int	l_linger;		/* linger time */
 };
 
 #if __BSD_VISIBLE
 /*
  * Fixed 256-byte structure: a 16-byte name followed by 240 bytes of
  * argument data.
  * NOTE(review): presumably the argument passed with the SO_ACCEPTFILTER
  * socket option - confirm.
  */
 struct accept_filter_arg {
 	char	af_name[16];
 	char	af_arg[256-16];
 };
 #endif
 
 /*
  * Level number for (get/set)sockopt() to apply to socket itself.
  */
 #define	SOL_SOCKET	0xffff		/* options for socket level */
 
 /*
  * Address families.
  */
 #define	AF_UNSPEC	0		/* unspecified */
 #if __BSD_VISIBLE
 #define	AF_LOCAL	AF_UNIX		/* local to host (pipes, portals) */
 #endif
 #define	AF_UNIX		1		/* standardized name for AF_LOCAL */
 #define	AF_INET		2		/* internetwork: UDP, TCP, etc. */
 #if __BSD_VISIBLE
 #define	AF_IMPLINK	3		/* arpanet imp addresses */
 #define	AF_PUP		4		/* pup protocols: e.g. BSP */
 #define	AF_CHAOS	5		/* mit CHAOS protocols */
 #define	AF_NETBIOS	6		/* SMB protocols */
 #define	AF_ISO		7		/* ISO protocols */
 #define	AF_OSI		AF_ISO
 #define	AF_ECMA		8		/* European computer manufacturers */
 #define	AF_DATAKIT	9		/* datakit protocols */
 #define	AF_CCITT	10		/* CCITT protocols, X.25 etc */
 #define	AF_SNA		11		/* IBM SNA */
 #define AF_DECnet	12		/* DECnet */
 #define AF_DLI		13		/* DEC Direct data link interface */
 #define AF_LAT		14		/* LAT */
 #define	AF_HYLINK	15		/* NSC Hyperchannel */
 #define	AF_APPLETALK	16		/* Apple Talk */
 #define	AF_ROUTE	17		/* Internal Routing Protocol */
 #define	AF_LINK		18		/* Link layer interface */
 #define	pseudo_AF_XTP	19		/* eXpress Transfer Protocol (no AF) */
 #define	AF_COIP		20		/* connection-oriented IP, aka ST II */
 #define	AF_CNT		21		/* Computer Network Technology */
 #define pseudo_AF_RTIP	22		/* Help Identify RTIP packets */
 #define	AF_IPX		23		/* Novell Internet Protocol */
 #define	AF_SIP		24		/* Simple Internet Protocol */
 #define	pseudo_AF_PIP	25		/* Help Identify PIP packets */
 #define	AF_ISDN		26		/* Integrated Services Digital Network*/
 #define	AF_E164		AF_ISDN		/* CCITT E.164 recommendation */
 #define	pseudo_AF_KEY	27		/* Internal key-management function */
 #endif
 #define	AF_INET6	28		/* IPv6 */
 #if __BSD_VISIBLE
 #define	AF_NATM		29		/* native ATM access */
 #define	AF_ATM		30		/* ATM */
 #define pseudo_AF_HDRCMPLT 31		/* Used by BPF to not rewrite headers
 					 * in interface output routine
 					 */
 #define	AF_NETGRAPH	32		/* Netgraph sockets */
 #define	AF_SLOW		33		/* 802.3ad slow protocol */
 #define	AF_SCLUSTER	34		/* Sitara cluster protocol */
 #define	AF_ARP		35
 #define	AF_BLUETOOTH	36		/* Bluetooth sockets */
 #define	AF_IEEE80211	37		/* IEEE 802.11 protocol */
 #define	AF_INET_SDP	40		/* OFED Socket Direct Protocol ipv4 */
 #define	AF_INET6_SDP	42		/* OFED Socket Direct Protocol ipv6 */
 #define	AF_MAX		42
 /*
  * When allocating a new AF_ constant, please only allocate
  * even numbered constants for FreeBSD until 134 as odd numbered AF_
  * constants 39-133 are now reserved for vendors.
  */
 #define AF_VENDOR00 39
 #define AF_VENDOR01 41
 #define AF_VENDOR02 43
 #define AF_VENDOR03 45
 #define AF_VENDOR04 47
 #define AF_VENDOR05 49
 #define AF_VENDOR06 51
 #define AF_VENDOR07 53
 #define AF_VENDOR08 55
 #define AF_VENDOR09 57
 #define AF_VENDOR10 59
 #define AF_VENDOR11 61
 #define AF_VENDOR12 63
 #define AF_VENDOR13 65
 #define AF_VENDOR14 67
 #define AF_VENDOR15 69
 #define AF_VENDOR16 71
 #define AF_VENDOR17 73
 #define AF_VENDOR18 75
 #define AF_VENDOR19 77
 #define AF_VENDOR20 79
 #define AF_VENDOR21 81
 #define AF_VENDOR22 83
 #define AF_VENDOR23 85
 #define AF_VENDOR24 87
 #define AF_VENDOR25 89
 #define AF_VENDOR26 91
 #define AF_VENDOR27 93
 #define AF_VENDOR28 95
 #define AF_VENDOR29 97
 #define AF_VENDOR30 99
 #define AF_VENDOR31 101
 #define AF_VENDOR32 103
 #define AF_VENDOR33 105
 #define AF_VENDOR34 107
 #define AF_VENDOR35 109
 #define AF_VENDOR36 111
 #define AF_VENDOR37 113
 #define AF_VENDOR38 115
 #define AF_VENDOR39 117
 #define AF_VENDOR40 119
 #define AF_VENDOR41 121
 #define AF_VENDOR42 123
 #define AF_VENDOR43 125
 #define AF_VENDOR44 127
 #define AF_VENDOR45 129
 #define AF_VENDOR46 131
 #define AF_VENDOR47 133
 #endif
 
 /*
  * Structure used by kernel to store most
  * addresses.
  */
 struct sockaddr {
 	unsigned char	sa_len;		/* total length */
 	sa_family_t	sa_family;	/* address family */
 	char		sa_data[14];	/* actually longer; address value */
 };
 #if __BSD_VISIBLE
 #define	SOCK_MAXADDRLEN	255		/* longest possible addresses */
 
 /*
  * Structure used by kernel to pass protocol
  * information in raw sockets.
  */
 struct sockproto {
 	unsigned short	sp_family;		/* address family */
 	unsigned short	sp_protocol;		/* protocol */
 };
 #endif
 
 #include <sys/_sockaddr_storage.h>
 
 #if __BSD_VISIBLE
 /*
  * Protocol families, same as address families for now.
  */
 #define	PF_UNSPEC	AF_UNSPEC
 #define	PF_LOCAL	AF_LOCAL
 #define	PF_UNIX		PF_LOCAL	/* backward compatibility */
 #define	PF_INET		AF_INET
 #define	PF_IMPLINK	AF_IMPLINK
 #define	PF_PUP		AF_PUP
 #define	PF_CHAOS	AF_CHAOS
 #define	PF_NETBIOS	AF_NETBIOS
 #define	PF_ISO		AF_ISO
 #define	PF_OSI		AF_ISO
 #define	PF_ECMA		AF_ECMA
 #define	PF_DATAKIT	AF_DATAKIT
 #define	PF_CCITT	AF_CCITT
 #define	PF_SNA		AF_SNA
 #define PF_DECnet	AF_DECnet
 #define PF_DLI		AF_DLI
 #define PF_LAT		AF_LAT
 #define	PF_HYLINK	AF_HYLINK
 #define	PF_APPLETALK	AF_APPLETALK
 #define	PF_ROUTE	AF_ROUTE
 #define	PF_LINK		AF_LINK
 #define	PF_XTP		pseudo_AF_XTP	/* really just proto family, no AF */
 #define	PF_COIP		AF_COIP
 #define	PF_CNT		AF_CNT
 #define	PF_SIP		AF_SIP
 #define	PF_IPX		AF_IPX
 #define PF_RTIP		pseudo_AF_RTIP	/* same format as AF_INET */
 #define PF_PIP		pseudo_AF_PIP
 #define	PF_ISDN		AF_ISDN
 #define	PF_KEY		pseudo_AF_KEY
 #define	PF_INET6	AF_INET6
 #define	PF_NATM		AF_NATM
 #define	PF_ATM		AF_ATM
 #define	PF_NETGRAPH	AF_NETGRAPH
 #define	PF_SLOW		AF_SLOW
 #define PF_SCLUSTER	AF_SCLUSTER
 #define	PF_ARP		AF_ARP
 #define	PF_BLUETOOTH	AF_BLUETOOTH
 #define	PF_IEEE80211	AF_IEEE80211
 #define	PF_INET_SDP	AF_INET_SDP
 #define	PF_INET6_SDP	AF_INET6_SDP
 
 #define	PF_MAX		AF_MAX
 
 /*
  * Definitions for network related sysctl, CTL_NET.
  *
  * Second level is protocol family.
  * Third level is protocol number.
  *
  * Further levels are defined by the individual families.
  */
 
 /*
  * PF_ROUTE - Routing table
  *
  * Three additional levels are defined:
  *	Fourth: address family, 0 is wildcard
  *	Fifth: type of info, defined below
  *	Sixth: flag(s) to mask with for NET_RT_FLAGS
  */
 #define NET_RT_DUMP	1		/* dump; may limit to a.f. */
 #define NET_RT_FLAGS	2		/* by flags, e.g. RESOLVING */
 #define NET_RT_IFLIST	3		/* survey interface list */
 #define	NET_RT_IFMALIST	4		/* return multicast address list */
 #define	NET_RT_IFLISTL	5		/* Survey interface list, using 'l'en
 					 * versions of msghdr structs. */
 #endif /* __BSD_VISIBLE */
 
 /*
  * Maximum queue length specifiable by listen.
  */
 #define	SOMAXCONN	128
 
 /*
  * Message header for recvmsg and sendmsg calls.
  * Used value-result for recvmsg, value only for sendmsg.
  */
 struct msghdr {
 	void		*msg_name;		/* optional address */
 	socklen_t	 msg_namelen;		/* size of address */
 	struct iovec	*msg_iov;		/* scatter/gather array */
 	int		 msg_iovlen;		/* # elements in msg_iov */
 	void		*msg_control;		/* ancillary data, see below */
 	socklen_t	 msg_controllen;	/* ancillary data buffer len */
 	int		 msg_flags;		/* flags on received message */
 };
 
 #define	MSG_OOB		 0x00000001	/* process out-of-band data */
 #define	MSG_PEEK	 0x00000002	/* peek at incoming message */
 #define	MSG_DONTROUTE	 0x00000004	/* send without using routing tables */
 #define	MSG_EOR		 0x00000008	/* data completes record */
 #define	MSG_TRUNC	 0x00000010	/* data discarded before delivery */
 #define	MSG_CTRUNC	 0x00000020	/* control data lost before delivery */
 #define	MSG_WAITALL	 0x00000040	/* wait for full request or error */
 #if __BSD_VISIBLE
 #define	MSG_DONTWAIT	 0x00000080	/* this message should be nonblocking */
 #define	MSG_EOF		 0x00000100	/* data completes connection */
 /*			 0x00000200	   unused */
 /*			 0x00000400	   unused */
 /*			 0x00000800	   unused */
 /*			 0x00001000	   unused */
 #define	MSG_NOTIFICATION 0x00002000	/* SCTP notification */
 #define	MSG_NBIO	 0x00004000	/* FIONBIO mode, used by fifofs */
 #define	MSG_COMPAT       0x00008000		/* used in sendit() */
 #endif
 #ifdef _KERNEL
 #define	MSG_SOCALLBCK    0x00010000	/* for use by socket callbacks - soreceive (TCP) */
 #endif
 #if __POSIX_VISIBLE >= 200809
 #define	MSG_NOSIGNAL	 0x00020000	/* do not generate SIGPIPE on EOF */
 #endif
 #if __BSD_VISIBLE
 #define	MSG_CMSG_CLOEXEC 0x00040000	/* make received fds close-on-exec */
 #define	MSG_WAITFORONE	 0x00080000	/* for recvmmsg() */
 #endif
 #ifdef _KERNEL
 #define	MSG_MORETOCOME	 0x00100000	/* additional data pending */
 #endif
 
 /*
  * Header for ancillary data objects in msg_control buffer.
  * Used for additional information with/about a datagram
  * not expressible by flags.  The format is a sequence
  * of message elements headed by cmsghdr structures.
  */
 struct cmsghdr {
 	socklen_t	cmsg_len;		/* data byte count, including hdr */
 	int		cmsg_level;		/* originating protocol */
 	int		cmsg_type;		/* protocol-specific type */
 /* followed by	u_char  cmsg_data[]; */
 };
 
 #if __BSD_VISIBLE
 /*
  * While we may have more groups than this, the cmsgcred struct must
  * be able to fit in an mbuf and we have historically supported a
  * maximum of 16 groups.
 */
 #define CMGROUP_MAX 16
 
 /*
  * Credentials structure, used to verify the identity of a peer
  * process that has sent us a message. This is allocated by the
  * peer process but filled in by the kernel. This prevents the
  * peer from lying about its identity. (Note that cmcred_groups[0]
  * is the effective GID.)
  */
 struct cmsgcred {
 	pid_t	cmcred_pid;		/* PID of sending process */
 	uid_t	cmcred_uid;		/* real UID of sending process */
 	uid_t	cmcred_euid;		/* effective UID of sending process */
 	gid_t	cmcred_gid;		/* real GID of sending process */
 	short	cmcred_ngroups;		/* number or groups */
 	gid_t	cmcred_groups[CMGROUP_MAX];	/* groups */
 };
 
 /*
  * Socket credentials.
  */
 struct sockcred {
 	uid_t	sc_uid;			/* real user id */
 	uid_t	sc_euid;		/* effective user id */
 	gid_t	sc_gid;			/* real group id */
 	gid_t	sc_egid;		/* effective group id */
 	int	sc_ngroups;		/* number of supplemental groups */
 	gid_t	sc_groups[1];		/* variable length */
 };
 
 /*
  * Compute size of a sockcred structure with groups.
  */
 #define	SOCKCREDSIZE(ngrps) \
 	(sizeof(struct sockcred) + (sizeof(gid_t) * ((ngrps) - 1)))
 
 #endif /* __BSD_VISIBLE */
 
 /* given pointer to struct cmsghdr, return pointer to data */
 #define	CMSG_DATA(cmsg)		((unsigned char *)(cmsg) + \
 				 _ALIGN(sizeof(struct cmsghdr)))
 
 /* given pointer to struct cmsghdr, return pointer to next cmsghdr */
 #define	CMSG_NXTHDR(mhdr, cmsg)	\
 	((char *)(cmsg) == (char *)0 ? CMSG_FIRSTHDR(mhdr) : \
 	    ((char *)(cmsg) + _ALIGN(((struct cmsghdr *)(cmsg))->cmsg_len) + \
 	  _ALIGN(sizeof(struct cmsghdr)) > \
 	    (char *)(mhdr)->msg_control + (mhdr)->msg_controllen) ? \
 	    (struct cmsghdr *)0 : \
 	    (struct cmsghdr *)(void *)((char *)(cmsg) + \
 	    _ALIGN(((struct cmsghdr *)(cmsg))->cmsg_len)))
 
 /*
  * RFC 2292 requires to check msg_controllen, in case that the kernel returns
  * an empty list for some reasons.
  */
 #define	CMSG_FIRSTHDR(mhdr) \
 	((mhdr)->msg_controllen >= sizeof(struct cmsghdr) ? \
 	 (struct cmsghdr *)(mhdr)->msg_control : \
 	 (struct cmsghdr *)0)
 
 #if __BSD_VISIBLE
 /* RFC 2292 additions */
 #define	CMSG_SPACE(l)		(_ALIGN(sizeof(struct cmsghdr)) + _ALIGN(l))
 #define	CMSG_LEN(l)		(_ALIGN(sizeof(struct cmsghdr)) + (l))
 #endif
 
 #ifdef _KERNEL
 #define	CMSG_ALIGN(n)	_ALIGN(n)
 #endif
 
 /* "Socket"-level control message types: */
 #define	SCM_RIGHTS	0x01		/* access rights (array of int) */
 #if __BSD_VISIBLE
 #define	SCM_TIMESTAMP	0x02		/* timestamp (struct timeval) */
 #define	SCM_CREDS	0x03		/* process creds (struct cmsgcred) */
 #define	SCM_BINTIME	0x04		/* timestamp (struct bintime) */
 #define	SCM_REALTIME	0x05		/* timestamp (struct timespec) */
 #define	SCM_MONOTONIC	0x06		/* timestamp (struct timespec) */
 #endif
 
 #if __BSD_VISIBLE
 /*
  * 4.3 compat sockaddr, move to compat file later
  */
 struct osockaddr {
 	unsigned short sa_family;	/* address family */
 	char	sa_data[14];		/* up to 14 bytes of direct address */
 };
 
 /*
  * 4.3-compat message header (move to compat file later).
  */
 struct omsghdr {
 	char	*msg_name;		/* optional address */
 	int	msg_namelen;		/* size of address */
 	struct	iovec *msg_iov;		/* scatter/gather array */
 	int	msg_iovlen;		/* # elements in msg_iov */
 	char	*msg_accrights;		/* access rights sent/received */
 	int	msg_accrightslen;
 };
 #endif
 
 /*
  * howto arguments for shutdown(2), specified by Posix.1g.
  */
 #define	SHUT_RD		0		/* shut down the reading side */
 #define	SHUT_WR		1		/* shut down the writing side */
 #define	SHUT_RDWR	2		/* shut down both sides */
 
 #if __BSD_VISIBLE
 /* for SCTP */
 /* we cheat and use the SHUT_XX defines for these */
 #define PRU_FLUSH_RD     SHUT_RD
 #define PRU_FLUSH_WR     SHUT_WR
 #define PRU_FLUSH_RDWR   SHUT_RDWR
 #endif
 
 
 #if __BSD_VISIBLE
 /*
  * sendfile(2) header/trailer struct
  */
 struct sf_hdtr {
 	struct iovec *headers;	/* pointer to an array of header struct iovec's */
 	int hdr_cnt;		/* number of header iovec's */
 	struct iovec *trailers;	/* pointer to an array of trailer struct iovec's */
 	int trl_cnt;		/* number of trailer iovec's */
 };
 
 /*
  * Sendfile-specific flag(s)
  */
 #define	SF_NODISKIO     0x00000001
 #define	SF_MNOWAIT	0x00000002	/* obsolete */
 #define	SF_SYNC		0x00000004
 #define	SF_USER_READAHEAD	0x00000008
 #define	SF_NOCACHE	0x00000010
 #define	SF_FLAGS(rh, flags)	(((rh) << 16) | (flags))
 
 #ifdef _KERNEL
 #define	SF_READAHEAD(flags)	((flags) >> 16)
 #endif /* _KERNEL */
 
 /*
  * Sendmmsg/recvmmsg specific structure(s)
  */
 struct mmsghdr {
 	struct msghdr	msg_hdr;		/* message header */
 	ssize_t		msg_len;		/* message length */
 };
 #endif /* __BSD_VISIBLE */
 
 #ifndef	_KERNEL
 
 #include <sys/cdefs.h>
 
 __BEGIN_DECLS
 int	accept(int, struct sockaddr * __restrict, socklen_t * __restrict);
 int	bind(int, const struct sockaddr *, socklen_t);
 int	connect(int, const struct sockaddr *, socklen_t);
 #if __BSD_VISIBLE
 int	accept4(int, struct sockaddr * __restrict, socklen_t * __restrict, int);
 int	bindat(int, int, const struct sockaddr *, socklen_t);
 int	connectat(int, int, const struct sockaddr *, socklen_t);
 #endif
 int	getpeername(int, struct sockaddr * __restrict, socklen_t * __restrict);
 int	getsockname(int, struct sockaddr * __restrict, socklen_t * __restrict);
 int	getsockopt(int, int, int, void * __restrict, socklen_t * __restrict);
 int	listen(int, int);
 ssize_t	recv(int, void *, size_t, int);
 ssize_t	recvfrom(int, void *, size_t, int, struct sockaddr * __restrict, socklen_t * __restrict);
 ssize_t	recvmsg(int, struct msghdr *, int);
 #if __BSD_VISIBLE
 struct timespec;
 ssize_t	recvmmsg(int, struct mmsghdr * __restrict, size_t, int,
     const struct timespec * __restrict);
 #endif
 ssize_t	send(int, const void *, size_t, int);
 ssize_t	sendto(int, const void *,
 	    size_t, int, const struct sockaddr *, socklen_t);
 ssize_t	sendmsg(int, const struct msghdr *, int);
 #if __BSD_VISIBLE
 int	sendfile(int, int, off_t, size_t, struct sf_hdtr *, off_t *, int);
 ssize_t	sendmmsg(int, struct mmsghdr * __restrict, size_t, int);
 int	setfib(int);
 #endif
 int	setsockopt(int, int, int, const void *, socklen_t);
 int	shutdown(int, int);
 int	sockatmark(int);
 int	socket(int, int, int);
 int	socketpair(int, int, int, int *);
 __END_DECLS
 
 #endif /* !_KERNEL */
 
 #ifdef _KERNEL
 struct socket;
 
 struct tcpcb *so_sototcpcb(struct socket *so);
 struct inpcb *so_sotoinpcb(struct socket *so);
 struct sockbuf *so_sockbuf_snd(struct socket *);
 struct sockbuf *so_sockbuf_rcv(struct socket *);
 
 int so_state_get(const struct socket *);
 void so_state_set(struct socket *, int);
 
 int so_options_get(const struct socket *);
 void so_options_set(struct socket *, int);
 
 int so_error_get(const struct socket *);
 void so_error_set(struct socket *, int);
 
 int so_linger_get(const struct socket *);
 void so_linger_set(struct socket *, int);
 
 struct protosw *so_protosw_get(const struct socket *);
 void so_protosw_set(struct socket *, struct protosw *);
 
 void so_sorwakeup_locked(struct socket *so);
 void so_sowwakeup_locked(struct socket *so);
 
 void so_sorwakeup(struct socket *so);
 void so_sowwakeup(struct socket *so);
 
 void so_lock(struct socket *so);
 void so_unlock(struct socket *so);
 
-void so_listeners_apply_all(struct socket *so, void (*func)(struct socket *, void *), void *arg);
-
-#endif
-
-
+#endif /* _KERNEL */
 #endif /* !_SYS_SOCKET_H_ */
Index: head/sys/sys/socketvar.h
===================================================================
--- head/sys/sys/socketvar.h	(revision 319721)
+++ head/sys/sys/socketvar.h	(revision 319722)
@@ -1,426 +1,450 @@
 /*-
  * Copyright (c) 1982, 1986, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)socketvar.h	8.3 (Berkeley) 2/19/95
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_SOCKETVAR_H_
 #define _SYS_SOCKETVAR_H_
 
 #include <sys/queue.h>			/* for TAILQ macros */
 #include <sys/selinfo.h>		/* for struct selinfo */
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/osd.h>
 #include <sys/_sx.h>
 #include <sys/sockbuf.h>
 #include <sys/sockstate.h>
 #ifdef _KERNEL
 #include <sys/caprights.h>
 #include <sys/sockopt.h>
 #endif
 
 struct vnet;
 
 /*
  * Kernel structure per socket.
  * Contains send and receive buffer queues,
  * handle on protocol and pointer to protocol
  * private data and error information.
  */
 typedef	uint64_t so_gen_t;
 typedef	int so_upcall_t(struct socket *, void *, int);
 
 struct socket;
 
 /*-
  * Locking key to struct socket:
  * (a) constant after allocation, no locking required.
  * (b) locked by SOCK_LOCK(so).
- * (c) locked by SOCKBUF_LOCK(&so->so_rcv).
- * (e) locked by ACCEPT_LOCK().
+ * (cr) locked by SOCKBUF_LOCK(&so->so_rcv).
+ * (cs) locked by SOCKBUF_LOCK(&so->so_snd).
+ * (e) locked by SOLISTEN_LOCK() of corresponding listening socket.
  * (f) not locked since integer reads/writes are atomic.
  * (g) used only as a sleep/wakeup address, no value.
  * (h) locked by global mutex so_global_mtx.
  */
+TAILQ_HEAD(accept_queue, socket);
 struct socket {
-	int	so_count;		/* (b) reference count */
+	struct mtx	so_lock;
+	volatile u_int	so_count;	/* (b / refcount) */
+	struct selinfo	so_rdsel;	/* (b/cr) for so_rcv/sol_comp */
+	struct selinfo	so_wrsel;	/* (b/cs) for so_snd */
 	short	so_type;		/* (a) generic type, see socket.h */
-	short	so_options;		/* from socket call, see socket.h */
-	short	so_linger;		/* time to linger while closing */
+	short	so_options;		/* (b) from socket call, see socket.h */
+	short	so_linger;		/* time to linger on close(2) */
 	short	so_state;		/* (b) internal state flags SS_* */
-	int	so_qstate;		/* (e) internal state flags SQ_* */
 	void	*so_pcb;		/* protocol control block */
 	struct	vnet *so_vnet;		/* (a) network stack instance */
 	struct	protosw *so_proto;	/* (a) protocol handle */
-/*
- * Variables for connection queuing.
- * Socket where accepts occur is so_head in all subsidiary sockets.
- * If so_head is 0, socket is not related to an accept.
- * For head socket so_incomp queues partially completed connections,
- * while so_comp is a queue of connections ready to be accepted.
- * If a connection is aborted and it has so_head set, then
- * it has to be pulled out of either so_incomp or so_comp.
- * We allow connections to queue up based on current queue lengths
- * and limit on number of queued connections for this socket.
- */
-	struct	socket *so_head;	/* (e) back pointer to listen socket */
-	TAILQ_HEAD(, socket) so_incomp;	/* (e) queue of partial unaccepted connections */
-	TAILQ_HEAD(, socket) so_comp;	/* (e) queue of complete unaccepted connections */
-	TAILQ_ENTRY(socket) so_list;	/* (e) list of unaccepted connections */
-	u_int	so_qlen;		/* (e) number of unaccepted connections */
-	u_int	so_incqlen;		/* (e) number of unaccepted incomplete
-					   connections */
-	u_int	so_qlimit;		/* (e) max number queued connections */
 	short	so_timeo;		/* (g) connection timeout */
 	u_short	so_error;		/* (f) error affecting connection */
 	struct	sigio *so_sigio;	/* [sg] information for async I/O or
 					   out of band data (SIGURG) */
-	u_long	so_oobmark;		/* (c) chars to oob mark */
-
-	struct sockbuf so_rcv, so_snd;
-
 	struct	ucred *so_cred;		/* (a) user credentials */
 	struct	label *so_label;	/* (b) MAC label for socket */
-	struct	label *so_peerlabel;	/* (b) cached MAC label for peer */
 	/* NB: generation count must not be first. */
 	so_gen_t so_gencnt;		/* (h) generation count */
 	void	*so_emuldata;		/* (b) private data for emulators */
- 	struct so_accf {
-		struct	accept_filter *so_accept_filter;
-		void	*so_accept_filter_arg;	/* saved filter args */
-		char	*so_accept_filter_str;	/* saved user args */
-	} *so_accf;
 	struct	osd	osd;		/* Object Specific extensions */
 	/*
 	 * so_fibnum, so_user_cookie and friends can be used to attach
 	 * some user-specified metadata to a socket, which then can be
 	 * used by the kernel for various actions.
 	 * so_user_cookie is used by ipfw/dummynet.
 	 */
 	int so_fibnum;		/* routing domain for this socket */
 	uint32_t so_user_cookie;
 
 	int so_ts_clock;	/* type of the clock used for timestamps */
 	uint32_t so_max_pacing_rate;	/* (f) TX rate limit in bytes/s */
+	union {
+		/* Regular (data flow) socket. */
+		struct {
+			/* (cr, cs) Receive and send buffers. */
+			struct sockbuf		so_rcv, so_snd;
 
-	void *so_pspare[2];	/* general use */
-	int so_ispare[2];	/* general use */
+			/* (e) Our place on accept queue. */
+			TAILQ_ENTRY(socket)	so_list;
+			struct socket		*so_listen;	/* (b) */
+			enum {
+				SQ_NONE = 0,
+				SQ_INCOMP = 0x0800,	/* on sol_incomp */
+				SQ_COMP = 0x1000,	/* on sol_comp */
+			}			so_qstate;	/* (b) */
+
+			/* (b) cached MAC label for peer */
+			struct	label		*so_peerlabel;
+			u_long	so_oobmark;	/* chars to oob mark */
+		};
+		/*
+		 * Listening socket, where accepts occur, is so_listen in all
+		 * subsidiary sockets.  If so_listen is NULL, socket is not
+		 * related to an accept.  For a listening socket itself
+		 * sol_incomp queues partially completed connections, while
+		 * sol_comp is a queue of connections ready to be accepted.
+		 * If a connection is aborted and it has so_listen set, then
+		 * it has to be pulled out of either sol_incomp or sol_comp.
+		 * We allow connections to queue up based on current queue
+		 * lengths and limit on number of queued connections for this
+		 * socket.
+		 */
+		struct {
+			/* (e) queue of partial unaccepted connections */
+			struct accept_queue	sol_incomp;
+			/* (e) queue of complete unaccepted connections */
+			struct accept_queue	sol_comp;
+			u_int	sol_qlen;    /* (e) sol_comp length */
+			u_int	sol_incqlen; /* (e) sol_incomp length */
+			u_int	sol_qlimit;  /* (e) queue limit */
+
+			/* accept_filter(9) optional data */
+			struct	accept_filter	*sol_accept_filter;
+			void	*sol_accept_filter_arg;	/* saved filter args */
+			char	*sol_accept_filter_str;	/* saved user args */
+
+			/* Optional upcall, for kernel socket. */
+			so_upcall_t	*sol_upcall;	/* (e) */
+			void		*sol_upcallarg;	/* (e) */
+
+			/* Socket buffer parameters, to be copied to
+			 * dataflow sockets, accepted from this one. */
+			int		sol_sbrcv_lowat;
+			int		sol_sbsnd_lowat;
+			u_int		sol_sbrcv_hiwat;
+			u_int		sol_sbsnd_hiwat;
+			short		sol_sbrcv_flags;
+			short		sol_sbsnd_flags;
+			sbintime_t	sol_sbrcv_timeo;
+			sbintime_t	sol_sbsnd_timeo;
+		};
+	};
 };
 
-/*
- * Global accept mutex to serialize access to accept queues and
- * fields associated with multiple sockets.  This allows us to
- * avoid defining a lock order between listen and accept sockets
- * until such time as it proves to be a good idea.
- */
-extern struct mtx accept_mtx;
-#define	ACCEPT_LOCK_ASSERT()		mtx_assert(&accept_mtx, MA_OWNED)
-#define	ACCEPT_UNLOCK_ASSERT()		mtx_assert(&accept_mtx, MA_NOTOWNED)
-#define	ACCEPT_LOCK()			mtx_lock(&accept_mtx)
-#define	ACCEPT_UNLOCK()			mtx_unlock(&accept_mtx)
+#define	SOCK_MTX(so)		(&(so)->so_lock)
+#define	SOCK_LOCK(so)		mtx_lock(&(so)->so_lock)
+#define	SOCK_OWNED(so)		mtx_owned(&(so)->so_lock)
+#define	SOCK_UNLOCK(so)		mtx_unlock(&(so)->so_lock)
+#define	SOCK_LOCK_ASSERT(so)	mtx_assert(&(so)->so_lock, MA_OWNED)
+#define	SOCK_UNLOCK_ASSERT(so)	mtx_assert(&(so)->so_lock, MA_NOTOWNED)
 
-/*
- * Per-socket mutex: we reuse the receive socket buffer mutex for space
- * efficiency.  This decision should probably be revisited as we optimize
- * locking for the socket code.
- */
-#define	SOCK_MTX(_so)			SOCKBUF_MTX(&(_so)->so_rcv)
-#define	SOCK_LOCK(_so)			SOCKBUF_LOCK(&(_so)->so_rcv)
-#define	SOCK_OWNED(_so)			SOCKBUF_OWNED(&(_so)->so_rcv)
-#define	SOCK_UNLOCK(_so)		SOCKBUF_UNLOCK(&(_so)->so_rcv)
-#define	SOCK_LOCK_ASSERT(_so)		SOCKBUF_LOCK_ASSERT(&(_so)->so_rcv)
+#define	SOLISTENING(sol)	(((sol)->so_options & SO_ACCEPTCONN) != 0)
+#define	SOLISTEN_LOCK(sol)	do {					\
+	mtx_lock(&(sol)->so_lock);					\
+	KASSERT(SOLISTENING(sol),					\
+	    ("%s: %p not listening", __func__, (sol)));			\
+} while (0)
+#define	SOLISTEN_TRYLOCK(sol)	mtx_trylock(&(sol)->so_lock)
+#define	SOLISTEN_UNLOCK(sol)	do {					\
+	KASSERT(SOLISTENING(sol),					\
+	    ("%s: %p not listening", __func__, (sol)));			\
+	mtx_unlock(&(sol)->so_lock);					\
+} while (0)
+#define	SOLISTEN_LOCK_ASSERT(sol)	do {				\
+	mtx_assert(&(sol)->so_lock, MA_OWNED);				\
+	KASSERT(SOLISTENING(sol),					\
+	    ("%s: %p not listening", __func__, (sol)));			\
+} while (0)
 
 /*
- * Socket state bits stored in so_qstate.
- */
-#define	SQ_INCOMP		0x0800	/* unaccepted, incomplete connection */
-#define	SQ_COMP			0x1000	/* unaccepted, complete connection */
-
-/*
  * Externalized form of struct socket used by the sysctl(3) interface.
  */
 struct xsocket {
 	size_t	xso_len;	/* length of this structure */
 	struct	socket *xso_so;	/* makes a convenient handle sometimes */
 	short	so_type;
 	short	so_options;
 	short	so_linger;
 	short	so_state;
 	caddr_t	so_pcb;		/* another convenient handle */
 	int	xso_protocol;
 	int	xso_family;
 	u_int	so_qlen;
 	u_int	so_incqlen;
 	u_int	so_qlimit;
 	short	so_timeo;
 	u_short	so_error;
 	pid_t	so_pgid;
 	u_long	so_oobmark;
 	struct xsockbuf so_rcv, so_snd;
 	uid_t	so_uid;		/* XXX */
 };
 
 #ifdef _KERNEL
 
 /*
  * Macros for sockets and socket buffering.
  */
 
 /*
  * Flags to sblock().
  */
 #define	SBL_WAIT	0x00000001	/* Wait if not immediately available. */
 #define	SBL_NOINTR	0x00000002	/* Force non-interruptible sleep. */
 #define	SBL_VALID	(SBL_WAIT | SBL_NOINTR)
 
 /*
  * Do we need to notify the other side when I/O is possible?
  */
 #define	sb_notify(sb)	(((sb)->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | \
     SB_UPCALL | SB_AIO | SB_KNOTE)) != 0)
 
 /* do we have to send all at once on a socket? */
 #define	sosendallatonce(so) \
     ((so)->so_proto->pr_flags & PR_ATOMIC)
 
 /* can we read something from so? */
 #define	soreadabledata(so) \
-    (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || \
-	!TAILQ_EMPTY(&(so)->so_comp) || (so)->so_error)
+	(sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat ||  (so)->so_error)
 #define	soreadable(so) \
 	(soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE))
 
 /* can we write something to so? */
 #define	sowriteable(so) \
     ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \
 	(((so)->so_state&SS_ISCONNECTED) || \
 	  ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \
      ((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \
      (so)->so_error)
 
 /*
- * soref()/sorele() ref-count the socket structure.  Note that you must
- * still explicitly close the socket, but the last ref count will free
- * the structure.
+ * soref()/sorele() ref-count the socket structure.
+ * soref() may be called without owning socket lock, but in that case a
+ * caller must own something that holds socket, and so_count must not be 0.
+ * Note that you must still explicitly close the socket, but the last ref
+ * count will free the structure.
  */
-#define	soref(so) do {							\
-	SOCK_LOCK_ASSERT(so);						\
-	++(so)->so_count;						\
-} while (0)
-
+#define	soref(so)	refcount_acquire(&(so)->so_count)
 #define	sorele(so) do {							\
-	ACCEPT_LOCK_ASSERT();						\
 	SOCK_LOCK_ASSERT(so);						\
-	if ((so)->so_count <= 0)					\
-		panic("sorele");					\
-	if (--(so)->so_count == 0)					\
+	if (refcount_release(&(so)->so_count))				\
 		sofree(so);						\
-	else {								\
+	else								\
 		SOCK_UNLOCK(so);					\
-		ACCEPT_UNLOCK();					\
-	}								\
 } while (0)
 
 /*
  * In sorwakeup() and sowwakeup(), acquire the socket buffer lock to
  * avoid a non-atomic test-and-wakeup.  However, sowakeup is
  * responsible for releasing the lock if it is called.  We unlock only
  * if we don't call into sowakeup.  If any code is introduced that
  * directly invokes the underlying sowakeup() primitives, it must
  * maintain the same semantics.
  */
 #define	sorwakeup_locked(so) do {					\
 	SOCKBUF_LOCK_ASSERT(&(so)->so_rcv);				\
 	if (sb_notify(&(so)->so_rcv))					\
 		sowakeup((so), &(so)->so_rcv);	 			\
 	else								\
 		SOCKBUF_UNLOCK(&(so)->so_rcv);				\
 } while (0)
 
 #define	sorwakeup(so) do {						\
 	SOCKBUF_LOCK(&(so)->so_rcv);					\
 	sorwakeup_locked(so);						\
 } while (0)
 
 #define	sowwakeup_locked(so) do {					\
 	SOCKBUF_LOCK_ASSERT(&(so)->so_snd);				\
 	if (sb_notify(&(so)->so_snd))					\
 		sowakeup((so), &(so)->so_snd); 				\
 	else								\
 		SOCKBUF_UNLOCK(&(so)->so_snd);				\
 } while (0)
 
 #define	sowwakeup(so) do {						\
 	SOCKBUF_LOCK(&(so)->so_snd);					\
 	sowwakeup_locked(so);						\
 } while (0)
 
 struct accept_filter {
 	char	accf_name[16];
 	int	(*accf_callback)
 		(struct socket *so, void *arg, int waitflag);
 	void *	(*accf_create)
 		(struct socket *so, char *arg);
 	void	(*accf_destroy)
 		(struct socket *so);
 	SLIST_ENTRY(accept_filter) accf_next;
 };
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_ACCF);
 MALLOC_DECLARE(M_PCB);
 MALLOC_DECLARE(M_SONAME);
 #endif
 
 /*
  * Socket specific helper hook point identifiers
  * Do not leave holes in the sequence, hook registration is a loop.
  */
 #define HHOOK_SOCKET_OPT		0
 #define HHOOK_SOCKET_CREATE		1
 #define HHOOK_SOCKET_RCV 		2
 #define HHOOK_SOCKET_SND		3
 #define HHOOK_FILT_SOREAD		4
 #define HHOOK_FILT_SOWRITE		5
 #define HHOOK_SOCKET_CLOSE		6
 #define HHOOK_SOCKET_LAST		HHOOK_SOCKET_CLOSE
 
 struct socket_hhook_data {
 	struct socket	*so;
 	struct mbuf	*m;
 	void		*hctx;		/* hook point specific data*/
 	int		status;
 };
 
 extern int	maxsockets;
 extern u_long	sb_max;
 extern so_gen_t so_gencnt;
 
 struct file;
 struct filecaps;
 struct filedesc;
 struct mbuf;
 struct sockaddr;
 struct ucred;
 struct uio;
 
 /* 'which' values for socket upcalls. */
 #define	SO_RCV		1
 #define	SO_SND		2
 
 /* Return values for socket upcalls. */
 #define	SU_OK		0
 #define	SU_ISCONNECTED	1
 
 /*
  * From uipc_socket and friends
  */
 int	getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len);
 int	getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp,
 	    struct file **fpp, u_int *fflagp, struct filecaps *havecaps);
 void	soabort(struct socket *so);
 int	soaccept(struct socket *so, struct sockaddr **nam);
 void	soaio_enqueue(struct task *task);
 void	soaio_rcv(void *context, int pending);
 void	soaio_snd(void *context, int pending);
 int	socheckuid(struct socket *so, uid_t uid);
 int	sobind(struct socket *so, struct sockaddr *nam, struct thread *td);
 int	sobindat(int fd, struct socket *so, struct sockaddr *nam,
 	    struct thread *td);
 int	soclose(struct socket *so);
 int	soconnect(struct socket *so, struct sockaddr *nam, struct thread *td);
 int	soconnectat(int fd, struct socket *so, struct sockaddr *nam,
 	    struct thread *td);
 int	soconnect2(struct socket *so1, struct socket *so2);
 int	socreate(int dom, struct socket **aso, int type, int proto,
 	    struct ucred *cred, struct thread *td);
 int	sodisconnect(struct socket *so);
 struct	sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags);
 void	sofree(struct socket *so);
 void	sohasoutofband(struct socket *so);
 int	solisten(struct socket *so, int backlog, struct thread *td);
 void	solisten_proto(struct socket *so, int backlog);
 int	solisten_proto_check(struct socket *so);
+int	solisten_dequeue(struct socket *, struct socket **, int);
 struct socket *
 	sonewconn(struct socket *head, int connstatus);
-
-
+struct socket *
+	sopeeloff(struct socket *);
 int	sopoll(struct socket *so, int events, struct ucred *active_cred,
 	    struct thread *td);
 int	sopoll_generic(struct socket *so, int events,
 	    struct ucred *active_cred, struct thread *td);
 int	soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio,
 	    struct mbuf **mp0, struct mbuf **controlp, int *flagsp);
 int	soreceive_stream(struct socket *so, struct sockaddr **paddr,
 	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
 	    int *flagsp);
 int	soreceive_dgram(struct socket *so, struct sockaddr **paddr,
 	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
 	    int *flagsp);
 int	soreceive_generic(struct socket *so, struct sockaddr **paddr,
 	    struct uio *uio, struct mbuf **mp0, struct mbuf **controlp,
 	    int *flagsp);
 int	soreserve(struct socket *so, u_long sndcc, u_long rcvcc);
 void	sorflush(struct socket *so);
 int	sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	    struct mbuf *top, struct mbuf *control, int flags,
 	    struct thread *td);
 int	sosend_dgram(struct socket *so, struct sockaddr *addr,
 	    struct uio *uio, struct mbuf *top, struct mbuf *control,
 	    int flags, struct thread *td);
 int	sosend_generic(struct socket *so, struct sockaddr *addr,
 	    struct uio *uio, struct mbuf *top, struct mbuf *control,
 	    int flags, struct thread *td);
 int	soshutdown(struct socket *so, int how);
 void	sotoxsocket(struct socket *so, struct xsocket *xso);
 void	soupcall_clear(struct socket *, int);
 void	soupcall_set(struct socket *, int, so_upcall_t, void *);
+void	solisten_upcall_set(struct socket *, so_upcall_t, void *);
 void	sowakeup(struct socket *so, struct sockbuf *sb);
 void	sowakeup_aio(struct socket *so, struct sockbuf *sb);
+void	solisten_wakeup(struct socket *);
 int	selsocket(struct socket *so, int events, struct timeval *tv,
 	    struct thread *td);
 
 /*
  * Accept filter functions (duh).
  */
 int	accept_filt_add(struct accept_filter *filt);
 int	accept_filt_del(char *name);
 struct	accept_filter *accept_filt_get(char *name);
 #ifdef ACCEPT_FILTER_MOD
 #ifdef SYSCTL_DECL
 SYSCTL_DECL(_net_inet_accf);
 #endif
 int	accept_filt_generic_mod_event(module_t mod, int event, void *data);
 #endif
 
 #endif /* _KERNEL */
 
 #endif /* !_SYS_SOCKETVAR_H_ */
Index: head/usr.bin/netstat/inet.c
===================================================================
--- head/usr.bin/netstat/inet.c	(revision 319721)
+++ head/usr.bin/netstat/inet.c	(revision 319722)
@@ -1,1385 +1,1388 @@
 /*-
  * Copyright (c) 1983, 1988, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if 0
 #ifndef lint
 static char sccsid[] = "@(#)inet.c	8.5 (Berkeley) 5/24/95";
 #endif /* not lint */
 #endif
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/queue.h>
 #include <sys/domain.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
 
 #include <net/route.h>
 #include <net/if_arp.h>
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/ip.h>
 #include <netinet/ip_carp.h>
 #ifdef INET6
 #include <netinet/ip6.h>
 #endif /* INET6 */
 #include <netinet/in_pcb.h>
 #include <netinet/ip_icmp.h>
 #include <netinet/icmp_var.h>
 #include <netinet/igmp_var.h>
 #include <netinet/ip_var.h>
 #include <netinet/pim_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcpip.h>
 #include <netinet/tcp_seq.h>
 #define	TCPSTATES
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 
 #include <arpa/inet.h>
 #include <err.h>
 #include <errno.h>
 #include <libutil.h>
 #include <netdb.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdbool.h>
 #include <string.h>
 #include <unistd.h>
 #include <libxo/xo.h>
 #include "netstat.h"
 #include "nl_defs.h"
 
 void	inetprint(const char *, struct in_addr *, int, const char *, int,
     const int);
 #ifdef INET6
 static int udp_done, tcp_done, sdp_done;
 #endif /* INET6 */
 
 static int
 pcblist_sysctl(int proto, const char *name, char **bufp)
 {
 	const char *mibvar;
 	char *buf;
 	size_t len;
 
 	switch (proto) {
 	case IPPROTO_TCP:
 		mibvar = "net.inet.tcp.pcblist";
 		break;
 	case IPPROTO_UDP:
 		mibvar = "net.inet.udp.pcblist";
 		break;
 	case IPPROTO_DIVERT:
 		mibvar = "net.inet.divert.pcblist";
 		break;
 	default:
 		mibvar = "net.inet.raw.pcblist";
 		break;
 	}
 	if (strncmp(name, "sdp", 3) == 0)
 		mibvar = "net.inet.sdp.pcblist";
 	len = 0;
 	if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) {
 		if (errno != ENOENT)
 			xo_warn("sysctl: %s", mibvar);
 		return (0);
 	}
 	if ((buf = malloc(len)) == NULL) {
 		xo_warnx("malloc %lu bytes", (u_long)len);
 		return (0);
 	}
 	if (sysctlbyname(mibvar, buf, &len, 0, 0) < 0) {
 		xo_warn("sysctl: %s", mibvar);
 		free(buf);
 		return (0);
 	}
 	*bufp = buf;
 	return (1);
 }
 
 /*
  * Copied directly from uipc_socket2.c.  We leave out some fields that are in
  * nested structures that aren't used to avoid extra work.
  */
 static void
 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
 {
 	xsb->sb_cc = sb->sb_ccc;
 	xsb->sb_hiwat = sb->sb_hiwat;
 	xsb->sb_mbcnt = sb->sb_mbcnt;
 	xsb->sb_mcnt = sb->sb_mcnt;
 	xsb->sb_ccnt = sb->sb_ccnt;
 	xsb->sb_mbmax = sb->sb_mbmax;
 	xsb->sb_lowat = sb->sb_lowat;
 	xsb->sb_flags = sb->sb_flags;
 	xsb->sb_timeo = sb->sb_timeo;
 }
 
 int
 sotoxsocket(struct socket *so, struct xsocket *xso)
 {
 	struct protosw proto;
 	struct domain domain;
 
 	bzero(xso, sizeof *xso);
 	xso->xso_len = sizeof *xso;
 	xso->xso_so = so;
 	xso->so_type = so->so_type;
 	xso->so_options = so->so_options;
 	xso->so_linger = so->so_linger;
 	xso->so_state = so->so_state;
 	xso->so_pcb = so->so_pcb;
 	if (kread((uintptr_t)so->so_proto, &proto, sizeof(proto)) != 0)
 		return (-1);
 	xso->xso_protocol = proto.pr_protocol;
 	if (kread((uintptr_t)proto.pr_domain, &domain, sizeof(domain)) != 0)
 		return (-1);
 	xso->xso_family = domain.dom_family;
-	xso->so_qlen = so->so_qlen;
-	xso->so_incqlen = so->so_incqlen;
-	xso->so_qlimit = so->so_qlimit;
 	xso->so_timeo = so->so_timeo;
 	xso->so_error = so->so_error;
-	xso->so_oobmark = so->so_oobmark;
-	sbtoxsockbuf(&so->so_snd, &xso->so_snd);
-	sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+	if (SOLISTENING(so)) {	/* listening: export listen queue sizes */
+		xso->so_qlen = so->sol_qlen;
+		xso->so_incqlen = so->sol_incqlen;
+		xso->so_qlimit = so->sol_qlimit;
+	} else {		/* otherwise: export buffers and OOB mark */
+		sbtoxsockbuf(&so->so_snd, &xso->so_snd);
+		sbtoxsockbuf(&so->so_rcv, &xso->so_rcv);
+		xso->so_oobmark = so->so_oobmark;
+	}
 	return (0);
 }
 
 /*
  * Print a summary of connections related to an Internet
  * protocol.  For TCP, also give state of connection.
  * Listening processes (aflag) are suppressed unless the
  * -a (all) flag is specified.
  */
 void
 protopr(u_long off, const char *name, int af1, int proto)
 {
 	static int first = 1;
 	int istcp;
 	char *buf;
 	const char *vchar;
 	struct xtcpcb *tp;
 	struct xinpcb *inp;
 	struct xinpgen *xig, *oxig;
 	struct xsocket *so;
 
 	istcp = 0;
 	switch (proto) {
 	case IPPROTO_TCP:
 #ifdef INET6
 		if (strncmp(name, "sdp", 3) != 0) {
 			if (tcp_done != 0)
 				return;
 			else
 				tcp_done = 1;
 		} else {
 			if (sdp_done != 0)
 				return;
 			else
 				sdp_done = 1;
 		}
 #endif
 		istcp = 1;
 		break;
 	case IPPROTO_UDP:
 #ifdef INET6
 		if (udp_done != 0)
 			return;
 		else
 			udp_done = 1;
 #endif
 		break;
 	}
 
 	if (!pcblist_sysctl(proto, name, &buf))
 		return;
 
 	oxig = xig = (struct xinpgen *)buf;
 	for (xig = (struct xinpgen *)((char *)xig + xig->xig_len);
 	    xig->xig_len > sizeof(struct xinpgen);
 	    xig = (struct xinpgen *)((char *)xig + xig->xig_len)) {
 		if (istcp) {
 			tp = (struct xtcpcb *)xig;
 			inp = &tp->xt_inp;
 		} else {
 			inp = (struct xinpcb *)xig;
 		}
 		so = &inp->xi_socket;
 
 		/* Ignore sockets for protocols other than the desired one. */
 		if (so->xso_protocol != proto)
 			continue;
 
 		/* Ignore PCBs which were freed during copyout. */
 		if (inp->inp_gencnt > oxig->xig_gen)
 			continue;
 
 		if ((af1 == AF_INET && (inp->inp_vflag & INP_IPV4) == 0)
 #ifdef INET6
 		    || (af1 == AF_INET6 && (inp->inp_vflag & INP_IPV6) == 0)
 #endif /* INET6 */
 		    || (af1 == AF_UNSPEC && ((inp->inp_vflag & INP_IPV4) == 0
 #ifdef INET6
 					  && (inp->inp_vflag & INP_IPV6) == 0
 #endif /* INET6 */
 			))
 		    )
 			continue;
 		if (!aflag &&
 		    (
 		     (istcp && tp->t_state == TCPS_LISTEN)
 		     || (af1 == AF_INET &&
 		      inet_lnaof(inp->inp_laddr) == INADDR_ANY)
 #ifdef INET6
 		     || (af1 == AF_INET6 &&
 			 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
 #endif /* INET6 */
 		     || (af1 == AF_UNSPEC &&
 			 (((inp->inp_vflag & INP_IPV4) != 0 &&
 			   inet_lnaof(inp->inp_laddr) == INADDR_ANY)
 #ifdef INET6
 			  || ((inp->inp_vflag & INP_IPV6) != 0 &&
 			      IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
 #endif
 			  ))
 		     ))
 			continue;
 
 		if (first) {
 			if (!Lflag) {
 				xo_emit("Active Internet connections");
 				if (aflag)
 					xo_emit(" (including servers)");
 			} else
 				xo_emit(
 	"Current listen queue sizes (qlen/incqlen/maxqlen)");
 			xo_emit("\n");
 			if (Aflag)
 				xo_emit("{T:/%-*s} ", 2 * (int)sizeof(void *),
 				    "Tcpcb");
 			if (Lflag)
 				xo_emit((Aflag && !Wflag) ?
 				    "{T:/%-5.5s} {T:/%-32.32s} {T:/%-18.18s}" :
 				    ((!Wflag || af1 == AF_INET) ?
 				    "{T:/%-5.5s} {T:/%-32.32s} {T:/%-22.22s}" :
 				    "{T:/%-5.5s} {T:/%-32.32s} {T:/%-45.45s}"),
 				    "Proto", "Listen", "Local Address");
 			else if (Tflag)
 				xo_emit((Aflag && !Wflag) ?
     "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-18.18s} {T:/%s}" :
 				    ((!Wflag || af1 == AF_INET) ?
     "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-22.22s} {T:/%s}" :
     "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-45.45s} {T:/%s}"),
 				    "Proto", "Rexmit", "OOORcv", "0-win",
 				    "Local Address", "Foreign Address");
 			else {
 				xo_emit((Aflag && !Wflag) ?
     "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-18.18s} {T:/%-18.18s}" :
 				    ((!Wflag || af1 == AF_INET) ?
     "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-22.22s} {T:/%-22.22s}" :
     "{T:/%-5.5s} {T:/%-6.6s} {T:/%-6.6s} {T:/%-45.45s} {T:/%-45.45s}"),
 				    "Proto", "Recv-Q", "Send-Q",
 				    "Local Address", "Foreign Address");
 				if (!xflag && !Rflag)
 					xo_emit(" (state)");
 			}
 			if (xflag) {
 				xo_emit(" {T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} "
 				    "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} "
 				    "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s} "
 				    "{T:/%-6.6s} {T:/%-6.6s} {T:/%-6.6s}",
 				    "R-MBUF", "S-MBUF", "R-CLUS", "S-CLUS",
 				    "R-HIWA", "S-HIWA", "R-LOWA", "S-LOWA",
 				    "R-BCNT", "S-BCNT", "R-BMAX", "S-BMAX");
 				xo_emit(" {T:/%7.7s} {T:/%7.7s} {T:/%7.7s} "
 				    "{T:/%7.7s} {T:/%7.7s} {T:/%7.7s}",
 				    "rexmt", "persist", "keep", "2msl",
 				    "delack", "rcvtime");
 			} else if (Rflag) {
 				xo_emit("  {T:/%8.8s} {T:/%5.5s}",
 				    "flowid", "ftype");
 			}
 			xo_emit("\n");
 			first = 0;
 		}
 		if (Lflag && so->so_qlimit == 0)
 			continue;
 		xo_open_instance("socket");
 		if (Aflag) {
 			if (istcp)
 				xo_emit("{q:address/%*lx} ",
 				    2 * (int)sizeof(void *),
 				    (u_long)inp->inp_ppcb);
 			else
 				xo_emit("{q:address/%*lx} ",
 				    2 * (int)sizeof(void *),
 				    (u_long)so->so_pcb);
 		}
 #ifdef INET6
 		if ((inp->inp_vflag & INP_IPV6) != 0)
 			vchar = ((inp->inp_vflag & INP_IPV4) != 0) ?
 			    "46" : "6";
 		else
 #endif
 		vchar = ((inp->inp_vflag & INP_IPV4) != 0) ?
 		    "4" : "";
 		if (istcp && (tp->t_flags & TF_TOE) != 0)
 			xo_emit("{:protocol/%-3.3s%-2.2s/%s%s} ", "toe", vchar);
 		else
 			xo_emit("{:protocol/%-3.3s%-2.2s/%s%s} ", name, vchar);
 		if (Lflag) {
 			char buf1[33];
 
 			snprintf(buf1, sizeof buf1, "%u/%u/%u", so->so_qlen,
 			    so->so_incqlen, so->so_qlimit);
 			xo_emit("{:listen-queue-sizes/%-32.32s} ", buf1);
 		} else if (Tflag) {
 			if (istcp)
 				xo_emit("{:sent-retransmit-packets/%6u} "
 				    "{:received-out-of-order-packets/%6u} "
 				    "{:sent-zero-window/%6u} ",
 				    tp->t_sndrexmitpack, tp->t_rcvoopack,
 				    tp->t_sndzerowin);
 			else
 				xo_emit("{P:/%21s}", "");
 		} else {
 			xo_emit("{:receive-bytes-waiting/%6u} "
 			    "{:send-bytes-waiting/%6u} ",
 			    so->so_rcv.sb_cc, so->so_snd.sb_cc);
 		}
 		if (numeric_port) {
 			if (inp->inp_vflag & INP_IPV4) {
 				inetprint("local", &inp->inp_laddr,
 				    (int)inp->inp_lport, name, 1, af1);
 				if (!Lflag)
 					inetprint("remote", &inp->inp_faddr,
 					    (int)inp->inp_fport, name, 1, af1);
 			}
 #ifdef INET6
 			else if (inp->inp_vflag & INP_IPV6) {
 				inet6print("local", &inp->in6p_laddr,
 				    (int)inp->inp_lport, name, 1);
 				if (!Lflag)
 					inet6print("remote", &inp->in6p_faddr,
 					    (int)inp->inp_fport, name, 1);
 			} /* else nothing printed now */
 #endif /* INET6 */
 		} else if (inp->inp_flags & INP_ANONPORT) {
 			if (inp->inp_vflag & INP_IPV4) {
 				inetprint("local", &inp->inp_laddr,
 				    (int)inp->inp_lport, name, 1, af1);
 				if (!Lflag)
 					inetprint("remote", &inp->inp_faddr,
 					    (int)inp->inp_fport, name, 0, af1);
 			}
 #ifdef INET6
 			else if (inp->inp_vflag & INP_IPV6) {
 				inet6print("local", &inp->in6p_laddr,
 				    (int)inp->inp_lport, name, 1);
 				if (!Lflag)
 					inet6print("remote", &inp->in6p_faddr,
 					    (int)inp->inp_fport, name, 0);
 			} /* else nothing printed now */
 #endif /* INET6 */
 		} else {
 			if (inp->inp_vflag & INP_IPV4) {
 				inetprint("local", &inp->inp_laddr,
 				    (int)inp->inp_lport, name, 0, af1);
 				if (!Lflag)
 					inetprint("remote", &inp->inp_faddr,
 					    (int)inp->inp_fport, name,
 					    inp->inp_lport != inp->inp_fport,
 					    af1);
 			}
 #ifdef INET6
 			else if (inp->inp_vflag & INP_IPV6) {
 				inet6print("local", &inp->in6p_laddr,
 				    (int)inp->inp_lport, name, 0);
 				if (!Lflag)
 					inet6print("remote", &inp->in6p_faddr,
 					    (int)inp->inp_fport, name,
 					    inp->inp_lport != inp->inp_fport);
 			} /* else nothing printed now */
 #endif /* INET6 */
 		}
 		if (xflag) {
 			xo_emit("{:receive-mbufs/%6u} {:send-mbufs/%6u} "
 			    "{:receive-clusters/%6u} {:send-clusters/%6u} "
 			    "{:receive-high-water/%6u} {:send-high-water/%6u} "
 			    "{:receive-low-water/%6u} {:send-low-water/%6u} "
 			    "{:receive-mbuf-bytes/%6u} {:send-mbuf-bytes/%6u} "
 			    "{:receive-mbuf-bytes-max/%6u} "
 			    "{:send-mbuf-bytes-max/%6u}",
 			    so->so_rcv.sb_mcnt, so->so_snd.sb_mcnt,
 			    so->so_rcv.sb_ccnt, so->so_snd.sb_ccnt,
 			    so->so_rcv.sb_hiwat, so->so_snd.sb_hiwat,
 			    so->so_rcv.sb_lowat, so->so_snd.sb_lowat,
 			    so->so_rcv.sb_mbcnt, so->so_snd.sb_mbcnt,
 			    so->so_rcv.sb_mbmax, so->so_snd.sb_mbmax);
 			if (istcp)
 				xo_emit(" {:retransmit-timer/%4d.%02d} "
 				    "{:persist-timer/%4d.%02d} "
 				    "{:keepalive-timer/%4d.%02d} "
 				    "{:msl2-timer/%4d.%02d} "
 				    "{:delay-ack-timer/%4d.%02d} "
 				    "{:inactivity-timer/%4d.%02d}",
 				    tp->tt_rexmt / 1000,
 				    (tp->tt_rexmt % 1000) / 10,
 				    tp->tt_persist / 1000,
 				    (tp->tt_persist % 1000) / 10,
 				    tp->tt_keep / 1000,
 				    (tp->tt_keep % 1000) / 10,
 				    tp->tt_2msl / 1000,
 				    (tp->tt_2msl % 1000) / 10,
 				    tp->tt_delack / 1000,
 				    (tp->tt_delack % 1000) / 10,
 				    tp->t_rcvtime / 1000,
 				    (tp->t_rcvtime % 1000) / 10);
 		}
 		if (istcp && !Lflag && !xflag && !Tflag && !Rflag) {
 			if (tp->t_state < 0 || tp->t_state >= TCP_NSTATES)
 				xo_emit("{:tcp-state/%d}", tp->t_state);
 			else {
 				xo_emit("{:tcp-state/%s}",
 				    tcpstates[tp->t_state]);
 #if defined(TF_NEEDSYN) && defined(TF_NEEDFIN)
 				/* Show T/TCP `hidden state' */
 				if (tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN))
 					xo_emit("{:need-syn-or-fin/*}");
 #endif /* defined(TF_NEEDSYN) && defined(TF_NEEDFIN) */
 			}
 		}
 		if (Rflag) {
 			/* XXX: is this right Alfred */
 			xo_emit(" {:flow-id/%08x} {:flow-type/%5d}",
 			    inp->inp_flowid,
 			    inp->inp_flowtype);
 		}
 		xo_emit("\n");
 		xo_close_instance("socket");
 	}
 	if (xig != oxig && xig->xig_gen != oxig->xig_gen) {
 		if (oxig->xig_count > xig->xig_count) {
 			xo_emit("Some {d:lost/%s} sockets may have been "
 			    "deleted.\n", name);
 		} else if (oxig->xig_count < xig->xig_count) {
 			xo_emit("Some {d:created/%s} sockets may have been "
 			    "created.\n", name);
 		} else {
 			xo_emit("Some {d:changed/%s} sockets may have been "
 			    "created or deleted.\n", name);
 		}
 	}
 	free(buf);
 }
 
 /*
  * Dump TCP statistics structure.
  */
 void
 tcp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct tcpstat tcpstat;
 	uint64_t tcps_states[TCP_NSTATES];
 
 #ifdef INET6
 	if (tcp_done != 0)
 		return;
 	else
 		tcp_done = 1;
 #endif
 
 	if (fetch_stats("net.inet.tcp.stats", off, &tcpstat,
 	    sizeof(tcpstat), kread_counters) != 0)
 		return;
 
 	if (fetch_stats_ro("net.inet.tcp.states", nl[N_TCPS_STATES].n_value,
 	    &tcps_states, sizeof(tcps_states), kread_counters) != 0)
 		return;
 
 	xo_open_container("tcp");
 	xo_emit("{T:/%s}:\n", name);
 
 #define	p(f, m) if (tcpstat.f || sflag <= 1)				\
 	xo_emit(m, (uintmax_t )tcpstat.f, plural(tcpstat.f))
 #define	p1a(f, m) if (tcpstat.f || sflag <= 1)				\
 	xo_emit(m, (uintmax_t )tcpstat.f)
 #define	p2(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1)	\
 	xo_emit(m, (uintmax_t )tcpstat.f1, plural(tcpstat.f1),		\
 	    (uintmax_t )tcpstat.f2, plural(tcpstat.f2))
 #define	p2a(f1, f2, m) if (tcpstat.f1 || tcpstat.f2 || sflag <= 1)	\
 	xo_emit(m, (uintmax_t )tcpstat.f1, plural(tcpstat.f1),		\
 	    (uintmax_t )tcpstat.f2)
 #define	p3(f, m) if (tcpstat.f || sflag <= 1)				\
 	xo_emit(m, (uintmax_t )tcpstat.f, pluralies(tcpstat.f))
 
 	p(tcps_sndtotal, "\t{:sent-packets/%ju} {N:/packet%s sent}\n");
 	p2(tcps_sndpack,tcps_sndbyte, "\t\t{:sent-data-packets/%ju} "
 	    "{N:/data packet%s} ({:sent-data-bytes/%ju} {N:/byte%s})\n");
 	p2(tcps_sndrexmitpack, tcps_sndrexmitbyte, "\t\t"
 	    "{:sent-retransmitted-packets/%ju} {N:/data packet%s} "
 	    "({:sent-retransmitted-bytes/%ju} {N:/byte%s}) "
 	    "{N:retransmitted}\n");
 	p(tcps_sndrexmitbad, "\t\t"
 	    "{:sent-unnecessary-retransmitted-packets/%ju} "
 	    "{N:/data packet%s unnecessarily retransmitted}\n");
 	p(tcps_mturesent, "\t\t{:sent-resends-by-mtu-discovery/%ju} "
 	    "{N:/resend%s initiated by MTU discovery}\n");
 	p2a(tcps_sndacks, tcps_delack, "\t\t{:sent-ack-only-packets/%ju} "
 	    "{N:/ack-only packet%s/} ({:sent-packets-delayed/%ju} "
 	    "{N:delayed})\n");
 	p(tcps_sndurg, "\t\t{:sent-urg-only-packets/%ju} "
 	    "{N:/URG only packet%s}\n");
 	p(tcps_sndprobe, "\t\t{:sent-window-probe-packets/%ju} "
 	    "{N:/window probe packet%s}\n");
 	p(tcps_sndwinup, "\t\t{:sent-window-update-packets/%ju} "
 	    "{N:/window update packet%s}\n");
 	p(tcps_sndctrl, "\t\t{:sent-control-packets/%ju} "
 	    "{N:/control packet%s}\n");
 	p(tcps_rcvtotal, "\t{:received-packets/%ju} "
 	    "{N:/packet%s received}\n");
 	p2(tcps_rcvackpack, tcps_rcvackbyte, "\t\t"
 	    "{:received-ack-packets/%ju} {N:/ack%s} "
 	    "{N:(for} {:received-ack-bytes/%ju} {N:/byte%s})\n");
 	p(tcps_rcvdupack, "\t\t{:received-duplicate-acks/%ju} "
 	    "{N:/duplicate ack%s}\n");
 	p(tcps_rcvacktoomuch, "\t\t{:received-acks-for-unsent-data/%ju} "
 	    "{N:/ack%s for unsent data}\n");
 	p2(tcps_rcvpack, tcps_rcvbyte, "\t\t"
 	    "{:received-in-sequence-packets/%ju} {N:/packet%s} "
 	    "({:received-in-sequence-bytes/%ju} {N:/byte%s}) "
 	    "{N:received in-sequence}\n");
 	p2(tcps_rcvduppack, tcps_rcvdupbyte, "\t\t"
 	    "{:received-completely-duplicate-packets/%ju} "
 	    "{N:/completely duplicate packet%s} "
 	    "({:received-completely-duplicate-bytes/%ju} {N:/byte%s})\n");
 	p(tcps_pawsdrop, "\t\t{:received-old-duplicate-packets/%ju} "
 	    "{N:/old duplicate packet%s}\n");
 	p2(tcps_rcvpartduppack, tcps_rcvpartdupbyte, "\t\t"
 	    "{:received-some-duplicate-packets/%ju} "
 	    "{N:/packet%s with some dup. data} "
 	    "({:received-some-duplicate-bytes/%ju} {N:/byte%s duped/})\n");
 	p2(tcps_rcvoopack, tcps_rcvoobyte, "\t\t{:received-out-of-order/%ju} "
 	    "{N:/out-of-order packet%s} "
 	    "({:received-out-of-order-bytes/%ju} {N:/byte%s})\n");
 	p2(tcps_rcvpackafterwin, tcps_rcvbyteafterwin, "\t\t"
 	    "{:received-after-window-packets/%ju} {N:/packet%s} "
 	    "({:received-after-window-bytes/%ju} {N:/byte%s}) "
 	    "{N:of data after window}\n");
 	p(tcps_rcvwinprobe, "\t\t{:received-window-probes/%ju} "
 	    "{N:/window probe%s}\n");
 	p(tcps_rcvwinupd, "\t\t{:receive-window-update-packets/%ju} "
 	    "{N:/window update packet%s}\n");
 	p(tcps_rcvafterclose, "\t\t{:received-after-close-packets/%ju} "
 	    "{N:/packet%s received after close}\n");
 	p(tcps_rcvbadsum, "\t\t{:discard-bad-checksum/%ju} "
 	    "{N:/discarded for bad checksum%s}\n");
 	p(tcps_rcvbadoff, "\t\t{:discard-bad-header-offset/%ju} "
 	    "{N:/discarded for bad header offset field%s}\n");
 	p1a(tcps_rcvshort, "\t\t{:discard-too-short/%ju} "
 	    "{N:discarded because packet too short}\n");
 	p1a(tcps_rcvmemdrop, "\t\t{:discard-memory-problems/%ju} "
 	    "{N:discarded due to memory problems}\n");
 	p(tcps_connattempt, "\t{:connection-requests/%ju} "
 	    "{N:/connection request%s}\n");
 	p(tcps_accepts, "\t{:connections-accepts/%ju} "
 	    "{N:/connection accept%s}\n");
 	p(tcps_badsyn, "\t{:bad-connection-attempts/%ju} "
 	    "{N:/bad connection attempt%s}\n");
 	p(tcps_listendrop, "\t{:listen-queue-overflows/%ju} "
 	    "{N:/listen queue overflow%s}\n");
 	p(tcps_badrst, "\t{:ignored-in-window-resets/%ju} "
 	    "{N:/ignored RSTs in the window%s}\n");
 	p(tcps_connects, "\t{:connections-established/%ju} "
 	    "{N:/connection%s established (including accepts)}\n");
 	p(tcps_usedrtt, "\t\t{:connections-hostcache-rtt/%ju} "
 	    "{N:/time%s used RTT from hostcache}\n");
 	p(tcps_usedrttvar, "\t\t{:connections-hostcache-rttvar/%ju} "
 	    "{N:/time%s used RTT variance from hostcache}\n");
 	p(tcps_usedssthresh, "\t\t{:connections-hostcache-ssthresh/%ju} "
 	    "{N:/time%s used slow-start threshold from hostcache}\n");
 	p2(tcps_closed, tcps_drops, "\t{:connections-closed/%ju} "
 	    "{N:/connection%s closed (including} "
 	    "{:connection-drops/%ju} {N:/drop%s})\n");
 	p(tcps_cachedrtt, "\t\t{:connections-updated-rtt-on-close/%ju} "
 	    "{N:/connection%s updated cached RTT on close}\n");
 	p(tcps_cachedrttvar, "\t\t"
 	    "{:connections-updated-variance-on-close/%ju} "
 	    "{N:/connection%s updated cached RTT variance on close}\n");
 	p(tcps_cachedssthresh, "\t\t"
 	    "{:connections-updated-ssthresh-on-close/%ju} "
 	    "{N:/connection%s updated cached ssthresh on close}\n");
 	p(tcps_conndrops, "\t{:embryonic-connections-dropped/%ju} "
 	    "{N:/embryonic connection%s dropped}\n");
 	p2(tcps_rttupdated, tcps_segstimed, "\t{:segments-updated-rtt/%ju} "
 	    "{N:/segment%s updated rtt (of} "
 	    "{:segment-update-attempts/%ju} {N:/attempt%s})\n");
 	p(tcps_rexmttimeo, "\t{:retransmit-timeouts/%ju} "
 	    "{N:/retransmit timeout%s}\n");
 	p(tcps_timeoutdrop, "\t\t"
 	    "{:connections-dropped-by-retransmit-timeout/%ju} "
 	    "{N:/connection%s dropped by rexmit timeout}\n");
 	p(tcps_persisttimeo, "\t{:persist-timeout/%ju} "
 	    "{N:/persist timeout%s}\n");
 	p(tcps_persistdrop, "\t\t"
 	    "{:connections-dropped-by-persist-timeout/%ju} "
 	    "{N:/connection%s dropped by persist timeout}\n");
 	p(tcps_finwait2_drops, "\t"
 	    "{:connections-dropped-by-finwait2-timeout/%ju} "
 	    "{N:/Connection%s (fin_wait_2) dropped because of timeout}\n");
 	p(tcps_keeptimeo, "\t{:keepalive-timeout/%ju} "
 	    "{N:/keepalive timeout%s}\n");
 	p(tcps_keepprobe, "\t\t{:keepalive-probes/%ju} "
 	    "{N:/keepalive probe%s sent}\n");
 	p(tcps_keepdrops, "\t\t{:connections-dropped-by-keepalives/%ju} "
 	    "{N:/connection%s dropped by keepalive}\n");
 	p(tcps_predack, "\t{:ack-header-predictions/%ju} "
 	    "{N:/correct ACK header prediction%s}\n");
 	p(tcps_preddat, "\t{:data-packet-header-predictions/%ju} "
 	    "{N:/correct data packet header prediction%s}\n");
 
 	xo_open_container("syncache");
 
 	p3(tcps_sc_added, "\t{:entries-added/%ju} "
 	    "{N:/syncache entr%s added}\n");
 	p1a(tcps_sc_retransmitted, "\t\t{:retransmitted/%ju} "
 	    "{N:/retransmitted}\n");
 	p1a(tcps_sc_dupsyn, "\t\t{:duplicates/%ju} {N:/dupsyn}\n");
 	p1a(tcps_sc_dropped, "\t\t{:dropped/%ju} {N:/dropped}\n");
 	p1a(tcps_sc_completed, "\t\t{:completed/%ju} {N:/completed}\n");
 	p1a(tcps_sc_bucketoverflow, "\t\t{:bucket-overflow/%ju} "
 	    "{N:/bucket overflow}\n");
 	p1a(tcps_sc_cacheoverflow, "\t\t{:cache-overflow/%ju} "
 	    "{N:/cache overflow}\n");
 	p1a(tcps_sc_reset, "\t\t{:reset/%ju} {N:/reset}\n");
 	p1a(tcps_sc_stale, "\t\t{:stale/%ju} {N:/stale}\n");
 	p1a(tcps_sc_aborted, "\t\t{:aborted/%ju} {N:/aborted}\n");
 	p1a(tcps_sc_badack, "\t\t{:bad-ack/%ju} {N:/badack}\n");
 	p1a(tcps_sc_unreach, "\t\t{:unreachable/%ju} {N:/unreach}\n");
 	p(tcps_sc_zonefail, "\t\t{:zone-failures/%ju} {N:/zone failure%s}\n");
 	p(tcps_sc_sendcookie, "\t{:sent-cookies/%ju} {N:/cookie%s sent}\n");
 	p(tcps_sc_recvcookie, "\t{:receivd-cookies/%ju} "
 	    "{N:/cookie%s received}\n");
 
 	xo_close_container("syncache");
 
 	xo_open_container("hostcache");
 
 	p3(tcps_hc_added, "\t{:entries-added/%ju} "
 	    "{N:/hostcache entr%s added}\n");
 	p1a(tcps_hc_bucketoverflow, "\t\t{:buffer-overflows/%ju} "
 	    "{N:/bucket overflow}\n");
 
 	xo_close_container("hostcache");
 
 	xo_open_container("sack");
 
 	p(tcps_sack_recovery_episode, "\t{:recovery-episodes/%ju} "
 	    "{N:/SACK recovery episode%s}\n");
  	p(tcps_sack_rexmits, "\t{:segment-retransmits/%ju} "
 	    "{N:/segment rexmit%s in SACK recovery episodes}\n");
  	p(tcps_sack_rexmit_bytes, "\t{:byte-retransmits/%ju} "
 	    "{N:/byte rexmit%s in SACK recovery episodes}\n");
  	p(tcps_sack_rcv_blocks, "\t{:received-blocks/%ju} "
 	    "{N:/SACK option%s (SACK blocks) received}\n");
 	p(tcps_sack_send_blocks, "\t{:sent-option-blocks/%ju} "
 	    "{N:/SACK option%s (SACK blocks) sent}\n");
 	p1a(tcps_sack_sboverflow, "\t{:scoreboard-overflows/%ju} "
 	    "{N:/SACK scoreboard overflow}\n");
 
 	xo_close_container("sack");
 	xo_open_container("ecn");
 
 	p(tcps_ecn_ce, "\t{:ce-packets/%ju} "
 	    "{N:/packet%s with ECN CE bit set}\n");
 	p(tcps_ecn_ect0, "\t{:ect0-packets/%ju} "
 	    "{N:/packet%s with ECN ECT(0) bit set}\n");
 	p(tcps_ecn_ect1, "\t{:ect1-packets/%ju} "
 	    "{N:/packet%s with ECN ECT(1) bit set}\n");
 	p(tcps_ecn_shs, "\t{:handshakes/%ju} "
 	    "{N:/successful ECN handshake%s}\n");
 	p(tcps_ecn_rcwnd, "\t{:congestion-reductions/%ju} "
 	    "{N:/time%s ECN reduced the congestion window}\n");
 
 	xo_close_container("ecn");
 	xo_open_container("tcp-signature");
 	p(tcps_sig_rcvgoodsig, "\t{:received-good-signature/%ju} "
 	    "{N:/packet%s with matching signature received}\n");
 	p(tcps_sig_rcvbadsig, "\t{:received-bad-signature/%ju} "
 	    "{N:/packet%s with bad signature received}\n");
 	p(tcps_sig_err_buildsig, "\t{:failed-make-signature/%ju} "
 	    "{N:/time%s failed to make signature due to no SA}\n");
 	p(tcps_sig_err_sigopt, "\t{:no-signature-expected/%ju} "
 	    "{N:/time%s unexpected signature received}\n");
 	p(tcps_sig_err_nosigopt, "\t{:no-signature-provided/%ju} "
 	    "{N:/time%s no signature provided by segment}\n");
  #undef p
  #undef p1a
  #undef p2
  #undef p2a
  #undef p3
 	xo_close_container("tcp-signature");
 
 	xo_open_container("TCP connection count by state");
 	xo_emit("{T:/TCP connection count by state}:\n");
 	for (int i = 0; i < TCP_NSTATES; i++) {
 		/*
 		 * XXXGL: is there a way in libxo to use %s
 		 * in the "content string" of a format
 		 * string? I failed to do that, that's why
 		 * a temporary buffer is used to construct
 		 * format string for xo_emit().
 		 */
 		char fmtbuf[80];
 
 		if (sflag > 1 && tcps_states[i] == 0)
 			continue;
 		snprintf(fmtbuf, sizeof(fmtbuf), "\t{:%s/%%ju} "
                     "{Np:/connection ,connections} in %s state\n",
 		    tcpstates[i], tcpstates[i]);
 		xo_emit(fmtbuf, (uintmax_t )tcps_states[i]);
 	}
 	xo_close_container("TCP connection count by state");
 
 	xo_close_container("tcp");
 }
 
 /*
  * Dump UDP statistics structure.
  */
 void
 udp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct udpstat udpstat;
 	uint64_t delivered;
 
 #ifdef INET6
 	if (udp_done != 0)
 		return;
 	else
 		udp_done = 1;
 #endif
 
 	if (fetch_stats("net.inet.udp.stats", off, &udpstat,
 	    sizeof(udpstat), kread_counters) != 0)
 		return;
 
 	xo_open_container("udp");
 	xo_emit("{T:/%s}:\n", name);
 
 #define	p(f, m) if (udpstat.f || sflag <= 1) \
 	xo_emit("\t" m, (uintmax_t)udpstat.f, plural(udpstat.f))
 #define	p1a(f, m) if (udpstat.f || sflag <= 1) \
 	xo_emit("\t" m, (uintmax_t)udpstat.f)
 
 	p(udps_ipackets, "{:received-datagrams/%ju} "
 	    "{N:/datagram%s received}\n");
 	p1a(udps_hdrops, "{:dropped-incomplete-headers/%ju} "
 	    "{N:/with incomplete header}\n");
 	p1a(udps_badlen, "{:dropped-bad-data-length/%ju} "
 	    "{N:/with bad data length field}\n");
 	p1a(udps_badsum, "{:dropped-bad-checksum/%ju} "
 	    "{N:/with bad checksum}\n");
 	p1a(udps_nosum, "{:dropped-no-checksum/%ju} "
 	    "{N:/with no checksum}\n");
 	p1a(udps_noport, "{:dropped-no-socket/%ju} "
 	    "{N:/dropped due to no socket}\n");
 	p(udps_noportbcast, "{:dropped-broadcast-multicast/%ju} "
 	    "{N:/broadcast\\/multicast datagram%s undelivered}\n");
 	p1a(udps_fullsock, "{:dropped-full-socket-buffer/%ju} "
 	    "{N:/dropped due to full socket buffers}\n");
 	p1a(udpps_pcbhashmiss, "{:not-for-hashed-pcb/%ju} "
 	    "{N:/not for hashed pcb}\n");
 	delivered = udpstat.udps_ipackets -
 		    udpstat.udps_hdrops -
 		    udpstat.udps_badlen -
 		    udpstat.udps_badsum -
 		    udpstat.udps_noport -
 		    udpstat.udps_noportbcast -
 		    udpstat.udps_fullsock;
 	if (delivered || sflag <= 1)
 		xo_emit("\t{:delivered-packets/%ju} {N:/delivered}\n",
 		    (uint64_t)delivered);
 	p(udps_opackets, "{:output-packets/%ju} {N:/datagram%s output}\n");
 	/* the next statistic is cumulative in udps_noportbcast */
 	p(udps_filtermcast, "{:multicast-source-filter-matches/%ju} "
 	    "{N:/time%s multicast source filter matched}\n");
 #undef p
 #undef p1a
 	xo_close_container("udp");
 }
 
 /*
  * Dump CARP statistics structure.
  */
 void
 carp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct carpstats carpstat;
 
 	if (fetch_stats("net.inet.carp.stats", off, &carpstat,
 	    sizeof(carpstat), kread_counters) != 0)
 		return;
 
 	xo_open_container(name);
 	xo_emit("{T:/%s}:\n", name);
 
 #define	p(f, m) if (carpstat.f || sflag <= 1) \
 	xo_emit(m, (uintmax_t)carpstat.f, plural(carpstat.f))
 #define	p2(f, m) if (carpstat.f || sflag <= 1) \
 	xo_emit(m, (uintmax_t)carpstat.f)
 
 	p(carps_ipackets, "\t{:received-inet-packets/%ju} "
 	    "{N:/packet%s received (IPv4)}\n");
 	p(carps_ipackets6, "\t{:received-inet6-packets/%ju} "
 	    "{N:/packet%s received (IPv6)}\n");
 	p(carps_badttl, "\t\t{:dropped-wrong-ttl/%ju} "
 	    "{N:/packet%s discarded for wrong TTL}\n");
 	p(carps_hdrops, "\t\t{:dropped-short-header/%ju} "
 	    "{N:/packet%s shorter than header}\n");
 	p(carps_badsum, "\t\t{:dropped-bad-checksum/%ju} "
 	    "{N:/discarded for bad checksum%s}\n");
 	p(carps_badver,	"\t\t{:dropped-bad-version/%ju} "
 	    "{N:/discarded packet%s with a bad version}\n");
 	p2(carps_badlen, "\t\t{:dropped-short-packet/%ju} "
 	    "{N:/discarded because packet too short}\n");
 	p2(carps_badauth, "\t\t{:dropped-bad-authentication/%ju} "
 	    "{N:/discarded for bad authentication}\n");
 	p2(carps_badvhid, "\t\t{:dropped-bad-vhid/%ju} "
 	    "{N:/discarded for bad vhid}\n");
 	p2(carps_badaddrs, "\t\t{:dropped-bad-address-list/%ju} "
 	    "{N:/discarded because of a bad address list}\n");
 	p(carps_opackets, "\t{:sent-inet-packets/%ju} "
 	    "{N:/packet%s sent (IPv4)}\n");
 	p(carps_opackets6, "\t{:sent-inet6-packets/%ju} "
 	    "{N:/packet%s sent (IPv6)}\n");
 	p2(carps_onomem, "\t\t{:send-failed-memory-error/%ju} "
 	    "{N:/send failed due to mbuf memory error}\n");
 #if notyet
 	p(carps_ostates, "\t\t{:send-state-updates/%s} "
 	    "{N:/state update%s sent}\n");
 #endif
 #undef p
 #undef p2
 	xo_close_container(name);
 }
 
 /*
  * Dump IP statistics structure.
  *
  * The counters are loaded into a local struct ipstat by fetch_stats(),
  * which is handed the "net.inet.ip.stats" sysctl name, the kernel offset
  * "off" and kread_counters as the reader.  Nothing is printed when that
  * call reports failure.  All output is wrapped in an xo container called
  * "name".  A counter whose value is zero is skipped unless sflag <= 1.
  */
 void
 ip_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct ipstat ipstat;
 	int error;
 
 	error = fetch_stats("net.inet.ip.stats", off, &ipstat,
 	    sizeof(ipstat), kread_counters);
 	if (error != 0)
 		return;
 
 	xo_open_container(name);
 	xo_emit("{T:/%s}:\n", name);
 
 /* Counter followed by a noun that takes the suffix from plural(). */
 #define	p(f, m) do {							\
 	if (ipstat.f || sflag <= 1)					\
 		xo_emit(m, (uintmax_t)ipstat.f, plural(ipstat.f));	\
 } while (0)
 /* Counter followed by fixed text (no plural suffix). */
 #define	p1a(f, m) do {							\
 	if (ipstat.f || sflag <= 1)					\
 		xo_emit(m, (uintmax_t)ipstat.f);			\
 } while (0)
 
 	p(ips_total, "\t{:received-packets/%ju} "
 	    "{N:/total packet%s received}\n");
 	p(ips_badsum, "\t{:dropped-bad-checksum/%ju} "
 	    "{N:/bad header checksum%s}\n");
 	p1a(ips_toosmall, "\t{:dropped-below-minimum-size/%ju} "
 	    "{N:/with size smaller than minimum}\n");
 	p1a(ips_tooshort, "\t{:dropped-short-packets/%ju} "
 	    "{N:/with data size < data length}\n");
 	p1a(ips_toolong, "\t{:dropped-too-long/%ju} "
 	    "{N:/with ip length > max ip packet size}\n");
 	p1a(ips_badhlen, "\t{:dropped-short-header-length/%ju} "
 	    "{N:/with header length < data size}\n");
 	p1a(ips_badlen, "\t{:dropped-short-data/%ju} "
 	    "{N:/with data length < header length}\n");
 	p1a(ips_badoptions, "\t{:dropped-bad-options/%ju} "
 	    "{N:/with bad options}\n");
 	p1a(ips_badvers, "\t{:dropped-bad-version/%ju} "
 	    "{N:/with incorrect version number}\n");
 	p(ips_fragments, "\t{:received-fragments/%ju} "
 	    "{N:/fragment%s received}\n");
 	p(ips_fragdropped, "\t{:dropped-fragments/%ju} "
 	    "{N:/fragment%s dropped (dup or out of space)}\n");
 	p(ips_fragtimeout, "\t{:dropped-fragments-after-timeout/%ju} "
 	    "{N:/fragment%s dropped after timeout}\n");
 	p(ips_reassembled, "\t{:reassembled-packets/%ju} "
 	    "{N:/packet%s reassembled ok}\n");
 	p(ips_delivered, "\t{:received-local-packets/%ju} "
 	    "{N:/packet%s for this host}\n");
 	p(ips_noproto, "\t{:dropped-unknown-protocol/%ju} "
 	    "{N:/packet%s for unknown\\/unsupported protocol}\n");
 
 	/*
 	 * The forwarded and fast-forwarded counters share one output
 	 * line, so the newline is emitted separately below.
 	 */
 	p(ips_forward, "\t{:forwarded-packets/%ju} "
 	    "{N:/packet%s forwarded}");
 	p(ips_fastforward, " ({:fast-forwarded-packets/%ju} "
 	    "{N:/packet%s fast forwarded})");
 	if (ipstat.ips_forward || sflag <= 1) {
 		xo_emit("\n");
 	}
 
 	p(ips_cantforward, "\t{:packets-cannot-forward/%ju} "
 	    "{N:/packet%s not forwardable}\n");
 	p(ips_notmember, "\t{:received-unknown-multicast-group/%ju} "
 	    "{N:/packet%s received for unknown multicast group}\n");
 	p(ips_redirectsent, "\t{:redirects-sent/%ju} "
 	    "{N:/redirect%s sent}\n");
 	p(ips_localout, "\t{:sent-packets/%ju} "
 	    "{N:/packet%s sent from this host}\n");
 	p(ips_rawout, "\t{:send-packets-fabricated-header/%ju} "
 	    "{N:/packet%s sent with fabricated ip header}\n");
 	p(ips_odropped, "\t{:discard-no-mbufs/%ju} "
 	    "{N:/output packet%s dropped due to no bufs, etc.}\n");
 	p(ips_noroute, "\t{:discard-no-route/%ju} "
 	    "{N:/output packet%s discarded due to no route}\n");
 	p(ips_fragmented, "\t{:sent-fragments/%ju} "
 	    "{N:/output datagram%s fragmented}\n");
 	p(ips_ofragments, "\t{:fragments-created/%ju} "
 	    "{N:/fragment%s created}\n");
 	p(ips_cantfrag, "\t{:discard-cannot-fragment/%ju} "
 	    "{N:/datagram%s that can't be fragmented}\n");
 	p(ips_nogif, "\t{:discard-tunnel-no-gif/%ju} "
 	    "{N:/tunneling packet%s that can't find gif}\n");
 	p(ips_badaddr, "\t{:discard-bad-address/%ju} "
 	    "{N:/datagram%s with bad address in header}\n");
 #undef p
 #undef p1a
 	xo_close_container(name);
 }
 
 /*
  * Dump ARP statistics structure.
  *
  * The counters are loaded into a local struct arpstat by fetch_stats(),
  * which is handed the "net.link.ether.arp.stats" sysctl name, the kernel
  * offset "off" and kread_counters as the reader.  Nothing is printed when
  * that call reports failure.  All output is wrapped in an xo container
  * called "name".  A counter whose value is zero is skipped unless
  * sflag <= 1.
  */
 void
 arp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct arpstat arpstat;
 
 	if (fetch_stats("net.link.ether.arp.stats", off, &arpstat,
 	    sizeof(arpstat), kread_counters) != 0)
 		return;
 
 	xo_open_container(name);
 	xo_emit("{T:/%s}:\n", name);
 
 /* Counter followed by a noun that takes the suffix from plural(). */
 #define	p(f, m) do {							\
 	if (arpstat.f || sflag <= 1)					\
 		xo_emit("\t" m, (uintmax_t)arpstat.f,			\
 		    plural(arpstat.f));					\
 } while (0)
 /* Counter followed by a noun that takes the suffix from pluralies(). */
 #define	p2(f, m) do {							\
 	if (arpstat.f || sflag <= 1)					\
 		xo_emit("\t" m, (uintmax_t)arpstat.f,			\
 		    pluralies(arpstat.f));				\
 } while (0)
 
 	p(txrequests, "{:sent-requests/%ju} {N:/ARP request%s sent}\n");
 	p2(txreplies, "{:sent-replies/%ju} {N:/ARP repl%s sent}\n");
 	p(rxrequests, "{:received-requests/%ju} "
 	    "{N:/ARP request%s received}\n");
 	p2(rxreplies, "{:received-replies/%ju} "
 	    "{N:/ARP repl%s received}\n");
 	p(received, "{:received-packets/%ju} "
 	    "{N:/ARP packet%s received}\n");
 	p(dropped, "{:dropped-no-entry/%ju} "
 	    "{N:/total packet%s dropped due to no ARP entry}\n");
 	/* "entry" takes the same suffix handling as "reply" above. */
 	p2(timeouts, "{:entries-timeout/%ju} "
 	    "{N:/ARP entr%s timed out}\n");
 	p(dupips, "{:dropped-duplicate-address/%ju} "
 	    "{N:/Duplicate IP%s seen}\n");
 #undef p
 #undef p2
 	xo_close_container(name);
 }
 
 
 
 /*
  * Printable names for ICMP message types, indexed by the numeric type
  * value (0 through ICMP_MAXTYPE).  Types that have no descriptive name
  * in this table carry a "#<number>" placeholder instead.
  */
 static	const char *icmpnames[ICMP_MAXTYPE + 1] = {
 	"echo reply",			/* RFC 792 */
 	"#1",
 	"#2",
 	"destination unreachable",	/* RFC 792 */
 	"source quench",		/* RFC 792 */
 	"routing redirect",		/* RFC 792 */
 	"#6",
 	"#7",
 	"echo",				/* RFC 792 */
 	"router advertisement",		/* RFC 1256 */
 	"router solicitation",		/* RFC 1256 */
 	"time exceeded",		/* RFC 792 */
 	"parameter problem",		/* RFC 792 */
 	"time stamp",			/* RFC 792 */
 	"time stamp reply",		/* RFC 792 */
 	"information request",		/* RFC 792 */
 	"information request reply",	/* RFC 792 */
 	"address mask request",		/* RFC 950 */
 	"address mask reply",		/* RFC 950 */
 	"#19",
 	"#20",
 	"#21",
 	"#22",
 	"#23",
 	"#24",
 	"#25",
 	"#26",
 	"#27",
 	"#28",
 	"#29",
 	"icmp traceroute",		/* RFC 1393 */
 	"datagram conversion error",	/* RFC 1475 */
 	"mobile host redirect",
 	"IPv6 where-are-you",
 	"IPv6 i-am-here",
 	"mobile registration req",
 	"mobile registration reply",
 	"domain name request",		/* RFC 1788 */
 	"domain name reply",		/* RFC 1788 */
 	"icmp SKIP",
 	"icmp photuris",		/* RFC 2521 */
 };
 
 /*
  * Dump ICMP statistics.
  *
  * The counters are loaded into a local struct icmpstat by fetch_stats(),
  * which is handed the "net.inet.icmp.stats" sysctl name, the kernel
  * offset "off" and kread_counters as the reader.  Nothing is printed when
  * that call reports failure.  All output is wrapped in an xo container
  * called "name", which is closed on every path once it has been opened.
  *
  * Scalar counters whose value is zero are skipped unless sflag <= 1.
  * The per-type output and input histograms list only the types with a
  * non-zero count, and their list is opened lazily on the first such
  * type.  When "live" is set, the state of the net.inet.icmp.maskrepl
  * sysctl is reported as well.
  */
 void
 icmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct icmpstat icmpstat;
 	size_t len;
 	int i, first;
 
 	if (fetch_stats("net.inet.icmp.stats", off, &icmpstat,
 	    sizeof(icmpstat), kread_counters) != 0)
 		return;
 
 	xo_open_container(name);
 	xo_emit("{T:/%s}:\n", name);
 
 /* Counter followed by a noun that takes the suffix from plural(). */
 #define	p(f, m) do {							\
 	if (icmpstat.f || sflag <= 1)					\
 		xo_emit(m, icmpstat.f, plural(icmpstat.f));		\
 } while (0)
 /* Counter followed by fixed text (no plural suffix). */
 #define	p1a(f, m) do {							\
 	if (icmpstat.f || sflag <= 1)					\
 		xo_emit(m, icmpstat.f);					\
 } while (0)
 /* Counter followed by a noun that takes the suffix from plurales(). */
 #define	p2(f, m) do {							\
 	if (icmpstat.f || sflag <= 1)					\
 		xo_emit(m, icmpstat.f, plurales(icmpstat.f));		\
 } while (0)
 
 	p(icps_error, "\t{:icmp-calls/%lu} "
 	    "{N:/call%s to icmp_error}\n");
 	p(icps_oldicmp, "\t{:errors-not-from-message/%lu} "
 	    "{N:/error%s not generated in response to an icmp message}\n");
 
 	for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++) {
 		if (icmpstat.icps_outhist[i] != 0) {
 			if (first) {
 				xo_open_list("output-histogram");
 				xo_emit("\tOutput histogram:\n");
 				first = 0;
 			}
 			xo_open_instance("output-histogram");
 			if (icmpnames[i] != NULL)
 				xo_emit("\t\t{k:name/%s}: {:count/%lu}\n",
 				    icmpnames[i], icmpstat.icps_outhist[i]);
 			else
 				xo_emit("\t\tunknown ICMP #{k:name/%d}: "
 				    "{:count/%lu}\n",
 				    i, icmpstat.icps_outhist[i]);
 			xo_close_instance("output-histogram");
 		}
 	}
 	if (!first)
 		xo_close_list("output-histogram");
 
 	p(icps_badcode, "\t{:dropped-bad-code/%lu} "
 	    "{N:/message%s with bad code fields}\n");
 	p(icps_tooshort, "\t{:dropped-too-short/%lu} "
 	    "{N:/message%s less than the minimum length}\n");
 	p(icps_checksum, "\t{:dropped-bad-checksum/%lu} "
 	    "{N:/message%s with bad checksum}\n");
 	p(icps_badlen, "\t{:dropped-bad-length/%lu} "
 	    "{N:/message%s with bad length}\n");
 	p1a(icps_bmcastecho, "\t{:dropped-multicast-echo/%lu} "
 	    "{N:/multicast echo requests ignored}\n");
 	p1a(icps_bmcasttstamp, "\t{:dropped-multicast-timestamp/%lu} "
 	    "{N:/multicast timestamp requests ignored}\n");
 
 	for (first = 1, i = 0; i < ICMP_MAXTYPE + 1; i++) {
 		if (icmpstat.icps_inhist[i] != 0) {
 			if (first) {
 				xo_open_list("input-histogram");
 				xo_emit("\tInput histogram:\n");
 				first = 0;
 			}
 			xo_open_instance("input-histogram");
 			if (icmpnames[i] != NULL)
 				xo_emit("\t\t{k:name/%s}: {:count/%lu}\n",
 				    icmpnames[i], icmpstat.icps_inhist[i]);
 			else
 				xo_emit("\t\tunknown ICMP #{k:name/%d}: "
 				    "{:count/%lu}\n",
 				    i, icmpstat.icps_inhist[i]);
 			xo_close_instance("input-histogram");
 		}
 	}
 	if (!first)
 		xo_close_list("input-histogram");
 
 	p(icps_reflect, "\t{:sent-packets/%lu} "
 	    "{N:/message response%s generated}\n");
 	p2(icps_badaddr, "\t{:discard-invalid-return-address/%lu} "
 	    "{N:/invalid return address%s}\n");
 	p(icps_noroute, "\t{:discard-no-route/%lu} "
 	    "{N:/no return route%s}\n");
 #undef p
 #undef p1a
 #undef p2
 	if (live) {
 		len = sizeof i;
 		/*
 		 * A failed lookup only suppresses this one line; it must
 		 * not skip the xo_close_container() call below, or the
 		 * container opened above would be left unbalanced.
 		 */
 		if (sysctlbyname("net.inet.icmp.maskrepl", &i, &len, NULL,
 		    0) == 0)
 			xo_emit("\tICMP address mask responses are "
 			    "{q:icmp-address-responses/%sabled}\n",
 			    i ? "en" : "dis");
 	}
 
 	xo_close_container(name);
 }
 
 /*
  * Dump IGMP statistics structure.
  *
  * The counters are loaded into a local struct igmpstat by fetch_stats(),
  * which is handed the "net.inet.igmp.stats" sysctl name, a zero offset
  * (the "off" argument is not consulted) and kread as the reader.  Nothing
  * is printed when that call reports failure.  A version or length field
  * that differs from IGPS_VERSION_3 / IGPS_VERSION3_LEN produces a warning
  * but the counters are printed regardless.  All output is wrapped in an
  * xo container called "name".  A counter whose value is zero is skipped
  * unless sflag <= 1.
  */
 void
 igmp_stats(u_long off, const char *name, int af1 __unused, int proto __unused)
 {
 	struct igmpstat igmpstat;
 	int error;
 
 	error = fetch_stats("net.inet.igmp.stats", 0, &igmpstat,
 	    sizeof(igmpstat), kread);
 	if (error != 0)
 		return;
 
 	/* Mismatches are reported but do not stop the dump. */
 	if (igmpstat.igps_version != IGPS_VERSION_3)
 		xo_warnx("%s: version mismatch (%d != %d)", __func__,
 		    igmpstat.igps_version, IGPS_VERSION_3);
 	if (igmpstat.igps_len != IGPS_VERSION3_LEN)
 		xo_warnx("%s: size mismatch (%d != %d)", __func__,
 		    igmpstat.igps_len, IGPS_VERSION3_LEN);
 
 	xo_open_container(name);
 	xo_emit("{T:/%s}:\n", name);
 
 /* Counter followed by a noun that takes the suffix from plural(). */
 #define	p64(f, m) do {							\
 	if (igmpstat.f || sflag <= 1)					\
 		xo_emit(m, (uintmax_t)igmpstat.f,			\
 		    plural(igmpstat.f));				\
 } while (0)
 /* Counter followed by a noun that takes the suffix from pluralies(). */
 #define	py64(f, m) do {							\
 	if (igmpstat.f || sflag <= 1)					\
 		xo_emit(m, (uintmax_t)igmpstat.f,			\
 		    pluralies(igmpstat.f));				\
 } while (0)
 
 	p64(igps_rcv_total, "\t{:received-messages/%ju} "
 	    "{N:/message%s received}\n");
 	p64(igps_rcv_tooshort, "\t{:dropped-too-short/%ju} "
 	    "{N:/message%s received with too few bytes}\n");
 	p64(igps_rcv_badttl, "\t{:dropped-wrong-ttl/%ju} "
 	    "{N:/message%s received with wrong TTL}\n");
 	p64(igps_rcv_badsum, "\t{:dropped-bad-checksum/%ju} "
 	    "{N:/message%s received with bad checksum}\n");
 	py64(igps_rcv_v1v2_queries, "\t{:received-membership-queries/%ju} "
 	    "{N:/V1\\/V2 membership quer%s received}\n");
 	py64(igps_rcv_v3_queries, "\t{:received-v3-membership-queries/%ju} "
 	    "{N:/V3 membership quer%s received}\n");
 	py64(igps_rcv_badqueries, "\t{:dropped-membership-queries/%ju} "
 	    "{N:/membership quer%s received with invalid field(s)}\n");
 	py64(igps_rcv_gen_queries, "\t{:received-general-queries/%ju} "
 	    "{N:/general quer%s received}\n");
 	py64(igps_rcv_group_queries, "\t{:received-group-queries/%ju} "
 	    "{N:/group quer%s received}\n");
 	py64(igps_rcv_gsr_queries, "\t{:received-group-source-queries/%ju} "
 	    "{N:/group-source quer%s received}\n");
 	py64(igps_drop_gsr_queries, "\t{:dropped-group-source-queries/%ju} "
 	    "{N:/group-source quer%s dropped}\n");
 	p64(igps_rcv_reports, "\t{:received-membership-requests/%ju} "
 	    "{N:/membership report%s received}\n");
 	p64(igps_rcv_badreports, "\t{:dropped-membership-reports/%ju} "
 	    "{N:/membership report%s received with invalid field(s)}\n");
 	p64(igps_rcv_ourreports, "\t"
 	    "{:received-membership-reports-matching/%ju} "
 	    "{N:/membership report%s received for groups to which we belong}"
 	    "\n");
 	p64(igps_rcv_nora, "\t{:received-v3-reports-no-router-alert/%ju} "
 	    "{N:/V3 report%s received without Router Alert}\n");
 	p64(igps_snd_reports, "\t{:sent-membership-reports/%ju} "
 	    "{N:/membership report%s sent}\n");
 #undef p64
 #undef py64
 	xo_close_container(name);
 }
 
 /*
  * Dump PIM statistics structure.
  *
  * The counters are loaded into a local struct pimstat by fetch_stats(),
  * which is handed the "net.inet.pim.stats" sysctl name, the kernel offset
  * "off" and kread_counters as the reader.  Nothing is printed when that
  * call reports failure.  All output is wrapped in an xo container called
  * "name".  A counter whose value is zero is skipped unless sflag <= 1.
  */
 void
 pim_stats(u_long off, const char *name, int af1 __unused,
     int proto __unused)
 {
 	struct pimstat pimstat;
 
 	if (fetch_stats("net.inet.pim.stats", off, &pimstat,
 	    sizeof(pimstat), kread_counters) != 0)
 		return;
 
 	xo_open_container(name);
 	xo_emit("{T:/%s}:\n", name);
 
 /* Counter followed by a noun that takes the suffix from plural(). */
 #define	p(f, m) do {							\
 	if (pimstat.f || sflag <= 1)					\
 		xo_emit(m, (uintmax_t)pimstat.f, plural(pimstat.f));	\
 } while (0)
 
 	p(pims_rcv_total_msgs, "\t{:received-messages/%ju} "
 	    "{N:/message%s received}\n");
 	p(pims_rcv_total_bytes, "\t{:received-bytes/%ju} "
 	    "{N:/byte%s received}\n");
 	p(pims_rcv_tooshort, "\t{:dropped-too-short/%ju} "
 	    "{N:/message%s received with too few bytes}\n");
 	p(pims_rcv_badsum, "\t{:dropped-bad-checksum/%ju} "
 	    "{N:/message%s received with bad checksum}\n");
 	p(pims_rcv_badversion, "\t{:dropped-bad-version/%ju} "
 	    "{N:/message%s received with bad version}\n");
 	p(pims_rcv_registers_msgs, "\t{:received-data-register-messages/%ju} "
 	    "{N:/data register message%s received}\n");
 	p(pims_rcv_registers_bytes, "\t{:received-data-register-bytes/%ju} "
 	    "{N:/data register byte%s received}\n");
 	p(pims_rcv_registers_wrongiif, "\t"
 	    "{:received-data-register-wrong-interface/%ju} "
 	    "{N:/data register message%s received on wrong iif}\n");
 	p(pims_rcv_badregisters, "\t{:received-bad-registers/%ju} "
 	    "{N:/bad register%s received}\n");
 	p(pims_snd_registers_msgs, "\t{:sent-data-register-messages/%ju} "
 	    "{N:/data register message%s sent}\n");
 	p(pims_snd_registers_bytes, "\t{:sent-data-register-bytes/%ju} "
 	    "{N:/data register byte%s sent}\n");
 #undef p
 	xo_close_container(name);
 }
 
 /*
  * Pretty print an Internet address (net address + port).
  *
  * container - optional xo container name wrapped around the output;
  *             no container is opened when it is NULL.
  * in        - address to print; rendered through inetname().
  * port      - port number; it is handed unchanged to getservbyport()
  *             and run through ntohs() before being printed as a number.
  * proto     - protocol name used for the service lookup.
  * num_port  - when non-zero the port is never resolved to a service
  *             name.
  * af1       - address family; only affects the column width chosen when
  *             Wflag is set.
  *
  * The text "<address>.<port> " is assembled in a local buffer and
  * emitted as a display-only "target" field; the address and port parts
  * are then emitted once more as separate encoding-only fields.
  */
 void
 inetprint(const char *container, struct in_addr *in, int port,
     const char *proto, int num_port, const int af1)
 {
 	struct servent *sp = NULL;
 	char line[80], *cp;
 	int width;
 	size_t alen, plen;
 
 	if (container)
 		xo_open_container(container);
 
 	if (Wflag)
 		snprintf(line, sizeof(line), "%s.", inetname(in));
 	else
 		snprintf(line, sizeof(line), "%.*s.",
 		    (Aflag && !num_port) ? 12 : 16, inetname(in));
 	alen = strlen(line);
 	cp = line + alen;
 	if (!num_port && port)
 		sp = getservbyport(port, proto);
 	if (sp || port == 0)
 		snprintf(cp, sizeof(line) - alen,
 		    "%.15s ", sp ? sp->s_name : "*");
 	else
 		snprintf(cp, sizeof(line) - alen,
 		    "%d ", ntohs((u_short)port));
 	width = (Aflag && !Wflag) ? 18 :
 		((!Wflag || af1 == AF_INET) ? 22 : 45);
 	if (Wflag)
 		xo_emit("{d:target/%-*s} ", width, line);
 	else
 		xo_emit("{d:target/%-*.*s} ", width, width, line);
 
 	/*
 	 * Strip the separators added above: the '.' that ends the address
 	 * part and the ' ' that ends the port part.  Either one may be
 	 * missing if the address filled line[] and snprintf() truncated,
 	 * so check for it instead of decrementing blindly (an empty port
 	 * part would otherwise make the length wrap around).
 	 */
 	plen = strlen(cp);
 	if (plen > 0 && cp[plen - 1] == ' ')
 		plen--;
 	if (alen > 0 && line[alen - 1] == '.')
 		alen--;
 
 	/* The '*' width and precision arguments must be of type int. */
 	xo_emit("{e:address/%*.*s}{e:port/%*.*s}", (int)alen, (int)alen,
 	    line, (int)plen, (int)plen, cp);
 
 	if (container)
 		xo_close_container(container);
 }
 
 /*
  * Construct an Internet address representation.
  * If numeric_addr has been supplied, give
  * numeric value, otherwise try for symbolic name.
  *
  * INADDR_ANY is always rendered as "*".  For other addresses a network
  * name is tried first when the local part of the address is zero, then
  * a host name (passed through trimdomain()); if neither lookup yields a
  * name the dotted-quad form is produced.
  *
  * The address pointed to by "inp" is not modified.  The result lives in
  * a static buffer, so it is overwritten by the next call.
  */
 char *
 inetname(struct in_addr *inp)
 {
 	char *cp;
 	static char line[MAXHOSTNAMELEN];
 	struct hostent *hp;
 	struct netent *np;
 	in_addr_t addr;
 
 	cp = NULL;
 	if (!numeric_addr && inp->s_addr != INADDR_ANY) {
 		int net = inet_netof(*inp);
 		int lna = inet_lnaof(*inp);
 
 		if (lna == INADDR_ANY) {
 			np = getnetbyaddr(net, AF_INET);
 			if (np)
 				cp = np->n_name;
 		}
 		if (cp == NULL) {
 			hp = gethostbyaddr((char *)inp, sizeof (*inp), AF_INET);
 			if (hp) {
 				cp = hp->h_name;
 				trimdomain(cp, strlen(cp));
 			}
 		}
 	}
 	if (inp->s_addr == INADDR_ANY)
 		strlcpy(line, "*", sizeof(line));
 	else if (cp) {
 		strlcpy(line, cp, sizeof(line));
 	} else {
 		/*
 		 * Convert into a local copy: writing the host-order value
 		 * back through "inp" would corrupt the caller's address
 		 * and make a second call on it print swapped bytes.
 		 */
 		addr = ntohl(inp->s_addr);
 #define	C(x)	((u_int)((x) & 0xff))
 		snprintf(line, sizeof(line), "%u.%u.%u.%u",
 		    C(addr >> 24), C(addr >> 16),
 		    C(addr >> 8), C(addr));
 	}
 	return (line);
 }