diff --git a/sys/netlink/netlink_domain.c b/sys/netlink/netlink_domain.c index 38f4a2dfed94..ecd110d62c1f 100644 --- a/sys/netlink/netlink_domain.c +++ b/sys/netlink/netlink_domain.c @@ -1,781 +1,833 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This file contains socket and protocol bindings for netlink. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* priv_check */ +#include #include #include #include #define DEBUG_MOD_NAME nl_domain #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_INFO); _Static_assert((NLP_MAX_GROUPS % 64) == 0, "NLP_MAX_GROUPS has to be multiple of 64"); _Static_assert(NLP_MAX_GROUPS >= 64, "NLP_MAX_GROUPS has to be at least 64"); #define NLCTL_TRACKER struct rm_priotracker nl_tracker #define NLCTL_RLOCK(_ctl) rm_rlock(&((_ctl)->ctl_lock), &nl_tracker) #define NLCTL_RUNLOCK(_ctl) rm_runlock(&((_ctl)->ctl_lock), &nl_tracker) #define NLCTL_WLOCK(_ctl) rm_wlock(&((_ctl)->ctl_lock)) #define NLCTL_WUNLOCK(_ctl) rm_wunlock(&((_ctl)->ctl_lock)) static u_long nl_sendspace = NLSNDQ; SYSCTL_ULONG(_net_netlink, OID_AUTO, sendspace, CTLFLAG_RW, &nl_sendspace, 0, "Default netlink socket send space"); static u_long nl_recvspace = NLSNDQ; SYSCTL_ULONG(_net_netlink, OID_AUTO, recvspace, CTLFLAG_RW, &nl_recvspace, 0, "Default netlink socket receive space"); extern u_long sb_max_adj; static u_long nl_maxsockbuf = 512 * 1024 * 1024; /* 512M, XXX: init based on physmem */ static int sysctl_handle_nl_maxsockbuf(SYSCTL_HANDLER_ARGS); SYSCTL_OID(_net_netlink, OID_AUTO, nl_maxsockbuf, CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, &nl_maxsockbuf, 0, sysctl_handle_nl_maxsockbuf, "LU", "Maximum Netlink socket buffer size"); static unsigned int osd_slot_id = 0; void nl_osd_register(void) { osd_slot_id = osd_register(OSD_THREAD, NULL, NULL); } void nl_osd_unregister(void) { osd_deregister(OSD_THREAD, osd_slot_id); } struct nlpcb * _nl_get_thread_nlp(struct thread *td) { return (osd_get(OSD_THREAD, &td->td_osd, osd_slot_id)); } void nl_set_thread_nlp(struct thread *td, struct nlpcb *nlp) { NLP_LOG(LOG_DEBUG2, nlp, "Set thread %p nlp to %p (slot %u)", td, 
nlp, osd_slot_id); if (osd_set(OSD_THREAD, &td->td_osd, osd_slot_id, nlp) == 0) return; /* Failed, need to realloc */ void **rsv = osd_reserve(osd_slot_id); osd_set_reserved(OSD_THREAD, &td->td_osd, osd_slot_id, rsv, nlp); } /* * Looks up a nlpcb struct based on the @portid. Need to claim nlsock_mtx. * Returns nlpcb pointer if present else NULL */ static struct nlpcb * nl_port_lookup(uint32_t port_id) { struct nlpcb *nlp; CK_LIST_FOREACH(nlp, &V_nl_ctl->ctl_port_head, nl_port_next) { if (nlp->nl_port == port_id) return (nlp); } return (NULL); } static void nl_add_group_locked(struct nlpcb *nlp, unsigned int group_id) { MPASS(group_id <= NLP_MAX_GROUPS); --group_id; /* TODO: add family handler callback */ if (!nlp_unconstrained_vnet(nlp)) return; nlp->nl_groups[group_id / 64] |= (uint64_t)1 << (group_id % 64); } static void nl_del_group_locked(struct nlpcb *nlp, unsigned int group_id) { MPASS(group_id <= NLP_MAX_GROUPS); --group_id; nlp->nl_groups[group_id / 64] &= ~((uint64_t)1 << (group_id % 64)); } static bool nl_isset_group_locked(struct nlpcb *nlp, unsigned int group_id) { MPASS(group_id <= NLP_MAX_GROUPS); --group_id; return (nlp->nl_groups[group_id / 64] & ((uint64_t)1 << (group_id % 64))); } static uint32_t nl_get_groups_compat(struct nlpcb *nlp) { uint32_t groups_mask = 0; for (int i = 0; i < 32; i++) { if (nl_isset_group_locked(nlp, i + 1)) groups_mask |= (1 << i); } return (groups_mask); } static void nl_send_one_group(struct mbuf *m, struct nlpcb *nlp, int num_messages, int io_flags) { if (__predict_false(nlp->nl_flags & NLF_MSG_INFO)) nl_add_msg_info(m); nl_send_one(m, nlp, num_messages, io_flags); } /* * Broadcasts message @m to the protocol @proto group specified by @group_id */ void nl_send_group(struct mbuf *m, int num_messages, int proto, int group_id) { struct nlpcb *nlp_last = NULL; struct nlpcb *nlp; NLCTL_TRACKER; IF_DEBUG_LEVEL(LOG_DEBUG2) { struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *); NL_LOG(LOG_DEBUG2, "MCAST mbuf len %u msg type %d len %u to group %d/%d", m->m_len, hdr->nlmsg_type, hdr->nlmsg_len, proto, group_id); } struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); if (__predict_false(ctl == NULL)) { /* * Can be the case when notification is sent within VNET * which doesn't have any netlink sockets. */ m_freem(m); return; } NLCTL_RLOCK(ctl); int io_flags = NL_IOF_UNTRANSLATED; CK_LIST_FOREACH(nlp, &ctl->ctl_pcb_head, nl_next) { if (nl_isset_group_locked(nlp, group_id) && nlp->nl_proto == proto) { if (nlp_last != NULL) { struct mbuf *m_copy; m_copy = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (m_copy != NULL) nl_send_one_group(m_copy, nlp_last, num_messages, io_flags); else { NLP_LOCK(nlp_last); if (nlp_last->nl_socket != NULL) sorwakeup(nlp_last->nl_socket); NLP_UNLOCK(nlp_last); } } nlp_last = nlp; } } if (nlp_last != NULL) nl_send_one_group(m, nlp_last, num_messages, io_flags); else m_freem(m); NLCTL_RUNLOCK(ctl); } bool nl_has_listeners(int netlink_family, uint32_t groups_mask) { return (V_nl_ctl != NULL); } static uint32_t nl_find_port(void) { /* * app can open multiple netlink sockets. * Start with current pid, if already taken, * try random numbers in 65k..256k+65k space, * avoiding clash with pids. 
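The group-membership helpers above pack NLP_MAX_GROUPS (128) wire-visible, 1-based group IDs into an array of uint64_t words, decrementing the ID before splitting it into a word index and a bit offset. A minimal userland model of the same bit arithmetic, with illustrative names rather than the kernel's, can be compiled standalone to sanity-check the split:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NLP_MAX_GROUPS	128

static uint64_t groups[NLP_MAX_GROUPS / 64];

/* Group IDs are 1-based on the wire; bit positions are 0-based. */
static void
group_set(unsigned int group_id)
{
	--group_id;
	groups[group_id / 64] |= (uint64_t)1 << (group_id % 64);
}

static void
group_clear(unsigned int group_id)
{
	--group_id;
	groups[group_id / 64] &= ~((uint64_t)1 << (group_id % 64));
}

static bool
group_isset(unsigned int group_id)
{
	--group_id;
	return (groups[group_id / 64] & ((uint64_t)1 << (group_id % 64)));
}

int
main(void)
{
	group_set(1);		/* lands in word 0, bit 0 */
	group_set(65);		/* lands in word 1, bit 0 */
	group_clear(1);
	printf("1: %d, 65: %d\n", group_isset(1), group_isset(65));
	return (0);
}

nl_get_groups_compat() then reads back only the first 32 of these bits, since the legacy sockaddr_nl nl_groups field is a 32-bit mask.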
*/ if (nl_port_lookup(curproc->p_pid) == NULL) return (curproc->p_pid); for (int i = 0; i < 16; i++) { uint32_t nl_port = (arc4random() % 65536) + 65536 * 4; if (nl_port_lookup(nl_port) == 0) return (nl_port); NL_LOG(LOG_DEBUG3, "tried %u\n", nl_port); } return (curproc->p_pid); } static int nl_bind_locked(struct nlpcb *nlp, struct sockaddr_nl *snl) { if (nlp->nl_bound) { if (nlp->nl_port != snl->nl_pid) { NL_LOG(LOG_DEBUG, "bind() failed: program pid %d " "is different from provided pid %d", nlp->nl_port, snl->nl_pid); return (EINVAL); // XXX: better error } } else { if (snl->nl_pid == 0) snl->nl_pid = nl_find_port(); if (nl_port_lookup(snl->nl_pid) != NULL) return (EADDRINUSE); nlp->nl_port = snl->nl_pid; nlp->nl_bound = true; CK_LIST_INSERT_HEAD(&V_nl_ctl->ctl_port_head, nlp, nl_port_next); } for (int i = 0; i < 32; i++) { if (snl->nl_groups & ((uint32_t)1 << i)) nl_add_group_locked(nlp, i + 1); else nl_del_group_locked(nlp, i + 1); } return (0); } static int nl_pru_attach(struct socket *so, int proto, struct thread *td) { struct nlpcb *nlp; int error; if (__predict_false(netlink_unloading != 0)) return (EAFNOSUPPORT); error = nl_verify_proto(proto); if (error != 0) return (error); bool is_linux = SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX; NL_LOG(LOG_DEBUG2, "socket %p, %sPID %d: attaching socket to %s", so, is_linux ? "(linux) " : "", curproc->p_pid, nl_get_proto_name(proto)); /* Create per-VNET state on first socket init */ struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); if (ctl == NULL) ctl = vnet_nl_ctl_init(); KASSERT(V_nl_ctl != NULL, ("nl_attach: vnet_sock_init() failed")); MPASS(sotonlpcb(so) == NULL); nlp = malloc(sizeof(struct nlpcb), M_PCB, M_WAITOK | M_ZERO); error = soreserve(so, nl_sendspace, nl_recvspace); if (error != 0) { free(nlp, M_PCB); return (error); } + so->so_rcv.sb_mtx = &so->so_rcv_mtx; + TAILQ_INIT(&so->so_snd.nl_queue); so->so_pcb = nlp; nlp->nl_socket = so; /* Copy so_cred to avoid having socket_var.h in every header */ nlp->nl_cred = so->so_cred; nlp->nl_proto = proto; nlp->nl_process_id = curproc->p_pid; nlp->nl_linux = is_linux; - nlp->nl_active = true; nlp->nl_unconstrained_vnet = !jailed_without_vnet(so->so_cred); nlp->nl_need_thread_setup = true; NLP_LOCK_INIT(nlp); refcount_init(&nlp->nl_refcount, 1); nl_init_io(nlp); nlp->nl_taskqueue = taskqueue_create("netlink_socket", M_WAITOK, taskqueue_thread_enqueue, &nlp->nl_taskqueue); TASK_INIT(&nlp->nl_task, 0, nl_taskqueue_handler, nlp); taskqueue_start_threads(&nlp->nl_taskqueue, 1, PWAIT, "netlink_socket (PID %u)", nlp->nl_process_id); NLCTL_WLOCK(ctl); /* XXX: check ctl is still alive */ CK_LIST_INSERT_HEAD(&ctl->ctl_pcb_head, nlp, nl_next); NLCTL_WUNLOCK(ctl); soisconnected(so); return (0); } static int nl_pru_bind(struct socket *so, struct sockaddr *sa, struct thread *td) { struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); struct nlpcb *nlp = sotonlpcb(so); struct sockaddr_nl *snl = (struct sockaddr_nl *)sa; int error; NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); if (snl->nl_len != sizeof(*snl)) { NL_LOG(LOG_DEBUG, "socket %p, wrong sizeof(), ignoring bind()", so); return (EINVAL); } NLCTL_WLOCK(ctl); NLP_LOCK(nlp); error = nl_bind_locked(nlp, snl); NLP_UNLOCK(nlp); NLCTL_WUNLOCK(ctl); NL_LOG(LOG_DEBUG2, "socket %p, bind() to %u, groups %u, error %d", so, snl->nl_pid, snl->nl_groups, error); return (error); } static int nl_assign_port(struct nlpcb *nlp, uint32_t port_id) { struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); struct sockaddr_nl snl = { .nl_pid = port_id, }; int 
error;

	NLCTL_WLOCK(ctl);
	NLP_LOCK(nlp);
	snl.nl_groups = nl_get_groups_compat(nlp);
	error = nl_bind_locked(nlp, &snl);
	NLP_UNLOCK(nlp);
	NLCTL_WUNLOCK(ctl);

	NL_LOG(LOG_DEBUG3, "socket %p, port assign: %d, error: %d",
	    nlp->nl_socket, port_id, error);
	return (error);
}

/*
 * nl_autobind_port binds an unused portid to @nlp
 * @nlp: pcb data for the netlink socket
 * @candidate_id: first id to consider
 */
static int
nl_autobind_port(struct nlpcb *nlp, uint32_t candidate_id)
{
	struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl);
	uint32_t port_id = candidate_id;
	NLCTL_TRACKER;
	bool exist;
	int error = EADDRINUSE;

	for (int i = 0; i < 10; i++) {
		NL_LOG(LOG_DEBUG3, "socket %p, trying to assign port %d",
		    nlp->nl_socket, port_id);
		NLCTL_RLOCK(ctl);
		exist = nl_port_lookup(port_id) != NULL;
		NLCTL_RUNLOCK(ctl);
		if (!exist) {
			error = nl_assign_port(nlp, port_id);
			if (error != EADDRINUSE)
				break;
		}
		port_id++;
	}
	NL_LOG(LOG_DEBUG3, "socket %p, autobind to %d, error: %d",
	    nlp->nl_socket, port_id, error);
	return (error);
}

static int
nl_pru_connect(struct socket *so, struct sockaddr *sa, struct thread *td)
{
	struct sockaddr_nl *snl = (struct sockaddr_nl *)sa;
	struct nlpcb *nlp;

	NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid);
	if (snl->nl_len != sizeof(*snl)) {
		NL_LOG(LOG_DEBUG, "socket %p, wrong sizeof(), ignoring connect()", so);
		return (EINVAL);
	}

	nlp = sotonlpcb(so);
	if (!nlp->nl_bound) {
		int error = nl_autobind_port(nlp, td->td_proc->p_pid);
		if (error != 0) {
			NL_LOG(LOG_DEBUG, "socket %p, nl_autobind() failed: %d",
			    so, error);
			return (error);
		}
	}
	/* XXX: Handle socket flags & multicast */
	soisconnected(so);

	NL_LOG(LOG_DEBUG2, "socket %p, connect to %u", so, snl->nl_pid);
	return (0);
}

static void
destroy_nlpcb(struct nlpcb *nlp)
{
	NLP_LOCK(nlp);
	nl_free_io(nlp);
	NLP_LOCK_DESTROY(nlp);
	free(nlp, M_PCB);
}

static void
destroy_nlpcb_epoch(epoch_context_t ctx)
{
	struct nlpcb *nlp;

	nlp = __containerof(ctx, struct nlpcb, nl_epoch_ctx);

	destroy_nlpcb(nlp);
}

static void
nl_close(struct socket *so)
{
	struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl);
	MPASS(sotonlpcb(so) != NULL);
	struct nlpcb *nlp;
+	struct nl_buf *nb;

	NL_LOG(LOG_DEBUG2, "detaching socket %p, PID %d", so, curproc->p_pid);
	nlp = sotonlpcb(so);

	/* Mark as inactive so no new work can be enqueued */
	NLP_LOCK(nlp);
	bool was_bound = nlp->nl_bound;
-	nlp->nl_active = false;
	NLP_UNLOCK(nlp);

	/* Wait till all scheduled work has been completed */
	taskqueue_drain_all(nlp->nl_taskqueue);
	taskqueue_free(nlp->nl_taskqueue);

	NLCTL_WLOCK(ctl);
	NLP_LOCK(nlp);
	if (was_bound) {
		CK_LIST_REMOVE(nlp, nl_port_next);
		NL_LOG(LOG_DEBUG3, "socket %p, unlinking bound pid %u", so, nlp->nl_port);
	}
	CK_LIST_REMOVE(nlp, nl_next);
	nlp->nl_socket = NULL;
	NLP_UNLOCK(nlp);
	NLCTL_WUNLOCK(ctl);

	so->so_pcb = NULL;

+	while ((nb = TAILQ_FIRST(&so->so_snd.nl_queue)) != NULL) {
+		TAILQ_REMOVE(&so->so_snd.nl_queue, nb, tailq);
+		free(nb, M_NETLINK);
+	}
+	sbdestroy(so, SO_RCV);
+
	NL_LOG(LOG_DEBUG3, "socket %p, detached", so);

	/* XXX: is delayed free needed?
*/ NET_EPOCH_CALL(destroy_nlpcb_epoch, &nlp->nl_epoch_ctx); } static int nl_pru_disconnect(struct socket *so) { NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); MPASS(sotonlpcb(so) != NULL); return (ENOTCONN); } static int nl_pru_shutdown(struct socket *so) { NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); MPASS(sotonlpcb(so) != NULL); socantsendmore(so); return (0); } static int nl_sockaddr(struct socket *so, struct sockaddr *sa) { *(struct sockaddr_nl *)sa = (struct sockaddr_nl ){ /* TODO: set other fields */ .nl_len = sizeof(struct sockaddr_nl), .nl_family = AF_NETLINK, .nl_pid = sotonlpcb(so)->nl_port, }; return (0); } static int -nl_pru_output(struct mbuf *m, struct socket *so, ...) +nl_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, + struct mbuf *m, struct mbuf *control, int flags, struct thread *td) { + struct nlpcb *nlp = sotonlpcb(so); + struct sockbuf *sb = &so->so_snd; + struct nl_buf *nb; + u_int len; + int error; - if (__predict_false(m == NULL || - ((m->m_len < sizeof(struct nlmsghdr)) && - (m = m_pullup(m, sizeof(struct nlmsghdr))) == NULL))) - return (ENOBUFS); - MPASS((m->m_flags & M_PKTHDR) != 0); - - NL_LOG(LOG_DEBUG3, "sending message to kernel async processing"); - nl_receive_async(m, so); - return (0); -} - + MPASS(m == NULL && uio != NULL); -static int -nl_pru_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *sa, - struct mbuf *control, struct thread *td) -{ NL_LOG(LOG_DEBUG2, "sending message to kernel"); if (__predict_false(control != NULL)) { - if (control->m_len) { - m_freem(control); - return (EINVAL); - } m_freem(control); + return (EINVAL); } - return (nl_pru_output(m, so)); + if (__predict_false(flags & MSG_OOB)) /* XXXGL: or just ignore? */ + return (EOPNOTSUPP); + + if (__predict_false(uio->uio_resid < sizeof(struct nlmsghdr))) + return (ENOBUFS); /* XXXGL: any better error? 
 */
+
+	NL_LOG(LOG_DEBUG3, "sending message to kernel async processing");
+
+	error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
+	if (error)
+		return (error);
+
+	len = roundup2(uio->uio_resid, 8) + SCRATCH_BUFFER_SIZE;
+	if (nlp->nl_linux)
+		len += roundup2(uio->uio_resid, 8);
+	nb = malloc(sizeof(*nb) + len, M_NETLINK, M_WAITOK);
+	nb->datalen = uio->uio_resid;
+	nb->buflen = len;
+	nb->offset = 0;
+	error = uiomove(&nb->data[0], uio->uio_resid, uio);
+	if (__predict_false(error))
+		goto out;
+
+	SOCK_SENDBUF_LOCK(so);
+restart:
+	if (sb->sb_hiwat - sb->sb_ccc >= nb->datalen) {
+		NL_LOG(LOG_DEBUG3, "enqueue %u bytes", nb->datalen);
+		TAILQ_INSERT_TAIL(&sb->nl_queue, nb, tailq);
+		sb->sb_acc += nb->datalen;
+		sb->sb_ccc += nb->datalen;
+		nb = NULL;
+	} else if ((so->so_state & SS_NBIO) ||
+	    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
+		SOCK_SENDBUF_UNLOCK(so);
+		error = EWOULDBLOCK;
+		goto out;
+	} else {
+		if ((error = sbwait(so, SO_SND)) != 0) {
+			SOCK_SENDBUF_UNLOCK(so);
+			goto out;
+		} else
+			goto restart;
+	}
+	SOCK_SENDBUF_UNLOCK(so);
+
+	if (nb == NULL) {
+		NLP_LOCK(nlp);
+		nl_schedule_taskqueue(nlp);
+		NLP_UNLOCK(nlp);
+	}
+
+out:
+	SOCK_IO_SEND_UNLOCK(so);
+	free(nb, M_NETLINK);
+	return (error);
}

static int
nl_pru_rcvd(struct socket *so, int flags)
{
	NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid);
	MPASS(sotonlpcb(so) != NULL);

	nl_on_transmit(sotonlpcb(so));

	return (0);
}

static int
nl_getoptflag(int sopt_name)
{
	switch (sopt_name) {
	case NETLINK_CAP_ACK:
		return (NLF_CAP_ACK);
	case NETLINK_EXT_ACK:
		return (NLF_EXT_ACK);
	case NETLINK_GET_STRICT_CHK:
		return (NLF_STRICT);
	case NETLINK_MSG_INFO:
		return (NLF_MSG_INFO);
	}

	return (0);
}

static int
nl_ctloutput(struct socket *so, struct sockopt *sopt)
{
	struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl);
	struct nlpcb *nlp = sotonlpcb(so);
	uint32_t flag;
	int optval, error = 0;
	NLCTL_TRACKER;

	NL_LOG(LOG_DEBUG2, "%ssockopt(%p, %d)", (sopt->sopt_dir) ?
"set" : "get", so, sopt->sopt_name); switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { case NETLINK_ADD_MEMBERSHIP: case NETLINK_DROP_MEMBERSHIP: error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error != 0) break; if (optval <= 0 || optval >= NLP_MAX_GROUPS) { error = ERANGE; break; } NL_LOG(LOG_DEBUG2, "ADD/DEL group %d", (uint32_t)optval); NLCTL_WLOCK(ctl); if (sopt->sopt_name == NETLINK_ADD_MEMBERSHIP) nl_add_group_locked(nlp, optval); else nl_del_group_locked(nlp, optval); NLCTL_WUNLOCK(ctl); break; case NETLINK_CAP_ACK: case NETLINK_EXT_ACK: case NETLINK_GET_STRICT_CHK: case NETLINK_MSG_INFO: error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error != 0) break; flag = nl_getoptflag(sopt->sopt_name); if ((flag == NLF_MSG_INFO) && nlp->nl_linux) { error = EINVAL; break; } NLCTL_WLOCK(ctl); if (optval != 0) nlp->nl_flags |= flag; else nlp->nl_flags &= ~flag; NLCTL_WUNLOCK(ctl); break; default: error = ENOPROTOOPT; } break; case SOPT_GET: switch (sopt->sopt_name) { case NETLINK_LIST_MEMBERSHIPS: NLCTL_RLOCK(ctl); optval = nl_get_groups_compat(nlp); NLCTL_RUNLOCK(ctl); error = sooptcopyout(sopt, &optval, sizeof(optval)); break; case NETLINK_CAP_ACK: case NETLINK_EXT_ACK: case NETLINK_GET_STRICT_CHK: case NETLINK_MSG_INFO: NLCTL_RLOCK(ctl); optval = (nlp->nl_flags & nl_getoptflag(sopt->sopt_name)) != 0; NLCTL_RUNLOCK(ctl); error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = ENOPROTOOPT; } break; default: error = ENOPROTOOPT; } return (error); } static int sysctl_handle_nl_maxsockbuf(SYSCTL_HANDLER_ARGS) { int error = 0; u_long tmp_maxsockbuf = nl_maxsockbuf; error = sysctl_handle_long(oidp, &tmp_maxsockbuf, arg2, req); if (error || !req->newptr) return (error); if (tmp_maxsockbuf < MSIZE + MCLBYTES) return (EINVAL); nl_maxsockbuf = tmp_maxsockbuf; return (0); } static int nl_setsbopt(struct socket *so, struct sockopt *sopt) { int error, optval; bool result; if (sopt->sopt_name != SO_RCVBUF) return (sbsetopt(so, sopt)); /* Allow to override max buffer size in certain conditions */ error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error != 0) return (error); NL_LOG(LOG_DEBUG2, "socket %p, PID %d, SO_RCVBUF=%d", so, curproc->p_pid, optval); if (optval > sb_max_adj) { if (priv_check(curthread, PRIV_NET_ROUTE) != 0) return (EPERM); } SOCK_RECVBUF_LOCK(so); result = sbreserve_locked_limit(so, SO_RCV, optval, nl_maxsockbuf, curthread); SOCK_RECVBUF_UNLOCK(so); return (result ? 
0 : ENOBUFS);
}

#define NETLINK_PROTOSW						\
-	.pr_flags = PR_ATOMIC | PR_ADDR | PR_WANTRCVD,		\
+	.pr_flags = PR_ATOMIC | PR_ADDR | PR_WANTRCVD |		\
+	    PR_SOCKBUF,						\
	.pr_ctloutput = nl_ctloutput,				\
	.pr_setsbopt = nl_setsbopt,				\
	.pr_attach = nl_pru_attach,				\
	.pr_bind = nl_pru_bind,					\
	.pr_connect = nl_pru_connect,				\
	.pr_disconnect = nl_pru_disconnect,			\
-	.pr_send = nl_pru_send,					\
+	.pr_sosend = nl_sosend,					\
	.pr_rcvd = nl_pru_rcvd,					\
	.pr_shutdown = nl_pru_shutdown,				\
	.pr_sockaddr = nl_sockaddr,				\
	.pr_close = nl_close

static struct protosw netlink_raw_sw = {
	.pr_type = SOCK_RAW,
	NETLINK_PROTOSW
};

static struct protosw netlink_dgram_sw = {
	.pr_type = SOCK_DGRAM,
	NETLINK_PROTOSW
};

static struct domain netlinkdomain = {
	.dom_family = PF_NETLINK,
	.dom_name = "netlink",
	.dom_flags = DOMF_UNLOADABLE,
	.dom_nprotosw = 2,
	.dom_protosw = { &netlink_raw_sw, &netlink_dgram_sw },
};

DOMAIN_SET(netlink);

diff --git a/sys/netlink/netlink_io.c b/sys/netlink/netlink_io.c
index 3fe01bb443a1..7e2e098e4a9a 100644
--- a/sys/netlink/netlink_io.c
+++ b/sys/netlink/netlink_io.c
@@ -1,598 +1,533 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2021 Ng Peng Nam Sean
 * Copyright (c) 2022 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#define	DEBUG_MOD_NAME	nl_io
#define	DEBUG_MAX_LEVEL	LOG_DEBUG3
#include
_DECLARE_DEBUG(LOG_INFO);

/*
 * The logic below provides a p2p interface for receiving and
 * sending netlink data between the kernel and userland.
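The protosw above now sets PR_SOCKBUF and replaces the mbuf-based pr_send with pr_sosend, so send(2) lands in nl_sosend(), which copies the user's data into one contiguous nl_buf and parks it on a protocol-private TAILQ inside so_snd, using the sb_hiwat/sb_ccc accounting for backpressure. A self-contained sketch of that queueing discipline, with stand-in types and names rather than the kernel's:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/queue.h>

/* Stand-ins for struct nl_buf and the sockbuf accounting fields. */
struct buf {
	TAILQ_ENTRY(buf) tailq;
	unsigned int datalen;
	char data[];
};

struct sendq {
	TAILQ_HEAD(, buf) queue;
	unsigned int ccc;	/* bytes currently queued (sb_ccc) */
	unsigned int hiwat;	/* high watermark (sb_hiwat) */
};

/*
 * Producer: refuse to queue past the watermark, as nl_sosend() does
 * for non-blocking sockets.
 */
static int
sendq_enqueue(struct sendq *sq, const void *data, unsigned int len)
{
	struct buf *nb;

	if (sq->hiwat - sq->ccc < len)
		return (EWOULDBLOCK);
	nb = malloc(sizeof(*nb) + len);
	if (nb == NULL)
		return (ENOMEM);
	nb->datalen = len;
	memcpy(nb->data, data, len);
	TAILQ_INSERT_TAIL(&sq->queue, nb, tailq);
	sq->ccc += len;
	return (0);
}

/*
 * Consumer: detach the head buffer and release its accounting, as the
 * taskqueue handler does before parsing.
 */
static struct buf *
sendq_dequeue(struct sendq *sq)
{
	struct buf *nb = TAILQ_FIRST(&sq->queue);

	if (nb != NULL) {
		TAILQ_REMOVE(&sq->queue, nb, tailq);
		sq->ccc -= nb->datalen;
	}
	return (nb);
}

int
main(void)
{
	struct sendq sq = { .ccc = 0, .hiwat = 16 };
	struct buf *nb;

	TAILQ_INIT(&sq.queue);
	printf("small: %d\n", sendq_enqueue(&sq, "hello", 5));	/* 0 */
	printf("big: %d\n",		/* EWOULDBLOCK: only 11 bytes left */
	    sendq_enqueue(&sq, "0123456789abcdef", 16));
	while ((nb = sendq_dequeue(&sq)) != NULL)
		free(nb);
	return (0);
}

The blocking case in nl_sosend() simply sleeps in sbwait() and retries via the restart label instead of returning EWOULDBLOCK.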
*/ static const struct sockaddr_nl _nl_empty_src = { .nl_len = sizeof(struct sockaddr_nl), .nl_family = PF_NETLINK, .nl_pid = 0 /* comes from the kernel */ }; static const struct sockaddr *nl_empty_src = (const struct sockaddr *)&_nl_empty_src; -static struct mbuf *nl_process_mbuf(struct mbuf *m, struct nlpcb *nlp); - +static bool nl_process_nbuf(struct nl_buf *nb, struct nlpcb *nlp); static void queue_push(struct nl_io_queue *q, struct mbuf *mq) { while (mq != NULL) { struct mbuf *m = mq; mq = mq->m_nextpkt; m->m_nextpkt = NULL; q->length += m_length(m, NULL); STAILQ_INSERT_TAIL(&q->head, m, m_stailqpkt); } } -static void -queue_push_head(struct nl_io_queue *q, struct mbuf *m) -{ - MPASS(m->m_nextpkt == NULL); - - q->length += m_length(m, NULL); - STAILQ_INSERT_HEAD(&q->head, m, m_stailqpkt); -} - static struct mbuf * queue_pop(struct nl_io_queue *q) { if (!STAILQ_EMPTY(&q->head)) { struct mbuf *m = STAILQ_FIRST(&q->head); STAILQ_REMOVE_HEAD(&q->head, m_stailqpkt); m->m_nextpkt = NULL; q->length -= m_length(m, NULL); return (m); } return (NULL); } static struct mbuf * queue_head(const struct nl_io_queue *q) { return (STAILQ_FIRST(&q->head)); } static inline bool queue_empty(const struct nl_io_queue *q) { return (q->length == 0); } static void queue_free(struct nl_io_queue *q) { while (!STAILQ_EMPTY(&q->head)) { struct mbuf *m = STAILQ_FIRST(&q->head); STAILQ_REMOVE_HEAD(&q->head, m_stailqpkt); m->m_nextpkt = NULL; m_freem(m); } q->length = 0; } void nl_add_msg_info(struct mbuf *m) { struct nlpcb *nlp = nl_get_thread_nlp(curthread); NL_LOG(LOG_DEBUG2, "Trying to recover nlp from thread %p: %p", curthread, nlp); if (nlp == NULL) return; /* Prepare what we want to encode - PID, socket PID & msg seq */ struct { struct nlattr nla; uint32_t val; } data[] = { { .nla.nla_len = sizeof(struct nlattr) + sizeof(uint32_t), .nla.nla_type = NLMSGINFO_ATTR_PROCESS_ID, .val = nlp->nl_process_id, }, { .nla.nla_len = sizeof(struct nlattr) + sizeof(uint32_t), .nla.nla_type = NLMSGINFO_ATTR_PORT_ID, .val = nlp->nl_port, }, }; while (m->m_next != NULL) m = m->m_next; m->m_next = sbcreatecontrol(data, sizeof(data), NETLINK_MSG_INFO, SOL_NETLINK, M_NOWAIT); NL_LOG(LOG_DEBUG2, "Storing %u bytes of data, ctl: %p", (unsigned)sizeof(data), m->m_next); } static __noinline struct mbuf * extract_msg_info(struct mbuf *m) { while (m->m_next != NULL) { if (m->m_next->m_type == MT_CONTROL) { struct mbuf *ctl = m->m_next; m->m_next = NULL; return (ctl); } m = m->m_next; } return (NULL); } -static void +void nl_schedule_taskqueue(struct nlpcb *nlp) { if (!nlp->nl_task_pending) { nlp->nl_task_pending = true; taskqueue_enqueue(nlp->nl_taskqueue, &nlp->nl_task); NL_LOG(LOG_DEBUG3, "taskqueue scheduled"); } else { NL_LOG(LOG_DEBUG3, "taskqueue schedule skipped"); } } -int -nl_receive_async(struct mbuf *m, struct socket *so) -{ - struct nlpcb *nlp = sotonlpcb(so); - int error = 0; - - m->m_nextpkt = NULL; - - NLP_LOCK(nlp); - - if ((__predict_true(nlp->nl_active))) { - sbappend(&so->so_snd, m, 0); - NL_LOG(LOG_DEBUG3, "enqueue %u bytes", m_length(m, NULL)); - nl_schedule_taskqueue(nlp); - } else { - NL_LOG(LOG_DEBUG, "ignoring %u bytes on non-active socket", - m_length(m, NULL)); - m_free(m); - error = EINVAL; - } - - NLP_UNLOCK(nlp); - - return (error); -} - static bool tx_check_locked(struct nlpcb *nlp) { if (queue_empty(&nlp->tx_queue)) return (true); /* * Check if something can be moved from the internal TX queue * to the socket queue. 
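nl_add_msg_info() above stashes the sender's process ID and port ID as two nlattr-framed u32s in a control mbuf of type NETLINK_MSG_INFO at level SOL_NETLINK (built via sbcreatecontrol()), and extract_msg_info() later splits that MT_CONTROL mbuf off so it can be delivered as a cmsg. A hedged userland sketch of consuming it; it assumes the socket option constants and struct nlattr exported by FreeBSD's <netlink/netlink.h>, and ALIGN4() is a local helper, not a header macro:

#include <sys/socket.h>
#include <netlink/netlink.h>
#include <netlink/netlink_route.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define ALIGN4(x)	(((x) + 3U) & ~3U)

/* Walk NETLINK_MSG_INFO cmsgs: a run of { struct nlattr, uint32_t }. */
void
dump_msg_info(struct msghdr *msg)
{
	struct cmsghdr *cm;

	for (cm = CMSG_FIRSTHDR(msg); cm != NULL; cm = CMSG_NXTHDR(msg, cm)) {
		struct nlattr nla;
		uint32_t val;
		char *p, *end;

		if (cm->cmsg_level != SOL_NETLINK ||
		    cm->cmsg_type != NETLINK_MSG_INFO)
			continue;
		p = (char *)CMSG_DATA(cm);
		end = (char *)cm + cm->cmsg_len;
		while (p + sizeof(nla) + sizeof(val) <= end) {
			memcpy(&nla, p, sizeof(nla));
			if (nla.nla_len < sizeof(nla) + sizeof(val))
				break;
			memcpy(&val, p + sizeof(nla), sizeof(val));
			/* e.g. NLMSGINFO_ATTR_PROCESS_ID / _PORT_ID */
			printf("msg-info attr %u = %u\n", nla.nla_type, val);
			p += ALIGN4(nla.nla_len);
		}
	}
}

int
main(void)
{
	char data[4096], cbuf[256];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = cbuf,
		.msg_controllen = sizeof(cbuf),
	};
	int fd, one = 1;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd == -1)
		err(1, "socket");
	/* Opt in; nl_send_one_group() then calls nl_add_msg_info(). */
	if (setsockopt(fd, SOL_NETLINK, NETLINK_MSG_INFO, &one,
	    sizeof(one)) == -1)
		err(1, "setsockopt");
	one = RTNLGRP_LINK;
	if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &one,
	    sizeof(one)) == -1)
		err(1, "setsockopt");
	/* Blocks until a link event (e.g. an interface going up) arrives. */
	if (recvmsg(fd, &msg, 0) == -1)
		err(1, "recvmsg");
	dump_msg_info(&msg);
	return (0);
}

Note the diff rejects NETLINK_MSG_INFO on Linux-ABI sockets (nlp->nl_linux), since Linux has no such option.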
*/ bool appended = false; struct sockbuf *sb = &nlp->nl_socket->so_rcv; SOCKBUF_LOCK(sb); while (true) { struct mbuf *m = queue_head(&nlp->tx_queue); if (m != NULL) { struct mbuf *ctl = NULL; if (__predict_false(m->m_next != NULL)) ctl = extract_msg_info(m); if (sbappendaddr_locked(sb, nl_empty_src, m, ctl) != 0) { /* appended successfully */ queue_pop(&nlp->tx_queue); appended = true; } else break; } else break; } SOCKBUF_UNLOCK(sb); if (appended) sorwakeup(nlp->nl_socket); return (queue_empty(&nlp->tx_queue)); } static bool nl_process_received_one(struct nlpcb *nlp) { + struct socket *so = nlp->nl_socket; + struct sockbuf *sb = &so->so_snd; + struct nl_buf *nb; bool reschedule = false; NLP_LOCK(nlp); nlp->nl_task_pending = false; if (!tx_check_locked(nlp)) { /* TX overflow queue still not empty, ignore RX */ NLP_UNLOCK(nlp); return (false); } - if (queue_empty(&nlp->rx_queue)) { - /* - * Grab all data we have from the socket TX queue - * and store it the internal queue, so it can be worked on - * w/o holding socket lock. - */ - struct sockbuf *sb = &nlp->nl_socket->so_snd; - - SOCKBUF_LOCK(sb); - unsigned int avail = sbavail(sb); - if (avail > 0) { - NL_LOG(LOG_DEBUG3, "grabbed %u bytes", avail); - queue_push(&nlp->rx_queue, sbcut_locked(sb, avail)); - } - SOCKBUF_UNLOCK(sb); - } else { - /* Schedule another pass to read from the socket queue */ - reschedule = true; - } - int prev_hiwat = nlp->tx_queue.hiwat; NLP_UNLOCK(nlp); - while (!queue_empty(&nlp->rx_queue)) { - struct mbuf *m = queue_pop(&nlp->rx_queue); - - m = nl_process_mbuf(m, nlp); - if (m != NULL) { - queue_push_head(&nlp->rx_queue, m); - reschedule = false; + SOCK_SENDBUF_LOCK(so); + while ((nb = TAILQ_FIRST(&sb->nl_queue)) != NULL) { + TAILQ_REMOVE(&sb->nl_queue, nb, tailq); + SOCK_SENDBUF_UNLOCK(so); + reschedule = nl_process_nbuf(nb, nlp); + SOCK_SENDBUF_LOCK(so); + if (reschedule) { + sb->sb_acc -= nb->datalen; + sb->sb_ccc -= nb->datalen; + /* XXXGL: potentially can reduce lock&unlock count. */ + sowwakeup_locked(so); + free(nb, M_NETLINK); + SOCK_SENDBUF_LOCK(so); + } else { + TAILQ_INSERT_HEAD(&sb->nl_queue, nb, tailq); break; } } + SOCK_SENDBUF_UNLOCK(so); if (nlp->tx_queue.hiwat > prev_hiwat) { NLP_LOG(LOG_DEBUG, nlp, "TX override peaked to %d", nlp->tx_queue.hiwat); } return (reschedule); } static void nl_process_received(struct nlpcb *nlp) { NL_LOG(LOG_DEBUG3, "taskqueue called"); if (__predict_false(nlp->nl_need_thread_setup)) { nl_set_thread_nlp(curthread, nlp); NLP_LOCK(nlp); nlp->nl_need_thread_setup = false; NLP_UNLOCK(nlp); } while (nl_process_received_one(nlp)) ; } void nl_init_io(struct nlpcb *nlp) { - STAILQ_INIT(&nlp->rx_queue.head); STAILQ_INIT(&nlp->tx_queue.head); } void nl_free_io(struct nlpcb *nlp) { - queue_free(&nlp->rx_queue); queue_free(&nlp->tx_queue); } /* * Called after some data have been read from the socket. */ void nl_on_transmit(struct nlpcb *nlp) { NLP_LOCK(nlp); struct socket *so = nlp->nl_socket; if (__predict_false(nlp->nl_dropped_bytes > 0 && so != NULL)) { unsigned long dropped_bytes = nlp->nl_dropped_bytes; unsigned long dropped_messages = nlp->nl_dropped_messages; nlp->nl_dropped_bytes = 0; nlp->nl_dropped_messages = 0; struct sockbuf *sb = &so->so_rcv; NLP_LOG(LOG_DEBUG, nlp, "socket RX overflowed, %lu messages (%lu bytes) dropped. 
" "bytes: [%u/%u] mbufs: [%u/%u]", dropped_messages, dropped_bytes, sb->sb_ccc, sb->sb_hiwat, sb->sb_mbcnt, sb->sb_mbmax); /* TODO: send netlink message */ } nl_schedule_taskqueue(nlp); NLP_UNLOCK(nlp); } void nl_taskqueue_handler(void *_arg, int pending) { struct nlpcb *nlp = (struct nlpcb *)_arg; CURVNET_SET(nlp->nl_socket->so_vnet); nl_process_received(nlp); CURVNET_RESTORE(); } static __noinline void queue_push_tx(struct nlpcb *nlp, struct mbuf *m) { queue_push(&nlp->tx_queue, m); nlp->nl_tx_blocked = true; if (nlp->tx_queue.length > nlp->tx_queue.hiwat) nlp->tx_queue.hiwat = nlp->tx_queue.length; } /* * Tries to send @m to the socket @nlp. * * @m: mbuf(s) to send to. Consumed in any case. * @nlp: socket to send to * @cnt: number of messages in @m * @io_flags: combination of NL_IOF_* flags * * Returns true on success. * If no queue overrunes happened, wakes up socket owner. */ bool nl_send_one(struct mbuf *m, struct nlpcb *nlp, int num_messages, int io_flags) { bool untranslated = io_flags & NL_IOF_UNTRANSLATED; bool ignore_limits = io_flags & NL_IOF_IGNORE_LIMIT; bool result = true; IF_DEBUG_LEVEL(LOG_DEBUG2) { struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *); NLP_LOG(LOG_DEBUG2, nlp, "TX mbuf len %u msgs %u msg type %d first hdrlen %u io_flags %X", m_length(m, NULL), num_messages, hdr->nlmsg_type, hdr->nlmsg_len, io_flags); } if (__predict_false(nlp->nl_linux && linux_netlink_p != NULL && untranslated)) { m = linux_netlink_p->mbufs_to_linux(nlp->nl_proto, m, nlp); if (m == NULL) return (false); } NLP_LOCK(nlp); if (__predict_false(nlp->nl_socket == NULL)) { NLP_UNLOCK(nlp); m_freem(m); return (false); } if (!queue_empty(&nlp->tx_queue)) { if (ignore_limits) { queue_push_tx(nlp, m); } else { m_free(m); result = false; } NLP_UNLOCK(nlp); return (result); } struct socket *so = nlp->nl_socket; struct mbuf *ctl = NULL; if (__predict_false(m->m_next != NULL)) ctl = extract_msg_info(m); if (sbappendaddr(&so->so_rcv, nl_empty_src, m, ctl) != 0) { sorwakeup(so); NLP_LOG(LOG_DEBUG3, nlp, "appended data & woken up"); } else { if (ignore_limits) { queue_push_tx(nlp, m); } else { /* * Store dropped data so it can be reported * on the next read */ nlp->nl_dropped_bytes += m_length(m, NULL); nlp->nl_dropped_messages += num_messages; NLP_LOG(LOG_DEBUG2, nlp, "RX oveflow: %lu m (+%d), %lu b (+%d)", (unsigned long)nlp->nl_dropped_messages, num_messages, (unsigned long)nlp->nl_dropped_bytes, m_length(m, NULL)); soroverflow(so); m_freem(m); result = false; } } NLP_UNLOCK(nlp); return (result); } static int nl_receive_message(struct nlmsghdr *hdr, int remaining_length, struct nlpcb *nlp, struct nl_pstate *npt) { nl_handler_f handler = nl_handlers[nlp->nl_proto].cb; int error = 0; NLP_LOG(LOG_DEBUG2, nlp, "msg len: %u type: %d: flags: 0x%X seq: %u pid: %u", hdr->nlmsg_len, hdr->nlmsg_type, hdr->nlmsg_flags, hdr->nlmsg_seq, hdr->nlmsg_pid); if (__predict_false(hdr->nlmsg_len > remaining_length)) { NLP_LOG(LOG_DEBUG, nlp, "message is not entirely present: want %d got %d", hdr->nlmsg_len, remaining_length); return (EINVAL); } else if (__predict_false(hdr->nlmsg_len < sizeof(*hdr))) { NL_LOG(LOG_DEBUG, "message too short: %d", hdr->nlmsg_len); return (EINVAL); } /* Stamp each message with sender pid */ hdr->nlmsg_pid = nlp->nl_port; npt->hdr = hdr; if (hdr->nlmsg_flags & NLM_F_REQUEST && hdr->nlmsg_type >= NLMSG_MIN_TYPE) { NL_LOG(LOG_DEBUG2, "handling message with msg type: %d", hdr->nlmsg_type); if (nlp->nl_linux && linux_netlink_p != NULL) { struct nlmsghdr *hdr_orig = hdr; hdr = 
linux_netlink_p->msg_from_linux(nlp->nl_proto, hdr, npt); if (hdr == NULL) { /* Failed to translate to kernel format. Report an error back */ hdr = hdr_orig; npt->hdr = hdr; if (hdr->nlmsg_flags & NLM_F_ACK) nlmsg_ack(nlp, EOPNOTSUPP, hdr, npt); return (0); } } error = handler(hdr, npt); NL_LOG(LOG_DEBUG2, "retcode: %d", error); } if ((hdr->nlmsg_flags & NLM_F_ACK) || (error != 0 && error != EINTR)) { if (!npt->nw->suppress_ack) { NL_LOG(LOG_DEBUG3, "ack"); nlmsg_ack(nlp, error, hdr, npt); } } return (0); } static void npt_clear(struct nl_pstate *npt) { lb_clear(&npt->lb); npt->error = 0; npt->err_msg = NULL; npt->err_off = 0; npt->hdr = NULL; npt->nw->suppress_ack = false; } /* * Processes an incoming packet, which can contain multiple netlink messages */ -static struct mbuf * -nl_process_mbuf(struct mbuf *m, struct nlpcb *nlp) +static bool +nl_process_nbuf(struct nl_buf *nb, struct nlpcb *nlp) { - int offset, buffer_length; struct nlmsghdr *hdr; - char *buffer; int error; - NL_LOG(LOG_DEBUG3, "RX netlink mbuf %p on %p", m, nlp->nl_socket); + NL_LOG(LOG_DEBUG3, "RX netlink buf %p on %p", nb, nlp->nl_socket); struct nl_writer nw = {}; if (!nlmsg_get_unicast_writer(&nw, NLMSG_SMALL, nlp)) { - m_freem(m); NL_LOG(LOG_DEBUG, "error allocating socket writer"); - return (NULL); + return (true); } nlmsg_ignore_limit(&nw); - /* TODO: alloc this buf once for nlp */ - int data_length = m_length(m, NULL); - buffer_length = roundup2(data_length, 8) + SCRATCH_BUFFER_SIZE; - if (nlp->nl_linux) - buffer_length += roundup2(data_length, 8); - buffer = malloc(buffer_length, M_NETLINK, M_NOWAIT | M_ZERO); - if (buffer == NULL) { - m_freem(m); - nlmsg_flush(&nw); - NL_LOG(LOG_DEBUG, "Unable to allocate %d bytes of memory", - buffer_length); - return (NULL); - } - m_copydata(m, 0, data_length, buffer); struct nl_pstate npt = { .nlp = nlp, - .lb.base = &buffer[roundup2(data_length, 8)], - .lb.size = buffer_length - roundup2(data_length, 8), + .lb.base = &nb->data[roundup2(nb->datalen, 8)], + .lb.size = nb->buflen - roundup2(nb->datalen, 8), .nw = &nw, .strict = nlp->nl_flags & NLF_STRICT, }; - for (offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) { - hdr = (struct nlmsghdr *)&buffer[offset]; + for (; nb->offset + sizeof(struct nlmsghdr) <= nb->datalen;) { + hdr = (struct nlmsghdr *)&nb->data[nb->offset]; /* Save length prior to calling handler */ int msglen = NLMSG_ALIGN(hdr->nlmsg_len); - NL_LOG(LOG_DEBUG3, "parsing offset %d/%d", offset, data_length); + NL_LOG(LOG_DEBUG3, "parsing offset %d/%d", + nb->offset, nb->datalen); npt_clear(&npt); - error = nl_receive_message(hdr, data_length - offset, nlp, &npt); - offset += msglen; + error = nl_receive_message(hdr, nb->datalen - nb->offset, nlp, + &npt); + nb->offset += msglen; if (__predict_false(error != 0 || nlp->nl_tx_blocked)) break; } NL_LOG(LOG_DEBUG3, "packet parsing done"); - free(buffer, M_NETLINK); nlmsg_flush(&nw); if (nlp->nl_tx_blocked) { NLP_LOCK(nlp); nlp->nl_tx_blocked = false; NLP_UNLOCK(nlp); - m_adj(m, offset); - return (m); - } else { - m_freem(m); - return (NULL); - } + return (false); + } else + return (true); } diff --git a/sys/netlink/netlink_var.h b/sys/netlink/netlink_var.h index 36b7c61974c9..ec174e17d1a2 100644 --- a/sys/netlink/netlink_var.h +++ b/sys/netlink/netlink_var.h @@ -1,208 +1,214 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 2022 Alexander V. 
Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _NETLINK_NETLINK_VAR_H_ #define _NETLINK_NETLINK_VAR_H_ #ifdef _KERNEL #include #include #include #include #include #define NLSNDQ 65536 /* Default socket sendspace */ #define NLRCVQ 65536 /* Default socket recvspace */ #define NLMBUFSIZE 2048 /* External storage size for Netlink mbufs */ struct ucred; struct nl_io_queue { STAILQ_HEAD(, mbuf) head; int length; int hiwat; }; +struct nl_buf { + TAILQ_ENTRY(nl_buf) tailq; + u_int buflen; + u_int datalen; + u_int offset; + char data[]; +}; + #define NLP_MAX_GROUPS 128 struct nlpcb { struct socket *nl_socket; uint64_t nl_groups[NLP_MAX_GROUPS / 64]; uint32_t nl_port; uint32_t nl_flags; uint32_t nl_process_id; int nl_proto; - bool nl_active; bool nl_bound; bool nl_task_pending; bool nl_tx_blocked; /* No new requests accepted */ bool nl_linux; /* true if running under compat */ bool nl_unconstrained_vnet; /* true if running under VNET jail (or without jail) */ bool nl_need_thread_setup; - struct nl_io_queue rx_queue; struct nl_io_queue tx_queue; struct taskqueue *nl_taskqueue; struct task nl_task; struct ucred *nl_cred; /* Copy of nl_socket->so_cred */ uint64_t nl_dropped_bytes; uint64_t nl_dropped_messages; CK_LIST_ENTRY(nlpcb) nl_next; CK_LIST_ENTRY(nlpcb) nl_port_next; volatile u_int nl_refcount; struct mtx nl_lock; struct epoch_context nl_epoch_ctx; }; #define sotonlpcb(so) ((struct nlpcb *)(so)->so_pcb) #define NLP_LOCK_INIT(_nlp) mtx_init(&((_nlp)->nl_lock), "nlp mtx", NULL, MTX_DEF) #define NLP_LOCK_DESTROY(_nlp) mtx_destroy(&((_nlp)->nl_lock)) #define NLP_LOCK(_nlp) mtx_lock(&((_nlp)->nl_lock)) #define NLP_UNLOCK(_nlp) mtx_unlock(&((_nlp)->nl_lock)) #define ALIGNED_NL_SZ(_data) roundup2((((struct nlmsghdr *)(_data))->nlmsg_len), 16) /* nl_flags */ #define NLF_CAP_ACK 0x01 /* Do not send message body with errmsg */ #define NLF_EXT_ACK 0x02 /* Allow including extended TLVs in ack */ #define NLF_STRICT 0x04 /* Perform strict header checks */ #define NLF_MSG_INFO 0x08 /* Send caller info along with the notifications */ SYSCTL_DECL(_net_netlink); SYSCTL_DECL(_net_netlink_debug); struct nl_control { CK_LIST_HEAD(nl_pid_head, nlpcb) ctl_port_head; CK_LIST_HEAD(nlpcb_head, nlpcb) ctl_pcb_head; CK_LIST_ENTRY(nl_control) ctl_next; struct rmlock ctl_lock; }; VNET_DECLARE(struct nl_control *, 
nl_ctl); #define V_nl_ctl VNET(nl_ctl) struct sockaddr_nl; struct sockaddr; struct nlmsghdr; /* netlink_module.c */ struct nl_control *vnet_nl_ctl_init(void); int nl_verify_proto(int proto); const char *nl_get_proto_name(int proto); extern int netlink_unloading; struct nl_proto_handler { nl_handler_f cb; const char *proto_name; }; extern struct nl_proto_handler *nl_handlers; /* netlink_domain.c */ void nl_send_group(struct mbuf *m, int cnt, int proto, int group_id); void nl_osd_register(void); void nl_osd_unregister(void); void nl_set_thread_nlp(struct thread *td, struct nlpcb *nlp); /* netlink_io.c */ #define NL_IOF_UNTRANSLATED 0x01 #define NL_IOF_IGNORE_LIMIT 0x02 bool nl_send_one(struct mbuf *m, struct nlpcb *nlp, int cnt, int io_flags); void nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *nlmsg, struct nl_pstate *npt); void nl_on_transmit(struct nlpcb *nlp); void nl_init_io(struct nlpcb *nlp); void nl_free_io(struct nlpcb *nlp); void nl_taskqueue_handler(void *_arg, int pending); -int nl_receive_async(struct mbuf *m, struct socket *so); +void nl_schedule_taskqueue(struct nlpcb *nlp); void nl_process_receive_locked(struct nlpcb *nlp); void nl_set_source_metadata(struct mbuf *m, int num_messages); void nl_add_msg_info(struct mbuf *m); /* netlink_message_writer.c */ void nl_init_msg_zone(void); void nl_destroy_msg_zone(void); /* netlink_generic.c */ struct genl_family { const char *family_name; uint16_t family_hdrsize; uint16_t family_id; uint16_t family_version; uint16_t family_attr_max; uint16_t family_cmd_size; uint16_t family_num_groups; struct genl_cmd *family_cmds; }; struct genl_group { struct genl_family *group_family; const char *group_name; }; struct genl_family *genl_get_family(uint32_t family_id); struct genl_group *genl_get_group(uint32_t group_id); #define MAX_FAMILIES 20 #define MAX_GROUPS 64 #define MIN_GROUP_NUM 48 #define CTRL_FAMILY_NAME "nlctrl" struct ifnet; struct nl_parsed_link; struct nlattr_bmask; struct nl_pstate; /* Function map */ struct nl_function_wrapper { bool (*nlmsg_add)(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, uint16_t flags, uint32_t len); bool (*nlmsg_refill_buffer)(struct nl_writer *nw, int required_len); bool (*nlmsg_flush)(struct nl_writer *nw); bool (*nlmsg_end)(struct nl_writer *nw); void (*nlmsg_abort)(struct nl_writer *nw); void (*nlmsg_ignore_limit)(struct nl_writer *nw); bool (*nlmsg_get_unicast_writer)(struct nl_writer *nw, int size, struct nlpcb *nlp); bool (*nlmsg_get_group_writer)(struct nl_writer *nw, int size, int protocol, int group_id); bool (*nlmsg_get_chain_writer)(struct nl_writer *nw, int size, struct mbuf **pm); bool (*nlmsg_end_dump)(struct nl_writer *nw, int error, struct nlmsghdr *hdr); int (*nl_modify_ifp_generic)(struct ifnet *ifp, struct nl_parsed_link *lattrs, const struct nlattr_bmask *bm, struct nl_pstate *npt); void (*nl_store_ifp_cookie)(struct nl_pstate *npt, struct ifnet *ifp); struct nlpcb * (*nl_get_thread_nlp)(struct thread *td); }; void nl_set_functions(const struct nl_function_wrapper *nl); #endif #endif diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h index 92b9964072fb..c6093883be4a 100644 --- a/sys/sys/sockbuf.h +++ b/sys/sys/sockbuf.h @@ -1,317 +1,323 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _SYS_SOCKBUF_H_ #define _SYS_SOCKBUF_H_ /* * Constants for sb_flags field of struct sockbuf/xsockbuf. */ #define SB_TLS_RX 0x01 /* using KTLS on RX */ #define SB_TLS_RX_RUNNING 0x02 /* KTLS RX operation running */ #define SB_WAIT 0x04 /* someone is waiting for data/space */ #define SB_SEL 0x08 /* someone is selecting */ #define SB_ASYNC 0x10 /* ASYNC I/O, need signals */ #define SB_UPCALL 0x20 /* someone wants an upcall */ #define SB_NOINTR 0x40 /* operations not interruptible */ #define SB_AIO 0x80 /* AIO operations queued */ #define SB_KNOTE 0x100 /* kernel note attached */ #define SB_NOCOALESCE 0x200 /* don't coalesce new data into existing mbufs */ #define SB_IN_TOE 0x400 /* socket buffer is in the middle of an operation */ #define SB_AUTOSIZE 0x800 /* automatically size socket buffer */ #define SB_STOP 0x1000 /* backpressure indicator */ #define SB_AIO_RUNNING 0x2000 /* AIO operation running */ #define SB_UNUSED 0x4000 /* previously used for SB_TLS_IFNET */ #define SB_TLS_RX_RESYNC 0x8000 /* KTLS RX lost HW sync */ #define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ #define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ #define SBS_RCVATMARK 0x0040 /* at mark on input */ #if defined(_KERNEL) || defined(_WANT_SOCKET) #include #include #include #include #define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */ struct ktls_session; struct mbuf; struct sockaddr; struct socket; struct sockopt; struct thread; struct selinfo; /* * Socket buffer * * A buffer starts with the fields that are accessed by I/O multiplexing * APIs like select(2), kevent(2) or AIO and thus are shared between different * buffer implementations. They are protected by the SOCK_RECVBUF_LOCK() * or SOCK_SENDBUF_LOCK() of the owning socket. * * XXX: sb_acc, sb_ccc and sb_mbcnt shall become implementation specific * methods. * * Protocol specific implementations follow in a union. 
*/ struct sockbuf { struct selinfo *sb_sel; /* process selecting read/write */ short sb_state; /* socket state on sockbuf */ short sb_flags; /* flags, see above */ u_int sb_acc; /* available chars in buffer */ u_int sb_ccc; /* claimed chars in buffer */ u_int sb_mbcnt; /* chars of mbufs used */ u_int sb_ctl; /* non-data chars in buffer */ u_int sb_hiwat; /* max actual char count */ u_int sb_lowat; /* low water mark */ u_int sb_mbmax; /* max chars of mbufs to use */ sbintime_t sb_timeo; /* timeout for read/write */ int (*sb_upcall)(struct socket *, void *, int); void *sb_upcallarg; TAILQ_HEAD(, kaiocb) sb_aiojobq; /* pending AIO ops */ struct task sb_aiotask; /* AIO task */ union { /* * Classic BSD one-size-fits-all socket buffer, capable of * doing streams and datagrams. The stream part is able * to perform special features: * - not ready data (sendfile) * - TLS */ struct { /* compat: sockbuf lock pointer */ struct mtx *sb_mtx; /* first and last mbufs in the chain */ struct mbuf *sb_mb; struct mbuf *sb_mbtail; /* first mbuf of last record in socket buffer */ struct mbuf *sb_lastrecord; /* pointer to data to send next (TCP */ struct mbuf *sb_sndptr; /* pointer to first not ready buffer */ struct mbuf *sb_fnrdy; /* byte offset of ptr into chain, used with sb_sndptr */ u_int sb_sndptroff; /* TLS */ u_int sb_tlscc; /* TLS chain characters */ u_int sb_tlsdcc; /* characters being decrypted */ struct mbuf *sb_mtls; /* TLS mbuf chain */ struct mbuf *sb_mtlstail; /* last mbuf in TLS chain */ uint64_t sb_tls_seqno; /* TLS seqno */ struct ktls_session *sb_tls_info; /* TLS state */ }; /* * PF_UNIX/SOCK_DGRAM * * Local protocol, thus we should buffer on the receive side * only. However, in one to many configuration we don't want * a single receive buffer to be shared. So we would link * send buffers onto receive buffer. All the fields are locked * by the receive buffer lock. */ struct { /* * For receive buffer: own queue of this buffer for * unconnected sends. For send buffer: queue lended * to the peer receive buffer, to isolate ourselves * from other senders. */ STAILQ_HEAD(, mbuf) uxdg_mb; /* For receive buffer: datagram seen via MSG_PEEK. */ struct mbuf *uxdg_peeked; /* * For receive buffer: queue of send buffers of * connected peers. For send buffer: linkage on * connected peer receive buffer queue. */ union { TAILQ_HEAD(, sockbuf) uxdg_conns; TAILQ_ENTRY(sockbuf) uxdg_clist; }; /* Counters for this buffer uxdg_mb chain + peeked. */ u_int uxdg_cc; u_int uxdg_ctl; u_int uxdg_mbcnt; }; + /* + * Netlink socket. + */ + struct { + TAILQ_HEAD(, nl_buf) nl_queue; + }; }; }; #endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */ #ifdef _KERNEL /* 'which' values for KPIs that operate on one buffer of a socket. */ typedef enum { SO_RCV, SO_SND } sb_which; /* * Per-socket buffer mutex used to protect most fields in the socket buffer. * These make use of the mutex pointer embedded in struct sockbuf, which * currently just references mutexes in the containing socket. The * SOCK_SENDBUF_LOCK() etc. macros can be used instead of or in combination with * these locking macros. */ #define SOCKBUF_MTX(_sb) ((_sb)->sb_mtx) #define SOCKBUF_LOCK(_sb) mtx_lock(SOCKBUF_MTX(_sb)) #define SOCKBUF_OWNED(_sb) mtx_owned(SOCKBUF_MTX(_sb)) #define SOCKBUF_UNLOCK(_sb) mtx_unlock(SOCKBUF_MTX(_sb)) #define SOCKBUF_LOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED) #define SOCKBUF_UNLOCK_ASSERT(_sb) mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED) /* * Socket buffer private mbuf(9) flags. 
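The netlink arm added to the union above gives netlink sockets their own send-side representation, a TAILQ of struct nl_buf, while the fields shared by select(2)/kevent(2) and the accounting stay outside the union, as the comment before struct sockbuf explains. A toy layout sketch of the pattern, using illustrative stand-in types rather than the kernel's:

#include <sys/queue.h>
#include <stdio.h>

struct nl_buf_x { TAILQ_ENTRY(nl_buf_x) tailq; };	/* cf. struct nl_buf */
struct mbuf_x { struct mbuf_x *m_next; };		/* cf. struct mbuf */

struct sockbuf_x {
	/* Shared, implementation-independent accounting. */
	unsigned int sb_ccc;
	unsigned int sb_hiwat;
	/* Per-protocol representations share one footprint. */
	union {
		struct {	/* classic BSD stream/datagram buffer */
			struct mbuf_x *sb_mb;
			struct mbuf_x *sb_mbtail;
		};
		struct {	/* netlink: queue of contiguous nl_bufs */
			TAILQ_HEAD(, nl_buf_x) nl_queue;
		};
	};
};

int
main(void)
{
	struct sockbuf_x sb = { .sb_hiwat = 65536 };

	TAILQ_INIT(&sb.nl_queue);	/* the netlink arm of the union */
	printf("%zu bytes, empty: %d\n", sizeof(sb),
	    TAILQ_EMPTY(&sb.nl_queue));
	return (0);
}

Because the arms alias the same storage, a protocol must touch only its own arm; the PR_SOCKBUF flag added to the protosw in this diff signals that the protocol manages its buffer contents itself.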
*/ #define M_NOTREADY M_PROTO1 /* m_data not populated yet */ #define M_BLOCKED M_PROTO2 /* M_NOTREADY in front of m */ #define M_NOTAVAIL (M_NOTREADY | M_BLOCKED) void sbappend(struct sockbuf *sb, struct mbuf *m, int flags); void sbappend_locked(struct sockbuf *sb, struct mbuf *m, int flags); void sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags); void sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags); int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); int sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); void sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, int flags); void sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control, int flags); void sbappendrecord(struct sockbuf *sb, struct mbuf *m0); void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0); void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n); struct mbuf * sbcreatecontrol(const void *p, u_int size, int type, int level, int wait); void sbdestroy(struct socket *, sb_which); void sbdrop(struct sockbuf *sb, int len); void sbdrop_locked(struct sockbuf *sb, int len); struct mbuf * sbcut_locked(struct sockbuf *sb, int len); void sbdroprecord(struct sockbuf *sb); void sbdroprecord_locked(struct sockbuf *sb); void sbflush(struct sockbuf *sb); void sbflush_locked(struct sockbuf *sb); void sbrelease(struct socket *, sb_which); void sbrelease_locked(struct socket *, sb_which); int sbsetopt(struct socket *so, struct sockopt *); bool sbreserve_locked(struct socket *so, sb_which which, u_long cc, struct thread *td); bool sbreserve_locked_limit(struct socket *so, sb_which which, u_long cc, u_long buf_max, struct thread *td); void sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len); struct mbuf * sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff); struct mbuf * sbsndmbuf(struct sockbuf *sb, u_int off, u_int *moff); int sbwait(struct socket *, sb_which); void sballoc(struct sockbuf *, struct mbuf *); void sbfree(struct sockbuf *, struct mbuf *); void sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m); void sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m); int sbready(struct sockbuf *, struct mbuf *, int); /* * Return how much data is available to be taken out of socket * buffer right now. */ static inline u_int sbavail(struct sockbuf *sb) { #if 0 SOCKBUF_LOCK_ASSERT(sb); #endif return (sb->sb_acc); } /* * Return how much data sits there in the socket buffer * It might be that some data is not yet ready to be read. */ static inline u_int sbused(struct sockbuf *sb) { #if 0 SOCKBUF_LOCK_ASSERT(sb); #endif return (sb->sb_ccc); } /* * How much space is there in a socket buffer (so->so_snd or so->so_rcv)? * This is problematical if the fields are unsigned, as the space might * still be negative (ccc > hiwat or mbcnt > mbmax). */ static inline long sbspace(struct sockbuf *sb) { int bleft, mleft; /* size should match sockbuf fields */ #if 0 SOCKBUF_LOCK_ASSERT(sb); #endif if (sb->sb_flags & SB_STOP) return(0); bleft = sb->sb_hiwat - sb->sb_ccc; mleft = sb->sb_mbmax - sb->sb_mbcnt; return ((bleft < mleft) ? 
bleft : mleft); } #define SB_EMPTY_FIXUP(sb) do { \ if ((sb)->sb_mb == NULL) { \ (sb)->sb_mbtail = NULL; \ (sb)->sb_lastrecord = NULL; \ } \ } while (/*CONSTCOND*/0) #ifdef SOCKBUF_DEBUG void sblastrecordchk(struct sockbuf *, const char *, int); void sblastmbufchk(struct sockbuf *, const char *, int); void sbcheck(struct sockbuf *, const char *, int); #define SBLASTRECORDCHK(sb) sblastrecordchk((sb), __FILE__, __LINE__) #define SBLASTMBUFCHK(sb) sblastmbufchk((sb), __FILE__, __LINE__) #define SBCHECK(sb) sbcheck((sb), __FILE__, __LINE__) #else #define SBLASTRECORDCHK(sb) do {} while (0) #define SBLASTMBUFCHK(sb) do {} while (0) #define SBCHECK(sb) do {} while (0) #endif /* SOCKBUF_DEBUG */ #endif /* _KERNEL */ #endif /* _SYS_SOCKBUF_H_ */
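Taken together, the three files give netlink a contiguous-buffer send path, and it can be exercised end to end with a plain client: send(2) enters nl_sosend(), the taskqueue parses the queued nl_buf via nl_process_nbuf(), and replies arrive through the unchanged so_rcv path. A minimal FreeBSD example, assuming the userland headers <netlink/netlink.h> and <netlink/netlink_route.h> with their Linux-compatible NLMSG_* macros:

#include <sys/socket.h>
#include <netlink/netlink.h>
#include <netlink/netlink_route.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct {
		struct nlmsghdr hdr;
		struct ifinfomsg ifm;
	} req;
	union {				/* keep replies aligned for nlmsghdr */
		uint64_t align;
		char data[8192];
	} buf;
	struct nlmsghdr *h;
	ssize_t n;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd == -1)
		err(1, "socket");

	memset(&req, 0, sizeof(req));
	req.hdr.nlmsg_len = sizeof(req);
	req.hdr.nlmsg_type = RTM_GETLINK;
	req.hdr.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
	req.hdr.nlmsg_seq = 1;

	/* Copied into one nl_buf by nl_sosend() and queued on so_snd. */
	if (send(fd, &req, sizeof(req), 0) == -1)
		err(1, "send");

	/* Read one batch; a complete client loops until NLMSG_DONE. */
	if ((n = recv(fd, buf.data, sizeof(buf.data), 0)) == -1)
		err(1, "recv");
	for (h = (struct nlmsghdr *)buf.data; NLMSG_OK(h, n);
	    h = NLMSG_NEXT(h, n)) {
		printf("type %u, len %u\n", h->nlmsg_type, h->nlmsg_len);
		if (h->nlmsg_type == NLMSG_DONE)
			break;
	}
	close(fd);
	return (0);
}

No bind(2) is needed for a request/response exchange; replies are delivered to the requesting socket, and the kernel stamps nlmsg_pid in nl_receive_message().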