diff --git a/sys/compat/linux/linux_netlink.c b/sys/compat/linux/linux_netlink.c index 807cdc7a14bc..af172fb27ba7 100644 --- a/sys/compat/linux/linux_netlink.c +++ b/sys/compat/linux/linux_netlink.c @@ -1,619 +1,604 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #define DEBUG_MOD_NAME nl_linux #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_INFO); static bool valid_rta_size(const struct rtattr *rta, int sz) { return (NL_RTA_DATA_LEN(rta) == sz); } static bool valid_rta_u32(const struct rtattr *rta) { return (valid_rta_size(rta, sizeof(uint32_t))); } static uint32_t _rta_get_uint32(const struct rtattr *rta) { return (*((const uint32_t *)NL_RTA_DATA_CONST(rta))); } static struct nlmsghdr * rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct ndmsg *ndm = (struct ndmsg *)(hdr + 1); if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family); return (hdr); } static struct nlmsghdr * rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1); if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)) ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family); return (hdr); } static struct nlmsghdr * rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) { /* Tweak address families and default fib only */ struct rtmsg *rtm = (struct rtmsg *)(hdr + 1); struct nlattr *nla, *nla_head; int attrs_len; rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family); if (rtm->rtm_table == 254) rtm->rtm_table = 0; attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr); attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg)); nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg))); NLA_FOREACH(nla, nla_head, attrs_len) { RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d", nla->nla_type, nla->nla_len, attrs_len); struct rtattr *rta = (struct rtattr *)nla; if (rta->rta_len < sizeof(struct rtattr)) { break; } switch 
(rta->rta_type) { case NL_RTA_TABLE: if (!valid_rta_u32(rta)) goto done; rtm->rtm_table = 0; uint32_t fibnum = _rta_get_uint32(rta); RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum); if (fibnum == 254) { *((uint32_t *)NL_RTA_DATA(rta)) = 0; } break; } } done: return (hdr); } static struct nlmsghdr * rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) { switch (hdr->nlmsg_type) { case NL_RTM_GETROUTE: case NL_RTM_NEWROUTE: case NL_RTM_DELROUTE: return (rtnl_route_from_linux(hdr, npt)); case NL_RTM_GETNEIGH: return (rtnl_neigh_from_linux(hdr, npt)); case NL_RTM_GETADDR: return (rtnl_ifaddr_from_linux(hdr, npt)); /* Silence warning for the messages where no translation is required */ case NL_RTM_NEWLINK: case NL_RTM_DELLINK: case NL_RTM_GETLINK: break; default: RT_LOG(LOG_DEBUG, "Passing message type %d untranslated", hdr->nlmsg_type); } return (hdr); } static struct nlmsghdr * nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr, struct nl_pstate *npt) { switch (netlink_family) { case NETLINK_ROUTE: return (rtnl_from_linux(hdr, npt)); } return (hdr); } /************************************************************ * Kernel -> Linux ************************************************************/ static bool handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw) { char *out_hdr; out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char); if (out_hdr != NULL) { memcpy(out_hdr, hdr, hdr->nlmsg_len); + nw->num_messages++; return (true); } return (false); } static bool nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw) { return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, hdr->nlmsg_flags, 0)); } static void * _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz) { void *next_hdr = nlmsg_reserve_data(nw, sz, void); memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz)); return (next_hdr); } #define nlmsg_copy_next_header(_hdr, _ns, _t) \ ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t)))) static bool 
nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) { struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr); if (nla != NULL) { memcpy(nla, nla_orig, nla_orig->nla_len); return (true); } return (false); } /* * Translate a FreeBSD interface name to a Linux interface name. */ static bool nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw) { char ifname[LINUX_IFNAMSIZ]; if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, sizeof(ifname)) <= 0) return (false); return (nlattr_add_string(nw, IFLA_IFNAME, ifname)); } #define LINUX_NLA_UNHANDLED -1 /* * Translate a FreeBSD attribute to a Linux attribute. * Returns LINUX_NLA_UNHANDLED when the attribute is not processed * and the caller must take care of it, otherwise the result is returned. */ static int nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla, struct nl_writer *nw) { switch (hdr->nlmsg_type) { case NL_RTM_NEWLINK: case NL_RTM_DELLINK: case NL_RTM_GETLINK: switch (nla->nla_type) { case IFLA_IFNAME: return (nlmsg_translate_ifname_nla(nla, nw)); default: break; } default: break; } return (LINUX_NLA_UNHANDLED); } static bool nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) { struct nlattr *nla; int ret; int hdrlen = NETLINK_ALIGN(raw_hdrlen); int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); NLA_FOREACH(nla, nla_head, attrs_len) { RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len); if (nla->nla_len < sizeof(struct nlattr)) { return (false); } ret = nlmsg_translate_all_nla(hdr, nla, nw); if (ret == LINUX_NLA_UNHANDLED) ret = nlmsg_copy_nla(nla, nw); if (!ret) return (false); } return (true); } #undef LINUX_NLA_UNHANDLED static unsigned int rtnl_if_flags_to_linux(unsigned int if_flags) { unsigned int result = 0; for (int i = 0; i < 31; i++) { unsigned int flag = 1 << i; if (!(flag & if_flags)) continue; 
switch (flag) { case IFF_UP: case IFF_BROADCAST: case IFF_DEBUG: case IFF_LOOPBACK: case IFF_POINTOPOINT: case IFF_DRV_RUNNING: case IFF_NOARP: case IFF_PROMISC: case IFF_ALLMULTI: result |= flag; break; case IFF_NEEDSEPOCH: case IFF_DRV_OACTIVE: case IFF_SIMPLEX: case IFF_LINK0: case IFF_LINK1: case IFF_LINK2: case IFF_CANTCONFIG: case IFF_PPROMISC: case IFF_MONITOR: case IFF_STATICARP: case IFF_STICKYARP: case IFF_DYING: case IFF_RENAMING: /* No Linux analogue */ break; case IFF_MULTICAST: result |= 1 << 12; } } return (result); } static bool rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) { if (!nlmsg_copy_header(hdr, nw)) return (false); struct ifinfomsg *ifinfo; ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg); ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family); /* Convert interface type */ switch (ifinfo->ifi_type) { case IFT_ETHER: ifinfo->ifi_type = LINUX_ARPHRD_ETHER; break; } ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags); /* Copy attributes unchanged */ if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw)) return (false); /* make ip(8) happy */ if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue")) return (false); if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000)) return (false); nlmsg_end(nw); RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); return (true); } static bool rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) { if (!nlmsg_copy_header(hdr, nw)) return (false); struct ifaddrmsg *ifamsg; ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg); ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family); /* XXX: fake ifa_flags? 
*/ /* Copy attributes unchanged */ if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw)) return (false); nlmsg_end(nw); RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); return (true); } static bool rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) { if (!nlmsg_copy_header(hdr, nw)) return (false); struct ndmsg *ndm; ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg); ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family); /* Copy attributes unchanged */ if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw)) return (false); nlmsg_end(nw); RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); return (true); } static bool rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) { if (!nlmsg_copy_header(hdr, nw)) return (false); struct rtmsg *rtm; rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg); rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family); struct nlattr *nla; int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg)); int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); NLA_FOREACH(nla, nla_head, attrs_len) { struct rtattr *rta = (struct rtattr *)nla; //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len); if (rta->rta_len < sizeof(struct rtattr)) { break; } switch (rta->rta_type) { case NL_RTA_TABLE: { uint32_t fibnum; fibnum = _rta_get_uint32(rta); if (fibnum == 0) fibnum = 254; RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum); if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum)) return (false); } break; default: if (!nlmsg_copy_nla(nla, nw)) return (false); break; } } nlmsg_end(nw); RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); return (true); } static bool rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) { RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type); switch (hdr->nlmsg_type) { case NL_RTM_NEWLINK: case NL_RTM_DELLINK: case NL_RTM_GETLINK: return 
(rtnl_newlink_to_linux(hdr, nlp, nw)); case NL_RTM_NEWADDR: case NL_RTM_DELADDR: return (rtnl_newaddr_to_linux(hdr, nlp, nw)); case NL_RTM_NEWROUTE: case NL_RTM_DELROUTE: return (rtnl_newroute_to_linux(hdr, nlp, nw)); case NL_RTM_NEWNEIGH: case NL_RTM_DELNEIGH: case NL_RTM_GETNEIGH: return (rtnl_newneigh_to_linux(hdr, nlp, nw)); default: RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", hdr->nlmsg_type); return (handle_default_out(hdr, nw)); } } static bool nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) { if (!nlmsg_copy_header(hdr, nw)) return (false); struct nlmsgerr *nlerr; nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr); nlerr->error = bsd_to_linux_errno(nlerr->error); int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr); if (hdr->nlmsg_len == copied_len) { nlmsg_end(nw); return (true); } /* * CAP_ACK was not set. Original request needs to be translated. * XXX: implement translation of the original message */ RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated", nlerr->msg.nlmsg_type); char *dst_payload, *src_payload; int copy_len = hdr->nlmsg_len - copied_len; dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char); src_payload = (char *)hdr + copied_len; memcpy(dst_payload, src_payload, copy_len); nlmsg_end(nw); return (true); } static bool -nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp, - struct nl_writer *nw) +nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) { if (hdr->nlmsg_type < NLMSG_MIN_TYPE) { switch (hdr->nlmsg_type) { case NLMSG_ERROR: return (nlmsg_error_to_linux(hdr, nlp, nw)); case NLMSG_NOOP: case NLMSG_DONE: case NLMSG_OVERRUN: return (handle_default_out(hdr, nw)); default: RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", hdr->nlmsg_type); return (handle_default_out(hdr, nw)); } } - switch (netlink_family) { + switch (nlp->nl_proto) { case NETLINK_ROUTE: return 
(rtnl_to_linux(hdr, nlp, nw)); default: return (handle_default_out(hdr, nw)); } } -static struct mbuf * -nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp) +static bool +nlmsgs_to_linux(struct nl_writer *nw, struct nlpcb *nlp) { - RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length); - struct nl_writer nw = {}; - - struct mbuf *m = NULL; - if (!nlmsg_get_chain_writer(&nw, data_length, &m)) { - RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d", - data_length); - return (NULL); - } + struct nl_buf *nb, *orig; + u_int offset, msglen, orig_messages __diagused; + + RT_LOG(LOG_DEBUG3, "%p: in %u bytes %u messages", __func__, + nw->buf->datalen, nw->num_messages); + + orig = nw->buf; + nb = nl_buf_alloc(orig->datalen + SCRATCH_BUFFER_SIZE, M_NOWAIT); + if (__predict_false(nb == NULL)) + return (false); + nw->buf = nb; +#ifdef INVARIANTS + orig_messages = nw->num_messages; +#endif + nw->num_messages = 0; /* Assume correct headers. Buffer IS mutable */ - int count = 0; - for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) { - struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset]; - int msglen = NLMSG_ALIGN(hdr->nlmsg_len); - count++; + for (offset = 0; + offset + sizeof(struct nlmsghdr) <= orig->datalen; + offset += msglen) { + struct nlmsghdr *hdr = (struct nlmsghdr *)&orig->data[offset]; - if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) { + msglen = NLMSG_ALIGN(hdr->nlmsg_len); + if (!nlmsg_to_linux(hdr, nlp, nw)) { RT_LOG(LOG_DEBUG, "failed to process msg type %d", hdr->nlmsg_type); - m_freem(m); - return (NULL); + nl_buf_free(nb); + return (false); } - offset += msglen; } - nlmsg_flush(&nw); - RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count, - m ? 
m_length(m, NULL) : 0); - return (m); -} + MPASS(nw->num_messages == orig_messages); + MPASS(nw->buf == nb); + nl_buf_free(orig); + RT_LOG(LOG_DEBUG3, "%p: out %u bytes", __func__, offset); -static struct mbuf * -mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp) -{ - /* XXX: easiest solution, not optimized for performance */ - int data_length = m_length(m, NULL); - char *buf = malloc(data_length, M_LINUX, M_NOWAIT); - if (buf == NULL) { - RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message", - data_length); - m_freem(m); - return (NULL); - } - m_copydata(m, 0, data_length, buf); - m_freem(m); - - m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp); - free(buf, M_LINUX); - - return (m); + return (true); } static struct linux_netlink_provider linux_netlink_v1 = { - .mbufs_to_linux = mbufs_to_linux, .msgs_to_linux = nlmsgs_to_linux, .msg_from_linux = nlmsg_from_linux, }; void linux_netlink_register(void) { linux_netlink_p = &linux_netlink_v1; } void linux_netlink_deregister(void) { linux_netlink_p = NULL; } diff --git a/sys/netlink/ktest_netlink_message_writer.c b/sys/netlink/ktest_netlink_message_writer.c index e46065dd4bd2..805f52197f69 100644 --- a/sys/netlink/ktest_netlink_message_writer.c +++ b/sys/netlink/ktest_netlink_message_writer.c @@ -1,167 +1,113 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2023 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include -#include #include #include +#include #include #define KTEST_CALLER #include #ifdef INVARIANTS -struct test_mbuf_attrs { +struct test_nlbuf_attrs { uint32_t size; uint32_t expected_avail; - uint32_t expected_count; - uint32_t wtype; int waitok; }; -#define _OUT(_field) offsetof(struct test_mbuf_attrs, _field) -static const struct nlattr_parser nla_p_mbuf_w[] = { +#define _OUT(_field) offsetof(struct test_nlbuf_attrs, _field) +static const struct nlattr_parser nla_p_nlbuf_w[] = { { .type = 1, .off = _OUT(size), .cb = nlattr_get_uint32 }, { .type = 2, .off = _OUT(expected_avail), .cb = nlattr_get_uint32 }, - { .type = 3, .off = _OUT(expected_count), .cb = nlattr_get_uint32 }, - { .type = 4, .off = _OUT(wtype), .cb = nlattr_get_uint32 }, - { .type = 5, .off = _OUT(waitok), .cb = nlattr_get_uint32 }, + { .type = 3, .off = _OUT(waitok), .cb = nlattr_get_uint32 }, }; #undef _OUT -NL_DECLARE_ATTR_PARSER(mbuf_w_parser, nla_p_mbuf_w); +NL_DECLARE_ATTR_PARSER(nlbuf_w_parser, nla_p_nlbuf_w); static int -test_mbuf_parser(struct ktest_test_context *ctx, struct nlattr *nla) +test_nlbuf_parser(struct ktest_test_context *ctx, struct nlattr *nla) { - struct test_mbuf_attrs *attrs = 
npt_alloc(ctx->npt, sizeof(*attrs)); + struct test_nlbuf_attrs *attrs = npt_alloc(ctx->npt, sizeof(*attrs)); ctx->arg = attrs; if (attrs != NULL) - return (nl_parse_nested(nla, &mbuf_w_parser, ctx->npt, attrs)); + return (nl_parse_nested(nla, &nlbuf_w_parser, ctx->npt, attrs)); return (ENOMEM); } static int -test_mbuf_writer_allocation(struct ktest_test_context *ctx) +test_nlbuf_writer_allocation(struct ktest_test_context *ctx) { - struct test_mbuf_attrs *attrs = ctx->arg; - bool ret; + struct test_nlbuf_attrs *attrs = ctx->arg; struct nl_writer nw = {}; + u_int alloc_len; + bool ret; - ret = nlmsg_get_buf_type_wrapper(&nw, attrs->size, attrs->wtype, attrs->waitok); + ret = nlmsg_get_buf_wrapper(&nw, attrs->size, attrs->waitok); if (!ret) return (EINVAL); - int alloc_len = nw.alloc_len; + alloc_len = nw.buf->buflen; KTEST_LOG(ctx, "requested %u, allocated %d", attrs->size, alloc_len); - /* Set cleanup callback */ - nw.writer_target = NS_WRITER_TARGET_SOCKET; - nlmsg_set_callback_wrapper(&nw); - /* Mark enomem to avoid reallocation */ nw.enomem = true; if (nlmsg_reserve_data(&nw, alloc_len, void *) == NULL) { KTEST_LOG(ctx, "unable to get %d bytes from the writer", alloc_len); return (EINVAL); } - /* Mark as empty to free the storage */ - nw.offset = 0; - nlmsg_flush(&nw); + nl_buf_free(nw.buf); if (alloc_len < attrs->expected_avail) { KTEST_LOG(ctx, "alloc_len %d, expected %u", alloc_len, attrs->expected_avail); return (EINVAL); } return (0); } - -static int -test_mbuf_chain_allocation(struct ktest_test_context *ctx) -{ - struct test_mbuf_attrs *attrs = ctx->arg; - int mflags = attrs->waitok ? 
M_WAITOK : M_NOWAIT; - struct mbuf *chain = nl_get_mbuf_chain_wrapper(attrs->size, mflags); - - if (chain == NULL) { - KTEST_LOG(ctx, "nl_get_mbuf_chain(%u) returned NULL", attrs->size); - return (EINVAL); - } - - /* Iterate and check number of mbufs and space */ - uint32_t allocated_count = 0, allocated_size = 0; - for (struct mbuf *m = chain; m != NULL; m = m->m_next) { - allocated_count++; - allocated_size += M_SIZE(m); - } - m_freem(chain); - - if (attrs->expected_avail > allocated_size) { - KTEST_LOG(ctx, "expected/allocated avail(bytes) %u/%u" - " expected/allocated count %u/%u", - attrs->expected_avail, allocated_size, - attrs->expected_count, allocated_count); - return (EINVAL); - } - - if (attrs->expected_count > 0 && (attrs->expected_count != allocated_count)) { - KTEST_LOG(ctx, "expected/allocated avail(bytes) %u/%u" - " expected/allocated count %u/%u", - attrs->expected_avail, allocated_size, - attrs->expected_count, allocated_count); - return (EINVAL); - } - - return (0); -} #endif static const struct ktest_test_info tests[] = { #ifdef INVARIANTS { - .name = "test_mbuf_writer_allocation", - .desc = "test different mbuf sizes in the mbuf writer", - .func = &test_mbuf_writer_allocation, - .parse = &test_mbuf_parser, - }, - { - .name = "test_mbuf_chain_allocation", - .desc = "verify allocation different chain sizes", - .func = &test_mbuf_chain_allocation, - .parse = &test_mbuf_parser, + .name = "test_nlbuf_writer_allocation", + .desc = "test different buffer sizes in the netlink writer", + .func = &test_nlbuf_writer_allocation, + .parse = &test_nlbuf_parser, }, #endif }; KTEST_MODULE_DECLARE(ktest_netlink_message_writer, tests); diff --git a/sys/netlink/ktest_netlink_message_writer.h b/sys/netlink/ktest_netlink_message_writer.h index b7864bea59c9..39d2c5e597d6 100644 --- a/sys/netlink/ktest_netlink_message_writer.h +++ b/sys/netlink/ktest_netlink_message_writer.h @@ -1,60 +1,46 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2023 
Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef _NETLINK_KTEST_NETLINK_MESSAGE_WRITER_H_ #define _NETLINK_KTEST_NETLINK_MESSAGE_WRITER_H_ #if defined(_KERNEL) && defined(INVARIANTS) -bool nlmsg_get_buf_type_wrapper(struct nl_writer *nw, int size, int type, bool waitok); -void nlmsg_set_callback_wrapper(struct nl_writer *nw); -struct mbuf *nl_get_mbuf_chain_wrapper(int len, int malloc_flags); +bool nlmsg_get_buf_wrapper(struct nl_writer *nw, u_int size, bool waitok); #ifndef KTEST_CALLER bool -nlmsg_get_buf_type_wrapper(struct nl_writer *nw, int size, int type, bool waitok) +nlmsg_get_buf_wrapper(struct nl_writer *nw, u_int size, bool waitok) { - return (nlmsg_get_buf_type(nw, size, type, waitok)); -} - -void -nlmsg_set_callback_wrapper(struct nl_writer *nw) -{ - nlmsg_set_callback(nw); -} - -struct mbuf * -nl_get_mbuf_chain_wrapper(int len, int malloc_flags) -{ - return (nl_get_mbuf_chain(len, malloc_flags)); + return (nlmsg_get_buf(nw, size, waitok)); } #endif #endif #endif diff --git a/sys/netlink/netlink_domain.c b/sys/netlink/netlink_domain.c index ecd110d62c1f..3914d402fc04 100644 --- a/sys/netlink/netlink_domain.c +++ b/sys/netlink/netlink_domain.c @@ -1,833 +1,936 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This file contains socket and protocol bindings for netlink. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* priv_check */ #include #include #include #include #define DEBUG_MOD_NAME nl_domain #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_INFO); _Static_assert((NLP_MAX_GROUPS % 64) == 0, "NLP_MAX_GROUPS has to be multiple of 64"); _Static_assert(NLP_MAX_GROUPS >= 64, "NLP_MAX_GROUPS has to be at least 64"); #define NLCTL_TRACKER struct rm_priotracker nl_tracker #define NLCTL_RLOCK(_ctl) rm_rlock(&((_ctl)->ctl_lock), &nl_tracker) #define NLCTL_RUNLOCK(_ctl) rm_runlock(&((_ctl)->ctl_lock), &nl_tracker) #define NLCTL_WLOCK(_ctl) rm_wlock(&((_ctl)->ctl_lock)) #define NLCTL_WUNLOCK(_ctl) rm_wunlock(&((_ctl)->ctl_lock)) static u_long nl_sendspace = NLSNDQ; SYSCTL_ULONG(_net_netlink, OID_AUTO, sendspace, CTLFLAG_RW, &nl_sendspace, 0, "Default netlink socket send space"); static u_long nl_recvspace = NLSNDQ; SYSCTL_ULONG(_net_netlink, OID_AUTO, recvspace, CTLFLAG_RW, &nl_recvspace, 0, "Default netlink socket receive space"); extern u_long sb_max_adj; static u_long nl_maxsockbuf = 512 * 1024 * 
1024; /* 512M, XXX: init based on physmem */ static int sysctl_handle_nl_maxsockbuf(SYSCTL_HANDLER_ARGS); SYSCTL_OID(_net_netlink, OID_AUTO, nl_maxsockbuf, CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, &nl_maxsockbuf, 0, sysctl_handle_nl_maxsockbuf, "LU", "Maximum Netlink socket buffer size"); static unsigned int osd_slot_id = 0; void nl_osd_register(void) { osd_slot_id = osd_register(OSD_THREAD, NULL, NULL); } void nl_osd_unregister(void) { osd_deregister(OSD_THREAD, osd_slot_id); } struct nlpcb * _nl_get_thread_nlp(struct thread *td) { return (osd_get(OSD_THREAD, &td->td_osd, osd_slot_id)); } void nl_set_thread_nlp(struct thread *td, struct nlpcb *nlp) { NLP_LOG(LOG_DEBUG2, nlp, "Set thread %p nlp to %p (slot %u)", td, nlp, osd_slot_id); if (osd_set(OSD_THREAD, &td->td_osd, osd_slot_id, nlp) == 0) return; /* Failed, need to realloc */ void **rsv = osd_reserve(osd_slot_id); osd_set_reserved(OSD_THREAD, &td->td_osd, osd_slot_id, rsv, nlp); } /* * Looks up a nlpcb struct based on the @portid. Need to claim nlsock_mtx. 
* Returns nlpcb pointer if present else NULL */ static struct nlpcb * nl_port_lookup(uint32_t port_id) { struct nlpcb *nlp; CK_LIST_FOREACH(nlp, &V_nl_ctl->ctl_port_head, nl_port_next) { if (nlp->nl_port == port_id) return (nlp); } return (NULL); } static void nl_add_group_locked(struct nlpcb *nlp, unsigned int group_id) { MPASS(group_id <= NLP_MAX_GROUPS); --group_id; /* TODO: add family handler callback */ if (!nlp_unconstrained_vnet(nlp)) return; nlp->nl_groups[group_id / 64] |= (uint64_t)1 << (group_id % 64); } static void nl_del_group_locked(struct nlpcb *nlp, unsigned int group_id) { MPASS(group_id <= NLP_MAX_GROUPS); --group_id; nlp->nl_groups[group_id / 64] &= ~((uint64_t)1 << (group_id % 64)); } static bool nl_isset_group_locked(struct nlpcb *nlp, unsigned int group_id) { MPASS(group_id <= NLP_MAX_GROUPS); --group_id; return (nlp->nl_groups[group_id / 64] & ((uint64_t)1 << (group_id % 64))); } static uint32_t nl_get_groups_compat(struct nlpcb *nlp) { uint32_t groups_mask = 0; for (int i = 0; i < 32; i++) { if (nl_isset_group_locked(nlp, i + 1)) groups_mask |= (1 << i); } return (groups_mask); } static void -nl_send_one_group(struct mbuf *m, struct nlpcb *nlp, int num_messages, - int io_flags) +nl_send_one_group(struct nl_writer *nw, struct nl_buf *nb, struct nlpcb *nlp) { if (__predict_false(nlp->nl_flags & NLF_MSG_INFO)) - nl_add_msg_info(m); - nl_send_one(m, nlp, num_messages, io_flags); + nl_add_msg_info(nb); + nw->buf = nb; + (void)nl_send_one(nw); +} + +static struct nl_buf * +nl_buf_copy(struct nl_buf *nb) +{ + struct nl_buf *copy; + + copy = nl_buf_alloc(nb->buflen, M_NOWAIT); + if (__predict_false(copy == NULL)) + return (NULL); + memcpy(copy, nb, sizeof(*nb) + nb->buflen); + if (nb->control != NULL) { + copy->control = m_copym(nb->control, 0, M_COPYALL, M_NOWAIT); + if (__predict_false(copy->control == NULL)) { + nl_buf_free(copy); + return (NULL); + } + } + + return (copy); } /* - * Broadcasts message @m to the protocol @proto group specified by 
@group_id + * Broadcasts in the writer's buffer. */ -void -nl_send_group(struct mbuf *m, int num_messages, int proto, int group_id) +bool +nl_send_group(struct nl_writer *nw) { + struct nl_buf *nb = nw->buf; struct nlpcb *nlp_last = NULL; struct nlpcb *nlp; NLCTL_TRACKER; IF_DEBUG_LEVEL(LOG_DEBUG2) { - struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *); - NL_LOG(LOG_DEBUG2, "MCAST mbuf len %u msg type %d len %u to group %d/%d", - m->m_len, hdr->nlmsg_type, hdr->nlmsg_len, proto, group_id); + struct nlmsghdr *hdr = (struct nlmsghdr *)nb->data; + NL_LOG(LOG_DEBUG2, "MCAST len %u msg type %d len %u to group %d/%d", + nb->datalen, hdr->nlmsg_type, hdr->nlmsg_len, + nw->group.proto, nw->group.id); } + nw->buf = NULL; + struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); if (__predict_false(ctl == NULL)) { /* * Can be the case when notification is sent within VNET * which doesn't have any netlink sockets. */ - m_freem(m); - return; + nl_buf_free(nb); + return (false); } NLCTL_RLOCK(ctl); - int io_flags = NL_IOF_UNTRANSLATED; - CK_LIST_FOREACH(nlp, &ctl->ctl_pcb_head, nl_next) { - if (nl_isset_group_locked(nlp, group_id) && nlp->nl_proto == proto) { + if (nl_isset_group_locked(nlp, nw->group.id) && + nlp->nl_proto == nw->group.proto) { if (nlp_last != NULL) { - struct mbuf *m_copy; - m_copy = m_copym(m, 0, M_COPYALL, M_NOWAIT); - if (m_copy != NULL) - nl_send_one_group(m_copy, nlp_last, - num_messages, io_flags); - else { + struct nl_buf *copy; + + copy = nl_buf_copy(nb); + if (copy != NULL) { + nl_send_one_group(nw, copy, nlp_last); + } else { NLP_LOCK(nlp_last); if (nlp_last->nl_socket != NULL) sorwakeup(nlp_last->nl_socket); NLP_UNLOCK(nlp_last); } } nlp_last = nlp; } } if (nlp_last != NULL) - nl_send_one_group(m, nlp_last, num_messages, io_flags); + nl_send_one_group(nw, nb, nlp_last); else - m_freem(m); + nl_buf_free(nb); NLCTL_RUNLOCK(ctl); + + return (true); } bool nl_has_listeners(int netlink_family, uint32_t groups_mask) { return (V_nl_ctl != NULL); } static 
uint32_t nl_find_port(void) { /* * app can open multiple netlink sockets. * Start with current pid, if already taken, * try random numbers in 65k..256k+65k space, * avoiding clash with pids. */ if (nl_port_lookup(curproc->p_pid) == NULL) return (curproc->p_pid); for (int i = 0; i < 16; i++) { uint32_t nl_port = (arc4random() % 65536) + 65536 * 4; if (nl_port_lookup(nl_port) == 0) return (nl_port); NL_LOG(LOG_DEBUG3, "tried %u\n", nl_port); } return (curproc->p_pid); } static int nl_bind_locked(struct nlpcb *nlp, struct sockaddr_nl *snl) { if (nlp->nl_bound) { if (nlp->nl_port != snl->nl_pid) { NL_LOG(LOG_DEBUG, "bind() failed: program pid %d " "is different from provided pid %d", nlp->nl_port, snl->nl_pid); return (EINVAL); // XXX: better error } } else { if (snl->nl_pid == 0) snl->nl_pid = nl_find_port(); if (nl_port_lookup(snl->nl_pid) != NULL) return (EADDRINUSE); nlp->nl_port = snl->nl_pid; nlp->nl_bound = true; CK_LIST_INSERT_HEAD(&V_nl_ctl->ctl_port_head, nlp, nl_port_next); } for (int i = 0; i < 32; i++) { if (snl->nl_groups & ((uint32_t)1 << i)) nl_add_group_locked(nlp, i + 1); else nl_del_group_locked(nlp, i + 1); } return (0); } static int nl_pru_attach(struct socket *so, int proto, struct thread *td) { struct nlpcb *nlp; int error; if (__predict_false(netlink_unloading != 0)) return (EAFNOSUPPORT); error = nl_verify_proto(proto); if (error != 0) return (error); bool is_linux = SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX; NL_LOG(LOG_DEBUG2, "socket %p, %sPID %d: attaching socket to %s", so, is_linux ? 
"(linux) " : "", curproc->p_pid, nl_get_proto_name(proto)); /* Create per-VNET state on first socket init */ struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); if (ctl == NULL) ctl = vnet_nl_ctl_init(); KASSERT(V_nl_ctl != NULL, ("nl_attach: vnet_sock_init() failed")); MPASS(sotonlpcb(so) == NULL); nlp = malloc(sizeof(struct nlpcb), M_PCB, M_WAITOK | M_ZERO); error = soreserve(so, nl_sendspace, nl_recvspace); if (error != 0) { free(nlp, M_PCB); return (error); } - so->so_rcv.sb_mtx = &so->so_rcv_mtx; + TAILQ_INIT(&so->so_rcv.nl_queue); TAILQ_INIT(&so->so_snd.nl_queue); so->so_pcb = nlp; nlp->nl_socket = so; /* Copy so_cred to avoid having socket_var.h in every header */ nlp->nl_cred = so->so_cred; nlp->nl_proto = proto; nlp->nl_process_id = curproc->p_pid; nlp->nl_linux = is_linux; nlp->nl_unconstrained_vnet = !jailed_without_vnet(so->so_cred); nlp->nl_need_thread_setup = true; NLP_LOCK_INIT(nlp); refcount_init(&nlp->nl_refcount, 1); - nl_init_io(nlp); nlp->nl_taskqueue = taskqueue_create("netlink_socket", M_WAITOK, taskqueue_thread_enqueue, &nlp->nl_taskqueue); TASK_INIT(&nlp->nl_task, 0, nl_taskqueue_handler, nlp); taskqueue_start_threads(&nlp->nl_taskqueue, 1, PWAIT, "netlink_socket (PID %u)", nlp->nl_process_id); NLCTL_WLOCK(ctl); /* XXX: check ctl is still alive */ CK_LIST_INSERT_HEAD(&ctl->ctl_pcb_head, nlp, nl_next); NLCTL_WUNLOCK(ctl); soisconnected(so); return (0); } static int nl_pru_bind(struct socket *so, struct sockaddr *sa, struct thread *td) { struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); struct nlpcb *nlp = sotonlpcb(so); struct sockaddr_nl *snl = (struct sockaddr_nl *)sa; int error; NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); if (snl->nl_len != sizeof(*snl)) { NL_LOG(LOG_DEBUG, "socket %p, wrong sizeof(), ignoring bind()", so); return (EINVAL); } NLCTL_WLOCK(ctl); NLP_LOCK(nlp); error = nl_bind_locked(nlp, snl); NLP_UNLOCK(nlp); NLCTL_WUNLOCK(ctl); NL_LOG(LOG_DEBUG2, "socket %p, bind() to %u, groups %u, error %d", so, 
snl->nl_pid, snl->nl_groups, error); return (error); } static int nl_assign_port(struct nlpcb *nlp, uint32_t port_id) { struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); struct sockaddr_nl snl = { .nl_pid = port_id, }; int error; NLCTL_WLOCK(ctl); NLP_LOCK(nlp); snl.nl_groups = nl_get_groups_compat(nlp); error = nl_bind_locked(nlp, &snl); NLP_UNLOCK(nlp); NLCTL_WUNLOCK(ctl); NL_LOG(LOG_DEBUG3, "socket %p, port assign: %d, error: %d", nlp->nl_socket, port_id, error); return (error); } /* * nl_autobind_port binds a unused portid to @nlp * @nlp: pcb data for the netlink socket * @candidate_id: first id to consider */ static int nl_autobind_port(struct nlpcb *nlp, uint32_t candidate_id) { struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); uint32_t port_id = candidate_id; NLCTL_TRACKER; bool exist; int error = EADDRINUSE; for (int i = 0; i < 10; i++) { NL_LOG(LOG_DEBUG3, "socket %p, trying to assign port %d", nlp->nl_socket, port_id); NLCTL_RLOCK(ctl); exist = nl_port_lookup(port_id) != 0; NLCTL_RUNLOCK(ctl); if (!exist) { error = nl_assign_port(nlp, port_id); if (error != EADDRINUSE) break; } port_id++; } NL_LOG(LOG_DEBUG3, "socket %p, autobind to %d, error: %d", nlp->nl_socket, port_id, error); return (error); } static int nl_pru_connect(struct socket *so, struct sockaddr *sa, struct thread *td) { struct sockaddr_nl *snl = (struct sockaddr_nl *)sa; struct nlpcb *nlp; NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); if (snl->nl_len != sizeof(*snl)) { NL_LOG(LOG_DEBUG, "socket %p, wrong sizeof(), ignoring bind()", so); return (EINVAL); } nlp = sotonlpcb(so); if (!nlp->nl_bound) { int error = nl_autobind_port(nlp, td->td_proc->p_pid); if (error != 0) { NL_LOG(LOG_DEBUG, "socket %p, nl_autobind() failed: %d", so, error); return (error); } } /* XXX: Handle socket flags & multicast */ soisconnected(so); NL_LOG(LOG_DEBUG2, "socket %p, connect to %u", so, snl->nl_pid); return (0); } -static void -destroy_nlpcb(struct nlpcb *nlp) -{ - NLP_LOCK(nlp); - 
nl_free_io(nlp); - NLP_LOCK_DESTROY(nlp); - free(nlp, M_PCB); -} - static void destroy_nlpcb_epoch(epoch_context_t ctx) { struct nlpcb *nlp; nlp = __containerof(ctx, struct nlpcb, nl_epoch_ctx); - destroy_nlpcb(nlp); + NLP_LOCK_DESTROY(nlp); + free(nlp, M_PCB); } - static void nl_close(struct socket *so) { struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); MPASS(sotonlpcb(so) != NULL); struct nlpcb *nlp; struct nl_buf *nb; NL_LOG(LOG_DEBUG2, "detaching socket %p, PID %d", so, curproc->p_pid); nlp = sotonlpcb(so); /* Mark as inactive so no new work can be enqueued */ NLP_LOCK(nlp); bool was_bound = nlp->nl_bound; NLP_UNLOCK(nlp); /* Wait till all scheduled work has been completed */ taskqueue_drain_all(nlp->nl_taskqueue); taskqueue_free(nlp->nl_taskqueue); NLCTL_WLOCK(ctl); NLP_LOCK(nlp); if (was_bound) { CK_LIST_REMOVE(nlp, nl_port_next); NL_LOG(LOG_DEBUG3, "socket %p, unlinking bound pid %u", so, nlp->nl_port); } CK_LIST_REMOVE(nlp, nl_next); nlp->nl_socket = NULL; NLP_UNLOCK(nlp); NLCTL_WUNLOCK(ctl); so->so_pcb = NULL; while ((nb = TAILQ_FIRST(&so->so_snd.nl_queue)) != NULL) { TAILQ_REMOVE(&so->so_snd.nl_queue, nb, tailq); - free(nb, M_NETLINK); + nl_buf_free(nb); + } + while ((nb = TAILQ_FIRST(&so->so_rcv.nl_queue)) != NULL) { + TAILQ_REMOVE(&so->so_rcv.nl_queue, nb, tailq); + nl_buf_free(nb); } - sbdestroy(so, SO_RCV); NL_LOG(LOG_DEBUG3, "socket %p, detached", so); /* XXX: is delayed free needed? 
*/ NET_EPOCH_CALL(destroy_nlpcb_epoch, &nlp->nl_epoch_ctx); } static int nl_pru_disconnect(struct socket *so) { NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); MPASS(sotonlpcb(so) != NULL); return (ENOTCONN); } static int nl_pru_shutdown(struct socket *so) { NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); MPASS(sotonlpcb(so) != NULL); socantsendmore(so); return (0); } static int nl_sockaddr(struct socket *so, struct sockaddr *sa) { *(struct sockaddr_nl *)sa = (struct sockaddr_nl ){ /* TODO: set other fields */ .nl_len = sizeof(struct sockaddr_nl), .nl_family = AF_NETLINK, .nl_pid = sotonlpcb(so)->nl_port, }; return (0); } static int nl_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *m, struct mbuf *control, int flags, struct thread *td) { struct nlpcb *nlp = sotonlpcb(so); struct sockbuf *sb = &so->so_snd; struct nl_buf *nb; u_int len; int error; MPASS(m == NULL && uio != NULL); NL_LOG(LOG_DEBUG2, "sending message to kernel"); if (__predict_false(control != NULL)) { m_freem(control); return (EINVAL); } if (__predict_false(flags & MSG_OOB)) /* XXXGL: or just ignore? */ return (EOPNOTSUPP); if (__predict_false(uio->uio_resid < sizeof(struct nlmsghdr))) return (ENOBUFS); /* XXXGL: any better error? 
*/ NL_LOG(LOG_DEBUG3, "sending message to kernel async processing"); error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags)); if (error) return (error); len = roundup2(uio->uio_resid, 8) + SCRATCH_BUFFER_SIZE; if (nlp->nl_linux) len += roundup2(uio->uio_resid, 8); - nb = malloc(sizeof(*nb) + len, M_NETLINK, M_WAITOK); + nb = nl_buf_alloc(len, M_WAITOK); nb->datalen = uio->uio_resid; - nb->buflen = len; - nb->offset = 0; error = uiomove(&nb->data[0], uio->uio_resid, uio); if (__predict_false(error)) goto out; SOCK_SENDBUF_LOCK(so); restart: if (sb->sb_hiwat - sb->sb_ccc >= nb->datalen) { TAILQ_INSERT_TAIL(&sb->nl_queue, nb, tailq); sb->sb_acc += nb->datalen; sb->sb_ccc += nb->datalen; nb = NULL; } else if ((so->so_state & SS_NBIO) || (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) { SOCK_SENDBUF_UNLOCK(so); error = EWOULDBLOCK; goto out; } else { if ((error = sbwait(so, SO_SND)) != 0) { SOCK_SENDBUF_UNLOCK(so); goto out; } else goto restart; } SOCK_SENDBUF_UNLOCK(so); if (nb == NULL) { NL_LOG(LOG_DEBUG3, "enqueue %u bytes", nb->datalen); NLP_LOCK(nlp); nl_schedule_taskqueue(nlp); NLP_UNLOCK(nlp); } out: SOCK_IO_SEND_UNLOCK(so); - free(nb, M_NETLINK); + if (nb != NULL) + nl_buf_free(nb); return (error); } static int -nl_pru_rcvd(struct socket *so, int flags) +nl_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, + struct mbuf **mp, struct mbuf **controlp, int *flagsp) { + static const struct sockaddr_nl nl_empty_src = { + .nl_len = sizeof(struct sockaddr_nl), + .nl_family = PF_NETLINK, + .nl_pid = 0 /* comes from the kernel */ + }; + struct sockbuf *sb = &so->so_rcv; + struct nl_buf *nb; + int flags, error; + u_int overflow; + bool nonblock, trunc, peek; + + MPASS(mp == NULL && uio != NULL); + NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); - MPASS(sotonlpcb(so) != NULL); + + if (psa != NULL) + *psa = sodupsockaddr((const struct sockaddr *)&nl_empty_src, + M_WAITOK); + + flags = flagsp != NULL ? *flagsp & ~MSG_TRUNC : 0; + trunc = flagsp != NULL ? 
*flagsp & MSG_TRUNC : false; + nonblock = (so->so_state & SS_NBIO) || + (flags & (MSG_DONTWAIT | MSG_NBIO)); + peek = flags & MSG_PEEK; + + error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags)); + if (__predict_false(error)) + return (error); + + SOCK_RECVBUF_LOCK(so); + while ((nb = TAILQ_FIRST(&sb->nl_queue)) == NULL) { + if (nonblock) { + SOCK_RECVBUF_UNLOCK(so); + SOCK_IO_RECV_UNLOCK(so); + return (EWOULDBLOCK); + } + error = sbwait(so, SO_RCV); + if (error) { + SOCK_RECVBUF_UNLOCK(so); + SOCK_IO_RECV_UNLOCK(so); + return (error); + } + } + + /* + * XXXGL + * Here we emulate a PR_ATOMIC behavior of soreceive_generic() where + * we take only the first "record" in the socket buffer and send it + * to uio whole or truncated ignoring how many netlink messages are + * in the record and how much space is left in the uio. + * This needs to be fixed at next refactoring. First, we should perform + * truncation only if the very first message doesn't fit into uio. + * That will help an application with small buffer not to lose data. + * Second, we should continue working on the sb->nl_queue as long as + * there is more space in the uio. That will boost applications with + * large buffers. + */ + if (__predict_true(!peek)) { + TAILQ_REMOVE(&sb->nl_queue, nb, tailq); + sb->sb_acc -= nb->datalen; + sb->sb_ccc -= nb->datalen; + } + SOCK_RECVBUF_UNLOCK(so); + + overflow = __predict_false(nb->datalen > uio->uio_resid) ? 
+ nb->datalen - uio->uio_resid : 0; + error = uiomove(nb->data, (int)nb->datalen, uio); + if (__predict_false(overflow > 0)) { + flags |= MSG_TRUNC; + if (trunc) + uio->uio_resid -= overflow; + } + + if (controlp != NULL) { + *controlp = nb->control; + nb->control = NULL; + } + + if (__predict_true(!peek)) + nl_buf_free(nb); + + if (uio->uio_td) + uio->uio_td->td_ru.ru_msgrcv++; + + if (flagsp != NULL) + *flagsp |= flags; + + SOCK_IO_RECV_UNLOCK(so); nl_on_transmit(sotonlpcb(so)); - return (0); + return (error); } static int nl_getoptflag(int sopt_name) { switch (sopt_name) { case NETLINK_CAP_ACK: return (NLF_CAP_ACK); case NETLINK_EXT_ACK: return (NLF_EXT_ACK); case NETLINK_GET_STRICT_CHK: return (NLF_STRICT); case NETLINK_MSG_INFO: return (NLF_MSG_INFO); } return (0); } static int nl_ctloutput(struct socket *so, struct sockopt *sopt) { struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); struct nlpcb *nlp = sotonlpcb(so); uint32_t flag; int optval, error = 0; NLCTL_TRACKER; NL_LOG(LOG_DEBUG2, "%ssockopt(%p, %d)", (sopt->sopt_dir) ? 
"set" : "get", so, sopt->sopt_name); switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { case NETLINK_ADD_MEMBERSHIP: case NETLINK_DROP_MEMBERSHIP: error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error != 0) break; if (optval <= 0 || optval >= NLP_MAX_GROUPS) { error = ERANGE; break; } NL_LOG(LOG_DEBUG2, "ADD/DEL group %d", (uint32_t)optval); NLCTL_WLOCK(ctl); if (sopt->sopt_name == NETLINK_ADD_MEMBERSHIP) nl_add_group_locked(nlp, optval); else nl_del_group_locked(nlp, optval); NLCTL_WUNLOCK(ctl); break; case NETLINK_CAP_ACK: case NETLINK_EXT_ACK: case NETLINK_GET_STRICT_CHK: case NETLINK_MSG_INFO: error = sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (error != 0) break; flag = nl_getoptflag(sopt->sopt_name); if ((flag == NLF_MSG_INFO) && nlp->nl_linux) { error = EINVAL; break; } NLCTL_WLOCK(ctl); if (optval != 0) nlp->nl_flags |= flag; else nlp->nl_flags &= ~flag; NLCTL_WUNLOCK(ctl); break; default: error = ENOPROTOOPT; } break; case SOPT_GET: switch (sopt->sopt_name) { case NETLINK_LIST_MEMBERSHIPS: NLCTL_RLOCK(ctl); optval = nl_get_groups_compat(nlp); NLCTL_RUNLOCK(ctl); error = sooptcopyout(sopt, &optval, sizeof(optval)); break; case NETLINK_CAP_ACK: case NETLINK_EXT_ACK: case NETLINK_GET_STRICT_CHK: case NETLINK_MSG_INFO: NLCTL_RLOCK(ctl); optval = (nlp->nl_flags & nl_getoptflag(sopt->sopt_name)) != 0; NLCTL_RUNLOCK(ctl); error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = ENOPROTOOPT; } break; default: error = ENOPROTOOPT; } return (error); } static int sysctl_handle_nl_maxsockbuf(SYSCTL_HANDLER_ARGS) { int error = 0; u_long tmp_maxsockbuf = nl_maxsockbuf; error = sysctl_handle_long(oidp, &tmp_maxsockbuf, arg2, req); if (error || !req->newptr) return (error); if (tmp_maxsockbuf < MSIZE + MCLBYTES) return (EINVAL); nl_maxsockbuf = tmp_maxsockbuf; return (0); } static int nl_setsbopt(struct socket *so, struct sockopt *sopt) { int error, optval; bool result; if (sopt->sopt_name != 
SO_RCVBUF) return (sbsetopt(so, sopt)); /* Allow to override max buffer size in certain conditions */ error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error != 0) return (error); NL_LOG(LOG_DEBUG2, "socket %p, PID %d, SO_RCVBUF=%d", so, curproc->p_pid, optval); if (optval > sb_max_adj) { if (priv_check(curthread, PRIV_NET_ROUTE) != 0) return (EPERM); } SOCK_RECVBUF_LOCK(so); result = sbreserve_locked_limit(so, SO_RCV, optval, nl_maxsockbuf, curthread); SOCK_RECVBUF_UNLOCK(so); return (result ? 0 : ENOBUFS); } #define NETLINK_PROTOSW \ - .pr_flags = PR_ATOMIC | PR_ADDR | PR_WANTRCVD | \ - PR_SOCKBUF, \ + .pr_flags = PR_ATOMIC | PR_ADDR | PR_SOCKBUF, \ .pr_ctloutput = nl_ctloutput, \ .pr_setsbopt = nl_setsbopt, \ .pr_attach = nl_pru_attach, \ .pr_bind = nl_pru_bind, \ .pr_connect = nl_pru_connect, \ .pr_disconnect = nl_pru_disconnect, \ .pr_sosend = nl_sosend, \ - .pr_rcvd = nl_pru_rcvd, \ + .pr_soreceive = nl_soreceive, \ .pr_shutdown = nl_pru_shutdown, \ .pr_sockaddr = nl_sockaddr, \ .pr_close = nl_close static struct protosw netlink_raw_sw = { .pr_type = SOCK_RAW, NETLINK_PROTOSW }; static struct protosw netlink_dgram_sw = { .pr_type = SOCK_DGRAM, NETLINK_PROTOSW }; static struct domain netlinkdomain = { .dom_family = PF_NETLINK, .dom_name = "netlink", .dom_flags = DOMF_UNLOADABLE, .dom_nprotosw = 2, .dom_protosw = { &netlink_raw_sw, &netlink_dgram_sw }, }; DOMAIN_SET(netlink); diff --git a/sys/netlink/netlink_glue.c b/sys/netlink/netlink_glue.c index e7649c6b13dc..e4b52ffb191b 100644 --- a/sys/netlink/netlink_glue.c +++ b/sys/netlink/netlink_glue.c @@ -1,307 +1,306 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2023 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* priv_check */ #include #include #include #include #include #include /* Standard bits: built-in the kernel */ SYSCTL_NODE(_net, OID_AUTO, netlink, CTLFLAG_RD, 0, ""); SYSCTL_NODE(_net_netlink, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); MALLOC_DEFINE(M_NETLINK, "netlink", "Memory used for netlink packets"); /* Netlink-related callbacks needed to glue rtsock, netlink and linuxolator */ static void ignore_route_event(uint32_t fibnum, const struct rib_cmd_info *rc) { } static void ignore_ifmsg_event(struct ifnet *ifp, int if_flags_mask) { } static struct rtbridge ignore_cb = { .route_f = ignore_route_event, .ifmsg_f = ignore_ifmsg_event, }; void *linux_netlink_p = NULL; /* Callback pointer for Linux translator functions */ struct rtbridge *rtsock_callback_p = &ignore_cb; struct rtbridge *netlink_callback_p = &ignore_cb; /* * nlp accessors. 
* TODO: move to a separate file once the number grows. */ bool nlp_has_priv(struct nlpcb *nlp, int priv) { return (priv_check_cred(nlp->nl_cred, priv) == 0); } struct ucred * nlp_get_cred(struct nlpcb *nlp) { return (nlp->nl_cred); } uint32_t nlp_get_pid(const struct nlpcb *nlp) { return (nlp->nl_process_id); } bool nlp_unconstrained_vnet(const struct nlpcb *nlp) { return (nlp->nl_unconstrained_vnet); } #ifndef NETLINK /* Stub implementations for the loadable functions */ static bool get_stub_writer(struct nl_writer *nw) { bzero(nw, sizeof(*nw)); - nw->writer_type = NS_WRITER_TYPE_STUB; nw->enomem = true; return (false); } static bool nlmsg_get_unicast_writer_stub(struct nl_writer *nw, int size, struct nlpcb *nlp) { return (get_stub_writer(nw)); } static bool nlmsg_get_group_writer_stub(struct nl_writer *nw, int size, int protocol, int group_id) { return (get_stub_writer(nw)); } static bool nlmsg_get_chain_writer_stub(struct nl_writer *nw, int size, struct mbuf **pm) { return (get_stub_writer(nw)); } static bool nlmsg_flush_stub(struct nl_writer *nw __unused) { return (false); } static void nlmsg_ignore_limit_stub(struct nl_writer *nw __unused) { } static bool nlmsg_refill_buffer_stub(struct nl_writer *nw __unused, int required_len __unused) { return (false); } static bool nlmsg_add_stub(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, uint16_t flags, uint32_t len) { return (false); } static bool nlmsg_end_stub(struct nl_writer *nw __unused) { return (false); } static void nlmsg_abort_stub(struct nl_writer *nw __unused) { } static bool nlmsg_end_dump_stub(struct nl_writer *nw, int error, struct nlmsghdr *hdr) { return (false); } static int nl_modify_ifp_generic_stub(struct ifnet *ifp __unused, struct nl_parsed_link *lattrs __unused, const struct nlattr_bmask *bm __unused, struct nl_pstate *npt __unused) { return (ENOTSUP); } static void nl_store_ifp_cookie_stub(struct nl_pstate *npt __unused, struct ifnet *ifp __unused) { } static struct nlpcb * 
nl_get_thread_nlp_stub(struct thread *td __unused) { return (NULL); } const static struct nl_function_wrapper nl_stub = { .nlmsg_add = nlmsg_add_stub, .nlmsg_refill_buffer = nlmsg_refill_buffer_stub, .nlmsg_flush = nlmsg_flush_stub, .nlmsg_end = nlmsg_end_stub, .nlmsg_abort = nlmsg_abort_stub, .nlmsg_ignore_limit = nlmsg_ignore_limit_stub, .nlmsg_get_unicast_writer = nlmsg_get_unicast_writer_stub, .nlmsg_get_group_writer = nlmsg_get_group_writer_stub, .nlmsg_get_chain_writer = nlmsg_get_chain_writer_stub, .nlmsg_end_dump = nlmsg_end_dump_stub, .nl_modify_ifp_generic = nl_modify_ifp_generic_stub, .nl_store_ifp_cookie = nl_store_ifp_cookie_stub, .nl_get_thread_nlp = nl_get_thread_nlp_stub, }; /* * If the kernel is compiled with netlink as a module, * provide a way to introduce non-stub functioms */ static const struct nl_function_wrapper *_nl = &nl_stub; void nl_set_functions(const struct nl_function_wrapper *nl) { _nl = (nl != NULL) ? nl : &nl_stub; } /* Function wrappers */ bool nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp) { return (_nl->nlmsg_get_unicast_writer(nw, size, nlp)); } bool nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id) { return (_nl->nlmsg_get_group_writer(nw, size, protocol, group_id)); } bool nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm) { return (_nl->nlmsg_get_chain_writer(nw, size, pm)); } bool nlmsg_flush(struct nl_writer *nw) { return (_nl->nlmsg_flush(nw)); } void nlmsg_ignore_limit(struct nl_writer *nw) { _nl->nlmsg_ignore_limit(nw); } bool nlmsg_refill_buffer(struct nl_writer *nw, int required_len) { return (_nl->nlmsg_refill_buffer(nw, required_len)); } bool nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, uint16_t flags, uint32_t len) { return (_nl->nlmsg_add(nw, portid, seq, type, flags, len)); } bool nlmsg_end(struct nl_writer *nw) { return (_nl->nlmsg_end(nw)); } void nlmsg_abort(struct nl_writer *nw) { 
_nl->nlmsg_abort(nw); } bool nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr) { return (_nl->nlmsg_end_dump(nw, error, hdr)); } int nl_modify_ifp_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs, const struct nlattr_bmask *bm , struct nl_pstate *npt) { return (_nl->nl_modify_ifp_generic(ifp, lattrs, bm, npt)); } void nl_store_ifp_cookie(struct nl_pstate *npt, struct ifnet *ifp) { return (_nl->nl_store_ifp_cookie(npt, ifp)); } struct nlpcb * nl_get_thread_nlp(struct thread *td) { return (_nl->nl_get_thread_nlp(td)); } #endif /* !NETLINK */ diff --git a/sys/netlink/netlink_io.c b/sys/netlink/netlink_io.c index 7e2e098e4a9a..56e430cdcfa8 100644 --- a/sys/netlink/netlink_io.c +++ b/sys/netlink/netlink_io.c @@ -1,533 +1,410 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_io #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_INFO); /* * The logic below provide a p2p interface for receiving and * sending netlink data between the kernel and userland. */ -static const struct sockaddr_nl _nl_empty_src = { - .nl_len = sizeof(struct sockaddr_nl), - .nl_family = PF_NETLINK, - .nl_pid = 0 /* comes from the kernel */ -}; -static const struct sockaddr *nl_empty_src = (const struct sockaddr *)&_nl_empty_src; - static bool nl_process_nbuf(struct nl_buf *nb, struct nlpcb *nlp); -static void -queue_push(struct nl_io_queue *q, struct mbuf *mq) -{ - while (mq != NULL) { - struct mbuf *m = mq; - mq = mq->m_nextpkt; - m->m_nextpkt = NULL; - - q->length += m_length(m, NULL); - STAILQ_INSERT_TAIL(&q->head, m, m_stailqpkt); - } -} - -static struct mbuf * -queue_pop(struct nl_io_queue *q) +struct nl_buf * +nl_buf_alloc(size_t len, int mflag) { - if (!STAILQ_EMPTY(&q->head)) { - struct mbuf *m = STAILQ_FIRST(&q->head); - STAILQ_REMOVE_HEAD(&q->head, m_stailqpkt); - m->m_nextpkt = NULL; - q->length -= m_length(m, NULL); + struct nl_buf *nb; - return (m); + nb = malloc(sizeof(struct nl_buf) + len, M_NETLINK, mflag); + if (__predict_true(nb != NULL)) { + nb->buflen = len; + nb->datalen = nb->offset = 0; + nb->control = NULL; } - return (NULL); -} -static struct mbuf * -queue_head(const struct 
nl_io_queue *q) -{ - return (STAILQ_FIRST(&q->head)); + return (nb); } -static inline bool -queue_empty(const struct nl_io_queue *q) +void +nl_buf_free(struct nl_buf *nb) { - return (q->length == 0); -} -static void -queue_free(struct nl_io_queue *q) -{ - while (!STAILQ_EMPTY(&q->head)) { - struct mbuf *m = STAILQ_FIRST(&q->head); - STAILQ_REMOVE_HEAD(&q->head, m_stailqpkt); - m->m_nextpkt = NULL; - m_freem(m); - } - q->length = 0; + if (nb->control) + m_freem(nb->control); + free(nb, M_NETLINK); } void -nl_add_msg_info(struct mbuf *m) +nl_add_msg_info(struct nl_buf *nb) { + /* XXXGL pass nlp as arg? */ struct nlpcb *nlp = nl_get_thread_nlp(curthread); NL_LOG(LOG_DEBUG2, "Trying to recover nlp from thread %p: %p", curthread, nlp); if (nlp == NULL) return; /* Prepare what we want to encode - PID, socket PID & msg seq */ struct { struct nlattr nla; uint32_t val; } data[] = { { .nla.nla_len = sizeof(struct nlattr) + sizeof(uint32_t), .nla.nla_type = NLMSGINFO_ATTR_PROCESS_ID, .val = nlp->nl_process_id, }, { .nla.nla_len = sizeof(struct nlattr) + sizeof(uint32_t), .nla.nla_type = NLMSGINFO_ATTR_PORT_ID, .val = nlp->nl_port, }, }; - while (m->m_next != NULL) - m = m->m_next; - m->m_next = sbcreatecontrol(data, sizeof(data), + nb->control = sbcreatecontrol(data, sizeof(data), NETLINK_MSG_INFO, SOL_NETLINK, M_NOWAIT); - NL_LOG(LOG_DEBUG2, "Storing %u bytes of data, ctl: %p", - (unsigned)sizeof(data), m->m_next); -} - -static __noinline struct mbuf * -extract_msg_info(struct mbuf *m) -{ - while (m->m_next != NULL) { - if (m->m_next->m_type == MT_CONTROL) { - struct mbuf *ctl = m->m_next; - m->m_next = NULL; - return (ctl); - } - m = m->m_next; - } - return (NULL); + if (__predict_true(nb->control != NULL)) + NL_LOG(LOG_DEBUG2, "Storing %u bytes of control data, ctl: %p", + (unsigned)sizeof(data), nb->control); + else + NL_LOG(LOG_DEBUG2, "Failed to allocate %u bytes of control", + (unsigned)sizeof(data)); } void nl_schedule_taskqueue(struct nlpcb *nlp) { if 
(!nlp->nl_task_pending) { nlp->nl_task_pending = true; taskqueue_enqueue(nlp->nl_taskqueue, &nlp->nl_task); NL_LOG(LOG_DEBUG3, "taskqueue scheduled"); } else { NL_LOG(LOG_DEBUG3, "taskqueue schedule skipped"); } } -static bool -tx_check_locked(struct nlpcb *nlp) -{ - if (queue_empty(&nlp->tx_queue)) - return (true); - - /* - * Check if something can be moved from the internal TX queue - * to the socket queue. - */ - - bool appended = false; - struct sockbuf *sb = &nlp->nl_socket->so_rcv; - SOCKBUF_LOCK(sb); - - while (true) { - struct mbuf *m = queue_head(&nlp->tx_queue); - if (m != NULL) { - struct mbuf *ctl = NULL; - if (__predict_false(m->m_next != NULL)) - ctl = extract_msg_info(m); - if (sbappendaddr_locked(sb, nl_empty_src, m, ctl) != 0) { - /* appended successfully */ - queue_pop(&nlp->tx_queue); - appended = true; - } else - break; - } else - break; - } - - SOCKBUF_UNLOCK(sb); - - if (appended) - sorwakeup(nlp->nl_socket); - - return (queue_empty(&nlp->tx_queue)); -} - static bool nl_process_received_one(struct nlpcb *nlp) { struct socket *so = nlp->nl_socket; - struct sockbuf *sb = &so->so_snd; + struct sockbuf *sb; struct nl_buf *nb; bool reschedule = false; NLP_LOCK(nlp); nlp->nl_task_pending = false; + NLP_UNLOCK(nlp); - if (!tx_check_locked(nlp)) { - /* TX overflow queue still not empty, ignore RX */ - NLP_UNLOCK(nlp); + /* + * Do not process queued up requests if there is no space to queue + * replies. 
+ */ + sb = &so->so_rcv; + SOCK_RECVBUF_LOCK(so); + if (sb->sb_hiwat <= sb->sb_ccc) { + SOCK_RECVBUF_UNLOCK(so); return (false); } + SOCK_RECVBUF_UNLOCK(so); - int prev_hiwat = nlp->tx_queue.hiwat; - NLP_UNLOCK(nlp); - + sb = &so->so_snd; SOCK_SENDBUF_LOCK(so); while ((nb = TAILQ_FIRST(&sb->nl_queue)) != NULL) { TAILQ_REMOVE(&sb->nl_queue, nb, tailq); SOCK_SENDBUF_UNLOCK(so); reschedule = nl_process_nbuf(nb, nlp); SOCK_SENDBUF_LOCK(so); if (reschedule) { sb->sb_acc -= nb->datalen; sb->sb_ccc -= nb->datalen; /* XXXGL: potentially can reduce lock&unlock count. */ sowwakeup_locked(so); - free(nb, M_NETLINK); + nl_buf_free(nb); SOCK_SENDBUF_LOCK(so); } else { TAILQ_INSERT_HEAD(&sb->nl_queue, nb, tailq); break; } } SOCK_SENDBUF_UNLOCK(so); - if (nlp->tx_queue.hiwat > prev_hiwat) { - NLP_LOG(LOG_DEBUG, nlp, "TX override peaked to %d", nlp->tx_queue.hiwat); - - } return (reschedule); } static void nl_process_received(struct nlpcb *nlp) { NL_LOG(LOG_DEBUG3, "taskqueue called"); if (__predict_false(nlp->nl_need_thread_setup)) { nl_set_thread_nlp(curthread, nlp); NLP_LOCK(nlp); nlp->nl_need_thread_setup = false; NLP_UNLOCK(nlp); } while (nl_process_received_one(nlp)) ; } -void -nl_init_io(struct nlpcb *nlp) -{ - STAILQ_INIT(&nlp->tx_queue.head); -} - -void -nl_free_io(struct nlpcb *nlp) -{ - queue_free(&nlp->tx_queue); -} - /* * Called after some data have been read from the socket. */ void nl_on_transmit(struct nlpcb *nlp) { NLP_LOCK(nlp); struct socket *so = nlp->nl_socket; if (__predict_false(nlp->nl_dropped_bytes > 0 && so != NULL)) { unsigned long dropped_bytes = nlp->nl_dropped_bytes; unsigned long dropped_messages = nlp->nl_dropped_messages; nlp->nl_dropped_bytes = 0; nlp->nl_dropped_messages = 0; struct sockbuf *sb = &so->so_rcv; NLP_LOG(LOG_DEBUG, nlp, "socket RX overflowed, %lu messages (%lu bytes) dropped. 
" - "bytes: [%u/%u] mbufs: [%u/%u]", dropped_messages, dropped_bytes, - sb->sb_ccc, sb->sb_hiwat, sb->sb_mbcnt, sb->sb_mbmax); + "bytes: [%u/%u]", dropped_messages, dropped_bytes, + sb->sb_ccc, sb->sb_hiwat); /* TODO: send netlink message */ } nl_schedule_taskqueue(nlp); NLP_UNLOCK(nlp); } void nl_taskqueue_handler(void *_arg, int pending) { struct nlpcb *nlp = (struct nlpcb *)_arg; CURVNET_SET(nlp->nl_socket->so_vnet); nl_process_received(nlp); CURVNET_RESTORE(); } -static __noinline void -queue_push_tx(struct nlpcb *nlp, struct mbuf *m) -{ - queue_push(&nlp->tx_queue, m); - nlp->nl_tx_blocked = true; - - if (nlp->tx_queue.length > nlp->tx_queue.hiwat) - nlp->tx_queue.hiwat = nlp->tx_queue.length; -} - /* - * Tries to send @m to the socket @nlp. - * - * @m: mbuf(s) to send to. Consumed in any case. - * @nlp: socket to send to - * @cnt: number of messages in @m - * @io_flags: combination of NL_IOF_* flags + * Tries to send current data buffer from writer. * * Returns true on success. * If no queue overrunes happened, wakes up socket owner. 
*/ bool -nl_send_one(struct mbuf *m, struct nlpcb *nlp, int num_messages, int io_flags) +nl_send_one(struct nl_writer *nw) { - bool untranslated = io_flags & NL_IOF_UNTRANSLATED; - bool ignore_limits = io_flags & NL_IOF_IGNORE_LIMIT; - bool result = true; + struct nlpcb *nlp = nw->nlp; + struct socket *so = nlp->nl_socket; + struct sockbuf *sb = &so->so_rcv; + struct nl_buf *nb; + + MPASS(nw->hdr == NULL); IF_DEBUG_LEVEL(LOG_DEBUG2) { - struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *); + struct nlmsghdr *hdr = (struct nlmsghdr *)nw->buf->data; NLP_LOG(LOG_DEBUG2, nlp, - "TX mbuf len %u msgs %u msg type %d first hdrlen %u io_flags %X", - m_length(m, NULL), num_messages, hdr->nlmsg_type, hdr->nlmsg_len, - io_flags); + "TX len %u msgs %u msg type %d first hdrlen %u", + nw->buf->datalen, nw->num_messages, hdr->nlmsg_type, + hdr->nlmsg_len); } - if (__predict_false(nlp->nl_linux && linux_netlink_p != NULL && untranslated)) { - m = linux_netlink_p->mbufs_to_linux(nlp->nl_proto, m, nlp); - if (m == NULL) - return (false); + if (nlp->nl_linux && linux_netlink_p != NULL && + __predict_false(!linux_netlink_p->msgs_to_linux(nw, nlp))) { + nl_buf_free(nw->buf); + nw->buf = NULL; + return (false); } - NLP_LOCK(nlp); + nb = nw->buf; + nw->buf = NULL; - if (__predict_false(nlp->nl_socket == NULL)) { + SOCK_RECVBUF_LOCK(so); + if (!nw->ignore_limit && __predict_false(sb->sb_hiwat <= sb->sb_ccc)) { + SOCK_RECVBUF_UNLOCK(so); + NLP_LOCK(nlp); + nlp->nl_dropped_bytes += nb->datalen; + nlp->nl_dropped_messages += nw->num_messages; + NLP_LOG(LOG_DEBUG2, nlp, "RX overflow: %lu m (+%d), %lu b (+%d)", + (unsigned long)nlp->nl_dropped_messages, nw->num_messages, + (unsigned long)nlp->nl_dropped_bytes, nb->datalen); NLP_UNLOCK(nlp); - m_freem(m); + nl_buf_free(nb); return (false); - } - - if (!queue_empty(&nlp->tx_queue)) { - if (ignore_limits) { - queue_push_tx(nlp, m); - } else { - m_free(m); - result = false; - } - NLP_UNLOCK(nlp); - return (result); - } - - struct socket *so = 
nlp->nl_socket; - struct mbuf *ctl = NULL; - if (__predict_false(m->m_next != NULL)) - ctl = extract_msg_info(m); - if (sbappendaddr(&so->so_rcv, nl_empty_src, m, ctl) != 0) { - sorwakeup(so); - NLP_LOG(LOG_DEBUG3, nlp, "appended data & woken up"); } else { - if (ignore_limits) { - queue_push_tx(nlp, m); - } else { - /* - * Store dropped data so it can be reported - * on the next read - */ - nlp->nl_dropped_bytes += m_length(m, NULL); - nlp->nl_dropped_messages += num_messages; - NLP_LOG(LOG_DEBUG2, nlp, "RX oveflow: %lu m (+%d), %lu b (+%d)", - (unsigned long)nlp->nl_dropped_messages, num_messages, - (unsigned long)nlp->nl_dropped_bytes, m_length(m, NULL)); - soroverflow(so); - m_freem(m); - result = false; + bool full; + + TAILQ_INSERT_TAIL(&sb->nl_queue, nb, tailq); + sb->sb_acc += nb->datalen; + sb->sb_ccc += nb->datalen; + full = sb->sb_hiwat <= sb->sb_ccc; + sorwakeup_locked(so); + if (full) { + NLP_LOCK(nlp); + nlp->nl_tx_blocked = true; + NLP_UNLOCK(nlp); } + return (true); } - NLP_UNLOCK(nlp); - - return (result); } static int nl_receive_message(struct nlmsghdr *hdr, int remaining_length, struct nlpcb *nlp, struct nl_pstate *npt) { nl_handler_f handler = nl_handlers[nlp->nl_proto].cb; int error = 0; NLP_LOG(LOG_DEBUG2, nlp, "msg len: %u type: %d: flags: 0x%X seq: %u pid: %u", hdr->nlmsg_len, hdr->nlmsg_type, hdr->nlmsg_flags, hdr->nlmsg_seq, hdr->nlmsg_pid); if (__predict_false(hdr->nlmsg_len > remaining_length)) { NLP_LOG(LOG_DEBUG, nlp, "message is not entirely present: want %d got %d", hdr->nlmsg_len, remaining_length); return (EINVAL); } else if (__predict_false(hdr->nlmsg_len < sizeof(*hdr))) { NL_LOG(LOG_DEBUG, "message too short: %d", hdr->nlmsg_len); return (EINVAL); } /* Stamp each message with sender pid */ hdr->nlmsg_pid = nlp->nl_port; npt->hdr = hdr; if (hdr->nlmsg_flags & NLM_F_REQUEST && hdr->nlmsg_type >= NLMSG_MIN_TYPE) { NL_LOG(LOG_DEBUG2, "handling message with msg type: %d", hdr->nlmsg_type); if (nlp->nl_linux && linux_netlink_p != 
NULL) { struct nlmsghdr *hdr_orig = hdr; hdr = linux_netlink_p->msg_from_linux(nlp->nl_proto, hdr, npt); if (hdr == NULL) { /* Failed to translate to kernel format. Report an error back */ hdr = hdr_orig; npt->hdr = hdr; if (hdr->nlmsg_flags & NLM_F_ACK) nlmsg_ack(nlp, EOPNOTSUPP, hdr, npt); return (0); } } error = handler(hdr, npt); NL_LOG(LOG_DEBUG2, "retcode: %d", error); } if ((hdr->nlmsg_flags & NLM_F_ACK) || (error != 0 && error != EINTR)) { if (!npt->nw->suppress_ack) { NL_LOG(LOG_DEBUG3, "ack"); nlmsg_ack(nlp, error, hdr, npt); } } return (0); } static void npt_clear(struct nl_pstate *npt) { lb_clear(&npt->lb); npt->error = 0; npt->err_msg = NULL; npt->err_off = 0; npt->hdr = NULL; npt->nw->suppress_ack = false; } /* * Processes an incoming packet, which can contain multiple netlink messages */ static bool nl_process_nbuf(struct nl_buf *nb, struct nlpcb *nlp) { struct nlmsghdr *hdr; int error; NL_LOG(LOG_DEBUG3, "RX netlink buf %p on %p", nb, nlp->nl_socket); struct nl_writer nw = {}; if (!nlmsg_get_unicast_writer(&nw, NLMSG_SMALL, nlp)) { NL_LOG(LOG_DEBUG, "error allocating socket writer"); return (true); } nlmsg_ignore_limit(&nw); struct nl_pstate npt = { .nlp = nlp, .lb.base = &nb->data[roundup2(nb->datalen, 8)], .lb.size = nb->buflen - roundup2(nb->datalen, 8), .nw = &nw, .strict = nlp->nl_flags & NLF_STRICT, }; for (; nb->offset + sizeof(struct nlmsghdr) <= nb->datalen;) { hdr = (struct nlmsghdr *)&nb->data[nb->offset]; /* Save length prior to calling handler */ int msglen = NLMSG_ALIGN(hdr->nlmsg_len); NL_LOG(LOG_DEBUG3, "parsing offset %d/%d", nb->offset, nb->datalen); npt_clear(&npt); error = nl_receive_message(hdr, nb->datalen - nb->offset, nlp, &npt); nb->offset += msglen; if (__predict_false(error != 0 || nlp->nl_tx_blocked)) break; } NL_LOG(LOG_DEBUG3, "packet parsing done"); nlmsg_flush(&nw); if (nlp->nl_tx_blocked) { NLP_LOCK(nlp); nlp->nl_tx_blocked = false; NLP_UNLOCK(nlp); return (false); } else return (true); } diff --git 
a/sys/netlink/netlink_linux.h b/sys/netlink/netlink_linux.h index 6dd2a964a64a..2d9f8d1b7bd6 100644 --- a/sys/netlink/netlink_linux.h +++ b/sys/netlink/netlink_linux.h @@ -1,54 +1,53 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef _NETLINK_LINUX_VAR_H_ #define _NETLINK_LINUX_VAR_H_ +#ifdef _KERNEL /* * The file contains headers for the bridge interface between * linux[_common] module and the netlink module */ struct nlpcb; struct nl_pstate; +struct nl_writer; -typedef struct mbuf *mbufs_to_linux_cb_t(int netlink_family, struct mbuf *m, - struct nlpcb *nlp); -typedef struct mbuf *msgs_to_linux_cb_t(int netlink_family, char *buf, int data_length, - struct nlpcb *nlp); +typedef bool msgs_to_linux_cb_t(struct nl_writer *nw, struct nlpcb *nlp); typedef struct nlmsghdr *msg_from_linux_cb_t(int netlink_family, struct nlmsghdr *hdr, struct nl_pstate *npt); struct linux_netlink_provider { - mbufs_to_linux_cb_t *mbufs_to_linux; msgs_to_linux_cb_t *msgs_to_linux; msg_from_linux_cb_t *msg_from_linux; }; extern struct linux_netlink_provider *linux_netlink_p; #endif +#endif diff --git a/sys/netlink/netlink_message_writer.c b/sys/netlink/netlink_message_writer.c index dafcca6ef038..0b85378b41b6 100644 --- a/sys/netlink/netlink_message_writer.c +++ b/sys/netlink/netlink_message_writer.c @@ -1,838 +1,374 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include -#include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_writer #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_INFO); -/* - * The goal of this file is to provide convenient message writing KPI on top of - * different storage methods (mbufs, uio, temporary memory chunks). - * - * The main KPI guarantee is that the (last) message always resides in the contiguous - * memory buffer, so one is able to update the header after writing the entire message. - * - * This guarantee comes with a side effect of potentially reallocating underlying - * buffer, so one needs to update the desired pointers after something is added - * to the header. - * - * Messaging layer contains hooks performing transparent Linux translation for the messages. 
- * - * There are 3 types of supported targets: - * * socket (adds mbufs to the socket buffer, used for message replies) - * * group (sends mbuf/chain to the specified groups, used for the notifications) - * * chain (returns mbuf chain, used in Linux message translation code) - * - * There are 3 types of storage: - * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message - * fits in NLMBUFSIZE) - * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs - * to be larger than one supported by NS_WRITER_TYPE_MBUF) - * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for - * Linux sockets, calls translation hook prior to sending messages to the socket). - * - * Internally, KPI switches between different types of storage when memory requirements - * change. It happens transparently to the caller. - */ - -/* - * Uma zone for the mbuf-based Netlink storage - */ -static uma_zone_t nlmsg_zone; - -static void -nl_free_mbuf_storage(struct mbuf *m) -{ - uma_zfree(nlmsg_zone, m->m_ext.ext_buf); -} - -static int -nl_setup_mbuf_storage(void *mem, int size, void *arg, int how __unused) -{ - struct mbuf *m = (struct mbuf *)arg; - - if (m != NULL) - m_extadd(m, mem, size, nl_free_mbuf_storage, NULL, NULL, 0, EXT_MOD_TYPE); - - return (0); -} - -static struct mbuf * -nl_get_mbuf_flags(int size, int malloc_flags, int mbuf_flags) -{ - struct mbuf *m, *m_storage; - - if (size <= MHLEN) - return (m_get2(size, malloc_flags, MT_DATA, mbuf_flags)); - - if (__predict_false(size > NLMBUFSIZE)) - return (NULL); - - m = m_gethdr(malloc_flags, MT_DATA); - if (m == NULL) - return (NULL); - - m_storage = uma_zalloc_arg(nlmsg_zone, m, malloc_flags); - if (m_storage == NULL) { - m_free_raw(m); - return (NULL); - } - - return (m); -} - -static struct mbuf * -nl_get_mbuf(int size, int malloc_flags) -{ - return (nl_get_mbuf_flags(size, malloc_flags, M_PKTHDR)); -} - -/* - * Gets a chain of Netlink mbufs. 
- * This is strip-down version of m_getm2() - */ -static struct mbuf * -nl_get_mbuf_chain(int len, int malloc_flags) -{ - struct mbuf *m_chain = NULL, *m_tail = NULL; - int mbuf_flags = M_PKTHDR; - - while (len > 0) { - int sz = len > NLMBUFSIZE ? NLMBUFSIZE: len; - struct mbuf *m = nl_get_mbuf_flags(sz, malloc_flags, mbuf_flags); - - if (m == NULL) { - m_freem(m_chain); - return (NULL); - } - - /* Book keeping. */ - len -= M_SIZE(m); - if (m_tail != NULL) - m_tail->m_next = m; - else - m_chain = m; - m_tail = m; - mbuf_flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ - } - - return (m_chain); -} - -void -nl_init_msg_zone(void) -{ - nlmsg_zone = uma_zcreate("netlink", NLMBUFSIZE, nl_setup_mbuf_storage, - NULL, NULL, NULL, UMA_ALIGN_PTR, 0); -} - -void -nl_destroy_msg_zone(void) -{ - uma_zdestroy(nlmsg_zone); -} - - -typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok); -typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int buflen, int cnt); - -struct nlwriter_ops { - nlwriter_op_init *init; - nlwriter_op_write *write_socket; - nlwriter_op_write *write_group; - nlwriter_op_write *write_chain; -}; - -/* - * NS_WRITER_TYPE_BUF - * Writes message to a temporary memory buffer, - * flushing to the socket/group when buffer size limit is reached - */ -static bool -nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok) -{ - int mflag = waitok ? 
M_WAITOK : M_NOWAIT; - nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO); - if (__predict_false(nw->_storage == NULL)) - return (false); - nw->alloc_len = size; - nw->offset = 0; - nw->hdr = NULL; - nw->data = nw->_storage; - nw->writer_type = NS_WRITER_TYPE_BUF; - nw->malloc_flag = mflag; - nw->num_messages = 0; - nw->enomem = false; - return (true); -} - static bool -nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) +nlmsg_get_buf(struct nl_writer *nw, u_int len, bool waitok) { - NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); - if (__predict_false(datalen == 0)) { - free(buf, M_NETLINK); - return (true); - } + const int mflag = waitok ? M_WAITOK : M_NOWAIT; - struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); - if (__predict_false(m == NULL)) { - /* XXX: should we set sorcverr? */ - free(buf, M_NETLINK); - return (false); - } - m_append(m, datalen, buf); - free(buf, M_NETLINK); + MPASS(nw->buf == NULL); - int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; - return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags)); -} - -static bool -nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) -{ - NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen, - nw->arg.group.proto, nw->arg.group.id); - if (__predict_false(datalen == 0)) { - free(buf, M_NETLINK); - return (true); - } + NL_LOG(LOG_DEBUG3, "Setting up nw %p len %u %s", nw, len, + waitok ? 
"wait" : "nowait"); - struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); - if (__predict_false(m == NULL)) { - free(buf, M_NETLINK); + nw->buf = nl_buf_alloc(len, mflag); + if (__predict_false(nw->buf == NULL)) return (false); - } - bool success = m_append(m, datalen, buf) != 0; - free(buf, M_NETLINK); - - if (!success) - return (false); - - nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); - return (true); -} - -static bool -nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) -{ - struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr); - NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); - - if (__predict_false(datalen == 0)) { - free(buf, M_NETLINK); - return (true); - } - - if (*m0 == NULL) { - struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); - - if (__predict_false(m == NULL)) { - free(buf, M_NETLINK); - return (false); - } - *m0 = m; - } - if (__predict_false(m_append(*m0, datalen, buf) == 0)) { - free(buf, M_NETLINK); - return (false); - } - return (true); -} - - -/* - * NS_WRITER_TYPE_MBUF - * Writes message to the allocated mbuf, - * flushing to socket/group when mbuf size limit is reached. - * This is the most efficient mechanism as it avoids double-copying. - * - * Allocates a single mbuf suitable to store up to @size bytes of data. - * If size < MHLEN (around 160 bytes), allocates mbuf with pkghdr. - * If the size <= NLMBUFSIZE (2k), allocate mbuf+storage out of nlmsg_zone. - * Returns NULL on greater size or the allocation failure. - */ -static bool -nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok) -{ - int mflag = waitok ? 
M_WAITOK : M_NOWAIT; - struct mbuf *m = nl_get_mbuf(size, mflag); - - if (__predict_false(m == NULL)) - return (false); - nw->alloc_len = M_TRAILINGSPACE(m); - nw->offset = 0; nw->hdr = NULL; - nw->_storage = (void *)m; - nw->data = mtod(m, void *); - nw->writer_type = NS_WRITER_TYPE_MBUF; nw->malloc_flag = mflag; nw->num_messages = 0; nw->enomem = false; - memset(nw->data, 0, size); - NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p", - m, size, nw->alloc_len, nw->data); - return (true); -} - -static bool -nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) -{ - struct mbuf *m = (struct mbuf *)buf; - NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); - - if (__predict_false(datalen == 0)) { - m_freem(m); - return (true); - } - - m->m_pkthdr.len = datalen; - m->m_len = datalen; - int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; - return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags)); -} - -static bool -nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) -{ - struct mbuf *m = (struct mbuf *)buf; - NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen, - nw->arg.group.proto, nw->arg.group.id); - - if (__predict_false(datalen == 0)) { - m_freem(m); - return (true); - } - m->m_pkthdr.len = datalen; - m->m_len = datalen; - nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); return (true); } -static bool -nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) -{ - struct mbuf *m_new = (struct mbuf *)buf; - struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr); - - NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr); - - if (__predict_false(datalen == 0)) { - m_freem(m_new); - return (true); - } - - m_new->m_pkthdr.len = datalen; - m_new->m_len = datalen; - - if (*m0 == NULL) { - *m0 = m_new; - } else { - struct mbuf *m_last; - for (m_last = *m0; m_last->m_next != NULL; 
m_last = m_last->m_next) - ; - m_last->m_next = m_new; - (*m0)->m_pkthdr.len += datalen; - } - - return (true); -} - -/* - * NS_WRITER_TYPE_LBUF - * Writes message to the allocated memory buffer, - * flushing to socket/group when mbuf size limit is reached. - * Calls linux handler to rewrite messages before sending to the socket. - */ -static bool -nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok) -{ - int mflag = waitok ? M_WAITOK : M_NOWAIT; - size = roundup2(size, sizeof(void *)); - int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE; - char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO); - if (__predict_false(buf == NULL)) - return (false); - - /* Fill buffer header first */ - struct linear_buffer *lb = (struct linear_buffer *)buf; - lb->base = &buf[sizeof(struct linear_buffer) + size]; - lb->size = size + SCRATCH_BUFFER_SIZE; - - nw->alloc_len = size; - nw->offset = 0; - nw->hdr = NULL; - nw->_storage = buf; - nw->data = (char *)(lb + 1); - nw->malloc_flag = mflag; - nw->writer_type = NS_WRITER_TYPE_LBUF; - nw->num_messages = 0; - nw->enomem = false; - return (true); -} - -static bool -nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) -{ - struct linear_buffer *lb = (struct linear_buffer *)buf; - char *data = (char *)(lb + 1); - struct nlpcb *nlp = (struct nlpcb *)(nw->arg.ptr); - - if (__predict_false(datalen == 0)) { - free(buf, M_NETLINK); - return (true); - } - - struct mbuf *m = NULL; - if (linux_netlink_p != NULL) - m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp); - free(buf, M_NETLINK); - - if (__predict_false(m == NULL)) { - /* XXX: should we set sorcverr? */ - return (false); - } - - int io_flags = (nw->ignore_limit) ? 
NL_IOF_IGNORE_LIMIT : 0; - return (nl_send_one(m, nlp, cnt, io_flags)); -} - -/* Shouldn't be called (maybe except Linux code originating message) */ -static bool -nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) -{ - struct linear_buffer *lb = (struct linear_buffer *)buf; - char *data = (char *)(lb + 1); - - if (__predict_false(datalen == 0)) { - free(buf, M_NETLINK); - return (true); - } - - struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag); - if (__predict_false(m == NULL)) { - free(buf, M_NETLINK); - return (false); - } - m_append(m, datalen, data); - free(buf, M_NETLINK); - - nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id); - return (true); -} - -static const struct nlwriter_ops nlmsg_writers[] = { - /* NS_WRITER_TYPE_MBUF */ - { - .init = nlmsg_get_ns_mbuf, - .write_socket = nlmsg_write_socket_mbuf, - .write_group = nlmsg_write_group_mbuf, - .write_chain = nlmsg_write_chain_mbuf, - }, - /* NS_WRITER_TYPE_BUF */ - { - .init = nlmsg_get_ns_buf, - .write_socket = nlmsg_write_socket_buf, - .write_group = nlmsg_write_group_buf, - .write_chain = nlmsg_write_chain_buf, - }, - /* NS_WRITER_TYPE_LBUF */ - { - .init = nlmsg_get_ns_lbuf, - .write_socket = nlmsg_write_socket_lbuf, - .write_group = nlmsg_write_group_lbuf, - }, -}; - -static void -nlmsg_set_callback(struct nl_writer *nw) -{ - const struct nlwriter_ops *pops = &nlmsg_writers[nw->writer_type]; - - switch (nw->writer_target) { - case NS_WRITER_TARGET_SOCKET: - nw->cb = pops->write_socket; - break; - case NS_WRITER_TARGET_GROUP: - nw->cb = pops->write_group; - break; - case NS_WRITER_TARGET_CHAIN: - nw->cb = pops->write_chain; - break; - default: - panic("not implemented"); - } -} - -static bool -nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok) -{ - MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0])); - NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type); - return (nlmsg_writers[type].init(nw, size, 
waitok)); -} - -static bool -nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux) -{ - int type; - - if (!is_linux) { - if (__predict_true(size <= NLMBUFSIZE)) - type = NS_WRITER_TYPE_MBUF; - else - type = NS_WRITER_TYPE_BUF; - } else - type = NS_WRITER_TYPE_LBUF; - return (nlmsg_get_buf_type(nw, size, type, waitok)); -} - bool _nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp) { - if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux)) - return (false); - nw->arg.ptr = (void *)nlp; - nw->writer_target = NS_WRITER_TARGET_SOCKET; - nlmsg_set_callback(nw); - return (true); + nw->nlp = nlp; + nw->cb = nl_send_one; + + return (nlmsg_get_buf(nw, size, false)); } bool _nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id) { - if (!nlmsg_get_buf(nw, size, false, false)) - return (false); - nw->arg.group.proto = protocol; - nw->arg.group.id = group_id; - nw->writer_target = NS_WRITER_TARGET_GROUP; - nlmsg_set_callback(nw); - return (true); -} + nw->group.proto = protocol; + nw->group.id = group_id; + nw->cb = nl_send_group; -bool -_nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm) -{ - if (!nlmsg_get_buf(nw, size, false, false)) - return (false); - *pm = NULL; - nw->arg.ptr = (void *)pm; - nw->writer_target = NS_WRITER_TARGET_CHAIN; - nlmsg_set_callback(nw); - NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf); - return (true); + return (nlmsg_get_buf(nw, size, false)); } void _nlmsg_ignore_limit(struct nl_writer *nw) { nw->ignore_limit = true; } bool _nlmsg_flush(struct nl_writer *nw) { if (__predict_false(nw->hdr != NULL)) { /* Last message has not been completed, skip it. 
*/ - int completed_len = (char *)nw->hdr - nw->data; + int completed_len = (char *)nw->hdr - nw->buf->data; /* Send completed messages */ - nw->offset -= nw->offset - completed_len; + nw->buf->datalen -= nw->buf->datalen - completed_len; nw->hdr = NULL; - } + } NL_LOG(LOG_DEBUG2, "OUT"); - bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages); - nw->_storage = NULL; + bool result = nw->cb(nw); + nw->num_messages = 0; if (!result) { - NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed", nw, nw->offset, nw->cb); + NL_LOG(LOG_DEBUG, "nw %p flush with %p() failed", nw, nw->cb); } return (result); } /* * Flushes previous data and allocates new underlying storage * sufficient for holding at least @required_len bytes. * Return true on success. */ bool -_nlmsg_refill_buffer(struct nl_writer *nw, int required_len) +_nlmsg_refill_buffer(struct nl_writer *nw, u_int required_len) { - struct nl_writer ns_new = {}; - int completed_len, new_len; + struct nl_buf *new; + u_int completed_len, new_len, last_len; + + MPASS(nw->buf != NULL); if (nw->enomem) return (false); - NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim", - nw->offset, nw->alloc_len, required_len); + NL_LOG(LOG_DEBUG3, "no space at offset %u/%u (want %u), trying to " + "reclaim", nw->buf->datalen, nw->buf->buflen, required_len); - /* Calculated new buffer size and allocate it s*/ - completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset; + /* Calculate new buffer size and allocate it. */ + completed_len = (nw->hdr != NULL) ? + (char *)nw->hdr - nw->buf->data : nw->buf->datalen; if (completed_len > 0 && required_len < NLMBUFSIZE) { - /* We already ran out of space, use the largest effective size */ - new_len = max(nw->alloc_len, NLMBUFSIZE); + /* We already ran out of space, use largest effective size. 
*/ + new_len = max(nw->buf->buflen, NLMBUFSIZE); } else { - if (nw->alloc_len < NLMBUFSIZE) + if (nw->buf->buflen < NLMBUFSIZE) + /* XXXGL: does this happen? */ new_len = NLMBUFSIZE; else - new_len = nw->alloc_len * 2; + new_len = nw->buf->buflen * 2; while (new_len < required_len) new_len *= 2; } - bool waitok = (nw->malloc_flag == M_WAITOK); - bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF); - if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) { + + new = nl_buf_alloc(new_len, nw->malloc_flag | M_ZERO); + if (__predict_false(new == NULL)) { nw->enomem = true; NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM"); return (false); } - if (nw->ignore_limit) - nlmsg_ignore_limit(&ns_new); - /* Update callback data */ - ns_new.writer_target = nw->writer_target; - nlmsg_set_callback(&ns_new); - ns_new.arg = nw->arg; - - /* Copy last (unfinished) header to the new storage */ - int last_len = nw->offset - completed_len; + /* Copy last (unfinished) header to the new storage. */ + last_len = nw->buf->datalen - completed_len; if (last_len > 0) { - memcpy(ns_new.data, nw->hdr, last_len); - ns_new.hdr = (struct nlmsghdr *)ns_new.data; - ns_new.offset = last_len; + memcpy(new->data, nw->hdr, last_len); + new->datalen = last_len; } - NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len); + NL_LOG(LOG_DEBUG2, "completed: %u bytes, copied: %u bytes", + completed_len, last_len); - /* Flush completed headers & switch to the new nw */ - nlmsg_flush(nw); - memcpy(nw, &ns_new, sizeof(struct nl_writer)); - NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes", nw->offset, nw->alloc_len); + if (completed_len > 0) { + nlmsg_flush(nw); + MPASS(nw->buf == NULL); + } else + nl_buf_free(nw->buf); + nw->buf = new; + nw->hdr = (last_len > 0) ? 
(struct nlmsghdr *)new->data : NULL; + NL_LOG(LOG_DEBUG2, "switched buffer: used %u/%u bytes", + new->datalen, new->buflen); return (true); } bool _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, uint16_t flags, uint32_t len) { + struct nl_buf *nb = nw->buf; struct nlmsghdr *hdr; + u_int required_len; MPASS(nw->hdr == NULL); - int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr)); - if (__predict_false(nw->offset + required_len > nw->alloc_len)) { + required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr)); + if (__predict_false(nb->datalen + required_len > nb->buflen)) { if (!nlmsg_refill_buffer(nw, required_len)) return (false); + nb = nw->buf; } - hdr = (struct nlmsghdr *)(&nw->data[nw->offset]); + hdr = (struct nlmsghdr *)(&nb->data[nb->datalen]); hdr->nlmsg_len = len; hdr->nlmsg_type = type; hdr->nlmsg_flags = flags; hdr->nlmsg_seq = seq; hdr->nlmsg_pid = portid; nw->hdr = hdr; - nw->offset += sizeof(struct nlmsghdr); + nb->datalen += sizeof(struct nlmsghdr); return (true); } bool _nlmsg_end(struct nl_writer *nw) { + struct nl_buf *nb = nw->buf; + MPASS(nw->hdr != NULL); if (nw->enomem) { NL_LOG(LOG_DEBUG, "ENOMEM when dumping message"); nlmsg_abort(nw); return (false); } - nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr); + nw->hdr->nlmsg_len = nb->data + nb->datalen - (char *)nw->hdr; NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u", nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags, nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid); nw->hdr = NULL; nw->num_messages++; return (true); } void _nlmsg_abort(struct nl_writer *nw) { + struct nl_buf *nb = nw->buf; + if (nw->hdr != NULL) { - nw->offset = (uint32_t)((char *)nw->hdr - nw->data); + nb->datalen = (char *)nw->hdr - nb->data; nw->hdr = NULL; } } void nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *hdr, struct nl_pstate *npt) { struct nlmsgerr *errmsg; int payload_len; uint32_t flags = nlp->nl_flags; 
struct nl_writer *nw = npt->nw; bool cap_ack; payload_len = sizeof(struct nlmsgerr); /* * The only case when we send the full message in the * reply is when there is an error and NETLINK_CAP_ACK * is not set. */ cap_ack = (error == 0) || (flags & NLF_CAP_ACK); if (!cap_ack) payload_len += hdr->nlmsg_len - sizeof(struct nlmsghdr); payload_len = NETLINK_ALIGN(payload_len); uint16_t nl_flags = cap_ack ? NLM_F_CAPPED : 0; if ((npt->err_msg || npt->err_off) && nlp->nl_flags & NLF_EXT_ACK) nl_flags |= NLM_F_ACK_TLVS; NL_LOG(LOG_DEBUG3, "acknowledging message type %d seq %d", hdr->nlmsg_type, hdr->nlmsg_seq); if (!nlmsg_add(nw, nlp->nl_port, hdr->nlmsg_seq, NLMSG_ERROR, nl_flags, payload_len)) goto enomem; errmsg = nlmsg_reserve_data(nw, payload_len, struct nlmsgerr); errmsg->error = error; /* In case of error copy the whole message, else just the header */ memcpy(&errmsg->msg, hdr, cap_ack ? sizeof(*hdr) : hdr->nlmsg_len); if (npt->err_msg != NULL && nlp->nl_flags & NLF_EXT_ACK) nlattr_add_string(nw, NLMSGERR_ATTR_MSG, npt->err_msg); if (npt->err_off != 0 && nlp->nl_flags & NLF_EXT_ACK) nlattr_add_u32(nw, NLMSGERR_ATTR_OFFS, npt->err_off); if (npt->cookie != NULL) nlattr_add_raw(nw, npt->cookie); if (nlmsg_end(nw)) return; enomem: NLP_LOG(LOG_DEBUG, nlp, "error allocating ack data for message %d seq %u", hdr->nlmsg_type, hdr->nlmsg_seq); nlmsg_abort(nw); } bool _nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr) { if (!nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, NLMSG_DONE, 0, sizeof(int))) { NL_LOG(LOG_DEBUG, "Error finalizing table dump"); return (false); } /* Save operation result */ int *perror = nlmsg_reserve_object(nw, int); NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error, - nw->offset, perror); + nw->buf->datalen, perror); *perror = error; nlmsg_end(nw); nw->suppress_ack = true; return (true); } /* * KPI functions. 
*/ -int +u_int nlattr_save_offset(const struct nl_writer *nw) { - return (nw->offset - ((char *)nw->hdr - nw->data)); + return (nw->buf->datalen - ((char *)nw->hdr - nw->buf->data)); } void * nlmsg_reserve_data_raw(struct nl_writer *nw, size_t sz) { - sz = NETLINK_ALIGN(sz); + struct nl_buf *nb = nw->buf; + void *data; - if (__predict_false(nw->offset + sz > nw->alloc_len)) { + sz = NETLINK_ALIGN(sz); + if (__predict_false(nb->datalen + sz > nb->buflen)) { if (!nlmsg_refill_buffer(nw, sz)) return (NULL); + nb = nw->buf; } - void *data_ptr = &nw->data[nw->offset]; - nw->offset += sz; - bzero(data_ptr, sz); + data = &nb->data[nb->datalen]; + bzero(data, sz); + nb->datalen += sz; - return (data_ptr); + return (data); } bool nlattr_add(struct nl_writer *nw, int attr_type, int attr_len, const void *data) { - int required_len = NLA_ALIGN(attr_len + sizeof(struct nlattr)); + struct nl_buf *nb = nw->buf; + struct nlattr *nla; + u_int required_len; - if (__predict_false(nw->offset + required_len > nw->alloc_len)) { + required_len = NLA_ALIGN(attr_len + sizeof(struct nlattr)); + if (__predict_false(nb->datalen + required_len > nb->buflen)) { if (!nlmsg_refill_buffer(nw, required_len)) return (false); + nb = nw->buf; } - struct nlattr *nla = (struct nlattr *)(&nw->data[nw->offset]); + nla = (struct nlattr *)(&nb->data[nb->datalen]); nla->nla_len = attr_len + sizeof(struct nlattr); nla->nla_type = attr_type; if (attr_len > 0) { if ((attr_len % 4) != 0) { /* clear padding bytes */ bzero((char *)nla + required_len - 4, 4); } memcpy((nla + 1), data, attr_len); } - nw->offset += required_len; + nb->datalen += required_len; return (true); } #include diff --git a/sys/netlink/netlink_message_writer.h b/sys/netlink/netlink_message_writer.h index 68e434094678..28f3fb78018c 100644 --- a/sys/netlink/netlink_message_writer.h +++ b/sys/netlink/netlink_message_writer.h @@ -1,327 +1,300 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 
2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef _NETLINK_NETLINK_MESSAGE_WRITER_H_ #define _NETLINK_NETLINK_MESSAGE_WRITER_H_ #ifdef _KERNEL #include /* * It is not meant to be included directly */ -struct mbuf; +struct nl_buf; struct nl_writer; -typedef bool nl_writer_cb(struct nl_writer *nw, void *buf, int buflen, int cnt); +typedef bool nl_writer_cb(struct nl_writer *nw); struct nl_writer { - int alloc_len; /* allocated buffer length */ - int offset; /* offset from the start of the buffer */ - struct nlmsghdr *hdr; /* Pointer to the currently-filled msg */ - char *data; /* pointer to the contiguous storage */ - void *_storage; /* Underlying storage pointer */ - nl_writer_cb *cb; /* Callback to flush data */ + struct nl_buf *buf; /* Underlying storage pointer */ + struct nlmsghdr *hdr; /* Pointer to the currently-filled msg */ + nl_writer_cb *cb; /* Callback to flush data */ union { - void *ptr; + struct nlpcb *nlp; struct { uint16_t proto; uint16_t id; } group; - } arg; - int num_messages; /* Number of messages in the buffer */ - int malloc_flag; /* M_WAITOK or M_NOWAIT */ - uint8_t writer_type; /* NS_WRITER_TYPE_* */ - uint8_t writer_target; /* NS_WRITER_TARGET_* */ - bool ignore_limit; /* If true, ignores RCVBUF limit */ - bool enomem; /* True if ENOMEM occured */ - bool suppress_ack; /* If true, don't send NLMSG_ERR */ + }; + u_int num_messages; /* Number of messages in the buffer */ + int malloc_flag; /* M_WAITOK or M_NOWAIT */ + bool ignore_limit; /* If true, ignores RCVBUF limit */ + bool enomem; /* True if ENOMEM occured */ + bool suppress_ack; /* If true, don't send NLMSG_ERR */ }; -#define NS_WRITER_TARGET_SOCKET 0 -#define NS_WRITER_TARGET_GROUP 1 -#define NS_WRITER_TARGET_CHAIN 2 - -#define NS_WRITER_TYPE_MBUF 0 -#define NS_WRITER_TYPE_BUF 1 -#define NS_WRITER_TYPE_LBUF 2 -#define NS_WRITER_TYPE_MBUFC 3 -#define NS_WRITER_TYPE_STUB 4 - #define NLMSG_SMALL 128 #define NLMSG_LARGE 2048 /* Message and attribute writing */ - -struct nlpcb; - #if defined(NETLINK) || defined(NETLINK_MODULE) /* 
Provide optimized calls to the functions inside the same linking unit */ bool _nlmsg_get_unicast_writer(struct nl_writer *nw, int expected_size, struct nlpcb *nlp); bool _nlmsg_get_group_writer(struct nl_writer *nw, int expected_size, int proto, int group_id); -bool _nlmsg_get_chain_writer(struct nl_writer *nw, int expected_size, struct mbuf **pm); bool _nlmsg_flush(struct nl_writer *nw); void _nlmsg_ignore_limit(struct nl_writer *nw); -bool _nlmsg_refill_buffer(struct nl_writer *nw, int required_size); +bool _nlmsg_refill_buffer(struct nl_writer *nw, u_int required_len); bool _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, uint16_t flags, uint32_t len); bool _nlmsg_end(struct nl_writer *nw); void _nlmsg_abort(struct nl_writer *nw); bool _nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr); static inline bool nlmsg_get_unicast_writer(struct nl_writer *nw, int expected_size, struct nlpcb *nlp) { return (_nlmsg_get_unicast_writer(nw, expected_size, nlp)); } static inline bool nlmsg_get_group_writer(struct nl_writer *nw, int expected_size, int proto, int group_id) { return (_nlmsg_get_group_writer(nw, expected_size, proto, group_id)); } -static inline bool -nlmsg_get_chain_writer(struct nl_writer *nw, int expected_size, struct mbuf **pm) -{ - return (_nlmsg_get_chain_writer(nw, expected_size, pm)); -} - static inline bool nlmsg_flush(struct nl_writer *nw) { return (_nlmsg_flush(nw)); } static inline void nlmsg_ignore_limit(struct nl_writer *nw) { _nlmsg_ignore_limit(nw); } static inline bool nlmsg_refill_buffer(struct nl_writer *nw, int required_size) { return (_nlmsg_refill_buffer(nw, required_size)); } static inline bool nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, uint16_t flags, uint32_t len) { return (_nlmsg_add(nw, portid, seq, type, flags, len)); } static inline bool nlmsg_end(struct nl_writer *nw) { return (_nlmsg_end(nw)); } static inline void nlmsg_abort(struct nl_writer *nw) { 
return (_nlmsg_abort(nw)); } static inline bool nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr) { return (_nlmsg_end_dump(nw, error, hdr)); } #else /* Provide access to the functions via netlink_glue.c */ bool nlmsg_get_unicast_writer(struct nl_writer *nw, int expected_size, struct nlpcb *nlp); bool nlmsg_get_group_writer(struct nl_writer *nw, int expected_size, int proto, int group_id); bool nlmsg_get_chain_writer(struct nl_writer *nw, int expected_size, struct mbuf **pm); bool nlmsg_flush(struct nl_writer *nw); void nlmsg_ignore_limit(struct nl_writer *nw); bool nlmsg_refill_buffer(struct nl_writer *nw, int required_size); bool nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, uint16_t flags, uint32_t len); bool nlmsg_end(struct nl_writer *nw); void nlmsg_abort(struct nl_writer *nw); bool nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr); #endif /* defined(NETLINK) || defined(NETLINK_MODULE) */ static inline bool nlmsg_reply(struct nl_writer *nw, const struct nlmsghdr *hdr, int payload_len) { return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, hdr->nlmsg_flags, payload_len)); } -#define nlmsg_data(_hdr) ((void *)((_hdr) + 1)) - /* * KPI similar to mtodo(): * current (uncompleted) header is guaranteed to be contiguous, * but can be reallocated, thus pointers may need to be readjusted. 
*/ u_int nlattr_save_offset(const struct nl_writer *nw); static inline void * _nlattr_restore_offset(const struct nl_writer *nw, int off) { return ((void *)((char *)nw->hdr + off)); } #define nlattr_restore_offset(_ns, _off, _t) ((_t *)_nlattr_restore_offset(_ns, _off)) static inline void nlattr_set_len(const struct nl_writer *nw, int off) { struct nlattr *nla = nlattr_restore_offset(nw, off, struct nlattr); nla->nla_len = nlattr_save_offset(nw) - off; } void *nlmsg_reserve_data_raw(struct nl_writer *nw, size_t sz); #define nlmsg_reserve_object(_ns, _t) ((_t *)nlmsg_reserve_data_raw(_ns, sizeof(_t))) #define nlmsg_reserve_data(_ns, _sz, _t) ((_t *)nlmsg_reserve_data_raw(_ns, _sz)) static inline int nlattr_add_nested(struct nl_writer *nw, uint16_t nla_type) { int off = nlattr_save_offset(nw); struct nlattr *nla = nlmsg_reserve_data(nw, sizeof(struct nlattr), struct nlattr); if (__predict_false(nla == NULL)) return (0); nla->nla_type = nla_type; return (off); } static inline void * _nlmsg_reserve_attr(struct nl_writer *nw, uint16_t nla_type, uint16_t sz) { sz += sizeof(struct nlattr); struct nlattr *nla = nlmsg_reserve_data(nw, sz, struct nlattr); if (__predict_false(nla == NULL)) return (NULL); nla->nla_type = nla_type; nla->nla_len = sz; return ((void *)(nla + 1)); } #define nlmsg_reserve_attr(_ns, _at, _t) ((_t *)_nlmsg_reserve_attr(_ns, _at, NLA_ALIGN(sizeof(_t)))) bool nlattr_add(struct nl_writer *nw, int attr_type, int attr_len, const void *data); static inline bool nlattr_add_raw(struct nl_writer *nw, const struct nlattr *nla_src) { int attr_len = nla_src->nla_len - sizeof(struct nlattr); MPASS(attr_len >= 0); return (nlattr_add(nw, nla_src->nla_type, attr_len, (const void *)(nla_src + 1))); } static inline bool nlattr_add_u8(struct nl_writer *nw, int attrtype, uint8_t value) { return (nlattr_add(nw, attrtype, sizeof(uint8_t), &value)); } static inline bool nlattr_add_u16(struct nl_writer *nw, int attrtype, uint16_t value) { return (nlattr_add(nw, attrtype, 
sizeof(uint16_t), &value)); } static inline bool nlattr_add_u32(struct nl_writer *nw, int attrtype, uint32_t value) { return (nlattr_add(nw, attrtype, sizeof(uint32_t), &value)); } static inline bool nlattr_add_u64(struct nl_writer *nw, int attrtype, uint64_t value) { return (nlattr_add(nw, attrtype, sizeof(uint64_t), &value)); } static inline bool nlattr_add_s8(struct nl_writer *nw, int attrtype, int8_t value) { return (nlattr_add(nw, attrtype, sizeof(int8_t), &value)); } static inline bool nlattr_add_s16(struct nl_writer *nw, int attrtype, int16_t value) { return (nlattr_add(nw, attrtype, sizeof(int16_t), &value)); } static inline bool nlattr_add_s32(struct nl_writer *nw, int attrtype, int32_t value) { return (nlattr_add(nw, attrtype, sizeof(int32_t), &value)); } static inline bool nlattr_add_s64(struct nl_writer *nw, int attrtype, int64_t value) { return (nlattr_add(nw, attrtype, sizeof(int64_t), &value)); } static inline bool nlattr_add_flag(struct nl_writer *nw, int attrtype) { return (nlattr_add(nw, attrtype, 0, NULL)); } static inline bool nlattr_add_string(struct nl_writer *nw, int attrtype, const char *str) { return (nlattr_add(nw, attrtype, strlen(str) + 1, str)); } static inline bool nlattr_add_in_addr(struct nl_writer *nw, int attrtype, const struct in_addr *in) { return (nlattr_add(nw, attrtype, sizeof(*in), in)); } static inline bool nlattr_add_in6_addr(struct nl_writer *nw, int attrtype, const struct in6_addr *in6) { return (nlattr_add(nw, attrtype, sizeof(*in6), in6)); } #endif #endif diff --git a/sys/netlink/netlink_module.c b/sys/netlink/netlink_module.c index e63048072ae9..ddae4488987b 100644 --- a/sys/netlink/netlink_module.c +++ b/sys/netlink/netlink_module.c @@ -1,253 +1,250 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 2022 Alexander V. 
Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(netlink, "Netlink support"); #define DEBUG_MOD_NAME nl_mod #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_INFO); #define NL_MAX_HANDLERS 20 struct nl_proto_handler _nl_handlers[NL_MAX_HANDLERS]; struct nl_proto_handler *nl_handlers = _nl_handlers; CK_LIST_HEAD(nl_control_head, nl_control); static struct nl_control_head vnets_head = CK_LIST_HEAD_INITIALIZER(); VNET_DEFINE(struct nl_control *, nl_ctl) = NULL; struct mtx nl_global_mtx; MTX_SYSINIT(nl_global_mtx, &nl_global_mtx, "global netlink lock", MTX_DEF); #define NL_GLOBAL_LOCK() mtx_lock(&nl_global_mtx) #define NL_GLOBAL_UNLOCK() mtx_unlock(&nl_global_mtx) int netlink_unloading = 0; static void free_nl_ctl(struct nl_control *ctl) { rm_destroy(&ctl->ctl_lock); free(ctl, M_NETLINK); } struct nl_control * vnet_nl_ctl_init(void) { struct nl_control *ctl; ctl = malloc(sizeof(struct nl_control), M_NETLINK, M_WAITOK | M_ZERO); rm_init(&ctl->ctl_lock, "netlink lock"); CK_LIST_INIT(&ctl->ctl_port_head); CK_LIST_INIT(&ctl->ctl_pcb_head); NL_GLOBAL_LOCK(); struct nl_control *tmp = atomic_load_ptr(&V_nl_ctl); if (tmp == NULL) { atomic_store_ptr(&V_nl_ctl, ctl); CK_LIST_INSERT_HEAD(&vnets_head, ctl, ctl_next); NL_LOG(LOG_DEBUG2, "VNET %p init done, inserted %p into global list", curvnet, ctl); } else { NL_LOG(LOG_DEBUG, "per-VNET init clash, dropping this instance"); free_nl_ctl(ctl); ctl = tmp; } NL_GLOBAL_UNLOCK(); return (ctl); } static void vnet_nl_ctl_destroy(const void *unused __unused) { struct nl_control *ctl; /* Assume at the time all of the processes / sockets are dead */ NL_GLOBAL_LOCK(); ctl = atomic_load_ptr(&V_nl_ctl); atomic_store_ptr(&V_nl_ctl, NULL); if (ctl != NULL) { NL_LOG(LOG_DEBUG2, "Removing %p from global list", ctl); CK_LIST_REMOVE(ctl, ctl_next); } NL_GLOBAL_UNLOCK(); if (ctl != NULL) free_nl_ctl(ctl); } VNET_SYSUNINIT(vnet_nl_ctl_destroy, SI_SUB_PROTO_IF, 
SI_ORDER_ANY, vnet_nl_ctl_destroy, NULL); int nl_verify_proto(int proto) { if (proto < 0 || proto >= NL_MAX_HANDLERS) { return (EINVAL); } int handler_defined = nl_handlers[proto].cb != NULL; return (handler_defined ? 0 : EPROTONOSUPPORT); } const char * nl_get_proto_name(int proto) { return (nl_handlers[proto].proto_name); } bool netlink_register_proto(int proto, const char *proto_name, nl_handler_f handler) { if ((proto < 0) || (proto >= NL_MAX_HANDLERS)) return (false); NL_GLOBAL_LOCK(); KASSERT((nl_handlers[proto].cb == NULL), ("netlink handler %d is already set", proto)); nl_handlers[proto].cb = handler; nl_handlers[proto].proto_name = proto_name; NL_GLOBAL_UNLOCK(); NL_LOG(LOG_DEBUG2, "Registered netlink %s(%d) handler", proto_name, proto); return (true); } bool netlink_unregister_proto(int proto) { if ((proto < 0) || (proto >= NL_MAX_HANDLERS)) return (false); NL_GLOBAL_LOCK(); KASSERT((nl_handlers[proto].cb != NULL), ("netlink handler %d is not set", proto)); nl_handlers[proto].cb = NULL; nl_handlers[proto].proto_name = NULL; NL_GLOBAL_UNLOCK(); NL_LOG(LOG_DEBUG2, "Unregistered netlink proto %d handler", proto); return (true); } #if !defined(NETLINK) && defined(NETLINK_MODULE) /* Non-stub function provider */ const static struct nl_function_wrapper nl_module = { .nlmsg_add = _nlmsg_add, .nlmsg_refill_buffer = _nlmsg_refill_buffer, .nlmsg_flush = _nlmsg_flush, .nlmsg_end = _nlmsg_end, .nlmsg_abort = _nlmsg_abort, .nlmsg_get_unicast_writer = _nlmsg_get_unicast_writer, .nlmsg_get_group_writer = _nlmsg_get_group_writer, - .nlmsg_get_chain_writer = _nlmsg_get_chain_writer, .nlmsg_end_dump = _nlmsg_end_dump, .nl_modify_ifp_generic = _nl_modify_ifp_generic, .nl_store_ifp_cookie = _nl_store_ifp_cookie, .nl_get_thread_nlp = _nl_get_thread_nlp, }; #endif static bool can_unload(void) { struct nl_control *ctl; bool result = true; NL_GLOBAL_LOCK(); CK_LIST_FOREACH(ctl, &vnets_head, ctl_next) { NL_LOG(LOG_DEBUG2, "Iterating VNET head %p", ctl); if 
(!CK_LIST_EMPTY(&ctl->ctl_pcb_head)) { NL_LOG(LOG_NOTICE, "non-empty socket list in ctl %p", ctl); result = false; break; } } NL_GLOBAL_UNLOCK(); return (result); } static int netlink_modevent(module_t mod __unused, int what, void *priv __unused) { int ret = 0; switch (what) { case MOD_LOAD: NL_LOG(LOG_DEBUG2, "Loading"); - nl_init_msg_zone(); nl_osd_register(); #if !defined(NETLINK) && defined(NETLINK_MODULE) nl_set_functions(&nl_module); #endif break; case MOD_UNLOAD: NL_LOG(LOG_DEBUG2, "Unload called"); if (can_unload()) { NL_LOG(LOG_WARNING, "unloading"); netlink_unloading = 1; #if !defined(NETLINK) && defined(NETLINK_MODULE) nl_set_functions(NULL); #endif nl_osd_unregister(); - nl_destroy_msg_zone(); } else ret = EBUSY; break; default: ret = EOPNOTSUPP; break; } return (ret); } static moduledata_t netlink_mod = { "netlink", netlink_modevent, NULL }; DECLARE_MODULE(netlink, netlink_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(netlink, 1); diff --git a/sys/netlink/netlink_var.h b/sys/netlink/netlink_var.h index ec174e17d1a2..97532c31e54b 100644 --- a/sys/netlink/netlink_var.h +++ b/sys/netlink/netlink_var.h @@ -1,214 +1,204 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _NETLINK_NETLINK_VAR_H_ #define _NETLINK_NETLINK_VAR_H_ #ifdef _KERNEL #include #include #include #include #include #define NLSNDQ 65536 /* Default socket sendspace */ #define NLRCVQ 65536 /* Default socket recvspace */ #define NLMBUFSIZE 2048 /* External storage size for Netlink mbufs */ struct ucred; -struct nl_io_queue { - STAILQ_HEAD(, mbuf) head; - int length; - int hiwat; -}; - struct nl_buf { TAILQ_ENTRY(nl_buf) tailq; + struct mbuf *control; u_int buflen; u_int datalen; u_int offset; char data[]; }; #define NLP_MAX_GROUPS 128 struct nlpcb { struct socket *nl_socket; uint64_t nl_groups[NLP_MAX_GROUPS / 64]; uint32_t nl_port; uint32_t nl_flags; uint32_t nl_process_id; int nl_proto; bool nl_bound; bool nl_task_pending; bool nl_tx_blocked; /* No new requests accepted */ bool nl_linux; /* true if running under compat */ bool nl_unconstrained_vnet; /* true if running under VNET jail (or without jail) */ bool nl_need_thread_setup; - struct nl_io_queue tx_queue; struct taskqueue *nl_taskqueue; struct task nl_task; struct ucred *nl_cred; /* Copy of nl_socket->so_cred */ uint64_t nl_dropped_bytes; uint64_t nl_dropped_messages; CK_LIST_ENTRY(nlpcb) nl_next; CK_LIST_ENTRY(nlpcb) nl_port_next; volatile u_int 
nl_refcount; struct mtx nl_lock; struct epoch_context nl_epoch_ctx; }; #define sotonlpcb(so) ((struct nlpcb *)(so)->so_pcb) #define NLP_LOCK_INIT(_nlp) mtx_init(&((_nlp)->nl_lock), "nlp mtx", NULL, MTX_DEF) #define NLP_LOCK_DESTROY(_nlp) mtx_destroy(&((_nlp)->nl_lock)) #define NLP_LOCK(_nlp) mtx_lock(&((_nlp)->nl_lock)) #define NLP_UNLOCK(_nlp) mtx_unlock(&((_nlp)->nl_lock)) #define ALIGNED_NL_SZ(_data) roundup2((((struct nlmsghdr *)(_data))->nlmsg_len), 16) /* nl_flags */ #define NLF_CAP_ACK 0x01 /* Do not send message body with errmsg */ #define NLF_EXT_ACK 0x02 /* Allow including extended TLVs in ack */ #define NLF_STRICT 0x04 /* Perform strict header checks */ #define NLF_MSG_INFO 0x08 /* Send caller info along with the notifications */ SYSCTL_DECL(_net_netlink); SYSCTL_DECL(_net_netlink_debug); struct nl_control { CK_LIST_HEAD(nl_pid_head, nlpcb) ctl_port_head; CK_LIST_HEAD(nlpcb_head, nlpcb) ctl_pcb_head; CK_LIST_ENTRY(nl_control) ctl_next; struct rmlock ctl_lock; }; VNET_DECLARE(struct nl_control *, nl_ctl); #define V_nl_ctl VNET(nl_ctl) struct sockaddr_nl; struct sockaddr; struct nlmsghdr; /* netlink_module.c */ struct nl_control *vnet_nl_ctl_init(void); int nl_verify_proto(int proto); const char *nl_get_proto_name(int proto); extern int netlink_unloading; struct nl_proto_handler { nl_handler_f cb; const char *proto_name; }; extern struct nl_proto_handler *nl_handlers; /* netlink_domain.c */ -void nl_send_group(struct mbuf *m, int cnt, int proto, int group_id); +bool nl_send_group(struct nl_writer *); void nl_osd_register(void); void nl_osd_unregister(void); void nl_set_thread_nlp(struct thread *td, struct nlpcb *nlp); /* netlink_io.c */ #define NL_IOF_UNTRANSLATED 0x01 #define NL_IOF_IGNORE_LIMIT 0x02 -bool nl_send_one(struct mbuf *m, struct nlpcb *nlp, int cnt, int io_flags); +bool nl_send_one(struct nl_writer *); void nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *nlmsg, struct nl_pstate *npt); void nl_on_transmit(struct nlpcb *nlp); -void 
nl_init_io(struct nlpcb *nlp); -void nl_free_io(struct nlpcb *nlp); void nl_taskqueue_handler(void *_arg, int pending); void nl_schedule_taskqueue(struct nlpcb *nlp); void nl_process_receive_locked(struct nlpcb *nlp); void nl_set_source_metadata(struct mbuf *m, int num_messages); -void nl_add_msg_info(struct mbuf *m); - -/* netlink_message_writer.c */ -void nl_init_msg_zone(void); -void nl_destroy_msg_zone(void); +void nl_add_msg_info(struct nl_buf *nb); +struct nl_buf *nl_buf_alloc(size_t len, int mflag); +void nl_buf_free(struct nl_buf *nb); /* netlink_generic.c */ struct genl_family { const char *family_name; uint16_t family_hdrsize; uint16_t family_id; uint16_t family_version; uint16_t family_attr_max; uint16_t family_cmd_size; uint16_t family_num_groups; struct genl_cmd *family_cmds; }; struct genl_group { struct genl_family *group_family; const char *group_name; }; struct genl_family *genl_get_family(uint32_t family_id); struct genl_group *genl_get_group(uint32_t group_id); #define MAX_FAMILIES 20 #define MAX_GROUPS 64 #define MIN_GROUP_NUM 48 #define CTRL_FAMILY_NAME "nlctrl" struct ifnet; struct nl_parsed_link; struct nlattr_bmask; struct nl_pstate; /* Function map */ struct nl_function_wrapper { bool (*nlmsg_add)(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, uint16_t flags, uint32_t len); bool (*nlmsg_refill_buffer)(struct nl_writer *nw, int required_len); bool (*nlmsg_flush)(struct nl_writer *nw); bool (*nlmsg_end)(struct nl_writer *nw); void (*nlmsg_abort)(struct nl_writer *nw); void (*nlmsg_ignore_limit)(struct nl_writer *nw); bool (*nlmsg_get_unicast_writer)(struct nl_writer *nw, int size, struct nlpcb *nlp); bool (*nlmsg_get_group_writer)(struct nl_writer *nw, int size, int protocol, int group_id); bool (*nlmsg_get_chain_writer)(struct nl_writer *nw, int size, struct mbuf **pm); bool (*nlmsg_end_dump)(struct nl_writer *nw, int error, struct nlmsghdr *hdr); int (*nl_modify_ifp_generic)(struct ifnet *ifp, struct nl_parsed_link 
*lattrs, const struct nlattr_bmask *bm, struct nl_pstate *npt); void (*nl_store_ifp_cookie)(struct nl_pstate *npt, struct ifnet *ifp); struct nlpcb * (*nl_get_thread_nlp)(struct thread *td); }; void nl_set_functions(const struct nl_function_wrapper *nl); #endif #endif diff --git a/sys/netlink/route/rt.c b/sys/netlink/route/rt.c index ed09748995dc..ffa06fb4c1ab 100644 --- a/sys/netlink/route/rt.c +++ b/sys/netlink/route/rt.c @@ -1,1119 +1,1117 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_route #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_INFO); static unsigned char get_rtm_type(const struct nhop_object *nh) { int nh_flags = nh->nh_flags; /* Use the fact that nhg runtime flags are only NHF_MULTIPATH */ if (nh_flags & NHF_BLACKHOLE) return (RTN_BLACKHOLE); else if (nh_flags & NHF_REJECT) return (RTN_PROHIBIT); return (RTN_UNICAST); } static uint8_t nl_get_rtm_protocol(const struct nhop_object *nh) { #ifdef ROUTE_MPATH if (NH_IS_NHGRP(nh)) { const struct nhgrp_object *nhg = (const struct nhgrp_object *)nh; uint8_t origin = nhgrp_get_origin(nhg); if (origin != RTPROT_UNSPEC) return (origin); nh = nhg->nhops[0]; } #endif uint8_t origin = nhop_get_origin(nh); if (origin != RTPROT_UNSPEC) return (origin); /* TODO: remove guesswork once all kernel users fill in origin */ int rt_flags = nhop_get_rtflags(nh); if (rt_flags & RTF_PROTO1) return (RTPROT_ZEBRA); if (rt_flags & RTF_STATIC) return (RTPROT_STATIC); return (RTPROT_KERNEL); } static int get_rtmsg_type_from_rtsock(int cmd) { switch (cmd) { case RTM_ADD: case RTM_CHANGE: case RTM_GET: return NL_RTM_NEWROUTE; case RTM_DELETE: return NL_RTM_DELROUTE; } return (0); } /* * fibnum heuristics * * if (dump && rtm_table == 0 && !rta_table) RT_ALL_FIBS * msg rtm_table RTA_TABLE result * RTM_GETROUTE/dump 0 - RT_ALL_FIBS * RTM_GETROUTE/dump 1 - 1 * RTM_GETROUTE/get 0 - 0 * */ static struct nhop_object * rc_get_nhop(const struct rib_cmd_info *rc) { return ((rc->rc_cmd == RTM_DELETE) ? 
rc->rc_nh_old : rc->rc_nh_new); } static void dump_rc_nhop_gw(struct nl_writer *nw, const struct nhop_object *nh) { #ifdef INET6 int upper_family; #endif switch (nhop_get_neigh_family(nh)) { case AF_LINK: /* onlink prefix, skip */ break; case AF_INET: nlattr_add(nw, NL_RTA_GATEWAY, 4, &nh->gw4_sa.sin_addr); break; #ifdef INET6 case AF_INET6: upper_family = nhop_get_upper_family(nh); if (upper_family == AF_INET6) { struct in6_addr gw6 = nh->gw6_sa.sin6_addr; in6_clearscope(&gw6); nlattr_add(nw, NL_RTA_GATEWAY, 16, &gw6); } else if (upper_family == AF_INET) { /* IPv4 over IPv6 */ struct in6_addr gw6 = nh->gw6_sa.sin6_addr; in6_clearscope(&gw6); char buf[20]; struct rtvia *via = (struct rtvia *)&buf[0]; via->rtvia_family = AF_INET6; memcpy(via->rtvia_addr, &gw6, 16); nlattr_add(nw, NL_RTA_VIA, 17, via); } break; #endif } } static void dump_rc_nhop_mtu(struct nl_writer *nw, const struct nhop_object *nh) { int nla_len = sizeof(struct nlattr) * 2 + sizeof(uint32_t); struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); if (nla == NULL) return; nla->nla_type = NL_RTA_METRICS; nla->nla_len = nla_len; nla++; nla->nla_type = NL_RTAX_MTU; nla->nla_len = sizeof(struct nlattr) + sizeof(uint32_t); *((uint32_t *)(nla + 1)) = nh->nh_mtu; } #ifdef ROUTE_MPATH static void dump_rc_nhg(struct nl_writer *nw, const struct nhgrp_object *nhg, struct rtmsg *rtm) { uint32_t uidx = nhgrp_get_uidx(nhg); uint32_t num_nhops; const struct weightened_nhop *wn = nhgrp_get_nhops(nhg, &num_nhops); uint32_t base_rtflags = nhop_get_rtflags(wn[0].nh); if (uidx != 0) nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); nlattr_add_u32(nw, NL_RTA_KNH_ID, nhgrp_get_idx(nhg)); nlattr_add_u32(nw, NL_RTA_RTFLAGS, base_rtflags); int off = nlattr_add_nested(nw, NL_RTA_MULTIPATH); if (off == 0) return; for (int i = 0; i < num_nhops; i++) { int nh_off = nlattr_save_offset(nw); struct rtnexthop *rtnh = nlmsg_reserve_object(nw, struct rtnexthop); if (rtnh == NULL) return; rtnh->rtnh_flags = 0; rtnh->rtnh_ifindex 
= if_getindex(wn[i].nh->nh_ifp); rtnh->rtnh_hops = wn[i].weight; dump_rc_nhop_gw(nw, wn[i].nh); uint32_t rtflags = nhop_get_rtflags(wn[i].nh); if (rtflags != base_rtflags) nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); if (rtflags & RTF_FIXEDMTU) dump_rc_nhop_mtu(nw, wn[i].nh); rtnh = nlattr_restore_offset(nw, nh_off, struct rtnexthop); /* * nlattr_add() allocates 4-byte aligned storage, no need to aligh * length here * */ rtnh->rtnh_len = nlattr_save_offset(nw) - nh_off; } nlattr_set_len(nw, off); } #endif static void dump_rc_nhop(struct nl_writer *nw, const struct route_nhop_data *rnd, struct rtmsg *rtm) { #ifdef ROUTE_MPATH if (NH_IS_NHGRP(rnd->rnd_nhop)) { dump_rc_nhg(nw, rnd->rnd_nhgrp, rtm); return; } #endif const struct nhop_object *nh = rnd->rnd_nhop; uint32_t rtflags = nhop_get_rtflags(nh); /* * IPv4 over IPv6 * ('RTA_VIA', {'family': 10, 'addr': 'fe80::20c:29ff:fe67:2dd'}), ('RTA_OIF', 2), * IPv4 w/ gw * ('RTA_GATEWAY', '172.16.107.131'), ('RTA_OIF', 2)], * Direct route: * ('RTA_OIF', 2) */ if (nh->nh_flags & NHF_GATEWAY) dump_rc_nhop_gw(nw, nh); uint32_t uidx = nhop_get_uidx(nh); if (uidx != 0) nlattr_add_u32(nw, NL_RTA_NH_ID, uidx); nlattr_add_u32(nw, NL_RTA_KNH_ID, nhop_get_idx(nh)); nlattr_add_u32(nw, NL_RTA_RTFLAGS, rtflags); if (rtflags & RTF_FIXEDMTU) dump_rc_nhop_mtu(nw, nh); uint32_t nh_expire = nhop_get_expire(nh); if (nh_expire > 0) nlattr_add_u32(nw, NL_RTA_EXPIRES, nh_expire - time_uptime); /* In any case, fill outgoing interface */ nlattr_add_u32(nw, NL_RTA_OIF, if_getindex(nh->nh_ifp)); if (rnd->rnd_weight != RT_DEFAULT_WEIGHT) nlattr_add_u32(nw, NL_RTA_WEIGHT, rnd->rnd_weight); } /* * Dumps output from a rib command into an rtmsg */ static int dump_px(uint32_t fibnum, const struct nlmsghdr *hdr, const struct rtentry *rt, struct route_nhop_data *rnd, struct nl_writer *nw) { struct rtmsg *rtm; int error = 0; NET_EPOCH_ASSERT(); if (!nlmsg_reply(nw, hdr, sizeof(struct rtmsg))) goto enomem; int family = rt_get_family(rt); int rtm_off = 
nlattr_save_offset(nw); rtm = nlmsg_reserve_object(nw, struct rtmsg); rtm->rtm_family = family; rtm->rtm_dst_len = 0; rtm->rtm_src_len = 0; rtm->rtm_tos = 0; if (fibnum < 255) rtm->rtm_table = (unsigned char)fibnum; rtm->rtm_scope = RT_SCOPE_UNIVERSE; rtm->rtm_protocol = nl_get_rtm_protocol(rnd->rnd_nhop); rtm->rtm_type = get_rtm_type(rnd->rnd_nhop); nlattr_add_u32(nw, NL_RTA_TABLE, fibnum); int plen = 0; #if defined(INET) || defined(INET6) uint32_t scopeid; #endif switch (family) { #ifdef INET case AF_INET: { struct in_addr addr; rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid); nlattr_add(nw, NL_RTA_DST, 4, &addr); break; } #endif #ifdef INET6 case AF_INET6: { struct in6_addr addr; rt_get_inet6_prefix_plen(rt, &addr, &plen, &scopeid); nlattr_add(nw, NL_RTA_DST, 16, &addr); break; } #endif default: FIB_LOG(LOG_NOTICE, fibnum, family, "unsupported rt family: %d", family); error = EAFNOSUPPORT; goto flush; } rtm = nlattr_restore_offset(nw, rtm_off, struct rtmsg); if (plen > 0) rtm->rtm_dst_len = plen; dump_rc_nhop(nw, rnd, rtm); if (nlmsg_end(nw)) return (0); enomem: error = ENOMEM; flush: nlmsg_abort(nw); return (error); } static int family_to_group(int family) { switch (family) { case AF_INET: return (RTNLGRP_IPV4_ROUTE); case AF_INET6: return (RTNLGRP_IPV6_ROUTE); } return (0); } static void report_operation(uint32_t fibnum, struct rib_cmd_info *rc, struct nlpcb *nlp, struct nlmsghdr *hdr) { struct nl_writer nw = {}; uint32_t group_id = family_to_group(rt_get_family(rc->rc_rt)); if (nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) { struct route_nhop_data rnd = { .rnd_nhop = rc_get_nhop(rc), .rnd_weight = rc->rc_nh_weight, }; hdr->nlmsg_flags &= ~(NLM_F_REPLACE | NLM_F_CREATE); hdr->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_APPEND); switch (rc->rc_cmd) { case RTM_ADD: hdr->nlmsg_type = NL_RTM_NEWROUTE; hdr->nlmsg_flags |= NLM_F_CREATE | NLM_F_EXCL; break; case RTM_CHANGE: hdr->nlmsg_type = NL_RTM_NEWROUTE; hdr->nlmsg_flags |= NLM_F_REPLACE; break; 
case RTM_DELETE: hdr->nlmsg_type = NL_RTM_DELROUTE; break; } dump_px(fibnum, hdr, rc->rc_rt, &rnd, &nw); nlmsg_flush(&nw); } rtsock_callback_p->route_f(fibnum, rc); } static void set_scope6(struct sockaddr *sa, struct ifnet *ifp) { #ifdef INET6 if (sa != NULL && sa->sa_family == AF_INET6 && ifp != NULL) { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa; if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) in6_set_unicast_scopeid(&sa6->sin6_addr, if_getindex(ifp)); } #endif } struct rta_mpath_nh { struct sockaddr *gw; struct ifnet *ifp; uint8_t rtnh_flags; uint8_t rtnh_weight; }; #define _IN(_field) offsetof(struct rtnexthop, _field) #define _OUT(_field) offsetof(struct rta_mpath_nh, _field) const static struct nlattr_parser nla_p_rtnh[] = { { .type = NL_RTA_GATEWAY, .off = _OUT(gw), .cb = nlattr_get_ip }, { .type = NL_RTA_VIA, .off = _OUT(gw), .cb = nlattr_get_ipvia }, }; const static struct nlfield_parser nlf_p_rtnh[] = { { .off_in = _IN(rtnh_flags), .off_out = _OUT(rtnh_flags), .cb = nlf_get_u8 }, { .off_in = _IN(rtnh_hops), .off_out = _OUT(rtnh_weight), .cb = nlf_get_u8 }, { .off_in = _IN(rtnh_ifindex), .off_out = _OUT(ifp), .cb = nlf_get_ifpz }, }; #undef _IN #undef _OUT static bool post_p_rtnh(void *_attrs, struct nl_pstate *npt __unused) { struct rta_mpath_nh *attrs = (struct rta_mpath_nh *)_attrs; set_scope6(attrs->gw, attrs->ifp); return (true); } NL_DECLARE_PARSER_EXT(mpath_parser, struct rtnexthop, NULL, nlf_p_rtnh, nla_p_rtnh, post_p_rtnh); struct rta_mpath { int num_nhops; struct rta_mpath_nh nhops[0]; }; static int nlattr_get_multipath(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) { int data_len = nla->nla_len - sizeof(struct nlattr); struct rtnexthop *rtnh; int max_nhops = data_len / sizeof(struct rtnexthop); struct rta_mpath *mp = npt_alloc(npt, (max_nhops + 2) * sizeof(struct rta_mpath_nh)); mp->num_nhops = 0; for (rtnh = (struct rtnexthop *)(nla + 1); data_len > 0; ) { struct rta_mpath_nh *mpnh = &mp->nhops[mp->num_nhops++]; 
int error = nl_parse_header(rtnh, rtnh->rtnh_len, &mpath_parser, npt, mpnh); if (error != 0) { NLMSG_REPORT_ERR_MSG(npt, "RTA_MULTIPATH: nexhop %d: parse failed", mp->num_nhops - 1); return (error); } int len = NL_ITEM_ALIGN(rtnh->rtnh_len); data_len -= len; rtnh = (struct rtnexthop *)((char *)rtnh + len); } if (data_len != 0 || mp->num_nhops == 0) { NLMSG_REPORT_ERR_MSG(npt, "invalid RTA_MULTIPATH attr"); return (EINVAL); } *((struct rta_mpath **)target) = mp; return (0); } struct nl_parsed_route { struct sockaddr *rta_dst; struct sockaddr *rta_gw; struct ifnet *rta_oif; struct rta_mpath *rta_multipath; uint32_t rta_table; uint32_t rta_rtflags; uint32_t rta_nh_id; uint32_t rta_weight; uint32_t rtax_mtu; uint8_t rtm_family; uint8_t rtm_dst_len; uint8_t rtm_protocol; uint8_t rtm_type; uint32_t rtm_flags; }; #define _IN(_field) offsetof(struct rtmsg, _field) #define _OUT(_field) offsetof(struct nl_parsed_route, _field) static struct nlattr_parser nla_p_rtmetrics[] = { { .type = NL_RTAX_MTU, .off = _OUT(rtax_mtu), .cb = nlattr_get_uint32 }, }; NL_DECLARE_ATTR_PARSER(metrics_parser, nla_p_rtmetrics); static const struct nlattr_parser nla_p_rtmsg[] = { { .type = NL_RTA_DST, .off = _OUT(rta_dst), .cb = nlattr_get_ip }, { .type = NL_RTA_OIF, .off = _OUT(rta_oif), .cb = nlattr_get_ifp }, { .type = NL_RTA_GATEWAY, .off = _OUT(rta_gw), .cb = nlattr_get_ip }, { .type = NL_RTA_METRICS, .arg = &metrics_parser, .cb = nlattr_get_nested }, { .type = NL_RTA_MULTIPATH, .off = _OUT(rta_multipath), .cb = nlattr_get_multipath }, { .type = NL_RTA_WEIGHT, .off = _OUT(rta_weight), .cb = nlattr_get_uint32 }, { .type = NL_RTA_RTFLAGS, .off = _OUT(rta_rtflags), .cb = nlattr_get_uint32 }, { .type = NL_RTA_TABLE, .off = _OUT(rta_table), .cb = nlattr_get_uint32 }, { .type = NL_RTA_VIA, .off = _OUT(rta_gw), .cb = nlattr_get_ipvia }, { .type = NL_RTA_NH_ID, .off = _OUT(rta_nh_id), .cb = nlattr_get_uint32 }, }; static const struct nlfield_parser nlf_p_rtmsg[] = { { .off_in = _IN(rtm_family), 
.off_out = _OUT(rtm_family), .cb = nlf_get_u8 }, { .off_in = _IN(rtm_dst_len), .off_out = _OUT(rtm_dst_len), .cb = nlf_get_u8 }, { .off_in = _IN(rtm_protocol), .off_out = _OUT(rtm_protocol), .cb = nlf_get_u8 }, { .off_in = _IN(rtm_type), .off_out = _OUT(rtm_type), .cb = nlf_get_u8 }, { .off_in = _IN(rtm_flags), .off_out = _OUT(rtm_flags), .cb = nlf_get_u32 }, }; #undef _IN #undef _OUT static bool post_p_rtmsg(void *_attrs, struct nl_pstate *npt __unused) { struct nl_parsed_route *attrs = (struct nl_parsed_route *)_attrs; set_scope6(attrs->rta_dst, attrs->rta_oif); set_scope6(attrs->rta_gw, attrs->rta_oif); return (true); } NL_DECLARE_PARSER_EXT(rtm_parser, struct rtmsg, NULL, nlf_p_rtmsg, nla_p_rtmsg, post_p_rtmsg); struct netlink_walkargs { struct nl_writer *nw; struct route_nhop_data rnd; struct nlmsghdr hdr; struct nlpcb *nlp; uint32_t fibnum; int family; int error; int count; int dumped; int dumped_tables; }; static int dump_rtentry(struct rtentry *rt, void *_arg) { struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg; int error; wa->count++; if (wa->error != 0) return (0); if (!rt_is_exportable(rt, nlp_get_cred(wa->nlp))) return (0); wa->dumped++; rt_get_rnd(rt, &wa->rnd); error = dump_px(wa->fibnum, &wa->hdr, rt, &wa->rnd, wa->nw); IF_DEBUG_LEVEL(LOG_DEBUG3) { char rtbuf[INET6_ADDRSTRLEN + 5]; FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family, - "Dump %s, offset %u, error %d", - rt_print_buf(rt, rtbuf, sizeof(rtbuf)), - wa->nw->offset, error); + "Dump %s, error %d", + rt_print_buf(rt, rtbuf, sizeof(rtbuf)), error); } wa->error = error; return (0); } static void dump_rtable_one(struct netlink_walkargs *wa, uint32_t fibnum, int family) { FIB_LOG(LOG_DEBUG2, fibnum, family, "Start dump"); wa->count = 0; wa->dumped = 0; rib_walk(fibnum, family, false, dump_rtentry, wa); wa->dumped_tables++; FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d", wa->count, wa->dumped); - NL_LOG(LOG_DEBUG2, "Current offset: %d", wa->nw->offset); } static int 
dump_rtable_fib(struct netlink_walkargs *wa, uint32_t fibnum, int family) { wa->fibnum = fibnum; if (family == AF_UNSPEC) { for (int i = 0; i < AF_MAX; i++) { if (rt_tables_get_rnh(fibnum, i) != 0) { wa->family = i; dump_rtable_one(wa, fibnum, i); if (wa->error != 0) break; } } } else { if (rt_tables_get_rnh(fibnum, family) != 0) { wa->family = family; dump_rtable_one(wa, fibnum, family); } } return (wa->error); } static int handle_rtm_getroute(struct nlpcb *nlp, struct nl_parsed_route *attrs, struct nlmsghdr *hdr, struct nl_pstate *npt) { RIB_RLOCK_TRACKER; struct rib_head *rnh; const struct rtentry *rt; struct route_nhop_data rnd; uint32_t fibnum = attrs->rta_table; sa_family_t family = attrs->rtm_family; if (attrs->rta_dst == NULL) { NLMSG_REPORT_ERR_MSG(npt, "No RTA_DST supplied"); return (EINVAL); } rnh = rt_tables_get_rnh(fibnum, family); if (rnh == NULL) return (EAFNOSUPPORT); RIB_RLOCK(rnh); struct sockaddr *dst = attrs->rta_dst; if (attrs->rtm_flags & RTM_F_PREFIX) rt = rib_lookup_prefix_plen(rnh, dst, attrs->rtm_dst_len, &rnd); else rt = (const struct rtentry *)rnh->rnh_matchaddr(dst, &rnh->head); if (rt == NULL) { RIB_RUNLOCK(rnh); return (ESRCH); } rt_get_rnd(rt, &rnd); rnd.rnd_nhop = nhop_select_func(rnd.rnd_nhop, 0); RIB_RUNLOCK(rnh); if (!rt_is_exportable(rt, nlp_get_cred(nlp))) return (ESRCH); IF_DEBUG_LEVEL(LOG_DEBUG2) { char rtbuf[NHOP_PRINT_BUFSIZE] __unused, nhbuf[NHOP_PRINT_BUFSIZE] __unused; FIB_LOG(LOG_DEBUG2, fibnum, family, "getroute completed: got %s for %s", nhop_print_buf_any(rnd.rnd_nhop, nhbuf, sizeof(nhbuf)), rt_print_buf(rt, rtbuf, sizeof(rtbuf))); } hdr->nlmsg_type = NL_RTM_NEWROUTE; dump_px(fibnum, hdr, rt, &rnd, npt->nw); return (0); } static int handle_rtm_dump(struct nlpcb *nlp, uint32_t fibnum, int family, struct nlmsghdr *hdr, struct nl_writer *nw) { struct netlink_walkargs wa = { .nlp = nlp, .nw = nw, .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_type = NL_RTM_NEWROUTE, .hdr.nlmsg_flags = 
hdr->nlmsg_flags | NLM_F_MULTI, }; if (fibnum == RT_TABLE_UNSPEC) { for (int i = 0; i < V_rt_numfibs; i++) { dump_rtable_fib(&wa, fibnum, family); if (wa.error != 0) break; } } else dump_rtable_fib(&wa, fibnum, family); if (wa.error == 0 && wa.dumped_tables == 0) { FIB_LOG(LOG_DEBUG, fibnum, family, "incorrect fibnum/family"); wa.error = ESRCH; // How do we propagate it? } if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) { NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); return (ENOMEM); } return (wa.error); } static struct nhop_object * finalize_nhop(struct nhop_object *nh, const struct sockaddr *dst, int *perror) { /* * The following MUST be filled: * nh_ifp, nh_ifa, nh_gw */ if (nh->gw_sa.sa_family == 0) { /* * Empty gateway. Can be direct route with RTA_OIF set. */ if (nh->nh_ifp != NULL) nhop_set_direct_gw(nh, nh->nh_ifp); else { NL_LOG(LOG_DEBUG, "empty gateway and interface, skipping"); *perror = EINVAL; return (NULL); } /* Both nh_ifp and gateway are set */ } else { /* Gateway is set up, we can derive ifp if not set */ if (nh->nh_ifp == NULL) { uint32_t fibnum = nhop_get_fibnum(nh); uint32_t flags = 0; if (nh->nh_flags & NHF_GATEWAY) flags = RTF_GATEWAY; else if (nh->nh_flags & NHF_HOST) flags = RTF_HOST; struct ifaddr *ifa = ifa_ifwithroute(flags, dst, &nh->gw_sa, fibnum); if (ifa == NULL) { NL_LOG(LOG_DEBUG, "Unable to determine ifp, skipping"); *perror = EINVAL; return (NULL); } nhop_set_transmit_ifp(nh, ifa->ifa_ifp); } } /* Both nh_ifp and gateway are set */ if (nh->nh_ifa == NULL) { const struct sockaddr *gw_sa = &nh->gw_sa; if (gw_sa->sa_family != dst->sa_family) { /* * Use dst as the target for determining the default * preferred ifa IF * 1) the gateway is link-level (e.g. direct route) * 2) the gateway family is different (e.g. IPv4 over IPv6). */ gw_sa = dst; } struct ifaddr *ifa = ifaof_ifpforaddr(gw_sa, nh->nh_ifp); if (ifa == NULL) { /* Try link-level ifa. 
*/ gw_sa = &nh->gw_sa; ifa = ifaof_ifpforaddr(gw_sa, nh->nh_ifp); if (ifa == NULL) { NL_LOG(LOG_DEBUG, "Unable to determine ifa, skipping"); *perror = EINVAL; return (NULL); } } nhop_set_src(nh, ifa); } return (nhop_get_nhop(nh, perror)); } static int get_pxflag(const struct nl_parsed_route *attrs) { int pxflag = 0; switch (attrs->rtm_family) { case AF_INET: if (attrs->rtm_dst_len == 32) pxflag = NHF_HOST; else if (attrs->rtm_dst_len == 0) pxflag = NHF_DEFAULT; break; case AF_INET6: if (attrs->rtm_dst_len == 128) pxflag = NHF_HOST; else if (attrs->rtm_dst_len == 0) pxflag = NHF_DEFAULT; break; } return (pxflag); } static int get_op_flags(int nlm_flags) { int op_flags = 0; op_flags |= (nlm_flags & NLM_F_REPLACE) ? RTM_F_REPLACE : 0; op_flags |= (nlm_flags & NLM_F_EXCL) ? RTM_F_EXCL : 0; op_flags |= (nlm_flags & NLM_F_CREATE) ? RTM_F_CREATE : 0; op_flags |= (nlm_flags & NLM_F_APPEND) ? RTM_F_APPEND : 0; return (op_flags); } #ifdef ROUTE_MPATH static int create_nexthop_one(struct nl_parsed_route *attrs, struct rta_mpath_nh *mpnh, struct nl_pstate *npt, struct nhop_object **pnh) { int error; if (mpnh->gw == NULL) return (EINVAL); struct nhop_object *nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); if (nh == NULL) return (ENOMEM); error = nl_set_nexthop_gw(nh, mpnh->gw, mpnh->ifp, npt); if (error != 0) { nhop_free(nh); return (error); } if (mpnh->ifp != NULL) nhop_set_transmit_ifp(nh, mpnh->ifp); nhop_set_pxtype_flag(nh, get_pxflag(attrs)); nhop_set_rtflags(nh, attrs->rta_rtflags); if (attrs->rtm_protocol > RTPROT_STATIC) nhop_set_origin(nh, attrs->rtm_protocol); *pnh = finalize_nhop(nh, attrs->rta_dst, &error); return (error); } #endif static struct nhop_object * create_nexthop_from_attrs(struct nl_parsed_route *attrs, struct nl_pstate *npt, int *perror) { struct nhop_object *nh = NULL; int error = 0; if (attrs->rta_multipath != NULL) { #ifdef ROUTE_MPATH /* Multipath w/o explicit nexthops */ int num_nhops = attrs->rta_multipath->num_nhops; struct weightened_nhop 
*wn = npt_alloc(npt, sizeof(*wn) * num_nhops); for (int i = 0; i < num_nhops; i++) { struct rta_mpath_nh *mpnh = &attrs->rta_multipath->nhops[i]; error = create_nexthop_one(attrs, mpnh, npt, &wn[i].nh); if (error != 0) { for (int j = 0; j < i; j++) nhop_free(wn[j].nh); break; } wn[i].weight = mpnh->rtnh_weight > 0 ? mpnh->rtnh_weight : 1; } if (error == 0) { struct rib_head *rh = nhop_get_rh(wn[0].nh); struct nhgrp_object *nhg; nhg = nhgrp_alloc(rh->rib_fibnum, rh->rib_family, wn, num_nhops, perror); if (nhg != NULL) { if (attrs->rtm_protocol > RTPROT_STATIC) nhgrp_set_origin(nhg, attrs->rtm_protocol); nhg = nhgrp_get_nhgrp(nhg, perror); } for (int i = 0; i < num_nhops; i++) nhop_free(wn[i].nh); if (nhg != NULL) return ((struct nhop_object *)nhg); error = *perror; } #else error = ENOTSUP; #endif *perror = error; } else { nh = nhop_alloc(attrs->rta_table, attrs->rtm_family); if (nh == NULL) { *perror = ENOMEM; return (NULL); } if (attrs->rta_gw != NULL) { *perror = nl_set_nexthop_gw(nh, attrs->rta_gw, attrs->rta_oif, npt); if (*perror != 0) { nhop_free(nh); return (NULL); } } if (attrs->rta_oif != NULL) nhop_set_transmit_ifp(nh, attrs->rta_oif); if (attrs->rtax_mtu != 0) nhop_set_mtu(nh, attrs->rtax_mtu, true); if (attrs->rta_rtflags & RTF_BROADCAST) nhop_set_broadcast(nh, true); if (attrs->rtm_protocol > RTPROT_STATIC) nhop_set_origin(nh, attrs->rtm_protocol); nhop_set_pxtype_flag(nh, get_pxflag(attrs)); nhop_set_rtflags(nh, attrs->rta_rtflags); switch (attrs->rtm_type) { case RTN_UNICAST: break; case RTN_BLACKHOLE: nhop_set_blackhole(nh, RTF_BLACKHOLE); break; case RTN_PROHIBIT: case RTN_UNREACHABLE: nhop_set_blackhole(nh, RTF_REJECT); break; /* TODO: return ENOTSUP for other types if strict option is set */ } nh = finalize_nhop(nh, attrs->rta_dst, perror); } return (nh); } static int rtnl_handle_newroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct rib_cmd_info rc = {}; struct nhop_object *nh = NULL; int error; struct nl_parsed_route 
attrs = {}; error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); if (error != 0) return (error); /* Check if we have enough data */ if (attrs.rta_dst == NULL) { NL_LOG(LOG_DEBUG, "missing RTA_DST"); return (EINVAL); } if (attrs.rta_table >= V_rt_numfibs) { NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); return (EINVAL); } if (attrs.rta_nh_id != 0) { /* Referenced uindex */ int pxflag = get_pxflag(&attrs); nh = nl_find_nhop(attrs.rta_table, attrs.rtm_family, attrs.rta_nh_id, pxflag, &error); if (error != 0) return (error); } else { nh = create_nexthop_from_attrs(&attrs, npt, &error); if (error != 0) { NL_LOG(LOG_DEBUG, "Error creating nexthop"); return (error); } } if (!NH_IS_NHGRP(nh) && attrs.rta_weight == 0) attrs.rta_weight = RT_DEFAULT_WEIGHT; struct route_nhop_data rnd = { .rnd_nhop = nh, .rnd_weight = attrs.rta_weight }; int op_flags = get_op_flags(hdr->nlmsg_flags); error = rib_add_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len, &rnd, op_flags, &rc); if (error == 0) report_operation(attrs.rta_table, &rc, nlp, hdr); return (error); } static int path_match_func(const struct rtentry *rt, const struct nhop_object *nh, void *_data) { struct nl_parsed_route *attrs = (struct nl_parsed_route *)_data; if ((attrs->rta_gw != NULL) && !rib_match_gw(rt, nh, attrs->rta_gw)) return (0); if ((attrs->rta_oif != NULL) && (attrs->rta_oif != nh->nh_ifp)) return (0); return (1); } static int rtnl_handle_delroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct rib_cmd_info rc; int error; struct nl_parsed_route attrs = {}; error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); if (error != 0) return (error); if (attrs.rta_dst == NULL) { NLMSG_REPORT_ERR_MSG(npt, "RTA_DST is not set"); return (ESRCH); } if (attrs.rta_table >= V_rt_numfibs) { NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); return (EINVAL); } error = rib_del_route_px(attrs.rta_table, attrs.rta_dst, attrs.rtm_dst_len, path_match_func, &attrs, 0, &rc); if (error == 0) 
report_operation(attrs.rta_table, &rc, nlp, hdr); return (error); } static int rtnl_handle_getroute(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { int error; struct nl_parsed_route attrs = {}; error = nl_parse_nlmsg(hdr, &rtm_parser, npt, &attrs); if (error != 0) return (error); if (attrs.rta_table >= V_rt_numfibs) { NLMSG_REPORT_ERR_MSG(npt, "invalid fib"); return (EINVAL); } if (hdr->nlmsg_flags & NLM_F_DUMP) error = handle_rtm_dump(nlp, attrs.rta_table, attrs.rtm_family, hdr, npt->nw); else error = handle_rtm_getroute(nlp, &attrs, hdr, npt); return (error); } void rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc) { struct nl_writer nw = {}; int family, nlm_flags = 0; family = rt_get_family(rc->rc_rt); /* XXX: check if there are active listeners first */ /* TODO: consider passing PID/type/seq */ switch (rc->rc_cmd) { case RTM_ADD: nlm_flags = NLM_F_EXCL | NLM_F_CREATE; break; case RTM_CHANGE: nlm_flags = NLM_F_REPLACE; break; case RTM_DELETE: nlm_flags = 0; break; } IF_DEBUG_LEVEL(LOG_DEBUG2) { char rtbuf[NHOP_PRINT_BUFSIZE] __unused; FIB_LOG(LOG_DEBUG2, fibnum, family, "received event %s for %s / nlm_flags=%X", rib_print_cmd(rc->rc_cmd), rt_print_buf(rc->rc_rt, rtbuf, sizeof(rtbuf)), nlm_flags); } struct nlmsghdr hdr = { .nlmsg_flags = nlm_flags, .nlmsg_type = get_rtmsg_type_from_rtsock(rc->rc_cmd), }; struct route_nhop_data rnd = { .rnd_nhop = rc_get_nhop(rc), .rnd_weight = rc->rc_nh_weight, }; uint32_t group_id = family_to_group(family); if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, group_id)) { NL_LOG(LOG_DEBUG, "error allocating event buffer"); return; } dump_px(fibnum, &hdr, rc->rc_rt, &rnd, &nw); nlmsg_flush(&nw); } static const struct rtnl_cmd_handler cmd_handlers[] = { { .cmd = NL_RTM_GETROUTE, .name = "RTM_GETROUTE", .cb = &rtnl_handle_getroute, .flags = RTNL_F_ALLOW_NONVNET_JAIL, }, { .cmd = NL_RTM_DELROUTE, .name = "RTM_DELROUTE", .cb = &rtnl_handle_delroute, .priv = PRIV_NET_ROUTE, }, { .cmd = 
NL_RTM_NEWROUTE, .name = "RTM_NEWROUTE", .cb = &rtnl_handle_newroute, .priv = PRIV_NET_ROUTE, } }; static const struct nlhdr_parser *all_parsers[] = {&mpath_parser, &metrics_parser, &rtm_parser}; void rtnl_routes_init(void) { NL_VERIFY_PARSERS(all_parsers); rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers)); } diff --git a/tests/sys/netlink/test_netlink_message_writer.py b/tests/sys/netlink/test_netlink_message_writer.py index df1768129b11..5f854b14ca45 100644 --- a/tests/sys/netlink/test_netlink_message_writer.py +++ b/tests/sys/netlink/test_netlink_message_writer.py @@ -1,79 +1,39 @@ import mmap import pytest from atf_python.ktest import BaseKernelTest from atf_python.sys.netlink.attrs import NlAttrU32 - M_NOWAIT = 1 M_WAITOK = 2 -NS_WRITER_TYPE_MBUF = 0 -NS_WRITER_TYPE_BUF = 1 -NS_WRITER_TYPE_LBUF = 1 - -MHLEN = 160 -MCLBYTES = 2048 # XXX: may differ on some archs? -MJUMPAGESIZE = mmap.PAGESIZE -MJUM9BYTES = 9 * 1024 -MJUM16BYTES = 16 * 1024 +NLMSG_SMALL = 128 +NLMSG_LARGE = 2048 class TestNetlinkMessageWriter(BaseKernelTest): KTEST_MODULE_NAME = "ktest_netlink_message_writer" @pytest.mark.parametrize( "malloc_flags", [ pytest.param(M_NOWAIT, id="NOWAIT"), pytest.param(M_WAITOK, id="WAITOK"), ], ) - @pytest.mark.parametrize( - "writer_type", - [ - pytest.param(NS_WRITER_TYPE_MBUF, id="MBUF"), - pytest.param(NS_WRITER_TYPE_BUF, id="BUF"), - ], - ) @pytest.mark.parametrize( "sz", [ - pytest.param([160, 160], id="MHLEN"), - pytest.param([MCLBYTES, MCLBYTES], id="MCLBYTES"), + pytest.param([NLMSG_SMALL, NLMSG_SMALL], id="NLMSG_SMALL"), + pytest.param([NLMSG_LARGE, NLMSG_LARGE], id="NLMSG_LARGE"), + pytest.param([NLMSG_LARGE + 256, NLMSG_LARGE + 256], id="NLMSG_LARGE+256"), ], ) - def test_mbuf_writer_allocation(self, sz, writer_type, malloc_flags): + def test_nlbuf_writer_allocation(self, sz, malloc_flags): """override to parametrize""" test_meta = [ NlAttrU32(1, sz[0]), # size NlAttrU32(2, sz[1]), # expected_avail - NlAttrU32(4, writer_type), - 
NlAttrU32(5, malloc_flags), - ] - self.runtest(test_meta) - - @pytest.mark.parametrize( - "malloc_flags", - [ - pytest.param(M_NOWAIT, id="NOWAIT"), - pytest.param(M_WAITOK, id="WAITOK"), - ], - ) - @pytest.mark.parametrize( - "sz", - [ - pytest.param([160, 160, 1], id="MHLEN"), - pytest.param([MCLBYTES, MCLBYTES, 1], id="MCLBYTES"), - pytest.param([MCLBYTES + 1, MCLBYTES + 1, 2], id="MCLBYTES_MHLEN"), - pytest.param([MCLBYTES + 256, MCLBYTES * 2, 2], id="MCLBYTESx2"), - ], - ) - def test_mbuf_chain_allocation(self, sz, malloc_flags): - test_meta = [ - NlAttrU32(1, sz[0]), # size - NlAttrU32(2, sz[1]), # expected_avail - NlAttrU32(3, sz[2]), # expected_count - NlAttrU32(5, malloc_flags), + NlAttrU32(3, malloc_flags), ] self.runtest(test_meta)