diff --git a/sys/compat/linux/linux_netlink.c b/sys/compat/linux/linux_netlink.c
--- a/sys/compat/linux/linux_netlink.c
+++ b/sys/compat/linux/linux_netlink.c
@@ -32,7 +32,6 @@
 #include <sys/ck.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
-#include <sys/rmlock.h>
 #include <sys/socket.h>
 #include <sys/vnode.h>
 
@@ -44,6 +43,7 @@
 #include <netlink/netlink.h>
 #include <netlink/netlink_ctl.h>
 #include <netlink/netlink_linux.h>
+#include <netlink/netlink_var.h>
 #include <netlink/netlink_route.h>
 
 #include <compat/linux/linux.h>
@@ -187,6 +187,7 @@
 
 	if (out_hdr != NULL) {
 		memcpy(out_hdr, hdr, hdr->nlmsg_len);
+		nw->num_messages++;
 		return (true);
 	}
 	return (false);
@@ -518,8 +519,7 @@
 }
 
 static bool
-nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp,
-    struct nl_writer *nw)
+nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
 {
 	if (hdr->nlmsg_type < NLMSG_MIN_TYPE) {
 		switch (hdr->nlmsg_type) {
@@ -536,7 +536,7 @@
 		}
 	}
 
-	switch (netlink_family) {
+	switch (nlp->nl_proto) {
 	case NETLINK_ROUTE:
 		return (rtnl_to_linux(hdr, nlp, nw));
 	default:
@@ -544,64 +544,59 @@
 	}
 }
 
-static struct mbuf *
-nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp)
+/*
+ * Translates all messages in the writer's buffer to their Linux form.
+ *
+ * A new buffer is installed in @nw and every message of the original
+ * buffer is passed to nlmsg_to_linux() together with @nw.  On success the
+ * original buffer is freed and true is returned.  On failure false is
+ * returned and @nw->buf is the original buffer, still owned by the caller.
+ */
+static bool
+nlmsgs_to_linux(struct nl_writer *nw, struct nlpcb *nlp)
 {
-	RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length);
-	struct nl_writer nw = {};
-
-	struct mbuf *m = NULL;
-	if (!nlmsg_get_chain_writer(&nw, data_length, &m)) {
-		RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d",
-		    data_length);
-		return (NULL);
-	}
+	struct nl_buf *nb, *orig;
+	u_int offset, msglen, orig_messages __diagused;
+
+	RT_LOG(LOG_DEBUG3, "%s: in %u bytes %u messages", __func__,
+	    nw->buf->datalen, nw->num_messages);
+
+	orig = nw->buf;
+	nb = nl_buf_alloc(orig->datalen + SCRATCH_BUFFER_SIZE, M_NOWAIT);
+	if (__predict_false(nb == NULL))
+		return (false);
+	nw->buf = nb;
+#ifdef INVARIANTS
+	orig_messages = nw->num_messages;
+#endif
+	nw->num_messages = 0;
 
 	/* Assume correct headers. Buffer IS mutable */
-	int count = 0;
-	for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) {
-		struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset];
-		int msglen = NLMSG_ALIGN(hdr->nlmsg_len);
-		count++;
+	for (offset = 0;
+	    offset + sizeof(struct nlmsghdr) <= orig->datalen;
+	    offset += msglen) {
+		struct nlmsghdr *hdr = (struct nlmsghdr *)&orig->data[offset];
 
-		if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) {
+		msglen = NLMSG_ALIGN(hdr->nlmsg_len);
+		if (!nlmsg_to_linux(hdr, nlp, nw)) {
 			RT_LOG(LOG_DEBUG, "failed to process msg type %d",
 			    hdr->nlmsg_type);
-			m_freem(m);
-			return (NULL);
+			/* The caller frees nw->buf, hand the original back. */
+			nw->buf = orig;
+			nl_buf_free(nb);
+			return (false);
 		}
-		offset += msglen;
 	}
-	nlmsg_flush(&nw);
-	RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count,
-	    m ? m_length(m, NULL) : 0);
 
-	return (m);
-}
+	MPASS(nw->num_messages == orig_messages);
+	MPASS(nw->buf == nb);
+	nl_buf_free(orig);
+	RT_LOG(LOG_DEBUG3, "%s: out %u bytes", __func__, offset);
 
-static struct mbuf *
-mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp)
-{
-	/* XXX: easiest solution, not optimized for performance */
-	int data_length = m_length(m, NULL);
-	char *buf = malloc(data_length, M_LINUX, M_NOWAIT);
-	if (buf == NULL) {
-		RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message",
-		    data_length);
-		m_freem(m);
-		return (NULL);
-	}
-	m_copydata(m, 0, data_length, buf);
-	m_freem(m);
-
-	m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp);
-	free(buf, M_LINUX);
-
-	return (m);
+	return (true);
 }
 
 static struct linux_netlink_provider linux_netlink_v1 = {
-	.mbufs_to_linux = mbufs_to_linux,
 	.msgs_to_linux = nlmsgs_to_linux,
 	.msg_from_linux = nlmsg_from_linux,
 };
diff --git a/sys/netlink/ktest_netlink_message_writer.h b/sys/netlink/ktest_netlink_message_writer.h
--- a/sys/netlink/ktest_netlink_message_writer.h
+++ b/sys/netlink/ktest_netlink_message_writer.h
@@ -30,28 +30,14 @@
 
 #if defined(_KERNEL) && defined(INVARIANTS)
 
-bool nlmsg_get_buf_type_wrapper(struct nl_writer *nw, int size, int type, bool waitok);
-void nlmsg_set_callback_wrapper(struct nl_writer *nw);
-struct mbuf *nl_get_mbuf_chain_wrapper(int len, int malloc_flags);
+bool nlmsg_get_buf_wrapper(struct nl_writer *nw, u_int size, bool waitok);
 
 #ifndef KTEST_CALLER
 
 bool
-nlmsg_get_buf_type_wrapper(struct nl_writer *nw, int size, int type, bool waitok)
+nlmsg_get_buf_wrapper(struct nl_writer *nw, u_int size, bool waitok)
 {
-	return (nlmsg_get_buf_type(nw, size, type, waitok));
-}
-
-void
-nlmsg_set_callback_wrapper(struct nl_writer *nw)
-{
-	nlmsg_set_callback(nw);
-}
-
-struct mbuf *
-nl_get_mbuf_chain_wrapper(int len, int malloc_flags)
-{
-	return (nl_get_mbuf_chain(len, malloc_flags));
+	return (nlmsg_get_buf(nw, size, waitok));
 }
 #endif
 
diff --git a/sys/netlink/ktest_netlink_message_writer.c b/sys/netlink/ktest_netlink_message_writer.c
--- a/sys/netlink/ktest_netlink_message_writer.c
+++ b/sys/netlink/ktest_netlink_message_writer.c
@@ -29,9 +29,9 @@
 #include <sys/cdefs.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
-#include <sys/mbuf.h>
 #include <netlink/netlink.h>
 #include <netlink/netlink_ctl.h>
+#include <netlink/netlink_var.h>
 #include <netlink/netlink_message_writer.h>
 
 #define KTEST_CALLER
@@ -39,54 +39,47 @@
 
 #ifdef INVARIANTS
 
-struct test_mbuf_attrs {
+struct test_nlbuf_attrs {
 	uint32_t	size;
 	uint32_t	expected_avail;
-	uint32_t	expected_count;
-	uint32_t	wtype;
 	int		waitok;
 };
 
-#define	_OUT(_field)	offsetof(struct test_mbuf_attrs, _field)
-static const struct nlattr_parser nla_p_mbuf_w[] = {
+#define	_OUT(_field)	offsetof(struct test_nlbuf_attrs, _field)
+static const struct nlattr_parser nla_p_nlbuf_w[] = {
 	{ .type = 1, .off = _OUT(size), .cb = nlattr_get_uint32 },
 	{ .type = 2, .off = _OUT(expected_avail), .cb = nlattr_get_uint32 },
-	{ .type = 3, .off = _OUT(expected_count), .cb = nlattr_get_uint32 },
-	{ .type = 4, .off = _OUT(wtype), .cb = nlattr_get_uint32 },
-	{ .type = 5, .off = _OUT(waitok), .cb = nlattr_get_uint32 },
+	{ .type = 3, .off = _OUT(waitok), .cb = nlattr_get_uint32 },
 };
 #undef _OUT
-NL_DECLARE_ATTR_PARSER(mbuf_w_parser, nla_p_mbuf_w);
+NL_DECLARE_ATTR_PARSER(nlbuf_w_parser, nla_p_nlbuf_w);
 
 static int
-test_mbuf_parser(struct ktest_test_context *ctx, struct nlattr *nla)
+test_nlbuf_parser(struct ktest_test_context *ctx, struct nlattr *nla)
 {
-	struct test_mbuf_attrs *attrs = npt_alloc(ctx->npt, sizeof(*attrs));
+	struct test_nlbuf_attrs *attrs = npt_alloc(ctx->npt, sizeof(*attrs));
 
 	ctx->arg = attrs;
 	if (attrs != NULL)
-		return (nl_parse_nested(nla, &mbuf_w_parser, ctx->npt, attrs));
+		return (nl_parse_nested(nla, &nlbuf_w_parser, ctx->npt, attrs));
 	return (ENOMEM);
 }
 
 static int
-test_mbuf_writer_allocation(struct ktest_test_context *ctx)
+test_nlbuf_writer_allocation(struct ktest_test_context *ctx)
 {
-	struct test_mbuf_attrs *attrs = ctx->arg;
-	bool ret;
+	struct test_nlbuf_attrs *attrs = ctx->arg;
 	struct nl_writer nw = {};
+	u_int alloc_len;
+	bool ret;
 
-	ret = nlmsg_get_buf_type_wrapper(&nw, attrs->size, attrs->wtype, attrs->waitok);
+	ret = nlmsg_get_buf_wrapper(&nw, attrs->size, attrs->waitok);
 	if (!ret)
 		return (EINVAL);
 
-	int alloc_len = nw.alloc_len;
+	alloc_len = nw.buf->buflen;
 	KTEST_LOG(ctx, "requested %u, allocated %d", attrs->size, alloc_len);
 
-	/* Set cleanup callback */
-	nw.writer_target = NS_WRITER_TARGET_SOCKET;
-	nlmsg_set_callback_wrapper(&nw);
-
 	/* Mark enomem to avoid reallocation */
 	nw.enomem = true;
 
@@ -95,9 +88,7 @@
 		return (EINVAL);
 	}
 
-	/* Mark as empty to free the storage */
-	nw.offset = 0;
-	nlmsg_flush(&nw);
+	nl_buf_free(nw.buf);
 
 	if (alloc_len < attrs->expected_avail) {
 		KTEST_LOG(ctx, "alloc_len %d, expected %u",
@@ -107,60 +98,15 @@
 
 	return (0);
 }
-
-static int
-test_mbuf_chain_allocation(struct ktest_test_context *ctx)
-{
-	struct test_mbuf_attrs *attrs = ctx->arg;
-	int mflags = attrs->waitok ? M_WAITOK : M_NOWAIT;
-	struct mbuf *chain = nl_get_mbuf_chain_wrapper(attrs->size, mflags);
-
-	if (chain == NULL) {
-		KTEST_LOG(ctx, "nl_get_mbuf_chain(%u) returned NULL", attrs->size);
-		return (EINVAL);
-	}
-
-	/* Iterate and check number of mbufs and space */
-	uint32_t allocated_count = 0, allocated_size = 0;
-	for (struct mbuf *m = chain; m != NULL; m = m->m_next) {
-		allocated_count++;
-		allocated_size += M_SIZE(m);
-	}
-	m_freem(chain);
-
-	if (attrs->expected_avail > allocated_size) {
-		KTEST_LOG(ctx, "expected/allocated avail(bytes) %u/%u"
-				" expected/allocated count %u/%u",
-		    attrs->expected_avail, allocated_size,
-		    attrs->expected_count, allocated_count);
-		return (EINVAL);
-	}
-
-	if (attrs->expected_count > 0 && (attrs->expected_count != allocated_count)) {
-		KTEST_LOG(ctx, "expected/allocated avail(bytes) %u/%u"
-				" expected/allocated count %u/%u",
-		    attrs->expected_avail, allocated_size,
-		    attrs->expected_count, allocated_count);
-		return (EINVAL);
-	}
-
-	return (0);
-}
 #endif
 
 static const struct ktest_test_info tests[] = {
 #ifdef INVARIANTS
 	{
-		.name = "test_mbuf_writer_allocation",
-		.desc = "test different mbuf sizes in the mbuf writer",
-		.func = &test_mbuf_writer_allocation,
-		.parse = &test_mbuf_parser,
-	},
-	{
-		.name = "test_mbuf_chain_allocation",
-		.desc = "verify allocation different chain sizes",
-		.func = &test_mbuf_chain_allocation,
-		.parse = &test_mbuf_parser,
+		.name = "test_nlbuf_writer_allocation",
+		.desc = "test different buffer sizes in the netlink writer",
+		.func = &test_nlbuf_writer_allocation,
+		.parse = &test_nlbuf_parser,
 	},
 #endif
 };
diff --git a/sys/netlink/netlink_domain.c b/sys/netlink/netlink_domain.c
--- a/sys/netlink/netlink_domain.c
+++ b/sys/netlink/netlink_domain.c
@@ -179,53 +179,92 @@
 }
 
 static void
-nl_send_one_group(struct mbuf *m, struct nlpcb *nlp, int num_messages,
-    int io_flags)
+nl_send_one_group(struct nl_writer *nw, struct nl_buf *nb, struct nlpcb *nlp)
 {
+	const uint16_t proto = nw->group.proto, id = nw->group.id;
+
 	if (__predict_false(nlp->nl_flags & NLF_MSG_INFO))
-		nl_add_msg_info(m);
-	nl_send_one(m, nlp, num_messages, io_flags);
+		nl_add_msg_info(nb);
+	nw->buf = nb;
+	/*
+	 * nl_send_one() takes the destination from nw->nlp, which shares
+	 * storage with nw->group.  Point it at the recipient for the call
+	 * and put the group back: the caller keeps matching sockets on it.
+	 */
+	nw->nlp = nlp;
+	(void)nl_send_one(nw);
+	nw->group.proto = proto;
+	nw->group.id = id;
+}
+
+/*
+ * Duplicates @nb: the header, the whole data area of buflen bytes and the
+ * control mbuf chain, if any.  Returns NULL if either allocation fails.
+ */
+static struct nl_buf *
+nl_buf_copy(struct nl_buf *nb)
+{
+	struct nl_buf *copy;
+
+	copy = nl_buf_alloc(nb->buflen, M_NOWAIT);
+	if (__predict_false(copy == NULL))
+		return (NULL);
+	memcpy(copy, nb, sizeof(*nb) + nb->buflen);
+	if (nb->control != NULL) {
+		copy->control = m_copym(nb->control, 0, M_COPYALL, M_NOWAIT);
+		if (__predict_false(copy->control == NULL)) {
+			nl_buf_free(copy);
+			return (NULL);
+		}
+	}
+
+	return (copy);
 }
 
 /*
- * Broadcasts message @m to the protocol @proto group specified by @group_id
+ * Broadcasts the writer's buffer to every socket of protocol
+ * nw->group.proto that is subscribed to group nw->group.id.  The buffer
+ * is detached from the writer before delivery.
  */
-void
-nl_send_group(struct mbuf *m, int num_messages, int proto, int group_id)
+bool
+nl_send_group(struct nl_writer *nw)
 {
+	struct nl_buf *nb = nw->buf;
 	struct nlpcb *nlp_last = NULL;
 	struct nlpcb *nlp;
 	NLCTL_TRACKER;
 
 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
-		struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *);
-		NL_LOG(LOG_DEBUG2, "MCAST mbuf len %u msg type %d len %u to group %d/%d",
-		    m->m_len, hdr->nlmsg_type, hdr->nlmsg_len, proto, group_id);
+		struct nlmsghdr *hdr = (struct nlmsghdr *)nb->data;
+		NL_LOG(LOG_DEBUG2, "MCAST len %u msg type %d len %u to group %d/%d",
+		    nb->datalen, hdr->nlmsg_type, hdr->nlmsg_len,
+		    nw->group.proto, nw->group.id);
 	}
 
+	nw->buf = NULL;
+
 	struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl);
 	if (__predict_false(ctl == NULL)) {
 		/*
 		 * Can be the case when notification is sent within VNET
 		 * which doesn't have any netlink sockets.
 		 */
-		m_freem(m);
-		return;
+		nl_buf_free(nb);
+		return (false);
 	}
 
 	NLCTL_RLOCK(ctl);
 
-	int io_flags = NL_IOF_UNTRANSLATED;
-
 	CK_LIST_FOREACH(nlp, &ctl->ctl_pcb_head, nl_next) {
-		if (nl_isset_group_locked(nlp, group_id) && nlp->nl_proto == proto) {
+		if (nl_isset_group_locked(nlp, nw->group.id) &&
+		    nlp->nl_proto == nw->group.proto) {
 			if (nlp_last != NULL) {
-				struct mbuf *m_copy;
-				m_copy = m_copym(m, 0, M_COPYALL, M_NOWAIT);
-				if (m_copy != NULL)
-					nl_send_one_group(m_copy, nlp_last,
-					    num_messages, io_flags);
-				else {
+				struct nl_buf *copy;
+
+				copy = nl_buf_copy(nb);
+				if (copy != NULL) {
+					nl_send_one_group(nw, copy, nlp_last);
+				} else {
 					NLP_LOCK(nlp_last);
 					if (nlp_last->nl_socket != NULL)
 						sorwakeup(nlp_last->nl_socket);
@@ -236,11 +259,13 @@
 		}
 	}
 	if (nlp_last != NULL)
-		nl_send_one_group(m, nlp_last, num_messages, io_flags);
+		nl_send_one_group(nw, nb, nlp_last);
 	else
-		m_freem(m);
+		nl_buf_free(nb);
 
 	NLCTL_RUNLOCK(ctl);
+
+	return (true);
 }
 
 bool
@@ -331,7 +356,7 @@
 		free(nlp, M_PCB);
 		return (error);
 	}
-	so->so_rcv.sb_mtx = &so->so_rcv_mtx;
+	TAILQ_INIT(&so->so_rcv.nl_queue);
 	TAILQ_INIT(&so->so_snd.nl_queue);
 	so->so_pcb = nlp;
 	nlp->nl_socket = so;
@@ -344,7 +369,6 @@
 	nlp->nl_need_thread_setup = true;
 	NLP_LOCK_INIT(nlp);
 	refcount_init(&nlp->nl_refcount, 1);
-	nl_init_io(nlp);
 
 	nlp->nl_taskqueue = taskqueue_create("netlink_socket", M_WAITOK,
 	    taskqueue_thread_enqueue, &nlp->nl_taskqueue);
@@ -467,15 +491,6 @@
 	return (0);
 }
 
-static void
-destroy_nlpcb(struct nlpcb *nlp)
-{
-	NLP_LOCK(nlp);
-	nl_free_io(nlp);
-	NLP_LOCK_DESTROY(nlp);
-	free(nlp, M_PCB);
-}
-
 static void
 destroy_nlpcb_epoch(epoch_context_t ctx)
 {
@@ -483,10 +498,10 @@
 
 	nlp = __containerof(ctx, struct nlpcb, nl_epoch_ctx);
 
-	destroy_nlpcb(nlp);
+	NLP_LOCK_DESTROY(nlp);
+	free(nlp, M_PCB);
 }
 
-
 static void
 nl_close(struct socket *so)
 {
@@ -522,9 +537,12 @@
 
 	while ((nb = TAILQ_FIRST(&so->so_snd.nl_queue)) != NULL) {
 		TAILQ_REMOVE(&so->so_snd.nl_queue, nb, tailq);
-		free(nb, M_NETLINK);
+		nl_buf_free(nb);
+	}
+	while ((nb = TAILQ_FIRST(&so->so_rcv.nl_queue)) != NULL) {
+		TAILQ_REMOVE(&so->so_rcv.nl_queue, nb, tailq);
+		nl_buf_free(nb);
 	}
-	sbdestroy(so, SO_RCV);
 
 	NL_LOG(LOG_DEBUG3, "socket %p, detached", so);
 
@@ -597,10 +615,8 @@
 	len = roundup2(uio->uio_resid, 8) + SCRATCH_BUFFER_SIZE;
 	if (nlp->nl_linux)
 		len += roundup2(uio->uio_resid, 8);
-	nb = malloc(sizeof(*nb) + len, M_NETLINK, M_WAITOK);
+	nb = nl_buf_alloc(len, M_WAITOK);
 	nb->datalen = uio->uio_resid;
-	nb->buflen = len;
-	nb->offset = 0;
 	error = uiomove(&nb->data[0], uio->uio_resid, uio);
 	if (__predict_false(error))
 		goto out;
@@ -635,19 +651,107 @@
 
 out:
 	SOCK_IO_SEND_UNLOCK(so);
-	free(nb, M_NETLINK);
+	if (nb != NULL)
+		nl_buf_free(nb);
 	return (error);
 }
 
 static int
-nl_pru_rcvd(struct socket *so, int flags)
+nl_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
+    struct mbuf **mp, struct mbuf **controlp, int *flagsp)
 {
+	static const struct sockaddr_nl nl_empty_src = {
+		.nl_len = sizeof(struct sockaddr_nl),
+		.nl_family = PF_NETLINK,
+		.nl_pid = 0 /* comes from the kernel */
+	};
+	struct sockbuf *sb = &so->so_rcv;
+	struct nl_buf *nb;
+	int flags, error;
+	u_int overflow;
+	bool nonblock, trunc, peek;
+
+	MPASS(mp == NULL && uio != NULL);
+
 	NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid);
-	MPASS(sotonlpcb(so) != NULL);
+
+	if (psa != NULL)
+		*psa = sodupsockaddr((const struct sockaddr *)&nl_empty_src,
+		    M_WAITOK);
+
+	flags = flagsp != NULL ? *flagsp & ~MSG_TRUNC : 0;
+	trunc = flagsp != NULL ? *flagsp & MSG_TRUNC : false;
+	nonblock = (so->so_state & SS_NBIO) ||
+	    (flags & (MSG_DONTWAIT | MSG_NBIO));
+	peek = flags & MSG_PEEK;
+
+	error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
+	if (__predict_false(error))
+		return (error);
+
+	SOCK_RECVBUF_LOCK(so);
+	while ((nb = TAILQ_FIRST(&sb->nl_queue)) == NULL) {
+		if (nonblock) {
+			SOCK_RECVBUF_UNLOCK(so);
+			SOCK_IO_RECV_UNLOCK(so);
+			return (EWOULDBLOCK);
+		}
+		error = sbwait(so, SO_RCV);
+		if (error) {
+			SOCK_RECVBUF_UNLOCK(so);
+			SOCK_IO_RECV_UNLOCK(so);
+			return (error);
+		}
+	}
+
+	/*
+	 * XXXGL
+	 * Here we emulate a PR_ATOMIC behavior of soreceive_generic() where
+	 * we take only the first "record" in the socket buffer and send it
+	 * to uio whole or truncated ignoring how many netlink messages are
+	 * in the record and how much space is left in the uio.
+	 * This needs to be fixed at next refactoring. First, we should perform
+	 * truncation only if the very first message doesn't fit into uio.
+	 * That will help an application with small buffer not to lose data.
+	 * Second, we should continue working on the sb->nl_queue as long as
+	 * there is more space in the uio.  That will boost applications with
+	 * large buffers.
+	 */
+	if (__predict_true(!peek)) {
+		TAILQ_REMOVE(&sb->nl_queue, nb, tailq);
+		sb->sb_acc -= nb->datalen;
+		sb->sb_ccc -= nb->datalen;
+	}
+	SOCK_RECVBUF_UNLOCK(so);
+
+	overflow = __predict_false(nb->datalen > uio->uio_resid) ?
+	    nb->datalen - uio->uio_resid : 0;
+	error = uiomove(nb->data, (int)nb->datalen, uio);
+	if (__predict_false(overflow > 0)) {
+		flags |= MSG_TRUNC;
+		if (trunc)
+			uio->uio_resid -= overflow;
+	}
+
+	if (controlp != NULL) {
+		*controlp = nb->control;
+		nb->control = NULL;
+	}
+
+	if (__predict_true(!peek))
+		nl_buf_free(nb);
+
+	if (uio->uio_td)
+		uio->uio_td->td_ru.ru_msgrcv++;
+
+	if (flagsp != NULL)
+		*flagsp |= flags;
+
+	SOCK_IO_RECV_UNLOCK(so);
 
 	nl_on_transmit(sotonlpcb(so));
 
-	return (0);
+	return (error);
 }
 
 static int
@@ -798,8 +902,7 @@
 }
 
 #define	NETLINK_PROTOSW						\
-	.pr_flags = PR_ATOMIC | PR_ADDR | PR_WANTRCVD |		\
-	    PR_SOCKBUF,						\
+	.pr_flags = PR_ATOMIC | PR_ADDR | PR_SOCKBUF,		\
 	.pr_ctloutput = nl_ctloutput,				\
 	.pr_setsbopt = nl_setsbopt,				\
 	.pr_attach = nl_pru_attach,				\
@@ -807,7 +910,7 @@
 	.pr_connect = nl_pru_connect,				\
 	.pr_disconnect = nl_pru_disconnect,			\
 	.pr_sosend = nl_sosend,					\
-	.pr_rcvd = nl_pru_rcvd,					\
+	.pr_soreceive = nl_soreceive,				\
 	.pr_shutdown = nl_pru_shutdown,				\
 	.pr_sockaddr = nl_sockaddr,				\
 	.pr_close = nl_close
diff --git a/sys/netlink/netlink_glue.c b/sys/netlink/netlink_glue.c
--- a/sys/netlink/netlink_glue.c
+++ b/sys/netlink/netlink_glue.c
@@ -111,7 +111,6 @@
 get_stub_writer(struct nl_writer *nw)
 {
 	bzero(nw, sizeof(*nw));
-	nw->writer_type = NS_WRITER_TYPE_STUB;
 	nw->enomem = true;
 
 	return (false);
diff --git a/sys/netlink/netlink_io.c b/sys/netlink/netlink_io.c
--- a/sys/netlink/netlink_io.c
+++ b/sys/netlink/netlink_io.c
@@ -51,69 +51,36 @@
  * sending netlink data between the kernel and userland.
  */
 
-static const struct sockaddr_nl _nl_empty_src = {
-	.nl_len = sizeof(struct sockaddr_nl),
-	.nl_family = PF_NETLINK,
-	.nl_pid = 0 /* comes from the kernel */
-};
-static const struct sockaddr *nl_empty_src = (const struct sockaddr *)&_nl_empty_src;
-
 static bool nl_process_nbuf(struct nl_buf *nb, struct nlpcb *nlp);
 
-static void
-queue_push(struct nl_io_queue *q, struct mbuf *mq)
-{
-	while (mq != NULL) {
-		struct mbuf *m = mq;
-		mq = mq->m_nextpkt;
-		m->m_nextpkt = NULL;
-
-		q->length += m_length(m, NULL);
-		STAILQ_INSERT_TAIL(&q->head, m, m_stailqpkt);
-	}
-}
-
-static struct mbuf *
-queue_pop(struct nl_io_queue *q)
+struct nl_buf *
+nl_buf_alloc(size_t len, int mflag)
 {
-	if (!STAILQ_EMPTY(&q->head)) {
-		struct mbuf *m = STAILQ_FIRST(&q->head);
-		STAILQ_REMOVE_HEAD(&q->head, m_stailqpkt);
-		m->m_nextpkt = NULL;
-		q->length -= m_length(m, NULL);
+	struct nl_buf *nb;
 
-		return (m);
+	nb = malloc(sizeof(struct nl_buf) + len, M_NETLINK, mflag);
+	if (__predict_true(nb != NULL)) {
+		nb->buflen = len;
+		nb->datalen = nb->offset = 0;
+		nb->control = NULL;
 	}
-	return (NULL);
-}
 
-static struct mbuf *
-queue_head(const struct nl_io_queue *q)
-{
-	return (STAILQ_FIRST(&q->head));
+	return (nb);
 }
 
-static inline bool
-queue_empty(const struct nl_io_queue *q)
+void
+nl_buf_free(struct nl_buf *nb)
 {
-	return (q->length == 0);
-}
 
-static void
-queue_free(struct nl_io_queue *q)
-{
-	while (!STAILQ_EMPTY(&q->head)) {
-		struct mbuf *m = STAILQ_FIRST(&q->head);
-		STAILQ_REMOVE_HEAD(&q->head, m_stailqpkt);
-		m->m_nextpkt = NULL;
-		m_freem(m);
-	}
-	q->length = 0;
+	if (nb->control)
+		m_freem(nb->control);
+	free(nb, M_NETLINK);
 }
 
 void
-nl_add_msg_info(struct mbuf *m)
+nl_add_msg_info(struct nl_buf *nb)
 {
+	/* XXXGL pass nlp as arg? */
 	struct nlpcb *nlp = nl_get_thread_nlp(curthread);
 	NL_LOG(LOG_DEBUG2, "Trying to recover nlp from thread %p: %p",
 	    curthread, nlp);
@@ -139,27 +106,15 @@
 	};
 
 
-	while (m->m_next != NULL)
-		m = m->m_next;
-	m->m_next = sbcreatecontrol(data, sizeof(data),
+	nb->control = sbcreatecontrol(data, sizeof(data),
 	    NETLINK_MSG_INFO, SOL_NETLINK, M_NOWAIT);
 
-	NL_LOG(LOG_DEBUG2, "Storing %u bytes of data, ctl: %p",
-	    (unsigned)sizeof(data), m->m_next);
-}
-
-static __noinline struct mbuf *
-extract_msg_info(struct mbuf *m)
-{
-	while (m->m_next != NULL) {
-		if (m->m_next->m_type == MT_CONTROL) {
-			struct mbuf *ctl = m->m_next;
-			m->m_next = NULL;
-			return (ctl);
-		}
-		m = m->m_next;
-	}
-	return (NULL);
+	if (__predict_true(nb->control != NULL))
+		NL_LOG(LOG_DEBUG2, "Storing %u bytes of control data, ctl: %p",
+		    (unsigned)sizeof(data), nb->control);
+	else
+		NL_LOG(LOG_DEBUG2, "Failed to allocate %u bytes of control",
+		    (unsigned)sizeof(data));
 }
 
 void
@@ -174,65 +129,31 @@
 	}
 }
 
-static bool
-tx_check_locked(struct nlpcb *nlp)
-{
-	if (queue_empty(&nlp->tx_queue))
-		return (true);
-
-	/*
-	 * Check if something can be moved from the internal TX queue
-	 * to the socket queue.
-	 */
-
-	bool appended = false;
-	struct sockbuf *sb = &nlp->nl_socket->so_rcv;
-	SOCKBUF_LOCK(sb);
-
-	while (true) {
-		struct mbuf *m = queue_head(&nlp->tx_queue);
-		if (m != NULL) {
-			struct mbuf *ctl = NULL;
-			if (__predict_false(m->m_next != NULL))
-				ctl = extract_msg_info(m);
-			if (sbappendaddr_locked(sb, nl_empty_src, m, ctl) != 0) {
-				/* appended successfully */
-				queue_pop(&nlp->tx_queue);
-				appended = true;
-			} else
-				break;
-		} else
-			break;
-	}
-
-	SOCKBUF_UNLOCK(sb);
-
-	if (appended)
-		sorwakeup(nlp->nl_socket);
-
-	return (queue_empty(&nlp->tx_queue));
-}
-
 static bool
 nl_process_received_one(struct nlpcb *nlp)
 {
 	struct socket *so = nlp->nl_socket;
-	struct sockbuf *sb = &so->so_snd;
+	struct sockbuf *sb;
 	struct nl_buf *nb;
 	bool reschedule = false;
 
 	NLP_LOCK(nlp);
 	nlp->nl_task_pending = false;
+	NLP_UNLOCK(nlp);
 
-	if (!tx_check_locked(nlp)) {
-		/* TX overflow queue still not empty, ignore RX */
-		NLP_UNLOCK(nlp);
+	/*
+	 * Do not process queued up requests if there is no space to queue
+	 * replies.
+	 */
+	sb = &so->so_rcv;
+	SOCK_RECVBUF_LOCK(so);
+	if (sb->sb_hiwat <= sb->sb_ccc) {
+		SOCK_RECVBUF_UNLOCK(so);
 		return (false);
 	}
+	SOCK_RECVBUF_UNLOCK(so);
 
-	int prev_hiwat = nlp->tx_queue.hiwat;
-	NLP_UNLOCK(nlp);
-
+	sb = &so->so_snd;
 	SOCK_SENDBUF_LOCK(so);
 	while ((nb = TAILQ_FIRST(&sb->nl_queue)) != NULL) {
 		TAILQ_REMOVE(&sb->nl_queue, nb, tailq);
@@ -244,7 +165,7 @@
 			sb->sb_ccc -= nb->datalen;
 			/* XXXGL: potentially can reduce lock&unlock count. */
 			sowwakeup_locked(so);
-			free(nb, M_NETLINK);
+			nl_buf_free(nb);
 			SOCK_SENDBUF_LOCK(so);
 		} else {
 			TAILQ_INSERT_HEAD(&sb->nl_queue, nb, tailq);
@@ -252,10 +173,6 @@
 		}
 	}
 	SOCK_SENDBUF_UNLOCK(so);
-	if (nlp->tx_queue.hiwat > prev_hiwat) {
-		NLP_LOG(LOG_DEBUG, nlp, "TX override peaked to %d", nlp->tx_queue.hiwat);
-
-	}
 
 	return (reschedule);
 }
@@ -276,18 +193,6 @@
 		;
 }
 
-void
-nl_init_io(struct nlpcb *nlp)
-{
-	STAILQ_INIT(&nlp->tx_queue.head);
-}
-
-void
-nl_free_io(struct nlpcb *nlp)
-{
-	queue_free(&nlp->tx_queue);
-}
-
 /*
  * Called after some data have been read from the socket.
  */
@@ -306,8 +211,8 @@
 		struct sockbuf *sb = &so->so_rcv;
 		NLP_LOG(LOG_DEBUG, nlp,
 		    "socket RX overflowed, %lu messages (%lu bytes) dropped. "
-		    "bytes: [%u/%u] mbufs: [%u/%u]", dropped_messages, dropped_bytes,
-		    sb->sb_ccc, sb->sb_hiwat, sb->sb_mbcnt, sb->sb_mbmax);
+		    "bytes: [%u/%u]", dropped_messages, dropped_bytes,
+		    sb->sb_ccc, sb->sb_hiwat);
 		/* TODO: send netlink message */
 	}
 
@@ -325,95 +230,67 @@
 	CURVNET_RESTORE();
 }
 
-static __noinline void
-queue_push_tx(struct nlpcb *nlp, struct mbuf *m)
-{
-	queue_push(&nlp->tx_queue, m);
-	nlp->nl_tx_blocked = true;
-
-	if (nlp->tx_queue.length > nlp->tx_queue.hiwat)
-		nlp->tx_queue.hiwat = nlp->tx_queue.length;
-}
-
 /*
- * Tries to send @m to the socket @nlp.
- *
- * @m: mbuf(s) to send to. Consumed in any case.
- * @nlp: socket to send to
- * @cnt: number of messages in @m
- * @io_flags: combination of NL_IOF_* flags
+ * Tries to send current data buffer from writer.
  *
  * Returns true on success.
  * If no queue overrunes happened, wakes up socket owner.
  */
 bool
-nl_send_one(struct mbuf *m, struct nlpcb *nlp, int num_messages, int io_flags)
+nl_send_one(struct nl_writer *nw)
 {
-	bool untranslated = io_flags & NL_IOF_UNTRANSLATED;
-	bool ignore_limits = io_flags & NL_IOF_IGNORE_LIMIT;
-	bool result = true;
+	struct nlpcb *nlp = nw->nlp;
+	struct socket *so = nlp->nl_socket;
+	struct sockbuf *sb = &so->so_rcv;
+	struct nl_buf *nb;
+
+	MPASS(nw->hdr == NULL);
 
 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
-		struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *);
+		struct nlmsghdr *hdr = (struct nlmsghdr *)nw->buf->data;
 		NLP_LOG(LOG_DEBUG2, nlp,
-		    "TX mbuf len %u msgs %u msg type %d first hdrlen %u io_flags %X",
-		    m_length(m, NULL), num_messages, hdr->nlmsg_type, hdr->nlmsg_len,
-		    io_flags);
+		    "TX len %u msgs %u msg type %d first hdrlen %u",
+		    nw->buf->datalen, nw->num_messages, hdr->nlmsg_type,
+		    hdr->nlmsg_len);
 	}
 
-	if (__predict_false(nlp->nl_linux && linux_netlink_p != NULL && untranslated)) {
-		m = linux_netlink_p->mbufs_to_linux(nlp->nl_proto, m, nlp);
-		if (m == NULL)
-			return (false);
+	if (nlp->nl_linux && linux_netlink_p != NULL &&
+	    __predict_false(!linux_netlink_p->msgs_to_linux(nw, nlp))) {
+		nl_buf_free(nw->buf);
+		nw->buf = NULL;
+		return (false);
 	}
 
-	NLP_LOCK(nlp);
+	nb = nw->buf;
+	nw->buf = NULL;
 
-	if (__predict_false(nlp->nl_socket == NULL)) {
+	SOCK_RECVBUF_LOCK(so);
+	if (!nw->ignore_limit && __predict_false(sb->sb_hiwat <= sb->sb_ccc)) {
+		SOCK_RECVBUF_UNLOCK(so);
+		NLP_LOCK(nlp);
+		nlp->nl_dropped_bytes += nb->datalen;
+		nlp->nl_dropped_messages += nw->num_messages;
+		NLP_LOG(LOG_DEBUG2, nlp, "RX oveflow: %lu m (+%d), %lu b (+%d)",
+		    (unsigned long)nlp->nl_dropped_messages, nw->num_messages,
+		    (unsigned long)nlp->nl_dropped_bytes, nb->datalen);
 		NLP_UNLOCK(nlp);
-		m_freem(m);
+		nl_buf_free(nb);
 		return (false);
-	}
-
-	if (!queue_empty(&nlp->tx_queue)) {
-		if (ignore_limits) {
-			queue_push_tx(nlp, m);
-		} else {
-			m_free(m);
-			result = false;
-		}
-		NLP_UNLOCK(nlp);
-		return (result);
-	}
-
-	struct socket *so = nlp->nl_socket;
-	struct mbuf *ctl = NULL;
-	if (__predict_false(m->m_next != NULL))
-		ctl = extract_msg_info(m);
-	if (sbappendaddr(&so->so_rcv, nl_empty_src, m, ctl) != 0) {
-		sorwakeup(so);
-		NLP_LOG(LOG_DEBUG3, nlp, "appended data & woken up");
 	} else {
-		if (ignore_limits) {
-			queue_push_tx(nlp, m);
-		} else {
-			/*
-			 * Store dropped data so it can be reported
-			 * on the next read
-			 */
-			nlp->nl_dropped_bytes += m_length(m, NULL);
-			nlp->nl_dropped_messages += num_messages;
-			NLP_LOG(LOG_DEBUG2, nlp, "RX oveflow: %lu m (+%d), %lu b (+%d)",
-			    (unsigned long)nlp->nl_dropped_messages, num_messages,
-			    (unsigned long)nlp->nl_dropped_bytes, m_length(m, NULL));
-			soroverflow(so);
-			m_freem(m);
-			result = false;
+		bool full;
+
+		TAILQ_INSERT_TAIL(&sb->nl_queue, nb, tailq);
+		sb->sb_acc += nb->datalen;
+		sb->sb_ccc += nb->datalen;
+		full = sb->sb_hiwat <= sb->sb_ccc;
+		sorwakeup_locked(so);
+		if (full) {
+			NLP_LOCK(nlp);
+			nlp->nl_tx_blocked = true;
+			NLP_UNLOCK(nlp);
 		}
+		return (true);
 	}
-	NLP_UNLOCK(nlp);
-
-	return (result);
 }
 
 static int
diff --git a/sys/netlink/netlink_linux.h b/sys/netlink/netlink_linux.h
--- a/sys/netlink/netlink_linux.h
+++ b/sys/netlink/netlink_linux.h
@@ -27,6 +27,7 @@
 
 #ifndef _NETLINK_LINUX_VAR_H_
 #define _NETLINK_LINUX_VAR_H_
+#ifdef _KERNEL
 
 /*
  * The file contains headers for the bridge interface between
@@ -34,16 +35,13 @@
  */
 struct nlpcb;
 struct nl_pstate;
+struct nl_writer;
 
-typedef struct mbuf *mbufs_to_linux_cb_t(int netlink_family, struct mbuf *m,
-    struct nlpcb *nlp);
-typedef struct mbuf *msgs_to_linux_cb_t(int netlink_family, char *buf, int data_length,
-    struct nlpcb *nlp);
+typedef bool msgs_to_linux_cb_t(struct nl_writer *nw, struct nlpcb *nlp);
 typedef struct nlmsghdr *msg_from_linux_cb_t(int netlink_family, struct nlmsghdr *hdr,
     struct nl_pstate *npt);
 
 struct linux_netlink_provider {
-	mbufs_to_linux_cb_t	*mbufs_to_linux;
 	msgs_to_linux_cb_t	*msgs_to_linux;
 	msg_from_linux_cb_t	*msg_from_linux;
 
@@ -52,3 +50,4 @@
 extern struct linux_netlink_provider *linux_netlink_p;
 
 #endif
+#endif
diff --git a/sys/netlink/netlink_message_writer.h b/sys/netlink/netlink_message_writer.h
--- a/sys/netlink/netlink_message_writer.h
+++ b/sys/netlink/netlink_message_writer.h
@@ -37,60 +37,41 @@
  * It is not meant to be included directly
  */
 
-struct mbuf;
+struct nl_buf;
 struct nl_writer;
-typedef bool nl_writer_cb(struct nl_writer *nw, void *buf, int buflen, int cnt);
+typedef bool nl_writer_cb(struct nl_writer *nw);
 
 struct nl_writer {
-	int			alloc_len;	/* allocated buffer length */
-	int			offset;		/* offset from the start of the buffer */
-	struct nlmsghdr		*hdr;		/* Pointer to the currently-filled msg */
-	char			*data;		/* pointer to the contiguous storage */
-	void			*_storage;	/* Underlying storage pointer */
-	nl_writer_cb		*cb;		/* Callback to flush data */
+	struct nl_buf		*buf;	/* Underlying storage pointer */
+	struct nlmsghdr		*hdr;	/* Pointer to the currently-filled msg */
+	nl_writer_cb		*cb;	/* Callback to flush data */
 	union {
-		void		*ptr;
+		struct nlpcb	*nlp;
 		struct {
 			uint16_t	proto;
 			uint16_t	id;
 		} group;
-	} arg;
-	int			num_messages;	/* Number of messages in the buffer */
-	int			malloc_flag;	/* M_WAITOK or M_NOWAIT */
-	uint8_t			writer_type;	/* NS_WRITER_TYPE_* */
-	uint8_t			writer_target;	/* NS_WRITER_TARGET_*  */
-	bool			ignore_limit;	/* If true, ignores RCVBUF limit */
-	bool			enomem;		/* True if ENOMEM occured */
-	bool			suppress_ack;	/* If true, don't send NLMSG_ERR */
+	};
+	u_int		num_messages;	/* Number of messages in the buffer */
+	int		malloc_flag;	/* M_WAITOK or M_NOWAIT */
+	bool		ignore_limit;	/* If true, ignores RCVBUF limit */
+	bool		enomem;		/* True if ENOMEM occurred */
+	bool		suppress_ack;	/* If true, don't send NLMSG_ERR */
 };
-#define	NS_WRITER_TARGET_SOCKET	0
-#define	NS_WRITER_TARGET_GROUP	1
-#define	NS_WRITER_TARGET_CHAIN	2
-
-#define	NS_WRITER_TYPE_MBUF	0
-#define NS_WRITER_TYPE_BUF	1
-#define NS_WRITER_TYPE_LBUF	2
-#define NS_WRITER_TYPE_MBUFC	3
-#define NS_WRITER_TYPE_STUB	4
-
 
 #define	NLMSG_SMALL	128
 #define	NLMSG_LARGE	2048
 
 /* Message and attribute writing */
-
-struct nlpcb;
-
 #if defined(NETLINK) || defined(NETLINK_MODULE)
 /* Provide optimized calls to the functions inside the same linking unit */
 
 bool _nlmsg_get_unicast_writer(struct nl_writer *nw, int expected_size, struct nlpcb *nlp);
 bool _nlmsg_get_group_writer(struct nl_writer *nw, int expected_size, int proto, int group_id);
-bool _nlmsg_get_chain_writer(struct nl_writer *nw, int expected_size, struct mbuf **pm);
 bool _nlmsg_flush(struct nl_writer *nw);
 void _nlmsg_ignore_limit(struct nl_writer *nw);
 
-bool _nlmsg_refill_buffer(struct nl_writer *nw, int required_size);
+bool _nlmsg_refill_buffer(struct nl_writer *nw, u_int required_len);
 bool _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type,
     uint16_t flags, uint32_t len);
 bool _nlmsg_end(struct nl_writer *nw);
@@ -111,12 +92,6 @@
 	return (_nlmsg_get_group_writer(nw, expected_size, proto, group_id));
 }
 
-static inline bool
-nlmsg_get_chain_writer(struct nl_writer *nw, int expected_size, struct mbuf **pm)
-{
-	return (_nlmsg_get_chain_writer(nw, expected_size, pm));
-}
-
 static inline bool
 nlmsg_flush(struct nl_writer *nw)
 {
@@ -186,8 +161,6 @@
 	    hdr->nlmsg_flags, payload_len));
 }
 
-#define nlmsg_data(_hdr)	((void *)((_hdr) + 1))
-
 /*
  * KPI similar to mtodo():
  * current (uncompleted) header is guaranteed to be contiguous,
diff --git a/sys/netlink/netlink_message_writer.c b/sys/netlink/netlink_message_writer.c
--- a/sys/netlink/netlink_message_writer.c
+++ b/sys/netlink/netlink_message_writer.c
@@ -30,7 +30,6 @@
 #include <sys/lock.h>
 #include <sys/rmlock.h>
 #include <sys/mbuf.h>
-#include <sys/ck.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/syslog.h>
@@ -45,523 +44,44 @@
 #include <netlink/netlink_debug.h>
 _DECLARE_DEBUG(LOG_INFO);
 
-/*
- * The goal of this file is to provide convenient message writing KPI on top of
- * different storage methods (mbufs, uio, temporary memory chunks).
- *
- * The main KPI guarantee is that the (last) message always resides in the contiguous
- *  memory buffer, so one is able to update the header after writing the entire message.
- *
- * This guarantee comes with a side effect of potentially reallocating underlying
- *  buffer, so one needs to update the desired pointers after something is added
- *  to the header.
- *
- * Messaging layer contains hooks performing transparent Linux translation for the messages.
- *
- * There are 3 types of supported targets:
- *  * socket (adds mbufs to the socket buffer, used for message replies)
- *  * group (sends mbuf/chain to the specified groups, used for the notifications)
- *  * chain (returns mbuf chain, used in Linux message translation code)
- *
- * There are 3 types of storage:
- * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message
- *    fits in NLMBUFSIZE)
- * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs
- *    to be larger than one supported by NS_WRITER_TYPE_MBUF)
- * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for
- *    Linux sockets, calls translation hook prior to sending messages to the socket).
- *
- * Internally, KPI switches between different types of storage when memory requirements
- *  change. It happens transparently to the caller.
- */
-
-/*
- * Uma zone for the mbuf-based Netlink storage
- */
-static uma_zone_t	nlmsg_zone;
-
-static void
-nl_free_mbuf_storage(struct mbuf *m)
-{
-	uma_zfree(nlmsg_zone, m->m_ext.ext_buf);
-}
-
-static int
-nl_setup_mbuf_storage(void *mem, int size, void *arg, int how __unused)
-{
-	struct mbuf *m = (struct mbuf *)arg;
-
-	if (m != NULL)
-		m_extadd(m, mem, size, nl_free_mbuf_storage, NULL, NULL, 0, EXT_MOD_TYPE);
-
-	return (0);
-}
-
-static struct mbuf *
-nl_get_mbuf_flags(int size, int malloc_flags, int mbuf_flags)
-{
-	struct mbuf *m, *m_storage;
-
-	if (size <= MHLEN)
-		return (m_get2(size, malloc_flags, MT_DATA, mbuf_flags));
-
-	if (__predict_false(size > NLMBUFSIZE))
-		return (NULL);
-
-	m = m_gethdr(malloc_flags, MT_DATA);
-	if (m == NULL)
-		return (NULL);
-
-	m_storage = uma_zalloc_arg(nlmsg_zone, m, malloc_flags);
-	if (m_storage == NULL) {
-		m_free_raw(m);
-		return (NULL);
-	}
-
-	return (m);
-}
-
-static struct mbuf *
-nl_get_mbuf(int size, int malloc_flags)
-{
-	return (nl_get_mbuf_flags(size, malloc_flags, M_PKTHDR));
-}
-
-/*
- * Gets a chain of Netlink mbufs.
- * This is strip-down version of m_getm2()
- */
-static struct mbuf *
-nl_get_mbuf_chain(int len, int malloc_flags)
-{
-	struct mbuf *m_chain = NULL, *m_tail = NULL;
-	int mbuf_flags = M_PKTHDR;
-
-	while (len > 0) {
-		int sz = len > NLMBUFSIZE ? NLMBUFSIZE: len;
-		struct mbuf *m = nl_get_mbuf_flags(sz, malloc_flags, mbuf_flags);
-
-		if (m == NULL) {
-			m_freem(m_chain);
-			return (NULL);
-		}
-
-		/* Book keeping. */
-		len -= M_SIZE(m);
-		if (m_tail != NULL)
-			m_tail->m_next = m;
-		else
-			m_chain = m;
-		m_tail = m;
-		mbuf_flags &= ~M_PKTHDR;	/* Only valid on the first mbuf. */
-	}
-
-	return (m_chain);
-}
-
-void
-nl_init_msg_zone(void)
-{
-	nlmsg_zone = uma_zcreate("netlink", NLMBUFSIZE, nl_setup_mbuf_storage,
-	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
-}
-
-void
-nl_destroy_msg_zone(void)
-{
-	uma_zdestroy(nlmsg_zone);
-}
-
-
-typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok);
-typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int buflen, int cnt);
-
-struct nlwriter_ops {
-	nlwriter_op_init	*init;
-	nlwriter_op_write	*write_socket;
-	nlwriter_op_write	*write_group;
-	nlwriter_op_write	*write_chain;
-};
-
-/*
- * NS_WRITER_TYPE_BUF
- * Writes message to a temporary memory buffer,
- * flushing to the socket/group when buffer size limit is reached
- */
-static bool
-nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok)
-{
-	int mflag = waitok ? M_WAITOK : M_NOWAIT;
-	nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO);
-	if (__predict_false(nw->_storage == NULL))
-		return (false);
-	nw->alloc_len = size;
-	nw->offset = 0;
-	nw->hdr = NULL;
-	nw->data = nw->_storage;
-	nw->writer_type = NS_WRITER_TYPE_BUF;
-	nw->malloc_flag = mflag;
-	nw->num_messages = 0;
-	nw->enomem = false;
-	return (true);
-}
-
 static bool
-nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
+nlmsg_get_buf(struct nl_writer *nw, u_int len, bool waitok)
 {
-	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
-	if (__predict_false(datalen == 0)) {
-		free(buf, M_NETLINK);
-		return (true);
-	}
+	const int mflag = waitok ? M_WAITOK : M_NOWAIT;
 
-	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
-	if (__predict_false(m == NULL)) {
-		/* XXX: should we set sorcverr? */
-		free(buf, M_NETLINK);
-		return (false);
-	}
-	m_append(m, datalen, buf);
-	free(buf, M_NETLINK);
+	MPASS(nw->buf == NULL);
 
-	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
-	return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
-}
-
-static bool
-nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
-{
-	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
-	    nw->arg.group.proto, nw->arg.group.id);
-	if (__predict_false(datalen == 0)) {
-		free(buf, M_NETLINK);
-		return (true);
-	}
+	NL_LOG(LOG_DEBUG3, "Setting up nw %p len %u %s", nw, len,
+	    waitok ? "wait" : "nowait");
 
-	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
-	if (__predict_false(m == NULL)) {
-		free(buf, M_NETLINK);
+	nw->buf = nl_buf_alloc(len, mflag);
+	if (__predict_false(nw->buf == NULL))
 		return (false);
-	}
-	bool success = m_append(m, datalen, buf) != 0;
-	free(buf, M_NETLINK);
-
-	if (!success)
-		return (false);
-
-	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
-	return (true);
-}
-
-static bool
-nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
-{
-	struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);
-	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
-
-	if (__predict_false(datalen == 0)) {
-		free(buf, M_NETLINK);
-		return (true);
-	}
-
-	if (*m0 == NULL) {
-		struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
-
-		if (__predict_false(m == NULL)) {
-			free(buf, M_NETLINK);
-			return (false);
-		}
-		*m0 = m;
-	}
-	if (__predict_false(m_append(*m0, datalen, buf) == 0)) {
-		free(buf, M_NETLINK);
-		return (false);
-	}
-	return (true);
-}
-
-
-/*
- * NS_WRITER_TYPE_MBUF
- * Writes message to the allocated mbuf,
- * flushing to socket/group when mbuf size limit is reached.
- * This is the most efficient mechanism as it avoids double-copying.
- *
- * Allocates a single mbuf suitable to store up to @size bytes of data.
- * If size < MHLEN (around 160 bytes), allocates mbuf with pkghdr.
- * If the size <= NLMBUFSIZE (2k), allocate mbuf+storage out of nlmsg_zone.
- * Returns NULL on greater size or the allocation failure.
- */
-static bool
-nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok)
-{
-	int mflag = waitok ? M_WAITOK : M_NOWAIT;
-	struct mbuf *m = nl_get_mbuf(size, mflag);
-
-	if (__predict_false(m == NULL))
-		return (false);
-	nw->alloc_len = M_TRAILINGSPACE(m);
-	nw->offset = 0;
 	nw->hdr = NULL;
-	nw->_storage = (void *)m;
-	nw->data = mtod(m, void *);
-	nw->writer_type = NS_WRITER_TYPE_MBUF;
 	nw->malloc_flag = mflag;
 	nw->num_messages = 0;
 	nw->enomem = false;
-	memset(nw->data, 0, size);
-	NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p",
-	    m, size, nw->alloc_len, nw->data);
-	return (true);
-}
-
-static bool
-nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
-{
-	struct mbuf *m = (struct mbuf *)buf;
-	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
-
-	if (__predict_false(datalen == 0)) {
-		m_freem(m);
-		return (true);
-	}
-
-	m->m_pkthdr.len = datalen;
-	m->m_len = datalen;
-	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
-	return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
-}
-
-static bool
-nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
-{
-	struct mbuf *m = (struct mbuf *)buf;
-	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
-	    nw->arg.group.proto, nw->arg.group.id);
-
-	if (__predict_false(datalen == 0)) {
-		m_freem(m);
-		return (true);
-	}
 
-	m->m_pkthdr.len = datalen;
-	m->m_len = datalen;
-	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
 	return (true);
 }
 
-static bool
-nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
-{
-	struct mbuf *m_new = (struct mbuf *)buf;
-	struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);
-
-	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
-
-	if (__predict_false(datalen == 0)) {
-		m_freem(m_new);
-		return (true);
-	}
-
-	m_new->m_pkthdr.len = datalen;
-	m_new->m_len = datalen;
-
-	if (*m0 == NULL) {
-		*m0 = m_new;
-	} else {
-		struct mbuf *m_last;
-		for (m_last = *m0; m_last->m_next != NULL; m_last = m_last->m_next)
-			;
-		m_last->m_next = m_new;
-		(*m0)->m_pkthdr.len += datalen;
-	}
-
-	return (true);
-}
-
-/*
- * NS_WRITER_TYPE_LBUF
- * Writes message to the allocated memory buffer,
- * flushing to socket/group when mbuf size limit is reached.
- * Calls linux handler to rewrite messages before sending to the socket.
- */
-static bool
-nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok)
-{
-	int mflag = waitok ? M_WAITOK : M_NOWAIT;
-	size = roundup2(size, sizeof(void *));
-	int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE;
-	char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO);
-	if (__predict_false(buf == NULL))
-		return (false);
-
-	/* Fill buffer header first */
-	struct linear_buffer *lb = (struct linear_buffer *)buf;
-	lb->base = &buf[sizeof(struct linear_buffer) + size];
-	lb->size = size + SCRATCH_BUFFER_SIZE;
-
-	nw->alloc_len = size;
-	nw->offset = 0;
-	nw->hdr = NULL;
-	nw->_storage = buf;
-	nw->data = (char *)(lb + 1);
-	nw->malloc_flag = mflag;
-	nw->writer_type = NS_WRITER_TYPE_LBUF;
-	nw->num_messages = 0;
-	nw->enomem = false;
-	return (true);
-}
-
-static bool
-nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
-{
-	struct linear_buffer *lb = (struct linear_buffer *)buf;
-	char *data = (char *)(lb + 1);
-	struct nlpcb *nlp = (struct nlpcb *)(nw->arg.ptr);
-
-	if (__predict_false(datalen == 0)) {
-		free(buf, M_NETLINK);
-		return (true);
-	}
-
-	struct mbuf *m = NULL;
-	if (linux_netlink_p != NULL)
-		m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp);
-	free(buf, M_NETLINK);
-
-	if (__predict_false(m == NULL)) {
-		/* XXX: should we set sorcverr? */
-		return (false);
-	}
-
-	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
-	return (nl_send_one(m, nlp, cnt, io_flags));
-}
-
-/* Shouldn't be called (maybe except Linux code originating message) */
-static bool
-nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
-{
-	struct linear_buffer *lb = (struct linear_buffer *)buf;
-	char *data = (char *)(lb + 1);
-
-	if (__predict_false(datalen == 0)) {
-		free(buf, M_NETLINK);
-		return (true);
-	}
-
-	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
-	if (__predict_false(m == NULL)) {
-		free(buf, M_NETLINK);
-		return (false);
-	}
-	m_append(m, datalen, data);
-	free(buf, M_NETLINK);
-
-	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
-	return (true);
-}
-
-static const struct nlwriter_ops nlmsg_writers[] = {
-	/* NS_WRITER_TYPE_MBUF */
-	{
-		.init = nlmsg_get_ns_mbuf,
-		.write_socket = nlmsg_write_socket_mbuf,
-		.write_group = nlmsg_write_group_mbuf,
-		.write_chain = nlmsg_write_chain_mbuf,
-	},
-	/* NS_WRITER_TYPE_BUF */
-	{
-		.init = nlmsg_get_ns_buf,
-		.write_socket = nlmsg_write_socket_buf,
-		.write_group = nlmsg_write_group_buf,
-		.write_chain = nlmsg_write_chain_buf,
-	},
-	/* NS_WRITER_TYPE_LBUF */
-	{
-		.init = nlmsg_get_ns_lbuf,
-		.write_socket = nlmsg_write_socket_lbuf,
-		.write_group = nlmsg_write_group_lbuf,
-	},
-};
-
-static void
-nlmsg_set_callback(struct nl_writer *nw)
-{
-	const struct nlwriter_ops *pops = &nlmsg_writers[nw->writer_type];
-
-	switch (nw->writer_target) {
-	case NS_WRITER_TARGET_SOCKET:
-		nw->cb = pops->write_socket;
-		break;
-	case NS_WRITER_TARGET_GROUP:
-		nw->cb = pops->write_group;
-		break;
-	case NS_WRITER_TARGET_CHAIN:
-		nw->cb = pops->write_chain;
-		break;
-	default:
-		panic("not implemented");
-	}
-}
-
-static bool
-nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok)
-{
-	MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0]));
-	NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type);
-	return (nlmsg_writers[type].init(nw, size, waitok));
-}
-
-static bool
-nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux)
-{
-	int type;
-
-	if (!is_linux) {
-		if (__predict_true(size <= NLMBUFSIZE))
-			type = NS_WRITER_TYPE_MBUF;
-		else
-			type = NS_WRITER_TYPE_BUF;
-	} else
-		type = NS_WRITER_TYPE_LBUF;
-	return (nlmsg_get_buf_type(nw, size, type, waitok));
-}
-
 bool
 _nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp)
 {
-	if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux))
-		return (false);
-	nw->arg.ptr = (void *)nlp;
-	nw->writer_target = NS_WRITER_TARGET_SOCKET;
-	nlmsg_set_callback(nw);
-	return (true);
+	nw->nlp = nlp;
+	nw->cb = nl_send_one;
+
+	return (nlmsg_get_buf(nw, size, false));
 }
 
 bool
 _nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id)
 {
-	if (!nlmsg_get_buf(nw, size, false, false))
-		return (false);
-	nw->arg.group.proto = protocol;
-	nw->arg.group.id = group_id;
-	nw->writer_target = NS_WRITER_TARGET_GROUP;
-	nlmsg_set_callback(nw);
-	return (true);
-}
+	nw->group.proto = protocol;
+	nw->group.id = group_id;
+	nw->cb = nl_send_group;
 
-bool
-_nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm)
-{
-	if (!nlmsg_get_buf(nw, size, false, false))
-		return (false);
-	*pm = NULL;
-	nw->arg.ptr = (void *)pm;
-	nw->writer_target = NS_WRITER_TARGET_CHAIN;
-	nlmsg_set_callback(nw);
-	NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf);
-	return (true);
+	return (nlmsg_get_buf(nw, size, false));
 }
 
 void
@@ -576,18 +96,18 @@
 
 	if (__predict_false(nw->hdr != NULL)) {
 		/* Last message has not been completed, skip it. */
-		int completed_len = (char *)nw->hdr - nw->data;
+		u_int completed_len = (char *)nw->hdr - nw->buf->data;
 		/* Send completed messages */
-		nw->offset -= nw->offset - completed_len;
+		nw->buf->datalen = completed_len;
 		nw->hdr = NULL;
 	}
 
 	NL_LOG(LOG_DEBUG2, "OUT");
-	bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages);
-	nw->_storage = NULL;
+	bool result = nw->cb(nw);
+	nw->num_messages = 0;
 
 	if (!result) {
-		NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed", nw, nw->offset, nw->cb);
+		NL_LOG(LOG_DEBUG, "nw %p flush with %p() failed", nw, nw->cb);
 	}
 
 	return (result);
@@ -599,59 +119,61 @@
  * Return true on success.
  */
 bool
-_nlmsg_refill_buffer(struct nl_writer *nw, int required_len)
+_nlmsg_refill_buffer(struct nl_writer *nw, u_int required_len)
 {
-	struct nl_writer ns_new = {};
-	int completed_len, new_len;
+	struct nl_buf *new;
+	u_int completed_len, new_len, last_len;
+
+	MPASS(nw->buf != NULL);
 
 	if (nw->enomem)
 		return (false);
 
-	NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim",
-	    nw->offset, nw->alloc_len, required_len);
+	NL_LOG(LOG_DEBUG3, "no space at offset %u/%u (want %u), trying to "
+	    "reclaim", nw->buf->datalen, nw->buf->buflen, required_len);
 
-	/* Calculated new buffer size and allocate it s*/
-	completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset;
-	if (completed_len > 0 && required_len < NLMBUFSIZE) {
-		/* We already ran out of space, use the largest effective size */
-		new_len = max(nw->alloc_len, NLMBUFSIZE);
+	/* New buffer must fit the unfinished message plus required_len. */
+	completed_len = (nw->hdr != NULL) ?
+	    (char *)nw->hdr - nw->buf->data : nw->buf->datalen;
+	last_len = nw->buf->datalen - completed_len;
+	if (completed_len > 0 && last_len + required_len < NLMBUFSIZE) {
+		/* We already ran out of space, use largest effective size. */
+		new_len = max(nw->buf->buflen, NLMBUFSIZE);
 	} else {
-		if (nw->alloc_len < NLMBUFSIZE)
+		if (nw->buf->buflen < NLMBUFSIZE)
+			/* XXXGL: does this happen? */
 			new_len = NLMBUFSIZE;
 		else
-			new_len = nw->alloc_len * 2;
-		while (new_len < required_len)
+			new_len = nw->buf->buflen * 2;
+		while (new_len < last_len + required_len)
 			new_len *= 2;
 	}
-	bool waitok = (nw->malloc_flag == M_WAITOK);
-	bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF);
-	if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) {
+
+	new = nl_buf_alloc(new_len, nw->malloc_flag | M_ZERO);
+	if (__predict_false(new == NULL)) {
 		nw->enomem = true;
 		NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM");
 		return (false);
 	}
-	if (nw->ignore_limit)
-		nlmsg_ignore_limit(&ns_new);
 
-	/* Update callback data */
-	ns_new.writer_target = nw->writer_target;
-	nlmsg_set_callback(&ns_new);
-	ns_new.arg = nw->arg;
-
-	/* Copy last (unfinished) header to the new storage */
-	int last_len = nw->offset - completed_len;
+	/* Copy last (unfinished) header to the new storage. */
 	if (last_len > 0) {
-		memcpy(ns_new.data, nw->hdr, last_len);
-		ns_new.hdr = (struct nlmsghdr *)ns_new.data;
-		ns_new.offset = last_len;
+		memcpy(new->data, nw->hdr, last_len);
+		new->datalen = last_len;
 	}
 
-	NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len);
+	NL_LOG(LOG_DEBUG2, "completed: %u bytes, copied: %u bytes",
+	    completed_len, last_len);
 
-	/* Flush completed headers & switch to the new nw */
-	nlmsg_flush(nw);
-	memcpy(nw, &ns_new, sizeof(struct nl_writer));
-	NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes", nw->offset, nw->alloc_len);
+	if (completed_len > 0) {
+		nlmsg_flush(nw);
+		MPASS(nw->buf == NULL);
+	} else
+		nl_buf_free(nw->buf);
+	nw->buf = new;
+	nw->hdr = (last_len > 0) ? (struct nlmsghdr *)new->data : NULL;
+	NL_LOG(LOG_DEBUG2, "switched buffer: used %u/%u bytes",
+	    new->datalen, new->buflen);
 
 	return (true);
 }
@@ -660,17 +182,20 @@
 _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type,
     uint16_t flags, uint32_t len)
 {
+	struct nl_buf *nb = nw->buf;
 	struct nlmsghdr *hdr;
+	u_int required_len;
 
 	MPASS(nw->hdr == NULL);
 
-	int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr));
-	if (__predict_false(nw->offset + required_len > nw->alloc_len)) {
+	required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr));
+	if (__predict_false(nb->datalen + required_len > nb->buflen)) {
 		if (!nlmsg_refill_buffer(nw, required_len))
 			return (false);
+		nb = nw->buf;
 	}
 
-	hdr = (struct nlmsghdr *)(&nw->data[nw->offset]);
+	hdr = (struct nlmsghdr *)(&nb->data[nb->datalen]);
 
 	hdr->nlmsg_len = len;
 	hdr->nlmsg_type = type;
@@ -679,7 +204,7 @@
 	hdr->nlmsg_pid = portid;
 
 	nw->hdr = hdr;
-	nw->offset += sizeof(struct nlmsghdr);
+	nb->datalen += sizeof(struct nlmsghdr);
 
 	return (true);
 }
@@ -687,6 +212,8 @@
 bool
 _nlmsg_end(struct nl_writer *nw)
 {
+	struct nl_buf *nb = nw->buf;
+
 	MPASS(nw->hdr != NULL);
 
 	if (nw->enomem) {
@@ -695,7 +222,7 @@
 		return (false);
 	}
 
-	nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr);
+	nw->hdr->nlmsg_len = nb->data + nb->datalen - (char *)nw->hdr;
 	NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u",
 	    nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags,
 	    nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid);
@@ -707,8 +234,10 @@
 void
 _nlmsg_abort(struct nl_writer *nw)
 {
+	struct nl_buf *nb = nw->buf;
+
 	if (nw->hdr != NULL) {
-		nw->offset = (uint32_t)((char *)nw->hdr - nw->data);
+		nb->datalen = (char *)nw->hdr - nb->data;
 		nw->hdr = NULL;
 	}
 }
@@ -775,7 +304,7 @@
 	/* Save operation result */
 	int *perror = nlmsg_reserve_object(nw, int);
-	NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error,
-	    nw->offset, perror);
+	NL_LOG(LOG_DEBUG2, "record error=%d at off %u (%p)", error,
+	    nw->buf->datalen, perror);
 	*perror = error;
 	nlmsg_end(nw);
 	nw->suppress_ack = true;
@@ -787,40 +316,47 @@
  * KPI functions.
  */
 
-int
+u_int
 nlattr_save_offset(const struct nl_writer *nw)
 {
-	return (nw->offset - ((char *)nw->hdr - nw->data));
+	return (nw->buf->datalen - ((char *)nw->hdr - nw->buf->data));
 }
 
 void *
 nlmsg_reserve_data_raw(struct nl_writer *nw, size_t sz)
 {
-	sz = NETLINK_ALIGN(sz);
+	struct nl_buf *nb = nw->buf;
+	void *data;
 
-	if (__predict_false(nw->offset + sz > nw->alloc_len)) {
+	sz = NETLINK_ALIGN(sz);
+	if (__predict_false(nb->datalen + sz > nb->buflen)) {
 		if (!nlmsg_refill_buffer(nw, sz))
 			return (NULL);
+		nb = nw->buf;
 	}
 
-	void *data_ptr = &nw->data[nw->offset];
-	nw->offset += sz;
-	bzero(data_ptr, sz);
+	data = &nb->data[nb->datalen];
+	bzero(data, sz);
+	nb->datalen += sz;
 
-	return (data_ptr);
+	return (data);
 }
 
 bool
 nlattr_add(struct nl_writer *nw, int attr_type, int attr_len, const void *data)
 {
-	int required_len = NLA_ALIGN(attr_len + sizeof(struct nlattr));
+	struct nl_buf *nb = nw->buf;
+	struct nlattr *nla;
+	u_int required_len;
 
-	if (__predict_false(nw->offset + required_len > nw->alloc_len)) {
+	required_len = NLA_ALIGN(attr_len + sizeof(struct nlattr));
+	if (__predict_false(nb->datalen + required_len > nb->buflen)) {
 		if (!nlmsg_refill_buffer(nw, required_len))
 			return (false);
+		nb = nw->buf;
 	}
 
-	struct nlattr *nla = (struct nlattr *)(&nw->data[nw->offset]);
+	nla = (struct nlattr *)(&nb->data[nb->datalen]);
 
 	nla->nla_len = attr_len + sizeof(struct nlattr);
 	nla->nla_type = attr_type;
@@ -831,7 +367,7 @@
 		}
 		memcpy((nla + 1), data, attr_len);
 	}
-	nw->offset += required_len;
+	nb->datalen += required_len;
 	return (true);
 }
 
diff --git a/sys/netlink/netlink_module.c b/sys/netlink/netlink_module.c
--- a/sys/netlink/netlink_module.c
+++ b/sys/netlink/netlink_module.c
@@ -181,7 +181,6 @@
 	.nlmsg_abort = _nlmsg_abort,
 	.nlmsg_get_unicast_writer = _nlmsg_get_unicast_writer,
 	.nlmsg_get_group_writer = _nlmsg_get_group_writer,
-	.nlmsg_get_chain_writer = _nlmsg_get_chain_writer,
 	.nlmsg_end_dump = _nlmsg_end_dump,
 	.nl_modify_ifp_generic = _nl_modify_ifp_generic,
 	.nl_store_ifp_cookie = _nl_store_ifp_cookie,
@@ -219,7 +218,6 @@
 	switch (what) {
 	case MOD_LOAD:
 		NL_LOG(LOG_DEBUG2, "Loading");
-		nl_init_msg_zone();
 		nl_osd_register();
 #if !defined(NETLINK) && defined(NETLINK_MODULE)
 		nl_set_functions(&nl_module);
@@ -235,7 +233,6 @@
 			nl_set_functions(NULL);
 #endif
 			nl_osd_unregister();
-			nl_destroy_msg_zone();
 		} else
 			ret = EBUSY;
 		break;
diff --git a/sys/netlink/netlink_var.h b/sys/netlink/netlink_var.h
--- a/sys/netlink/netlink_var.h
+++ b/sys/netlink/netlink_var.h
@@ -43,14 +43,9 @@
 
 struct ucred;
 
-struct nl_io_queue {
-	STAILQ_HEAD(, mbuf)	head;
-	int			length;
-	int			hiwat;
-};
-
 struct nl_buf {
 	TAILQ_ENTRY(nl_buf)	tailq;
+	struct mbuf		*control;
 	u_int			buflen;
 	u_int			datalen;
 	u_int			offset;
@@ -72,7 +67,6 @@
 	bool			nl_linux; /* true if running under compat */
 	bool			nl_unconstrained_vnet; /* true if running under VNET jail (or without jail) */
 	bool			nl_need_thread_setup;
-	struct nl_io_queue	tx_queue;
 	struct taskqueue	*nl_taskqueue;
 	struct task		nl_task;
 	struct ucred		*nl_cred; /* Copy of nl_socket->so_cred */
@@ -131,7 +125,7 @@
 extern struct nl_proto_handler *nl_handlers;
 
 /* netlink_domain.c */
-void nl_send_group(struct mbuf *m, int cnt, int proto, int group_id);
+bool nl_send_group(struct nl_writer *);
 void nl_osd_register(void);
 void nl_osd_unregister(void);
 void nl_set_thread_nlp(struct thread *td, struct nlpcb *nlp);
@@ -139,22 +133,18 @@
 /* netlink_io.c */
 #define	NL_IOF_UNTRANSLATED	0x01
 #define	NL_IOF_IGNORE_LIMIT	0x02
-bool nl_send_one(struct mbuf *m, struct nlpcb *nlp, int cnt, int io_flags);
+bool nl_send_one(struct nl_writer *);
 void nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *nlmsg,
     struct nl_pstate *npt);
 void nl_on_transmit(struct nlpcb *nlp);
-void nl_init_io(struct nlpcb *nlp);
-void nl_free_io(struct nlpcb *nlp);
 
 void nl_taskqueue_handler(void *_arg, int pending);
 void nl_schedule_taskqueue(struct nlpcb *nlp);
 void nl_process_receive_locked(struct nlpcb *nlp);
 void nl_set_source_metadata(struct mbuf *m, int num_messages);
-void nl_add_msg_info(struct mbuf *m);
-
-/* netlink_message_writer.c */
-void nl_init_msg_zone(void);
-void nl_destroy_msg_zone(void);
+void nl_add_msg_info(struct nl_buf *nb);
+struct nl_buf *nl_buf_alloc(size_t len, int mflag);
+void nl_buf_free(struct nl_buf *nb);
 
 /* netlink_generic.c */
 struct genl_family {
diff --git a/sys/netlink/route/rt.c b/sys/netlink/route/rt.c
--- a/sys/netlink/route/rt.c
+++ b/sys/netlink/route/rt.c
@@ -556,9 +556,8 @@
 	IF_DEBUG_LEVEL(LOG_DEBUG3) {
 		char rtbuf[INET6_ADDRSTRLEN + 5];
 		FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family,
-		    "Dump %s, offset %u, error %d",
-		    rt_print_buf(rt, rtbuf, sizeof(rtbuf)),
-		    wa->nw->offset, error);
+		    "Dump %s, error %d",
+		    rt_print_buf(rt, rtbuf, sizeof(rtbuf)), error);
 	}
 	wa->error = error;
 
@@ -578,7 +577,6 @@
 
 	FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d",
 	    wa->count, wa->dumped);
-	NL_LOG(LOG_DEBUG2, "Current offset: %d", wa->nw->offset);
 }
 
 static int
diff --git a/tests/sys/netlink/test_netlink_message_writer.py b/tests/sys/netlink/test_netlink_message_writer.py
--- a/tests/sys/netlink/test_netlink_message_writer.py
+++ b/tests/sys/netlink/test_netlink_message_writer.py
@@ -4,19 +4,11 @@
 from atf_python.ktest import BaseKernelTest
 from atf_python.sys.netlink.attrs import NlAttrU32
 
-
 M_NOWAIT = 1
 M_WAITOK = 2
-NS_WRITER_TYPE_MBUF = 0
-NS_WRITER_TYPE_BUF = 1
-NS_WRITER_TYPE_LBUF = 1
-
-MHLEN = 160
-MCLBYTES = 2048  # XXX: may differ on some archs?
-MJUMPAGESIZE = mmap.PAGESIZE
-MJUM9BYTES = 9 * 1024
-MJUM16BYTES = 16 * 1024
 
+NLMSG_SMALL = 128
+NLMSG_LARGE = 2048
 
 class TestNetlinkMessageWriter(BaseKernelTest):
     KTEST_MODULE_NAME = "ktest_netlink_message_writer"
@@ -28,52 +20,20 @@
             pytest.param(M_WAITOK, id="WAITOK"),
         ],
     )
-    @pytest.mark.parametrize(
-        "writer_type",
-        [
-            pytest.param(NS_WRITER_TYPE_MBUF, id="MBUF"),
-            pytest.param(NS_WRITER_TYPE_BUF, id="BUF"),
-        ],
-    )
     @pytest.mark.parametrize(
         "sz",
         [
-            pytest.param([160, 160], id="MHLEN"),
-            pytest.param([MCLBYTES, MCLBYTES], id="MCLBYTES"),
+            pytest.param([NLMSG_SMALL, NLMSG_SMALL], id="NLMSG_SMALL"),
+            pytest.param([NLMSG_LARGE, NLMSG_LARGE], id="NLMSG_LARGE"),
+            pytest.param([NLMSG_LARGE + 256, NLMSG_LARGE + 256], id="NLMSG_LARGE+256"),
         ],
     )
-    def test_mbuf_writer_allocation(self, sz, writer_type, malloc_flags):
+    def test_nlbuf_writer_allocation(self, sz, malloc_flags):
         """override to parametrize"""
 
         test_meta = [
             NlAttrU32(1, sz[0]),  # size
             NlAttrU32(2, sz[1]),  # expected_avail
-            NlAttrU32(4, writer_type),
-            NlAttrU32(5, malloc_flags),
-        ]
-        self.runtest(test_meta)
-
-    @pytest.mark.parametrize(
-        "malloc_flags",
-        [
-            pytest.param(M_NOWAIT, id="NOWAIT"),
-            pytest.param(M_WAITOK, id="WAITOK"),
-        ],
-    )
-    @pytest.mark.parametrize(
-        "sz",
-        [
-            pytest.param([160, 160, 1], id="MHLEN"),
-            pytest.param([MCLBYTES, MCLBYTES, 1], id="MCLBYTES"),
-            pytest.param([MCLBYTES + 1, MCLBYTES + 1, 2], id="MCLBYTES_MHLEN"),
-            pytest.param([MCLBYTES + 256, MCLBYTES * 2, 2], id="MCLBYTESx2"),
-        ],
-    )
-    def test_mbuf_chain_allocation(self, sz, malloc_flags):
-        test_meta = [
-            NlAttrU32(1, sz[0]),  # size
-            NlAttrU32(2, sz[1]),  # expected_avail
-            NlAttrU32(3, sz[2]),  # expected_count
-            NlAttrU32(5, malloc_flags),
+            NlAttrU32(3, malloc_flags),
         ]
         self.runtest(test_meta)