diff --git a/share/man/man4/netlink.4 b/share/man/man4/netlink.4 index c75366f560f0..bbfa55049e2e 100644 --- a/share/man/man4/netlink.4 +++ b/share/man/man4/netlink.4 @@ -1,344 +1,349 @@ .\" .\" Copyright (C) 2022 Alexander Chernikov . .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" $FreeBSD$ .\" -.Dd November 1, 2022 +.Dd November 30, 2022 .Dt NETLINK 4 .Os .Sh NAME .Nm Netlink .Nd Kernel network configuration protocol .Sh SYNOPSIS .In netlink/netlink.h .In netlink/netlink_route.h .Ft int .Fn socket AF_NETLINK SOCK_DGRAM int family .Sh DESCRIPTION Netlink is a user-kernel message-based communication protocol primarily used for network stack configuration. 
Netlink is easily extendable and supports large dumps and event notifications, all via a single socket. The protocol is fully asynchronous, allowing one to issue and track multiple requests at once. Netlink consists of multiple families, which commonly group the commands belonging to the particular kernel subsystem. Currently, the supported families are: .Pp .Bd -literal -offset indent -compact NETLINK_ROUTE network configuration, NETLINK_GENERIC "container" family .Ed .Pp The .Dv NETLINK_ROUTE family handles all interfaces, addresses, neighbors, routes, and VNETs configuration. More details can be found in .Xr rtnetlink 4 . The .Dv NETLINK_GENERIC family serves as a .Do container Dc , allowing registering other families under the .Dv NETLINK_GENERIC umbrella. This approach allows using a single netlink socket to interact with multiple netlink families at once. More details can be found in .Xr genetlink 4 . .Pp Netlink has its own sockaddr structure: .Bd -literal struct sockaddr_nl { uint8_t nl_len; /* sizeof(sockaddr_nl) */ sa_family_t nl_family; /* netlink family */ uint16_t nl_pad; /* reserved, set to 0 */ uint32_t nl_pid; /* automatically selected, set to 0 */ uint32_t nl_groups; /* multicast groups mask to bind to */ }; .Ed .Pp Typically, filling this structure is not required for socket operations. It is presented here for completeness. .Sh PROTOCOL DESCRIPTION The protocol is message-based. Each message starts with the mandatory .Va nlmsghdr header, followed by the family-specific header and the list of type-length-value pairs (TLVs). TLVs can be nested. All headers and TLVS are padded to 4-byte boundaries. Each .Xr send 2 or .Xr recv 2 system call may contain multiple messages. 
.Ss BASE HEADER .Bd -literal struct nlmsghdr { uint32_t nlmsg_len; /* Length of message including header */ uint16_t nlmsg_type; /* Message type identifier */ uint16_t nlmsg_flags; /* Flags (NLM_F_) */ uint32_t nlmsg_seq; /* Sequence number */ uint32_t nlmsg_pid; /* Sending process port ID */ }; .Ed .Pp The .Va nlmsg_len field stores the whole message length, in bytes, including the header. This length has to be rounded up to the nearest 4-byte boundary when iterating over messages. The .Va nlmsg_type field represents the command/request type. This value is family-specific. The list of supported commands can be found in the relevant family header file. .Va nlmsg_seq is a user-provided request identifier. An application can track the operation result using the .Dv NLMSG_ERROR messages and matching the .Va nlmsg_seq . The .Va nlmsg_pid field is the message sender id. This field is optional for userland. The kernel sender id is zero. The .Va nlmsg_flags field contains the message-specific flags. 
The following generic flags are defined: .Pp .Bd -literal -offset indent -compact NLM_F_REQUEST Indicates that the message is an actual request to the kernel NLM_F_ACK Request an explicit ACK message with an operation result .Ed .Pp The following generic flags are defined for the "GET" request types: .Pp .Bd -literal -offset indent -compact NLM_F_ROOT Return the whole dataset NLM_F_MATCH Return all entries matching the criteria .Ed These two flags are typically used together, aliased to .Dv NLM_F_DUMP .Pp The following generic flags are defined for the "NEW" request types: .Pp .Bd -literal -offset indent -compact NLM_F_CREATE Create an object if none exists NLM_F_EXCL Don't replace an object if it exists NLM_F_REPLACE Replace an existing matching object NLM_F_APPEND Append to an existing object .Ed .Pp The following generic flags are defined for the replies: .Pp .Bd -literal -offset indent -compact NLM_F_MULTI Indicates that the message is part of the message group NLM_F_DUMP_INTR Indicates that the state dump was not completed NLM_F_DUMP_FILTERED Indicates that the dump was filtered per request NLM_F_CAPPED Indicates the original message was capped to its header NLM_F_ACK_TLVS Indicates that extended ACK TLVs were included .Ed .Ss TLVs Most messages encode their attributes as type-length-value pairs (TLVs). The base TLV header: .Bd -literal struct nlattr { uint16_t nla_len; /* Total attribute length */ uint16_t nla_type; /* Attribute type */ }; .Ed The TLV type .Pq Va nla_type scope is typically the message type or group within a family. For example, the .Dv RTN_MULTICAST type value is only valid for .Dv RTM_NEWROUTE , .Dv RTM_DELROUTE and .Dv RTM_GETROUTE messages. TLVs can be nested; in that case internal TLVs may have their own sub-types. All TLVs are packed with 4-byte padding. .Ss CONTROL MESSAGES A number of generic control messages are reserved in each family. 
.Pp .Dv NLMSG_ERROR reports the operation result if requested, optionally followed by the metadata TLVs. The value of .Va nlmsg_seq is set to its value in the original messages, while .Va nlmsg_pid is set to the socket pid of the original socket. The operation result is reported via .Vt "struct nlmsgerr": .Bd -literal struct nlmsgerr { int error; /* Standard errno */ struct nlmsghdr msg; /* Original message header */ }; .Ed If the .Dv NETLINK_CAP_ACK socket option is not set, the remainder of the original message will follow. If the .Dv NETLINK_EXT_ACK -socket option is set, kernel may add a +socket option is set, the kernel may add a .Dv NLMSGERR_ATTR_MSG string TLV with the textual error description, optionally followed by the .Dv NLMSGERR_ATTR_OFFS TLV, indicating the offset from the message start that triggered an error. +If the operation reply is a multipart message, then no +.Dv NLMSG_ERROR +reply is generated, only a +.Dv NLMSG_DONE +message, closing multipart sequence. .Pp .Dv NLMSG_DONE indicates the end of the message group: typically, the end of the dump. It contains a single .Vt int field, describing the dump result as a standard errno value. .Sh SOCKET OPTIONS Netlink supports a number of custom socket options, which can be set with .Xr setsockopt 2 with the .Dv SOL_NETLINK .Fa level : .Bl -tag -width indent .It Dv NETLINK_ADD_MEMBERSHIP Subscribes to the notifications for the specific group (int). .It Dv NETLINK_DROP_MEMBERSHIP Unsubscribes from the notifications for the specific group (int). .It Dv NETLINK_LIST_MEMBERSHIPS Lists the memberships as a bitmask. .It Dv NETLINK_CAP_ACK Instructs the kernel to send the original message header in the reply without the message body. .It Dv NETLINK_EXT_ACK Acknowledges ability to receive additional TLVs in the ACK message. 
.El .Pp Additionally, netlink overrides the following socket options from the .Dv SOL_SOCKET .Fa level : .Bl -tag -width indent .It Dv SO_RCVBUF Sets the maximum size of the socket receive buffer. If the caller has .Dv PRIV_NET_ROUTE permission, the value can exceed the currently-set .Va kern.ipc.maxsockbuf value. .El .Sh SYSCTL VARIABLES A set of .Xr sysctl 8 variables is available to tweak run-time parameters: .Bl -tag -width indent .It Va net.netlink.sendspace Default send buffer for the netlink socket. Note that the socket sendspace has to be at least as long as the longest message that can be transmitted via this socket. .El .Bl -tag -width indent .It Va net.netlink.recvspace Default receive buffer for the netlink socket. Note that the socket recvspace has to be least as long as the longest message that can be received from this socket. .El .Sh DEBUGGING Netlink implements per-functional-unit debugging, with different severities controllable via the .Va net.netlink.debug branch. These messages are logged in the kernel message buffer and can be seen in .Xr dmesg 8 . The following severity levels are defined: .Bl -tag -width indent .It Dv LOG_DEBUG(7) Rare events or per-socket errors are reported here. This is the default level, not impacting production performance. .It Dv LOG_DEBUG2(8) Socket events such as groups memberships, privilege checks, commands and dumps are logged. This level does not incur significant performance overhead. .It Dv LOG_DEBUG9(9) All socket events, each dumped or modified entities are logged. Turning it on may result in significant performance overhead. .El .Sh ERRORS Netlink reports operation results, including errors and error metadata, by sending a .Dv NLMSG_ERROR message for each request message. 
The following errors can be returned: .Bl -tag -width Er .It Bq Er EPERM when the current privileges are insufficient to perform the required operation; .It Bo Er ENOBUFS Bc or Bo Er ENOMEM Bc when the system runs out of memory for an internal data structure; .It Bq Er ENOTSUP when the requested command is not supported by the family or the family is not supported; .It Bq Er EINVAL when some necessary TLVs are missing or invalid, detailed info may be provided in NLMSGERR_ATTR_MSG and NLMSGERR_ATTR_OFFS TLVs; .It Bq Er ENOENT when trying to delete a non-existent object. .Pp Additionally, a socket operation itself may fail with one of the errors specified in .Xr socket 2 , .Xr recv 2 or .Xr send 2 . .El .Sh SEE ALSO .Xr genetrlink 4 , .Xr rtnetlink 4 .Rs .%A "J. Salim" .%A "H. Khosravi" .%A "A. Kleen" .%A "A. Kuznetsov" .%T "Linux Netlink as an IP Services Protocol" .%O "RFC 3549" .Re .Sh HISTORY The netlink protocol appeared in .Fx 14.0 . .Sh AUTHORS The netlink was implemented by .An -nosplit .An Alexander Chernikov Aq Mt melifaro@FreeBSD.org . It was derived from the Google Summer of Code 2021 project by .An Ng Peng Nam Sean . diff --git a/sys/netlink/netlink_io.c b/sys/netlink/netlink_io.c index b2a0023a143b..fb8006f689e4 100644 --- a/sys/netlink/netlink_io.c +++ b/sys/netlink/netlink_io.c @@ -1,529 +1,532 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_io #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_DEBUG); /* * The logic below provide a p2p interface for receiving and * sending netlink data between the kernel and userland. 
*/ static const struct sockaddr_nl _nl_empty_src = { .nl_len = sizeof(struct sockaddr_nl), .nl_family = PF_NETLINK, .nl_pid = 0 /* comes from the kernel */ }; static const struct sockaddr *nl_empty_src = (const struct sockaddr *)&_nl_empty_src; static struct mbuf *nl_process_mbuf(struct mbuf *m, struct nlpcb *nlp); static void queue_push(struct nl_io_queue *q, struct mbuf *mq) { while (mq != NULL) { struct mbuf *m = mq; mq = mq->m_nextpkt; m->m_nextpkt = NULL; q->length += m_length(m, NULL); STAILQ_INSERT_TAIL(&q->head, m, m_stailqpkt); } } static void queue_push_head(struct nl_io_queue *q, struct mbuf *m) { MPASS(m->m_nextpkt == NULL); q->length += m_length(m, NULL); STAILQ_INSERT_HEAD(&q->head, m, m_stailqpkt); } static struct mbuf * queue_pop(struct nl_io_queue *q) { if (!STAILQ_EMPTY(&q->head)) { struct mbuf *m = STAILQ_FIRST(&q->head); STAILQ_REMOVE_HEAD(&q->head, m_stailqpkt); m->m_nextpkt = NULL; q->length -= m_length(m, NULL); return (m); } return (NULL); } static struct mbuf * queue_head(const struct nl_io_queue *q) { return (STAILQ_FIRST(&q->head)); } static inline bool queue_empty(const struct nl_io_queue *q) { return (q->length == 0); } static void queue_free(struct nl_io_queue *q) { while (!STAILQ_EMPTY(&q->head)) { struct mbuf *m = STAILQ_FIRST(&q->head); STAILQ_REMOVE_HEAD(&q->head, m_stailqpkt); m->m_nextpkt = NULL; m_freem(m); } q->length = 0; } static void nl_schedule_taskqueue(struct nlpcb *nlp) { if (!nlp->nl_task_pending) { nlp->nl_task_pending = true; taskqueue_enqueue(nlp->nl_taskqueue, &nlp->nl_task); NL_LOG(LOG_DEBUG3, "taskqueue scheduled"); } else { NL_LOG(LOG_DEBUG3, "taskqueue schedule skipped"); } } int nl_receive_async(struct mbuf *m, struct socket *so) { struct nlpcb *nlp = sotonlpcb(so); int error = 0; m->m_nextpkt = NULL; NLP_LOCK(nlp); if ((__predict_true(nlp->nl_active))) { sbappend(&so->so_snd, m, 0); NL_LOG(LOG_DEBUG3, "enqueue %u bytes", m_length(m, NULL)); nl_schedule_taskqueue(nlp); } else { NL_LOG(LOG_DEBUG, "ignoring %u 
bytes on non-active socket", m_length(m, NULL)); m_free(m); error = EINVAL; } NLP_UNLOCK(nlp); return (error); } static bool tx_check_locked(struct nlpcb *nlp) { if (queue_empty(&nlp->tx_queue)) return (true); /* * Check if something can be moved from the internal TX queue * to the socket queue. */ bool appended = false; struct sockbuf *sb = &nlp->nl_socket->so_rcv; SOCKBUF_LOCK(sb); while (true) { struct mbuf *m = queue_head(&nlp->tx_queue); if (m && sbappendaddr_locked(sb, nl_empty_src, m, NULL) != 0) { /* appended successfully */ queue_pop(&nlp->tx_queue); appended = true; } else break; } SOCKBUF_UNLOCK(sb); if (appended) sorwakeup(nlp->nl_socket); return (queue_empty(&nlp->tx_queue)); } static bool nl_process_received_one(struct nlpcb *nlp) { bool reschedule = false; NLP_LOCK(nlp); nlp->nl_task_pending = false; if (!tx_check_locked(nlp)) { /* TX overflow queue still not empty, ignore RX */ NLP_UNLOCK(nlp); return (false); } if (queue_empty(&nlp->rx_queue)) { /* * Grab all data we have from the socket TX queue * and store it the internal queue, so it can be worked on * w/o holding socket lock. 
*/ struct sockbuf *sb = &nlp->nl_socket->so_snd; SOCKBUF_LOCK(sb); unsigned int avail = sbavail(sb); if (avail > 0) { NL_LOG(LOG_DEBUG3, "grabbed %u bytes", avail); queue_push(&nlp->rx_queue, sbcut_locked(sb, avail)); } SOCKBUF_UNLOCK(sb); } else { /* Schedule another pass to read from the socket queue */ reschedule = true; } int prev_hiwat = nlp->tx_queue.hiwat; NLP_UNLOCK(nlp); while (!queue_empty(&nlp->rx_queue)) { struct mbuf *m = queue_pop(&nlp->rx_queue); m = nl_process_mbuf(m, nlp); if (m != NULL) { queue_push_head(&nlp->rx_queue, m); reschedule = false; break; } } if (nlp->tx_queue.hiwat > prev_hiwat) { NLP_LOG(LOG_DEBUG, nlp, "TX override peaked to %d", nlp->tx_queue.hiwat); } return (reschedule); } static void nl_process_received(struct nlpcb *nlp) { NL_LOG(LOG_DEBUG3, "taskqueue called"); while (nl_process_received_one(nlp)) ; } void nl_init_io(struct nlpcb *nlp) { STAILQ_INIT(&nlp->rx_queue.head); STAILQ_INIT(&nlp->tx_queue.head); } void nl_free_io(struct nlpcb *nlp) { queue_free(&nlp->rx_queue); queue_free(&nlp->tx_queue); } /* * Called after some data have been read from the socket. */ void nl_on_transmit(struct nlpcb *nlp) { NLP_LOCK(nlp); struct socket *so = nlp->nl_socket; if (__predict_false(nlp->nl_dropped_bytes > 0 && so != NULL)) { unsigned long dropped_bytes = nlp->nl_dropped_bytes; unsigned long dropped_messages = nlp->nl_dropped_messages; nlp->nl_dropped_bytes = 0; nlp->nl_dropped_messages = 0; struct sockbuf *sb = &so->so_rcv; NLP_LOG(LOG_DEBUG, nlp, "socket RX overflowed, %lu messages (%lu bytes) dropped. 
" "bytes: [%u/%u] mbufs: [%u/%u]", dropped_messages, dropped_bytes, sb->sb_ccc, sb->sb_hiwat, sb->sb_mbcnt, sb->sb_mbmax); /* TODO: send netlink message */ } nl_schedule_taskqueue(nlp); NLP_UNLOCK(nlp); } void nl_taskqueue_handler(void *_arg, int pending) { struct nlpcb *nlp = (struct nlpcb *)_arg; CURVNET_SET(nlp->nl_socket->so_vnet); nl_process_received(nlp); CURVNET_RESTORE(); } static __noinline void queue_push_tx(struct nlpcb *nlp, struct mbuf *m) { queue_push(&nlp->tx_queue, m); nlp->nl_tx_blocked = true; if (nlp->tx_queue.length > nlp->tx_queue.hiwat) nlp->tx_queue.hiwat = nlp->tx_queue.length; } /* * Tries to send @m to the socket @nlp. * * @m: mbuf(s) to send to. Consumed in any case. * @nlp: socket to send to * @cnt: number of messages in @m * @io_flags: combination of NL_IOF_* flags * * Returns true on success. * If no queue overrunes happened, wakes up socket owner. */ bool nl_send_one(struct mbuf *m, struct nlpcb *nlp, int num_messages, int io_flags) { bool untranslated = io_flags & NL_IOF_UNTRANSLATED; bool ignore_limits = io_flags & NL_IOF_IGNORE_LIMIT; bool result = true; IF_DEBUG_LEVEL(LOG_DEBUG2) { struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *); NLP_LOG(LOG_DEBUG2, nlp, "TX mbuf len %u msgs %u msg type %d first hdrlen %u io_flags %X", m_length(m, NULL), num_messages, hdr->nlmsg_type, hdr->nlmsg_len, io_flags); } if (__predict_false(nlp->nl_linux && linux_netlink_p != NULL && untranslated)) { m = linux_netlink_p->mbufs_to_linux(nlp->nl_proto, m, nlp); if (m == NULL) return (false); } NLP_LOCK(nlp); if (__predict_false(nlp->nl_socket == NULL)) { NLP_UNLOCK(nlp); m_freem(m); return (false); } if (!queue_empty(&nlp->tx_queue)) { if (ignore_limits) { queue_push_tx(nlp, m); } else { m_free(m); result = false; } NLP_UNLOCK(nlp); return (result); } struct socket *so = nlp->nl_socket; if (sbappendaddr(&so->so_rcv, nl_empty_src, m, NULL) != 0) { sorwakeup(so); NLP_LOG(LOG_DEBUG3, nlp, "appended data & woken up"); } else { if (ignore_limits) { 
queue_push_tx(nlp, m); } else { /* * Store dropped data so it can be reported * on the next read */ nlp->nl_dropped_bytes += m_length(m, NULL); nlp->nl_dropped_messages += num_messages; NLP_LOG(LOG_DEBUG2, nlp, "RX oveflow: %lu m (+%d), %lu b (+%d)", (unsigned long)nlp->nl_dropped_messages, num_messages, (unsigned long)nlp->nl_dropped_bytes, m_length(m, NULL)); soroverflow(so); m_freem(m); result = false; } } NLP_UNLOCK(nlp); return (result); } static int nl_receive_message(struct nlmsghdr *hdr, int remaining_length, struct nlpcb *nlp, struct nl_pstate *npt) { nl_handler_f handler = nl_handlers[nlp->nl_proto].cb; int error = 0; - NL_LOG(LOG_DEBUG2, "msg len: %d type: %d", hdr->nlmsg_len, - hdr->nlmsg_type); + NLP_LOG(LOG_DEBUG2, nlp, "msg len: %u type: %d: flags: 0x%X seq: %u pid: %u", + hdr->nlmsg_len, hdr->nlmsg_type, hdr->nlmsg_flags, hdr->nlmsg_seq, + hdr->nlmsg_pid); if (__predict_false(hdr->nlmsg_len > remaining_length)) { NLP_LOG(LOG_DEBUG, nlp, "message is not entirely present: want %d got %d", hdr->nlmsg_len, remaining_length); return (EINVAL); } else if (__predict_false(hdr->nlmsg_len < sizeof(*hdr))) { NL_LOG(LOG_DEBUG, "message too short: %d", hdr->nlmsg_len); return (EINVAL); } /* Stamp each message with sender pid */ hdr->nlmsg_pid = nlp->nl_port; npt->hdr = hdr; if (hdr->nlmsg_flags & NLM_F_REQUEST && hdr->nlmsg_type >= NLMSG_MIN_TYPE) { NL_LOG(LOG_DEBUG2, "handling message with msg type: %d", hdr->nlmsg_type); if (nlp->nl_linux && linux_netlink_p != NULL) { struct nlmsghdr *hdr_orig = hdr; hdr = linux_netlink_p->msg_from_linux(nlp->nl_proto, hdr, npt); if (hdr == NULL) { npt->hdr = hdr_orig; if (hdr->nlmsg_flags & NLM_F_ACK) nlmsg_ack(nlp, EAGAIN, hdr, npt); return (0); } } error = handler(hdr, npt); NL_LOG(LOG_DEBUG2, "retcode: %d", error); } if ((hdr->nlmsg_flags & NLM_F_ACK) || (error != 0 && error != EINTR)) { - NL_LOG(LOG_DEBUG3, "ack"); - nlmsg_ack(nlp, error, hdr, npt); - NL_LOG(LOG_DEBUG3, "done"); + if (!npt->nw->suppress_ack) { + 
NL_LOG(LOG_DEBUG3, "ack"); + nlmsg_ack(nlp, error, hdr, npt); + } } return (0); } static void npt_clear(struct nl_pstate *npt) { lb_clear(&npt->lb); npt->error = 0; npt->err_msg = NULL; npt->err_off = 0; npt->hdr = NULL; + npt->nw->suppress_ack = false; } /* * Processes an incoming packet, which can contain multiple netlink messages */ static struct mbuf * nl_process_mbuf(struct mbuf *m, struct nlpcb *nlp) { int offset, buffer_length; struct nlmsghdr *hdr; char *buffer; int error; NL_LOG(LOG_DEBUG3, "RX netlink mbuf %p on %p", m, nlp->nl_socket); struct nl_writer nw = {}; if (!nlmsg_get_unicast_writer(&nw, NLMSG_SMALL, nlp)) { m_freem(m); NL_LOG(LOG_DEBUG, "error allocating socket writer"); return (NULL); } nlmsg_ignore_limit(&nw); /* TODO: alloc this buf once for nlp */ int data_length = m_length(m, NULL); buffer_length = roundup2(data_length, 8) + SCRATCH_BUFFER_SIZE; if (nlp->nl_linux) buffer_length += roundup2(data_length, 8); buffer = malloc(buffer_length, M_NETLINK, M_NOWAIT | M_ZERO); if (buffer == NULL) { m_freem(m); nlmsg_flush(&nw); NL_LOG(LOG_DEBUG, "Unable to allocate %d bytes of memory", buffer_length); return (NULL); } m_copydata(m, 0, data_length, buffer); struct nl_pstate npt = { .nlp = nlp, .lb.base = &buffer[roundup2(data_length, 8)], .lb.size = buffer_length - roundup2(data_length, 8), .nw = &nw, .strict = nlp->nl_flags & NLF_STRICT, }; for (offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) { hdr = (struct nlmsghdr *)&buffer[offset]; /* Save length prior to calling handler */ int msglen = NLMSG_ALIGN(hdr->nlmsg_len); NL_LOG(LOG_DEBUG3, "parsing offset %d/%d", offset, data_length); npt_clear(&npt); error = nl_receive_message(hdr, data_length - offset, nlp, &npt); offset += msglen; if (__predict_false(error != 0 || nlp->nl_tx_blocked)) break; } NL_LOG(LOG_DEBUG3, "packet parsing done"); free(buffer, M_NETLINK); nlmsg_flush(&nw); if (nlp->nl_tx_blocked) { NLP_LOCK(nlp); nlp->nl_tx_blocked = false; NLP_UNLOCK(nlp); m_adj(m, offset); 
return (m); } else { m_freem(m); return (NULL); } } diff --git a/sys/netlink/netlink_message_writer.c b/sys/netlink/netlink_message_writer.c index 1856f2859b01..37414703c6f6 100644 --- a/sys/netlink/netlink_message_writer.c +++ b/sys/netlink/netlink_message_writer.c @@ -1,686 +1,690 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_writer #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_DEBUG); /* * The goal of this file is to provide convenient message writing KPI on top of * different storage methods (mbufs, uio, temporary memory chunks). * * The main KPI guarantee is the the (last) message always resides in the contiguous * memory buffer, so one is able to update the header after writing the entire message. * * This guarantee comes with a side effect of potentially reallocating underlying * buffer, so one needs to update the desired pointers after something is added * to the header. * * Messaging layer contains hooks performing transparent Linux translation for the messages. * * There are 3 types of supported targets: * * socket (adds mbufs to the socket buffer, used for message replies) * * group (sends mbuf/chain to the specified groups, used for the notifications) * * chain (returns mbuf chain, used in Linux message translation code) * * There are 3 types of storage: * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message * fits in MCLBYTES) * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs * to be larger than one supported by NS_WRITER_TYPE_MBUF) * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for * Linux sockets, calls translation hook prior to sending messages to the socket). * * Internally, KPI switches between different types of storage when memory requirements * change. It happens transparently to the caller. 
*/ typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok); typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int buflen, int cnt); struct nlwriter_ops { nlwriter_op_init *init; nlwriter_op_write *write_socket; nlwriter_op_write *write_group; nlwriter_op_write *write_chain; }; /* * NS_WRITER_TYPE_BUF * Writes message to a temporary memory buffer, * flushing to the socket/group when buffer size limit is reached */ static bool nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok) { int mflag = waitok ? M_WAITOK : M_NOWAIT; nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO); if (__predict_false(nw->_storage == NULL)) return (false); nw->alloc_len = size; nw->offset = 0; nw->hdr = NULL; nw->data = nw->_storage; nw->writer_type = NS_WRITER_TYPE_BUF; nw->malloc_flag = mflag; nw->num_messages = 0; nw->enomem = false; return (true); } static bool nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) { NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw); if (__predict_false(datalen == 0)) { free(buf, M_NETLINK); return (true); } struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR); if (__predict_false(m == NULL)) { /* XXX: should we set sorcverr? */ free(buf, M_NETLINK); return (false); } m_append(m, datalen, buf); free(buf, M_NETLINK); int io_flags = (nw->ignore_limit) ? 
NL_IOF_IGNORE_LIMIT : 0; return (nl_send_one(m, (struct nlpcb *)(nw->arg_ptr), cnt, io_flags)); } static bool nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) { NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg_ptr); if (__predict_false(datalen == 0)) { free(buf, M_NETLINK); return (true); } struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR); if (__predict_false(m == NULL)) { free(buf, M_NETLINK); return (false); } bool success = m_append(m, datalen, buf) != 0; free(buf, M_NETLINK); if (!success) return (false); nl_send_group(m, cnt, nw->arg_uint >> 16, nw->arg_uint & 0xFFFF); return (true); } static bool nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt) { struct mbuf **m0 = (struct mbuf **)(nw->arg_ptr); NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg_ptr); if (__predict_false(datalen == 0)) { free(buf, M_NETLINK); return (true); } if (*m0 == NULL) { struct mbuf *m; m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR); if (__predict_false(m == NULL)) { free(buf, M_NETLINK); return (false); } *m0 = m; } if (__predict_false(m_append(*m0, datalen, buf) == 0)) { free(buf, M_NETLINK); return (false); } return (true); } /* * NS_WRITER_TYPE_MBUF * Writes message to the allocated mbuf, * flushing to socket/group when mbuf size limit is reached. * This is the most efficient mechanism as it avoids double-copying. * * Allocates a single mbuf suitable to store up to @size bytes of data. * If size < MHLEN (around 160 bytes), allocates mbuf with pkghdr * If size <= MCLBYTES (2k), allocate a single mbuf cluster * Otherwise, return NULL. */ static bool nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok) { struct mbuf *m; int mflag = waitok ? 
M_WAITOK : M_NOWAIT; m = m_get2(size, mflag, MT_DATA, M_PKTHDR); if (__predict_false(m == NULL)) return (false); nw->alloc_len = M_TRAILINGSPACE(m); nw->offset = 0; nw->hdr = NULL; nw->_storage = (void *)m; nw->data = mtod(m, void *); nw->writer_type = NS_WRITER_TYPE_MBUF; nw->malloc_flag = mflag; nw->num_messages = 0; nw->enomem = false; NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p", m, size, nw->alloc_len, nw->data); return (true); } static bool nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) { struct mbuf *m = (struct mbuf *)buf; NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg_ptr); if (__predict_false(datalen == 0)) { m_freem(m); return (true); } m->m_pkthdr.len = datalen; m->m_len = datalen; int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0; return (nl_send_one(m, (struct nlpcb *)(nw->arg_ptr), cnt, io_flags)); } static bool nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) { struct mbuf *m = (struct mbuf *)buf; NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg_ptr); if (__predict_false(datalen == 0)) { m_freem(m); return (true); } m->m_pkthdr.len = datalen; m->m_len = datalen; nl_send_group(m, cnt, nw->arg_uint >> 16, nw->arg_uint & 0xFFFF); return (true); } static bool nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt) { struct mbuf *m_new = (struct mbuf *)buf; struct mbuf **m0 = (struct mbuf **)(nw->arg_ptr); NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg_ptr); if (__predict_false(datalen == 0)) { m_freem(m_new); return (true); } m_new->m_pkthdr.len = datalen; m_new->m_len = datalen; if (*m0 == NULL) { *m0 = m_new; } else { struct mbuf *m_last; for (m_last = *m0; m_last->m_next != NULL; m_last = m_last->m_next) ; m_last->m_next = m_new; (*m0)->m_pkthdr.len += datalen; } return (true); } /* * NS_WRITER_TYPE_LBUF * Writes message to the allocated memory buffer, * 
flushing to socket/group when mbuf size limit is reached.
 * Calls linux handler to rewrite messages before sending to the socket.
 */
static bool
nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok)
{
	int mflag = waitok ? M_WAITOK : M_NOWAIT;
	size = roundup2(size, sizeof(void *));
	/* Header + translation scratch area, in addition to 2x the data size. */
	int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE;
	char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO);
	if (__predict_false(buf == NULL))
		return (false);

	/* Fill buffer header first */
	struct linear_buffer *lb = (struct linear_buffer *)buf;
	lb->base = &buf[sizeof(struct linear_buffer) + size];
	lb->size = size + SCRATCH_BUFFER_SIZE;

	nw->alloc_len = size;
	nw->offset = 0;
	nw->hdr = NULL;
	nw->_storage = buf;
	nw->data = (char *)(lb + 1);
	nw->malloc_flag = mflag;
	nw->writer_type = NS_WRITER_TYPE_LBUF;
	nw->num_messages = 0;
	nw->enomem = false;
	return (true);
}

/*
 * Translates accumulated messages to their Linux representation and
 * unicasts the resulting mbuf to the socket in nw->arg_ptr.
 * Consumes @buf in all cases.
 */
static bool
nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
{
	struct linear_buffer *lb = (struct linear_buffer *)buf;
	char *data = (char *)(lb + 1);
	struct nlpcb *nlp = (struct nlpcb *)(nw->arg_ptr);

	if (__predict_false(datalen == 0)) {
		free(buf, M_NETLINK);
		return (true);
	}

	struct mbuf *m = NULL;
	if (linux_netlink_p != NULL)
		m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp);
	free(buf, M_NETLINK);

	if (__predict_false(m == NULL)) {
		/* XXX: should we set sorcverr? */
		return (false);
	}

	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
	return (nl_send_one(m, nlp, cnt, io_flags));
}

/* Shouldn't be called (maybe except Linux code originating message) */
static bool
nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
{
	struct linear_buffer *lb = (struct linear_buffer *)buf;
	char *data = (char *)(lb + 1);

	if (__predict_false(datalen == 0)) {
		free(buf, M_NETLINK);
		return (true);
	}

	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
	if (__predict_false(m == NULL)) {
		free(buf, M_NETLINK);
		return (false);
	}
	/* NOTE(review): m_append() result is unchecked here, unlike the _buf writers -- confirm. */
	m_append(m, datalen, data);
	free(buf, M_NETLINK);

	nl_send_group(m, cnt, nw->arg_uint >> 16, nw->arg_uint & 0xFFFF);
	return (true);
}

/* Writer implementations, indexed by NS_WRITER_TYPE_*. */
struct nlwriter_ops nlmsg_writers[] = {
	/* NS_WRITER_TYPE_MBUF */
	{
		.init = nlmsg_get_ns_mbuf,
		.write_socket = nlmsg_write_socket_mbuf,
		.write_group = nlmsg_write_group_mbuf,
		.write_chain = nlmsg_write_chain_mbuf,
	},
	/* NS_WRITER_TYPE_BUF */
	{
		.init = nlmsg_get_ns_buf,
		.write_socket = nlmsg_write_socket_buf,
		.write_group = nlmsg_write_group_buf,
		.write_chain = nlmsg_write_chain_buf,
	},
	/* NS_WRITER_TYPE_LBUF */
	{
		.init = nlmsg_get_ns_lbuf,
		.write_socket = nlmsg_write_socket_lbuf,
		.write_group = nlmsg_write_group_lbuf,
	},
};

/* Selects the flush callback matching the writer's type and target. */
static void
nlmsg_set_callback(struct nl_writer *nw)
{
	struct nlwriter_ops *pops = &nlmsg_writers[nw->writer_type];

	switch (nw->writer_target) {
	case NS_WRITER_TARGET_SOCKET:
		nw->cb = pops->write_socket;
		break;
	case NS_WRITER_TARGET_GROUP:
		nw->cb = pops->write_group;
		break;
	case NS_WRITER_TARGET_CHAIN:
		nw->cb = pops->write_chain;
		break;
	default:
		panic("not implemented");
	}
}

/* Initializes @nw with storage provided by writer @type. */
static bool
nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok)
{
	MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0]));
	NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type);
	return (nlmsg_writers[type].init(nw, size, waitok));
}

/*
 * Picks the writer type best suited for @size: mbuf for cluster-sized
 * requests, plain buffer for larger ones, linear buffer for Linux sockets.
 */
static bool
nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux)
{
	int type;

	if (!is_linux) {
		if
(__predict_true(size <= MCLBYTES))
			type = NS_WRITER_TYPE_MBUF;
		else
			type = NS_WRITER_TYPE_BUF;
	} else
		type = NS_WRITER_TYPE_LBUF;
	return (nlmsg_get_buf_type(nw, size, type, waitok));
}

/* Sets up @nw for unicasting messages to socket @nlp. */
bool
nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp)
{
	if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux))
		return (false);
	nw->arg_ptr = (void *)nlp;
	nw->writer_target = NS_WRITER_TARGET_SOCKET;
	nlmsg_set_callback(nw);
	return (true);
}

/* Sets up @nw for multicasting to (@protocol, @group_id). */
bool
nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id)
{
	if (!nlmsg_get_buf(nw, size, false, false))
		return (false);
	/* Pack protocol and group id into a single integer callback argument. */
	nw->arg_uint = (uint64_t)protocol << 16 | (uint64_t)group_id;
	nw->writer_target = NS_WRITER_TARGET_GROUP;
	nlmsg_set_callback(nw);
	return (true);
}

/* Sets up @nw for appending messages to the mbuf chain rooted at @pm. */
bool
nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm)
{
	if (!nlmsg_get_buf(nw, size, false, false))
		return (false);
	*pm = NULL;
	nw->arg_ptr = (void *)pm;
	nw->writer_target = NS_WRITER_TARGET_CHAIN;
	nlmsg_set_callback(nw);
	NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf);
	return (true);
}

/* Makes the writer bypass the receiver's RCVBUF limit when flushing. */
void
nlmsg_ignore_limit(struct nl_writer *nw)
{
	nw->ignore_limit = true;
}

/*
 * Flushes all completed messages to the configured destination via the
 * writer callback and releases the underlying storage.  An unfinished
 * trailing message (nw->hdr != NULL) is excluded from the flush.
 */
bool
nlmsg_flush(struct nl_writer *nw)
{

	if (__predict_false(nw->hdr != NULL)) {
		/* Last message has not been completed, skip it. */
		int completed_len = (char *)nw->hdr - nw->data;
		/* Send completed messages */
		nw->offset -= nw->offset - completed_len;
		nw->hdr = NULL;
	}

	NL_LOG(LOG_DEBUG2, "OUT");
	bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages);
	nw->_storage = NULL;

	if (!result) {
		NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed",
		    nw, nw->offset, nw->cb);
	}

	return (result);
}

/*
 * Flushes previous data and allocates new underlying storage
 * sufficient for holding at least @required_len bytes.
 * Return true on success.
 */
bool
nlmsg_refill_buffer(struct nl_writer *nw, int required_len)
{
	struct nl_writer ns_new = {};
	int completed_len, new_len;

	/* A previous allocation failure poisons the writer permanently. */
	if (nw->enomem)
		return (false);

	NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim",
	    nw->offset, nw->alloc_len, required_len);

	/* Calculate new buffer size and allocate it */
	completed_len = (nw->hdr != NULL) ?
	    (char *)nw->hdr - nw->data : nw->offset;
	if (completed_len > 0 && required_len < MCLBYTES) {
		/* We already ran out of space, use the largest effective size */
		new_len = max(nw->alloc_len, MCLBYTES);
	} else {
		if (nw->alloc_len < MCLBYTES)
			new_len = MCLBYTES;
		else
			new_len = nw->alloc_len * 2;
		while (new_len < required_len)
			new_len *= 2;
	}
	bool waitok = (nw->malloc_flag == M_WAITOK);
	bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF);
	if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) {
		nw->enomem = true;
		NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM");
		return (false);
	}
	if (nw->ignore_limit)
		nlmsg_ignore_limit(&ns_new);

	/* Update callback data */
	ns_new.writer_target = nw->writer_target;
	nlmsg_set_callback(&ns_new);
	ns_new.arg_uint = nw->arg_uint;

	/* Copy last (unfinished) header to the new storage */
	int last_len = nw->offset - completed_len;
	if (last_len > 0) {
		memcpy(ns_new.data, nw->hdr, last_len);
		ns_new.hdr = (struct nlmsghdr *)ns_new.data;
		ns_new.offset = last_len;
	}
	NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes",
	    completed_len, last_len);

	/* Flush completed headers & switch to the new nw */
	nlmsg_flush(nw);
	memcpy(nw, &ns_new, sizeof(struct nl_writer));
	NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes",
	    nw->offset, nw->alloc_len);

	return (true);
}

/*
 * Opens a new message in the writer, refilling the storage if the
 * aligned header+payload does not fit.  nlmsg_len is preliminary here
 * and finalized by nlmsg_end().
 */
bool
nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type,
    uint16_t flags, uint32_t len)
{
	struct nlmsghdr *hdr;

	MPASS(nw->hdr == NULL);

	int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr));
	if (__predict_false(nw->offset + required_len > nw->alloc_len)) {
		if (!nlmsg_refill_buffer(nw,
required_len))
			return (false);
	}

	hdr = (struct nlmsghdr *)(&nw->data[nw->offset]);

	hdr->nlmsg_len = len;
	hdr->nlmsg_type = type;
	hdr->nlmsg_flags = flags;
	hdr->nlmsg_seq = seq;
	hdr->nlmsg_pid = portid;

	nw->hdr = hdr;
	/* Only the fixed header is reserved here; the caller reserves payload. */
	nw->offset += sizeof(struct nlmsghdr);

	return (true);
}

/*
 * Completes the current message: writes the final length into nlmsg_len
 * and accounts the message in num_messages.  Fails (and aborts the
 * message) if an earlier allocation failure was recorded.
 */
bool
nlmsg_end(struct nl_writer *nw)
{
	MPASS(nw->hdr != NULL);

	if (nw->enomem) {
		NL_LOG(LOG_DEBUG, "ENOMEM when dumping message");
		nlmsg_abort(nw);
		return (false);
	}

	nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr);
+	NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u",
+	    nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags,
+	    nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid);
	nw->hdr = NULL;
	nw->num_messages++;
	return (true);
}

/* Discards the current (unfinished) message, rewinding the write offset. */
void
nlmsg_abort(struct nl_writer *nw)
{
	if (nw->hdr != NULL) {
		nw->offset = (uint32_t)((char *)nw->hdr - nw->data);
		nw->hdr = NULL;
	}
}

/*
 * Writes an NLMSG_ERROR acknowledgement for @hdr to socket @nlp.
 * Per the cap-ack convention, the original message is echoed in full
 * only on error when NETLINK_CAP_ACK is not set; extended-ack TLVs
 * (error message / offset) are appended when NLF_EXT_ACK is enabled.
 */
void
nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *hdr,
    struct nl_pstate *npt)
{
	struct nlmsgerr *errmsg;
	int payload_len;
	uint32_t flags = nlp->nl_flags;
	struct nl_writer *nw = npt->nw;
	bool cap_ack;

	payload_len = sizeof(struct nlmsgerr);

	/*
	 * The only case when we send the full message in the
	 * reply is when there is an error and NETLINK_CAP_ACK
	 * is not set.
	 */
	cap_ack = (error == 0) || (flags & NLF_CAP_ACK);
	if (!cap_ack)
		payload_len += hdr->nlmsg_len - sizeof(struct nlmsghdr);
	payload_len = NETLINK_ALIGN(payload_len);

	uint16_t nl_flags = cap_ack ? NLM_F_CAPPED : 0;
	if ((npt->err_msg || npt->err_off) && nlp->nl_flags & NLF_EXT_ACK)
		nl_flags |= NLM_F_ACK_TLVS;

	/*
	 * TODO: handle cookies
	 */

	NL_LOG(LOG_DEBUG3, "acknowledging message type %d seq %d",
	    hdr->nlmsg_type, hdr->nlmsg_seq);

	if (!nlmsg_add(nw, nlp->nl_port, hdr->nlmsg_seq, NLMSG_ERROR, nl_flags,
	    payload_len))
		goto enomem;

	errmsg = nlmsg_reserve_data(nw, payload_len, struct nlmsgerr);
	errmsg->error = error;
	/* In case of error copy the whole message, else just the header */
	memcpy(&errmsg->msg, hdr, cap_ack ? sizeof(*hdr) : hdr->nlmsg_len);

	if (npt->err_msg != NULL && nlp->nl_flags & NLF_EXT_ACK)
		nlattr_add_string(nw, NLMSGERR_ATTR_MSG, npt->err_msg);
	if (npt->err_off != 0 && nlp->nl_flags & NLF_EXT_ACK)
		nlattr_add_u32(nw, NLMSGERR_ATTR_OFFS, npt->err_off);

	if (nlmsg_end(nw))
		return;
enomem:
	NLP_LOG(LOG_DEBUG, nlp, "error allocating ack data for message %d seq %u",
	    hdr->nlmsg_type, hdr->nlmsg_seq);
	nlmsg_abort(nw);
}

/*
 * Appends the terminating NLMSG_DONE record (carrying the dump result
 * @error as its payload) and marks the writer so that no separate
 * NLMSG_ERROR acknowledgement is generated for this request.
 */
bool
nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr)
{
	if (!nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, NLMSG_DONE, 0,
	    sizeof(int))) {
		NL_LOG(LOG_DEBUG, "Error finalizing table dump");
		return (false);
	}
	/* Save operation result */
	int *perror = nlmsg_reserve_object(nw, int);
	NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error, nw->offset,
	    perror);
	*perror = error;
	nlmsg_end(nw);
+	nw->suppress_ack = true;

	return (true);
}
diff --git a/sys/netlink/netlink_message_writer.h b/sys/netlink/netlink_message_writer.h
index 424983282e59..99f50fb94213 100644
--- a/sys/netlink/netlink_message_writer.h
+++ b/sys/netlink/netlink_message_writer.h
@@ -1,252 +1,253 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2021 Ng Peng Nam Sean
 * Copyright (c) 2022 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#ifndef _NETLINK_NETLINK_MESSAGE_WRITER_H_
#define _NETLINK_NETLINK_MESSAGE_WRITER_H_

#ifdef _KERNEL
/*
 * It is not meant to be included directly
 */

struct mbuf;
struct nl_writer;
/* Flush callback: consumes @buf of @buflen bytes holding @cnt messages. */
typedef bool nl_writer_cb(struct nl_writer *nw, void *buf, int buflen, int cnt);

struct nl_writer {
	int		alloc_len;	/* allocated buffer length */
	int		offset;		/* offset from the start of the buffer */
	struct nlmsghdr	*hdr;		/* Pointer to the currently-filled msg */
	char		*data;		/* pointer to the contiguous storage */
	void		*_storage;	/* Underlying storage pointer */
	nl_writer_cb	*cb;		/* Callback to flush data */
	union {
		void	*arg_ptr;	/* Callback argument as pointer */
		uint64_t arg_uint;	/* Callback argument as int */
	};
	int		num_messages;	/* Number of messages in the buffer */
	int		malloc_flag;	/* M_WAITOK or M_NOWAIT */
	uint8_t		writer_type;	/* NS_WRITER_TYPE_* */
	uint8_t		writer_target;	/* NS_WRITER_TARGET_* */
	bool		ignore_limit;	/* If true, ignores RCVBUF limit */
	bool		enomem;		/* True if ENOMEM occured */
+	bool		suppress_ack;	/* If true, don't send NLMSG_ERR */
};

#define	NS_WRITER_TARGET_SOCKET	0
#define	NS_WRITER_TARGET_GROUP	1
#define	NS_WRITER_TARGET_CHAIN	2

#define	NS_WRITER_TYPE_MBUF	0
#define	NS_WRITER_TYPE_BUF	1
#define	NS_WRITER_TYPE_LBUF	2
#define	NS_WRITER_TYPE_MBUFC	3

#define	NLMSG_SMALL	128
#define	NLMSG_LARGE	2048

/* Message and attribute writing */
struct nlpcb;

bool nlmsg_get_unicast_writer(struct nl_writer *nw, int expected_size,
    struct nlpcb *nlp);
bool
 nlmsg_get_group_writer(struct nl_writer *nw, int expected_size,
    int proto, int group_id);
bool nlmsg_get_chain_writer(struct nl_writer *nw, int expected_size,
    struct mbuf **pm);
bool nlmsg_flush(struct nl_writer *nw);
void nlmsg_ignore_limit(struct nl_writer *nw);

bool nlmsg_refill_buffer(struct nl_writer *nw, int required_size);
bool nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq,
    uint16_t type, uint16_t flags, uint32_t len);
bool nlmsg_end(struct nl_writer *nw);
void nlmsg_abort(struct nl_writer *nw);

bool nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr);

/* Opens a reply message inheriting pid/seq/type/flags from @hdr. */
static inline bool
nlmsg_reply(struct nl_writer *nw, const struct nlmsghdr *hdr, int payload_len)
{
	return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq,
	    hdr->nlmsg_type, hdr->nlmsg_flags, payload_len));
}

#define	nlmsg_data(_hdr)	((void *)((_hdr) + 1))

/*
 * KPI similar to mtodo():
 * current (uncompleted) header is guaranteed to be contiguous,
 * but can be reallocated, thus pointers may need to be readjusted.
*/
static inline int
nlattr_save_offset(const struct nl_writer *nw)
{
	/* Offset of the write position relative to the current message header. */
	return (nw->offset - ((char *)nw->hdr - nw->data));
}

static inline void *
_nlattr_restore_offset(const struct nl_writer *nw, int off)
{
	/* Re-derive a pointer from an offset saved by nlattr_save_offset(). */
	return ((void *)((char *)nw->hdr + off));
}
#define	nlattr_restore_offset(_ns, _off, _t)	((_t *)_nlattr_restore_offset(_ns, _off))

/* Finalizes the length of a (nested) attribute opened at offset @off. */
static inline void
nlattr_set_len(const struct nl_writer *nw, int off)
{
	struct nlattr *nla = nlattr_restore_offset(nw, off, struct nlattr);
	nla->nla_len = nlattr_save_offset(nw) - off;
}

/* Reserves aligned space in the buffer, refilling the storage if needed. */
static inline void *
nlmsg_reserve_data_raw(struct nl_writer *nw, size_t sz)
{

	if (__predict_false(nw->offset + NETLINK_ALIGN(sz) > nw->alloc_len)) {
		if (!nlmsg_refill_buffer(nw, NETLINK_ALIGN(sz)))
			return (NULL);
	}

	void *data_ptr = &nw->data[nw->offset];
	/* NOTE(review): space is checked with NETLINK_ALIGN but advanced by NLMSG_ALIGN -- confirm both expand identically. */
	nw->offset += NLMSG_ALIGN(sz);

	return (data_ptr);
}
#define nlmsg_reserve_object(_ns, _t)	((_t *)nlmsg_reserve_data_raw(_ns, NLA_ALIGN(sizeof(_t))))
#define nlmsg_reserve_data(_ns, _sz, _t)	((_t *)nlmsg_reserve_data_raw(_ns, _sz))

/* Opens a nested attribute; the offset returned feeds nlattr_set_len(). */
static inline int
nlattr_add_nested(struct nl_writer *nw, uint16_t nla_type)
{
	int off = nlattr_save_offset(nw);
	struct nlattr *nla = nlmsg_reserve_data(nw, sizeof(struct nlattr), struct nlattr);
	if (__predict_false(nla == NULL))
		return (0);
	nla->nla_type = nla_type;
	return (off);
}

/* Reserves an attribute with @sz bytes of payload; returns payload pointer. */
static inline void *
_nlmsg_reserve_attr(struct nl_writer *nw, uint16_t nla_type, uint16_t sz)
{
	sz += sizeof(struct nlattr);

	struct nlattr *nla = nlmsg_reserve_data(nw, sz, struct nlattr);
	if (__predict_false(nla == NULL))
		return (NULL);
	nla->nla_type = nla_type;
	nla->nla_len = sz;

	return ((void *)(nla + 1));
}
#define	nlmsg_reserve_attr(_ns, _at, _t)	((_t *)_nlmsg_reserve_attr(_ns, _at, NLA_ALIGN(sizeof(_t))))

/* Appends a TLV attribute, copying @attr_len bytes from @data. */
static inline bool
nlattr_add(struct nl_writer *nw, int attr_type, int attr_len, const void *data)
{
	int required_len = NLA_ALIGN(attr_len + sizeof(struct nlattr));
	if (__predict_false(nw->offset + required_len > nw->alloc_len)) {
		if (!nlmsg_refill_buffer(nw, required_len))
			return
 (false);
	}

	struct nlattr *nla = (struct nlattr *)(&nw->data[nw->offset]);

	nla->nla_len = attr_len + sizeof(struct nlattr);
	nla->nla_type = attr_type;
	if (attr_len > 0) {
		if ((attr_len % 4) != 0) {
			/* clear padding bytes */
			bzero((char *)nla + required_len - 4, 4);
		}
		memcpy((nla + 1), data, attr_len);
	}
	nw->offset += required_len;
	return (true);
}

static inline bool
nlattr_add_u8(struct nl_writer *nw, int attrtype, uint8_t value)
{
	return (nlattr_add(nw, attrtype, sizeof(uint8_t), &value));
}

static inline bool
nlattr_add_u16(struct nl_writer *nw, int attrtype, uint16_t value)
{
	return (nlattr_add(nw, attrtype, sizeof(uint16_t), &value));
}

static inline bool
nlattr_add_u32(struct nl_writer *nw, int attrtype, uint32_t value)
{
	return (nlattr_add(nw, attrtype, sizeof(uint32_t), &value));
}

static inline bool
nlattr_add_u64(struct nl_writer *nw, int attrtype, uint64_t value)
{
	return (nlattr_add(nw, attrtype, sizeof(uint64_t), &value));
}

static inline bool
nlattr_add_s8(struct nl_writer *nw, int attrtype, int8_t value)
{
	return (nlattr_add(nw, attrtype, sizeof(int8_t), &value));
}

static inline bool
nlattr_add_s16(struct nl_writer *nw, int attrtype, int16_t value)
{
	return (nlattr_add(nw, attrtype, sizeof(int16_t), &value));
}

static inline bool
nlattr_add_s32(struct nl_writer *nw, int attrtype, int32_t value)
{
	return (nlattr_add(nw, attrtype, sizeof(int32_t), &value));
}

static inline bool
nlattr_add_s64(struct nl_writer *nw, int attrtype, int64_t value)
{
	return (nlattr_add(nw, attrtype, sizeof(int64_t), &value));
}

/* Zero-length (presence-only) attribute. */
static inline bool
nlattr_add_flag(struct nl_writer *nw, int attrtype)
{
	return (nlattr_add(nw, attrtype, 0, NULL));
}

/* NUL-terminated string attribute (terminator included in the length). */
static inline bool
nlattr_add_string(struct nl_writer *nw, int attrtype, const char *str)
{
	return (nlattr_add(nw, attrtype, strlen(str) + 1, str));
}

#endif
#endif