Page MenuHomeFreeBSD

D42524.diff
No OneTemporary

D42524.diff

diff --git a/sys/compat/linux/linux_netlink.c b/sys/compat/linux/linux_netlink.c
--- a/sys/compat/linux/linux_netlink.c
+++ b/sys/compat/linux/linux_netlink.c
@@ -32,7 +32,6 @@
#include <sys/ck.h>
#include <sys/lock.h>
#include <sys/malloc.h>
-#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/vnode.h>
@@ -44,6 +43,7 @@
#include <netlink/netlink.h>
#include <netlink/netlink_ctl.h>
#include <netlink/netlink_linux.h>
+#include <netlink/netlink_var.h>
#include <netlink/netlink_route.h>
#include <compat/linux/linux.h>
@@ -187,6 +187,7 @@
if (out_hdr != NULL) {
memcpy(out_hdr, hdr, hdr->nlmsg_len);
+ nw->num_messages++;
return (true);
}
return (false);
@@ -518,8 +519,7 @@
}
static bool
-nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp,
- struct nl_writer *nw)
+nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
{
if (hdr->nlmsg_type < NLMSG_MIN_TYPE) {
switch (hdr->nlmsg_type) {
@@ -536,7 +536,7 @@
}
}
- switch (netlink_family) {
+ switch (nlp->nl_proto) {
case NETLINK_ROUTE:
return (rtnl_to_linux(hdr, nlp, nw));
default:
@@ -544,64 +544,57 @@
}
}
-static struct mbuf *
-nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp)
+/*
+ * Translates all messages in the writer's buffer to their Linux form.
+ * A new buffer is allocated for the result; on success it replaces
+ * nw->buf and the original buffer is freed.  On failure false is
+ * returned and the original buffer is left in nw->buf for the caller.
+ */
+static bool
+nlmsgs_to_linux(struct nl_writer *nw, struct nlpcb *nlp)
{
- RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length);
- struct nl_writer nw = {};
-
- struct mbuf *m = NULL;
- if (!nlmsg_get_chain_writer(&nw, data_length, &m)) {
- RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d",
- data_length);
- return (NULL);
- }
+ struct nl_buf *nb, *orig;
+ u_int offset, msglen, orig_messages __diagused;
+
+ RT_LOG(LOG_DEBUG3, "%s: in %u bytes %u messages", __func__,
+ nw->buf->datalen, nw->num_messages);
+
+ orig = nw->buf;
+ nb = nl_buf_alloc(orig->datalen + SCRATCH_BUFFER_SIZE, M_NOWAIT);
+ if (__predict_false(nb == NULL))
+ return (false);
+ nw->buf = nb;
+#ifdef INVARIANTS
+ orig_messages = nw->num_messages;
+#endif
+ nw->num_messages = 0;
/* Assume correct headers. Buffer IS mutable */
- int count = 0;
- for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) {
- struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset];
- int msglen = NLMSG_ALIGN(hdr->nlmsg_len);
- count++;
+ for (offset = 0;
+ offset + sizeof(struct nlmsghdr) <= orig->datalen;
+ offset += msglen) {
+ struct nlmsghdr *hdr = (struct nlmsghdr *)&orig->data[offset];
- if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) {
+ msglen = NLMSG_ALIGN(hdr->nlmsg_len);
+ if (!nlmsg_to_linux(hdr, nlp, nw)) {
RT_LOG(LOG_DEBUG, "failed to process msg type %d",
hdr->nlmsg_type);
- m_freem(m);
- return (NULL);
+ nl_buf_free(nb);
+ /* Hand the original buffer back: the caller frees nw->buf. */
+ nw->buf = orig;
+ return (false);
}
- offset += msglen;
}
- nlmsg_flush(&nw);
- RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count,
- m ? m_length(m, NULL) : 0);
- return (m);
-}
+ MPASS(nw->num_messages == orig_messages);
+ MPASS(nw->buf == nb);
+ nl_buf_free(orig);
+ RT_LOG(LOG_DEBUG3, "%s: out %u bytes", __func__, offset);
-static struct mbuf *
-mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp)
-{
- /* XXX: easiest solution, not optimized for performance */
- int data_length = m_length(m, NULL);
- char *buf = malloc(data_length, M_LINUX, M_NOWAIT);
- if (buf == NULL) {
- RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message",
- data_length);
- m_freem(m);
- return (NULL);
- }
- m_copydata(m, 0, data_length, buf);
- m_freem(m);
-
- m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp);
- free(buf, M_LINUX);
-
- return (m);
+ return (true);
}
static struct linux_netlink_provider linux_netlink_v1 = {
- .mbufs_to_linux = mbufs_to_linux,
.msgs_to_linux = nlmsgs_to_linux,
.msg_from_linux = nlmsg_from_linux,
};
diff --git a/sys/netlink/ktest_netlink_message_writer.h b/sys/netlink/ktest_netlink_message_writer.h
--- a/sys/netlink/ktest_netlink_message_writer.h
+++ b/sys/netlink/ktest_netlink_message_writer.h
@@ -30,28 +30,14 @@
#if defined(_KERNEL) && defined(INVARIANTS)
-bool nlmsg_get_buf_type_wrapper(struct nl_writer *nw, int size, int type, bool waitok);
-void nlmsg_set_callback_wrapper(struct nl_writer *nw);
-struct mbuf *nl_get_mbuf_chain_wrapper(int len, int malloc_flags);
+bool nlmsg_get_buf_wrapper(struct nl_writer *nw, u_int size, bool waitok);
#ifndef KTEST_CALLER
bool
-nlmsg_get_buf_type_wrapper(struct nl_writer *nw, int size, int type, bool waitok)
+nlmsg_get_buf_wrapper(struct nl_writer *nw, u_int size, bool waitok)
{
- return (nlmsg_get_buf_type(nw, size, type, waitok));
-}
-
-void
-nlmsg_set_callback_wrapper(struct nl_writer *nw)
-{
- nlmsg_set_callback(nw);
-}
-
-struct mbuf *
-nl_get_mbuf_chain_wrapper(int len, int malloc_flags)
-{
- return (nl_get_mbuf_chain(len, malloc_flags));
+ return (nlmsg_get_buf(nw, size, waitok));
}
#endif
diff --git a/sys/netlink/ktest_netlink_message_writer.c b/sys/netlink/ktest_netlink_message_writer.c
--- a/sys/netlink/ktest_netlink_message_writer.c
+++ b/sys/netlink/ktest_netlink_message_writer.c
@@ -29,9 +29,9 @@
#include <sys/cdefs.h>
#include <sys/systm.h>
#include <sys/malloc.h>
-#include <sys/mbuf.h>
#include <netlink/netlink.h>
#include <netlink/netlink_ctl.h>
+#include <netlink/netlink_var.h>
#include <netlink/netlink_message_writer.h>
#define KTEST_CALLER
@@ -39,54 +39,47 @@
#ifdef INVARIANTS
-struct test_mbuf_attrs {
+struct test_nlbuf_attrs {
uint32_t size;
uint32_t expected_avail;
- uint32_t expected_count;
- uint32_t wtype;
int waitok;
};
-#define _OUT(_field) offsetof(struct test_mbuf_attrs, _field)
-static const struct nlattr_parser nla_p_mbuf_w[] = {
+#define _OUT(_field) offsetof(struct test_nlbuf_attrs, _field)
+static const struct nlattr_parser nla_p_nlbuf_w[] = {
{ .type = 1, .off = _OUT(size), .cb = nlattr_get_uint32 },
{ .type = 2, .off = _OUT(expected_avail), .cb = nlattr_get_uint32 },
- { .type = 3, .off = _OUT(expected_count), .cb = nlattr_get_uint32 },
- { .type = 4, .off = _OUT(wtype), .cb = nlattr_get_uint32 },
- { .type = 5, .off = _OUT(waitok), .cb = nlattr_get_uint32 },
+ { .type = 3, .off = _OUT(waitok), .cb = nlattr_get_uint32 },
};
#undef _OUT
-NL_DECLARE_ATTR_PARSER(mbuf_w_parser, nla_p_mbuf_w);
+NL_DECLARE_ATTR_PARSER(nlbuf_w_parser, nla_p_nlbuf_w);
static int
-test_mbuf_parser(struct ktest_test_context *ctx, struct nlattr *nla)
+test_nlbuf_parser(struct ktest_test_context *ctx, struct nlattr *nla)
{
- struct test_mbuf_attrs *attrs = npt_alloc(ctx->npt, sizeof(*attrs));
+ struct test_nlbuf_attrs *attrs = npt_alloc(ctx->npt, sizeof(*attrs));
ctx->arg = attrs;
if (attrs != NULL)
- return (nl_parse_nested(nla, &mbuf_w_parser, ctx->npt, attrs));
+ return (nl_parse_nested(nla, &nlbuf_w_parser, ctx->npt, attrs));
return (ENOMEM);
}
static int
-test_mbuf_writer_allocation(struct ktest_test_context *ctx)
+test_nlbuf_writer_allocation(struct ktest_test_context *ctx)
{
- struct test_mbuf_attrs *attrs = ctx->arg;
- bool ret;
+ struct test_nlbuf_attrs *attrs = ctx->arg;
struct nl_writer nw = {};
+ u_int alloc_len;
+ bool ret;
- ret = nlmsg_get_buf_type_wrapper(&nw, attrs->size, attrs->wtype, attrs->waitok);
+ ret = nlmsg_get_buf_wrapper(&nw, attrs->size, attrs->waitok);
if (!ret)
return (EINVAL);
- int alloc_len = nw.alloc_len;
+ alloc_len = nw.buf->buflen;
KTEST_LOG(ctx, "requested %u, allocated %d", attrs->size, alloc_len);
- /* Set cleanup callback */
- nw.writer_target = NS_WRITER_TARGET_SOCKET;
- nlmsg_set_callback_wrapper(&nw);
-
/* Mark enomem to avoid reallocation */
nw.enomem = true;
@@ -95,9 +88,7 @@
return (EINVAL);
}
- /* Mark as empty to free the storage */
- nw.offset = 0;
- nlmsg_flush(&nw);
+ nl_buf_free(nw.buf);
if (alloc_len < attrs->expected_avail) {
KTEST_LOG(ctx, "alloc_len %d, expected %u",
@@ -107,60 +98,15 @@
return (0);
}
-
-static int
-test_mbuf_chain_allocation(struct ktest_test_context *ctx)
-{
- struct test_mbuf_attrs *attrs = ctx->arg;
- int mflags = attrs->waitok ? M_WAITOK : M_NOWAIT;
- struct mbuf *chain = nl_get_mbuf_chain_wrapper(attrs->size, mflags);
-
- if (chain == NULL) {
- KTEST_LOG(ctx, "nl_get_mbuf_chain(%u) returned NULL", attrs->size);
- return (EINVAL);
- }
-
- /* Iterate and check number of mbufs and space */
- uint32_t allocated_count = 0, allocated_size = 0;
- for (struct mbuf *m = chain; m != NULL; m = m->m_next) {
- allocated_count++;
- allocated_size += M_SIZE(m);
- }
- m_freem(chain);
-
- if (attrs->expected_avail > allocated_size) {
- KTEST_LOG(ctx, "expected/allocated avail(bytes) %u/%u"
- " expected/allocated count %u/%u",
- attrs->expected_avail, allocated_size,
- attrs->expected_count, allocated_count);
- return (EINVAL);
- }
-
- if (attrs->expected_count > 0 && (attrs->expected_count != allocated_count)) {
- KTEST_LOG(ctx, "expected/allocated avail(bytes) %u/%u"
- " expected/allocated count %u/%u",
- attrs->expected_avail, allocated_size,
- attrs->expected_count, allocated_count);
- return (EINVAL);
- }
-
- return (0);
-}
#endif
static const struct ktest_test_info tests[] = {
#ifdef INVARIANTS
{
- .name = "test_mbuf_writer_allocation",
- .desc = "test different mbuf sizes in the mbuf writer",
- .func = &test_mbuf_writer_allocation,
- .parse = &test_mbuf_parser,
- },
- {
- .name = "test_mbuf_chain_allocation",
- .desc = "verify allocation different chain sizes",
- .func = &test_mbuf_chain_allocation,
- .parse = &test_mbuf_parser,
+ .name = "test_nlbuf_writer_allocation",
+ .desc = "test different buffer sizes in the netlink writer",
+ .func = &test_nlbuf_writer_allocation,
+ .parse = &test_nlbuf_parser,
},
#endif
};
diff --git a/sys/netlink/netlink_domain.c b/sys/netlink/netlink_domain.c
--- a/sys/netlink/netlink_domain.c
+++ b/sys/netlink/netlink_domain.c
@@ -179,53 +179,92 @@
}
+/*
+ * Delivers buffer @nb to a single member @nlp of the group that @nw
+ * targets.  The buffer is handed over to nl_send_one(), which consumes it.
+ */
static void
-nl_send_one_group(struct mbuf *m, struct nlpcb *nlp, int num_messages,
- int io_flags)
+nl_send_one_group(struct nl_writer *nw, struct nl_buf *nb, struct nlpcb *nlp)
{
+ uint16_t proto, id;
+
if (__predict_false(nlp->nl_flags & NLF_MSG_INFO))
- nl_add_msg_info(m);
- nl_send_one(m, nlp, num_messages, io_flags);
+ nl_add_msg_info(nb);
+ /*
+ * nl_send_one() takes its target socket from nw->nlp, which shares
+ * storage with nw->group.  Point the writer at this member for the
+ * duration of the call, then restore the group for the caller's loop.
+ */
+ proto = nw->group.proto;
+ id = nw->group.id;
+ nw->buf = nb;
+ nw->nlp = nlp;
+ (void)nl_send_one(nw);
+ nw->group.proto = proto;
+ nw->group.id = id;
+}
+
+static struct nl_buf *
+nl_buf_copy(struct nl_buf *nb)
+{
+ struct nl_buf *copy;
+
+ copy = nl_buf_alloc(nb->buflen, M_NOWAIT);
+ if (__predict_false(copy == NULL))
+ return (NULL);
+ memcpy(copy, nb, sizeof(*nb) + nb->buflen);
+ if (nb->control != NULL) {
+ copy->control = m_copym(nb->control, 0, M_COPYALL, M_NOWAIT);
+ if (__predict_false(copy->control == NULL)) {
+ nl_buf_free(copy);
+ return (NULL);
+ }
+ }
+
+ return (copy);
}
/*
- * Broadcasts message @m to the protocol @proto group specified by @group_id
+ * Broadcasts the writer's buffer to the protocol group stored in the writer.
*/
-void
-nl_send_group(struct mbuf *m, int num_messages, int proto, int group_id)
+bool
+nl_send_group(struct nl_writer *nw)
{
+ struct nl_buf *nb = nw->buf;
struct nlpcb *nlp_last = NULL;
struct nlpcb *nlp;
NLCTL_TRACKER;
IF_DEBUG_LEVEL(LOG_DEBUG2) {
- struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *);
- NL_LOG(LOG_DEBUG2, "MCAST mbuf len %u msg type %d len %u to group %d/%d",
- m->m_len, hdr->nlmsg_type, hdr->nlmsg_len, proto, group_id);
+ struct nlmsghdr *hdr = (struct nlmsghdr *)nb->data;
+ NL_LOG(LOG_DEBUG2, "MCAST len %u msg type %d len %u to group %d/%d",
+ nb->datalen, hdr->nlmsg_type, hdr->nlmsg_len,
+ nw->group.proto, nw->group.id);
}
+ nw->buf = NULL;
+
struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl);
if (__predict_false(ctl == NULL)) {
/*
* Can be the case when notification is sent within VNET
* which doesn't have any netlink sockets.
*/
- m_freem(m);
- return;
+ nl_buf_free(nb);
+ return (false);
}
NLCTL_RLOCK(ctl);
- int io_flags = NL_IOF_UNTRANSLATED;
-
CK_LIST_FOREACH(nlp, &ctl->ctl_pcb_head, nl_next) {
- if (nl_isset_group_locked(nlp, group_id) && nlp->nl_proto == proto) {
+ if (nl_isset_group_locked(nlp, nw->group.id) &&
+ nlp->nl_proto == nw->group.proto) {
if (nlp_last != NULL) {
- struct mbuf *m_copy;
- m_copy = m_copym(m, 0, M_COPYALL, M_NOWAIT);
- if (m_copy != NULL)
- nl_send_one_group(m_copy, nlp_last,
- num_messages, io_flags);
- else {
+ struct nl_buf *copy;
+
+ copy = nl_buf_copy(nb);
+ if (copy != NULL) {
+ nl_send_one_group(nw, copy, nlp_last);
+ } else {
NLP_LOCK(nlp_last);
if (nlp_last->nl_socket != NULL)
sorwakeup(nlp_last->nl_socket);
@@ -236,11 +259,13 @@
}
}
if (nlp_last != NULL)
- nl_send_one_group(m, nlp_last, num_messages, io_flags);
+ nl_send_one_group(nw, nb, nlp_last);
else
- m_freem(m);
+ nl_buf_free(nb);
NLCTL_RUNLOCK(ctl);
+
+ return (true);
}
bool
@@ -331,7 +356,7 @@
free(nlp, M_PCB);
return (error);
}
- so->so_rcv.sb_mtx = &so->so_rcv_mtx;
+ TAILQ_INIT(&so->so_rcv.nl_queue);
TAILQ_INIT(&so->so_snd.nl_queue);
so->so_pcb = nlp;
nlp->nl_socket = so;
@@ -344,7 +369,6 @@
nlp->nl_need_thread_setup = true;
NLP_LOCK_INIT(nlp);
refcount_init(&nlp->nl_refcount, 1);
- nl_init_io(nlp);
nlp->nl_taskqueue = taskqueue_create("netlink_socket", M_WAITOK,
taskqueue_thread_enqueue, &nlp->nl_taskqueue);
@@ -467,15 +491,6 @@
return (0);
}
-static void
-destroy_nlpcb(struct nlpcb *nlp)
-{
- NLP_LOCK(nlp);
- nl_free_io(nlp);
- NLP_LOCK_DESTROY(nlp);
- free(nlp, M_PCB);
-}
-
static void
destroy_nlpcb_epoch(epoch_context_t ctx)
{
@@ -483,10 +498,10 @@
nlp = __containerof(ctx, struct nlpcb, nl_epoch_ctx);
- destroy_nlpcb(nlp);
+ NLP_LOCK_DESTROY(nlp);
+ free(nlp, M_PCB);
}
-
static void
nl_close(struct socket *so)
{
@@ -522,9 +537,12 @@
while ((nb = TAILQ_FIRST(&so->so_snd.nl_queue)) != NULL) {
TAILQ_REMOVE(&so->so_snd.nl_queue, nb, tailq);
- free(nb, M_NETLINK);
+ nl_buf_free(nb);
+ }
+ while ((nb = TAILQ_FIRST(&so->so_rcv.nl_queue)) != NULL) {
+ TAILQ_REMOVE(&so->so_rcv.nl_queue, nb, tailq);
+ nl_buf_free(nb);
}
- sbdestroy(so, SO_RCV);
NL_LOG(LOG_DEBUG3, "socket %p, detached", so);
@@ -597,10 +615,8 @@
len = roundup2(uio->uio_resid, 8) + SCRATCH_BUFFER_SIZE;
if (nlp->nl_linux)
len += roundup2(uio->uio_resid, 8);
- nb = malloc(sizeof(*nb) + len, M_NETLINK, M_WAITOK);
+ nb = nl_buf_alloc(len, M_WAITOK);
nb->datalen = uio->uio_resid;
- nb->buflen = len;
- nb->offset = 0;
error = uiomove(&nb->data[0], uio->uio_resid, uio);
if (__predict_false(error))
goto out;
@@ -635,19 +651,107 @@
out:
SOCK_IO_SEND_UNLOCK(so);
- free(nb, M_NETLINK);
+ if (nb != NULL)
+ nl_buf_free(nb);
return (error);
}
static int
-nl_pru_rcvd(struct socket *so, int flags)
+nl_soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
+ struct mbuf **mp, struct mbuf **controlp, int *flagsp)
{
+ static const struct sockaddr_nl nl_empty_src = {
+ .nl_len = sizeof(struct sockaddr_nl),
+ .nl_family = PF_NETLINK,
+ .nl_pid = 0 /* comes from the kernel */
+ };
+ struct sockbuf *sb = &so->so_rcv;
+ struct nl_buf *nb;
+ int flags, error;
+ u_int overflow;
+ bool nonblock, trunc, peek;
+
+ MPASS(mp == NULL && uio != NULL);
+
NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid);
- MPASS(sotonlpcb(so) != NULL);
+
+ if (psa != NULL)
+ *psa = sodupsockaddr((const struct sockaddr *)&nl_empty_src,
+ M_WAITOK);
+
+ flags = flagsp != NULL ? *flagsp & ~MSG_TRUNC : 0;
+ trunc = flagsp != NULL ? *flagsp & MSG_TRUNC : false;
+ nonblock = (so->so_state & SS_NBIO) ||
+ (flags & (MSG_DONTWAIT | MSG_NBIO));
+ peek = flags & MSG_PEEK;
+
+ error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
+ if (__predict_false(error))
+ return (error);
+
+ SOCK_RECVBUF_LOCK(so);
+ while ((nb = TAILQ_FIRST(&sb->nl_queue)) == NULL) {
+ if (nonblock) {
+ SOCK_RECVBUF_UNLOCK(so);
+ SOCK_IO_RECV_UNLOCK(so);
+ return (EWOULDBLOCK);
+ }
+ error = sbwait(so, SO_RCV);
+ if (error) {
+ SOCK_RECVBUF_UNLOCK(so);
+ SOCK_IO_RECV_UNLOCK(so);
+ return (error);
+ }
+ }
+
+ /*
+ * XXXGL
+ * Here we emulate a PR_ATOMIC behavior of soreceive_generic() where
+ * we take only the first "record" in the socket buffer and send it
+ * to uio whole or truncated ignoring how many netlink messages are
+ * in the record and how much space is left in the uio.
+ * This needs to be fixed at next refactoring. First, we should perform
+ * truncation only if the very first message doesn't fit into uio.
+ * That will help an application with small buffer not to lose data.
+ * Second, we should continue working on the sb->nl_queue as long as
+ * there is more space in the uio. That will boost applications with
+ * large buffers.
+ */
+ if (__predict_true(!peek)) {
+ TAILQ_REMOVE(&sb->nl_queue, nb, tailq);
+ sb->sb_acc -= nb->datalen;
+ sb->sb_ccc -= nb->datalen;
+ }
+ SOCK_RECVBUF_UNLOCK(so);
+
+ overflow = __predict_false(nb->datalen > uio->uio_resid) ?
+ nb->datalen - uio->uio_resid : 0;
+ error = uiomove(nb->data, (int)nb->datalen, uio);
+ if (__predict_false(overflow > 0)) {
+ flags |= MSG_TRUNC;
+ if (trunc)
+ uio->uio_resid -= overflow;
+ }
+
+ if (controlp != NULL) {
+ *controlp = nb->control;
+ nb->control = NULL;
+ }
+
+ if (__predict_true(!peek))
+ nl_buf_free(nb);
+
+ if (uio->uio_td)
+ uio->uio_td->td_ru.ru_msgrcv++;
+
+ if (flagsp != NULL)
+ *flagsp |= flags;
+
+ SOCK_IO_RECV_UNLOCK(so);
nl_on_transmit(sotonlpcb(so));
- return (0);
+ return (error);
}
static int
@@ -798,8 +902,7 @@
}
#define NETLINK_PROTOSW \
- .pr_flags = PR_ATOMIC | PR_ADDR | PR_WANTRCVD | \
- PR_SOCKBUF, \
+ .pr_flags = PR_ATOMIC | PR_ADDR | PR_SOCKBUF, \
.pr_ctloutput = nl_ctloutput, \
.pr_setsbopt = nl_setsbopt, \
.pr_attach = nl_pru_attach, \
@@ -807,7 +910,7 @@
.pr_connect = nl_pru_connect, \
.pr_disconnect = nl_pru_disconnect, \
.pr_sosend = nl_sosend, \
- .pr_rcvd = nl_pru_rcvd, \
+ .pr_soreceive = nl_soreceive, \
.pr_shutdown = nl_pru_shutdown, \
.pr_sockaddr = nl_sockaddr, \
.pr_close = nl_close
diff --git a/sys/netlink/netlink_glue.c b/sys/netlink/netlink_glue.c
--- a/sys/netlink/netlink_glue.c
+++ b/sys/netlink/netlink_glue.c
@@ -111,7 +111,6 @@
get_stub_writer(struct nl_writer *nw)
{
bzero(nw, sizeof(*nw));
- nw->writer_type = NS_WRITER_TYPE_STUB;
nw->enomem = true;
return (false);
diff --git a/sys/netlink/netlink_io.c b/sys/netlink/netlink_io.c
--- a/sys/netlink/netlink_io.c
+++ b/sys/netlink/netlink_io.c
@@ -51,69 +51,36 @@
* sending netlink data between the kernel and userland.
*/
-static const struct sockaddr_nl _nl_empty_src = {
- .nl_len = sizeof(struct sockaddr_nl),
- .nl_family = PF_NETLINK,
- .nl_pid = 0 /* comes from the kernel */
-};
-static const struct sockaddr *nl_empty_src = (const struct sockaddr *)&_nl_empty_src;
-
static bool nl_process_nbuf(struct nl_buf *nb, struct nlpcb *nlp);
-static void
-queue_push(struct nl_io_queue *q, struct mbuf *mq)
-{
- while (mq != NULL) {
- struct mbuf *m = mq;
- mq = mq->m_nextpkt;
- m->m_nextpkt = NULL;
-
- q->length += m_length(m, NULL);
- STAILQ_INSERT_TAIL(&q->head, m, m_stailqpkt);
- }
-}
-
-static struct mbuf *
-queue_pop(struct nl_io_queue *q)
+struct nl_buf *
+nl_buf_alloc(size_t len, int mflag)
{
- if (!STAILQ_EMPTY(&q->head)) {
- struct mbuf *m = STAILQ_FIRST(&q->head);
- STAILQ_REMOVE_HEAD(&q->head, m_stailqpkt);
- m->m_nextpkt = NULL;
- q->length -= m_length(m, NULL);
+ struct nl_buf *nb;
- return (m);
+ nb = malloc(sizeof(struct nl_buf) + len, M_NETLINK, mflag);
+ if (__predict_true(nb != NULL)) {
+ nb->buflen = len;
+ nb->datalen = nb->offset = 0;
+ nb->control = NULL;
}
- return (NULL);
-}
-static struct mbuf *
-queue_head(const struct nl_io_queue *q)
-{
- return (STAILQ_FIRST(&q->head));
+ return (nb);
}
-static inline bool
-queue_empty(const struct nl_io_queue *q)
+void
+nl_buf_free(struct nl_buf *nb)
{
- return (q->length == 0);
-}
-static void
-queue_free(struct nl_io_queue *q)
-{
- while (!STAILQ_EMPTY(&q->head)) {
- struct mbuf *m = STAILQ_FIRST(&q->head);
- STAILQ_REMOVE_HEAD(&q->head, m_stailqpkt);
- m->m_nextpkt = NULL;
- m_freem(m);
- }
- q->length = 0;
+ if (nb->control)
+ m_freem(nb->control);
+ free(nb, M_NETLINK);
}
void
-nl_add_msg_info(struct mbuf *m)
+nl_add_msg_info(struct nl_buf *nb)
{
+ /* XXXGL pass nlp as arg? */
struct nlpcb *nlp = nl_get_thread_nlp(curthread);
NL_LOG(LOG_DEBUG2, "Trying to recover nlp from thread %p: %p",
curthread, nlp);
@@ -139,27 +106,15 @@
};
- while (m->m_next != NULL)
- m = m->m_next;
- m->m_next = sbcreatecontrol(data, sizeof(data),
+ nb->control = sbcreatecontrol(data, sizeof(data),
NETLINK_MSG_INFO, SOL_NETLINK, M_NOWAIT);
- NL_LOG(LOG_DEBUG2, "Storing %u bytes of data, ctl: %p",
- (unsigned)sizeof(data), m->m_next);
-}
-
-static __noinline struct mbuf *
-extract_msg_info(struct mbuf *m)
-{
- while (m->m_next != NULL) {
- if (m->m_next->m_type == MT_CONTROL) {
- struct mbuf *ctl = m->m_next;
- m->m_next = NULL;
- return (ctl);
- }
- m = m->m_next;
- }
- return (NULL);
+ if (__predict_true(nb->control != NULL))
+ NL_LOG(LOG_DEBUG2, "Storing %u bytes of control data, ctl: %p",
+ (unsigned)sizeof(data), nb->control);
+ else
+ NL_LOG(LOG_DEBUG2, "Failed to allocate %u bytes of control",
+ (unsigned)sizeof(data));
}
void
@@ -174,65 +129,31 @@
}
}
-static bool
-tx_check_locked(struct nlpcb *nlp)
-{
- if (queue_empty(&nlp->tx_queue))
- return (true);
-
- /*
- * Check if something can be moved from the internal TX queue
- * to the socket queue.
- */
-
- bool appended = false;
- struct sockbuf *sb = &nlp->nl_socket->so_rcv;
- SOCKBUF_LOCK(sb);
-
- while (true) {
- struct mbuf *m = queue_head(&nlp->tx_queue);
- if (m != NULL) {
- struct mbuf *ctl = NULL;
- if (__predict_false(m->m_next != NULL))
- ctl = extract_msg_info(m);
- if (sbappendaddr_locked(sb, nl_empty_src, m, ctl) != 0) {
- /* appended successfully */
- queue_pop(&nlp->tx_queue);
- appended = true;
- } else
- break;
- } else
- break;
- }
-
- SOCKBUF_UNLOCK(sb);
-
- if (appended)
- sorwakeup(nlp->nl_socket);
-
- return (queue_empty(&nlp->tx_queue));
-}
-
static bool
nl_process_received_one(struct nlpcb *nlp)
{
struct socket *so = nlp->nl_socket;
- struct sockbuf *sb = &so->so_snd;
+ struct sockbuf *sb;
struct nl_buf *nb;
bool reschedule = false;
NLP_LOCK(nlp);
nlp->nl_task_pending = false;
+ NLP_UNLOCK(nlp);
- if (!tx_check_locked(nlp)) {
- /* TX overflow queue still not empty, ignore RX */
- NLP_UNLOCK(nlp);
+ /*
+ * Do not process queued up requests if there is no space to queue
+ * replies.
+ */
+ sb = &so->so_rcv;
+ SOCK_RECVBUF_LOCK(so);
+ if (sb->sb_hiwat <= sb->sb_ccc) {
+ SOCK_RECVBUF_UNLOCK(so);
return (false);
}
+ SOCK_RECVBUF_UNLOCK(so);
- int prev_hiwat = nlp->tx_queue.hiwat;
- NLP_UNLOCK(nlp);
-
+ sb = &so->so_snd;
SOCK_SENDBUF_LOCK(so);
while ((nb = TAILQ_FIRST(&sb->nl_queue)) != NULL) {
TAILQ_REMOVE(&sb->nl_queue, nb, tailq);
@@ -244,7 +165,7 @@
sb->sb_ccc -= nb->datalen;
/* XXXGL: potentially can reduce lock&unlock count. */
sowwakeup_locked(so);
- free(nb, M_NETLINK);
+ nl_buf_free(nb);
SOCK_SENDBUF_LOCK(so);
} else {
TAILQ_INSERT_HEAD(&sb->nl_queue, nb, tailq);
@@ -252,10 +173,6 @@
}
}
SOCK_SENDBUF_UNLOCK(so);
- if (nlp->tx_queue.hiwat > prev_hiwat) {
- NLP_LOG(LOG_DEBUG, nlp, "TX override peaked to %d", nlp->tx_queue.hiwat);
-
- }
return (reschedule);
}
@@ -276,18 +193,6 @@
;
}
-void
-nl_init_io(struct nlpcb *nlp)
-{
- STAILQ_INIT(&nlp->tx_queue.head);
-}
-
-void
-nl_free_io(struct nlpcb *nlp)
-{
- queue_free(&nlp->tx_queue);
-}
-
/*
* Called after some data have been read from the socket.
*/
@@ -306,8 +211,8 @@
struct sockbuf *sb = &so->so_rcv;
NLP_LOG(LOG_DEBUG, nlp,
"socket RX overflowed, %lu messages (%lu bytes) dropped. "
- "bytes: [%u/%u] mbufs: [%u/%u]", dropped_messages, dropped_bytes,
- sb->sb_ccc, sb->sb_hiwat, sb->sb_mbcnt, sb->sb_mbmax);
+ "bytes: [%u/%u]", dropped_messages, dropped_bytes,
+ sb->sb_ccc, sb->sb_hiwat);
/* TODO: send netlink message */
}
@@ -325,95 +230,67 @@
CURVNET_RESTORE();
}
-static __noinline void
-queue_push_tx(struct nlpcb *nlp, struct mbuf *m)
-{
- queue_push(&nlp->tx_queue, m);
- nlp->nl_tx_blocked = true;
-
- if (nlp->tx_queue.length > nlp->tx_queue.hiwat)
- nlp->tx_queue.hiwat = nlp->tx_queue.length;
-}
-
/*
- * Tries to send @m to the socket @nlp.
- *
- * @m: mbuf(s) to send to. Consumed in any case.
- * @nlp: socket to send to
- * @cnt: number of messages in @m
- * @io_flags: combination of NL_IOF_* flags
+ * Tries to send current data buffer from writer.
*
* Returns true on success.
* If no queue overrunes happened, wakes up socket owner.
*/
bool
-nl_send_one(struct mbuf *m, struct nlpcb *nlp, int num_messages, int io_flags)
+nl_send_one(struct nl_writer *nw)
{
- bool untranslated = io_flags & NL_IOF_UNTRANSLATED;
- bool ignore_limits = io_flags & NL_IOF_IGNORE_LIMIT;
- bool result = true;
+ struct nlpcb *nlp = nw->nlp;
+ struct socket *so = nlp->nl_socket;
+ struct sockbuf *sb = &so->so_rcv;
+ struct nl_buf *nb;
+
+ MPASS(nw->hdr == NULL);
IF_DEBUG_LEVEL(LOG_DEBUG2) {
- struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *);
+ struct nlmsghdr *hdr = (struct nlmsghdr *)nw->buf->data;
NLP_LOG(LOG_DEBUG2, nlp,
- "TX mbuf len %u msgs %u msg type %d first hdrlen %u io_flags %X",
- m_length(m, NULL), num_messages, hdr->nlmsg_type, hdr->nlmsg_len,
- io_flags);
+ "TX len %u msgs %u msg type %d first hdrlen %u",
+ nw->buf->datalen, nw->num_messages, hdr->nlmsg_type,
+ hdr->nlmsg_len);
}
- if (__predict_false(nlp->nl_linux && linux_netlink_p != NULL && untranslated)) {
- m = linux_netlink_p->mbufs_to_linux(nlp->nl_proto, m, nlp);
- if (m == NULL)
- return (false);
+ if (nlp->nl_linux && linux_netlink_p != NULL &&
+ __predict_false(!linux_netlink_p->msgs_to_linux(nw, nlp))) {
+ nl_buf_free(nw->buf);
+ nw->buf = NULL;
+ return (false);
}
- NLP_LOCK(nlp);
+ nb = nw->buf;
+ nw->buf = NULL;
- if (__predict_false(nlp->nl_socket == NULL)) {
+ SOCK_RECVBUF_LOCK(so);
+ if (!nw->ignore_limit && __predict_false(sb->sb_hiwat <= sb->sb_ccc)) {
+ SOCK_RECVBUF_UNLOCK(so);
+ NLP_LOCK(nlp);
+ nlp->nl_dropped_bytes += nb->datalen;
+ nlp->nl_dropped_messages += nw->num_messages;
+ NLP_LOG(LOG_DEBUG2, nlp, "RX oveflow: %lu m (+%d), %lu b (+%d)",
+ (unsigned long)nlp->nl_dropped_messages, nw->num_messages,
+ (unsigned long)nlp->nl_dropped_bytes, nb->datalen);
NLP_UNLOCK(nlp);
- m_freem(m);
+ nl_buf_free(nb);
return (false);
- }
-
- if (!queue_empty(&nlp->tx_queue)) {
- if (ignore_limits) {
- queue_push_tx(nlp, m);
- } else {
- m_free(m);
- result = false;
- }
- NLP_UNLOCK(nlp);
- return (result);
- }
-
- struct socket *so = nlp->nl_socket;
- struct mbuf *ctl = NULL;
- if (__predict_false(m->m_next != NULL))
- ctl = extract_msg_info(m);
- if (sbappendaddr(&so->so_rcv, nl_empty_src, m, ctl) != 0) {
- sorwakeup(so);
- NLP_LOG(LOG_DEBUG3, nlp, "appended data & woken up");
} else {
- if (ignore_limits) {
- queue_push_tx(nlp, m);
- } else {
- /*
- * Store dropped data so it can be reported
- * on the next read
- */
- nlp->nl_dropped_bytes += m_length(m, NULL);
- nlp->nl_dropped_messages += num_messages;
- NLP_LOG(LOG_DEBUG2, nlp, "RX oveflow: %lu m (+%d), %lu b (+%d)",
- (unsigned long)nlp->nl_dropped_messages, num_messages,
- (unsigned long)nlp->nl_dropped_bytes, m_length(m, NULL));
- soroverflow(so);
- m_freem(m);
- result = false;
+ bool full;
+
+ TAILQ_INSERT_TAIL(&sb->nl_queue, nb, tailq);
+ sb->sb_acc += nb->datalen;
+ sb->sb_ccc += nb->datalen;
+ full = sb->sb_hiwat <= sb->sb_ccc;
+ sorwakeup_locked(so);
+ if (full) {
+ NLP_LOCK(nlp);
+ nlp->nl_tx_blocked = true;
+ NLP_UNLOCK(nlp);
}
+ return (true);
}
- NLP_UNLOCK(nlp);
-
- return (result);
}
static int
diff --git a/sys/netlink/netlink_linux.h b/sys/netlink/netlink_linux.h
--- a/sys/netlink/netlink_linux.h
+++ b/sys/netlink/netlink_linux.h
@@ -27,6 +27,7 @@
#ifndef _NETLINK_LINUX_VAR_H_
#define _NETLINK_LINUX_VAR_H_
+#ifdef _KERNEL
/*
* The file contains headers for the bridge interface between
@@ -34,16 +35,13 @@
*/
struct nlpcb;
struct nl_pstate;
+struct nl_writer;
-typedef struct mbuf *mbufs_to_linux_cb_t(int netlink_family, struct mbuf *m,
- struct nlpcb *nlp);
-typedef struct mbuf *msgs_to_linux_cb_t(int netlink_family, char *buf, int data_length,
- struct nlpcb *nlp);
+typedef bool msgs_to_linux_cb_t(struct nl_writer *nw, struct nlpcb *nlp);
typedef struct nlmsghdr *msg_from_linux_cb_t(int netlink_family, struct nlmsghdr *hdr,
struct nl_pstate *npt);
struct linux_netlink_provider {
- mbufs_to_linux_cb_t *mbufs_to_linux;
msgs_to_linux_cb_t *msgs_to_linux;
msg_from_linux_cb_t *msg_from_linux;
@@ -52,3 +50,4 @@
extern struct linux_netlink_provider *linux_netlink_p;
#endif
+#endif
diff --git a/sys/netlink/netlink_message_writer.h b/sys/netlink/netlink_message_writer.h
--- a/sys/netlink/netlink_message_writer.h
+++ b/sys/netlink/netlink_message_writer.h
@@ -37,60 +37,41 @@
* It is not meant to be included directly
*/
-struct mbuf;
+struct nl_buf;
struct nl_writer;
-typedef bool nl_writer_cb(struct nl_writer *nw, void *buf, int buflen, int cnt);
+typedef bool nl_writer_cb(struct nl_writer *nw);
struct nl_writer {
- int alloc_len; /* allocated buffer length */
- int offset; /* offset from the start of the buffer */
- struct nlmsghdr *hdr; /* Pointer to the currently-filled msg */
- char *data; /* pointer to the contiguous storage */
- void *_storage; /* Underlying storage pointer */
- nl_writer_cb *cb; /* Callback to flush data */
+ struct nl_buf *buf; /* Underlying storage pointer */
+ struct nlmsghdr *hdr; /* Pointer to the currently-filled msg */
+ nl_writer_cb *cb; /* Callback to flush data */
union {
- void *ptr;
+ struct nlpcb *nlp;
struct {
uint16_t proto;
uint16_t id;
} group;
- } arg;
- int num_messages; /* Number of messages in the buffer */
- int malloc_flag; /* M_WAITOK or M_NOWAIT */
- uint8_t writer_type; /* NS_WRITER_TYPE_* */
- uint8_t writer_target; /* NS_WRITER_TARGET_* */
- bool ignore_limit; /* If true, ignores RCVBUF limit */
- bool enomem; /* True if ENOMEM occured */
- bool suppress_ack; /* If true, don't send NLMSG_ERR */
+ };
+ u_int num_messages; /* Number of messages in the buffer */
+ int malloc_flag; /* M_WAITOK or M_NOWAIT */
+ bool ignore_limit; /* If true, ignores RCVBUF limit */
+ bool enomem; /* True if ENOMEM occured */
+ bool suppress_ack; /* If true, don't send NLMSG_ERR */
};
-#define NS_WRITER_TARGET_SOCKET 0
-#define NS_WRITER_TARGET_GROUP 1
-#define NS_WRITER_TARGET_CHAIN 2
-
-#define NS_WRITER_TYPE_MBUF 0
-#define NS_WRITER_TYPE_BUF 1
-#define NS_WRITER_TYPE_LBUF 2
-#define NS_WRITER_TYPE_MBUFC 3
-#define NS_WRITER_TYPE_STUB 4
-
#define NLMSG_SMALL 128
#define NLMSG_LARGE 2048
/* Message and attribute writing */
-
-struct nlpcb;
-
#if defined(NETLINK) || defined(NETLINK_MODULE)
/* Provide optimized calls to the functions inside the same linking unit */
bool _nlmsg_get_unicast_writer(struct nl_writer *nw, int expected_size, struct nlpcb *nlp);
bool _nlmsg_get_group_writer(struct nl_writer *nw, int expected_size, int proto, int group_id);
-bool _nlmsg_get_chain_writer(struct nl_writer *nw, int expected_size, struct mbuf **pm);
bool _nlmsg_flush(struct nl_writer *nw);
void _nlmsg_ignore_limit(struct nl_writer *nw);
-bool _nlmsg_refill_buffer(struct nl_writer *nw, int required_size);
+bool _nlmsg_refill_buffer(struct nl_writer *nw, u_int required_len);
bool _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type,
uint16_t flags, uint32_t len);
bool _nlmsg_end(struct nl_writer *nw);
@@ -111,12 +92,6 @@
return (_nlmsg_get_group_writer(nw, expected_size, proto, group_id));
}
-static inline bool
-nlmsg_get_chain_writer(struct nl_writer *nw, int expected_size, struct mbuf **pm)
-{
- return (_nlmsg_get_chain_writer(nw, expected_size, pm));
-}
-
static inline bool
nlmsg_flush(struct nl_writer *nw)
{
@@ -186,8 +161,6 @@
hdr->nlmsg_flags, payload_len));
}
-#define nlmsg_data(_hdr) ((void *)((_hdr) + 1))
-
/*
* KPI similar to mtodo():
* current (uncompleted) header is guaranteed to be contiguous,
diff --git a/sys/netlink/netlink_message_writer.c b/sys/netlink/netlink_message_writer.c
--- a/sys/netlink/netlink_message_writer.c
+++ b/sys/netlink/netlink_message_writer.c
@@ -30,7 +30,6 @@
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/mbuf.h>
-#include <sys/ck.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
@@ -45,523 +44,44 @@
#include <netlink/netlink_debug.h>
_DECLARE_DEBUG(LOG_INFO);
-/*
- * The goal of this file is to provide convenient message writing KPI on top of
- * different storage methods (mbufs, uio, temporary memory chunks).
- *
- * The main KPI guarantee is that the (last) message always resides in the contiguous
- * memory buffer, so one is able to update the header after writing the entire message.
- *
- * This guarantee comes with a side effect of potentially reallocating underlying
- * buffer, so one needs to update the desired pointers after something is added
- * to the header.
- *
- * Messaging layer contains hooks performing transparent Linux translation for the messages.
- *
- * There are 3 types of supported targets:
- * * socket (adds mbufs to the socket buffer, used for message replies)
- * * group (sends mbuf/chain to the specified groups, used for the notifications)
- * * chain (returns mbuf chain, used in Linux message translation code)
- *
- * There are 3 types of storage:
- * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message
- * fits in NLMBUFSIZE)
- * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs
- * to be larger than one supported by NS_WRITER_TYPE_MBUF)
- * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for
- * Linux sockets, calls translation hook prior to sending messages to the socket).
- *
- * Internally, KPI switches between different types of storage when memory requirements
- * change. It happens transparently to the caller.
- */
-
-/*
- * Uma zone for the mbuf-based Netlink storage
- */
-static uma_zone_t nlmsg_zone;
-
-static void
-nl_free_mbuf_storage(struct mbuf *m)
-{
- uma_zfree(nlmsg_zone, m->m_ext.ext_buf);
-}
-
-static int
-nl_setup_mbuf_storage(void *mem, int size, void *arg, int how __unused)
-{
- struct mbuf *m = (struct mbuf *)arg;
-
- if (m != NULL)
- m_extadd(m, mem, size, nl_free_mbuf_storage, NULL, NULL, 0, EXT_MOD_TYPE);
-
- return (0);
-}
-
-static struct mbuf *
-nl_get_mbuf_flags(int size, int malloc_flags, int mbuf_flags)
-{
- struct mbuf *m, *m_storage;
-
- if (size <= MHLEN)
- return (m_get2(size, malloc_flags, MT_DATA, mbuf_flags));
-
- if (__predict_false(size > NLMBUFSIZE))
- return (NULL);
-
- m = m_gethdr(malloc_flags, MT_DATA);
- if (m == NULL)
- return (NULL);
-
- m_storage = uma_zalloc_arg(nlmsg_zone, m, malloc_flags);
- if (m_storage == NULL) {
- m_free_raw(m);
- return (NULL);
- }
-
- return (m);
-}
-
-static struct mbuf *
-nl_get_mbuf(int size, int malloc_flags)
-{
- return (nl_get_mbuf_flags(size, malloc_flags, M_PKTHDR));
-}
-
-/*
- * Gets a chain of Netlink mbufs.
- * This is strip-down version of m_getm2()
- */
-static struct mbuf *
-nl_get_mbuf_chain(int len, int malloc_flags)
-{
- struct mbuf *m_chain = NULL, *m_tail = NULL;
- int mbuf_flags = M_PKTHDR;
-
- while (len > 0) {
- int sz = len > NLMBUFSIZE ? NLMBUFSIZE: len;
- struct mbuf *m = nl_get_mbuf_flags(sz, malloc_flags, mbuf_flags);
-
- if (m == NULL) {
- m_freem(m_chain);
- return (NULL);
- }
-
- /* Book keeping. */
- len -= M_SIZE(m);
- if (m_tail != NULL)
- m_tail->m_next = m;
- else
- m_chain = m;
- m_tail = m;
- mbuf_flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */
- }
-
- return (m_chain);
-}
-
-void
-nl_init_msg_zone(void)
-{
- nlmsg_zone = uma_zcreate("netlink", NLMBUFSIZE, nl_setup_mbuf_storage,
- NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
-}
-
-void
-nl_destroy_msg_zone(void)
-{
- uma_zdestroy(nlmsg_zone);
-}
-
-
-typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok);
-typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int buflen, int cnt);
-
-struct nlwriter_ops {
- nlwriter_op_init *init;
- nlwriter_op_write *write_socket;
- nlwriter_op_write *write_group;
- nlwriter_op_write *write_chain;
-};
-
-/*
- * NS_WRITER_TYPE_BUF
- * Writes message to a temporary memory buffer,
- * flushing to the socket/group when buffer size limit is reached
- */
-static bool
-nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok)
-{
- int mflag = waitok ? M_WAITOK : M_NOWAIT;
- nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO);
- if (__predict_false(nw->_storage == NULL))
- return (false);
- nw->alloc_len = size;
- nw->offset = 0;
- nw->hdr = NULL;
- nw->data = nw->_storage;
- nw->writer_type = NS_WRITER_TYPE_BUF;
- nw->malloc_flag = mflag;
- nw->num_messages = 0;
- nw->enomem = false;
- return (true);
-}
-
static bool
-nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
+nlmsg_get_buf(struct nl_writer *nw, u_int len, bool waitok)
{
- NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
- if (__predict_false(datalen == 0)) {
- free(buf, M_NETLINK);
- return (true);
- }
+ const int mflag = waitok ? M_WAITOK : M_NOWAIT;
- struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
- if (__predict_false(m == NULL)) {
- /* XXX: should we set sorcverr? */
- free(buf, M_NETLINK);
- return (false);
- }
- m_append(m, datalen, buf);
- free(buf, M_NETLINK);
+ MPASS(nw->buf == NULL);
- int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
- return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
-}
-
-static bool
-nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
-{
- NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
- nw->arg.group.proto, nw->arg.group.id);
- if (__predict_false(datalen == 0)) {
- free(buf, M_NETLINK);
- return (true);
- }
+ NL_LOG(LOG_DEBUG3, "Setting up nw %p len %u %s", nw, len,
+ waitok ? "wait" : "nowait");
- struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
- if (__predict_false(m == NULL)) {
- free(buf, M_NETLINK);
+ nw->buf = nl_buf_alloc(len, mflag | M_ZERO);
+ if (__predict_false(nw->buf == NULL))
return (false);
- }
- bool success = m_append(m, datalen, buf) != 0;
- free(buf, M_NETLINK);
-
- if (!success)
- return (false);
-
- nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
- return (true);
-}
-
-static bool
-nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
-{
- struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);
- NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
-
- if (__predict_false(datalen == 0)) {
- free(buf, M_NETLINK);
- return (true);
- }
-
- if (*m0 == NULL) {
- struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
-
- if (__predict_false(m == NULL)) {
- free(buf, M_NETLINK);
- return (false);
- }
- *m0 = m;
- }
- if (__predict_false(m_append(*m0, datalen, buf) == 0)) {
- free(buf, M_NETLINK);
- return (false);
- }
- return (true);
-}
-
-
-/*
- * NS_WRITER_TYPE_MBUF
- * Writes message to the allocated mbuf,
- * flushing to socket/group when mbuf size limit is reached.
- * This is the most efficient mechanism as it avoids double-copying.
- *
- * Allocates a single mbuf suitable to store up to @size bytes of data.
- * If size < MHLEN (around 160 bytes), allocates mbuf with pkghdr.
- * If the size <= NLMBUFSIZE (2k), allocate mbuf+storage out of nlmsg_zone.
- * Returns NULL on greater size or the allocation failure.
- */
-static bool
-nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok)
-{
- int mflag = waitok ? M_WAITOK : M_NOWAIT;
- struct mbuf *m = nl_get_mbuf(size, mflag);
-
- if (__predict_false(m == NULL))
- return (false);
- nw->alloc_len = M_TRAILINGSPACE(m);
- nw->offset = 0;
nw->hdr = NULL;
- nw->_storage = (void *)m;
- nw->data = mtod(m, void *);
- nw->writer_type = NS_WRITER_TYPE_MBUF;
nw->malloc_flag = mflag;
nw->num_messages = 0;
nw->enomem = false;
- memset(nw->data, 0, size);
- NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p",
- m, size, nw->alloc_len, nw->data);
- return (true);
-}
-
-static bool
-nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
-{
- struct mbuf *m = (struct mbuf *)buf;
- NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
-
- if (__predict_false(datalen == 0)) {
- m_freem(m);
- return (true);
- }
-
- m->m_pkthdr.len = datalen;
- m->m_len = datalen;
- int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
- return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
-}
-
-static bool
-nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
-{
- struct mbuf *m = (struct mbuf *)buf;
- NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
- nw->arg.group.proto, nw->arg.group.id);
-
- if (__predict_false(datalen == 0)) {
- m_freem(m);
- return (true);
- }
- m->m_pkthdr.len = datalen;
- m->m_len = datalen;
- nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
return (true);
}
-static bool
-nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
-{
- struct mbuf *m_new = (struct mbuf *)buf;
- struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);
-
- NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
-
- if (__predict_false(datalen == 0)) {
- m_freem(m_new);
- return (true);
- }
-
- m_new->m_pkthdr.len = datalen;
- m_new->m_len = datalen;
-
- if (*m0 == NULL) {
- *m0 = m_new;
- } else {
- struct mbuf *m_last;
- for (m_last = *m0; m_last->m_next != NULL; m_last = m_last->m_next)
- ;
- m_last->m_next = m_new;
- (*m0)->m_pkthdr.len += datalen;
- }
-
- return (true);
-}
-
-/*
- * NS_WRITER_TYPE_LBUF
- * Writes message to the allocated memory buffer,
- * flushing to socket/group when mbuf size limit is reached.
- * Calls linux handler to rewrite messages before sending to the socket.
- */
-static bool
-nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok)
-{
- int mflag = waitok ? M_WAITOK : M_NOWAIT;
- size = roundup2(size, sizeof(void *));
- int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE;
- char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO);
- if (__predict_false(buf == NULL))
- return (false);
-
- /* Fill buffer header first */
- struct linear_buffer *lb = (struct linear_buffer *)buf;
- lb->base = &buf[sizeof(struct linear_buffer) + size];
- lb->size = size + SCRATCH_BUFFER_SIZE;
-
- nw->alloc_len = size;
- nw->offset = 0;
- nw->hdr = NULL;
- nw->_storage = buf;
- nw->data = (char *)(lb + 1);
- nw->malloc_flag = mflag;
- nw->writer_type = NS_WRITER_TYPE_LBUF;
- nw->num_messages = 0;
- nw->enomem = false;
- return (true);
-}
-
-static bool
-nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
-{
- struct linear_buffer *lb = (struct linear_buffer *)buf;
- char *data = (char *)(lb + 1);
- struct nlpcb *nlp = (struct nlpcb *)(nw->arg.ptr);
-
- if (__predict_false(datalen == 0)) {
- free(buf, M_NETLINK);
- return (true);
- }
-
- struct mbuf *m = NULL;
- if (linux_netlink_p != NULL)
- m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp);
- free(buf, M_NETLINK);
-
- if (__predict_false(m == NULL)) {
- /* XXX: should we set sorcverr? */
- return (false);
- }
-
- int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
- return (nl_send_one(m, nlp, cnt, io_flags));
-}
-
-/* Shouldn't be called (maybe except Linux code originating message) */
-static bool
-nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
-{
- struct linear_buffer *lb = (struct linear_buffer *)buf;
- char *data = (char *)(lb + 1);
-
- if (__predict_false(datalen == 0)) {
- free(buf, M_NETLINK);
- return (true);
- }
-
- struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
- if (__predict_false(m == NULL)) {
- free(buf, M_NETLINK);
- return (false);
- }
- m_append(m, datalen, data);
- free(buf, M_NETLINK);
-
- nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
- return (true);
-}
-
-static const struct nlwriter_ops nlmsg_writers[] = {
- /* NS_WRITER_TYPE_MBUF */
- {
- .init = nlmsg_get_ns_mbuf,
- .write_socket = nlmsg_write_socket_mbuf,
- .write_group = nlmsg_write_group_mbuf,
- .write_chain = nlmsg_write_chain_mbuf,
- },
- /* NS_WRITER_TYPE_BUF */
- {
- .init = nlmsg_get_ns_buf,
- .write_socket = nlmsg_write_socket_buf,
- .write_group = nlmsg_write_group_buf,
- .write_chain = nlmsg_write_chain_buf,
- },
- /* NS_WRITER_TYPE_LBUF */
- {
- .init = nlmsg_get_ns_lbuf,
- .write_socket = nlmsg_write_socket_lbuf,
- .write_group = nlmsg_write_group_lbuf,
- },
-};
-
-static void
-nlmsg_set_callback(struct nl_writer *nw)
-{
- const struct nlwriter_ops *pops = &nlmsg_writers[nw->writer_type];
-
- switch (nw->writer_target) {
- case NS_WRITER_TARGET_SOCKET:
- nw->cb = pops->write_socket;
- break;
- case NS_WRITER_TARGET_GROUP:
- nw->cb = pops->write_group;
- break;
- case NS_WRITER_TARGET_CHAIN:
- nw->cb = pops->write_chain;
- break;
- default:
- panic("not implemented");
- }
-}
-
-static bool
-nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok)
-{
- MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0]));
- NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type);
- return (nlmsg_writers[type].init(nw, size, waitok));
-}
-
-static bool
-nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux)
-{
- int type;
-
- if (!is_linux) {
- if (__predict_true(size <= NLMBUFSIZE))
- type = NS_WRITER_TYPE_MBUF;
- else
- type = NS_WRITER_TYPE_BUF;
- } else
- type = NS_WRITER_TYPE_LBUF;
- return (nlmsg_get_buf_type(nw, size, type, waitok));
-}
-
bool
_nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp)
{
- if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux))
- return (false);
- nw->arg.ptr = (void *)nlp;
- nw->writer_target = NS_WRITER_TARGET_SOCKET;
- nlmsg_set_callback(nw);
- return (true);
+ nw->nlp = nlp;
+ nw->cb = nl_send_one;
+
+ return (nlmsg_get_buf(nw, size, false));
}
bool
_nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id)
{
- if (!nlmsg_get_buf(nw, size, false, false))
- return (false);
- nw->arg.group.proto = protocol;
- nw->arg.group.id = group_id;
- nw->writer_target = NS_WRITER_TARGET_GROUP;
- nlmsg_set_callback(nw);
- return (true);
-}
+ nw->group.proto = protocol;
+ nw->group.id = group_id;
+ nw->cb = nl_send_group;
-bool
-_nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm)
-{
- if (!nlmsg_get_buf(nw, size, false, false))
- return (false);
- *pm = NULL;
- nw->arg.ptr = (void *)pm;
- nw->writer_target = NS_WRITER_TARGET_CHAIN;
- nlmsg_set_callback(nw);
- NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf);
- return (true);
+ return (nlmsg_get_buf(nw, size, false));
}
void
@@ -576,18 +96,18 @@
if (__predict_false(nw->hdr != NULL)) {
/* Last message has not been completed, skip it. */
- int completed_len = (char *)nw->hdr - nw->data;
+ int completed_len = (char *)nw->hdr - nw->buf->data;
/* Send completed messages */
- nw->offset -= nw->offset - completed_len;
+ nw->buf->datalen -= nw->buf->datalen - completed_len;
nw->hdr = NULL;
- }
+ }
NL_LOG(LOG_DEBUG2, "OUT");
- bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages);
- nw->_storage = NULL;
+ bool result = nw->cb(nw);
+ nw->num_messages = 0;
if (!result) {
- NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed", nw, nw->offset, nw->cb);
+ NL_LOG(LOG_DEBUG, "nw %p flush with %p() failed", nw, nw->cb);
}
return (result);
@@ -599,59 +119,61 @@
* Return true on success.
*/
bool
-_nlmsg_refill_buffer(struct nl_writer *nw, int required_len)
+_nlmsg_refill_buffer(struct nl_writer *nw, u_int required_len)
{
- struct nl_writer ns_new = {};
- int completed_len, new_len;
+ struct nl_buf *new;
+ u_int completed_len, new_len, last_len;
+
+ MPASS(nw->buf != NULL);
if (nw->enomem)
return (false);
- NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim",
- nw->offset, nw->alloc_len, required_len);
+ NL_LOG(LOG_DEBUG3, "no space at offset %u/%u (want %u), trying to "
+ "reclaim", nw->buf->datalen, nw->buf->buflen, required_len);
- /* Calculated new buffer size and allocate it s*/
- completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset;
+ /* Calculate new buffer size and allocate it. */
+ completed_len = (nw->hdr != NULL) ?
+ (char *)nw->hdr - nw->buf->data : nw->buf->datalen;
if (completed_len > 0 && required_len < NLMBUFSIZE) {
- /* We already ran out of space, use the largest effective size */
- new_len = max(nw->alloc_len, NLMBUFSIZE);
+ /* We already ran out of space, use largest effective size. */
+ new_len = max(nw->buf->buflen, NLMBUFSIZE);
} else {
- if (nw->alloc_len < NLMBUFSIZE)
+ if (nw->buf->buflen < NLMBUFSIZE)
+ /* XXXGL: does this happen? */
new_len = NLMBUFSIZE;
else
- new_len = nw->alloc_len * 2;
+ new_len = nw->buf->buflen * 2;
while (new_len < required_len)
new_len *= 2;
}
- bool waitok = (nw->malloc_flag == M_WAITOK);
- bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF);
- if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) {
+
+ new = nl_buf_alloc(new_len, nw->malloc_flag | M_ZERO);
+ if (__predict_false(new == NULL)) {
nw->enomem = true;
NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM");
return (false);
}
- if (nw->ignore_limit)
- nlmsg_ignore_limit(&ns_new);
- /* Update callback data */
- ns_new.writer_target = nw->writer_target;
- nlmsg_set_callback(&ns_new);
- ns_new.arg = nw->arg;
-
- /* Copy last (unfinished) header to the new storage */
- int last_len = nw->offset - completed_len;
+ /* Copy last (unfinished) header to the new storage. */
+ last_len = nw->buf->datalen - completed_len;
if (last_len > 0) {
- memcpy(ns_new.data, nw->hdr, last_len);
- ns_new.hdr = (struct nlmsghdr *)ns_new.data;
- ns_new.offset = last_len;
+ memcpy(new->data, nw->hdr, last_len);
+ new->datalen = last_len;
}
- NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len);
+ NL_LOG(LOG_DEBUG2, "completed: %u bytes, copied: %u bytes",
+ completed_len, last_len);
- /* Flush completed headers & switch to the new nw */
- nlmsg_flush(nw);
- memcpy(nw, &ns_new, sizeof(struct nl_writer));
- NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes", nw->offset, nw->alloc_len);
+ if (completed_len > 0) {
+ nlmsg_flush(nw);
+ MPASS(nw->buf == NULL);
+ } else
+ nl_buf_free(nw->buf);
+ nw->buf = new;
+ nw->hdr = (last_len > 0) ? (struct nlmsghdr *)new->data : NULL;
+ NL_LOG(LOG_DEBUG2, "switched buffer: used %u/%u bytes",
+ new->datalen, new->buflen);
return (true);
}
@@ -660,17 +182,20 @@
_nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type,
uint16_t flags, uint32_t len)
{
+ struct nl_buf *nb = nw->buf;
struct nlmsghdr *hdr;
+ u_int required_len;
MPASS(nw->hdr == NULL);
- int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr));
- if (__predict_false(nw->offset + required_len > nw->alloc_len)) {
+ required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr));
+ if (__predict_false(nb->datalen + required_len > nb->buflen)) {
if (!nlmsg_refill_buffer(nw, required_len))
return (false);
+ nb = nw->buf;
}
- hdr = (struct nlmsghdr *)(&nw->data[nw->offset]);
+ hdr = (struct nlmsghdr *)(&nb->data[nb->datalen]);
hdr->nlmsg_len = len;
hdr->nlmsg_type = type;
@@ -679,7 +204,7 @@
hdr->nlmsg_pid = portid;
nw->hdr = hdr;
- nw->offset += sizeof(struct nlmsghdr);
+ nb->datalen += sizeof(struct nlmsghdr);
return (true);
}
@@ -687,6 +212,8 @@
bool
_nlmsg_end(struct nl_writer *nw)
{
+ struct nl_buf *nb = nw->buf;
+
MPASS(nw->hdr != NULL);
if (nw->enomem) {
@@ -695,7 +222,7 @@
return (false);
}
- nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr);
+ nw->hdr->nlmsg_len = nb->data + nb->datalen - (char *)nw->hdr;
NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u",
nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags,
nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid);
@@ -707,8 +234,10 @@
void
_nlmsg_abort(struct nl_writer *nw)
{
+ struct nl_buf *nb = nw->buf;
+
if (nw->hdr != NULL) {
- nw->offset = (uint32_t)((char *)nw->hdr - nw->data);
+ nb->datalen = (char *)nw->hdr - nb->data;
nw->hdr = NULL;
}
}
@@ -775,7 +304,7 @@
/* Save operation result */
int *perror = nlmsg_reserve_object(nw, int);
- NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error,
- nw->offset, perror);
+ NL_LOG(LOG_DEBUG2, "record error=%d at off %u (%p)", error,
+ nw->buf->datalen, perror);
*perror = error;
nlmsg_end(nw);
nw->suppress_ack = true;
@@ -787,40 +316,47 @@
* KPI functions.
*/
-int
+u_int
nlattr_save_offset(const struct nl_writer *nw)
{
- return (nw->offset - ((char *)nw->hdr - nw->data));
+ return (nw->buf->datalen - ((char *)nw->hdr - nw->buf->data));
}
void *
nlmsg_reserve_data_raw(struct nl_writer *nw, size_t sz)
{
- sz = NETLINK_ALIGN(sz);
+ struct nl_buf *nb = nw->buf;
+ void *data;
- if (__predict_false(nw->offset + sz > nw->alloc_len)) {
+ sz = NETLINK_ALIGN(sz);
+ if (__predict_false(nb->datalen + sz > nb->buflen)) {
if (!nlmsg_refill_buffer(nw, sz))
return (NULL);
+ nb = nw->buf;
}
- void *data_ptr = &nw->data[nw->offset];
- nw->offset += sz;
- bzero(data_ptr, sz);
+ data = &nb->data[nb->datalen];
+ bzero(data, sz);
+ nb->datalen += sz;
- return (data_ptr);
+ return (data);
}
bool
nlattr_add(struct nl_writer *nw, int attr_type, int attr_len, const void *data)
{
- int required_len = NLA_ALIGN(attr_len + sizeof(struct nlattr));
+ struct nl_buf *nb = nw->buf;
+ struct nlattr *nla;
+ u_int required_len;
- if (__predict_false(nw->offset + required_len > nw->alloc_len)) {
+ required_len = NLA_ALIGN(attr_len + sizeof(struct nlattr));
+ if (__predict_false(nb->datalen + required_len > nb->buflen)) {
if (!nlmsg_refill_buffer(nw, required_len))
return (false);
+ nb = nw->buf;
}
- struct nlattr *nla = (struct nlattr *)(&nw->data[nw->offset]);
+ nla = (struct nlattr *)(&nb->data[nb->datalen]);
nla->nla_len = attr_len + sizeof(struct nlattr);
nla->nla_type = attr_type;
@@ -831,7 +367,7 @@
}
memcpy((nla + 1), data, attr_len);
}
- nw->offset += required_len;
+ nb->datalen += required_len;
return (true);
}
diff --git a/sys/netlink/netlink_module.c b/sys/netlink/netlink_module.c
--- a/sys/netlink/netlink_module.c
+++ b/sys/netlink/netlink_module.c
@@ -181,7 +181,6 @@
.nlmsg_abort = _nlmsg_abort,
.nlmsg_get_unicast_writer = _nlmsg_get_unicast_writer,
.nlmsg_get_group_writer = _nlmsg_get_group_writer,
- .nlmsg_get_chain_writer = _nlmsg_get_chain_writer,
.nlmsg_end_dump = _nlmsg_end_dump,
.nl_modify_ifp_generic = _nl_modify_ifp_generic,
.nl_store_ifp_cookie = _nl_store_ifp_cookie,
@@ -219,7 +218,6 @@
switch (what) {
case MOD_LOAD:
NL_LOG(LOG_DEBUG2, "Loading");
- nl_init_msg_zone();
nl_osd_register();
#if !defined(NETLINK) && defined(NETLINK_MODULE)
nl_set_functions(&nl_module);
@@ -235,7 +233,6 @@
nl_set_functions(NULL);
#endif
nl_osd_unregister();
- nl_destroy_msg_zone();
} else
ret = EBUSY;
break;
diff --git a/sys/netlink/netlink_var.h b/sys/netlink/netlink_var.h
--- a/sys/netlink/netlink_var.h
+++ b/sys/netlink/netlink_var.h
@@ -43,14 +43,9 @@
struct ucred;
-struct nl_io_queue {
- STAILQ_HEAD(, mbuf) head;
- int length;
- int hiwat;
-};
-
struct nl_buf {
TAILQ_ENTRY(nl_buf) tailq;
+ struct mbuf *control;
u_int buflen;
u_int datalen;
u_int offset;
@@ -72,7 +67,6 @@
bool nl_linux; /* true if running under compat */
bool nl_unconstrained_vnet; /* true if running under VNET jail (or without jail) */
bool nl_need_thread_setup;
- struct nl_io_queue tx_queue;
struct taskqueue *nl_taskqueue;
struct task nl_task;
struct ucred *nl_cred; /* Copy of nl_socket->so_cred */
@@ -131,7 +125,7 @@
extern struct nl_proto_handler *nl_handlers;
/* netlink_domain.c */
-void nl_send_group(struct mbuf *m, int cnt, int proto, int group_id);
+bool nl_send_group(struct nl_writer *);
void nl_osd_register(void);
void nl_osd_unregister(void);
void nl_set_thread_nlp(struct thread *td, struct nlpcb *nlp);
@@ -139,22 +133,18 @@
/* netlink_io.c */
#define NL_IOF_UNTRANSLATED 0x01
#define NL_IOF_IGNORE_LIMIT 0x02
-bool nl_send_one(struct mbuf *m, struct nlpcb *nlp, int cnt, int io_flags);
+bool nl_send_one(struct nl_writer *);
void nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *nlmsg,
struct nl_pstate *npt);
void nl_on_transmit(struct nlpcb *nlp);
-void nl_init_io(struct nlpcb *nlp);
-void nl_free_io(struct nlpcb *nlp);
void nl_taskqueue_handler(void *_arg, int pending);
void nl_schedule_taskqueue(struct nlpcb *nlp);
void nl_process_receive_locked(struct nlpcb *nlp);
void nl_set_source_metadata(struct mbuf *m, int num_messages);
-void nl_add_msg_info(struct mbuf *m);
-
-/* netlink_message_writer.c */
-void nl_init_msg_zone(void);
-void nl_destroy_msg_zone(void);
+void nl_add_msg_info(struct nl_buf *nb);
+struct nl_buf *nl_buf_alloc(size_t len, int mflag);
+void nl_buf_free(struct nl_buf *nb);
/* netlink_generic.c */
struct genl_family {
diff --git a/sys/netlink/route/rt.c b/sys/netlink/route/rt.c
--- a/sys/netlink/route/rt.c
+++ b/sys/netlink/route/rt.c
@@ -556,9 +556,8 @@
IF_DEBUG_LEVEL(LOG_DEBUG3) {
char rtbuf[INET6_ADDRSTRLEN + 5];
FIB_LOG(LOG_DEBUG3, wa->fibnum, wa->family,
- "Dump %s, offset %u, error %d",
- rt_print_buf(rt, rtbuf, sizeof(rtbuf)),
- wa->nw->offset, error);
+ "Dump %s, error %d",
+ rt_print_buf(rt, rtbuf, sizeof(rtbuf)), error);
}
wa->error = error;
@@ -578,7 +577,6 @@
FIB_LOG(LOG_DEBUG2, fibnum, family, "End dump, iterated %d dumped %d",
wa->count, wa->dumped);
- NL_LOG(LOG_DEBUG2, "Current offset: %d", wa->nw->offset);
}
static int
diff --git a/tests/sys/netlink/test_netlink_message_writer.py b/tests/sys/netlink/test_netlink_message_writer.py
--- a/tests/sys/netlink/test_netlink_message_writer.py
+++ b/tests/sys/netlink/test_netlink_message_writer.py
@@ -4,19 +4,11 @@
from atf_python.ktest import BaseKernelTest
from atf_python.sys.netlink.attrs import NlAttrU32
-
M_NOWAIT = 1
M_WAITOK = 2
-NS_WRITER_TYPE_MBUF = 0
-NS_WRITER_TYPE_BUF = 1
-NS_WRITER_TYPE_LBUF = 1
-
-MHLEN = 160
-MCLBYTES = 2048 # XXX: may differ on some archs?
-MJUMPAGESIZE = mmap.PAGESIZE
-MJUM9BYTES = 9 * 1024
-MJUM16BYTES = 16 * 1024
+NLMSG_SMALL = 128
+NLMSG_LARGE = 2048
class TestNetlinkMessageWriter(BaseKernelTest):
KTEST_MODULE_NAME = "ktest_netlink_message_writer"
@@ -28,52 +20,20 @@
pytest.param(M_WAITOK, id="WAITOK"),
],
)
- @pytest.mark.parametrize(
- "writer_type",
- [
- pytest.param(NS_WRITER_TYPE_MBUF, id="MBUF"),
- pytest.param(NS_WRITER_TYPE_BUF, id="BUF"),
- ],
- )
@pytest.mark.parametrize(
"sz",
[
- pytest.param([160, 160], id="MHLEN"),
- pytest.param([MCLBYTES, MCLBYTES], id="MCLBYTES"),
+ pytest.param([NLMSG_SMALL, NLMSG_SMALL], id="NLMSG_SMALL"),
+ pytest.param([NLMSG_LARGE, NLMSG_LARGE], id="NLMSG_LARGE"),
+ pytest.param([NLMSG_LARGE + 256, NLMSG_LARGE + 256], id="NLMSG_LARGE+256"),
],
)
- def test_mbuf_writer_allocation(self, sz, writer_type, malloc_flags):
+ def test_nlbuf_writer_allocation(self, sz, malloc_flags):
"""override to parametrize"""
test_meta = [
NlAttrU32(1, sz[0]), # size
NlAttrU32(2, sz[1]), # expected_avail
- NlAttrU32(4, writer_type),
- NlAttrU32(5, malloc_flags),
- ]
- self.runtest(test_meta)
-
- @pytest.mark.parametrize(
- "malloc_flags",
- [
- pytest.param(M_NOWAIT, id="NOWAIT"),
- pytest.param(M_WAITOK, id="WAITOK"),
- ],
- )
- @pytest.mark.parametrize(
- "sz",
- [
- pytest.param([160, 160, 1], id="MHLEN"),
- pytest.param([MCLBYTES, MCLBYTES, 1], id="MCLBYTES"),
- pytest.param([MCLBYTES + 1, MCLBYTES + 1, 2], id="MCLBYTES_MHLEN"),
- pytest.param([MCLBYTES + 256, MCLBYTES * 2, 2], id="MCLBYTESx2"),
- ],
- )
- def test_mbuf_chain_allocation(self, sz, malloc_flags):
- test_meta = [
- NlAttrU32(1, sz[0]), # size
- NlAttrU32(2, sz[1]), # expected_avail
- NlAttrU32(3, sz[2]), # expected_count
- NlAttrU32(5, malloc_flags),
+ NlAttrU32(3, malloc_flags),
]
self.runtest(test_meta)

File Metadata

Mime Type
text/plain
Expires
Sat, Jul 4, 3:25 AM (14 h, 40 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
34648851
Default Alt Text
D42524.diff (60 KB)

Event Timeline