diff --git a/sys/modules/ktest/Makefile b/sys/modules/ktest/Makefile
index 21c94caabc30..151db53417df 100644
--- a/sys/modules/ktest/Makefile
+++ b/sys/modules/ktest/Makefile
@@ -1,7 +1,8 @@
 SYSDIR?=${SRCTOP}/sys
 .include "${SYSDIR}/conf/kern.opts.mk"
 
 SUBDIR=	ktest \
-	ktest_example
+	ktest_example \
+	ktest_netlink_message_writer
 
 .include
diff --git a/sys/modules/ktest/ktest_netlink_message_writer/Makefile b/sys/modules/ktest/ktest_netlink_message_writer/Makefile
new file mode 100644
index 000000000000..2d14d93897f8
--- /dev/null
+++ b/sys/modules/ktest/ktest_netlink_message_writer/Makefile
@@ -0,0 +1,15 @@
+# $FreeBSD$
+
+PACKAGE=	tests
+
+SYSDIR?=${SRCTOP}/sys
+.include "${SYSDIR}/conf/kern.opts.mk"
+
+.PATH: ${SYSDIR}/netlink
+
+KMOD=	ktest_netlink_message_writer
+SRCS=	ktest_netlink_message_writer.c
+SRCS+=	opt_netlink.h
+
+.include
+
diff --git a/sys/netlink/ktest_netlink_message_writer.c b/sys/netlink/ktest_netlink_message_writer.c
new file mode 100644
index 000000000000..c13a25e05a70
--- /dev/null
+++ b/sys/netlink/ktest_netlink_message_writer.c
@@ -0,0 +1,205 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "opt_netlink.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define KTEST_CALLER
+#include
+
+#ifdef INVARIANTS
+
+/*
+ * Parameters of a single test run, filled in by test_mbuf_parser() from
+ * the nested attributes described by nla_p_mbuf_w.
+ */
+struct test_mbuf_attrs {
+	uint32_t	size;		/* requested allocation size, bytes */
+	uint32_t	expected_avail;	/* minimum space the test accepts */
+	uint32_t	expected_count;	/* expected mbufs in chain, 0 = any */
+	uint32_t	wtype;		/* type given to nlmsg_get_buf_type() */
+	int		waitok;		/* malloc(9) flag: M_NOWAIT or M_WAITOK */
+};
+
+#define	_OUT(_field)	offsetof(struct test_mbuf_attrs, _field)
+static const struct nlattr_parser nla_p_mbuf_w[] = {
+	{ .type = 1, .off = _OUT(size), .cb = nlattr_get_uint32 },
+	{ .type = 2, .off = _OUT(expected_avail), .cb = nlattr_get_uint32 },
+	{ .type = 3, .off = _OUT(expected_count), .cb = nlattr_get_uint32 },
+	{ .type = 4, .off = _OUT(wtype), .cb = nlattr_get_uint32 },
+	{ .type = 5, .off = _OUT(waitok), .cb = nlattr_get_uint32 },
+};
+#undef _OUT
+NL_DECLARE_ATTR_PARSER(mbuf_w_parser, nla_p_mbuf_w);
+
+/*
+ * The caller passes a malloc(9) flag (M_NOWAIT or M_WAITOK) in 'waitok'.
+ * Both values are non-zero, so a plain truth test would always select
+ * M_WAITOK; check the M_WAITOK bit instead.
+ */
+static bool
+test_attrs_waitok(const struct test_mbuf_attrs *attrs)
+{
+	return ((attrs->waitok & M_WAITOK) != 0);
+}
+
+/*
+ * Allocates a test_mbuf_attrs with npt_alloc(), stores it in ctx->arg and
+ * parses the nested attribute @nla into it.  Returns ENOMEM if the
+ * allocation fails.
+ */
+static int
+test_mbuf_parser(struct ktest_test_context *ctx, struct nlattr *nla)
+{
+	struct test_mbuf_attrs *attrs = npt_alloc(ctx->npt, sizeof(*attrs));
+
+	ctx->arg = attrs;
+	if (attrs != NULL)
+		return (nl_parse_nested(nla, &mbuf_w_parser, ctx->npt, attrs));
+	return (ENOMEM);
+}
+
+/*
+ * Allocates writer storage of the requested size and type, checks that
+ * all of the reported alloc_len can be reserved and that alloc_len is at
+ * least expected_avail.  The storage is flushed (freed) on every path
+ * past the allocation.
+ */
+static int
+test_mbuf_writer_allocation(struct ktest_test_context *ctx)
+{
+	struct test_mbuf_attrs *attrs = ctx->arg;
+	bool ret;
+	struct nl_writer nw = {};
+	int error = 0;
+
+	ret = nlmsg_get_buf_type_wrapper(&nw, attrs->size, attrs->wtype,
+	    test_attrs_waitok(attrs));
+	if (!ret)
+		return (EINVAL);
+
+	int alloc_len = nw.alloc_len;
+	KTEST_LOG(ctx, "requested %u, allocated %d", attrs->size, alloc_len);
+
+	/* Set cleanup callback */
+	nw.writer_target = NS_WRITER_TARGET_SOCKET;
+	nlmsg_set_callback_wrapper(&nw);
+
+	/* Mark enomem to avoid reallocation */
+	nw.enomem = true;
+
+	if (nlmsg_reserve_data(&nw, alloc_len, void *) == NULL) {
+		KTEST_LOG(ctx, "unable to get %d bytes from the writer", alloc_len);
+		error = EINVAL;
+	}
+
+	/* Mark as empty to free the storage, on the failure path as well */
+	nw.offset = 0;
+	nlmsg_flush(&nw);
+
+	if (error != 0)
+		return (error);
+
+	if (alloc_len < attrs->expected_avail) {
+		KTEST_LOG(ctx, "alloc_len %d, expected %u",
+		    alloc_len, attrs->expected_avail);
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * Allocates an mbuf chain for the requested size and checks the total
+ * M_SIZE() of the chain against expected_avail and, when expected_count
+ * is non-zero, the number of mbufs against expected_count.
+ */
+static int
+test_mbuf_chain_allocation(struct ktest_test_context *ctx)
+{
+	struct test_mbuf_attrs *attrs = ctx->arg;
+	int mflags = test_attrs_waitok(attrs) ? M_WAITOK : M_NOWAIT;
+	struct mbuf *chain = nl_get_mbuf_chain_wrapper(attrs->size, mflags);
+
+	if (chain == NULL) {
+		KTEST_LOG(ctx, "nl_get_mbuf_chain(%u) returned NULL", attrs->size);
+		return (EINVAL);
+	}
+
+	/* Iterate and check number of mbufs and space */
+	uint32_t allocated_count = 0, allocated_size = 0;
+	for (struct mbuf *m = chain; m != NULL; m = m->m_next) {
+		allocated_count++;
+		allocated_size += M_SIZE(m);
+	}
+	m_freem(chain);
+
+	if (attrs->expected_avail > allocated_size) {
+		KTEST_LOG(ctx, "expected/allocated avail(bytes) %u/%u"
+		    " expected/allocated count %u/%u",
+		    attrs->expected_avail, allocated_size,
+		    attrs->expected_count, allocated_count);
+		return (EINVAL);
+	}
+
+	if (attrs->expected_count > 0 && (attrs->expected_count != allocated_count)) {
+		KTEST_LOG(ctx, "expected/allocated avail(bytes) %u/%u"
+		    " expected/allocated count %u/%u",
+		    attrs->expected_avail, allocated_size,
+		    attrs->expected_count, allocated_count);
+		return (EINVAL);
+	}
+
+	return (0);
+}
+#endif
+
+static const struct ktest_test_info tests[] = {
+#ifdef INVARIANTS
+	{
+		.name = "test_mbuf_writer_allocation",
+		.desc = "test different mbuf sizes in the mbuf writer",
+		.func = &test_mbuf_writer_allocation,
+		.parse = &test_mbuf_parser,
+	},
+	{
+		.name = "test_mbuf_chain_allocation",
+		.desc = "verify allocation different chain sizes",
+		.func = &test_mbuf_chain_allocation,
+		.parse = &test_mbuf_parser,
+	},
+#endif
+};
+KTEST_MODULE_DECLARE(ktest_netlink_message_writer, tests);
diff --git a/sys/netlink/ktest_netlink_message_writer.h b/sys/netlink/ktest_netlink_message_writer.h
new file mode 100644
index 000000000000..b7864bea59c9
--- /dev/null
+++ b/sys/netlink/ktest_netlink_message_writer.h
@@ -0,0 +1,60 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _NETLINK_KTEST_NETLINK_MESSAGE_WRITER_H_
+#define _NETLINK_KTEST_NETLINK_MESSAGE_WRITER_H_
+
+#if defined(_KERNEL) && defined(INVARIANTS)
+
+bool nlmsg_get_buf_type_wrapper(struct nl_writer *nw, int size, int type, bool waitok);
+void nlmsg_set_callback_wrapper(struct nl_writer *nw);
+struct mbuf *nl_get_mbuf_chain_wrapper(int len, int malloc_flags);
+
+#ifndef KTEST_CALLER
+
+bool
+nlmsg_get_buf_type_wrapper(struct nl_writer *nw, int size, int type, bool waitok)
+{
+	return (nlmsg_get_buf_type(nw, size, type, waitok));
+}
+
+void
+nlmsg_set_callback_wrapper(struct nl_writer *nw)
+{
+	nlmsg_set_callback(nw);
+}
+
+struct mbuf *
+nl_get_mbuf_chain_wrapper(int len, int malloc_flags)
+{
+	return (nl_get_mbuf_chain(len, malloc_flags));
+}
+#endif
+
+#endif
+
+#endif
diff --git a/sys/netlink/netlink_message_writer.c b/sys/netlink/netlink_message_writer.c
index 841bdb2d5c0b..31f1c9f80457 100644
--- a/sys/netlink/netlink_message_writer.c
+++ b/sys/netlink/netlink_message_writer.c
@@ -1,757 +1,803 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2022 Alexander V. Chernikov
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_netlink.h"
 
 #include
 __FBSDID("$FreeBSD$");
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 
 #define	DEBUG_MOD_NAME	nl_writer
 #define	DEBUG_MAX_LEVEL	LOG_DEBUG3
 #include
 _DECLARE_DEBUG(LOG_INFO);
 
 /*
  * The goal of this file is to provide convenient message writing KPI on top of
  * different storage methods (mbufs, uio, temporary memory chunks).
  *
  * The main KPI guarantee is that the (last) message always resides in the contiguous
  *  memory buffer, so one is able to update the header after writing the entire message.
  *
  * This guarantee comes with a side effect of potentially reallocating underlying
  *  buffer, so one needs to update the desired pointers after something is added
  *  to the header.
  *
  * Messaging layer contains hooks performing transparent Linux translation for the messages.
  *
  * There are 3 types of supported targets:
  *  * socket (adds mbufs to the socket buffer, used for message replies)
  *  * group (sends mbuf/chain to the specified groups, used for the notifications)
  *  * chain (returns mbuf chain, used in Linux message translation code)
  *
  * There are 3 types of storage:
  * * NS_WRITER_TYPE_MBUF (mbuf-based, most efficient, used when a single message
- *    fits in MCLBYTES)
+ *    fits in NLMBUFSIZE)
  * * NS_WRITER_TYPE_BUF (fallback, malloc-based, used when a single message needs
  *    to be larger than one supported by NS_WRITER_TYPE_MBUF)
  * * NS_WRITER_TYPE_LBUF (malloc-based, similar to NS_WRITER_TYPE_BUF, used for
  *    Linux sockets, calls translation hook prior to sending messages to the socket).
  *
  * Internally, KPI switches between different types of storage when memory requirements
  *  change. It happens transparently to the caller.
  */
 
 /*
  * Uma zone for the mbuf-based Netlink storage
  */
 static uma_zone_t nlmsg_zone;
 
 static void
 nl_free_mbuf_storage(struct mbuf *m)
 {
 	uma_zfree(nlmsg_zone, m->m_ext.ext_buf);
 }
 
 static int
 nl_setup_mbuf_storage(void *mem, int size, void *arg, int how __unused)
 {
 	struct mbuf *m = (struct mbuf *)arg;
 
 	if (m != NULL)
 		m_extadd(m, mem, size, nl_free_mbuf_storage, NULL, NULL, 0, EXT_MOD_TYPE);
 
 	return (0);
 }
 
 static struct mbuf *
 nl_get_mbuf_flags(int size, int malloc_flags, int mbuf_flags)
 {
 	struct mbuf *m, *m_storage;
 
 	if (size <= MHLEN)
 		return (m_get2(size, malloc_flags, MT_DATA, mbuf_flags));
 
 	if (__predict_false(size > NLMBUFSIZE))
 		return (NULL);
 
 	m = m_gethdr(malloc_flags, MT_DATA);
 	if (m == NULL)
 		return (NULL);
 
 	m_storage = uma_zalloc_arg(nlmsg_zone, m, malloc_flags);
 	if (m_storage == NULL) {
 		m_free_raw(m);
 		return (NULL);
 	}
 
 	return (m);
 }
 
 static struct mbuf *
 nl_get_mbuf(int size, int malloc_flags)
 {
 	return (nl_get_mbuf_flags(size, malloc_flags, M_PKTHDR));
 }
 
+/*
+ * Gets a chain of Netlink mbufs.
+ * This is a stripped-down version of m_getm2().
+ *
+ * Each mbuf is requested with at most NLMBUFSIZE bytes; mbufs are added
+ * until the chain has room for @len bytes.  M_PKTHDR is requested for the
+ * first mbuf only.  Returns NULL when @len is not positive or when an
+ * allocation fails, in which case the partial chain is freed.
+ */
+static struct mbuf *
+nl_get_mbuf_chain(int len, int malloc_flags)
+{
+	struct mbuf *m_chain = NULL, *m_tail = NULL;
+	int mbuf_flags = M_PKTHDR;
+
+	while (len > 0) {
+		int sz = len > NLMBUFSIZE ? NLMBUFSIZE : len;
+		struct mbuf *m = nl_get_mbuf_flags(sz, malloc_flags, mbuf_flags);
+
+		if (m == NULL) {
+			m_freem(m_chain);
+			return (NULL);
+		}
+
+		/* Book keeping. */
+		len -= M_SIZE(m);
+		if (m_tail != NULL)
+			m_tail->m_next = m;
+		else
+			m_chain = m;
+		m_tail = m;
+		mbuf_flags &= ~M_PKTHDR;	/* Only valid on the first mbuf. */
+	}
+
+	return (m_chain);
+}
+
 void
 nl_init_msg_zone(void)
 {
 	nlmsg_zone = uma_zcreate("netlink", NLMBUFSIZE, nl_setup_mbuf_storage,
 	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 }
 
 void
 nl_destroy_msg_zone(void)
 {
 	uma_zdestroy(nlmsg_zone);
 }
 
 
 typedef bool nlwriter_op_init(struct nl_writer *nw, int size, bool waitok);
 typedef bool nlwriter_op_write(struct nl_writer *nw, void *buf, int buflen, int cnt);
 
 struct nlwriter_ops {
 	nlwriter_op_init	*init;
 	nlwriter_op_write	*write_socket;
 	nlwriter_op_write	*write_group;
 	nlwriter_op_write	*write_chain;
 };
 
 /*
  * NS_WRITER_TYPE_BUF
  * Writes message to a temporary memory buffer,
  * flushing to the socket/group when buffer size limit is reached
  */
 static bool
 nlmsg_get_ns_buf(struct nl_writer *nw, int size, bool waitok)
 {
 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
 	nw->_storage = malloc(size, M_NETLINK, mflag | M_ZERO);
 	if (__predict_false(nw->_storage == NULL))
 		return (false);
 	nw->alloc_len = size;
 	nw->offset = 0;
 	nw->hdr = NULL;
 	nw->data = nw->_storage;
 	nw->writer_type = NS_WRITER_TYPE_BUF;
 	nw->malloc_flag = mflag;
 	nw->num_messages = 0;
 	nw->enomem = false;
 	return (true);
 }
 
 static bool
 nlmsg_write_socket_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 {
 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
 	if (__predict_false(datalen == 0)) {
 		free(buf, M_NETLINK);
 		return (true);
 	}
 
-	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
+	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
 	if (__predict_false(m == NULL)) {
 		/* XXX: should we set sorcverr? */
 		free(buf, M_NETLINK);
 		return (false);
 	}
-	m_append(m, datalen, buf);
+	/* Do not send a truncated message if m_append() fails */
+	bool success = m_append(m, datalen, buf) != 0;
 	free(buf, M_NETLINK);
+	if (__predict_false(!success)) {
+		m_freem(m);
+		return (false);
+	}
 
 	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
 	return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
 }
 
 static bool
 nlmsg_write_group_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 {
 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
 	    nw->arg.group.proto, nw->arg.group.id);
 	if (__predict_false(datalen == 0)) {
 		free(buf, M_NETLINK);
 		return (true);
 	}
 
-	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
+	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
 	if (__predict_false(m == NULL)) {
 		free(buf, M_NETLINK);
 		return (false);
 	}
 	bool success = m_append(m, datalen, buf) != 0;
 	free(buf, M_NETLINK);
 
-	if (!success)
+	if (!success) {
+		/* The chain was not handed off; free it here */
+		m_freem(m);
 		return (false);
+	}
 
 	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
 	return (true);
 }
 
 static bool
 nlmsg_write_chain_buf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 {
 	struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);
 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
 
 	if (__predict_false(datalen == 0)) {
 		free(buf, M_NETLINK);
 		return (true);
 	}
 
 	if (*m0 == NULL) {
-		struct mbuf *m;
+		struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
 
-		m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
 		if (__predict_false(m == NULL)) {
 			free(buf, M_NETLINK);
 			return (false);
 		}
 		*m0 = m;
 	}
-	if (__predict_false(m_append(*m0, datalen, buf) == 0)) {
-		free(buf, M_NETLINK);
-		return (false);
-	}
-	return (true);
+	/* _nlmsg_flush() forgets @buf after this call: free it on success too */
+	bool success = m_append(*m0, datalen, buf) != 0;
+	free(buf, M_NETLINK);
+
+	return (success);
 }
 
 /*
  * NS_WRITER_TYPE_MBUF
  * Writes message to the allocated mbuf,
  * flushing to socket/group when mbuf size limit is reached.
  * This is the most efficient mechanism as it avoids double-copying.
  *
  * Allocates a single mbuf suitable to store up to @size bytes of data.
  * If size < MHLEN (around 160 bytes), allocates mbuf with pkghdr.
  * If the size <= NLMBUFSIZE (2k), allocate mbuf+storage out of nlmsg_zone.
  * Returns NULL on greater size or the allocation failure.
  */
 static bool
 nlmsg_get_ns_mbuf(struct nl_writer *nw, int size, bool waitok)
 {
 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
 	struct mbuf *m = nl_get_mbuf(size, mflag);
 
 	if (__predict_false(m == NULL))
 		return (false);
 	nw->alloc_len = M_TRAILINGSPACE(m);
 	nw->offset = 0;
 	nw->hdr = NULL;
 	nw->_storage = (void *)m;
 	nw->data = mtod(m, void *);
 	nw->writer_type = NS_WRITER_TYPE_MBUF;
 	nw->malloc_flag = mflag;
 	nw->num_messages = 0;
 	nw->enomem = false;
 	memset(nw->data, 0, size);
 	NL_LOG(LOG_DEBUG2, "alloc mbuf %p req_len %d alloc_len %d data_ptr %p",
 	    m, size, nw->alloc_len, nw->data);
 	return (true);
 }
 
 static bool
 nlmsg_write_socket_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 {
 	struct mbuf *m = (struct mbuf *)buf;
 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
 
 	if (__predict_false(datalen == 0)) {
 		m_freem(m);
 		return (true);
 	}
 
 	m->m_pkthdr.len = datalen;
 	m->m_len = datalen;
 	int io_flags = (nw->ignore_limit) ?
NL_IOF_IGNORE_LIMIT : 0;
 	return (nl_send_one(m, (struct nlpcb *)(nw->arg.ptr), cnt, io_flags));
 }
 
 static bool
 nlmsg_write_group_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 {
 	struct mbuf *m = (struct mbuf *)buf;
 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d proto: %d id: %d", buf, datalen,
 	    nw->arg.group.proto, nw->arg.group.id);
 
 	if (__predict_false(datalen == 0)) {
 		m_freem(m);
 		return (true);
 	}
 
 	m->m_pkthdr.len = datalen;
 	m->m_len = datalen;
 	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
 	return (true);
 }
 
 static bool
 nlmsg_write_chain_mbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 {
 	struct mbuf *m_new = (struct mbuf *)buf;
 	struct mbuf **m0 = (struct mbuf **)(nw->arg.ptr);
 
 	NL_LOG(LOG_DEBUG2, "IN: ptr: %p len: %d arg: %p", buf, datalen, nw->arg.ptr);
 
 	if (__predict_false(datalen == 0)) {
 		m_freem(m_new);
 		return (true);
 	}
 
 	m_new->m_pkthdr.len = datalen;
 	m_new->m_len = datalen;
 
 	if (*m0 == NULL) {
 		*m0 = m_new;
 	} else {
 		struct mbuf *m_last;
 		for (m_last = *m0; m_last->m_next != NULL; m_last = m_last->m_next)
 			;
 		m_last->m_next = m_new;
 		(*m0)->m_pkthdr.len += datalen;
 	}
 
 	return (true);
 }
 
 /*
  * NS_WRITER_TYPE_LBUF
  * Writes message to the allocated memory buffer,
  * flushing to socket/group when mbuf size limit is reached.
  * Calls linux handler to rewrite messages before sending to the socket.
  */
 static bool
 nlmsg_get_ns_lbuf(struct nl_writer *nw, int size, bool waitok)
 {
 	int mflag = waitok ? M_WAITOK : M_NOWAIT;
 	size = roundup2(size, sizeof(void *));
 	int add_size = sizeof(struct linear_buffer) + SCRATCH_BUFFER_SIZE;
 	char *buf = malloc(add_size + size * 2, M_NETLINK, mflag | M_ZERO);
 	if (__predict_false(buf == NULL))
 		return (false);
 
 	/* Fill buffer header first */
 	struct linear_buffer *lb = (struct linear_buffer *)buf;
 	lb->base = &buf[sizeof(struct linear_buffer) + size];
 	lb->size = size + SCRATCH_BUFFER_SIZE;
 
 	nw->alloc_len = size;
 	nw->offset = 0;
 	nw->hdr = NULL;
 	nw->_storage = buf;
 	nw->data = (char *)(lb + 1);
 	nw->malloc_flag = mflag;
 	nw->writer_type = NS_WRITER_TYPE_LBUF;
 	nw->num_messages = 0;
 	nw->enomem = false;
 	return (true);
 }
 
 static bool
 nlmsg_write_socket_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 {
 	struct linear_buffer *lb = (struct linear_buffer *)buf;
 	char *data = (char *)(lb + 1);
 	struct nlpcb *nlp = (struct nlpcb *)(nw->arg.ptr);
 
 	if (__predict_false(datalen == 0)) {
 		free(buf, M_NETLINK);
 		return (true);
 	}
 
 	struct mbuf *m = NULL;
 	if (linux_netlink_p != NULL)
 		m = linux_netlink_p->msgs_to_linux(nlp->nl_proto, data, datalen, nlp);
 	free(buf, M_NETLINK);
 
 	if (__predict_false(m == NULL)) {
 		/* XXX: should we set sorcverr? */
 		return (false);
 	}
 
 	int io_flags = (nw->ignore_limit) ? NL_IOF_IGNORE_LIMIT : 0;
 	return (nl_send_one(m, nlp, cnt, io_flags));
 }
 
 /* Shouldn't be called (maybe except Linux code originating message) */
 static bool
 nlmsg_write_group_lbuf(struct nl_writer *nw, void *buf, int datalen, int cnt)
 {
 	struct linear_buffer *lb = (struct linear_buffer *)buf;
 	char *data = (char *)(lb + 1);
 
 	if (__predict_false(datalen == 0)) {
 		free(buf, M_NETLINK);
 		return (true);
 	}
 
-	struct mbuf *m = m_getm2(NULL, datalen, nw->malloc_flag, MT_DATA, M_PKTHDR);
+	struct mbuf *m = nl_get_mbuf_chain(datalen, nw->malloc_flag);
 	if (__predict_false(m == NULL)) {
 		free(buf, M_NETLINK);
 		return (false);
 	}
 	m_append(m, datalen, data);
 	free(buf, M_NETLINK);
 
 	nl_send_group(m, cnt, nw->arg.group.proto, nw->arg.group.id);
 	return (true);
 }
 
 static const struct nlwriter_ops nlmsg_writers[] = {
 	/* NS_WRITER_TYPE_MBUF */
 	{
 		.init = nlmsg_get_ns_mbuf,
 		.write_socket = nlmsg_write_socket_mbuf,
 		.write_group = nlmsg_write_group_mbuf,
 		.write_chain = nlmsg_write_chain_mbuf,
 	},
 	/* NS_WRITER_TYPE_BUF */
 	{
 		.init = nlmsg_get_ns_buf,
 		.write_socket = nlmsg_write_socket_buf,
 		.write_group = nlmsg_write_group_buf,
 		.write_chain = nlmsg_write_chain_buf,
 	},
 	/* NS_WRITER_TYPE_LBUF */
 	{
 		.init = nlmsg_get_ns_lbuf,
 		.write_socket = nlmsg_write_socket_lbuf,
 		.write_group = nlmsg_write_group_lbuf,
 	},
 };
 
 static void
 nlmsg_set_callback(struct nl_writer *nw)
 {
 	const struct nlwriter_ops *pops = &nlmsg_writers[nw->writer_type];
 
 	switch (nw->writer_target) {
 	case NS_WRITER_TARGET_SOCKET:
 		nw->cb = pops->write_socket;
 		break;
 	case NS_WRITER_TARGET_GROUP:
 		nw->cb = pops->write_group;
 		break;
 	case NS_WRITER_TARGET_CHAIN:
 		nw->cb = pops->write_chain;
 		break;
 	default:
 		panic("not implemented");
 	}
 }
 
 static bool
 nlmsg_get_buf_type(struct nl_writer *nw, int size, int type, bool waitok)
 {
 	MPASS(type + 1 <= sizeof(nlmsg_writers) / sizeof(nlmsg_writers[0]));
 	NL_LOG(LOG_DEBUG3, "Setting up nw %p size %d type %d", nw, size, type);
 	return (nlmsg_writers[type].init(nw, size, waitok));
 }
 
 static bool
 nlmsg_get_buf(struct nl_writer *nw, int size, bool waitok, bool is_linux)
 {
 	int type;
 
 	if (!is_linux) {
-		if (__predict_true(size <= MCLBYTES))
+		if (__predict_true(size <= NLMBUFSIZE))
 			type = NS_WRITER_TYPE_MBUF;
 		else
 			type = NS_WRITER_TYPE_BUF;
 	} else
 		type = NS_WRITER_TYPE_LBUF;
 	return (nlmsg_get_buf_type(nw, size, type, waitok));
 }
 
 bool
 _nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp)
 {
 	if (!nlmsg_get_buf(nw, size, false, nlp->nl_linux))
 		return (false);
 	nw->arg.ptr = (void *)nlp;
 	nw->writer_target = NS_WRITER_TARGET_SOCKET;
 	nlmsg_set_callback(nw);
 	return (true);
 }
 
 bool
 _nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id)
 {
 	if (!nlmsg_get_buf(nw, size, false, false))
 		return (false);
 	nw->arg.group.proto = protocol;
 	nw->arg.group.id = group_id;
 	nw->writer_target = NS_WRITER_TARGET_GROUP;
 	nlmsg_set_callback(nw);
 	return (true);
 }
 
 bool
 _nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm)
 {
 	if (!nlmsg_get_buf(nw, size, false, false))
 		return (false);
 	*pm = NULL;
 	nw->arg.ptr = (void *)pm;
 	nw->writer_target = NS_WRITER_TARGET_CHAIN;
 	nlmsg_set_callback(nw);
 	NL_LOG(LOG_DEBUG3, "setup cb %p (need %p)", nw->cb, &nlmsg_write_chain_mbuf);
 	return (true);
 }
 
 void
 _nlmsg_ignore_limit(struct nl_writer *nw)
 {
 	nw->ignore_limit = true;
 }
 
 bool
 _nlmsg_flush(struct nl_writer *nw)
 {
 
 	if (__predict_false(nw->hdr != NULL)) {
 		/* Last message has not been completed, skip it. */
 		int completed_len = (char *)nw->hdr - nw->data;
 		/* Send completed messages */
 		nw->offset -= nw->offset - completed_len;
 		nw->hdr = NULL;
 	}
 
 	NL_LOG(LOG_DEBUG2, "OUT");
 	bool result = nw->cb(nw, nw->_storage, nw->offset, nw->num_messages);
 	nw->_storage = NULL;
 
 	if (!result) {
 		NL_LOG(LOG_DEBUG, "nw %p offset %d: flush with %p() failed", nw, nw->offset, nw->cb);
 	}
 
 	return (result);
 }
 
 /*
  * Flushes previous data and allocates new underlying storage
  *  sufficient for holding at least @required_len bytes.
  * Return true on success.
*/ bool _nlmsg_refill_buffer(struct nl_writer *nw, int required_len) { struct nl_writer ns_new = {}; int completed_len, new_len; if (nw->enomem) return (false); NL_LOG(LOG_DEBUG3, "no space at offset %d/%d (want %d), trying to reclaim", nw->offset, nw->alloc_len, required_len); /* Calculated new buffer size and allocate it s*/ completed_len = (nw->hdr != NULL) ? (char *)nw->hdr - nw->data : nw->offset; - if (completed_len > 0 && required_len < MCLBYTES) { + if (completed_len > 0 && required_len < NLMBUFSIZE) { /* We already ran out of space, use the largest effective size */ - new_len = max(nw->alloc_len, MCLBYTES); + new_len = max(nw->alloc_len, NLMBUFSIZE); } else { - if (nw->alloc_len < MCLBYTES) - new_len = MCLBYTES; + if (nw->alloc_len < NLMBUFSIZE) + new_len = NLMBUFSIZE; else new_len = nw->alloc_len * 2; while (new_len < required_len) new_len *= 2; } bool waitok = (nw->malloc_flag == M_WAITOK); bool is_linux = (nw->writer_type == NS_WRITER_TYPE_LBUF); if (!nlmsg_get_buf(&ns_new, new_len, waitok, is_linux)) { nw->enomem = true; NL_LOG(LOG_DEBUG, "getting new buf failed, setting ENOMEM"); return (false); } if (nw->ignore_limit) nlmsg_ignore_limit(&ns_new); /* Update callback data */ ns_new.writer_target = nw->writer_target; nlmsg_set_callback(&ns_new); ns_new.arg = nw->arg; /* Copy last (unfinished) header to the new storage */ int last_len = nw->offset - completed_len; if (last_len > 0) { memcpy(ns_new.data, nw->hdr, last_len); ns_new.hdr = (struct nlmsghdr *)ns_new.data; ns_new.offset = last_len; } NL_LOG(LOG_DEBUG2, "completed: %d bytes, copied: %d bytes", completed_len, last_len); /* Flush completed headers & switch to the new nw */ nlmsg_flush(nw); memcpy(nw, &ns_new, sizeof(struct nl_writer)); NL_LOG(LOG_DEBUG2, "switched buffer: used %d/%d bytes", nw->offset, nw->alloc_len); return (true); } bool _nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, uint16_t flags, uint32_t len) { struct nlmsghdr *hdr; MPASS(nw->hdr == NULL); 
int required_len = NETLINK_ALIGN(len + sizeof(struct nlmsghdr)); if (__predict_false(nw->offset + required_len > nw->alloc_len)) { if (!nlmsg_refill_buffer(nw, required_len)) return (false); } hdr = (struct nlmsghdr *)(&nw->data[nw->offset]); hdr->nlmsg_len = len; hdr->nlmsg_type = type; hdr->nlmsg_flags = flags; hdr->nlmsg_seq = seq; hdr->nlmsg_pid = portid; nw->hdr = hdr; nw->offset += sizeof(struct nlmsghdr); return (true); } bool _nlmsg_end(struct nl_writer *nw) { MPASS(nw->hdr != NULL); if (nw->enomem) { NL_LOG(LOG_DEBUG, "ENOMEM when dumping message"); nlmsg_abort(nw); return (false); } nw->hdr->nlmsg_len = (uint32_t)(nw->data + nw->offset - (char *)nw->hdr); NL_LOG(LOG_DEBUG2, "wrote msg len: %u type: %d: flags: 0x%X seq: %u pid: %u", nw->hdr->nlmsg_len, nw->hdr->nlmsg_type, nw->hdr->nlmsg_flags, nw->hdr->nlmsg_seq, nw->hdr->nlmsg_pid); nw->hdr = NULL; nw->num_messages++; return (true); } void _nlmsg_abort(struct nl_writer *nw) { if (nw->hdr != NULL) { nw->offset = (uint32_t)((char *)nw->hdr - nw->data); nw->hdr = NULL; } } void nlmsg_ack(struct nlpcb *nlp, int error, struct nlmsghdr *hdr, struct nl_pstate *npt) { struct nlmsgerr *errmsg; int payload_len; uint32_t flags = nlp->nl_flags; struct nl_writer *nw = npt->nw; bool cap_ack; payload_len = sizeof(struct nlmsgerr); /* * The only case when we send the full message in the * reply is when there is an error and NETLINK_CAP_ACK * is not set. */ cap_ack = (error == 0) || (flags & NLF_CAP_ACK); if (!cap_ack) payload_len += hdr->nlmsg_len - sizeof(struct nlmsghdr); payload_len = NETLINK_ALIGN(payload_len); uint16_t nl_flags = cap_ack ? 
NLM_F_CAPPED : 0; if ((npt->err_msg || npt->err_off) && nlp->nl_flags & NLF_EXT_ACK) nl_flags |= NLM_F_ACK_TLVS; NL_LOG(LOG_DEBUG3, "acknowledging message type %d seq %d", hdr->nlmsg_type, hdr->nlmsg_seq); if (!nlmsg_add(nw, nlp->nl_port, hdr->nlmsg_seq, NLMSG_ERROR, nl_flags, payload_len)) goto enomem; errmsg = nlmsg_reserve_data(nw, payload_len, struct nlmsgerr); errmsg->error = error; /* In case of error copy the whole message, else just the header */ memcpy(&errmsg->msg, hdr, cap_ack ? sizeof(*hdr) : hdr->nlmsg_len); if (npt->err_msg != NULL && nlp->nl_flags & NLF_EXT_ACK) nlattr_add_string(nw, NLMSGERR_ATTR_MSG, npt->err_msg); if (npt->err_off != 0 && nlp->nl_flags & NLF_EXT_ACK) nlattr_add_u32(nw, NLMSGERR_ATTR_OFFS, npt->err_off); if (npt->cookie != NULL) nlattr_add_raw(nw, npt->cookie); if (nlmsg_end(nw)) return; enomem: NLP_LOG(LOG_DEBUG, nlp, "error allocating ack data for message %d seq %u", hdr->nlmsg_type, hdr->nlmsg_seq); nlmsg_abort(nw); } bool _nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr) { if (!nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, NLMSG_DONE, 0, sizeof(int))) { NL_LOG(LOG_DEBUG, "Error finalizing table dump"); return (false); } /* Save operation result */ int *perror = nlmsg_reserve_object(nw, int); NL_LOG(LOG_DEBUG2, "record error=%d at off %d (%p)", error, nw->offset, perror); *perror = error; nlmsg_end(nw); nw->suppress_ack = true; return (true); } + +#include diff --git a/tests/sys/netlink/Makefile b/tests/sys/netlink/Makefile index 16559f0e9d3d..83e31027b16f 100644 --- a/tests/sys/netlink/Makefile +++ b/tests/sys/netlink/Makefile @@ -1,17 +1,18 @@ # $FreeBSD$ PACKAGE= tests WARNS?= 1 TESTSDIR= ${TESTSBASE}/sys/netlink ATF_TESTS_C += test_snl test_snl_generic ATF_TESTS_PYTEST += test_nl_core.py ATF_TESTS_PYTEST += test_rtnl_iface.py ATF_TESTS_PYTEST += test_rtnl_ifaddr.py ATF_TESTS_PYTEST += test_rtnl_neigh.py ATF_TESTS_PYTEST += test_rtnl_route.py +ATF_TESTS_PYTEST += test_netlink_message_writer.py CFLAGS+= 
-I${.CURDIR:H:H:H}
 
 .include
diff --git a/tests/sys/netlink/test_netlink_message_writer.py b/tests/sys/netlink/test_netlink_message_writer.py
new file mode 100644
index 000000000000..df1768129b11
--- /dev/null
+++ b/tests/sys/netlink/test_netlink_message_writer.py
@@ -0,0 +1,81 @@
+import mmap
+import pytest
+
+from atf_python.ktest import BaseKernelTest
+from atf_python.sys.netlink.attrs import NlAttrU32
+
+
+M_NOWAIT = 1
+M_WAITOK = 2
+# NS_WRITER_TYPE_* values index nlmsg_writers[] in netlink_message_writer.c
+NS_WRITER_TYPE_MBUF = 0
+NS_WRITER_TYPE_BUF = 1
+NS_WRITER_TYPE_LBUF = 2
+
+MHLEN = 160
+MCLBYTES = 2048  # XXX: may differ on some archs?
+MJUMPAGESIZE = mmap.PAGESIZE
+MJUM9BYTES = 9 * 1024
+MJUM16BYTES = 16 * 1024
+
+
+class TestNetlinkMessageWriter(BaseKernelTest):
+    KTEST_MODULE_NAME = "ktest_netlink_message_writer"
+
+    @pytest.mark.parametrize(
+        "malloc_flags",
+        [
+            pytest.param(M_NOWAIT, id="NOWAIT"),
+            pytest.param(M_WAITOK, id="WAITOK"),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "writer_type",
+        [
+            pytest.param(NS_WRITER_TYPE_MBUF, id="MBUF"),
+            pytest.param(NS_WRITER_TYPE_BUF, id="BUF"),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "sz",
+        [
+            pytest.param([MHLEN, MHLEN], id="MHLEN"),
+            pytest.param([MCLBYTES, MCLBYTES], id="MCLBYTES"),
+        ],
+    )
+    def test_mbuf_writer_allocation(self, sz, writer_type, malloc_flags):
+        """override to parametrize"""
+
+        test_meta = [
+            NlAttrU32(1, sz[0]),  # size
+            NlAttrU32(2, sz[1]),  # expected_avail
+            NlAttrU32(4, writer_type),  # wtype
+            NlAttrU32(5, malloc_flags),  # waitok
+        ]
+        self.runtest(test_meta)
+
+    @pytest.mark.parametrize(
+        "malloc_flags",
+        [
+            pytest.param(M_NOWAIT, id="NOWAIT"),
+            pytest.param(M_WAITOK, id="WAITOK"),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "sz",
+        [
+            pytest.param([MHLEN, MHLEN, 1], id="MHLEN"),
+            pytest.param([MCLBYTES, MCLBYTES, 1], id="MCLBYTES"),
+            pytest.param([MCLBYTES + 1, MCLBYTES + 1, 2], id="MCLBYTES_MHLEN"),
+            pytest.param([MCLBYTES + 256, MCLBYTES * 2, 2], id="MCLBYTESx2"),
+        ],
+    )
+    def test_mbuf_chain_allocation(self, sz, malloc_flags):
+        # sz is [size, expected_avail, expected_count]; ids follow nla_p_mbuf_w
+        test_meta = [
+            NlAttrU32(1, sz[0]),  # size
+            NlAttrU32(2, sz[1]),  # expected_avail
+            NlAttrU32(3, sz[2]),  # expected_count
+            NlAttrU32(5, malloc_flags),  # waitok
+        ]
+        self.runtest(test_meta)