diff --git a/sys/compat/linux/linux_netlink.c b/sys/compat/linux/linux_netlink.c index d2afec24fe71..8675f830b4ef 100644 --- a/sys/compat/linux/linux_netlink.c +++ b/sys/compat/linux/linux_netlink.c @@ -1,604 +1,622 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_linux #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_INFO); static bool valid_rta_size(const struct rtattr *rta, int sz) { return (NL_RTA_DATA_LEN(rta) == sz); } static bool valid_rta_u32(const struct rtattr *rta) { return (valid_rta_size(rta, sizeof(uint32_t))); } static uint32_t _rta_get_uint32(const struct rtattr *rta) { return (*((const uint32_t *)NL_RTA_DATA_CONST(rta))); } -static struct nlmsghdr * +static int rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct ndmsg *ndm = (struct ndmsg *)(hdr + 1); + sa_family_t f; + + if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) + return (EBADMSG); + if ((f = linux_to_bsd_domain(ndm->ndm_family)) == AF_UNKNOWN) + return (EPFNOSUPPORT); - if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg)) - ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family); + ndm->ndm_family = f; - return (hdr); + return (0); } -static struct nlmsghdr * +static int rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1); + sa_family_t f; - if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)) - ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family); + if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg)) + return (EBADMSG); + if ((f = linux_to_bsd_domain(ifam->ifa_family)) == AF_UNKNOWN) + return (EPFNOSUPPORT); - return (hdr); + ifam->ifa_family = f; + + return (0); } -static struct nlmsghdr * +/* + * XXX: in case of error state of hdr is inconsistent. + */ +static int rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) { /* Tweak address families and default fib only */ struct rtmsg *rtm = (struct rtmsg *)(hdr + 1); struct nlattr *nla, *nla_head; int attrs_len; + sa_family_t f; - rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family); + if (hdr->nlmsg_len < sizeof(struct nlmsghdr) + sizeof(struct rtmsg)) + return (EBADMSG); + if ((f = linux_to_bsd_domain(rtm->rtm_family)) == AF_UNKNOWN) + return (EPFNOSUPPORT); + rtm->rtm_family = f; if (rtm->rtm_table == 254) rtm->rtm_table = 0; attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr); attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg)); nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg))); NLA_FOREACH(nla, nla_head, attrs_len) { RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d", nla->nla_type, nla->nla_len, attrs_len); struct rtattr *rta = (struct rtattr *)nla; if (rta->rta_len < sizeof(struct rtattr)) { break; } switch (rta->rta_type) { case NL_RTA_TABLE: if (!valid_rta_u32(rta)) - goto done; + return (EBADMSG); rtm->rtm_table = 0; uint32_t fibnum = _rta_get_uint32(rta); RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum); if (fibnum == 254) { *((uint32_t *)NL_RTA_DATA(rta)) = 0; } break; } } -done: - return (hdr); + return (0); } -static struct nlmsghdr * +static int rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt) { + switch (hdr->nlmsg_type) { case NL_RTM_GETROUTE: case NL_RTM_NEWROUTE: case NL_RTM_DELROUTE: return (rtnl_route_from_linux(hdr, npt)); case NL_RTM_GETNEIGH: return (rtnl_neigh_from_linux(hdr, npt)); case NL_RTM_GETADDR: return (rtnl_ifaddr_from_linux(hdr, npt)); /* Silence warning for the messages where no translation is required */ case NL_RTM_NEWLINK: case NL_RTM_DELLINK: case NL_RTM_GETLINK: break; default: RT_LOG(LOG_DEBUG, "Passing message type %d untranslated", hdr->nlmsg_type); + /* XXXGL: maybe return error? */ } - return (hdr); + return (0); } -static struct nlmsghdr * -nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr, +static int +nlmsg_from_linux(int netlink_family, struct nlmsghdr **hdr, struct nl_pstate *npt) { switch (netlink_family) { case NETLINK_ROUTE: - return (rtnl_from_linux(hdr, npt)); + return (rtnl_from_linux(*hdr, npt)); } - return (hdr); + return (0); } /************************************************************ * Kernel -> Linux ************************************************************/ static bool handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw) { char *out_hdr; out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char); if (out_hdr != NULL) { memcpy(out_hdr, hdr, hdr->nlmsg_len); nw->num_messages++; return (true); } return (false); } static bool nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw) { return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type, hdr->nlmsg_flags, 0)); } static void * _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz) { void *next_hdr = nlmsg_reserve_data(nw, sz, void); memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz)); return (next_hdr); } #define nlmsg_copy_next_header(_hdr, _ns, _t) \ ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t)))) static bool nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw) { struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr); if (nla != NULL) { memcpy(nla, nla_orig, nla_orig->nla_len); return (true); } return (false); } /* * Translate a FreeBSD interface name to a Linux interface name. */ static bool nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw) { char ifname[LINUX_IFNAMSIZ]; if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname, sizeof(ifname)) <= 0) return (false); return (nlattr_add_string(nw, IFLA_IFNAME, ifname)); } #define LINUX_NLA_UNHANDLED -1 /* * Translate a FreeBSD attribute to a Linux attribute. * Returns LINUX_NLA_UNHANDLED when the attribute is not processed * and the caller must take care of it, otherwise the result is returned. */ static int nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla, struct nl_writer *nw) { switch (hdr->nlmsg_type) { case NL_RTM_NEWLINK: case NL_RTM_DELLINK: case NL_RTM_GETLINK: switch (nla->nla_type) { case IFLA_IFNAME: return (nlmsg_translate_ifname_nla(nla, nw)); default: break; } default: break; } return (LINUX_NLA_UNHANDLED); } static bool nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw) { struct nlattr *nla; int ret; int hdrlen = NETLINK_ALIGN(raw_hdrlen); int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); NLA_FOREACH(nla, nla_head, attrs_len) { RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len); if (nla->nla_len < sizeof(struct nlattr)) { return (false); } ret = nlmsg_translate_all_nla(hdr, nla, nw); if (ret == LINUX_NLA_UNHANDLED) ret = nlmsg_copy_nla(nla, nw); if (!ret) return (false); } return (true); } #undef LINUX_NLA_UNHANDLED static unsigned int rtnl_if_flags_to_linux(unsigned int if_flags) { unsigned int result = 0; for (int i = 0; i < 31; i++) { unsigned int flag = 1 << i; if (!(flag & if_flags)) continue; switch (flag) { case IFF_UP: case IFF_BROADCAST: case IFF_DEBUG: case IFF_LOOPBACK: case IFF_POINTOPOINT: case IFF_DRV_RUNNING: case IFF_NOARP: case IFF_PROMISC: case IFF_ALLMULTI: result |= flag; break; case IFF_NEEDSEPOCH: case IFF_DRV_OACTIVE: case IFF_SIMPLEX: case IFF_LINK0: case IFF_LINK1: case IFF_LINK2: case IFF_CANTCONFIG: case IFF_PPROMISC: case IFF_MONITOR: case IFF_STATICARP: case IFF_STICKYARP: case IFF_DYING: case IFF_RENAMING: /* No Linux analogue */ break; case IFF_MULTICAST: result |= 1 << 12; } } return (result); } static bool rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) { if (!nlmsg_copy_header(hdr, nw)) return (false); struct ifinfomsg *ifinfo; ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg); ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family); /* Convert interface type */ switch (ifinfo->ifi_type) { case IFT_ETHER: ifinfo->ifi_type = LINUX_ARPHRD_ETHER; break; } ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags); /* Copy attributes unchanged */ if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw)) return (false); /* make ip(8) happy */ if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue")) return (false); if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000)) return (false); nlmsg_end(nw); RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); return (true); } static bool rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) { if (!nlmsg_copy_header(hdr, nw)) return (false); struct ifaddrmsg *ifamsg; ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg); ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family); /* XXX: fake ifa_flags? */ /* Copy attributes unchanged */ if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw)) return (false); nlmsg_end(nw); RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); return (true); } static bool rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) { if (!nlmsg_copy_header(hdr, nw)) return (false); struct ndmsg *ndm; ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg); ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family); /* Copy attributes unchanged */ if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw)) return (false); nlmsg_end(nw); RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); return (true); } static bool rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) { if (!nlmsg_copy_header(hdr, nw)) return (false); struct rtmsg *rtm; rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg); rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family); struct nlattr *nla; int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg)); int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen; struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen); NLA_FOREACH(nla, nla_head, attrs_len) { struct rtattr *rta = (struct rtattr *)nla; //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len); if (rta->rta_len < sizeof(struct rtattr)) { break; } switch (rta->rta_type) { case NL_RTA_TABLE: { uint32_t fibnum; fibnum = _rta_get_uint32(rta); if (fibnum == 0) fibnum = 254; RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum); if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum)) return (false); } break; default: if (!nlmsg_copy_nla(nla, nw)) return (false); break; } } nlmsg_end(nw); RT_LOG(LOG_DEBUG2, "done processing nw %p", nw); return (true); } static bool rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) { RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type); switch (hdr->nlmsg_type) { case NL_RTM_NEWLINK: case NL_RTM_DELLINK: case NL_RTM_GETLINK: return (rtnl_newlink_to_linux(hdr, nlp, nw)); case NL_RTM_NEWADDR: case NL_RTM_DELADDR: return (rtnl_newaddr_to_linux(hdr, nlp, nw)); case NL_RTM_NEWROUTE: case NL_RTM_DELROUTE: return (rtnl_newroute_to_linux(hdr, nlp, nw)); case NL_RTM_NEWNEIGH: case NL_RTM_DELNEIGH: case NL_RTM_GETNEIGH: return (rtnl_newneigh_to_linux(hdr, nlp, nw)); default: RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", hdr->nlmsg_type); return (handle_default_out(hdr, nw)); } } static bool nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) { if (!nlmsg_copy_header(hdr, nw)) return (false); struct nlmsgerr *nlerr; nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr); nlerr->error = bsd_to_linux_errno(nlerr->error); int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr); if (hdr->nlmsg_len == copied_len) { nlmsg_end(nw); return (true); } /* * CAP_ACK was not set. Original request needs to be translated. * XXX: implement translation of the original message */ RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated", nlerr->msg.nlmsg_type); char *dst_payload, *src_payload; int copy_len = hdr->nlmsg_len - copied_len; dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char); src_payload = (char *)hdr + copied_len; memcpy(dst_payload, src_payload, copy_len); nlmsg_end(nw); return (true); } static bool nlmsg_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw) { if (hdr->nlmsg_type < NLMSG_MIN_TYPE) { switch (hdr->nlmsg_type) { case NLMSG_ERROR: return (nlmsg_error_to_linux(hdr, nlp, nw)); case NLMSG_NOOP: case NLMSG_DONE: case NLMSG_OVERRUN: return (handle_default_out(hdr, nw)); default: RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated", hdr->nlmsg_type); return (handle_default_out(hdr, nw)); } } switch (nlp->nl_proto) { case NETLINK_ROUTE: return (rtnl_to_linux(hdr, nlp, nw)); default: return (handle_default_out(hdr, nw)); } } static bool nlmsgs_to_linux(struct nl_writer *nw, struct nlpcb *nlp) { struct nl_buf *nb, *orig; u_int offset, msglen, orig_messages; RT_LOG(LOG_DEBUG3, "%p: in %u bytes %u messages", __func__, nw->buf->datalen, nw->num_messages); orig = nw->buf; nb = nl_buf_alloc(orig->datalen + SCRATCH_BUFFER_SIZE, M_NOWAIT); if (__predict_false(nb == NULL)) return (false); nw->buf = nb; orig_messages = nw->num_messages; nw->num_messages = 0; /* Assume correct headers. Buffer IS mutable */ for (offset = 0; offset + sizeof(struct nlmsghdr) <= orig->datalen; offset += msglen) { struct nlmsghdr *hdr = (struct nlmsghdr *)&orig->data[offset]; msglen = NLMSG_ALIGN(hdr->nlmsg_len); if (!nlmsg_to_linux(hdr, nlp, nw)) { RT_LOG(LOG_DEBUG, "failed to process msg type %d", hdr->nlmsg_type); nl_buf_free(nb); nw->buf = orig; nw->num_messages = orig_messages; return (false); } } MPASS(nw->num_messages == orig_messages); MPASS(nw->buf == nb); nl_buf_free(orig); RT_LOG(LOG_DEBUG3, "%p: out %u bytes", __func__, offset); return (true); } static struct linux_netlink_provider linux_netlink_v1 = { .msgs_to_linux = nlmsgs_to_linux, .msg_from_linux = nlmsg_from_linux, }; void linux_netlink_register(void) { linux_netlink_p = &linux_netlink_v1; } void linux_netlink_deregister(void) { linux_netlink_p = NULL; } diff --git a/sys/netlink/netlink_io.c b/sys/netlink/netlink_io.c index 61d9d657556a..2dd49d5e8eb5 100644 --- a/sys/netlink/netlink_io.c +++ b/sys/netlink/netlink_io.c @@ -1,368 +1,364 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_io #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_INFO); /* * The logic below provide a p2p interface for receiving and * sending netlink data between the kernel and userland. */ static bool nl_process_nbuf(struct nl_buf *nb, struct nlpcb *nlp); struct nl_buf * nl_buf_alloc(size_t len, int mflag) { struct nl_buf *nb; nb = malloc(sizeof(struct nl_buf) + len, M_NETLINK, mflag); if (__predict_true(nb != NULL)) { nb->buflen = len; nb->datalen = nb->offset = 0; } return (nb); } void nl_buf_free(struct nl_buf *nb) { free(nb, M_NETLINK); } void nl_schedule_taskqueue(struct nlpcb *nlp) { if (!nlp->nl_task_pending) { nlp->nl_task_pending = true; taskqueue_enqueue(nlp->nl_taskqueue, &nlp->nl_task); NL_LOG(LOG_DEBUG3, "taskqueue scheduled"); } else { NL_LOG(LOG_DEBUG3, "taskqueue schedule skipped"); } } static bool nl_process_received_one(struct nlpcb *nlp) { struct socket *so = nlp->nl_socket; struct sockbuf *sb; struct nl_buf *nb; bool reschedule = false; NLP_LOCK(nlp); nlp->nl_task_pending = false; NLP_UNLOCK(nlp); /* * Do not process queued up requests if there is no space to queue * replies. */ sb = &so->so_rcv; SOCK_RECVBUF_LOCK(so); if (sb->sb_hiwat <= sb->sb_ccc) { SOCK_RECVBUF_UNLOCK(so); return (false); } SOCK_RECVBUF_UNLOCK(so); sb = &so->so_snd; SOCK_SENDBUF_LOCK(so); while ((nb = TAILQ_FIRST(&sb->nl_queue)) != NULL) { TAILQ_REMOVE(&sb->nl_queue, nb, tailq); SOCK_SENDBUF_UNLOCK(so); reschedule = nl_process_nbuf(nb, nlp); SOCK_SENDBUF_LOCK(so); if (reschedule) { sb->sb_acc -= nb->datalen; sb->sb_ccc -= nb->datalen; /* XXXGL: potentially can reduce lock&unlock count. */ sowwakeup_locked(so); nl_buf_free(nb); SOCK_SENDBUF_LOCK(so); } else { TAILQ_INSERT_HEAD(&sb->nl_queue, nb, tailq); break; } } SOCK_SENDBUF_UNLOCK(so); return (reschedule); } static void nl_process_received(struct nlpcb *nlp) { NL_LOG(LOG_DEBUG3, "taskqueue called"); if (__predict_false(nlp->nl_need_thread_setup)) { nl_set_thread_nlp(curthread, nlp); NLP_LOCK(nlp); nlp->nl_need_thread_setup = false; NLP_UNLOCK(nlp); } while (nl_process_received_one(nlp)) ; } /* * Called after some data have been read from the socket. */ void nl_on_transmit(struct nlpcb *nlp) { NLP_LOCK(nlp); struct socket *so = nlp->nl_socket; if (__predict_false(nlp->nl_dropped_bytes > 0 && so != NULL)) { unsigned long dropped_bytes = nlp->nl_dropped_bytes; unsigned long dropped_messages = nlp->nl_dropped_messages; nlp->nl_dropped_bytes = 0; nlp->nl_dropped_messages = 0; struct sockbuf *sb = &so->so_rcv; NLP_LOG(LOG_DEBUG, nlp, "socket RX overflowed, %lu messages (%lu bytes) dropped. " "bytes: [%u/%u]", dropped_messages, dropped_bytes, sb->sb_ccc, sb->sb_hiwat); /* TODO: send netlink message */ } nl_schedule_taskqueue(nlp); NLP_UNLOCK(nlp); } void nl_taskqueue_handler(void *_arg, int pending) { struct nlpcb *nlp = (struct nlpcb *)_arg; CURVNET_SET(nlp->nl_socket->so_vnet); nl_process_received(nlp); CURVNET_RESTORE(); } /* * Tries to send current data buffer from writer. * * Returns true on success. * If no queue overrunes happened, wakes up socket owner. */ bool nl_send(struct nl_writer *nw, struct nlpcb *nlp) { struct socket *so = nlp->nl_socket; struct sockbuf *sb = &so->so_rcv; struct nl_buf *nb; MPASS(nw->hdr == NULL); MPASS(nw->buf != NULL); MPASS(nw->buf->datalen > 0); IF_DEBUG_LEVEL(LOG_DEBUG2) { struct nlmsghdr *hdr = (struct nlmsghdr *)nw->buf->data; NLP_LOG(LOG_DEBUG2, nlp, "TX len %u msgs %u msg type %d first hdrlen %u", nw->buf->datalen, nw->num_messages, hdr->nlmsg_type, hdr->nlmsg_len); } if (nlp->nl_linux && linux_netlink_p != NULL && __predict_false(!linux_netlink_p->msgs_to_linux(nw, nlp))) { nl_buf_free(nw->buf); nw->buf = NULL; return (false); } nb = nw->buf; nw->buf = NULL; SOCK_RECVBUF_LOCK(so); if (!nw->ignore_limit && __predict_false(sb->sb_hiwat <= sb->sb_ccc)) { SOCK_RECVBUF_UNLOCK(so); NLP_LOCK(nlp); nlp->nl_dropped_bytes += nb->datalen; nlp->nl_dropped_messages += nw->num_messages; NLP_LOG(LOG_DEBUG2, nlp, "RX oveflow: %lu m (+%d), %lu b (+%d)", (unsigned long)nlp->nl_dropped_messages, nw->num_messages, (unsigned long)nlp->nl_dropped_bytes, nb->datalen); NLP_UNLOCK(nlp); nl_buf_free(nb); return (false); } else { bool full; TAILQ_INSERT_TAIL(&sb->nl_queue, nb, tailq); sb->sb_acc += nb->datalen; sb->sb_ccc += nb->datalen; full = sb->sb_hiwat <= sb->sb_ccc; sorwakeup_locked(so); if (full) { NLP_LOCK(nlp); nlp->nl_tx_blocked = true; NLP_UNLOCK(nlp); } return (true); } } static int nl_receive_message(struct nlmsghdr *hdr, int remaining_length, struct nlpcb *nlp, struct nl_pstate *npt) { nl_handler_f handler = nl_handlers[nlp->nl_proto].cb; int error = 0; NLP_LOG(LOG_DEBUG2, nlp, "msg len: %u type: %d: flags: 0x%X seq: %u pid: %u", hdr->nlmsg_len, hdr->nlmsg_type, hdr->nlmsg_flags, hdr->nlmsg_seq, hdr->nlmsg_pid); if (__predict_false(hdr->nlmsg_len > remaining_length)) { NLP_LOG(LOG_DEBUG, nlp, "message is not entirely present: want %d got %d", hdr->nlmsg_len, remaining_length); return (EINVAL); } else if (__predict_false(hdr->nlmsg_len < sizeof(*hdr))) { NL_LOG(LOG_DEBUG, "message too short: %d", hdr->nlmsg_len); return (EINVAL); } /* Stamp each message with sender pid */ hdr->nlmsg_pid = nlp->nl_port; npt->hdr = hdr; - if (hdr->nlmsg_flags & NLM_F_REQUEST && hdr->nlmsg_type >= NLMSG_MIN_TYPE) { + if (hdr->nlmsg_flags & NLM_F_REQUEST && + hdr->nlmsg_type >= NLMSG_MIN_TYPE) { NL_LOG(LOG_DEBUG2, "handling message with msg type: %d", hdr->nlmsg_type); - - if (nlp->nl_linux && linux_netlink_p != NULL) { - struct nlmsghdr *hdr_orig = hdr; - hdr = linux_netlink_p->msg_from_linux(nlp->nl_proto, hdr, npt); - if (hdr == NULL) { - /* Failed to translate to kernel format. Report an error back */ - hdr = hdr_orig; - npt->hdr = hdr; - if (hdr->nlmsg_flags & NLM_F_ACK) - nlmsg_ack(nlp, EOPNOTSUPP, hdr, npt); - return (0); - } + if (nlp->nl_linux) { + MPASS(linux_netlink_p != NULL); + error = linux_netlink_p->msg_from_linux(nlp->nl_proto, + &hdr, npt); + if (error) + goto ack; } error = handler(hdr, npt); NL_LOG(LOG_DEBUG2, "retcode: %d", error); } +ack: if ((hdr->nlmsg_flags & NLM_F_ACK) || (error != 0 && error != EINTR)) { if (!npt->nw->suppress_ack) { NL_LOG(LOG_DEBUG3, "ack"); nlmsg_ack(nlp, error, hdr, npt); } } return (0); } static void npt_clear(struct nl_pstate *npt) { lb_clear(&npt->lb); npt->error = 0; npt->err_msg = NULL; npt->err_off = 0; npt->hdr = NULL; npt->nw->suppress_ack = false; } /* * Processes an incoming packet, which can contain multiple netlink messages */ static bool nl_process_nbuf(struct nl_buf *nb, struct nlpcb *nlp) { struct nlmsghdr *hdr; int error; NL_LOG(LOG_DEBUG3, "RX netlink buf %p on %p", nb, nlp->nl_socket); struct nl_writer nw = {}; if (!nlmsg_get_unicast_writer(&nw, NLMSG_SMALL, nlp)) { NL_LOG(LOG_DEBUG, "error allocating socket writer"); return (true); } nlmsg_ignore_limit(&nw); struct nl_pstate npt = { .nlp = nlp, .lb.base = &nb->data[roundup2(nb->datalen, 8)], .lb.size = nb->buflen - roundup2(nb->datalen, 8), .nw = &nw, .strict = nlp->nl_flags & NLF_STRICT, }; for (; nb->offset + sizeof(struct nlmsghdr) <= nb->datalen;) { hdr = (struct nlmsghdr *)&nb->data[nb->offset]; /* Save length prior to calling handler */ int msglen = NLMSG_ALIGN(hdr->nlmsg_len); NL_LOG(LOG_DEBUG3, "parsing offset %d/%d", nb->offset, nb->datalen); npt_clear(&npt); error = nl_receive_message(hdr, nb->datalen - nb->offset, nlp, &npt); nb->offset += msglen; if (__predict_false(error != 0 || nlp->nl_tx_blocked)) break; } NL_LOG(LOG_DEBUG3, "packet parsing done"); nlmsg_flush(&nw); if (nlp->nl_tx_blocked) { NLP_LOCK(nlp); nlp->nl_tx_blocked = false; NLP_UNLOCK(nlp); return (false); } else return (true); } diff --git a/sys/netlink/netlink_linux.h b/sys/netlink/netlink_linux.h index 2d9f8d1b7bd6..d4c451d470b2 100644 --- a/sys/netlink/netlink_linux.h +++ b/sys/netlink/netlink_linux.h @@ -1,53 +1,53 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _NETLINK_LINUX_VAR_H_ #define _NETLINK_LINUX_VAR_H_ #ifdef _KERNEL /* * The file contains headers for the bridge interface between * linux[_common] module and the netlink module */ struct nlpcb; struct nl_pstate; struct nl_writer; typedef bool msgs_to_linux_cb_t(struct nl_writer *nw, struct nlpcb *nlp); -typedef struct nlmsghdr *msg_from_linux_cb_t(int netlink_family, struct nlmsghdr *hdr, +typedef int msg_from_linux_cb_t(int netlink_family, struct nlmsghdr **hdr, struct nl_pstate *npt); struct linux_netlink_provider { msgs_to_linux_cb_t *msgs_to_linux; msg_from_linux_cb_t *msg_from_linux; }; extern struct linux_netlink_provider *linux_netlink_p; #endif #endif