diff --git a/sys/netlink/netlink_domain.c b/sys/netlink/netlink_domain.c index b0ff84401c84..f35cc7f09991 100644 --- a/sys/netlink/netlink_domain.c +++ b/sys/netlink/netlink_domain.c @@ -1,728 +1,728 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This file contains socket and protocol bindings for netlink. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* priv_check */ #include #include #include #define DEBUG_MOD_NAME nl_domain #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_DEBUG); _Static_assert((NLP_MAX_GROUPS % 64) == 0, "NLP_MAX_GROUPS has to be multiple of 64"); _Static_assert(NLP_MAX_GROUPS >= 64, "NLP_MAX_GROUPS has to be at least 64"); #define NLCTL_TRACKER struct rm_priotracker nl_tracker #define NLCTL_RLOCK(_ctl) rm_rlock(&((_ctl)->ctl_lock), &nl_tracker) #define NLCTL_RUNLOCK(_ctl) rm_runlock(&((_ctl)->ctl_lock), &nl_tracker) #define NLCTL_WLOCK(_ctl) rm_wlock(&((_ctl)->ctl_lock)) #define NLCTL_WUNLOCK(_ctl) rm_wunlock(&((_ctl)->ctl_lock)) static u_long nl_sendspace = NLSNDQ; SYSCTL_ULONG(_net_netlink, OID_AUTO, sendspace, CTLFLAG_RW, &nl_sendspace, 0, "Default netlink socket send space"); static u_long nl_recvspace = NLSNDQ; SYSCTL_ULONG(_net_netlink, OID_AUTO, recvspace, CTLFLAG_RW, &nl_recvspace, 0, "Default netlink socket receive space"); extern u_long sb_max_adj; static u_long nl_maxsockbuf = 512 * 1024 * 1024; /* 512M, XXX: init based on physmem */ uint32_t nlp_get_pid(const struct nlpcb *nlp) { return (nlp->nl_process_id); } /* * Looks up a nlpcb struct based on the @portid. Need to claim nlsock_mtx. * Returns nlpcb pointer if present else NULL */ static struct nlpcb * nl_port_lookup(uint32_t port_id) { struct nlpcb *nlp; CK_LIST_FOREACH(nlp, &V_nl_ctl->ctl_port_head, nl_port_next) { if (nlp->nl_port == port_id) return (nlp); } return (NULL); } static void nl_add_group_locked(struct nlpcb *nlp, unsigned int group_id) { MPASS(group_id <= NLP_MAX_GROUPS); --group_id; nlp->nl_groups[group_id / 64] |= (uint64_t)1 << (group_id % 64); } static void nl_del_group_locked(struct nlpcb *nlp, unsigned int group_id) { MPASS(group_id <= NLP_MAX_GROUPS); --group_id; nlp->nl_groups[group_id / 64] &= ~((uint64_t)1 << (group_id % 64)); } static bool nl_isset_group_locked(struct nlpcb *nlp, unsigned int group_id) { MPASS(group_id <= NLP_MAX_GROUPS); --group_id; return (nlp->nl_groups[group_id / 64] & ((uint64_t)1 << (group_id % 64))); } static uint32_t nl_get_groups_compat(struct nlpcb *nlp) { uint32_t groups_mask = 0; for (int i = 0; i < 32; i++) { if (nl_isset_group_locked(nlp, i + 1)) groups_mask |= (1 << i); } return (groups_mask); } /* * Broadcasts message @m to the protocol @proto group specified by @group_id */ void nl_send_group(struct mbuf *m, int num_messages, int proto, int group_id) { struct nlpcb *nlp_last = NULL; struct nlpcb *nlp; NLCTL_TRACKER; IF_DEBUG_LEVEL(LOG_DEBUG2) { struct nlmsghdr *hdr = mtod(m, struct nlmsghdr *); NL_LOG(LOG_DEBUG2, "MCAST mbuf len %u msg type %d len %u to group %d/%d", m->m_len, hdr->nlmsg_type, hdr->nlmsg_len, proto, group_id); } struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); if (__predict_false(ctl == NULL)) { /* * Can be the case when notification is sent within VNET * which doesn't have any netlink sockets. */ m_freem(m); return; } NLCTL_RLOCK(ctl); int io_flags = NL_IOF_UNTRANSLATED; CK_LIST_FOREACH(nlp, &ctl->ctl_pcb_head, nl_next) { if (nl_isset_group_locked(nlp, group_id) && nlp->nl_proto == proto) { if (nlp_last != NULL) { struct mbuf *m_copy; m_copy = m_copym(m, 0, M_COPYALL, M_NOWAIT); if (m_copy != NULL) nl_send_one(m_copy, nlp_last, num_messages, io_flags); else { NLP_LOCK(nlp_last); if (nlp_last->nl_socket != NULL) sorwakeup(nlp_last->nl_socket); NLP_UNLOCK(nlp_last); } } nlp_last = nlp; } } if (nlp_last != NULL) nl_send_one(m, nlp_last, num_messages, io_flags); else m_freem(m); NLCTL_RUNLOCK(ctl); } bool nl_has_listeners(int netlink_family, uint32_t groups_mask) { return (V_nl_ctl != NULL); } bool nlp_has_priv(struct nlpcb *nlp, int priv) { return (priv_check_cred(nlp->nl_cred, priv) == 0); } static uint32_t nl_find_port(void) { /* * app can open multiple netlink sockets. * Start with current pid, if already taken, * try random numbers in 65k..256k+65k space, * avoiding clash with pids. */ if (nl_port_lookup(curproc->p_pid) == NULL) return (curproc->p_pid); for (int i = 0; i < 16; i++) { uint32_t nl_port = (arc4random() % 65536) + 65536 * 4; if (nl_port_lookup(nl_port) == 0) return (nl_port); NL_LOG(LOG_DEBUG3, "tried %u\n", nl_port); } return (curproc->p_pid); } static int nl_bind_locked(struct nlpcb *nlp, struct sockaddr_nl *snl) { if (nlp->nl_bound) { if (nlp->nl_port != snl->nl_pid) { NL_LOG(LOG_DEBUG, "bind() failed: program pid %d " "is different from provided pid %d", nlp->nl_port, snl->nl_pid); return (EINVAL); // XXX: better error } } else { if (snl->nl_pid == 0) snl->nl_pid = nl_find_port(); if (nl_port_lookup(snl->nl_pid) != NULL) return (EADDRINUSE); nlp->nl_port = snl->nl_pid; nlp->nl_bound = true; CK_LIST_INSERT_HEAD(&V_nl_ctl->ctl_port_head, nlp, nl_port_next); } for (int i = 0; i < 32; i++) { if (snl->nl_groups & ((uint32_t)1 << i)) nl_add_group_locked(nlp, i + 1); else nl_del_group_locked(nlp, i + 1); } return (0); } static int nl_pru_attach(struct socket *so, int proto, struct thread *td) { struct nlpcb *nlp; int error; if (__predict_false(netlink_unloading != 0)) return (EAFNOSUPPORT); error = nl_verify_proto(proto); if (error != 0) return (error); bool is_linux = SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX; NL_LOG(LOG_DEBUG2, "socket %p, %sPID %d: attaching socket to %s", so, is_linux ? "(linux) " : "", curproc->p_pid, nl_get_proto_name(proto)); /* Create per-VNET state on first socket init */ struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); if (ctl == NULL) ctl = vnet_nl_ctl_init(); KASSERT(V_nl_ctl != NULL, ("nl_attach: vnet_sock_init() failed")); MPASS(sotonlpcb(so) == NULL); nlp = malloc(sizeof(struct nlpcb), M_PCB, M_WAITOK | M_ZERO); error = soreserve(so, nl_sendspace, nl_recvspace); if (error != 0) { free(nlp, M_PCB); return (error); } so->so_pcb = nlp; nlp->nl_socket = so; /* Copy so_cred to avoid having socket_var.h in every header */ nlp->nl_cred = so->so_cred; nlp->nl_proto = proto; nlp->nl_process_id = curproc->p_pid; nlp->nl_linux = is_linux; nlp->nl_active = true; NLP_LOCK_INIT(nlp); refcount_init(&nlp->nl_refcount, 1); nl_init_io(nlp); nlp->nl_taskqueue = taskqueue_create("netlink_socket", M_WAITOK, taskqueue_thread_enqueue, &nlp->nl_taskqueue); TASK_INIT(&nlp->nl_task, 0, nl_taskqueue_handler, nlp); taskqueue_start_threads(&nlp->nl_taskqueue, 1, PWAIT, "netlink_socket (PID %u)", nlp->nl_process_id); NLCTL_WLOCK(ctl); /* XXX: check ctl is still alive */ CK_LIST_INSERT_HEAD(&ctl->ctl_pcb_head, nlp, nl_next); NLCTL_WUNLOCK(ctl); soisconnected(so); return (0); } static void nl_pru_abort(struct socket *so) { NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); MPASS(sotonlpcb(so) != NULL); soisdisconnected(so); } static int nl_pru_bind(struct socket *so, struct sockaddr *sa, struct thread *td) { struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); struct nlpcb *nlp = sotonlpcb(so); struct sockaddr_nl *snl = (struct sockaddr_nl *)sa; int error; NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); if (snl->nl_len != sizeof(*snl)) { NL_LOG(LOG_DEBUG, "socket %p, wrong sizeof(), ignoring bind()", so); return (EINVAL); } NLCTL_WLOCK(ctl); NLP_LOCK(nlp); error = nl_bind_locked(nlp, snl); NLP_UNLOCK(nlp); NLCTL_WUNLOCK(ctl); NL_LOG(LOG_DEBUG2, "socket %p, bind() to %u, groups %u, error %d", so, snl->nl_pid, snl->nl_groups, error); return (error); } static int nl_assign_port(struct nlpcb *nlp, uint32_t port_id) { struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); struct sockaddr_nl snl = { .nl_pid = port_id, }; int error; NLCTL_WLOCK(ctl); NLP_LOCK(nlp); snl.nl_groups = nl_get_groups_compat(nlp); error = nl_bind_locked(nlp, &snl); NLP_UNLOCK(nlp); NLCTL_WUNLOCK(ctl); NL_LOG(LOG_DEBUG3, "socket %p, port assign: %d, error: %d", nlp->nl_socket, port_id, error); return (error); } /* * nl_autobind_port binds a unused portid to @nlp * @nlp: pcb data for the netlink socket * @candidate_id: first id to consider */ static int nl_autobind_port(struct nlpcb *nlp, uint32_t candidate_id) { struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); uint32_t port_id = candidate_id; NLCTL_TRACKER; bool exist; int error; for (int i = 0; i < 10; i++) { NL_LOG(LOG_DEBUG3, "socket %p, trying to assign port %d", nlp->nl_socket, port_id); NLCTL_RLOCK(ctl); exist = nl_port_lookup(port_id) != 0; NLCTL_RUNLOCK(ctl); if (!exist) { error = nl_assign_port(nlp, port_id); if (error != EADDRINUSE) break; } port_id++; } NL_LOG(LOG_DEBUG3, "socket %p, autobind to %d, error: %d", nlp->nl_socket, port_id, error); return (error); } static int nl_pru_connect(struct socket *so, struct sockaddr *sa, struct thread *td) { struct sockaddr_nl *snl = (struct sockaddr_nl *)sa; struct nlpcb *nlp; NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); if (snl->nl_len != sizeof(*snl)) { NL_LOG(LOG_DEBUG, "socket %p, wrong sizeof(), ignoring bind()", so); return (EINVAL); } nlp = sotonlpcb(so); if (!nlp->nl_bound) { int error = nl_autobind_port(nlp, td->td_proc->p_pid); if (error != 0) { NL_LOG(LOG_DEBUG, "socket %p, nl_autobind() failed: %d", so, error); return (error); } } /* XXX: Handle socket flags & multicast */ soisconnected(so); NL_LOG(LOG_DEBUG2, "socket %p, connect to %u", so, snl->nl_pid); return (0); } static void destroy_nlpcb(struct nlpcb *nlp) { NLP_LOCK(nlp); nl_free_io(nlp); NLP_LOCK_DESTROY(nlp); free(nlp, M_PCB); } static void destroy_nlpcb_epoch(epoch_context_t ctx) { struct nlpcb *nlp; nlp = __containerof(ctx, struct nlpcb, nl_epoch_ctx); destroy_nlpcb(nlp); } static void nl_pru_detach(struct socket *so) { struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); MPASS(sotonlpcb(so) != NULL); struct nlpcb *nlp; NL_LOG(LOG_DEBUG2, "detaching socket %p, PID %d", so, curproc->p_pid); nlp = sotonlpcb(so); /* Mark as inactive so no new work can be enqueued */ NLP_LOCK(nlp); bool was_bound = nlp->nl_bound; nlp->nl_active = false; NLP_UNLOCK(nlp); /* Wait till all scheduled work has been completed */ taskqueue_drain_all(nlp->nl_taskqueue); taskqueue_free(nlp->nl_taskqueue); NLCTL_WLOCK(ctl); NLP_LOCK(nlp); if (was_bound) { CK_LIST_REMOVE(nlp, nl_port_next); NL_LOG(LOG_DEBUG3, "socket %p, unlinking bound pid %u", so, nlp->nl_port); } CK_LIST_REMOVE(nlp, nl_next); nlp->nl_socket = NULL; NLP_UNLOCK(nlp); NLCTL_WUNLOCK(ctl); so->so_pcb = NULL; NL_LOG(LOG_DEBUG3, "socket %p, detached", so); /* XXX: is delayed free needed? */ - epoch_call(net_epoch_preempt, destroy_nlpcb_epoch, &nlp->nl_epoch_ctx); + NET_EPOCH_CALL(destroy_nlpcb_epoch, &nlp->nl_epoch_ctx); } static int nl_pru_disconnect(struct socket *so) { NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); MPASS(sotonlpcb(so) != NULL); return (ENOTCONN); } static int nl_pru_peeraddr(struct socket *so, struct sockaddr **sa) { NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); MPASS(sotonlpcb(so) != NULL); return (ENOTCONN); } static int nl_pru_shutdown(struct socket *so) { NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); MPASS(sotonlpcb(so) != NULL); socantsendmore(so); return (0); } static int nl_pru_sockaddr(struct socket *so, struct sockaddr **sa) { struct sockaddr_nl *snl; snl = malloc(sizeof(struct sockaddr_nl), M_SONAME, M_WAITOK | M_ZERO); /* TODO: set other fields */ snl->nl_len = sizeof(struct sockaddr_nl); snl->nl_family = AF_NETLINK; snl->nl_pid = sotonlpcb(so)->nl_port; *sa = (struct sockaddr *)snl; return (0); } static void nl_pru_close(struct socket *so) { NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); MPASS(sotonlpcb(so) != NULL); soisdisconnected(so); } static int nl_pru_output(struct mbuf *m, struct socket *so, ...) { if (__predict_false(m == NULL || ((m->m_len < sizeof(struct nlmsghdr)) && (m = m_pullup(m, sizeof(struct nlmsghdr))) == NULL))) return (ENOBUFS); MPASS((m->m_flags & M_PKTHDR) != 0); NL_LOG(LOG_DEBUG3, "sending message to kernel async processing"); nl_receive_async(m, so); return (0); } static int nl_pru_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *sa, struct mbuf *control, struct thread *td) { NL_LOG(LOG_DEBUG2, "sending message to kernel"); if (__predict_false(control != NULL)) { if (control->m_len) { m_freem(control); return (EINVAL); } m_freem(control); } return (nl_pru_output(m, so)); } static int nl_pru_rcvd(struct socket *so, int flags) { NL_LOG(LOG_DEBUG3, "socket %p, PID %d", so, curproc->p_pid); MPASS(sotonlpcb(so) != NULL); nl_on_transmit(sotonlpcb(so)); return (0); } static int nl_getoptflag(int sopt_name) { switch (sopt_name) { case NETLINK_CAP_ACK: return (NLF_CAP_ACK); case NETLINK_EXT_ACK: return (NLF_EXT_ACK); case NETLINK_GET_STRICT_CHK: return (NLF_STRICT); } return (0); } static int nl_ctloutput(struct socket *so, struct sockopt *sopt) { struct nl_control *ctl = atomic_load_ptr(&V_nl_ctl); struct nlpcb *nlp = sotonlpcb(so); uint32_t flag; int optval, error = 0; NLCTL_TRACKER; NL_LOG(LOG_DEBUG2, "%ssockopt(%p, %d)", (sopt->sopt_dir) ? "set" : "get", so, sopt->sopt_name); switch (sopt->sopt_dir) { case SOPT_SET: switch (sopt->sopt_name) { case NETLINK_ADD_MEMBERSHIP: case NETLINK_DROP_MEMBERSHIP: sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); if (optval <= 0 || optval >= NLP_MAX_GROUPS) { error = ERANGE; break; } NL_LOG(LOG_DEBUG2, "ADD/DEL group %d", (uint32_t)optval); NLCTL_WLOCK(ctl); if (sopt->sopt_name == NETLINK_ADD_MEMBERSHIP) nl_add_group_locked(nlp, optval); else nl_del_group_locked(nlp, optval); NLCTL_WUNLOCK(ctl); break; case NETLINK_CAP_ACK: case NETLINK_EXT_ACK: case NETLINK_GET_STRICT_CHK: sooptcopyin(sopt, &optval, sizeof(optval), sizeof(optval)); flag = nl_getoptflag(sopt->sopt_name); NLCTL_WLOCK(ctl); if (optval != 0) nlp->nl_flags |= flag; else nlp->nl_flags &= ~flag; NLCTL_WUNLOCK(ctl); break; default: error = ENOPROTOOPT; } break; case SOPT_GET: switch (sopt->sopt_name) { case NETLINK_LIST_MEMBERSHIPS: NLCTL_RLOCK(ctl); optval = nl_get_groups_compat(nlp); NLCTL_RUNLOCK(ctl); error = sooptcopyout(sopt, &optval, sizeof(optval)); break; case NETLINK_CAP_ACK: case NETLINK_EXT_ACK: case NETLINK_GET_STRICT_CHK: NLCTL_RLOCK(ctl); optval = (nlp->nl_flags & nl_getoptflag(sopt->sopt_name)) != 0; NLCTL_RUNLOCK(ctl); error = sooptcopyout(sopt, &optval, sizeof(optval)); break; default: error = ENOPROTOOPT; } break; default: error = ENOPROTOOPT; } return (error); } static int nl_setsbopt(struct socket *so, struct sockopt *sopt) { int error, optval; bool result; if (sopt->sopt_name != SO_RCVBUF) return (sbsetopt(so, sopt)); /* Allow to override max buffer size in certain conditions */ error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error != 0) return (error); NL_LOG(LOG_DEBUG2, "socket %p, PID %d, SO_RCVBUF=%d", so, curproc->p_pid, optval); if (optval > sb_max_adj) { if (priv_check(curthread, PRIV_NET_ROUTE) != 0) return (EPERM); } SOCK_RECVBUF_LOCK(so); result = sbreserve_locked_limit(so, SO_RCV, optval, nl_maxsockbuf, curthread); SOCK_RECVBUF_UNLOCK(so); return (result ? 0 : ENOBUFS); } static struct protosw netlinksw = { .pr_type = SOCK_RAW, .pr_flags = PR_ATOMIC | PR_ADDR | PR_WANTRCVD, .pr_ctloutput = nl_ctloutput, .pr_setsbopt = nl_setsbopt, .pr_abort = nl_pru_abort, .pr_attach = nl_pru_attach, .pr_bind = nl_pru_bind, .pr_connect = nl_pru_connect, .pr_detach = nl_pru_detach, .pr_disconnect = nl_pru_disconnect, .pr_peeraddr = nl_pru_peeraddr, .pr_send = nl_pru_send, .pr_rcvd = nl_pru_rcvd, .pr_shutdown = nl_pru_shutdown, .pr_sockaddr = nl_pru_sockaddr, .pr_close = nl_pru_close }; static struct domain netlinkdomain = { .dom_family = PF_NETLINK, .dom_name = "netlink", .dom_flags = DOMF_UNLOADABLE, .dom_nprotosw = 1, .dom_protosw = { &netlinksw }, }; DOMAIN_SET(netlink); diff --git a/sys/netlink/netlink_generic.c b/sys/netlink/netlink_generic.c index 267024dfee4b..028ea24fc5e9 100644 --- a/sys/netlink/netlink_generic.c +++ b/sys/netlink/netlink_generic.c @@ -1,518 +1,518 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_generic #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_DEBUG3); #define MAX_FAMILIES 20 #define MAX_GROUPS 64 #define MIN_GROUP_NUM 48 static struct sx sx_lock; #define GENL_LOCK_INIT() sx_init(&sx_lock, "genetlink lock") #define GENL_LOCK_DESTROY() sx_destroy(&sx_lock) #define GENL_LOCK() sx_xlock(&sx_lock) #define GENL_UNLOCK() sx_xunlock(&sx_lock) struct genl_family { const char *family_name; uint16_t family_hdrsize; uint16_t family_id; uint16_t family_version; uint16_t family_attr_max; uint16_t family_cmd_size; uint16_t family_num_groups; struct genl_cmd *family_cmds; }; static struct genl_family families[MAX_FAMILIES]; struct genl_group { struct genl_family *group_family; const char *group_name; }; static struct genl_group groups[MAX_GROUPS]; static int dump_family(struct nlmsghdr *hdr, struct genlmsghdr *ghdr, const struct genl_family *gf, struct nl_writer *nw); static void nlctrl_notify(const struct genl_family *gf, int action); static struct genl_family * find_family(const char *family_name) { for (int i = 0; i < MAX_FAMILIES; i++) { struct genl_family *gf = &families[i]; if (gf->family_name != NULL && !strcmp(gf->family_name, family_name)) return (gf); } return (NULL); } uint32_t genl_register_family(const char *family_name, size_t hdrsize, int family_version, int max_attr_idx) { uint32_t family_id = 0; MPASS(family_name != NULL); if (find_family(family_name) != NULL) return (0); GENL_LOCK(); for (int i = 0; i < MAX_FAMILIES; i++) { struct genl_family *gf = &families[i]; if (gf->family_name == NULL) { gf->family_name = family_name; gf->family_version = family_version; gf->family_hdrsize = hdrsize; gf->family_attr_max = max_attr_idx; gf->family_id = i + GENL_MIN_ID; NL_LOG(LOG_DEBUG2, "Registered family %s id %d", gf->family_name, gf->family_id); family_id = gf->family_id; nlctrl_notify(gf, CTRL_CMD_NEWFAMILY); break; } } GENL_UNLOCK(); return (family_id); } static void free_family(struct genl_family *gf) { if (gf->family_cmds != NULL) free(gf->family_cmds, M_NETLINK); } /* * unregister groups of a given family */ static void unregister_groups(const struct genl_family *gf) { for (int i = 0; i < MAX_GROUPS; i++) { struct genl_group *gg = &groups[i]; if (gg->group_family == gf && gg->group_name != NULL) { gg->group_family = NULL; gg->group_name = NULL; } } } /* * Can sleep, I guess */ bool genl_unregister_family(const char *family_name) { bool found = false; GENL_LOCK(); struct genl_family *gf = find_family(family_name); nlctrl_notify(gf, CTRL_CMD_DELFAMILY); if (gf != NULL) { found = true; unregister_groups(gf); /* TODO: zero pointer first */ free_family(gf); bzero(gf, sizeof(*gf)); } GENL_UNLOCK(); return (found); } bool genl_register_cmds(const char *family_name, const struct genl_cmd *cmds, int count) { GENL_LOCK(); struct genl_family *gf = find_family(family_name); if (gf == NULL) { GENL_UNLOCK(); return (false); } int cmd_size = gf->family_cmd_size; for (int i = 0; i < count; i++) { MPASS(cmds[i].cmd_cb != NULL); if (cmds[i].cmd_num >= cmd_size) cmd_size = cmds[i].cmd_num + 1; } if (cmd_size > gf->family_cmd_size) { /* need to realloc */ size_t sz = cmd_size * sizeof(struct genl_cmd); void *data = malloc(sz, M_NETLINK, M_WAITOK | M_ZERO); memcpy(data, gf->family_cmds, gf->family_cmd_size * sizeof(struct genl_cmd)); void *old_data = gf->family_cmds; gf->family_cmds = data; gf->family_cmd_size = cmd_size; free(old_data, M_NETLINK); } for (int i = 0; i < count; i++) { const struct genl_cmd *cmd = &cmds[i]; MPASS(gf->family_cmds[cmd->cmd_num].cmd_cb == NULL); gf->family_cmds[cmd->cmd_num] = cmds[i]; NL_LOG(LOG_DEBUG2, "Adding cmd %s(%d) to family %s", cmd->cmd_name, cmd->cmd_num, gf->family_name); } GENL_UNLOCK(); return (true); } static struct genl_group * find_group(const struct genl_family *gf, const char *group_name) { for (int i = 0; i < MAX_GROUPS; i++) { struct genl_group *gg = &groups[i]; if (gg->group_family == gf && !strcmp(gg->group_name, group_name)) return (gg); } return (NULL); } uint32_t genl_register_group(const char *family_name, const char *group_name) { uint32_t group_id = 0; MPASS(family_name != NULL); MPASS(group_name != NULL); GENL_LOCK(); struct genl_family *gf = find_family(family_name); if (gf == NULL || find_group(gf, group_name) != NULL) { GENL_UNLOCK(); return (0); } for (int i = 0; i < MAX_GROUPS; i++) { struct genl_group *gg = &groups[i]; if (gg->group_family == NULL) { gf->family_num_groups++; gg->group_family = gf; gg->group_name = group_name; group_id = i + MIN_GROUP_NUM; break; } } GENL_UNLOCK(); return (group_id); } /* * Handler called by netlink subsystem when matching netlink message is received */ static int genl_handle_message(struct nlmsghdr *hdr, struct nl_pstate *npt) { struct nlpcb *nlp = npt->nlp; int error = 0; int family_id = (int)hdr->nlmsg_type - GENL_MIN_ID; if (__predict_false(family_id < 0 || family_id > MAX_FAMILIES)) { NLP_LOG(LOG_DEBUG, nlp, "invalid message type: %d", hdr->nlmsg_type); return (ENOTSUP); } if (__predict_false(hdr->nlmsg_len < sizeof(hdr) + GENL_HDRLEN)) { NLP_LOG(LOG_DEBUG, nlp, "invalid message size: %d", hdr->nlmsg_len); return (EINVAL); } struct genl_family *gf = &families[family_id]; struct genlmsghdr *ghdr = (struct genlmsghdr *)(hdr + 1); if (ghdr->cmd >= gf->family_cmd_size || gf->family_cmds[ghdr->cmd].cmd_cb == NULL) { NLP_LOG(LOG_DEBUG, nlp, "family %s: invalid cmd %d", gf->family_name, ghdr->cmd); return (ENOTSUP); } struct genl_cmd *cmd = &gf->family_cmds[ghdr->cmd]; if (cmd->cmd_priv != 0 && !nlp_has_priv(nlp, cmd->cmd_priv)) { NLP_LOG(LOG_DEBUG, nlp, "family %s: cmd %d priv_check() failed", gf->family_name, ghdr->cmd); return (EPERM); } NLP_LOG(LOG_DEBUG2, nlp, "received family %s cmd %s(%d) len %d", gf->family_name, cmd->cmd_name, ghdr->cmd, hdr->nlmsg_len); error = cmd->cmd_cb(hdr, npt); return (error); } static uint32_t get_cmd_flags(const struct genl_cmd *cmd) { uint32_t flags = cmd->cmd_flags; if (cmd->cmd_priv != 0) flags |= GENL_ADMIN_PERM; return (flags); } static int dump_family(struct nlmsghdr *hdr, struct genlmsghdr *ghdr, const struct genl_family *gf, struct nl_writer *nw) { if (!nlmsg_reply(nw, hdr, sizeof(struct genlmsghdr))) goto enomem; struct genlmsghdr *ghdr_new = nlmsg_reserve_object(nw, struct genlmsghdr); ghdr_new->cmd = ghdr->cmd; ghdr_new->version = gf->family_version; ghdr_new->reserved = 0; nlattr_add_string(nw, CTRL_ATTR_FAMILY_NAME, gf->family_name); nlattr_add_u16(nw, CTRL_ATTR_FAMILY_ID, gf->family_id); nlattr_add_u32(nw, CTRL_ATTR_VERSION, gf->family_version); nlattr_add_u32(nw, CTRL_ATTR_HDRSIZE, gf->family_hdrsize); nlattr_add_u32(nw, CTRL_ATTR_MAXATTR, gf->family_attr_max); if (gf->family_cmd_size > 0) { int off = nlattr_add_nested(nw, CTRL_ATTR_OPS); if (off == 0) goto enomem; for (int i = 0, cnt=0; i < gf->family_cmd_size; i++) { struct genl_cmd *cmd = &gf->family_cmds[i]; if (cmd->cmd_cb == NULL) continue; int cmd_off = nlattr_add_nested(nw, ++cnt); if (cmd_off == 0) goto enomem; nlattr_add_u32(nw, CTRL_ATTR_OP_ID, cmd->cmd_num); nlattr_add_u32(nw, CTRL_ATTR_OP_FLAGS, get_cmd_flags(cmd)); nlattr_set_len(nw, cmd_off); } nlattr_set_len(nw, off); } if (gf->family_num_groups > 0) { int off = nlattr_add_nested(nw, CTRL_ATTR_MCAST_GROUPS); if (off == 0) goto enomem; for (int i = 0, cnt = 0; i < MAX_GROUPS; i++) { struct genl_group *gg = &groups[i]; if (gg->group_family != gf) continue; int cmd_off = nlattr_add_nested(nw, ++cnt); if (cmd_off == 0) goto enomem; nlattr_add_u32(nw, CTRL_ATTR_MCAST_GRP_ID, i + MIN_GROUP_NUM); nlattr_add_string(nw, CTRL_ATTR_MCAST_GRP_NAME, gg->group_name); nlattr_set_len(nw, cmd_off); } nlattr_set_len(nw, off); } if (nlmsg_end(nw)) return (0); enomem: NL_LOG(LOG_DEBUG, "unable to dump family %s state (ENOMEM)", gf->family_name); nlmsg_abort(nw); return (ENOMEM); } /* Declare ourself as a user */ #define CTRL_FAMILY_NAME "nlctrl" static uint32_t ctrl_family_id; static uint32_t ctrl_group_id; struct nl_parsed_family { uint32_t family_id; char *family_name; uint8_t version; }; #define _IN(_field) offsetof(struct genlmsghdr, _field) #define _OUT(_field) offsetof(struct nl_parsed_family, _field) static const struct nlfield_parser nlf_p_generic[] = { { .off_in = _IN(version), .off_out = _OUT(version), .cb = nlf_get_u8 }, }; static struct nlattr_parser nla_p_generic[] = { { .type = CTRL_ATTR_FAMILY_ID , .off = _OUT(family_id), .cb = nlattr_get_uint16 }, { .type = CTRL_ATTR_FAMILY_NAME , .off = _OUT(family_name), .cb = nlattr_get_string }, }; #undef _IN #undef _OUT NL_DECLARE_PARSER(genl_parser, struct genlmsghdr, nlf_p_generic, nla_p_generic); static bool match_family(const struct genl_family *gf, const struct nl_parsed_family *attrs) { if (gf->family_name == NULL) return (false); if (attrs->family_id != 0 && attrs->family_id != gf->family_id) return (false); if (attrs->family_name != NULL && strcmp(attrs->family_name, gf->family_name)) return (false); return (true); } static int nlctrl_handle_getfamily(struct nlmsghdr *hdr, struct nl_pstate *npt) { int error = 0; struct nl_parsed_family attrs = {}; error = nl_parse_nlmsg(hdr, &genl_parser, npt, &attrs); if (error != 0) return (error); struct genlmsghdr ghdr = { .cmd = CTRL_CMD_NEWFAMILY, }; if (attrs.family_id != 0 || attrs.family_name != NULL) { /* Resolve request */ for (int i = 0; i < MAX_FAMILIES; i++) { struct genl_family *gf = &families[i]; if (match_family(gf, &attrs)) { error = dump_family(hdr, &ghdr, gf, npt->nw); return (error); } } return (ENOENT); } hdr->nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI; for (int i = 0; i < MAX_FAMILIES; i++) { struct genl_family *gf = &families[i]; if (match_family(gf, &attrs)) { error = dump_family(hdr, &ghdr, gf, npt->nw); if (error != 0) break; } } if (!nlmsg_end_dump(npt->nw, error, hdr)) { NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); return (ENOMEM); } return (error); } static void nlctrl_notify(const struct genl_family *gf, int cmd) { struct nlmsghdr hdr = {.nlmsg_type = NETLINK_GENERIC }; struct genlmsghdr ghdr = { .cmd = cmd }; struct nl_writer nw = {}; if (nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_GENERIC, ctrl_group_id)) { dump_family(&hdr, &ghdr, gf, &nw); nlmsg_flush(&nw); return; } NL_LOG(LOG_DEBUG, "error allocating group writer"); } static const struct genl_cmd nlctrl_cmds[] = { { .cmd_num = CTRL_CMD_GETFAMILY, .cmd_name = "GETFAMILY", .cmd_cb = nlctrl_handle_getfamily, .cmd_flags = GENL_CMD_CAP_DO | GENL_CMD_CAP_DUMP | GENL_CMD_CAP_HASPOL, }, }; static void genl_nlctrl_init(void) { ctrl_family_id = genl_register_family(CTRL_FAMILY_NAME, 0, 2, CTRL_ATTR_MAX); genl_register_cmds(CTRL_FAMILY_NAME, nlctrl_cmds, NL_ARRAY_LEN(nlctrl_cmds)); ctrl_group_id = genl_register_group(CTRL_FAMILY_NAME, "notify"); } static void genl_nlctrl_destroy(void) { genl_unregister_family(CTRL_FAMILY_NAME); } static const struct nlhdr_parser *all_parsers[] = { &genl_parser }; static void genl_load(void *u __unused) { GENL_LOCK_INIT(); NL_VERIFY_PARSERS(all_parsers); netlink_register_proto(NETLINK_GENERIC, "NETLINK_GENERIC", genl_handle_message); genl_nlctrl_init(); } SYSINIT(genl_load, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, genl_load, NULL); static void genl_unload(void *u __unused) { genl_nlctrl_destroy(); GENL_LOCK_DESTROY(); - epoch_wait_preempt(net_epoch_preempt); + NET_EPOCH_WAIT(); } SYSUNINIT(genl_unload, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, genl_unload, NULL); diff --git a/sys/netlink/netlink_route.c b/sys/netlink/netlink_route.c index 0622656715c4..42b0bc66666e 100644 --- a/sys/netlink/netlink_route.c +++ b/sys/netlink/netlink_route.c @@ -1,139 +1,139 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_route_core #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_DEBUG); #define HANDLER_MAX_NUM (NL_RTM_MAX + 10) static const struct rtnl_cmd_handler *rtnl_handler[HANDLER_MAX_NUM] = {}; bool rtnl_register_messages(const struct rtnl_cmd_handler *handlers, int count) { for (int i = 0; i < count; i++) { if (handlers[i].cmd >= HANDLER_MAX_NUM) return (false); MPASS(rtnl_handler[handlers[i].cmd] == NULL); } for (int i = 0; i < count; i++) rtnl_handler[handlers[i].cmd] = &handlers[i]; return (true); } /* * Handler called by netlink subsystem when matching netlink message is received */ static int rtnl_handle_message(struct nlmsghdr *hdr, struct nl_pstate *npt) { const struct rtnl_cmd_handler *cmd; struct epoch_tracker et; struct nlpcb *nlp = npt->nlp; int error = 0; if (__predict_false(hdr->nlmsg_type >= HANDLER_MAX_NUM)) { NLMSG_REPORT_ERR_MSG(npt, "unknown message type: %d", hdr->nlmsg_type); return (ENOTSUP); } cmd = rtnl_handler[hdr->nlmsg_type]; if (__predict_false(cmd == NULL)) { NLMSG_REPORT_ERR_MSG(npt, "unknown message type: %d", hdr->nlmsg_type); return (ENOTSUP); } NLP_LOG(LOG_DEBUG2, nlp, "received msg %s(%d) len %d", cmd->name, hdr->nlmsg_type, hdr->nlmsg_len); if (cmd->priv != 0 && !nlp_has_priv(nlp, cmd->priv)) { NLP_LOG(LOG_DEBUG2, nlp, "priv %d check failed for msg %s", cmd->priv, cmd->name); return (EPERM); } else if (cmd->priv != 0) NLP_LOG(LOG_DEBUG3, nlp, "priv %d check passed for msg %s", cmd->priv, cmd->name); bool need_epoch = !(cmd->flags & RTNL_F_NOEPOCH); if (need_epoch) NET_EPOCH_ENTER(et); error = cmd->cb(hdr, nlp, npt); if (need_epoch) NET_EPOCH_EXIT(et); NLP_LOG(LOG_DEBUG3, nlp, "message %s -> error %d", cmd->name, error); return (error); } static struct rtbridge nlbridge = { .route_f = rtnl_handle_route_event, .ifmsg_f = rtnl_handle_ifnet_event, }; static struct rtbridge *nlbridge_orig_p; static void rtnl_load(void *u __unused) { NL_LOG(LOG_NOTICE, "rtnl loading"); nlbridge_orig_p = netlink_callback_p; netlink_callback_p = &nlbridge; rtnl_neighs_init(); rtnl_ifaces_init(); rtnl_nexthops_init(); rtnl_routes_init(); netlink_register_proto(NETLINK_ROUTE, "NETLINK_ROUTE", rtnl_handle_message); } SYSINIT(rtnl_load, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtnl_load, NULL); static void rtnl_unload(void *u __unused) { netlink_callback_p = nlbridge_orig_p; rtnl_ifaces_destroy(); rtnl_neighs_destroy(); /* Wait till all consumers read nlbridge data */ - epoch_wait_preempt(net_epoch_preempt); + NET_EPOCH_WAIT(); } SYSUNINIT(rtnl_unload, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtnl_unload, NULL); diff --git a/sys/netlink/route/nexthop.c b/sys/netlink/route/nexthop.c index cd184a635eb4..e9f008842644 100644 --- a/sys/netlink/route/nexthop.c +++ b/sys/netlink/route/nexthop.c @@ -1,1005 +1,1004 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEBUG_MOD_NAME nl_nhop #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_DEBUG3); /* * This file contains the logic to maintain kernel nexthops and * nexhop groups based om the data provided by the user. * * Kernel stores (nearly) all of the routing data in the nexthops, * including the prefix-specific flags (NHF_HOST and NHF_DEFAULT). * * Netlink API provides higher-level abstraction for the user. Each * user-created nexthop may map to multiple kernel nexthops. * * The following variations require separate kernel nexthop to be * created: * * prefix flags (NHF_HOST, NHF_DEFAULT) * * using IPv6 gateway for IPv4 routes * * different fibnum * * These kernel nexthops have the lifetime bound to the lifetime of * the user_nhop object. They are not collected until user requests * to delete the created user_nhop. * */ struct user_nhop { uint32_t un_idx; /* Userland-provided index */ uint32_t un_fibfam; /* fibnum+af(as highest byte) */ uint8_t un_protocol; /* protocol that install the record */ struct nhop_object *un_nhop; /* "production" nexthop */ struct nhop_object *un_nhop_src; /* nexthop to copy from */ struct weightened_nhop *un_nhgrp_src; /* nexthops for nhg */ uint32_t un_nhgrp_count; /* number of nexthops */ struct user_nhop *un_next; /* next item in hash chain */ struct user_nhop *un_nextchild; /* master -> children */ struct epoch_context un_epoch_ctx; /* epoch ctl helper */ }; /* produce hash value for an object */ #define unhop_hash_obj(_obj) (hash_unhop(_obj)) /* compare two objects */ #define unhop_cmp(_one, _two) (cmp_unhop(_one, _two)) /* next object accessor */ #define unhop_next(_obj) (_obj)->un_next CHT_SLIST_DEFINE(unhop, struct user_nhop); struct unhop_ctl { struct unhop_head un_head; struct rmlock un_lock; }; #define UN_LOCK_INIT(_ctl) rm_init(&(_ctl)->un_lock, "unhop_ctl") #define UN_TRACKER struct rm_priotracker un_tracker #define UN_RLOCK(_ctl) rm_rlock(&((_ctl)->un_lock), &un_tracker) #define UN_RUNLOCK(_ctl) rm_runlock(&((_ctl)->un_lock), &un_tracker) #define UN_WLOCK(_ctl) rm_wlock(&(_ctl)->un_lock); #define UN_WUNLOCK(_ctl) rm_wunlock(&(_ctl)->un_lock); VNET_DEFINE_STATIC(struct unhop_ctl *, un_ctl) = NULL; #define V_un_ctl VNET(un_ctl) static void consider_resize(struct unhop_ctl *ctl, uint32_t new_size); static int cmp_unhop(const struct user_nhop *a, const struct user_nhop *b); static unsigned int hash_unhop(const struct user_nhop *obj); static void destroy_unhop(struct user_nhop *unhop); static struct nhop_object *clone_unhop(const struct user_nhop *unhop, uint32_t fibnum, int family, int nh_flags); static int cmp_unhop(const struct user_nhop *a, const struct user_nhop *b) { return (a->un_idx == b->un_idx && a->un_fibfam == b->un_fibfam); } /* * Hash callback: calculate hash of an object */ static unsigned int hash_unhop(const struct user_nhop *obj) { return (obj->un_idx ^ obj->un_fibfam); } #define UNHOP_IS_MASTER(_unhop) ((_unhop)->un_fibfam == 0) /* * Factory interface for creating matching kernel nexthops/nexthop groups * * @uidx: userland nexhop index used to create the nexthop * @fibnum: fibnum nexthop will be used in * @family: upper family nexthop will be used in * @nh_flags: desired nexthop prefix flags * @perror: pointer to store error to * * Returns referenced nexthop linked to @fibnum/@family rib on success. */ struct nhop_object * nl_find_nhop(uint32_t fibnum, int family, uint32_t uidx, int nh_flags, int *perror) { struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); UN_TRACKER; if (__predict_false(ctl == NULL)) return (NULL); struct user_nhop key= { .un_idx = uidx, .un_fibfam = fibnum | ((uint32_t)family) << 24, }; struct user_nhop *unhop; nh_flags = nh_flags & (NHF_HOST | NHF_DEFAULT); if (__predict_false(family == 0)) return (NULL); UN_RLOCK(ctl); CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); if (unhop != NULL) { struct nhop_object *nh = unhop->un_nhop; UN_RLOCK(ctl); *perror = 0; nhop_ref_any(nh); return (nh); } /* * Exact nexthop not found. Search for template nexthop to clone from. */ key.un_fibfam = 0; CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); if (unhop == NULL) { UN_RUNLOCK(ctl); *perror = ESRCH; return (NULL); } UN_RUNLOCK(ctl); /* Create entry to insert first */ struct user_nhop *un_new, *un_tmp; un_new = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO); if (un_new == NULL) { *perror = ENOMEM; return (NULL); } un_new->un_idx = uidx; un_new->un_fibfam = fibnum | ((uint32_t)family) << 24; /* Relying on epoch to protect unhop here */ un_new->un_nhop = clone_unhop(unhop, fibnum, family, nh_flags); if (un_new->un_nhop == NULL) { free(un_new, M_NETLINK); *perror = ENOMEM; return (NULL); } /* Insert back and report */ UN_WLOCK(ctl); /* First, find template record once again */ CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); if (unhop == NULL) { /* Someone deleted the nexthop during the call */ UN_WUNLOCK(ctl); *perror = ESRCH; destroy_unhop(un_new); return (NULL); } /* Second, check the direct match */ CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, un_new, un_tmp); struct nhop_object *nh; if (un_tmp != NULL) { /* Another thread already created the desired nextop, use it */ nh = un_tmp->un_nhop; } else { /* Finally, insert the new nexthop and link it to the primary */ nh = un_new->un_nhop; CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, un_new); un_new->un_nextchild = unhop->un_nextchild; unhop->un_nextchild = un_new; un_new = NULL; NL_LOG(LOG_DEBUG2, "linked cloned nexthop %p", nh); } UN_WUNLOCK(ctl); if (un_new != NULL) destroy_unhop(un_new); *perror = 0; nhop_ref_any(nh); return (nh); } static struct user_nhop * nl_find_base_unhop(struct unhop_ctl *ctl, uint32_t uidx) { struct user_nhop key= { .un_idx = uidx }; struct user_nhop *unhop = NULL; UN_TRACKER; UN_RLOCK(ctl); CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); UN_RUNLOCK(ctl); return (unhop); } #define MAX_STACK_NHOPS 4 static struct nhop_object * clone_unhop(const struct user_nhop *unhop, uint32_t fibnum, int family, int nh_flags) { #ifdef ROUTE_MPATH const struct weightened_nhop *wn; struct weightened_nhop *wn_new, wn_base[MAX_STACK_NHOPS]; uint32_t num_nhops; #endif struct nhop_object *nh = NULL; int error; if (unhop->un_nhop_src != NULL) { IF_DEBUG_LEVEL(LOG_DEBUG2) { char nhbuf[NHOP_PRINT_BUFSIZE]; nhop_print_buf_any(unhop->un_nhop_src, nhbuf, sizeof(nhbuf)); FIB_NH_LOG(LOG_DEBUG2, unhop->un_nhop_src, "cloning nhop %s -> %u.%u flags 0x%X", nhbuf, fibnum, family, nh_flags); } struct nhop_object *nh; nh = nhop_alloc(fibnum, AF_UNSPEC); if (nh == NULL) return (NULL); nhop_copy(nh, unhop->un_nhop_src); /* Check that nexthop gateway is compatible with the new family */ if (!nhop_set_upper_family(nh, family)) { nhop_free(nh); return (NULL); } nhop_set_uidx(nh, unhop->un_idx); nhop_set_pxtype_flag(nh, nh_flags); return (nhop_get_nhop(nh, &error)); } #ifdef ROUTE_MPATH wn = unhop->un_nhgrp_src; num_nhops = unhop->un_nhgrp_count; if (num_nhops > MAX_STACK_NHOPS) { wn_new = malloc(num_nhops * sizeof(struct weightened_nhop), M_TEMP, M_NOWAIT); if (wn_new == NULL) return (NULL); } else wn_new = wn_base; for (int i = 0; i < num_nhops; i++) { uint32_t uidx = nhop_get_uidx(wn[i].nh); MPASS(uidx != 0); wn_new[i].nh = nl_find_nhop(fibnum, family, uidx, nh_flags, &error); if (error != 0) break; wn_new[i].weight = wn[i].weight; } if (error == 0) { struct rib_head *rh = nhop_get_rh(wn_new[0].nh); struct nhgrp_object *nhg; error = nhgrp_get_group(rh, wn_new, num_nhops, unhop->un_idx, &nhg); nh = (struct nhop_object *)nhg; } if (wn_new != wn_base) free(wn_new, M_TEMP); #endif return (nh); } static void destroy_unhop(struct user_nhop *unhop) { if (unhop->un_nhop != NULL) nhop_free_any(unhop->un_nhop); if (unhop->un_nhop_src != NULL) nhop_free_any(unhop->un_nhop_src); free(unhop, M_NETLINK); } static void destroy_unhop_epoch(epoch_context_t ctx) { struct user_nhop *unhop; unhop = __containerof(ctx, struct user_nhop, un_epoch_ctx); destroy_unhop(unhop); } static uint32_t find_spare_uidx(struct unhop_ctl *ctl) { struct user_nhop *unhop, key = {}; uint32_t uidx = 0; UN_TRACKER; UN_RLOCK(ctl); /* This should return spare uid with 75% of 65k used in ~99/100 cases */ for (int i = 0; i < 16; i++) { key.un_idx = (arc4random() % 65536) + 65536 * 4; CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); if (unhop == NULL) { uidx = key.un_idx; break; } } UN_RUNLOCK(ctl); return (uidx); } /* * Actual netlink code */ struct netlink_walkargs { struct nl_writer *nw; struct nlmsghdr hdr; struct nlpcb *so; int family; int error; int count; int dumped; }; #define ENOMEM_IF_NULL(_v) if ((_v) == NULL) goto enomem static bool dump_nhgrp(const struct user_nhop *unhop, struct nlmsghdr *hdr, struct nl_writer *nw) { if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg))) goto enomem; struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg); nhm->nh_family = AF_UNSPEC; nhm->nh_scope = 0; nhm->nh_protocol = unhop->un_protocol; nhm->nh_flags = 0; nlattr_add_u32(nw, NHA_ID, unhop->un_idx); nlattr_add_u16(nw, NHA_GROUP_TYPE, NEXTHOP_GRP_TYPE_MPATH); struct weightened_nhop *wn = unhop->un_nhgrp_src; uint32_t num_nhops = unhop->un_nhgrp_count; /* TODO: a better API? */ int nla_len = sizeof(struct nlattr); nla_len += NETLINK_ALIGN(num_nhops * sizeof(struct nexthop_grp)); struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); if (nla == NULL) goto enomem; nla->nla_type = NHA_GROUP; nla->nla_len = nla_len; for (int i = 0; i < num_nhops; i++) { struct nexthop_grp *grp = &((struct nexthop_grp *)(nla + 1))[i]; grp->id = nhop_get_uidx(wn[i].nh); grp->weight = wn[i].weight; grp->resvd1 = 0; grp->resvd2 = 0; } if (nlmsg_end(nw)) return (true); enomem: NL_LOG(LOG_DEBUG, "error: unable to allocate attribute memory"); nlmsg_abort(nw); return (false); } static bool dump_nhop(const struct user_nhop *unhop, struct nlmsghdr *hdr, struct nl_writer *nw) { struct nhop_object *nh = unhop->un_nhop_src; if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg))) goto enomem; struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg); ENOMEM_IF_NULL(nhm); nhm->nh_family = nhop_get_neigh_family(nh); nhm->nh_scope = 0; // XXX: what's that? nhm->nh_protocol = unhop->un_protocol; nhm->nh_flags = 0; nlattr_add_u32(nw, NHA_ID, unhop->un_idx); if (nh->nh_flags & NHF_BLACKHOLE) { nlattr_add_flag(nw, NHA_BLACKHOLE); goto done; } nlattr_add_u32(nw, NHA_OIF, nh->nh_ifp->if_index); switch (nh->gw_sa.sa_family) { #ifdef INET case AF_INET: nlattr_add(nw, NHA_GATEWAY, 4, &nh->gw4_sa.sin_addr); break; #endif #ifdef INET6 case AF_INET6: { struct in6_addr addr = nh->gw6_sa.sin6_addr; in6_clearscope(&addr); nlattr_add(nw, NHA_GATEWAY, 16, &addr); break; } #endif } done: if (nlmsg_end(nw)) return (true); enomem: nlmsg_abort(nw); return (false); } static void dump_unhop(const struct user_nhop *unhop, struct nlmsghdr *hdr, struct nl_writer *nw) { if (unhop->un_nhop_src != NULL) dump_nhop(unhop, hdr, nw); else dump_nhgrp(unhop, hdr, nw); } static int delete_unhop(struct unhop_ctl *ctl, struct nlmsghdr *hdr, uint32_t uidx) { struct user_nhop *unhop_ret, *unhop_base, *unhop_chain; struct user_nhop key = { .un_idx = uidx }; UN_WLOCK(ctl); CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop_base); if (unhop_base != NULL) { CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_base, unhop_ret); IF_DEBUG_LEVEL(LOG_DEBUG2) { char nhbuf[NHOP_PRINT_BUFSIZE]; nhop_print_buf_any(unhop_base->un_nhop, nhbuf, sizeof(nhbuf)); FIB_NH_LOG(LOG_DEBUG3, unhop_base->un_nhop, "removed base nhop %u: %s", uidx, nhbuf); } /* Unlink all child nexhops as well, keeping the chain intact */ unhop_chain = unhop_base->un_nextchild; while (unhop_chain != NULL) { CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_chain, unhop_ret); MPASS(unhop_chain == unhop_ret); IF_DEBUG_LEVEL(LOG_DEBUG3) { char nhbuf[NHOP_PRINT_BUFSIZE]; nhop_print_buf_any(unhop_chain->un_nhop, nhbuf, sizeof(nhbuf)); FIB_NH_LOG(LOG_DEBUG3, unhop_chain->un_nhop, "removed child nhop %u: %s", uidx, nhbuf); } unhop_chain = unhop_chain->un_nextchild; } } UN_WUNLOCK(ctl); if (unhop_base == NULL) { NL_LOG(LOG_DEBUG, "unable to find unhop %u", uidx); return (ENOENT); } /* Report nexthop deletion */ struct netlink_walkargs wa = { .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_flags = hdr->nlmsg_flags, .hdr.nlmsg_type = NL_RTM_DELNEXTHOP, }; struct nl_writer nw = {}; if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP)) { NL_LOG(LOG_DEBUG, "error allocating message writer"); return (ENOMEM); } dump_unhop(unhop_base, &wa.hdr, &nw); nlmsg_flush(&nw); while (unhop_base != NULL) { unhop_chain = unhop_base->un_nextchild; - epoch_call(net_epoch_preempt, destroy_unhop_epoch, - &unhop_base->un_epoch_ctx); + NET_EPOCH_CALL(destroy_unhop_epoch, &unhop_base->un_epoch_ctx); unhop_base = unhop_chain; } return (0); } static void consider_resize(struct unhop_ctl *ctl, uint32_t new_size) { void *new_ptr = NULL; size_t alloc_size; if (new_size == 0) return; if (new_size != 0) { alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_size); new_ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO); if (new_ptr == NULL) return; } NL_LOG(LOG_DEBUG, "resizing hash: %u -> %u", ctl->un_head.hash_size, new_size); UN_WLOCK(ctl); if (new_ptr != NULL) { CHT_SLIST_RESIZE(&ctl->un_head, unhop, new_ptr, new_size); } UN_WUNLOCK(ctl); if (new_ptr != NULL) free(new_ptr, M_NETLINK); } static bool __noinline vnet_init_unhops(void) { uint32_t num_buckets = 16; size_t alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets); struct unhop_ctl *ctl = malloc(sizeof(struct unhop_ctl), M_NETLINK, M_NOWAIT | M_ZERO); if (ctl == NULL) return (false); void *ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO); if (ptr == NULL) { free(ctl, M_NETLINK); return (false); } CHT_SLIST_INIT(&ctl->un_head, ptr, num_buckets); UN_LOCK_INIT(ctl); if (!atomic_cmpset_ptr((uintptr_t *)&V_un_ctl, (uintptr_t)NULL, (uintptr_t)ctl)) { free(ptr, M_NETLINK); free(ctl, M_NETLINK); } if (atomic_load_ptr(&V_un_ctl) == NULL) return (false); NL_LOG(LOG_NOTICE, "UNHOPS init done"); return (true); } static void vnet_destroy_unhops(const void *unused __unused) { struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); struct user_nhop *unhop, *tmp; if (ctl == NULL) return; V_un_ctl = NULL; /* Wait till all unhop users finish their reads */ - epoch_wait_preempt(net_epoch_preempt); + NET_EPOCH_WAIT(); UN_WLOCK(ctl); CHT_SLIST_FOREACH_SAFE(&ctl->un_head, unhop, unhop, tmp) { destroy_unhop(unhop); } CHT_SLIST_FOREACH_SAFE_END; UN_WUNLOCK(ctl); free(ctl->un_head.ptr, M_NETLINK); free(ctl, M_NETLINK); } VNET_SYSUNINIT(vnet_destroy_unhops, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_destroy_unhops, NULL); static int nlattr_get_nhg(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) { int error = 0; /* Verify attribute correctness */ struct nexthop_grp *grp = NLA_DATA(nla); int data_len = NLA_DATA_LEN(nla); int count = data_len / sizeof(*grp); if (count == 0 || (count * sizeof(*grp) != data_len)) { NL_LOG(LOG_DEBUG, "Invalid length for RTA_GROUP: %d", data_len); return (EINVAL); } *((struct nlattr **)target) = nla; return (error); } struct nl_parsed_nhop { uint32_t nha_id; uint8_t nha_blackhole; uint8_t nha_groups; struct ifnet *nha_oif; struct sockaddr *nha_gw; struct nlattr *nha_group; uint8_t nh_family; uint8_t nh_protocol; }; #define _IN(_field) offsetof(struct nhmsg, _field) #define _OUT(_field) offsetof(struct nl_parsed_nhop, _field) static const struct nlfield_parser nlf_p_nh[] = { { .off_in = _IN(nh_family), .off_out = _OUT(nh_family), .cb = nlf_get_u8 }, { .off_in = _IN(nh_protocol), .off_out = _OUT(nh_protocol), .cb = nlf_get_u8 }, }; static const struct nlattr_parser nla_p_nh[] = { { .type = NHA_ID, .off = _OUT(nha_id), .cb = nlattr_get_uint32 }, { .type = NHA_GROUP, .off = _OUT(nha_group), .cb = nlattr_get_nhg }, { .type = NHA_BLACKHOLE, .off = _OUT(nha_blackhole), .cb = nlattr_get_flag }, { .type = NHA_OIF, .off = _OUT(nha_oif), .cb = nlattr_get_ifp }, { .type = NHA_GATEWAY, .off = _OUT(nha_gw), .cb = nlattr_get_ip }, { .type = NHA_GROUPS, .off = _OUT(nha_groups), .cb = nlattr_get_flag }, }; #undef _IN #undef _OUT NL_DECLARE_PARSER(nhmsg_parser, struct nhmsg, nlf_p_nh, nla_p_nh); static bool eligible_nhg(const struct nhop_object *nh) { return (nh->nh_flags & NHF_GATEWAY); } static int newnhg(struct unhop_ctl *ctl, struct nl_parsed_nhop *attrs, struct user_nhop *unhop) { struct nexthop_grp *grp = NLA_DATA(attrs->nha_group); int count = NLA_DATA_LEN(attrs->nha_group) / sizeof(*grp); struct weightened_nhop *wn; wn = malloc(sizeof(*wn) * count, M_NETLINK, M_NOWAIT | M_ZERO); if (wn == NULL) return (ENOMEM); for (int i = 0; i < count; i++) { struct user_nhop *unhop; unhop = nl_find_base_unhop(ctl, grp[i].id); if (unhop == NULL) { NL_LOG(LOG_DEBUG, "unable to find uidx %u", grp[i].id); free(wn, M_NETLINK); return (ESRCH); } else if (unhop->un_nhop_src == NULL) { NL_LOG(LOG_DEBUG, "uidx %u is a group, nested group unsupported", grp[i].id); free(wn, M_NETLINK); return (ENOTSUP); } else if (!eligible_nhg(unhop->un_nhop_src)) { NL_LOG(LOG_DEBUG, "uidx %u nhop is not mpath-eligible", grp[i].id); free(wn, M_NETLINK); return (ENOTSUP); } /* * TODO: consider more rigid eligibility checks: * restrict nexthops with the same gateway */ wn[i].nh = unhop->un_nhop_src; wn[i].weight = grp[i].weight; } unhop->un_nhgrp_src = wn; unhop->un_nhgrp_count = count; return (0); } static int newnhop(struct nl_parsed_nhop *attrs, struct user_nhop *unhop) { struct ifaddr *ifa = NULL; struct nhop_object *nh; int error; if (!attrs->nha_blackhole) { if (attrs->nha_gw == NULL) { NL_LOG(LOG_DEBUG, "missing NHA_GATEWAY"); return (EINVAL); } if (attrs->nha_oif == NULL) { NL_LOG(LOG_DEBUG, "missing NHA_OIF"); return (EINVAL); } if (ifa == NULL) ifa = ifaof_ifpforaddr(attrs->nha_gw, attrs->nha_oif); if (ifa == NULL) { NL_LOG(LOG_DEBUG, "Unable to determine default source IP"); return (EINVAL); } } int family = attrs->nha_gw != NULL ? attrs->nha_gw->sa_family : attrs->nh_family; nh = nhop_alloc(RT_DEFAULT_FIB, family); if (nh == NULL) { NL_LOG(LOG_DEBUG, "Unable to allocate nexthop"); return (ENOMEM); } nhop_set_uidx(nh, attrs->nha_id); if (attrs->nha_blackhole) nhop_set_blackhole(nh, NHF_BLACKHOLE); else { nhop_set_gw(nh, attrs->nha_gw, true); nhop_set_transmit_ifp(nh, attrs->nha_oif); nhop_set_src(nh, ifa); } error = nhop_get_unlinked(nh); if (error != 0) { NL_LOG(LOG_DEBUG, "unable to finalize nexthop"); return (error); } IF_DEBUG_LEVEL(LOG_DEBUG2) { char nhbuf[NHOP_PRINT_BUFSIZE]; nhop_print_buf(nh, nhbuf, sizeof(nhbuf)); NL_LOG(LOG_DEBUG2, "Adding unhop %u: %s", attrs->nha_id, nhbuf); } unhop->un_nhop_src = nh; return (0); } static int rtnl_handle_newnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct user_nhop *unhop; int error; if ((__predict_false(V_un_ctl == NULL)) && (!vnet_init_unhops())) return (ENOMEM); struct unhop_ctl *ctl = V_un_ctl; struct nl_parsed_nhop attrs = {}; error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); if (error != 0) return (error); /* * Get valid nha_id. Treat nha_id == 0 (auto-assignment) as a second-class * citizen. */ if (attrs.nha_id == 0) { attrs.nha_id = find_spare_uidx(ctl); if (attrs.nha_id == 0) { NL_LOG(LOG_DEBUG, "Unable to get spare uidx"); return (ENOSPC); } } NL_LOG(LOG_DEBUG, "IFINDEX %d", attrs.nha_oif ? attrs.nha_oif->if_index : 0); unhop = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO); if (unhop == NULL) { NL_LOG(LOG_DEBUG, "Unable to allocate user_nhop"); return (ENOMEM); } unhop->un_idx = attrs.nha_id; unhop->un_protocol = attrs.nh_protocol; if (attrs.nha_group) error = newnhg(ctl, &attrs, unhop); else error = newnhop(&attrs, unhop); if (error != 0) { free(unhop, M_NETLINK); return (error); } UN_WLOCK(ctl); /* Check if uidx already exists */ struct user_nhop *tmp = NULL; CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, unhop, tmp); if (tmp != NULL) { UN_WUNLOCK(ctl); NL_LOG(LOG_DEBUG, "nhop idx %u already exists", attrs.nha_id); destroy_unhop(unhop); return (EEXIST); } CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, unhop); uint32_t num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->un_head); UN_WUNLOCK(ctl); /* Report addition of the next nexhop */ struct netlink_walkargs wa = { .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_flags = hdr->nlmsg_flags, .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP, }; struct nl_writer nw = {}; if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP)) { NL_LOG(LOG_DEBUG, "error allocating message writer"); return (ENOMEM); } dump_unhop(unhop, &wa.hdr, &nw); nlmsg_flush(&nw); consider_resize(ctl, num_buckets_new); return (0); } static int rtnl_handle_delnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); int error; if (__predict_false(ctl == NULL)) return (ESRCH); struct nl_parsed_nhop attrs = {}; error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); if (error != 0) return (error); if (attrs.nha_id == 0) { NL_LOG(LOG_DEBUG, "NHA_ID not set"); return (EINVAL); } error = delete_unhop(ctl, hdr, attrs.nha_id); return (error); } static bool match_unhop(const struct nl_parsed_nhop *attrs, struct user_nhop *unhop) { if (attrs->nha_id != 0 && unhop->un_idx != attrs->nha_id) return (false); if (attrs->nha_groups != 0 && unhop->un_nhgrp_src == NULL) return (false); if (attrs->nha_oif != NULL && (unhop->un_nhop_src == NULL || unhop->un_nhop_src->nh_ifp != attrs->nha_oif)) return (false); return (true); } static int rtnl_handle_getnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); struct user_nhop *unhop; UN_TRACKER; int error; if (__predict_false(ctl == NULL)) return (ESRCH); struct nl_parsed_nhop attrs = {}; error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); if (error != 0) return (error); struct netlink_walkargs wa = { .nw = npt->nw, .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_flags = hdr->nlmsg_flags, .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP, }; if (attrs.nha_id != 0) { NL_LOG(LOG_DEBUG2, "searching for uidx %u", attrs.nha_id); struct user_nhop key= { .un_idx = attrs.nha_id }; UN_RLOCK(ctl); CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); UN_RUNLOCK(ctl); if (unhop == NULL) return (ESRCH); dump_unhop(unhop, &wa.hdr, wa.nw); return (0); } UN_RLOCK(ctl); wa.hdr.nlmsg_flags |= NLM_F_MULTI; CHT_SLIST_FOREACH(&ctl->un_head, unhop, unhop) { if (UNHOP_IS_MASTER(unhop) && match_unhop(&attrs, unhop)) dump_unhop(unhop, &wa.hdr, wa.nw); } CHT_SLIST_FOREACH_END; UN_RUNLOCK(ctl); if (wa.error == 0) { if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) return (ENOMEM); } return (0); } static const struct rtnl_cmd_handler cmd_handlers[] = { { .cmd = NL_RTM_NEWNEXTHOP, .name = "RTM_NEWNEXTHOP", .cb = &rtnl_handle_newnhop, .priv = PRIV_NET_ROUTE, }, { .cmd = NL_RTM_DELNEXTHOP, .name = "RTM_DELNEXTHOP", .cb = &rtnl_handle_delnhop, .priv = PRIV_NET_ROUTE, }, { .cmd = NL_RTM_GETNEXTHOP, .name = "RTM_GETNEXTHOP", .cb = &rtnl_handle_getnhop, } }; static const struct nlhdr_parser *all_parsers[] = { &nhmsg_parser }; void rtnl_nexthops_init(void) { NL_VERIFY_PARSERS(all_parsers); rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers)); }