diff --git a/sys/net/if_clone.c b/sys/net/if_clone.c index 59d60645cb89..3dd577850f82 100644 --- a/sys/net/if_clone.c +++ b/sys/net/if_clone.c @@ -1,874 +1,977 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2012 Gleb Smirnoff * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*
  * @(#)if.c 8.5 (Berkeley) 1/9/95
  * $FreeBSD$
  */

+#include "opt_netlink.h"
+
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
 #include
+#include
+#include
+#include
+#include
+
 /* Current IF_MAXUNIT expands maximum to 5 characters. */
 #define IFCLOSIZ (IFNAMSIZ - 5)

 /*
  * Structure describing a `cloning' interface.
  *
  * List of locks
  * (c) const until freeing
  * (d) driver specific data, may need external protection.
  * (e) locked by if_cloners_mtx
  * (i) locked by ifc_mtx mtx
  */
 struct if_clone {
 	char ifc_name[IFCLOSIZ]; /* (c) Name of device, e.g. `gif' */
 	struct unrhdr *ifc_unrhdr; /* (c) alloc_unr(9) header */
 	int ifc_maxunit; /* (c) maximum unit number */
 	int ifc_flags; /* IFC_F_* flags; see ifc_flags_set()/ifc_flags_get() */
 	long ifc_refcnt; /* (i) Reference count. */
 	LIST_HEAD(, ifnet) ifc_iflist; /* (i) List of cloned interfaces */
 	struct mtx ifc_mtx; /* Mutex to protect members. */

 	ifc_match_f *ifc_match; /* (c) Matcher function */
 	ifc_create_f *ifc_create; /* (c) Creates new interface */
 	ifc_destroy_f *ifc_destroy; /* (c) Destroys cloned interface */

+	ifc_create_nl_f *create_nl; /* (c) Netlink creation handler */
+	ifc_modify_nl_f *modify_nl; /* (c) Netlink modification handler */
+	ifc_dump_nl_f *dump_nl; /* (c) Netlink dump handler */
+
 #ifdef CLONE_COMPAT_13
 	/* (c) Driver specific cloning functions. Called with no locks held. */
 	union {
 		struct { /* advanced cloner */
 			ifc_create_t *_ifc_create;
 			ifc_destroy_t *_ifc_destroy;
 		} A;
 		struct { /* simple cloner */
 			ifcs_create_t *_ifcs_create;
 			ifcs_destroy_t *_ifcs_destroy;
 			int _ifcs_minifs; /* minimum ifs */
 		} S;
 	} U;
 /* Shorthand accessors for the compat union members above. */
 #define ifca_create U.A._ifc_create
 #define ifca_destroy U.A._ifc_destroy
 #define ifcs_create U.S._ifcs_create
 #define ifcs_destroy U.S._ifcs_destroy
 #define ifcs_minifs U.S._ifcs_minifs
 #endif

 	LIST_ENTRY(if_clone) ifc_list; /* (e) On list of cloners */
 };

 static void if_clone_free(struct if_clone *ifc);
-static int if_clone_createif(struct if_clone *ifc, char *name, size_t len,
-    struct ifc_data *ifd, struct ifnet **ifpp);
+static int if_clone_createif_nl(struct if_clone *ifc, const char *name,
+    struct ifc_data_nl *ifd);

 static int ifc_simple_match(struct if_clone *ifc, const char *name);
 static int ifc_handle_unit(struct if_clone *ifc, char *name, size_t len, int *punit);
 static struct if_clone *ifc_find_cloner(const char *name);
 static struct if_clone *ifc_find_cloner_match(const char *name);

 #ifdef CLONE_COMPAT_13
 static int ifc_simple_create_wrapper(struct if_clone *ifc, char *name, size_t maxlen, struct ifc_data *ifc_data, struct ifnet **ifpp);
 static int ifc_advanced_create_wrapper(struct if_clone *ifc, char *name, size_t maxlen, struct ifc_data *ifc_data, struct ifnet **ifpp);
 #endif

 /*
  * One global mutex guards the cloner list and the cloner count; the list
  * head and the counter themselves are defined through the VNET macros.
  */
 static struct mtx if_cloners_mtx;
 MTX_SYSINIT(if_cloners_lock, &if_cloners_mtx, "if_cloners lock", MTX_DEF);
 VNET_DEFINE_STATIC(int, if_cloners_count);
 VNET_DEFINE(LIST_HEAD(, if_clone), if_cloners);

 #define V_if_cloners_count VNET(if_cloners_count)
 #define V_if_cloners VNET(if_cloners)

 #define IF_CLONERS_LOCK_ASSERT() mtx_assert(&if_cloners_mtx, MA_OWNED)
 #define IF_CLONERS_LOCK() mtx_lock(&if_cloners_mtx)
 #define IF_CLONERS_UNLOCK() mtx_unlock(&if_cloners_mtx)

 /* Per-cloner mutex helpers (the "(i)" fields of struct if_clone). */
 #define IF_CLONE_LOCK_INIT(ifc) \
 	mtx_init(&(ifc)->ifc_mtx, "if_clone lock", NULL, MTX_DEF)
 #define IF_CLONE_LOCK_DESTROY(ifc) mtx_destroy(&(ifc)->ifc_mtx)
 #define IF_CLONE_LOCK_ASSERT(ifc) mtx_assert(&(ifc)->ifc_mtx, MA_OWNED)
 #define IF_CLONE_LOCK(ifc) mtx_lock(&(ifc)->ifc_mtx)
 #define IF_CLONE_UNLOCK(ifc) mtx_unlock(&(ifc)->ifc_mtx)

 /*
  * Reference counting.  The plain variants take ifc_mtx themselves; the
  * _LOCKED variants expect it to be held.  IF_CLONE_REMREF_LOCKED() always
  * releases the mutex before returning and calls if_clone_free() when the
  * count reaches zero, so the cloner must not be touched after a REMREF.
  */
 #define IF_CLONE_ADDREF(ifc) \
 	do { \
 		IF_CLONE_LOCK(ifc); \
 		IF_CLONE_ADDREF_LOCKED(ifc); \
 		IF_CLONE_UNLOCK(ifc); \
 	} while (0)
 #define IF_CLONE_ADDREF_LOCKED(ifc) \
 	do { \
 		IF_CLONE_LOCK_ASSERT(ifc); \
 		KASSERT((ifc)->ifc_refcnt >= 0, \
 		    ("negative refcnt %ld", (ifc)->ifc_refcnt)); \
 		(ifc)->ifc_refcnt++; \
 	} while (0)
 #define IF_CLONE_REMREF(ifc) \
 	do { \
 		IF_CLONE_LOCK(ifc); \
 		IF_CLONE_REMREF_LOCKED(ifc); \
 	} while (0)
 #define IF_CLONE_REMREF_LOCKED(ifc) \
 	do { \
 		IF_CLONE_LOCK_ASSERT(ifc); \
 		KASSERT((ifc)->ifc_refcnt > 0, \
 		    ("bogus refcnt %ld", (ifc)->ifc_refcnt)); \
 		if (--(ifc)->ifc_refcnt == 0) { \
 			IF_CLONE_UNLOCK(ifc); \
 			if_clone_free(ifc); \
 		} else { \
 			/* silently free the lock */ \
 			IF_CLONE_UNLOCK(ifc); \
 		} \
 	} while (0)

 /* Insert/remove an ifnet on a cloner's ifc_iflist via its if_clones link. */
 #define IFC_IFLIST_INSERT(_ifc, _ifp) \
 	LIST_INSERT_HEAD(&_ifc->ifc_iflist, _ifp, if_clones)
 #define IFC_IFLIST_REMOVE(_ifc, _ifp) \
 	LIST_REMOVE(_ifp, if_clones)

 static MALLOC_DEFINE(M_CLONE, "clone", "interface cloning framework");

 /* Initialize the (empty) V_if_cloners list. */
 void
 vnet_if_clone_init(void)
 {
 	LIST_INIT(&V_if_cloners);
 }

 /*
  * Lookup and create a clone network interface.
*/
 /*
  * ioctl-era entry point.  Picks the first cloner whose matcher accepts
  * 'name' (EINVAL when none does), repackages 'ifd' as a struct ifc_data_nl
  * whose netlink fields are left zeroed, and hands it to
  * if_clone_createif_nl().  When 'ifpp' is non-NULL it receives the ifp
  * recorded in the repackaged request.  The unit chosen during creation lives
  * in that local copy only; it is not written back into 'ifd'.
  */
 int
-ifc_create_ifp(const char *name, struct ifc_data *ifd,
-    struct ifnet **ifpp)
+ifc_create_ifp(const char *name, struct ifc_data *ifd, struct ifnet **ifpp)
 {
-	struct if_clone *ifc;
-	char ifname[IFNAMSIZ];
-	struct ifnet *ifp = NULL;
-	int error;
+	struct if_clone *ifc = ifc_find_cloner_match(name);

-	/* Try to find an applicable cloner for this request */
-	ifc = ifc_find_cloner_match(name);
 	if (ifc == NULL)
 		return (EINVAL);

-	strlcpy(ifname, name, IFNAMSIZ);
-	error = if_clone_createif(ifc, ifname, IFNAMSIZ, ifd, &ifp);
+	struct ifc_data_nl ifd_new = {
+		.flags = ifd->flags,
+		.unit = ifd->unit,
+		.params = ifd->params,
+	};
+
+	int error = if_clone_createif_nl(ifc, name, &ifd_new);
+
 	if (ifpp != NULL)
-		*ifpp = ifp;
+		*ifpp = ifd_new.ifp;

 	return (error);
 }

 /*
  * Netlink flavour of ifc_create_ifp().  The boolean result only says whether
  * a cloner matched 'name'; the outcome of the creation itself is stored in
  * ifd->error (EINVAL when nothing matched), so 'true' does not imply success.
  */
+bool
+ifc_create_ifp_nl(const char *name, struct ifc_data_nl *ifd)
+{
+	struct if_clone *ifc = ifc_find_cloner_match(name);
+	if (ifc == NULL) {
+		ifd->error = EINVAL;
+		return (false);
+	}
+
+	ifd->error = if_clone_createif_nl(ifc, name, ifd);
+
+	return (true);
+}
+
 /*
  * Create an interface from 'name' with opaque 'params'.  On success the
  * name of the created interface is copied back into 'name' (at most 'len'
  * bytes, via strlcpy()).
  */
 int
 if_clone_create(char *name, size_t len, caddr_t params)
 {
 	struct ifc_data ifd = { .params = params };
 	struct ifnet *ifp;

 	int error = ifc_create_ifp(name, &ifd, &ifp);

 	/* ifp is only read when creation succeeded. */
 	if (error == 0)
 		strlcpy(name, if_name(ifp), len);

 	return (error);
 }

 /*
  * Hand a netlink modification request to the cloner registered under
  * ifp->if_dname.  Returns false (ifd->error = EINVAL) when no cloner with
  * that exact name is found; otherwise true with the handler's return value
  * in ifd->error.  The modify_nl pointer is called without a NULL check.
  */
+bool
+ifc_modify_ifp_nl(struct ifnet *ifp, struct ifc_data_nl *ifd)
+{
+	struct if_clone *ifc = ifc_find_cloner(ifp->if_dname);
+	if (ifc == NULL) {
+		ifd->error = EINVAL;
+		return (false);
+	}
+
+	ifd->error = (*ifc->modify_nl)(ifp, ifd);
+	return (true);
+}
+
 /*
  * Let the cloner registered under ifp->if_dname write its attributes into
  * 'nw'.  Returns false when no such cloner is found.  The dump_nl pointer is
  * called without a NULL check.
  */
+bool
+ifc_dump_ifp_nl(struct ifnet *ifp, struct nl_writer *nw)
+{
+	struct if_clone *ifc = ifc_find_cloner(ifp->if_dname);
+	if (ifc == NULL)
+		return (false);
+
+	(*ifc->dump_nl)(ifp, nw);
+	return (true);
+}
+
 /*
  * Default creation handler: converts the request back into a struct ifc_data
  * and calls the cloner's ifc_create callback, which stores the new interface
  * in ifd->ifp.
  */
+static int
+ifc_create_ifp_nl_default(struct if_clone *ifc, char *name, size_t len,
+    struct ifc_data_nl *ifd)
+{
+	struct ifc_data ifd_new = {
+		.flags = ifd->flags,
+		.unit = ifd->unit,
+		.params = ifd->params,
+	};
+
+	return ((*ifc->ifc_create)(ifc, name, len, &ifd_new, &ifd->ifp));
+}
+
 /*
  * Default modification handler: forwards to nl_modify_ifp_generic() when the
  * request carries parsed link attributes, otherwise reports success.
  */
+static int
+ifc_modify_ifp_nl_default(struct ifnet *ifp, struct ifc_data_nl *ifd)
+{
+	if (ifd->lattrs != NULL)
+		return (nl_modify_ifp_generic(ifp, ifd->lattrs, ifd->bm, ifd->npt));
+	return (0);
+}
+
 /*
  * Default dump handler: emits a nested IFLA_LINKINFO attribute containing
  * IFLA_INFO_KIND set to ifp->if_dname.  Nothing is written when
  * nlattr_add_nested() returns 0.
  */
+static void
+ifc_dump_ifp_nl_default(struct ifnet *ifp, struct nl_writer *nw)
+{
+	int off = nlattr_add_nested(nw, IFLA_LINKINFO);
+
+	if (off != 0) {
+		nlattr_add_string(nw, IFLA_INFO_KIND, ifp->if_dname);
+		nlattr_set_len(nw, off);
+	}
+}
+
 /*
  * Put 'ifp' on the cloner's interface list and, unless the cloner has
  * IFC_NOGROUP set, add it to the interface group named after the cloner.
  */
 void
 ifc_link_ifp(struct if_clone *ifc, struct ifnet *ifp)
 {
 	if ((ifc->ifc_flags & IFC_NOGROUP) == 0)
 		if_addgroup(ifp, ifc->ifc_name);

 	IF_CLONE_LOCK(ifc);
 	IFC_IFLIST_INSERT(ifc, ifp);
 	IF_CLONE_UNLOCK(ifc);
 }

 /* Thin wrapper around ifc_link_ifp(). */
 void
 if_clone_addif(struct if_clone *ifc, struct ifnet *ifp)
 {
 	ifc_link_ifp(ifc, ifp);
 }

 /*
  * Undo ifc_link_ifp().  Returns true when 'ifp' was found on the cloner's
  * list and removed; in that case the group membership is dropped as well
  * unless the cloner has IFC_F_NOGROUP set.
  */
 bool
 ifc_unlink_ifp(struct if_clone *ifc, struct ifnet *ifp)
 {
 	struct ifnet *ifcifp;

 	IF_CLONE_LOCK(ifc);
 	LIST_FOREACH(ifcifp, &ifc->ifc_iflist, if_clones) {
 		if (ifcifp == ifp) {
 			IFC_IFLIST_REMOVE(ifc, ifp);
 			break;
 		}
 	}
 	IF_CLONE_UNLOCK(ifc);

 	/* ifcifp is expected to be NULL here when the loop ran to completion. */
 	if (ifcifp != NULL && (ifc->ifc_flags & IFC_F_NOGROUP) == 0)
 		if_delgroup(ifp, ifc->ifc_name);

 	return (ifcifp != NULL);
 }

 /*
  * Return the first cloner on V_if_cloners whose ifc_match callback accepts
  * 'name', or NULL.  No reference is taken; the list lock is dropped before
  * returning.
  */
 static struct if_clone *
 ifc_find_cloner_match(const char *name)
 {
 	struct if_clone *ifc;

 	IF_CLONERS_LOCK();
 	LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
 		if (ifc->ifc_match(ifc, name))
 			break;
 	}
 	IF_CLONERS_UNLOCK();

 	return (ifc);
 }

 /*
  * Return the cloner on V_if_cloners whose ifc_name equals 'name' exactly, or
  * NULL.  No reference is taken; the list lock is dropped before returning.
  */
 static struct if_clone *
 ifc_find_cloner(const char *name)
 {
 	struct if_clone *ifc;

 	IF_CLONERS_LOCK();
 	LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
 		if (strcmp(ifc->ifc_name, name) == 0) {
 			break;
 		}
 	}
 	IF_CLONERS_UNLOCK();

 	return (ifc);
 }

 /* ifc_find_cloner() evaluated with 'vnet' as the current vnet. */
 static struct if_clone *
 ifc_find_cloner_in_vnet(const char *name, struct vnet *vnet)
 {
 	CURVNET_SET_QUIET(vnet);
 	struct if_clone *ifc = ifc_find_cloner(name);
 	CURVNET_RESTORE();

 	return (ifc);
 }

 /*
  * Create a clone network interface.
*/ static int -if_clone_createif(struct if_clone *ifc, char *name, size_t len, - struct ifc_data *ifd, struct ifnet **ifpp) +if_clone_createif_nl(struct if_clone *ifc, const char *ifname, struct ifc_data_nl *ifd) { - int err, unit = 0; + char name[IFNAMSIZ]; + int error; + + strlcpy(name, ifname, sizeof(name)); if (ifunit(name) != NULL) return (EEXIST); if (ifc->ifc_flags & IFC_F_AUTOUNIT) { - if ((err = ifc_handle_unit(ifc, name, len, &unit)) != 0) - return (err); - ifd->unit = unit; + if ((error = ifc_handle_unit(ifc, name, sizeof(name), &ifd->unit)) != 0) + return (error); } - *ifpp = NULL; - err = (*ifc->ifc_create)(ifc, name, len, ifd, ifpp); - if (err == 0) { - MPASS(*ifpp != NULL); - if_clone_addif(ifc, *ifpp); - } else if (ifc->ifc_flags & IFC_F_AUTOUNIT) - ifc_free_unit(ifc, unit); + if (ifd->lattrs != NULL) + error = (*ifc->create_nl)(ifc, name, sizeof(name), ifd); + else + error = ifc_create_ifp_nl_default(ifc, name, sizeof(name), ifd); + if (error != 0) { + if (ifc->ifc_flags & IFC_F_AUTOUNIT) + ifc_free_unit(ifc, ifd->unit); + return (error); + } - return (err); + MPASS(ifd->ifp != NULL); + if_clone_addif(ifc, ifd->ifp); + + if (ifd->lattrs != NULL) + error = (*ifc->modify_nl)(ifd->ifp, ifd); + + return (error); } /* * Lookup and destroy a clone network interface. */ int if_clone_destroy(const char *name) { int err; struct if_clone *ifc; struct ifnet *ifp; ifp = ifunit_ref(name); if (ifp == NULL) return (ENXIO); ifc = ifc_find_cloner_in_vnet(ifp->if_dname, ifp->if_home_vnet); if (ifc == NULL) { if_rele(ifp); return (EINVAL); } err = if_clone_destroyif(ifc, ifp); if_rele(ifp); return err; } /* * Destroy a clone network interface. */ static int if_clone_destroyif_flags(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) { int err; /* * Given that the cloned ifnet might be attached to a different * vnet from where its cloner was registered, we have to * switch to the vnet context of the target vnet. 
*/ CURVNET_SET_QUIET(ifp->if_vnet); if (!ifc_unlink_ifp(ifc, ifp)) { CURVNET_RESTORE(); return (ENXIO); /* ifp is not on the list. */ } int unit = ifp->if_dunit; err = (*ifc->ifc_destroy)(ifc, ifp, flags); if (err != 0) ifc_link_ifp(ifc, ifp); else if (ifc->ifc_flags & IFC_F_AUTOUNIT) ifc_free_unit(ifc, unit); CURVNET_RESTORE(); return (err); } int if_clone_destroyif(struct if_clone *ifc, struct ifnet *ifp) { return (if_clone_destroyif_flags(ifc, ifp, 0)); } static struct if_clone * if_clone_alloc(const char *name, int maxunit) { struct if_clone *ifc; KASSERT(name != NULL, ("%s: no name\n", __func__)); ifc = malloc(sizeof(struct if_clone), M_CLONE, M_WAITOK | M_ZERO); strncpy(ifc->ifc_name, name, IFCLOSIZ-1); IF_CLONE_LOCK_INIT(ifc); IF_CLONE_ADDREF(ifc); ifc->ifc_maxunit = maxunit ? maxunit : IF_MAXUNIT; ifc->ifc_unrhdr = new_unrhdr(0, ifc->ifc_maxunit, &ifc->ifc_mtx); LIST_INIT(&ifc->ifc_iflist); + ifc->create_nl = ifc_create_ifp_nl_default; + ifc->modify_nl = ifc_modify_ifp_nl_default; + ifc->dump_nl = ifc_dump_ifp_nl_default; + return (ifc); } static int if_clone_attach(struct if_clone *ifc) { struct if_clone *ifc1; IF_CLONERS_LOCK(); LIST_FOREACH(ifc1, &V_if_cloners, ifc_list) if (strcmp(ifc->ifc_name, ifc1->ifc_name) == 0) { IF_CLONERS_UNLOCK(); IF_CLONE_REMREF(ifc); return (EEXIST); } LIST_INSERT_HEAD(&V_if_cloners, ifc, ifc_list); V_if_cloners_count++; IF_CLONERS_UNLOCK(); return (0); } struct if_clone * ifc_attach_cloner(const char *name, struct if_clone_addreq *req) { if (req->create_f == NULL || req->destroy_f == NULL) return (NULL); if (strnlen(name, IFCLOSIZ) >= (IFCLOSIZ - 1)) return (NULL); struct if_clone *ifc = if_clone_alloc(name, req->maxunit); ifc->ifc_match = req->match_f != NULL ? 
req->match_f : ifc_simple_match; ifc->ifc_create = req->create_f; ifc->ifc_destroy = req->destroy_f; ifc->ifc_flags = (req->flags & (IFC_F_AUTOUNIT | IFC_F_NOGROUP)); + if (req->version == 2) { + struct if_clone_addreq_v2 *req2 = (struct if_clone_addreq_v2 *)req; + + ifc->create_nl = req2->create_nl_f; + ifc->modify_nl = req2->modify_nl_f; + ifc->dump_nl = req2->dump_nl_f; + } + + ifc->dump_nl = ifc_dump_ifp_nl_default; + if (if_clone_attach(ifc) != 0) return (NULL); EVENTHANDLER_INVOKE(if_clone_event, ifc); return (ifc); } void ifc_detach_cloner(struct if_clone *ifc) { if_clone_detach(ifc); } #ifdef CLONE_COMPAT_13 static int ifc_advanced_create_wrapper(struct if_clone *ifc, char *name, size_t maxlen, struct ifc_data *ifc_data, struct ifnet **ifpp) { int error = ifc->ifca_create(ifc, name, maxlen, ifc_data->params); if (error == 0) *ifpp = ifunit(name); return (error); } static int ifc_advanced_destroy_wrapper(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) { if (ifc->ifca_destroy == NULL) return (ENOTSUP); return (ifc->ifca_destroy(ifc, ifp)); } struct if_clone * if_clone_advanced(const char *name, u_int maxunit, ifc_match_t match, ifc_create_t create, ifc_destroy_t destroy) { struct if_clone *ifc; ifc = if_clone_alloc(name, maxunit); ifc->ifc_match = match; ifc->ifc_create = ifc_advanced_create_wrapper; ifc->ifc_destroy = ifc_advanced_destroy_wrapper; ifc->ifca_destroy = destroy; ifc->ifca_create = create; if (if_clone_attach(ifc) != 0) return (NULL); EVENTHANDLER_INVOKE(if_clone_event, ifc); return (ifc); } static int ifc_simple_create_wrapper(struct if_clone *ifc, char *name, size_t maxlen, struct ifc_data *ifc_data, struct ifnet **ifpp) { int unit = 0; ifc_name2unit(name, &unit); int error = ifc->ifcs_create(ifc, unit, ifc_data->params); if (error == 0) *ifpp = ifunit(name); return (error); } static int ifc_simple_destroy_wrapper(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) { if (ifp->if_dunit < ifc->ifcs_minifs && (flags & IFC_F_FORCE) 
== 0)
 		return (EINVAL);

 	ifc->ifcs_destroy(ifp);
 	return (0);
 }

 /*
  * Compat registration of a "simple" cloner: installs the simple wrappers,
  * sets IFC_F_AUTOUNIT and, once attached, creates interfaces for units
  * 0 .. minifs-1.  Returns NULL when a cloner of that name already exists.
  */
 struct if_clone *
 if_clone_simple(const char *name, ifcs_create_t create, ifcs_destroy_t destroy, u_int minifs)
 {
 	struct if_clone *ifc;
 	u_int unit;

 	ifc = if_clone_alloc(name, 0);
 	ifc->ifc_match = ifc_simple_match;
 	ifc->ifc_create = ifc_simple_create_wrapper;
 	ifc->ifc_destroy = ifc_simple_destroy_wrapper;
 	ifc->ifcs_create = create;
 	ifc->ifcs_destroy = destroy;
 	ifc->ifcs_minifs = minifs;
 	ifc->ifc_flags = IFC_F_AUTOUNIT;

 	if (if_clone_attach(ifc) != 0)
 		return (NULL);

 	/* Pre-create the mandatory interfaces; failure is only asserted. */
 	for (unit = 0; unit < minifs; unit++) {
 		char name[IFNAMSIZ];
 		int error __unused;
-		struct ifc_data ifd = {};
-		struct ifnet *ifp;
+		struct ifc_data_nl ifd = {};

 		snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, unit);
-		error = if_clone_createif(ifc, name, IFNAMSIZ, &ifd, &ifp);
+		error = if_clone_createif_nl(ifc, name, &ifd);
 		KASSERT(error == 0,
 		    ("%s: failed to create required interface %s", __func__, name));
 	}

 	EVENTHANDLER_INVOKE(if_clone_event, ifc);

 	return (ifc);
 }
 #endif

 /*
  * Unregister a network interface cloner.
  */
 void
 if_clone_detach(struct if_clone *ifc)
 {

 	IF_CLONERS_LOCK();
 	LIST_REMOVE(ifc, ifc_list);
 	V_if_cloners_count--;
 	IF_CLONERS_UNLOCK();

 	/* destroy all interfaces for this cloner */
 	while (!LIST_EMPTY(&ifc->ifc_iflist))
 		if_clone_destroyif_flags(ifc, LIST_FIRST(&ifc->ifc_iflist), IFC_F_FORCE);

 	/* Drop a reference; this may free the cloner. */
 	IF_CLONE_REMREF(ifc);
 }

 /*
  * Final teardown, reached from IF_CLONE_REMREF_LOCKED() when the reference
  * count drops to zero: releases the mutex, the unit allocator and the memory.
  */
 static void
 if_clone_free(struct if_clone *ifc)
 {

 	KASSERT(LIST_EMPTY(&ifc->ifc_iflist),
 	    ("%s: ifc_iflist not empty", __func__));

 	IF_CLONE_LOCK_DESTROY(ifc);
 	delete_unrhdr(ifc->ifc_unrhdr);
 	free(ifc, M_CLONE);
 }

 /*
  * Provide list of interface cloners to userspace.
  */
 int
 if_clone_list(struct if_clonereq *ifcr)
 {
 	char *buf, *dst, *outbuf = NULL;
 	struct if_clone *ifc;
 	int buf_count, count, err = 0;

 	if (ifcr->ifcr_count < 0)
 		return (EINVAL);

 	IF_CLONERS_LOCK();
 	/*
 	 * Set our internal output buffer size.  We could end up not
 	 * reporting a cloner that is added between the unlock and lock
 	 * below, but that's not a major problem.  Not capping our
 	 * allocation to the number of cloners actually in the system
 	 * would be a problem, because that would let arbitrary users
 	 * cause us to allocate arbitrary amounts of kernel memory.
 	 */
 	buf_count = (V_if_cloners_count < ifcr->ifcr_count) ?
 	    V_if_cloners_count : ifcr->ifcr_count;
 	IF_CLONERS_UNLOCK();

 	/* Allocate with the list lock dropped: M_WAITOK is used here. */
 	outbuf = malloc(IFNAMSIZ*buf_count, M_CLONE, M_WAITOK | M_ZERO);

 	IF_CLONERS_LOCK();

 	ifcr->ifcr_total = V_if_cloners_count;
 	if ((dst = ifcr->ifcr_buffer) == NULL) {
 		/* Just asking how many there are. */
 		goto done;
 	}
 	/* The cloner count may have shrunk while the lock was dropped. */
 	count = (V_if_cloners_count < buf_count) ?
 	    V_if_cloners_count : buf_count;

 	/* One fixed-width IFNAMSIZ slot per cloner name. */
 	for (ifc = LIST_FIRST(&V_if_cloners), buf = outbuf;
 	    ifc != NULL && count != 0;
 	    ifc = LIST_NEXT(ifc, ifc_list), count--, buf += IFNAMSIZ) {
 		strlcpy(buf, ifc->ifc_name, IFNAMSIZ);
 	}

 done:
 	IF_CLONERS_UNLOCK();
 	/* Slots that were not filled stay zeroed thanks to M_ZERO. */
 	if (err == 0 && dst != NULL)
 		err = copyout(outbuf, dst, buf_count*IFNAMSIZ);
 	if (outbuf != NULL)
 		free(outbuf, M_CLONE);
 	return (err);
 }

 #ifdef VIMAGE
 /*
  * if_clone_restoregroup() is used in context of if_vmove().
  *
  * Since if_detach_internal() has removed the interface from ALL groups, we
  * need to "restore" interface membership in the cloner's group. Note that
  * interface belongs to cloner in its home vnet, so we first find the original
  * cloner, and then we confirm that cloner with the same name exists in the
  * current vnet.
*/
 void
 if_clone_restoregroup(struct ifnet *ifp)
 {
 	struct if_clone *ifc;
 	struct ifnet *ifcifp;
 	char ifc_name[IFCLOSIZ] = { [0] = '\0' };

 	/* Pass 1 (home vnet): find the name of the cloner that lists ifp. */
 	CURVNET_SET_QUIET(ifp->if_home_vnet);
 	IF_CLONERS_LOCK();
 	LIST_FOREACH(ifc, &V_if_cloners, ifc_list) {
 		IF_CLONE_LOCK(ifc);
 		LIST_FOREACH(ifcifp, &ifc->ifc_iflist, if_clones) {
 			if (ifp == ifcifp) {
 				strncpy(ifc_name, ifc->ifc_name, IFCLOSIZ-1);
 				break;
 			}
 		}
 		IF_CLONE_UNLOCK(ifc);
 		if (ifc_name[0] != '\0')
 			break;
 	}
 	CURVNET_RESTORE();
 	/*
 	 * Pass 2 (vnet restored): look for a cloner of the same name that
 	 * does not have IFC_NOGROUP set.  The list lock is still held.
 	 */
 	LIST_FOREACH(ifc, &V_if_cloners, ifc_list)
 		if (strcmp(ifc->ifc_name, ifc_name) == 0 &&
 		    ((ifc->ifc_flags & IFC_NOGROUP) == 0))
 			break;
 	IF_CLONERS_UNLOCK();
 	if (ifc != NULL)
 		if_addgroup(ifp, ifc_name);
 }
 #endif

 /*
  * A utility function to extract unit numbers from interface names of
  * the form name###.
  *
  * Returns 0 on success and an error on failure.
  */
 int
 ifc_name2unit(const char *name, int *unit)
 {
 	const char *cp;
 	/* Overflow guards for the decimal accumulation below. */
 	int cutoff = INT_MAX / 10;
 	int cutlim = INT_MAX % 10;

 	/* Skip ahead to the first digit, if any. */
 	for (cp = name; *cp != '\0' && (*cp < '0' || *cp > '9'); cp++)
 		;
 	if (*cp == '\0') {
 		/* No digits at all: report "no unit given" as -1. */
 		*unit = -1;
 	} else if (cp[0] == '0' && cp[1] != '\0') {
 		/* Disallow leading zeroes. */
 		return (EINVAL);
 	} else {
 		for (*unit = 0; *cp != '\0'; cp++) {
 			if (*cp < '0' || *cp > '9') {
 				/* Bogus unit number. */
 				return (EINVAL);
 			}
 			/* Reject values that would not fit in an int. */
 			if (*unit > cutoff ||
 			    (*unit == cutoff && *cp - '0' > cutlim))
 				return (EINVAL);
 			*unit = (*unit * 10) + (*cp - '0');
 		}
 	}

 	return (0);
 }

 /*
  * Reserve exactly *unit for this cloner.  Returns ENOSPC when the unit is
  * above ifc_maxunit and EEXIST when the number is already allocated or an
  * interface named "<ifc_name><unit>" already exists.  On success a cloner
  * reference is taken; ifc_free_unit() drops it again.
  */
 static int
 ifc_alloc_unit_specific(struct if_clone *ifc, int *unit)
 {
 	char name[IFNAMSIZ];

 	if (*unit > ifc->ifc_maxunit)
 		return (ENOSPC);

 	if (alloc_unr_specific(ifc->ifc_unrhdr, *unit) == -1)
 		return (EEXIST);

 	snprintf(name, IFNAMSIZ, "%s%d", ifc->ifc_name, *unit);
 	if (ifunit(name) != NULL) {
 		free_unr(ifc->ifc_unrhdr, *unit);
 		return (EEXIST);
 	}

 	IF_CLONE_ADDREF(ifc);

 	return (0);
 }

 /*
  * Reserve the next usable unit.  alloc_unr() only supplies a starting
  * candidate, which is released again straight away; the real reservation is
  * made by ifc_alloc_unit_specific(), moving on to the following number for as
  * long as it reports EEXIST.
  */
 static int
 ifc_alloc_unit_next(struct if_clone *ifc, int *unit)
 {
 	int error;

 	*unit = alloc_unr(ifc->ifc_unrhdr);
 	if (*unit == -1)
 		return (ENOSPC);

 	free_unr(ifc->ifc_unrhdr, *unit);
 	for (;;) {
 		error = ifc_alloc_unit_specific(ifc, unit);
 		if (error != EEXIST)
 			break;

 		(*unit)++;
 	}

 	return (error);
 }

 /* A negative *unit asks for the next usable unit, otherwise for that one. */
 int
 ifc_alloc_unit(struct if_clone *ifc, int *unit)
 {
 	if (*unit < 0)
 		return (ifc_alloc_unit_next(ifc, unit));
 	else
 		return (ifc_alloc_unit_specific(ifc, unit));
 }

 /* Release a unit number and drop a cloner reference (may free the cloner). */
 void
 ifc_free_unit(struct if_clone *ifc, int unit)
 {

 	free_unr(ifc->ifc_unrhdr, unit);
 	IF_CLONE_REMREF(ifc);
 }

 /*
  * Default matcher: returns 1 when 'name' is the cloner name optionally
  * followed by decimal digits only, 0 otherwise.
  */
 static int
 ifc_simple_match(struct if_clone *ifc, const char *name)
 {
 	const char *cp;
 	int i;

 	/* Match the name */
 	for (cp = name, i = 0; i < strlen(ifc->ifc_name); i++, cp++) {
 		if (ifc->ifc_name[i] != *cp)
 			return (0);
 	}

 	/* Make sure there's a unit number or nothing after the name */
 	for (; *cp != '\0'; cp++) {
 		if (*cp < '0' || *cp > '9')
 			return (0);
 	}

 	return (1);
 }

 /*
  * Parse the unit out of 'name', reserve it (or the next usable one when the
  * name carries no unit) and store it in *punit.  In the wildcard case the
  * chosen unit is appended to 'name', which has room for 'len' bytes.
  */
 static int
 ifc_handle_unit(struct if_clone *ifc, char *name, size_t len, int *punit)
 {
 	char *dp;
 	int wildcard;
 	int unit;
 	int err;

 	err = ifc_name2unit(name, &unit);
 	if (err != 0)
 		return (err);

 	/* ifc_name2unit() reports "no unit in the name" as -1. */
 	wildcard = (unit < 0);

 	err = ifc_alloc_unit(ifc, &unit);
 	if (err != 0)
 		return (err);

 	/* In the wildcard case, we need to update the name. */
 	if (wildcard) {
 		for (dp = name; *dp != '\0'; dp++);
 		if (snprintf(dp, len - (dp-name), "%d", unit) >
 		    len - (dp-name) - 1) {
 			/*
 			 * This can only be a programmer error and
 			 * there's no straightforward way to recover if
 			 * it happens.
 			 */
 			panic("if_clone_create(): interface name too long");
 		}
 	}
 	*punit = unit;

 	return (0);
 }

 /*
  * Copy 'len' bytes of creation parameters into 'target'.  With
  * IFC_F_SYSSPACE set the source is read with memcpy(), otherwise with
  * copyin().  Returns EINVAL when no parameters were supplied.
  */
 int
 ifc_copyin(const struct ifc_data *ifd, void *target, size_t len)
 {
 	if (ifd->params == NULL)
 		return (EINVAL);

 	if (ifd->flags & IFC_F_SYSSPACE) {
 		memcpy(target, ifd->params, len);
 		return (0);
 	} else
 		return (copyin(ifd->params, target, len));
 }

 /* Accessors for fields of the opaque struct if_clone. */
 const char *
 ifc_name(struct if_clone *ifc)
 {
 	return (ifc->ifc_name);
 }

 void
 ifc_flags_set(struct if_clone *ifc, int flags)
 {
 	ifc->ifc_flags = flags;
 }

 int
 ifc_flags_get(struct if_clone *ifc)
 {
 	return (ifc->ifc_flags);
 }
diff --git a/sys/net/if_clone.h b/sys/net/if_clone.h
index 1d918a012a5b..8b52c375addb 100644
--- a/sys/net/if_clone.h
+++ b/sys/net/if_clone.h
@@ -1,128 +1,166 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1982, 1986, 1989, 1993
  * The Regents of the University of California. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  * notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  * notice, this list of conditions and the following disclaimer in the
  * documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  * may be used to endorse or promote products derived from this software
  * without specific prior written permission.
*
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * From: @(#)if.h 8.1 (Berkeley) 6/10/93
  * $FreeBSD$
  */

 #ifndef _NET_IF_CLONE_H_
 #define _NET_IF_CLONE_H_

 #ifdef _KERNEL

 #include

 #define CLONE_COMPAT_13

 struct if_clone;

 /* Public KPI */
 struct ifc_data {
 	uint32_t flags;
 	uint32_t unit; /* Selected unit when IFC_F_AUTOUNIT set */
 	void *params;
 	struct vnet *vnet;
 };

 /* Callbacks every cloner supplies (match_f is optional, see ifc_attach_cloner()). */
 typedef int ifc_match_f(struct if_clone *ifc, const char *name);
 typedef int ifc_create_f(struct if_clone *ifc, char *name, size_t maxlen, struct ifc_data *ifd, struct ifnet **ifpp);
 typedef int ifc_destroy_f(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags);

 /* Netlink types are only used through pointers here; forward-declare them. */
+struct nl_parsed_link;
+struct nlattr_bmask;
+struct nl_pstate;
+struct nl_writer;
 /* Request/response block shared by the netlink-aware create/modify paths. */
+struct ifc_data_nl {
+	struct nl_parsed_link *lattrs;	/* (in) Parsed link attributes */
+	const struct nlattr_bmask *bm; /* (in) Bitmask of set link attributes */
+	struct nl_pstate *npt; /* (in) Netlink context */
+	void *params;	/* (in) (Compat) data from ioctl */
+	uint32_t flags; /* (in) IFC_F flags */
+	uint32_t unit; /* (in/out) Selected unit when IFC_F_AUTOUNIT set */
+	int error; /* (out) Return error code */
+	struct ifnet *ifp; /* (out) Returned ifp */
+};
+
 /* Optional netlink-aware callbacks, registered through if_clone_addreq_v2. */
+typedef int ifc_create_nl_f(struct if_clone *ifc, char *name, size_t maxlen,
+    struct ifc_data_nl *ifd);
+typedef int ifc_modify_nl_f(struct ifnet *ifp, struct ifc_data_nl *ifd);
+typedef void ifc_dump_nl_f(struct ifnet *ifp, struct nl_writer *nw);
+
 struct if_clone_addreq {
 	uint16_t version; /* 0 for this layout; 2 selects if_clone_addreq_v2 */
 	uint16_t spare;
 	uint32_t flags;
 	uint32_t maxunit; /* Maximum allowed unit number */
 	ifc_match_f *match_f;
 	ifc_create_f *create_f;
 	ifc_destroy_f *destroy_f;
 };

 /*
  * Version 2 request: same leading members as struct if_clone_addreq, followed
  * by the netlink handlers.  It is passed to ifc_attach_cloner() cast to
  * struct if_clone_addreq *, so the common prefix must stay in sync.
  */
+struct if_clone_addreq_v2 {
+	uint16_t version; /* 2 */
+	uint16_t spare;
+	uint32_t flags;
+	uint32_t maxunit; /* Maximum allowed unit number */
+	ifc_match_f *match_f;
+	ifc_create_f *create_f;
+	ifc_destroy_f *destroy_f;
+	ifc_create_nl_f *create_nl_f;
+	ifc_modify_nl_f *modify_nl_f;
+	ifc_dump_nl_f *dump_nl_f;
+};
+
+
 #define IFC_F_NOGROUP 0x01 /* Creation flag: don't add unit group */
 #define IFC_F_AUTOUNIT 0x02 /* Creation flag: automatically select unit */
 #define IFC_F_SYSSPACE 0x04 /* Cloner callback: params pointer is in kernel memory */
 #define IFC_F_FORCE 0x08 /* Deletion flag: force interface deletion */
+#define IFC_F_CREATE 0x10 /* Creation flag: indicate creation request */

 /* Older spelling of IFC_F_NOGROUP. */
 #define IFC_NOGROUP IFC_F_NOGROUP

 struct if_clone *ifc_attach_cloner(const char *name, struct if_clone_addreq *req);
 void ifc_detach_cloner(struct if_clone *ifc);
-int ifc_create_ifp(const char *name, struct ifc_data *ifd,
-    struct ifnet **ifpp);
+int ifc_create_ifp(const char *name, struct ifc_data *ifd, struct ifnet **ifpp);
+
 /*
  * Netlink entry points.  The bool result reports whether a cloner was found;
  * ifc_create_ifp_nl() and ifc_modify_ifp_nl() return the operation's status
  * in ifd->error.
  */
+bool ifc_create_ifp_nl(const char *name, struct ifc_data_nl *ifd);
+bool ifc_modify_ifp_nl(struct ifnet *ifp, struct ifc_data_nl *ifd);
+bool ifc_dump_ifp_nl(struct ifnet *ifp, struct nl_writer *nw);

 void ifc_link_ifp(struct if_clone *ifc, struct ifnet *ifp);
 bool ifc_unlink_ifp(struct if_clone *ifc, struct ifnet *ifp);

 int ifc_copyin(const struct ifc_data *ifd, void *target, size_t len);
 #ifdef CLONE_COMPAT_13

 /* Methods.
*/
 /* Callback signatures of the pre-ifc_attach_cloner() (compat) interface. */
 typedef int ifc_match_t(struct if_clone *, const char *);
 typedef int ifc_create_t(struct if_clone *, char *, size_t, caddr_t);
 typedef int ifc_destroy_t(struct if_clone *, struct ifnet *);

 typedef int ifcs_create_t(struct if_clone *, int, caddr_t);
 typedef void ifcs_destroy_t(struct ifnet *);

 /* Interface cloner (de)allocating functions. */
 struct if_clone *
 	if_clone_advanced(const char *, u_int, ifc_match_t, ifc_create_t,
 		ifc_destroy_t);
 struct if_clone *
 	if_clone_simple(const char *, ifcs_create_t, ifcs_destroy_t, u_int);
 void if_clone_detach(struct if_clone *);
 #endif

 /* Unit (de)allocating functions. */
 int ifc_name2unit(const char *name, int *unit);
 int ifc_alloc_unit(struct if_clone *, int *);
 void ifc_free_unit(struct if_clone *, int);

 /* Accessors for the opaque struct if_clone. */
 const char *ifc_name(struct if_clone *);
 void ifc_flags_set(struct if_clone *, int flags);
 int ifc_flags_get(struct if_clone *);

 /* Interface clone event. */
 typedef void (*if_clone_event_handler_t)(void *, struct if_clone *);
 EVENTHANDLER_DECLARE(if_clone_event, if_clone_event_handler_t);

 /* The interfaces below are used only by net/if.c. */
 void vnet_if_clone_init(void);
 int if_clone_create(char *, size_t, caddr_t);
 int if_clone_destroy(const char *);
 int if_clone_list(struct if_clonereq *);
 void if_clone_restoregroup(struct ifnet *);

 /* The below interfaces are used only by epair(4). */
 void if_clone_addif(struct if_clone *, struct ifnet *);
 int if_clone_destroyif(struct if_clone *, struct ifnet *);

 #endif /* _KERNEL */
 #endif /* !_NET_IF_CLONE_H_ */
diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c
index 0f2ded3f6040..f5b401c446ed 100644
--- a/sys/net/if_vlan.c
+++ b/sys/net/if_vlan.c
@@ -1,2346 +1,2520 @@
 /*-
  * Copyright 1998 Massachusetts Institute of Technology
  * Copyright 2012 ADARA Networks, Inc.
  * Copyright 2017 Dell EMC Isilon
  *
  * Portions of this software were developed by Robert N. M. Watson under
  * contract to ADARA Networks, Inc.
* * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs. * This is sort of sneaky in the implementation, since * we need to pretend to be enough of an Ethernet implementation * to make arp work. The way we do this is by telling everyone * that we are an Ethernet, and then catch the packets that * ether_output() sends to us via if_transmit(), rewrite them for * use by the real outgoing interface, and ask it to send them. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_kern_tls.h" +#include "opt_netlink.h" #include "opt_vlan.h" #include "opt_ratelimit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif +#include +#include +#include +#include + #define VLAN_DEF_HWIDTH 4 #define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST) #define UP_AND_RUNNING(ifp) \ ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING) CK_SLIST_HEAD(ifvlanhead, ifvlan); struct ifvlantrunk { struct ifnet *parent; /* parent interface of this trunk */ struct mtx lock; #ifdef VLAN_ARRAY #define VLAN_ARRAY_SIZE (EVL_VLID_MASK + 1) struct ifvlan *vlans[VLAN_ARRAY_SIZE]; /* static table */ #else struct ifvlanhead *hash; /* dynamic hash-list table */ uint16_t hmask; uint16_t hwidth; #endif int refcnt; }; #if defined(KERN_TLS) || defined(RATELIMIT) struct vlan_snd_tag { struct m_snd_tag com; struct m_snd_tag *tag; }; static inline struct vlan_snd_tag * mst_to_vst(struct m_snd_tag *mst) { return (__containerof(mst, struct vlan_snd_tag, com)); } #endif /* * This macro provides a facility to iterate over every vlan on a trunk with * the assumption that none will be added/removed during iteration. */ #ifdef VLAN_ARRAY #define VLAN_FOREACH(_ifv, _trunk) \ size_t _i; \ for (_i = 0; _i < VLAN_ARRAY_SIZE; _i++) \ if (((_ifv) = (_trunk)->vlans[_i]) != NULL) #else /* VLAN_ARRAY */ #define VLAN_FOREACH(_ifv, _trunk) \ struct ifvlan *_next; \ size_t _i; \ for (_i = 0; _i < (1 << (_trunk)->hwidth); _i++) \ CK_SLIST_FOREACH_SAFE((_ifv), &(_trunk)->hash[_i], ifv_list, _next) #endif /* VLAN_ARRAY */ /* * This macro provides a facility to iterate over every vlan on a trunk while * also modifying the number of vlans on the trunk. 
The iteration continues
 * until some condition is met or there are no more vlans on the trunk.
 */
/*
 * Both variants below expand to local declarations (_i, and _touch in the
 * hash case) followed by a for/if header, so the macro must be used at most
 * once per scope and cannot be the sole body of an unbraced statement.
 */
#ifdef VLAN_ARRAY
/* The VLAN_ARRAY case is simple -- just a for loop using the condition. */
#define VLAN_FOREACH_UNTIL_SAFE(_ifv, _trunk, _cond) \
	size_t _i; \
	for (_i = 0; !(_cond) && _i < VLAN_ARRAY_SIZE; _i++) \
		if (((_ifv) = (_trunk)->vlans[_i]))
#else /* VLAN_ARRAY */
/*
 * The hash table case is more complicated. We allow for the hash table to be
 * modified (i.e. vlans removed) while we are iterating over it. To allow for
 * this we must restart the iteration every time we "touch" something during
 * the iteration, since removal will resize the hash table and invalidate our
 * current position. If acting on the touched element causes the trunk to be
 * emptied, then iteration also stops.
 */
#define VLAN_FOREACH_UNTIL_SAFE(_ifv, _trunk, _cond) \
	size_t _i; \
	bool _touch = false; \
	for (_i = 0; \
	    !(_cond) && _i < (1 << (_trunk)->hwidth); \
	    _i = (_touch && ((_trunk) != NULL) ? 0 : _i + 1), _touch = false) \
		if (((_ifv) = CK_SLIST_FIRST(&(_trunk)->hash[_i])) != NULL && \
		    (_touch = true))
#endif /* VLAN_ARRAY */

/*
 * One link-level multicast address that vlan_setmulti() has programmed on
 * the parent interface on behalf of a vlan.
 */
struct vlan_mc_entry {
	struct sockaddr_dl		mc_addr;	/* address, sdl_index set to the parent's */
	CK_SLIST_ENTRY(vlan_mc_entry)	mc_entries;	/* linkage on ifvlan's vlan_mc_listhead */
	struct epoch_context		mc_epoch_ctx;	/* for deferred free via vlan_mc_free() */
};

/*
 * Per-vlan software context; stored in the if_softc of the vlan's ifnet.
 */
struct ifvlan {
	struct	ifvlantrunk *ifv_trunk;	/* trunk we are attached to, NULL if none */
	struct	ifnet *ifv_ifp;		/* the vlan interface itself */
#define	TRUNK(ifv)	((ifv)->ifv_trunk)
#define	PARENT(ifv)	(TRUNK(ifv)->parent)	/* only valid while TRUNK(ifv) != NULL */
	void	*ifv_cookie;		/* opaque value, see vlan_cookie()/vlan_setcookie() */
	int	ifv_pflags;	/* special flags we have set on parent */
	int	ifv_capenable;
	int	ifv_encaplen;	/* encapsulation length */
	int	ifv_mtufudge;	/* MTU fudged by this much */
	int	ifv_mintu;	/* min transmission unit */
	struct  ether_8021q_tag ifv_qtag;	/* 802.1Q tag; fields aliased below */
#define ifv_proto	ifv_qtag.proto
#define ifv_vid 	ifv_qtag.vid
#define ifv_pcp		ifv_qtag.pcp
	struct task lladdr_task;	/* enqueued by vlan_iflladdr() */
	CK_SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead;	/* multicast entries */
#ifndef VLAN_ARRAY
	CK_SLIST_ENTRY(ifvlan) ifv_list;	/* chain within a trunk hash bucket */
#endif
};

/* Special flags we should propagate to parent.
*/
static struct {
	int flag;				/* IFF_* bit this entry handles */
	int (*func)(struct ifnet *, int);	/* routine that applies the flag */
} vlan_pflags[] = {
	{IFF_PROMISC, ifpromisc},
	{IFF_ALLMULTI, if_allmulti},
	{0, NULL}				/* end-of-table marker */
};

/* Declared here only; the variable itself is defined outside this section. */
VNET_DECLARE(int, vlan_mtag_pcp);
#define	V_vlan_mtag_pcp	VNET(vlan_mtag_pcp)

/* Driver name; also used as the malloc type name and the trunk mutex name. */
static const char vlanname[] = "vlan";
static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface");

/*
 * Event handler registrations, saved by vlan_modevent() at MOD_LOAD so
 * that they can be deregistered at MOD_UNLOAD.
 */
static eventhandler_tag ifdetach_tag;
static eventhandler_tag iflladdr_tag;
static eventhandler_tag ifevent_tag;

/*
 * if_vlan uses two module-level synchronization primitives to allow concurrent
 * modification of vlan interfaces and (mostly) allow for vlans to be destroyed
 * while they are being used for tx/rx. To accomplish this in a way that has
 * acceptable performance and cooperation with other parts of the network stack
 * there is a non-sleepable epoch(9) and an sx(9).
 *
 * The performance-sensitive paths that warrant using the epoch(9) are
 * vlan_transmit and vlan_input. Both have to check for the vlan interface's
 * existence using if_vlantrunk, and being in the network tx/rx paths the use
 * of an epoch(9) gives a measurable improvement in performance.
 *
 * The reason for having an sx(9) is mostly because there are still areas that
 * must be sleepable and also have safe concurrent access to a vlan interface.
 * Since the sx(9) exists, it is used by default in most paths unless sleeping
 * is not permitted, or if it is not clear whether sleeping is permitted.
* */ #define _VLAN_SX_ID ifv_sx static struct sx _VLAN_SX_ID; #define VLAN_LOCKING_INIT() \ sx_init_flags(&_VLAN_SX_ID, "vlan_sx", SX_RECURSE) #define VLAN_LOCKING_DESTROY() \ sx_destroy(&_VLAN_SX_ID) #define VLAN_SLOCK() sx_slock(&_VLAN_SX_ID) #define VLAN_SUNLOCK() sx_sunlock(&_VLAN_SX_ID) #define VLAN_XLOCK() sx_xlock(&_VLAN_SX_ID) #define VLAN_XUNLOCK() sx_xunlock(&_VLAN_SX_ID) #define VLAN_SLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_SLOCKED) #define VLAN_XLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_XLOCKED) #define VLAN_SXLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_LOCKED) /* * We also have a per-trunk mutex that should be acquired when changing * its state. */ #define TRUNK_LOCK_INIT(trunk) mtx_init(&(trunk)->lock, vlanname, NULL, MTX_DEF) #define TRUNK_LOCK_DESTROY(trunk) mtx_destroy(&(trunk)->lock) #define TRUNK_WLOCK(trunk) mtx_lock(&(trunk)->lock) #define TRUNK_WUNLOCK(trunk) mtx_unlock(&(trunk)->lock) #define TRUNK_WLOCK_ASSERT(trunk) mtx_assert(&(trunk)->lock, MA_OWNED); /* * The VLAN_ARRAY substitutes the dynamic hash with a static array * with 4096 entries. In theory this can give a boost in processing, * however in practice it does not. Probably this is because the array * is too big to fit into CPU cache. 
*/ #ifndef VLAN_ARRAY static void vlan_inithash(struct ifvlantrunk *trunk); static void vlan_freehash(struct ifvlantrunk *trunk); static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv); static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv); static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch); static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid); #endif static void trunk_destroy(struct ifvlantrunk *trunk); static void vlan_init(void *foo); static void vlan_input(struct ifnet *ifp, struct mbuf *m); static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); #if defined(KERN_TLS) || defined(RATELIMIT) static int vlan_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); static int vlan_snd_tag_modify(struct m_snd_tag *, union if_snd_tag_modify_params *); static int vlan_snd_tag_query(struct m_snd_tag *, union if_snd_tag_query_params *); static void vlan_snd_tag_free(struct m_snd_tag *); static struct m_snd_tag *vlan_next_snd_tag(struct m_snd_tag *); static void vlan_ratelimit_query(struct ifnet *, struct if_ratelimit_query_results *); #endif static void vlan_qflush(struct ifnet *ifp); static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)); static int vlan_setflags(struct ifnet *ifp, int status); static int vlan_setmulti(struct ifnet *ifp); static int vlan_transmit(struct ifnet *ifp, struct mbuf *m); #ifdef ALTQ static void vlan_altq_start(struct ifnet *ifp); static int vlan_altq_transmit(struct ifnet *ifp, struct mbuf *m); #endif static int vlan_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro); static void vlan_unconfig(struct ifnet *ifp); static void vlan_unconfig_locked(struct ifnet *ifp, int departing); static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag, uint16_t proto); static void vlan_link_state(struct ifnet *ifp); static void 
vlan_capabilities(struct ifvlan *ifv); static void vlan_trunk_capabilities(struct ifnet *ifp); static struct ifnet *vlan_clone_match_ethervid(const char *, int *); static int vlan_clone_match(struct if_clone *, const char *); static int vlan_clone_create(struct if_clone *, char *, size_t, struct ifc_data *, struct ifnet **); static int vlan_clone_destroy(struct if_clone *, struct ifnet *, uint32_t); +static int vlan_clone_create_nl(struct if_clone *ifc, char *name, size_t len, + struct ifc_data_nl *ifd); +static int vlan_clone_modify_nl(struct ifnet *ifp, struct ifc_data_nl *ifd); +static void vlan_clone_dump_nl(struct ifnet *ifp, struct nl_writer *nw); + static void vlan_ifdetach(void *arg, struct ifnet *ifp); static void vlan_iflladdr(void *arg, struct ifnet *ifp); static void vlan_ifevent(void *arg, struct ifnet *ifp, int event); static void vlan_lladdr_fn(void *arg, int pending); static struct if_clone *vlan_cloner; #ifdef VIMAGE VNET_DEFINE_STATIC(struct if_clone *, vlan_cloner); #define V_vlan_cloner VNET(vlan_cloner) #endif #ifdef RATELIMIT static const struct if_snd_tag_sw vlan_snd_tag_ul_sw = { .snd_tag_modify = vlan_snd_tag_modify, .snd_tag_query = vlan_snd_tag_query, .snd_tag_free = vlan_snd_tag_free, .next_snd_tag = vlan_next_snd_tag, .type = IF_SND_TAG_TYPE_UNLIMITED }; static const struct if_snd_tag_sw vlan_snd_tag_rl_sw = { .snd_tag_modify = vlan_snd_tag_modify, .snd_tag_query = vlan_snd_tag_query, .snd_tag_free = vlan_snd_tag_free, .next_snd_tag = vlan_next_snd_tag, .type = IF_SND_TAG_TYPE_RATE_LIMIT }; #endif #ifdef KERN_TLS static const struct if_snd_tag_sw vlan_snd_tag_tls_sw = { .snd_tag_modify = vlan_snd_tag_modify, .snd_tag_query = vlan_snd_tag_query, .snd_tag_free = vlan_snd_tag_free, .next_snd_tag = vlan_next_snd_tag, .type = IF_SND_TAG_TYPE_TLS }; #ifdef RATELIMIT static const struct if_snd_tag_sw vlan_snd_tag_tls_rl_sw = { .snd_tag_modify = vlan_snd_tag_modify, .snd_tag_query = vlan_snd_tag_query, .snd_tag_free = vlan_snd_tag_free, 
.next_snd_tag = vlan_next_snd_tag, .type = IF_SND_TAG_TYPE_TLS_RATE_LIMIT }; #endif #endif static void vlan_mc_free(struct epoch_context *ctx) { struct vlan_mc_entry *mc = __containerof(ctx, struct vlan_mc_entry, mc_epoch_ctx); free(mc, M_VLAN); } #ifndef VLAN_ARRAY #define HASH(n, m) ((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m)) static void vlan_inithash(struct ifvlantrunk *trunk) { int i, n; /* * The trunk must not be locked here since we call malloc(M_WAITOK). * It is OK in case this function is called before the trunk struct * gets hooked up and becomes visible from other threads. */ KASSERT(trunk->hwidth == 0 && trunk->hash == NULL, ("%s: hash already initialized", __func__)); trunk->hwidth = VLAN_DEF_HWIDTH; n = 1 << trunk->hwidth; trunk->hmask = n - 1; trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK); for (i = 0; i < n; i++) CK_SLIST_INIT(&trunk->hash[i]); } static void vlan_freehash(struct ifvlantrunk *trunk) { #ifdef INVARIANTS int i; KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); for (i = 0; i < (1 << trunk->hwidth); i++) KASSERT(CK_SLIST_EMPTY(&trunk->hash[i]), ("%s: hash table not empty", __func__)); #endif free(trunk->hash, M_VLAN); trunk->hash = NULL; trunk->hwidth = trunk->hmask = 0; } static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { int i, b; struct ifvlan *ifv2; VLAN_XLOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; i = HASH(ifv->ifv_vid, trunk->hmask); CK_SLIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv->ifv_vid == ifv2->ifv_vid) return (EEXIST); /* * Grow the hash when the number of vlans exceeds half of the number of * hash buckets squared. This will make the average linked-list length * buckets/2. 
*/ if (trunk->refcnt > (b * b) / 2) { vlan_growhash(trunk, 1); i = HASH(ifv->ifv_vid, trunk->hmask); } CK_SLIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list); trunk->refcnt++; return (0); } static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { int i, b; struct ifvlan *ifv2; VLAN_XLOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << (trunk->hwidth - 1); i = HASH(ifv->ifv_vid, trunk->hmask); CK_SLIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv2 == ifv) { trunk->refcnt--; CK_SLIST_REMOVE(&trunk->hash[i], ifv2, ifvlan, ifv_list); if (trunk->refcnt < (b * b) / 2) vlan_growhash(trunk, -1); return (0); } panic("%s: vlan not found\n", __func__); return (ENOENT); /*NOTREACHED*/ } /* * Grow the hash larger or smaller if memory permits. */ static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch) { struct ifvlan *ifv; struct ifvlanhead *hash2; int hwidth2, i, j, n, n2; VLAN_XLOCK_ASSERT(); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); if (howmuch == 0) { /* Harmless yet obvious coding error */ printf("%s: howmuch is 0\n", __func__); return; } hwidth2 = trunk->hwidth + howmuch; n = 1 << trunk->hwidth; n2 = 1 << hwidth2; /* Do not shrink the table below the default */ if (hwidth2 < VLAN_DEF_HWIDTH) return; hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_WAITOK); if (hash2 == NULL) { printf("%s: out of memory -- hash size not changed\n", __func__); return; /* We can live with the old hash table */ } for (j = 0; j < n2; j++) CK_SLIST_INIT(&hash2[j]); for (i = 0; i < n; i++) while ((ifv = CK_SLIST_FIRST(&trunk->hash[i])) != NULL) { CK_SLIST_REMOVE(&trunk->hash[i], ifv, ifvlan, ifv_list); j = HASH(ifv->ifv_vid, n2 - 1); CK_SLIST_INSERT_HEAD(&hash2[j], ifv, ifv_list); } NET_EPOCH_WAIT(); free(trunk->hash, M_VLAN); trunk->hash = hash2; trunk->hwidth = hwidth2; trunk->hmask = n2 - 1; if (bootverbose) if_printf(trunk->parent, "VLAN hash table resized from %d to %d buckets\n", n, n2); } 
/*
 * Hash variant: find the vlan with the given VID on this trunk, or return
 * NULL if there is none.  The caller must be inside the net epoch.
 */
static __inline struct ifvlan *
vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
{
	struct ifvlan *ifv;

	NET_EPOCH_ASSERT();

	CK_SLIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list)
		if (ifv->ifv_vid == vid)
			return (ifv);
	return (NULL);
}

#if 0
/* Debugging code to view the hashtables. */
static void
vlan_dumphash(struct ifvlantrunk *trunk)
{
	int i;
	struct ifvlan *ifv;

	for (i = 0; i < (1 << trunk->hwidth); i++) {
		printf("%d: ", i);
		CK_SLIST_FOREACH(ifv, &trunk->hash[i], ifv_list)
			printf("%s ", ifv->ifv_ifp->if_xname);
		printf("\n");
	}
}
#endif /* 0 */
#else

/*
 * VLAN_ARRAY variants: the trunk keeps a table indexed directly by VID,
 * so lookup, insert and remove are plain array accesses and there is
 * nothing to allocate or free.
 */
static __inline struct ifvlan *
vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid)
{

	return trunk->vlans[vid];
}

/* Returns EEXIST if the VID slot is already taken, 0 on success. */
static __inline int
vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
{

	if (trunk->vlans[ifv->ifv_vid] != NULL)
		return EEXIST;
	trunk->vlans[ifv->ifv_vid] = ifv;
	trunk->refcnt++;

	return (0);
}

/* Clears the VID slot unconditionally and drops the trunk refcnt. */
static __inline int
vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv)
{

	trunk->vlans[ifv->ifv_vid] = NULL;
	trunk->refcnt--;

	return (0);
}

static __inline void
vlan_freehash(struct ifvlantrunk *trunk)
{
}

static __inline void
vlan_inithash(struct ifvlantrunk *trunk)
{
}

#endif /* !VLAN_ARRAY */

/*
 * Tear down a trunk: release its hash, detach it from the parent ifnet,
 * destroy its mutex, drop the reference held on the parent and free it.
 * Requires the global lock held exclusively.
 */
static void
trunk_destroy(struct ifvlantrunk *trunk)
{
	VLAN_XLOCK_ASSERT();

	vlan_freehash(trunk);
	trunk->parent->if_vlantrunk = NULL;
	TRUNK_LOCK_DESTROY(trunk);
	if_rele(trunk->parent);
	free(trunk, M_VLAN);
}

/*
 * Program our multicast filter. What we're actually doing is
 * programming the multicast filter of the parent. This has the
 * side effect of causing the parent interface to receive multicast
 * traffic that it doesn't really want, which ends up being discarded
 * later by the upper protocol layers. Unfortunately, there's no way
 * to avoid this: there really is only one physical interface.
 *
 * Returns 0 on success, ENOMEM if an entry could not be allocated, or
 * the error reported by if_addmulti() on the parent.  Requires the
 * global lock held exclusively.
 */
static int
vlan_setmulti(struct ifnet *ifp)
{
	struct ifnet		*ifp_p;
	struct ifmultiaddr	*ifma;
	struct ifvlan		*sc;
	struct vlan_mc_entry	*mc;
	int			error;

	VLAN_XLOCK_ASSERT();

	/* Find the parent. */
	sc = ifp->if_softc;
	ifp_p = PARENT(sc);

	CURVNET_SET_QUIET(ifp_p->if_vnet);

	/* First, remove any existing filter entries. */
	while ((mc = CK_SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) {
		CK_SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
		(void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr);
		/* Freed via the epoch callback rather than immediately. */
		NET_EPOCH_CALL(vlan_mc_free, &mc->mc_epoch_ctx);
	}

	/* Now program new ones. */
	IF_ADDR_WLOCK(ifp);
	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		/* NOTE(review): M_NOWAIT presumably because the address lock is held -- confirm. */
		mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT);
		if (mc == NULL) {
			IF_ADDR_WUNLOCK(ifp);
			CURVNET_RESTORE();
			return (ENOMEM);
		}
		bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len);
		/* Re-target the copied address at the parent interface. */
		mc->mc_addr.sdl_index = ifp_p->if_index;
		CK_SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries);
	}
	IF_ADDR_WUNLOCK(ifp);
	/* Second pass, after the address lock is dropped: join on the parent. */
	CK_SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) {
		error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr,
		    NULL);
		if (error) {
			CURVNET_RESTORE();
			return (error);
		}
	}

	CURVNET_RESTORE();
	return (0);
}

/*
 * A handler for interface ifnet events.
 * Only IFNET_EVENT_UPDATE_BAUDRATE is acted upon: the parent's baudrate
 * is copied to every vlan on its trunk.
 */
static void
vlan_ifevent(void *arg __unused, struct ifnet *ifp, int event)
{
	struct epoch_tracker et;
	struct ifvlan *ifv;
	struct ifvlantrunk *trunk;

	if (event != IFNET_EVENT_UPDATE_BAUDRATE)
		return;

	NET_EPOCH_ENTER(et);
	trunk = ifp->if_vlantrunk;
	if (trunk == NULL) {
		/* Not a trunk parent; nothing to propagate. */
		NET_EPOCH_EXIT(et);
		return;
	}

	TRUNK_WLOCK(trunk);
	VLAN_FOREACH(ifv, trunk) {
		ifv->ifv_ifp->if_baudrate = ifp->if_baudrate;
	}
	TRUNK_WUNLOCK(trunk);
	NET_EPOCH_EXIT(et);
}

/*
 * A handler for parent interface link layer address changes.
 * If the parent interface link layer address is changed we
 * should also change it on all children vlans.
 */
static void
vlan_iflladdr(void *arg __unused, struct ifnet *ifp)
{
	struct epoch_tracker et;
	struct ifvlan *ifv;
	struct ifnet *ifv_ifp;
	struct ifvlantrunk *trunk;
	struct sockaddr_dl *sdl;

	/* Need the epoch since this is run on taskqueue_swi. */
	NET_EPOCH_ENTER(et);
	trunk = ifp->if_vlantrunk;
	if (trunk == NULL) {
		NET_EPOCH_EXIT(et);
		return;
	}

	/*
	 * OK, it's a trunk.  Loop over and change all vlan's lladdrs on it.
	 * We need an exclusive lock here to prevent concurrent SIOCSIFLLADDR
	 * ioctl calls on the parent garbling the lladdr of the child vlan.
	 */
	TRUNK_WLOCK(trunk);
	VLAN_FOREACH(ifv, trunk) {
		/*
		 * Copy the new lladdr into the ifv_ifp, enqueue a task
		 * to actually call if_setlladdr. if_setlladdr needs to
		 * be deferred to a taskqueue because it will call into
		 * the if_vlan ioctl path and try to acquire the global
		 * lock.
		 */
		ifv_ifp = ifv->ifv_ifp;
		bcopy(IF_LLADDR(ifp), IF_LLADDR(ifv_ifp),
		    ifp->if_addrlen);
		sdl = (struct sockaddr_dl *)ifv_ifp->if_addr->ifa_addr;
		sdl->sdl_alen = ifp->if_addrlen;
		taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task);
	}
	TRUNK_WUNLOCK(trunk);
	NET_EPOCH_EXIT(et);
}

/*
 * A handler for network interface departure events.
 * Track departure of trunks here so that we don't access invalid
 * pointers or whatever if a trunk is ripped from under us, e.g.,
 * by ejecting its hot-plug card.  However, if an ifnet is simply
 * being renamed, then there's no need to tear down the state.
 */
static void
vlan_ifdetach(void *arg __unused, struct ifnet *ifp)
{
	struct ifvlan *ifv;
	struct ifvlantrunk *trunk;

	/* If the ifnet is just being renamed, don't do anything. */
	if (ifp->if_flags & IFF_RENAMING)
		return;
	VLAN_XLOCK();
	trunk = ifp->if_vlantrunk;
	if (trunk == NULL) {
		VLAN_XUNLOCK();
		return;
	}

	/*
	 * OK, it's a trunk.  Loop over and detach all vlan's on it.
	 * Check trunk pointer after each vlan_unconfig() as it will
	 * free it and set to NULL after the last vlan was detached.
	 */
	VLAN_FOREACH_UNTIL_SAFE(ifv, ifp->if_vlantrunk,
	    ifp->if_vlantrunk == NULL)
		vlan_unconfig_locked(ifv->ifv_ifp, 1);

	/* Trunk should have been destroyed in vlan_unconfig(). */
	KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__));
	VLAN_XUNLOCK();
}

/*
 * Return the trunk device for a virtual interface.
*/ static struct ifnet * vlan_trunkdev(struct ifnet *ifp) { struct ifvlan *ifv; NET_EPOCH_ASSERT(); if (ifp->if_type != IFT_L2VLAN) return (NULL); ifv = ifp->if_softc; ifp = NULL; if (ifv->ifv_trunk) ifp = PARENT(ifv); return (ifp); } /* * Return the 12-bit VLAN VID for this interface, for use by external * components such as Infiniband. * * XXXRW: Note that the function name here is historical; it should be named * vlan_vid(). */ static int vlan_tag(struct ifnet *ifp, uint16_t *vidp) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (EINVAL); ifv = ifp->if_softc; *vidp = ifv->ifv_vid; return (0); } static int vlan_pcp(struct ifnet *ifp, uint16_t *pcpp) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (EINVAL); ifv = ifp->if_softc; *pcpp = ifv->ifv_pcp; return (0); } /* * Return a driver specific cookie for this interface. Synchronization * with setcookie must be provided by the driver. */ static void * vlan_cookie(struct ifnet *ifp) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (NULL); ifv = ifp->if_softc; return (ifv->ifv_cookie); } /* * Store a cookie in our softc that drivers can use to store driver * private per-instance data in. */ static int vlan_setcookie(struct ifnet *ifp, void *cookie) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (EINVAL); ifv = ifp->if_softc; ifv->ifv_cookie = cookie; return (0); } /* * Return the vlan device present at the specific VID. */ static struct ifnet * vlan_devat(struct ifnet *ifp, uint16_t vid) { struct ifvlantrunk *trunk; struct ifvlan *ifv; NET_EPOCH_ASSERT(); trunk = ifp->if_vlantrunk; if (trunk == NULL) return (NULL); ifp = NULL; ifv = vlan_gethash(trunk, vid); if (ifv) ifp = ifv->ifv_ifp; return (ifp); } /* * VLAN support can be loaded as a module. The only place in the * system that's intimately aware of this is ether_input. We hook * into this code through vlan_input_p which is defined there and * set here. 
No one else in the system should be aware of this so * we use an explicit reference here. */ extern void (*vlan_input_p)(struct ifnet *, struct mbuf *); /* For if_link_state_change() eyes only... */ extern void (*vlan_link_state_p)(struct ifnet *); -static struct if_clone_addreq vlan_addreq = { +static struct if_clone_addreq_v2 vlan_addreq = { + .version = 2, .match_f = vlan_clone_match, .create_f = vlan_clone_create, .destroy_f = vlan_clone_destroy, + .create_nl_f = vlan_clone_create_nl, + .modify_nl_f = vlan_clone_modify_nl, + .dump_nl_f = vlan_clone_dump_nl, }; static int vlan_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY); if (ifdetach_tag == NULL) return (ENOMEM); iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event, vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY); if (iflladdr_tag == NULL) return (ENOMEM); ifevent_tag = EVENTHANDLER_REGISTER(ifnet_event, vlan_ifevent, NULL, EVENTHANDLER_PRI_ANY); if (ifevent_tag == NULL) return (ENOMEM); VLAN_LOCKING_INIT(); vlan_input_p = vlan_input; vlan_link_state_p = vlan_link_state; vlan_trunk_cap_p = vlan_trunk_capabilities; vlan_trunkdev_p = vlan_trunkdev; vlan_cookie_p = vlan_cookie; vlan_setcookie_p = vlan_setcookie; vlan_tag_p = vlan_tag; vlan_pcp_p = vlan_pcp; vlan_devat_p = vlan_devat; #ifndef VIMAGE - vlan_cloner = ifc_attach_cloner(vlanname, &vlan_addreq); + vlan_cloner = ifc_attach_cloner(vlanname, (struct if_clone_addreq *)&vlan_addreq); #endif if (bootverbose) printf("vlan: initialized, using " #ifdef VLAN_ARRAY "full-size arrays" #else "hash tables with chaining" #endif "\n"); break; case MOD_UNLOAD: #ifndef VIMAGE ifc_detach_cloner(vlan_cloner); #endif EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag); EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag); EVENTHANDLER_DEREGISTER(ifnet_event, ifevent_tag); vlan_input_p = NULL; vlan_link_state_p = NULL; vlan_trunk_cap_p = 
NULL; vlan_trunkdev_p = NULL; vlan_tag_p = NULL; vlan_cookie_p = NULL; vlan_setcookie_p = NULL; vlan_devat_p = NULL; VLAN_LOCKING_DESTROY(); if (bootverbose) printf("vlan: unloaded\n"); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t vlan_mod = { "if_vlan", vlan_modevent, 0 }; DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_vlan, 3); #ifdef VIMAGE static void vnet_vlan_init(const void *unused __unused) { - vlan_cloner = ifc_attach_cloner(vlanname, &vlan_addreq); + vlan_cloner = ifc_attach_cloner(vlanname, (struct if_clone_addreq *)&vlan_addreq); V_vlan_cloner = vlan_cloner; } VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_vlan_init, NULL); static void vnet_vlan_uninit(const void *unused __unused) { ifc_detach_cloner(V_vlan_cloner); } VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_vlan_uninit, NULL); #endif /* * Check for .[. ...] style interface names. */ static struct ifnet * vlan_clone_match_ethervid(const char *name, int *vidp) { char ifname[IFNAMSIZ]; char *cp; struct ifnet *ifp; int vid; strlcpy(ifname, name, IFNAMSIZ); if ((cp = strrchr(ifname, '.')) == NULL) return (NULL); *cp = '\0'; if ((ifp = ifunit_ref(ifname)) == NULL) return (NULL); /* Parse VID. 
*/ if (*++cp == '\0') { if_rele(ifp); return (NULL); } vid = 0; for(; *cp >= '0' && *cp <= '9'; cp++) vid = (vid * 10) + (*cp - '0'); if (*cp != '\0') { if_rele(ifp); return (NULL); } if (vidp != NULL) *vidp = vid; return (ifp); } static int vlan_clone_match(struct if_clone *ifc, const char *name) { struct ifnet *ifp; const char *cp; ifp = vlan_clone_match_ethervid(name, NULL); if (ifp != NULL) { if_rele(ifp); return (1); } if (strncmp(vlanname, name, strlen(vlanname)) != 0) return (0); for (cp = name + 4; *cp != '\0'; cp++) { if (*cp < '0' || *cp > '9') return (0); } return (1); } static int vlan_clone_create(struct if_clone *ifc, char *name, size_t len, struct ifc_data *ifd, struct ifnet **ifpp) { char *dp; bool wildcard = false; bool subinterface = false; int unit; int error; int vid = 0; uint16_t proto = ETHERTYPE_VLAN; struct ifvlan *ifv; struct ifnet *ifp; struct ifnet *p = NULL; struct ifaddr *ifa; struct sockaddr_dl *sdl; struct vlanreq vlr; static const u_char eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ /* * There are three ways to specify the cloned device: * o pass a parameter block with the clone request. * o specify parameters in the text of the clone device name * o specify no parameters and get an unattached device that * must be configured separately. * The first technique is preferred; the latter two are supported * for backwards compatibility. * * XXXRW: Note historic use of the word "tag" here. New ioctls may be * called for. */ if (ifd->params != NULL) { error = ifc_copyin(ifd, &vlr, sizeof(vlr)); if (error) return error; vid = vlr.vlr_tag; proto = vlr.vlr_proto; #ifdef COMPAT_FREEBSD12 if (proto == 0) proto = ETHERTYPE_VLAN; #endif p = ifunit_ref(vlr.vlr_parent); if (p == NULL) return (ENXIO); } if ((error = ifc_name2unit(name, &unit)) == 0) { /* * vlanX interface. 
Set wildcard to true if the unit number * is not fixed (-1) */ wildcard = (unit < 0); } else { struct ifnet *p_tmp = vlan_clone_match_ethervid(name, &vid); if (p_tmp != NULL) { error = 0; subinterface = true; unit = IF_DUNIT_NONE; wildcard = false; if (p != NULL) { if_rele(p_tmp); if (p != p_tmp) error = EINVAL; } else p = p_tmp; } else error = ENXIO; } if (error != 0) { if (p != NULL) if_rele(p); return (error); } if (!subinterface) { /* vlanX interface, mark X as busy or allocate new unit # */ error = ifc_alloc_unit(ifc, &unit); if (error != 0) { if (p != NULL) if_rele(p); return (error); } } /* In the wildcard case, we need to update the name. */ if (wildcard) { for (dp = name; *dp != '\0'; dp++); if (snprintf(dp, len - (dp-name), "%d", unit) > len - (dp-name) - 1) { panic("%s: interface name too long", __func__); } } ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO); ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { if (!subinterface) ifc_free_unit(ifc, unit); free(ifv, M_VLAN); if (p != NULL) if_rele(p); return (ENOSPC); } CK_SLIST_INIT(&ifv->vlan_mc_listhead); ifp->if_softc = ifv; /* * Set the name manually rather than using if_initname because * we don't conform to the default naming convention for interfaces. */ strlcpy(ifp->if_xname, name, IFNAMSIZ); ifp->if_dname = vlanname; ifp->if_dunit = unit; ifp->if_init = vlan_init; #ifdef ALTQ ifp->if_start = vlan_altq_start; ifp->if_transmit = vlan_altq_transmit; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_snd.ifq_drv_maxlen = 0; IFQ_SET_READY(&ifp->if_snd); #else ifp->if_transmit = vlan_transmit; #endif ifp->if_qflush = vlan_qflush; ifp->if_ioctl = vlan_ioctl; #if defined(KERN_TLS) || defined(RATELIMIT) ifp->if_snd_tag_alloc = vlan_snd_tag_alloc; ifp->if_ratelimit_query = vlan_ratelimit_query; #endif ifp->if_flags = VLAN_IFFLAGS; ether_ifattach(ifp, eaddr); /* Now undo some of the damage... 
*/ ifp->if_baudrate = 0; ifp->if_type = IFT_L2VLAN; ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN; ifa = ifp->if_addr; sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_L2VLAN; if (p != NULL) { error = vlan_config(ifv, p, vid, proto); if_rele(p); if (error != 0) { /* * Since we've partially failed, we need to back * out all the way, otherwise userland could get * confused. Thus, we destroy the interface. */ ether_ifdetach(ifp); vlan_unconfig(ifp); if_free(ifp); if (!subinterface) ifc_free_unit(ifc, unit); free(ifv, M_VLAN); return (error); } } *ifpp = ifp; return (0); } +/* + * + * Parsers of IFLA_INFO_DATA inside IFLA_LINKINFO of RTM_NEWLINK + * {{nla_len=8, nla_type=IFLA_LINK}, 2}, + * {{nla_len=12, nla_type=IFLA_IFNAME}, "xvlan22"}, + * {{nla_len=24, nla_type=IFLA_LINKINFO}, + * [ + * {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...}, + * {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x16\x00\x00\x00"}]} + */ + +struct nl_parsed_vlan { + uint16_t vlan_id; + uint16_t vlan_proto; + struct ifla_vlan_flags vlan_flags; +}; + +#define _OUT(_field) offsetof(struct nl_parsed_vlan, _field) +static const struct nlattr_parser nla_p_vlan[] = { + { .type = IFLA_VLAN_ID, .off = _OUT(vlan_id), .cb = nlattr_get_uint16 }, + { .type = IFLA_VLAN_FLAGS, .off = _OUT(vlan_flags), .cb = nlattr_get_nla }, + { .type = IFLA_VLAN_PROTOCOL, .off = _OUT(vlan_proto), .cb = nlattr_get_uint16 }, +}; +#undef _OUT +NL_DECLARE_ATTR_PARSER(vlan_parser, nla_p_vlan); + +static int +vlan_clone_create_nl(struct if_clone *ifc, char *name, size_t len, + struct ifc_data_nl *ifd) +{ + struct epoch_tracker et; + struct ifnet *ifp_parent; + struct nl_pstate *npt = ifd->npt; + struct nl_parsed_link *lattrs = ifd->lattrs; + int error; + + /* + * lattrs.ifla_ifname is the new interface name + * lattrs.ifi_index contains parent interface index + * lattrs.ifla_idata contains un-parsed vlan data + */ + struct nl_parsed_vlan attrs = { + .vlan_id = 0xFEFE, + .vlan_proto = ETHERTYPE_VLAN + }; + + if 
(lattrs->ifla_idata == NULL) { + nlmsg_report_err_msg(npt, "vlan id is required, guessing not supported"); + return (ENOTSUP); + } + + error = nl_parse_nested(lattrs->ifla_idata, &vlan_parser, npt, &attrs); + if (error != 0) + return (error); + if (attrs.vlan_id > 4095) { + nlmsg_report_err_msg(npt, "Invalid VID: %d", attrs.vlan_id); + return (EINVAL); + } + if (attrs.vlan_proto != ETHERTYPE_VLAN && attrs.vlan_proto != ETHERTYPE_QINQ) { + nlmsg_report_err_msg(npt, "Unsupported ethertype: 0x%04X", attrs.vlan_proto); + return (ENOTSUP); + } + + struct vlanreq params = { + .vlr_tag = attrs.vlan_id, + .vlr_proto = attrs.vlan_proto, + }; + struct ifc_data ifd_new = { .flags = IFC_F_SYSSPACE, .unit = ifd->unit, .params = ¶ms }; + + NET_EPOCH_ENTER(et); + ifp_parent = ifnet_byindex(lattrs->ifi_index); + if (ifp_parent != NULL) + strlcpy(params.vlr_parent, if_name(ifp_parent), sizeof(params.vlr_parent)); + NET_EPOCH_EXIT(et); + + if (ifp_parent == NULL) { + nlmsg_report_err_msg(npt, "unable to find parent interface %u", lattrs->ifi_index); + return (ENOENT); + } + + error = vlan_clone_create(ifc, name, len, &ifd_new, &ifd->ifp); + + return (error); +} + +static int +vlan_clone_modify_nl(struct ifnet *ifp, struct ifc_data_nl *ifd) +{ + struct nl_parsed_link *lattrs = ifd->lattrs; + + if ((lattrs->ifla_idata != NULL) && ((ifd->flags & IFC_F_CREATE) == 0)) { + struct epoch_tracker et; + struct nl_parsed_vlan attrs = { + .vlan_proto = ETHERTYPE_VLAN, + }; + int error; + + error = nl_parse_nested(lattrs->ifla_idata, &vlan_parser, ifd->npt, &attrs); + if (error != 0) + return (error); + + NET_EPOCH_ENTER(et); + struct ifnet *ifp_parent = ifnet_byindex_ref(lattrs->ifla_link); + NET_EPOCH_EXIT(et); + + if (ifp_parent == NULL) { + nlmsg_report_err_msg(ifd->npt, "unable to find parent interface %u", + lattrs->ifla_link); + return (ENOENT); + } + + struct ifvlan *ifv = ifp->if_softc; + error = vlan_config(ifv, ifp_parent, attrs.vlan_id, attrs.vlan_proto); + + if_rele(ifp_parent); + 
if (error != 0) + return (error); + } + + return (nl_modify_ifp_generic(ifp, ifd->lattrs, ifd->bm, ifd->npt)); +} + +/* + * {{nla_len=24, nla_type=IFLA_LINKINFO}, + * [ + * {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...}, + * {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x16\x00\x00\x00"}]} + */ +static void +vlan_clone_dump_nl(struct ifnet *ifp, struct nl_writer *nw) +{ + uint32_t parent_index = 0; + uint16_t vlan_id = 0; + uint16_t vlan_proto = 0; + + VLAN_SLOCK(); + struct ifvlan *ifv = ifp->if_softc; + if (TRUNK(ifv) != NULL) + parent_index = PARENT(ifv)->if_index; + vlan_id = ifv->ifv_vid; + vlan_proto = ifv->ifv_proto; + VLAN_SUNLOCK(); + + if (parent_index != 0) + nlattr_add_u32(nw, IFLA_LINK, parent_index); + + int off = nlattr_add_nested(nw, IFLA_LINKINFO); + if (off != 0) { + nlattr_add_string(nw, IFLA_INFO_KIND, "vlan"); + int off2 = nlattr_add_nested(nw, IFLA_INFO_DATA); + if (off2 != 0) { + nlattr_add_u16(nw, IFLA_VLAN_ID, vlan_id); + nlattr_add_u16(nw, IFLA_VLAN_PROTOCOL, vlan_proto); + nlattr_set_len(nw, off2); + } + nlattr_set_len(nw, off); + } +} + static int vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags) { struct ifvlan *ifv = ifp->if_softc; int unit = ifp->if_dunit; if (ifp->if_vlantrunk) return (EBUSY); #ifdef ALTQ IFQ_PURGE(&ifp->if_snd); #endif ether_ifdetach(ifp); /* first, remove it from system-wide lists */ vlan_unconfig(ifp); /* now it can be unconfigured and freed */ /* * We should have the only reference to the ifv now, so we can now * drain any remaining lladdr task before freeing the ifnet and the * ifvlan. */ taskqueue_drain(taskqueue_thread, &ifv->lladdr_task); NET_EPOCH_WAIT(); if_free(ifp); free(ifv, M_VLAN); if (unit != IF_DUNIT_NONE) ifc_free_unit(ifc, unit); return (0); } /* * The ifp->if_init entry point for vlan(4) is a no-op. */ static void vlan_init(void *foo __unused) { } /* * The if_transmit method for vlan(4) interface. 
*/ static int vlan_transmit(struct ifnet *ifp, struct mbuf *m) { struct ifvlan *ifv; struct ifnet *p; int error, len, mcast; NET_EPOCH_ASSERT(); ifv = ifp->if_softc; if (TRUNK(ifv) == NULL) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); return (ENETDOWN); } p = PARENT(ifv); len = m->m_pkthdr.len; mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; BPF_MTAP(ifp, m); #if defined(KERN_TLS) || defined(RATELIMIT) if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { struct vlan_snd_tag *vst; struct m_snd_tag *mst; MPASS(m->m_pkthdr.snd_tag->ifp == ifp); mst = m->m_pkthdr.snd_tag; vst = mst_to_vst(mst); if (vst->tag->ifp != p) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); return (EAGAIN); } m->m_pkthdr.snd_tag = m_snd_tag_ref(vst->tag); m_snd_tag_rele(mst); } #endif /* * Do not run parent's if_transmit() if the parent is not up, * or parent's driver will cause a system crash. */ if (!UP_AND_RUNNING(p)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); m_freem(m); return (ENETDOWN); } if (!ether_8021q_frame(&m, ifp, p, &ifv->ifv_qtag)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (0); } /* * Send it, precisely as ether_output() would have. */ error = (p->if_transmit)(p, m); if (error == 0) { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast); } else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } static int vlan_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct ifvlan *ifv; struct ifnet *p; NET_EPOCH_ASSERT(); /* * Find the first non-VLAN parent interface. 
*/ ifv = ifp->if_softc; do { if (TRUNK(ifv) == NULL) { m_freem(m); return (ENETDOWN); } p = PARENT(ifv); ifv = p->if_softc; } while (p->if_type == IFT_L2VLAN); return p->if_output(ifp, m, dst, ro); } #ifdef ALTQ static void vlan_altq_start(if_t ifp) { struct ifaltq *ifq = &ifp->if_snd; struct mbuf *m; IFQ_LOCK(ifq); IFQ_DEQUEUE_NOLOCK(ifq, m); while (m != NULL) { vlan_transmit(ifp, m); IFQ_DEQUEUE_NOLOCK(ifq, m); } IFQ_UNLOCK(ifq); } static int vlan_altq_transmit(if_t ifp, struct mbuf *m) { int err; if (ALTQ_IS_ENABLED(&ifp->if_snd)) { IFQ_ENQUEUE(&ifp->if_snd, m, err); if (err == 0) vlan_altq_start(ifp); } else err = vlan_transmit(ifp, m); return (err); } #endif /* ALTQ */ /* * The ifp->if_qflush entry point for vlan(4) is a no-op. */ static void vlan_qflush(struct ifnet *ifp __unused) { } static void vlan_input(struct ifnet *ifp, struct mbuf *m) { struct ifvlantrunk *trunk; struct ifvlan *ifv; struct m_tag *mtag; uint16_t vid, tag; NET_EPOCH_ASSERT(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { m_freem(m); return; } if (m->m_flags & M_VLANTAG) { /* * Packet is tagged, but m contains a normal * Ethernet frame; the tag is stored out-of-band. */ tag = m->m_pkthdr.ether_vtag; m->m_flags &= ~M_VLANTAG; } else { struct ether_vlan_header *evl; /* * Packet is tagged in-band as specified by 802.1q. */ switch (ifp->if_type) { case IFT_ETHER: if (m->m_len < sizeof(*evl) && (m = m_pullup(m, sizeof(*evl))) == NULL) { if_printf(ifp, "cannot pullup VLAN header\n"); return; } evl = mtod(m, struct ether_vlan_header *); tag = ntohs(evl->evl_tag); /* * Remove the 802.1q header by copying the Ethernet * addresses over it and adjusting the beginning of * the data in the mbuf. The encapsulated Ethernet * type field is already in place. 
*/ bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); break; default: #ifdef INVARIANTS panic("%s: %s has unsupported if_type %u", __func__, ifp->if_xname, ifp->if_type); #endif if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); m_freem(m); return; } } vid = EVL_VLANOFTAG(tag); ifv = vlan_gethash(trunk, vid); if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) { if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); m_freem(m); return; } if (V_vlan_mtag_pcp) { /* * While uncommon, it is possible that we will find a 802.1q * packet encapsulated inside another packet that also had an * 802.1q header. For example, ethernet tunneled over IPSEC * arriving over ethernet. In that case, we replace the * existing 802.1q PCP m_tag value. */ mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL); if (mtag == NULL) { mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_IN, sizeof(uint8_t), M_NOWAIT); if (mtag == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return; } m_tag_prepend(m, mtag); } *(uint8_t *)(mtag + 1) = EVL_PRIOFTAG(tag); } m->m_pkthdr.rcvif = ifv->ifv_ifp; if_inc_counter(ifv->ifv_ifp, IFCOUNTER_IPACKETS, 1); /* Pass it back through the parent's input routine. */ (*ifv->ifv_ifp->if_input)(ifv->ifv_ifp, m); } static void vlan_lladdr_fn(void *arg, int pending __unused) { struct ifvlan *ifv; struct ifnet *ifp; ifv = (struct ifvlan *)arg; ifp = ifv->ifv_ifp; CURVNET_SET(ifp->if_vnet); /* The ifv_ifp already has the lladdr copied in. */ if_setlladdr(ifp, IF_LLADDR(ifp), ifp->if_addrlen); CURVNET_RESTORE(); } static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid, uint16_t proto) { struct epoch_tracker et; struct ifvlantrunk *trunk; struct ifnet *ifp; int error = 0; /* * We can handle non-ethernet hardware types as long as * they handle the tagging and headers themselves. 
*/ if (p->if_type != IFT_ETHER && p->if_type != IFT_L2VLAN && (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) return (EPROTONOSUPPORT); if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) return (EPROTONOSUPPORT); /* * Don't let the caller set up a VLAN VID with * anything except VLID bits. * VID numbers 0x0 and 0xFFF are reserved. */ if (vid == 0 || vid == 0xFFF || (vid & ~EVL_VLID_MASK)) return (EINVAL); if (ifv->ifv_trunk) { trunk = ifv->ifv_trunk; if (trunk->parent != p) return (EBUSY); VLAN_XLOCK(); ifv->ifv_proto = proto; if (ifv->ifv_vid != vid) { /* Re-hash */ vlan_remhash(trunk, ifv); ifv->ifv_vid = vid; error = vlan_inshash(trunk, ifv); } /* Will unlock */ goto done; } VLAN_XLOCK(); if (p->if_vlantrunk == NULL) { trunk = malloc(sizeof(struct ifvlantrunk), M_VLAN, M_WAITOK | M_ZERO); vlan_inithash(trunk); TRUNK_LOCK_INIT(trunk); TRUNK_WLOCK(trunk); p->if_vlantrunk = trunk; trunk->parent = p; if_ref(trunk->parent); TRUNK_WUNLOCK(trunk); } else { trunk = p->if_vlantrunk; } ifv->ifv_vid = vid; /* must set this before vlan_inshash() */ ifv->ifv_pcp = 0; /* Default: best effort delivery. */ error = vlan_inshash(trunk, ifv); if (error) goto done; ifv->ifv_proto = proto; ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN; ifv->ifv_mintu = ETHERMIN; ifv->ifv_pflags = 0; ifv->ifv_capenable = -1; /* * If the parent supports the VLAN_MTU capability, * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames, * use it. */ if (p->if_capenable & IFCAP_VLAN_MTU) { /* * No need to fudge the MTU since the parent can * handle extended frames. */ ifv->ifv_mtufudge = 0; } else { /* * Fudge the MTU by the encapsulation size. This * makes us incompatible with strictly compliant * 802.1Q implementations, but allows us to use * the feature with other NetBSD implementations, * which might still be useful. */ ifv->ifv_mtufudge = ifv->ifv_encaplen; } ifv->ifv_trunk = trunk; ifp = ifv->ifv_ifp; /* * Initialize fields from our parent. 
This duplicates some * work with ether_ifattach() but allows for non-ethernet * interfaces to also work. */ ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge; ifp->if_baudrate = p->if_baudrate; ifp->if_input = p->if_input; ifp->if_resolvemulti = p->if_resolvemulti; ifp->if_addrlen = p->if_addrlen; ifp->if_broadcastaddr = p->if_broadcastaddr; ifp->if_pcp = ifv->ifv_pcp; /* * We wrap the parent's if_output using vlan_output to ensure that it * can't become stale. */ ifp->if_output = vlan_output; /* * Copy only a selected subset of flags from the parent. * Other flags are none of our business. */ #define VLAN_COPY_FLAGS (IFF_SIMPLEX) ifp->if_flags &= ~VLAN_COPY_FLAGS; ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS; #undef VLAN_COPY_FLAGS ifp->if_link_state = p->if_link_state; NET_EPOCH_ENTER(et); vlan_capabilities(ifv); NET_EPOCH_EXIT(et); /* * Set up our interface address to reflect the underlying * physical interface's. */ TASK_INIT(&ifv->lladdr_task, 0, vlan_lladdr_fn, ifv); ((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen = p->if_addrlen; /* * Do not schedule link address update if it was the same * as previous parent's. This helps avoid updating for each * associated llentry. */ if (memcmp(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen) != 0) { bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen); taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task); } /* We are ready for operation now. */ ifp->if_drv_flags |= IFF_DRV_RUNNING; /* Update flags on the parent, if necessary. */ vlan_setflags(ifp, 1); /* * Configure multicast addresses that may already be * joined on the vlan device. 
*/ (void)vlan_setmulti(ifp); done: if (error == 0) EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid); VLAN_XUNLOCK(); return (error); } static void vlan_unconfig(struct ifnet *ifp) { VLAN_XLOCK(); vlan_unconfig_locked(ifp, 0); VLAN_XUNLOCK(); } static void vlan_unconfig_locked(struct ifnet *ifp, int departing) { struct ifvlantrunk *trunk; struct vlan_mc_entry *mc; struct ifvlan *ifv; struct ifnet *parent; int error; VLAN_XLOCK_ASSERT(); ifv = ifp->if_softc; trunk = ifv->ifv_trunk; parent = NULL; if (trunk != NULL) { parent = trunk->parent; /* * Since the interface is being unconfigured, we need to * empty the list of multicast groups that we may have joined * while we were alive from the parent's list. */ while ((mc = CK_SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) { /* * If the parent interface is being detached, * all its multicast addresses have already * been removed. Warn about errors if * if_delmulti() does fail, but don't abort as * all callers expect vlan destruction to * succeed. */ if (!departing) { error = if_delmulti(parent, (struct sockaddr *)&mc->mc_addr); if (error) if_printf(ifp, "Failed to delete multicast address from parent: %d\n", error); } CK_SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries); NET_EPOCH_CALL(vlan_mc_free, &mc->mc_epoch_ctx); } vlan_setflags(ifp, 0); /* clear special flags on parent */ vlan_remhash(trunk, ifv); ifv->ifv_trunk = NULL; /* * Check if we were the last. */ if (trunk->refcnt == 0) { parent->if_vlantrunk = NULL; NET_EPOCH_WAIT(); trunk_destroy(trunk); } } /* Disconnect from parent. */ if (ifv->ifv_pflags) if_printf(ifp, "%s: ifv_pflags unclean\n", __func__); ifp->if_mtu = ETHERMTU; ifp->if_link_state = LINK_STATE_UNKNOWN; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; /* * Only dispatch an event if vlan was * attached, otherwise there is nothing * to cleanup anyway. 
*/ if (parent != NULL) EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid); } /* Handle a reference counted flag that should be set on the parent as well */ static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)) { struct ifvlan *ifv; int error; VLAN_SXLOCK_ASSERT(); ifv = ifp->if_softc; status = status ? (ifp->if_flags & flag) : 0; /* Now "status" contains the flag value or 0 */ /* * See if recorded parent's status is different from what * we want it to be. If it is, flip it. We record parent's * status in ifv_pflags so that we won't clear parent's flag * we haven't set. In fact, we don't clear or set parent's * flags directly, but get or release references to them. * That's why we can be sure that recorded flags still are * in accord with actual parent's flags. */ if (status != (ifv->ifv_pflags & flag)) { error = (*func)(PARENT(ifv), status); if (error) return (error); ifv->ifv_pflags &= ~flag; ifv->ifv_pflags |= status; } return (0); } /* * Handle IFF_* flags that require certain changes on the parent: * if "status" is true, update parent's flags respective to our if_flags; * if "status" is false, forcedly clear the flags set on parent. 
*/ static int vlan_setflags(struct ifnet *ifp, int status) { int error, i; for (i = 0; vlan_pflags[i].flag; i++) { error = vlan_setflag(ifp, vlan_pflags[i].flag, status, vlan_pflags[i].func); if (error) return (error); } return (0); } /* Inform all vlans that their parent has changed link state */ static void vlan_link_state(struct ifnet *ifp) { struct epoch_tracker et; struct ifvlantrunk *trunk; struct ifvlan *ifv; NET_EPOCH_ENTER(et); trunk = ifp->if_vlantrunk; if (trunk == NULL) { NET_EPOCH_EXIT(et); return; } TRUNK_WLOCK(trunk); VLAN_FOREACH(ifv, trunk) { ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate; if_link_state_change(ifv->ifv_ifp, trunk->parent->if_link_state); } TRUNK_WUNLOCK(trunk); NET_EPOCH_EXIT(et); } static void vlan_capabilities(struct ifvlan *ifv) { struct ifnet *p; struct ifnet *ifp; struct ifnet_hw_tsomax hw_tsomax; int cap = 0, ena = 0, mena; u_long hwa = 0; NET_EPOCH_ASSERT(); VLAN_SXLOCK_ASSERT(); p = PARENT(ifv); ifp = ifv->ifv_ifp; /* Mask parent interface enabled capabilities disabled by user. */ mena = p->if_capenable & ifv->ifv_capenable; /* * If the parent interface can do checksum offloading * on VLANs, then propagate its hardware-assisted * checksumming flags. Also assert that checksum * offloading requires hardware VLAN tagging. */ if (p->if_capabilities & IFCAP_VLAN_HWCSUM) cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); if (p->if_capenable & IFCAP_VLAN_HWCSUM && p->if_capenable & IFCAP_VLAN_HWTAGGING) { ena |= mena & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); if (ena & IFCAP_TXCSUM) hwa |= p->if_hwassist & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); if (ena & IFCAP_TXCSUM_IPV6) hwa |= p->if_hwassist & (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6); } /* * If the parent interface can do TSO on VLANs then * propagate the hardware-assisted flag. TSO on VLANs * does not necessarily require hardware VLAN tagging. 
*/ memset(&hw_tsomax, 0, sizeof(hw_tsomax)); if_hw_tsomax_common(p, &hw_tsomax); if_hw_tsomax_update(ifp, &hw_tsomax); if (p->if_capabilities & IFCAP_VLAN_HWTSO) cap |= p->if_capabilities & IFCAP_TSO; if (p->if_capenable & IFCAP_VLAN_HWTSO) { ena |= mena & IFCAP_TSO; if (ena & IFCAP_TSO) hwa |= p->if_hwassist & CSUM_TSO; } /* * If the parent interface can do LRO and checksum offloading on * VLANs, then guess it may do LRO on VLANs. False positive here * cost nothing, while false negative may lead to some confusions. */ if (p->if_capabilities & IFCAP_VLAN_HWCSUM) cap |= p->if_capabilities & IFCAP_LRO; if (p->if_capenable & IFCAP_VLAN_HWCSUM) ena |= p->if_capenable & IFCAP_LRO; /* * If the parent interface can offload TCP connections over VLANs then * propagate its TOE capability to the VLAN interface. * * All TOE drivers in the tree today can deal with VLANs. If this * changes then IFCAP_VLAN_TOE should be promoted to a full capability * with its own bit. */ #define IFCAP_VLAN_TOE IFCAP_TOE if (p->if_capabilities & IFCAP_VLAN_TOE) cap |= p->if_capabilities & IFCAP_TOE; if (p->if_capenable & IFCAP_VLAN_TOE) { SETTOEDEV(ifp, TOEDEV(p)); ena |= mena & IFCAP_TOE; } /* * If the parent interface supports dynamic link state, so does the * VLAN interface. */ cap |= (p->if_capabilities & IFCAP_LINKSTATE); ena |= (mena & IFCAP_LINKSTATE); #ifdef RATELIMIT /* * If the parent interface supports ratelimiting, so does the * VLAN interface. */ cap |= (p->if_capabilities & IFCAP_TXRTLMT); ena |= (mena & IFCAP_TXRTLMT); #endif /* * If the parent interface supports unmapped mbufs, so does * the VLAN interface. Note that this should be fine even for * interfaces that don't support hardware tagging as headers * are prepended in normal mbufs to unmapped mbufs holding * payload data. 
*/ cap |= (p->if_capabilities & IFCAP_MEXTPG); ena |= (mena & IFCAP_MEXTPG); /* * If the parent interface can offload encryption and segmentation * of TLS records over TCP, propagate it's capability to the VLAN * interface. * * All TLS drivers in the tree today can deal with VLANs. If * this ever changes, then a new IFCAP_VLAN_TXTLS can be * defined. */ if (p->if_capabilities & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT)) cap |= p->if_capabilities & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT); if (p->if_capenable & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT)) ena |= mena & (IFCAP_TXTLS | IFCAP_TXTLS_RTLMT); ifp->if_capabilities = cap; ifp->if_capenable = ena; ifp->if_hwassist = hwa; } static void vlan_trunk_capabilities(struct ifnet *ifp) { struct epoch_tracker et; struct ifvlantrunk *trunk; struct ifvlan *ifv; VLAN_SLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_SUNLOCK(); return; } NET_EPOCH_ENTER(et); VLAN_FOREACH(ifv, trunk) vlan_capabilities(ifv); NET_EPOCH_EXIT(et); VLAN_SUNLOCK(); } static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifnet *p; struct ifreq *ifr; #ifdef INET struct ifaddr *ifa; #endif struct ifvlan *ifv; struct ifvlantrunk *trunk; struct vlanreq vlr; int error = 0, oldmtu; ifr = (struct ifreq *)data; #ifdef INET ifa = (struct ifaddr *) data; #endif ifv = ifp->if_softc; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) arp_ifinit(ifp, ifa); #endif break; case SIOCGIFADDR: bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0], ifp->if_addrlen); break; case SIOCGIFMEDIA: VLAN_SLOCK(); if (TRUNK(ifv) != NULL) { p = PARENT(ifv); if_ref(p); error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data); if_rele(p); /* Limit the result to the parent's current config. 
*/ if (error == 0) { struct ifmediareq *ifmr; ifmr = (struct ifmediareq *)data; if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) { ifmr->ifm_count = 1; error = copyout(&ifmr->ifm_current, ifmr->ifm_ulist, sizeof(int)); } } } else { error = EINVAL; } VLAN_SUNLOCK(); break; case SIOCSIFMEDIA: error = EINVAL; break; case SIOCSIFMTU: /* * Set the interface MTU. */ VLAN_SLOCK(); trunk = TRUNK(ifv); if (trunk != NULL) { TRUNK_WLOCK(trunk); if (ifr->ifr_mtu > (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) || ifr->ifr_mtu < (ifv->ifv_mintu - ifv->ifv_mtufudge)) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; TRUNK_WUNLOCK(trunk); } else error = EINVAL; VLAN_SUNLOCK(); break; case SIOCSETVLAN: #ifdef VIMAGE /* * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN * interface to be delegated to a jail without allowing the * jail to change what underlying interface/VID it is * associated with. We are not entirely convinced that this * is the right way to accomplish that policy goal. */ if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif error = copyin(ifr_data_get_ptr(ifr), &vlr, sizeof(vlr)); if (error) break; if (vlr.vlr_parent[0] == '\0') { vlan_unconfig(ifp); break; } p = ifunit_ref(vlr.vlr_parent); if (p == NULL) { error = ENOENT; break; } #ifdef COMPAT_FREEBSD12 if (vlr.vlr_proto == 0) vlr.vlr_proto = ETHERTYPE_VLAN; #endif oldmtu = ifp->if_mtu; error = vlan_config(ifv, p, vlr.vlr_tag, vlr.vlr_proto); if_rele(p); /* * VLAN MTU may change during addition of the vlandev. * If it did, do network layer specific procedure. 
*/ if (ifp->if_mtu != oldmtu) if_notifymtu(ifp); break; case SIOCGETVLAN: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif bzero(&vlr, sizeof(vlr)); VLAN_SLOCK(); if (TRUNK(ifv) != NULL) { strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname, sizeof(vlr.vlr_parent)); vlr.vlr_tag = ifv->ifv_vid; vlr.vlr_proto = ifv->ifv_proto; } VLAN_SUNLOCK(); error = copyout(&vlr, ifr_data_get_ptr(ifr), sizeof(vlr)); break; case SIOCSIFFLAGS: /* * We should propagate selected flags to the parent, * e.g., promiscuous mode. */ VLAN_XLOCK(); if (TRUNK(ifv) != NULL) error = vlan_setflags(ifp, 1); VLAN_XUNLOCK(); break; case SIOCADDMULTI: case SIOCDELMULTI: /* * If we don't have a parent, just remember the membership for * when we do. * * XXX We need the rmlock here to avoid sleeping while * holding in6_multi_mtx. */ VLAN_XLOCK(); trunk = TRUNK(ifv); if (trunk != NULL) error = vlan_setmulti(ifp); VLAN_XUNLOCK(); break; case SIOCGVLANPCP: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif ifr->ifr_vlan_pcp = ifv->ifv_pcp; break; case SIOCSVLANPCP: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif error = priv_check(curthread, PRIV_NET_SETVLANPCP); if (error) break; if (ifr->ifr_vlan_pcp > VLAN_PCP_MAX) { error = EINVAL; break; } ifv->ifv_pcp = ifr->ifr_vlan_pcp; ifp->if_pcp = ifv->ifv_pcp; /* broadcast event about PCP change */ EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_PCP); break; case SIOCSIFCAP: VLAN_SLOCK(); ifv->ifv_capenable = ifr->ifr_reqcap; trunk = TRUNK(ifv); if (trunk != NULL) { struct epoch_tracker et; NET_EPOCH_ENTER(et); vlan_capabilities(ifv); NET_EPOCH_EXIT(et); } VLAN_SUNLOCK(); break; default: error = EINVAL; break; } return (error); } #if defined(KERN_TLS) || defined(RATELIMIT) static int vlan_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) { struct epoch_tracker et; const struct if_snd_tag_sw *sw; struct 
vlan_snd_tag *vst; struct ifvlan *ifv; struct ifnet *parent; struct m_snd_tag *mst; int error; NET_EPOCH_ENTER(et); ifv = ifp->if_softc; switch (params->hdr.type) { #ifdef RATELIMIT case IF_SND_TAG_TYPE_UNLIMITED: sw = &vlan_snd_tag_ul_sw; break; case IF_SND_TAG_TYPE_RATE_LIMIT: sw = &vlan_snd_tag_rl_sw; break; #endif #ifdef KERN_TLS case IF_SND_TAG_TYPE_TLS: sw = &vlan_snd_tag_tls_sw; break; case IF_SND_TAG_TYPE_TLS_RX: sw = NULL; if (params->tls_rx.vlan_id != 0) goto failure; params->tls_rx.vlan_id = ifv->ifv_vid; break; #ifdef RATELIMIT case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: sw = &vlan_snd_tag_tls_rl_sw; break; #endif #endif default: goto failure; } if (ifv->ifv_trunk != NULL) parent = PARENT(ifv); else parent = NULL; if (parent == NULL) goto failure; if_ref(parent); NET_EPOCH_EXIT(et); if (sw != NULL) { vst = malloc(sizeof(*vst), M_VLAN, M_NOWAIT); if (vst == NULL) { if_rele(parent); return (ENOMEM); } } else vst = NULL; error = m_snd_tag_alloc(parent, params, &mst); if_rele(parent); if (error) { free(vst, M_VLAN); return (error); } if (sw != NULL) { m_snd_tag_init(&vst->com, ifp, sw); vst->tag = mst; *ppmt = &vst->com; } else *ppmt = mst; return (0); failure: NET_EPOCH_EXIT(et); return (EOPNOTSUPP); } static struct m_snd_tag * vlan_next_snd_tag(struct m_snd_tag *mst) { struct vlan_snd_tag *vst; vst = mst_to_vst(mst); return (vst->tag); } static int vlan_snd_tag_modify(struct m_snd_tag *mst, union if_snd_tag_modify_params *params) { struct vlan_snd_tag *vst; vst = mst_to_vst(mst); return (vst->tag->sw->snd_tag_modify(vst->tag, params)); } static int vlan_snd_tag_query(struct m_snd_tag *mst, union if_snd_tag_query_params *params) { struct vlan_snd_tag *vst; vst = mst_to_vst(mst); return (vst->tag->sw->snd_tag_query(vst->tag, params)); } static void vlan_snd_tag_free(struct m_snd_tag *mst) { struct vlan_snd_tag *vst; vst = mst_to_vst(mst); m_snd_tag_rele(vst->tag); free(vst, M_VLAN); } static void vlan_ratelimit_query(struct ifnet *ifp __unused, struct 
if_ratelimit_query_results *q) { /* * For vlan, we have an indirect * interface. The caller needs to * get a ratelimit tag on the actual * interface the flow will go on. */ q->rate_table = NULL; q->flags = RT_IS_INDIRECT; q->max_flows = 0; q->number_of_rates = 0; } #endif diff --git a/sys/netlink/netlink_glue.c b/sys/netlink/netlink_glue.c index 25b891036b5b..069cb9900e03 100644 --- a/sys/netlink/netlink_glue.c +++ b/sys/netlink/netlink_glue.c @@ -1,266 +1,295 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2023 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_netlink.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* priv_check */ #include #include #include #include #include /* Standard bits: built-in the kernel */ SYSCTL_NODE(_net, OID_AUTO, netlink, CTLFLAG_RD, 0, ""); SYSCTL_NODE(_net_netlink, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); MALLOC_DEFINE(M_NETLINK, "netlink", "Memory used for netlink packets"); /* Netlink-related callbacks needed to glue rtsock, netlink and linuxolator */ static void ignore_route_event(uint32_t fibnum, const struct rib_cmd_info *rc) { } static void ignore_ifmsg_event(struct ifnet *ifp, int if_flags_mask) { } static struct rtbridge ignore_cb = { .route_f = ignore_route_event, .ifmsg_f = ignore_ifmsg_event, }; void *linux_netlink_p = NULL; /* Callback pointer for Linux translator functions */ struct rtbridge *rtsock_callback_p = &ignore_cb; struct rtbridge *netlink_callback_p = &ignore_cb; /* * nlp accessors. * TODO: move to a separate file once the number grows. 
*/ bool nlp_has_priv(struct nlpcb *nlp, int priv) { return (priv_check_cred(nlp->nl_cred, priv) == 0); } struct ucred * nlp_get_cred(struct nlpcb *nlp) { return (nlp->nl_cred); } uint32_t nlp_get_pid(const struct nlpcb *nlp) { return (nlp->nl_process_id); } bool nlp_unconstrained_vnet(const struct nlpcb *nlp) { return (nlp->nl_unconstrained_vnet); } #ifndef NETLINK /* Stub implementations for the loadable functions */ static bool get_stub_writer(struct nl_writer *nw) { bzero(nw, sizeof(*nw)); nw->writer_type = NS_WRITER_TYPE_STUB; nw->enomem = true; return (false); } static bool nlmsg_get_unicast_writer_stub(struct nl_writer *nw, int size, struct nlpcb *nlp) { return (get_stub_writer(nw)); } static bool nlmsg_get_group_writer_stub(struct nl_writer *nw, int size, int protocol, int group_id) { return (get_stub_writer(nw)); } static bool nlmsg_get_chain_writer_stub(struct nl_writer *nw, int size, struct mbuf **pm) { return (get_stub_writer(nw)); } static bool nlmsg_flush_stub(struct nl_writer *nw __unused) { return (false); } static void nlmsg_ignore_limit_stub(struct nl_writer *nw __unused) { } static bool nlmsg_refill_buffer_stub(struct nl_writer *nw __unused, int required_len __unused) { return (false); } static bool nlmsg_add_stub(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, uint16_t flags, uint32_t len) { return (false); } static bool nlmsg_end_stub(struct nl_writer *nw __unused) { return (false); } static void nlmsg_abort_stub(struct nl_writer *nw __unused) { } static bool nlmsg_end_dump_stub(struct nl_writer *nw, int error, struct nlmsghdr *hdr) { return (false); } +static int +nl_modify_ifp_generic_stub(struct ifnet *ifp __unused, + struct nl_parsed_link *lattrs __unused, const struct nlattr_bmask *bm __unused, + struct nl_pstate *npt __unused) +{ + return (ENOTSUP); +} + +static void +nl_store_ifp_cookie_stub(struct nl_pstate *npt __unused, struct ifnet *ifp __unused) +{ +} + const static struct nl_function_wrapper nl_stub = { 
.nlmsg_add = nlmsg_add_stub, .nlmsg_refill_buffer = nlmsg_refill_buffer_stub, .nlmsg_flush = nlmsg_flush_stub, .nlmsg_end = nlmsg_end_stub, .nlmsg_abort = nlmsg_abort_stub, .nlmsg_ignore_limit = nlmsg_ignore_limit_stub, .nlmsg_get_unicast_writer = nlmsg_get_unicast_writer_stub, .nlmsg_get_group_writer = nlmsg_get_group_writer_stub, .nlmsg_get_chain_writer = nlmsg_get_chain_writer_stub, .nlmsg_end_dump = nlmsg_end_dump_stub, + .nl_modify_ifp_generic = nl_modify_ifp_generic_stub, + .nl_store_ifp_cookie = nl_store_ifp_cookie_stub, }; /* * If the kernel is compiled with netlink as a module, * provide a way to introduce non-stub functioms */ static const struct nl_function_wrapper *_nl = &nl_stub; void nl_set_functions(const struct nl_function_wrapper *nl) { _nl = (nl != NULL) ? nl : &nl_stub; } /* Function wrappers */ bool nlmsg_get_unicast_writer(struct nl_writer *nw, int size, struct nlpcb *nlp) { return (_nl->nlmsg_get_unicast_writer(nw, size, nlp)); } bool nlmsg_get_group_writer(struct nl_writer *nw, int size, int protocol, int group_id) { return (_nl->nlmsg_get_group_writer(nw, size, protocol, group_id)); } bool nlmsg_get_chain_writer(struct nl_writer *nw, int size, struct mbuf **pm) { return (_nl->nlmsg_get_chain_writer(nw, size, pm)); } bool nlmsg_flush(struct nl_writer *nw) { return (_nl->nlmsg_flush(nw)); } void nlmsg_ignore_limit(struct nl_writer *nw) { _nl->nlmsg_ignore_limit(nw); } bool nlmsg_refill_buffer(struct nl_writer *nw, int required_len) { return (_nl->nlmsg_refill_buffer(nw, required_len)); } bool nlmsg_add(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, uint16_t flags, uint32_t len) { return (_nl->nlmsg_add(nw, portid, seq, type, flags, len)); } bool nlmsg_end(struct nl_writer *nw) { return (_nl->nlmsg_end(nw)); } void nlmsg_abort(struct nl_writer *nw) { _nl->nlmsg_abort(nw); } bool nlmsg_end_dump(struct nl_writer *nw, int error, struct nlmsghdr *hdr) { return (_nl->nlmsg_end_dump(nw, error, hdr)); } + +int 
+nl_modify_ifp_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs, + const struct nlattr_bmask *bm, struct nl_pstate *npt) +{ + return (_nl->nl_modify_ifp_generic(ifp, lattrs, bm, npt)); /* dispatch via the installed table; member name matches the nl_stub initializer */ +} + +void /* public wrapper; the no-op fallback is nl_store_ifp_cookie_stub() in the stub section */ +nl_store_ifp_cookie(struct nl_pstate *npt, struct ifnet *ifp) +{ + _nl->nl_store_ifp_cookie(npt, ifp); /* void callee: plain call, as in the nlmsg_abort() wrapper */ +} + #endif /* !NETLINK */ diff --git a/sys/netlink/netlink_module.c b/sys/netlink/netlink_module.c index a881a7540166..051eb0cb120b 100644 --- a/sys/netlink/netlink_module.c +++ b/sys/netlink/netlink_module.c @@ -1,249 +1,250 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include "opt_netlink.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include +#include #include FEATURE(netlink, "Netlink support"); #define DEBUG_MOD_NAME nl_mod #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_DEBUG); #define NL_MAX_HANDLERS 20 struct nl_proto_handler _nl_handlers[NL_MAX_HANDLERS]; struct nl_proto_handler *nl_handlers = _nl_handlers; CK_LIST_HEAD(nl_control_head, nl_control); static struct nl_control_head vnets_head = CK_LIST_HEAD_INITIALIZER(); VNET_DEFINE(struct nl_control *, nl_ctl) = NULL; struct mtx nl_global_mtx; MTX_SYSINIT(nl_global_mtx, &nl_global_mtx, "global netlink lock", MTX_DEF); #define NL_GLOBAL_LOCK() mtx_lock(&nl_global_mtx) #define NL_GLOBAL_UNLOCK() mtx_unlock(&nl_global_mtx) int netlink_unloading = 0; static void free_nl_ctl(struct nl_control *ctl) { rm_destroy(&ctl->ctl_lock); free(ctl, M_NETLINK); } struct nl_control * vnet_nl_ctl_init(void) { struct nl_control *ctl; ctl = malloc(sizeof(struct nl_control), M_NETLINK, M_WAITOK | M_ZERO); rm_init(&ctl->ctl_lock, "netlink lock"); CK_LIST_INIT(&ctl->ctl_port_head); CK_LIST_INIT(&ctl->ctl_pcb_head); NL_GLOBAL_LOCK(); struct nl_control *tmp = atomic_load_ptr(&V_nl_ctl); if (tmp == NULL) { atomic_store_ptr(&V_nl_ctl, ctl); CK_LIST_INSERT_HEAD(&vnets_head, ctl, ctl_next); NL_LOG(LOG_DEBUG2, "VNET %p init done, inserted %p into global list", curvnet, ctl); } else { NL_LOG(LOG_DEBUG, "per-VNET init clash, dropping this instance"); free_nl_ctl(ctl); ctl = tmp; } NL_GLOBAL_UNLOCK(); return (ctl); } static void vnet_nl_ctl_destroy(const void *unused __unused) { struct nl_control *ctl; /* Assume at the time all of the processes / sockets are dead */ NL_GLOBAL_LOCK(); ctl = atomic_load_ptr(&V_nl_ctl); atomic_store_ptr(&V_nl_ctl, NULL); if (ctl != NULL) { NL_LOG(LOG_DEBUG2, "Removing %p from global list", ctl); CK_LIST_REMOVE(ctl, ctl_next); } NL_GLOBAL_UNLOCK(); if (ctl != NULL) 
free_nl_ctl(ctl); } VNET_SYSUNINIT(vnet_nl_ctl_destroy, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_nl_ctl_destroy, NULL); int nl_verify_proto(int proto) { if (proto < 0 || proto >= NL_MAX_HANDLERS) { return (EINVAL); } int handler_defined = nl_handlers[proto].cb != NULL; return (handler_defined ? 0 : EPROTONOSUPPORT); } const char * nl_get_proto_name(int proto) { return (nl_handlers[proto].proto_name); } bool netlink_register_proto(int proto, const char *proto_name, nl_handler_f handler) { if ((proto < 0) || (proto >= NL_MAX_HANDLERS)) return (false); NL_GLOBAL_LOCK(); KASSERT((nl_handlers[proto].cb == NULL), ("netlink handler %d is already set", proto)); nl_handlers[proto].cb = handler; nl_handlers[proto].proto_name = proto_name; NL_GLOBAL_UNLOCK(); NL_LOG(LOG_DEBUG2, "Registered netlink %s(%d) handler", proto_name, proto); return (true); } bool netlink_unregister_proto(int proto) { if ((proto < 0) || (proto >= NL_MAX_HANDLERS)) return (false); NL_GLOBAL_LOCK(); KASSERT((nl_handlers[proto].cb != NULL), ("netlink handler %d is not set", proto)); nl_handlers[proto].cb = NULL; nl_handlers[proto].proto_name = NULL; NL_GLOBAL_UNLOCK(); NL_LOG(LOG_DEBUG2, "Unregistered netlink proto %d handler", proto); return (true); } #if !defined(NETLINK) && defined(NETLINK_MODULE) /* Non-stub function provider */ const static struct nl_function_wrapper nl_module = { .nlmsg_add = _nlmsg_add, .nlmsg_refill_buffer = _nlmsg_refill_buffer, .nlmsg_flush = _nlmsg_flush, .nlmsg_end = _nlmsg_end, .nlmsg_abort = _nlmsg_abort, .nlmsg_get_unicast_writer = _nlmsg_get_unicast_writer, .nlmsg_get_group_writer = _nlmsg_get_group_writer, .nlmsg_get_chain_writer = _nlmsg_get_chain_writer, .nlmsg_end_dump = _nlmsg_end_dump, }; #endif static bool can_unload(void) { struct nl_control *ctl; bool result = true; NL_GLOBAL_LOCK(); CK_LIST_FOREACH(ctl, &vnets_head, ctl_next) { NL_LOG(LOG_DEBUG2, "Iterating VNET head %p", ctl); if (!CK_LIST_EMPTY(&ctl->ctl_pcb_head)) { NL_LOG(LOG_NOTICE, "non-empty socket list in ctl 
%p", ctl); result = false; break; } } NL_GLOBAL_UNLOCK(); return (result); } static int netlink_modevent(module_t mod __unused, int what, void *priv __unused) { int ret = 0; switch (what) { case MOD_LOAD: NL_LOG(LOG_DEBUG2, "Loading"); #if !defined(NETLINK) && defined(NETLINK_MODULE) nl_set_functions(&nl_module); #endif break; case MOD_UNLOAD: NL_LOG(LOG_DEBUG2, "Unload called"); if (can_unload()) { NL_LOG(LOG_WARNING, "unloading"); netlink_unloading = 1; #if !defined(NETLINK) && defined(NETLINK_MODULE) nl_set_functions(NULL); #endif } else ret = EBUSY; break; default: ret = EOPNOTSUPP; break; } return (ret); } static moduledata_t netlink_mod = { "netlink", netlink_modevent, NULL }; DECLARE_MODULE(netlink, netlink_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(netlink, 1); diff --git a/sys/netlink/netlink_var.h b/sys/netlink/netlink_var.h index 465378f8af1e..cb1e3974b5f5 100644 --- a/sys/netlink/netlink_var.h +++ b/sys/netlink/netlink_var.h @@ -1,192 +1,202 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2021 Ng Peng Nam Sean * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
/*
 * Queue of mbufs; struct nlpcb embeds two of these as rx_queue and
 * tx_queue.
 */
struct nl_io_queue {
	STAILQ_HEAD(, mbuf)	head;	/* queued mbufs */
	int			length;	/* NOTE(review): presumably the amount currently queued -- confirm units */
	int			hiwat;	/* NOTE(review): presumably the high watermark for length -- confirm */
};
/*
 * Netlink control block.  One instance is published through the
 * VNET-declared pointer nl_ctl (V_nl_ctl).
 */
struct nl_control {
	CK_LIST_HEAD(nl_pid_head, nlpcb)	ctl_port_head;	/* list of nlpcb, head type nl_pid_head */
	CK_LIST_HEAD(nlpcb_head, nlpcb)		ctl_pcb_head;	/* list of nlpcb, head type nlpcb_head */
	CK_LIST_ENTRY(nl_control)		ctl_next;	/* linkage in a list of control blocks */
	struct nl_io				ctl_io;
	struct rmlock				ctl_lock;	/* rm lock belonging to this block */
};
*genl_get_family(uint32_t family_id); struct genl_group *genl_get_group(uint32_t group_id); #define MAX_FAMILIES 20 #define MAX_GROUPS 64 #define MIN_GROUP_NUM 48 #define CTRL_FAMILY_NAME "nlctrl" +struct ifnet; +struct nl_parsed_link; +struct nlattr_bmask; +struct nl_pstate; + /* Function map */ struct nl_function_wrapper { bool (*nlmsg_add)(struct nl_writer *nw, uint32_t portid, uint32_t seq, uint16_t type, uint16_t flags, uint32_t len); bool (*nlmsg_refill_buffer)(struct nl_writer *nw, int required_len); bool (*nlmsg_flush)(struct nl_writer *nw); bool (*nlmsg_end)(struct nl_writer *nw); void (*nlmsg_abort)(struct nl_writer *nw); void (*nlmsg_ignore_limit)(struct nl_writer *nw); bool (*nlmsg_get_unicast_writer)(struct nl_writer *nw, int size, struct nlpcb *nlp); bool (*nlmsg_get_group_writer)(struct nl_writer *nw, int size, int protocol, int group_id); bool (*nlmsg_get_chain_writer)(struct nl_writer *nw, int size, struct mbuf **pm); bool (*nlmsg_end_dump)(struct nl_writer *nw, int error, struct nlmsghdr *hdr); + int (*nl_modify_ifp_generic)(struct ifnet *ifp, struct nl_parsed_link *lattrs, + const struct nlattr_bmask *bm, struct nl_pstate *npt); + void (*nl_store_ifp_cookie)(struct nl_pstate *npt, struct ifnet *ifp); }; void nl_set_functions(const struct nl_function_wrapper *nl); + + #endif #endif diff --git a/sys/netlink/route/iface.c b/sys/netlink/route/iface.c index b27a0193fe0d..d81dc1f0ecae 100644 --- a/sys/netlink/route/iface.c +++ b/sys/netlink/route/iface.c @@ -1,1081 +1,1058 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
/*
 * State shared by the link/address dump handlers while walking interfaces.
 */
struct netlink_walkargs {
	struct nl_writer *nw;	/* writer the replies are emitted to */
	struct nlmsghdr hdr;	/* template header passed to every dump call */
	struct nlpcb	*so;	/* requesting netlink pcb */
	struct ucred	*cred;	/* credentials handed to prison_if() when filtering addresses */
	uint32_t	fibnum;
	int	family;		/* address family filter; 0 matches every family */
	int	error;
	int	count;		/* addresses that passed the filters */
	int	dumped;		/* addresses successfully written */
};
flags=NLM_F_REQUEST|NLM_F_DUMP, seq=1641940952, pid=0}, * {ifi_family=AF_INET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32 * * Reply: * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_ETHER, ifi_index=if_nametoindex("enp0s31f6"), ifi_flags=IFF_UP|IFF_BROADCAST|IFF_RUNNING|IFF_MULTICAST|IFF_LOWER_UP, ifi_change=0}, {{nla_len=10, nla_type=IFLA_ADDRESS}, "\xfe\x54\x00\x52\x3e\x90"} [ {{nla_len=14, nla_type=IFLA_IFNAME}, "enp0s31f6"}, {{nla_len=8, nla_type=IFLA_TXQLEN}, 1000}, {{nla_len=5, nla_type=IFLA_OPERSTATE}, 6}, {{nla_len=5, nla_type=IFLA_LINKMODE}, 0}, {{nla_len=8, nla_type=IFLA_MTU}, 1500}, {{nla_len=8, nla_type=IFLA_MIN_MTU}, 68}, {{nla_len=8, nla_type=IFLA_MAX_MTU}, 9000}, {{nla_len=8, nla_type=IFLA_GROUP}, 0}, {{nla_len=8, nla_type=IFLA_PROMISCUITY}, 0}, {{nla_len=8, nla_type=IFLA_NUM_TX_QUEUES}, 1}, {{nla_len=8, nla_type=IFLA_GSO_MAX_SEGS}, 65535}, {{nla_len=8, nla_type=IFLA_GSO_MAX_SIZE}, 65536}, {{nla_len=8, nla_type=IFLA_NUM_RX_QUEUES}, 1}, {{nla_len=5, nla_type=IFLA_CARRIER}, 1}, {{nla_len=13, nla_type=IFLA_QDISC}, "fq_codel"}, {{nla_len=8, nla_type=IFLA_CARRIER_CHANGES}, 2}, {{nla_len=5, nla_type=IFLA_PROTO_DOWN}, 0}, {{nla_len=8, nla_type=IFLA_CARRIER_UP_COUNT}, 1}, {{nla_len=8, nla_type=IFLA_CARRIER_DOWN_COUNT}, 1}, */ struct if_state { uint8_t ifla_operstate; uint8_t ifla_carrier; }; static void get_operstate_ether(struct ifnet *ifp, struct if_state *pstate) { struct ifmediareq ifmr = {}; int error; error = (*ifp->if_ioctl)(ifp, SIOCGIFMEDIA, (void *)&ifmr); if (error != 0) { NL_LOG(LOG_DEBUG, "error calling SIOCGIFMEDIA on %s: %d", if_name(ifp), error); return; } switch (IFM_TYPE(ifmr.ifm_active)) { case IFM_ETHER: if (ifmr.ifm_status & IFM_ACTIVE) { pstate->ifla_carrier = 1; if (ifp->if_flags & IFF_MONITOR) pstate->ifla_operstate = IF_OPER_DORMANT; else pstate->ifla_operstate = IF_OPER_UP; } else pstate->ifla_operstate = IF_OPER_DOWN; } } static bool get_stats(struct nl_writer *nw, struct ifnet *ifp) { struct 
rtnl_link_stats64 *stats; int nla_len = sizeof(struct nlattr) + sizeof(*stats); struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); if (nla == NULL) return (false); nla->nla_type = IFLA_STATS64; nla->nla_len = nla_len; stats = (struct rtnl_link_stats64 *)(nla + 1); stats->rx_packets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); stats->tx_packets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); stats->rx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES); stats->tx_bytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES); stats->rx_errors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS); stats->tx_errors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS); stats->rx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS); stats->tx_dropped = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS); stats->multicast = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS); stats->rx_nohandler = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO); return (true); } static void get_operstate(struct ifnet *ifp, struct if_state *pstate) { pstate->ifla_operstate = IF_OPER_UNKNOWN; pstate->ifla_carrier = 0; /* no carrier */ switch (ifp->if_type) { case IFT_ETHER: case IFT_L2VLAN: get_operstate_ether(ifp, pstate); break; default: /* Map admin state to the operstate */ if (ifp->if_flags & IFF_UP) { pstate->ifla_operstate = IF_OPER_UP; pstate->ifla_carrier = 1; } else pstate->ifla_operstate = IF_OPER_DOWN; break; } } static unsigned ifp_flags_to_netlink(const struct ifnet *ifp) { return (ifp->if_flags | ifp->if_drv_flags); } #define LLADDR_CONST(s) ((const void *)((s)->sdl_data + (s)->sdl_nlen)) static bool dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa) { uint32_t addr_len = 0; const void *addr_data = NULL; #ifdef INET6 struct in6_addr addr6; #endif if (sa == NULL) return (true); switch (sa->sa_family) { #ifdef INET case AF_INET: addr_len = sizeof(struct in_addr); addr_data = &((const struct sockaddr_in *)sa)->sin_addr; break; #endif #ifdef INET6 case AF_INET6: in6_splitscope(&((const 
struct sockaddr_in6 *)sa)->sin6_addr, &addr6, &addr_len); addr_len = sizeof(struct in6_addr); addr_data = &addr6; break; #endif case AF_LINK: addr_len = ((const struct sockaddr_dl *)sa)->sdl_alen; addr_data = LLADDR_CONST((const struct sockaddr_dl *)sa); break; default: NL_LOG(LOG_DEBUG2, "unsupported family: %d, skipping", sa->sa_family); return (true); } return (nlattr_add(nw, attr, addr_len, addr_data)); } /* * Dumps interface state, properties and metrics. * @nw: message writer * @ifp: target interface * @hdr: template header * @if_flags_mask: changed if_[drv]_flags bitmask * * This function is called without epoch and MAY sleep. */ static bool dump_iface(struct nl_writer *nw, struct ifnet *ifp, const struct nlmsghdr *hdr, int if_flags_mask) { struct ifinfomsg *ifinfo; NL_LOG(LOG_DEBUG3, "dumping interface %s data", if_name(ifp)); if (!nlmsg_reply(nw, hdr, sizeof(struct ifinfomsg))) goto enomem; ifinfo = nlmsg_reserve_object(nw, struct ifinfomsg); ifinfo->ifi_family = AF_UNSPEC; ifinfo->__ifi_pad = 0; ifinfo->ifi_type = ifp->if_type; ifinfo->ifi_index = ifp->if_index; ifinfo->ifi_flags = ifp_flags_to_netlink(ifp); ifinfo->ifi_change = if_flags_mask; struct if_state ifs = {}; get_operstate(ifp, &ifs); if (ifs.ifla_operstate == IF_OPER_UP) ifinfo->ifi_flags |= IFF_LOWER_UP; nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp)); nlattr_add_u8(nw, IFLA_OPERSTATE, ifs.ifla_operstate); nlattr_add_u8(nw, IFLA_CARRIER, ifs.ifla_carrier); /* nlattr_add_u8(nw, IFLA_PROTO_DOWN, val); nlattr_add_u8(nw, IFLA_LINKMODE, val); */ if ((ifp->if_addr != NULL)) { dump_sa(nw, IFLA_ADDRESS, ifp->if_addr->ifa_addr); } if ((ifp->if_broadcastaddr != NULL)) { nlattr_add(nw, IFLA_BROADCAST, ifp->if_addrlen, ifp->if_broadcastaddr); } nlattr_add_u32(nw, IFLA_MTU, ifp->if_mtu); /* nlattr_add_u32(nw, IFLA_MIN_MTU, 60); nlattr_add_u32(nw, IFLA_MAX_MTU, 9000); nlattr_add_u32(nw, IFLA_GROUP, 0); */ if (ifp->if_description != NULL) nlattr_add_string(nw, IFLA_IFALIAS, ifp->if_description); 
get_stats(nw, ifp); uint32_t val = (ifp->if_flags & IFF_PROMISC) != 0; nlattr_add_u32(nw, IFLA_PROMISCUITY, val); - sx_slock(&rtnl_cloner_lock); - struct nl_cloner *cloner = rtnl_iface_find_cloner_locked(ifp->if_dname); - if (cloner != NULL && cloner->dump_f != NULL) { - /* Ignore any dump error */ - cloner->dump_f(ifp, nw); - } - sx_sunlock(&rtnl_cloner_lock); + ifc_dump_ifp_nl(ifp, nw); if (nlmsg_end(nw)) return (true); enomem: NL_LOG(LOG_DEBUG, "unable to dump interface %s state (ENOMEM)", if_name(ifp)); nlmsg_abort(nw); return (false); } static bool check_ifmsg(void *hdr, struct nl_pstate *npt) { struct ifinfomsg *ifm = hdr; if (ifm->__ifi_pad != 0 || ifm->ifi_type != 0 || ifm->ifi_flags != 0 || ifm->ifi_change != 0) { nlmsg_report_err_msg(npt, "strict checking: non-zero values in ifinfomsg header"); return (false); } return (true); } #define _IN(_field) offsetof(struct ifinfomsg, _field) #define _OUT(_field) offsetof(struct nl_parsed_link, _field) static const struct nlfield_parser nlf_p_if[] = { { .off_in = _IN(ifi_type), .off_out = _OUT(ifi_type), .cb = nlf_get_u16 }, { .off_in = _IN(ifi_index), .off_out = _OUT(ifi_index), .cb = nlf_get_u32 }, { .off_in = _IN(ifi_flags), .off_out = _OUT(ifi_flags), .cb = nlf_get_u32 }, { .off_in = _IN(ifi_change), .off_out = _OUT(ifi_change), .cb = nlf_get_u32 }, }; static const struct nlattr_parser nla_p_linfo[] = { { .type = IFLA_INFO_KIND, .off = _OUT(ifla_cloner), .cb = nlattr_get_stringn }, { .type = IFLA_INFO_DATA, .off = _OUT(ifla_idata), .cb = nlattr_get_nla }, }; NL_DECLARE_ATTR_PARSER(linfo_parser, nla_p_linfo); static const struct nlattr_parser nla_p_if[] = { { .type = IFLA_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string }, { .type = IFLA_MTU, .off = _OUT(ifla_mtu), .cb = nlattr_get_uint32 }, - { .type = IFLA_LINK, .off = _OUT(ifi_index), .cb = nlattr_get_uint32 }, + { .type = IFLA_LINK, .off = _OUT(ifla_link), .cb = nlattr_get_uint32 }, { .type = IFLA_LINKINFO, .arg = &linfo_parser, .cb = 
nlattr_get_nested }, { .type = IFLA_IFALIAS, .off = _OUT(ifla_ifalias), .cb = nlattr_get_string }, { .type = IFLA_GROUP, .off = _OUT(ifla_group), .cb = nlattr_get_string }, { .type = IFLA_ALT_IFNAME, .off = _OUT(ifla_ifname), .cb = nlattr_get_string }, }; #undef _IN #undef _OUT NL_DECLARE_STRICT_PARSER(ifmsg_parser, struct ifinfomsg, check_ifmsg, nlf_p_if, nla_p_if); static bool match_iface(struct ifnet *ifp, void *_arg) { struct nl_parsed_link *attrs = (struct nl_parsed_link *)_arg; if (attrs->ifi_index != 0 && attrs->ifi_index != ifp->if_index) return (false); if (attrs->ifi_type != 0 && attrs->ifi_index != ifp->if_type) return (false); if (attrs->ifla_ifname != NULL && strcmp(attrs->ifla_ifname, if_name(ifp))) return (false); /* TODO: add group match */ return (true); } static int dump_cb(struct ifnet *ifp, void *_arg) { struct netlink_walkargs *wa = (struct netlink_walkargs *)_arg; if (!dump_iface(wa->nw, ifp, &wa->hdr, 0)) return (ENOMEM); return (0); } /* * {nlmsg_len=52, nlmsg_type=RTM_GETLINK, nlmsg_flags=NLM_F_REQUEST, nlmsg_seq=1662842818, nlmsg_pid=0}, * {ifi_family=AF_PACKET, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, * [ * [{nla_len=10, nla_type=IFLA_IFNAME}, "vnet9"], * [{nla_len=8, nla_type=IFLA_EXT_MASK}, RTEXT_FILTER_VF] * ] */ static int rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct epoch_tracker et; struct ifnet *ifp; int error = 0; struct nl_parsed_link attrs = {}; error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs); if (error != 0) return (error); struct netlink_walkargs wa = { .so = nlp, .nw = npt->nw, .hdr.nlmsg_pid = hdr->nlmsg_pid, .hdr.nlmsg_seq = hdr->nlmsg_seq, .hdr.nlmsg_flags = hdr->nlmsg_flags, .hdr.nlmsg_type = NL_RTM_NEWLINK, }; /* Fast track for an interface w/ explicit name or index match */ if ((attrs.ifi_index != 0) || (attrs.ifla_ifname != NULL)) { if (attrs.ifi_index != 0) { NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u", attrs.ifi_index); 
NET_EPOCH_ENTER(et); ifp = ifnet_byindex_ref(attrs.ifi_index); NET_EPOCH_EXIT(et); } else { NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching name %s", attrs.ifla_ifname); ifp = ifunit_ref(attrs.ifla_ifname); } if (ifp != NULL) { if (match_iface(ifp, &attrs)) { if (!dump_iface(wa.nw, ifp, &wa.hdr, 0)) error = ENOMEM; } else error = ENODEV; if_rele(ifp); } else error = ENODEV; return (error); } /* Always treat non-direct-match as a multipart message */ wa.hdr.nlmsg_flags |= NLM_F_MULTI; /* * Fetching some link properties require performing ioctl's that may be blocking. * Address it by saving referenced pointers of the matching links, * exiting from epoch and going through the list one-by-one. */ NL_LOG(LOG_DEBUG2, "Start dump"); if_foreach_sleep(match_iface, &attrs, dump_cb, &wa); NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped); if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) { NL_LOG(LOG_DEBUG, "Unable to finalize the dump"); return (ENOMEM); } return (error); } /* * sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=[ * {nlmsg_len=60, nlmsg_type=RTM_NEWLINK, nlmsg_flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, nlmsg_seq=1662715618, nlmsg_pid=0}, * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, * {nla_len=11, nla_type=IFLA_IFNAME}, "dummy0"], * [ * {nla_len=16, nla_type=IFLA_LINKINFO}, * [ * {nla_len=9, nla_type=IFLA_INFO_KIND}, "dummy"... 
* ] * ] */ static int rtnl_handle_dellink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct epoch_tracker et; struct ifnet *ifp; int error; struct nl_parsed_link attrs = {}; error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs); if (error != 0) return (error); NET_EPOCH_ENTER(et); ifp = ifnet_byindex_ref(attrs.ifi_index); NET_EPOCH_EXIT(et); if (ifp == NULL) { NLP_LOG(LOG_DEBUG, nlp, "unable to find interface %u", attrs.ifi_index); return (ENOENT); } NLP_LOG(LOG_DEBUG3, nlp, "mapped ifindex %u to %s", attrs.ifi_index, if_name(ifp)); sx_xlock(&ifnet_detach_sxlock); error = if_clone_destroy(if_name(ifp)); sx_xunlock(&ifnet_detach_sxlock); NLP_LOG(LOG_DEBUG2, nlp, "deleting interface %s returned %d", if_name(ifp), error); if_rele(ifp); return (error); } /* * New link: * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1668185590, pid=0}, * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0} * [ * {{nla_len=8, nla_type=IFLA_MTU}, 123}, * {{nla_len=10, nla_type=IFLA_IFNAME}, "vlan1"}, * {{nla_len=24, nla_type=IFLA_LINKINFO}, * [ * {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...}, * {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x7b\x00\x00\x00"}]}]} * * Update link: * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=1668185923, pid=0}, * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=if_nametoindex("lo"), ifi_flags=0, ifi_change=0}, * {{nla_len=8, nla_type=IFLA_MTU}, 123}} * * * Check command availability: * type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=0, pid=0}, * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0} */ static int create_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs, struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt) { if (lattrs->ifla_ifname == NULL || strlen(lattrs->ifla_ifname) == 0) { NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_IFNAME attribute"); return (EINVAL); } if 
(lattrs->ifla_cloner == NULL || strlen(lattrs->ifla_cloner) == 0) { NLMSG_REPORT_ERR_MSG(npt, "empty IFLA_INFO_KIND attribute"); return (EINVAL); } - bool found = false; - int error = 0; - - sx_slock(&rtnl_cloner_lock); - struct nl_cloner *cloner = rtnl_iface_find_cloner_locked(lattrs->ifla_cloner); - if (cloner != NULL) { - found = true; - error = cloner->create_f(lattrs, bm, nlp, npt); - } - sx_sunlock(&rtnl_cloner_lock); - - if (!found) - error = generic_cloner.create_f(lattrs, bm, nlp, npt); + struct ifc_data_nl ifd = { + .flags = IFC_F_CREATE, + .lattrs = lattrs, + .bm = bm, + .npt = npt, + }; + if (ifc_create_ifp_nl(lattrs->ifla_ifname, &ifd) && ifd.error == 0) + nl_store_ifp_cookie(npt, ifd.ifp); - return (error); + return (ifd.error); } static int modify_link(struct nlmsghdr *hdr, struct nl_parsed_link *lattrs, struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt) { struct ifnet *ifp = NULL; struct epoch_tracker et; if (lattrs->ifi_index == 0 && lattrs->ifla_ifname == NULL) { /* * Applications like ip(8) verify RTM_NEWLINK command * existence by calling it with empty arguments. Always * return "innocent" error in that case. */ NLMSG_REPORT_ERR_MSG(npt, "empty ifi_index field"); return (EPERM); } if (lattrs->ifi_index != 0) { NET_EPOCH_ENTER(et); ifp = ifnet_byindex_ref(lattrs->ifi_index); NET_EPOCH_EXIT(et); if (ifp == NULL) { NLMSG_REPORT_ERR_MSG(npt, "unable to find interface #%u", lattrs->ifi_index); return (ENOENT); } } if (ifp == NULL && lattrs->ifla_ifname != NULL) { ifp = ifunit_ref(lattrs->ifla_ifname); if (ifp == NULL) { NLMSG_REPORT_ERR_MSG(npt, "unable to find interface %s", lattrs->ifla_ifname); return (ENOENT); } } MPASS(ifp != NULL); /* - * There can be multiple kinds of interfaces: - * 1) cloned, with additional options - * 2) cloned, but w/o additional options - * 3) non-cloned (e.g. "physical). - * - * Thus, try to find cloner-specific callback and fallback to the - * "default" handler if not found. 
+ * Modification request can address either + * 1) cloned interface, in which case we call the cloner-specific + * modification routine + * or + * 2) non-cloned (e.g. "physical") interface, in which case we call + * generic modification routine */ - bool found = false; - int error = 0; - - sx_slock(&rtnl_cloner_lock); - struct nl_cloner *cloner = rtnl_iface_find_cloner_locked(ifp->if_dname); - if (cloner != NULL) { - found = true; - error = cloner->modify_f(ifp, lattrs, bm, nlp, npt); - } - sx_sunlock(&rtnl_cloner_lock); - - if (!found) - error = generic_cloner.modify_f(ifp, lattrs, bm, nlp, npt); + struct ifc_data_nl ifd = { .lattrs = lattrs, .bm = bm, .npt = npt }; + if (!ifc_modify_ifp_nl(ifp, &ifd)) + ifd.error = nl_modify_ifp_generic(ifp, lattrs, bm, npt); if_rele(ifp); - return (error); + return (ifd.error); } static int rtnl_handle_newlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt) { struct nlattr_bmask bm; int error; struct nl_parsed_link attrs = {}; error = nl_parse_nlmsg(hdr, &ifmsg_parser, npt, &attrs); if (error != 0) return (error); nl_get_attrs_bmask_nlmsg(hdr, &ifmsg_parser, &bm); if (hdr->nlmsg_flags & NLM_F_CREATE) return (create_link(hdr, &attrs, &bm, nlp, npt)); else return (modify_link(hdr, &attrs, &bm, nlp, npt)); } struct nl_parsed_ifa { uint8_t ifa_family; uint8_t ifa_prefixlen; uint8_t ifa_scope; uint32_t ifa_index; uint32_t ifa_flags; struct sockaddr *ifa_address; struct sockaddr *ifa_local; }; #define _IN(_field) offsetof(struct ifaddrmsg, _field) #define _OUT(_field) offsetof(struct nl_parsed_ifa, _field) static const struct nlfield_parser nlf_p_ifa[] = { { .off_in = _IN(ifa_family), .off_out = _OUT(ifa_family), .cb = nlf_get_u8 }, { .off_in = _IN(ifa_prefixlen), .off_out = _OUT(ifa_prefixlen), .cb = nlf_get_u8 }, { .off_in = _IN(ifa_scope), .off_out = _OUT(ifa_scope), .cb = nlf_get_u8 }, { .off_in = _IN(ifa_flags), .off_out = _OUT(ifa_flags), .cb = nlf_get_u8_u32 }, { .off_in = _IN(ifa_index), .off_out = 
_OUT(ifa_index), .cb = nlf_get_u32 }, }; static const struct nlattr_parser nla_p_ifa[] = { { .type = IFA_ADDRESS, .off = _OUT(ifa_address), .cb = nlattr_get_ip }, { .type = IFA_LOCAL, .off = _OUT(ifa_local), .cb = nlattr_get_ip }, { .type = IFA_FLAGS, .off = _OUT(ifa_flags), .cb = nlattr_get_uint32 }, }; #undef _IN #undef _OUT NL_DECLARE_PARSER(ifaddrmsg_parser, struct ifaddrmsg, nlf_p_ifa, nla_p_ifa); /* {ifa_family=AF_INET, ifa_prefixlen=8, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_HOST, ifa_index=if_nametoindex("lo")}, [ {{nla_len=8, nla_type=IFA_ADDRESS}, inet_addr("127.0.0.1")}, {{nla_len=8, nla_type=IFA_LOCAL}, inet_addr("127.0.0.1")}, {{nla_len=7, nla_type=IFA_LABEL}, "lo"}, {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}, {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=3619, tstamp=3619}}]}, --- {{len=72, type=RTM_NEWADDR, flags=NLM_F_MULTI, seq=1642191126, pid=566735}, {ifa_family=AF_INET6, ifa_prefixlen=96, ifa_flags=IFA_F_PERMANENT, ifa_scope=RT_SCOPE_UNIVERSE, ifa_index=if_nametoindex("virbr0")}, [ {{nla_len=20, nla_type=IFA_ADDRESS}, inet_pton(AF_INET6, "2a01:4f8:13a:70c:ffff::1")}, {{nla_len=20, nla_type=IFA_CACHEINFO}, {ifa_prefered=4294967295, ifa_valid=4294967295, cstamp=4283, tstamp=4283}}, {{nla_len=8, nla_type=IFA_FLAGS}, IFA_F_PERMANENT}]}, */ static uint8_t ifa_get_scope(const struct ifaddr *ifa) { const struct sockaddr *sa; uint8_t addr_scope = RT_SCOPE_UNIVERSE; sa = ifa->ifa_addr; switch (sa->sa_family) { #ifdef INET case AF_INET: { struct in_addr addr; addr = ((const struct sockaddr_in *)sa)->sin_addr; if (IN_LOOPBACK(addr.s_addr)) addr_scope = RT_SCOPE_HOST; else if (IN_LINKLOCAL(addr.s_addr)) addr_scope = RT_SCOPE_LINK; break; } #endif #ifdef INET6 case AF_INET6: { const struct in6_addr *addr; addr = &((const struct sockaddr_in6 *)sa)->sin6_addr; if (IN6_IS_ADDR_LOOPBACK(addr)) addr_scope = RT_SCOPE_HOST; else if (IN6_IS_ADDR_LINKLOCAL(addr)) addr_scope = RT_SCOPE_LINK; break; } #endif 
} return (addr_scope); } #ifdef INET6 static uint8_t inet6_get_plen(const struct in6_addr *addr) { return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) + bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3])); } #endif static uint8_t get_sa_plen(const struct sockaddr *sa) { #ifdef INET const struct in_addr *paddr; #endif #ifdef INET6 const struct in6_addr *paddr6; #endif switch (sa->sa_family) { #ifdef INET case AF_INET: paddr = &(((const struct sockaddr_in *)sa)->sin_addr); return bitcount32(paddr->s_addr);; #endif #ifdef INET6 case AF_INET6: paddr6 = &(((const struct sockaddr_in6 *)sa)->sin6_addr); return inet6_get_plen(paddr6); #endif } return (0); } /* * {'attrs': [('IFA_ADDRESS', '12.0.0.1'), ('IFA_LOCAL', '12.0.0.1'), ('IFA_LABEL', 'eth10'), ('IFA_FLAGS', 128), ('IFA_CACHEINFO', {'ifa_preferred': 4294967295, 'ifa_valid': 4294967295, 'cstamp': 63745746, 'tstamp': 63745746})], */ static bool dump_iface_addr(struct nl_writer *nw, struct ifnet *ifp, struct ifaddr *ifa, const struct nlmsghdr *hdr) { struct ifaddrmsg *ifamsg; struct sockaddr *sa = ifa->ifa_addr; NL_LOG(LOG_DEBUG3, "dumping ifa %p type %s(%d) for interface %s", ifa, rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp)); if (!nlmsg_reply(nw, hdr, sizeof(struct ifaddrmsg))) goto enomem; ifamsg = nlmsg_reserve_object(nw, struct ifaddrmsg); ifamsg->ifa_family = sa->sa_family; ifamsg->ifa_prefixlen = get_sa_plen(ifa->ifa_netmask); ifamsg->ifa_flags = 0; // ifa_flags is useless ifamsg->ifa_scope = ifa_get_scope(ifa); ifamsg->ifa_index = ifp->if_index; if (ifp->if_flags & IFF_POINTOPOINT) { dump_sa(nw, IFA_ADDRESS, ifa->ifa_dstaddr); dump_sa(nw, IFA_LOCAL, sa); } else { dump_sa(nw, IFA_ADDRESS, sa); #ifdef INET /* * In most cases, IFA_ADDRESS == IFA_LOCAL * Skip IFA_LOCAL for anything except INET */ if (sa->sa_family == AF_INET) dump_sa(nw, IFA_LOCAL, sa); #endif } if (ifp->if_flags & IFF_BROADCAST) dump_sa(nw, IFA_BROADCAST, ifa->ifa_broadaddr); nlattr_add_string(nw, 
IFA_LABEL, if_name(ifp));

	/* The flags attribute is always emitted as 0; the ifa_flags source is commented out. */
	uint32_t val = 0; // ifa->ifa_flags;
	nlattr_add_u32(nw, IFA_FLAGS, val);

	if (nlmsg_end(nw))
		return (true);
enomem:
	NL_LOG(LOG_DEBUG, "Failed to dump ifa type %s(%d) for interface %s",
	    rib_print_family(sa->sa_family), sa->sa_family, if_name(ifp));
	nlmsg_abort(nw);
	return (false);
}

/*
 * Dumps the addresses of ifp that pass the filters in wa.
 * An address is skipped when wa->family is non-zero and differs from
 * the address family, when it is an AF_LINK address, or when prison_if()
 * returns non-zero for wa->cred.
 * wa->count counts addresses attempted and wa->dumped those written.
 * Returns 0, or ENOMEM as soon as one address cannot be written.
 */
static int
dump_iface_addrs(struct netlink_walkargs *wa, struct ifnet *ifp)
{
	struct ifaddr *ifa;

	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (wa->family != 0 && wa->family != ifa->ifa_addr->sa_family)
			continue;
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
		if (prison_if(wa->cred, ifa->ifa_addr) != 0)
			continue;
		wa->count++;
		if (!dump_iface_addr(wa->nw, ifp, ifa, &wa->hdr))
			return (ENOMEM);
		wa->dumped++;
	}

	return (0);
}

/*
 * Handler for address dump requests.
 * Parses the request with ifaddrmsg_parser, then dumps either the single
 * interface named by ifa_index (ENOENT when the lookup fails) or every
 * interface on V_ifnet, stopping at the first error.
 * Replies are sent as NL_RTM_NEWADDR with NLM_F_MULTI set, and the dump
 * is finalized with nlmsg_end_dump(); ENOMEM is returned if that fails,
 * otherwise the error collected during the walk.
 */
static int
rtnl_handle_getaddr(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *npt)
{
	struct ifnet *ifp;
	int error = 0;

	struct nl_parsed_ifa attrs = {};
	error = nl_parse_nlmsg(hdr, &ifaddrmsg_parser, npt, &attrs);
	if (error != 0)
		return (error);

	/* Walk state shared with dump_iface_addrs(); pid and seq echo the request. */
	struct netlink_walkargs wa = {
		.so = nlp,
		.nw = npt->nw,
		.cred = nlp_get_cred(nlp),
		.family = attrs.ifa_family,
		.hdr.nlmsg_pid = hdr->nlmsg_pid,
		.hdr.nlmsg_seq = hdr->nlmsg_seq,
		.hdr.nlmsg_flags = hdr->nlmsg_flags | NLM_F_MULTI,
		.hdr.nlmsg_type = NL_RTM_NEWADDR,
	};

	NL_LOG(LOG_DEBUG2, "Start dump");

	if (attrs.ifa_index != 0) {
		ifp = ifnet_byindex(attrs.ifa_index);
		if (ifp == NULL)
			error = ENOENT;
		else
			error = dump_iface_addrs(&wa, ifp);
	} else {
		CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
			error = dump_iface_addrs(&wa, ifp);
			if (error != 0)
				break;
		}
	}

	NL_LOG(LOG_DEBUG2, "End dump, iterated %d dumped %d", wa.count, wa.dumped);

	if (!nlmsg_end_dump(wa.nw, error, &wa.hdr)) {
		NL_LOG(LOG_DEBUG, "Unable to finalize the dump");
		return (ENOMEM);
	}

	return (error);
}

/*
 * Address event callback (registered for rt_addrmsg in rtnl_ifaces_init()).
 * Chooses the notification group from the address family and sends an
 * address message to it when somebody is listening.
 */
static void
rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd)
{
	struct nlmsghdr hdr = {};
	struct nl_writer nw = {};
	uint32_t group = 0;

	switch (ifa->ifa_addr->sa_family) {
#ifdef INET
	case AF_INET:
		group =
RTNLGRP_IPV4_IFADDR;
		break;
#endif
#ifdef INET6
	case AF_INET6:
		group = RTNLGRP_IPV6_IFADDR;
		break;
#endif
	default:
		/* Other address families have no notification group here. */
		NL_LOG(LOG_DEBUG2, "ifa notification for unknown AF: %d",
		    ifa->ifa_addr->sa_family);
		return;
	}

	/* Skip building the message when nobody subscribed to the group. */
	if (!nl_has_listeners(NETLINK_ROUTE, group))
		return;

	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, group)) {
		NL_LOG(LOG_DEBUG, "error allocating group writer");
		return;
	}

	/* RTM_DELETE becomes a DELADDR message; every other cmd is NEWADDR. */
	hdr.nlmsg_type = (cmd == RTM_DELETE) ? NL_RTM_DELADDR : NL_RTM_NEWADDR;

	/* The result of dump_iface_addr() is not checked; the writer is flushed either way. */
	dump_iface_addr(&nw, ifa->ifa_ifp, ifa, &hdr);
	nlmsg_flush(&nw);
}

/*
 * Sends a link message of type nlmsg_type describing ifp to the
 * RTNLGRP_LINK group, if it has listeners. if_flags_mask is passed
 * through to dump_iface() unchanged.
 */
static void
rtnl_handle_ifevent(struct ifnet *ifp, int nlmsg_type, int if_flags_mask)
{
	struct nlmsghdr hdr = { .nlmsg_type = nlmsg_type };
	struct nl_writer nw = {};

	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
		return;

	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) {
		NL_LOG(LOG_DEBUG, "error allocating mbuf");
		return;
	}
	dump_iface(&nw, ifp, &hdr, if_flags_mask);
	nlmsg_flush(&nw);
}

/* Interface arrival callback: announces ifp with NL_RTM_NEWLINK. */
static void
rtnl_handle_ifattach(void *arg, struct ifnet *ifp)
{
	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
}

/* Interface departure callback: announces ifp with NL_RTM_DELLINK. */
static void
rtnl_handle_ifdetach(void *arg, struct ifnet *ifp)
{
	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
	rtnl_handle_ifevent(ifp, NL_RTM_DELLINK, 0);
}

/* Link event callback: re-announces ifp with NL_RTM_NEWLINK. */
static void
rtnl_handle_iflink(void *arg, struct ifnet *ifp)
{
	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
}

/*
 * Public entry point: announces ifp with NL_RTM_NEWLINK, forwarding the
 * caller's if_flags_mask to the message builder.
 */
void
rtnl_handle_ifnet_event(struct ifnet *ifp, int if_flags_mask)
{
	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, if_flags_mask);
}

/*
 * Message dispatch table handed to rtnl_register_messages().
 * Each entry names the command, its callback, and optionally a privilege
 * (.priv) and RTNL_F_* flags.
 */
static const struct rtnl_cmd_handler cmd_handlers[] = {
	{
		.cmd = NL_RTM_GETLINK,
		.name = "RTM_GETLINK",
		.cb = &rtnl_handle_getlink,
		.flags = RTNL_F_NOEPOCH | RTNL_F_ALLOW_NONVNET_JAIL,
	},
	{
		.cmd = NL_RTM_DELLINK,
		.name = "RTM_DELLINK",
		.cb = &rtnl_handle_dellink,
		.priv = PRIV_NET_IFDESTROY,
		.flags = RTNL_F_NOEPOCH,
	},
	{
		.cmd = NL_RTM_NEWLINK,
		.name = "RTM_NEWLINK",
		.cb = &rtnl_handle_newlink,
.priv = PRIV_NET_IFCREATE,
		.flags = RTNL_F_NOEPOCH,
	},
	{
		.cmd = NL_RTM_GETADDR,
		.name = "RTM_GETADDR",
		.cb = &rtnl_handle_getaddr,
		.flags = RTNL_F_ALLOW_NONVNET_JAIL,
	},
	/*
	 * NOTE(review): NEWADDR and DELADDR are both routed to the getaddr
	 * (dump) handler and carry no .priv value — confirm this is intended.
	 */
	{
		.cmd = NL_RTM_NEWADDR,
		.name = "RTM_NEWADDR",
		.cb = &rtnl_handle_getaddr,
	},
	{
		.cmd = NL_RTM_DELADDR,
		.name = "RTM_DELADDR",
		.cb = &rtnl_handle_getaddr,
	},
};

/* Parsers checked by NL_VERIFY_PARSERS() in rtnl_ifaces_init(). */
static const struct nlhdr_parser *all_parsers[] = { &ifmsg_parser, &ifaddrmsg_parser };

/* Inserts cloner at the head of nl_cloners while holding rtnl_cloner_lock exclusively. */
void
rtnl_iface_add_cloner(struct nl_cloner *cloner)
{
	sx_xlock(&rtnl_cloner_lock);
	SLIST_INSERT_HEAD(&nl_cloners, cloner, next);
	sx_xunlock(&rtnl_cloner_lock);
}

/* Removes cloner from nl_cloners while holding rtnl_cloner_lock exclusively. */
void
rtnl_iface_del_cloner(struct nl_cloner *cloner)
{
	sx_xlock(&rtnl_cloner_lock);
	SLIST_REMOVE(&nl_cloners, cloner, nl_cloner, next);
	sx_xunlock(&rtnl_cloner_lock);
}

/*
 * Returns the registered cloner whose name equals `name`, or NULL.
 * No lock is taken here; presumably the caller holds rtnl_cloner_lock,
 * as the _locked suffix suggests — confirm at the call sites.
 */
static struct nl_cloner *
rtnl_iface_find_cloner_locked(const char *name)
{
	struct nl_cloner *cloner;

	SLIST_FOREACH(cloner, &nl_cloners, next) {
		if (!strcmp(name, cloner->name))
			return (cloner);
	}

	return (NULL);
}

/*
 * Module setup: subscribes to the interface arrival, departure, address
 * and link events, verifies the parsers, and registers cmd_handlers.
 * The returned event tags are stored so rtnl_ifaces_destroy() can
 * deregister them.
 */
void
rtnl_ifaces_init(void)
{
	ifattach_event = EVENTHANDLER_REGISTER(
	    ifnet_arrival_event, rtnl_handle_ifattach, NULL,
	    EVENTHANDLER_PRI_ANY);
	ifdetach_event = EVENTHANDLER_REGISTER(
	    ifnet_departure_event, rtnl_handle_ifdetach, NULL,
	    EVENTHANDLER_PRI_ANY);
	ifaddr_event = EVENTHANDLER_REGISTER(
	    rt_addrmsg, rtnl_handle_ifaddr, NULL,
	    EVENTHANDLER_PRI_ANY);
	iflink_event = EVENTHANDLER_REGISTER(
	    ifnet_link_event, rtnl_handle_iflink, NULL,
	    EVENTHANDLER_PRI_ANY);
	NL_VERIFY_PARSERS(all_parsers);
-	rtnl_iface_drivers_register();
	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
}

/* Module teardown: deregisters the four event handlers installed by rtnl_ifaces_init(). */
void
rtnl_ifaces_destroy(void)
{
	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event);
	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event);
	EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event);
	EVENTHANDLER_DEREGISTER(ifnet_link_event, iflink_event);
}
diff --git a/sys/netlink/route/iface_drivers.c b/sys/netlink/route/iface_drivers.c
index be28a0f3b676..17fbc1000d23 100644
--- a/sys/netlink/route/iface_drivers.c
+++
b/sys/netlink/route/iface_drivers.c @@ -1,304 +1,146 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2022 Alexander V. Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* scope deembedding */ #define DEBUG_MOD_NAME nl_iface_drivers #define DEBUG_MAX_LEVEL LOG_DEBUG3 #include _DECLARE_DEBUG(LOG_DEBUG); /* * Generic modification interface handler. * Responsible for changing network stack interface attributes * such as state, mtu or description. 
*/
-static int
-modify_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs,
-    const struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt)
+int
+nl_modify_ifp_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs,
+    const struct nlattr_bmask *bm, struct nl_pstate *npt)
{
	int error;

	/*
	 * Description: requires PRIV_NET_SETIFDESCR. The alias is copied
	 * (including its terminating NUL) into a buffer from if_allocdescr()
	 * which is then handed to if_setdescr().
	 */
	if (lattrs->ifla_ifalias != NULL) {
-		if (nlp_has_priv(nlp, PRIV_NET_SETIFDESCR)) {
+		if (nlp_has_priv(npt->nlp, PRIV_NET_SETIFDESCR)) {
			int len = strlen(lattrs->ifla_ifalias) + 1;
			char *buf = if_allocdescr(len, M_WAITOK);

			memcpy(buf, lattrs->ifla_ifalias, len);
			if_setdescr(ifp, buf);
			getmicrotime(&ifp->if_lastchange);
		} else {
			nlmsg_report_err_msg(npt, "Not enough privileges to set descr");
			return (EPERM);
		}
	}

	/* Only the "bring down" direction of IFF_UP is acted upon here. */
	if ((lattrs->ifi_change & IFF_UP) && (lattrs->ifi_flags & IFF_UP) == 0) {
		/* Request to down the interface */
		if_down(ifp);
	}

	/* MTU: requires PRIV_NET_SETIFMTU and is applied through SIOCSIFMTU. */
	if (lattrs->ifla_mtu > 0) {
-		if (nlp_has_priv(nlp, PRIV_NET_SETIFMTU)) {
+		if (nlp_has_priv(npt->nlp, PRIV_NET_SETIFMTU)) {
			struct ifreq ifr = { .ifr_mtu = lattrs->ifla_mtu };
			error = ifhwioctl(SIOCSIFMTU, ifp, (char *)&ifr, curthread);
+			/*
+			 * Report a rejected MTU instead of dropping the
+			 * ioctl result and returning success.
+			 */
+			if (error != 0) {
+				nlmsg_report_err_msg(npt, "Failed to set mtu");
+				return (error);
+			}
		} else {
			nlmsg_report_err_msg(npt, "Not enough privileges to set mtu");
			return (EPERM);
		}
	}

	/* Promiscuous mode follows the requested IFF_PROMISC bit when it is flagged as changed. */
	if (lattrs->ifi_change & IFF_PROMISC) {
		error = ifpromisc(ifp, lattrs->ifi_flags & IFF_PROMISC);
		if (error != 0) {
			nlmsg_report_err_msg(npt, "unable to set promisc");
			return (error);
		}
	}

	return (0);
}

/*
 * Saves the resulting ifindex and ifname to report them
 * to userland along with the operation result.
* NLA format:
 * NLMSGERR_ATTR_COOKIE(nested)
 *  IFLA_NEW_IFINDEX(u32)
 *  IFLA_IFNAME(string)
 */
-static void
-store_cookie(struct nl_pstate *npt, struct ifnet *ifp)
+void
+nl_store_ifp_cookie(struct nl_pstate *npt, struct ifnet *ifp)
{
	int ifname_len = strlen(if_name(ifp));
	uint32_t ifindex = (uint32_t)ifp->if_index;

	/*
	 * Room for three attribute headers (the cookie wrapper and its two
	 * nested attributes), the u32 ifindex, and the aligned name
	 * including its terminating NUL.
	 */
	int nla_len = sizeof(struct nlattr) * 3 +
		sizeof(ifindex) + NL_ITEM_ALIGN(ifname_len + 1);
	/*
	 * NOTE(review): the npt_alloc() result is written to without a NULL
	 * check — confirm the allocation cannot fail here.
	 */
	struct nlattr *nla_cookie = npt_alloc(npt, nla_len);

	/* Nested TLV */
	nla_cookie->nla_len = nla_len;
	nla_cookie->nla_type = NLMSGERR_ATTR_COOKIE;

	/* First nested attribute starts right after the wrapper header. */
	struct nlattr *nla = nla_cookie + 1;
	nla->nla_len = sizeof(struct nlattr) + sizeof(ifindex);
	nla->nla_type = IFLA_NEW_IFINDEX;
	memcpy(NLA_DATA(nla), &ifindex, sizeof(ifindex));

	/* Second nested attribute: the interface name as a NUL-terminated string. */
	nla = NLA_NEXT(nla);
	nla->nla_len = sizeof(struct nlattr) + ifname_len + 1;
	nla->nla_type = IFLA_IFNAME;
	strlcpy(NLA_DATA(nla), if_name(ifp), ifname_len + 1);

	nlmsg_report_cookie(npt, nla_cookie);
}

-static int
-create_generic_ifd(struct nl_parsed_link *lattrs, const struct nlattr_bmask *bm,
-    struct ifc_data *ifd, struct nlpcb *nlp, struct nl_pstate *npt)
-{
-	int error = 0;
-
-	struct ifnet *ifp = NULL;
-	error = ifc_create_ifp(lattrs->ifla_ifname, ifd, &ifp);
-
-	NLP_LOG(LOG_DEBUG2, nlp, "clone for %s returned %d", lattrs->ifla_ifname, error);
-
-	if (error == 0) {
-		struct epoch_tracker et;
-
-		NET_EPOCH_ENTER(et);
-		bool success = if_try_ref(ifp);
-		NET_EPOCH_EXIT(et);
-		if (!success)
-			return (EINVAL);
-		error = modify_generic(ifp, lattrs, bm, nlp, npt);
-		if (error == 0)
-			store_cookie(npt, ifp);
-		if_rele(ifp);
-	}
-
-	return (error);
-}
-/*
- * Generic creation interface handler.
- * Responsible for creating interfaces w/o parameters and setting
- * misc attributes such as state, mtu or description.
- */ -static int -create_generic(struct nl_parsed_link *lattrs, const struct nlattr_bmask *bm, - struct nlpcb *nlp, struct nl_pstate *npt) -{ - struct ifc_data ifd = {}; - - return (create_generic_ifd(lattrs, bm, &ifd, nlp, npt)); -} - -struct nl_cloner generic_cloner = { - .name = "_default_", - .create_f = create_generic, - .modify_f = modify_generic, -}; - -/* - * - * {len=76, type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1662892737, pid=0}, - * {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, - * [ - * {{nla_len=8, nla_type=IFLA_LINK}, 2}, - * {{nla_len=12, nla_type=IFLA_IFNAME}, "xvlan22"}, - * {{nla_len=24, nla_type=IFLA_LINKINFO}, - * [ - * {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...}, - * {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x16\x00\x00\x00"}]}]}, iov_len=76}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 76 - */ - -struct nl_parsed_vlan { - uint16_t vlan_id; - uint16_t vlan_proto; - struct ifla_vlan_flags vlan_flags; -}; - -#define _OUT(_field) offsetof(struct nl_parsed_vlan, _field) -static const struct nlattr_parser nla_p_vlan[] = { - { .type = IFLA_VLAN_ID, .off = _OUT(vlan_id), .cb = nlattr_get_uint16 }, - { .type = IFLA_VLAN_FLAGS, .off = _OUT(vlan_flags), .cb = nlattr_get_nla }, - { .type = IFLA_VLAN_PROTOCOL, .off = _OUT(vlan_proto), .cb = nlattr_get_uint16 }, -}; -#undef _OUT -NL_DECLARE_ATTR_PARSER(vlan_parser, nla_p_vlan); - -static int -create_vlan(struct nl_parsed_link *lattrs, const struct nlattr_bmask *bm, - struct nlpcb *nlp, struct nl_pstate *npt) -{ - struct epoch_tracker et; - struct ifnet *ifp; - int error; - - /* - * lattrs.ifla_ifname is the new interface name - * lattrs.ifi_index contains parent interface index - * lattrs.ifla_idata contains un-parsed vlan data - */ - - struct nl_parsed_vlan attrs = { - .vlan_id = 0xFEFE, - .vlan_proto = ETHERTYPE_VLAN - }; - NLP_LOG(LOG_DEBUG3, nlp, "nested: %p len %d", lattrs->ifla_idata, 
lattrs->ifla_idata->nla_len); - - if (lattrs->ifla_idata == NULL) { - NLMSG_REPORT_ERR_MSG(npt, "vlan id is required, guessing not supported"); - return (ENOTSUP); - } - - error = nl_parse_nested(lattrs->ifla_idata, &vlan_parser, npt, &attrs); - if (error != 0) - return (error); - if (attrs.vlan_id > 4095) { - NLMSG_REPORT_ERR_MSG(npt, "Invalid VID: %d", attrs.vlan_id); - return (EINVAL); - } - if (attrs.vlan_proto != ETHERTYPE_VLAN && attrs.vlan_proto != ETHERTYPE_QINQ) { - NLMSG_REPORT_ERR_MSG(npt, "Unsupported ethertype: 0x%04X", attrs.vlan_proto); - return (ENOTSUP); - } - - NET_EPOCH_ENTER(et); - ifp = ifnet_byindex_ref(lattrs->ifi_index); - NET_EPOCH_EXIT(et); - if (ifp == NULL) { - NLP_LOG(LOG_DEBUG, nlp, "unable to find parent interface %u", - lattrs->ifi_index); - return (ENOENT); - } - - struct vlanreq params = { - .vlr_tag = attrs.vlan_id, - .vlr_proto = attrs.vlan_proto, - }; - strlcpy(params.vlr_parent, if_name(ifp), sizeof(params.vlr_parent)); - struct ifc_data ifd = { .flags = IFC_F_SYSSPACE, .params = ¶ms }; - - error = create_generic_ifd(lattrs, bm, &ifd, nlp, npt); - - if_rele(ifp); - return (error); -} - -static int -dump_vlan(struct ifnet *ifp, struct nl_writer *nw) -{ - return (0); -} - -static struct nl_cloner vlan_cloner = { - .name = "vlan", - .create_f = create_vlan, - .modify_f = modify_generic, - .dump_f = dump_vlan, - -}; - -static const struct nlhdr_parser *all_parsers[] = { &vlan_parser }; - -void -rtnl_iface_drivers_register(void) -{ - rtnl_iface_add_cloner(&vlan_cloner); - NL_VERIFY_PARSERS(all_parsers); -} - - diff --git a/sys/netlink/route/route_var.h b/sys/netlink/route/route_var.h index a11857b14a1f..cbcc71e9ac21 100644 --- a/sys/netlink/route/route_var.h +++ b/sys/netlink/route/route_var.h @@ -1,112 +1,118 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2022 Alexander V. 
Chernikov * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/

/*
 * This file contains definitions shared among NETLINK_ROUTE family
 */
#ifndef _NETLINK_ROUTE_ROUTE_VAR_H_
#define _NETLINK_ROUTE_ROUTE_VAR_H_

#include /* values for priv_check */

struct nlmsghdr;
struct nlpcb;
struct nl_pstate;
/* Signature of a per-command message handler. */
typedef int rtnl_msg_cb_f(struct nlmsghdr *hdr, struct nlpcb *nlp,
    struct nl_pstate *npt);

/* One entry of a command dispatch table passed to rtnl_register_messages(). */
struct rtnl_cmd_handler {
	int		cmd;	/* message type handled by this entry */
	const char	*name;	/* printable command name */
	rtnl_msg_cb_f	*cb;	/* handler callback */
	int		priv;	/* privilege value; see the priv_check note above */
	int		flags;	/* RTNL_F_* flags */
};

#define	RTNL_F_NOEPOCH	0x01	/* Do not enter epoch when handling command */
#define	RTNL_F_ALLOW_NONVNET_JAIL	0x02	/* Allow command execution inside non-VNET jail */

bool rtnl_register_messages(const struct rtnl_cmd_handler *handlers, int count);

/* route.c */
struct rib_cmd_info;
void rtnl_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc);
void rtnl_routes_init(void);

/* neigh.c */
void rtnl_neighs_init(void);
void rtnl_neighs_destroy(void);

/* iface.c */
/*
 * Parsed form of a link message. Judging by the names, ifi_* fields come
 * from the fixed message header and ifla_* fields from attributes — the
 * parser tables that fill this struct are not visible here, so confirm.
 */
struct nl_parsed_link {
	char		*ifla_group;
	char		*ifla_ifname;
	char		*ifla_cloner;
	char		*ifla_ifalias;
	struct nlattr	*ifla_idata;
	unsigned short	ifi_type;
	int		ifi_index;
+	uint32_t	ifla_link;
	uint32_t	ifla_mtu;
	uint32_t	ifi_flags;
	uint32_t	ifi_change;
};

+int nl_modify_ifp_generic(struct ifnet *ifp, struct nl_parsed_link *lattrs,
+    const struct nlattr_bmask *bm, struct nl_pstate *npt);
+void nl_store_ifp_cookie(struct nl_pstate *npt, struct ifnet *ifp);
+
+
/* Callback signatures used by struct nl_cloner below. */
typedef int rtnl_iface_create_f(struct nl_parsed_link *lattrs,
    const struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt);
typedef int rtnl_iface_modify_f(struct ifnet *ifp, struct nl_parsed_link *lattrs,
    const struct nlattr_bmask *bm, struct nlpcb *nlp, struct nl_pstate *npt);
typedef int rtnl_iface_dump_f(struct ifnet *ifp, struct nl_writer *nw);

/* A named set of create/modify/dump callbacks, linked into a list via `next`. */
struct nl_cloner {
	const char		*name;
	rtnl_iface_create_f	*create_f;
	rtnl_iface_modify_f	*modify_f;
	rtnl_iface_dump_f	*dump_f;
	SLIST_ENTRY(nl_cloner)	next;
};

/*
 * NOTE(review): the definition of generic_cloner is deleted from
 * iface_drivers.c by this same patch; this declaration may be stale.
 */
extern struct nl_cloner generic_cloner;

void rtnl_ifaces_init(void);
void rtnl_ifaces_destroy(void);
void
rtnl_iface_add_cloner(struct nl_cloner *cloner);
void rtnl_iface_del_cloner(struct nl_cloner *cloner);
void rtnl_handle_ifnet_event(struct ifnet *ifp, int if_change_mask);

/* iface_drivers.c */
/*
 * NOTE(review): rtnl_iface_drivers_register() is deleted from
 * iface_drivers.c by this same patch; this prototype may be stale.
 */
void rtnl_iface_drivers_register(void);

/* nexthop.c */
void rtnl_nexthops_init(void);
struct nhop_object *nl_find_nhop(uint32_t fibnum, int family,
    uint32_t uidx, int nh_flags, int *perror);
int nl_set_nexthop_gw(struct nhop_object *nh, struct sockaddr *gw,
    struct ifnet *ifp, struct nl_pstate *npt);

#endif
diff --git a/tests/sys/netlink/test_rtnl_iface.py b/tests/sys/netlink/test_rtnl_iface.py
index 1482ca679c33..d34e36ee8eef 100644
--- a/tests/sys/netlink/test_rtnl_iface.py
+++ b/tests/sys/netlink/test_rtnl_iface.py
@@ -1,351 +1,354 @@
import errno
import socket

import pytest
from atf_python.sys.netlink.netlink_route import IflattrType
from atf_python.sys.netlink.netlink_route import IflinkInfo
from atf_python.sys.netlink.netlink_route import IfLinkInfoDataVlan
from atf_python.sys.netlink.netlink_route import NetlinkIflaMessage
from atf_python.sys.netlink.netlink import NetlinkTestTemplate
from atf_python.sys.netlink.attrs import NlAttrNested
from atf_python.sys.netlink.attrs import NlAttrStr
from atf_python.sys.netlink.attrs import NlAttrStrn
from atf_python.sys.netlink.attrs import NlAttrU16
from atf_python.sys.netlink.attrs import NlAttrU32
from atf_python.sys.netlink.utils import NlConst
from atf_python.sys.netlink.base_headers import NlmBaseFlags
from atf_python.sys.netlink.base_headers import NlmNewFlags
from atf_python.sys.netlink.base_headers import NlMsgType
from atf_python.sys.netlink.netlink_route import NlRtMsgType
from atf_python.sys.netlink.netlink_route import rtnl_ifla_attrs
from atf_python.sys.net.vnet import SingleVnetTestTemplate
+from atf_python.sys.net.tools import ToolsHelper


# Interface (link) tests driven over a NETLINK_ROUTE socket.
class TestRtNlIface(NetlinkTestTemplate, SingleVnetTestTemplate):
    # Runs the parent setup, then opens the NETLINK_ROUTE socket used by every test.
    def setup_method(self, method):
        super().setup_method(method)
        self.setup_netlink(NlConst.NETLINK_ROUTE)

    def
get_interface_byname(self, ifname):
        # Sends RTM_GETLINK for `ifname` and reads replies until one carries the
        # request's sequence number. Returns the RTM_NEWLINK reply; raises
        # ValueError on a non-zero NLMSG_ERROR or on any other message type.
        # A zero-code NLMSG_ERROR (plain ACK) is skipped and reading continues.
        msg = NetlinkIflaMessage(self.helper, NlRtMsgType.RTM_GETLINK.value)
        msg.nl_hdr.nlmsg_flags = (
            NlmBaseFlags.NLM_F_ACK.value | NlmBaseFlags.NLM_F_REQUEST.value
        )
        msg.add_nla(NlAttrStr(IflattrType.IFLA_IFNAME, ifname))
        self.write_message(msg)
        while True:
            rx_msg = self.read_message()
            if msg.nl_hdr.nlmsg_seq == rx_msg.nl_hdr.nlmsg_seq:
                if rx_msg.is_type(NlMsgType.NLMSG_ERROR):
                    if rx_msg.error_code != 0:
                        raise ValueError("unable to get interface {}".format(ifname))
                elif rx_msg.is_type(NlRtMsgType.RTM_NEWLINK):
                    return rx_msg
                else:
                    raise ValueError("bad message")

    def test_get_iface_byname_error(self):
        """Tests error on fetching non-existing interface name"""
        msg = NetlinkIflaMessage(self.helper, NlRtMsgType.RTM_GETLINK.value)
        msg.nl_hdr.nlmsg_flags = (
            NlmBaseFlags.NLM_F_ACK.value | NlmBaseFlags.NLM_F_REQUEST.value
        )
        msg.add_nla(NlAttrStr(IflattrType.IFLA_IFNAME, "lo10"))

        rx_msg = self.get_reply(msg)
        assert rx_msg.is_type(NlMsgType.NLMSG_ERROR)
        assert rx_msg.error_code == errno.ENODEV

    def test_get_iface_byindex_error(self):
        """Tests error on fetching non-existing interface index"""
        msg = NetlinkIflaMessage(self.helper, NlRtMsgType.RTM_GETLINK.value)
        msg.nl_hdr.nlmsg_flags = (
            NlmBaseFlags.NLM_F_ACK.value | NlmBaseFlags.NLM_F_REQUEST.value
        )
        # An index chosen so that no interface is expected to carry it.
        msg.base_hdr.ifi_index = 2147483647

        rx_msg = self.get_reply(msg)
        assert rx_msg.is_type(NlMsgType.NLMSG_ERROR)
        assert rx_msg.error_code == errno.ENODEV

    @pytest.mark.require_user("root")
    def test_create_iface_plain(self):
        """Tests loopback creation w/o any parameters"""
        flags = NlmNewFlags.NLM_F_EXCL.value | NlmNewFlags.NLM_F_CREATE.value
        msg = NetlinkIflaMessage(self.helper, NlRtMsgType.RTM_NEWLINK.value)
        msg.nl_hdr.nlmsg_flags = (
            flags | NlmBaseFlags.NLM_F_ACK.value | NlmBaseFlags.NLM_F_REQUEST.value
        )
        msg.add_nla(NlAttrStr(IflattrType.IFLA_IFNAME, "lo10"))
        # The interface kind ("lo") travels nested inside IFLA_LINKINFO.
        msg.add_nla(
            NlAttrNested(
                IflattrType.IFLA_LINKINFO,
                [
                    NlAttrStrn(IflinkInfo.IFLA_INFO_KIND, "lo"),
                ],
            )
        )

        rx_msg = self.get_reply(msg)
        assert
rx_msg.is_type(NlMsgType.NLMSG_ERROR)
        assert rx_msg.error_code == 0

        # Raises ValueError if the new interface cannot be fetched.
        self.get_interface_byname("lo10")

    @pytest.mark.require_user("root")
    def test_create_iface_plain_retvals(self):
        """Tests that loopback creation returns the new ifname and ifindex in the reply cookie"""
        flags = NlmNewFlags.NLM_F_EXCL.value | NlmNewFlags.NLM_F_CREATE.value
        msg = NetlinkIflaMessage(self.helper, NlRtMsgType.RTM_NEWLINK.value)
        msg.nl_hdr.nlmsg_flags = (
            flags | NlmBaseFlags.NLM_F_ACK.value | NlmBaseFlags.NLM_F_REQUEST.value
        )
        msg.add_nla(NlAttrStr(IflattrType.IFLA_IFNAME, "lo10"))
        msg.add_nla(
            NlAttrNested(
                IflattrType.IFLA_LINKINFO,
                [
                    NlAttrStrn(IflinkInfo.IFLA_INFO_KIND, "lo"),
                ],
            )
        )

        rx_msg = self.get_reply(msg)
        assert rx_msg.is_type(NlMsgType.NLMSG_ERROR)
        assert rx_msg.error_code == 0
        assert rx_msg.cookie is not None
        # Skip the first 4 bytes of the cookie (presumably the enclosing
        # attribute header) and parse the nested attributes behind it.
        nla_list, _ = rx_msg.parse_attrs(bytes(rx_msg.cookie)[4:], rtnl_ifla_attrs)
        nla_map = {n.nla_type: n for n in nla_list}
        assert IflattrType.IFLA_IFNAME.value in nla_map
        assert nla_map[IflattrType.IFLA_IFNAME.value].text == "lo10"
        assert IflattrType.IFLA_NEW_IFINDEX.value in nla_map
        assert nla_map[IflattrType.IFLA_NEW_IFINDEX.value].u32 > 0

        # The index reported in the cookie must match what RTM_GETLINK returns.
        lo_msg = self.get_interface_byname("lo10")
        assert (
            lo_msg.base_hdr.ifi_index == nla_map[IflattrType.IFLA_NEW_IFINDEX.value].u32
        )

    @pytest.mark.require_user("root")
    def test_create_iface_attrs(self):
        """Tests interface creation with additional properties"""
        flags = NlmNewFlags.NLM_F_EXCL.value | NlmNewFlags.NLM_F_CREATE.value
        msg = NetlinkIflaMessage(self.helper, NlRtMsgType.RTM_NEWLINK.value)
        msg.nl_hdr.nlmsg_flags = (
            flags | NlmBaseFlags.NLM_F_ACK.value | NlmBaseFlags.NLM_F_REQUEST.value
        )
        msg.add_nla(NlAttrStr(IflattrType.IFLA_IFNAME, "lo10"))
        msg.add_nla(
            NlAttrNested(
                IflattrType.IFLA_LINKINFO,
                [
                    NlAttrStrn(IflinkInfo.IFLA_INFO_KIND, "lo"),
                ],
            )
        )

        # Custom attributes
        msg.add_nla(NlAttrStr(IflattrType.IFLA_IFALIAS, "test description"))
        msg.add_nla(NlAttrU32(IflattrType.IFLA_MTU, 1024))

        rx_msg = self.get_reply(msg)
        assert rx_msg.is_type(NlMsgType.NLMSG_ERROR)
        assert
rx_msg.error_code == 0

        # Read the interface back and check both attributes were applied.
        iface_msg = self.get_interface_byname("lo10")
        assert iface_msg.get_nla(IflattrType.IFLA_IFALIAS).text == "test description"
        assert iface_msg.get_nla(IflattrType.IFLA_MTU).u32 == 1024

    @pytest.mark.require_user("root")
    def test_modify_iface_attrs(self):
        """Tests interface modifications"""
        # Step 1: create a plain loopback interface.
        flags = NlmNewFlags.NLM_F_EXCL.value | NlmNewFlags.NLM_F_CREATE.value
        msg = NetlinkIflaMessage(self.helper, NlRtMsgType.RTM_NEWLINK.value)
        msg.nl_hdr.nlmsg_flags = (
            flags | NlmBaseFlags.NLM_F_ACK.value | NlmBaseFlags.NLM_F_REQUEST.value
        )
        msg.add_nla(NlAttrStr(IflattrType.IFLA_IFNAME, "lo10"))
        msg.add_nla(
            NlAttrNested(
                IflattrType.IFLA_LINKINFO,
                [
                    NlAttrStrn(IflinkInfo.IFLA_INFO_KIND, "lo"),
                ],
            )
        )

        rx_msg = self.get_reply(msg)
        assert rx_msg.is_type(NlMsgType.NLMSG_ERROR)
        assert rx_msg.error_code == 0

        # Step 2: a second RTM_NEWLINK without the create/excl flags modifies
        # the existing interface, addressed by name.
        msg = NetlinkIflaMessage(self.helper, NlRtMsgType.RTM_NEWLINK.value)
        msg.nl_hdr.nlmsg_flags = (
            NlmBaseFlags.NLM_F_ACK.value | NlmBaseFlags.NLM_F_REQUEST.value
        )
        msg.add_nla(NlAttrStr(IflattrType.IFLA_IFNAME, "lo10"))

        # Custom attributes
        msg.add_nla(NlAttrStr(IflattrType.IFLA_IFALIAS, "test description"))
        msg.add_nla(NlAttrU32(IflattrType.IFLA_MTU, 1024))

        rx_msg = self.get_reply(msg)
        assert rx_msg.is_type(NlMsgType.NLMSG_ERROR)
        assert rx_msg.error_code == 0

        iface_msg = self.get_interface_byname("lo10")
        assert iface_msg.get_nla(IflattrType.IFLA_IFALIAS).text == "test description"
        assert iface_msg.get_nla(IflattrType.IFLA_MTU).u32 == 1024

    @pytest.mark.require_user("root")
    def test_delete_iface(self):
        """Tests interface deletion by index"""
        # Create the interface that will be deleted.
        flags = NlmNewFlags.NLM_F_EXCL.value | NlmNewFlags.NLM_F_CREATE.value
        msg = NetlinkIflaMessage(self.helper, NlRtMsgType.RTM_NEWLINK.value)
        msg.nl_hdr.nlmsg_flags = (
            flags | NlmBaseFlags.NLM_F_ACK.value | NlmBaseFlags.NLM_F_REQUEST.value
        )
        msg.add_nla(NlAttrStr(IflattrType.IFLA_IFNAME, "lo10"))
        msg.add_nla(
            NlAttrNested(
                IflattrType.IFLA_LINKINFO,
                [
                    NlAttrStrn(IflinkInfo.IFLA_INFO_KIND, "lo"),
                ],
            )
        )

        rx_msg = self.get_reply(msg)
assert rx_msg.is_type(NlMsgType.NLMSG_ERROR)
        assert rx_msg.error_code == 0

        # Look up the index of the new interface; the delete request uses it.
        iface_msg = self.get_interface_byname("lo10")
        iface_idx = iface_msg.base_hdr.ifi_index

        msg = NetlinkIflaMessage(self.helper, NlRtMsgType.RTM_DELLINK.value)
        msg.nl_hdr.nlmsg_flags = (
            NlmBaseFlags.NLM_F_ACK.value | NlmBaseFlags.NLM_F_REQUEST.value
        )
        msg.base_hdr.ifi_index = iface_idx
        # msg.add_nla(NlAttrStr(IflattrType.IFLA_IFNAME, "lo10"))

        rx_msg = self.get_reply(msg)
        assert rx_msg.is_type(NlMsgType.NLMSG_ERROR)
        assert rx_msg.error_code == 0

        # NOTE(review): this probe uses index 2147483647 rather than iface_idx,
        # so the ENODEV below is returned whether or not the delete took
        # effect — consider querying iface_idx to actually verify deletion.
        msg = NetlinkIflaMessage(self.helper, NlRtMsgType.RTM_GETLINK.value)
        msg.nl_hdr.nlmsg_flags = (
            NlmBaseFlags.NLM_F_ACK.value | NlmBaseFlags.NLM_F_REQUEST.value
        )
        msg.base_hdr.ifi_index = 2147483647

        rx_msg = self.get_reply(msg)
        assert rx_msg.is_type(NlMsgType.NLMSG_ERROR)
        assert rx_msg.error_code == errno.ENODEV

    @pytest.mark.require_user("root")
    def test_dump_ifaces_many(self):
        """Tests if interface dump is not missing interfaces"""

        # Expected ifindex -> ifname map, seeded with the existing lo0.
        ifmap = {}
        ifmap[socket.if_nametoindex("lo0")] = "lo0"

        # Create lo1..lo40 and record the index reported in each reply cookie.
        for i in range(40):
            ifname = "lo{}".format(i + 1)
            flags = NlmNewFlags.NLM_F_EXCL.value | NlmNewFlags.NLM_F_CREATE.value
            msg = NetlinkIflaMessage(self.helper, NlRtMsgType.RTM_NEWLINK.value)
            msg.nl_hdr.nlmsg_flags = (
                flags | NlmBaseFlags.NLM_F_ACK.value | NlmBaseFlags.NLM_F_REQUEST.value
            )
            msg.add_nla(NlAttrStr(IflattrType.IFLA_IFNAME, ifname))
            msg.add_nla(
                NlAttrNested(
                    IflattrType.IFLA_LINKINFO,
                    [
                        NlAttrStrn(IflinkInfo.IFLA_INFO_KIND, "lo"),
                    ],
                )
            )

            rx_msg = self.get_reply(msg)
            assert rx_msg.is_type(NlMsgType.NLMSG_ERROR)
            nla_list, _ = rx_msg.parse_attrs(bytes(rx_msg.cookie)[4:], rtnl_ifla_attrs)
            nla_map = {n.nla_type: n for n in nla_list}
            assert nla_map[IflattrType.IFLA_IFNAME.value].text == ifname
            ifindex = nla_map[IflattrType.IFLA_NEW_IFINDEX.value].u32
            assert ifindex > 0
            assert ifindex not in ifmap
            ifmap[ifindex] = ifname

        # Dump all interfaces and check if the output matches ifmap
        kernel_ifmap = {}
        msg = NetlinkIflaMessage(self.helper, NlRtMsgType.RTM_GETLINK.value)
msg.nl_hdr.nlmsg_flags = (
            NlmBaseFlags.NLM_F_ACK.value | NlmBaseFlags.NLM_F_REQUEST.value
        )
        self.write_message(msg)
        # Read the dump until NLMSG_DONE; anything other than RTM_NEWLINK with
        # the request's sequence number is treated as a failure.
        while True:
            rx_msg = self.read_message()
            if msg.nl_hdr.nlmsg_seq != rx_msg.nl_hdr.nlmsg_seq:
                raise ValueError(
                    "unexpected seq {}".format(rx_msg.nl_hdr.nlmsg_seq)
                )
            if rx_msg.is_type(NlMsgType.NLMSG_ERROR):
                raise ValueError("unexpected message {}".format(rx_msg))
            if rx_msg.is_type(NlMsgType.NLMSG_DONE):
                break
            if not rx_msg.is_type(NlRtMsgType.RTM_NEWLINK):
                raise ValueError("unexpected message {}".format(rx_msg))

            ifindex = rx_msg.base_hdr.ifi_index
            # NOTE(review): this assert compares ifindex with the value it was
            # just read from, so it can never fail.
            assert ifindex == rx_msg.base_hdr.ifi_index
            ifname = rx_msg.get_nla(IflattrType.IFLA_IFNAME).text
            # Only loopback-named interfaces take part in the comparison.
            if ifname.startswith("lo"):
                kernel_ifmap[ifindex] = ifname
        assert kernel_ifmap == ifmap

    #
    # *
    # * {len=76, type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1662892737, pid=0},
    # *  {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0},
    # *   {{nla_len=8, nla_type=IFLA_LINK}, 2},
    # *   {{nla_len=12, nla_type=IFLA_IFNAME}, "xvlan22"},
    # *   {{nla_len=24, nla_type=IFLA_LINKINFO},
    # *     {{nla_len=8, nla_type=IFLA_INFO_KIND}, "vlan"...},
    # *     {{nla_len=12, nla_type=IFLA_INFO_DATA}, "\x06\x00\x01\x00\x16\x00\x00\x00"}
    # */
    @pytest.mark.require_user("root")
    def test_create_vlan_plain(self):
        """Creates 802.1Q VLAN interface in vlanXX and ifX fashion"""
        # The parent interface is the vnet's "if1"; its index is sent both in
        # the message header and as IFLA_LINK.
        os_ifname = self.vnet.iface_alias_map["if1"].name
        ifindex = socket.if_nametoindex(os_ifname)

        flags = NlmNewFlags.NLM_F_EXCL.value | NlmNewFlags.NLM_F_CREATE.value
        msg = NetlinkIflaMessage(self.helper, NlRtMsgType.RTM_NEWLINK.value)
        msg.nl_hdr.nlmsg_flags = (
            flags | NlmBaseFlags.NLM_F_ACK.value | NlmBaseFlags.NLM_F_REQUEST.value
        )
+        msg.base_hdr.ifi_index = ifindex

        msg.add_nla(NlAttrU32(IflattrType.IFLA_LINK, ifindex))
        msg.add_nla(NlAttrStr(IflattrType.IFLA_IFNAME, "vlan22"))

        # Kind "vlan" with nested IFLA_INFO_DATA carrying the VLAN id.
        msg.add_nla(
            NlAttrNested(
                IflattrType.IFLA_LINKINFO,
                [
                    NlAttrStrn(IflinkInfo.IFLA_INFO_KIND, "vlan"),
                    NlAttrNested(
                        IflinkInfo.IFLA_INFO_DATA,
                        [
NlAttrU16(IfLinkInfoDataVlan.IFLA_VLAN_ID, 22),
                        ],
                    ),
                ],
            )
        )

        rx_msg = self.get_reply(msg)
        assert rx_msg.is_type(NlMsgType.NLMSG_ERROR)
        assert rx_msg.error_code == 0

        # NOTE(review): debugging output added by this patch; the commented-out
        # duplicate below suggests it may not be meant to stay.
+        ToolsHelper.print_net_debug()
        # Raises ValueError if vlan22 was not created.
        self.get_interface_byname("vlan22")
        # ToolsHelper.print_net_debug()