Index: head/sys/conf/files =================================================================== --- head/sys/conf/files +++ head/sys/conf/files @@ -4102,6 +4102,7 @@ net/route/route_ctl.c standard net/route/route_ddb.c optional ddb net/route/route_helpers.c standard +net/route/route_tables.c standard net/route/route_temporal.c standard net/rss_config.c optional inet rss | inet6 rss net/rtsock.c standard Index: head/sys/net/route.h =================================================================== --- head/sys/net/route.h +++ head/sys/net/route.h @@ -115,7 +115,10 @@ #define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */ #define RT_ALL_FIBS -1 /* Announce event for every fib */ #ifdef _KERNEL -extern u_int rt_numfibs; /* number of usable routing tables */ +VNET_DECLARE(uint32_t, _rt_numfibs); /* number of existing route tables */ +#define V_rt_numfibs VNET(_rt_numfibs) +/* temporary compat arg */ +#define rt_numfibs V_rt_numfibs VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */ #define V_rt_add_addr_allfibs VNET(rt_add_addr_allfibs) #endif @@ -379,7 +382,7 @@ void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *); struct rib_head *rt_table_init(int, int, u_int); void rt_table_destroy(struct rib_head *); -u_int rt_tables_get_gen(int table, int fam); +u_int rt_tables_get_gen(uint32_t table, sa_family_t family); int rtsock_addrmsg(int, struct ifaddr *, int); int rtsock_routemsg(int, struct rtentry *, struct ifnet *ifp, int, int); Index: head/sys/net/route.c =================================================================== --- head/sys/net/route.c +++ head/sys/net/route.c @@ -74,29 +74,6 @@ #include #include -#include - -#define RT_MAXFIBS UINT16_MAX - -/* Kernel config default option. 
*/ -#ifdef ROUTETABLES -#if ROUTETABLES <= 0 -#error "ROUTETABLES defined too low" -#endif -#if ROUTETABLES > RT_MAXFIBS -#error "ROUTETABLES defined too big" -#endif -#define RT_NUMFIBS ROUTETABLES -#endif /* ROUTETABLES */ -/* Initialize to default if not otherwise set. */ -#ifndef RT_NUMFIBS -#define RT_NUMFIBS 1 -#endif - -/* This is read-only.. */ -u_int rt_numfibs = RT_NUMFIBS; -SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, ""); - /* * By default add routes to all fibs for new interfaces. * Once this is set to 0 then only allocate routes on interface @@ -118,10 +95,6 @@ VNET_PCPUSTAT_SYSUNINIT(rtstat); #endif -VNET_DEFINE(struct rib_head *, rt_tables); -#define V_rt_tables VNET(rt_tables) - - EVENTHANDLER_LIST_DEFINE(rt_addrmsg); static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *, @@ -130,63 +103,6 @@ int flags); /* - * handler for net.my_fibnum - */ -static int -sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) -{ - int fibnum; - int error; - - fibnum = curthread->td_proc->p_fibnum; - error = sysctl_handle_int(oidp, &fibnum, 0, req); - return (error); -} - -SYSCTL_PROC(_net, OID_AUTO, my_fibnum, - CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, - &sysctl_my_fibnum, "I", - "default FIB of caller"); - -static __inline struct rib_head ** -rt_tables_get_rnh_ptr(int table, int fam) -{ - struct rib_head **rnh; - - KASSERT(table >= 0 && table < rt_numfibs, - ("%s: table out of bounds (0 <= %d < %d)", __func__, table, - rt_numfibs)); - KASSERT(fam >= 0 && fam < (AF_MAX + 1), - ("%s: fam out of bounds (0 <= %d < %d)", __func__, fam, AF_MAX+1)); - - /* rnh is [fib=0][af=0]. */ - rnh = (struct rib_head **)V_rt_tables; - /* Get the offset to the requested table and fam. 
*/ - rnh += table * (AF_MAX+1) + fam; - - return (rnh); -} - -struct rib_head * -rt_tables_get_rnh(int table, int fam) -{ - - return (*rt_tables_get_rnh_ptr(table, fam)); -} - -u_int -rt_tables_get_gen(int table, int fam) -{ - struct rib_head *rnh; - - rnh = *rt_tables_get_rnh_ptr(table, fam); - KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d fam %d", - __func__, table, fam)); - return (rnh->rnh_gen); -} - - -/* * route initialization must occur before ip6_init2(), which happenas at * SI_ORDER_MIDDLE. */ @@ -194,89 +110,10 @@ route_init(void) { - /* whack the tunable ints into line. */ - if (rt_numfibs > RT_MAXFIBS) - rt_numfibs = RT_MAXFIBS; - if (rt_numfibs == 0) - rt_numfibs = 1; nhops_init(); } SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, NULL); -static void -vnet_route_init(const void *unused __unused) -{ - struct domain *dom; - struct rib_head **rnh; - int table; - int fam; - - V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * - sizeof(struct rib_head *), M_RTABLE, M_WAITOK|M_ZERO); - - vnet_rtzone_init(); - for (dom = domains; dom; dom = dom->dom_next) { - if (dom->dom_rtattach == NULL) - continue; - - for (table = 0; table < rt_numfibs; table++) { - fam = dom->dom_family; - if (table != 0 && fam != AF_INET6 && fam != AF_INET) - break; - - rnh = rt_tables_get_rnh_ptr(table, fam); - if (rnh == NULL) - panic("%s: rnh NULL", __func__); - *rnh = dom->dom_rtattach(table); - } - } -} -VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, - vnet_route_init, 0); - -#ifdef VIMAGE -static void -vnet_route_uninit(const void *unused __unused) -{ - int table; - int fam; - struct domain *dom; - struct rib_head **rnh; - - for (dom = domains; dom; dom = dom->dom_next) { - if (dom->dom_rtdetach == NULL) - continue; - - for (table = 0; table < rt_numfibs; table++) { - fam = dom->dom_family; - - if (table != 0 && fam != AF_INET6 && fam != AF_INET) - break; - - rnh = rt_tables_get_rnh_ptr(table, fam); - if (rnh == NULL) - panic("%s: 
rnh NULL", __func__); - dom->dom_rtdetach(*rnh); - } - } - - /* - * dom_rtdetach calls rt_table_destroy(), which - * schedules deletion for all rtentries, nexthops and control - * structures. Wait for the destruction callbacks to fire. - * Note that this should result in freeing all rtentries, but - * nexthops deletions will be scheduled for the next epoch run - * and will be completed after vnet teardown. - */ - epoch_drain_callbacks(net_epoch_preempt); - - free(V_rt_tables, M_RTABLE); - vnet_rtzone_destroy(); -} -VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, - vnet_route_uninit, 0); -#endif - struct rib_head * rt_table_init(int offset, int family, u_int fibnum) { @@ -345,21 +182,6 @@ /* Assume table is already empty */ RIB_LOCK_DESTROY(rh); free(rh, M_RTABLE); -} - - -#ifndef _SYS_SYSPROTO_H_ -struct setfib_args { - int fibnum; -}; -#endif -int -sys_setfib(struct thread *td, struct setfib_args *uap) -{ - if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) - return EINVAL; - td->td_proc->p_fibnum = uap->fibnum; - return (0); } /* Index: head/sys/net/route/route_tables.c =================================================================== --- head/sys/net/route/route_tables.c +++ head/sys/net/route/route_tables.c @@ -0,0 +1,326 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1980, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/************************************************************************ + * Note: In this file a 'fib' is a "forwarding information base" * + * Which is the new name for an in kernel routing (next hop) table. * + ***********************************************************************/ + +#include +__FBSDID("$FreeBSD$"); +#include "opt_route.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* Kernel config default option. */ +#ifdef ROUTETABLES +#if ROUTETABLES <= 0 +#error "ROUTETABLES defined too low" +#endif +#if ROUTETABLES > RT_MAXFIBS +#error "ROUTETABLES defined too big" +#endif +#define RT_NUMFIBS ROUTETABLES +#endif /* ROUTETABLES */ +/* Initialize to default if not otherwise set. 
*/ +#ifndef RT_NUMFIBS +#define RT_NUMFIBS 1 +#endif + +static void grow_rtables(uint32_t num_fibs); + +VNET_DEFINE_STATIC(struct sx, rtables_lock); +#define V_rtables_lock VNET(rtables_lock) +#define RTABLES_LOCK() sx_xlock(&V_rtables_lock) +#define RTABLES_UNLOCK() sx_xunlock(&V_rtables_lock) +#define RTABLES_LOCK_INIT() sx_init(&V_rtables_lock, "rtables lock") +#define RTABLES_LOCK_ASSERT() sx_assert(&V_rtables_lock, SA_LOCKED) + +VNET_DEFINE_STATIC(struct rib_head **, rt_tables); +#define V_rt_tables VNET(rt_tables) + +VNET_DEFINE(uint32_t, _rt_numfibs) = RT_NUMFIBS; + +/* + * Handler for net.my_fibnum. + * Returns current fib of the process. + */ +static int +sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) +{ + int fibnum; + int error; + + fibnum = curthread->td_proc->p_fibnum; + error = sysctl_handle_int(oidp, &fibnum, 0, req); + return (error); +} +SYSCTL_PROC(_net, OID_AUTO, my_fibnum, + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, + &sysctl_my_fibnum, "I", + "default FIB of caller"); + +static uint32_t +normalize_num_rtables(uint32_t num_rtables) +{ + + if (num_rtables > RT_MAXFIBS) + num_rtables = RT_MAXFIBS; + else if (num_rtables == 0) + num_rtables = 1; + return (num_rtables); +} + +/* + * Sets the number of fibs in the current vnet. + * Function does not allow shrinking number of rtables. + */ +static int +sysctl_fibs(SYSCTL_HANDLER_ARGS) +{ + uint32_t new_fibs; + int error; + + RTABLES_LOCK(); + new_fibs = V_rt_numfibs; + error = sysctl_handle_32(oidp, &new_fibs, 0, req); + if (error == 0) { + new_fibs = normalize_num_rtables(new_fibs); + + if (new_fibs < V_rt_numfibs) + error = ENOTCAPABLE; + if (new_fibs > V_rt_numfibs) + grow_rtables(new_fibs); + } + RTABLES_UNLOCK(); + + return (error); +} +SYSCTL_PROC(_net, OID_AUTO, fibs, + CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 0, + &sysctl_fibs, "IU", + "set number of fibs"); + +/* + * Sets fib of a current process. 
+ */ +int +sys_setfib(struct thread *td, struct setfib_args *uap) +{ + int error = 0; + + CURVNET_SET(TD_TO_VNET(td)); + if (uap->fibnum >= 0 && uap->fibnum < V_rt_numfibs) + td->td_proc->p_fibnum = uap->fibnum; + else + error = EINVAL; + CURVNET_RESTORE(); + + return (error); +} + +/* + * Grows the number of routing tables in the current vnet. + * The function creates a new index array for all rtables and allocates + * the remaining routing tables. + */ +static void +grow_rtables(uint32_t num_tables) +{ + struct domain *dom; + struct rib_head **prnh; + struct rib_head **new_rt_tables, **old_rt_tables; + int family; + + RTABLES_LOCK_ASSERT(); + + KASSERT(num_tables >= V_rt_numfibs, ("num_tables(%u) < rt_numfibs(%u)\n", + num_tables, V_rt_numfibs)); + + new_rt_tables = mallocarray(num_tables * (AF_MAX + 1), sizeof(void *), + M_RTABLE, M_WAITOK | M_ZERO); + + /* + * Current rt_tables layout: + * fib0[af0, af1, af2, .., AF_MAX]fib1[af0, af1, af2, .., AF_MAX].. + * this allows copying the existing table data with memcpy() + */ + if (V_rt_tables != NULL) + memcpy(new_rt_tables, V_rt_tables, + V_rt_numfibs * (AF_MAX + 1) * sizeof(void *)); + + /* Populate the remainders */ + for (dom = domains; dom; dom = dom->dom_next) { + if (dom->dom_rtattach == NULL) + continue; + family = dom->dom_family; + for (int i = 0; i < num_tables; i++) { + prnh = &new_rt_tables[i * (AF_MAX + 1) + family]; + if (*prnh != NULL) + continue; + *prnh = dom->dom_rtattach(i); + if (*prnh == NULL) + log(LOG_ERR, "unable to create routing tables for domain %d\n", + dom->dom_family); + } + } + + /* + * Update rtables pointer. + * Ensure all writes to new_rt_tables have been completed before + * switching pointer. 
+ */ + atomic_thread_fence_rel(); + old_rt_tables = V_rt_tables; + V_rt_tables = new_rt_tables; + + /* Wait till all cpus see new pointers */ + atomic_thread_fence_rel(); + epoch_wait_preempt(net_epoch_preempt); + + /* Finally, set number of fibs to a new value */ + V_rt_numfibs = num_tables; + + if (old_rt_tables != NULL) + free(old_rt_tables, M_RTABLE); +} + +static void +vnet_rtables_init(const void *unused __unused) +{ + int num_rtables_base; + + if (IS_DEFAULT_VNET(curvnet)) { + num_rtables_base = RT_NUMFIBS; + TUNABLE_INT_FETCH("net.fibs", &num_rtables_base); + V_rt_numfibs = normalize_num_rtables(num_rtables_base); + } else + V_rt_numfibs = 1; + + vnet_rtzone_init(); + + RTABLES_LOCK_INIT(); + + RTABLES_LOCK(); + grow_rtables(V_rt_numfibs); + RTABLES_UNLOCK(); +} +VNET_SYSINIT(vnet_rtables_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, + vnet_rtables_init, 0); + +#ifdef VIMAGE +static void +rtables_destroy(const void *unused __unused) +{ + struct rib_head *rnh; + struct domain *dom; + int family; + + RTABLES_LOCK(); + for (dom = domains; dom; dom = dom->dom_next) { + if (dom->dom_rtdetach == NULL) + continue; + family = dom->dom_family; + for (int i = 0; i < V_rt_numfibs; i++) { + rnh = rt_tables_get_rnh(i, family); + dom->dom_rtdetach(rnh); + } + } + RTABLES_UNLOCK(); + + /* + * dom_rtdetach calls rt_table_destroy(), which + * schedules deletion for all rtentries, nexthops and control + * structures. Wait for the destruction callbacks to fire. + * Note that this should result in freeing all rtentries, but + * nexthops deletions will be scheduled for the next epoch run + * and will be completed after vnet teardown. 
+ */ + epoch_drain_callbacks(net_epoch_preempt); + + free(V_rt_tables, M_RTABLE); + vnet_rtzone_destroy(); +} +VNET_SYSUNINIT(rtables_destroy, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, + rtables_destroy, 0); +#endif + +static inline struct rib_head * +rt_tables_get_rnh_ptr(uint32_t table, sa_family_t family) +{ + struct rib_head **prnh; + + KASSERT(table < V_rt_numfibs, + ("%s: table out of bounds (%d < %d)", __func__, table, + V_rt_numfibs)); + KASSERT(family < (AF_MAX + 1), + ("%s: fam out of bounds (%d < %d)", __func__, family, AF_MAX + 1)); + + /* rnh is [fib=0][af=0]. */ + prnh = V_rt_tables; + /* Get the offset to the requested table and fam. */ + prnh += table * (AF_MAX + 1) + family; + + return (*prnh); +} + +struct rib_head * +rt_tables_get_rnh(uint32_t table, sa_family_t family) +{ + + return (rt_tables_get_rnh_ptr(table, family)); +} + +u_int +rt_tables_get_gen(uint32_t table, sa_family_t family) +{ + struct rib_head *rnh; + + rnh = rt_tables_get_rnh_ptr(table, family); + KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d family %d", + __func__, table, family)); + return (rnh->rnh_gen); +} + Index: head/sys/net/route/route_var.h =================================================================== --- head/sys/net/route/route_var.h +++ head/sys/net/route/route_var.h @@ -79,6 +79,7 @@ /* Constants */ #define RIB_MAX_RETRIES 3 +#define RT_MAXFIBS UINT16_MAX /* Macro for verifying fields in af-specific 'struct route' structures */ #define CHK_STRUCT_FIELD_GENERIC(_s1, _f1, _s2, _f2) \ @@ -104,7 +105,7 @@ _Static_assert(__offsetof(struct route, ro_dst) == __offsetof(_ro_new, _dst_new),\ "ro_dst and " #_dst_new " are at different offset") -struct rib_head *rt_tables_get_rnh(int fib, int family); +struct rib_head *rt_tables_get_rnh(uint32_t table, sa_family_t family); void rt_mpath_init_rnh(struct rib_head *rnh); int rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum); void rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt);