Changeset View
Changeset View
Standalone View
Standalone View
sys/net/route/route_algo.c
- This file was added.
/*- | |||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD | |||||
* | |||||
* Copyright (c) 2020 Alexander V. Chernikov | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions | |||||
* are met: | |||||
* 1. Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer. | |||||
* 2. Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||||
* SUCH DAMAGE. | |||||
*/ | |||||
#include <sys/cdefs.h> | |||||
__FBSDID("$FreeBSD$"); | |||||
#include "opt_inet.h" | |||||
#include "opt_inet6.h" | |||||
#include "opt_route.h" | |||||
#include <sys/param.h> | |||||
#include <sys/eventhandler.h> | |||||
#include <sys/kernel.h> | |||||
#include <sys/sbuf.h> | |||||
#include <sys/lock.h> | |||||
#include <sys/rmlock.h> | |||||
#include <sys/malloc.h> | |||||
#include <sys/mbuf.h> | |||||
#include <sys/module.h> | |||||
#include <sys/kernel.h> | |||||
#include <sys/priv.h> | |||||
#include <sys/proc.h> | |||||
#include <sys/socket.h> | |||||
#include <sys/socketvar.h> | |||||
#include <sys/sysctl.h> | |||||
#include <sys/syslog.h> | |||||
#include <sys/queue.h> | |||||
#include <net/vnet.h> | |||||
#include <net/if.h> | |||||
#include <net/if_var.h> | |||||
#include <netinet/in.h> | |||||
#include <netinet/in_var.h> | |||||
#include <netinet/ip.h> | |||||
#include <netinet/ip_var.h> | |||||
#ifdef INET6 | |||||
#include <netinet/ip6.h> | |||||
#include <netinet6/ip6_var.h> | |||||
#endif | |||||
#include <net/route.h> | |||||
#include <net/route/nhop.h> | |||||
#include <net/route/route_ctl.h> | |||||
#include <net/route/route_var.h> | |||||
#include <net/route/route_algo.h> | |||||
#include <machine/stdarg.h> | |||||
/* | |||||
* Fib lookup framework. | |||||
* | |||||
* This framework enables dynamic loading of lookup modules enabling | |||||
* accelerated longest-prefix-match lookups for the routing tables. | |||||
* | |||||
* flm - fib lookup modules - kernel modules implementing particular algo | |||||
* fd - fib data - instance of an flm bound to specific routing table | |||||
* | |||||
* For each supported address family, there is a an allocated array of fib_dp | |||||
* structures, indexed by fib number. Each array entry contains callback function | |||||
* and its argument. This function will be called with a family-specific lookup key, | |||||
* scope and provided argument. This array gets re-created every time when new algo | |||||
* instance gets created. Please take a look at the replace_rtables_family() function | |||||
* for more details. | |||||
* | |||||
* Control plane for to setup and update the necessary dataplane structures. | |||||
* 1) nexhops abstraction -> module has to deal with index, refcounting, nexhtop groups etc | |||||
* 2) sync with route tables | |||||
* 3) dataplane attachment points | |||||
* 3) fail early. Some algorithms are immutable, so any change leads to rebuild. Some | |||||
* are mutable till some extent so the module is build over common setup/teardown | |||||
* instances, making error handling * easier. | |||||
* 4) preference. Lookup modules contain callbacks to determine the preference of particula | |||||
* lookup algorithm given the current route table scale. | |||||
* | |||||
*/ | |||||
SYSCTL_DECL(_net_route); | |||||
SYSCTL_NODE(_net_route, OID_AUTO, algo, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | |||||
"Route algorithm lookups"); | |||||
#ifdef INET6 | |||||
bool algo_fixed_inet6 = false; | |||||
SYSCTL_NODE(_net_route_algo, OID_AUTO, inet6, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | |||||
"IPv6 algorithm lookups"); | |||||
#endif | |||||
#ifdef INET | |||||
bool algo_fixed_inet = false; | |||||
SYSCTL_NODE(_net_route_algo, OID_AUTO, inet, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | |||||
"IPv4 algorithm lookups"); | |||||
#endif | |||||
struct nhop_ref_table { | |||||
uint32_t count; | |||||
int32_t refcnt[0]; | |||||
}; | |||||
struct fib_data { | |||||
uint32_t number_nhops; /* current # of nhops */ | |||||
uint32_t number_records; /* current # of routes */ | |||||
uint8_t hit_nhops; /* true if out of nhop limit */ | |||||
uint8_t init_done; /* true if init is competed */ | |||||
uint32_t fd_dead:1; /* Scheduled for deletion */ | |||||
uint32_t fd_linked:1; /* true if linked */ | |||||
uint32_t fd_need_rebuild:1; /* true if rebuild scheduled */ | |||||
uint32_t fd_force_eval:1;/* true if rebuild scheduled */ | |||||
uint8_t fd_family; /* family */ | |||||
uint32_t fd_fibnum; /* fibnum */ | |||||
uint32_t fd_failed_rebuilds; /* stat: failed rebuilds */ | |||||
uint32_t fd_algo_mask; /* bitmask for algo data */ | |||||
struct callout fd_callout; /* rebuild callout */ | |||||
void *fd_algo_data; /* algorithm data */ | |||||
struct nhop_object **nh_idx; /* nhop idx->ptr array */ | |||||
struct nhop_ref_table *nh_ref_table; /* array with # of nhop references */ | |||||
struct rib_head *fd_rh; /* RIB table we're attached to */ | |||||
struct rib_subscription *fd_rs; /* storing table subscription */ | |||||
struct fib_dp fd_dp; /* fib datapath data */ | |||||
struct vnet *fd_vnet; /* vnet fib belongs to */ | |||||
struct epoch_context fd_epoch_ctx; /* epoch context for deletion */ | |||||
uint64_t gencnt; | |||||
struct fib_lookup_module *fd_flm;/* pointer to the lookup module */ | |||||
uint32_t fd_num_changes; /* number of changes since last callout */ | |||||
TAILQ_ENTRY(fib_data) entries; /* list of all fds in vnet */ | |||||
}; | |||||
static void rebuild_callout(void *_data); | |||||
static void destroy_fd_instance_epoch(epoch_context_t ctx); | |||||
static enum flm_op_result attach_datapath(struct fib_data *fd); | |||||
static bool is_idx_free(struct fib_data *fd, uint32_t index); | |||||
static void set_algo_fixed(struct rib_head *rh); | |||||
static uint32_t fib_ref_nhop(struct fib_data *fd, struct nhop_object *nh); | |||||
static void fib_unref_nhop(struct fib_data *fd, struct nhop_object *nh); | |||||
static struct fib_lookup_module *fib_check_best_algo(struct rib_head *rh, | |||||
struct fib_lookup_module *orig_flm); | |||||
static void fib_unref_algo(struct fib_lookup_module *flm); | |||||
struct mtx fib_mtx; | |||||
#define MOD_LOCK() mtx_lock(&fib_mtx) | |||||
#define MOD_UNLOCK() mtx_unlock(&fib_mtx) | |||||
#if 0 | |||||
uint32_t algo_bitmask_idx = 0; | |||||
static uint32_t algo_inet_mask = 0; | |||||
static uint32_t algo_inet6_mask = 0; | |||||
#endif | |||||
/* Algorithm has to be this percent better than the current to switch */ | |||||
#define BEST_DIFF_PERCENT (5 * 256 / 100) | |||||
/* Schedule algo re-evaluation X seconds after a change */ | |||||
#define ALGO_EVAL_DELAY_MS 30000 | |||||
/* Force algo re-evaluation after X changes */ | |||||
#define ALGO_EVAL_NUM_ROUTES 100 | |||||
/* Try to setup algorithm X times */ | |||||
#define FIB_MAX_TRIES 32 | |||||
/* Max amount of supported nexthops */ | |||||
#define FIB_MAX_NHOPS 262144 | |||||
#define FIB_CALLOUT_DELAY_MS 50 | |||||
/* Debug */ | |||||
static int flm_debug_level = LOG_NOTICE; | |||||
SYSCTL_INT(_net_route_algo, OID_AUTO, debug_level, CTLFLAG_RW, | |||||
&flm_debug_level, 0, "debuglevel"); | |||||
#define RTDEBUG | |||||
#ifdef RTDEBUG | |||||
#define _PASS_MSG(_l) (flm_debug_level >= (_l)) | |||||
#define ALGO_PRINTF(_fmt, ...) printf("[rt_algo] %s: " _fmt "\n", __func__, ##__VA_ARGS__) | |||||
#define _ALGO_PRINTF(_fib, _fam, _aname, _func, _fmt, ...) \ | |||||
printf("[rt_algo] %s.%u (%s) %s: " _fmt "\n",\ | |||||
print_family(_fam), _fib, _aname, _func, ## __VA_ARGS__) | |||||
#define _RH_PRINTF(_fib, _fam, _func, _fmt, ...) \ | |||||
printf("[rt_algo] %s.%u %s: " _fmt "\n", print_family(_fam), _fib, _func, ## __VA_ARGS__) | |||||
#define RH_PRINTF(_l, _rh, _fmt, ...) if (_PASS_MSG(_l)) { \ | |||||
_RH_PRINTF(_rh->rib_fibnum, _rh->rib_family, __func__, _fmt, ## __VA_ARGS__);\ | |||||
} | |||||
#define FD_PRINTF(_l, _fd, _fmt, ...) if (_PASS_MSG(_l)) { \ | |||||
_ALGO_PRINTF(_fd->fd_fibnum, _fd->fd_family, _fd->fd_flm->flm_name, \ | |||||
__func__, _fmt, ## __VA_ARGS__); \ | |||||
} | |||||
#else | |||||
#define FD_PRINTF(fd, _fmt, ...) | |||||
#define ALGO_PRINTF(_fmt, ...) | |||||
#define RH_PRINTF(_fmt, ...) | |||||
#endif | |||||
/* List of all fib lookup instances */ | |||||
VNET_DEFINE_STATIC(TAILQ_HEAD(fib_data_head, fib_data), fib_data_list); | |||||
#define V_fib_data_list VNET(fib_data_list) | |||||
struct fib_error { | |||||
int fe_family; | |||||
uint32_t fe_fibnum; | |||||
struct fib_lookup_module *fe_flm; | |||||
TAILQ_ENTRY(fib_error) entries;/* list of all errored entries */ | |||||
}; | |||||
TAILQ_HEAD(fib_error_head, fib_error) fib_error_list; | |||||
struct fib_dp_header { | |||||
struct epoch_context ffi_epoch_ctx; | |||||
uint32_t ffi_algo_mask; | |||||
uint32_t ffi_num_tables; | |||||
struct fib_dp ffi_idx[0]; | |||||
}; | |||||
static TAILQ_HEAD(, fib_lookup_module) all_algo_list; | |||||
static bool | |||||
flm_error_add(struct fib_lookup_module *flm, uint32_t fibnum) | |||||
{ | |||||
struct fib_error *fe, *fe_tmp; | |||||
fe = malloc(sizeof(struct fib_error), M_TEMP, M_NOWAIT | M_ZERO); | |||||
if (fe == NULL) | |||||
return (false); | |||||
fe->fe_flm = flm; | |||||
fe->fe_family = flm->flm_family; | |||||
fe->fe_fibnum = fibnum; | |||||
MOD_LOCK(); | |||||
TAILQ_FOREACH(fe_tmp, &fib_error_list, entries) { | |||||
if ((fe_tmp->fe_flm == flm) && (fe_tmp->fe_fibnum == fibnum)) { | |||||
MOD_UNLOCK(); | |||||
free(fe, M_TEMP); | |||||
return (true); | |||||
} | |||||
} | |||||
TAILQ_INSERT_HEAD(&fib_error_list, fe, entries); | |||||
MOD_UNLOCK(); | |||||
return (true); | |||||
} | |||||
static bool | |||||
flm_error_check(struct fib_lookup_module *flm, uint32_t fibnum) | |||||
{ | |||||
struct fib_error *fe; | |||||
TAILQ_FOREACH(fe, &fib_error_list, entries) { | |||||
if ((fe->fe_flm == flm) && (fe->fe_fibnum == fibnum)) | |||||
return (true); | |||||
} | |||||
return (false); | |||||
} | |||||
static void | |||||
fib_error_clear_flm(struct fib_lookup_module *flm) | |||||
{ | |||||
struct fib_error *fe, *fe_tmp; | |||||
TAILQ_FOREACH_SAFE(fe, &fib_error_list, entries, fe_tmp) { | |||||
if (fe->fe_flm == flm) { | |||||
TAILQ_REMOVE(&fib_error_list, fe, entries); | |||||
free(fe, M_TEMP); | |||||
} | |||||
} | |||||
} | |||||
static const char * | |||||
print_family(int family) | |||||
{ | |||||
if (family == AF_INET) | |||||
return ("inet"); | |||||
else if (family == AF_INET6) | |||||
return ("inet6"); | |||||
else | |||||
return ("unknown"); | |||||
} | |||||
/* | |||||
* Debug function used by lookup modules. | |||||
* Outputs message denoted by @fmt, prepended by "[rt_algo] inetX.Y (algo) " | |||||
*/ | |||||
void | |||||
fib_printf(int level, struct fib_data *fd, const char *func, char *fmt, ...) | |||||
{ | |||||
char buf[128]; | |||||
va_list ap; | |||||
if (level > flm_debug_level) | |||||
return; | |||||
va_start(ap, fmt); | |||||
vsnprintf(buf, sizeof(buf), fmt, ap); | |||||
va_end(ap); | |||||
_ALGO_PRINTF(fd->fd_fibnum, fd->fd_family, fd->fd_flm->flm_name, | |||||
func, "%s", buf); | |||||
} | |||||
static int | |||||
print_algos(struct sysctl_req *req, int family) | |||||
{ | |||||
struct fib_lookup_module *flm; | |||||
struct sbuf sbuf; | |||||
int error, count = 0; | |||||
error = sysctl_wire_old_buffer(req, 0); | |||||
if (error == 0) { | |||||
sbuf_new_for_sysctl(&sbuf, NULL, 512, req); | |||||
TAILQ_FOREACH(flm, &all_algo_list, entries) { | |||||
if (flm->flm_family == family) { | |||||
if (count++ > 0) | |||||
sbuf_cat(&sbuf, ", "); | |||||
sbuf_cat(&sbuf, flm->flm_name); | |||||
} | |||||
} | |||||
error = sbuf_finish(&sbuf); | |||||
sbuf_delete(&sbuf); | |||||
} | |||||
return (error); | |||||
} | |||||
#ifdef INET6 | |||||
static int | |||||
print_algos_inet6(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
return (print_algos(req, AF_INET6)); | |||||
} | |||||
SYSCTL_PROC(_net_route_algo_inet6, OID_AUTO, algo_list, | |||||
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, | |||||
print_algos_inet6, "A", "List of IPv6 lookup algorithms"); | |||||
#endif | |||||
#ifdef INET | |||||
static int | |||||
print_algos_inet(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
return (print_algos(req, AF_INET)); | |||||
} | |||||
SYSCTL_PROC(_net_route_algo_inet, OID_AUTO, algo_list, | |||||
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, | |||||
print_algos_inet, "A", "List of IPv4 lookup algorithms"); | |||||
#endif | |||||
static uint32_t | |||||
callout_calc_delay(struct fib_data *fd) | |||||
{ | |||||
uint32_t shift; | |||||
if (fd->fd_failed_rebuilds > 10) | |||||
shift = 10; | |||||
else | |||||
shift = fd->fd_failed_rebuilds; | |||||
return ((1 << shift) * FIB_CALLOUT_DELAY_MS); | |||||
} | |||||
static void | |||||
schedule_callout(struct fib_data *fd, int delay_ms) | |||||
{ | |||||
callout_reset_sbt(&fd->fd_callout, 0, SBT_1MS * delay_ms, | |||||
rebuild_callout, fd, 0); | |||||
} | |||||
static void | |||||
schedule_fd_rebuild(struct fib_data *fd) | |||||
{ | |||||
MOD_LOCK(); | |||||
if (!fd->fd_need_rebuild) { | |||||
fd->fd_need_rebuild = true; | |||||
/* | |||||
* Potentially re-schedules pending callout | |||||
* initiated by schedule_algo_eval. | |||||
*/ | |||||
FD_PRINTF(LOG_INFO, fd, "Scheduling rebuilt"); | |||||
schedule_callout(fd, callout_calc_delay(fd)); | |||||
} | |||||
MOD_UNLOCK(); | |||||
} | |||||
static void | |||||
schedule_algo_eval(struct fib_data *fd) | |||||
{ | |||||
if (fd->fd_num_changes++ == 0) { | |||||
/* Start callout to consider switch */ | |||||
MOD_LOCK(); | |||||
if (!callout_pending(&fd->fd_callout)) | |||||
schedule_callout(fd, ALGO_EVAL_DELAY_MS); | |||||
MOD_UNLOCK(); | |||||
} else if (fd->fd_num_changes > ALGO_EVAL_NUM_ROUTES && !fd->fd_force_eval) { | |||||
/* Reset callout to exec immediately */ | |||||
MOD_LOCK(); | |||||
if (!fd->fd_need_rebuild) { | |||||
fd->fd_force_eval = true; | |||||
schedule_callout(fd, 1); | |||||
} | |||||
MOD_UNLOCK(); | |||||
} | |||||
} | |||||
/* | |||||
* rib subscription handler | |||||
*/ | |||||
static void | |||||
handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc, | |||||
void *_data) | |||||
{ | |||||
struct fib_data *fd = (struct fib_data *)_data; | |||||
enum flm_op_result result; | |||||
RIB_WLOCK_ASSERT(rnh); | |||||
/* | |||||
* There is a small gap between subscribing for route changes | |||||
* and initiating rtable dump. Avoid receiving route changes | |||||
* prior to finishing rtable dump by checking `init_done`. | |||||
*/ | |||||
if (!fd->init_done) | |||||
return; | |||||
/* | |||||
* If algo requested rebuild, stop sending updates by default. | |||||
* This simplifies nexthop refcount handling logic. | |||||
*/ | |||||
if (fd->fd_need_rebuild) | |||||
return; | |||||
/* Consider scheduling algorithm re-evaluation */ | |||||
schedule_algo_eval(fd); | |||||
/* | |||||
* Maintain guarantee that every nexthop returned by the dataplane | |||||
* lookup has > 0 refcount, so can be safely referenced within current | |||||
* epoch. | |||||
*/ | |||||
if (rc->rc_nh_new != NULL) { | |||||
if (fib_ref_nhop(fd, rc->rc_nh_new) == 0) { | |||||
/* ran out of indexes */ | |||||
schedule_fd_rebuild(fd); | |||||
return; | |||||
} | |||||
} | |||||
result = fd->fd_flm->flm_change_rib_item_cb(rnh, rc, fd->fd_algo_data); | |||||
switch (result) { | |||||
case FLM_SUCCESS: | |||||
/* Unref old nexthop on success */ | |||||
if (rc->rc_nh_old != NULL) | |||||
fib_unref_nhop(fd, rc->rc_nh_old); | |||||
break; | |||||
case FLM_REBUILD: | |||||
/* | |||||
* Algo reported inability to handle, | |||||
* schedule algo rebuild. | |||||
*/ | |||||
schedule_fd_rebuild(fd); | |||||
break; | |||||
case FLM_ERROR: | |||||
/* | |||||
* Algo reported a non-recoverable error. | |||||
*/ | |||||
FD_PRINTF(LOG_ERR, fd, "algo reported non-recoverable error"); | |||||
if (!flm_error_add(fd->fd_flm, fd->fd_fibnum)) | |||||
FD_PRINTF(LOG_ERR, fd, "failed to ban algo"); | |||||
schedule_fd_rebuild(fd); | |||||
} | |||||
} | |||||
static void | |||||
estimate_nhop_scale(const struct fib_data *old_fd, struct fib_data *fd) | |||||
{ | |||||
if (old_fd == NULL) { | |||||
// TODO: read from rtable | |||||
fd->number_nhops = 16; | |||||
return; | |||||
} | |||||
if (old_fd->hit_nhops && old_fd->number_nhops < FIB_MAX_NHOPS) | |||||
fd->number_nhops = 2 * old_fd->number_nhops; | |||||
else | |||||
fd->number_nhops = old_fd->number_nhops; | |||||
} | |||||
struct walk_cbdata { | |||||
struct fib_data *fd; | |||||
flm_dump_t *func; | |||||
enum flm_op_result result; | |||||
}; | |||||
static void | |||||
sync_algo_end_cb(struct rib_head *rnh, enum rib_walk_hook stage, void *_data) | |||||
{ | |||||
struct walk_cbdata *w = (struct walk_cbdata *)_data; | |||||
struct fib_data *fd = w->fd; | |||||
RIB_WLOCK_ASSERT(w->fd->fd_rh); | |||||
if (rnh->rib_dying) { | |||||
w->result = FLM_ERROR; | |||||
return; | |||||
} | |||||
if (stage != RIB_WALK_HOOK_POST || w->result != FLM_SUCCESS) | |||||
return; | |||||
/* Post-dump hook, dump successful */ | |||||
if (fd->hit_nhops) { | |||||
FD_PRINTF(LOG_INFO, fd, "ran out of nexthops at %u nhops", | |||||
fd->nh_ref_table->count); | |||||
w->result = FLM_REBUILD; | |||||
return; | |||||
} | |||||
w->result = fd->fd_flm->flm_dump_end_cb(fd->fd_algo_data, &fd->fd_dp); | |||||
if (w->result == FLM_SUCCESS) { | |||||
/* Mark init as done to allow routing updates */ | |||||
fd->init_done = 1; | |||||
} | |||||
} | |||||
static int | |||||
sync_algo_cb(struct rtentry *rt, void *_data) | |||||
{ | |||||
struct walk_cbdata *w = (struct walk_cbdata *)_data; | |||||
RIB_WLOCK_ASSERT(w->fd->fd_rh); | |||||
if (w->result == FLM_SUCCESS && w->func) { | |||||
/* | |||||
* Reference nexthops to maintain guarantee that | |||||
* each nexthop returned by datapath has > 0 references | |||||
* and can be safely referenced within current epoch. | |||||
*/ | |||||
struct nhop_object *nh = rt_get_raw_nhop(rt); | |||||
if (fib_ref_nhop(w->fd, nh) != 0) | |||||
w->result = w->func(rt, w->fd->fd_algo_data); | |||||
else | |||||
w->result = FLM_REBUILD; | |||||
} | |||||
return (0); | |||||
} | |||||
/* | |||||
* Dump all routing table state to the algo instance. | |||||
*/ | |||||
static enum flm_op_result | |||||
sync_algo(struct fib_data *fd) | |||||
{ | |||||
struct walk_cbdata w; | |||||
w.fd = fd; | |||||
w.func = fd->fd_flm->flm_dump_rib_item_cb; | |||||
w.result = FLM_SUCCESS; | |||||
rib_walk_ext_internal(fd->fd_rh, true, sync_algo_cb, sync_algo_end_cb, &w); | |||||
FD_PRINTF(LOG_INFO, fd, "initial dump completed."); | |||||
return (w.result); | |||||
} | |||||
/* | |||||
* Assume already unlinked from datapath | |||||
*/ | |||||
static int | |||||
schedule_destroy_fd_instance(struct fib_data *fd, bool in_callout) | |||||
{ | |||||
bool is_dead; | |||||
NET_EPOCH_ASSERT(); | |||||
MOD_LOCK(); | |||||
is_dead = fd->fd_dead; | |||||
if (!is_dead) | |||||
fd->fd_dead = true; | |||||
if (fd->fd_linked) { | |||||
TAILQ_REMOVE(&V_fib_data_list, fd, entries); | |||||
fd->fd_linked = false; | |||||
} | |||||
MOD_UNLOCK(); | |||||
if (is_dead) | |||||
return (0); | |||||
FD_PRINTF(LOG_INFO, fd, "DETACH"); | |||||
if (fd->fd_rs != NULL) | |||||
rib_unsibscribe(fd->fd_rs); | |||||
/* | |||||
* After rib_unsubscribe() no _new_ handle_rtable_change_cb() calls | |||||
* will be executed, hence no _new_ callout schedules will happen. | |||||
* | |||||
* There can be 3 possible scenarious here: | |||||
* 1) we're running inside a callout when we're deleting ourselves | |||||
* due to migration to a newer fd | |||||
* 2) we're running from rt_table_destroy() and callout is scheduled | |||||
* for execution OR is executing | |||||
* | |||||
* For (2) we need to wait for the callout termination, as the routing table | |||||
* will be destroyed after this function returns. | |||||
* For (1) we cannot call drain, but can ensure that this is the last invocation. | |||||
*/ | |||||
if (in_callout) | |||||
callout_stop(&fd->fd_callout); | |||||
else | |||||
callout_drain(&fd->fd_callout); | |||||
/* | |||||
* At this moment there are no other pending work scheduled. | |||||
*/ | |||||
FD_PRINTF(LOG_INFO, fd, "destroying old instance"); | |||||
epoch_call(net_epoch_preempt, destroy_fd_instance_epoch, | |||||
&fd->fd_epoch_ctx); | |||||
return (0); | |||||
} | |||||
/* | |||||
* Wipe all instances from the list matching rib specified by @rh. | |||||
*/ | |||||
static void | |||||
fib_cleanup_algo(struct rib_head *rh, bool keep_first, bool in_callout) | |||||
{ | |||||
struct fib_data_head tmp_head = TAILQ_HEAD_INITIALIZER(tmp_head); | |||||
struct fib_data *fd, *fd_tmp; | |||||
struct epoch_tracker et; | |||||
MOD_LOCK(); | |||||
TAILQ_FOREACH_SAFE(fd, &V_fib_data_list, entries, fd_tmp) { | |||||
if (fd->fd_rh == rh) { | |||||
if (keep_first) { | |||||
keep_first = false; | |||||
continue; | |||||
} | |||||
TAILQ_REMOVE(&V_fib_data_list, fd, entries); | |||||
fd->fd_linked = false; | |||||
TAILQ_INSERT_TAIL(&tmp_head, fd, entries); | |||||
} | |||||
} | |||||
MOD_UNLOCK(); | |||||
/* Pass 2: remove each entry */ | |||||
NET_EPOCH_ENTER(et); | |||||
TAILQ_FOREACH_SAFE(fd, &tmp_head, entries, fd_tmp) { | |||||
schedule_destroy_fd_instance(fd, in_callout); | |||||
} | |||||
NET_EPOCH_EXIT(et); | |||||
} | |||||
void | |||||
fib_destroy_rib(struct rib_head *rh) | |||||
{ | |||||
/* | |||||
* Atm we have set is_dying flag on rnh, so all new fd's will | |||||
* fail at sync_algo() stage, so nothing new will be added to the list. | |||||
*/ | |||||
fib_cleanup_algo(rh, false, false); | |||||
} | |||||
static void | |||||
destroy_instance(struct fib_data *fd) | |||||
{ | |||||
FD_PRINTF(LOG_INFO, fd, "destroy fd %p", fd); | |||||
/* Call destroy callback first */ | |||||
if (fd->fd_algo_data != NULL) | |||||
fd->fd_flm->flm_destroy_cb(fd->fd_algo_data); | |||||
/* Nhop table */ | |||||
if ((fd->nh_idx != NULL) && (fd->nh_ref_table != NULL)) { | |||||
for (int i = 0; i < fd->number_nhops; i++) { | |||||
if (!is_idx_free(fd, i)) { | |||||
FD_PRINTF(LOG_DEBUG, fd, " FREE nhop %d %p", | |||||
i, fd->nh_idx[i]); | |||||
nhop_free_any(fd->nh_idx[i]); | |||||
} | |||||
} | |||||
free(fd->nh_idx, M_RTABLE); | |||||
} | |||||
if (fd->nh_ref_table != NULL) | |||||
free(fd->nh_ref_table, M_RTABLE); | |||||
fib_unref_algo(fd->fd_flm); | |||||
free(fd, M_RTABLE); | |||||
} | |||||
/* | |||||
* Epoch callback indicating fd is safe to destroy | |||||
*/ | |||||
static void | |||||
destroy_fd_instance_epoch(epoch_context_t ctx) | |||||
{ | |||||
struct fib_data *fd; | |||||
fd = __containerof(ctx, struct fib_data, fd_epoch_ctx); | |||||
destroy_instance(fd); | |||||
} | |||||
static enum flm_op_result | |||||
try_setup_instance(struct fib_lookup_module *flm, struct rib_head *rh, | |||||
struct fib_data *old_fd, struct fib_data **pfd) | |||||
{ | |||||
struct fib_data *fd; | |||||
size_t size; | |||||
enum flm_op_result result; | |||||
/* Allocate */ | |||||
fd = malloc(sizeof(struct fib_data), M_RTABLE, M_NOWAIT | M_ZERO); | |||||
if (fd == NULL) { | |||||
*pfd = NULL; | |||||
return (FLM_REBUILD); | |||||
} | |||||
*pfd = fd; | |||||
estimate_nhop_scale(old_fd, fd); | |||||
fd->fd_rh = rh; | |||||
fd->fd_family = rh->rib_family; | |||||
fd->fd_fibnum = rh->rib_fibnum; | |||||
callout_init(&fd->fd_callout, 1); | |||||
fd->fd_vnet = curvnet; | |||||
fd->fd_flm = flm; | |||||
MOD_LOCK(); | |||||
flm->flm_refcount++; | |||||
MOD_UNLOCK(); | |||||
/* Allocate nhidx -> nhop_ptr table */ | |||||
size = fd->number_nhops * sizeof(void *); | |||||
//FD_PRINTF(fd, "malloc(%lu)", size); | |||||
fd->nh_idx = malloc(size, M_RTABLE, M_NOWAIT | M_ZERO); | |||||
if (fd->nh_idx == NULL) { | |||||
FD_PRINTF(LOG_INFO, fd, "Unable to allocate nhop table idx (sz:%zu)", size); | |||||
return (FLM_REBUILD); | |||||
} | |||||
/* Allocate nhop index refcount table */ | |||||
size = sizeof(struct nhop_ref_table); | |||||
size += fd->number_nhops * sizeof(uint32_t); | |||||
//FD_PRINTF(fd, "malloc(%lu)", size); | |||||
fd->nh_ref_table = malloc(size, M_RTABLE, M_NOWAIT | M_ZERO); | |||||
if (fd->nh_ref_table == NULL) { | |||||
FD_PRINTF(LOG_INFO, fd, "Unable to allocate nhop refcount table (sz:%zu)", size); | |||||
return (FLM_REBUILD); | |||||
} | |||||
/* Okay, we're ready for algo init */ | |||||
void *old_algo_data = (old_fd != NULL) ? old_fd->fd_algo_data : NULL; | |||||
result = flm->flm_init_cb(fd->fd_fibnum, fd, old_algo_data, &fd->fd_algo_data); | |||||
if (result != FLM_SUCCESS) | |||||
return (result); | |||||
/* Try to subscribe */ | |||||
if (flm->flm_change_rib_item_cb != NULL) { | |||||
fd->fd_rs = rib_subscribe_internal(fd->fd_rh, | |||||
handle_rtable_change_cb, fd, RIB_NOTIFY_IMMEDIATE, 0); | |||||
if (fd->fd_rs == NULL) | |||||
return (FLM_REBUILD); | |||||
} | |||||
/* Dump */ | |||||
result = sync_algo(fd); | |||||
if (result != FLM_SUCCESS) | |||||
return (result); | |||||
FD_PRINTF(LOG_INFO, fd, "DUMP completed successfully."); | |||||
MOD_LOCK(); | |||||
/* | |||||
* Insert in the beginning of a list, to simplify search | |||||
* first matching entry is the one. | |||||
*/ | |||||
TAILQ_INSERT_HEAD(&V_fib_data_list, fd, entries); | |||||
fd->fd_linked = true; | |||||
MOD_UNLOCK(); | |||||
return (FLM_SUCCESS); | |||||
} | |||||
/* | |||||
* Sets up algo @flm for table @rh and links it to the datapath. | |||||
* | |||||
*/ | |||||
static enum flm_op_result | |||||
setup_instance(struct fib_lookup_module *flm, struct rib_head *rh, | |||||
struct fib_data *orig_fd, struct fib_data **pfd, bool attach) | |||||
{ | |||||
struct fib_data *prev_fd, *new_fd; | |||||
struct epoch_tracker et; | |||||
enum flm_op_result result; | |||||
prev_fd = orig_fd; | |||||
new_fd = NULL; | |||||
for (int i = 0; i < FIB_MAX_TRIES; i++) { | |||||
NET_EPOCH_ENTER(et); | |||||
result = try_setup_instance(flm, rh, prev_fd, &new_fd); | |||||
if ((result == FLM_SUCCESS) && attach) | |||||
result = attach_datapath(new_fd); | |||||
if ((prev_fd != NULL) && (prev_fd != orig_fd)) { | |||||
schedule_destroy_fd_instance(prev_fd, false); | |||||
prev_fd = NULL; | |||||
} | |||||
NET_EPOCH_EXIT(et); | |||||
RH_PRINTF(LOG_INFO, rh, "try %d: fib algo result: %d", i, result); | |||||
if (result == FLM_REBUILD) { | |||||
prev_fd = new_fd; | |||||
new_fd = NULL; | |||||
continue; | |||||
} | |||||
break; | |||||
} | |||||
if (result != FLM_SUCCESS) { | |||||
/* update failure count */ | |||||
MOD_LOCK(); | |||||
if (orig_fd != NULL) | |||||
orig_fd->fd_failed_rebuilds++; | |||||
MOD_UNLOCK(); | |||||
/* Ban algo on non-recoverable error */ | |||||
if (result == FLM_ERROR) | |||||
flm_error_add(flm, rh->rib_fibnum); | |||||
NET_EPOCH_ENTER(et); | |||||
if ((prev_fd != NULL) && (prev_fd != orig_fd)) | |||||
schedule_destroy_fd_instance(prev_fd, false); | |||||
if (new_fd != NULL) { | |||||
schedule_destroy_fd_instance(new_fd, false); | |||||
new_fd = NULL; | |||||
} | |||||
NET_EPOCH_EXIT(et); | |||||
} | |||||
*pfd = new_fd; | |||||
return (result); | |||||
} | |||||
static void | |||||
rebuild_callout(void *_data) | |||||
{ | |||||
struct fib_data *fd, *fd_new, *fd_tmp; | |||||
struct fib_lookup_module *flm_new; | |||||
struct epoch_tracker et; | |||||
enum flm_op_result result; | |||||
bool need_rebuild = false; | |||||
fd = (struct fib_data *)_data; | |||||
MOD_LOCK(); | |||||
need_rebuild = fd->fd_need_rebuild; | |||||
fd->fd_need_rebuild = false; | |||||
fd->fd_force_eval = false; | |||||
fd->fd_num_changes = 0; | |||||
MOD_UNLOCK(); | |||||
CURVNET_SET(fd->fd_vnet); | |||||
/* First, check if we're still OK to use this algo */ | |||||
flm_new = fib_check_best_algo(fd->fd_rh, fd->fd_flm); | |||||
if ((flm_new == NULL) && (!need_rebuild)) { | |||||
/* Keep existing algo, no need to rebuild. */ | |||||
CURVNET_RESTORE(); | |||||
return; | |||||
} | |||||
if (flm_new == NULL) { | |||||
flm_new = fd->fd_flm; | |||||
fd_tmp = fd; | |||||
} else { | |||||
fd_tmp = NULL; | |||||
FD_PRINTF(LOG_NOTICE, fd, "switching algo to %s", flm_new->flm_name); | |||||
} | |||||
result = setup_instance(flm_new, fd->fd_rh, fd_tmp, &fd_new, true); | |||||
if (fd_tmp == NULL) { | |||||
/* fd_new represents new algo */ | |||||
fib_unref_algo(flm_new); | |||||
} | |||||
if (result != FLM_SUCCESS) { | |||||
FD_PRINTF(LOG_NOTICE, fd, "table rebuild failed"); | |||||
CURVNET_RESTORE(); | |||||
return; | |||||
} | |||||
FD_PRINTF(LOG_INFO, fd_new, "switched to new instance"); | |||||
/* Remove old */ | |||||
if (fd != NULL) { | |||||
NET_EPOCH_ENTER(et); | |||||
schedule_destroy_fd_instance(fd, true); | |||||
NET_EPOCH_EXIT(et); | |||||
} | |||||
CURVNET_RESTORE(); | |||||
} | |||||
static struct fib_lookup_module * | |||||
fib_find_algo(const char *algo_name, int family) | |||||
{ | |||||
struct fib_lookup_module *flm; | |||||
MOD_LOCK(); | |||||
TAILQ_FOREACH(flm, &all_algo_list, entries) { | |||||
if ((strcmp(flm->flm_name, algo_name) == 0) && | |||||
(family == flm->flm_family)) { | |||||
flm->flm_refcount++; | |||||
MOD_UNLOCK(); | |||||
return (flm); | |||||
} | |||||
} | |||||
MOD_UNLOCK(); | |||||
return (NULL); | |||||
} | |||||
static void | |||||
fib_unref_algo(struct fib_lookup_module *flm) | |||||
{ | |||||
MOD_LOCK(); | |||||
flm->flm_refcount--; | |||||
MOD_UNLOCK(); | |||||
} | |||||
static int | |||||
set_fib_algo(uint32_t fibnum, int family, struct sysctl_oid *oidp, struct sysctl_req *req) | |||||
{ | |||||
struct fib_lookup_module *flm = NULL; | |||||
struct fib_data *fd = NULL; | |||||
char old_algo_name[32], algo_name[32]; | |||||
struct rib_head *rh = NULL; | |||||
enum flm_op_result result; | |||||
int error; | |||||
MOD_LOCK(); | |||||
TAILQ_FOREACH(fd, &V_fib_data_list, entries) { | |||||
if ((fd->fd_family == family) && (fd->fd_fibnum == fibnum)) | |||||
break; | |||||
} | |||||
if (fd == NULL) { | |||||
MOD_UNLOCK(); | |||||
return (ENOENT); | |||||
} | |||||
rh = fd->fd_rh; | |||||
strlcpy(old_algo_name, fd->fd_flm->flm_name, | |||||
sizeof(old_algo_name)); | |||||
MOD_UNLOCK(); | |||||
strlcpy(algo_name, old_algo_name, sizeof(algo_name)); | |||||
error = sysctl_handle_string(oidp, algo_name, sizeof(algo_name), req); | |||||
if (error != 0 || req->newptr == NULL) | |||||
return (error); | |||||
if (strcmp(algo_name, old_algo_name) == 0) | |||||
return (0); | |||||
flm = fib_find_algo(algo_name, family); | |||||
if (flm == NULL) { | |||||
RH_PRINTF(LOG_INFO, rh, "unable to find algo %s", algo_name); | |||||
return (ESRCH); | |||||
} | |||||
fd = NULL; | |||||
result = setup_instance(flm, rh, NULL, &fd, true); | |||||
fib_unref_algo(flm); | |||||
if (result != FLM_SUCCESS) | |||||
return (EINVAL); | |||||
/* Disable jumping between algos */ | |||||
MOD_LOCK(); | |||||
set_algo_fixed(rh); | |||||
MOD_UNLOCK(); | |||||
/* Remove old instance(s) */ | |||||
fib_cleanup_algo(rh, true, false); | |||||
/* Drain cb so user can unload the module after userret if so desired */ | |||||
epoch_drain_callbacks(net_epoch_preempt); | |||||
return (0); | |||||
} | |||||
#ifdef INET | |||||
static int | |||||
set_algo4_sysctl_handler(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
return (set_fib_algo(RT_DEFAULT_FIB, AF_INET, oidp, req)); | |||||
} | |||||
SYSCTL_PROC(_net_route_algo_inet, OID_AUTO, algo, | |||||
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, | |||||
set_algo4_sysctl_handler, "A", "Set IPv4 lookup algo"); | |||||
#endif | |||||
#ifdef INET6 | |||||
static int | |||||
set_algo6_sysctl_handler(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
return (set_fib_algo(RT_DEFAULT_FIB, AF_INET6, oidp, req)); | |||||
} | |||||
SYSCTL_PROC(_net_route_algo_inet6, OID_AUTO, algo, | |||||
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, | |||||
set_algo6_sysctl_handler, "A", "Set IPv6 lookup algo"); | |||||
#endif | |||||
static void | |||||
destroy_fdh_epoch(epoch_context_t ctx) | |||||
{ | |||||
struct fib_dp_header *ffi; | |||||
ffi = __containerof(ctx, struct fib_dp_header, ffi_epoch_ctx); | |||||
free(ffi, M_RTABLE); | |||||
} | |||||
static struct fib_dp_header * | |||||
alloc_fib_dp_array(uint32_t num_tables, bool waitok) | |||||
{ | |||||
size_t sz; | |||||
struct fib_dp_header *ffi; | |||||
sz = sizeof(struct fib_dp_header); | |||||
sz += sizeof(struct fib_dp) * num_tables; | |||||
ffi = malloc(sz, M_RTABLE, (waitok ? M_WAITOK : M_NOWAIT) | M_ZERO); | |||||
if (ffi != NULL) | |||||
ffi->ffi_num_tables = num_tables; | |||||
return (ffi); | |||||
} | |||||
static struct fib_dp_header * | |||||
get_fib_dp_header(struct fib_dp *dp) | |||||
{ | |||||
return (__containerof((void *)dp, struct fib_dp_header, ffi_idx)); | |||||
} | |||||
/* | |||||
* Replace per-family index pool @pdp with a new one which | |||||
* contains updated callback/algo data from @fd. | |||||
* Returns 0 on success. | |||||
*/ | |||||
static enum flm_op_result | |||||
replace_rtables_family(struct fib_dp **pdp, struct fib_data *fd) | |||||
{ | |||||
struct fib_dp_header *new_ffi, *old_ffi; | |||||
NET_EPOCH_ASSERT(); | |||||
FD_PRINTF(LOG_DEBUG, fd, "[vnet %p] replace with f:%p arg:%p", | |||||
curvnet, fd->fd_dp.f, fd->fd_dp.arg); | |||||
MOD_LOCK(); | |||||
old_ffi = get_fib_dp_header(*pdp); | |||||
new_ffi = alloc_fib_dp_array(old_ffi->ffi_num_tables, false); | |||||
FD_PRINTF(LOG_DEBUG, fd, "OLD FDH: %p NEW FDH: %p", old_ffi, new_ffi); | |||||
if (new_ffi == NULL) { | |||||
MOD_UNLOCK(); | |||||
FD_PRINTF(LOG_WARNING, fd, "error attaching datapath"); | |||||
return (FLM_REBUILD); | |||||
} | |||||
memcpy(&new_ffi->ffi_idx[0], &old_ffi->ffi_idx[0], | |||||
old_ffi->ffi_num_tables * sizeof(struct fib_dp)); | |||||
/* Update relevant data structure for @fd */ | |||||
new_ffi->ffi_idx[fd->fd_fibnum] = fd->fd_dp; | |||||
/* Ensure memcpy() writes have completed */ | |||||
atomic_thread_fence_rel(); | |||||
/* Set new datapath pointer */ | |||||
*pdp = &new_ffi->ffi_idx[0]; | |||||
MOD_UNLOCK(); | |||||
FD_PRINTF(LOG_DEBUG, fd, "update %p -> %p", old_ffi, new_ffi); | |||||
epoch_call(net_epoch_preempt, destroy_fdh_epoch, | |||||
&old_ffi->ffi_epoch_ctx); | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static struct fib_dp ** | |||||
get_family_dp_ptr(int family) | |||||
{ | |||||
switch (family) { | |||||
case AF_INET: | |||||
return (&V_inet_dp); | |||||
case AF_INET6: | |||||
return (&V_inet6_dp); | |||||
} | |||||
return (NULL); | |||||
} | |||||
/* | |||||
* Make datapath use fib instance @fd | |||||
*/ | |||||
static enum flm_op_result | |||||
attach_datapath(struct fib_data *fd) | |||||
{ | |||||
struct fib_dp **pdp; | |||||
pdp = get_family_dp_ptr(fd->fd_family); | |||||
return (replace_rtables_family(pdp, fd)); | |||||
} | |||||
/* | |||||
* Grow datapath pointers array. | |||||
* Called from sysctl handler on growing number of routing tables. | |||||
*/ | |||||
static void | |||||
grow_rtables_family(struct fib_dp **pdp, uint32_t new_num_tables) | |||||
{ | |||||
struct fib_dp_header *new_fdh, *old_fdh = NULL; | |||||
new_fdh = alloc_fib_dp_array(new_num_tables, true); | |||||
MOD_LOCK(); | |||||
if (*pdp != NULL) { | |||||
old_fdh = get_fib_dp_header(*pdp); | |||||
memcpy(&new_fdh->ffi_idx[0], &old_fdh->ffi_idx[0], | |||||
old_fdh->ffi_num_tables * sizeof(struct fib_dp)); | |||||
} | |||||
/* Wait till all writes completed */ | |||||
atomic_thread_fence_rel(); | |||||
*pdp = &new_fdh->ffi_idx[0]; | |||||
MOD_UNLOCK(); | |||||
if (old_fdh != NULL) | |||||
epoch_call(net_epoch_preempt, destroy_fdh_epoch, | |||||
&old_fdh->ffi_epoch_ctx); | |||||
} | |||||
/* | |||||
* Grows per-AF arrays of datapath pointers for each supported family. | |||||
* Called from fibs resize sysctl handler. | |||||
*/ | |||||
void | |||||
fib_grow_rtables(uint32_t new_num_tables) | |||||
{ | |||||
#ifdef INET | |||||
grow_rtables_family(get_family_dp_ptr(AF_INET), new_num_tables); | |||||
#endif | |||||
#ifdef INET6 | |||||
grow_rtables_family(get_family_dp_ptr(AF_INET6), new_num_tables); | |||||
#endif | |||||
} | |||||
void | |||||
fib_get_rtable_info(struct rib_head *rh, struct rib_rtable_info *rinfo) | |||||
{ | |||||
bzero(rinfo, sizeof(struct rib_rtable_info)); | |||||
rinfo->num_prefixes = rh->rnh_prefixes; | |||||
rinfo->num_nhops = nhops_get_count(rh); | |||||
#ifdef ROUTE_MPATH | |||||
rinfo->num_nhgrp = nhgrp_get_count(rh); | |||||
#endif | |||||
} | |||||
/* | |||||
* Accessor to get rib instance @fd is attached to. | |||||
*/ | |||||
struct rib_head * | |||||
fib_get_rh(struct fib_data *fd) | |||||
{ | |||||
return (fd->fd_rh); | |||||
} | |||||
/* | |||||
* Accessor to export idx->nhop array | |||||
*/ | |||||
struct nhop_object ** | |||||
fib_get_nhop_array(struct fib_data *fd) | |||||
{ | |||||
return (fd->nh_idx); | |||||
} | |||||
static uint32_t | |||||
get_nhop_idx(struct nhop_object *nh) | |||||
{ | |||||
#ifdef ROUTE_MPATH | |||||
if (NH_IS_NHGRP(nh)) | |||||
return (nhgrp_get_idx((struct nhgrp_object *)nh) * 2 - 1); | |||||
else | |||||
return (nhop_get_idx(nh) * 2); | |||||
#else | |||||
return (nhop_get_idx(nh)); | |||||
#endif | |||||
} | |||||
uint32_t | |||||
fib_get_nhop_idx(struct fib_data *fd, struct nhop_object *nh) | |||||
{ | |||||
return (get_nhop_idx(nh)); | |||||
} | |||||
static bool | |||||
is_idx_free(struct fib_data *fd, uint32_t index) | |||||
{ | |||||
return (fd->nh_ref_table->refcnt[index] == 0); | |||||
} | |||||
static uint32_t | |||||
fib_ref_nhop(struct fib_data *fd, struct nhop_object *nh) | |||||
{ | |||||
uint32_t idx = get_nhop_idx(nh); | |||||
if (idx >= fd->number_nhops) { | |||||
fd->hit_nhops = 1; | |||||
return (0); | |||||
} | |||||
if (is_idx_free(fd, idx)) { | |||||
nhop_ref_any(nh); | |||||
fd->nh_idx[idx] = nh; | |||||
fd->nh_ref_table->count++; | |||||
FD_PRINTF(LOG_DEBUG, fd, " REF nhop %u %p", idx, fd->nh_idx[idx]); | |||||
} | |||||
fd->nh_ref_table->refcnt[idx]++; | |||||
return (idx); | |||||
} | |||||
struct nhop_release_data { | |||||
struct nhop_object *nh; | |||||
struct epoch_context ctx; | |||||
}; | |||||
static void | |||||
release_nhop_epoch(epoch_context_t ctx) | |||||
{ | |||||
struct nhop_release_data *nrd; | |||||
nrd = __containerof(ctx, struct nhop_release_data, ctx); | |||||
nhop_free_any(nrd->nh); | |||||
free(nrd, M_RTABLE); | |||||
} | |||||
static void | |||||
fib_schedule_release_nhop(struct fib_data *fd, struct nhop_object *nh) | |||||
{ | |||||
struct nhop_release_data *nrd; | |||||
nrd = malloc(sizeof(struct nhop_release_data), M_RTABLE, M_NOWAIT | M_ZERO); | |||||
if (nrd != NULL) { | |||||
nrd->nh = nh; | |||||
epoch_call(net_epoch_preempt, release_nhop_epoch, &nrd->ctx); | |||||
} else { | |||||
/* | |||||
* Unable to allocate memory. Leak nexthop to maintain guarantee | |||||
* that each nhop can be referenced. | |||||
*/ | |||||
FD_PRINTF(LOG_ERR, fd, "unable to schedule nhop %p deletion", nh); | |||||
} | |||||
} | |||||
static void | |||||
fib_unref_nhop(struct fib_data *fd, struct nhop_object *nh) | |||||
{ | |||||
uint32_t idx = get_nhop_idx(nh); | |||||
KASSERT((idx < fd->number_nhops), ("invalid nhop index")); | |||||
KASSERT((nh == fd->nh_idx[idx]), ("index table contains whong nh")); | |||||
fd->nh_ref_table->refcnt[idx]--; | |||||
if (fd->nh_ref_table->refcnt[idx] == 0) { | |||||
FD_PRINTF(LOG_DEBUG, fd, " FREE nhop %d %p", idx, fd->nh_idx[idx]); | |||||
fib_schedule_release_nhop(fd, fd->nh_idx[idx]); | |||||
} | |||||
} | |||||
static void | |||||
set_algo_fixed(struct rib_head *rh) | |||||
{ | |||||
switch (rh->rib_family) { | |||||
case AF_INET: | |||||
algo_fixed_inet = true; | |||||
break; | |||||
case AF_INET6: | |||||
algo_fixed_inet6 = true; | |||||
break; | |||||
} | |||||
} | |||||
static bool | |||||
is_algo_fixed(struct rib_head *rh) | |||||
{ | |||||
switch (rh->rib_family) { | |||||
case AF_INET: | |||||
return (algo_fixed_inet); | |||||
case AF_INET6: | |||||
return (algo_fixed_inet6); | |||||
} | |||||
return (false); | |||||
} | |||||
static struct fib_lookup_module * | |||||
fib_check_best_algo(struct rib_head *rh, struct fib_lookup_module *orig_flm) | |||||
{ | |||||
uint8_t preference, curr_preference = 0, best_preference = 0; | |||||
struct fib_lookup_module *flm, *best_flm = NULL; | |||||
struct rib_rtable_info rinfo; | |||||
int candidate_algos = 0; | |||||
fib_get_rtable_info(rh, &rinfo); | |||||
MOD_LOCK(); | |||||
if (is_algo_fixed(rh)) { | |||||
MOD_UNLOCK(); | |||||
return (NULL); | |||||
} | |||||
TAILQ_FOREACH(flm, &all_algo_list, entries) { | |||||
if (flm->flm_family != rh->rib_family) | |||||
continue; | |||||
candidate_algos++; | |||||
preference = flm->flm_get_pref(&rinfo); | |||||
if (preference > best_preference) { | |||||
if (!flm_error_check(flm, rh->rib_fibnum)) { | |||||
best_preference = preference; | |||||
best_flm = flm; | |||||
} | |||||
} | |||||
if (flm == orig_flm) | |||||
curr_preference = preference; | |||||
} | |||||
if ((best_flm != NULL) && (curr_preference + BEST_DIFF_PERCENT < best_preference)) | |||||
best_flm->flm_refcount++; | |||||
else | |||||
best_flm = NULL; | |||||
MOD_UNLOCK(); | |||||
RH_PRINTF(LOG_INFO, rh, "candidate_algos: %d, curr: %s(%d) result: %s(%d)", | |||||
candidate_algos, orig_flm ? orig_flm->flm_name : "NULL", curr_preference, | |||||
best_flm ? best_flm->flm_name : (orig_flm ? orig_flm->flm_name : "NULL"), | |||||
best_preference); | |||||
return (best_flm); | |||||
} | |||||
/* | |||||
* Called when new route table is created. | |||||
* Selects, allocates and attaches fib algo for the table. | |||||
*/ | |||||
int | |||||
fib_select_algo_initial(struct rib_head *rh) | |||||
{ | |||||
struct fib_lookup_module *flm; | |||||
struct fib_data *fd = NULL; | |||||
enum flm_op_result result; | |||||
flm = fib_check_best_algo(rh, NULL); | |||||
if (flm == NULL) { | |||||
RH_PRINTF(LOG_CRIT, rh, "no algo selected"); | |||||
return (ENOENT); | |||||
} | |||||
RH_PRINTF(LOG_INFO, rh, "selected algo %s", flm->flm_name); | |||||
result = setup_instance(flm, rh, NULL, &fd, false); | |||||
fib_unref_algo(flm); | |||||
RH_PRINTF(LOG_DEBUG, rh, "result=%d fd=%p", result, fd); | |||||
if (result == FLM_SUCCESS) { | |||||
/* | |||||
* Attach datapath directly to avoid multiple reallocations | |||||
* during fib growth | |||||
*/ | |||||
struct fib_dp_header *fdp; | |||||
struct fib_dp **pdp; | |||||
pdp = get_family_dp_ptr(rh->rib_family); | |||||
if (pdp != NULL) { | |||||
fdp = get_fib_dp_header(*pdp); | |||||
fdp->ffi_idx[fd->fd_fibnum] = fd->fd_dp; | |||||
FD_PRINTF(LOG_INFO, fd, "datapath attached"); | |||||
} | |||||
} | |||||
return (0); | |||||
} | |||||
int | |||||
fib_module_register(struct fib_lookup_module *flm) | |||||
{ | |||||
MOD_LOCK(); | |||||
ALGO_PRINTF("attaching %s to %s", flm->flm_name, | |||||
print_family(flm->flm_family)); | |||||
TAILQ_INSERT_TAIL(&all_algo_list, flm, entries); | |||||
MOD_UNLOCK(); | |||||
return (0); | |||||
} | |||||
int | |||||
fib_module_unregister(struct fib_lookup_module *flm) | |||||
{ | |||||
MOD_LOCK(); | |||||
if (flm->flm_refcount > 0) { | |||||
MOD_UNLOCK(); | |||||
return (EBUSY); | |||||
} | |||||
fib_error_clear_flm(flm); | |||||
ALGO_PRINTF("detaching %s from %s", flm->flm_name, | |||||
print_family(flm->flm_family)); | |||||
TAILQ_REMOVE(&all_algo_list, flm, entries); | |||||
MOD_UNLOCK(); | |||||
return (0); | |||||
} | |||||
void | |||||
vnet_fib_init(void) | |||||
{ | |||||
TAILQ_INIT(&V_fib_data_list); | |||||
} | |||||
static void | |||||
fib_init(void) | |||||
{ | |||||
mtx_init(&fib_mtx, "algo list mutex", NULL, MTX_DEF); | |||||
TAILQ_INIT(&all_algo_list); | |||||
} | |||||
SYSINIT(fib_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, fib_init, NULL); | |||||