Changeset View
Changeset View
Standalone View
Standalone View
sys/net/route/fib_algo.c
Show First 20 Lines • Show All 145 Lines • ▼ Show 20 Lines | struct nhop_ref_table { | ||||
uint32_t count; | uint32_t count; | ||||
int32_t refcnt[0]; | int32_t refcnt[0]; | ||||
}; | }; | ||||
enum fib_callout_action { | enum fib_callout_action { | ||||
FDA_NONE, /* No callout scheduled */ | FDA_NONE, /* No callout scheduled */ | ||||
FDA_REBUILD, /* Asks to rebuild algo instance */ | FDA_REBUILD, /* Asks to rebuild algo instance */ | ||||
FDA_EVAL, /* Asks to evaluate if the current algo is still the best */ | FDA_EVAL, /* Asks to evaluate if the current algo is still the best */ | ||||
FDA_BATCH, /* Asks to submit batch of updates to the algo */ | |||||
}; | }; | ||||
struct fib_sync_status { | struct fib_sync_status { | ||||
struct timeval diverge_time; /* ts when diverged */ | struct timeval diverge_time; /* ts when diverged */ | ||||
uint32_t num_changes; /* number of changes since sync */ | uint32_t num_changes; /* number of changes since sync */ | ||||
uint32_t bucket_changes; /* num changes within the current bucket */ | uint32_t bucket_changes; /* num changes within the current bucket */ | ||||
uint64_t bucket_id; /* 50ms bucket # */ | uint64_t bucket_id; /* 50ms bucket # */ | ||||
struct fib_change_queue fd_change_queue;/* list of scheduled entries */ | |||||
}; | }; | ||||
/* | /* | ||||
* Data structure for the fib lookup instance tied to the particular rib. | * Data structure for the fib lookup instance tied to the particular rib. | ||||
*/ | */ | ||||
struct fib_data { | struct fib_data { | ||||
uint32_t number_nhops; /* current # of nhops */ | uint32_t number_nhops; /* current # of nhops */ | ||||
uint8_t hit_nhops; /* true if out of nhop limit */ | uint8_t hit_nhops; /* true if out of nhop limit */ | ||||
uint8_t init_done; /* true if init is completed */ | uint8_t init_done; /* true if init is completed */ | ||||
uint32_t fd_dead:1; /* Scheduled for deletion */ | uint32_t fd_dead:1; /* Scheduled for deletion */ | ||||
uint32_t fd_linked:1; /* true if linked */ | uint32_t fd_linked:1; /* true if linked */ | ||||
uint32_t fd_need_rebuild:1; /* true if rebuild scheduled */ | uint32_t fd_need_rebuild:1; /* true if rebuild scheduled */ | ||||
uint32_t fd_batch:1; /* true if batched notification scheduled */ | |||||
uint8_t fd_family; /* family */ | uint8_t fd_family; /* family */ | ||||
uint32_t fd_fibnum; /* fibnum */ | uint32_t fd_fibnum; /* fibnum */ | ||||
uint32_t fd_failed_rebuilds; /* stat: failed rebuilds */ | uint32_t fd_failed_rebuilds; /* stat: failed rebuilds */ | ||||
uint32_t fd_gen; /* instance gen# */ | uint32_t fd_gen; /* instance gen# */ | ||||
struct callout fd_callout; /* rebuild callout */ | struct callout fd_callout; /* rebuild callout */ | ||||
enum fib_callout_action fd_callout_action; /* Callout action to take */ | enum fib_callout_action fd_callout_action; /* Callout action to take */ | ||||
void *fd_algo_data; /* algorithm data */ | void *fd_algo_data; /* algorithm data */ | ||||
struct nhop_object **nh_idx; /* nhop idx->ptr array */ | struct nhop_object **nh_idx; /* nhop idx->ptr array */ | ||||
▲ Show 20 Lines • Show All 197 Lines • ▼ Show 20 Lines | |||||
static const char * | static const char * | ||||
print_op_result(enum flm_op_result result) | print_op_result(enum flm_op_result result) | ||||
{ | { | ||||
switch (result) { | switch (result) { | ||||
case FLM_SUCCESS: | case FLM_SUCCESS: | ||||
return "success"; | return "success"; | ||||
case FLM_REBUILD: | case FLM_REBUILD: | ||||
return "rebuild"; | return "rebuild"; | ||||
case FLM_BATCH: | |||||
return "batch"; | |||||
case FLM_ERROR: | case FLM_ERROR: | ||||
return "error"; | return "error"; | ||||
} | } | ||||
return "unknown"; | return "unknown"; | ||||
} | } | ||||
static const char * | static const char * | ||||
▲ Show 20 Lines • Show All 169 Lines • ▼ Show 20 Lines | |||||
* latency and efficient batching when changing large amount of routes. | * latency and efficient batching when changing large amount of routes. | ||||
* | * | ||||
* High-level algorithm works as follows: | * High-level algorithm works as follows: | ||||
* 1) all changes are bucketed in 50ms intervals | * 1) all changes are bucketed in 50ms intervals | ||||
* 2) If amount of changes within the bucket is greater than the threshold, | * 2) If amount of changes within the bucket is greater than the threshold, | ||||
* the update gets delayed, up to maximum delay threshold. | * the update gets delayed, up to maximum delay threshold. | ||||
*/ | */ | ||||
static void | static void | ||||
update_rebuild_delay(struct fib_data *fd) | update_rebuild_delay(struct fib_data *fd, enum fib_callout_action action) | ||||
{ | { | ||||
uint32_t bucket_id, new_delay = 0; | uint32_t bucket_id, new_delay = 0; | ||||
struct timeval tv; | struct timeval tv; | ||||
/* Fetch all variables at once to ensure consistent reads */ | /* Fetch all variables at once to ensure consistent reads */ | ||||
uint32_t bucket_time_ms = V_bucket_time_ms; | uint32_t bucket_time_ms = V_bucket_time_ms; | ||||
uint32_t threshold_rate = V_bucket_change_threshold_rate; | uint32_t threshold_rate = V_bucket_change_threshold_rate; | ||||
uint32_t max_delay_ms = V_fib_max_sync_delay_ms; | uint32_t max_delay_ms = V_fib_max_sync_delay_ms; | ||||
Show All 31 Lines | update_rebuild_delay(struct fib_data *fd, enum fib_callout_action action) | ||||
} | } | ||||
if (new_delay > 0) { | if (new_delay > 0) { | ||||
/* Calculated time has been updated */ | /* Calculated time has been updated */ | ||||
struct timeval new_tv = fd_ss->diverge_time; | struct timeval new_tv = fd_ss->diverge_time; | ||||
add_tv_diff_ms(&new_tv, new_delay); | add_tv_diff_ms(&new_tv, new_delay); | ||||
int32_t delay_ms = get_tv_diff_ms(&tv, &new_tv); | int32_t delay_ms = get_tv_diff_ms(&tv, &new_tv); | ||||
schedule_callout(fd, FDA_REBUILD, delay_ms); | schedule_callout(fd, action, delay_ms); | ||||
} | } | ||||
} | } | ||||
static void | static void | ||||
update_algo_state(struct fib_data *fd) | update_algo_state(struct fib_data *fd) | ||||
{ | { | ||||
RIB_WLOCK_ASSERT(fd->fd_rh); | RIB_WLOCK_ASSERT(fd->fd_rh); | ||||
if (fd->fd_need_rebuild) { | if (fd->fd_batch || fd->fd_need_rebuild) { | ||||
update_rebuild_delay(fd); | enum fib_callout_action action = fd->fd_need_rebuild ? FDA_REBUILD : FDA_BATCH; | ||||
update_rebuild_delay(fd, action); | |||||
return; | return; | ||||
} | } | ||||
if (fd->fd_num_changes++ == 0) { | if (fd->fd_num_changes++ == 0) { | ||||
/* Start callout to consider switch */ | /* Start callout to consider switch */ | ||||
if (!callout_pending(&fd->fd_callout)) | if (!callout_pending(&fd->fd_callout)) | ||||
schedule_callout(fd, FDA_EVAL, ALGO_EVAL_DELAY_MS); | schedule_callout(fd, FDA_EVAL, ALGO_EVAL_DELAY_MS); | ||||
} else if (fd->fd_num_changes == ALGO_EVAL_NUM_ROUTES) { | } else if (fd->fd_num_changes == ALGO_EVAL_NUM_ROUTES) { | ||||
Show All 20 Lines | case RTM_DELETE: | ||||
if (!NH_IS_NHGRP(nh) && (!(nh->nh_flags & NHF_GATEWAY))) | if (!NH_IS_NHGRP(nh) && (!(nh->nh_flags & NHF_GATEWAY))) | ||||
return (true); | return (true); | ||||
break; | break; | ||||
} | } | ||||
return (false); | return (false); | ||||
} | } | ||||
static bool | |||||
apply_rtable_changes(struct fib_data *fd) | |||||
zec: This one doesn't get called unless immediate_sync is set in handle_rtable_change_cb() , which… | |||||
{ | |||||
enum flm_op_result result; | |||||
struct fib_change_queue *q = &fd->fd_ss.fd_change_queue; | |||||
result = fd->fd_flm->flm_change_rib_items_cb(fd->fd_rh, q, fd->fd_algo_data); | |||||
if (result == FLM_SUCCESS) { | |||||
for (int i = 0; i < q->count; i++) | |||||
Not Done Inline ActionsShouldn't we unref .nh_old here instead, if != NULL? zec: Shouldn't we unref .nh_old here instead, if != NULL? | |||||
if (q->entries[i].nh_old) | |||||
fib_unref_nhop(fd, q->entries[i].nh_old); | |||||
q->count = 0; | |||||
} | |||||
fd->fd_batch = false; | |||||
return (result == FLM_SUCCESS); | |||||
} | |||||
static bool | |||||
fill_change_entry(struct fib_data *fd, struct fib_change_entry *ce, struct rib_cmd_info *rc) | |||||
{ | |||||
int plen = 0; | |||||
switch (fd->fd_family) { | |||||
case AF_INET: | |||||
rt_get_inet_prefix_plen(rc->rc_rt, &ce->addr4, &plen, &ce->scopeid); | |||||
break; | |||||
case AF_INET6: | |||||
rt_get_inet6_prefix_plen(rc->rc_rt, &ce->addr6, &plen, &ce->scopeid); | |||||
break; | |||||
} | |||||
ce->plen = plen; | |||||
ce->nh_old = rc->rc_nh_old; | |||||
ce->nh_new = rc->rc_nh_new; | |||||
if (ce->nh_new != NULL) { | |||||
if (fib_ref_nhop(fd, ce->nh_new) == 0) | |||||
return (false); | |||||
} | |||||
return (true); | |||||
} | |||||
static bool | |||||
queue_rtable_change(struct fib_data *fd, struct rib_cmd_info *rc) | |||||
{ | |||||
struct fib_change_queue *q = &fd->fd_ss.fd_change_queue; | |||||
if (q->count >= q->size) { | |||||
if (q->size == 0) | |||||
q->size = 256; /* ~18k memory */ | |||||
else | |||||
q->size *= 2; | |||||
size_t size = q->size * sizeof(struct fib_change_entry); | |||||
void *a = realloc(q->entries, size, M_TEMP, M_NOWAIT | M_ZERO); | |||||
if (a == NULL) | |||||
return (false); | |||||
q->entries = a; | |||||
} | |||||
return (fill_change_entry(fd, &q->entries[q->count++], rc)); | |||||
} | |||||
/* | /* | ||||
* Rib subscription handler. Checks if the algorithm is ready to | * Rib subscription handler. Checks if the algorithm is ready to | ||||
* receive updates, handles nexthop refcounting and passes change | * receive updates, handles nexthop refcounting and passes change | ||||
* data to the algorithm callback. | * data to the algorithm callback. | ||||
*/ | */ | ||||
static void | static void | ||||
handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc, | handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc, | ||||
void *_data) | void *_data) | ||||
Show All 15 Lines | handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc, | ||||
/* Consider scheduling algorithm re-evaluation */ | /* Consider scheduling algorithm re-evaluation */ | ||||
update_algo_state(fd); | update_algo_state(fd); | ||||
/* | /* | ||||
* If algo requested rebuild, stop sending updates by default. | * If algo requested rebuild, stop sending updates by default. | ||||
* This simplifies nexthop refcount handling logic. | * This simplifies nexthop refcount handling logic. | ||||
*/ | */ | ||||
if (fd->fd_need_rebuild) | if (fd->fd_need_rebuild) { | ||||
if (immediate_sync) | |||||
rebuild_fd(fd, "rtable change type enforced sync"); | |||||
return; | return; | ||||
} | |||||
/* | /* | ||||
* Algo requested updates to be delivered in batches. | |||||
* Add the current change to the queue and return. | |||||
*/ | |||||
if (fd->fd_batch) { | |||||
if (immediate_sync) { | |||||
if (!queue_rtable_change(fd, rc) || !apply_rtable_changes(fd)) | |||||
rebuild_fd(fd, "batch sync failed"); | |||||
} else { | |||||
if (!queue_rtable_change(fd, rc)) | |||||
schedule_fd_rebuild(fd, "batch queue failed"); | |||||
} | |||||
return; | |||||
} | |||||
/* | |||||
* Maintain guarantee that every nexthop returned by the dataplane | * Maintain guarantee that every nexthop returned by the dataplane | ||||
* lookup has > 0 refcount, so can be safely referenced within current | * lookup has > 0 refcount, so can be safely referenced within current | ||||
* epoch. | * epoch. | ||||
*/ | */ | ||||
if (rc->rc_nh_new != NULL) { | if (rc->rc_nh_new != NULL) { | ||||
if (fib_ref_nhop(fd, rc->rc_nh_new) == 0) { | if (fib_ref_nhop(fd, rc->rc_nh_new) == 0) { | ||||
/* ran out of indexes */ | /* ran out of indexes */ | ||||
schedule_fd_rebuild(fd, "ran out of nhop indexes"); | schedule_fd_rebuild(fd, "ran out of nhop indexes"); | ||||
return; | return; | ||||
} | } | ||||
} | } | ||||
result = fd->fd_flm->flm_change_rib_item_cb(rnh, rc, fd->fd_algo_data); | result = fd->fd_flm->flm_change_rib_item_cb(rnh, rc, fd->fd_algo_data); | ||||
switch (result) { | switch (result) { | ||||
case FLM_SUCCESS: | case FLM_SUCCESS: | ||||
/* Unref old nexthop on success */ | /* Unref old nexthop on success */ | ||||
if (rc->rc_nh_old != NULL) | if (rc->rc_nh_old != NULL) | ||||
fib_unref_nhop(fd, rc->rc_nh_old); | fib_unref_nhop(fd, rc->rc_nh_old); | ||||
break; | break; | ||||
case FLM_BATCH: | |||||
/* | |||||
* Algo asks to batch the changes. | |||||
*/ | |||||
if (queue_rtable_change(fd, rc)) { | |||||
if (!immediate_sync) { | |||||
fd->fd_batch = true; | |||||
mark_diverge_time(fd); | |||||
update_rebuild_delay(fd, FDA_BATCH); | |||||
break; | |||||
} | |||||
if (apply_rtable_changes(fd)) | |||||
break; | |||||
} | |||||
FD_PRINTF(LOG_ERR, fd, "batched sync failed, force the rebuild"); | |||||
case FLM_REBUILD: | case FLM_REBUILD: | ||||
/* | /* | ||||
* Algo is not able to apply the update. | * Algo is not able to apply the update. | ||||
* Schedule algo rebuild. | * Schedule algo rebuild. | ||||
*/ | */ | ||||
if (!immediate_sync) { | if (!immediate_sync) { | ||||
mark_diverge_time(fd); | mark_diverge_time(fd); | ||||
▲ Show 20 Lines • Show All 247 Lines • ▼ Show 20 Lines | for (int i = 0; i < fd->number_nhops; i++) { | ||||
nhop_free_any(fd->nh_idx[i]); | nhop_free_any(fd->nh_idx[i]); | ||||
} | } | ||||
} | } | ||||
free(fd->nh_idx, M_RTABLE); | free(fd->nh_idx, M_RTABLE); | ||||
} | } | ||||
if (fd->nh_ref_table != NULL) | if (fd->nh_ref_table != NULL) | ||||
free(fd->nh_ref_table, M_RTABLE); | free(fd->nh_ref_table, M_RTABLE); | ||||
if (fd->fd_ss.fd_change_queue.entries != NULL) | |||||
free(fd->fd_ss.fd_change_queue.entries, M_TEMP); | |||||
fib_unref_algo(fd->fd_flm); | fib_unref_algo(fd->fd_flm); | ||||
free(fd, M_RTABLE); | free(fd, M_RTABLE); | ||||
} | } | ||||
/* | /* | ||||
* Epoch callback indicating fd is safe to destroy | * Epoch callback indicating fd is safe to destroy | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 184 Lines • ▼ Show 20 Lines | execute_callout_action(struct fib_data *fd) | ||||
enum fib_callout_action action = fd->fd_callout_action; | enum fib_callout_action action = fd->fd_callout_action; | ||||
struct fib_lookup_module *flm_new = NULL; | struct fib_lookup_module *flm_new = NULL; | ||||
bool result = true; | bool result = true; | ||||
NET_EPOCH_ASSERT(); | NET_EPOCH_ASSERT(); | ||||
RIB_WLOCK_ASSERT(fd->fd_rh); | RIB_WLOCK_ASSERT(fd->fd_rh); | ||||
fd->fd_need_rebuild = false; | fd->fd_need_rebuild = false; | ||||
fd->fd_batch = false; | |||||
fd->fd_num_changes = 0; | fd->fd_num_changes = 0; | ||||
/* First, check if we're still OK to use this algo */ | /* First, check if we're still OK to use this algo */ | ||||
if (!is_algo_fixed(fd->fd_rh)) | if (!is_algo_fixed(fd->fd_rh)) | ||||
flm_new = fib_check_best_algo(fd->fd_rh, fd->fd_flm); | flm_new = fib_check_best_algo(fd->fd_rh, fd->fd_flm); | ||||
if (flm_new != NULL) | if (flm_new != NULL) | ||||
action = FDA_REBUILD; | action = FDA_REBUILD; | ||||
if (action == FDA_BATCH) { | |||||
/* Try to sync */ | |||||
if (!apply_rtable_changes(fd)) | |||||
action = FDA_REBUILD; | |||||
} | |||||
if (action == FDA_REBUILD) | if (action == FDA_REBUILD) | ||||
result = rebuild_fd_flm(fd, flm_new != NULL ? flm_new : fd->fd_flm); | result = rebuild_fd_flm(fd, flm_new != NULL ? flm_new : fd->fd_flm); | ||||
if (flm_new != NULL) | if (flm_new != NULL) | ||||
fib_unref_algo(flm_new); | fib_unref_algo(flm_new); | ||||
return (result); | return (result); | ||||
} | } | ||||
/* | /* | ||||
* Callout for all scheduled fd-related work. | * Callout for all scheduled fd-related work. | ||||
* - Checks if the current algo is still the best algo | * - Checks if the current algo is still the best algo | ||||
* - Synchronises algo instance to the rtable (batch usecase) | |||||
* - Creates a new instance of an algo for af/fib if desired. | * - Creates a new instance of an algo for af/fib if desired. | ||||
*/ | */ | ||||
static void | static void | ||||
handle_fd_callout(void *_data) | handle_fd_callout(void *_data) | ||||
{ | { | ||||
struct fib_data *fd = (struct fib_data *)_data; | struct fib_data *fd = (struct fib_data *)_data; | ||||
struct epoch_tracker et; | struct epoch_tracker et; | ||||
▲ Show 20 Lines • Show All 643 Lines • Show Last 20 Lines |
This one doesn't get called unless immediate_sync is set in handle_rtable_change_cb(), which means it is never called.