Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/in_fib_algo.c
- This file was added.
/*- | |||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD | |||||
* | |||||
* Copyright (c) 2020 Alexander V. Chernikov | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions | |||||
* are met: | |||||
* 1. Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer. | |||||
* 2. Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||||
* SUCH DAMAGE. | |||||
*/ | |||||
#include <sys/cdefs.h> | |||||
__FBSDID("$FreeBSD$"); | |||||
#include "opt_inet.h" | |||||
#include <sys/param.h> | |||||
#include <sys/kernel.h> | |||||
#include <sys/lock.h> | |||||
#include <sys/rmlock.h> | |||||
#include <sys/malloc.h> | |||||
#include <sys/kernel.h> | |||||
#include <sys/priv.h> | |||||
#include <sys/socket.h> | |||||
#include <sys/sysctl.h> | |||||
#include <net/vnet.h> | |||||
#include <net/if.h> | |||||
#include <netinet/in.h> | |||||
#include <net/route.h> | |||||
#include <net/route/nhop.h> | |||||
#include <net/route/route_ctl.h> | |||||
#include <net/route/route_var.h> | |||||
#include <net/route/route_algo.h> | |||||
/*
 * One element of the bsearch4 arrays: an IPv4 address, a netmask and the
 * nexthop that goes with them.  The code filling these records converts
 * both addr4 and mask4 with ntohl(), i.e. they are kept in host byte order.
 */
struct bsearch4_record {
	uint32_t		addr4;	/* address, host byte order */
	uint32_t		mask4;	/* netmask, host byte order */
	struct nhop_object	*nh;	/* nexthop handed back by lookups */
};
struct bsearch4_data { | |||||
struct fib_data *fd; | |||||
uint32_t alloc_items; | |||||
uint32_t num_items; | |||||
void *mem; | |||||
struct bsearch4_record *rr; | |||||
struct bsearch4_record br[0]; | |||||
}; | |||||
static struct nhop_object * | |||||
bsearch4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid) | |||||
{ | |||||
const struct bsearch4_data *bd = (const struct bsearch4_data *)algo_data; | |||||
const struct bsearch4_record *br; | |||||
uint32_t addr4 = ntohl(key.addr4.s_addr); | |||||
int start = 0; | |||||
int end = bd->num_items; | |||||
int i = (start + end) / 2; | |||||
while (start + 1 < end) { | |||||
i = (start + end) / 2; | |||||
br = &bd->br[i]; | |||||
if (addr4 < br->addr4) { | |||||
/* key < average, reduce right boundary */ | |||||
end = i; | |||||
continue; | |||||
} else if (addr4 > br->addr4) { | |||||
/* key > average, increase left aboundary */ | |||||
start = i; | |||||
continue; | |||||
} else { | |||||
/* direct match */ | |||||
return br->nh; | |||||
} | |||||
} | |||||
/* start + 1 == end */ | |||||
return bd->br[start].nh; | |||||
} | |||||
static uint8_t | |||||
bsearch4_get_pref(const struct rib_rtable_info *rinfo) | |||||
{ | |||||
if (rinfo->num_prefixes < 10) | |||||
return (253); | |||||
else if (rinfo->num_prefixes < 1000) | |||||
return (255 - rinfo->num_prefixes / 4); | |||||
else | |||||
return (1); | |||||
} | |||||
static enum flm_op_result | |||||
bsearch4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data) | |||||
{ | |||||
struct bsearch4_data *bd; | |||||
struct rib_rtable_info rinfo; | |||||
uint32_t count; | |||||
size_t sz; | |||||
void *mem; | |||||
fib_get_rtable_info(fib_get_rh(fd), &rinfo); | |||||
count = rinfo.num_prefixes * 11 / 10 + 64; | |||||
if (_old_data != NULL) { | |||||
struct bsearch4_data *old_bd = (struct bsearch4_data *)old_bd; | |||||
} | |||||
sz = sizeof(struct bsearch4_data) + sizeof(struct bsearch4_record) * count; | |||||
/* add cache line sz to ease alignment */ | |||||
sz += CACHE_LINE_SIZE; | |||||
mem = malloc(sz, M_RTABLE, M_NOWAIT | M_ZERO); | |||||
if (mem == NULL) | |||||
return (FLM_REBUILD); | |||||
bd = (struct bsearch4_data *)roundup2((uintptr_t)mem, CACHE_LINE_SIZE); | |||||
bd->mem = mem; | |||||
bd->alloc_items = count; | |||||
bd->fd = fd; | |||||
*_data = bd; | |||||
bd->rr = malloc(sizeof(struct bsearch4_record) * count, M_TEMP, M_NOWAIT | M_ZERO); | |||||
if (bd->rr == NULL) | |||||
return (FLM_REBUILD); | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static void | |||||
bsearch4_destroy(void *_data) | |||||
{ | |||||
struct bsearch4_data *bd = (struct bsearch4_data *)_data; | |||||
if (bd->rr != NULL) | |||||
free(bd->rr, M_TEMP); | |||||
free(bd->mem, M_RTABLE); | |||||
} | |||||
static enum flm_op_result | |||||
bsearch4_add_route_cb(struct rtentry *rt, void *_data) | |||||
{ | |||||
struct bsearch4_data *bd = (struct bsearch4_data *)_data; | |||||
struct nhop_object *nh; | |||||
struct bsearch4_record *rr; | |||||
nh = rt_get_raw_nhop(rt); | |||||
if (bd->num_items >= bd->alloc_items) | |||||
return (FLM_REBUILD); | |||||
rr = &bd->rr[bd->num_items++]; | |||||
uint32_t scopeid; | |||||
struct in_addr addr4, mask4; | |||||
rt_get_inet_prefix_pmask(rt, &addr4, &mask4, &scopeid); | |||||
rr->addr4 = ntohl(addr4.s_addr); | |||||
rr->mask4 = ntohl(mask4.s_addr); | |||||
rr->nh = nh; | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static int | |||||
rr_cmp(const void *_rec1, const void *_rec2) | |||||
{ | |||||
const struct bsearch4_record *rec1, *rec2; | |||||
rec1 = _rec1; | |||||
rec2 = _rec2; | |||||
if (rec1->addr4 < rec2->addr4) | |||||
return (-1); | |||||
else if (rec1->addr4 > rec2->addr4) | |||||
return (1); | |||||
/* | |||||
* wider mask value is lesser mask | |||||
* we want less specific come first, e.g. < | |||||
*/ | |||||
if (rec1->mask4 < rec2->mask4) | |||||
return (-1); | |||||
else if (rec1->mask4 > rec2->mask4) | |||||
return (1); | |||||
return (0); | |||||
} | |||||
static bool | |||||
close_stack(struct bsearch4_data *bd, struct bsearch4_record *pst) | |||||
{ | |||||
if (bd->num_items >= bd->alloc_items) | |||||
return (false); | |||||
struct bsearch4_record *br = &bd->br[bd->num_items]; | |||||
struct bsearch4_record *br_prev = &bd->br[bd->num_items - 1]; | |||||
uint32_t last_prev = (br_prev->addr4 | ~br_prev->mask4); | |||||
uint32_t last_rec = (pst->addr4 | ~pst->mask4); | |||||
if (last_rec > last_prev) { | |||||
br->addr4 = last_prev + 1; | |||||
br->mask4 = pst->mask4; | |||||
br->nh = pst->nh; | |||||
bd->num_items++; | |||||
} | |||||
return (true); | |||||
} | |||||
static enum flm_op_result | |||||
bsearch4_build_array(struct bsearch4_data *bd, int num_items) | |||||
{ | |||||
struct bsearch4_record *br; | |||||
struct bsearch4_record *rr_stack; | |||||
int stack_off = 0; | |||||
/* alloc stack of size 32 */ | |||||
rr_stack = malloc(32 * sizeof(struct bsearch4_record), M_TEMP, M_NOWAIT | M_ZERO); | |||||
if (rr_stack == NULL) | |||||
return (FLM_REBUILD); | |||||
for (int i = 0; i < num_items; i++) { | |||||
struct bsearch4_record *rib_entry = &bd->rr[i]; | |||||
bool diverged = false; | |||||
while (stack_off > 0) { | |||||
struct bsearch4_record *pst = &rr_stack[stack_off - 1]; | |||||
/* | |||||
* Check if we need to pop stack. | |||||
* Rely on the ordering - larger prefixes comes up first | |||||
*/ | |||||
bool match = pst->addr4 == (rib_entry->addr4 & pst->mask4); | |||||
if (match && !diverged) | |||||
break; | |||||
if (!close_stack(bd, pst)) | |||||
return (FLM_REBUILD); | |||||
if (!match) { | |||||
stack_off--; | |||||
diverged = true; | |||||
} else | |||||
break; | |||||
} | |||||
if (bd->num_items >= bd->alloc_items) | |||||
return (FLM_REBUILD); | |||||
br = &bd->br[bd->num_items++]; | |||||
br->addr4 = rib_entry->addr4; | |||||
br->mask4= rib_entry->mask4; | |||||
br->nh = rib_entry->nh; | |||||
rr_stack[stack_off++] = *rib_entry; | |||||
} | |||||
while (stack_off > 0) { | |||||
close_stack(bd, &rr_stack[stack_off - 1]); | |||||
stack_off--; | |||||
} | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static enum flm_op_result | |||||
bsearch4_build(struct bsearch4_data *bd) | |||||
{ | |||||
enum flm_op_result ret; | |||||
int num_items; | |||||
/* Add default route if not exists */ | |||||
bool default_found = false; | |||||
for (int i = 0; i < bd->num_items; i++) { | |||||
if (bd->rr[i].mask4 == 0) { | |||||
default_found = true; | |||||
break; | |||||
} | |||||
} | |||||
if (!default_found) { | |||||
if (bd->num_items >= bd->alloc_items) | |||||
return (FLM_REBUILD); | |||||
/* Add default route with NULL nhop */ | |||||
bd->num_items++; | |||||
} | |||||
/* Sort prefixes */ | |||||
qsort(bd->rr, bd->num_items, sizeof(struct bsearch4_record), rr_cmp); | |||||
num_items = bd->num_items; | |||||
bd->num_items = 0; | |||||
ret = bsearch4_build_array(bd, num_items); | |||||
free(bd->rr, M_TEMP); | |||||
bd->rr = NULL; | |||||
return (ret); | |||||
} | |||||
static enum flm_op_result | |||||
bsearch4_end_dump(void *_data, struct fib_dp *dp) | |||||
{ | |||||
struct bsearch4_data *bd = (struct bsearch4_data *)_data; | |||||
enum flm_op_result ret; | |||||
ret = bsearch4_build(bd); | |||||
if (ret == FLM_SUCCESS) { | |||||
dp->f = bsearch4_lookup; | |||||
dp->arg = bd; | |||||
} | |||||
return (ret); | |||||
} | |||||
static enum flm_op_result | |||||
bsearch4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc, | |||||
void *_data) | |||||
{ | |||||
return (FLM_REBUILD); | |||||
} | |||||
struct fib_lookup_module flm_bsearch4= { | |||||
.flm_name = "bsearch4", | |||||
.flm_family = AF_INET, | |||||
.flm_init_cb = bsearch4_init, | |||||
.flm_destroy_cb = bsearch4_destroy, | |||||
.flm_dump_rib_item_cb = bsearch4_add_route_cb, | |||||
.flm_dump_end_cb = bsearch4_end_dump, | |||||
.flm_change_rib_item_cb = bsearch4_change_cb, | |||||
.flm_get_pref = bsearch4_get_pref, | |||||
}; | |||||
/* Length in bytes of a sockaddr_in cut off right after the IPv4 address. */
#define	KEY_LEN_INET	\
	(offsetof(struct sockaddr_in, sin_addr) + sizeof(in_addr_t))
/* Offset of sin_addr inside struct sockaddr_in, expressed in bits. */
#define	OFF_LEN_INET	\
	(8 * offsetof(struct sockaddr_in, sin_addr))
struct radix4_addr_entry { | |||||
struct radix_node rn[2]; | |||||
struct sockaddr_in addr; | |||||
struct nhop_object *nhop; | |||||
}; | |||||
#define LRADIX4_ITEM_SZ roundup2(sizeof(struct radix4_addr_entry), 64) | |||||
/* Per-instance state of the radix4_lockless algorithm. */
struct lradix4_data {
	struct radix_node_head	*rnh;		/* private radix tree head */
	struct fib_data		*fd;		/* handle passed to the init callback */
	void			*mem;		/* array of LRADIX4_ITEM_SZ sized entry slots */
	uint32_t		alloc_items;	/* number of slots in mem */
	uint32_t		num_items;	/* number of slots handed out so far */
};
static struct nhop_object * | |||||
lradix4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid) | |||||
{ | |||||
struct radix_node_head *rnh = (struct radix_node_head *)algo_data; | |||||
struct radix4_addr_entry *ent; | |||||
struct sockaddr_in addr4 = { | |||||
.sin_len = KEY_LEN_INET, | |||||
.sin_addr = key.addr4, | |||||
}; | |||||
ent = (struct radix4_addr_entry *)(rnh->rnh_matchaddr(&addr4, &rnh->rh)); | |||||
if (ent != NULL) | |||||
return (ent->nhop); | |||||
return (NULL); | |||||
} | |||||
static uint8_t | |||||
lradix4_get_pref(const struct rib_rtable_info *rinfo) | |||||
{ | |||||
if (rinfo->num_prefixes < 10) | |||||
return (250); | |||||
else if (rinfo->num_prefixes < 1000) | |||||
return (254 - rinfo->num_prefixes / 4); | |||||
else | |||||
return (1); | |||||
} | |||||
static enum flm_op_result | |||||
lradix4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data) | |||||
{ | |||||
struct lradix4_data *lr; | |||||
struct rib_rtable_info rinfo; | |||||
uint32_t count; | |||||
lr = malloc(sizeof(struct lradix4_data), M_RTABLE, M_NOWAIT | M_ZERO); | |||||
if (lr == NULL || !rn_inithead((void **)&lr->rnh, OFF_LEN_INET)) | |||||
return (FLM_REBUILD); | |||||
fib_get_rtable_info(fib_get_rh(fd), &rinfo); | |||||
count = rinfo.num_prefixes * 11 / 10; | |||||
// XXX: alignment! | |||||
lr->mem = malloc(count * LRADIX4_ITEM_SZ, M_RTABLE, M_NOWAIT | M_ZERO); | |||||
if (lr->mem == NULL) | |||||
return (FLM_REBUILD); | |||||
lr->alloc_items = count; | |||||
lr->fd = fd; | |||||
*_data = lr; | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static void | |||||
lradix4_destroy(void *_data) | |||||
{ | |||||
struct lradix4_data *lr = (struct lradix4_data *)_data; | |||||
if (lr->rnh != NULL) | |||||
rn_detachhead((void **)&lr->rnh); | |||||
if (lr->mem != NULL) | |||||
free(lr->mem, M_RTABLE); | |||||
free(lr, M_RTABLE); | |||||
} | |||||
static enum flm_op_result | |||||
lradix4_add_route_cb(struct rtentry *rt, void *_data) | |||||
{ | |||||
struct lradix4_data *lr = (struct lradix4_data *)_data; | |||||
struct radix4_addr_entry *ae; | |||||
struct sockaddr_in mask; | |||||
struct sockaddr *rt_mask = NULL; | |||||
struct radix_node *rn; | |||||
struct in_addr addr4, mask4; | |||||
uint32_t scopeid; | |||||
if (lr->num_items >= lr->alloc_items) | |||||
return (FLM_REBUILD); | |||||
ae = (struct radix4_addr_entry *)((char *)lr->mem + lr->num_items * LRADIX4_ITEM_SZ); | |||||
lr->num_items++; | |||||
ae->nhop = rt_get_raw_nhop(rt); | |||||
rt_get_inet_prefix_pmask(rt, &addr4, &mask4, &scopeid); | |||||
ae->addr.sin_len = KEY_LEN_INET; | |||||
ae->addr.sin_addr = addr4; | |||||
if (mask4.s_addr != INADDR_ANY) { | |||||
bzero(&mask, sizeof(mask)); | |||||
mask.sin_len = KEY_LEN_INET; | |||||
mask.sin_addr = mask4; | |||||
rt_mask = (struct sockaddr *)&mask; | |||||
} | |||||
rn = lr->rnh->rnh_addaddr((struct sockaddr *)&ae->addr, rt_mask, | |||||
&lr->rnh->rh, ae->rn); | |||||
if (rn == NULL) | |||||
return (FLM_REBUILD); | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static enum flm_op_result | |||||
lradix4_end_dump(void *_data, struct fib_dp *dp) | |||||
{ | |||||
struct lradix4_data *lr = (struct lradix4_data *)_data; | |||||
dp->f = lradix4_lookup; | |||||
dp->arg = lr->rnh; | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static enum flm_op_result | |||||
lradix4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc, | |||||
void *_data) | |||||
{ | |||||
return (FLM_REBUILD); | |||||
} | |||||
struct fib_lookup_module flm_radix4_lockless = { | |||||
.flm_name = "radix4_lockless", | |||||
.flm_family = AF_INET, | |||||
.flm_init_cb = lradix4_init, | |||||
.flm_destroy_cb = lradix4_destroy, | |||||
.flm_dump_rib_item_cb = lradix4_add_route_cb, | |||||
.flm_dump_end_cb = lradix4_end_dump, | |||||
.flm_change_rib_item_cb = lradix4_change_cb, | |||||
.flm_get_pref = lradix4_get_pref, | |||||
}; | |||||
/* Per-instance state of the radix4 algorithm. */
struct radix4_data {
	struct fib_data	*fd;	/* handle passed to the init callback */
	struct rib_head	*rh;	/* rib head obtained through fib_get_rh(fd) */
};
static struct nhop_object * | |||||
radix4_lookup(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid) | |||||
{ | |||||
RIB_RLOCK_TRACKER; | |||||
struct rib_head *rh = (struct rib_head *)algo_data; | |||||
struct radix_node *rn; | |||||
struct nhop_object *nh; | |||||
/* Prepare lookup key */ | |||||
struct sockaddr_in sin4 = { | |||||
.sin_family = AF_INET, | |||||
.sin_len = sizeof(struct sockaddr_in), | |||||
.sin_addr = key.addr4, | |||||
}; | |||||
nh = NULL; | |||||
RIB_RLOCK(rh); | |||||
rn = rh->rnh_matchaddr((void *)&sin4, &rh->head); | |||||
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) | |||||
nh = (RNTORT(rn))->rt_nhop; | |||||
RIB_RUNLOCK(rh); | |||||
return (nh); | |||||
} | |||||
/*
 * Preference callback of the radix4 algorithm: a constant 50, independent
 * of rinfo (which is not inspected).
 */
static uint8_t
radix4_get_pref(const struct rib_rtable_info *rinfo)
{
	const uint8_t pref = 50;

	return (pref);
}
static enum flm_op_result | |||||
radix4_init(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **_data) | |||||
{ | |||||
struct radix4_data *r4; | |||||
r4 = malloc(sizeof(struct radix4_data), M_RTABLE, M_NOWAIT | M_ZERO); | |||||
if (r4 == NULL) | |||||
return (FLM_REBUILD); | |||||
r4->fd = fd; | |||||
r4->rh = fib_get_rh(fd); | |||||
*_data = r4; | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static void | |||||
radix4_destroy(void *_data) | |||||
{ | |||||
free(_data, M_RTABLE); | |||||
} | |||||
static enum flm_op_result | |||||
radix4_add_route_cb(struct rtentry *rt, void *_data) | |||||
{ | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static enum flm_op_result | |||||
radix4_end_dump(void *_data, struct fib_dp *dp) | |||||
{ | |||||
struct radix4_data *r4 = (struct radix4_data *)_data; | |||||
dp->f = radix4_lookup; | |||||
dp->arg = r4->rh; | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static enum flm_op_result | |||||
radix4_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc, | |||||
void *_data) | |||||
{ | |||||
return (FLM_SUCCESS); | |||||
} | |||||
struct fib_lookup_module flm_radix4 = { | |||||
.flm_name = "radix4", | |||||
.flm_family = AF_INET, | |||||
.flm_init_cb = radix4_init, | |||||
.flm_destroy_cb = radix4_destroy, | |||||
.flm_dump_rib_item_cb = radix4_add_route_cb, | |||||
.flm_dump_end_cb = radix4_end_dump, | |||||
.flm_change_rib_item_cb = radix4_change_cb, | |||||
.flm_get_pref = radix4_get_pref, | |||||
}; | |||||
static void | |||||
fib4_algo_init(void) | |||||
{ | |||||
fib_module_register(&flm_bsearch4); | |||||
fib_module_register(&flm_radix4_lockless); | |||||
fib_module_register(&flm_radix4); | |||||
} | |||||
SYSINIT(fib4_algo_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, fib4_algo_init, NULL); |