Changeset View
Changeset View
Standalone View
Standalone View
sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c
- This file was added.
/*- | |||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD | |||||
* | |||||
* Copyright (c) 2020 Alexander V. Chernikov | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions | |||||
* are met: | |||||
* 1. Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer. | |||||
* 2. Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||||
* SUCH DAMAGE. | |||||
*/ | |||||
#include <sys/cdefs.h> | |||||
__FBSDID("$FreeBSD$"); | |||||
#include <sys/param.h> | |||||
#include <sys/kernel.h> | |||||
#include <sys/lock.h> | |||||
#include <sys/rmlock.h> | |||||
#include <sys/malloc.h> | |||||
#include <sys/module.h> | |||||
#include <sys/kernel.h> | |||||
#include <sys/socket.h> | |||||
#include <sys/sysctl.h> | |||||
#include <net/vnet.h> | |||||
#include <net/if.h> | |||||
#include <net/if_var.h> | |||||
#include <netinet/in.h> | |||||
#include <netinet/ip.h> | |||||
#include <netinet/ip6.h> | |||||
#include <netinet6/ip6_var.h> | |||||
#include <netinet6/in6_fib.h> | |||||
#include <net/route.h> | |||||
#include <net/route/nhop.h> | |||||
#include <net/route/route_ctl.h> | |||||
#include <net/route/route_var.h> | |||||
#include <net/route/route_algo.h> | |||||
#define RTDEBUG | |||||
#include "rte_lpm6.h" | |||||
/*
 * Sizing constants for the number of tbl8 groups requested from rte_lpm6.
 * LPM6_MIN_TBL8 is the initial size used by init_table().
 * LPM6_MAX_TBL8 is presumably the intended upper bound; it is not
 * referenced in this file.
 *
 * The product is parenthesized so the macro expands correctly when it is
 * used inside a larger expression (division, multiplication, etc.).
 */
#define	LPM6_MIN_TBL8	8		/* 2 pages of memory */
#define	LPM6_MAX_TBL8	(65536 * 16)	/* 256M */
/*
 * Pair of opaque pointers.
 * NOTE(review): not referenced anywhere else in this file; presumably a
 * lookup routine together with the argument to pass to it - confirm.
 */
struct fib_algo_calldata {
	void	*lookup;
	void	*arg;
};
/*
 * Per-table state of the dpdk_lpm6 lookup module.
 * One instance is allocated by build_table() and released by destroy_dd().
 */
struct dpdk_lpm6_data {
	struct rte_lpm6	*lpm6;		/* LPM instance from rte_lpm6_create() */
	uint32_t	number_tbl8s;	/* tbl8 count the instance was created with */
	uint64_t	routes_added;	/* prefixes inserted during the RIB dump */
	uint64_t	routes_failed;	/* prefixes that failed to insert */
	uint32_t	fibnum;		/* routing table (fib) number */
	uint8_t		hit_tables;	/* set when an insert returned -ENOSPC */
	struct fib_data	*fd;		/* handle passed to the fib_*() helpers */
};
static struct nhop_object * | |||||
lookup_ptr_ll(const struct rte_lpm6 *lpm6, const struct in6_addr *dst6, | |||||
uint32_t scopeid) | |||||
{ | |||||
const struct rte_lpm6_external *rte_ext; | |||||
struct nhop_object *nh = NULL; | |||||
struct sockaddr_in6 sin6; | |||||
struct rib_head *rh; | |||||
struct radix_node *rn; | |||||
RIB_RLOCK_TRACKER; | |||||
memset(&sin6, 0, sizeof(sin6)); | |||||
sin6.sin6_len = sizeof(struct sockaddr_in6); | |||||
sin6.sin6_addr = *dst6; | |||||
/* Assume scopeid is valid and embed it directly */ | |||||
sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff); | |||||
rte_ext = (const struct rte_lpm6_external *)lpm6; | |||||
rh = rt_tables_get_rnh(rte_ext->fibnum, AF_INET6); | |||||
if (rh == NULL) | |||||
return (NULL); | |||||
RIB_RLOCK(rh); | |||||
rn = rh->rnh_matchaddr((void *)&sin6, &rh->head); | |||||
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) | |||||
nh = RNTORT(rn)->rt_nhop; | |||||
RIB_RUNLOCK(rh); | |||||
//TODO: check LL nhops refcounting | |||||
return (nh); | |||||
} | |||||
/* | |||||
* Main datapath routing | |||||
*/ | |||||
static struct nhop_object * | |||||
lookup_ptr(void *algo_data, const struct flm_lookup_key key, uint32_t scopeid) | |||||
{ | |||||
const struct rte_lpm6 *lpm6; | |||||
const struct rte_lpm6_external *rte_ext; | |||||
const struct in6_addr *addr6; | |||||
uint32_t nhidx = 0; | |||||
int ret; | |||||
lpm6 = (const struct rte_lpm6 *)algo_data; | |||||
addr6 = key.addr6; | |||||
rte_ext = (const struct rte_lpm6_external *)lpm6; | |||||
if (!IN6_IS_SCOPE_LINKLOCAL(addr6)) { | |||||
ret = rte_lpm6_lookup(lpm6, (const uint8_t *)addr6, &nhidx); | |||||
if (ret == 0) { | |||||
/* Success! */ | |||||
return (rte_ext->nh_idx[nhidx]); | |||||
} else { | |||||
/* Not found. Check default route */ | |||||
if (rte_ext->default_idx > 0) | |||||
return (rte_ext->nh_idx[rte_ext->default_idx]); | |||||
else | |||||
return (NULL); | |||||
} | |||||
} else { | |||||
/* LL */ | |||||
return (lookup_ptr_ll(lpm6, addr6, scopeid)); | |||||
} | |||||
} | |||||
static uint8_t | |||||
rte6_get_pref(const struct rib_rtable_info *rinfo) | |||||
{ | |||||
if (rinfo->num_prefixes < 10) | |||||
return (1); | |||||
else if (rinfo->num_prefixes < 1000) | |||||
return (rinfo->num_prefixes / 10); | |||||
else if (rinfo->num_prefixes < 500000) | |||||
return (100 + rinfo->num_prefixes / 3334); | |||||
else | |||||
return (250); | |||||
} | |||||
/*
 * Determine the prefix length encoded by a bit mask.
 *
 * p points to the mask bytes and len is the number of bits to examine;
 * bits are numbered from the most significant bit of p[0].
 *
 * Returns the number of leading set bits, or -1 when a set bit follows
 * the first clear bit (the mask is not contiguous).
 */
static int
contigmask(const uint8_t *p, int len)
{
	int bit, ones;

	/* Length of the leading run of set bits. */
	ones = 0;
	while (ones < len && (p[ones / 8] & (0x80 >> (ones % 8))) != 0)
		ones++;

	/* Every bit after the first clear one has to be clear as well. */
	for (bit = ones + 1; bit < len; bit++) {
		if ((p[bit / 8] & (0x80 >> (bit % 8))) != 0)
			return (-1);	/* mask not contiguous */
	}

	return (ones);
}
static uint8_t | |||||
rt_get_plen(const struct rtentry *rt) | |||||
{ | |||||
const struct sockaddr *sa; | |||||
int family; | |||||
int plen; | |||||
sa = rt_key_const(rt); | |||||
family = sa->sa_family; | |||||
sa = rt_mask_const(rt); | |||||
switch (family) { | |||||
case AF_INET: | |||||
if (sa != NULL) { | |||||
const struct in_addr *addr4; | |||||
addr4 = &((const struct sockaddr_in *)sa)->sin_addr; | |||||
plen = contigmask((const uint8_t *)addr4, 32); | |||||
if (plen == -1) | |||||
plen = 0; | |||||
} else | |||||
plen = 32; | |||||
break; | |||||
case AF_INET6: | |||||
if (sa != NULL) { | |||||
const struct in6_addr *addr6; | |||||
addr6 = &((const struct sockaddr_in6 *)sa)->sin6_addr; | |||||
plen = contigmask((const uint8_t *)addr6, 128); | |||||
if (plen == -1) | |||||
plen = 0; | |||||
} else | |||||
plen = 128; | |||||
break; | |||||
default: | |||||
plen = 0; | |||||
} | |||||
return (plen); | |||||
} | |||||
static enum flm_op_result | |||||
handle_default_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc) | |||||
{ | |||||
struct rte_lpm6_external *rte_ext; | |||||
rte_ext = (struct rte_lpm6_external *)dd->lpm6; | |||||
uint32_t old_nhidx = rte_ext->default_idx; | |||||
if (rc->rc_cmd != RTM_DELETE) { | |||||
/* Reference new */ | |||||
uint32_t nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new); | |||||
if (nhidx == 0) | |||||
return (FLM_REBUILD); | |||||
rte_ext->default_idx = nhidx; | |||||
} else { | |||||
/* No default route */ | |||||
rte_ext->default_idx = 0; | |||||
} | |||||
if (old_nhidx != 0) | |||||
fib_free_nhop_idx(dd->fd, old_nhidx); | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static enum flm_op_result | |||||
handle_ll_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc, | |||||
const struct sockaddr_in6 *sin6, int plen) | |||||
{ | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static struct rte_lpm6_rule * | |||||
pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6, | |||||
char *buffer) | |||||
{ | |||||
struct rte_lpm6_rule *lsp_rule = NULL; | |||||
struct route_nhop_data rnd; | |||||
struct rtentry *rt; | |||||
int plen; | |||||
rt = fib6_lookup_rt(dd->fibnum, addr6, 0, NHR_UNLOCKED, &rnd); | |||||
/* plen = 0 means default route and it's out of scope */ | |||||
if ((rt != NULL) && (plen = rt_get_plen(rt)) > 0) { | |||||
uint32_t nhidx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop); | |||||
if (nhidx == 0) { | |||||
/* | |||||
* shouldn't happen as we already have parent route. | |||||
* It will trigger rebuild automatically. | |||||
*/ | |||||
return (NULL); | |||||
} | |||||
const struct sockaddr_in6 *psin6; | |||||
const uint8_t *uaddr6; | |||||
psin6 = (const struct sockaddr_in6 *)rt_key_const(rt); | |||||
uaddr6 = (const uint8_t *)&psin6->sin6_addr; | |||||
lsp_rule = fill_rule6(buffer, uaddr6, plen, nhidx); | |||||
} | |||||
return (lsp_rule); | |||||
} | |||||
static enum flm_op_result | |||||
handle_gu_change(struct dpdk_lpm6_data *dd, const struct rib_cmd_info *rc, | |||||
const struct in6_addr *addr6, int plen) | |||||
{ | |||||
uint32_t nhidx = 0; | |||||
int ret; | |||||
char abuf[INET6_ADDRSTRLEN]; | |||||
inet_ntop(AF_INET6, addr6, abuf, sizeof(abuf)); | |||||
/* So we get sin6, plen and nhidx */ | |||||
if (rc->rc_cmd != RTM_DELETE) { | |||||
/* | |||||
* Addition or change. Save nhop in the internal table | |||||
* and get index. | |||||
*/ | |||||
nhidx = fib_get_nhop_idx(dd->fd, rc->rc_nh_new); | |||||
if (nhidx == 0) { | |||||
DPRINTF("FCK: nhop limit reached, need rebuild"); | |||||
return (FLM_REBUILD); | |||||
} | |||||
ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)addr6, | |||||
plen, nhidx, (rc->rc_cmd == RTM_ADD) ? 1 : 0); | |||||
DPRINTF("DPDK GU: %s %s/%d nhop %u = %d", (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE", | |||||
abuf, plen, nhidx, ret); | |||||
} else { | |||||
/* | |||||
* Need to lookup parent. Assume deletion happened already | |||||
*/ | |||||
char buffer[RTE_LPM6_RULE_SIZE]; | |||||
struct rte_lpm6_rule *lsp_rule = NULL; | |||||
const struct sockaddr_in6 *dst6; | |||||
dst6 = (const struct sockaddr_in6 *)rt_key_const(rc->rc_rt); | |||||
lsp_rule = pack_parent_rule(dd, &dst6->sin6_addr, buffer); | |||||
ret = rte_lpm6_delete(dd->lpm6, (const uint8_t *)addr6, plen, lsp_rule); | |||||
DPRINTF("DPDK GU: %s %s/%d nhop %u = %d", "DEL", abuf, plen, nhidx, ret); | |||||
} | |||||
if (rc->rc_nh_old != NULL) | |||||
fib_free_nhop(dd->fd, rc->rc_nh_old); | |||||
if (ret != 0) { | |||||
DPRINTF("error: %d", ret); | |||||
if (ret == -EOVERFLOW) | |||||
return (FLM_REBUILD); | |||||
return (FLM_ERROR); | |||||
} | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static enum flm_op_result | |||||
handle_any_change(struct dpdk_lpm6_data *dd, struct rib_cmd_info *rc) | |||||
{ | |||||
const struct sockaddr_in6 *sin6; | |||||
int plen = rt_get_plen(rc->rc_rt); | |||||
enum flm_op_result ret; | |||||
sin6 = (const struct sockaddr_in6 *)rt_key_const(rc->rc_rt); | |||||
if (IN6_IS_SCOPE_LINKLOCAL(&sin6->sin6_addr)) | |||||
ret = handle_ll_change(dd, rc, sin6, plen); | |||||
else if (plen == 0) | |||||
ret = handle_default_change(dd, rc); | |||||
else | |||||
ret = handle_gu_change(dd, rc, &sin6->sin6_addr, plen); | |||||
if (ret != 0) | |||||
DPRINTF("error handling route"); | |||||
return (ret); | |||||
} | |||||
static enum flm_op_result | |||||
handle_rtable_change_cb(struct rib_head *rnh, struct rib_cmd_info *rc, | |||||
void *_data) | |||||
{ | |||||
struct dpdk_lpm6_data *dd; | |||||
dd = (struct dpdk_lpm6_data *)_data; | |||||
return (handle_any_change(dd, rc)); | |||||
} | |||||
static void | |||||
destroy_dd(struct dpdk_lpm6_data *dd) | |||||
{ | |||||
DPRINTF("destroy dd %p", dd); | |||||
if (dd->lpm6 != NULL) | |||||
rte_lpm6_free(dd->lpm6); | |||||
free(dd, M_TEMP); | |||||
} | |||||
/*
 * flm_destroy_cb hook: _data is the struct dpdk_lpm6_data handed out by
 * init_table(); release it through destroy_dd().
 */
static void
destroy_table(void *_data)
{
	struct dpdk_lpm6_data *dd = (struct dpdk_lpm6_data *)_data;

	destroy_dd(dd);
}
static enum flm_op_result | |||||
add_route_cb(struct rtentry *rt, void *_data) | |||||
{ | |||||
struct dpdk_lpm6_data *dd = (struct dpdk_lpm6_data *)_data; | |||||
const struct sockaddr_in6 *sin6; | |||||
int plen = rt_get_plen(rt); | |||||
int ret; | |||||
sin6 = (const struct sockaddr_in6 *)rt_key_const(rt); | |||||
char abuf[INET6_ADDRSTRLEN]; | |||||
char mbuf[INET6_ADDRSTRLEN]; | |||||
inet_ntop(AF_INET6, &sin6->sin6_addr, abuf, sizeof(abuf)); | |||||
const struct sockaddr_in6 *mask6; | |||||
mask6 = (const struct sockaddr_in6 *)rt_mask_const(rt); | |||||
if (mask6 != NULL) { | |||||
inet_ntop(AF_INET6, &mask6->sin6_addr, mbuf, sizeof(abuf)); | |||||
} else | |||||
mbuf[0] = '\0'; | |||||
DPRINTF("Operating on %s/%d [%s]", abuf, plen, mbuf); | |||||
if (plen == 0) { | |||||
struct rib_cmd_info rc; | |||||
bzero(&rc, sizeof(rc)); | |||||
rc.rc_cmd = RTM_ADD; | |||||
rc.rc_nh_new = rt->rt_nhop; | |||||
DPRINTF("Adding default route"); | |||||
return (handle_default_change(dd, &rc)); | |||||
} | |||||
uint32_t nhidx = fib_get_nhop_idx(dd->fd, rt->rt_nhop); | |||||
if (nhidx == 0) { | |||||
DPRINTF("unable to get nhop index"); | |||||
return (FLM_REBUILD); | |||||
} | |||||
ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)&sin6->sin6_addr, plen, | |||||
nhidx, 1); | |||||
DPRINTF("ADD %p %s/%d nh %u = %d", dd->lpm6, abuf, plen, nhidx, ret); | |||||
if (ret != 0) { | |||||
DPRINTF("rte_lpm6_add() returned %d", ret); | |||||
if (ret == -ENOSPC) { | |||||
dd->hit_tables = 1; | |||||
return (FLM_REBUILD); | |||||
} | |||||
dd->routes_failed++; | |||||
} else | |||||
dd->routes_added++; | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static enum flm_op_result | |||||
check_dump_success(void *_data, struct fib_dp *dp) | |||||
{ | |||||
struct dpdk_lpm6_data *dd; | |||||
dd = (struct dpdk_lpm6_data *)_data; | |||||
DPRINTF("scan completed. added: %zu failed: %zu", | |||||
dd->routes_added, dd->routes_failed); | |||||
if (dd->hit_tables || dd->routes_failed > 0) | |||||
return (FLM_REBUILD); | |||||
DPRINTF("DPDK lookup engine synced with IPv6 RIB id %u, %zu routes", | |||||
dd->fibnum, dd->routes_added); | |||||
dp->f = lookup_ptr; | |||||
dp->arg = dd->lpm6; | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static void | |||||
estimate_scale(const struct dpdk_lpm6_data *dd_src, struct dpdk_lpm6_data *dd) | |||||
{ | |||||
/* XXX: update at 75% capacity */ | |||||
if (dd_src->hit_tables) | |||||
dd->number_tbl8s = dd_src->number_tbl8s * 2; | |||||
else | |||||
dd->number_tbl8s = dd_src->number_tbl8s; | |||||
/* TODO: look into the appropriate RIB to adjust */ | |||||
} | |||||
static struct dpdk_lpm6_data * | |||||
build_table(struct dpdk_lpm6_data *dd_prev) | |||||
{ | |||||
struct dpdk_lpm6_data *dd; | |||||
struct rte_lpm6 *lpm6; | |||||
dd = malloc(sizeof(struct dpdk_lpm6_data), M_TEMP, M_NOWAIT | M_ZERO); | |||||
if (dd == NULL) { | |||||
DPRINTF("Unable to allocate base datastructure"); | |||||
return (NULL); | |||||
} | |||||
dd->fibnum = dd_prev->fibnum; | |||||
dd->fd = dd_prev->fd; | |||||
estimate_scale(dd_prev, dd); | |||||
struct rte_lpm6_config cfg = {.number_tbl8s = dd->number_tbl8s}; | |||||
lpm6 = rte_lpm6_create("test", 0, &cfg); | |||||
if (lpm6 == NULL) { | |||||
DPRINTF("unable to create lpm6"); | |||||
free(dd, M_TEMP); | |||||
return (NULL); | |||||
} | |||||
dd->lpm6 = lpm6; | |||||
struct rte_lpm6_external *ext = (struct rte_lpm6_external *)lpm6; | |||||
ext->nh_idx = fib_get_nhop_array(dd->fd); | |||||
DPRINTF("allocated %u tbl8s", dd->number_tbl8s); | |||||
return (dd); | |||||
} | |||||
static enum flm_op_result | |||||
init_table(uint32_t fibnum, struct fib_data *fd, void *_old_data, void **data) | |||||
{ | |||||
struct dpdk_lpm6_data *dd, dd_base; | |||||
if (_old_data == NULL) { | |||||
bzero(&dd_base, sizeof(struct dpdk_lpm6_data)); | |||||
dd_base.fibnum = fibnum; | |||||
dd_base.fd = fd; | |||||
/* TODO: get rib statistics */ | |||||
dd_base.number_tbl8s = LPM6_MIN_TBL8; | |||||
dd = &dd_base; | |||||
} else { | |||||
DPRINTF("Starting with old data"); | |||||
dd = (struct dpdk_lpm6_data *)_old_data; | |||||
} | |||||
/* Guaranteed to be in epoch */ | |||||
dd = build_table(dd); | |||||
if (dd == NULL) { | |||||
DPRINTF("table creation failed"); | |||||
return (FLM_REBUILD); | |||||
} | |||||
*data = dd; | |||||
return (FLM_SUCCESS); | |||||
} | |||||
static struct fib_lookup_module dpdk_lpm6 = { | |||||
.flm_name = "dpdk_lpm6", | |||||
.flm_family = AF_INET6, | |||||
.flm_init_cb = init_table, | |||||
.flm_destroy_cb = destroy_table, | |||||
.flm_dump_rib_item_cb = add_route_cb, | |||||
.flm_dump_end_cb = check_dump_success, | |||||
.flm_change_rib_item_cb = handle_rtable_change_cb, | |||||
.flm_get_pref = rte6_get_pref, | |||||
}; | |||||
static int | |||||
lpm6_modevent(module_t mod, int type, void *unused) | |||||
{ | |||||
int error = 0; | |||||
switch (type) { | |||||
case MOD_LOAD: | |||||
fib_module_register(&dpdk_lpm6); | |||||
break; | |||||
case MOD_UNLOAD: | |||||
error = fib_module_unregister(&dpdk_lpm6); | |||||
break; | |||||
default: | |||||
error = EOPNOTSUPP; | |||||
break; | |||||
} | |||||
return (error); | |||||
} | |||||
static moduledata_t lpm6mod = { | |||||
"dpdk_lpm6", | |||||
lpm6_modevent, | |||||
0 | |||||
}; | |||||
DECLARE_MODULE(lpm6mod, lpm6mod, SI_SUB_PSEUDO, SI_ORDER_ANY); | |||||
MODULE_VERSION(lpm6mod, 1); |