diff --git a/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c b/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c
--- a/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c
+++ b/sys/contrib/dpdk_rte_lpm/dpdk_lpm.c
@@ -134,26 +134,27 @@
 }
 
 static void
-get_parent_rule(struct dpdk_lpm_data *dd, struct in_addr addr, uint8_t *plen, uint32_t *nhop_idx)
+get_parent_rule(struct dpdk_lpm_data *dd, struct in_addr addr, int plen,
+    uint8_t *pplen, uint32_t *nhop_idx)
 {
-	struct route_nhop_data rnd;
 	struct rtentry *rt;
 
-	rt = fib4_lookup_rt(dd->fibnum, addr, 0, NHR_UNLOCKED, &rnd);
+	rt = rt_get_inet_parent(dd->fibnum, addr, plen);
 	if (rt != NULL) {
 		struct in_addr addr4;
 		uint32_t scopeid;
-		int inet_plen;
-		rt_get_inet_prefix_plen(rt, &addr4, &inet_plen, &scopeid);
-		if (inet_plen > 0) {
-			*plen = inet_plen;
-			*nhop_idx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop);
+		int parent_plen;
+
+		rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
+		if (parent_plen > 0) {
+			*pplen = parent_plen;
+			*nhop_idx = fib_get_nhop_idx(dd->fd, rt_get_raw_nhop(rt));
 			return;
 		}
 	}
 
 	*nhop_idx = 0;
-	*plen = 0;
+	*pplen = 0;
 }
 
 static enum flm_op_result
@@ -181,20 +182,23 @@
 		}
 
 		ret = rte_lpm_add(dd->lpm, ip, plen, nhidx);
-		FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u = %d",
+		FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u -> %u ret: %d",
 		    (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
-		    abuf, plen, nhidx, ret);
+		    abuf, plen,
+		    rc->rc_nh_old != NULL ? fib_get_nhop_idx(dd->fd, rc->rc_nh_old) : 0,
+		    nhidx, ret);
 	} else {
 		/*
 		 * Need to lookup parent. Assume deletion happened already
 		 */
 		uint8_t parent_plen;
 		uint32_t parent_nhop_idx;
-		get_parent_rule(dd, addr, &parent_plen, &parent_nhop_idx);
+		get_parent_rule(dd, addr, plen, &parent_plen, &parent_nhop_idx);
 
 		ret = rte_lpm_delete(dd->lpm, ip, plen, parent_plen, parent_nhop_idx);
-		FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK: %s %s/%d nhop %u = %d",
-		    "DEL", abuf, plen, nhidx, ret);
+		FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK: %s %s/%d -> /%d nhop %u -> %u ret: %d",
+		    "DEL", abuf, plen, parent_plen, fib_get_nhop_idx(dd->fd, rc->rc_nh_old),
+		    parent_nhop_idx, ret);
 	}
 
 	if (ret != 0) {
diff --git a/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c b/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c
--- a/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c
+++ b/sys/contrib/dpdk_rte_lpm/dpdk_lpm6.c
@@ -165,30 +165,26 @@
 }
 
 static struct rte_lpm6_rule *
-pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6,
-    char *buffer)
+pack_parent_rule(struct dpdk_lpm6_data *dd, const struct in6_addr *addr6, int plen,
+    int *pplen, uint32_t *pnhop_idx, char *buffer)
 {
 	struct rte_lpm6_rule *lsp_rule = NULL;
-	struct route_nhop_data rnd;
 	struct rtentry *rt;
-	int plen;
 
-	rt = fib6_lookup_rt(dd->fibnum, addr6, 0, NHR_UNLOCKED, &rnd);
+	*pnhop_idx = 0;
+	*pplen = 0;
+
+	rt = rt_get_inet6_parent(dd->fibnum, addr6, plen);
 	/* plen = 0 means default route and it's out of scope */
 	if (rt != NULL) {
-		uint32_t scopeid;
+		uint32_t nhop_idx, scopeid;
 		struct in6_addr new_addr6;
 		rt_get_inet6_prefix_plen(rt, &new_addr6, &plen, &scopeid);
 		if (plen > 0) {
-			uint32_t nhidx = fib_get_nhop_idx(dd->fd, rnd.rnd_nhop);
-			if (nhidx == 0) {
-				/*
-				 * shouldn't happen as we already have parent route.
-				 * It will trigger rebuild automatically.
-				 */
-				return (NULL);
-			}
-			lsp_rule = fill_rule6(buffer, (uint8_t *)&new_addr6, plen, nhidx);
+			nhop_idx = fib_get_nhop_idx(dd->fd, rt_get_raw_nhop(rt));
+			lsp_rule = fill_rule6(buffer, (uint8_t *)&new_addr6, plen, nhop_idx);
+			*pnhop_idx = nhop_idx;
+			*pplen = plen;
 		}
 	}
 
@@ -217,20 +213,26 @@
 
 		ret = rte_lpm6_add(dd->lpm6, (const uint8_t *)addr6,
 		    plen, nhidx, (rc->rc_cmd == RTM_ADD) ? 1 : 0);
-		FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u = %d",
+		FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop %u -> %u ret: %d",
 		    (rc->rc_cmd == RTM_ADD) ? "ADD" : "UPDATE",
-		    abuf, plen, nhidx, ret);
+		    abuf, plen,
+		    rc->rc_nh_old != NULL ? fib_get_nhop_idx(dd->fd, rc->rc_nh_old) : 0,
+		    nhidx, ret);
 	} else {
 		/*
 		 * Need to lookup parent. Assume deletion happened already
 		 */
 		char buffer[RTE_LPM6_RULE_SIZE];
 		struct rte_lpm6_rule *lsp_rule = NULL;
-		lsp_rule = pack_parent_rule(dd, addr6, buffer);
+		int parent_plen;
+		uint32_t parent_nhop_idx;
+		lsp_rule = pack_parent_rule(dd, addr6, plen, &parent_plen,
+		    &parent_nhop_idx, buffer);
 
 		ret = rte_lpm6_delete(dd->lpm6, (const uint8_t *)addr6, plen, lsp_rule);
-		FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d nhop ? = %d",
-		    "DEL", abuf, plen, ret);
+		FIB_PRINTF(LOG_DEBUG, dd->fd, "DPDK GU: %s %s/%d -> /%d nhop %u -> %u ret: %d",
+		    "DEL", abuf, plen, parent_plen, fib_get_nhop_idx(dd->fd, rc->rc_nh_old),
+		    parent_nhop_idx, ret);
 	}
 
 	if (ret != 0) {
diff --git a/sys/net/radix.h b/sys/net/radix.h
--- a/sys/net/radix.h
+++ b/sys/net/radix.h
@@ -119,6 +119,7 @@
 typedef int rn_walktree_from_t(struct radix_head *head,
     void *a, void *m, walktree_f_t *f, void *w);
 typedef void rn_close_t(struct radix_node *rn, struct radix_head *head);
+struct radix_node *rn_nextprefix(struct radix_node *rn);
 
 struct radix_mask_head;
 
diff --git a/sys/net/radix.c b/sys/net/radix.c
--- a/sys/net/radix.c
+++ b/sys/net/radix.c
@@ -371,6 +371,20 @@
 	return (0);
 }
 
+/*
+ * Returns the next (wider) prefix for the key defined by @rn
+ * if exists, or NULL otherwise. Nodes flagged RNF_ROOT are skipped.
+ */
+struct radix_node *
+rn_nextprefix(struct radix_node *rn)
+{
+	for (rn = rn->rn_dupedkey; rn != NULL; rn = rn->rn_dupedkey) {
+		if (!(rn->rn_flags & RNF_ROOT))
+			return (rn);
+	}
+	return (NULL);
+}
+
 #ifdef RN_DEBUG
 int	rn_nodenum;
 struct	radix_node *rn_clist;
diff --git a/sys/net/route/route_ctl.h b/sys/net/route/route_ctl.h
--- a/sys/net/route/route_ctl.h
+++ b/sys/net/route/route_ctl.h
@@ -117,6 +117,7 @@
     int *plen, uint32_t *pscopeid);
 void rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr,
     struct in_addr *pmask, uint32_t *pscopeid);
+struct rtentry *rt_get_inet_parent(uint32_t fibnum, struct in_addr addr, int plen);
 #endif
 #ifdef INET6
 struct in6_addr;
@@ -124,6 +125,8 @@
     int *plen, uint32_t *pscopeid);
 void rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr,
     struct in6_addr *pmask, uint32_t *pscopeid);
+struct rtentry *rt_get_inet6_parent(uint32_t fibnum, const struct in6_addr *paddr,
+    int plen);
 #endif
 
 /* Nexthops */
diff --git a/sys/net/route/route_helpers.c b/sys/net/route/route_helpers.c
--- a/sys/net/route/route_helpers.c
+++ b/sys/net/route/route_helpers.c
@@ -60,6 +60,7 @@
 #endif
 #ifdef INET6
 #include <netinet6/in6_fib.h>
+#include <netinet6/in6_var.h>
 #endif
 #include <net/vnet.h>
 
@@ -415,3 +416,166 @@
 	}
 }
 #endif
+
+#ifdef INET
+/*
+ * Looks up @addr in the @fibnum trie and checks if the found key
+ * contains a prefix that is at most @plen bits long.
+ * Returns the most specific rtentry matching the condition, or NULL
+ * if the lookup fails or no prefix on that key is wide enough.
+ */
+static struct rtentry *
+get_inet_parent_prefix(uint32_t fibnum, struct in_addr addr, int plen)
+{
+	struct route_nhop_data rnd;
+	struct rtentry *rt;
+	struct in_addr addr4;
+	uint32_t scopeid;
+	int parent_plen;
+	struct radix_node *rn;
+
+	rt = fib4_lookup_rt(fibnum, addr, 0, NHR_UNLOCKED, &rnd);
+	if (rt == NULL)
+		return (NULL);
+	rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
+	if (parent_plen <= plen)
+		return (rt);
+
+	/*
+	 * There can be multiple prefixes associated with the found key:
+	 * 10.0.0.0 -> 10.0.0.0/24, 10.0.0.0/23, 10.0.0.0/22, etc.
+	 * All such prefixes are linked via rn_dupedkey, from most specific
+	 * to least specific. Iterate over them to check if any of these
+	 * prefixes are wider than desired plen.
+	 */
+	rn = (struct radix_node *)rt;
+	while ((rn = rn_nextprefix(rn)) != NULL) {
+		rt = RNTORT(rn);
+		rt_get_inet_prefix_plen(rt, &addr4, &parent_plen, &scopeid);
+		if (parent_plen <= plen)
+			return (rt);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Returns the most specific prefix containing (>) @addr/@plen, or NULL.
+ */
+struct rtentry *
+rt_get_inet_parent(uint32_t fibnum, struct in_addr addr, int plen)
+{
+	struct in_addr lookup_addr = { .s_addr = INADDR_BROADCAST };
+	struct in_addr addr4 = addr;
+	struct in_addr mask4;
+	struct rtentry *rt;
+
+	while (plen-- > 0) {
+		/* Calculate wider mask & new key to lookup */
+		mask4.s_addr = htonl(plen ? ~((1U << (32 - plen)) - 1) : 0);
+		addr4.s_addr = htonl(ntohl(addr4.s_addr) & ntohl(mask4.s_addr));
+		if (addr4.s_addr == lookup_addr.s_addr) {
+			/* Skip lookup if the key is the same */
+			continue;
+		}
+		lookup_addr = addr4;
+
+		rt = get_inet_parent_prefix(fibnum, lookup_addr, plen);
+		if (rt != NULL)
+			return (rt);
+	}
+
+	return (NULL);
+}
+#endif
+
+#ifdef INET6
+/*
+ * Looks up @paddr in the @fibnum trie and checks if the found key
+ * contains a prefix that is at most @plen bits long.
+ * Returns the most specific rtentry matching the condition, or NULL
+ * if the lookup fails or no prefix on that key is wide enough.
+ */
+static struct rtentry *
+get_inet6_parent_prefix(uint32_t fibnum, const struct in6_addr *paddr, int plen)
+{
+	struct route_nhop_data rnd;
+	struct rtentry *rt;
+	struct in6_addr addr6;
+	uint32_t scopeid;
+	int parent_plen;
+	struct radix_node *rn;
+
+	rt = fib6_lookup_rt(fibnum, paddr, 0, NHR_UNLOCKED, &rnd);
+	if (rt == NULL)
+		return (NULL);
+	rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid);
+	if (parent_plen <= plen)
+		return (rt);
+
+	/*
+	 * There can be multiple prefixes associated with the found key:
+	 * 2001:db8:1::/64 -> 2001:db8:1::/56, 2001:db8:1::/48, etc.
+	 * All such prefixes are linked via rn_dupedkey, from most specific
+	 * to least specific. Iterate over them to check if any of these
+	 * prefixes are wider than desired plen.
+	 */
+	rn = (struct radix_node *)rt;
+	while ((rn = rn_nextprefix(rn)) != NULL) {
+		rt = RNTORT(rn);
+		rt_get_inet6_prefix_plen(rt, &addr6, &parent_plen, &scopeid);
+		if (parent_plen <= plen)
+			return (rt);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Writes the network mask for prefix length @mask into @addr6; a length
+ * above 128 yields all-ones. Every 32-bit word of @addr6 is stored.
+ */
+static void
+ipv6_writemask(struct in6_addr *addr6, uint8_t mask)
+{
+	uint32_t *cp = (uint32_t *)addr6;
+	uint32_t *end = cp + sizeof(*addr6) / sizeof(*cp);
+
+	for (; mask >= 32 && cp < end; mask -= 32)
+		*cp++ = 0xFFFFFFFF;
+	if (mask > 0 && cp < end)
+		*cp++ = htonl(~((1U << (32 - mask)) - 1));
+	/* Zero the tail: the caller passes an uninitialized in6_addr */
+	while (cp < end)
+		*cp++ = 0;
+}
+
+/*
+ * Returns the most specific prefix containing (>) @paddr/@plen, or NULL.
+ */
+struct rtentry *
+rt_get_inet6_parent(uint32_t fibnum, const struct in6_addr *paddr, int plen)
+{
+	struct in6_addr lookup_addr = in6mask128;
+	struct in6_addr addr6 = *paddr;
+	struct in6_addr mask6;
+	struct rtentry *rt;
+
+	while (plen-- > 0) {
+		/* Calculate wider mask & new key to lookup */
+		ipv6_writemask(&mask6, plen);
+		IN6_MASK_ADDR(&addr6, &mask6);
+		if (IN6_ARE_ADDR_EQUAL(&addr6, &lookup_addr)) {
+			/* Skip lookup if the key is the same */
+			continue;
+		}
+		lookup_addr = addr6;
+
+		rt = get_inet6_parent_prefix(fibnum, &lookup_addr, plen);
+		if (rt != NULL)
+			return (rt);
+	}
+
+	return (NULL);
+}
+#endif