Changeset View
Changeset View
Standalone View
Standalone View
sys/net/route/nhop.c
- This file was added.
/*- | |||||
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD | |||||
* | |||||
* Copyright (c) 2020 Alexander V. Chernikov | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions | |||||
* are met: | |||||
* 1. Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer. | |||||
* 2. Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||||
* SUCH DAMAGE. | |||||
* | |||||
* $FreeBSD$ | |||||
*/ | |||||
/* Review note (ae): *.c files usually use __FBSDID(). */
#include <sys/cdefs.h> | |||||
#include "opt_inet.h" | |||||
#include "opt_route.h" | |||||
#include <sys/param.h> | |||||
#include <sys/systm.h> | |||||
#include <sys/lock.h> | |||||
#include <sys/rwlock.h> | |||||
#include <sys/malloc.h> | |||||
#include <sys/mbuf.h> | |||||
#include <sys/socket.h> | |||||
#include <sys/kernel.h> | |||||
#include <net/if.h> | |||||
#include <net/if_var.h> | |||||
#include <net/route.h> | |||||
#include <net/route_var.h> | |||||
#include <net/route/nhop_utils.h> | |||||
#include <net/route/nhop.h> | |||||
#include <net/route/nhop_var.h> | |||||
#include <net/route/shared.h> | |||||
#include <net/vnet.h> | |||||
/*
 * This file contains the data structure management logic for the nexthop
 * ("nhop") route subsystem.
 *
 * Nexthops in the original sense are the objects containing all the necessary
 * information to forward the packet to the selected destination.
 * In particular, a nexthop is defined by a combination of
 * ifp, ifa, aifp, mtu, gw addr (if set), nh_type, nh_family, mask of rt_flags
 * and NHF_DEFAULT.
 *
 * All nexthops are stored in a resizable hash table.
 * Additionally, each nexthop is assigned a unique index (nexthop index)
 * so userland programs can interact with the nexthops more easily. Index
 * allocation is backed by a bitmask array.
 */
/* Malloc type passed to the malloc()/free() calls of the nexthop code in this file. */
static MALLOC_DEFINE(M_NHOP, "nhops", "nexthops data");
/* Hash management functions */ | |||||
int | |||||
nhops_init_rib(struct rib_head *rh) | |||||
{ | |||||
struct nh_control *ctl; | |||||
size_t alloc_size; | |||||
uint32_t num_buckets, num_items; | |||||
void *ptr; | |||||
ctl = malloc(sizeof(struct nh_control), M_NHOP, M_WAITOK | M_ZERO); | |||||
/* | |||||
* Allocate nexthop hash. Start with 16 items by default (128 bytes). | |||||
* This will be enough for most of the cases. | |||||
*/ | |||||
num_buckets = 16; | |||||
alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets); | |||||
ptr = malloc(alloc_size, M_NHOP, M_WAITOK | M_ZERO); | |||||
CHT_SLIST_INIT(&ctl->nh_head, ptr, num_buckets); | |||||
/* | |||||
* Allocate nexthop index bitmask. | |||||
*/ | |||||
num_items = 128 * 8; /* 128 bytes */ | |||||
ptr = malloc(bitmask_get_size(num_items), M_NHOP, M_WAITOK | M_ZERO); | |||||
bitmask_init(&ctl->nh_idx_head, ptr, num_items); | |||||
NHOPS_LOCK_INIT(ctl); | |||||
rh->nh_control = ctl; | |||||
ctl->rh = rh; | |||||
DPRINTF("NHOPS init for fib %u af %u: ctl %p rh %p", rh->rib_fibnum, | |||||
rh->rib_family, ctl, rh); | |||||
return (0); | |||||
} | |||||
void | |||||
nhops_destroy_rib(struct rib_head *rh) | |||||
{ | |||||
struct nh_control *ctl; | |||||
struct nhop_priv *nh_priv; | |||||
ctl = rh->nh_control; | |||||
/* | |||||
* All routes should have been deleted in rt_table_destroy(). | |||||
* However, TCP stack or other consumers may store referenced | |||||
* nexthop pointers. When these references go to zero, | |||||
* nhop_free_object() will try to unlink these records from the | |||||
* datastructures, most likely leading to panic. | |||||
* | |||||
* Avoid that by explicitly marking all of the remaining | |||||
* nexthops as unlinked. | |||||
*/ | |||||
NHOPS_RLOCK(ctl); | |||||
CHT_SLIST_FOREACH(&ctl->nh_head, nhops, nh_priv) { | |||||
DPRINTF("Unlinking referenced nhop %u", nh_priv->nh_idx); | |||||
NH_PRIV_LOCK(nh_priv); | |||||
nh_priv->nh_control = NULL; | |||||
nh_priv->nh_idx = 0; | |||||
NH_PRIV_UNLOCK(nh_priv); | |||||
} CHT_SLIST_FOREACH_END; | |||||
NHOPS_RUNLOCK(ctl); | |||||
free(ctl->nh_head.ptr, M_NHOP); | |||||
free(ctl->nh_idx_head.idx, M_NHOP); | |||||
free(ctl, M_NHOP); | |||||
} | |||||
/*
 * Nexthop hash calculation:
 *
 * Nexthops distribution:
 * 2 "mandatory" nexthops per interface ("interface route", "loopback").
 * For direct peering: 1 nexthop for the peering router per ifp/af.
 * For IX-like peering: tens to hundreds of neighbor nexthops per ifp/af.
 * IGP control plane & broadcast segment: tens of nexthops per ifp/af.
 *
 * With that in mind, hash nexthops by the combination of the interface
 * and GW IP address.
 *
 * To optimize hash calculation, ignore the higher bytes of ifindex, as they
 * give very little entropy.
 * Similarly, use the lower 4 bytes of the IPv6 address to distinguish between
 * the neighbors.
 */
/*
 * Fixed-size key filled in by hash_priv() and hashed byte by byte.
 * Field order and widths are part of the hash input; do not reorder.
 */
struct _hash_data {
	uint16_t	ifindex;	/* low 16 bits of the interface index */
	uint8_t		family;		/* gateway sockaddr family */
	uint8_t		nh_type;	/* low 8 bits of the nexthop type */
	uint32_t	gw_addr;	/* IPv4 gateway or last 4 bytes of the IPv6 one */
};
/*
 * Folds @len bytes starting at @h into an unsigned hash value.
 *
 * The accumulator starts at 0; for every input byte it is multiplied
 * by 33 and the byte is XORed in. Returns 0 when @len is not positive.
 */
static unsigned
djb_hash(const unsigned char *h, const int len)
{
	const unsigned char *cur = h;
	unsigned int acc = 0;
	int left;

	for (left = len; left > 0; left--)
		acc = (acc * 33) ^ *cur++;

	return (acc);
}
static uint32_t | |||||
hash_priv(const struct nhop_priv *priv) | |||||
{ | |||||
struct nhop_object *nh; | |||||
uint16_t ifindex; | |||||
struct _hash_data key; | |||||
nh = priv->nh; | |||||
ifindex = nh->nh_ifp->if_index & 0xFFFF; | |||||
memset(&key, 0, sizeof(key)); | |||||
key.ifindex = ifindex; | |||||
key.family = nh->gw_sa.sa_family; | |||||
key.nh_type = priv->nh_type & 0xFF; | |||||
if (nh->gw_sa.sa_family == AF_INET6) | |||||
memcpy(&key.gw_addr, &nh->gw6_sa.sin6_addr.s6_addr32[3], 4); | |||||
else if (nh->gw_sa.sa_family == AF_INET) | |||||
memcpy(&key.gw_addr, &nh->gw4_sa.sin_addr, 4); | |||||
return (uint32_t)(djb_hash((const unsigned char *)&key, sizeof(key))); | |||||
} | |||||
/* | |||||
* Checks if hash needs resizing and performs this resize if necessary | |||||
* | |||||
*/ | |||||
static void | |||||
consider_resize(struct nh_control *ctl, uint32_t new_nh_buckets, uint32_t new_idx_items) | |||||
{ | |||||
void *nh_ptr, *nh_idx_ptr; | |||||
void *old_idx_ptr; | |||||
size_t alloc_size; | |||||
nh_ptr = NULL; | |||||
if (new_nh_buckets != 0) { | |||||
alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_nh_buckets); | |||||
nh_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO); | |||||
} | |||||
nh_idx_ptr = NULL; | |||||
if (new_idx_items != 0) { | |||||
alloc_size = bitmask_get_size(new_idx_items); | |||||
nh_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO); | |||||
} | |||||
if (nh_ptr == NULL && nh_idx_ptr == NULL) { | |||||
/* Either resize is not required or allocations have failed. */ | |||||
return; | |||||
} | |||||
DPRINTF("going to resize: nh:[ptr:%p sz:%u] idx:[ptr:%p sz:%u]", nh_ptr, | |||||
new_nh_buckets, nh_idx_ptr, new_idx_items); | |||||
old_idx_ptr = NULL; | |||||
NHOPS_WLOCK(ctl); | |||||
if (nh_ptr != NULL) { | |||||
CHT_SLIST_RESIZE(&ctl->nh_head, nhops, nh_ptr, new_nh_buckets); | |||||
} | |||||
if (nh_idx_ptr != NULL) { | |||||
if (bitmask_copy(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items)) | |||||
bitmask_swap(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items, &old_idx_ptr); | |||||
} | |||||
NHOPS_WUNLOCK(ctl); | |||||
if (nh_ptr != NULL) | |||||
free(nh_ptr, M_NHOP); | |||||
if (old_idx_ptr != NULL) | |||||
free(old_idx_ptr, M_NHOP); | |||||
} | |||||
/* | |||||
* Links nextop @nh_priv to the nexhop hash table and allocates | |||||
* nexhop index. | |||||
* Returns allocated index or 0 on failure. | |||||
*/ | |||||
int | |||||
link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv) | |||||
{ | |||||
uint16_t idx; | |||||
uint32_t num_buckets_new, num_items_new; | |||||
KASSERT((nh_priv->nh_idx == 0), ("nhop index is already allocated")); | |||||
NHOPS_WLOCK(ctl); | |||||
/* | |||||
* Check if we need to resize hash and index. | |||||
* The following 2 functions returns either new size or 0 | |||||
* if resize is not required. | |||||
*/ | |||||
num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head); | |||||
num_items_new = bitmask_get_resize_items(&ctl->nh_idx_head); | |||||
if (bitmask_alloc_idx(&ctl->nh_idx_head, &idx) != 0) { | |||||
NHOPS_WUNLOCK(ctl); | |||||
DPRINTF("Unable to allocate nhop index"); | |||||
RTSTAT_INC(rts_nh_idx_alloc_failure); | |||||
consider_resize(ctl, num_buckets_new, num_items_new); | |||||
return (0); | |||||
} | |||||
NH_PRIV_LOCK(nh_priv); | |||||
aeUnsubmitted Done Inline ActionsThis locking looks redundant. ae: This locking looks redundant. | |||||
nh_priv->nh_idx = idx; | |||||
nh_priv->nh_control = ctl; | |||||
NH_PRIV_UNLOCK(nh_priv); | |||||
CHT_SLIST_INSERT_HEAD(&ctl->nh_head, nhops, nh_priv); | |||||
NHOPS_WUNLOCK(ctl); | |||||
DPRINTF("Linked nhop priv %p to %d, hash %u, ctl %p", nh_priv, idx, | |||||
hash_priv(nh_priv), ctl); | |||||
consider_resize(ctl, num_buckets_new, num_items_new); | |||||
return (idx); | |||||
} | |||||
/* | |||||
* Unlinks nexthop specified by @nh_priv data from the hash. | |||||
* | |||||
* Returns found nexthop or NULL. | |||||
*/ | |||||
struct nhop_priv * | |||||
unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv_del) | |||||
{ | |||||
struct nhop_priv *priv_ret; | |||||
int idx; | |||||
uint32_t num_buckets_new, num_items_new; | |||||
idx = 0; | |||||
NHOPS_WLOCK(ctl); | |||||
CHT_SLIST_REMOVE_BYOBJ(&ctl->nh_head, nhops, nh_priv_del, priv_ret); | |||||
if (priv_ret != NULL) { | |||||
NH_PRIV_LOCK(priv_ret); | |||||
idx = priv_ret->nh_idx; | |||||
priv_ret->nh_idx = 0; | |||||
priv_ret->nh_control = NULL; | |||||
NH_PRIV_UNLOCK(priv_ret); | |||||
KASSERT((idx != 0), ("bogus nhop index 0")); | |||||
if ((bitmask_free_idx(&ctl->nh_idx_head, idx)) != 0) { | |||||
DPRINTF("Unable to remove index %d from fib %u af %d", | |||||
idx, ctl->rh->rib_fibnum, ctl->rh->rib_family); | |||||
} | |||||
} | |||||
/* Check if hash or index needs to be resized */ | |||||
num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head); | |||||
num_items_new = bitmask_get_resize_items(&ctl->nh_idx_head); | |||||
NHOPS_WUNLOCK(ctl); | |||||
if (priv_ret == NULL) | |||||
DPRINTF("Unable to unlink nhop priv %p from hash, hash %u ctl %p", | |||||
nh_priv_del, hash_priv(nh_priv_del), ctl); | |||||
else | |||||
DPRINTF("Unlinked nhop %p priv idx %d", priv_ret, idx); | |||||
consider_resize(ctl, num_buckets_new, num_items_new); | |||||
return (priv_ret); | |||||
} | |||||
/* | |||||
* Searches for the nexthop by data specifcied in @nh_priv. | |||||
* Returns referenced nexthop or NULL. | |||||
*/ | |||||
struct nhop_priv * | |||||
find_nhop(struct nh_control *ctl, const struct nhop_priv *nh_priv) | |||||
{ | |||||
struct nhop_priv *nh_priv_ret; | |||||
NHOPS_RLOCK(ctl); | |||||
CHT_SLIST_FIND_BYOBJ(&ctl->nh_head, nhops, nh_priv, nh_priv_ret); | |||||
if (nh_priv_ret != NULL) { | |||||
if (refcount_acquire_if_not_zero(&nh_priv_ret->nh_refcnt) == 0){ | |||||
/* refcount was 0 -> nhop is being deleted */ | |||||
nh_priv_ret = NULL; | |||||
} | |||||
} | |||||
NHOPS_RUNLOCK(ctl); | |||||
return (nh_priv_ret); | |||||
} | |||||
/* Review note: *.c files usually use __FBSDID(). */