Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F131486312
D24232.id70048.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
132 KB
Referenced Files
None
Subscribers
None
D24232.id70048.diff
View Options
Index: include/Makefile
===================================================================
--- include/Makefile
+++ include/Makefile
@@ -1,5 +1,5 @@
# @(#)Makefile 8.2 (Berkeley) 1/4/94
-# $FreeBSD$
+# $FreeBSD: head/include/Makefile 358500 2020-03-01 20:37:42Z imp $
#
# Doing a "make install" builds /usr/include.
@@ -54,6 +54,7 @@
geom/mirror geom/mountver geom/multipath geom/nop \
geom/raid geom/raid3 geom/shsec geom/stripe geom/virstor \
net/altq \
+ net/route \
netgraph/atm netgraph/netflow \
netinet/cc \
netinet/netdump \
Index: lib/libc/gen/sysctl.3
===================================================================
--- lib/libc/gen/sysctl.3
+++ lib/libc/gen/sysctl.3
@@ -563,6 +563,7 @@
.It Dv NET_RT_IFLIST Ta 0 or if_index Ta None
.It Dv NET_RT_IFMALIST Ta 0 or if_index Ta None
.It Dv NET_RT_IFLISTL Ta 0 or if_index Ta None
+.It Dv NET_RT_NHOPS Ta None Ta fib number
.El
.Pp
The
@@ -583,6 +584,9 @@
.Va struct if_msghdrl
and
.Va struct ifa_msghdrl .
+.Pp
+.Dv NET_RT_NHOPS
+returns all nexthops for the specified address family in the given fib.
.It Li PF_INET
Get or set various global information about the IPv4
(Internet Protocol version 4).
Index: sys/conf/files
===================================================================
--- sys/conf/files
+++ sys/conf/files
@@ -4094,6 +4094,11 @@
net/raw_usrreq.c standard
net/route.c standard
net/route_temporal.c standard
+net/route/nhop.c standard
+net/route/nhop_ctl.c standard
+net/route/nhop_utils.c standard
+net/route/route_ctl.c standard
+net/route/route_helpers.c standard
net/rss_config.c optional inet rss | inet6 rss
net/rtsock.c standard
net/slcompress.c optional netgraph_vjc | sppp | \
Index: sys/net/route.h
===================================================================
--- sys/net/route.h
+++ sys/net/route.h
@@ -90,7 +90,8 @@
u_long rmx_rttvar; /* estimated rtt variance */
u_long rmx_pksent; /* packets sent using this route */
u_long rmx_weight; /* route weight */
- u_long rmx_filler[3]; /* will be used for T/TCP later */
+ u_long rmx_nhidx; /* route nexhop index */
+ u_long rmx_filler[2]; /* will be used for T/TCP later */
};
/*
@@ -150,6 +151,7 @@
struct sockaddr *rt_gateway; /* value */
struct ifnet *rt_ifp; /* the answer: interface to use */
struct ifaddr *rt_ifa; /* the answer: interface address to use */
+ struct nhop_object *rt_nhop; /* nexthop data */
int rt_flags; /* up/down?, host/net */
int rt_refcnt; /* # held references */
u_int rt_fibnum; /* which FIB */
@@ -215,9 +217,13 @@
#define NHF_HOST 0x0400 /* RTF_HOST */
/* Nexthop request flags */
+#define NHR_NONE 0x00 /* empty flags field */
#define NHR_IFAIF 0x01 /* Return ifa_ifp interface */
#define NHR_REF 0x02 /* For future use */
+/* uRPF */
+#define NHR_NODEFAULT 0x04 /* do not consider default route */
+
/* Control plane route request flags */
#define NHR_COPY 0x100 /* Copy rte data */
@@ -245,6 +251,8 @@
uint64_t rts_newgateway; /* routes modified by redirects */
uint64_t rts_unreach; /* lookups which failed */
uint64_t rts_wildcard; /* lookups satisfied by a wildcard */
+ uint64_t rts_nh_idx_alloc_failure; /* nexthop index alloc failure*/
+ uint64_t rts_nh_alloc_failure; /* nexthop allocation failure*/
};
/*
@@ -507,6 +515,8 @@
struct sockaddr *gateway, struct sockaddr *author, struct ifnet *ifp,
int flags, int expire_sec);
+/* New API */
+void rib_walk(int af, u_int fibnum, rt_walktree_f_t *wa_f, void *arg);
#endif
#endif
Index: sys/net/route.c
===================================================================
--- sys/net/route.c
+++ sys/net/route.c
@@ -62,6 +62,8 @@
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route_var.h>
+#include <net/route/nhop.h>
+#include <net/route/shared.h>
#include <net/vnet.h>
#ifdef RADIX_MPATH
@@ -108,10 +110,7 @@
SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET,
&VNET_NAME(rt_add_addr_allfibs), 0, "");
-VNET_PCPUSTAT_DEFINE_STATIC(struct rtstat, rtstat);
-#define RTSTAT_ADD(name, val) \
- VNET_PCPUSTAT_ADD(struct rtstat, rtstat, name, (val))
-#define RTSTAT_INC(name) RTSTAT_ADD(name, 1)
+VNET_PCPUSTAT_DEFINE(struct rtstat, rtstat);
VNET_PCPUSTAT_SYSINIT(rtstat);
#ifdef VIMAGE
@@ -142,7 +141,6 @@
EVENTHANDLER_LIST_DEFINE(rt_addrmsg);
-static int rt_getifa_fib(struct rt_addrinfo *, u_int);
static int rtrequest1_fib_change(struct rib_head *, struct rt_addrinfo *,
struct rtentry **, u_int);
static void rt_setmetrics(const struct rt_addrinfo *, struct rtentry *);
@@ -235,6 +233,7 @@
rt_numfibs = RT_MAXFIBS;
if (rt_numfibs == 0)
rt_numfibs = 1;
+ nhops_init();
}
SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, NULL);
@@ -372,6 +371,8 @@
/* Init locks */
RIB_LOCK_INIT(rh);
+ nhops_init_rib(rh);
+
/* Finally, set base callbacks */
rh->rnh_addaddr = rn_addroute;
rh->rnh_deladdr = rn_delete;
@@ -403,6 +404,8 @@
rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head);
+ nhops_destroy_rib(rh);
+
/* Assume table is already empty */
RIB_LOCK_DESTROY(rh);
free(rh, M_RTABLE);
@@ -581,6 +584,9 @@
*/
R_Free(rt_key(rt));
+ /* Unreference nexthop */
+ nhop_free_object(rt->rt_nhop);
+
/*
* and the rtentry itself of course
*/
@@ -1395,6 +1401,7 @@
RIB_WLOCK(rnh);
rnh->rnh_walktree(&rnh->head, if_updatemtu_cb, &ifmtu);
RIB_WUNLOCK(rnh);
+ nhops_update_ifmtu(rnh, ifp, ifmtu.mtu);
}
}
}
@@ -1538,7 +1545,9 @@
struct rib_head *rnh;
struct ifaddr *ifa;
struct sockaddr *ndst;
+ struct nhop_object *nh;
struct sockaddr_storage mdst;
+ struct epoch_tracker et;
KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
KASSERT((flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked"));
@@ -1614,11 +1623,22 @@
} else {
ifa_ref(info->rti_ifa);
}
+
+ NET_EPOCH_ENTER(et);
+ nh = nhop_create_from_info_wrapper(rnh, info);
+ NET_EPOCH_EXIT(et);
+ if (nh == NULL) {
+ ifa_free(info->rti_ifa);
+ return (ENOBUFS);
+ }
+
rt = uma_zalloc(V_rtzone, M_NOWAIT);
if (rt == NULL) {
ifa_free(info->rti_ifa);
+ nhop_free_object(nh);
return (ENOBUFS);
}
+ rt->rt_nhop = nh;
rt->rt_flags = RTF_UP | flags;
rt->rt_fibnum = fibnum;
/*
@@ -1626,6 +1646,7 @@
*/
if ((error = rt_setgate(rt, dst, gateway)) != 0) {
ifa_free(info->rti_ifa);
+ nhop_free_object(nh);
uma_zfree(V_rtzone, rt);
return (error);
}
@@ -1664,6 +1685,7 @@
RIB_WUNLOCK(rnh);
ifa_free(rt->rt_ifa);
+ nhop_free_object(nh);
R_Free(rt_key(rt));
uma_zfree(V_rtzone, rt);
return (EEXIST);
@@ -1705,6 +1727,7 @@
*/
if (rn == NULL) {
ifa_free(rt->rt_ifa);
+ nhop_free_object(nh);
R_Free(rt_key(rt));
uma_zfree(V_rtzone, rt);
return (EEXIST);
@@ -1734,9 +1757,11 @@
RT_UNLOCK(rt);
break;
case RTM_CHANGE:
+ NET_EPOCH_ENTER(et);
RIB_WLOCK(rnh);
error = rtrequest1_fib_change(rnh, info, ret_nrt, fibnum);
RIB_WUNLOCK(rnh);
+ NET_EPOCH_EXIT(et);
break;
default:
error = EOPNOTSUPP;
@@ -1760,6 +1785,7 @@
int error = 0;
int free_ifa = 0;
int family, mtu;
+ struct nhop_object *nh;
struct if_mtuinfo ifmtu;
RIB_WLOCK_ASSERT(rnh);
@@ -1784,6 +1810,12 @@
RT_LOCK(rt);
+ nh = nhop_create_from_nhop_wrapper(rnh, rt->rt_nhop, info);
+ if (nh == NULL) {
+ RT_UNLOCK(rt);
+ return (ENOBUFS);
+ }
+
rt_setmetrics(info, rt);
/*
@@ -1855,6 +1887,10 @@
}
}
+ /* Update nexthop */
+ nhop_free_object(rt->rt_nhop);
+ rt->rt_nhop = nh;
+
/*
* This route change may have modified the route's gateway. In that
* case, any inpcbs that have cached this route need to invalidate their
@@ -1868,6 +1904,7 @@
}
bad:
RT_UNLOCK(rt);
+ nhop_free_object(nh);
if (free_ifa != 0) {
ifa_free(info->rti_ifa);
info->rti_ifa = NULL;
Index: sys/net/route/nhop.h
===================================================================
--- /dev/null
+++ sys/net/route/nhop.h
@@ -0,0 +1,236 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * This header file contains public definitions for the nexthop routing subsystem.
+ */
+
+#ifndef _NET_ROUTE_NHOP_H_
+#define _NET_ROUTE_NHOP_H_
+
+#include <netinet/in.h> /* sockaddr_in && sockaddr_in6 */
+
+#include <sys/counter.h>
+
+enum nhop_type {
+ NH_TYPE_IPV4_ETHER_RSLV = 1, /* IPv4 ethernet without GW */
+ NH_TYPE_IPV4_ETHER_NHOP = 2, /* IPv4 with pre-calculated ethernet encap */
+ NH_TYPE_IPV6_ETHER_RSLV = 3, /* IPv6 ethernet, without GW */
+ NH_TYPE_IPV6_ETHER_NHOP = 4 /* IPv6 with pre-calculated ethernet encap*/
+};
+
+#ifdef _KERNEL
+
+/*
+ * Currently the only use case of an AF_LINK gateway is storing the
+ * index of the interface that owns the source IPv6 address.
+ * This is used by the IPv6 code for connections over the loopback
+ * interface.
+ *
+ * The structure below copies 'struct sockaddr_dl', reducing the
+ * size of the sdl_data buffer, as it is not used. This change
+ * allows storing AF_LINK gateways in the nhop gateway itself,
+ * simplifying control plane handling.
+ */
+struct sockaddr_dl_short {
+ u_char sdl_len; /* Total length of sockaddr */
+ u_char sdl_family; /* AF_LINK */
+ u_short sdl_index; /* if != 0, system given index for interface */
+ u_char sdl_type; /* interface type */
+ u_char sdl_nlen; /* interface name length, no trailing 0 reqd. */
+ u_char sdl_alen; /* link level address length */
+ u_char sdl_slen; /* link layer selector length */
+ char sdl_data[8]; /* unused */
+};
+
+#define NHOP_RELATED_FLAGS \
+ (RTF_GATEWAY | RTF_HOST | RTF_REJECT | RTF_BLACKHOLE | \
+ RTF_FIXEDMTU | RTF_LOCAL | RTF_BROADCAST | RTF_MULTICAST)
+
+struct nhop_request {
+ struct ifnet *ifp;
+ struct ifaddr *ifa;
+ struct sockaddr *gw;
+ int family;
+ int mtu;
+ int rt_flags; /* gets converted to nh_flags later */
+ uint16_t nh_type;
+ uint16_t nh_flags_additional; /* Additional flags to set to the nh_flags */
+};
+
+struct nh_control;
+struct nhop_priv;
+
+/*
+ * Struct 'nhop_object' field description:
+ *
+ * nh_flags: NHF_ flags used in the dataplane code. NHF_GATEWAY or NHF_BLACKHOLE
+ * can be examples of such flags.
+ * nh_mtu: ready-to-use nexthop mtu. Already accounts for the link-level header,
+ * interface MTU and protocol-specific limitations.
+ * nh_prepend_len: link-level prepend length. Currently unused.
+ * nh_ifp: logical transmit interface. The one from which if_transmit() will be
+ * called. Guaranteed to be non-NULL.
+ * nh_aifp: ifnet of the source address. Same as nh_ifp except IPv6 loopback
+ * routes. See the example below.
+ * nh_ifa: interface address to use. Guaranteed to be non-NULL.
+ * nh_pksent: counter(9) reflecting the number of packets transmitted.
+ *
+ * gw_: storage suitable to hold AF_INET, AF_INET6 or AF_LINK gateway. More
+ * details are available in the examples below.
+ *
+ *
+ * Direct routes (routes w/o gateway):
+ * NHF_GATEWAY is NOT set.
+ * nh_ifp denotes the logical transmit interface.
+ * nh_aifp is the same as nh_ifp
+ * gw_sa contains AF_LINK sa with nh_aifp ifindex (compat)
+ * Loopback routes:
+ * NHF_GATEWAY is NOT set.
+ * nh_ifp points to the loopback interface (lo0).
+ * nh_aifp points to the interface where the destination address belongs to.
+ * This is useful in IPv6 link-local-over-loopback communications.
+ * gw_sa contains AF_LINK sa with nh_aifp ifindex (compat)
+ * GW routes:
+ * NHF_GATEWAY is set.
+ * nh_ifp denotes the logical transmit interface.
+ * nh_aifp is the same as nh_ifp
+ * gw_sa contains L3 address (either AF_INET or AF_INET6).
+ *
+ *
+ * Note: struct nhop_object fields are ordered in a way that
+ * supports memcmp-based comparisons.
+ *
+ */
+#define NHOP_END_CMP (__offsetof(struct nhop_object, nh_pksent))
+
+struct nhop_object {
+ uint16_t nh_flags; /* nhop flags */
+ uint16_t nh_mtu; /* nexthop mtu */
+ union {
+ struct sockaddr_in gw4_sa; /* GW accessor as IPv4 */
+ struct sockaddr_in6 gw6_sa; /* GW accessor as IPv6 */
+ struct sockaddr gw_sa;
+ struct sockaddr_dl_short gwl_sa; /* AF_LINK gw (compat) */
+ char gw_buf[28];
+ };
+ struct ifnet *nh_ifp; /* Logical egress interface. Always != NULL */
+ struct ifaddr *nh_ifa; /* interface address to use. Always != NULL */
+ struct ifnet *nh_aifp; /* ifnet of the source address. Always != NULL */
+ counter_u64_t nh_pksent; /* packets sent using this nhop */
+ /* 32 bytes + 4xPTR == 64(amd64) / 48(i386) */
+ uint8_t nh_prepend_len; /* length of prepend data */
+ uint8_t spare[3];
+ uint32_t spare1; /* alignment */
+ char nh_prepend[48]; /* L2 prepend */
+ struct nhop_priv *nh_priv; /* control plane data */
+ /* -- 128 bytes -- */
+};
+
+/*
+ * Nhop validity.
+ *
+ * Currently we verify whether the link is up on every packet, which can be
+ * quite costly.
+ * TODO: subscribe to the interface notifications and mark the affected
+ * nexthops with the NHF_INVALID flag.
+ */
+
+//#define NH_IS_VALID(_nh) (((_nh)->nh_flags & NHF_INVALID) == 0)
+#define NH_IS_VALID(_nh) RT_LINK_IS_UP((_nh)->nh_ifp)
+#define NH_IS_MULTIPATH(_nh) ((_nh)->nh_flags & NHF_MULTIPATH)
+
+#define RT_GATEWAY(_rt) ((struct sockaddr *)&(_rt)->rt_nhop->gw4_sa)
+#define RT_GATEWAY_CONST(_rt) ((const struct sockaddr *)&(_rt)->rt_nhop->gw4_sa)
+
+#define NH_FREE(_nh) do { \
+ nhop_free_object(_nh); \
+ /* guard against invalid refs */ \
+ _nh = NULL; \
+} while (0)
+
+
+void nhop_free_object(struct nhop_object *nh);
+
+struct sysctl_req;
+struct sockaddr_dl;
+struct rib_head;
+
+uint32_t nhop_get_idx(const struct nhop_object *nh);
+void nhop_free(struct nhop_object *nh);
+
+int nhops_dump_sysctl(struct rib_head *rh, struct sysctl_req *w);
+
+#endif
+
+/* Kernel <> userland structures */
+
+struct nhop_external {
+ uint32_t nh_len; /* length of the data structure */
+ uint32_t nh_idx; /* Nexthop index */
+ uint32_t nh_fib; /* Fib the nexthop is attached to */
+ uint32_t ifindex; /* transmit interface ifindex */
+ uint32_t aifindex; /* address ifindex */
+ uint8_t prepend_len; /* length of the prepend */
+ uint8_t nh_family; /* address family */
+ uint16_t nh_type; /* nexthop type */
+ uint16_t nh_mtu; /* nexthop mtu */
+
+ uint16_t nh_flags; /* nhop flags */
+ struct in_addr nh_addr; /* GW/DST IPv4 address */
+ struct in_addr nh_src; /* default source IPv4 address */
+ uint64_t nh_pksent; /* packets sent using this nhop */
+ /* control plane */
+ /* lookup key: address, family, type */
+ char nh_prepend[64]; /* L2 prepend */
+ uint64_t nh_refcount; /* number of references */
+};
+
+struct nhop_addrs {
+ uint32_t na_len; /* length of the data structure */
+ uint16_t gw_sa_off; /* offset of gateway SA */
+ uint16_t src_sa_off; /* offset of src address SA */
+};
+
+struct mpath_nhop_external {
+ uint32_t nh_idx;
+ uint32_t nh_weight;
+};
+
+struct mpath_external {
+ uint32_t mp_idx;
+ uint32_t mp_refcount;
+ uint32_t mp_nh_count;
+ uint32_t mp_group_size;
+};
+
+
+#endif
+
+
Index: sys/net/route/nhop.c
===================================================================
--- /dev/null
+++ sys/net/route/nhop.c
@@ -0,0 +1,349 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+#include "opt_inet.h"
+#include "opt_route.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/route.h>
+#include <net/route_var.h>
+#include <net/route/nhop_utils.h>
+#include <net/route/nhop.h>
+#include <net/route/nhop_var.h>
+#include <net/route/shared.h>
+#include <net/vnet.h>
+
+/*
+ * This file contains data structures management logic for the nexthop ("nhop")
+ * route subsystem.
+ *
+ * Nexthops in the original sense are the objects containing all the necessary
+ * information to forward the packet to the selected destination.
+ * In particular, nexthop is defined by a combination of
+ * ifp, ifa, aifp, mtu, gw addr(if set), nh_type, nh_family, mask of rt_flags and
+ * NHF_DEFAULT
+ *
+ * All nexthops are stored in the resizable hash table.
+ * Additionally, each nexthop gets assigned its unique index (nexthop index)
+ * so userland programs can interact with the nexthops easier. Index allocation
+ * is backed by the bitmask array.
+ */
+
+static MALLOC_DEFINE(M_NHOP, "nhops", "nexthops data");
+
+
+/* Hash management functions */
+/* Allocates the nexthop control block, hash and index bitmask for @rh; returns 0. */
+int
+nhops_init_rib(struct rib_head *rh)
+{
+ struct nh_control *ctl;
+ size_t alloc_size;
+ uint32_t num_buckets, num_items;
+ void *ptr;
+
+ ctl = malloc(sizeof(struct nh_control), M_NHOP, M_WAITOK | M_ZERO);
+
+ /*
+ * Allocate the nexthop hash. Start with 16 buckets by default (128 bytes).
+ * This will be enough for most of the cases.
+ */
+ num_buckets = 16;
+ alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
+ ptr = malloc(alloc_size, M_NHOP, M_WAITOK | M_ZERO);
+ CHT_SLIST_INIT(&ctl->nh_head, ptr, num_buckets);
+
+ /*
+ * Allocate the nexthop index bitmask, initially sized for 1024 indices.
+ */
+ num_items = 128 * 8; /* 128 bytes */
+ ptr = malloc(bitmask_get_size(num_items), M_NHOP, M_WAITOK | M_ZERO);
+ bitmask_init(&ctl->nh_idx_head, ptr, num_items);
+
+ NHOPS_LOCK_INIT(ctl);
+
+ rh->nh_control = ctl; /* cross-link the rib head and its control block */
+ ctl->rh = rh;
+
+ DPRINTF("NHOPS init for fib %u af %u: ctl %p rh %p", rh->rib_fibnum,
+ rh->rib_family, ctl, rh);
+
+ return (0);
+}
+/* Detaches the nexthops still hashed in @rh and frees the nexthop control state. */
+void
+nhops_destroy_rib(struct rib_head *rh)
+{
+ struct nh_control *ctl;
+ struct nhop_priv *nh_priv;
+
+ ctl = rh->nh_control;
+
+ /*
+ * All routes should have been deleted in rt_table_destroy().
+ * However, the TCP stack or other consumers may still hold referenced
+ * nexthop pointers. When these references drop to zero,
+ * nhop_free_object() will try to unlink these records from the
+ * data structures, most likely leading to a panic.
+ *
+ * Avoid that by explicitly marking all of the remaining
+ * nexthops as unlinked (no control block, index 0).
+ */
+
+ NHOPS_RLOCK(ctl);
+ CHT_SLIST_FOREACH(&ctl->nh_head, nhops, nh_priv) {
+ DPRINTF("Unlinking referenced nhop %u", nh_priv->nh_idx);
+ NH_PRIV_LOCK(nh_priv);
+ nh_priv->nh_control = NULL;
+ nh_priv->nh_idx = 0;
+ NH_PRIV_UNLOCK(nh_priv);
+ } CHT_SLIST_FOREACH_END;
+ NHOPS_RUNLOCK(ctl);
+ /* NOTE(review): no teardown of the lock set up by NHOPS_LOCK_INIT() is visible here - confirm. */
+ free(ctl->nh_head.ptr, M_NHOP);
+ free(ctl->nh_idx_head.idx, M_NHOP);
+ free(ctl, M_NHOP);
+}
+
+/*
+ * Nexthops distribution:
+ *
+ * 2 "mandatory" nexthops per interface ("interface route", "loopback").
+ * For direct peering: 1 nexthop for the peering router per ifp/af.
+ * For IX-like peering: tens to hundreds of neighbor nexthops per ifp/af.
+ * IGP control plane & broadcast segment: tens of nexthops per ifp/af.
+ *
+ * With that in mind, hash nexthops by the combination of the interface
+ * and GW IP address.
+ */
+struct _hash_data {
+ uint16_t ifindex; /* low 16 bits of the nh_ifp interface index */
+ uint8_t family; /* address family of the gateway sockaddr */
+ uint8_t nh_type; /* low 8 bits of the nexthop type */
+ uint32_t gw_addr; /* IPv4 gateway or the last 32 bits of the IPv6 one */
+};
+/* Hashes @priv by egress ifindex, gateway family, nexthop type and gateway address. */
+static uint32_t
+hash_priv(const struct nhop_priv *priv)
+{
+ struct nhop_object *nh;
+ uint16_t ifindex;
+ struct _hash_data key;
+
+ nh = priv->nh;
+ ifindex = nh->nh_ifp->if_index & 0xFFFF;
+ memset(&key, 0, sizeof(key)); /* gw_addr stays 0 for other gateway families */
+
+ key.ifindex = ifindex;
+ key.family = nh->gw_sa.sa_family;
+ key.nh_type = priv->nh_type & 0xFF;
+ if (nh->gw_sa.sa_family == AF_INET6)
+ memcpy(&key.gw_addr, &nh->gw6_sa.sin6_addr.s6_addr32[3], 4);
+ else if (nh->gw_sa.sa_family == AF_INET)
+ memcpy(&key.gw_addr, &nh->gw4_sa.sin_addr, 4);
+
+ return (uint32_t)(djb_hash((const unsigned char *)&key, sizeof(key)));
+}
+
+/*
+ * Resizes the nexthop hash and/or the index bitmask of @ctl. A zero size means
+ * "leave as is". New buffers are allocated M_NOWAIT before taking the write lock.
+ */
+static void
+consider_resize(struct nh_control *ctl, uint32_t new_nh_buckets, uint32_t new_idx_items)
+{
+	void *nh_ptr, *nh_idx_ptr;
+	void *old_idx_ptr;
+	size_t alloc_size;
+
+	nh_ptr = NULL;
+	if (new_nh_buckets != 0) {
+		alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_nh_buckets);
+		nh_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
+	}
+
+	nh_idx_ptr = NULL;
+	if (new_idx_items != 0) {
+		alloc_size = bitmask_get_size(new_idx_items);
+		nh_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
+	}
+
+	if (nh_ptr == NULL && nh_idx_ptr == NULL) {
+		/* Either resize is not required or allocations have failed. */
+		return;
+	}
+
+	DPRINTF("going to resize: nh:[ptr:%p sz:%u] idx:[ptr:%p sz:%u]", nh_ptr,
+	    new_nh_buckets, nh_idx_ptr, new_idx_items);
+
+	old_idx_ptr = NULL;
+	/* NOTE(review): confirm that non-zero from bitmask_copy() means success. */
+	NHOPS_WLOCK(ctl);
+	if (nh_ptr != NULL) {
+		CHT_SLIST_RESIZE(&ctl->nh_head, nhops, nh_ptr, new_nh_buckets);
+	}
+	if (nh_idx_ptr != NULL &&
+	    bitmask_copy(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items)) {
+		bitmask_swap(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items, &old_idx_ptr);
+		nh_idx_ptr = NULL;	/* installed: now owned by the bitmask head */
+	}
+	NHOPS_WUNLOCK(ctl);
+	/* free(9) ignores NULL; a new bitmask that was not installed is released too. */
+	free(nh_ptr, M_NHOP);
+	free(nh_idx_ptr, M_NHOP);
+	free(old_idx_ptr, M_NHOP);
+}
+
+/*
+ * Links nexthop @nh_priv to the nexthop hash table and allocates
+ * a nexthop index for it.
+ * Returns the allocated index or 0 on failure.
+ */
+int
+link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv)
+{
+ uint16_t idx;
+ uint32_t new_num_buckets, new_num_items;
+
+ KASSERT((nh_priv->nh_idx == 0), ("nhop index is already allocated"));
+ NHOPS_WLOCK(ctl);
+
+ /*
+ * Check if we need to resize hash and index.
+ * The following 2 functions return either the new size or 0
+ * if resize is not required.
+ */
+ new_num_buckets = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head);
+ new_num_items = bitmask_get_resize_items(&ctl->nh_idx_head);
+
+ if (bitmask_alloc_idx(&ctl->nh_idx_head, &idx) != 0) {
+ NHOPS_WUNLOCK(ctl);
+ DPRINTF("Unable to allocate nhop index");
+ RTSTAT_INC(rts_nh_idx_alloc_failure);
+ consider_resize(ctl, new_num_buckets, new_num_items); /* resize even on failure */
+ return (0);
+ }
+
+ nh_priv->nh_idx = idx;
+ nh_priv->nh_control = ctl;
+ CHT_SLIST_INSERT_HEAD(&ctl->nh_head, nhops, nh_priv);
+
+ NHOPS_WUNLOCK(ctl);
+
+ DPRINTF("Linked nhop priv %p to %d, hash %u, ctl %p", nh_priv, idx,
+ hash_priv(nh_priv), ctl);
+ consider_resize(ctl, new_num_buckets, new_num_items); /* called with the lock dropped */
+
+ return (idx);
+}
+
+/*
+ * Unlinks the nexthop matching @nh_priv_del from the hash and releases its index.
+ *
+ * Returns the unlinked nexthop or NULL.
+ */
+struct nhop_priv *
+unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv_del)
+{
+ struct nhop_priv *priv_ret;
+ int idx;
+ uint32_t new_num_buckets, new_num_items;
+
+ idx = 0;
+
+ NHOPS_WLOCK(ctl);
+ CHT_SLIST_REMOVE_BYOBJ(&ctl->nh_head, nhops, nh_priv_del, priv_ret);
+
+ if (priv_ret != NULL) {
+ NH_PRIV_LOCK(priv_ret);
+ idx = priv_ret->nh_idx;
+ priv_ret->nh_idx = 0;
+ priv_ret->nh_control = NULL;
+ NH_PRIV_UNLOCK(priv_ret);
+
+ KASSERT((idx != 0), ("bogus nhop index 0"));
+ if ((bitmask_free_idx(&ctl->nh_idx_head, idx)) != 0) {
+ DPRINTF("Unable to remove index %d from fib %u af %d",
+ idx, ctl->rh->rib_fibnum, ctl->rh->rib_family);
+ }
+ }
+
+ /* Check whether the hash or the index needs to be resized */
+ new_num_buckets = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head);
+ new_num_items = bitmask_get_resize_items(&ctl->nh_idx_head);
+
+ NHOPS_WUNLOCK(ctl);
+
+ if (priv_ret == NULL)
+ DPRINTF("Unable to unlink nhop priv %p from hash, hash %u ctl %p",
+ nh_priv_del, hash_priv(nh_priv_del), ctl);
+ else
+ DPRINTF("Unlinked nhop %p priv idx %d", priv_ret, idx);
+
+ consider_resize(ctl, new_num_buckets, new_num_items); /* called with the lock dropped */
+
+ return (priv_ret);
+}
+
+/*
+ * Searches for the nexthop by data specified in @nh_priv.
+ * Returns referenced nexthop or NULL.
+ */
+__noinline struct nhop_priv *
+find_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv)
+{
+ struct nhop_priv *nh_priv_ret;
+
+ //DPRINTF("--- start search ---");
+ NHOPS_RLOCK(ctl);
+ CHT_SLIST_FIND_BYOBJ(&ctl->nh_head, nhops, nh_priv, nh_priv_ret);
+ if (nh_priv_ret != NULL) {
+ if (refcount_acquire_if_not_zero(&nh_priv_ret->nh_refcnt) == 0){
+ /* refcount was 0 -> nhop is being deleted, report it as not found */
+ nh_priv_ret = NULL;
+ }
+ }
+ NHOPS_RUNLOCK(ctl);
+
+ //if (nh_priv_ret == NULL)
+ // DPRINTF("--- end search (not found) ---");
+ return (nh_priv_ret);
+}
+
Index: sys/net/route/nhop_ctl.c
===================================================================
--- /dev/null
+++ sys/net/route/nhop_ctl.c
@@ -0,0 +1,621 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+#include "opt_inet.h"
+#include "opt_route.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/kernel.h>
+#include <sys/epoch.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/route_var.h>
+#include <net/route/nhop_utils.h>
+#include <net/route/nhop.h>
+#include <net/route/nhop_var.h>
+#include <net/route/shared.h>
+#include <net/vnet.h>
+
+/*
+ * This file contains core functionality for the nexthop ("nhop") route subsystem.
+ * The business logic needed to create nexthop objects is implemented here.
+ *
+ * Nexthops in the original sense are the objects containing all the necessary
+ * information to forward the packet to the selected destination.
+ * In particular, nexthop is defined by a combination of
+ * ifp, ifa, aifp, mtu, gw addr(if set), nh_type, nh_family, mask of rt_flags and
+ * NHF_DEFAULT
+ *
+ * Additionally, each nexthop gets assigned its unique index (nexthop index).
+ * It serves two purposes: the first is to let userland programs reference
+ * nexthops by their index. The second is to allow lookup algorithms to
+ * store an index instead of a pointer (2 bytes vs 8) as a lookup result.
+ * All nexthops are stored in the resizable hash table.
+ *
+ * Basically, this file revolves around supporting 2 functions:
+ * 1) fill_nhop(), which contains all business logic on filling the nexthop fields
+ * based on the provided request
+ * 2) nhop_get(), which gets a nexthop based on the provided request.
+ *
+ * Conventions:
+ * 1) non-exported functions start with verb
+ * 2) exported function starts with the subsystem prefix: "nhop"
+ *
+ */
+
+static int fill_nhop(const struct nhop_request *req, struct nhop_object *nh);
+static int dump_nhop_entry(struct rib_head *rh, struct nhop_object *nh, struct sysctl_req *w);
+
+static struct nhop_priv *alloc_nhop_partial(const struct nhop_request *req);
+static int finalize_nhop(struct nhop_priv *nh_priv, const struct nhop_request *req);
+static struct ifnet *get_aifp(const struct nhop_request *req, int reference);
+static void fill_sdl_from_ifp(struct sockaddr_dl_short *sdl, const struct ifnet *ifp);
+
+static void destroy_nhop_epoch(epoch_context_t ctx);
+static void destroy_nhop(struct nhop_priv *nh_priv);
+
+static void print_nhop(const char *prefix, const struct nhop_object *nh);
+
+_Static_assert(__offsetof(struct nhop_object, nh_ifp) == 32,
+ "nhop_object: wrong nh_ifp offset");
+_Static_assert(sizeof(struct nhop_object) <= 128,
+ "nhop_object: size exceeds 128 bytes");
+
+static uma_zone_t nhops_zone; /* Global zone for each and every nexthop */
+
+
+#define NHOP_OBJECT_ALIGNED_SIZE roundup2(sizeof(struct nhop_object), \
+ 2 * CACHE_LINE_SIZE)
+#define NHOP_PRIV_ALIGNED_SIZE roundup2(sizeof(struct nhop_priv), \
+ 2 * CACHE_LINE_SIZE)
+/*
+ * Creates the global UMA zone used for all nexthop allocations.
+ * Every zone item is large enough to hold a nhop_object and a nhop_priv,
+ * each rounded up to a multiple of 2 * CACHE_LINE_SIZE.
+ */
+void
+nhops_init(void)
+{
+
+ nhops_zone = uma_zcreate("routing nhops",
+ NHOP_OBJECT_ALIGNED_SIZE + NHOP_PRIV_ALIGNED_SIZE,
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
+}
+
+/*
+ * Determines the interface owning the source address used by the route.
+ *
+ * Normally this is simply the transmit interface (req->ifp). The one
+ * exception is an interface-address route: its transmit interface is
+ * the loopback and its gateway is an AF_LINK sockaddr carrying the index
+ * of the real interface. In that case the real interface is returned,
+ * which is needed to support link-local IPv6 loopback communications.
+ * If the index lookup fails, req->ifp is used as a fallback.
+ *
+ * If @reference is non-zero, the returned ifp is referenced.
+ *
+ * Returns found ifp.
+ */
+static struct ifnet *
+get_aifp(const struct nhop_request *req, int reference)
+{
+	struct ifnet *aifp;
+	struct sockaddr_dl *sdl;
+	struct epoch_tracker et;
+
+	/*
+	 * Packets looped back to ourselves go out via the loopback
+	 * interface, but we would rather know the interface associated
+	 * with the destination address (likely one of our own addresses).
+	 */
+	if ((req->ifp->if_flags & IFF_LOOPBACK) &&
+	    req->gw->sa_family == AF_LINK) {
+		sdl = (struct sockaddr_dl *)req->gw;
+
+		NET_EPOCH_ENTER(et);
+		aifp = reference ? ifnet_byindex_ref(sdl->sdl_index) :
+		    ifnet_byindex(sdl->sdl_index);
+		NET_EPOCH_EXIT(et);
+
+		if (aifp != NULL)
+			return (aifp);
+
+		DPRINTF("unable to get aifp for %s index %d",
+			if_name(req->ifp), sdl->sdl_index);
+	}
+
+	/* Common case (or failed lookup): use the transmit interface */
+	aifp = req->ifp;
+	if (reference)
+		if_ref(aifp);
+
+	return (aifp);
+}
+
+/*
+ * Compares two nexthops for equality.
+ * They match when the first NHOP_END_CMP bytes of their dataplane
+ * objects are identical and both nh_type and nh_family agree.
+ *
+ * Returns 1 on match, 0 otherwise.
+ */
+int
+cmp_priv(const struct nhop_priv *_one, const struct nhop_priv *_two)
+{
+
+	return (memcmp(_one->nh, _two->nh, NHOP_END_CMP) == 0 &&
+	    _one->nh_type == _two->nh_type &&
+	    _one->nh_family == _two->nh_family);
+}
+
+/*
+ * Finds or creates new nhop_object based on @req.
+ *
+ * A temporary, partially filled nexthop is always allocated first and used
+ * as the lookup key. If an equal nexthop is already linked, the temporary
+ * one is freed and the existing one is returned. Otherwise the temporary
+ * nexthop is finalized and linked into the hash of @rh.
+ *
+ * Returns referenced and linked nhop_object or NULL.
+ */
+__noinline struct nhop_object *
+nhop_get(struct rib_head *rh, const struct nhop_request *req)
+{
+ struct nh_control *ctl = rh->nh_control;
+ struct nhop_priv *nh_priv, *nh_tmp_priv;
+
+ /*
+ * In the cases with large amount of routes, most routes will
+ * share nexthops, making alloc-to-lookup ratio low.
+ *
+ * With that in mind, split nexthop allocation in two stages:
+ * first does the bare minimum to support the lookup for the
+ * existing nexthop.
+ * the second does the heavy-lifting with counters allocations
+ * and external objects refcounting.
+ */
+ nh_tmp_priv = alloc_nhop_partial(req);
+ if (nh_tmp_priv == NULL) {
+ RTSTAT_INC(rts_nh_alloc_failure);
+ DPRINTF("nh_alloc failed");
+ return (NULL);
+ }
+
+ nh_priv = find_nhop(ctl, nh_tmp_priv);
+ if (nh_priv != NULL) {
+ /*
+ * Already present. The temporary nexthop holds no references
+ * and no counters yet, so returning it to the zone is enough.
+ */
+ uma_zfree(nhops_zone, nh_tmp_priv->nh);
+ return (nh_priv->nh);
+ }
+
+ nh_priv = nh_tmp_priv;
+
+ /* On failure finalize_nhop() has already freed nh_priv */
+ if (finalize_nhop(nh_priv, req) != 0) {
+ RTSTAT_INC(rts_nh_alloc_failure);
+ DPRINTF("nh_alloc_finalize failed");
+ return (NULL);
+ }
+ if (link_nhop(ctl, nh_priv) == 0) {
+
+ /*
+ * Adding nexthop to the datastructures
+ * failed. Call destructor w/o waiting for
+ * the epoch end, as nexthop is not used
+ * and return.
+ */
+ DPRINTF("link_nhop failed!");
+ destroy_nhop(nh_priv);
+
+ return (NULL);
+ }
+
+ return (nh_priv->nh);
+}
+
+/*
+ * Fills in a shortened link-level sockaddr suitable for storing inside
+ * the nexthop gateway buffer. Only the length, family, interface index
+ * and interface type are written.
+ */
+static void
+fill_sdl_from_ifp(struct sockaddr_dl_short *sdl, const struct ifnet *ifp)
+{
+
+	sdl->sdl_len = sizeof(struct sockaddr_dl_short);
+	sdl->sdl_family = AF_LINK;
+	sdl->sdl_type = ifp->if_type;
+	sdl->sdl_index = ifp->if_index;
+}
+
+/*
+ * First allocation stage: obtains a zeroed nexthop from the zone and
+ * fills in just enough data for it to be used as a hash lookup key.
+ * No external objects are referenced and the packet counter is
+ * not allocated.
+ *
+ * Returns nh_priv pointer or NULL (allocation or fill failure).
+ */
+static struct nhop_priv *
+alloc_nhop_partial(const struct nhop_request *req)
+{
+	struct nhop_object *nh;
+	struct nhop_priv *nh_priv;
+
+	KASSERT((req->mtu > 0), ("nh requested mtu is zero"));
+
+	nh = uma_zalloc(nhops_zone, M_NOWAIT | M_ZERO);
+	if (nh == NULL)
+		return (NULL);
+
+	/* The private part lives in the same zone item, past the object */
+	nh_priv = (struct nhop_priv *)((char *)nh + NHOP_OBJECT_ALIGNED_SIZE);
+	nh_priv->nh = nh;
+	nh->nh_priv = nh_priv;
+
+	if (fill_nhop(req, nh) == 0) {
+		/* calculate aifp, but don't reference it */
+		nh->nh_aifp = get_aifp(req, 0);
+		return (nh_priv);
+	}
+
+	uma_zfree(nhops_zone, nh);
+	return (NULL);
+}
+
+/*
+ * Finalizes nexthop data to make nhop suitable for linking into the
+ * hash table: allocates the packet counter, takes references on the
+ * interfaces and the ifa, and initializes the per-nexthop mutex and
+ * reference count (set to 1).
+ *
+ * Returns 0 if successful,
+ * errno otherwise. @nh_priv is freed in case of error.
+ */
+static int
+finalize_nhop(struct nhop_priv *nh_priv, const struct nhop_request *req)
+{
+ struct nhop_object *nh;
+
+ nh = nh_priv->nh;
+
+ /* Allocate per-cpu packet counter */
+ nh->nh_pksent = counter_u64_alloc(M_NOWAIT);
+ if (nh->nh_pksent == NULL) {
+ /* Nothing is referenced yet, so freeing the zone item is enough */
+ uma_zfree(nhops_zone, nh);
+ return (ENOMEM);
+ }
+
+ /* Reference external objects and recalculate aifp, this time referenced */
+ if_ref(nh->nh_ifp);
+ ifa_ref(nh->nh_ifa);
+ nh->nh_aifp = get_aifp(req, 1);
+ DPRINTF("AIFP: %p req->ifp %p nh_ifp %p", nh->nh_aifp, req->ifp, nh->nh_ifp);
+
+ NH_PRIV_LOCK_INIT(nh_priv);
+ refcount_init(&nh_priv->nh_refcnt, 1);
+
+ print_nhop("FINALIZE", nh);
+
+ return (0);
+}
+
+/*
+ * Formats @sa into @buf (at most @buflen bytes) for debug output.
+ * IPv4/IPv6 addresses are printed in presentation form, link-level
+ * addresses as "if#<index>", anything else as "af:<family>".
+ */
+static void
+print_nhop_sa(char *buf, size_t buflen, const struct sockaddr *sa)
+{
+
+	switch (sa->sa_family) {
+	case AF_INET: {
+		const struct sockaddr_in *sin4 = (const struct sockaddr_in *)sa;
+
+		inet_ntop(AF_INET, &sin4->sin_addr, buf, buflen);
+		break;
+	}
+	case AF_INET6: {
+		const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sa;
+
+		inet_ntop(AF_INET6, &sin6->sin6_addr, buf, buflen);
+		break;
+	}
+	case AF_LINK: {
+		const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)sa;
+
+		snprintf(buf, buflen, "if#%d", sdl->sdl_index);
+		break;
+	}
+	default:
+		snprintf(buf, buflen, "af:%d", sa->sa_family);
+		break;
+	}
+}
+
+/*
+ * Emits a one-line debug description of @nh via DPRINTF, tagged with
+ * @prefix. Both the source (ifa) address and the gateway address are
+ * rendered with print_nhop_sa().
+ */
+static void
+print_nhop(const char *prefix, const struct nhop_object *nh)
+{
+	char src_buf[INET6_ADDRSTRLEN], addr_buf[INET6_ADDRSTRLEN];
+
+	print_nhop_sa(src_buf, sizeof(src_buf), nh->nh_ifa->ifa_addr);
+	print_nhop_sa(addr_buf, sizeof(addr_buf), &nh->gw_sa);
+
+	/*
+	 * The address family is kept in the control-plane part of the
+	 * nexthop; there is no local "af" variable in this function.
+	 */
+	DPRINTF("%s nhop priv %p: AF %d ifp %p %s addr %s src %p %s aifp %p %s mtu %d nh_flags %X",
+	    prefix, nh->nh_priv, nh->nh_priv->nh_family, nh->nh_ifp,
+	    if_name(nh->nh_ifp), addr_buf, nh->nh_ifa, src_buf, nh->nh_aifp,
+	    if_name(nh->nh_aifp), nh->nh_mtu, nh->nh_flags);
+}
+
+/*
+ * Releases everything owned by a finalized nexthop and returns it to
+ * the zone: interface and ifa references, the packet counter and the
+ * per-nexthop mutex.
+ *
+ * Must only be called on a nexthop that went through finalize_nhop()
+ * and is no longer reachable by anyone else.
+ */
+static void
+destroy_nhop(struct nhop_priv *nh_priv)
+{
+	struct nhop_object *nh;
+
+	nh = nh_priv->nh;
+
+	NH_PRIV_LOCK(nh_priv);
+	print_nhop("DEL", nh);
+	NH_PRIV_UNLOCK(nh_priv);
+
+	if_rele(nh->nh_ifp);
+	if_rele(nh->nh_aifp);
+	ifa_free(nh->nh_ifa);
+	counter_u64_free(nh->nh_pksent);
+
+	/*
+	 * The mutex was set up by NH_PRIV_LOCK_INIT() in finalize_nhop();
+	 * tear it down before the memory holding it is handed back.
+	 */
+	NH_PRIV_LOCK_DESTROY(nh_priv);
+
+	uma_zfree(nhops_zone, nh);
+}
+
+/*
+ * Epoch callback: the nexthop embedding @ctx is now safe to destroy.
+ */
+static void
+destroy_nhop_epoch(epoch_context_t ctx)
+{
+
+	/* Recover the owning nhop_priv from its embedded epoch context */
+	destroy_nhop(__containerof(ctx, struct nhop_priv, nh_epoch_ctx));
+}
+
+/*
+ * Fills @nh fields with the data supplied in the @req.
+ * Function does NOT fill in nh_aifp and does not take any reference.
+ *
+ * Besides the dataplane fields, the control-plane data reachable via
+ * nh->nh_priv (rt_flags, nh_family, nh_type) is written as well, so
+ * nh->nh_priv has to be set by the caller beforehand.
+ *
+ * Returns 0 on success, ENOMEM if the gateway sockaddr in @req is
+ * larger than struct sockaddr_in6.
+ */
+static int
+fill_nhop(const struct nhop_request *req, struct nhop_object *nh)
+{
+ int rt_flags;
+
+ /* Only the flags covered by NHOP_RT_FLAG_MASK are kept */
+ rt_flags = req->rt_flags & NHOP_RT_FLAG_MASK;
+
+ nh->nh_ifp = req->ifp;
+ nh->nh_mtu = req->mtu;
+ nh->nh_flags = fib_rte_to_nh_flags(rt_flags);
+ nh->nh_flags |= (req->nh_flags_additional & NHF_DEFAULT);
+ nh->nh_priv->rt_flags = rt_flags;
+ nh->nh_ifa = req->ifa;
+
+ if (req->rt_flags & RTF_GATEWAY) {
+ /* Gateway route: copy the gateway sockaddr if it fits */
+ if (req->gw->sa_len > sizeof(struct sockaddr_in6)) {
+ DPRINTF("nhop SA size too big: AF %d len %u",
+ req->gw->sa_family, req->gw->sa_len);
+ return (ENOMEM);
+ }
+ memcpy(&nh->gw_sa, req->gw, req->gw->sa_len);
+ } else {
+ /*
+ * Interface route. Currently the route.c code adds
+ * empty sa of type AF_LINK, which is 56 bytes long.
+ * The only place where this data is used is the IPv6
+ * loopback output, where we need to preserve the original
+ * interface to maintain proper scoping.
+ * Current code stores original interface in the separate field
+ * (nh_aifp, see below). Given that, write fake empty SA
+ * with the request AF.
+ *
+ * NOTE(review): fill_sdl_from_ifp() stores an AF_LINK sockaddr
+ * built from req->ifp, not one carrying the request AF.
+ */
+ fill_sdl_from_ifp(&nh->gwl_sa, req->ifp);
+ }
+
+ nh->nh_priv->nh_family = req->family;
+ nh->nh_priv->nh_type = req->nh_type;
+
+ return (0);
+}
+
+/*
+ * Tries to take an additional reference on @nh.
+ * The reference is only acquired if the current count is non-zero.
+ * Returns the result of refcount_acquire_if_not_zero().
+ */
+int
+nhop_ref_object(struct nhop_object *nh)
+{
+
+ return (refcount_acquire_if_not_zero(&nh->nh_priv->nh_refcnt));
+}
+
+/*
+ * Releases one reference on @nh.
+ * When the last reference is dropped, the nexthop is unlinked from its
+ * nh_control (if it is still linked) and its destruction is scheduled
+ * through epoch_call().
+ */
+void
+nhop_free_object(struct nhop_object *nh)
+{
+ struct nh_control *ctl;
+ struct nhop_priv *nh_priv = nh->nh_priv;
+
+ /* Nothing more to do unless this was the last reference */
+ if (!refcount_release(&nh_priv->nh_refcnt))
+ return;
+
+ NH_PRIV_LOCK(nh_priv);
+ ctl = nh_priv->nh_control;
+ /* Use nh_control as an indicator of linked/unlinked entry */
+ nh_priv->nh_control = NULL;
+ NH_PRIV_UNLOCK(nh_priv);
+
+ if (ctl != NULL) {
+ if (unlink_nhop(ctl, nh_priv) == NULL) {
+ /* Do not try to reclaim */
+ DPRINTF("Failed to find nexhop %p", nh_priv);
+ return;
+ }
+ }
+
+ /* Actual destruction is deferred to the epoch callback */
+ epoch_call(net_epoch_preempt, destroy_nhop_epoch,
+ &nh_priv->nh_epoch_ctx);
+}
+
+/*
+ * Takes a reference on @nh.
+ * Currently a plain wrapper around nhop_ref_object().
+ */
+int
+nhop_ref_any(struct nhop_object *nh)
+{
+
+ return (nhop_ref_object(nh));
+}
+
+/*
+ * Releases a reference on @nh.
+ * Currently a plain wrapper around nhop_free_object().
+ */
+void
+nhop_free_any(struct nhop_object *nh)
+{
+
+ nhop_free_object(nh);
+}
+
+
+/* Helper functions */
+
+/*
+ * Returns the nexthop index stored in the control-plane part of @nh.
+ */
+uint32_t
+nhop_get_idx(const struct nhop_object *nh)
+{
+
+ return (nh->nh_priv->nh_idx);
+}
+
+/*
+ * Propagates a new interface MTU to the nexthops of @rh that transmit
+ * via @ifp.
+ *
+ * A nexthop is updated when it has no fixed MTU (RTF_FIXEDMTU unset) or
+ * when its current MTU exceeds the new interface @mtu. The walk is done
+ * under the ctl read lock; each update takes the per-nexthop lock.
+ */
+void
+nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu)
+{
+	struct nh_control *ctl = rh->nh_control;
+	struct nhop_priv *nh_priv;
+	struct nhop_object *nh;
+
+	NHOPS_RLOCK(ctl);
+	CHT_SLIST_FOREACH(&ctl->nh_head, nhops, nh_priv) {
+		nh = nh_priv->nh;
+
+		/* Not using this interface: skip */
+		if (nh->nh_ifp != ifp)
+			continue;
+
+		/* Fixed MTU that already fits: leave as is */
+		if ((nh_priv->rt_flags & RTF_FIXEDMTU) != 0 &&
+		    nh->nh_mtu <= mtu)
+			continue;
+
+		NH_PRIV_LOCK(nh_priv);
+		nh->nh_mtu = mtu;
+		NH_PRIV_UNLOCK(nh_priv);
+	} CHT_SLIST_FOREACH_END;
+	NHOPS_RUNLOCK(ctl);
+
+}
+
+/*
+ * Dumps a single entry to sysctl buffer.
+ *
+ * Layout:
+ * rt_msghdr - generic RTM header to allow users to skip non-understood messages
+ * nhop_external - nexthop description structure (with length)
+ * nhop_addrs - structure encapsulating GW/SRC sockaddrs
+ *
+ * The fixed-size part is followed by the gateway sockaddr and then the
+ * source sockaddr; their offsets are recorded in nhop_addrs.
+ *
+ * Returns 0 on success or the error reported by SYSCTL_OUT().
+ */
+static int
+dump_nhop_entry(struct rib_head *rh, struct nhop_object *nh, struct sysctl_req *w)
+{
+ struct {
+ struct rt_msghdr rtm;
+ struct nhop_external nhe;
+ struct nhop_addrs na;
+ } arpc;
+ struct nhop_external *pnhe;
+ struct sockaddr *gw_sa, *src_sa;
+ struct sockaddr_storage ss;
+ size_t addrs_len;
+ int error;
+
+ //DPRINTF("Dumping: head %p nh %p flags %X req %p\n", rh, nh, nh->nh_flags, w);
+
+ memset(&arpc, 0, sizeof(arpc));
+
+ /* Length of the fixed part only; sockaddr lengths are added below */
+ arpc.rtm.rtm_msglen = sizeof(arpc);
+ arpc.rtm.rtm_version = RTM_VERSION;
+ arpc.rtm.rtm_type = RTM_GET;
+ //arpc.rtm.rtm_flags = RTF_UP;
+ arpc.rtm.rtm_flags = nh->nh_priv->rt_flags;
+
+ /* nhop_external */
+ pnhe = &arpc.nhe;
+ pnhe->nh_len = sizeof(struct nhop_external);
+ pnhe->nh_idx = nh->nh_priv->nh_idx;
+ pnhe->nh_fib = rh->rib_fibnum;
+ pnhe->ifindex = nh->nh_ifp->if_index;
+ pnhe->aifindex = nh->nh_aifp->if_index;
+ pnhe->nh_family = nh->nh_priv->nh_family;
+ pnhe->nh_type = nh->nh_priv->nh_type;
+ pnhe->nh_mtu = nh->nh_mtu;
+ pnhe->nh_flags = nh->nh_flags;
+
+ memcpy(pnhe->nh_prepend, nh->nh_prepend, sizeof(nh->nh_prepend));
+ pnhe->prepend_len = nh->nh_prepend_len;
+ pnhe->nh_refcount = nh->nh_priv->nh_refcnt;
+ pnhe->nh_pksent = counter_u64_fetch(nh->nh_pksent);
+
+ /* sockaddr container */
+ addrs_len = sizeof(struct nhop_addrs);
+ /* Gateway sockaddr starts right after the nhop_addrs header */
+ arpc.na.gw_sa_off = addrs_len;
+ gw_sa = (struct sockaddr *)&nh->gw4_sa;
+ addrs_len += gw_sa->sa_len;
+
+ src_sa = nh->nh_ifa->ifa_addr;
+ if (src_sa->sa_family == AF_LINK) {
+ /* Shorten structure */
+ memset(&ss, 0, sizeof(struct sockaddr_storage));
+ fill_sdl_from_ifp((struct sockaddr_dl_short *)&ss,
+ nh->nh_ifa->ifa_ifp);
+ src_sa = (struct sockaddr *)&ss;
+ }
+ /* Source sockaddr follows the gateway sockaddr */
+ arpc.na.src_sa_off = addrs_len;
+ addrs_len += src_sa->sa_len;
+
+ /* Write total length */
+ arpc.na.na_len = addrs_len;
+
+ /* Account for the two trailing sockaddrs in the message length */
+ arpc.rtm.rtm_msglen += arpc.na.na_len - sizeof(struct nhop_addrs);
+
+ /* Emit fixed part, then gateway, then source; stop at first error */
+ error = SYSCTL_OUT(w, &arpc, sizeof(arpc));
+ if (error == 0)
+ error = SYSCTL_OUT(w, gw_sa, gw_sa->sa_len);
+ if (error == 0)
+ error = SYSCTL_OUT(w, src_sa, src_sa->sa_len);
+
+ /*
+ DPRINTF("Exported %d ifindex %d family %d type %d error %d\n", nh->nh_priv->nh_idx, pnhe->ifindex,
+ pnhe->nh_family, pnhe->nh_type, error);
+ */
+
+ return (error);
+}
+
+/*
+ * Dumps all nexthops linked to @rh into the sysctl request @w.
+ * The hash table is walked under the ctl read lock.
+ *
+ * Returns 0 on success or the first error reported by
+ * dump_nhop_entry(), in which case the dump is aborted.
+ */
+int
+nhops_dump_sysctl(struct rib_head *rh, struct sysctl_req *w)
+{
+	struct nh_control *ctl;
+	struct nhop_priv *nh_priv;
+	int error;
+
+	ctl = rh->nh_control;
+
+	NHOPS_RLOCK(ctl);
+	DPRINTF("NHDUMP: count=%u", ctl->nh_head.items_count);
+	CHT_SLIST_FOREACH(&ctl->nh_head, nhops, nh_priv) {
+		error = dump_nhop_entry(rh, nh_priv->nh, w);
+		if (error != 0) {
+			/* Do not leak the read lock on the error path */
+			NHOPS_RUNLOCK(ctl);
+			return (error);
+		}
+	} CHT_SLIST_FOREACH_END;
+	NHOPS_RUNLOCK(ctl);
+
+	return (0);
+}
+
Index: sys/net/route/nhop_utils.h
===================================================================
--- /dev/null
+++ sys/net/route/nhop_utils.h
@@ -0,0 +1,200 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _NET_ROUTE_NHOP_UTILS_H_
+#define _NET_ROUTE_NHOP_UTILS_H_
+
+/*
+ * Chained hash table.
+ *
+ * Type-erased table head. Heads produced by CHT_SLIST_DEFINE() have the
+ * same field layout and are cast to this type by the resize helpers.
+ */
+struct _cht_head {
+ uint32_t hash_size; /* number of buckets */
+ uint32_t items_count; /* number of items currently linked */
+ void **ptr; /* bucket array */
+};
+
+/*
+ * Computes the bucket count the table should be resized to.
+ *
+ * The table doubles when it holds more than half as many items as
+ * buckets (while below 65536 buckets) and halves when it holds fewer
+ * than a quarter (while above 16 buckets).
+ *
+ * Returns the new number of buckets or 0 if no resize is needed.
+ */
+static inline uint32_t
+_cht_get_resize_size(const struct _cht_head *head)
+{
+	const uint32_t buckets = head->hash_size;
+	const uint32_t items = head->items_count;
+
+	if (items * 2 > buckets && buckets < 65536)
+		return (buckets * 2);
+
+	if (items * 4 < buckets && buckets > 16)
+		return (buckets / 2);
+
+	return (0);
+}
+
+/*
+ * Returns non-zero if _cht_get_resize_size() suggests a new bucket
+ * count for @head, i.e. the table should grow or shrink.
+ */
+static inline int
+_cht_need_resize(const struct _cht_head *head)
+{
+
+ return (_cht_get_resize_size(head) > 0);
+}
+
+
+#ifndef typeof
+#define typeof __typeof
+#endif
+
+#define CHT_SLIST_NEED_RESIZE(_head) \
+ _cht_need_resize((const struct _cht_head *)(_head))
+#define CHT_SLIST_GET_RESIZE_BUCKETS(_head) \
+ _cht_get_resize_size((const struct _cht_head *)(_head))
+#define CHT_SLIST_GET_RESIZE_SIZE(_buckets) ((_buckets) * sizeof(void *))
+
+#define CHT_SLIST_DEFINE(_HNAME, _ITEM_TYPE) \
+struct _HNAME##_head { \
+ uint32_t hash_size; \
+ uint32_t items_count; \
+ _ITEM_TYPE **ptr; \
+}
+
+#define CHT_SLIST_INIT(_head, _ptr, _num_buckets) \
+ (_head)->hash_size = _num_buckets; \
+ (_head)->items_count = 0; \
+ (_head)->ptr = _ptr;
+
+/*
+ * Default hash method for constant-size keys.
+ *
+ * Map a key (via <prefix>_hash_key) or an object (via <prefix>_hash_obj)
+ * to a bucket index by masking the hash with hash_size - 1. The mask
+ * acts as a modulo only when hash_size is a power of two.
+ * The expansion is fully parenthesized so it can be used safely inside
+ * larger expressions.
+ */
+
+#define CHT_GET_BUCK(_head, _PX, _key) (_PX##_hash_key(_key) & ((_head)->hash_size - 1))
+#define CHT_GET_BUCK_OBJ(_head, _PX, _obj) (_PX##_hash_obj(_obj) & ((_head)->hash_size - 1))
+
+#define CHT_FIRST(_head, idx) _CHT_FIRST((_head)->ptr, idx)
+#define _CHT_FIRST(_ptr, idx) (_ptr)[idx]
+
+/*
+ * Looks up an item by key.
+ * Walks the bucket selected by <prefix>_hash_key(_key) and stops at the
+ * first item for which <prefix>_cmp(_key, item) is non-zero.
+ * @_ret receives the item or NULL if nothing matched.
+ */
+#define CHT_SLIST_FIND(_head, _PX, _key, _ret) do { \
+ uint32_t _buck = CHT_GET_BUCK(_head, _PX, _key); \
+ _ret = CHT_FIRST(_head, _buck); \
+ for ( ; _ret != NULL; _ret = _PX##_next(_ret)) { \
+ if (_PX##_cmp(_key, (_ret))) \
+ break; \
+ } \
+} while(0)
+
+/*
+ * Looks up an item equal to the object @_obj.
+ * Uses <prefix>_hash_obj() to pick the bucket and <prefix>_cmp(_obj, item)
+ * to compare; @_ret receives the first match or NULL.
+ */
+#define CHT_SLIST_FIND_BYOBJ(_head, _PX, _obj, _ret) do { \
+ uint32_t _buck = CHT_GET_BUCK_OBJ(_head, _PX, _obj); \
+ _ret = CHT_FIRST(_head, _buck); \
+ for ( ; _ret != NULL; _ret = _PX##_next(_ret)) { \
+ if (_PX##_cmp(_obj, _ret)) \
+ break; \
+ } \
+} while(0)
+
+/*
+ * Links @_obj at the head of its bucket chain and bumps items_count.
+ * No duplicate check is performed.
+ */
+#define CHT_SLIST_INSERT_HEAD(_head, _PX, _obj) do { \
+ uint32_t _buck = CHT_GET_BUCK_OBJ(_head, _PX, _obj); \
+ _PX##_next(_obj) = CHT_FIRST(_head, _buck); \
+ CHT_FIRST(_head, _buck) = _obj; \
+ (_head)->items_count++; \
+} while(0)
+
+/*
+ * Finds the first item matching @_key and unlinks it from its bucket.
+ * @_ret receives the removed item or NULL if nothing matched;
+ * items_count is decremented only when an item was removed.
+ * _tmp tracks the predecessor so the chain can be relinked.
+ */
+#define CHT_SLIST_REMOVE(_head, _PX, _key, _ret) do { \
+ typeof(*(_head)->ptr) _tmp; \
+ uint32_t _buck = CHT_GET_BUCK(_head, _PX, _key); \
+ _ret = CHT_FIRST(_head, _buck); \
+ _tmp = NULL; \
+ for ( ; _ret != NULL; _tmp = _ret, _ret = _PX##_next(_ret)) { \
+ if (_PX##_cmp(_key, _ret)) \
+ break; \
+ } \
+ if (_ret != NULL) { \
+ if (_tmp == NULL) \
+ CHT_FIRST(_head, _buck) = _PX##_next(_ret); \
+ else \
+ _PX##_next(_tmp) = _PX##_next(_ret); \
+ (_head)->items_count--; \
+ } \
+} while(0)
+
+/*
+ * Finds the first item comparing equal to the object @_obj and unlinks
+ * it from its bucket. The match is decided by <prefix>_cmp(), so the
+ * removed item is not necessarily @_obj itself.
+ * @_ret receives the removed item or NULL if nothing matched;
+ * items_count is decremented only when an item was removed.
+ */
+#define CHT_SLIST_REMOVE_BYOBJ(_head, _PX, _obj, _ret) do { \
+ typeof(*(_head)->ptr) _tmp; \
+ uint32_t _buck = CHT_GET_BUCK_OBJ(_head, _PX, _obj); \
+ _ret = CHT_FIRST(_head, _buck); \
+ _tmp = NULL; \
+ for ( ; _ret != NULL; _tmp = _ret, _ret = _PX##_next(_ret)) { \
+ if (_PX##_cmp(_obj, _ret)) \
+ break; \
+ } \
+ if (_ret != NULL) { \
+ if (_tmp == NULL) \
+ CHT_FIRST(_head, _buck) = _PX##_next(_ret); \
+ else \
+ _PX##_next(_tmp) = _PX##_next(_ret); \
+ (_head)->items_count--; \
+ } \
+} while(0)
+
+
+/*
+ * Iterates over every item in the table, bucket by bucket, assigning
+ * each item to @_x. The macro opens a brace that has to be closed with
+ * CHT_SLIST_FOREACH_END. The loop body runs inside the inner (per-bucket)
+ * loop, so "break" only leaves the current bucket, not the whole walk.
+ */
+#define CHT_SLIST_FOREACH(_head, _PX, _x) \
+ for (uint32_t _i = 0; _i < (_head)->hash_size; _i++) { \
+ for (_x = CHT_FIRST(_head, _i); _x; _x = _PX##_next(_x))
+
+#define CHT_SLIST_FOREACH_END }
+
+/*
+ * Moves all items into the bucket array @_new_void_ptr of @_new_hsize
+ * buckets and installs it in @_head. On completion @_new_void_ptr is
+ * overwritten with the old bucket array so the caller can dispose of it.
+ *
+ * The new array is read before it is written (existing bucket heads are
+ * used as "next" pointers), so it has to contain NULL heads on entry.
+ * items_count is left untouched.
+ *
+ * The macro is not wrapped in a block: it declares _new_idx, _new_ptr,
+ * _x and _y directly in the caller's scope.
+ */
+#define CHT_SLIST_RESIZE(_head, _PX, _new_void_ptr, _new_hsize) \
+ uint32_t _new_idx; \
+ typeof((_head)->ptr) _new_ptr = (void *)_new_void_ptr; \
+ typeof(*(_head)->ptr) _x, _y; \
+ for (uint32_t _old_idx = 0; _old_idx < (_head)->hash_size; _old_idx++) {\
+ _x = CHT_FIRST(_head, _old_idx); \
+ _y = _x; \
+ while (_y != NULL) { \
+ _y = _PX##_next(_x); \
+ _new_idx = _PX##_hash_obj(_x) & (_new_hsize - 1);\
+ _PX##_next(_x) = _CHT_FIRST(_new_ptr, _new_idx);\
+ _CHT_FIRST(_new_ptr, _new_idx) = _x; \
+ _x = _y; \
+ } \
+ } \
+ (_head)->hash_size = _new_hsize; \
+ _new_void_ptr = (void *)(_head)->ptr; \
+ (_head)->ptr = _new_ptr;
+
+/* bitmasks */
+
+struct bitmask_head {
+ uint16_t free_off; /* index of the first potentially free block */
+ uint16_t blocks; /* number of 4/8-byte blocks in the index */
+ uint32_t items_count; /* total number of items */
+ u_long *idx;
+};
+
+size_t bitmask_get_size(uint32_t items);
+uint32_t bitmask_get_resize_items(const struct bitmask_head *nh);
+int bitmask_should_resize(const struct bitmask_head *bh);
+void bitmask_swap(struct bitmask_head *bh, void *new_idx, uint32_t new_items, void **pidx);
+void bitmask_init(struct bitmask_head *bh, void *idx, uint32_t num_items);
+int bitmask_copy(const struct bitmask_head *bi, void *new_idx, uint32_t new_items);
+int bitmask_alloc_idx(struct bitmask_head *bi, uint16_t *pidx);
+int bitmask_free_idx(struct bitmask_head *bi, uint16_t idx);
+
+#endif
+
Index: sys/net/route/nhop_utils.c
===================================================================
--- /dev/null
+++ sys/net/route/nhop_utils.c
@@ -0,0 +1,220 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+#include "opt_inet.h"
+#include "opt_route.h"
+#include "opt_mpath.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+
+#include <net/route/nhop_utils.h>
+
+#define BLOCK_ITEMS (8 * sizeof(u_long)) /* Number of items for ffsl() */
+
+#define _BLOCKS_TO_SZ(_blocks) ((size_t)(_blocks) * sizeof(u_long))
+#define _BLOCKS_TO_ITEMS(_blocks) ((uint32_t)(_blocks) * BLOCK_ITEMS)
+#define _ITEMS_TO_BLOCKS(_items) ((_items) / BLOCK_ITEMS)
+
+
+static void _bitmask_init_idx(void *index, uint32_t items);
+
+/*
+ * Initializes bitmask head @bh to manage @num_items indexes backed by
+ * the storage @idx.
+ * If @idx is not NULL its contents are initialized (all indexes free,
+ * index 0 reserved). All counters in @bh start from zero.
+ */
+void
+bitmask_init(struct bitmask_head *bh, void *idx, uint32_t num_items)
+{
+
+ if (idx != NULL)
+ _bitmask_init_idx(idx, num_items);
+
+ memset(bh, 0, sizeof(struct bitmask_head));
+ bh->blocks = _ITEMS_TO_BLOCKS(num_items);
+ bh->idx = (u_long *)idx;
+}
+
+/*
+ * Computes the capacity the bitmask should be grown to.
+ * Growth (doubling) is suggested once more than half of the current
+ * capacity is in use, as long as fewer than 65536 items are allocated.
+ *
+ * Returns the new number of items or 0 if no resize is needed.
+ */
+uint32_t
+bitmask_get_resize_items(const struct bitmask_head *bh)
+{
+	size_t capacity = _BLOCKS_TO_ITEMS(bh->blocks);
+
+	if (bh->items_count >= 65536 || bh->items_count * 2 <= capacity)
+		return (0);
+
+	return (capacity * 2);
+}
+
+/*
+ * Returns non-zero if bitmask_get_resize_items() suggests growing @bh.
+ */
+int
+bitmask_should_resize(const struct bitmask_head *bh)
+{
+
+ return (bitmask_get_resize_items(bh) != 0);
+}
+
+#if 0
+uint32_t
+_bitmask_get_blocks(uint32_t items)
+{
+
+ return (items / BLOCK_ITEMS);
+}
+#endif
+
+/*
+ * Returns the number of bytes needed to store a bitmask of @items bits.
+ * @items has to be a multiple of BLOCK_ITEMS (the number of bits in
+ * one u_long block); this is asserted.
+ */
+size_t
+bitmask_get_size(uint32_t items)
+{
+#ifdef _KERNEL
+	KASSERT((items % BLOCK_ITEMS) == 0,
+	    ("bitmask size needs to be a multiple of %zu", BLOCK_ITEMS));
+#else
+	assert((items % BLOCK_ITEMS) == 0);
+#endif
+
+	return (items / 8);
+}
+
+/*
+ * Initializes raw bitmask storage @_idx sized for @items bits.
+ * A set bit denotes a free index. Bit 0 is cleared so that index 0 is
+ * never handed out.
+ */
+static void
+_bitmask_init_idx(void *_idx, uint32_t items)
+{
+ size_t size = bitmask_get_size(items);
+ u_long *idx = (u_long *)_idx;
+
+ /* Mark all as free */
+ memset(idx, 0xFF, size);
+ *idx &= ~(u_long)1; /* Always skip index 0 */
+}
+
+
+/*
+ * Prepares the storage @new_idx, sized for @new_items bits, as a
+ * replacement for the current storage of @bi.
+ *
+ * @new_idx is always initialized (all free, index 0 reserved). If it is
+ * at least as large as the current storage, the current allocation
+ * state is copied over it.
+ *
+ * Returns 0 on success, 1 if @new_idx is smaller than the current
+ * storage (shrinking is not supported; nothing is copied).
+ *
+ * XXX: _try_merge api to allow shrinking?
+ */
+int
+bitmask_copy(const struct bitmask_head *bi, void *new_idx, uint32_t new_items)
+{
+	/* Both sides of the comparison below are in blocks, not items */
+	uint32_t new_blocks = _ITEMS_TO_BLOCKS(new_items);
+
+	_bitmask_init_idx(new_idx, new_items);
+
+	if (bi->blocks > new_blocks) {
+		/*
+		 * XXX: shrinking would require ensuring that no index
+		 * is allocated in the blocks being cut off.
+		 */
+		return (1);
+	}
+
+	/* extend current blocks */
+	if (bi->blocks > 0)
+		memcpy(new_idx, bi->idx, _BLOCKS_TO_SZ(bi->blocks));
+
+	return (0);
+}
+
+/*
+ * Installs @new_idx (sized for @new_items bits) as the storage of @bh.
+ * The previous storage pointer is returned via @pidx when it is not
+ * NULL. items_count and free_off are left untouched.
+ */
+void
+bitmask_swap(struct bitmask_head *bh, void *new_idx, uint32_t new_items, void **pidx)
+{
+ void *old_ptr;
+
+ old_ptr = bh->idx;
+
+ bh->idx = (u_long *)new_idx;
+ bh->blocks = _ITEMS_TO_BLOCKS(new_items);
+
+ if (pidx != NULL)
+ *pidx = old_ptr;
+}
+
+/*
+ * Allocates a new index in the given instance and stores it in @pidx.
+ * The search starts at block free_off and picks the lowest free bit of
+ * the first block that has one.
+ * Returns 0 on success, 1 if no free index is available.
+ */
+int
+bitmask_alloc_idx(struct bitmask_head *bi, uint16_t *pidx)
+{
+ u_long *mask;
+ int i, off, v;
+
+ off = bi->free_off;
+ mask = &bi->idx[off];
+
+ for (i = off; i < bi->blocks; i++, mask++) {
+ /* ffsl() yields the 1-based position of the lowest set bit, 0 if none */
+ if ((v = ffsl(*mask)) == 0)
+ continue;
+
+ /* Mark as busy */
+ *mask &= ~ ((u_long)1 << (v - 1));
+
+ /* Blocks before this one had no free bits; resume here next time */
+ bi->free_off = i;
+
+ /* Convert (block, bit) into a global index */
+ v = BLOCK_ITEMS * i + v - 1;
+
+ *pidx = v;
+ bi->items_count++;
+ return (0);
+ }
+
+ return (1);
+}
+
+/*
+ * Removes index from given set.
+ * Returns 0 on success, 1 if @idx is 0, lies outside of the bitmask or
+ * is already marked as free.
+ */
+int
+bitmask_free_idx(struct bitmask_head *bi, uint16_t idx)
+{
+ u_long *mask;
+ int i, v;
+
+ /* Index 0 is reserved and never allocated */
+ if (idx == 0)
+ return (1);
+
+ /* Split the index into block number and bit offset */
+ i = idx / BLOCK_ITEMS;
+ v = idx % BLOCK_ITEMS;
+
+ if (i >= bi->blocks)
+ return (1);
+
+ mask = &bi->idx[i];
+
+ /* A set bit means the index is already free */
+ if ((*mask & ((u_long)1 << v)) != 0)
+ return (1);
+
+ /* Mark as free */
+ *mask |= (u_long)1 << v;
+ bi->items_count--;
+
+ /* Update free offset */
+ if (bi->free_off > i)
+ bi->free_off = i;
+
+ return (0);
+}
+
Index: sys/net/route/nhop_var.h
===================================================================
--- /dev/null
+++ sys/net/route/nhop_var.h
@@ -0,0 +1,127 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * This header file contains private definitions for nexthop routing.
+ *
+ * Header is not intended to be included by the code external to the
+ * routing subsystem.
+ */
+
+#ifndef _NET_ROUTE_NHOP_VAR_H_
+#define _NET_ROUTE_NHOP_VAR_H_
+
+/*
+ * Hashes @len bytes starting at @h.
+ * For every byte the accumulator (initially 0) is multiplied by 33 and
+ * XORed with the byte value.
+ */
+static unsigned
+djb_hash(const unsigned char *h, const int len)
+{
+	unsigned int acc = 0;
+	int pos = 0;
+
+	while (pos < len) {
+		acc = (acc * 33) ^ h[pos];
+		pos++;
+	}
+
+	return (acc);
+}
+
+/* define nhop hash table */
+struct nhop_priv;
+CHT_SLIST_DEFINE(nhops, struct nhop_priv);
+/* produce hash value for an object */
+#define nhops_hash_obj(_obj) hash_priv(_obj)
+/* compare two objects */
+#define nhops_cmp(_one, _two) cmp_priv(_one, _two)
+/* next object accessor */
+#define nhops_next(_obj) (_obj)->nh_next
+
+/* XXX: declare! */
+/* define mpath hash table */
+struct nhgrp_priv;
+CHT_SLIST_DEFINE(mpath, struct nhgrp_priv);
+
+
+struct nh_control {
+ struct nhops_head nh_head; /* hash table head */
+ struct bitmask_head nh_idx_head; /* nhop index head */
+ struct mpath_head gr_head; /* nhgrp hash table head */
+ struct bitmask_head gr_idx_head; /* nhgrp index head */
+ struct rwlock nhop_lock; /* overall ctl lock */
+ struct rib_head *rh; /* pointer back to rnh */
+};
+
+#define NHOPS_WLOCK(ctl) rw_wlock(&(ctl)->nhop_lock)
+#define NHOPS_RLOCK(ctl) rw_rlock(&(ctl)->nhop_lock)
+#define NHOPS_WUNLOCK(ctl) rw_wunlock(&(ctl)->nhop_lock)
+#define NHOPS_RUNLOCK(ctl) rw_runlock(&(ctl)->nhop_lock)
+#define NHOPS_LOCK_INIT(ctl) rw_init(&(ctl)->nhop_lock, "ctl")
+#define NHOPS_LOCK_DESTROY(ctl) rw_destroy(&(ctl)->nhop_lock)
+#define NHOPS_WLOCK_ASSERT(ctl) rw_assert(&(ctl)->nhop_lock, RA_WLOCKED)
+
+
+/* Control plane-only nhop data */
+struct nhop_object;
+struct nhop_priv {
+ uint32_t nh_idx; /* nexthop index */
+ uint8_t nh_family; /* address family of the lookup */
+ uint16_t nh_type; /* nexthop type */
+ void *cb_func; /* function handling additional rewrite caps */
+ u_int nh_refcnt; /* number of references */
+ int rt_flags; /* routing flags for the control plane */
+ struct nhop_object *nh; /* backreference to the dataplane nhop */
+ struct nh_control *nh_control; /* backreference to the rnh */
+ struct nhop_priv *nh_next; /* hash table membership */
+ struct mtx nh_mtx; /* mutex */
+ struct epoch_context nh_epoch_ctx; /* epoch data for nhop */
+};
+
+#define NH_PRIV_LOCK_INIT(_priv) mtx_init(&(_priv)->nh_mtx, "nhop", NULL, MTX_DEF)
+#define NH_PRIV_LOCK(_priv) mtx_lock(&(_priv)->nh_mtx)
+#define NH_PRIV_UNLOCK(_priv) mtx_unlock(&(_priv)->nh_mtx)
+#define NH_PRIV_LOCK_DESTROY(_priv) mtx_destroy(&(_priv)->nh_mtx)
+#define NH_PRIV_LOCK_ASSERT(_priv) mtx_assert(&(_priv)->nh_mtx, MA_OWNED)
+
+#define NH_LOCK(_nh) NH_PRIV_LOCK((_nh)->nh_priv)
+#define NH_UNLOCK(_nh) NH_PRIV_UNLOCK((_nh)->nh_priv)
+
+#define NH_IS_PINNED(_nh) ((_nh)->nh_priv->rt_flags & RTF_PINNED)
+
+/* nhop.c */
+struct nhop_priv *find_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv);
+int link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv);
+struct nhop_priv *unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv);
+
+/* nhop_ctl.c */
+void free_nhop(struct nhop_priv *nh_priv);
+int cmp_priv(const struct nhop_priv *_one, const struct nhop_priv *_two);
+
+/* mpath */
+struct weightened_nhop;
+
+
+#endif
+
Index: sys/net/route/route_ctl.c
===================================================================
--- /dev/null
+++ sys/net/route/route_ctl.c
@@ -0,0 +1,290 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_dl.h>
+#include <net/vnet.h>
+#include <net/route.h>
+#include <net/route_var.h>
+#include <net/route/nhop_utils.h>
+#include <net/route/nhop.h>
+#include <net/route/nhop_var.h>
+#include <net/route/shared.h>
+#include <netinet/in.h>
+
+#include <vm/uma.h>
+
+
+/*
+ * This file contains control plane routing tables functions.
+ *
+ * All functions assume they are called within the net epoch.
+ */
+
+static void set_req_mtu(const struct rt_addrinfo *info, struct nhop_request *req);
+
+static void fill_nh_request(struct rib_head *rnh, struct rt_addrinfo *info,
+ struct nhop_request *nh_req);
+static void fill_nh_request_from_nhop(const struct nhop_object *nh,
+ struct sockaddr_storage *gw_storage, struct nhop_request *nh_req);
+
+
+/*
+ * Applies the MTU carried in @info, if any, to @nh_req.
+ *
+ * Nothing is changed unless RTV_MTU is set in @info->rti_mflags.  When it
+ * is, the requested value is stored in @nh_req->mtu, and RTF_FIXEDMTU is
+ * set for a non-zero MTU or cleared for a zero one.
+ */
+static void
+set_req_mtu(const struct rt_addrinfo *info, struct nhop_request *nh_req)
+{
+
+	/* No MTU supplied with the request: leave @nh_req untouched. */
+	if ((info->rti_mflags & RTV_MTU) == 0)
+		return;
+
+	/*
+	 * A non-zero value is an explicit user choice and gets pinned;
+	 * zero means "roll back to the default", so the pin is dropped.
+	 */
+	if (info->rti_rmx->rmx_mtu == 0)
+		nh_req->rt_flags &= ~RTF_FIXEDMTU;
+	else
+		nh_req->rt_flags |= RTF_FIXEDMTU;
+
+	nh_req->mtu = info->rti_rmx->rmx_mtu;
+}
+
+
+/*
+ * Initializes @nh_req from the route request @info (@rnh is not consulted).
+ */
+static void
+fill_nh_request(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct nhop_request *nh_req)
+{
+
+	memset(nh_req, 0, sizeof(*nh_req));
+	nh_req->family = info->rti_info[RTAX_DST]->sa_family;
+	nh_req->gw = info->rti_info[RTAX_GATEWAY];
+	nh_req->ifa = info->rti_ifa;
+	nh_req->ifp = info->rti_ifa->ifa_ifp;
+	nh_req->rt_flags = info->rti_flags;	/* start from the caller's rt flags */
+	nh_req->nh_type = 0;	/* nhop type is the hook's responsibility */
+	set_req_mtu(info, nh_req);
+}
+
+/*
+ * Fill @nh_req based on the real @nh; @gw_storage backs @nh_req->gw.
+ */
+static void
+fill_nh_request_from_nhop(const struct nhop_object *nh,
+ struct sockaddr_storage *gw_storage, struct nhop_request *nh_req)
+{
+
+ memset(nh_req, 0, sizeof(struct nhop_request));
+ nh_req->ifp = nh->nh_ifp;
+ nh_req->ifa = nh->nh_ifa;
+ nh_req->family = nh->nh_priv->nh_family;
+ nh_req->mtu = nh->nh_mtu;
+ nh_req->rt_flags = nh->nh_priv->rt_flags;
+ nh_req->nh_type = nh->nh_priv->nh_type;
+
+ if (nh_req->rt_flags & RTF_GATEWAY) {
+ /* Copies sin_len bytes; assumes that size was already validated */
+ memcpy(gw_storage, &nh->gw4_sa, nh->gw4_sa.sin_len);
+ } else {
+ /* Gateway value is largely ignored here; only ss_len is reset */
+ gw_storage->ss_len = 0;
+ }
+ nh_req->gw = (struct sockaddr *)gw_storage; /* gw always points at @gw_storage */
+}
+
+/*
+ * Update @nh_req request data based on the parameters supplied in @info.
+ * This is a helper function to support route changes.
+ *
+ * It limits the changes that can be done to the route to the following:
+ * 1) all combination of gateway changes (gw, interface, blackhole/reject)
+ * 2) route flags (FLAG[123],STATIC,BLACKHOLE,REJECT)
+ * 3) route MTU
+ *
+ * A gateway taken from @info is referenced, not copied: @info must outlive
+ * every use of @nh_req.  @fibnum is currently unused.
+ * Returns:
+ * 0 on success
+ */
+static int
+alter_nh_request(struct rt_addrinfo *info, u_int fibnum, struct nhop_request *nh_req)
+{
+	struct sockaddr *info_gw = info->rti_info[RTAX_GATEWAY];
+
+	/* Update MTU if set in the request */
+	set_req_mtu(info, nh_req);
+
+	/* XXX: allow only one of BLACKHOLE,REJECT,GATEWAY */
+	/* Allow some flags (FLAG1,STATIC,BLACKHOLE,REJECT) to be toggled on change. */
+	nh_req->rt_flags &= ~RTF_FMASK;
+	nh_req->rt_flags |= info->rti_flags & RTF_FMASK;
+
+	if (info_gw != NULL) {
+		/* New gateway; keep the inherited ifa/ifp unless @info has new ones. */
+		nh_req->gw = info_gw;
+		if (info->rti_ifa != NULL)
+			nh_req->ifa = info->rti_ifa;
+		if (info->rti_ifp != NULL)
+			nh_req->ifp = info->rti_ifp;
+		nh_req->rt_flags &= ~RTF_GATEWAY;	/* resync RTF_GATEWAY with @info */
+		nh_req->rt_flags |= (RTF_GATEWAY & info->rti_flags);
+	}
+
+	return (0);
+}
+
+/*
+ * Creates a new nexthop based on the information in @info.
+ *
+ * Returns:
+ * 0 on success, filling @nh_ret with the desired nexthop object ptr
+ * errno otherwise (@nh_ret is not written if the rnh_preadd hook fails)
+ */
+static int
+create_nhop_from_info(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct nhop_object **nh_ret)
+{
+	struct sockaddr *dst, *netmask;
+	struct nhop_request nh_req;
+	int error;
+
+	fill_nh_request(rnh, info, &nh_req);
+
+	/* Give the protocols chance to augment the request data */
+	dst = info->rti_info[RTAX_DST];
+	netmask = info->rti_info[RTAX_NETMASK];
+
+	if (rnh->rnh_preadd != NULL) {
+		error = rnh->rnh_preadd(rnh->rib_fibnum, dst, netmask, &nh_req);
+		if (error != 0)
+			return (error);
+	}
+
+	/* Ask nhop_get() for the nexthop described by the request. */
+	*nh_ret = nhop_get(rnh, &nh_req);
+	if (*nh_ret == NULL) {
+		DPRINTF("failed to get the nexthop from req");
+		return (EAGAIN);
+	}
+
+	return (0);
+}
+
+struct nhop_object *
+nhop_create_from_info_wrapper(struct rib_head *rnh, struct rt_addrinfo *info)
+{
+ struct nhop_object *nh;
+
+ if (create_nhop_from_info(rnh, info, &nh) != 0)
+ return (NULL);
+
+ return (nh);
+}
+
+/*
+ * Derives a nexthop from @nh_old, adjusted by the change request in @info.
+ * Helper function used in the route changes, please see
+ * alter_nh_request() comments for more details.
+ *
+ * Returns:
+ * 0 on success, filling @nh_ret with the desired nexthop object
+ * errno otherwise
+ */
+static int
+create_nhop_from_nhop(struct rib_head *rnh, const struct nhop_object *nh_old,
+    struct rt_addrinfo *info, struct nhop_object **nh_ret)
+{
+	struct sockaddr_storage gw_storage;
+	struct nhop_request nh_req;
+	int error;
+
+	/* Seed the request with the properties of the original nexthop */
+	fill_nh_request_from_nhop(nh_old, &gw_storage, &nh_req);
+
+	/*
+	 * Overlay the requested changes, then let the per-AF hook (if any) adjust.
+	 */
+	error = alter_nh_request(info, rnh->rib_fibnum, &nh_req);
+	if (error == 0 && rnh->rnh_preadd != NULL) {
+		error = rnh->rnh_preadd(rnh->rib_fibnum, info->rti_info[RTAX_DST],
+		    info->rti_info[RTAX_NETMASK], &nh_req);
+		if (error != 0) {
+			DPRINTF("failed to create nhop: prehook returned %d",
+			    error);
+		}
+	}
+	if (error != 0)
+		return (error);
+
+	/* Ask nhop_get() for the nexthop described by the final request */
+	*nh_ret = nhop_get(rnh, &nh_req);
+	if (*nh_ret == NULL) {
+		DPRINTF("failed to create nhop: nhop_get() failed");
+		return (EAGAIN);
+	}
+
+	return (0);
+}
+
+struct nhop_object *
+nhop_create_from_nhop_wrapper(struct rib_head *rnh,
+ const struct nhop_object *nh_old, struct rt_addrinfo *info)
+{
+ struct nhop_object *nh;
+
+ if (create_nhop_from_nhop(rnh, nh_old, info, &nh) != 0)
+ return (NULL);
+
+ return (nh);
+}
+
Index: sys/net/route/route_helpers.c
===================================================================
--- /dev/null
+++ sys/net/route/route_helpers.c
@@ -0,0 +1,83 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_route.h"
+
+#include <sys/param.h>
+#include <sys/jail.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/sysproto.h>
+#include <sys/proc.h>
+#include <sys/domain.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/rmlock.h>
+
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/if_dl.h>
+#include <net/route.h>
+#include <net/route_var.h>
+#include <net/route/nhop_utils.h>
+#include <net/route/nhop.h>
+#include <net/route/nhop_var.h>
+#include <net/route/shared.h>
+#include <net/vnet.h>
+
+/*
+ * RIB helper functions.
+ */
+
+/*
+ * Walks the routing table selected by @af and @fibnum, invoking @wa_f
+ * with @arg through the table's rnh_walktree method.
+ *
+ * The walk runs with the RIB read lock held; a missing table is a no-op.
+ */
+void
+rib_walk(int af, u_int fibnum, rt_walktree_f_t *wa_f, void *arg)
+{
+	RIB_RLOCK_TRACKER;
+	struct rib_head *rnh;
+
+	rnh = rt_tables_get_rnh(fibnum, af);
+	if (rnh != NULL) {
+		RIB_RLOCK(rnh);
+		rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f, arg);
+		RIB_RUNLOCK(rnh);
+	}
+}
+
Index: sys/net/route/shared.h
===================================================================
--- /dev/null
+++ sys/net/route/shared.h
@@ -0,0 +1,131 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2020 Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * Contains various definitions shared between the parts of a routing subsystem.
+ *
+ * Header is not intended to be included by the code external to the
+ * routing subsystem.
+ */
+
+#ifndef _NET_ROUTE_SHARED_H_
+#define _NET_ROUTE_SHARED_H_
+
+#ifdef INVARIANTS
+#define NET_EPOCH_ASSERT_INVARIANTS() NET_EPOCH_ASSERT()
+#else
+#define NET_EPOCH_ASSERT_INVARIANTS()
+#endif
+
+#ifdef RTDEBUG
+#define DPRINTF(_fmt, ...) printf("%s: " _fmt "\n", __func__ , ## __VA_ARGS__)
+#else
+#define DPRINTF(_fmt, ...)
+#endif
+
+struct rib_head;
+
+/* Nexthops */
+void nhops_init(void);
+int nhops_init_rib(struct rib_head *rh);
+void nhops_destroy_rib(struct rib_head *rh);
+struct nhop_object *nhop_get(struct rib_head *rh, const struct nhop_request *req);
+int nhop_ref_object(struct nhop_object *nh);
+int nhop_ref_any(struct nhop_object *nh);
+void nhop_free_any(struct nhop_object *nh);
+
+void nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu);
+int nhops_dump_sysctl(struct rib_head *rh, struct sysctl_req *w);
+
+
+/* multipath */
+#define MPF_MULTIPATH 0x08 /* need to be consistent with NHF_MULTIPATH */
+#define MPF_LINKED 0x10 /* mpath group is linked */
+
+struct nhgrp_object {
+	uint16_t mp_flags;	/* mpath flags */
+	uint8_t mp_size;	/* size of mpath group used in selection */
+	uint8_t spare;
+	struct nhop_object *nhops[];	/* nhops (C99 flexible array member) */
+};
+
+struct weightened_nhop { /* one (nexthop, weight) pair */
+ struct nhop_object *nh; /* the nexthop */
+ uint32_t weight; /* weight paired with @nh; NOTE(review): scale/semantics not visible here */
+};
+
+/* */
+int rt_getifa_fib(struct rt_addrinfo *, u_int);
+
+/* nhgrp.c */
+int nhgrp_ctl_init(struct nh_control *ctl);
+void nhgrp_ctl_free(struct nh_control *ctl);
+
+struct nhgrp_object;
+
+/* nhgrp_ctl.c */
+struct weightened_nhop *nhgrp_get_nhops(struct nhgrp_object *mp,
+ uint32_t *pnum_nhops);
+int nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w);
+
+struct nhgrp_object *nhgrp_get_group(struct rib_head *rh,
+ struct weightened_nhop *wn, int num_nhops, int *perror);
+struct nhgrp_object *nhgrp_append_nhops(struct rib_head *rh,
+ const struct nhgrp_object *gr_orig, struct weightened_nhop *wn,
+ int num_nhops, uint64_t *paddmask, int *perror);
+struct nhgrp_object *nhgrp_get_del_nhops(struct rib_head *rh,
+ const struct nhgrp_object *src, uint64_t *nhop_mask, int *perror);
+struct nhgrp_object *nhgrp_get_replace_nhop(struct rib_head *rh,
+ const struct nhgrp_object *gr_orig, struct weightened_nhop *wn,
+ uint8_t replace_idx, uint64_t *pmodmask, int *perror);
+
+void nhgrp_free_group(struct nhgrp_object *gr);
+int nhgrp_ref_group(struct nhgrp_object *gr);
+
+/* nhgrp*/
+
+/* route_ctl.c */
+int can_nh_multipath(const struct nhop_object *nh);
+int create_rte_from_rte(struct rib_head *rnh, struct rtentry *rt_orig,
+ struct rtentry **ret_rt);
+int del_route_one(struct rib_head *rnh, struct rtentry *rt,
+ struct rt_addrinfo *info);
+
+int rib_match_nhop_gw(const struct nhop_object *nh,
+ const struct sockaddr *gw);
+
+struct nhop_object *nhop_create_from_info_wrapper(struct rib_head *rnh,
+ struct rt_addrinfo *info);
+struct nhop_object *nhop_create_from_nhop_wrapper(struct rib_head *rnh,
+ const struct nhop_object *nh_old, struct rt_addrinfo *info);
+
+#endif
+
+
+
Index: sys/net/route_var.h
===================================================================
--- sys/net/route_var.h
+++ sys/net/route_var.h
@@ -32,6 +32,11 @@
#ifndef _NET_ROUTE_VAR_H_
#define _NET_ROUTE_VAR_H_
+struct nh_control;
+struct nhop_request;
+typedef int rnh_preadd_entry_f_t(u_int fibnum, const struct sockaddr *addr,
+ const struct sockaddr *mask, struct nhop_request *req);
+
struct rib_head {
struct radix_head head;
rn_matchaddr_f_t *rnh_matchaddr; /* longest match for sockaddr */
@@ -41,6 +46,7 @@
rn_walktree_t *rnh_walktree; /* traverse tree */
rn_walktree_from_t *rnh_walktree_from; /* traverse tree below a */
rn_close_t *rnh_close; /*do something when the last ref drops*/
+ rnh_preadd_entry_f_t *rnh_preadd; /* hook to alter record prior to insertion */
rt_gen_t rnh_gen; /* generation counter */
int rnh_multipath; /* multipath capable ? */
struct radix_node rnh_nodes[3]; /* empty tree for common case */
@@ -51,6 +57,7 @@
u_int rib_fibnum; /* fib number */
struct callout expire_callout; /* Callout for expiring dynamic routes */
time_t next_expire; /* Next expire run ts */
+ struct nh_control *nh_control; /* nexthop subsystem data */
};
#define RIB_RLOCK_TRACKER struct rm_priotracker _rib_tracker
@@ -89,6 +96,44 @@
struct rib_head *rt_tables_get_rnh(int fib, int family);
+VNET_PCPUSTAT_DECLARE(struct rtstat, rtstat);
+#define RTSTAT_ADD(name, val) \
+ VNET_PCPUSTAT_ADD(struct rtstat, rtstat, name, (val))
+#define RTSTAT_INC(name) RTSTAT_ADD(name, 1)
+
+/*
+ * With the split between the routing entry and the nexthop,
+ * rt_flags has to be split between these 2 entries. As rtentry
+ * mostly contains prefix data and is thought to be generic enough
+ * so one can transparently change the nexthop pointer w/o requiring
+ * any other rtentry changes, most of rt_flags shifts to the particular nexthop.
+ *
+ *
+ * RTF_UP: rtentry, as an indication that it is linked.
+ * RTF_HOST: rtentry, nhop. The latter indication is needed for the datapath
+ * RTF_DYNAMIC: nhop, to make rtentry generic.
+ * RTF_MODIFIED: nhop, to make rtentry generic. (legacy)
+ * -- "native" path (nhop) properties:
+ * RTF_GATEWAY, RTF_STATIC, RTF_PROTO1, RTF_PROTO2, RTF_PROTO3, RTF_FIXEDMTU,
+ * RTF_PINNED, RTF_REJECT, RTF_BLACKHOLE, RTF_BROADCAST
+ */
+
+/* Nexthop rt flags mask */
+#define NHOP_RT_FLAG_MASK (RTF_GATEWAY | RTF_HOST | RTF_REJECT | RTF_DYNAMIC | \
+ RTF_MODIFIED | RTF_STATIC | RTF_BLACKHOLE | RTF_PROTO1 | RTF_PROTO2 | \
+ RTF_PROTO3 | RTF_FIXEDMTU | RTF_PINNED | RTF_BROADCAST)
+
+/* rtentry rt flag mask */
+#define RTE_RT_FLAG_MASK (RTF_UP | RTF_HOST)
+
+/* Nexthop selection */
+#define _NH2MP(_nh) ((struct nhgrp_object *)(_nh))
+#define _SELECT_NHOP(_nh, _flowid) \
+ (_NH2MP(_nh))->nhops[(_flowid) % (_NH2MP(_nh))->mp_size]
+#define _RT_SELECT_NHOP(_nh, _flowid) \
+ ((!NH_IS_MULTIPATH(_nh)) ? (_nh) : _SELECT_NHOP(_nh, _flowid))
+#define RT_SELECT_NHOP(_rt, _flowid) _RT_SELECT_NHOP((_rt)->rt_nhop, _flowid)
+
/* rte<>nhop translation */
static inline uint16_t
fib_rte_to_nh_flags(int rt_flags)
Index: sys/net/rtsock.c
===================================================================
--- sys/net/rtsock.c
+++ sys/net/rtsock.c
@@ -77,6 +77,7 @@
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#endif
+#include <net/route/nhop.h>
#ifdef COMPAT_FREEBSD32
#include <sys/mount.h>
@@ -1076,6 +1077,7 @@
out->rmx_mtu = rt->rt_mtu;
out->rmx_weight = rt->rt_weight;
out->rmx_pksent = counter_u64_fetch(rt->rt_pksent);
+ out->rmx_nhidx = nhop_get_idx(rt->rt_nhop);
/* Kernel -> userland timebase conversion. */
out->rmx_expire = rt->rt_expire ?
rt->rt_expire - time_uptime + time_second : 0;
@@ -2025,7 +2027,7 @@
namelen--;
if (req->newptr)
return (EPERM);
- if (name[1] == NET_RT_DUMP) {
+ if (name[1] == NET_RT_DUMP || name[1] == NET_RT_NHOP) {
if (namelen == 3)
fib = req->td->td_proc->p_fibnum;
else if (namelen == 4)
@@ -2092,7 +2094,25 @@
error = EAFNOSUPPORT;
}
break;
-
+ case NET_RT_NHOP:
+ /* Allow dumping one specific af/fib at a time */
+ if (namelen < 4) {
+ error = EINVAL;
+ break;
+ }
+ fib = name[3];
+ /* rt_numfibs itself is out of range: lookups assert fibnum < rt_numfibs */
+ if (fib < 0 || fib >= rt_numfibs) {
+ error = EINVAL;
+ break;
+ }
+ rnh = rt_tables_get_rnh(fib, af);
+ if (rnh == NULL) {
+ error = EAFNOSUPPORT;
+ break;
+ }
+ if (w.w_op == NET_RT_NHOP)
+ error = nhops_dump_sysctl(rnh, w.w_req);
+ break;
case NET_RT_IFLIST:
case NET_RT_IFLISTL:
error = sysctl_iflist(af, &w);
Index: sys/netinet/in_fib.h
===================================================================
--- sys/netinet/in_fib.h
+++ sys/netinet/in_fib.h
@@ -58,5 +58,9 @@
uint32_t flowid, struct nhop4_extended *pnh4);
void fib4_free_nh_ext(uint32_t fibnum, struct nhop4_extended *pnh4);
+struct nhop_object *fib4_lookup_nh_ptr(uint32_t fibnum, struct in_addr dst,
+ uint32_t scopeid, uint32_t flags, uint32_t flowid);
+int fib4_lookup_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
+ uint32_t flags, const struct ifnet *src_if);
#endif
Index: sys/netinet/in_fib.c
===================================================================
--- sys/netinet/in_fib.c
+++ sys/netinet/in_fib.c
@@ -49,6 +49,8 @@
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route_var.h>
+#include <net/route/nhop.h>
+#include <net/route/shared.h>
#include <net/vnet.h>
#ifdef RADIX_MPATH
@@ -60,59 +62,49 @@
#include <netinet/in_fib.h>
#ifdef INET
-static void fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst,
+static void fib4_rte_to_nh_basic(struct nhop_object *nh, struct in_addr dst,
uint32_t flags, struct nhop4_basic *pnh4);
-static void fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst,
+static void fib4_rte_to_nh_extended(struct nhop_object *nh, struct in_addr dst,
uint32_t flags, struct nhop4_extended *pnh4);
#define RNTORT(p) ((struct rtentry *)(p))
static void
-fib4_rte_to_nh_basic(struct rtentry *rte, struct in_addr dst,
+fib4_rte_to_nh_basic(struct nhop_object *nh, struct in_addr dst,
uint32_t flags, struct nhop4_basic *pnh4)
{
- struct sockaddr_in *gw;
if ((flags & NHR_IFAIF) != 0)
- pnh4->nh_ifp = rte->rt_ifa->ifa_ifp;
+ pnh4->nh_ifp = nh->nh_ifa->ifa_ifp;
+ else
+ pnh4->nh_ifp = nh->nh_ifp;
+ pnh4->nh_mtu = nh->nh_mtu;
+ if (nh->nh_flags & NHF_GATEWAY)
+ pnh4->nh_addr = nh->gw4_sa.sin_addr;
else
- pnh4->nh_ifp = rte->rt_ifp;
- pnh4->nh_mtu = min(rte->rt_mtu, rte->rt_ifp->if_mtu);
- if (rte->rt_flags & RTF_GATEWAY) {
- gw = (struct sockaddr_in *)rte->rt_gateway;
- pnh4->nh_addr = gw->sin_addr;
- } else
pnh4->nh_addr = dst;
/* Set flags */
- pnh4->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
- gw = (struct sockaddr_in *)rt_key(rte);
- if (gw->sin_addr.s_addr == 0)
- pnh4->nh_flags |= NHF_DEFAULT;
+ pnh4->nh_flags = nh->nh_flags;
/* TODO: Handle RTF_BROADCAST here */
}
static void
-fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst,
+fib4_rte_to_nh_extended(struct nhop_object *nh, struct in_addr dst,
uint32_t flags, struct nhop4_extended *pnh4)
{
- struct sockaddr_in *gw;
if ((flags & NHR_IFAIF) != 0)
- pnh4->nh_ifp = rte->rt_ifa->ifa_ifp;
+ pnh4->nh_ifp = nh->nh_ifa->ifa_ifp;
+ else
+ pnh4->nh_ifp = nh->nh_ifp;
+ pnh4->nh_mtu = nh->nh_mtu;
+ if (nh->nh_flags & NHF_GATEWAY)
+ pnh4->nh_addr = nh->gw4_sa.sin_addr;
else
- pnh4->nh_ifp = rte->rt_ifp;
- pnh4->nh_mtu = min(rte->rt_mtu, rte->rt_ifp->if_mtu);
- if (rte->rt_flags & RTF_GATEWAY) {
- gw = (struct sockaddr_in *)rte->rt_gateway;
- pnh4->nh_addr = gw->sin_addr;
- } else
pnh4->nh_addr = dst;
/* Set flags */
- pnh4->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
- gw = (struct sockaddr_in *)rt_key(rte);
- if (gw->sin_addr.s_addr == 0)
- pnh4->nh_flags |= NHF_DEFAULT;
- pnh4->nh_ia = ifatoia(rte->rt_ifa);
+ pnh4->nh_flags = nh->nh_flags;
+ pnh4->nh_ia = ifatoia(nh->nh_ifa);
pnh4->nh_src = IA_SIN(pnh4->nh_ia)->sin_addr;
}
@@ -135,7 +127,7 @@
struct rib_head *rh;
struct radix_node *rn;
struct sockaddr_in sin;
- struct rtentry *rte;
+ struct nhop_object *nh;
KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_basic: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET);
@@ -150,10 +142,10 @@
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
- rte = RNTORT(rn);
+ nh = RNTORT(rn)->rt_nhop;
/* Ensure route & ifp is UP */
- if (RT_LINK_IS_UP(rte->rt_ifp)) {
- fib4_rte_to_nh_basic(rte, dst, flags, pnh4);
+ if (RT_LINK_IS_UP(nh->nh_ifp)) {
+ fib4_rte_to_nh_basic(nh, dst, flags, pnh4);
RIB_RUNLOCK(rh);
return (0);
@@ -183,8 +175,8 @@
RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
+ struct nhop_object *nh;
struct sockaddr_in sin;
- struct rtentry *rte;
KASSERT((fibnum < rt_numfibs), ("fib4_lookup_nh_ext: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET);
@@ -193,23 +185,18 @@
/* Prepare lookup key */
memset(&sin, 0, sizeof(sin));
+ sin.sin_family = AF_INET;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_addr = dst;
+ nh = NULL;
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
- rte = RNTORT(rn);
-#ifdef RADIX_MPATH
- rte = rt_mpath_select(rte, flowid);
- if (rte == NULL) {
- RIB_RUNLOCK(rh);
- return (ENOENT);
- }
-#endif
+ nh = RNTORT(rn)->rt_nhop;
/* Ensure route & ifp is UP */
- if (RT_LINK_IS_UP(rte->rt_ifp)) {
- fib4_rte_to_nh_extended(rte, dst, flags, pnh4);
+ if (RT_LINK_IS_UP(nh->nh_ifp)) {
+ fib4_rte_to_nh_extended(nh, dst, flags, pnh4);
if ((flags & NHR_REF) != 0) {
/* TODO: lwref on egress ifp's ? */
}
@@ -229,4 +216,105 @@
}
+/*
+ * Looks up the nexthop used to reach @dst in the IPv4 table of @fibnum.
+ *
+ * Only a nexthop whose interface passes RT_LINK_IS_UP() is returned.  If
+ * @flags contains NHR_REF, nhop_ref_object() is called on the nexthop before
+ * the RIB read lock is dropped.  @scopeid and @flowid are currently unused.
+ *
+ * Returns the nexthop pointer, or NULL if there is no table for @fibnum or
+ * no usable route; the latter case also bumps the rts_unreach counter.
+ */
+struct nhop_object *
+fib4_lookup_nh_ptr(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
+    uint32_t flags, uint32_t flowid)
+{
+	RIB_RLOCK_TRACKER;
+	struct rib_head *rh;
+	struct radix_node *rn;
+	struct nhop_object *nh;
+	struct sockaddr_in sin4;
+
+	KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__));
+	rh = rt_tables_get_rnh(fibnum, AF_INET);
+	if (rh == NULL)
+		return (NULL);
+
+	/* Prepare lookup key */
+	memset(&sin4, 0, sizeof(sin4));
+	sin4.sin_family = AF_INET;
+	sin4.sin_len = sizeof(struct sockaddr_in);
+	sin4.sin_addr = dst;
+
+	RIB_RLOCK(rh);
+	rn = rh->rnh_matchaddr((void *)&sin4, &rh->head);
+	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+		nh = (RNTORT(rn))->rt_nhop;
+		/* Ensure route & ifp is UP */
+		if (RT_LINK_IS_UP(nh->nh_ifp)) {
+			/* NOTE(review): the nhop_ref_object() result is ignored. */
+			if (flags & NHR_REF)
+				nhop_ref_object(nh);
+			RIB_RUNLOCK(rh);
+			return (nh);
+		}
+	}
+	RIB_RUNLOCK(rh);
+
+	RTSTAT_INC(rts_unreach);
+	return (NULL);
+}
+
+/*
+ * Decides whether @nh satisfies a reverse-path check.
+ *
+ * With @src_if given, @nh passes only if its nh_aifp is that interface.
+ * Without it, @nh passes unless NHR_NODEFAULT is set in @flags and @nh
+ * carries NHF_DEFAULT.
+ *
+ * Returns 1 on pass, 0 otherwise.
+ */
+static inline int
+check_urpf(const struct nhop_object *nh, uint32_t flags,
+    const struct ifnet *src_if)
+{
+
+	/* Interface-bound check: only the address interface matters. */
+	if (src_if != NULL)
+		return ((nh->nh_aifp == src_if) ? 1 : 0);
+
+	/* Existence check: reject only a default route the caller excluded. */
+	if ((flags & NHR_NODEFAULT) != 0 && (nh->nh_flags & NHF_DEFAULT) != 0)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Performs reverse path forwarding lookup.
+ * If @src_if is non-NULL, verifies that the matching route goes via
+ * this interface.
+ * If @src_if is NULL, verifies that a route exists.
+ * If @flags contains NHR_NODEFAULT, the default route is not considered
+ * (check_urpf() applies this only when @src_if is NULL).
+ * @scopeid is currently unused.
+ *
+ * Returns 1 if route matching conditions is found, 0 otherwise
+ * (including when there is no IPv4 table for @fibnum).
+ */
+int
+fib4_lookup_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
+    uint32_t flags, const struct ifnet *src_if)
+{
+	RIB_RLOCK_TRACKER;
+	struct rib_head *rh;
+	struct radix_node *rn;
+	struct nhop_object *nh;
+	struct sockaddr_in sin4;
+	int ret;
+
+	KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__));
+	rh = rt_tables_get_rnh(fibnum, AF_INET);
+	if (rh == NULL)
+		return (0);
+
+	/* Prepare lookup key, built the same way as in fib4_lookup_nh_ptr() */
+	memset(&sin4, 0, sizeof(sin4));
+	sin4.sin_family = AF_INET;
+	sin4.sin_len = sizeof(struct sockaddr_in);
+	sin4.sin_addr = dst;
+
+	/* No match (or only the radix root node) means the check fails. */
+	ret = 0;
+	RIB_RLOCK(rh);
+	rn = rh->rnh_matchaddr((void *)&sin4, &rh->head);
+	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+		nh = (RNTORT(rn))->rt_nhop;
+		ret = check_urpf(nh, flags, src_if);
+	}
+	RIB_RUNLOCK(rh);
+
+	return (ret);
+}
+
#endif
Index: sys/netinet/in_rmx.c
===================================================================
--- sys/netinet/in_rmx.c
+++ sys/netinet/in_rmx.c
@@ -41,6 +41,7 @@
#include <net/if_var.h>
#include <net/route.h>
#include <net/route_var.h>
+#include <net/route/nhop.h>
#include <net/vnet.h>
#include <netinet/in.h>
@@ -54,6 +55,58 @@
extern int in_detachhead(void **head, int off);
#endif
+/*
+ * IPv4 hook run on a nexthop request before the nexthop is looked up or
+ * created (installed as rnh_preadd for the IPv4 rib).
+ *
+ * Adjusts @req in place:
+ * - marks host routes to a broadcast address with RTF_BROADCAST;
+ * - fills in a missing MTU from, and clamps a supplied MTU to, the
+ *   interface MTU;
+ * - tags requests for the default route (non-host, all-zero @mask) with
+ *   NHF_DEFAULT;
+ * - picks the basic IPv4 nexthop type if none has been set yet.
+ *
+ * @fibnum is unused.  Always returns 0.
+ */
+static int
+rib4_preadd(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *mask,
+    struct nhop_request *req)
+{
+	const struct sockaddr_in *addr4 = (const struct sockaddr_in *)addr;
+	const struct sockaddr_in *mask4 = (const struct sockaddr_in *)mask;
+
+	/* XXX: RTF_LOCAL && RTF_MULTICAST */
+
+	if (req->rt_flags & RTF_HOST) {
+		/*
+		 * Backward compatibility:
+		 * if the destination is broadcast,
+		 * mark route as broadcast.
+		 * This behavior was useful when route cloning
+		 * was in place, so there was an explicit cloned
+		 * route for every broadcasted address.
+		 * Currently (2019-12) there is no kernel machinery
+		 * to do route cloning, though someone might explicitly
+		 * add these routes to support some cases with active-active
+		 * load balancing. Given that, retain this support.
+		 */
+		if (in_broadcast(addr4->sin_addr, req->ifp))
+			req->rt_flags |= RTF_BROADCAST;
+	}
+
+	/*
+	 * Check route MTU:
+	 * inherit interface MTU if not set or
+	 * clamp it if it is larger than the interface MTU.
+	 */
+	if (req->mtu == 0 || req->mtu > req->ifp->if_mtu)
+		req->mtu = req->ifp->if_mtu;
+
+	/*
+	 * Ensure that default route nhop has special flag.
+	 * @mask is passed through from the route request and may be absent,
+	 * so it is checked before being dereferenced.
+	 */
+	if ((req->rt_flags & RTF_HOST) == 0 && mask4 != NULL &&
+	    mask4->sin_addr.s_addr == 0)
+		req->nh_flags_additional |= NHF_DEFAULT;
+
+	/* Set nhop type to basic per-AF nhop */
+	if (req->nh_type == 0) {
+		if (req->rt_flags & RTF_GATEWAY)
+			req->nh_type = NH_TYPE_IPV4_ETHER_NHOP;
+		else
+			req->nh_type = NH_TYPE_IPV4_ETHER_RSLV;
+	}
+
+	return (0);
+}
+
/*
* Do what we need to do when inserting a route.
*/
@@ -124,6 +177,7 @@
if (rh == NULL)
return (0);
+ rh->rnh_preadd = rib4_preadd;
rh->rnh_addaddr = in_addroute;
*head = (void *)rh;
Index: sys/netinet6/in6_fib.h
===================================================================
--- sys/netinet6/in6_fib.h
+++ sys/netinet6/in6_fib.h
@@ -58,5 +58,10 @@
uint32_t scopeid, uint32_t flags, uint32_t flowid,
struct nhop6_extended *pnh6);
void fib6_free_nh_ext(uint32_t fibnum, struct nhop6_extended *pnh6);
+struct nhop_object *fib6_lookup_nh_ptr(uint32_t fibnum,
+ const struct in6_addr *dst6, uint32_t scopeid, uint32_t flags,
+ uint32_t flowid);
+int fib6_lookup_urpf(uint32_t fibnum, const struct in6_addr *dst6,
+ uint32_t scopeid, uint32_t flags, const struct ifnet *src_if);
#endif
Index: sys/netinet6/in6_fib.c
===================================================================
--- sys/netinet6/in6_fib.c
+++ sys/netinet6/in6_fib.c
@@ -50,6 +50,8 @@
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route_var.h>
+#include <net/route/nhop.h>
+#include <net/route/shared.h>
#include <net/vnet.h>
#ifdef RADIX_MPATH
@@ -68,94 +70,63 @@
#include <net/if_types.h>
#ifdef INET6
-static void fib6_rte_to_nh_extended(struct rtentry *rte,
+static void fib6_rte_to_nh_extended(const struct nhop_object *nh,
const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6);
-static void fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst,
+static void fib6_rte_to_nh_basic(const struct nhop_object *nh, const struct in6_addr *dst,
uint32_t flags, struct nhop6_basic *pnh6);
-static struct ifnet *fib6_get_ifaifp(struct rtentry *rte);
#define RNTORT(p) ((struct rtentry *)(p))
#define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))
CHK_STRUCT_ROUTE_COMPAT(struct route_in6, ro_dst);
-/*
- * Gets real interface for the @rte.
- * Returns rt_ifp for !IFF_LOOPBACK routers.
- * Extracts "real" address interface from interface address
- * loopback routes.
- */
-static struct ifnet *
-fib6_get_ifaifp(struct rtentry *rte)
-{
- struct ifnet *ifp;
- struct sockaddr_dl *sdl;
-
- ifp = rte->rt_ifp;
- if ((ifp->if_flags & IFF_LOOPBACK) &&
- rte->rt_gateway->sa_family == AF_LINK) {
- sdl = (struct sockaddr_dl *)rte->rt_gateway;
- return (ifnet_byindex(sdl->sdl_index));
- }
- return (ifp);
-}
static void
-fib6_rte_to_nh_basic(struct rtentry *rte, const struct in6_addr *dst,
+fib6_rte_to_nh_basic(const struct nhop_object *nh, const struct in6_addr *dst,
uint32_t flags, struct nhop6_basic *pnh6)
{
- struct sockaddr_in6 *gw;
/* Do explicit nexthop zero unless we're copying it */
memset(pnh6, 0, sizeof(*pnh6));
if ((flags & NHR_IFAIF) != 0)
- pnh6->nh_ifp = fib6_get_ifaifp(rte);
+ pnh6->nh_ifp = nh->nh_aifp;
else
- pnh6->nh_ifp = rte->rt_ifp;
+ pnh6->nh_ifp = nh->nh_ifp;
- pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp));
- if (rte->rt_flags & RTF_GATEWAY) {
+ pnh6->nh_mtu = nh->nh_mtu;
+ if (nh->nh_flags & NHF_GATEWAY) {
/* Return address with embedded scope. */
- gw = (struct sockaddr_in6 *)rte->rt_gateway;
- pnh6->nh_addr = gw->sin6_addr;
+ pnh6->nh_addr = nh->gw6_sa.sin6_addr;
} else
pnh6->nh_addr = *dst;
/* Set flags */
- pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
- gw = (struct sockaddr_in6 *)rt_key(rte);
- if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr))
- pnh6->nh_flags |= NHF_DEFAULT;
+ pnh6->nh_flags = nh->nh_flags;
}
static void
-fib6_rte_to_nh_extended(struct rtentry *rte, const struct in6_addr *dst,
+fib6_rte_to_nh_extended(const struct nhop_object *nh, const struct in6_addr *dst,
uint32_t flags, struct nhop6_extended *pnh6)
{
- struct sockaddr_in6 *gw;
/* Do explicit nexthop zero unless we're copying it */
memset(pnh6, 0, sizeof(*pnh6));
if ((flags & NHR_IFAIF) != 0)
- pnh6->nh_ifp = fib6_get_ifaifp(rte);
+ pnh6->nh_ifp = nh->nh_aifp;
else
- pnh6->nh_ifp = rte->rt_ifp;
+ pnh6->nh_ifp = nh->nh_ifp;
- pnh6->nh_mtu = min(rte->rt_mtu, IN6_LINKMTU(rte->rt_ifp));
- if (rte->rt_flags & RTF_GATEWAY) {
+ pnh6->nh_mtu = nh->nh_mtu;
+ if (nh->nh_flags & NHF_GATEWAY) {
/* Return address with embedded scope. */
- gw = (struct sockaddr_in6 *)rte->rt_gateway;
- pnh6->nh_addr = gw->sin6_addr;
+ pnh6->nh_addr = nh->gw6_sa.sin6_addr;
} else
pnh6->nh_addr = *dst;
/* Set flags */
- pnh6->nh_flags = fib_rte_to_nh_flags(rte->rt_flags);
- gw = (struct sockaddr_in6 *)rt_key(rte);
- if (IN6_IS_ADDR_UNSPECIFIED(&gw->sin6_addr))
- pnh6->nh_flags |= NHF_DEFAULT;
- pnh6->nh_ia = ifatoia6(rte->rt_ifa);
+ pnh6->nh_flags = nh->nh_flags;
+ pnh6->nh_ia = ifatoia6(nh->nh_ifa);
}
/*
@@ -180,7 +151,7 @@
struct rib_head *rh;
struct radix_node *rn;
struct sockaddr_in6 sin6;
- struct rtentry *rte;
+ struct nhop_object *nh;
KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_basic: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET6);
@@ -198,10 +169,10 @@
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
- rte = RNTORT(rn);
+ nh = RNTORT(rn)->rt_nhop;
/* Ensure route & ifp is UP */
- if (RT_LINK_IS_UP(rte->rt_ifp)) {
- fib6_rte_to_nh_basic(rte, &sin6.sin6_addr, flags, pnh6);
+ if (RT_LINK_IS_UP(nh->nh_ifp)) {
+ fib6_rte_to_nh_basic(nh, &sin6.sin6_addr, flags, pnh6);
RIB_RUNLOCK(rh);
return (0);
}
@@ -230,7 +201,7 @@
struct rib_head *rh;
struct radix_node *rn;
struct sockaddr_in6 sin6;
- struct rtentry *rte;
+ struct nhop_object *nh;
KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_ext: bad fibnum"));
rh = rt_tables_get_rnh(fibnum, AF_INET6);
@@ -248,17 +219,10 @@
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
- rte = RNTORT(rn);
-#ifdef RADIX_MPATH
- rte = rt_mpath_select(rte, flowid);
- if (rte == NULL) {
- RIB_RUNLOCK(rh);
- return (ENOENT);
- }
-#endif
+ nh = RNTORT(rn)->rt_nhop;
/* Ensure route & ifp is UP */
- if (RT_LINK_IS_UP(rte->rt_ifp)) {
- fib6_rte_to_nh_extended(rte, &sin6.sin6_addr, flags,
+ if (RT_LINK_IS_UP(nh->nh_ifp)) {
+ fib6_rte_to_nh_extended(nh, &sin6.sin6_addr, flags,
pnh6);
if ((flags & NHR_REF) != 0) {
/* TODO: Do lwref on egress ifp's */
@@ -279,5 +243,114 @@
}
+/*
+ * Looks up the nexthop object used to reach @dst6 in fib @fibnum.
+ *
+ * Assumes scope is deembedded and provided in @scopeid.
+ * If @flags contains NHR_REF, nhop_ref_object() is called on the nexthop
+ * before the rib read lock is released.
+ * @flowid is accepted for interface symmetry but is not used here.
+ *
+ * Returns the nexthop when a non-root radix entry matches and its
+ * interface passes RT_LINK_IS_UP(); otherwise bumps rts_unreach and
+ * returns NULL.
+ */
+struct nhop_object *
+fib6_lookup_nh_ptr(uint32_t fibnum, const struct in6_addr *dst6,
+    uint32_t scopeid, uint32_t flags, uint32_t flowid)
+{
+	RIB_RLOCK_TRACKER;
+	struct rib_head *rh;
+	struct radix_node *rn;
+	struct nhop_object *nh;
+	struct sockaddr_in6 sin6;
+
+	KASSERT((fibnum < rt_numfibs), ("fib6_lookup_nh_ptr: bad fibnum"));
+	rh = rt_tables_get_rnh(fibnum, AF_INET6);
+	if (rh == NULL)
+		return (NULL);
+
+	/* TODO: radix changes */
+	/* Prepare lookup key */
+	memset(&sin6, 0, sizeof(sin6));
+	sin6.sin6_len = sizeof(struct sockaddr_in6);
+	sin6.sin6_addr = *dst6;
+
+	/* Assume scopeid is valid and embed it directly */
+	if (IN6_IS_SCOPE_LINKLOCAL(dst6))
+		sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
+
+	RIB_RLOCK(rh);
+	rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
+	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+		nh = (RNTORT(rn))->rt_nhop;
+		/* Ensure route & ifp is UP */
+		if (RT_LINK_IS_UP(nh->nh_ifp)) {
+			/* Take the reference while the rib lock is still held */
+			if (flags & NHR_REF)
+				nhop_ref_object(nh);
+			RIB_RUNLOCK(rh);
+			return (nh);
+		}
+	}
+	RIB_RUNLOCK(rh);
+
+	RTSTAT_INC(rts_unreach);
+	return (NULL);
+}
+
+/*
+ * Decides whether nexthop @nh satisfies a reverse-path check.
+ * With @src_if set, the nexthop's address interface must be @src_if.
+ * Without @src_if, any nexthop passes unless @flags has NHR_NODEFAULT
+ * and the nexthop is marked NHF_DEFAULT.
+ *
+ * Returns 1 on pass, 0 on fail.
+ */
+static inline int
+check_urpf(const struct nhop_object *nh, uint32_t flags,
+    const struct ifnet *src_if)
+{
+
+	/* Interface-bound check: only the address interface matters */
+	if (src_if != NULL)
+		return (nh->nh_aifp == src_if);
+
+	/* Existence check: optionally refuse the default route */
+	if ((flags & NHR_NODEFAULT) != 0 && (nh->nh_flags & NHF_DEFAULT) != 0)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * Performs reverse path forwarding lookup.
+ * If @src_if is non-NULL, verifies that the matching route goes via
+ * this interface.
+ * If @src_if is NULL, verifies that a route exists.
+ * If @flags contains NHR_NODEFAULT, do not consider the default route.
+ *
+ * Assumes scope is deembedded and provided in @scopeid.
+ *
+ * Returns 1 if a route matching the conditions is found, 0 otherwise.
+ */
+int
+fib6_lookup_urpf(uint32_t fibnum, const struct in6_addr *dst6,
+    uint32_t scopeid, uint32_t flags, const struct ifnet *src_if)
+{
+	RIB_RLOCK_TRACKER;
+	struct rib_head *rh;
+	struct radix_node *rn;
+	struct nhop_object *nh;
+	struct sockaddr_in6 sin6;
+	int ret;
+
+	KASSERT((fibnum < rt_numfibs), ("fib6_lookup_urpf: bad fibnum"));
+	rh = rt_tables_get_rnh(fibnum, AF_INET6);
+	if (rh == NULL)
+		return (0);
+
+	/*
+	 * Prepare lookup key.  The key is built as a full sockaddr_in6
+	 * with sin6_len set, the same way the other lookups in this file
+	 * do, rather than handing rnh_matchaddr a bare in6_addr.
+	 */
+	memset(&sin6, 0, sizeof(sin6));
+	sin6.sin6_len = sizeof(struct sockaddr_in6);
+	sin6.sin6_addr = *dst6;
+
+	/* Assume scopeid is valid and embed it directly */
+	if (IN6_IS_SCOPE_LINKLOCAL(dst6))
+		sin6.sin6_addr.s6_addr16[1] = htons(scopeid & 0xffff);
+
+	RIB_RLOCK(rh);
+	rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
+	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
+		nh = (RNTORT(rn))->rt_nhop;
+		ret = check_urpf(nh, flags, src_if);
+		RIB_RUNLOCK(rh);
+		return (ret);
+	}
+	RIB_RUNLOCK(rh);
+
+	return (0);
+}
+
#endif
Index: sys/netinet6/in6_rmx.c
===================================================================
--- sys/netinet6/in6_rmx.c
+++ sys/netinet6/in6_rmx.c
@@ -80,6 +80,7 @@
#include <net/if_var.h>
#include <net/route.h>
#include <net/route_var.h>
+#include <net/route/nhop.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>
@@ -101,6 +102,39 @@
extern int in6_detachhead(void **head, int off);
#endif
+/*
+ * Pre-insertion hook for IPv6 routes: normalizes the nexthop request
+ * @req (MTU, default-route flag, nexthop type) before it is used.
+ * Always returns 0.
+ */
+static int
+rib6_preadd(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *mask,
+    struct nhop_request *req)
+{
+	const struct sockaddr_in6 *mask6;
+
+	/* XXX: RTF_LOCAL */
+
+	/*
+	 * Route MTU: an unset (zero) or oversized value is replaced by
+	 * the interface link MTU.
+	 */
+	if (req->mtu == 0 || req->mtu > IN6_LINKMTU(req->ifp))
+		req->mtu = IN6_LINKMTU(req->ifp);
+
+	/* A non-host route with an all-zero mask is the default route */
+	mask6 = (const struct sockaddr_in6 *)mask;
+	if ((req->rt_flags & RTF_HOST) == 0 &&
+	    IN6_IS_ADDR_UNSPECIFIED(&mask6->sin6_addr))
+		req->nh_flags_additional |= NHF_DEFAULT;
+
+	/* Pick a nexthop type only when the caller did not set one */
+	if (req->nh_type == 0)
+		req->nh_type = (req->rt_flags & RTF_GATEWAY) ?
+		    NH_TYPE_IPV6_ETHER_NHOP : NH_TYPE_IPV6_ETHER_RSLV;
+
+	return (0);
+}
+
/*
* Do what we need to do when inserting a route.
*/
@@ -167,6 +201,7 @@
return (0);
rh->rnh_addaddr = in6_addroute;
+ rh->rnh_preadd = rib6_preadd;
*head = (void *)rh;
return (1);
Index: sys/sys/socket.h
===================================================================
--- sys/sys/socket.h
+++ sys/sys/socket.h
@@ -416,6 +416,7 @@
#define NET_RT_IFMALIST 4 /* return multicast address list */
#define NET_RT_IFLISTL 5 /* Survey interface list, using 'l'en
* versions of msghdr structs. */
+#define NET_RT_NHOP 6 /* dump routing nexthops */
#endif /* __BSD_VISIBLE */
/*
Index: usr.bin/netstat/Makefile
===================================================================
--- usr.bin/netstat/Makefile
+++ usr.bin/netstat/Makefile
@@ -5,7 +5,7 @@
PROG= netstat
SRCS= if.c inet.c main.c mbuf.c mroute.c netisr.c nl_symbols.c route.c \
- unix.c mroute6.c ipsec.c bpf.c pfkey.c sctp.c \
+ unix.c mroute6.c ipsec.c bpf.c pfkey.c sctp.c common.c nhops.c \
nl_defs.h
nl_symbols.c: nlist_symbols
Index: usr.bin/netstat/common.h
===================================================================
--- /dev/null
+++ usr.bin/netstat/common.h
@@ -0,0 +1,24 @@
+#ifndef _NETSTAT_COMMON_H_
+#define _NETSTAT_COMMON_H_
+
+/*
+ * One entry of a flag-description table.  Tables end with an entry
+ * whose b_mask is 0; fmt_flags() and print_flags_generic() stop there.
+ */
+struct bits {
+ u_long b_mask; /* flag bit(s) tested against the flags word */
+ char b_val; /* single character used in the compact flags string */
+ const char *b_name; /* long name emitted in the per-flag list */
+};
+/* Routing flag table; defined in route.c. */
+extern struct bits rt_bits[];
+
+/*
+ * Builds the compact flags string for @f from table @p.
+ * The result lives in a static buffer that the next call overwrites.
+ */
+const char *fmt_flags(const struct bits *p, int f);
+/*
+ * Emits the compact flags string through @format, then a libxo list
+ * named @tag_name holding the long name of every flag set in @flags.
+ */
+void print_flags_generic(int flags, const struct bits *pbits,
+ const char *format, const char *tag_name);
+/*
+ * NOTE(review): no definition of print_sockaddr() appears in common.c;
+ * netstat.h declares p_sockaddr() with the same parameter list.
+ * Confirm which name is intended.
+ */
+int print_sockaddr(const char *name, struct sockaddr *sa,
+ struct sockaddr *mask, int flags, int width);
+
+/* Interface name for one interface index; empty string when unknown. */
+struct ifmap_entry {
+ char ifname[IFNAMSIZ];
+};
+
+/*
+ * Builds an array of ifmap_entry indexed by interface index and stores
+ * its entry count through @ifmap_size.
+ */
+struct ifmap_entry *prepare_ifmap(size_t *ifmap_size);
+
+#endif
+
Index: usr.bin/netstat/common.c
===================================================================
--- /dev/null
+++ usr.bin/netstat/common.c
@@ -0,0 +1,140 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1983, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/time.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <arpa/inet.h>
+#include <ifaddrs.h>
+#include <libutil.h>
+#include <netdb.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+#include <err.h>
+#include <libxo/xo.h>
+#include "netstat.h"
+#include "common.h"
+
+/*
+ * Builds the compact flags string for @f: one b_val character for every
+ * entry of table @p whose mask intersects @f, in table order.
+ * Returns a pointer to a static buffer that the next call overwrites.
+ */
+const char *
+fmt_flags(const struct bits *p, int f)
+{
+	static char name[33];
+	char *out = name;
+
+	while (p->b_mask != 0) {
+		if ((p->b_mask & f) != 0)
+			*out++ = p->b_val;
+		p++;
+	}
+	*out = '\0';
+
+	return (name);
+}
+
+/*
+ * Emits @flags twice: first as the compact string from fmt_flags()
+ * through the caller-supplied libxo @format, then as a libxo list
+ * named @tag_name with one long-name leaf per flag that is set.
+ */
+void
+print_flags_generic(int flags, const struct bits *pbits, const char *format,
+    const char *tag_name)
+{
+	char item_fmt[64];
+	const struct bits *bp;
+
+	xo_emit(format, fmt_flags(pbits, flags));
+
+	/* Per-item format is built once; "%%s" survives as "%s" for xo_emit */
+	snprintf(item_fmt, sizeof(item_fmt), "{le:%s/%%s}", tag_name);
+
+	xo_open_list(tag_name);
+	bp = pbits;
+	while (bp->b_mask != 0) {
+		if ((bp->b_mask & flags) != 0)
+			xo_emit(item_fmt, bp->b_name);
+		bp++;
+	}
+	xo_close_list(tag_name);
+}
+
+/*
+ * Builds an interface-index -> interface-name table from getifaddrs(3).
+ *
+ * Only AF_LINK addresses are considered; the first name seen for an
+ * index wins.  The array is grown in multiples of 32 entries and new
+ * entries are zero-filled, so an unknown index has an empty name.
+ *
+ * Stores the number of entries through @pifmap_size and returns the
+ * array, which is obtained from realloc() (NULL when no AF_LINK address
+ * was found).  Exits the program on getifaddrs() or realloc() failure.
+ */
+struct ifmap_entry *
+prepare_ifmap(size_t *pifmap_size)
+{
+	int ifindex = 0, size;
+	struct ifaddrs *ifap, *ifa;
+	struct sockaddr_dl *sdl;
+
+	struct ifmap_entry *ifmap = NULL;
+	int ifmap_size = 0;
+
+	/*
+	 * Retrieve interface list at first
+	 * since we need #ifindex -> if_xname match
+	 */
+	if (getifaddrs(&ifap) != 0)
+		err(EX_OSERR, "getifaddrs");
+
+	for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
+
+		/* getifaddrs(3) may leave ifa_addr NULL; skip such entries */
+		if (ifa->ifa_addr == NULL ||
+		    ifa->ifa_addr->sa_family != AF_LINK)
+			continue;
+
+		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
+		ifindex = sdl->sdl_index;
+
+		if (ifindex >= ifmap_size) {
+			size = roundup(ifindex + 1, 32) *
+			    sizeof(struct ifmap_entry);
+			if ((ifmap = realloc(ifmap, size)) == NULL)
+				errx(2, "realloc(%d) failed", size);
+			/* Zero only the newly added tail of the array */
+			memset(&ifmap[ifmap_size], 0,
+			    size - ifmap_size *
+			    sizeof(struct ifmap_entry));
+
+			ifmap_size = roundup(ifindex + 1, 32);
+		}
+
+		/* Keep the first name recorded for this index */
+		if (*ifmap[ifindex].ifname != '\0')
+			continue;
+
+		strlcpy(ifmap[ifindex].ifname, ifa->ifa_name, IFNAMSIZ);
+	}
+
+	freeifaddrs(ifap);
+
+	*pifmap_size = ifmap_size;
+
+	return (ifmap);
+}
+
Index: usr.bin/netstat/main.c
===================================================================
--- usr.bin/netstat/main.c
+++ usr.bin/netstat/main.c
@@ -214,6 +214,7 @@
int noutputs = 0; /* how much outputs before we exit */
int numeric_addr; /* show addresses numerically */
int numeric_port; /* show ports numerically */
+int oflag; /* show nexthop objects*/
int Pflag; /* show TCP log ID */
static int pflag; /* show given protocol */
static int Qflag; /* show netisr information */
@@ -248,7 +249,7 @@
if (argc < 0)
exit(EXIT_FAILURE);
- while ((ch = getopt(argc, argv, "46AaBbdF:f:ghI:iLlM:mN:nPp:Qq:RrSTsuWw:xz"))
+ while ((ch = getopt(argc, argv, "46AaBbdF:f:ghI:iLlM:mN:noPp:Qq:RrSTsuWw:xz"))
!= -1)
switch(ch) {
case '4':
@@ -345,6 +346,9 @@
case 'n':
numeric_addr = numeric_port = 1;
break;
+ case 'o':
+ oflag = 1;
+ break;
case 'P':
Pflag = 1;
break;
@@ -494,6 +498,15 @@
xo_finish();
exit(0);
}
+ if (oflag) {
+ xo_open_container("statistics");
+ nhops_print(fib, af);
+ //nhgrp_print(fib, af);
+ xo_close_container("statistics");
+ xo_finish();
+ exit(0);
+ }
+
if (gflag) {
xo_open_container("statistics");
Index: usr.bin/netstat/netstat.h
===================================================================
--- usr.bin/netstat/netstat.h
+++ usr.bin/netstat/netstat.h
@@ -147,6 +147,10 @@
char *routename(struct sockaddr *, int);
const char *netname(struct sockaddr *, struct sockaddr *);
void routepr(int, int);
+int p_sockaddr(const char *name, struct sockaddr *sa,
+ struct sockaddr *mask, int flags, int width);
+const char *fmt_sockaddr(struct sockaddr *sa, struct sockaddr *mask,
+ int flags);
#ifdef NETGRAPH
void netgraphprotopr(u_long, const char *, int, int);
@@ -157,3 +161,5 @@
void mroutepr(void);
void mrt_stats(void);
void bpf_stats(char *);
+void nhops_print(int fibnum, int af);
+void nhgrp_print(int fibnum, int af);
Index: usr.bin/netstat/nhops.c
===================================================================
--- /dev/null
+++ usr.bin/netstat/nhops.c
@@ -0,0 +1,724 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 1983, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/sysctl.h>
+#include <sys/time.h>
+
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+#include <net/route.h>
+#include <net/route/nhop.h>
+
+#include <netinet/in.h>
+#include <netgraph/ng_socket.h>
+
+#include <arpa/inet.h>
+#include <ifaddrs.h>
+#include <libutil.h>
+#include <netdb.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+#include <err.h>
+#include <libxo/xo.h>
+#include "netstat.h"
+#include "common.h"
+
+/* column widths; each followed by one space */
+#ifndef INET6
+#define WID_DST_DEFAULT(af) 18 /* width of destination column */
+#define WID_GW_DEFAULT(af) 18 /* width of gateway column */
+#define WID_IF_DEFAULT(af) (Wflag ? 10 : 8) /* width of netif column */
+#else
+#define WID_DST_DEFAULT(af) \
+ ((af) == AF_INET6 ? (numeric_addr ? 33: 18) : 18)
+#define WID_GW_DEFAULT(af) \
+ ((af) == AF_INET6 ? (numeric_addr ? 29 : 18) : 18)
+#define WID_IF_DEFAULT(af) ((af) == AF_INET6 ? 8 : (Wflag ? 10 : 8))
+#endif /*INET6*/
+static int wid_dst;
+static int wid_gw;
+static int wid_flags;
+static int wid_pksent;
+static int wid_mtu;
+static int wid_if;
+static int wid_nhidx;
+static int wid_nhtype;
+static int wid_refcnt;
+static int wid_prepend;
+
+/*
+ * Nexthop flag table (mask, compact character, long name) walked by
+ * p_nhflags(); the all-zero entry terminates the table.
+ */
+static struct bits nh_bits[] = {
+ { NHF_REJECT, 'R', "reject" },
+ { NHF_BLACKHOLE,'B', "blackhole" },
+ { NHF_REDIRECT, 'r', "redirect" },
+ { NHF_GATEWAY, 'G', "gateway" },
+ { NHF_DEFAULT, 'd', "default" },
+ { NHF_BROADCAST,'b', "broadcast" },
+ { 0 , 0, NULL }
+};
+
+/*
+ * Printable nexthop type names; print_nhop_entry_sysctl() indexes this
+ * array directly with nh->nh_type.
+ * NOTE(review): the order presumably mirrors the kernel NH_TYPE_*
+ * values -- confirm against net/route/nhop.h.
+ */
+static char *nh_types[] = {
+ "empty", /* 0 */
+ "v4/resolve", /* 1 */
+ "v4/gw", /* 2 */
+ "v6/resolve", /* 3 */
+ "v6/gw" /* 4 */
+};
+
+/* Printable summary of one nexthop, as stored by nhop_map_update(). */
+struct nhop_entry {
+ char gw[64]; /* gateway text shown in the Gateway column */
+ char ifname[IFNAMSIZ]; /* interface name; empty means "no entry" to nhop_get() */
+};
+
+/* Growable array of nhop_entry indexed by nexthop index. */
+struct nhop_map {
+ struct nhop_entry *ptr; /* array storage, grown with realloc() */
+ size_t size; /* number of entries in ptr */
+};
+/* Filled while nexthops are printed, read when nexthop groups are printed. */
+static struct nhop_map global_nhop_map;
+
+static void nhop_map_update(struct nhop_map *map, uint32_t idx,
+ char *gw, char *ifname);
+static struct nhop_entry *nhop_get(struct nhop_map *map, uint32_t idx);
+
+
+static struct ifmap_entry *ifmap;
+static size_t ifmap_size;
+
+/*
+ * Formats the address carried by @sa into @buf (at most @bufsize bytes).
+ * IPv4 and IPv6 addresses go through inet_ntop(); any other family is
+ * rendered as "unknown:<family>".
+ */
+static void
+print_sockaddr_buf(char *buf, size_t bufsize, const struct sockaddr *sa)
+{
+	const void *src;
+
+	if (sa->sa_family == AF_INET)
+		src = &((const struct sockaddr_in *)sa)->sin_addr;
+	else if (sa->sa_family == AF_INET6)
+		src = &((const struct sockaddr_in6 *)sa)->sin6_addr;
+	else {
+		snprintf(buf, bufsize, "unknown:%d", sa->sa_family);
+		return;
+	}
+
+	inet_ntop(sa->sa_family, src, buf, bufsize);
+}
+
+/*
+ * Emits @addr as libxo field @name, padded to @width columns.
+ *
+ * A negative @width prints the address unpadded.  With -W or numeric
+ * output the address is never truncated; otherwise it is cut to @width.
+ *
+ * Returns how many characters the address overran its column by
+ * (0 when it fit, was truncated, or was printed unpadded).
+ */
+static int
+print_addr(const char *name, const char *addr, int width)
+{
+	char fmt[128];
+	int overflow;
+
+	if (width < 0) {
+		snprintf(fmt, sizeof(fmt), "{:%s/%%s} ", name);
+		xo_emit(fmt, addr);
+		return (0);
+	}
+
+	if (Wflag == 0 && !numeric_addr) {
+		/* Narrow mode: truncate to the column width */
+		snprintf(fmt, sizeof(fmt), "{[:%d}{:%s/%%-.*s}{]:} ",
+		    -width, name);
+		xo_emit(fmt, width, addr);
+		return (0);
+	}
+
+	/* Wide/numeric mode: print in full and report the overrun */
+	snprintf(fmt, sizeof(fmt), "{[:%d}{:%s/%%s}{]:} ",
+	    -width, name);
+	xo_emit(fmt, addr);
+	overflow = strlen(addr) - width;
+
+	return (overflow < 0 ? 0 : overflow);
+}
+
+
+/*
+ * Prints the column titles of the nexthop table.
+ * With -W all eleven columns are shown; otherwise only index, IFA,
+ * gateway, flags, interface and reference count.
+ * Column widths come from the file-scope wid_* variables.
+ */
+static void
+print_nhop_header(int af1 __unused)
+{
+
+	if (Wflag) {
+		xo_emit("{T:/%-*.*s} {T:/%-*.*s} {T:/%-*.*s} {T:/%-*.*s} {T:/%*.*s} "
+		    "{T:/%*.*s} {T:/%-*.*s} {T:/%*.*s} {T:/%*.*s} {T:/%*.*s} {T:/%*s}\n",
+		    wid_nhidx, wid_nhidx, "Idx",
+		    wid_nhtype, wid_nhtype, "Type",
+		    wid_dst, wid_dst, "IFA",
+		    wid_gw, wid_gw, "Gateway",
+		    wid_flags, wid_flags, "Flags",
+		    wid_pksent, wid_pksent, "Use",
+		    wid_mtu, wid_mtu, "Mtu",
+		    wid_if, wid_if, "Netif",
+		    wid_if, wid_if, "Addrif",
+		    wid_refcnt, wid_refcnt, "Refcnt",
+		    wid_prepend, "Prepend");
+	} else {
+		/*
+		 * The last column is the reference count, so it is sized
+		 * with wid_refcnt like the data rows, not wid_prepend.
+		 */
+		xo_emit("{T:/%-*.*s} {T:/%-*.*s} {T:/%-*.*s} {T:/%-*.*s} {T:/%*.*s} "
+		    " {T:/%*s}\n",
+		    wid_nhidx, wid_nhidx, "Idx",
+		    wid_dst, wid_dst, "IFA",
+		    wid_gw, wid_gw, "Gateway",
+		    wid_flags, wid_flags, "Flags",
+		    wid_if, wid_if, "Netif",
+		    wid_refcnt, "Refcnt");
+	}
+}
+
+/*
+ * Prints the column titles of the nexthop group table.
+ * Column widths come from the file-scope wid_* variables.
+ */
+static void
+print_nhgroup_header(int af1 __unused)
+{
+	static const char hdr_fmt[] =
+	    "{T:/%-*.*s} {T:/%-*.*s} {T:/%-*.*s} {T:/%-*.*s} {T:/%*.*s}"
+	    " {T:/%-*.*s} {T:/%*s}\n";
+
+	xo_emit(hdr_fmt,
+	    wid_nhidx, wid_nhidx, "MpIdx",
+	    wid_nhidx, wid_nhidx, "NHIdx",
+	    wid_nhidx, wid_nhidx, "Weight",
+	    wid_nhidx, wid_nhidx, "Slots",
+	    wid_gw, wid_gw, "Gateway",
+	    wid_if, wid_if, "Netif",
+	    wid_nhidx, "Refcnt");
+}
+
+/*
+ * Prints one nexthop group: a summary row with the group index and
+ * reference count, then one row per member nexthop with its index,
+ * weight, number of forwarding slots and, when the nexthop is known to
+ * global_nhop_map, its gateway and interface.
+ *
+ * @mpe is followed in memory by mp_nh_count mpath_nhop_external records
+ * and then by mp_group_size uint32_t forwarding slots, each holding a
+ * nexthop index.
+ */
+static void
+print_nhgroup_entry_sysctl(const char *name, struct rt_msghdr *rtm __unused,
+    struct mpath_external *mpe)
+{
+	char buffer[128];
+	struct mpath_nhop_external *ext;
+	struct nhop_entry *ne;
+	uint32_t *fwd_c, *pidx;
+	size_t nslots;
+
+	xo_open_instance(name);
+
+	snprintf(buffer, sizeof(buffer), "{[:-%d}{:mp_index/%%lu}{]:} ", wid_nhidx);
+	xo_emit(buffer, mpe->mp_idx);
+
+	xo_emit("{t:dummy-1/%*.*s}", wid_nhidx, wid_nhidx, "----");
+	xo_emit("{t:dummy-2/%*.*s}", wid_nhidx, wid_nhidx, "----");
+	xo_emit("{t:dummy-2/%*.*s}", wid_nhidx, wid_nhidx, "----");
+	xo_emit("{t:dummy-3/%*.*s}", wid_gw, wid_gw, "----");
+	xo_emit("{t:dummy-4/%*.*s}", wid_if, wid_if, "----");
+	xo_emit("{t:mp-refcnt/%*lu}", wid_nhidx, mpe->mp_refcount);
+	xo_emit("\n");
+
+	ext = (struct mpath_nhop_external *)(mpe + 1);
+	pidx = (uint32_t *)&ext[mpe->mp_nh_count];
+
+	/*
+	 * Count how many forwarding slots point at each nexthop index.
+	 * The counters cover only indices known to global_nhop_map, so
+	 * every kernel-supplied index is range-checked before use.
+	 */
+	nslots = global_nhop_map.size;
+	fwd_c = NULL;
+	if (nslots > 0 && (fwd_c = calloc(nslots, sizeof(*fwd_c))) == NULL)
+		errx(2, "calloc(%zu) failed", nslots * sizeof(*fwd_c));
+	for (uint32_t i = 0; i < mpe->mp_group_size; i++) {
+		if (pidx[i] < nslots)
+			fwd_c[pidx[i]]++;
+	}
+
+	xo_open_list("nhop_weights");
+	for (uint32_t i = 0; i < mpe->mp_nh_count; i++) {
+		unsigned long slots = 0;
+
+		if (ext[i].nh_idx < nslots)
+			slots = fwd_c[ext[i].nh_idx];
+
+		xo_open_instance("nhop-weight");
+		snprintf(buffer, sizeof(buffer), "{[:-%d}{:d-0/%%s}{]:} ", wid_nhidx);
+		xo_emit(buffer, "");
+		// nh index
+		xo_emit("{t:nh-index/%*lu} ", wid_nhidx, ext[i].nh_idx);
+		xo_emit("{t:nh-weight/%*lu} ", wid_nhidx, ext[i].nh_weight);
+		xo_emit("{t:nh-slots/%*lu} ", wid_nhidx, slots);
+		ne = nhop_get(&global_nhop_map, ext[i].nh_idx);
+		if (ne != NULL) {
+			xo_emit("{t:nh-gw/%*.*s}", wid_gw, wid_gw, ne->gw);
+			xo_emit("{t:nh-interface/%*.*s}", wid_if, wid_if, ne->ifname);
+		}
+		xo_emit("\n");
+		xo_close_instance("nhop-weight");
+	}
+	xo_close_list("nhop_weights");
+
+	free(fwd_c);
+	xo_close_instance(name);
+}
+
+
+/*
+ * Fetches the nexthop group dump for address family @af in fib @fibnum
+ * through the NET_RT_NHGROUPS routing sysctl and prints every group.
+ * The family title and column header are printed once, before the first
+ * group.  Exits the program on sysctl or allocation failure.
+ */
+static void
+print_nhgrp_sysctl(int fibnum, int af)
+{
+	size_t needed;
+	int mib[7];
+	char *buf, *next, *lim;
+	struct rt_msghdr *rtm;
+	struct mpath_external *mp;
+	int fam = AF_UNSPEC;
+	int need_table_close = false;
+
+	mib[0] = CTL_NET;
+	mib[1] = PF_ROUTE;
+	mib[2] = 0;
+	mib[3] = af;
+	mib[4] = NET_RT_NHGROUPS;
+	mib[5] = 0;
+	mib[6] = fibnum;
+	/* First call sizes the buffer, second call fills it */
+	if (sysctl(mib, nitems(mib), NULL, &needed, NULL, 0) < 0)
+		err(EX_OSERR, "sysctl: net.route.0.%d.nhgrpdump.%d estimate",
+		    af, fibnum);
+	if ((buf = malloc(needed)) == NULL)
+		errx(2, "malloc(%lu)", (unsigned long)needed);
+	if (sysctl(mib, nitems(mib), buf, &needed, NULL, 0) < 0)
+		err(1, "sysctl: net.route.0.%d.nhgrpdump.%d", af, fibnum);
+	lim = buf + needed;
+	xo_open_container("nhgrp-table");
+	xo_open_list("rt-family");
+	for (next = buf; next < lim; next += rtm->rtm_msglen) {
+		rtm = (struct rt_msghdr *)next;
+		/* A zero-length record would never advance; stop parsing */
+		if (rtm->rtm_msglen == 0)
+			break;
+		if (rtm->rtm_version != RTM_VERSION)
+			continue;
+
+		mp = (struct mpath_external *)(rtm + 1);
+		/* Only print family first time. */
+		if (fam != af) {
+			if (need_table_close) {
+				xo_close_list("nhgrp-entry");
+				xo_close_instance("rt-family");
+			}
+			need_table_close = true;
+
+			fam = af;
+			wid_dst = WID_GW_DEFAULT(fam);
+			wid_gw = WID_GW_DEFAULT(fam);
+			wid_nhidx = 5;
+			wid_nhtype = 12;
+			wid_refcnt = 6;
+			wid_flags = 6;
+			wid_pksent = 8;
+			wid_mtu = 6;
+			wid_if = WID_IF_DEFAULT(fam);
+			xo_open_instance("rt-family");
+			pr_family(fam);
+			xo_open_list("nhgrp-entry");
+
+			print_nhgroup_header(fam);
+		}
+		print_nhgroup_entry_sysctl("nhgrp-entry", rtm, mp);
+	}
+	if (need_table_close) {
+		xo_close_list("nhgrp-entry");
+		xo_close_instance("rt-family");
+	}
+	xo_close_list("rt-family");
+	xo_close_container("nhgrp-table");
+	free(buf);
+}
+
+/*
+ * Records gateway text @gw and interface name @ifname for nexthop
+ * index @idx in @map, growing the array when @idx is out of range.
+ *
+ * The array starts at 32 entries and doubles; if doubling is still too
+ * small it is sized to @idx + 1 rounded up to a multiple of 32.  New
+ * entries are zero-filled.  Exits the program when realloc() fails.
+ */
+static void
+nhop_map_update(struct nhop_map *map, uint32_t idx, char *gw, char *ifname)
+{
+	if (idx >= map->size) {
+		uint32_t new_size;
+		size_t sz;
+		if (map->size == 0)
+			new_size = 32;
+		else
+			new_size = map->size * 2;
+		if (new_size <= idx)
+			new_size = roundup(idx + 1, 32);
+
+		sz = new_size * (sizeof(struct nhop_entry));
+		if ((map->ptr = realloc(map->ptr, sz)) == NULL)
+			errx(2, "realloc(%zu) failed", sz);
+
+		/* Zero only the entries added by this growth step */
+		memset(&map->ptr[map->size], 0, (new_size - map->size) * sizeof(struct nhop_entry));
+		map->size = new_size;
+	}
+
+	strlcpy(map->ptr[idx].ifname, ifname, sizeof(map->ptr[idx].ifname));
+	strlcpy(map->ptr[idx].gw, gw, sizeof(map->ptr[idx].gw));
+}
+
+/*
+ * Returns the entry recorded for nexthop index @idx in @map, or NULL
+ * when @idx is outside the array or the entry has an empty interface
+ * name.
+ */
+static struct nhop_entry *
+nhop_get(struct nhop_map *map, uint32_t idx)
+{
+	struct nhop_entry *ent;
+
+	if (idx >= map->size)
+		return (NULL);
+
+	ent = &map->ptr[idx];
+	return (ent->ifname[0] != '\0' ? ent : NULL);
+}
+
+/*
+ * Prints one nexthop row and records its gateway/interface text in
+ * global_nhop_map.
+ *
+ * @nh is followed, nh->nh_len bytes from its start, by a nhop_addrs
+ * record whose gw_sa_off/src_sa_off give the offsets of the gateway and
+ * source (IFA) socket addresses.  The flags column shows the route
+ * flags from @rtm.  Type, use, MTU, address interface and prepend
+ * columns are printed only with -W.
+ */
+static void
+print_nhop_entry_sysctl(const char *name, struct rt_msghdr *rtm, struct nhop_external *nh)
+{
+	char buffer[128];
+	char iface_name[128];
+	char gw_addr[64];
+	int protrusion;
+	struct nhop_addrs *na;
+	struct sockaddr *sa_gw, *sa_ifa;
+
+	xo_open_instance(name);
+
+	snprintf(buffer, sizeof(buffer), "{[:-%d}{:index/%%lu}{]:} ", wid_nhidx);
+	xo_emit(buffer, nh->nh_idx);
+
+	if (Wflag) {
+		const char *cp;
+
+		/* The type value is external input: never index past the table */
+		if (nh->nh_type < nitems(nh_types))
+			cp = nh_types[nh->nh_type];
+		else
+			cp = "unknown";
+		xo_emit("{t:type_str/%*s} ", wid_nhtype, cp);
+	}
+
+	/* Transmit interface name; "---" when the index has no name */
+	memset(iface_name, 0, sizeof(iface_name));
+	if (nh->ifindex < (uint32_t)ifmap_size) {
+		strlcpy(iface_name, ifmap[nh->ifindex].ifname,
+		    sizeof(iface_name));
+		if (*iface_name == '\0')
+			strlcpy(iface_name, "---", sizeof(iface_name));
+	}
+
+	na = (struct nhop_addrs *)((char *)nh + nh->nh_len);
+	sa_gw = (struct sockaddr *)((char *)na + na->gw_sa_off);
+	sa_ifa = (struct sockaddr *)((char *)na + na->src_sa_off);
+	protrusion = p_sockaddr("ifa", sa_ifa, NULL, RTF_HOST, wid_dst);
+
+	if (nh->nh_flags & NHF_GATEWAY) {
+		const char *cp;
+		cp = fmt_sockaddr(sa_gw, NULL, RTF_HOST);
+		strlcpy(gw_addr, cp, sizeof(gw_addr));
+	} else
+		snprintf(gw_addr, sizeof(gw_addr), "%s/resolve", iface_name);
+	/* Gateway column uses its own width, shrunk by the IFA overrun */
+	protrusion = print_addr("gateway", gw_addr, wid_gw - protrusion);
+
+	nhop_map_update(&global_nhop_map, nh->nh_idx, gw_addr, iface_name);
+
+	snprintf(buffer, sizeof(buffer), "{[:-%d}{:flags/%%s}{]:} ",
+	    wid_flags - protrusion);
+
+	print_flags_generic(rtm->rtm_flags, rt_bits, buffer, "rt_flags_pretty");
+
+	if (Wflag) {
+		xo_emit("{t:use/%*lu} ", wid_pksent, nh->nh_pksent);
+		xo_emit("{t:mtu/%*lu} ", wid_mtu, nh->nh_mtu);
+	}
+
+	if (Wflag)
+		xo_emit("{t:interface-name/%*s}", wid_if, iface_name);
+	else
+		xo_emit("{t:interface-name/%*.*s}", wid_if, wid_if, iface_name);
+
+	/* Address interface name, shown only when it differs from ifindex */
+	memset(iface_name, 0, sizeof(iface_name));
+	if (nh->aifindex < (uint32_t)ifmap_size && nh->ifindex != nh->aifindex) {
+		strlcpy(iface_name, ifmap[nh->aifindex].ifname,
+		    sizeof(iface_name));
+		if (*iface_name == '\0')
+			strlcpy(iface_name, "---", sizeof(iface_name));
+	}
+	if (Wflag)
+		xo_emit("{t:address-interface-name/%*s}", wid_if, iface_name);
+
+	xo_emit("{t:refcount/%*lu} ", wid_refcnt, nh->nh_refcount);
+	if (Wflag && nh->prepend_len) {
+		/* NOTE(review): fixed placeholder text, not the real prepend data */
+		const char *prepend_hex = "AABBCCDDEE";
+		xo_emit(" {:nhop-prepend/%*s}", wid_prepend, prepend_hex);
+	}
+
+	xo_emit("\n");
+	xo_close_instance(name);
+}
+
+/* Sort record: one received nexthop message keyed by its nexthop index. */
+struct nhops_map {
+ uint32_t idx; /* nh_idx copied from the message; sort key for cmp_nh_idx() */
+ struct rt_msghdr *rtm; /* points into the sysctl buffer, not a separate allocation */
+};
+
+/*
+ * qsort(3) comparator ordering struct nhops_map records by ascending
+ * idx.  Returns -1, 0 or 1.
+ */
+static int
+cmp_nh_idx(const void *_a, const void *_b)
+{
+	const struct nhops_map *lhs = _a;
+	const struct nhops_map *rhs = _b;
+
+	/* Difference of two 0/1 comparisons yields -1, 0 or 1 */
+	return ((lhs->idx > rhs->idx) - (lhs->idx < rhs->idx));
+}
+
+/*
+ * Fetches the nexthop dump for address family @af in fib @fibnum
+ * through the NET_RT_NHOP routing sysctl and prints it sorted by
+ * nexthop index.  The address family shown in the title is taken from
+ * the first nexthop after sorting.
+ * Exits the program on sysctl or allocation failure.
+ */
+static void
+print_nhops_sysctl(int fibnum, int af)
+{
+	size_t needed;
+	int mib[7];
+	char *buf, *next, *lim;
+	struct rt_msghdr *rtm;
+	struct nhop_external *nh;
+	int fam;
+	struct nhops_map *nh_map;
+	size_t nh_count, nh_size;
+
+	mib[0] = CTL_NET;
+	mib[1] = PF_ROUTE;
+	mib[2] = 0;
+	mib[3] = af;
+	mib[4] = NET_RT_NHOP;
+	mib[5] = 0;
+	mib[6] = fibnum;
+	/* First call sizes the buffer, second call fills it */
+	if (sysctl(mib, nitems(mib), NULL, &needed, NULL, 0) < 0)
+		err(EX_OSERR, "sysctl: net.route.0.%d.nhdump.%d estimate", af,
+		    fibnum);
+	if ((buf = malloc(needed)) == NULL)
+		errx(2, "malloc(%lu)", (unsigned long)needed);
+	if (sysctl(mib, nitems(mib), buf, &needed, NULL, 0) < 0)
+		err(1, "sysctl: net.route.0.%d.nhdump.%d", af, fibnum);
+	lim = buf + needed;
+	xo_open_container("nhop-table");
+	xo_open_list("rt-family");
+
+	/*
+	 * Nexthops are received unsorted. Collect everything first, sort
+	 * and then display sorted.
+	 */
+	nh_count = 0;
+	nh_size = 16;
+	nh_map = calloc(nh_size, sizeof(struct nhops_map));
+	if (nh_map == NULL)
+		errx(2, "calloc(%zu) failed", nh_size * sizeof(struct nhops_map));
+	for (next = buf; next < lim; next += rtm->rtm_msglen) {
+		rtm = (struct rt_msghdr *)next;
+		/* A zero-length record would never advance; stop parsing */
+		if (rtm->rtm_msglen == 0)
+			break;
+		if (rtm->rtm_version != RTM_VERSION)
+			continue;
+
+		if (nh_count >= nh_size) {
+			nh_size *= 2;
+			nh_map = realloc(nh_map, nh_size * sizeof(struct nhops_map));
+			if (nh_map == NULL)
+				errx(2, "realloc(%zu) failed",
+				    nh_size * sizeof(struct nhops_map));
+		}
+
+		nh = (struct nhop_external *)(rtm + 1);
+		nh_map[nh_count].idx = nh->nh_idx;
+		nh_map[nh_count].rtm = rtm;
+		nh_count++;
+	}
+
+	if (nh_count > 0) {
+		qsort(nh_map, nh_count, sizeof(struct nhops_map), cmp_nh_idx);
+		nh = (struct nhop_external *)(nh_map[0].rtm + 1);
+		fam = nh->nh_family;
+
+		wid_dst = WID_GW_DEFAULT(fam);
+		wid_gw = WID_GW_DEFAULT(fam);
+		wid_nhidx = 5;
+		wid_nhtype = 12;
+		wid_refcnt = 6;
+		wid_flags = 6;
+		wid_pksent = 8;
+		wid_mtu = 6;
+		wid_if = WID_IF_DEFAULT(fam);
+		xo_open_instance("rt-family");
+		pr_family(fam);
+		xo_open_list("nh-entry");
+
+		print_nhop_header(fam);
+
+		for (size_t i = 0; i < nh_count; i++) {
+			rtm = nh_map[i].rtm;
+			nh = (struct nhop_external *)(rtm + 1);
+			print_nhop_entry_sysctl("nh-entry", rtm, nh);
+		}
+
+		xo_close_list("nh-entry");
+		xo_close_instance("rt-family");
+	}
+	xo_close_list("rt-family");
+	xo_close_container("nhop-table");
+	/* nh_map entries point into buf, so release the index first */
+	free(nh_map);
+	free(buf);
+}
+
+/*
+ * Emits nexthop flags @f: the compact string built from nh_bits through
+ * the caller-supplied libxo @format, then a "nh_flags_pretty" list with
+ * the long name of every flag that is set.
+ */
+static void
+p_nhflags(int f, const char *format)
+{
+	char *list_name = "nh_flags_pretty";
+	struct bits *bp = nh_bits;
+
+	xo_emit(format, fmt_flags(nh_bits, f));
+
+	xo_open_list(list_name);
+	while (bp->b_mask != 0) {
+		if ((bp->b_mask & f) != 0)
+			xo_emit("{le:nh_flags_pretty/%s}", bp->b_name);
+		bp++;
+	}
+	xo_close_list(list_name);
+}
+
+/*
+ * Entry point for the nexthop listing: resolves and validates the fib
+ * number, builds the interface name map and prints the nexthop table
+ * for address family @af.
+ *
+ * A @fibnum of -1 selects the value of the net.my_fibnum sysctl (0 if
+ * it cannot be read).  Exits with EX_USAGE when the fib is out of range.
+ */
+void
+nhops_print(int fibnum, int af)
+{
+	int numfibs;
+	size_t len = sizeof(int);
+
+	if (fibnum == -1) {
+		if (sysctlbyname("net.my_fibnum", &fibnum, &len, NULL, 0) == -1)
+			fibnum = 0;
+	}
+	if (sysctlbyname("net.fibs", &numfibs, &len, NULL, 0) == -1)
+		numfibs = 1;
+	if (fibnum < 0 || fibnum >= numfibs)
+		errx(EX_USAGE, "%d: invalid fib", fibnum);
+
+	ifmap = prepare_ifmap(&ifmap_size);
+
+	xo_open_container("route-nhop-information");
+	xo_emit("{T:Nexthop data}");
+	/* The fib number is shown only for non-default fibs */
+	if (fibnum != 0)
+		xo_emit(" ({L:fib}: {:fib/%d})", fibnum);
+	xo_emit("\n");
+	print_nhops_sysctl(fibnum, af);
+	xo_close_container("route-nhop-information");
+}
+
+/*
+ * Entry point for the nexthop group listing: resolves and validates
+ * the fib number, builds the interface name map and prints the nexthop
+ * group table for address family @af.
+ *
+ * A @fibnum of -1 selects the value of the net.my_fibnum sysctl (0 if
+ * it cannot be read).  Exits with EX_USAGE when the fib is out of range.
+ */
+void
+nhgrp_print(int fibnum, int af)
+{
+	int numfibs;
+	size_t len = sizeof(int);
+
+	if (fibnum == -1) {
+		if (sysctlbyname("net.my_fibnum", &fibnum, &len, NULL, 0) == -1)
+			fibnum = 0;
+	}
+	if (sysctlbyname("net.fibs", &numfibs, &len, NULL, 0) == -1)
+		numfibs = 1;
+	if (fibnum < 0 || fibnum >= numfibs)
+		errx(EX_USAGE, "%d: invalid fib", fibnum);
+
+	ifmap = prepare_ifmap(&ifmap_size);
+
+	xo_open_container("route-nhgrp-information");
+	xo_emit("{T:Nexthop groups data}");
+	/* The fib number is shown only for non-default fibs */
+	if (fibnum != 0)
+		xo_emit(" ({L:fib}: {:fib/%d})", fibnum);
+	xo_emit("\n");
+	print_nhgrp_sysctl(fibnum, af);
+	xo_close_container("route-nhgrp-information");
+}
Index: usr.bin/netstat/route.c
===================================================================
--- usr.bin/netstat/route.c
+++ usr.bin/netstat/route.c
@@ -36,7 +36,7 @@
#endif
#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
+__FBSDID("$FreeBSD: head/usr.bin/netstat/route.c 355840 2019-12-17 02:02:26Z glebius $");
#include <sys/param.h>
#include <sys/protosw.h>
@@ -69,16 +69,13 @@
#include <err.h>
#include <libxo/xo.h>
#include "netstat.h"
+#include "common.h"
#include "nl_defs.h"
/*
* Definitions for showing gateway flags.
*/
-static struct bits {
- u_long b_mask;
- char b_val;
- const char *b_name;
-} bits[] = {
+struct bits rt_bits[] = {
{ RTF_UP, 'U', "up" },
{ RTF_GATEWAY, 'G', "gateway" },
{ RTF_HOST, 'H', "host" },
@@ -99,11 +96,8 @@
{ 0 , 0, NULL }
};
-struct ifmap_entry {
- char ifname[IFNAMSIZ];
-};
static struct ifmap_entry *ifmap;
-static int ifmap_size;
+static size_t ifmap_size;
static struct timespec uptime;
static const char *netname4(in_addr_t, in_addr_t);
@@ -112,12 +106,7 @@
#endif
static void p_rtable_sysctl(int, int);
static void p_rtentry_sysctl(const char *name, struct rt_msghdr *);
-static int p_sockaddr(const char *name, struct sockaddr *, struct sockaddr *,
- int, int);
-static const char *fmt_sockaddr(struct sockaddr *sa, struct sockaddr *mask,
- int flags);
static void p_flags(int, const char *);
-static const char *fmt_flags(int f);
static void domask(char *, size_t, u_long);
@@ -229,7 +218,7 @@
wid_dst, wid_dst, "Destination",
wid_gw, wid_gw, "Gateway",
wid_flags, wid_flags, "Flags",
- wid_pksent, wid_pksent, "Use",
+ wid_mtu, wid_mtu, "Nhop#",
wid_mtu, wid_mtu, "Mtu",
wid_if, wid_if, "Netif",
wid_expire, "Expire");
@@ -252,46 +241,10 @@
char *buf, *next, *lim;
struct rt_msghdr *rtm;
struct sockaddr *sa;
- int fam = AF_UNSPEC, ifindex = 0, size;
+ int fam = AF_UNSPEC;
int need_table_close = false;
- struct ifaddrs *ifap, *ifa;
- struct sockaddr_dl *sdl;
-
- /*
- * Retrieve interface list at first
- * since we need #ifindex -> if_xname match
- */
- if (getifaddrs(&ifap) != 0)
- err(EX_OSERR, "getifaddrs");
-
- for (ifa = ifap; ifa; ifa = ifa->ifa_next) {
-
- if (ifa->ifa_addr->sa_family != AF_LINK)
- continue;
-
- sdl = (struct sockaddr_dl *)ifa->ifa_addr;
- ifindex = sdl->sdl_index;
-
- if (ifindex >= ifmap_size) {
- size = roundup(ifindex + 1, 32) *
- sizeof(struct ifmap_entry);
- if ((ifmap = realloc(ifmap, size)) == NULL)
- errx(2, "realloc(%d) failed", size);
- memset(&ifmap[ifmap_size], 0,
- size - ifmap_size *
- sizeof(struct ifmap_entry));
-
- ifmap_size = roundup(ifindex + 1, 32);
- }
-
- if (*ifmap[ifindex].ifname != '\0')
- continue;
-
- strlcpy(ifmap[ifindex].ifname, ifa->ifa_name, IFNAMSIZ);
- }
-
- freeifaddrs(ifap);
+ ifmap = prepare_ifmap(&ifmap_size);
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
@@ -377,7 +330,8 @@
wid_flags - protrusion);
p_flags(rtm->rtm_flags, buffer);
if (Wflag) {
- xo_emit("{t:use/%*lu} ", wid_pksent, rtm->rtm_rmx.rmx_pksent);
+ /* XXX: use=0? */
+ xo_emit("{t:nhop/%*lu} ", wid_mtu, rtm->rtm_rmx.rmx_nhidx);
if (rtm->rtm_rmx.rmx_mtu != 0)
xo_emit("{t:mtu/%*lu} ", wid_mtu, rtm->rtm_rmx.rmx_mtu);
@@ -410,7 +364,7 @@
xo_close_instance(name);
}
-static int
+int
p_sockaddr(const char *name, struct sockaddr *sa, struct sockaddr *mask,
int flags, int width)
{
@@ -442,7 +396,7 @@
return (protrusion);
}
-static const char *
+const char *
fmt_sockaddr(struct sockaddr *sa, struct sockaddr *mask, int flags)
{
static char buf[128];
@@ -519,30 +473,10 @@
static void
p_flags(int f, const char *format)
{
- struct bits *p;
-
- xo_emit(format, fmt_flags(f));
- xo_open_list("flags_pretty");
- for (p = bits; p->b_mask; p++)
- if (p->b_mask & f)
- xo_emit("{le:flags_pretty/%s}", p->b_name);
- xo_close_list("flags_pretty");
+ print_flags_generic(f, rt_bits, format, "flags_pretty");
}
-static const char *
-fmt_flags(int f)
-{
- static char name[33];
- char *flags;
- struct bits *p = bits;
-
- for (flags = name; p->b_mask; p++)
- if (p->b_mask & f)
- *flags++ = p->b_val;
- *flags = '\0';
- return (name);
-}
char *
routename(struct sockaddr *sa, int flags)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Oct 9, 12:51 PM (22 h, 27 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
23496905
Default Alt Text
D24232.id70048.diff (132 KB)
Attached To
Mode
D24232: Stage 1: Introduce nexthop objects and new routing KPI
Attached
Detach File
Event Timeline
Log In to Comment