Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -4098,6 +4098,7 @@ net/raw_cb.c standard net/raw_usrreq.c standard net/route.c standard +net/route_temporal.c standard net/rss_config.c optional inet rss | inet6 rss net/rtsock.c standard net/slcompress.c optional netgraph_vjc | sppp | \ Index: sys/net/radix_mpath.h =================================================================== --- sys/net/radix_mpath.h +++ sys/net/radix_mpath.h @@ -57,8 +57,8 @@ void rtalloc_mpath_fib(struct route *, u_int32_t, u_int); struct rtentry *rt_mpath_select(struct rtentry *, uint32_t); int rt_mpath_deldup(struct rtentry *, struct rtentry *); -int rn4_mpath_inithead(void **, int); -int rn6_mpath_inithead(void **, int); +int rn4_mpath_inithead(void **, int, u_int); +int rn6_mpath_inithead(void **, int, u_int); #endif Index: sys/net/radix_mpath.c =================================================================== --- sys/net/radix_mpath.c +++ sys/net/radix_mpath.c @@ -290,17 +290,17 @@ RT_UNLOCK(ro->ro_rt); } -extern int in6_inithead(void **head, int off); -extern int in_inithead(void **head, int off); +extern int in6_inithead(void **head, int off, u_int fibnum); +extern int in_inithead(void **head, int off, u_int fibnum); #ifdef INET int -rn4_mpath_inithead(void **head, int off) +rn4_mpath_inithead(void **head, int off, u_int fibnum) { struct rib_head *rnh; hashjitter = arc4random(); - if (in_inithead(head, off) == 1) { + if (in_inithead(head, off, fibnum) == 1) { rnh = (struct rib_head *)*head; rnh->rnh_multipath = 1; return 1; @@ -311,12 +311,12 @@ #ifdef INET6 int -rn6_mpath_inithead(void **head, int off) +rn6_mpath_inithead(void **head, int off, u_int fibnum) { struct rib_head *rnh; hashjitter = arc4random(); - if (in6_inithead(head, off) == 1) { + if (in6_inithead(head, off, fibnum) == 1) { rnh = (struct rib_head *)*head; rnh->rnh_multipath = 1; return 1; Index: sys/net/route.h 
=================================================================== --- sys/net/route.h +++ sys/net/route.h @@ -455,7 +455,7 @@ void rt_newmaddrmsg(int, struct ifmultiaddr *); int rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *); void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *); -struct rib_head *rt_table_init(int); +struct rib_head *rt_table_init(int, int, u_int); void rt_table_destroy(struct rib_head *); u_int rt_tables_get_gen(int table, int fam); @@ -477,6 +477,8 @@ typedef int rt_walktree_f_t(struct rtentry *, void *); typedef void rt_setwarg_t(struct rib_head *, uint32_t, int, void *); +void rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, + void *arg, int report); void rt_foreach_fib_walk(int af, rt_setwarg_t *, rt_walktree_f_t *, void *); void rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg); void rt_flushifroutes_af(struct ifnet *, int); @@ -494,8 +496,8 @@ void rtalloc_ign_fib(struct route *ro, u_long ignflags, u_int fibnum); struct rtentry *rtalloc1_fib(struct sockaddr *, int, u_long, u_int); int rtioctl_fib(u_long, caddr_t, u_int); -void rtredirect_fib(struct sockaddr *, struct sockaddr *, - struct sockaddr *, int, struct sockaddr *, u_int); +int rtredirect_fib(struct sockaddr *, struct sockaddr *, + int, struct sockaddr *, int, u_int); int rtrequest_fib(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); int rtrequest1_fib(int, struct rt_addrinfo *, struct rtentry **, u_int); Index: sys/net/route.c =================================================================== --- sys/net/route.c +++ sys/net/route.c @@ -187,10 +187,11 @@ { struct rib_head **rnh; - KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.", - __func__)); - KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.", - __func__)); + KASSERT(table >= 0 && table < rt_numfibs, + ("%s: table out of bounds (0 < %d < %d)", __func__, table, + rt_numfibs)); 
+ KASSERT(fam >= 0 && fam < (AF_MAX + 1), + ("%s: fam out of bounds (0 < %d < %d)", __func__, fam, AF_MAX + 1)); /* rnh is [fib=0][af=0]. */ rnh = (struct rib_head **)V_rt_tables; @@ -304,7 +305,7 @@ rnh = rt_tables_get_rnh_ptr(table, fam); if (rnh == NULL) panic("%s: rnh NULL", __func__); - dom->dom_rtattach((void **)rnh, 0); + dom->dom_rtattach((void **)rnh, 0, table); } } } @@ -345,7 +346,7 @@ #endif struct rib_head * -rt_table_init(int offset) +rt_table_init(int offset, int family, u_int fibnum) { struct rib_head *rh; @@ -357,9 +358,19 @@ rn_inithead_internal(&rh->rmhead.head, rh->rmhead.mask_nodes, 0); rh->head.rnh_masks = &rh->rmhead; + rh->rib_family = family; + rh->rib_fibnum = fibnum; + /* Init locks */ RIB_LOCK_INIT(rh); + tmproutes_init(rh); + + /* Save vnet pointer for callouts */ +#ifdef VIMAGE + rh->rib_vnet = curvnet; +#endif + /* Finally, set base callbacks */ rh->rnh_addaddr = rn_addroute; rh->rnh_deladdr = rn_delete; @@ -387,6 +398,8 @@ rt_table_destroy(struct rib_head *rh) { + tmproutes_destroy(rh); + rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head); /* Assume table is already empty */ @@ -578,131 +591,125 @@ } -/* - * Force a routing table entry to the specified - * destination to go through the given gateway. - * Normally called as a result of a routing redirect - * message from the network layer. 
- */ -void -rtredirect_fib(struct sockaddr *dst, - struct sockaddr *gateway, - struct sockaddr *netmask, - int flags, - struct sockaddr *src, - u_int fibnum) +static int +verify_redirect_gateway(struct sockaddr *src, struct sockaddr *dst, + struct sockaddr *gateway, int flags, u_int fibnum) { struct rtentry *rt; - int error = 0; - struct rt_addrinfo info; struct ifaddr *ifa; - struct rib_head *rnh; NET_EPOCH_ASSERT(); - ifa = NULL; - rnh = rt_tables_get_rnh(fibnum, dst->sa_family); - if (rnh == NULL) { - error = EAFNOSUPPORT; - goto out; - } /* verify the gateway is directly reachable */ - if ((ifa = ifa_ifwithnet(gateway, 0, fibnum)) == NULL) { - error = ENETUNREACH; - goto out; - } - rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */ + if ((ifa = ifa_ifwithnet(gateway, 0, fibnum)) == NULL) + return (ENETUNREACH); + + /* TODO: fib-aware */ + if ((flags & RTF_GATEWAY) && ifa_ifwithaddr_check(gateway)) + return (EHOSTUNREACH); + + rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */ + /* * If the redirect isn't from our current router for this dst, * it's either old or wrong. If it redirects us to ourselves, * we have a routing loop, perhaps as a result of an interface * going down recently. */ - if (!(flags & RTF_DONE) && rt) { + if (rt != NULL) { if (!sa_equal(src, rt->rt_gateway)) { - error = EINVAL; - goto done; + RTFREE_LOCKED(rt); + return (EINVAL); } if (rt->rt_ifa != ifa && ifa->ifa_addr->sa_family != AF_LINK) { - error = EINVAL; - goto done; + RTFREE_LOCKED(rt); + return (EINVAL); } } - if ((flags & RTF_GATEWAY) && ifa_ifwithaddr_check(gateway)) { - error = EHOSTUNREACH; - goto done; + + /* If host route already exists, ignore redirect. */ + if (rt != NULL && (rt->rt_flags & RTF_HOST)) { + RTFREE_LOCKED(rt); + return (EEXIST); } - /* - * Create a new entry if we just got back a wildcard entry - * or the lookup failed. 
This is necessary for hosts - * which use routing redirects generated by smart gateways - * to dynamically build the routing tables. - */ - if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) - goto create; - /* - * Don't listen to the redirect if it's - * for a route to an interface. - */ - if (rt->rt_flags & RTF_GATEWAY) { - if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { - /* - * Changing from route to net => route to host. - * Create new route, rather than smashing route to net. - */ - create: - if (rt != NULL) - RTFREE_LOCKED(rt); - - flags |= RTF_DYNAMIC; - bzero((caddr_t)&info, sizeof(info)); - info.rti_info[RTAX_DST] = dst; - info.rti_info[RTAX_GATEWAY] = gateway; - info.rti_info[RTAX_NETMASK] = netmask; - ifa_ref(ifa); - info.rti_ifa = ifa; - info.rti_flags = flags; - error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); - if (rt != NULL) { - RT_LOCK(rt); - flags = rt->rt_flags; - } - if (error == 0) - RTSTAT_INC(rts_dynamic); - } else { - /* - * Smash the current notion of the gateway to - * this destination. Should check about netmask!!! - */ - if ((flags & RTF_GATEWAY) == 0) - rt->rt_flags &= ~RTF_GATEWAY; - rt->rt_flags |= RTF_MODIFIED; - flags |= RTF_MODIFIED; - RTSTAT_INC(rts_newgateway); - /* - * add the key and gateway (in one malloc'd chunk). - */ - RT_UNLOCK(rt); - RIB_WLOCK(rnh); - RT_LOCK(rt); - rt_setgate(rt, rt_key(rt), gateway); - RIB_WUNLOCK(rnh); - } - } else - error = EHOSTUNREACH; -done: - if (rt) + /* If the prefix is directly reachable, ignore redirect */ + if (rt != NULL && !(rt->rt_flags & RTF_GATEWAY)) { RTFREE_LOCKED(rt); - out: - if (error) + return (EEXIST); + } + if (rt != NULL) /* lookup may have failed: nothing to release */ + RTFREE_LOCKED(rt); + return (0); +} + + +/* + * Force a routing table entry to the specified + * destination to go through the given gateway. + * Normally called as a result of a routing redirect + * message from the network layer.
+ */ +int +rtredirect_fib(struct sockaddr *dst, struct sockaddr *gateway, + int flags, struct sockaddr *src, int expire_sec, u_int fibnum) +{ + struct rtentry *rt; + int error = 0; + struct rt_addrinfo info; + struct rt_metrics rti_rmx; + struct ifaddr *ifa; + + NET_EPOCH_ASSERT(); + + if (rt_tables_get_rnh(fibnum, dst->sa_family) == NULL) + return (EAFNOSUPPORT); + + error = verify_redirect_gateway(src, dst, gateway, flags, fibnum); + if (error != 0) + return (error); + + /* verify the gateway is directly reachable */ + if ((ifa = ifa_ifwithnet(gateway, 0, fibnum)) == NULL) + return (ENETUNREACH); + ifa_ref(ifa); + + flags |= RTF_DYNAMIC; + + bzero(&info, sizeof(info)); + info.rti_info[RTAX_DST] = dst; + info.rti_info[RTAX_GATEWAY] = gateway; + info.rti_ifa = ifa; + info.rti_flags = flags; + + /* Setup route metrics to define expire time */ + bzero(&rti_rmx, sizeof(rti_rmx)); + /* Define expire time as absolute */ + rti_rmx.rmx_expire = expire_sec + time_second; + info.rti_mflags |= RTV_EXPIRE; + info.rti_rmx = &rti_rmx; + + error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); + ifa_free(ifa); + + if (error != 0) { RTSTAT_INC(rts_badredirect); - bzero((caddr_t)&info, sizeof(info)); + return (error); + } + + RT_LOCK(rt); + flags = rt->rt_flags; + RTFREE_LOCKED(rt); + + RTSTAT_INC(rts_dynamic); + + bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; - info.rti_info[RTAX_NETMASK] = netmask; info.rti_info[RTAX_AUTHOR] = src; rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum); + + return (0); } /* @@ -1052,6 +1059,56 @@ } /* + * Iterates over rtable specified by @fibnum and @family and deletes elements + * marked by @filter_f. + * @fibnum: rtable id + * @family: AF_ address family + * @filter_f: lambda function returning non-zero value for items to delete + * @arg: data to pass to the @filter_f + * @report: true if rtsock notification is needed.
+ */ +void +rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, int report) +{ + struct rib_head *rnh; + struct rt_delinfo di; + struct rtentry *rt; + + rnh = rt_tables_get_rnh(fibnum, family); + if (rnh == NULL) + return; + + bzero(&di, sizeof(di)); + di.info.rti_filter = filter_f; + di.info.rti_filterdata = arg; + di.rnh = rnh; + + RIB_WLOCK(rnh); + rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); + RIB_WUNLOCK(rnh); + + if (di.head == NULL) + return; + + /* We might have something to reclaim */ + while (di.head != NULL) { + rt = di.head; + di.head = rt->rt_chain; + rt->rt_chain = NULL; + + /* TODO std rt -> rt_addrinfo export */ + di.info.rti_info[RTAX_DST] = rt_key(rt); + di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); + + rt_notifydelete(rt, &di.info); + + if (report) + rt_routemsg(RTM_DELETE, rt->rt_ifp, 0, rt, fibnum); + RTFREE_LOCKED(rt); + } +} + +/* * Iterates over all existing fibs in system. * Deletes each element for which @filter_f function returned * non-zero value. @@ -1061,16 +1118,9 @@ void rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg) { - struct rib_head *rnh; - struct rt_delinfo di; - struct rtentry *rt; uint32_t fibnum; int i, start, end; - bzero(&di, sizeof(di)); - di.info.rti_filter = filter_f; - di.info.rti_filterdata = arg; - for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { /* Do we want some specific family? 
*/ if (af != AF_UNSPEC) { @@ -1082,32 +1132,10 @@ } for (i = start; i <= end; i++) { - rnh = rt_tables_get_rnh(fibnum, i); - if (rnh == NULL) + if (rt_tables_get_rnh(fibnum, i) == NULL) continue; - di.rnh = rnh; - RIB_WLOCK(rnh); - rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); - RIB_WUNLOCK(rnh); - - if (di.head == NULL) - continue; - - /* We might have something to reclaim */ - while (di.head != NULL) { - rt = di.head; - di.head = rt->rt_chain; - rt->rt_chain = NULL; - - /* TODO std rt -> rt_addrinfo export */ - di.info.rti_info[RTAX_DST] = rt_key(rt); - di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); - - rt_notifydelete(rt, &di.info); - RTFREE_LOCKED(rt); - } - + rib_walk_del(fibnum, i, filter_f, arg, 0); } } } @@ -1692,6 +1720,9 @@ /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); + + if (rn != NULL && rt->rt_expire > 0) + tmproutes_update(rnh, rt); rt_old = NULL; if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) { Index: sys/net/route_temporal.c =================================================================== --- /dev/null +++ sys/net/route_temporal.c @@ -0,0 +1,156 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 Alexander V. Chernikov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * This file contains code responsible for expiring temporal routes + * (typically, redirect-originated) from the route tables. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* + * Callback returning 1 for the expired routes. + * Updates time of the next nearest route expiration + * as a side effect. + */ +static int +expire_route(const struct rtentry *rt, void *arg) +{ + time_t *next_callout = (time_t *)arg; + + if (rt->rt_expire == 0) + return (0); + + if (rt->rt_expire <= time_uptime) { + /* Expired: non-zero return asks the walker to delete it. */ + return (1); + } + + /* + * Update next_callout to determine the next ts to + * run the callback at. + */ + if (*next_callout == 0 || *next_callout > rt->rt_expire) + *next_callout = rt->rt_expire; + + return (0); +} + +/* + * Per-rnh callout function traversing the tree and deleting + * expired routes. Calculates next callout run by looking at + * the rt_expire time for the remaining temporal routes.
+ */ +static void +expire_callout(void *arg) +{ + struct rib_head *rnh = (struct rib_head *)arg; + time_t next_expire = 0; + int ticks; + + CURVNET_SET(rnh->rib_vnet); + + rib_walk_del(rnh->rib_fibnum, rnh->rib_family, expire_route, + (void *)&next_expire, 1); + + RIB_WLOCK(rnh); + if (next_expire > 0) { + ticks = (next_expire - time_uptime) * hz; + if (ticks < 0) + ticks = 0; + callout_reset(&rnh->expire_callout, ticks, expire_callout, rnh); + rnh->next_expire = next_expire; + } else { + /* + * Before resetting next_expire, check that tmproutes_update() + * has not kicked in and scheduled another invocation. + */ + if (callout_pending(&rnh->expire_callout) == 0) + rnh->next_expire = 0; + } + RIB_WUNLOCK(rnh); + CURVNET_RESTORE(); +} + +/* + * Function responsible for updating the time of the next callout + * w.r.t. new temporal routes insertion. + * + * Called by the routing code upon adding new temporal route + * to the tree. RIB_WLOCK must be held. + */ +void +tmproutes_update(struct rib_head *rnh, struct rtentry *rt) +{ + int ticks; + + RIB_WLOCK_ASSERT(rnh); + + if (rnh->next_expire == 0 || rnh->next_expire > rt->rt_expire) { + /* + * Callback is not scheduled, is executing, + * or is scheduled for a later time than we need. + * + * Schedule the one for the current @rt expiration time.
+ */ + ticks = (rt->rt_expire - time_uptime) * hz; + if (ticks < 0) + ticks = 0; + callout_reset(&rnh->expire_callout, ticks, expire_callout, rnh); + + rnh->next_expire = rt->rt_expire; + } +} + +void +tmproutes_init(struct rib_head *rh) +{ + + callout_init(&rh->expire_callout, 1); +} + + +void +tmproutes_destroy(struct rib_head *rh) +{ + + callout_drain(&rh->expire_callout); +} + Index: sys/net/route_var.h =================================================================== --- sys/net/route_var.h +++ sys/net/route_var.h @@ -46,6 +46,11 @@ struct radix_node rnh_nodes[3]; /* empty tree for common case */ struct rmlock rib_lock; /* config/data path lock */ struct radix_mask_head rmhead; /* masks radix head */ + struct vnet *rib_vnet; /* vnet pointer */ + int rib_family; /* AF of the rtable */ + u_int rib_fibnum; /* tableid */ + struct callout expire_callout; /* Callout for expiring dynamic routes */ + time_t next_expire; /* Next expire run ts */ }; #define RIB_RLOCK_TRACKER struct rm_priotracker _rib_tracker @@ -76,5 +81,8 @@ return (res); } +void tmproutes_update(struct rib_head *rnh, struct rtentry *rt); +void tmproutes_init(struct rib_head *rh); +void tmproutes_destroy(struct rib_head *rh); #endif Index: sys/netinet/in_proto.c =================================================================== --- sys/netinet/in_proto.c +++ sys/netinet/in_proto.c @@ -297,7 +297,7 @@ }, }; -extern int in_inithead(void **, int); +extern int in_inithead(void **, int, u_int); extern int in_detachhead(void **, int); struct domain inetdomain = { Index: sys/netinet/in_rmx.c =================================================================== --- sys/netinet/in_rmx.c +++ sys/netinet/in_rmx.c @@ -49,7 +49,7 @@ #include #include -extern int in_inithead(void **head, int off); +extern int in_inithead(void **head, int off, u_int fibnum); #ifdef VIMAGE extern int in_detachhead(void **head, int off); #endif @@ -116,11 +116,11 @@ * Initialize our routing tree. 
*/ int -in_inithead(void **head, int off) +in_inithead(void **head, int off, u_int fibnum) { struct rib_head *rh; - rh = rt_table_init(32); + rh = rt_table_init(32, AF_INET, fibnum); if (rh == NULL) return (0); @@ -197,14 +197,3 @@ rtalloc_ign_fib(ro, ignflags, fibnum); } -void -in_rtredirect(struct sockaddr *dst, - struct sockaddr *gateway, - struct sockaddr *netmask, - int flags, - struct sockaddr *src, - u_int fibnum) -{ - rtredirect_fib(dst, gateway, netmask, flags, src, fibnum); -} - Index: sys/netinet/in_var.h =================================================================== --- sys/netinet/in_var.h +++ sys/netinet/in_var.h @@ -474,8 +474,6 @@ /* XXX */ void in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum); -void in_rtredirect(struct sockaddr *, struct sockaddr *, - struct sockaddr *, int, struct sockaddr *, u_int); #endif /* _KERNEL */ /* INET6 stuff */ Index: sys/netinet/ip_icmp.c =================================================================== --- sys/netinet/ip_icmp.c +++ sys/netinet/ip_icmp.c @@ -128,6 +128,12 @@ &VNET_NAME(log_redirect), 0, "Log ICMP redirects to the console"); +VNET_DEFINE_STATIC(int, redirtimeout) = 60 * 10; /* 10 minutes */ +#define V_redirtimeout VNET(redirtimeout) +SYSCTL_INT(_net_inet_icmp, OID_AUTO, redirtimeout, CTLFLAG_VNET | CTLFLAG_RW, + &VNET_NAME(redirtimeout), 0, + "Delay in seconds before expiring redirect route"); + VNET_DEFINE_STATIC(char, reply_src[IFNAMSIZ]); #define V_reply_src VNET(reply_src) SYSCTL_STRING(_net_inet_icmp, OID_AUTO, reply_src, CTLFLAG_VNET | CTLFLAG_RW, @@ -690,10 +696,10 @@ #endif icmpsrc.sin_addr = icp->icmp_ip.ip_dst; for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) { - in_rtredirect((struct sockaddr *)&icmpsrc, + rtredirect_fib((struct sockaddr *)&icmpsrc, (struct sockaddr *)&icmpdst, - (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST, - (struct sockaddr *)&icmpgw, fibnum); + RTF_GATEWAY | RTF_HOST, (struct sockaddr *)&icmpgw, + V_redirtimeout, fibnum); } 
pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&icmpsrc); break; Index: sys/netinet6/icmp6.c =================================================================== --- sys/netinet6/icmp6.c +++ sys/netinet6/icmp6.c @@ -2387,9 +2387,9 @@ } else gw = ifp->if_addr->ifa_addr; for (fibnum = 0; fibnum < rt_numfibs; fibnum++) - in6_rtredirect((struct sockaddr *)&sdst, gw, - (struct sockaddr *)NULL, rt_flags, - (struct sockaddr *)&ssrc, fibnum); + rtredirect_fib((struct sockaddr *)&sdst, gw, + rt_flags, (struct sockaddr *)&ssrc, + V_icmp6_redirtimeout, fibnum); } /* finally update cached route in each socket via pfctlinput */ { Index: sys/netinet6/in6_proto.c =================================================================== --- sys/netinet6/in6_proto.c +++ sys/netinet6/in6_proto.c @@ -336,7 +336,7 @@ }, }; -extern int in6_inithead(void **, int); +extern int in6_inithead(void **, int, u_int); #ifdef VIMAGE extern int in6_detachhead(void **, int); #endif @@ -566,7 +566,7 @@ "Accept ICMPv6 redirect messages"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_REDIRTIMEOUT, redirtimeout, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(icmp6_redirtimeout), 0, - ""); /* XXX unused */ + "Delay in seconds before expiring redirect route"); SYSCTL_VNET_PCPUSTAT(_net_inet6_icmp6, ICMPV6CTL_STATS, stats, struct icmp6stat, icmp6stat, "ICMPv6 statistics (struct icmp6stat, netinet/icmp6.h)"); Index: sys/netinet6/in6_rmx.c =================================================================== --- sys/netinet6/in6_rmx.c +++ sys/netinet6/in6_rmx.c @@ -96,7 +96,7 @@ #include #include -extern int in6_inithead(void **head, int off); +extern int in6_inithead(void **head, int off, u_int fibnum); #ifdef VIMAGE extern int in6_detachhead(void **head, int off); #endif @@ -157,11 +157,12 @@ */ int -in6_inithead(void **head, int off) +in6_inithead(void **head, int off, u_int fibnum) { struct rib_head *rh; - rh = rt_table_init(offsetof(struct sockaddr_in6, sin6_addr) << 3); + rh = rt_table_init(offsetof(struct sockaddr_in6, 
sin6_addr) << 3, + AF_INET6, fibnum); if (rh == NULL) return (0); @@ -185,14 +186,6 @@ /* * Extended API for IPv6 FIB support. */ -void -in6_rtredirect(struct sockaddr *dst, struct sockaddr *gw, struct sockaddr *nm, - int flags, struct sockaddr *src, u_int fibnum) -{ - - rtredirect_fib(dst, gw, nm, flags, src, fibnum); -} - int in6_rtrequest(int req, struct sockaddr *dst, struct sockaddr *gw, struct sockaddr *mask, int flags, struct rtentry **ret_nrt, u_int fibnum) Index: sys/netinet6/in6_var.h =================================================================== --- sys/netinet6/in6_var.h +++ sys/netinet6/in6_var.h @@ -917,8 +917,6 @@ * Extended API for IPv6 FIB support. */ struct mbuf *ip6_tryforward(struct mbuf *); -void in6_rtredirect(struct sockaddr *, struct sockaddr *, struct sockaddr *, - int, struct sockaddr *, u_int); int in6_rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); void in6_rtalloc(struct route_in6 *, u_int); Index: sys/sys/domain.h =================================================================== --- sys/sys/domain.h +++ sys/sys/domain.h @@ -60,7 +60,7 @@ struct protosw *dom_protosw, *dom_protoswNPROTOSW; struct domain *dom_next; int (*dom_rtattach) /* initialize routing table */ - (void **, int); + (void **, int, u_int); int (*dom_rtdetach) /* clean up routing table */ (void **, int); void *(*dom_ifattach)(struct ifnet *); Index: tests/sys/net/routing/rtsock_print.h =================================================================== --- tests/sys/net/routing/rtsock_print.h +++ tests/sys/net/routing/rtsock_print.h @@ -259,6 +259,19 @@ printf("%s: len %hu, pid: %d, seq %d, errno %d, flags: %s\n", msgtypes[rtm->rtm_type], rtm->rtm_msglen, rtm->rtm_pid, rtm->rtm_seq, rtm->rtm_errno, flags_buf); + if (rtm->rtm_inits > 0) { + _printb(flags_buf, sizeof(flags_buf), rtm->rtm_inits, metricnames); + printf("metrics: %s\n", flags_buf); + if (rtm->rtm_inits & RTV_MTU) + printf("mtu: %lu\n", 
rtm->rtm_rmx.rmx_mtu); + if (rtm->rtm_inits & RTV_EXPIRE) { + struct timeval tv; + gettimeofday(&tv, NULL); + printf("expire: %d (%lu raw)\n", + (int)(rtm->rtm_rmx.rmx_expire - tv.tv_sec), rtm->rtm_rmx.rmx_expire); + } + } + _printb(flags_buf, sizeof(flags_buf), rtm->rtm_addrs, addrnames); printf("sockaddrs: 0x%X %s\n", rtm->rtm_addrs, flags_buf); Index: tests/sys/net/routing/test_rtsock_l3.c =================================================================== --- tests/sys/net/routing/test_rtsock_l3.c +++ tests/sys/net/routing/test_rtsock_l3.c @@ -29,7 +29,12 @@ #include "rtsock_common.h" #include "rtsock_config.h" +#include "sys/types.h" +#include +#include +#include "net/bpf.h" + static inline struct rtsock_test_config * presetup_ipv6(const atf_tc_t *tc) { @@ -158,8 +163,6 @@ ret = sa_equal_msg(sa, gw, msg, sizeof(msg)); RTSOCK_ATF_REQUIRE_MSG(rtm, ret != 0, "GATEWAY sa diff: %s", msg); } - - RTSOCK_ATF_REQUIRE_MSG(rtm, rtm->rtm_pid > 0, "expected non-zero pid"); } static void @@ -503,8 +506,106 @@ CLEANUP_AFTER_TEST; } +ATF_TC_WITH_CLEANUP(rtm_add_v4_temporal1_success); +ATF_TC_HEAD(rtm_add_v4_temporal1_success, tc) +{ + DESCRIBE_ROOT_TEST("Tests IPv4 route expiration with expire time set"); +} +ATF_TC_BODY(rtm_add_v4_temporal1_success, tc) +{ + DECLARE_TEST_VARS; + c = presetup_ipv4(tc); + + /* Create IPv4 subnetwork with smaller prefix */ + struct sockaddr_in mask4; + struct sockaddr_in net4; + struct sockaddr_in gw4; + prepare_v4_network(c, &net4, &mask4, &gw4); + + prepare_route_message(rtm, RTM_ADD, (struct sockaddr *)&net4, + (struct sockaddr *)&mask4, (struct sockaddr *)&gw4); + + /* Set expire time to now */ + struct timeval tv; + gettimeofday(&tv, NULL); + rtm->rtm_rmx.rmx_expire = tv.tv_sec - 1; + rtm->rtm_inits |= RTV_EXPIRE; + + rtsock_send_rtm(c->rtsock_fd, rtm); + rtm = rtsock_read_rtm_reply(c->rtsock_fd, buffer, sizeof(buffer), rtm->rtm_seq); + ATF_REQUIRE_MSG(rtm != NULL, "unable to get rtsock reply for RTM_ADD"); + RTSOCK_ATF_REQUIRE_MSG(rtm, 
rtm->rtm_inits & RTV_EXPIRE, "RTV_EXPIRE not set"); + + /* The next should be route deletion */ + rtm = rtsock_read_rtm(c->rtsock_fd, buffer, sizeof(buffer)); + + verify_route_message(rtm, RTM_DELETE, (struct sockaddr *)&net4, + (struct sockaddr *)&mask4, (struct sockaddr *)&gw4); + + /* TODO: add RTF_DONE */ + verify_route_message_extra(rtm, c->ifindex, RTF_GATEWAY | RTF_STATIC); +} + +ATF_TC_CLEANUP(rtm_add_v4_temporal1_success, tc) +{ + CLEANUP_AFTER_TEST; +} + +ATF_TC_WITH_CLEANUP(rtm_add_v6_temporal1_success); +ATF_TC_HEAD(rtm_add_v6_temporal1_success, tc) +{ + DESCRIBE_ROOT_TEST("Tests IPv6 route expiration with expire time set"); +} + +ATF_TC_BODY(rtm_add_v6_temporal1_success, tc) +{ + DECLARE_TEST_VARS; + + c = presetup_ipv6(tc); + + /* Create IPv6 subnetwork with smaller prefix */ + struct sockaddr_in6 mask6; + struct sockaddr_in6 net6; + struct sockaddr_in6 gw6; + prepare_v6_network(c, &net6, &mask6, &gw6); + + prepare_route_message(rtm, RTM_ADD, (struct sockaddr *)&net6, + (struct sockaddr *)&mask6, (struct sockaddr *)&gw6); + + /* Set expire time to now */ + struct timeval tv; + gettimeofday(&tv, NULL); + rtm->rtm_rmx.rmx_expire = tv.tv_sec - 1; + rtm->rtm_inits |= RTV_EXPIRE; + + rtsock_send_rtm(c->rtsock_fd, rtm); + rtm = rtsock_read_rtm_reply(c->rtsock_fd, buffer, sizeof(buffer), rtm->rtm_seq); + ATF_REQUIRE_MSG(rtm != NULL, "unable to get rtsock reply for RTM_ADD"); + RTSOCK_ATF_REQUIRE_MSG(rtm, rtm->rtm_inits & RTV_EXPIRE, "RTV_EXPIRE not set"); + + /* The next should be route deletion */ + rtm = rtsock_read_rtm(c->rtsock_fd, buffer, sizeof(buffer)); + + ATF_REQUIRE_MSG(rtm != NULL, "unable to get rtsock message for RTM_DELETE"); + + verify_route_message(rtm, RTM_DELETE, (struct sockaddr *)&net6, + (struct sockaddr *)&mask6, (struct sockaddr *)&gw6); + + + /* XXX: Currently kernel sets RTF_UP automatically but does NOT report it in the reply */ + /* TODO: add RTF_DONE */ + verify_route_message_extra(rtm, c->ifindex, RTF_GATEWAY |
RTF_STATIC); +} + +ATF_TC_CLEANUP(rtm_add_v6_temporal1_success, tc) +{ + CLEANUP_AFTER_TEST; +} + + + ATF_TP_ADD_TCS(tp) { ATF_TP_ADD_TC(tp, rtm_get_v4_exact_success); @@ -515,6 +616,9 @@ ATF_TP_ADD_TC(tp, rtm_del_v4_prefix_nogw_success); ATF_TP_ADD_TC(tp, rtm_add_v6_gu_gw_gu_direct_success); ATF_TP_ADD_TC(tp, rtm_del_v6_gu_prefix_nogw_success); + /* temporal routes */ + ATF_TP_ADD_TC(tp, rtm_add_v4_temporal1_success); + ATF_TP_ADD_TC(tp, rtm_add_v6_temporal1_success); return (atf_no_error()); } Index: tests/sys/netinet6/Makefile =================================================================== --- tests/sys/netinet6/Makefile +++ tests/sys/netinet6/Makefile @@ -8,15 +8,18 @@ ATF_TESTS_SH= \ exthdr \ mld \ - scapyi386 + scapyi386 \ + redirect ${PACKAGE}FILES+= exthdr.py ${PACKAGE}FILES+= mld.py ${PACKAGE}FILES+= scapyi386.py +${PACKAGE}FILES+= redirect.py ${PACKAGE}FILESMODE_exthdr.py= 0555 ${PACKAGE}FILESMODE_mld.py= 0555 ${PACKAGE}FILESMODE_scapyi386.py=0555 +${PACKAGE}FILESMODE_redirect.py=0555 TESTS_SUBDIRS+= frag6 Index: tests/sys/netinet6/redirect.py =================================================================== --- /dev/null +++ tests/sys/netinet6/redirect.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python +# - +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2020 Alexander V. Chernikov +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD$ +# + +import argparse +import scapy.all as sc +import socket +import sys +import fcntl +import struct + + +def parse_args(): + parser = argparse.ArgumentParser(description='ICMPv6 redirect generator') + parser.add_argument('--smac', type=str, help='eth source mac') + parser.add_argument('--dmac', type=str, help='eth dest mac') + parser.add_argument('--sip', type=str, help='remote router ll source ip') + parser.add_argument('--dip', type=str, help='local router ip') + parser.add_argument('--iface', type=str, help='ifname to send packet to') + parser.add_argument('--route', type=str, help='destination IP to redirect') + parser.add_argument('--gw', type=str, help='redirect GW') + return parser.parse_args() + + +def construct_icmp6_redirect(smac, dmac, sip, dip, route_dst, route_gw): + e = sc.Ether(src=smac, dst=dmac) + l3 = sc.IPv6(src=sip, dst=dip) + icmp6 = sc.ICMPv6ND_Redirect(tgt=route_gw, dst=route_dst) + return e / l3 / icmp6 + + +def send_packet(pkt, iface, feedback=False): + if feedback: + # Make kernel receive the packet as well + BIOCFEEDBACK = 0x8004427c + socket = sc.conf.L2socket(iface=iface) + fcntl.ioctl(socket.ins, BIOCFEEDBACK, struct.pack('I', 1)) + sc.sendp(pkt,
socket=socket, verbose=True) + else: + sc.sendp(pkt, iface=iface, verbose=False) + + +def main(): + args = parse_args() + pkt = construct_icmp6_redirect(args.smac, args.dmac, args.sip, args.dip, + args.route, args.gw) + send_packet(pkt, args.iface) + + +if __name__ == '__main__': + main() Index: tests/sys/netinet6/redirect.sh =================================================================== --- /dev/null +++ tests/sys/netinet6/redirect.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env atf-sh +#- +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2020 Alexander V. Chernikov +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD$ +# + +. 
$(atf_get_srcdir)/../common/vnet.subr
+
+atf_test_case "valid_redirect" "cleanup"
+valid_redirect_head() {
+	# Test metadata: must run as root and needs scapy installed
+	atf_set descr 'Test valid IPv6 redirect'
+	atf_set require.user root
+	atf_set require.progs scapy
+}
+
+valid_redirect_body() {
+	# Build per-run address/jail suffixes from a fixed id and the shell PID
+	ids=65533
+	id=$(printf "%x" ${ids})
+	xl=$$
+	yl=""
+	if [ ${xl} -gt 65535 ]; then
+		xl=$((${xl} - 65535))
+		yl="1"
+	fi
+	xl=$(printf "%x" ${xl})
+
+	vnet_init
+
+	ip6a="2001:db8:6666:0000:${yl}:${id}:1:${xl}"
+	ip6b="2001:db8:6666:0000:${yl}:${id}:2:${xl}"
+
+	net6="2001:db8:6667::/64"
+	dst_addr6="${net6%/*}4242"
+	new_rtr_ll_ip="fe80::5555"
+
+	# Router the redirect is sent on behalf of
+	remote_rtr_ll_ip="fe80::4242"
+	remote_rtr_mac="00:00:5E:00:53:42"
+
+	script_name="redirect.py"
+
+	epair=$(vnet_mkepair)
+	ifconfig ${epair}a up
+	ifconfig ${epair}a inet6 ${ip6a}/64
+
+	jname="v6t-${id}-${yl}-${xl}"
+	vnet_mkjail ${jname} ${epair}b
+	jexec ${jname} ifconfig ${epair}b up
+	jexec ${jname} ifconfig ${epair}b inet6 ${ip6b}/64
+
+	# Static neighbour entry for the remote router
+	jexec ${jname} ndp -s ${remote_rtr_ll_ip}%${epair}b ${remote_rtr_mac}
+	# Make the test prefix reachable via the remote router
+	jexec ${jname} route add -6 -net ${net6} ${remote_rtr_ll_ip}%${epair}b
+
+	local_ll_ip=$(jexec ${jname} ifconfig ${epair}b inet6 | awk '$1 ~ /inet6/&&$2~/^fe80/ {print$2}'|awk -F% '{print$1}')
+	local_ll_mac=$(jexec ${jname} ifconfig ${epair}b ether | awk '$1~/ether/{print$2}')
+
+	# Give DAD time to complete
+	sleep 2
+
+	# echo "LOCAL: ${local_ll_ip} ${local_ll_mac}"
+	# echo "REMOTE: ${remote_rtr_ll_ip} ${remote_rtr_mac}"
+
+	atf_check -s exit:0 $(atf_get_srcdir)/${script_name} \
+	    --iface ${epair}a \
+	    --smac ${remote_rtr_mac} --sip ${remote_rtr_ll_ip} \
+	    --dmac ${local_ll_mac} --dip ${local_ll_ip} \
+	    --route ${dst_addr6} --gw ${new_rtr_ll_ip}
+
+	installed=$(jexec ${jname} route -n get -6 ${dst_addr6} | grep destination | grep -c ${dst_addr6})
+	# Exactly one matching destination line: the redirect got installed
+	atf_check_equal "1" "${installed}"
+}
+
+valid_redirect_cleanup() {
+	# Presumably tears down what the vnet.subr helpers created (see vnet.subr)
+	vnet_cleanup
+}
+
+# Register the test cases of this file with atf
+atf_init_test_cases() {
+
+	atf_add_test_case "valid_redirect"
+}
+
+# end
+