Index: sys/modules/tests/Makefile =================================================================== --- sys/modules/tests/Makefile +++ sys/modules/tests/Makefile @@ -3,6 +3,7 @@ SUBDIR+= framework SUBDIR+= .WAIT SUBDIR+= callout_test +SUBDIR+= routing SUBDIR_PARALLEL= Index: sys/net/route/route_ctl2.c =================================================================== --- /dev/null +++ sys/net/route/route_ctl2.c @@ -0,0 +1,1588 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2020 Alexander V. Chernikov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * $FreeBSD$
+ */
+
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_route_mpath.h"
+
+/*
+ * NOTE(review): the header names of the #include lines below are
+ * missing in this copy of the patch and have to be restored.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#define NEED_RTZONE
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+/*
+ * This file contains the control plane functions of the routing tables.
+ *
+ * All functions assume they are called in net epoch.
+ */
+
+/*
+ * Per-VNET multipath switch.  Defaults to 1 and is tunable/writable when
+ * the kernel is built with ROUTE_MPATH; otherwise it is 0 and read-only.
+ */
+#define V_rib_route_multipath VNET(rib_route_multipath)
+#ifdef ROUTE_MPATH
+VNET_DEFINE(u_int, rib_route_multipath) = 1;
+#define MP_FLAGS CTLFLAG_RWTUN
+#else
+VNET_DEFINE(u_int, rib_route_multipath) = 0;
+#define MP_FLAGS CTLFLAG_RD
+#endif
+SYSCTL_UINT(_net_route, OID_AUTO, multipath, MP_FLAGS | CTLFLAG_VNET,
+    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
+
+
+static void set_req_mtu(const struct rt_addrinfo *info, struct nhop_request *req);
+static int create_rte_from_info(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct rtentry **ret_rt);
+static int can_rib_multipath(struct rib_head *rh);
+
+static int add_route(struct rib_head *rnh, struct rtentry *rt,
+    struct rt_addrinfo *info, struct rib_cmd_info *rc);
+static int del_route(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct rib_cmd_info *rc);
+
+static int update_gateway_metadata(struct rt_addrinfo *info, int fibnum);
+static void fill_nh_request(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct nhop_request *nh_req);
+static void fill_nh_request_from_nhop(const struct nhop_object *nh,
+    struct sockaddr_storage *gw_storage, struct nhop_request *nh_req);
+
+
+/*
+ * Returns address family to operate on from @info.
+ *
+ * The family is read from the RTAX_DST sockaddr, which is dereferenced
+ * without a NULL check.
+ */
+static sa_family_t
+get_family_from_info(const struct rt_addrinfo *info)
+{
+
+	return ((info->rti_info[RTAX_DST])->sa_family);
+}
+
+
+/*
+ * Sets @nh_req mtu data based on the @info data.
+ */
+static void
+set_req_mtu(const struct rt_addrinfo *info, struct nhop_request *nh_req)
+{
+
+	/* Nothing to do unless the request carries an MTU metric. */
+	if ((info->rti_mflags & RTV_MTU) == 0)
+		return;
+
+	/*
+	 * A non-zero MTU is an explicit user choice and gets pinned;
+	 * zero asks for a rollback to the default, so the pin is dropped.
+	 */
+	if (info->rti_rmx->rmx_mtu == 0)
+		nh_req->rt_flags &= ~RTF_FIXEDMTU;
+	else
+		nh_req->rt_flags |= RTF_FIXEDMTU;
+	nh_req->mtu = info->rti_rmx->rmx_mtu;
+}
+
+
+/*
+ * Builds a fresh nexthop request in @nh_req out of @info.
+ *
+ * The interface is always taken from info->rti_ifa, which has to be set.
+ * The nexthop type is left at 0; the protocol hook is expected to set it.
+ */
+static void
+fill_nh_request(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct nhop_request *nh_req)
+{
+
+	memset(nh_req, 0, sizeof(*nh_req));
+
+	/* Addressing: family of the destination, gateway as supplied. */
+	nh_req->family = info->rti_info[RTAX_DST]->sa_family;
+	nh_req->gw = info->rti_info[RTAX_GATEWAY];
+
+	/* Interface data comes from the interface address. */
+	nh_req->ifa = info->rti_ifa;
+	nh_req->ifp = info->rti_ifa->ifa_ifp;
+
+	/* Start from the original route flags, then apply the MTU rules. */
+	nh_req->rt_flags = info->rti_flags;
+	nh_req->nh_type = 0;
+	set_req_mtu(info, nh_req);
+}
+
+/*
+ * Builds a nexthop request in @nh_req that mirrors the existing nexthop @nh.
+ * @gw_storage is caller-provided backing store for the gateway address;
+ * nh_req->gw points into it on return.
+ */
+static void
+fill_nh_request_from_nhop(const struct nhop_object *nh,
+    struct sockaddr_storage *gw_storage, struct nhop_request *nh_req)
+{
+
+	bzero(nh_req, sizeof(*nh_req));
+
+	nh_req->rt_flags = nh->nh_priv->rt_flags;
+	nh_req->nh_type = nh->nh_priv->nh_type;
+	nh_req->family = nh->nh_priv->nh_family;
+	nh_req->mtu = nh->nh_mtu;
+	nh_req->ifa = nh->nh_ifa;
+	nh_req->ifp = nh->nh_ifp;
+
+	if ((nh_req->rt_flags & RTF_GATEWAY) == 0) {
+		/* The gateway value is largely ignored here: mark it empty */
+		gw_storage->ss_len = 0;
+	} else {
+		/* The stored gateway length is assumed to be validated */
+		memcpy(gw_storage, &nh->gw4_sa, nh->gw4_sa.sin_len);
+	}
+	nh_req->gw = (struct sockaddr *)gw_storage;
+}
+
+/*
+ * Update @nh_req request data based on the parameters supplied in @info.
+ * This is a helper function to support route changes.
+ *
+ * It limits the changes that can be done to the route to the following:
+ * 1) all combination of gateway changes (gw, interface, blackhole/reject)
+ * 2) route flags (FLAG[123],STATIC,BLACKHOLE,REJECT)
+ * 3) route MTU
+ * 4) route weight (handled by the caller)
+ *
+ * Assumes nh_req gw pointer has sockaddr_storage-sized pointer supplied
+ *
+ * When @info carries a gateway, nh_req->gw is repointed at that sockaddr,
+ * so @info has to stay valid for as long as @nh_req is in use.
+ *
+ * Returns:
+ * 0 on success, nh_req->ifa and nh_req->ifp referenced
+ * error code otherwise
+ */
+static int
+alter_nh_request(struct rt_addrinfo *info, u_int fibnum, struct nhop_request *nh_req)
+{
+	struct sockaddr *info_gw;
+	int error;
+
+	/* Update MTU if set in the request */
+	set_req_mtu(info, nh_req);
+
+	/* Allow some flags (FLAG1,STATIC,BLACKHOLE,REJECT) to be toggled on change. */
+	nh_req->rt_flags &= ~RTF_FMASK;
+	nh_req->rt_flags |= info->rti_flags & RTF_FMASK;
+
+	/* Consider gateway change */
+	info_gw = info->rti_info[RTAX_GATEWAY];
+	if (info_gw == NULL) {
+		/* Original nexthop data copy hasn't been referenced, do it now */
+		ifa_ref(nh_req->ifa);
+		if_ref(nh_req->ifp);
+		return (0);
+	}
+
+	error = update_gateway_metadata(info, fibnum);
+	if (error != 0)
+		return (error);
+
+	/* ifa/ifp are already referenced by update_gateway_metadata() */
+	nh_req->ifa = info->rti_ifa;
+	nh_req->ifp = info->rti_ifp;
+
+	/*
+	 * Use the requested gateway.  Without this the request keeps the
+	 * gateway copied from the old nexthop and only the interface data
+	 * changes.
+	 */
+	nh_req->gw = info_gw;
+
+	/* Update RTF_GATEWAY flag status */
+	nh_req->rt_flags &= ~RTF_GATEWAY;
+	nh_req->rt_flags |= (RTF_GATEWAY & info->rti_flags);
+
+	return (0);
+}
+
+/*
+ * Creates a new nexthop based on the information in @info.
+ *
+ * The request is built by fill_nh_request() and, when the rib has a
+ * rnh_preadd hook, passed through it before the nexthop is looked up.
+ *
+ * Returns:
+ * 0 on success, filling @nh_ret with the desired nexthop object ptr
+ * errno otherwise (EAGAIN when nhop_get() fails)
+ */
+static int
+create_nhop_from_info(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct nhop_object **nh_ret)
+{
+	struct sockaddr *dst, *netmask;
+	struct nhop_request nh_req;
+	int error;
+
+	fill_nh_request(rnh, info, &nh_req);
+
+	/* Give the protocols chance to augment the request data */
+	dst = info->rti_info[RTAX_DST];
+	netmask = info->rti_info[RTAX_NETMASK];
+
+	if (rnh->rnh_preadd != NULL) {
+		error = rnh->rnh_preadd(rnh->rib_fibnum, dst, netmask, &nh_req);
+		if (error != 0)
+			return (error);
+	}
+
+	*nh_ret = nhop_get(rnh, &nh_req);
+	if (*nh_ret == NULL) {
+		DPRINTF("failed to get the nexthop from req");
+		return (EAGAIN);
+	}
+
+	return (0);
+}
+
+/*
+ * Creates new nexthop based on @nh_old and augmentation data from @info.
+ * Helper function used in the route changes, please see
+ * alter_nh_request() comments for more details.
+ *
+ * Returns:
+ * 0 on success, filling @nh_ret with the desired nexthop object
+ * errno otherwise
+ */
+static int
+create_nhop_from_nhop(struct rib_head *rnh, const struct nhop_object *nh_old,
+    struct rt_addrinfo *info, struct nhop_object **nh_ret)
+{
+	struct sockaddr_storage gw_buf;
+	struct nhop_request req;
+	int error;
+
+	/* Seed the request from the existing nexthop, then overlay @info. */
+	fill_nh_request_from_nhop(nh_old, &gw_buf, &req);
+	error = alter_nh_request(info, rnh->rib_fibnum, &req);
+	if (error != 0)
+		return (error);
+
+	/* From here on req.ifa / req.ifp hold references. */
+	if (rnh->rnh_preadd != NULL) {
+		/* Give protocol chance to alter the nexthop request */
+		error = rnh->rnh_preadd(rnh->rib_fibnum,
+		    info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK],
+		    &req);
+		if (error != 0) {
+			DPRINTF("failed to create nhop: prehook returned %d",
+			    error);
+			goto drop_refs;
+		}
+	}
+
+	*nh_ret = nhop_get(rnh, &req);
+	if (*nh_ret != NULL)
+		return (0);
+
+	DPRINTF("failed to create nhop: nhop_get() failed");
+	error = EAGAIN;
+drop_refs:
+	ifa_free(req.ifa);
+	if_rele(req.ifp);
+	return (error);
+}
+
+/*
+ * Computes the kernel-side expiration time of the route from @info.
+ * Userland supplies an absolute wall-clock timestamp; the kernel keeps
+ * expiration relative to its uptime clock, so the value is rebased.
+ *
+ * Returns: uptime-based expiration timestamp, or 0 when @info does not
+ * carry a positive expiration time.
+ */
+static u_long
+get_expire_from_info(const struct rt_addrinfo *info)
+{
+
+	if ((info->rti_mflags & RTV_EXPIRE) == 0)
+		return (0);
+	if (info->rti_rmx->rmx_expire <= 0)
+		return (0);
+
+	/* Userland (wall-clock) -> kernel (uptime) timebase conversion. */
+	return (info->rti_rmx->rmx_expire - time_second + time_uptime);
+}
+
+/*
+ * Gets route weight from @info.
+ * If weight is not set (true in most cases, 2020-01), returns
+ * ROUTE_DEFAULT_WEIGHT (100). If the weight is too high,
+ * caps it to ROUTE_MAX_WEIGHT (2^24 -1).
+ */
+static uint32_t
+get_weight_from_info(const struct rt_addrinfo *info)
+{
+	uint32_t weight;
+
+	if ((info->rti_mflags & RTV_WEIGHT) && (info->rti_rmx->rmx_weight > 0))
+		weight = info->rti_rmx->rmx_weight;
+	else
+		weight = ROUTE_DEFAULT_WEIGHT;
+
+	if (weight > ROUTE_MAX_WEIGHT)
+		weight = ROUTE_MAX_WEIGHT;
+
+	return (weight);
+}
+
+/*
+ * Creates rtentry based on dst, mask and other metadata in @info.
+ *
+ * The key is stored inline in rt_dst when it fits into a sockaddr_in6 and
+ * in a separate M_RTABLE allocation otherwise.  The nexthop is NOT set
+ * here.
+ *
+ * Returns 0 on success, filling @ret_rt with referenced & unlocked
+ * rtentry; ENOBUFS when an allocation fails.
+ */
+static int
+create_rte_from_info(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct rtentry **ret_rt)
+{
+	struct sockaddr *dst, *ndst, *netmask;
+	struct rtentry *rt;
+
+	dst = info->rti_info[RTAX_DST];
+	netmask = info->rti_info[RTAX_NETMASK];
+
+	rt = uma_zalloc(V_rtzone, M_NOWAIT);
+	if (rt == NULL) {
+		return (ENOBUFS);
+	}
+
+	/* Bump refcount to return referenced rte */
+	rt->rt_refcnt = 1;
+	rt->rt_fibnum = rnh->rib_fibnum;
+
+	if (dst->sa_len <= sizeof(struct sockaddr_in6)) {
+		memcpy(&rt->rt_dst, dst, dst->sa_len);
+		rt_key(rt) = &rt->rt_dst;
+	} else {
+		/* dst size is too big. Alloc separately */
+		rt_key(rt) = malloc(dst->sa_len, M_RTABLE, M_NOWAIT);
+		if (rt_key(rt) == NULL) {
+			uma_zfree(V_rtzone, rt);
+			return (ENOBUFS);
+		}
+	}
+
+	/*
+	 * point to the (possibly newly malloc'd) dest address.
+	 */
+	ndst = (struct sockaddr *)rt_key(rt);
+
+	/*
+	 * make sure it contains the value we want (masked if needed).
+	 */
+	if (netmask != NULL) {
+		/* TODO: verify instead of masked copy */
+		rt_maskedcopy(dst, ndst, netmask);
+		if (!sa_equal(dst, ndst)) {
+			/* contract violation: currently only reported */
+			char abuf[INET6_ADDRSTRLEN];
+			rib_print_sockaddr(abuf, INET6_ADDRSTRLEN, dst);
+			DPRINTF("warn: masked dst != dst (%s)", abuf);
+			/* XXX: fix callers! */
+#if 0
+			uma_zfree(V_rtzone, rt);
+			return (EINVAL);
+#endif
+		}
+	} else
+		bcopy(dst, ndst, dst->sa_len);
+
+	rt->rt_weight = get_weight_from_info(info);
+	rt->rt_expire = get_expire_from_info(info);
+	rt->rte_flags = info->rti_flags & RTE_RT_FLAG_MASK;
+
+	*ret_rt = rt;
+
+	return (0);
+}
+
+/*
+ * Creates a copy of @rt_orig: same key, weight, expiration, flags and
+ * nexthop pointer.  No extra reference is taken on the nexthop here.
+ *
+ * Returns 0 on success, filling @ret_rt with an rtentry whose refcount
+ * is 1; ENOBUFS when an allocation fails.
+ */
+int
+create_rte_from_rte(struct rib_head *rnh, struct rtentry *rt_orig,
+    struct rtentry **ret_rt)
+{
+	struct sockaddr *dst;
+	struct rtentry *rt;
+
+	dst = rt_key(rt_orig);
+
+	rt = uma_zalloc(V_rtzone, M_NOWAIT);
+	if (rt == NULL) {
+		return (ENOBUFS);
+	}
+
+	/* Bump refcount to return referenced rte */
+	rt->rt_refcnt = 1;
+	rt->rt_fibnum = rnh->rib_fibnum;
+
+	if (dst->sa_len <= sizeof(struct sockaddr_in6)) {
+		memcpy(&rt->rt_dst, dst, dst->sa_len);
+		rt_key(rt) = &rt->rt_dst;
+	} else {
+		/* dst size is too big. Alloc separately */
+		rt_key(rt) = malloc(dst->sa_len, M_RTABLE, M_NOWAIT);
+		if (rt_key(rt) == NULL) {
+			uma_zfree(V_rtzone, rt);
+			return (ENOBUFS);
+		}
+		memcpy(rt_key(rt), dst, dst->sa_len);
+	}
+
+	rt->rt_weight = rt_orig->rt_weight;
+	rt->rt_expire = rt_orig->rt_expire;
+	rt->rte_flags = rt_orig->rte_flags;
+	rt->rt_nhop = rt_orig->rt_nhop;
+
+	*ret_rt = rt;
+
+	return (0);
+}
+
+/*
+ * Replaces the rib record for @dst/@mask with @rt_new.
+ * Has to be called with the rib write lock held.
+ *
+ * NOTE(review): the old record is deleted before the new one is added.
+ * When the addition fails (ENOBUFS) the prefix is left without a record.
+ *
+ * Returns 0 on success, ESRCH when there is no record to replace,
+ * ENOBUFS when the new record cannot be inserted.
+ */
+int
+replace_rte(struct rib_head *rnh, struct sockaddr *dst, struct sockaddr *mask,
+    struct rtentry *rt_new)
+{
+	struct radix_node *rn;
+
+	RIB_WLOCK_ASSERT(rnh);
+
+	rn = rnh->rnh_deladdr(dst, mask, &rnh->head);
+	if (rn == NULL)
+		return (ESRCH);
+
+	rn = rnh->rnh_addaddr(dst, mask, &rnh->head,
+	    rt_new->rt_nodes);
+
+	if (rn == NULL)
+		return (ENOBUFS);
+
+	return (0);
+}
+
+/*
+ * Verify that the combination of dst and gateway address families is supported.
+ *
+ * Currently accepted options:
+ * gw_af == dst_af: default option for the routes with RTF_GATEWAY
+ * gw_af == AF_LINK: IPv4/IPv6 interface routes, storing interface index in sdl.
+ * gw_af == AF_UNSPEC: was used to provide raw ethernet header. Currently not supported.
+ *
+ * Return 0 on success, errno otherwise.
+ */
+static int
+verify_gateway_family(const struct rt_addrinfo *info)
+{
+	const struct sockaddr *dst = info->rti_info[RTAX_DST];
+	const struct sockaddr *gateway = info->rti_info[RTAX_GATEWAY];
+
+	/* Nothing to compare unless both addresses are present. */
+	if (dst == NULL || gateway == NULL)
+		return (0);
+
+	if (gateway->sa_family == dst->sa_family)
+		return (0);
+	if (gateway->sa_family == AF_UNSPEC || gateway->sa_family == AF_LINK)
+		return (0);
+
+	return (EINVAL);
+}
+
+/*
+ * Tells whether nexthop @nh may take part in multipath.
+ * A nexthop that is already a multipath group always qualifies; a plain
+ * nexthop qualifies only if it has a gateway and is not a redirect.
+ *
+ * Returns 1 for multipath-eligible nexthop,
+ * 0 otherwise.
+ */
+int
+can_nh_multipath(const struct nhop_object *nh)
+{
+
+	if ((nh->nh_flags & NHF_MULTIPATH) != 0)
+		return (1);
+
+	return ((nh->nh_flags & NHF_GATEWAY) != 0 &&
+	    (nh->nh_flags & NHF_REDIRECT) == 0);
+}
+
+/*
+ * Computes the relative priority of a route from its @rt_flags.
+ * Interface routes (RTF_PINNED) rank highest, normal routes come next,
+ * redirected routes (RTF_DYNAMIC) rank lowest.
+ *
+ * Returns preference as a number, higher is better.
+ */
+static uint16_t
+get_rt_preference(int rt_flags)
+{
+	uint16_t pinned, dynamic;
+
+	pinned = ((rt_flags & RTF_PINNED) != 0);
+	dynamic = ((rt_flags & RTF_DYNAMIC) != 0);
+
+	/* bit 2: pinned, bit 1: not dynamic, bit 0: dynamic */
+	return ((pinned << 2) | ((!dynamic) << 1) | dynamic);
+}
+
+/*
+ * Tries to add route to the RIB.
+ * Assumes @rt_new and @rt_new->rt_nhop are referenced and unlocked
+ *
+ * Return values:
+ * 0 for success. @rt and rt->rt_nhop is consumed.
+ * If @rc is supplied, unlocked operation result is saved there.
+ * != 0: Error code is returned. It is caller responsibility to free rt / rt->rt_nhop.
+ */
+static int
+add_route(struct rib_head *rnh, struct rtentry *rt_new, struct rt_addrinfo *info,
+    struct rib_cmd_info *rc)
+{
+	struct rtentry *rt_orig;
+	struct nhop_object *nh_orig, *nh_new;
+	struct sockaddr *ndst, *netmask;
+	int error;
+
+	ndst = (struct sockaddr *)rt_key(rt_new);
+	netmask = info->rti_info[RTAX_NETMASK];
+	nh_new = rt_new->rt_nhop;
+
+	rc->cmd = RTM_ADD;
+
+	RIB_WLOCK(rnh);
+	RT_LOCK(rt_new);
+
+	/*
+	 * A non-NULL result means @rt_new was inserted; despite the
+	 * variable name it is not a pre-existing route in that case.
+	 */
+	rt_orig = (struct rtentry *)rnh->rnh_addaddr(ndst, netmask, &rnh->head,
+	    rt_new->rt_nodes);
+
+	if (rt_orig != NULL) {
+		/* Success. Update generation id. */
+		rnh->rnh_gen++;
+		/* Notify temporal routes of a new route */
+		if (rt_new->rt_expire != 0)
+			tmproutes_update(rnh, rt_new);
+		RIB_WUNLOCK(rnh);
+
+		/*
+		 * Prepare notification:
+		 * RTM_ADD, nh_old: NULL, nh_new: rt_new->rt_nhop
+		 */
+		rc->nh_new = nh_new;
+		rc->rt = rt_new;
+		RT_UNLOCK(rt_new);
+
+		return (0);
+	}
+
+	/* Route addition failed. Inspect the prefix in the rib to determine the cause */
+	rt_orig = (struct rtentry *)rnh->rnh_lookup(ndst, netmask, &rnh->head);
+	if (rt_orig == NULL) {
+		/*
+		 * The only reason this can happen is when
+		 * rnh_addaddr fails to allocate memory, so the first error
+		 * was not really "prefix exists".
+		 * Unlock everything and return.
+		 */
+		RIB_WUNLOCK(rnh);
+		RT_UNLOCK(rt_new);
+		RTSTAT_INC(rts_algo_add_fail);
+		return (ENOMEM);
+	}
+
+	/* We have existing route in the RIB. */
+	nh_orig = rt_orig->rt_nhop;
+	/* TODO: generalise to the protocol preferences */
+	if ((info->rti_flags & RTF_PINNED) && !RT_IS_PINNED(rt_orig)) {
+		/*
+		 * Our new proposed route is an interface route so it
+		 * takes precedence. Replace old nexthop & rte with a new pair.
+		 */
+		error = replace_rte(rnh, ndst, netmask, rt_new);
+		RT_UNLOCK(rt_new);
+		if (error == 0)
+			rnh->rnh_gen++;
+		RIB_WUNLOCK(rnh);
+
+		if (error != 0)
+			return (error);
+
+		RTSTAT_INC(rts_add_pinned);
+
+		/* Update notification data: reported as a change, not an add */
+		rc->cmd = RTM_CHANGE;
+		rc->rt = rt_new;
+		rc->nh_new = nh_new;
+		rc->nh_old = nh_orig;
+
+		/* The replaced rtentry and its nexthop are released here */
+		RTFREE(rt_orig);
+		nhop_free_any(nh_orig);
+
+		return (0);
+	}
+
+#ifdef ROUTE_MPATH
+	/* Eligible for multipath? */
+	if (!can_rib_multipath(rnh) || (can_nh_multipath(rt_new->rt_nhop) == 0) ||
+	    (can_nh_multipath(rt_orig->rt_nhop) == 0)) {
+		/*
+		 * Multipath not enabled OR
+		 * new NH is not a route with gw OR
+		 * existing NH is NOT multipath group / gateway
+		 */
+		RIB_WUNLOCK(rnh);
+		RT_UNLOCK(rt_new);
+		RTSTAT_INC(rts_mpath_ineligible);
+		return (EEXIST);
+	}
+
+	/*
+	 * One or more routes is already in the RIB and we need to add
+	 * another one, which requires getting a new nexthop group.
+	 * The weight and nexthop of the existing route are sampled under
+	 * the lock; add_route_mpath() is called with both locks dropped.
+	 */
+	unsigned int weight_orig = rt_orig->rt_weight;
+	nh_orig = rt_orig->rt_nhop;
+	RIB_WUNLOCK(rnh);
+	RT_UNLOCK(rt_new);
+
+	error = add_route_mpath(rnh, rt_new, nh_orig, weight_orig, info, rc);
+#else
+	/* Without multipath support an existing prefix is a plain conflict */
+	RIB_WUNLOCK(rnh);
+	RT_UNLOCK(rt_new);
+	error = EEXIST;
+#endif
+	return (error);
+}
+
+/*
+ * Check if specified @gw matches gw data in the nexthop @nh.
+ *
+ * Address families have to be equal.  AF_INET and AF_INET6 gateways are
+ * compared by address only; any other family is compared bytewise over
+ * the length stored in the nexthop.
+ *
+ * Returns 1 if matches, 0 otherwise.
+ */
+int
+rib_match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
+{
+
+	if (nh->gw_sa.sa_family != gw->sa_family)
+		return (0);
+
+	switch (gw->sa_family) {
+	case AF_INET:
+		return (nh->gw4_sa.sin_addr.s_addr ==
+		    ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
+	case AF_INET6:
+		{
+			const struct sockaddr_in6 *gw6;
+			gw6 = (const struct sockaddr_in6 *)gw;
+			/*
+			 * Currently (2020-01) IPv6 gws in kernel have their
+			 * scope embedded. Once this becomes false, this code
+			 * has to be revisited.
+			 */
+			if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
+			    &gw6->sin6_addr))
+				return (1);
+			return (0);
+		}
+	default:
+		if (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) != 0)
+			return (0);
+		return (1);
+	}
+
+	/* NOTREACHED */
+	return (0);
+}
+
+/*
+ * Removes the non-multipath route @rt from the rib, provided it passes
+ * the checks requested in @info.  Has to be called with the rib write
+ * lock held.
+ *
+ * Returns:
+ * 0 on success; @rt is unlinked and has RTF_UP cleared
+ * EADDRINUSE if @rt is pinned and the request lacks RTF_PINNED
+ * ESRCH if the gateway or the filter in @info does not match, or
+ *  the rib has no record for the prefix
+ */
+int
+del_route_one(struct rib_head *rnh, struct rtentry *rt, struct rt_addrinfo *info)
+{
+	struct sockaddr *dst, *netmask, *gw;
+	struct radix_node *rn;
+
+	RIB_WLOCK_ASSERT(rnh);
+	KASSERT((!NH_IS_MULTIPATH(rt->rt_nhop)), ("called with mpath route"));
+
+	/* TODO: generalise priorities */
+	if (RT_IS_PINNED(rt) && ((info->rti_flags & RTF_PINNED) == 0)) {
+		/*
+		 * Target route is PINNED, while our request does not
+		 * contain RTF_PINNED flag -> refuse to delete.
+		 */
+		RTSTAT_INC(rts_del_fail_priority);
+		return (EADDRINUSE);
+	}
+
+	gw = info->rti_info[RTAX_GATEWAY];
+	if ((info->rti_flags & RTF_GATEWAY) && (gw != NULL)) {
+		/*
+		 * Delete request contains specific gateway.
+		 * Have to verify it prior to the deletion.
+		 */
+		if (rib_match_nhop_gw(rt->rt_nhop, gw) == 0) {
+			return (ESRCH);
+		}
+	}
+
+	if (info->rti_filter != NULL) {
+		/*
+		 * Delete request contains specific matching function.
+		 * Run the found rte through it.
+		 */
+		if (info->rti_filter(rt, rt->rt_nhop, info->rti_filterdata) == 0) {
+			/* Not matched */
+			return (ESRCH);
+		}
+	}
+
+	/* Finally, remove record */
+	dst = info->rti_info[RTAX_DST];
+	netmask = info->rti_info[RTAX_NETMASK];
+	rn = rnh->rnh_deladdr(dst, netmask, &rnh->head);
+
+	if (rn == NULL) {
+		/* Should not happen */
+		RTSTAT_INC(rts_del_algo_fail);
+		return (ESRCH);
+	}
+	KASSERT((struct rtentry *)rn == rt,
+	    ("rnh_deladdr returned wrong rte: expected %p got %p", rt, rn));
+
+	/* Mark rte as deleted */
+	rt->rte_flags &= ~RTF_UP;
+
+	return (0);
+}
+
+/*
+ * Tries to delete route specified by @info.
+ * Returns 0 on success.
+ * If successful, references rt, nhop and
+ * returns them unlocked.
+ */
+static int
+del_route(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info *rc)
+{
+	struct sockaddr *dst, *netmask;
+	struct rtentry *rt;
+#ifdef ROUTE_MPATH
+	struct nhgrp_object *mp;
+#endif
+	int error;
+
+	dst = info->rti_info[RTAX_DST];
+	netmask = info->rti_info[RTAX_NETMASK];
+
+	rc->cmd = RTM_DELETE;
+
+	RIB_WLOCK(rnh);
+	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
+	if (rt == NULL) {
+		RIB_WUNLOCK(rnh);
+		return (ESRCH);
+	}
+
+	if (NH_IS_MULTIPATH(rt->rt_nhop)) {
+		/* We hit multipath group */
+#ifdef ROUTE_MPATH
+		if (info->rti_info[RTAX_GATEWAY] == NULL) {
+			/*
+			 * No gateway specification in the delete
+			 * request, aborting.
+			 */
+			RIB_WUNLOCK(rnh);
+			return (ESRCH);
+		}
+
+		mp = (struct nhgrp_object *)rt->rt_nhop;
+
+		/* del_route_mpath() is called with the rib lock dropped */
+		RIB_WUNLOCK(rnh);
+
+		return (del_route_mpath(rnh, rt, mp, info, rc));
+#else
+		RIB_WUNLOCK(rnh);
+		return (ENOTSUP);
+#endif
+	}
+
+	error = del_route_one(rnh, rt, info);
+	RIB_WUNLOCK(rnh);
+	if (error != 0)
+		return (error);
+
+	RTSTAT_INC(rts_del_success);
+
+	/* Finalise notification data */
+	rc->rt = rt;
+	rc->nh_old = rt->rt_nhop;
+	rc->rt_weight = rt->rt_weight;
+
+	/*
+	 * rt was removed from the tree as well as rt_nhop.
+	 * Decrease their reference counts.
+	 */
+	NH_FREE(rt->rt_nhop);
+	RTFREE(rt);
+
+	return (0);
+}
+
+/*
+ * Clones @rt_orig when @info requests an expiration time or a weight that
+ * differs from the current one.  The requested values are stored in the
+ * clone, so that inserting it actually applies the change.
+ *
+ * Weight changes of multipath routes are not handled here; expiration
+ * changes of multipath routes are rejected.
+ *
+ * Returns:
+ * 0 on success; @ret is the clone, or NULL when no clone is needed
+ * ENOTSUP for an expiration change of a multipath route
+ * errno from create_rte_from_rte() otherwise
+ */
+static int
+clone_rte_conditional(struct rib_head *rnh, struct rtentry *rt_orig,
+    struct nhop_object *nh_orig, struct rt_addrinfo *info, struct rtentry **ret)
+{
+	struct rtentry *rt_new;
+	u_long expire;
+	uint32_t weight;
+	int error;
+
+	rt_new = NULL;
+	if (info->rti_mflags & RTV_EXPIRE) {
+		if (NH_IS_MULTIPATH(nh_orig)) {
+			return (ENOTSUP);
+		}
+
+		expire = get_expire_from_info(info);
+		if (expire != rt_orig->rt_expire) {
+			error = create_rte_from_rte(rnh, rt_orig, &rt_new);
+			if (error != 0)
+				return (error);
+			rt_new->rt_expire = expire;
+		}
+	}
+	if ((info->rti_mflags & RTV_WEIGHT) && !NH_IS_MULTIPATH(nh_orig)) {
+		weight = get_weight_from_info(info);
+		if (weight != rt_orig->rt_weight) {
+			if (rt_new == NULL) {
+				error = create_rte_from_rte(rnh, rt_orig,
+				    &rt_new);
+				if (error != 0)
+					return (error);
+			}
+			rt_new->rt_weight = weight;
+		}
+	}
+
+	*ret = rt_new;
+	return (0);
+}
+
+/*
+ * Changes the route described by @info in @rnh.
+ *
+ * The route is looked up under the read lock, the replacement nexthop
+ * (and nexthop group / rtentry clone, when needed) is built unlocked and
+ * the result is installed under the write lock after re-checking that
+ * the route has not changed in between.
+ *
+ * Returns:
+ * 0 on success, filling @rc
+ * ESRCH if no matching route / gateway is found
+ * ENOTSUP for unsupported multipath changes
+ * EAGAIN if the route changed concurrently; the caller may retry
+ * other errno on resource failures
+ */
+static int
+change_route(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct rib_cmd_info *rc)
+{
+	struct nhop_object *nh_orig, *nh_new, *nh_src, *nh_insert;
+	struct sockaddr *gw;
+	struct rtentry *rt_orig, *rt_curr, *rt_new;
+	int error;
+#ifdef ROUTE_MPATH
+	struct weightened_nhop *wn_orig, wn_new;
+	struct nhgrp_object *mp_new;
+	uint32_t changed_idx, num_nhops;
+	uint64_t modmask;
+#endif
+	RIB_RLOCK_TRACKER;
+
+#ifdef ROUTE_MPATH
+	wn_orig = NULL;
+	changed_idx = 0;
+#endif
+	gw = info->rti_info[RTAX_GATEWAY];
+
+	RIB_RLOCK(rnh);
+	rt_orig = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
+	    info->rti_info[RTAX_NETMASK], &rnh->head);
+
+	if (rt_orig == NULL) {
+		RIB_RUNLOCK(rnh);
+		return (ESRCH);
+	}
+
+	nh_orig = rt_orig->rt_nhop;
+	if (NH_IS_MULTIPATH(nh_orig)) {
+#ifdef ROUTE_MPATH
+		if (gw == NULL) {
+			/* Unable to choose the proper nexthop */
+			RIB_RUNLOCK(rnh);
+			return (ESRCH);
+		}
+
+		/* Pick the group member whose gateway matches the request */
+		nh_src = NULL;
+		wn_orig = nhgrp_get_nhops((struct nhgrp_object *)nh_orig,
+		    &num_nhops);
+		for (uint32_t i = 0; i < num_nhops; i++) {
+			if (rib_match_nhop_gw(wn_orig[i].nh, gw)) {
+				nh_src = wn_orig[i].nh;
+				changed_idx = i;
+				break;
+			}
+		}
+
+		if (nh_src == NULL) {
+			RIB_RUNLOCK(rnh);
+			return (ESRCH);
+		}
+#else
+		RIB_RUNLOCK(rnh);
+		return (ENOTSUP);
+#endif
+	} else {
+		if (gw != NULL && !rib_match_nhop_gw(nh_orig, gw)) {
+			RIB_RUNLOCK(rnh);
+			return (ESRCH);
+		}
+		nh_src = nh_orig;
+	}
+
+	/*
+	 * Chosen nexthop is nh_src, original rt is rt_orig, original
+	 * nhop/nhop group is nh_orig.
+	 * Drop the lock and try to create a new nexthop and a new
+	 * nhop group if needed.
+	 */
+	RIB_RUNLOCK(rnh);
+
+	/*
+	 * Route change may request weight / expire time change.
+	 * As these changes have to be stored in rtentry and we need
+	 * to maintain immutability of most fields, we clone&insert
+	 * cloned rtentry in the rib in such cases.
+	 *
+	 * Note: rt_new CAN be NULL and is NULL for all common cases.
+	 */
+	error = clone_rte_conditional(rnh, rt_orig, nh_orig, info, &rt_new);
+	if (error != 0)
+		return (error);
+
+	error = create_nhop_from_nhop(rnh, nh_src, info, &nh_new);
+	if (error != 0) {
+		if (rt_new != NULL)
+			RTFREE(rt_new);
+		return (error);
+	}
+	DPRINTF("Update nhop: %d -> %d", nh_orig->nh_priv->nh_idx,
+	    nh_new->nh_priv->nh_idx);
+
+#ifdef ROUTE_MPATH
+	mp_new = NULL;
+	if (NH_IS_MULTIPATH(nh_orig)) {
+		/*
+		 * Create mpath group with an updated nhop/weight.
+		 * The member being replaced has to become the NEW
+		 * nexthop; using nh_src here would rebuild the same group.
+		 */
+		wn_new.nh = nh_new;
+		if (info->rti_mflags & RTV_WEIGHT)
+			wn_new.weight = get_weight_from_info(info);
+		else
+			wn_new.weight = wn_orig[changed_idx].weight;
+
+		mp_new = nhgrp_get_replace_nhop(rnh,
+		    (struct nhgrp_object *)nh_orig, &wn_new,
+		    changed_idx, &modmask, &error);
+
+		if (mp_new == NULL) {
+			/*
+			 * Release the nexthop created above.  nh_src is
+			 * owned by the original group and must stay intact.
+			 */
+			NH_FREE(nh_new);
+			if (rt_new != NULL)
+				RTFREE(rt_new);
+			return (error);
+		}
+		nh_insert = (struct nhop_object *)mp_new;
+	} else
+#endif
+		nh_insert = nh_new;
+
+	if (rt_new != NULL)
+		rt_new->rt_nhop = nh_insert;
+
+	/* Update notification metadata */
+	rc->cmd = RTM_CHANGE;
+	rc->nh_old = nh_src;
+	rc->nh_new = nh_insert;
+
+	RIB_WLOCK(rnh);
+
+	/*
+	 * Lookup route once again as it may have been changed or deleted.
+	 */
+	rt_curr = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
+	    info->rti_info[RTAX_NETMASK], &rnh->head);
+
+	/* Check if anything has changed */
+	if ((rt_curr != rt_orig) || (rt_curr->rt_nhop != nh_orig)) {
+		/*
+		 * The original nexthop has changed. Free the resources
+		 * and return EAGAIN, so the caller can retry.
+		 */
+		RIB_WUNLOCK(rnh);
+		NH_FREE(nh_new);
+#ifdef ROUTE_MPATH
+		if (mp_new != NULL)
+			nhgrp_free_group(mp_new);
+#endif
+		if (rt_new != NULL)
+			RTFREE(rt_new);
+		return (EAGAIN);
+	}
+
+	if (rt_new != NULL) {
+		error = replace_rte(rnh, info->rti_info[RTAX_DST],
+		    info->rti_info[RTAX_NETMASK], rt_new);
+		/* Same as add_route(): register routes that expire */
+		if (error == 0 && rt_new->rt_expire != 0)
+			tmproutes_update(rnh, rt_new);
+	} else {
+		RT_LOCK(rt_orig);
+		rt_orig->rt_nhop = nh_insert;
+		RT_UNLOCK(rt_orig);
+		error = 0;
+	}
+
+	if (error == 0)
+		rnh->rnh_gen++;
+	RIB_WUNLOCK(rnh);
+
+	if (error != 0) {
+		/*
+		 * Failed to install new rte with new nexthop.
+		 * Free resources.
+		 */
+		NH_FREE(nh_new);
+#ifdef ROUTE_MPATH
+		if (mp_new != NULL)
+			nhgrp_free_group(mp_new);
+#endif
+		if (rt_new != NULL)
+			RTFREE(rt_new);
+
+		return (error);
+	}
+
+	/* Remove refcount from the old nhop */
+	nhop_free_any(nh_orig);
+
+	if (rt_new != NULL) {
+		RTFREE(rt_orig);
+		rc->rt = rt_new;
+	} else
+		rc->rt = rt_orig;
+
+	return (0);
+}
+
+/*
+ * Resolves and references the interface data for the gateway in @info.
+ *
+ * Assumes RTAX_GATEWAY is set
+ * Returns 0 on success, references ifa/ifp
+ * XXX: verify freeing refcount
+ */
+static int
+update_gateway_metadata(struct rt_addrinfo *info, int fibnum)
+{
+	int error;
+
+	KASSERT((info->rti_info[RTAX_GATEWAY] != NULL), ("gateway is NULL"));
+
+	/*
+	 * Allow the same set of rules as with route creation
+	 */
+	error = verify_gateway_family(info);
+	if (error != 0)
+		return (error);
+
+	if (info->rti_ifa == NULL) {
+		error = rt_getifa_fib(info, fibnum);
+		if (error != 0)
+			return (error);
+	} else {
+		ifa_ref(info->rti_ifa);
+	}
+
+	if (info->rti_ifp == NULL)
+		info->rti_ifp = info->rti_ifa->ifa_ifp;
+	if_ref(info->rti_ifp);
+
+	return (0);
+}
+
+
+
+/*
+ * Normalises @info before it is used for rib operations.
+ */
+static void
+refine_info(struct rt_addrinfo *info)
+{
+
+	/*
+	 * If we are adding a host route then we don't want to put
+	 * a netmask in the tree, nor do we want to clone it.
+	 */
+	if (info->rti_flags & RTF_HOST)
+		info->rti_info[RTAX_NETMASK] = NULL;
+}
+
+/*
+ * Allocates rtentry and gets referenced&linked nhop.
+ *
+ * Returns 0 on success, storing rtentry with the valid nhop into @ret_rt.
+ * On failure nothing is left allocated.
+ *
+ */
+static int
+create_rt_nh_pair_from_info(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct rtentry **ret_rt)
+{
+	struct rtentry *rt;
+	struct nhop_object *nh;
+	int error;
+
+	error = create_rte_from_info(rnh, info, &rt);
+	if (error != 0) {
+		DPRINTF("failed to create rte: %d", error);
+		return (error);
+	}
+
+	DPRINTF("new rte %p af %d", rt, (int)(info->rti_info[RTAX_DST])->sa_family);
+
+	error = create_nhop_from_info(rnh, info, &nh);
+	if (error != 0) {
+		DPRINTF("failed to create nhop: %d", error);
+		/* A key too large for rt_dst lives in its own allocation */
+		if (rt_key(rt) != &rt->rt_dst)
+			R_Free(rt_key(rt));
+		uma_zfree(V_rtzone, rt);
+		return (error);
+	}
+
+	rt->rt_nhop = nh;
+
+	*ret_rt = rt;
+
+	return (0);
+}
+
+
+/*
+ * Adds route defined by @info into the kernel table specified by @fibnum and
+ * sa_family in @info->rti_info[RTAX_DST].
+ *
+ * Returns 0 on success and fills in operation metadata into @rc.
+ */
+int
+rib_add_route(u_int fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
+{
+	int error = 0;
+	struct rtentry *rt;
+	struct sockaddr *gateway, *dst;
+	struct rib_head *rnh;
+	int ifa_referenced = 0;
+
+	NET_EPOCH_ASSERT_INVARIANTS();
+
+	refine_info(info);
+
+	dst = info->rti_info[RTAX_DST];
+	gateway = info->rti_info[RTAX_GATEWAY];
+
+	if ((info->rti_flags & RTF_GATEWAY) && gateway == NULL)
+		return (EINVAL);
+	error = verify_gateway_family(info);
+	if (error != 0)
+		return (error);
+
+	/*
+	 * Resolve the table before any reference is taken, so that an
+	 * unsupported fib/family pair needs no cleanup.
+	 */
+	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
+	if (rnh == NULL)
+		return (EAFNOSUPPORT);
+
+	/* ensure route is UP */
+	info->rti_flags |= RTF_UP;
+
+	if (info->rti_ifa == NULL) {
+		/* rt_getifa_fib() references ifa upon successful completion */
+		error = rt_getifa_fib(info, fibnum);
+		if (error != 0)
+			return (error);
+		ifa_referenced = 1;
+	}
+
+	error = create_rt_nh_pair_from_info(rnh, info, &rt);
+	if (error != 0) {
+		if (ifa_referenced != 0)
+			ifa_free(info->rti_ifa);
+		return (error);
+	}
+
+	/* @rc is pure output: start from a clean state */
+	bzero(rc, sizeof(struct rib_cmd_info));
+
+	error = add_route(rnh, rt, info, rc);
+
+	/*
+	 * If it still failed to go into the tree,
+	 * then un-make it (this should be a function)
+	 */
+	if (error != 0) {
+		NH_FREE(rt->rt_nhop);
+		if (rt_key(rt) != &rt->rt_dst)
+			R_Free(rt_key(rt));
+		uma_zfree(V_rtzone, rt);
+		if (ifa_referenced != 0)
+			ifa_free(info->rti_ifa);
+		return (error);
+	}
+
+	rib_notify_subscribers(rnh, info, rc);
+
+	return (0);
+}
+
+
+/*
+ * Removes route defined by @info from the kernel table specified by @fibnum and
+ * sa_family in @info->rti_info[RTAX_DST].
+ *
+ * Returns 0 on success and fills in operation metadata into @rc.
+ */
+int
+rib_del_route(u_int fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
+{
+	struct rib_head *rnh;
+	int error = 0;
+
+	NET_EPOCH_ASSERT_INVARIANTS();
+
+	refine_info(info);
+
+	rnh = rt_tables_get_rnh(fibnum, get_family_from_info(info));
+	if (rnh == NULL)
+		return (EAFNOSUPPORT);
+
+	/* @rc is pure output: do not hand stale fields to subscribers */
+	bzero(rc, sizeof(struct rib_cmd_info));
+
+	error = del_route(rnh, info, rc);
+
+	if (error == 0)
+		rib_notify_subscribers(rnh, info, rc);
+
+	return (error);
+}
+
+
+/*
+ * Changes route properties defined by @info in the kernel table specified by
+ * @fibnum and sa_family in @info->rti_info[RTAX_DST].
+ *
+ * The change is retried up to RIB_MAX_RETRIES times while it fails with
+ * EAGAIN, i.e. while the route keeps changing underneath.
+ *
+ * Returns 0 on success and fills in operation metadata into @rc.
+ */
+int
+rib_change_route(u_int fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
+{
+	struct rib_head *rnh;
+	int error = 0;
+
+	NET_EPOCH_ASSERT_INVARIANTS();
+
+	refine_info(info);
+
+	rnh = rt_tables_get_rnh(fibnum, get_family_from_info(info));
+	if (rnh == NULL)
+		return (EAFNOSUPPORT);
+
+	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
+		/* Every attempt starts from clean notification data */
+		bzero(rc, sizeof(struct rib_cmd_info));
+		rc->cmd = RTM_CHANGE;
+
+		error = change_route(rnh, info, rc);
+		if (error != EAGAIN)
+			break;
+	}
+
+	if (error == 0)
+		rib_notify_subscribers(rnh, info, rc);
+
+	return (error);
+}
+
+/*
+ * Returns 1 if multipath is enabled in the vnet @rh belongs to,
+ * 0 otherwise.
+ */
+static int
+can_rib_multipath(struct rib_head *rh)
+{
+	int result;
+
+	CURVNET_SET(rh->rib_vnet);
+	result = !!V_rib_route_multipath;
+	CURVNET_RESTORE();
+
+	return (result);
+}
+
+/*
+ * Looks up route based on @dst and @mask.
+ *
+ * @dst: destination to lookup.
+ * @mask: route netmask for exact prefix match, can be NULL.
+ *
+ * Returns 0 on success, filling @ret with found rtentry.
+ * rtentry is returned locked.
+ */
+int
+rib_lookup_route_netmask(u_int fibnum, const struct sockaddr *dst,
+    const struct sockaddr *mask, struct rtentry **ret)
+{
+	RIB_RLOCK_TRACKER;
+	struct rib_head *rh;
+	struct radix_node *node;
+	struct rtentry *found;
+
+	rh = rt_tables_get_rnh(fibnum, dst->sa_family);
+	if (rh == NULL)
+		return (EAFNOSUPPORT);
+
+	RIB_RLOCK(rh);
+
+	if (mask != NULL) {
+		/* Exact match lookup */
+		node = rh->rnh_lookup(__DECONST(void *, dst),
+		    __DECONST(void *, mask), &rh->head);
+	} else {
+		/* Longest prefix match lookup */
+		node = rh->rnh_matchaddr(__DECONST(void *, dst), &rh->head);
+	}
+
+	/* Nothing found, or only a radix root node: not a real route. */
+	if (node == NULL || (node->rn_flags & RNF_ROOT) != 0) {
+		RIB_RUNLOCK(rh);
+		return (ESRCH);
+	}
+
+	/* Lock the entry before dropping the table lock. */
+	found = RNTORT(node);
+	RT_LOCK(found);
+	RIB_RUNLOCK(rh);
+
+	*ret = found;
+	return (0);
+}
+
+/*
+ * Returns the number of leading one bits in the first @len bits of @p
+ * (most significant bit of each byte first), or -1 if a one bit follows
+ * the first zero bit, i.e. the mask is not contiguous.
+ */
+static int
+contigmask(const uint8_t *p, int len)
+{
+	int ones, bit;
+
+	/* Length of the leading run of set bits. */
+	ones = 0;
+	while (ones < len && (p[ones / 8] & (0x80 >> (ones % 8))) != 0)
+		ones++;
+
+	/* Every bit after the first clear one has to be clear as well. */
+	for (bit = ones + 1; bit < len; bit++) {
+		if ((p[bit / 8] & (0x80 >> (bit % 8))) != 0)
+			return (-1);	/* mask not contiguous */
+	}
+
+	return (ones);
+}
+
+
+/*
+ * Retrieves address and prefix from @rt.
+ * @dst: prefix dst storage. Can be NULL, if not NULL, buffer size in sa_len.
+ * @netmask: prefix mask storage. Can be NULL, if not NULL, buffer size in sa_len.
+ * @plen: CIDR len, can be NULL. -1 on failure (non-contig mask).
+ *
+ * Returns 0 on success.
+ *
+ */
+int
+rib_get_entry_prefix(const struct rtentry *rt, struct sockaddr *dst,
+    struct sockaddr *netmask, int *plen)
+{
+	const struct sockaddr *key = rt_key_const(rt);
+	/* NULL for host routes, which carry no mask in the tree. */
+	const struct sockaddr *mask = rt_mask_const(rt);
+
+	if (dst != NULL) {
+		if (key->sa_len > dst->sa_len)
+			return (ENOBUFS);
+		memcpy(dst, key, key->sa_len);
+	}
+
+	if (netmask != NULL) {
+		struct sockaddr_in *m4;
+		struct sockaddr_in6 *m6;
+
+		/*
+		 * key->sa_len bytes are written below, so the caller's
+		 * buffer has to be checked against the key length, not
+		 * against the (possibly shorter) in-tree mask length.
+		 */
+		if (key->sa_len > netmask->sa_len)
+			return (ENOBUFS);
+
+		/*
+		 * Currently in-tree netmasks
+		 * a) do not have address family attached
+		 * b) have different notion of sa_len,
+		 *  limiting it to the amount of
+		 *  non-zero bytes in netmask to
+		 *  speedup lookup.
+		 * Build a full-size sockaddr using the family and length
+		 * of the key.  A missing mask (host route) is reported as
+		 * an all-ones mask, matching the @plen result below.
+		 */
+		bzero(netmask, key->sa_len);
+		switch (key->sa_family) {
+		case AF_INET:
+			m4 = (struct sockaddr_in *)netmask;
+			if (mask != NULL)
+				m4->sin_addr =
+				    ((const struct sockaddr_in *)mask)->sin_addr;
+			else
+				m4->sin_addr.s_addr = INADDR_BROADCAST;
+			break;
+		case AF_INET6:
+			m6 = (struct sockaddr_in6 *)netmask;
+			if (mask != NULL)
+				m6->sin6_addr =
+				    ((const struct sockaddr_in6 *)mask)->sin6_addr;
+			else
+				memset(&m6->sin6_addr, 0xff,
+				    sizeof(m6->sin6_addr));
+			break;
+		default:
+			if (mask != NULL)
+				memcpy(netmask, mask, key->sa_len);
+			else
+				memset(netmask, 0xff, key->sa_len);
+			break;
+		}
+		netmask->sa_family = key->sa_family;
+		netmask->sa_len = key->sa_len;
+	}
+
+	if (plen != NULL) {
+		const uint8_t *bits = NULL;
+		int max_prefix = -1;
+
+		if (key->sa_family == AF_INET) {
+			max_prefix = 32;
+			if (mask != NULL)
+				bits = (const uint8_t *)
+				    &((const struct sockaddr_in *)mask)->sin_addr;
+		} else if (key->sa_family == AF_INET6) {
+			max_prefix = 128;
+			if (mask != NULL)
+				bits = (const uint8_t *)
+				    &((const struct sockaddr_in6 *)mask)->sin6_addr;
+		}
+
+		/* No mask: host route (or -1 for an unknown family). */
+		if (bits != NULL)
+			*plen = contigmask(bits, max_prefix);
+		else
+			*plen = max_prefix;
+	}
+
+	return (0);
+}
+
+/*
+ * Copies the destination (key) of @rt into @dst, a buffer of @sa_len bytes.
+ *
+ * Returns @dst and sets @error to 0 on success; returns NULL and sets
+ * @error to ENOBUFS if the buffer is too small.
+ */
+struct sockaddr *
+rib_get_entry_dst_sa(const struct rtentry *rt, struct sockaddr *dst,
+    size_t sa_len, int *error)
+{
+	const struct sockaddr *src = rt_key_const(rt);
+
+	if (src->sa_len > sa_len) {
+		*error = ENOBUFS;
+		return (NULL);
+	}
+	memcpy(dst, src, src->sa_len);
+	*error = 0;
+
+	return (dst);
+}
+
+/*
+ * Builds a full-size netmask sockaddr for @rt in @netmask, a buffer of
+ * @sa_len bytes.
+ *
+ * Returns @netmask with @error set to 0 on success.
+ * Returns NULL with @error set to 0 if @rt has no mask (host route).
+ * Returns NULL with @error set to ENOBUFS if the buffer is too small.
+ */
+struct sockaddr *
+rib_get_entry_netmask_sa(const struct rtentry *rt, struct sockaddr *netmask,
+    size_t sa_len, int *error)
+{
+	const struct sockaddr *src = rt_mask_const(rt);
+	const struct sockaddr *dst = rt_key_const(rt);
+
+	if (src == NULL) {
+		*error = 0;
+		return (NULL);
+	}
+
+	/*
+	 * dst->sa_len bytes are written below, so the buffer must hold the
+	 * key length as well as the in-tree mask length.
+	 */
+	if (src->sa_len > sa_len || dst->sa_len > sa_len) {
+		*error = ENOBUFS;
+		return (NULL);
+	}
+
+	*error = 0;
+
+	/*
+	 * Currently in-tree netmasks
+	 * a) do not have address family attached
+	 * b) have different notion of sa_len,
+	 *  limiting it to the amount of
+	 *  non-zero bytes in netmask to
+	 *  speedup lookup.
+	 * Fix this by taking the family and length from the key.
+	 */
+	bzero(netmask, dst->sa_len);
+	netmask->sa_len = dst->sa_len;
+	netmask->sa_family = dst->sa_family;
+
+	switch (dst->sa_family) {
+	case AF_INET:
+		((struct sockaddr_in *)netmask)->sin_addr =
+		    ((const struct sockaddr_in *)src)->sin_addr;
+		break;
+	case AF_INET6:
+		((struct sockaddr_in6 *)netmask)->sin6_addr =
+		    ((const struct sockaddr_in6 *)src)->sin6_addr;
+		break;
+	default:
+		memcpy(netmask, src, dst->sa_len);
+		netmask->sa_family = dst->sa_family;
+		netmask->sa_len = dst->sa_len;
+	}
+
+	return (netmask);
+}
+
+/*
+ * Returns the CIDR prefix length of @rt: 32/128 for IPv4/IPv6 host routes,
+ * -1 for a non-contiguous mask or an address family other than
+ * AF_INET/AF_INET6.
+ */
+int
+rib_get_entry_plen(const struct rtentry *rt)
+{
+	const struct sockaddr *key = rt_key_const(rt);
+	const struct sockaddr *mask = rt_mask_const(rt);
+	const uint8_t *bits = NULL;
+	int max_prefix = -1;
+
+	if (key->sa_family == AF_INET) {
+		max_prefix = 32;
+		if (mask != NULL)
+			bits = (const uint8_t *)
+			    &((const struct sockaddr_in *)mask)->sin_addr;
+	} else if (key->sa_family == AF_INET6) {
+		max_prefix = 128;
+		if (mask != NULL)
+			bits = (const uint8_t *)
+			    &((const struct sockaddr_in6 *)mask)->sin6_addr;
+	}
+
+	if (bits != NULL)
+		return (contigmask(bits, max_prefix));
+
+	return (max_prefix);
+}
+
+/* Returns the rt_weight field of @rt. */
+int
+rib_get_entry_weight(const struct rtentry *rt)
+{
+
+	return (rt->rt_weight);
+}
+
+/* Returns the flags of @rt OR-ed with the rt flags of nexthop @nh. */
+int
+rib_get_entry_rtflags(const struct rtentry *rt, const struct nhop_object *nh)
+{
+
+	return (rt->rte_flags | nhop_get_rt_flags(nh));
+}
+
+/* Returns the rt_nhop pointer stored in @rt. */
+const struct nhop_object *
+rib_get_entry_nhop(const struct rtentry *rt)
+{
+
+	return (rt->rt_nhop);
+}
+
+/* Returns the address family of the key of @rt. */
+sa_family_t
+rib_get_entry_family(const struct rtentry *rt)
+{
+
+	return ((rt_key_const(rt))->sa_family);
+}
+
+/* Returns the rt_fibnum field of @rt. */
+unsigned int
+rib_get_entry_fibnum(const struct rtentry *rt)
+{
+
+	return (rt->rt_fibnum);
+}
+
+/* Returns the rt_expire field of @rt. */
+unsigned long
+rib_get_entry_expire_time(const struct rtentry *rt)
+{
+
+	return (rt->rt_expire);
+}
+
+/*
+int
+rib_is_host_entry(const struct rtentry *rt)
+{
+
+	return (rt->rt_flags & RTF_HOST);
+}
+*/
+
+
+
+
+
+
+#include "tests/routing/test_route_ctl.h"
+
Index: sys/tests/routing/module.h
===================================================================
--- /dev/null
+++ sys/tests/routing/module.h
@@ -0,0 +1,47 @@
+#ifndef SYS_TESTS_ROUTING_MODULE_H_
+#define SYS_TESTS_ROUTING_MODULE_H_
+
+/* A single kernel test; the return value is reported as the test result. */
+typedef int (ktest_f_t)(void);
+
+/* Registration record for one test function. */
+struct ktest_item {
+	char *name;
+	ktest_f_t *fn;
+	char *descr;
+};
+#define DECLARE_KTEST(_f) {#_f, &(_f), ""}
+#define DECLARE_KTEST_DESC(_f, _d) {#_f, &(_f), _d}
+
+/* A named group of tests, defined as kt_<name> by DEFINE_KTESTS(). */
+struct ktests {
+	char *name;
+	char *descr;
+	struct ktest_item *tests;
+	int num_tests;
+};
+
+#define ARRAYLEN(_a) (sizeof(_a) / sizeof((_a)[0]))
+/* @_descr is a string literal and is stored as-is (not stringified again). */
+#define DEFINE_KTESTS(_name, _descr, _tests) \
+	struct ktests kt_##_name = {#_name, _descr, _tests, ARRAYLEN(_tests)}
+
+#define DECLARE_KTESTS(_name) extern struct ktests kt_##_name
+
+#define TPRINTF(_arg, ...) printf("KTEST:%s:%d " _arg "\n", __func__, __LINE__, ##__VA_ARGS__)
+
+/*
+ * Soft assertion: on failure prints the message and sets the caller's
+ * local variable `error' to EINVAL.  Execution continues after the check.
+ */
+#define TASSERT(_cond, _fmt, ...) do { \
+	if (!(_cond)) { \
+		TPRINTF(_fmt, ##__VA_ARGS__); \
+		error = EINVAL; \
+	} \
+} while (0)
+
+
+DECLARE_KTESTS(route_ctl);
+
+#endif
Index: sys/tests/routing/module.c
===================================================================
--- /dev/null
+++ sys/tests/routing/module.c
@@ -0,0 +1,163 @@
+/*-
+ * Copyright (c) 2019, Alexander V. Chernikov
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *
+ * 2. Neither the name of Alexander V.
Chernikov nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "tests/routing/module.h"
+
+static int inited;
+#define ET_EXITING 0x1
+static volatile int state_flags;
+
+/* File-local: a global symbol named `ctx' would pollute the kernel namespace. */
+static struct sysctl_ctx_list ctx;
+/* Flat array of all registered tests; sysctl arg2 is an index into it. */
+static struct ktest_item *ki = NULL;
+static int ki_size = 0, ki_count = 0;
+
+/*
+ * Sysctl handler shared by all test nodes.  Writing a non-zero integer runs
+ * the test whose index in ki[] was stored in arg2 at registration time and
+ * returns that test's result; reads and zero writes do nothing.
+ */
+static int
+invoke_test_handler(SYSCTL_HANDLER_ARGS)
+{
+	int error, v;
+	struct ktest_item *item;
+
+	if (inited == 0)
+		return (ENOENT);
+
+	v = 0;
+	error = sysctl_handle_int(oidp, &v, 0, req);
+	if (error)
+		return (error);
+	if (req->newptr == NULL)
+		return (error);
+	if (v == 0)
+		return (0);
+
+	/* Never index outside of the registered range. */
+	if (arg2 < 0 || arg2 >= ki_count)
+		return (ENOENT);
+
+	item = &ki[arg2];
+	printf("running item %d: %s\n", (int)arg2, item->name);
+	error = item->fn();
+	printf("done running item %d: %s - ret %d\n", (int)arg2, item->name, error);
+
+	return (error);
+}
+
+SYSCTL_NODE(_kern, OID_AUTO, test, CTLFLAG_RW, 0, "Test framework");
+SYSCTL_NODE(_kern_test, OID_AUTO, routing, CTLFLAG_RW, 0, "Routing test framework");
+
+/*
+ * Registers all tests of @kt: appends them to ki[] (growing it in multiples
+ * of 32 entries) and creates kern.test.routing.<group>.<test> sysctl nodes
+ * whose arg2 is the test index in ki[].
+ *
+ * Returns 0 on success, ENOMEM if the group node cannot be created.
+ */
+static int
+test_init_one(struct ktests *kt)
+{
+	struct sysctl_oid *tree;
+
+	if (kt->num_tests + ki_count > ki_size) {
+		size_t new_size = roundup2(kt->num_tests + ki_count, 32);
+		void *new_ptr;
+
+		new_ptr = malloc(new_size * sizeof(struct ktest_item), M_TEMP, M_WAITOK | M_ZERO);
+		if (ki_count > 0)
+			memcpy(new_ptr, ki, ki_count * sizeof(struct ktest_item));
+		free(ki, M_TEMP);
+		ki = new_ptr;
+		ki_size = new_size;
+	}
+
+	tree = SYSCTL_ADD_NODE(&ctx, SYSCTL_STATIC_CHILDREN(_kern_test_routing),
+	    OID_AUTO, kt->name, CTLFLAG_RW, 0, "routing tests");
+	/* SYSCTL_CHILDREN() below would dereference a NULL node. */
+	if (tree == NULL)
+		return (ENOMEM);
+
+	memcpy(&ki[ki_count], kt->tests, kt->num_tests * sizeof(struct ktest_item));
+
+	for (int i = 0; i < kt->num_tests; i++) {
+		SYSCTL_ADD_PROC(&ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
+		    kt->tests[i].name, (CTLTYPE_INT | CTLFLAG_RW), NULL, ki_count + i,
+		    invoke_test_handler, "I", kt->tests[i].descr);
+	}
+	ki_count += kt->num_tests;
+
+	return (0);
+}
+
+/* Releases the test table and resets its bookkeeping. */
+static void
+test_free_items(void)
+{
+
+	free(ki, M_TEMP);
+	ki = NULL;
+	ki_size = 0;
+	ki_count = 0;
+}
+
+/*
+ * Module load: registers every test group.  On failure everything that was
+ * registered so far is undone and the error is returned.
+ */
+static int
+test_modinit(void)
+{
+	int error;
+
+	sysctl_ctx_init(&ctx);
+
+	error = test_init_one(&kt_route_ctl);
+	if (error != 0) {
+		sysctl_ctx_free(&ctx);
+		test_free_items();
+		return (error);
+	}
+
+	inited = 1;
+	return (0);
+}
+
+
+static int
+routing_test_module_event_handler(module_t mod, int what, void *arg __unused)
+{
+	int err;
+
+	switch (what) {
+	case MOD_LOAD:
+		if ((err = test_modinit()) != 0)
+			return (err);
+		break;
+	case MOD_UNLOAD:
+		/*
+		 * Remove the sysctl nodes first: if that fails the handlers
+		 * are still reachable and ki[] must not be freed.
+		 */
+		err = sysctl_ctx_free(&ctx);
+		if (err != 0)
+			return (err);
+		state_flags = ET_EXITING;
+		inited = 0;
+		test_free_items();
+		/* yes --- gross */
+		pause("epoch unload", 2 * hz);
+		break;
+	default:
+		return (EOPNOTSUPP);
+	}
+
+	return (0);
+}
+
+static moduledata_t routing_test_moduledata = {
+	"routing_test",
+	routing_test_module_event_handler,
+	NULL
+};
+
+MODULE_VERSION(routing_test, 1);
+DECLARE_MODULE(routing_test, routing_test_moduledata, SI_SUB_PSEUDO, SI_ORDER_ANY);
Index: sys/tests/routing/test_route_ctl.h
===================================================================
--- /dev/null
+++
sys/tests/routing/test_route_ctl.h
@@ -0,0 +1,73 @@
+#ifndef _SYS_TESTS_ROUTING_TEST_ROUTE_CTL_H_
+#define _SYS_TESTS_ROUTING_TEST_ROUTE_CTL_H_
+
+/*
+ * Externally visible wrappers around the routing control plane helpers.
+ * Each wrapper forwards its arguments unchanged and returns the result of
+ * the wrapped function.
+ *
+ * The prototypes below are always visible.  The definitions are compiled
+ * only when _TEST_CALLER is not defined; a file that defines _TEST_CALLER
+ * before including this header gets the prototypes alone.
+ */
+
+int create_rte_from_info_wrapper(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct rtentry **ret_rt);
+
+int create_nhop_from_info_wrapper(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct nhop_object **nh_ret);
+
+int create_rt_nh_pair_from_info_wrapper(struct rib_head *rnh,
+    struct rt_addrinfo *info, struct rtentry **ret_rt);
+
+int add_route_wrapper(struct rib_head *rnh, struct rtentry *rt_new,
+    struct rt_addrinfo *info, struct rib_cmd_info *rc);
+
+int del_route_one_wrapper(struct rib_head *rnh, struct rtentry *rt,
+    struct rt_addrinfo *info);
+
+int change_route_wrapper(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct rib_cmd_info *rc);
+
+#ifndef _TEST_CALLER
+/* Forwards to create_nhop_from_info(). */
+int
+create_nhop_from_info_wrapper(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct nhop_object **nh_ret)
+{
+
+	return (create_nhop_from_info(rnh, info, nh_ret));
+}
+
+/* Forwards to create_rte_from_info(). */
+int
+create_rte_from_info_wrapper(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct rtentry **ret_rt)
+{
+
+	return (create_rte_from_info(rnh, info, ret_rt));
+}
+
+/* Forwards to create_rt_nh_pair_from_info(). */
+int
+create_rt_nh_pair_from_info_wrapper(struct rib_head *rnh,
+    struct rt_addrinfo *info, struct rtentry **ret_rt)
+{
+
+	return (create_rt_nh_pair_from_info(rnh, info, ret_rt));
+}
+
+/* Forwards to add_route(). */
+int
+add_route_wrapper(struct rib_head *rnh, struct rtentry *rt_new,
+    struct rt_addrinfo *info, struct rib_cmd_info *rc)
+{
+
+	return (add_route(rnh, rt_new, info, rc));
+}
+
+/* Forwards to del_route_one(). */
+int
+del_route_one_wrapper(struct rib_head *rnh, struct rtentry *rt,
+    struct rt_addrinfo *info)
+{
+
+	return (del_route_one(rnh, rt, info));
+}
+
+/* Forwards to change_route(). */
+int
+change_route_wrapper(struct rib_head *rnh, struct rt_addrinfo *info,
+    struct rib_cmd_info *rc)
+{
+
+	return (change_route(rnh, info, rc));
+}
+#endif
+
+#endif
+
Index: sys/tests/routing/test_route_ctl.c
===================================================================
---
/dev/null +++ sys/tests/routing/test_route_ctl.c @@ -0,0 +1,389 @@ +/*- + * Copyright (c) 2020, Alexander V. Chernikov + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Neither the name of Alexander V. Chernikov nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "tests/routing/module.h"
+#define _TEST_CALLER
+#include "tests/routing/test_route_ctl.h"
+
+/*
+ * Attaches a private routing table for @family / @fibnum through the
+ * matching domain's dom_rtattach hook.
+ *
+ * Returns the new rib head, or NULL if no domain matches @family or the
+ * hook did not store a table.
+ */
+static struct rib_head *
+create_rnh(int family, u_long fibnum)
+{
+	struct domain *dom;
+	struct rib_head *rnh = NULL;
+
+	for (dom = domains; dom; dom = dom->dom_next) {
+		if (dom->dom_family != family)
+			continue;
+		/* rnh stays NULL if the hook does not fill it in. */
+		dom->dom_rtattach((void **)&rnh, 0, fibnum);
+		return (rnh);
+	}
+
+	return (NULL);
+}
+
+/* Detaches a table obtained from create_rnh(); NULL is ignored. */
+static void
+free_rnh(struct rib_head *rnh)
+{
+	struct domain *dom;
+
+	if (rnh == NULL)
+		return;
+
+	for (dom = domains; dom; dom = dom->dom_next) {
+		if (dom->dom_family != rnh->rib_family)
+			continue;
+		dom->dom_rtdetach((void **)&rnh, 0);
+		break;
+	}
+
+}
+
+/*
+ * Parses the textual address @addr into @sa: a sockaddr_in6 if the string
+ * contains ':', a sockaddr_in otherwise.  Returns the sockaddr size used.
+ */
+static size_t
+fill_sa(struct sockaddr *sa, const char *addr)
+{
+	size_t sz;
+
+	if (strchr(addr, ':')) {
+		struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa;
+
+		sz = sizeof(struct sockaddr_in6);
+		bzero(sa6, sz);
+		sa6->sin6_family = AF_INET6;
+		sa6->sin6_len = sz;
+		inet_pton(AF_INET6, addr, &sa6->sin6_addr);
+	} else {
+		struct sockaddr_in *sa4 = (struct sockaddr_in *)sa;
+
+		sz = sizeof(struct sockaddr_in);
+		bzero(sa4, sz);
+		sa4->sin_family = AF_INET;
+		sa4->sin_len = sz;
+		inet_pton(AF_INET, addr, &sa4->sin_addr);
+	}
+
+	return (sz);
+}
+
+/*
+ * Fills @sin with the IPv4 netmask for prefix length @plen.
+ * @plen is clamped to [0, 32].
+ */
+static void
+sa_fill_mask4(struct sockaddr_in *sin, int plen)
+{
+	uint32_t mask;
+
+	/*
+	 * Unsigned arithmetic: (1 << 31) on a signed int is undefined, and
+	 * so is a shift count outside of [0, 31].
+	 */
+	if (plen <= 0)
+		mask = 0;
+	else if (plen >= 32)
+		mask = 0xFFFFFFFF;
+	else
+		mask = ~((1U << (32 - plen)) - 1);
+
+	memset(sin, 0, sizeof(struct sockaddr_in));
+	sin->sin_family = AF_INET;
+	sin->sin_len = sizeof(struct sockaddr_in);
+	sin->sin_addr.s_addr = htonl(mask);
+}
+
+/*
+ * Fills @sin6 with the IPv6 netmask for prefix length @mask.
+ * @mask is clamped to 128 so the loop never writes past sin6_addr.
+ */
+static void
+sa_fill_mask6(struct sockaddr_in6 *sin6, uint8_t mask)
+{
+	uint32_t *cp;
+
+	memset(sin6, 0, sizeof(struct sockaddr_in6));
+	sin6->sin6_family = AF_INET6;
+	sin6->sin6_len = sizeof(struct sockaddr_in6);
+
+	if (mask > 128)
+		mask = 128;
+
+	/* Whole 32-bit words first, then the partial word (if any). */
+	for (cp = (uint32_t *)&sin6->sin6_addr; mask >= 32; mask -= 32)
+		*cp++ = 0xFFFFFFFF;
+	if (mask > 0)
+		*cp = htonl(~((1U << (32 - mask)) - 1));
+}
+
+
+/*
+ * Builds a route request inside @buf, which the callers zero-fill and size
+ * at 1024 bytes: the rt_addrinfo itself, followed by the netmask (when
+ * @_prefix has a "/len" part), the destination and, if @gw_s is not NULL,
+ * the gateway.  With a gateway, rti_ifa/rti_ifp are set from
+ * ifa_ifwithnet(); both stay NULL if no interface address matches.
+ *
+ * NOTE(review): rti_ifa is kept after the net epoch section ends without
+ * taking a reference - confirm that this is acceptable for the tests.
+ */
+static struct rt_addrinfo *
+get_info(char *buf, char *_prefix, char *gw_s)
+{
+	struct rt_addrinfo *info;
+	struct sockaddr *dst, *gw;
+	struct sockaddr_in6 *sa6;
+	struct sockaddr_in *sa4;
+	char prefix[128], *d;
+	struct epoch_tracker et;
+	size_t sz;
+
+	info = (struct rt_addrinfo *)buf;
+	buf += sizeof(struct rt_addrinfo);
+
+	bzero(info, sizeof(struct rt_addrinfo));
+
+	strlcpy(prefix, _prefix, sizeof(prefix));
+	d = strchr(prefix, '/');
+	if (d != NULL) {
+		/* Split "addr/len": prefix keeps the address, d the length. */
+		*d++ = '\0';
+		if (strchr(prefix, ':')) {
+			sa6 = (struct sockaddr_in6 *)buf;
+			sa_fill_mask6(sa6, strtol(d, NULL, 10));
+			sz = sa6->sin6_len;
+		} else {
+			sa4 = (struct sockaddr_in *)buf;
+			sa_fill_mask4(sa4, strtol(d, NULL, 10));
+			sz = sa4->sin_len;
+		}
+		info->rti_info[RTAX_NETMASK] = (struct sockaddr *)buf;
+		buf += sz;
+	}
+
+	dst = (struct sockaddr *)buf;
+	buf += fill_sa(dst, prefix);
+	info->rti_info[RTAX_DST] = dst;
+
+	if (gw_s != NULL) {
+		gw = (struct sockaddr *)buf;
+		buf += fill_sa(gw, gw_s);
+		info->rti_info[RTAX_GATEWAY] = gw;
+
+		NET_EPOCH_ENTER(et);
+		info->rti_ifa = ifa_ifwithnet(gw, 0, 0);
+		NET_EPOCH_EXIT(et);
+		if (info->rti_ifa != NULL)
+			info->rti_ifp = info->rti_ifa->ifa_ifp;
+	}
+
+	return (info);
+}
+
+/*
+ * Adds a single route to an empty table and verifies that it can be looked
+ * up and that the rib_cmd_info describes an RTM_ADD of that route.
+ */
+static int
+test_add_route_plain_add_success(void)
+{
+	int error;
+	struct rib_cmd_info rc;
+	struct rt_addrinfo *info;
+	struct rib_head *rnh;
+	struct rtentry *rt;
+	struct radix_node *rn;
+	struct epoch_tracker et;
+
+	char *buf = malloc(1024, M_TEMP, M_WAITOK | M_ZERO);
+
+	rnh = create_rnh(AF_INET6, 0);
+	if (rnh == NULL) {
+		TPRINTF("failed to create rib head");
+		free(buf, M_TEMP);
+		return (EINVAL);
+	}
+	info = get_info(buf, "2001:db8:1::/64", "::1");
+
+	if (info == NULL || info->rti_ifp == NULL || info->rti_ifa == NULL) {
+		TPRINTF("failed to create info");
+		free_rnh(rnh);
+		free(buf, M_TEMP);
+		return (EINVAL);
+	}
+
+	/* done by rib_add_route() */
+	bzero(&rc, sizeof(struct rib_cmd_info));
+
+	NET_EPOCH_ENTER(et);
+
+	error = create_rt_nh_pair_from_info_wrapper(rnh, info, &rt);
+	if (error != 0) {
+		TPRINTF("create_rt_nh_pair_from_info() failed: %d", error);
+	} else {
+		error = add_route_wrapper(rnh, rt, info, &rc);
+		if (error == 0) {
+			rn = rnh->rnh_lookup(rt_key(rt), rt_mask(rt), &rnh->head);
+			TASSERT((struct rtentry *)rn == rt, "inserted rt not found");
+			/* verify rc */
+			TASSERT(rc.cmd == RTM_ADD, "cmd!=RTM_ADD:%d", rc.cmd);
+			TASSERT(rc.rt == rt, "rc.rt!=rt");
+			TASSERT(rc.nh_old == NULL, "rc.nh_old!=NULL");
+			TASSERT(rc.nh_new == rt->rt_nhop, "rc.nh_new!=rt.rt_nhop");
+		} else {
+			TPRINTF("add_route() returned %d", error);
+		}
+	}
+	NET_EPOCH_EXIT(et);
+	free(buf, M_TEMP);
+	free_rnh(rnh);
+
+	return (error);
+}
+
+/*
+ * Adds a route and then a second one for the same prefix with a different
+ * nexthop; the second add has to fail with EEXIST.
+ */
+static int
+test_add_route_exist_fail(void)
+{
+	int error;
+	struct rib_cmd_info rc;
+	struct rt_addrinfo *info;
+	struct rib_head *rnh;
+	struct rtentry *rt, *rt2;
+	struct epoch_tracker et;
+
+	char *buf = malloc(1024, M_TEMP, M_WAITOK | M_ZERO);
+
+	rnh = create_rnh(AF_INET6, 0);
+	if (rnh == NULL) {
+		TPRINTF("failed to create rib head");
+		free(buf, M_TEMP);
+		return (EINVAL);
+	}
+	info = get_info(buf, "2001:db8:1::/64", "::1");
+
+	if (info == NULL || info->rti_ifp == NULL || info->rti_ifa == NULL) {
+		TPRINTF("failed to create info");
+		free_rnh(rnh);
+		free(buf, M_TEMP);
+		return (EINVAL);
+	}
+	/* Do not set RTF_GATEWAY so the first route is multipath ineligible */
+
+	/* done by rib_add_route() */
+	bzero(&rc, sizeof(struct rib_cmd_info));
+
+	NET_EPOCH_ENTER(et);
+
+	error = create_rt_nh_pair_from_info_wrapper(rnh, info, &rt);
+	if (error != 0) {
+		TPRINTF("create_rt_nh_pair_from_info() failed: %d", error);
+		goto cleanup;
+	}
+	/* Set RTF_GATEWAY so the new nexthop is different */
+	info->rti_flags |= RTF_GATEWAY;
+	error = create_rt_nh_pair_from_info_wrapper(rnh, info, &rt2);
+	if (error != 0) {
+		TPRINTF("second create_rt_nh_pair_from_info() failed: %d", error);
+		goto cleanup;
+	}
+
+	error = add_route_wrapper(rnh, rt, info, &rc);
+	if (error != 0) {
+		TPRINTF("add_route() returned %d", error);
+		goto cleanup;
+	}
+
+	if (rnh->rnh_lookup(rt_key(rt), rt_mask(rt), &rnh->head) == NULL) {
+		TPRINTF("added route not found");
+		error = EINVAL;
+		goto cleanup;
+	}
+
+	/* EEXIST is the expected outcome; anything else fails the test. */
+	error = add_route_wrapper(rnh, rt2, info, &rc);
+	if (error != EEXIST) {
+		TPRINTF("add_route() returned %d instead of EEXIST", error);
+		goto cleanup;
+	}
+
+	error = 0;
+cleanup:
+	NET_EPOCH_EXIT(et);
+	free(buf, M_TEMP);
+	free_rnh(rnh);
+
+	return (error);
+}
+
+/*
+ * Adds a route and then a second, RTF_PINNED one for the same prefix;
+ * the second add is expected to succeed.
+ */
+static int
+test_add_route_pinned_success(void)
+{
+	int error;
+	struct rib_cmd_info rc;
+	struct rt_addrinfo *info;
+	struct rib_head *rnh;
+	struct rtentry *rt, *rt2;
+	struct epoch_tracker et;
+
+	char *buf = malloc(1024, M_TEMP, M_WAITOK | M_ZERO);
+
+	rnh = create_rnh(AF_INET6, 0);
+	if (rnh == NULL) {
+		TPRINTF("failed to create rib head");
+		free(buf, M_TEMP);
+		return (EINVAL);
+	}
+	info = get_info(buf, "2001:db8:1::/64", "::1");
+
+	if (info == NULL || info->rti_ifp == NULL || info->rti_ifa == NULL) {
+		TPRINTF("failed to create info");
+		free_rnh(rnh);
+		free(buf, M_TEMP);
+		return (EINVAL);
+	}
+
+	/* done by rib_add_route() */
+	bzero(&rc, sizeof(struct rib_cmd_info));
+
+	NET_EPOCH_ENTER(et);
+
+	error = create_rt_nh_pair_from_info_wrapper(rnh, info, &rt);
+	if (error != 0) {
+		TPRINTF("create_rt_nh_pair_from_info() failed: %d", error);
+		goto cleanup;
+	}
+
+	info->rti_flags |= RTF_PINNED;
+	error = create_rt_nh_pair_from_info_wrapper(rnh, info, &rt2);
+	if (error != 0) {
+		TPRINTF("second create_rt_nh_pair_from_info() failed: %d", error);
+		goto cleanup;
+	}
+
+	error = add_route_wrapper(rnh, rt, info, &rc);
+	if (error != 0) {
+		TPRINTF("add_route() returned %d", error);
+		goto cleanup;
+	}
+
+	if (rnh->rnh_lookup(rt_key(rt), rt_mask(rt), &rnh->head) == NULL) {
+		TPRINTF("added route not found");
+		error = EINVAL;
+		goto cleanup;
+	}
+
+	error = add_route_wrapper(rnh, rt2, info, &rc);
+	if (error != 0) {
+		TPRINTF("second add_route() returned %d", error);
+		goto cleanup;
+	}
+cleanup:
+	NET_EPOCH_EXIT(et);
+	free(buf, M_TEMP);
+	free_rnh(rnh);
+
+	return (error);
+}
+
+/* File-local: only kt_route_ctl (defined below) is exported. */
+static struct ktest_item tests[] = {
+	DECLARE_KTEST(test_add_route_plain_add_success),
+	DECLARE_KTEST(test_add_route_exist_fail),
+	DECLARE_KTEST(test_add_route_pinned_success),
+};
+DEFINE_KTESTS(route_ctl, "routing control plane tests", tests);
+