Index: sys/net/if_var.h =================================================================== --- sys/net/if_var.h +++ sys/net/if_var.h @@ -686,8 +686,8 @@ struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int); struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int); struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int); -struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, struct sockaddr *, - u_int); +struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, + const struct sockaddr *, u_int); struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *); int ifa_preferred(struct ifaddr *, struct ifaddr *); Index: sys/net/route.c =================================================================== --- sys/net/route.c +++ sys/net/route.c @@ -121,45 +121,17 @@ #define V_rt_tables VNET(rt_tables) -/* - * Convert a 'struct radix_node *' to a 'struct rtentry *'. - * The operation can be done safely (in this code) because a - * 'struct rtentry' starts with two 'struct radix_node''s, the first - * one representing leaf nodes in the routing tree, which is - * what the code in radix.c passes us as a 'struct radix_node'. - * - * But because there are a lot of assumptions in this conversion, - * do not cast explicitly, but always use the macro below. - */ -#define RNTORT(p) ((struct rtentry *)(p)) - -VNET_DEFINE_STATIC(uma_zone_t, rtzone); /* Routing table UMA zone. */ +VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */ #define V_rtzone VNET(rtzone) EVENTHANDLER_LIST_DEFINE(rt_addrmsg); -static int rt_getifa_fib(struct rt_addrinfo *, u_int); -static void rt_setmetrics(const struct rt_addrinfo *, struct rtentry *); static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *, void *arg); -static struct rtentry *rt_unlinkrte(struct rib_head *rnh, - struct rt_addrinfo *info, int *perror); -static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info); static void destroy_rtentry_epoch(epoch_context_t ctx); -#ifdef RADIX_MPATH -static struct radix_node *rt_mpath_unlink(struct rib_head *rnh, - struct rt_addrinfo *info, struct rtentry *rto, int *perror); -#endif static int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags); -static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, - struct rtentry **ret_nrt); -static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, - struct rtentry **ret_nrt); -static int change_route(struct rib_head *, struct rt_addrinfo *, - struct rtentry **); - /* * handler for net.my_fibnum */ @@ -572,8 +544,8 @@ } struct ifaddr * -ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway, - u_int fibnum) +ifa_ifwithroute(int flags, const struct sockaddr *dst, + const struct sockaddr *gateway, u_int fibnum) { struct ifaddr *ifa; @@ -839,98 +811,6 @@ } } -struct rt_delinfo -{ - struct rt_addrinfo info; - struct rib_head *rnh; - struct rtentry *head; -}; - -/* - * Conditionally unlinks @rn from radix tree based - * on info data passed in @arg. - */ -static int -rt_checkdelroute(struct radix_node *rn, void *arg) -{ - struct rt_delinfo *di; - struct rt_addrinfo *info; - struct rtentry *rt; - int error; - - di = (struct rt_delinfo *)arg; - rt = (struct rtentry *)rn; - info = &di->info; - error = 0; - - info->rti_info[RTAX_DST] = rt_key(rt); - info->rti_info[RTAX_NETMASK] = rt_mask(rt); - info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; - - rt = rt_unlinkrte(di->rnh, info, &error); - if (rt == NULL) { - /* Either not allowed or not matched. Skip entry */ - return (0); - } - - /* Entry was unlinked. Add to the list and return */ - rt->rt_chain = di->head; - di->head = rt; - - return (0); -} - -/* - * Iterates over a routing table specified by @fibnum and @family and - * deletes elements marked by @filter_f. - * @fibnum: rtable id - * @family: AF_ address family - * @filter_f: function returning non-zero value for items to delete - * @arg: data to pass to the @filter_f function - * @report: true if rtsock notification is needed. - */ -void -rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool report) -{ - struct rib_head *rnh; - struct rt_delinfo di; - struct rtentry *rt; - - rnh = rt_tables_get_rnh(fibnum, family); - if (rnh == NULL) - return; - - bzero(&di, sizeof(di)); - di.info.rti_filter = filter_f; - di.info.rti_filterdata = arg; - di.rnh = rnh; - - RIB_WLOCK(rnh); - rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); - RIB_WUNLOCK(rnh); - - if (di.head == NULL) - return; - - /* We might have something to reclaim. */ - while (di.head != NULL) { - rt = di.head; - di.head = rt->rt_chain; - rt->rt_chain = NULL; - - /* TODO std rt -> rt_addrinfo export */ - di.info.rti_info[RTAX_DST] = rt_key(rt); - di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); - - rt_notifydelete(rt, &di.info); - - if (report) - rt_routemsg(RTM_DELETE, rt, rt->rt_nhop->nh_ifp, 0, - fibnum); - rtfree(rt); - } -} - /* * Iterates over all existing fibs in system and deletes each element * for which @filter_f function returns non-zero value. @@ -1017,104 +897,6 @@ rt_foreach_fib_walk_del(AF_UNSPEC, rt_ifdelroute, ifp); } -/* - * Conditionally unlinks rtentry matching data inside @info from @rnh. - * Returns unlinked, locked and referenced @rtentry on success, - * Returns NULL and sets @perror to: - * ESRCH - if prefix was not found, - * EADDRINUSE - if trying to delete PINNED route without appropriate flag. - * ENOENT - if supplied filter function returned 0 (not matched). - */ -static struct rtentry * -rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror) -{ - struct sockaddr *dst, *netmask; - struct rtentry *rt; - struct radix_node *rn; - - dst = info->rti_info[RTAX_DST]; - netmask = info->rti_info[RTAX_NETMASK]; - - rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head); - if (rt == NULL) { - *perror = ESRCH; - return (NULL); - } - - if ((info->rti_flags & RTF_PINNED) == 0) { - /* Check if target route can be deleted */ - if (rt->rt_flags & RTF_PINNED) { - *perror = EADDRINUSE; - return (NULL); - } - } - - if (info->rti_filter != NULL) { - if (info->rti_filter(rt, rt->rt_nhop, info->rti_filterdata)==0){ - /* Not matched */ - *perror = ENOENT; - return (NULL); - } - - /* - * Filter function requested rte deletion. - * Ease the caller work by filling in remaining info - * from that particular entry. - */ - info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; - } - - /* - * Remove the item from the tree and return it. - * Complain if it is not there and do no more processing. - */ - *perror = ESRCH; -#ifdef RADIX_MPATH - if (rt_mpath_capable(rnh)) - rn = rt_mpath_unlink(rnh, info, rt, perror); - else -#endif - rn = rnh->rnh_deladdr(dst, netmask, &rnh->head); - if (rn == NULL) - return (NULL); - - if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) - panic ("rtrequest delete"); - - rt = RNTORT(rn); - RT_LOCK(rt); - rt->rt_flags &= ~RTF_UP; - - *perror = 0; - - return (rt); -} - -static void -rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info) -{ - struct ifaddr *ifa; - - /* - * give the protocol a chance to keep things in sync. - */ - ifa = rt->rt_nhop->nh_ifa; - if (ifa != NULL && ifa->ifa_rtrequest != NULL) - ifa->ifa_rtrequest(RTM_DELETE, rt, rt->rt_nhop, info); -} - - -/* - * These (questionable) definitions of apparent local variables apply - * to the next two functions. XXXXXX!!! - */ -#define dst info->rti_info[RTAX_DST] -#define gateway info->rti_info[RTAX_GATEWAY] -#define netmask info->rti_info[RTAX_NETMASK] -#define ifaaddr info->rti_info[RTAX_IFA] -#define ifpaddr info->rti_info[RTAX_IFP] -#define flags info->rti_flags - /* * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined, * it will be referenced so the caller must free it. @@ -1125,8 +907,15 @@ int rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) { + const struct sockaddr *dst, *gateway, *ifpaddr, *ifaaddr; struct epoch_tracker et; - int needref, error; + int needref, error, flags; + + dst = info->rti_info[RTAX_DST]; + gateway = info->rti_info[RTAX_GATEWAY]; + ifpaddr = info->rti_info[RTAX_IFP]; + ifaaddr = info->rti_info[RTAX_IFA]; + flags = info->rti_flags; /* * ifp may be specified by sockaddr_dl @@ -1150,7 +939,7 @@ if (info->rti_ifa == NULL && ifaaddr != NULL) info->rti_ifa = ifa_ifwithaddr(ifaaddr); if (info->rti_ifa == NULL) { - struct sockaddr *sa; + const struct sockaddr *sa; /* * Most common use case for the userland-supplied routes. @@ -1282,7 +1071,7 @@ * Returnes unlinked entry. In case of failure, returns NULL * and sets @perror to ESRCH. */ -static struct radix_node * +struct radix_node * rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info, struct rtentry *rto, int *perror) { @@ -1335,7 +1124,9 @@ * use the normal delete code to remove * the first entry */ - rn = rnh->rnh_deladdr(dst, netmask, &rnh->head); + rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST], + info->rti_info[RTAX_NETMASK], + &rnh->head); *perror = 0; return (rn); } @@ -1351,410 +1142,7 @@ } #endif -#undef dst -#undef gateway -#undef netmask -#undef ifaaddr -#undef ifpaddr -#undef flags - -int -rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, - u_int fibnum) -{ - const struct sockaddr *dst; - struct rib_head *rnh; - int error; - - KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); - KASSERT((info->rti_flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked")); - NET_EPOCH_ASSERT(); - - dst = info->rti_info[RTAX_DST]; - - switch (dst->sa_family) { - case AF_INET6: - case AF_INET: - /* We support multiple FIBs. */ - break; - default: - fibnum = RT_DEFAULT_FIB; - break; - } - - /* - * Find the correct routing tree to use for this Address Family - */ - rnh = rt_tables_get_rnh(fibnum, dst->sa_family); - if (rnh == NULL) - return (EAFNOSUPPORT); - - /* - * If we are adding a host route then we don't want to put - * a netmask in the tree, nor do we want to clone it. - */ - if (info->rti_flags & RTF_HOST) - info->rti_info[RTAX_NETMASK] = NULL; - - error = 0; - switch (req) { - case RTM_DELETE: - error = del_route(rnh, info, ret_nrt); - break; - case RTM_RESOLVE: - /* - * resolve was only used for route cloning - * here for compat - */ - break; - case RTM_ADD: - error = add_route(rnh, info, ret_nrt); - break; - case RTM_CHANGE: - error = change_route(rnh, info, ret_nrt); - break; - default: - error = EOPNOTSUPP; - } - - return (error); -} - -static int -add_route(struct rib_head *rnh, struct rt_addrinfo *info, - struct rtentry **ret_nrt) -{ - struct sockaddr *dst, *ndst, *gateway, *netmask; - struct rtentry *rt, *rt_old; - struct nhop_object *nh; - struct radix_node *rn; - struct ifaddr *ifa; - int error, flags; - struct epoch_tracker et; - - dst = info->rti_info[RTAX_DST]; - gateway = info->rti_info[RTAX_GATEWAY]; - netmask = info->rti_info[RTAX_NETMASK]; - flags = info->rti_flags; - - if ((flags & RTF_GATEWAY) && !gateway) - return (EINVAL); - if (dst && gateway && (dst->sa_family != gateway->sa_family) && - (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) - return (EINVAL); - - if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) - return (EINVAL); - - if (info->rti_ifa == NULL) { - error = rt_getifa_fib(info, rnh->rib_fibnum); - if (error) - return (error); - } else { - ifa_ref(info->rti_ifa); - } - - NET_EPOCH_ENTER(et); - error = nhop_create_from_info(rnh, info, &nh); - NET_EPOCH_EXIT(et); - if (error != 0) { - ifa_free(info->rti_ifa); - return (error); - } - - rt = uma_zalloc(V_rtzone, M_NOWAIT); - if (rt == NULL) { - ifa_free(info->rti_ifa); - nhop_free(nh); - return (ENOBUFS); - } - rt->rt_flags = RTF_UP | flags; - rt->rt_nhop = nh; - - /* Fill in dst */ - memcpy(&rt->rt_dst, dst, dst->sa_len); - rt_key(rt) = &rt->rt_dst; - - /* - * point to the (possibly newly malloc'd) dest address. - */ - ndst = (struct sockaddr *)rt_key(rt); - - /* - * make sure it contains the value we want (masked if needed). - */ - if (netmask) { - rt_maskedcopy(dst, ndst, netmask); - } else - bcopy(dst, ndst, dst->sa_len); - - /* - * We use the ifa reference returned by rt_getifa_fib(). - * This moved from below so that rnh->rnh_addaddr() can - * examine the ifa and ifa->ifa_ifp if it so desires. - */ - ifa = info->rti_ifa; - rt->rt_weight = 1; - - rt_setmetrics(info, rt); - - RIB_WLOCK(rnh); - RT_LOCK(rt); -#ifdef RADIX_MPATH - /* do not permit exactly the same dst/mask/gw pair */ - if (rt_mpath_capable(rnh) && - rt_mpath_conflict(rnh, rt, netmask)) { - RIB_WUNLOCK(rnh); - - nhop_free(nh); - uma_zfree(V_rtzone, rt); - return (EEXIST); - } -#endif - - rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); - - if (rn != NULL && rt->rt_expire > 0) - tmproutes_update(rnh, rt); - - rt_old = NULL; - if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) { - - /* - * Force removal and re-try addition - * TODO: better multipath&pinned support - */ - struct sockaddr *info_dst = info->rti_info[RTAX_DST]; - info->rti_info[RTAX_DST] = ndst; - /* Do not delete existing PINNED(interface) routes */ - info->rti_flags &= ~RTF_PINNED; - rt_old = rt_unlinkrte(rnh, info, &error); - info->rti_flags |= RTF_PINNED; - info->rti_info[RTAX_DST] = info_dst; - if (rt_old != NULL) - rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, - rt->rt_nodes); - } - RIB_WUNLOCK(rnh); - - if (rt_old != NULL) { - rt_notifydelete(rt_old, info); - rtfree(rt_old); - } - - /* - * If it still failed to go into the tree, - * then un-make it (this should be a function) - */ - if (rn == NULL) { - nhop_free(nh); - uma_zfree(V_rtzone, rt); - return (EEXIST); - } - - /* - * If this protocol has something to add to this then - * allow it to do that as well. - */ - if (ifa->ifa_rtrequest) - ifa->ifa_rtrequest(RTM_ADD, rt, rt->rt_nhop, info); - - /* - * actually return a resultant rtentry - */ - if (ret_nrt) - *ret_nrt = rt; - rnh->rnh_gen++; /* Routing table updated */ - RT_UNLOCK(rt); - - return (0); -} - -static int -del_route(struct rib_head *rnh, struct rt_addrinfo *info, - struct rtentry **ret_nrt) -{ - struct sockaddr *dst, *netmask; - struct sockaddr_storage mdst; - struct rtentry *rt; - int error; - - dst = info->rti_info[RTAX_DST]; - netmask = info->rti_info[RTAX_NETMASK]; - - if (netmask) { - if (dst->sa_len > sizeof(mdst)) - return (EINVAL); - rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); - dst = (struct sockaddr *)&mdst; - } - - RIB_WLOCK(rnh); - rt = rt_unlinkrte(rnh, info, &error); - RIB_WUNLOCK(rnh); - if (error != 0) - return (error); - - rt_notifydelete(rt, info); - - /* - * If the caller wants it, then it can have it, - * the entry will be deleted after the end of the current epoch. - */ - if (ret_nrt) - *ret_nrt = rt; - - rtfree(rt); - - return (0); -} - -static int -change_route_one(struct rib_head *rnh, struct rt_addrinfo *info, - struct rtentry **ret_nrt) -{ - RIB_RLOCK_TRACKER; - struct rtentry *rt = NULL; - int error = 0; - int free_ifa = 0; - struct nhop_object *nh, *nh_orig; - - RIB_RLOCK(rnh); - rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], - info->rti_info[RTAX_NETMASK], &rnh->head); - - if (rt == NULL) { - RIB_RUNLOCK(rnh); - return (ESRCH); - } - -#ifdef RADIX_MPATH - /* - * If we got multipath routes, - * we require users to specify a matching RTAX_GATEWAY. - */ - if (rt_mpath_capable(rnh)) { - rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]); - if (rt == NULL) { - RIB_RUNLOCK(rnh); - return (ESRCH); - } - } -#endif - nh_orig = rt->rt_nhop; - - RIB_RUNLOCK(rnh); - - rt = NULL; - nh = NULL; - - /* - * New gateway could require new ifaddr, ifp; - * flags may also be different; ifp may be specified - * by ll sockaddr when protocol address is ambiguous - */ - if (((nh_orig->nh_flags & NHF_GATEWAY) && - info->rti_info[RTAX_GATEWAY] != NULL) || - info->rti_info[RTAX_IFP] != NULL || - (info->rti_info[RTAX_IFA] != NULL && - !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { - error = rt_getifa_fib(info, rnh->rib_fibnum); - if (info->rti_ifa != NULL) - free_ifa = 1; - - if (error != 0) { - if (free_ifa) { - ifa_free(info->rti_ifa); - info->rti_ifa = NULL; - } - - return (error); - } - } - - error = nhop_create_from_nhop(rnh, nh_orig, info, &nh); - if (free_ifa) { - ifa_free(info->rti_ifa); - info->rti_ifa = NULL; - } - if (error != 0) - return (error); - - RIB_WLOCK(rnh); - - /* Lookup rtentry once again and check if nexthop is still the same */ - rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], - info->rti_info[RTAX_NETMASK], &rnh->head); - - if (rt == NULL) { - RIB_WUNLOCK(rnh); - nhop_free(nh); - return (ESRCH); - } - - if (rt->rt_nhop != nh_orig) { - RIB_WUNLOCK(rnh); - nhop_free(nh); - return (EAGAIN); - } - - /* Proceed with the update */ - RT_LOCK(rt); - - /* Provide notification to the protocols.*/ - if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) - nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); - - rt->rt_nhop = nh; - rt_setmetrics(info, rt); - - if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) - nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); - - if (ret_nrt != NULL) - *ret_nrt = rt; - - RT_UNLOCK(rt); - - /* Update generation id to reflect rtable change */ - rnh->rnh_gen++; - - RIB_WUNLOCK(rnh); - - nhop_free(nh_orig); - - return (0); -} - -static int -change_route(struct rib_head *rnh, struct rt_addrinfo *info, - struct rtentry **ret_nrt) -{ - int error; - - /* Check if updated gateway exists */ - if ((info->rti_flags & RTF_GATEWAY) && - (info->rti_info[RTAX_GATEWAY] == NULL)) - return (EINVAL); - - /* - * route change is done in multiple steps, with dropping and - * reacquiring lock. In the situations with multiple processes - * changes the same route in can lead to the case when route - * is changed between the steps. Address it by retrying the operation - * multiple times before failing. - */ - for (int i = 0; i < RIB_MAX_RETRIES; i++) { - error = change_route_one(rnh, info, ret_nrt); - if (error != EAGAIN) - break; - } - - return (error); -} - - -static void +void rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt) { Index: sys/net/route/route_ctl.c =================================================================== --- sys/net/route/route_ctl.c +++ sys/net/route/route_ctl.c @@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_mpath.h" #include #include @@ -53,6 +54,10 @@ #include #include +#ifdef RADIX_MPATH +#include +#endif + #include @@ -62,4 +67,587 @@ * All functions assumes they are called in net epoch. */ +static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **ret_nrt); +static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **ret_nrt); +static int change_route(struct rib_head *, struct rt_addrinfo *, + struct rtentry **); + +static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info); + +int +rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, + u_int fibnum) +{ + const struct sockaddr *dst; + struct rib_head *rnh; + int error; + + KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); + KASSERT((info->rti_flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked")); + NET_EPOCH_ASSERT(); + + dst = info->rti_info[RTAX_DST]; + + switch (dst->sa_family) { + case AF_INET6: + case AF_INET: + /* We support multiple FIBs. */ + break; + default: + fibnum = RT_DEFAULT_FIB; + break; + } + + /* + * Find the correct routing tree to use for this Address Family + */ + rnh = rt_tables_get_rnh(fibnum, dst->sa_family); + if (rnh == NULL) + return (EAFNOSUPPORT); + + /* + * If we are adding a host route then we don't want to put + * a netmask in the tree, nor do we want to clone it. + */ + if (info->rti_flags & RTF_HOST) + info->rti_info[RTAX_NETMASK] = NULL; + + error = 0; + switch (req) { + case RTM_DELETE: + error = del_route(rnh, info, ret_nrt); + break; + case RTM_RESOLVE: + /* + * resolve was only used for route cloning + * here for compat + */ + break; + case RTM_ADD: + error = add_route(rnh, info, ret_nrt); + break; + case RTM_CHANGE: + error = change_route(rnh, info, ret_nrt); + break; + default: + error = EOPNOTSUPP; + } + + return (error); +} + +static int +add_route(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **ret_nrt) +{ + struct sockaddr *dst, *ndst, *gateway, *netmask; + struct rtentry *rt, *rt_old; + struct nhop_object *nh; + struct radix_node *rn; + struct ifaddr *ifa; + int error, flags; + struct epoch_tracker et; + + dst = info->rti_info[RTAX_DST]; + gateway = info->rti_info[RTAX_GATEWAY]; + netmask = info->rti_info[RTAX_NETMASK]; + flags = info->rti_flags; + + if ((flags & RTF_GATEWAY) && !gateway) + return (EINVAL); + if (dst && gateway && (dst->sa_family != gateway->sa_family) && + (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) + return (EINVAL); + + if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) + return (EINVAL); + + if (info->rti_ifa == NULL) { + error = rt_getifa_fib(info, rnh->rib_fibnum); + if (error) + return (error); + } else { + ifa_ref(info->rti_ifa); + } + + NET_EPOCH_ENTER(et); + error = nhop_create_from_info(rnh, info, &nh); + NET_EPOCH_EXIT(et); + if (error != 0) { + ifa_free(info->rti_ifa); + return (error); + } + + rt = uma_zalloc(V_rtzone, M_NOWAIT); + if (rt == NULL) { + ifa_free(info->rti_ifa); + nhop_free(nh); + return (ENOBUFS); + } + rt->rt_flags = RTF_UP | flags; + rt->rt_nhop = nh; + + /* Fill in dst */ + memcpy(&rt->rt_dst, dst, dst->sa_len); + rt_key(rt) = &rt->rt_dst; + + /* + * point to the (possibly newly malloc'd) dest address. + */ + ndst = (struct sockaddr *)rt_key(rt); + + /* + * make sure it contains the value we want (masked if needed). + */ + if (netmask) { + rt_maskedcopy(dst, ndst, netmask); + } else + bcopy(dst, ndst, dst->sa_len); + + /* + * We use the ifa reference returned by rt_getifa_fib(). + * This moved from below so that rnh->rnh_addaddr() can + * examine the ifa and ifa->ifa_ifp if it so desires. + */ + ifa = info->rti_ifa; + rt->rt_weight = 1; + + rt_setmetrics(info, rt); + + RIB_WLOCK(rnh); + RT_LOCK(rt); +#ifdef RADIX_MPATH + /* do not permit exactly the same dst/mask/gw pair */ + if (rt_mpath_capable(rnh) && + rt_mpath_conflict(rnh, rt, netmask)) { + RIB_WUNLOCK(rnh); + + nhop_free(nh); + uma_zfree(V_rtzone, rt); + return (EEXIST); + } +#endif + + rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); + + if (rn != NULL && rt->rt_expire > 0) + tmproutes_update(rnh, rt); + + rt_old = NULL; + if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) { + + /* + * Force removal and re-try addition + * TODO: better multipath&pinned support + */ + struct sockaddr *info_dst = info->rti_info[RTAX_DST]; + info->rti_info[RTAX_DST] = ndst; + /* Do not delete existing PINNED(interface) routes */ + info->rti_flags &= ~RTF_PINNED; + rt_old = rt_unlinkrte(rnh, info, &error); + info->rti_flags |= RTF_PINNED; + info->rti_info[RTAX_DST] = info_dst; + if (rt_old != NULL) + rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, + rt->rt_nodes); + } + RIB_WUNLOCK(rnh); + + if (rt_old != NULL) { + rt_notifydelete(rt_old, info); + rtfree(rt_old); + } + + /* + * If it still failed to go into the tree, + * then un-make it (this should be a function) + */ + if (rn == NULL) { + nhop_free(nh); + uma_zfree(V_rtzone, rt); + return (EEXIST); + } + + /* + * If this protocol has something to add to this then + * allow it to do that as well. + */ + if (ifa->ifa_rtrequest) + ifa->ifa_rtrequest(RTM_ADD, rt, rt->rt_nhop, info); + + /* + * actually return a resultant rtentry + */ + if (ret_nrt) + *ret_nrt = rt; + rnh->rnh_gen++; /* Routing table updated */ + RT_UNLOCK(rt); + + return (0); +} + + +/* + * Conditionally unlinks rtentry matching data inside @info from @rnh. + * Returns unlinked, locked and referenced @rtentry on success, + * Returns NULL and sets @perror to: + * ESRCH - if prefix was not found, + * EADDRINUSE - if trying to delete PINNED route without appropriate flag. + * ENOENT - if supplied filter function returned 0 (not matched). + */ +struct rtentry * +rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror) +{ + struct sockaddr *dst, *netmask; + struct rtentry *rt; + struct radix_node *rn; + + dst = info->rti_info[RTAX_DST]; + netmask = info->rti_info[RTAX_NETMASK]; + + rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head); + if (rt == NULL) { + *perror = ESRCH; + return (NULL); + } + + if ((info->rti_flags & RTF_PINNED) == 0) { + /* Check if target route can be deleted */ + if (rt->rt_flags & RTF_PINNED) { + *perror = EADDRINUSE; + return (NULL); + } + } + + if (info->rti_filter != NULL) { + if (info->rti_filter(rt, rt->rt_nhop, info->rti_filterdata)==0){ + /* Not matched */ + *perror = ENOENT; + return (NULL); + } + + /* + * Filter function requested rte deletion. + * Ease the caller work by filling in remaining info + * from that particular entry. + */ + info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; + } + + /* + * Remove the item from the tree and return it. + * Complain if it is not there and do no more processing. + */ + *perror = ESRCH; +#ifdef RADIX_MPATH + if (rt_mpath_capable(rnh)) + rn = rt_mpath_unlink(rnh, info, rt, perror); + else +#endif + rn = rnh->rnh_deladdr(dst, netmask, &rnh->head); + if (rn == NULL) + return (NULL); + + if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) + panic ("rtrequest delete"); + + rt = RNTORT(rn); + RT_LOCK(rt); + rt->rt_flags &= ~RTF_UP; + + *perror = 0; + + return (rt); +} + +static int +del_route(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **ret_nrt) +{ + struct sockaddr *dst, *netmask; + struct sockaddr_storage mdst; + struct rtentry *rt; + int error; + + dst = info->rti_info[RTAX_DST]; + netmask = info->rti_info[RTAX_NETMASK]; + + if (netmask) { + if (dst->sa_len > sizeof(mdst)) + return (EINVAL); + rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); + dst = (struct sockaddr *)&mdst; + } + + RIB_WLOCK(rnh); + rt = rt_unlinkrte(rnh, info, &error); + RIB_WUNLOCK(rnh); + if (error != 0) + return (error); + + rt_notifydelete(rt, info); + + /* + * If the caller wants it, then it can have it, + * the entry will be deleted after the end of the current epoch. + */ + if (ret_nrt) + *ret_nrt = rt; + + rtfree(rt); + + return (0); +} + +static int +change_route_one(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **ret_nrt) +{ + RIB_RLOCK_TRACKER; + struct rtentry *rt = NULL; + int error = 0; + int free_ifa = 0; + struct nhop_object *nh, *nh_orig; + + RIB_RLOCK(rnh); + rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], + info->rti_info[RTAX_NETMASK], &rnh->head); + + if (rt == NULL) { + RIB_RUNLOCK(rnh); + return (ESRCH); + } + +#ifdef RADIX_MPATH + /* + * If we got multipath routes, + * we require users to specify a matching RTAX_GATEWAY. + */ + if (rt_mpath_capable(rnh)) { + rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]); + if (rt == NULL) { + RIB_RUNLOCK(rnh); + return (ESRCH); + } + } +#endif + nh_orig = rt->rt_nhop; + + RIB_RUNLOCK(rnh); + + rt = NULL; + nh = NULL; + + /* + * New gateway could require new ifaddr, ifp; + * flags may also be different; ifp may be specified + * by ll sockaddr when protocol address is ambiguous + */ + if (((nh_orig->nh_flags & NHF_GATEWAY) && + info->rti_info[RTAX_GATEWAY] != NULL) || + info->rti_info[RTAX_IFP] != NULL || + (info->rti_info[RTAX_IFA] != NULL && + !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { + error = rt_getifa_fib(info, rnh->rib_fibnum); + if (info->rti_ifa != NULL) + free_ifa = 1; + + if (error != 0) { + if (free_ifa) { + ifa_free(info->rti_ifa); + info->rti_ifa = NULL; + } + + return (error); + } + } + + error = nhop_create_from_nhop(rnh, nh_orig, info, &nh); + if (free_ifa) { + ifa_free(info->rti_ifa); + info->rti_ifa = NULL; + } + if (error != 0) + return (error); + + RIB_WLOCK(rnh); + + /* Lookup rtentry once again and check if nexthop is still the same */ + rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], + info->rti_info[RTAX_NETMASK], &rnh->head); + + if (rt == NULL) { + RIB_WUNLOCK(rnh); + nhop_free(nh); + return (ESRCH); + } + + if (rt->rt_nhop != nh_orig) { + RIB_WUNLOCK(rnh); + nhop_free(nh); + return (EAGAIN); + } + + /* Proceed with the update */ + RT_LOCK(rt); + + /* Provide notification to the protocols.*/ + if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) + nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); + + rt->rt_nhop = nh; + rt_setmetrics(info, rt); + + if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) + nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); + + if (ret_nrt != NULL) + *ret_nrt = rt; + + RT_UNLOCK(rt); + + /* Update generation id to reflect rtable change */ + rnh->rnh_gen++; + + RIB_WUNLOCK(rnh); + + nhop_free(nh_orig); + + return (0); +} + +static int +change_route(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **ret_nrt) +{ + int error; + + /* Check if updated gateway exists */ + if ((info->rti_flags & RTF_GATEWAY) && + (info->rti_info[RTAX_GATEWAY] == NULL)) + return (EINVAL); + + /* + * route change is done in multiple steps, with dropping and + * reacquiring lock. In the situations with multiple processes + * changes the same route in can lead to the case when route + * is changed between the steps. Address it by retrying the operation + * multiple times before failing. + */ + for (int i = 0; i < RIB_MAX_RETRIES; i++) { + error = change_route_one(rnh, info, ret_nrt); + if (error != EAGAIN) + break; + } + + return (error); +} + +static void +rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info) +{ + struct ifaddr *ifa; + + /* + * give the protocol a chance to keep things in sync. + */ + ifa = rt->rt_nhop->nh_ifa; + if (ifa != NULL && ifa->ifa_rtrequest != NULL) + ifa->ifa_rtrequest(RTM_DELETE, rt, rt->rt_nhop, info); +} + +struct rt_delinfo +{ + struct rt_addrinfo info; + struct rib_head *rnh; + struct rtentry *head; +}; + +/* + * Conditionally unlinks @rn from radix tree based + * on info data passed in @arg. + */ +static int +rt_checkdelroute(struct radix_node *rn, void *arg) +{ + struct rt_delinfo *di; + struct rt_addrinfo *info; + struct rtentry *rt; + int error; + + di = (struct rt_delinfo *)arg; + rt = (struct rtentry *)rn; + info = &di->info; + error = 0; + + info->rti_info[RTAX_DST] = rt_key(rt); + info->rti_info[RTAX_NETMASK] = rt_mask(rt); + info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; + + rt = rt_unlinkrte(di->rnh, info, &error); + if (rt == NULL) { + /* Either not allowed or not matched. Skip entry */ + return (0); + } + + /* Entry was unlinked. Add to the list and return */ + rt->rt_chain = di->head; + di->head = rt; + + return (0); +} + +/* + * Iterates over a routing table specified by @fibnum and @family and + * deletes elements marked by @filter_f. + * @fibnum: rtable id + * @family: AF_ address family + * @filter_f: function returning non-zero value for items to delete + * @arg: data to pass to the @filter_f function + * @report: true if rtsock notification is needed. + */ +void +rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool report) +{ + struct rib_head *rnh; + struct rt_delinfo di; + struct rtentry *rt; + + rnh = rt_tables_get_rnh(fibnum, family); + if (rnh == NULL) + return; + + bzero(&di, sizeof(di)); + di.info.rti_filter = filter_f; + di.info.rti_filterdata = arg; + di.rnh = rnh; + + RIB_WLOCK(rnh); + rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); + RIB_WUNLOCK(rnh); + + if (di.head == NULL) + return; + + /* We might have something to reclaim. */ + while (di.head != NULL) { + rt = di.head; + di.head = rt->rt_chain; + rt->rt_chain = NULL; + + /* TODO std rt -> rt_addrinfo export */ + di.info.rti_info[RTAX_DST] = rt_key(rt); + di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); + + rt_notifydelete(rt, &di.info); + + if (report) + rt_routemsg(RTM_DELETE, rt, rt->rt_nhop->nh_ifp, 0, + fibnum); + rtfree(rt); + } +} + Index: sys/net/route/route_var.h =================================================================== --- sys/net/route/route_var.h +++ sys/net/route/route_var.h @@ -104,12 +104,31 @@ struct rib_head *rt_tables_get_rnh(int fib, int family); void rt_mpath_init_rnh(struct rib_head *rnh); +int rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum); +void rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt); +#ifdef RADIX_MPATH +static struct radix_node *rt_mpath_unlink(struct rib_head *rnh, + struct rt_addrinfo *info, struct rtentry *rto, int *perror); +#endif VNET_PCPUSTAT_DECLARE(struct rtstat, rtstat); #define RTSTAT_ADD(name, val) \ VNET_PCPUSTAT_ADD(struct rtstat, rtstat, name, (val)) #define RTSTAT_INC(name) RTSTAT_ADD(name, 1) + +/* + * Convert a 'struct radix_node *' to a 'struct rtentry *'. + * The operation can be done safely (in this code) because a + * 'struct rtentry' starts with two 'struct radix_node''s, the first + * one representing leaf nodes in the routing tree, which is + * what the code in radix.c passes us as a 'struct radix_node'. + * + * But because there are a lot of assumptions in this conversion, + * do not cast explicitly, but always use the macro below. + */ +#define RNTORT(p) ((struct rtentry *)(p)) + struct rtentry { struct radix_node rt_nodes[2]; /* tree glue, and other values */ /* Index: sys/net/route/shared.h =================================================================== --- sys/net/route/shared.h +++ sys/net/route/shared.h @@ -37,6 +37,8 @@ #ifndef _NET_ROUTE_SHARED_H_ #define _NET_ROUTE_SHARED_H_ +#include + #ifdef RTDEBUG #define DPRINTF(_fmt, ...) printf("%s: " _fmt "\n", __func__ , ## __VA_ARGS__) #else @@ -64,5 +66,12 @@ void nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu); int nhops_dump_sysctl(struct rib_head *rh, struct sysctl_req *w); +/* route */ +VNET_DECLARE(uma_zone_t, rtzone); /* Routing table UMA zone. */ +#define V_rtzone VNET(rtzone) + +struct rtentry *rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, + int *perror); + #endif Index: sys/netinet/in_fib.c =================================================================== --- sys/netinet/in_fib.c +++ sys/netinet/in_fib.c @@ -71,7 +71,6 @@ static void fib4_rte_to_nh_extended(struct nhop_object *nh, struct in_addr dst, uint32_t flags, struct nhop4_extended *pnh4); -#define RNTORT(p) ((struct rtentry *)(p)) static void fib4_rte_to_nh_basic(struct nhop_object *nh, struct in_addr dst, Index: sys/netinet6/in6_fib.c =================================================================== --- sys/netinet6/in6_fib.c +++ sys/netinet6/in6_fib.c @@ -74,7 +74,6 @@ const struct in6_addr *dst, uint32_t flags, struct nhop6_extended *pnh6); static void fib6_rte_to_nh_basic(const struct nhop_object *nh, const struct in6_addr *dst, uint32_t flags, struct nhop6_basic *pnh6); -#define RNTORT(p) ((struct rtentry *)(p)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa))