Index: head/sys/net/route.c =================================================================== --- head/sys/net/route.c +++ head/sys/net/route.c @@ -162,14 +162,6 @@ static int change_route(struct rib_head *, struct rt_addrinfo *, struct rtentry **); -struct if_mtuinfo -{ - struct ifnet *ifp; - int mtu; -}; - -static int if_updatemtu_cb(struct radix_node *, void *); - /* * handler for net.my_fibnum */ @@ -560,12 +552,6 @@ } #endif /* - * release references on items we hold them on.. - * e.g other routes and ifaddrs. - */ - if (rt->rt_ifa) - ifa_free(rt->rt_ifa); - /* * The key is separatly alloc'd so free it (see rt_setgate()). * This also frees the gateway, as they are always malloc'd * together. @@ -1346,66 +1332,25 @@ return (error); } -static int -if_updatemtu_cb(struct radix_node *rn, void *arg) -{ - struct rtentry *rt; - struct if_mtuinfo *ifmtu; - - rt = (struct rtentry *)rn; - ifmtu = (struct if_mtuinfo *)arg; - - if (rt->rt_ifp != ifmtu->ifp) - return (0); - - if (rt->rt_mtu >= ifmtu->mtu) { - /* We have to decrease mtu regardless of flags */ - rt->rt_mtu = ifmtu->mtu; - return (0); - } - - /* - * New MTU is bigger. Check if are allowed to alter it - */ - if ((rt->rt_flags & (RTF_FIXEDMTU | RTF_GATEWAY | RTF_HOST)) != 0) { - - /* - * Skip routes with user-supplied MTU and - * non-interface routes - */ - return (0); - } - - /* We are safe to update route MTU */ - rt->rt_mtu = ifmtu->mtu; - - return (0); -} - void rt_updatemtu(struct ifnet *ifp) { - struct if_mtuinfo ifmtu; struct rib_head *rnh; + int mtu; int i, j; - ifmtu.ifp = ifp; - /* * Try to update rt_mtu for all routes using this interface * Unfortunately the only way to do this is to traverse all * routing tables in all fibs/domains. */ for (i = 1; i <= AF_MAX; i++) { - ifmtu.mtu = if_getmtu_family(ifp, i); + mtu = if_getmtu_family(ifp, i); for (j = 0; j < rt_numfibs; j++) { rnh = rt_tables_get_rnh(j, i); if (rnh == NULL) continue; - RIB_WLOCK(rnh); - rnh->rnh_walktree(&rnh->head, if_updatemtu_cb, &ifmtu); - RIB_WUNLOCK(rnh); - nhops_update_ifmtu(rnh, ifp, ifmtu.mtu); + nhops_update_ifmtu(rnh, ifp, mtu); } } } @@ -1550,7 +1495,6 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, u_int fibnum) { - struct epoch_tracker et; const struct sockaddr *dst; struct rib_head *rnh; int error; @@ -1599,11 +1543,7 @@ error = add_route(rnh, info, ret_nrt); break; case RTM_CHANGE: - NET_EPOCH_ENTER(et); - RIB_WLOCK(rnh); error = change_route(rnh, info, ret_nrt); - RIB_WUNLOCK(rnh); - NET_EPOCH_EXIT(et); break; default: error = EOPNOTSUPP; @@ -1689,8 +1629,6 @@ * examine the ifa and ifa->ifa_ifp if it so desires. */ ifa = info->rti_ifa; - rt->rt_ifa = ifa; - rt->rt_ifp = ifa->ifa_ifp; rt->rt_weight = 1; rt_setmetrics(info, rt); @@ -1703,7 +1641,6 @@ rt_mpath_conflict(rnh, rt, netmask)) { RIB_WUNLOCK(rnh); - ifa_free(rt->rt_ifa); R_Free(rt_key(rt)); nhop_free(nh); uma_zfree(V_rtzone, rt); @@ -1711,7 +1648,6 @@ } #endif - /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); if (rn != NULL && rt->rt_expire > 0) @@ -1745,7 +1681,6 @@ * then un-make it (this should be a function) */ if (rn == NULL) { - ifa_free(rt->rt_ifa); R_Free(rt_key(rt)); nhop_free(nh); uma_zfree(V_rtzone, rt); @@ -1820,23 +1755,23 @@ } static int -change_route(struct rib_head *rnh, struct rt_addrinfo *info, +change_route_one(struct rib_head *rnh, struct rt_addrinfo *info, struct rtentry **ret_nrt) { + RIB_RLOCK_TRACKER; struct rtentry *rt = NULL; int error = 0; int free_ifa = 0; - int family, mtu; - struct nhop_object *nh; - struct if_mtuinfo ifmtu; + struct nhop_object *nh, *nh_orig; - RIB_WLOCK_ASSERT(rnh); - + RIB_RLOCK(rnh); rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], info->rti_info[RTAX_NETMASK], &rnh->head); - if (rt == NULL) + if (rt == NULL) { + RIB_RUNLOCK(rnh); return (ESRCH); + } #ifdef RADIX_MPATH /* @@ -1845,139 +1780,135 @@ */ if (rt_mpath_capable(rnh)) { rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]); - if (rt == NULL) + if (rt == NULL) { + RIB_RUNLOCK(rnh); return (ESRCH); + } } #endif + nh_orig = rt->rt_nhop; + RIB_RUNLOCK(rnh); + + rt = NULL; nh = NULL; - RT_LOCK(rt); - rt_setmetrics(info, rt); - /* * New gateway could require new ifaddr, ifp; * flags may also be different; ifp may be specified * by ll sockaddr when protocol address is ambiguous */ - if (((rt->rt_flags & RTF_GATEWAY) && + if (((nh_orig->nh_flags & NHF_GATEWAY) && info->rti_info[RTAX_GATEWAY] != NULL) || info->rti_info[RTAX_IFP] != NULL || (info->rti_info[RTAX_IFA] != NULL && - !sa_equal(info->rti_info[RTAX_IFA], rt->rt_nhop->nh_ifa->ifa_addr))) { - /* - * XXX: Temporarily set RTF_RNH_LOCKED flag in the rti_flags - * to avoid rlock in the ifa_ifwithroute(). - */ - info->rti_flags |= RTF_RNH_LOCKED; + !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { error = rt_getifa_fib(info, rnh->rib_fibnum); - info->rti_flags &= ~RTF_RNH_LOCKED; if (info->rti_ifa != NULL) free_ifa = 1; - if (error != 0) - goto bad; + if (error != 0) { + if (free_ifa) { + ifa_free(info->rti_ifa); + info->rti_ifa = NULL; + } + + return (error); + } } - error = nhop_create_from_nhop(rnh, rt->rt_nhop, info, &nh); + error = nhop_create_from_nhop(rnh, nh_orig, info, &nh); + if (free_ifa) { + ifa_free(info->rti_ifa); + info->rti_ifa = NULL; + } if (error != 0) - goto bad; + return (error); - /* Check if outgoing interface has changed */ - if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa && - rt->rt_ifa != NULL) { - if (rt->rt_ifa->ifa_rtrequest != NULL) - rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt, rt->rt_nhop, - info); - ifa_free(rt->rt_ifa); - rt->rt_ifa = NULL; - } - /* Update gateway address */ - if (info->rti_info[RTAX_GATEWAY] != NULL) { - error = rt_setgate(rt, rt_key(rt), info->rti_info[RTAX_GATEWAY]); - if (error != 0) - goto bad; + RIB_WLOCK(rnh); - rt->rt_flags &= ~RTF_GATEWAY; - rt->rt_flags |= (RTF_GATEWAY & info->rti_flags); + /* Lookup rtentry once again and check if nexthop is still the same */ + rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], + info->rti_info[RTAX_NETMASK], &rnh->head); + + if (rt == NULL) { + RIB_WUNLOCK(rnh); + nhop_free(nh); + return (ESRCH); } - if (info->rti_ifa != NULL && info->rti_ifa != rt->rt_ifa) { - ifa_ref(info->rti_ifa); - rt->rt_ifa = info->rti_ifa; - rt->rt_ifp = info->rti_ifp; + if (rt->rt_nhop != nh_orig) { + RIB_WUNLOCK(rnh); + nhop_free(nh); + return (EAGAIN); } - /* Allow some flags to be toggled on change. */ - rt->rt_flags &= ~RTF_FMASK; - rt->rt_flags |= info->rti_flags & RTF_FMASK; - if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest != NULL) - rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, nh, info); + /* Proceed with the update */ + RT_LOCK(rt); - /* Alter route MTU if necessary */ - if (rt->rt_ifp != NULL) { - family = info->rti_info[RTAX_DST]->sa_family; - mtu = if_getmtu_family(rt->rt_ifp, family); - /* Set default MTU */ - if (rt->rt_mtu == 0) - rt->rt_mtu = mtu; - if (rt->rt_mtu != mtu) { - /* Check if we really need to update */ - ifmtu.ifp = rt->rt_ifp; - ifmtu.mtu = mtu; - if_updatemtu_cb(rt->rt_nodes, &ifmtu); - } - } + /* Provide notification to the protocols.*/ + if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) + nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); - /* Update nexthop */ - nhop_free(rt->rt_nhop); rt->rt_nhop = nh; - nh = NULL; + rt_setmetrics(info, rt); - /* - * This route change may have modified the route's gateway. In that - * case, any inpcbs that have cached this route need to invalidate their - * llentry cache. - */ - rnh->rnh_gen++; + if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) + nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); - if (ret_nrt) { + if (ret_nrt != NULL) { *ret_nrt = rt; RT_ADDREF(rt); } -bad: + RT_UNLOCK(rt); - if (nh != NULL) - nhop_free(nh); - if (free_ifa != 0) { - ifa_free(info->rti_ifa); - info->rti_ifa = NULL; + + /* Update generation id to reflect rtable change */ + rnh->rnh_gen++; + + RIB_WUNLOCK(rnh); + + nhop_free(nh_orig); + + return (0); +} + +static int +change_route(struct rib_head *rnh, struct rt_addrinfo *info, + struct rtentry **ret_nrt) +{ + struct epoch_tracker et; + int error; + + /* Check if updated gateway exists */ + if ((info->rti_flags & RTF_GATEWAY) && + (info->rti_info[RTAX_GATEWAY] == NULL)) + return (EINVAL); + + NET_EPOCH_ENTER(et); + + /* + * route change is done in multiple steps, with dropping and + * reacquiring lock. In the situations with multiple processes + * changes the same route in can lead to the case when route + * is changed between the steps. Address it by retrying the operation + * multiple times before failing. + */ + for (int i = 0; i < RIB_MAX_RETRIES; i++) { + error = change_route_one(rnh, info, ret_nrt); + if (error != EAGAIN) + break; } + NET_EPOCH_EXIT(et); + return (error); } + static void rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt) { - if (info->rti_mflags & RTV_MTU) { - if (info->rti_rmx->rmx_mtu != 0) { - - /* - * MTU was explicitly provided by user. - * Keep it. - */ - rt->rt_flags |= RTF_FIXEDMTU; - } else { - - /* - * User explicitly sets MTU to 0. - * Assume rollback to default. - */ - rt->rt_flags &= ~RTF_FIXEDMTU; - } - rt->rt_mtu = info->rti_rmx->rmx_mtu; - } if (info->rti_mflags & RTV_WEIGHT) rt->rt_weight = info->rti_rmx->rmx_weight; /* Kernel -> userland timebase conversion. */ @@ -1999,7 +1930,7 @@ * rt_gateway already points to the right place. * Otherwise, malloc a new block and update the 'dst' address. */ - if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) { + if (rt_key(rt) == NULL) { caddr_t new; R_Malloc(new, caddr_t, dlen + glen); @@ -2015,14 +1946,8 @@ bcopy(dst, new, dlen); R_Free(rt_key(rt)); /* free old block, if any */ rt_key(rt) = (struct sockaddr *)new; - rt->rt_gateway = (struct sockaddr *)(new + dlen); } - /* - * Copy the new gateway value into the memory chunk. - */ - bcopy(gate, rt->rt_gateway, glen); - return (0); } @@ -2149,8 +2074,8 @@ else { rt = RNTORT(rn); /* - * for interface route the - * rt->rt_gateway is sockaddr_dl, so + * for interface route the gateway + * gateway is sockaddr_dl, so * rt_mpath_matchgate must use the * interface address */ @@ -2192,22 +2117,8 @@ * notify any listening routing agents of the change */ RT_LOCK(rt); -#ifdef RADIX_MPATH - /* - * in case address alias finds the first address - * e.g. ifconfig bge0 192.0.2.246/24 - * e.g. ifconfig bge0 192.0.2.247/24 - * the address set in the route is 192.0.2.246 - * so we need to replace it with 192.0.2.247 - */ - if (memcmp(rt->rt_ifa->ifa_addr, - ifa->ifa_addr, ifa->ifa_addr->sa_len)) { - ifa_free(rt->rt_ifa); - ifa_ref(ifa); - rt->rt_ifp = ifa->ifa_ifp; - rt->rt_ifa = ifa; - } -#endif + + /* TODO: interface routes/aliases */ RT_ADDREF(rt); RT_UNLOCK(rt); rt_newaddrmsg_fib(cmd, ifa, rt, fibnum); Index: head/sys/net/route/route_var.h =================================================================== --- head/sys/net/route/route_var.h +++ head/sys/net/route/route_var.h @@ -74,6 +74,9 @@ #define RIB_LOCK_ASSERT(rh) rm_assert(&(rh)->rib_lock, RA_LOCKED) #define RIB_WLOCK_ASSERT(rh) rm_assert(&(rh)->rib_lock, RA_WLOCKED) +/* Constants */ +#define RIB_MAX_RETRIES 3 + /* Macro for verifying fields in af-specific 'struct route' structures */ #define CHK_STRUCT_FIELD_GENERIC(_s1, _f1, _s2, _f2) \ _Static_assert(sizeof(((_s1 *)0)->_f1) == sizeof(((_s2 *)0)->_f2), \ @@ -117,14 +120,10 @@ #define rt_mask(r) (*((struct sockaddr **)(&(r)->rt_nodes->rn_mask))) #define rt_key_const(r) (*((const struct sockaddr * const *)(&(r)->rt_nodes->rn_key))) #define rt_mask_const(r) (*((const struct sockaddr * const *)(&(r)->rt_nodes->rn_mask))) - struct sockaddr *rt_gateway; /* value */ - struct ifnet *rt_ifp; /* the answer: interface to use */ - struct ifaddr *rt_ifa; /* the answer: interface address to use */ struct nhop_object *rt_nhop; /* nexthop data */ int rt_flags; /* up/down?, host/net */ int rt_refcnt; /* # held references */ u_int rt_fibnum; /* which FIB */ - u_long rt_mtu; /* MTU for this path */ u_long rt_weight; /* absolute weight */ u_long rt_expire; /* lifetime for route, e.g. redirect */ #define rt_endzero rt_pksent Index: head/sys/netinet/in_rmx.c =================================================================== --- head/sys/netinet/in_rmx.c +++ head/sys/netinet/in_rmx.c @@ -120,63 +120,6 @@ return (0); } -/* - * Do what we need to do when inserting a route. - */ -static struct radix_node * -in_addroute(void *v_arg, void *n_arg, struct radix_head *head, - struct radix_node *treenodes) -{ - struct rtentry *rt = (struct rtentry *)treenodes; - struct sockaddr_in *sin = (struct sockaddr_in *)rt_key(rt); - - /* - * A little bit of help for both IP output and input: - * For host routes, we make sure that RTF_BROADCAST - * is set for anything that looks like a broadcast address. - * This way, we can avoid an expensive call to in_broadcast() - * in ip_output() most of the time (because the route passed - * to ip_output() is almost always a host route). - * - * We also do the same for local addresses, with the thought - * that this might one day be used to speed up ip_input(). - * - * We also mark routes to multicast addresses as such, because - * it's easy to do and might be useful (but this is much more - * dubious since it's so easy to inspect the address). - */ - if (rt->rt_flags & RTF_HOST) { - struct epoch_tracker et; - bool bcast; - - NET_EPOCH_ENTER(et); - bcast = in_broadcast(sin->sin_addr, rt->rt_ifp); - NET_EPOCH_EXIT(et); - if (bcast) - rt->rt_flags |= RTF_BROADCAST; - else if (satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr == - sin->sin_addr.s_addr) - rt->rt_flags |= RTF_LOCAL; - } - if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) - rt->rt_flags |= RTF_MULTICAST; - - if (rt->rt_ifp != NULL) { - - /* - * Check route MTU: - * inherit interface MTU if not set or - * check if MTU is too large. - */ - if (rt->rt_mtu == 0) { - rt->rt_mtu = rt->rt_ifp->if_mtu; - } else if (rt->rt_mtu > rt->rt_ifp->if_mtu) - rt->rt_mtu = rt->rt_ifp->if_mtu; - } - - return (rn_addroute(v_arg, n_arg, head, treenodes)); -} - static int _in_rt_was_here; /* * Initialize our routing tree. @@ -191,7 +134,6 @@ return (0); rh->rnh_preadd = rib4_preadd; - rh->rnh_addaddr = in_addroute; #ifdef RADIX_MPATH rt_mpath_init_rnh(rh); #endif Index: head/sys/netinet6/in6_rmx.c =================================================================== --- head/sys/netinet6/in6_rmx.c +++ head/sys/netinet6/in6_rmx.c @@ -143,57 +143,6 @@ } /* - * Do what we need to do when inserting a route. - */ -static struct radix_node * -in6_addroute(void *v_arg, void *n_arg, struct radix_head *head, - struct radix_node *treenodes) -{ - struct rtentry *rt = (struct rtentry *)treenodes; - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt); - - if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) - rt->rt_flags |= RTF_MULTICAST; - - /* - * A little bit of help for both IPv6 output and input: - * For local addresses, we make sure that RTF_LOCAL is set, - * with the thought that this might one day be used to speed up - * ip_input(). - * - * We also mark routes to multicast addresses as such, because - * it's easy to do and might be useful (but this is much more - * dubious since it's so easy to inspect the address). (This - * is done above.) - * - * XXX - * should elaborate the code. - */ - if (rt->rt_flags & RTF_HOST) { - if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr) - ->sin6_addr, - &sin6->sin6_addr)) { - rt->rt_flags |= RTF_LOCAL; - } - } - - if (rt->rt_ifp != NULL) { - - /* - * Check route MTU: - * inherit interface MTU if not set or - * check if MTU is too large. - */ - if (rt->rt_mtu == 0) { - rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp); - } else if (rt->rt_mtu > IN6_LINKMTU(rt->rt_ifp)) - rt->rt_mtu = IN6_LINKMTU(rt->rt_ifp); - } - - return (rn_addroute(v_arg, n_arg, head, treenodes)); -} - -/* * Initialize our routing tree. */ @@ -207,7 +156,6 @@ if (rh == NULL) return (0); - rh->rnh_addaddr = in6_addroute; rh->rnh_preadd = rib6_preadd; #ifdef RADIX_MPATH rt_mpath_init_rnh(rh);