Index: sys/fs/nfsclient/nfs_clvfsops.c =================================================================== --- sys/fs/nfsclient/nfs_clvfsops.c +++ sys/fs/nfsclient/nfs_clvfsops.c @@ -465,18 +465,21 @@ if (nd->mygateway.sin_len != 0 && nd->mygateway.sin_addr.s_addr != 0) { struct sockaddr_in mask, sin; + struct epoch_tracker et; bzero((caddr_t)&mask, sizeof(mask)); sin = mask; sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); /* XXX MRT use table 0 for this sort of thing */ + NET_EPOCH_ENTER(et); CURVNET_SET(TD_TO_VNET(td)); error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB); CURVNET_RESTORE(); + NET_EPOCH_EXIT(et); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); } Index: sys/net/if.c =================================================================== --- sys/net/if.c +++ sys/net/if.c @@ -1854,18 +1854,17 @@ ifp = ifa->ifa_ifp; + NET_EPOCH_ENTER(et); bzero(&info, sizeof(info)); if (cmd != RTM_DELETE) info.rti_ifp = V_loif; if (cmd == RTM_ADD) { /* explicitly specify (loopback) ifa */ if (info.rti_ifp != NULL) { - NET_EPOCH_ENTER(et); rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp); if (rti_ifa != NULL) ifa_ref(rti_ifa); info.rti_ifa = rti_ifa; - NET_EPOCH_EXIT(et); } } info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED; @@ -1874,6 +1873,7 @@ link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type); error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib); + NET_EPOCH_EXIT(et); if (rti_ifa != NULL) ifa_free(rti_ifa); Index: sys/net/route.c =================================================================== --- sys/net/route.c +++ sys/net/route.c @@ -120,9 +120,6 @@ VNET_DEFINE(struct rib_head *, rt_tables); #define V_rt_tables VNET(rt_tables) -VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ -#define V_rttrash VNET(rttrash) - /* * Convert a 'struct radix_node *' to a 'struct rtentry *'. @@ -148,6 +145,7 @@ static struct rtentry *rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror); static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info); +static void destroy_rtentry_epoch(epoch_context_t ctx); #ifdef RADIX_MPATH static struct radix_node *rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info, struct rtentry *rto, int *perror); @@ -332,6 +330,16 @@ } } + /* + * dom_rtdetach calls rt_table_destroy(), which + * schedules deletion for all rtentries, nexthops and control + * structures. Wait for the destruction callbacks to fire. + * Note that this should result in freeing all rtentries, but + * nexthops deletions will be scheduled for the next epoch run + * and will be completed after vnet teardown. + */ + epoch_drain_callbacks(net_epoch_preempt); + free(V_rt_tables, M_RTABLE); uma_zdestroy(V_rtzone); } @@ -449,31 +457,56 @@ if ((rt->rt_flags & RTF_UP) == 0) { if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic("rtfree 2"); - /* - * the rtentry must have been removed from the routing table - * so it is represented in rttrash.. remove that now. - */ - V_rttrash--; #ifdef DIAGNOSTIC if (rt->rt_refcnt < 0) { printf("rtfree: %p not freed (neg refs)\n", rt); goto done; } #endif - - /* Unreference nexthop */ - nhop_free(rt->rt_nhop); + epoch_call(net_epoch_preempt, destroy_rtentry_epoch, + &rt->rt_epoch_ctx); /* - * and the rtentry itself of course + * FALLTHROUGH to RT_UNLOCK() so the reporting functions + * have consistent behaviour of operating on unlocked entry. */ - uma_zfree(V_rtzone, rt); - return; } done: RT_UNLOCK(rt); } +static void +destroy_rtentry(struct rtentry *rt) +{ + + /* + * At this moment rnh, nh_control may be already freed. + * nhop interface may have been migrated to a different vnet. + * Use vnet stored in the nexthop to delete the entry. + */ + CURVNET_SET(nhop_get_vnet(rt->rt_nhop)); + + /* Unreference nexthop */ + nhop_free(rt->rt_nhop); + + uma_zfree(V_rtzone, rt); + + CURVNET_RESTORE(); +} + +/* + * Epoch callback indicating rtentry is safe to destroy + */ +static void +destroy_rtentry_epoch(epoch_context_t ctx) +{ + struct rtentry *rt; + + rt = __containerof(ctx, struct rtentry, rt_epoch_ctx); + + destroy_rtentry(rt); +} + /* * Temporary RTFREE() function wrapper. * Intended to use in control plane code to @@ -546,7 +579,7 @@ RT_LOCK(rt); flags = rt->rt_flags; - RTFREE_LOCKED(rt); + RT_UNLOCK(rt); RTSTAT_INC(rts_dynamic); @@ -1112,13 +1145,6 @@ ifa = rt->rt_nhop->nh_ifa; if (ifa != NULL && ifa->ifa_rtrequest != NULL) ifa->ifa_rtrequest(RTM_DELETE, rt, rt->rt_nhop, info); - - /* - * One more rtentry floating around that is not - * linked to the routing table. rttrash will be decremented - * when RTFREE(rt) is eventually called. - */ - V_rttrash++; } @@ -1386,6 +1412,7 @@ KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); KASSERT((info->rti_flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked")); + NET_EPOCH_ASSERT(); dst = info->rti_info[RTAX_DST]; @@ -1580,13 +1607,10 @@ ifa->ifa_rtrequest(RTM_ADD, rt, rt->rt_nhop, info); /* - * actually return a resultant rtentry and - * give the caller a single reference. + * actually return a resultant rtentry */ - if (ret_nrt) { + if (ret_nrt) *ret_nrt = rt; - RT_ADDREF(rt); - } rnh->rnh_gen++; /* Routing table updated */ RT_UNLOCK(rt); @@ -1622,15 +1646,13 @@ /* * If the caller wants it, then it can have it, - * but it's up to it to free the rtentry as we won't be - * doing it. + * the entry will be deleted after the end of the current epoch. */ - if (ret_nrt) { + if (ret_nrt) *ret_nrt = rt; - RT_UNLOCK(rt); - } else - RTFREE_LOCKED(rt); - + + RTFREE_LOCKED(rt); + return (0); } @@ -1736,10 +1758,8 @@ if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); - if (ret_nrt != NULL) { + if (ret_nrt != NULL) *ret_nrt = rt; - RT_ADDREF(rt); - } RT_UNLOCK(rt); @@ -1757,7 +1777,6 @@ change_route(struct rib_head *rnh, struct rt_addrinfo *info, struct rtentry **ret_nrt) { - struct epoch_tracker et; int error; /* Check if updated gateway exists */ @@ -1765,8 +1784,6 @@ (info->rti_info[RTAX_GATEWAY] == NULL)) return (EINVAL); - NET_EPOCH_ENTER(et); - /* * route change is done in multiple steps, with dropping and * reacquiring lock. In the situations with multiple processes @@ -1779,7 +1796,6 @@ if (error != EAGAIN) break; } - NET_EPOCH_EXIT(et); return (error); } @@ -1825,6 +1841,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) { RIB_RLOCK_TRACKER; + struct epoch_tracker et; struct sockaddr *dst; struct sockaddr *netmask; struct rtentry *rt = NULL; @@ -1957,38 +1974,18 @@ else info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = netmask; + NET_EPOCH_ENTER(et); error = rtrequest1_fib(cmd, &info, &rt, fibnum); if (error == 0 && rt != NULL) { /* * notify any listening routing agents of the change */ - RT_LOCK(rt); /* TODO: interface routes/aliases */ - RT_ADDREF(rt); - RT_UNLOCK(rt); rt_newaddrmsg_fib(cmd, ifa, rt, fibnum); - RT_LOCK(rt); - RT_REMREF(rt); - if (cmd == RTM_DELETE) { - /* - * If we are deleting, and we found an entry, - * then it's been removed from the tree.. - * now throw it away. - */ - RTFREE_LOCKED(rt); - } else { - if (cmd == RTM_ADD) { - /* - * We just wanted to add it.. - * we don't actually need a reference. - */ - RT_REMREF(rt); - } - RT_UNLOCK(rt); - } didwork = 1; } + NET_EPOCH_EXIT(et); if (error) a_failure = error; } Index: sys/net/route/nhop.h =================================================================== --- sys/net/route/nhop.h +++ sys/net/route/nhop.h @@ -176,6 +176,7 @@ uint32_t nhop_get_idx(const struct nhop_object *nh); enum nhop_type nhop_get_type(const struct nhop_object *nh); int nhop_get_rtflags(const struct nhop_object *nh); +struct vnet *nhop_get_vnet(const struct nhop_object *nh); #endif /* _KERNEL */ Index: sys/net/route/nhop_ctl.c =================================================================== --- sys/net/route/nhop_ctl.c +++ sys/net/route/nhop_ctl.c @@ -500,6 +500,9 @@ return (ENOMEM); } + /* Save vnet to ease destruction */ + nh_priv->nh_vnet = curvnet; + /* Reference external objects and calculate (referenced) ifa */ if_ref(nh->nh_ifp); ifa_ref(nh->nh_ifa); @@ -698,6 +701,13 @@ nh->nh_priv->rt_flags = rt_flags; } +struct vnet * +nhop_get_vnet(const struct nhop_object *nh) +{ + + return (nh->nh_priv->nh_vnet); +} + void nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu) { Index: sys/net/route/nhop_var.h =================================================================== --- sys/net/route/nhop_var.h +++ sys/net/route/nhop_var.h @@ -78,6 +78,7 @@ struct nhop_object *nh; /* backreference to the dataplane nhop */ struct nh_control *nh_control; /* backreference to the rnh */ struct nhop_priv *nh_next; /* hash table membership */ + struct vnet *nh_vnet; /* vnet nhop belongs to */ struct epoch_context nh_epoch_ctx; /* epoch data for nhop */ }; Index: sys/net/route/route_var.h =================================================================== --- sys/net/route/route_var.h +++ sys/net/route/route_var.h @@ -35,6 +35,7 @@ #ifndef RNF_NORMAL #include #endif +#include #include /* struct sockaddr_in */ #include @@ -148,6 +149,7 @@ #define rt_endzero rt_mtx struct mtx rt_mtx; /* mutex for routing entry */ struct rtentry *rt_chain; /* pointer to next rtentry to delete */ + struct epoch_context rt_epoch_ctx; /* net epoch tracker */ }; #define RT_LOCK_INIT(_rt) \ Index: sys/net/rtsock.c =================================================================== --- sys/net/rtsock.c +++ sys/net/rtsock.c @@ -742,7 +742,6 @@ } } RT_LOCK(rt); - RT_ADDREF(rt); RIB_RUNLOCK(rnh); *ret_nrt = rt; @@ -930,7 +929,6 @@ #endif RT_LOCK(saved_nrt); rtm->rtm_index = saved_nrt->rt_nhop->nh_ifp->if_index; - RT_REMREF(saved_nrt); RT_UNLOCK(saved_nrt); } break; @@ -987,8 +985,7 @@ flush: NET_EPOCH_EXIT(et); - if (rt != NULL) - RTFREE(rt); + rt = NULL; #ifdef INET6 if (rtm != NULL) { Index: sys/netinet6/nd6.c =================================================================== --- sys/netinet6/nd6.c +++ sys/netinet6/nd6.c @@ -1566,14 +1566,17 @@ int fibnum; struct sockaddr_in6 sin6; struct rt_addrinfo info; + struct epoch_tracker et; lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6); memset(&info, 0, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6; info.rti_filter = nd6_isdynrte; + NET_EPOCH_ENTER(et); for (fibnum = 0; fibnum < rt_numfibs; fibnum++) rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum); + NET_EPOCH_EXIT(et); } /* Index: sys/netinet6/nd6_rtr.c =================================================================== --- sys/netinet6/nd6_rtr.c +++ sys/netinet6/nd6_rtr.c @@ -690,10 +690,8 @@ error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &newrt, fibnum); - if (newrt) { + if (newrt != NULL) rt_routemsg(RTM_ADD, newrt, new->ifp, 0, fibnum); - RTFREE_FUNC(newrt); - } if (error == 0) new->installed = 1; } @@ -708,6 +706,7 @@ { struct sockaddr_in6 def, mask, gate; struct rtentry *oldrt = NULL; + struct epoch_tracker et; unsigned int fibnum; bzero(&def, sizeof(def)); @@ -720,13 +719,13 @@ gate.sin6_addr = dr->rtaddr; fibnum = dr->ifp->if_fib; + NET_EPOCH_ENTER(et); in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, fibnum); - if (oldrt) { + if (oldrt != NULL) rt_routemsg(RTM_DELETE, oldrt, dr->ifp, 0, fibnum); - RTFREE_FUNC(oldrt); - } + NET_EPOCH_EXIT(et); dr->installed = 0; } @@ -1022,6 +1021,7 @@ } ND6_RUNLOCK(); + NET_EPOCH_ENTER(et); /* * If we selected a router for this FIB and it's different * than the installed one, remove the installed router and @@ -1037,6 +1037,7 @@ } if (selected_dr != NULL) defrouter_rele(selected_dr); + NET_EPOCH_EXIT(et); } static struct nd_defrouter * @@ -2064,7 +2065,6 @@ pr->ndpr_stateflags |= NDPRF_ONLINK; rt_routemsg(RTM_ADD, rt, pr->ndpr_ifp, 0, fibnum); - RTFREE_FUNC(rt); } /* Return the last error we got. */ @@ -2132,7 +2132,6 @@ } /* should we care about ia6_flags? */ } - NET_EPOCH_EXIT(et); if (ifa == NULL) { /* * This can still happen, when, for example, we receive an RA @@ -2145,13 +2144,12 @@ "prefix(%s/%d) on %s\n", __func__, ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(ifp))); - return (0); - } - - error = nd6_prefix_onlink_rtrequest(pr, ifa); - - if (ifa != NULL) + error = 0; + } else { + error = nd6_prefix_onlink_rtrequest(pr, ifa); ifa_free(ifa); + } + NET_EPOCH_EXIT(et); return (error); } @@ -2167,6 +2165,7 @@ char ip6buf[INET6_ADDRSTRLEN]; uint64_t genid; int fibnum, maxfib, a_failure; + struct epoch_tracker et; ND6_ONLINK_LOCK_ASSERT(); ND6_UNLOCK_ASSERT(); @@ -2193,6 +2192,7 @@ } a_failure = 0; + NET_EPOCH_ENTER(et); for (; fibnum < maxfib; fibnum++) { rt = NULL; error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL, @@ -2205,8 +2205,8 @@ /* report route deletion to the routing socket. */ rt_routemsg(RTM_DELETE, rt, ifp, 0, fibnum); - RTFREE_FUNC(rt); } + NET_EPOCH_EXIT(et); error = a_failure; a_failure = 1; if (error == 0) {