Index: sys/contrib/ipfilter/netinet/mlfk_ipl.c =================================================================== --- sys/contrib/ipfilter/netinet/mlfk_ipl.c +++ sys/contrib/ipfilter/netinet/mlfk_ipl.c @@ -287,7 +287,7 @@ }; -DECLARE_MODULE(ipfilter, ipfiltermod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); +DECLARE_MODULE(ipfilter, ipfiltermod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); #ifdef MODULE_VERSION MODULE_VERSION(ipfilter, 1); #endif Index: sys/dev/usb/net/usb_ethernet.c =================================================================== --- sys/dev/usb/net/usb_ethernet.c +++ sys/dev/usb/net/usb_ethernet.c @@ -641,5 +641,9 @@ } } -DECLARE_MODULE(uether, uether_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +/* + * USB net drivers are run by DRIVER_MODULE() thus SI_SUB_DRIVERS, + * SI_ORDER_MIDDLE. Run uether after that. + */ +DECLARE_MODULE(uether, uether_mod, SI_SUB_DRIVERS, SI_ORDER_ANY); MODULE_VERSION(uether, 1); Index: sys/kern/kern_hhook.c =================================================================== --- sys/kern/kern_hhook.c +++ sys/kern/kern_hhook.c @@ -510,7 +510,7 @@ /* * When a vnet is created and being initialised, init the V_hhook_vhead_list. */ -VNET_SYSINIT(hhook_vnet_init, SI_SUB_MBUF, SI_ORDER_FIRST, +VNET_SYSINIT(hhook_vnet_init, SI_SUB_INIT_IF, SI_ORDER_FIRST, hhook_vnet_init, NULL); /* @@ -518,5 +518,5 @@ * points to clean up on vnet tear down, but in case the KPI is misused, * provide a function to clean up and free memory for a vnet being destroyed. */ -VNET_SYSUNINIT(hhook_vnet_uninit, SI_SUB_MBUF, SI_ORDER_ANY, +VNET_SYSUNINIT(hhook_vnet_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, hhook_vnet_uninit, NULL); Index: sys/net/if.c =================================================================== --- sys/net/if.c +++ sys/net/if.c @@ -914,6 +914,16 @@ CURVNET_RESTORE(); } +/* + * The vmove flag, if set, indicates that we are called from a callpath + * that is moving an interface to a different vnet instance. + * + * The shutdown flag, if set, indicates that we are called in the + * process of shutting down a vnet instance. Currently only the + * vnet_if_return SYSUNINIT function sets it. Note: we can be called + * on a vnet instance shutdown without this flag being set, e.g., when + * the cloned interfaces are destoyed as first thing of teardown. + */ static int if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) { @@ -921,8 +931,10 @@ int i; struct domain *dp; struct ifnet *iter; - int found = 0; + int found = 0, shutdown; + shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && + ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; IFNET_WLOCK(); TAILQ_FOREACH(iter, &V_ifnet, if_link) if (iter == ifp) { @@ -930,10 +942,6 @@ found = 1; break; } -#ifdef VIMAGE - if (found) - curvnet->vnet_ifcnt--; -#endif IFNET_WUNLOCK(); if (!found) { /* @@ -951,19 +959,58 @@ #endif } - /* Check if this is a cloned interface or not. */ + /* + * At this point we know the interface still was on the ifnet list + * and we removed it so we are in a stable state. + */ +#ifdef VIMAGE + curvnet->vnet_ifcnt--; +#endif + + /* + * In any case (destroy or vmove) detach us from the groups + * and remove/wait for pending events on the taskq. + * XXX-BZ in theory an interface could still enqueue a taskq change? + */ + if_delgroups(ifp); + + taskqueue_drain(taskqueue_swi, &ifp->if_linktask); + + /* + * Check if this is a cloned interface or not. Must do even if + * shutting down as a if_vmove_reclaim() would move the ifp and + * the if_clone_addgroup() will have a corrupted string overwise + * from a gibberish pointer. + */ if (vmove && ifcp != NULL) *ifcp = if_clone_findifc(ifp); + if_down(ifp); + /* - * Remove/wait for pending events. + * On VNET shutdown abort here as the stack teardown will do all + * the work top-down for us. + */ + if (shutdown) { + /* + * In case of a vmove we are done here without error. + * If we would signal an error it would lead to the same + * abort as if we did not find the ifnet anymore. + * if_detach() calls us in void context and does not care + * about an early abort notification, so life is splendid :) + */ + goto finish_vnet_shutdown; + } + + /* + * At this point we are not tearing down a VNET and are either + * going to destroy or vmove the interface and have to cleanup + * accordingly. */ - taskqueue_drain(taskqueue_swi, &ifp->if_linktask); /* * Remove routes and flush queues. */ - if_down(ifp); #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) altq_disable(&ifp->if_snd); @@ -974,7 +1021,7 @@ if_purgeaddrs(ifp); #ifdef INET - in_ifdetach(ifp); + in_ifdetach(ifp, 1); #endif #ifdef INET6 @@ -984,7 +1031,7 @@ * routes are expected to be removed by the IPv6-specific kernel API. * Otherwise, the kernel will detect some inconsistency and bark it. */ - in6_ifdetach(ifp); + in6_ifdetach(ifp, 1); #endif if_purgemaddrs(ifp); @@ -1018,8 +1065,8 @@ } rt_flushifroutes(ifp); - if_delgroups(ifp); +finish_vnet_shutdown: /* * We cannot hold the lock over dom_ifdetach calls as they might * sleep, for example trying to drain a callout, thus open up the @@ -1048,7 +1095,7 @@ * unused if_index in target vnet and calls if_grow() if necessary, * and finally find an unused if_xname for the target vnet. */ -void +static void if_vmove(struct ifnet *ifp, struct vnet *new_vnet) { struct if_clone *ifc; @@ -1115,6 +1162,7 @@ { struct prison *pr; struct ifnet *difp; + int shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); @@ -1135,12 +1183,22 @@ /* XXX Lock interfaces to avoid races. */ CURVNET_SET_QUIET(pr->pr_vnet); difp = ifunit(ifname); - CURVNET_RESTORE(); if (difp != NULL) { + CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } + /* Make sure the VNET is stable. */ + shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && + ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; + if (shutdown) { + CURVNET_RESTORE(); + prison_free(pr); + return (EBUSY); + } + CURVNET_RESTORE(); + /* Move the interface into the child jail/vnet. */ if_vmove(ifp, pr->pr_vnet); @@ -1157,6 +1215,7 @@ struct prison *pr; struct vnet *vnet_dst; struct ifnet *ifp; + int shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); @@ -1184,6 +1243,15 @@ return (EEXIST); } + /* Make sure the VNET is stable. */ + shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && + ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; + if (shutdown) { + CURVNET_RESTORE(); + prison_free(pr); + return (EBUSY); + } + /* Get interface back from child jail/vnet. */ if_vmove(ifp, vnet_dst); CURVNET_RESTORE(); @@ -2642,8 +2710,22 @@ struct ifreq *ifr; int error; int oif_flags; +#ifdef VIMAGE + int shutdown; +#endif CURVNET_SET(so->so_vnet); +#ifdef VIMAGE + /* Make sure the VNET is stable. */ + shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET && + so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; + if (shutdown) { + CURVNET_RESTORE(); + return (EBUSY); + } +#endif + + switch (cmd) { case SIOCGIFCONF: error = ifconf(cmd, data); Index: sys/net/if_bridge.c =================================================================== --- sys/net/if_bridge.c +++ sys/net/if_bridge.c @@ -541,7 +541,7 @@ V_bridge_cloner = NULL; BRIDGE_LIST_LOCK_DESTROY(); } -VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_bridge_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_bridge_uninit, NULL); static int @@ -1132,7 +1132,7 @@ LIST_FOREACH(bif, &sc->sc_iflist, bif_next) { if (in6ifa_llaonifp(bif->bif_ifp)) { BRIDGE_UNLOCK(sc); - in6_ifdetach(bif->bif_ifp); + in6_ifdetach(bif->bif_ifp, 1); BRIDGE_LOCK(sc); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " @@ -1144,7 +1144,7 @@ BRIDGE_XDROP(sc); if (in6ifa_llaonifp(ifs)) { BRIDGE_UNLOCK(sc); - in6_ifdetach(ifs); + in6_ifdetach(ifs, 1); BRIDGE_LOCK(sc); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " Index: sys/net/if_disc.c =================================================================== --- sys/net/if_disc.c +++ sys/net/if_disc.c @@ -137,7 +137,7 @@ V_disc_cloner = if_clone_simple(discname, disc_clone_create, disc_clone_destroy, 0); } -VNET_SYSINIT(vnet_disc_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSINIT(vnet_disc_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_disc_init, NULL); static void @@ -146,7 +146,7 @@ if_clone_detach(V_disc_cloner); } -VNET_SYSUNINIT(vnet_disc_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_disc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_disc_uninit, NULL); static int Index: sys/net/if_edsc.c =================================================================== --- sys/net/if_edsc.c +++ sys/net/if_edsc.c @@ -336,7 +336,7 @@ */ if_clone_detach(V_edsc_cloner); } -VNET_SYSUNINIT(vnet_edsc_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_edsc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_edsc_uninit, NULL); /* Index: sys/net/if_enc.c =================================================================== --- sys/net/if_enc.c +++ sys/net/if_enc.c @@ -136,7 +136,6 @@ sc = ifp->if_softc; KASSERT(sc == V_enc_sc, ("sc != ifp->if_softc")); - enc_remove_hhooks(sc); bpfdetach(ifp); if_detach(ifp); if_free(ifp); @@ -170,10 +169,6 @@ ifp->if_softc = sc; if_attach(ifp); bpfattach(ifp, DLT_ENC, sizeof(struct enchdr)); - if (enc_add_hhooks(sc) != 0) { - enc_clone_destroy(ifp); - return (ENXIO); - } return (0); } @@ -369,18 +364,44 @@ V_enc_cloner = if_clone_simple(encname, enc_clone_create, enc_clone_destroy, 1); } -VNET_SYSINIT(vnet_enc_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSINIT(vnet_enc_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_enc_init, NULL); static void +vnet_enc_init_proto(void *unused __unused) +{ + KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc)); + + if (enc_add_hhooks(V_enc_sc) != 0) + enc_clone_destroy(V_enc_sc->sc_ifp); +} +VNET_SYSINIT(vnet_enc_init_proto, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_enc_init_proto, NULL); + +static void vnet_enc_uninit(const void *unused __unused) { + KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc)); if_clone_detach(V_enc_cloner); } -VNET_SYSUNINIT(vnet_enc_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_enc_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_enc_uninit, NULL); +/* + * The hhook consumer needs to go before ip[6]_destroy are called on + * SI_ORDER_THIRD. + */ +static void +vnet_enc_uninit_hhook(const void *unused __unused) +{ + KASSERT(V_enc_sc != NULL, ("%s: V_enc_sc is %p\n", __func__, V_enc_sc)); + + enc_remove_hhooks(V_enc_sc); +} +VNET_SYSUNINIT(vnet_enc_uninit_hhook, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, + vnet_enc_uninit_hhook, NULL); + static int enc_modevent(module_t mod, int type, void *data) { @@ -401,4 +422,4 @@ 0 }; -DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); +DECLARE_MODULE(if_enc, enc_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); Index: sys/net/if_epair.c =================================================================== --- sys/net/if_epair.c +++ sys/net/if_epair.c @@ -963,7 +963,7 @@ netisr_register_vnet(&epair_nh); #endif } -VNET_SYSINIT(vnet_epair_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_epair_init, NULL); static void @@ -975,7 +975,7 @@ #endif if_clone_detach(V_epair_cloner); } -VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_epair_uninit, NULL); static int @@ -1012,5 +1012,5 @@ 0 }; -DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); MODULE_VERSION(if_epair, 1); Index: sys/net/if_lagg.c =================================================================== --- sys/net/if_lagg.c +++ sys/net/if_lagg.c @@ -271,7 +271,7 @@ if_clone_detach(V_lagg_cloner); LAGG_LIST_LOCK_DESTROY(); } -VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_lagg_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_lagg_uninit, NULL); static int @@ -1466,7 +1466,7 @@ * interface. */ if (in6ifa_llaonifp(tpif)) { - in6_ifdetach(tpif); + in6_ifdetach(tpif, 1); if_printf(sc->sc_ifp, "IPv6 addresses on %s have been removed " "before adding it as a member to prevent " Index: sys/net/if_loop.c =================================================================== --- sys/net/if_loop.c +++ sys/net/if_loop.c @@ -156,7 +156,7 @@ 1); #endif } -VNET_SYSINIT(vnet_loif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSINIT(vnet_loif_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_loif_init, NULL); #ifdef VIMAGE @@ -167,7 +167,7 @@ if_clone_detach(V_lo_cloner); V_loif = NULL; } -VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_loif_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_loif_uninit, NULL); #endif Index: sys/net/if_vlan.c =================================================================== --- sys/net/if_vlan.c +++ sys/net/if_vlan.c @@ -823,7 +823,7 @@ if_clone_detach(V_vlan_cloner); } -VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, +VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_vlan_uninit, NULL); #endif Index: sys/net/pfil.c =================================================================== --- sys/net/pfil.c +++ sys/net/pfil.c @@ -383,17 +383,14 @@ PFIL_LOCK_DESTROY_REAL(&V_pfil_lock); } -/* Define startup order. */ -#define PFIL_SYSINIT_ORDER SI_SUB_PROTO_BEGIN -#define PFIL_MODEVENT_ORDER (SI_ORDER_FIRST) /* On boot slot in here. */ -#define PFIL_VNET_ORDER (PFIL_MODEVENT_ORDER + 2) /* Later still. */ - /* * Starting up. * * VNET_SYSINIT is called for each existing vnet and each new vnet. + * Make sure the pfil bits are first before any possible subsystem which + * might piggyback on the SI_SUB_PROTO_PFIL. */ -VNET_SYSINIT(vnet_pfil_init, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER, +VNET_SYSINIT(vnet_pfil_init, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, vnet_pfil_init, NULL); /* @@ -401,5 +398,5 @@ * * VNET_SYSUNINIT is called for each exiting vnet as it exits. */ -VNET_SYSUNINIT(vnet_pfil_uninit, PFIL_SYSINIT_ORDER, PFIL_VNET_ORDER, +VNET_SYSUNINIT(vnet_pfil_uninit, SI_SUB_PROTO_PFIL, SI_ORDER_FIRST, vnet_pfil_uninit, NULL); Index: sys/net/route.c =================================================================== --- sys/net/route.c +++ sys/net/route.c @@ -334,7 +334,7 @@ free(V_rt_tables, M_RTABLE); uma_zdestroy(V_rtzone); } -VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, +VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, vnet_route_uninit, 0); #endif Index: sys/net/vnet.h =================================================================== --- sys/net/vnet.h +++ sys/net/vnet.h @@ -111,8 +111,8 @@ { \ VNET_PCPUSTAT_ALLOC(name, M_WAITOK); \ } \ -VNET_SYSINIT(vnet_ ## name ## _init, SI_SUB_PROTO_IFATTACHDOMAIN, \ - SI_ORDER_ANY, vnet_ ## name ## _init, NULL) +VNET_SYSINIT(vnet_ ## name ## _init, SI_SUB_INIT_IF, \ + SI_ORDER_FIRST, vnet_ ## name ## _init, NULL) #define VNET_PCPUSTAT_SYSUNINIT(name) \ static void \ @@ -120,8 +120,8 @@ { \ VNET_PCPUSTAT_FREE(name); \ } \ -VNET_SYSUNINIT(vnet_ ## name ## _uninit, SI_SUB_PROTO_IFATTACHDOMAIN, \ - SI_ORDER_ANY, vnet_ ## name ## _uninit, NULL) +VNET_SYSUNINIT(vnet_ ## name ## _uninit, SI_SUB_INIT_IF, \ + SI_ORDER_FIRST, vnet_ ## name ## _uninit, NULL) #ifdef SYSCTL_OID #define SYSCTL_VNET_PCPUSTAT(parent, nbr, name, type, array, desc) \ Index: sys/net/vnet.c =================================================================== --- sys/net/vnet.c +++ sys/net/vnet.c @@ -331,8 +331,7 @@ curvnet = NULL; } - -SYSINIT(vnet_init_done, SI_SUB_VNET_DONE, SI_ORDER_FIRST, vnet_init_done, +SYSINIT(vnet_init_done, SI_SUB_VNET_DONE, SI_ORDER_ANY, vnet_init_done, NULL); /* Index: sys/netgraph/ng_eiface.c =================================================================== --- sys/netgraph/ng_eiface.c +++ sys/netgraph/ng_eiface.c @@ -679,5 +679,5 @@ delete_unrhdr(V_ng_eiface_unit); } -VNET_SYSUNINIT(vnet_ng_eiface_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_ng_eiface_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_ng_eiface_uninit, NULL); Index: sys/netgraph/ng_iface.c =================================================================== --- sys/netgraph/ng_iface.c +++ sys/netgraph/ng_iface.c @@ -786,5 +786,5 @@ delete_unrhdr(V_ng_iface_unit); } -VNET_SYSUNINIT(vnet_ng_iface_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_ng_iface_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY, vnet_ng_iface_uninit, NULL); Index: sys/netinet/igmp.c =================================================================== --- sys/netinet/igmp.c +++ sys/netinet/igmp.c @@ -227,7 +227,8 @@ #define V_state_change_timers_running VNET(state_change_timers_running) #define V_current_state_timers_running VNET(current_state_timers_running) -static VNET_DEFINE(LIST_HEAD(, igmp_ifsoftc), igi_head); +static VNET_DEFINE(LIST_HEAD(, igmp_ifsoftc), igi_head) = + LIST_HEAD_INITIALIZER(igi_head); static VNET_DEFINE(struct igmpstat, igmpstat) = { .igps_version = IGPS_VERSION_3, .igps_len = sizeof(struct igmpstat), @@ -701,10 +702,6 @@ return; } } - -#ifdef INVARIANTS - panic("%s: igmp_ifsoftc not found for ifp %p\n", __func__, ifp); -#endif } /* @@ -3595,57 +3592,28 @@ } #endif -static void -igmp_init(void *unused __unused) -{ - - CTR1(KTR_IGMPV3, "%s: initializing", __func__); - - IGMP_LOCK_INIT(); - - m_raopt = igmp_ra_alloc(); - - netisr_register(&igmp_nh); -} -SYSINIT(igmp_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, igmp_init, NULL); - -static void -igmp_uninit(void *unused __unused) -{ - - CTR1(KTR_IGMPV3, "%s: tearing down", __func__); - - netisr_unregister(&igmp_nh); - - m_free(m_raopt); - m_raopt = NULL; - - IGMP_LOCK_DESTROY(); -} -SYSUNINIT(igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, igmp_uninit, NULL); - +#ifdef VIMAGE static void vnet_igmp_init(const void *unused __unused) { - CTR1(KTR_IGMPV3, "%s: initializing", __func__); - - LIST_INIT(&V_igi_head); + netisr_register_vnet(&igmp_nh); } -VNET_SYSINIT(vnet_igmp_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_igmp_init, - NULL); +VNET_SYSINIT(vnet_igmp_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, + vnet_igmp_init, NULL); static void vnet_igmp_uninit(const void *unused __unused) { + /* This can happen when we shutdown the entire network stack. */ CTR1(KTR_IGMPV3, "%s: tearing down", __func__); - KASSERT(LIST_EMPTY(&V_igi_head), - ("%s: igi list not empty; ifnets not detached?", __func__)); + netisr_unregister_vnet(&igmp_nh); } -VNET_SYSUNINIT(vnet_igmp_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, +VNET_SYSUNINIT(vnet_igmp_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_igmp_uninit, NULL); +#endif #ifdef DDB DB_SHOW_COMMAND(igi_list, db_show_igi_list) @@ -3682,14 +3650,24 @@ igmp_modevent(module_t mod, int type, void *unused __unused) { - switch (type) { - case MOD_LOAD: - case MOD_UNLOAD: - break; - default: - return (EOPNOTSUPP); - } - return (0); + switch (type) { + case MOD_LOAD: + CTR1(KTR_IGMPV3, "%s: initializing", __func__); + IGMP_LOCK_INIT(); + m_raopt = igmp_ra_alloc(); + netisr_register(&igmp_nh); + break; + case MOD_UNLOAD: + CTR1(KTR_IGMPV3, "%s: tearing down", __func__); + netisr_unregister(&igmp_nh); + m_free(m_raopt); + m_raopt = NULL; + IGMP_LOCK_DESTROY(); + break; + default: + return (EOPNOTSUPP); + } + return (0); } static moduledata_t igmp_mod = { @@ -3697,4 +3675,4 @@ igmp_modevent, 0 }; -DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE); Index: sys/netinet/in.h =================================================================== --- sys/netinet/in.h +++ sys/netinet/in.h @@ -648,7 +648,7 @@ char *inet_ntoa_r(struct in_addr ina, char *buf); /* in libkern */ char *inet_ntop(int, const void *, char *, socklen_t); /* in libkern */ int inet_pton(int af, const char *, void *); /* in libkern */ -void in_ifdetach(struct ifnet *); +void in_ifdetach(struct ifnet *, int); #define in_hosteq(s, t) ((s).s_addr == (t).s_addr) #define in_nullhost(x) ((x).s_addr == INADDR_ANY) Index: sys/netinet/in.c =================================================================== --- sys/netinet/in.c +++ sys/netinet/in.c @@ -895,6 +895,39 @@ #undef rtinitflags +void +in_ifscrub_all(void) +{ + struct ifnet *ifp; + struct ifaddr *ifa, *nifa; + struct ifaliasreq ifr; + + IFNET_RLOCK(); + TAILQ_FOREACH(ifp, &V_ifnet, if_link) { + /* Cannot lock here - lock recursion. */ + /* IF_ADDR_RLOCK(ifp); */ + TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) { + if (ifa->ifa_addr->sa_family != AF_INET) + continue; + + /* + * This is ugly but the only way for legacy IP to + * cleanly remove addresses and everything attached. + */ + bzero(&ifr, sizeof(ifr)); + ifr.ifra_addr = *ifa->ifa_addr; + if (ifa->ifa_dstaddr) + ifr.ifra_broadaddr = *ifa->ifa_dstaddr; + (void)in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, + ifp, NULL); + } + /* IF_ADDR_RUNLOCK(ifp); */ + in_purgemaddrs(ifp); + igmp_domifdetach(ifp); + } + IFNET_RUNLOCK(); +} + /* * Return 1 if the address might be a local broadcast address. */ @@ -937,14 +970,23 @@ /* * On interface removal, clean up IPv4 data structures hung off of the ifnet. + * + * When shutting down a VNET we clean up layers top-down. In that case + * upper layer protocols (ulp) are cleaned up already and locks are destroyed + * and we must not call into these cleanup functions anymore, thus purgeulp + * is set to 0 in that case. + * The normal case of destroying a (cloned) interface still needs to cleanup + * everything related to the interface and will have purgeulp set to 1. */ void -in_ifdetach(struct ifnet *ifp) +in_ifdetach(struct ifnet *ifp, int purgeulp) { - in_pcbpurgeif0(&V_ripcbinfo, ifp); - in_pcbpurgeif0(&V_udbinfo, ifp); - in_pcbpurgeif0(&V_ulitecbinfo, ifp); + if (purgeulp) { + in_pcbpurgeif0(&V_ripcbinfo, ifp); + in_pcbpurgeif0(&V_udbinfo, ifp); + in_pcbpurgeif0(&V_ulitecbinfo, ifp); + } in_purgemaddrs(ifp); } Index: sys/netinet/in_var.h =================================================================== --- sys/netinet/in_var.h +++ sys/netinet/in_var.h @@ -376,6 +376,7 @@ struct thread *); int in_addprefix(struct in_ifaddr *, int); int in_scrubprefix(struct in_ifaddr *, u_int); +void in_ifscrub_all(void); void ip_input(struct mbuf *); void ip_direct_input(struct mbuf *); void in_ifadown(struct ifaddr *ifa, int); Index: sys/netinet/ip_id.c =================================================================== --- sys/netinet/ip_id.c +++ sys/netinet/ip_id.c @@ -294,4 +294,4 @@ counter_u64_free(V_ip_id); mtx_destroy(&V_ip_id_mtx); } -VNET_SYSUNINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, ipid_sysuninit, NULL); +VNET_SYSUNINIT(ip_id, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ipid_sysuninit, NULL); Index: sys/netinet/ip_input.c =================================================================== --- sys/netinet/ip_input.c +++ sys/netinet/ip_input.c @@ -371,6 +371,7 @@ static void ip_destroy(void *unused __unused) { + struct ifnet *ifp; int error; #ifdef RSS @@ -394,11 +395,21 @@ "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: " "error %d returned\n", __func__, error); } - /* Cleanup in_ifaddr hash table; should be empty. */ - hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); + + /* Remove the IPv4 addresses from all interfaces. */ + in_ifscrub_all(); + + /* Make sure the IPv4 routes are gone as well. */ + IFNET_RLOCK(); + TAILQ_FOREACH(ifp, &V_ifnet, if_link) + rt_flushifroutes_af(ifp, AF_INET); + IFNET_RUNLOCK(); /* Destroy IP reassembly queue. */ ipreass_destroy(); + + /* Cleanup in_ifaddr hash table; should be empty. */ + hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); } VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL); Index: sys/netinet/ip_mroute.c =================================================================== --- sys/netinet/ip_mroute.c +++ sys/netinet/ip_mroute.c @@ -2822,7 +2822,7 @@ callout_init(&V_bw_meter_ch, 1); } -VNET_SYSINIT(vnet_mroute_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mroute_init, +VNET_SYSINIT(vnet_mroute_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mroute_init, NULL); static void @@ -2833,7 +2833,7 @@ V_nexpire = NULL; } -VNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, +VNET_SYSUNINIT(vnet_mroute_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, vnet_mroute_uninit, NULL); static int @@ -2946,4 +2946,4 @@ 0 }; -DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE); +DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE); Index: sys/netinet6/in6.c =================================================================== --- sys/netinet6/in6.c +++ sys/netinet6/in6.c @@ -2463,7 +2463,7 @@ mld_domifdetach(ifp); scope6_ifdetach(ext->scope6_id); - nd6_ifdetach(ext->nd_ifinfo); + nd6_ifdetach(ifp, ext->nd_ifinfo); lltable_free(ext->lltable); COUNTER_ARRAY_FREE(ext->in6_ifstat, sizeof(struct in6_ifstat) / sizeof(uint64_t)); Index: sys/netinet6/in6_ifattach.h =================================================================== --- sys/netinet6/in6_ifattach.h +++ sys/netinet6/in6_ifattach.h @@ -36,7 +36,7 @@ #ifdef _KERNEL void in6_ifattach(struct ifnet *, struct ifnet *); void in6_ifattach_destroy(void); -void in6_ifdetach(struct ifnet *); +void in6_ifdetach(struct ifnet *, int); int in6_get_tmpifid(struct ifnet *, u_int8_t *, const u_int8_t *, int); void in6_tmpaddrtimer(void *); int in6_get_hw_ifid(struct ifnet *, struct in6_addr *); Index: sys/netinet6/in6_ifattach.c =================================================================== --- sys/netinet6/in6_ifattach.c +++ sys/netinet6/in6_ifattach.c @@ -761,19 +761,30 @@ /* * NOTE: in6_ifdetach() does not support loopback if at this moment. - * We don't need this function in bsdi, because interfaces are never removed - * from the ifnet list in bsdi. + * + * When shutting down a VNET we clean up layers top-down. In that case + * upper layer protocols (ulp) are cleaned up already and locks are destroyed + * and we must not call into these cleanup functions anymore, thus purgeulp + * is set to 0 in that case. + * The normal case of destroying a (cloned) interface still needs to cleanup + * everything related to the interface and will have purgeulp set to 1. */ void -in6_ifdetach(struct ifnet *ifp) +in6_ifdetach(struct ifnet *ifp, int purgeulp) { struct ifaddr *ifa, *next; if (ifp->if_afdata[AF_INET6] == NULL) return; - /* remove neighbor management table */ - nd6_purge(ifp); + /* + * Remove neighbor management table. + * Enabling the nd6_purge will panic on vmove for interfaces on VNET + * teardown as the IPv6 layer is cleaned up already and the locks + * are destroyed. + */ + if (purgeulp) + nd6_purge(ifp); /* * nuke any of IPv6 addresses we have @@ -784,9 +795,11 @@ continue; in6_purgeaddr(ifa); } - in6_pcbpurgeif0(&V_udbinfo, ifp); - in6_pcbpurgeif0(&V_ulitecbinfo, ifp); - in6_pcbpurgeif0(&V_ripcbinfo, ifp); + if (purgeulp) { + in6_pcbpurgeif0(&V_udbinfo, ifp); + in6_pcbpurgeif0(&V_ulitecbinfo, ifp); + in6_pcbpurgeif0(&V_ripcbinfo, ifp); + } /* leave from all multicast groups joined */ in6_purgemaddrs(ifp); @@ -798,7 +811,8 @@ * prefixes after removing all addresses above. * (Or can we just delay calling nd6_purge until at this point?) */ - nd6_purge(ifp); + if (purgeulp) + nd6_purge(ifp); } int Index: sys/netinet6/ip6_input.c =================================================================== --- sys/netinet6/ip6_input.c +++ sys/netinet6/ip6_input.c @@ -114,6 +114,7 @@ #include #include #include +#include #include #include @@ -315,6 +316,8 @@ static void ip6_destroy(void *unused __unused) { + struct ifaddr *ifa, *nifa; + struct ifnet *ifp; int error; #ifdef RSS @@ -337,9 +340,30 @@ "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET6: " "error %d returned\n", __func__, error); } - hashdestroy(V_in6_ifaddrhashtbl, M_IFADDR, V_in6_ifaddrhmask); + + /* Cleanup addresses. */ + IFNET_RLOCK(); + TAILQ_FOREACH(ifp, &V_ifnet, if_link) { + /* Cannot lock here - lock recursion. */ + /* IF_ADDR_LOCK(ifp); */ + TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, nifa) { + + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + in6_purgeaddr(ifa); + } + /* IF_ADDR_UNLOCK(ifp); */ + in6_ifdetach(ifp, 0); + mld_domifdetach(ifp); + /* Make sure any routes are gone as well. */ + rt_flushifroutes_af(ifp, AF_INET6); + } + IFNET_RUNLOCK(); + nd6_destroy(); in6_ifattach_destroy(); + + hashdestroy(V_in6_ifaddrhashtbl, M_IFADDR, V_in6_ifaddrhmask); } VNET_SYSUNINIT(inet6, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip6_destroy, NULL); Index: sys/netinet6/ip6_mroute.c =================================================================== --- sys/netinet6/ip6_mroute.c +++ sys/netinet6/ip6_mroute.c @@ -1966,4 +1966,4 @@ 0 }; -DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY); +DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PROTO_MC, SI_ORDER_ANY); Index: sys/netinet6/mld6.c =================================================================== --- sys/netinet6/mld6.c +++ sys/netinet6/mld6.c @@ -612,9 +612,6 @@ return; } } -#ifdef INVARIANTS - panic("%s: mld_ifsoftc not found for ifp %p\n", __func__, ifp); -#endif } /* @@ -3265,7 +3262,7 @@ mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER; mld_po.ip6po_flags = IP6PO_DONTFRAG; } -SYSINIT(mld_init, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, mld_init, NULL); +SYSINIT(mld_init, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_init, NULL); static void mld_uninit(void *unused __unused) @@ -3274,7 +3271,7 @@ CTR1(KTR_MLD, "%s: tearing down", __func__); MLD_LOCK_DESTROY(); } -SYSUNINIT(mld_uninit, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, mld_uninit, NULL); +SYSUNINIT(mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_uninit, NULL); static void vnet_mld_init(const void *unused __unused) @@ -3284,19 +3281,17 @@ LIST_INIT(&V_mli_head); } -VNET_SYSINIT(vnet_mld_init, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mld_init, +VNET_SYSINIT(vnet_mld_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_init, NULL); static void vnet_mld_uninit(const void *unused __unused) { + /* This can happen if we shutdown the network stack. */ CTR1(KTR_MLD, "%s: tearing down", __func__); - - KASSERT(LIST_EMPTY(&V_mli_head), - ("%s: mli list not empty; ifnets not detached?", __func__)); } -VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PSEUDO, SI_ORDER_ANY, vnet_mld_uninit, +VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_uninit, NULL); static int @@ -3318,4 +3313,4 @@ mld_modevent, 0 }; -DECLARE_MODULE(mld, mld_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +DECLARE_MODULE(mld, mld_mod, SI_SUB_PROTO_MC, SI_ORDER_ANY); Index: sys/netinet6/nd6.h =================================================================== --- sys/netinet6/nd6.h +++ sys/netinet6/nd6.h @@ -414,7 +414,7 @@ void nd6_destroy(void); #endif struct nd_ifinfo *nd6_ifattach(struct ifnet *); -void nd6_ifdetach(struct nd_ifinfo *); +void nd6_ifdetach(struct ifnet *, struct nd_ifinfo *); int nd6_is_addr_neighbor(const struct sockaddr_in6 *, struct ifnet *); void nd6_option_init(void *, int, union nd_opts *); struct nd_opt_hdr *nd6_option(union nd_opts *); Index: sys/netinet6/nd6.c =================================================================== --- sys/netinet6/nd6.c +++ sys/netinet6/nd6.c @@ -292,8 +292,19 @@ } void -nd6_ifdetach(struct nd_ifinfo *nd) +nd6_ifdetach(struct ifnet *ifp, struct nd_ifinfo *nd) { + struct ifaddr *ifa, *next; + + IF_ADDR_RLOCK(ifp); + TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { + if (ifa->ifa_addr->sa_family != AF_INET6) + continue; + + /* stop DAD processing */ + nd6_dad_stop(ifa); + } + IF_ADDR_RUNLOCK(ifp); free(nd, M_IP6NDP); } Index: sys/netipsec/ipsec.c =================================================================== --- sys/netipsec/ipsec.c +++ sys/netipsec/ipsec.c @@ -1715,7 +1715,7 @@ V_def_policy.policy = IPSEC_POLICY_NONE; V_def_policy.refcnt = 1; } -VNET_SYSINIT(def_policy_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, +VNET_SYSINIT(def_policy_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, def_policy_init, NULL); Index: sys/netipsec/xform_tcp.c =================================================================== --- sys/netipsec/xform_tcp.c +++ sys/netipsec/xform_tcp.c @@ -166,5 +166,5 @@ xform_register(&tcpsignature_xformsw); } -SYSINIT(tcpsignature_xform_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, +SYSINIT(tcpsignature_xform_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, tcpsignature_attach, NULL); Index: sys/netpfil/ipfw/dn_sched.h =================================================================== --- sys/netpfil/ipfw/dn_sched.h +++ sys/netpfil/ipfw/dn_sched.h @@ -196,6 +196,6 @@ #name, dn_sched_modevent, dnsched \ }; \ DECLARE_MODULE(name, name##_mod, \ - SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY); \ + SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); \ MODULE_DEPEND(name, dummynet, 3, 3, 3) #endif /* _DN_SCHED_H */ Index: sys/netpfil/ipfw/ip_dummynet.c =================================================================== --- sys/netpfil/ipfw/ip_dummynet.c +++ sys/netpfil/ipfw/ip_dummynet.c @@ -2629,7 +2629,7 @@ "dummynet", dummynet_modevent, NULL }; -#define DN_SI_SUB SI_SUB_PROTO_IFATTACHDOMAIN +#define DN_SI_SUB SI_SUB_PROTO_FIREWALL #define DN_MODEV_ORD (SI_ORDER_ANY - 128) /* after ipfw */ DECLARE_MODULE(dummynet, dummynet_mod, DN_SI_SUB, DN_MODEV_ORD); MODULE_DEPEND(dummynet, ipfw, 3, 3, 3); Index: sys/netpfil/ipfw/ip_fw2.c =================================================================== --- sys/netpfil/ipfw/ip_fw2.c +++ sys/netpfil/ipfw/ip_fw2.c @@ -2890,7 +2890,7 @@ }; /* Define startup order. */ -#define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN +#define IPFW_SI_SUB_FIREWALL SI_SUB_PROTO_FIREWALL #define IPFW_MODEVENT_ORDER (SI_ORDER_ANY - 255) /* On boot slot in here. */ #define IPFW_MODULE_ORDER (IPFW_MODEVENT_ORDER + 1) /* A little later. */ #define IPFW_VNET_ORDER (IPFW_MODEVENT_ORDER + 2) /* Later still. */ Index: sys/netpfil/ipfw/ip_fw_nat.c =================================================================== --- sys/netpfil/ipfw/ip_fw_nat.c +++ sys/netpfil/ipfw/ip_fw_nat.c @@ -1213,7 +1213,7 @@ }; /* Define startup order. */ -#define IPFW_NAT_SI_SUB_FIREWALL SI_SUB_PROTO_IFATTACHDOMAIN +#define IPFW_NAT_SI_SUB_FIREWALL SI_SUB_PROTO_FIREWALL #define IPFW_NAT_MODEVENT_ORDER (SI_ORDER_ANY - 128) /* after ipfw */ #define IPFW_NAT_MODULE_ORDER (IPFW_NAT_MODEVENT_ORDER + 1) #define IPFW_NAT_VNET_ORDER (IPFW_NAT_MODEVENT_ORDER + 2) Index: sys/netpfil/pf/pf_ioctl.c =================================================================== --- sys/netpfil/pf/pf_ioctl.c +++ sys/netpfil/pf/pf_ioctl.c @@ -3790,5 +3790,5 @@ 0 }; -DECLARE_MODULE(pf, pf_mod, SI_SUB_PSEUDO, SI_ORDER_FIRST); +DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_FIRST); MODULE_VERSION(pf, PF_MODVER);