Index: sys/net/if.h
===================================================================
--- sys/net/if.h
+++ sys/net/if.h
@@ -160,6 +160,7 @@
 #define	IFF_PPROMISC	0x20000		/* (n) user-requested promisc mode */
 #define	IFF_MONITOR	0x40000		/* (n) user-requested monitor mode */
 #define	IFF_STATICARP	0x80000		/* (n) static ARP */
+#define	IFF_DETACHING	0x100000	/* (n) interface is detaching */
 #define	IFF_DYING	0x200000	/* (n) interface is winding down */
 #define	IFF_RENAMING	0x400000	/* (n) interface is being renamed */
 #define	IFF_NOGROUP	0x800000	/* (n) interface is not part of any groups */
Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -37,6 +37,7 @@
 #include "opt_inet.h"
 
 #include <sys/param.h>
+#include <sys/blockcount.h>
 #include <sys/capsicum.h>
 #include <sys/conf.h>
 #include <sys/eventhandler.h>
@@ -263,6 +264,9 @@
 static void	if_input_default(struct ifnet *, struct mbuf *);
 static int	if_requestencap_default(struct ifnet *, struct if_encap_req *);
 static void	if_route(struct ifnet *, int flag, int fam);
+static bool	if_busy(struct ifnet *ifp, int setflags);
+static void	if_unbusy(struct ifnet *ifp);
+static void	if_busy_wait(struct ifnet *ifp);
 static int	if_setflag(struct ifnet *, int, int, int *, int);
 static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
 static void	if_unroute(struct ifnet *, int flag, int fam);
@@ -563,6 +567,7 @@
 	}
 
 	IF_ADDR_LOCK_INIT(ifp);
+	IF_BUSY_LOCK_INIT(ifp);
 	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
 	TASK_INIT(&ifp->if_addmultitask, 0, if_siocaddmulti, ifp);
 	ifp->if_afdata_initialized = 0;
@@ -575,6 +580,7 @@
 #endif
 
 	ifq_init(&ifp->if_snd, ifp);
+	blockcount_init(&ifp->if_busycount);
 	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
 	for (int i = 0; i < IFCOUNTERS; i++)
 		ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
@@ -609,7 +615,7 @@
 if_free_internal(struct ifnet *ifp)
 {
 
-	KASSERT((ifp->if_flags & IFF_DYING),
-	    ("if_free_internal: interface not dying"));
+	KASSERT((ifp->if_flags & IFF_DYING) && (ifp->if_flags & IFF_DETACHING),
+	    ("if_free_internal: interface not dying/detaching"));
 
 	if (if_com_free[ifp->if_alloctype] != NULL)
@@ -621,6 +627,7 @@
 #endif /* MAC */
 	IF_AFDATA_DESTROY(ifp);
 	IF_ADDR_LOCK_DESTROY(ifp);
+	IF_BUSY_LOCK_DESTROY(ifp);
 	ifq_delete(&ifp->if_snd);
 
 	for (int i = 0; i < IFCOUNTERS; i++)
@@ -1059,6 +1066,24 @@
 if_detach(struct ifnet *ifp)
 {
 
+	/*
+	 * Set IFF_DETACHING here so that new ioctl entries are refused; we
+	 * then wait until the busy count drops to zero before proceeding
+	 * with the detach.
+	 */
+	IFNET_WLOCK();
+	IF_BUSY_LOCK(ifp);
+	ifp->if_flags |= IFF_DETACHING;
+
+	/*
+	 * Drop the ifnet lock; we'll sleep on the busy lock.  Once we're
+	 * back, it's safe to drop the busy lock and we can assume nothing
+	 * else pertinent is going on.
+	 */
+	IFNET_WUNLOCK();
+	if_busy_wait(ifp);
+	IF_BUSY_UNLOCK(ifp);
+
 	CURVNET_SET_QUIET(ifp->if_vnet);
 	if_detach_internal(ifp, 0, NULL);
 	CURVNET_RESTORE();
@@ -1087,6 +1112,19 @@
 	shutdown = VNET_IS_SHUTTING_DOWN(ifp->if_vnet);
 #endif
+	if (!vmove) {
+		/*
+		 * XXX IFF_DETACHING should be checked in both the vmove and
+		 * !vmove cases, but the former can't be checked until we make
+		 * vmove() coordinate with detach.
+		 */
+		KASSERT((ifp->if_flags & IFF_DETACHING) != 0,
+		    ("%s: detach flag not set", __func__));
+		KASSERT(blockcount_read(&ifp->if_busycount) == 0,
+		    ("%s: busy threads still present", __func__));
+		KASSERT((ifp->if_flags & IFF_DYING) == 0,
+		    ("%s: out of order detach/free", __func__));
+	}
 
 	IFNET_WLOCK();
 	CK_STAILQ_FOREACH(iter, &V_ifnet, if_link)
 		if (iter == ifp) {
@@ -2414,6 +2452,44 @@
 	return (ifp);
 }
 
+#define	IFF_BUSY_FLAGS	(IFF_DETACHING | IFF_DYING)
+
+/*
+ * Busy the interface, if it's not already detaching/dying.  Via setflags,
+ * the caller may request flags to be set on the ifp while the busy lock is
+ * still held, if busying succeeds.  The caller is responsible for clearing
+ * any temporary flags that were set this way.
+ */
+static bool
+if_busy(struct ifnet *ifp, int setflags)
+{
+
+	IF_BUSY_LOCK(ifp);
+	if ((ifp->if_flags & IFF_BUSY_FLAGS) != 0) {
+		IF_BUSY_UNLOCK(ifp);
+		return (false);
+	}
+	blockcount_acquire(&ifp->if_busycount, 1);
+	ifp->if_flags |= setflags;
+	IF_BUSY_UNLOCK(ifp);
+	return (true);
+}
+
+static void
+if_unbusy(struct ifnet *ifp)
+{
+
+	blockcount_release(&ifp->if_busycount, 1);
+}
+
+static void
+if_busy_wait(struct ifnet *ifp)
+{
+
+	IF_BUSY_LOCK_ASSERT(ifp);
+	blockcount_wait(&ifp->if_busycount, &ifp->if_busy_lock, "ifbusy", 0);
+}
+
 void *
 ifr_buffer_get_buffer(void *data)
 {
@@ -2486,8 +2562,8 @@
 /*
  * Hardware specific interface ioctls.
  */
-int
-ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
+static int
+ifhwioctl_internal(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
 {
 	struct ifreq *ifr;
 	int error = 0, do_ifup = 0;
@@ -2499,6 +2575,14 @@
 	struct ifaddr *ifa;
 	struct sockaddr_dl *sdl;
 
+	/*
+	 * We can't assert that we're not detaching here: the procedure is to
+	 * mark the interface as detaching, then wait for the various
+	 * "management" paths to exit before proceeding with the detach.  We
+	 * should at least have bumped the busy count, though.
+	 */
+	KASSERT(blockcount_read(&ifp->if_busycount) > 0,
+	    ("%s: busy count not incremented", __func__));
 	ifr = (struct ifreq *)data;
 	switch (cmd) {
 	case SIOCGIFINDEX:
@@ -2902,6 +2986,23 @@
 	return (error);
 }
 
+/*
+ * Hardware specific interface ioctls.
+ */
+int
+ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
+{
+	int error;
+
+	if (!if_busy(ifp, 0))
+		return (ENXIO);
+
+	error = ifhwioctl_internal(cmd, ifp, data, td);
+
+	if_unbusy(ifp);
+	return (error);
+}
+
 #ifdef COMPAT_FREEBSD32
 struct ifconf32 {
 	int32_t	ifc_len;
@@ -2976,7 +3077,7 @@
 	switch (cmd) {
 	case SIOCGIFCONF:
 		error = ifconf(cmd, data);
-		goto out_noref;
+		goto out_notbusy;
 
 #ifdef COMPAT_FREEBSD32
 	case SIOCGIFCONF32:
@@ -2991,7 +3092,7 @@
 		error = ifconf(SIOCGIFCONF, (void *)&ifc);
 		if (error == 0)
 			ifc32->ifc_len = ifc.ifc_len;
-		goto out_noref;
+		goto out_notbusy;
 	}
 #endif
 	}
@@ -3016,7 +3117,7 @@
 		if (error == 0)
 			error = if_vmove_reclaim(td, ifr->ifr_name,
 			    ifr->ifr_jid);
-		goto out_noref;
+		goto out_notbusy;
 #endif
 	case SIOCIFCREATE:
 	case SIOCIFCREATE2:
@@ -3025,20 +3126,20 @@
 			error = if_clone_create(ifr->ifr_name,
 			    sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ?
 			    ifr_data_get_ptr(ifr) : NULL);
-		goto out_noref;
+		goto out_notbusy;
 
 	case SIOCIFDESTROY:
 		error = priv_check(td, PRIV_NET_IFDESTROY);
 		if (error == 0)
 			error = if_clone_destroy(ifr->ifr_name);
-		goto out_noref;
+		goto out_notbusy;
 
 	case SIOCIFGCLONERS:
 		error = if_clone_list((struct if_clonereq *)data);
-		goto out_noref;
+		goto out_notbusy;
 
 	case CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB):
 		error = if_getgroupmembers((struct ifgroupreq *)data);
-		goto out_noref;
+		goto out_notbusy;
 
 #if defined(INET) || defined(INET6)
 	case SIOCSVH:
@@ -3047,24 +3148,34 @@
 			error = EPROTONOSUPPORT;
 		else
 			error = (*carp_ioctl_p)(ifr, cmd, td);
-		goto out_noref;
+		goto out_notbusy;
 #endif
 	}
 
-	ifp = ifunit_ref(ifr->ifr_name);
+	/*
+	 * Don't take an interface reference here; busy the interface
+	 * instead, to ensure that we're no longer busy before
+	 * if_detach()/if_free() may proceed.
+	 */
+	ifp = ifunit(ifr->ifr_name);
 	if (ifp == NULL) {
 		error = ENXIO;
-		goto out_noref;
+		goto out_notbusy;
+	}
+
+	if (!if_busy(ifp, 0)) {
+		error = ENXIO;
+		goto out_notbusy;
 	}
 
-	error = ifhwioctl(cmd, ifp, data, td);
+	error = ifhwioctl_internal(cmd, ifp, data, td);
 	if (error != ENOIOCTL)
-		goto out_ref;
+		goto out_busy;
 
 	oif_flags = ifp->if_flags;
 	if (so->so_proto == NULL) {
 		error = EOPNOTSUPP;
-		goto out_ref;
+		goto out_busy;
 	}
 
 	/*
@@ -3090,9 +3201,9 @@
 #endif
 	}
 
-out_ref:
-	if_rele(ifp);
-out_noref:
+out_busy:
+	if_unbusy(ifp);
+out_notbusy:
 #ifdef COMPAT_FREEBSD32
 	if (ifmrp != NULL) {
 		KASSERT((cmd == SIOCGIFMEDIA || cmd == SIOCGIFXMEDIA),
Index: sys/net/if_tuntap.c
===================================================================
--- sys/net/if_tuntap.c
+++ sys/net/if_tuntap.c
@@ -190,9 +190,6 @@
 static TAILQ_HEAD(,tuntap_softc)	tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
 SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
 
-static struct sx tun_ioctl_sx;
-SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");
-
 SYSCTL_DECL(_net_link);
 /* tun */
 static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
@@ -641,9 +638,6 @@
 		bpfdetach(TUN2IFP(tp));
 		if_detach(TUN2IFP(tp));
 	}
-	sx_xlock(&tun_ioctl_sx);
-	TUN2IFP(tp)->if_softc = NULL;
-	sx_xunlock(&tun_ioctl_sx);
 	free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit);
 	if_free(TUN2IFP(tp));
 	mtx_destroy(&tp->tun_mtx);
@@ -1011,14 +1005,8 @@
 	 * from dying until we've created the alias (that will then be
 	 * subsequently destroyed).
 	 */
-	sx_xlock(&tun_ioctl_sx);
 	tp = ifp->if_softc;
-	if (tp == NULL) {
-		sx_xunlock(&tun_ioctl_sx);
-		return;
-	}
 	error = tun_busy(tp);
-	sx_xunlock(&tun_ioctl_sx);
 	if (error != 0)
 		return;
 	if (tp->tun_alias != NULL) {
@@ -1324,12 +1312,7 @@
 	bool l2tun;
 
 	ifmr = NULL;
-	sx_xlock(&tun_ioctl_sx);
 	tp = ifp->if_softc;
-	if (tp == NULL) {
-		error = ENXIO;
-		goto bad;
-	}
 	l2tun = (tp->tun_flags & TUN_L2) != 0;
 	switch(cmd) {
 	case SIOCGIFSTATUS:
@@ -1391,8 +1374,6 @@
 			error = EINVAL;
 		}
 	}
 
-bad:
-	sx_xunlock(&tun_ioctl_sx);
 	return (error);
 }
Index: sys/net/if_var.h
===================================================================
--- sys/net/if_var.h
+++ sys/net/if_var.h
@@ -78,6 +78,7 @@
 #include <sys/buf_ring.h>
 #include <net/vnet.h>
 #endif /* _KERNEL */
+#include <sys/_blockcount.h>
 #include <sys/ck.h>
 #include <sys/counter.h>
 #include <sys/epoch.h>
@@ -424,6 +425,13 @@
 	struct debugnet_methods *if_debugnet_methods;
 	struct epoch_context	if_epoch_ctx;
 
+	/*
+	 * State for serializing management operations (ioctl/destroy)
+	 * with detach, and for ensuring their ordering.
+	 */
+	struct mtx	if_busy_lock;
+	blockcount_t	if_busycount;
+
 	/*
 	 * Spare fields to be added before branching a stable branch, so
 	 * that structure can be enhanced without changing the kernel
@@ -447,6 +455,12 @@
 #define	IF_ADDR_LOCK_ASSERT(if)	MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(if)->if_addr_lock))
 #define	IF_ADDR_WLOCK_ASSERT(if)	mtx_assert(&(if)->if_addr_lock, MA_OWNED)
 
+#define	IF_BUSY_LOCK_INIT(if)	mtx_init(&(if)->if_busy_lock, "if_busy_lock", NULL, MTX_DEF)
+#define	IF_BUSY_LOCK_DESTROY(if)	mtx_destroy(&(if)->if_busy_lock)
+#define	IF_BUSY_LOCK(if)	mtx_lock(&(if)->if_busy_lock)
+#define	IF_BUSY_UNLOCK(if)	mtx_unlock(&(if)->if_busy_lock)
+#define	IF_BUSY_LOCK_ASSERT(if)	mtx_assert(&(if)->if_busy_lock, MA_OWNED)
+
 #ifdef _KERNEL
 /* interface link layer address change event */
 typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *);
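
Reviewer's note: the busy/drain protocol above is easiest to evaluate end-to-end in miniature, so here is a rough userspace model of it. This is a sketch only: a pthread mutex and condition variable stand in for the kernel's if_busy_lock/blockcount pair, and every name in it (ifnet_model, model_busy, model_unbusy, model_detach) is invented for illustration; nothing below is kernel API.

/*
 * Userspace model of the ifnet busy/drain scheme: management paths bump a
 * busy count unless the interface is detaching; detach marks the interface
 * as detaching and then sleeps until the count drains to zero.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define	IFF_DETACHING	0x100000

struct ifnet_model {
	pthread_mutex_t	busy_lock;	/* models if_busy_lock */
	pthread_cond_t	busy_drained;	/* models the blockcount wakeup */
	int		busycount;	/* models if_busycount */
	int		flags;
};

/* Like if_busy(): refuse once detaching, otherwise bump the busy count. */
static bool
model_busy(struct ifnet_model *ifp)
{
	bool ok = false;

	pthread_mutex_lock(&ifp->busy_lock);
	if ((ifp->flags & IFF_DETACHING) == 0) {
		ifp->busycount++;
		ok = true;
	}
	pthread_mutex_unlock(&ifp->busy_lock);
	return (ok);
}

/* Like if_unbusy(): drop the count, waking a draining detach at zero. */
static void
model_unbusy(struct ifnet_model *ifp)
{

	pthread_mutex_lock(&ifp->busy_lock);
	if (--ifp->busycount == 0)
		pthread_cond_broadcast(&ifp->busy_drained);
	pthread_mutex_unlock(&ifp->busy_lock);
}

/* Like the start of if_detach(): mark detaching, then wait for the drain. */
static void
model_detach(struct ifnet_model *ifp)
{

	pthread_mutex_lock(&ifp->busy_lock);
	ifp->flags |= IFF_DETACHING;
	while (ifp->busycount != 0)
		pthread_cond_wait(&ifp->busy_drained, &ifp->busy_lock);
	pthread_mutex_unlock(&ifp->busy_lock);
	/* From here on, no management path can observe the interface. */
}

int
main(void)
{
	struct ifnet_model ifp;

	pthread_mutex_init(&ifp.busy_lock, NULL);
	pthread_cond_init(&ifp.busy_drained, NULL);
	ifp.busycount = 0;
	ifp.flags = 0;

	if (model_busy(&ifp))		/* an "ioctl" enters... */
		model_unbusy(&ifp);	/* ...and exits */
	model_detach(&ifp);		/* count is zero, drains immediately */
	printf("busy after detach: %s\n",
	    model_busy(&ifp) ? "allowed (bug!)" : "refused");
	return (0);
}

The key property, as in the patch, is that once IFF_DETACHING is set under the busy lock, model_busy() can no longer succeed; the busy count then only decreases, so the detach-side wait must terminate.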