Page MenuHomeFreeBSD

D22691.id65557.diff
No OneTemporary

D22691.id65557.diff

Index: sys/net/if.h
===================================================================
--- sys/net/if.h
+++ sys/net/if.h
@@ -160,9 +160,11 @@
#define IFF_PPROMISC 0x20000 /* (n) user-requested promisc mode */
#define IFF_MONITOR 0x40000 /* (n) user-requested monitor mode */
#define IFF_STATICARP 0x80000 /* (n) static ARP */
+#define IFF_DETACHING 0x100000 /* (n) interface is detaching */
#define IFF_DYING 0x200000 /* (n) interface is winding down */
#define IFF_RENAMING 0x400000 /* (n) interface is being renamed */
#define IFF_NOGROUP 0x800000 /* (n) interface is not part of any groups */
+#define IFF_VMOVING 0x1000000 /* (n) interface is changing vnets */
/*
Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -262,6 +262,9 @@
static void if_input_default(struct ifnet *, struct mbuf *);
static int if_requestencap_default(struct ifnet *, struct if_encap_req *);
static void if_route(struct ifnet *, int flag, int fam);
+static bool if_busy(struct ifnet *ifp, int setflags);
+static void if_unbusy(struct ifnet *ifp);
+static void if_busy_wait(struct ifnet *ifp);
static int if_setflag(struct ifnet *, int, int, int *, int);
static int if_transmit(struct ifnet *ifp, struct mbuf *m);
static void if_unroute(struct ifnet *, int flag, int fam);
@@ -274,6 +277,7 @@
static int if_detach_internal(struct ifnet *, int, struct if_clone **);
static void if_siocaddmulti(void *, int);
#ifdef VIMAGE
+static int if_vmove_busied(struct ifnet *, struct vnet *);
static int if_vmove(struct ifnet *, struct vnet *);
#endif
@@ -471,10 +475,60 @@
{
struct ifnet *ifp, *nifp;
+ /*
+ * We do a two pass approach here: first, we go through and busy up all
+ * of the interfaces with the ifnet writelock held. This ensures the
+ * list is stable while we determine what still needs to be detached and
+ * busy up the rest. Any that are already attempting to if_detach()
+ * will have been properly marked IFF_DETACHING, and we'll skip over
+ * them here.
+ */
+ IFNET_WLOCK();
+ CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if (ifp->if_home_vnet == ifp->if_vnet)
+ continue;
+ IF_BUSY_LOCK(ifp);
+ if ((ifp->if_flags & IFF_DETACHING) == 0) {
+ /*
+ * If the interface needs to be moved back to the parent
+ * vnet and it's not already in the process of
+ * detaching, go ahead and busy it and mark it as such.
+ *
+ * This guarantees that the interface can't be destroyed
+ * or moved while we're in the process of tearing down
+ * the vnet. If it's already detaching, then it will be
+ * a non-issue as it has to be leaving the vnet and we
+ * can assume it will be gone in due time.
+ */
+ refcount_acquire(&ifp->if_busy_count);
+ ifp->if_flags |= IFF_DETACHING | IFF_VMOVING;
+ }
+ IF_BUSY_UNLOCK(ifp);
+ }
+ IFNET_WUNLOCK();
/* Return all inherited interfaces to their parent vnets. */
CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
- if (ifp->if_home_vnet != ifp->if_vnet)
- if_vmove(ifp, ifp->if_home_vnet);
+ if (ifp->if_home_vnet == ifp->if_vnet)
+ continue;
+ /*
+ * At this point, we have two possible scenarios for every ifnet
+ * still here that needs re-homed: either we're re-homing it
+ * because we marked it as such above, or someone else is
+ * removing it. If we're re-homing it, IFF_VMOVING should
+ * still be set from up above. If someone else is trying to
+ * remove it, then they've unset IFF_VMOVING and are waiting for
+ * us to drop the refcount. In that scenario, we don't need to
+ * bother with an if_vmove() because the waiter is simply going
+ * to drop it as soon as we let go of the reference.
+ */
+ IF_BUSY_LOCK(ifp);
+ if ((ifp->if_flags & IFF_VMOVING) != 0) {
+ IF_BUSY_UNLOCK(ifp);
+ if_vmove_busied(ifp, ifp->if_home_vnet);
+ } else {
+ IF_BUSY_UNLOCK(ifp);
+ }
+ if_unbusy(ifp);
}
}
/*
@@ -561,6 +615,7 @@
}
IF_ADDR_LOCK_INIT(ifp);
+ IF_BUSY_LOCK_INIT(ifp);
TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
TASK_INIT(&ifp->if_addmultitask, 0, if_siocaddmulti, ifp);
ifp->if_afdata_initialized = 0;
@@ -607,7 +662,7 @@
if_free_internal(struct ifnet *ifp)
{
- KASSERT((ifp->if_flags & IFF_DYING),
+ KASSERT((ifp->if_flags & IFF_DYING) && (ifp->if_flags & IFF_DETACHING),
("if_free_internal: interface not dying"));
if (if_com_free[ifp->if_alloctype] != NULL)
@@ -619,6 +674,7 @@
#endif /* MAC */
IF_AFDATA_DESTROY(ifp);
IF_ADDR_LOCK_DESTROY(ifp);
+ IF_BUSY_LOCK_DESTROY(ifp);
ifq_delete(&ifp->if_snd);
for (int i = 0; i < IFCOUNTERS; i++)
@@ -1058,6 +1114,22 @@
if_detach(struct ifnet *ifp)
{
+ /*
+ * Grab the ifnet writelock as well as the interface busy lock here to
+ * serialize parts of vnet shutdown with interface detach. If we win
+ * here over vnet_sysuninit_shutdown, then we'll proceed to detach and
+ * vnet_shutdown will be properly set before we do something we regret.
+ * If we lose here, the interface will be busy'd as it gets removed from
+ * the vnet and then we'll swoop in and finish the process.
+ */
+ IFNET_WLOCK();
+ IF_BUSY_LOCK(ifp);
+ ifp->if_flags |= IFF_DETACHING;
+ ifp->if_flags &= ~IFF_VMOVING;
+ IF_BUSY_UNLOCK(ifp);
+ IFNET_WUNLOCK();
+
+ if_busy_wait(ifp);
CURVNET_SET_QUIET(ifp->if_vnet);
if_detach_internal(ifp, 0, NULL);
CURVNET_RESTORE();
@@ -1082,6 +1154,16 @@
struct ifnet *iter;
int found = 0;
+ KASSERT((ifp->if_flags & IFF_DETACHING) != 0,
+ ("%s: detach flag not set", __func__));
+ if (!vmove) {
+ KASSERT(ifp->if_busy_count == 0,
+ ("%s: busy threads still present", __func__));
+ KASSERT((ifp->if_flags & IFF_VMOVING) == 0,
+ ("%s: detach while ifnet changing vnets", __func__));
+ KASSERT((ifp->if_flags & IFF_DYING) == 0,
+ ("%s: out of order detach/free", __func__));
+ }
IFNET_WLOCK();
CK_STAILQ_FOREACH(iter, &V_ifnet, if_link)
if (iter == ifp) {
@@ -1250,15 +1332,8 @@
}
#ifdef VIMAGE
-/*
- * if_vmove() performs a limited version of if_detach() in current
- * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
- * An attempt is made to shrink if_index in current vnet, find an
- * unused if_index in target vnet and calls if_grow() if necessary,
- * and finally find an unused if_xname for the target vnet.
- */
static int
-if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
+if_vmove_busied(struct ifnet *ifp, struct vnet *new_vnet)
{
struct if_clone *ifc;
#ifdef DEV_BPF
@@ -1268,7 +1343,7 @@
int rc;
#ifdef DEV_BPF
- /*
+ /*
* if_detach_internal() will call the eventhandler to notify
* interface departure. That will detach if_bpf. We need to
* safe the dlt and hdrlen so we can re-attach it later.
@@ -1283,7 +1358,19 @@
*/
rc = if_detach_internal(ifp, 1, &ifc);
if (rc != 0)
- return (rc);
+ goto out;
+
+ /*
+ * We could defer clearing IFF_DETACHING until the end, but
+ * IFF_VMOVING is sufficient for keeping the interface 'busy' until
+ * death or ioctls are invoked, and this keeps if_flags looking somewhat
+ * correct for what's happening right now- we've exited the detach phase
+ * of if_vmove, and we're now attaching to the new vnet.
+ */
+ IF_BUSY_LOCK(ifp);
+ if ((ifp->if_flags & IFF_VMOVING) != 0)
+ ifp->if_flags &= ~IFF_DETACHING;
+ IF_BUSY_UNLOCK(ifp);
/*
* Unlink the ifnet from ifindex_table[] in current vnet, and shrink
@@ -1327,7 +1414,53 @@
#endif
CURVNET_RESTORE();
- return (0);
+out:
+ /*
+ * Either we failed or the interface is no longer visible in the current
+ * vnet, so we can go ahead and drop IFF_DETACHED and IFF_VMOVING. If
+ * IFF_VMOVING is no longer set, then if_detach() came along and
+ * solidifed the detaching. We may have already cleared IFF_DETACHING
+ * up above, but we're blocked from doing further vnet moves and
+ * if_detach would have cleared IFF_VMOVING, so we can go ahead and
+ * unconditionally clear it -- we'll need to clear it anyways if we're
+ * in the error path.
+ */
+ IF_BUSY_LOCK(ifp);
+ if ((ifp->if_flags & IFF_VMOVING) != 0)
+ ifp->if_flags &= ~(IFF_DETACHING | IFF_VMOVING);
+ IF_BUSY_UNLOCK(ifp);
+ return (rc);
+}
+
+/*
+ * if_vmove() performs a limited version of if_detach() in current
+ * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
+ * An attempt is made to shrink if_index in current vnet, find an
+ * unused if_index in target vnet and calls if_grow() if necessary,
+ * and finally find an unused if_xname for the target vnet.
+ *
+ * The caller may indicate that it's already busied thet interface prior
+ * to entry. If it has done so, assume it's ok to proceed with the move.
+ * The caller will handle unbusy.
+ */
+static int
+if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
+{
+ int error;
+
+ /*
+ * Busy the ifnet, taking into account whether this is a vnet shutdown
+ * or not. We don't let detach block us if we're in the middle of
+ * shutdown so that we don't end up with a vnet that still has
+ * interfaces in it; if_detach will specifically handle the case that
+ * the ifnet in question will be getting re-homed as it's shutting down.
+ */
+ if (!if_busy(ifp, IFF_DETACHING | IFF_VMOVING))
+ return (EBUSY);
+
+ error = if_vmove_busied(ifp, new_vnet);
+ if_unbusy(ifp);
+ return (error);
}
/*
@@ -2414,6 +2547,43 @@
return (ifp);
}
+#define IFF_BUSY_FLAGS (IFF_DETACHING | IFF_DYING | IFF_VMOVING)
+
+/*
+ * Busy the interface, if it's not already detaching/dying. The caller may
+ * request, via setflags, flags to set on the ifp while we're still holding the
+ * busy lock if we've succeeded. The caller is responsible for clearing any
+ * temporary flags that were set.
+ */
+static bool
+if_busy(struct ifnet *ifp, int setflags)
+{
+
+ IF_BUSY_LOCK(ifp);
+ if ((ifp->if_flags & IFF_BUSY_FLAGS) != 0) {
+ IF_BUSY_UNLOCK(ifp);
+ return (false);
+ }
+ refcount_acquire(&ifp->if_busy_count);
+ ifp->if_flags |= setflags;
+ IF_BUSY_UNLOCK(ifp);
+ return (true);
+}
+
+static void
+if_unbusy(struct ifnet *ifp)
+{
+
+ refcount_release(&ifp->if_busy_count);
+}
+
+static void
+if_busy_wait(struct ifnet *ifp)
+{
+
+ refcount_wait(&ifp->if_busy_count, "ifbusy", 0);
+}
+
static void *
ifr_buffer_get_buffer(void *data)
{
@@ -2486,8 +2656,8 @@
/*
* Hardware specific interface ioctls.
*/
-int
-ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
+static int
+ifhwioctl_internal(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
{
struct ifreq *ifr;
int error = 0, do_ifup = 0;
@@ -2499,6 +2669,14 @@
struct ifaddr *ifa;
struct sockaddr_dl *sdl;
+ /*
+ * We can't assert that we're not detaching because the procedure is to
+ * mark us as detaching then wait for the different "management" paths
+ * to exit before proceeding with the detach. We should have properly
+ * increased the refcount, at least.
+ */
+ KASSERT(ifp->if_busy_count > 0,
+ ("%s: mgmt refcount not increased", __func__));
ifr = (struct ifreq *)data;
switch (cmd) {
case SIOCGIFINDEX:
@@ -2902,6 +3080,23 @@
return (error);
}
+/*
+ * Hardware specific interface ioctls.
+ */
+int
+ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
+{
+ int error;
+
+ if (!if_busy(ifp, 0))
+ return (ENXIO);
+
+ error = ifhwioctl_internal(cmd, ifp, data, td);
+
+ if_unbusy(ifp);
+ return (error);
+}
+
#ifdef COMPAT_FREEBSD32
struct ifconf32 {
int32_t ifc_len;
@@ -2972,7 +3167,7 @@
switch (cmd) {
case SIOCGIFCONF:
error = ifconf(cmd, data);
- goto out_noref;
+ goto out_notbusy;
#ifdef COMPAT_FREEBSD32
case SIOCGIFCONF32:
@@ -2987,7 +3182,7 @@
error = ifconf(SIOCGIFCONF, (void *)&ifc);
if (error == 0)
ifc32->ifc_len = ifc.ifc_len;
- goto out_noref;
+ goto out_notbusy;
}
#endif
}
@@ -3012,7 +3207,7 @@
if (error == 0)
error = if_vmove_reclaim(td, ifr->ifr_name,
ifr->ifr_jid);
- goto out_noref;
+ goto out_notbusy;
#endif
case SIOCIFCREATE:
case SIOCIFCREATE2:
@@ -3021,20 +3216,20 @@
error = if_clone_create(ifr->ifr_name,
sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ?
ifr_data_get_ptr(ifr) : NULL);
- goto out_noref;
+ goto out_notbusy;
case SIOCIFDESTROY:
error = priv_check(td, PRIV_NET_IFDESTROY);
if (error == 0)
error = if_clone_destroy(ifr->ifr_name);
- goto out_noref;
+ goto out_notbusy;
case SIOCIFGCLONERS:
error = if_clone_list((struct if_clonereq *)data);
- goto out_noref;
+ goto out_notbusy;
case CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB):
error = if_getgroupmembers((struct ifgroupreq *)data);
- goto out_noref;
+ goto out_notbusy;
#if defined(INET) || defined(INET6)
case SIOCSVH:
@@ -3043,24 +3238,34 @@
error = EPROTONOSUPPORT;
else
error = (*carp_ioctl_p)(ifr, cmd, td);
- goto out_noref;
+ goto out_notbusy;
#endif
}
- ifp = ifunit_ref(ifr->ifr_name);
+ /*
+ * Don't increment the refcount; we'll do administrative refcounting as
+ * we want to ensure that we're no longer busy before
+ * if_detach/if_free may proceed.
+ */
+ ifp = ifunit(ifr->ifr_name);
if (ifp == NULL) {
error = ENXIO;
- goto out_noref;
+ goto out_notbusy;
+ }
+
+ if (!if_busy(ifp, 0)) {
+ error = ENXIO;
+ goto out_notbusy;
}
- error = ifhwioctl(cmd, ifp, data, td);
+ error = ifhwioctl_internal(cmd, ifp, data, td);
if (error != ENOIOCTL)
- goto out_ref;
+ goto out_busy;
oif_flags = ifp->if_flags;
if (so->so_proto == NULL) {
error = EOPNOTSUPP;
- goto out_ref;
+ goto out_busy;
}
/*
@@ -3086,9 +3291,9 @@
#endif
}
-out_ref:
- if_rele(ifp);
-out_noref:
+out_busy:
+ if_unbusy(ifp);
+out_notbusy:
#ifdef COMPAT_FREEBSD32
if (ifmrp != NULL) {
KASSERT((cmd == SIOCGIFMEDIA || cmd == SIOCGIFXMEDIA),
Index: sys/net/if_tuntap.c
===================================================================
--- sys/net/if_tuntap.c
+++ sys/net/if_tuntap.c
@@ -190,9 +190,6 @@
static TAILQ_HEAD(,tuntap_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
-static struct sx tun_ioctl_sx;
-SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");
-
SYSCTL_DECL(_net_link);
/* tun */
static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
@@ -641,9 +638,6 @@
bpfdetach(TUN2IFP(tp));
if_detach(TUN2IFP(tp));
}
- sx_xlock(&tun_ioctl_sx);
- TUN2IFP(tp)->if_softc = NULL;
- sx_xunlock(&tun_ioctl_sx);
free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit);
if_free(TUN2IFP(tp));
mtx_destroy(&tp->tun_mtx);
@@ -1011,14 +1005,8 @@
* from dying until we've created the alias (that will then be
* subsequently destroyed).
*/
- sx_xlock(&tun_ioctl_sx);
tp = ifp->if_softc;
- if (tp == NULL) {
- sx_xunlock(&tun_ioctl_sx);
- return;
- }
error = tun_busy(tp);
- sx_xunlock(&tun_ioctl_sx);
if (error != 0)
return;
if (tp->tun_alias != NULL) {
@@ -1324,12 +1312,7 @@
bool l2tun;
ifmr = NULL;
- sx_xlock(&tun_ioctl_sx);
tp = ifp->if_softc;
- if (tp == NULL) {
- error = ENXIO;
- goto bad;
- }
l2tun = (tp->tun_flags & TUN_L2) != 0;
switch(cmd) {
case SIOCGIFSTATUS:
@@ -1391,8 +1374,6 @@
error = EINVAL;
}
}
-bad:
- sx_xunlock(&tun_ioctl_sx);
return (error);
}
Index: sys/net/if_var.h
===================================================================
--- sys/net/if_var.h
+++ sys/net/if_var.h
@@ -423,6 +423,13 @@
struct debugnet_methods *if_debugnet_methods;
struct epoch_context if_epoch_ctx;
+ /*
+ * Bits for serializing management operations (ioctl/destroy/vmove)
+ * and ensuring order.
+ */
+ struct mtx if_busy_lock;
+ u_int if_busy_count;
+
/*
* Spare fields to be added before branching a stable branch, so
* that structure can be enhanced without changing the kernel
@@ -450,6 +457,12 @@
#define NET_EPOCH_WAIT() epoch_wait_preempt(net_epoch_preempt)
#define NET_EPOCH_ASSERT() MPASS(in_epoch(net_epoch_preempt))
+#define IF_BUSY_LOCK_INIT(if) mtx_init(&(if)->if_busy_lock, "if_busy_lock", NULL, MTX_DEF)
+#define IF_BUSY_LOCK_DESTROY(if) mtx_destroy(&(if)->if_busy_lock)
+#define IF_BUSY_LOCK(if) mtx_lock(&(if)->if_busy_lock)
+#define IF_BUSY_UNLOCK(if) mtx_unlock(&(if)->if_busy_lock)
+#define IF_BUSY_LOCK_ASSERT(if) mtx_assert(&(if)->if_busy_lock, MA_OWNED)
+
#ifdef _KERNEL
/* interface link layer address change event */
typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *);

File Metadata

Mime Type
text/plain
Expires
Thu, May 21, 8:57 AM (20 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33382938
Default Alt Text
D22691.id65557.diff (16 KB)

Event Timeline