Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F157326865
D22691.id65557.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
16 KB
Referenced Files
None
Subscribers
None
D22691.id65557.diff
View Options
Index: sys/net/if.h
===================================================================
--- sys/net/if.h
+++ sys/net/if.h
@@ -160,9 +160,11 @@
#define IFF_PPROMISC 0x20000 /* (n) user-requested promisc mode */
#define IFF_MONITOR 0x40000 /* (n) user-requested monitor mode */
#define IFF_STATICARP 0x80000 /* (n) static ARP */
+#define IFF_DETACHING 0x100000 /* (n) interface is detaching */
#define IFF_DYING 0x200000 /* (n) interface is winding down */
#define IFF_RENAMING 0x400000 /* (n) interface is being renamed */
#define IFF_NOGROUP 0x800000 /* (n) interface is not part of any groups */
+#define IFF_VMOVING 0x1000000 /* (n) interface is changing vnets */
/*
Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -262,6 +262,9 @@
static void if_input_default(struct ifnet *, struct mbuf *);
static int if_requestencap_default(struct ifnet *, struct if_encap_req *);
static void if_route(struct ifnet *, int flag, int fam);
+static bool if_busy(struct ifnet *ifp, int setflags);
+static void if_unbusy(struct ifnet *ifp);
+static void if_busy_wait(struct ifnet *ifp);
static int if_setflag(struct ifnet *, int, int, int *, int);
static int if_transmit(struct ifnet *ifp, struct mbuf *m);
static void if_unroute(struct ifnet *, int flag, int fam);
@@ -274,6 +277,7 @@
static int if_detach_internal(struct ifnet *, int, struct if_clone **);
static void if_siocaddmulti(void *, int);
#ifdef VIMAGE
+static int if_vmove_busied(struct ifnet *, struct vnet *);
static int if_vmove(struct ifnet *, struct vnet *);
#endif
@@ -471,10 +475,60 @@
{
struct ifnet *ifp, *nifp;
+ /*
+ * We do a two pass approach here: first, we go through and busy up all
+ * of the interfaces with the ifnet writelock held. This ensures the
+ * list is stable while we determine what still needs to be detached and
+ * busy up the rest. Any that are already attempting to if_detach()
+ * will have been properly marked IFF_DETACHING, and we'll skip over
+ * them here.
+ */
+ IFNET_WLOCK();
+ CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+ if (ifp->if_home_vnet == ifp->if_vnet)
+ continue;
+ IF_BUSY_LOCK(ifp);
+ if ((ifp->if_flags & IFF_DETACHING) == 0) {
+ /*
+ * If the interface needs to be moved back to the parent
+ * vnet and it's not already in the process of
+ * detaching, go ahead and busy it and mark it as such.
+ *
+ * This guarantees that the interface can't be destroyed
+ * or moved while we're in the process of tearing down
+ * the vnet. If it's already detaching, then it will be
+ * a non-issue as it has to be leaving the vnet and we
+ * can assume it will be gone in due time.
+ */
+ refcount_acquire(&ifp->if_busy_count);
+ ifp->if_flags |= IFF_DETACHING | IFF_VMOVING;
+ }
+ IF_BUSY_UNLOCK(ifp);
+ }
+ IFNET_WUNLOCK();
/* Return all inherited interfaces to their parent vnets. */
CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
- if (ifp->if_home_vnet != ifp->if_vnet)
- if_vmove(ifp, ifp->if_home_vnet);
+ if (ifp->if_home_vnet == ifp->if_vnet)
+ continue;
+ /*
+ * At this point, we have two possible scenarios for every ifnet
+ * still here that needs re-homed: either we're re-homing it
+ * because we marked it as such above, or someone else is
+ * removing it. If we're re-homing it, IFF_VMOVING should
+ * still be set from up above. If someone else is trying to
+ * remove it, then they've unset IFF_VMOVING and are waiting for
+ * us to drop the refcount. In that scenario, we don't need to
+ * bother with an if_vmove() because the waiter is simply going
+ * to drop it as soon as we let go of the reference.
+ */
+ IF_BUSY_LOCK(ifp);
+ if ((ifp->if_flags & IFF_VMOVING) != 0) {
+ IF_BUSY_UNLOCK(ifp);
+ if_vmove_busied(ifp, ifp->if_home_vnet);
+ } else {
+ IF_BUSY_UNLOCK(ifp);
+ }
+ if_unbusy(ifp);
}
}
/*
@@ -561,6 +615,7 @@
}
IF_ADDR_LOCK_INIT(ifp);
+ IF_BUSY_LOCK_INIT(ifp);
TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
TASK_INIT(&ifp->if_addmultitask, 0, if_siocaddmulti, ifp);
ifp->if_afdata_initialized = 0;
@@ -607,7 +662,7 @@
if_free_internal(struct ifnet *ifp)
{
- KASSERT((ifp->if_flags & IFF_DYING),
+ KASSERT((ifp->if_flags & IFF_DYING) && (ifp->if_flags & IFF_DETACHING),
("if_free_internal: interface not dying"));
if (if_com_free[ifp->if_alloctype] != NULL)
@@ -619,6 +674,7 @@
#endif /* MAC */
IF_AFDATA_DESTROY(ifp);
IF_ADDR_LOCK_DESTROY(ifp);
+ IF_BUSY_LOCK_DESTROY(ifp);
ifq_delete(&ifp->if_snd);
for (int i = 0; i < IFCOUNTERS; i++)
@@ -1058,6 +1114,22 @@
if_detach(struct ifnet *ifp)
{
+ /*
+ * Grab the ifnet writelock as well as the interface busy lock here to
+ * serialize parts of vnet shutdown with interface detach. If we win
+ * here over vnet_sysuninit_shutdown, then we'll proceed to detach and
+ * vnet_shutdown will be properly set before we do something we regret.
+ * If we lose here, the interface will be busy'd as it gets removed from
+ * the vnet and then we'll swoop in and finish the process.
+ */
+ IFNET_WLOCK();
+ IF_BUSY_LOCK(ifp);
+ ifp->if_flags |= IFF_DETACHING;
+ ifp->if_flags &= ~IFF_VMOVING;
+ IF_BUSY_UNLOCK(ifp);
+ IFNET_WUNLOCK();
+
+ if_busy_wait(ifp);
CURVNET_SET_QUIET(ifp->if_vnet);
if_detach_internal(ifp, 0, NULL);
CURVNET_RESTORE();
@@ -1082,6 +1154,16 @@
struct ifnet *iter;
int found = 0;
+ KASSERT((ifp->if_flags & IFF_DETACHING) != 0,
+ ("%s: detach flag not set", __func__));
+ if (!vmove) {
+ KASSERT(ifp->if_busy_count == 0,
+ ("%s: busy threads still present", __func__));
+ KASSERT((ifp->if_flags & IFF_VMOVING) == 0,
+ ("%s: detach while ifnet changing vnets", __func__));
+ KASSERT((ifp->if_flags & IFF_DYING) == 0,
+ ("%s: out of order detach/free", __func__));
+ }
IFNET_WLOCK();
CK_STAILQ_FOREACH(iter, &V_ifnet, if_link)
if (iter == ifp) {
@@ -1250,15 +1332,8 @@
}
#ifdef VIMAGE
-/*
- * if_vmove() performs a limited version of if_detach() in current
- * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
- * An attempt is made to shrink if_index in current vnet, find an
- * unused if_index in target vnet and calls if_grow() if necessary,
- * and finally find an unused if_xname for the target vnet.
- */
static int
-if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
+if_vmove_busied(struct ifnet *ifp, struct vnet *new_vnet)
{
struct if_clone *ifc;
#ifdef DEV_BPF
@@ -1268,7 +1343,7 @@
int rc;
#ifdef DEV_BPF
- /*
+ /*
* if_detach_internal() will call the eventhandler to notify
* interface departure. That will detach if_bpf. We need to
* safe the dlt and hdrlen so we can re-attach it later.
@@ -1283,7 +1358,19 @@
*/
rc = if_detach_internal(ifp, 1, &ifc);
if (rc != 0)
- return (rc);
+ goto out;
+
+ /*
+ * We could defer clearing IFF_DETACHING until the end, but
+ * IFF_VMOVING is sufficient for keeping the interface 'busy' until
+ * death or ioctls are invoked, and this keeps if_flags looking somewhat
+ * correct for what's happening right now- we've exited the detach phase
+ * of if_vmove, and we're now attaching to the new vnet.
+ */
+ IF_BUSY_LOCK(ifp);
+ if ((ifp->if_flags & IFF_VMOVING) != 0)
+ ifp->if_flags &= ~IFF_DETACHING;
+ IF_BUSY_UNLOCK(ifp);
/*
* Unlink the ifnet from ifindex_table[] in current vnet, and shrink
@@ -1327,7 +1414,53 @@
#endif
CURVNET_RESTORE();
- return (0);
+out:
+ /*
+ * Either we failed or the interface is no longer visible in the current
+ * vnet, so we can go ahead and drop IFF_DETACHED and IFF_VMOVING. If
+ * IFF_VMOVING is no longer set, then if_detach() came along and
+ * solidifed the detaching. We may have already cleared IFF_DETACHING
+ * up above, but we're blocked from doing further vnet moves and
+ * if_detach would have cleared IFF_VMOVING, so we can go ahead and
+ * unconditionally clear it -- we'll need to clear it anyways if we're
+ * in the error path.
+ */
+ IF_BUSY_LOCK(ifp);
+ if ((ifp->if_flags & IFF_VMOVING) != 0)
+ ifp->if_flags &= ~(IFF_DETACHING | IFF_VMOVING);
+ IF_BUSY_UNLOCK(ifp);
+ return (rc);
+}
+
+/*
+ * if_vmove() performs a limited version of if_detach() in current
+ * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
+ * An attempt is made to shrink if_index in current vnet, find an
+ * unused if_index in target vnet and calls if_grow() if necessary,
+ * and finally find an unused if_xname for the target vnet.
+ *
+ * The caller may indicate that it's already busied thet interface prior
+ * to entry. If it has done so, assume it's ok to proceed with the move.
+ * The caller will handle unbusy.
+ */
+static int
+if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
+{
+ int error;
+
+ /*
+ * Busy the ifnet, taking into account whether this is a vnet shutdown
+ * or not. We don't let detach block us if we're in the middle of
+ * shutdown so that we don't end up with a vnet that still has
+ * interfaces in it; if_detach will specifically handle the case that
+ * the ifnet in question will be getting re-homed as it's shutting down.
+ */
+ if (!if_busy(ifp, IFF_DETACHING | IFF_VMOVING))
+ return (EBUSY);
+
+ error = if_vmove_busied(ifp, new_vnet);
+ if_unbusy(ifp);
+ return (error);
}
/*
@@ -2414,6 +2547,43 @@
return (ifp);
}
+#define IFF_BUSY_FLAGS (IFF_DETACHING | IFF_DYING | IFF_VMOVING)
+
+/*
+ * Busy the interface, if it's not already detaching/dying. The caller may
+ * request, via setflags, flags to set on the ifp while we're still holding the
+ * busy lock if we've succeeded. The caller is responsible for clearing any
+ * temporary flags that were set.
+ */
+static bool
+if_busy(struct ifnet *ifp, int setflags)
+{
+
+ IF_BUSY_LOCK(ifp);
+ if ((ifp->if_flags & IFF_BUSY_FLAGS) != 0) {
+ IF_BUSY_UNLOCK(ifp);
+ return (false);
+ }
+ refcount_acquire(&ifp->if_busy_count);
+ ifp->if_flags |= setflags;
+ IF_BUSY_UNLOCK(ifp);
+ return (true);
+}
+
+static void
+if_unbusy(struct ifnet *ifp)
+{
+
+ refcount_release(&ifp->if_busy_count);
+}
+
+static void
+if_busy_wait(struct ifnet *ifp)
+{
+
+ refcount_wait(&ifp->if_busy_count, "ifbusy", 0);
+}
+
static void *
ifr_buffer_get_buffer(void *data)
{
@@ -2486,8 +2656,8 @@
/*
* Hardware specific interface ioctls.
*/
-int
-ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
+static int
+ifhwioctl_internal(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
{
struct ifreq *ifr;
int error = 0, do_ifup = 0;
@@ -2499,6 +2669,14 @@
struct ifaddr *ifa;
struct sockaddr_dl *sdl;
+ /*
+ * We can't assert that we're not detaching because the procedure is to
+ * mark us as detaching then wait for the different "management" paths
+ * to exit before proceeding with the detach. We should have properly
+ * increased the refcount, at least.
+ */
+ KASSERT(ifp->if_busy_count > 0,
+ ("%s: mgmt refcount not increased", __func__));
ifr = (struct ifreq *)data;
switch (cmd) {
case SIOCGIFINDEX:
@@ -2902,6 +3080,23 @@
return (error);
}
+/*
+ * Hardware specific interface ioctls.
+ */
+int
+ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
+{
+ int error;
+
+ if (!if_busy(ifp, 0))
+ return (ENXIO);
+
+ error = ifhwioctl_internal(cmd, ifp, data, td);
+
+ if_unbusy(ifp);
+ return (error);
+}
+
#ifdef COMPAT_FREEBSD32
struct ifconf32 {
int32_t ifc_len;
@@ -2972,7 +3167,7 @@
switch (cmd) {
case SIOCGIFCONF:
error = ifconf(cmd, data);
- goto out_noref;
+ goto out_notbusy;
#ifdef COMPAT_FREEBSD32
case SIOCGIFCONF32:
@@ -2987,7 +3182,7 @@
error = ifconf(SIOCGIFCONF, (void *)&ifc);
if (error == 0)
ifc32->ifc_len = ifc.ifc_len;
- goto out_noref;
+ goto out_notbusy;
}
#endif
}
@@ -3012,7 +3207,7 @@
if (error == 0)
error = if_vmove_reclaim(td, ifr->ifr_name,
ifr->ifr_jid);
- goto out_noref;
+ goto out_notbusy;
#endif
case SIOCIFCREATE:
case SIOCIFCREATE2:
@@ -3021,20 +3216,20 @@
error = if_clone_create(ifr->ifr_name,
sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ?
ifr_data_get_ptr(ifr) : NULL);
- goto out_noref;
+ goto out_notbusy;
case SIOCIFDESTROY:
error = priv_check(td, PRIV_NET_IFDESTROY);
if (error == 0)
error = if_clone_destroy(ifr->ifr_name);
- goto out_noref;
+ goto out_notbusy;
case SIOCIFGCLONERS:
error = if_clone_list((struct if_clonereq *)data);
- goto out_noref;
+ goto out_notbusy;
case CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB):
error = if_getgroupmembers((struct ifgroupreq *)data);
- goto out_noref;
+ goto out_notbusy;
#if defined(INET) || defined(INET6)
case SIOCSVH:
@@ -3043,24 +3238,34 @@
error = EPROTONOSUPPORT;
else
error = (*carp_ioctl_p)(ifr, cmd, td);
- goto out_noref;
+ goto out_notbusy;
#endif
}
- ifp = ifunit_ref(ifr->ifr_name);
+ /*
+ * Don't increment the refcount; we'll do administrative refcounting as
+ * we want to ensure that we're no longer busy before
+ * if_detach/if_free may proceed.
+ */
+ ifp = ifunit(ifr->ifr_name);
if (ifp == NULL) {
error = ENXIO;
- goto out_noref;
+ goto out_notbusy;
+ }
+
+ if (!if_busy(ifp, 0)) {
+ error = ENXIO;
+ goto out_notbusy;
}
- error = ifhwioctl(cmd, ifp, data, td);
+ error = ifhwioctl_internal(cmd, ifp, data, td);
if (error != ENOIOCTL)
- goto out_ref;
+ goto out_busy;
oif_flags = ifp->if_flags;
if (so->so_proto == NULL) {
error = EOPNOTSUPP;
- goto out_ref;
+ goto out_busy;
}
/*
@@ -3086,9 +3291,9 @@
#endif
}
-out_ref:
- if_rele(ifp);
-out_noref:
+out_busy:
+ if_unbusy(ifp);
+out_notbusy:
#ifdef COMPAT_FREEBSD32
if (ifmrp != NULL) {
KASSERT((cmd == SIOCGIFMEDIA || cmd == SIOCGIFXMEDIA),
Index: sys/net/if_tuntap.c
===================================================================
--- sys/net/if_tuntap.c
+++ sys/net/if_tuntap.c
@@ -190,9 +190,6 @@
static TAILQ_HEAD(,tuntap_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
-static struct sx tun_ioctl_sx;
-SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");
-
SYSCTL_DECL(_net_link);
/* tun */
static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
@@ -641,9 +638,6 @@
bpfdetach(TUN2IFP(tp));
if_detach(TUN2IFP(tp));
}
- sx_xlock(&tun_ioctl_sx);
- TUN2IFP(tp)->if_softc = NULL;
- sx_xunlock(&tun_ioctl_sx);
free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit);
if_free(TUN2IFP(tp));
mtx_destroy(&tp->tun_mtx);
@@ -1011,14 +1005,8 @@
* from dying until we've created the alias (that will then be
* subsequently destroyed).
*/
- sx_xlock(&tun_ioctl_sx);
tp = ifp->if_softc;
- if (tp == NULL) {
- sx_xunlock(&tun_ioctl_sx);
- return;
- }
error = tun_busy(tp);
- sx_xunlock(&tun_ioctl_sx);
if (error != 0)
return;
if (tp->tun_alias != NULL) {
@@ -1324,12 +1312,7 @@
bool l2tun;
ifmr = NULL;
- sx_xlock(&tun_ioctl_sx);
tp = ifp->if_softc;
- if (tp == NULL) {
- error = ENXIO;
- goto bad;
- }
l2tun = (tp->tun_flags & TUN_L2) != 0;
switch(cmd) {
case SIOCGIFSTATUS:
@@ -1391,8 +1374,6 @@
error = EINVAL;
}
}
-bad:
- sx_xunlock(&tun_ioctl_sx);
return (error);
}
Index: sys/net/if_var.h
===================================================================
--- sys/net/if_var.h
+++ sys/net/if_var.h
@@ -423,6 +423,13 @@
struct debugnet_methods *if_debugnet_methods;
struct epoch_context if_epoch_ctx;
+ /*
+ * Bits for serializing management operations (ioctl/destroy/vmove)
+ * and ensuring order.
+ */
+ struct mtx if_busy_lock;
+ u_int if_busy_count;
+
/*
* Spare fields to be added before branching a stable branch, so
* that structure can be enhanced without changing the kernel
@@ -450,6 +457,12 @@
#define NET_EPOCH_WAIT() epoch_wait_preempt(net_epoch_preempt)
#define NET_EPOCH_ASSERT() MPASS(in_epoch(net_epoch_preempt))
+#define IF_BUSY_LOCK_INIT(if) mtx_init(&(if)->if_busy_lock, "if_busy_lock", NULL, MTX_DEF)
+#define IF_BUSY_LOCK_DESTROY(if) mtx_destroy(&(if)->if_busy_lock)
+#define IF_BUSY_LOCK(if) mtx_lock(&(if)->if_busy_lock)
+#define IF_BUSY_UNLOCK(if) mtx_unlock(&(if)->if_busy_lock)
+#define IF_BUSY_LOCK_ASSERT(if) mtx_assert(&(if)->if_busy_lock, MA_OWNED)
+
#ifdef _KERNEL
/* interface link layer address change event */
typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, May 21, 8:57 AM (20 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33382938
Default Alt Text
D22691.id65557.diff (16 KB)
Attached To
Mode
D22691: ifnet: drain+halt ioctl operations on detach
Attached
Detach File
Event Timeline
Log In to Comment