Page MenuHomeFreeBSD

D22691.id65512.diff
No OneTemporary

D22691.id65512.diff

Index: sys/net/if.h
===================================================================
--- sys/net/if.h
+++ sys/net/if.h
@@ -160,6 +160,7 @@
#define IFF_PPROMISC 0x20000 /* (n) user-requested promisc mode */
#define IFF_MONITOR 0x40000 /* (n) user-requested monitor mode */
#define IFF_STATICARP 0x80000 /* (n) static ARP */
+#define IFF_DETACHING 0x100000 /* (n) interface is detaching */
#define IFF_DYING 0x200000 /* (n) interface is winding down */
#define IFF_RENAMING 0x400000 /* (n) interface is being renamed */
#define IFF_NOGROUP 0x800000 /* (n) interface is not part of any groups */
Index: sys/net/if.c
===================================================================
--- sys/net/if.c
+++ sys/net/if.c
@@ -274,7 +274,7 @@
static int if_detach_internal(struct ifnet *, int, struct if_clone **);
static void if_siocaddmulti(void *, int);
#ifdef VIMAGE
-static void if_vmove(struct ifnet *, struct vnet *);
+static void if_vmove(struct ifnet *, struct vnet *, bool shutdown);
#endif
#ifdef INET6
@@ -474,7 +474,7 @@
/* Return all inherited interfaces to their parent vnets. */
CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
if (ifp->if_home_vnet != ifp->if_vnet)
- if_vmove(ifp, ifp->if_home_vnet);
+ if_vmove(ifp, ifp->if_home_vnet, true);
}
}
VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
@@ -557,6 +557,8 @@
}
IF_ADDR_LOCK_INIT(ifp);
+ IF_MGMT_LOCK_INIT(ifp);
+ IF_MGMT_CV_INIT(ifp);
TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
TASK_INIT(&ifp->if_addmultitask, 0, if_siocaddmulti, ifp);
ifp->if_afdata_initialized = 0;
@@ -603,6 +605,10 @@
if_free_internal(struct ifnet *ifp)
{
+ /*
+ * XXX Should we also assert that it's been properly detached, a la
+ * IFF_DETACHED? It seems likely.
+ */
KASSERT((ifp->if_flags & IFF_DYING),
("if_free_internal: interface not dying"));
@@ -615,6 +621,8 @@
#endif /* MAC */
IF_AFDATA_DESTROY(ifp);
IF_ADDR_LOCK_DESTROY(ifp);
+ IF_MGMT_LOCK_DESTROY(ifp);
+ IF_MGMT_CV_DESTROY(ifp);
ifq_delete(&ifp->if_snd);
for (int i = 0; i < IFCOUNTERS; i++)
@@ -1054,9 +1062,32 @@
if_detach(struct ifnet *ifp)
{
+#ifdef VIMAGE
+#define ifp_waits_for_shutdown(ifp) \
+ (((ifp)->if_vnet != (ifp)->if_home_vnet) && (ifp)->if_vnet->vnet_shutdown)
+#else
+#define ifp_waits_for_shutdown(ifp) false
+#endif
+
+ /*
+ * We've marked ourselves as detaching, now we wait for the various
+ * management functions (e.g. ioctl handling, vmove) to complete before
+ * we proceed. If the vnet the interface is currently in is in the
+ * shutdown process and it's not already in its home vnet, we wait for
+ * the interface's vnet to stabilize before we proceed with the final
+ * detach. If the ifnet's home vnet is the one that is getting
+ * shutdown, it's not going to change vnets and the expectation is
+ * likely that we're about to detach+free it.
+ */
+ IF_MGMT_LOCK(ifp);
+ ifp->if_flags |= IFF_DETACHING;
+ while (ifp->if_mgmt_refcount > 0 || ifp_waits_for_shutdown(ifp))
+ IF_MGMT_CV_WAIT(ifp);
+ IF_MGMT_UNLOCK(ifp);
CURVNET_SET_QUIET(ifp->if_vnet);
if_detach_internal(ifp, 0, NULL);
CURVNET_RESTORE();
+#undef ifp_waits_for_shutdown
}
/*
@@ -1077,11 +1108,14 @@
struct domain *dp;
struct ifnet *iter;
int found = 0;
-#ifdef VIMAGE
- bool shutdown;
- shutdown = ifp->if_vnet->vnet_shutdown;
-#endif
+ if (!vmove) {
+ KASSERT(ifp->if_mgmt_refcount == 0 &&
+ (ifp->if_flags & IFF_DETACHING) != 0,
+ ("%s: improper detach, dangling mgmt refs", __func__));
+ KASSERT((ifp->if_flags & IFF_DYING) == 0,
+ ("%s: out of order detach/free", __func__));
+ }
IFNET_WLOCK();
CK_STAILQ_FOREACH(iter, &V_ifnet, if_link)
if (iter == ifp) {
@@ -1149,9 +1183,11 @@
#ifdef VIMAGE
/*
* On VNET shutdown abort here as the stack teardown will do all
- * the work top-down for us.
+ * the work top-down for us. XXX This is still racy; nothing is
+ * stopping a vnet from being shutdown while we're in the middle of
+ * detaching but somewhere after this point.
*/
- if (shutdown) {
+ if (ifp->if_vnet->vnet_shutdown) {
/* Give interface users the chance to clean up. */
EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
@@ -1258,7 +1294,7 @@
* and finally find an unused if_xname for the target vnet.
*/
static void
-if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
+if_vmove(struct ifnet *ifp, struct vnet *new_vnet, bool shutdown)
{
struct if_clone *ifc;
#ifdef DEV_BPF
@@ -1267,6 +1303,19 @@
void *old;
int rc;
+ /*
+ * We can't hold the lock across attach/detach; bump the refcount and
+ * drop it. if_detach will stall until we broadcast at the end. We
+ * don't let detach block us if we're in the middle of shutdown so that
+ * we don't end up with a vnet that still has interfaces in it.
+ */
+ IF_MGMT_LOCK(ifp);
+ if (!shutdown && (ifp->if_flags & IFF_DETACHING) != 0) {
+ IF_MGMT_UNLOCK(ifp);
+ return;
+ }
+ refcount_acquire(&ifp->if_mgmt_refcount);
+ IF_MGMT_UNLOCK(ifp);
#ifdef DEV_BPF
/*
* if_detach_internal() will call the eventhandler to notify
@@ -1283,7 +1332,7 @@
*/
rc = if_detach_internal(ifp, 1, &ifc);
if (rc != 0)
- return;
+ goto out;
/*
* Unlink the ifnet from ifindex_table[] in current vnet, and shrink
@@ -1327,6 +1376,11 @@
#endif
CURVNET_RESTORE();
+out:
+ IF_MGMT_LOCK(ifp);
+ if (refcount_release(&ifp->if_mgmt_refcount))
+ IF_MGMT_CV_BROADCAST(ifp);
+ IF_MGMT_UNLOCK(ifp);
}
/*
@@ -1372,7 +1426,7 @@
CURVNET_RESTORE();
/* Move the interface into the child jail/vnet. */
- if_vmove(ifp, pr->pr_vnet);
+ if_vmove(ifp, pr->pr_vnet, false);
/* Report the new if_xname back to the userland. */
sprintf(ifname, "%s", ifp->if_xname);
@@ -1422,7 +1476,7 @@
}
/* Get interface back from child jail/vnet. */
- if_vmove(ifp, vnet_dst);
+ if_vmove(ifp, vnet_dst, false);
CURVNET_RESTORE();
/* Report the new if_xname back to the userland. */
@@ -2481,8 +2535,8 @@
/*
* Hardware specific interface ioctls.
*/
-int
-ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
+static int
+ifhwioctl_internal(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
{
struct ifreq *ifr;
int error = 0, do_ifup = 0;
@@ -2494,6 +2548,14 @@
struct ifaddr *ifa;
struct sockaddr_dl *sdl;
+ /*
+ * We can't assert that we're not detaching because the procedure is to
+ * mark us as detaching then wait for the different "management" paths
+ * to exit before proceeding with the detach. We should have properly
+ * increased the refcount, at least.
+ */
+ KASSERT(ifp->if_mgmt_refcount > 0,
+ ("%s: mgmt refcount not increased", __func__));
ifr = (struct ifreq *)data;
switch (cmd) {
case SIOCGIFINDEX:
@@ -2897,6 +2959,35 @@
return (error);
}
+/*
+ * Hardware specific interface ioctls.
+ */
+int
+ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
+{
+ int error;
+
+ /*
+ * Don't attempt to do anything on a detaching interface.
+ */
+ IF_MGMT_LOCK(ifp);
+ if ((ifp->if_flags & IFF_DETACHING) != 0) {
+ IF_MGMT_UNLOCK(ifp);
+ return (ENXIO);
+ }
+ refcount_acquire(&ifp->if_mgmt_refcount);
+ IF_MGMT_UNLOCK(ifp);
+
+ error = ifhwioctl_internal(cmd, ifp, data, td);
+
+ IF_MGMT_LOCK(ifp);
+ if (refcount_release(&ifp->if_mgmt_refcount))
+ IF_MGMT_CV_BROADCAST(ifp);
+ IF_MGMT_UNLOCK(ifp);
+ return (error);
+}
+
+
#ifdef COMPAT_FREEBSD32
struct ifconf32 {
int32_t ifc_len;
@@ -3042,13 +3133,30 @@
#endif
}
- ifp = ifunit_ref(ifr->ifr_name);
+ /*
+ * Don't increment the refcount; we'll do administrative refcounting as
+ * we want to ensure that we're out of these management paths before
+ * if_detach/if_free may proceed.
+ */
+ ifp = ifunit(ifr->ifr_name);
if (ifp == NULL) {
error = ENXIO;
goto out_noref;
}
- error = ifhwioctl(cmd, ifp, data, td);
+ /*
+ * Don't attempt to do anything on a detaching or dying interface.
+ */
+ IF_MGMT_LOCK(ifp);
+ if ((ifp->if_flags & (IFF_DETACHING | IFF_DYING)) != 0) {
+ IF_MGMT_UNLOCK(ifp);
+ error = ENXIO;
+ goto out_noref;
+ }
+ refcount_acquire(&ifp->if_mgmt_refcount);
+ IF_MGMT_UNLOCK(ifp);
+
+ error = ifhwioctl_internal(cmd, ifp, data, td);
if (error != ENOIOCTL)
goto out_ref;
@@ -3082,7 +3190,10 @@
}
out_ref:
- if_rele(ifp);
+ IF_MGMT_LOCK(ifp);
+ if (refcount_release(&ifp->if_mgmt_refcount))
+ IF_MGMT_CV_BROADCAST(ifp);
+ IF_MGMT_UNLOCK(ifp);
out_noref:
#ifdef COMPAT_FREEBSD32
if (ifmrp != NULL) {
Index: sys/net/if_tuntap.c
===================================================================
--- sys/net/if_tuntap.c
+++ sys/net/if_tuntap.c
@@ -190,9 +190,6 @@
static TAILQ_HEAD(,tuntap_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead);
SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, "");
-static struct sx tun_ioctl_sx;
-SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl");
-
SYSCTL_DECL(_net_link);
/* tun */
static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0,
@@ -641,9 +638,6 @@
bpfdetach(TUN2IFP(tp));
if_detach(TUN2IFP(tp));
}
- sx_xlock(&tun_ioctl_sx);
- TUN2IFP(tp)->if_softc = NULL;
- sx_xunlock(&tun_ioctl_sx);
free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit);
if_free(TUN2IFP(tp));
mtx_destroy(&tp->tun_mtx);
@@ -1011,14 +1005,8 @@
* from dying until we've created the alias (that will then be
* subsequently destroyed).
*/
- sx_xlock(&tun_ioctl_sx);
tp = ifp->if_softc;
- if (tp == NULL) {
- sx_xunlock(&tun_ioctl_sx);
- return;
- }
error = tun_busy(tp);
- sx_xunlock(&tun_ioctl_sx);
if (error != 0)
return;
if (tp->tun_alias != NULL) {
@@ -1324,12 +1312,7 @@
bool l2tun;
ifmr = NULL;
- sx_xlock(&tun_ioctl_sx);
tp = ifp->if_softc;
- if (tp == NULL) {
- error = ENXIO;
- goto bad;
- }
l2tun = (tp->tun_flags & TUN_L2) != 0;
switch(cmd) {
case SIOCGIFSTATUS:
@@ -1391,8 +1374,6 @@
error = EINVAL;
}
}
-bad:
- sx_xunlock(&tun_ioctl_sx);
return (error);
}
Index: sys/net/if_var.h
===================================================================
--- sys/net/if_var.h
+++ sys/net/if_var.h
@@ -423,6 +423,14 @@
struct debugnet_methods *if_debugnet_methods;
struct epoch_context if_epoch_ctx;
+ /*
+ * Bits for serializing management operations (ioctl/destroy/vmove)
+ * and ensuring order.
+ */
+ struct mtx if_mgmt_lock;
+ struct cv if_mgmt_cv;
+ u_int if_mgmt_refcount; /* count of threads in mgmt functions */
+
/*
* Spare fields to be added before branching a stable branch, so
* that structure can be enhanced without changing the kernel
@@ -450,6 +458,17 @@
#define NET_EPOCH_WAIT() epoch_wait_preempt(net_epoch_preempt)
#define NET_EPOCH_ASSERT() MPASS(in_epoch(net_epoch_preempt))
+#define IF_MGMT_CV_INIT(if) cv_init(&(if)->if_mgmt_cv, "if_mgmt_cv")
+#define IF_MGMT_CV_DESTROY(if) cv_destroy(&(if)->if_mgmt_cv)
+#define IF_MGMT_CV_BROADCAST(if) cv_broadcast(&(if)->if_mgmt_cv)
+#define IF_MGMT_CV_WAIT(if) cv_wait(&(if)->if_mgmt_cv, &(if)->if_mgmt_lock)
+
+#define IF_MGMT_LOCK_INIT(if) mtx_init(&(if)->if_mgmt_lock, "if_mgmt_lock", NULL, MTX_DEF)
+#define IF_MGMT_LOCK_DESTROY(if) mtx_destroy(&(if)->if_mgmt_lock)
+#define IF_MGMT_LOCK(if) mtx_lock(&(if)->if_mgmt_lock)
+#define IF_MGMT_UNLOCK(if) mtx_unlock(&(if)->if_mgmt_lock)
+#define IF_MGMT_LOCK_ASSERT(if) mtx_assert(&(if)->if_mgmt_lock, MA_OWNED)
+
#ifdef _KERNEL
/* interface link layer address change event */
typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *);

File Metadata

Mime Type
text/plain
Expires
Wed, May 20, 5:32 PM (16 h, 26 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33352572
Default Alt Text
D22691.id65512.diff (11 KB)

Event Timeline