Index: sys/fs/nullfs/null.h =================================================================== --- sys/fs/nullfs/null.h +++ sys/fs/nullfs/null.h @@ -45,6 +45,8 @@ struct mount *nullm_vfs; struct vnode *nullm_lowerrootvp; /* Ref to lower root vnode */ uint64_t nullm_flags; + struct mount_upper_node upper_node; + struct mount_upper_node notify_node; }; #ifdef _KERNEL Index: sys/fs/nullfs/null_vfsops.c =================================================================== --- sys/fs/nullfs/null_vfsops.c +++ sys/fs/nullfs/null_vfsops.c @@ -163,7 +163,8 @@ * Save pointer to underlying FS and the reference to the * lower root vnode. */ - xmp->nullm_vfs = vfs_pin_from_vp(lowerrootvp); + xmp->nullm_vfs = vfs_register_upper_from_vp(lowerrootvp, mp, + &xmp->upper_node); if (xmp->nullm_vfs == NULL) { vput(lowerrootvp); free(xmp, M_NULLFSMNT); @@ -178,7 +179,7 @@ */ error = null_nodeget(mp, lowerrootvp, &nullm_rootvp); if (error != 0) { - vfs_unpin(xmp->nullm_vfs); + vfs_unregister_upper(xmp->nullm_vfs, &xmp->upper_node); vrele(lowerrootvp); free(xmp, M_NULLFSMNT); return (error); @@ -195,6 +196,11 @@ (xmp->nullm_vfs->mnt_kern_flag & MNTK_NULL_NOCACHE) != 0) xmp->nullm_flags &= ~NULLM_CACHE; + if ((xmp->nullm_flags & NULLM_CACHE) != 0) { + vfs_register_for_notification(xmp->nullm_vfs, mp, + &xmp->notify_node); + } + MNT_ILOCK(mp); if ((xmp->nullm_flags & NULLM_CACHE) != 0) { mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag & @@ -206,13 +212,6 @@ (MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS); MNT_IUNLOCK(mp); vfs_getnewfsid(mp); - if ((xmp->nullm_flags & NULLM_CACHE) != 0) { - MNT_ILOCK(xmp->nullm_vfs); - TAILQ_INSERT_TAIL(&xmp->nullm_vfs->mnt_uppers, mp, - mnt_upper_link); - MNT_IUNLOCK(xmp->nullm_vfs); - } - vfs_mountedfrom(mp, target); vput(nullm_rootvp); @@ -230,7 +229,6 @@ int mntflags; { struct null_mount *mntdata; - struct mount *ump; int error, flags; NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp); @@ -259,17 +257,11 @@ * Finally, throw away the 
null_mount structure */ mntdata = mp->mnt_data; - ump = mntdata->nullm_vfs; if ((mntdata->nullm_flags & NULLM_CACHE) != 0) { - MNT_ILOCK(ump); - while ((ump->mnt_kern_flag & MNTK_VGONE_UPPER) != 0) { - ump->mnt_kern_flag |= MNTK_VGONE_WAITER; - msleep(&ump->mnt_uppers, &ump->mnt_mtx, 0, "vgnupw", 0); - } - TAILQ_REMOVE(&ump->mnt_uppers, mp, mnt_upper_link); - MNT_IUNLOCK(ump); + vfs_unregister_for_notification(mntdata->nullm_vfs, + &mntdata->notify_node); } - vfs_unpin(ump); + vfs_unregister_upper(mntdata->nullm_vfs, &mntdata->upper_node); vrele(mntdata->nullm_lowerrootvp); mp->mnt_data = NULL; free(mntdata, M_NULLFSMNT); Index: sys/fs/unionfs/union.h =================================================================== --- sys/fs/unionfs/union.h +++ sys/fs/unionfs/union.h @@ -57,6 +57,8 @@ struct vnode *um_lowervp; /* VREFed once */ struct vnode *um_uppervp; /* VREFed once */ struct vnode *um_rootvp; /* ROOT vnode */ + struct mount_upper_node um_lower_link; /* node in lower FS list of uppers */ + struct mount_upper_node um_upper_link; /* node in upper FS list of uppers */ unionfs_copymode um_copymode; unionfs_whitemode um_whitemode; uid_t um_uid; Index: sys/fs/unionfs/union_vfsops.c =================================================================== --- sys/fs/unionfs/union_vfsops.c +++ sys/fs/unionfs/union_vfsops.c @@ -292,14 +292,16 @@ return (error); } - lowermp = vfs_pin_from_vp(ump->um_lowervp); - uppermp = vfs_pin_from_vp(ump->um_uppervp); + lowermp = vfs_register_upper_from_vp(ump->um_lowervp, mp, + &ump->um_lower_link); + uppermp = vfs_register_upper_from_vp(ump->um_uppervp, mp, + &ump->um_upper_link); if (lowermp == NULL || uppermp == NULL) { if (lowermp != NULL) - vfs_unpin(lowermp); + vfs_unregister_upper(lowermp, &ump->um_lower_link); if (uppermp != NULL) - vfs_unpin(uppermp); + vfs_unregister_upper(uppermp, &ump->um_upper_link); free(ump, M_UNIONFSMNT); mp->mnt_data = NULL; return (ENOENT); @@ -357,8 +359,8 @@ if (error) return (error); - 
vfs_unpin(ump->um_lowervp->v_mount); - vfs_unpin(ump->um_uppervp->v_mount); + vfs_unregister_upper(ump->um_lowervp->v_mount, &ump->um_lower_link); + vfs_unregister_upper(ump->um_uppervp->v_mount, &ump->um_upper_link); free(ump, M_UNIONFSMNT); mp->mnt_data = NULL; Index: sys/kern/vfs_mount.c =================================================================== --- sys/kern/vfs_mount.c +++ sys/kern/vfs_mount.c @@ -65,6 +65,7 @@ #include #include #include +#include <sys/taskqueue.h> #include #include @@ -89,6 +90,11 @@ SYSCTL_BOOL(_vfs, OID_AUTO, default_autoro, CTLFLAG_RW, &default_autoro, 0, "Retry failed r/w mount as r/o if no explicit ro/rw option is specified"); +static bool recursive_forced_unmount = false; +SYSCTL_BOOL(_vfs, OID_AUTO, recursive_forced_unmount, CTLFLAG_RW, + &recursive_forced_unmount, 0, "Recursively unmount stacked upper mounts" + " when a file system is forcibly unmounted"); + MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure"); MALLOC_DEFINE(M_STATFS, "statfs", "statfs structure"); static uma_zone_t mount_zone; @@ -103,6 +109,16 @@ EVENTHANDLER_LIST_DEFINE(vfs_mounted); EVENTHANDLER_LIST_DEFINE(vfs_unmounted); +static void vfs_deferred_unmount(void *arg, int pending); +static struct task deferred_unmount_task = + TASK_INITIALIZER(0, vfs_deferred_unmount, NULL); +static struct mtx deferred_unmount_lock; +MTX_SYSINIT(deferred_unmount, &deferred_unmount_lock, "deferred_unmount", + MTX_DEF); +static STAILQ_HEAD(, mount) deferred_unmount_list = + STAILQ_HEAD_INITIALIZER(deferred_unmount_list); +TASKQUEUE_DEFINE_THREAD(deferred_unmount); + static void mount_devctl_event(const char *type, struct mount *mp, bool donew); /* @@ -505,8 +521,21 @@ MNT_IUNLOCK(mp); } +/* + * Register ump as an upper mount of the mount associated with + * vnode vp. This registration will be tracked through + * mount_upper_node upper, which should be allocated by the + * caller and stored in per-mount data associated with mp. 
+ * + * If successful, this function will return the mount associated + * with vp, and will ensure that it cannot be unmounted until + * ump has been unregistered as one of its upper mounts. + * + * Upon failure this function will return NULL. + */ struct mount * -vfs_pin_from_vp(struct vnode *vp) +vfs_register_upper_from_vp(struct vnode *vp, struct mount *ump, + struct mount_upper_node *upper) { struct mount *mp; @@ -514,26 +543,81 @@ if (mp == NULL) return (NULL); MNT_ILOCK(mp); - if (mp != vp->v_mount || (mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { + if (mp != vp->v_mount || + ((mp->mnt_kern_flag & (MNTK_UNMOUNT | MNTK_RECURSE)) != 0)) { MNT_IUNLOCK(mp); return (NULL); } + KASSERT(ump != mp, ("upper and lower mounts are identical")); + upper->mp = ump; MNT_REF(mp); - KASSERT(mp->mnt_pinned_count < INT_MAX, - ("mount pinned count overflow")); - ++mp->mnt_pinned_count; + TAILQ_INSERT_TAIL(&mp->mnt_uppers, upper, mnt_upper_link); MNT_IUNLOCK(mp); return (mp); } +/* + * Register upper mount ump to receive vnode unlink/reclaim + * notifications from lower mount mp. This registration will + * be tracked through mount_upper_node upper, which should be + * allocated by the caller and stored in per-mount data + * associated with mp. + * + * ump must already be registered as an upper mount of mp + * through a call to vfs_register_upper_from_vp(). + */ void -vfs_unpin(struct mount *mp) +vfs_register_for_notification(struct mount *mp, struct mount *ump, + struct mount_upper_node *upper) +{ + upper->mp = ump; + MNT_ILOCK(mp); + TAILQ_INSERT_TAIL(&mp->mnt_notify, upper, mnt_upper_link); + MNT_IUNLOCK(mp); +} + +static void +vfs_drain_upper_locked(struct mount *mp) +{ + mtx_assert(MNT_MTX(mp), MA_OWNED); + while (mp->mnt_upper_pending != 0) { + mp->mnt_kern_flag |= MNTK_UPPER_WAITER; + msleep(&mp->mnt_uppers, MNT_MTX(mp), 0, "mntupw", 0); + } +} + +/* + * Undo a previous call to vfs_register_for_notification(). 
+ * The mount represented by upper must be currently registered + * as an upper mount for mp. + */ +void +vfs_unregister_for_notification(struct mount *mp, + struct mount_upper_node *upper) +{ + MNT_ILOCK(mp); + vfs_drain_upper_locked(mp); + TAILQ_REMOVE(&mp->mnt_notify, upper, mnt_upper_link); + MNT_IUNLOCK(mp); +} + +/* + * Undo a previous call to vfs_register_upper_from_vp(). + * This must be done before mp can be unmounted. + */ +void +vfs_unregister_upper(struct mount *mp, struct mount_upper_node *upper) { MNT_ILOCK(mp); - KASSERT(mp->mnt_pinned_count > 0, ("mount pinned count underflow")); KASSERT((mp->mnt_kern_flag & MNTK_UNMOUNT) == 0, - ("mount pinned with pending unmount")); - --mp->mnt_pinned_count; + ("registered upper with pending unmount")); + vfs_drain_upper_locked(mp); + TAILQ_REMOVE(&mp->mnt_uppers, upper, mnt_upper_link); + if ((mp->mnt_kern_flag & MNTK_TASKQUEUE_WAITER) != 0 && + TAILQ_EMPTY(&mp->mnt_uppers)) { + mp->mnt_kern_flag &= ~MNTK_TASKQUEUE_WAITER; + wakeup(&mp->taskqueue_link); + } MNT_REL(mp); MNT_IUNLOCK(mp); } @@ -600,8 +684,10 @@ mac_mount_create(cred, mp); #endif arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0); + mp->mnt_upper_pending = 0; TAILQ_INIT(&mp->mnt_uppers); - mp->mnt_pinned_count = 0; + TAILQ_INIT(&mp->mnt_notify); + mp->taskqueue_flags = 0; return (mp); } @@ -640,9 +726,9 @@ vn_printf(vp, "dangling vnode "); panic("unmount: dangling vnode"); } - KASSERT(mp->mnt_pinned_count == 0, - ("mnt_pinned_count = %d", mp->mnt_pinned_count)); + KASSERT(mp->mnt_upper_pending == 0, ("mnt_upper_pending")); KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers")); + KASSERT(TAILQ_EMPTY(&mp->mnt_notify), ("mnt_notify")); if (mp->mnt_nvnodelistsize != 0) panic("vfs_mount_destroy: nonzero nvnodelistsize"); if (mp->mnt_lazyvnodelistsize != 0) @@ -1799,17 +1885,165 @@ return (sum); } +static bool +deferred_unmount_enqueue(struct mount *mp, uint64_t flags, bool requeue) +{ + bool enqueued; + + enqueued = false; + 
mtx_lock(&deferred_unmount_lock); + if ((mp->taskqueue_flags & MNT_TASKQUEUE) == 0 || requeue) { + mp->taskqueue_flags = flags | MNT_TASKQUEUE; + STAILQ_INSERT_TAIL(&deferred_unmount_list, mp, taskqueue_link); + enqueued = true; + } + mtx_unlock(&deferred_unmount_lock); + + if (enqueued) { + taskqueue_enqueue(taskqueue_deferred_unmount, + &deferred_unmount_task); + } + + return (enqueued); +} + +/* + * Taskqueue handler for processing async/recursive unmounts + */ +static void +vfs_deferred_unmount(void *argi __unused, int pending __unused) +{ + STAILQ_HEAD(, mount) local_unmounts; + uint64_t flags; + struct mount *mp, *tmp; + bool unmounted; + + STAILQ_INIT(&local_unmounts); + mtx_lock(&deferred_unmount_lock); + STAILQ_CONCAT(&local_unmounts, &deferred_unmount_list); + mtx_unlock(&deferred_unmount_lock); + + STAILQ_FOREACH_SAFE(mp, &local_unmounts, taskqueue_link, tmp) { + flags = mp->taskqueue_flags; + KASSERT((flags & MNT_TASKQUEUE) != 0, + ("taskqueue unmount without MNT_TASKQUEUE")); + if (dounmount(mp, flags, curthread) != 0) { + MNT_ILOCK(mp); + unmounted = ((mp->mnt_kern_flag & MNTK_REFEXPIRE) != 0); + MNT_IUNLOCK(mp); + if (!unmounted) + deferred_unmount_enqueue(mp, flags, true); + else + vfs_rel(mp); + } + } +} + /* * Do the actual filesystem unmount. */ int -dounmount(struct mount *mp, int flags, struct thread *td) +dounmount(struct mount *mp, uint64_t flags, struct thread *td) { + struct mount_upper_node *upper; struct vnode *coveredvp, *rootvp; int error; uint64_t async_flag; int mnt_gen_r; + KASSERT((flags & MNT_TASKQUEUE) == 0 || + (flags & (MNT_RECURSE | MNT_FORCE)) == (MNT_RECURSE | MNT_FORCE), + ("MNT_TASKQUEUE requires MNT_RECURSE | MNT_FORCE")); + + /* + * If the caller has explicitly requested the unmount to be handled by + * the taskqueue and we're not already in taskqueue context, queue + * up the unmount request and exit. 
This is done prior to any + * credential checks; MNT_TASKQUEUE should be used only for kernel- + * initiated unmounts and will therefore be processed with the + * (kernel) credentials of the taskqueue thread. Still, callers + * should be sure this is the behavior they want. + */ + if ((flags & MNT_TASKQUEUE) != 0 && + taskqueue_member(taskqueue_deferred_unmount, curthread) == 0) { + if (!deferred_unmount_enqueue(mp, flags, false)) + vfs_rel(mp); + return (EINPROGRESS); + } + + /* + * Only privileged root, or (if MNT_USER is set) the user that did the + * original mount is permitted to unmount this filesystem. + * This check should be made prior to queueing up any recursive + * unmounts of upper filesystems. Those unmounts will be executed + * with kernel thread credentials and are expected to succeed, so + * we must at least ensure the originating context has sufficient + * privilege to unmount the base filesystem before proceeding with + * the uppers. + */ + error = vfs_suser(mp, td); + if (error != 0) { + KASSERT((flags & MNT_TASKQUEUE) == 0, + ("taskqueue unmount with insufficient privilege")); + vfs_rel(mp); + return (error); + } + + if (recursive_forced_unmount && ((flags & MNT_FORCE) != 0)) + flags |= MNT_RECURSE; + + if ((flags & MNT_RECURSE) != 0) { + KASSERT((flags & MNT_FORCE) != 0, + ("MNT_RECURSE requires MNT_FORCE")); + + MNT_ILOCK(mp); + /* + * Set MNTK_RECURSE to prevent new upper mounts from being + * added, and note that an operation on the uppers list is in + * progress. This will ensure that unregistration from the + * uppers list, and therefore any pending unmount of the upper + * FS, can't complete until after we finish walking the list. 
+ */ + mp->mnt_kern_flag |= MNTK_RECURSE; + mp->mnt_upper_pending++; + TAILQ_FOREACH(upper, &mp->mnt_uppers, mnt_upper_link) { + MNT_IUNLOCK(mp); + vfs_ref(upper->mp); + if (!deferred_unmount_enqueue(upper->mp, flags, false)) + vfs_rel(upper->mp); + MNT_ILOCK(mp); + } + mp->mnt_upper_pending--; + if ((mp->mnt_kern_flag & MNTK_UPPER_WAITER) != 0 && + mp->mnt_upper_pending == 0) { + mp->mnt_kern_flag &= ~MNTK_UPPER_WAITER; + wakeup(&mp->mnt_uppers); + } + /* + * If we're not on the taskqueue, wait until the uppers list + * is drained before proceeding with unmount. Otherwise, if + * we are on the taskqueue and there are still pending uppers, + * just re-enqueue on the end of the taskqueue. + */ + if ((flags & MNT_TASKQUEUE) == 0) { + while (!TAILQ_EMPTY(&mp->mnt_uppers)) { + mp->mnt_kern_flag |= MNTK_TASKQUEUE_WAITER; + msleep(&mp->taskqueue_link, MNT_MTX(mp), 0, + "umntqw", 0); + } + } else if (!TAILQ_EMPTY(&mp->mnt_uppers)) { + MNT_IUNLOCK(mp); + deferred_unmount_enqueue(mp, flags, true); + return (0); + } + MNT_IUNLOCK(mp); + KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers not empty")); + } + + /* Allow the taskqueue to safely re-enqueue on failure */ + if ((flags & MNT_TASKQUEUE) != 0) + vfs_ref(mp); + if ((coveredvp = mp->mnt_vnodecovered) != NULL) { mnt_gen_r = mp->mnt_gen; VI_LOCK(coveredvp); @@ -1828,27 +2062,13 @@ } } - /* - * Only privileged root, or (if MNT_USER is set) the user that did the - * original mount is permitted to unmount this filesystem. 
- */ - error = vfs_suser(mp, td); - if (error != 0) { - if (coveredvp != NULL) { - VOP_UNLOCK(coveredvp); - vdrop(coveredvp); - } - vfs_rel(mp); - return (error); - } - vfs_op_enter(mp); vn_start_write(NULL, &mp, V_WAIT | V_MNTREF); MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 || (mp->mnt_flag & MNT_UPDATE) != 0 || - mp->mnt_pinned_count != 0) { + !TAILQ_EMPTY(&mp->mnt_uppers)) { dounmount_cleanup(mp, coveredvp, 0); return (EBUSY); } @@ -1952,6 +2172,7 @@ } return (error); } + mtx_lock(&mountlist_mtx); TAILQ_REMOVE(&mountlist, mp, mnt_list); mtx_unlock(&mountlist_mtx); @@ -1977,6 +2198,8 @@ } if (mp == rootdevmp) rootdevmp = NULL; + if ((flags & MNT_TASKQUEUE) != 0) + vfs_rel(mp); vfs_mount_destroy(mp); return (0); } Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -831,9 +831,9 @@ * valid. */ while (mp->mnt_kern_flag & MNTK_UNMOUNT) { - KASSERT(mp->mnt_pinned_count == 0, - ("%s: non-zero pinned count %d with pending unmount", - __func__, mp->mnt_pinned_count)); + KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), + ("%s: non-empty upper mount list with pending unmount", + __func__)); if (flags & MBF_NOWAIT || mp->mnt_kern_flag & MNTK_REFEXPIRE) { MNT_REL(mp); MNT_IUNLOCK(mp); @@ -3891,61 +3891,44 @@ VI_UNLOCK(vp); } -static void -notify_lowervp_vfs_dummy(struct mount *mp __unused, - struct vnode *lowervp __unused) -{ -} - /* * Notify upper mounts about reclaimed or unlinked vnode. 
*/ void vfs_notify_upper(struct vnode *vp, int event) { - static struct vfsops vgonel_vfsops = { - .vfs_reclaim_lowervp = notify_lowervp_vfs_dummy, - .vfs_unlink_lowervp = notify_lowervp_vfs_dummy, - }; - struct mount *mp, *ump, *mmp; + struct mount *mp; + struct mount_upper_node *ump; - mp = vp->v_mount; + mp = atomic_load_ptr(&vp->v_mount); if (mp == NULL) return; - if (TAILQ_EMPTY(&mp->mnt_uppers)) + if (TAILQ_EMPTY(&mp->mnt_notify)) return; - mmp = malloc(sizeof(struct mount), M_TEMP, M_WAITOK | M_ZERO); - mmp->mnt_op = &vgonel_vfsops; - mmp->mnt_kern_flag |= MNTK_MARKER; MNT_ILOCK(mp); - mp->mnt_kern_flag |= MNTK_VGONE_UPPER; - for (ump = TAILQ_FIRST(&mp->mnt_uppers); ump != NULL;) { - if ((ump->mnt_kern_flag & MNTK_MARKER) != 0) { - ump = TAILQ_NEXT(ump, mnt_upper_link); - continue; - } - TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link); + mp->mnt_upper_pending++; + KASSERT(mp->mnt_upper_pending > 0, + ("%s: mnt_upper_pending %d", __func__, mp->mnt_upper_pending)); + TAILQ_FOREACH(ump, &mp->mnt_notify, mnt_upper_link) { MNT_IUNLOCK(mp); switch (event) { case VFS_NOTIFY_UPPER_RECLAIM: - VFS_RECLAIM_LOWERVP(ump, vp); + VFS_RECLAIM_LOWERVP(ump->mp, vp); break; case VFS_NOTIFY_UPPER_UNLINK: - VFS_UNLINK_LOWERVP(ump, vp); + VFS_UNLINK_LOWERVP(ump->mp, vp); break; default: KASSERT(0, ("invalid event %d", event)); break; } MNT_ILOCK(mp); - ump = TAILQ_NEXT(mmp, mnt_upper_link); - TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link); } - free(mmp, M_TEMP); - mp->mnt_kern_flag &= ~MNTK_VGONE_UPPER; - if ((mp->mnt_kern_flag & MNTK_VGONE_WAITER) != 0) { - mp->mnt_kern_flag &= ~MNTK_VGONE_WAITER; + mp->mnt_upper_pending--; + if ((mp->mnt_kern_flag & MNTK_UPPER_WAITER) != 0 && + mp->mnt_upper_pending == 0) { + mp->mnt_kern_flag &= ~MNTK_UPPER_WAITER; wakeup(&mp->mnt_uppers); } MNT_IUNLOCK(mp); @@ -4376,12 +4359,12 @@ MNT_KERN_FLAG(MNTK_EXTENDED_SHARED); MNT_KERN_FLAG(MNTK_SHARED_WRITES); MNT_KERN_FLAG(MNTK_NO_IOPF); - MNT_KERN_FLAG(MNTK_VGONE_UPPER); - 
MNT_KERN_FLAG(MNTK_VGONE_WAITER); + MNT_KERN_FLAG(MNTK_RECURSE); + MNT_KERN_FLAG(MNTK_UPPER_WAITER); MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT); - MNT_KERN_FLAG(MNTK_MARKER); MNT_KERN_FLAG(MNTK_USES_BCACHE); MNT_KERN_FLAG(MNTK_FPLOOKUP); + MNT_KERN_FLAG(MNTK_TASKQUEUE_WAITER); MNT_KERN_FLAG(MNTK_NOASYNC); MNT_KERN_FLAG(MNTK_UNMOUNT); MNT_KERN_FLAG(MNTK_MWAIT); Index: sys/sys/mount.h =================================================================== --- sys/sys/mount.h +++ sys/sys/mount.h @@ -190,6 +190,19 @@ _Static_assert(sizeof(struct mount_pcpu) == 16, "the struct is allocated from pcpu 16 zone"); +/* + * Structure for tracking a stacked filesystem mounted above another + * filesystem. This is expected to be stored in the upper FS' per-mount data. + * + * Lock reference: + * i - lower mount interlock + * c - constant from node initialization + */ +struct mount_upper_node { + struct mount *mp; /* (c) mount object for upper FS */ + TAILQ_ENTRY(mount_upper_node) mnt_upper_link; /* (i) position in uppers list */ +}; + /* * Structure per mounted filesystem. Each mounted filesystem has an * array of operations and an instance record. The filesystems are @@ -199,8 +212,8 @@ * l - mnt_listmtx * m - mountlist_mtx * i - interlock - * i* - interlock of uppers' list head * v - vnode freelist mutex + * d - deferred unmount list mutex * * Unmarked fields are considered stable as long as a ref is held. 
* @@ -242,10 +255,12 @@ struct mtx mnt_listmtx; struct vnodelst mnt_lazyvnodelist; /* (l) list of lazy vnodes */ int mnt_lazyvnodelistsize; /* (l) # of lazy vnodes */ - int mnt_pinned_count; /* (i) unmount prevented */ + int mnt_upper_pending; /* (i) # of pending ops on mnt_uppers */ struct lock mnt_explock; /* vfs_export walkers lock */ - TAILQ_ENTRY(mount) mnt_upper_link; /* (i*) we in the all uppers */ - TAILQ_HEAD(, mount) mnt_uppers; /* (i) upper mounts over us */ + TAILQ_HEAD(, mount_upper_node) mnt_uppers; /* (i) upper mounts over us */ + TAILQ_HEAD(, mount_upper_node) mnt_notify; /* (i) upper mounts for notification */ + STAILQ_ENTRY(mount) taskqueue_link; /* (d) our place in deferred unmount list */ + uint64_t taskqueue_flags; /* (d) unmount flags passed from taskqueue */ }; #endif /* _WANT_MOUNT || _KERNEL */ @@ -438,9 +453,13 @@ #define MNT_BYFSID 0x0000000008000000ULL /* specify filesystem by ID. */ #define MNT_NOCOVER 0x0000001000000000ULL /* Do not cover a mount point */ #define MNT_EMPTYDIR 0x0000002000000000ULL /* Only mount on empty dir */ -#define MNT_CMDFLAGS (MNT_UPDATE | MNT_DELEXPORT | MNT_RELOAD | \ +#define MNT_RECURSE 0x0000100000000000ULL /* recursively unmount uppers */ +#define MNT_TASKQUEUE 0x0000200000000000ULL /* unmount in taskqueue context */ +#define MNT_CMDFLAGS (MNT_UPDATE | MNT_DELEXPORT | MNT_RELOAD | \ MNT_FORCE | MNT_SNAPSHOT | MNT_NONBUSY | \ - MNT_BYFSID | MNT_NOCOVER | MNT_EMPTYDIR) + MNT_BYFSID | MNT_NOCOVER | MNT_EMPTYDIR | \ + MNT_RECURSE | MNT_TASKQUEUE) + /* * Internal filesystem control flags stored in mnt_kern_flag. * @@ -466,10 +485,9 @@ #define MNTK_NO_IOPF 0x00000100 /* Disallow page faults during reads and writes. Filesystem shall properly handle i/o state on EFAULT. 
*/ -#define MNTK_VGONE_UPPER 0x00000200 -#define MNTK_VGONE_WAITER 0x00000400 +#define MNTK_RECURSE 0x00000200 /* pending recursive unmount */ +#define MNTK_UPPER_WAITER 0x00000400 /* waiting to drain MNTK_UPPER_PENDING */ #define MNTK_LOOKUP_EXCL_DOTDOT 0x00000800 -#define MNTK_MARKER 0x00001000 #define MNTK_UNMAPPED_BUFS 0x00002000 #define MNTK_USES_BCACHE 0x00004000 /* FS uses the buffer cache. */ #define MNTK_TEXT_REFS 0x00008000 /* Keep use ref for text */ @@ -477,8 +495,9 @@ #define MNTK_UNIONFS 0x00020000 /* A hack for F_ISUNIONSTACK */ #define MNTK_FPLOOKUP 0x00040000 /* fast path lookup is supported */ #define MNTK_SUSPEND_ALL 0x00080000 /* Suspended by all-fs suspension */ -#define MNTK_NOASYNC 0x00800000 /* disable async */ -#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ +#define MNTK_TASKQUEUE_WAITER 0x00100000 /* Waiting on unmount taskqueue */ +#define MNTK_NOASYNC 0x00800000 /* disable async */ +#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ #define MNTK_SUSPEND 0x08000000 /* request write suspension */ #define MNTK_SUSPEND2 0x04000000 /* block secondary writes */ @@ -952,7 +971,7 @@ * exported vnode operations */ -int dounmount(struct mount *, int, struct thread *); +int dounmount(struct mount *, uint64_t, struct thread *); int kernel_mount(struct mntarg *ma, uint64_t flags); int kernel_vmount(int flags, ...); @@ -1012,8 +1031,13 @@ int vfs_suser(struct mount *, struct thread *); void vfs_unbusy(struct mount *); void vfs_unmountall(void); -struct mount *vfs_pin_from_vp(struct vnode *); -void vfs_unpin(struct mount *); +struct mount *vfs_register_upper_from_vp(struct vnode *, + struct mount *ump, struct mount_upper_node *); +void vfs_register_for_notification(struct mount *, struct mount *, + struct mount_upper_node *); +void vfs_unregister_for_notification(struct mount *, + struct mount_upper_node *); +void vfs_unregister_upper(struct mount *, struct 
mount_upper_node *); extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct mtx_padalign mountlist_mtx; extern struct nfs_public nfs_pub; Index: sys/ufs/ffs/ffs_vfsops.c =================================================================== --- sys/ufs/ffs/ffs_vfsops.c +++ sys/ufs/ffs/ffs_vfsops.c @@ -281,28 +281,8 @@ } /* - * Initiate a forcible unmount. + * On first ENXIO error, initiate an asynchronous forcible unmount. * Used to unmount filesystems whose underlying media has gone away. - */ -static void -ffs_fsfail_unmount(void *v, int pending) -{ - struct fsfail_task *etp; - struct mount *mp; - - etp = v; - - /* - * Find our mount and get a ref on it, then try to unmount. - */ - mp = vfs_getvfs(&etp->fsid); - if (mp != NULL) - dounmount(mp, MNT_FORCE, curthread); - free(etp, M_UFSMNT); -} - -/* - * On first ENXIO error, start a task that forcibly unmounts the filesystem. * * Return true if a cleanup is in progress. */ @@ -320,25 +300,18 @@ int ffs_fsfail_cleanup_locked(struct ufsmount *ump, int error) { - struct fsfail_task *etp; - struct task *tp; - mtx_assert(UFS_MTX(ump), MA_OWNED); if (error == ENXIO && (ump->um_flags & UM_FSFAIL_CLEANUP) == 0) { ump->um_flags |= UM_FSFAIL_CLEANUP; /* * Queue an async forced unmount. 
*/ - etp = ump->um_fsfail_task; - ump->um_fsfail_task = NULL; - if (etp != NULL) { - tp = &etp->task; - TASK_INIT(tp, 0, ffs_fsfail_unmount, etp); - taskqueue_enqueue(taskqueue_thread, tp); - printf("UFS: forcibly unmounting %s from %s\n", - ump->um_mountp->mnt_stat.f_mntfromname, - ump->um_mountp->mnt_stat.f_mntonname); - } + vfs_ref(ump->um_mountp); + dounmount(ump->um_mountp, + MNT_FORCE | MNT_RECURSE | MNT_TASKQUEUE, curthread); + printf("UFS: forcibly unmounting %s from %s\n", + ump->um_mountp->mnt_stat.f_mntfromname, + ump->um_mountp->mnt_stat.f_mntonname); } return ((ump->um_flags & UM_FSFAIL_CLEANUP) != 0); } @@ -1046,7 +1019,6 @@ struct g_consumer *cp; struct mount *nmp; struct vnode *devvp; - struct fsfail_task *etp; int candelete, canspeedup; off_t loc; @@ -1334,9 +1306,6 @@ (void) ufs_extattr_autostart(mp, td); #endif /* !UFS_EXTATTR_AUTOSTART */ #endif /* !UFS_EXTATTR */ - etp = malloc(sizeof *ump->um_fsfail_task, M_UFSMNT, M_WAITOK | M_ZERO); - etp->fsid = mp->mnt_stat.f_fsid; - ump->um_fsfail_task = etp; return (0); out: if (fs != NULL) { @@ -1583,8 +1552,6 @@ free(fs->fs_csp, M_UFSMNT); free(fs->fs_si, M_UFSMNT); free(fs, M_UFSMNT); - if (ump->um_fsfail_task != NULL) - free(ump->um_fsfail_task, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = NULL; MNT_ILOCK(mp); Index: sys/ufs/ufs/ufsmount.h =================================================================== --- sys/ufs/ufs/ufsmount.h +++ sys/ufs/ufs/ufsmount.h @@ -67,10 +67,6 @@ TAILQ_HEAD(inodedeplst, inodedep); LIST_HEAD(bmsafemaphd, bmsafemap); LIST_HEAD(trimlist_hashhead, ffs_blkfree_trim_params); -struct fsfail_task { - struct task task; - fsid_t fsid; -}; #include #include @@ -123,7 +119,6 @@ struct taskqueue *um_trim_tq; /* (c) trim request queue */ struct trimlist_hashhead *um_trimhash; /* (i) trimlist hash table */ u_long um_trimlisthashsize; /* (i) trim hash table size-1 */ - struct fsfail_task *um_fsfail_task; /* (i) task for fsfail cleanup*/ /* (c) - below function ptrs */ int 
(*um_balloc)(struct vnode *, off_t, int, struct ucred *, int, struct buf **); Index: tools/test/stress2/misc/gnop11.sh =================================================================== --- /dev/null +++ tools/test/stress2/misc/gnop11.sh @@ -0,0 +1,85 @@ +#!/bin/sh + +# +# SPDX-License-Identifier: BSD-2-Clause-FreeBSD +# +# Copyright (c) 2020 Kirk McKusick +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# + +# 'panic: Lock (lockmgr) ufs not locked @ kern/kern_lock.c:1271' seen: +# https://people.freebsd.org/~pho/stress/log/gnop8.txt + +[ `id -u ` -ne 0 ] && echo "Must be root!" && exit 1 +. 
../default.cfg + +fsck=/sbin/fsck_ffs +exp=/sbin/fsck_ffs.exp # Experimental version +[ -f $exp ] && { echo "Using $exp"; fsck=$exp; } +mdconfig -a -t swap -s 5g -u $mdstart || exit 1 +md=md$mdstart +newfs -j /dev/$md || exit 1 +start=`date +%s` + +nullfs_mounts=15 +: ${nullfs_dstdir:=$mntpoint} + +while [ $((`date +%s` - start)) -lt 120 ]; do + gnop create /dev/$md || exit 1 + mount /dev/$md.nop /mnt || exit 1 + + for i in `jot $nullfs_mounts`; do + [ ! -d ${nullfs_dstdir}$i ] && mkdir ${nullfs_dstdir}$i + [ ! -d ${nullfs_dstdir}$(($i + $nullfs_mounts)) ] && + mkdir ${nullfs_dstdir}$(($i + $nullfs_mounts)) + mount | grep -q " ${nullfs_dstdir}$i " && + umount ${nullfs_dstdir}$i + mount | grep -q " ${nullfs_dstdir}$(($i + $nullfs_mounts)) " && + umount ${nullfs_dstdir}$(($i + $nullfs_mounts)) + mount_nullfs /mnt ${nullfs_dstdir}$i > \ + /dev/null 2>&1 + mount_nullfs ${nullfs_dstdir}$i ${nullfs_dstdir}$(($i + $nullfs_mounts)) > \ + /dev/null 2>&1 + done + # start your favorite I/O test here + cp -rp /[a-l]* /[n-z]* /mnt & + + # after some number of seconds + sleep 1 + gnop destroy -f /dev/$md.nop + kill $! + + # wait until forcible unmount, may be up to about 30 seconds, + # but typically very quick if I/O is in progress + while (a=`mount | egrep /mnt`) do sleep 1; done + + # first fsck will attempt journal recovery + $fsck -d -y /dev/$md + + # second fsck will do traditional fsck to check for any errors + # from journal recovery + $fsck -d -y /dev/$md + wait +done +mdconfig -d -u ${md#md} +exit 0