Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F106101605
D31016.id92718.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
24 KB
Referenced Files
None
Subscribers
None
D31016.id92718.diff
View Options
diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -45,6 +45,8 @@
struct mount *nullm_vfs;
struct vnode *nullm_lowerrootvp; /* Ref to lower root vnode */
uint64_t nullm_flags;
+ struct mount_upper_node upper_node;
+ struct mount_upper_node notify_node;
};
#ifdef _KERNEL
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -163,7 +163,8 @@
* Save pointer to underlying FS and the reference to the
* lower root vnode.
*/
- xmp->nullm_vfs = vfs_pin_from_vp(lowerrootvp);
+ xmp->nullm_vfs = vfs_register_upper_from_vp(lowerrootvp, mp,
+ &xmp->upper_node);
if (xmp->nullm_vfs == NULL) {
vput(lowerrootvp);
free(xmp, M_NULLFSMNT);
@@ -178,7 +179,7 @@
*/
error = null_nodeget(mp, lowerrootvp, &nullm_rootvp);
if (error != 0) {
- vfs_unpin(xmp->nullm_vfs);
+ vfs_unregister_upper(xmp->nullm_vfs, &xmp->upper_node);
vrele(lowerrootvp);
free(xmp, M_NULLFSMNT);
return (error);
@@ -195,6 +196,11 @@
(xmp->nullm_vfs->mnt_kern_flag & MNTK_NULL_NOCACHE) != 0)
xmp->nullm_flags &= ~NULLM_CACHE;
+ if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
+ vfs_register_for_notification(xmp->nullm_vfs, mp,
+ &xmp->notify_node);
+ }
+
MNT_ILOCK(mp);
if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
@@ -206,13 +212,6 @@
(MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS);
MNT_IUNLOCK(mp);
vfs_getnewfsid(mp);
- if ((xmp->nullm_flags & NULLM_CACHE) != 0) {
- MNT_ILOCK(xmp->nullm_vfs);
- TAILQ_INSERT_TAIL(&xmp->nullm_vfs->mnt_uppers, mp,
- mnt_upper_link);
- MNT_IUNLOCK(xmp->nullm_vfs);
- }
-
vfs_mountedfrom(mp, target);
vput(nullm_rootvp);
@@ -230,7 +229,6 @@
int mntflags;
{
struct null_mount *mntdata;
- struct mount *ump;
int error, flags;
NULLFSDEBUG("nullfs_unmount: mp = %p\n", (void *)mp);
@@ -259,17 +257,11 @@
* Finally, throw away the null_mount structure
*/
mntdata = mp->mnt_data;
- ump = mntdata->nullm_vfs;
if ((mntdata->nullm_flags & NULLM_CACHE) != 0) {
- MNT_ILOCK(ump);
- while ((ump->mnt_kern_flag & MNTK_VGONE_UPPER) != 0) {
- ump->mnt_kern_flag |= MNTK_VGONE_WAITER;
- msleep(&ump->mnt_uppers, &ump->mnt_mtx, 0, "vgnupw", 0);
- }
- TAILQ_REMOVE(&ump->mnt_uppers, mp, mnt_upper_link);
- MNT_IUNLOCK(ump);
+ vfs_unregister_for_notification(mntdata->nullm_vfs,
+ &mntdata->notify_node);
}
- vfs_unpin(ump);
+ vfs_unregister_upper(mntdata->nullm_vfs, &mntdata->upper_node);
vrele(mntdata->nullm_lowerrootvp);
mp->mnt_data = NULL;
free(mntdata, M_NULLFSMNT);
diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h
--- a/sys/fs/unionfs/union.h
+++ b/sys/fs/unionfs/union.h
@@ -57,6 +57,8 @@
struct vnode *um_lowervp; /* VREFed once */
struct vnode *um_uppervp; /* VREFed once */
struct vnode *um_rootvp; /* ROOT vnode */
+ struct mount_upper_node um_lower_link; /* node in lower FS list of uppers */
+ struct mount_upper_node um_upper_link; /* node in upper FS list of uppers */
unionfs_copymode um_copymode;
unionfs_whitemode um_whitemode;
uid_t um_uid;
diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c
--- a/sys/fs/unionfs/union_vfsops.c
+++ b/sys/fs/unionfs/union_vfsops.c
@@ -292,14 +292,16 @@
return (error);
}
- lowermp = vfs_pin_from_vp(ump->um_lowervp);
- uppermp = vfs_pin_from_vp(ump->um_uppervp);
+ lowermp = vfs_register_upper_from_vp(ump->um_lowervp, mp,
+ &ump->um_lower_link);
+ uppermp = vfs_register_upper_from_vp(ump->um_uppervp, mp,
+ &ump->um_upper_link);
if (lowermp == NULL || uppermp == NULL) {
if (lowermp != NULL)
- vfs_unpin(lowermp);
+ vfs_unregister_upper(lowermp, &ump->um_lower_link);
if (uppermp != NULL)
- vfs_unpin(uppermp);
+ vfs_unregister_upper(uppermp, &ump->um_upper_link);
free(ump, M_UNIONFSMNT);
mp->mnt_data = NULL;
return (ENOENT);
@@ -357,8 +359,8 @@
if (error)
return (error);
- vfs_unpin(ump->um_lowervp->v_mount);
- vfs_unpin(ump->um_uppervp->v_mount);
+ vfs_unregister_upper(ump->um_lowervp->v_mount, &ump->um_lower_link);
+ vfs_unregister_upper(ump->um_uppervp->v_mount, &ump->um_upper_link);
free(ump, M_UNIONFSMNT);
mp->mnt_data = NULL;
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -65,6 +65,7 @@
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
+#include <sys/taskqueue.h>
#include <sys/vnode.h>
#include <vm/uma.h>
@@ -89,6 +90,11 @@
SYSCTL_BOOL(_vfs, OID_AUTO, default_autoro, CTLFLAG_RW, &default_autoro, 0,
"Retry failed r/w mount as r/o if no explicit ro/rw option is specified");
+static bool recursive_forced_unmount = false;
+SYSCTL_BOOL(_vfs, OID_AUTO, recursive_forced_unmount, CTLFLAG_RW,
+ &recursive_forced_unmount, 0, "Recursively unmount stacked upper mounts"
+ " when a file system is forcibly unmounted");
+
MALLOC_DEFINE(M_MOUNT, "mount", "vfs mount structure");
MALLOC_DEFINE(M_STATFS, "statfs", "statfs structure");
static uma_zone_t mount_zone;
@@ -103,6 +109,16 @@
EVENTHANDLER_LIST_DEFINE(vfs_mounted);
EVENTHANDLER_LIST_DEFINE(vfs_unmounted);
+static void vfs_deferred_unmount(void *arg, int pending);
+static struct task deferred_unmount_task =
+    TASK_INITIALIZER(0, vfs_deferred_unmount, NULL);
+static struct mtx deferred_unmount_lock;
+MTX_SYSINIT(deferred_unmount, &deferred_unmount_lock, "deferred_unmount",
+ MTX_DEF);
+static STAILQ_HEAD(, mount) deferred_unmount_list =
+ STAILQ_HEAD_INITIALIZER(deferred_unmount_list);
+TASKQUEUE_DEFINE_THREAD(deferred_unmount);
+
static void mount_devctl_event(const char *type, struct mount *mp, bool donew);
/*
@@ -505,8 +521,21 @@
MNT_IUNLOCK(mp);
}
+/*
+ * Register ump as an upper mount of the mount associated with
+ * vnode vp. This registration will be tracked through
+ * mount_upper_node upper, which should be allocated by the
+ * caller and stored in per-mount data associated with mp.
+ *
+ * If successful, this function will return the mount associated
+ * with vp, and will ensure that it cannot be unmounted until
+ * ump has been unregistered as one of its upper mounts.
+ *
+ * Upon failure this function will return NULL.
+ */
struct mount *
-vfs_pin_from_vp(struct vnode *vp)
+vfs_register_upper_from_vp(struct vnode *vp, struct mount *ump,
+ struct mount_upper_node *upper)
{
struct mount *mp;
@@ -514,26 +543,81 @@
if (mp == NULL)
return (NULL);
MNT_ILOCK(mp);
- if (mp != vp->v_mount || (mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
+ if (mp != vp->v_mount ||
+ ((mp->mnt_kern_flag & (MNTK_UNMOUNT | MNTK_RECURSE)) != 0)) {
MNT_IUNLOCK(mp);
return (NULL);
}
+ KASSERT(ump != mp, ("upper and lower mounts are identical"));
+ upper->mp = ump;
MNT_REF(mp);
- KASSERT(mp->mnt_pinned_count < INT_MAX,
- ("mount pinned count overflow"));
- ++mp->mnt_pinned_count;
+ TAILQ_INSERT_TAIL(&mp->mnt_uppers, upper, mnt_upper_link);
MNT_IUNLOCK(mp);
return (mp);
}
+/*
+ * Register upper mount ump to receive vnode unlink/reclaim
+ * notifications from lower mount mp. This registration will
+ * be tracked through mount_upper_node upper, which should be
+ * allocated by the caller and stored in per-mount data
+ * associated with mp.
+ *
+ * ump must already be registered as an upper mount of mp
+ * through a call to vfs_register_upper_from_vp().
+ */
void
-vfs_unpin(struct mount *mp)
+vfs_register_for_notification(struct mount *mp, struct mount *ump,
+ struct mount_upper_node *upper)
+{
+ upper->mp = ump;
+ MNT_ILOCK(mp);
+ TAILQ_INSERT_TAIL(&mp->mnt_notify, upper, mnt_upper_link);
+ MNT_IUNLOCK(mp);
+}
+
+static void
+vfs_drain_upper_locked(struct mount *mp)
+{
+ mtx_assert(MNT_MTX(mp), MA_OWNED);
+ while (mp->mnt_upper_pending != 0) {
+ mp->mnt_kern_flag |= MNTK_UPPER_WAITER;
+ msleep(&mp->mnt_uppers, MNT_MTX(mp), 0, "mntupw", 0);
+ }
+}
+
+/*
+ * Undo a previous call to vfs_register_for_notification().
+ * The mount represented by upper must be currently registered
+ * as an upper mount for mp.
+ */
+void
+vfs_unregister_for_notification(struct mount *mp,
+ struct mount_upper_node *upper)
+{
+ MNT_ILOCK(mp);
+ vfs_drain_upper_locked(mp);
+ TAILQ_REMOVE(&mp->mnt_notify, upper, mnt_upper_link);
+ MNT_IUNLOCK(mp);
+}
+
+/*
+ * Undo a previous call to vfs_register_upper_from_vp().
+ * This must be done before mp can be unmounted.
+ */
+void
+vfs_unregister_upper(struct mount *mp, struct mount_upper_node *upper)
{
MNT_ILOCK(mp);
- KASSERT(mp->mnt_pinned_count > 0, ("mount pinned count underflow"));
KASSERT((mp->mnt_kern_flag & MNTK_UNMOUNT) == 0,
- ("mount pinned with pending unmount"));
- --mp->mnt_pinned_count;
+ ("registered upper with pending unmount"));
+ vfs_drain_upper_locked(mp);
+ TAILQ_REMOVE(&mp->mnt_uppers, upper, mnt_upper_link);
+ if ((mp->mnt_kern_flag & MNTK_TASKQUEUE_WAITER) != 0 &&
+ TAILQ_EMPTY(&mp->mnt_uppers)) {
+ mp->mnt_kern_flag &= ~MNTK_TASKQUEUE_WAITER;
+ wakeup(&mp->mnt_taskqueue_link);
+ }
MNT_REL(mp);
MNT_IUNLOCK(mp);
}
@@ -600,8 +684,10 @@
mac_mount_create(cred, mp);
#endif
arc4rand(&mp->mnt_hashseed, sizeof mp->mnt_hashseed, 0);
+ mp->mnt_upper_pending = 0;
TAILQ_INIT(&mp->mnt_uppers);
- mp->mnt_pinned_count = 0;
+ TAILQ_INIT(&mp->mnt_notify);
+ mp->mnt_taskqueue_flags = 0;
return (mp);
}
@@ -640,9 +726,9 @@
vn_printf(vp, "dangling vnode ");
panic("unmount: dangling vnode");
}
- KASSERT(mp->mnt_pinned_count == 0,
- ("mnt_pinned_count = %d", mp->mnt_pinned_count));
+ KASSERT(mp->mnt_upper_pending == 0, ("mnt_upper_pending"));
KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers"));
+ KASSERT(TAILQ_EMPTY(&mp->mnt_notify), ("mnt_notify"));
if (mp->mnt_nvnodelistsize != 0)
panic("vfs_mount_destroy: nonzero nvnodelistsize");
if (mp->mnt_lazyvnodelistsize != 0)
@@ -1799,17 +1885,166 @@
return (sum);
}
+static bool
+deferred_unmount_enqueue(struct mount *mp, uint64_t flags, bool requeue)
+{
+ bool enqueued;
+
+ enqueued = false;
+ mtx_lock(&deferred_unmount_lock);
+ if ((mp->mnt_taskqueue_flags & MNT_DEFERRED) == 0 || requeue) {
+ mp->mnt_taskqueue_flags = flags | MNT_DEFERRED;
+ STAILQ_INSERT_TAIL(&deferred_unmount_list, mp,
+ mnt_taskqueue_link);
+ enqueued = true;
+ }
+ mtx_unlock(&deferred_unmount_lock);
+
+ if (enqueued) {
+ taskqueue_enqueue(taskqueue_deferred_unmount,
+ &deferred_unmount_task);
+ }
+
+ return (enqueued);
+}
+
+/*
+ * Taskqueue handler for processing async/recursive unmounts
+ */
+static void
+vfs_deferred_unmount(void *argi __unused, int pending __unused)
+{
+ STAILQ_HEAD(, mount) local_unmounts;
+ uint64_t flags;
+ struct mount *mp, *tmp;
+ bool unmounted;
+
+ STAILQ_INIT(&local_unmounts);
+ mtx_lock(&deferred_unmount_lock);
+ STAILQ_CONCAT(&local_unmounts, &deferred_unmount_list);
+ mtx_unlock(&deferred_unmount_lock);
+
+ STAILQ_FOREACH_SAFE(mp, &local_unmounts, mnt_taskqueue_link, tmp) {
+ flags = mp->mnt_taskqueue_flags;
+ KASSERT((flags & MNT_DEFERRED) != 0,
+ ("taskqueue unmount without MNT_DEFERRED"));
+ if (dounmount(mp, flags, curthread) != 0) {
+ MNT_ILOCK(mp);
+ unmounted = ((mp->mnt_kern_flag & MNTK_REFEXPIRE) != 0);
+ MNT_IUNLOCK(mp);
+ if (!unmounted)
+ deferred_unmount_enqueue(mp, flags, true);
+ else
+ vfs_rel(mp);
+ }
+ }
+}
+
/*
* Do the actual filesystem unmount.
*/
int
-dounmount(struct mount *mp, int flags, struct thread *td)
+dounmount(struct mount *mp, uint64_t flags, struct thread *td)
{
+ struct mount_upper_node *upper;
struct vnode *coveredvp, *rootvp;
int error;
uint64_t async_flag;
int mnt_gen_r;
+ KASSERT((flags & MNT_DEFERRED) == 0 ||
+ (flags & (MNT_RECURSE | MNT_FORCE)) == (MNT_RECURSE | MNT_FORCE),
+ ("MNT_DEFERRED requires MNT_RECURSE | MNT_FORCE"));
+
+ /*
+ * If the caller has explicitly requested the unmount to be handled by
+ * the taskqueue and we're not already in taskqueue context, queue
+ * up the unmount request and exit. This is done prior to any
+ * credential checks; MNT_DEFERRED should be used only for kernel-
+ * initiated unmounts and will therefore be processed with the
+ * (kernel) credentials of the taskqueue thread. Still, callers
+ * should be sure this is the behavior they want.
+ */
+ if ((flags & MNT_DEFERRED) != 0 &&
+ taskqueue_member(taskqueue_deferred_unmount, curthread) == 0) {
+ if (!deferred_unmount_enqueue(mp, flags, false))
+ vfs_rel(mp);
+ return (EINPROGRESS);
+ }
+
+ /*
+ * Only privileged root, or (if MNT_USER is set) the user that did the
+ * original mount is permitted to unmount this filesystem.
+ * This check should be made prior to queueing up any recursive
+ * unmounts of upper filesystems. Those unmounts will be executed
+ * with kernel thread credentials and are expected to succeed, so
+ * we must at least ensure the originating context has sufficient
+ * privilege to unmount the base filesystem before proceeding with
+ * the uppers.
+ */
+ error = vfs_suser(mp, td);
+ if (error != 0) {
+ KASSERT((flags & MNT_DEFERRED) == 0,
+ ("taskqueue unmount with insufficient privilege"));
+ vfs_rel(mp);
+ return (error);
+ }
+
+ if (recursive_forced_unmount && ((flags & MNT_FORCE) != 0))
+ flags |= MNT_RECURSE;
+
+ if ((flags & MNT_RECURSE) != 0) {
+ KASSERT((flags & MNT_FORCE) != 0,
+ ("MNT_RECURSE requires MNT_FORCE"));
+
+ MNT_ILOCK(mp);
+ /*
+ * Set MNTK_RECURSE to prevent new upper mounts from being
+ * added, and note that an operation on the uppers list is in
+ * progress. This will ensure that unregistration from the
+ * uppers list, and therefore any pending unmount of the upper
+ * FS, can't complete until after we finish walking the list.
+ */
+ mp->mnt_kern_flag |= MNTK_RECURSE;
+ mp->mnt_upper_pending++;
+ TAILQ_FOREACH(upper, &mp->mnt_uppers, mnt_upper_link) {
+ MNT_IUNLOCK(mp);
+ vfs_ref(upper->mp);
+ if (!deferred_unmount_enqueue(upper->mp, flags, false))
+ vfs_rel(upper->mp);
+ MNT_ILOCK(mp);
+ }
+ mp->mnt_upper_pending--;
+ if ((mp->mnt_kern_flag & MNTK_UPPER_WAITER) != 0 &&
+ mp->mnt_upper_pending == 0) {
+ mp->mnt_kern_flag &= ~MNTK_UPPER_WAITER;
+ wakeup(&mp->mnt_uppers);
+ }
+ /*
+ * If we're not on the taskqueue, wait until the uppers list
+ * is drained before proceeding with unmount. Otherwise, if
+ * we are on the taskqueue and there are still pending uppers,
+ * just re-enqueue on the end of the taskqueue.
+ */
+ if ((flags & MNT_DEFERRED) == 0) {
+ while (!TAILQ_EMPTY(&mp->mnt_uppers)) {
+ mp->mnt_kern_flag |= MNTK_TASKQUEUE_WAITER;
+ msleep(&mp->mnt_taskqueue_link, MNT_MTX(mp), 0,
+ "umntqw", 0);
+ }
+ } else if (!TAILQ_EMPTY(&mp->mnt_uppers)) {
+ MNT_IUNLOCK(mp);
+ deferred_unmount_enqueue(mp, flags, true);
+ return (0);
+ }
+ MNT_IUNLOCK(mp);
+ KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers not empty"));
+ }
+
+ /* Allow the taskqueue to safely re-enqueue on failure */
+ if ((flags & MNT_DEFERRED) != 0)
+ vfs_ref(mp);
+
if ((coveredvp = mp->mnt_vnodecovered) != NULL) {
mnt_gen_r = mp->mnt_gen;
VI_LOCK(coveredvp);
@@ -1828,27 +2063,13 @@
}
}
- /*
- * Only privileged root, or (if MNT_USER is set) the user that did the
- * original mount is permitted to unmount this filesystem.
- */
- error = vfs_suser(mp, td);
- if (error != 0) {
- if (coveredvp != NULL) {
- VOP_UNLOCK(coveredvp);
- vdrop(coveredvp);
- }
- vfs_rel(mp);
- return (error);
- }
-
vfs_op_enter(mp);
vn_start_write(NULL, &mp, V_WAIT | V_MNTREF);
MNT_ILOCK(mp);
if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 ||
(mp->mnt_flag & MNT_UPDATE) != 0 ||
- mp->mnt_pinned_count != 0) {
+ !TAILQ_EMPTY(&mp->mnt_uppers)) {
dounmount_cleanup(mp, coveredvp, 0);
return (EBUSY);
}
@@ -1952,6 +2173,7 @@
}
return (error);
}
+
mtx_lock(&mountlist_mtx);
TAILQ_REMOVE(&mountlist, mp, mnt_list);
mtx_unlock(&mountlist_mtx);
@@ -1977,6 +2199,8 @@
}
if (mp == rootdevmp)
rootdevmp = NULL;
+ if ((flags & MNT_DEFERRED) != 0)
+ vfs_rel(mp);
vfs_mount_destroy(mp);
return (0);
}
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -831,9 +831,9 @@
* valid.
*/
while (mp->mnt_kern_flag & MNTK_UNMOUNT) {
- KASSERT(mp->mnt_pinned_count == 0,
- ("%s: non-zero pinned count %d with pending unmount",
- __func__, mp->mnt_pinned_count));
+ KASSERT(TAILQ_EMPTY(&mp->mnt_uppers),
+ ("%s: non-empty upper mount list with pending unmount",
+ __func__));
if (flags & MBF_NOWAIT || mp->mnt_kern_flag & MNTK_REFEXPIRE) {
MNT_REL(mp);
MNT_IUNLOCK(mp);
@@ -3897,6 +3897,11 @@
{
}
+struct notify_mount {
+ struct mount mp;
+ struct mount_upper_node upper;
+};
+
/*
* Notify upper mounts about reclaimed or unlinked vnode.
*/
@@ -3907,45 +3912,52 @@
.vfs_reclaim_lowervp = notify_lowervp_vfs_dummy,
.vfs_unlink_lowervp = notify_lowervp_vfs_dummy,
};
- struct mount *mp, *ump, *mmp;
+ struct mount *mp;
+ struct mount_upper_node *ump;
+ struct notify_mount *mmp;
mp = vp->v_mount;
if (mp == NULL)
return;
- if (TAILQ_EMPTY(&mp->mnt_uppers))
+ if (TAILQ_EMPTY(&mp->mnt_notify))
return;
- mmp = malloc(sizeof(struct mount), M_TEMP, M_WAITOK | M_ZERO);
- mmp->mnt_op = &vgonel_vfsops;
- mmp->mnt_kern_flag |= MNTK_MARKER;
+ mmp = malloc(sizeof(*mmp), M_TEMP, M_WAITOK | M_ZERO);
+ mmp->mp.mnt_op = &vgonel_vfsops;
+ mmp->mp.mnt_kern_flag |= MNTK_MARKER;
+ mmp->upper.mp = &mmp->mp;
MNT_ILOCK(mp);
- mp->mnt_kern_flag |= MNTK_VGONE_UPPER;
- for (ump = TAILQ_FIRST(&mp->mnt_uppers); ump != NULL;) {
- if ((ump->mnt_kern_flag & MNTK_MARKER) != 0) {
+ mp->mnt_upper_pending++;
+ KASSERT(mp->mnt_upper_pending > 0,
+ ("%s: mnt_upper_pending %d", __func__, mp->mnt_upper_pending));
+ for (ump = TAILQ_FIRST(&mp->mnt_notify); ump != NULL;) {
+ if ((ump->mp->mnt_kern_flag & MNTK_MARKER) != 0) {
ump = TAILQ_NEXT(ump, mnt_upper_link);
continue;
}
- TAILQ_INSERT_AFTER(&mp->mnt_uppers, ump, mmp, mnt_upper_link);
+ TAILQ_INSERT_AFTER(&mp->mnt_notify, ump, &mmp->upper,
+ mnt_upper_link);
MNT_IUNLOCK(mp);
switch (event) {
case VFS_NOTIFY_UPPER_RECLAIM:
- VFS_RECLAIM_LOWERVP(ump, vp);
+ VFS_RECLAIM_LOWERVP(ump->mp, vp);
break;
case VFS_NOTIFY_UPPER_UNLINK:
- VFS_UNLINK_LOWERVP(ump, vp);
+ VFS_UNLINK_LOWERVP(ump->mp, vp);
break;
default:
KASSERT(0, ("invalid event %d", event));
break;
}
MNT_ILOCK(mp);
- ump = TAILQ_NEXT(mmp, mnt_upper_link);
- TAILQ_REMOVE(&mp->mnt_uppers, mmp, mnt_upper_link);
+ ump = TAILQ_NEXT(&mmp->upper, mnt_upper_link);
+ TAILQ_REMOVE(&mp->mnt_notify, &mmp->upper, mnt_upper_link);
}
free(mmp, M_TEMP);
- mp->mnt_kern_flag &= ~MNTK_VGONE_UPPER;
- if ((mp->mnt_kern_flag & MNTK_VGONE_WAITER) != 0) {
- mp->mnt_kern_flag &= ~MNTK_VGONE_WAITER;
+ mp->mnt_upper_pending--;
+ if ((mp->mnt_kern_flag & MNTK_UPPER_WAITER) != 0 &&
+ mp->mnt_upper_pending == 0) {
+ mp->mnt_kern_flag &= ~MNTK_UPPER_WAITER;
wakeup(&mp->mnt_uppers);
}
MNT_IUNLOCK(mp);
@@ -4376,12 +4388,13 @@
MNT_KERN_FLAG(MNTK_EXTENDED_SHARED);
MNT_KERN_FLAG(MNTK_SHARED_WRITES);
MNT_KERN_FLAG(MNTK_NO_IOPF);
- MNT_KERN_FLAG(MNTK_VGONE_UPPER);
- MNT_KERN_FLAG(MNTK_VGONE_WAITER);
+ MNT_KERN_FLAG(MNTK_RECURSE);
+ MNT_KERN_FLAG(MNTK_UPPER_WAITER);
MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT);
MNT_KERN_FLAG(MNTK_MARKER);
MNT_KERN_FLAG(MNTK_USES_BCACHE);
MNT_KERN_FLAG(MNTK_FPLOOKUP);
+ MNT_KERN_FLAG(MNTK_TASKQUEUE_WAITER);
MNT_KERN_FLAG(MNTK_NOASYNC);
MNT_KERN_FLAG(MNTK_UNMOUNT);
MNT_KERN_FLAG(MNTK_MWAIT);
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -190,6 +190,19 @@
_Static_assert(sizeof(struct mount_pcpu) == 16,
"the struct is allocated from pcpu 16 zone");
+/*
+ * Structure for tracking a stacked filesystem mounted above another
+ * filesystem. This is expected to be stored in the upper FS' per-mount data.
+ *
+ * Lock reference:
+ * i - lower mount interlock
+ * c - constant from node initialization
+ */
+struct mount_upper_node {
+ struct mount *mp; /* (c) mount object for upper FS */
+ TAILQ_ENTRY(mount_upper_node) mnt_upper_link; /* (i) position in uppers list */
+};
+
/*
* Structure per mounted filesystem. Each mounted filesystem has an
* array of operations and an instance record. The filesystems are
@@ -199,8 +212,8 @@
* l - mnt_listmtx
* m - mountlist_mtx
* i - interlock
- * i* - interlock of uppers' list head
* v - vnode freelist mutex
+ * d - deferred unmount list mutex
*
* Unmarked fields are considered stable as long as a ref is held.
*
@@ -242,10 +255,12 @@
struct mtx mnt_listmtx;
struct vnodelst mnt_lazyvnodelist; /* (l) list of lazy vnodes */
int mnt_lazyvnodelistsize; /* (l) # of lazy vnodes */
- int mnt_pinned_count; /* (i) unmount prevented */
+ int mnt_upper_pending; /* (i) # of pending ops on mnt_uppers */
struct lock mnt_explock; /* vfs_export walkers lock */
- TAILQ_ENTRY(mount) mnt_upper_link; /* (i*) we in the all uppers */
- TAILQ_HEAD(, mount) mnt_uppers; /* (i) upper mounts over us */
+ TAILQ_HEAD(, mount_upper_node) mnt_uppers; /* (i) upper mounts over us */
+ TAILQ_HEAD(, mount_upper_node) mnt_notify; /* (i) upper mounts for notification */
+ STAILQ_ENTRY(mount) mnt_taskqueue_link; /* (d) our place in deferred unmount list */
+ uint64_t mnt_taskqueue_flags; /* (d) unmount flags passed from taskqueue */
};
#endif /* _WANT_MOUNT || _KERNEL */
@@ -438,9 +453,13 @@
#define MNT_BYFSID 0x0000000008000000ULL /* specify filesystem by ID. */
#define MNT_NOCOVER 0x0000001000000000ULL /* Do not cover a mount point */
#define MNT_EMPTYDIR 0x0000002000000000ULL /* Only mount on empty dir */
-#define MNT_CMDFLAGS (MNT_UPDATE | MNT_DELEXPORT | MNT_RELOAD | \
+#define MNT_RECURSE 0x0000100000000000ULL /* recursively unmount uppers */
+#define MNT_DEFERRED 0x0000200000000000ULL /* unmount in async context */
+#define MNT_CMDFLAGS (MNT_UPDATE | MNT_DELEXPORT | MNT_RELOAD | \
MNT_FORCE | MNT_SNAPSHOT | MNT_NONBUSY | \
- MNT_BYFSID | MNT_NOCOVER | MNT_EMPTYDIR)
+ MNT_BYFSID | MNT_NOCOVER | MNT_EMPTYDIR | \
+ MNT_RECURSE | MNT_DEFERRED)
+
/*
* Internal filesystem control flags stored in mnt_kern_flag.
*
@@ -466,8 +485,8 @@
#define MNTK_NO_IOPF 0x00000100 /* Disallow page faults during reads
and writes. Filesystem shall properly
handle i/o state on EFAULT. */
-#define MNTK_VGONE_UPPER 0x00000200
-#define MNTK_VGONE_WAITER 0x00000400
+#define MNTK_RECURSE 0x00000200 /* pending recursive unmount */
+#define	MNTK_UPPER_WAITER	0x00000400 /* waiting to drain mnt_upper_pending */
#define MNTK_LOOKUP_EXCL_DOTDOT 0x00000800
#define MNTK_MARKER 0x00001000
#define MNTK_UNMAPPED_BUFS 0x00002000
@@ -477,8 +496,9 @@
#define MNTK_UNIONFS 0x00020000 /* A hack for F_ISUNIONSTACK */
#define MNTK_FPLOOKUP 0x00040000 /* fast path lookup is supported */
#define MNTK_SUSPEND_ALL 0x00080000 /* Suspended by all-fs suspension */
-#define MNTK_NOASYNC 0x00800000 /* disable async */
-#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */
+#define MNTK_TASKQUEUE_WAITER 0x00100000 /* Waiting on unmount taskqueue */
+#define MNTK_NOASYNC 0x00800000 /* disable async */
+#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */
#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */
#define MNTK_SUSPEND 0x08000000 /* request write suspension */
#define MNTK_SUSPEND2 0x04000000 /* block secondary writes */
@@ -952,7 +972,7 @@
* exported vnode operations
*/
-int dounmount(struct mount *, int, struct thread *);
+int dounmount(struct mount *, uint64_t, struct thread *);
int kernel_mount(struct mntarg *ma, uint64_t flags);
int kernel_vmount(int flags, ...);
@@ -1012,8 +1032,13 @@
int vfs_suser(struct mount *, struct thread *);
void vfs_unbusy(struct mount *);
void vfs_unmountall(void);
-struct mount *vfs_pin_from_vp(struct vnode *);
-void vfs_unpin(struct mount *);
+struct mount *vfs_register_upper_from_vp(struct vnode *,
+ struct mount *ump, struct mount_upper_node *);
+void vfs_register_for_notification(struct mount *, struct mount *,
+ struct mount_upper_node *);
+void vfs_unregister_for_notification(struct mount *,
+ struct mount_upper_node *);
+void vfs_unregister_upper(struct mount *, struct mount_upper_node *);
extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */
extern struct mtx_padalign mountlist_mtx;
extern struct nfs_public nfs_pub;
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -297,7 +297,7 @@
*/
mp = vfs_getvfs(&etp->fsid);
if (mp != NULL)
- dounmount(mp, MNT_FORCE, curthread);
+ dounmount(mp, MNT_FORCE | MNT_RECURSE, curthread);
free(etp, M_UFSMNT);
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Dec 26, 10:28 AM (11 h, 17 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15598999
Default Alt Text
D31016.id92718.diff (24 KB)
Attached To
Mode
D31016: Allow stacked filesystems to be recursively unmounted
Attached
Detach File
Event Timeline
Log In to Comment