D35054.id108486.diff

Index: sys/fs/unionfs/union_vfsops.c
===================================================================
--- sys/fs/unionfs/union_vfsops.c
+++ sys/fs/unionfs/union_vfsops.c
@@ -310,6 +310,30 @@
return (ENOENT);
}
+ /*
+ * Specify that the covered vnode lock should remain held while
+ * lookup() performs the cross-mount walk. This prevents a lock-order
+ * reversal between the covered vnode lock (which is also locked by
+ * unionfs_lock()) and the mountpoint's busy count. Without this,
+ * unmount will lock the covered vnode lock (directly through the
+ * covered vnode) and wait for the busy count to drain, while a
+ * concurrent lookup will increment the busy count and then lock
+ * the covered vnode lock (indirectly through unionfs_lock()).
+ *
+ * Note that we can't yet use this facility for the 'below' case
+ * in which the upper vnode is the covered vnode, because that would
+ * introduce a different LOR in which the cross-mount lookup would
+ * effectively hold the upper vnode lock before acquiring the lower
+ * vnode lock, while an unrelated lock operation would still acquire
+ * the lower vnode lock before the upper vnode lock, which is the
+ * order unionfs currently requires.
+ */
+ if (!below) {
+ vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);
+ mp->mnt_vnodecovered->v_vflag |= VV_CROSSLOCK;
+ VOP_UNLOCK(mp->mnt_vnodecovered);
+ }
+
MNT_ILOCK(mp);
if ((lowermp->mnt_flag & MNT_LOCAL) != 0 &&
(uppermp->mnt_flag & MNT_LOCAL) != 0)
@@ -362,6 +386,9 @@
if (error)
return (error);
+ vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);
+ mp->mnt_vnodecovered->v_vflag &= ~VV_CROSSLOCK;
+ VOP_UNLOCK(mp->mnt_vnodecovered);
vfs_unregister_upper(ump->um_lowervp->v_mount, &ump->um_lower_link);
vfs_unregister_upper(ump->um_uppervp->v_mount, &ump->um_upper_link);
free(ump, M_UNIONFSMNT);
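For readers following the patch, here is a minimal sketch (not part of the diff) of the set/clear pattern the two unionfs hunks above introduce around mount and unmount. The filesystem and helper names are invented for illustration; only the VV_CROSSLOCK flag, the locking calls, and the covered-vnode field come from the patch.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/mount.h>
#include <sys/vnode.h>

/*
 * Hypothetical helper: toggle VV_CROSSLOCK on the vnode covered by a
 * stacked mount.  v_vflag is protected by the vnode lock, so the flag
 * is changed only while the covered vnode is held exclusively.
 */
static void
examplefs_set_crosslock(struct mount *mp, bool enable)
{
        struct vnode *covered;

        covered = mp->mnt_vnodecovered;
        vn_lock(covered, LK_EXCLUSIVE | LK_RETRY);
        if (enable)
                covered->v_vflag |= VV_CROSSLOCK;
        else
                covered->v_vflag &= ~VV_CROSSLOCK;
        VOP_UNLOCK(covered);
}

In the hunks above, unionfs sets the flag at mount time only when the mount is not a 'below' mount, and clears it unconditionally at unmount time, which is harmless when the flag was never set.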
Index: sys/kern/vfs_lookup.c
===================================================================
--- sys/kern/vfs_lookup.c
+++ sys/kern/vfs_lookup.c
@@ -107,11 +107,9 @@
file = ap->a_file;
line = ap->a_line;
- if ((flags & LK_SHARED) == 0)
- panic("invalid lock request for crossmp");
+ KASSERT((flags & (LK_SHARED | LK_NOWAIT)) == (LK_SHARED | LK_NOWAIT),
+ ("%s: invalid lock request 0x%x for crossmp", __func__, flags));
- WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER, file, line,
- flags & LK_INTERLOCK ? &VI_MTX(vp)->lock_object : NULL);
WITNESS_LOCK(&lk->lock_object, 0, file, line);
if ((flags & LK_INTERLOCK) != 0)
VI_UNLOCK(vp);
@@ -910,6 +908,8 @@
struct componentname *cnp = &ndp->ni_cnd;
int lkflags_save;
int ni_dvp_unlocked;
+ int crosslkflags;
+ bool crosslock;
/*
* Setup: break out flag bits into variables.
@@ -1243,18 +1243,32 @@
*/
while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
(cnp->cn_flags & NOCROSSMOUNT) == 0) {
+ crosslock = ((dp->v_vflag & VV_CROSSLOCK) != 0);
+ crosslkflags = compute_cn_lkflags(mp, cnp->cn_lkflags,
+ cnp->cn_flags);
+ if (__predict_false(crosslock) &&
+ (crosslkflags & LK_EXCLUSIVE) != 0 &&
+ VOP_ISLOCKED(dp) != LK_EXCLUSIVE) {
+ vn_lock(dp, LK_UPGRADE | LK_RETRY);
+ if (VN_IS_DOOMED(dp)) {
+ error = ENOENT;
+ goto bad2;
+ }
+ }
if (vfs_busy(mp, 0))
continue;
- vput(dp);
+ if (__predict_true(!crosslock))
+ vput(dp);
if (dp != ndp->ni_dvp)
vput(ndp->ni_dvp);
else
vrele(ndp->ni_dvp);
vrefact(vp_crossmp);
ndp->ni_dvp = vp_crossmp;
- error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags,
- cnp->cn_flags), &tdp);
+ error = VFS_ROOT(mp, crosslkflags, &tdp);
vfs_unbusy(mp);
+ if (__predict_false(crosslock))
+ vput(dp);
if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT))
panic("vp_crossmp exclusively locked or reclaimed");
if (error) {
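To make the new control flow easier to follow, the sketch below condenses the lookup() hunk above into a stand-alone helper. The function name and its error handling are invented: the real loop also swaps ndp->ni_dvp for the vp_crossmp placeholder and retries the whole iteration when vfs_busy() fails, details the sketch omits.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/mount.h>
#include <sys/vnode.h>

/*
 * Hypothetical condensation of the cross-mount step: dp is the locked
 * covered directory, mp the filesystem mounted on it, lkflags the lock
 * flags lookup() computed for that filesystem's root vnode.
 */
static int
crossmount_step_sketch(struct vnode *dp, struct mount *mp, int lkflags,
    struct vnode **rootvpp)
{
        bool crosslock;
        int error;

        crosslock = (dp->v_vflag & VV_CROSSLOCK) != 0;
        /*
         * If the root will be locked exclusively and dp's lock doubles
         * as that lock, dp must be exclusive before VFS_ROOT() runs.
         */
        if (crosslock && (lkflags & LK_EXCLUSIVE) != 0 &&
            VOP_ISLOCKED(dp) != LK_EXCLUSIVE) {
                vn_lock(dp, LK_UPGRADE | LK_RETRY);
                if (VN_IS_DOOMED(dp))
                        return (ENOENT);
        }
        if (vfs_busy(mp, 0) != 0)
                return (ERESTART);      /* the real lookup() retries */
        if (!crosslock)
                vput(dp);               /* pre-existing behavior */
        error = VFS_ROOT(mp, lkflags, rootvpp);
        vfs_unbusy(mp);
        if (crosslock)
                vput(dp);               /* held across VFS_ROOT() */
        return (error);
}

The point of the reordering is visible here: with VV_CROSSLOCK set, lookup() already holds the covered vnode lock when it bumps the busy count and keeps it until VFS_ROOT() returns, so it takes the two resources in the same order dounmount() does and the reversal described in the unionfs comment cannot occur.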
Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -770,17 +770,22 @@
* +->F->D->E
*
* The lookup() process for namei("/var") illustrates the process:
- * VOP_LOOKUP() obtains B while A is held
- * vfs_busy() obtains a shared lock on F while A and B are held
- * vput() releases lock on B
- * vput() releases lock on A
- * VFS_ROOT() obtains lock on D while shared lock on F is held
- * vfs_unbusy() releases shared lock on F
- * vn_lock() obtains lock on deadfs vnode vp_crossmp instead of A.
- * Attempt to lock A (instead of vp_crossmp) while D is held would
- * violate the global order, causing deadlocks.
+ * 1. VOP_LOOKUP() obtains B while A is held
+ * 2. vfs_busy() obtains a shared lock on F while A and B are held
+ * 3. vput() releases lock on B
+ * 4. vput() releases lock on A
+ * 5. VFS_ROOT() obtains lock on D while shared lock on F is held
+ * 6. vfs_unbusy() releases shared lock on F
+ * 7. vn_lock() obtains lock on deadfs vnode vp_crossmp instead of A.
+ * An attempt to lock A (instead of vp_crossmp) while D is held would
+ * violate the global order, causing deadlocks.
*
- * dounmount() locks B while F is drained.
+ * dounmount() locks B while F is drained. Note that for stacked
+ * filesystems, D and B in the example above may be the same lock,
+ * which introduces a potential lock order reversal deadlock between
+ * dounmount() and step 5 above. These filesystems may avoid the LOR
+ * by setting VV_CROSSLOCK on the covered vnode so that lock B will
+ * remain held until after step 5.
*/
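The new sentence about D and B possibly being the same lock refers to the way stacked filesystems share locks with the vnodes they stack over: nullfs aliases v_vnlock to the lower vnode's lock, and unionfs forwards its lock operation (unionfs_lock()) to a branch vnode. When the mount covers that same branch vnode, locking the stacked root (D) is locking the covered vnode (B). A one-line illustration of the aliasing form, with an invented function name:

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/vnode.h>

/*
 * Hypothetical illustration only: after this assignment, vn_lock() on
 * stacked_vp and on covered_vp contend on the same lockmgr lock, which
 * is what allows the dounmount() vs. step 5 reversal unless the
 * stacked filesystem sets VV_CROSSLOCK on the covered vnode.
 */
static void
stackedfs_alias_lock_sketch(struct vnode *stacked_vp, struct vnode *covered_vp)
{

        stacked_vp->v_vnlock = covered_vp->v_vnlock;
}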
int
vfs_busy(struct mount *mp, int flags)
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -276,6 +276,7 @@
#define VV_FORCEINSMQ 0x1000 /* force the insmntque to succeed */
#define VV_READLINK 0x2000 /* fdescfs linux vnode */
#define VV_UNREF 0x4000 /* vunref, do not drop lock in inactive() */
+#define VV_CROSSLOCK 0x8000 /* vnode lock is shared w/ root mounted here */
#define VMP_LAZYLIST 0x0001 /* Vnode is on mnt's lazy list */
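The flag is added to v_vflag, which is protected by the vnode lock, so it may only be set, cleared, or tested while the vnode is locked, as both the unionfs and lookup() hunks above do. A trivial, hypothetical accessor just to make that rule explicit:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/vnode.h>

/* Hypothetical helper: v_vflag may only be inspected under the vnode lock. */
static bool
vn_wants_crosslock(struct vnode *coveredvp)
{

        ASSERT_VOP_LOCKED(coveredvp, __func__);
        return ((coveredvp->v_vflag & VV_CROSSLOCK) != 0);
}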
