Index: sys/fs/unionfs/union_vfsops.c
===================================================================
--- sys/fs/unionfs/union_vfsops.c
+++ sys/fs/unionfs/union_vfsops.c
@@ -310,6 +310,30 @@
 		return (ENOENT);
 	}
 
+	/*
+	 * Specify that the covered vnode lock should remain held while
+	 * lookup() performs the cross-mount walk. This prevents a lock-order
+	 * reversal between the covered vnode lock (which is also locked by
+	 * unionfs_lock()) and the mountpoint's busy count. Without this,
+	 * unmount will lock the covered vnode lock (directly through the
+	 * covered vnode) and wait for the busy count to drain, while a
+	 * concurrent lookup will increment the busy count and then lock
+	 * the covered vnode lock (indirectly through unionfs_lock()).
+	 *
+	 * Note that we can't yet use this facility for the 'below' case
+	 * in which the upper vnode is the covered vnode, because that would
+	 * introduce a different LOR in which the cross-mount lookup would
+	 * effectively hold the upper vnode lock before acquiring the lower
+	 * vnode lock, while an unrelated lock operation would still acquire
+	 * the lower vnode lock before the upper vnode lock, which is the
+	 * order unionfs currently requires.
+	 */
+	if (!below) {
+		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);
+		mp->mnt_vnodecovered->v_vflag |= VV_CROSSLOCK;
+		VOP_UNLOCK(mp->mnt_vnodecovered);
+	}
+
 	MNT_ILOCK(mp);
 	if ((lowermp->mnt_flag & MNT_LOCAL) != 0 &&
 	    (uppermp->mnt_flag & MNT_LOCAL) != 0)
@@ -362,6 +386,9 @@
 	if (error)
 		return (error);
 
+	vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY);
+	mp->mnt_vnodecovered->v_vflag &= ~VV_CROSSLOCK;
+	VOP_UNLOCK(mp->mnt_vnodecovered);
 	vfs_unregister_upper(ump->um_lowervp->v_mount, &ump->um_lower_link);
 	vfs_unregister_upper(ump->um_uppervp->v_mount, &ump->um_upper_link);
 	free(ump, M_UNIONFSMNT);
Index: sys/kern/vfs_lookup.c
===================================================================
--- sys/kern/vfs_lookup.c
+++ sys/kern/vfs_lookup.c
@@ -107,15 +107,11 @@
 	file = ap->a_file;
 	line = ap->a_line;
 
-	if ((flags & LK_SHARED) == 0)
+	if ((flags & (LK_SHARED | LK_NOWAIT)) != (LK_SHARED | LK_NOWAIT))
 		panic("invalid lock request for crossmp");
 
-	WITNESS_CHECKORDER(&lk->lock_object, LOP_NEWORDER, file, line,
-	    flags & LK_INTERLOCK ? &VI_MTX(vp)->lock_object : NULL);
 	WITNESS_LOCK(&lk->lock_object, 0, file, line);
-	if ((flags & LK_INTERLOCK) != 0)
-		VI_UNLOCK(vp);
-	LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, ap->a_file, line);
+	LOCK_LOG_LOCK("SLOCK", &lk->lock_object, 0, 0, file, line);
 	return (0);
 }
 
@@ -910,6 +906,8 @@
 	struct componentname *cnp = &ndp->ni_cnd;
 	int lkflags_save;
 	int ni_dvp_unlocked;
+	int crosslkflags;
+	bool crosslock;
 
 	/*
 	 * Setup: break out flag bits into variables.
@@ -1243,18 +1241,32 @@
 	 */
 	while (dp->v_type == VDIR && (mp = dp->v_mountedhere) &&
 	    (cnp->cn_flags & NOCROSSMOUNT) == 0) {
+		crosslock = ((dp->v_vflag & VV_CROSSLOCK) != 0);
+		crosslkflags = compute_cn_lkflags(mp, cnp->cn_lkflags,
+		    cnp->cn_flags);
+		if (__predict_false(crosslock) &&
+		    (crosslkflags & LK_EXCLUSIVE) != 0 &&
+		    VOP_ISLOCKED(dp) != LK_EXCLUSIVE) {
+			vn_lock(dp, LK_UPGRADE | LK_RETRY);
+			if (VN_IS_DOOMED(dp)) {
+				error = ENOENT;
+				goto bad2;
+			}
+		}
 		if (vfs_busy(mp, 0))
 			continue;
-		vput(dp);
+		if (__predict_true(!crosslock))
+			vput(dp);
 		if (dp != ndp->ni_dvp)
 			vput(ndp->ni_dvp);
 		else
 			vrele(ndp->ni_dvp);
 		vrefact(vp_crossmp);
 		ndp->ni_dvp = vp_crossmp;
-		error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags,
-		    cnp->cn_flags), &tdp);
+		error = VFS_ROOT(mp, crosslkflags, &tdp);
 		vfs_unbusy(mp);
+		if (__predict_false(crosslock))
+			vput(dp);
 		if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT))
 			panic("vp_crossmp exclusively locked or reclaimed");
 		if (error) {
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -276,6 +276,7 @@
 #define	VV_FORCEINSMQ	0x1000	/* force the insmntque to succeed */
 #define	VV_READLINK	0x2000	/* fdescfs linux vnode */
 #define	VV_UNREF	0x4000	/* vunref, do not drop lock in inactive() */
+#define	VV_CROSSLOCK	0x8000	/* vnode lock is shared w/ root mounted here */
 
 #define	VMP_LAZYLIST	0x0001	/* Vnode is on mnt's lazy list */
 