diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -1075,79 +1075,6 @@ return (EEXIST); } -static int __noinline -vfs_lookup_cross_mount(struct nameidata *ndp) -{ - struct componentname *cnp; - struct mount *mp; - struct vnode *dp, *tdp; - int error, crosslkflags; - bool crosslock; - - cnp = &ndp->ni_cnd; - dp = ndp->ni_vp; - - /* - * The vnode has been mounted on, find the root of the mounted - * filesystem. - */ - for (;;) { - mp = dp->v_mountedhere; - ASSERT_VOP_LOCKED(dp, __func__); - VNPASS((vn_irflag_read(dp) & VIRF_MOUNTPOINT) != 0 && mp != NULL, dp); - - crosslock = (dp->v_vflag & VV_CROSSLOCK) != 0; - crosslkflags = enforce_lkflags(mp, cnp->cn_lkflags); - if (__predict_false(crosslock)) { - /* - * We are going to be holding the vnode lock, which - * in this case is shared by the root vnode of the - * filesystem mounted at mp, across the call to - * VFS_ROOT(). Make the situation clear to the - * filesystem by passing LK_CANRECURSE if the - * lock is held exclusive, or by clearinng - * LK_NODDLKTREAT to allow recursion on the shared - * lock in the presence of an exclusive waiter. 
- */ - if (VOP_ISLOCKED(dp) == LK_EXCLUSIVE) { - crosslkflags &= ~LK_SHARED; - crosslkflags |= LK_EXCLUSIVE | LK_CANRECURSE; - } else if ((crosslkflags & LK_EXCLUSIVE) != 0) { - error = vn_lock(dp, LK_UPGRADE); - if (error != 0) - break; - if (dp->v_mountedhere != mp) { - continue; - } - } else - crosslkflags &= ~LK_NODDLKTREAT; - } - if (vfs_busy(mp, 0) != 0) - continue; - if (__predict_true(!crosslock)) - vput(dp); - if (dp != ndp->ni_dvp) - vput(ndp->ni_dvp); - else - vrele(ndp->ni_dvp); - vrefact(vp_crossmp); - ndp->ni_dvp = vp_crossmp; - error = VFS_ROOT(mp, crosslkflags, &tdp); - vfs_unbusy(mp); - if (__predict_false(crosslock)) - vput(dp); - if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT)) - panic("vp_crossmp exclusively locked or reclaimed"); - if (error != 0) - break; - ndp->ni_vp = dp = tdp; - if ((vn_irflag_read(dp) & VIRF_MOUNTPOINT) == 0) - break; - } - - return (error); -} - /* * Search a pathname. * This is a very central and rather complicated routine. @@ -1559,17 +1486,54 @@ goto success; } - if ((vn_irflag_read(dp) & VIRF_MOUNTPOINT) != 0 && - (cnp->cn_flags & NOCROSSMOUNT) == 0) { - error = vfs_lookup_cross_mount(ndp); - if (error != 0) - goto bad_unlocked; + /* From here: Not a symlink we have to follow. */ + + /* + * Cross mounts if 'dp' is mounted on and NOCROSSMOUNT is not set. + */ + if (__predict_true((vn_irflag_read(dp) & VIRF_MOUNTPOINT) == 0) || + (cnp->cn_flags & NOCROSSMOUNT) != 0) + goto nextname; + + /* + * Unlock the parent 'ni_dvp' to avoid a problem with NFS where + * VFS_ROOT() can hang forever on a lost server. In this case, 'ni_dvp' + * remains locked forever as well, gradually blocking all lookups (or + * only modifications in the MNTK_LOOKUP_SHARED case) starting with + * vnodes earlier in the path up to the root vnode. 
+ */ + if (ndp->ni_dvp != dp) + VOP_UNLOCK(ndp->ni_dvp); + + error = vn_cross_mounts_mounted(dp, cnp->cn_lkflags, &dp); + if (__predict_false(error != 0)) { + vrele(ndp->ni_dvp); /* - * FALLTHROUGH to nextname + * If 'error' is ENOENT, during crossing, 'dp' was unlocked and + * is now doomed, which, given the pre-existing active + * reference, can only happen on a forced unmount. We could + * climb to the "parent" directory (in the resolution, i.e., + * 'ni_dvp'), but this is likely to be useless (if on the same + * mount it may have been doomed already, and if not, it + * currently is equal to 'vp_crossmp' anyway) and problematic + * semantically if done standalone. The most sensible + * alternative would be to restart the whole resolution, but we + * are not there yet. */ - dp = ndp->ni_vp; + goto bad_unlocked; } + /* + * We actually crossed some mounts. Release 'ni_dvp', which + * is going to be replaced by 'vp_crossmp'. + */ + ndp->ni_vp = dp; + vrele(ndp->ni_dvp); + vrefact(vp_crossmp); + if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT)) + panic("vp_crossmp exclusively locked or reclaimed"); + ndp->ni_dvp = vp_crossmp; + nextname: /* * Not a symbolic link that we will follow. Continue with the