diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -244,6 +244,28 @@
 	return (error);
 }
 
+static int
+vn_cross_single_mount_mounted(struct vnode* vp, int root_lkflags,
+    bool *unlocked, struct vnode **vpp)
+{
+	struct mount *mp;
+	int error;
+
+	*vpp = NULL;
+
+	error = vn_busy_mountedhere_mounted(vp, unlocked, &mp);
+	if (__predict_false(error == 0)) {
+		if (__predict_true(!*unlocked)) {
+			VOP_UNLOCK(vp);
+			*unlocked = true;
+		}
+		error = VFS_ROOT(mp, root_lkflags, vpp);
+		vfs_unbusy(mp);
+	}
+
+	return (error);
+}
+
 /*
  * Cross a single mounted-on vnode, returning the mount's root vnode.
  *
@@ -263,22 +285,14 @@
 vn_cross_single_mount(struct vnode* vp, int root_lkflags, bool *unlocked,
     struct vnode **vpp)
 {
-	struct mount *mp;
-	int error;
-
-	*vpp = NULL;
-
-	error = vn_busy_mountedhere(vp, unlocked, &mp);
-	if (__predict_false(error == 0)) {
-		if (__predict_true(!*unlocked)) {
-			VOP_UNLOCK(vp);
-			*unlocked = true;
-		}
-		error = VFS_ROOT(mp, root_lkflags, vpp);
-		vfs_unbusy(mp);
+	if (__predict_true((vn_irflag_read(vp) & VIRF_MOUNTPOINT) == 0)) {
+		*vpp = NULL;
+		*unlocked = false;
+		return (EJUSTRETURN);
 	}
-	return (error);
+	return (vn_cross_single_mount_mounted
+	    (vp, root_lkflags, unlocked, vpp));
 }
 
 static void
@@ -291,61 +305,21 @@
 	    ("%s: vn_lock(LK_RETRY) returned %d", __func__, error));
 }
 
-/*
- * Repeatedly cross mounts starting from a given vnode.
- *
- * Traverses all successive mounts on the same path, locking the successive
- * vnodes as specified by enforce_lkflags() and unlocking them after obtaining
- * their covering mount. Ensures the final vnode is locked and actively
- * referenced. The initial vnode is returned unlocked and its active reference
- * is released except if it is also the final vnode (no mount points to cross).
- *
- * Mounts are crossed until reaching vnode that is not covered by a mount, which
- * is returned locked. If some traversed vnode happens to be doomed, ENOENT is
- * returned. Can return errors reported by VFS_ROOT(). On success, puts the
- * final vnode into '*vpp' and returns 0.
- *
- * This function ensures that the crossed mountpoint cannot be busied and the
- * initial vnode locked at the same time. The goal is to avoid establishing
- * a lock order between them in order to avoid deadlocks, at lookup with mounted
- * stacked filesystems (nullfs, unionfs) where locking the mountpoint's root
- * vnode leads to locking the covered vnode as well and vice-versa, but also at
- * unmount where parallel vfs_busy() calls block while acquiring the covered
- * vnode's lock, which establishes the acquisition order mount point -> covered
- * vnode. This function (through the VFS_ROOT() call) only establishes the
- * acquisition order mount point -> root vnode, which implies mount point ->
- * covered vnode for stacked filesystems, thus the same order as that of
- * dounmount(). In other words, the legal order is that a mount point reference
- * must always be acquired before the vnode's lock, be it the root vnode under
- * the mount point or the covered vnode over it.
- */
+/* See vn_cross_mounts() in 'vnode.h'. */
 int
-vn_cross_mounts(struct vnode* vp, int const lkflags, struct vnode ** const vpp)
+vn_cross_mounts_mounted(struct vnode* vp, int const lkflags,
+    struct vnode ** const vpp)
 {
 	int error;
 	bool unlocked;
 
 	for (;;) {
-		error = vn_cross_single_mount(vp, lkflags, &unlocked, vpp);
+		error = vn_cross_single_mount_mounted
+		    (vp, lkflags, &unlocked, vpp);
 		/* Optimize for the non-mount-point case. */
-		if (__predict_true(error == EJUSTRETURN)) {
-			/* No more mounts to cross. */
-			*vpp = vp;
-			error = 0;
-
-			if (__predict_false(unlocked)) {
-				vn_lock_enforced_flags(vp, lkflags);
-				if (VN_IS_DOOMED(vp)) {
-					vput(vp);
-
-					*vpp = NULL;
-					error = ENOENT;
-				}
-			}
-
-			return (error);
-		}
+		if (__predict_true(error == EJUSTRETURN))
+			break;
 
 		if (__predict_false(error != 0)) {
 			if (__predict_true(unlocked))
@@ -362,9 +336,29 @@
 			vrele(vp);
 		vp = *vpp;
 		ASSERT_VOP_LOCKED(vp, __func__);
+
+		if (__predict_true((vn_irflag_read(vp) & VIRF_MOUNTPOINT)
+		    == 0)) {
+			unlocked = false;
+			break;
+		}
 	}
 
-	__assert_unreachable();
+	/* No more mounts to cross. */
+	*vpp = vp;
+	error = 0;
+
+	if (__predict_false(unlocked)) {
+		vn_lock_enforced_flags(vp, lkflags);
+		if (VN_IS_DOOMED(vp)) {
+			vput(vp);
+
+			*vpp = NULL;
+			error = ENOENT;
+		}
+	}
+
+	return (error);
 }
 
 struct nameicap_tracker {
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -830,7 +830,8 @@
 vn_busy_mountedhere(struct vnode *vp, bool *unlocked, struct mount **mp);
 int	vn_cross_single_mount(struct vnode* vp, int root_lkflags, bool *unlocked,
     struct vnode **vpp);
-int	vn_cross_mounts(struct vnode* vp, int lkflags, struct vnode **vpp);
+static inline int
+vn_cross_mounts(struct vnode* vp, int lkflags, struct vnode **vpp);
 
 void	vn_seqc_write_begin_locked(struct vnode *vp);
 void	vn_seqc_write_begin(struct vnode *vp);
@@ -1092,7 +1093,7 @@
 int vnode_create_vobject(struct vnode *vp, off_t size, struct thread *td);
 void vnode_destroy_vobject(struct vnode *vp);
 
-/* Internal function to implement next function. */
+/* Internal function to implement vn_busy_mountedhere(). */
 int vn_busy_mountedhere_mounted(struct vnode *vp, bool *unlocked,
     struct mount **mp);
 
@@ -1131,6 +1132,52 @@
 	return (vn_busy_mountedhere_mounted(vp, unlocked, mp));
 }
 
+/* Internal function to implement vn_cross_mounts(). */
+int vn_cross_mounts_mounted(struct vnode* vp, int lkflags, struct vnode **vpp);
+
+/*
+ * Repeatedly cross mounts starting from a given vnode.
+ *
+ * Traverses all successive mounts on the same path, locking the successive
+ * vnodes as specified by enforce_lkflags() and unlocking them after obtaining
+ * their covering mount. Ensures the final vnode is locked and actively
+ * referenced. The initial vnode is returned unlocked and its active reference
+ * is released unless it is also the final vnode (no mount points to cross).
+ *
+ * Mounts are crossed until reaching a vnode that is not covered by a mount,
+ * which is returned locked. If some traversed vnode happens to be doomed,
+ * ENOENT is returned. Can return errors reported by VFS_ROOT(). On success,
+ * puts the final vnode into '*vpp' and returns 0.
+ *
+ * This function ensures that the crossed mountpoint cannot be busied and the
+ * initial vnode locked at the same time. The goal is to avoid establishing
+ * a lock order between them, which could otherwise cause deadlocks: during
+ * lookup with mounted stacked filesystems (nullfs, unionfs), locking the
+ * mountpoint's root vnode leads to locking the covered vnode as well and
+ * vice-versa, and at unmount, parallel vfs_busy() calls block while acquiring
+ * the covered vnode's lock, which establishes the acquisition order mount
+ * point -> covered vnode. This function (through the VFS_ROOT() call) only
+ * establishes the acquisition order mount point -> root vnode, which implies
+ * mount point -> covered vnode for stacked filesystems, thus the same order
+ * as that of dounmount(). In other words, the legal order is that a mount
+ * point reference must always be acquired before the vnode's lock, be it the
+ * root vnode under the mount point or the covered vnode over it.
+ */
+static inline int
+vn_cross_mounts(struct vnode* vp, int const lkflags, struct vnode ** const vpp)
+{
+	ASSERT_VOP_LOCKED(vp, __func__);
+	ASSERT_VI_UNLOCKED(vp, __func__);
+	VNASSERT(!VN_IS_DOOMED(vp), vp, ("Input vnode is doomed"));
+
+	if (__predict_true((vn_irflag_read(vp) & VIRF_MOUNTPOINT) == 0)) {
+		*vpp = vp;
+		return (0);
+	}
+
+	return (vn_cross_mounts_mounted(vp, lkflags, vpp));
+}
+
 extern struct vop_vector fifo_specops;
 extern struct vop_vector dead_vnodeops;
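
Usage sketch (not part of the patch): a minimal, hypothetical caller showing
how vn_cross_mounts() is meant to be used after a lookup step, per the
contract documented in the header comment above. The function name
example_cross_mounts() and the choice of LK_SHARED are illustrative
assumptions, not code from this change.

static int
example_cross_mounts(struct vnode *vp, struct vnode **vpp)
{
	int error;

	/* vn_cross_mounts() expects a locked, referenced, non-doomed vnode. */
	ASSERT_VOP_LOCKED(vp, __func__);

	error = vn_cross_mounts(vp, LK_SHARED, vpp);
	if (error != 0) {
		/*
		 * 'vp' has already been unlocked and released by
		 * vn_cross_mounts_mounted(); ENOENT means a traversed vnode
		 * was doomed, other values come from VFS_ROOT().
		 */
		return (error);
	}

	/*
	 * '*vpp' is locked and referenced. If 'vp' was not covered by a
	 * mount, '*vpp' == 'vp' and the original lock and reference are
	 * still held; otherwise 'vp' has been released.
	 */
	return (0);
}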