Index: sys/fs/tmpfs/tmpfs_vfsops.c =================================================================== --- sys/fs/tmpfs/tmpfs_vfsops.c +++ sys/fs/tmpfs/tmpfs_vfsops.c @@ -710,7 +710,8 @@ struct vfsops tmpfs_vfsops = { .vfs_mount = tmpfs_mount, .vfs_unmount = tmpfs_unmount, - .vfs_root = tmpfs_root, + .vfs_root = vfs_cache_root, + .vfs_cachedroot = tmpfs_root, .vfs_statfs = tmpfs_statfs, .vfs_fhtovp = tmpfs_fhtovp, .vfs_sync = tmpfs_sync, Index: sys/kern/vfs_mount.c =================================================================== --- sys/kern/vfs_mount.c +++ sys/kern/vfs_mount.c @@ -134,6 +134,7 @@ M_WAITOK | M_ZERO); mp->mnt_ref = 0; mp->mnt_vfs_ops = 1; + mp->mnt_rootvnode = NULL; return (0); } @@ -582,6 +583,10 @@ panic("%s: vfs_ops should be 1 but %d found\n", __func__, mp->mnt_vfs_ops); + if (mp->mnt_rootvnode != NULL) + panic("%s: mount point still has a root vnode %p\n", __func__, + mp->mnt_rootvnode); + if (mp->mnt_vnodecovered != NULL) vrele(mp->mnt_vnodecovered); #ifdef MAC @@ -1010,6 +1015,7 @@ ) { struct export_args export; + struct vnode *rootvp; void *bufp; struct mount *mp; int error, export_error, len; @@ -1075,7 +1081,10 @@ MNT_SNAPSHOT | MNT_ROOTFS | MNT_UPDATEMASK | MNT_RDONLY); if ((mp->mnt_flag & MNT_ASYNC) == 0) mp->mnt_kern_flag &= ~MNTK_ASYNC; + rootvp = vfs_cache_root_clear(mp); MNT_IUNLOCK(mp); + if (rootvp != NULL) + vrele(rootvp); mp->mnt_optnew = *optlist; vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); @@ -1553,7 +1562,7 @@ int dounmount(struct mount *mp, int flags, struct thread *td) { - struct vnode *coveredvp; + struct vnode *coveredvp, *rootvp; int error; uint64_t async_flag; int mnt_gen_r; @@ -1601,12 +1610,15 @@ return (EBUSY); } mp->mnt_kern_flag |= MNTK_UNMOUNT; + rootvp = vfs_cache_root_clear(mp); if (flags & MNT_NONBUSY) { MNT_IUNLOCK(mp); error = vfs_check_usecounts(mp); MNT_ILOCK(mp); if (error != 0) { dounmount_cleanup(mp, coveredvp, MNTK_UNMOUNT); + if (rootvp != NULL) + vrele(rootvp); return (error); } } @@ -1635,6 +1647,9 
@@ ("%s: invalid return value for msleep in the drain path @ %s:%d", __func__, __FILE__, __LINE__)); + if (rootvp != NULL) + vrele(rootvp); + if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -5633,6 +5633,112 @@ return (0); } +/* + * Slow path of vfs_cache_root(): return the cached root if it can be locked; + * else drop a doomed one, call VFS_CACHEDROOT() and cache if mnt_vfs_ops == 0. + */ +static int __noinline +vfs_cache_root_fallback(struct mount *mp, int flags, struct vnode **vpp) +{ + struct vnode *vp; + int error; + +restart: + if (mp->mnt_rootvnode != NULL) { + MNT_ILOCK(mp); + vp = mp->mnt_rootvnode; + if (vp != NULL) { + if ((vp->v_iflag & VI_DOOMED) == 0) { + vrefact(vp); + MNT_IUNLOCK(mp); + error = vn_lock(vp, flags); + if (error == 0) { + *vpp = vp; + return (0); + } + vrele(vp); + goto restart; + } + /* + * Doomed: uncache it while MNT_ILOCK is still held. + */ + mp->mnt_rootvnode = NULL; + } + MNT_IUNLOCK(mp); + if (vp != NULL) { + /* + * Paired with a fence in vfs_op_thread_exit(). 
+ */ + atomic_thread_fence_acq(); + vfs_op_barrier_wait(mp); + vrele(vp); + } + } + error = VFS_CACHEDROOT(mp, flags, vpp); + if (error != 0) + return (error); + if (mp->mnt_vfs_ops == 0) { /* rechecked under MNT_ILOCK */ + MNT_ILOCK(mp); + if (mp->mnt_vfs_ops != 0) { + MNT_IUNLOCK(mp); + return (0); + } + if (mp->mnt_rootvnode == NULL) { + vrefact(*vpp); /* reference owned by the cache */ + mp->mnt_rootvnode = *vpp; + } else { + if (mp->mnt_rootvnode != *vpp) { + if ((mp->mnt_rootvnode->v_iflag & VI_DOOMED) == 0) { + panic("%s: mismatch between vnode returned " + " by VFS_CACHEDROOT and the one cached " + " (%p != %p)", + __func__, *vpp, mp->mnt_rootvnode); + } /* NOTE(review): doubled spaces in message */ + } + } + MNT_IUNLOCK(mp); + } + return (0); +} +/* Return the root vnode of mp in *vpp, trying the cached vnode first. */ +int +vfs_cache_root(struct mount *mp, int flags, struct vnode **vpp) +{ + struct vnode *vp; + int error; + + if (!vfs_op_thread_enter(mp)) + return (vfs_cache_root_fallback(mp, flags, vpp)); + vp = (struct vnode *)atomic_load_ptr(&mp->mnt_rootvnode); + if (vp == NULL || (vp->v_iflag & VI_DOOMED)) { /* no usable cache */ + vfs_op_thread_exit(mp); + return (vfs_cache_root_fallback(mp, flags, vpp)); + } + vrefact(vp); /* released by vrele() if the lock fails */ + vfs_op_thread_exit(mp); + error = vn_lock(vp, flags); + if (error != 0) { + vrele(vp); + return (vfs_cache_root_fallback(mp, flags, vpp)); + } + *vpp = vp; + return (0); +} +/* Uncache the root vnode and return it (may be NULL); callers vrele() it. */ +struct vnode * +vfs_cache_root_clear(struct mount *mp) +{ + struct vnode *vp; + + /* + * mnt_vfs_ops > 0 guarantees there is nobody who can see this vnode. + */ + MPASS(mp->mnt_vfs_ops > 0); + vp = mp->mnt_rootvnode; + mp->mnt_rootvnode = NULL; + return (vp); +} + /* * These are helper functions for filesystems to traverse all * their vnodes. See MNT_VNODE_FOREACH_ALL() in sys/mount.h. 
Index: sys/sys/mount.h =================================================================== --- sys/sys/mount.h +++ sys/sys/mount.h @@ -231,6 +231,7 @@ int *mnt_ref_pcpu; int *mnt_lockref_pcpu; int *mnt_writeopcount_pcpu; + struct vnode *mnt_rootvnode; /* root vnode cache used by vfs_cache_root() */ }; /* @@ -700,6 +701,7 @@ vfs_cmount_t *vfs_cmount; vfs_unmount_t *vfs_unmount; vfs_root_t *vfs_root; + vfs_root_t *vfs_cachedroot; vfs_quotactl_t *vfs_quotactl; vfs_statfs_t *vfs_statfs; vfs_sync_t *vfs_sync; @@ -739,6 +741,12 @@ _rc = (*(MP)->mnt_op->vfs_root)(MP, FLAGS, VPP); \ _rc; }) +#define VFS_CACHEDROOT(MP, FLAGS, VPP) ({ \ + int _rc; \ + \ + _rc = (*(MP)->mnt_op->vfs_cachedroot)(MP, FLAGS, VPP); \ + _rc; }) + #define VFS_QUOTACTL(MP, C, U, A) ({ \ int _rc; \ \ @@ -948,6 +956,8 @@ void syncer_suspend(void); void syncer_resume(void); +struct vnode *vfs_cache_root_clear(struct mount *); + void vfs_op_barrier_wait(struct mount *); void vfs_op_enter(struct mount *); void vfs_op_exit_locked(struct mount *); Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -747,6 +747,7 @@ rangelock_trywlock(&(vp)->v_rl, (start), (end), VI_MTX(vp)) int vfs_cache_lookup(struct vop_lookup_args *ap); +int vfs_cache_root(struct mount *mp, int flags, struct vnode **vpp); void vfs_timestamp(struct timespec *); void vfs_write_resume(struct mount *mp, int flags); int vfs_write_suspend(struct mount *mp, int flags); Index: sys/ufs/ffs/ffs_vfsops.c =================================================================== --- sys/ufs/ffs/ffs_vfsops.c +++ sys/ufs/ffs/ffs_vfsops.c @@ -109,7 +109,8 @@ .vfs_mount = ffs_mount, .vfs_cmount = ffs_cmount, .vfs_quotactl = ufs_quotactl, - .vfs_root = ufs_root, + .vfs_root = vfs_cache_root, + .vfs_cachedroot = ufs_root, .vfs_statfs = ffs_statfs, .vfs_sync = ffs_sync, .vfs_uninit = ffs_uninit,