Index: sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c =================================================================== --- sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c +++ sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c @@ -154,6 +154,7 @@ vput(vp); return (error); } + vn_seqc_write_begin(vp); VOP_UNLOCK(vp); /* @@ -206,6 +207,7 @@ VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); + vn_seqc_write_end(vp); vput(vp); vfs_unbusy(mp); vfs_freeopts(mp->mnt_optnew); @@ -241,6 +243,7 @@ vfs_event_signal(NULL, VQ_MOUNT, 0); if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp)) panic("mount: lost mount"); + vn_seqc_write_end(vp); VOP_UNLOCK(vp); vfs_op_exit(mp); vfs_unbusy(mp); Index: sys/fs/tmpfs/tmpfs.h =================================================================== --- sys/fs/tmpfs/tmpfs.h +++ sys/fs/tmpfs/tmpfs.h @@ -526,6 +526,14 @@ return (node); } +static inline struct tmpfs_node * +VP_TO_TMPFS_NODE_SMR(struct vnode *vp) +{ + + MPASS(vp != NULL); + return (atomic_load_ptr(&vp->v_data)); +} + static inline struct tmpfs_node * VP_TO_TMPFS_DIR(struct vnode *vp) { Index: sys/fs/tmpfs/tmpfs_subr.c =================================================================== --- sys/fs/tmpfs/tmpfs_subr.c +++ sys/fs/tmpfs/tmpfs_subr.c @@ -75,6 +75,7 @@ static uma_zone_t tmpfs_dirent_pool; static uma_zone_t tmpfs_node_pool; +VFS_SMR_DECLARE; static int tmpfs_node_ctor(void *mem, int size, void *arg, int flags) @@ -131,6 +132,7 @@ tmpfs_node_pool = uma_zcreate("TMPFS node", sizeof(struct tmpfs_node), tmpfs_node_ctor, tmpfs_node_dtor, tmpfs_node_init, tmpfs_node_fini, UMA_ALIGN_PTR, 0); + VFS_SMR_ZONE_SET(tmpfs_node_pool); } void @@ -288,7 +290,7 @@ if ((mp->mnt_kern_flag & MNT_RDONLY) != 0) return (EROFS); - nnode = uma_zalloc_arg(tmpfs_node_pool, tmp, M_WAITOK); + nnode = uma_zalloc_smr(tmpfs_node_pool, M_WAITOK); /* Generic initialization. 
*/ nnode->tn_type = type; @@ -435,7 +437,7 @@ panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type); } - uma_zfree(tmpfs_node_pool, node); + uma_zfree_smr(tmpfs_node_pool, node); TMPFS_LOCK(tmp); tmpfs_free_tmp(tmp); return (true); @@ -1619,10 +1621,11 @@ int tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p) { - int error; + int error, newmode; struct tmpfs_node *node; ASSERT_VOP_ELOCKED(vp, "chmod"); + ASSERT_VOP_IN_SEQC(vp); node = VP_TO_TMPFS_NODE(vp); @@ -1656,9 +1659,9 @@ return (error); } - - node->tn_mode &= ~ALLPERMS; - node->tn_mode |= mode & ALLPERMS; + newmode = node->tn_mode & ~ALLPERMS; + newmode |= mode & ALLPERMS; + atomic_store_int(&node->tn_mode, newmode); node->tn_status |= TMPFS_NODE_CHANGED; @@ -1684,6 +1687,7 @@ gid_t ogid; ASSERT_VOP_ELOCKED(vp, "chown"); + ASSERT_VOP_IN_SEQC(vp); node = VP_TO_TMPFS_NODE(vp); @@ -1730,7 +1734,7 @@ if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) - node->tn_mode &= ~(S_ISUID | S_ISGID); + atomic_store_int(&node->tn_mode, node->tn_mode & ~(S_ISUID | S_ISGID)); } ASSERT_VOP_ELOCKED(vp, "chown2"); Index: sys/fs/tmpfs/tmpfs_vfsops.c =================================================================== --- sys/fs/tmpfs/tmpfs_vfsops.c +++ sys/fs/tmpfs/tmpfs_vfsops.c @@ -462,6 +462,8 @@ mp->mnt_flag |= MNT_LOCAL; mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | MNTK_TEXT_REFS | MNTK_NOMSYNC; + if (!nonc) + mp->mnt_kern_flag |= MNTK_FPLOOKUP; MNT_IUNLOCK(mp); mp->mnt_data = tmp; Index: sys/fs/tmpfs/tmpfs_vnops.h =================================================================== --- sys/fs/tmpfs/tmpfs_vnops.h +++ sys/fs/tmpfs/tmpfs_vnops.h @@ -49,6 +49,7 @@ extern struct vop_vector tmpfs_vnodeop_nonc_entries; vop_access_t tmpfs_access; +vop_fplookup_vexec_t tmpfs_fplookup_vexec; vop_getattr_t tmpfs_getattr; vop_setattr_t tmpfs_setattr; vop_pathconf_t tmpfs_pathconf; Index: sys/fs/tmpfs/tmpfs_vnops.c =================================================================== --- sys/fs/tmpfs/tmpfs_vnops.c +++ sys/fs/tmpfs/tmpfs_vnops.c @@ -317,6 +317,32 @@ return (0); } +/* + * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see + * the comment above cache_fplookup for details. + */ +int +tmpfs_fplookup_vexec(struct vop_fplookup_vexec_args *v) +{ + struct vnode *vp; + struct tmpfs_node *node; + struct ucred *cred; + mode_t all_x, mode; + + vp = v->a_vp; + node = VP_TO_TMPFS_NODE_SMR(vp); + if (__predict_false(node == NULL)) + return (EAGAIN); + + all_x = S_IXUSR | S_IXGRP | S_IXOTH; + mode = atomic_load_int(&node->tn_mode); + if (__predict_true((mode & all_x) == all_x)) + return (0); + + cred = v->a_cred; + return (vaccess_vexec_smr(mode, node->tn_uid, node->tn_gid, cred)); +} + int tmpfs_access(struct vop_access_args *v) { @@ -428,6 +454,8 @@ MPASS(VOP_ISLOCKED(vp)); + vn_seqc_write_begin(vp); + error = 0; /* Abort if any unsettable attribute is given. */ @@ -466,6 +494,8 @@ * from tmpfs_update. */ tmpfs_update(vp); + vn_seqc_write_end(vp); + MPASS(VOP_ISLOCKED(vp)); return error; @@ -806,12 +836,15 @@ struct tmpfs_node *tnode; struct tmpfs_node *tdnode; int error; + bool want_seqc_end; MPASS(VOP_ISLOCKED(tdvp)); MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp))); MPASS(fcnp->cn_flags & HASBUF); MPASS(tcnp->cn_flags & HASBUF); + want_seqc_end = false; + /* * Disallow cross-device renames. * XXX Why isn't this done by the caller? 
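
A minimal sketch of the reader-side discipline the SMR accessors above rely on; example_smr_peek is a hypothetical helper, not part of the patch, and only restates the pattern used by VP_TO_TMPFS_NODE_SMR and tmpfs_fplookup_vexec:

/*
 * Sketch: reading per-file state without the vnode lock, under vfs_smr.
 * VOP_RECLAIM can run concurrently, so v_data may become NULL at any time;
 * load it once with atomic_load_ptr and NULL-check it. Fields consulted
 * afterwards (here tn_mode) must be written with atomic stores, which is
 * why tmpfs_chmod and tmpfs_chown switch to atomic_store_int above.
 */
static int
example_smr_peek(struct vnode *vp, mode_t *modep)
{
	struct tmpfs_node *node;

	VFS_SMR_ASSERT_ENTERED();
	node = atomic_load_ptr(&vp->v_data);
	if (__predict_false(node == NULL))
		return (EAGAIN);	/* raced with reclaim; punt to the slow path */
	*modep = atomic_load_int(&node->tn_mode);
	return (0);
}
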
@@ -852,6 +885,13 @@ } } + if (tvp != NULL) + vn_seqc_write_begin(tvp); + vn_seqc_write_begin(tdvp); + vn_seqc_write_begin(fvp); + vn_seqc_write_begin(fdvp); + want_seqc_end = true; + tmp = VFS_TO_TMPFS(tdvp->v_mount); tdnode = VP_TO_TMPFS_DIR(tdvp); tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp); @@ -1065,6 +1105,14 @@ VOP_UNLOCK(fdvp); out: + if (want_seqc_end) { + if (tvp != NULL) + vn_seqc_write_end(tvp); + vn_seqc_write_end(tdvp); + vn_seqc_write_end(fvp); + vn_seqc_write_end(fdvp); + } + /* * Release target nodes. * XXX: I don't understand when tdvp can be the same as tvp, but @@ -1621,6 +1669,7 @@ .vop_mknod = tmpfs_mknod, .vop_open = tmpfs_open, .vop_close = tmpfs_close, + .vop_fplookup_vexec = tmpfs_fplookup_vexec, .vop_access = tmpfs_access, .vop_getattr = tmpfs_getattr, .vop_setattr = tmpfs_setattr, Index: sys/kern/kern_descrip.c =================================================================== --- sys/kern/kern_descrip.c +++ sys/kern/kern_descrip.c @@ -102,8 +102,8 @@ static __read_mostly uma_zone_t file_zone; static __read_mostly uma_zone_t filedesc0_zone; -static __read_mostly uma_zone_t pwd_zone; -static __read_mostly smr_t pwd_smr; +__read_mostly uma_zone_t pwd_zone; +VFS_SMR_DECLARE; static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders); @@ -3346,14 +3346,24 @@ fdp = td->td_proc->p_fd; - smr_enter(pwd_smr); + vfs_smr_enter(); for (;;) { - pwd = smr_entered_load(&fdp->fd_pwd, pwd_smr); + pwd = smr_entered_load(&fdp->fd_pwd, VFS_SMR()); MPASS(pwd != NULL); if (refcount_acquire_if_not_zero(&pwd->pwd_refcount)) break; } - smr_exit(pwd_smr); + vfs_smr_exit(); + return (pwd); +} + +struct pwd * +pwd_get_smr(void) +{ + struct pwd *pwd; + + pwd = smr_entered_load(&curproc->p_fd->fd_pwd, VFS_SMR()); + MPASS(pwd != NULL); return (pwd); } @@ -4363,7 +4373,11 @@ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR); - pwd_smr = uma_zone_get_smr(pwd_zone); + /* + * XXXMJG this is a temporary hack due to boot ordering issues against + * the vnode zone. + */ + vfs_smr = uma_zone_get_smr(pwd_zone); mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); } SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL); Index: sys/kern/vfs_cache.c =================================================================== --- sys/kern/vfs_cache.c +++ sys/kern/vfs_cache.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,11 @@ #include #endif +#include + +#include +#include + #ifdef DDB #include #endif @@ -100,6 +106,8 @@ SDT_PROBE_DEFINE2(vfs, namecache, shrink_negative, done, "struct vnode *", "char *"); +SDT_PROBE_DEFINE3(vfs, fplookup, lookup, done, "struct nameidata", "int", "bool"); + /* * This structure describes the elements in the cache of recent * names looked up by namei. 
@@ -2810,3 +2818,841 @@ } #endif + +extern uma_zone_t namei_zone; + +static bool __read_frequently cache_fast_lookup = true; +SYSCTL_BOOL(_vfs, OID_AUTO, cache_fast_lookup, CTLFLAG_RW, + &cache_fast_lookup, 0, ""); + +#define CACHE_FPL_FAILED -2020 + +static void +cache_fpl_cleanup_cnp(struct componentname *cnp) +{ + + uma_zfree(namei_zone, cnp->cn_pnbuf); +#ifdef DIAGNOSTIC + cnp->cn_pnbuf = NULL; + cnp->cn_nameptr = NULL; +#endif +} + +static void +cache_fpl_handle_root(struct nameidata *ndp, struct vnode **dpp) +{ + struct componentname *cnp; + + cnp = &ndp->ni_cnd; + while (*(cnp->cn_nameptr) == '/') { + cnp->cn_nameptr++; + ndp->ni_pathlen--; + } +} + +static void +cache_fpl_handle_root_initial(struct nameidata *ndp, struct vnode **dpp) +{ + + cache_fpl_handle_root(ndp, dpp); + *dpp = ndp->ni_rootdir; +} + +/* + * Components of nameidata (or objects it can point to) which may + * need restoring in case fast path lookup fails. + */ +struct nameidata_saved { + int cn_flags; + long cn_namelen; + char *cn_nameptr; + size_t ni_pathlen; +}; + +struct cache_fpl { + int line; + enum cache_fpl_status status; + bool in_smr; + struct nameidata *ndp; + struct nameidata_saved snd; + struct componentname *cnp; + struct vnode *dvp; + seqc_t dvp_seqc; + struct vnode *tvp; + seqc_t tvp_seqc; + struct pwd *pwd; +}; + +static void +cache_fpl_checkpoint(struct cache_fpl *fpl, struct nameidata_saved *snd) +{ + + snd->cn_flags = fpl->ndp->ni_cnd.cn_flags; + snd->cn_namelen = fpl->ndp->ni_cnd.cn_namelen; + snd->cn_nameptr = fpl->ndp->ni_cnd.cn_nameptr; + snd->ni_pathlen = fpl->ndp->ni_pathlen; +} + +static void +cache_fpl_restore(struct cache_fpl *fpl, struct nameidata_saved *snd) +{ + + fpl->ndp->ni_cnd.cn_flags = snd->cn_flags; + fpl->ndp->ni_cnd.cn_namelen = snd->cn_namelen; + fpl->ndp->ni_cnd.cn_nameptr = snd->cn_nameptr; + fpl->ndp->ni_pathlen = snd->ni_pathlen; +} + +#ifdef INVARIANTS +#define cache_fpl_smr_assert_entered(fpl) ({ \ + struct cache_fpl *_fpl = (fpl); \ + MPASS(_fpl->in_smr == true); \ + VFS_SMR_ASSERT_ENTERED(); \ +}) +#define cache_fpl_smr_assert_not_entered(fpl) ({ \ + struct cache_fpl *_fpl = (fpl); \ + MPASS(_fpl->in_smr == false); \ + VFS_SMR_ASSERT_NOT_ENTERED(); \ +}) +#else +#define cache_fpl_smr_assert_entered(fpl) do { } while (0) +#define cache_fpl_smr_assert_not_entered(fpl) do { } while (0) +#endif + +#define cache_fpl_smr_enter(fpl) ({ \ + struct cache_fpl *_fpl = (fpl); \ + MPASS(_fpl->in_smr == false); \ + vfs_smr_enter(); \ + _fpl->in_smr = true; \ +}) + +#define cache_fpl_smr_exit(fpl) ({ \ + struct cache_fpl *_fpl = (fpl); \ + MPASS(_fpl->in_smr == true); \ + vfs_smr_exit(); \ + _fpl->in_smr = false; \ +}) + +static int +cache_fpl_aborted_impl(struct cache_fpl *fpl, int line) +{ + + KASSERT(fpl->status == CACHE_FPL_STATUS_UNSET, + ("%s: lookup status already set at %d\n", __func__, fpl->line)); + fpl->status = CACHE_FPL_STATUS_ABORTED; + fpl->line = line; + return (CACHE_FPL_FAILED); +} + +#define cache_fpl_aborted(x) cache_fpl_aborted_impl((x), __LINE__) + +static int +cache_fpl_partial_impl(struct cache_fpl *fpl, int line) +{ + + KASSERT(fpl->status == CACHE_FPL_STATUS_UNSET, + ("%s: lookup status already set at %d\n", __func__, fpl->line)); + cache_fpl_smr_assert_entered(fpl); + fpl->status = CACHE_FPL_STATUS_PARTIAL; + fpl->line = line; + return (CACHE_FPL_FAILED); +} + +#define cache_fpl_partial(x) cache_fpl_partial_impl((x), __LINE__) + +static int +cache_fpl_handled_impl(struct cache_fpl *fpl, int error, int line) +{ + + KASSERT(fpl->status == 
CACHE_FPL_STATUS_UNSET, + ("%s: lookup status already set at %d\n", __func__, fpl->line)); + cache_fpl_smr_assert_not_entered(fpl); + MPASS(error != CACHE_FPL_FAILED); + fpl->status = CACHE_FPL_STATUS_HANDLED; + fpl->line = line; + return (error); +} + +#define cache_fpl_handled(x, e) cache_fpl_handled_impl((x), (e), __LINE__) + +#define CACHE_FPL_SUPPORTED_CN_FLAGS \ + (LOCKLEAF | FOLLOW | LOCKSHARED | SAVENAME | ISOPEN | AUDITVNODE1) + +static bool +cache_can_fplookup(struct cache_fpl *fpl) +{ + struct nameidata *ndp; + struct componentname *cnp; + struct thread *td; + + ndp = fpl->ndp; + cnp = fpl->cnp; + td = cnp->cn_thread; + + if (!cache_fast_lookup) { + cache_fpl_aborted(fpl); + return (false); + } + if (mac_vnode_check_lookup_enabled()) { + cache_fpl_aborted(fpl); + return (false); + } + if (cnp->cn_flags & ~CACHE_FPL_SUPPORTED_CN_FLAGS) { + cache_fpl_aborted(fpl); + return (false); + } + if ((cnp->cn_flags & LOCKLEAF) == 0) { + cache_fpl_aborted(fpl); + return (false); + } + if (cnp->cn_nameiop != LOOKUP) { + cache_fpl_aborted(fpl); + return (false); + } + if (ndp->ni_dirfd != AT_FDCWD) { + cache_fpl_aborted(fpl); + return (false); + } + if (IN_CAPABILITY_MODE(td)) { + cache_fpl_aborted(fpl); + return (false); + } + if (AUDITING_TD(td)) { + cache_fpl_aborted(fpl); + return (false); + } + if (ndp->ni_startdir != NULL) { + cache_fpl_aborted(fpl); + return (false); + } + return (true); +} + +static bool +cache_fplookup_vnode_supported(struct vnode *vp) +{ + + switch (vp->v_type) { + case VLNK: + return (false); + default: + break; + } + return (true); +} + +/* + * The target vnode is not supported, prepare for the slow path to take over. + */ +static int +cache_fplookup_partial_setup(struct cache_fpl *fpl) +{ + struct componentname *cnp; + struct vnode *dvp; + struct pwd *pwd; + seqc_t dvp_seqc; + + cnp = fpl->cnp; + dvp = fpl->dvp; + dvp_seqc = fpl->dvp_seqc; + + if (!vref_smr(dvp)) { + fpl->status = CACHE_FPL_STATUS_ABORTED; + cache_fpl_smr_exit(fpl); + return (CACHE_FPL_FAILED); + } + + cache_fpl_smr_exit(fpl); + if (!seqc_consistent(&dvp->v_seqc, dvp_seqc)) { + fpl->status = CACHE_FPL_STATUS_ABORTED; + vrele(dvp); + return (CACHE_FPL_FAILED); + } + + pwd = pwd_hold(curthread); + if (fpl->pwd != pwd) { + fpl->status = CACHE_FPL_STATUS_ABORTED; + vrele(dvp); + pwd_drop(pwd); + return (CACHE_FPL_FAILED); + } + + fpl->ndp->ni_startdir = dvp; + return (0); +} + +static int +cache_fplookup_final(struct cache_fpl *fpl) +{ + struct componentname *cnp; + enum vgetstate tvs; + struct vnode *dvp, *tvp; + seqc_t dvp_seqc, tvp_seqc; + int error; + + cnp = fpl->cnp; + dvp = fpl->dvp; + dvp_seqc = fpl->dvp_seqc; + tvp = fpl->tvp; + tvp_seqc = fpl->tvp_seqc; + + VNPASS(cache_fplookup_vnode_supported(dvp), dvp); + MPASS((cnp->cn_flags & LOCKLEAF) != 0); + + tvs = vget_prep_smr(tvp); + if (tvs == VGET_NONE) { + return (cache_fpl_partial(fpl)); + } + + if (!seqc_consistent(&dvp->v_seqc, dvp_seqc)) { + cache_fpl_smr_exit(fpl); + vget_abort(tvp, tvs); + return (cache_fpl_aborted(fpl)); + } + + cache_fpl_smr_exit(fpl); + + error = vget_finish(tvp, cnp->cn_lkflags, tvs); + if (error != 0) { + return (cache_fpl_aborted(fpl)); + } + + if (!seqc_consistent(&tvp->v_seqc, tvp_seqc)) { + vput(tvp); + return (cache_fpl_aborted(fpl)); + } + + return (cache_fpl_handled(fpl, 0)); +} + +static int +cache_fplookup_next(struct cache_fpl *fpl) +{ + struct componentname *cnp; + struct namecache *ncp; + struct vnode *dvp, *tvp; + u_char nc_flag; + uint32_t hash; + + cnp = fpl->cnp; + dvp = fpl->dvp; + + if 
(__predict_false(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')) { + fpl->tvp = dvp; + fpl->tvp_seqc = seqc_read_any(&dvp->v_seqc); + if (seqc_in_modify(fpl->tvp_seqc)) { + return (cache_fpl_partial(fpl)); + } + return (0); + } + + hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp); + + CK_LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { + counter_u64_add(numchecks, 1); + if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && + !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) + break; + } + + /* + * If there is no entry we have to punt to the slow path to perform + * actual lookup. Should there be nothing with this name a negative + * entry will be created. + */ + if (__predict_false(ncp == NULL)) { + return (cache_fpl_partial(fpl)); + } + + tvp = atomic_load_ptr(&ncp->nc_vp); + nc_flag = atomic_load_char(&ncp->nc_flag); + if (__predict_false(cache_ncp_invalid(ncp))) { + return (cache_fpl_partial(fpl)); + } + if (__predict_false(nc_flag & NCF_WHITE)) { + return (cache_fpl_partial(fpl)); + } + + fpl->tvp = tvp; + if (nc_flag & NCF_NEGATIVE) { + if ((nc_flag & NCF_HOTNEGATIVE) == 0) { + /* + * TODO + * Promoting to hot negative requires locks which are + * not yet supported for simplicity. + */ + return (cache_fpl_partial(fpl)); + } + SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp, + ncp->nc_name); + counter_u64_add(numneghits, 1); + cache_fpl_smr_exit(fpl); + return (cache_fpl_handled(fpl, ENOENT)); + } + + fpl->tvp_seqc = seqc_read_any(&tvp->v_seqc); + if (seqc_in_modify(fpl->tvp_seqc)) { + return (cache_fpl_partial(fpl)); + } + + if (!cache_fplookup_vnode_supported(tvp)) { + return (cache_fpl_partial(fpl)); + } + + counter_u64_add(numposhits, 1); + SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name, tvp); + return (0); +} + +static bool +cache_fplookup_mp_supported(struct mount *mp) +{ + + if (mp == NULL) + return (false); + if ((mp->mnt_kern_flag & MNTK_FPLOOKUP) == 0) + return (false); + if (mp->mnt_flag & MNT_UNION) + return (false); + return (true); +} + +/* + * Walk up the mount stack (if any). + * + * Correctness is provided in the following ways: + * - all vnodes are protected from freeing with SMR + * - struct mount objects are type stable making them always safe to access + * - stability of the particular mount is provided by busying it + * - relationship between the vnode which is mounted on and the mount is + * verified with the vnode sequence counter after busying + * - association between root vnode of the mount and the mount is protected + * by busy + * + * From that point on we can read the sequence counter of the root vnode + * and get the next mount on the stack (if any) using the same protection. + * + * By the end of successful walk we are guaranteed the reached state was + * indeed present at least at some point which matches the regular lookup. 
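+ *
+ * A condensed sketch of a single step of the walk (illustrative only; the
+ * real loop follows below):
+ *
+ *	if (!vfs_op_thread_enter(mp))		// pin the mount
+ *		return (cache_fpl_partial(fpl));
+ *	if (!seqc_consistent(&vp->v_seqc, vp_seqc)) {
+ *		vfs_op_thread_exit(mp);		// vp no longer covers mp
+ *		return (cache_fpl_partial(fpl));
+ *	}
+ *	vp = atomic_load_ptr(&mp->mnt_rootvnode); // stable while mp is pinned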
+ */ +static int +cache_fplookup_climb_mount(struct cache_fpl *fpl) +{ + struct mount *mp, *prev_mp; + struct vnode *vp; + seqc_t vp_seqc; + + vp = fpl->tvp; + vp_seqc = fpl->tvp_seqc; + if (vp->v_type != VDIR) + return (0); + + mp = atomic_load_ptr(&vp->v_mountedhere); + if (mp == NULL) + return (0); + + prev_mp = NULL; + for (;;) { + if (!vfs_op_thread_enter(mp)) { + if (prev_mp != NULL) + vfs_op_thread_exit(prev_mp); + return (cache_fpl_partial(fpl)); + } + if (prev_mp != NULL) + vfs_op_thread_exit(prev_mp); + if (!seqc_consistent(&vp->v_seqc, vp_seqc)) { + vfs_op_thread_exit(mp); + return (cache_fpl_partial(fpl)); + } + if (!cache_fplookup_mp_supported(mp)) { + vfs_op_thread_exit(mp); + return (cache_fpl_partial(fpl)); + } + vp = atomic_load_ptr(&mp->mnt_rootvnode); + if (vp == NULL || VN_IS_DOOMED(vp)) { + vfs_op_thread_exit(mp); + return (cache_fpl_partial(fpl)); + } + vp_seqc = seqc_read_any(&vp->v_seqc); + if (seqc_in_modify(vp_seqc)) { + vfs_op_thread_exit(mp); + return (cache_fpl_partial(fpl)); + } + prev_mp = mp; + mp = atomic_load_ptr(&vp->v_mountedhere); + if (mp == NULL) + break; + } + + vfs_op_thread_exit(prev_mp); + fpl->tvp = vp; + fpl->tvp_seqc = vp_seqc; + return (0); +} + +/* + * Parse the path. + * + * The code is mostly copy-pasted from regular lookup, see lookup(). + * The structure is maintained along with comments for easier maintenance. + * Deduplicating the code will become feasible after fast path lookup + * becomes more feature-complete. + */ +static int +cache_fplookup_parse(struct cache_fpl *fpl) +{ + struct nameidata *ndp; + struct componentname *cnp; + char *cp; + char *prev_ni_next; /* saved ndp->ni_next */ + size_t prev_ni_pathlen; /* saved ndp->ni_pathlen */ + + ndp = fpl->ndp; + cnp = fpl->cnp; + + /* + * Search a new directory. + * + * The last component of the filename is left accessible via + * cnp->cn_nameptr for callers that need the name. Callers needing + * the name set the SAVENAME flag. When done, they assume + * responsibility for freeing the pathname buffer. + */ + for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) + continue; + cnp->cn_namelen = cp - cnp->cn_nameptr; + if (cnp->cn_namelen > NAME_MAX) { + cache_fpl_smr_exit(fpl); + return (cache_fpl_handled(fpl, ENAMETOOLONG)); + } + prev_ni_pathlen = ndp->ni_pathlen; + ndp->ni_pathlen -= cnp->cn_namelen; + KASSERT(ndp->ni_pathlen <= PATH_MAX, + ("%s: ni_pathlen underflow to %zd\n", __func__, ndp->ni_pathlen)); + prev_ni_next = ndp->ni_next; + ndp->ni_next = cp; + + /* + * Replace multiple slashes by a single slash and trailing slashes + * by a null. This must be done before VOP_LOOKUP() because some + * fs's don't know about trailing slashes. Remember if there were + * trailing slashes to handle symlinks, existing non-directories + * and non-existing files that won't be directories specially later. + */ + while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { + cp++; + ndp->ni_pathlen--; + if (*cp == '\0') { + /* + * TODO + * Regular lookup performs the following: + * *ndp->ni_next = '\0'; + * cnp->cn_flags |= TRAILINGSLASH; + * + * Which is problematic since it modifies data read + * from userspace. Then if fast path lookup was to + * abort we would have to either restore it or convey + * the flag. Since this is a corner case just ignore + * it for simplicity. + */ + return (cache_fpl_partial(fpl)); + } + } + ndp->ni_next = cp; + + cnp->cn_flags |= MAKEENTRY; + + if (cnp->cn_namelen == 2 && + cnp->cn_nameptr[1] == '.' 
&& cnp->cn_nameptr[0] == '.') + cnp->cn_flags |= ISDOTDOT; + else + cnp->cn_flags &= ~ISDOTDOT; + if (*ndp->ni_next == 0) + cnp->cn_flags |= ISLASTCN; + else + cnp->cn_flags &= ~ISLASTCN; + + /* + * Check for degenerate name (e.g. / or "") + * which is a way of talking about a directory, + * e.g. like "/." or ".". + * + * TODO + * Another corner case handled by the regular lookup + */ + if (__predict_false(cnp->cn_nameptr[0] == '\0')) { + return (cache_fpl_partial(fpl)); + } + return (0); +} + +static void +cache_fplookup_parse_advance(struct cache_fpl *fpl) +{ + struct nameidata *ndp; + struct componentname *cnp; + + ndp = fpl->ndp; + cnp = fpl->cnp; + + cnp->cn_nameptr = ndp->ni_next; + while (*cnp->cn_nameptr == '/') { + cnp->cn_nameptr++; + ndp->ni_pathlen--; + } +} + +static int +cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl) +{ + struct nameidata *ndp; + struct componentname *cnp; + struct mount *mp; + int error; + + error = CACHE_FPL_FAILED; + ndp = fpl->ndp; + ndp->ni_lcf = 0; + cnp = fpl->cnp; + cnp->cn_lkflags = LK_SHARED; + if ((cnp->cn_flags & LOCKSHARED) == 0) + cnp->cn_lkflags = LK_EXCLUSIVE; + + cache_fpl_checkpoint(fpl, &fpl->snd); + + fpl->dvp = dvp; + fpl->dvp_seqc = seqc_read_any(&fpl->dvp->v_seqc); + if (seqc_in_modify(fpl->dvp_seqc)) { + cache_fpl_aborted(fpl); + goto out; + } + mp = atomic_load_ptr(&fpl->dvp->v_mount); + if (!cache_fplookup_mp_supported(mp)) { + cache_fpl_aborted(fpl); + goto out; + } + + VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp); + + for (;;) { + error = cache_fplookup_parse(fpl); + if (__predict_false(error != 0)) { + break; + } + + if (cnp->cn_flags & ISDOTDOT) { + error = cache_fpl_partial(fpl); + break; + } + + VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp); + + error = VOP_FPLOOKUP_VEXEC(fpl->dvp, cnp->cn_cred, cnp->cn_thread); + if (__predict_false(error != 0)) { + switch (error) { + case EAGAIN: + case EOPNOTSUPP: /* can happen when racing against vgone */ + cache_fpl_partial(fpl); + break; + default: + /* + * See the API contract for VOP_FPLOOKUP_VEXEC. 
+ */ + if (!seqc_consistent(&fpl->dvp->v_seqc, fpl->dvp_seqc)) { + error = cache_fpl_aborted(fpl); + } else { + cache_fpl_smr_exit(fpl); + cache_fpl_handled(fpl, error); + } + break; + } + break; + } + + error = cache_fplookup_next(fpl); + if (__predict_false(error != 0)) { + break; + } + + VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp); + + error = cache_fplookup_climb_mount(fpl); + if (__predict_false(error != 0)) { + break; + } + + VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp); + + if (cnp->cn_flags & ISLASTCN) { + error = cache_fplookup_final(fpl); + break; + } + + if (!seqc_consistent(&fpl->dvp->v_seqc, fpl->dvp_seqc)) { + error = cache_fpl_aborted(fpl); + break; + } + + fpl->dvp = fpl->tvp; + fpl->dvp_seqc = fpl->tvp_seqc; + + cache_fplookup_parse_advance(fpl); + cache_fpl_checkpoint(fpl, &fpl->snd); + } +out: + switch (fpl->status) { + case CACHE_FPL_STATUS_UNSET: + __assert_unreachable(); + break; + case CACHE_FPL_STATUS_PARTIAL: + cache_fpl_smr_assert_entered(fpl); + return (cache_fplookup_partial_setup(fpl)); + case CACHE_FPL_STATUS_ABORTED: + if (fpl->in_smr) + cache_fpl_smr_exit(fpl); + return (CACHE_FPL_FAILED); + case CACHE_FPL_STATUS_HANDLED: + cache_fpl_smr_assert_not_entered(fpl); + if (__predict_false(error != 0)) { + ndp->ni_dvp = NULL; + ndp->ni_vp = NULL; + cache_fpl_cleanup_cnp(cnp); + return (error); + } + ndp->ni_dvp = fpl->dvp; + ndp->ni_vp = fpl->tvp; + if (cnp->cn_flags & SAVENAME) + cnp->cn_flags |= HASBUF; + else + cache_fpl_cleanup_cnp(cnp); + return (error); + } +} + +/* + * Fast path lookup protected with SMR and sequence counters. + * + * Note: all VOP_FPLOOKUP_VEXEC routines have a comment referencing this one. + * + * Filesystems can opt in by setting the MNTK_FPLOOKUP flag and meeting criteria + * outlined below. + * + * Traditional vnode lookup conceptually looks like this: + * + * vn_lock(current); + * for (;;) { + * next = find(); + * vn_lock(next); + * vn_unlock(current); + * current = next; + * if (last) + * break; + * } + * + * Each jump to the next vnode is safe memory-wise and atomic with respect to + * any modifications thanks to holding respective locks. + * + * The same guarantee can be provided with a combination of safe memory + * reclamation and sequence counters instead. If all operations which affect + * the relationship between the current vnode and the one we are looking for + * also modify the counter, we can verify whether all the conditions held as + * we made the jump. This includes things like permissions, mount point etc. + * You can grep for vn_seqc_write_begin to check all the places. 
+ * + * Thus this translates to: + * + * vfs_smr_enter(); + * current_seqc = seqc_read_any(current); + * if (seqc_in_modify(current_seqc)) // someone is altering the vnode + * abort(); + * for (;;) { + * next = find(); + * next_seqc = seqc_read_any(next); + * if (!seqc_consistent(current, current_seqc)) // someone is altering the vnode + * abort(); + * current = next; // we know nothing of importance has changed + * current_seqc = next_seqc; // store the counter for the next iteration + * if (last) + * break; + * } + * + * API contract for VOP_FPLOOKUP_VEXEC routines is as follows: + * - they are called while within vfs_smr protection which they must never exit + * - EAGAIN can be returned to denote checking could not be performed, it is + * always valid to return it + * - if the sequence counter has not changed the result must be valid + * - if the sequence counter has changed both false positives and false negatives + * are permitted (since the result will be rejected later) + * - for simple cases of unix permission checks vaccess_vexec_smr can be used + * + * Caveats to watch out for: + * - vnodes are passed unlocked and unreferenced with nothing stopping + * VOP_RECLAIM, in turn meaning that ->v_data can become NULL. It is advised + * to use atomic_load_ptr to fetch it. + * - aforementioned object can also get freed, meaning absent other means it + * should be protected with vfs_smr + * - either safely checking permissions as they are modified or guaranteeing + * their stability is left to the routine + */ +int +cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status, + struct pwd **pwdp) +{ + struct cache_fpl fpl; + struct pwd *pwd; + struct vnode *dvp; + struct componentname *cnp; + struct nameidata_saved orig; + int error; + + *status = CACHE_FPL_STATUS_UNSET; + bzero(&fpl, sizeof(fpl)); + fpl.status = CACHE_FPL_STATUS_UNSET; + fpl.ndp = ndp; + fpl.cnp = &ndp->ni_cnd; + MPASS(curthread == fpl.cnp->cn_thread); + + if (!cache_can_fplookup(&fpl)) { + SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.status); + *status = fpl.status; + return (EOPNOTSUPP); + } + + cache_fpl_checkpoint(&fpl, &orig); + + cache_fpl_smr_enter(&fpl); + pwd = pwd_get_smr(); + fpl.pwd = pwd; + ndp->ni_rootdir = pwd->pwd_rdir; + ndp->ni_topdir = pwd->pwd_jdir; + + cnp = fpl.cnp; + cnp->cn_nameptr = cnp->cn_pnbuf; + if (cnp->cn_pnbuf[0] == '/') { + cache_fpl_handle_root_initial(ndp, &dvp); + } else { + MPASS(ndp->ni_dirfd == AT_FDCWD); + dvp = pwd->pwd_cdir; + } + + error = cache_fplookup_impl(dvp, &fpl); + cache_fpl_smr_assert_not_entered(&fpl); + SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.status); + + *status = fpl.status; + switch (fpl.status) { + case CACHE_FPL_STATUS_UNSET: + __assert_unreachable(); + break; + case CACHE_FPL_STATUS_HANDLED: + break; + case CACHE_FPL_STATUS_PARTIAL: + *pwdp = fpl.pwd; + cache_fpl_restore(&fpl, &fpl.snd); + break; + case CACHE_FPL_STATUS_ABORTED: + cache_fpl_restore(&fpl, &orig); + break; + } + return (error); +} Index: sys/kern/vfs_lookup.c =================================================================== --- sys/kern/vfs_lookup.c +++ sys/kern/vfs_lookup.c @@ -280,77 +280,21 @@ return (0); } -/* - * Convert a pathname into a pointer to a locked vnode. - * - * The FOLLOW flag is set when symbolic links are to be followed - * when they occur at the end of the name translation process. - * Symbolic links are always followed for all other pathname - * components other than the last.
- * - * The segflg defines whether the name is to be copied from user - * space or kernel space. - * - * Overall outline of namei: - * - * copy in name - * get starting directory - * while (!done && !error) { - * call lookup to search path. - * if symbolic link, massage name in buffer and continue - * } - */ -int -namei(struct nameidata *ndp) +static int +namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp) { - char *cp; /* pointer into pathname argument */ - struct vnode *dp; /* the directory we are searching */ - struct iovec aiov; /* uio for reading symbolic links */ struct componentname *cnp; struct file *dfp; struct thread *td; - struct proc *p; struct pwd *pwd; cap_rights_t rights; struct filecaps dirfd_caps; - struct uio auio; - int error, linklen, startdir_used; + int error, startdir_used; cnp = &ndp->ni_cnd; td = cnp->cn_thread; - p = td->td_proc; - ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; - KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc")); - KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0, - ("namei: nameiop contaminated with flags")); - KASSERT((cnp->cn_flags & OPMASK) == 0, - ("namei: flags contaminated with nameiops")); - MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR || - ndp->ni_startdir->v_type == VBAD); - TAILQ_INIT(&ndp->ni_cap_tracker); - ndp->ni_lcf = 0; - - /* We will set this ourselves if we need it. */ - cnp->cn_flags &= ~TRAILINGSLASH; - /* - * Get a buffer for the name to be translated, and copy the - * name into the buffer. - */ - if ((cnp->cn_flags & HASBUF) == 0) - cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); - if (ndp->ni_segflg == UIO_SYSSPACE) - error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, - &ndp->ni_pathlen); - else - error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, - &ndp->ni_pathlen); - - /* - * Don't allow empty pathnames. - */ - if (error == 0 && *cnp->cn_pnbuf == '\0') - error = ENOENT; + *pwdp = NULL; #ifdef CAPABILITY_MODE /* @@ -366,24 +310,17 @@ * previously walked by us, which prevents an escape from * the relative root. */ - if (error == 0 && IN_CAPABILITY_MODE(td) && - (cnp->cn_flags & NOCAPCHECK) == 0) { + if (IN_CAPABILITY_MODE(td) && (cnp->cn_flags & NOCAPCHECK) == 0) { ndp->ni_lcf |= NI_LCF_STRICTRELATIVE; if (ndp->ni_dirfd == AT_FDCWD) { #ifdef KTRACE if (KTRPOINT(td, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); #endif - error = ECAPMODE; + return (ECAPMODE); } } #endif - if (error != 0) { - namei_cleanup_cnp(cnp); - ndp->ni_vp = NULL; - return (error); - } - ndp->ni_loopcnt = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_NAMEI)) { KASSERT(cnp->cn_thread == curthread, @@ -391,6 +328,8 @@ ktrnamei(cnp->cn_pnbuf); } #endif + error = 0; + /* * Get starting point for the translation. 
*/ @@ -402,19 +341,16 @@ ndp->ni_rootdir = pwd->pwd_rdir; ndp->ni_topdir = pwd->pwd_jdir; - startdir_used = 0; - dp = NULL; - cnp->cn_nameptr = cnp->cn_pnbuf; if (cnp->cn_pnbuf[0] == '/') { ndp->ni_resflags |= NIRES_ABS; - error = namei_handle_root(ndp, &dp); + error = namei_handle_root(ndp, dpp); } else { if (ndp->ni_startdir != NULL) { - dp = ndp->ni_startdir; + *dpp = ndp->ni_startdir; startdir_used = 1; } else if (ndp->ni_dirfd == AT_FDCWD) { - dp = pwd->pwd_cdir; - vrefact(dp); + *dpp = pwd->pwd_cdir; + vrefact(*dpp); } else { rights = ndp->ni_rightsneeded; cap_rights_set_one(&rights, CAP_LOOKUP); @@ -441,8 +377,8 @@ } else if (dfp->f_vnode == NULL) { error = ENOTDIR; } else { - dp = dfp->f_vnode; - vrefact(dp); + *dpp = dfp->f_vnode; + vrefact(*dpp); if ((dfp->f_flag & FSEARCH) != 0) cnp->cn_flags |= NOEXECCHECK; @@ -464,7 +400,7 @@ } #endif } - if (error == 0 && dp->v_type != VDIR) + if (error == 0 && (*dpp)->v_type != VDIR) error = ENOTDIR; } if (error == 0 && (cnp->cn_flags & BENEATH) != 0) { @@ -476,7 +412,7 @@ cap_rights_set_one(&rights, CAP_LOOKUP); error = fgetvp_rights(td, ndp->ni_dirfd, &rights, &dirfd_caps, &ndp->ni_beneath_latch); - if (error == 0 && dp->v_type != VDIR) { + if (error == 0 && (*dpp)->v_type != VDIR) { vrele(ndp->ni_beneath_latch); error = ENOTDIR; } @@ -488,15 +424,15 @@ * If we are auditing the kernel pathname, save the user pathname. */ if (cnp->cn_flags & AUDITVNODE1) - AUDIT_ARG_UPATH1_VP(td, ndp->ni_rootdir, dp, cnp->cn_pnbuf); + AUDIT_ARG_UPATH1_VP(td, ndp->ni_rootdir, *dpp, cnp->cn_pnbuf); if (cnp->cn_flags & AUDITVNODE2) - AUDIT_ARG_UPATH2_VP(td, ndp->ni_rootdir, dp, cnp->cn_pnbuf); + AUDIT_ARG_UPATH2_VP(td, ndp->ni_rootdir, *dpp, cnp->cn_pnbuf); if (ndp->ni_startdir != NULL && !startdir_used) vrele(ndp->ni_startdir); if (error != 0) { - if (dp != NULL) - vrele(dp); - goto out; + if (*dpp != NULL) + vrele(*dpp); + return (error); } MPASS((ndp->ni_lcf & (NI_LCF_BENEATH_ABS | NI_LCF_LATCH)) != NI_LCF_BENEATH_ABS); @@ -505,8 +441,124 @@ ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && (cnp->cn_flags & BENEATH) != 0)) ndp->ni_lcf |= NI_LCF_CAP_DOTDOT; - SDT_PROBE3(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf, + SDT_PROBE3(vfs, namei, lookup, entry, *dpp, cnp->cn_pnbuf, cnp->cn_flags); + *pwdp = pwd; + return (0); +} + +/* + * Convert a pathname into a pointer to a locked vnode. + * + * The FOLLOW flag is set when symbolic links are to be followed + * when they occur at the end of the name translation process. + * Symbolic links are always followed for all other pathname + * components other than the last. + * + * The segflg defines whether the name is to be copied from user + * space or kernel space. + * + * Overall outline of namei: + * + * copy in name + * get starting directory + * while (!done && !error) { + * call lookup to search path. 
+ * if symbolic link, massage name in buffer and continue + * } + */ +int +namei(struct nameidata *ndp) +{ + char *cp; /* pointer into pathname argument */ + struct vnode *dp; /* the directory we are searching */ + struct iovec aiov; /* uio for reading symbolic links */ + struct componentname *cnp; + struct thread *td; + struct proc *p; + struct pwd *pwd; + struct uio auio; + int error, linklen; + enum cache_fpl_status status; + + cnp = &ndp->ni_cnd; + td = cnp->cn_thread; + p = td->td_proc; + ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; + KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc")); + KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0, + ("namei: nameiop contaminated with flags")); + KASSERT((cnp->cn_flags & OPMASK) == 0, + ("namei: flags contaminated with nameiops")); + MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR || + ndp->ni_startdir->v_type == VBAD); + TAILQ_INIT(&ndp->ni_cap_tracker); + ndp->ni_lcf = 0; + ndp->ni_loopcnt = 0; + dp = NULL; + + /* We will set this ourselves if we need it. */ + cnp->cn_flags &= ~TRAILINGSLASH; + + ndp->ni_vp = NULL; + + /* + * Get a buffer for the name to be translated, and copy the + * name into the buffer. + */ + if ((cnp->cn_flags & HASBUF) == 0) + cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); + if (ndp->ni_segflg == UIO_SYSSPACE) + error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, + &ndp->ni_pathlen); + else + error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, + &ndp->ni_pathlen); + + if (error != 0) { + namei_cleanup_cnp(cnp); + return (error); + } + + cnp->cn_nameptr = cnp->cn_pnbuf; + + /* + * Don't allow empty pathnames. + */ + if (*cnp->cn_pnbuf == '\0') { + namei_cleanup_cnp(cnp); + return (ENOENT); + } + + /* + * First try the fast path. + * + * If it fails to handle the lookup, we are going to perform it below. + * Note this means that we either start from scratch or continue where it + * left off. + */ + error = cache_fplookup(ndp, &status, &pwd); + switch (status) { + case CACHE_FPL_STATUS_UNSET: + __assert_unreachable(); + break; + case CACHE_FPL_STATUS_HANDLED: + return (error); + case CACHE_FPL_STATUS_PARTIAL: + dp = ndp->ni_startdir; + break; + case CACHE_FPL_STATUS_ABORTED: + error = namei_setup(ndp, &dp, &pwd); + if (error != 0) { + namei_cleanup_cnp(cnp); + return (error); + } + break; + } + + /* + * Perform the lookup. + */ for (;;) { ndp->ni_startdir = dp; error = lookup(ndp); Index: sys/kern/vfs_mount.c =================================================================== --- sys/kern/vfs_mount.c +++ sys/kern/vfs_mount.c @@ -947,6 +947,7 @@ vput(vp); return (error); } + vn_seqc_write_begin(vp); VOP_UNLOCK(vp); /* Allocate and initialize the filesystem.
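
A minimal sketch of the handoff contract namei() now follows when consuming cache_fplookup(); the helper name example_namei_fast and its exact shape are illustrative, not part of the patch:

static int
example_namei_fast(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp)
{
	enum cache_fpl_status status;
	int error;

	error = cache_fplookup(ndp, &status, pwdp);
	switch (status) {
	case CACHE_FPL_STATUS_HANDLED:
		/* Fully resolved locklessly; ni_vp is set up per the flags. */
		return (error);
	case CACHE_FPL_STATUS_PARTIAL:
		/* Prefix resolved; resume the locked loop from ni_startdir. */
		*dpp = ndp->ni_startdir;
		return (0);
	default:
		/* ABORTED: nothing was committed, redo the setup under locks. */
		return (namei_setup(ndp, dpp, pwdp));
	}
}
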
 */ @@ -979,9 +980,11 @@ VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); + vn_seqc_write_end(vp); vrele(vp); return (error); } + vn_seqc_write_begin(newdp); VOP_UNLOCK(newdp); if (mp->mnt_opt != NULL) @@ -1018,6 +1021,8 @@ EVENTHANDLER_DIRECT_INVOKE(vfs_mounted, mp, newdp, td); VOP_UNLOCK(newdp); mountcheckdirs(vp, newdp); + vn_seqc_write_end(vp); + vn_seqc_write_end(newdp); vrele(newdp); if ((mp->mnt_flag & MNT_RDONLY) == 0) vfs_allocate_syncvnode(mp); @@ -1094,7 +1099,9 @@ VOP_UNLOCK(vp); vfs_op_enter(mp); + vn_seqc_write_begin(vp); + rootvp = NULL; MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { MNT_IUNLOCK(mp); @@ -1108,8 +1115,6 @@ mp->mnt_kern_flag &= ~MNTK_ASYNC; rootvp = vfs_cache_root_clear(mp); MNT_IUNLOCK(mp); - if (rootvp != NULL) - vrele(rootvp); mp->mnt_optnew = *optlist; vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); @@ -1233,6 +1238,11 @@ vfs_deallocate_syncvnode(mp); end: vfs_op_exit(mp); + if (rootvp != NULL) { + vn_seqc_write_end(rootvp); + vrele(rootvp); + } + vn_seqc_write_end(vp); vfs_unbusy(mp); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; @@ -1723,14 +1733,19 @@ } mp->mnt_kern_flag |= MNTK_UNMOUNT; rootvp = vfs_cache_root_clear(mp); + if (coveredvp != NULL) + vn_seqc_write_begin(coveredvp); if (flags & MNT_NONBUSY) { MNT_IUNLOCK(mp); error = vfs_check_usecounts(mp); MNT_ILOCK(mp); if (error != 0) { + if (coveredvp != NULL) + vn_seqc_write_end(coveredvp); dounmount_cleanup(mp, coveredvp, MNTK_UNMOUNT); - if (rootvp != NULL) + if (rootvp != NULL) { + vn_seqc_write_end(rootvp); vrele(rootvp); + } return (error); } } @@ -1759,22 +1774,19 @@ ("%s: invalid return value for msleep in the drain path @ %s:%d", __func__, __FILE__, __LINE__)); - if (rootvp != NULL) + /* + * We want to keep the vnode around so that we can vn_seqc_write_end + * after we are done with unmount. Downgrade our reference to a mere + * hold count so that we don't interfere with anything. + */ + if (rootvp != NULL) { + vhold(rootvp); vrele(rootvp); + } if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); - /* - * From now, we can claim that the use reference on the - * coveredvp is ours, and the ref can be released only by - * successfull unmount by us, or left for later unmount - * attempt. The previously acquired hold reference is no - * longer needed to protect the vnode from reuse.
- */ - if (coveredvp != NULL) - vdrop(coveredvp); - vfs_periodic(mp, MNT_WAIT); MNT_ILOCK(mp); async_flag = mp->mnt_flag & MNT_ASYNC; @@ -1809,8 +1821,15 @@ } vfs_op_exit_locked(mp); MNT_IUNLOCK(mp); - if (coveredvp) + if (coveredvp) { + vn_seqc_write_end(coveredvp); VOP_UNLOCK(coveredvp); + vdrop(coveredvp); + } + if (rootvp != NULL) { + vn_seqc_write_end(rootvp); + vdrop(rootvp); + } return (error); } mtx_lock(&mountlist_mtx); @@ -1819,7 +1838,13 @@ EVENTHANDLER_DIRECT_INVOKE(vfs_unmounted, mp, td); if (coveredvp != NULL) { coveredvp->v_mountedhere = NULL; + vn_seqc_write_end(coveredvp); VOP_UNLOCK(coveredvp); + vdrop(coveredvp); + } + if (rootvp != NULL) { + vn_seqc_write_end(rootvp); + vdrop(rootvp); } vfs_event_signal(NULL, VQ_UNMOUNT, 0); if (rootvnode != NULL && mp == rootvnode->v_mount) { Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -664,8 +664,8 @@ vnode_list_reclaim_marker = vn_alloc_marker(NULL); TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist); vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, - vnode_init, vnode_fini, UMA_ALIGN_PTR, UMA_ZONE_SMR); - vfs_smr = uma_zone_get_smr(vnode_zone); + vnode_init, vnode_fini, UMA_ALIGN_PTR, 0); + uma_zone_set_smr(vnode_zone, vfs_smr); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* @@ -1761,6 +1761,7 @@ */ CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp); bo = &vp->v_bufobj; + VNPASS(vp->v_seqc_users == 0, vp); VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); VNPASS(vp->v_holdcnt == VHOLD_NO_SMR, vp); VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); @@ -2889,6 +2890,17 @@ return (vs); } +void +vget_abort(struct vnode *vp, enum vgetstate vs) +{ + + VNPASS(vs == VGET_HOLDCNT || vs == VGET_USECOUNT, vp); + if (vs == VGET_USECOUNT) + vrele(vp); + else + vdrop(vp); +} + int vget(struct vnode *vp, int flags, struct thread *td) { @@ -2951,10 +2963,7 @@ error = vn_lock(vp, flags); if (__predict_false(error != 0)) { - if (vs == VGET_USECOUNT) - vrele(vp); - else - vdrop(vp); + vget_abort(vp, vs); CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__, vp); return (error); @@ -3032,6 +3041,44 @@ return; } +bool +vref_smr(struct vnode *vp) +{ + int old; + + CTR2(KTR_VFS, "%s: vp %p", __func__, vp); + VFS_SMR_ASSERT_ENTERED(); + + /* + * Devices are not supported since they may require taking the interlock. + */ + VNPASS(vp->v_type != VCHR, vp); + + if (refcount_acquire_if_not_zero(&vp->v_usecount)) { + VNODE_REFCOUNT_FENCE_ACQ(); + VNPASS(vp->v_holdcnt > 0, vp); + return (true); + } + + if (!vhold_smr(vp)) + return (false); + + /* + * See the comment in vget_finish. 
+ */ + old = atomic_fetchadd_int(&vp->v_usecount, 1); + VNASSERT(old >= 0, vp, ("%s: wrong use count %d", __func__, old)); + if (old != 0) { +#ifdef INVARIANTS + old = atomic_fetchadd_int(&vp->v_holdcnt, -1); + VNASSERT(old > 1, vp, ("%s: wrong hold count %d", __func__, old)); +#else + refcount_release(&vp->v_holdcnt); +#endif + } + return (true); +} + void vref(struct vnode *vp) { @@ -3986,6 +4033,7 @@ */ if (vp->v_irflag & VIRF_DOOMED) return; + vn_seqc_write_begin_locked(vp); vunlazy_gone(vp); vp->v_irflag |= VIRF_DOOMED; @@ -4088,6 +4136,7 @@ vp->v_vnlock = &vp->v_lock; vp->v_op = &dead_vnodeops; vp->v_type = VBAD; + vn_seqc_write_end_locked(vp); } /* @@ -4128,8 +4177,9 @@ printf("%p: ", (void *)vp); printf("type %s\n", typename[vp->v_type]); holdcnt = atomic_load_int(&vp->v_holdcnt); - printf(" usecount %d, writecount %d, refcount %d", - vp->v_usecount, vp->v_writecount, holdcnt & ~VHOLD_ALL_FLAGS); + printf(" usecount %d, writecount %d, refcount %d seqc users %d", + vp->v_usecount, vp->v_writecount, holdcnt & ~VHOLD_ALL_FLAGS, + vp->v_seqc_users); switch (vp->v_type) { case VDIR: printf(" mountedhere %p\n", vp->v_mountedhere); @@ -4381,6 +4431,7 @@ MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT); MNT_KERN_FLAG(MNTK_MARKER); MNT_KERN_FLAG(MNTK_USES_BCACHE); + MNT_KERN_FLAG(MNTK_FPLOOKUP); MNT_KERN_FLAG(MNTK_NOASYNC); MNT_KERN_FLAG(MNTK_UNMOUNT); MNT_KERN_FLAG(MNTK_MWAIT); @@ -5196,6 +5247,38 @@ return (error == 0); } +/* + * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see + * the comment above cache_fplookup for details. + * + * We never deny as priv_check_cred calls are not yet supported, see vaccess. + */ +int +vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, struct ucred *cred) +{ + + VFS_SMR_ASSERT_ENTERED(); + + /* Check the owner. */ + if (cred->cr_uid == file_uid) { + if (file_mode & S_IXUSR) + return (0); + return (EAGAIN); + } + + /* Otherwise, check the groups (first match) */ + if (groupmember(file_gid, cred)) { + if (file_mode & S_IXGRP) + return (0); + return (EAGAIN); + } + + /* Otherwise, check everyone else. */ + if (file_mode & S_IXOTH) + return (0); + return (EAGAIN); +} + /* * Common filesystem object access control check routine. Accepts a * vnode's type, "mode", uid and gid, requested access mode, credentials, @@ -5476,6 +5559,14 @@ ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); #endif + /* + * It may be tempting to add vn_seqc_write_begin/end calls here and + * in vop_rename_post but that's not going to work out since some + * filesystems relookup vnodes mid-rename. This is probably a bug. + * + * For now filesystems are expected to do the relevant calls after they + * decide what vnodes to operate on. 
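
A minimal sketch of the staged-reference dance that cache_fplookup_final() performs with the helpers added here (variable names as in that function; assumes tvp and its seqc were sampled under vfs_smr):

	enum vgetstate tvs;
	int error;

	tvs = vget_prep_smr(tvp);	/* VGET_NONE: the vnode is being freed */
	if (tvs == VGET_NONE)
		return (EAGAIN);
	vfs_smr_exit();			/* safe: we now hold a count on tvp */
	error = vget_finish(tvp, LK_SHARED, tvs);
	/* a failure here must undo the staged reference; see vget_abort */
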
+ */ if (a->a_tdvp != a->a_fdvp) vhold(a->a_fdvp); if (a->a_tvp != a->a_fvp) @@ -5486,6 +5577,20 @@ } #ifdef DEBUG_VFS_LOCKS +void +vop_fplookup_vexec_pre(void *ap __unused) +{ + + VFS_SMR_ASSERT_ENTERED(); +} + +void +vop_fplookup_vexec_post(void *ap __unused, int rc __unused) +{ + + VFS_SMR_ASSERT_ENTERED(); +} + void vop_strategy_pre(void *ap) { @@ -5565,11 +5670,26 @@ VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } +void +vop_deleteextattr_pre(void *ap) +{ + struct vop_deleteextattr_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_begin(vp); +} + void vop_deleteextattr_post(void *ap, int rc) { - struct vop_deleteextattr_args *a = ap; + struct vop_deleteextattr_args *a; + struct vnode *vp; + a = ap; + vp = a->a_vp; + vn_seqc_write_end(vp); if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } @@ -5672,22 +5792,74 @@ } } +void +vop_setattr_pre(void *ap) +{ + struct vop_setattr_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_begin(vp); +} + void vop_setattr_post(void *ap, int rc) { - struct vop_setattr_args *a = ap; + struct vop_setattr_args *a; + struct vnode *vp; + a = ap; + vp = a->a_vp; + vn_seqc_write_end(vp); if (!rc) - VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); + VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); +} + +void +vop_setacl_pre(void *ap) +{ + struct vop_setacl_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_begin(vp); +} + +void +vop_setacl_post(void *ap, int rc __unused) +{ + struct vop_setacl_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_end(vp); +} + +void +vop_setextattr_pre(void *ap) +{ + struct vop_setextattr_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_begin(vp); } void vop_setextattr_post(void *ap, int rc) { - struct vop_setextattr_args *a = ap; + struct vop_setextattr_args *a; + struct vnode *vp; + a = ap; + vp = a->a_vp; + vn_seqc_write_end(vp); if (!rc) - VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); + VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); } void @@ -6249,6 +6421,8 @@ */ MPASS(mp->mnt_vfs_ops > 0); vp = mp->mnt_rootvnode; + if (vp != NULL) + vn_seqc_write_begin(vp); mp->mnt_rootvnode = NULL; return (vp); } @@ -6545,3 +6719,45 @@ return (VOP_ACCESS(vp, VEXEC, cnp->cn_cred, cnp->cn_thread)); } + +void +vn_seqc_write_begin_locked(struct vnode *vp) +{ + + ASSERT_VI_LOCKED(vp, __func__); + VNPASS(vp->v_holdcnt > 0, vp); + VNPASS(vp->v_seqc_users >= 0, vp); + vp->v_seqc_users++; + if (vp->v_seqc_users == 1) + seqc_sleepable_write_begin(&vp->v_seqc); +} + +void +vn_seqc_write_begin(struct vnode *vp) +{ + + VI_LOCK(vp); + vn_seqc_write_begin_locked(vp); + VI_UNLOCK(vp); +} + +void +vn_seqc_write_end_locked(struct vnode *vp) +{ + + ASSERT_VI_LOCKED(vp, __func__); + VNPASS(vp->v_holdcnt > 0, vp); + VNPASS(vp->v_seqc_users > 0, vp); + vp->v_seqc_users--; + if (vp->v_seqc_users == 0) + seqc_sleepable_write_end(&vp->v_seqc); +} + +void +vn_seqc_write_end(struct vnode *vp) +{ + + VI_LOCK(vp); + vn_seqc_write_end_locked(vp); + VI_UNLOCK(vp); +} Index: sys/kern/vnode_if.src =================================================================== --- sys/kern/vnode_if.src +++ sys/kern/vnode_if.src @@ -142,6 +142,17 @@ }; +%% fplookup_vexec vp - - - +%! fplookup_vexec pre vop_fplookup_vexec_pre +%! fplookup_vexec post vop_fplookup_vexec_post + +vop_fplookup_vexec { + IN struct vnode *vp; + IN struct ucred *cred; + IN struct thread *td; +}; + + %% access vp L L L vop_access { @@ -172,6 +183,7 @@ %% setattr vp E E E +%! setattr pre vop_setattr_pre %! 
setattr post vop_setattr_post vop_setattr { @@ -523,6 +535,8 @@ %% setacl vp E E E +%! setacl pre vop_setacl_pre +%! setacl post vop_setacl_post vop_setacl { IN struct vnode *vp; @@ -589,6 +603,7 @@ %% deleteextattr vp E E E +%! deleteextattr pre vop_deleteextattr_pre %! deleteextattr post vop_deleteextattr_post vop_deleteextattr { @@ -601,6 +616,7 @@ %% setextattr vp E E E +%! setextattr pre vop_setextattr_pre %! setextattr post vop_setextattr_post vop_setextattr { Index: sys/security/mac/mac_framework.h =================================================================== --- sys/security/mac/mac_framework.h +++ sys/security/mac/mac_framework.h @@ -422,13 +422,14 @@ int mac_vnode_check_lookup_impl(struct ucred *cred, struct vnode *dvp, struct componentname *cnp); extern bool mac_vnode_check_lookup_fp_flag; +#define mac_vnode_check_lookup_enabled() __predict_false(mac_vnode_check_lookup_fp_flag) static inline int mac_vnode_check_lookup(struct ucred *cred, struct vnode *dvp, struct componentname *cnp) { mac_vnode_assert_locked(dvp, "mac_vnode_check_lookup"); - if (__predict_false(mac_vnode_check_lookup_fp_flag)) + if (mac_vnode_check_lookup_enabled()) return (mac_vnode_check_lookup_impl(cred, dvp, cnp)); return (0); } Index: sys/sys/_seqc.h =================================================================== --- /dev/null +++ sys/sys/_seqc.h @@ -0,0 +1,6 @@ +#ifndef _SYS__SEQC_H_ +#define _SYS__SEQC_H_ + +typedef uint32_t seqc_t; + +#endif /* _SYS__SEQC_H_ */ Index: sys/sys/filedesc.h =================================================================== --- sys/sys/filedesc.h +++ sys/sys/filedesc.h @@ -310,6 +310,7 @@ smr_serialized_store(&fdp->fd_pwd, newpwd, (FILEDESC_XLOCK_ASSERT(fdp), true)); } +struct pwd *pwd_get_smr(void); #endif /* _KERNEL */ Index: sys/sys/mount.h =================================================================== --- sys/sys/mount.h +++ sys/sys/mount.h @@ -420,6 +420,7 @@ #define MNTK_TEXT_REFS 0x00008000 /* Keep use ref for text */ #define MNTK_VMSETSIZE_BUG 0x00010000 #define MNTK_UNIONFS 0x00020000 /* A hack for F_ISUNIONSTACK */ +#define MNTK_FPLOOKUP 0x00040000 /* fast path lookup is supported */ #define MNTK_NOASYNC 0x00800000 /* disable async */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ Index: sys/sys/namei.h =================================================================== --- sys/sys/namei.h +++ sys/sys/namei.h @@ -108,6 +108,12 @@ }; #ifdef _KERNEL + +enum cache_fpl_status { CACHE_FPL_STATUS_ABORTED, CACHE_FPL_STATUS_PARTIAL, + CACHE_FPL_STATUS_HANDLED, CACHE_FPL_STATUS_UNSET }; +int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status, + struct pwd **pwdp); + /* * namei operations */ Index: sys/sys/seqc.h =================================================================== --- sys/sys/seqc.h +++ sys/sys/seqc.h @@ -36,7 +36,7 @@ /* * seqc_t may be included in structs visible to userspace */ -typedef uint32_t seqc_t; +#include <sys/_seqc.h> #ifdef _KERNEL @@ -111,5 +111,26 @@ return (seqc_consistent_nomb(seqcp, oldseqc)); } +/* + * Variants which do not critical_enter/critical_exit, allowing the writer + * to sleep while the counter is in the modified state.
 */ +static __inline void +seqc_sleepable_write_begin(seqc_t *seqcp) +{ + + MPASS(!seqc_in_modify(*seqcp)); + *seqcp += 1; + atomic_thread_fence_rel(); +} + +static __inline void +seqc_sleepable_write_end(seqc_t *seqcp) +{ + + atomic_thread_fence_rel(); + *seqcp += 1; + MPASS(!seqc_in_modify(*seqcp)); +} + #endif /* _KERNEL */ #endif /* _SYS_SEQC_H_ */ Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -45,6 +45,7 @@ #include #include #include +#include <sys/_seqc.h> /* * The vnode is the focus of all file activity in UNIX. There is a * @@ -105,6 +106,7 @@ */ enum vtype v_type:8; /* u vnode type */ short v_irflag; /* i frequently read flags */ + seqc_t v_seqc; /* i modification count */ struct vop_vector *v_op; /* u vnode operations vector */ void *v_data; /* u private data for fs */ @@ -175,6 +177,7 @@ short v_dbatchcpu; /* i LRU requeue deferral batch */ int v_writecount; /* I ref count of writers or (negative) text users */ + int v_seqc_users; /* i modifications pending */ u_int v_hash; }; @@ -539,6 +542,18 @@ #define ASSERT_VOP_LOCKED(vp, str) assert_vop_locked((vp), (str)) #define ASSERT_VOP_UNLOCKED(vp, str) assert_vop_unlocked((vp), (str)) +#define ASSERT_VOP_IN_SEQC(vp) do { \ + struct vnode *_vp = (vp); \ + \ + VNPASS(seqc_in_modify(_vp->v_seqc), _vp); \ +} while (0) + +#define ASSERT_VOP_NOT_IN_SEQC(vp) do { \ + struct vnode *_vp = (vp); \ + \ + VNPASS(!seqc_in_modify(_vp->v_seqc), _vp); \ +} while (0) + #else /* !DEBUG_VFS_LOCKS */ #define ASSERT_VI_LOCKED(vp, str) ((void)0) @@ -546,6 +561,10 @@ #define ASSERT_VOP_ELOCKED(vp, str) ((void)0) #define ASSERT_VOP_LOCKED(vp, str) ((void)0) #define ASSERT_VOP_UNLOCKED(vp, str) ((void)0) + +#define ASSERT_VOP_IN_SEQC(vp) ((void)0) +#define ASSERT_VOP_NOT_IN_SEQC(vp) ((void)0) + #endif /* DEBUG_VFS_LOCKS */ @@ -602,6 +621,7 @@ struct vattr; struct vfsops; struct vnode; +struct pwd; typedef int (*vn_get_ino_t)(struct mount *, void *, int, struct vnode **); @@ -619,6 +639,10 @@ void cache_purge(struct vnode *vp); void cache_purge_negative(struct vnode *vp); void cache_purgevfs(struct mount *mp, bool force); +void vn_seqc_write_begin_locked(struct vnode *vp); +void vn_seqc_write_begin(struct vnode *vp); +void vn_seqc_write_end_locked(struct vnode *vp); +void vn_seqc_write_end(struct vnode *vp); int change_dir(struct vnode *vp, struct thread *td); void cvtstat(struct stat *st, struct ostat *ost); void freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb); @@ -644,6 +668,8 @@ int vn_commname(struct vnode *vn, char *buf, u_int buflen); int vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path, u_int pathlen); +int vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, + struct ucred *cred); int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred, int *privused); @@ -663,6 +689,7 @@ enum vgetstate vget_prep_smr(struct vnode *vp); enum vgetstate vget_prep(struct vnode *vp); int vget_finish(struct vnode *vp, int flags, enum vgetstate vs); +void vget_abort(struct vnode *vp, enum vgetstate vs); void vgone(struct vnode *vp); void vhold(struct vnode *); void vholdl(struct vnode *); @@ -805,6 +832,7 @@ /* These are called from within the actual VOPS.
*/ void vop_close_post(void *a, int rc); void vop_create_post(void *a, int rc); +void vop_deleteextattr_pre(void *a); void vop_deleteextattr_post(void *a, int rc); void vop_link_post(void *a, int rc); void vop_lookup_post(void *a, int rc); @@ -819,12 +847,18 @@ void vop_rename_post(void *a, int rc); void vop_rename_pre(void *a); void vop_rmdir_post(void *a, int rc); +void vop_setattr_pre(void *a); void vop_setattr_post(void *a, int rc); +void vop_setacl_pre(void *a); +void vop_setacl_post(void *a, int rc); +void vop_setextattr_pre(void *a); void vop_setextattr_post(void *a, int rc); void vop_symlink_post(void *a, int rc); int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a); #ifdef DEBUG_VFS_LOCKS +void vop_fplookup_vexec_pre(void *a); +void vop_fplookup_vexec_post(void *a, int rc); void vop_strategy_pre(void *a); void vop_lock_pre(void *a); void vop_lock_post(void *a, int rc); @@ -832,6 +866,8 @@ void vop_need_inactive_pre(void *a); void vop_need_inactive_post(void *a, int rc); #else +#define vop_fplookup_vexec_pre(x) do { } while (0) +#define vop_fplookup_vexec_post(x, y) do { } while (0) #define vop_strategy_pre(x) do { } while (0) #define vop_lock_pre(x) do { } while (0) #define vop_lock_post(x, y) do { } while (0) @@ -901,6 +937,7 @@ void vput(struct vnode *vp); void vrele(struct vnode *vp); void vref(struct vnode *vp); +bool vref_smr(struct vnode *vp); void vrefl(struct vnode *vp); void vrefact(struct vnode *vp); void vrefactn(struct vnode *vp, u_int n);
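
To summarize the per-filesystem opt-in this patch demonstrates with tmpfs, a minimal sketch; example_node_pool and example_fplookup_vexec are placeholders for a filesystem's own zone and method, not names from the patch:

/* 1. Back the per-file node allocations with an SMR-managed zone, so the
 *    lockless path can race against free without touching reclaimed memory. */
VFS_SMR_ZONE_SET(example_node_pool);

/* 2. Implement VOP_FPLOOKUP_VEXEC, honoring the contract documented above
 *    cache_fplookup (see tmpfs_fplookup_vexec for a worked instance):
 *    .vop_fplookup_vexec = example_fplookup_vexec, in the vop_vector. */

/* 3. Announce support at mount time; without this the VFS never tries. */
MNT_ILOCK(mp);
mp->mnt_kern_flag |= MNTK_FPLOOKUP;
MNT_IUNLOCK(mp);
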