Index: sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c =================================================================== --- sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c +++ sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c @@ -154,6 +154,7 @@ vput(vp); return (error); } + vfs_seqc_write_begin(vp); VOP_UNLOCK(vp); /* @@ -206,6 +207,7 @@ VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); + vfs_seqc_write_end(vp); vput(vp); vfs_unbusy(mp); vfs_freeopts(mp->mnt_optnew); @@ -241,6 +243,7 @@ vfs_event_signal(NULL, VQ_MOUNT, 0); if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp)) panic("mount: lost mount"); + vfs_seqc_write_end(vp); VOP_UNLOCK(vp); vfs_op_exit(mp); vfs_unbusy(mp); Index: sys/kern/kern_descrip.c =================================================================== --- sys/kern/kern_descrip.c +++ sys/kern/kern_descrip.c @@ -102,8 +102,8 @@ static __read_mostly uma_zone_t file_zone; static __read_mostly uma_zone_t filedesc0_zone; -static __read_mostly uma_zone_t pwd_zone; -static __read_mostly smr_t pwd_smr; +__read_mostly uma_zone_t pwd_zone; +extern smr_t vfs_smr; static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders); @@ -3297,14 +3297,24 @@ fdp = td->td_proc->p_fd; - smr_enter(pwd_smr); + smr_enter(vfs_smr); for (;;) { - pwd = smr_entered_load(&fdp->fd_pwd, pwd_smr); + pwd = smr_entered_load(&fdp->fd_pwd, vfs_smr); MPASS(pwd != NULL); if (refcount_acquire_if_not_zero(&pwd->pwd_refcount)) break; } - smr_exit(pwd_smr); + smr_exit(vfs_smr); + return (pwd); +} + +struct pwd * +pwd_get_smr(void) +{ + struct pwd *pwd; + + pwd = smr_entered_load(&curproc->p_fd->fd_pwd, vfs_smr); + MPASS(pwd != NULL); return (pwd); } @@ -4293,7 +4303,7 @@ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR); - pwd_smr = uma_zone_get_smr(pwd_zone); + vfs_smr = uma_zone_get_smr(pwd_zone); mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); } SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL); Index: sys/kern/vfs_cache.c =================================================================== --- sys/kern/vfs_cache.c +++ sys/kern/vfs_cache.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,11 @@ #include #endif +#include + +#include +#include + #ifdef DDB #include #endif @@ -100,6 +106,8 @@ SDT_PROBE_DEFINE2(vfs, namecache, shrink_negative, done, "struct vnode *", "char *"); +SDT_PROBE_DEFINE3(vfs, fplookup, lookup, done, "struct nameidata", "int", "bool"); + /* * This structure describes the elements in the cache of recent * names looked up by namei. 
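The kern_descrip.c hunk above retires the file-local pwd_smr in favour of the shared vfs_smr and adds pwd_get_smr(), which returns the current pwd without taking a reference and is therefore only meaningful while the caller stays inside vfs_smr. A minimal usage sketch follows (illustrative only, not part of the patch; the example_ function is hypothetical and assumes a kernel file that already includes sys/filedesc.h and sys/smr.h):

static void
example_inspect_pwd(void)
{
	struct pwd *pwd;

	/* Reference-less access: only valid while inside vfs_smr. */
	smr_enter(vfs_smr);
	pwd = pwd_get_smr();
	/* ... read pwd->pwd_cdir, pwd->pwd_rdir here ... */
	smr_exit(vfs_smr);

	/* For state that must outlive the SMR section, take a reference. */
	pwd = pwd_hold(curthread);
	/* ... pwd remains stable until dropped ... */
	pwd_drop(pwd);
}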
@@ -2817,3 +2825,760 @@ } #endif + +extern uma_zone_t namei_zone; + +static bool __read_frequently cache_fast_lookup = true; +SYSCTL_BOOL(_vfs, OID_AUTO, cache_fast_lookup, CTLFLAG_RW, + &cache_fast_lookup, 0, ""); + +#define CACHE_FPL_UNHANDLED -2020 + +static void +cache_fpl_cleanup_cnp(struct componentname *cnp) +{ + + uma_zfree(namei_zone, cnp->cn_pnbuf); +#ifdef DIAGNOSTIC + cnp->cn_pnbuf = NULL; + cnp->cn_nameptr = NULL; +#endif +} + +static void +cache_fpl_handle_root(struct nameidata *ndp, struct vnode **dpp) +{ + struct componentname *cnp; + + cnp = &ndp->ni_cnd; + while (*(cnp->cn_nameptr) == '/') { + cnp->cn_nameptr++; + ndp->ni_pathlen--; + } +} + +static void +cache_fpl_handle_root_initial(struct nameidata *ndp, struct vnode **dpp) +{ + + cache_fpl_handle_root(ndp, dpp); + *dpp = ndp->ni_rootdir; +} + +enum cache_fpl_status { CACHE_FPL_STATUS_UNSET, CACHE_FPL_STATUS_HANDLED, + CACHE_FPL_STATUS_UNHANDLED }; + +struct cache_fpl { + int line; + enum cache_fpl_status handled; + bool in_smr; + struct nameidata *ndp; + struct componentname *cnp; + struct vnode *dvp; + seqc_t dvp_seqc; + struct vnode *tvp; + seqc_t tvp_seqc; +}; + +static void +cache_fpl_smr_assert_not_entered(struct cache_fpl *fpl) +{ + + SMR_ASSERT_NOT_ENTERED(vfs_smr); + MPASS(fpl->in_smr == false); +} + +static void +cache_fpl_smr_enter(struct cache_fpl *fpl) +{ + + MPASS(fpl->in_smr == false); + smr_enter(vfs_smr); + fpl->in_smr = true; +} + +static void +cache_fpl_smr_exit(struct cache_fpl *fpl) +{ + + MPASS(fpl->in_smr == true); + fpl->in_smr = false; + smr_exit(vfs_smr); +} + +static int +cache_fpl_unhandled_impl(struct cache_fpl *fpl, int line) +{ + + KASSERT(fpl->handled == CACHE_FPL_STATUS_UNSET, + ("%s: lookup status already set at %d\n", __func__, fpl->line)); + fpl->handled = CACHE_FPL_STATUS_UNHANDLED; + fpl->line = line; + return (CACHE_FPL_UNHANDLED); +} + +#define cache_fpl_unhandled(x) cache_fpl_unhandled_impl((x), __LINE__) + +static int +cache_fpl_handled_impl(struct cache_fpl *fpl, int error, int line) +{ + + KASSERT(fpl->handled == CACHE_FPL_STATUS_UNSET, + ("%s: lookup status already set at %d\n", __func__, fpl->line)); + fpl->handled = CACHE_FPL_STATUS_HANDLED; + fpl->line = line; + return (error); +} + +#define cache_fpl_handled(x, e) cache_fpl_handled_impl((x), (e), __LINE__) + +#define CACHE_FPL_SUPPORTED_CN_FLAGS \ + (LOCKLEAF | FOLLOW | LOCKSHARED | SAVENAME | ISOPEN | AUDITVNODE1) + +static bool +cache_can_fplookup(struct cache_fpl *fpl) +{ + struct nameidata *ndp; + struct componentname *cnp; + struct thread *td; + + ndp = fpl->ndp; + cnp = fpl->cnp; + td = cnp->cn_thread; + + if (!cache_fast_lookup) { + cache_fpl_unhandled(fpl); + return (false); + } + if (mac_vnode_check_lookup_enabled()) { + cache_fpl_unhandled(fpl); + return (false); + } + if (cnp->cn_flags & ~CACHE_FPL_SUPPORTED_CN_FLAGS) { + cache_fpl_unhandled(fpl); + return (false); + } + if ((cnp->cn_flags & LOCKLEAF) == 0) { + cache_fpl_unhandled(fpl); + return (false); + } + if (cnp->cn_nameiop != LOOKUP) { + cache_fpl_unhandled(fpl); + return (false); + } + if (ndp->ni_dirfd != AT_FDCWD) { + cache_fpl_unhandled(fpl); + return (false); + } + if (IN_CAPABILITY_MODE(td)) { + cache_fpl_unhandled(fpl); + return (false); + } + if (AUDITING_TD(td)) { + cache_fpl_unhandled(fpl); + return (false); + } + return (true); +} + +/* + * TODO + * + * Save and restore nameidata in case we need to fall back to the regular lookup. 
+ * + * Everything which is modified (some direct fields and cnp) is covered, but the + * copy below is overzealous for simplicity. It can be modified to only save few + * fields. + */ +static void +cache_save_nameidata(struct nameidata *ndp, struct nameidata *saved) +{ + + *saved = *ndp; +} + +static void +cache_restore_nameidata(struct nameidata *ndp, struct nameidata *saved) +{ + + *ndp = *saved; +} + +static bool +cache_fplookup_vnode_supported(struct vnode *vp) +{ + + switch (vp->v_type) { + case VLNK: + return (false); + default: + break; + } + return (true); +} + +static int +cache_fplookup_final(struct cache_fpl *fpl) +{ + struct componentname *cnp; + enum vgetstate tvs; + struct vnode *dvp, *tvp; + seqc_t dvp_seqc, tvp_seqc; + int error; + + cnp = fpl->cnp; + dvp = fpl->dvp; + dvp_seqc = fpl->dvp_seqc; + tvp = fpl->tvp; + tvp_seqc = fpl->tvp_seqc; + + VNPASS(cache_fplookup_vnode_supported(dvp), dvp); + MPASS((cnp->cn_flags & LOCKLEAF) != 0); + + tvs = vget_prep_smr(tvp); + if (tvs == VGET_NONE) { + return (cache_fpl_unhandled(fpl)); + } + + if (!seqc_consistent(&dvp->v_seqc, dvp_seqc)) { + cache_fpl_smr_exit(fpl); + vget_abort(tvp, tvs); + return (cache_fpl_unhandled(fpl)); + } + + cache_fpl_smr_exit(fpl); + + error = vget_finish(tvp, cnp->cn_lkflags, tvs); + if (error != 0) { + return (cache_fpl_unhandled(fpl)); + } + + if (!seqc_consistent(&tvp->v_seqc, tvp_seqc)) { + vput(tvp); + return (cache_fpl_unhandled(fpl)); + } + + return (cache_fpl_handled(fpl, 0)); +} + +static int +cache_fplookup_next(struct cache_fpl *fpl) +{ + struct componentname *cnp; + struct namecache *ncp; + struct vnode *dvp, *tvp; + u_char nc_flag; + uint32_t hash; + + cnp = fpl->cnp; + dvp = fpl->dvp; + + if (__predict_false(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')) { + fpl->tvp = dvp; + fpl->tvp_seqc = seqc_read_any(&dvp->v_seqc); + if (seqc_in_modify(fpl->tvp_seqc)) { + return (cache_fpl_unhandled(fpl)); + } + return (0); + } + + hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp); + + CK_LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { + counter_u64_add(numchecks, 1); + if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && + !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) + break; + } + + fpl->tvp = (void *)0xdeadbeef; + + /* + * If there is no entry we have to punt to the slow path to perform + * actual lookup. Should there be nothing with this name a negative + * entry will be created. + */ + if (__predict_false(ncp == NULL)) { + return (cache_fpl_unhandled(fpl)); + } + + tvp = atomic_load_ptr(&ncp->nc_vp); + nc_flag = atomic_load_char(&ncp->nc_flag); + if (__predict_false(cache_ncp_invalid(ncp))) { + return (cache_fpl_unhandled(fpl)); + } + if (__predict_false(nc_flag & NCF_WHITE)) { + return (cache_fpl_unhandled(fpl)); + } + + fpl->tvp = tvp; + if (nc_flag & NCF_NEGATIVE) { + if ((nc_flag & NCF_HOTNEGATIVE) == 0) { + /* + * TODO + * Promoting to hot negative requires locks which are + * not yet supported for simplicity. 
+ */ + return (cache_fpl_unhandled(fpl)); + } + SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp, + ncp->nc_name); + counter_u64_add(numneghits, 1); + cache_fpl_smr_exit(fpl); + return (cache_fpl_handled(fpl, ENOENT)); + } + + fpl->tvp_seqc = seqc_read_any(&tvp->v_seqc); + if (seqc_in_modify(fpl->tvp_seqc)) { + return (cache_fpl_unhandled(fpl)); + } + + if (!cache_fplookup_vnode_supported(tvp)) { + return (cache_fpl_unhandled(fpl)); + } + + counter_u64_add(numposhits, 1); + SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name, tvp); + return (0); +} + +static bool +cache_fplookup_mp_supported(struct mount *mp) +{ + + if (mp == NULL) + return (false); + if ((mp->mnt_kern_flag & MNTK_FPLOOKUP) == 0) + return (false); + if (mp->mnt_flag & MNT_UNION) + return (false); + return (true); +} + +/* + * Walk up the mount stack (if any). + * + * Correctness is provided in the following ways: + * - all vnodes are protected from freeing with SMR + * - struct mount objects are type stable making them always safe to access + * - stability of the particular mount is provided by busying it + * - relationship between the vnode which is mounted on and the mount is + * verified with the vnode sequence counter after busying + * - association between root vnode of the mount and the mount is protected + * by busy + * + * From that point on we can read the sequence counter of the root vnode + * and get the next mount on the stack (if any) using the same protection. + * + * By the end of successful walk we are guaranteed the reached state was + * indeed present at least at some point which matches the regular lookup. + */ +static int +cache_fplookup_climb_mount(struct cache_fpl *fpl) +{ + struct mount *mp, *prev_mp; + struct vnode *vp; + seqc_t vp_seqc; + + vp = fpl->tvp; + vp_seqc = fpl->tvp_seqc; + if (vp->v_type != VDIR) + return (0); + + mp = atomic_load_ptr(&vp->v_mountedhere); + if (mp == NULL) + return (0); + + prev_mp = NULL; + for (;;) { + if (!vfs_op_thread_enter(mp)) { + if (prev_mp != NULL) + vfs_op_thread_exit(prev_mp); + return (cache_fpl_unhandled(fpl)); + } + if (prev_mp != NULL) + vfs_op_thread_exit(prev_mp); + if (!seqc_consistent(&vp->v_seqc, vp_seqc)) { + vfs_op_thread_exit(mp); + return (cache_fpl_unhandled(fpl)); + } + if (!cache_fplookup_mp_supported(mp)) { + vfs_op_thread_exit(mp); + return (cache_fpl_unhandled(fpl)); + } + vp = atomic_load_ptr(&mp->mnt_rootvnode); + if (vp == NULL || VN_IS_DOOMED(vp)) { + vfs_op_thread_exit(mp); + return (cache_fpl_unhandled(fpl)); + } + vp_seqc = seqc_read_any(&vp->v_seqc); + if (seqc_in_modify(vp_seqc)) { + vfs_op_thread_exit(mp); + return (cache_fpl_unhandled(fpl)); + } + prev_mp = mp; + mp = atomic_load_ptr(&vp->v_mountedhere); + if (mp == NULL) + break; + } + + vfs_op_thread_exit(prev_mp); + fpl->tvp = vp; + fpl->tvp_seqc = vp_seqc; + return (0); +} + +/* + * Parse the path. + * + * The code is mostly copy-pasted from regular lookup, see lookup(). + * The structure is maintained along with comments for easier maintenance. + * Deduplicating the code will become feasible after fast path lookup + * becomes more feature-complete. + */ +static int +cache_fplookup_parse(struct cache_fpl *fpl) +{ + struct nameidata *ndp; + struct componentname *cnp; + char *cp; + char *prev_ni_next; /* saved ndp->ni_next */ + size_t prev_ni_pathlen; /* saved ndp->ni_pathlen */ + + ndp = fpl->ndp; + cnp = fpl->cnp; + + /* + * Search a new directory. + * + * The last component of the filename is left accessible via + * cnp->cn_nameptr for callers that need the name. 
Callers needing + * the name set the SAVENAME flag. When done, they assume + * responsibility for freeing the pathname buffer. + */ + for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) + continue; + cnp->cn_namelen = cp - cnp->cn_nameptr; + if (cnp->cn_namelen > NAME_MAX) { + return (cache_fpl_handled(fpl, ENAMETOOLONG)); + } + prev_ni_pathlen = ndp->ni_pathlen; + ndp->ni_pathlen -= cnp->cn_namelen; + KASSERT(ndp->ni_pathlen <= PATH_MAX, + ("%s: ni_pathlen underflow to %zd\n", __func__, ndp->ni_pathlen)); + prev_ni_next = ndp->ni_next; + ndp->ni_next = cp; + + /* + * Replace multiple slashes by a single slash and trailing slashes + * by a null. This must be done before VOP_LOOKUP() because some + * fs's don't know about trailing slashes. Remember if there were + * trailing slashes to handle symlinks, existing non-directories + * and non-existing files that won't be directories specially later. + */ + while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { + cp++; + ndp->ni_pathlen--; + if (*cp == '\0') { + /* + * TODO + * Regular lookup performs the following: + * *ndp->ni_next = '\0'; + * cnp->cn_flags |= TRAILINGSLASH; + * + * Which is problematic since it modifies data read + * from userspace. Then if fast path lookup was to + * abort we would have to either restore it or convey + * the flag. Since this is a corner case just ignore + * it for simplicity. + */ + return (cache_fpl_unhandled(fpl)); + } + } + ndp->ni_next = cp; + + cnp->cn_flags |= MAKEENTRY; + + if (cnp->cn_namelen == 2 && + cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') + cnp->cn_flags |= ISDOTDOT; + else + cnp->cn_flags &= ~ISDOTDOT; + if (*ndp->ni_next == 0) + cnp->cn_flags |= ISLASTCN; + else + cnp->cn_flags &= ~ISLASTCN; + + /* + * Check for degenerate name (e.g. / or "") + * which is a way of talking about a directory, + * e.g. like "/." or ".". 
+ * + * TODO + * Another corner case handled by the regular lookup + */ + if (__predict_false(cnp->cn_nameptr[0] == '\0')) { + return (cache_fpl_unhandled(fpl)); + } + return (0); +} + +static void +cache_fplookup_parse_advance(struct cache_fpl *fpl) +{ + struct nameidata *ndp; + struct componentname *cnp; + + ndp = fpl->ndp; + cnp = fpl->cnp; + + cnp->cn_nameptr = ndp->ni_next; + while (*cnp->cn_nameptr == '/') { + cnp->cn_nameptr++; + ndp->ni_pathlen--; + } +} + +static int +cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl) +{ + struct nameidata *ndp; + struct componentname *cnp; + struct mount *mp; + int error; + + error = CACHE_FPL_UNHANDLED; + ndp = fpl->ndp; + ndp->ni_lcf = 0; + cnp = fpl->cnp; + cnp->cn_lkflags = LK_SHARED; + if ((cnp->cn_flags & LOCKSHARED) == 0) + cnp->cn_lkflags = LK_EXCLUSIVE; + + fpl->dvp = dvp; + fpl->dvp_seqc = seqc_read_any(&fpl->dvp->v_seqc); + if (seqc_in_modify(fpl->dvp_seqc)) { + cache_fpl_unhandled(fpl); + goto out; + } + mp = atomic_load_ptr(&fpl->dvp->v_mount); + if (!cache_fplookup_mp_supported(mp)) { + cache_fpl_unhandled(fpl); + goto out; + } + + VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp); + + for (;;) { + error = cache_fplookup_parse(fpl); + if (__predict_false(error != 0)) { + break; + } + + if (cnp->cn_flags & ISDOTDOT) { + error = cache_fpl_unhandled(fpl); + break; + } + + VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp); + + error = VOP_FPLOOKUP_VEXEC(fpl->dvp, cnp->cn_cred, cnp->cn_thread); + if (__predict_false(error != 0)) { + switch (error) { + case EAGAIN: + case EOPNOTSUPP: /* can happen when racing against vgone */ + cache_fpl_unhandled(fpl); + break; + default: + /* + * See the API contract for VOP_FPLOOKUP_VEXEC. + */ + if (!seqc_consistent(&fpl->dvp->v_seqc, fpl->dvp_seqc)) { + error = cache_fpl_unhandled(fpl); + } else { + cache_fpl_handled(fpl, error); + } + break; + } + break; + } + + error = cache_fplookup_next(fpl); + if (__predict_false(error != 0)) { + break; + } + + VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp); + + error = cache_fplookup_climb_mount(fpl); + if (__predict_false(error != 0)) { + break; + } + + VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp); + + if (cnp->cn_flags & ISLASTCN) { + error = cache_fplookup_final(fpl); + break; + } + + if (!seqc_consistent(&fpl->dvp->v_seqc, fpl->dvp_seqc)) { + error = cache_fpl_unhandled(fpl); + break; + } + + fpl->dvp = fpl->tvp; + fpl->dvp_seqc = fpl->tvp_seqc; + + if (!cache_fplookup_vnode_supported(fpl->dvp)) { + error = cache_fpl_unhandled(fpl); + break; + } + + cache_fplookup_parse_advance(fpl); + } +out: + MPASS(fpl->handled != CACHE_FPL_STATUS_UNSET); + if (fpl->handled == CACHE_FPL_STATUS_UNHANDLED) { + if (fpl->in_smr) + cache_fpl_smr_exit(fpl); + return (error); + } + + cache_fpl_smr_assert_not_entered(fpl); + if (__predict_false(error != 0)) { + ndp->ni_dvp = NULL; + ndp->ni_vp = NULL; + cache_fpl_cleanup_cnp(cnp); + return (error); + } + ndp->ni_dvp = fpl->dvp; + ndp->ni_vp = fpl->tvp; + if (cnp->cn_flags & SAVENAME) + cnp->cn_flags |= HASBUF; + else + cache_fpl_cleanup_cnp(cnp); + return (error); +} + +/* + * Fast path lookup protected with SMR and sequence counters. + * + * Note: all VOP_FPLOOKUP_VEXEC routines have a comment referencing this one. + * + * Filesystems can opt in by setting the MNTK_FPLOOKUP flag and meeting criteria + * outlined below. 
+ * + * Traditional vnode lookup conceptually looks like this: + * + * vn_lock(current); + * for (;;) { + * next = find(); + * vn_lock(next); + * vn_unlock(current); + * current = next; + * if (last) + * break; + * } + * + * Each jump to the next vnode is safe memory-wise and atomic with respect to + * any modifications thanks to holding respective locks. + * + * The same guarantee can be provided with a combination of safe memory + * reclamation and sequence counters instead. If all places which affect the + * relationship or permissions also the counter, the following provides the + * same guarantee. + * + * smr_enter(vfs_smr); + * current_seqc = seqc_read_any(current); + * if (seqc_in_modify(current_seqc)) + * abort(); + * for (;;) { + * next = find(); + * next_seqc = seqc_read_any(next); + * if (!seqc_consistent(current, current_seqc) + * abort(); + * current = next; // if anything changed the above would fail + * current_seqc = next_seqc; + * if (last) + * break; + * } + * + * API contract for VOP_FPLOOKUP_VEXEC routines is as follows: + * - they are called while within vfs_smr protection which they must never exit + * - EAGAIN can be returned to denote checking could not be performed, it is + * always valid to return it + * - if the sequence counter has not changed the result must be valid + * - if the sequence counter has changed both false positives and false negatives + * are permitted (since the result will be rejected later) + * - for simple cases of unix permission checks vaccess_vexec_smr can be used + * + * Caveats to watch out for: + * - vnodes are passed unlocked and unreferenced with nothing stopping + * VOP_RECLAIM, in turn meaning that ->v_data can become NULL. It is advised + * to use atomic_load_ptr to fetch it. + * - aforementioned object can also get freed, meaning absent other means it + * should be protected with vfs_smr + * - either safely checking permissions as they are modified or guaranteeing + * their stability is left to the routine + */ +int +cache_fplookup(struct nameidata *ndp, bool *handled) +{ + struct nameidata saved_ndp; + struct cache_fpl fpl; + struct pwd *pwd; + struct vnode *dvp, *startdir; + struct componentname *cnp; + int error; + + *handled = false; + bzero(&fpl, sizeof(fpl)); + fpl.handled = CACHE_FPL_STATUS_UNSET; + fpl.ndp = ndp; + fpl.cnp = &ndp->ni_cnd; + MPASS(curthread == fpl.cnp->cn_thread); + + if (!cache_can_fplookup(&fpl)) { + SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.handled); + return (EOPNOTSUPP); + } + + cache_save_nameidata(ndp, &saved_ndp); + + cache_fpl_smr_enter(&fpl); + pwd = pwd_get_smr(); + ndp->ni_rootdir = pwd->pwd_rdir; + ndp->ni_topdir = pwd->pwd_jdir; + + cnp = fpl.cnp; + cnp->cn_nameptr = cnp->cn_pnbuf; + startdir = ndp->ni_startdir; + if (cnp->cn_pnbuf[0] == '/') { + cache_fpl_handle_root_initial(ndp, &dvp); + } else { + if (ndp->ni_startdir != NULL) { + dvp = ndp->ni_startdir; + } else { + MPASS(ndp->ni_dirfd == AT_FDCWD); + dvp = pwd->pwd_cdir; + } + } + + error = cache_fplookup_impl(dvp, &fpl); + cache_fpl_smr_assert_not_entered(&fpl); + SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.handled); + + MPASS(fpl.handled != CACHE_FPL_STATUS_UNSET); + if (fpl.handled == CACHE_FPL_STATUS_HANDLED) { + if (error == CACHE_FPL_UNHANDLED) + panic("cache: bad error code from line %d\n", fpl.line); + *handled = true; + if (startdir != NULL) + vrele(startdir); + return (error); + } else { + cache_restore_nameidata(ndp, &saved_ndp); + return (error); + } +} Index: sys/kern/vfs_lookup.c 
=================================================================== --- sys/kern/vfs_lookup.c +++ sys/kern/vfs_lookup.c @@ -315,6 +315,7 @@ struct filecaps dirfd_caps; struct uio auio; int error, linklen, startdir_used; + bool handled; cnp = &ndp->ni_cnd; td = cnp->cn_thread; @@ -329,10 +330,15 @@ ndp->ni_startdir->v_type == VBAD); TAILQ_INIT(&ndp->ni_cap_tracker); ndp->ni_lcf = 0; + ndp->ni_loopcnt = 0; + startdir_used = 0; + dp = NULL; /* We will set this ourselves if we need it. */ cnp->cn_flags &= ~TRAILINGSLASH; + ndp->ni_vp = NULL; + /* * Get a buffer for the name to be translated, and copy the * name into the buffer. @@ -346,12 +352,29 @@ error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, &ndp->ni_pathlen); + if (error != 0) { + namei_cleanup_cnp(cnp); + return (error); + } + + cnp->cn_nameptr = cnp->cn_pnbuf; + /* * Don't allow empty pathnames. */ - if (error == 0 && *cnp->cn_pnbuf == '\0') - error = ENOENT; + if (*cnp->cn_pnbuf == '\0') { + namei_cleanup_cnp(cnp); + return (ENOENT); + } + error = cache_fplookup(ndp, &handled); + if (handled) + return (error); + + /* + * Ignore fast path. + */ + error = 0; #ifdef CAPABILITY_MODE /* * In capability mode, lookups must be restricted to happen in @@ -380,10 +403,8 @@ #endif if (error != 0) { namei_cleanup_cnp(cnp); - ndp->ni_vp = NULL; return (error); } - ndp->ni_loopcnt = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_NAMEI)) { KASSERT(cnp->cn_thread == curthread, @@ -402,9 +423,6 @@ ndp->ni_rootdir = pwd->pwd_rdir; ndp->ni_topdir = pwd->pwd_jdir; - startdir_used = 0; - dp = NULL; - cnp->cn_nameptr = cnp->cn_pnbuf; if (cnp->cn_pnbuf[0] == '/') { ndp->ni_resflags |= NIRES_ABS; error = namei_handle_root(ndp, &dp); Index: sys/kern/vfs_mount.c =================================================================== --- sys/kern/vfs_mount.c +++ sys/kern/vfs_mount.c @@ -949,6 +949,8 @@ } VOP_UNLOCK(vp); + vfs_seqc_write_begin(vp); + /* Allocate and initialize the filesystem. */ mp = vfs_mount_alloc(vp, vfsp, fspath, td->td_ucred); /* XXXMAC: pass to vfs_mount_alloc? 
*/ @@ -976,9 +978,11 @@ VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); + vfs_seqc_write_end(vp); vrele(vp); return (error); } + vfs_seqc_write_begin(newdp); VOP_UNLOCK(newdp); if (mp->mnt_opt != NULL) @@ -1015,6 +1019,8 @@ EVENTHANDLER_DIRECT_INVOKE(vfs_mounted, mp, newdp, td); VOP_UNLOCK(newdp); mountcheckdirs(vp, newdp); + vfs_seqc_write_end(vp); + vfs_seqc_write_end(newdp); vrele(newdp); if ((mp->mnt_flag & MNT_RDONLY) == 0) vfs_allocate_syncvnode(mp); @@ -1089,7 +1095,9 @@ VOP_UNLOCK(vp); vfs_op_enter(mp); + vfs_seqc_write_begin(vp); + rootvp = NULL; MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { MNT_IUNLOCK(mp); @@ -1103,8 +1111,6 @@ mp->mnt_kern_flag &= ~MNTK_ASYNC; rootvp = vfs_cache_root_clear(mp); MNT_IUNLOCK(mp); - if (rootvp != NULL) - vrele(rootvp); mp->mnt_optnew = *optlist; vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); @@ -1175,6 +1181,11 @@ vfs_deallocate_syncvnode(mp); end: vfs_op_exit(mp); + if (rootvp != NULL) { + vfs_seqc_write_end(rootvp); + vrele(rootvp); + } + vfs_seqc_write_end(vp); vfs_unbusy(mp); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; @@ -1665,14 +1676,19 @@ } mp->mnt_kern_flag |= MNTK_UNMOUNT; rootvp = vfs_cache_root_clear(mp); + if (coveredvp != NULL) + vfs_seqc_write_begin(coveredvp); if (flags & MNT_NONBUSY) { MNT_IUNLOCK(mp); error = vfs_check_usecounts(mp); MNT_ILOCK(mp); if (error != 0) { + vfs_seqc_write_end(coveredvp); dounmount_cleanup(mp, coveredvp, MNTK_UNMOUNT); - if (rootvp != NULL) + if (rootvp != NULL) { + vfs_seqc_write_end(rootvp); vrele(rootvp); + } return (error); } } @@ -1701,22 +1717,14 @@ ("%s: invalid return value for msleep in the drain path @ %s:%d", __func__, __FILE__, __LINE__)); - if (rootvp != NULL) + if (rootvp != NULL) { + vhold(rootvp); vrele(rootvp); + } if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); - /* - * From now, we can claim that the use reference on the - * coveredvp is ours, and the ref can be released only by - * successfull unmount by us, or left for later unmount - * attempt. The previously acquired hold reference is no - * longer needed to protect the vnode from reuse. 
- */ - if (coveredvp != NULL) - vdrop(coveredvp); - vfs_periodic(mp, MNT_WAIT); MNT_ILOCK(mp); async_flag = mp->mnt_flag & MNT_ASYNC; @@ -1751,8 +1759,15 @@ } vfs_op_exit_locked(mp); MNT_IUNLOCK(mp); - if (coveredvp) + if (coveredvp) { + vfs_seqc_write_end(coveredvp); VOP_UNLOCK(coveredvp); + vdrop(coveredvp); + } + if (rootvp != NULL) { + vfs_seqc_write_end(rootvp); + vdrop(rootvp); + } return (error); } mtx_lock(&mountlist_mtx); @@ -1761,7 +1776,13 @@ EVENTHANDLER_DIRECT_INVOKE(vfs_unmounted, mp, td); if (coveredvp != NULL) { coveredvp->v_mountedhere = NULL; + vfs_seqc_write_end(coveredvp); VOP_UNLOCK(coveredvp); + vdrop(coveredvp); + } + if (rootvp != NULL) { + vfs_seqc_write_end(rootvp); + vdrop(rootvp); } vfs_event_signal(NULL, VQ_UNMOUNT, 0); if (rootvnode != NULL && mp == rootvnode->v_mount) { Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -664,8 +664,8 @@ vnode_list_reclaim_marker = vn_alloc_marker(NULL); TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist); vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, - vnode_init, vnode_fini, UMA_ALIGN_PTR, UMA_ZONE_SMR); - vfs_smr = uma_zone_get_smr(vnode_zone); + vnode_init, vnode_fini, UMA_ALIGN_PTR, 0); + uma_zone_set_smr(vnode_zone, vfs_smr); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* @@ -1766,6 +1766,7 @@ */ CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp); bo = &vp->v_bufobj; + VNPASS(vp->v_seqc_users == 0, vp); VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); VNASSERT(vp->v_holdcnt == VHOLD_NO_SMR, vp, ("Invalid hold count")); VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); @@ -2892,6 +2893,17 @@ return (vs); } +void +vget_abort(struct vnode *vp, enum vgetstate vs) +{ + + VNPASS(vs == VGET_HOLDCNT || vs == VGET_USECOUNT, vp); + if (vs == VGET_USECOUNT) + vrele(vp); + else + vdrop(vp); +} + int vget(struct vnode *vp, int flags, struct thread *td) { @@ -2954,10 +2966,7 @@ error = vn_lock(vp, flags); if (__predict_false(error != 0)) { - if (vs == VGET_USECOUNT) - vrele(vp); - else - vdrop(vp); + vget_abort(vp, vs); CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__, vp); return (error); @@ -3982,6 +3991,7 @@ */ if (vp->v_irflag & VIRF_DOOMED) return; + vfs_seqc_write_begin_locked(vp); vunlazy_gone(vp); vp->v_irflag |= VIRF_DOOMED; @@ -4084,6 +4094,7 @@ vp->v_vnlock = &vp->v_lock; vp->v_op = &dead_vnodeops; vp->v_type = VBAD; + vfs_seqc_write_end_locked(vp); } /* @@ -4124,6 +4135,7 @@ printf(" usecount %d, writecount %d, refcount %d (flags %s)", vp->v_usecount, vp->v_writecount, holdcnt & ~VHOLD_NO_SMR, holdcnt & VHOLD_NO_SMR ? "VHOLD_NO_SMR" : "none"); + printf(", seqc users %d", vp->v_seqc_users); switch (vp->v_type) { case VDIR: printf(" mountedhere %p\n", vp->v_mountedhere); @@ -4369,6 +4381,7 @@ MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT); MNT_KERN_FLAG(MNTK_MARKER); MNT_KERN_FLAG(MNTK_USES_BCACHE); + MNT_KERN_FLAG(MNTK_FPLOOKUP); MNT_KERN_FLAG(MNTK_NOASYNC); MNT_KERN_FLAG(MNTK_UNMOUNT); MNT_KERN_FLAG(MNTK_MWAIT); @@ -5184,6 +5197,38 @@ return (error == 0); } +/* + * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see + * the comment above cache_fplookup for details. + * + * We never deny as priv_check_cred calls are not yet supported, see vaccess. 
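+ *
+ * A minimal filesystem-side consumer is expected to look roughly like the
+ * sketch below (xxx_node and its xn_mode/xn_uid/xn_gid fields are
+ * placeholders for whatever SMR-stable per-vnode data the filesystem keeps):
+ *
+ *	int
+ *	xxx_fplookup_vexec(struct vop_fplookup_vexec_args *v)
+ *	{
+ *		struct vnode *vp = v->a_vp;
+ *		struct xxx_node *node;
+ *
+ *		node = atomic_load_ptr(&vp->v_data);
+ *		if (__predict_false(node == NULL))
+ *			return (EAGAIN);
+ *		return (vaccess_vexec_smr(node->xn_mode, node->xn_uid,
+ *		    node->xn_gid, v->a_cred));
+ *	}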
+ */ +int +vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, struct ucred *cred) +{ + + SMR_ASSERT_ENTERED(vfs_smr); + + /* Check the owner. */ + if (cred->cr_uid == file_uid) { + if (file_mode & S_IXUSR) + return (0); + return (EAGAIN); + } + + /* Otherwise, check the groups (first match) */ + if (groupmember(file_gid, cred)) { + if (file_mode & S_IXGRP) + return (0); + return (EAGAIN); + } + + /* Otherwise, check everyone else. */ + if (file_mode & S_IXOTH) + return (0); + return (EAGAIN); +} + /* * Common filesystem object access control check routine. Accepts a * vnode's type, "mode", uid and gid, requested access mode, credentials, @@ -5464,6 +5509,14 @@ ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); #endif + /* + * It may be tempting to add vfs_seqc_write_begin/end calls here and + * in vop_rename_post but that's not going to work out since some + * filesystems relookup vnodes mid-rename. This is probably a bug. + * + * For now filesystems are expected to do the relevant calls after they + * decide what vnodes to operate on. + */ if (a->a_tdvp != a->a_fdvp) vhold(a->a_fdvp); if (a->a_tvp != a->a_fvp) @@ -5553,11 +5606,26 @@ VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } +void +vop_deleteextattr_pre(void *ap) +{ + struct vop_deleteextattr_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vfs_seqc_write_begin(vp); +} + void vop_deleteextattr_post(void *ap, int rc) { - struct vop_deleteextattr_args *a = ap; + struct vop_deleteextattr_args *a; + struct vnode *vp; + a = ap; + vp = a->a_vp; + vfs_seqc_write_end(vp); if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } @@ -5660,22 +5728,74 @@ } } +void +vop_setattr_pre(void *ap) +{ + struct vop_setattr_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vfs_seqc_write_begin(vp); +} + void vop_setattr_post(void *ap, int rc) { - struct vop_setattr_args *a = ap; + struct vop_setattr_args *a; + struct vnode *vp; + a = ap; + vp = a->a_vp; + vfs_seqc_write_end(vp); if (!rc) - VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); + VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); +} + +void +vop_setacl_pre(void *ap) +{ + struct vop_setacl_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vfs_seqc_write_begin(vp); +} + +void +vop_setacl_post(void *ap, int rc __unused) +{ + struct vop_setacl_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vfs_seqc_write_end(vp); +} + +void +vop_setextattr_pre(void *ap) +{ + struct vop_setextattr_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vfs_seqc_write_begin(vp); } void vop_setextattr_post(void *ap, int rc) { - struct vop_setextattr_args *a = ap; + struct vop_setextattr_args *a; + struct vnode *vp; + a = ap; + vp = a->a_vp; + vfs_seqc_write_end(vp); if (!rc) - VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); + VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); } void @@ -6237,6 +6357,8 @@ */ MPASS(mp->mnt_vfs_ops > 0); vp = mp->mnt_rootvnode; + if (vp != NULL) + vfs_seqc_write_begin(vp); mp->mnt_rootvnode = NULL; return (vp); } @@ -6533,3 +6655,45 @@ return (VOP_ACCESS(vp, VEXEC, cnp->cn_cred, cnp->cn_thread)); } + +void +vfs_seqc_write_begin_locked(struct vnode *vp) +{ + + ASSERT_VI_LOCKED(vp, __func__); + VNPASS(vp->v_holdcnt > 0, vp); + VNPASS(vp->v_seqc_users >= 0, vp); + vp->v_seqc_users++; + if (vp->v_seqc_users == 1) + seqc_sleepable_write_begin(&vp->v_seqc); +} + +void +vfs_seqc_write_begin(struct vnode *vp) +{ + + VI_LOCK(vp); + vfs_seqc_write_begin_locked(vp); + VI_UNLOCK(vp); +} + +void +vfs_seqc_write_end_locked(struct 
vnode *vp) +{ + + ASSERT_VI_LOCKED(vp, __func__); + VNPASS(vp->v_holdcnt > 0, vp); + VNPASS(vp->v_seqc_users > 0, vp); + vp->v_seqc_users--; + if (vp->v_seqc_users == 0) + seqc_sleepable_write_end(&vp->v_seqc); +} + +void +vfs_seqc_write_end(struct vnode *vp) +{ + + VI_LOCK(vp); + vfs_seqc_write_end_locked(vp); + VI_UNLOCK(vp); +} Index: sys/kern/vnode_if.src =================================================================== --- sys/kern/vnode_if.src +++ sys/kern/vnode_if.src @@ -142,6 +142,15 @@ }; +%% fplookup_vexec vp - - - + +vop_fplookup_vexec { + IN struct vnode *vp; + IN struct ucred *cred; + IN struct thread *td; +}; + + %% access vp L L L vop_access { @@ -172,6 +181,7 @@ %% setattr vp E E E +%! setattr pre vop_setattr_pre %! setattr post vop_setattr_post vop_setattr { @@ -523,6 +533,8 @@ %% setacl vp E E E +%! setacl pre vop_setacl_pre +%! setacl post vop_setacl_post vop_setacl { IN struct vnode *vp; @@ -589,6 +601,7 @@ %% deleteextattr vp E E E +%! deleteextattr pre vop_deleteextattr_pre %! deleteextattr post vop_deleteextattr_post vop_deleteextattr { @@ -601,6 +614,7 @@ %% setextattr vp E E E +%! setextattr pre vop_setextattr_pre %! setextattr post vop_setextattr_post vop_setextattr { Index: sys/security/mac/mac_framework.h =================================================================== --- sys/security/mac/mac_framework.h +++ sys/security/mac/mac_framework.h @@ -422,13 +422,14 @@ int mac_vnode_check_lookup_impl(struct ucred *cred, struct vnode *dvp, struct componentname *cnp); extern bool mac_vnode_check_lookup_fp_flag; +#define mac_vnode_check_lookup_enabled() __predict_false(mac_vnode_check_lookup_fp_flag) static inline int mac_vnode_check_lookup(struct ucred *cred, struct vnode *dvp, struct componentname *cnp) { mac_vnode_assert_locked(dvp, "mac_vnode_check_lookup"); - if (__predict_false(mac_vnode_check_lookup_fp_flag)) + if (mac_vnode_check_lookup_enabled()) return (mac_vnode_check_lookup_impl(cred, dvp, cnp)); return (0); } Index: sys/sys/_seqc.h =================================================================== --- /dev/null +++ sys/sys/_seqc.h @@ -0,0 +1,6 @@ +#ifndef _SYS__SEQC_H_ +#define _SYS__SEQC_H_ + +typedef uint32_t seqc_t; + +#endif /* _SYS__SEQC_H */ Index: sys/sys/filedesc.h =================================================================== --- sys/sys/filedesc.h +++ sys/sys/filedesc.h @@ -289,6 +289,7 @@ smr_serialized_store(&fdp->fd_pwd, newpwd, (FILEDESC_XLOCK_ASSERT(fdp), true)); } +struct pwd *pwd_get_smr(void); #endif /* _KERNEL */ Index: sys/sys/mount.h =================================================================== --- sys/sys/mount.h +++ sys/sys/mount.h @@ -415,6 +415,7 @@ #define MNTK_TEXT_REFS 0x00008000 /* Keep use ref for text */ #define MNTK_VMSETSIZE_BUG 0x00010000 #define MNTK_UNIONFS 0x00020000 /* A hack for F_ISUNIONSTACK */ +#define MNTK_FPLOOKUP 0x00040000 /* fast path lookup is supported */ #define MNTK_NOASYNC 0x00800000 /* disable async */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ Index: sys/sys/seqc.h =================================================================== --- sys/sys/seqc.h +++ sys/sys/seqc.h @@ -36,7 +36,7 @@ /* * seqc_t may be included in structs visible to userspace */ -typedef uint32_t seqc_t; +#include #ifdef _KERNEL @@ -110,5 +110,22 @@ return (seqc_consistent_nomb(seqcp, oldseqc)); } +static __inline void +seqc_sleepable_write_begin(seqc_t *seqcp) +{ + + MPASS(!seqc_in_modify(*seqcp)); + *seqcp += 1; + 
atomic_thread_fence_rel(); +} + +static __inline void +seqc_sleepable_write_end(seqc_t *seqcp) +{ + + atomic_store_rel_int(seqcp, *seqcp + 1); + MPASS(!seqc_in_modify(*seqcp)); +} + #endif /* _KERNEL */ #endif /* _SYS_SEQC_H_ */ Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -45,6 +45,7 @@ #include #include #include +#include /* * The vnode is the focus of all file activity in UNIX. There is a @@ -105,6 +106,7 @@ */ enum vtype v_type:8; /* u vnode type */ short v_irflag; /* i frequently read flags */ + seqc_t v_seqc; /* i modification count */ struct vop_vector *v_op; /* u vnode operations vector */ void *v_data; /* u private data for fs */ @@ -175,6 +177,7 @@ short v_dbatchcpu; /* i LRU requeue deferral batch */ int v_writecount; /* I ref count of writers or (negative) text users */ + int v_seqc_users; /* i modifications pending */ u_int v_hash; }; @@ -538,6 +541,18 @@ #define ASSERT_VOP_LOCKED(vp, str) assert_vop_locked((vp), (str)) #define ASSERT_VOP_UNLOCKED(vp, str) assert_vop_unlocked((vp), (str)) +#define ASSERT_VOP_IN_SEQC(vp) do { \ + struct vnode *_vp = (vp); \ + \ + VNPASS(seqc_in_modify(_vp->v_seqc), _vp); \ +} while (0) + +#define ASSERT_VOP_NOT_IN_SEQC(vp) do { \ + struct vnode *_vp = (vp); \ + \ + VNPASS(!seqc_in_modify(_vp->v_seqc), _vp); \ +} while (0) + #else /* !DEBUG_VFS_LOCKS */ #define ASSERT_VI_LOCKED(vp, str) ((void)0) @@ -545,6 +560,10 @@ #define ASSERT_VOP_ELOCKED(vp, str) ((void)0) #define ASSERT_VOP_LOCKED(vp, str) ((void)0) #define ASSERT_VOP_UNLOCKED(vp, str) ((void)0) + +#define ASSERT_VOP_IN_SEQC(vp) ((void)0) +#define ASSERT_VOP_NOT_IN_SEQC(vp) ((void)0) + #endif /* DEBUG_VFS_LOCKS */ @@ -601,6 +620,7 @@ struct vattr; struct vfsops; struct vnode; +struct pwd; typedef int (*vn_get_ino_t)(struct mount *, void *, int, struct vnode **); @@ -613,11 +633,16 @@ void cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, struct timespec *tsp, struct timespec *dtsp); +int cache_fplookup(struct nameidata *ndp, bool *handled); int cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct timespec *tsp, int *ticksp); void cache_purge(struct vnode *vp); void cache_purge_negative(struct vnode *vp); void cache_purgevfs(struct mount *mp, bool force); +void vfs_seqc_write_begin_locked(struct vnode *vp); +void vfs_seqc_write_begin(struct vnode *vp); +void vfs_seqc_write_end_locked(struct vnode *vp); +void vfs_seqc_write_end(struct vnode *vp); int change_dir(struct vnode *vp, struct thread *td); void cvtstat(struct stat *st, struct ostat *ost); void freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb); @@ -643,6 +668,8 @@ int vn_commname(struct vnode *vn, char *buf, u_int buflen); int vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path, u_int pathlen); +int vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, + struct ucred *cred); int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred, int *privused); @@ -662,6 +689,7 @@ enum vgetstate vget_prep_smr(struct vnode *vp); enum vgetstate vget_prep(struct vnode *vp); int vget_finish(struct vnode *vp, int flags, enum vgetstate vs); +void vget_abort(struct vnode *vp, enum vgetstate vs); void vgone(struct vnode *vp); void vhold(struct vnode *); void vholdl(struct vnode *); @@ -804,6 +832,7 @@ /* These are called from within the actual VOPS. 
*/ void vop_close_post(void *a, int rc); void vop_create_post(void *a, int rc); +void vop_deleteextattr_pre(void *a); void vop_deleteextattr_post(void *a, int rc); void vop_link_post(void *a, int rc); void vop_lookup_post(void *a, int rc); @@ -818,7 +847,11 @@ void vop_rename_post(void *a, int rc); void vop_rename_pre(void *a); void vop_rmdir_post(void *a, int rc); +void vop_setattr_pre(void *a); void vop_setattr_post(void *a, int rc); +void vop_setacl_pre(void *a); +void vop_setacl_post(void *a, int rc); +void vop_setextattr_pre(void *a); void vop_setextattr_post(void *a, int rc); void vop_symlink_post(void *a, int rc); int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a);
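The v_seqc counter added to struct vnode pairs the vfs_seqc_write_* hooks above with the seqc_read_any()/seqc_consistent() reads done by cache_fplookup. A rough sketch of both sides of the protocol (illustrative only; the example_ functions are hypothetical and assume kernel context with sys/vnode.h and sys/seqc.h available):

static void
example_seqc_write(struct vnode *vp)
{

	/* vp must be held; the vop_*_pre/post hooks run with it locked. */
	vfs_seqc_write_begin(vp);
	/* ... modify permissions or the mount relationship ... */
	vfs_seqc_write_end(vp);
}

static bool
example_seqc_read(struct vnode *vp)
{
	seqc_t seqc;

	seqc = seqc_read_any(&vp->v_seqc);
	if (seqc_in_modify(seqc))
		return (false);		/* a writer is active, fall back */
	/* ... speculatively read the state the counter protects ... */
	return (seqc_consistent(&vp->v_seqc, seqc));
}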