Changeset View
Changeset View
Standalone View
Standalone View
sys/kern/vfs_cache.c
Show First 20 Lines • Show All 49 Lines • ▼ Show 20 Lines | |||||
#include <sys/ktr.h> | #include <sys/ktr.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/fcntl.h> | #include <sys/fcntl.h> | ||||
#include <sys/mount.h> | #include <sys/mount.h> | ||||
#include <sys/namei.h> | #include <sys/namei.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/rwlock.h> | #include <sys/rwlock.h> | ||||
#include <sys/seqc.h> | |||||
#include <sys/sdt.h> | #include <sys/sdt.h> | ||||
#include <sys/smr.h> | #include <sys/smr.h> | ||||
#include <sys/smp.h> | #include <sys/smp.h> | ||||
#include <sys/syscallsubr.h> | #include <sys/syscallsubr.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <sys/sysproto.h> | #include <sys/sysproto.h> | ||||
#include <sys/vnode.h> | #include <sys/vnode.h> | ||||
#include <ck_queue.h> | #include <ck_queue.h> | ||||
#ifdef KTRACE | #ifdef KTRACE | ||||
#include <sys/ktrace.h> | #include <sys/ktrace.h> | ||||
#endif | #endif | ||||
#include <sys/capsicum.h> | |||||
#include <security/audit/audit.h> | |||||
#include <security/mac/mac_framework.h> | |||||
#ifdef DDB | #ifdef DDB | ||||
#include <ddb/ddb.h> | #include <ddb/ddb.h> | ||||
#endif | #endif | ||||
#include <vm/uma.h> | #include <vm/uma.h> | ||||
SDT_PROVIDER_DECLARE(vfs); | SDT_PROVIDER_DECLARE(vfs); | ||||
SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", "char *", | SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct vnode *", "char *", | ||||
Show All 17 Lines | |||||
SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *"); | SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *"); | ||||
SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *", | SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *", | ||||
"struct vnode *"); | "struct vnode *"); | ||||
SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *", | SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *", | ||||
"char *"); | "char *"); | ||||
SDT_PROBE_DEFINE2(vfs, namecache, shrink_negative, done, "struct vnode *", | SDT_PROBE_DEFINE2(vfs, namecache, shrink_negative, done, "struct vnode *", | ||||
"char *"); | "char *"); | ||||
SDT_PROBE_DEFINE3(vfs, fplookup, lookup, done, "struct nameidata", "int", "bool"); | |||||
/* | /* | ||||
* This structure describes the elements in the cache of recent | * This structure describes the elements in the cache of recent | ||||
* names looked up by namei. | * names looked up by namei. | ||||
*/ | */ | ||||
struct namecache { | struct namecache { | ||||
CK_LIST_ENTRY(namecache) nc_hash;/* hash chain */ | CK_LIST_ENTRY(namecache) nc_hash;/* hash chain */ | ||||
LIST_ENTRY(namecache) nc_src; /* source vnode list */ | LIST_ENTRY(namecache) nc_src; /* source vnode list */ | ||||
▲ Show 20 Lines • Show All 2,701 Lines • ▼ Show 20 Lines | if (!have_addr) { | ||||
return; | return; | ||||
} | } | ||||
vp = (struct vnode *)addr; | vp = (struct vnode *)addr; | ||||
db_print_vpath(vp); | db_print_vpath(vp); | ||||
} | } | ||||
#endif | #endif | ||||
extern uma_zone_t namei_zone;

/*
 * Master switch for the lockless lookup, togglable at runtime via the
 * vfs.cache_fast_lookup sysctl.
 */
static bool __read_frequently cache_fast_lookup = true;
SYSCTL_BOOL(_vfs, OID_AUTO, cache_fast_lookup, CTLFLAG_RW,
    &cache_fast_lookup, 0, "");

/*
 * Internal sentinel denoting the fast path could not complete the lookup
 * and the caller has to fall back to the regular one.  Deliberately outside
 * the errno range so it cannot be confused with a real error.
 */
#define CACHE_FPL_UNHANDLED -2020
static void | |||||
cache_fpl_cleanup_cnp(struct componentname *cnp) | |||||
{ | |||||
uma_zfree(namei_zone, cnp->cn_pnbuf); | |||||
#ifdef DIAGNOSTIC | |||||
cnp->cn_pnbuf = NULL; | |||||
cnp->cn_nameptr = NULL; | |||||
#endif | |||||
} | |||||
static void | |||||
cache_fpl_handle_root(struct nameidata *ndp, struct vnode **dpp) | |||||
{ | |||||
struct componentname *cnp; | |||||
cnp = &ndp->ni_cnd; | |||||
while (*(cnp->cn_nameptr) == '/') { | |||||
cnp->cn_nameptr++; | |||||
ndp->ni_pathlen--; | |||||
} | |||||
} | |||||
static void | |||||
cache_fpl_handle_root_initial(struct nameidata *ndp, struct vnode **dpp) | |||||
{ | |||||
cache_fpl_handle_root(ndp, dpp); | |||||
*dpp = ndp->ni_rootdir; | |||||
} | |||||
/*
 * Outcome of a fast path lookup attempt:
 * UNSET     - no verdict reached yet,
 * HANDLED   - the result (including the error code) is final,
 * UNHANDLED - the caller must fall back to the regular lookup.
 */
enum cache_fpl_status { CACHE_FPL_STATUS_UNHANDLED, CACHE_FPL_STATUS_HANDLED,
    CACHE_FPL_STATUS_UNSET };

/*
 * State tracked across one fast path lookup attempt.
 */
struct cache_fpl {
	int line;			/* source line which set 'handled' (diagnostic) */
	enum cache_fpl_status handled;	/* verdict, see above */
	bool in_smr;			/* currently within the vfs SMR section */
	struct nameidata *ndp;
	struct componentname *cnp;
	struct vnode *dvp;		/* directory being walked */
	seqc_t dvp_seqc;		/* dvp sequence counter snapshot */
	struct vnode *tvp;		/* vnode found for the current component */
	seqc_t tvp_seqc;		/* tvp sequence counter snapshot */
};
/*
 * Assert the thread is outside the vfs SMR section, both according to the
 * global state and the flag tracked in fpl.
 */
#define cache_fpl_smr_assert_not_entered(fpl) ({			\
	struct cache_fpl *_fpl = (fpl);					\
	VFS_SMR_ASSERT_NOT_ENTERED();					\
	MPASS(_fpl->in_smr == false);					\
})
jeff: I assume you want FILE:LINE to propagate all the way through. | |||||
/*
 * Enter the vfs SMR section and record the fact in fpl; must not already
 * be entered.
 */
#define cache_fpl_smr_enter(fpl) ({					\
	struct cache_fpl *_fpl = (fpl);					\
	MPASS(_fpl->in_smr == false);					\
	vfs_smr_enter();						\
	_fpl->in_smr = true;						\
})
/*
 * Leave the vfs SMR section and record the fact in fpl; must currently
 * be entered.
 */
#define cache_fpl_smr_exit(fpl) ({					\
	struct cache_fpl *_fpl = (fpl);					\
	MPASS(_fpl->in_smr == true);					\
	vfs_smr_exit();							\
	_fpl->in_smr = false;						\
})
static int | |||||
cache_fpl_unhandled_impl(struct cache_fpl *fpl, int line) | |||||
{ | |||||
KASSERT(fpl->handled == CACHE_FPL_STATUS_UNSET, | |||||
("%s: lookup status already set at %d\n", __func__, fpl->line)); | |||||
fpl->handled = CACHE_FPL_STATUS_UNHANDLED; | |||||
fpl->line = line; | |||||
return (CACHE_FPL_UNHANDLED); | |||||
} | |||||
#define cache_fpl_unhandled(x) cache_fpl_unhandled_impl((x), __LINE__) | |||||
static int | |||||
cache_fpl_handled_impl(struct cache_fpl *fpl, int error, int line) | |||||
{ | |||||
KASSERT(fpl->handled == CACHE_FPL_STATUS_UNSET, | |||||
("%s: lookup status already set at %d\n", __func__, fpl->line)); | |||||
fpl->handled = CACHE_FPL_STATUS_HANDLED; | |||||
fpl->line = line; | |||||
return (error); | |||||
} | |||||
#define cache_fpl_handled(x, e) cache_fpl_handled_impl((x), (e), __LINE__) | |||||
/*
 * componentname flags the fast path knows how to service; anything else
 * forces a fallback.
 */
#define CACHE_FPL_SUPPORTED_CN_FLAGS \
	(LOCKLEAF | FOLLOW | LOCKSHARED | SAVENAME | ISOPEN | AUDITVNODE1)

/*
 * Pre-flight check: can this lookup request be attempted by the fast path
 * at all?  Each rejection goes through cache_fpl_unhandled() so the
 * offending condition's line number is retained for the SDT probe.
 */
static bool
cache_can_fplookup(struct cache_fpl *fpl)
{
	struct nameidata *ndp;
	struct componentname *cnp;
	struct thread *td;

	ndp = fpl->ndp;
	cnp = fpl->cnp;
	td = cnp->cn_thread;

	/* Administratively disabled (vfs.cache_fast_lookup). */
	if (!cache_fast_lookup) {
		cache_fpl_unhandled(fpl);
		return (false);
	}
	/* MAC lookup hooks would have to run; not supported lockless. */
	if (mac_vnode_check_lookup_enabled()) {
		cache_fpl_unhandled(fpl);
		return (false);
	}
	/* Any flag outside the supported set. */
	if (cnp->cn_flags & ~CACHE_FPL_SUPPORTED_CN_FLAGS) {
		cache_fpl_unhandled(fpl);
		return (false);
	}
	/* The final vnode must be locked on return (see cache_fplookup_final). */
	if ((cnp->cn_flags & LOCKLEAF) == 0) {
		cache_fpl_unhandled(fpl);
		return (false);
	}
	/* Only plain lookups; CREATE/DELETE/RENAME need the slow path. */
	if (cnp->cn_nameiop != LOOKUP) {
		cache_fpl_unhandled(fpl);
		return (false);
	}
	/* Lookups relative to a file descriptor are not supported. */
	if (ndp->ni_dirfd != AT_FDCWD) {
		cache_fpl_unhandled(fpl);
		return (false);
	}
	/* Capability mode imposes extra checks the fast path skips. */
	if (IN_CAPABILITY_MODE(td)) {
		cache_fpl_unhandled(fpl);
		return (false);
	}
	/* Auditing requires record keeping not done here. */
	if (AUDITING_TD(td)) {
		cache_fpl_unhandled(fpl);
		return (false);
	}
	/* A preset starting directory is not supported. */
	if (ndp->ni_startdir != NULL) {
		cache_fpl_unhandled(fpl);
		return (false);
	}
	return (true);
}
/* | |||||
* TODO | |||||
* | |||||
* Save and restore nameidata in case we need to fall back to the regular lookup. | |||||
* | |||||
* Everything which is modified (some direct fields and cnp) is covered, but the | |||||
* copy below is overzealous for simplicity. It can be modified to only save few | |||||
* fields. | |||||
*/ | |||||
/*
 * Snapshot the nameidata before the fast path mutates it, so a fallback
 * to the regular lookup starts from pristine state.  See the TODO above:
 * a full struct copy is deliberately overzealous for simplicity.
 */
static void
cache_save_nameidata(struct nameidata *ndp, struct nameidata *saved)
{

	*saved = *ndp;
}

/*
 * Undo any fast path modifications by restoring the earlier snapshot.
 */
static void
cache_restore_nameidata(struct nameidata *ndp, struct nameidata *saved)
{

	*ndp = *saved;
}
static bool | |||||
cache_fplookup_vnode_supported(struct vnode *vp) | |||||
{ | |||||
switch (vp->v_type) { | |||||
case VLNK: | |||||
return (false); | |||||
default: | |||||
break; | |||||
} | |||||
return (true); | |||||
} | |||||
/*
 * Finish a successful walk: acquire a reference and the requested lock on
 * the terminal vnode, re-validating the sequence counters so the lockless
 * traversal is known to have been consistent.  Any failure falls back to
 * the regular lookup.
 */
static int
cache_fplookup_final(struct cache_fpl *fpl)
{
	struct componentname *cnp;
	enum vgetstate tvs;
	struct vnode *dvp, *tvp;
	seqc_t dvp_seqc, tvp_seqc;
	int error;

	cnp = fpl->cnp;
	dvp = fpl->dvp;
	dvp_seqc = fpl->dvp_seqc;
	tvp = fpl->tvp;
	tvp_seqc = fpl->tvp_seqc;

	VNPASS(cache_fplookup_vnode_supported(dvp), dvp);
	MPASS((cnp->cn_flags & LOCKLEAF) != 0);

	/* Prepare to reference tvp; VGET_NONE means it cannot be acquired. */
	tvs = vget_prep_smr(tvp);
	if (tvs == VGET_NONE) {
		return (cache_fpl_unhandled(fpl));
	}

	/*
	 * The directory must not have changed since tvp was found in it.
	 * Checked while still inside the SMR section so dvp is safe to read.
	 */
	if (!seqc_consistent(&dvp->v_seqc, dvp_seqc)) {
		cache_fpl_smr_exit(fpl);
		vget_abort(tvp, tvs);
		return (cache_fpl_unhandled(fpl));
	}

	cache_fpl_smr_exit(fpl);

	/* Lock tvp only after leaving the SMR section. */
	error = vget_finish(tvp, cnp->cn_lkflags, tvs);
	if (error != 0) {
		return (cache_fpl_unhandled(fpl));
	}

	/* Finally verify tvp itself did not change under us. */
	if (!seqc_consistent(&tvp->v_seqc, tvp_seqc)) {
		vput(tvp);
		return (cache_fpl_unhandled(fpl));
	}

	return (cache_fpl_handled(fpl, 0));
}
/*
 * Resolve the current path component against the namecache starting from
 * fpl->dvp, filling in fpl->tvp/tvp_seqc.  Returns 0 on a positive hit,
 * a final error via cache_fpl_handled() on a hot negative hit, or the
 * unhandled sentinel to force a fallback.
 */
static int
cache_fplookup_next(struct cache_fpl *fpl)
{
	struct componentname *cnp;
	struct namecache *ncp;
	struct vnode *dvp, *tvp;
	u_char nc_flag;
	uint32_t hash;

	cnp = fpl->cnp;
	dvp = fpl->dvp;

	/* "." resolves to the directory itself; no hash lookup needed. */
	if (__predict_false(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')) {
		fpl->tvp = dvp;
		fpl->tvp_seqc = seqc_read_any(&dvp->v_seqc);
		if (seqc_in_modify(fpl->tvp_seqc)) {
			return (cache_fpl_unhandled(fpl));
		}
		return (0);
	}

	hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);

	CK_LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		counter_u64_add(numchecks, 1);
		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen))
			break;
	}

	/* Poison tvp so any use without a later store is caught. */
	fpl->tvp = (void *)0xdeadbeef;

	/*
	 * If there is no entry we have to punt to the slow path to perform
	 * actual lookup. Should there be nothing with this name a negative
	 * entry will be created.
	 */
	if (__predict_false(ncp == NULL)) {
		return (cache_fpl_unhandled(fpl));
	}

	/*
	 * Load the fields of interest first, then reject the entry if it got
	 * invalidated.  NOTE(review): relies on the ordering guarantees of
	 * cache_ncp_invalid() — see its definition.
	 */
	tvp = atomic_load_ptr(&ncp->nc_vp);
	nc_flag = atomic_load_char(&ncp->nc_flag);
	if (__predict_false(cache_ncp_invalid(ncp))) {
		return (cache_fpl_unhandled(fpl));
	}

	/* Whiteout entries need slow path treatment. */
	if (__predict_false(nc_flag & NCF_WHITE)) {
		return (cache_fpl_unhandled(fpl));
	}

	fpl->tvp = tvp;

	if (nc_flag & NCF_NEGATIVE) {
		if ((nc_flag & NCF_HOTNEGATIVE) == 0) {
			/*
			 * TODO
			 * Promoting to hot negative requires locks which are
			 * not yet supported for simplicity.
			 */
			return (cache_fpl_unhandled(fpl));
		}
		SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
		    ncp->nc_name);
		counter_u64_add(numneghits, 1);
		cache_fpl_smr_exit(fpl);
		return (cache_fpl_handled(fpl, ENOENT));
	}

	/* Snapshot the found vnode's sequence counter for later validation. */
	fpl->tvp_seqc = seqc_read_any(&tvp->v_seqc);
	if (seqc_in_modify(fpl->tvp_seqc)) {
		return (cache_fpl_unhandled(fpl));
	}

	if (!cache_fplookup_vnode_supported(tvp)) {
		return (cache_fpl_unhandled(fpl));
	}

	counter_u64_add(numposhits, 1);
	SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name, tvp);
	return (0);
}
static bool | |||||
cache_fplookup_mp_supported(struct mount *mp) | |||||
{ | |||||
if (mp == NULL) | |||||
return (false); | |||||
if ((mp->mnt_kern_flag & MNTK_FPLOOKUP) == 0) | |||||
return (false); | |||||
if (mp->mnt_flag & MNT_UNION) | |||||
return (false); | |||||
return (true); | |||||
} | |||||
/* | |||||
* Walk up the mount stack (if any). | |||||
* | |||||
* Correctness is provided in the following ways: | |||||
* - all vnodes are protected from freeing with SMR | |||||
* - struct mount objects are type stable making them always safe to access | |||||
* - stability of the particular mount is provided by busying it | |||||
* - relationship between the vnode which is mounted on and the mount is | |||||
* verified with the vnode sequence counter after busying | |||||
* - association between root vnode of the mount and the mount is protected | |||||
* by busy | |||||
* | |||||
* From that point on we can read the sequence counter of the root vnode | |||||
* and get the next mount on the stack (if any) using the same protection. | |||||
* | |||||
* By the end of successful walk we are guaranteed the reached state was | |||||
* indeed present at least at some point which matches the regular lookup. | |||||
*/ | |||||
/*
 * Cross from a covered vnode to the root of the filesystem mounted on it,
 * iterating in case mounts are stacked.  See the preceding block comment
 * for the correctness argument.  Returns 0 (with fpl->tvp/tvp_seqc updated
 * when a mount was crossed) or the unhandled sentinel on any race.
 */
static int
cache_fplookup_climb_mount(struct cache_fpl *fpl)
{
	struct mount *mp, *prev_mp;
	struct vnode *vp;
	seqc_t vp_seqc;

	vp = fpl->tvp;
	vp_seqc = fpl->tvp_seqc;

	/* Only directories can have something mounted on them. */
	if (vp->v_type != VDIR)
		return (0);

	mp = atomic_load_ptr(&vp->v_mountedhere);
	if (mp == NULL)
		return (0);

	prev_mp = NULL;
	for (;;) {
		/* Busy the mount; failure means vfs state is in flux. */
		if (!vfs_op_thread_enter(mp)) {
			if (prev_mp != NULL)
				vfs_op_thread_exit(prev_mp);
			return (cache_fpl_unhandled(fpl));
		}
		/* The new mount is busied; the previous one can be dropped. */
		if (prev_mp != NULL)
			vfs_op_thread_exit(prev_mp);
		/* Verify vp did not change between reading and busying mp. */
		if (!seqc_consistent(&vp->v_seqc, vp_seqc)) {
			vfs_op_thread_exit(mp);
			return (cache_fpl_unhandled(fpl));
		}
		if (!cache_fplookup_mp_supported(mp)) {
			vfs_op_thread_exit(mp);
			return (cache_fpl_unhandled(fpl));
		}
		/* Root vnode association is stable while mp is busied. */
		vp = atomic_load_ptr(&mp->mnt_rootvnode);
		if (vp == NULL || VN_IS_DOOMED(vp)) {
			vfs_op_thread_exit(mp);
			return (cache_fpl_unhandled(fpl));
		}
		vp_seqc = seqc_read_any(&vp->v_seqc);
		if (seqc_in_modify(vp_seqc)) {
			vfs_op_thread_exit(mp);
			return (cache_fpl_unhandled(fpl));
		}
		/* Keep climbing if yet another fs is mounted on this root. */
		prev_mp = mp;
		mp = atomic_load_ptr(&vp->v_mountedhere);
		if (mp == NULL)
			break;
	}

	vfs_op_thread_exit(prev_mp);
	fpl->tvp = vp;
	fpl->tvp_seqc = vp_seqc;
	return (0);
}
/* | |||||
* Parse the path. | |||||
* | |||||
* The code is mostly copy-pasted from regular lookup, see lookup(). | |||||
* The structure is maintained along with comments for easier maintenance. | |||||
* Deduplicating the code will become feasible after fast path lookup | |||||
* becomes more feature-complete. | |||||
*/ | |||||
static int | |||||
cache_fplookup_parse(struct cache_fpl *fpl) | |||||
{ | |||||
struct nameidata *ndp; | |||||
struct componentname *cnp; | |||||
char *cp; | |||||
char *prev_ni_next; /* saved ndp->ni_next */ | |||||
size_t prev_ni_pathlen; /* saved ndp->ni_pathlen */ | |||||
ndp = fpl->ndp; | |||||
cnp = fpl->cnp; | |||||
/* | |||||
* Search a new directory. | |||||
* | |||||
* The last component of the filename is left accessible via | |||||
* cnp->cn_nameptr for callers that need the name. Callers needing | |||||
* the name set the SAVENAME flag. When done, they assume | |||||
* responsibility for freeing the pathname buffer. | |||||
*/ | |||||
for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) | |||||
continue; | |||||
cnp->cn_namelen = cp - cnp->cn_nameptr; | |||||
if (cnp->cn_namelen > NAME_MAX) { | |||||
return (cache_fpl_handled(fpl, ENAMETOOLONG)); | |||||
} | |||||
prev_ni_pathlen = ndp->ni_pathlen; | |||||
ndp->ni_pathlen -= cnp->cn_namelen; | |||||
KASSERT(ndp->ni_pathlen <= PATH_MAX, | |||||
("%s: ni_pathlen underflow to %zd\n", __func__, ndp->ni_pathlen)); | |||||
prev_ni_next = ndp->ni_next; | |||||
ndp->ni_next = cp; | |||||
/* | |||||
* Replace multiple slashes by a single slash and trailing slashes | |||||
* by a null. This must be done before VOP_LOOKUP() because some | |||||
* fs's don't know about trailing slashes. Remember if there were | |||||
* trailing slashes to handle symlinks, existing non-directories | |||||
* and non-existing files that won't be directories specially later. | |||||
*/ | |||||
while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { | |||||
cp++; | |||||
ndp->ni_pathlen--; | |||||
if (*cp == '\0') { | |||||
/* | |||||
* TODO | |||||
* Regular lookup performs the following: | |||||
* *ndp->ni_next = '\0'; | |||||
* cnp->cn_flags |= TRAILINGSLASH; | |||||
* | |||||
* Which is problematic since it modifies data read | |||||
* from userspace. Then if fast path lookup was to | |||||
* abort we would have to either restore it or convey | |||||
* the flag. Since this is a corner case just ignore | |||||
* it for simplicity. | |||||
*/ | |||||
return (cache_fpl_unhandled(fpl)); | |||||
} | |||||
} | |||||
ndp->ni_next = cp; | |||||
cnp->cn_flags |= MAKEENTRY; | |||||
if (cnp->cn_namelen == 2 && | |||||
cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') | |||||
cnp->cn_flags |= ISDOTDOT; | |||||
else | |||||
cnp->cn_flags &= ~ISDOTDOT; | |||||
if (*ndp->ni_next == 0) | |||||
cnp->cn_flags |= ISLASTCN; | |||||
else | |||||
cnp->cn_flags &= ~ISLASTCN; | |||||
/* | |||||
* Check for degenerate name (e.g. / or "") | |||||
* which is a way of talking about a directory, | |||||
* e.g. like "/." or ".". | |||||
* | |||||
* TODO | |||||
* Another corner case handled by the regular lookup | |||||
*/ | |||||
if (__predict_false(cnp->cn_nameptr[0] == '\0')) { | |||||
return (cache_fpl_unhandled(fpl)); | |||||
} | |||||
return (0); | |||||
} | |||||
static void | |||||
cache_fplookup_parse_advance(struct cache_fpl *fpl) | |||||
{ | |||||
struct nameidata *ndp; | |||||
struct componentname *cnp; | |||||
ndp = fpl->ndp; | |||||
cnp = fpl->cnp; | |||||
cnp->cn_nameptr = ndp->ni_next; | |||||
while (*cnp->cn_nameptr == '/') { | |||||
cnp->cn_nameptr++; | |||||
ndp->ni_pathlen--; | |||||
} | |||||
} | |||||
/*
 * The lookup loop proper: parse one component at a time, check exec
 * permission on the directory, resolve the component via the namecache,
 * cross mount points and descend, handing the final component to
 * cache_fplookup_final().  Any condition the fast path cannot service
 * aborts to the slow path via the unhandled status.
 */
static int
cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl)
{
	struct nameidata *ndp;
	struct componentname *cnp;
	struct mount *mp;
	int error;

	error = CACHE_FPL_UNHANDLED;
	ndp = fpl->ndp;
	ndp->ni_lcf = 0;
	cnp = fpl->cnp;
	cnp->cn_lkflags = LK_SHARED;
	if ((cnp->cn_flags & LOCKSHARED) == 0)
		cnp->cn_lkflags = LK_EXCLUSIVE;

	/* Snapshot the starting directory's sequence counter. */
	fpl->dvp = dvp;
	fpl->dvp_seqc = seqc_read_any(&fpl->dvp->v_seqc);
	if (seqc_in_modify(fpl->dvp_seqc)) {
		cache_fpl_unhandled(fpl);
		goto out;
	}
	mp = atomic_load_ptr(&fpl->dvp->v_mount);
	if (!cache_fplookup_mp_supported(mp)) {
		cache_fpl_unhandled(fpl);
		goto out;
	}

	VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp);

	for (;;) {
		error = cache_fplookup_parse(fpl);
		if (__predict_false(error != 0)) {
			break;
		}

		/* ".." walks upward; not supported by the fast path. */
		if (cnp->cn_flags & ISDOTDOT) {
			error = cache_fpl_unhandled(fpl);
			break;
		}

		VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp);

		error = VOP_FPLOOKUP_VEXEC(fpl->dvp, cnp->cn_cred, cnp->cn_thread);
		if (__predict_false(error != 0)) {
			switch (error) {
			case EAGAIN:
			case EOPNOTSUPP: /* can happen when racing against vgone */
				cache_fpl_unhandled(fpl);
				break;
			default:
				/*
				 * See the API contract for VOP_FPLOOKUP_VEXEC.
				 */
				if (!seqc_consistent(&fpl->dvp->v_seqc, fpl->dvp_seqc)) {
					error = cache_fpl_unhandled(fpl);
				} else {
					cache_fpl_handled(fpl, error);
				}
				break;
			}
			break;
		}

		error = cache_fplookup_next(fpl);
		if (__predict_false(error != 0)) {
			break;
		}

		VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp);

		/* Cross into any filesystem mounted on the found vnode. */
		error = cache_fplookup_climb_mount(fpl);
		if (__predict_false(error != 0)) {
			break;
		}

		VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp);

		if (cnp->cn_flags & ISLASTCN) {
			error = cache_fplookup_final(fpl);
			break;
		}

		/* Validate the jump before making tvp the new directory. */
		if (!seqc_consistent(&fpl->dvp->v_seqc, fpl->dvp_seqc)) {
			error = cache_fpl_unhandled(fpl);
			break;
		}

		fpl->dvp = fpl->tvp;
		fpl->dvp_seqc = fpl->tvp_seqc;

		if (!cache_fplookup_vnode_supported(fpl->dvp)) {
			error = cache_fpl_unhandled(fpl);
			break;
		}

		cache_fplookup_parse_advance(fpl);
	}
out:
	MPASS(fpl->handled != CACHE_FPL_STATUS_UNSET);
	if (fpl->handled == CACHE_FPL_STATUS_UNHANDLED) {
		/* Falling back; the SMR section may still be entered. */
		if (fpl->in_smr)
			cache_fpl_smr_exit(fpl);
		return (error);
	}
	cache_fpl_smr_assert_not_entered(fpl);
	if (__predict_false(error != 0)) {
		ndp->ni_dvp = NULL;
		ndp->ni_vp = NULL;
		cache_fpl_cleanup_cnp(cnp);
		return (error);
	}
	/* Success: publish the results the same way the regular lookup does. */
	ndp->ni_dvp = fpl->dvp;
	ndp->ni_vp = fpl->tvp;
	if (cnp->cn_flags & SAVENAME)
		cnp->cn_flags |= HASBUF;
	else
		cache_fpl_cleanup_cnp(cnp);
	return (error);
}
/* | |||||
* Fast path lookup protected with SMR and sequence counters. | |||||
* | |||||
* Note: all VOP_FPLOOKUP_VEXEC routines have a comment referencing this one. | |||||
* | |||||
* Filesystems can opt in by setting the MNTK_FPLOOKUP flag and meeting criteria | |||||
* outlined below. | |||||
* | |||||
* Traditional vnode lookup conceptually looks like this: | |||||
* | |||||
* vn_lock(current); | |||||
* for (;;) { | |||||
* next = find(); | |||||
* vn_lock(next); | |||||
* vn_unlock(current); | |||||
* current = next; | |||||
* if (last) | |||||
* break; | |||||
* } | |||||
* | |||||
* Each jump to the next vnode is safe memory-wise and atomic with respect to | |||||
* any modifications thanks to holding respective locks. | |||||
* | |||||
* The same guarantee can be provided with a combination of safe memory | |||||
* reclamation and sequence counters instead. If all operations which affect | |||||
* the relationship between the current vnode and the one we are looking for | |||||
* also modify the counter, we can verify whether all the conditions held as | |||||
* we made the jump. This includes things like permissions, mount point etc. | |||||
* You can grep for vn_seqc_write_begin to check all the places. | |||||
* | |||||
* Thus this translates to: | |||||
* | |||||
* vfs_smr_enter(); | |||||
* current_seqc = seqc_read_any(current); | |||||
* if (seqc_in_modify(current_seqc)) // someone is altering the vnode | |||||
* abort(); | |||||
* for (;;) { | |||||
* next = find(); | |||||
* next_seqc = seqc_read_any(next); | |||||
* if (!seqc_consistent(current, current_seqc) // someone is altering the vnode | |||||
* abort(); | |||||
* current = next; // we know nothing of importance has changed | |||||
* current_seqc = next_seqc; // store the counter for the next iteration | |||||
* if (last) | |||||
* break; | |||||
* } | |||||
* | |||||
* API contract for VOP_FPLOOKUP_VEXEC routines is as follows: | |||||
* - they are called while within vfs_smr protection which they must never exit | |||||
* - EAGAIN can be returned to denote checking could not be performed, it is | |||||
* always valid to return it | |||||
* - if the sequence counter has not changed the result must be valid | |||||
* - if the sequence counter has changed both false positives and false negatives | |||||
* are permitted (since the result will be rejected later) | |||||
* - for simple cases of unix permission checks vaccess_vexec_smr can be used | |||||
* | |||||
* Caveats to watch out for: | |||||
* - vnodes are passed unlocked and unreferenced with nothing stopping | |||||
* VOP_RECLAIM, in turn meaning that ->v_data can become NULL. It is advised | |||||
* to use atomic_load_ptr to fetch it. | |||||
* - aforementioned object can also get freed, meaning absent other means it | |||||
* should be protected with vfs_smr | |||||
* - either safely checking permissions as they are modified or guaranteeing | |||||
* their stability is left to the routine | |||||
*/ | |||||
/*
 * Entry point for the fast path lookup; see the big block comment above
 * for the overall scheme.  On return *handled reports whether the fast
 * path produced a final result.  When it did not, the nameidata has been
 * restored to its pre-call state so the caller can run the regular
 * lookup unchanged.
 */
int
cache_fplookup(struct nameidata *ndp, bool *handled)
{
	struct nameidata saved_ndp;
	struct cache_fpl fpl;
	struct pwd *pwd;
	struct vnode *dvp;
	struct componentname *cnp;
	int error;

	*handled = false;
	bzero(&fpl, sizeof(fpl));
	fpl.handled = CACHE_FPL_STATUS_UNSET;
	fpl.ndp = ndp;
	fpl.cnp = &ndp->ni_cnd;
	MPASS(curthread == fpl.cnp->cn_thread);

	if (!cache_can_fplookup(&fpl)) {
		SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.handled);
		return (EOPNOTSUPP);
	}

	/* Snapshot nameidata so a fallback can restart from clean state. */
	cache_save_nameidata(ndp, &saved_ndp);

	cache_fpl_smr_enter(&fpl);
	/* Fetched under SMR protection; see pwd_get_smr. */
	pwd = pwd_get_smr();
	ndp->ni_rootdir = pwd->pwd_rdir;
	ndp->ni_topdir = pwd->pwd_jdir;

	cnp = fpl.cnp;
	cnp->cn_nameptr = cnp->cn_pnbuf;
	if (cnp->cn_pnbuf[0] == '/') {
		/* Absolute path: start at the root, slashes consumed. */
		cache_fpl_handle_root_initial(ndp, &dvp);
	} else {
		/* cache_can_fplookup rejected anything but AT_FDCWD. */
		MPASS(ndp->ni_dirfd == AT_FDCWD);
		dvp = pwd->pwd_cdir;
	}

	error = cache_fplookup_impl(dvp, &fpl);
	cache_fpl_smr_assert_not_entered(&fpl);
	SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.handled);

	MPASS(fpl.handled != CACHE_FPL_STATUS_UNSET);
	if (fpl.handled == CACHE_FPL_STATUS_HANDLED) {
		*handled = true;
		return (error);
	} else {
		cache_restore_nameidata(ndp, &saved_ndp);
		return (error);
	}
}
I assume you want FILE:LINE to propagate all the way through.