Index: sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c =================================================================== --- sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c +++ sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c @@ -154,6 +154,7 @@ vput(vp); return (error); } + vn_seqc_write_begin(vp); VOP_UNLOCK(vp); /* @@ -206,6 +207,7 @@ VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); + vn_seqc_write_end(vp); vput(vp); vfs_unbusy(mp); vfs_freeopts(mp->mnt_optnew); @@ -241,6 +243,7 @@ vfs_event_signal(NULL, VQ_MOUNT, 0); if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp)) panic("mount: lost mount"); + vn_seqc_write_end(vp); VOP_UNLOCK(vp); vfs_op_exit(mp); vfs_unbusy(mp); Index: sys/fs/tmpfs/tmpfs.h =================================================================== --- sys/fs/tmpfs/tmpfs.h +++ sys/fs/tmpfs/tmpfs.h @@ -526,6 +526,14 @@ return (node); } +static inline struct tmpfs_node * +VP_TO_TMPFS_NODE_SMR(struct vnode *vp) +{ + + MPASS(vp != NULL); + return (atomic_load_ptr(&vp->v_data)); +} + static inline struct tmpfs_node * VP_TO_TMPFS_DIR(struct vnode *vp) { Index: sys/fs/tmpfs/tmpfs_subr.c =================================================================== --- sys/fs/tmpfs/tmpfs_subr.c +++ sys/fs/tmpfs/tmpfs_subr.c @@ -75,6 +75,7 @@ static uma_zone_t tmpfs_dirent_pool; static uma_zone_t tmpfs_node_pool; +VFS_SMR_DECLARE; static int tmpfs_node_ctor(void *mem, int size, void *arg, int flags) @@ -131,6 +132,7 @@ tmpfs_node_pool = uma_zcreate("TMPFS node", sizeof(struct tmpfs_node), tmpfs_node_ctor, tmpfs_node_dtor, tmpfs_node_init, tmpfs_node_fini, UMA_ALIGN_PTR, 0); + VFS_SMR_ZONE_SET(tmpfs_node_pool); } void @@ -288,7 +290,7 @@ if ((mp->mnt_kern_flag & MNT_RDONLY) != 0) return (EROFS); - nnode = uma_zalloc_arg(tmpfs_node_pool, tmp, M_WAITOK); + nnode = uma_zalloc_smr(tmpfs_node_pool, M_WAITOK); /* Generic initialization. 
*/ nnode->tn_type = type; @@ -435,7 +437,7 @@ panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type); } - uma_zfree(tmpfs_node_pool, node); + uma_zfree_smr(tmpfs_node_pool, node); TMPFS_LOCK(tmp); tmpfs_free_tmp(tmp); return (true); @@ -1619,10 +1621,11 @@ int tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p) { - int error; + int error, newmode; struct tmpfs_node *node; ASSERT_VOP_ELOCKED(vp, "chmod"); + ASSERT_VOP_IN_SEQC(vp); node = VP_TO_TMPFS_NODE(vp); @@ -1656,9 +1659,9 @@ return (error); } - - node->tn_mode &= ~ALLPERMS; - node->tn_mode |= mode & ALLPERMS; + newmode = node->tn_mode & ~ALLPERMS; + newmode |= mode & ALLPERMS; + atomic_store_int(&node->tn_mode, newmode); node->tn_status |= TMPFS_NODE_CHANGED; @@ -1684,6 +1687,7 @@ gid_t ogid; ASSERT_VOP_ELOCKED(vp, "chown"); + ASSERT_VOP_IN_SEQC(vp); node = VP_TO_TMPFS_NODE(vp); @@ -1730,7 +1734,7 @@ if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) - node->tn_mode &= ~(S_ISUID | S_ISGID); + atomic_store_int(&node->tn_mode, node->tn_mode & ~(S_ISUID | S_ISGID)); } ASSERT_VOP_ELOCKED(vp, "chown2"); Index: sys/fs/tmpfs/tmpfs_vfsops.c =================================================================== --- sys/fs/tmpfs/tmpfs_vfsops.c +++ sys/fs/tmpfs/tmpfs_vfsops.c @@ -462,6 +462,8 @@ mp->mnt_flag |= MNT_LOCAL; mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | MNTK_TEXT_REFS | MNTK_NOMSYNC; + if (!nonc) + mp->mnt_kern_flag |= MNTK_FPLOOKUP; MNT_IUNLOCK(mp); mp->mnt_data = tmp; Index: sys/fs/tmpfs/tmpfs_vnops.h =================================================================== --- sys/fs/tmpfs/tmpfs_vnops.h +++ sys/fs/tmpfs/tmpfs_vnops.h @@ -49,6 +49,7 @@ extern struct vop_vector tmpfs_vnodeop_nonc_entries; vop_access_t tmpfs_access; +vop_fplookup_vexec_t tmpfs_fplookup_vexec; vop_getattr_t tmpfs_getattr; vop_setattr_t tmpfs_setattr; vop_pathconf_t tmpfs_pathconf; Index: sys/fs/tmpfs/tmpfs_vnops.c =================================================================== --- sys/fs/tmpfs/tmpfs_vnops.c +++ sys/fs/tmpfs/tmpfs_vnops.c @@ -317,6 +317,32 @@ return (0); } +/* + * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see + * the comment above cache_fplookup for details. + */ +int +tmpfs_fplookup_vexec(struct vop_fplookup_vexec_args *v) +{ + struct vnode *vp; + struct tmpfs_node *node; + struct ucred *cred; + mode_t all_x, mode; + + vp = v->a_vp; + node = VP_TO_TMPFS_NODE_SMR(vp); + if (__predict_false(node == NULL)) + return (EAGAIN); + + all_x = S_IXUSR | S_IXGRP | S_IXOTH; + mode = atomic_load_int(&node->tn_mode); + if (__predict_true((mode & all_x) == all_x)) + return (0); + + cred = v->a_cred; + return (vaccess_vexec_smr(mode, node->tn_uid, node->tn_gid, cred)); +} + int tmpfs_access(struct vop_access_args *v) { @@ -428,6 +454,8 @@ MPASS(VOP_ISLOCKED(vp)); + vn_seqc_write_begin(vp); + error = 0; /* Abort if any unsettable attribute is given. */ @@ -466,6 +494,8 @@ * from tmpfs_update. */ tmpfs_update(vp); + vn_seqc_write_end(vp); + MPASS(VOP_ISLOCKED(vp)); return error; @@ -806,12 +836,15 @@ struct tmpfs_node *tnode; struct tmpfs_node *tdnode; int error; + bool want_seqc_end; MPASS(VOP_ISLOCKED(tdvp)); MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp))); MPASS(fcnp->cn_flags & HASBUF); MPASS(tcnp->cn_flags & HASBUF); + want_seqc_end = false; + /* * Disallow cross-device renames. * XXX Why isn't this done by the caller? 
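
A minimal sketch of the reader-side discipline the SMR accessors above rely on; example_smr_peek is a hypothetical helper, not part of the patch, and only restates the pattern used by VP_TO_TMPFS_NODE_SMR and tmpfs_fplookup_vexec:

/*
 * Sketch: reading per-file state without the vnode lock, under vfs_smr.
 * VOP_RECLAIM can run concurrently, so v_data may become NULL at any time;
 * load it once with atomic_load_ptr and NULL-check it. Fields consulted
 * afterwards (here tn_mode) must be written with atomic stores, which is
 * why tmpfs_chmod and tmpfs_chown switch to atomic_store_int above.
 */
static int
example_smr_peek(struct vnode *vp, mode_t *modep)
{
	struct tmpfs_node *node;

	VFS_SMR_ASSERT_ENTERED();
	node = atomic_load_ptr(&vp->v_data);
	if (__predict_false(node == NULL))
		return (EAGAIN);	/* raced with reclaim; punt to the slow path */
	*modep = atomic_load_int(&node->tn_mode);
	return (0);
}
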
@@ -852,6 +885,13 @@ } } + if (tvp != NULL) + vn_seqc_write_begin(tvp); + vn_seqc_write_begin(tdvp); + vn_seqc_write_begin(fvp); + vn_seqc_write_begin(fdvp); + want_seqc_end = true; + tmp = VFS_TO_TMPFS(tdvp->v_mount); tdnode = VP_TO_TMPFS_DIR(tdvp); tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp); @@ -1065,6 +1105,14 @@ VOP_UNLOCK(fdvp); out: + if (want_seqc_end) { + if (tvp != NULL) + vn_seqc_write_end(tvp); + vn_seqc_write_end(tdvp); + vn_seqc_write_end(fvp); + vn_seqc_write_end(fdvp); + } + /* * Release target nodes. * XXX: I don't understand when tdvp can be the same as tvp, but @@ -1621,6 +1669,7 @@ .vop_mknod = tmpfs_mknod, .vop_open = tmpfs_open, .vop_close = tmpfs_close, + .vop_fplookup_vexec = tmpfs_fplookup_vexec, .vop_access = tmpfs_access, .vop_getattr = tmpfs_getattr, .vop_setattr = tmpfs_setattr, Index: sys/kern/kern_descrip.c =================================================================== --- sys/kern/kern_descrip.c +++ sys/kern/kern_descrip.c @@ -102,8 +102,8 @@ static __read_mostly uma_zone_t file_zone; static __read_mostly uma_zone_t filedesc0_zone; -static __read_mostly uma_zone_t pwd_zone; -static __read_mostly smr_t pwd_smr; +__read_mostly uma_zone_t pwd_zone; +VFS_SMR_DECLARE; static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders); @@ -3346,14 +3346,24 @@ fdp = td->td_proc->p_fd; - smr_enter(pwd_smr); + vfs_smr_enter(); for (;;) { - pwd = smr_entered_load(&fdp->fd_pwd, pwd_smr); + pwd = smr_entered_load(&fdp->fd_pwd, VFS_SMR()); MPASS(pwd != NULL); if (refcount_acquire_if_not_zero(&pwd->pwd_refcount)) break; } - smr_exit(pwd_smr); + vfs_smr_exit(); + return (pwd); +} + +struct pwd * +pwd_get_smr(void) +{ + struct pwd *pwd; + + pwd = smr_entered_load(&curproc->p_fd->fd_pwd, VFS_SMR()); + MPASS(pwd != NULL); return (pwd); } @@ -4363,7 +4373,11 @@ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR); - pwd_smr = uma_zone_get_smr(pwd_zone); + /* + * XXXMJG this is a temporary hack due to boot ordering issues against + * the vnode zone. + */ + vfs_smr = uma_zone_get_smr(pwd_zone); mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); } SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL); Index: sys/kern/vfs_cache.c =================================================================== --- sys/kern/vfs_cache.c +++ sys/kern/vfs_cache.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -67,6 +68,11 @@ #include #endif +#include + +#include +#include + #ifdef DDB #include #endif @@ -100,6 +106,8 @@ SDT_PROBE_DEFINE2(vfs, namecache, shrink_negative, done, "struct vnode *", "char *"); +SDT_PROBE_DEFINE3(vfs, fplookup, lookup, done, "struct nameidata", "int", "bool"); + /* * This structure describes the elements in the cache of recent * names looked up by namei. 
@@ -2810,3 +2818,841 @@ } #endif + +extern uma_zone_t namei_zone; + +static bool __read_frequently cache_fast_lookup = true; +SYSCTL_BOOL(_vfs, OID_AUTO, cache_fast_lookup, CTLFLAG_RW, + &cache_fast_lookup, 0, ""); + +#define CACHE_FPL_FAILED -2020 + +static void +cache_fpl_cleanup_cnp(struct componentname *cnp) +{ + + uma_zfree(namei_zone, cnp->cn_pnbuf); +#ifdef DIAGNOSTIC + cnp->cn_pnbuf = NULL; + cnp->cn_nameptr = NULL; +#endif +} + +static void +cache_fpl_handle_root(struct nameidata *ndp, struct vnode **dpp) +{ + struct componentname *cnp; + + cnp = &ndp->ni_cnd; + while (*(cnp->cn_nameptr) == '/') { + cnp->cn_nameptr++; + ndp->ni_pathlen--; + } +} + +static void +cache_fpl_handle_root_initial(struct nameidata *ndp, struct vnode **dpp) +{ + + cache_fpl_handle_root(ndp, dpp); + *dpp = ndp->ni_rootdir; +} + +/* + * Components of nameidata (or objects it can point to) which may + * need restoring in case fast path lookup fails. + */ +struct nameidata_saved { + int cn_flags; + long cn_namelen; + char *cn_nameptr; + size_t ni_pathlen; +}; + +struct cache_fpl { + int line; + enum cache_fpl_status status; + bool in_smr; + struct nameidata *ndp; + struct nameidata_saved snd; + struct componentname *cnp; + struct vnode *dvp; + seqc_t dvp_seqc; + struct vnode *tvp; + seqc_t tvp_seqc; + struct pwd *pwd; +}; + +static void +cache_fpl_checkpoint(struct cache_fpl *fpl, struct nameidata_saved *snd) +{ + + snd->cn_flags = fpl->ndp->ni_cnd.cn_flags; + snd->cn_namelen = fpl->ndp->ni_cnd.cn_namelen; + snd->cn_nameptr = fpl->ndp->ni_cnd.cn_nameptr; + snd->ni_pathlen = fpl->ndp->ni_pathlen; +} + +static void +cache_fpl_restore(struct cache_fpl *fpl, struct nameidata_saved *snd) +{ + + fpl->ndp->ni_cnd.cn_flags = snd->cn_flags; + fpl->ndp->ni_cnd.cn_namelen = snd->cn_namelen; + fpl->ndp->ni_cnd.cn_nameptr = snd->cn_nameptr; + fpl->ndp->ni_pathlen = snd->ni_pathlen; +} + +#ifdef INVARIANTS +#define cache_fpl_smr_assert_entered(fpl) ({ \ + struct cache_fpl *_fpl = (fpl); \ + MPASS(_fpl->in_smr == true); \ + VFS_SMR_ASSERT_ENTERED(); \ +}) +#define cache_fpl_smr_assert_not_entered(fpl) ({ \ + struct cache_fpl *_fpl = (fpl); \ + MPASS(_fpl->in_smr == false); \ + VFS_SMR_ASSERT_NOT_ENTERED(); \ +}) +#else +#define cache_fpl_smr_assert_entered(fpl) do { } while (0) +#define cache_fpl_smr_assert_not_entered(fpl) do { } while (0) +#endif + +#define cache_fpl_smr_enter(fpl) ({ \ + struct cache_fpl *_fpl = (fpl); \ + MPASS(_fpl->in_smr == false); \ + vfs_smr_enter(); \ + _fpl->in_smr = true; \ +}) + +#define cache_fpl_smr_exit(fpl) ({ \ + struct cache_fpl *_fpl = (fpl); \ + MPASS(_fpl->in_smr == true); \ + vfs_smr_exit(); \ + _fpl->in_smr = false; \ +}) + +static int +cache_fpl_aborted_impl(struct cache_fpl *fpl, int line) +{ + + KASSERT(fpl->status == CACHE_FPL_STATUS_UNSET, + ("%s: lookup status already set at %d\n", __func__, fpl->line)); + fpl->status = CACHE_FPL_STATUS_ABORTED; + fpl->line = line; + return (CACHE_FPL_FAILED); +} + +#define cache_fpl_aborted(x) cache_fpl_aborted_impl((x), __LINE__) + +static int +cache_fpl_partial_impl(struct cache_fpl *fpl, int line) +{ + + KASSERT(fpl->status == CACHE_FPL_STATUS_UNSET, + ("%s: lookup status already set at %d\n", __func__, fpl->line)); + cache_fpl_smr_assert_entered(fpl); + fpl->status = CACHE_FPL_STATUS_PARTIAL; + fpl->line = line; + return (CACHE_FPL_FAILED); +} + +#define cache_fpl_partial(x) cache_fpl_partial_impl((x), __LINE__) + +static int +cache_fpl_handled_impl(struct cache_fpl *fpl, int error, int line) +{ + + KASSERT(fpl->status == 
CACHE_FPL_STATUS_UNSET, + ("%s: lookup status already set at %d\n", __func__, fpl->line)); + cache_fpl_smr_assert_not_entered(fpl); + MPASS(error != CACHE_FPL_FAILED); + fpl->status = CACHE_FPL_STATUS_HANDLED; + fpl->line = line; + return (error); +} + +#define cache_fpl_handled(x, e) cache_fpl_handled_impl((x), (e), __LINE__) + +#define CACHE_FPL_SUPPORTED_CN_FLAGS \ + (LOCKLEAF | FOLLOW | LOCKSHARED | SAVENAME | ISOPEN | AUDITVNODE1) + +static bool +cache_can_fplookup(struct cache_fpl *fpl) +{ + struct nameidata *ndp; + struct componentname *cnp; + struct thread *td; + + ndp = fpl->ndp; + cnp = fpl->cnp; + td = cnp->cn_thread; + + if (!cache_fast_lookup) { + cache_fpl_aborted(fpl); + return (false); + } + if (mac_vnode_check_lookup_enabled()) { + cache_fpl_aborted(fpl); + return (false); + } + if (cnp->cn_flags & ~CACHE_FPL_SUPPORTED_CN_FLAGS) { + cache_fpl_aborted(fpl); + return (false); + } + if ((cnp->cn_flags & LOCKLEAF) == 0) { + cache_fpl_aborted(fpl); + return (false); + } + if (cnp->cn_nameiop != LOOKUP) { + cache_fpl_aborted(fpl); + return (false); + } + if (ndp->ni_dirfd != AT_FDCWD) { + cache_fpl_aborted(fpl); + return (false); + } + if (IN_CAPABILITY_MODE(td)) { + cache_fpl_aborted(fpl); + return (false); + } + if (AUDITING_TD(td)) { + cache_fpl_aborted(fpl); + return (false); + } + if (ndp->ni_startdir != NULL) { + cache_fpl_aborted(fpl); + return (false); + } + return (true); +} + +static bool +cache_fplookup_vnode_supported(struct vnode *vp) +{ + + switch (vp->v_type) { + case VLNK: + return (false); + default: + break; + } + return (true); +} + +/* + * The target vnode is not supported, prepare for the slow path to take over. + */ +static int +cache_fplookup_partial_setup(struct cache_fpl *fpl) +{ + struct componentname *cnp; + struct vnode *dvp; + struct pwd *pwd; + seqc_t dvp_seqc; + + cnp = fpl->cnp; + dvp = fpl->dvp; + dvp_seqc = fpl->dvp_seqc; + + if (!vref_smr(dvp)) { + fpl->status = CACHE_FPL_STATUS_ABORTED; + cache_fpl_smr_exit(fpl); + return (CACHE_FPL_FAILED); + } + + cache_fpl_smr_exit(fpl); + if (!seqc_consistent(&dvp->v_seqc, dvp_seqc)) { + fpl->status = CACHE_FPL_STATUS_ABORTED; + vrele(dvp); + return (CACHE_FPL_FAILED); + } + + pwd = pwd_hold(curthread); + if (fpl->pwd != pwd) { + fpl->status = CACHE_FPL_STATUS_ABORTED; + vrele(dvp); + pwd_drop(pwd); + return (CACHE_FPL_FAILED); + } + + fpl->ndp->ni_startdir = dvp; + return (0); +} + +static int +cache_fplookup_final(struct cache_fpl *fpl) +{ + struct componentname *cnp; + enum vgetstate tvs; + struct vnode *dvp, *tvp; + seqc_t dvp_seqc, tvp_seqc; + int error; + + cnp = fpl->cnp; + dvp = fpl->dvp; + dvp_seqc = fpl->dvp_seqc; + tvp = fpl->tvp; + tvp_seqc = fpl->tvp_seqc; + + VNPASS(cache_fplookup_vnode_supported(dvp), dvp); + MPASS((cnp->cn_flags & LOCKLEAF) != 0); + + tvs = vget_prep_smr(tvp); + if (tvs == VGET_NONE) { + return (cache_fpl_partial(fpl)); + } + + if (!seqc_consistent(&dvp->v_seqc, dvp_seqc)) { + cache_fpl_smr_exit(fpl); + vget_abort(tvp, tvs); + return (cache_fpl_aborted(fpl)); + } + + cache_fpl_smr_exit(fpl); + + error = vget_finish(tvp, cnp->cn_lkflags, tvs); + if (error != 0) { + return (cache_fpl_aborted(fpl)); + } + + if (!seqc_consistent(&tvp->v_seqc, tvp_seqc)) { + vput(tvp); + return (cache_fpl_aborted(fpl)); + } + + return (cache_fpl_handled(fpl, 0)); +} + +static int +cache_fplookup_next(struct cache_fpl *fpl) +{ + struct componentname *cnp; + struct namecache *ncp; + struct vnode *dvp, *tvp; + u_char nc_flag; + uint32_t hash; + + cnp = fpl->cnp; + dvp = fpl->dvp; + + if 
(__predict_false(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')) { + fpl->tvp = dvp; + fpl->tvp_seqc = seqc_read_any(&dvp->v_seqc); + if (seqc_in_modify(fpl->tvp_seqc)) { + return (cache_fpl_partial(fpl)); + } + return (0); + } + + hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp); + + CK_LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { + counter_u64_add(numchecks, 1); + if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && + !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) + break; + } + + /* + * If there is no entry we have to punt to the slow path to perform + * actual lookup. Should there be nothing with this name a negative + * entry will be created. + */ + if (__predict_false(ncp == NULL)) { + return (cache_fpl_partial(fpl)); + } + + tvp = atomic_load_ptr(&ncp->nc_vp); + nc_flag = atomic_load_char(&ncp->nc_flag); + if (__predict_false(cache_ncp_invalid(ncp))) { + return (cache_fpl_partial(fpl)); + } + if (__predict_false(nc_flag & NCF_WHITE)) { + return (cache_fpl_partial(fpl)); + } + + fpl->tvp = tvp; + if (nc_flag & NCF_NEGATIVE) { + if ((nc_flag & NCF_HOTNEGATIVE) == 0) { + /* + * TODO + * Promoting to hot negative requires locks which are + * not yet supported for simplicity. + */ + return (cache_fpl_partial(fpl)); + } + SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp, + ncp->nc_name); + counter_u64_add(numneghits, 1); + cache_fpl_smr_exit(fpl); + return (cache_fpl_handled(fpl, ENOENT)); + } + + fpl->tvp_seqc = seqc_read_any(&tvp->v_seqc); + if (seqc_in_modify(fpl->tvp_seqc)) { + return (cache_fpl_partial(fpl)); + } + + if (!cache_fplookup_vnode_supported(tvp)) { + return (cache_fpl_partial(fpl)); + } + + counter_u64_add(numposhits, 1); + SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name, tvp); + return (0); +} + +static bool +cache_fplookup_mp_supported(struct mount *mp) +{ + + if (mp == NULL) + return (false); + if ((mp->mnt_kern_flag & MNTK_FPLOOKUP) == 0) + return (false); + if (mp->mnt_flag & MNT_UNION) + return (false); + return (true); +} + +/* + * Walk up the mount stack (if any). + * + * Correctness is provided in the following ways: + * - all vnodes are protected from freeing with SMR + * - struct mount objects are type stable making them always safe to access + * - stability of the particular mount is provided by busying it + * - relationship between the vnode which is mounted on and the mount is + * verified with the vnode sequence counter after busying + * - association between root vnode of the mount and the mount is protected + * by busy + * + * From that point on we can read the sequence counter of the root vnode + * and get the next mount on the stack (if any) using the same protection. + * + * By the end of successful walk we are guaranteed the reached state was + * indeed present at least at some point which matches the regular lookup. 
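+ *
+ * A condensed sketch of a single step of the walk (illustrative only; the
+ * real loop follows below):
+ *
+ *	if (!vfs_op_thread_enter(mp))		// pin the mount
+ *		return (cache_fpl_partial(fpl));
+ *	if (!seqc_consistent(&vp->v_seqc, vp_seqc)) {
+ *		vfs_op_thread_exit(mp);		// vp no longer covers mp
+ *		return (cache_fpl_partial(fpl));
+ *	}
+ *	vp = atomic_load_ptr(&mp->mnt_rootvnode); // stable while mp is pinned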
+ */ +static int +cache_fplookup_climb_mount(struct cache_fpl *fpl) +{ + struct mount *mp, *prev_mp; + struct vnode *vp; + seqc_t vp_seqc; + + vp = fpl->tvp; + vp_seqc = fpl->tvp_seqc; + if (vp->v_type != VDIR) + return (0); + + mp = atomic_load_ptr(&vp->v_mountedhere); + if (mp == NULL) + return (0); + + prev_mp = NULL; + for (;;) { + if (!vfs_op_thread_enter(mp)) { + if (prev_mp != NULL) + vfs_op_thread_exit(prev_mp); + return (cache_fpl_partial(fpl)); + } + if (prev_mp != NULL) + vfs_op_thread_exit(prev_mp); + if (!seqc_consistent(&vp->v_seqc, vp_seqc)) { + vfs_op_thread_exit(mp); + return (cache_fpl_partial(fpl)); + } + if (!cache_fplookup_mp_supported(mp)) { + vfs_op_thread_exit(mp); + return (cache_fpl_partial(fpl)); + } + vp = atomic_load_ptr(&mp->mnt_rootvnode); + if (vp == NULL || VN_IS_DOOMED(vp)) { + vfs_op_thread_exit(mp); + return (cache_fpl_partial(fpl)); + } + vp_seqc = seqc_read_any(&vp->v_seqc); + if (seqc_in_modify(vp_seqc)) { + vfs_op_thread_exit(mp); + return (cache_fpl_partial(fpl)); + } + prev_mp = mp; + mp = atomic_load_ptr(&vp->v_mountedhere); + if (mp == NULL) + break; + } + + vfs_op_thread_exit(prev_mp); + fpl->tvp = vp; + fpl->tvp_seqc = vp_seqc; + return (0); +} + +/* + * Parse the path. + * + * The code is mostly copy-pasted from regular lookup, see lookup(). + * The structure is maintained along with comments for easier maintenance. + * Deduplicating the code will become feasible after fast path lookup + * becomes more feature-complete. + */ +static int +cache_fplookup_parse(struct cache_fpl *fpl) +{ + struct nameidata *ndp; + struct componentname *cnp; + char *cp; + char *prev_ni_next; /* saved ndp->ni_next */ + size_t prev_ni_pathlen; /* saved ndp->ni_pathlen */ + + ndp = fpl->ndp; + cnp = fpl->cnp; + + /* + * Search a new directory. + * + * The last component of the filename is left accessible via + * cnp->cn_nameptr for callers that need the name. Callers needing + * the name set the SAVENAME flag. When done, they assume + * responsibility for freeing the pathname buffer. + */ + for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) + continue; + cnp->cn_namelen = cp - cnp->cn_nameptr; + if (cnp->cn_namelen > NAME_MAX) { + cache_fpl_smr_exit(fpl); + return (cache_fpl_handled(fpl, ENAMETOOLONG)); + } + prev_ni_pathlen = ndp->ni_pathlen; + ndp->ni_pathlen -= cnp->cn_namelen; + KASSERT(ndp->ni_pathlen <= PATH_MAX, + ("%s: ni_pathlen underflow to %zd\n", __func__, ndp->ni_pathlen)); + prev_ni_next = ndp->ni_next; + ndp->ni_next = cp; + + /* + * Replace multiple slashes by a single slash and trailing slashes + * by a null. This must be done before VOP_LOOKUP() because some + * fs's don't know about trailing slashes. Remember if there were + * trailing slashes to handle symlinks, existing non-directories + * and non-existing files that won't be directories specially later. + */ + while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { + cp++; + ndp->ni_pathlen--; + if (*cp == '\0') { + /* + * TODO + * Regular lookup performs the following: + * *ndp->ni_next = '\0'; + * cnp->cn_flags |= TRAILINGSLASH; + * + * Which is problematic since it modifies data read + * from userspace. Then if fast path lookup was to + * abort we would have to either restore it or convey + * the flag. Since this is a corner case just ignore + * it for simplicity. + */ + return (cache_fpl_partial(fpl)); + } + } + ndp->ni_next = cp; + + cnp->cn_flags |= MAKEENTRY; + + if (cnp->cn_namelen == 2 && + cnp->cn_nameptr[1] == '.' 
&& cnp->cn_nameptr[0] == '.') + cnp->cn_flags |= ISDOTDOT; + else + cnp->cn_flags &= ~ISDOTDOT; + if (*ndp->ni_next == 0) + cnp->cn_flags |= ISLASTCN; + else + cnp->cn_flags &= ~ISLASTCN; + + /* + * Check for degenerate name (e.g. / or "") + * which is a way of talking about a directory, + * e.g. like "/." or ".". + * + * TODO + * Another corner case handled by the regular lookup + */ + if (__predict_false(cnp->cn_nameptr[0] == '\0')) { + return (cache_fpl_partial(fpl)); + } + return (0); +} + +static void +cache_fplookup_parse_advance(struct cache_fpl *fpl) +{ + struct nameidata *ndp; + struct componentname *cnp; + + ndp = fpl->ndp; + cnp = fpl->cnp; + + cnp->cn_nameptr = ndp->ni_next; + while (*cnp->cn_nameptr == '/') { + cnp->cn_nameptr++; + ndp->ni_pathlen--; + } +} + +static int +cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl) +{ + struct nameidata *ndp; + struct componentname *cnp; + struct mount *mp; + int error; + + error = CACHE_FPL_FAILED; + ndp = fpl->ndp; + ndp->ni_lcf = 0; + cnp = fpl->cnp; + cnp->cn_lkflags = LK_SHARED; + if ((cnp->cn_flags & LOCKSHARED) == 0) + cnp->cn_lkflags = LK_EXCLUSIVE; + + cache_fpl_checkpoint(fpl, &fpl->snd); + + fpl->dvp = dvp; + fpl->dvp_seqc = seqc_read_any(&fpl->dvp->v_seqc); + if (seqc_in_modify(fpl->dvp_seqc)) { + cache_fpl_aborted(fpl); + goto out; + } + mp = atomic_load_ptr(&fpl->dvp->v_mount); + if (!cache_fplookup_mp_supported(mp)) { + cache_fpl_aborted(fpl); + goto out; + } + + VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp); + + for (;;) { + error = cache_fplookup_parse(fpl); + if (__predict_false(error != 0)) { + break; + } + + if (cnp->cn_flags & ISDOTDOT) { + error = cache_fpl_partial(fpl); + break; + } + + VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp); + + error = VOP_FPLOOKUP_VEXEC(fpl->dvp, cnp->cn_cred, cnp->cn_thread); + if (__predict_false(error != 0)) { + switch (error) { + case EAGAIN: + case EOPNOTSUPP: /* can happen when racing against vgone */ + cache_fpl_partial(fpl); + break; + default: + /* + * See the API contract for VOP_FPLOOKUP_VEXEC. 
+ */ + if (!seqc_consistent(&fpl->dvp->v_seqc, fpl->dvp_seqc)) { + error = cache_fpl_aborted(fpl); + } else { + cache_fpl_smr_exit(fpl); + cache_fpl_handled(fpl, error); + } + break; + } + break; + } + + error = cache_fplookup_next(fpl); + if (__predict_false(error != 0)) { + break; + } + + VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp); + + error = cache_fplookup_climb_mount(fpl); + if (__predict_false(error != 0)) { + break; + } + + VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp); + + if (cnp->cn_flags & ISLASTCN) { + error = cache_fplookup_final(fpl); + break; + } + + if (!seqc_consistent(&fpl->dvp->v_seqc, fpl->dvp_seqc)) { + error = cache_fpl_aborted(fpl); + break; + } + + fpl->dvp = fpl->tvp; + fpl->dvp_seqc = fpl->tvp_seqc; + + cache_fplookup_parse_advance(fpl); + cache_fpl_checkpoint(fpl, &fpl->snd); + } +out: + switch (fpl->status) { + case CACHE_FPL_STATUS_UNSET: + __assert_unreachable(); + break; + case CACHE_FPL_STATUS_PARTIAL: + cache_fpl_smr_assert_entered(fpl); + return (cache_fplookup_partial_setup(fpl)); + case CACHE_FPL_STATUS_ABORTED: + if (fpl->in_smr) + cache_fpl_smr_exit(fpl); + return (CACHE_FPL_FAILED); + case CACHE_FPL_STATUS_HANDLED: + cache_fpl_smr_assert_not_entered(fpl); + if (__predict_false(error != 0)) { + ndp->ni_dvp = NULL; + ndp->ni_vp = NULL; + cache_fpl_cleanup_cnp(cnp); + return (error); + } + ndp->ni_dvp = fpl->dvp; + ndp->ni_vp = fpl->tvp; + if (cnp->cn_flags & SAVENAME) + cnp->cn_flags |= HASBUF; + else + cache_fpl_cleanup_cnp(cnp); + return (error); + } +} + +/* + * Fast path lookup protected with SMR and sequence counters. + * + * Note: all VOP_FPLOOKUP_VEXEC routines have a comment referencing this one. + * + * Filesystems can opt in by setting the MNTK_FPLOOKUP flag and meeting criteria + * outlined below. + * + * Traditional vnode lookup conceptually looks like this: + * + * vn_lock(current); + * for (;;) { + * next = find(); + * vn_lock(next); + * vn_unlock(current); + * current = next; + * if (last) + * break; + * } + * + * Each jump to the next vnode is safe memory-wise and atomic with respect to + * any modifications thanks to holding respective locks. + * + * The same guarantee can be provided with a combination of safe memory + * reclamation and sequence counters instead. If all operations which affect + * the relationship between the current vnode and the one we are looking for + * also modify the counter, we can verify whether all the conditions held as + * we made the jump. This includes things like permissions, mount point etc. + * You can grep for vn_seqc_write_begin to check all the places. 
+ * + * Thus this translates to: + * + * vfs_smr_enter(); + * current_seqc = seqc_read_any(current); + * if (seqc_in_modify(current_seqc)) // someone is altering the vnode + * abort(); + * for (;;) { + * next = find(); + * next_seqc = seqc_read_any(next); + * if (!seqc_consistent(current, current_seqc)) // someone is altering the vnode + * abort(); + * current = next; // we know nothing of importance has changed + * current_seqc = next_seqc; // store the counter for the next iteration + * if (last) + * break; + * } + * + * API contract for VOP_FPLOOKUP_VEXEC routines is as follows: + * - they are called while within vfs_smr protection which they must never exit + * - EAGAIN can be returned to denote checking could not be performed, it is + * always valid to return it + * - if the sequence counter has not changed the result must be valid + * - if the sequence counter has changed both false positives and false negatives + * are permitted (since the result will be rejected later) + * - for simple cases of unix permission checks vaccess_vexec_smr can be used + * + * Caveats to watch out for: + * - vnodes are passed unlocked and unreferenced with nothing stopping + * VOP_RECLAIM, in turn meaning that ->v_data can become NULL. It is advised + * to use atomic_load_ptr to fetch it. + * - aforementioned object can also get freed, meaning absent other means it + * should be protected with vfs_smr + * - either safely checking permissions as they are modified or guaranteeing + * their stability is left to the routine + */ +int +cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status, + struct pwd **pwdp) +{ + struct cache_fpl fpl; + struct pwd *pwd; + struct vnode *dvp; + struct componentname *cnp; + struct nameidata_saved orig; + int error; + + *status = CACHE_FPL_STATUS_UNSET; + bzero(&fpl, sizeof(fpl)); + fpl.status = CACHE_FPL_STATUS_UNSET; + fpl.ndp = ndp; + fpl.cnp = &ndp->ni_cnd; + MPASS(curthread == fpl.cnp->cn_thread); + + if (!cache_can_fplookup(&fpl)) { + SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.status); + *status = fpl.status; + return (EOPNOTSUPP); + } + + cache_fpl_checkpoint(&fpl, &orig); + + cache_fpl_smr_enter(&fpl); + pwd = pwd_get_smr(); + fpl.pwd = pwd; + ndp->ni_rootdir = pwd->pwd_rdir; + ndp->ni_topdir = pwd->pwd_jdir; + + cnp = fpl.cnp; + cnp->cn_nameptr = cnp->cn_pnbuf; + if (cnp->cn_pnbuf[0] == '/') { + cache_fpl_handle_root_initial(ndp, &dvp); + } else { + MPASS(ndp->ni_dirfd == AT_FDCWD); + dvp = pwd->pwd_cdir; + } + + error = cache_fplookup_impl(dvp, &fpl); + cache_fpl_smr_assert_not_entered(&fpl); + SDT_PROBE3(vfs, fplookup, lookup, done, ndp, fpl.line, fpl.status); + + *status = fpl.status; + switch (fpl.status) { + case CACHE_FPL_STATUS_UNSET: + __assert_unreachable(); + break; + case CACHE_FPL_STATUS_HANDLED: + break; + case CACHE_FPL_STATUS_PARTIAL: + *pwdp = fpl.pwd; + cache_fpl_restore(&fpl, &fpl.snd); + break; + case CACHE_FPL_STATUS_ABORTED: + cache_fpl_restore(&fpl, &orig); + break; + } + return (error); +} Index: sys/kern/vfs_lookup.c =================================================================== --- sys/kern/vfs_lookup.c +++ sys/kern/vfs_lookup.c @@ -280,77 +280,21 @@ return (0); } -/* - * Convert a pathname into a pointer to a locked vnode. - * - * The FOLLOW flag is set when symbolic links are to be followed - * when they occur at the end of the name translation process. - * Symbolic links are always followed for all other pathname - * components other than the last.
- * - * The segflg defines whether the name is to be copied from user - * space or kernel space. - * - * Overall outline of namei: - * - * copy in name - * get starting directory - * while (!done && !error) { - * call lookup to search path. - * if symbolic link, massage name in buffer and continue - * } - */ -int -namei(struct nameidata *ndp) +static int +namei_setup(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp) { - char *cp; /* pointer into pathname argument */ - struct vnode *dp; /* the directory we are searching */ - struct iovec aiov; /* uio for reading symbolic links */ struct componentname *cnp; struct file *dfp; struct thread *td; - struct proc *p; struct pwd *pwd; cap_rights_t rights; struct filecaps dirfd_caps; - struct uio auio; - int error, linklen, startdir_used; + int error, startdir_used; cnp = &ndp->ni_cnd; td = cnp->cn_thread; - p = td->td_proc; - ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; - KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc")); - KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0, - ("namei: nameiop contaminated with flags")); - KASSERT((cnp->cn_flags & OPMASK) == 0, - ("namei: flags contaminated with nameiops")); - MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR || - ndp->ni_startdir->v_type == VBAD); - TAILQ_INIT(&ndp->ni_cap_tracker); - ndp->ni_lcf = 0; - - /* We will set this ourselves if we need it. */ - cnp->cn_flags &= ~TRAILINGSLASH; - /* - * Get a buffer for the name to be translated, and copy the - * name into the buffer. - */ - if ((cnp->cn_flags & HASBUF) == 0) - cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); - if (ndp->ni_segflg == UIO_SYSSPACE) - error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, - &ndp->ni_pathlen); - else - error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, - &ndp->ni_pathlen); - - /* - * Don't allow empty pathnames. - */ - if (error == 0 && *cnp->cn_pnbuf == '\0') - error = ENOENT; + *pwdp = NULL; #ifdef CAPABILITY_MODE /* @@ -366,24 +310,17 @@ * previously walked by us, which prevents an escape from * the relative root. */ - if (error == 0 && IN_CAPABILITY_MODE(td) && - (cnp->cn_flags & NOCAPCHECK) == 0) { + if (IN_CAPABILITY_MODE(td) && (cnp->cn_flags & NOCAPCHECK) == 0) { ndp->ni_lcf |= NI_LCF_STRICTRELATIVE; if (ndp->ni_dirfd == AT_FDCWD) { #ifdef KTRACE if (KTRPOINT(td, KTR_CAPFAIL)) ktrcapfail(CAPFAIL_LOOKUP, NULL, NULL); #endif - error = ECAPMODE; + return (ECAPMODE); } } #endif - if (error != 0) { - namei_cleanup_cnp(cnp); - ndp->ni_vp = NULL; - return (error); - } - ndp->ni_loopcnt = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_NAMEI)) { KASSERT(cnp->cn_thread == curthread, @@ -391,6 +328,8 @@ ktrnamei(cnp->cn_pnbuf); } #endif + error = 0; + /* * Get starting point for the translation. 
*/ @@ -402,19 +341,16 @@ ndp->ni_rootdir = pwd->pwd_rdir; ndp->ni_topdir = pwd->pwd_jdir; - startdir_used = 0; - dp = NULL; - cnp->cn_nameptr = cnp->cn_pnbuf; if (cnp->cn_pnbuf[0] == '/') { ndp->ni_resflags |= NIRES_ABS; - error = namei_handle_root(ndp, &dp); + error = namei_handle_root(ndp, dpp); } else { if (ndp->ni_startdir != NULL) { - dp = ndp->ni_startdir; + *dpp = ndp->ni_startdir; startdir_used = 1; } else if (ndp->ni_dirfd == AT_FDCWD) { - dp = pwd->pwd_cdir; - vrefact(dp); + *dpp = pwd->pwd_cdir; + vrefact(*dpp); } else { rights = ndp->ni_rightsneeded; cap_rights_set_one(&rights, CAP_LOOKUP); @@ -441,8 +377,8 @@ } else if (dfp->f_vnode == NULL) { error = ENOTDIR; } else { - dp = dfp->f_vnode; - vrefact(dp); + *dpp = dfp->f_vnode; + vrefact(*dpp); if ((dfp->f_flag & FSEARCH) != 0) cnp->cn_flags |= NOEXECCHECK; @@ -464,7 +400,7 @@ } #endif } - if (error == 0 && dp->v_type != VDIR) + if (error == 0 && (*dpp)->v_type != VDIR) error = ENOTDIR; } if (error == 0 && (cnp->cn_flags & BENEATH) != 0) { @@ -476,7 +412,7 @@ cap_rights_set_one(&rights, CAP_LOOKUP); error = fgetvp_rights(td, ndp->ni_dirfd, &rights, &dirfd_caps, &ndp->ni_beneath_latch); - if (error == 0 && dp->v_type != VDIR) { + if (error == 0 && (*dpp)->v_type != VDIR) { vrele(ndp->ni_beneath_latch); error = ENOTDIR; } @@ -488,15 +424,15 @@ * If we are auditing the kernel pathname, save the user pathname. */ if (cnp->cn_flags & AUDITVNODE1) - AUDIT_ARG_UPATH1_VP(td, ndp->ni_rootdir, dp, cnp->cn_pnbuf); + AUDIT_ARG_UPATH1_VP(td, ndp->ni_rootdir, *dpp, cnp->cn_pnbuf); if (cnp->cn_flags & AUDITVNODE2) - AUDIT_ARG_UPATH2_VP(td, ndp->ni_rootdir, dp, cnp->cn_pnbuf); + AUDIT_ARG_UPATH2_VP(td, ndp->ni_rootdir, *dpp, cnp->cn_pnbuf); if (ndp->ni_startdir != NULL && !startdir_used) vrele(ndp->ni_startdir); if (error != 0) { - if (dp != NULL) - vrele(dp); - goto out; + if (*dpp != NULL) + vrele(*dpp); + return (error); } MPASS((ndp->ni_lcf & (NI_LCF_BENEATH_ABS | NI_LCF_LATCH)) != NI_LCF_BENEATH_ABS); @@ -505,8 +441,124 @@ ((ndp->ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && (cnp->cn_flags & BENEATH) != 0)) ndp->ni_lcf |= NI_LCF_CAP_DOTDOT; - SDT_PROBE3(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf, + SDT_PROBE3(vfs, namei, lookup, entry, *dpp, cnp->cn_pnbuf, cnp->cn_flags); + *pwdp = pwd; + return (0); +} + +/* + * Convert a pathname into a pointer to a locked vnode. + * + * The FOLLOW flag is set when symbolic links are to be followed + * when they occur at the end of the name translation process. + * Symbolic links are always followed for all other pathname + * components other than the last. + * + * The segflg defines whether the name is to be copied from user + * space or kernel space. + * + * Overall outline of namei: + * + * copy in name + * get starting directory + * while (!done && !error) { + * call lookup to search path. 
+ * if symbolic link, massage name in buffer and continue + * } + */ +int +namei(struct nameidata *ndp) +{ + char *cp; /* pointer into pathname argument */ + struct vnode *dp; /* the directory we are searching */ + struct iovec aiov; /* uio for reading symbolic links */ + struct componentname *cnp; + struct thread *td; + struct proc *p; + struct pwd *pwd; + struct uio auio; + int error, linklen; + enum cache_fpl_status status; + + cnp = &ndp->ni_cnd; + td = cnp->cn_thread; + p = td->td_proc; + ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; + KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc")); + KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0, + ("namei: nameiop contaminated with flags")); + KASSERT((cnp->cn_flags & OPMASK) == 0, + ("namei: flags contaminated with nameiops")); + MPASS(ndp->ni_startdir == NULL || ndp->ni_startdir->v_type == VDIR || + ndp->ni_startdir->v_type == VBAD); + TAILQ_INIT(&ndp->ni_cap_tracker); + ndp->ni_lcf = 0; + ndp->ni_loopcnt = 0; + dp = NULL; + + /* We will set this ourselves if we need it. */ + cnp->cn_flags &= ~TRAILINGSLASH; + + ndp->ni_vp = NULL; + + /* + * Get a buffer for the name to be translated, and copy the + * name into the buffer. + */ + if ((cnp->cn_flags & HASBUF) == 0) + cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); + if (ndp->ni_segflg == UIO_SYSSPACE) + error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, + &ndp->ni_pathlen); + else + error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, + &ndp->ni_pathlen); + + if (error != 0) { + namei_cleanup_cnp(cnp); + return (error); + } + + cnp->cn_nameptr = cnp->cn_pnbuf; + + /* + * Don't allow empty pathnames. + */ + if (*cnp->cn_pnbuf == '\0') { + namei_cleanup_cnp(cnp); + return (ENOENT); + } + + /* + * First try the fast path. + * + * If it fails to handle the lookup, we are going to perform it below. + * Note this means that we either start from scratch or continue where it + * left off. + */ + error = cache_fplookup(ndp, &status, &pwd); + switch (status) { + case CACHE_FPL_STATUS_UNSET: + __assert_unreachable(); + break; + case CACHE_FPL_STATUS_HANDLED: + return (error); + case CACHE_FPL_STATUS_PARTIAL: + dp = ndp->ni_startdir; + break; + case CACHE_FPL_STATUS_ABORTED: + error = namei_setup(ndp, &dp, &pwd); + if (error != 0) { + namei_cleanup_cnp(cnp); + return (error); + } + break; + } + + /* + * Perform the lookup. + */ for (;;) { ndp->ni_startdir = dp; error = lookup(ndp); Index: sys/kern/vfs_mount.c =================================================================== --- sys/kern/vfs_mount.c +++ sys/kern/vfs_mount.c @@ -947,6 +947,7 @@ vput(vp); return (error); } + vn_seqc_write_begin(vp); VOP_UNLOCK(vp); /* Allocate and initialize the filesystem.
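
A minimal sketch of the handoff contract namei() now follows when consuming cache_fplookup(); the helper name example_namei_fast and its exact shape are illustrative, not part of the patch:

static int
example_namei_fast(struct nameidata *ndp, struct vnode **dpp, struct pwd **pwdp)
{
	enum cache_fpl_status status;
	int error;

	error = cache_fplookup(ndp, &status, pwdp);
	switch (status) {
	case CACHE_FPL_STATUS_HANDLED:
		/* Fully resolved locklessly; ni_vp is set up per the flags. */
		return (error);
	case CACHE_FPL_STATUS_PARTIAL:
		/* Prefix resolved; resume the locked loop from ni_startdir. */
		*dpp = ndp->ni_startdir;
		return (0);
	default:
		/* ABORTED: nothing was committed, redo the setup under locks. */
		return (namei_setup(ndp, dpp, pwdp));
	}
}
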
 */ @@ -979,9 +980,11 @@ VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); + vn_seqc_write_end(vp); vrele(vp); return (error); } + vn_seqc_write_begin(newdp); VOP_UNLOCK(newdp); if (mp->mnt_opt != NULL) @@ -1018,6 +1021,8 @@ EVENTHANDLER_DIRECT_INVOKE(vfs_mounted, mp, newdp, td); VOP_UNLOCK(newdp); mountcheckdirs(vp, newdp); + vn_seqc_write_end(vp); + vn_seqc_write_end(newdp); vrele(newdp); if ((mp->mnt_flag & MNT_RDONLY) == 0) vfs_allocate_syncvnode(mp); @@ -1094,7 +1099,9 @@ VOP_UNLOCK(vp); vfs_op_enter(mp); + vn_seqc_write_begin(vp); + rootvp = NULL; MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { MNT_IUNLOCK(mp); @@ -1108,8 +1115,6 @@ mp->mnt_kern_flag &= ~MNTK_ASYNC; rootvp = vfs_cache_root_clear(mp); MNT_IUNLOCK(mp); - if (rootvp != NULL) - vrele(rootvp); mp->mnt_optnew = *optlist; vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); @@ -1233,6 +1238,11 @@ vfs_deallocate_syncvnode(mp); end: vfs_op_exit(mp); + if (rootvp != NULL) { + vn_seqc_write_end(rootvp); + vrele(rootvp); + } + vn_seqc_write_end(vp); vfs_unbusy(mp); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; @@ -1723,14 +1733,19 @@ } mp->mnt_kern_flag |= MNTK_UNMOUNT; rootvp = vfs_cache_root_clear(mp); + if (coveredvp != NULL) + vn_seqc_write_begin(coveredvp); if (flags & MNT_NONBUSY) { MNT_IUNLOCK(mp); error = vfs_check_usecounts(mp); MNT_ILOCK(mp); if (error != 0) { + if (coveredvp != NULL) + vn_seqc_write_end(coveredvp); dounmount_cleanup(mp, coveredvp, MNTK_UNMOUNT); - if (rootvp != NULL) + if (rootvp != NULL) { + vn_seqc_write_end(rootvp); vrele(rootvp); + } return (error); } } @@ -1759,22 +1774,19 @@ ("%s: invalid return value for msleep in the drain path @ %s:%d", __func__, __FILE__, __LINE__)); - if (rootvp != NULL) + /* + * We want to keep the vnode around so that we can vn_seqc_write_end + * after we are done with unmount. Downgrade our reference to a mere + * hold count so that we don't interfere with anything. + */ + if (rootvp != NULL) { + vhold(rootvp); vrele(rootvp); + } if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); - /* - * From now, we can claim that the use reference on the - * coveredvp is ours, and the ref can be released only by - * successfull unmount by us, or left for later unmount - * attempt. The previously acquired hold reference is no - * longer needed to protect the vnode from reuse.
- */ - if (coveredvp != NULL) - vdrop(coveredvp); - vfs_periodic(mp, MNT_WAIT); MNT_ILOCK(mp); async_flag = mp->mnt_flag & MNT_ASYNC; @@ -1809,8 +1821,15 @@ } vfs_op_exit_locked(mp); MNT_IUNLOCK(mp); - if (coveredvp) + if (coveredvp) { + vn_seqc_write_end(coveredvp); VOP_UNLOCK(coveredvp); + vdrop(coveredvp); + } + if (rootvp != NULL) { + vn_seqc_write_end(rootvp); + vdrop(rootvp); + } return (error); } mtx_lock(&mountlist_mtx); @@ -1819,7 +1838,13 @@ EVENTHANDLER_DIRECT_INVOKE(vfs_unmounted, mp, td); if (coveredvp != NULL) { coveredvp->v_mountedhere = NULL; + vn_seqc_write_end(coveredvp); VOP_UNLOCK(coveredvp); + vdrop(coveredvp); + } + if (rootvp != NULL) { + vn_seqc_write_end(rootvp); + vdrop(rootvp); } vfs_event_signal(NULL, VQ_UNMOUNT, 0); if (rootvnode != NULL && mp == rootvnode->v_mount) { Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -664,8 +664,8 @@ vnode_list_reclaim_marker = vn_alloc_marker(NULL); TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist); vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, - vnode_init, vnode_fini, UMA_ALIGN_PTR, UMA_ZONE_SMR); - vfs_smr = uma_zone_get_smr(vnode_zone); + vnode_init, vnode_fini, UMA_ALIGN_PTR, 0); + uma_zone_set_smr(vnode_zone, vfs_smr); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* @@ -1761,6 +1761,7 @@ */ CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp); bo = &vp->v_bufobj; + VNPASS(vp->v_seqc_users == 0, vp); VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); VNPASS(vp->v_holdcnt == VHOLD_NO_SMR, vp); VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); @@ -2889,6 +2890,17 @@ return (vs); } +void +vget_abort(struct vnode *vp, enum vgetstate vs) +{ + + VNPASS(vs == VGET_HOLDCNT || vs == VGET_USECOUNT, vp); + if (vs == VGET_USECOUNT) + vrele(vp); + else + vdrop(vp); +} + int vget(struct vnode *vp, int flags, struct thread *td) { @@ -2951,10 +2963,7 @@ error = vn_lock(vp, flags); if (__predict_false(error != 0)) { - if (vs == VGET_USECOUNT) - vrele(vp); - else - vdrop(vp); + vget_abort(vp, vs); CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__, vp); return (error); @@ -3032,6 +3041,44 @@ return; } +bool +vref_smr(struct vnode *vp) +{ + int old; + + CTR2(KTR_VFS, "%s: vp %p", __func__, vp); + VFS_SMR_ASSERT_ENTERED(); + + /* + * Devices are not supported since they may require taking the interlock. + */ + VNPASS(vp->v_type != VCHR, vp); + + if (refcount_acquire_if_not_zero(&vp->v_usecount)) { + VNODE_REFCOUNT_FENCE_ACQ(); + VNPASS(vp->v_holdcnt > 0, vp); + return (true); + } + + if (!vhold_smr(vp)) + return (false); + + /* + * See the comment in vget_finish. 
+ */ + old = atomic_fetchadd_int(&vp->v_usecount, 1); + VNASSERT(old >= 0, vp, ("%s: wrong use count %d", __func__, old)); + if (old != 0) { +#ifdef INVARIANTS + old = atomic_fetchadd_int(&vp->v_holdcnt, -1); + VNASSERT(old > 1, vp, ("%s: wrong hold count %d", __func__, old)); +#else + refcount_release(&vp->v_holdcnt); +#endif + } + return (true); +} + void vref(struct vnode *vp) { @@ -3986,6 +4033,7 @@ */ if (vp->v_irflag & VIRF_DOOMED) return; + vn_seqc_write_begin_locked(vp); vunlazy_gone(vp); vp->v_irflag |= VIRF_DOOMED; @@ -4088,6 +4136,7 @@ vp->v_vnlock = &vp->v_lock; vp->v_op = &dead_vnodeops; vp->v_type = VBAD; + vn_seqc_write_end_locked(vp); } /* @@ -4128,8 +4177,9 @@ printf("%p: ", (void *)vp); printf("type %s\n", typename[vp->v_type]); holdcnt = atomic_load_int(&vp->v_holdcnt); - printf(" usecount %d, writecount %d, refcount %d", - vp->v_usecount, vp->v_writecount, holdcnt & ~VHOLD_ALL_FLAGS); + printf(" usecount %d, writecount %d, refcount %d seqc users %d", + vp->v_usecount, vp->v_writecount, holdcnt & ~VHOLD_ALL_FLAGS, + vp->v_seqc_users); switch (vp->v_type) { case VDIR: printf(" mountedhere %p\n", vp->v_mountedhere); @@ -4381,6 +4431,7 @@ MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT); MNT_KERN_FLAG(MNTK_MARKER); MNT_KERN_FLAG(MNTK_USES_BCACHE); + MNT_KERN_FLAG(MNTK_FPLOOKUP); MNT_KERN_FLAG(MNTK_NOASYNC); MNT_KERN_FLAG(MNTK_UNMOUNT); MNT_KERN_FLAG(MNTK_MWAIT); @@ -5196,6 +5247,38 @@ return (error == 0); } +/* + * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see + * the comment above cache_fplookup for details. + * + * We never deny as priv_check_cred calls are not yet supported, see vaccess. + */ +int +vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, struct ucred *cred) +{ + + VFS_SMR_ASSERT_ENTERED(); + + /* Check the owner. */ + if (cred->cr_uid == file_uid) { + if (file_mode & S_IXUSR) + return (0); + return (EAGAIN); + } + + /* Otherwise, check the groups (first match) */ + if (groupmember(file_gid, cred)) { + if (file_mode & S_IXGRP) + return (0); + return (EAGAIN); + } + + /* Otherwise, check everyone else. */ + if (file_mode & S_IXOTH) + return (0); + return (EAGAIN); +} + /* * Common filesystem object access control check routine. Accepts a * vnode's type, "mode", uid and gid, requested access mode, credentials, @@ -5476,6 +5559,14 @@ ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); #endif + /* + * It may be tempting to add vn_seqc_write_begin/end calls here and + * in vop_rename_post but that's not going to work out since some + * filesystems relookup vnodes mid-rename. This is probably a bug. + * + * For now filesystems are expected to do the relevant calls after they + * decide what vnodes to operate on. 
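
A minimal sketch of the staged-reference dance that cache_fplookup_final() performs with the helpers added here (variable names as in that function; assumes tvp and its seqc were sampled under vfs_smr):

	enum vgetstate tvs;
	int error;

	tvs = vget_prep_smr(tvp);	/* VGET_NONE: the vnode is being freed */
	if (tvs == VGET_NONE)
		return (EAGAIN);
	vfs_smr_exit();			/* safe: we now hold a count on tvp */
	error = vget_finish(tvp, LK_SHARED, tvs);
	/* a failure here must undo the staged reference; see vget_abort */
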
+ */ if (a->a_tdvp != a->a_fdvp) vhold(a->a_fdvp); if (a->a_tvp != a->a_fvp) @@ -5486,6 +5577,20 @@ } #ifdef DEBUG_VFS_LOCKS +void +vop_fplookup_vexec_pre(void *ap __unused) +{ + + VFS_SMR_ASSERT_ENTERED(); +} + +void +vop_fplookup_vexec_post(void *ap __unused, int rc __unused) +{ + + VFS_SMR_ASSERT_ENTERED(); +} + void vop_strategy_pre(void *ap) { @@ -5565,11 +5670,26 @@ VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } +void +vop_deleteextattr_pre(void *ap) +{ + struct vop_deleteextattr_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_begin(vp); +} + void vop_deleteextattr_post(void *ap, int rc) { - struct vop_deleteextattr_args *a = ap; + struct vop_deleteextattr_args *a; + struct vnode *vp; + a = ap; + vp = a->a_vp; + vn_seqc_write_end(vp); if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } @@ -5672,22 +5792,74 @@ } } +void +vop_setattr_pre(void *ap) +{ + struct vop_setattr_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_begin(vp); +} + void vop_setattr_post(void *ap, int rc) { - struct vop_setattr_args *a = ap; + struct vop_setattr_args *a; + struct vnode *vp; + a = ap; + vp = a->a_vp; + vn_seqc_write_end(vp); if (!rc) - VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); + VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); +} + +void +vop_setacl_pre(void *ap) +{ + struct vop_setacl_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_begin(vp); +} + +void +vop_setacl_post(void *ap, int rc __unused) +{ + struct vop_setacl_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_end(vp); +} + +void +vop_setextattr_pre(void *ap) +{ + struct vop_setextattr_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_begin(vp); } void vop_setextattr_post(void *ap, int rc) { - struct vop_setextattr_args *a = ap; + struct vop_setextattr_args *a; + struct vnode *vp; + a = ap; + vp = a->a_vp; + vn_seqc_write_end(vp); if (!rc) - VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); + VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); } void @@ -6249,6 +6421,8 @@ */ MPASS(mp->mnt_vfs_ops > 0); vp = mp->mnt_rootvnode; + if (vp != NULL) + vn_seqc_write_begin(vp); mp->mnt_rootvnode = NULL; return (vp); } @@ -6545,3 +6719,45 @@ return (VOP_ACCESS(vp, VEXEC, cnp->cn_cred, cnp->cn_thread)); } + +void +vn_seqc_write_begin_locked(struct vnode *vp) +{ + + ASSERT_VI_LOCKED(vp, __func__); + VNPASS(vp->v_holdcnt > 0, vp); + VNPASS(vp->v_seqc_users >= 0, vp); + vp->v_seqc_users++; + if (vp->v_seqc_users == 1) + seqc_sleepable_write_begin(&vp->v_seqc); +} + +void +vn_seqc_write_begin(struct vnode *vp) +{ + + VI_LOCK(vp); + vn_seqc_write_begin_locked(vp); + VI_UNLOCK(vp); +} + +void +vn_seqc_write_end_locked(struct vnode *vp) +{ + + ASSERT_VI_LOCKED(vp, __func__); + VNPASS(vp->v_holdcnt > 0, vp); + VNPASS(vp->v_seqc_users > 0, vp); + vp->v_seqc_users--; + if (vp->v_seqc_users == 0) + seqc_sleepable_write_end(&vp->v_seqc); +} + +void +vn_seqc_write_end(struct vnode *vp) +{ + + VI_LOCK(vp); + vn_seqc_write_end_locked(vp); + VI_UNLOCK(vp); +} Index: sys/kern/vnode_if.src =================================================================== --- sys/kern/vnode_if.src +++ sys/kern/vnode_if.src @@ -142,6 +142,17 @@ }; +%% fplookup_vexec vp - - - +%! fplookup_vexec pre vop_fplookup_vexec_pre +%! fplookup_vexec post vop_fplookup_vexec_post + +vop_fplookup_vexec { + IN struct vnode *vp; + IN struct ucred *cred; + IN struct thread *td; +}; + + %% access vp L L L vop_access { @@ -172,6 +183,7 @@ %% setattr vp E E E +%! setattr pre vop_setattr_pre %! 
setattr post vop_setattr_post vop_setattr { @@ -523,6 +535,8 @@ %% setacl vp E E E +%! setacl pre vop_setacl_pre +%! setacl post vop_setacl_post vop_setacl { IN struct vnode *vp; @@ -589,6 +603,7 @@ %% deleteextattr vp E E E +%! deleteextattr pre vop_deleteextattr_pre %! deleteextattr post vop_deleteextattr_post vop_deleteextattr { @@ -601,6 +616,7 @@ %% setextattr vp E E E +%! setextattr pre vop_setextattr_pre %! setextattr post vop_setextattr_post vop_setextattr { Index: sys/security/mac/mac_framework.h =================================================================== --- sys/security/mac/mac_framework.h +++ sys/security/mac/mac_framework.h @@ -422,13 +422,14 @@ int mac_vnode_check_lookup_impl(struct ucred *cred, struct vnode *dvp, struct componentname *cnp); extern bool mac_vnode_check_lookup_fp_flag; +#define mac_vnode_check_lookup_enabled() __predict_false(mac_vnode_check_lookup_fp_flag) static inline int mac_vnode_check_lookup(struct ucred *cred, struct vnode *dvp, struct componentname *cnp) { mac_vnode_assert_locked(dvp, "mac_vnode_check_lookup"); - if (__predict_false(mac_vnode_check_lookup_fp_flag)) + if (mac_vnode_check_lookup_enabled()) return (mac_vnode_check_lookup_impl(cred, dvp, cnp)); return (0); } Index: sys/sys/_seqc.h =================================================================== --- /dev/null +++ sys/sys/_seqc.h @@ -0,0 +1,6 @@ +#ifndef _SYS__SEQC_H_ +#define _SYS__SEQC_H_ + +typedef uint32_t seqc_t; + +#endif /* _SYS__SEQC_H_ */ Index: sys/sys/filedesc.h =================================================================== --- sys/sys/filedesc.h +++ sys/sys/filedesc.h @@ -310,6 +310,7 @@ smr_serialized_store(&fdp->fd_pwd, newpwd, (FILEDESC_XLOCK_ASSERT(fdp), true)); } +struct pwd *pwd_get_smr(void); #endif /* _KERNEL */ Index: sys/sys/mount.h =================================================================== --- sys/sys/mount.h +++ sys/sys/mount.h @@ -420,6 +420,7 @@ #define MNTK_TEXT_REFS 0x00008000 /* Keep use ref for text */ #define MNTK_VMSETSIZE_BUG 0x00010000 #define MNTK_UNIONFS 0x00020000 /* A hack for F_ISUNIONSTACK */ +#define MNTK_FPLOOKUP 0x00040000 /* fast path lookup is supported */ #define MNTK_NOASYNC 0x00800000 /* disable async */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ Index: sys/sys/namei.h =================================================================== --- sys/sys/namei.h +++ sys/sys/namei.h @@ -108,6 +108,12 @@ }; #ifdef _KERNEL + +enum cache_fpl_status { CACHE_FPL_STATUS_ABORTED, CACHE_FPL_STATUS_PARTIAL, + CACHE_FPL_STATUS_HANDLED, CACHE_FPL_STATUS_UNSET }; +int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status, + struct pwd **pwdp); + /* * namei operations */ Index: sys/sys/seqc.h =================================================================== --- sys/sys/seqc.h +++ sys/sys/seqc.h @@ -36,7 +36,7 @@ /* * seqc_t may be included in structs visible to userspace */ -typedef uint32_t seqc_t; +#include <sys/_seqc.h> #ifdef _KERNEL @@ -111,5 +111,26 @@ return (seqc_consistent_nomb(seqcp, oldseqc)); } +/* + * Variants which do not critical_enter/critical_exit, allowing the writer + * to sleep while the counter is in the modified state.
 */ +static __inline void +seqc_sleepable_write_begin(seqc_t *seqcp) +{ + + MPASS(!seqc_in_modify(*seqcp)); + *seqcp += 1; + atomic_thread_fence_rel(); +} + +static __inline void +seqc_sleepable_write_end(seqc_t *seqcp) +{ + + atomic_thread_fence_rel(); + *seqcp += 1; + MPASS(!seqc_in_modify(*seqcp)); +} + #endif /* _KERNEL */ #endif /* _SYS_SEQC_H_ */ Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -45,6 +45,7 @@ #include #include #include +#include <sys/_seqc.h> /* * The vnode is the focus of all file activity in UNIX. There is a * @@ -105,6 +106,7 @@ */ enum vtype v_type:8; /* u vnode type */ short v_irflag; /* i frequently read flags */ + seqc_t v_seqc; /* i modification count */ struct vop_vector *v_op; /* u vnode operations vector */ void *v_data; /* u private data for fs */ @@ -175,6 +177,7 @@ short v_dbatchcpu; /* i LRU requeue deferral batch */ int v_writecount; /* I ref count of writers or (negative) text users */ + int v_seqc_users; /* i modifications pending */ u_int v_hash; }; @@ -539,6 +542,18 @@ #define ASSERT_VOP_LOCKED(vp, str) assert_vop_locked((vp), (str)) #define ASSERT_VOP_UNLOCKED(vp, str) assert_vop_unlocked((vp), (str)) +#define ASSERT_VOP_IN_SEQC(vp) do { \ + struct vnode *_vp = (vp); \ + \ + VNPASS(seqc_in_modify(_vp->v_seqc), _vp); \ +} while (0) + +#define ASSERT_VOP_NOT_IN_SEQC(vp) do { \ + struct vnode *_vp = (vp); \ + \ + VNPASS(!seqc_in_modify(_vp->v_seqc), _vp); \ +} while (0) + #else /* !DEBUG_VFS_LOCKS */ #define ASSERT_VI_LOCKED(vp, str) ((void)0) @@ -546,6 +561,10 @@ #define ASSERT_VOP_ELOCKED(vp, str) ((void)0) #define ASSERT_VOP_LOCKED(vp, str) ((void)0) #define ASSERT_VOP_UNLOCKED(vp, str) ((void)0) + +#define ASSERT_VOP_IN_SEQC(vp) ((void)0) +#define ASSERT_VOP_NOT_IN_SEQC(vp) ((void)0) + #endif /* DEBUG_VFS_LOCKS */ @@ -602,6 +621,7 @@ struct vattr; struct vfsops; struct vnode; +struct pwd; typedef int (*vn_get_ino_t)(struct mount *, void *, int, struct vnode **); @@ -619,6 +639,10 @@ void cache_purge(struct vnode *vp); void cache_purge_negative(struct vnode *vp); void cache_purgevfs(struct mount *mp, bool force); +void vn_seqc_write_begin_locked(struct vnode *vp); +void vn_seqc_write_begin(struct vnode *vp); +void vn_seqc_write_end_locked(struct vnode *vp); +void vn_seqc_write_end(struct vnode *vp); int change_dir(struct vnode *vp, struct thread *td); void cvtstat(struct stat *st, struct ostat *ost); void freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb); @@ -644,6 +668,8 @@ int vn_commname(struct vnode *vn, char *buf, u_int buflen); int vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path, u_int pathlen); +int vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, + struct ucred *cred); int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred, int *privused); @@ -663,6 +689,7 @@ enum vgetstate vget_prep_smr(struct vnode *vp); enum vgetstate vget_prep(struct vnode *vp); int vget_finish(struct vnode *vp, int flags, enum vgetstate vs); +void vget_abort(struct vnode *vp, enum vgetstate vs); void vgone(struct vnode *vp); void vhold(struct vnode *); void vholdl(struct vnode *); @@ -805,6 +832,7 @@ /* These are called from within the actual VOPS.
*/ void vop_close_post(void *a, int rc); void vop_create_post(void *a, int rc); +void vop_deleteextattr_pre(void *a); void vop_deleteextattr_post(void *a, int rc); void vop_link_post(void *a, int rc); void vop_lookup_post(void *a, int rc); @@ -819,12 +847,18 @@ void vop_rename_post(void *a, int rc); void vop_rename_pre(void *a); void vop_rmdir_post(void *a, int rc); +void vop_setattr_pre(void *a); void vop_setattr_post(void *a, int rc); +void vop_setacl_pre(void *a); +void vop_setacl_post(void *a, int rc); +void vop_setextattr_pre(void *a); void vop_setextattr_post(void *a, int rc); void vop_symlink_post(void *a, int rc); int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a); #ifdef DEBUG_VFS_LOCKS +void vop_fplookup_vexec_pre(void *a); +void vop_fplookup_vexec_post(void *a, int rc); void vop_strategy_pre(void *a); void vop_lock_pre(void *a); void vop_lock_post(void *a, int rc); @@ -832,6 +866,8 @@ void vop_need_inactive_pre(void *a); void vop_need_inactive_post(void *a, int rc); #else +#define vop_fplookup_vexec_pre(x) do { } while (0) +#define vop_fplookup_vexec_post(x, y) do { } while (0) #define vop_strategy_pre(x) do { } while (0) #define vop_lock_pre(x) do { } while (0) #define vop_lock_post(x, y) do { } while (0) @@ -901,6 +937,7 @@ void vput(struct vnode *vp); void vrele(struct vnode *vp); void vref(struct vnode *vp); +bool vref_smr(struct vnode *vp); void vrefl(struct vnode *vp); void vrefact(struct vnode *vp); void vrefactn(struct vnode *vp, u_int n);
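
To summarize the per-filesystem opt-in this patch demonstrates with tmpfs, a minimal sketch; example_node_pool and example_fplookup_vexec are placeholders for a filesystem's own zone and method, not names from the patch:

/* 1. Back the per-file node allocations with an SMR-managed zone, so the
 *    lockless path can race against free without touching reclaimed memory. */
VFS_SMR_ZONE_SET(example_node_pool);

/* 2. Implement VOP_FPLOOKUP_VEXEC, honoring the contract documented above
 *    cache_fplookup (see tmpfs_fplookup_vexec for a worked instance):
 *    .vop_fplookup_vexec = example_fplookup_vexec, in the vop_vector. */

/* 3. Announce support at mount time; without this the VFS never tries. */
MNT_ILOCK(mp);
mp->mnt_kern_flag |= MNTK_FPLOOKUP;
MNT_IUNLOCK(mp);
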