Index: sys/kern/kern_descrip.c =================================================================== --- sys/kern/kern_descrip.c +++ sys/kern/kern_descrip.c @@ -102,8 +102,8 @@ static __read_mostly uma_zone_t file_zone; static __read_mostly uma_zone_t filedesc0_zone; -static __read_mostly uma_zone_t pwd_zone; -static __read_mostly smr_t pwd_smr; +__read_mostly uma_zone_t pwd_zone; +VFS_SMR_DECLARE; static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders); @@ -3346,14 +3346,24 @@ fdp = td->td_proc->p_fd; - smr_enter(pwd_smr); + vfs_smr_enter(); for (;;) { - pwd = smr_entered_load(&fdp->fd_pwd, pwd_smr); + pwd = smr_entered_load(&fdp->fd_pwd, VFS_SMR()); MPASS(pwd != NULL); if (refcount_acquire_if_not_zero(&pwd->pwd_refcount)) break; } - smr_exit(pwd_smr); + vfs_smr_exit(); + return (pwd); +} + +struct pwd * +pwd_get_smr(void) +{ + struct pwd *pwd; + + pwd = smr_entered_load(&curproc->p_fd->fd_pwd, VFS_SMR()); + MPASS(pwd != NULL); return (pwd); } @@ -4363,7 +4373,11 @@ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR); - pwd_smr = uma_zone_get_smr(pwd_zone); + /* + * XXXMJG this is a temporary hack due to boot ordering issues against + * the vnode zone. + */ + vfs_smr = uma_zone_get_smr(pwd_zone); mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); } SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL); Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -664,8 +664,8 @@ vnode_list_reclaim_marker = vn_alloc_marker(NULL); TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist); vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, - vnode_init, vnode_fini, UMA_ALIGN_PTR, UMA_ZONE_SMR); - vfs_smr = uma_zone_get_smr(vnode_zone); + vnode_init, vnode_fini, UMA_ALIGN_PTR, 0); + uma_zone_set_smr(vnode_zone, vfs_smr); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* @@ -2890,6 +2890,21 @@ return (vs); } +void +vget_abort(struct vnode *vp, enum vgetstate vs) +{ + + switch (vs) { + case VGET_USECOUNT: + vrele(vp); + break; + case VGET_HOLDCNT: + vdrop(vp); + default: + __assert_unreachable(); + } +} + int vget(struct vnode *vp, int flags, struct thread *td) { @@ -2952,10 +2967,7 @@ error = vn_lock(vp, flags); if (__predict_false(error != 0)) { - if (vs == VGET_USECOUNT) - vrele(vp); - else - vdrop(vp); + vget_abort(vp, vs); CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__, vp); return (error); @@ -3033,6 +3045,44 @@ return; } +bool +vref_smr(struct vnode *vp) +{ + int old; + + CTR2(KTR_VFS, "%s: vp %p", __func__, vp); + VFS_SMR_ASSERT_ENTERED(); + + /* + * Devices are not supported since they may require taking the interlock. + */ + VNPASS(vp->v_type != VCHR, vp); + + if (refcount_acquire_if_not_zero(&vp->v_usecount)) { + VNODE_REFCOUNT_FENCE_ACQ(); + VNPASS(vp->v_holdcnt > 0, vp); + return (true); + } + + if (!vhold_smr(vp)) + return (false); + + /* + * See the comment in vget_finish. + */ + old = atomic_fetchadd_int(&vp->v_usecount, 1); + VNASSERT(old >= 0, vp, ("%s: wrong use count %d", __func__, old)); + if (old != 0) { +#ifdef INVARIANTS + old = atomic_fetchadd_int(&vp->v_holdcnt, -1); + VNASSERT(old > 1, vp, ("%s: wrong hold count %d", __func__, old)); +#else + refcount_release(&vp->v_holdcnt); +#endif + } + return (true); +} + void vref(struct vnode *vp) { @@ -4398,6 +4448,7 @@ MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT); MNT_KERN_FLAG(MNTK_MARKER); MNT_KERN_FLAG(MNTK_USES_BCACHE); + MNT_KERN_FLAG(MNTK_FPLOOKUP); MNT_KERN_FLAG(MNTK_NOASYNC); MNT_KERN_FLAG(MNTK_UNMOUNT); MNT_KERN_FLAG(MNTK_MWAIT); @@ -5213,6 +5264,38 @@ return (error == 0); } +/* + * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see + * the comment above cache_fplookup for details. + * + * We never deny as priv_check_cred calls are not yet supported, see vaccess. + */ +int +vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, struct ucred *cred) +{ + + VFS_SMR_ASSERT_ENTERED(); + + /* Check the owner. */ + if (cred->cr_uid == file_uid) { + if (file_mode & S_IXUSR) + return (0); + return (EAGAIN); + } + + /* Otherwise, check the groups (first match) */ + if (groupmember(file_gid, cred)) { + if (file_mode & S_IXGRP) + return (0); + return (EAGAIN); + } + + /* Otherwise, check everyone else. */ + if (file_mode & S_IXOTH) + return (0); + return (EAGAIN); +} + /* * Common filesystem object access control check routine. Accepts a * vnode's type, "mode", uid and gid, requested access mode, credentials, @@ -5511,6 +5594,20 @@ } #ifdef DEBUG_VFS_LOCKS +void +vop_fplookup_vexec_pre(void *ap __unused) +{ + + VFS_SMR_ASSERT_ENTERED(); +} + +void +vop_fplookup_vexec_post(void *ap __unused, int rc __unused) +{ + + VFS_SMR_ASSERT_ENTERED(); +} + void vop_strategy_pre(void *ap) { Index: sys/kern/vnode_if.src =================================================================== --- sys/kern/vnode_if.src +++ sys/kern/vnode_if.src @@ -142,6 +142,17 @@ }; +%% fplookup_vexec vp - - - +%! fplookup_vexec pre vop_fplookup_vexec_pre +%! fplookup_vexec post vop_fplookup_vexec_post + +vop_fplookup_vexec { + IN struct vnode *vp; + IN struct ucred *cred; + IN struct thread *td; +}; + + %% access vp L L L vop_access { Index: sys/security/mac/mac_framework.h =================================================================== --- sys/security/mac/mac_framework.h +++ sys/security/mac/mac_framework.h @@ -422,13 +422,14 @@ int mac_vnode_check_lookup_impl(struct ucred *cred, struct vnode *dvp, struct componentname *cnp); extern bool mac_vnode_check_lookup_fp_flag; +#define mac_vnode_check_lookup_enabled() __predict_false(mac_vnode_check_lookup_fp_flag) static inline int mac_vnode_check_lookup(struct ucred *cred, struct vnode *dvp, struct componentname *cnp) { mac_vnode_assert_locked(dvp, "mac_vnode_check_lookup"); - if (__predict_false(mac_vnode_check_lookup_fp_flag)) + if (mac_vnode_check_lookup_enabled()) return (mac_vnode_check_lookup_impl(cred, dvp, cnp)); return (0); } Index: sys/sys/filedesc.h =================================================================== --- sys/sys/filedesc.h +++ sys/sys/filedesc.h @@ -310,6 +310,7 @@ smr_serialized_store(&fdp->fd_pwd, newpwd, (FILEDESC_XLOCK_ASSERT(fdp), true)); } +struct pwd *pwd_get_smr(void); #endif /* _KERNEL */ Index: sys/sys/mount.h =================================================================== --- sys/sys/mount.h +++ sys/sys/mount.h @@ -420,6 +420,7 @@ #define MNTK_TEXT_REFS 0x00008000 /* Keep use ref for text */ #define MNTK_VMSETSIZE_BUG 0x00010000 #define MNTK_UNIONFS 0x00020000 /* A hack for F_ISUNIONSTACK */ +#define MNTK_FPLOOKUP 0x00040000 /* fast path lookup is supported */ #define MNTK_NOASYNC 0x00800000 /* disable async */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -666,6 +666,8 @@ int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred, int *privused); +int vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, + struct ucred *cred); int vaccess_acl_nfs4(enum vtype type, uid_t file_uid, gid_t file_gid, struct acl *aclp, accmode_t accmode, struct ucred *cred, int *privused); @@ -682,6 +684,7 @@ enum vgetstate vget_prep_smr(struct vnode *vp); enum vgetstate vget_prep(struct vnode *vp); int vget_finish(struct vnode *vp, int flags, enum vgetstate vs); +void vget_abort(struct vnode *vp, enum vgetstate vs); void vgone(struct vnode *vp); void vhold(struct vnode *); void vholdl(struct vnode *); @@ -856,6 +859,8 @@ int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a); #ifdef DEBUG_VFS_LOCKS +void vop_fplookup_vexec_pre(void *a); +void vop_fplookup_vexec_post(void *a, int rc); void vop_strategy_pre(void *a); void vop_lock_pre(void *a); void vop_lock_post(void *a, int rc); @@ -863,6 +868,8 @@ void vop_need_inactive_pre(void *a); void vop_need_inactive_post(void *a, int rc); #else +#define vop_fplookup_vexec_pre(x) do { } while (0) +#define vop_fplookup_vexec_post(x, y) do { } while (0) #define vop_strategy_pre(x) do { } while (0) #define vop_lock_pre(x) do { } while (0) #define vop_lock_post(x, y) do { } while (0) @@ -932,6 +939,7 @@ void vput(struct vnode *vp); void vrele(struct vnode *vp); void vref(struct vnode *vp); +bool vref_smr(struct vnode *vp); void vrefl(struct vnode *vp); void vrefact(struct vnode *vp); void vrefactn(struct vnode *vp, u_int n);