Index: sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c
@@ -87,7 +87,6 @@
 {
 	VERIFY(vp->v_count > 0);
 	if (refcount_release_if_not_last(&vp->v_usecount)) {
-		vdrop(vp);
 		return;
 	}
 	VERIFY(taskq_dispatch((taskq_t *)taskq,
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
@@ -1196,7 +1196,7 @@
 	vnode_t *vp;
 	sfs_node_t *node;
 	size_t len;
-	int locked;
+	int locked, vget_flags;
 	int error;
 
 	vp = ap->a_vp;
@@ -1224,7 +1224,7 @@
 	 * before we can lock the vnode again.
 	 */
	locked = VOP_ISLOCKED(vp);
-	vhold(vp);
+	vget_flags = vget_prep(vp);
 	vput(vp);
 
 	/* Look up .zfs/snapshot, our parent. */
@@ -1236,7 +1236,7 @@
 		bcopy(node->sn_name, ap->a_buf + *ap->a_buflen, len);
 	}
 	vfs_unbusy(mp);
-	vget(vp, locked | LK_VNHELD | LK_RETRY, curthread);
+	vget_finish(vp, vget_flags | locked | LK_RETRY);
 	return (error);
 }
Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
===================================================================
--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
+++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
@@ -5916,7 +5916,7 @@
 	vnode_t *vp = ap->a_vp;
 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
 	znode_t *zp = VTOZ(vp);
-	int ltype;
+	int ltype, vget_flags;
 	int error;
 
 	ZFS_ENTER(zfsvfs);
@@ -5948,10 +5948,10 @@
 	ZFS_EXIT(zfsvfs);
 
 	covered_vp = vp->v_mount->mnt_vnodecovered;
-	vhold(covered_vp);
+	vget_flags = vget_prep(covered_vp);
 	ltype = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp, 0);
-	error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread);
+	error = vget_finish(covered_vp, vget_flags | LK_SHARED);
 	if (error == 0) {
 		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred,
 		    ap->a_buf, ap->a_buflen);
Index: sys/kern/vfs_cache.c
===================================================================
--- sys/kern/vfs_cache.c
+++ sys/kern/vfs_cache.c
@@ -1255,7 +1255,7 @@
 	struct rwlock *blp;
 	struct mtx *dvlp;
 	uint32_t hash;
-	int error, ltype;
+	int error, ltype, vget_flags;
 
 	if (__predict_false(!doingcache)) {
 		cnp->cn_flags &= ~MAKEENTRY;
@@ -1369,9 +1369,9 @@
 		ltype = VOP_ISLOCKED(dvp);
 		VOP_UNLOCK(dvp, 0);
 	}
-	vhold(*vpp);
+	vget_flags = vget_prep(*vpp);
 	cache_lookup_unlock(blp, dvlp);
-	error = vget(*vpp, cnp->cn_lkflags | LK_VNHELD, cnp->cn_thread);
+	error = vget_finish(*vpp, vget_flags | cnp->cn_lkflags);
 	if (cnp->cn_flags & ISDOTDOT) {
 		vn_lock(dvp, ltype | LK_RETRY);
 		if (dvp->v_iflag & VI_DOOMED) {
@@ -2430,6 +2430,7 @@
 	struct namecache *ncp;
 	struct vnode *ddvp;
 	struct mtx *vlp;
+	int vget_flags;
 
 	ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino");
 	vlp = VP2VNODELOCK(vp);
@@ -2438,9 +2439,9 @@
 		if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
 			continue;
 		ddvp = ncp->nc_dvp;
-		vhold(ddvp);
+		vget_flags = vget_prep(ddvp);
 		mtx_unlock(vlp);
-		if (vget(ddvp, LK_SHARED | LK_NOWAIT | LK_VNHELD, curthread))
+		if (vget_finish(ddvp, vget_flags | LK_SHARED | LK_NOWAIT))
 			return (NULL);
 		return (ddvp);
 	}
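[Reviewer note, not part of the patch: every converted caller above follows the same two-step shape: vget_prep() takes a reference that is safe to hold across dropping whatever lock guards the vnode pointer, and vget_finish() consumes that reference while locking the vnode. A minimal sketch of the pattern, with "guard" standing in for a hypothetical mutex protecting the vnode pointer:

	int vget_flags, error;

	vget_flags = vget_prep(vp);	/* returns LK_VNUSED or LK_VNHELD */
	mtx_unlock(&guard);		/* now safe to drop the guarding lock */
	error = vget_finish(vp, vget_flags | LK_SHARED);
	if (error != 0)
		return (error);		/* vget_finish released the reference */

On failure vget_finish() drops the reference itself (vrele() or vdrop(), depending on which kind vget_prep() returned), so the caller needs no cleanup.]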
Index: sys/kern/vfs_hash.c
===================================================================
--- sys/kern/vfs_hash.c
+++ sys/kern/vfs_hash.c
@@ -76,7 +76,7 @@
     struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg)
 {
 	struct vnode *vp;
-	int error;
+	int error, vget_flags;
 
 	while (1) {
 		rw_rlock(&vfs_hash_lock);
@@ -87,9 +87,9 @@
 			continue;
 		if (fn != NULL && fn(vp, arg))
 			continue;
-		vhold(vp);
+		vget_flags = vget_prep(vp);
 		rw_runlock(&vfs_hash_lock);
-		error = vget(vp, flags | LK_VNHELD, td);
+		error = vget_finish(vp, vget_flags | flags);
 		if (error == ENOENT && (flags & LK_NOWAIT) == 0)
 			break;
 		if (error)
@@ -149,7 +149,7 @@
     struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg)
 {
 	struct vnode *vp2;
-	int error;
+	int error, vget_flags;
 
 	*vpp = NULL;
 	while (1) {
@@ -162,9 +162,9 @@
 			continue;
 		if (fn != NULL && fn(vp2, arg))
 			continue;
-		vhold(vp2);
+		vget_flags = vget_prep(vp2);
 		rw_wunlock(&vfs_hash_lock);
-		error = vget(vp2, flags | LK_VNHELD, td);
+		error = vget_finish(vp2, vget_flags | flags);
 		if (error == ENOENT && (flags & LK_NOWAIT) == 0)
 			break;
 		rw_wlock(&vfs_hash_lock);
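[Reviewer note, not part of the patch: the vfs_subr.c changes below rework how v_usecount relates to v_holdcnt. Previously every use reference also took its own hold reference (via _vhold() in vget() and v_incr_usecount()); after this patch a nonzero usecount owns exactly one hold, taken on the 0->1 transition and returned on the 1->0 transition. A sketch of the invariant as read from the diff:

	/*
	 * After this patch (sketch):
	 *
	 *	vp->v_usecount > 0  implies  vp->v_holdcnt > 0
	 *
	 * Bumping the usecount from n to n + 1 (n > 0) takes no new hold;
	 * bumping it from 0 to 1 consumes the caller's hold (vget_finish())
	 * or takes one (vrefl()).  Dropping a usecount other than the last
	 * releases no hold, which is why vputx() below no longer pairs
	 * every release with vdrop().
	 */

This is what lets the common-case vrele()/vput() path complete with a single atomic on v_usecount instead of one on each counter.]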
Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -107,8 +107,6 @@
 static void	syncer_shutdown(void *arg, int howto);
 static int	vtryrecycle(struct vnode *vp);
 static void	v_init_counters(struct vnode *);
-static void	v_incr_usecount(struct vnode *);
-static void	v_incr_usecount_locked(struct vnode *);
 static void	v_incr_devcount(struct vnode *);
 static void	v_decr_devcount(struct vnode *);
 static void	vgonel(struct vnode *);
@@ -2632,44 +2630,6 @@
 	refcount_init(&vp->v_usecount, 1);
 }
 
-static void
-v_incr_usecount_locked(struct vnode *vp)
-{
-
-	ASSERT_VI_LOCKED(vp, __func__);
-	if ((vp->v_iflag & VI_OWEINACT) != 0) {
-		VNASSERT(vp->v_usecount == 0, vp,
-		    ("vnode with usecount and VI_OWEINACT set"));
-		vp->v_iflag &= ~VI_OWEINACT;
-		VNODE_REFCOUNT_FENCE_REL();
-	}
-	refcount_acquire(&vp->v_usecount);
-	v_incr_devcount(vp);
-}
-
-/*
- * Increment the use count on the vnode, taking care to reference
- * the driver's usecount if this is a chardev.
- */
-static void
-v_incr_usecount(struct vnode *vp)
-{
-
-	ASSERT_VI_UNLOCKED(vp, __func__);
-	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-
-	if (vp->v_type != VCHR &&
-	    refcount_acquire_if_not_zero(&vp->v_usecount)) {
-		VNODE_REFCOUNT_FENCE_ACQ();
-		VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
-		    ("vnode with usecount and VI_OWEINACT set"));
-	} else {
-		VI_LOCK(vp);
-		v_incr_usecount_locked(vp);
-		VI_UNLOCK(vp);
-	}
-}
-
 /*
  * Increment si_usecount of the associated device, if any.
  */
@@ -2714,35 +2674,106 @@
  * with atomic operations.  Otherwise the interlock is taken covering
 * both the atomic and additional actions.
 */
+static int
+_vget_prep(struct vnode *vp, bool interlock)
+{
+	int rv;
+
+	if (__predict_true(vp->v_type != VCHR)) {
+		if (refcount_acquire_if_not_zero(&vp->v_usecount)) {
+			rv = LK_VNUSED;
+		} else {
+			_vhold(vp, interlock);
+			rv = LK_VNHELD;
+		}
+	} else {
+		if (!interlock) {
+			VI_LOCK(vp);
+			interlock = true;
+		}
+		if (vp->v_usecount == 0) {
+			vholdl(vp);
+			rv = LK_VNHELD;
+		} else {
+			v_incr_devcount(vp);
+			refcount_acquire(&vp->v_usecount);
+			rv = LK_VNUSED;
+		}
+	}
+	if (interlock)
+		VI_UNLOCK(vp);
+	return (rv);
+}
+
+int
+vget_prep(struct vnode *vp)
+{
+
+	return (_vget_prep(vp, false));
+}
+
 int
 vget(struct vnode *vp, int flags, struct thread *td)
 {
-	int error, oweinact;
 
-	VNASSERT((flags & LK_TYPE_MASK) != 0, vp,
-	    ("vget: invalid lock operation"));
+	MPASS(td == curthread);
 
-	if ((flags & LK_INTERLOCK) != 0)
-		ASSERT_VI_LOCKED(vp, __func__);
-	else
-		ASSERT_VI_UNLOCKED(vp, __func__);
-	if ((flags & LK_VNHELD) != 0)
-		VNASSERT((vp->v_holdcnt > 0), vp,
-		    ("vget: LK_VNHELD passed but vnode not held"));
+	flags |= _vget_prep(vp, (flags & LK_INTERLOCK) != 0);
+	return (vget_finish(vp, flags & ~LK_INTERLOCK));
+}
 
-	CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags);
+int
+vget_finish(struct vnode *vp, int flags)
+{
+	int error, oweinact;
 
-	if ((flags & LK_VNHELD) == 0)
-		_vhold(vp, (flags & LK_INTERLOCK) != 0);
+	VNASSERT((flags & LK_TYPE_MASK) != 0, vp,
+	    ("%s: invalid lock operation", __func__));
+	KASSERT((flags & LK_INTERLOCK) == 0,
+	    ("%s: LK_INTERLOCK passed", __func__));
+	ASSERT_VI_UNLOCKED(vp, __func__);
+	VNASSERT(vp->v_holdcnt > 0, vp, ("%s: vnode not held", __func__));
+	if (flags & LK_VNUSED) {
+		VNASSERT(vp->v_usecount > 0, vp,
+		    ("%s: vnode without usecount when LK_VNUSED was set",
+		    __func__));
+	}
 
 	if ((error = vn_lock(vp, flags)) != 0) {
-		vdrop(vp);
+		if (flags & LK_VNUSED)
+			vrele(vp);
+		else
+			vdrop(vp);
 		CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__,
 		    vp);
 		return (error);
 	}
-	if (vp->v_iflag & VI_DOOMED && (flags & LK_RETRY) == 0)
-		panic("vget: vn_lock failed to return ENOENT\n");
+
+	if (flags & LK_VNUSED) {
+		VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
+		    ("%s: vnode with usecount and VI_OWEINACT set", __func__));
+		return (0);
+	}
+
+	/*
+	 * We hold the vnode.  If the usecount is 0 it will be utilized to keep
+	 * the vnode around.  Otherwise someone else lent their hold count and
+	 * we have to drop ours.
+	 */
+	if (vp->v_type != VCHR &&
+	    refcount_acquire_if_not_zero(&vp->v_usecount)) {
+#ifdef INVARIANTS
+		int old = atomic_fetchadd_int(&vp->v_holdcnt, -1) - 1;
+		VNASSERT(old > 0, vp, ("%s: wrong hold count", __func__));
+#else
+		refcount_release(&vp->v_holdcnt);
+#endif
+		VNODE_REFCOUNT_FENCE_ACQ();
+		VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
+		    ("%s: vnode with usecount and VI_OWEINACT set", __func__));
+		return (0);
+	}
 
 	/*
 	 * We don't guarantee that any particular close will
 	 * trigger inactive processing so just make a best effort
@@ -2751,23 +2782,22 @@
 	 *
 	 * Upgrade our holdcnt to a usecount.
 	 */
-	if (vp->v_type == VCHR ||
-	    !refcount_acquire_if_not_zero(&vp->v_usecount)) {
-		VI_LOCK(vp);
-		if ((vp->v_iflag & VI_OWEINACT) == 0) {
-			oweinact = 0;
-		} else {
-			oweinact = 1;
-			vp->v_iflag &= ~VI_OWEINACT;
-			VNODE_REFCOUNT_FENCE_REL();
-		}
-		refcount_acquire(&vp->v_usecount);
-		v_incr_devcount(vp);
-		if (oweinact && VOP_ISLOCKED(vp) == LK_EXCLUSIVE &&
-		    (flags & LK_NOWAIT) == 0)
-			vinactive(vp, td);
-		VI_UNLOCK(vp);
+	VI_LOCK(vp);
+	if ((vp->v_iflag & VI_OWEINACT) == 0) {
+		oweinact = 0;
+	} else {
+		oweinact = 1;
+		vp->v_iflag &= ~VI_OWEINACT;
+		VNODE_REFCOUNT_FENCE_REL();
 	}
+	if (vp->v_usecount > 0)
+		refcount_release(&vp->v_holdcnt);
+	v_incr_devcount(vp);
+	refcount_acquire(&vp->v_usecount);
+	if (oweinact && VOP_ISLOCKED(vp) == LK_EXCLUSIVE &&
+	    (flags & LK_NOWAIT) == 0)
+		vinactive(vp, curthread);
+	VI_UNLOCK(vp);
 	return (0);
 }
 
@@ -2781,8 +2811,18 @@
 
 	ASSERT_VI_UNLOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	_vhold(vp, false);
-	v_incr_usecount(vp);
+	if (vp->v_type != VCHR &&
+	    refcount_acquire_if_not_zero(&vp->v_usecount)) {
+		VNODE_REFCOUNT_FENCE_ACQ();
+		VNASSERT(vp->v_holdcnt > 0, vp,
+		    ("%s: active vnode not held", __func__));
+		VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
+		    ("%s: vnode with usecount and VI_OWEINACT set", __func__));
+		return;
+	}
+	VI_LOCK(vp);
+	vrefl(vp);
+	VI_UNLOCK(vp);
 }
 
 void
@@ -2791,8 +2831,14 @@
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	_vhold(vp, true);
-	v_incr_usecount_locked(vp);
+	if (vp->v_usecount == 0)
+		vholdl(vp);
+	if ((vp->v_iflag & VI_OWEINACT) != 0) {
+		vp->v_iflag &= ~VI_OWEINACT;
+		VNODE_REFCOUNT_FENCE_REL();
+	}
+	v_incr_devcount(vp);
+	refcount_acquire(&vp->v_usecount);
 }
 
 void
@@ -2807,12 +2853,9 @@
 		return;
 	}
 #ifdef INVARIANTS
-	int old = atomic_fetchadd_int(&vp->v_holdcnt, 1);
-	VNASSERT(old > 0, vp, ("%s: wrong hold count", __func__));
-	old = atomic_fetchadd_int(&vp->v_usecount, 1);
+	int old = atomic_fetchadd_int(&vp->v_usecount, 1);
 	VNASSERT(old > 0, vp, ("%s: wrong use count", __func__));
 #else
-	refcount_acquire(&vp->v_holdcnt);
 	refcount_acquire(&vp->v_usecount);
 #endif
 }
@@ -2857,16 +2900,24 @@
 	ASSERT_VI_UNLOCKED(vp, __func__);
 	VNASSERT(vp->v_holdcnt > 0 && vp->v_usecount > 0, vp,
 	    ("%s: wrong ref counts", __func__));
-
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 
+	/*
+	 * It is an invariant that all VOP_* calls operate on a held vnode.
+	 * We may only have an implicit hold stemming from our usecount,
+	 * which we are about to release.  If we unlock the vnode afterwards
+	 * we open a time window where someone else dropped the last usecount
+	 * and proceeded to free the vnode before our unlock finished.  For
+	 * this reason we unlock the vnode early.  This is a little bit
+	 * wasteful as the vnode may be exclusively locked and inactive
+	 * processing may be needed, in which case we are adding work.
+	 */
+	if (func == VPUTX_VPUT)
+		VOP_UNLOCK(vp, 0);
+
 	if (vp->v_type != VCHR &&
-	    refcount_release_if_not_last(&vp->v_usecount)) {
-		if (func == VPUTX_VPUT)
-			VOP_UNLOCK(vp, 0);
-		vdrop(vp);
+	    refcount_release_if_not_last(&vp->v_usecount))
 		return;
-	}
 
 	VI_LOCK(vp);
 
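[Reviewer note, not part of the patch: the early VOP_UNLOCK() above is the subtle part of this hunk. Since a use reference no longer carries its own hold, a thread in vput() may own no hold of its own once it releases its usecount, and the vnode can be freed by another thread before the unlock completes. An illustrative interleaving of the window being closed (thread names and ordering are hypothetical):

	/*
	 *	thread A: vput(vp)		thread B: vrele(vp)
	 *	releases usecount 2 -> 1
	 *					releases usecount 1 -> 0,
	 *					drops the hold it owned; the
	 *					vnode can now be recycled
	 *					and freed
	 *	VOP_UNLOCK(vp, 0)	<- would touch freed memory
	 */

Unlocking before the release means the lock is never touched after the point where our reference may cease to keep the vnode alive.]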
@@ -2875,17 +2926,16 @@
 	 * prevent vgone() races.  We drop the use count here and the
 	 * hold count below when we're done.
 	 */
-	if (!refcount_release(&vp->v_usecount) ||
-	    (vp->v_iflag & VI_DOINGINACT)) {
-		if (func == VPUTX_VPUT)
-			VOP_UNLOCK(vp, 0);
-		v_decr_devcount(vp);
+	v_decr_devcount(vp);
+	if (!refcount_release(&vp->v_usecount)) {
+		VI_UNLOCK(vp);
+		return;
+	}
+	if (vp->v_iflag & VI_DOINGINACT) {
 		vdropl(vp);
 		return;
 	}
 
-	v_decr_devcount(vp);
-
 	error = 0;
 
 	if (vp->v_usecount != 0) {
@@ -2904,8 +2954,6 @@
 	 */
 	if (__predict_false(vp->v_iflag & VI_DOOMED) ||
 	    VOP_NEED_INACTIVE(vp) == 0) {
-		if (func == VPUTX_VPUT)
-			VOP_UNLOCK(vp, 0);
 		vdropl(vp);
 		return;
 	}
@@ -2921,11 +2969,8 @@
 		VI_LOCK(vp);
 		break;
 	case VPUTX_VPUT:
-		if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
-			error = VOP_LOCK(vp, LK_UPGRADE | LK_INTERLOCK |
-			    LK_NOWAIT);
-			VI_LOCK(vp);
-		}
+		error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_NOWAIT);
+		VI_LOCK(vp);
 		break;
 	case VPUTX_VUNREF:
 		if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
Index: sys/sys/lockmgr.h
===================================================================
--- sys/sys/lockmgr.h
+++ sys/sys/lockmgr.h
@@ -165,6 +165,7 @@
 #define	LK_TIMELOCK	0x001000
 #define	LK_NODDLKTREAT	0x002000
 #define	LK_VNHELD	0x004000
+#define	LK_VNUSED	0x008000
 
 /*
  * Operations for lockmgr().
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -653,6 +653,8 @@
 void	_vdrop(struct vnode *, bool);
 int	vflush(struct mount *mp, int rootrefs, int flags, struct thread *td);
 int	vget(struct vnode *vp, int lockflag, struct thread *td);
+int	vget_prep(struct vnode *vp);
+int	vget_finish(struct vnode *vp, int lockflag);
 void	vgone(struct vnode *vp);
 #define	vhold(vp)	_vhold((vp), 0)
 #define	vholdl(vp)	_vhold((vp), 1)
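[Reviewer note, not part of the patch: unconverted callers keep working, since vget() is now a wrapper that performs _vget_prep() itself and hands the result to vget_finish(). A sketch of the mechanical conversion for LK_VNHELD-style callers, with drop_guard() as a hypothetical placeholder for releasing whatever lock publishes the vnode pointer:

	/* Before: */
	vhold(vp);
	drop_guard();
	error = vget(vp, LK_EXCLUSIVE | LK_VNHELD, curthread);

	/* After: */
	vget_flags = vget_prep(vp);
	drop_guard();
	error = vget_finish(vp, vget_flags | LK_EXCLUSIVE);

Besides splitting the API, the new shape lets vget_finish() skip the hold-to-usecount upgrade entirely when vget_prep() already took a use reference (LK_VNUSED), rather than always paying for _vhold() plus the upgrade.]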