Index: sys/fs/nullfs/null_vnops.c
===================================================================
--- sys/fs/nullfs/null_vnops.c
+++ sys/fs/nullfs/null_vnops.c
@@ -844,18 +844,24 @@
 	struct null_node *xp;
 	struct vnode *lowervp;
 	struct vnode *vp;
+	int error;
 
 	vp = ap->a_vp;
-	VI_LOCK(vp);
-	xp = VTONULL(vp);
-	if (xp && (lowervp = xp->null_lowervp)) {
-		vholdnz(lowervp);
-		VI_UNLOCK(vp);
-		VOP_GETWRITEMOUNT(lowervp, ap->a_mpp);
-		vdrop(lowervp);
+	*(ap->a_mpp) = NULL;
+	if ((ap->a_flags & V_NONBLOCKING) != 0) {
+		xp = VTONULL(vp);
+		if (xp && (lowervp = xp->null_lowervp))
+			error = VOP_GETWRITEMOUNT(lowervp, ap->a_mpp, ap->a_flags);
 	} else {
-		VI_UNLOCK(vp);
-		*(ap->a_mpp) = NULL;
+		VI_LOCK(vp);
+		xp = VTONULL(vp);
+		if (xp && (lowervp = xp->null_lowervp)) {
+			vholdnz(lowervp);
+			VI_UNLOCK(vp);
+			error = VOP_GETWRITEMOUNT(lowervp, ap->a_mpp, ap->a_flags);
+			vdrop(lowervp);
+		} else
+			VI_UNLOCK(vp);
 	}
 	return (0);
 }
Index: sys/fs/unionfs/union_vnops.c
===================================================================
--- sys/fs/unionfs/union_vnops.c
+++ sys/fs/unionfs/union_vnops.c
@@ -1698,12 +1698,24 @@
 	KASSERT_UNIONFS_VNODE(vp);
 
+	if ((ap->a_flags & V_NONBLOCKING) != 0) {
+		uvp = UNIONFSVPTOUPPERVP(vp);
+		if (uvp == NULLVP && VREG == vp->v_type)
+			uvp = UNIONFSVPTOUPPERVP(VTOUNIONFS(vp)->un_dvp);
+		if (uvp == NULLVP) {
+			error = EAGAIN;
+			goto out;
+		}
+		error = VOP_GETWRITEMOUNT(uvp, ap->a_mpp, ap->a_flags);
+		goto out;
+	}
+
 	uvp = UNIONFSVPTOUPPERVP(vp);
 	if (uvp == NULLVP && VREG == vp->v_type)
 		uvp = UNIONFSVPTOUPPERVP(VTOUNIONFS(vp)->un_dvp);
 	if (uvp != NULLVP)
-		error = VOP_GETWRITEMOUNT(uvp, ap->a_mpp);
+		error = VOP_GETWRITEMOUNT(uvp, ap->a_mpp, ap->a_flags);
 	else {
 		VI_LOCK(vp);
 		if (vp->v_iflag & VI_FREE)
@@ -1712,7 +1724,7 @@
 			error = EACCES;
 		VI_UNLOCK(vp);
 	}
-
+out:
 	UNIONFS_INTERNAL_DEBUG("unionfs_getwritemount: leave (%d)\n", error);
 
 	return (error);
Index: sys/kern/vfs_default.c
===================================================================
--- sys/kern/vfs_default.c
+++ sys/kern/vfs_default.c
@@ -612,13 +612,17 @@
 			mp = NULL;
 			vfs_op_thread_exit(mp);
 		}
 	} else {
-		MNT_ILOCK(mp);
-		if (mp == vp->v_mount) {
-			MNT_REF(mp);
-			MNT_IUNLOCK(mp);
-		} else {
-			MNT_IUNLOCK(mp);
+		if ((ap->a_flags & V_NONBLOCKING) != 0)
 			mp = NULL;
+		else {
+			MNT_ILOCK(mp);
+			if (mp == vp->v_mount) {
+				MNT_REF(mp);
+				MNT_IUNLOCK(mp);
+			} else {
+				MNT_IUNLOCK(mp);
+				mp = NULL;
+			}
 		}
 	}
 	*(ap->a_mpp) = mp;
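Reviewer note, not part of the patch: the three hunks above all follow the same shape, so for reference here is a minimal sketch of what the new contract asks of a stacked filesystem. "examplefs" and EXTOLOWER() are made-up names standing in for the nullfs/unionfs accessors; the point is that the V_NONBLOCKING path must avoid the vnode interlock and may legitimately come back with *mpp == NULL.

/*
 * Illustrative sketch only.  A pass-through VOP_GETWRITEMOUNT honoring
 * V_NONBLOCKING, mirroring the nullfs change above.
 */
static int
examplefs_getwritemount(struct vop_getwritemount_args *ap)
{
	struct vnode *vp, *lowervp;

	vp = ap->a_vp;
	*(ap->a_mpp) = NULL;
	if ((ap->a_flags & V_NONBLOCKING) != 0) {
		/* Lockless peek; losing the race just yields no mount. */
		lowervp = EXTOLOWER(vp);
		if (lowervp != NULL)
			return (VOP_GETWRITEMOUNT(lowervp, ap->a_mpp,
			    ap->a_flags));
		return (0);
	}
	VI_LOCK(vp);
	lowervp = EXTOLOWER(vp);
	if (lowervp == NULL) {
		VI_UNLOCK(vp);
		return (0);
	}
	vholdnz(lowervp);	/* keep the lower vnode from being recycled */
	VI_UNLOCK(vp);
	VOP_GETWRITEMOUNT(lowervp, ap->a_mpp, ap->a_flags);
	vdrop(lowervp);
	return (0);
}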
Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -1159,19 +1159,11 @@
 		 * activating.
 		 */
 		freevnodes--;
-		vp->v_iflag &= ~VI_FREE;
-		VNODE_REFCOUNT_FENCE_REL();
-		refcount_acquire(&vp->v_holdcnt);
-		mtx_unlock(&vnode_free_list_mtx);
-		VI_UNLOCK(vp);
 		vtryrecycle(vp);
 		/*
-		 * If the recycled succeeded this vdrop will actually free
-		 * the vnode.  If not it will simply place it back on
-		 * the free list.
+		 * If the recycle failed, the vnode is at the tail of the
+		 * free list or in the batch.
 		 */
-		vdrop(vp);
 		mtx_lock(&vnode_free_list_mtx);
 	}
 }
@@ -1394,54 +1386,70 @@
 vtryrecycle(struct vnode *vp)
 {
 	struct mount *vnmp;
+	int error;
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	VNASSERT(vp->v_holdcnt, vp,
-	    ("vtryrecycle: Recycling vp %p without a reference.", vp));
+
+	ASSERT_VI_LOCKED(vp, __func__);
+
+	VNASSERT((vp->v_iflag & VI_DOOMED) == 0, vp,
+	    ("%s: vnode already doomed", __func__));
+	VNASSERT(vp->v_holdcnt == 0 && vp->v_usecount == 0, vp,
+	    ("%s: vnode has non-zero counts", __func__));
+
 	/*
 	 * This vnode may found and locked via some other list, if so we
 	 * can't recycle it yet.
 	 */
-	if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
+	if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_IGNORE_INTERLOCK) != 0) {
 		CTR2(KTR_VFS,
 		    "%s: impossible to recycle, vp %p lock is already held",
 		    __func__, vp);
-		return (EWOULDBLOCK);
+		error = EWOULDBLOCK;
+		goto out_no_recycle;
 	}
 	/*
 	 * Don't recycle if its filesystem is being suspended.
 	 */
-	if (vn_start_write(vp, &vnmp, V_NOWAIT) != 0) {
+	if (vn_start_write(vp, &vnmp, V_NONBLOCKING) != 0) {
 		VOP_UNLOCK(vp, 0);
 		CTR2(KTR_VFS,
 		    "%s: impossible to recycle, cannot start the write for %p",
 		    __func__, vp);
-		return (EBUSY);
+		error = EBUSY;
+		goto out_no_recycle;
 	}
+
 	/*
-	 * If we got this far, we need to acquire the interlock and see if
-	 * anyone picked up this vnode from another list.  If not, we will
-	 * mark it with DOOMED via vgonel() so that anyone who does find it
-	 * will skip over it.
+	 * Fake enough of a hold count for vgone et al to pass.  While this
+	 * allows other threads to bump it with atomics, they will see a
+	 * VI_DOOMED vnode by the time they get the interlock.
 	 */
-	VI_LOCK(vp);
-	if (vp->v_usecount) {
-		VOP_UNLOCK(vp, 0);
-		VI_UNLOCK(vp);
-		vn_finished_write(vnmp);
-		CTR2(KTR_VFS,
-		    "%s: impossible to recycle, %p is already referenced",
-		    __func__, vp);
-		return (EBUSY);
-	}
-	if ((vp->v_iflag & VI_DOOMED) == 0) {
-		counter_u64_add(recycles_count, 1);
-		vgonel(vp);
-	}
+	vp->v_iflag &= ~VI_FREE;
+	VNODE_REFCOUNT_FENCE_REL();
+	refcount_acquire(&vp->v_holdcnt);
+
+	vgonel(vp);
+
 	VOP_UNLOCK(vp, 0);
-	VI_UNLOCK(vp);
+	vdropl(vp);
+	vn_finished_write(vnmp);
+
+	counter_u64_add(recycles_count, 1);
 	return (0);
+out_no_recycle:
+	/*
+	 * Put the vnode back on the free list.
+	 */
+	mtx_lock(&vnode_free_list_mtx);
+	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist);
+	freevnodes++;
+	mtx_unlock(&vnode_free_list_mtx);
+	VI_UNLOCK(vp);
+	if (error == EBUSY && vnmp != NULL)
+		vfs_rel(vnmp);
+	return (error);
 }
 
 static void
@@ -5035,6 +5043,12 @@
 {
 	struct vop_lock1_args *a = ap;
 
+	if ((a->a_flags & LK_IGNORE_INTERLOCK) != 0) {
+		KASSERT((a->a_flags & LK_NOWAIT) != 0,
+		    ("passing LK_IGNORE_INTERLOCK requires LK_NOWAIT"));
+		return;
+	}
+
 	if ((a->a_flags & LK_INTERLOCK) == 0)
 		ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK");
 	else
@@ -5046,7 +5060,8 @@
 {
 	struct vop_lock1_args *a = ap;
 
-	ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK");
+	if ((a->a_flags & LK_IGNORE_INTERLOCK) == 0)
+		ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK");
 	if (rc == 0 && (a->a_flags & LK_EXCLOTHER) == 0)
 		ASSERT_VOP_LOCKED(a->a_vp, "VOP_LOCK");
 }
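Reviewer note, not part of the patch: the reworked vtryrecycle() now consumes the vnode interlock on both the success and the failure path, so the caller-side contract is easy to get wrong. The helper below is a hypothetical sketch of that contract, inferred from the vnlru hunk above; the trylock mirrors the existing list-mutex-before-interlock ordering.

/*
 * Illustrative sketch only.  Hand a free-list vnode to vtryrecycle():
 * interlocked, off the free list, zero hold and use counts.
 */
static void
example_recycle_candidate(struct vnode *vp)
{

	mtx_assert(&vnode_free_list_mtx, MA_OWNED);

	/* Trylock to respect the list mutex -> interlock lock order. */
	if (!VI_TRYLOCK(vp))
		return;
	TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
	freevnodes--;
	mtx_unlock(&vnode_free_list_mtx);

	/*
	 * vtryrecycle() drops the interlock itself; on failure it has
	 * already reinserted the vnode at the tail of the free list.
	 */
	(void)vtryrecycle(vp);

	mtx_lock(&vnode_free_list_mtx);
}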
Index: sys/kern/vfs_vnops.c
===================================================================
--- sys/kern/vfs_vnops.c
+++ sys/kern/vfs_vnops.c
@@ -1668,6 +1668,40 @@
 	return (error);
 }
 
+/*
+ * The routine guarantees not to take any locks.  However, since it performs
+ * two separate operations internally, it may still fail and require cleanup
+ * (if the second op fails, it is possible that reverting the first one would
+ * require locks).
+ *
+ * Callers are expected to handle it as follows:
+ * error == 0 indicates success and requires a matching vn_finished_write().
+ * error != 0 && *mpp != NULL requires a matching vfs_rel().
+ * error != 0 && *mpp == NULL has nothing to clean up.
+ */
+static int
+vn_start_write_nb(struct vnode *vp, struct mount **mpp)
+{
+	struct mount *mp;
+	int error;
+
+	if ((error = VOP_GETWRITEMOUNT(vp, mpp, V_NONBLOCKING)) != 0)
+		*mpp = NULL;
+	if ((mp = *mpp) == NULL)
+		return (error);
+
+	if (!vn_suspendable(mp))
+		return (0);
+
+	if (vfs_op_thread_enter(mp)) {
+		MPASS((mp->mnt_kern_flag & MNTK_SUSPEND) == 0);
+		vfs_mp_count_add_pcpu(mp, writeopcount, 1);
+		vfs_op_thread_exit(mp);
+		return (0);
+	}
+
+	return (EAGAIN);
+}
+
 int
 vn_start_write(struct vnode *vp, struct mount **mpp, int flags)
 {
@@ -1677,13 +1711,18 @@
 	KASSERT((flags & V_MNTREF) == 0 || (*mpp != NULL && vp == NULL),
 	    ("V_MNTREF requires mp"));
 
-	error = 0;
+	if (__predict_false((flags & V_NONBLOCKING) != 0)) {
+		KASSERT((flags & ~V_NONBLOCKING) == 0,
+		    ("incompatible or unhandled flags passed"));
+		return (vn_start_write_nb(vp, mpp));
+	}
+
 	/*
 	 * If a vnode is provided, get and return the mount point to which
 	 * it will write.
 	 */
 	if (vp != NULL) {
-		if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
+		if ((error = VOP_GETWRITEMOUNT(vp, mpp, 0)) != 0) {
 			*mpp = NULL;
 			if (error != EOPNOTSUPP)
 				return (error);
@@ -1693,12 +1732,6 @@
 	if ((mp = *mpp) == NULL)
 		return (0);
 
-	if (!vn_suspendable(mp)) {
-		if (vp != NULL || (flags & V_MNTREF) != 0)
-			vfs_rel(mp);
-		return (0);
-	}
-
 	/*
 	 * VOP_GETWRITEMOUNT() returns with the mp refcount held through
 	 * a vfs_ref().
@@ -1709,6 +1742,9 @@
 	if (vp == NULL && (flags & V_MNTREF) == 0)
 		vfs_ref(mp);
 
+	if (!vn_suspendable(mp))
+		return (0);
+
 	return (vn_start_write_refed(mp, flags, false));
 }
 
@@ -1730,7 +1766,7 @@
 retry:
 	if (vp != NULL) {
-		if ((error = VOP_GETWRITEMOUNT(vp, mpp)) != 0) {
+		if ((error = VOP_GETWRITEMOUNT(vp, mpp, 0)) != 0) {
 			*mpp = NULL;
 			if (error != EOPNOTSUPP)
 				return (error);
@@ -1793,9 +1829,14 @@
 {
 	int c;
 
-	if (mp == NULL || !vn_suspendable(mp))
+	if (mp == NULL)
 		return;
 
+	if (!vn_suspendable(mp)) {
+		vfs_rel(mp);
+		return;
+	}
+
 	if (vfs_op_thread_enter(mp)) {
 		vfs_mp_count_sub_pcpu(mp, writeopcount, 1);
 		vfs_mp_count_sub_pcpu(mp, ref, 1);
Index: sys/kern/vnode_if.src
===================================================================
--- sys/kern/vnode_if.src
+++ sys/kern/vnode_if.src
@@ -415,11 +415,12 @@
 };
 
 
-%% getwritemount	vp	=	=	=
+%% getwritemount	vp	-	-	-
 
 vop_getwritemount {
 	IN struct vnode *vp;
 	OUT struct mount **mpp;
+	IN int flags;
 };
Index: sys/sys/lockmgr.h
===================================================================
--- sys/sys/lockmgr.h
+++ sys/sys/lockmgr.h
@@ -164,6 +164,7 @@
 #define	LK_SLEEPFAIL	0x000800
 #define	LK_TIMELOCK	0x001000
 #define	LK_NODDLKTREAT	0x002000
+#define	LK_IGNORE_INTERLOCK	0x004000
 
 /*
  * Operations for lockmgr().
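Reviewer note, not part of the patch: the three-way cleanup contract documented above vn_start_write_nb() is the subtle part of this interface, so here is a hypothetical caller following it to the letter. The function name and the surrounding logic are made up for illustration.

/*
 * Illustrative sketch only.  Attempt a write without blocking, cleaning
 * up per the vn_start_write_nb() contract.
 */
static int
example_write_op(struct vnode *vp)
{
	struct mount *mp;
	int error;

	error = vn_start_write(vp, &mp, V_NONBLOCKING);
	if (error == 0) {
		/* ... perform the write ... */
		vn_finished_write(mp);
		return (0);
	}
	if (mp != NULL) {
		/*
		 * The mount was looked up and referenced, but the write
		 * count could not be bumped without blocking.
		 */
		vfs_rel(mp);
	}
	return (error);	/* e.g. EAGAIN; fall back to a blocking path */
}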
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -409,6 +409,7 @@
 #define	V_NOWAIT	0x0002	/* vn_start_write: don't sleep for suspend */
 #define	V_XSLEEP	0x0004	/* vn_start_write: just return after sleep */
 #define	V_MNTREF	0x0010	/* vn_start_write: mp is already ref-ed */
+#define	V_NONBLOCKING	0x0020	/* don't take any blocking locks */
 
 #define	VR_START_WRITE	0x0001	/* vfs_write_resume: start write atomically */
 #define	VR_NO_SUSPCLR	0x0002	/* vfs_write_resume: do not clear suspension */
Index: sys/ufs/ffs/ffs_snapshot.c
===================================================================
--- sys/ufs/ffs/ffs_snapshot.c
+++ sys/ufs/ffs/ffs_snapshot.c
@@ -282,7 +282,7 @@
 	vat.va_type = VREG;
 	vat.va_mode = S_IRUSR;
 	vat.va_vaflags |= VA_EXCLUSIVE;
-	if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp))
+	if (VOP_GETWRITEMOUNT(nd.ni_dvp, &wrtmp, 0))
 		wrtmp = NULL;
 	if (wrtmp != mp)
 		panic("ffs_snapshot: mount mismatch");
Index: sys/ufs/ffs/ffs_vnops.c
===================================================================
--- sys/ufs/ffs/ffs_vnops.c
+++ sys/ufs/ffs/ffs_vnops.c
@@ -421,10 +421,6 @@
 	vp = ap->a_vp;
 	flags = ap->a_flags;
 	for (;;) {
-#ifdef DEBUG_VFS_LOCKS
-		KASSERT(vp->v_holdcnt != 0,
-		    ("ffs_lock %p: zero hold count", vp));
-#endif
 		lkp = vp->v_vnlock;
 		result = _lockmgr_args(lkp, flags, VI_MTX(vp),
 		    LK_WMESG_DEFAULT, LK_PRIO_DEFAULT, LK_TIMO_DEFAULT,
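Reviewer note, not part of the patch: LK_IGNORE_INTERLOCK does not change lockmgr behavior; it only relaxes the DEBUG_VFS_LOCKS assertions in vop_lock_pre()/vop_lock_post() so the vnode lock may be tried while the interlock is held. The vop_lock_pre() KASSERT enforces the LK_NOWAIT pairing, since sleeping for the vnode lock under the interlock would invert the lock order. A hypothetical minimal user, matching the vtryrecycle() change:

/*
 * Illustrative sketch only.  Try-lock a vnode whose interlock is held.
 */
static int
example_trylock_interlocked(struct vnode *vp)
{

	ASSERT_VI_LOCKED(vp, __func__);

	/* Without LK_IGNORE_INTERLOCK this would trip vop_lock_pre(). */
	return (VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_IGNORE_INTERLOCK));
}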