Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -114,6 +114,7 @@
 static void	vfs_knlunlock(void *arg);
 static void	vfs_knl_assert_locked(void *arg);
 static void	vfs_knl_assert_unlocked(void *arg);
+static void	vfs_return_deferred(struct mount *mp);
 static void	vnlru_return_batches(struct vfsops *mnt_op);
 static void	destroy_vpollinfo(struct vpollinfo *vi);
 static int	v_inval_buf_range_locked(struct vnode *vp, struct bufobj *bo,
@@ -1104,9 +1105,9 @@
 {
 	struct vnode *vp;
 	struct mount *mp;
-	bool tried_batches;
+	bool tried_batches, tried_deferred;
 
-	tried_batches = false;
+	tried_batches = tried_deferred = false;
 	mtx_assert(&vnode_free_list_mtx, MA_OWNED);
 	if (count > max_vnlru_free)
 		count = max_vnlru_free;
@@ -1117,9 +1118,15 @@
 		 * has been dropped and vp could be NULL here.
 		 */
 		if (vp == NULL) {
-			if (tried_batches)
-				break;
-			mtx_unlock(&vnode_free_list_mtx);
+			if (tried_batches) {
+				if (tried_deferred)
+					break;
+				mtx_unlock(&vnode_free_list_mtx);
+				vfs_return_deferred(mp);
+				tried_deferred = true;
+			} else {
+				mtx_unlock(&vnode_free_list_mtx);
+			}
 			vnlru_return_batches(mnt_op);
 			tried_batches = true;
 			mtx_lock(&vnode_free_list_mtx);
@@ -2970,7 +2977,7 @@
 	 * the count back to > 0.
 	 */
 	if (vp->v_usecount > 0) {
-		vdropl(vp);
+		vdrop_deferred(vp);
 		return;
 	}
 	if (vp->v_iflag & VI_DOINGINACT) {
@@ -2994,12 +3001,16 @@
 	 * Since vgone performs inactive on its own there is nothing to do
 	 * here but to drop our hold count.
 	 */
-	if (__predict_false(vp->v_iflag & VI_DOOMED) ||
-	    VOP_NEED_INACTIVE(vp) == 0) {
+	if (__predict_false(vp->v_iflag & VI_DOOMED)) {
 		vdropl(vp);
 		return;
 	}
 
+	if (__predict_true(VOP_NEED_INACTIVE(vp) == 0)) {
+		vdrop_deferred(vp);
+		return;
+	}
+
 	/*
 	 * We must call VOP_INACTIVE with the node locked. Mark
 	 * as VI_DOINGINACT to avoid recursion.
@@ -3140,6 +3151,29 @@
 #endif
 }
 
+static void
+_vdrop_return_vnode(struct mount *mp, struct vnode *vp)
+{
+
+	ASSERT_VI_LOCKED(vp, __func__);
+	MPASS(vp->v_mount == mp);
+	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
+
+	if (vp->v_iflag & VI_ACTIVE) {
+		vp->v_iflag &= ~VI_ACTIVE;
+		TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist);
+		mp->mnt_activevnodelistsize--;
+	}
+	TAILQ_INSERT_TAIL(&mp->mnt_tmpfreevnodelist, vp, v_actfreelist);
+	mp->mnt_tmpfreevnodelistsize++;
+	vp->v_iflag |= VI_FREE;
+	vp->v_mflag |= VMP_TMPMNTFREELIST;
+	VI_UNLOCK(vp);
+	if (mp->mnt_tmpfreevnodelistsize >= mnt_free_list_batch)
+		vnlru_return_batch_locked(mp);
+	mtx_unlock(&mp->mnt_listmtx);
+}
+
 /*
  * Drop the hold count of the vnode.  If this is the last reference to
  * the vnode we place it on the free list unless it has been vgone'd
@@ -3188,22 +3222,7 @@
 		mp = vp->v_mount;
 		if (mp != NULL) {
 			mtx_lock(&mp->mnt_listmtx);
-			if (vp->v_iflag & VI_ACTIVE) {
-				vp->v_iflag &= ~VI_ACTIVE;
-				TAILQ_REMOVE(&mp->mnt_activevnodelist,
-				    vp, v_actfreelist);
-				mp->mnt_activevnodelistsize--;
-			}
-			TAILQ_INSERT_TAIL(&mp->mnt_tmpfreevnodelist,
-			    vp, v_actfreelist);
-			mp->mnt_tmpfreevnodelistsize++;
-			vp->v_iflag |= VI_FREE;
-			vp->v_mflag |= VMP_TMPMNTFREELIST;
-			VI_UNLOCK(vp);
-			if (mp->mnt_tmpfreevnodelistsize >=
-			    mnt_free_list_batch)
-				vnlru_return_batch_locked(mp);
-			mtx_unlock(&mp->mnt_listmtx);
+			_vdrop_return_vnode(mp, vp);
 		} else {
 			VNASSERT((vp->v_iflag & VI_ACTIVE) == 0, vp,
 			    ("vdropl: active vnode not on per mount "
@@ -3275,6 +3294,48 @@
 	uma_zfree(vnode_zone, vp);
 }
 
+/*
+ * Defer dropping the last hold count if the mount list lock is contended.
+ * In that case VI_DEFERRED is set on the vnode and dropping the count is
+ * delegated to the syncer.
+ */
+void
+vdrop_deferred(struct vnode *vp)
+{
+	struct mount *mp;
+
+	ASSERT_VI_LOCKED(vp, __func__);
+	if (refcount_release_if_not_last(&vp->v_holdcnt)) {
+		VI_UNLOCK(vp);
+		return;
+	}
+	VNASSERT((vp->v_iflag & VI_DEFERRED) == 0, vp,
+	    ("%s: VI_DEFERRED already set but holdcnt is 1", __func__));
+
+	/*
+	 * We only deal with the simplest case: neither the vnode
+	 * nor the filesystem is going away.
+	 */
+	mp = vp->v_mount;
+	if (vp->v_iflag & (VI_DOOMED | VI_OWEINACT) || vp->v_type == VCHR ||
+	    mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) {
+		vdropl(vp);
+		return;
+	}
+	if (!mtx_trylock(&mp->mnt_listmtx)) {
+		vp->v_iflag |= VI_DEFERRED;
+		VI_UNLOCK(vp);
+		return;
+	}
+	if (refcount_release(&vp->v_holdcnt) == 0) {
+		mtx_unlock(&mp->mnt_listmtx);
+		VI_UNLOCK(vp);
+		return;
+	}
+
+	_vdrop_return_vnode(mp, vp);
+}
+
 /*
  * Call VOP_INACTIVE on the vnode and manage the DOINGINACT and OWEINACT
  * flags.  DOINGINACT prevents us from recursing in calls to vinactive.
@@ -3594,7 +3655,12 @@
 	 */
 	active = vp->v_usecount > 0;
 	oweinact = (vp->v_iflag & VI_OWEINACT) != 0;
-	VI_UNLOCK(vp);
+	if (vp->v_iflag & VI_DEFERRED) {
+		vp->v_iflag &= ~VI_DEFERRED;
+		vdropl(vp);
+	} else {
+		VI_UNLOCK(vp);
+	}
 	vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM);
 
 	/*
@@ -4341,6 +4407,9 @@
 	struct vm_object *obj;
 	int flags = *(int *)arg;
 
+	if (vp->v_iflag & VI_DEFERRED)
+		return (true);
+
 	obj = vp->v_object;
 	return (obj != NULL && vm_object_mightbedirty(obj) &&
 	    (flags == MNT_WAIT || VOP_ISLOCKED(vp) == 0));
@@ -4352,35 +4421,70 @@
 	struct vnode *vp, *mvp;
 	struct vm_object *obj;
 	struct thread *td;
+	bool want_vdrop;
 
 	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
 
-	if ((mp->mnt_kern_flag & MNTK_NOMSYNC) != 0)
-		return;
-
 	td = curthread;
 
 	MNT_VNODE_FOREACH_ACTIVE_FILTER(vp, mp, mvp, vfs_msync_filter, &flags) {
+		want_vdrop = false;
+		if (vp->v_iflag & VI_DEFERRED) {
+			vp->v_iflag &= ~VI_DEFERRED;
+			want_vdrop = true;
+		}
+
 		obj = vp->v_object;
-		if (obj != NULL && vm_object_mightbedirty(obj) &&
-		    (flags == MNT_WAIT || VOP_ISLOCKED(vp) == 0)) {
-			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td) == 0) {
-				if (vp->v_vflag & VV_NOSYNC) {	/* unlinked */
-					vput(vp);
-					continue;
-				}
+		if (!(obj != NULL && vm_object_mightbedirty(obj) &&
+		    (flags == MNT_WAIT || VOP_ISLOCKED(vp) == 0))) {
+			if (want_vdrop)
+				vdropl(vp);
+			else
+				VI_UNLOCK(vp);
+			continue;
+		}
-				obj = vp->v_object;
-				if (obj != NULL) {
-					VM_OBJECT_WLOCK(obj);
-					vm_object_page_clean(obj, 0, 0,
-					    flags == MNT_WAIT ?
-					    OBJPC_SYNC : OBJPC_NOSYNC);
-					VM_OBJECT_WUNLOCK(obj);
-				}
+
+		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td) == 0) {
+			if (vp->v_vflag & VV_NOSYNC) {	/* unlinked */
 				vput(vp);
+				if (want_vdrop)
+					vdrop(vp);
 				continue;
 			}
-		} else
+
+			obj = vp->v_object;
+			if (obj != NULL) {
+				VM_OBJECT_WLOCK(obj);
+				vm_object_page_clean(obj, 0, 0,
+				    flags == MNT_WAIT ?
+				    OBJPC_SYNC : OBJPC_NOSYNC);
+				VM_OBJECT_WUNLOCK(obj);
+			}
+			vput(vp);
+		}
+		if (want_vdrop)
+			vdrop(vp);
+	}
+}
+
+static bool
+vfs_deferred_filter(struct vnode *vp, void *arg __unused)
+{
+
+	return (vp->v_iflag & VI_DEFERRED);
+}
+
+static void
+vfs_return_deferred(struct mount *mp)
+{
+	struct vnode *vp, *mvp;
+
+	MNT_VNODE_FOREACH_ACTIVE_FILTER(vp, mp, mvp, vfs_deferred_filter, NULL) {
+		if (vp->v_iflag & VI_DEFERRED) {
+			vp->v_iflag &= ~VI_DEFERRED;
+			vdropl(vp);
+		} else {
 			VI_UNLOCK(vp);
+		}
 	}
 }
@@ -4592,8 +4696,11 @@
 	 * The filesystem at hand may be idle with free vnodes stored in the
 	 * batch.  Return them instead of letting them stay there indefinitely.
	 */
+	if ((mp->mnt_kern_flag & MNTK_NOMSYNC) != 0)
+		vfs_return_deferred(mp);
+	else
+		vfs_msync(mp, MNT_NOWAIT);
 	vnlru_return_batch(mp);
-	vfs_msync(mp, MNT_NOWAIT);
 	error = VFS_SYNC(mp, MNT_LAZY);
 	curthread_pflags_restore(save);
 	vn_finished_write(mp);
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -241,6 +241,7 @@
 #define	VI_ACTIVE	0x0200	/* This vnode is on the active list */
 #define	VI_DOINGINACT	0x0800	/* VOP_INACTIVE is in progress */
 #define	VI_OWEINACT	0x1000	/* Need to call inactive */
+#define	VI_DEFERRED	0x2000	/* Has an additional hold count */
 
 #define	VV_ROOT		0x0001	/* root of its filesystem */
 #define	VV_ISTTY	0x0002	/* vnode represents a tty */
@@ -654,6 +655,7 @@
 #define	vdrop(vp)	_vdrop((vp), 0)
 #define	vdropl(vp)	_vdrop((vp), 1)
 void	_vdrop(struct vnode *, bool);
+void	vdrop_deferred(struct vnode *);
 int	vflush(struct mount *mp, int rootrefs, int flags, struct thread *td);
 int	vget(struct vnode *vp, int flags, struct thread *td);
 enum vgetstate	vget_prep(struct vnode *vp);