Index: sys/fs/msdosfs/msdosfs_vfsops.c
===================================================================
--- sys/fs/msdosfs/msdosfs_vfsops.c
+++ sys/fs/msdosfs/msdosfs_vfsops.c
@@ -792,7 +792,7 @@
 		vn_printf(vp,
 		    "msdosfs_umount(): just before calling VOP_CLOSE()\n");
 		printf("freef %p, freeb %p, mount %p\n",
-		    TAILQ_NEXT(vp, v_actfreelist), vp->v_actfreelist.tqe_prev,
+		    TAILQ_NEXT(vp, v_vnodelist), vp->v_vnodelist.tqe_prev,
 		    vp->v_mount);
 		printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n",
 		    TAILQ_FIRST(&vp->v_bufobj.bo_clean.bv_hd),
Index: sys/fs/nfsserver/nfs_nfsdport.c
===================================================================
--- sys/fs/nfsserver/nfs_nfsdport.c
+++ sys/fs/nfsserver/nfs_nfsdport.c
@@ -3318,7 +3318,6 @@
 		inited = 1;
 		nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
 		TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
-		TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist);
 		TAILQ_INIT(&nfsv4root_mnt.mnt_dirtyvnodelist);
 		nfsv4root_mnt.mnt_export = NULL;
 		TAILQ_INIT(&nfsv4root_opt);
@@ -3326,7 +3325,6 @@
 		nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
 		nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
 		nfsv4root_mnt.mnt_nvnodelistsize = 0;
-		nfsv4root_mnt.mnt_activevnodelistsize = 0;
 		nfsv4root_mnt.mnt_dirtyvnodelistsize = 0;
 	}
Index: sys/fs/tmpfs/tmpfs_vfsops.c
===================================================================
--- sys/fs/tmpfs/tmpfs_vfsops.c
+++ sys/fs/tmpfs/tmpfs_vfsops.c
@@ -101,7 +101,7 @@
 
 /*
  * Handle updates of time from writes to mmaped regions. Use
- * MNT_VNODE_FOREACH_ALL instead of MNT_VNODE_FOREACH_ACTIVE, since
+ * MNT_VNODE_FOREACH_ALL instead of MNT_VNODE_FOREACH_DIRTY, since
 * unmap of the tmpfs-backed vnode does not call vinactive(), due to
 * vm object type is OBJT_SWAP.
 * If lazy, only handle delayed update of mtime due to the writes to
Index: sys/fs/unionfs/union_vnops.c
===================================================================
--- sys/fs/unionfs/union_vnops.c
+++ sys/fs/unionfs/union_vnops.c
@@ -1706,7 +1706,7 @@
 		error = VOP_GETWRITEMOUNT(uvp, ap->a_mpp);
 	else {
 		VI_LOCK(vp);
-		if (vp->v_iflag & VI_FREE)
+		if (vp->v_holdcnt == 0)
 			error = EOPNOTSUPP;
 		else
 			error = EACCES;
Index: sys/kern/vfs_mount.c
===================================================================
--- sys/kern/vfs_mount.c
+++ sys/kern/vfs_mount.c
@@ -502,10 +502,6 @@
 	    __rangeof(struct mount, mnt_startzero, mnt_endzero));
 	TAILQ_INIT(&mp->mnt_nvnodelist);
 	mp->mnt_nvnodelistsize = 0;
-	TAILQ_INIT(&mp->mnt_activevnodelist);
-	mp->mnt_activevnodelistsize = 0;
-	TAILQ_INIT(&mp->mnt_tmpfreevnodelist);
-	mp->mnt_tmpfreevnodelistsize = 0;
 	TAILQ_INIT(&mp->mnt_dirtyvnodelist);
 	mp->mnt_dirtyvnodelistsize = 0;
 	if (mp->mnt_ref != 0 || mp->mnt_lockref != 0 ||
@@ -573,10 +569,6 @@
 	KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers"));
 	if (mp->mnt_nvnodelistsize != 0)
 		panic("vfs_mount_destroy: nonzero nvnodelistsize");
-	if (mp->mnt_activevnodelistsize != 0)
-		panic("vfs_mount_destroy: nonzero activevnodelistsize");
-	if (mp->mnt_tmpfreevnodelistsize != 0)
-		panic("vfs_mount_destroy: nonzero tmpfreevnodelistsize");
 	if (mp->mnt_dirtyvnodelistsize != 0)
 		panic("vfs_mount_destroy: nonzero dirtyvnodelistsize");
 	if (mp->mnt_lockref != 0)
Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -101,6 +101,8 @@
 #include <ddb/ddb.h>
 #endif
 
+MALLOC_DECLARE(M_VNODE_MARKER);
+
 static void	delmntque(struct vnode *vp);
 static int	flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo,
 		    int slpflag, int slptimeo);
@@ -114,7 +116,6 @@
 static void	vfs_knlunlock(void *arg);
 static void	vfs_knl_assert_locked(void *arg);
 static void	vfs_knl_assert_unlocked(void *arg);
-static void	vnlru_return_batches(struct vfsops *mnt_op);
 static void	destroy_vpollinfo(struct vpollinfo *vi);
 static int	v_inval_buf_range_locked(struct vnode *vp, struct bufobj *bo,
 		    daddr_t startlbn, daddr_t endlbn);
@@ -148,10 +149,6 @@
 SYSCTL_COUNTER_U64(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD,
     &vnodes_created, "Number of vnodes created by getnewvnode");
 
-static u_long mnt_free_list_batch = 128;
-SYSCTL_ULONG(_vfs, OID_AUTO, mnt_free_list_batch, CTLFLAG_RW,
-    &mnt_free_list_batch, 0, "Limit of vnodes held on mnt's free list");
-
 /*
  * Conversion tables for conversion from vnode types to inode formats
  * and back.
@@ -166,9 +163,10 @@
 };
 
 /*
- * List of vnodes that are ready for recycling.
+ * List of allocated vnodes in the system.
  */
-static TAILQ_HEAD(freelst, vnode) vnode_free_list;
+static TAILQ_HEAD(freelst, vnode) vnode_list;
+static struct vnode *vnode_list_marker;
 
 /*
  * "Free" vnode target.  Free vnodes are rarely completely free, but are
@@ -204,6 +202,14 @@
 SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles, CTLFLAG_RD, &recycles_count,
     "Number of vnodes recycled to meet vnode cache targets");
 
+static counter_u64_t recycles_inspected;
+SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles_inspected, CTLFLAG_RD,
+    &recycles_inspected, "Number of vnodes inspected by vnlru_free");
+
+static counter_u64_t recycles_tried;
+SYSCTL_COUNTER_U64(_vfs, OID_AUTO, recycles_tried, CTLFLAG_RD,
+    &recycles_tried, "Number of free vnode recycling attempts");
+
 /*
  * Various variables used for debugging the new implementation of
  * reassignbuf().
@@ -222,11 +228,11 @@
 
 /*
  * Lock for any access to the following:
- *	vnode_free_list
+ *	vnode_list
  *	numvnodes
  *	freevnodes
  */
-static struct mtx vnode_free_list_mtx;
+static struct mtx vnode_list_mtx;
 
 /* Publicly exported FS */
 struct nfs_public nfs_pub;
@@ -491,6 +497,10 @@
 	 * Initialize rangelocks.
 	 */
 	rangelock_init(&vp->v_rl);
+
+	mtx_lock(&vnode_list_mtx);
+	TAILQ_INSERT_BEFORE(vnode_list_marker, vp, v_vnodelist);
+	mtx_unlock(&vnode_list_mtx);
 	return (0);
 }
@@ -504,6 +514,9 @@
 	struct bufobj *bo;
 
 	vp = mem;
+	mtx_lock(&vnode_list_mtx);
+	TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
+	mtx_unlock(&vnode_list_mtx);
 	rangelock_destroy(&vp->v_rl);
 	lockdestroy(vp->v_vnlock);
 	mtx_destroy(&vp->v_interlock);
@@ -562,8 +575,13 @@
 	}
 	wantfreevnodes = desiredvnodes / 4;
 	mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF);
-	TAILQ_INIT(&vnode_free_list);
-	mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF);
+	TAILQ_INIT(&vnode_list);
+	mtx_init(&vnode_list_mtx, "vnode_list", NULL, MTX_DEF);
+	vnode_list_marker = malloc(sizeof(struct vnode), M_VNODE_MARKER,
+	    M_WAITOK | M_ZERO);
+	vnode_list_marker->v_type = VMARKER;
+	vnode_list_marker->v_holdcnt = 1;
+	TAILQ_INSERT_HEAD(&vnode_list, vnode_list_marker, v_vnodelist);
 	vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
 	    vnode_init, vnode_fini, UMA_ALIGN_PTR, 0);
 	vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
@@ -581,6 +599,8 @@
 	vnodes_created = counter_u64_alloc(M_WAITOK);
 	recycles_count = counter_u64_alloc(M_WAITOK);
 	deferred_inact = counter_u64_alloc(M_WAITOK);
+	recycles_inspected = counter_u64_alloc(M_WAITOK);
+	recycles_tried = counter_u64_alloc(M_WAITOK);
 
 	/*
 	 * Initialize the filesystem syncer.
@@ -1031,7 +1051,7 @@
 		 */
 		if (vp->v_usecount ||
 		    (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) ||
-		    ((vp->v_iflag & VI_FREE) != 0) ||
+		    vp->v_holdcnt == 0 ||
 		    VN_IS_DOOMED(vp) || (vp->v_object != NULL &&
 		    vp->v_object->resident_page_count > trigger)) {
 			VI_UNLOCK(vp);
@@ -1102,36 +1122,20 @@
 {
 	struct vnode *vp;
 	struct mount *mp;
-	bool tried_batches;
 
-	tried_batches = false;
-	mtx_assert(&vnode_free_list_mtx, MA_OWNED);
+	mtx_assert(&vnode_list_mtx, MA_OWNED);
 	if (count > max_vnlru_free)
 		count = max_vnlru_free;
-	for (; count > 0; count--) {
-		vp = TAILQ_FIRST(&vnode_free_list);
-		/*
-		 * The list can be modified while the free_list_mtx
-		 * has been dropped and vp could be NULL here.
-		 */
-		if (vp == NULL) {
-			if (tried_batches)
-				break;
-			mtx_unlock(&vnode_free_list_mtx);
-			vnlru_return_batches(mnt_op);
-			tried_batches = true;
-			mtx_lock(&vnode_free_list_mtx);
-			continue;
+	/*
+	 * Walk the global vnode list with vnode_list_marker recording the
+	 * iteration point, so that the position survives dropping the list
+	 * lock around vtryrecycle().  Vnodes with a nonzero hold count are
+	 * in active use and are skipped.
+	 */
+restart:
+	vp = vnode_list_marker;
+	while (count > 0) {
+		counter_u64_add(recycles_inspected, 1);
+		vp = TAILQ_NEXT(vp, v_vnodelist);
+		if (__predict_false(vp == NULL)) {
+			TAILQ_REMOVE(&vnode_list, vnode_list_marker,
+			    v_vnodelist);
+			TAILQ_INSERT_TAIL(&vnode_list, vnode_list_marker,
+			    v_vnodelist);
+			break;
 		}
-
-		VNASSERT(vp->v_op != NULL, vp,
-		    ("vnlru_free: vnode already reclaimed."));
-		KASSERT((vp->v_iflag & VI_FREE) != 0,
-		    ("Removing vnode not on freelist"));
-		KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
-		    ("Mangling active vnode"));
-		TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
-
 		/*
 		 * Don't recycle if our vnode is from different type
 		 * of mount point.  Note that mp is type-safe, the
@@ -1140,28 +1144,21 @@
 		 * Don't recycle if we can't get the interlock without
 		 * blocking.
 		 */
-		if ((mnt_op != NULL && (mp = vp->v_mount) != NULL &&
+		if (vp->v_holdcnt > 0 || (mnt_op != NULL && (mp = vp->v_mount) != NULL &&
 		    mp->mnt_op != mnt_op) || !VI_TRYLOCK(vp)) {
-			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist);
 			continue;
 		}
-		VNASSERT((vp->v_iflag & VI_FREE) != 0 && vp->v_holdcnt == 0,
-		    vp, ("vp inconsistent on freelist"));
-
-		/*
-		 * The clear of VI_FREE prevents activation of the
-		 * vnode.  There is no sense in putting the vnode on
-		 * the mount point active list, only to remove it
-		 * later during recycling.  Inline the relevant part
-		 * of vholdl(), to avoid triggering assertions or
-		 * activating.
-		 */
-		freevnodes--;
-		vp->v_iflag &= ~VI_FREE;
-		VNODE_REFCOUNT_FENCE_REL();
-		refcount_acquire(&vp->v_holdcnt);
-
-		mtx_unlock(&vnode_free_list_mtx);
+		TAILQ_REMOVE(&vnode_list, vnode_list_marker, v_vnodelist);
+		TAILQ_INSERT_AFTER(&vnode_list, vp, vnode_list_marker,
+		    v_vnodelist);
+		if (__predict_false(vp->v_type == VMARKER ||
+		    vp->v_type == VBAD || vp->v_type == VNON)) {
+			VI_UNLOCK(vp);
+			continue;
+		}
+		vholdl(vp);
+		counter_u64_add(recycles_tried, 1);
+		count--;
+		mtx_unlock(&vnode_list_mtx);
 		VI_UNLOCK(vp);
 		vtryrecycle(vp);
 		/*
@@ -1170,7 +1167,8 @@
 		 * the free list.
 		 */
 		vdrop(vp);
-		mtx_lock(&vnode_free_list_mtx);
+		mtx_lock(&vnode_list_mtx);
+		goto restart;
 	}
 }
 
@@ -1178,12 +1176,11 @@
 vnlru_free(int count, struct vfsops *mnt_op)
 {
 
-	mtx_lock(&vnode_free_list_mtx);
+	mtx_lock(&vnode_list_mtx);
 	vnlru_free_locked(count, mnt_op);
-	mtx_unlock(&vnode_free_list_mtx);
+	mtx_unlock(&vnode_list_mtx);
 }
 
-
 /*
  * XXX some names and initialization are bad for limits and watermarks.
  */
 static int
 vspace(void)
 {
@@ -1201,63 +1198,6 @@
 	return (space);
 }
 
-static void
-vnlru_return_batch_locked(struct mount *mp)
-{
-	struct vnode *vp;
-
-	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
-
-	if (mp->mnt_tmpfreevnodelistsize == 0)
-		return;
-
-	TAILQ_FOREACH(vp, &mp->mnt_tmpfreevnodelist, v_actfreelist) {
-		VNASSERT((vp->v_mflag & VMP_TMPMNTFREELIST) != 0, vp,
-		    ("vnode without VMP_TMPMNTFREELIST on mnt_tmpfreevnodelist"));
-		vp->v_mflag &= ~VMP_TMPMNTFREELIST;
-	}
-	mtx_lock(&vnode_free_list_mtx);
-	TAILQ_CONCAT(&vnode_free_list, &mp->mnt_tmpfreevnodelist, v_actfreelist);
-	freevnodes += mp->mnt_tmpfreevnodelistsize;
-	mtx_unlock(&vnode_free_list_mtx);
-	mp->mnt_tmpfreevnodelistsize = 0;
-}
-
-static void
-vnlru_return_batch(struct mount *mp)
-{
-
-	mtx_lock(&mp->mnt_listmtx);
-	vnlru_return_batch_locked(mp);
-	mtx_unlock(&mp->mnt_listmtx);
-}
-
-static void
-vnlru_return_batches(struct vfsops *mnt_op)
-{
-	struct mount *mp, *nmp;
-	bool need_unbusy;
-
-	mtx_lock(&mountlist_mtx);
-	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
-		need_unbusy = false;
-		if (mnt_op != NULL && mp->mnt_op != mnt_op)
-			goto next;
-		if (mp->mnt_tmpfreevnodelistsize == 0)
-			goto next;
-		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) == 0) {
-			vnlru_return_batch(mp);
-			need_unbusy = true;
-			mtx_lock(&mountlist_mtx);
-		}
-next:
-		nmp = TAILQ_NEXT(mp, mnt_list);
-		if (need_unbusy)
-			vfs_unbusy(mp);
-	}
-	mtx_unlock(&mountlist_mtx);
-}
-
 /*
  * Attempt to recycle vnodes in a context that is always safe to block.
  * Calling vlrurecycle() from the bowels of filesystem code has some
@@ -1280,7 +1220,7 @@
 	force = 0;
 	for (;;) {
 		kproc_suspend_check(vnlruproc);
-		mtx_lock(&vnode_free_list_mtx);
+		mtx_lock(&vnode_list_mtx);
 		/*
 		 * If numvnodes is too large (due to desiredvnodes being
 		 * adjusted using its sysctl, or emergency growth), first
@@ -1303,11 +1243,11 @@
 		if (vsp >= vlowat && force == 0) {
 			vnlruproc_sig = 0;
 			wakeup(&vnlruproc_sig);
-			msleep(vnlruproc, &vnode_free_list_mtx,
+			msleep(vnlruproc, &vnode_list_mtx,
 			    PVFS|PDROP, "vlruwt", hz);
 			continue;
 		}
-		mtx_unlock(&vnode_free_list_mtx);
+		mtx_unlock(&vnode_list_mtx);
 		done = 0;
 		onumvnodes = numvnodes;
 		/*
@@ -1463,7 +1403,7 @@
 getnewvnode_wait(int suspended)
 {
 
-	mtx_assert(&vnode_free_list_mtx, MA_OWNED);
+	mtx_assert(&vnode_list_mtx, MA_OWNED);
 	if (numvnodes >= desiredvnodes) {
 		if (suspended) {
 			/*
@@ -1477,7 +1417,7 @@
 			vnlruproc_sig = 1;	/* avoid unnecessary wakeups */
 			wakeup(vnlruproc);
 		}
-		msleep(&vnlruproc_sig, &vnode_free_list_mtx, PVFS,
+		msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS,
 		    "vlruwk", hz);
 	}
 	/* Post-adjust like the pre-adjust in getnewvnode(). */
@@ -1497,11 +1437,11 @@
 
 	/* Pre-adjust like the pre-adjust in getnewvnode(), with any count. */
 	/* XXX no longer so quick, but this part is not racy. */
-	mtx_lock(&vnode_free_list_mtx);
+	mtx_lock(&vnode_list_mtx);
 	if (numvnodes + count > desiredvnodes && freevnodes > wantfreevnodes)
 		vnlru_free_locked(ulmin(numvnodes + count - desiredvnodes,
 		    freevnodes - wantfreevnodes), NULL);
-	mtx_unlock(&vnode_free_list_mtx);
+	mtx_unlock(&vnode_list_mtx);
 	td = curthread;
 	/* First try to be quick and racy. */
@@ -1512,7 +1452,7 @@
 	} else
 		atomic_subtract_long(&numvnodes, count);
 
-	mtx_lock(&vnode_free_list_mtx);
+	mtx_lock(&vnode_list_mtx);
 	while (count > 0) {
 		if (getnewvnode_wait(0) == 0) {
 			count--;
@@ -1521,7 +1461,7 @@
 		}
 	}
 	vcheckspace();
-	mtx_unlock(&vnode_free_list_mtx);
+	mtx_unlock(&vnode_list_mtx);
 }
 
 /*
@@ -1566,7 +1506,7 @@
 		td->td_vp_reserv -= 1;
 		goto alloc;
 	}
-	mtx_lock(&vnode_free_list_mtx);
+	mtx_lock(&vnode_list_mtx);
 	if (numvnodes < desiredvnodes)
 		cyclecount = 0;
 	else if (cyclecount++ >= freevnodes) {
@@ -1592,14 +1532,14 @@
 		    MNTK_SUSPEND));
 #if 0	/* XXX Not all VFS_VGET/ffs_vget callers check returns. */
 		if (error != 0) {
-			mtx_unlock(&vnode_free_list_mtx);
+			mtx_unlock(&vnode_list_mtx);
 			return (error);
 		}
 #endif
 	}
 	vcheckspace();
 	atomic_add_long(&numvnodes, 1);
-	mtx_unlock(&vnode_free_list_mtx);
+	mtx_unlock(&vnode_list_mtx);
 alloc:
 	counter_u64_add(vnodes_created, 1);
 	vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK);
@@ -1682,8 +1622,6 @@
 	CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp);
 	atomic_subtract_long(&numvnodes, 1);
 	bo = &vp->v_bufobj;
-	VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
-	    ("cleaned vnode still on the free list."));
 	VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't"));
 	VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count"));
 	VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count"));
@@ -1737,16 +1675,6 @@
 		return;
 	MNT_ILOCK(mp);
 	VI_LOCK(vp);
-	KASSERT(mp->mnt_activevnodelistsize <= mp->mnt_nvnodelistsize,
-	    ("Active vnode list size %d > Vnode list size %d",
-	    mp->mnt_activevnodelistsize, mp->mnt_nvnodelistsize));
-	if (vp->v_iflag & VI_ACTIVE) {
-		vp->v_iflag &= ~VI_ACTIVE;
-		mtx_lock(&mp->mnt_listmtx);
-		TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist);
-		mp->mnt_activevnodelistsize--;
-		mtx_unlock(&mp->mnt_listmtx);
-	}
 	if (vp->v_mflag & VMP_DIRTYLIST) {
 		mtx_lock(&mp->mnt_listmtx);
 		if (vp->v_mflag & VMP_DIRTYLIST) {
@@ -1816,13 +1744,6 @@
 	VNASSERT(mp->mnt_nvnodelistsize >= 0, vp,
 	    ("neg mount point vnode list size"));
 	mp->mnt_nvnodelistsize++;
-	KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
-	    ("Activating already active vnode"));
-	vp->v_iflag |= VI_ACTIVE;
-	mtx_lock(&mp->mnt_listmtx);
-	TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
-	mp->mnt_activevnodelistsize++;
-	mtx_unlock(&mp->mnt_listmtx);
 	VI_UNLOCK(vp);
 	MNT_IUNLOCK(mp);
 	return (0);
@@ -3166,38 +3087,13 @@
 static void
 vhold_activate(struct vnode *vp)
 {
-	struct mount *mp;
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	VNASSERT(vp->v_holdcnt == 0, vp,
 	    ("%s: wrong hold count", __func__));
 	VNASSERT(vp->v_op != NULL, vp,
 	    ("%s: vnode already reclaimed.", __func__));
-	/*
-	 * Remove a vnode from the free list, mark it as in use,
-	 * and put it on the active list.
-	 */
-	VNASSERT(vp->v_mount != NULL, vp,
-	    ("_vhold: vnode not on per mount vnode list"));
-	mp = vp->v_mount;
-	mtx_lock(&mp->mnt_listmtx);
-	if ((vp->v_mflag & VMP_TMPMNTFREELIST) != 0) {
-		TAILQ_REMOVE(&mp->mnt_tmpfreevnodelist, vp, v_actfreelist);
-		mp->mnt_tmpfreevnodelistsize--;
-		vp->v_mflag &= ~VMP_TMPMNTFREELIST;
-	} else {
-		mtx_lock(&vnode_free_list_mtx);
-		TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
-		freevnodes--;
-		mtx_unlock(&vnode_free_list_mtx);
-	}
-	KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
-	    ("Activating already active vnode"));
-	vp->v_iflag &= ~VI_FREE;
-	vp->v_iflag |= VI_ACTIVE;
-	TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
-	mp->mnt_activevnodelistsize++;
-	mtx_unlock(&mp->mnt_listmtx);
+	atomic_subtract_long(&freevnodes, 1);
 	refcount_acquire(&vp->v_holdcnt);
 }
@@ -3207,12 +3103,8 @@
 
 	ASSERT_VI_UNLOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	if (refcount_acquire_if_not_zero(&vp->v_holdcnt)) {
-		VNODE_REFCOUNT_FENCE_ACQ();
-		VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
-		    ("vhold: vnode with holdcnt is free"));
+	if (refcount_acquire_if_not_zero(&vp->v_holdcnt))
 		return;
-	}
 	VI_LOCK(vp);
 	vholdl(vp);
 	VI_UNLOCK(vp);
@@ -3224,7 +3116,7 @@
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	if ((vp->v_iflag & VI_FREE) == 0) {
+	if (vp->v_holdcnt > 0) {
 		refcount_acquire(&vp->v_holdcnt);
 		return;
 	}
@@ -3267,34 +3159,26 @@
 	    ("vdrop: returning doomed vnode"));
 	VNASSERT(vp->v_op != NULL, vp,
 	    ("vdrop: vnode already reclaimed."));
-	VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
-	    ("vnode already free"));
-	VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
-	    ("vnode with VI_OWEINACT set"));
-	VNASSERT((vp->v_iflag & VI_DEFERRED_INACTIVE) == 0, vp,
-	    ("vnode with VI_DEFERRED_INACTIVE set"));
 	VNASSERT(vp->v_holdcnt == 0, vp,
 	    ("vdrop: freeing when we shouldn't"));
-	mp = vp->v_mount;
-	mtx_lock(&mp->mnt_listmtx);
+	VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
+	    ("vdrop: vnode with VI_OWEINACT"));
+	VNASSERT((vp->v_iflag & VI_DEFERRED_INACTIVE) == 0, vp,
+	    ("vdrop: vnode with VI_DEFERRED_INACTIVE"));
 	if (vp->v_mflag & VMP_DIRTYLIST) {
+		mp = vp->v_mount;
+		mtx_lock(&mp->mnt_listmtx);
 		vp->v_mflag &= ~VMP_DIRTYLIST;
 		TAILQ_REMOVE(&mp->mnt_dirtyvnodelist, vp, v_dirtylist);
 		mp->mnt_dirtyvnodelistsize--;
+		mtx_unlock(&mp->mnt_listmtx);
 	}
-	if (vp->v_iflag & VI_ACTIVE) {
-		vp->v_iflag &= ~VI_ACTIVE;
-		TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist);
-		mp->mnt_activevnodelistsize--;
-	}
-	TAILQ_INSERT_TAIL(&mp->mnt_tmpfreevnodelist, vp, v_actfreelist);
-	mp->mnt_tmpfreevnodelistsize++;
-	vp->v_iflag |= VI_FREE;
-	vp->v_mflag |= VMP_TMPMNTFREELIST;
+	mtx_lock(&vnode_list_mtx);
+	/*
+	 * Requeue the free vnode to the tail of the global list so that
+	 * vnlru_free_locked() encounters the least recently used vnodes
+	 * first.
+	 */
+	TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
+	TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
+	mtx_unlock(&vnode_list_mtx);
+	atomic_add_long(&freevnodes, 1);
 	VI_UNLOCK(vp);
-	if (mp->mnt_tmpfreevnodelistsize >= mnt_free_list_batch)
-		vnlru_return_batch_locked(mp);
-	mtx_unlock(&mp->mnt_listmtx);
 }
@@ -3843,25 +3727,19 @@
 		strlcat(buf, "|VI_DEFERRED_INACTIVE", sizeof(buf));
 	if (vp->v_iflag & VI_MOUNT)
 		strlcat(buf, "|VI_MOUNT", sizeof(buf));
-	if (vp->v_iflag & VI_FREE)
-		strlcat(buf, "|VI_FREE", sizeof(buf));
-	if (vp->v_iflag & VI_ACTIVE)
-		strlcat(buf, "|VI_ACTIVE", sizeof(buf));
 	if (vp->v_iflag & VI_DOINGINACT)
 		strlcat(buf, "|VI_DOINGINACT", sizeof(buf));
 	if (vp->v_iflag & VI_OWEINACT)
 		strlcat(buf, "|VI_OWEINACT", sizeof(buf));
 	flags = vp->v_iflag & ~(VI_TEXT_REF | VI_DEFERRED_INACTIVE | VI_MOUNT |
-	    VI_FREE | VI_ACTIVE | VI_DOINGINACT | VI_OWEINACT);
+	    VI_DOINGINACT | VI_OWEINACT);
 	if (flags != 0) {
 		snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags);
 		strlcat(buf, buf2, sizeof(buf));
 	}
-	if (vp->v_mflag & VMP_TMPMNTFREELIST)
-		strlcat(buf, "|VMP_TMPMNTFREELIST", sizeof(buf));
 	if (vp->v_mflag & VMP_DIRTYLIST)
 		strlcat(buf, "|VMP_DIRTYLIST", sizeof(buf));
-	flags = vp->v_mflag & ~(VMP_TMPMNTFREELIST | VMP_DIRTYLIST);
+	flags = vp->v_mflag & ~(VMP_DIRTYLIST);
 	if (flags != 0) {
 		snprintf(buf2, sizeof(buf2), "|VMP(0x%lx)", flags);
 		strlcat(buf, buf2, sizeof(buf));
 	}
@@ -4079,8 +3957,6 @@
 	    vfs_mount_fetch_counter(mp, MNT_COUNT_REF), mp->mnt_ref);
 	db_printf("    mnt_gen = %d\n", mp->mnt_gen);
 	db_printf("    mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize);
-	db_printf("    mnt_activevnodelistsize = %d\n",
-	    mp->mnt_activevnodelistsize);
 	db_printf("    mnt_dirtyvnodelistsize = %d\n",
 	    mp->mnt_dirtyvnodelistsize);
 	db_printf("    mnt_writeopcount = %d (with %d in the struct)\n",
@@ -4098,8 +3974,8 @@
 	db_printf("    mnt_vfs_ops = %d\n", mp->mnt_vfs_ops);
 
 	db_printf("\n\nList of active vnodes\n");
-	TAILQ_FOREACH(vp, &mp->mnt_activevnodelist, v_actfreelist) {
-		if (vp->v_type != VMARKER) {
+	TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
+		if (vp->v_type != VMARKER && vp->v_holdcnt > 0) {
 			vn_printf(vp, "vnode ");
 			if (db_pager_quit)
 				break;
 		}
 	}
 	db_printf("\n\nList of inactive vnodes\n");
@@ -4107,7 +3983,7 @@
 	TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
-		if (vp->v_type != VMARKER && (vp->v_iflag & VI_ACTIVE) == 0) {
+		if (vp->v_type != VMARKER && vp->v_holdcnt == 0) {
 			vn_printf(vp, "vnode ");
 			if (db_pager_quit)
 				break;
 		}
 	}
@@ -4736,8 +4612,3 @@
-	/*
-	 * The filesystem at hand may be idle with free vnodes stored in the
-	 * batch. Return them instead of letting them stay there indefinitely.
-	 */
-	vnlru_return_batch(mp);
 	vfs_periodic(mp, MNT_NOWAIT);
 	error = VFS_SYNC(mp, MNT_LAZY);
 	curthread_pflags_restore(save);
@@ -6019,193 +5894,6 @@
 	*mvp = NULL;
 }
 
-/*
- * These are helper functions for filesystems to traverse their
- * active vnodes.  See MNT_VNODE_FOREACH_ACTIVE() in sys/mount.h
- */
-static void
-mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
-{
-
-	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
-
-	MNT_ILOCK(mp);
-	MNT_REL(mp);
-	MNT_IUNLOCK(mp);
-	free(*mvp, M_VNODE_MARKER);
-	*mvp = NULL;
-}
-
-/*
- * Relock the mp mount vnode list lock with the vp vnode interlock in the
- * conventional lock order during mnt_vnode_next_active iteration.
- *
- * On entry, the mount vnode list lock is held and the vnode interlock is not.
- * The list lock is dropped and reacquired.  On success, both locks are held.
- * On failure, the mount vnode list lock is held but the vnode interlock is
- * not, and the procedure may have yielded.
- */
-static bool
-mnt_vnode_next_active_relock(struct vnode *mvp, struct mount *mp,
-    struct vnode *vp)
-{
-	const struct vnode *tmp;
-	bool held, ret;
-
-	VNASSERT(mvp->v_mount == mp && mvp->v_type == VMARKER &&
-	    TAILQ_NEXT(mvp, v_actfreelist) != NULL, mvp,
-	    ("%s: bad marker", __func__));
-	VNASSERT(vp->v_mount == mp && vp->v_type != VMARKER, vp,
-	    ("%s: inappropriate vnode", __func__));
-	ASSERT_VI_UNLOCKED(vp, __func__);
-	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
-
-	ret = false;
-
-	TAILQ_REMOVE(&mp->mnt_activevnodelist, mvp, v_actfreelist);
-	TAILQ_INSERT_BEFORE(vp, mvp, v_actfreelist);
-
-	/*
-	 * Use a hold to prevent vp from disappearing while the mount vnode
-	 * list lock is dropped and reacquired.  Normally a hold would be
-	 * acquired with vhold(), but that might try to acquire the vnode
-	 * interlock, which would be a LOR with the mount vnode list lock.
-	 */
-	held = refcount_acquire_if_not_zero(&vp->v_holdcnt);
-	mtx_unlock(&mp->mnt_listmtx);
-	if (!held)
-		goto abort;
-	VI_LOCK(vp);
-	if (!refcount_release_if_not_last(&vp->v_holdcnt)) {
-		vdropl(vp);
-		goto abort;
-	}
-	mtx_lock(&mp->mnt_listmtx);
-
-	/*
-	 * Determine whether the vnode is still the next one after the marker,
-	 * excepting any other markers.  If the vnode has not been doomed by
-	 * vgone() then the hold should have ensured that it remained on the
-	 * active list.  If it has been doomed but is still on the active list,
-	 * don't abort, but rather skip over it (avoid spinning on doomed
-	 * vnodes).
-	 */
-	tmp = mvp;
-	do {
-		tmp = TAILQ_NEXT(tmp, v_actfreelist);
-	} while (tmp != NULL && tmp->v_type == VMARKER);
-	if (tmp != vp) {
-		mtx_unlock(&mp->mnt_listmtx);
-		VI_UNLOCK(vp);
-		goto abort;
-	}
-
-	ret = true;
-	goto out;
-abort:
-	maybe_yield();
-	mtx_lock(&mp->mnt_listmtx);
-out:
-	if (ret)
-		ASSERT_VI_LOCKED(vp, __func__);
-	else
-		ASSERT_VI_UNLOCKED(vp, __func__);
-	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
-	return (ret);
-}
-
-static struct vnode *
-mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
-{
-	struct vnode *vp, *nvp;
-
-	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
-	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
-restart:
-	vp = TAILQ_NEXT(*mvp, v_actfreelist);
-	while (vp != NULL) {
-		if (vp->v_type == VMARKER) {
-			vp = TAILQ_NEXT(vp, v_actfreelist);
-			continue;
-		}
-		/*
-		 * Try-lock because this is the wrong lock order.  If that does
-		 * not succeed, drop the mount vnode list lock and try to
-		 * reacquire it and the vnode interlock in the right order.
-		 */
-		if (!VI_TRYLOCK(vp) &&
-		    !mnt_vnode_next_active_relock(*mvp, mp, vp))
-			goto restart;
-		KASSERT(vp->v_type != VMARKER, ("locked marker %p", vp));
-		KASSERT(vp->v_mount == mp || vp->v_mount == NULL,
-		    ("alien vnode on the active list %p %p", vp, mp));
-		if (vp->v_mount == mp && !VN_IS_DOOMED(vp))
-			break;
-		nvp = TAILQ_NEXT(vp, v_actfreelist);
-		VI_UNLOCK(vp);
-		vp = nvp;
-	}
-	TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
-
-	/* Check if we are done */
-	if (vp == NULL) {
-		mtx_unlock(&mp->mnt_listmtx);
-		mnt_vnode_markerfree_active(mvp, mp);
-		return (NULL);
-	}
-	TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
-	mtx_unlock(&mp->mnt_listmtx);
-	ASSERT_VI_LOCKED(vp, "active iter");
-	KASSERT((vp->v_iflag & VI_ACTIVE) != 0, ("Non-active vp %p", vp));
-	return (vp);
-}
-
-struct vnode *
-__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
-{
-
-	if (should_yield())
-		kern_yield(PRI_USER);
-	mtx_lock(&mp->mnt_listmtx);
-	return (mnt_vnode_next_active(mvp, mp));
-}
-
-struct vnode *
-__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp)
-{
-	struct vnode *vp;
-
-	*mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO);
-	MNT_ILOCK(mp);
-	MNT_REF(mp);
-	MNT_IUNLOCK(mp);
-	(*mvp)->v_type = VMARKER;
-	(*mvp)->v_mount = mp;
-
-	mtx_lock(&mp->mnt_listmtx);
-	vp = TAILQ_FIRST(&mp->mnt_activevnodelist);
-	if (vp == NULL) {
-		mtx_unlock(&mp->mnt_listmtx);
-		mnt_vnode_markerfree_active(mvp, mp);
-		return (NULL);
-	}
-	TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist);
-	return (mnt_vnode_next_active(mvp, mp));
-}
-
-void
-__mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
-{
-
-	if (*mvp == NULL)
-		return;
-
-	mtx_lock(&mp->mnt_listmtx);
-	TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
-	mtx_unlock(&mp->mnt_listmtx);
-	mnt_vnode_markerfree_active(mvp, mp);
-}
-
 /*
  * These are helper functions for filesystems to traverse their
  * dirty vnodes.  See MNT_VNODE_FOREACH_DIRTY() in sys/mount.h
  */
@@ -6363,7 +6051,6 @@
 	TAILQ_INSERT_AFTER(&mp->mnt_dirtyvnodelist, vp, *mvp, v_dirtylist);
 	mtx_unlock(&mp->mnt_listmtx);
 	ASSERT_VI_LOCKED(vp, "dirty iter");
-	KASSERT((vp->v_iflag & VI_ACTIVE) != 0, ("Non-active vp %p", vp));
 	return (vp);
 }
Index: sys/sys/mount.h
===================================================================
--- sys/sys/mount.h
+++ sys/sys/mount.h
@@ -219,10 +219,6 @@
 #define	mnt_endzero	mnt_gjprovider
 	char		*mnt_gjprovider;	/* gjournal provider name */
 	struct mtx	mnt_listmtx;
-	struct vnodelst	mnt_activevnodelist;	/* (l) list of active vnodes */
-	int		mnt_activevnodelistsize;/* (l) # of active vnodes */
-	struct vnodelst	mnt_tmpfreevnodelist;	/* (l) list of free vnodes */
-	int		mnt_tmpfreevnodelistsize;/* (l) # of free vnodes */
 	struct vnodelst	mnt_dirtyvnodelist;	/* (l) list of dirty vnodes */
 	int		mnt_dirtyvnodelistsize;	/* (l) # of dirty vnodes */
 	struct lock	mnt_explock;		/* vfs_export walkers lock */
@@ -255,20 +251,6 @@
 	mtx_assert(MNT_MTX(mp), MA_NOTOWNED);	\
 } while (0)
 
-/*
- * Definitions for MNT_VNODE_FOREACH_ACTIVE.
- */
-struct vnode *__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp);
-struct vnode *__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp);
-void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
-
-#define	MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp)				\
-	for (vp = __mnt_vnode_first_active(&(mvp), (mp));		\
-	    (vp) != NULL; vp = __mnt_vnode_next_active(&(mvp), (mp)))
-
-#define	MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp)				\
-	__mnt_vnode_markerfree_active(&(mvp), (mp))
-
 /*
  * Definitions for MNT_VNODE_FOREACH_DIRTY.
  */
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -147,7 +147,7 @@
 	/*
 	 * The machinery of being a vnode
 	 */
-	TAILQ_ENTRY(vnode) v_actfreelist;	/* l vnode active/free lists */
+	TAILQ_ENTRY(vnode) v_vnodelist;		/* l vnode lists */
 	TAILQ_ENTRY(vnode) v_dirtylist;		/* l vnode dirty list */
 	struct bufobj	v_bufobj;		/* * Buffer cache object */
@@ -241,8 +241,6 @@
 #define	VI_TEXT_REF	0x0001	/* Text ref grabbed use ref */
 #define	VI_DEFERRED_INACTIVE	0x0002	/* deferred inactive */
 #define	VI_MOUNT	0x0020	/* Mount in progress */
-#define	VI_FREE		0x0100	/* This vnode is on the freelist */
-#define	VI_ACTIVE	0x0200	/* This vnode is on the active list */
 #define	VI_DOINGINACT	0x0800	/* VOP_INACTIVE is in progress */
 #define	VI_OWEINACT	0x1000	/* Need to call inactive */
@@ -261,8 +259,7 @@
 #define	VV_FORCEINSMQ	0x1000	/* force the insmntque to succeed */
 #define	VV_READLINK	0x2000	/* fdescfs linux vnode */
 
-#define	VMP_TMPMNTFREELIST	0x0001	/* Vnode is on mnt's tmp free list */
-#define	VMP_DIRTYLIST	0x0002	/* Vnode is on mnt's dirty free list */
+#define	VMP_DIRTYLIST	0x0001	/* Vnode is on mnt's dirty list */
 
 /*
  * Vnode attributes.  A field value of VNOVAL represents a field whose value
Index: sys/ufs/ufs/ufs_quota.c
===================================================================
--- sys/ufs/ufs/ufs_quota.c
+++ sys/ufs/ufs/ufs_quota.c
@@ -1083,7 +1083,7 @@
 	 * synchronizing any modified dquot structures.
 	 */
again:
-	MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
+	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		if (vp->v_type == VNON) {
 			VI_UNLOCK(vp);
 			continue;
 		}
@@ -1091,7 +1091,7 @@
 		error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td);
 		if (error) {
 			if (error == ENOENT) {
-				MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp);
+				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 				goto again;
 			}
 			continue;