Index: sys/fs/msdosfs/msdosfs_vfsops.c
===================================================================
--- sys/fs/msdosfs/msdosfs_vfsops.c
+++ sys/fs/msdosfs/msdosfs_vfsops.c
@@ -792,7 +792,7 @@
 		vn_printf(vp,
 		    "msdosfs_umount(): just before calling VOP_CLOSE()\n");
 		printf("freef %p, freeb %p, mount %p\n",
-		    TAILQ_NEXT(vp, v_actfreelist), vp->v_actfreelist.tqe_prev,
+		    TAILQ_NEXT(vp, v_vnodelist), vp->v_vnodelist.tqe_prev,
 		    vp->v_mount);
 		printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n",
 		    TAILQ_FIRST(&vp->v_bufobj.bo_clean.bv_hd),
Index: sys/fs/nfsserver/nfs_nfsdport.c
===================================================================
--- sys/fs/nfsserver/nfs_nfsdport.c
+++ sys/fs/nfsserver/nfs_nfsdport.c
@@ -3317,7 +3317,6 @@
 		inited = 1;
 		nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
 		TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
-		TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist);
 		TAILQ_INIT(&nfsv4root_mnt.mnt_lazyvnodelist);
 		nfsv4root_mnt.mnt_export = NULL;
 		TAILQ_INIT(&nfsv4root_opt);
@@ -3325,7 +3324,6 @@
 		nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
 		nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
 		nfsv4root_mnt.mnt_nvnodelistsize = 0;
-		nfsv4root_mnt.mnt_activevnodelistsize = 0;
 		nfsv4root_mnt.mnt_lazyvnodelistsize = 0;
 	}
Index: sys/fs/tmpfs/tmpfs_vfsops.c
===================================================================
--- sys/fs/tmpfs/tmpfs_vfsops.c
+++ sys/fs/tmpfs/tmpfs_vfsops.c
@@ -101,7 +101,7 @@
 
 /*
  * Handle updates of time from writes to mmaped regions. Use
- * MNT_VNODE_FOREACH_ALL instead of MNT_VNODE_FOREACH_ACTIVE, since
+ * MNT_VNODE_FOREACH_ALL instead of MNT_VNODE_FOREACH_LAZY, since
 * unmap of the tmpfs-backed vnode does not call vinactive(), due to
 * vm object type is OBJT_SWAP.
 * If lazy, only handle delayed update of mtime due to the writes to
Index: sys/fs/unionfs/union_vnops.c
===================================================================
--- sys/fs/unionfs/union_vnops.c
+++ sys/fs/unionfs/union_vnops.c
@@ -1706,7 +1706,7 @@
 		error = VOP_GETWRITEMOUNT(uvp, ap->a_mpp);
 	else {
 		VI_LOCK(vp);
-		if (vp->v_iflag & VI_FREE)
+		if (vp->v_holdcnt == 0)
 			error = EOPNOTSUPP;
 		else
 			error = EACCES;
Index: sys/kern/vfs_mount.c
===================================================================
--- sys/kern/vfs_mount.c
+++ sys/kern/vfs_mount.c
@@ -502,10 +502,6 @@
 	    __rangeof(struct mount, mnt_startzero, mnt_endzero));
 	TAILQ_INIT(&mp->mnt_nvnodelist);
 	mp->mnt_nvnodelistsize = 0;
-	TAILQ_INIT(&mp->mnt_activevnodelist);
-	mp->mnt_activevnodelistsize = 0;
-	TAILQ_INIT(&mp->mnt_tmpfreevnodelist);
-	mp->mnt_tmpfreevnodelistsize = 0;
 	TAILQ_INIT(&mp->mnt_lazyvnodelist);
 	mp->mnt_lazyvnodelistsize = 0;
 	if (mp->mnt_ref != 0 || mp->mnt_lockref != 0 ||
@@ -573,10 +569,6 @@
 	KASSERT(TAILQ_EMPTY(&mp->mnt_uppers), ("mnt_uppers"));
 	if (mp->mnt_nvnodelistsize != 0)
 		panic("vfs_mount_destroy: nonzero nvnodelistsize");
-	if (mp->mnt_activevnodelistsize != 0)
-		panic("vfs_mount_destroy: nonzero activevnodelistsize");
-	if (mp->mnt_tmpfreevnodelistsize != 0)
-		panic("vfs_mount_destroy: nonzero tmpfreevnodelistsize");
 	if (mp->mnt_lazyvnodelistsize != 0)
 		panic("vfs_mount_destroy: nonzero lazyvnodelistsize");
 	if (mp->mnt_lockref != 0)
Index: sys/kern/vfs_subr.c
===================================================================
--- sys/kern/vfs_subr.c
+++ sys/kern/vfs_subr.c
@@ -114,7 +114,6 @@
 static void	vfs_knlunlock(void *arg);
 static void	vfs_knl_assert_locked(void *arg);
 static void	vfs_knl_assert_unlocked(void *arg);
-static void	vnlru_return_batches(struct vfsops *mnt_op);
 static void	destroy_vpollinfo(struct vpollinfo *vi);
 static int	v_inval_buf_range_locked(struct vnode *vp, struct bufobj *bo,
 		    daddr_t startlbn, daddr_t endlbn);
@@ -148,10 +147,6 @@
 SYSCTL_COUNTER_U64(_vfs, OID_AUTO, vnodes_created, CTLFLAG_RD, &vnodes_created,
     "Number of vnodes created by getnewvnode");
 
-static u_long mnt_free_list_batch = 128;
-SYSCTL_ULONG(_vfs, OID_AUTO, mnt_free_list_batch, CTLFLAG_RW,
-    &mnt_free_list_batch, 0, "Limit of vnodes held on mnt's free list");
-
 /*
  * Conversion tables for conversion from vnode types to inode formats
  * and back.
@@ -166,9 +161,10 @@
 };
 
 /*
- * List of vnodes that are ready for recycling.
+ * List of allocated vnodes in the system.
  */
-static TAILQ_HEAD(freelst, vnode) vnode_free_list;
+static TAILQ_HEAD(freelst, vnode) vnode_list;
+static struct vnode *vnode_list_free_marker;
 
 /*
 * "Free" vnode target. Free vnodes are rarely completely free, but are
@@ -196,7 +192,7 @@
 static u_long wantfreevnodes;
 SYSCTL_ULONG(_vfs, OID_AUTO, wantfreevnodes, CTLFLAG_RW,
     &wantfreevnodes, 0, "Target for minimum number of \"free\" vnodes");
-static u_long freevnodes;
+static u_long __exclusive_cache_line freevnodes;
 SYSCTL_ULONG(_vfs, OID_AUTO, freevnodes, CTLFLAG_RD,
     &freevnodes, 0, "Number of \"free\" vnodes");
 
@@ -226,11 +222,11 @@
 
 /*
  * Lock for any access to the following:
- *	vnode_free_list
+ *	vnode_list
 *	numvnodes
 *	freevnodes
 */
-static struct mtx __exclusive_cache_line vnode_free_list_mtx;
+static struct mtx __exclusive_cache_line vnode_list_mtx;
 
 /* Publicly exported FS */
 struct nfs_public nfs_pub;
@@ -517,6 +513,10 @@
 	 * Initialize rangelocks.
 	 */
 	rangelock_init(&vp->v_rl);
+
+	mtx_lock(&vnode_list_mtx);
+	TAILQ_INSERT_BEFORE(vnode_list_free_marker, vp, v_vnodelist);
+	mtx_unlock(&vnode_list_mtx);
 	return (0);
 }
@@ -530,6 +530,9 @@
 	struct bufobj *bo;
 
 	vp = mem;
+	mtx_lock(&vnode_list_mtx);
+	TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
+	mtx_unlock(&vnode_list_mtx);
 	rangelock_destroy(&vp->v_rl);
 	lockdestroy(vp->v_vnlock);
 	mtx_destroy(&vp->v_interlock);
@@ -588,8 +591,10 @@
 	}
 	wantfreevnodes = desiredvnodes / 4;
 	mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF);
-	TAILQ_INIT(&vnode_free_list);
-	mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF);
+	TAILQ_INIT(&vnode_list);
+	mtx_init(&vnode_list_mtx, "vnode_list", NULL, MTX_DEF);
+	vnode_list_free_marker = vn_alloc_marker(NULL);
+	TAILQ_INSERT_HEAD(&vnode_list, vnode_list_free_marker, v_vnodelist);
 	vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
 	    vnode_init, vnode_fini, UMA_ALIGN_PTR, 0);
 	vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
@@ -1058,7 +1063,7 @@
 	 */
 	if (vp->v_usecount ||
 	    (!reclaim_nc_src && !LIST_EMPTY(&vp->v_cache_src)) ||
-	    ((vp->v_iflag & VI_FREE) != 0) ||
+	    vp->v_holdcnt == 0 ||
 	    VN_IS_DOOMED(vp) || (vp->v_object != NULL &&
 	    vp->v_object->resident_page_count > trigger)) {
 		VI_UNLOCK(vp);
@@ -1127,37 +1132,24 @@
 static void
 vnlru_free_locked(int count, struct vfsops *mnt_op)
 {
-	struct vnode *vp;
+	struct vnode *vp, *mvp;
 	struct mount *mp;
-	bool tried_batches;
 
-	tried_batches = false;
-	mtx_assert(&vnode_free_list_mtx, MA_OWNED);
+	mtx_assert(&vnode_list_mtx, MA_OWNED);
 	if (count > max_vnlru_free)
 		count = max_vnlru_free;
-	for (; count > 0; count--) {
-		vp = TAILQ_FIRST(&vnode_free_list);
-		/*
-		 * The list can be modified while the free_list_mtx
-		 * has been dropped and vp could be NULL here.
-		 */
-		if (vp == NULL) {
-			if (tried_batches)
-				break;
-			mtx_unlock(&vnode_free_list_mtx);
-			vnlru_return_batches(mnt_op);
-			tried_batches = true;
-			mtx_lock(&vnode_free_list_mtx);
-			continue;
+	mvp = vnode_list_free_marker;
+restart:
+	vp = mvp;
+	while (count > 0) {
+		vp = TAILQ_NEXT(vp, v_vnodelist);
+		if (__predict_false(vp == NULL)) {
+			TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+			TAILQ_INSERT_TAIL(&vnode_list, mvp, v_vnodelist);
+			break;
 		}
-
-		VNASSERT(vp->v_op != NULL, vp,
-		    ("vnlru_free: vnode already reclaimed."));
-		KASSERT((vp->v_iflag & VI_FREE) != 0,
-		    ("Removing vnode not on freelist"));
-		KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
-		    ("Mangling active vnode"));
-		TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
+		if (__predict_false(vp->v_type == VMARKER))
+			continue;
 
 		/*
 		 * Don't recycle if our vnode is from different type
@@ -1167,37 +1159,24 @@
 		 * Don't recycle if we can't get the interlock without
 		 * blocking.
 		 */
-		if ((mnt_op != NULL && (mp = vp->v_mount) != NULL &&
+		if (vp->v_holdcnt > 0 || (mnt_op != NULL && (mp = vp->v_mount) != NULL &&
 		    mp->mnt_op != mnt_op) || !VI_TRYLOCK(vp)) {
-			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist);
 			continue;
 		}
-		VNASSERT((vp->v_iflag & VI_FREE) != 0 && vp->v_holdcnt == 0,
-		    vp, ("vp inconsistent on freelist"));
-
-		/*
-		 * The clear of VI_FREE prevents activation of the
-		 * vnode. There is no sense in putting the vnode on
-		 * the mount point active list, only to remove it
-		 * later during recycling. Inline the relevant part
-		 * of vholdl(), to avoid triggering assertions or
-		 * activating.
-		 */
-		freevnodes--;
-		vp->v_iflag &= ~VI_FREE;
-		VNODE_REFCOUNT_FENCE_REL();
-		refcount_acquire(&vp->v_holdcnt);
-
-		mtx_unlock(&vnode_free_list_mtx);
+		TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+		TAILQ_INSERT_AFTER(&vnode_list, vp, mvp, v_vnodelist);
+		if (__predict_false(vp->v_type == VBAD || vp->v_type == VNON)) {
+			VI_UNLOCK(vp);
+			continue;
+		}
+		vholdl(vp);
+		count--;
+		mtx_unlock(&vnode_list_mtx);
 		VI_UNLOCK(vp);
 		vtryrecycle(vp);
-		/*
-		 * If the recycled succeeded this vdrop will actually free
-		 * the vnode. If not it will simply place it back on
-		 * the free list.
-		 */
 		vdrop(vp);
-		mtx_lock(&vnode_free_list_mtx);
+		mtx_lock(&vnode_list_mtx);
+		goto restart;
 	}
 }
@@ -1205,9 +1184,9 @@
 vnlru_free(int count, struct vfsops *mnt_op)
 {
 
-	mtx_lock(&vnode_free_list_mtx);
+	mtx_lock(&vnode_list_mtx);
 	vnlru_free_locked(count, mnt_op);
-	mtx_unlock(&vnode_free_list_mtx);
+	mtx_unlock(&vnode_list_mtx);
 }
@@ -1231,63 +1210,6 @@
 	return (space);
 }
 
-static void
-vnlru_return_batch_locked(struct mount *mp)
-{
-	struct vnode *vp;
-
-	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
-
-	if (mp->mnt_tmpfreevnodelistsize == 0)
-		return;
-
-	TAILQ_FOREACH(vp, &mp->mnt_tmpfreevnodelist, v_actfreelist) {
-		VNASSERT((vp->v_mflag & VMP_TMPMNTFREELIST) != 0, vp,
-		    ("vnode without VMP_TMPMNTFREELIST on mnt_tmpfreevnodelist"));
-		vp->v_mflag &= ~VMP_TMPMNTFREELIST;
-	}
-	mtx_lock(&vnode_free_list_mtx);
-	TAILQ_CONCAT(&vnode_free_list, &mp->mnt_tmpfreevnodelist, v_actfreelist);
-	freevnodes += mp->mnt_tmpfreevnodelistsize;
-	mtx_unlock(&vnode_free_list_mtx);
-	mp->mnt_tmpfreevnodelistsize = 0;
-}
-
-static void
-vnlru_return_batch(struct mount *mp)
-{
-
-	mtx_lock(&mp->mnt_listmtx);
-	vnlru_return_batch_locked(mp);
-	mtx_unlock(&mp->mnt_listmtx);
-}
-
-static void
-vnlru_return_batches(struct vfsops *mnt_op)
-{
-	struct mount *mp, *nmp;
-	bool need_unbusy;
-
-	mtx_lock(&mountlist_mtx);
-	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
-		need_unbusy = false;
-		if (mnt_op != NULL && mp->mnt_op != mnt_op)
-			goto next;
-		if (mp->mnt_tmpfreevnodelistsize == 0)
-			goto next;
-		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) == 0) {
-			vnlru_return_batch(mp);
-			need_unbusy = true;
-			mtx_lock(&mountlist_mtx);
-		}
-next:
-		nmp = TAILQ_NEXT(mp, mnt_list);
-		if (need_unbusy)
-			vfs_unbusy(mp);
-	}
-	mtx_unlock(&mountlist_mtx);
-}
-
 /*
  * Attempt to recycle vnodes in a context that is always safe to block.
  * Calling vlrurecycle() from the bowels of filesystem code has some
@@ -1311,7 +1233,7 @@
 	force = 0;
 	for (;;) {
 		kproc_suspend_check(vnlruproc);
-		mtx_lock(&vnode_free_list_mtx);
+		mtx_lock(&vnode_list_mtx);
 		rnumvnodes = atomic_load_long(&numvnodes);
 		/*
 		 * If numvnodes is too large (due to desiredvnodes being
@@ -1335,11 +1257,11 @@
 		if (vsp >= vlowat && force == 0) {
 			vnlruproc_sig = 0;
 			wakeup(&vnlruproc_sig);
-			msleep(vnlruproc, &vnode_free_list_mtx,
+			msleep(vnlruproc, &vnode_list_mtx,
 			    PVFS|PDROP, "vlruwt", hz);
 			continue;
 		}
-		mtx_unlock(&vnode_free_list_mtx);
+		mtx_unlock(&vnode_list_mtx);
 		done = 0;
 		rnumvnodes = atomic_load_long(&numvnodes);
 		rfreevnodes = atomic_load_long(&freevnodes);
@@ -1498,7 +1420,7 @@
 getnewvnode_wait(int suspended)
 {
 
-	mtx_assert(&vnode_free_list_mtx, MA_OWNED);
+	mtx_assert(&vnode_list_mtx, MA_OWNED);
 	if (numvnodes >= desiredvnodes) {
 		if (suspended) {
 			/*
@@ -1512,7 +1434,7 @@
 			vnlruproc_sig = 1;	/* avoid unnecessary wakeups */
 			wakeup(vnlruproc);
 		}
-		msleep(&vnlruproc_sig, &vnode_free_list_mtx, PVFS,
+		msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS,
 		    "vlruwk", hz);
 	}
 	/* Post-adjust like the pre-adjust in getnewvnode(). */
@@ -1533,13 +1455,13 @@
 
 	/* Pre-adjust like the pre-adjust in getnewvnode(), with any count. */
 	/* XXX no longer so quick, but this part is not racy. */
-	mtx_lock(&vnode_free_list_mtx);
+	mtx_lock(&vnode_list_mtx);
 	rnumvnodes = atomic_load_long(&numvnodes);
 	rfreevnodes = atomic_load_long(&freevnodes);
 	if (rnumvnodes + count > desiredvnodes && rfreevnodes > wantfreevnodes)
 		vnlru_free_locked(ulmin(rnumvnodes + count - desiredvnodes,
 		    rfreevnodes - wantfreevnodes), NULL);
-	mtx_unlock(&vnode_free_list_mtx);
+	mtx_unlock(&vnode_list_mtx);
 	td = curthread;
 	/* First try to be quick and racy. */
@@ -1550,7 +1472,7 @@
 	} else
 		atomic_subtract_long(&numvnodes, count);
 
-	mtx_lock(&vnode_free_list_mtx);
+	mtx_lock(&vnode_list_mtx);
 	while (count > 0) {
 		if (getnewvnode_wait(0) == 0) {
 			count--;
@@ -1559,7 +1481,7 @@
 		}
 	}
 	vcheckspace();
-	mtx_unlock(&vnode_free_list_mtx);
+	mtx_unlock(&vnode_list_mtx);
 }
@@ -1604,7 +1526,7 @@
 		td->td_vp_reserv -= 1;
 		goto alloc;
 	}
-	mtx_lock(&vnode_free_list_mtx);
+	mtx_lock(&vnode_list_mtx);
 	if (numvnodes < desiredvnodes)
 		cyclecount = 0;
 	else if (cyclecount++ >= freevnodes) {
@@ -1630,14 +1552,14 @@
 		    MNTK_SUSPEND));
 #if 0	/* XXX Not all VFS_VGET/ffs_vget callers check returns. */
 		if (error != 0) {
-			mtx_unlock(&vnode_free_list_mtx);
+			mtx_unlock(&vnode_list_mtx);
 			return (error);
 		}
#endif
 	}
 	vcheckspace();
 	atomic_add_long(&numvnodes, 1);
-	mtx_unlock(&vnode_free_list_mtx);
+	mtx_unlock(&vnode_list_mtx);
alloc:
 	counter_u64_add(vnodes_created, 1);
 	vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK);
@@ -1719,8 +1641,6 @@
 	CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp);
 	atomic_subtract_long(&numvnodes, 1);
 	bo = &vp->v_bufobj;
-	VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
-	    ("cleaned vnode still on the free list."));
 	VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't"));
 	VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count"));
 	VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count"));
@@ -1774,16 +1694,6 @@
 		return;
 	MNT_ILOCK(mp);
 	VI_LOCK(vp);
-	KASSERT(mp->mnt_activevnodelistsize <= mp->mnt_nvnodelistsize,
-	    ("Active vnode list size %d > Vnode list size %d",
-	    mp->mnt_activevnodelistsize, mp->mnt_nvnodelistsize));
-	if (vp->v_iflag & VI_ACTIVE) {
-		vp->v_iflag &= ~VI_ACTIVE;
-		mtx_lock(&mp->mnt_listmtx);
-		TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist);
-		mp->mnt_activevnodelistsize--;
-		mtx_unlock(&mp->mnt_listmtx);
-	}
 	if (vp->v_mflag & VMP_LAZYLIST) {
 		mtx_lock(&mp->mnt_listmtx);
 		if (vp->v_mflag & VMP_LAZYLIST) {
@@ -1853,13 +1763,6 @@
 	VNASSERT(mp->mnt_nvnodelistsize >= 0, vp,
 	    ("neg mount point vnode list size"));
 	mp->mnt_nvnodelistsize++;
-	KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
-	    ("Activating already active vnode"));
-	vp->v_iflag |= VI_ACTIVE;
-	mtx_lock(&mp->mnt_listmtx);
-	TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
-	mp->mnt_activevnodelistsize++;
-	mtx_unlock(&mp->mnt_listmtx);
 	VI_UNLOCK(vp);
 	MNT_IUNLOCK(mp);
 	return (0);
@@ -3220,38 +3123,13 @@
 static void
 vhold_activate(struct vnode *vp)
 {
-	struct mount *mp;
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	VNASSERT(vp->v_holdcnt == 0, vp,
 	    ("%s: wrong hold count", __func__));
 	VNASSERT(vp->v_op != NULL, vp,
 	    ("%s: vnode already reclaimed.", __func__));
-	/*
-	 * Remove a vnode from the free list, mark it as in use,
-	 * and put it on the active list.
-	 */
-	VNASSERT(vp->v_mount != NULL, vp,
-	    ("_vhold: vnode not on per mount vnode list"));
-	mp = vp->v_mount;
-	mtx_lock(&mp->mnt_listmtx);
-	if ((vp->v_mflag & VMP_TMPMNTFREELIST) != 0) {
-		TAILQ_REMOVE(&mp->mnt_tmpfreevnodelist, vp, v_actfreelist);
-		mp->mnt_tmpfreevnodelistsize--;
-		vp->v_mflag &= ~VMP_TMPMNTFREELIST;
-	} else {
-		mtx_lock(&vnode_free_list_mtx);
-		TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
-		freevnodes--;
-		mtx_unlock(&vnode_free_list_mtx);
-	}
-	KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
-	    ("Activating already active vnode"));
-	vp->v_iflag &= ~VI_FREE;
-	vp->v_iflag |= VI_ACTIVE;
-	TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
-	mp->mnt_activevnodelistsize++;
-	mtx_unlock(&mp->mnt_listmtx);
+	atomic_subtract_long(&freevnodes, 1);
 	refcount_acquire(&vp->v_holdcnt);
 }
@@ -3261,12 +3139,8 @@
 
 	ASSERT_VI_UNLOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	if (refcount_acquire_if_not_zero(&vp->v_holdcnt)) {
-		VNODE_REFCOUNT_FENCE_ACQ();
-		VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
-		    ("vhold: vnode with holdcnt is free"));
+	if (refcount_acquire_if_not_zero(&vp->v_holdcnt))
 		return;
-	}
 	VI_LOCK(vp);
 	vholdl(vp);
 	VI_UNLOCK(vp);
@@ -3278,7 +3152,7 @@
 
 	ASSERT_VI_LOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	if ((vp->v_iflag & VI_FREE) == 0) {
+	if (vp->v_holdcnt > 0) {
 		refcount_acquire(&vp->v_holdcnt);
 		return;
 	}
@@ -3321,34 +3195,26 @@
 	    ("vdrop: returning doomed vnode"));
 	VNASSERT(vp->v_op != NULL, vp,
 	    ("vdrop: vnode already reclaimed."));
-	VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
-	    ("vnode already free"));
+	VNASSERT(vp->v_holdcnt == 0, vp,
+	    ("vdrop: freeing when we shouldn't"));
 	VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
 	    ("vnode with VI_OWEINACT set"));
 	VNASSERT((vp->v_iflag & VI_DEFINACT) == 0, vp,
 	    ("vnode with VI_DEFINACT set"));
-	VNASSERT(vp->v_holdcnt == 0, vp,
-	    ("vdrop: freeing when we shouldn't"));
-	mp = vp->v_mount;
-	mtx_lock(&mp->mnt_listmtx);
 	if (vp->v_mflag & VMP_LAZYLIST) {
+		mp = vp->v_mount;
+		mtx_lock(&mp->mnt_listmtx);
 		vp->v_mflag &= ~VMP_LAZYLIST;
 		TAILQ_REMOVE(&mp->mnt_lazyvnodelist, vp, v_lazylist);
 		mp->mnt_lazyvnodelistsize--;
+		mtx_unlock(&mp->mnt_listmtx);
 	}
-	if (vp->v_iflag & VI_ACTIVE) {
-		vp->v_iflag &= ~VI_ACTIVE;
-		TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist);
-		mp->mnt_activevnodelistsize--;
-	}
-	TAILQ_INSERT_TAIL(&mp->mnt_tmpfreevnodelist, vp, v_actfreelist);
-	mp->mnt_tmpfreevnodelistsize++;
-	vp->v_iflag |= VI_FREE;
-	vp->v_mflag |= VMP_TMPMNTFREELIST;
+	mtx_lock(&vnode_list_mtx);
+	TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
+	TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
+	mtx_unlock(&vnode_list_mtx);
+	atomic_add_long(&freevnodes, 1);
 	VI_UNLOCK(vp);
-	if (mp->mnt_tmpfreevnodelistsize >= mnt_free_list_batch)
-		vnlru_return_batch_locked(mp);
-	mtx_unlock(&mp->mnt_listmtx);
 }
 
 void
@@ -3895,27 +3761,21 @@
 		strlcat(buf, "|VI_TEXT_REF", sizeof(buf));
 	if (vp->v_iflag & VI_MOUNT)
 		strlcat(buf, "|VI_MOUNT", sizeof(buf));
-	if (vp->v_iflag & VI_FREE)
-		strlcat(buf, "|VI_FREE", sizeof(buf));
-	if (vp->v_iflag & VI_ACTIVE)
-		strlcat(buf, "|VI_ACTIVE", sizeof(buf));
 	if (vp->v_iflag & VI_DOINGINACT)
 		strlcat(buf, "|VI_DOINGINACT", sizeof(buf));
 	if (vp->v_iflag & VI_OWEINACT)
 		strlcat(buf, "|VI_OWEINACT", sizeof(buf));
 	if (vp->v_iflag & VI_DEFINACT)
 		strlcat(buf, "|VI_DEFINACT", sizeof(buf));
-	flags = vp->v_iflag & ~(VI_TEXT_REF | VI_MOUNT | VI_FREE | VI_ACTIVE |
-	    VI_DOINGINACT | VI_OWEINACT | VI_DEFINACT);
+	flags = vp->v_iflag & ~(VI_TEXT_REF | VI_MOUNT | VI_DOINGINACT |
+	    VI_OWEINACT | VI_DEFINACT);
 	if (flags != 0) {
 		snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags);
 		strlcat(buf, buf2, sizeof(buf));
 	}
-	if (vp->v_mflag & VMP_TMPMNTFREELIST)
-		strlcat(buf, "|VMP_TMPMNTFREELIST", sizeof(buf));
 	if (vp->v_mflag & VMP_LAZYLIST)
 		strlcat(buf, "|VMP_LAZYLIST", sizeof(buf));
-	flags = vp->v_mflag & ~(VMP_TMPMNTFREELIST | VMP_LAZYLIST);
+	flags = vp->v_mflag & ~(VMP_LAZYLIST);
 	if (flags != 0) {
 		snprintf(buf2, sizeof(buf2), "|VMP(0x%lx)", flags);
 		strlcat(buf, buf2, sizeof(buf));
 	}
@@ -4133,8 +3993,6 @@
 	    vfs_mount_fetch_counter(mp, MNT_COUNT_REF), mp->mnt_ref);
 	db_printf("    mnt_gen = %d\n", mp->mnt_gen);
 	db_printf("    mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize);
-	db_printf("    mnt_activevnodelistsize = %d\n",
-	    mp->mnt_activevnodelistsize);
 	db_printf("    mnt_lazyvnodelistsize = %d\n",
 	    mp->mnt_lazyvnodelistsize);
 	db_printf("    mnt_writeopcount = %d (with %d in the struct)\n",
@@ -4152,8 +4010,8 @@
 	db_printf("    mnt_vfs_ops = %d\n", mp->mnt_vfs_ops);
 
 	db_printf("\n\nList of active vnodes\n");
-	TAILQ_FOREACH(vp, &mp->mnt_activevnodelist, v_actfreelist) {
-		if (vp->v_type != VMARKER) {
+	TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
+		if (vp->v_type != VMARKER && vp->v_holdcnt > 0) {
 			vn_printf(vp, "vnode ");
 			if (db_pager_quit)
 				break;
@@ -4161,7 +4019,7 @@
 	}
 	db_printf("\n\nList of inactive vnodes\n");
 	TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
-		if (vp->v_type != VMARKER && (vp->v_iflag & VI_ACTIVE) == 0) {
+		if (vp->v_type != VMARKER && vp->v_holdcnt == 0) {
 			vn_printf(vp, "vnode ");
 			if (db_pager_quit)
 				break;
@@ -4811,7 +4669,6 @@
 	 * The filesystem at hand may be idle with free vnodes stored in the
 	 * batch. Return them instead of letting them stay there indefinitely.
 	 */
-	vnlru_return_batch(mp);
 	vfs_periodic(mp, MNT_NOWAIT);
 	error = VFS_SYNC(mp, MNT_LAZY);
 	curthread_pflags_restore(save);
@@ -6085,191 +5942,6 @@
 	*mvp = NULL;
 }
 
-/*
- * These are helper functions for filesystems to traverse their
- * active vnodes. See MNT_VNODE_FOREACH_ACTIVE() in sys/mount.h
- */
-static void
-mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
-{
-
-	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
-
-	MNT_ILOCK(mp);
-	MNT_REL(mp);
-	MNT_IUNLOCK(mp);
-	vn_free_marker(*mvp);
-	*mvp = NULL;
-}
-
-/*
- * Relock the mp mount vnode list lock with the vp vnode interlock in the
- * conventional lock order during mnt_vnode_next_active iteration.
- *
- * On entry, the mount vnode list lock is held and the vnode interlock is not.
- * The list lock is dropped and reacquired. On success, both locks are held.
- * On failure, the mount vnode list lock is held but the vnode interlock is
- * not, and the procedure may have yielded.
- */
-static bool
-mnt_vnode_next_active_relock(struct vnode *mvp, struct mount *mp,
-    struct vnode *vp)
-{
-	const struct vnode *tmp;
-	bool held, ret;
-
-	VNASSERT(mvp->v_mount == mp && mvp->v_type == VMARKER &&
-	    TAILQ_NEXT(mvp, v_actfreelist) != NULL, mvp,
-	    ("%s: bad marker", __func__));
-	VNASSERT(vp->v_mount == mp && vp->v_type != VMARKER, vp,
-	    ("%s: inappropriate vnode", __func__));
-	ASSERT_VI_UNLOCKED(vp, __func__);
-	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
-
-	ret = false;
-
-	TAILQ_REMOVE(&mp->mnt_activevnodelist, mvp, v_actfreelist);
-	TAILQ_INSERT_BEFORE(vp, mvp, v_actfreelist);
-
-	/*
-	 * Use a hold to prevent vp from disappearing while the mount vnode
-	 * list lock is dropped and reacquired. Normally a hold would be
-	 * acquired with vhold(), but that might try to acquire the vnode
-	 * interlock, which would be a LOR with the mount vnode list lock.
-	 */
-	held = refcount_acquire_if_not_zero(&vp->v_holdcnt);
-	mtx_unlock(&mp->mnt_listmtx);
-	if (!held)
-		goto abort;
-	VI_LOCK(vp);
-	if (!refcount_release_if_not_last(&vp->v_holdcnt)) {
-		vdropl(vp);
-		goto abort;
-	}
-	mtx_lock(&mp->mnt_listmtx);
-
-	/*
-	 * Determine whether the vnode is still the next one after the marker,
-	 * excepting any other markers. If the vnode has not been doomed by
-	 * vgone() then the hold should have ensured that it remained on the
-	 * active list. If it has been doomed but is still on the active list,
-	 * don't abort, but rather skip over it (avoid spinning on doomed
-	 * vnodes).
-	 */
-	tmp = mvp;
-	do {
-		tmp = TAILQ_NEXT(tmp, v_actfreelist);
-	} while (tmp != NULL && tmp->v_type == VMARKER);
-	if (tmp != vp) {
-		mtx_unlock(&mp->mnt_listmtx);
-		VI_UNLOCK(vp);
-		goto abort;
-	}
-
-	ret = true;
-	goto out;
-abort:
-	maybe_yield();
-	mtx_lock(&mp->mnt_listmtx);
-out:
-	if (ret)
-		ASSERT_VI_LOCKED(vp, __func__);
-	else
-		ASSERT_VI_UNLOCKED(vp, __func__);
-	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
-	return (ret);
-}
-
-static struct vnode *
-mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
-{
-	struct vnode *vp, *nvp;
-
-	mtx_assert(&mp->mnt_listmtx, MA_OWNED);
-	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
-restart:
-	vp = TAILQ_NEXT(*mvp, v_actfreelist);
-	while (vp != NULL) {
-		if (vp->v_type == VMARKER) {
-			vp = TAILQ_NEXT(vp, v_actfreelist);
-			continue;
-		}
-		/*
-		 * Try-lock because this is the wrong lock order. If that does
-		 * not succeed, drop the mount vnode list lock and try to
-		 * reacquire it and the vnode interlock in the right order.
-		 */
-		if (!VI_TRYLOCK(vp) &&
-		    !mnt_vnode_next_active_relock(*mvp, mp, vp))
-			goto restart;
-		KASSERT(vp->v_type != VMARKER, ("locked marker %p", vp));
-		KASSERT(vp->v_mount == mp || vp->v_mount == NULL,
-		    ("alien vnode on the active list %p %p", vp, mp));
-		if (vp->v_mount == mp && !VN_IS_DOOMED(vp))
-			break;
-		nvp = TAILQ_NEXT(vp, v_actfreelist);
-		VI_UNLOCK(vp);
-		vp = nvp;
-	}
-	TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
-
-	/* Check if we are done */
-	if (vp == NULL) {
-		mtx_unlock(&mp->mnt_listmtx);
-		mnt_vnode_markerfree_active(mvp, mp);
-		return (NULL);
-	}
-	TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
-	mtx_unlock(&mp->mnt_listmtx);
-	ASSERT_VI_LOCKED(vp, "active iter");
-	KASSERT((vp->v_iflag & VI_ACTIVE) != 0, ("Non-active vp %p", vp));
-	return (vp);
-}
-
-struct vnode *
-__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
-{
-
-	if (should_yield())
-		kern_yield(PRI_USER);
-	mtx_lock(&mp->mnt_listmtx);
-	return (mnt_vnode_next_active(mvp, mp));
-}
-
-struct vnode *
-__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp)
-{
-	struct vnode *vp;
-
-	*mvp = vn_alloc_marker(mp);
-	MNT_ILOCK(mp);
-	MNT_REF(mp);
-	MNT_IUNLOCK(mp);
-
-	mtx_lock(&mp->mnt_listmtx);
-	vp = TAILQ_FIRST(&mp->mnt_activevnodelist);
-	if (vp == NULL) {
-		mtx_unlock(&mp->mnt_listmtx);
-		mnt_vnode_markerfree_active(mvp, mp);
-		return (NULL);
-	}
-	TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist);
-	return (mnt_vnode_next_active(mvp, mp));
-}
-
-void
-__mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
-{
-
-	if (*mvp == NULL)
-		return;
-
-	mtx_lock(&mp->mnt_listmtx);
-	TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
-	mtx_unlock(&mp->mnt_listmtx);
-	mnt_vnode_markerfree_active(mvp, mp);
-}
-
 /*
  * These are helper functions for filesystems to traverse their
  * lazy vnodes. See MNT_VNODE_FOREACH_LAZY() in sys/mount.h
@@ -6283,7 +5955,7 @@
 
 	MNT_ILOCK(mp);
 	MNT_REL(mp);
 	MNT_IUNLOCK(mp);
-	free(*mvp, M_VNODE_MARKER);
+	vn_free_marker(*mvp);
 	*mvp = NULL;
 }
@@ -6427,7 +6099,6 @@
 	TAILQ_INSERT_AFTER(&mp->mnt_lazyvnodelist, vp, *mvp, v_lazylist);
 	mtx_unlock(&mp->mnt_listmtx);
 	ASSERT_VI_LOCKED(vp, "lazy iter");
-	KASSERT((vp->v_iflag & VI_ACTIVE) != 0, ("Non-active vp %p", vp));
 	return (vp);
 }
@@ -6448,12 +6119,10 @@
 {
 	struct vnode *vp;
 
-	*mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO);
+	*mvp = vn_alloc_marker(mp);
 	MNT_ILOCK(mp);
 	MNT_REF(mp);
 	MNT_IUNLOCK(mp);
-	(*mvp)->v_type = VMARKER;
-	(*mvp)->v_mount = mp;
 
 	mtx_lock(&mp->mnt_listmtx);
 	vp = TAILQ_FIRST(&mp->mnt_lazyvnodelist);
Index: sys/sys/mount.h
===================================================================
--- sys/sys/mount.h
+++ sys/sys/mount.h
@@ -219,10 +219,6 @@
 #define	mnt_endzero	mnt_gjprovider
 	char		*mnt_gjprovider;	/* gjournal provider name */
 	struct mtx	mnt_listmtx;
-	struct vnodelst	mnt_activevnodelist;	/* (l) list of active vnodes */
-	int		mnt_activevnodelistsize;/* (l) # of active vnodes */
-	struct vnodelst	mnt_tmpfreevnodelist;	/* (l) list of free vnodes */
-	int		mnt_tmpfreevnodelistsize;/* (l) # of free vnodes */
 	struct vnodelst	mnt_lazyvnodelist;	/* (l) list of lazy vnodes */
 	int		mnt_lazyvnodelistsize;	/* (l) # of lazy vnodes */
 	struct lock	mnt_explock;		/* vfs_export walkers lock */
@@ -255,20 +251,6 @@
 	mtx_assert(MNT_MTX(mp), MA_NOTOWNED);			\
 } while (0)
 
-/*
- * Definitions for MNT_VNODE_FOREACH_ACTIVE.
- */
-struct vnode *__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp);
-struct vnode *__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp);
-void	__mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
-
-#define MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp)				\
-	for (vp = __mnt_vnode_first_active(&(mvp), (mp));		\
-	    (vp) != NULL; vp = __mnt_vnode_next_active(&(mvp), (mp)))
-
-#define MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp)				\
-	__mnt_vnode_markerfree_active(&(mvp), (mp))
-
 /*
  * Definitions for MNT_VNODE_FOREACH_LAZY.
 */
Index: sys/sys/vnode.h
===================================================================
--- sys/sys/vnode.h
+++ sys/sys/vnode.h
@@ -147,7 +147,7 @@
 	/*
 	 * The machinery of being a vnode
 	 */
-	TAILQ_ENTRY(vnode) v_actfreelist;	/* l vnode active/free lists */
+	TAILQ_ENTRY(vnode) v_vnodelist;		/* l vnode lists */
 	TAILQ_ENTRY(vnode) v_lazylist;		/* l vnode lazy list */
 	struct bufobj	v_bufobj;		/* * Buffer cache object */
@@ -239,8 +239,6 @@
 
 #define	VI_TEXT_REF	0x0001	/* Text ref grabbed use ref */
 #define	VI_MOUNT	0x0020	/* Mount in progress */
-#define	VI_FREE		0x0100	/* This vnode is on the freelist */
-#define	VI_ACTIVE	0x0200	/* This vnode is on the active list */
 #define	VI_DOINGINACT	0x0800	/* VOP_INACTIVE is in progress */
 #define	VI_OWEINACT	0x1000	/* Need to call inactive */
 #define	VI_DEFINACT	0x2000	/* deferred inactive */
@@ -260,8 +258,7 @@
 #define	VV_FORCEINSMQ	0x1000	/* force the insmntque to succeed */
 #define	VV_READLINK	0x2000	/* fdescfs linux vnode */
 
-#define	VMP_TMPMNTFREELIST	0x0001	/* Vnode is on mnt's tmp free list */
-#define	VMP_LAZYLIST	0x0002	/* Vnode is on mnt's lazy list */
+#define	VMP_LAZYLIST	0x0001	/* Vnode is on mnt's lazy list */
 
 /*
 * Vnode attributes. A field value of VNOVAL represents a field whose value
Index: sys/ufs/ufs/ufs_quota.c
===================================================================
--- sys/ufs/ufs/ufs_quota.c
+++ sys/ufs/ufs/ufs_quota.c
@@ -1083,7 +1083,7 @@
 	 * synchronizing any modified dquot structures.
 	 */
again:
-	MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
+	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
 		if (vp->v_type == VNON) {
 			VI_UNLOCK(vp);
 			continue;
@@ -1091,7 +1091,7 @@
 		error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td);
 		if (error) {
 			if (error == ENOENT) {
-				MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp);
+				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
 				goto again;
 			}
 			continue;
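Usage sketch, not part of the patch: with the per-mount active list gone, code that used MNT_VNODE_FOREACH_ACTIVE moves to MNT_VNODE_FOREACH_ALL (or MNT_VNODE_FOREACH_LAZY where deferred inactive processing suffices), and a "free" vnode is now simply one with v_holdcnt == 0. The loop below mirrors the ufs_quota.c hunk above; the function name example_fs_scan and the per-vnode work are hypothetical.

static int
example_fs_scan(struct mount *mp, struct thread *td)
{
	struct vnode *vp, *mvp;
	int error;

again:
	/* The iterator hands back each vnode with its interlock held. */
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td);
		if (error != 0) {
			if (error == ENOENT) {
				/* The vnode list changed; restart the walk. */
				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
				goto again;
			}
			continue;
		}
		/* ... per-vnode work goes here ... */
		vput(vp);
	}
	return (0);
}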