Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c @@ -815,7 +815,7 @@ return (SET_ERROR(EDQUOT)); } - getnewvnode_reserve(1); + getnewvnode_reserve(); tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes + Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -1800,7 +1800,7 @@ goto out; } - getnewvnode_reserve(1); + getnewvnode_reserve(); tx = dmu_tx_create(os); @@ -2092,7 +2092,7 @@ /* * Add a new entry to the directory. */ - getnewvnode_reserve(1); + getnewvnode_reserve(); tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname); dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); @@ -4003,7 +4003,7 @@ return (SET_ERROR(EDQUOT)); } - getnewvnode_reserve(1); + getnewvnode_reserve(); tx = dmu_tx_create(zfsvfs->z_os); fuid_dirtied = zfsvfs->z_fuid_dirty; dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len)); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c @@ -644,7 +644,7 @@ zp = kmem_cache_alloc(znode_cache, KM_SLEEP); - KASSERT(curthread->td_vp_reserv > 0, + KASSERT(curthread->td_vp_reserved != NULL, ("zfs_znode_alloc: getnewvnode without any vnodes reserved")); error = getnewvnode("zfs", zfsvfs->z_parent->z_vfs, &zfs_vnodeops, &vp); if (error != 0) { @@ -1157,7 +1157,7 @@ int err; td = curthread; - getnewvnode_reserve(1); + getnewvnode_reserve(); again: *zpp = NULL; ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); Index: sys/kern/kern_thread.c =================================================================== --- sys/kern/kern_thread.c +++ sys/kern/kern_thread.c @@ -82,7 +82,7 @@ "struct thread KBI td_flags"); _Static_assert(offsetof(struct thread, td_pflags) == 0x104, "struct thread KBI td_pflags"); -_Static_assert(offsetof(struct thread, td_frame) == 0x478, +_Static_assert(offsetof(struct thread, td_frame) == 0x480, "struct thread KBI td_frame"); _Static_assert(offsetof(struct thread, td_emuldata) == 0x690, "struct thread KBI td_emuldata"); Index: sys/kern/subr_trap.c =================================================================== --- sys/kern/subr_trap.c +++ sys/kern/subr_trap.c @@ -187,8 +187,8 @@ } KASSERT(td->td_pinned == 0 || (td->td_pflags & TDP_CALLCHAIN) != 0, ("userret: Returning with with pinned thread")); - KASSERT(td->td_vp_reserv == 0, - ("userret: Returning while holding vnode reservation")); + KASSERT(td->td_vp_reserved == NULL, + ("userret: Returning with a reserved vnode")); KASSERT((td->td_flags & (TDF_SBDRY | TDF_SEINTR | TDF_SERESTART)) == 0, ("userret: Returning with stop signals deferred")); KASSERT(td->td_su == NULL, Index: sys/kern/vfs_subr.c =================================================================== --- sys/kern/vfs_subr.c +++ sys/kern/vfs_subr.c @@ -1437,7 +1437,7 @@ * Wait if necessary for space for a new vnode. */ static int -getnewvnode_wait(int suspended) +vn_alloc_wait(int suspended) { mtx_assert(&vnode_list_mtx, MA_OWNED); @@ -1463,89 +1463,20 @@ return (numvnodes >= desiredvnodes ? ENFILE : 0); } -/* - * This hack is fragile, and probably not needed any more now that the - * watermark handling works. - */ -void -getnewvnode_reserve(u_int count) -{ - u_long rnumvnodes, rfreevnodes; - struct thread *td; - - /* Pre-adjust like the pre-adjust in getnewvnode(), with any count. */ - /* XXX no longer so quick, but this part is not racy. */ - mtx_lock(&vnode_list_mtx); - rnumvnodes = atomic_load_long(&numvnodes); - rfreevnodes = atomic_load_long(&freevnodes); - if (rnumvnodes + count > desiredvnodes && rfreevnodes > wantfreevnodes) - vnlru_free_locked(ulmin(rnumvnodes + count - desiredvnodes, - rfreevnodes - wantfreevnodes), NULL); - mtx_unlock(&vnode_list_mtx); - - td = curthread; - /* First try to be quick and racy. */ - if (atomic_fetchadd_long(&numvnodes, count) + count <= desiredvnodes) { - td->td_vp_reserv += count; - vcheckspace(); /* XXX no longer so quick, but more racy */ - return; - } else - atomic_subtract_long(&numvnodes, count); - - mtx_lock(&vnode_list_mtx); - while (count > 0) { - if (getnewvnode_wait(0) == 0) { - count--; - td->td_vp_reserv++; - atomic_add_long(&numvnodes, 1); - } - } - vcheckspace(); - mtx_unlock(&vnode_list_mtx); -} - -/* - * This hack is fragile, especially if desiredvnodes or wantvnodes are - * misconfgured or changed significantly. Reducing desiredvnodes below - * the reserved amount should cause bizarre behaviour like reducing it - * below the number of active vnodes -- the system will try to reduce - * numvnodes to match, but should fail, so the subtraction below should - * not overflow. - */ -void -getnewvnode_drop_reserve(void) -{ - struct thread *td; - - td = curthread; - atomic_subtract_long(&numvnodes, td->td_vp_reserv); - td->td_vp_reserv = 0; -} - -/* - * Return the next vnode from the free list. - */ -int -getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, - struct vnode **vpp) +static struct vnode * +vn_alloc(struct mount *mp) { struct vnode *vp; - struct thread *td; - struct lock_object *lo; static int cyclecount; int error __unused; - CTR3(KTR_VFS, "%s: mp %p with tag %s", __func__, mp, tag); - - KASSERT(vops->registered, - ("%s: not registered vector op %p\n", __func__, vops)); - - vp = NULL; - td = curthread; - if (td->td_vp_reserv > 0) { - td->td_vp_reserv -= 1; - goto alloc; + vp = uma_zalloc(vnode_zone, M_NOWAIT); + if (vp != NULL) { + atomic_add_long(&numvnodes, 1); + counter_u64_add(vnodes_created, 1); + return (vp); } + mtx_lock(&vnode_list_mtx); if (numvnodes < desiredvnodes) cyclecount = 0; @@ -1568,7 +1499,7 @@ else if (freevnodes > 0) vnlru_free_locked(1, NULL); else { - error = getnewvnode_wait(mp != NULL && (mp->mnt_kern_flag & + error = vn_alloc_wait(mp != NULL && (mp->mnt_kern_flag & MNTK_SUSPEND)); #if 0 /* XXX Not all VFS_VGET/ffs_vget callers check returns. */ if (error != 0) { @@ -1580,9 +1511,64 @@ vcheckspace(); atomic_add_long(&numvnodes, 1); mtx_unlock(&vnode_list_mtx); -alloc: + counter_u64_add(vnodes_created, 1); - vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK); + return (uma_zalloc(vnode_zone, M_WAITOK)); +} + +static void +vn_free(struct vnode *vp) +{ + + atomic_subtract_long(&numvnodes, 1); + uma_zfree(vnode_zone, vp); +} + +void +getnewvnode_reserve(void) +{ + struct thread *td; + + td = curthread; + MPASS(td->td_vp_reserved == NULL); + td->td_vp_reserved = vn_alloc(NULL); +} + +void +getnewvnode_drop_reserve(void) +{ + struct thread *td; + + td = curthread; + if (td->td_vp_reserved != NULL) { + vn_free(td->td_vp_reserved); + td->td_vp_reserved = NULL; + } +} + +/* + * Return the next vnode from the free list. + */ +int +getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, + struct vnode **vpp) +{ + struct vnode *vp; + struct thread *td; + struct lock_object *lo; + + CTR3(KTR_VFS, "%s: mp %p with tag %s", __func__, mp, tag); + + KASSERT(vops->registered, + ("%s: not registered vector op %p\n", __func__, vops)); + + td = curthread; + if (td->td_vp_reserved != NULL) { + vp = td->td_vp_reserved; + td->td_vp_reserved = NULL; + } else { + vp = vn_alloc(mp); + } /* * Locks are given the generic name "vnode" when created. * Follow the historic practice of using the filesystem @@ -1659,7 +1645,6 @@ * so as not to contaminate the freshly allocated vnode. */ CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp); - atomic_subtract_long(&numvnodes, 1); bo = &vp->v_bufobj; VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count")); @@ -1698,7 +1683,7 @@ vp->v_iflag = 0; vp->v_vflag = 0; bo->bo_flag = 0; - uma_zfree(vnode_zone, vp); + vn_free(vp); } /* Index: sys/sys/proc.h =================================================================== --- sys/sys/proc.h +++ sys/sys/proc.h @@ -297,7 +297,7 @@ struct osd td_osd; /* (k) Object specific data. */ struct vm_map_entry *td_map_def_user; /* (k) Deferred entries. */ pid_t td_dbg_forked; /* (c) Child pid for debugger. */ - u_int td_vp_reserv; /* (k) Count of reserved vnodes. */ + struct vnode *td_vp_reserved;/* (k) Preallocated vnode. */ u_int td_no_sleeping; /* (k) Sleeping disabled count. */ void *td_su; /* (k) FFS SU private */ sbintime_t td_sleeptimo; /* (t) Sleep timeout. */ Index: sys/sys/vnode.h =================================================================== --- sys/sys/vnode.h +++ sys/sys/vnode.h @@ -624,7 +624,7 @@ int freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost); int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, struct vnode **vpp); -void getnewvnode_reserve(u_int count); +void getnewvnode_reserve(void); void getnewvnode_drop_reserve(void); int insmntque1(struct vnode *vp, struct mount *mp, void (*dtr)(struct vnode *, void *), void *dtr_arg);