diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h --- a/sys/fs/nullfs/null.h +++ b/sys/fs/nullfs/null.h @@ -37,6 +37,9 @@ #define NULLM_CACHE 0x0001 +#include +#include + struct null_mount { struct mount *nullm_vfs; struct vnode *nullm_lowerrootvp; /* Ref to lower root vnode */ @@ -50,7 +53,7 @@ * A cache of vnode references */ struct null_node { - LIST_ENTRY(null_node) null_hash; /* Hash list */ + CK_LIST_ENTRY(null_node) null_hash; /* Hash list */ struct vnode *null_lowervp; /* VREFed once */ struct vnode *null_vnode; /* Back pointer */ u_int null_flags; @@ -61,6 +64,7 @@ #define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data)) #define VTONULL(vp) ((struct null_node *)(vp)->v_data) +#define VTONULL_SMR(vp) ((struct null_node *)vn_load_v_data_smr(vp)) #define NULLTOV(xp) ((xp)->null_vnode) int nullfs_init(struct vfsconf *vfsp); @@ -79,9 +83,7 @@ extern struct vop_vector null_vnodeops; -#ifdef MALLOC_DECLARE -MALLOC_DECLARE(M_NULLFSNODE); -#endif +extern uma_zone_t null_node_zone; #ifdef NULLFS_DEBUG #define NULLFSDEBUG(format, args...) 
printf(format ,## args) diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c --- a/sys/fs/nullfs/null_subr.c +++ b/sys/fs/nullfs/null_subr.c @@ -41,9 +41,14 @@ #include #include #include +#include #include +#include + +VFS_SMR_DECLARE; + /* * Null layer cache: * Each cache entry holds a reference to the lower vnode @@ -54,12 +59,12 @@ #define NULL_NHASH(vp) (&null_node_hashtbl[vfs_hash_index(vp) & null_hash_mask]) -static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl; +static CK_LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl; static struct rwlock null_hash_lock; static u_long null_hash_mask; static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table"); -MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part"); +uma_zone_t __read_mostly null_node_zone; static void null_hashins(struct mount *, struct null_node *); @@ -73,6 +78,10 @@ null_node_hashtbl = hashinit(desiredvnodes, M_NULLFSHASH, &null_hash_mask); rw_init(&null_hash_lock, "nullhs"); + null_node_zone = uma_zcreate("nullfs node", sizeof(struct null_node), + NULL, NULL, NULL, NULL, 0, UMA_ZONE_ZINIT); + VFS_SMR_ZONE_SET(null_node_zone); + return (0); } @@ -80,6 +89,7 @@ nullfs_uninit(struct vfsconf *vfsp) { + uma_zdestroy(null_node_zone); rw_destroy(&null_hash_lock); hashdestroy(null_node_hashtbl, M_NULLFSHASH, null_hash_mask); return (0); @@ -96,7 +106,7 @@ struct null_node *a; struct vnode *vp; - ASSERT_VOP_LOCKED(lowervp, "null_hashget"); + ASSERT_VOP_LOCKED(lowervp, __func__); rw_assert(&null_hash_lock, RA_LOCKED); /* @@ -106,15 +116,18 @@ * reference count (but NOT the lower vnode's VREF counter). */ hd = NULL_NHASH(lowervp); - LIST_FOREACH(a, hd, null_hash) { - if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) { - /* - * Since we have the lower node locked the nullfs - * node can not be in the process of recycling. If - * it had been recycled before we grabed the lower - * lock it would not have been found on the hash. 
- */ - vp = NULLTOV(a); + CK_LIST_FOREACH(a, hd, null_hash) { + if (a->null_lowervp != lowervp) + continue; + /* + * Since we have the lower node locked the nullfs + * node can not be in the process of recycling. If + * it had been recycled before we grabbed the lower + * lock it would not have been found on the hash. + */ + vp = NULLTOV(a); + VNPASS(!VN_IS_DOOMED(vp), vp); + if (vp->v_mount == mp) { vref(vp); return (vp); } @@ -126,17 +139,34 @@ null_hashget(struct mount *mp, struct vnode *lowervp) { struct null_node_hashhead *hd; + struct null_node *a; struct vnode *vp; + enum vgetstate vs; - hd = NULL_NHASH(lowervp); - if (LIST_EMPTY(hd)) - return (NULL); - - rw_rlock(&null_hash_lock); - vp = null_hashget_locked(mp, lowervp); - rw_runlock(&null_hash_lock); + ASSERT_VOP_LOCKED(lowervp, "null_hashget"); + rw_assert(&null_hash_lock, RA_UNLOCKED); - return (vp); + vfs_smr_enter(); /* lockless walk; null_hash_lock is not taken */ + hd = NULL_NHASH(lowervp); + CK_LIST_FOREACH(a, hd, null_hash) { + if (a->null_lowervp != lowervp) + continue; + /* + * See null_hashget_locked as to why the nullfs vnode can't be + * doomed here. + */ + vp = NULLTOV(a); + VNPASS(!VN_IS_DOOMED(vp), vp); + if (vp->v_mount != mp) + continue; + vs = vget_prep_smr(vp); + vfs_smr_exit(); /* ref prepared above; finished outside SMR */ + VNPASS(vs != VGET_NONE, vp); + vget_finish_ref(vp, vs); + return (vp); + } + vfs_smr_exit(); + return (NULL); } static void @@ -151,7 +181,7 @@ hd = NULL_NHASH(xp->null_lowervp); #ifdef INVARIANTS - LIST_FOREACH(oxp, hd, null_hash) { + CK_LIST_FOREACH(oxp, hd, null_hash) { if (oxp->null_lowervp == xp->null_lowervp && NULLTOV(oxp)->v_mount == mp) { VNASSERT(0, NULLTOV(oxp), @@ -159,7 +189,7 @@ } } #endif - LIST_INSERT_HEAD(hd, xp, null_hash); + CK_LIST_INSERT_HEAD(hd, xp, null_hash); } static void @@ -174,7 +204,7 @@ VI_UNLOCK(vp); vgone(vp); vput(vp); - free(xp, M_NULLFSNODE); + uma_zfree_smr(null_node_zone, xp); } /* @@ -208,12 +238,12 @@ * Note that duplicate can only appear in hash if the lowervp is * locked LK_SHARED. 
*/ - xp = malloc(sizeof(struct null_node), M_NULLFSNODE, M_WAITOK); + xp = uma_zalloc_smr(null_node_zone, M_WAITOK); error = getnewvnode("nullfs", mp, &null_vnodeops, &vp); if (error) { vput(lowervp); - free(xp, M_NULLFSNODE); + uma_zfree_smr(null_node_zone, xp); return (error); } @@ -261,8 +291,8 @@ return (error); } - null_hashins(mp, xp); vn_set_state(vp, VSTATE_CONSTRUCTED); + null_hashins(mp, xp); rw_wunlock(&null_hash_lock); *vpp = vp; @@ -277,7 +307,7 @@ { rw_wlock(&null_hash_lock); - LIST_REMOVE(xp, null_hash); + CK_LIST_REMOVE(xp, null_hash); rw_wunlock(&null_hash_lock); } diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c --- a/sys/fs/nullfs/null_vnops.c +++ b/sys/fs/nullfs/null_vnops.c @@ -177,6 +177,8 @@ #include #include #include +#include +#include #include @@ -185,10 +187,17 @@ #include #include +VFS_SMR_DECLARE; + static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW, &null_bug_bypass, 0, ""); +static bool null_smr_lock = true; +SYSCTL_BOOL(_debug, OID_AUTO, nullfs_smr_lock, CTLFLAG_RW, + &null_smr_lock, 0, ""); + + /* * Synchronize inotify flags with the lower vnode: * - If the upper vnode has the flag set and the lower does not, then the lower @@ -772,6 +781,43 @@ * interlock flag as it applies only to our vnode, not the * vnodes below us on the stack. 
*/ +static int +null_lock_smr(struct vnode *vp, int flags) +{ + struct null_node *nn; + struct vnode *lvp; + int error; + /* Lockless fast path for null_lock(): any non-zero return makes the caller fall back to its interlocked path. Disabled when the debug nullfs_smr_lock knob is false. */ + if (!atomic_load_bool(&null_smr_lock)) + return (EAGAIN); + /* nn is dereferenced only inside the SMR section. */ + vfs_smr_enter(); + + nn = VTONULL_SMR(vp); + if (__predict_false(nn == NULL)) + goto out_bad; + + lvp = nn->null_lowervp; + if (__predict_false(lvp == NULL)) + goto out_bad; + + if (__predict_false(!vhold_smr(lvp))) + goto out_bad; + /* lvp is held now, so the SMR section is left before locking it. */ + vfs_smr_exit(); + /* If vp lost its null_node by the time the lock was obtained, drop the lock and report EAGAIN. */ + error = VOP_LOCK(lvp, flags); + if (VTONULL(vp) == NULL && error == 0) { + VOP_UNLOCK(lvp); + error = EAGAIN; + } + vdrop(lvp); + return (error); +out_bad: + vfs_smr_exit(); + return (EAGAIN); +} + static int null_lock(struct vop_lock1_args *ap) { @@ -781,10 +827,14 @@ struct vnode *lvp; int error; - if ((ap->a_flags & LK_INTERLOCK) == 0) + if ((ap->a_flags & LK_INTERLOCK) == 0) { + error = null_lock_smr(vp, ap->a_flags); + if (error == 0) + return (error); VI_LOCK(vp); - else + } else { ap->a_flags &= ~LK_INTERLOCK; + } flags = ap->a_flags; nn = VTONULL(vp); /* @@ -961,7 +1011,7 @@ vunref(lowervp); else vput(lowervp); - free(xp, M_NULLFSNODE); + uma_zfree_smr(null_node_zone, xp); return (0); }