D52819.id163246.diff

diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -37,6 +37,9 @@
#define NULLM_CACHE 0x0001
+#include <vm/uma.h>
+#include <sys/ck.h>
+
struct null_mount {
struct mount *nullm_vfs;
struct vnode *nullm_lowerrootvp; /* Ref to lower root vnode */
@@ -50,7 +53,7 @@
* A cache of vnode references
*/
struct null_node {
- LIST_ENTRY(null_node) null_hash; /* Hash list */
+ CK_LIST_ENTRY(null_node) null_hash; /* Hash list */
struct vnode *null_lowervp; /* VREFed once */
struct vnode *null_vnode; /* Back pointer */
u_int null_flags;
@@ -61,6 +64,7 @@
#define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
#define VTONULL(vp) ((struct null_node *)(vp)->v_data)
+#define VTONULL_SMR(vp) ((struct null_node *)vn_load_v_data_smr(vp))
#define NULLTOV(xp) ((xp)->null_vnode)
int nullfs_init(struct vfsconf *vfsp);
@@ -79,9 +83,7 @@
extern struct vop_vector null_vnodeops;
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_NULLFSNODE);
-#endif
+extern uma_zone_t null_node_zone;
#ifdef NULLFS_DEBUG
#define NULLFSDEBUG(format, args...) printf(format ,## args)
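
Note on the header changes: VTONULL_SMR() is only meaningful inside a
vfs_smr read section. vn_load_v_data_smr() performs a consume load of
vp->v_data, and the returned null_node stays valid only until
vfs_smr_exit(), because nodes are freed through the SMR-enabled zone
introduced below. A minimal sketch of the intended read-side pattern,
assuming the names from this diff (null_peek_lowervp() is a
hypothetical helper, not part of the change):

#include <sys/param.h>
#include <sys/smr.h>
#include <sys/vnode.h>

#include <fs/nullfs/null.h>

VFS_SMR_DECLARE;

static bool
null_peek_lowervp(struct vnode *vp, struct vnode **lvpp)
{
	struct null_node *nn;
	struct vnode *lvp;
	bool held;

	vfs_smr_enter();
	nn = VTONULL_SMR(vp);
	if (nn == NULL || (lvp = nn->null_lowervp) == NULL) {
		/* vp is being reclaimed; the caller takes the slow path. */
		vfs_smr_exit();
		return (false);
	}
	/* Pin lvp before leaving the SMR section so it cannot be recycled. */
	held = vhold_smr(lvp);
	vfs_smr_exit();
	if (!held)
		return (false);
	*lvpp = lvp;	/* caller must vdrop(*lvpp) when done */
	return (true);
}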
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -41,9 +41,14 @@
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/vnode.h>
+#include <sys/smr.h>
#include <fs/nullfs/null.h>
+#include <vm/uma.h>
+
+VFS_SMR_DECLARE;
+
/*
* Null layer cache:
* Each cache entry holds a reference to the lower vnode
@@ -54,12 +59,12 @@
#define NULL_NHASH(vp) (&null_node_hashtbl[vfs_hash_index(vp) & null_hash_mask])
-static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
+static CK_LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
static struct rwlock null_hash_lock;
static u_long null_hash_mask;
static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
-MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
+uma_zone_t __read_mostly null_node_zone;
static void null_hashins(struct mount *, struct null_node *);
@@ -73,6 +78,10 @@
null_node_hashtbl = hashinit(desiredvnodes, M_NULLFSHASH,
&null_hash_mask);
rw_init(&null_hash_lock, "nullhs");
+ null_node_zone = uma_zcreate("nullfs node", sizeof(struct null_node),
+ NULL, NULL, NULL, NULL, 0, UMA_ZONE_ZINIT);
+ VFS_SMR_ZONE_SET(null_node_zone);
+
return (0);
}
@@ -80,6 +89,7 @@
nullfs_uninit(struct vfsconf *vfsp)
{
+ uma_zdestroy(null_node_zone);
rw_destroy(&null_hash_lock);
hashdestroy(null_node_hashtbl, M_NULLFSHASH, null_hash_mask);
return (0);
@@ -96,7 +106,7 @@
struct null_node *a;
struct vnode *vp;
- ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+ ASSERT_VOP_LOCKED(lowervp, __func__);
rw_assert(&null_hash_lock, RA_LOCKED);
/*
@@ -106,18 +116,21 @@
* reference count (but NOT the lower vnode's VREF counter).
*/
hd = NULL_NHASH(lowervp);
- LIST_FOREACH(a, hd, null_hash) {
- if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
- /*
- * Since we have the lower node locked the nullfs
- * node can not be in the process of recycling. If
- * it had been recycled before we grabed the lower
- * lock it would not have been found on the hash.
- */
- vp = NULLTOV(a);
- vref(vp);
- return (vp);
- }
+ CK_LIST_FOREACH(a, hd, null_hash) {
+ if (a->null_lowervp != lowervp)
+ continue;
+ /*
+ * Since we have the lower node locked the nullfs
+ * node cannot be in the process of recycling. If
+ * it had been recycled before we grabbed the lower
+ * lock it would not have been found on the hash.
+ */
+ vp = NULLTOV(a);
+ VNPASS(!VN_IS_DOOMED(vp), vp);
+ if (vp->v_mount != mp)
+ continue;
+ vref(vp);
+ return (vp);
}
return (NULL);
}
@@ -126,17 +139,34 @@
null_hashget(struct mount *mp, struct vnode *lowervp)
{
struct null_node_hashhead *hd;
+ struct null_node *a;
struct vnode *vp;
+ enum vgetstate vs;
- hd = NULL_NHASH(lowervp);
- if (LIST_EMPTY(hd))
- return (NULL);
-
- rw_rlock(&null_hash_lock);
- vp = null_hashget_locked(mp, lowervp);
- rw_runlock(&null_hash_lock);
+ ASSERT_VOP_LOCKED(lowervp, __func__);
+ rw_assert(&null_hash_lock, RA_UNLOCKED);
- return (vp);
+ vfs_smr_enter();
+ hd = NULL_NHASH(lowervp);
+ CK_LIST_FOREACH(a, hd, null_hash) {
+ if (a->null_lowervp != lowervp)
+ continue;
+ /*
+ * See null_hashget_locked as to why the nullfs vnode can't be
+ * doomed here.
+ */
+ vp = NULLTOV(a);
+ VNPASS(!VN_IS_DOOMED(vp), vp);
+ if (vp->v_mount != mp)
+ continue;
+ vs = vget_prep_smr(vp);
+ vfs_smr_exit();
+ VNPASS(vs != VGET_NONE, vp);
+ vget_finish_ref(vp, vs);
+ return (vp);
+ }
+ vfs_smr_exit();
+ return (NULL);
}
static void
@@ -151,7 +181,7 @@
hd = NULL_NHASH(xp->null_lowervp);
#ifdef INVARIANTS
- LIST_FOREACH(oxp, hd, null_hash) {
+ CK_LIST_FOREACH(oxp, hd, null_hash) {
if (oxp->null_lowervp == xp->null_lowervp &&
NULLTOV(oxp)->v_mount == mp) {
VNASSERT(0, NULLTOV(oxp),
@@ -159,7 +189,7 @@
}
}
#endif
- LIST_INSERT_HEAD(hd, xp, null_hash);
+ CK_LIST_INSERT_HEAD(hd, xp, null_hash);
}
static void
@@ -174,7 +204,7 @@
VI_UNLOCK(vp);
vgone(vp);
vput(vp);
- free(xp, M_NULLFSNODE);
+ uma_zfree_smr(null_node_zone, xp);
}
/*
@@ -208,12 +238,12 @@
* Note that duplicate can only appear in hash if the lowervp is
* locked LK_SHARED.
*/
- xp = malloc(sizeof(struct null_node), M_NULLFSNODE, M_WAITOK);
+ xp = uma_zalloc_smr(null_node_zone, M_WAITOK);
error = getnewvnode("nullfs", mp, &null_vnodeops, &vp);
if (error) {
vput(lowervp);
- free(xp, M_NULLFSNODE);
+ uma_zfree_smr(null_node_zone, xp);
return (error);
}
@@ -261,8 +291,8 @@
return (error);
}
- null_hashins(mp, xp);
vn_set_state(vp, VSTATE_CONSTRUCTED);
+ null_hashins(mp, xp);
rw_wunlock(&null_hash_lock);
*vpp = vp;
@@ -277,7 +307,7 @@
{
rw_wlock(&null_hash_lock);
- LIST_REMOVE(xp, null_hash);
+ CK_LIST_REMOVE(xp, null_hash);
rw_wunlock(&null_hash_lock);
}
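
Two invariants make the lockless null_hashget() above safe. First,
null_node memory is freed only via uma_zfree_smr(), so an entry seen by
CK_LIST_FOREACH() inside the SMR section cannot be reused until the
reader exits. Second, vn_set_state(vp, VSTATE_CONSTRUCTED) is now
ordered before null_hashins(), so a lockless reader can never find a
half-constructed vnode on the hash. A rough sketch of the writer-side
ordering, condensed from the hunks above with error handling elided:

/* Insertion: fully construct, then publish under the hash write lock. */
xp = uma_zalloc_smr(null_node_zone, M_WAITOK);
/* ... initialize xp and the nullfs vnode vp ... */
rw_wlock(&null_hash_lock);
vn_set_state(vp, VSTATE_CONSTRUCTED);
hd = NULL_NHASH(xp->null_lowervp);
CK_LIST_INSERT_HEAD(hd, xp, null_hash);	/* visible to SMR readers from here */
rw_wunlock(&null_hash_lock);

/* Removal: unpublish first, then free; reuse is deferred past readers. */
rw_wlock(&null_hash_lock);
CK_LIST_REMOVE(xp, null_hash);
rw_wunlock(&null_hash_lock);
uma_zfree_smr(null_node_zone, xp);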
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -177,6 +177,8 @@
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
+#include <sys/proc.h>
+#include <sys/smr.h>
#include <fs/nullfs/null.h>
@@ -185,6 +187,8 @@
#include <vm/vm_object.h>
#include <vm/vnode_pager.h>
+VFS_SMR_DECLARE;
+
static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
&null_bug_bypass, 0, "");
@@ -768,10 +772,50 @@
}
/*
- * We need to process our own vnode lock and then clear the
- * interlock flag as it applies only to our vnode, not the
- * vnodes below us on the stack.
+ * We need to process our own vnode lock and then clear the interlock flag as
+ * it applies only to our vnode, not the vnodes below us on the stack.
+ *
+ * We have to hold the vnode here to solve a potential reclaim race. If we're
+ * forcibly vgone'd while we still have refs, a thread could be sleeping inside
+ * the lowervp's vop_lock routine. When we vgone we will drop our last ref to
+ * the lowervp, which would allow it to be reclaimed. The lowervp could then
+ * be recycled, in which case it is not legal to be sleeping in its VOP. We
+ * prevent it from being recycled by holding the vnode here.
*/
+static int
+null_lock_smr(struct vnode *vp, int flags)
+{
+ struct null_node *nn;
+ struct vnode *lvp;
+ int error;
+
+ vfs_smr_enter();
+
+ nn = VTONULL_SMR(vp);
+ if (__predict_false(nn == NULL))
+ goto out_bad;
+
+ lvp = nn->null_lowervp;
+ if (__predict_false(lvp == NULL))
+ goto out_bad;
+
+ if (__predict_false(!vhold_smr(lvp)))
+ goto out_bad;
+
+ vfs_smr_exit();
+
+ error = VOP_LOCK(lvp, flags);
+ if (VTONULL(vp) == NULL && error == 0) {
+ VOP_UNLOCK(lvp);
+ error = EAGAIN;
+ }
+ vdrop(lvp);
+ return (error);
+out_bad:
+ vfs_smr_exit();
+ return (EAGAIN);
+}
+
static int
null_lock(struct vop_lock1_args *ap)
{
@@ -781,10 +825,20 @@
struct vnode *lvp;
int error;
- if ((ap->a_flags & LK_INTERLOCK) == 0)
+ if ((ap->a_flags & LK_INTERLOCK) == 0) {
+ /*
+ * This is the common case.
+ *
+ * Try to avoid acquiring the interlock; fall back in case there
+ * is any trouble.
+ */
+ error = null_lock_smr(vp, ap->a_flags);
+ if (error == 0)
+ return (error);
VI_LOCK(vp);
- else
+ } else {
ap->a_flags &= ~LK_INTERLOCK;
+ }
flags = ap->a_flags;
nn = VTONULL(vp);
/*
@@ -793,17 +847,6 @@
* vop lock.
*/
if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
- /*
- * We have to hold the vnode here to solve a potential
- * reclaim race. If we're forcibly vgone'd while we
- * still have refs, a thread could be sleeping inside
- * the lowervp's vop_lock routine. When we vgone we will
- * drop our last ref to the lowervp, which would allow it
- * to be reclaimed. The lowervp could then be recycled,
- * in which case it is not legal to be sleeping in its VOP.
- * We prevent it from being recycled by holding the vnode
- * here.
- */
vholdnz(lvp);
VI_UNLOCK(vp);
error = VOP_LOCK(lvp, flags);
@@ -840,11 +883,6 @@
return (error);
}
-/*
- * We need to process our own vnode unlock and then clear the
- * interlock flag as it applies only to our vnode, not the
- * vnodes below us on the stack.
- */
static int
null_unlock(struct vop_unlock_args *ap)
{
@@ -853,11 +891,20 @@
struct vnode *lvp;
int error;
+ /*
+ * Unlike in null_lock, we don't need to hold the vnode around
+ * unlock.
+ *
+ * We hold the lock, which means we can't be racing against vgone.
+ *
+ * At the same time VOP_UNLOCK promises to not touch anything after
+ * it finishes unlock, just like we don't.
+ *
+ * vop_stdunlock for a doomed vnode matches doomed locking in null_lock.
+ */
nn = VTONULL(vp);
if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
- vholdnz(lvp);
error = VOP_UNLOCK(lvp);
- vdrop(lvp);
} else {
error = vop_stdunlock(ap);
}
@@ -961,7 +1008,7 @@
vunref(lowervp);
else
vput(lowervp);
- free(xp, M_NULLFSNODE);
+ uma_zfree_smr(null_node_zone, xp);
return (0);
}
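
The fast path added to null_lock() has to revalidate after sleeping:
VOP_LOCK() on the lower vnode may block, and the nullfs vnode can be
reclaimed in the meantime, so null_lock_smr() rechecks VTONULL(vp) once
the lower lock is held and returns EAGAIN to push the caller onto the
interlocked slow path. A condensed view of the resulting control flow,
using the names from the diff:

/* In null_lock(): */
if ((ap->a_flags & LK_INTERLOCK) == 0) {
	/* Common case: no interlock requested, try the SMR fast path. */
	error = null_lock_smr(vp, ap->a_flags);
	if (error == 0)
		return (0);	/* lower vnode lock is held */
	VI_LOCK(vp);		/* EAGAIN: retry via the interlocked path */
} else {
	ap->a_flags &= ~LK_INTERLOCK;
}
/* ... existing path: vholdnz(lvp); VI_UNLOCK(vp); VOP_LOCK(lvp, flags); ... */

null_unlock() needs no equivalent dance: the caller holds the lock, so
the vnode cannot be reclaimed out from under it, and VOP_UNLOCK()
promises not to touch the vnode once the unlock completes.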
