D52819: nullfs: smr-protected hash lookup and locking
D52819.diff (11 KB)

diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -37,6 +37,9 @@
#define NULLM_CACHE 0x0001
+#include <sys/ck.h>
+#include <vm/uma.h>
+
struct null_mount {
struct mount *nullm_vfs;
struct vnode *nullm_lowerrootvp; /* Ref to lower root vnode */
@@ -50,7 +53,7 @@
* A cache of vnode references
*/
struct null_node {
- LIST_ENTRY(null_node) null_hash; /* Hash list */
+ CK_LIST_ENTRY(null_node) null_hash; /* Hash list */
struct vnode *null_lowervp; /* VREFed once */
struct vnode *null_vnode; /* Back pointer */
u_int null_flags;
@@ -61,6 +64,7 @@
#define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
#define VTONULL(vp) ((struct null_node *)(vp)->v_data)
+#define VTONULL_SMR(vp) ((struct null_node *)vn_load_v_data_smr(vp))
#define NULLTOV(xp) ((xp)->null_vnode)
int nullfs_init(struct vfsconf *vfsp);
@@ -79,9 +83,7 @@
extern struct vop_vector null_vnodeops;
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_NULLFSNODE);
-#endif
+extern uma_zone_t null_node_zone;
#ifdef NULLFS_DEBUG
#define NULLFSDEBUG(format, args...) printf(format ,## args)
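
Context for the null.h hunks above: replacing the malloc(9) type with a dedicated UMA zone is what makes lockless readers safe, since an SMR-enabled zone defers item reuse until every thread currently inside a vfs_smr read section has left it, and CK_LIST linkage keeps concurrent list walks well-defined while entries are inserted or removed. Below is a minimal sketch of that zone setup, assuming the stock uma(9) and smr(9) KPIs; everything named example_* is hypothetical and not part of this change.

/* Illustrative sketch only -- not part of D52819. */
#include <sys/param.h>
#include <sys/ck.h>
#include <sys/smr.h>
#include <sys/vnode.h>
#include <vm/uma.h>

VFS_SMR_DECLARE;			/* share the VFS SMR domain */

struct example_node {			/* hypothetical SMR-managed record */
	CK_LIST_ENTRY(example_node) link;
	struct vnode *vnode;
	int key;
};

CK_LIST_HEAD(example_head, example_node);

static uma_zone_t example_zone;

static void
example_zone_init(void)
{
	example_zone = uma_zcreate("example node", sizeof(struct example_node),
	    NULL, NULL, NULL, NULL, 0, UMA_ZONE_ZINIT);
	/*
	 * Tie the zone to the VFS SMR domain; uma_zfree_smr() will then
	 * defer reuse of an item until concurrent vfs_smr readers drain.
	 */
	VFS_SMR_ZONE_SET(example_zone);
}
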
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -41,9 +41,14 @@
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/vnode.h>
+#include <sys/smr.h>
#include <fs/nullfs/null.h>
+#include <vm/uma.h>
+
+VFS_SMR_DECLARE;
+
/*
* Null layer cache:
* Each cache entry holds a reference to the lower vnode
@@ -54,12 +59,12 @@
#define NULL_NHASH(vp) (&null_node_hashtbl[vfs_hash_index(vp) & null_hash_mask])
-static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
+static CK_LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
static struct rwlock null_hash_lock;
static u_long null_hash_mask;
static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
-MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
+uma_zone_t __read_mostly null_node_zone;
static void null_hashins(struct mount *, struct null_node *);
@@ -73,6 +78,10 @@
null_node_hashtbl = hashinit(desiredvnodes, M_NULLFSHASH,
&null_hash_mask);
rw_init(&null_hash_lock, "nullhs");
+ null_node_zone = uma_zcreate("nullfs node", sizeof(struct null_node),
+ NULL, NULL, NULL, NULL, 0, UMA_ZONE_ZINIT);
+ VFS_SMR_ZONE_SET(null_node_zone);
+
return (0);
}
@@ -80,6 +89,7 @@
nullfs_uninit(struct vfsconf *vfsp)
{
+ uma_zdestroy(null_node_zone);
rw_destroy(&null_hash_lock);
hashdestroy(null_node_hashtbl, M_NULLFSHASH, null_hash_mask);
return (0);
@@ -96,7 +106,7 @@
struct null_node *a;
struct vnode *vp;
- ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+ ASSERT_VOP_LOCKED(lowervp, __func__);
rw_assert(&null_hash_lock, RA_LOCKED);
/*
@@ -106,18 +116,21 @@
* reference count (but NOT the lower vnode's VREF counter).
*/
hd = NULL_NHASH(lowervp);
- LIST_FOREACH(a, hd, null_hash) {
- if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
- /*
- * Since we have the lower node locked the nullfs
- * node can not be in the process of recycling. If
- * it had been recycled before we grabed the lower
- * lock it would not have been found on the hash.
- */
- vp = NULLTOV(a);
- vref(vp);
- return (vp);
- }
+ CK_LIST_FOREACH(a, hd, null_hash) {
+ if (a->null_lowervp != lowervp)
+ continue;
+ /*
+ * Since we have the lower node locked the nullfs
+ * node can not be in the process of recycling. If
+ * it had been recycled before we grabbed the lower
+ * lock it would not have been found on the hash.
+ */
+ vp = NULLTOV(a);
+ VNPASS(!VN_IS_DOOMED(vp), vp);
+ if (vp->v_mount != mp)
+ continue;
+ vref(vp);
+ return (vp);
}
return (NULL);
}
@@ -126,17 +139,34 @@
null_hashget(struct mount *mp, struct vnode *lowervp)
{
struct null_node_hashhead *hd;
+ struct null_node *a;
struct vnode *vp;
+ enum vgetstate vs;
- hd = NULL_NHASH(lowervp);
- if (LIST_EMPTY(hd))
- return (NULL);
-
- rw_rlock(&null_hash_lock);
- vp = null_hashget_locked(mp, lowervp);
- rw_runlock(&null_hash_lock);
+ ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+ rw_assert(&null_hash_lock, RA_UNLOCKED);
- return (vp);
+ vfs_smr_enter();
+ hd = NULL_NHASH(lowervp);
+ CK_LIST_FOREACH(a, hd, null_hash) {
+ if (a->null_lowervp != lowervp)
+ continue;
+ /*
+ * See null_hashget_locked as to why the nullfs vnode can't be
+ * doomed here.
+ */
+ vp = NULLTOV(a);
+ VNPASS(!VN_IS_DOOMED(vp), vp);
+ if (vp->v_mount != mp)
+ continue;
+ vs = vget_prep_smr(vp);
+ vfs_smr_exit();
+ VNPASS(vs != VGET_NONE, vp);
+ vget_finish_ref(vp, vs);
+ return (vp);
+ }
+ vfs_smr_exit();
+ return (NULL);
}
static void
@@ -151,7 +181,7 @@
hd = NULL_NHASH(xp->null_lowervp);
#ifdef INVARIANTS
- LIST_FOREACH(oxp, hd, null_hash) {
+ CK_LIST_FOREACH(oxp, hd, null_hash) {
if (oxp->null_lowervp == xp->null_lowervp &&
NULLTOV(oxp)->v_mount == mp) {
VNASSERT(0, NULLTOV(oxp),
@@ -159,7 +189,7 @@
}
}
#endif
- LIST_INSERT_HEAD(hd, xp, null_hash);
+ CK_LIST_INSERT_HEAD(hd, xp, null_hash);
}
static void
@@ -174,7 +204,7 @@
VI_UNLOCK(vp);
vgone(vp);
vput(vp);
- free(xp, M_NULLFSNODE);
+ uma_zfree_smr(null_node_zone, xp);
}
/*
@@ -208,12 +238,12 @@
* Note that duplicate can only appear in hash if the lowervp is
* locked LK_SHARED.
*/
- xp = malloc(sizeof(struct null_node), M_NULLFSNODE, M_WAITOK);
+ xp = uma_zalloc_smr(null_node_zone, M_WAITOK);
error = getnewvnode("nullfs", mp, &null_vnodeops, &vp);
if (error) {
vput(lowervp);
- free(xp, M_NULLFSNODE);
+ uma_zfree_smr(null_node_zone, xp);
return (error);
}
@@ -261,8 +291,8 @@
return (error);
}
- null_hashins(mp, xp);
vn_set_state(vp, VSTATE_CONSTRUCTED);
+ null_hashins(mp, xp);
rw_wunlock(&null_hash_lock);
*vpp = vp;
@@ -277,7 +307,7 @@
{
rw_wlock(&null_hash_lock);
- LIST_REMOVE(xp, null_hash);
+ CK_LIST_REMOVE(xp, null_hash);
rw_wunlock(&null_hash_lock);
}
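
For orientation with the new null_hashget() above: the lockless path follows the standard vfs_smr idiom of peeking at shared data inside an SMR read section and converting the sighting into a real vnode reference before leaving it. A generic sketch of that idiom follows, using the real vget_prep_smr()/vget_finish_ref() KPIs the diff relies on and reusing the hypothetical example_* declarations from the sketch after the null.h section.

/* Illustrative sketch only -- not part of D52819. */
static struct vnode *
example_smr_lookup(struct example_head *hd, int key)
{
	struct example_node *np;
	struct vnode *vp;
	enum vgetstate vs;

	vfs_smr_enter();
	CK_LIST_FOREACH(np, hd, link) {
		if (np->key != key)
			continue;
		vp = np->vnode;
		/*
		 * Turn the sighting into a hold while still inside the SMR
		 * section; once we leave it, the node and the vnode may be
		 * recycled at any time.
		 */
		vs = vget_prep_smr(vp);
		vfs_smr_exit();
		if (vs == VGET_NONE)
			return (NULL);
		/* With a reference in hand it is now safe to block. */
		vget_finish_ref(vp, vs);
		return (vp);
	}
	vfs_smr_exit();
	return (NULL);
}

In the diff itself the VGET_NONE case is asserted away (VNPASS) because the caller holds the lower vnode locked, which rules out the nullfs vnode being doomed or freed concurrently.
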
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -174,6 +174,8 @@
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/smr.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
@@ -185,6 +187,8 @@
#include <vm/vm_object.h>
#include <vm/vnode_pager.h>
+VFS_SMR_DECLARE;
+
static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
&null_bug_bypass, 0, "");
@@ -768,83 +772,110 @@
}
/*
- * We need to process our own vnode lock and then clear the
- * interlock flag as it applies only to our vnode, not the
- * vnodes below us on the stack.
+ * We need to process our own vnode lock and then clear the interlock flag as
+ * it applies only to our vnode, not the vnodes below us on the stack.
+ *
+ * We have to hold the vnode here to solve a potential reclaim race. If we're
+ * forcibly vgone'd while we still have refs, a thread could be sleeping inside
+ * the lowervp's vop_lock routine. When we vgone we will drop our last ref to
+ * the lowervp, which would allow it to be reclaimed. The lowervp could then
+ * be recycled, in which case it is not legal to be sleeping in its VOP. We
+ * prevent it from being recycled by holding the vnode here.
*/
+static struct vnode *
+null_lock_prep_with_smr(struct vop_lock1_args *ap)
+{
+ struct null_node *nn;
+ struct vnode *lvp;
+
+ vfs_smr_enter();
+
+ lvp = NULL;
+
+ nn = VTONULL_SMR(ap->a_vp);
+ if (__predict_true(nn != NULL)) {
+ lvp = nn->null_lowervp;
+ if (lvp != NULL && !vhold_smr(lvp))
+ lvp = NULL;
+ }
+
+ vfs_smr_exit();
+ return (lvp);
+}
+
+static struct vnode *
+null_lock_prep_with_interlock(struct vop_lock1_args *ap)
+{
+ struct null_node *nn;
+ struct vnode *lvp;
+
+ ASSERT_VI_LOCKED(ap->a_vp, __func__);
+
+ ap->a_flags &= ~LK_INTERLOCK;
+
+ lvp = NULL;
+
+ nn = VTONULL(ap->a_vp);
+ if (__predict_true(nn != NULL)) {
+ lvp = nn->null_lowervp;
+ if (lvp != NULL)
+ vholdnz(lvp);
+ }
+ VI_UNLOCK(ap->a_vp);
+ return (lvp);
+}
+
static int
null_lock(struct vop_lock1_args *ap)
{
- struct vnode *vp = ap->a_vp;
- int flags;
- struct null_node *nn;
struct vnode *lvp;
- int error;
+ int error, flags;
- if ((ap->a_flags & LK_INTERLOCK) == 0)
- VI_LOCK(vp);
- else
- ap->a_flags &= ~LK_INTERLOCK;
- flags = ap->a_flags;
- nn = VTONULL(vp);
+ if (__predict_true((ap->a_flags & LK_INTERLOCK) == 0)) {
+ lvp = null_lock_prep_with_smr(ap);
+ if (__predict_false(lvp == NULL)) {
+ VI_LOCK(ap->a_vp);
+ lvp = null_lock_prep_with_interlock(ap);
+ }
+ } else {
+ lvp = null_lock_prep_with_interlock(ap);
+ }
+
+ ASSERT_VI_UNLOCKED(ap->a_vp, __func__);
+
+ if (__predict_false(lvp == NULL))
+ return (vop_stdlock(ap));
+
+ VNPASS(lvp->v_holdcnt > 0, lvp);
+ error = VOP_LOCK(lvp, ap->a_flags);
/*
- * If we're still active we must ask the lower layer to
- * lock as ffs has special lock considerations in its
- * vop lock.
+ * We might have slept to get the lock and someone might have
+ * cleaned our vnode already, switching vnode lock from one in
+ * lowervp to v_lock in our own vnode structure. Handle this
+ * case by reacquiring correct lock in requested mode.
*/
- if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
- /*
- * We have to hold the vnode here to solve a potential
- * reclaim race. If we're forcibly vgone'd while we
- * still have refs, a thread could be sleeping inside
- * the lowervp's vop_lock routine. When we vgone we will
- * drop our last ref to the lowervp, which would allow it
- * to be reclaimed. The lowervp could then be recycled,
- * in which case it is not legal to be sleeping in its VOP.
- * We prevent it from being recycled by holding the vnode
- * here.
- */
- vholdnz(lvp);
- VI_UNLOCK(vp);
- error = VOP_LOCK(lvp, flags);
-
- /*
- * We might have slept to get the lock and someone might have
- * clean our vnode already, switching vnode lock from one in
- * lowervp to v_lock in our own vnode structure. Handle this
- * case by reacquiring correct lock in requested mode.
- */
- if (VTONULL(vp) == NULL && error == 0) {
- ap->a_flags &= ~LK_TYPE_MASK;
- switch (flags & LK_TYPE_MASK) {
- case LK_SHARED:
- ap->a_flags |= LK_SHARED;
- break;
- case LK_UPGRADE:
- case LK_EXCLUSIVE:
- ap->a_flags |= LK_EXCLUSIVE;
- break;
- default:
- panic("Unsupported lock request %d\n",
- ap->a_flags);
- }
- VOP_UNLOCK(lvp);
- error = vop_stdlock(ap);
+ if (VTONULL(ap->a_vp) == NULL && error == 0) {
+ flags = ap->a_flags;
+ ap->a_flags &= ~LK_TYPE_MASK;
+ switch (flags & LK_TYPE_MASK) {
+ case LK_SHARED:
+ ap->a_flags |= LK_SHARED;
+ break;
+ case LK_UPGRADE:
+ case LK_EXCLUSIVE:
+ ap->a_flags |= LK_EXCLUSIVE;
+ break;
+ default:
+ panic("Unsupported lock request %d\n",
+ flags);
}
- vdrop(lvp);
- } else {
- VI_UNLOCK(vp);
+ VOP_UNLOCK(lvp);
error = vop_stdlock(ap);
}
-
+ vdrop(lvp);
return (error);
}
-/*
- * We need to process our own vnode unlock and then clear the
- * interlock flag as it applies only to our vnode, not the
- * vnodes below us on the stack.
- */
static int
null_unlock(struct vop_unlock_args *ap)
{
@@ -853,11 +884,20 @@
struct vnode *lvp;
int error;
+ /*
+ * Contrary to null_lock, we don't need to hold the vnode around
+ * unlock.
+ *
+ * We hold the lock, which means we can't be racing against vgone.
+ *
+ * At the same time VOP_UNLOCK promises to not touch anything after
+ * it finishes unlock, just like we don't.
+ *
+ * vop_stdunlock for a doomed vnode matches doomed locking in null_lock.
+ */
nn = VTONULL(vp);
if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
- vholdnz(lvp);
error = VOP_UNLOCK(lvp);
- vdrop(lvp);
} else {
error = vop_stdunlock(ap);
}
@@ -961,7 +1001,7 @@
vunref(lowervp);
else
vput(lowervp);
- free(xp, M_NULLFSNODE);
+ uma_zfree_smr(null_node_zone, xp);
return (0);
}
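
A closing note on writer-side ordering, which is why the null_nodeget() hunk moves vn_set_state(VSTATE_CONSTRUCTED) ahead of null_hashins(): once an entry is linked with CK_LIST_INSERT_HEAD(), lockless readers can see it immediately and no lock acquisition orders the construction stores for them, so the object must be fully set up before it is published, and it must be unhashed before its memory is returned to the SMR zone. A hedged sketch of that publish/retire pairing, again reusing the hypothetical example_* names from the sketches above:

/* Illustrative sketch only -- not part of D52819. */
static void
example_publish(struct example_head *hd, struct example_node *np,
    struct vnode *vp, int key)
{
	/* Finish constructing the record before it becomes visible. */
	np->vnode = vp;
	np->key = key;
	vn_set_state(vp, VSTATE_CONSTRUCTED);
	/* Readers may find np the moment this insert completes. */
	CK_LIST_INSERT_HEAD(hd, np, link);
}

static void
example_retire(struct example_node *np)
{
	/* Unhash first so new readers can no longer find the entry... */
	CK_LIST_REMOVE(np, link);
	/* ...then let the SMR zone defer reuse past existing readers. */
	uma_zfree_smr(example_zone, np);
}

Note that writers still serialize against each other with null_hash_lock in the diff; SMR only removes locking from the read side.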