D52819: nullfs: smr-protected hash lookup and locking
D52819.diff (11 KB)

diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -37,6 +37,9 @@
#define NULLM_CACHE 0x0001
+#include <sys/ck.h>
+#include <vm/uma.h>
+
struct null_mount {
struct mount *nullm_vfs;
struct vnode *nullm_lowerrootvp; /* Ref to lower root vnode */
@@ -50,7 +53,7 @@
* A cache of vnode references
*/
struct null_node {
- LIST_ENTRY(null_node) null_hash; /* Hash list */
+ CK_LIST_ENTRY(null_node) null_hash; /* Hash list */
struct vnode *null_lowervp; /* VREFed once */
struct vnode *null_vnode; /* Back pointer */
u_int null_flags;
@@ -61,6 +64,7 @@
#define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
#define VTONULL(vp) ((struct null_node *)(vp)->v_data)
+#define VTONULL_SMR(vp) ((struct null_node *)vn_load_v_data_smr(vp))
#define NULLTOV(xp) ((xp)->null_vnode)
int nullfs_init(struct vfsconf *vfsp);
@@ -79,9 +83,7 @@
extern struct vop_vector null_vnodeops;
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_NULLFSNODE);
-#endif
+extern uma_zone_t null_node_zone;
#ifdef NULLFS_DEBUG
#define NULLFSDEBUG(format, args...) printf(format ,## args)
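
Context for the null.h hunks above: replacing the malloc(9) type with a dedicated UMA zone is what makes lockless readers safe, since an SMR-enabled zone defers item reuse until every thread currently inside a vfs_smr read section has left it, and CK_LIST linkage keeps concurrent list walks well-defined while entries are inserted or removed. Below is a minimal sketch of that zone setup, assuming the stock uma(9) and smr(9) KPIs; everything named example_* is hypothetical and not part of this change.

/* Illustrative sketch only -- not part of D52819. */
#include <sys/param.h>
#include <sys/ck.h>
#include <sys/smr.h>
#include <sys/vnode.h>
#include <vm/uma.h>

VFS_SMR_DECLARE;			/* share the VFS SMR domain */

struct example_node {			/* hypothetical SMR-managed record */
	CK_LIST_ENTRY(example_node) link;
	struct vnode *vnode;
	int key;
};

CK_LIST_HEAD(example_head, example_node);

static uma_zone_t example_zone;

static void
example_zone_init(void)
{
	example_zone = uma_zcreate("example node", sizeof(struct example_node),
	    NULL, NULL, NULL, NULL, 0, UMA_ZONE_ZINIT);
	/*
	 * Tie the zone to the VFS SMR domain; uma_zfree_smr() will then
	 * defer reuse of an item until concurrent vfs_smr readers drain.
	 */
	VFS_SMR_ZONE_SET(example_zone);
}
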
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -41,9 +41,14 @@
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/vnode.h>
+#include <sys/smr.h>
#include <fs/nullfs/null.h>
+#include <vm/uma.h>
+
+VFS_SMR_DECLARE;
+
/*
* Null layer cache:
* Each cache entry holds a reference to the lower vnode
@@ -54,12 +59,12 @@
#define NULL_NHASH(vp) (&null_node_hashtbl[vfs_hash_index(vp) & null_hash_mask])
-static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
+static CK_LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
static struct rwlock null_hash_lock;
static u_long null_hash_mask;
static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
-MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
+uma_zone_t __read_mostly null_node_zone;
static void null_hashins(struct mount *, struct null_node *);
@@ -73,6 +78,10 @@
null_node_hashtbl = hashinit(desiredvnodes, M_NULLFSHASH,
&null_hash_mask);
rw_init(&null_hash_lock, "nullhs");
+ null_node_zone = uma_zcreate("nullfs node", sizeof(struct null_node),
+ NULL, NULL, NULL, NULL, 0, UMA_ZONE_ZINIT);
+ VFS_SMR_ZONE_SET(null_node_zone);
+
return (0);
}
@@ -80,6 +89,7 @@
nullfs_uninit(struct vfsconf *vfsp)
{
+ uma_zdestroy(null_node_zone);
rw_destroy(&null_hash_lock);
hashdestroy(null_node_hashtbl, M_NULLFSHASH, null_hash_mask);
return (0);
@@ -96,7 +106,7 @@
struct null_node *a;
struct vnode *vp;
- ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+ ASSERT_VOP_LOCKED(lowervp, __func__);
rw_assert(&null_hash_lock, RA_LOCKED);
/*
@@ -106,18 +116,21 @@
* reference count (but NOT the lower vnode's VREF counter).
*/
hd = NULL_NHASH(lowervp);
- LIST_FOREACH(a, hd, null_hash) {
- if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
- /*
- * Since we have the lower node locked the nullfs
- * node can not be in the process of recycling. If
- * it had been recycled before we grabed the lower
- * lock it would not have been found on the hash.
- */
- vp = NULLTOV(a);
- vref(vp);
- return (vp);
- }
+ CK_LIST_FOREACH(a, hd, null_hash) {
+ if (a->null_lowervp != lowervp)
+ continue;
+ /*
+ * Since we have the lower node locked the nullfs
+ * node can not be in the process of recycling. If
+ * it had been recycled before we grabbed the lower
+ * lock it would not have been found on the hash.
+ */
+ vp = NULLTOV(a);
+ VNPASS(!VN_IS_DOOMED(vp), vp);
+ if (vp->v_mount != mp)
+ continue;
+ vref(vp);
+ return (vp);
}
return (NULL);
}
@@ -126,17 +139,34 @@
null_hashget(struct mount *mp, struct vnode *lowervp)
{
struct null_node_hashhead *hd;
+ struct null_node *a;
struct vnode *vp;
+ enum vgetstate vs;
- hd = NULL_NHASH(lowervp);
- if (LIST_EMPTY(hd))
- return (NULL);
-
- rw_rlock(&null_hash_lock);
- vp = null_hashget_locked(mp, lowervp);
- rw_runlock(&null_hash_lock);
+ ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+ rw_assert(&null_hash_lock, RA_UNLOCKED);
- return (vp);
+ vfs_smr_enter();
+ hd = NULL_NHASH(lowervp);
+ CK_LIST_FOREACH(a, hd, null_hash) {
+ if (a->null_lowervp != lowervp)
+ continue;
+ /*
+ * See null_hashget_locked as to why the nullfs vnode can't be
+ * doomed here.
+ */
+ vp = NULLTOV(a);
+ VNPASS(!VN_IS_DOOMED(vp), vp);
+ if (vp->v_mount != mp)
+ continue;
+ vs = vget_prep_smr(vp);
+ vfs_smr_exit();
+ VNPASS(vs != VGET_NONE, vp);
+ vget_finish_ref(vp, vs);
+ return (vp);
+ }
+ vfs_smr_exit();
+ return (NULL);
}
static void
@@ -151,7 +181,7 @@
hd = NULL_NHASH(xp->null_lowervp);
#ifdef INVARIANTS
- LIST_FOREACH(oxp, hd, null_hash) {
+ CK_LIST_FOREACH(oxp, hd, null_hash) {
if (oxp->null_lowervp == xp->null_lowervp &&
NULLTOV(oxp)->v_mount == mp) {
VNASSERT(0, NULLTOV(oxp),
@@ -159,7 +189,7 @@
}
}
#endif
- LIST_INSERT_HEAD(hd, xp, null_hash);
+ CK_LIST_INSERT_HEAD(hd, xp, null_hash);
}
static void
@@ -174,7 +204,7 @@
VI_UNLOCK(vp);
vgone(vp);
vput(vp);
- free(xp, M_NULLFSNODE);
+ uma_zfree_smr(null_node_zone, xp);
}
/*
@@ -208,12 +238,12 @@
* Note that duplicate can only appear in hash if the lowervp is
* locked LK_SHARED.
*/
- xp = malloc(sizeof(struct null_node), M_NULLFSNODE, M_WAITOK);
+ xp = uma_zalloc_smr(null_node_zone, M_WAITOK);
error = getnewvnode("nullfs", mp, &null_vnodeops, &vp);
if (error) {
vput(lowervp);
- free(xp, M_NULLFSNODE);
+ uma_zfree_smr(null_node_zone, xp);
return (error);
}
@@ -261,8 +291,8 @@
return (error);
}
- null_hashins(mp, xp);
vn_set_state(vp, VSTATE_CONSTRUCTED);
+ null_hashins(mp, xp);
rw_wunlock(&null_hash_lock);
*vpp = vp;
@@ -277,7 +307,7 @@
{
rw_wlock(&null_hash_lock);
- LIST_REMOVE(xp, null_hash);
+ CK_LIST_REMOVE(xp, null_hash);
rw_wunlock(&null_hash_lock);
}
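
For orientation with the new null_hashget() above: the lockless path follows the standard vfs_smr idiom of peeking at shared data inside an SMR read section and converting the sighting into a real vnode reference before leaving it. A generic sketch of that idiom follows, using the real vget_prep_smr()/vget_finish_ref() KPIs the diff relies on and reusing the hypothetical example_* declarations from the sketch after the null.h section.

/* Illustrative sketch only -- not part of D52819. */
static struct vnode *
example_smr_lookup(struct example_head *hd, int key)
{
	struct example_node *np;
	struct vnode *vp;
	enum vgetstate vs;

	vfs_smr_enter();
	CK_LIST_FOREACH(np, hd, link) {
		if (np->key != key)
			continue;
		vp = np->vnode;
		/*
		 * Turn the sighting into a hold while still inside the SMR
		 * section; once we leave it, the node and the vnode may be
		 * recycled at any time.
		 */
		vs = vget_prep_smr(vp);
		vfs_smr_exit();
		if (vs == VGET_NONE)
			return (NULL);
		/* With a reference in hand it is now safe to block. */
		vget_finish_ref(vp, vs);
		return (vp);
	}
	vfs_smr_exit();
	return (NULL);
}

In the diff itself the VGET_NONE case is asserted away (VNPASS) because the caller holds the lower vnode locked, which rules out the nullfs vnode being doomed or freed concurrently.
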
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -174,6 +174,8 @@
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/smr.h>
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
@@ -185,6 +187,8 @@
#include <vm/vm_object.h>
#include <vm/vnode_pager.h>
+VFS_SMR_DECLARE;
+
static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
&null_bug_bypass, 0, "");
@@ -768,83 +772,110 @@
}
/*
- * We need to process our own vnode lock and then clear the
- * interlock flag as it applies only to our vnode, not the
- * vnodes below us on the stack.
+ * We need to process our own vnode lock and then clear the interlock flag as
+ * it applies only to our vnode, not the vnodes below us on the stack.
+ *
+ * We have to hold the vnode here to solve a potential reclaim race. If we're
+ * forcibly vgone'd while we still have refs, a thread could be sleeping inside
+ * the lowervp's vop_lock routine. When we vgone we will drop our last ref to
+ * the lowervp, which would allow it to be reclaimed. The lowervp could then
+ * be recycled, in which case it is not legal to be sleeping in its VOP. We
+ * prevent it from being recycled by holding the vnode here.
*/
+static struct vnode *
+null_lock_prep_with_smr(struct vop_lock1_args *ap)
+{
+ struct null_node *nn;
+ struct vnode *lvp;
+
+ vfs_smr_enter();
+
+ lvp = NULL;
+
+ nn = VTONULL_SMR(ap->a_vp);
+ if (__predict_true(nn != NULL)) {
+ lvp = nn->null_lowervp;
+ if (lvp != NULL && !vhold_smr(lvp))
+ lvp = NULL;
+ }
+
+ vfs_smr_exit();
+ return (lvp);
+}
+
+static struct vnode *
+null_lock_prep_with_interlock(struct vop_lock1_args *ap)
+{
+ struct null_node *nn;
+ struct vnode *lvp;
+
+ ASSERT_VI_LOCKED(ap->a_vp, __func__);
+
+ ap->a_flags &= ~LK_INTERLOCK;
+
+ lvp = NULL;
+
+ nn = VTONULL(ap->a_vp);
+ if (__predict_true(nn != NULL)) {
+ lvp = nn->null_lowervp;
+ if (lvp != NULL)
+ vholdnz(lvp);
+ }
+ VI_UNLOCK(ap->a_vp);
+ return (lvp);
+}
+
static int
null_lock(struct vop_lock1_args *ap)
{
- struct vnode *vp = ap->a_vp;
- int flags;
- struct null_node *nn;
struct vnode *lvp;
- int error;
+ int error, flags;
- if ((ap->a_flags & LK_INTERLOCK) == 0)
- VI_LOCK(vp);
- else
- ap->a_flags &= ~LK_INTERLOCK;
- flags = ap->a_flags;
- nn = VTONULL(vp);
+ if (__predict_true((ap->a_flags & LK_INTERLOCK) == 0)) {
+ lvp = null_lock_prep_with_smr(ap);
+ if (__predict_false(lvp == NULL)) {
+ VI_LOCK(ap->a_vp);
+ lvp = null_lock_prep_with_interlock(ap);
+ }
+ } else {
+ lvp = null_lock_prep_with_interlock(ap);
+ }
+
+ ASSERT_VI_UNLOCKED(ap->a_vp, __func__);
+
+ if (__predict_false(lvp == NULL))
+ return (vop_stdlock(ap));
+
+ VNPASS(lvp->v_holdcnt > 0, lvp);
+ error = VOP_LOCK(lvp, ap->a_flags);
/*
- * If we're still active we must ask the lower layer to
- * lock as ffs has special lock considerations in its
- * vop lock.
+ * We might have slept to get the lock and someone might have
+ * cleaned our vnode already, switching vnode lock from one in
+ * lowervp to v_lock in our own vnode structure. Handle this
+ * case by reacquiring correct lock in requested mode.
*/
- if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
- /*
- * We have to hold the vnode here to solve a potential
- * reclaim race. If we're forcibly vgone'd while we
- * still have refs, a thread could be sleeping inside
- * the lowervp's vop_lock routine. When we vgone we will
- * drop our last ref to the lowervp, which would allow it
- * to be reclaimed. The lowervp could then be recycled,
- * in which case it is not legal to be sleeping in its VOP.
- * We prevent it from being recycled by holding the vnode
- * here.
- */
- vholdnz(lvp);
- VI_UNLOCK(vp);
- error = VOP_LOCK(lvp, flags);
-
- /*
- * We might have slept to get the lock and someone might have
- * clean our vnode already, switching vnode lock from one in
- * lowervp to v_lock in our own vnode structure. Handle this
- * case by reacquiring correct lock in requested mode.
- */
- if (VTONULL(vp) == NULL && error == 0) {
- ap->a_flags &= ~LK_TYPE_MASK;
- switch (flags & LK_TYPE_MASK) {
- case LK_SHARED:
- ap->a_flags |= LK_SHARED;
- break;
- case LK_UPGRADE:
- case LK_EXCLUSIVE:
- ap->a_flags |= LK_EXCLUSIVE;
- break;
- default:
- panic("Unsupported lock request %d\n",
- ap->a_flags);
- }
- VOP_UNLOCK(lvp);
- error = vop_stdlock(ap);
+ if (VTONULL(ap->a_vp) == NULL && error == 0) {
+ flags = ap->a_flags;
+ ap->a_flags &= ~LK_TYPE_MASK;
+ switch (flags & LK_TYPE_MASK) {
+ case LK_SHARED:
+ ap->a_flags |= LK_SHARED;
+ break;
+ case LK_UPGRADE:
+ case LK_EXCLUSIVE:
+ ap->a_flags |= LK_EXCLUSIVE;
+ break;
+ default:
+ panic("Unsupported lock request %d\n",
+ flags);
}
- vdrop(lvp);
- } else {
- VI_UNLOCK(vp);
+ VOP_UNLOCK(lvp);
error = vop_stdlock(ap);
}
-
+ vdrop(lvp);
return (error);
}
-/*
- * We need to process our own vnode unlock and then clear the
- * interlock flag as it applies only to our vnode, not the
- * vnodes below us on the stack.
- */
static int
null_unlock(struct vop_unlock_args *ap)
{
@@ -853,11 +884,20 @@
struct vnode *lvp;
int error;
+ /*
+ * Contrary to null_lock, we don't need to hold the vnode around
+ * unlock.
+ *
+ * We hold the lock, which means we can't be racing against vgone.
+ *
+ * At the same time VOP_UNLOCK promises to not touch anything after
+ * it finishes unlock, just like we don't.
+ *
+ * vop_stdunlock for a doomed vnode matches doomed locking in null_lock.
+ */
nn = VTONULL(vp);
if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
- vholdnz(lvp);
error = VOP_UNLOCK(lvp);
- vdrop(lvp);
} else {
error = vop_stdunlock(ap);
}
@@ -961,7 +1001,7 @@
vunref(lowervp);
else
vput(lowervp);
- free(xp, M_NULLFSNODE);
+ uma_zfree_smr(null_node_zone, xp);
return (0);
}
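
A closing note on writer-side ordering, which is why the null_nodeget() hunk moves vn_set_state(VSTATE_CONSTRUCTED) ahead of null_hashins(): once an entry is linked with CK_LIST_INSERT_HEAD(), lockless readers can see it immediately and no lock acquisition orders the construction stores for them, so the object must be fully set up before it is published, and it must be unhashed before its memory is returned to the SMR zone. A hedged sketch of that publish/retire pairing, again reusing the hypothetical example_* names from the sketches above:

/* Illustrative sketch only -- not part of D52819. */
static void
example_publish(struct example_head *hd, struct example_node *np,
    struct vnode *vp, int key)
{
	/* Finish constructing the record before it becomes visible. */
	np->vnode = vp;
	np->key = key;
	vn_set_state(vp, VSTATE_CONSTRUCTED);
	/* Readers may find np the moment this insert completes. */
	CK_LIST_INSERT_HEAD(hd, np, link);
}

static void
example_retire(struct example_node *np)
{
	/* Unhash first so new readers can no longer find the entry... */
	CK_LIST_REMOVE(np, link);
	/* ...then let the SMR zone defer reuse past existing readers. */
	uma_zfree_smr(example_zone, np);
}

Note that writers still serialize against each other with null_hash_lock in the diff; SMR only removes locking from the read side.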