Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F136688200
D52819.id163246.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
10 KB
Referenced Files
None
Subscribers
None
D52819.id163246.diff
View Options
diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -37,6 +37,9 @@
#define NULLM_CACHE 0x0001
+#include <vm/uma.h>
+#include <sys/ck.h>
+
struct null_mount {
struct mount *nullm_vfs;
struct vnode *nullm_lowerrootvp; /* Ref to lower root vnode */
@@ -50,7 +53,7 @@
* A cache of vnode references
*/
struct null_node {
- LIST_ENTRY(null_node) null_hash; /* Hash list */
+ CK_LIST_ENTRY(null_node) null_hash; /* Hash list */
struct vnode *null_lowervp; /* VREFed once */
struct vnode *null_vnode; /* Back pointer */
u_int null_flags;
@@ -61,6 +64,7 @@
#define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
#define VTONULL(vp) ((struct null_node *)(vp)->v_data)
+#define VTONULL_SMR(vp) ((struct null_node *)vn_load_v_data_smr(vp))
#define NULLTOV(xp) ((xp)->null_vnode)
int nullfs_init(struct vfsconf *vfsp);
@@ -79,9 +83,7 @@
extern struct vop_vector null_vnodeops;
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_NULLFSNODE);
-#endif
+extern uma_zone_t null_node_zone;
#ifdef NULLFS_DEBUG
#define NULLFSDEBUG(format, args...) printf(format ,## args)
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -41,9 +41,14 @@
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/vnode.h>
+#include <sys/smr.h>
#include <fs/nullfs/null.h>
+#include <vm/uma.h>
+
+VFS_SMR_DECLARE;
+
/*
* Null layer cache:
* Each cache entry holds a reference to the lower vnode
@@ -54,12 +59,12 @@
#define NULL_NHASH(vp) (&null_node_hashtbl[vfs_hash_index(vp) & null_hash_mask])
-static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
+static CK_LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
static struct rwlock null_hash_lock;
static u_long null_hash_mask;
static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
-MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
+uma_zone_t __read_mostly null_node_zone;
static void null_hashins(struct mount *, struct null_node *);
@@ -73,6 +78,10 @@
null_node_hashtbl = hashinit(desiredvnodes, M_NULLFSHASH,
&null_hash_mask);
rw_init(&null_hash_lock, "nullhs");
+ null_node_zone = uma_zcreate("nullfs node", sizeof(struct null_node),
+ NULL, NULL, NULL, NULL, 0, UMA_ZONE_ZINIT);
+ VFS_SMR_ZONE_SET(null_node_zone);
+
return (0);
}
@@ -80,6 +89,7 @@
nullfs_uninit(struct vfsconf *vfsp)
{
+ uma_zdestroy(null_node_zone);
rw_destroy(&null_hash_lock);
hashdestroy(null_node_hashtbl, M_NULLFSHASH, null_hash_mask);
return (0);
@@ -96,7 +106,7 @@
struct null_node *a;
struct vnode *vp;
- ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+ ASSERT_VOP_LOCKED(lowervp, __func__);
rw_assert(&null_hash_lock, RA_LOCKED);
/*
@@ -106,18 +116,21 @@
* reference count (but NOT the lower vnode's VREF counter).
*/
hd = NULL_NHASH(lowervp);
- LIST_FOREACH(a, hd, null_hash) {
- if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
- /*
- * Since we have the lower node locked the nullfs
- * node can not be in the process of recycling. If
- * it had been recycled before we grabed the lower
- * lock it would not have been found on the hash.
- */
- vp = NULLTOV(a);
- vref(vp);
- return (vp);
- }
+ CK_LIST_FOREACH(a, hd, null_hash) {
+ if (a->null_lowervp != lowervp)
+ continue;
+ /*
+ * Since we have the lower node locked the nullfs
+ * node can not be in the process of recycling. If
+ * it had been recycled before we grabed the lower
+ * lock it would not have been found on the hash.
+ */
+ vp = NULLTOV(a);
+ VNPASS(!VN_IS_DOOMED(vp), vp);
+ if (vp->v_mount != mp)
+ continue;
+ vref(vp);
+ return (vp);
}
return (NULL);
}
@@ -126,17 +139,34 @@
null_hashget(struct mount *mp, struct vnode *lowervp)
{
struct null_node_hashhead *hd;
+ struct null_node *a;
struct vnode *vp;
+ enum vgetstate vs;
- hd = NULL_NHASH(lowervp);
- if (LIST_EMPTY(hd))
- return (NULL);
-
- rw_rlock(&null_hash_lock);
- vp = null_hashget_locked(mp, lowervp);
- rw_runlock(&null_hash_lock);
+ ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+ rw_assert(&null_hash_lock, RA_UNLOCKED);
- return (vp);
+ vfs_smr_enter();
+ hd = NULL_NHASH(lowervp);
+ CK_LIST_FOREACH(a, hd, null_hash) {
+ if (a->null_lowervp != lowervp)
+ continue;
+ /*
+ * See null_hashget_locked as to why the nullfs vnode can't be
+ * doomed here.
+ */
+ vp = NULLTOV(a);
+ VNPASS(!VN_IS_DOOMED(vp), vp);
+ if (vp->v_mount != mp)
+ continue;
+ vs = vget_prep_smr(vp);
+ vfs_smr_exit();
+ VNPASS(vs != VGET_NONE, vp);
+ vget_finish_ref(vp, vs);
+ return (vp);
+ }
+ vfs_smr_exit();
+ return (NULL);
}
static void
@@ -151,7 +181,7 @@
hd = NULL_NHASH(xp->null_lowervp);
#ifdef INVARIANTS
- LIST_FOREACH(oxp, hd, null_hash) {
+ CK_LIST_FOREACH(oxp, hd, null_hash) {
if (oxp->null_lowervp == xp->null_lowervp &&
NULLTOV(oxp)->v_mount == mp) {
VNASSERT(0, NULLTOV(oxp),
@@ -159,7 +189,7 @@
}
}
#endif
- LIST_INSERT_HEAD(hd, xp, null_hash);
+ CK_LIST_INSERT_HEAD(hd, xp, null_hash);
}
static void
@@ -174,7 +204,7 @@
VI_UNLOCK(vp);
vgone(vp);
vput(vp);
- free(xp, M_NULLFSNODE);
+ uma_zfree_smr(null_node_zone, xp);
}
/*
@@ -208,12 +238,12 @@
* Note that duplicate can only appear in hash if the lowervp is
* locked LK_SHARED.
*/
- xp = malloc(sizeof(struct null_node), M_NULLFSNODE, M_WAITOK);
+ xp = uma_zalloc_smr(null_node_zone, M_WAITOK);
error = getnewvnode("nullfs", mp, &null_vnodeops, &vp);
if (error) {
vput(lowervp);
- free(xp, M_NULLFSNODE);
+ uma_zfree_smr(null_node_zone, xp);
return (error);
}
@@ -261,8 +291,8 @@
return (error);
}
- null_hashins(mp, xp);
vn_set_state(vp, VSTATE_CONSTRUCTED);
+ null_hashins(mp, xp);
rw_wunlock(&null_hash_lock);
*vpp = vp;
@@ -277,7 +307,7 @@
{
rw_wlock(&null_hash_lock);
- LIST_REMOVE(xp, null_hash);
+ CK_LIST_REMOVE(xp, null_hash);
rw_wunlock(&null_hash_lock);
}
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -177,6 +177,8 @@
#include <sys/sysctl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
+#include <sys/proc.h>
+#include <sys/smr.h>
#include <fs/nullfs/null.h>
@@ -185,6 +187,8 @@
#include <vm/vm_object.h>
#include <vm/vnode_pager.h>
+VFS_SMR_DECLARE;
+
static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
&null_bug_bypass, 0, "");
@@ -768,10 +772,50 @@
}
/*
- * We need to process our own vnode lock and then clear the
- * interlock flag as it applies only to our vnode, not the
- * vnodes below us on the stack.
+ * We need to process our own vnode lock and then clear the interlock flag as
+ * it applies only to our vnode, not the vnodes below us on the stack.
+ *
+ * We have to hold the vnode here to solve a potential reclaim race. If we're
+ * forcibly vgone'd while we still have refs, a thread could be sleeping inside
+ * the lowervp's vop_lock routine. When we vgone we will drop our last ref to
+ * the lowervp, which would allow it to be reclaimed. The lowervp could then
+ * be recycled, in which case it is not legal to be sleeping in its VOP. We
+ * prevent it from being recycled by holding the vnode here.
*/
+static int
+null_lock_smr(struct vnode *vp, int flags)
+{
+ struct null_node *nn;
+ struct vnode *lvp;
+ int error;
+
+ vfs_smr_enter();
+
+ nn = VTONULL_SMR(vp);
+ if (__predict_false(nn == NULL))
+ goto out_bad;
+
+ lvp = nn->null_lowervp;
+ if (__predict_false(lvp == NULL))
+ goto out_bad;
+
+ if (__predict_false(!vhold_smr(lvp)))
+ goto out_bad;
+
+ vfs_smr_exit();
+
+ error = VOP_LOCK(lvp, flags);
+ if (VTONULL(vp) == NULL && error == 0) {
+ VOP_UNLOCK(lvp);
+ error = EAGAIN;
+ }
+ vdrop(lvp);
+ return (error);
+out_bad:
+ vfs_smr_exit();
+ return (EAGAIN);
+}
+
static int
null_lock(struct vop_lock1_args *ap)
{
@@ -781,10 +825,20 @@
struct vnode *lvp;
int error;
- if ((ap->a_flags & LK_INTERLOCK) == 0)
+ if ((ap->a_flags & LK_INTERLOCK) == 0) {
+ /*
+ * This is the common case.
+ *
+ * Try to avoid acquiring the interlock, fallback in case there
+ * is any trouble.
+ */
+ error = null_lock_smr(vp, ap->a_flags);
+ if (error == 0)
+ return (error);
VI_LOCK(vp);
- else
+ } else {
ap->a_flags &= ~LK_INTERLOCK;
+ }
flags = ap->a_flags;
nn = VTONULL(vp);
/*
@@ -793,17 +847,6 @@
* vop lock.
*/
if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
- /*
- * We have to hold the vnode here to solve a potential
- * reclaim race. If we're forcibly vgone'd while we
- * still have refs, a thread could be sleeping inside
- * the lowervp's vop_lock routine. When we vgone we will
- * drop our last ref to the lowervp, which would allow it
- * to be reclaimed. The lowervp could then be recycled,
- * in which case it is not legal to be sleeping in its VOP.
- * We prevent it from being recycled by holding the vnode
- * here.
- */
vholdnz(lvp);
VI_UNLOCK(vp);
error = VOP_LOCK(lvp, flags);
@@ -840,11 +883,6 @@
return (error);
}
-/*
- * We need to process our own vnode unlock and then clear the
- * interlock flag as it applies only to our vnode, not the
- * vnodes below us on the stack.
- */
static int
null_unlock(struct vop_unlock_args *ap)
{
@@ -853,11 +891,20 @@
struct vnode *lvp;
int error;
+ /*
+ * Contrary to null_lock, we don't need to hold the vnode around
+ * unlock.
+ *
+ * We hold the lock, which means we can't be racing against vgone.
+ *
+ * At the same time VOP_UNLOCK promises to not touch anything after
+ * it finishes unlock, just like we don't.
+ *
+ * vop_stdunlock for a doomed vnode matches doomed locking in null_lock.
+ */
nn = VTONULL(vp);
if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
- vholdnz(lvp);
error = VOP_UNLOCK(lvp);
- vdrop(lvp);
} else {
error = vop_stdunlock(ap);
}
@@ -961,7 +1008,7 @@
vunref(lowervp);
else
vput(lowervp);
- free(xp, M_NULLFSNODE);
+ uma_zfree_smr(null_node_zone, xp);
return (0);
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Wed, Nov 19, 9:55 PM (21 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
25682415
Default Alt Text
D52819.id163246.diff (10 KB)
Attached To
Mode
D52819: nullfs: smr-protected hash lookup and locking
Attached
Detach File
Event Timeline
Log In to Comment