Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F149811719
D52628.id162565.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
7 KB
Referenced Files
None
Subscribers
None
D52628.id162565.diff
View Options
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -113,6 +113,7 @@
static bool vhold_recycle_free(struct vnode *);
static void vdropl_recycle(struct vnode *vp);
static void vdrop_recycle(struct vnode *vp);
+static void vdrop_doomed_process(void);
static void vfs_knllock(void *arg);
static void vfs_knlunlock(void *arg);
static void vfs_knl_assert_lock(void *arg, int what);
@@ -226,6 +227,10 @@
SYSCTL_ULONG(_vfs, OID_AUTO, deferred_inact, CTLFLAG_RD,
&deferred_inact, 0, "Number of times inactive processing was deferred");
+static u_long deferred_drop;
+SYSCTL_ULONG(_vfs, OID_AUTO, deferred_drop, CTLFLAG_RD,
+ &deferred_drop, 0, "Number of times drop was deferred");
+
/* To keep more than one thread at a time from running vfs_getnewfsid */
static struct mtx mntid_mtx;
@@ -318,6 +323,9 @@
static void vdbatch_dequeue(struct vnode *vp);
+struct mtx vnode_deferred_drop_mtx;
+struct vnodelst vnode_deferred_drop_list;
+
/*
* The syncer will require at least SYNCER_MAXDELAY iterations to shutdown;
* we probably don't want to pause for the whole second each time.
@@ -774,6 +782,8 @@
}
wantfreevnodes = desiredvnodes / 4;
mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF);
+ TAILQ_INIT(&vnode_deferred_drop_list);
+ mtx_init(&vnode_deferred_drop_mtx, "vnode_defdrop", NULL, MTX_DEF);
TAILQ_INIT(&vnode_list);
mtx_init(&vnode_list_mtx, "vnode_list", NULL, MTX_DEF);
/*
@@ -1784,6 +1794,8 @@
for (;;) {
kproc_suspend_check(vnlruproc);
+ vdrop_doomed_process();
+
if (force == 0 && vnlru_proc_light())
continue;
@@ -2217,6 +2229,7 @@
("dirty blk trie not empty"));
VNASSERT((vp->v_iflag & (VI_DOINGINACT | VI_OWEINACT)) == 0, vp,
("Leaked inactivation"));
+ VNPASS((vp->v_iflag & VI_DEFDROP) == 0, vp);
VI_UNLOCK(vp);
cache_assert_no_entries(vp);
@@ -3450,6 +3463,7 @@
struct mount *mp;
VNASSERT(vp->v_holdcnt > 0, vp, ("%s: vnode not held", __func__));
+ VNPASS((vp->v_iflag & VI_DEFDROP) == 0, vp);
if ((vp->v_mflag & VMP_LAZYLIST) != 0)
return;
@@ -3644,26 +3658,26 @@
}
break;
}
- if (error == 0) {
- if (func == VUNREF) {
- VNASSERT((vp->v_vflag & VV_UNREF) == 0, vp,
- ("recursive vunref"));
- vp->v_vflag |= VV_UNREF;
- }
- for (;;) {
- error = vinactive(vp);
- if (want_unlock)
- VOP_UNLOCK(vp);
- if (error != ERELOOKUP || !want_unlock)
- break;
- VOP_LOCK(vp, LK_EXCLUSIVE);
- }
- if (func == VUNREF)
- vp->v_vflag &= ~VV_UNREF;
- vdropl(vp);
- } else {
+ if (error != 0) {
vdefer_inactive(vp);
+ return;
+ }
+ if (func == VUNREF) {
+ VNASSERT((vp->v_vflag & VV_UNREF) == 0, vp,
+ ("recursive vunref"));
+ vp->v_vflag |= VV_UNREF;
+ }
+ for (;;) {
+ error = vinactive(vp);
+ if (want_unlock)
+ VOP_UNLOCK(vp);
+ if (error != ERELOOKUP || !want_unlock)
+ break;
+ VOP_LOCK(vp, LK_EXCLUSIVE);
}
+ if (func == VUNREF)
+ vp->v_vflag &= ~VV_UNREF;
+ vdropl(vp);
return;
out:
if (func == VPUT)
@@ -3771,6 +3785,10 @@
*
* However, while this is more performant, it hinders debugging by eliminating
* the previously mentioned invariant.
+ *
+ * Note: there is a corner case where VHOLD_NO_SMR can transiently appear and
+ * disappear. This is fine: if vhold_smr fails, the caller is expected to try
+ * to reach the vnode in other ways, if at all possible. See vdropl_final.
*/
bool
vhold_smr(struct vnode *vp)
@@ -3960,6 +3978,92 @@
MPASS(vp->v_dbatchcpu == NOCPU);
}
+/*
+ * Deferred vdrop for doomed vnodes.
+ *
+ * This is *not* a general-purpose async vdrop mechanism.
+ *
+ * The only purpose is to facilitate the invariant that VOP_UNLOCK is safe to
+ * call *after* dropping the reference.
+ *
+ * It works around the following race:
+ *
+ * Suppose the vnode is doomed, v_usecount is 2, v_holdcnt is 1 (on account of
+ * v_usecount > 0) and both usecount holders are issuing vput while having a
+ * read lock on the vnode.
+ *
+ * thread1 thread2
+ * refcount_release usecount
+ * // v_usecount is now 1
+ * // will proceed to unlock
+ * refcount_release usecount
+ * // v_usecount is now 0
+ * // will proceed to vdrop
+ * VOP_UNLOCK
+ * refcount_release holdcnt
+ * // v_holdcnt is now 0
+ * // since the vnode is doomed
+ * // it will proceed to free it
+ * freevnode
+ * // goes ahead with unlock, but
+ * // the vnode is already freed
+ * VOP_UNLOCK
+ *
+ * In order to combat the problem we check if there are any lock holders
+ * still active and wait for them to exit. It might not be safe to do it
+ * in some of the places vdrop is called from, hence we defer this work.
+ */
+static void
+vdrop_doomed_defer(struct vnode *vp)
+{
+
+ ASSERT_VI_LOCKED(vp, __func__);
+ VNPASS(VN_IS_DOOMED(vp), vp);
+ VNPASS(vp->v_mount == NULL, vp);
+ VNPASS((vp->v_mflag & VMP_LAZYLIST) == 0, vp);
+ VNPASS((vp->v_iflag & VI_DEFDROP) == 0, vp);
+
+ vp->v_iflag |= VI_DEFDROP;
+ mtx_lock(&vnode_deferred_drop_mtx);
+ TAILQ_INSERT_TAIL(&vnode_deferred_drop_list, vp, v_lazylist);
+ deferred_drop++;
+ mtx_unlock(&vnode_deferred_drop_mtx);
+ VI_UNLOCK(vp);
+}
+
+/*
+ * Called by vnlru.
+ *
+ * If dooming ever gets reworked, this should probably land in the syncer.
+ */
+static void __noinline
+vdrop_doomed_process(void)
+{
+ struct vnodelst drop_list;
+ struct vnode *vp, *vptmp;
+
+ if (TAILQ_EMPTY(&vnode_deferred_drop_list))
+ return;
+
+ TAILQ_INIT(&drop_list);
+ mtx_lock(&vnode_deferred_drop_mtx);
+ TAILQ_CONCAT(&drop_list, &vnode_deferred_drop_list, v_lazylist);
+ mtx_unlock(&vnode_deferred_drop_mtx);
+
+ TAILQ_FOREACH_SAFE(vp, &drop_list, v_lazylist, vptmp) {
+ VNPASS(VN_IS_DOOMED(vp), vp);
+ VNPASS((vp->v_iflag & VI_DEFDROP) != 0, vp);
+
+ VOP_LOCK(vp, LK_EXCLUSIVE | LK_RETRY);
+ VOP_UNLOCK(vp);
+
+ VI_LOCK(vp);
+ vp->v_iflag &= ~VI_DEFDROP;
+ vdropl(vp);
+ /* unlocks the interlock */
+ }
+}
+
/*
* Drop the hold count of the vnode.
*
@@ -3975,6 +4079,7 @@
ASSERT_VI_LOCKED(vp, __func__);
VNPASS(VN_IS_DOOMED(vp), vp);
+ VNPASS((vp->v_iflag & VI_DEFDROP) == 0, vp);
/*
* Set the VHOLD_NO_SMR flag.
*
@@ -3990,6 +4095,27 @@
*/
return;
}
+
+ /*
+ * v_holdcnt is now VHOLD_NO_SMR and we hold the interlock.
+ *
+ * While nobody can legally gain a reference, we need to wait for any pending
+ * unlock operations to finish. See vdrop_doomed_defer for details.
+ *
+ * If someone holds the lock, it may not be safe to wait for them here.
+ * Instead, we backpedal the above transition and defer the work.
+ *
+ * Note we depend on a full fence from atomic_cmpset_int.
+ */
+ if (__predict_false(VOP_ISLOCKED(vp))) {
+ if (!atomic_cmpset_int(&vp->v_holdcnt, VHOLD_NO_SMR, 1)) {
+ VNASSERT(0, vp, ("failed to transition holdcnt from VHOLD_NO_SMR to 1"));
+ panic("failed to transition holdcnt from VHOLD_NO_SMR to 1 on vnode %p", vp);
+ }
+ vdrop_doomed_defer(vp);
+ return;
+ }
+
/*
* Don't bump freevnodes as this one is going away.
*/
@@ -4651,8 +4777,10 @@
strlcat(buf, "|VI_DEFINACT", sizeof(buf));
if (vp->v_iflag & VI_FOPENING)
strlcat(buf, "|VI_FOPENING", sizeof(buf));
+ if (vp->v_iflag & VI_DEFDROP)
+ strlcat(buf, "|VI_DEFDROP", sizeof(buf));
flags = vp->v_iflag & ~(VI_MOUNT | VI_DOINGINACT |
- VI_OWEINACT | VI_DEFINACT | VI_FOPENING);
+ VI_OWEINACT | VI_DEFINACT | VI_FOPENING | VI_DEFDROP);
if (flags != 0) {
snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags);
strlcat(buf, buf2, sizeof(buf));
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -268,6 +268,7 @@
#define VI_DEFINACT 0x0010 /* deferred inactive */
#define VI_FOPENING 0x0020 /* In open, with opening process having the
first right to advlock file */
+#define VI_DEFDROP 0x0040 /* deferred vdrop */
#define VV_ROOT 0x0001 /* root of its filesystem */
#define VV_ISTTY 0x0002 /* vnode represents a tty */
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Mar 28, 7:00 AM (10 h, 55 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
30462601
Default Alt Text
D52628.id162565.diff (7 KB)
Attached To
Mode
D52628: vfs: fix races between vdrop and VOP_UNLOCK
Attached
Detach File
Event Timeline
Log In to Comment