Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F155029292
D21425.id.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
10 KB
Referenced Files
None
Subscribers
None
D21425.id.diff
View Options
Index: head/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
===================================================================
--- head/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
+++ head/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
@@ -242,6 +242,7 @@
if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
panic("mount: lost mount");
VOP_UNLOCK(vp, 0);
+ vfs_op_exit(mp);
vfs_unbusy(mp);
*vpp = mvp;
return (0);
Index: head/sys/kern/subr_pcpu.c
===================================================================
--- head/sys/kern/subr_pcpu.c
+++ head/sys/kern/subr_pcpu.c
@@ -131,15 +131,19 @@
/*
* UMA_PCPU_ZONE zones, that are available for all kernel
- * consumers. Right now 64 bit zone is used for counter(9).
+ * consumers. Right now 64 bit zone is used for counter(9)
+ * and int zone is used for mount point counters.
*/
+uma_zone_t pcpu_zone_int;
uma_zone_t pcpu_zone_64;
static void
pcpu_zones_startup(void)
{
+ pcpu_zone_int = uma_zcreate("int pcpu", sizeof(int),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
pcpu_zone_64 = uma_zcreate("64 pcpu", sizeof(uint64_t),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
}
Index: head/sys/kern/vfs_default.c
===================================================================
--- head/sys/kern/vfs_default.c
+++ head/sys/kern/vfs_default.c
@@ -601,17 +601,24 @@
*/
vp = ap->a_vp;
mp = vp->v_mount;
- if (mp == NULL)
- goto out;
- MNT_ILOCK(mp);
- if (mp != vp->v_mount) {
+ if (mp == NULL) {
+ *(ap->a_mpp) = NULL;
+ return (0);
+ }
+ if (vfs_op_thread_enter(mp)) {
+ if (mp == vp->v_mount)
+ MNT_REF_UNLOCKED(mp);
+ else
+ mp = NULL;
+ vfs_op_thread_exit(mp);
+ } else {
+ MNT_ILOCK(mp);
+ if (mp == vp->v_mount)
+ MNT_REF(mp);
+ else
+ mp = NULL;
MNT_IUNLOCK(mp);
- mp = NULL;
- goto out;
}
- MNT_REF(mp);
- MNT_IUNLOCK(mp);
-out:
*(ap->a_mpp) = mp;
return (0);
}
Index: head/sys/kern/vfs_mount.c
===================================================================
--- head/sys/kern/vfs_mount.c
+++ head/sys/kern/vfs_mount.c
@@ -41,6 +41,7 @@
#include <sys/param.h>
#include <sys/conf.h>
+#include <sys/smp.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
@@ -123,6 +124,10 @@
mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
mtx_init(&mp->mnt_listmtx, "struct mount vlist mtx", NULL, MTX_DEF);
lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
+ mp->mnt_thread_in_ops_pcpu = uma_zalloc_pcpu(pcpu_zone_int,
+ M_WAITOK | M_ZERO);
+ mp->mnt_ref = 0;
+ mp->mnt_vfs_ops = 1;
return (0);
}
@@ -132,6 +137,7 @@
struct mount *mp;
mp = (struct mount *)mem;
+ uma_zfree_pcpu(pcpu_zone_int, mp->mnt_thread_in_ops_pcpu);
lockdestroy(&mp->mnt_explock);
mtx_destroy(&mp->mnt_listmtx);
mtx_destroy(&mp->mnt_mtx);
@@ -445,6 +451,12 @@
{
CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
+ if (vfs_op_thread_enter(mp)) {
+ MNT_REF_UNLOCKED(mp);
+ vfs_op_thread_exit(mp);
+ return;
+ }
+
MNT_ILOCK(mp);
MNT_REF(mp);
MNT_IUNLOCK(mp);
@@ -455,6 +467,12 @@
{
CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
+ if (vfs_op_thread_enter(mp)) {
+ MNT_REL_UNLOCKED(mp);
+ vfs_op_thread_exit(mp);
+ return;
+ }
+
MNT_ILOCK(mp);
MNT_REL(mp);
MNT_IUNLOCK(mp);
@@ -478,7 +496,12 @@
mp->mnt_activevnodelistsize = 0;
TAILQ_INIT(&mp->mnt_tmpfreevnodelist);
mp->mnt_tmpfreevnodelistsize = 0;
- mp->mnt_ref = 0;
+ if (mp->mnt_ref != 0 || mp->mnt_lockref != 0 ||
+ mp->mnt_writeopcount != 0)
+ panic("%s: non-zero counters on new mp %p\n", __func__, mp);
+ if (mp->mnt_vfs_ops != 1)
+ panic("%s: vfs_ops should be 1 but %d found\n", __func__,
+ mp->mnt_vfs_ops);
(void) vfs_busy(mp, MBF_NOWAIT);
atomic_add_acq_int(&vfsp->vfc_refcount, 1);
mp->mnt_op = vfsp->vfc_vfsops;
@@ -507,6 +530,9 @@
vfs_mount_destroy(struct mount *mp)
{
+ if (mp->mnt_vfs_ops == 0)
+ panic("%s: entered with zero vfs_ops\n", __func__);
+
MNT_ILOCK(mp);
mp->mnt_kern_flag |= MNTK_REFEXPIRE;
if (mp->mnt_kern_flag & MNTK_MWAIT) {
@@ -540,6 +566,11 @@
if (mp->mnt_lockref != 0)
panic("vfs_mount_destroy: nonzero lock refcount");
MNT_IUNLOCK(mp);
+
+ if (mp->mnt_vfs_ops != 1)
+ panic("%s: vfs_ops should be 1 but %d found\n", __func__,
+ mp->mnt_vfs_ops);
+
if (mp->mnt_vnodecovered != NULL)
vrele(mp->mnt_vnodecovered);
#ifdef MAC
@@ -951,6 +982,7 @@
vrele(newdp);
if ((mp->mnt_flag & MNT_RDONLY) == 0)
vfs_allocate_syncvnode(mp);
+ vfs_op_exit(mp);
vfs_unbusy(mp);
return (0);
}
@@ -1019,6 +1051,8 @@
VI_UNLOCK(vp);
VOP_UNLOCK(vp, 0);
+ vfs_op_enter(mp);
+
MNT_ILOCK(mp);
if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
MNT_IUNLOCK(mp);
@@ -1100,6 +1134,7 @@
else
vfs_deallocate_syncvnode(mp);
end:
+ vfs_op_exit(mp);
vfs_unbusy(mp);
VI_LOCK(vp);
vp->v_iflag &= ~VI_MOUNT;
@@ -1328,6 +1363,7 @@
mp->mnt_kern_flag &= ~MNTK_MWAIT;
wakeup(mp);
}
+ vfs_op_exit_locked(mp);
MNT_IUNLOCK(mp);
if (coveredvp != NULL) {
VOP_UNLOCK(coveredvp, 0);
@@ -1337,6 +1373,69 @@
}
/*
+ * There are various reference counters associated with the mount point.
+ * Normally it is permitted to modify them without taking the mnt ilock,
+ * but this behavior can be temporarily disabled if a stable value is needed
+ * or callers are expected to block (e.g. to not allow new users during
+ * forced unmount).
+ */
+void
+vfs_op_enter(struct mount *mp)
+{
+
+ MNT_ILOCK(mp);
+ mp->mnt_vfs_ops++;
+ if (mp->mnt_vfs_ops > 1) {
+ MNT_IUNLOCK(mp);
+ return;
+ }
+ /*
+ * Paired with a fence in vfs_op_thread_enter(). See the comment
+ * above it for details.
+ */
+ atomic_thread_fence_seq_cst();
+ vfs_op_barrier_wait(mp);
+ MNT_IUNLOCK(mp);
+}
+
+void
+vfs_op_exit_locked(struct mount *mp)
+{
+
+ mtx_assert(MNT_MTX(mp), MA_OWNED);
+
+ if (mp->mnt_vfs_ops <= 0)
+ panic("%s: invalid vfs_ops count %d for mp %p\n",
+ __func__, mp->mnt_vfs_ops, mp);
+ mp->mnt_vfs_ops--;
+}
+
+void
+vfs_op_exit(struct mount *mp)
+{
+
+ MNT_ILOCK(mp);
+ vfs_op_exit_locked(mp);
+ MNT_IUNLOCK(mp);
+}
+
+/*
+ * It is assumed the caller already posted at least an acquire barrier.
+ */
+void
+vfs_op_barrier_wait(struct mount *mp)
+{
+ int *in_op;
+ int cpu;
+
+ CPU_FOREACH(cpu) {
+ in_op = zpcpu_get_cpu(mp->mnt_thread_in_ops_pcpu, cpu);
+ while (atomic_load_int(in_op))
+ cpu_spinwait();
+ }
+}
+
+/*
* Do the actual filesystem unmount.
*/
int
@@ -1379,6 +1478,8 @@
return (error);
}
+ vfs_op_enter(mp);
+
vn_start_write(NULL, &mp, V_WAIT | V_MNTREF);
MNT_ILOCK(mp);
if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 ||
@@ -1469,6 +1570,7 @@
mp->mnt_kern_flag &= ~MNTK_MWAIT;
wakeup(mp);
}
+ vfs_op_exit_locked(mp);
MNT_IUNLOCK(mp);
if (coveredvp)
VOP_UNLOCK(coveredvp, 0);
Index: head/sys/kern/vfs_mountroot.c
===================================================================
--- head/sys/kern/vfs_mountroot.c
+++ head/sys/kern/vfs_mountroot.c
@@ -273,6 +273,7 @@
*mpp = mp;
rootdevmp = mp;
+ vfs_op_exit(mp);
}
set_rootvnode();
Index: head/sys/kern/vfs_subr.c
===================================================================
--- head/sys/kern/vfs_subr.c
+++ head/sys/kern/vfs_subr.c
@@ -4032,6 +4032,7 @@
mp->mnt_secondary_accwrites);
db_printf(" mnt_gjprovider = %s\n",
mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL");
+ db_printf(" mnt_vfs_ops = %d\n", mp->mnt_vfs_ops);
db_printf("\n\nList of active vnodes\n");
TAILQ_FOREACH(vp, &mp->mnt_activevnodelist, v_actfreelist) {
Index: head/sys/sys/mount.h
===================================================================
--- head/sys/sys/mount.h
+++ head/sys/sys/mount.h
@@ -226,6 +226,8 @@
struct lock mnt_explock; /* vfs_export walkers lock */
TAILQ_ENTRY(mount) mnt_upper_link; /* (m) we in the all uppers */
TAILQ_HEAD(, mount) mnt_uppers; /* (m) upper mounts over us*/
+ int mnt_vfs_ops; /* (i) pending vfs ops */
+ int *mnt_thread_in_ops_pcpu;
};
/*
@@ -265,15 +267,26 @@
#define MNT_ITRYLOCK(mp) mtx_trylock(&(mp)->mnt_mtx)
#define MNT_IUNLOCK(mp) mtx_unlock(&(mp)->mnt_mtx)
#define MNT_MTX(mp) (&(mp)->mnt_mtx)
+
+#define MNT_REF_UNLOCKED(mp) do { \
+ atomic_add_int(&(mp)->mnt_ref, 1); \
+} while (0)
+#define MNT_REL_UNLOCKED(mp) do { \
+ int _c; \
+ _c = atomic_fetchadd_int(&(mp)->mnt_ref, -1) - 1; \
+ KASSERT(_c >= 0, ("negative mnt_ref %d", _c)); \
+} while (0)
+
#define MNT_REF(mp) do { \
mtx_assert(MNT_MTX(mp), MA_OWNED); \
- (mp)->mnt_ref++; \
+ atomic_add_int(&(mp)->mnt_ref, 1); \
} while (0)
#define MNT_REL(mp) do { \
+ int _c; \
mtx_assert(MNT_MTX(mp), MA_OWNED); \
- KASSERT((mp)->mnt_ref > 0, ("negative mnt_ref")); \
- (mp)->mnt_ref--; \
- if ((mp)->mnt_ref == 0) \
+ _c = atomic_fetchadd_int(&(mp)->mnt_ref, -1) - 1; \
+ KASSERT(_c >= 0, ("negative mnt_ref %d", _c)); \
+ if (_c == 0) \
wakeup((mp)); \
} while (0)
@@ -940,6 +953,48 @@
void syncer_suspend(void);
void syncer_resume(void);
+
+void vfs_op_barrier_wait(struct mount *);
+void vfs_op_enter(struct mount *);
+void vfs_op_exit_locked(struct mount *);
+void vfs_op_exit(struct mount *);
+
+/*
+ * We mark ourselves as entering the section and post a sequentially consistent
+ * fence, meaning the store is completed before we get into the section and
+ * mnt_vfs_ops is only read afterwards.
+ *
+ * Any thread transitioning the ops counter 0->1 does things in the opposite
+ * order - first bumps the count, posts a sequentially consistent fence and
+ * observes all CPUs not executing within the section.
+ *
+ * This provides an invariant that by the time the last CPU is observed not
+ * executing, everyone else entering will see the counter > 0 and exit.
+ *
+ * Note there is no barrier between vfs_ops and the rest of the code in the
+ * section. It is not necessary because the writer has to wait for everyone
+ * to drain before making any changes, or may only make changes that are
+ * safe while the section is being executed.
+ */
+
+#define vfs_op_thread_enter(mp) ({ \
+ struct mount *_mp = (mp); \
+ bool _retval = true; \
+ critical_enter(); \
+ *(int *)zpcpu_get(_mp->mnt_thread_in_ops_pcpu) = 1; \
+ atomic_thread_fence_seq_cst(); \
+ if (__predict_false(_mp->mnt_vfs_ops > 0)) { \
+ vfs_op_thread_exit(_mp); \
+ _retval = false; \
+ } \
+ _retval; \
+})
+
+#define vfs_op_thread_exit(mp) do { \
+ atomic_thread_fence_rel(); \
+ *(int *)zpcpu_get(mp->mnt_thread_in_ops_pcpu) = 0; \
+ critical_exit(); \
+} while (0)
#else /* !_KERNEL */
Index: head/sys/vm/uma.h
===================================================================
--- head/sys/vm/uma.h
+++ head/sys/vm/uma.h
@@ -650,6 +650,7 @@
/*
* Common UMA_ZONE_PCPU zones.
*/
+extern uma_zone_t pcpu_zone_int;
extern uma_zone_t pcpu_zone_64;
/*
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, May 1, 7:16 PM (10 h, 34 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
32595003
Default Alt Text
D21425.id.diff (10 KB)
Attached To
Mode
D21425: vfs: manage mnt_ref with atomics
Attached
Detach File
Event Timeline
Log In to Comment