Page MenuHomeFreeBSD

D21425.id.diff
No OneTemporary

D21425.id.diff

Index: head/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
===================================================================
--- head/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
+++ head/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
@@ -242,6 +242,7 @@
if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
panic("mount: lost mount");
VOP_UNLOCK(vp, 0);
+ vfs_op_exit(mp);
vfs_unbusy(mp);
*vpp = mvp;
return (0);
Index: head/sys/kern/subr_pcpu.c
===================================================================
--- head/sys/kern/subr_pcpu.c
+++ head/sys/kern/subr_pcpu.c
@@ -131,15 +131,19 @@
/*
* UMA_PCPU_ZONE zones, that are available for all kernel
- * consumers. Right now 64 bit zone is used for counter(9).
+ * consumers. Right now 64 bit zone is used for counter(9)
+ * and int zone is used for mount point counters.
*/
+uma_zone_t pcpu_zone_int;
uma_zone_t pcpu_zone_64;
static void
pcpu_zones_startup(void)
{
+ pcpu_zone_int = uma_zcreate("int pcpu", sizeof(int),
+ NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
pcpu_zone_64 = uma_zcreate("64 pcpu", sizeof(uint64_t),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
}
Index: head/sys/kern/vfs_default.c
===================================================================
--- head/sys/kern/vfs_default.c
+++ head/sys/kern/vfs_default.c
@@ -601,17 +601,28 @@
*/
vp = ap->a_vp;
mp = vp->v_mount;
- if (mp == NULL)
- goto out;
- MNT_ILOCK(mp);
- if (mp != vp->v_mount) {
- MNT_IUNLOCK(mp);
- mp = NULL;
- goto out;
- }
- MNT_REF(mp);
- MNT_IUNLOCK(mp);
-out:
+ if (mp == NULL) {
+ *(ap->a_mpp) = NULL;
+ return (0);
+ }
+ if (vfs_op_thread_enter(mp)) {
+ if (mp == vp->v_mount) {
+ MNT_REF_UNLOCKED(mp);
+ vfs_op_thread_exit(mp);
+ } else {
+ vfs_op_thread_exit(mp);
+ mp = NULL;
+ }
+ } else {
+ MNT_ILOCK(mp);
+ if (mp == vp->v_mount) {
+ MNT_REF(mp);
+ MNT_IUNLOCK(mp);
+ } else {
+ MNT_IUNLOCK(mp);
+ mp = NULL;
+ }
+ }
*(ap->a_mpp) = mp;
return (0);
}
Index: head/sys/kern/vfs_mount.c
===================================================================
--- head/sys/kern/vfs_mount.c
+++ head/sys/kern/vfs_mount.c
@@ -41,6 +41,7 @@
#include <sys/param.h>
#include <sys/conf.h>
+#include <sys/smp.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
@@ -123,6 +124,10 @@
mtx_init(&mp->mnt_mtx, "struct mount mtx", NULL, MTX_DEF);
mtx_init(&mp->mnt_listmtx, "struct mount vlist mtx", NULL, MTX_DEF);
lockinit(&mp->mnt_explock, PVFS, "explock", 0, 0);
+ mp->mnt_thread_in_ops_pcpu = uma_zalloc_pcpu(pcpu_zone_int,
+ M_WAITOK | M_ZERO);
+ mp->mnt_ref = 0;
+ mp->mnt_vfs_ops = 1;
return (0);
}
@@ -132,6 +137,7 @@
struct mount *mp;
mp = (struct mount *)mem;
+ uma_zfree_pcpu(pcpu_zone_int, mp->mnt_thread_in_ops_pcpu);
lockdestroy(&mp->mnt_explock);
mtx_destroy(&mp->mnt_listmtx);
mtx_destroy(&mp->mnt_mtx);
@@ -445,6 +451,12 @@
{
CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
+ if (vfs_op_thread_enter(mp)) {
+ MNT_REF_UNLOCKED(mp);
+ vfs_op_thread_exit(mp);
+ return;
+ }
+
MNT_ILOCK(mp);
MNT_REF(mp);
MNT_IUNLOCK(mp);
@@ -455,6 +467,12 @@
{
CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
+ if (vfs_op_thread_enter(mp)) {
+ MNT_REL_UNLOCKED(mp);
+ vfs_op_thread_exit(mp);
+ return;
+ }
+
MNT_ILOCK(mp);
MNT_REL(mp);
MNT_IUNLOCK(mp);
@@ -478,7 +496,12 @@
mp->mnt_activevnodelistsize = 0;
TAILQ_INIT(&mp->mnt_tmpfreevnodelist);
mp->mnt_tmpfreevnodelistsize = 0;
- mp->mnt_ref = 0;
+ if (mp->mnt_ref != 0 || mp->mnt_lockref != 0 ||
+ mp->mnt_writeopcount != 0)
+ panic("%s: non-zero counters on new mp %p\n", __func__, mp);
+ if (mp->mnt_vfs_ops != 1)
+ panic("%s: vfs_ops should be 1 but %d found\n", __func__,
+ mp->mnt_vfs_ops);
(void) vfs_busy(mp, MBF_NOWAIT);
atomic_add_acq_int(&vfsp->vfc_refcount, 1);
mp->mnt_op = vfsp->vfc_vfsops;
@@ -507,6 +530,9 @@
vfs_mount_destroy(struct mount *mp)
{
+ if (mp->mnt_vfs_ops == 0)
+ panic("%s: entered with zero vfs_ops\n", __func__);
+
MNT_ILOCK(mp);
mp->mnt_kern_flag |= MNTK_REFEXPIRE;
if (mp->mnt_kern_flag & MNTK_MWAIT) {
@@ -540,6 +566,11 @@
if (mp->mnt_lockref != 0)
panic("vfs_mount_destroy: nonzero lock refcount");
MNT_IUNLOCK(mp);
+
+ if (mp->mnt_vfs_ops != 1)
+ panic("%s: vfs_ops should be 1 but %d found\n", __func__,
+ mp->mnt_vfs_ops);
+
if (mp->mnt_vnodecovered != NULL)
vrele(mp->mnt_vnodecovered);
#ifdef MAC
@@ -951,6 +982,7 @@
vrele(newdp);
if ((mp->mnt_flag & MNT_RDONLY) == 0)
vfs_allocate_syncvnode(mp);
+ vfs_op_exit(mp);
vfs_unbusy(mp);
return (0);
}
@@ -1019,6 +1051,8 @@
VI_UNLOCK(vp);
VOP_UNLOCK(vp, 0);
+ vfs_op_enter(mp);
+
MNT_ILOCK(mp);
if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
MNT_IUNLOCK(mp);
@@ -1100,6 +1134,7 @@
else
vfs_deallocate_syncvnode(mp);
end:
+ vfs_op_exit(mp);
vfs_unbusy(mp);
VI_LOCK(vp);
vp->v_iflag &= ~VI_MOUNT;
@@ -1328,6 +1363,7 @@
mp->mnt_kern_flag &= ~MNTK_MWAIT;
wakeup(mp);
}
+ vfs_op_exit_locked(mp);
MNT_IUNLOCK(mp);
if (coveredvp != NULL) {
VOP_UNLOCK(coveredvp, 0);
@@ -1337,6 +1373,69 @@
}
/*
+ * There are various reference counters associated with the mount point.
+ * Normally it is permitted to modify them without taking the mnt ilock,
+ * but this behavior can be temporarily disabled if stable value is needed
+ * or callers are expected to block (e.g. to not allow new users during
+ * forced unmount).
+ */
+void
+vfs_op_enter(struct mount *mp)
+{
+
+ MNT_ILOCK(mp);
+ mp->mnt_vfs_ops++;
+ if (mp->mnt_vfs_ops > 1) {
+ MNT_IUNLOCK(mp);
+ return;
+ }
+ /*
+ * Paired with a fence in vfs_op_thread_enter(). See the comment
+ * above it for details.
+ */
+ atomic_thread_fence_seq_cst();
+ vfs_op_barrier_wait(mp);
+ MNT_IUNLOCK(mp);
+}
+
+void
+vfs_op_exit_locked(struct mount *mp)
+{
+
+ mtx_assert(MNT_MTX(mp), MA_OWNED);
+
+ if (mp->mnt_vfs_ops <= 0)
+ panic("%s: invalid vfs_ops count %d for mp %p\n",
+ __func__, mp->mnt_vfs_ops, mp);
+ mp->mnt_vfs_ops--;
+}
+
+void
+vfs_op_exit(struct mount *mp)
+{
+
+ MNT_ILOCK(mp);
+ vfs_op_exit_locked(mp);
+ MNT_IUNLOCK(mp);
+}
+
+/*
+ * It is assumed the caller already posted at least an acquire barrier.
+ */
+void
+vfs_op_barrier_wait(struct mount *mp)
+{
+ int *in_op;
+ int cpu;
+
+ CPU_FOREACH(cpu) {
+ in_op = zpcpu_get_cpu(mp->mnt_thread_in_ops_pcpu, cpu);
+ while (atomic_load_int(in_op))
+ cpu_spinwait();
+ }
+}
+
+/*
* Do the actual filesystem unmount.
*/
int
@@ -1379,6 +1478,8 @@
return (error);
}
+ vfs_op_enter(mp);
+
vn_start_write(NULL, &mp, V_WAIT | V_MNTREF);
MNT_ILOCK(mp);
if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0 ||
@@ -1469,6 +1570,7 @@
mp->mnt_kern_flag &= ~MNTK_MWAIT;
wakeup(mp);
}
+ vfs_op_exit_locked(mp);
MNT_IUNLOCK(mp);
if (coveredvp)
VOP_UNLOCK(coveredvp, 0);
Index: head/sys/kern/vfs_mountroot.c
===================================================================
--- head/sys/kern/vfs_mountroot.c
+++ head/sys/kern/vfs_mountroot.c
@@ -273,6 +273,7 @@
*mpp = mp;
rootdevmp = mp;
+ vfs_op_exit(mp);
}
set_rootvnode();
Index: head/sys/kern/vfs_subr.c
===================================================================
--- head/sys/kern/vfs_subr.c
+++ head/sys/kern/vfs_subr.c
@@ -4032,6 +4032,7 @@
mp->mnt_secondary_accwrites);
db_printf(" mnt_gjprovider = %s\n",
mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL");
+ db_printf(" mnt_vfs_ops = %d\n", mp->mnt_vfs_ops);
db_printf("\n\nList of active vnodes\n");
TAILQ_FOREACH(vp, &mp->mnt_activevnodelist, v_actfreelist) {
Index: head/sys/sys/mount.h
===================================================================
--- head/sys/sys/mount.h
+++ head/sys/sys/mount.h
@@ -226,6 +226,8 @@
struct lock mnt_explock; /* vfs_export walkers lock */
TAILQ_ENTRY(mount) mnt_upper_link; /* (m) we in the all uppers */
TAILQ_HEAD(, mount) mnt_uppers; /* (m) upper mounts over us*/
+ int mnt_vfs_ops; /* (i) pending vfs ops */
+ int *mnt_thread_in_ops_pcpu;
};
/*
@@ -265,15 +267,26 @@
#define MNT_ITRYLOCK(mp) mtx_trylock(&(mp)->mnt_mtx)
#define MNT_IUNLOCK(mp) mtx_unlock(&(mp)->mnt_mtx)
#define MNT_MTX(mp) (&(mp)->mnt_mtx)
+
+#define MNT_REF_UNLOCKED(mp) do { \
+ atomic_add_int(&(mp)->mnt_ref, 1); \
+} while (0)
+#define MNT_REL_UNLOCKED(mp) do { \
+ int _c; \
+ _c = atomic_fetchadd_int(&(mp)->mnt_ref, -1) - 1; \
+ KASSERT(_c >= 0, ("negative mnt_ref %d", _c)); \
+} while (0)
+
#define MNT_REF(mp) do { \
mtx_assert(MNT_MTX(mp), MA_OWNED); \
- (mp)->mnt_ref++; \
+ atomic_add_int(&(mp)->mnt_ref, 1); \
} while (0)
#define MNT_REL(mp) do { \
+ int _c; \
mtx_assert(MNT_MTX(mp), MA_OWNED); \
- KASSERT((mp)->mnt_ref > 0, ("negative mnt_ref")); \
- (mp)->mnt_ref--; \
- if ((mp)->mnt_ref == 0) \
+ _c = atomic_fetchadd_int(&(mp)->mnt_ref, -1) - 1; \
+ KASSERT(_c >= 0, ("negative mnt_ref %d", _c)); \
+ if (_c == 0) \
wakeup((mp)); \
} while (0)
@@ -940,6 +953,48 @@
void syncer_suspend(void);
void syncer_resume(void);
+
+void vfs_op_barrier_wait(struct mount *);
+void vfs_op_enter(struct mount *);
+void vfs_op_exit_locked(struct mount *);
+void vfs_op_exit(struct mount *);
+
+/*
+ * We mark ourselves as entering the section and post a sequentially consistent
+ * fence, meaning the store is completed before we get into the section and
+ * mnt_vfs_ops is only read afterwards.
+ *
+ * Any thread transitioning the ops counter 0->1 does things in the opposite
+ * order - first bumps the count, posts a sequentially consistent fence and
+ * observes all CPUs not executing within the section.
+ *
+ * This provides an invariant that by the time the last CPU is observed not
+ * executing, everyone else entering will see the counter > 0 and exit.
+ *
+ * Note there is no barrier between vfs_ops and the rest of the code in the
+ * section. It is not necessary as the writer has to wait for everyone to drain
+ * before making any changes or only make changes safe while the section is
+ * executed.
+ */
+
+#define vfs_op_thread_enter(mp) ({ \
+ struct mount *_mp = (mp); \
+ bool _retval = true; \
+ critical_enter(); \
+ *(int *)zpcpu_get(_mp->mnt_thread_in_ops_pcpu) = 1; \
+ atomic_thread_fence_seq_cst(); \
+ if (__predict_false(_mp->mnt_vfs_ops > 0)) { \
+ vfs_op_thread_exit(_mp); \
+ _retval = false; \
+ } \
+ _retval; \
+})
+
+#define vfs_op_thread_exit(mp) do { \
+ atomic_thread_fence_rel(); \
+ *(int *)zpcpu_get(mp->mnt_thread_in_ops_pcpu) = 0; \
+ critical_exit(); \
+} while (0)
#else /* !_KERNEL */
Index: head/sys/vm/uma.h
===================================================================
--- head/sys/vm/uma.h
+++ head/sys/vm/uma.h
@@ -650,6 +650,7 @@
/*
* Common UMA_ZONE_PCPU zones.
*/
+extern uma_zone_t pcpu_zone_int;
extern uma_zone_t pcpu_zone_64;
/*

File Metadata

Mime Type
text/plain
Expires
Fri, May 1, 7:16 PM (10 h, 34 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
32595003
Default Alt Text
D21425.id.diff (10 KB)

Event Timeline