diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -65,7 +65,7 @@
 static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
 MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
 
-static struct vnode * null_hashins(struct mount *, struct null_node *);
+static void null_hashins(struct mount *, struct null_node *);
 
 /*
  * Initialise cache headers
@@ -93,14 +93,15 @@
  * Return a VREF'ed alias for lower vnode if already exists, else 0.
  * Lower vnode should be locked on entry and will be left locked on exit.
  */
-struct vnode *
-null_hashget(struct mount *mp, struct vnode *lowervp)
+static struct vnode *
+null_hashget_locked(struct mount *mp, struct vnode *lowervp)
 {
 	struct null_node_hashhead *hd;
 	struct null_node *a;
 	struct vnode *vp;
 
 	ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+	rw_assert(&null_hash_lock, RA_LOCKED);
 
 	/*
 	 * Find hash base, and then search the (two-way) linked
@@ -109,9 +110,6 @@
 	 * reference count (but NOT the lower vnode's VREF counter).
 	 */
 	hd = NULL_NHASH(lowervp);
-	if (LIST_EMPTY(hd))
-		return (NULLVP);
-	rw_rlock(&null_hash_lock);
 	LIST_FOREACH(a, hd, null_hash) {
 		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
 			/*
@@ -122,43 +120,50 @@
 			 */
 			vp = NULLTOV(a);
 			vref(vp);
-			rw_runlock(&null_hash_lock);
 			return (vp);
 		}
 	}
-	rw_runlock(&null_hash_lock);
 	return (NULLVP);
 }
 
-/*
- * Act like null_hashget, but add passed null_node to hash if no existing
- * node found.
- */
-static struct vnode *
+struct vnode *
+null_hashget(struct mount *mp, struct vnode *lowervp)
+{
+	struct null_node_hashhead *hd;
+	struct vnode *vp;
+
+	hd = NULL_NHASH(lowervp);
+	if (LIST_EMPTY(hd))
+		return (NULLVP);
+
+	rw_rlock(&null_hash_lock);
+	vp = null_hashget_locked(mp, lowervp);
+	rw_runlock(&null_hash_lock);
+
+	return (vp);
+}
+
+static void
 null_hashins(struct mount *mp, struct null_node *xp)
 {
 	struct null_node_hashhead *hd;
+#ifdef INVARIANTS
 	struct null_node *oxp;
-	struct vnode *ovp;
+#endif
+
+	rw_assert(&null_hash_lock, RA_WLOCKED);
 
 	hd = NULL_NHASH(xp->null_lowervp);
-	rw_wlock(&null_hash_lock);
+#ifdef INVARIANTS
 	LIST_FOREACH(oxp, hd, null_hash) {
 		if (oxp->null_lowervp == xp->null_lowervp &&
 		    NULLTOV(oxp)->v_mount == mp) {
-			/*
-			 * See null_hashget for a description of this
-			 * operation.
-			 */
-			ovp = NULLTOV(oxp);
-			vref(ovp);
-			rw_wunlock(&null_hash_lock);
-			return (ovp);
+			VNASSERT(0, NULLTOV(oxp),
+			    ("vnode already in hash"));
 		}
 	}
+#endif
 	LIST_INSERT_HEAD(hd, xp, null_hash);
-	rw_wunlock(&null_hash_lock);
-	return (NULLVP);
 }
 
 static void
@@ -201,19 +206,6 @@
 		return (0);
 	}
 
-	/*
-	 * The insmntque1() call below requires the exclusive lock on
-	 * the nullfs vnode.  Upgrade the lock now if hash failed to
-	 * provide ready to use vnode.
-	 */
-	if (VOP_ISLOCKED(lowervp) != LK_EXCLUSIVE) {
-		vn_lock(lowervp, LK_UPGRADE | LK_RETRY);
-		if (VN_IS_DOOMED(lowervp)) {
-			vput(lowervp);
-			return (ENOENT);
-		}
-	}
-
 	/*
 	 * We do not serialize vnode creation, instead we will check for
 	 * duplicates later, when adding new vnode to hash.
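The null_subr.c hunks above restructure the nullfs hash discipline: the lookup body moves into null_hashget_locked(), which asserts that the caller holds null_hash_lock, and null_hashins() becomes insert-only, asserting under INVARIANTS that no duplicate exists instead of returning a vref'ed existing vnode. The hunks that follow make this safe by performing the lookup, the insmntque1() call, and the insertion inside one write-locked section. Below is a minimal userspace sketch of that find-or-insert pattern, with a pthread rwlock standing in for rwlock(9) and a bare pointer key standing in for the lower vnode; all names in the sketch are illustrative, and the insmntque1() step that the real code performs inside the critical section is omitted.

#include <pthread.h>
#include <stdint.h>
#include <sys/queue.h>

/* Illustrative stand-ins for the nullfs node and hash table. */
struct node {
	LIST_ENTRY(node) hash;		/* hash chain linkage */
	void *key;			/* stands in for the lower vnode */
};
LIST_HEAD(nodehead, node);

static struct nodehead hashtbl[64];
static pthread_rwlock_t hash_lock = PTHREAD_RWLOCK_INITIALIZER;

#define	NHASH(key)	(&hashtbl[((uintptr_t)(key) >> 4) & 63])

/* Caller holds hash_lock, mirroring null_hashget_locked(). */
static struct node *
node_hashget_locked(void *key)
{
	struct node *n;

	LIST_FOREACH(n, NHASH(key), hash) {
		if (n->key == key)
			return (n);
	}
	return (NULL);
}

/*
 * Find-or-insert under one write-locked section.  Lookup and insert
 * cannot interleave with another thread's insert, so a duplicate can
 * never be added; that is what lets the patched null_hashins() demote
 * its duplicate scan to an INVARIANTS-only assertion.
 */
static struct node *
node_find_or_insert(void *key, struct node *newn)
{
	struct node *n;

	pthread_rwlock_wrlock(&hash_lock);
	n = node_hashget_locked(key);
	if (n == NULL) {
		newn->key = key;
		LIST_INSERT_HEAD(NHASH(key), newn, hash);
		n = newn;
	}
	pthread_rwlock_unlock(&hash_lock);
	return (n);
}

The next hunks show the kernel-side version of this: null_nodeget() takes null_hash_lock for writing before filling in the prototype vnode, and only drops it once the vnode is both on the mount's vnode list and in the hash.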
@@ -229,14 +221,24 @@
 		return (error);
 	}
 
+	rw_wlock(&null_hash_lock);
 	xp->null_vnode = vp;
 	xp->null_lowervp = lowervp;
 	xp->null_flags = 0;
 	vp->v_type = lowervp->v_type;
 	vp->v_data = xp;
 	vp->v_vnlock = lowervp->v_vnlock;
+	*vpp = null_hashget_locked(mp, lowervp);
+	if (*vpp != NULL) {
+		rw_wunlock(&null_hash_lock);
+		vrele(lowervp);
+		null_destroy_proto(vp, xp);
+		return (0);
+	}
+
 	error = insmntque1(vp, mp);
 	if (error != 0) {
+		rw_wunlock(&null_hash_lock);
 		vput(lowervp);
 		null_destroy_proto(vp, xp);
 		return (error);
@@ -262,17 +264,8 @@
 		}
 	}
 
-	/*
-	 * Atomically insert our new node into the hash or vget existing
-	 * if someone else has beaten us to it.
-	 */
-	*vpp = null_hashins(mp, xp);
-	if (*vpp != NULL) {
-		vrele(lowervp);
-		vp->v_object = NULL;	/* in case VIRF_PGREAD set it */
-		null_destroy_proto(vp, xp);
-		return (0);
-	}
+	null_hashins(mp, xp);
+	rw_wunlock(&null_hash_lock);
 
 	*vpp = vp;
 	return (0);
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
--- a/sys/fs/nullfs/null_vfsops.c
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -207,7 +207,7 @@
 		    (MNTK_SHARED_WRITES | MNTK_LOOKUP_SHARED |
 		    MNTK_EXTENDED_SHARED);
 	}
-	mp->mnt_kern_flag |= MNTK_LOOKUP_EXCL_DOTDOT | MNTK_NOMSYNC;
+	mp->mnt_kern_flag |= MNTK_NOMSYNC | MNTK_UNLOCKED_INSMNTQUE;
 	mp->mnt_kern_flag |= lowerrootvp->v_mount->mnt_kern_flag &
 	    (MNTK_USES_BCACHE | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS);
 	MNT_IUNLOCK(mp);
diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c
--- a/sys/kern/vfs_lookup.c
+++ b/sys/kern/vfs_lookup.c
@@ -756,9 +756,7 @@
 {
 	if (mp == NULL ||
 	    ((lkflags & LK_SHARED) &&
-	    (!(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED) ||
-	    ((cnflags & ISDOTDOT) &&
-	    (mp->mnt_kern_flag & MNTK_LOOKUP_EXCL_DOTDOT))))) {
+	    !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED))) {
 		lkflags &= ~LK_SHARED;
 		lkflags |= LK_EXCLUSIVE;
 	}
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1942,7 +1942,8 @@
 	KASSERT(vp->v_mount == NULL,
 	    ("insmntque: vnode already on per mount vnode list"));
 	VNASSERT(mp != NULL, vp, ("Don't call insmntque(foo, NULL)"));
-	ASSERT_VOP_ELOCKED(vp, "insmntque: non-locked vp");
+	if ((mp->mnt_kern_flag & MNTK_UNLOCKED_INSMNTQUE) == 0)
+		ASSERT_VOP_ELOCKED(vp, "insmntque: non-locked vp");
 
 	/*
 	 * We acquire the vnode interlock early to ensure that the
@@ -4343,7 +4344,7 @@
 	MNT_KERN_FLAG(MNTK_NO_IOPF);
 	MNT_KERN_FLAG(MNTK_RECURSE);
 	MNT_KERN_FLAG(MNTK_UPPER_WAITER);
-	MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT);
+	MNT_KERN_FLAG(MNTK_UNLOCKED_INSMNTQUE);
 	MNT_KERN_FLAG(MNTK_USES_BCACHE);
 	MNT_KERN_FLAG(MNTK_FPLOOKUP);
 	MNT_KERN_FLAG(MNTK_TASKQUEUE_WAITER);
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -486,8 +486,8 @@
 		handle i/o state on EFAULT. */
 #define	MNTK_RECURSE	0x00000200 /* pending recursive unmount */
 #define	MNTK_UPPER_WAITER 0x00000400 /* waiting to drain MNTK_UPPER_PENDING */
-#define	MNTK_LOOKUP_EXCL_DOTDOT	0x00000800
-/* UNUSED			0x00001000 */
+/* UNUSED			0x00000800 */
+#define	MNTK_UNLOCKED_INSMNTQUE	0x00001000 /* fs does not lock the vnode for insmntque */
 #define	MNTK_UNMAPPED_BUFS	0x00002000
 #define	MNTK_USES_BCACHE	0x00004000 /* FS uses the buffer cache. */
 /* UNUSED			0x00008000 */
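Taken together, the remaining hunks retire MNTK_LOOKUP_EXCL_DOTDOT and introduce MNTK_UNLOCKED_INSMNTQUE. Nullfs cannot hold the nullfs vnode lock exclusively across insmntque1(), because the nullfs vnode shares v_vnlock with the lower vnode, which may only be shared-locked; the LK_UPGRADE block removed from null_subr.c was the old workaround. The new flag instead tells insmntque() to skip its exclusive-lock assertion for filesystems that provide their own exclusion, which nullfs does by holding null_hash_lock across the call. Below is a small sketch of that flag-gated check, using simplified stand-in types rather than the real struct mount and struct vnode; only mnt_kern_flag and a boolean lock-state field are modeled, and names other than MNTK_UNLOCKED_INSMNTQUE are illustrative.

#include <assert.h>

#define	MNTK_UNLOCKED_INSMNTQUE	0x00001000	/* value from sys/sys/mount.h */

struct mount {
	unsigned int mnt_kern_flag;	/* kernel-only mount flags */
};

struct vnode {
	int v_excl_locked;		/* stand-in for VOP_ISLOCKED() state */
};

/*
 * Sketch of the relaxed insmntque() entry assertion: the exclusive
 * vnode lock is demanded only from filesystems that did not opt out
 * via MNTK_UNLOCKED_INSMNTQUE.  An opted-out filesystem must supply
 * its own exclusion, as nullfs does with null_hash_lock.
 */
static void
insmntque_check_locked(struct mount *mp, struct vnode *vp)
{
	if ((mp->mnt_kern_flag & MNTK_UNLOCKED_INSMNTQUE) == 0)
		assert(vp->v_excl_locked);	/* ASSERT_VOP_ELOCKED() stand-in */
}

With nullfs no longer setting MNTK_LOOKUP_EXCL_DOTDOT, the vfs_lookup.c hunk drops the ISDOTDOT special case from the lock-flag computation, and mount.h frees the 0x00000800 bit for reuse.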