Index: head/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c =================================================================== --- head/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c +++ head/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c @@ -154,6 +154,7 @@ vput(vp); return (error); } + vn_seqc_write_begin(vp); VOP_UNLOCK(vp); /* @@ -206,6 +207,7 @@ VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); + vn_seqc_write_end(vp); vput(vp); vfs_unbusy(mp); vfs_freeopts(mp->mnt_optnew); @@ -241,6 +243,7 @@ vfs_event_signal(NULL, VQ_MOUNT, 0); if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp)) panic("mount: lost mount"); + vn_seqc_write_end(vp); VOP_UNLOCK(vp); vfs_op_exit(mp); vfs_unbusy(mp); Index: head/sys/kern/vfs_mount.c =================================================================== --- head/sys/kern/vfs_mount.c +++ head/sys/kern/vfs_mount.c @@ -947,6 +947,7 @@ vput(vp); return (error); } + vn_seqc_write_begin(vp); VOP_UNLOCK(vp); /* Allocate and initialize the filesystem. */ @@ -979,9 +980,11 @@ VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); + vn_seqc_write_end(vp); vrele(vp); return (error); } + vn_seqc_write_begin(newdp); VOP_UNLOCK(newdp); if (mp->mnt_opt != NULL) @@ -1018,6 +1021,8 @@ EVENTHANDLER_DIRECT_INVOKE(vfs_mounted, mp, newdp, td); VOP_UNLOCK(newdp); mountcheckdirs(vp, newdp); + vn_seqc_write_end(vp); + vn_seqc_write_end(newdp); vrele(newdp); if ((mp->mnt_flag & MNT_RDONLY) == 0) vfs_allocate_syncvnode(mp); @@ -1094,7 +1099,9 @@ VOP_UNLOCK(vp); vfs_op_enter(mp); + vn_seqc_write_begin(vp); + rootvp = NULL; MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) { MNT_IUNLOCK(mp); @@ -1108,8 +1115,6 @@ mp->mnt_kern_flag &= ~MNTK_ASYNC; rootvp = vfs_cache_root_clear(mp); MNT_IUNLOCK(mp); - if (rootvp != NULL) - vrele(rootvp); mp->mnt_optnew = *optlist; vfs_mergeopts(mp->mnt_optnew, mp->mnt_opt); @@ -1233,6 +1238,11 @@ vfs_deallocate_syncvnode(mp); end: vfs_op_exit(mp); + if (rootvp != NULL) { + vn_seqc_write_end(rootvp); + vrele(rootvp); + 
} + vn_seqc_write_end(vp); vfs_unbusy(mp); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; @@ -1723,14 +1733,19 @@ } mp->mnt_kern_flag |= MNTK_UNMOUNT; rootvp = vfs_cache_root_clear(mp); + if (coveredvp != NULL) + vn_seqc_write_begin(coveredvp); if (flags & MNT_NONBUSY) { MNT_IUNLOCK(mp); error = vfs_check_usecounts(mp); MNT_ILOCK(mp); if (error != 0) { + vn_seqc_write_end(coveredvp); dounmount_cleanup(mp, coveredvp, MNTK_UNMOUNT); - if (rootvp != NULL) + if (rootvp != NULL) { + vn_seqc_write_end(rootvp); vrele(rootvp); + } return (error); } } @@ -1759,22 +1774,19 @@ ("%s: invalid return value for msleep in the drain path @ %s:%d", __func__, __FILE__, __LINE__)); - if (rootvp != NULL) + /* + * We want to keep the vnode around so that we can vn_seqc_write_end + * after we are done with unmount. Downgrade our reference to a mere + * hold count so that we don't interfere with anything. + */ + if (rootvp != NULL) { + vhold(rootvp); vrele(rootvp); + } if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); - /* - * From now, we can claim that the use reference on the - * coveredvp is ours, and the ref can be released only by - * successfull unmount by us, or left for later unmount - * attempt. The previously acquired hold reference is no - * longer needed to protect the vnode from reuse. 
- */ - if (coveredvp != NULL) - vdrop(coveredvp); - vfs_periodic(mp, MNT_WAIT); MNT_ILOCK(mp); async_flag = mp->mnt_flag & MNT_ASYNC; @@ -1809,8 +1821,15 @@ } vfs_op_exit_locked(mp); MNT_IUNLOCK(mp); - if (coveredvp) + if (coveredvp) { + vn_seqc_write_end(coveredvp); VOP_UNLOCK(coveredvp); + vdrop(coveredvp); + } + if (rootvp != NULL) { + vn_seqc_write_end(rootvp); + vdrop(rootvp); + } return (error); } mtx_lock(&mountlist_mtx); @@ -1819,7 +1838,13 @@ EVENTHANDLER_DIRECT_INVOKE(vfs_unmounted, mp, td); if (coveredvp != NULL) { coveredvp->v_mountedhere = NULL; + vn_seqc_write_end(coveredvp); VOP_UNLOCK(coveredvp); + vdrop(coveredvp); + } + if (rootvp != NULL) { + vn_seqc_write_end(rootvp); + vdrop(rootvp); } vfs_event_signal(NULL, VQ_UNMOUNT, 0); if (rootvnode != NULL && mp == rootvnode->v_mount) { Index: head/sys/kern/vfs_subr.c =================================================================== --- head/sys/kern/vfs_subr.c +++ head/sys/kern/vfs_subr.c @@ -1761,6 +1761,12 @@ * so as not to contaminate the freshly allocated vnode. */ CTR2(KTR_VFS, "%s: destroying the vnode %p", __func__, vp); + /* + * Paired with vgone. + */ + vn_seqc_write_end_locked(vp); + VNPASS(vp->v_seqc_users == 0, vp); + bo = &vp->v_bufobj; VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); VNPASS(vp->v_holdcnt == VHOLD_NO_SMR, vp); @@ -4018,6 +4024,10 @@ */ if (vp->v_irflag & VIRF_DOOMED) return; + /* + * Paired with freevnode. 
+ */ + vn_seqc_write_begin_locked(vp); vunlazy_gone(vp); vp->v_irflag |= VIRF_DOOMED; @@ -4160,8 +4170,9 @@ printf("%p: ", (void *)vp); printf("type %s\n", typename[vp->v_type]); holdcnt = atomic_load_int(&vp->v_holdcnt); - printf(" usecount %d, writecount %d, refcount %d", - vp->v_usecount, vp->v_writecount, holdcnt & ~VHOLD_ALL_FLAGS); + printf(" usecount %d, writecount %d, refcount %d seqc users %d", + vp->v_usecount, vp->v_writecount, holdcnt & ~VHOLD_ALL_FLAGS, + vp->v_seqc_users); switch (vp->v_type) { case VDIR: printf(" mountedhere %p\n", vp->v_mountedhere); @@ -5508,6 +5519,14 @@ ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); #endif + /* + * It may be tempting to add vn_seqc_write_begin/end calls here and + * in vop_rename_post but that's not going to work out since some + * filesystems relookup vnodes mid-rename. This is probably a bug. + * + * For now filesystems are expected to do the relevant calls after they + * decide what vnodes to operate on. 
+ */ if (a->a_tdvp != a->a_fdvp) vhold(a->a_fdvp); if (a->a_tvp != a->a_fvp) @@ -5589,69 +5608,193 @@ #endif void +vop_create_pre(void *ap) +{ + struct vop_create_args *a; + struct vnode *dvp; + + a = ap; + dvp = a->a_dvp; + vn_seqc_write_begin(dvp); +} + +void vop_create_post(void *ap, int rc) { - struct vop_create_args *a = ap; + struct vop_create_args *a; + struct vnode *dvp; + a = ap; + dvp = a->a_dvp; + vn_seqc_write_end(dvp); if (!rc) - VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); + VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); } void +vop_whiteout_pre(void *ap) +{ + struct vop_whiteout_args *a; + struct vnode *dvp; + + a = ap; + dvp = a->a_dvp; + vn_seqc_write_begin(dvp); +} + +void +vop_whiteout_post(void *ap, int rc) +{ + struct vop_whiteout_args *a; + struct vnode *dvp; + + a = ap; + dvp = a->a_dvp; + vn_seqc_write_end(dvp); +} + +void +vop_deleteextattr_pre(void *ap) +{ + struct vop_deleteextattr_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_begin(vp); +} + +void vop_deleteextattr_post(void *ap, int rc) { - struct vop_deleteextattr_args *a = ap; + struct vop_deleteextattr_args *a; + struct vnode *vp; + a = ap; + vp = a->a_vp; + vn_seqc_write_end(vp); if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void +vop_link_pre(void *ap) +{ + struct vop_link_args *a; + struct vnode *vp, *tdvp; + + a = ap; + vp = a->a_vp; + tdvp = a->a_tdvp; + vn_seqc_write_begin(vp); + vn_seqc_write_begin(tdvp); +} + +void vop_link_post(void *ap, int rc) { - struct vop_link_args *a = ap; + struct vop_link_args *a; + struct vnode *vp, *tdvp; + a = ap; + vp = a->a_vp; + tdvp = a->a_tdvp; + vn_seqc_write_end(vp); + vn_seqc_write_end(tdvp); if (!rc) { - VFS_KNOTE_LOCKED(a->a_vp, NOTE_LINK); - VFS_KNOTE_LOCKED(a->a_tdvp, NOTE_WRITE); + VFS_KNOTE_LOCKED(vp, NOTE_LINK); + VFS_KNOTE_LOCKED(tdvp, NOTE_WRITE); } } void +vop_mkdir_pre(void *ap) +{ + struct vop_mkdir_args *a; + struct vnode *dvp; + + a = ap; + dvp = a->a_dvp; + vn_seqc_write_begin(dvp); +} + +void 
vop_mkdir_post(void *ap, int rc) { - struct vop_mkdir_args *a = ap; + struct vop_mkdir_args *a; + struct vnode *dvp; + a = ap; + dvp = a->a_dvp; + vn_seqc_write_end(dvp); if (!rc) - VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); + VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK); } void +vop_mknod_pre(void *ap) +{ + struct vop_mknod_args *a; + struct vnode *dvp; + + a = ap; + dvp = a->a_dvp; + vn_seqc_write_begin(dvp); +} + +void vop_mknod_post(void *ap, int rc) { - struct vop_mknod_args *a = ap; + struct vop_mknod_args *a; + struct vnode *dvp; + a = ap; + dvp = a->a_dvp; + vn_seqc_write_end(dvp); if (!rc) - VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); + VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); } void vop_reclaim_post(void *ap, int rc) { - struct vop_reclaim_args *a = ap; + struct vop_reclaim_args *a; + struct vnode *vp; + a = ap; + vp = a->a_vp; + ASSERT_VOP_IN_SEQC(vp); if (!rc) - VFS_KNOTE_LOCKED(a->a_vp, NOTE_REVOKE); + VFS_KNOTE_LOCKED(vp, NOTE_REVOKE); } void +vop_remove_pre(void *ap) +{ + struct vop_remove_args *a; + struct vnode *dvp, *vp; + + a = ap; + dvp = a->a_dvp; + vp = a->a_vp; + vn_seqc_write_begin(dvp); + vn_seqc_write_begin(vp); +} + +void vop_remove_post(void *ap, int rc) { - struct vop_remove_args *a = ap; + struct vop_remove_args *a; + struct vnode *dvp, *vp; + a = ap; + dvp = a->a_dvp; + vp = a->a_vp; + vn_seqc_write_end(dvp); + vn_seqc_write_end(vp); if (!rc) { - VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); - VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); + VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); + VFS_KNOTE_LOCKED(vp, NOTE_DELETE); } } @@ -5694,41 +5837,127 @@ } void +vop_rmdir_pre(void *ap) +{ + struct vop_rmdir_args *a; + struct vnode *dvp, *vp; + + a = ap; + dvp = a->a_dvp; + vp = a->a_vp; + vn_seqc_write_begin(dvp); + vn_seqc_write_begin(vp); +} + +void vop_rmdir_post(void *ap, int rc) { - struct vop_rmdir_args *a = ap; + struct vop_rmdir_args *a; + struct vnode *dvp, *vp; + a = ap; + dvp = a->a_dvp; + vp = a->a_vp; + vn_seqc_write_end(dvp); + 
vn_seqc_write_end(vp); if (!rc) { - VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); - VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); + VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK); + VFS_KNOTE_LOCKED(vp, NOTE_DELETE); } } void +vop_setattr_pre(void *ap) +{ + struct vop_setattr_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_begin(vp); +} + +void vop_setattr_post(void *ap, int rc) { - struct vop_setattr_args *a = ap; + struct vop_setattr_args *a; + struct vnode *vp; + a = ap; + vp = a->a_vp; + vn_seqc_write_end(vp); if (!rc) - VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); + VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); } void +vop_setacl_pre(void *ap) +{ + struct vop_setacl_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_begin(vp); +} + +void +vop_setacl_post(void *ap, int rc __unused) +{ + struct vop_setacl_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_end(vp); +} + +void +vop_setextattr_pre(void *ap) +{ + struct vop_setextattr_args *a; + struct vnode *vp; + + a = ap; + vp = a->a_vp; + vn_seqc_write_begin(vp); +} + +void vop_setextattr_post(void *ap, int rc) { - struct vop_setextattr_args *a = ap; + struct vop_setextattr_args *a; + struct vnode *vp; + a = ap; + vp = a->a_vp; + vn_seqc_write_end(vp); if (!rc) - VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); + VFS_KNOTE_LOCKED(vp, NOTE_ATTRIB); } void +vop_symlink_pre(void *ap) +{ + struct vop_symlink_args *a; + struct vnode *dvp; + + a = ap; + dvp = a->a_dvp; + vn_seqc_write_begin(dvp); +} + +void vop_symlink_post(void *ap, int rc) { - struct vop_symlink_args *a = ap; + struct vop_symlink_args *a; + struct vnode *dvp; + a = ap; + dvp = a->a_dvp; + vn_seqc_write_end(dvp); if (!rc) - VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); + VFS_KNOTE_LOCKED(dvp, NOTE_WRITE); } void @@ -6281,6 +6510,8 @@ */ MPASS(mp->mnt_vfs_ops > 0); vp = mp->mnt_rootvnode; + if (vp != NULL) + vn_seqc_write_begin(vp); mp->mnt_rootvnode = NULL; return (vp); } @@ -6576,4 +6807,45 @@ } return 
(VOP_ACCESS(vp, VEXEC, cnp->cn_cred, cnp->cn_thread)); +} + +void +vn_seqc_write_begin_locked(struct vnode *vp) +{ + + ASSERT_VI_LOCKED(vp, __func__); + VNPASS(vp->v_holdcnt > 0, vp); + VNPASS(vp->v_seqc_users >= 0, vp); + vp->v_seqc_users++; + if (vp->v_seqc_users == 1) + seqc_sleepable_write_begin(&vp->v_seqc); +} + +void +vn_seqc_write_begin(struct vnode *vp) +{ + + VI_LOCK(vp); + vn_seqc_write_begin_locked(vp); + VI_UNLOCK(vp); +} + +void +vn_seqc_write_end_locked(struct vnode *vp) +{ + + ASSERT_VI_LOCKED(vp, __func__); + VNPASS(vp->v_seqc_users > 0, vp); + vp->v_seqc_users--; + if (vp->v_seqc_users == 0) + seqc_sleepable_write_end(&vp->v_seqc); +} + +void +vn_seqc_write_end(struct vnode *vp) +{ + + VI_LOCK(vp); + vn_seqc_write_end_locked(vp); + VI_UNLOCK(vp); } Index: head/sys/kern/vnode_if.src =================================================================== --- head/sys/kern/vnode_if.src +++ head/sys/kern/vnode_if.src @@ -88,6 +88,7 @@ %% create dvp E E E %% create vpp - L - +%! create pre vop_create_pre %! create post vop_create_post vop_create { @@ -99,6 +100,8 @@ %% whiteout dvp E E E +%! whiteout pre vop_whiteout_pre +%! whiteout post vop_whiteout_post vop_whiteout { IN struct vnode *dvp; @@ -109,6 +112,7 @@ %% mknod dvp E E E %% mknod vpp - L - +%! mknod pre vop_mknod_pre %! mknod post vop_mknod_post vop_mknod { @@ -172,6 +176,7 @@ %% setattr vp E E E +%! setattr pre vop_setattr_pre %! setattr post vop_setattr_post vop_setattr { @@ -260,6 +265,7 @@ %% remove dvp E E E %% remove vp E E E +%! remove pre vop_remove_pre %! remove post vop_remove_post vop_remove { @@ -271,6 +277,7 @@ %% link tdvp E E E %% link vp E E E +%! link pre vop_link_pre %! link post vop_link_post vop_link { @@ -295,6 +302,7 @@ %% mkdir dvp E E E %% mkdir vpp - E - +%! mkdir pre vop_mkdir_pre %! mkdir post vop_mkdir_post vop_mkdir { @@ -307,6 +315,7 @@ %% rmdir dvp E E E %% rmdir vp E E E +%! rmdir pre vop_rmdir_pre %! 
rmdir post vop_rmdir_post vop_rmdir { @@ -318,6 +327,7 @@ %% symlink dvp E E E %% symlink vpp - E - +%! symlink pre vop_symlink_pre %! symlink post vop_symlink_post vop_symlink { @@ -523,6 +533,8 @@ %% setacl vp E E E +%! setacl pre vop_setacl_pre +%! setacl post vop_setacl_post vop_setacl { IN struct vnode *vp; @@ -589,6 +601,7 @@ %% deleteextattr vp E E E +%! deleteextattr pre vop_deleteextattr_pre %! deleteextattr post vop_deleteextattr_post vop_deleteextattr { @@ -601,6 +614,7 @@ %% setextattr vp E E E +%! setextattr pre vop_setextattr_pre %! setextattr post vop_setextattr_post vop_setextattr { Index: head/sys/sys/vnode.h =================================================================== --- head/sys/sys/vnode.h +++ head/sys/sys/vnode.h @@ -45,6 +45,7 @@ #include #include #include +#include /* * The vnode is the focus of all file activity in UNIX. There is a @@ -105,6 +106,7 @@ */ enum vtype v_type:8; /* u vnode type */ short v_irflag; /* i frequently read flags */ + seqc_t v_seqc; /* i modification count */ struct vop_vector *v_op; /* u vnode operations vector */ void *v_data; /* u private data for fs */ @@ -175,6 +177,7 @@ short v_dbatchcpu; /* i LRU requeue deferral batch */ int v_writecount; /* I ref count of writers or (negative) text users */ + int v_seqc_users; /* i modifications pending */ u_int v_hash; }; @@ -539,6 +542,18 @@ #define ASSERT_VOP_LOCKED(vp, str) assert_vop_locked((vp), (str)) #define ASSERT_VOP_UNLOCKED(vp, str) assert_vop_unlocked((vp), (str)) +#define ASSERT_VOP_IN_SEQC(vp) do { \ + struct vnode *_vp = (vp); \ + \ + VNPASS(seqc_in_modify(_vp->v_seqc), _vp); \ +} while (0) + +#define ASSERT_VOP_NOT_IN_SEQC(vp) do { \ + struct vnode *_vp = (vp); \ + \ + VNPASS(!seqc_in_modify(_vp->v_seqc), _vp); \ +} while (0) + #else /* !DEBUG_VFS_LOCKS */ #define ASSERT_VI_LOCKED(vp, str) ((void)0) @@ -546,6 +561,10 @@ #define ASSERT_VOP_ELOCKED(vp, str) ((void)0) #define ASSERT_VOP_LOCKED(vp, str) ((void)0) #define ASSERT_VOP_UNLOCKED(vp, str) 
((void)0) + +#define ASSERT_VOP_IN_SEQC(vp) ((void)0) +#define ASSERT_VOP_NOT_IN_SEQC(vp) ((void)0) + #endif /* DEBUG_VFS_LOCKS */ @@ -738,6 +757,13 @@ int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize, struct uio *uio); +void vn_seqc_write_begin_locked(struct vnode *vp); +void vn_seqc_write_begin(struct vnode *vp); +void vn_seqc_write_end_locked(struct vnode *vp); +void vn_seqc_write_end(struct vnode *vp); +#define vn_seqc_read_any(vp) seqc_read_any(&(vp)->v_seqc) +#define vn_seqc_consistent(vp, seq) seqc_consistent(&(vp)->v_seqc, seq) + #define vn_rangelock_unlock(vp, cookie) \ rangelock_unlock(&(vp)->v_rl, (cookie), VI_MTX(vp)) #define vn_rangelock_unlock_range(vp, cookie, start, end) \ @@ -804,23 +830,37 @@ /* These are called from within the actual VOPS. */ void vop_close_post(void *a, int rc); +void vop_create_pre(void *a); void vop_create_post(void *a, int rc); +void vop_whiteout_pre(void *a); +void vop_whiteout_post(void *a, int rc); +void vop_deleteextattr_pre(void *a); void vop_deleteextattr_post(void *a, int rc); +void vop_link_pre(void *a); void vop_link_post(void *a, int rc); void vop_lookup_post(void *a, int rc); void vop_lookup_pre(void *a); +void vop_mkdir_pre(void *a); void vop_mkdir_post(void *a, int rc); +void vop_mknod_pre(void *a); void vop_mknod_post(void *a, int rc); void vop_open_post(void *a, int rc); void vop_read_post(void *a, int rc); void vop_readdir_post(void *a, int rc); void vop_reclaim_post(void *a, int rc); +void vop_remove_pre(void *a); void vop_remove_post(void *a, int rc); void vop_rename_post(void *a, int rc); void vop_rename_pre(void *a); +void vop_rmdir_pre(void *a); void vop_rmdir_post(void *a, int rc); +void vop_setattr_pre(void *a); void vop_setattr_post(void *a, int rc); +void vop_setacl_pre(void *a); +void vop_setacl_post(void *a, int rc); +void vop_setextattr_pre(void *a); void vop_setextattr_post(void *a, int rc); +void vop_symlink_pre(void *a); void vop_symlink_post(void *a, int rc); int 
vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a);