Index: head/sys/ufs/ffs/ffs_vfsops.c =================================================================== --- head/sys/ufs/ffs/ffs_vfsops.c +++ head/sys/ufs/ffs/ffs_vfsops.c @@ -84,6 +84,7 @@ #include static uma_zone_t uma_inode, uma_ufs1, uma_ufs2; +VFS_SMR_DECLARE; static int ffs_mountfs(struct vnode *, struct mount *, struct thread *); static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, @@ -393,6 +394,7 @@ uma_ufs2 = uma_zcreate("FFS2 dinode", sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); + VFS_SMR_ZONE_SET(uma_inode); } vfs_deleteopt(mp->mnt_optnew, "groupquota"); @@ -455,6 +457,7 @@ } MNT_ILOCK(mp); + mp->mnt_kern_flag &= ~MNTK_FPLOOKUP; mp->mnt_flag |= mntorflags; MNT_IUNLOCK(mp); /* @@ -795,6 +798,17 @@ } } } + + MNT_ILOCK(mp); + /* + * This is racy versus lookup, see ufs_fplookup_vexec for details. + */ + if ((mp->mnt_kern_flag & MNTK_FPLOOKUP) != 0) + panic("MNTK_FPLOOKUP set on mount %p when it should not be", mp); + if ((mp->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) == 0) + mp->mnt_kern_flag |= MNTK_FPLOOKUP; + MNT_IUNLOCK(mp); + vfs_mountedfrom(mp, fspec); return (0); } @@ -1968,14 +1982,14 @@ ump = VFSTOUFS(mp); fs = ump->um_fs; - ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO); + ip = uma_zalloc_smr(uma_inode, M_WAITOK | M_ZERO); /* Allocate a new vnode/inode. */ error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ? &ffs_vnodeops1 : &ffs_vnodeops2, &vp); if (error) { *vpp = NULL; - uma_zfree(uma_inode, ip); + uma_zfree_smr(uma_inode, ip); return (error); } /* @@ -2004,7 +2018,7 @@ vp->v_vflag |= VV_FORCEINSMQ; error = insmntque(vp, mp); if (error != 0) { - uma_zfree(uma_inode, ip); + uma_zfree_smr(uma_inode, ip); *vpp = NULL; return (error); } @@ -2327,7 +2341,7 @@ uma_zfree(uma_ufs1, ip->i_din1); else if (ip->i_din2 != NULL) uma_zfree(uma_ufs2, ip->i_din2); - uma_zfree(uma_inode, ip); + uma_zfree_smr(uma_inode, ip); } static int dobkgrdwrite = 1; Index: head/sys/ufs/ffs/ffs_vnops.c =================================================================== --- head/sys/ufs/ffs/ffs_vnops.c +++ head/sys/ufs/ffs/ffs_vnops.c @@ -905,8 +905,10 @@ if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ap->a_cred) { if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID)) { - ip->i_mode &= ~(ISUID | ISGID); + vn_seqc_write_begin(vp); + UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID)); DIP_SET(ip, i_mode, ip->i_mode); + vn_seqc_write_end(vp); } } if (error) { @@ -1152,8 +1154,10 @@ */ if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) { if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID)) { - ip->i_mode &= ~(ISUID | ISGID); + vn_seqc_write_begin(vp); + UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID)); dp->di_mode = ip->i_mode; + vn_seqc_write_end(vp); } } if (error) { Index: head/sys/ufs/ufs/inode.h =================================================================== --- head/sys/ufs/ufs/inode.h +++ head/sys/ufs/ufs/inode.h @@ -43,6 +43,7 @@ #include #include #include +#include /* * This must agree with the definition in . @@ -149,6 +150,14 @@ #define UFS_INODE_FLAG_LAZY_MASK_ASSERTABLE \ (UFS_INODE_FLAG_LAZY_MASK & ~(IN_LAZYMOD | IN_LAZYACCESS)) +#define UFS_INODE_SET_MODE(ip, mode) do { \ + struct inode *_ip = (ip); \ + int _mode = (mode); \ + \ + ASSERT_VOP_IN_SEQC(ITOV(_ip)); \ + atomic_store_short(&(_ip)->i_mode, _mode); \ +} while (0) + #define UFS_INODE_SET_FLAG(ip, flags) do { \ struct inode *_ip = (ip); \ struct vnode *_vp = ITOV(_ip); \ @@ -229,6 +238,7 @@ /* Convert between inode pointers and vnode pointers. */ #define VTOI(vp) ((struct inode *)(vp)->v_data) +#define VTOI_SMR(vp) ((struct inode *)vn_load_v_data_smr(vp)) #define ITOV(ip) ((ip)->i_vnode) /* Determine if soft dependencies are being done */ Index: head/sys/ufs/ufs/ufs_acl.c =================================================================== --- head/sys/ufs/ufs/ufs_acl.c +++ head/sys/ufs/ufs/ufs_acl.c @@ -139,9 +139,11 @@ void ufs_sync_inode_from_acl(struct acl *acl, struct inode *ip) { + int newmode; - ip->i_mode &= ACL_PRESERVE_MASK; - ip->i_mode |= acl_posix1e_acl_to_mode(acl); + newmode = ip->i_mode & ACL_PRESERVE_MASK; + newmode |= acl_posix1e_acl_to_mode(acl); + UFS_INODE_SET_MODE(ip, newmode); DIP_SET(ip, i_mode, ip->i_mode); } @@ -381,7 +383,7 @@ ufs_setacl_nfs4_internal(struct vnode *vp, struct acl *aclp, struct thread *td) { int error; - mode_t mode; + mode_t mode, newmode; struct inode *ip = VTOI(vp); KASSERT(acl_nfs4_check(aclp, vp->v_type == VDIR) == 0, @@ -418,8 +420,9 @@ acl_nfs4_sync_mode_from_acl(&mode, aclp); - ip->i_mode &= ACL_PRESERVE_MASK; - ip->i_mode |= mode; + newmode = ip->i_mode & ACL_PRESERVE_MASK; + newmode |= mode; + UFS_INODE_SET_MODE(ip, newmode); DIP_SET(ip, i_mode, ip->i_mode); UFS_INODE_SET_FLAG(ip, IN_CHANGE); Index: head/sys/ufs/ufs/ufs_vnops.c =================================================================== --- head/sys/ufs/ufs/ufs_vnops.c +++ head/sys/ufs/ufs/ufs_vnops.c @@ -63,6 +63,7 @@ #include #include #include +#include #include @@ -96,10 +97,12 @@ "Give all new files in directory the same ownership as the directory"); #endif +VFS_SMR_DECLARE; #include static vop_accessx_t ufs_accessx; +static vop_fplookup_vexec_t ufs_fplookup_vexec; static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *); static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *); static vop_close_t ufs_close; @@ -422,6 +425,46 @@ return (error); } +/* + * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see + * the comment above cache_fplookup for details. + */ +static int +ufs_fplookup_vexec(ap) + struct vop_fplookup_vexec_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap; +{ + struct vnode *vp; + struct inode *ip; + struct ucred *cred; + mode_t all_x, mode; + + vp = ap->a_vp; + ip = VTOI_SMR(vp); + if (__predict_false(ip == NULL)) + return (EAGAIN); + + /* + * XXX ACL race + * + * ACLs are not supported and UFS clears/sets this flag on mount and + * remount. However, we may still be racing with seeing them and there + * is no provision to make sure they were accounted for. This matches + * the behavior of the locked case, since the lookup there is also + * racy: mount takes no measures to block anyone from progressing. + */ + all_x = S_IXUSR | S_IXGRP | S_IXOTH; + mode = atomic_load_short(&ip->i_mode); + if (__predict_true((mode & all_x) == all_x)) + return (0); + + cred = ap->a_cred; + return (vaccess_vexec_smr(mode, ip->i_uid, ip->i_gid, cred)); +} + /* ARGSUSED */ static int ufs_getattr(ap) @@ -711,7 +754,7 @@ struct thread *td; { struct inode *ip = VTOI(vp); - int error; + int newmode, error; /* * To modify the permissions on a file, must possess VADMIN @@ -744,8 +787,9 @@ return (error); } - ip->i_mode &= ~ALLPERMS; - ip->i_mode |= (mode & ALLPERMS); + newmode = ip->i_mode & ~ALLPERMS; + newmode |= (mode & ALLPERMS); + UFS_INODE_SET_MODE(ip, newmode); DIP_SET(ip, i_mode, ip->i_mode); UFS_INODE_SET_FLAG(ip, IN_CHANGE); #ifdef UFS_ACL @@ -869,7 +913,7 @@ UFS_INODE_SET_FLAG(ip, IN_CHANGE); if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { - ip->i_mode &= ~(ISUID | ISGID); + UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID)); DIP_SET(ip, i_mode, ip->i_mode); } } @@ -1111,7 +1155,10 @@ int error = 0; struct mount *mp; ino_t ino; + bool want_seqc_end; + want_seqc_end = false; + #ifdef INVARIANTS if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) @@ -1315,6 +1362,13 @@ tdp->i_effnlink == 0) panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp); + if (tvp != NULL) + vn_seqc_write_begin(tvp); + vn_seqc_write_begin(tdvp); + vn_seqc_write_begin(fvp); + vn_seqc_write_begin(fdvp); + want_seqc_end = true; + /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before @@ -1513,6 +1567,14 @@ cache_purge_negative(tdvp); unlockout: + if (want_seqc_end) { + if (tvp != NULL) + vn_seqc_write_end(tvp); + vn_seqc_write_end(tdvp); + vn_seqc_write_end(fvp); + vn_seqc_write_end(fdvp); + } + vput(fdvp); vput(fvp); if (tvp) @@ -1556,6 +1618,14 @@ goto unlockout; releout: + if (want_seqc_end) { + if (tvp != NULL) + vn_seqc_write_end(tvp); + vn_seqc_write_end(tdvp); + vn_seqc_write_end(fvp); + vn_seqc_write_end(fdvp); + } + vrele(fdvp); vrele(fvp); vrele(tdvp); @@ -1590,7 +1660,7 @@ */ if (acl->acl_cnt != 0) { dmode = acl_posix1e_newfilemode(dmode, acl); - ip->i_mode = dmode; + UFS_INODE_SET_MODE(ip, dmode); DIP_SET(ip, i_mode, dmode); *dacl = *acl; ufs_sync_acl_from_inode(ip, acl); @@ -1602,7 +1672,7 @@ /* * Just use the mode as-is. */ - ip->i_mode = dmode; + UFS_INODE_SET_MODE(ip, dmode); DIP_SET(ip, i_mode, dmode); error = 0; goto out; @@ -1673,7 +1743,7 @@ * the it's not defined case. */ mode = acl_posix1e_newfilemode(mode, acl); - ip->i_mode = mode; + UFS_INODE_SET_MODE(ip, mode); DIP_SET(ip, i_mode, mode); ufs_sync_acl_from_inode(ip, acl); break; @@ -1684,7 +1754,7 @@ /* * Just use the mode as-is. */ - ip->i_mode = mode; + UFS_INODE_SET_MODE(ip, mode); DIP_SET(ip, i_mode, mode); error = 0; goto out; @@ -1796,6 +1866,7 @@ error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); if (error) goto out; + vn_seqc_write_begin(tvp); ip = VTOI(tvp); ip->i_gid = dp->i_gid; DIP_SET(ip, i_gid, dp->i_gid); @@ -1846,6 +1917,7 @@ if (DOINGSOFTDEP(tvp)) softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); + vn_seqc_write_end(tvp); vgone(tvp); vput(tvp); return (error); @@ -1861,6 +1933,7 @@ if (DOINGSOFTDEP(tvp)) softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); + vn_seqc_write_end(tvp); vgone(tvp); vput(tvp); return (error); @@ -1868,7 +1941,7 @@ #endif #endif /* !SUIDDIR */ UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); - ip->i_mode = dmode; + UFS_INODE_SET_MODE(ip, dmode); DIP_SET(ip, i_mode, dmode); tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 2; @@ -1974,6 +2047,7 @@ bad: if (error == 0) { *ap->a_vpp = tvp; + vn_seqc_write_end(tvp); } else { dp->i_effnlink--; dp->i_nlink--; @@ -1989,6 +2063,7 @@ UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(tvp)) softdep_revert_mkdir(dp, ip); + vn_seqc_write_end(tvp); vgone(tvp); vput(tvp); } @@ -2637,8 +2712,9 @@ } #endif #endif /* !SUIDDIR */ + vn_seqc_write_begin(tvp); /* Mostly to cover asserts */ UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); - ip->i_mode = mode; + UFS_INODE_SET_MODE(ip, mode); DIP_SET(ip, i_mode, mode); tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 1; @@ -2648,7 +2724,7 @@ softdep_setup_create(VTOI(dvp), ip); if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID)) { - ip->i_mode &= ~ISGID; + UFS_INODE_SET_MODE(ip, ip->i_mode & ~ISGID); DIP_SET(ip, i_mode, ip->i_mode); } @@ -2688,6 +2764,7 @@ error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0); if (error) goto bad; + vn_seqc_write_end(tvp); *vpp = tvp; return (0); @@ -2702,6 +2779,7 @@ UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(tvp)) softdep_revert_create(VTOI(dvp), ip); + vn_seqc_write_end(tvp); vgone(tvp); vput(tvp); return (error); @@ -2740,6 +2818,7 @@ .vop_write = VOP_PANIC, .vop_accessx = ufs_accessx, .vop_bmap = ufs_bmap, + .vop_fplookup_vexec = ufs_fplookup_vexec, .vop_cachedlookup = ufs_lookup, .vop_close = ufs_close, .vop_create = ufs_create,