diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h --- a/sys/fs/unionfs/union.h +++ b/sys/fs/unionfs/union.h @@ -144,8 +144,8 @@ struct vattr *, struct ucred *, struct thread *); int unionfs_mkshadowdir(struct unionfs_mount *, struct vnode *, struct unionfs_node *, struct componentname *, struct thread *); -int unionfs_mkwhiteout(struct vnode *, struct componentname *, - struct thread *, char *, int); +int unionfs_mkwhiteout(struct vnode *, struct vnode *, + struct componentname *, struct thread *, char *, int); int unionfs_relookup(struct vnode *, struct vnode **, struct componentname *, struct componentname *, struct thread *, char *, int, u_long); @@ -155,6 +155,24 @@ struct thread *); int unionfs_relookup_for_rename(struct vnode *, struct componentname *, struct thread *); +void unionfs_forward_vop_start_pair(struct vnode *, int *, + struct vnode *, int *); +bool unionfs_forward_vop_finish_pair(struct vnode *, struct vnode *, int, + struct vnode *, struct vnode *, int); + +static inline void +unionfs_forward_vop_start(struct vnode *basevp, int *lkflags) +{ + unionfs_forward_vop_start_pair(basevp, lkflags, NULL, NULL); +} + +static inline bool +unionfs_forward_vop_finish(struct vnode *unionvp, struct vnode *basevp, + int lkflags) +{ + return (unionfs_forward_vop_finish_pair(unionvp, basevp, lkflags, + NULL, NULL, 0)); +} #define UNIONFSVPTOLOWERVP(vp) (VTOUNIONFS(vp)->un_lowervp) #define UNIONFSVPTOUPPERVP(vp) (VTOUNIONFS(vp)->un_uppervp) diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c --- a/sys/fs/unionfs/union_subr.c +++ b/sys/fs/unionfs/union_subr.c @@ -936,14 +936,21 @@ *pathend = pathterm; if (!error) { - unionfs_node_update(unp, uvp, td); - /* * XXX The bug which cannot set uid/gid was corrected. * Ignore errors. */ va.va_type = VNON; VOP_SETATTR(uvp, &va, nd.ni_cnd.cn_cred); + + /* + * VOP_SETATTR() may transiently drop uvp's lock, so it's + * important to call it before unionfs_node_update() transfers + * the unionfs vnode's lock from lvp to uvp; otherwise the + * unionfs vnode itself would be transiently unlocked and + * potentially doomed. + */ + unionfs_node_update(unp, uvp, td); } vn_finished_write(mp); @@ -955,28 +962,180 @@ return (error); } +static inline void +unionfs_forward_vop_ref(struct vnode *basevp, int *lkflags) +{ + ASSERT_VOP_LOCKED(basevp, __func__); + *lkflags = VOP_ISLOCKED(basevp); + vref(basevp); +} + +/* + * Prepare unionfs to issue a forwarded VOP to either the upper or lower + * FS. This should be used for any VOP which may drop the vnode lock; + * it is not required otherwise. + * The unionfs vnode shares its lock with the base-layer vnode(s); if the + * base FS must transiently drop its vnode lock, the unionfs vnode may + * effectively become unlocked. During that window, a concurrent forced + * unmount may doom the unionfs vnode, which leads to two significant + * issues: + * 1) Completion of, and return from, the unionfs VOP with the unionfs + * vnode completely unlocked. When the unionfs vnode becomes doomed + * it stops sharing its lock with the base vnode, so even if the + * forwarded VOP reacquires the base vnode lock the unionfs vnode + * lock will no longer be held. This can lead to violation of the + * caller's sychronization requirements as well as various failed + * locking assertions when DEBUG_VFS_LOCKS is enabled. + * 2) Loss of reference on the base vnode. The caller is expected to + * hold a v_usecount reference on the unionfs vnode, while the + * unionfs vnode holds a reference on the base-layer vnode(s). But + * these references are released when the unionfs vnode becomes + * doomed, violating the base layer's expectation that its caller + * must hold a reference to prevent vnode recycling. + * + * basevp1 and basevp2 represent two base-layer vnodes which are + * expected to be locked when this function is called. basevp2 + * may be NULL, but if not NULL basevp1 and basevp2 should represent + * a parent directory and a filed linked to it, respectively. + * lkflags1 and lkflags2 are output parameters that will store the + * current lock status of basevp1 and basevp2, respectively. They + * are intended to be passed as the lkflags1 and lkflags2 parameters + * in the subsequent call to unionfs_forward_vop_finish_pair(). + * lkflags2 may be NULL iff basevp2 is NULL. + */ +void +unionfs_forward_vop_start_pair(struct vnode *basevp1, int *lkflags1, + struct vnode *basevp2, int *lkflags2) +{ + /* + * Take an additional reference on the base-layer vnodes to + * avoid loss of reference if the unionfs vnodes are doomed. + */ + unionfs_forward_vop_ref(basevp1, lkflags1); + if (basevp2 != NULL) + unionfs_forward_vop_ref(basevp2, lkflags2); +} + +static inline bool +unionfs_forward_vop_rele(struct vnode *unionvp, struct vnode *basevp, + int lkflags) +{ + bool unionvp_doomed; + + if (__predict_false(VTOUNIONFS(unionvp) == NULL)) { + if ((lkflags & LK_EXCLUSIVE) != 0) + ASSERT_VOP_ELOCKED(basevp, __func__); + else + ASSERT_VOP_LOCKED(basevp, __func__); + unionvp_doomed = true; + } else { + vrele(basevp); + unionvp_doomed = false; + } + + return (unionvp_doomed); +} + + +/* + * Indicate completion of a forwarded VOP previously prepared by + * unionfs_forward_vop_start_pair(). + * basevp1 and basevp2 must be the same values passed to the prior + * call to unionfs_forward_vop_start_pair(). unionvp1 and unionvp2 + * must be the unionfs vnodes that were initially above basevp1 and + * basevp2, respectively. + * basevp1 and basevp2 (if not NULL) must be locked when this function + * is called, while unionvp1 and/or unionvp2 may be unlocked if either + * unionfs vnode has become doomed. + * lkflags1 and lkflag2 represent the locking flags that should be + * used to re-lock unionvp1 and unionvp2, respectively, if either + * vnode has become doomed. + * + * Returns true if any unionfs vnode was found to be doomed, false + * otherwise. + */ +bool +unionfs_forward_vop_finish_pair( + struct vnode *unionvp1, struct vnode *basevp1, int lkflags1, + struct vnode *unionvp2, struct vnode *basevp2, int lkflags2) +{ + bool vp1_doomed, vp2_doomed; + + /* + * If either vnode is found to have been doomed, set + * a flag indicating that it needs to be re-locked. + * Otherwise, simply drop the base-vnode reference that + * was taken in unionfs_forward_vop_start(). + */ + vp1_doomed = unionfs_forward_vop_rele(unionvp1, basevp1, lkflags1); + + if (unionvp2 != NULL) + vp2_doomed = unionfs_forward_vop_rele(unionvp2, basevp2, lkflags2); + else + vp2_doomed = false; + + /* + * If any of the unionfs vnodes need to be re-locked, that + * means the unionfs vnode's lock is now de-coupled from the + * corresponding base vnode. We therefore need to drop the + * base vnode lock (since nothing else will after this point), + * and also release the reference taken in + * unionfs_forward_vop_start_pair(). + */ + if (__predict_false(vp1_doomed && vp2_doomed)) + VOP_VPUT_PAIR(basevp1, &basevp2, true); + else if (__predict_false(vp1_doomed)) { + /* + * If basevp1 needs to be unlocked, then we may not + * be able to safely unlock it with basevp2 still locked, + * for the same reason that an ordinary VFS call would + * need to use VOP_VPUT_PAIR() here. We might be able + * to use VOP_VPUT_PAIR(..., false) here, but then we + * would need to deal with the possibility of basevp2 + * changing out from under us, which could result in + * either the unionfs vnode becoming doomed or its + * upper/lower vp no longer matching basevp2. Either + * scenario would require at least re-locking the unionfs + * vnode anyway. + */ + if (unionvp2 != NULL) { + VOP_UNLOCK(unionvp2); + vp2_doomed = true; + } + vput(basevp1); + } else if (__predict_false(vp2_doomed)) + vput(basevp2); + + if (__predict_false(vp1_doomed || vp2_doomed)) + vn_lock_pair(unionvp1, !vp1_doomed, lkflags1, + unionvp2, !vp2_doomed, lkflags2); + + return (vp1_doomed || vp2_doomed); +} + /* * Create a new whiteout. * - * dvp should be locked on entry and will be locked on return. + * udvp and dvp should be locked on entry and will be locked on return. */ int -unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp, - struct thread *td, char *path, int pathlen) +unionfs_mkwhiteout(struct vnode *dvp, struct vnode *udvp, + struct componentname *cnp, struct thread *td, char *path, int pathlen) { struct vnode *wvp; struct nameidata nd; struct mount *mp; int error; + int lkflags; wvp = NULLVP; NDPREINIT(&nd); - if ((error = unionfs_relookup(dvp, &wvp, cnp, &nd.ni_cnd, td, path, + if ((error = unionfs_relookup(udvp, &wvp, cnp, &nd.ni_cnd, td, path, pathlen, CREATE))) { return (error); } if (wvp != NULLVP) { - if (dvp == wvp) + if (udvp == wvp) vrele(wvp); else vput(wvp); @@ -984,9 +1143,11 @@ return (EEXIST); } - if ((error = vn_start_write(dvp, &mp, V_WAIT | V_PCATCH))) + if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH))) goto unionfs_mkwhiteout_free_out; - error = VOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE); + unionfs_forward_vop_start(udvp, &lkflags); + error = VOP_WHITEOUT(udvp, &nd.ni_cnd, CREATE); + unionfs_forward_vop_finish(dvp, udvp, lkflags); vn_finished_write(mp); diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c --- a/sys/fs/unionfs/union_vnops.c +++ b/sys/fs/unionfs/union_vnops.c @@ -369,21 +369,27 @@ error = EROFS; if (udvp != NULLVP) { + int lkflags; + bool vp_created = false; + unionfs_forward_vop_start(udvp, &lkflags); error = VOP_CREATE(udvp, &vp, cnp, ap->a_vap); - if (error != 0) - goto unionfs_create_abort; - - if (vp->v_type == VSOCK) + if (error == 0) + vp_created = true; + if (__predict_false(unionfs_forward_vop_finish(ap->a_dvp, udvp, + lkflags)) && error == 0) { + error = ENOENT; + } + if (error == 0 && vp->v_type == VSOCK) *(ap->a_vpp) = vp; - else { + else if (error == 0) { VOP_UNLOCK(vp); error = unionfs_nodeget(ap->a_dvp->v_mount, vp, NULLVP, ap->a_dvp, ap->a_vpp, cnp); vrele(vp); - } + } else if (vp_created) + vput(vp); } -unionfs_create_abort: UNIONFS_INTERNAL_DEBUG("unionfs_create: leave (%d)\n", error); return (error); @@ -407,11 +413,14 @@ error = EOPNOTSUPP; if (udvp != NULLVP) { + int lkflags; switch (ap->a_flags) { case CREATE: case DELETE: case LOOKUP: + unionfs_forward_vop_start(udvp, &lkflags); error = VOP_WHITEOUT(udvp, cnp, ap->a_flags); + unionfs_forward_vop_finish(ap->a_dvp, udvp, lkflags); break; default: error = EINVAL; @@ -443,21 +452,27 @@ error = EROFS; if (udvp != NULLVP) { + int lkflags; + bool vp_created = false; + unionfs_forward_vop_start(udvp, &lkflags); error = VOP_MKNOD(udvp, &vp, cnp, ap->a_vap); - if (error != 0) - goto unionfs_mknod_abort; - - if (vp->v_type == VSOCK) + if (error == 0) + vp_created = true; + if (__predict_false(unionfs_forward_vop_finish(ap->a_dvp, udvp, + lkflags)) && error == 0) { + error = ENOENT; + } + if (error == 0 && vp->v_type == VSOCK) *(ap->a_vpp) = vp; - else { + else if (error == 0) { VOP_UNLOCK(vp); error = unionfs_nodeget(ap->a_dvp->v_mount, vp, NULLVP, ap->a_dvp, ap->a_vpp, cnp); vrele(vp); - } + } else if (vp_created) + vput(vp); } -unionfs_mknod_abort: UNIONFS_INTERNAL_DEBUG("unionfs_mknod: leave (%d)\n", error); return (error); @@ -890,8 +905,12 @@ uvp = unp->un_uppervp; } - if (uvp != NULLVP) + if (uvp != NULLVP) { + int lkflags; + unionfs_forward_vop_start(uvp, &lkflags); error = VOP_SETATTR(uvp, vap, ap->a_cred); + unionfs_forward_vop_finish(ap->a_vp, uvp, lkflags); + } UNIONFS_INTERNAL_DEBUG("unionfs_setattr: leave (%d)\n", error); @@ -925,6 +944,7 @@ struct unionfs_node *unp; struct vnode *tvp; int error; + int lkflags; /* UNIONFS_INTERNAL_DEBUG("unionfs_write: enter\n"); */ @@ -933,7 +953,9 @@ unp = VTOUNIONFS(ap->a_vp); tvp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp); + unionfs_forward_vop_start(tvp, &lkflags); error = VOP_WRITE(tvp, ap->a_uio, ap->a_ioflag, ap->a_cred); + unionfs_forward_vop_finish(ap->a_vp, tvp, lkflags); /* UNIONFS_INTERNAL_DEBUG("unionfs_write: leave (%d)\n", error); */ @@ -999,6 +1021,7 @@ struct unionfs_node_status *unsp; struct vnode *ovp; enum unionfs_lkupgrade lkstatus; + int error, lkflags; KASSERT_UNIONFS_VNODE(ap->a_vp); @@ -1017,7 +1040,11 @@ if (ovp == NULLVP) return (EBADF); - return (VOP_FSYNC(ovp, ap->a_waitfor, ap->a_td)); + unionfs_forward_vop_start(ovp, &lkflags); + error = VOP_FSYNC(ovp, ap->a_waitfor, ap->a_td); + unionfs_forward_vop_finish(ap->a_vp, ovp, lkflags); + + return (error); } static int @@ -1046,6 +1073,7 @@ udvp = dunp->un_uppervp; cnp = ap->a_cnp; td = curthread; + unp = NULL; if (ap->a_vp->v_op != &unionfs_vnodeops) { if (ap->a_vp->v_type != VSOCK) @@ -1095,12 +1123,17 @@ * XXX: if the vnode type is VSOCK, it will create whiteout * after remove. */ + int udvp_lkflags, uvp_lkflags; if (ump == NULL || ump->um_whitemode == UNIONFS_WHITE_ALWAYS || lvp != NULLVP) cnp->cn_flags |= DOWHITEOUT; + unionfs_forward_vop_start_pair(udvp, &udvp_lkflags, + ((unp == NULL) ? NULL : uvp), &uvp_lkflags); error = VOP_REMOVE(udvp, uvp, cnp); + unionfs_forward_vop_finish_pair(ap->a_dvp, udvp, udvp_lkflags, + ((unp == NULL) ? NULL : ap->a_vp), uvp, uvp_lkflags); } else if (lvp != NULLVP) - error = unionfs_mkwhiteout(udvp, cnp, td, path, pathlen); + error = unionfs_mkwhiteout(ap->a_dvp, udvp, cnp, td, path, pathlen); UNIONFS_INTERNAL_DEBUG("unionfs_remove: leave (%d)\n", error); @@ -1156,8 +1189,14 @@ if (needrelookup != 0) error = unionfs_relookup_for_create(ap->a_tdvp, cnp, td); - if (error == 0) + if (error == 0) { + int udvp_lkflags, uvp_lkflags; + unionfs_forward_vop_start_pair(udvp, &udvp_lkflags, + uvp, &uvp_lkflags); error = VOP_LINK(udvp, uvp, cnp); + unionfs_forward_vop_finish_pair(ap->a_tdvp, udvp, udvp_lkflags, + ap->a_vp, uvp, uvp_lkflags); + } UNIONFS_INTERNAL_DEBUG("unionfs_link: leave (%d)\n", error); @@ -1413,7 +1452,6 @@ udvp = dunp->un_uppervp; if (udvp != NULLVP) { - vref(udvp); /* check opaque */ if (!(cnp->cn_flags & ISWHITEOUT)) { error = VOP_GETATTR(udvp, &va, cnp->cn_cred); @@ -1423,38 +1461,27 @@ cnp->cn_flags |= ISWHITEOUT; } - if ((error = VOP_MKDIR(udvp, &uvp, cnp, ap->a_vap)) == 0) { + int udvp_lkflags; + bool uvp_created = false; + unionfs_forward_vop_start(udvp, &udvp_lkflags); + error = VOP_MKDIR(udvp, &uvp, cnp, ap->a_vap); + if (error == 0) + uvp_created = true; + if (__predict_false(unionfs_forward_vop_finish(dvp, udvp, + udvp_lkflags)) && error == 0) + error = ENOENT; + if (error == 0) { VOP_UNLOCK(uvp); cnp->cn_lkflags = LK_EXCLUSIVE; - /* - * The underlying VOP_MKDIR() implementation may have - * temporarily dropped the parent directory vnode lock. - * Because the unionfs vnode ordinarily shares that - * lock, this may allow the unionfs vnode to be reclaimed - * and its lock field reset. In that case, the unionfs - * vnode is effectively no longer locked, and we must - * explicitly lock it before returning in order to meet - * the locking requirements of VOP_MKDIR(). - */ - if (__predict_false(VTOUNIONFS(dvp) == NULL)) { - error = ENOENT; - goto unionfs_mkdir_cleanup; - } error = unionfs_nodeget(dvp->v_mount, uvp, NULLVP, dvp, ap->a_vpp, cnp); - cnp->cn_lkflags = lkflags; vrele(uvp); - } + cnp->cn_lkflags = lkflags; + } else if (uvp_created) + vput(uvp); } unionfs_mkdir_cleanup: - - if (__predict_false(VTOUNIONFS(dvp) == NULL)) { - vput(udvp); - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); - } else if (udvp != NULLVP) - vrele(udvp); - UNIONFS_INTERNAL_DEBUG("unionfs_mkdir: leave (%d)\n", error); return (error); @@ -1521,10 +1548,16 @@ */ if (error == 0 && VN_IS_DOOMED(uvp)) error = ENOENT; - if (error == 0) + if (error == 0) { + int udvp_lkflags, uvp_lkflags; + unionfs_forward_vop_start_pair(udvp, &udvp_lkflags, + uvp, &uvp_lkflags); error = VOP_RMDIR(udvp, uvp, cnp); + unionfs_forward_vop_finish_pair(ap->a_dvp, udvp, udvp_lkflags, + ap->a_vp, uvp, uvp_lkflags); + } } else if (lvp != NULLVP) - error = unionfs_mkwhiteout(udvp, cnp, td, + error = unionfs_mkwhiteout(ap->a_dvp, udvp, cnp, td, unp->un_path, unp->un_pathlen); if (error == 0) { @@ -1558,15 +1591,24 @@ udvp = dunp->un_uppervp; if (udvp != NULLVP) { + int udvp_lkflags; + bool uvp_created = false; + unionfs_forward_vop_start(udvp, &udvp_lkflags); error = VOP_SYMLINK(udvp, &uvp, cnp, ap->a_vap, ap->a_target); + if (error == 0) + uvp_created = true; + if (__predict_false(unionfs_forward_vop_finish(ap->a_dvp, udvp, + udvp_lkflags)) && error == 0) + error = ENOENT; if (error == 0) { VOP_UNLOCK(uvp); cnp->cn_lkflags = LK_EXCLUSIVE; error = unionfs_nodeget(ap->a_dvp->v_mount, uvp, NULLVP, ap->a_dvp, ap->a_vpp, cnp); - cnp->cn_lkflags = lkflags; vrele(uvp); - } + cnp->cn_lkflags = lkflags; + } else if (uvp_created) + vput(uvp); } UNIONFS_INTERNAL_DEBUG("unionfs_symlink: leave (%d)\n", error); @@ -2275,8 +2317,12 @@ uvp = unp->un_uppervp; } - if (uvp != NULLVP) + if (uvp != NULLVP) { + int lkflags; + unionfs_forward_vop_start(uvp, &lkflags); error = VOP_SETACL(uvp, ap->a_type, ap->a_aclp, ap->a_cred, td); + unionfs_forward_vop_finish(ap->a_vp, uvp, lkflags); + } UNIONFS_INTERNAL_DEBUG("unionfs_setacl: leave (%d)\n", error); @@ -2458,9 +2504,13 @@ ovp = uvp; } - if (ovp == uvp) + if (ovp == uvp) { + int lkflags; + unionfs_forward_vop_start(ovp, &lkflags); error = VOP_SETEXTATTR(ovp, ap->a_attrnamespace, ap->a_name, ap->a_uio, cred, td); + unionfs_forward_vop_finish(ap->a_vp, ovp, lkflags); + } unionfs_setextattr_abort: UNIONFS_INTERNAL_DEBUG("unionfs_setextattr: leave (%d)\n", error);