Page MenuHomeFreeBSD

D44076.id134963.diff
No OneTemporary

D44076.id134963.diff

diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h
--- a/sys/fs/unionfs/union.h
+++ b/sys/fs/unionfs/union.h
@@ -144,8 +144,8 @@
struct vattr *, struct ucred *, struct thread *);
int unionfs_mkshadowdir(struct unionfs_mount *, struct vnode *,
struct unionfs_node *, struct componentname *, struct thread *);
-int unionfs_mkwhiteout(struct vnode *, struct componentname *,
- struct thread *, char *, int);
+int unionfs_mkwhiteout(struct vnode *, struct vnode *,
+ struct componentname *, struct thread *, char *, int);
int unionfs_relookup(struct vnode *, struct vnode **,
struct componentname *, struct componentname *, struct thread *,
char *, int, u_long);
@@ -155,6 +155,10 @@
struct thread *);
int unionfs_relookup_for_rename(struct vnode *, struct componentname *,
struct thread *);
+void unionfs_forward_vop_start(struct vnode *, int *,
+ struct vnode *, int *);
+bool unionfs_forward_vop_finish(struct vnode *, struct vnode *, int,
+ struct vnode *, struct vnode *, int);
#define UNIONFSVPTOLOWERVP(vp) (VTOUNIONFS(vp)->un_lowervp)
#define UNIONFSVPTOUPPERVP(vp) (VTOUNIONFS(vp)->un_uppervp)
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
--- a/sys/fs/unionfs/union_subr.c
+++ b/sys/fs/unionfs/union_subr.c
@@ -936,14 +936,21 @@
*pathend = pathterm;
if (!error) {
- unionfs_node_update(unp, uvp, td);
-
/*
* XXX The bug which cannot set uid/gid was corrected.
* Ignore errors.
*/
va.va_type = VNON;
VOP_SETATTR(uvp, &va, nd.ni_cnd.cn_cred);
+
+ /*
+ * VOP_SETATTR() may transiently drop uvp's lock, so it's
+ * important to call it before unionfs_node_update() transfers
+ * the unionfs vnode's lock from lvp to uvp; otherwise the
+ * unionfs vnode itself would be transiently unlocked and
+ * potentially doomed.
+ */
+ unionfs_node_update(unp, uvp, td);
}
vn_finished_write(mp);
@@ -955,28 +962,172 @@
return (error);
}
+/*
+ * Prepare unionfs to issue a forwarded VOP to either the upper or lower
+ * FS. This should be used for any VOP which may drop the vnode lock;
+ * it is not required otherwise.
+ * The unionfs vnode shares its lock with the base-layer vnode(s); if the
+ * base FS must transiently drop its vnode lock, the unionfs vnode may
+ * effectively become unlocked. During that window, a concurrent forced
+ * unmount may doom the unionfs vnode, which leads to two significant
+ * issues:
+ * 1) Completion of, and return from, the unionfs VOP with the unionfs
+ * vnode completely unlocked. When the unionfs vnode becomes doomed
+ * it stops sharing its lock with the base vnode, so even if the
+ * forwarded VOP reacquires the base vnode lock the unionfs vnode
+ * lock will no longer be held. This can lead to violation of the
+ * caller's sychronization requirements as well as various failed
+ * locking assertions when DEBUG_VFS_LOCKS is enabled.
+ * 2) Loss of reference on the base vnode. The caller is expected to
+ * hold a v_usecount reference on the unionfs vnode, while the
+ * unionfs vnode holds a reference on the base-layer vnode(s). But
+ * these references are released when the unionfs vnode becomes
+ * doomed, violating the base layer's expectation that its caller
+ * must hold a reference to prevent vnode recycling.
+ *
+ * basevp1 and basevp2 represent two base-layer vnodes which are
+ * expected to be locked when this function is called. basevp2
+ * may be NULL, but if not NULL basevp1 and basevp2 should represent
+ * a parent directory and a filed linked to it, respectively.
+ * lkflags1 and lkflags2 are output parameters that will store the
+ * current lock status of basevp1 and basevp2, respectively. They
+ * are intended to be passed as the lkflags1 and lkflags2 parameters
+ * in the subsequent call to unionfs_forward_vop_finish(). lkflags2
+ * may be NULL iff basevp2 is NULL.
+ */
+void
+unionfs_forward_vop_start(struct vnode *basevp1, int *lkflags1,
+ struct vnode *basevp2, int *lkflags2)
+{
+ /*
+ * Take an additional reference on the base-layer vnodes to
+ * avoid loss of reference if the unionfs vnodes are doomed.
+ */
+ ASSERT_VOP_LOCKED(basevp1, __func__);
+ *lkflags1 = (VOP_ISLOCKED(basevp1) == LK_EXCLUSIVE ?
+ LK_EXCLUSIVE : LK_SHARED);
+ vref(basevp1);
+ if (basevp2 != NULL) {
+ ASSERT_VOP_LOCKED(basevp2, __func__);
+ *lkflags2 = (VOP_ISLOCKED(basevp2) == LK_EXCLUSIVE ?
+ LK_EXCLUSIVE : LK_SHARED);
+ vref(basevp2);
+ }
+}
+
+/*
+ * Indicate completion of a forwarded VOP previously prepared by
+ * unionfs_forward_vop_start().
+ * basevp1 and basevp2 must be the same values passed to the prior
+ * call to unionfs_forward_vop_start(). unionvp1 and unionvp2
+ * must be the unionfs vnodes that were initially above basevp1 and
+ * basevp2, respectively.
+ * basevp1 and basevp2 (if not NULL) must be locked when this function
+ * is called, while unionvp1 and/or unionvp2 may be unlocked if either
+ * unionfs vnode has become doomed.
+ * lkflags1 and lkflag2 represent the locking flags that should be
+ * used to re-lock unionvp1 and unionvp2, respectively, if either
+ * vnode has become doomed.
+ *
+ * Returns false if any unionfs vnode was found to be doomed, true
+ * otherwise.
+ */
+bool
+unionfs_forward_vop_finish(
+ struct vnode *unionvp1, struct vnode *basevp1, int lkflags1,
+ struct vnode *unionvp2, struct vnode *basevp2, int lkflags2)
+{
+ bool vp1_locked, vp2_locked;
+
+ vp1_locked = vp2_locked = true;
+
+ /*
+ * If either vnode is found to have been doomed, set
+ * a flag indicating that it needs to be re-locked.
+ * Otherwise, simply drop the base-vnode reference that
+ * was taken in unionfs_forward_vop_start().
+ */
+ if (__predict_false(VTOUNIONFS(unionvp1) == NULL)) {
+ if ((lkflags1 & LK_EXCLUSIVE) != 0)
+ ASSERT_VOP_ELOCKED(basevp1, __func__);
+ else
+ ASSERT_VOP_LOCKED(basevp1, __func__);
+ vp1_locked = false;
+ } else
+ vrele(basevp1);
+
+ if (__predict_false(unionvp2 != NULL &&
+ VTOUNIONFS(unionvp2) == NULL)) {
+ if ((lkflags2 & LK_EXCLUSIVE) != 0)
+ ASSERT_VOP_ELOCKED(basevp2, __func__);
+ else
+ ASSERT_VOP_LOCKED(basevp2, __func__);
+ vp2_locked = false;
+ } else if (unionvp2 != NULL)
+ vrele(basevp2);
+
+ /*
+ * If any of the unionfs vnodes need to be re-locked, that
+ * means the unionfs vnode's lock is now de-coupled from the
+ * corresponding base vnode. We therefore need to drop the
+ * base vnode lock (since nothing else will after this point),
+ * as well as release the reference taken in
+ * unionfs_forward_vop_start().
+ */
+ if (__predict_false(!vp1_locked && !vp2_locked))
+ VOP_VPUT_PAIR(basevp1, &basevp2, true);
+ else if (__predict_false(!vp1_locked)) {
+ /*
+ * If basevp1 needs to be unlocked, then we may not
+ * be able to safely unlock it with basevp2 still locked,
+ * for the same reason that an ordinary VFS call would
+ * need to use VOP_VPUT_PAIR() here. We might be able
+ * to use VOP_VPUT_PAIR(..., false) here, but then we
+ * would need to deal with the possibility of basevp2
+ * changing out from under us, which could result in
+ * either the unionfs vnode becoming doomed or its
+ * upper/lower vp no longer matching basevp2. Either
+ * scenario would require at least re-locking the unionfs
+ * vnode anyway.
+ */
+ if (unionvp2 != NULL) {
+ VOP_UNLOCK(unionvp2);
+ vp2_locked = false;
+ }
+ vput(basevp1);
+ } else if (__predict_false(!vp2_locked))
+ vput(basevp2);
+
+ if (__predict_false(!vp1_locked || !vp2_locked))
+ vn_lock_pair(unionvp1, vp1_locked, lkflags1,
+ unionvp2, vp2_locked, lkflags2);
+
+ return (vp1_locked && vp2_locked);
+}
+
/*
* Create a new whiteout.
*
- * dvp should be locked on entry and will be locked on return.
+ * udvp and dvp should be locked on entry and will be locked on return.
*/
int
-unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp,
- struct thread *td, char *path, int pathlen)
+unionfs_mkwhiteout(struct vnode *dvp, struct vnode *udvp,
+ struct componentname *cnp, struct thread *td, char *path, int pathlen)
{
struct vnode *wvp;
struct nameidata nd;
struct mount *mp;
int error;
+ int lkflags;
wvp = NULLVP;
NDPREINIT(&nd);
- if ((error = unionfs_relookup(dvp, &wvp, cnp, &nd.ni_cnd, td, path,
+ if ((error = unionfs_relookup(udvp, &wvp, cnp, &nd.ni_cnd, td, path,
pathlen, CREATE))) {
return (error);
}
if (wvp != NULLVP) {
- if (dvp == wvp)
+ if (udvp == wvp)
vrele(wvp);
else
vput(wvp);
@@ -984,9 +1135,11 @@
return (EEXIST);
}
- if ((error = vn_start_write(dvp, &mp, V_WAIT | V_PCATCH)))
+ if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH)))
goto unionfs_mkwhiteout_free_out;
- error = VOP_WHITEOUT(dvp, &nd.ni_cnd, CREATE);
+ unionfs_forward_vop_start(udvp, &lkflags, NULL, NULL);
+ error = VOP_WHITEOUT(udvp, &nd.ni_cnd, CREATE);
+ unionfs_forward_vop_finish(dvp, udvp, lkflags, NULL, NULL, 0);
vn_finished_write(mp);
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
--- a/sys/fs/unionfs/union_vnops.c
+++ b/sys/fs/unionfs/union_vnops.c
@@ -369,21 +369,27 @@
error = EROFS;
if (udvp != NULLVP) {
+ int lkflags;
+ bool vp_created = false;
+ unionfs_forward_vop_start(udvp, &lkflags, NULL, NULL);
error = VOP_CREATE(udvp, &vp, cnp, ap->a_vap);
- if (error != 0)
- goto unionfs_create_abort;
-
- if (vp->v_type == VSOCK)
+ if (error == 0)
+ vp_created = true;
+ if (__predict_false(!unionfs_forward_vop_finish(ap->a_dvp, udvp,
+ lkflags, NULL, NULL, 0))) {
+ error = (error != 0 ? error : ENOENT);
+ }
+ if (error == 0 && vp->v_type == VSOCK)
*(ap->a_vpp) = vp;
- else {
+ else if (error == 0) {
VOP_UNLOCK(vp);
error = unionfs_nodeget(ap->a_dvp->v_mount, vp, NULLVP,
ap->a_dvp, ap->a_vpp, cnp);
vrele(vp);
- }
+ } else if (vp_created)
+ vput(vp);
}
-unionfs_create_abort:
UNIONFS_INTERNAL_DEBUG("unionfs_create: leave (%d)\n", error);
return (error);
@@ -407,11 +413,15 @@
error = EOPNOTSUPP;
if (udvp != NULLVP) {
+ int lkflags;
switch (ap->a_flags) {
case CREATE:
case DELETE:
case LOOKUP:
+ unionfs_forward_vop_start(udvp, &lkflags, NULL, NULL);
error = VOP_WHITEOUT(udvp, cnp, ap->a_flags);
+ unionfs_forward_vop_finish(ap->a_dvp, udvp, lkflags,
+ NULL, NULL, 0);
break;
default:
error = EINVAL;
@@ -443,21 +453,27 @@
error = EROFS;
if (udvp != NULLVP) {
+ int lkflags;
+ bool vp_created = false;
+ unionfs_forward_vop_start(udvp, &lkflags, NULL, NULL);
error = VOP_MKNOD(udvp, &vp, cnp, ap->a_vap);
- if (error != 0)
- goto unionfs_mknod_abort;
-
- if (vp->v_type == VSOCK)
+ if (error == 0)
+ vp_created = true;
+ if (__predict_false(!unionfs_forward_vop_finish(ap->a_dvp, udvp,
+ lkflags, NULL, NULL, 0))) {
+ error = (error != 0 ? error : ENOENT);
+ }
+ if (error == 0 && vp->v_type == VSOCK)
*(ap->a_vpp) = vp;
- else {
+ else if (error == 0) {
VOP_UNLOCK(vp);
error = unionfs_nodeget(ap->a_dvp->v_mount, vp, NULLVP,
ap->a_dvp, ap->a_vpp, cnp);
vrele(vp);
- }
+ } else if (vp_created)
+ vput(vp);
}
-unionfs_mknod_abort:
UNIONFS_INTERNAL_DEBUG("unionfs_mknod: leave (%d)\n", error);
return (error);
@@ -890,8 +906,13 @@
uvp = unp->un_uppervp;
}
- if (uvp != NULLVP)
+ if (uvp != NULLVP) {
+ int lkflags;
+ unionfs_forward_vop_start(uvp, &lkflags, NULL, NULL);
error = VOP_SETATTR(uvp, vap, ap->a_cred);
+ unionfs_forward_vop_finish(ap->a_vp, uvp, lkflags,
+ NULL, NULL, 0);
+ }
UNIONFS_INTERNAL_DEBUG("unionfs_setattr: leave (%d)\n", error);
@@ -925,6 +946,7 @@
struct unionfs_node *unp;
struct vnode *tvp;
int error;
+ int lkflags;
/* UNIONFS_INTERNAL_DEBUG("unionfs_write: enter\n"); */
@@ -933,7 +955,10 @@
unp = VTOUNIONFS(ap->a_vp);
tvp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp);
+ unionfs_forward_vop_start(tvp, &lkflags, NULL, NULL);
error = VOP_WRITE(tvp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+ unionfs_forward_vop_finish(ap->a_vp, tvp, lkflags,
+ NULL, NULL, 0);
/* UNIONFS_INTERNAL_DEBUG("unionfs_write: leave (%d)\n", error); */
@@ -999,6 +1024,7 @@
struct unionfs_node_status *unsp;
struct vnode *ovp;
enum unionfs_lkupgrade lkstatus;
+ int error, lkflags;
KASSERT_UNIONFS_VNODE(ap->a_vp);
@@ -1017,7 +1043,12 @@
if (ovp == NULLVP)
return (EBADF);
- return (VOP_FSYNC(ovp, ap->a_waitfor, ap->a_td));
+ unionfs_forward_vop_start(ovp, &lkflags, NULL, NULL);
+ error = VOP_FSYNC(ovp, ap->a_waitfor, ap->a_td);
+ unionfs_forward_vop_finish(ap->a_vp, ovp, lkflags,
+ NULL, NULL, 0);
+
+ return (error);
}
static int
@@ -1046,6 +1077,7 @@
udvp = dunp->un_uppervp;
cnp = ap->a_cnp;
td = curthread;
+ unp = NULL;
if (ap->a_vp->v_op != &unionfs_vnodeops) {
if (ap->a_vp->v_type != VSOCK)
@@ -1095,12 +1127,17 @@
* XXX: if the vnode type is VSOCK, it will create whiteout
* after remove.
*/
+ int udvp_lkflags, uvp_lkflags;
if (ump == NULL || ump->um_whitemode == UNIONFS_WHITE_ALWAYS ||
lvp != NULLVP)
cnp->cn_flags |= DOWHITEOUT;
+ unionfs_forward_vop_start(udvp, &udvp_lkflags,
+ ((unp == NULL) ? NULL : uvp), &uvp_lkflags);
error = VOP_REMOVE(udvp, uvp, cnp);
+ unionfs_forward_vop_finish(ap->a_dvp, udvp, udvp_lkflags,
+ ((unp == NULL) ? NULL : ap->a_vp), uvp, uvp_lkflags);
} else if (lvp != NULLVP)
- error = unionfs_mkwhiteout(udvp, cnp, td, path, pathlen);
+ error = unionfs_mkwhiteout(ap->a_dvp, udvp, cnp, td, path, pathlen);
UNIONFS_INTERNAL_DEBUG("unionfs_remove: leave (%d)\n", error);
@@ -1156,8 +1193,14 @@
if (needrelookup != 0)
error = unionfs_relookup_for_create(ap->a_tdvp, cnp, td);
- if (error == 0)
+ if (error == 0) {
+ int udvp_lkflags, uvp_lkflags;
+ unionfs_forward_vop_start(udvp, &udvp_lkflags,
+ uvp, &uvp_lkflags);
error = VOP_LINK(udvp, uvp, cnp);
+ unionfs_forward_vop_finish(ap->a_tdvp, udvp, udvp_lkflags,
+ ap->a_vp, uvp, uvp_lkflags);
+ }
UNIONFS_INTERNAL_DEBUG("unionfs_link: leave (%d)\n", error);
@@ -1413,7 +1456,6 @@
udvp = dunp->un_uppervp;
if (udvp != NULLVP) {
- vref(udvp);
/* check opaque */
if (!(cnp->cn_flags & ISWHITEOUT)) {
error = VOP_GETATTR(udvp, &va, cnp->cn_cred);
@@ -1423,38 +1465,27 @@
cnp->cn_flags |= ISWHITEOUT;
}
- if ((error = VOP_MKDIR(udvp, &uvp, cnp, ap->a_vap)) == 0) {
+ int udvp_lkflags;
+ bool uvp_created = false;
+ unionfs_forward_vop_start(udvp, &udvp_lkflags, NULL, NULL);
+ error = VOP_MKDIR(udvp, &uvp, cnp, ap->a_vap);
+ if (error == 0)
+ uvp_created = true;
+ if (__predict_false(!unionfs_forward_vop_finish(dvp, udvp,
+ udvp_lkflags, NULL, NULL, 0)))
+ error = (error != 0 ? error : ENOENT);
+ if (error == 0) {
VOP_UNLOCK(uvp);
cnp->cn_lkflags = LK_EXCLUSIVE;
- /*
- * The underlying VOP_MKDIR() implementation may have
- * temporarily dropped the parent directory vnode lock.
- * Because the unionfs vnode ordinarily shares that
- * lock, this may allow the unionfs vnode to be reclaimed
- * and its lock field reset. In that case, the unionfs
- * vnode is effectively no longer locked, and we must
- * explicitly lock it before returning in order to meet
- * the locking requirements of VOP_MKDIR().
- */
- if (__predict_false(VTOUNIONFS(dvp) == NULL)) {
- error = ENOENT;
- goto unionfs_mkdir_cleanup;
- }
error = unionfs_nodeget(dvp->v_mount, uvp, NULLVP,
dvp, ap->a_vpp, cnp);
- cnp->cn_lkflags = lkflags;
vrele(uvp);
- }
+ cnp->cn_lkflags = lkflags;
+ } else if (uvp_created)
+ vput(uvp);
}
unionfs_mkdir_cleanup:
-
- if (__predict_false(VTOUNIONFS(dvp) == NULL)) {
- vput(udvp);
- vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
- } else if (udvp != NULLVP)
- vrele(udvp);
-
UNIONFS_INTERNAL_DEBUG("unionfs_mkdir: leave (%d)\n", error);
return (error);
@@ -1521,10 +1552,16 @@
*/
if (error == 0 && VN_IS_DOOMED(uvp))
error = ENOENT;
- if (error == 0)
+ if (error == 0) {
+ int udvp_lkflags, uvp_lkflags;
+ unionfs_forward_vop_start(udvp, &udvp_lkflags,
+ uvp, &uvp_lkflags);
error = VOP_RMDIR(udvp, uvp, cnp);
+ unionfs_forward_vop_finish(ap->a_dvp, udvp, udvp_lkflags,
+ ap->a_vp, uvp, uvp_lkflags);
+ }
} else if (lvp != NULLVP)
- error = unionfs_mkwhiteout(udvp, cnp, td,
+ error = unionfs_mkwhiteout(ap->a_dvp, udvp, cnp, td,
unp->un_path, unp->un_pathlen);
if (error == 0) {
@@ -1558,15 +1595,24 @@
udvp = dunp->un_uppervp;
if (udvp != NULLVP) {
+ int udvp_lkflags;
+ bool uvp_created = false;
+ unionfs_forward_vop_start(udvp, &udvp_lkflags, NULL, NULL);
error = VOP_SYMLINK(udvp, &uvp, cnp, ap->a_vap, ap->a_target);
+ if (error == 0)
+ uvp_created = true;
+ if (__predict_false(!unionfs_forward_vop_finish(ap->a_dvp, udvp,
+ udvp_lkflags, NULL, NULL, 0)))
+ error = (error != 0 ? error : ENOENT);
if (error == 0) {
VOP_UNLOCK(uvp);
cnp->cn_lkflags = LK_EXCLUSIVE;
error = unionfs_nodeget(ap->a_dvp->v_mount, uvp, NULLVP,
ap->a_dvp, ap->a_vpp, cnp);
- cnp->cn_lkflags = lkflags;
vrele(uvp);
- }
+ cnp->cn_lkflags = lkflags;
+ } else if (uvp_created)
+ vput(uvp);
}
UNIONFS_INTERNAL_DEBUG("unionfs_symlink: leave (%d)\n", error);
@@ -2275,8 +2321,13 @@
uvp = unp->un_uppervp;
}
- if (uvp != NULLVP)
+ if (uvp != NULLVP) {
+ int lkflags;
+ unionfs_forward_vop_start(uvp, &lkflags, NULL, NULL);
error = VOP_SETACL(uvp, ap->a_type, ap->a_aclp, ap->a_cred, td);
+ unionfs_forward_vop_finish(ap->a_vp, uvp, lkflags,
+ NULL, NULL, 0);
+ }
UNIONFS_INTERNAL_DEBUG("unionfs_setacl: leave (%d)\n", error);
@@ -2458,9 +2509,14 @@
ovp = uvp;
}
- if (ovp == uvp)
+ if (ovp == uvp) {
+ int lkflags;
+ unionfs_forward_vop_start(ovp, &lkflags, NULL, NULL);
error = VOP_SETEXTATTR(ovp, ap->a_attrnamespace, ap->a_name,
ap->a_uio, cred, td);
+ unionfs_forward_vop_finish(ap->a_vp, ovp, lkflags,
+ NULL, NULL, 0);
+ }
unionfs_setextattr_abort:
UNIONFS_INTERNAL_DEBUG("unionfs_setextattr: leave (%d)\n", error);

File Metadata

Mime Type
text/plain
Expires
Fri, May 1, 7:36 AM (12 h, 38 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
32562673
Default Alt Text
D44076.id134963.diff (17 KB)

Event Timeline