diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h index 467db3b29ff8..c535a3b288b8 100644 --- a/sys/fs/unionfs/union.h +++ b/sys/fs/unionfs/union.h @@ -1,191 +1,189 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1994 The Regents of the University of California. * Copyright (c) 1994 Jan-Simon Pendry. * Copyright (c) 2005, 2006 Masanori Ozawa , ONGS Inc. * Copyright (c) 2006 Daichi Goto * All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifdef _KERNEL /* copy method of attr from lower to upper */ typedef enum _unionfs_copymode { UNIONFS_TRADITIONAL = 0, UNIONFS_TRANSPARENT, UNIONFS_MASQUERADE } unionfs_copymode; /* whiteout policy of upper layer */ typedef enum _unionfs_whitemode { UNIONFS_WHITE_ALWAYS = 0, UNIONFS_WHITE_WHENNEEDED } unionfs_whitemode; struct unionfs_mount { struct mount *um_lowermp; /* MNT_REFed lower mount object */ struct mount *um_uppermp; /* MNT_REFed upper mount object */ struct vnode *um_lowervp; /* VREFed once */ struct vnode *um_uppervp; /* VREFed once */ struct vnode *um_rootvp; /* ROOT vnode */ struct mount_upper_node um_lower_link; /* node in lower FS list of uppers */ struct mount_upper_node um_upper_link; /* node in upper FS list of uppers */ unionfs_copymode um_copymode; unionfs_whitemode um_whitemode; uid_t um_uid; gid_t um_gid; u_short um_udir; u_short um_ufile; }; /* unionfs status list */ struct unionfs_node_status { LIST_ENTRY(unionfs_node_status) uns_list; /* Status list */ pid_t uns_pid; /* current process id */ int uns_node_flag; /* uns flag */ int uns_lower_opencnt; /* open count of lower */ int uns_upper_opencnt; /* open count of upper */ int uns_lower_openmode; /* open mode of lower */ int uns_readdir_status; /* read status of readdir */ }; /* union node status flags */ #define UNS_OPENL_4_READDIR 0x01 /* open lower layer for readdir */ /* A cache of vnode references */ struct unionfs_node { struct vnode *un_lowervp; /* lower side vnode */ struct vnode *un_uppervp; /* upper side vnode */ struct vnode *un_dvp; /* parent unionfs vnode */ struct vnode *un_vnode; /* Back pointer */ LIST_HEAD(, unionfs_node_status) un_unshead; /* unionfs status head */ LIST_HEAD(unionfs_node_hashhead, unionfs_node) *un_hashtbl; /* dir vnode hash table */ union { LIST_ENTRY(unionfs_node) un_hash; /* hash list entry */ STAILQ_ENTRY(unionfs_node) un_rele; /* deferred release list */ }; char *un_path; /* path */ int un_pathlen; /* strlen of path */ - int un_flag; /* unionfs 
node flag */ -}; -/* - * unionfs node flags - * It needs the vnode with exclusive lock, when changing the un_flag variable. - */ -#define UNIONFS_OPENEXTL 0x01 /* openextattr (lower) */ -#define UNIONFS_OPENEXTU 0x02 /* openextattr (upper) */ + /* + * unionfs node flags + * Changing these flags requires the vnode to be locked exclusive. + */ + #define UNIONFS_OPENEXTL 0x01 /* openextattr (lower) */ + #define UNIONFS_OPENEXTU 0x02 /* openextattr (upper) */ + #define UNIONFS_COPY_IN_PROGRESS 0x04 /* copy/dir shadow in progress */ + #define UNIONFS_LOOKUP_IN_PROGRESS 0x08 + unsigned int un_flag; /* unionfs node flag */ +}; extern struct vop_vector unionfs_vnodeops; static inline struct unionfs_node * unionfs_check_vnode(struct vnode *vp, const char *file __unused, int line __unused) { /* * unionfs_lock() needs the NULL check here, as it explicitly * handles the case in which the vnode has been vgonel()'ed. */ KASSERT(vp->v_op == &unionfs_vnodeops || vp->v_data == NULL, ("%s:%d: non-unionfs vnode %p", file, line, vp)); return ((struct unionfs_node *)vp->v_data); } #define MOUNTTOUNIONFSMOUNT(mp) ((struct unionfs_mount *)((mp)->mnt_data)) #define VTOUNIONFS(vp) unionfs_check_vnode(vp, __FILE__, __LINE__) #define UNIONFSTOV(xp) ((xp)->un_vnode) int unionfs_init(struct vfsconf *); int unionfs_uninit(struct vfsconf *); int unionfs_nodeget(struct mount *, struct vnode *, struct vnode *, struct vnode *, struct vnode **, struct componentname *); void unionfs_noderem(struct vnode *); void unionfs_get_node_status(struct unionfs_node *, struct thread *, struct unionfs_node_status **); void unionfs_tryrem_node_status(struct unionfs_node *, struct unionfs_node_status *); int unionfs_check_rmdir(struct vnode *, struct ucred *, struct thread *td); -int unionfs_copyfile(struct unionfs_node *, int, struct ucred *, +int unionfs_copyfile(struct vnode *, int, struct ucred *, struct thread *); void unionfs_create_uppervattr_core(struct unionfs_mount *, struct vattr *, struct vattr *, 
struct thread *); int unionfs_create_uppervattr(struct unionfs_mount *, struct vnode *, struct vattr *, struct ucred *, struct thread *); -int unionfs_mkshadowdir(struct unionfs_mount *, struct vnode *, - struct unionfs_node *, struct componentname *, struct thread *); +int unionfs_mkshadowdir(struct vnode *, struct vnode *, + struct componentname *, struct thread *); int unionfs_mkwhiteout(struct vnode *, struct vnode *, struct componentname *, struct thread *, char *, int); int unionfs_relookup(struct vnode *, struct vnode **, struct componentname *, struct componentname *, struct thread *, char *, int, u_long); -int unionfs_relookup_for_create(struct vnode *, struct componentname *, - struct thread *); -int unionfs_relookup_for_delete(struct vnode *, struct componentname *, - struct thread *); -int unionfs_relookup_for_rename(struct vnode *, struct componentname *, - struct thread *); void unionfs_forward_vop_start_pair(struct vnode *, int *, struct vnode *, int *); bool unionfs_forward_vop_finish_pair(struct vnode *, struct vnode *, int, struct vnode *, struct vnode *, int); +int unionfs_set_in_progress_flag(struct vnode *, unsigned int); +void unionfs_clear_in_progress_flag(struct vnode *, unsigned int); static inline void unionfs_forward_vop_start(struct vnode *basevp, int *lkflags) { unionfs_forward_vop_start_pair(basevp, lkflags, NULL, NULL); } static inline bool unionfs_forward_vop_finish(struct vnode *unionvp, struct vnode *basevp, int lkflags) { return (unionfs_forward_vop_finish_pair(unionvp, basevp, lkflags, NULL, NULL, 0)); } #define UNIONFSVPTOLOWERVP(vp) (VTOUNIONFS(vp)->un_lowervp) #define UNIONFSVPTOUPPERVP(vp) (VTOUNIONFS(vp)->un_uppervp) #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_UNIONFSNODE); MALLOC_DECLARE(M_UNIONFSPATH); #endif #ifdef UNIONFS_DEBUG #define UNIONFSDEBUG(format, args...) printf(format ,## args) #else #define UNIONFSDEBUG(format, args...) 
#endif /* UNIONFS_DEBUG */ #endif /* _KERNEL */ diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c index bb57f3d56ade..671322704dc5 100644 --- a/sys/fs/unionfs/union_subr.c +++ b/sys/fs/unionfs/union_subr.c @@ -1,1502 +1,1652 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1994 Jan-Simon Pendry * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * Copyright (c) 2005, 2006, 2012 Masanori Ozawa , ONGS Inc. * Copyright (c) 2006, 2012 Daichi Goto * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NUNIONFSNODECACHE 16 #define UNIONFSHASHMASK (NUNIONFSNODECACHE - 1) static MALLOC_DEFINE(M_UNIONFSHASH, "UNIONFS hash", "UNIONFS hash table"); MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part"); MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part"); static struct task unionfs_deferred_rele_task; static struct mtx unionfs_deferred_rele_lock; static STAILQ_HEAD(, unionfs_node) unionfs_deferred_rele_list = STAILQ_HEAD_INITIALIZER(unionfs_deferred_rele_list); static TASKQUEUE_DEFINE_THREAD(unionfs_rele); unsigned int unionfs_ndeferred = 0; SYSCTL_UINT(_vfs, OID_AUTO, unionfs_ndeferred, CTLFLAG_RD, &unionfs_ndeferred, 0, "unionfs deferred vnode release"); static void unionfs_deferred_rele(void *, int); /* * Initialize */ int unionfs_init(struct vfsconf *vfsp) { UNIONFSDEBUG("unionfs_init\n"); /* printed during system boot */ TASK_INIT(&unionfs_deferred_rele_task, 0, unionfs_deferred_rele, NULL); mtx_init(&unionfs_deferred_rele_lock, "uniondefr", NULL, MTX_DEF); return (0); } /* * Uninitialize */ int unionfs_uninit(struct vfsconf *vfsp) { taskqueue_quiesce(taskqueue_unionfs_rele); taskqueue_free(taskqueue_unionfs_rele); mtx_destroy(&unionfs_deferred_rele_lock); return (0); } static void 
unionfs_deferred_rele(void *arg __unused, int pending __unused) { STAILQ_HEAD(, unionfs_node) local_rele_list; struct unionfs_node *unp, *tunp; unsigned int ndeferred; ndeferred = 0; STAILQ_INIT(&local_rele_list); mtx_lock(&unionfs_deferred_rele_lock); STAILQ_CONCAT(&local_rele_list, &unionfs_deferred_rele_list); mtx_unlock(&unionfs_deferred_rele_lock); STAILQ_FOREACH_SAFE(unp, &local_rele_list, un_rele, tunp) { ++ndeferred; MPASS(unp->un_dvp != NULL); vrele(unp->un_dvp); free(unp, M_UNIONFSNODE); } /* We expect this function to be single-threaded, thus no atomic */ unionfs_ndeferred += ndeferred; } static struct unionfs_node_hashhead * unionfs_get_hashhead(struct vnode *dvp, struct vnode *lookup) { struct unionfs_node *unp; unp = VTOUNIONFS(dvp); return (&(unp->un_hashtbl[vfs_hash_index(lookup) & UNIONFSHASHMASK])); } /* * Attempt to lookup a cached unionfs vnode by upper/lower vp * from dvp, with dvp's interlock held. */ static struct vnode * unionfs_get_cached_vnode_locked(struct vnode *lookup, struct vnode *dvp) { struct unionfs_node *unp; struct unionfs_node_hashhead *hd; struct vnode *vp; hd = unionfs_get_hashhead(dvp, lookup); LIST_FOREACH(unp, hd, un_hash) { if (unp->un_uppervp == lookup || unp->un_lowervp == lookup) { vp = UNIONFSTOV(unp); VI_LOCK_FLAGS(vp, MTX_DUPOK); vp->v_iflag &= ~VI_OWEINACT; if (VN_IS_DOOMED(vp) || ((vp->v_iflag & VI_DOINGINACT) != 0)) { VI_UNLOCK(vp); vp = NULLVP; } else { vrefl(vp); VI_UNLOCK(vp); } return (vp); } } return (NULLVP); } /* * Get the cached vnode. */ static struct vnode * unionfs_get_cached_vnode(struct vnode *uvp, struct vnode *lvp, struct vnode *dvp) { struct vnode *vp; vp = NULLVP; VI_LOCK(dvp); if (uvp != NULLVP) vp = unionfs_get_cached_vnode_locked(uvp, dvp); else if (lvp != NULLVP) vp = unionfs_get_cached_vnode_locked(lvp, dvp); VI_UNLOCK(dvp); return (vp); } /* * Add the new vnode into cache. 
*/ static struct vnode * unionfs_ins_cached_vnode(struct unionfs_node *uncp, struct vnode *dvp) { struct unionfs_node_hashhead *hd; struct vnode *vp; - ASSERT_VOP_ELOCKED(uncp->un_uppervp, __func__); - ASSERT_VOP_ELOCKED(uncp->un_lowervp, __func__); - KASSERT(uncp->un_uppervp == NULLVP || uncp->un_uppervp->v_type == VDIR, - ("%s: v_type != VDIR", __func__)); - KASSERT(uncp->un_lowervp == NULLVP || uncp->un_lowervp->v_type == VDIR, - ("%s: v_type != VDIR", __func__)); - vp = NULLVP; VI_LOCK(dvp); - if (uncp->un_uppervp != NULL) + if (uncp->un_uppervp != NULLVP) { + ASSERT_VOP_ELOCKED(uncp->un_uppervp, __func__); + KASSERT(uncp->un_uppervp->v_type == VDIR, + ("%s: v_type != VDIR", __func__)); vp = unionfs_get_cached_vnode_locked(uncp->un_uppervp, dvp); - else if (uncp->un_lowervp != NULL) + } else if (uncp->un_lowervp != NULLVP) { + ASSERT_VOP_ELOCKED(uncp->un_lowervp, __func__); + KASSERT(uncp->un_lowervp->v_type == VDIR, + ("%s: v_type != VDIR", __func__)); vp = unionfs_get_cached_vnode_locked(uncp->un_lowervp, dvp); + } if (vp == NULLVP) { hd = unionfs_get_hashhead(dvp, (uncp->un_uppervp != NULLVP ? uncp->un_uppervp : uncp->un_lowervp)); LIST_INSERT_HEAD(hd, uncp, un_hash); } VI_UNLOCK(dvp); return (vp); } /* * Remove the vnode. */ static void unionfs_rem_cached_vnode(struct unionfs_node *unp, struct vnode *dvp) { KASSERT(unp != NULL, ("%s: null node", __func__)); KASSERT(dvp != NULLVP, ("%s: null parent vnode", __func__)); VI_LOCK(dvp); if (unp->un_hash.le_prev != NULL) { LIST_REMOVE(unp, un_hash); unp->un_hash.le_next = NULL; unp->un_hash.le_prev = NULL; } VI_UNLOCK(dvp); } /* * Common cleanup handling for unionfs_nodeget * Upper, lower, and parent directory vnodes are expected to be referenced by * the caller. Upper and lower vnodes, if non-NULL, are also expected to be * exclusively locked by the caller. * This function will return with the caller's locks and references undone. 
*/ static void unionfs_nodeget_cleanup(struct vnode *vp, struct unionfs_node *unp) { /* * Lock and reset the default vnode lock; vgone() expects a locked * vnode, and we're going to reset the vnode ops. */ lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL); /* * Clear out private data and reset the vnode ops to avoid use of * unionfs vnode ops on a partially constructed vnode. */ VI_LOCK(vp); vp->v_data = NULL; vp->v_vnlock = &vp->v_lock; vp->v_op = &dead_vnodeops; VI_UNLOCK(vp); vgone(vp); vput(vp); if (unp->un_dvp != NULLVP) vrele(unp->un_dvp); - if (unp->un_uppervp != NULLVP) + if (unp->un_uppervp != NULLVP) { vput(unp->un_uppervp); - if (unp->un_lowervp != NULLVP) + if (unp->un_lowervp != NULLVP) + vrele(unp->un_lowervp); + } else if (unp->un_lowervp != NULLVP) vput(unp->un_lowervp); if (unp->un_hashtbl != NULL) hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, UNIONFSHASHMASK); free(unp->un_path, M_UNIONFSPATH); free(unp, M_UNIONFSNODE); } /* * Make a new or get existing unionfs node. * * uppervp and lowervp should be unlocked. Because if new unionfs vnode is * locked, uppervp or lowervp is locked too. In order to prevent dead lock, * you should not lock plurality simultaneously. */ int unionfs_nodeget(struct mount *mp, struct vnode *uppervp, struct vnode *lowervp, struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) { char *path; struct unionfs_mount *ump; struct unionfs_node *unp; struct vnode *vp; u_long hashmask; int error; int lkflags; __enum_uint8(vtype) vt; error = 0; ump = MOUNTTOUNIONFSMOUNT(mp); lkflags = (cnp ? cnp->cn_lkflags : 0); path = (cnp ? cnp->cn_nameptr : NULL); *vpp = NULLVP; if (uppervp == NULLVP && lowervp == NULLVP) - panic("%s: upper and lower is null", __func__); + panic("%s: upper and lower are both null", __func__); vt = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type); /* If it has no ISLASTCN flag, path check is skipped. 
*/ if (cnp && !(cnp->cn_flags & ISLASTCN)) path = NULL; /* check the cache */ if (dvp != NULLVP && vt == VDIR) { vp = unionfs_get_cached_vnode(uppervp, lowervp, dvp); if (vp != NULLVP) { *vpp = vp; - goto unionfs_nodeget_out; + if (lkflags != 0) + vn_lock(*vpp, lkflags | LK_RETRY); + return (0); } } unp = malloc(sizeof(struct unionfs_node), M_UNIONFSNODE, M_WAITOK | M_ZERO); error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp); if (error != 0) { free(unp, M_UNIONFSNODE); return (error); } if (dvp != NULLVP) vref(dvp); if (uppervp != NULLVP) vref(uppervp); if (lowervp != NULLVP) vref(lowervp); if (vt == VDIR) { unp->un_hashtbl = hashinit(NUNIONFSNODECACHE, M_UNIONFSHASH, &hashmask); KASSERT(hashmask == UNIONFSHASHMASK, ("unexpected unionfs hash mask 0x%lx", hashmask)); } unp->un_vnode = vp; unp->un_uppervp = uppervp; unp->un_lowervp = lowervp; unp->un_dvp = dvp; if (uppervp != NULLVP) vp->v_vnlock = uppervp->v_vnlock; else vp->v_vnlock = lowervp->v_vnlock; if (path != NULL) { unp->un_path = malloc(cnp->cn_namelen + 1, M_UNIONFSPATH, M_WAITOK | M_ZERO); bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen); unp->un_path[cnp->cn_namelen] = '\0'; unp->un_pathlen = cnp->cn_namelen; } vp->v_type = vt; vp->v_data = unp; /* * TODO: This is an imperfect check, as there's no guarantee that * the underlying filesystems will always return vnode pointers * for the root inodes that match our cached values. To reduce * the likelihood of failure, for example in the case where either * vnode has been forcibly doomed, we check both pointers and set * VV_ROOT if either matches. */ if (ump->um_uppervp == uppervp || ump->um_lowervp == lowervp) vp->v_vflag |= VV_ROOT; KASSERT(dvp != NULL || (vp->v_vflag & VV_ROOT) != 0, ("%s: NULL dvp for non-root vp %p", __func__, vp)); - vn_lock_pair(lowervp, false, LK_EXCLUSIVE, uppervp, false, - LK_EXCLUSIVE); + + /* + * NOTE: There is still a possibility for cross-filesystem locking here. 
+ * If dvp has an upper FS component and is locked, while the new vnode + * created here only has a lower-layer FS component, then we will end + * up taking a lower-FS lock while holding an upper-FS lock. + * That situation could be dealt with here using vn_lock_pair(). + * However, that would only address one instance out of many in which + * a child vnode lock is taken while holding a lock on its parent + * directory. This is done in many places in common VFS code, as well as + * a few places within unionfs (which could lead to the same cross-FS + * locking issue if, for example, the upper FS is another nested unionfs + * instance). Additionally, it is unclear under what circumstances this + * specific lock sequence (a directory on one FS followed by a child of + * its 'peer' directory on another FS) would present the practical + * possibility of deadlock due to some other agent on the system + * attempting to lock those two specific vnodes in the opposite order. + */ + if (uppervp != NULLVP) + vn_lock(uppervp, LK_EXCLUSIVE | LK_RETRY); + else + vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY); error = insmntque1(vp, mp); if (error != 0) { unionfs_nodeget_cleanup(vp, unp); return (error); } - if (lowervp != NULL && VN_IS_DOOMED(lowervp)) { - vput(lowervp); - unp->un_lowervp = lowervp = NULL; - } - if (uppervp != NULL && VN_IS_DOOMED(uppervp)) { - vput(uppervp); - unp->un_uppervp = uppervp = NULL; - if (lowervp != NULLVP) - vp->v_vnlock = lowervp->v_vnlock; - } - if (lowervp == NULL && uppervp == NULL) { - unionfs_nodeget_cleanup(vp, unp); - return (ENOENT); - } + /* + * lowervp and uppervp should only be doomed by a forced unmount of + * their respective filesystems, but that can only happen if the + * unionfs instance is first unmounted. We also effectively hold the + * lock on the new unionfs vnode at this point. 
Therefore, if a + unionfs umount has not yet reached the point at which the above + insmntque1() would fail, then its vflush() call will end up + blocked on our vnode lock, effectively also preventing unmount + of the underlying filesystems. + */ + VNASSERT(lowervp == NULLVP || !VN_IS_DOOMED(lowervp), vp, + ("%s: doomed lowervp %p", __func__, lowervp)); + VNASSERT(uppervp == NULLVP || !VN_IS_DOOMED(uppervp), vp, + ("%s: doomed uppervp %p", __func__, uppervp)); vn_set_state(vp, VSTATE_CONSTRUCTED); if (dvp != NULLVP && vt == VDIR) *vpp = unionfs_ins_cached_vnode(unp, dvp); if (*vpp != NULLVP) { unionfs_nodeget_cleanup(vp, unp); - vp = *vpp; - } else { - if (uppervp != NULL) - VOP_UNLOCK(uppervp); - if (lowervp != NULL) - VOP_UNLOCK(lowervp); + if (lkflags != 0) + vn_lock(*vpp, lkflags | LK_RETRY); + return (0); + } else *vpp = vp; - } -unionfs_nodeget_out: - if (lkflags & LK_TYPE_MASK) - vn_lock(vp, lkflags | LK_RETRY); + if ((lkflags & LK_SHARED) != 0) + vn_lock(vp, LK_DOWNGRADE); + else if ((lkflags & LK_EXCLUSIVE) == 0) + VOP_UNLOCK(vp); return (0); } /* * Clean up the unionfs node. */ void unionfs_noderem(struct vnode *vp) { struct unionfs_node *unp, *unp_t1, *unp_t2; struct unionfs_node_hashhead *hd; struct unionfs_node_status *unsp, *unsp_tmp; struct vnode *lvp; struct vnode *uvp; struct vnode *dvp; int count; int writerefs; + bool unlock_lvp; /* * The root vnode lock may be recursed during unmount, because * it may share the same lock as the unionfs mount's covered vnode, * which is locked across VFS_UNMOUNT(). This lock will then be * recursively taken during the vflush() issued by unionfs_unmount(). * But we still only need to lock the unionfs lock once, because only * one of those lock operations was taken against a unionfs vnode and * will be undone against a unionfs vnode. 
*/ KASSERT(vp->v_vnlock->lk_recurse == 0 || (vp->v_vflag & VV_ROOT) != 0, ("%s: vnode %p locked recursively", __func__, vp)); + + unp = VTOUNIONFS(vp); + VNASSERT(unp != NULL, vp, ("%s: already reclaimed", __func__)); + lvp = unp->un_lowervp; + uvp = unp->un_uppervp; + dvp = unp->un_dvp; + unlock_lvp = (uvp == NULLVP); + + /* + * Lock the lower vnode in addition to the upper vnode lock in order + * to synchronize against any unionfs_lock() operation which may still + * hold the lower vnode lock. We do not need to do this for the root + * vnode, as the root vnode should always have both upper and lower + * base vnodes for its entire lifecycle, so unionfs_lock() should + * never attempt to lock its lower vnode in the first place. + * Moreover, during unmount of a non-"below" unionfs mount, the lower + * root vnode will already be locked as it is the covered vnode. + */ + if (uvp != NULLVP && lvp != NULLVP && (vp->v_vflag & VV_ROOT) == 0) { + vn_lock_pair(uvp, true, LK_EXCLUSIVE, lvp, false, LK_EXCLUSIVE); + unlock_lvp = true; + } + if (lockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) panic("%s: failed to acquire lock for vnode lock", __func__); - /* * Use the interlock to protect the clearing of v_data to * prevent faults in unionfs_lock(). */ VI_LOCK(vp); - unp = VTOUNIONFS(vp); - lvp = unp->un_lowervp; - uvp = unp->un_uppervp; - dvp = unp->un_dvp; unp->un_lowervp = unp->un_uppervp = NULLVP; vp->v_vnlock = &(vp->v_lock); vp->v_data = NULL; vp->v_object = NULL; if (unp->un_hashtbl != NULL) { /* * Clear out any cached child vnodes. This should only * be necessary during forced unmount, when the vnode may * be reclaimed with a non-zero use count. Otherwise the * reference held by each child should prevent reclamation. 
*/ for (count = 0; count <= UNIONFSHASHMASK; count++) { hd = unp->un_hashtbl + count; LIST_FOREACH_SAFE(unp_t1, hd, un_hash, unp_t2) { LIST_REMOVE(unp_t1, un_hash); unp_t1->un_hash.le_next = NULL; unp_t1->un_hash.le_prev = NULL; } } } VI_UNLOCK(vp); writerefs = atomic_load_int(&vp->v_writecount); VNASSERT(writerefs >= 0, vp, ("%s: write count %d, unexpected text ref", __func__, writerefs)); /* * If we were opened for write, we leased the write reference * to the lower vnode. If this is a reclamation due to the * forced unmount, undo the reference now. */ if (writerefs > 0) { VNASSERT(uvp != NULL, vp, ("%s: write reference without upper vnode", __func__)); VOP_ADD_WRITECOUNT(uvp, -writerefs); } - if (lvp != NULLVP) - VOP_UNLOCK(lvp); if (uvp != NULLVP) - VOP_UNLOCK(uvp); + vput(uvp); + if (unlock_lvp) + vput(lvp); + else if (lvp != NULLVP) + vrele(lvp); if (dvp != NULLVP) unionfs_rem_cached_vnode(unp, dvp); - if (lvp != NULLVP) - vrele(lvp); - if (uvp != NULLVP) - vrele(uvp); if (unp->un_path != NULL) { free(unp->un_path, M_UNIONFSPATH); unp->un_path = NULL; unp->un_pathlen = 0; } if (unp->un_hashtbl != NULL) { hashdestroy(unp->un_hashtbl, M_UNIONFSHASH, UNIONFSHASHMASK); } LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) { LIST_REMOVE(unsp, uns_list); free(unsp, M_TEMP); } if (dvp != NULLVP) { mtx_lock(&unionfs_deferred_rele_lock); STAILQ_INSERT_TAIL(&unionfs_deferred_rele_list, unp, un_rele); mtx_unlock(&unionfs_deferred_rele_lock); taskqueue_enqueue(taskqueue_unionfs_rele, &unionfs_deferred_rele_task); } else free(unp, M_UNIONFSNODE); } /* * Get the unionfs node status object for the vnode corresponding to unp, * for the process that owns td. Allocate a new status object if one * does not already exist. 
*/ void unionfs_get_node_status(struct unionfs_node *unp, struct thread *td, struct unionfs_node_status **unspp) { struct unionfs_node_status *unsp; pid_t pid; pid = td->td_proc->p_pid; KASSERT(NULL != unspp, ("%s: NULL status", __func__)); ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), __func__); LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) { if (unsp->uns_pid == pid) { *unspp = unsp; return; } } /* create a new unionfs node status */ unsp = malloc(sizeof(struct unionfs_node_status), M_TEMP, M_WAITOK | M_ZERO); unsp->uns_pid = pid; LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list); *unspp = unsp; } /* * Remove the unionfs node status, if you can. * You need exclusive lock this vnode. */ void unionfs_tryrem_node_status(struct unionfs_node *unp, struct unionfs_node_status *unsp) { KASSERT(NULL != unsp, ("%s: NULL status", __func__)); ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), __func__); if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt) return; LIST_REMOVE(unsp, uns_list); free(unsp, M_TEMP); } /* * Create upper node attr. */ void unionfs_create_uppervattr_core(struct unionfs_mount *ump, struct vattr *lva, struct vattr *uva, struct thread *td) { VATTR_NULL(uva); uva->va_type = lva->va_type; uva->va_atime = lva->va_atime; uva->va_mtime = lva->va_mtime; uva->va_ctime = lva->va_ctime; switch (ump->um_copymode) { case UNIONFS_TRANSPARENT: uva->va_mode = lva->va_mode; uva->va_uid = lva->va_uid; uva->va_gid = lva->va_gid; break; case UNIONFS_MASQUERADE: if (ump->um_uid == lva->va_uid) { uva->va_mode = lva->va_mode & 077077; uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700; uva->va_uid = lva->va_uid; uva->va_gid = lva->va_gid; } else { uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile); uva->va_uid = ump->um_uid; uva->va_gid = ump->um_gid; } break; default: /* UNIONFS_TRADITIONAL */ uva->va_mode = 0777 & ~td->td_proc->p_pd->pd_cmask; uva->va_uid = ump->um_uid; uva->va_gid = ump->um_gid; break; } } /* * Create upper node attr. 
*/ int unionfs_create_uppervattr(struct unionfs_mount *ump, struct vnode *lvp, struct vattr *uva, struct ucred *cred, struct thread *td) { struct vattr lva; int error; if ((error = VOP_GETATTR(lvp, &lva, cred))) return (error); unionfs_create_uppervattr_core(ump, &lva, uva, td); return (error); } /* * relookup * * dvp should be locked on entry and will be locked on return. * * If an error is returned, *vpp will be invalid, otherwise it will hold a * locked, referenced vnode. If *vpp == dvp then remember that only one * LK_EXCLUSIVE lock is held. */ int unionfs_relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct componentname *cn, struct thread *td, char *path, int pathlen, u_long nameiop) { int error; bool refstart; cn->cn_namelen = pathlen; cn->cn_pnbuf = path; cn->cn_nameiop = nameiop; cn->cn_flags = (LOCKPARENT | LOCKLEAF | ISLASTCN); cn->cn_lkflags = LK_EXCLUSIVE; cn->cn_cred = cnp->cn_cred; cn->cn_nameptr = cn->cn_pnbuf; refstart = false; if (nameiop == DELETE) { cn->cn_flags |= (cnp->cn_flags & DOWHITEOUT); } else if (nameiop == RENAME) { refstart = true; } else if (nameiop == CREATE) { cn->cn_flags |= NOCACHE; } vref(dvp); VOP_UNLOCK(dvp); if ((error = vfs_relookup(dvp, vpp, cn, refstart))) { vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); } else vrele(dvp); KASSERT(cn->cn_pnbuf == path, ("%s: cn_pnbuf changed", __func__)); return (error); } -/* - * relookup for CREATE namei operation. - * - * dvp is unionfs vnode. dvp should be locked. - * - * If it called 'unionfs_copyfile' function by unionfs_link etc, - * VOP_LOOKUP information is broken. - * So it need relookup in order to create link etc. 
- */ -int -unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp, - struct thread *td) -{ - struct vnode *udvp; - struct vnode *vp; - struct componentname cn; - int error; - - udvp = UNIONFSVPTOUPPERVP(dvp); - vp = NULLVP; - - error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr, - cnp->cn_namelen, CREATE); - if (error) - return (error); - - if (vp != NULLVP) { - if (udvp == vp) - vrele(vp); - else - vput(vp); - - error = EEXIST; - } - - return (error); -} - -/* - * relookup for DELETE namei operation. - * - * dvp is unionfs vnode. dvp should be locked. - */ -int -unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp, - struct thread *td) -{ - struct vnode *udvp; - struct vnode *vp; - struct componentname cn; - int error; - - udvp = UNIONFSVPTOUPPERVP(dvp); - vp = NULLVP; - - error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr, - cnp->cn_namelen, DELETE); - if (error) - return (error); - - if (vp == NULLVP) - error = ENOENT; - else { - if (udvp == vp) - vrele(vp); - else - vput(vp); - } - - return (error); -} - -/* - * relookup for RENAME namei operation. - * - * dvp is unionfs vnode. dvp should be locked. - */ -int -unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp, - struct thread *td) -{ - struct vnode *udvp; - struct vnode *vp; - struct componentname cn; - int error; - - udvp = UNIONFSVPTOUPPERVP(dvp); - vp = NULLVP; - - error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr, - cnp->cn_namelen, RENAME); - if (error) - return (error); - - if (vp != NULLVP) { - if (udvp == vp) - vrele(vp); - else - vput(vp); - } - - return (error); -} - /* * Update the unionfs_node. * * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the * uvp's lock and lower's lock will be unlocked. 
*/ static void unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp, struct thread *td) { struct unionfs_node_hashhead *hd; struct vnode *vp; struct vnode *lvp; struct vnode *dvp; unsigned count, lockrec; vp = UNIONFSTOV(unp); lvp = unp->un_lowervp; ASSERT_VOP_ELOCKED(lvp, __func__); ASSERT_VOP_ELOCKED(uvp, __func__); dvp = unp->un_dvp; VNASSERT(vp->v_writecount == 0, vp, ("%s: non-zero writecount", __func__)); /* * Update the upper vnode's lock state to match the lower vnode, * and then switch the unionfs vnode's lock to the upper vnode. */ lockrec = lvp->v_vnlock->lk_recurse; for (count = 0; count < lockrec; count++) vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY); VI_LOCK(vp); unp->un_uppervp = uvp; vp->v_vnlock = uvp->v_vnlock; VI_UNLOCK(vp); + for (count = 0; count < lockrec + 1; count++) + VOP_UNLOCK(lvp); /* * Re-cache the unionfs vnode against the upper vnode */ if (dvp != NULLVP && vp->v_type == VDIR) { VI_LOCK(dvp); if (unp->un_hash.le_prev != NULL) { LIST_REMOVE(unp, un_hash); hd = unionfs_get_hashhead(dvp, uvp); LIST_INSERT_HEAD(hd, unp, un_hash); } VI_UNLOCK(unp->un_dvp); } } +/* + * Mark a unionfs operation as being in progress, sleeping if the + * same operation is already in progress. + * This is useful, for example, during copy-up operations in which + * we may drop the target vnode lock, but we want to avoid the + * possibility of a concurrent copy-up on the same vnode triggering + * a spurious failure. 
+ */ +int +unionfs_set_in_progress_flag(struct vnode *vp, unsigned int flag) +{ + struct unionfs_node *unp; + int error; + + error = 0; + ASSERT_VOP_ELOCKED(vp, __func__); + VI_LOCK(vp); + unp = VTOUNIONFS(vp); + while (error == 0 && (unp->un_flag & flag) != 0) { + VOP_UNLOCK(vp); + error = msleep(vp, VI_MTX(vp), PCATCH | PDROP, "unioncp", 0); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + VI_LOCK(vp); + if (error == 0) { + /* + * If we waited on a concurrent copy-up and that + * copy-up was successful, return a non-fatal + * indication that the desired operation is already + * complete. If we waited on a concurrent lookup, + * return ERELOOKUP to indicate the VFS cache should + * be re-queried to avoid creating a duplicate unionfs + * vnode. + */ + unp = VTOUNIONFS(vp); + if (unp == NULL) + error = ENOENT; + else if (flag == UNIONFS_COPY_IN_PROGRESS && + unp->un_uppervp != NULLVP) + error = EJUSTRETURN; + else if (flag == UNIONFS_LOOKUP_IN_PROGRESS) + error = ERELOOKUP; + } + } + if (error == 0) + unp->un_flag |= flag; + VI_UNLOCK(vp); + + return (error); +} + +void +unionfs_clear_in_progress_flag(struct vnode *vp, unsigned int flag) +{ + struct unionfs_node *unp; + + ASSERT_VOP_ELOCKED(vp, __func__); + unp = VTOUNIONFS(vp); + VI_LOCK(vp); + if (unp != NULL) { + VNASSERT((unp->un_flag & flag) != 0, vp, + ("%s: copy not in progress", __func__)); + unp->un_flag &= ~flag; + } + wakeup(vp); + VI_UNLOCK(vp); +} + /* * Create a new shadow dir. * - * udvp should be locked on entry and will be locked on return. + * dvp and vp are unionfs vnodes representing a parent directory and + * child file, should be locked on entry, and will be locked on return. * * If no error returned, unp will be updated. 
*/ int -unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp, - struct unionfs_node *unp, struct componentname *cnp, struct thread *td) +unionfs_mkshadowdir(struct vnode *dvp, struct vnode *vp, + struct componentname *cnp, struct thread *td) { struct vnode *lvp; struct vnode *uvp; + struct vnode *udvp; struct vattr va; struct vattr lva; struct nameidata nd; struct mount *mp; struct ucred *cred; struct ucred *credbk; struct uidinfo *rootinfo; + struct unionfs_mount *ump; + struct unionfs_node *dunp; + struct unionfs_node *unp; int error; + ASSERT_VOP_ELOCKED(dvp, __func__); + ASSERT_VOP_ELOCKED(vp, __func__); + ump = MOUNTTOUNIONFSMOUNT(vp->v_mount); + unp = VTOUNIONFS(vp); if (unp->un_uppervp != NULLVP) return (EEXIST); + dunp = VTOUNIONFS(dvp); + udvp = dunp->un_uppervp; + + error = unionfs_set_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS); + if (error == EJUSTRETURN) + return (0); + else if (error != 0) + return (error); lvp = unp->un_lowervp; uvp = NULLVP; credbk = cnp->cn_cred; /* Authority change to root */ rootinfo = uifind((uid_t)0); cred = crdup(cnp->cn_cred); /* * The calls to chgproccnt() are needed to compensate for change_ruid() * calling chgproccnt(). */ chgproccnt(cred->cr_ruidinfo, 1, 0); change_euid(cred, rootinfo); change_ruid(cred, rootinfo); change_svuid(cred, (uid_t)0); uifree(rootinfo); cnp->cn_cred = cred; memset(&nd.ni_cnd, 0, sizeof(struct componentname)); NDPREINIT(&nd); if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred))) - goto unionfs_mkshadowdir_abort; + goto unionfs_mkshadowdir_finish; + vref(udvp); + VOP_UNLOCK(vp); if ((error = unionfs_relookup(udvp, &uvp, cnp, &nd.ni_cnd, td, - cnp->cn_nameptr, cnp->cn_namelen, CREATE))) - goto unionfs_mkshadowdir_abort; + cnp->cn_nameptr, cnp->cn_namelen, CREATE))) { + /* + * When handling error cases here, we drop udvp's lock and + * then jump to exit code that relocks dvp, which in most + * cases will effectively relock udvp. 
However, this is + * not guaranteed to be the case, as various calls made + * here (such as unionfs_relookup() above and VOP_MKDIR() + * below) may unlock and then relock udvp, allowing dvp to + * be reclaimed in the meantime. In such a situation dvp + * will no longer share its lock with udvp. Since + * performance isn't a concern for these error cases, it + * makes more sense to reuse the common code that locks + * dvp on exit than to explicitly check for reclamation + * of dvp. + */ + vput(udvp); + goto unionfs_mkshadowdir_relock; + } if (uvp != NULLVP) { if (udvp == uvp) vrele(uvp); else vput(uvp); error = EEXIST; - goto unionfs_mkshadowdir_abort; + vput(udvp); + goto unionfs_mkshadowdir_relock; } - if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH))) - goto unionfs_mkshadowdir_abort; + if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH))) { + vput(udvp); + goto unionfs_mkshadowdir_relock; + } unionfs_create_uppervattr_core(ump, &lva, &va, td); /* * Temporarily NUL-terminate the current pathname component. * This function may be called during lookup operations in which * the current pathname component is not the leaf, meaning that * the NUL terminator is some distance beyond the end of the current * component. This *should* be fine, as cn_namelen will still * correctly indicate the length of only the current component, * but ZFS in particular does not respect cn_namelen in its VOP_MKDIR - * implementation + * implementation. * Note that this assumes nd.ni_cnd.cn_pnbuf was allocated by * something like a local namei() operation and the temporary * NUL-termination will not have an effect on other threads. */ char *pathend = &nd.ni_cnd.cn_nameptr[nd.ni_cnd.cn_namelen]; char pathterm = *pathend; *pathend = '\0'; error = VOP_MKDIR(udvp, &uvp, &nd.ni_cnd, &va); *pathend = pathterm; - - if (!error) { - /* - * XXX The bug which cannot set uid/gid was corrected. - * Ignore errors. 
- */ - va.va_type = VNON; - VOP_SETATTR(uvp, &va, nd.ni_cnd.cn_cred); - + if (error != 0) { /* - * VOP_SETATTR() may transiently drop uvp's lock, so it's - * important to call it before unionfs_node_update() transfers - * the unionfs vnode's lock from lvp to uvp; otherwise the - * unionfs vnode itself would be transiently unlocked and - * potentially doomed. + * See the comment after unionfs_relookup() above for an + * explanation of why we unlock udvp here only to relock + * dvp on exit. */ - unionfs_node_update(unp, uvp, td); + vput(udvp); + vn_finished_write(mp); + goto unionfs_mkshadowdir_relock; } + + /* + * XXX The bug which cannot set uid/gid was corrected. + * Ignore errors. + */ + va.va_type = VNON; + /* + * VOP_SETATTR() may transiently drop uvp's lock, so it's + * important to call it before unionfs_node_update() transfers + * the unionfs vnode's lock from lvp to uvp; otherwise the + * unionfs vnode itself would be transiently unlocked and + * potentially doomed. + */ + VOP_SETATTR(uvp, &va, nd.ni_cnd.cn_cred); + + /* + * uvp may become doomed during VOP_VPUT_PAIR() if the implementation + * must temporarily drop uvp's lock. However, since we hold a + * reference to uvp from the VOP_MKDIR() call above, this would require + * a forcible unmount of uvp's filesystem, which in turn can only + * happen if our unionfs instance is first forcibly unmounted. We'll + * therefore catch this case in the NULL check of unp below. 
+ */ + VOP_VPUT_PAIR(udvp, &uvp, false); vn_finished_write(mp); + vn_lock_pair(vp, false, LK_EXCLUSIVE, uvp, true, LK_EXCLUSIVE); + unp = VTOUNIONFS(vp); + if (unp == NULL) { + vput(uvp); + error = ENOENT; + } else + unionfs_node_update(unp, uvp, td); + VOP_UNLOCK(vp); + +unionfs_mkshadowdir_relock: + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + if (error == 0 && (VN_IS_DOOMED(dvp) || VN_IS_DOOMED(vp))) + error = ENOENT; -unionfs_mkshadowdir_abort: +unionfs_mkshadowdir_finish: + unionfs_clear_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS); cnp->cn_cred = credbk; chgproccnt(cred->cr_ruidinfo, -1, 0); crfree(cred); return (error); } static inline void unionfs_forward_vop_ref(struct vnode *basevp, int *lkflags) { ASSERT_VOP_LOCKED(basevp, __func__); *lkflags = VOP_ISLOCKED(basevp); vref(basevp); } /* * Prepare unionfs to issue a forwarded VOP to either the upper or lower * FS. This should be used for any VOP which may drop the vnode lock; * it is not required otherwise. * The unionfs vnode shares its lock with the base-layer vnode(s); if the * base FS must transiently drop its vnode lock, the unionfs vnode may * effectively become unlocked. During that window, a concurrent forced * unmount may doom the unionfs vnode, which leads to two significant * issues: * 1) Completion of, and return from, the unionfs VOP with the unionfs * vnode completely unlocked. When the unionfs vnode becomes doomed * it stops sharing its lock with the base vnode, so even if the * forwarded VOP reacquires the base vnode lock the unionfs vnode * lock will no longer be held. This can lead to violation of the * caller's synchronization requirements as well as various failed * locking assertions when DEBUG_VFS_LOCKS is enabled. * 2) Loss of reference on the base vnode. The caller is expected to * hold a v_usecount reference on the unionfs vnode, while the * unionfs vnode holds a reference on the base-layer vnode(s).
But * these references are released when the unionfs vnode becomes * doomed, violating the base layer's expectation that its caller * must hold a reference to prevent vnode recycling. * * basevp1 and basevp2 represent two base-layer vnodes which are * expected to be locked when this function is called. basevp2 * may be NULL, but if not NULL basevp1 and basevp2 should represent * a parent directory and a file linked to it, respectively. * lkflags1 and lkflags2 are output parameters that will store the * current lock status of basevp1 and basevp2, respectively. They * are intended to be passed as the lkflags1 and lkflags2 parameters * in the subsequent call to unionfs_forward_vop_finish_pair(). * lkflags2 may be NULL iff basevp2 is NULL. */ void unionfs_forward_vop_start_pair(struct vnode *basevp1, int *lkflags1, struct vnode *basevp2, int *lkflags2) { /* * Take an additional reference on the base-layer vnodes to * avoid loss of reference if the unionfs vnodes are doomed. */ unionfs_forward_vop_ref(basevp1, lkflags1); if (basevp2 != NULL) unionfs_forward_vop_ref(basevp2, lkflags2); } static inline bool unionfs_forward_vop_rele(struct vnode *unionvp, struct vnode *basevp, int lkflags) { bool unionvp_doomed; if (__predict_false(VTOUNIONFS(unionvp) == NULL)) { if ((lkflags & LK_EXCLUSIVE) != 0) ASSERT_VOP_ELOCKED(basevp, __func__); else ASSERT_VOP_LOCKED(basevp, __func__); unionvp_doomed = true; } else { vrele(basevp); unionvp_doomed = false; } return (unionvp_doomed); } /* * Indicate completion of a forwarded VOP previously prepared by * unionfs_forward_vop_start_pair(). * basevp1 and basevp2 must be the same values passed to the prior * call to unionfs_forward_vop_start_pair(). unionvp1 and unionvp2 * must be the unionfs vnodes that were initially above basevp1 and * basevp2, respectively. * basevp1 and basevp2 (if not NULL) must be locked when this function * is called, while unionvp1 and/or unionvp2 may be unlocked if either * unionfs vnode has become doomed.
* lkflags1 and lkflags2 represent the locking flags that should be * used to re-lock unionvp1 and unionvp2, respectively, if either * vnode has become doomed. * * Returns true if any unionfs vnode was found to be doomed, false * otherwise. */ bool unionfs_forward_vop_finish_pair( struct vnode *unionvp1, struct vnode *basevp1, int lkflags1, struct vnode *unionvp2, struct vnode *basevp2, int lkflags2) { bool vp1_doomed, vp2_doomed; /* * If either vnode is found to have been doomed, set * a flag indicating that it needs to be re-locked. * Otherwise, simply drop the base-vnode reference that * was taken in unionfs_forward_vop_start(). */ vp1_doomed = unionfs_forward_vop_rele(unionvp1, basevp1, lkflags1); if (unionvp2 != NULL) vp2_doomed = unionfs_forward_vop_rele(unionvp2, basevp2, lkflags2); else vp2_doomed = false; /* * If any of the unionfs vnodes need to be re-locked, that * means the unionfs vnode's lock is now de-coupled from the * corresponding base vnode. We therefore need to drop the * base vnode lock (since nothing else will after this point), * and also release the reference taken in * unionfs_forward_vop_start_pair(). */ if (__predict_false(vp1_doomed && vp2_doomed)) VOP_VPUT_PAIR(basevp1, &basevp2, true); else if (__predict_false(vp1_doomed)) { /* * If basevp1 needs to be unlocked, then we may not * be able to safely unlock it with basevp2 still locked, * for the same reason that an ordinary VFS call would * need to use VOP_VPUT_PAIR() here. We might be able * to use VOP_VPUT_PAIR(..., false) here, but then we * would need to deal with the possibility of basevp2 * changing out from under us, which could result in * either the unionfs vnode becoming doomed or its * upper/lower vp no longer matching basevp2. Either * scenario would require at least re-locking the unionfs * vnode anyway.
*/ if (unionvp2 != NULL) { VOP_UNLOCK(unionvp2); vp2_doomed = true; } vput(basevp1); } else if (__predict_false(vp2_doomed)) vput(basevp2); if (__predict_false(vp1_doomed || vp2_doomed)) vn_lock_pair(unionvp1, !vp1_doomed, lkflags1, unionvp2, !vp2_doomed, lkflags2); return (vp1_doomed || vp2_doomed); } /* * Create a new whiteout. * - * udvp and dvp should be locked on entry and will be locked on return. + * dvp and vp are unionfs vnodes representing a parent directory and + * child file, should be locked on entry, and will be locked on return. */ int -unionfs_mkwhiteout(struct vnode *dvp, struct vnode *udvp, +unionfs_mkwhiteout(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, struct thread *td, char *path, int pathlen) { + struct vnode *udvp; struct vnode *wvp; struct nameidata nd; struct mount *mp; int error; - int lkflags; + bool dvp_locked; + + ASSERT_VOP_ELOCKED(dvp, __func__); + ASSERT_VOP_ELOCKED(vp, __func__); + udvp = VTOUNIONFS(dvp)->un_uppervp; wvp = NULLVP; NDPREINIT(&nd); + vref(udvp); + VOP_UNLOCK(vp); if ((error = unionfs_relookup(udvp, &wvp, cnp, &nd.ni_cnd, td, path, pathlen, CREATE))) { - return (error); + goto unionfs_mkwhiteout_cleanup; } if (wvp != NULLVP) { if (udvp == wvp) vrele(wvp); else vput(wvp); - return (EEXIST); + if (nd.ni_cnd.cn_flags & ISWHITEOUT) + error = 0; + else + error = EEXIST; + goto unionfs_mkwhiteout_cleanup; } if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH))) - goto unionfs_mkwhiteout_free_out; - unionfs_forward_vop_start(udvp, &lkflags); + goto unionfs_mkwhiteout_cleanup; error = VOP_WHITEOUT(udvp, &nd.ni_cnd, CREATE); - unionfs_forward_vop_finish(dvp, udvp, lkflags); - vn_finished_write(mp); -unionfs_mkwhiteout_free_out: +unionfs_mkwhiteout_cleanup: + if (VTOUNIONFS(dvp) == NULL) { + vput(udvp); + dvp_locked = false; + } else { + vrele(udvp); + dvp_locked = true; + } + vn_lock_pair(dvp, dvp_locked, LK_EXCLUSIVE, vp, false, LK_EXCLUSIVE); return (error); } /* * Create a new vnode for create a new 
shadow file. * * If an error is returned, *vpp will be invalid, otherwise it will hold a * locked, referenced and opened vnode. * * unp is never updated. */ static int unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp, - struct unionfs_node *unp, struct vattr *uvap, struct thread *td) + struct vnode *vp, struct vattr *uvap, struct thread *td) { struct unionfs_mount *ump; - struct vnode *vp; + struct unionfs_node *unp; + struct vnode *uvp; struct vnode *lvp; struct ucred *cred; struct vattr lva; struct nameidata nd; int fmode; int error; + ASSERT_VOP_ELOCKED(vp, __func__); + unp = VTOUNIONFS(vp); ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount); - vp = NULLVP; + uvp = NULLVP; lvp = unp->un_lowervp; cred = td->td_ucred; fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL); error = 0; if ((error = VOP_GETATTR(lvp, &lva, cred)) != 0) return (error); unionfs_create_uppervattr_core(ump, &lva, uvap, td); if (unp->un_path == NULL) panic("%s: NULL un_path", __func__); nd.ni_cnd.cn_namelen = unp->un_pathlen; nd.ni_cnd.cn_pnbuf = unp->un_path; nd.ni_cnd.cn_nameiop = CREATE; nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | ISLASTCN; nd.ni_cnd.cn_lkflags = LK_EXCLUSIVE; nd.ni_cnd.cn_cred = cred; nd.ni_cnd.cn_nameptr = nd.ni_cnd.cn_pnbuf; NDPREINIT(&nd); vref(udvp); - if ((error = vfs_relookup(udvp, &vp, &nd.ni_cnd, false)) != 0) - goto unionfs_vn_create_on_upper_free_out2; - vrele(udvp); + VOP_UNLOCK(vp); + if ((error = vfs_relookup(udvp, &uvp, &nd.ni_cnd, false)) != 0) { + vrele(udvp); + return (error); + } - if (vp != NULLVP) { - if (vp == udvp) - vrele(vp); + if (uvp != NULLVP) { + if (uvp == udvp) + vrele(uvp); else - vput(vp); + vput(uvp); error = EEXIST; - goto unionfs_vn_create_on_upper_free_out1; + goto unionfs_vn_create_on_upper_cleanup; } - if ((error = VOP_CREATE(udvp, &vp, &nd.ni_cnd, uvap)) != 0) - goto unionfs_vn_create_on_upper_free_out1; + if ((error = VOP_CREATE(udvp, &uvp, &nd.ni_cnd, uvap)) != 0) + goto unionfs_vn_create_on_upper_cleanup; - if 
((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) { - vput(vp); - goto unionfs_vn_create_on_upper_free_out1; + if ((error = VOP_OPEN(uvp, fmode, cred, td, NULL)) != 0) { + vput(uvp); + goto unionfs_vn_create_on_upper_cleanup; } - error = VOP_ADD_WRITECOUNT(vp, 1); - CTR3(KTR_VFS, "%s: vp %p v_writecount increased to %d", - __func__, vp, vp->v_writecount); + error = VOP_ADD_WRITECOUNT(uvp, 1); + CTR3(KTR_VFS, "%s: newvp %p v_writecount increased to %d", + __func__, uvp, uvp->v_writecount); if (error == 0) { - *vpp = vp; + *vpp = uvp; } else { - VOP_CLOSE(vp, fmode, cred, td); + VOP_CLOSE(uvp, fmode, cred, td); } -unionfs_vn_create_on_upper_free_out1: - VOP_UNLOCK(udvp); - -unionfs_vn_create_on_upper_free_out2: - KASSERT(nd.ni_cnd.cn_pnbuf == unp->un_path, - ("%s: cn_pnbuf changed", __func__)); - +unionfs_vn_create_on_upper_cleanup: + vput(udvp); return (error); } /* * Copy from lvp to uvp. * * lvp and uvp should be locked and opened on entry and will be locked and * opened on return.
*/ static int unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp, struct ucred *cred, struct thread *td) { char *buf; struct uio uio; struct iovec iov; off_t offset; int count; int error; int bufoffset; error = 0; memset(&uio, 0, sizeof(uio)); uio.uio_td = td; uio.uio_segflg = UIO_SYSSPACE; uio.uio_offset = 0; buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); while (error == 0) { offset = uio.uio_offset; uio.uio_iov = &iov; uio.uio_iovcnt = 1; iov.iov_base = buf; iov.iov_len = MAXBSIZE; uio.uio_resid = iov.iov_len; uio.uio_rw = UIO_READ; if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0) break; if ((count = MAXBSIZE - uio.uio_resid) == 0) break; bufoffset = 0; while (bufoffset < count) { uio.uio_iov = &iov; uio.uio_iovcnt = 1; iov.iov_base = buf + bufoffset; iov.iov_len = count - bufoffset; uio.uio_offset = offset + bufoffset; uio.uio_resid = iov.iov_len; uio.uio_rw = UIO_WRITE; if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0) break; bufoffset += (count - bufoffset) - uio.uio_resid; } uio.uio_offset = offset + bufoffset; } free(buf, M_TEMP); return (error); } /* * Copy file from lower to upper. * * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to * docopy. + * + * vp is a unionfs vnode that should be locked on entry and will be + * locked on return. * * If no error returned, unp will be updated. 
*/ int -unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred, +unionfs_copyfile(struct vnode *vp, int docopy, struct ucred *cred, struct thread *td) { + struct unionfs_node *unp; + struct unionfs_node *dunp; struct mount *mp; struct vnode *udvp; struct vnode *lvp; struct vnode *uvp; struct vattr uva; int error; + ASSERT_VOP_ELOCKED(vp, __func__); + unp = VTOUNIONFS(vp); lvp = unp->un_lowervp; uvp = NULLVP; if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (unp->un_dvp == NULLVP) return (EINVAL); if (unp->un_uppervp != NULLVP) return (EEXIST); - udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp; + + udvp = NULLVP; + VI_LOCK(unp->un_dvp); + dunp = VTOUNIONFS(unp->un_dvp); + if (dunp != NULL) + udvp = dunp->un_uppervp; + VI_UNLOCK(unp->un_dvp); + if (udvp == NULLVP) return (EROFS); if ((udvp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); + ASSERT_VOP_UNLOCKED(udvp, __func__); + + error = unionfs_set_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS); + if (error == EJUSTRETURN) + return (0); + else if (error != 0) + return (error); error = VOP_ACCESS(lvp, VREAD, cred, td); if (error != 0) - return (error); + goto unionfs_copyfile_cleanup; if ((error = vn_start_write(udvp, &mp, V_WAIT | V_PCATCH)) != 0) - return (error); - error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td); + goto unionfs_copyfile_cleanup; + error = unionfs_vn_create_on_upper(&uvp, udvp, vp, &uva, td); if (error != 0) { vn_finished_write(mp); - return (error); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + goto unionfs_copyfile_cleanup; + } + + /* + * Note that it's still possible for e.g. VOP_WRITE to relock + * uvp below while holding vp[=lvp] locked. Replacing + * unionfs_copyfile_core with vn_generic_copy_file_range() will + * allow us to avoid the problem by moving this vn_lock_pair() + * call much later. 
+ */ + vn_lock_pair(vp, false, LK_EXCLUSIVE, uvp, true, LK_EXCLUSIVE); + unp = VTOUNIONFS(vp); + if (unp == NULL) { + error = ENOENT; + goto unionfs_copyfile_cleanup; } if (docopy != 0) { error = VOP_OPEN(lvp, FREAD, cred, td, NULL); if (error == 0) { error = unionfs_copyfile_core(lvp, uvp, cred, td); VOP_CLOSE(lvp, FREAD, cred, td); } } VOP_CLOSE(uvp, FWRITE, cred, td); VOP_ADD_WRITECOUNT_CHECKED(uvp, -1); CTR3(KTR_VFS, "%s: vp %p v_writecount decreased to %d", __func__, uvp, uvp->v_writecount); vn_finished_write(mp); if (error == 0) { /* Reset the attributes. Ignore errors. */ uva.va_type = VNON; VOP_SETATTR(uvp, &uva, cred); + unionfs_node_update(unp, uvp, td); } - unionfs_node_update(unp, uvp, td); - +unionfs_copyfile_cleanup: + unionfs_clear_in_progress_flag(vp, UNIONFS_COPY_IN_PROGRESS); return (error); } /* - * It checks whether vp can rmdir. (check empty) + * Determine if the unionfs view of a directory is empty such that + * an rmdir operation can be permitted. + * + * We assume the VOP_RMDIR() against the upper layer vnode will take + * care of this check for us where the upper FS is concerned, so here + * we concentrate on the lower FS. We need to check for the presence + * of files other than "." and ".." in the lower FS directory and + * then cross-check any files we find against the upper FS to see if + * a whiteout is present (in which case we treat the lower file as + * non-present). * - * vp is unionfs vnode. - * vp should be locked. + * The logic here is based heavily on vn_dir_check_empty(). + * + * vp should be a locked unionfs node, and vp's lowervp should also be + * locked. 
*/ int unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td) { struct vnode *uvp; struct vnode *lvp; struct vnode *tvp; + char *dirbuf; + size_t dirbuflen, len; + off_t off; struct dirent *dp; - struct dirent *edp; struct componentname cn; - struct iovec iov; - struct uio uio; struct vattr va; int error; int eofflag; - int lookuperr; - - /* - * The size of buf needs to be larger than DIRBLKSIZ. - */ - char buf[256 * 6]; - - ASSERT_VOP_ELOCKED(vp, __func__); eofflag = 0; - uvp = UNIONFSVPTOUPPERVP(vp); lvp = UNIONFSVPTOLOWERVP(vp); + uvp = UNIONFSVPTOUPPERVP(vp); + + /* + * Note that the locking here still isn't ideal: We expect the caller + * to hold both the upper and lower layer locks as well as the upper + * parent directory lock, which it can do in a manner that avoids + * deadlock. However, if the cross-check logic below needs to call + * VOP_LOOKUP(), that may relock the upper vnode and lock any found + * child vnode in a way that doesn't protect against deadlock given + * the other held locks. Beyond that, the various other VOPs we issue + * below, such as VOP_OPEN() and VOP_READDIR(), may also re-lock the + * lower vnode. + * We might instead just hand off between the upper vnode lock + * (and its parent directory lock) and the lower vnode lock as needed, + * so that the lower lock is never held at the same time as the upper + * locks, but that opens up a wider window in which the upper + * directory (and also the lower directory if it isn't truly + * read-only) may change while the relevant lock is dropped. But + * since re-locking may happen here and open up such a window anyway, + * perhaps that is a worthwhile tradeoff? Or perhaps we can ultimately + * do sufficient tracking of empty state within the unionfs vnode + * (in conjunction with upcalls from the lower FSes to notify us + * of out-of-band state changes) that we can avoid these costly checks + * altogether.
+ */ + ASSERT_VOP_LOCKED(lvp, __func__); + ASSERT_VOP_ELOCKED(uvp, __func__); - /* check opaque */ if ((error = VOP_GETATTR(uvp, &va, cred)) != 0) return (error); if (va.va_flags & OPAQUE) return (0); - /* open vnode */ #ifdef MAC - if ((error = mac_vnode_check_open(cred, vp, VEXEC|VREAD)) != 0) + if ((error = mac_vnode_check_open(cred, lvp, VEXEC | VREAD)) != 0) return (error); #endif - if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0) + if ((error = VOP_ACCESS(lvp, VEXEC | VREAD, cred, td)) != 0) return (error); - if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0) + if ((error = VOP_OPEN(lvp, FREAD, cred, td, NULL)) != 0) + return (error); + if ((error = VOP_GETATTR(lvp, &va, cred)) != 0) { + /* Undo the VOP_OPEN() above before bailing out. */ + VOP_CLOSE(lvp, FREAD, cred, td); return (error); + } - uio.uio_rw = UIO_READ; - uio.uio_segflg = UIO_SYSSPACE; - uio.uio_td = td; - uio.uio_offset = 0; + dirbuflen = max(DEV_BSIZE, GENERIC_MAXDIRSIZ); + if (dirbuflen < va.va_blocksize) + dirbuflen = va.va_blocksize; + dirbuf = malloc(dirbuflen, M_TEMP, M_WAITOK); -#ifdef MAC - error = mac_vnode_check_readdir(td->td_ucred, lvp); -#endif - while (!error && !eofflag) { - iov.iov_base = buf; - iov.iov_len = sizeof(buf); - uio.uio_iov = &iov; - uio.uio_iovcnt = 1; - uio.uio_resid = iov.iov_len; + len = 0; + off = 0; + eofflag = 0; - error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL); + for (;;) { + error = vn_dir_next_dirent(lvp, td, dirbuf, dirbuflen, + &dp, &len, &off, &eofflag); if (error != 0) break; - KASSERT(eofflag != 0 || uio.uio_resid < sizeof(buf), - ("%s: empty read from lower FS", __func__)); - - edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid]; - for (dp = (struct dirent*)buf; !error && dp < edp; - dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) { - if (dp->d_type == DT_WHT || dp->d_fileno == 0 || - (dp->d_namlen == 1 && dp->d_name[0] == '.') || - (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2))) - continue; - - cn.cn_namelen = dp->d_namlen; - cn.cn_pnbuf = NULL; - cn.cn_nameptr = dp->d_name; - cn.cn_nameiop = LOOKUP; -
cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN; - cn.cn_lkflags = LK_EXCLUSIVE; - cn.cn_cred = cred; - - /* - * check entry in lower. - * Sometimes, readdir function returns - * wrong entry. - */ - lookuperr = VOP_LOOKUP(lvp, &tvp, &cn); - if (!lookuperr) - vput(tvp); - else - continue; /* skip entry */ - - /* - * check entry - * If it has no exist/whiteout entry in upper, - * directory is not empty. - */ - cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN; - lookuperr = VOP_LOOKUP(uvp, &tvp, &cn); + if (len == 0) { + /* EOF */ + error = 0; + break; + } - if (!lookuperr) - vput(tvp); + if (dp->d_type == DT_WHT) + continue; - /* ignore exist or whiteout entry */ - if (!lookuperr || - (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT))) - continue; + /* + * Any file in the directory which is not '.' or '..' indicates + * the directory is not empty. + */ + switch (dp->d_namlen) { + case 2: + if (dp->d_name[1] != '.') { + /* Can't be '..' (nor '.') */ + break; + } + /* FALLTHROUGH */ + case 1: + if (dp->d_name[0] != '.') { + /* Can't be '..' nor '.' 
*/ + break; + } + continue; + default: + break; + } + cn.cn_namelen = dp->d_namlen; + cn.cn_pnbuf = NULL; + cn.cn_nameptr = dp->d_name; + cn.cn_nameiop = LOOKUP; + cn.cn_flags = LOCKPARENT | LOCKLEAF | RDONLY | ISLASTCN; + cn.cn_lkflags = LK_EXCLUSIVE; + cn.cn_cred = cred; + + error = VOP_LOOKUP(uvp, &tvp, &cn); + if (tvp != NULLVP) + vput(tvp); + if (error != 0 && error != ENOENT && error != EJUSTRETURN) + break; + else if ((cn.cn_flags & ISWHITEOUT) == 0) { error = ENOTEMPTY; - } + break; + } else + error = 0; } - /* close vnode */ - VOP_CLOSE(vp, FREAD, cred, td); - + VOP_CLOSE(lvp, FREAD, cred, td); + free(dirbuf, M_TEMP); return (error); } - diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c index cb55c2dd6474..9342317ad08e 100644 --- a/sys/fs/unionfs/union_vfsops.c +++ b/sys/fs/unionfs/union_vfsops.c @@ -1,588 +1,585 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1994, 1995 The Regents of the University of California. * Copyright (c) 1994, 1995 Jan-Simon Pendry. * Copyright (c) 2005, 2006, 2012 Masanori Ozawa , ONGS Inc. * Copyright (c) 2006, 2012 Daichi Goto * All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_UNIONFSMNT, "UNIONFS mount", "UNIONFS mount structure"); static vfs_fhtovp_t unionfs_fhtovp; static vfs_checkexp_t unionfs_checkexp; static vfs_mount_t unionfs_domount; static vfs_quotactl_t unionfs_quotactl; static vfs_root_t unionfs_root; static vfs_sync_t unionfs_sync; static vfs_statfs_t unionfs_statfs; static vfs_unmount_t unionfs_unmount; static vfs_vget_t unionfs_vget; static vfs_extattrctl_t unionfs_extattrctl; static struct vfsops unionfs_vfsops; /* * Mount unionfs layer. 
*/ static int unionfs_domount(struct mount *mp) { struct vnode *lowerrootvp; struct vnode *upperrootvp; struct unionfs_mount *ump; char *target; char *tmp; char *ep; struct nameidata nd, *ndp; struct vattr va; unionfs_copymode copymode; unionfs_whitemode whitemode; int below; int error; int len; uid_t uid; gid_t gid; u_short udir; u_short ufile; UNIONFSDEBUG("unionfs_mount(mp = %p)\n", mp); error = 0; below = 0; uid = 0; gid = 0; udir = 0; ufile = 0; copymode = UNIONFS_TRANSPARENT; /* default */ whitemode = UNIONFS_WHITE_ALWAYS; ndp = &nd; if (mp->mnt_flag & MNT_ROOTFS) { vfs_mount_error(mp, "Cannot union mount root filesystem"); return (EOPNOTSUPP); } /* * Update is a no operation. */ if (mp->mnt_flag & MNT_UPDATE) { vfs_mount_error(mp, "unionfs does not support mount update"); return (EOPNOTSUPP); } /* * Get argument */ error = vfs_getopt(mp->mnt_optnew, "target", (void **)&target, &len); if (error) error = vfs_getopt(mp->mnt_optnew, "from", (void **)&target, &len); if (error || target[len - 1] != '\0') { vfs_mount_error(mp, "Invalid target"); return (EINVAL); } if (vfs_getopt(mp->mnt_optnew, "below", NULL, NULL) == 0) below = 1; if (vfs_getopt(mp->mnt_optnew, "udir", (void **)&tmp, NULL) == 0) { if (tmp != NULL) udir = (mode_t)strtol(tmp, &ep, 8); if (tmp == NULL || *ep) { vfs_mount_error(mp, "Invalid udir"); return (EINVAL); } udir &= S_IRWXU | S_IRWXG | S_IRWXO; } if (vfs_getopt(mp->mnt_optnew, "ufile", (void **)&tmp, NULL) == 0) { if (tmp != NULL) ufile = (mode_t)strtol(tmp, &ep, 8); if (tmp == NULL || *ep) { vfs_mount_error(mp, "Invalid ufile"); return (EINVAL); } ufile &= S_IRWXU | S_IRWXG | S_IRWXO; } /* check umask, uid and gid */ if (udir == 0 && ufile != 0) udir = ufile; if (ufile == 0 && udir != 0) ufile = udir; vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY); error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred); if (!error) { if (udir == 0) udir = va.va_mode; if (ufile == 0) ufile = va.va_mode; uid = va.va_uid; gid = va.va_gid; } 
VOP_UNLOCK(mp->mnt_vnodecovered); if (error) return (error); if (mp->mnt_cred->cr_ruid == 0) { /* root only */ if (vfs_getopt(mp->mnt_optnew, "uid", (void **)&tmp, NULL) == 0) { if (tmp != NULL) uid = (uid_t)strtol(tmp, &ep, 10); if (tmp == NULL || *ep) { vfs_mount_error(mp, "Invalid uid"); return (EINVAL); } } if (vfs_getopt(mp->mnt_optnew, "gid", (void **)&tmp, NULL) == 0) { if (tmp != NULL) gid = (gid_t)strtol(tmp, &ep, 10); if (tmp == NULL || *ep) { vfs_mount_error(mp, "Invalid gid"); return (EINVAL); } } if (vfs_getopt(mp->mnt_optnew, "copymode", (void **)&tmp, NULL) == 0) { if (tmp == NULL) { vfs_mount_error(mp, "Invalid copymode"); return (EINVAL); } else if (strcasecmp(tmp, "traditional") == 0) copymode = UNIONFS_TRADITIONAL; else if (strcasecmp(tmp, "transparent") == 0) copymode = UNIONFS_TRANSPARENT; else if (strcasecmp(tmp, "masquerade") == 0) copymode = UNIONFS_MASQUERADE; else { vfs_mount_error(mp, "Invalid copymode"); return (EINVAL); } } if (vfs_getopt(mp->mnt_optnew, "whiteout", (void **)&tmp, NULL) == 0) { if (tmp == NULL) { vfs_mount_error(mp, "Invalid whiteout mode"); return (EINVAL); } else if (strcasecmp(tmp, "always") == 0) whitemode = UNIONFS_WHITE_ALWAYS; else if (strcasecmp(tmp, "whenneeded") == 0) whitemode = UNIONFS_WHITE_WHENNEEDED; else { vfs_mount_error(mp, "Invalid whiteout mode"); return (EINVAL); } } } /* If copymode is UNIONFS_TRADITIONAL, uid/gid is mounted user. 
*/ if (copymode == UNIONFS_TRADITIONAL) { uid = mp->mnt_cred->cr_ruid; gid = mp->mnt_cred->cr_rgid; } UNIONFSDEBUG("unionfs_mount: uid=%d, gid=%d\n", uid, gid); UNIONFSDEBUG("unionfs_mount: udir=0%03o, ufile=0%03o\n", udir, ufile); UNIONFSDEBUG("unionfs_mount: copymode=%d\n", copymode); /* * Find upper node */ NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, target); if ((error = namei(ndp))) return (error); NDFREE_PNBUF(ndp); /* get root vnodes */ lowerrootvp = mp->mnt_vnodecovered; upperrootvp = ndp->ni_vp; KASSERT(lowerrootvp != NULL, ("%s: NULL lower root vp", __func__)); KASSERT(upperrootvp != NULL, ("%s: NULL upper root vp", __func__)); /* create unionfs_mount */ ump = malloc(sizeof(struct unionfs_mount), M_UNIONFSMNT, M_WAITOK | M_ZERO); /* * Save reference */ if (below) { VOP_UNLOCK(upperrootvp); vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY); ump->um_lowervp = upperrootvp; ump->um_uppervp = lowerrootvp; } else { ump->um_lowervp = lowerrootvp; ump->um_uppervp = upperrootvp; } ump->um_rootvp = NULLVP; ump->um_uid = uid; ump->um_gid = gid; ump->um_udir = udir; ump->um_ufile = ufile; ump->um_copymode = copymode; ump->um_whitemode = whitemode; mp->mnt_data = ump; /* * Copy upper layer's RDONLY flag. */ mp->mnt_flag |= ump->um_uppervp->v_mount->mnt_flag & MNT_RDONLY; /* * Unlock the node */ VOP_UNLOCK(ump->um_uppervp); /* * Get the unionfs root vnode. */ error = unionfs_nodeget(mp, ump->um_uppervp, ump->um_lowervp, NULLVP, &(ump->um_rootvp), NULL); if (error != 0) { vrele(upperrootvp); free(ump, M_UNIONFSMNT); mp->mnt_data = NULL; return (error); } KASSERT(ump->um_rootvp != NULL, ("rootvp cannot be NULL")); KASSERT((ump->um_rootvp->v_vflag & VV_ROOT) != 0, ("%s: rootvp without VV_ROOT", __func__)); /* * Do not release the namei() reference on upperrootvp until after * we attempt to register the upper mounts. 
A concurrent unmount * of the upper or lower FS may have caused unionfs_nodeget() to * create a unionfs node with a NULL upper or lower vp and with * no reference held on upperrootvp or lowerrootvp. * vfs_register_upper() should subsequently fail, which is what * we want, but we must ensure neither underlying vnode can be * reused until that happens. We assume the caller holds a reference * to lowerrootvp as it is the mount's covered vnode. */ ump->um_lowermp = vfs_register_upper_from_vp(ump->um_lowervp, mp, &ump->um_lower_link); ump->um_uppermp = vfs_register_upper_from_vp(ump->um_uppervp, mp, &ump->um_upper_link); vrele(upperrootvp); if (ump->um_lowermp == NULL || ump->um_uppermp == NULL) { if (ump->um_lowermp != NULL) vfs_unregister_upper(ump->um_lowermp, &ump->um_lower_link); if (ump->um_uppermp != NULL) vfs_unregister_upper(ump->um_uppermp, &ump->um_upper_link); vflush(mp, 1, FORCECLOSE, curthread); free(ump, M_UNIONFSMNT); mp->mnt_data = NULL; return (ENOENT); } /* * Specify that the covered vnode lock should remain held while * lookup() performs the cross-mount walk. This prevents a lock-order * reversal between the covered vnode lock (which is also locked by * unionfs_lock()) and the mountpoint's busy count. Without this, * unmount will lock the covered vnode lock (directly through the * covered vnode) and wait for the busy count to drain, while a - * concurrent lookup will increment the busy count and then lock + * concurrent lookup will increment the busy count and then may lock * the covered vnode lock (indirectly through unionfs_lock()). 
* - * Note that we can't yet use this facility for the 'below' case - * in which the upper vnode is the covered vnode, because that would - * introduce a different LOR in which the cross-mount lookup would - * effectively hold the upper vnode lock before acquiring the lower - * vnode lock, while an unrelated lock operation would still acquire - * the lower vnode lock before the upper vnode lock, which is the - * order unionfs currently requires. + * Note that this is only needed for the 'below' case in which the + * upper vnode is also the covered vnode, because unionfs_lock() + * only locks the upper vnode as long as both lower and upper vnodes + * are present (which they will always be for the unionfs mount root). */ - if (!below) { + if (below) { vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); mp->mnt_vnodecovered->v_vflag |= VV_CROSSLOCK; VOP_UNLOCK(mp->mnt_vnodecovered); } MNT_ILOCK(mp); if ((ump->um_lowermp->mnt_flag & MNT_LOCAL) != 0 && (ump->um_uppermp->mnt_flag & MNT_LOCAL) != 0) mp->mnt_flag |= MNT_LOCAL; mp->mnt_kern_flag |= MNTK_NOMSYNC | MNTK_UNIONFS | (ump->um_uppermp->mnt_kern_flag & MNTK_SHARED_WRITES); MNT_IUNLOCK(mp); /* * Get new fsid */ vfs_getnewfsid(mp); snprintf(mp->mnt_stat.f_mntfromname, MNAMELEN, "<%s>:%s", below ? 
"below" : "above", target); UNIONFSDEBUG("unionfs_mount: from %s, on %s\n", mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname); return (0); } /* * Free reference to unionfs layer */ static int unionfs_unmount(struct mount *mp, int mntflags) { struct unionfs_mount *ump; int error; int num; int freeing; int flags; UNIONFSDEBUG("unionfs_unmount: mp = %p\n", mp); ump = MOUNTTOUNIONFSMOUNT(mp); flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; /* vflush (no need to call vrele) */ for (freeing = 0; (error = vflush(mp, 1, flags, curthread)) != 0;) { num = mp->mnt_nvnodelistsize; if (num == freeing) break; freeing = num; } if (error) return (error); vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); mp->mnt_vnodecovered->v_vflag &= ~VV_CROSSLOCK; VOP_UNLOCK(mp->mnt_vnodecovered); vfs_unregister_upper(ump->um_lowermp, &ump->um_lower_link); vfs_unregister_upper(ump->um_uppermp, &ump->um_upper_link); free(ump, M_UNIONFSMNT); mp->mnt_data = NULL; return (0); } static int unionfs_root(struct mount *mp, int flags, struct vnode **vpp) { struct unionfs_mount *ump; struct vnode *vp; ump = MOUNTTOUNIONFSMOUNT(mp); vp = ump->um_rootvp; UNIONFSDEBUG("unionfs_root: rootvp=%p locked=%x\n", vp, VOP_ISLOCKED(vp)); vref(vp); if (flags & LK_TYPE_MASK) vn_lock(vp, flags); *vpp = vp; return (0); } static int unionfs_quotactl(struct mount *mp, int cmd, uid_t uid, void *arg, bool *mp_busy) { struct mount *uppermp; struct unionfs_mount *ump; int error; bool unbusy; ump = MOUNTTOUNIONFSMOUNT(mp); /* * Issue a volatile load of um_uppermp here, as the mount may be * torn down after we call vfs_unbusy(). */ uppermp = atomic_load_ptr(&ump->um_uppermp); KASSERT(*mp_busy == true, ("upper mount not busy")); /* * See comment in sys_quotactl() for an explanation of why the * lower mount needs to be busied by the caller of VFS_QUOTACTL() * but may be unbusied by the implementation. 
We must unbusy * the upper mount for the same reason; otherwise a namei lookup * issued by the VFS_QUOTACTL() implementation could traverse the * upper mount and deadlock. */ vfs_unbusy(mp); *mp_busy = false; unbusy = true; error = vfs_busy(uppermp, 0); /* * Writing is always performed to upper vnode. */ if (error == 0) error = VFS_QUOTACTL(uppermp, cmd, uid, arg, &unbusy); if (unbusy) vfs_unbusy(uppermp); return (error); } static int unionfs_statfs(struct mount *mp, struct statfs *sbp) { struct unionfs_mount *ump; struct statfs *mstat; uint64_t lbsize; int error; ump = MOUNTTOUNIONFSMOUNT(mp); UNIONFSDEBUG("unionfs_statfs(mp = %p, lvp = %p, uvp = %p)\n", mp, ump->um_lowervp, ump->um_uppervp); mstat = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK | M_ZERO); error = VFS_STATFS(ump->um_lowermp, mstat); if (error) { free(mstat, M_STATFS); return (error); } /* now copy across the "interesting" information and fake the rest */ sbp->f_blocks = mstat->f_blocks; sbp->f_files = mstat->f_files; lbsize = mstat->f_bsize; error = VFS_STATFS(ump->um_uppermp, mstat); if (error) { free(mstat, M_STATFS); return (error); } /* * The FS type etc is copy from upper vfs. 
* (write able vfs have priority) */ sbp->f_type = mstat->f_type; sbp->f_flags = mstat->f_flags; sbp->f_bsize = mstat->f_bsize; sbp->f_iosize = mstat->f_iosize; if (mstat->f_bsize != lbsize) sbp->f_blocks = ((off_t)sbp->f_blocks * lbsize) / mstat->f_bsize; sbp->f_blocks += mstat->f_blocks; sbp->f_bfree = mstat->f_bfree; sbp->f_bavail = mstat->f_bavail; sbp->f_files += mstat->f_files; sbp->f_ffree = mstat->f_ffree; free(mstat, M_STATFS); return (0); } static int unionfs_sync(struct mount *mp, int waitfor) { /* nothing to do */ return (0); } static int unionfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) { return (EOPNOTSUPP); } static int unionfs_fhtovp(struct mount *mp, struct fid *fidp, int flags, struct vnode **vpp) { return (EOPNOTSUPP); } static int unionfs_checkexp(struct mount *mp, struct sockaddr *nam, uint64_t *extflagsp, struct ucred **credanonp, int *numsecflavors, int *secflavors) { return (EOPNOTSUPP); } static int unionfs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp, int namespace, const char *attrname) { struct unionfs_mount *ump; struct unionfs_node *unp; ump = MOUNTTOUNIONFSMOUNT(mp); unp = VTOUNIONFS(filename_vp); if (unp->un_uppervp != NULLVP) { return (VFS_EXTATTRCTL(ump->um_uppermp, cmd, unp->un_uppervp, namespace, attrname)); } else { return (VFS_EXTATTRCTL(ump->um_lowermp, cmd, unp->un_lowervp, namespace, attrname)); } } static struct vfsops unionfs_vfsops = { .vfs_checkexp = unionfs_checkexp, .vfs_extattrctl = unionfs_extattrctl, .vfs_fhtovp = unionfs_fhtovp, .vfs_init = unionfs_init, .vfs_mount = unionfs_domount, .vfs_quotactl = unionfs_quotactl, .vfs_root = unionfs_root, .vfs_statfs = unionfs_statfs, .vfs_sync = unionfs_sync, .vfs_uninit = unionfs_uninit, .vfs_unmount = unionfs_unmount, .vfs_vget = unionfs_vget, }; VFS_SET(unionfs_vfsops, unionfs, VFCF_LOOPBACK); diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c index aa2a7273825a..ae1d3946266d 100644 --- 
a/sys/fs/unionfs/union_vnops.c +++ b/sys/fs/unionfs/union_vnops.c @@ -1,2933 +1,3021 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry. * Copyright (c) 1992, 1993, 1994, 1995 * The Regents of the University of California. * Copyright (c) 2005, 2006, 2012 Masanori Ozawa , ONGS Inc. * Copyright (c) 2006, 2012 Daichi Goto * All rights reserved. * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if 0 #define UNIONFS_INTERNAL_DEBUG(msg, args...) printf(msg, ## args) #define UNIONFS_IDBG_RENAME #else #define UNIONFS_INTERNAL_DEBUG(msg, args...) #endif #define KASSERT_UNIONFS_VNODE(vp) \ VNASSERT(((vp)->v_op == &unionfs_vnodeops), vp, \ ("%s: non-unionfs vnode", __func__)) static int unionfs_lookup(struct vop_cachedlookup_args *ap) { struct unionfs_node *dunp, *unp; struct vnode *dvp, *udvp, *ldvp, *vp, *uvp, *lvp, *dtmpvp; struct vattr va; struct componentname *cnp; struct thread *td; u_long nameiop; - u_long cnflags, cnflagsbk; - int iswhiteout; + u_long cnflags; int lockflag; - int error , uerror, lerror; + int lkflags; + int error, uerror, lerror; - iswhiteout = 0; lockflag = 0; error = uerror = lerror = ENOENT; cnp = ap->a_cnp; nameiop = cnp->cn_nameiop; cnflags = cnp->cn_flags; dvp = ap->a_dvp; dunp = VTOUNIONFS(dvp); udvp = dunp->un_uppervp; ldvp = dunp->un_lowervp; vp = uvp = lvp = NULLVP; td = curthread; *(ap->a_vpp) = NULLVP; UNIONFS_INTERNAL_DEBUG( "unionfs_lookup: enter: nameiop=%ld, flags=%lx, path=%s\n", nameiop, cnflags, cnp->cn_nameptr); if (dvp->v_type != VDIR) return (ENOTDIR); /* * If read-only and op is not LOOKUP, will return EROFS. */ if ((cnflags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && LOOKUP != nameiop) return (EROFS); + /* + * Note that a lookup is in-flight, and block if another lookup + * is already in-flight against dvp. This is done because we may + * end up dropping dvp's lock to look up a lower vnode or to create + * a shadow directory, opening up the possibility of parallel lookups + * against the same directory creating duplicate unionfs vnodes for + * the same file(s). 
Note that if this function encounters an + in-progress lookup for the directory, it will block until the + lookup is complete and then return ERELOOKUP to allow any + existing unionfs vnode to be loaded from the VFS cache. + * This is really a hack; filesystems that support MNTK_LOOKUP_SHARED + * (which unionfs currently doesn't) seem to deal with this by using + * the vfs_hash_* functions to manage a per-mount vnode cache keyed + * by the inode number (or some roughly equivalent unique ID + * usually associated with the storage medium). It may make sense + * for unionfs to adopt something similar as a replacement for its + * current half-baked directory-only cache implementation, particularly + * if we want to support MNTK_LOOKUP_SHARED here. + */ + error = unionfs_set_in_progress_flag(dvp, UNIONFS_LOOKUP_IN_PROGRESS); + if (error != 0) + return (error); /* * lookup dotdot */ if (cnflags & ISDOTDOT) { - if (LOOKUP != nameiop && udvp == NULLVP) - return (EROFS); + if (LOOKUP != nameiop && udvp == NULLVP) { + error = EROFS; + goto unionfs_lookup_return; + } - if (udvp != NULLVP) { + if (udvp != NULLVP) dtmpvp = udvp; - if (ldvp != NULLVP) - VOP_UNLOCK(ldvp); - } else dtmpvp = ldvp; + unionfs_forward_vop_start(dtmpvp, &lkflags); error = VOP_LOOKUP(dtmpvp, &vp, cnp); + unionfs_forward_vop_finish(dvp, dtmpvp, lkflags); - if (dtmpvp == udvp && ldvp != NULLVP) { - VOP_UNLOCK(udvp); - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); - dunp = VTOUNIONFS(dvp); - if (error == 0 && dunp == NULL) - error = ENOENT; - } + /* + * Drop the lock and reference on vp. If the lookup was + * successful, we'll either need to exchange vp's lock and + * reference for the unionfs parent vnode's lock and + * reference, or (if dvp was reclaimed) we'll need to drop + * vp's lock and reference to return early. 
+ */ + if (vp != NULLVP) + vput(vp); + dunp = VTOUNIONFS(dvp); + if (error == 0 && dunp == NULL) + error = ENOENT; if (error == 0) { - /* - * Exchange lock and reference from vp to - * dunp->un_dvp. vp is upper/lower vnode, but it - * will need to return the unionfs vnode. - */ - if (nameiop == DELETE || nameiop == RENAME || - (cnp->cn_lkflags & LK_TYPE_MASK)) - VOP_UNLOCK(vp); - vrele(vp); - dtmpvp = dunp->un_dvp; vref(dtmpvp); VOP_UNLOCK(dvp); *(ap->a_vpp) = dtmpvp; - if (nameiop == DELETE || nameiop == RENAME) - vn_lock(dtmpvp, LK_EXCLUSIVE | LK_RETRY); - else if (cnp->cn_lkflags & LK_TYPE_MASK) - vn_lock(dtmpvp, cnp->cn_lkflags | - LK_RETRY); + vn_lock(dtmpvp, cnp->cn_lkflags | LK_RETRY); + if (VN_IS_DOOMED(dtmpvp)) { + vput(dtmpvp); + *(ap->a_vpp) = NULLVP; + error = ENOENT; + } vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); - } else if (error == ENOENT && (cnflags & MAKEENTRY) != 0) - cache_enter(dvp, NULLVP, cnp); + } - goto unionfs_lookup_return; + goto unionfs_lookup_cleanup; } + /* + * Lookup lower layer. We do this before looking up the upper + * layer, as we may drop the upper parent directory's lock, and we + * want to ensure the upper parent remains locked from the point of + * lookup through any ensuing VOP that may require it to be locked. + * The cost of this is that we may end up performing an unnecessary + * lower layer lookup if a whiteout is present in the upper layer. 
+ */ + if (ldvp != NULLVP && !(cnflags & DOWHITEOUT)) { + struct componentname lcn; + bool is_dot; + + if (udvp != NULLVP) { + vref(ldvp); + VOP_UNLOCK(dvp); + vn_lock(ldvp, LK_EXCLUSIVE | LK_RETRY); + } + + lcn = *cnp; + /* always op is LOOKUP */ + lcn.cn_nameiop = LOOKUP; + lcn.cn_flags = cnflags; + is_dot = false; + + if (udvp == NULLVP) + unionfs_forward_vop_start(ldvp, &lkflags); + lerror = VOP_LOOKUP(ldvp, &lvp, &lcn); + if (udvp == NULLVP && + unionfs_forward_vop_finish(dvp, ldvp, lkflags)) { + if (lvp != NULLVP) + VOP_UNLOCK(lvp); + error = ENOENT; + goto unionfs_lookup_cleanup; + } + + if (udvp == NULLVP) + cnp->cn_flags = lcn.cn_flags; + + if (lerror == 0) { + if (ldvp == lvp) { /* is dot */ + vrele(lvp); + *(ap->a_vpp) = dvp; + vref(dvp); + is_dot = true; + error = lerror; + } else if (lvp != NULLVP) + VOP_UNLOCK(lvp); + } + + if (udvp != NULLVP) { + vput(ldvp); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); + if (VN_IS_DOOMED(dvp)) + error = ENOENT; + } + if (is_dot) + goto unionfs_lookup_return; + else if (error != 0) + goto unionfs_lookup_cleanup; + } /* * lookup upper layer */ if (udvp != NULLVP) { + bool iswhiteout = false; + + unionfs_forward_vop_start(udvp, &lkflags); uerror = VOP_LOOKUP(udvp, &uvp, cnp); + if (unionfs_forward_vop_finish(dvp, udvp, lkflags)) { + if (uvp != NULLVP) + VOP_UNLOCK(uvp); + error = ENOENT; + goto unionfs_lookup_cleanup; + } if (uerror == 0) { if (udvp == uvp) { /* is dot */ + if (lvp != NULLVP) + vrele(lvp); vrele(uvp); *(ap->a_vpp) = dvp; vref(dvp); error = uerror; goto unionfs_lookup_return; - } - if (nameiop == DELETE || nameiop == RENAME || - (cnp->cn_lkflags & LK_TYPE_MASK)) + } else if (uvp != NULLVP) VOP_UNLOCK(uvp); } /* check whiteout */ - if (uerror == ENOENT || uerror == EJUSTRETURN) - if (cnp->cn_flags & ISWHITEOUT) - iswhiteout = 1; /* don't lookup lower */ - if (iswhiteout == 0 && ldvp != NULLVP) - if (!VOP_GETATTR(udvp, &va, cnp->cn_cred) && - (va.va_flags & OPAQUE)) - iswhiteout = 1; /* don't lookup lower */ + 
if ((uerror == ENOENT || uerror == EJUSTRETURN) && + (cnp->cn_flags & ISWHITEOUT)) + iswhiteout = true; + else if (VOP_GETATTR(udvp, &va, cnp->cn_cred) == 0 && + (va.va_flags & OPAQUE)) + iswhiteout = true; + + if (iswhiteout && lvp != NULLVP) { + vrele(lvp); + lvp = NULLVP; + } + #if 0 UNIONFS_INTERNAL_DEBUG( "unionfs_lookup: debug: whiteout=%d, path=%s\n", iswhiteout, cnp->cn_nameptr); #endif } - /* - * lookup lower layer - */ - if (ldvp != NULLVP && !(cnflags & DOWHITEOUT) && iswhiteout == 0) { - /* always op is LOOKUP */ - cnp->cn_nameiop = LOOKUP; - cnflagsbk = cnp->cn_flags; - cnp->cn_flags = cnflags; - - lerror = VOP_LOOKUP(ldvp, &lvp, cnp); - - cnp->cn_nameiop = nameiop; - if (udvp != NULLVP && (uerror == 0 || uerror == EJUSTRETURN)) - cnp->cn_flags = cnflagsbk; - - if (lerror == 0) { - if (ldvp == lvp) { /* is dot */ - if (uvp != NULLVP) - vrele(uvp); /* no need? */ - vrele(lvp); - *(ap->a_vpp) = dvp; - vref(dvp); - - UNIONFS_INTERNAL_DEBUG( - "unionfs_lookup: leave (%d)\n", lerror); - - return (lerror); - } - if (cnp->cn_lkflags & LK_TYPE_MASK) - VOP_UNLOCK(lvp); - } - } - /* * check lookup result */ if (uvp == NULLVP && lvp == NULLVP) { error = (udvp != NULLVP ? uerror : lerror); goto unionfs_lookup_return; } /* * check vnode type */ if (uvp != NULLVP && lvp != NULLVP && uvp->v_type != lvp->v_type) { vrele(lvp); lvp = NULLVP; } /* * check shadow dir */ if (uerror != 0 && uerror != EJUSTRETURN && udvp != NULLVP && lerror == 0 && lvp != NULLVP && lvp->v_type == VDIR && !(dvp->v_mount->mnt_flag & MNT_RDONLY) && (1 < cnp->cn_namelen || '.' != *(cnp->cn_nameptr))) { /* get unionfs vnode in order to create a new shadow dir. 
*/ error = unionfs_nodeget(dvp->v_mount, NULLVP, lvp, dvp, &vp, cnp); if (error != 0) goto unionfs_lookup_cleanup; if (LK_SHARED == (cnp->cn_lkflags & LK_TYPE_MASK)) VOP_UNLOCK(vp); if (LK_EXCLUSIVE != VOP_ISLOCKED(vp)) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); lockflag = 1; } unp = VTOUNIONFS(vp); if (unp == NULL) error = ENOENT; else - error = unionfs_mkshadowdir(MOUNTTOUNIONFSMOUNT(dvp->v_mount), - udvp, unp, cnp, td); + error = unionfs_mkshadowdir(dvp, vp, cnp, td); if (lockflag != 0) VOP_UNLOCK(vp); if (error != 0) { UNIONFSDEBUG( "unionfs_lookup: Unable to create shadow dir."); if ((cnp->cn_lkflags & LK_TYPE_MASK) == LK_EXCLUSIVE) vput(vp); else vrele(vp); goto unionfs_lookup_cleanup; } + /* + * TODO: Since unionfs_mkshadowdir() relocks udvp after + * creating the new directory, return ERELOOKUP here? + */ if ((cnp->cn_lkflags & LK_TYPE_MASK) == LK_SHARED) vn_lock(vp, LK_SHARED | LK_RETRY); } /* * get unionfs vnode. */ else { if (uvp != NULLVP) error = uerror; else error = lerror; if (error != 0) goto unionfs_lookup_cleanup; error = unionfs_nodeget(dvp->v_mount, uvp, lvp, dvp, &vp, cnp); if (error != 0) { UNIONFSDEBUG( "unionfs_lookup: Unable to create unionfs vnode."); goto unionfs_lookup_cleanup; } - if ((nameiop == DELETE || nameiop == RENAME) && - (cnp->cn_lkflags & LK_TYPE_MASK) == 0) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + } + + if (VN_IS_DOOMED(dvp) || VN_IS_DOOMED(vp)) { + error = ENOENT; + vput(vp); + goto unionfs_lookup_cleanup; } *(ap->a_vpp) = vp; if (cnflags & MAKEENTRY) cache_enter(dvp, vp, cnp); unionfs_lookup_cleanup: if (uvp != NULLVP) vrele(uvp); if (lvp != NULLVP) vrele(lvp); - if (error == ENOENT && (cnflags & MAKEENTRY) != 0) + if (error == ENOENT && (cnflags & MAKEENTRY) != 0 && + !VN_IS_DOOMED(dvp)) cache_enter(dvp, NULLVP, cnp); unionfs_lookup_return: + unionfs_clear_in_progress_flag(dvp, UNIONFS_LOOKUP_IN_PROGRESS); UNIONFS_INTERNAL_DEBUG("unionfs_lookup: leave (%d)\n", error); return (error); } static int unionfs_create(struct 
vop_create_args *ap) { struct unionfs_node *dunp; struct componentname *cnp; struct vnode *udvp; struct vnode *vp; int error; UNIONFS_INTERNAL_DEBUG("unionfs_create: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_dvp); dunp = VTOUNIONFS(ap->a_dvp); cnp = ap->a_cnp; udvp = dunp->un_uppervp; error = EROFS; if (udvp != NULLVP) { int lkflags; bool vp_created = false; unionfs_forward_vop_start(udvp, &lkflags); error = VOP_CREATE(udvp, &vp, cnp, ap->a_vap); if (error == 0) vp_created = true; if (__predict_false(unionfs_forward_vop_finish(ap->a_dvp, udvp, lkflags)) && error == 0) { error = ENOENT; } if (error == 0) { VOP_UNLOCK(vp); error = unionfs_nodeget(ap->a_dvp->v_mount, vp, NULLVP, ap->a_dvp, ap->a_vpp, cnp); vrele(vp); } else if (vp_created) vput(vp); } UNIONFS_INTERNAL_DEBUG("unionfs_create: leave (%d)\n", error); return (error); } static int unionfs_whiteout(struct vop_whiteout_args *ap) { struct unionfs_node *dunp; struct componentname *cnp; struct vnode *udvp; int error; UNIONFS_INTERNAL_DEBUG("unionfs_whiteout: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_dvp); dunp = VTOUNIONFS(ap->a_dvp); cnp = ap->a_cnp; udvp = dunp->un_uppervp; error = EOPNOTSUPP; if (udvp != NULLVP) { int lkflags; switch (ap->a_flags) { case CREATE: case DELETE: case LOOKUP: unionfs_forward_vop_start(udvp, &lkflags); error = VOP_WHITEOUT(udvp, cnp, ap->a_flags); unionfs_forward_vop_finish(ap->a_dvp, udvp, lkflags); break; default: error = EINVAL; break; } } UNIONFS_INTERNAL_DEBUG("unionfs_whiteout: leave (%d)\n", error); return (error); } static int unionfs_mknod(struct vop_mknod_args *ap) { struct unionfs_node *dunp; struct componentname *cnp; struct vnode *udvp; struct vnode *vp; int error; UNIONFS_INTERNAL_DEBUG("unionfs_mknod: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_dvp); dunp = VTOUNIONFS(ap->a_dvp); cnp = ap->a_cnp; udvp = dunp->un_uppervp; error = EROFS; if (udvp != NULLVP) { int lkflags; bool vp_created = false; unionfs_forward_vop_start(udvp, &lkflags); error = VOP_MKNOD(udvp, &vp, cnp, 
ap->a_vap); if (error == 0) vp_created = true; if (__predict_false(unionfs_forward_vop_finish(ap->a_dvp, udvp, lkflags)) && error == 0) { error = ENOENT; } if (error == 0) { VOP_UNLOCK(vp); error = unionfs_nodeget(ap->a_dvp->v_mount, vp, NULLVP, ap->a_dvp, ap->a_vpp, cnp); vrele(vp); } else if (vp_created) vput(vp); } UNIONFS_INTERNAL_DEBUG("unionfs_mknod: leave (%d)\n", error); return (error); } enum unionfs_lkupgrade { UNIONFS_LKUPGRADE_SUCCESS, /* lock successfully upgraded */ UNIONFS_LKUPGRADE_ALREADY, /* lock already held exclusive */ UNIONFS_LKUPGRADE_DOOMED /* lock was upgraded, but vnode reclaimed */ }; static inline enum unionfs_lkupgrade unionfs_upgrade_lock(struct vnode *vp) { ASSERT_VOP_LOCKED(vp, __func__); if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) return (UNIONFS_LKUPGRADE_ALREADY); if (vn_lock(vp, LK_UPGRADE) != 0) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (VN_IS_DOOMED(vp)) return (UNIONFS_LKUPGRADE_DOOMED); } return (UNIONFS_LKUPGRADE_SUCCESS); } static inline void unionfs_downgrade_lock(struct vnode *vp, enum unionfs_lkupgrade status) { if (status != UNIONFS_LKUPGRADE_ALREADY) vn_lock(vp, LK_DOWNGRADE | LK_RETRY); } +/* + * Exchange the default (upper vnode) lock on a unionfs vnode for the lower + * vnode lock, in support of operations that require access to the lower vnode + * even when an upper vnode is present. We don't use vn_lock_pair() to hold + * both vnodes at the same time, primarily because the caller may proceed + * to issue VOPs to the lower layer which re-lock or perform other operations + * which may not be safe in the presence of a locked vnode from another FS. + * Moreover, vn_lock_pair()'s deadlock resolution approach can introduce + * additional overhead that isn't necessary on these paths. + * + * vp must be a locked unionfs vnode; the lock state of this vnode is + * returned through *lkflags for later use in unionfs_unlock_lvp(). 
+ * + * Returns the locked lower vnode, or NULL if the lower vnode (and therefore + * also the unionfs vnode above it) has been doomed. + */ +static struct vnode * +unionfs_lock_lvp(struct vnode *vp, int *lkflags) +{ + struct unionfs_node *unp; + struct vnode *lvp; + + unp = VTOUNIONFS(vp); + lvp = unp->un_lowervp; + ASSERT_VOP_LOCKED(vp, __func__); + ASSERT_VOP_UNLOCKED(lvp, __func__); + *lkflags = VOP_ISLOCKED(vp); + vref(lvp); + VOP_UNLOCK(vp); + vn_lock(lvp, *lkflags | LK_RETRY); + if (VN_IS_DOOMED(lvp)) { + vput(lvp); + lvp = NULLVP; + vn_lock(vp, *lkflags | LK_RETRY); + } + return (lvp); +} + +/* + * Undo a previous call to unionfs_lock_lvp(), restoring the default lock + * on the unionfs vnode. This function reloads and returns the vnode + * private data for the unionfs vnode, which will be NULL if the unionfs + * vnode became doomed while its lock was dropped. The caller must check + * for this case. + */ +static struct unionfs_node * +unionfs_unlock_lvp(struct vnode *vp, struct vnode *lvp, int lkflags) +{ + ASSERT_VOP_LOCKED(lvp, __func__); + ASSERT_VOP_UNLOCKED(vp, __func__); + vput(lvp); + vn_lock(vp, lkflags | LK_RETRY); + return (VTOUNIONFS(vp)); +} + static int unionfs_open(struct vop_open_args *ap) { struct unionfs_node *unp; struct unionfs_node_status *unsp; struct vnode *vp; struct vnode *uvp; struct vnode *lvp; struct vnode *targetvp; struct ucred *cred; struct thread *td; int error; + int lkflags; enum unionfs_lkupgrade lkstatus; + bool lock_lvp, open_lvp; UNIONFS_INTERNAL_DEBUG("unionfs_open: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_vp); error = 0; vp = ap->a_vp; targetvp = NULLVP; cred = ap->a_cred; td = ap->a_td; + open_lvp = lock_lvp = false; /* * The executable loader path may call this function with vp locked * shared. If the vnode is reclaimed while upgrading, we can't safely * use unp or do anything else unionfs- specific. 
*/ lkstatus = unionfs_upgrade_lock(vp); if (lkstatus == UNIONFS_LKUPGRADE_DOOMED) { error = ENOENT; goto unionfs_open_cleanup; } unp = VTOUNIONFS(vp); uvp = unp->un_uppervp; lvp = unp->un_lowervp; unionfs_get_node_status(unp, td, &unsp); if (unsp->uns_lower_opencnt > 0 || unsp->uns_upper_opencnt > 0) { /* vnode is already opend. */ if (unsp->uns_upper_opencnt > 0) targetvp = uvp; else targetvp = lvp; if (targetvp == lvp && (ap->a_mode & FWRITE) && lvp->v_type == VREG) targetvp = NULLVP; } if (targetvp == NULLVP) { if (uvp == NULLVP) { if ((ap->a_mode & FWRITE) && lvp->v_type == VREG) { - error = unionfs_copyfile(unp, + error = unionfs_copyfile(vp, !(ap->a_mode & O_TRUNC), cred, td); - if (error != 0) + if (error != 0) { + unp = VTOUNIONFS(vp); goto unionfs_open_abort; + } targetvp = uvp = unp->un_uppervp; } else targetvp = lvp; } else targetvp = uvp; } + if (targetvp == uvp && uvp->v_type == VDIR && lvp != NULLVP && + unsp->uns_lower_opencnt <= 0) + open_lvp = true; + else if (targetvp == lvp && uvp != NULLVP) + lock_lvp = true; + + if (lock_lvp) { + unp = NULL; + lvp = unionfs_lock_lvp(vp, &lkflags); + if (lvp == NULLVP) { + error = ENOENT; + goto unionfs_open_abort; + } + } else + unionfs_forward_vop_start(targetvp, &lkflags); + error = VOP_OPEN(targetvp, ap->a_mode, cred, td, ap->a_fp); - if (error == 0) { - if (targetvp == uvp) { - if (uvp->v_type == VDIR && lvp != NULLVP && - unsp->uns_lower_opencnt <= 0) { - /* open lower for readdir */ - error = VOP_OPEN(lvp, FREAD, cred, td, NULL); - if (error != 0) { - VOP_CLOSE(uvp, ap->a_mode, cred, td); - goto unionfs_open_abort; - } - unsp->uns_node_flag |= UNS_OPENL_4_READDIR; - unsp->uns_lower_opencnt++; + + if (lock_lvp) { + unp = unionfs_unlock_lvp(vp, lvp, lkflags); + if (unp == NULL && error == 0) + error = ENOENT; + } else if (unionfs_forward_vop_finish(vp, targetvp, lkflags)) + error = error ? 
error : ENOENT; + + if (error != 0) + goto unionfs_open_abort; + + if (targetvp == uvp) { + if (open_lvp) { + unp = NULL; + lvp = unionfs_lock_lvp(vp, &lkflags); + if (lvp == NULLVP) { + error = ENOENT; + goto unionfs_open_abort; } - unsp->uns_upper_opencnt++; - } else { + /* open lower for readdir */ + error = VOP_OPEN(lvp, FREAD, cred, td, NULL); + unp = unionfs_unlock_lvp(vp, lvp, lkflags); + if (unp == NULL) { + error = error ? error : ENOENT; + goto unionfs_open_abort; + } + if (error != 0) { + unionfs_forward_vop_start(uvp, &lkflags); + VOP_CLOSE(uvp, ap->a_mode, cred, td); + if (unionfs_forward_vop_finish(vp, uvp, lkflags)) + unp = NULL; + goto unionfs_open_abort; + } + unsp->uns_node_flag |= UNS_OPENL_4_READDIR; unsp->uns_lower_opencnt++; - unsp->uns_lower_openmode = ap->a_mode; } - vp->v_object = targetvp->v_object; + unsp->uns_upper_opencnt++; + } else { + unsp->uns_lower_opencnt++; + unsp->uns_lower_openmode = ap->a_mode; } + vp->v_object = targetvp->v_object; unionfs_open_abort: - if (error != 0) + + if (error != 0 && unp != NULL) unionfs_tryrem_node_status(unp, unsp); unionfs_open_cleanup: unionfs_downgrade_lock(vp, lkstatus); UNIONFS_INTERNAL_DEBUG("unionfs_open: leave (%d)\n", error); return (error); } static int unionfs_close(struct vop_close_args *ap) { struct unionfs_node *unp; struct unionfs_node_status *unsp; struct ucred *cred; struct thread *td; struct vnode *vp; + struct vnode *uvp; + struct vnode *lvp; struct vnode *ovp; int error; + int lkflags; enum unionfs_lkupgrade lkstatus; + bool lock_lvp; UNIONFS_INTERNAL_DEBUG("unionfs_close: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_vp); vp = ap->a_vp; cred = ap->a_cred; td = ap->a_td; error = 0; + lock_lvp = false; /* * If the vnode is reclaimed while upgrading, we can't safely use unp * or do anything else unionfs- specific. 
*/ lkstatus = unionfs_upgrade_lock(vp); if (lkstatus == UNIONFS_LKUPGRADE_DOOMED) goto unionfs_close_cleanup; unp = VTOUNIONFS(vp); + lvp = unp->un_lowervp; + uvp = unp->un_uppervp; unionfs_get_node_status(unp, td, &unsp); if (unsp->uns_lower_opencnt <= 0 && unsp->uns_upper_opencnt <= 0) { -#ifdef DIAGNOSTIC - printf("unionfs_close: warning: open count is 0\n"); -#endif - if (unp->un_uppervp != NULLVP) - ovp = unp->un_uppervp; + if (uvp != NULLVP) + ovp = uvp; else - ovp = unp->un_lowervp; + ovp = lvp; } else if (unsp->uns_upper_opencnt > 0) - ovp = unp->un_uppervp; + ovp = uvp; else - ovp = unp->un_lowervp; + ovp = lvp; + + if (ovp == lvp && uvp != NULLVP) { + lock_lvp = true; + unp = NULL; + lvp = unionfs_lock_lvp(vp, &lkflags); + if (lvp == NULLVP) { + error = ENOENT; + goto unionfs_close_abort; + } + } else + unionfs_forward_vop_start(ovp, &lkflags); error = VOP_CLOSE(ovp, ap->a_fflag, cred, td); + if (lock_lvp) { + unp = unionfs_unlock_lvp(vp, lvp, lkflags); + if (unp == NULL && error == 0) + error = ENOENT; + } else if (unionfs_forward_vop_finish(vp, ovp, lkflags)) + error = error ? 
error : ENOENT; + if (error != 0) goto unionfs_close_abort; vp->v_object = ovp->v_object; - if (ovp == unp->un_uppervp) { - unsp->uns_upper_opencnt--; - if (unsp->uns_upper_opencnt == 0) { + if (ovp == uvp) { + if (unsp != NULL && ((--unsp->uns_upper_opencnt) == 0)) { if (unsp->uns_node_flag & UNS_OPENL_4_READDIR) { - VOP_CLOSE(unp->un_lowervp, FREAD, cred, td); + unp = NULL; + lvp = unionfs_lock_lvp(vp, &lkflags); + if (lvp == NULLVP) { + error = ENOENT; + goto unionfs_close_abort; + } + VOP_CLOSE(lvp, FREAD, cred, td); + unp = unionfs_unlock_lvp(vp, lvp, lkflags); + if (unp == NULL) { + error = ENOENT; + goto unionfs_close_abort; + } unsp->uns_node_flag &= ~UNS_OPENL_4_READDIR; unsp->uns_lower_opencnt--; } if (unsp->uns_lower_opencnt > 0) - vp->v_object = unp->un_lowervp->v_object; + vp->v_object = lvp->v_object; } - } else + } else if (unsp != NULL) unsp->uns_lower_opencnt--; unionfs_close_abort: - unionfs_tryrem_node_status(unp, unsp); + if (unp != NULL && unsp != NULL) + unionfs_tryrem_node_status(unp, unsp); unionfs_close_cleanup: unionfs_downgrade_lock(vp, lkstatus); UNIONFS_INTERNAL_DEBUG("unionfs_close: leave (%d)\n", error); return (error); } /* * Check the access mode toward shadow file/dir. */ static int unionfs_check_corrected_access(accmode_t accmode, struct vattr *va, struct ucred *cred) { uid_t uid; /* upper side vnode's uid */ gid_t gid; /* upper side vnode's gid */ u_short vmode; /* upper side vnode's mode */ u_short mask; mask = 0; uid = va->va_uid; gid = va->va_gid; vmode = va->va_mode; /* check owner */ if (cred->cr_uid == uid) { if (accmode & VEXEC) mask |= S_IXUSR; if (accmode & VREAD) mask |= S_IRUSR; if (accmode & VWRITE) mask |= S_IWUSR; return ((vmode & mask) == mask ? 0 : EACCES); } /* check group */ if (groupmember(gid, cred)) { if (accmode & VEXEC) mask |= S_IXGRP; if (accmode & VREAD) mask |= S_IRGRP; if (accmode & VWRITE) mask |= S_IWGRP; return ((vmode & mask) == mask ? 
0 : EACCES); } /* check other */ if (accmode & VEXEC) mask |= S_IXOTH; if (accmode & VREAD) mask |= S_IROTH; if (accmode & VWRITE) mask |= S_IWOTH; return ((vmode & mask) == mask ? 0 : EACCES); } static int unionfs_access(struct vop_access_args *ap) { struct unionfs_mount *ump; struct unionfs_node *unp; struct vnode *uvp; struct vnode *lvp; struct thread *td; struct vattr va; accmode_t accmode; int error; UNIONFS_INTERNAL_DEBUG("unionfs_access: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_vp); ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount); unp = VTOUNIONFS(ap->a_vp); uvp = unp->un_uppervp; lvp = unp->un_lowervp; td = ap->a_td; accmode = ap->a_accmode; error = EACCES; if ((accmode & VWRITE) && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (ap->a_vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } if (uvp != NULLVP) { error = VOP_ACCESS(uvp, accmode, ap->a_cred, td); UNIONFS_INTERNAL_DEBUG("unionfs_access: leave (%d)\n", error); return (error); } if (lvp != NULLVP) { if (accmode & VWRITE) { if ((ump->um_uppermp->mnt_flag & MNT_RDONLY) != 0) { switch (ap->a_vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } else if (ap->a_vp->v_type == VREG || ap->a_vp->v_type == VDIR) { /* check shadow file/dir */ if (ump->um_copymode != UNIONFS_TRANSPARENT) { error = unionfs_create_uppervattr(ump, lvp, &va, ap->a_cred, td); if (error != 0) return (error); error = unionfs_check_corrected_access( accmode, &va, ap->a_cred); if (error != 0) return (error); } } accmode &= ~(VWRITE | VAPPEND); accmode |= VREAD; /* will copy to upper */ } error = VOP_ACCESS(lvp, accmode, ap->a_cred, td); } UNIONFS_INTERNAL_DEBUG("unionfs_access: leave (%d)\n", error); return (error); } static int unionfs_getattr(struct vop_getattr_args *ap) { struct unionfs_node *unp; struct unionfs_mount *ump; struct vnode *uvp; struct vnode *lvp; struct thread *td; struct vattr va; int error; UNIONFS_INTERNAL_DEBUG("unionfs_getattr: enter\n"); 
KASSERT_UNIONFS_VNODE(ap->a_vp); unp = VTOUNIONFS(ap->a_vp); ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount); uvp = unp->un_uppervp; lvp = unp->un_lowervp; td = curthread; if (uvp != NULLVP) { if ((error = VOP_GETATTR(uvp, ap->a_vap, ap->a_cred)) == 0) ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; UNIONFS_INTERNAL_DEBUG( "unionfs_getattr: leave mode=%o, uid=%d, gid=%d (%d)\n", ap->a_vap->va_mode, ap->a_vap->va_uid, ap->a_vap->va_gid, error); return (error); } error = VOP_GETATTR(lvp, ap->a_vap, ap->a_cred); if (error == 0 && (ump->um_uppermp->mnt_flag & MNT_RDONLY) == 0) { /* correct the attr toward shadow file/dir. */ if (ap->a_vp->v_type == VREG || ap->a_vp->v_type == VDIR) { unionfs_create_uppervattr_core(ump, ap->a_vap, &va, td); ap->a_vap->va_mode = va.va_mode; ap->a_vap->va_uid = va.va_uid; ap->a_vap->va_gid = va.va_gid; } } if (error == 0) ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; UNIONFS_INTERNAL_DEBUG( "unionfs_getattr: leave mode=%o, uid=%d, gid=%d (%d)\n", ap->a_vap->va_mode, ap->a_vap->va_uid, ap->a_vap->va_gid, error); return (error); } static int unionfs_setattr(struct vop_setattr_args *ap) { struct unionfs_node *unp; struct vnode *uvp; struct vnode *lvp; struct thread *td; struct vattr *vap; int error; UNIONFS_INTERNAL_DEBUG("unionfs_setattr: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_vp); error = EROFS; unp = VTOUNIONFS(ap->a_vp); uvp = unp->un_uppervp; lvp = unp->un_lowervp; td = curthread; vap = ap->a_vap; if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) && (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL)) return (EROFS); if (uvp == NULLVP && lvp->v_type == VREG) { - error = unionfs_copyfile(unp, (vap->va_size != 0), + error = unionfs_copyfile(ap->a_vp, (vap->va_size != 0), ap->a_cred, td); if (error != 0) return (error); uvp = unp->un_uppervp; } if (uvp != NULLVP) { int 
lkflags; unionfs_forward_vop_start(uvp, &lkflags); error = VOP_SETATTR(uvp, vap, ap->a_cred); unionfs_forward_vop_finish(ap->a_vp, uvp, lkflags); } UNIONFS_INTERNAL_DEBUG("unionfs_setattr: leave (%d)\n", error); return (error); } static int unionfs_read(struct vop_read_args *ap) { struct unionfs_node *unp; struct vnode *tvp; int error; /* UNIONFS_INTERNAL_DEBUG("unionfs_read: enter\n"); */ KASSERT_UNIONFS_VNODE(ap->a_vp); unp = VTOUNIONFS(ap->a_vp); tvp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp); error = VOP_READ(tvp, ap->a_uio, ap->a_ioflag, ap->a_cred); /* UNIONFS_INTERNAL_DEBUG("unionfs_read: leave (%d)\n", error); */ return (error); } static int unionfs_write(struct vop_write_args *ap) { struct unionfs_node *unp; struct vnode *tvp; int error; int lkflags; /* UNIONFS_INTERNAL_DEBUG("unionfs_write: enter\n"); */ KASSERT_UNIONFS_VNODE(ap->a_vp); unp = VTOUNIONFS(ap->a_vp); tvp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp); unionfs_forward_vop_start(tvp, &lkflags); error = VOP_WRITE(tvp, ap->a_uio, ap->a_ioflag, ap->a_cred); unionfs_forward_vop_finish(ap->a_vp, tvp, lkflags); /* UNIONFS_INTERNAL_DEBUG("unionfs_write: leave (%d)\n", error); */ return (error); } static int unionfs_ioctl(struct vop_ioctl_args *ap) { struct unionfs_node *unp; struct unionfs_node_status *unsp; struct vnode *ovp; int error; UNIONFS_INTERNAL_DEBUG("unionfs_ioctl: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_vp); vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY); unp = VTOUNIONFS(ap->a_vp); unionfs_get_node_status(unp, ap->a_td, &unsp); ovp = (unsp->uns_upper_opencnt ? 
unp->un_uppervp : unp->un_lowervp); unionfs_tryrem_node_status(unp, unsp); VOP_UNLOCK(ap->a_vp); if (ovp == NULLVP) return (EBADF); error = VOP_IOCTL(ovp, ap->a_command, ap->a_data, ap->a_fflag, ap->a_cred, ap->a_td); UNIONFS_INTERNAL_DEBUG("unionfs_ioctl: leave (%d)\n", error); return (error); } static int unionfs_poll(struct vop_poll_args *ap) { struct unionfs_node *unp; struct unionfs_node_status *unsp; struct vnode *ovp; KASSERT_UNIONFS_VNODE(ap->a_vp); vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY); unp = VTOUNIONFS(ap->a_vp); unionfs_get_node_status(unp, ap->a_td, &unsp); ovp = (unsp->uns_upper_opencnt ? unp->un_uppervp : unp->un_lowervp); unionfs_tryrem_node_status(unp, unsp); VOP_UNLOCK(ap->a_vp); if (ovp == NULLVP) return (EBADF); return (VOP_POLL(ovp, ap->a_events, ap->a_cred, ap->a_td)); } static int unionfs_fsync(struct vop_fsync_args *ap) { struct unionfs_node *unp; struct unionfs_node_status *unsp; struct vnode *ovp; enum unionfs_lkupgrade lkstatus; int error, lkflags; KASSERT_UNIONFS_VNODE(ap->a_vp); unp = VTOUNIONFS(ap->a_vp); lkstatus = unionfs_upgrade_lock(ap->a_vp); if (lkstatus == UNIONFS_LKUPGRADE_DOOMED) { unionfs_downgrade_lock(ap->a_vp, lkstatus); return (ENOENT); } unionfs_get_node_status(unp, ap->a_td, &unsp); ovp = (unsp->uns_upper_opencnt ? 
unp->un_uppervp : unp->un_lowervp); unionfs_tryrem_node_status(unp, unsp); unionfs_downgrade_lock(ap->a_vp, lkstatus); if (ovp == NULLVP) return (EBADF); unionfs_forward_vop_start(ovp, &lkflags); error = VOP_FSYNC(ovp, ap->a_waitfor, ap->a_td); unionfs_forward_vop_finish(ap->a_vp, ovp, lkflags); return (error); } static int unionfs_remove(struct vop_remove_args *ap) { char *path; struct unionfs_node *dunp; struct unionfs_node *unp; struct unionfs_mount *ump; struct vnode *udvp; struct vnode *uvp; struct vnode *lvp; struct componentname *cnp; struct thread *td; int error; int pathlen; UNIONFS_INTERNAL_DEBUG("unionfs_remove: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_dvp); KASSERT_UNIONFS_VNODE(ap->a_vp); error = 0; dunp = VTOUNIONFS(ap->a_dvp); udvp = dunp->un_uppervp; cnp = ap->a_cnp; td = curthread; ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount); unp = VTOUNIONFS(ap->a_vp); uvp = unp->un_uppervp; lvp = unp->un_lowervp; path = unp->un_path; pathlen = unp->un_pathlen; if (udvp == NULLVP) return (EROFS); if (uvp != NULLVP) { int udvp_lkflags, uvp_lkflags; if (ump == NULL || ump->um_whitemode == UNIONFS_WHITE_ALWAYS || lvp != NULLVP) cnp->cn_flags |= DOWHITEOUT; unionfs_forward_vop_start_pair(udvp, &udvp_lkflags, uvp, &uvp_lkflags); error = VOP_REMOVE(udvp, uvp, cnp); unionfs_forward_vop_finish_pair(ap->a_dvp, udvp, udvp_lkflags, ap->a_vp, uvp, uvp_lkflags); - } else if (lvp != NULLVP) - error = unionfs_mkwhiteout(ap->a_dvp, udvp, cnp, td, path, pathlen); + } else if (lvp != NULLVP) { + error = unionfs_mkwhiteout(ap->a_dvp, ap->a_vp, cnp, td, + path, pathlen); + } UNIONFS_INTERNAL_DEBUG("unionfs_remove: leave (%d)\n", error); return (error); } static int unionfs_link(struct vop_link_args *ap) { struct unionfs_node *dunp; struct unionfs_node *unp; struct vnode *udvp; struct vnode *uvp; struct componentname *cnp; struct thread *td; int error; - int needrelookup; UNIONFS_INTERNAL_DEBUG("unionfs_link: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_tdvp); 
KASSERT_UNIONFS_VNODE(ap->a_vp); error = 0; - needrelookup = 0; dunp = VTOUNIONFS(ap->a_tdvp); unp = NULL; udvp = dunp->un_uppervp; uvp = NULLVP; cnp = ap->a_cnp; td = curthread; if (udvp == NULLVP) return (EROFS); unp = VTOUNIONFS(ap->a_vp); if (unp->un_uppervp == NULLVP) { if (ap->a_vp->v_type != VREG) return (EOPNOTSUPP); - error = unionfs_copyfile(unp, 1, cnp->cn_cred, td); - if (error != 0) - return (error); - needrelookup = 1; + VOP_UNLOCK(ap->a_tdvp); + error = unionfs_copyfile(ap->a_vp, 1, cnp->cn_cred, td); + vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY); + if (error == 0) + error = ERELOOKUP; + return (error); } uvp = unp->un_uppervp; - if (needrelookup != 0) - error = unionfs_relookup_for_create(ap->a_tdvp, cnp, td); - if (error == 0) { int udvp_lkflags, uvp_lkflags; unionfs_forward_vop_start_pair(udvp, &udvp_lkflags, uvp, &uvp_lkflags); error = VOP_LINK(udvp, uvp, cnp); unionfs_forward_vop_finish_pair(ap->a_tdvp, udvp, udvp_lkflags, ap->a_vp, uvp, uvp_lkflags); } UNIONFS_INTERNAL_DEBUG("unionfs_link: leave (%d)\n", error); return (error); } static int unionfs_rename(struct vop_rename_args *ap) { struct vnode *fdvp; struct vnode *fvp; struct componentname *fcnp; struct vnode *tdvp; struct vnode *tvp; struct componentname *tcnp; - struct vnode *ltdvp; - struct vnode *ltvp; struct thread *td; /* rename target vnodes */ struct vnode *rfdvp; struct vnode *rfvp; struct vnode *rtdvp; struct vnode *rtvp; - struct unionfs_mount *ump; struct unionfs_node *unp; int error; UNIONFS_INTERNAL_DEBUG("unionfs_rename: enter\n"); error = 0; fdvp = ap->a_fdvp; fvp = ap->a_fvp; fcnp = ap->a_fcnp; tdvp = ap->a_tdvp; tvp = ap->a_tvp; tcnp = ap->a_tcnp; - ltdvp = NULLVP; - ltvp = NULLVP; td = curthread; rfdvp = fdvp; rfvp = fvp; rtdvp = tdvp; rtvp = tvp; /* check for cross device rename */ if (fvp->v_mount != tdvp->v_mount || (tvp != NULLVP && fvp->v_mount != tvp->v_mount)) { if (fvp->v_op != &unionfs_vnodeops) error = ENODEV; else error = EXDEV; goto unionfs_rename_abort; } /* 
Renaming a file to itself has no effect. */ if (fvp == tvp) goto unionfs_rename_abort; KASSERT_UNIONFS_VNODE(tdvp); if (tvp != NULLVP) KASSERT_UNIONFS_VNODE(tvp); if (fdvp != tdvp) VI_LOCK(fdvp); unp = VTOUNIONFS(fdvp); if (unp == NULL) { if (fdvp != tdvp) VI_UNLOCK(fdvp); error = ENOENT; goto unionfs_rename_abort; } #ifdef UNIONFS_IDBG_RENAME UNIONFS_INTERNAL_DEBUG("fdvp=%p, ufdvp=%p, lfdvp=%p\n", fdvp, unp->un_uppervp, unp->un_lowervp); #endif if (unp->un_uppervp == NULLVP) { error = ENODEV; } else { rfdvp = unp->un_uppervp; vref(rfdvp); } if (fdvp != tdvp) VI_UNLOCK(fdvp); if (error != 0) goto unionfs_rename_abort; VI_LOCK(fvp); unp = VTOUNIONFS(fvp); if (unp == NULL) { VI_UNLOCK(fvp); error = ENOENT; goto unionfs_rename_abort; } #ifdef UNIONFS_IDBG_RENAME UNIONFS_INTERNAL_DEBUG("fvp=%p, ufvp=%p, lfvp=%p\n", fvp, unp->un_uppervp, unp->un_lowervp); #endif - ump = MOUNTTOUNIONFSMOUNT(fvp->v_mount); /* * If we only have a lower vnode, copy the source file to the upper * FS so that the rename operation can be issued against the upper FS. */ if (unp->un_uppervp == NULLVP) { bool unlock_fdvp = false, relock_tdvp = false; VI_UNLOCK(fvp); if (tvp != NULLVP) VOP_UNLOCK(tvp); if (fvp->v_type == VREG) { /* * For regular files, unionfs_copyfile() will expect * fdvp's upper parent directory vnode to be unlocked * and will temporarily lock it. If fdvp == tdvp, we * should unlock tdvp to avoid recursion on tdvp's * lock. If fdvp != tdvp, we should also unlock tdvp * to avoid potential deadlock due to holding tdvp's * lock while locking unrelated vnodes associated with * fdvp/fvp. */ VOP_UNLOCK(tdvp); relock_tdvp = true; } else if (fvp->v_type == VDIR && tdvp != fdvp) { /* * For directories, unionfs_mkshadowdir() will expect * fdvp's upper parent directory vnode to be locked * and will temporarily unlock it. If fdvp == tdvp, * we can therefore leave tdvp locked. If fdvp != * tdvp, we should exchange the lock on tdvp for a * lock on fdvp. 
*/ VOP_UNLOCK(tdvp); unlock_fdvp = true; relock_tdvp = true; vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY); } vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY); unp = VTOUNIONFS(fvp); if (unp == NULL) error = ENOENT; else if (unp->un_uppervp == NULLVP) { switch (fvp->v_type) { case VREG: - error = unionfs_copyfile(unp, 1, fcnp->cn_cred, td); + error = unionfs_copyfile(fvp, 1, fcnp->cn_cred, td); break; case VDIR: - error = unionfs_mkshadowdir(ump, rfdvp, unp, fcnp, td); + error = unionfs_mkshadowdir(fdvp, fvp, fcnp, td); break; default: error = ENODEV; break; } } VOP_UNLOCK(fvp); if (unlock_fdvp) VOP_UNLOCK(fdvp); if (relock_tdvp) vn_lock(tdvp, LK_EXCLUSIVE | LK_RETRY); if (tvp != NULLVP) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); /* * Since we've dropped tdvp's lock at some point in the copy * sequence above, force the caller to re-drive the lookup * in case the relationship between tdvp and tvp has changed. */ if (error == 0) error = ERELOOKUP; goto unionfs_rename_abort; } if (unp->un_lowervp != NULLVP) fcnp->cn_flags |= DOWHITEOUT; rfvp = unp->un_uppervp; vref(rfvp); VI_UNLOCK(fvp); unp = VTOUNIONFS(tdvp); #ifdef UNIONFS_IDBG_RENAME UNIONFS_INTERNAL_DEBUG("tdvp=%p, utdvp=%p, ltdvp=%p\n", tdvp, unp->un_uppervp, unp->un_lowervp); #endif if (unp->un_uppervp == NULLVP) { error = ENODEV; goto unionfs_rename_abort; } rtdvp = unp->un_uppervp; - ltdvp = unp->un_lowervp; vref(rtdvp); if (tvp != NULLVP) { unp = VTOUNIONFS(tvp); if (unp == NULL) { error = ENOENT; goto unionfs_rename_abort; } #ifdef UNIONFS_IDBG_RENAME UNIONFS_INTERNAL_DEBUG("tvp=%p, utvp=%p, ltvp=%p\n", tvp, unp->un_uppervp, unp->un_lowervp); #endif if (unp->un_uppervp == NULLVP) rtvp = NULLVP; else { if (tvp->v_type == VDIR) { error = EINVAL; goto unionfs_rename_abort; } rtvp = unp->un_uppervp; - ltvp = unp->un_lowervp; vref(rtvp); } } if (rfvp == rtvp) goto unionfs_rename_abort; error = VOP_RENAME(rfdvp, rfvp, fcnp, rtdvp, rtvp, tcnp); if (error == 0) { if (rtvp != NULLVP && rtvp->v_type == VDIR) cache_purge(tdvp); if 
(fvp->v_type == VDIR && fdvp != tdvp) cache_purge(fdvp); } - if (ltdvp != NULLVP) - VOP_UNLOCK(ltdvp); if (tdvp != rtdvp) vrele(tdvp); - if (ltvp != NULLVP) - VOP_UNLOCK(ltvp); if (tvp != rtvp && tvp != NULLVP) { if (rtvp == NULLVP) vput(tvp); else vrele(tvp); } if (fdvp != rfdvp) vrele(fdvp); if (fvp != rfvp) vrele(fvp); UNIONFS_INTERNAL_DEBUG("unionfs_rename: leave (%d)\n", error); return (error); unionfs_rename_abort: vput(tdvp); if (tdvp != rtdvp) vrele(rtdvp); if (tvp != NULLVP) { if (tdvp != tvp) vput(tvp); else vrele(tvp); } if (tvp != rtvp && rtvp != NULLVP) vrele(rtvp); if (fdvp != rfdvp) vrele(rfdvp); if (fvp != rfvp) vrele(rfvp); vrele(fdvp); vrele(fvp); UNIONFS_INTERNAL_DEBUG("unionfs_rename: leave (%d)\n", error); return (error); } static int unionfs_mkdir(struct vop_mkdir_args *ap) { struct unionfs_node *dunp; struct componentname *cnp; struct vnode *dvp; struct vnode *udvp; struct vnode *uvp; struct vattr va; int error; int lkflags; UNIONFS_INTERNAL_DEBUG("unionfs_mkdir: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_dvp); error = EROFS; dvp = ap->a_dvp; dunp = VTOUNIONFS(dvp); cnp = ap->a_cnp; lkflags = cnp->cn_lkflags; udvp = dunp->un_uppervp; if (udvp != NULLVP) { /* check opaque */ if (!(cnp->cn_flags & ISWHITEOUT)) { error = VOP_GETATTR(udvp, &va, cnp->cn_cred); if (error != 0) goto unionfs_mkdir_cleanup; if ((va.va_flags & OPAQUE) != 0) cnp->cn_flags |= ISWHITEOUT; } int udvp_lkflags; bool uvp_created = false; unionfs_forward_vop_start(udvp, &udvp_lkflags); error = VOP_MKDIR(udvp, &uvp, cnp, ap->a_vap); if (error == 0) uvp_created = true; if (__predict_false(unionfs_forward_vop_finish(dvp, udvp, udvp_lkflags)) && error == 0) error = ENOENT; if (error == 0) { VOP_UNLOCK(uvp); cnp->cn_lkflags = LK_EXCLUSIVE; error = unionfs_nodeget(dvp->v_mount, uvp, NULLVP, dvp, ap->a_vpp, cnp); vrele(uvp); cnp->cn_lkflags = lkflags; } else if (uvp_created) vput(uvp); } unionfs_mkdir_cleanup: UNIONFS_INTERNAL_DEBUG("unionfs_mkdir: leave (%d)\n", error); return (error); 
} static int unionfs_rmdir(struct vop_rmdir_args *ap) { struct unionfs_node *dunp; struct unionfs_node *unp; struct unionfs_mount *ump; struct componentname *cnp; struct thread *td; struct vnode *udvp; struct vnode *uvp; struct vnode *lvp; int error; UNIONFS_INTERNAL_DEBUG("unionfs_rmdir: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_dvp); KASSERT_UNIONFS_VNODE(ap->a_vp); error = 0; dunp = VTOUNIONFS(ap->a_dvp); unp = VTOUNIONFS(ap->a_vp); cnp = ap->a_cnp; td = curthread; udvp = dunp->un_uppervp; uvp = unp->un_uppervp; lvp = unp->un_lowervp; if (udvp == NULLVP) return (EROFS); if (udvp == uvp) return (EOPNOTSUPP); if (uvp != NULLVP) { if (lvp != NULLVP) { + /* + * We need to keep dvp and vp's upper vnodes locked + * going into the VOP_RMDIR() call, but the empty + * directory check also requires the lower vnode lock. + * For this third, cross-filesystem lock we use an + * approach similar to that taken by various FS' VOP_RENAME + * implementations (which require 2-4 vnode locks). + * First we attempt a NOWAIT acquisition, then if + * that fails we drop the other two vnode locks, + * acquire lvp's lock in the normal fashion to reduce + * the likelihood of spinning on it in the future, + * then drop it, reacquire the other locks, and return + * ERELOOKUP to re-drive the lookup in case the dvp-> + * vp relationship has changed. + */ + if (vn_lock(lvp, LK_SHARED | LK_NOWAIT) != 0) { + VOP_UNLOCK(ap->a_vp); + VOP_UNLOCK(ap->a_dvp); + vn_lock(lvp, LK_SHARED | LK_RETRY); + VOP_UNLOCK(lvp); + vn_lock(ap->a_dvp, LK_EXCLUSIVE | LK_RETRY); + vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY); + return (ERELOOKUP); + } error = unionfs_check_rmdir(ap->a_vp, cnp->cn_cred, td); + /* + * It's possible for a direct operation on the lower FS + * to make the lower directory non-empty after we drop + * the lock, but it's also possible for the upper-layer + * VOP_RMDIR to relock udvp/uvp which would lead to + * a lock-order reversal (LOR) if we kept lvp locked across that call. 
+ */ + VOP_UNLOCK(lvp); if (error != 0) return (error); } ump = MOUNTTOUNIONFSMOUNT(ap->a_vp->v_mount); if (ump->um_whitemode == UNIONFS_WHITE_ALWAYS || lvp != NULLVP) cnp->cn_flags |= DOWHITEOUT; - /* - * The relookup path will need to relock the parent dvp and - * possibly the vp as well. Locking is expected to be done - * in parent->child order; drop the lock on vp to avoid LOR - * and potential recursion on vp's lock. - * vp is expected to remain referenced during VOP_RMDIR(), - * so vref/vrele should not be necessary here. - */ - VOP_UNLOCK(ap->a_vp); - VNPASS(vrefcnt(ap->a_vp) > 0, ap->a_vp); - error = unionfs_relookup_for_delete(ap->a_dvp, cnp, td); - vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY); - /* - * VOP_RMDIR is dispatched against udvp, so if uvp became - * doomed while the lock was dropped above the target - * filesystem may not be able to cope. - */ - if (error == 0 && VN_IS_DOOMED(uvp)) - error = ENOENT; - if (error == 0) { - int udvp_lkflags, uvp_lkflags; - unionfs_forward_vop_start_pair(udvp, &udvp_lkflags, - uvp, &uvp_lkflags); - error = VOP_RMDIR(udvp, uvp, cnp); - unionfs_forward_vop_finish_pair(ap->a_dvp, udvp, udvp_lkflags, - ap->a_vp, uvp, uvp_lkflags); - } - } else if (lvp != NULLVP) - error = unionfs_mkwhiteout(ap->a_dvp, udvp, cnp, td, + int udvp_lkflags, uvp_lkflags; + unionfs_forward_vop_start_pair(udvp, &udvp_lkflags, + uvp, &uvp_lkflags); + error = VOP_RMDIR(udvp, uvp, cnp); + unionfs_forward_vop_finish_pair(ap->a_dvp, udvp, udvp_lkflags, + ap->a_vp, uvp, uvp_lkflags); + } else if (lvp != NULLVP) { + error = unionfs_mkwhiteout(ap->a_dvp, ap->a_vp, cnp, td, unp->un_path, unp->un_pathlen); + } if (error == 0) { cache_purge(ap->a_dvp); cache_purge(ap->a_vp); } UNIONFS_INTERNAL_DEBUG("unionfs_rmdir: leave (%d)\n", error); return (error); } static int unionfs_symlink(struct vop_symlink_args *ap) { struct unionfs_node *dunp; struct componentname *cnp; struct vnode *udvp; struct vnode *uvp; int error; int lkflags; 
UNIONFS_INTERNAL_DEBUG("unionfs_symlink: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_dvp); error = EROFS; dunp = VTOUNIONFS(ap->a_dvp); cnp = ap->a_cnp; lkflags = cnp->cn_lkflags; udvp = dunp->un_uppervp; if (udvp != NULLVP) { int udvp_lkflags; bool uvp_created = false; unionfs_forward_vop_start(udvp, &udvp_lkflags); error = VOP_SYMLINK(udvp, &uvp, cnp, ap->a_vap, ap->a_target); if (error == 0) uvp_created = true; if (__predict_false(unionfs_forward_vop_finish(ap->a_dvp, udvp, udvp_lkflags)) && error == 0) error = ENOENT; if (error == 0) { VOP_UNLOCK(uvp); cnp->cn_lkflags = LK_EXCLUSIVE; error = unionfs_nodeget(ap->a_dvp->v_mount, uvp, NULLVP, ap->a_dvp, ap->a_vpp, cnp); vrele(uvp); cnp->cn_lkflags = lkflags; } else if (uvp_created) vput(uvp); } UNIONFS_INTERNAL_DEBUG("unionfs_symlink: leave (%d)\n", error); return (error); } static int unionfs_readdir(struct vop_readdir_args *ap) { struct unionfs_node *unp; struct unionfs_node_status *unsp; struct uio *uio; struct vnode *vp; struct vnode *uvp; struct vnode *lvp; struct thread *td; struct vattr va; uint64_t *cookies_bk; int error; int eofflag; + int lkflags; int ncookies_bk; int uio_offset_bk; enum unionfs_lkupgrade lkstatus; UNIONFS_INTERNAL_DEBUG("unionfs_readdir: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_vp); error = 0; eofflag = 0; uio_offset_bk = 0; uio = ap->a_uio; uvp = NULLVP; lvp = NULLVP; td = uio->uio_td; ncookies_bk = 0; cookies_bk = NULL; vp = ap->a_vp; if (vp->v_type != VDIR) return (ENOTDIR); /* * If the vnode is reclaimed while upgrading, we can't safely use unp * or do anything else unionfs- specific. */ lkstatus = unionfs_upgrade_lock(vp); if (lkstatus == UNIONFS_LKUPGRADE_DOOMED) error = EBADF; if (error == 0) { unp = VTOUNIONFS(vp); uvp = unp->un_uppervp; lvp = unp->un_lowervp; /* check the open count. unionfs needs open before readdir. 
*/ unionfs_get_node_status(unp, td, &unsp); if ((uvp != NULLVP && unsp->uns_upper_opencnt <= 0) || (lvp != NULLVP && unsp->uns_lower_opencnt <= 0)) { unionfs_tryrem_node_status(unp, unsp); error = EBADF; } } unionfs_downgrade_lock(vp, lkstatus); if (error != 0) goto unionfs_readdir_exit; /* check opaque */ if (uvp != NULLVP && lvp != NULLVP) { if ((error = VOP_GETATTR(uvp, &va, ap->a_cred)) != 0) goto unionfs_readdir_exit; if (va.va_flags & OPAQUE) lvp = NULLVP; } /* upper only */ if (uvp != NULLVP && lvp == NULLVP) { + unionfs_forward_vop_start(uvp, &lkflags); error = VOP_READDIR(uvp, uio, ap->a_cred, ap->a_eofflag, ap->a_ncookies, ap->a_cookies); - unsp->uns_readdir_status = 0; + if (unionfs_forward_vop_finish(vp, uvp, lkflags)) + error = error ? error : ENOENT; + else + unsp->uns_readdir_status = 0; goto unionfs_readdir_exit; } /* lower only */ if (uvp == NULLVP && lvp != NULLVP) { + unionfs_forward_vop_start(lvp, &lkflags); error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag, ap->a_ncookies, ap->a_cookies); - unsp->uns_readdir_status = 2; + if (unionfs_forward_vop_finish(vp, lvp, lkflags)) + error = error ? error : ENOENT; + else + unsp->uns_readdir_status = 2; goto unionfs_readdir_exit; } /* * readdir upper and lower */ KASSERT(uvp != NULLVP, ("unionfs_readdir: null upper vp")); KASSERT(lvp != NULLVP, ("unionfs_readdir: null lower vp")); + if (uio->uio_offset == 0) unsp->uns_readdir_status = 0; if (unsp->uns_readdir_status == 0) { /* read upper */ + unionfs_forward_vop_start(uvp, &lkflags); error = VOP_READDIR(uvp, uio, ap->a_cred, &eofflag, ap->a_ncookies, ap->a_cookies); - + if (unionfs_forward_vop_finish(vp, uvp, lkflags) && error == 0) + error = ENOENT; if (error != 0 || eofflag == 0) goto unionfs_readdir_exit; unsp->uns_readdir_status = 1; /* * UFS(and other FS) needs size of uio_resid larger than * DIRBLKSIZ. * size of DIRBLKSIZ equals DEV_BSIZE. 
* (see: ufs/ufs/ufs_vnops.c ufs_readdir func , ufs/ufs/dir.h) */ if (uio->uio_resid <= (uio->uio_resid & (DEV_BSIZE -1))) goto unionfs_readdir_exit; /* * Backup cookies. * It prepares to readdir in lower. */ if (ap->a_ncookies != NULL) { ncookies_bk = *(ap->a_ncookies); *(ap->a_ncookies) = 0; } if (ap->a_cookies != NULL) { cookies_bk = *(ap->a_cookies); *(ap->a_cookies) = NULL; } } /* initialize for readdir in lower */ if (unsp->uns_readdir_status == 1) { unsp->uns_readdir_status = 2; /* * Backup uio_offset. See the comment after the * VOP_READDIR call on the lower layer. */ uio_offset_bk = uio->uio_offset; uio->uio_offset = 0; } - if (lvp == NULLVP) { - error = EBADF; + lvp = unionfs_lock_lvp(vp, &lkflags); + if (lvp == NULL) { + error = ENOENT; goto unionfs_readdir_exit; } + /* read lower */ error = VOP_READDIR(lvp, uio, ap->a_cred, ap->a_eofflag, ap->a_ncookies, ap->a_cookies); + + unp = unionfs_unlock_lvp(vp, lvp, lkflags); + if (unp == NULL && error == 0) + error = ENOENT; + + /* * We can't return an uio_offset of 0: this would trigger an * infinite loop, because the next call to unionfs_readdir would * always restart with the upper layer (uio_offset == 0) and * always return some data. * * This happens when the lower layer root directory is removed. * (A root directory deleting of unionfs should not be permitted. * But current VFS can not do it.) 
*/ if (uio->uio_offset == 0) uio->uio_offset = uio_offset_bk; if (cookies_bk != NULL) { /* merge cookies */ int size; uint64_t *newcookies, *pos; size = *(ap->a_ncookies) + ncookies_bk; newcookies = (uint64_t *) malloc(size * sizeof(*newcookies), M_TEMP, M_WAITOK); pos = newcookies; memcpy(pos, cookies_bk, ncookies_bk * sizeof(*newcookies)); pos += ncookies_bk; memcpy(pos, *(ap->a_cookies), *(ap->a_ncookies) * sizeof(*newcookies)); free(cookies_bk, M_TEMP); free(*(ap->a_cookies), M_TEMP); *(ap->a_ncookies) = size; *(ap->a_cookies) = newcookies; } unionfs_readdir_exit: if (error != 0 && ap->a_eofflag != NULL) *(ap->a_eofflag) = 1; UNIONFS_INTERNAL_DEBUG("unionfs_readdir: leave (%d)\n", error); return (error); } static int unionfs_readlink(struct vop_readlink_args *ap) { struct unionfs_node *unp; struct vnode *vp; int error; UNIONFS_INTERNAL_DEBUG("unionfs_readlink: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_vp); unp = VTOUNIONFS(ap->a_vp); vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp); error = VOP_READLINK(vp, ap->a_uio, ap->a_cred); UNIONFS_INTERNAL_DEBUG("unionfs_readlink: leave (%d)\n", error); return (error); } static int unionfs_getwritemount(struct vop_getwritemount_args *ap) { struct unionfs_node *unp; struct vnode *uvp; struct vnode *vp, *ovp; int error; UNIONFS_INTERNAL_DEBUG("unionfs_getwritemount: enter\n"); error = 0; vp = ap->a_vp; uvp = NULLVP; VI_LOCK(vp); unp = VTOUNIONFS(vp); if (unp != NULL) uvp = unp->un_uppervp; /* * If our node has no upper vnode, check the parent directory. * We may be initiating a write operation that will produce a * new upper vnode through CoW. */ if (uvp == NULLVP && unp != NULL) { ovp = vp; vp = unp->un_dvp; /* * Only the root vnode should have an empty parent, but it * should not have an empty uppervp, so we shouldn't get here. 
*/ VNASSERT(vp != NULL, ovp, ("%s: NULL parent vnode", __func__)); VI_UNLOCK(ovp); VI_LOCK(vp); unp = VTOUNIONFS(vp); if (unp != NULL) uvp = unp->un_uppervp; if (uvp == NULLVP) error = EACCES; } if (uvp != NULLVP) { vholdnz(uvp); VI_UNLOCK(vp); error = VOP_GETWRITEMOUNT(uvp, ap->a_mpp); vdrop(uvp); } else { VI_UNLOCK(vp); *(ap->a_mpp) = NULL; } UNIONFS_INTERNAL_DEBUG("unionfs_getwritemount: leave (%d)\n", error); return (error); } static int unionfs_inactive(struct vop_inactive_args *ap) { ap->a_vp->v_object = NULL; vrecycle(ap->a_vp); return (0); } static int unionfs_reclaim(struct vop_reclaim_args *ap) { /* UNIONFS_INTERNAL_DEBUG("unionfs_reclaim: enter\n"); */ unionfs_noderem(ap->a_vp); /* UNIONFS_INTERNAL_DEBUG("unionfs_reclaim: leave\n"); */ return (0); } static int unionfs_print(struct vop_print_args *ap) { struct unionfs_node *unp; /* struct unionfs_node_status *unsp; */ unp = VTOUNIONFS(ap->a_vp); /* unionfs_get_node_status(unp, curthread, &unsp); */ printf("unionfs_vp=%p, uppervp=%p, lowervp=%p\n", ap->a_vp, unp->un_uppervp, unp->un_lowervp); /* printf("unionfs opencnt: uppervp=%d, lowervp=%d\n", unsp->uns_upper_opencnt, unsp->uns_lower_opencnt); */ if (unp->un_uppervp != NULLVP) vn_printf(unp->un_uppervp, "unionfs: upper "); if (unp->un_lowervp != NULLVP) vn_printf(unp->un_lowervp, "unionfs: lower "); return (0); } -static int -unionfs_get_llt_revlock(struct vnode *vp, int flags) -{ - int revlock; - - revlock = 0; - - switch (flags & LK_TYPE_MASK) { - case LK_SHARED: - if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) - revlock = LK_UPGRADE; - else - revlock = LK_RELEASE; - break; - case LK_EXCLUSIVE: - case LK_UPGRADE: - revlock = LK_RELEASE; - break; - case LK_DOWNGRADE: - revlock = LK_UPGRADE; - break; - default: - break; - } - - return (revlock); -} - -/* - * The state of an acquired lock is adjusted similarly to - * the time of error generating. 
- * flags: LK_RELEASE or LK_UPGRADE - */ -static void -unionfs_revlock(struct vnode *vp, int flags) -{ - if (flags & LK_RELEASE) - VOP_UNLOCK_FLAGS(vp, flags); - else { - /* UPGRADE */ - if (vn_lock(vp, flags) != 0) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - } -} - /* VOP_LOCK1: a release request, or a request with no lock type, is passed to VOP_UNLOCK_FLAGS(). With the added (+) lines, any other request locks a single underlying vnode: the upper vnode when present, otherwise the lower vnode. The interlock protects the read of the private data and the target is held with vholdnz() across VOP_LOCK(). A vnode without unionfs private data is locked through vop_stdlock(). If VOP_LOCK() succeeds but the private data is gone, or the lower vnode was locked and an upper vnode now exists, the lock is dropped and the operation restarts with LK_UPGRADE converted to LK_EXCLUSIVE. */ static int unionfs_lock(struct vop_lock1_args *ap) { struct unionfs_node *unp; struct vnode *vp; - struct vnode *uvp; - struct vnode *lvp; + struct vnode *tvp; int error; int flags; - int revlock; - int interlock; - int uhold; + bool lvp_locked; - /* - * TODO: rework the unionfs locking scheme. - * It's not guaranteed to be safe to blindly lock two vnodes on - * different mounts as is done here. Further, the entanglement - * of locking both vnodes with the various options that can be - * passed to VOP_LOCK() makes this code hard to reason about. - * Instead, consider locking only the upper vnode, or the lower - * vnode is the upper is not present, and taking separate measures - * to lock both vnodes in the few cases when that is needed. - */ error = 0; - interlock = 1; - uhold = 0; flags = ap->a_flags; vp = ap->a_vp; if (LK_RELEASE == (flags & LK_TYPE_MASK) || !(flags & LK_TYPE_MASK)) return (VOP_UNLOCK_FLAGS(vp, flags | LK_RELEASE)); +unionfs_lock_restart: + /* + * We currently need the interlock here to ensure we can safely + * access the unionfs vnode's private data. We may be able to + * eliminate this extra locking by instead using vfs_smr_enter() + * and vn_load_v_data_smr() here in conjunction with an SMR UMA + * zone for unionfs nodes.
+ */ if ((flags & LK_INTERLOCK) == 0) VI_LOCK(vp); + else + flags &= ~LK_INTERLOCK; unp = VTOUNIONFS(vp); - if (unp == NULL) - goto unionfs_lock_null_vnode; - - KASSERT_UNIONFS_VNODE(ap->a_vp); - - lvp = unp->un_lowervp; - uvp = unp->un_uppervp; + if (unp == NULL) { + VI_UNLOCK(vp); + ap->a_flags = flags; + return (vop_stdlock(ap)); + } - if ((revlock = unionfs_get_llt_revlock(vp, flags)) == 0) - panic("unknown lock type: 0x%x", flags & LK_TYPE_MASK); + if (unp->un_uppervp != NULL) { + tvp = unp->un_uppervp; + lvp_locked = false; + } else { + tvp = unp->un_lowervp; + lvp_locked = true; + } /* * During unmount, the root vnode lock may be taken recursively, * because it may share the same v_vnlock field as the vnode covered by * the unionfs mount. The covered vnode is locked across VFS_UNMOUNT(), * and the same lock may be taken recursively here during vflush() * issued by unionfs_unmount(). */ if ((flags & LK_TYPE_MASK) == LK_EXCLUSIVE && (vp->v_vflag & VV_ROOT) != 0) flags |= LK_CANRECURSE; - if (lvp != NULLVP) { - if (uvp != NULLVP && flags & LK_UPGRADE) { + vholdnz(tvp); + VI_UNLOCK(vp); + error = VOP_LOCK(tvp, flags); + vdrop(tvp); + if (error == 0 && (lvp_locked || VTOUNIONFS(vp) == NULL)) { + /* + * After dropping the interlock above, there exists a window + * in which another thread may acquire the lower vnode lock + * and then either doom the unionfs vnode or create an upper + * vnode. In either case, we will effectively be holding the + * wrong lock, so we must drop the lower vnode lock and + * restart the lock operation. + * + * If unp is not already NULL, we assume that we can safely + * access it because we currently hold lvp's lock. + * unionfs_noderem() acquires lvp's lock before freeing + * the vnode private data, ensuring it can't be concurrently + * freed while we are using it here. Likewise, + * unionfs_node_update() acquires lvp's lock before installing + * an upper vnode.
Without those guarantees, we would need to + * reacquire the vnode interlock here. + * Note that unionfs_noderem() doesn't acquire lvp's lock if + * this is the root vnode, but the root vnode should always + * have an upper vnode and therefore we should never use its + * lower vnode lock here. + */ + unp = VTOUNIONFS(vp); + if (unp == NULL || unp->un_uppervp != NULLVP) { + VOP_UNLOCK(tvp); /* - * Share Lock is once released and a deadlock is - * avoided. + * If we previously held the lock, the upgrade may + * have temporarily dropped the lock, in which case + * concurrent dooming or copy-up will necessitate + * acquiring a different lock. Since we never held + * the new lock, LK_UPGRADE must be cleared here to + * avoid triggering a lockmgr panic. */ - vholdnz(uvp); - uhold = 1; - VOP_UNLOCK(uvp); - } - VI_LOCK_FLAGS(lvp, MTX_DUPOK); - flags |= LK_INTERLOCK; - vholdl(lvp); - - VI_UNLOCK(vp); - ap->a_flags &= ~LK_INTERLOCK; - - error = VOP_LOCK(lvp, flags); - - VI_LOCK(vp); - unp = VTOUNIONFS(vp); - if (unp == NULL) { - /* vnode is released. */ - VI_UNLOCK(vp); - if (error == 0) - VOP_UNLOCK(lvp); - vdrop(lvp); - if (uhold != 0) - vdrop(uvp); - goto unionfs_lock_fallback; + if (flags & LK_UPGRADE) + flags = (flags & ~LK_TYPE_MASK) | LK_EXCLUSIVE; + VNASSERT((flags & LK_DOWNGRADE) == 0, vp, + ("%s: vnode doomed during downgrade", __func__)); + goto unionfs_lock_restart; } } - if (error == 0 && uvp != NULLVP) { - if (uhold && flags & LK_UPGRADE) { - flags &= ~LK_TYPE_MASK; - flags |= LK_EXCLUSIVE; - } - VI_LOCK_FLAGS(uvp, MTX_DUPOK); - flags |= LK_INTERLOCK; - if (uhold == 0) { - vholdl(uvp); - uhold = 1; - } - - VI_UNLOCK(vp); - ap->a_flags &= ~LK_INTERLOCK; - - error = VOP_LOCK(uvp, flags); - - VI_LOCK(vp); - unp = VTOUNIONFS(vp); - if (unp == NULL) { - /* vnode is released.
*/ - VI_UNLOCK(vp); - if (error == 0) - VOP_UNLOCK(uvp); - vdrop(uvp); - if (lvp != NULLVP) { - VOP_UNLOCK(lvp); - vdrop(lvp); - } - goto unionfs_lock_fallback; - } - if (error != 0 && lvp != NULLVP) { - /* rollback */ - VI_UNLOCK(vp); - unionfs_revlock(lvp, revlock); - interlock = 0; - } - } - - if (interlock) - VI_UNLOCK(vp); - if (lvp != NULLVP) - vdrop(lvp); - if (uhold != 0) - vdrop(uvp); - return (error); - -unionfs_lock_null_vnode: - ap->a_flags |= LK_INTERLOCK; - return (vop_stdlock(ap)); - -unionfs_lock_fallback: - /* - * If we reach this point, we've discovered the unionfs vnode - * has been reclaimed while the upper/lower vnode locks were - * temporarily dropped. Such temporary droppage may happen - * during the course of an LK_UPGRADE operation itself, and in - * that case LK_UPGRADE must be cleared as the unionfs vnode's - * lock has been reset to point to the standard v_lock field, - * which has not previously been held. - */ - if (flags & LK_UPGRADE) { - ap->a_flags &= ~LK_TYPE_MASK; - ap->a_flags |= LK_EXCLUSIVE; - } - return (vop_stdlock(ap)); } /* VOP_UNLOCK: with the added (+) lines, unlock the single underlying vnode that backs the lock (the upper vnode when present, otherwise the lower vnode), holding it with vholdnz() across the call. A vnode without unionfs private data is unlocked through vop_stdunlock(). */ static int unionfs_unlock(struct vop_unlock_args *ap) { struct vnode *vp; - struct vnode *lvp; - struct vnode *uvp; + struct vnode *tvp; struct unionfs_node *unp; int error; - int uhold; KASSERT_UNIONFS_VNODE(ap->a_vp); - error = 0; - uhold = 0; vp = ap->a_vp; unp = VTOUNIONFS(vp); if (unp == NULL) - goto unionfs_unlock_null_vnode; - lvp = unp->un_lowervp; - uvp = unp->un_uppervp; - - if (lvp != NULLVP) { - vholdnz(lvp); - error = VOP_UNLOCK(lvp); - } - - if (error == 0 && uvp != NULLVP) { - vholdnz(uvp); - uhold = 1; - error = VOP_UNLOCK(uvp); - } + return (vop_stdunlock(ap)); - if (lvp != NULLVP) - vdrop(lvp); - if (uhold != 0) - vdrop(uvp); + tvp = (unp->un_uppervp != NULL ?
unp->un_uppervp : unp->un_lowervp); - return error; + vholdnz(tvp); + error = VOP_UNLOCK(tvp); + vdrop(tvp); -unionfs_unlock_null_vnode: - return (vop_stdunlock(ap)); + return (error); } /* VOP_PATHCONF: forward to the upper vnode when present, otherwise to the lower vnode. */ static int unionfs_pathconf(struct vop_pathconf_args *ap) { struct unionfs_node *unp; struct vnode *vp; KASSERT_UNIONFS_VNODE(ap->a_vp); unp = VTOUNIONFS(ap->a_vp); vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp); return (VOP_PATHCONF(vp, ap->a_name, ap->a_retval)); } /* VOP_ADVLOCK: the advisory lock request is always issued on the upper vnode. With the unionfs vnode locked exclusively, a node that has no upper vnode first goes through unionfs_copyfile() and the upper vnode is re-read afterwards. If the node status then records lower opens, the upper vnode is opened with the recorded lower open mode and one lower open is closed; otherwise the status entry is offered to unionfs_tryrem_node_status(). The unionfs vnode is unlocked before VOP_ADVLOCK() is forwarded and on the abort path. */ static int unionfs_advlock(struct vop_advlock_args *ap) { struct unionfs_node *unp; struct unionfs_node_status *unsp; struct vnode *vp; struct vnode *uvp; struct thread *td; int error; UNIONFS_INTERNAL_DEBUG("unionfs_advlock: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_vp); vp = ap->a_vp; td = curthread; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); unp = VTOUNIONFS(ap->a_vp); uvp = unp->un_uppervp; if (uvp == NULLVP) { - error = unionfs_copyfile(unp, 1, td->td_ucred, td); + error = unionfs_copyfile(ap->a_vp, 1, td->td_ucred, td); if (error != 0) goto unionfs_advlock_abort; uvp = unp->un_uppervp; unionfs_get_node_status(unp, td, &unsp); if (unsp->uns_lower_opencnt > 0) { /* try reopen the vnode */ error = VOP_OPEN(uvp, unsp->uns_lower_openmode, td->td_ucred, td, NULL); if (error) goto unionfs_advlock_abort; unsp->uns_upper_opencnt++; VOP_CLOSE(unp->un_lowervp, unsp->uns_lower_openmode, td->td_ucred, td); unsp->uns_lower_opencnt--; } else unionfs_tryrem_node_status(unp, unsp); } VOP_UNLOCK(vp); error = VOP_ADVLOCK(uvp, ap->a_id, ap->a_op, ap->a_fl, ap->a_flags); UNIONFS_INTERNAL_DEBUG("unionfs_advlock: leave (%d)\n", error); return error; unionfs_advlock_abort: VOP_UNLOCK(vp); UNIONFS_INTERNAL_DEBUG("unionfs_advlock: leave (%d)\n", error); return error; } /* VOP_STRATEGY: forward the buffer to the upper vnode when present, otherwise to the lower vnode. Under DIAGNOSTIC, panic if neither vnode exists or if a BIO_WRITE would be sent to the lower vnode. */ static int unionfs_strategy(struct vop_strategy_args *ap) { struct unionfs_node *unp; struct vnode *vp; KASSERT_UNIONFS_VNODE(ap->a_vp); unp = VTOUNIONFS(ap->a_vp); vp = (unp->un_uppervp != NULLVP ?
unp->un_uppervp : unp->un_lowervp); #ifdef DIAGNOSTIC if (vp == NULLVP) panic("unionfs_strategy: nullvp"); if (ap->a_bp->b_iocmd == BIO_WRITE && vp == unp->un_lowervp) panic("unionfs_strategy: writing to lowervp"); #endif return (VOP_STRATEGY(vp, ap->a_bp)); } /* VOP_GETACL: forward to the upper vnode when present, otherwise to the lower vnode, and return the result. */ static int unionfs_getacl(struct vop_getacl_args *ap) { struct unionfs_node *unp; struct vnode *vp; int error; KASSERT_UNIONFS_VNODE(ap->a_vp); unp = VTOUNIONFS(ap->a_vp); vp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp); UNIONFS_INTERNAL_DEBUG("unionfs_getacl: enter\n"); error = VOP_GETACL(vp, ap->a_type, ap->a_aclp, ap->a_cred, ap->a_td); UNIONFS_INTERNAL_DEBUG("unionfs_getacl: leave (%d)\n", error); return (error); } /* VOP_SETACL: the ACL is only ever written to the upper vnode. Returns EROFS for a read-only mount, and also when there is no upper vnode and the lower vnode is not a regular file. A regular file without an upper vnode goes through unionfs_copyfile() first, and its error is returned on failure. The forwarded VOP_SETACL() is bracketed by unionfs_forward_vop_start() and unionfs_forward_vop_finish(). */ static int unionfs_setacl(struct vop_setacl_args *ap) { struct unionfs_node *unp; struct vnode *uvp; struct vnode *lvp; struct thread *td; int error; UNIONFS_INTERNAL_DEBUG("unionfs_setacl: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_vp); error = EROFS; unp = VTOUNIONFS(ap->a_vp); uvp = unp->un_uppervp; lvp = unp->un_lowervp; td = ap->a_td; if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (uvp == NULLVP && lvp->v_type == VREG) { - if ((error = unionfs_copyfile(unp, 1, ap->a_cred, td)) != 0) + if ((error = unionfs_copyfile(ap->a_vp, 1, ap->a_cred, td)) != 0) return (error); uvp = unp->un_uppervp; } if (uvp != NULLVP) { int lkflags; unionfs_forward_vop_start(uvp, &lkflags); error = VOP_SETACL(uvp, ap->a_type, ap->a_aclp, ap->a_cred, td); unionfs_forward_vop_finish(ap->a_vp, uvp, lkflags); } UNIONFS_INTERNAL_DEBUG("unionfs_setacl: leave (%d)\n", error); return (error); } /* VOP_ACLCHECK: forward to the upper vnode when present, otherwise to the lower vnode, and return the result. */ static int unionfs_aclcheck(struct vop_aclcheck_args *ap) { struct unionfs_node *unp; struct vnode *vp; int error; UNIONFS_INTERNAL_DEBUG("unionfs_aclcheck: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_vp); unp = VTOUNIONFS(ap->a_vp); vp = (unp->un_uppervp != NULLVP ?
unp->un_uppervp : unp->un_lowervp); error = VOP_ACLCHECK(vp, ap->a_type, ap->a_aclp, ap->a_cred, ap->a_td); UNIONFS_INTERNAL_DEBUG("unionfs_aclcheck: leave (%d)\n", error); return (error); } /* VOP_OPENEXTATTR: forward to the upper vnode when present, otherwise to the lower vnode. Returns EBUSY if un_flag already marks that vnode as opened for extended attributes. On success the vnode lock is upgraded (or re-taken exclusively if the upgrade fails), the matching UNIONFS_OPENEXTU or UNIONFS_OPENEXTL flag is set unless the vnode is doomed by then, and the lock is downgraded again. */ static int unionfs_openextattr(struct vop_openextattr_args *ap) { struct unionfs_node *unp; struct vnode *vp; struct vnode *tvp; int error; KASSERT_UNIONFS_VNODE(ap->a_vp); vp = ap->a_vp; unp = VTOUNIONFS(vp); tvp = (unp->un_uppervp != NULLVP ? unp->un_uppervp : unp->un_lowervp); if ((tvp == unp->un_uppervp && (unp->un_flag & UNIONFS_OPENEXTU)) || (tvp == unp->un_lowervp && (unp->un_flag & UNIONFS_OPENEXTL))) return (EBUSY); error = VOP_OPENEXTATTR(tvp, ap->a_cred, ap->a_td); if (error == 0) { if (vn_lock(vp, LK_UPGRADE) != 0) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (!VN_IS_DOOMED(vp)) { if (tvp == unp->un_uppervp) unp->un_flag |= UNIONFS_OPENEXTU; else unp->un_flag |= UNIONFS_OPENEXTL; } vn_lock(vp, LK_DOWNGRADE | LK_RETRY); } return (error); } /* VOP_CLOSEEXTATTR: forward to whichever vnode un_flag marks as opened for extended attributes (the upper flag is checked first) and return EOPNOTSUPP if none is. On success the matching flag is cleared under the same upgrade, doomed check and downgrade sequence that unionfs_openextattr() uses to set it. */ static int unionfs_closeextattr(struct vop_closeextattr_args *ap) { struct unionfs_node *unp; struct vnode *vp; struct vnode *tvp; int error; KASSERT_UNIONFS_VNODE(ap->a_vp); vp = ap->a_vp; unp = VTOUNIONFS(vp); tvp = NULLVP; if (unp->un_flag & UNIONFS_OPENEXTU) tvp = unp->un_uppervp; else if (unp->un_flag & UNIONFS_OPENEXTL) tvp = unp->un_lowervp; if (tvp == NULLVP) return (EOPNOTSUPP); error = VOP_CLOSEEXTATTR(tvp, ap->a_commit, ap->a_cred, ap->a_td); if (error == 0) { if (vn_lock(vp, LK_UPGRADE) != 0) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (!VN_IS_DOOMED(vp)) { if (tvp == unp->un_uppervp) unp->un_flag &= ~UNIONFS_OPENEXTU; else unp->un_flag &= ~UNIONFS_OPENEXTL; } vn_lock(vp, LK_DOWNGRADE | LK_RETRY); } return (error); } /* VOP_GETEXTATTR: forward to whichever vnode un_flag marks as opened for extended attributes (the upper flag is checked first); return EOPNOTSUPP if none is. */ static int unionfs_getextattr(struct vop_getextattr_args *ap) { struct unionfs_node *unp; struct vnode *vp; KASSERT_UNIONFS_VNODE(ap->a_vp); unp = VTOUNIONFS(ap->a_vp); vp = NULLVP; if (unp->un_flag & UNIONFS_OPENEXTU) vp = unp->un_uppervp; else if (unp->un_flag & UNIONFS_OPENEXTL) vp = unp->un_lowervp; if (vp ==
NULLVP) return (EOPNOTSUPP); return (VOP_GETEXTATTR(vp, ap->a_attrnamespace, ap->a_name, ap->a_uio, ap->a_size, ap->a_cred, ap->a_td)); } /* VOP_SETEXTATTR: returns EROFS for a read-only mount and EOPNOTSUPP when un_flag marks no vnode as opened for extended attributes. If the opened vnode is a lower regular file, it is closed on the lower vnode, unionfs_copyfile() is called when no upper vnode exists, and the upper vnode is opened instead, moving the flag from UNIONFS_OPENEXTL to UNIONFS_OPENEXTU. If either step fails, the lower vnode is reopened when the flag is still set and the error is returned. The attribute is written only when the opened vnode is the upper vnode; otherwise the initial EROFS is returned. NOTE(review): the reopen path re-reads the private data and checks it for NULL, while the success path keeps using the unp read at entry; confirm that is intended. */ static int unionfs_setextattr(struct vop_setextattr_args *ap) { struct unionfs_node *unp; struct vnode *uvp; struct vnode *lvp; struct vnode *ovp; struct ucred *cred; struct thread *td; int error; KASSERT_UNIONFS_VNODE(ap->a_vp); error = EROFS; unp = VTOUNIONFS(ap->a_vp); uvp = unp->un_uppervp; lvp = unp->un_lowervp; ovp = NULLVP; cred = ap->a_cred; td = ap->a_td; UNIONFS_INTERNAL_DEBUG("unionfs_setextattr: enter (un_flag=%x)\n", unp->un_flag); if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (unp->un_flag & UNIONFS_OPENEXTU) ovp = unp->un_uppervp; else if (unp->un_flag & UNIONFS_OPENEXTL) ovp = unp->un_lowervp; if (ovp == NULLVP) return (EOPNOTSUPP); if (ovp == lvp && lvp->v_type == VREG) { VOP_CLOSEEXTATTR(lvp, 0, cred, td); if (uvp == NULLVP && - (error = unionfs_copyfile(unp, 1, cred, td)) != 0) { + (error = unionfs_copyfile(ap->a_vp, 1, cred, td)) != 0) { unionfs_setextattr_reopen: - if ((unp->un_flag & UNIONFS_OPENEXTL) && + unp = VTOUNIONFS(ap->a_vp); + if (unp != NULL && (unp->un_flag & UNIONFS_OPENEXTL) && VOP_OPENEXTATTR(lvp, cred, td)) { #ifdef DIAGNOSTIC panic("unionfs: VOP_OPENEXTATTR failed"); #endif unp->un_flag &= ~UNIONFS_OPENEXTL; } goto unionfs_setextattr_abort; } uvp = unp->un_uppervp; if ((error = VOP_OPENEXTATTR(uvp, cred, td)) != 0) goto unionfs_setextattr_reopen; unp->un_flag &= ~UNIONFS_OPENEXTL; unp->un_flag |= UNIONFS_OPENEXTU; ovp = uvp; } if (ovp == uvp) { int lkflags; unionfs_forward_vop_start(ovp, &lkflags); error = VOP_SETEXTATTR(ovp, ap->a_attrnamespace, ap->a_name, ap->a_uio, cred, td); unionfs_forward_vop_finish(ap->a_vp, ovp, lkflags); } unionfs_setextattr_abort: UNIONFS_INTERNAL_DEBUG("unionfs_setextattr: leave (%d)\n", error); return (error); } /* VOP_LISTEXTATTR: forward to whichever vnode un_flag marks as opened for extended attributes (the upper flag is checked first); return EOPNOTSUPP if none is. */ static int unionfs_listextattr(struct vop_listextattr_args *ap) { struct unionfs_node *unp; struct vnode *vp;
KASSERT_UNIONFS_VNODE(ap->a_vp); unp = VTOUNIONFS(ap->a_vp); vp = NULLVP; if (unp->un_flag & UNIONFS_OPENEXTU) vp = unp->un_uppervp; else if (unp->un_flag & UNIONFS_OPENEXTL) vp = unp->un_lowervp; if (vp == NULLVP) return (EOPNOTSUPP); return (VOP_LISTEXTATTR(vp, ap->a_attrnamespace, ap->a_uio, ap->a_size, ap->a_cred, ap->a_td)); } /* VOP_DELETEEXTATTR: returns EROFS for a read-only mount and EOPNOTSUPP when un_flag marks no vnode as opened for extended attributes. If the opened vnode is a lower regular file, it is closed on the lower vnode, unionfs_copyfile() is called when no upper vnode exists, and the upper vnode is opened instead, moving the flag from UNIONFS_OPENEXTL to UNIONFS_OPENEXTU. If either step fails, the lower vnode is reopened when the flag is still set and the error is returned. The attribute is deleted only when the opened vnode is the upper vnode; otherwise the initial EROFS is returned. */ static int unionfs_deleteextattr(struct vop_deleteextattr_args *ap) { struct unionfs_node *unp; struct vnode *uvp; struct vnode *lvp; struct vnode *ovp; struct ucred *cred; struct thread *td; int error; KASSERT_UNIONFS_VNODE(ap->a_vp); error = EROFS; unp = VTOUNIONFS(ap->a_vp); uvp = unp->un_uppervp; lvp = unp->un_lowervp; ovp = NULLVP; cred = ap->a_cred; td = ap->a_td; UNIONFS_INTERNAL_DEBUG("unionfs_deleteextattr: enter (un_flag=%x)\n", unp->un_flag); if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (unp->un_flag & UNIONFS_OPENEXTU) ovp = unp->un_uppervp; else if (unp->un_flag & UNIONFS_OPENEXTL) ovp = unp->un_lowervp; if (ovp == NULLVP) return (EOPNOTSUPP); if (ovp == lvp && lvp->v_type == VREG) { VOP_CLOSEEXTATTR(lvp, 0, cred, td); if (uvp == NULLVP && - (error = unionfs_copyfile(unp, 1, cred, td)) != 0) { + (error = unionfs_copyfile(ap->a_vp, 1, cred, td)) != 0) { unionfs_deleteextattr_reopen: - if ((unp->un_flag & UNIONFS_OPENEXTL) && + unp = VTOUNIONFS(ap->a_vp); + if (unp != NULL && (unp->un_flag & UNIONFS_OPENEXTL) && VOP_OPENEXTATTR(lvp, cred, td)) { #ifdef DIAGNOSTIC panic("unionfs: VOP_OPENEXTATTR failed"); #endif unp->un_flag &= ~UNIONFS_OPENEXTL; } goto unionfs_deleteextattr_abort; } uvp = unp->un_uppervp; if ((error = VOP_OPENEXTATTR(uvp, cred, td)) != 0) goto unionfs_deleteextattr_reopen; unp->un_flag &= ~UNIONFS_OPENEXTL; unp->un_flag |= UNIONFS_OPENEXTU; ovp = uvp; } if (ovp == uvp) error = VOP_DELETEEXTATTR(ovp, ap->a_attrnamespace, ap->a_name, ap->a_cred, ap->a_td); unionfs_deleteextattr_abort: UNIONFS_INTERNAL_DEBUG("unionfs_deleteextattr: leave (%d)\n", error); return (error); } /* VOP_SETLABEL handler follows: see the function body for the read-only and no-upper-vnode handling. */ static int
unionfs_setlabel(struct vop_setlabel_args *ap) { /* The label is only ever set on the upper vnode. Returns EROFS for a read-only mount, and also when there is no upper vnode and the lower vnode is not a regular file. A regular file without an upper vnode goes through unionfs_copyfile() first, and its error is returned on failure. */ struct unionfs_node *unp; struct vnode *uvp; struct vnode *lvp; struct thread *td; int error; UNIONFS_INTERNAL_DEBUG("unionfs_setlabel: enter\n"); KASSERT_UNIONFS_VNODE(ap->a_vp); error = EROFS; unp = VTOUNIONFS(ap->a_vp); uvp = unp->un_uppervp; lvp = unp->un_lowervp; td = ap->a_td; if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (uvp == NULLVP && lvp->v_type == VREG) { - if ((error = unionfs_copyfile(unp, 1, ap->a_cred, td)) != 0) + if ((error = unionfs_copyfile(ap->a_vp, 1, ap->a_cred, td)) != 0) return (error); uvp = unp->un_uppervp; } if (uvp != NULLVP) error = VOP_SETLABEL(uvp, ap->a_label, ap->a_cred, td); UNIONFS_INTERNAL_DEBUG("unionfs_setlabel: leave (%d)\n", error); return (error); } /* VOP_VPTOFH: not supported; always returns EOPNOTSUPP. */ static int unionfs_vptofh(struct vop_vptofh_args *ap) { return (EOPNOTSUPP); } /* VOP_ADD_WRITECOUNT: forward the adjustment a_inc to the upper vnode, which is asserted to exist, and return its error if it fails. On success the same adjustment is applied atomically to the v_writecount of the unionfs vnode and 0 is returned. */ static int unionfs_add_writecount(struct vop_add_writecount_args *ap) { struct vnode *tvp, *vp; struct unionfs_node *unp; int error, writerefs __diagused; vp = ap->a_vp; unp = VTOUNIONFS(vp); tvp = unp->un_uppervp; KASSERT(tvp != NULL, ("%s: adding write ref without upper vnode", __func__)); error = VOP_ADD_WRITECOUNT(tvp, ap->a_inc); if (error != 0) return (error); /* * We need to track the write refs we've passed to the underlying * vnodes so that we can undo them in case we are forcibly unmounted.
*/ writerefs = atomic_fetchadd_int(&vp->v_writecount, ap->a_inc); /* text refs are bypassed to lowervp */ VNASSERT(writerefs >= 0, vp, ("%s: invalid write count %d", __func__, writerefs)); VNASSERT(writerefs + ap->a_inc >= 0, vp, ("%s: invalid write count inc %d + %d", __func__, writerefs, ap->a_inc)); return (0); } /* VOP_VPUT_PAIR: with the added (+) lines, one underlying vnode is chosen for the directory (tdvp) and, when a child is supplied, one for the child (tvp); in each case the upper vnode when present and otherwise the lower vnode. Each chosen vnode is referenced so that the release below has a reference to drop. The caller references on the unionfs vnodes are then dropped, except that vp is kept when a_unlock_vp is false. In that case vp is relocked exclusively and, if it has lost its private data and the mount can be busied, a replacement node is requested from unionfs_nodeget() and stored in *a_vpp on success. Returns the result of the underlying release step. */ static int unionfs_vput_pair(struct vop_vput_pair_args *ap) { struct mount *mp; - struct vnode *dvp, *vp, **vpp, *lvp, *ldvp, *uvp, *udvp, *tempvp; + struct vnode *dvp, *vp, **vpp, *lvp, *uvp, *tvp, *tdvp, *tempvp; struct unionfs_node *dunp, *unp; int error, res; dvp = ap->a_dvp; vpp = ap->a_vpp; vp = NULLVP; lvp = NULLVP; uvp = NULLVP; + tvp = NULLVP; unp = NULL; dunp = VTOUNIONFS(dvp); - udvp = dunp->un_uppervp; - ldvp = dunp->un_lowervp; + if (dunp->un_uppervp != NULL) + tdvp = dunp->un_uppervp; + else + tdvp = dunp->un_lowervp; /* * Underlying vnodes should be locked because the encompassing unionfs * node is locked, but will not be referenced, as the reference will * only be on the unionfs node. Reference them now so that the vput()s * performed by VOP_VPUT_PAIR() will have a reference to drop. */ - if (udvp != NULLVP) - vref(udvp); - if (ldvp != NULLVP) - vref(ldvp); + vref(tdvp); if (vpp != NULL) vp = *vpp; if (vp != NULLVP) { unp = VTOUNIONFS(vp); uvp = unp->un_uppervp; lvp = unp->un_lowervp; if (uvp != NULLVP) - vref(uvp); - if (lvp != NULLVP) - vref(lvp); + tvp = uvp; + else + tvp = lvp; + vref(tvp); /* * If we're being asked to return a locked child vnode, then * we may need to create a replacement vnode in case the * original is reclaimed while the lock is dropped. In that * case we'll need to ensure the mount and the underlying * vnodes aren't also recycled during that window.
*/ if (!ap->a_unlock_vp) { vhold(vp); if (uvp != NULLVP) vhold(uvp); if (lvp != NULLVP) vhold(lvp); mp = vp->v_mount; vfs_ref(mp); } } - /* - * TODO: Because unionfs_lock() locks both the lower and upper vnodes - * (if available), we must also call VOP_VPUT_PAIR() on both the lower - * and upper parent/child pairs. If unionfs_lock() is reworked to lock - * only a single vnode, this code will need to change to also only - * operate on one vnode pair. - */ - ASSERT_VOP_LOCKED(ldvp, __func__); - ASSERT_VOP_LOCKED(udvp, __func__); - ASSERT_VOP_LOCKED(lvp, __func__); - ASSERT_VOP_LOCKED(uvp, __func__); - - KASSERT(lvp == NULLVP || ldvp != NULLVP, - ("%s: NULL ldvp with non-NULL lvp", __func__)); - if (ldvp != NULLVP) - res = VOP_VPUT_PAIR(ldvp, lvp != NULLVP ? &lvp : NULL, true); - KASSERT(uvp == NULLVP || udvp != NULLVP, - ("%s: NULL udvp with non-NULL uvp", __func__)); - if (udvp != NULLVP) - res = VOP_VPUT_PAIR(udvp, uvp != NULLVP ? &uvp : NULL, true); - - ASSERT_VOP_UNLOCKED(ldvp, __func__); - ASSERT_VOP_UNLOCKED(udvp, __func__); - ASSERT_VOP_UNLOCKED(lvp, __func__); - ASSERT_VOP_UNLOCKED(uvp, __func__); + ASSERT_VOP_LOCKED(tdvp, __func__); + ASSERT_VOP_LOCKED(tvp, __func__); + + /* Directory target is its upper vnode while the child target is its lower vnode: the two are released with separate vput() calls instead of VOP_VPUT_PAIR() (presumably because they are not a parent/child pair on one layer; confirm). */ if (tdvp == dunp->un_uppervp && tvp != NULLVP && tvp == lvp) { + vput(tvp); + vput(tdvp); + res = 0; + } else { + res = VOP_VPUT_PAIR(tdvp, tvp != NULLVP ? &tvp : NULL, true); + } + + ASSERT_VOP_UNLOCKED(tdvp, __func__); + ASSERT_VOP_UNLOCKED(tvp, __func__); /* * VOP_VPUT_PAIR() dropped the references we added to the underlying * vnodes, now drop the caller's reference to the unionfs vnodes. */ if (vp != NULLVP && ap->a_unlock_vp) vrele(vp); vrele(dvp); if (vp == NULLVP || ap->a_unlock_vp) return (res); /* * We're being asked to return a locked vnode. At this point, the * underlying vnodes have been unlocked, so vp may have been reclaimed.
*/ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_data == NULL && vfs_busy(mp, MBF_NOWAIT) == 0) { vput(vp); error = unionfs_nodeget(mp, uvp, lvp, dvp, &tempvp, NULL); if (error == 0) { vn_lock(tempvp, LK_EXCLUSIVE | LK_RETRY); *vpp = tempvp; } else vget(vp, LK_EXCLUSIVE | LK_RETRY); vfs_unbusy(mp); } /* drop the holds and the mount reference taken before the underlying vnodes were released */ if (lvp != NULLVP) vdrop(lvp); if (uvp != NULLVP) vdrop(uvp); vdrop(vp); vfs_rel(mp); return (res); } /* VOP_SET_TEXT: with the unionfs vnode asserted locked, forward to the upper vnode when present, otherwise to the lower vnode, and return the result. */ static int unionfs_set_text(struct vop_set_text_args *ap) { struct vnode *tvp; struct unionfs_node *unp; int error; /* * We assume text refs are managed against lvp/uvp through the * executable mapping backed by its VM object. We therefore don't * need to track leased text refs in the case of a forcible unmount. */ unp = VTOUNIONFS(ap->a_vp); ASSERT_VOP_LOCKED(ap->a_vp, __func__); tvp = unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp; error = VOP_SET_TEXT(tvp); return (error); } /* VOP_UNSET_TEXT: with the unionfs vnode asserted locked, invoke VOP_UNSET_TEXT_CHECKED() on the upper vnode when present, otherwise on the lower vnode. Always returns 0. */ static int unionfs_unset_text(struct vop_unset_text_args *ap) { struct vnode *tvp; struct unionfs_node *unp; ASSERT_VOP_LOCKED(ap->a_vp, __func__); unp = VTOUNIONFS(ap->a_vp); tvp = unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp; VOP_UNSET_TEXT_CHECKED(tvp); return (0); } /* VOP_UNP_BIND: with the unionfs vnode asserted locked, forward a_unpcb to the upper vnode when present, otherwise to the lower vnode. Always returns 0. */ static int unionfs_unp_bind(struct vop_unp_bind_args *ap) { struct vnode *tvp; struct unionfs_node *unp; ASSERT_VOP_LOCKED(ap->a_vp, __func__); unp = VTOUNIONFS(ap->a_vp); tvp = unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp; VOP_UNP_BIND(tvp, ap->a_unpcb); return (0); } /* VOP_UNP_CONNECT: with the unionfs vnode asserted locked, forward a_unpcb to the upper vnode when present, otherwise to the lower vnode. Always returns 0. */ static int unionfs_unp_connect(struct vop_unp_connect_args *ap) { struct vnode *tvp; struct unionfs_node *unp; ASSERT_VOP_LOCKED(ap->a_vp, __func__); unp = VTOUNIONFS(ap->a_vp); tvp = unp->un_uppervp != NULL ?
unp->un_uppervp : unp->un_lowervp; VOP_UNP_CONNECT(tvp, ap->a_unpcb); return (0); } /* VOP_UNP_DETACH: forward to the upper vnode when present, otherwise to the lower vnode. The target is chosen and held under the vnode interlock, and nothing is forwarded when the vnode has no unionfs private data. Always returns 0. */ static int unionfs_unp_detach(struct vop_unp_detach_args *ap) { struct vnode *tvp; struct unionfs_node *unp; tvp = NULL; /* * VOP_UNP_DETACH() is not guaranteed to be called with the unionfs * vnode locked, so we take the interlock to prevent a concurrent * unmount from freeing the unionfs private data. */ VI_LOCK(ap->a_vp); unp = VTOUNIONFS(ap->a_vp); if (unp != NULL) { tvp = unp->un_uppervp != NULL ? unp->un_uppervp : unp->un_lowervp; /* * Hold the target vnode to prevent a concurrent unionfs * unmount from causing it to be recycled once the interlock * is dropped. */ vholdnz(tvp); } VI_UNLOCK(ap->a_vp); if (tvp != NULL) { VOP_UNP_DETACH(tvp); vdrop(tvp); } return (0); } /* Vnode operation vector for unionfs. Operations not listed here are taken from default_vnodeops through .vop_default. */ struct vop_vector unionfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = unionfs_access, .vop_aclcheck = unionfs_aclcheck, .vop_advlock = unionfs_advlock, .vop_bmap = VOP_EOPNOTSUPP, .vop_cachedlookup = unionfs_lookup, .vop_close = unionfs_close, .vop_closeextattr = unionfs_closeextattr, .vop_create = unionfs_create, .vop_deleteextattr = unionfs_deleteextattr, .vop_fsync = unionfs_fsync, .vop_getacl = unionfs_getacl, .vop_getattr = unionfs_getattr, .vop_getextattr = unionfs_getextattr, .vop_getwritemount = unionfs_getwritemount, .vop_inactive = unionfs_inactive, .vop_need_inactive = vop_stdneed_inactive, .vop_islocked = vop_stdislocked, .vop_ioctl = unionfs_ioctl, .vop_link = unionfs_link, .vop_listextattr = unionfs_listextattr, .vop_lock1 = unionfs_lock, .vop_lookup = vfs_cache_lookup, .vop_mkdir = unionfs_mkdir, .vop_mknod = unionfs_mknod, .vop_open = unionfs_open, .vop_openextattr = unionfs_openextattr, .vop_pathconf = unionfs_pathconf, .vop_poll = unionfs_poll, .vop_print = unionfs_print, .vop_read = unionfs_read, .vop_readdir = unionfs_readdir, .vop_readlink = unionfs_readlink, .vop_reclaim = unionfs_reclaim, .vop_remove = unionfs_remove, .vop_rename = unionfs_rename, .vop_rmdir = unionfs_rmdir,
.vop_setacl = unionfs_setacl, .vop_setattr = unionfs_setattr, .vop_setextattr = unionfs_setextattr, .vop_setlabel = unionfs_setlabel, .vop_strategy = unionfs_strategy, .vop_symlink = unionfs_symlink, .vop_unlock = unionfs_unlock, .vop_whiteout = unionfs_whiteout, .vop_write = unionfs_write, .vop_vptofh = unionfs_vptofh, /* always returns EOPNOTSUPP */ .vop_add_writecount = unionfs_add_writecount, .vop_vput_pair = unionfs_vput_pair, .vop_set_text = unionfs_set_text, .vop_unset_text = unionfs_unset_text, .vop_unp_bind = unionfs_unp_bind, .vop_unp_connect = unionfs_unp_connect, .vop_unp_detach = unionfs_unp_detach, }; /* Pass the unionfs vector to the VFS_VOP_VECTOR_REGISTER() macro. */ VFS_VOP_VECTOR_REGISTER(unionfs_vnodeops);