diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h index 58afd2148d08..caca116a2dde 100644 --- a/sys/fs/unionfs/union.h +++ b/sys/fs/unionfs/union.h @@ -1,143 +1,144 @@ /* * Copyright (c) 1994 The Regents of the University of California. * Copyright (c) 1994 Jan-Simon Pendry. * All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)union.h 8.9 (Berkeley) 12/10/94 * $FreeBSD$ */ #define UNMNT_ABOVE 0x0001 /* Target appears above mount point */ #define UNMNT_BELOW 0x0002 /* Target appears below mount point */ #define UNMNT_REPLACE 0x0003 /* Target replaces mount point */ struct union_mount { struct vnode *um_uppervp; /* UN_ULOCK holds locking state */ struct vnode *um_lowervp; /* Left unlocked */ struct ucred *um_cred; /* Credentials of user calling mount */ int um_cmode; /* cmask from mount process */ int um_op; /* Operation mode */ }; #ifdef _KERNEL #ifndef DIAGNOSTIC #define DIAGNOSTIC #endif /* * DEFDIRMODE is the mode bits used to create a shadow directory. 
*/ #define VRWXMODE (VREAD|VWRITE|VEXEC) #define VRWMODE (VREAD|VWRITE) #define UN_DIRMODE ((VRWXMODE)|(VRWXMODE>>3)|(VRWXMODE>>6)) #define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6)) /* * A cache of vnode references (hangs off v_data) */ struct union_node { LIST_ENTRY(union_node) un_cache; /* Hash chain */ struct vnode *un_vnode; /* Back pointer */ struct vnode *un_uppervp; /* overlaying object */ struct vnode *un_lowervp; /* underlying object */ struct vnode *un_dirvp; /* Parent dir of uppervp */ struct vnode *un_pvp; /* Parent vnode */ char *un_path; /* saved component name */ int un_openl; /* # of opens on lowervp */ int un_exclcnt; /* exclusive count */ unsigned int un_flags; struct vnode **un_dircache; /* cached union stack */ off_t un_uppersz; /* size of upper object */ off_t un_lowersz; /* size of lower object */ #ifdef DIAGNOSTIC pid_t un_pid; #endif }; /* * XXX UN_ULOCK - indicates that the uppervp is locked * * UN_CACHED - node is in the union cache */ /*#define UN_ULOCK 0x04*/ /* Upper node is locked */ #define UN_CACHED 0x10 /* In union cache */ /* * Hash table locking flags */ #define UNVP_WANT 0x01 #define UNVP_LOCKED 0x02 extern int union_allocvp(struct vnode **, struct mount *, struct vnode *, struct vnode *, struct componentname *, struct vnode *, struct vnode *, int); extern int union_freevp(struct vnode *); -extern struct vnode *union_dircache(struct vnode *, struct thread *); +extern struct vnode *union_dircache_get(struct vnode *, struct thread *); +extern void union_dircache_free(struct union_node *); extern int union_copyup(struct union_node *, int, struct ucred *, struct thread *); extern int union_dowhiteout(struct union_node *, struct ucred *, struct thread *); extern int union_mkshadow(struct union_mount *, struct vnode *, struct componentname *, struct vnode **); extern int union_mkwhiteout(struct union_mount *, struct vnode *, struct componentname *, char *); extern int union_cn_close(struct vnode *, int, struct ucred *, struct 
thread *); extern void union_removed_upper(struct union_node *un); extern struct vnode *union_lowervp(struct vnode *); extern void union_newsize(struct vnode *, off_t, off_t); extern int (*union_dircheckp)(struct thread *, struct vnode **, struct file *); #define MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data)) #define VTOUNION(vp) ((struct union_node *)(vp)->v_data) #define UNIONTOV(un) ((un)->un_vnode) #define LOWERVP(vp) (VTOUNION(vp)->un_lowervp) #define UPPERVP(vp) (VTOUNION(vp)->un_uppervp) #define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp)) #define UDEBUG(x) if (uniondebug) printf x #define UDEBUG_ENABLED 1 extern vop_t **union_vnodeop_p; extern struct vfsops union_vfsops; extern int uniondebug; #endif /* _KERNEL */ diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c index 077726834e6f..ae0323aa4d16 100644 --- a/sys/fs/unionfs/union_subr.c +++ b/sys/fs/unionfs/union_subr.c @@ -1,1379 +1,1384 @@ /* * Copyright (c) 1994 Jan-Simon Pendry * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)union_subr.c 8.20 (Berkeley) 5/20/95 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for vnode_pager_setsize */ #include /* for vm cache coherency */ #include #include #include extern int union_init(void); /* must be power of two, otherwise change UNION_HASH() */ #define NHASH 32 /* unsigned int ... 
*/ #define UNION_HASH(u, l) \ (((((uintptr_t) (u)) + ((uintptr_t) l)) >> 8) & (NHASH-1)) static LIST_HEAD(unhead, union_node) unhead[NHASH]; static int unvplock[NHASH]; static void union_dircache_r(struct vnode *vp, struct vnode ***vppp, int *cntp); static int union_list_lock(int ix); static void union_list_unlock(int ix); static int union_relookup(struct union_mount *um, struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct componentname *cn, char *path, int pathlen); static void union_updatevp(struct union_node *un, struct vnode *uppervp, struct vnode *lowervp); static void union_newlower(struct union_node *, struct vnode *); static void union_newupper(struct union_node *, struct vnode *); static int union_copyfile(struct vnode *, struct vnode *, struct ucred *, struct thread *); static int union_vn_create(struct vnode **, struct union_node *, struct thread *); static int union_vn_close(struct vnode *, int, struct ucred *, struct thread *); int union_init() { int i; for (i = 0; i < NHASH; i++) LIST_INIT(&unhead[i]); bzero((caddr_t)unvplock, sizeof(unvplock)); return (0); } static int union_list_lock(ix) int ix; { if (unvplock[ix] & UNVP_LOCKED) { unvplock[ix] |= UNVP_WANT; (void) tsleep( &unvplock[ix], PINOD, "unllck", 0); return (1); } unvplock[ix] |= UNVP_LOCKED; return (0); } static void union_list_unlock(ix) int ix; { unvplock[ix] &= ~UNVP_LOCKED; if (unvplock[ix] & UNVP_WANT) { unvplock[ix] &= ~UNVP_WANT; wakeup( &unvplock[ix]); } } /* * union_updatevp: * * The uppervp, if not NULL, must be referenced and not locked by us * The lowervp, if not NULL, must be referenced. * * If uppervp and lowervp match pointers already installed, then * nothing happens. The passed vp's (when matching) are not adjusted. * * This routine may only be called by union_newupper() and * union_newlower(). 
*/ static void union_updatevp(un, uppervp, lowervp) struct union_node *un; struct vnode *uppervp; struct vnode *lowervp; { int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp); int nhash = UNION_HASH(uppervp, lowervp); int docache = (lowervp != NULLVP || uppervp != NULLVP); int lhash, uhash; /* * Ensure locking is ordered from lower to higher * to avoid deadlocks. */ if (nhash < ohash) { lhash = nhash; uhash = ohash; } else { lhash = ohash; uhash = nhash; } if (lhash != uhash) { while (union_list_lock(lhash)) continue; } while (union_list_lock(uhash)) continue; if (ohash != nhash || !docache) { if (un->un_flags & UN_CACHED) { un->un_flags &= ~UN_CACHED; LIST_REMOVE(un, un_cache); } } if (ohash != nhash) union_list_unlock(ohash); if (un->un_lowervp != lowervp) { if (un->un_lowervp) { vrele(un->un_lowervp); if (un->un_path) { free(un->un_path, M_TEMP); un->un_path = 0; } } un->un_lowervp = lowervp; un->un_lowersz = VNOVAL; } if (un->un_uppervp != uppervp) { if (un->un_uppervp) vrele(un->un_uppervp); un->un_uppervp = uppervp; un->un_uppersz = VNOVAL; } if (docache && (ohash != nhash)) { LIST_INSERT_HEAD(&unhead[nhash], un, un_cache); un->un_flags |= UN_CACHED; } union_list_unlock(nhash); } /* * Set a new lowervp. The passed lowervp must be referenced and will be * stored in the vp in a referenced state. */ static void union_newlower(un, lowervp) struct union_node *un; struct vnode *lowervp; { union_updatevp(un, un->un_uppervp, lowervp); } /* * Set a new uppervp. The passed uppervp must be locked and will be * stored in the vp in a locked state. The caller should not unlock * uppervp. */ static void union_newupper(un, uppervp) struct union_node *un; struct vnode *uppervp; { union_updatevp(un, uppervp, un->un_lowervp); } /* * Keep track of size changes in the underlying vnodes. * If the size changes, then callback to the vm layer * giving priority to the upper layer size. 
*/
void
union_newsize(vp, uppersz, lowersz)
	struct vnode *vp;
	off_t uppersz, lowersz;
{
	struct union_node *node;
	off_t newsz = VNOVAL;

	/* Sizes are tracked for regular files only. */
	if (vp->v_type != VREG)
		return;

	node = VTOUNION(vp);

	/* A changed upper size is recorded and takes priority. */
	if (uppersz != VNOVAL && node->un_uppersz != uppersz) {
		node->un_uppersz = uppersz;
		newsz = uppersz;
	}

	/* A changed lower size is recorded; reported only if upper did not change. */
	if (lowersz != VNOVAL && node->un_lowersz != lowersz) {
		node->un_lowersz = lowersz;
		if (newsz == VNOVAL)
			newsz = lowersz;
	}

	if (newsz == VNOVAL)
		return;

	UDEBUG(("union: %s size now %ld\n",
	    (uppersz != VNOVAL ? "upper" : "lower"), (long)newsz));

	/*
	 * There is no need to change size of non-existent object, so
	 * vnode_pager_setsize(vp, newsz) is deliberately not called here.
	 */
}

/*
 *	union_allocvp:	allocate a union_node and associate it with a
 *			parent union_node and one or two vnodes.
 *
 *	vpp		Holds the returned vnode locked and referenced if no
 *			error occurs.
 *
 *	mp		Holds the mount point.  mp may or may not be busied.
 *			allocvp() makes no changes to mp.
 *
 *	dvp		Holds the parent union_node to the one we wish to create.
 *			XXX may only be used to traverse an uncopied lowervp-based
 *			tree?  XXX
 *
 *			dvp may or may not be locked.  allocvp() makes no changes
 *			to dvp.
 *
 *	upperdvp	Holds the parent vnode to uppervp, generally used along
 *			with path component information to create a shadow of
 *			lowervp when uppervp does not exist.
 *
 *			upperdvp is referenced but unlocked on entry, and will be
 *			dereferenced on return.
 *
 *	uppervp		Holds the new uppervp vnode to be stored in the
 *			union_node we are allocating.  uppervp is referenced but
 *			not locked, and will be dereferenced on return.
 *
 *	lowervp		Holds the new lowervp vnode to be stored in the
 *			union_node we are allocating.  lowervp is referenced but
 *			not locked, and will be dereferenced on return.
 *
 *	cnp		Holds path component information to be coupled with
 *			lowervp and upperdvp to allow unionfs to create an uppervp
 *			later on.  Only used if lowervp is valid.  The contents
 *			of cnp is only valid for the duration of the call.
*
 *	docache		Determine whether this node should be entered in the
 *			cache or whether it should be destroyed as soon as possible.
 *
 * All union_nodes are maintained on a singly-linked
 * list.  New nodes are only allocated when they cannot
 * be found on this list.  Entries on the list are
 * removed when the vfs reclaim entry is called.
 *
 * A single lock is kept for the entire list.  This is
 * needed because the getnewvnode() function can block
 * waiting for a vnode to become free, in which case there
 * may be more than one process trying to get the same
 * vnode.  This lock is only taken if we are going to
 * call getnewvnode(), since the kernel itself is single-threaded.
 *
 * If an entry is found on the list, then call vget() to
 * take a reference.  This is done because there may be
 * zero references to it and so it needs to be removed from
 * the vnode free list.
 *
 * Returns 0 with *vpp set on success.  If getnewvnode() fails its error
 * is returned, *vpp is set to NULL and the references on lowervp,
 * uppervp and upperdvp are released.  Panics if both uppervp and
 * lowervp are NULL.
 */

int
union_allocvp(vpp, mp, dvp, upperdvp, cnp, uppervp, lowervp, docache)
	struct vnode **vpp;
	struct mount *mp;
	struct vnode *dvp;		/* parent union vnode */
	struct vnode *upperdvp;		/* parent vnode of uppervp */
	struct componentname *cnp;	/* may be null */
	struct vnode *uppervp;		/* may be null */
	struct vnode *lowervp;		/* may be null */
	int docache;
{
	int error;
	struct union_node *un = 0;
	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
	struct thread *td = (cnp) ? cnp->cn_thread : curthread;
	int hash = 0;
	int vflag;
	int try;

	if (uppervp == NULLVP && lowervp == NULLVP)
		panic("union: unidentifiable allocation");

	/*
	 * The two layers must agree on the vnode type; when they do not,
	 * the lower vnode is dropped and only the upper one is used.
	 */
	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
		vrele(lowervp);
		lowervp = NULLVP;
	}

	/* detect the root vnode (and aliases) */
	vflag = 0;
	if ((uppervp == um->um_uppervp) &&
	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
		if (lowervp == NULLVP) {
			/* Fill in the mount's lower root, taking our own reference. */
			lowervp = um->um_lowervp;
			if (lowervp != NULLVP)
				VREF(lowervp);
		}
		vflag = VV_ROOT;
	}

loop:
	/*
	 * Cache lookup.  Up to three chains are probed in turn: the one for
	 * the (upper, lower) pair, the one for upper alone and the one for
	 * lower alone.  A cached node matches when each of its vnodes is
	 * either equal to ours or NULL and it belongs to the same mount.
	 */
	if (!docache) {
		un = 0;
	} else for (try = 0; try < 3; try++) {
		switch (try) {
		case 0:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, lowervp);
			break;

		case 1:
			if (uppervp == NULLVP)
				continue;
			hash = UNION_HASH(uppervp, NULLVP);
			break;

		case 2:
			if (lowervp == NULLVP)
				continue;
			hash = UNION_HASH(NULLVP, lowervp);
			break;
		}

		while (union_list_lock(hash))
			continue;

		LIST_FOREACH(un, &unhead[hash], un_cache) {
			if ((un->un_lowervp == lowervp ||
			     un->un_lowervp == NULLVP) &&
			    (un->un_uppervp == uppervp ||
			     un->un_uppervp == NULLVP) &&
			    (UNIONTOV(un)->v_mount == mp)) {
				/* vget() failed: drop the chain lock and rescan from scratch. */
				if (vget(UNIONTOV(un), 0,
				    cnp ? cnp->cn_thread : NULL)) {
					union_list_unlock(hash);
					goto loop;
				}
				break;
			}
		}

		union_list_unlock(hash);

		if (un)
			break;
	}

	if (un) {
		/*
		 * Obtain a lock on the union_node.  Everything is unlocked
		 * except for dvp, so check that case.  If they match, our
		 * new un is already locked.  Otherwise we have to lock our
		 * new un.
		 *
		 * A potential deadlock situation occurs when we are holding
		 * one lock while trying to get another.  We must follow
		 * strict ordering rules to avoid it.  We try to locate dvp
		 * by scanning up from un_vnode, since the most likely
		 * scenario is un being under dvp.
		 */

		if (dvp && un->un_vnode != dvp) {
			struct vnode *scan = un->un_vnode;

			/* Walk un_pvp links while they stay inside unionfs. */
			do {
				scan = VTOUNION(scan)->un_pvp;
			} while (scan && scan->v_op == union_vnodeop_p &&
				 scan != dvp);
			if (scan != dvp) {
				/*
				 * our new un is above dvp (we never saw dvp
				 * while moving up the tree).
				 */
				VREF(dvp);
				VOP_UNLOCK(dvp, 0, td);
				error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
				vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td);
				vrele(dvp);
			} else {
				/*
				 * our new un is under dvp
				 */
				error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
			}
		} else if (dvp == NULLVP) {
			/*
			 * dvp is NULL, we need to lock un.
			 */
			error = vn_lock(un->un_vnode, LK_EXCLUSIVE, td);
		} else {
			/*
			 * dvp == un->un_vnode, we are already locked.
			 */
			error = 0;
		}

		/* Locking the cached node failed: start the lookup over. */
		if (error)
			goto loop;

		/*
		 * At this point, the union_node is locked and referenced.
		 *
		 * uppervp is locked and referenced or NULL, lowervp is
		 * referenced or NULL.
		 */
		UDEBUG(("Modify existing un %p vn %p upper %p(refs %d) -> %p(refs %d)\n",
			un, un->un_vnode, un->un_uppervp,
			(un->un_uppervp ? vrefcnt(un->un_uppervp) : -99),
			uppervp,
			(uppervp ? vrefcnt(uppervp) : -99)
		));

		/*
		 * A different uppervp is handed over to the node; an
		 * identical one only costs us the extra reference we hold.
		 */
		if (uppervp != un->un_uppervp) {
			KASSERT(uppervp == NULL || vrefcnt(uppervp) > 0, ("union_allocvp: too few refs %d (at least 1 required) on uppervp", vrefcnt(uppervp)));
			union_newupper(un, uppervp);
		} else if (uppervp) {
			KASSERT(vrefcnt(uppervp) > 1, ("union_allocvp: too few refs %d (at least 2 required) on uppervp", vrefcnt(uppervp)));
			vrele(uppervp);
		}

		/*
		 * Save information about the lower layer.
		 * This needs to keep track of pathname
		 * and directory information which union_vn_create()
		 * might need.
		 */
		if (lowervp != un->un_lowervp) {
			union_newlower(un, lowervp);
			if (cnp && (lowervp != NULLVP)) {
				/* Keep a NUL-terminated copy of the component name. */
				un->un_path = malloc(cnp->cn_namelen+1,
						M_TEMP, M_WAITOK);
				bcopy(cnp->cn_nameptr, un->un_path,
						cnp->cn_namelen);
				un->un_path[cnp->cn_namelen] = '\0';
			}
		} else if (lowervp) {
			vrele(lowervp);
		}

		/*
		 * and upperdvp
		 */
		if (upperdvp != un->un_dirvp) {
			if (un->un_dirvp)
				vrele(un->un_dirvp);
			un->un_dirvp = upperdvp;
		} else if (upperdvp) {
			vrele(upperdvp);
		}

		*vpp = UNIONTOV(un);
		return (0);
	}

	if (docache) {
		/*
		 * Otherwise lock the vp list while we call getnewvnode()
		 * since that can block.
		 */
		hash = UNION_HASH(uppervp, lowervp);

		/* Non-zero means we slept without the lock: rescan the cache. */
		if (union_list_lock(hash))
			goto loop;
	}

	/*
	 * Create new node rather than replace old node.
	 */

	error = getnewvnode("union", mp, union_vnodeop_p, vpp);
	if (error) {
		/*
		 * If an error occurs, clear out vnodes.
		 */
		if (lowervp)
			vrele(lowervp);
		if (uppervp)
			vrele(uppervp);
		if (upperdvp)
			vrele(upperdvp);
		*vpp = NULL;
		goto out;
	}

	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
		M_TEMP, M_WAITOK);

	ASSERT_VOP_LOCKED(*vpp, "union_allocvp");
	(*vpp)->v_vflag |= vflag;
	/* The union vnode takes its type from the upper layer when present. */
	if (uppervp)
		(*vpp)->v_type = uppervp->v_type;
	else
		(*vpp)->v_type = lowervp->v_type;

	un = VTOUNION(*vpp);
	bzero(un, sizeof(*un));

	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);

	un->un_vnode = *vpp;
	un->un_uppervp = uppervp;
	un->un_uppersz = VNOVAL;
	un->un_lowervp = lowervp;
	un->un_lowersz = VNOVAL;
	un->un_dirvp = upperdvp;
	un->un_pvp = dvp;		/* only parent dir in new allocation */
	if (dvp != NULLVP)
		VREF(dvp);
	un->un_dircache = 0;
	un->un_openl = 0;

	if (cnp && (lowervp != NULLVP)) {
		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
		un->un_path[cnp->cn_namelen] = '\0';
	} else {
		/*
		 * NOTE(review): un_dirvp is cleared here without a vrele() of
		 * upperdvp; if a caller passes a referenced upperdvp on this
		 * path that reference looks leaked -- confirm against callers.
		 */
		un->un_path = 0;
		un->un_dirvp = NULL;
	}

	if (docache) {
		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
		un->un_flags |= UN_CACHED;
	}

out:
	/* The chain lock is only held here when docache is set. */
	if (docache)
		union_list_unlock(hash);

	return (error);
}

/*
 * union_freevp:
 *
 *	Tear down the union_node hanging off (vp): take it off its hash
 *	chain if cached, release the references held on the parent, upper,
 *	lower and upper-directory vnodes, free the saved path name and
 *	finally the node itself.  vp->v_data is cleared.  Always returns 0.
 */
int
union_freevp(vp)
	struct vnode *vp;
{
	struct union_node *un = VTOUNION(vp);

	if (un->un_flags & UN_CACHED) {
		un->un_flags &= ~UN_CACHED;
		LIST_REMOVE(un, un_cache);
	}

	if (un->un_pvp != NULLVP) {
		vrele(un->un_pvp);
		un->un_pvp = NULL;
	}
	if (un->un_uppervp != NULLVP) {
		vrele(un->un_uppervp);
		un->un_uppervp = NULL;
	}
	if (un->un_lowervp != NULLVP) {
		vrele(un->un_lowervp);
		un->un_lowervp = NULL;
	}
	if (un->un_dirvp != NULLVP) {
		vrele(un->un_dirvp);
		un->un_dirvp = NULL;
	}
	if (un->un_path) {
		free(un->un_path, M_TEMP);
		un->un_path = NULL;
	}

	FREE(vp->v_data, M_TEMP);
	vp->v_data = 0;

	return (0);
}

/*
 * copyfile.
Copy the vnode (fvp) to the vnode (tvp) * using a sequence of reads and writes. Both (fvp) * and (tvp) are locked on entry and exit. * * fvp and tvp are both exclusive locked on call, but their refcount's * haven't been bumped at all. */ static int union_copyfile(fvp, tvp, cred, td) struct vnode *fvp; struct vnode *tvp; struct ucred *cred; struct thread *td; { char *buf; struct uio uio; struct iovec iov; int error = 0; /* * strategy: * Allocate a buffer of size MAXBSIZE. * Loop doing reads and writes, keeping track * of the current uio offset. * Give up at the first sign of trouble. */ bzero(&uio, sizeof(uio)); uio.uio_td = td; uio.uio_segflg = UIO_SYSSPACE; uio.uio_offset = 0; VOP_LEASE(fvp, td, cred, LEASE_READ); VOP_LEASE(tvp, td, cred, LEASE_WRITE); buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); /* ugly loop follows... */ do { off_t offset = uio.uio_offset; int count; int bufoffset; /* * Setup for big read. */ uio.uio_iov = &iov; uio.uio_iovcnt = 1; iov.iov_base = buf; iov.iov_len = MAXBSIZE; uio.uio_resid = iov.iov_len; uio.uio_rw = UIO_READ; if ((error = VOP_READ(fvp, &uio, 0, cred)) != 0) break; /* * Get bytes read, handle read eof case and setup for * write loop. */ if ((count = MAXBSIZE - uio.uio_resid) == 0) break; bufoffset = 0; /* * Write until an error occurs or our buffer has been * exhausted, then update the offset for the next read. */ while (bufoffset < count) { uio.uio_iov = &iov; uio.uio_iovcnt = 1; iov.iov_base = buf + bufoffset; iov.iov_len = count - bufoffset; uio.uio_offset = offset + bufoffset; uio.uio_rw = UIO_WRITE; uio.uio_resid = iov.iov_len; if ((error = VOP_WRITE(tvp, &uio, 0, cred)) != 0) break; bufoffset += (count - bufoffset) - uio.uio_resid; } uio.uio_offset = offset + bufoffset; } while (error == 0); free(buf, M_TEMP); return (error); } /* * * un's vnode is assumed to be locked on entry and remains locked on exit. 
*/ int union_copyup(un, docopy, cred, td) struct union_node *un; int docopy; struct ucred *cred; struct thread *td; { int error; struct mount *mp; struct vnode *lvp, *uvp; /* * If the user does not have read permission, the vnode should not * be copied to upper layer. */ vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, td); error = VOP_ACCESS(un->un_lowervp, VREAD, cred, td); VOP_UNLOCK(un->un_lowervp, 0, td); if (error) return (error); if ((error = vn_start_write(un->un_dirvp, &mp, V_WAIT | PCATCH)) != 0) return (error); if ((error = union_vn_create(&uvp, un, td)) != 0) { vn_finished_write(mp); return (error); } lvp = un->un_lowervp; KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp))); if (docopy) { /* * XX - should not ignore errors * from VOP_CLOSE() */ vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, td); error = VOP_OPEN(lvp, FREAD, cred, td); if (error == 0 && vn_canvmio(lvp) == TRUE) error = vfs_object_create(lvp, td, cred); if (error == 0) { error = union_copyfile(lvp, uvp, cred, td); VOP_UNLOCK(lvp, 0, td); (void) VOP_CLOSE(lvp, FREAD, cred, td); } if (error == 0) UDEBUG(("union: copied up %s\n", un->un_path)); } VOP_UNLOCK(uvp, 0, td); vn_finished_write(mp); union_newupper(un, uvp); KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp))); union_vn_close(uvp, FWRITE, cred, td); KASSERT(vrefcnt(uvp) > 0, ("copy: uvp refcount 0: %d", vrefcnt(uvp))); /* * Subsequent IOs will go to the top layer, so * call close on the lower vnode and open on the * upper vnode to ensure that the filesystem keeps * its references counts right. This doesn't do * the right thing with (cred) and (FREAD) though. * Ignoring error returns is not right, either. 
*/ if (error == 0) { int i; for (i = 0; i < un->un_openl; i++) { (void) VOP_CLOSE(lvp, FREAD, cred, td); (void) VOP_OPEN(uvp, FREAD, cred, td); } if (un->un_openl) { if (vn_canvmio(uvp) == TRUE) error = vfs_object_create(uvp, td, cred); } un->un_openl = 0; } return (error); } /* * union_relookup: * * dvp should be locked on entry and will be locked on return. No * net change in the ref count will occur. * * If an error is returned, *vpp will be invalid, otherwise it * will hold a locked, referenced vnode. If *vpp == dvp then * remember that only one exclusive lock is held. */ static int union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) struct union_mount *um; struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; struct componentname *cn; char *path; int pathlen; { int error; /* * A new componentname structure must be faked up because * there is no way to know where the upper level cnp came * from or what it is being used for. This must duplicate * some of the work done by NDINIT(), some of the work done * by namei(), some of the work done by lookup() and some of * the work done by VOP_LOOKUP() when given a CREATE flag. * Conclusion: Horrible. */ cn->cn_namelen = pathlen; cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); bcopy(path, cn->cn_pnbuf, cn->cn_namelen); cn->cn_pnbuf[cn->cn_namelen] = '\0'; cn->cn_nameiop = CREATE; cn->cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); cn->cn_thread = cnp->cn_thread; if (um->um_op == UNMNT_ABOVE) cn->cn_cred = cnp->cn_cred; else cn->cn_cred = um->um_cred; cn->cn_nameptr = cn->cn_pnbuf; cn->cn_consume = cnp->cn_consume; VREF(dvp); VOP_UNLOCK(dvp, 0, cnp->cn_thread); /* * Pass dvp unlocked and referenced on call to relookup(). * * If an error occurs, dvp will be returned unlocked and dereferenced. 
*/ if ((error = relookup(dvp, vpp, cn)) != 0) { vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_thread); return(error); } /* * If no error occurs, dvp will be returned locked with the reference * left as before, and vpp will be returned referenced and locked. * * We want to return with dvp as it was passed to us, so we get * rid of our reference. */ vrele(dvp); return (0); } /* * Create a shadow directory in the upper layer. * The new vnode is returned locked. * * (um) points to the union mount structure for access to the * the mounting process's credentials. * (dvp) is the directory in which to create the shadow directory, * It is locked (but not ref'd) on entry and return. * (cnp) is the component name to be created. * (vpp) is the returned newly created shadow directory, which * is returned locked and ref'd */ int union_mkshadow(um, dvp, cnp, vpp) struct union_mount *um; struct vnode *dvp; struct componentname *cnp; struct vnode **vpp; { int error; struct vattr va; struct thread *td = cnp->cn_thread; struct componentname cn; struct mount *mp; if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0) return (error); if ((error = union_relookup(um, dvp, vpp, cnp, &cn, cnp->cn_nameptr, cnp->cn_namelen)) != 0) { vn_finished_write(mp); return (error); } if (*vpp) { if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } if (dvp == *vpp) vrele(*vpp); else vput(*vpp); vn_finished_write(mp); *vpp = NULLVP; return (EEXIST); } /* * Policy: when creating the shadow directory in the * upper layer, create it owned by the user who did * the mount, group from parent directory, and mode * 777 modified by umask (ie mostly identical to the * mkdir syscall). 
(jsp, kb) */ VATTR_NULL(&va); va.va_type = VDIR; va.va_mode = um->um_cmode; /* VOP_LEASE: dvp is locked */ VOP_LEASE(dvp, td, cn.cn_cred, LEASE_WRITE); error = VOP_MKDIR(dvp, vpp, &cn, &va); if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } /*vput(dvp);*/ vn_finished_write(mp); return (error); } /* * Create a whiteout entry in the upper layer. * * (um) points to the union mount structure for access to the * the mounting process's credentials. * (dvp) is the directory in which to create the whiteout. * It is locked on entry and return. * (cnp) is the component name to be created. */ int union_mkwhiteout(um, dvp, cnp, path) struct union_mount *um; struct vnode *dvp; struct componentname *cnp; char *path; { int error; struct thread *td = cnp->cn_thread; struct vnode *wvp; struct componentname cn; struct mount *mp; if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH)) != 0) return (error); error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); if (error) { vn_finished_write(mp); return (error); } if (wvp) { if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } if (wvp == dvp) vrele(wvp); else vput(wvp); vn_finished_write(mp); return (EEXIST); } /* VOP_LEASE: dvp is locked */ VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_WHITEOUT(dvp, &cn, CREATE); if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } vn_finished_write(mp); return (error); } /* * union_vn_create: creates and opens a new shadow file * on the upper union layer. This function is similar * in spirit to calling vn_open() but it avoids calling namei(). * The problem with calling namei() is that a) it locks too many * things, and b) it doesn't start at the "right" directory, * whereas relookup() is told where to start. * * On entry, the vnode associated with un is locked. It remains locked * on return. 
* * If no error occurs, *vpp contains a locked referenced vnode for your * use. If an error occurs *vpp iis undefined. */ static int union_vn_create(vpp, un, td) struct vnode **vpp; struct union_node *un; struct thread *td; { struct vnode *vp; struct ucred *cred = td->td_ucred; struct vattr vat; struct vattr *vap = &vat; int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL); int error; int cmode; struct componentname cn; *vpp = NULLVP; FILEDESC_LOCK(td->td_proc->p_fd); cmode = UN_FILEMODE & ~td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK(td->td_proc->p_fd); /* * Build a new componentname structure (for the same * reasons outlines in union_mkshadow()). * The difference here is that the file is owned by * the current user, rather than by the person who * did the mount, since the current user needs to be * able to write the file (that's why it is being * copied in the first place). */ cn.cn_namelen = strlen(un->un_path); cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); cn.cn_nameiop = CREATE; cn.cn_flags = (LOCKPARENT|LOCKLEAF|HASBUF|SAVENAME|ISLASTCN); cn.cn_thread = td; cn.cn_cred = td->td_ucred; cn.cn_nameptr = cn.cn_pnbuf; cn.cn_consume = 0; /* * Pass dvp unlocked and referenced on call to relookup(). * * If an error occurs, dvp will be returned unlocked and dereferenced. */ VREF(un->un_dirvp); error = relookup(un->un_dirvp, &vp, &cn); if (error) return (error); /* * If no error occurs, dvp will be returned locked with the reference * left as before, and vpp will be returned referenced and locked. */ if (vp) { vput(un->un_dirvp); if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } if (vp == un->un_dirvp) vrele(vp); else vput(vp); return (EEXIST); } /* * Good - there was no race to create the file * so go ahead and create it. The permissions * on the file will be 0666 modified by the * current user's umask. 
Access to the file, while * it is unioned, will require access to the top *and* * bottom files. Access when not unioned will simply * require access to the top-level file. * TODO: confirm choice of access permissions. */ VATTR_NULL(vap); vap->va_type = VREG; vap->va_mode = cmode; VOP_LEASE(un->un_dirvp, td, cred, LEASE_WRITE); error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap); if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } vput(un->un_dirvp); if (error) return (error); error = VOP_OPEN(vp, fmode, cred, td); if (error == 0 && vn_canvmio(vp) == TRUE) error = vfs_object_create(vp, td, cred); if (error) { vput(vp); return (error); } vp->v_writecount++; *vpp = vp; return (0); } static int union_vn_close(vp, fmode, cred, td) struct vnode *vp; int fmode; struct ucred *cred; struct thread *td; { if (fmode & FWRITE) --vp->v_writecount; return (VOP_CLOSE(vp, fmode, cred, td)); } #if 0 /* * union_removed_upper: * * called with union_node unlocked. XXX */ void union_removed_upper(un) struct union_node *un; { struct thread *td = curthread; /* XXX */ struct vnode **vpp; /* * Do not set the uppervp to NULLVP. If lowervp is NULLVP, * union node will have neither uppervp nor lowervp. We remove * the union node from cache, so that it will not be referrenced. */ union_newupper(un, NULLVP); - if (un->un_dircache != 0) { - for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) - vrele(*vpp); - free(un->un_dircache, M_TEMP); - un->un_dircache = 0; - } + if (un->un_dircache != NULL) + union_dircache_free(un); if (un->un_flags & UN_CACHED) { un->un_flags &= ~UN_CACHED; LIST_REMOVE(un, un_cache); } } #endif /* * Determine whether a whiteout is needed * during a remove/rmdir operation. 
*/ int union_dowhiteout(un, cred, td) struct union_node *un; struct ucred *cred; struct thread *td; { struct vattr va; if (un->un_lowervp != NULLVP) return (1); if (VOP_GETATTR(un->un_uppervp, &va, cred, td) == 0 && (va.va_flags & OPAQUE)) return (1); return (0); } static void union_dircache_r(vp, vppp, cntp) struct vnode *vp; struct vnode ***vppp; int *cntp; { struct union_node *un; if (vp->v_op != union_vnodeop_p) { if (vppp) { VREF(vp); *(*vppp)++ = vp; if (--(*cntp) == 0) panic("union: dircache table too small"); } else { (*cntp)++; } - - return; + } else { + un = VTOUNION(vp); + if (un->un_uppervp != NULLVP) + union_dircache_r(un->un_uppervp, vppp, cntp); + if (un->un_lowervp != NULLVP) + union_dircache_r(un->un_lowervp, vppp, cntp); } - - un = VTOUNION(vp); - if (un->un_uppervp != NULLVP) - union_dircache_r(un->un_uppervp, vppp, cntp); - if (un->un_lowervp != NULLVP) - union_dircache_r(un->un_lowervp, vppp, cntp); } struct vnode * -union_dircache(vp, td) +union_dircache_get(vp, td) struct vnode *vp; struct thread *td; { int cnt; struct vnode *nvp; struct vnode **vpp; struct vnode **dircache, **newdircache; struct union_node *un; int error; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); un = VTOUNION(vp); dircache = un->un_dircache; newdircache = NULL; nvp = NULLVP; if (dircache == NULL) { cnt = 0; union_dircache_r(vp, 0, &cnt); cnt++; newdircache = dircache = malloc(cnt * sizeof(struct vnode *), M_TEMP, M_WAITOK); vpp = dircache; union_dircache_r(vp, &vpp, &cnt); *vpp = NULLVP; vpp = dircache + 1; } else { vpp = dircache; do { if (*vpp++ == un->un_uppervp) break; } while (*vpp != NULLVP); } if (*vpp == NULLVP) goto out; /*vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, td);*/ UDEBUG(("ALLOCVP-3 %p ref %d\n", *vpp, (*vpp ? vrefcnt(*vpp) : -99))); VREF(*vpp); error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, NULL, *vpp, NULLVP, 0); UDEBUG(("ALLOCVP-3B %p ref %d\n", nvp, (*vpp ? 
vrefcnt(*vpp) : -99))); if (error) goto out; un->un_dircache = NULL; VTOUNION(nvp)->un_dircache = dircache; newdircache = NULL; out: /* * If we allocated a new dircache and couldn't attach * it to a new vp, free the resources we allocated. */ if (newdircache) { for (vpp = newdircache; *vpp != NULLVP; vpp++) vrele(*vpp); free(newdircache, M_TEMP); } VOP_UNLOCK(vp, 0, td); return (nvp); } +void +union_dircache_free(struct union_node *un) +{ + struct vnode **vpp; + + for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) + vrele(*vpp); + free(un->un_dircache, M_TEMP); + un->un_dircache = NULL; +} + /* * Module glue to remove #ifdef UNION from vfs_syscalls.c */ static int union_dircheck(struct thread *td, struct vnode **vp, struct file *fp) { int error = 0; if ((*vp)->v_op == union_vnodeop_p) { struct vnode *lvp; - lvp = union_dircache(*vp, td); + lvp = union_dircache_get(*vp, td); if (lvp != NULLVP) { struct vattr va; /* * If the directory is opaque, * then don't show lower entries * * NOTE(review): va.va_flags is tested below without checking the * VOP_GETATTR() result, so va may be unset when that call fails; * the error value is also overwritten or dropped later. Confirm. */ error = VOP_GETATTR(*vp, &va, fp->f_cred, td); if (va.va_flags & OPAQUE) { vput(lvp); lvp = NULL; } } if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, td); if (error == 0 && vn_canvmio(lvp) == TRUE) error = vfs_object_create(lvp, td, fp->f_cred); if (error) { vput(lvp); return (error); } VOP_UNLOCK(lvp, 0, td); /* Switch the open file over to lvp and restart at offset 0. */ FILE_LOCK(fp); fp->f_data = lvp; fp->f_offset = 0; FILE_UNLOCK(fp); error = vn_close(*vp, FREAD, fp->f_cred, td); if (error) return (error); *vp = lvp; return -1; /* goto unionread */ } } return error; } /* * union_modevent: * * Module event handler: point union_dircheckp at union_dircheck on * MOD_LOAD and reset it to NULL on MOD_UNLOAD. Other event types are * ignored. Always returns 0. */ static int union_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: union_dircheckp = union_dircheck; break; case MOD_UNLOAD: union_dircheckp = NULL; break; default: break; } return 0; } static moduledata_t union_mod = { "union_dircheck", union_modevent, NULL }; DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY); diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c index d8e07bf6a9f4..65714cd2a415 100644 ---
a/sys/fs/unionfs/union_vnops.c +++ b/sys/fs/unionfs/union_vnops.c @@ -1,1882 +1,1877 @@ /* * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry. * Copyright (c) 1992, 1993, 1994, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)union_vnops.c 8.32 (Berkeley) 6/23/95 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int uniondebug = 0; #if UDEBUG_ENABLED SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RW, &uniondebug, 0, ""); #else SYSCTL_INT(_vfs, OID_AUTO, uniondebug, CTLFLAG_RD, &uniondebug, 0, ""); #endif static int union_access(struct vop_access_args *ap); static int union_advlock(struct vop_advlock_args *ap); static int union_close(struct vop_close_args *ap); static int union_create(struct vop_create_args *ap); static int union_createvobject(struct vop_createvobject_args *ap); static int union_destroyvobject(struct vop_destroyvobject_args *ap); static int union_fsync(struct vop_fsync_args *ap); static int union_getattr(struct vop_getattr_args *ap); static int union_getvobject(struct vop_getvobject_args *ap); static int union_inactive(struct vop_inactive_args *ap); static int union_ioctl(struct vop_ioctl_args *ap); static int union_lease(struct vop_lease_args *ap); static int union_link(struct vop_link_args *ap); static int union_lookup(struct vop_lookup_args *ap); static int union_lookup1(struct vnode *udvp, struct vnode **dvp, struct vnode **vpp, struct componentname *cnp); static int union_mkdir(struct vop_mkdir_args *ap); static int union_mknod(struct vop_mknod_args *ap); static int union_open(struct vop_open_args *ap); static int 
union_pathconf(struct vop_pathconf_args *ap); static int union_print(struct vop_print_args *ap); static int union_read(struct vop_read_args *ap); static int union_readdir(struct vop_readdir_args *ap); static int union_readlink(struct vop_readlink_args *ap); static int union_getwritemount(struct vop_getwritemount_args *ap); static int union_reclaim(struct vop_reclaim_args *ap); static int union_remove(struct vop_remove_args *ap); static int union_rename(struct vop_rename_args *ap); static int union_revoke(struct vop_revoke_args *ap); static int union_rmdir(struct vop_rmdir_args *ap); static int union_poll(struct vop_poll_args *ap); static int union_setattr(struct vop_setattr_args *ap); static int union_strategy(struct vop_strategy_args *ap); static int union_symlink(struct vop_symlink_args *ap); static int union_whiteout(struct vop_whiteout_args *ap); static int union_write(struct vop_read_args *ap); /* NOTE(review): union_write is declared with vop_read_args; confirm this is the intended argument type. */ /* * union_lock_upper: * * If the union node has an upper vnode, take a reference on it (VREF) * and lock it with LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY. Returns * the upper vnode, or NULL when the node has none. */ static __inline struct vnode * union_lock_upper(struct union_node *un, struct thread *td) { struct vnode *uppervp; if ((uppervp = un->un_uppervp) != NULL) { VREF(uppervp); vn_lock(uppervp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td); } KASSERT((uppervp == NULL || vrefcnt(uppervp) > 0), ("uppervp usecount is 0")); return(uppervp); } /* * union_unlock_upper: * * Counterpart of union_lock_upper(): vput() the given vnode. The td * argument is not used. */ static __inline void union_unlock_upper(struct vnode *uppervp, struct thread *td) { vput(uppervp); } /* * union_lock_other: * * Return the upper vnode via union_lock_upper() when one exists; * otherwise reference and lock the lower vnode the same way. Returns * NULL when the node has neither an upper nor a lower vnode. */ static __inline struct vnode * union_lock_other(struct union_node *un, struct thread *td) { struct vnode *vp; if (un->un_uppervp != NULL) { vp = union_lock_upper(un, td); } else if ((vp = un->un_lowervp) != NULL) { VREF(vp); vn_lock(vp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td); } return(vp); } /* * union_unlock_other: * * Counterpart of union_lock_other(): vput() the given vnode. The td * argument is not used. */ static __inline void union_unlock_other(struct vnode *vp, struct thread *td) { vput(vp); } /* * union_lookup: * * udvp must be exclusively locked on call and will remain * exclusively locked on return. This is the mount point * for our filesystem. * * dvp Our base directory, locked and referenced.
* The passed dvp will be dereferenced and unlocked on return * and a new dvp will be returned which is locked and * referenced in the same variable. * * vpp is filled in with the result if no error occured, * locked and ref'd. * * If an error is returned, *vpp is set to NULLVP. If no * error occurs, *vpp is returned with a reference and an * exclusive lock. */ static int union_lookup1(udvp, pdvp, vpp, cnp) struct vnode *udvp; struct vnode **pdvp; struct vnode **vpp; struct componentname *cnp; { int error; struct thread *td = cnp->cn_thread; struct vnode *dvp = *pdvp; struct vnode *tdvp; struct mount *mp; /* * If stepping up the directory tree, check for going * back across the mount point, in which case do what * lookup would do by stepping back down the mount * hierarchy. */ if (cnp->cn_flags & ISDOTDOT) { while ((dvp != udvp) && (dvp->v_vflag & VV_ROOT)) { /* * Don't do the NOCROSSMOUNT check * at this level. By definition, * union fs deals with namespaces, not * filesystems. */ tdvp = dvp; dvp = dvp->v_mount->mnt_vnodecovered; VREF(dvp); vput(tdvp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); } } /* * Set return dvp to be the upperdvp 'parent directory. */ *pdvp = dvp; /* * If the VOP_LOOKUP() call generates an error, tdvp is invalid and * no changes will have been made to dvp, so we are set to return. */ error = VOP_LOOKUP(dvp, &tdvp, cnp); if (error) { UDEBUG(("dvp %p error %d flags %lx\n", dvp, error, cnp->cn_flags)); *vpp = NULL; return (error); } /* * The parent directory will have been unlocked, unless lookup * found the last component or if dvp == tdvp (tdvp must be locked). * * We want our dvp to remain locked and ref'd. We also want tdvp * to remain locked and ref'd. 
*/ UDEBUG(("parentdir %p result %p flag %lx\n", dvp, tdvp, cnp->cn_flags)); if (dvp != tdvp && (cnp->cn_flags & ISLASTCN) == 0) vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); /* * Lastly check if the current node is a mount point in * which case walk up the mount hierarchy making sure not to * bump into the root of the mount tree (ie. dvp != udvp). * * We use dvp as a temporary variable here, it is no longer related * to the dvp above. However, we have to ensure that both *pdvp and * tdvp are locked on return. */ dvp = tdvp; while ( dvp != udvp && (dvp->v_type == VDIR) && (mp = dvp->v_mountedhere) ) { int relock_pdvp = 0; if (vfs_busy(mp, 0, 0, td)) continue; if (dvp == *pdvp) relock_pdvp = 1; vput(dvp); dvp = NULL; error = VFS_ROOT(mp, &dvp); vfs_unbusy(mp, td); if (relock_pdvp) vn_lock(*pdvp, LK_EXCLUSIVE | LK_RETRY, td); if (error) { *vpp = NULL; return (error); } } *vpp = dvp; return (0); } static int union_lookup(ap) struct vop_lookup_args /* { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { int error; int uerror, lerror; struct vnode *uppervp, *lowervp; struct vnode *upperdvp, *lowerdvp; struct vnode *dvp = ap->a_dvp; /* starting dir */ struct union_node *dun = VTOUNION(dvp); /* associated union node */ struct componentname *cnp = ap->a_cnp; struct thread *td = cnp->cn_thread; int lockparent = cnp->cn_flags & LOCKPARENT; struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount); struct ucred *saved_cred = NULL; int iswhiteout; struct vattr va; *ap->a_vpp = NULLVP; /* * Disallow write attempts to the filesystem mounted read-only. */ if ((cnp->cn_flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { return (EROFS); } /* * For any lookups we do, always return with the parent locked. 
*/ cnp->cn_flags |= LOCKPARENT; lowerdvp = dun->un_lowervp; uppervp = NULLVP; lowervp = NULLVP; iswhiteout = 0; uerror = ENOENT; lerror = ENOENT; /* * Get a private lock on uppervp and a reference, effectively * taking it out of the union_node's control. * * We must lock upperdvp while holding our lock on dvp * to avoid a deadlock. */ upperdvp = union_lock_upper(dun, td); /* * Do the lookup in the upper level. * If that level consumes additional pathnames, * then assume that something special is going * on and just return that vnode. */ if (upperdvp != NULLVP) { /* * We do not have to worry about the DOTDOT case, we've * already unlocked dvp. */ UDEBUG(("A %p\n", upperdvp)); /* * Do the lookup. We must supply a locked and referenced * upperdvp to the function and will get a new locked and * referenced upperdvp back, with the old having been * dereferenced. * * If an error is returned, uppervp will be NULLVP. If no * error occurs, uppervp will be the locked and referenced. * Return vnode, or possibly NULL, depending on what is being * requested. It is possible that the returned uppervp * will be the same as upperdvp. */ uerror = union_lookup1(um->um_uppervp, &upperdvp, &uppervp, cnp); UDEBUG(( "uerror %d upperdvp %p %d/%d, uppervp %p ref=%d/lck=%d\n", uerror, upperdvp, vrefcnt(upperdvp), VOP_ISLOCKED(upperdvp, NULL), uppervp, (uppervp ? vrefcnt(uppervp) : -99), (uppervp ? VOP_ISLOCKED(uppervp, NULL) : -99) )); /* * Disallow write attempts to the filesystem mounted read-only. */ if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) { error = EROFS; goto out; } /* * Special case: If cn_consume != 0 then skip out. The result * of the lookup is transfered to our return variable. If * an error occured we have to throw away the results. 
*/ if (cnp->cn_consume != 0) { if ((error = uerror) == 0) { *ap->a_vpp = uppervp; uppervp = NULL; } goto out; } /* * Calculate whiteout, fall through. */ if (uerror == ENOENT || uerror == EJUSTRETURN) { if (cnp->cn_flags & ISWHITEOUT) { iswhiteout = 1; } else if (lowerdvp != NULLVP) { int terror; terror = VOP_GETATTR(upperdvp, &va, cnp->cn_cred, cnp->cn_thread); if (terror == 0 && (va.va_flags & OPAQUE)) iswhiteout = 1; } } } /* * In a similar way to the upper layer, do the lookup * in the lower layer. This time, if there is some * component magic going on, then vput whatever we got * back from the upper layer and return the lower vnode * instead. */ if (lowerdvp != NULLVP && !iswhiteout) { int nameiop; UDEBUG(("B %p\n", lowerdvp)); /* * Force only LOOKUPs on the lower node, since * we won't be making changes to it anyway. */ nameiop = cnp->cn_nameiop; cnp->cn_nameiop = LOOKUP; if (um->um_op == UNMNT_BELOW) { saved_cred = cnp->cn_cred; cnp->cn_cred = um->um_cred; } /* * We shouldn't have to worry about locking interactions * between the lower layer and our union layer (w.r.t. * `..' processing) because we don't futz with lowervp * locks in the union-node instantiation code path. * * union_lookup1() requires lowervp to be locked on entry, * and it will be unlocked on return. The ref count will * not change. On return lowervp doesn't represent anything * to us so we NULL it out. 
*/ VREF(lowerdvp); vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, td); lerror = union_lookup1(um->um_lowervp, &lowerdvp, &lowervp, cnp); if (lowerdvp == lowervp) vrele(lowerdvp); else vput(lowerdvp); lowerdvp = NULL; /* lowerdvp invalid after vput */ if (um->um_op == UNMNT_BELOW) cnp->cn_cred = saved_cred; cnp->cn_nameiop = nameiop; if (cnp->cn_consume != 0 || lerror == EACCES) { if ((error = lerror) == 0) { *ap->a_vpp = lowervp; lowervp = NULL; } goto out; } } else { UDEBUG(("C %p\n", lowerdvp)); if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) { if ((lowervp = LOWERVP(dun->un_pvp)) != NULL) { VREF(lowervp); vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, td); lerror = 0; } } } /* * Ok. Now we have uerror, uppervp, upperdvp, lerror, and lowervp. * * 1. If both layers returned an error, select the upper layer. * * 2. If the upper layer failed and the bottom layer succeeded, * two subcases occur: * * a. The bottom vnode is not a directory, in which case * just return a new union vnode referencing an * empty top layer and the existing bottom layer. * * b. The bottom vnode is a directory, in which case * create a new directory in the top layer and * and fall through to case 3. * * 3. If the top layer succeeded, then return a new union * vnode referencing whatever the new top layer and * whatever the bottom layer returned. */ /* case 1. */ if ((uerror != 0) && (lerror != 0)) { error = uerror; goto out; } /* case 2. */ if (uerror != 0 /* && (lerror == 0) */ ) { if (lowervp->v_type == VDIR) { /* case 2b. */ KASSERT(uppervp == NULL, ("uppervp unexpectedly non-NULL")); /* * Oops, uppervp has a problem, we may have to shadow. */ uerror = union_mkshadow(um, upperdvp, cnp, &uppervp); if (uerror) { error = uerror; goto out; } } } /* * Must call union_allocvp() with both the upper and lower vnodes * referenced and the upper vnode locked. ap->a_vpp is returned * referenced and locked. lowervp, uppervp, and upperdvp are * absorbed by union_allocvp() whether it succeeds or fails. 
* * upperdvp is the parent directory of uppervp which may be * different, depending on the path, from dvp->un_uppervp. That's * why it is a separate argument. Note that it must be unlocked. * * dvp must be locked on entry to the call and will be locked on * return. */ if (uppervp && uppervp != upperdvp) VOP_UNLOCK(uppervp, 0, td); if (lowervp) VOP_UNLOCK(lowervp, 0, td); if (upperdvp) VOP_UNLOCK(upperdvp, 0, td); error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp, uppervp, lowervp, 1); UDEBUG(("Create %p = %p %p refs=%d\n", *ap->a_vpp, uppervp, lowervp, (*ap->a_vpp) ? vrefcnt(*ap->a_vpp) : -99)); /* Clear the locals so the termination code below does not release them again. */ uppervp = NULL; upperdvp = NULL; lowervp = NULL; /* * Termination Code * * - put away any extra junk lying around. Note that lowervp * (if not NULL) will never be the same as *ap->a_vpp and * neither will uppervp, because when we set that state we * NULL-out lowervp or uppervp. On the other hand, upperdvp * may match uppervp or *ap->a_vpp. * * - relock/unlock dvp if appropriate. */ out: if (upperdvp) { if (upperdvp == uppervp || upperdvp == *ap->a_vpp) vrele(upperdvp); else vput(upperdvp); } if (uppervp) vput(uppervp); if (lowervp) vput(lowervp); /* * Restore LOCKPARENT state: clear the flag when the caller did not * have it set on entry (lockparent == 0). */ if (!lockparent) cnp->cn_flags &= ~LOCKPARENT; UDEBUG(("Out %d vpp %p/%d lower %p upper %p\n", error, *ap->a_vpp, ((*ap->a_vpp) ? vrefcnt(*ap->a_vpp) : -99), lowervp, uppervp)); /* * dvp lock state, determine whether to relock dvp. dvp is expected * to be locked on return if: * * - there was an error (except not EJUSTRETURN), or * - we hit the last component and lockparent is true * * dvp_is_locked is the current state of the dvp lock, not counting * the possibility that *ap->a_vpp == dvp (in which case it is locked * anyway). Note that *ap->a_vpp == dvp only if no error occured.
*/ if (*ap->a_vpp != dvp) { if ((error == 0 || error == EJUSTRETURN) && (!lockparent || (cnp->cn_flags & ISLASTCN) == 0)) { VOP_UNLOCK(dvp, 0, td); } } /* * Diagnostics * * NOTE(review): the panic() below prints ap->a_vpp (the address of the * result slot) although the test compares *ap->a_vpp with dvp; confirm * whether *ap->a_vpp was meant. */ #ifdef DIAGNOSTIC if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' && *ap->a_vpp != dvp) { panic("union_lookup returning . (%p) not same as startdir (%p)", ap->a_vpp, dvp); } #endif return (error); } /* * union_create: * * a_dvp is locked on entry and remains locked on return. a_vpp is returned * locked if no error occurs, otherwise it is garbage. * * Returns EROFS when a_dvp has no upper vnode; otherwise the result of * VOP_CREATE() on the upper directory followed by union_allocvp(). */ static int union_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); struct componentname *cnp = ap->a_cnp; struct thread *td = cnp->cn_thread; struct vnode *dvp; int error = EROFS; if ((dvp = union_lock_upper(dun, td)) != NULL) { struct vnode *vp; struct mount *mp; error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap); if (error == 0) { mp = ap->a_dvp->v_mount; /* The new upper vnode is unlocked before it is handed to union_allocvp(). */ VOP_UNLOCK(vp, 0, td); UDEBUG(("ALLOCVP-1 FROM %p REFS %d\n", vp, vrefcnt(vp))); error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp, NULLVP, 1); UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vrefcnt(vp))); } union_unlock_upper(dvp, td); } return (error); } /* * union_whiteout: * * Forward the whiteout request to the upper vnode of a_dvp, which is * referenced and locked for the duration of the call. Returns * EOPNOTSUPP when a_dvp has no upper vnode. */ static int union_whiteout(ap) struct vop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; } */ *ap; { struct union_node *un = VTOUNION(ap->a_dvp); struct componentname *cnp = ap->a_cnp; struct vnode *uppervp; int error = EOPNOTSUPP; if ((uppervp = union_lock_upper(un, cnp->cn_thread)) != NULLVP) { error = VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags); union_unlock_upper(uppervp, cnp->cn_thread); } return(error); } /* * union_mknod: * * a_dvp is locked on entry and should remain locked on return. * a_vpp is garbagre whether an error occurs or not.
*/ static int union_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); struct componentname *cnp = ap->a_cnp; struct vnode *dvp; int error = EROFS; if ((dvp = union_lock_upper(dun, cnp->cn_thread)) != NULL) { error = VOP_MKNOD(dvp, ap->a_vpp, cnp, ap->a_vap); union_unlock_upper(dvp, cnp->cn_thread); } return (error); } /* * union_open: * * run open VOP. When opening the underlying vnode we have to mimic * vn_open(). What we *really* need to do to avoid screwups if the * open semantics change is to call vn_open(). For example, ufs blows * up if you open a file but do not vmio it prior to writing. */ static int union_open(ap) struct vop_open_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct vnode *tvp; int mode = ap->a_mode; struct ucred *cred = ap->a_cred; struct thread *td = ap->a_td; int error = 0; int tvpisupper = 1; /* * If there is an existing upper vp then simply open that. * The upper vp takes precedence over the lower vp. When opening * a lower vp for writing copy it to the uppervp and then open the * uppervp. * * At the end of this section tvp will be left locked. */ if ((tvp = union_lock_upper(un, td)) == NULLVP) { /* * If the lower vnode is being opened for writing, then * copy the file contents to the upper vnode and open that, * otherwise can simply open the lower vnode. */ tvp = un->un_lowervp; if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) { int docopy = !(mode & O_TRUNC); error = union_copyup(un, docopy, cred, td); tvp = union_lock_upper(un, td); } else { un->un_openl++; VREF(tvp); vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td); tvpisupper = 0; } } /* * We are holding the correct vnode, open it. 
*/ if (error == 0) error = VOP_OPEN(tvp, mode, cred, td); /* * This is absolutely necessary or UFS will blow up. */ if (error == 0 && vn_canvmio(tvp) == TRUE) { error = vfs_object_create(tvp, td, cred); } /* * Release any locks held. */ if (tvpisupper) { if (tvp) union_unlock_upper(tvp, td); } else { vput(tvp); } return (error); } /* * union_close: * * It is unclear whether a_vp is passed locked or unlocked. Whatever * the case we do not change it. */ static int union_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct vnode *vp; if ((vp = un->un_uppervp) == NULLVP) { #ifdef UNION_DIAGNOSTIC if (un->un_openl <= 0) panic("union: un_openl cnt"); #endif --un->un_openl; vp = un->un_lowervp; } ap->a_vp = vp; return (VCALL(vp, VOFFSET(vop_close), ap)); } /* * Check access permission on the union vnode. * The access check being enforced is to check * against both the underlying vnode, and any * copied vnode. This ensures that no additional * file permissions are given away simply because * the user caused an implicit file copy. */ static int union_access(ap) struct vop_access_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct thread *td = ap->a_td; int error = EACCES; struct vnode *vp; /* * Disallow write attempts on filesystems mounted read-only. 
*/ if ((ap->a_mode & VWRITE) && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (ap->a_vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } if ((vp = union_lock_upper(un, td)) != NULLVP) { ap->a_vp = vp; error = VCALL(vp, VOFFSET(vop_access), ap); union_unlock_upper(vp, td); return(error); } if ((vp = un->un_lowervp) != NULLVP) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); ap->a_vp = vp; /* * Remove VWRITE from a_mode if our mount point is RW, because * we want to allow writes and lowervp may be read-only. */ if ((un->un_vnode->v_mount->mnt_flag & MNT_RDONLY) == 0) ap->a_mode &= ~VWRITE; error = VCALL(vp, VOFFSET(vop_access), ap); if (error == 0) { struct union_mount *um; um = MOUNTTOUNIONMOUNT(un->un_vnode->v_mount); if (um->um_op == UNMNT_BELOW) { ap->a_cred = um->um_cred; error = VCALL(vp, VOFFSET(vop_access), ap); } } VOP_UNLOCK(vp, 0, td); } return(error); } /* * We handle getattr only to change the fsid and * track object sizes * * It's not clear whether VOP_GETATTR is to be * called with the vnode locked or not. stat() calls * it with (vp) locked, and fstat() calls it with * (vp) unlocked. * * Because of this we cannot use our normal locking functions * if we do not intend to lock the main a_vp node. At the moment * we are running without any specific locking at all, but beware * to any programmer that care must be taken if locking is added * to this function. */ static int union_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct thread *a_td; } */ *ap; { int error; struct union_node *un = VTOUNION(ap->a_vp); struct vnode *vp; struct vattr *vap; struct vattr va; /* * Some programs walk the filesystem hierarchy by counting * links to directories to avoid stat'ing all the time. * This means the link count on directories needs to be "correct". * The only way to do that is to call getattr on both layers * and fix up the link count. 
The link count will not necessarily * be accurate but will be large enough to defeat the tree walkers. */ vap = ap->a_vap; if ((vp = un->un_uppervp) != NULLVP) { error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_td); if (error) return (error); /* XXX isn't this dangerous without a lock? */ union_newsize(ap->a_vp, vap->va_size, VNOVAL); } if (vp == NULLVP) { vp = un->un_lowervp; } else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) { vp = un->un_lowervp; vap = &va; } else { vp = NULLVP; } if (vp != NULLVP) { error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_td); if (error) return (error); /* XXX isn't this dangerous without a lock? */ union_newsize(ap->a_vp, VNOVAL, vap->va_size); } if ((vap != ap->a_vap) && (vap->va_type == VDIR)) ap->a_vap->va_nlink += vap->va_nlink; return (0); } static int union_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct thread *td = ap->a_td; struct vattr *vap = ap->a_vap; struct vnode *uppervp; int error; /* * Disallow write attempts on filesystems mounted read-only. */ if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) && (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL)) { return (EROFS); } /* * Handle case of truncating lower object to zero size * by creating a zero length upper object. This is to * handle the case of open with O_TRUNC and O_CREAT. */ if (un->un_uppervp == NULLVP && (un->un_lowervp->v_type == VREG)) { error = union_copyup(un, (ap->a_vap->va_size != 0), ap->a_cred, ap->a_td); if (error) return (error); } /* * Try to set attributes in upper layer, * otherwise return read-only filesystem error. 
*/ error = EROFS; if ((uppervp = union_lock_upper(un, td)) != NULLVP) { error = VOP_SETATTR(un->un_uppervp, ap->a_vap, ap->a_cred, ap->a_td); if ((error == 0) && (ap->a_vap->va_size != VNOVAL)) union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL); union_unlock_upper(uppervp, td); } return (error); } static int union_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct thread *td = ap->a_uio->uio_td; struct vnode *uvp; int error; uvp = union_lock_other(un, td); KASSERT(uvp != NULL, ("union_read: backing vnode missing!")); error = VOP_READ(uvp, ap->a_uio, ap->a_ioflag, ap->a_cred); union_unlock_other(uvp, td); /* * XXX * Perhaps the size of the underlying object has changed under * our feet. Take advantage of the offset information present * in the uio structure. */ if (error == 0) { struct union_node *un = VTOUNION(ap->a_vp); off_t cur = ap->a_uio->uio_offset; if (uvp == un->un_uppervp) { if (cur > un->un_uppersz) union_newsize(ap->a_vp, cur, VNOVAL); } else { if (cur > un->un_lowersz) union_newsize(ap->a_vp, VNOVAL, cur); } } return (error); } static int union_write(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { struct union_node *un = VTOUNION(ap->a_vp); struct thread *td = ap->a_uio->uio_td; struct vnode *uppervp; int error; if ((uppervp = union_lock_upper(un, td)) == NULLVP) panic("union: missing upper layer in write"); error = VOP_WRITE(uppervp, ap->a_uio, ap->a_ioflag, ap->a_cred); /* * The size of the underlying object may be changed by the * write. 
*/ if (error == 0) { off_t cur = ap->a_uio->uio_offset; if (cur > un->un_uppersz) union_newsize(ap->a_vp, cur, VNOVAL); } union_unlock_upper(uppervp, td); return (error); } static int union_lease(ap) struct vop_lease_args /* { struct vnode *a_vp; struct thread *a_td; struct ucred *a_cred; int a_flag; } */ *ap; { struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_lease), ap)); } static int union_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; u_long a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_ioctl), ap)); } static int union_poll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *ovp = OTHERVP(ap->a_vp); ap->a_vp = ovp; return (VCALL(ovp, VOFFSET(vop_poll), ap)); } static int union_revoke(ap) struct vop_revoke_args /* { struct vnode *a_vp; int a_flags; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; if (UPPERVP(vp)) VOP_REVOKE(UPPERVP(vp), ap->a_flags); if (LOWERVP(vp)) VOP_REVOKE(LOWERVP(vp), ap->a_flags); vgone(vp); return (0); } static int union_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct thread *a_td; } */ *ap; { int error = 0; struct thread *td = ap->a_td; struct vnode *targetvp; struct union_node *un = VTOUNION(ap->a_vp); if ((targetvp = union_lock_other(un, td)) != NULLVP) { error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, td); union_unlock_other(targetvp, td); } return (error); } /* * union_remove: * * Remove the specified cnp. The dvp and vp are passed to us locked * and must remain locked on return. 
*/ static int union_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; struct thread *td = cnp->cn_thread; struct vnode *uppervp; struct vnode *upperdvp; int error; if ((upperdvp = union_lock_upper(dun, td)) == NULLVP) panic("union remove: null upper vnode"); if ((uppervp = union_lock_upper(un, td)) != NULLVP) { if (union_dowhiteout(un, cnp->cn_cred, td)) cnp->cn_flags |= DOWHITEOUT; error = VOP_REMOVE(upperdvp, uppervp, cnp); #if 0 /* XXX */ if (!error) union_removed_upper(un); #endif union_unlock_upper(uppervp, td); } else { error = union_mkwhiteout( MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount), upperdvp, ap->a_cnp, un->un_path); } union_unlock_upper(upperdvp, td); return (error); } /* * union_link: * * tdvp and vp will be locked on entry. * tdvp and vp should remain locked on return. */ static int union_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct componentname *cnp = ap->a_cnp; struct thread *td = cnp->cn_thread; struct union_node *dun = VTOUNION(ap->a_tdvp); struct vnode *vp; struct vnode *tdvp; int error = 0; if (ap->a_tdvp->v_op != ap->a_vp->v_op) { vp = ap->a_vp; } else { struct union_node *tun = VTOUNION(ap->a_vp); if (tun->un_uppervp == NULLVP) { #if 0 if (dun->un_uppervp == tun->un_dirvp) { if (dun->un_flags & UN_ULOCK) { dun->un_flags &= ~UN_ULOCK; VOP_UNLOCK(dun->un_uppervp, 0, td); } } #endif error = union_copyup(tun, 1, cnp->cn_cred, td); #if 0 if (dun->un_uppervp == tun->un_dirvp) { vn_lock(dun->un_uppervp, LK_EXCLUSIVE | LK_RETRY, td); dun->un_flags |= UN_ULOCK; } #endif if (error) return (error); } vp = tun->un_uppervp; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); } /* * Make sure upper is locked, then unlock the union directory we were * called with to avoid a deadlock while we are 
calling VOP_LINK() on * the upper (with tdvp locked and vp not locked). Our ap->a_tdvp * is expected to be locked on return. */ if ((tdvp = union_lock_upper(dun, td)) == NULLVP) return (EROFS); VOP_UNLOCK(ap->a_tdvp, 0, td); /* unlock calling node */ error = VOP_LINK(tdvp, vp, cnp); /* call link on upper */ /* * Unlock tun->un_uppervp if we locked it above. */ if (ap->a_tdvp->v_op == ap->a_vp->v_op) VOP_UNLOCK(vp, 0, td); /* * We have to unlock tdvp prior to relocking our calling node in * order to avoid a deadlock. We also have to unlock ap->a_vp * before relocking the directory, but then we have to relock * ap->a_vp as our caller expects. */ VOP_UNLOCK(ap->a_vp, 0, td); union_unlock_upper(tdvp, td); vn_lock(ap->a_tdvp, LK_EXCLUSIVE | LK_RETRY, td); vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, td); return (error); } static int union_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { int error; struct vnode *fdvp = ap->a_fdvp; struct vnode *fvp = ap->a_fvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *tvp = ap->a_tvp; /* * Figure out what fdvp to pass to our upper or lower vnode. If we * replace the fdvp, release the original one and ref the new one. */ if (fdvp->v_op == union_vnodeop_p) { /* always true */ struct union_node *un = VTOUNION(fdvp); if (un->un_uppervp == NULLVP) { /* * this should never happen in normal * operation but might if there was * a problem creating the top-level shadow * directory. */ error = EXDEV; goto bad; } fdvp = un->un_uppervp; VREF(fdvp); vrele(ap->a_fdvp); } /* * Figure out what fvp to pass to our upper or lower vnode. If we * replace the fvp, release the original one and ref the new one. 
*/ if (fvp->v_op == union_vnodeop_p) { /* always true */ struct union_node *un = VTOUNION(fvp); #if 0 struct union_mount *um = MOUNTTOUNIONMOUNT(fvp->v_mount); #endif if (un->un_uppervp == NULLVP) { switch(fvp->v_type) { case VREG: vn_lock(un->un_vnode, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_thread); error = union_copyup(un, 1, ap->a_fcnp->cn_cred, ap->a_fcnp->cn_thread); VOP_UNLOCK(un->un_vnode, 0, ap->a_fcnp->cn_thread); if (error) goto bad; break; case VDIR: /* * XXX not yet. * * There is only one way to rename a directory * based in the lowervp, and that is to copy * the entire directory hierarchy. Otherwise * it would not last across a reboot. */ #if 0 vrele(fvp); fvp = NULL; vn_lock(fdvp, LK_EXCLUSIVE | LK_RETRY, ap->a_fcnp->cn_thread); error = union_mkshadow(um, fdvp, ap->a_fcnp, &un->un_uppervp); VOP_UNLOCK(fdvp, 0, ap->a_fcnp->cn_thread); if (un->un_uppervp) VOP_UNLOCK(un->un_uppervp, 0, ap->a_fcnp->cn_thread); if (error) goto bad; break; #endif default: error = EXDEV; goto bad; } } if (un->un_lowervp != NULLVP) ap->a_fcnp->cn_flags |= DOWHITEOUT; fvp = un->un_uppervp; VREF(fvp); vrele(ap->a_fvp); } /* * Figure out what tdvp (destination directory) to pass to the * lower level. If we replace it with uppervp, we need to vput the * old one. The exclusive lock is transfered to what we will pass * down in the VOP_RENAME() and we replace uppervp with a simple * reference. */ if (tdvp->v_op == union_vnodeop_p) { struct union_node *un = VTOUNION(tdvp); if (un->un_uppervp == NULLVP) { /* * This should never happen in normal * operation but might if there was * a problem creating the top-level shadow * directory. */ error = EXDEV; goto bad; } /* * New tdvp is a lock and reference on uppervp. * Put away the old tdvp. */ tdvp = union_lock_upper(un, ap->a_tcnp->cn_thread); vput(ap->a_tdvp); } /* * Figure out what tvp (destination file) to pass to the * lower level. * * If the uppervp file does not exist, put away the (wrong) * file and change tvp to NULL. 
*/ if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) { struct union_node *un = VTOUNION(tvp); tvp = union_lock_upper(un, ap->a_tcnp->cn_thread); vput(ap->a_tvp); /* note: tvp may be NULL */ } /* * VOP_RENAME() releases/vputs prior to returning, so we have no * cleanup to do. */ return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp)); /* * Error. We still have to release / vput the various elements. */ bad: vrele(fdvp); if (fvp) vrele(fvp); vput(tdvp); if (tvp != NULLVP) { if (tvp != tdvp) vput(tvp); else vrele(tvp); } return (error); } static int union_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); struct componentname *cnp = ap->a_cnp; struct thread *td = cnp->cn_thread; struct vnode *upperdvp; int error = EROFS; if ((upperdvp = union_lock_upper(dun, td)) != NULLVP) { struct vnode *vp; error = VOP_MKDIR(upperdvp, &vp, cnp, ap->a_vap); union_unlock_upper(upperdvp, td); if (error == 0) { VOP_UNLOCK(vp, 0, td); UDEBUG(("ALLOCVP-2 FROM %p REFS %d\n", vp, vrefcnt(vp))); error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp, NULLVP, cnp, vp, NULLVP, 1); UDEBUG(("ALLOCVP-2B FROM %p REFS %d\n", *ap->a_vpp, vrefcnt(vp))); } } return (error); } static int union_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); struct union_node *un = VTOUNION(ap->a_vp); struct componentname *cnp = ap->a_cnp; struct thread *td = cnp->cn_thread; struct vnode *upperdvp; struct vnode *uppervp; int error; if ((upperdvp = union_lock_upper(dun, td)) == NULLVP) panic("union rmdir: null upper vnode"); if ((uppervp = union_lock_upper(un, td)) != NULLVP) { if (union_dowhiteout(un, cnp->cn_cred, td)) cnp->cn_flags |= DOWHITEOUT; error = VOP_RMDIR(upperdvp, uppervp, ap->a_cnp); union_unlock_upper(uppervp, td); } else { error = 
union_mkwhiteout( MOUNTTOUNIONMOUNT(ap->a_dvp->v_mount), dun->un_uppervp, ap->a_cnp, un->un_path); } union_unlock_upper(upperdvp, td); return (error); } /* * union_symlink: * * dvp is locked on entry and remains locked on return. a_vpp is garbage * (unused). */ static int union_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { struct union_node *dun = VTOUNION(ap->a_dvp); struct componentname *cnp = ap->a_cnp; struct thread *td = cnp->cn_thread; struct vnode *dvp; int error = EROFS; if ((dvp = union_lock_upper(dun, td)) != NULLVP) { error = VOP_SYMLINK(dvp, ap->a_vpp, cnp, ap->a_vap, ap->a_target); union_unlock_upper(dvp, td); } return (error); } /* * union_readdir ()works in concert with getdirentries() and * readdir(3) to provide a list of entries in the unioned * directories. getdirentries() is responsible for walking * down the union stack. readdir(3) is responsible for * eliminating duplicate names from the returned data stream. 
*/
/*
 * Only the upper vnode is read here: if union_lock_upper() returns no
 * vnode the call succeeds with error 0 and nothing is done.
 */
static int
union_readdir(ap)
	struct vop_readdir_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		struct ucred *a_cred;
		int *a_eofflag;
		u_long *a_cookies;
		int a_ncookies;
	} */ *ap;
{
	struct union_node *un = VTOUNION(ap->a_vp);
	struct thread *td = ap->a_uio->uio_td;
	struct vnode *uvp;
	int error = 0;

	if ((uvp = union_lock_upper(un, td)) != NULLVP) {
		/* Reuse the caller's argument block with a_vp redirected. */
		ap->a_vp = uvp;
		error = VCALL(uvp, VOFFSET(vop_readdir), ap);
		union_unlock_upper(uvp, td);
	}
	return(error);
}

/*
 * union_readlink:
 *
 *	Forward the readlink to the backing vnode returned by
 *	union_lock_other(), reusing the caller's argument block.
 */
static int
union_readlink(ap)
	struct vop_readlink_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		struct ucred *a_cred;
	} */ *ap;
{
	int error;
	struct union_node *un = VTOUNION(ap->a_vp);
	struct uio *uio = ap->a_uio;
	struct thread *td = uio->uio_td;
	struct vnode *vp;

	vp = union_lock_other(un, td);
	KASSERT(vp != NULL, ("union_readlink: backing vnode missing!"));

	ap->a_vp = vp;
	error = VCALL(vp, VOFFSET(vop_readlink), ap);
	union_unlock_other(vp, td);

	return (error);
}

/*
 * union_getwritemount:
 *
 *	Forward to the upper vnode when there is one.  Without an upper
 *	vnode, return EOPNOTSUPP if the union vnode has VI_FREE set
 *	(checked under the vnode interlock) and EACCES otherwise.
 */
static int
union_getwritemount(ap)
	struct vop_getwritemount_args /* {
		struct vnode *a_vp;
		struct mount **a_mpp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct vnode *uvp = UPPERVP(vp);

	if (uvp == NULL) {
		VI_LOCK(vp);
		if (vp->v_iflag & VI_FREE) {
			VI_UNLOCK(vp);
			return (EOPNOTSUPP);
		}
		VI_UNLOCK(vp);
		return (EACCES);
	}
	return(VOP_GETWRITEMOUNT(uvp, ap->a_mpp));
}

/*
 *	union_inactive:
 *
 *	Called with the vnode locked.  We are expected to unlock the vnode.
 *
 *	NOTE(review): this function contains unified-diff hunk markers.
 *	The '-' statements are the removed inline teardown of
 *	un->un_dircache (and its vpp cursor); the '+' statements are the
 *	replacement call to union_dircache_free().
 */
static int
union_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct thread *a_td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct thread *td = ap->a_td;
	struct union_node *un = VTOUNION(vp);
-	struct vnode **vpp;

	/*
	 * Do nothing (and _don't_ bypass).
	 * Wait to vrele lowervp until reclaim,
	 * so that until then our union_node is in the
	 * cache and reusable.
	 *
	 */

-	if (un->un_dircache != 0) {
-		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
-			vrele(*vpp);
-		free (un->un_dircache, M_TEMP);
-		un->un_dircache = 0;
-	}
+	if (un->un_dircache != NULL)
+		union_dircache_free(un);

#if 0
	if ((un->un_flags & UN_ULOCK) && un->un_uppervp) {
		un->un_flags &= ~UN_ULOCK;
		VOP_UNLOCK(un->un_uppervp, 0, td);
	}
#endif

	VOP_UNLOCK(vp, 0, td);

	/* A node that is not marked UN_CACHED is disposed of right away. */
	if ((un->un_flags & UN_CACHED) == 0)
		vgone(vp);

	return (0);
}

/*
 * union_reclaim:
 *
 *	Hand the vnode to union_freevp(); always returns 0.
 */
static int
union_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	union_freevp(ap->a_vp);

	return (0);
}

/*
 * A union vnode does not hold a VM object, and there is no need to create
 * one for the upper or lower vp because that is done in union_open().
 * This only sets VV_OBJBUF on the union vnode.
 */
static int
union_createvobject(ap)
	struct vop_createvobject_args /* {
		struct vnode *vp;
		struct ucred *cred;
		struct thread *td;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	vp->v_vflag |= VV_OBJBUF;
	return (0);
}

/*
 * We have nothing to destroy and this operation shouldn't be bypassed.
 * This only clears VV_OBJBUF on the union vnode.
 */
static int
union_destroyvobject(ap)
	struct vop_destroyvobject_args /* {
		struct vnode *vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	vp->v_vflag &= ~VV_OBJBUF;
	return (0);
}

/*
 * Get VM object from the upper or lower vp.
 * Returns EINVAL when OTHERVP() yields no vnode.
 */
static int
union_getvobject(ap)
	struct vop_getvobject_args /* {
		struct vnode *vp;
		struct vm_object **objpp;
	} */ *ap;
{
	struct vnode *ovp = OTHERVP(ap->a_vp);

	if (ovp == NULL)
		return EINVAL;
	return (VOP_GETVOBJECT(ovp, ap->a_objpp));
}

/*
 * union_print:
 *
 *	Print the union vnode with its upper and lower vnode pointers,
 *	then vprint() each layer that is present.  Always returns 0.
 */
static int
union_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	printf("\tvp=%p, uppervp=%p, lowervp=%p\n",
	       vp, UPPERVP(vp), LOWERVP(vp));
	if (UPPERVP(vp) != NULLVP)
		vprint("union: upper", UPPERVP(vp));
	if (LOWERVP(vp) != NULLVP)
		vprint("union: lower", LOWERVP(vp));

	return (0);
}

/*
 * union_pathconf:
 *
 *	Forward the pathconf to the backing vnode returned by
 *	union_lock_other(), reusing the caller's argument block.  The
 *	arguments carry no thread, so curthread is used for locking.
 */
static int
union_pathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{
	int error;
	struct thread *td = curthread;		/* XXX */
	struct union_node *un = VTOUNION(ap->a_vp);
	struct vnode *vp;

	vp = union_lock_other(un, td);
	KASSERT(vp != NULL, ("union_pathconf: backing vnode missing!"));

	ap->a_vp = vp;
	error = VCALL(vp, VOFFSET(vop_pathconf), ap);
	union_unlock_other(vp, td);

	return (error);
}

/*
 * union_advlock:
 *
 *	Redirect a_vp to OTHERVP(a_vp) and hand the unchanged argument
 *	block to that vnode's vop_advlock.
 */
static int
union_advlock(ap)
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		caddr_t  a_id;
		int  a_op;
		struct flock *a_fl;
		int  a_flags;
	} */ *ap;
{
	register struct vnode *ovp = OTHERVP(ap->a_vp);

	ap->a_vp = ovp;
	return (VCALL(ovp, VOFFSET(vop_advlock), ap));
}


/*
 * XXX - vop_strategy must be hand coded because it has no
 * vnode in its arguments (and it is not coherent with anything).
 * This goes away with a merged VM/buffer cache.
 *
 * The target vnode is taken from bp->b_vp.  Under DIAGNOSTIC this
 * panics if there is no backing vnode or if a BIO_WRITE would go to
 * the lower vnode.
 */
static int
union_strategy(ap)
	struct vop_strategy_args /* {
		struct vnode *a_vp;
		struct buf *a_bp;
	} */ *ap;
{
	struct buf *bp = ap->a_bp;
	struct vnode *othervp = OTHERVP(bp->b_vp);

#ifdef DIAGNOSTIC
	if (othervp == NULLVP)
		panic("union_strategy: nil vp");
	if ((bp->b_iocmd == BIO_WRITE) &&
	    (othervp == LOWERVP(bp->b_vp)))
		panic("union_strategy: writing to lowervp");
#endif
	return (VOP_STRATEGY(othervp, bp));
}

/*
 * Global vfs data structures
 */
vop_t **union_vnodeop_p;
/*
 * Operation table for union vnodes.  Operations not listed here go to
 * vop_defaultop; bmap is explicitly answered with vop_eopnotsupp.
 */
static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) vop_defaultop },
	{ &vop_access_desc,		(vop_t *) union_access },
	{ &vop_advlock_desc,		(vop_t *) union_advlock },
	{ &vop_bmap_desc,		(vop_t *) vop_eopnotsupp },
	{ &vop_close_desc,		(vop_t *) union_close },
	{ &vop_create_desc,		(vop_t *) union_create },
	{ &vop_createvobject_desc,	(vop_t *) union_createvobject },
	{ &vop_destroyvobject_desc,	(vop_t *) union_destroyvobject },
	{ &vop_fsync_desc,		(vop_t *) union_fsync },
	{ &vop_getattr_desc,		(vop_t *) union_getattr },
	{ &vop_getvobject_desc,		(vop_t *) union_getvobject },
	{ &vop_inactive_desc,		(vop_t *) union_inactive },
	{ &vop_ioctl_desc,		(vop_t *) union_ioctl },
	{ &vop_lease_desc,		(vop_t *) union_lease },
	{ &vop_link_desc,		(vop_t *) union_link },
	{ &vop_lookup_desc,		(vop_t *) union_lookup },
	{ &vop_mkdir_desc,		(vop_t *) union_mkdir },
	{ &vop_mknod_desc,		(vop_t *) union_mknod },
	{ &vop_open_desc,		(vop_t *) union_open },
	{ &vop_pathconf_desc,		(vop_t *) union_pathconf },
	{ &vop_poll_desc,		(vop_t *) union_poll },
	{ &vop_print_desc,		(vop_t *) union_print },
	{ &vop_read_desc,		(vop_t *) union_read },
	{ &vop_readdir_desc,		(vop_t *) union_readdir },
	{ &vop_readlink_desc,		(vop_t *) union_readlink },
	{ &vop_getwritemount_desc,	(vop_t *) union_getwritemount },
	{ &vop_reclaim_desc,		(vop_t *) union_reclaim },
	{ &vop_remove_desc,		(vop_t *) union_remove },
	{ &vop_rename_desc,		(vop_t *) union_rename },
	{ &vop_revoke_desc,		(vop_t *) union_revoke },
	{ &vop_rmdir_desc,		(vop_t *) union_rmdir },
	{ &vop_setattr_desc,		(vop_t *) union_setattr },
	{ &vop_strategy_desc,		(vop_t *) union_strategy },
	{ &vop_symlink_desc,		(vop_t *) union_symlink },
	{ &vop_whiteout_desc,		(vop_t *) union_whiteout },
	{ &vop_write_desc,		(vop_t *) union_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc union_vnodeop_opv_desc =
	{ &union_vnodeop_p, union_vnodeop_entries };

VNODEOP_SET(union_vnodeop_opv_desc);