diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c index 74c1a8f3acb6..ba29b0485326 100644 --- a/sys/fs/nullfs/null_vnops.c +++ b/sys/fs/nullfs/null_vnops.c @@ -1,1217 +1,1217 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * John Heidemann of the UCLA Ficus project. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Ancestors: * ...and... */ /* * Null Layer * * (See mount_nullfs(8) for more information.) * * The null layer duplicates a portion of the filesystem * name space under a new name. In this respect, it is * similar to the loopback filesystem. It differs from * the loopback fs in two respects: it is implemented using * a stackable layers techniques, and its "null-node"s stack above * all lower-layer vnodes, not just over directory vnodes. * * The null layer has two purposes. First, it serves as a demonstration * of layering by proving a layer which does nothing. (It actually * does everything the loopback filesystem does, which is slightly * more than nothing.) Second, the null layer can serve as a prototype * layer. Since it provides all necessary layer framework, * new filesystem layers can be created very easily be starting * with a null layer. * * The remainder of this man page examines the null layer as a basis * for constructing new layers. * * * INSTANTIATING NEW NULL LAYERS * * New null layers are created with mount_nullfs(8). * Mount_nullfs(8) takes two arguments, the pathname * of the lower vfs (target-pn) and the pathname where the null * layer will appear in the namespace (alias-pn). After * the null layer is put into place, the contents * of target-pn subtree will be aliased under alias-pn. * * * OPERATION OF A NULL LAYER * * The null layer is the minimum filesystem layer, * simply bypassing all possible operations to the lower layer * for processing there. The majority of its activity centers * on the bypass routine, through which nearly all vnode operations * pass. * * The bypass routine accepts arbitrary vnode operations for * handling by the lower layer. It begins by examining vnode * operation arguments and replacing any null-nodes by their * lower-layer equivlants. It then invokes the operation * on the lower layer. Finally, it replaces the null-nodes * in the arguments and, if a vnode is return by the operation, * stacks a null-node on top of the returned vnode. * * Although bypass handles most operations, vop_getattr, vop_lock, * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not * bypassed. Vop_getattr must change the fsid being returned. * Vop_lock and vop_unlock must handle any locking for the * current vnode as well as pass the lock request down. * Vop_inactive and vop_reclaim are not bypassed so that * they can handle freeing null-layer specific data. Vop_print * is not bypassed to avoid excessive debugging information. * Also, certain vnode operations change the locking state within * the operation (create, mknod, remove, link, rename, mkdir, rmdir, * and symlink). Ideally these operations should not change the * lock state, but should be changed to let the caller of the * function unlock them. Otherwise all intermediate vnode layers * (such as union, umapfs, etc) must catch these functions to do * the necessary locking at their layer. * * * INSTANTIATING VNODE STACKS * * Mounting associates the null layer with a lower layer, * effect stacking two VFSes. Vnode stacks are instead * created on demand as files are accessed. * * The initial mount creates a single vnode stack for the * root of the new null layer. All other vnode stacks * are created as a result of vnode operations on * this or other null vnode stacks. * * New vnode stacks come into existence as a result of * an operation which returns a vnode. * The bypass routine stacks a null-node above the new * vnode before returning it to the caller. * * For example, imagine mounting a null layer with * "mount_nullfs /usr/include /dev/layer/null". * Changing directory to /dev/layer/null will assign * the root null-node (which was created when the null layer was mounted). * Now consider opening "sys". A vop_lookup would be * done on the root null-node. This operation would bypass through * to the lower layer which would return a vnode representing * the UFS "sys". Null_bypass then builds a null-node * aliasing the UFS "sys" and returns this to the caller. * Later operations on the null-node "sys" will repeat this * process when constructing other vnode stacks. * * * CREATING OTHER FILE SYSTEM LAYERS * * One of the easiest ways to construct new filesystem layers is to make * a copy of the null layer, rename all files and variables, and * then begin modifing the copy. Sed can be used to easily rename * all variables. * * The umap layer is an example of a layer descended from the * null layer. * * * INVOKING OPERATIONS ON LOWER LAYERS * * There are two techniques to invoke operations on a lower layer * when the operation cannot be completely bypassed. Each method * is appropriate in different situations. In both cases, * it is the responsibility of the aliasing layer to make * the operation arguments "correct" for the lower layer * by mapping a vnode arguments to the lower layer. * * The first approach is to call the aliasing layer's bypass routine. * This method is most suitable when you wish to invoke the operation * currently being handled on the lower layer. It has the advantage * that the bypass routine already must do argument mapping. * An example of this is null_getattrs in the null layer. * * A second approach is to directly invoke vnode operations on * the lower layer with the VOP_OPERATIONNAME interface. * The advantage of this method is that it is easy to invoke * arbitrary operations on the lower layer. The disadvantage * is that vnode arguments must be manualy mapped. * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW, &null_bug_bypass, 0, ""); /* * Synchronize inotify flags with the lower vnode: * - If the upper vnode has the flag set and the lower does not, then the lower * vnode is unwatched and the upper vnode does not need to go through * VOP_INOTIFY. * - If the lower vnode is watched, then the upper vnode should go through * VOP_INOTIFY, so copy the flag up. */ static void null_copy_inotify(struct vnode *vp, struct vnode *lvp, short flag) { if ((vn_irflag_read(vp) & flag) != 0) { if (__predict_false((vn_irflag_read(lvp) & flag) == 0)) vn_irflag_unset(vp, flag); } else if ((vn_irflag_read(lvp) & flag) != 0) { if (__predict_false((vn_irflag_read(vp) & flag) == 0)) vn_irflag_set(vp, flag); } } /* * This is the 10-Apr-92 bypass routine. * This version has been optimized for speed, throwing away some * safety checks. It should still always work, but it's not as * robust to programmer errors. * * In general, we map all vnodes going down and unmap them on the way back. * As an exception to this, vnodes can be marked "unmapped" by setting * the Nth bit in operation's vdesc_flags. * * Also, some BSD vnode operations have the side effect of vrele'ing * their arguments. With stacking, the reference counts are held * by the upper node, not the lower one, so we must handle these * side-effects here. This is not of concern in Sun-derived systems * since there are no such side-effects. * * This makes the following assumptions: * - only one returned vpp * - no INOUT vpp's (Sun's vop_open has one of these) * - the vnode operation vector of the first vnode should be used * to determine what implementation of the op should be invoked * - all mapped vnodes are of our vnode-type (NEEDSWORK: * problems on rmdir'ing mount points and renaming?) */ int null_bypass(struct vop_generic_args *ap) { struct vnode **this_vp_p; struct vnode *old_vps[VDESC_MAX_VPS]; struct vnode **vps_p[VDESC_MAX_VPS]; struct vnode ***vppp; struct vnode *lvp; struct vnodeop_desc *descp = ap->a_desc; int error, i, reles; if (null_bug_bypass) printf ("null_bypass: %s\n", descp->vdesc_name); #ifdef DIAGNOSTIC /* * We require at least one vp. */ if (descp->vdesc_vp_offsets == NULL || descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET) panic ("null_bypass: no vp's in map"); #endif /* * Map the vnodes going in. * Later, we'll invoke the operation based on * the first mapped vnode's operation vector. */ reles = descp->vdesc_flags; for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) break; /* bail out at end of list */ vps_p[i] = this_vp_p = VOPARG_OFFSETTO(struct vnode **, descp->vdesc_vp_offsets[i], ap); /* * We're not guaranteed that any but the first vnode * are of our type. Check for and don't map any * that aren't. (We must always map first vp or vclean fails.) */ if (i != 0 && (*this_vp_p == NULLVP || (*this_vp_p)->v_op != &null_vnodeops)) { old_vps[i] = NULLVP; } else { old_vps[i] = *this_vp_p; *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p); /* * The upper vnode reference to the lower * vnode is the only reference that keeps our * pointer to the lower vnode alive. If lower * vnode is relocked during the VOP call, * upper vnode might become unlocked and * reclaimed, which invalidates our reference. * Add a transient hold around VOP call. */ vhold(*this_vp_p); /* * XXX - Several operations have the side effect * of vrele'ing their vp's. We must account for * that. (This should go away in the future.) */ if (reles & VDESC_VP0_WILLRELE) vref(*this_vp_p); } } /* * Call the operation on the lower layer * with the modified argument structure. */ if (vps_p[0] != NULL && *vps_p[0] != NULL) { - error = VCALL(ap); + error = ap->a_desc->vdesc_call(ap); } else { printf("null_bypass: no map for %s\n", descp->vdesc_name); error = EINVAL; } /* * Maintain the illusion of call-by-value * by restoring vnodes in the argument structure * to their original value. */ reles = descp->vdesc_flags; for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) break; /* bail out at end of list */ if (old_vps[i] != NULL) { lvp = *(vps_p[i]); /* * Get rid of the transient hold on lvp. Copy inotify * flags up in case something is watching the lower * layer. * * If lowervp was unlocked during VOP * operation, nullfs upper vnode could have * been reclaimed, which changes its v_vnlock * back to private v_lock. In this case we * must move lock ownership from lower to * upper (reclaimed) vnode. */ if (lvp != NULLVP) { null_copy_inotify(old_vps[i], lvp, VIRF_INOTIFY); null_copy_inotify(old_vps[i], lvp, VIRF_INOTIFY_PARENT); if (VOP_ISLOCKED(lvp) == LK_EXCLUSIVE && old_vps[i]->v_vnlock != lvp->v_vnlock) { VOP_UNLOCK(lvp); VOP_LOCK(old_vps[i], LK_EXCLUSIVE | LK_RETRY); } vdrop(lvp); } *(vps_p[i]) = old_vps[i]; #if 0 if (reles & VDESC_VP0_WILLUNLOCK) VOP_UNLOCK(*(vps_p[i]), 0); #endif if (reles & VDESC_VP0_WILLRELE) vrele(*(vps_p[i])); } } /* * Map the possible out-going vpp * (Assumes that the lower layer always returns * a VREF'ed vpp unless it gets an error.) */ if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && error == 0) { /* * XXX - even though some ops have vpp returned vp's, * several ops actually vrele this before returning. * We must avoid these ops. * (This should go away when these ops are regularized.) */ vppp = VOPARG_OFFSETTO(struct vnode ***, descp->vdesc_vpp_offset, ap); if (*vppp != NULL) error = null_nodeget(old_vps[0]->v_mount, **vppp, *vppp); } return (error); } static int null_add_writecount(struct vop_add_writecount_args *ap) { struct vnode *lvp, *vp; int error; vp = ap->a_vp; lvp = NULLVPTOLOWERVP(vp); VI_LOCK(vp); /* text refs are bypassed to lowervp */ VNASSERT(vp->v_writecount >= 0, vp, ("wrong null writecount")); VNASSERT(vp->v_writecount + ap->a_inc >= 0, vp, ("wrong writecount inc %d", ap->a_inc)); error = VOP_ADD_WRITECOUNT(lvp, ap->a_inc); if (error == 0) vp->v_writecount += ap->a_inc; VI_UNLOCK(vp); return (error); } /* * We have to carry on the locking protocol on the null layer vnodes * as we progress through the tree. We also have to enforce read-only * if this layer is mounted read-only. */ static int null_lookup(struct vop_lookup_args *ap) { struct componentname *cnp = ap->a_cnp; struct vnode *dvp = ap->a_dvp; uint64_t flags = cnp->cn_flags; struct vnode *vp, *ldvp, *lvp; struct mount *mp; int error; mp = dvp->v_mount; if ((flags & ISLASTCN) != 0 && (mp->mnt_flag & MNT_RDONLY) != 0 && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); /* * Although it is possible to call null_bypass(), we'll do * a direct call to reduce overhead */ ldvp = NULLVPTOLOWERVP(dvp); vp = lvp = NULL; /* * Renames in the lower mounts might create an inconsistent * configuration where lower vnode is moved out of the directory tree * remounted by our null mount. * * Do not try to handle it fancy, just avoid VOP_LOOKUP() with DOTDOT * name which cannot be handled by the VOP. */ if ((flags & ISDOTDOT) != 0) { struct nameidata *ndp; if ((ldvp->v_vflag & VV_ROOT) != 0) { KASSERT((dvp->v_vflag & VV_ROOT) == 0, ("ldvp %p fl %#x dvp %p fl %#x flags %#jx", ldvp, ldvp->v_vflag, dvp, dvp->v_vflag, (uintmax_t)flags)); return (ENOENT); } ndp = vfs_lookup_nameidata(cnp); if (ndp != NULL && vfs_lookup_isroot(ndp, ldvp)) return (ENOENT); } /* * Hold ldvp. The reference on it, owned by dvp, is lost in * case of dvp reclamation, and we need ldvp to move our lock * from ldvp to dvp. */ vhold(ldvp); error = VOP_LOOKUP(ldvp, &lvp, cnp); /* * VOP_LOOKUP() on lower vnode may unlock ldvp, which allows * dvp to be reclaimed due to shared v_vnlock. Check for the * doomed state and return error. */ if (VN_IS_DOOMED(dvp)) { if (error == 0 || error == EJUSTRETURN) { if (lvp != NULL) vput(lvp); error = ENOENT; } /* * If vgone() did reclaimed dvp before curthread * relocked ldvp, the locks of dvp and ldpv are no * longer shared. In this case, relock of ldvp in * lower fs VOP_LOOKUP() does not restore the locking * state of dvp. Compensate for this by unlocking * ldvp and locking dvp, which is also correct if the * locks are still shared. */ VOP_UNLOCK(ldvp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); } vdrop(ldvp); if (error == EJUSTRETURN && (flags & ISLASTCN) != 0 && (mp->mnt_flag & MNT_RDONLY) != 0 && (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) error = EROFS; if ((error == 0 || error == EJUSTRETURN) && lvp != NULL) { if (ldvp == lvp) { *ap->a_vpp = dvp; VREF(dvp); vrele(lvp); } else { error = null_nodeget(mp, lvp, &vp); if (error == 0) *ap->a_vpp = vp; } } return (error); } static int null_open(struct vop_open_args *ap) { int retval; struct vnode *vp, *ldvp; vp = ap->a_vp; ldvp = NULLVPTOLOWERVP(vp); retval = null_bypass(&ap->a_gen); if (retval == 0) { vp->v_object = ldvp->v_object; if ((vn_irflag_read(ldvp) & VIRF_PGREAD) != 0) { MPASS(vp->v_object != NULL); if ((vn_irflag_read(vp) & VIRF_PGREAD) == 0) { vn_irflag_set_cond(vp, VIRF_PGREAD); } } } return (retval); } /* * Setattr call. Disallow write attempts if the layer is mounted read-only. */ static int null_setattr(struct vop_setattr_args *ap) { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_flags != VNOVAL) return (EOPNOTSUPP); return (0); case VREG: case VLNK: default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); } } return (null_bypass(&ap->a_gen)); } /* * We handle stat and getattr only to change the fsid. */ static int null_stat(struct vop_stat_args *ap) { int error; if ((error = null_bypass(&ap->a_gen)) != 0) return (error); ap->a_sb->st_dev = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; return (0); } static int null_getattr(struct vop_getattr_args *ap) { int error; if ((error = null_bypass(&ap->a_gen)) != 0) return (error); ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; return (0); } /* * Handle to disallow write access if mounted read-only. */ static int null_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; accmode_t accmode = ap->a_accmode; /* * Disallow write attempts on read-only layers; * unless the file is a socket, fifo, or a block or * character device resident on the filesystem. */ if (accmode & VWRITE) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; default: break; } } return (null_bypass(&ap->a_gen)); } static int null_accessx(struct vop_accessx_args *ap) { struct vnode *vp = ap->a_vp; accmode_t accmode = ap->a_accmode; /* * Disallow write attempts on read-only layers; * unless the file is a socket, fifo, or a block or * character device resident on the filesystem. */ if (accmode & VWRITE) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; default: break; } } return (null_bypass(&ap->a_gen)); } /* * Increasing refcount of lower vnode is needed at least for the case * when lower FS is NFS to do sillyrename if the file is in use. * Unfortunately v_usecount is incremented in many places in * the kernel and, as such, there may be races that result in * the NFS client doing an extraneous silly rename, but that seems * preferable to not doing a silly rename when it is needed. */ static int null_remove(struct vop_remove_args *ap) { int retval, vreleit; struct vnode *lvp, *vp; vp = ap->a_vp; if (vrefcnt(vp) > 1) { lvp = NULLVPTOLOWERVP(vp); VREF(lvp); vreleit = 1; } else vreleit = 0; VTONULL(vp)->null_flags |= NULLV_DROP; retval = null_bypass(&ap->a_gen); if (vreleit != 0) vrele(lvp); return (retval); } /* * We handle this to eliminate null FS to lower FS * file moving. Don't know why we don't allow this, * possibly we should. */ static int null_rename(struct vop_rename_args *ap) { struct vnode *fdvp, *fvp, *tdvp, *tvp; struct vnode *lfdvp, *lfvp, *ltdvp, *ltvp; struct null_node *fdnn, *fnn, *tdnn, *tnn; int error; tdvp = ap->a_tdvp; fvp = ap->a_fvp; fdvp = ap->a_fdvp; tvp = ap->a_tvp; lfdvp = NULL; /* Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp != NULL && fvp->v_mount != tvp->v_mount)) { error = EXDEV; goto upper_err; } VI_LOCK(fdvp); fdnn = VTONULL(fdvp); if (fdnn == NULL) { /* fdvp is not locked, can be doomed */ VI_UNLOCK(fdvp); error = ENOENT; goto upper_err; } lfdvp = fdnn->null_lowervp; vref(lfdvp); VI_UNLOCK(fdvp); VI_LOCK(fvp); fnn = VTONULL(fvp); if (fnn == NULL) { VI_UNLOCK(fvp); error = ENOENT; goto upper_err; } lfvp = fnn->null_lowervp; vref(lfvp); VI_UNLOCK(fvp); tdnn = VTONULL(tdvp); ltdvp = tdnn->null_lowervp; vref(ltdvp); if (tvp != NULL) { tnn = VTONULL(tvp); ltvp = tnn->null_lowervp; vref(ltvp); tnn->null_flags |= NULLV_DROP; } else { ltvp = NULL; } error = VOP_RENAME(lfdvp, lfvp, ap->a_fcnp, ltdvp, ltvp, ap->a_tcnp); vrele(fdvp); vrele(fvp); vrele(tdvp); if (tvp != NULL) vrele(tvp); return (error); upper_err: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); if (lfdvp != NULL) vrele(lfdvp); vrele(fdvp); vrele(fvp); return (error); } static int null_rmdir(struct vop_rmdir_args *ap) { VTONULL(ap->a_vp)->null_flags |= NULLV_DROP; return (null_bypass(&ap->a_gen)); } /* * We need to process our own vnode lock and then clear the * interlock flag as it applies only to our vnode, not the * vnodes below us on the stack. */ static int null_lock(struct vop_lock1_args *ap) { struct vnode *vp = ap->a_vp; int flags; struct null_node *nn; struct vnode *lvp; int error; if ((ap->a_flags & LK_INTERLOCK) == 0) VI_LOCK(vp); else ap->a_flags &= ~LK_INTERLOCK; flags = ap->a_flags; nn = VTONULL(vp); /* * If we're still active we must ask the lower layer to * lock as ffs has special lock considerations in its * vop lock. */ if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) { /* * We have to hold the vnode here to solve a potential * reclaim race. If we're forcibly vgone'd while we * still have refs, a thread could be sleeping inside * the lowervp's vop_lock routine. When we vgone we will * drop our last ref to the lowervp, which would allow it * to be reclaimed. The lowervp could then be recycled, * in which case it is not legal to be sleeping in its VOP. * We prevent it from being recycled by holding the vnode * here. */ vholdnz(lvp); VI_UNLOCK(vp); error = VOP_LOCK(lvp, flags); /* * We might have slept to get the lock and someone might have * clean our vnode already, switching vnode lock from one in * lowervp to v_lock in our own vnode structure. Handle this * case by reacquiring correct lock in requested mode. */ if (VTONULL(vp) == NULL && error == 0) { ap->a_flags &= ~LK_TYPE_MASK; switch (flags & LK_TYPE_MASK) { case LK_SHARED: ap->a_flags |= LK_SHARED; break; case LK_UPGRADE: case LK_EXCLUSIVE: ap->a_flags |= LK_EXCLUSIVE; break; default: panic("Unsupported lock request %d\n", ap->a_flags); } VOP_UNLOCK(lvp); error = vop_stdlock(ap); } vdrop(lvp); } else { VI_UNLOCK(vp); error = vop_stdlock(ap); } return (error); } /* * We need to process our own vnode unlock and then clear the * interlock flag as it applies only to our vnode, not the * vnodes below us on the stack. */ static int null_unlock(struct vop_unlock_args *ap) { struct vnode *vp = ap->a_vp; struct null_node *nn; struct vnode *lvp; int error; nn = VTONULL(vp); if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) { vholdnz(lvp); error = VOP_UNLOCK(lvp); vdrop(lvp); } else { error = vop_stdunlock(ap); } return (error); } /* * Do not allow the VOP_INACTIVE to be passed to the lower layer, * since the reference count on the lower vnode is not related to * ours. */ static int null_want_recycle(struct vnode *vp) { struct vnode *lvp; struct null_node *xp; struct mount *mp; struct null_mount *xmp; xp = VTONULL(vp); lvp = NULLVPTOLOWERVP(vp); mp = vp->v_mount; xmp = MOUNTTONULLMOUNT(mp); if ((xmp->nullm_flags & NULLM_CACHE) == 0 || (xp->null_flags & NULLV_DROP) != 0 || (lvp->v_vflag & VV_NOSYNC) != 0) { /* * If this is the last reference and caching of the * nullfs vnodes is not enabled, or the lower vnode is * deleted, then free up the vnode so as not to tie up * the lower vnodes. */ return (1); } return (0); } static int null_inactive(struct vop_inactive_args *ap) { struct vnode *vp; vp = ap->a_vp; if (null_want_recycle(vp)) { vp->v_object = NULL; vrecycle(vp); } return (0); } static int null_need_inactive(struct vop_need_inactive_args *ap) { return (null_want_recycle(ap->a_vp) || vn_need_pageq_flush(ap->a_vp)); } /* * Now, the nullfs vnode and, due to the sharing lock, the lower * vnode, are exclusively locked, and we shall destroy the null vnode. */ static int null_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp; struct null_node *xp; struct vnode *lowervp; vp = ap->a_vp; xp = VTONULL(vp); lowervp = xp->null_lowervp; KASSERT(lowervp != NULL && vp->v_vnlock != &vp->v_lock, ("Reclaiming incomplete null vnode %p", vp)); null_hashrem(xp); /* * Use the interlock to protect the clearing of v_data to * prevent faults in null_lock(). */ lockmgr(&vp->v_lock, LK_EXCLUSIVE, NULL); VI_LOCK(vp); vp->v_data = NULL; vp->v_object = NULL; vp->v_vnlock = &vp->v_lock; /* * If we were opened for write, we leased the write reference * to the lower vnode. If this is a reclamation due to the * forced unmount, undo the reference now. */ if (vp->v_writecount > 0) VOP_ADD_WRITECOUNT(lowervp, -vp->v_writecount); else if (vp->v_writecount < 0) vp->v_writecount = 0; VI_UNLOCK(vp); if ((xp->null_flags & NULLV_NOUNLOCK) != 0) vunref(lowervp); else vput(lowervp); free(xp, M_NULLFSNODE); return (0); } static int null_print(struct vop_print_args *ap) { struct vnode *vp = ap->a_vp; printf("\tvp=%p, lowervp=%p\n", vp, VTONULL(vp)->null_lowervp); return (0); } /* ARGSUSED */ static int null_getwritemount(struct vop_getwritemount_args *ap) { struct null_node *xp; struct vnode *lowervp; struct vnode *vp; vp = ap->a_vp; VI_LOCK(vp); xp = VTONULL(vp); if (xp && (lowervp = xp->null_lowervp)) { vholdnz(lowervp); VI_UNLOCK(vp); VOP_GETWRITEMOUNT(lowervp, ap->a_mpp); vdrop(lowervp); } else { VI_UNLOCK(vp); *(ap->a_mpp) = NULL; } return (0); } static int null_vptofh(struct vop_vptofh_args *ap) { struct vnode *lvp; lvp = NULLVPTOLOWERVP(ap->a_vp); return VOP_VPTOFH(lvp, ap->a_fhp); } static int null_vptocnp(struct vop_vptocnp_args *ap) { struct vnode *vp = ap->a_vp; struct vnode **dvp = ap->a_vpp; struct vnode *lvp, *ldvp; struct mount *mp; int error, locked; locked = VOP_ISLOCKED(vp); lvp = NULLVPTOLOWERVP(vp); mp = vp->v_mount; error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) return (error); vhold(lvp); VOP_UNLOCK(vp); /* vp is held by vn_vptocnp_locked that called us */ ldvp = lvp; vref(lvp); error = vn_vptocnp(&ldvp, ap->a_buf, ap->a_buflen); vdrop(lvp); if (error != 0) { vn_lock(vp, locked | LK_RETRY); vfs_unbusy(mp); return (ENOENT); } error = vn_lock(ldvp, LK_SHARED); if (error != 0) { vrele(ldvp); vn_lock(vp, locked | LK_RETRY); vfs_unbusy(mp); return (ENOENT); } error = null_nodeget(mp, ldvp, dvp); if (error == 0) { #ifdef DIAGNOSTIC NULLVPTOLOWERVP(*dvp); #endif VOP_UNLOCK(*dvp); /* keep reference on *dvp */ } vn_lock(vp, locked | LK_RETRY); vfs_unbusy(mp); return (error); } static int null_read_pgcache(struct vop_read_pgcache_args *ap) { struct vnode *lvp, *vp; struct null_node *xp; int error; vp = ap->a_vp; VI_LOCK(vp); xp = VTONULL(vp); if (xp == NULL) { VI_UNLOCK(vp); return (EJUSTRETURN); } lvp = xp->null_lowervp; vref(lvp); VI_UNLOCK(vp); error = VOP_READ_PGCACHE(lvp, ap->a_uio, ap->a_ioflag, ap->a_cred); vrele(lvp); return (error); } static int null_advlock(struct vop_advlock_args *ap) { struct vnode *lvp, *vp; struct null_node *xp; int error; vp = ap->a_vp; VI_LOCK(vp); xp = VTONULL(vp); if (xp == NULL) { VI_UNLOCK(vp); return (EBADF); } lvp = xp->null_lowervp; vref(lvp); VI_UNLOCK(vp); error = VOP_ADVLOCK(lvp, ap->a_id, ap->a_op, ap->a_fl, ap->a_flags); vrele(lvp); return (error); } /* * Avoid standard bypass, since lower dvp and vp could be no longer * valid after vput(). */ static int null_vput_pair(struct vop_vput_pair_args *ap) { struct mount *mp; struct vnode *dvp, *ldvp, *lvp, *vp, *vp1, **vpp; int error, res; dvp = ap->a_dvp; ldvp = NULLVPTOLOWERVP(dvp); vref(ldvp); vpp = ap->a_vpp; vp = NULL; lvp = NULL; mp = NULL; if (vpp != NULL) vp = *vpp; if (vp != NULL) { lvp = NULLVPTOLOWERVP(vp); vref(lvp); if (!ap->a_unlock_vp) { vhold(vp); vhold(lvp); mp = vp->v_mount; vfs_ref(mp); } } res = VOP_VPUT_PAIR(ldvp, lvp != NULL ? &lvp : NULL, true); if (vp != NULL && ap->a_unlock_vp) vrele(vp); vrele(dvp); if (vp == NULL || ap->a_unlock_vp) return (res); /* lvp has been unlocked and vp might be reclaimed */ VOP_LOCK(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_data == NULL && vfs_busy(mp, MBF_NOWAIT) == 0) { vput(vp); vget(lvp, LK_EXCLUSIVE | LK_RETRY); if (VN_IS_DOOMED(lvp)) { vput(lvp); vget(vp, LK_EXCLUSIVE | LK_RETRY); } else { error = null_nodeget(mp, lvp, &vp1); if (error == 0) { *vpp = vp1; } else { vget(vp, LK_EXCLUSIVE | LK_RETRY); } } vfs_unbusy(mp); } vdrop(lvp); vdrop(vp); vfs_rel(mp); return (res); } static int null_getlowvnode(struct vop_getlowvnode_args *ap) { struct vnode *vp, *vpl; vp = ap->a_vp; if (vn_lock(vp, LK_SHARED) != 0) return (EBADF); vpl = NULLVPTOLOWERVP(vp); vhold(vpl); VOP_UNLOCK(vp); VOP_GETLOWVNODE(vpl, ap->a_vplp, ap->a_flags); vdrop(vpl); return (0); } /* * Global vfs data structures */ struct vop_vector null_vnodeops = { .vop_bypass = null_bypass, .vop_access = null_access, .vop_accessx = null_accessx, .vop_advlock = null_advlock, .vop_advlockpurge = vop_stdadvlockpurge, .vop_bmap = VOP_EOPNOTSUPP, .vop_stat = null_stat, .vop_getattr = null_getattr, .vop_getlowvnode = null_getlowvnode, .vop_getwritemount = null_getwritemount, .vop_inactive = null_inactive, .vop_need_inactive = null_need_inactive, .vop_islocked = vop_stdislocked, .vop_lock1 = null_lock, .vop_lookup = null_lookup, .vop_open = null_open, .vop_print = null_print, .vop_read_pgcache = null_read_pgcache, .vop_reclaim = null_reclaim, .vop_remove = null_remove, .vop_rename = null_rename, .vop_rmdir = null_rmdir, .vop_setattr = null_setattr, .vop_strategy = VOP_EOPNOTSUPP, .vop_unlock = null_unlock, .vop_vptocnp = null_vptocnp, .vop_vptofh = null_vptofh, .vop_add_writecount = null_add_writecount, .vop_vput_pair = null_vput_pair, .vop_copy_file_range = VOP_PANIC, }; VFS_VOP_VECTOR_REGISTER(null_vnodeops); diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 6ef9bbec9446..fcfb8716fc52 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -1,1263 +1,1258 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _SYS_VNODE_H_ #define _SYS_VNODE_H_ #include #include #include #include #include #include #include #include #include #include #include /* * The vnode is the focus of all file activity in UNIX. There is a * unique vnode allocated for each active file, each current directory, * each mounted-on file, text file, and the root. */ /* * Vnode types. VNON means no type. */ __enum_uint8_decl(vtype) { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD, VMARKER, VLASTTYPE = VMARKER, }; /* * We frequently need to test is something is a device node. */ #define VTYPE_ISDEV(vtype) ((vtype) == VCHR || (vtype) == VBLK) __enum_uint8_decl(vstate) { VSTATE_UNINITIALIZED, VSTATE_CONSTRUCTED, VSTATE_DESTROYING, VSTATE_DEAD, VLASTSTATE = VSTATE_DEAD, }; enum vgetstate { VGET_NONE, VGET_HOLDCNT, VGET_USECOUNT, }; /* * Each underlying filesystem allocates its own private area and hangs * it from v_data. If non-null, this area is freed in getnewvnode(). */ struct cache_fpl; struct inotify_watch; struct namecache; struct vpollinfo { struct mtx vpi_lock; /* lock to protect below */ TAILQ_HEAD(, inotify_watch) vpi_inotify; /* list of inotify watchers */ struct selinfo vpi_selinfo; /* identity of poller(s) */ short vpi_events; /* what they are looking for */ short vpi_revents; /* what has happened */ }; /* * Reading or writing any of these items requires holding the appropriate lock. * * Lock reference: * c - namecache mutex * i - interlock * l - mp mnt_listmtx or freelist mutex * I - updated with atomics, 0->1 and 1->0 transitions with interlock held * m - mount point interlock * p - pollinfo lock * u - Only a reference to the vnode is needed to read. * v - vnode lock * * Vnodes may be found on many lists. The general way to deal with operating * on a vnode that is on a list is: * 1) Lock the list and find the vnode. * 2) Lock interlock so that the vnode does not go away. * 3) Unlock the list to avoid lock order reversals. * 4) vget with LK_INTERLOCK and check for ENOENT, or * 5) Check for DOOMED if the vnode lock is not required. * 6) Perform your operation, then vput(). */ #if defined(_KERNEL) || defined(_KVM_VNODE) struct vnode { /* * Fields which define the identity of the vnode. These fields are * owned by the filesystem (XXX: and vgone() ?) */ __enum_uint8(vtype) v_type; /* u vnode type */ __enum_uint8(vstate) v_state; /* u vnode state */ short v_irflag; /* i frequently read flags */ seqc_t v_seqc; /* i modification count */ uint32_t v_nchash; /* u namecache hash */ u_int v_hash; const struct vop_vector *v_op; /* u vnode operations vector */ void *v_data; /* u private data for fs */ /* * Filesystem instance stuff */ struct mount *v_mount; /* u ptr to vfs we are in */ TAILQ_ENTRY(vnode) v_nmntvnodes; /* m vnodes for mount point */ /* * Type specific fields, only one applies to any given vnode. */ union { struct mount *v_mountedhere; /* v ptr to mountpoint (VDIR) */ struct unpcb *v_unpcb; /* v unix domain net (VSOCK) */ struct cdev *v_rdev; /* v device (VCHR, VBLK) */ struct fifoinfo *v_fifoinfo; /* v fifo (VFIFO) */ }; /* * vfs_hash: (mount + inode) -> vnode hash. The hash value * itself is grouped with other int fields, to avoid padding. */ LIST_ENTRY(vnode) v_hashlist; /* * VFS_namecache stuff */ LIST_HEAD(, namecache) v_cache_src; /* c Cache entries from us */ TAILQ_HEAD(, namecache) v_cache_dst; /* c Cache entries to us */ struct namecache *v_cache_dd; /* c Cache entry for .. vnode */ /* * Locking */ struct lock v_lock; /* u (if fs don't have one) */ struct mtx v_interlock; /* lock for "i" things */ struct lock *v_vnlock; /* u pointer to vnode lock */ /* * The machinery of being a vnode */ TAILQ_ENTRY(vnode) v_vnodelist; /* l vnode lists */ TAILQ_ENTRY(vnode) v_lazylist; /* l vnode lazy list */ struct bufobj v_bufobj; /* * Buffer cache object */ /* * Hooks for various subsystems and features. */ struct vpollinfo *v_pollinfo; /* i Poll events, p for *v_pi */ struct label *v_label; /* MAC label for vnode */ struct lockf *v_lockf; /* Byte-level advisory lock list */ struct rangelock v_rl; /* Byte-range lock */ u_int v_holdcnt; /* I prevents recycling. */ u_int v_usecount; /* I ref count of users */ u_short v_iflag; /* i vnode flags (see below) */ u_short v_vflag; /* v vnode flags */ u_short v_mflag; /* l mnt-specific vnode flags */ short v_dbatchcpu; /* i LRU requeue deferral batch */ int v_writecount; /* I ref count of writers or (negative) text users */ int v_seqc_users; /* i modifications pending */ }; #define VN_ISDEV(vp) VTYPE_ISDEV((vp)->v_type) #ifndef DEBUG_LOCKS #ifdef _LP64 /* * Not crossing 448 bytes fits 9 vnodes per page. If you have to add fields * to the structure and there is nothing which can be done to prevent growth * then so be it. But don't grow it without a good reason. */ _Static_assert(sizeof(struct vnode) <= 448, "vnode size crosses 448 bytes"); #endif #endif #endif /* defined(_KERNEL) || defined(_KVM_VNODE) */ #define bo2vnode(bo) __containerof((bo), struct vnode, v_bufobj) /* XXX: These are temporary to avoid a source sweep at this time */ #define v_object v_bufobj.bo_object /* We don't need to lock the knlist */ #define VN_KNLIST_EMPTY(vp) ((vp)->v_pollinfo == NULL || \ KNLIST_EMPTY(&(vp)->v_pollinfo->vpi_selinfo.si_note)) #define VN_KNOTE(vp, b, a) \ do { \ if (!VN_KNLIST_EMPTY(vp)) \ KNOTE(&vp->v_pollinfo->vpi_selinfo.si_note, (b), \ (a) | KNF_NOKQLOCK); \ } while (0) #define VN_KNOTE_LOCKED(vp, b) VN_KNOTE(vp, b, KNF_LISTLOCKED) #define VN_KNOTE_UNLOCKED(vp, b) VN_KNOTE(vp, b, 0) /* * Vnode flags. * VI flags are protected by interlock and live in v_iflag * VIRF flags are protected by interlock and live in v_irflag * VV flags are protected by the vnode lock and live in v_vflag * * VIRF_DOOMED is doubly protected by the interlock and vnode lock. Both * are required for writing but the status may be checked with either. */ #define VHOLD_NO_SMR (1<<29) /* Disable vhold_smr */ #define VHOLD_ALL_FLAGS (VHOLD_NO_SMR) #define VIRF_DOOMED 0x0001 /* This vnode is being recycled */ #define VIRF_PGREAD 0x0002 /* Direct reads from the page cache are permitted, never cleared once set */ #define VIRF_MOUNTPOINT 0x0004 /* This vnode is mounted on */ #define VIRF_TEXT_REF 0x0008 /* Executable mappings ref the vnode */ #define VIRF_CROSSMP 0x0010 /* Cross-mp vnode, no locking */ #define VIRF_NAMEDDIR 0x0020 /* Named attribute directory */ #define VIRF_NAMEDATTR 0x0040 /* Named attribute */ #define VIRF_INOTIFY 0x0080 /* This vnode is being watched */ #define VIRF_INOTIFY_PARENT 0x0100 /* A parent of this vnode may be being watched */ #define VI_UNUSED0 0x0001 /* unused */ #define VI_MOUNT 0x0002 /* Mount in progress */ #define VI_DOINGINACT 0x0004 /* VOP_INACTIVE is in progress */ #define VI_OWEINACT 0x0008 /* Need to call inactive */ #define VI_DEFINACT 0x0010 /* deferred inactive */ #define VI_FOPENING 0x0020 /* In open, with opening process having the first right to advlock file */ #define VV_ROOT 0x0001 /* root of its filesystem */ #define VV_ISTTY 0x0002 /* vnode represents a tty */ #define VV_NOSYNC 0x0004 /* unlinked, stop syncing */ #define VV_ETERNALDEV 0x0008 /* device that is never destroyed */ #define VV_CACHEDLABEL 0x0010 /* Vnode has valid cached MAC label */ #define VV_VMSIZEVNLOCK 0x0020 /* object size check requires vnode lock */ #define VV_COPYONWRITE 0x0040 /* vnode is doing copy-on-write */ #define VV_SYSTEM 0x0080 /* vnode being used by kernel */ #define VV_PROCDEP 0x0100 /* vnode is process dependent */ #define VV_UNLINKED 0x0200 /* unlinked but stil open directory */ #define VV_DELETED 0x0400 /* should be removed */ #define VV_MD 0x0800 /* vnode backs the md device */ #define VV_FORCEINSMQ 0x1000 /* force the insmntque to succeed */ #define VV_READLINK 0x2000 /* fdescfs linux vnode */ #define VV_UNREF 0x4000 /* vunref, do not drop lock in inactive() */ #define VV_CROSSLOCK 0x8000 /* vnode lock is shared w/ root mounted here */ #define VMP_LAZYLIST 0x0001 /* Vnode is on mnt's lazy list */ /* * Vnode attributes. A field value of VNOVAL represents a field whose value * is unavailable (getattr) or which is not to be changed (setattr). */ struct vattr { __enum_uint8(vtype) va_type; /* vnode type (for create) */ u_short va_mode; /* files access mode and type */ uint16_t va_bsdflags; /* same as st_bsdflags from stat(2) */ uid_t va_uid; /* owner user id */ gid_t va_gid; /* owner group id */ nlink_t va_nlink; /* number of references to file */ dev_t va_fsid; /* filesystem id */ ino_t va_fileid; /* file id */ u_quad_t va_size; /* file size in bytes */ long va_blocksize; /* blocksize preferred for i/o */ struct timespec va_atime; /* time of last access */ struct timespec va_mtime; /* time of last modification */ struct timespec va_ctime; /* time file changed */ struct timespec va_birthtime; /* time file created */ u_long va_gen; /* generation number of file */ u_long va_flags; /* flags defined for file */ dev_t va_rdev; /* device the special file represents */ u_quad_t va_bytes; /* bytes of disk space held by file */ u_quad_t va_filerev; /* file modification number */ u_int va_vaflags; /* operations flags, see below */ long va_spare; /* remain quad aligned */ }; #define VATTR_ISDEV(vap) VTYPE_ISDEV((vap)->va_type) /* * Flags for va_vaflags. */ #define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */ #define VA_EXCLUSIVE 0x02 /* exclusive create request */ #define VA_SYNC 0x04 /* O_SYNC truncation */ /* * Flags for ioflag. (high 16 bits used to ask for read-ahead and * help with write clustering) * NB: IO_NDELAY and IO_DIRECT are linked to fcntl.h */ #define IO_UNIT 0x0001 /* do I/O as atomic unit */ #define IO_APPEND 0x0002 /* append write to end */ #define IO_NDELAY 0x0004 /* FNDELAY flag set in file table */ #define IO_NODELOCKED 0x0008 /* underlying node already locked */ #define IO_ASYNC 0x0010 /* bawrite rather then bdwrite */ #define IO_VMIO 0x0020 /* data already in VMIO space */ #define IO_INVAL 0x0040 /* invalidate after I/O */ #define IO_SYNC 0x0080 /* do I/O synchronously */ #define IO_DIRECT 0x0100 /* attempt to bypass buffer cache */ #define IO_NOREUSE 0x0200 /* VMIO data won't be reused */ #define IO_EXT 0x0400 /* operate on external attributes */ #define IO_NORMAL 0x0800 /* operate on regular data */ #define IO_NOMACCHECK 0x1000 /* MAC checks unnecessary */ #define IO_BUFLOCKED 0x2000 /* ffs flag; indir buf is locked */ #define IO_RANGELOCKED 0x4000 /* range locked */ #define IO_DATASYNC 0x8000 /* do only data I/O synchronously */ #define IO_SEQMAX 0x7F /* seq heuristic max value */ #define IO_SEQSHIFT 16 /* seq heuristic in upper 16 bits */ /* * Flags for accmode_t. */ #define VEXEC 000000000100 /* execute/search permission */ #define VWRITE 000000000200 /* write permission */ #define VREAD 000000000400 /* read permission */ #define VADMIN 000000010000 /* being the file owner */ #define VAPPEND 000000040000 /* permission to write/append */ /* * VEXPLICIT_DENY makes VOP_ACCESSX(9) return EPERM or EACCES only * if permission was denied explicitly, by a "deny" rule in NFSv4 ACL, * and 0 otherwise. This never happens with ordinary unix access rights * or POSIX.1e ACLs. Obviously, VEXPLICIT_DENY must be OR-ed with * some other V* constant. */ #define VEXPLICIT_DENY 000000100000 #define VREAD_NAMED_ATTRS 000000200000 /* not used */ #define VWRITE_NAMED_ATTRS 000000400000 /* not used */ #define VDELETE_CHILD 000001000000 #define VREAD_ATTRIBUTES 000002000000 /* permission to stat(2) */ #define VWRITE_ATTRIBUTES 000004000000 /* change {m,c,a}time */ #define VDELETE 000010000000 #define VREAD_ACL 000020000000 /* read ACL and file mode */ #define VWRITE_ACL 000040000000 /* change ACL and/or file mode */ #define VWRITE_OWNER 000100000000 /* change file owner */ #define VSYNCHRONIZE 000200000000 /* not used */ #define VCREAT 000400000000 /* creating new file */ #define VVERIFY 001000000000 /* verification required */ /* * Permissions that were traditionally granted only to the file owner. */ #define VADMIN_PERMS (VADMIN | VWRITE_ATTRIBUTES | VWRITE_ACL | \ VWRITE_OWNER) /* * Permissions that were traditionally granted to everyone. */ #define VSTAT_PERMS (VREAD_ATTRIBUTES | VREAD_ACL) /* * Permissions that allow to change the state of the file in any way. */ #define VMODIFY_PERMS (VWRITE | VAPPEND | VADMIN_PERMS | VDELETE_CHILD | \ VDELETE) /* * Token indicating no attribute value yet assigned. */ #define VNOVAL (-1) /* * LK_TIMELOCK timeout for vnode locks (used mainly by the pageout daemon) */ #define VLKTIMEOUT (hz / 20 + 1) #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_VNODE); #endif extern u_int ncsizefactor; extern const u_int io_hold_cnt; /* * Convert between vnode types and inode formats (since POSIX.1 * defines mode word of stat structure in terms of inode formats). */ extern __enum_uint8(vtype) iftovt_tab[]; extern int vttoif_tab[]; #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) #define VTTOIF(indx) (vttoif_tab[(int)(indx)]) #define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) /* * Flags to various vnode functions. */ #define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */ #define FORCECLOSE 0x0002 /* vflush: force file closure */ #define WRITECLOSE 0x0004 /* vflush: only close writable files */ #define EARLYFLUSH 0x0008 /* vflush: early call for ffs_flushfiles */ #define V_SAVE 0x0001 /* vinvalbuf: sync file first */ #define V_ALT 0x0002 /* vinvalbuf: invalidate only alternate bufs */ #define V_NORMAL 0x0004 /* vinvalbuf: invalidate only regular bufs */ #define V_CLEANONLY 0x0008 /* vinvalbuf: invalidate only clean bufs */ #define V_VMIO 0x0010 /* vinvalbuf: called during pageout */ #define V_ALLOWCLEAN 0x0020 /* vinvalbuf: allow clean buffers after flush */ #define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */ #define V_WAIT 0x0001 /* vn_start_write: sleep for suspend */ #define V_NOWAIT 0x0002 /* vn_start_write: don't sleep for suspend */ #define V_XSLEEP 0x0004 /* vn_start_write: just return after sleep */ #define V_PCATCH 0x0008 /* vn_start_write: make the sleep interruptible */ #define V_VALID_FLAGS (V_WAIT | V_NOWAIT | V_XSLEEP | V_PCATCH) #define VR_START_WRITE 0x0001 /* vfs_write_resume: start write atomically */ #define VR_NO_SUSPCLR 0x0002 /* vfs_write_resume: do not clear suspension */ #define VS_SKIP_UNMOUNT 0x0001 /* vfs_write_suspend: fail if the filesystem is being unmounted */ #define VREF(vp) vref(vp) #ifdef DIAGNOSTIC #define VATTR_NULL(vap) vattr_null(vap) #else #define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */ #endif /* DIAGNOSTIC */ #define NULLVP ((struct vnode *)NULL) /* * Global vnode data. */ extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ extern struct mount *rootdevmp; /* "/dev" mount */ extern u_long desiredvnodes; /* number of vnodes desired */ extern struct uma_zone *namei_zone; extern struct vattr va_null; /* predefined null vattr structure */ extern u_int vn_lock_pair_pause_max; #define VI_LOCK(vp) mtx_lock(&(vp)->v_interlock) #define VI_LOCK_FLAGS(vp, flags) mtx_lock_flags(&(vp)->v_interlock, (flags)) #define VI_TRYLOCK(vp) mtx_trylock(&(vp)->v_interlock) #define VI_UNLOCK(vp) mtx_unlock(&(vp)->v_interlock) #define VI_MTX(vp) (&(vp)->v_interlock) #define VN_LOCK_AREC(vp) lockallowrecurse((vp)->v_vnlock) #define VN_LOCK_ASHARE(vp) lockallowshare((vp)->v_vnlock) #define VN_LOCK_DSHARE(vp) lockdisableshare((vp)->v_vnlock) #endif /* _KERNEL */ /* * Mods for extensibility. */ /* * Flags for vdesc_flags: */ #define VDESC_MAX_VPS 16 /* Low order 16 flag bits are reserved for willrele flags for vp arguments. */ #define VDESC_VP0_WILLRELE 0x0001 #define VDESC_VP1_WILLRELE 0x0002 #define VDESC_VP2_WILLRELE 0x0004 #define VDESC_VP3_WILLRELE 0x0008 /* * A generic structure. * This can be used by bypass routines to identify generic arguments. */ struct vop_generic_args { struct vnodeop_desc *a_desc; /* other random data follows, presumably */ }; typedef int vop_bypass_t(struct vop_generic_args *); /* * VDESC_NO_OFFSET is used to identify the end of the offset list * and in places where no such field exists. */ #define VDESC_NO_OFFSET -1 /* * This structure describes the vnode operation taking place. */ struct vnodeop_desc { char *vdesc_name; /* a readable name for debugging */ int vdesc_flags; /* VDESC_* flags */ int vdesc_vop_offset; vop_bypass_t *vdesc_call; /* Function to call */ /* * These ops are used by bypass routines to map and locate arguments. * Creds and procs are not needed in bypass routines, but sometimes * they are useful to (for example) transport layers. * Nameidata is useful because it has a cred in it. */ int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */ int vdesc_vpp_offset; /* return vpp location */ int vdesc_cred_offset; /* cred location, if any */ int vdesc_thread_offset; /* thread location, if any */ int vdesc_componentname_offset; /* if any */ }; #ifdef _KERNEL /* * A list of all the operation descs. */ extern struct vnodeop_desc *vnodeop_descs[]; #define VOPARG_OFFSETOF(s_type, field) __offsetof(s_type, field) #define VOPARG_OFFSETTO(s_type, s_offset, struct_p) \ ((s_type)(((char*)(struct_p)) + (s_offset))) #ifdef INVARIANTS /* * Support code to aid in debugging VFS locking problems. Not totally * reliable since if the thread sleeps between changing the lock * state and checking it with the assert, some other thread could * change the state. They are good enough for debugging a single * filesystem using a single-threaded test. Note that the unreliability is * limited to false negatives; efforts were made to ensure that false * positives cannot occur. */ void assert_vi_locked(struct vnode *vp, const char *str); void assert_vi_unlocked(struct vnode *vp, const char *str); void assert_vop_elocked(struct vnode *vp, const char *str); void assert_vop_locked(struct vnode *vp, const char *str); void assert_vop_unlocked(struct vnode *vp, const char *str); #define ASSERT_VI_LOCKED(vp, str) assert_vi_locked((vp), (str)) #define ASSERT_VI_UNLOCKED(vp, str) assert_vi_unlocked((vp), (str)) #define ASSERT_VOP_ELOCKED(vp, str) assert_vop_elocked((vp), (str)) #define ASSERT_VOP_LOCKED(vp, str) assert_vop_locked((vp), (str)) #define ASSERT_VOP_UNLOCKED(vp, str) assert_vop_unlocked((vp), (str)) #define ASSERT_VOP_IN_SEQC(vp) do { \ struct vnode *_vp = (vp); \ \ VNPASS(seqc_in_modify(_vp->v_seqc), _vp); \ } while (0) #define ASSERT_VOP_NOT_IN_SEQC(vp) do { \ struct vnode *_vp = (vp); \ \ VNPASS(!seqc_in_modify(_vp->v_seqc), _vp); \ } while (0) #else /* !INVARIANTS */ #define ASSERT_VI_LOCKED(vp, str) ((void)0) #define ASSERT_VI_UNLOCKED(vp, str) ((void)0) #define ASSERT_VOP_ELOCKED(vp, str) ((void)0) #define ASSERT_VOP_LOCKED(vp, str) ((void)0) #define ASSERT_VOP_UNLOCKED(vp, str) ((void)0) #define ASSERT_VOP_IN_SEQC(vp) ((void)0) #define ASSERT_VOP_NOT_IN_SEQC(vp) ((void)0) #endif /* INVARIANTS */ -/* - * This call works for vnodes in the kernel. - */ -#define VCALL(c) ((c)->a_desc->vdesc_call(c)) - #define DOINGASYNC(vp) \ (((vp)->v_mount->mnt_kern_flag & MNTK_ASYNC) != 0 && \ ((curthread->td_pflags & TDP_SYNCIO) == 0)) /* * VMIO support inline */ extern int vmiodirenable; static __inline int vn_canvmio(struct vnode *vp) { if (vp && (vp->v_type == VREG || (vmiodirenable && vp->v_type == VDIR))) return(TRUE); return(FALSE); } /* * Finally, include the default set of vnode operations. */ typedef void vop_getpages_iodone_t(void *, vm_page_t *, int, int); #include "vnode_if.h" /* vn_open_flags */ #define VN_OPEN_NOAUDIT 0x00000001 #define VN_OPEN_NOCAPCHECK 0x00000002 #define VN_OPEN_NAMECACHE 0x00000004 #define VN_OPEN_INVFS 0x00000008 #define VN_OPEN_WANTIOCTLCAPS 0x00000010 /* copy_file_range kernel flags */ #define COPY_FILE_RANGE_KFLAGS 0xff000000 #define COPY_FILE_RANGE_TIMEO1SEC 0x01000000 /* Return after 1sec. */ /* * Public vnode manipulation functions. */ struct componentname; struct file; struct mount; struct nameidata; struct ostat; struct freebsd11_stat; struct thread; struct proc; struct stat; struct nstat; struct ucred; struct uio; struct vattr; struct vfsops; struct vnode; typedef int (*vn_get_ino_t)(struct mount *, void *, int, struct vnode **); int bnoreuselist(struct bufv *bufv, struct bufobj *bo, daddr_t startn, daddr_t endn); /* cache_* may belong in namei.h. */ void cache_changesize(u_long newhashsize); #define VFS_CACHE_DROPOLD 0x1 void cache_enter_time_flags(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, struct timespec *tsp, struct timespec *dtsp, int flags); #define cache_enter(dvp, vp, cnp) \ cache_enter_time(dvp, vp, cnp, NULL, NULL) void cache_enter_time(struct vnode *dvp, struct vnode *vp, struct componentname *cnp, struct timespec *tsp, struct timespec *dtsp); int cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct timespec *tsp, int *ticksp); void cache_vnode_init(struct vnode *vp); void cache_purge(struct vnode *vp); void cache_purge_vgone(struct vnode *vp); void cache_purge_negative(struct vnode *vp); void cache_purgevfs(struct mount *mp); char *cache_symlink_alloc(size_t size, int flags); void cache_symlink_free(char *string, size_t size); int cache_symlink_resolve(struct cache_fpl *fpl, const char *string, size_t len); void cache_vop_inotify(struct vnode *vp, int event, uint32_t cookie); void cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp, struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp); void cache_vop_rmdir(struct vnode *dvp, struct vnode *vp); void cache_vop_vector_register(struct vop_vector *); #ifdef INVARIANTS void cache_validate(struct vnode *dvp, struct vnode *vp, struct componentname *cnp); void cache_validate_vop_vector(struct mount *mp, struct vop_vector *vops); void cache_assert_no_entries(struct vnode *vp); #else static inline void cache_validate(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { } static inline void cache_validate_vop_vector(struct mount *mp, struct vop_vector *vops) { } static inline void cache_assert_no_entries(struct vnode *vp) { } #endif void cache_fast_lookup_enabled_recalc(void); int change_dir(struct vnode *vp, struct thread *td); void cvtstat(struct stat *st, struct ostat *ost); int freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb); int freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost); int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, struct vnode **vpp); void getnewvnode_reserve(void); void getnewvnode_drop_reserve(void); int insmntque(struct vnode *vp, struct mount *mp); int insmntque1(struct vnode *vp, struct mount *mp); u_quad_t init_va_filerev(void); int speedup_syncer(void); int vn_vptocnp(struct vnode **vp, char *buf, size_t *buflen); int vn_getcwd(char *buf, char **retbuf, size_t *buflen); int vn_fullpath(struct vnode *vp, char **retbuf, char **freebuf); int vn_fullpath_global(struct vnode *vp, char **retbuf, char **freebuf); int vn_fullpath_hardlink(struct vnode *vp, struct vnode *dvp, const char *hdrl_name, size_t hrdl_name_length, char **retbuf, char **freebuf, size_t *buflen); struct vnode * vn_dir_dd_ino(struct vnode *vp); int vn_commname(struct vnode *vn, char *buf, u_int buflen); int vn_path_to_global_path(struct thread *td, struct vnode *vp, char *path, u_int pathlen); int vn_path_to_global_path_hardlink(struct thread *td, struct vnode *vp, struct vnode *dvp, char *path, u_int pathlen, const char *leaf_name, size_t leaf_length); int vaccess(__enum_uint8(vtype) type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred); int vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, struct ucred *cred); int vaccess_acl_nfs4(__enum_uint8(vtype) type, uid_t file_uid, gid_t file_gid, struct acl *aclp, accmode_t accmode, struct ucred *cred); int vaccess_acl_posix1e(__enum_uint8(vtype) type, uid_t file_uid, gid_t file_gid, struct acl *acl, accmode_t accmode, struct ucred *cred); void vattr_null(struct vattr *vap); void vlazy(struct vnode *); void vdrop(struct vnode *); void vdropl(struct vnode *); int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td); int vget(struct vnode *vp, int flags); enum vgetstate vget_prep_smr(struct vnode *vp); enum vgetstate vget_prep(struct vnode *vp); int vget_finish(struct vnode *vp, int flags, enum vgetstate vs); void vget_finish_ref(struct vnode *vp, enum vgetstate vs); void vget_abort(struct vnode *vp, enum vgetstate vs); void vgone(struct vnode *vp); void vhold(struct vnode *); void vholdnz(struct vnode *); bool vhold_smr(struct vnode *); int vinactive(struct vnode *vp); int vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo); int vtruncbuf(struct vnode *vp, off_t length, int blksize); void v_inval_buf_range(struct vnode *vp, daddr_t startlbn, daddr_t endlbn, int blksize); void vunref(struct vnode *); void vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3); int vrecycle(struct vnode *vp); int vrecyclel(struct vnode *vp); int vn_bmap_seekhole_locked(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred); int vn_bmap_seekhole(struct vnode *vp, u_long cmd, off_t *off, struct ucred *cred); int vn_close(struct vnode *vp, int flags, struct ucred *file_cred, struct thread *td); int vn_copy_file_range(struct vnode *invp, off_t *inoffp, struct vnode *outvp, off_t *outoffp, size_t *lenp, unsigned int flags, struct ucred *incred, struct ucred *outcred, struct thread *fsize_td); int vn_deallocate(struct vnode *vp, off_t *offset, off_t *length, int flags, int ioflg, struct ucred *active_cred, struct ucred *file_cred); void vn_finished_write(struct mount *mp); void vn_finished_secondary_write(struct mount *mp); int vn_fsync_buf(struct vnode *vp, int waitfor); int vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp, struct vnode *outvp, off_t *outoffp, size_t *lenp, unsigned int flags, struct ucred *incred, struct ucred *outcred, struct thread *fsize_td); int vn_need_pageq_flush(struct vnode *vp); bool vn_isdisk_error(struct vnode *vp, int *errp); bool vn_isdisk(struct vnode *vp); int _vn_lock(struct vnode *vp, int flags, const char *file, int line); #define vn_lock(vp, flags) _vn_lock(vp, flags, __FILE__, __LINE__) void vn_lock_pair(struct vnode *vp1, bool vp1_locked, int lkflags1, struct vnode *vp2, bool vp2_locked, int lkflags2); int vn_open(struct nameidata *ndp, int *flagp, int cmode, struct file *fp); int vn_open_cred(struct nameidata *ndp, int *flagp, int cmode, u_int vn_open_flags, struct ucred *cred, struct file *fp); int vn_open_vnode(struct vnode *vp, int fmode, struct ucred *cred, struct thread *td, struct file *fp); void vn_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end); void vn_pages_remove_valid(struct vnode *vp, vm_pindex_t start, vm_pindex_t end); int vn_pollrecord(struct vnode *vp, struct thread *p, int events); int vn_rdwr(enum uio_rw rw, struct vnode *vp, void *base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, ssize_t *aresid, struct thread *td); int vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, void *base, size_t len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *active_cred, struct ucred *file_cred, size_t *aresid, struct thread *td); int vn_read_from_obj(struct vnode *vp, struct uio *uio); int vn_rlimit_fsize(const struct vnode *vp, const struct uio *uio, struct thread *td); int vn_rlimit_fsizex(const struct vnode *vp, struct uio *uio, off_t maxfsz, ssize_t *resid_adj, struct thread *td); void vn_rlimit_fsizex_res(struct uio *uio, ssize_t resid_adj); int vn_rlimit_trunc(u_quad_t size, struct thread *td); int vn_start_write(struct vnode *vp, struct mount **mpp, int flags); int vn_start_secondary_write(struct vnode *vp, struct mount **mpp, int flags); int vn_truncate_locked(struct vnode *vp, off_t length, bool sync, struct ucred *cred); int vn_writechk(struct vnode *vp); int vn_extattr_get(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, int *buflen, char *buf, struct thread *td); int vn_extattr_set(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, int buflen, char *buf, struct thread *td); int vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace, const char *attrname, struct thread *td); int vn_vget_ino(struct vnode *vp, ino_t ino, int lkflags, struct vnode **rvp); int vn_vget_ino_gen(struct vnode *vp, vn_get_ino_t alloc, void *alloc_arg, int lkflags, struct vnode **rvp); int vn_utimes_perm(struct vnode *vp, struct vattr *vap, struct ucred *cred, struct thread *td); int vn_cmp(struct file *, struct file *, struct thread *td); int vn_io_fault_uiomove(char *data, int xfersize, struct uio *uio); int vn_io_fault_pgmove(vm_page_t ma[], vm_offset_t offset, int xfersize, struct uio *uio); void vn_seqc_write_begin_locked(struct vnode *vp); void vn_seqc_write_begin(struct vnode *vp); void vn_seqc_write_end_locked(struct vnode *vp); void vn_seqc_write_end(struct vnode *vp); #define vn_seqc_read_any(vp) seqc_read_any(&(vp)->v_seqc) #define vn_seqc_read_notmodify(vp) seqc_read_notmodify(&(vp)->v_seqc) #define vn_seqc_consistent(vp, seq) seqc_consistent(&(vp)->v_seqc, seq) #define vn_rangelock_unlock(vp, cookie) \ rangelock_unlock(&(vp)->v_rl, (cookie)) #define vn_rangelock_rlock(vp, start, end) \ rangelock_rlock(&(vp)->v_rl, (start), (end)) #define vn_rangelock_tryrlock(vp, start, end) \ rangelock_tryrlock(&(vp)->v_rl, (start), (end)) #define vn_rangelock_wlock(vp, start, end) \ rangelock_wlock(&(vp)->v_rl, (start), (end)) #define vn_rangelock_trywlock(vp, start, end) \ rangelock_trywlock(&(vp)->v_rl, (start), (end)) #define vn_irflag_read(vp) atomic_load_short(&(vp)->v_irflag) void vn_irflag_set_locked(struct vnode *vp, short toset); void vn_irflag_set(struct vnode *vp, short toset); void vn_irflag_set_cond_locked(struct vnode *vp, short toset); void vn_irflag_set_cond(struct vnode *vp, short toset); void vn_irflag_unset_locked(struct vnode *vp, short tounset); void vn_irflag_unset(struct vnode *vp, short tounset); int vfs_cache_lookup(struct vop_lookup_args *ap); int vfs_cache_root(struct mount *mp, int flags, struct vnode **vpp); void vfs_timestamp(struct timespec *); void vfs_write_resume(struct mount *mp, int flags); int vfs_write_suspend(struct mount *mp, int flags); int vfs_write_suspend_umnt(struct mount *mp); struct vnode *vnlru_alloc_marker(void); void vnlru_free_marker(struct vnode *); void vnlru_free_vfsops(int, struct vfsops *, struct vnode *); int vop_stdbmap(struct vop_bmap_args *); int vop_stdfdatasync_buf(struct vop_fdatasync_args *); int vop_stdfsync(struct vop_fsync_args *); int vop_stdgetwritemount(struct vop_getwritemount_args *); int vop_stdgetpages(struct vop_getpages_args *); int vop_stdinactive(struct vop_inactive_args *); int vop_stdneed_inactive(struct vop_need_inactive_args *); int vop_stdinotify(struct vop_inotify_args *); int vop_stdinotify_add_watch(struct vop_inotify_add_watch_args *); int vop_stdioctl(struct vop_ioctl_args *); int vop_stdkqfilter(struct vop_kqfilter_args *); int vop_stdlock(struct vop_lock1_args *); int vop_stdunlock(struct vop_unlock_args *); int vop_stdislocked(struct vop_islocked_args *); int vop_lock(struct vop_lock1_args *); int vop_unlock(struct vop_unlock_args *); int vop_islocked(struct vop_islocked_args *); int vop_stdputpages(struct vop_putpages_args *); int vop_nopoll(struct vop_poll_args *); int vop_stdaccess(struct vop_access_args *ap); int vop_stdaccessx(struct vop_accessx_args *ap); int vop_stdadvise(struct vop_advise_args *ap); int vop_stdadvlock(struct vop_advlock_args *ap); int vop_stdadvlockasync(struct vop_advlockasync_args *ap); int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap); int vop_stdallocate(struct vop_allocate_args *ap); int vop_stddeallocate(struct vop_deallocate_args *ap); int vop_stdset_text(struct vop_set_text_args *ap); int vop_stdpathconf(struct vop_pathconf_args *); int vop_stdpoll(struct vop_poll_args *); int vop_stdvptocnp(struct vop_vptocnp_args *ap); int vop_stdvptofh(struct vop_vptofh_args *ap); int vop_stdunp_bind(struct vop_unp_bind_args *ap); int vop_stdunp_connect(struct vop_unp_connect_args *ap); int vop_stdunp_detach(struct vop_unp_detach_args *ap); int vop_stdadd_writecount_nomsync(struct vop_add_writecount_args *ap); int vop_eopnotsupp(struct vop_generic_args *ap); int vop_ebadf(struct vop_generic_args *ap); int vop_einval(struct vop_generic_args *ap); int vop_enoent(struct vop_generic_args *ap); int vop_enotty(struct vop_generic_args *ap); int vop_eagain(struct vop_generic_args *ap); int vop_null(struct vop_generic_args *ap); int vop_panic(struct vop_generic_args *ap); int dead_poll(struct vop_poll_args *ap); int dead_read(struct vop_read_args *ap); int dead_write(struct vop_write_args *ap); /* These are called from within the actual VOPS. */ void vop_allocate_post(void *a, int rc); void vop_copy_file_range_post(void *ap, int rc); void vop_close_post(void *a, int rc); void vop_create_pre(void *a); void vop_create_post(void *a, int rc); void vop_deallocate_post(void *a, int rc); void vop_whiteout_pre(void *a); void vop_whiteout_post(void *a, int rc); void vop_deleteextattr_pre(void *a); void vop_deleteextattr_post(void *a, int rc); void vop_link_pre(void *a); void vop_link_post(void *a, int rc); void vop_lookup_post(void *a, int rc); void vop_lookup_pre(void *a); void vop_mkdir_pre(void *a); void vop_mkdir_post(void *a, int rc); void vop_mknod_pre(void *a); void vop_mknod_post(void *a, int rc); void vop_open_post(void *a, int rc); void vop_read_post(void *a, int rc); void vop_read_pgcache_post(void *ap, int rc); void vop_reclaim_post(void *a, int rc); void vop_remove_pre(void *a); void vop_remove_post(void *a, int rc); void vop_rename_post(void *a, int rc); void vop_rename_pre(void *a); void vop_rmdir_pre(void *a); void vop_rmdir_post(void *a, int rc); void vop_setattr_pre(void *a); void vop_setattr_post(void *a, int rc); void vop_setacl_pre(void *a); void vop_setacl_post(void *a, int rc); void vop_setextattr_pre(void *a); void vop_setextattr_post(void *a, int rc); void vop_symlink_pre(void *a); void vop_symlink_post(void *a, int rc); int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a); #ifdef INVARIANTS void vop_fdatasync_debugpre(void *a); void vop_fdatasync_debugpost(void *a, int rc); void vop_fplookup_vexec_debugpre(void *a); void vop_fplookup_vexec_debugpost(void *a, int rc); void vop_fplookup_symlink_debugpre(void *a); void vop_fplookup_symlink_debugpost(void *a, int rc); void vop_fsync_debugpre(void *a); void vop_fsync_debugpost(void *a, int rc); void vop_strategy_debugpre(void *a); void vop_lock_debugpre(void *a); void vop_lock_debugpost(void *a, int rc); void vop_unlock_debugpre(void *a); void vop_need_inactive_debugpre(void *a); void vop_need_inactive_debugpost(void *a, int rc); void vop_mkdir_debugpost(void *a, int rc); #else #define vop_fdatasync_debugpre(x) do { } while (0) #define vop_fdatasync_debugpost(x, y) do { } while (0) #define vop_fplookup_vexec_debugpre(x) do { } while (0) #define vop_fplookup_vexec_debugpost(x, y) do { } while (0) #define vop_fplookup_symlink_debugpre(x) do { } while (0) #define vop_fplookup_symlink_debugpost(x, y) do { } while (0) #define vop_fsync_debugpre(x) do { } while (0) #define vop_fsync_debugpost(x, y) do { } while (0) #define vop_strategy_debugpre(x) do { } while (0) #define vop_lock_debugpre(x) do { } while (0) #define vop_lock_debugpost(x, y) do { } while (0) #define vop_unlock_debugpre(x) do { } while (0) #define vop_need_inactive_debugpre(x) do { } while (0) #define vop_need_inactive_debugpost(x, y) do { } while (0) #define vop_mkdir_debugpost(x, y) do { } while (0) #endif void vop_rename_fail(struct vop_rename_args *ap); #define vop_stat_helper_pre(ap) ({ \ struct vop_stat_args *_ap = (ap); \ int _error; \ AUDIT_ARG_VNODE1(ap->a_vp); \ _error = mac_vnode_check_stat(_ap->a_active_cred, _ap->a_file_cred, _ap->a_vp);\ if (__predict_true(_error == 0)) { \ ap->a_sb->st_padding1 = 0; \ bzero(_ap->a_sb->st_spare, sizeof(_ap->a_sb->st_spare)); \ ap->a_sb->st_filerev = 0; \ ap->a_sb->st_bsdflags = 0; \ } \ _error; \ }) #define vop_stat_helper_post(ap, error) ({ \ struct vop_stat_args *_ap = (ap); \ int _error = (error); \ if (priv_check_cred_vfs_generation(_ap->a_active_cred)) \ _ap->a_sb->st_gen = 0; \ _error; \ }) #ifdef INVARIANTS #define vop_readdir_pre_assert(ap) \ ssize_t nresid, oresid; \ \ oresid = (ap)->a_uio->uio_resid; #define vop_readdir_post_assert(ap, ret) \ nresid = (ap)->a_uio->uio_resid; \ if ((ret) == 0 && (ap)->a_eofflag != NULL) { \ VNASSERT(oresid == 0 || nresid != oresid || \ *(ap)->a_eofflag == 1, \ (ap)->a_vp, ("VOP_READDIR: eofflag not set")); \ } #else #define vop_readdir_pre_assert(ap) #define vop_readdir_post_assert(ap, ret) #endif #define vop_readdir_pre(ap) do { \ vop_readdir_pre_assert(ap) #define vop_readdir_post(ap, ret) \ vop_readdir_post_assert(ap, ret); \ if ((ret) == 0) { \ VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_READ); \ INOTIFY((ap)->a_vp, IN_ACCESS); \ } \ } while (0) #define vop_write_pre(ap) \ struct vattr va; \ int error; \ off_t osize, ooffset, noffset; \ \ osize = ooffset = noffset = 0; \ if (!VN_KNLIST_EMPTY((ap)->a_vp)) { \ error = VOP_GETATTR((ap)->a_vp, &va, (ap)->a_cred); \ if (error) \ return (error); \ ooffset = (ap)->a_uio->uio_offset; \ osize = (off_t)va.va_size; \ } #define vop_write_post(ap, ret) \ noffset = (ap)->a_uio->uio_offset; \ if (noffset > ooffset) { \ if (!VN_KNLIST_EMPTY((ap)->a_vp)) { \ VFS_KNOTE_LOCKED((ap)->a_vp, NOTE_WRITE | \ (noffset > osize ? NOTE_EXTEND : 0)); \ } \ INOTIFY((ap)->a_vp, IN_MODIFY); \ } #define VOP_LOCK(vp, flags) VOP_LOCK1(vp, flags, __FILE__, __LINE__) #ifdef INVARIANTS #define VOP_ADD_WRITECOUNT_CHECKED(vp, cnt) \ do { \ int error_; \ \ error_ = VOP_ADD_WRITECOUNT((vp), (cnt)); \ VNASSERT(error_ == 0, (vp), ("VOP_ADD_WRITECOUNT returned %d", \ error_)); \ } while (0) #define VOP_SET_TEXT_CHECKED(vp) \ do { \ int error_; \ \ error_ = VOP_SET_TEXT((vp)); \ VNASSERT(error_ == 0, (vp), ("VOP_SET_TEXT returned %d", \ error_)); \ } while (0) #define VOP_UNSET_TEXT_CHECKED(vp) \ do { \ int error_; \ \ error_ = VOP_UNSET_TEXT((vp)); \ VNASSERT(error_ == 0, (vp), ("VOP_UNSET_TEXT returned %d", \ error_)); \ } while (0) #else #define VOP_ADD_WRITECOUNT_CHECKED(vp, cnt) VOP_ADD_WRITECOUNT((vp), (cnt)) #define VOP_SET_TEXT_CHECKED(vp) VOP_SET_TEXT((vp)) #define VOP_UNSET_TEXT_CHECKED(vp) VOP_UNSET_TEXT((vp)) #endif #define VN_IS_DOOMED(vp) __predict_false((vn_irflag_read(vp) & VIRF_DOOMED) != 0) void vput(struct vnode *vp); void vrele(struct vnode *vp); void vref(struct vnode *vp); void vrefact(struct vnode *vp); void v_addpollinfo(struct vnode *vp); static __inline int vrefcnt(struct vnode *vp) { return (vp->v_usecount); } #define vholdl(vp) do { \ ASSERT_VI_LOCKED(vp, __func__); \ vhold(vp); \ } while (0) #define vrefl(vp) do { \ ASSERT_VI_LOCKED(vp, __func__); \ vref(vp); \ } while (0) /* * The caller doesn't know the file size and vnode_create_vobject() should * determine the size on its own. */ #define VNODE_NO_SIZE ((off_t)-1) int vnode_create_vobject(struct vnode *vp, off_t size, struct thread *td); int vnode_create_disk_vobject(struct vnode *vp, off_t size, struct thread *td); void vnode_destroy_vobject(struct vnode *vp); extern struct vop_vector fifo_specops; extern struct vop_vector dead_vnodeops; extern struct vop_vector default_vnodeops; #define VOP_PANIC ((void*)(uintptr_t)vop_panic) #define VOP_NULL ((void*)(uintptr_t)vop_null) #define VOP_EBADF ((void*)(uintptr_t)vop_ebadf) #define VOP_ENOTTY ((void*)(uintptr_t)vop_enotty) #define VOP_EINVAL ((void*)(uintptr_t)vop_einval) #define VOP_ENOENT ((void*)(uintptr_t)vop_enoent) #define VOP_EOPNOTSUPP ((void*)(uintptr_t)vop_eopnotsupp) #define VOP_EAGAIN ((void*)(uintptr_t)vop_eagain) /* fifo_vnops.c */ int fifo_printinfo(struct vnode *); /* vfs_hash.c */ typedef int vfs_hash_cmp_t(struct vnode *vp, void *arg); void vfs_hash_changesize(u_long newhashsize); int vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg); u_int vfs_hash_index(struct vnode *vp); int vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg); void vfs_hash_ref(const struct mount *mp, u_int hash, struct thread *td, struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg); void vfs_hash_rehash(struct vnode *vp, u_int hash); void vfs_hash_remove(struct vnode *vp); int vfs_kqfilter(struct vop_kqfilter_args *); struct dirent; int vn_dir_next_dirent(struct vnode *vp, struct thread *td, char *dirbuf, size_t dirbuflen, struct dirent **dpp, size_t *len, off_t *off, int *eofflag); int vn_dir_check_empty(struct vnode *vp); int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off); int vfs_unixify_accmode(accmode_t *accmode); void vfs_unp_reclaim(struct vnode *vp); int setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode); int setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, gid_t gid); int vn_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td); int vn_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td); int vn_getsize_locked(struct vnode *vp, off_t *size, struct ucred *active_cred); int vn_getsize(struct vnode *vp, off_t *size, struct ucred *active_cred); void vn_fsid(struct vnode *vp, struct vattr *va); int vn_dir_check_exec(struct vnode *vp, struct componentname *cnp); int vn_lktype_write(struct mount *mp, struct vnode *vp); #ifdef INVARIANTS void vn_set_state_validate(struct vnode *vp, __enum_uint8(vstate) state); #endif static inline void vn_set_state(struct vnode *vp, __enum_uint8(vstate) state) { #ifdef INVARIANTS vn_set_state_validate(vp, state); #endif vp->v_state = state; } static inline __enum_uint8(vstate) vn_get_state(struct vnode *vp) { return (vp->v_state); } #define VOP_UNLOCK_FLAGS(vp, flags) ({ \ struct vnode *_vp = (vp); \ int _flags = (flags); \ int _error; \ \ if ((_flags & ~(LK_INTERLOCK | LK_RELEASE)) != 0) \ panic("%s: unsupported flags %x\n", __func__, flags); \ _error = VOP_UNLOCK(_vp); \ if (_flags & LK_INTERLOCK) \ VI_UNLOCK(_vp); \ _error; \ }) #include #define VFS_VOP_VECTOR_REGISTER(vnodeops) \ SYSINIT(vfs_vector_##vnodeops##_f, SI_SUB_VFS, SI_ORDER_ANY, \ vfs_vector_op_register, &vnodeops) #define VFS_SMR_DECLARE \ extern smr_t vfs_smr #define VFS_SMR() vfs_smr #define vfs_smr_enter() smr_enter(VFS_SMR()) #define vfs_smr_exit() smr_exit(VFS_SMR()) #define vfs_smr_synchronize() smr_synchronize(VFS_SMR()) #define vfs_smr_entered_load(ptr) smr_entered_load((ptr), VFS_SMR()) #define VFS_SMR_ENTERED() SMR_ENTERED(VFS_SMR()) #define VFS_SMR_ASSERT_ENTERED() SMR_ASSERT_ENTERED(VFS_SMR()) #define VFS_SMR_ASSERT_NOT_ENTERED() SMR_ASSERT_NOT_ENTERED(VFS_SMR()) #define VFS_SMR_ZONE_SET(zone) uma_zone_set_smr((zone), VFS_SMR()) #define vn_load_v_data_smr(vp) ({ \ struct vnode *_vp = (vp); \ \ VFS_SMR_ASSERT_ENTERED(); \ atomic_load_consume_ptr(&(_vp)->v_data);\ }) #endif /* _KERNEL */ #endif /* !_SYS_VNODE_H_ */