Index: head/sys/kern/vfs_cache.c =================================================================== --- head/sys/kern/vfs_cache.c (revision 155384) +++ head/sys/kern/vfs_cache.c (revision 155385) @@ -1,875 +1,875 @@ /*- * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Poul-Henning Kamp of the FreeBSD Project. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * This structure describes the elements in the cache of recent * names looked up by namei. */ struct namecache { LIST_ENTRY(namecache) nc_hash; /* hash chain */ LIST_ENTRY(namecache) nc_src; /* source vnode list */ TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ struct vnode *nc_dvp; /* vnode of parent of name */ struct vnode *nc_vp; /* vnode the name refers to */ u_char nc_flag; /* flag bits */ u_char nc_nlen; /* length of name */ char nc_name[0]; /* segment name */ }; /* * Name caching works as follows: * * Names found by directory scans are retained in a cache * for future reference. It is managed LRU, so frequently * used names will hang around. Cache is indexed by hash value * obtained from (vp, name) where vp refers to the directory * containing name. * * If it is a "negative" entry, (i.e. for a name that is known NOT to * exist) the vnode pointer will be NULL. * * Upon reaching the last segment of a path, if the reference * is for DELETE, or NOCACHE is set (rewrite), and the * name is located in the cache, it will be dropped. */ /* * Structures associated with name cacheing. */ #define NCHHASH(hash) \ (&nchashtbl[(hash) & nchash]) static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ static TAILQ_HEAD(, namecache) ncneg; /* Hash Table */ static u_long nchash; /* size of hash table */ SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, ""); static u_long ncnegfactor = 16; /* ratio of negative entries */ SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, ""); static u_long numneg; /* number of cache entries allocated */ SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, ""); static u_long numcache; /* number of cache entries allocated */ SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, ""); static u_long numcachehv; /* number of cache entries with vnodes held */ SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, ""); #if 0 static u_long numcachepl; /* number of cache purge for leaf entries */ SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, ""); #endif struct nchstats nchstats; /* cache effectiveness statistics */ static struct mtx cache_lock; MTX_SYSINIT(vfscache, &cache_lock, "Name Cache", MTX_DEF); #define CACHE_LOCK() mtx_lock(&cache_lock) #define CACHE_UNLOCK() mtx_unlock(&cache_lock) /* * UMA zones for the VFS cache. * * The small cache is used for entries with short names, which are the * most common. The large cache is used for entries which are too big to * fit in the small cache. */ static uma_zone_t cache_zone_small; static uma_zone_t cache_zone_large; #define CACHE_PATH_CUTOFF 32 #define CACHE_ZONE_SMALL (sizeof(struct namecache) + CACHE_PATH_CUTOFF) #define CACHE_ZONE_LARGE (sizeof(struct namecache) + NAME_MAX) #define cache_alloc(len) uma_zalloc(((len) <= CACHE_PATH_CUTOFF) ? \ cache_zone_small : cache_zone_large, M_WAITOK) #define cache_free(ncp) do { \ if (ncp != NULL) \ uma_zfree(((ncp)->nc_nlen <= CACHE_PATH_CUTOFF) ? \ cache_zone_small : cache_zone_large, (ncp)); \ } while (0) static int doingcache = 1; /* 1 => enable the cache */ SYSCTL_INT(_debug, OID_AUTO, vfscache, CTLFLAG_RW, &doingcache, 0, ""); /* Export size information to userland */ SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), ""); SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), ""); /* * The new name cache statistics */ static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics"); #define STATNODE(mode, name, var) \ SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); STATNODE(CTLFLAG_RD, numneg, &numneg); STATNODE(CTLFLAG_RD, numcache, &numcache); static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls); static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits); static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits); static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks); static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss); static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap); static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps); static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits); static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps); static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits); SYSCTL_OPAQUE(_vfs_cache, OID_AUTO, nchstats, CTLFLAG_RD, &nchstats, sizeof(nchstats), "LU", "VFS cache effectiveness statistics"); static void cache_zap(struct namecache *ncp); static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, char *buf, char **retbuf, u_int buflen); static MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); /* * Flags in namecache.nc_flag */ #define NCF_WHITE 1 /* * Grab an atomic snapshot of the name cache hash chain lengths */ SYSCTL_NODE(_debug, OID_AUTO, hashstat, CTLFLAG_RW, NULL, "hash table stats"); static int sysctl_debug_hashstat_rawnchash(SYSCTL_HANDLER_ARGS) { int error; struct nchashhead *ncpp; struct namecache *ncp; int n_nchash; int count; n_nchash = nchash + 1; /* nchash is max index, not count */ if (!req->oldptr) return SYSCTL_OUT(req, 0, n_nchash * sizeof(int)); /* Scan hash tables for applicable entries */ for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { count = 0; LIST_FOREACH(ncp, ncpp, nc_hash) { count++; } error = SYSCTL_OUT(req, &count, sizeof(count)); if (error) return (error); } return (0); } SYSCTL_PROC(_debug_hashstat, OID_AUTO, rawnchash, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_debug_hashstat_rawnchash, "S,int", "nchash chain lengths"); static int sysctl_debug_hashstat_nchash(SYSCTL_HANDLER_ARGS) { int error; struct nchashhead *ncpp; struct namecache *ncp; int n_nchash; int count, maxlength, used, pct; if (!req->oldptr) return SYSCTL_OUT(req, 0, 4 * sizeof(int)); n_nchash = nchash + 1; /* nchash is max index, not count */ used = 0; maxlength = 0; /* Scan hash tables for applicable entries */ for (ncpp = nchashtbl; n_nchash > 0; n_nchash--, ncpp++) { count = 0; LIST_FOREACH(ncp, ncpp, nc_hash) { count++; } if (count) used++; if (maxlength < count) maxlength = count; } n_nchash = nchash + 1; pct = (used * 100 * 100) / n_nchash; error = SYSCTL_OUT(req, &n_nchash, sizeof(n_nchash)); if (error) return (error); error = SYSCTL_OUT(req, &used, sizeof(used)); if (error) return (error); error = SYSCTL_OUT(req, &maxlength, sizeof(maxlength)); if (error) return (error); error = SYSCTL_OUT(req, &pct, sizeof(pct)); if (error) return (error); return (0); } SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_debug_hashstat_nchash, "I", "nchash chain lengths"); /* * cache_zap(): * * Removes a namecache entry from cache, whether it contains an actual * pointer to a vnode or if it is just a negative cache entry. */ static void cache_zap(ncp) struct namecache *ncp; { struct vnode *vp; mtx_assert(&cache_lock, MA_OWNED); CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp, ncp->nc_vp); vp = NULL; LIST_REMOVE(ncp, nc_hash); LIST_REMOVE(ncp, nc_src); if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) { vp = ncp->nc_dvp; numcachehv--; } if (ncp->nc_vp) { TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst); ncp->nc_vp->v_dd = NULL; } else { TAILQ_REMOVE(&ncneg, ncp, nc_dst); numneg--; } numcache--; cache_free(ncp); if (vp) vdrop(vp); } /* * cache_leaf_test() * * Test whether this (directory) vnode's namei cache entry contains * subdirectories or not. Used to determine whether the directory is * a leaf in the namei cache or not. Note: the directory may still * contain files in the namei cache. * * Returns 0 if the directory is a leaf, -1 if it isn't. */ int cache_leaf_test(struct vnode *vp) { struct namecache *ncpc; int leaf; leaf = 0; CACHE_LOCK(); for (ncpc = LIST_FIRST(&vp->v_cache_src); ncpc != NULL; ncpc = LIST_NEXT(ncpc, nc_src) ) { if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR) { leaf = -1; break; } } CACHE_UNLOCK(); return (leaf); } /* * Lookup an entry in the cache * * Lookup is called with dvp pointing to the directory to search, * cnp pointing to the name of the entry being sought. If the lookup * succeeds, the vnode is returned in *vpp, and a status of -1 is * returned. If the lookup determines that the name does not exist * (negative cacheing), a status of ENOENT is returned. If the lookup * fails, a status of zero is returned. * * vpp is locked and ref'd on return. If we're looking up DOTDOT, dvp is * unlocked. If we're looking up . an extra ref is taken, but the lock is * not recursively acquired. */ int cache_lookup(dvp, vpp, cnp) struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; { struct namecache *ncp; u_int32_t hash; int error; if (!doingcache) { cnp->cn_flags &= ~MAKEENTRY; return (0); } retry: CACHE_LOCK(); numcalls++; if (cnp->cn_nameptr[0] == '.') { if (cnp->cn_namelen == 1) { *vpp = dvp; CTR2(KTR_VFS, "cache_lookup(%p, %s) found via .", dvp, cnp->cn_nameptr); dothits++; goto success; } if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { dotdothits++; if (dvp->v_dd == NULL || (cnp->cn_flags & MAKEENTRY) == 0) { CACHE_UNLOCK(); return (0); } *vpp = dvp->v_dd; CTR3(KTR_VFS, "cache_lookup(%p, %s) found %p via ..", dvp, cnp->cn_nameptr, *vpp); goto success; } } hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); hash = fnv_32_buf(&dvp, sizeof(dvp), hash); LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { numchecks++; if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen && !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) break; } /* We failed to find an entry */ if (ncp == 0) { if ((cnp->cn_flags & MAKEENTRY) == 0) { nummisszap++; } else { nummiss++; } nchstats.ncs_miss++; CACHE_UNLOCK(); return (0); } /* We don't want to have an entry, so dump it */ if ((cnp->cn_flags & MAKEENTRY) == 0) { numposzaps++; nchstats.ncs_badhits++; cache_zap(ncp); CACHE_UNLOCK(); return (0); } /* We found a "positive" match, return the vnode */ if (ncp->nc_vp) { numposhits++; nchstats.ncs_goodhits++; *vpp = ncp->nc_vp; CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p", dvp, cnp->cn_nameptr, *vpp, ncp); goto success; } /* We found a negative match, and want to create it, so purge */ if (cnp->cn_nameiop == CREATE) { numnegzaps++; nchstats.ncs_badhits++; cache_zap(ncp); CACHE_UNLOCK(); return (0); } numneghits++; /* * We found a "negative" match, so we shift it to the end of * the "negative" cache entries queue to satisfy LRU. Also, * check to see if the entry is a whiteout; indicate this to * the componentname, if so. */ TAILQ_REMOVE(&ncneg, ncp, nc_dst); TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); nchstats.ncs_neghits++; if (ncp->nc_flag & NCF_WHITE) cnp->cn_flags |= ISWHITEOUT; CACHE_UNLOCK(); return (ENOENT); success: /* * On success we return a locked and ref'd vnode as per the lookup * protocol. */ if (dvp == *vpp) { /* lookup on "." */ VREF(*vpp); CACHE_UNLOCK(); return (-1); } if (cnp->cn_flags & ISDOTDOT) VOP_UNLOCK(dvp, 0, cnp->cn_thread); VI_LOCK(*vpp); CACHE_UNLOCK(); error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread); if (cnp->cn_flags & ISDOTDOT) vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, cnp->cn_thread); if (error) { *vpp = NULL; goto retry; } return (-1); } /* * Add an entry to the cache. */ void cache_enter(dvp, vp, cnp) struct vnode *dvp; struct vnode *vp; struct componentname *cnp; { struct namecache *ncp; struct nchashhead *ncpp; u_int32_t hash; int hold; int zap; int len; CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr); VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp, ("cahe_enter: Adding a doomed vnode")); if (!doingcache) return; if (cnp->cn_nameptr[0] == '.') { if (cnp->cn_namelen == 1) { return; } /* * For dotdot lookups only cache the v_dd pointer if the * directory has a link back to its parent via v_cache_dst. * Without this an unlinked directory would keep a soft * reference to its parent which could not be NULLd at * cache_purge() time. */ if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { CACHE_LOCK(); if (!TAILQ_EMPTY(&dvp->v_cache_dst)) dvp->v_dd = vp; CACHE_UNLOCK(); return; } } hold = 0; zap = 0; ncp = cache_alloc(cnp->cn_namelen); CACHE_LOCK(); numcache++; if (!vp) { numneg++; ncp->nc_flag = cnp->cn_flags & ISWHITEOUT ? NCF_WHITE : 0; } else if (vp->v_type == VDIR) { vp->v_dd = dvp; } else { vp->v_dd = NULL; } /* * Set the rest of the namecache entry elements, calculate it's * hash key and insert it into the appropriate chain within * the cache entries table. */ ncp->nc_vp = vp; ncp->nc_dvp = dvp; len = ncp->nc_nlen = cnp->cn_namelen; hash = fnv_32_buf(cnp->cn_nameptr, len, FNV1_32_INIT); bcopy(cnp->cn_nameptr, ncp->nc_name, len); hash = fnv_32_buf(&dvp, sizeof(dvp), hash); ncpp = NCHHASH(hash); LIST_INSERT_HEAD(ncpp, ncp, nc_hash); if (LIST_EMPTY(&dvp->v_cache_src)) { hold = 1; numcachehv++; } LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src); /* * If the entry is "negative", we place it into the * "negative" cache queue, otherwise, we place it into the * destination vnode's cache entries queue. */ if (vp) { TAILQ_INSERT_HEAD(&vp->v_cache_dst, ncp, nc_dst); } else { TAILQ_INSERT_TAIL(&ncneg, ncp, nc_dst); } if (numneg * ncnegfactor > numcache) { ncp = TAILQ_FIRST(&ncneg); zap = 1; } if (hold) vhold(dvp); if (zap) cache_zap(ncp); CACHE_UNLOCK(); } /* * Name cache initialization, from vfs_init() when we are booting */ static void nchinit(void *dummy __unused) { TAILQ_INIT(&ncneg); cache_zone_small = uma_zcreate("S VFS Cache", CACHE_ZONE_SMALL, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); cache_zone_large = uma_zcreate("L VFS Cache", CACHE_ZONE_LARGE, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash); } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL) /* * Invalidate all entries to a particular vnode. */ void cache_purge(vp) struct vnode *vp; { CTR1(KTR_VFS, "cache_purge(%p)", vp); CACHE_LOCK(); while (!LIST_EMPTY(&vp->v_cache_src)) cache_zap(LIST_FIRST(&vp->v_cache_src)); while (!TAILQ_EMPTY(&vp->v_cache_dst)) cache_zap(TAILQ_FIRST(&vp->v_cache_dst)); vp->v_dd = NULL; CACHE_UNLOCK(); } /* * Flush all entries referencing a particular filesystem. * * Since we need to check it anyway, we will flush all the invalid * entries at the same time. */ void cache_purgevfs(mp) struct mount *mp; { struct nchashhead *ncpp; struct namecache *ncp, *nnp; struct nchashhead mplist; LIST_INIT(&mplist); ncp = NULL; /* Scan hash tables for applicable entries */ CACHE_LOCK(); for (ncpp = &nchashtbl[nchash]; ncpp >= nchashtbl; ncpp--) { for (ncp = LIST_FIRST(ncpp); ncp != 0; ncp = nnp) { nnp = LIST_NEXT(ncp, nc_hash); if (ncp->nc_dvp->v_mount == mp) { LIST_REMOVE(ncp, nc_hash); LIST_INSERT_HEAD(&mplist, ncp, nc_hash); } } } while (!LIST_EMPTY(&mplist)) cache_zap(LIST_FIRST(&mplist)); CACHE_UNLOCK(); } /* * Perform canonical checks and cache lookup and pass on to filesystem * through the vop_cachedlookup only if needed. */ int vfs_cache_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { struct vnode *dvp; int error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; struct thread *td = cnp->cn_thread; *vpp = NULL; dvp = ap->a_dvp; if (dvp->v_type != VDIR) return (ENOTDIR); if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); error = VOP_ACCESS(dvp, VEXEC, cred, td); if (error) return (error); error = cache_lookup(dvp, vpp, cnp); if (error == 0) return (VOP_CACHEDLOOKUP(dvp, vpp, cnp)); if (error == ENOENT) return (error); return (0); } #ifndef _SYS_SYSPROTO_H_ struct __getcwd_args { u_char *buf; u_int buflen; }; #endif /* * XXX All of these sysctls would probably be more productive dead. */ static int disablecwd; SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "Disable the getcwd syscall"); /* Implementation of the getcwd syscall */ int __getcwd(td, uap) struct thread *td; struct __getcwd_args *uap; { return (kern___getcwd(td, uap->buf, UIO_USERSPACE, uap->buflen)); } int kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg, u_int buflen) { char *bp, *tmpbuf; struct filedesc *fdp; int error; if (disablecwd) return (ENODEV); if (buflen < 2) return (EINVAL); if (buflen > MAXPATHLEN) buflen = MAXPATHLEN; tmpbuf = malloc(buflen, M_TEMP, M_WAITOK); fdp = td->td_proc->p_fd; mtx_lock(&Giant); FILEDESC_LOCK(fdp); error = vn_fullpath1(td, fdp->fd_cdir, fdp->fd_rdir, tmpbuf, &bp, buflen); FILEDESC_UNLOCK(fdp); mtx_unlock(&Giant); if (!error) { if (bufseg == UIO_SYSSPACE) bcopy(bp, buf, strlen(bp) + 1); else error = copyout(bp, buf, strlen(bp) + 1); } free(tmpbuf, M_TEMP); return (error); } /* * Thus begins the fullpath magic. */ #undef STATNODE #define STATNODE(name) \ static u_int name; \ SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "") static int disablefullpath; SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, &disablefullpath, 0, "Disable the vn_fullpath function"); /* These count for kern___getcwd(), too. */ STATNODE(numfullpathcalls); STATNODE(numfullpathfail1); STATNODE(numfullpathfail2); STATNODE(numfullpathfail4); STATNODE(numfullpathfound); /* * Retrieve the full filesystem path that correspond to a vnode from the name * cache (if available) */ int vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) { char *buf; struct filedesc *fdp; int error; if (disablefullpath) return (ENODEV); if (vn == NULL) return (EINVAL); buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); fdp = td->td_proc->p_fd; mtx_lock(&Giant); FILEDESC_LOCK(fdp); error = vn_fullpath1(td, vn, fdp->fd_rdir, buf, retbuf, MAXPATHLEN); FILEDESC_UNLOCK(fdp); mtx_unlock(&Giant); if (!error) *freebuf = buf; else free(buf, M_TEMP); return (error); } /* * The magic behind kern___getcwd() and vn_fullpath(). */ static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir, char *buf, char **retbuf, u_int buflen) { char *bp; int error, i, slash_prefixed; struct namecache *ncp; mtx_assert(&Giant, MA_OWNED); bp = buf + buflen - 1; *bp = '\0'; error = 0; slash_prefixed = 0; CACHE_LOCK(); numfullpathcalls++; if (vp->v_type != VDIR) { ncp = TAILQ_FIRST(&vp->v_cache_dst); if (!ncp) { numfullpathfail2++; CACHE_UNLOCK(); return (ENOENT); } for (i = ncp->nc_nlen - 1; i >= 0 && bp > buf; i--) *--bp = ncp->nc_name[i]; if (bp == buf) { numfullpathfail4++; CACHE_UNLOCK(); return (ENOMEM); } *--bp = '/'; slash_prefixed = 1; vp = ncp->nc_dvp; } while (vp != rdir && vp != rootvnode) { if (vp->v_vflag & VV_ROOT) { - if (vp->v_mount == NULL) { /* forced unmount */ + if (vp->v_iflag & VI_DOOMED) { /* forced unmount */ error = EBADF; break; } vp = vp->v_mount->mnt_vnodecovered; continue; } if (vp->v_dd == NULL) { numfullpathfail1++; error = ENOTDIR; break; } ncp = TAILQ_FIRST(&vp->v_cache_dst); if (!ncp) { numfullpathfail2++; error = ENOENT; break; } MPASS(ncp->nc_dvp == vp->v_dd); for (i = ncp->nc_nlen - 1; i >= 0 && bp != buf; i--) *--bp = ncp->nc_name[i]; if (bp == buf) { numfullpathfail4++; error = ENOMEM; break; } *--bp = '/'; slash_prefixed = 1; vp = ncp->nc_dvp; } if (error) { CACHE_UNLOCK(); return (error); } if (!slash_prefixed) { if (bp == buf) { numfullpathfail4++; CACHE_UNLOCK(); return (ENOMEM); } else { *--bp = '/'; } } numfullpathfound++; CACHE_UNLOCK(); *retbuf = bp; return (0); } Index: head/sys/kern/vfs_extattr.c =================================================================== --- head/sys/kern/vfs_extattr.c (revision 155384) +++ head/sys/kern/vfs_extattr.c (revision 155385) @@ -1,4965 +1,4967 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int chroot_refuse_vdir_fds(struct filedesc *fdp); static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); static int setfown(struct thread *td, struct vnode *, uid_t, gid_t); static int setfmode(struct thread *td, struct vnode *, int); static int setfflags(struct thread *td, struct vnode *, int); static int setutimes(struct thread *td, struct vnode *, const struct timespec *, int, int); static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, struct thread *td); static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data, size_t nbytes, struct thread *td); int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *); /* * The module initialization routine for POSIX asynchronous I/O will * set this to the version of AIO that it implements. (Zero means * that it is not implemented.) This value is used here by pathconf() * and in kern_descrip.c by fpathconf(). */ int async_io_version; /* * Sync each mounted filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif #ifdef DEBUG static int syncprt = 0; SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); #endif /* ARGSUSED */ int sync(td, uap) struct thread *td; struct sync_args *uap; { struct mount *mp, *nmp; int asyncflag; mtx_lock(&Giant); mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if ((mp->mnt_flag & MNT_RDONLY) == 0 && vn_start_write(NULL, &mp, V_NOWAIT) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT, td); mp->mnt_flag |= asyncflag; vn_finished_write(mp); } mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } mtx_unlock(&mountlist_mtx); #if 0 /* * XXX don't call vfs_bufstats() yet because that routine * was not imported in the Lite2 merge. */ #ifdef DIAGNOSTIC if (syncprt) vfs_bufstats(); #endif /* DIAGNOSTIC */ #endif mtx_unlock(&Giant); return (0); } /* XXX PRISON: could be per prison flag */ static int prison_quotas; #if 0 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); #endif /* * Change filesystem quotas. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct quotactl_args { char *path; int cmd; int uid; caddr_t arg; }; #endif int quotactl(td, uap) struct thread *td; register struct quotactl_args /* { char *path; int cmd; int uid; caddr_t arg; } */ *uap; { struct mount *mp, *vmp; int error; struct nameidata nd; if (jailed(td->td_ucred) && !prison_quotas) return (EPERM); mtx_lock(&Giant); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) { mtx_unlock(&Giant); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH); mp = nd.ni_vp->v_mount; vrele(nd.ni_vp); if (error) { mtx_unlock(&Giant); return (error); } error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td); vn_finished_write(vmp); mtx_unlock(&Giant); return (error); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct statfs_args { char *path; struct statfs *buf; }; #endif int statfs(td, uap) struct thread *td; register struct statfs_args /* { char *path; struct statfs *buf; } */ *uap; { struct statfs sf; int error; error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); if (error == 0) error = copyout(&sf, uap->buf, sizeof(sf)); return (error); } int kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, struct statfs *buf) { struct mount *mp; struct statfs *sp, sb; int error; struct nameidata nd; mtx_lock(&Giant); NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td); error = namei(&nd); if (error) { mtx_unlock(&Giant); return (error); } mp = nd.ni_vp->v_mount; sp = &mp->mnt_stat; NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); #ifdef MAC error = mac_check_mount_stat(td->td_ucred, mp); if (error) { mtx_unlock(&Giant); return (error); } #endif /* * Set these in case the underlying filesystem fails to do so. */ sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = VFS_STATFS(mp, sp, td); if (error) { mtx_unlock(&Giant); return (error); } if (suser(td)) { bcopy(sp, &sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; prison_enforce_statfs(td->td_ucred, mp, &sb); sp = &sb; } mtx_unlock(&Giant); *buf = *sp; return (0); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct fstatfs_args { int fd; struct statfs *buf; }; #endif int fstatfs(td, uap) struct thread *td; register struct fstatfs_args /* { int fd; struct statfs *buf; } */ *uap; { struct statfs sf; int error; error = kern_fstatfs(td, uap->fd, &sf); if (error == 0) error = copyout(&sf, uap->buf, sizeof(sf)); return (error); } int kern_fstatfs(struct thread *td, int fd, struct statfs *buf) { struct file *fp; struct mount *mp; struct statfs *sp, sb; + struct vnode *vp; int error; error = getvnode(td->td_proc->p_fd, fd, &fp); if (error) return (error); - mp = fp->f_vnode->v_mount; + vp = fp->f_vnode; + mp = vp->v_mount; fdrop(fp, td); - if (mp == NULL) + if (vp->v_iflag & VI_DOOMED) return (EBADF); mtx_lock(&Giant); #ifdef MAC error = mac_check_mount_stat(td->td_ucred, mp); if (error) { mtx_unlock(&Giant); return (error); } #endif sp = &mp->mnt_stat; /* * Set these in case the underlying filesystem fails to do so. */ sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = VFS_STATFS(mp, sp, td); if (error) { mtx_unlock(&Giant); return (error); } if (suser(td)) { bcopy(sp, &sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; prison_enforce_statfs(td->td_ucred, mp, &sb); sp = &sb; } mtx_unlock(&Giant); *buf = *sp; return (0); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct getfsstat_args { struct statfs *buf; long bufsize; int flags; }; #endif int getfsstat(td, uap) struct thread *td; register struct getfsstat_args /* { struct statfs *buf; long bufsize; int flags; } */ *uap; { return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, uap->flags)); } /* * If (bufsize > 0 && bufseg == UIO_SYSSPACE) * The caller is responsible for freeing memory which will be allocated * in '*buf'. */ int kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, enum uio_seg bufseg, int flags) { struct mount *mp, *nmp; struct statfs *sfsp, *sp, sb; size_t count, maxcount; int error; maxcount = bufsize / sizeof(struct statfs); if (bufsize == 0) sfsp = NULL; else if (bufseg == UIO_USERSPACE) sfsp = *buf; else /* if (bufseg == UIO_SYSSPACE) */ { count = 0; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { count++; } mtx_unlock(&mountlist_mtx); if (maxcount > count) maxcount = count; sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, M_WAITOK); } count = 0; mtx_lock(&Giant); mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (prison_canseemount(td->td_ucred, mp) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } #ifdef MAC if (mac_check_mount_stat(td->td_ucred, mp) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } #endif if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* * Set these in case the underlying filesystem * fails to do so. */ sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; /* * If MNT_NOWAIT or MNT_LAZY is specified, do not * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY * overrides MNT_WAIT. */ if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || (flags & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, td))) { mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); continue; } if (suser(td)) { bcopy(sp, &sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; prison_enforce_statfs(td->td_ucred, mp, &sb); sp = &sb; } if (bufseg == UIO_SYSSPACE) bcopy(sp, sfsp, sizeof(*sp)); else /* if (bufseg == UIO_USERSPACE) */ { error = copyout(sp, sfsp, sizeof(*sp)); if (error) { vfs_unbusy(mp, td); mtx_unlock(&Giant); return (error); } } sfsp++; } count++; mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } mtx_unlock(&mountlist_mtx); mtx_unlock(&Giant); if (sfsp && count > maxcount) td->td_retval[0] = maxcount; else td->td_retval[0] = count; return (0); } #ifdef COMPAT_FREEBSD4 /* * Get old format filesystem statistics. */ static void cvtstatfs(struct statfs *, struct ostatfs *); #ifndef _SYS_SYSPROTO_H_ struct freebsd4_statfs_args { char *path; struct ostatfs *buf; }; #endif int freebsd4_statfs(td, uap) struct thread *td; struct freebsd4_statfs_args /* { char *path; struct ostatfs *buf; } */ *uap; { struct ostatfs osb; struct statfs sf; int error; error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); if (error) return (error); cvtstatfs(&sf, &osb); return (copyout(&osb, uap->buf, sizeof(osb))); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_fstatfs_args { int fd; struct ostatfs *buf; }; #endif int freebsd4_fstatfs(td, uap) struct thread *td; struct freebsd4_fstatfs_args /* { int fd; struct ostatfs *buf; } */ *uap; { struct ostatfs osb; struct statfs sf; int error; error = kern_fstatfs(td, uap->fd, &sf); if (error) return (error); cvtstatfs(&sf, &osb); return (copyout(&osb, uap->buf, sizeof(osb))); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_getfsstat_args { struct ostatfs *buf; long bufsize; int flags; }; #endif int freebsd4_getfsstat(td, uap) struct thread *td; register struct freebsd4_getfsstat_args /* { struct ostatfs *buf; long bufsize; int flags; } */ *uap; { struct statfs *buf, *sp; struct ostatfs osb; size_t count, size; int error; count = uap->bufsize / sizeof(struct ostatfs); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); if (size > 0) { count = td->td_retval[0]; sp = buf; while (count > 0 && error == 0) { cvtstatfs(sp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); sp++; uap->buf++; count--; } free(buf, M_TEMP); } return (error); } /* * Implement fstatfs() for (NFS) file handles. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_fhstatfs_args { struct fhandle *u_fhp; struct ostatfs *buf; }; #endif int freebsd4_fhstatfs(td, uap) struct thread *td; struct freebsd4_fhstatfs_args /* { struct fhandle *u_fhp; struct ostatfs *buf; } */ *uap; { struct ostatfs osb; struct statfs sf; fhandle_t fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error) return (error); error = kern_fhstatfs(td, fh, &sf); if (error) return (error); cvtstatfs(&sf, &osb); return (copyout(&osb, uap->buf, sizeof(osb))); } /* * Convert a new format statfs structure to an old format statfs structure. */ static void cvtstatfs(nsp, osp) struct statfs *nsp; struct ostatfs *osp; { bzero(osp, sizeof(*osp)); osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX); osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX); osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX); osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX); osp->f_files = MIN(nsp->f_files, LONG_MAX); osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); osp->f_owner = nsp->f_owner; osp->f_type = nsp->f_type; osp->f_flags = nsp->f_flags; osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); bcopy(nsp->f_fstypename, osp->f_fstypename, MIN(MFSNAMELEN, OMNAMELEN)); bcopy(nsp->f_mntonname, osp->f_mntonname, MIN(MFSNAMELEN, OMNAMELEN)); bcopy(nsp->f_mntfromname, osp->f_mntfromname, MIN(MFSNAMELEN, OMNAMELEN)); osp->f_fsid = nsp->f_fsid; } #endif /* COMPAT_FREEBSD4 */ /* * Change current working directory to a given file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchdir_args { int fd; }; #endif int fchdir(td, uap) struct thread *td; struct fchdir_args /* { int fd; } */ *uap; { register struct filedesc *fdp = td->td_proc->p_fd; struct vnode *vp, *tdp, *vpold; struct mount *mp; struct file *fp; int vfslocked; int error; if ((error = getvnode(fdp, uap->fd, &fp)) != 0) return (error); vp = fp->f_vnode; VREF(vp); fdrop(fp, td); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type != VDIR) error = ENOTDIR; #ifdef MAC else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) { } #endif else error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); while (!error && (mp = vp->v_mountedhere) != NULL) { int tvfslocked; if (vfs_busy(mp, 0, 0, td)) continue; tvfslocked = VFS_LOCK_GIANT(mp); error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td); vfs_unbusy(mp, td); if (error) { VFS_UNLOCK_GIANT(tvfslocked); break; } vput(vp); VFS_UNLOCK_GIANT(vfslocked); vp = tdp; vfslocked = tvfslocked; } if (error) { vput(vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } VOP_UNLOCK(vp, 0, td); VFS_UNLOCK_GIANT(vfslocked); FILEDESC_LOCK_FAST(fdp); vpold = fdp->fd_cdir; fdp->fd_cdir = vp; FILEDESC_UNLOCK_FAST(fdp); vfslocked = VFS_LOCK_GIANT(vpold->v_mount); vrele(vpold); VFS_UNLOCK_GIANT(vfslocked); return (0); } /* * Change current working directory (``.''). */ #ifndef _SYS_SYSPROTO_H_ struct chdir_args { char *path; }; #endif int chdir(td, uap) struct thread *td; struct chdir_args /* { char *path; } */ *uap; { return (kern_chdir(td, uap->path, UIO_USERSPACE)); } int kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) { register struct filedesc *fdp = td->td_proc->p_fd; int error; struct nameidata nd; struct vnode *vp; int vfslocked; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); if ((error = change_dir(nd.ni_vp, td)) != 0) { vput(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } VOP_UNLOCK(nd.ni_vp, 0, td); VFS_UNLOCK_GIANT(vfslocked); NDFREE(&nd, NDF_ONLY_PNBUF); FILEDESC_LOCK_FAST(fdp); vp = fdp->fd_cdir; fdp->fd_cdir = nd.ni_vp; FILEDESC_UNLOCK_FAST(fdp); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); return (0); } /* * Helper function for raised chroot(2) security function: Refuse if * any filedescriptors are open directories. */ static int chroot_refuse_vdir_fds(fdp) struct filedesc *fdp; { struct vnode *vp; struct file *fp; int fd; FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); for (fd = 0; fd < fdp->fd_nfiles ; fd++) { fp = fget_locked(fdp, fd); if (fp == NULL) continue; if (fp->f_type == DTYPE_VNODE) { vp = fp->f_vnode; if (vp->v_type == VDIR) return (EPERM); } } return (0); } /* * This sysctl determines if we will allow a process to chroot(2) if it * has a directory open: * 0: disallowed for all processes. * 1: allowed for processes that were not already chroot(2)'ed. * 2: allowed for all processes. */ static int chroot_allow_open_directories = 1; SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, &chroot_allow_open_directories, 0, ""); /* * Change notion of root (``/'') directory. */ #ifndef _SYS_SYSPROTO_H_ struct chroot_args { char *path; }; #endif int chroot(td, uap) struct thread *td; struct chroot_args /* { char *path; } */ *uap; { int error; struct nameidata nd; int vfslocked; error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) goto error; vfslocked = NDHASGIANT(&nd); if ((error = change_dir(nd.ni_vp, td)) != 0) goto e_vunlock; #ifdef MAC if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp))) goto e_vunlock; #endif VOP_UNLOCK(nd.ni_vp, 0, td); error = change_root(nd.ni_vp, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); e_vunlock: vput(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); error: NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } /* * Common routine for chroot and chdir. Callers must provide a locked vnode * instance. */ int change_dir(vp, td) struct vnode *vp; struct thread *td; { int error; ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); if (vp->v_type != VDIR) return (ENOTDIR); #ifdef MAC error = mac_check_vnode_chdir(td->td_ucred, vp); if (error) return (error); #endif error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); return (error); } /* * Common routine for kern_chroot() and jail_attach(). The caller is * responsible for invoking suser() and mac_check_chroot() to authorize this * operation. */ int change_root(vp, td) struct vnode *vp; struct thread *td; { struct filedesc *fdp; struct vnode *oldvp; int vfslocked; int error; VFS_ASSERT_GIANT(vp->v_mount); fdp = td->td_proc->p_fd; FILEDESC_LOCK(fdp); if (chroot_allow_open_directories == 0 || (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { error = chroot_refuse_vdir_fds(fdp); if (error) { FILEDESC_UNLOCK(fdp); return (error); } } oldvp = fdp->fd_rdir; fdp->fd_rdir = vp; VREF(fdp->fd_rdir); if (!fdp->fd_jdir) { fdp->fd_jdir = vp; VREF(fdp->fd_jdir); } FILEDESC_UNLOCK(fdp); vfslocked = VFS_LOCK_GIANT(oldvp->v_mount); vrele(oldvp); VFS_UNLOCK_GIANT(vfslocked); return (0); } /* * Check permissions, allocate an open file structure, * and call the device open routine if any. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct open_args { char *path; int flags; int mode; }; #endif int open(td, uap) struct thread *td; register struct open_args /* { char *path; int flags; int mode; } */ *uap; { int error; error = kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode); if (mtx_owned(&Giant)) printf("open: %s: %d\n", uap->path, error); return (error); } int kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, int mode) { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; struct vattr vat; struct mount *mp; int cmode; struct file *nfp; int type, indx, error; struct flock lf; struct nameidata nd; int vfslocked; if ((flags & O_ACCMODE) == O_ACCMODE) return (EINVAL); flags = FFLAGS(flags); error = falloc(td, &nfp, &indx); if (error) return (error); /* An extra reference on `nfp' has been held for us by falloc(). */ fp = nfp; cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td); td->td_dupfd = -1; /* XXX check for fdopen */ error = vn_open(&nd, &flags, cmode, indx); if (error) { /* * If the vn_open replaced the method vector, something * wonderous happened deep below and we just pass it up * pretending we know what we do. */ if (error == ENXIO && fp->f_ops != &badfileops) { fdrop(fp, td); td->td_retval[0] = indx; return (0); } /* * release our own reference */ fdrop(fp, td); /* * handle special fdopen() case. bleh. dupfdopen() is * responsible for dropping the old contents of ofiles[indx] * if it succeeds. */ if ((error == ENODEV || error == ENXIO) && td->td_dupfd >= 0 && /* XXX from fdopen */ (error = dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) { td->td_retval[0] = indx; return (0); } /* * Clean up the descriptor, but only if another thread hadn't * replaced or closed it. */ fdclose(fdp, fp, indx, td); if (error == ERESTART) error = EINTR; return (error); } td->td_dupfd = 0; vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; /* * There should be 2 references on the file, one from the descriptor * table, and one for us. * * Handle the case where someone closed the file (via its file * descriptor) while we were blocked. The end result should look * like opening the file succeeded but it was immediately closed. * We call vn_close() manually because we haven't yet hooked up * the various 'struct file' fields. */ FILEDESC_LOCK(fdp); FILE_LOCK(fp); if (fp->f_count == 1) { mp = vp->v_mount; KASSERT(fdp->fd_ofiles[indx] != fp, ("Open file descriptor lost all refs")); FILE_UNLOCK(fp); FILEDESC_UNLOCK(fdp); VOP_UNLOCK(vp, 0, td); vn_close(vp, flags & FMASK, fp->f_cred, td); VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); td->td_retval[0] = indx; return (0); } fp->f_vnode = vp; if (fp->f_data == NULL) fp->f_data = vp; fp->f_flag = flags & FMASK; if (fp->f_ops == &badfileops) fp->f_ops = &vnops; fp->f_seqcount = 1; fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); FILE_UNLOCK(fp); FILEDESC_UNLOCK(fdp); VOP_UNLOCK(vp, 0, td); if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (flags & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) goto bad; fp->f_flag |= FHASLOCK; } if (flags & O_TRUNC) { if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto bad; VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); VATTR_NULL(&vat); vat.va_size = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); #ifdef MAC error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp); if (error == 0) #endif error = VOP_SETATTR(vp, &vat, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); if (error) goto bad; } VFS_UNLOCK_GIANT(vfslocked); /* * Release our private reference, leaving the one associated with * the descriptor table intact. */ fdrop(fp, td); td->td_retval[0] = indx; return (0); bad: VFS_UNLOCK_GIANT(vfslocked); fdclose(fdp, fp, indx, td); fdrop(fp, td); return (error); } #ifdef COMPAT_43 /* * Create a file. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct ocreat_args { char *path; int mode; }; #endif int ocreat(td, uap) struct thread *td; register struct ocreat_args /* { char *path; int mode; } */ *uap; { return (kern_open(td, uap->path, UIO_USERSPACE, O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); } #endif /* COMPAT_43 */ /* * Create a special file. */ #ifndef _SYS_SYSPROTO_H_ struct mknod_args { char *path; int mode; int dev; }; #endif int mknod(td, uap) struct thread *td; register struct mknod_args /* { char *path; int mode; int dev; } */ *uap; { return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); } int kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, int dev) { struct vnode *vp; struct mount *mp; struct vattr vattr; int error; int whiteout = 0; struct nameidata nd; int vfslocked; switch (mode & S_IFMT) { case S_IFCHR: case S_IFBLK: error = suser(td); break; default: error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL); break; } if (error) return (error); restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); if (vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); return (EEXIST); } else { VATTR_NULL(&vattr); FILEDESC_LOCK_FAST(td->td_proc->p_fd); vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); vattr.va_rdev = dev; whiteout = 0; switch (mode & S_IFMT) { case S_IFMT: /* used by badsect to flag bad sectors */ vattr.va_type = VBAD; break; case S_IFCHR: vattr.va_type = VCHR; break; case S_IFBLK: vattr.va_type = VBLK; break; case S_IFWHT: whiteout = 1; break; default: error = EINVAL; break; } } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); VFS_UNLOCK_GIANT(vfslocked); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } #ifdef MAC if (error == 0 && !whiteout) error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); #endif if (!error) { VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); if (whiteout) error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); else { error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) vput(nd.ni_vp); } } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Create a named pipe. */ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { char *path; int mode; }; #endif int mkfifo(td, uap) struct thread *td; register struct mkfifo_args /* { char *path; int mode; } */ *uap; { return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); } int kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) { struct mount *mp; struct vattr vattr; int error; struct nameidata nd; int vfslocked; restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); if (nd.ni_vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); VFS_UNLOCK_GIANT(vfslocked); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VFIFO; FILEDESC_LOCK_FAST(td->td_proc->p_fd); vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); #ifdef MAC error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error) goto out; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) vput(nd.ni_vp); #ifdef MAC out: #endif vput(nd.ni_dvp); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } /* * Make a hard file link. */ #ifndef _SYS_SYSPROTO_H_ struct link_args { char *path; char *link; }; #endif int link(td, uap) struct thread *td; register struct link_args /* { char *path; char *link; } */ *uap; { int error; error = kern_link(td, uap->path, uap->link, UIO_USERSPACE); return (error); } SYSCTL_DECL(_security_bsd); static int hardlink_check_uid = 0; SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, &hardlink_check_uid, 0, "Unprivileged processes cannot create hard links to files owned by other " "users"); static int hardlink_check_gid = 0; SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, &hardlink_check_gid, 0, "Unprivileged processes cannot create hard links to files owned by other " "groups"); static int can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) { struct vattr va; int error; if (suser_cred(cred, SUSER_ALLOWJAIL) == 0) return (0); if (!hardlink_check_uid && !hardlink_check_gid) return (0); error = VOP_GETATTR(vp, &va, cred, td); if (error != 0) return (error); if (hardlink_check_uid) { if (cred->cr_uid != va.va_uid) return (EPERM); } if (hardlink_check_gid) { if (!groupmember(va.va_gid, cred)) return (EPERM); } return (0); } int kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) { struct vnode *vp; struct mount *mp; struct nameidata nd; int vfslocked; int lvfslocked; int error; bwillwrite(); NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, segflg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if (vp->v_type == VDIR) { vrele(vp); VFS_UNLOCK_GIANT(vfslocked); return (EPERM); /* POSIX */ } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, segflg, link, td); if ((error = namei(&nd)) == 0) { lvfslocked = NDHASGIANT(&nd); if (nd.ni_vp != NULL) { if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); error = EEXIST; } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td)) == 0) { VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); error = can_hardlink(vp, td, td->td_ucred); if (error == 0) #ifdef MAC error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error == 0) #endif error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); VOP_UNLOCK(vp, 0, td); vput(nd.ni_dvp); } NDFREE(&nd, NDF_ONLY_PNBUF); VFS_UNLOCK_GIANT(lvfslocked); } vrele(vp); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Make a symbolic link. */ #ifndef _SYS_SYSPROTO_H_ struct symlink_args { char *path; char *link; }; #endif int symlink(td, uap) struct thread *td; register struct symlink_args /* { char *path; char *link; } */ *uap; { return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); } int kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) { struct mount *mp; struct vattr vattr; char *syspath; int error; struct nameidata nd; int vfslocked; if (segflg == UIO_SYSSPACE) { syspath = path; } else { syspath = uma_zalloc(namei_zone, M_WAITOK); if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0) goto out; } restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, segflg, link, td); if ((error = namei(&nd)) != 0) goto out; vfslocked = NDHASGIANT(&nd); if (nd.ni_vp) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); error = EEXIST; goto out; } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); VFS_UNLOCK_GIANT(vfslocked); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) goto out; goto restart; } VATTR_NULL(&vattr); FILEDESC_LOCK_FAST(td->td_proc->p_fd); vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); #ifdef MAC vattr.va_type = VLNK; error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error) goto out2; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); if (error == 0) vput(nd.ni_vp); #ifdef MAC out2: #endif NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); out: if (segflg != UIO_SYSSPACE) uma_zfree(namei_zone, syspath); return (error); } /* * Delete a whiteout from the filesystem. */ int undelete(td, uap) struct thread *td; register struct undelete_args /* { char *path; } */ *uap; { int error; struct mount *mp; struct nameidata nd; int vfslocked; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); vfslocked = NDHASGIANT(&nd); if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); VFS_UNLOCK_GIANT(vfslocked); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct unlink_args { char *path; }; #endif int unlink(td, uap) struct thread *td; struct unlink_args /* { char *path; } */ *uap; { int error; error = kern_unlink(td, uap->path, UIO_USERSPACE); return (error); } int kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) { struct mount *mp; struct vnode *vp; int error; struct nameidata nd; int vfslocked; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error == EINVAL ? EPERM : error); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { /* * The root of a mounted filesystem cannot be deleted. * * XXX: can this only be a VDIR case? */ if (vp->v_vflag & VV_ROOT) error = EBUSY; } if (error == 0) { if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); VFS_UNLOCK_GIANT(vfslocked); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } #ifdef MAC error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error) goto out; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); #ifdef MAC out: #endif vn_finished_write(mp); } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct lseek_args { int fd; int pad; off_t offset; int whence; }; #endif int lseek(td, uap) struct thread *td; register struct lseek_args /* { int fd; int pad; off_t offset; int whence; } */ *uap; { struct ucred *cred = td->td_ucred; struct file *fp; struct vnode *vp; struct vattr vattr; off_t offset; int error, noneg; int vfslocked; if ((error = fget(td, uap->fd, &fp)) != 0) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) { fdrop(fp, td); return (ESPIPE); } vp = fp->f_vnode; vfslocked = VFS_LOCK_GIANT(vp->v_mount); noneg = (vp->v_type != VCHR); offset = uap->offset; switch (uap->whence) { case L_INCR: if (noneg && (fp->f_offset < 0 || (offset > 0 && fp->f_offset > OFF_MAX - offset))) { error = EOVERFLOW; break; } offset += fp->f_offset; break; case L_XTND: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); error = VOP_GETATTR(vp, &vattr, cred, td); VOP_UNLOCK(vp, 0, td); if (error) break; if (noneg && (vattr.va_size > OFF_MAX || (offset > 0 && vattr.va_size > OFF_MAX - offset))) { error = EOVERFLOW; break; } offset += vattr.va_size; break; case L_SET: break; default: error = EINVAL; } if (error == 0 && noneg && offset < 0) error = EINVAL; if (error != 0) goto drop; fp->f_offset = offset; *(off_t *)(td->td_retval) = fp->f_offset; drop: fdrop(fp, td); VFS_UNLOCK_GIANT(vfslocked); return (error); } #if defined(COMPAT_43) /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct olseek_args { int fd; long offset; int whence; }; #endif int olseek(td, uap) struct thread *td; register struct olseek_args /* { int fd; long offset; int whence; } */ *uap; { struct lseek_args /* { int fd; int pad; off_t offset; int whence; } */ nuap; int error; nuap.fd = uap->fd; nuap.offset = uap->offset; nuap.whence = uap->whence; error = lseek(td, &nuap); return (error); } #endif /* COMPAT_43 */ /* * Check access permissions using passed credentials. */ static int vn_access(vp, user_flags, cred, td) struct vnode *vp; int user_flags; struct ucred *cred; struct thread *td; { int error, flags; /* Flags == 0 means only check for existence. */ error = 0; if (user_flags) { flags = 0; if (user_flags & R_OK) flags |= VREAD; if (user_flags & W_OK) flags |= VWRITE; if (user_flags & X_OK) flags |= VEXEC; #ifdef MAC error = mac_check_vnode_access(cred, vp, flags); if (error) return (error); #endif if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, flags, cred, td); } return (error); } /* * Check access permissions using "real" credentials. */ #ifndef _SYS_SYSPROTO_H_ struct access_args { char *path; int flags; }; #endif int access(td, uap) struct thread *td; register struct access_args /* { char *path; int flags; } */ *uap; { return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags)); } int kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags) { struct ucred *cred, *tmpcred; register struct vnode *vp; struct nameidata nd; int vfslocked; int error; /* * Create and modify a temporary credential instead of one that * is potentially shared. This could also mess up socket * buffer accounting which can run in an interrupt context. */ cred = td->td_ucred; tmpcred = crdup(cred); tmpcred->cr_uid = cred->cr_ruid; tmpcred->cr_groups[0] = cred->cr_rgid; td->td_ucred = tmpcred; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) goto out1; vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; error = vn_access(vp, flags, tmpcred, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); VFS_UNLOCK_GIANT(vfslocked); out1: td->td_ucred = cred; crfree(tmpcred); return (error); } /* * Check access permissions using "effective" credentials. */ #ifndef _SYS_SYSPROTO_H_ struct eaccess_args { char *path; int flags; }; #endif int eaccess(td, uap) struct thread *td; register struct eaccess_args /* { char *path; int flags; } */ *uap; { struct nameidata nd; struct vnode *vp; int vfslocked; int error; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; vfslocked = NDHASGIANT(&nd); error = vn_access(vp, uap->flags, td->td_ucred, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } #if defined(COMPAT_43) /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct ostat_args { char *path; struct ostat *ub; }; #endif int ostat(td, uap) struct thread *td; register struct ostat_args /* { char *path; struct ostat *ub; } */ *uap; { struct stat sb; struct ostat osb; int error; error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); if (error) return (error); cvtstat(&sb, &osb); error = copyout(&osb, uap->ub, sizeof (osb)); return (error); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct olstat_args { char *path; struct ostat *ub; }; #endif int olstat(td, uap) struct thread *td; register struct olstat_args /* { char *path; struct ostat *ub; } */ *uap; { struct stat sb; struct ostat osb; int error; error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); if (error) return (error); cvtstat(&sb, &osb); error = copyout(&osb, uap->ub, sizeof (osb)); return (error); } /* * Convert from an old to a new stat structure. */ void cvtstat(st, ost) struct stat *st; struct ostat *ost; { ost->st_dev = st->st_dev; ost->st_ino = st->st_ino; ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; if (st->st_size < (quad_t)1 << 32) ost->st_size = st->st_size; else ost->st_size = -2; ost->st_atime = st->st_atime; ost->st_mtime = st->st_mtime; ost->st_ctime = st->st_ctime; ost->st_blksize = st->st_blksize; ost->st_blocks = st->st_blocks; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; } #endif /* COMPAT_43 */ /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct stat_args { char *path; struct stat *ub; }; #endif int stat(td, uap) struct thread *td; register struct stat_args /* { char *path; struct stat *ub; } */ *uap; { struct stat sb; int error; error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); if (error == 0) error = copyout(&sb, uap->ub, sizeof (sb)); return (error); } int kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) { struct nameidata nd; struct stat sb; int error, vfslocked; NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); if (error) return (error); *sbp = sb; return (0); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif int lstat(td, uap) struct thread *td; register struct lstat_args /* { char *path; struct stat *ub; } */ *uap; { struct stat sb; int error; error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); if (error == 0) error = copyout(&sb, uap->ub, sizeof (sb)); return (error); } int kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) { struct vnode *vp; struct stat sb; struct nameidata nd; int error, vfslocked; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); VFS_UNLOCK_GIANT(vfslocked); if (error) return (error); *sbp = sb; return (0); } /* * Implementation of the NetBSD [l]stat() functions. */ void cvtnstat(sb, nsb) struct stat *sb; struct nstat *nsb; { bzero(nsb, sizeof *nsb); nsb->st_dev = sb->st_dev; nsb->st_ino = sb->st_ino; nsb->st_mode = sb->st_mode; nsb->st_nlink = sb->st_nlink; nsb->st_uid = sb->st_uid; nsb->st_gid = sb->st_gid; nsb->st_rdev = sb->st_rdev; nsb->st_atimespec = sb->st_atimespec; nsb->st_mtimespec = sb->st_mtimespec; nsb->st_ctimespec = sb->st_ctimespec; nsb->st_size = sb->st_size; nsb->st_blocks = sb->st_blocks; nsb->st_blksize = sb->st_blksize; nsb->st_flags = sb->st_flags; nsb->st_gen = sb->st_gen; nsb->st_birthtimespec = sb->st_birthtimespec; } #ifndef _SYS_SYSPROTO_H_ struct nstat_args { char *path; struct nstat *ub; }; #endif int nstat(td, uap) struct thread *td; register struct nstat_args /* { char *path; struct nstat *ub; } */ *uap; { struct stat sb; struct nstat nsb; int error; error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); if (error) return (error); cvtnstat(&sb, &nsb); error = copyout(&nsb, uap->ub, sizeof (nsb)); return (error); } /* * NetBSD lstat. Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif int nlstat(td, uap) struct thread *td; register struct nlstat_args /* { char *path; struct nstat *ub; } */ *uap; { struct stat sb; struct nstat nsb; int error; error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); if (error) return (error); cvtnstat(&sb, &nsb); error = copyout(&nsb, uap->ub, sizeof (nsb)); return (error); } /* * Get configurable pathname variables. */ #ifndef _SYS_SYSPROTO_H_ struct pathconf_args { char *path; int name; }; #endif int pathconf(td, uap) struct thread *td; register struct pathconf_args /* { char *path; int name; } */ *uap; { return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name)); } int kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name) { struct nameidata nd; int error, vfslocked; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); /* If asynchronous I/O is available, it works for all files. */ if (name == _PC_ASYNC_IO) td->td_retval[0] = async_io_version; else error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); vput(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Return target name of a symbolic link. */ #ifndef _SYS_SYSPROTO_H_ struct readlink_args { char *path; char *buf; int count; }; #endif int readlink(td, uap) struct thread *td; register struct readlink_args /* { char *path; char *buf; int count; } */ *uap; { return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, UIO_USERSPACE, uap->count)); } int kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, enum uio_seg bufseg, int count) { register struct vnode *vp; struct iovec aiov; struct uio auio; int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; #ifdef MAC error = mac_check_vnode_readlink(td->td_ucred, vp); if (error) { vput(vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } #endif if (vp->v_type != VLNK) error = EINVAL; else { aiov.iov_base = buf; aiov.iov_len = count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = bufseg; auio.uio_td = td; auio.uio_resid = count; error = VOP_READLINK(vp, &auio, td->td_ucred); } vput(vp); VFS_UNLOCK_GIANT(vfslocked); td->td_retval[0] = count - auio.uio_resid; return (error); } /* * Common implementation code for chflags() and fchflags(). */ static int setfflags(td, vp, flags) struct thread *td; struct vnode *vp; int flags; { int error; struct mount *mp; struct vattr vattr; /* * Prevent non-root users from setting flags on devices. When * a device is reused, users can retain ownership of the device * if they are allowed to set flags and programs assume that * chown can't fail when done as root. */ if (vp->v_type == VCHR || vp->v_type == VBLK) { error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL); if (error) return (error); } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_flags = flags; #ifdef MAC error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } /* * Change flags of a file given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chflags_args { char *path; int flags; }; #endif int chflags(td, uap) struct thread *td; register struct chflags_args /* { char *path; int flags; } */ *uap; { int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = setfflags(td, nd.ni_vp, uap->flags); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Same as chflags() but doesn't follow symlinks. */ int lchflags(td, uap) struct thread *td; register struct lchflags_args /* { char *path; int flags; } */ *uap; { int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfflags(td, nd.ni_vp, uap->flags); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Change flags of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchflags_args { int fd; int flags; }; #endif int fchflags(td, uap) struct thread *td; register struct fchflags_args /* { int fd; int flags; } */ *uap; { struct file *fp; int vfslocked; int error; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = setfflags(td, fp->f_vnode, uap->flags); VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); return (error); } /* * Common implementation code for chmod(), lchmod() and fchmod(). */ static int setfmode(td, vp, mode) struct thread *td; struct vnode *vp; int mode; { int error; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_mode = mode & ALLPERMS; #ifdef MAC error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } /* * Change mode of a file given path name. */ #ifndef _SYS_SYSPROTO_H_ struct chmod_args { char *path; int mode; }; #endif int chmod(td, uap) struct thread *td; register struct chmod_args /* { char *path; int mode; } */ *uap; { return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); } int kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) { int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfmode(td, nd.ni_vp, mode); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Change mode of a file given path name (don't follow links.) */ #ifndef _SYS_SYSPROTO_H_ struct lchmod_args { char *path; int mode; }; #endif int lchmod(td, uap) struct thread *td; register struct lchmod_args /* { char *path; int mode; } */ *uap; { int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfmode(td, nd.ni_vp, uap->mode); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Change mode of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchmod_args { int fd; int mode; }; #endif int fchmod(td, uap) struct thread *td; register struct fchmod_args /* { int fd; int mode; } */ *uap; { struct file *fp; int vfslocked; int error; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = setfmode(td, fp->f_vnode, uap->mode); VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); return (error); } /* * Common implementation for chown(), lchown(), and fchown() */ static int setfown(td, vp, uid, gid) struct thread *td; struct vnode *vp; uid_t uid; gid_t gid; { int error; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_uid = uid; vattr.va_gid = gid; #ifdef MAC error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid, vattr.va_gid); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } /* * Set ownership given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chown_args { char *path; int uid; int gid; }; #endif int chown(td, uap) struct thread *td; register struct chown_args /* { char *path; int uid; int gid; } */ *uap; { return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); } int kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, int gid) { int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfown(td, nd.ni_vp, uid, gid); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Set ownership given a path name, do not cross symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchown_args { char *path; int uid; int gid; }; #endif int lchown(td, uap) struct thread *td; register struct lchown_args /* { char *path; int uid; int gid; } */ *uap; { return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); } int kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, int gid) { int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfown(td, nd.ni_vp, uid, gid); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Set ownership given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchown_args { int fd; int uid; int gid; }; #endif int fchown(td, uap) struct thread *td; register struct fchown_args /* { int fd; int uid; int gid; } */ *uap; { struct file *fp; int vfslocked; int error; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = setfown(td, fp->f_vnode, uap->uid, uap->gid); VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); return (error); } /* * Common implementation code for utimes(), lutimes(), and futimes(). */ static int getutimes(usrtvp, tvpseg, tsp) const struct timeval *usrtvp; enum uio_seg tvpseg; struct timespec *tsp; { struct timeval tv[2]; const struct timeval *tvp; int error; if (usrtvp == NULL) { microtime(&tv[0]); TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); tsp[1] = tsp[0]; } else { if (tvpseg == UIO_SYSSPACE) { tvp = usrtvp; } else { if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) return (error); tvp = tv; } if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) return (EINVAL); TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); } return (0); } /* * Common implementation code for utimes(), lutimes(), and futimes(). */ static int setutimes(td, vp, ts, numtimes, nullflag) struct thread *td; struct vnode *vp; const struct timespec *ts; int numtimes; int nullflag; { int error, setbirthtime; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); setbirthtime = 0; if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 && timespeccmp(&ts[1], &vattr.va_birthtime, < )) setbirthtime = 1; VATTR_NULL(&vattr); vattr.va_atime = ts[0]; vattr.va_mtime = ts[1]; if (setbirthtime) vattr.va_birthtime = ts[1]; if (numtimes > 2) vattr.va_birthtime = ts[2]; if (nullflag) vattr.va_vaflags |= VA_UTIMES_NULL; #ifdef MAC error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime, vattr.va_mtime); #endif if (error == 0) error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct utimes_args { char *path; struct timeval *tptr; }; #endif int utimes(td, uap) struct thread *td; register struct utimes_args /* { char *path; struct timeval *tptr; } */ *uap; { return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, UIO_USERSPACE)); } int kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; int error; struct nameidata nd; int vfslocked; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct lutimes_args { char *path; struct timeval *tptr; }; #endif int lutimes(td, uap) struct thread *td; register struct lutimes_args /* { char *path; struct timeval *tptr; } */ *uap; { return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, UIO_USERSPACE)); } int kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; int error; struct nameidata nd; int vfslocked; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct futimes_args { int fd; struct timeval *tptr; }; #endif int futimes(td, uap) struct thread *td; register struct futimes_args /* { int fd; struct timeval *tptr; } */ *uap; { return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); } int kern_futimes(struct thread *td, int fd, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; struct file *fp; int vfslocked; int error; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); return (error); } /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct truncate_args { char *path; int pad; off_t length; }; #endif int truncate(td, uap) struct thread *td; register struct truncate_args /* { char *path; int pad; off_t length; } */ *uap; { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } int kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) { struct mount *mp; struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; int vfslocked; if (length < 0) return(EINVAL); NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type == VDIR) error = EISDIR; #ifdef MAC else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) { } #endif else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { VATTR_NULL(&vattr); vattr.va_size = length; error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); } vput(vp); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Truncate a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ftruncate_args { int fd; int pad; off_t length; }; #endif int ftruncate(td, uap) struct thread *td; register struct ftruncate_args /* { int fd; int pad; off_t length; } */ *uap; { struct mount *mp; struct vattr vattr; struct vnode *vp; struct file *fp; int vfslocked; int error; if (uap->length < 0) return(EINVAL); if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); if ((fp->f_flag & FWRITE) == 0) { fdrop(fp, td); return (EINVAL); } vp = fp->f_vnode; vfslocked = VFS_LOCK_GIANT(vp->v_mount); if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto drop; VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type == VDIR) error = EISDIR; #ifdef MAC else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp))) { } #endif else if ((error = vn_writechk(vp)) == 0) { VATTR_NULL(&vattr); vattr.va_size = uap->length; error = VOP_SETATTR(vp, &vattr, fp->f_cred, td); } VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); drop: VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); return (error); } #if defined(COMPAT_43) /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct otruncate_args { char *path; long length; }; #endif int otruncate(td, uap) struct thread *td; register struct otruncate_args /* { char *path; long length; } */ *uap; { struct truncate_args /* { char *path; int pad; off_t length; } */ nuap; nuap.path = uap->path; nuap.length = uap->length; return (truncate(td, &nuap)); } /* * Truncate a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct oftruncate_args { int fd; long length; }; #endif int oftruncate(td, uap) struct thread *td; register struct oftruncate_args /* { int fd; long length; } */ *uap; { struct ftruncate_args /* { int fd; int pad; off_t length; } */ nuap; nuap.fd = uap->fd; nuap.length = uap->length; return (ftruncate(td, &nuap)); } #endif /* COMPAT_43 */ /* * Sync an open file. */ #ifndef _SYS_SYSPROTO_H_ struct fsync_args { int fd; }; #endif int fsync(td, uap) struct thread *td; struct fsync_args /* { int fd; } */ *uap; { struct vnode *vp; struct mount *mp; struct file *fp; int vfslocked; int error; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); vp = fp->f_vnode; vfslocked = VFS_LOCK_GIANT(vp->v_mount); if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto drop; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_object != NULL) { VM_OBJECT_LOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_UNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); drop: VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); return (error); } /* * Rename files. Source and destination must either both be directories, * or both not be directories. If target is a directory, it must be empty. */ #ifndef _SYS_SYSPROTO_H_ struct rename_args { char *from; char *to; }; #endif int rename(td, uap) struct thread *td; register struct rename_args /* { char *from; char *to; } */ *uap; { return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); } int kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) { struct mount *mp = NULL; struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int tvfslocked; int fvfslocked; int error; bwillwrite(); #ifdef MAC NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE, pathseg, from, td); #else NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE, pathseg, from, td); #endif if ((error = namei(&fromnd)) != 0) return (error); fvfslocked = NDHASGIANT(&fromnd); tvfslocked = 0; #ifdef MAC error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd); VOP_UNLOCK(fromnd.ni_dvp, 0, td); VOP_UNLOCK(fromnd.ni_vp, 0, td); #endif fvp = fromnd.ni_vp; if (error == 0) error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); if (error != 0) { NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | MPSAFE, pathseg, to, td); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&tond)) != 0) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); vn_finished_write(mp); goto out1; } tvfslocked = NDHASGIANT(&tond); tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; goto out; } } if (fvp == tdvp) error = EINVAL; /* * If the source is the same as the destination (that is, if they * are links to the same vnode), then there is nothing to do. */ if (fvp == tvp) error = -1; #ifdef MAC else error = mac_check_vnode_rename_to(td->td_ucred, tdvp, tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); #endif out: if (!error) { VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE); if (fromnd.ni_dvp != tdvp) { VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE); } if (tvp) { VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); } else { NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); if (tvp) vput(tvp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); vrele(fromnd.ni_dvp); vrele(fvp); } vrele(tond.ni_startdir); vn_finished_write(mp); out1: if (fromnd.ni_startdir) vrele(fromnd.ni_startdir); VFS_UNLOCK_GIANT(fvfslocked); VFS_UNLOCK_GIANT(tvfslocked); if (error == -1) return (0); return (error); } /* * Make a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct mkdir_args { char *path; int mode; }; #endif int mkdir(td, uap) struct thread *td; register struct mkdir_args /* { char *path; int mode; } */ *uap; { return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); } int kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) { struct mount *mp; struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; int vfslocked; restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, segflg, path, td); nd.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); /* * XXX namei called with LOCKPARENT but not LOCKLEAF has * the strange behaviour of leaving the vnode unlocked * if the target is the same vnode as the parent. */ if (vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); VFS_UNLOCK_GIANT(vfslocked); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VDIR; FILEDESC_LOCK_FAST(td->td_proc->p_fd); vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); #ifdef MAC error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error) goto out; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); #ifdef MAC out: #endif NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (!error) vput(nd.ni_vp); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Remove a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct rmdir_args { char *path; }; #endif int rmdir(td, uap) struct thread *td; struct rmdir_args /* { char *path; } */ *uap; { return (kern_rmdir(td, uap->path, UIO_USERSPACE)); } int kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) { struct mount *mp; struct vnode *vp; int error; struct nameidata nd; int vfslocked; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_vflag & VV_ROOT) { error = EBUSY; goto out; } #ifdef MAC error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error) goto out; #endif if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); VFS_UNLOCK_GIANT(vfslocked); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); vn_finished_write(mp); out: NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); VFS_UNLOCK_GIANT(vfslocked); return (error); } #ifdef COMPAT_43 /* * Read a block of directory entries in a filesystem independent format. */ #ifndef _SYS_SYSPROTO_H_ struct ogetdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int ogetdirentries(td, uap) struct thread *td; register struct ogetdirentries_args /* { int fd; char *buf; u_int count; long *basep; } */ *uap; { struct vnode *vp; struct file *fp; struct uio auio, kuio; struct iovec aiov, kiov; struct dirent *dp, *edp; caddr_t dirbuf; int error, eofflag, readcnt; long loff; /* XXX arbitrary sanity limit on `count'. */ if (uap->count > 64 * 1024) return (EINVAL); if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; unionread: if (vp->v_type != VDIR) { fdrop(fp, td); return (EINVAL); } aiov.iov_base = uap->buf; aiov.iov_len = uap->count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auio.uio_resid = uap->count; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); loff = auio.uio_offset = fp->f_offset; #ifdef MAC error = mac_check_vnode_readdir(td->td_ucred, vp); if (error) { VOP_UNLOCK(vp, 0, td); fdrop(fp, td); return (error); } #endif # if (BYTE_ORDER != LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen <= 0) { error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; } else # endif { kuio = auio; kuio.uio_iov = &kiov; kuio.uio_segflg = UIO_SYSSPACE; kiov.iov_len = uap->count; MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK); kiov.iov_base = dirbuf; error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = kuio.uio_offset; if (error == 0) { readcnt = uap->count - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { # if (BYTE_ORDER == LITTLE_ENDIAN) /* * The expected low byte of * dp->d_namlen is our dp->d_type. * The high MBZ byte of dp->d_namlen * is our dp->d_namlen. */ dp->d_type = dp->d_namlen; dp->d_namlen = 0; # else /* * The dp->d_type is the high byte * of the expected dp->d_namlen, * so must be zero'ed. */ dp->d_type = 0; # endif if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, &auio); } FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, td); if (error) { fdrop(fp, td); return (error); } if (uap->count == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; if (error) { fdrop(fp, td); return (error); } } /* * XXX We could delay dropping the lock above but * union_dircheckp complicates things. */ vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td); if ((vp->v_vflag & VV_ROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_vnode = vp; fp->f_data = vp; fp->f_offset = 0; vput(tvp); goto unionread; } VOP_UNLOCK(vp, 0, td); } error = copyout(&loff, uap->basep, sizeof(long)); fdrop(fp, td); td->td_retval[0] = uap->count - auio.uio_resid; return (error); } #endif /* COMPAT_43 */ /* * Read a block of directory entries in a filesystem independent format. */ #ifndef _SYS_SYSPROTO_H_ struct getdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int getdirentries(td, uap) struct thread *td; register struct getdirentries_args /* { int fd; char *buf; u_int count; long *basep; } */ *uap; { struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; int vfslocked; long loff; int error, eofflag; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; unionread: vfslocked = VFS_LOCK_GIANT(vp->v_mount); if (vp->v_type != VDIR) { error = EINVAL; goto fail; } aiov.iov_base = uap->buf; aiov.iov_len = uap->count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auio.uio_resid = uap->count; /* vn_lock(vp, LK_SHARED | LK_RETRY, td); */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); loff = auio.uio_offset = fp->f_offset; #ifdef MAC error = mac_check_vnode_readdir(td->td_ucred, vp); if (error == 0) #endif error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; VOP_UNLOCK(vp, 0, td); if (error) goto fail; if (uap->count == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) { VFS_UNLOCK_GIANT(vfslocked); goto unionread; } if (error) goto fail; } /* * XXX We could delay dropping the lock above but * union_dircheckp complicates things. */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if ((vp->v_vflag & VV_ROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_vnode = vp; fp->f_data = vp; fp->f_offset = 0; vput(tvp); VFS_UNLOCK_GIANT(vfslocked); goto unionread; } VOP_UNLOCK(vp, 0, td); } if (uap->basep != NULL) { error = copyout(&loff, uap->basep, sizeof(long)); } td->td_retval[0] = uap->count - auio.uio_resid; fail: VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); return (error); } #ifndef _SYS_SYSPROTO_H_ struct getdents_args { int fd; char *buf; size_t count; }; #endif int getdents(td, uap) struct thread *td; register struct getdents_args /* { int fd; char *buf; u_int count; } */ *uap; { struct getdirentries_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; return (getdirentries(td, &ap)); } /* * Set the mode mask for creation of filesystem nodes. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { int newmask; }; #endif int umask(td, uap) struct thread *td; struct umask_args /* { int newmask; } */ *uap; { register struct filedesc *fdp; FILEDESC_LOCK_FAST(td->td_proc->p_fd); fdp = td->td_proc->p_fd; td->td_retval[0] = fdp->fd_cmask; fdp->fd_cmask = uap->newmask & ALLPERMS; FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); return (0); } /* * Void all references to file by ripping underlying filesystem * away from vnode. */ #ifndef _SYS_SYSPROTO_H_ struct revoke_args { char *path; }; #endif int revoke(td, uap) struct thread *td; register struct revoke_args /* { char *path; } */ *uap; { struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); if (vp->v_type != VCHR) { error = EINVAL; goto out; } #ifdef MAC error = mac_check_vnode_revoke(td->td_ucred, vp); if (error) goto out; #endif error = VOP_GETATTR(vp, &vattr, td->td_ucred, td); if (error) goto out; if (td->td_ucred->cr_uid != vattr.va_uid) { error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL); if (error) goto out; } if (vcount(vp) > 1) VOP_REVOKE(vp, REVOKEALL); out: vput(vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Convert a user file descriptor to a kernel file entry. * A reference on the file entry is held upon returning. */ int getvnode(fdp, fd, fpp) struct filedesc *fdp; int fd; struct file **fpp; { int error; struct file *fp; fp = NULL; if (fdp == NULL) error = EBADF; else { FILEDESC_LOCK(fdp); if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) error = EBADF; else if (fp->f_vnode == NULL) { fp = NULL; error = EINVAL; } else { fhold(fp); error = 0; } FILEDESC_UNLOCK(fdp); } *fpp = fp; return (error); } /* * Get (NFS) file handle */ #ifndef _SYS_SYSPROTO_H_ struct lgetfh_args { char *fname; fhandle_t *fhp; }; #endif int lgetfh(td, uap) struct thread *td; register struct lgetfh_args *uap; { struct nameidata nd; fhandle_t fh; register struct vnode *vp; int vfslocked; int error; error = suser(td); if (error) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE, uap->fname, td); error = namei(&nd); if (error) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; bzero(&fh, sizeof(fh)); fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fh.fh_fid); vput(vp); VFS_UNLOCK_GIANT(vfslocked); if (error) return (error); error = copyout(&fh, uap->fhp, sizeof (fh)); return (error); } #ifndef _SYS_SYSPROTO_H_ struct getfh_args { char *fname; fhandle_t *fhp; }; #endif int getfh(td, uap) struct thread *td; register struct getfh_args *uap; { struct nameidata nd; fhandle_t fh; register struct vnode *vp; int vfslocked; int error; error = suser(td); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE, uap->fname, td); error = namei(&nd); if (error) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; bzero(&fh, sizeof(fh)); fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fh.fh_fid); vput(vp); VFS_UNLOCK_GIANT(vfslocked); if (error) return (error); error = copyout(&fh, uap->fhp, sizeof (fh)); return (error); } /* * syscall for the rpc.lockd to use to translate a NFS file handle into * an open descriptor. * * warning: do not remove the suser() call or this becomes one giant * security hole. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct fhopen_args { const struct fhandle *u_fhp; int flags; }; #endif int fhopen(td, uap) struct thread *td; struct fhopen_args /* { const struct fhandle *u_fhp; int flags; } */ *uap; { struct proc *p = td->td_proc; struct mount *mp; struct vnode *vp; struct fhandle fhp; struct vattr vat; struct vattr *vap = &vat; struct flock lf; struct file *fp; register struct filedesc *fdp = p->p_fd; int fmode, mode, error, type; struct file *nfp; int indx; error = suser(td); if (error) return (error); fmode = FFLAGS(uap->flags); /* why not allow a non-read/write open for our lockd? */ if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) return (EINVAL); error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); if (error) return(error); /* find the mount point */ mtx_lock(&Giant); mp = vfs_getvfs(&fhp.fh_fsid); if (mp == NULL) { error = ESTALE; goto out; } /* now give me my vnode, it gets returned to me locked */ error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp); if (error) goto out; /* * from now on we have to make sure not * to forget about the vnode * any error that causes an abort must vput(vp) * just set error = err and 'goto bad;'. */ /* * from vn_open */ if (vp->v_type == VLNK) { error = EMLINK; goto bad; } if (vp->v_type == VSOCK) { error = EOPNOTSUPP; goto bad; } mode = 0; if (fmode & (FWRITE | O_TRUNC)) { if (vp->v_type == VDIR) { error = EISDIR; goto bad; } error = vn_writechk(vp); if (error) goto bad; mode |= VWRITE; } if (fmode & FREAD) mode |= VREAD; if (fmode & O_APPEND) mode |= VAPPEND; #ifdef MAC error = mac_check_vnode_open(td->td_ucred, vp, mode); if (error) goto bad; #endif if (mode) { error = VOP_ACCESS(vp, mode, td->td_ucred, td); if (error) goto bad; } if (fmode & O_TRUNC) { VOP_UNLOCK(vp, 0, td); /* XXX */ if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); goto out; } VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* XXX */ #ifdef MAC /* * We don't yet have fp->f_cred, so use td->td_ucred, which * should be right. */ error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp); if (error == 0) { #endif VATTR_NULL(vap); vap->va_size = 0; error = VOP_SETATTR(vp, vap, td->td_ucred, td); #ifdef MAC } #endif vn_finished_write(mp); if (error) goto bad; } error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1); if (error) goto bad; if (fmode & FWRITE) vp->v_writecount++; /* * end of vn_open code */ if ((error = falloc(td, &nfp, &indx)) != 0) { if (fmode & FWRITE) vp->v_writecount--; goto bad; } /* An extra reference on `nfp' has been held for us by falloc(). */ fp = nfp; nfp->f_vnode = vp; nfp->f_data = vp; nfp->f_flag = fmode & FMASK; nfp->f_ops = &vnops; nfp->f_type = DTYPE_VNODE; if (fmode & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (fmode & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((fmode & FNONBLOCK) == 0) type |= F_WAIT; VOP_UNLOCK(vp, 0, td); if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { /* * The lock request failed. Normally close the * descriptor but handle the case where someone might * have dup()d or close()d it when we weren't looking. */ fdclose(fdp, fp, indx, td); /* * release our private reference */ fdrop(fp, td); goto out; } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); fp->f_flag |= FHASLOCK; } VOP_UNLOCK(vp, 0, td); fdrop(fp, td); mtx_unlock(&Giant); td->td_retval[0] = indx; return (0); bad: vput(vp); out: mtx_unlock(&Giant); return (error); } /* * Stat an (NFS) file handle. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct fhstat_args { struct fhandle *u_fhp; struct stat *sb; }; #endif int fhstat(td, uap) struct thread *td; register struct fhstat_args /* { struct fhandle *u_fhp; struct stat *sb; } */ *uap; { struct stat sb; fhandle_t fh; struct mount *mp; struct vnode *vp; int error; error = suser(td); if (error) return (error); error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error) return (error); mtx_lock(&Giant); if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { mtx_unlock(&Giant); return (ESTALE); } if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) { mtx_unlock(&Giant); return (error); } error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td); vput(vp); mtx_unlock(&Giant); if (error) return (error); error = copyout(&sb, uap->sb, sizeof(sb)); return (error); } /* * Implement fstatfs() for (NFS) file handles. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct fhstatfs_args { struct fhandle *u_fhp; struct statfs *buf; }; #endif int fhstatfs(td, uap) struct thread *td; struct fhstatfs_args /* { struct fhandle *u_fhp; struct statfs *buf; } */ *uap; { struct statfs sf; fhandle_t fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error) return (error); error = kern_fhstatfs(td, fh, &sf); if (error) return (error); return (copyout(&sf, uap->buf, sizeof(sf))); } int kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) { struct statfs *sp; struct mount *mp; struct vnode *vp; int error; error = suser(td); if (error) return (error); mtx_lock(&Giant); if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { mtx_unlock(&Giant); return (ESTALE); } error = VFS_FHTOVP(mp, &fh.fh_fid, &vp); if (error) { mtx_unlock(&Giant); return (error); } mp = vp->v_mount; sp = &mp->mnt_stat; vput(vp); error = prison_canseemount(td->td_ucred, mp); if (error) return (error); #ifdef MAC error = mac_check_mount_stat(td->td_ucred, mp); if (error) { mtx_unlock(&Giant); return (error); } #endif /* * Set these in case the underlying filesystem fails to do so. */ sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = VFS_STATFS(mp, sp, td); mtx_unlock(&Giant); if (error) return (error); *buf = *sp; return (0); } /* * Syscall to push extended attribute configuration information into the * VFS. Accepts a path, which it converts to a mountpoint, as well as * a command (int cmd), and attribute name and misc data. For now, the * attribute name is left in userspace for consumption by the VFS_op. * It will probably be changed to be copied into sysspace by the * syscall in the future, once issues with various consumers of the * attribute code have raised their hands. * * Currently this is used only by UFS Extended Attributes. */ int extattrctl(td, uap) struct thread *td; struct extattrctl_args /* { const char *path; int cmd; const char *filename; int attrnamespace; const char *attrname; } */ *uap; { struct vnode *filename_vp; struct nameidata nd; struct mount *mp, *mp_writable; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, fnvfslocked, error; /* * uap->attrname is not always defined. We check again later when we * invoke the VFS call so as to pass in NULL there if needed. */ if (uap->attrname != NULL) { error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); } vfslocked = fnvfslocked = 0; /* * uap->filename is not always defined. If it is, grab a vnode lock, * which VFS_EXTATTRCTL() will later release. */ filename_vp = NULL; if (uap->filename != NULL) { NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->filename, td); error = namei(&nd); if (error) return (error); fnvfslocked = NDHASGIANT(&nd); filename_vp = nd.ni_vp; NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK); } /* uap->path is always defined. */ NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) { if (filename_vp != NULL) vput(filename_vp); goto out; } vfslocked = NDHASGIANT(&nd); mp = nd.ni_vp->v_mount; error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH); NDFREE(&nd, 0); if (error) { if (filename_vp != NULL) vput(filename_vp); goto out; } error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace, uap->attrname != NULL ? attrname : NULL, td); vn_finished_write(mp_writable); /* * VFS_EXTATTRCTL will have unlocked, but not de-ref'd, * filename_vp, so vrele it if it is defined. */ if (filename_vp != NULL) vrele(filename_vp); out: VFS_UNLOCK_GIANT(fnvfslocked); VFS_UNLOCK_GIANT(vfslocked); return (error); } /*- * Set a named extended attribute on a file or directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer "attrname", userspace buffer * pointer "data", buffer length "nbytes", thread "td". * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname, void *data, size_t nbytes, struct thread *td) { struct mount *mp; struct uio auio; struct iovec aiov; ssize_t cnt; int error; VFS_ASSERT_GIANT(vp->v_mount); error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); aiov.iov_base = data; aiov.iov_len = nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; if (nbytes > INT_MAX) { error = EINVAL; goto done; } auio.uio_resid = nbytes; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; cnt = nbytes; #ifdef MAC error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace, attrname, &auio); if (error) goto done; #endif error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, td->td_ucred, td); cnt -= auio.uio_resid; td->td_retval[0] = cnt; done: VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } int extattr_set_fd(td, uap) struct thread *td; struct extattr_set_fd_args /* { int fd; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct file *fp; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = extattr_set_vp(fp->f_vnode, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); fdrop(fp, td); VFS_UNLOCK_GIANT(vfslocked); return (error); } int extattr_set_file(td, uap) struct thread *td; struct extattr_set_file_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } int extattr_set_link(td, uap) struct thread *td; struct extattr_set_link_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /*- * Get a named extended attribute on a file or directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer "attrname", userspace buffer * pointer "data", buffer length "nbytes", thread "td". * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname, void *data, size_t nbytes, struct thread *td) { struct uio auio, *auiop; struct iovec aiov; ssize_t cnt; size_t size, *sizep; int error; VFS_ASSERT_GIANT(vp->v_mount); VOP_LEASE(vp, td, td->td_ucred, LEASE_READ); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* * Slightly unusual semantics: if the user provides a NULL data * pointer, they don't want to receive the data, just the * maximum read length. */ auiop = NULL; sizep = NULL; cnt = 0; if (data != NULL) { aiov.iov_base = data; aiov.iov_len = nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; if (nbytes > INT_MAX) { error = EINVAL; goto done; } auio.uio_resid = nbytes; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auiop = &auio; cnt = nbytes; } else sizep = &size; #ifdef MAC error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace, attrname, &auio); if (error) goto done; #endif error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep, td->td_ucred, td); if (auiop != NULL) { cnt -= auio.uio_resid; td->td_retval[0] = cnt; } else td->td_retval[0] = size; done: VOP_UNLOCK(vp, 0, td); return (error); } int extattr_get_fd(td, uap) struct thread *td; struct extattr_get_fd_args /* { int fd; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct file *fp; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = extattr_get_vp(fp->f_vnode, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); fdrop(fp, td); VFS_UNLOCK_GIANT(vfslocked); return (error); } int extattr_get_file(td, uap) struct thread *td; struct extattr_get_file_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } int extattr_get_link(td, uap) struct thread *td; struct extattr_get_link_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * extattr_delete_vp(): Delete a named extended attribute on a file or * directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer "attrname", proc "p" * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname, struct thread *td) { struct mount *mp; int error; VFS_ASSERT_GIANT(vp->v_mount); error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); #ifdef MAC error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace, attrname); if (error) goto done; #endif error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred, td); if (error == EOPNOTSUPP) error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred, td); #ifdef MAC done: #endif VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } int extattr_delete_fd(td, uap) struct thread *td; struct extattr_delete_fd_args /* { int fd; int attrnamespace; const char *attrname; } */ *uap; { struct file *fp; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = extattr_delete_vp(fp->f_vnode, uap->attrnamespace, attrname, td); fdrop(fp, td); VFS_UNLOCK_GIANT(vfslocked); return (error); } int extattr_delete_file(td, uap) struct thread *td; struct extattr_delete_file_args /* { const char *path; int attrnamespace; const char *attrname; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return(error); NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return(error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return(error); } int extattr_delete_link(td, uap) struct thread *td; struct extattr_delete_link_args /* { const char *path; int attrnamespace; const char *attrname; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return(error); NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return(error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return(error); } /*- * Retrieve a list of extended attributes on a file or directory. * * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace", * userspace buffer pointer "data", buffer length "nbytes", * thread "td". * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data, size_t nbytes, struct thread *td) { struct uio auio, *auiop; size_t size, *sizep; struct iovec aiov; ssize_t cnt; int error; VFS_ASSERT_GIANT(vp->v_mount); VOP_LEASE(vp, td, td->td_ucred, LEASE_READ); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); auiop = NULL; sizep = NULL; cnt = 0; if (data != NULL) { aiov.iov_base = data; aiov.iov_len = nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; if (nbytes > INT_MAX) { error = EINVAL; goto done; } auio.uio_resid = nbytes; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auiop = &auio; cnt = nbytes; } else sizep = &size; #ifdef MAC error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace); if (error) goto done; #endif error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep, td->td_ucred, td); if (auiop != NULL) { cnt -= auio.uio_resid; td->td_retval[0] = cnt; } else td->td_retval[0] = size; done: VOP_UNLOCK(vp, 0, td); return (error); } int extattr_list_fd(td, uap) struct thread *td; struct extattr_list_fd_args /* { int fd; int attrnamespace; void *data; size_t nbytes; } */ *uap; { struct file *fp; int vfslocked, error; error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data, uap->nbytes, td); fdrop(fp, td); VFS_UNLOCK_GIANT(vfslocked); return (error); } int extattr_list_file(td, uap) struct thread*td; struct extattr_list_file_args /* { const char *path; int attrnamespace; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; int vfslocked, error; NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data, uap->nbytes, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } int extattr_list_link(td, uap) struct thread*td; struct extattr_list_link_args /* { const char *path; int attrnamespace; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; int vfslocked, error; NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data, uap->nbytes, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } Index: head/sys/kern/vfs_lookup.c =================================================================== --- head/sys/kern/vfs_lookup.c (revision 155384) +++ head/sys/kern/vfs_lookup.c (revision 155385) @@ -1,1065 +1,1065 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include "opt_mac.h" #include "opt_vfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #define NAMEI_DIAGNOSTIC 1 #undef NAMEI_DIAGNOSTIC /* * Allocation zone for namei */ uma_zone_t namei_zone; static void nameiinit(void *dummy __unused) { namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL) #ifdef LOOKUP_SHARED static int lookup_shared = 1; #else static int lookup_shared = 0; #endif SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RW, &lookup_shared, 0, "Enables/Disables shared locks for path name translation"); /* * Convert a pathname into a pointer to a locked inode. * * The FOLLOW flag is set when symbolic links are to be followed * when they occur at the end of the name translation process. * Symbolic links are always followed for all other pathname * components other than the last. * * The segflg defines whether the name is to be copied from user * space or kernel space. * * Overall outline of namei: * * copy in name * get starting directory * while (!done && !error) { * call lookup to search path. * if symbolic link, massage name in buffer and continue * } */ int namei(ndp) register struct nameidata *ndp; { register struct filedesc *fdp; /* pointer to file descriptor state */ register char *cp; /* pointer into pathname argument */ register struct vnode *dp; /* the directory we are searching */ struct iovec aiov; /* uio for reading symbolic links */ struct uio auio; int error, linklen; struct componentname *cnp = &ndp->ni_cnd; struct thread *td = cnp->cn_thread; struct proc *p = td->td_proc; int vfslocked; KASSERT((cnp->cn_flags & MPSAFE) != 0 || mtx_owned(&Giant) != 0, ("NOT MPSAFE and Giant not held")); ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc")); KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0, ("namei: nameiop contaminated with flags")); KASSERT((cnp->cn_flags & OPMASK) == 0, ("namei: flags contaminated with nameiops")); if (!lookup_shared) cnp->cn_flags &= ~LOCKSHARED; fdp = p->p_fd; /* * Get a buffer for the name to be translated, and copy the * name into the buffer. */ if ((cnp->cn_flags & HASBUF) == 0) cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); if (ndp->ni_segflg == UIO_SYSSPACE) error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, (size_t *)&ndp->ni_pathlen); else error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, MAXPATHLEN, (size_t *)&ndp->ni_pathlen); /* If we are auditing the kernel pathname, save the user pathname. */ if (cnp->cn_flags & AUDITVNODE1) AUDIT_ARG(upath, td, cnp->cn_pnbuf, ARG_UPATH1); if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG(upath, td, cnp->cn_pnbuf, ARG_UPATH2); /* * Don't allow empty pathnames. */ if (!error && *cnp->cn_pnbuf == '\0') error = ENOENT; if (error) { uma_zfree(namei_zone, cnp->cn_pnbuf); #ifdef DIAGNOSTIC cnp->cn_pnbuf = NULL; cnp->cn_nameptr = NULL; #endif ndp->ni_vp = NULL; return (error); } ndp->ni_loopcnt = 0; #ifdef KTRACE if (KTRPOINT(td, KTR_NAMEI)) { KASSERT(cnp->cn_thread == curthread, ("namei not using curthread")); ktrnamei(cnp->cn_pnbuf); } #endif /* * Get starting point for the translation. */ FILEDESC_LOCK(fdp); ndp->ni_rootdir = fdp->fd_rdir; ndp->ni_topdir = fdp->fd_jdir; dp = fdp->fd_cdir; vfslocked = VFS_LOCK_GIANT(dp->v_mount); VREF(dp); FILEDESC_UNLOCK(fdp); for (;;) { /* * Check if root directory should replace current directory. * Done at start of translation and after symbolic link. */ cnp->cn_nameptr = cnp->cn_pnbuf; if (*(cnp->cn_nameptr) == '/') { vrele(dp); VFS_UNLOCK_GIANT(vfslocked); while (*(cnp->cn_nameptr) == '/') { cnp->cn_nameptr++; ndp->ni_pathlen--; } dp = ndp->ni_rootdir; vfslocked = VFS_LOCK_GIANT(dp->v_mount); VREF(dp); } if (vfslocked) ndp->ni_cnd.cn_flags |= GIANTHELD; ndp->ni_startdir = dp; error = lookup(ndp); if (error) { uma_zfree(namei_zone, cnp->cn_pnbuf); #ifdef DIAGNOSTIC cnp->cn_pnbuf = NULL; cnp->cn_nameptr = NULL; #endif return (error); } vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; ndp->ni_cnd.cn_flags &= ~GIANTHELD; /* * Check for symbolic link */ if ((cnp->cn_flags & ISSYMLINK) == 0) { if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) { uma_zfree(namei_zone, cnp->cn_pnbuf); #ifdef DIAGNOSTIC cnp->cn_pnbuf = NULL; cnp->cn_nameptr = NULL; #endif } else cnp->cn_flags |= HASBUF; if ((cnp->cn_flags & MPSAFE) == 0) { VFS_UNLOCK_GIANT(vfslocked); } else if (vfslocked) ndp->ni_cnd.cn_flags |= GIANTHELD; return (0); } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; break; } #ifdef MAC if ((cnp->cn_flags & NOMACCHECK) == 0) { error = mac_check_vnode_readlink(td->td_ucred, ndp->ni_vp); if (error) break; } #endif if (ndp->ni_pathlen > 1) cp = uma_zalloc(namei_zone, M_WAITOK); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = (struct thread *)0; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); error = ENOENT; break; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); error = ENAMETOOLONG; break; } if (ndp->ni_pathlen > 1) { bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); uma_zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; vput(ndp->ni_vp); dp = ndp->ni_dvp; } uma_zfree(namei_zone, cnp->cn_pnbuf); #ifdef DIAGNOSTIC cnp->cn_pnbuf = NULL; cnp->cn_nameptr = NULL; #endif vput(ndp->ni_vp); ndp->ni_vp = NULL; vrele(ndp->ni_dvp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Search a pathname. * This is a very central and rather complicated routine. * * The pathname is pointed to by ni_ptr and is of length ni_pathlen. * The starting directory is taken from ni_startdir. The pathname is * descended until done, or a symbolic link is encountered. The variable * ni_more is clear if the path is completed; it is set to one if a * symbolic link needing interpretation is encountered. * * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on * whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it, the parent directory is returned * locked. If flag has WANTPARENT or'ed into it, the parent directory is * returned unlocked. Otherwise the parent directory is not returned. If * the target of the pathname exists and LOCKLEAF is or'ed into the flag * the target is returned locked, otherwise it is returned unlocked. * When creating or renaming and LOCKPARENT is specified, the target may not * be ".". When deleting and LOCKPARENT is specified, the target may be ".". * * Overall outline of lookup: * * dirloop: * identify next component of name at ndp->ni_ptr * handle degenerate case where name is null string * if .. and crossing mount points and on mounted filesys, find parent * call VOP_LOOKUP routine for next component name * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set * component vnode returned in ni_vp (if it exists), locked. * if result vnode is mounted on and crossing mount points, * find mounted on vnode * if more components of name, do next level at dirloop * return the answer in ni_vp, locked if LOCKLEAF set * if LOCKPARENT set, return locked parent in ni_dvp * if WANTPARENT set, return unlocked parent in ni_dvp */ int lookup(ndp) register struct nameidata *ndp; { register char *cp; /* pointer into pathname argument */ register struct vnode *dp = 0; /* the directory we are searching */ struct vnode *tdp; /* saved dp */ struct mount *mp; /* mount table entry */ int docache; /* == 0 do not cache last component */ int wantparent; /* 1 => wantparent or lockparent flag */ int rdonly; /* lookup read-only flag bit */ int trailing_slash; int error = 0; int dpunlocked = 0; /* dp has already been unlocked */ struct componentname *cnp = &ndp->ni_cnd; struct thread *td = cnp->cn_thread; int vfslocked; int tvfslocked; int dvfslocked; /* * Setup: break out flag bits into variables. */ vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; dvfslocked = 0; ndp->ni_cnd.cn_flags &= ~GIANTHELD; wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); KASSERT(cnp->cn_nameiop == LOOKUP || wantparent, ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT.")); docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; if (cnp->cn_nameiop == DELETE || (wantparent && cnp->cn_nameiop != CREATE && cnp->cn_nameiop != LOOKUP)) docache = 0; rdonly = cnp->cn_flags & RDONLY; cnp->cn_flags &= ~ISSYMLINK; ndp->ni_dvp = NULL; /* * We use shared locks until we hit the parent of the last cn then * we adjust based on the requesting flags. */ if (lookup_shared) cnp->cn_lkflags = LK_SHARED; else cnp->cn_lkflags = LK_EXCLUSIVE; dp = ndp->ni_startdir; ndp->ni_startdir = NULLVP; vn_lock(dp, cnp->cn_lkflags | LK_RETRY, td); dirloop: /* * Search a new directory. * * The last component of the filename is left accessible via * cnp->cn_nameptr for callers that need the name. Callers needing * the name set the SAVENAME flag. When done, they assume * responsibility for freeing the pathname buffer. */ cnp->cn_consume = 0; for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) continue; cnp->cn_namelen = cp - cnp->cn_nameptr; if (cnp->cn_namelen > NAME_MAX) { error = ENAMETOOLONG; goto bad; } #ifdef NAMEI_DIAGNOSTIC { char c = *cp; *cp = '\0'; printf("{%s}: ", cnp->cn_nameptr); *cp = c; } #endif ndp->ni_pathlen -= cnp->cn_namelen; ndp->ni_next = cp; /* * Replace multiple slashes by a single slash and trailing slashes * by a null. This must be done before VOP_LOOKUP() because some * fs's don't know about trailing slashes. Remember if there were * trailing slashes to handle symlinks, existing non-directories * and non-existing files that won't be directories specially later. */ trailing_slash = 0; while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { cp++; ndp->ni_pathlen--; if (*cp == '\0') { trailing_slash = 1; *ndp->ni_next = '\0'; /* XXX for direnter() ... */ } } ndp->ni_next = cp; cnp->cn_flags |= MAKEENTRY; if (*cp == '\0' && docache == 0) cnp->cn_flags &= ~MAKEENTRY; if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') cnp->cn_flags |= ISDOTDOT; else cnp->cn_flags &= ~ISDOTDOT; if (*ndp->ni_next == 0) cnp->cn_flags |= ISLASTCN; else cnp->cn_flags &= ~ISLASTCN; /* * Check for degenerate name (e.g. / or "") * which is a way of talking about a directory, * e.g. like "/." or ".". */ if (cnp->cn_nameptr[0] == '\0') { if (dp->v_type != VDIR) { error = ENOTDIR; goto bad; } if (cnp->cn_nameiop != LOOKUP) { error = EISDIR; goto bad; } if (wantparent) { ndp->ni_dvp = dp; VREF(dp); } ndp->ni_vp = dp; if (cnp->cn_flags & AUDITVNODE1) AUDIT_ARG(vnode, dp, ARG_VNODE1); else if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG(vnode, dp, ARG_VNODE2); if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) VOP_UNLOCK(dp, 0, td); /* XXX This should probably move to the top of function. */ if (cnp->cn_flags & SAVESTART) panic("lookup: SAVESTART"); goto success; } /* * Handle "..": four special cases. * 1. Return an error if this is the last component of * the name and the operation is DELETE or RENAME. * 2. If at root directory (e.g. after chroot) * or at absolute root directory * then ignore it so can't get out. * 3. If this vnode is the root of a mounted * filesystem, then replace it with the * vnode which was mounted on so we take the * .. in the other filesystem. * 4. If the vnode is the top directory of * the jail or chroot, don't let them out. */ if (cnp->cn_flags & ISDOTDOT) { if ((cnp->cn_flags & ISLASTCN) != 0 && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EINVAL; goto bad; } for (;;) { if (dp == ndp->ni_rootdir || dp == ndp->ni_topdir || dp == rootvnode) { ndp->ni_dvp = dp; ndp->ni_vp = dp; VREF(dp); goto nextname; } if ((dp->v_vflag & VV_ROOT) == 0 || (cnp->cn_flags & NOCROSSMOUNT)) break; - if (dp->v_mount == NULL) { /* forced unmount */ + if (dp->v_iflag & VI_DOOMED) { /* forced unmount */ error = EBADF; goto bad; } tdp = dp; dp = dp->v_mount->mnt_vnodecovered; tvfslocked = vfslocked; vfslocked = VFS_LOCK_GIANT(dp->v_mount); VREF(dp); vput(tdp); VFS_UNLOCK_GIANT(tvfslocked); vn_lock(dp, cnp->cn_lkflags | LK_RETRY, td); } } /* * We now have a segment name to search for, and a directory to search. */ unionlookup: #ifdef MAC if ((cnp->cn_flags & NOMACCHECK) == 0) { error = mac_check_vnode_lookup(td->td_ucred, dp, cnp); if (error) goto bad; } #endif ndp->ni_dvp = dp; ndp->ni_vp = NULL; ASSERT_VOP_LOCKED(dp, "lookup"); /* * If we have a shared lock we may need to upgrade the lock for the * last operation. */ if (VOP_ISLOCKED(dp, td) == LK_SHARED && (cnp->cn_flags & ISLASTCN) && (cnp->cn_flags & LOCKPARENT)) vn_lock(dp, LK_UPGRADE|LK_RETRY, td); /* * If we're looking up the last component and we need an exclusive * lock, adjust our lkflags. */ if ((cnp->cn_flags & (ISLASTCN|LOCKSHARED|LOCKLEAF)) == (ISLASTCN|LOCKLEAF)) cnp->cn_lkflags = LK_EXCLUSIVE; #ifdef NAMEI_DIAGNOSTIC vprint("lookup in", dp); #endif if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) { KASSERT(ndp->ni_vp == NULL, ("leaf should be empty")); #ifdef NAMEI_DIAGNOSTIC printf("not found\n"); #endif if ((error == ENOENT) && (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) && (dp->v_mount->mnt_flag & MNT_UNION)) { tdp = dp; dp = dp->v_mount->mnt_vnodecovered; tvfslocked = vfslocked; vfslocked = VFS_LOCK_GIANT(dp->v_mount); VREF(dp); vput(tdp); VFS_UNLOCK_GIANT(tvfslocked); vn_lock(dp, cnp->cn_lkflags | LK_RETRY, td); goto unionlookup; } if (error != EJUSTRETURN) goto bad; /* * If creating and at end of pathname, then can consider * allowing file to be created. */ if (rdonly) { error = EROFS; goto bad; } if (*cp == '\0' && trailing_slash && !(cnp->cn_flags & WILLBEDIR)) { error = ENOENT; goto bad; } if ((cnp->cn_flags & LOCKPARENT) == 0) VOP_UNLOCK(dp, 0, td); /* * This is a temporary assert to make sure I know what the * behavior here was. */ KASSERT((cnp->cn_flags & (WANTPARENT|LOCKPARENT)) != 0, ("lookup: Unhandled case.")); /* * We return with ni_vp NULL to indicate that the entry * doesn't currently exist, leaving a pointer to the * (possibly locked) directory inode in ndp->ni_dvp. */ if (cnp->cn_flags & SAVESTART) { ndp->ni_startdir = ndp->ni_dvp; VREF(ndp->ni_startdir); } goto success; } #ifdef NAMEI_DIAGNOSTIC printf("found\n"); #endif /* * Take into account any additional components consumed by * the underlying filesystem. */ if (cnp->cn_consume > 0) { cnp->cn_nameptr += cnp->cn_consume; ndp->ni_next += cnp->cn_consume; ndp->ni_pathlen -= cnp->cn_consume; cnp->cn_consume = 0; } dp = ndp->ni_vp; /* * Check to see if the vnode has been mounted on; * if so find the root of the mounted filesystem. */ while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && (cnp->cn_flags & NOCROSSMOUNT) == 0) { KASSERT(dp != ndp->ni_dvp, ("XXX")); if (vfs_busy(mp, 0, 0, td)) continue; vput(dp); dvfslocked = vfslocked; vfslocked = VFS_LOCK_GIANT(mp); VOP_UNLOCK(ndp->ni_dvp, 0, td); error = VFS_ROOT(mp, cnp->cn_lkflags, &tdp, td); VOP_LOCK(ndp->ni_dvp, cnp->cn_lkflags | LK_RETRY, td); vfs_unbusy(mp, td); if (error) { dpunlocked = 1; goto bad2; } ndp->ni_vp = dp = tdp; } /* * Check for symbolic link */ if ((dp->v_type == VLNK) && ((cnp->cn_flags & FOLLOW) || trailing_slash || *ndp->ni_next == '/')) { cnp->cn_flags |= ISSYMLINK; - if (dp->v_mount == NULL) { + if (dp->v_iflag & VI_DOOMED) { /* We can't know whether the directory was mounted with * NOSYMFOLLOW, so we can't follow safely. */ error = EBADF; goto bad2; } if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) { error = EACCES; goto bad2; } /* * Symlink code always expects an unlocked dvp. */ if (ndp->ni_dvp != ndp->ni_vp) VOP_UNLOCK(ndp->ni_dvp, 0, td); goto success; } /* * Check for bogus trailing slashes. */ if (trailing_slash && dp->v_type != VDIR) { error = ENOTDIR; goto bad2; } nextname: /* * Not a symbolic link. If more pathname, * continue at next component, else return. */ KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/', ("lookup: invalid path state.")); if (*ndp->ni_next == '/') { cnp->cn_nameptr = ndp->ni_next; while (*cnp->cn_nameptr == '/') { cnp->cn_nameptr++; ndp->ni_pathlen--; } if (ndp->ni_dvp != dp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); VFS_UNLOCK_GIANT(dvfslocked); dvfslocked = 0; goto dirloop; } /* * Disallow directory write attempts on read-only filesystems. */ if (rdonly && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { error = EROFS; goto bad2; } if (cnp->cn_flags & SAVESTART) { ndp->ni_startdir = ndp->ni_dvp; VREF(ndp->ni_startdir); } if (!wantparent) { if (ndp->ni_dvp != dp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); VFS_UNLOCK_GIANT(dvfslocked); dvfslocked = 0; } else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) VOP_UNLOCK(ndp->ni_dvp, 0, td); if (cnp->cn_flags & AUDITVNODE1) AUDIT_ARG(vnode, dp, ARG_VNODE1); else if (cnp->cn_flags & AUDITVNODE2) AUDIT_ARG(vnode, dp, ARG_VNODE2); if ((cnp->cn_flags & LOCKLEAF) == 0) VOP_UNLOCK(dp, 0, td); success: if (vfslocked && dvfslocked) VFS_UNLOCK_GIANT(dvfslocked); /* Only need one */ if (vfslocked || dvfslocked) ndp->ni_cnd.cn_flags |= GIANTHELD; return (0); bad2: if (dp != ndp->ni_dvp) vput(ndp->ni_dvp); else vrele(ndp->ni_dvp); bad: if (!dpunlocked) vput(dp); VFS_UNLOCK_GIANT(vfslocked); VFS_UNLOCK_GIANT(dvfslocked); ndp->ni_cnd.cn_flags &= ~GIANTHELD; ndp->ni_vp = NULL; return (error); } /* * relookup - lookup a path name component * Used by lookup to re-aquire things. */ int relookup(dvp, vpp, cnp) struct vnode *dvp, **vpp; struct componentname *cnp; { struct thread *td = cnp->cn_thread; struct vnode *dp = 0; /* the directory we are searching */ int wantparent; /* 1 => wantparent or lockparent flag */ int rdonly; /* lookup read-only flag bit */ int error = 0; KASSERT(cnp->cn_flags & ISLASTCN, ("relookup: Not given last component.")); /* * Setup: break out flag bits into variables. */ wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); KASSERT(wantparent, ("relookup: parent not wanted.")); rdonly = cnp->cn_flags & RDONLY; cnp->cn_flags &= ~ISSYMLINK; dp = dvp; cnp->cn_lkflags = LK_EXCLUSIVE; vn_lock(dp, LK_EXCLUSIVE | LK_RETRY, td); /* * Search a new directory. * * The last component of the filename is left accessible via * cnp->cn_nameptr for callers that need the name. Callers needing * the name set the SAVENAME flag. When done, they assume * responsibility for freeing the pathname buffer. */ #ifdef NAMEI_DIAGNOSTIC printf("{%s}: ", cnp->cn_nameptr); #endif /* * Check for degenerate name (e.g. / or "") * which is a way of talking about a directory, * e.g. like "/." or ".". */ if (cnp->cn_nameptr[0] == '\0') { if (cnp->cn_nameiop != LOOKUP || wantparent) { error = EISDIR; goto bad; } if (dp->v_type != VDIR) { error = ENOTDIR; goto bad; } if (!(cnp->cn_flags & LOCKLEAF)) VOP_UNLOCK(dp, 0, td); *vpp = dp; /* XXX This should probably move to the top of function. */ if (cnp->cn_flags & SAVESTART) panic("lookup: SAVESTART"); return (0); } if (cnp->cn_flags & ISDOTDOT) panic ("relookup: lookup on dot-dot"); /* * We now have a segment name to search for, and a directory to search. */ #ifdef NAMEI_DIAGNOSTIC vprint("search in:", dp); #endif if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { KASSERT(*vpp == NULL, ("leaf should be empty")); if (error != EJUSTRETURN) goto bad; /* * If creating and at end of pathname, then can consider * allowing file to be created. */ if (rdonly) { error = EROFS; goto bad; } /* ASSERT(dvp == ndp->ni_startdir) */ if (cnp->cn_flags & SAVESTART) VREF(dvp); if ((cnp->cn_flags & LOCKPARENT) == 0) VOP_UNLOCK(dp, 0, td); /* * This is a temporary assert to make sure I know what the * behavior here was. */ KASSERT((cnp->cn_flags & (WANTPARENT|LOCKPARENT)) != 0, ("relookup: Unhandled case.")); /* * We return with ni_vp NULL to indicate that the entry * doesn't currently exist, leaving a pointer to the * (possibly locked) directory inode in ndp->ni_dvp. */ return (0); } dp = *vpp; /* * Disallow directory write attempts on read-only filesystems. */ if (rdonly && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { if (dvp == dp) vrele(dvp); else vput(dvp); error = EROFS; goto bad; } /* * Set the parent lock/ref state to the requested state. */ if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) { if (wantparent) VOP_UNLOCK(dvp, 0, td); else vput(dvp); } else if (!wantparent) vrele(dvp); /* * Check for symbolic link */ KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW), ("relookup: symlink found.\n")); /* ASSERT(dvp == ndp->ni_startdir) */ if (cnp->cn_flags & SAVESTART) VREF(dvp); if ((cnp->cn_flags & LOCKLEAF) == 0) VOP_UNLOCK(dp, 0, td); return (0); bad: vput(dp); *vpp = NULL; return (error); } /* * Free data allocated by namei(); see namei(9) for details. */ void NDFREE(ndp, flags) struct nameidata *ndp; const u_int flags; { int unlock_dvp; int unlock_vp; unlock_dvp = 0; unlock_vp = 0; if (!(flags & NDF_NO_FREE_PNBUF) && (ndp->ni_cnd.cn_flags & HASBUF)) { uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); ndp->ni_cnd.cn_flags &= ~HASBUF; } if (!(flags & NDF_NO_VP_UNLOCK) && (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) unlock_vp = 1; if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) { if (unlock_vp) { vput(ndp->ni_vp); unlock_vp = 0; } else vrele(ndp->ni_vp); ndp->ni_vp = NULL; } if (unlock_vp) VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_thread); if (!(flags & NDF_NO_DVP_UNLOCK) && (ndp->ni_cnd.cn_flags & LOCKPARENT) && ndp->ni_dvp != ndp->ni_vp) unlock_dvp = 1; if (!(flags & NDF_NO_DVP_RELE) && (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { if (unlock_dvp) { vput(ndp->ni_dvp); unlock_dvp = 0; } else vrele(ndp->ni_dvp); ndp->ni_dvp = NULL; } if (unlock_dvp) VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_thread); if (!(flags & NDF_NO_STARTDIR_RELE) && (ndp->ni_cnd.cn_flags & SAVESTART)) { vrele(ndp->ni_startdir); ndp->ni_startdir = NULL; } } /* * Determine if there is a suitable alternate filename under the specified * prefix for the specified path. If the create flag is set, then the * alternate prefix will be used so long as the parent directory exists. * This is used by the various compatiblity ABIs so that Linux binaries prefer * files under /compat/linux for example. The chosen path (whether under * the prefix or under /) is returned in a kernel malloc'd buffer pointed * to by pathbuf. The caller is responsible for free'ing the buffer from * the M_TEMP bucket if one is returned. */ int kern_alternate_path(struct thread *td, const char *prefix, char *path, enum uio_seg pathseg, char **pathbuf, int create) { struct nameidata nd, ndroot; char *ptr, *buf, *cp; size_t len, sz; int error; buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); *pathbuf = buf; /* Copy the prefix into the new pathname as a starting point. */ len = strlcpy(buf, prefix, MAXPATHLEN); if (len >= MAXPATHLEN) { *pathbuf = NULL; free(buf, M_TEMP); return (EINVAL); } sz = MAXPATHLEN - len; ptr = buf + len; /* Append the filename to the prefix. */ if (pathseg == UIO_SYSSPACE) error = copystr(path, ptr, sz, &len); else error = copyinstr(path, ptr, sz, &len); if (error) { *pathbuf = NULL; free(buf, M_TEMP); return (error); } /* Only use a prefix with absolute pathnames. */ if (*ptr != '/') { error = EINVAL; goto keeporig; } /* * We know that there is a / somewhere in this pathname. * Search backwards for it, to find the file's parent dir * to see if it exists in the alternate tree. If it does, * and we want to create a file (cflag is set). We don't * need to worry about the root comparison in this case. */ if (create) { for (cp = &ptr[len] - 1; *cp != '/'; cp--); *cp = '\0'; NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td); error = namei(&nd); *cp = '/'; if (error != 0) goto keeporig; } else { NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td); error = namei(&nd); if (error != 0) goto keeporig; /* * We now compare the vnode of the prefix to the one * vnode asked. If they resolve to be the same, then we * ignore the match so that the real root gets used. * This avoids the problem of traversing "../.." to find the * root directory and never finding it, because "/" resolves * to the emulation root directory. This is expensive :-( */ NDINIT(&ndroot, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, prefix, td); /* We shouldn't ever get an error from this namei(). */ error = namei(&ndroot); if (error == 0) { if (nd.ni_vp == ndroot.ni_vp) error = ENOENT; NDFREE(&ndroot, NDF_ONLY_PNBUF); vrele(ndroot.ni_vp); VFS_UNLOCK_GIANT(NDHASGIANT(&ndroot)); } } NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(NDHASGIANT(&nd)); keeporig: /* If there was an error, use the original path name. */ if (error) bcopy(ptr, buf, len); return (error); } Index: head/sys/kern/vfs_syscalls.c =================================================================== --- head/sys/kern/vfs_syscalls.c (revision 155384) +++ head/sys/kern/vfs_syscalls.c (revision 155385) @@ -1,4965 +1,4967 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int chroot_refuse_vdir_fds(struct filedesc *fdp); static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); static int setfown(struct thread *td, struct vnode *, uid_t, gid_t); static int setfmode(struct thread *td, struct vnode *, int); static int setfflags(struct thread *td, struct vnode *, int); static int setutimes(struct thread *td, struct vnode *, const struct timespec *, int, int); static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, struct thread *td); static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data, size_t nbytes, struct thread *td); int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *); /* * The module initialization routine for POSIX asynchronous I/O will * set this to the version of AIO that it implements. (Zero means * that it is not implemented.) This value is used here by pathconf() * and in kern_descrip.c by fpathconf(). */ int async_io_version; /* * Sync each mounted filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif #ifdef DEBUG static int syncprt = 0; SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); #endif /* ARGSUSED */ int sync(td, uap) struct thread *td; struct sync_args *uap; { struct mount *mp, *nmp; int asyncflag; mtx_lock(&Giant); mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if ((mp->mnt_flag & MNT_RDONLY) == 0 && vn_start_write(NULL, &mp, V_NOWAIT) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_NOWAIT, td); mp->mnt_flag |= asyncflag; vn_finished_write(mp); } mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } mtx_unlock(&mountlist_mtx); #if 0 /* * XXX don't call vfs_bufstats() yet because that routine * was not imported in the Lite2 merge. */ #ifdef DIAGNOSTIC if (syncprt) vfs_bufstats(); #endif /* DIAGNOSTIC */ #endif mtx_unlock(&Giant); return (0); } /* XXX PRISON: could be per prison flag */ static int prison_quotas; #if 0 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); #endif /* * Change filesystem quotas. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct quotactl_args { char *path; int cmd; int uid; caddr_t arg; }; #endif int quotactl(td, uap) struct thread *td; register struct quotactl_args /* { char *path; int cmd; int uid; caddr_t arg; } */ *uap; { struct mount *mp, *vmp; int error; struct nameidata nd; if (jailed(td->td_ucred) && !prison_quotas) return (EPERM); mtx_lock(&Giant); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) { mtx_unlock(&Giant); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); error = vn_start_write(nd.ni_vp, &vmp, V_WAIT | PCATCH); mp = nd.ni_vp->v_mount; vrele(nd.ni_vp); if (error) { mtx_unlock(&Giant); return (error); } error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, td); vn_finished_write(vmp); mtx_unlock(&Giant); return (error); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct statfs_args { char *path; struct statfs *buf; }; #endif int statfs(td, uap) struct thread *td; register struct statfs_args /* { char *path; struct statfs *buf; } */ *uap; { struct statfs sf; int error; error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); if (error == 0) error = copyout(&sf, uap->buf, sizeof(sf)); return (error); } int kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, struct statfs *buf) { struct mount *mp; struct statfs *sp, sb; int error; struct nameidata nd; mtx_lock(&Giant); NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td); error = namei(&nd); if (error) { mtx_unlock(&Giant); return (error); } mp = nd.ni_vp->v_mount; sp = &mp->mnt_stat; NDFREE(&nd, NDF_ONLY_PNBUF); vrele(nd.ni_vp); #ifdef MAC error = mac_check_mount_stat(td->td_ucred, mp); if (error) { mtx_unlock(&Giant); return (error); } #endif /* * Set these in case the underlying filesystem fails to do so. */ sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = VFS_STATFS(mp, sp, td); if (error) { mtx_unlock(&Giant); return (error); } if (suser(td)) { bcopy(sp, &sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; prison_enforce_statfs(td->td_ucred, mp, &sb); sp = &sb; } mtx_unlock(&Giant); *buf = *sp; return (0); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct fstatfs_args { int fd; struct statfs *buf; }; #endif int fstatfs(td, uap) struct thread *td; register struct fstatfs_args /* { int fd; struct statfs *buf; } */ *uap; { struct statfs sf; int error; error = kern_fstatfs(td, uap->fd, &sf); if (error == 0) error = copyout(&sf, uap->buf, sizeof(sf)); return (error); } int kern_fstatfs(struct thread *td, int fd, struct statfs *buf) { struct file *fp; struct mount *mp; struct statfs *sp, sb; + struct vnode *vp; int error; error = getvnode(td->td_proc->p_fd, fd, &fp); if (error) return (error); - mp = fp->f_vnode->v_mount; + vp = fp->f_vnode; + mp = vp->v_mount; fdrop(fp, td); - if (mp == NULL) + if (vp->v_iflag & VI_DOOMED) return (EBADF); mtx_lock(&Giant); #ifdef MAC error = mac_check_mount_stat(td->td_ucred, mp); if (error) { mtx_unlock(&Giant); return (error); } #endif sp = &mp->mnt_stat; /* * Set these in case the underlying filesystem fails to do so. */ sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = VFS_STATFS(mp, sp, td); if (error) { mtx_unlock(&Giant); return (error); } if (suser(td)) { bcopy(sp, &sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; prison_enforce_statfs(td->td_ucred, mp, &sb); sp = &sb; } mtx_unlock(&Giant); *buf = *sp; return (0); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct getfsstat_args { struct statfs *buf; long bufsize; int flags; }; #endif int getfsstat(td, uap) struct thread *td; register struct getfsstat_args /* { struct statfs *buf; long bufsize; int flags; } */ *uap; { return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, uap->flags)); } /* * If (bufsize > 0 && bufseg == UIO_SYSSPACE) * The caller is responsible for freeing memory which will be allocated * in '*buf'. */ int kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, enum uio_seg bufseg, int flags) { struct mount *mp, *nmp; struct statfs *sfsp, *sp, sb; size_t count, maxcount; int error; maxcount = bufsize / sizeof(struct statfs); if (bufsize == 0) sfsp = NULL; else if (bufseg == UIO_USERSPACE) sfsp = *buf; else /* if (bufseg == UIO_SYSSPACE) */ { count = 0; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { count++; } mtx_unlock(&mountlist_mtx); if (maxcount > count) maxcount = count; sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, M_WAITOK); } count = 0; mtx_lock(&Giant); mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { if (prison_canseemount(td->td_ucred, mp) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } #ifdef MAC if (mac_check_mount_stat(td->td_ucred, mp) != 0) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } #endif if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* * Set these in case the underlying filesystem * fails to do so. */ sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; /* * If MNT_NOWAIT or MNT_LAZY is specified, do not * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY * overrides MNT_WAIT. */ if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || (flags & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, td))) { mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); continue; } if (suser(td)) { bcopy(sp, &sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; prison_enforce_statfs(td->td_ucred, mp, &sb); sp = &sb; } if (bufseg == UIO_SYSSPACE) bcopy(sp, sfsp, sizeof(*sp)); else /* if (bufseg == UIO_USERSPACE) */ { error = copyout(sp, sfsp, sizeof(*sp)); if (error) { vfs_unbusy(mp, td); mtx_unlock(&Giant); return (error); } } sfsp++; } count++; mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } mtx_unlock(&mountlist_mtx); mtx_unlock(&Giant); if (sfsp && count > maxcount) td->td_retval[0] = maxcount; else td->td_retval[0] = count; return (0); } #ifdef COMPAT_FREEBSD4 /* * Get old format filesystem statistics. */ static void cvtstatfs(struct statfs *, struct ostatfs *); #ifndef _SYS_SYSPROTO_H_ struct freebsd4_statfs_args { char *path; struct ostatfs *buf; }; #endif int freebsd4_statfs(td, uap) struct thread *td; struct freebsd4_statfs_args /* { char *path; struct ostatfs *buf; } */ *uap; { struct ostatfs osb; struct statfs sf; int error; error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); if (error) return (error); cvtstatfs(&sf, &osb); return (copyout(&osb, uap->buf, sizeof(osb))); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_fstatfs_args { int fd; struct ostatfs *buf; }; #endif int freebsd4_fstatfs(td, uap) struct thread *td; struct freebsd4_fstatfs_args /* { int fd; struct ostatfs *buf; } */ *uap; { struct ostatfs osb; struct statfs sf; int error; error = kern_fstatfs(td, uap->fd, &sf); if (error) return (error); cvtstatfs(&sf, &osb); return (copyout(&osb, uap->buf, sizeof(osb))); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_getfsstat_args { struct ostatfs *buf; long bufsize; int flags; }; #endif int freebsd4_getfsstat(td, uap) struct thread *td; register struct freebsd4_getfsstat_args /* { struct ostatfs *buf; long bufsize; int flags; } */ *uap; { struct statfs *buf, *sp; struct ostatfs osb; size_t count, size; int error; count = uap->bufsize / sizeof(struct ostatfs); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); if (size > 0) { count = td->td_retval[0]; sp = buf; while (count > 0 && error == 0) { cvtstatfs(sp, &osb); error = copyout(&osb, uap->buf, sizeof(osb)); sp++; uap->buf++; count--; } free(buf, M_TEMP); } return (error); } /* * Implement fstatfs() for (NFS) file handles. */ #ifndef _SYS_SYSPROTO_H_ struct freebsd4_fhstatfs_args { struct fhandle *u_fhp; struct ostatfs *buf; }; #endif int freebsd4_fhstatfs(td, uap) struct thread *td; struct freebsd4_fhstatfs_args /* { struct fhandle *u_fhp; struct ostatfs *buf; } */ *uap; { struct ostatfs osb; struct statfs sf; fhandle_t fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error) return (error); error = kern_fhstatfs(td, fh, &sf); if (error) return (error); cvtstatfs(&sf, &osb); return (copyout(&osb, uap->buf, sizeof(osb))); } /* * Convert a new format statfs structure to an old format statfs structure. */ static void cvtstatfs(nsp, osp) struct statfs *nsp; struct ostatfs *osp; { bzero(osp, sizeof(*osp)); osp->f_bsize = MIN(nsp->f_bsize, LONG_MAX); osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); osp->f_blocks = MIN(nsp->f_blocks, LONG_MAX); osp->f_bfree = MIN(nsp->f_bfree, LONG_MAX); osp->f_bavail = MIN(nsp->f_bavail, LONG_MAX); osp->f_files = MIN(nsp->f_files, LONG_MAX); osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); osp->f_owner = nsp->f_owner; osp->f_type = nsp->f_type; osp->f_flags = nsp->f_flags; osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); bcopy(nsp->f_fstypename, osp->f_fstypename, MIN(MFSNAMELEN, OMNAMELEN)); bcopy(nsp->f_mntonname, osp->f_mntonname, MIN(MFSNAMELEN, OMNAMELEN)); bcopy(nsp->f_mntfromname, osp->f_mntfromname, MIN(MFSNAMELEN, OMNAMELEN)); osp->f_fsid = nsp->f_fsid; } #endif /* COMPAT_FREEBSD4 */ /* * Change current working directory to a given file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchdir_args { int fd; }; #endif int fchdir(td, uap) struct thread *td; struct fchdir_args /* { int fd; } */ *uap; { register struct filedesc *fdp = td->td_proc->p_fd; struct vnode *vp, *tdp, *vpold; struct mount *mp; struct file *fp; int vfslocked; int error; if ((error = getvnode(fdp, uap->fd, &fp)) != 0) return (error); vp = fp->f_vnode; VREF(vp); fdrop(fp, td); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type != VDIR) error = ENOTDIR; #ifdef MAC else if ((error = mac_check_vnode_chdir(td->td_ucred, vp)) != 0) { } #endif else error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); while (!error && (mp = vp->v_mountedhere) != NULL) { int tvfslocked; if (vfs_busy(mp, 0, 0, td)) continue; tvfslocked = VFS_LOCK_GIANT(mp); error = VFS_ROOT(mp, LK_EXCLUSIVE, &tdp, td); vfs_unbusy(mp, td); if (error) { VFS_UNLOCK_GIANT(tvfslocked); break; } vput(vp); VFS_UNLOCK_GIANT(vfslocked); vp = tdp; vfslocked = tvfslocked; } if (error) { vput(vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } VOP_UNLOCK(vp, 0, td); VFS_UNLOCK_GIANT(vfslocked); FILEDESC_LOCK_FAST(fdp); vpold = fdp->fd_cdir; fdp->fd_cdir = vp; FILEDESC_UNLOCK_FAST(fdp); vfslocked = VFS_LOCK_GIANT(vpold->v_mount); vrele(vpold); VFS_UNLOCK_GIANT(vfslocked); return (0); } /* * Change current working directory (``.''). */ #ifndef _SYS_SYSPROTO_H_ struct chdir_args { char *path; }; #endif int chdir(td, uap) struct thread *td; struct chdir_args /* { char *path; } */ *uap; { return (kern_chdir(td, uap->path, UIO_USERSPACE)); } int kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) { register struct filedesc *fdp = td->td_proc->p_fd; int error; struct nameidata nd; struct vnode *vp; int vfslocked; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); if ((error = change_dir(nd.ni_vp, td)) != 0) { vput(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } VOP_UNLOCK(nd.ni_vp, 0, td); VFS_UNLOCK_GIANT(vfslocked); NDFREE(&nd, NDF_ONLY_PNBUF); FILEDESC_LOCK_FAST(fdp); vp = fdp->fd_cdir; fdp->fd_cdir = nd.ni_vp; FILEDESC_UNLOCK_FAST(fdp); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); return (0); } /* * Helper function for raised chroot(2) security function: Refuse if * any filedescriptors are open directories. */ static int chroot_refuse_vdir_fds(fdp) struct filedesc *fdp; { struct vnode *vp; struct file *fp; int fd; FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); for (fd = 0; fd < fdp->fd_nfiles ; fd++) { fp = fget_locked(fdp, fd); if (fp == NULL) continue; if (fp->f_type == DTYPE_VNODE) { vp = fp->f_vnode; if (vp->v_type == VDIR) return (EPERM); } } return (0); } /* * This sysctl determines if we will allow a process to chroot(2) if it * has a directory open: * 0: disallowed for all processes. * 1: allowed for processes that were not already chroot(2)'ed. * 2: allowed for all processes. */ static int chroot_allow_open_directories = 1; SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, &chroot_allow_open_directories, 0, ""); /* * Change notion of root (``/'') directory. */ #ifndef _SYS_SYSPROTO_H_ struct chroot_args { char *path; }; #endif int chroot(td, uap) struct thread *td; struct chroot_args /* { char *path; } */ *uap; { int error; struct nameidata nd; int vfslocked; error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) goto error; vfslocked = NDHASGIANT(&nd); if ((error = change_dir(nd.ni_vp, td)) != 0) goto e_vunlock; #ifdef MAC if ((error = mac_check_vnode_chroot(td->td_ucred, nd.ni_vp))) goto e_vunlock; #endif VOP_UNLOCK(nd.ni_vp, 0, td); error = change_root(nd.ni_vp, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); e_vunlock: vput(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); error: NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } /* * Common routine for chroot and chdir. Callers must provide a locked vnode * instance. */ int change_dir(vp, td) struct vnode *vp; struct thread *td; { int error; ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); if (vp->v_type != VDIR) return (ENOTDIR); #ifdef MAC error = mac_check_vnode_chdir(td->td_ucred, vp); if (error) return (error); #endif error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); return (error); } /* * Common routine for kern_chroot() and jail_attach(). The caller is * responsible for invoking suser() and mac_check_chroot() to authorize this * operation. */ int change_root(vp, td) struct vnode *vp; struct thread *td; { struct filedesc *fdp; struct vnode *oldvp; int vfslocked; int error; VFS_ASSERT_GIANT(vp->v_mount); fdp = td->td_proc->p_fd; FILEDESC_LOCK(fdp); if (chroot_allow_open_directories == 0 || (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { error = chroot_refuse_vdir_fds(fdp); if (error) { FILEDESC_UNLOCK(fdp); return (error); } } oldvp = fdp->fd_rdir; fdp->fd_rdir = vp; VREF(fdp->fd_rdir); if (!fdp->fd_jdir) { fdp->fd_jdir = vp; VREF(fdp->fd_jdir); } FILEDESC_UNLOCK(fdp); vfslocked = VFS_LOCK_GIANT(oldvp->v_mount); vrele(oldvp); VFS_UNLOCK_GIANT(vfslocked); return (0); } /* * Check permissions, allocate an open file structure, * and call the device open routine if any. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct open_args { char *path; int flags; int mode; }; #endif int open(td, uap) struct thread *td; register struct open_args /* { char *path; int flags; int mode; } */ *uap; { int error; error = kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode); if (mtx_owned(&Giant)) printf("open: %s: %d\n", uap->path, error); return (error); } int kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, int mode) { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; struct vattr vat; struct mount *mp; int cmode; struct file *nfp; int type, indx, error; struct flock lf; struct nameidata nd; int vfslocked; if ((flags & O_ACCMODE) == O_ACCMODE) return (EINVAL); flags = FFLAGS(flags); error = falloc(td, &nfp, &indx); if (error) return (error); /* An extra reference on `nfp' has been held for us by falloc(). */ fp = nfp; cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW, pathseg, path, td); td->td_dupfd = -1; /* XXX check for fdopen */ error = vn_open(&nd, &flags, cmode, indx); if (error) { /* * If the vn_open replaced the method vector, something * wonderous happened deep below and we just pass it up * pretending we know what we do. */ if (error == ENXIO && fp->f_ops != &badfileops) { fdrop(fp, td); td->td_retval[0] = indx; return (0); } /* * release our own reference */ fdrop(fp, td); /* * handle special fdopen() case. bleh. dupfdopen() is * responsible for dropping the old contents of ofiles[indx] * if it succeeds. */ if ((error == ENODEV || error == ENXIO) && td->td_dupfd >= 0 && /* XXX from fdopen */ (error = dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) { td->td_retval[0] = indx; return (0); } /* * Clean up the descriptor, but only if another thread hadn't * replaced or closed it. */ fdclose(fdp, fp, indx, td); if (error == ERESTART) error = EINTR; return (error); } td->td_dupfd = 0; vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; /* * There should be 2 references on the file, one from the descriptor * table, and one for us. * * Handle the case where someone closed the file (via its file * descriptor) while we were blocked. The end result should look * like opening the file succeeded but it was immediately closed. * We call vn_close() manually because we haven't yet hooked up * the various 'struct file' fields. */ FILEDESC_LOCK(fdp); FILE_LOCK(fp); if (fp->f_count == 1) { mp = vp->v_mount; KASSERT(fdp->fd_ofiles[indx] != fp, ("Open file descriptor lost all refs")); FILE_UNLOCK(fp); FILEDESC_UNLOCK(fdp); VOP_UNLOCK(vp, 0, td); vn_close(vp, flags & FMASK, fp->f_cred, td); VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); td->td_retval[0] = indx; return (0); } fp->f_vnode = vp; if (fp->f_data == NULL) fp->f_data = vp; fp->f_flag = flags & FMASK; if (fp->f_ops == &badfileops) fp->f_ops = &vnops; fp->f_seqcount = 1; fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); FILE_UNLOCK(fp); FILEDESC_UNLOCK(fdp); VOP_UNLOCK(vp, 0, td); if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (flags & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) goto bad; fp->f_flag |= FHASLOCK; } if (flags & O_TRUNC) { if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto bad; VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); VATTR_NULL(&vat); vat.va_size = 0; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); #ifdef MAC error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp); if (error == 0) #endif error = VOP_SETATTR(vp, &vat, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); if (error) goto bad; } VFS_UNLOCK_GIANT(vfslocked); /* * Release our private reference, leaving the one associated with * the descriptor table intact. */ fdrop(fp, td); td->td_retval[0] = indx; return (0); bad: VFS_UNLOCK_GIANT(vfslocked); fdclose(fdp, fp, indx, td); fdrop(fp, td); return (error); } #ifdef COMPAT_43 /* * Create a file. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct ocreat_args { char *path; int mode; }; #endif int ocreat(td, uap) struct thread *td; register struct ocreat_args /* { char *path; int mode; } */ *uap; { return (kern_open(td, uap->path, UIO_USERSPACE, O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); } #endif /* COMPAT_43 */ /* * Create a special file. */ #ifndef _SYS_SYSPROTO_H_ struct mknod_args { char *path; int mode; int dev; }; #endif int mknod(td, uap) struct thread *td; register struct mknod_args /* { char *path; int mode; int dev; } */ *uap; { return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); } int kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, int dev) { struct vnode *vp; struct mount *mp; struct vattr vattr; int error; int whiteout = 0; struct nameidata nd; int vfslocked; switch (mode & S_IFMT) { case S_IFCHR: case S_IFBLK: error = suser(td); break; default: error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL); break; } if (error) return (error); restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); if (vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); return (EEXIST); } else { VATTR_NULL(&vattr); FILEDESC_LOCK_FAST(td->td_proc->p_fd); vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); vattr.va_rdev = dev; whiteout = 0; switch (mode & S_IFMT) { case S_IFMT: /* used by badsect to flag bad sectors */ vattr.va_type = VBAD; break; case S_IFCHR: vattr.va_type = VCHR; break; case S_IFBLK: vattr.va_type = VBLK; break; case S_IFWHT: whiteout = 1; break; default: error = EINVAL; break; } } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); VFS_UNLOCK_GIANT(vfslocked); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } #ifdef MAC if (error == 0 && !whiteout) error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); #endif if (!error) { VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); if (whiteout) error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); else { error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) vput(nd.ni_vp); } } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Create a named pipe. */ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { char *path; int mode; }; #endif int mkfifo(td, uap) struct thread *td; register struct mkfifo_args /* { char *path; int mode; } */ *uap; { return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); } int kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) { struct mount *mp; struct vattr vattr; int error; struct nameidata nd; int vfslocked; restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); if (nd.ni_vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); VFS_UNLOCK_GIANT(vfslocked); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VFIFO; FILEDESC_LOCK_FAST(td->td_proc->p_fd); vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); #ifdef MAC error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error) goto out; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) vput(nd.ni_vp); #ifdef MAC out: #endif vput(nd.ni_dvp); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); NDFREE(&nd, NDF_ONLY_PNBUF); return (error); } /* * Make a hard file link. */ #ifndef _SYS_SYSPROTO_H_ struct link_args { char *path; char *link; }; #endif int link(td, uap) struct thread *td; register struct link_args /* { char *path; char *link; } */ *uap; { int error; error = kern_link(td, uap->path, uap->link, UIO_USERSPACE); return (error); } SYSCTL_DECL(_security_bsd); static int hardlink_check_uid = 0; SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, &hardlink_check_uid, 0, "Unprivileged processes cannot create hard links to files owned by other " "users"); static int hardlink_check_gid = 0; SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, &hardlink_check_gid, 0, "Unprivileged processes cannot create hard links to files owned by other " "groups"); static int can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) { struct vattr va; int error; if (suser_cred(cred, SUSER_ALLOWJAIL) == 0) return (0); if (!hardlink_check_uid && !hardlink_check_gid) return (0); error = VOP_GETATTR(vp, &va, cred, td); if (error != 0) return (error); if (hardlink_check_uid) { if (cred->cr_uid != va.va_uid) return (EPERM); } if (hardlink_check_gid) { if (!groupmember(va.va_gid, cred)) return (EPERM); } return (0); } int kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) { struct vnode *vp; struct mount *mp; struct nameidata nd; int vfslocked; int lvfslocked; int error; bwillwrite(); NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, segflg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; if (vp->v_type == VDIR) { vrele(vp); VFS_UNLOCK_GIANT(vfslocked); return (EPERM); /* POSIX */ } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, segflg, link, td); if ((error = namei(&nd)) == 0) { lvfslocked = NDHASGIANT(&nd); if (nd.ni_vp != NULL) { if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); error = EEXIST; } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td)) == 0) { VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); error = can_hardlink(vp, td, td->td_ucred); if (error == 0) #ifdef MAC error = mac_check_vnode_link(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error == 0) #endif error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); VOP_UNLOCK(vp, 0, td); vput(nd.ni_dvp); } NDFREE(&nd, NDF_ONLY_PNBUF); VFS_UNLOCK_GIANT(lvfslocked); } vrele(vp); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Make a symbolic link. */ #ifndef _SYS_SYSPROTO_H_ struct symlink_args { char *path; char *link; }; #endif int symlink(td, uap) struct thread *td; register struct symlink_args /* { char *path; char *link; } */ *uap; { return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); } int kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) { struct mount *mp; struct vattr vattr; char *syspath; int error; struct nameidata nd; int vfslocked; if (segflg == UIO_SYSSPACE) { syspath = path; } else { syspath = uma_zalloc(namei_zone, M_WAITOK); if ((error = copyinstr(path, syspath, MAXPATHLEN, NULL)) != 0) goto out; } restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, segflg, link, td); if ((error = namei(&nd)) != 0) goto out; vfslocked = NDHASGIANT(&nd); if (nd.ni_vp) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); error = EEXIST; goto out; } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); VFS_UNLOCK_GIANT(vfslocked); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) goto out; goto restart; } VATTR_NULL(&vattr); FILEDESC_LOCK_FAST(td->td_proc->p_fd); vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); #ifdef MAC vattr.va_type = VLNK; error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error) goto out2; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); if (error == 0) vput(nd.ni_vp); #ifdef MAC out2: #endif NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); out: if (segflg != UIO_SYSSPACE) uma_zfree(namei_zone, syspath); return (error); } /* * Delete a whiteout from the filesystem. */ int undelete(td, uap) struct thread *td; register struct undelete_args /* { char *path; } */ *uap; { int error; struct mount *mp; struct nameidata nd; int vfslocked; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | MPSAFE, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); vfslocked = NDHASGIANT(&nd); if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); VFS_UNLOCK_GIANT(vfslocked); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct unlink_args { char *path; }; #endif int unlink(td, uap) struct thread *td; struct unlink_args /* { char *path; } */ *uap; { int error; error = kern_unlink(td, uap->path, UIO_USERSPACE); return (error); } int kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) { struct mount *mp; struct vnode *vp; int error; struct nameidata nd; int vfslocked; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error == EINVAL ? EPERM : error); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { /* * The root of a mounted filesystem cannot be deleted. * * XXX: can this only be a VDIR case? */ if (vp->v_vflag & VV_ROOT) error = EBUSY; } if (error == 0) { if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); VFS_UNLOCK_GIANT(vfslocked); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } #ifdef MAC error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error) goto out; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); #ifdef MAC out: #endif vn_finished_write(mp); } NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (vp == nd.ni_dvp) vrele(vp); else vput(vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct lseek_args { int fd; int pad; off_t offset; int whence; }; #endif int lseek(td, uap) struct thread *td; register struct lseek_args /* { int fd; int pad; off_t offset; int whence; } */ *uap; { struct ucred *cred = td->td_ucred; struct file *fp; struct vnode *vp; struct vattr vattr; off_t offset; int error, noneg; int vfslocked; if ((error = fget(td, uap->fd, &fp)) != 0) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) { fdrop(fp, td); return (ESPIPE); } vp = fp->f_vnode; vfslocked = VFS_LOCK_GIANT(vp->v_mount); noneg = (vp->v_type != VCHR); offset = uap->offset; switch (uap->whence) { case L_INCR: if (noneg && (fp->f_offset < 0 || (offset > 0 && fp->f_offset > OFF_MAX - offset))) { error = EOVERFLOW; break; } offset += fp->f_offset; break; case L_XTND: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); error = VOP_GETATTR(vp, &vattr, cred, td); VOP_UNLOCK(vp, 0, td); if (error) break; if (noneg && (vattr.va_size > OFF_MAX || (offset > 0 && vattr.va_size > OFF_MAX - offset))) { error = EOVERFLOW; break; } offset += vattr.va_size; break; case L_SET: break; default: error = EINVAL; } if (error == 0 && noneg && offset < 0) error = EINVAL; if (error != 0) goto drop; fp->f_offset = offset; *(off_t *)(td->td_retval) = fp->f_offset; drop: fdrop(fp, td); VFS_UNLOCK_GIANT(vfslocked); return (error); } #if defined(COMPAT_43) /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct olseek_args { int fd; long offset; int whence; }; #endif int olseek(td, uap) struct thread *td; register struct olseek_args /* { int fd; long offset; int whence; } */ *uap; { struct lseek_args /* { int fd; int pad; off_t offset; int whence; } */ nuap; int error; nuap.fd = uap->fd; nuap.offset = uap->offset; nuap.whence = uap->whence; error = lseek(td, &nuap); return (error); } #endif /* COMPAT_43 */ /* * Check access permissions using passed credentials. */ static int vn_access(vp, user_flags, cred, td) struct vnode *vp; int user_flags; struct ucred *cred; struct thread *td; { int error, flags; /* Flags == 0 means only check for existence. */ error = 0; if (user_flags) { flags = 0; if (user_flags & R_OK) flags |= VREAD; if (user_flags & W_OK) flags |= VWRITE; if (user_flags & X_OK) flags |= VEXEC; #ifdef MAC error = mac_check_vnode_access(cred, vp, flags); if (error) return (error); #endif if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, flags, cred, td); } return (error); } /* * Check access permissions using "real" credentials. */ #ifndef _SYS_SYSPROTO_H_ struct access_args { char *path; int flags; }; #endif int access(td, uap) struct thread *td; register struct access_args /* { char *path; int flags; } */ *uap; { return (kern_access(td, uap->path, UIO_USERSPACE, uap->flags)); } int kern_access(struct thread *td, char *path, enum uio_seg pathseg, int flags) { struct ucred *cred, *tmpcred; register struct vnode *vp; struct nameidata nd; int vfslocked; int error; /* * Create and modify a temporary credential instead of one that * is potentially shared. This could also mess up socket * buffer accounting which can run in an interrupt context. */ cred = td->td_ucred; tmpcred = crdup(cred); tmpcred->cr_uid = cred->cr_ruid; tmpcred->cr_groups[0] = cred->cr_rgid; td->td_ucred = tmpcred; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) goto out1; vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; error = vn_access(vp, flags, tmpcred, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); VFS_UNLOCK_GIANT(vfslocked); out1: td->td_ucred = cred; crfree(tmpcred); return (error); } /* * Check access permissions using "effective" credentials. */ #ifndef _SYS_SYSPROTO_H_ struct eaccess_args { char *path; int flags; }; #endif int eaccess(td, uap) struct thread *td; register struct eaccess_args /* { char *path; int flags; } */ *uap; { struct nameidata nd; struct vnode *vp; int vfslocked; int error; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; vfslocked = NDHASGIANT(&nd); error = vn_access(vp, uap->flags, td->td_ucred, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } #if defined(COMPAT_43) /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct ostat_args { char *path; struct ostat *ub; }; #endif int ostat(td, uap) struct thread *td; register struct ostat_args /* { char *path; struct ostat *ub; } */ *uap; { struct stat sb; struct ostat osb; int error; error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); if (error) return (error); cvtstat(&sb, &osb); error = copyout(&osb, uap->ub, sizeof (osb)); return (error); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct olstat_args { char *path; struct ostat *ub; }; #endif int olstat(td, uap) struct thread *td; register struct olstat_args /* { char *path; struct ostat *ub; } */ *uap; { struct stat sb; struct ostat osb; int error; error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); if (error) return (error); cvtstat(&sb, &osb); error = copyout(&osb, uap->ub, sizeof (osb)); return (error); } /* * Convert from an old to a new stat structure. */ void cvtstat(st, ost) struct stat *st; struct ostat *ost; { ost->st_dev = st->st_dev; ost->st_ino = st->st_ino; ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; if (st->st_size < (quad_t)1 << 32) ost->st_size = st->st_size; else ost->st_size = -2; ost->st_atime = st->st_atime; ost->st_mtime = st->st_mtime; ost->st_ctime = st->st_ctime; ost->st_blksize = st->st_blksize; ost->st_blocks = st->st_blocks; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; } #endif /* COMPAT_43 */ /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct stat_args { char *path; struct stat *ub; }; #endif int stat(td, uap) struct thread *td; register struct stat_args /* { char *path; struct stat *ub; } */ *uap; { struct stat sb; int error; error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); if (error == 0) error = copyout(&sb, uap->ub, sizeof (sb)); return (error); } int kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) { struct nameidata nd; struct stat sb; int error, vfslocked; NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); if (error) return (error); *sbp = sb; return (0); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif int lstat(td, uap) struct thread *td; register struct lstat_args /* { char *path; struct stat *ub; } */ *uap; { struct stat sb; int error; error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); if (error == 0) error = copyout(&sb, uap->ub, sizeof (sb)); return (error); } int kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) { struct vnode *vp; struct stat sb; struct nameidata nd; int error, vfslocked; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKSHARED | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td); NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); VFS_UNLOCK_GIANT(vfslocked); if (error) return (error); *sbp = sb; return (0); } /* * Implementation of the NetBSD [l]stat() functions. */ void cvtnstat(sb, nsb) struct stat *sb; struct nstat *nsb; { bzero(nsb, sizeof *nsb); nsb->st_dev = sb->st_dev; nsb->st_ino = sb->st_ino; nsb->st_mode = sb->st_mode; nsb->st_nlink = sb->st_nlink; nsb->st_uid = sb->st_uid; nsb->st_gid = sb->st_gid; nsb->st_rdev = sb->st_rdev; nsb->st_atimespec = sb->st_atimespec; nsb->st_mtimespec = sb->st_mtimespec; nsb->st_ctimespec = sb->st_ctimespec; nsb->st_size = sb->st_size; nsb->st_blocks = sb->st_blocks; nsb->st_blksize = sb->st_blksize; nsb->st_flags = sb->st_flags; nsb->st_gen = sb->st_gen; nsb->st_birthtimespec = sb->st_birthtimespec; } #ifndef _SYS_SYSPROTO_H_ struct nstat_args { char *path; struct nstat *ub; }; #endif int nstat(td, uap) struct thread *td; register struct nstat_args /* { char *path; struct nstat *ub; } */ *uap; { struct stat sb; struct nstat nsb; int error; error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); if (error) return (error); cvtnstat(&sb, &nsb); error = copyout(&nsb, uap->ub, sizeof (nsb)); return (error); } /* * NetBSD lstat. Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif int nlstat(td, uap) struct thread *td; register struct nlstat_args /* { char *path; struct nstat *ub; } */ *uap; { struct stat sb; struct nstat nsb; int error; error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); if (error) return (error); cvtnstat(&sb, &nsb); error = copyout(&nsb, uap->ub, sizeof (nsb)); return (error); } /* * Get configurable pathname variables. */ #ifndef _SYS_SYSPROTO_H_ struct pathconf_args { char *path; int name; }; #endif int pathconf(td, uap) struct thread *td; register struct pathconf_args /* { char *path; int name; } */ *uap; { return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name)); } int kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name) { struct nameidata nd; int error, vfslocked; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); /* If asynchronous I/O is available, it works for all files. */ if (name == _PC_ASYNC_IO) td->td_retval[0] = async_io_version; else error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); vput(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Return target name of a symbolic link. */ #ifndef _SYS_SYSPROTO_H_ struct readlink_args { char *path; char *buf; int count; }; #endif int readlink(td, uap) struct thread *td; register struct readlink_args /* { char *path; char *buf; int count; } */ *uap; { return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, UIO_USERSPACE, uap->count)); } int kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, enum uio_seg bufseg, int count) { register struct vnode *vp; struct iovec aiov; struct uio auio; int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; #ifdef MAC error = mac_check_vnode_readlink(td->td_ucred, vp); if (error) { vput(vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } #endif if (vp->v_type != VLNK) error = EINVAL; else { aiov.iov_base = buf; aiov.iov_len = count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = bufseg; auio.uio_td = td; auio.uio_resid = count; error = VOP_READLINK(vp, &auio, td->td_ucred); } vput(vp); VFS_UNLOCK_GIANT(vfslocked); td->td_retval[0] = count - auio.uio_resid; return (error); } /* * Common implementation code for chflags() and fchflags(). */ static int setfflags(td, vp, flags) struct thread *td; struct vnode *vp; int flags; { int error; struct mount *mp; struct vattr vattr; /* * Prevent non-root users from setting flags on devices. When * a device is reused, users can retain ownership of the device * if they are allowed to set flags and programs assume that * chown can't fail when done as root. */ if (vp->v_type == VCHR || vp->v_type == VBLK) { error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL); if (error) return (error); } if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_flags = flags; #ifdef MAC error = mac_check_vnode_setflags(td->td_ucred, vp, vattr.va_flags); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } /* * Change flags of a file given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chflags_args { char *path; int flags; }; #endif int chflags(td, uap) struct thread *td; register struct chflags_args /* { char *path; int flags; } */ *uap; { int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = setfflags(td, nd.ni_vp, uap->flags); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Same as chflags() but doesn't follow symlinks. */ int lchflags(td, uap) struct thread *td; register struct lchflags_args /* { char *path; int flags; } */ *uap; { int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfflags(td, nd.ni_vp, uap->flags); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Change flags of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchflags_args { int fd; int flags; }; #endif int fchflags(td, uap) struct thread *td; register struct fchflags_args /* { int fd; int flags; } */ *uap; { struct file *fp; int vfslocked; int error; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = setfflags(td, fp->f_vnode, uap->flags); VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); return (error); } /* * Common implementation code for chmod(), lchmod() and fchmod(). */ static int setfmode(td, vp, mode) struct thread *td; struct vnode *vp; int mode; { int error; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_mode = mode & ALLPERMS; #ifdef MAC error = mac_check_vnode_setmode(td->td_ucred, vp, vattr.va_mode); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } /* * Change mode of a file given path name. */ #ifndef _SYS_SYSPROTO_H_ struct chmod_args { char *path; int mode; }; #endif int chmod(td, uap) struct thread *td; register struct chmod_args /* { char *path; int mode; } */ *uap; { return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); } int kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) { int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfmode(td, nd.ni_vp, mode); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Change mode of a file given path name (don't follow links.) */ #ifndef _SYS_SYSPROTO_H_ struct lchmod_args { char *path; int mode; }; #endif int lchmod(td, uap) struct thread *td; register struct lchmod_args /* { char *path; int mode; } */ *uap; { int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfmode(td, nd.ni_vp, uap->mode); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Change mode of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchmod_args { int fd; int mode; }; #endif int fchmod(td, uap) struct thread *td; register struct fchmod_args /* { int fd; int mode; } */ *uap; { struct file *fp; int vfslocked; int error; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = setfmode(td, fp->f_vnode, uap->mode); VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); return (error); } /* * Common implementation for chown(), lchown(), and fchown() */ static int setfown(td, vp, uid, gid) struct thread *td; struct vnode *vp; uid_t uid; gid_t gid; { int error; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); VATTR_NULL(&vattr); vattr.va_uid = uid; vattr.va_gid = gid; #ifdef MAC error = mac_check_vnode_setowner(td->td_ucred, vp, vattr.va_uid, vattr.va_gid); if (error == 0) #endif error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } /* * Set ownership given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chown_args { char *path; int uid; int gid; }; #endif int chown(td, uap) struct thread *td; register struct chown_args /* { char *path; int uid; int gid; } */ *uap; { return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); } int kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, int gid) { int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfown(td, nd.ni_vp, uid, gid); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Set ownership given a path name, do not cross symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchown_args { char *path; int uid; int gid; }; #endif int lchown(td, uap) struct thread *td; register struct lchown_args /* { char *path; int uid; int gid; } */ *uap; { return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); } int kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, int gid) { int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); error = setfown(td, nd.ni_vp, uid, gid); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Set ownership given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchown_args { int fd; int uid; int gid; }; #endif int fchown(td, uap) struct thread *td; register struct fchown_args /* { int fd; int uid; int gid; } */ *uap; { struct file *fp; int vfslocked; int error; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = setfown(td, fp->f_vnode, uap->uid, uap->gid); VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); return (error); } /* * Common implementation code for utimes(), lutimes(), and futimes(). */ static int getutimes(usrtvp, tvpseg, tsp) const struct timeval *usrtvp; enum uio_seg tvpseg; struct timespec *tsp; { struct timeval tv[2]; const struct timeval *tvp; int error; if (usrtvp == NULL) { microtime(&tv[0]); TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); tsp[1] = tsp[0]; } else { if (tvpseg == UIO_SYSSPACE) { tvp = usrtvp; } else { if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) return (error); tvp = tv; } if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) return (EINVAL); TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); } return (0); } /* * Common implementation code for utimes(), lutimes(), and futimes(). */ static int setutimes(td, vp, ts, numtimes, nullflag) struct thread *td; struct vnode *vp; const struct timespec *ts; int numtimes; int nullflag; { int error, setbirthtime; struct mount *mp; struct vattr vattr; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); setbirthtime = 0; if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred, td) == 0 && timespeccmp(&ts[1], &vattr.va_birthtime, < )) setbirthtime = 1; VATTR_NULL(&vattr); vattr.va_atime = ts[0]; vattr.va_mtime = ts[1]; if (setbirthtime) vattr.va_birthtime = ts[1]; if (numtimes > 2) vattr.va_birthtime = ts[2]; if (nullflag) vattr.va_vaflags |= VA_UTIMES_NULL; #ifdef MAC error = mac_check_vnode_setutimes(td->td_ucred, vp, vattr.va_atime, vattr.va_mtime); #endif if (error == 0) error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct utimes_args { char *path; struct timeval *tptr; }; #endif int utimes(td, uap) struct thread *td; register struct utimes_args /* { char *path; struct timeval *tptr; } */ *uap; { return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, UIO_USERSPACE)); } int kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; int error; struct nameidata nd; int vfslocked; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct lutimes_args { char *path; struct timeval *tptr; }; #endif int lutimes(td, uap) struct thread *td; register struct lutimes_args /* { char *path; struct timeval *tptr; } */ *uap; { return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, UIO_USERSPACE)); } int kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; int error; struct nameidata nd; int vfslocked; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Set the access and modification times of a file. */ #ifndef _SYS_SYSPROTO_H_ struct futimes_args { int fd; struct timeval *tptr; }; #endif int futimes(td, uap) struct thread *td; register struct futimes_args /* { int fd; struct timeval *tptr; } */ *uap; { return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); } int kern_futimes(struct thread *td, int fd, struct timeval *tptr, enum uio_seg tptrseg) { struct timespec ts[2]; struct file *fp; int vfslocked; int error; if ((error = getutimes(tptr, tptrseg, ts)) != 0) return (error); if ((error = getvnode(td->td_proc->p_fd, fd, &fp)) != 0) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); return (error); } /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct truncate_args { char *path; int pad; off_t length; }; #endif int truncate(td, uap) struct thread *td; register struct truncate_args /* { char *path; int pad; off_t length; } */ *uap; { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } int kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) { struct mount *mp; struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; int vfslocked; if (length < 0) return(EINVAL); NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } NDFREE(&nd, NDF_ONLY_PNBUF); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type == VDIR) error = EISDIR; #ifdef MAC else if ((error = mac_check_vnode_write(td->td_ucred, NOCRED, vp))) { } #endif else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { VATTR_NULL(&vattr); vattr.va_size = length; error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); } vput(vp); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Truncate a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ftruncate_args { int fd; int pad; off_t length; }; #endif int ftruncate(td, uap) struct thread *td; register struct ftruncate_args /* { int fd; int pad; off_t length; } */ *uap; { struct mount *mp; struct vattr vattr; struct vnode *vp; struct file *fp; int vfslocked; int error; if (uap->length < 0) return(EINVAL); if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); if ((fp->f_flag & FWRITE) == 0) { fdrop(fp, td); return (EINVAL); } vp = fp->f_vnode; vfslocked = VFS_LOCK_GIANT(vp->v_mount); if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto drop; VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type == VDIR) error = EISDIR; #ifdef MAC else if ((error = mac_check_vnode_write(td->td_ucred, fp->f_cred, vp))) { } #endif else if ((error = vn_writechk(vp)) == 0) { VATTR_NULL(&vattr); vattr.va_size = uap->length; error = VOP_SETATTR(vp, &vattr, fp->f_cred, td); } VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); drop: VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); return (error); } #if defined(COMPAT_43) /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct otruncate_args { char *path; long length; }; #endif int otruncate(td, uap) struct thread *td; register struct otruncate_args /* { char *path; long length; } */ *uap; { struct truncate_args /* { char *path; int pad; off_t length; } */ nuap; nuap.path = uap->path; nuap.length = uap->length; return (truncate(td, &nuap)); } /* * Truncate a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct oftruncate_args { int fd; long length; }; #endif int oftruncate(td, uap) struct thread *td; register struct oftruncate_args /* { int fd; long length; } */ *uap; { struct ftruncate_args /* { int fd; int pad; off_t length; } */ nuap; nuap.fd = uap->fd; nuap.length = uap->length; return (ftruncate(td, &nuap)); } #endif /* COMPAT_43 */ /* * Sync an open file. */ #ifndef _SYS_SYSPROTO_H_ struct fsync_args { int fd; }; #endif int fsync(td, uap) struct thread *td; struct fsync_args /* { int fd; } */ *uap; { struct vnode *vp; struct mount *mp; struct file *fp; int vfslocked; int error; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); vp = fp->f_vnode; vfslocked = VFS_LOCK_GIANT(vp->v_mount); if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto drop; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_object != NULL) { VM_OBJECT_LOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_UNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); drop: VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); return (error); } /* * Rename files. Source and destination must either both be directories, * or both not be directories. If target is a directory, it must be empty. */ #ifndef _SYS_SYSPROTO_H_ struct rename_args { char *from; char *to; }; #endif int rename(td, uap) struct thread *td; register struct rename_args /* { char *from; char *to; } */ *uap; { return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); } int kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) { struct mount *mp = NULL; struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int tvfslocked; int fvfslocked; int error; bwillwrite(); #ifdef MAC NDINIT(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | MPSAFE, pathseg, from, td); #else NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | MPSAFE, pathseg, from, td); #endif if ((error = namei(&fromnd)) != 0) return (error); fvfslocked = NDHASGIANT(&fromnd); tvfslocked = 0; #ifdef MAC error = mac_check_vnode_rename_from(td->td_ucred, fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd); VOP_UNLOCK(fromnd.ni_dvp, 0, td); VOP_UNLOCK(fromnd.ni_vp, 0, td); #endif fvp = fromnd.ni_vp; if (error == 0) error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); if (error != 0) { NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | MPSAFE, pathseg, to, td); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&tond)) != 0) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); vn_finished_write(mp); goto out1; } tvfslocked = NDHASGIANT(&tond); tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; goto out; } } if (fvp == tdvp) error = EINVAL; /* * If the source is the same as the destination (that is, if they * are links to the same vnode), then there is nothing to do. */ if (fvp == tvp) error = -1; #ifdef MAC else error = mac_check_vnode_rename_to(td->td_ucred, tdvp, tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); #endif out: if (!error) { VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE); if (fromnd.ni_dvp != tdvp) { VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE); } if (tvp) { VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); } else { NDFREE(&fromnd, NDF_ONLY_PNBUF); NDFREE(&tond, NDF_ONLY_PNBUF); if (tvp) vput(tvp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); vrele(fromnd.ni_dvp); vrele(fvp); } vrele(tond.ni_startdir); vn_finished_write(mp); out1: if (fromnd.ni_startdir) vrele(fromnd.ni_startdir); VFS_UNLOCK_GIANT(fvfslocked); VFS_UNLOCK_GIANT(tvfslocked); if (error == -1) return (0); return (error); } /* * Make a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct mkdir_args { char *path; int mode; }; #endif int mkdir(td, uap) struct thread *td; register struct mkdir_args /* { char *path; int mode; } */ *uap; { return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); } int kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) { struct mount *mp; struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; int vfslocked; restart: bwillwrite(); NDINIT(&nd, CREATE, LOCKPARENT | SAVENAME | MPSAFE, segflg, path, td); nd.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; if (vp != NULL) { NDFREE(&nd, NDF_ONLY_PNBUF); /* * XXX namei called with LOCKPARENT but not LOCKLEAF has * the strange behaviour of leaving the vnode unlocked * if the target is the same vnode as the parent. */ if (vp == nd.ni_dvp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); VFS_UNLOCK_GIANT(vfslocked); return (EEXIST); } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); VFS_UNLOCK_GIANT(vfslocked); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VATTR_NULL(&vattr); vattr.va_type = VDIR; FILEDESC_LOCK_FAST(td->td_proc->p_fd); vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); #ifdef MAC error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); if (error) goto out; #endif VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); #ifdef MAC out: #endif NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); if (!error) vput(nd.ni_vp); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Remove a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct rmdir_args { char *path; }; #endif int rmdir(td, uap) struct thread *td; struct rmdir_args /* { char *path; } */ *uap; { return (kern_rmdir(td, uap->path, UIO_USERSPACE)); } int kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) { struct mount *mp; struct vnode *vp; int error; struct nameidata nd; int vfslocked; restart: bwillwrite(); NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | MPSAFE, pathseg, path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_vflag & VV_ROOT) { error = EBUSY; goto out; } #ifdef MAC error = mac_check_vnode_delete(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); if (error) goto out; #endif if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); VFS_UNLOCK_GIANT(vfslocked); if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) return (error); goto restart; } VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); vn_finished_write(mp); out: NDFREE(&nd, NDF_ONLY_PNBUF); vput(vp); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); VFS_UNLOCK_GIANT(vfslocked); return (error); } #ifdef COMPAT_43 /* * Read a block of directory entries in a filesystem independent format. */ #ifndef _SYS_SYSPROTO_H_ struct ogetdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int ogetdirentries(td, uap) struct thread *td; register struct ogetdirentries_args /* { int fd; char *buf; u_int count; long *basep; } */ *uap; { struct vnode *vp; struct file *fp; struct uio auio, kuio; struct iovec aiov, kiov; struct dirent *dp, *edp; caddr_t dirbuf; int error, eofflag, readcnt; long loff; /* XXX arbitrary sanity limit on `count'. */ if (uap->count > 64 * 1024) return (EINVAL); if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; unionread: if (vp->v_type != VDIR) { fdrop(fp, td); return (EINVAL); } aiov.iov_base = uap->buf; aiov.iov_len = uap->count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auio.uio_resid = uap->count; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); loff = auio.uio_offset = fp->f_offset; #ifdef MAC error = mac_check_vnode_readdir(td->td_ucred, vp); if (error) { VOP_UNLOCK(vp, 0, td); fdrop(fp, td); return (error); } #endif # if (BYTE_ORDER != LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen <= 0) { error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; } else # endif { kuio = auio; kuio.uio_iov = &kiov; kuio.uio_segflg = UIO_SYSSPACE; kiov.iov_len = uap->count; MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK); kiov.iov_base = dirbuf; error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = kuio.uio_offset; if (error == 0) { readcnt = uap->count - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { # if (BYTE_ORDER == LITTLE_ENDIAN) /* * The expected low byte of * dp->d_namlen is our dp->d_type. * The high MBZ byte of dp->d_namlen * is our dp->d_namlen. */ dp->d_type = dp->d_namlen; dp->d_namlen = 0; # else /* * The dp->d_type is the high byte * of the expected dp->d_namlen, * so must be zero'ed. */ dp->d_type = 0; # endif if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, &auio); } FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, td); if (error) { fdrop(fp, td); return (error); } if (uap->count == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; if (error) { fdrop(fp, td); return (error); } } /* * XXX We could delay dropping the lock above but * union_dircheckp complicates things. */ vn_lock(vp, LK_EXCLUSIVE|LK_RETRY, td); if ((vp->v_vflag & VV_ROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_vnode = vp; fp->f_data = vp; fp->f_offset = 0; vput(tvp); goto unionread; } VOP_UNLOCK(vp, 0, td); } error = copyout(&loff, uap->basep, sizeof(long)); fdrop(fp, td); td->td_retval[0] = uap->count - auio.uio_resid; return (error); } #endif /* COMPAT_43 */ /* * Read a block of directory entries in a filesystem independent format. */ #ifndef _SYS_SYSPROTO_H_ struct getdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int getdirentries(td, uap) struct thread *td; register struct getdirentries_args /* { int fd; char *buf; u_int count; long *basep; } */ *uap; { struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; int vfslocked; long loff; int error, eofflag; if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) return (error); if ((fp->f_flag & FREAD) == 0) { fdrop(fp, td); return (EBADF); } vp = fp->f_vnode; unionread: vfslocked = VFS_LOCK_GIANT(vp->v_mount); if (vp->v_type != VDIR) { error = EINVAL; goto fail; } aiov.iov_base = uap->buf; aiov.iov_len = uap->count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auio.uio_resid = uap->count; /* vn_lock(vp, LK_SHARED | LK_RETRY, td); */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); loff = auio.uio_offset = fp->f_offset; #ifdef MAC error = mac_check_vnode_readdir(td->td_ucred, vp); if (error == 0) #endif error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; VOP_UNLOCK(vp, 0, td); if (error) goto fail; if (uap->count == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) { VFS_UNLOCK_GIANT(vfslocked); goto unionread; } if (error) goto fail; } /* * XXX We could delay dropping the lock above but * union_dircheckp complicates things. */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if ((vp->v_vflag & VV_ROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_vnode = vp; fp->f_data = vp; fp->f_offset = 0; vput(tvp); VFS_UNLOCK_GIANT(vfslocked); goto unionread; } VOP_UNLOCK(vp, 0, td); } if (uap->basep != NULL) { error = copyout(&loff, uap->basep, sizeof(long)); } td->td_retval[0] = uap->count - auio.uio_resid; fail: VFS_UNLOCK_GIANT(vfslocked); fdrop(fp, td); return (error); } #ifndef _SYS_SYSPROTO_H_ struct getdents_args { int fd; char *buf; size_t count; }; #endif int getdents(td, uap) struct thread *td; register struct getdents_args /* { int fd; char *buf; u_int count; } */ *uap; { struct getdirentries_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; return (getdirentries(td, &ap)); } /* * Set the mode mask for creation of filesystem nodes. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { int newmask; }; #endif int umask(td, uap) struct thread *td; struct umask_args /* { int newmask; } */ *uap; { register struct filedesc *fdp; FILEDESC_LOCK_FAST(td->td_proc->p_fd); fdp = td->td_proc->p_fd; td->td_retval[0] = fdp->fd_cmask; fdp->fd_cmask = uap->newmask & ALLPERMS; FILEDESC_UNLOCK_FAST(td->td_proc->p_fd); return (0); } /* * Void all references to file by ripping underlying filesystem * away from vnode. */ #ifndef _SYS_SYSPROTO_H_ struct revoke_args { char *path; }; #endif int revoke(td, uap) struct thread *td; register struct revoke_args /* { char *path; } */ *uap; { struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; int vfslocked; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE, uap->path, td); if ((error = namei(&nd)) != 0) return (error); vfslocked = NDHASGIANT(&nd); vp = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); if (vp->v_type != VCHR) { error = EINVAL; goto out; } #ifdef MAC error = mac_check_vnode_revoke(td->td_ucred, vp); if (error) goto out; #endif error = VOP_GETATTR(vp, &vattr, td->td_ucred, td); if (error) goto out; if (td->td_ucred->cr_uid != vattr.va_uid) { error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL); if (error) goto out; } if (vcount(vp) > 1) VOP_REVOKE(vp, REVOKEALL); out: vput(vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * Convert a user file descriptor to a kernel file entry. * A reference on the file entry is held upon returning. */ int getvnode(fdp, fd, fpp) struct filedesc *fdp; int fd; struct file **fpp; { int error; struct file *fp; fp = NULL; if (fdp == NULL) error = EBADF; else { FILEDESC_LOCK(fdp); if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) error = EBADF; else if (fp->f_vnode == NULL) { fp = NULL; error = EINVAL; } else { fhold(fp); error = 0; } FILEDESC_UNLOCK(fdp); } *fpp = fp; return (error); } /* * Get (NFS) file handle */ #ifndef _SYS_SYSPROTO_H_ struct lgetfh_args { char *fname; fhandle_t *fhp; }; #endif int lgetfh(td, uap) struct thread *td; register struct lgetfh_args *uap; { struct nameidata nd; fhandle_t fh; register struct vnode *vp; int vfslocked; int error; error = suser(td); if (error) return (error); NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE, uap->fname, td); error = namei(&nd); if (error) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; bzero(&fh, sizeof(fh)); fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fh.fh_fid); vput(vp); VFS_UNLOCK_GIANT(vfslocked); if (error) return (error); error = copyout(&fh, uap->fhp, sizeof (fh)); return (error); } #ifndef _SYS_SYSPROTO_H_ struct getfh_args { char *fname; fhandle_t *fhp; }; #endif int getfh(td, uap) struct thread *td; register struct getfh_args *uap; { struct nameidata nd; fhandle_t fh; register struct vnode *vp; int vfslocked; int error; error = suser(td); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE, UIO_USERSPACE, uap->fname, td); error = namei(&nd); if (error) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; bzero(&fh, sizeof(fh)); fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; error = VFS_VPTOFH(vp, &fh.fh_fid); vput(vp); VFS_UNLOCK_GIANT(vfslocked); if (error) return (error); error = copyout(&fh, uap->fhp, sizeof (fh)); return (error); } /* * syscall for the rpc.lockd to use to translate a NFS file handle into * an open descriptor. * * warning: do not remove the suser() call or this becomes one giant * security hole. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct fhopen_args { const struct fhandle *u_fhp; int flags; }; #endif int fhopen(td, uap) struct thread *td; struct fhopen_args /* { const struct fhandle *u_fhp; int flags; } */ *uap; { struct proc *p = td->td_proc; struct mount *mp; struct vnode *vp; struct fhandle fhp; struct vattr vat; struct vattr *vap = &vat; struct flock lf; struct file *fp; register struct filedesc *fdp = p->p_fd; int fmode, mode, error, type; struct file *nfp; int indx; error = suser(td); if (error) return (error); fmode = FFLAGS(uap->flags); /* why not allow a non-read/write open for our lockd? */ if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) return (EINVAL); error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); if (error) return(error); /* find the mount point */ mtx_lock(&Giant); mp = vfs_getvfs(&fhp.fh_fsid); if (mp == NULL) { error = ESTALE; goto out; } /* now give me my vnode, it gets returned to me locked */ error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp); if (error) goto out; /* * from now on we have to make sure not * to forget about the vnode * any error that causes an abort must vput(vp) * just set error = err and 'goto bad;'. */ /* * from vn_open */ if (vp->v_type == VLNK) { error = EMLINK; goto bad; } if (vp->v_type == VSOCK) { error = EOPNOTSUPP; goto bad; } mode = 0; if (fmode & (FWRITE | O_TRUNC)) { if (vp->v_type == VDIR) { error = EISDIR; goto bad; } error = vn_writechk(vp); if (error) goto bad; mode |= VWRITE; } if (fmode & FREAD) mode |= VREAD; if (fmode & O_APPEND) mode |= VAPPEND; #ifdef MAC error = mac_check_vnode_open(td->td_ucred, vp, mode); if (error) goto bad; #endif if (mode) { error = VOP_ACCESS(vp, mode, td->td_ucred, td); if (error) goto bad; } if (fmode & O_TRUNC) { VOP_UNLOCK(vp, 0, td); /* XXX */ if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) { vrele(vp); goto out; } VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* XXX */ #ifdef MAC /* * We don't yet have fp->f_cred, so use td->td_ucred, which * should be right. */ error = mac_check_vnode_write(td->td_ucred, td->td_ucred, vp); if (error == 0) { #endif VATTR_NULL(vap); vap->va_size = 0; error = VOP_SETATTR(vp, vap, td->td_ucred, td); #ifdef MAC } #endif vn_finished_write(mp); if (error) goto bad; } error = VOP_OPEN(vp, fmode, td->td_ucred, td, -1); if (error) goto bad; if (fmode & FWRITE) vp->v_writecount++; /* * end of vn_open code */ if ((error = falloc(td, &nfp, &indx)) != 0) { if (fmode & FWRITE) vp->v_writecount--; goto bad; } /* An extra reference on `nfp' has been held for us by falloc(). */ fp = nfp; nfp->f_vnode = vp; nfp->f_data = vp; nfp->f_flag = fmode & FMASK; nfp->f_ops = &vnops; nfp->f_type = DTYPE_VNODE; if (fmode & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (fmode & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((fmode & FNONBLOCK) == 0) type |= F_WAIT; VOP_UNLOCK(vp, 0, td); if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { /* * The lock request failed. Normally close the * descriptor but handle the case where someone might * have dup()d or close()d it when we weren't looking. */ fdclose(fdp, fp, indx, td); /* * release our private reference */ fdrop(fp, td); goto out; } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); fp->f_flag |= FHASLOCK; } VOP_UNLOCK(vp, 0, td); fdrop(fp, td); mtx_unlock(&Giant); td->td_retval[0] = indx; return (0); bad: vput(vp); out: mtx_unlock(&Giant); return (error); } /* * Stat an (NFS) file handle. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct fhstat_args { struct fhandle *u_fhp; struct stat *sb; }; #endif int fhstat(td, uap) struct thread *td; register struct fhstat_args /* { struct fhandle *u_fhp; struct stat *sb; } */ *uap; { struct stat sb; fhandle_t fh; struct mount *mp; struct vnode *vp; int error; error = suser(td); if (error) return (error); error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error) return (error); mtx_lock(&Giant); if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { mtx_unlock(&Giant); return (ESTALE); } if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) { mtx_unlock(&Giant); return (error); } error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td); vput(vp); mtx_unlock(&Giant); if (error) return (error); error = copyout(&sb, uap->sb, sizeof(sb)); return (error); } /* * Implement fstatfs() for (NFS) file handles. * * MP SAFE */ #ifndef _SYS_SYSPROTO_H_ struct fhstatfs_args { struct fhandle *u_fhp; struct statfs *buf; }; #endif int fhstatfs(td, uap) struct thread *td; struct fhstatfs_args /* { struct fhandle *u_fhp; struct statfs *buf; } */ *uap; { struct statfs sf; fhandle_t fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error) return (error); error = kern_fhstatfs(td, fh, &sf); if (error) return (error); return (copyout(&sf, uap->buf, sizeof(sf))); } int kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) { struct statfs *sp; struct mount *mp; struct vnode *vp; int error; error = suser(td); if (error) return (error); mtx_lock(&Giant); if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { mtx_unlock(&Giant); return (ESTALE); } error = VFS_FHTOVP(mp, &fh.fh_fid, &vp); if (error) { mtx_unlock(&Giant); return (error); } mp = vp->v_mount; sp = &mp->mnt_stat; vput(vp); error = prison_canseemount(td->td_ucred, mp); if (error) return (error); #ifdef MAC error = mac_check_mount_stat(td->td_ucred, mp); if (error) { mtx_unlock(&Giant); return (error); } #endif /* * Set these in case the underlying filesystem fails to do so. */ sp->f_version = STATFS_VERSION; sp->f_namemax = NAME_MAX; sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = VFS_STATFS(mp, sp, td); mtx_unlock(&Giant); if (error) return (error); *buf = *sp; return (0); } /* * Syscall to push extended attribute configuration information into the * VFS. Accepts a path, which it converts to a mountpoint, as well as * a command (int cmd), and attribute name and misc data. For now, the * attribute name is left in userspace for consumption by the VFS_op. * It will probably be changed to be copied into sysspace by the * syscall in the future, once issues with various consumers of the * attribute code have raised their hands. * * Currently this is used only by UFS Extended Attributes. */ int extattrctl(td, uap) struct thread *td; struct extattrctl_args /* { const char *path; int cmd; const char *filename; int attrnamespace; const char *attrname; } */ *uap; { struct vnode *filename_vp; struct nameidata nd; struct mount *mp, *mp_writable; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, fnvfslocked, error; /* * uap->attrname is not always defined. We check again later when we * invoke the VFS call so as to pass in NULL there if needed. */ if (uap->attrname != NULL) { error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); } vfslocked = fnvfslocked = 0; /* * uap->filename is not always defined. If it is, grab a vnode lock, * which VFS_EXTATTRCTL() will later release. */ filename_vp = NULL; if (uap->filename != NULL) { NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->filename, td); error = namei(&nd); if (error) return (error); fnvfslocked = NDHASGIANT(&nd); filename_vp = nd.ni_vp; NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK); } /* uap->path is always defined. */ NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) { if (filename_vp != NULL) vput(filename_vp); goto out; } vfslocked = NDHASGIANT(&nd); mp = nd.ni_vp->v_mount; error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH); NDFREE(&nd, 0); if (error) { if (filename_vp != NULL) vput(filename_vp); goto out; } error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, uap->attrnamespace, uap->attrname != NULL ? attrname : NULL, td); vn_finished_write(mp_writable); /* * VFS_EXTATTRCTL will have unlocked, but not de-ref'd, * filename_vp, so vrele it if it is defined. */ if (filename_vp != NULL) vrele(filename_vp); out: VFS_UNLOCK_GIANT(fnvfslocked); VFS_UNLOCK_GIANT(vfslocked); return (error); } /*- * Set a named extended attribute on a file or directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer "attrname", userspace buffer * pointer "data", buffer length "nbytes", thread "td". * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname, void *data, size_t nbytes, struct thread *td) { struct mount *mp; struct uio auio; struct iovec aiov; ssize_t cnt; int error; VFS_ASSERT_GIANT(vp->v_mount); error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); aiov.iov_base = data; aiov.iov_len = nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; if (nbytes > INT_MAX) { error = EINVAL; goto done; } auio.uio_resid = nbytes; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; cnt = nbytes; #ifdef MAC error = mac_check_vnode_setextattr(td->td_ucred, vp, attrnamespace, attrname, &auio); if (error) goto done; #endif error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, td->td_ucred, td); cnt -= auio.uio_resid; td->td_retval[0] = cnt; done: VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } int extattr_set_fd(td, uap) struct thread *td; struct extattr_set_fd_args /* { int fd; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct file *fp; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = extattr_set_vp(fp->f_vnode, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); fdrop(fp, td); VFS_UNLOCK_GIANT(vfslocked); return (error); } int extattr_set_file(td, uap) struct thread *td; struct extattr_set_file_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } int extattr_set_link(td, uap) struct thread *td; struct extattr_set_link_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /*- * Get a named extended attribute on a file or directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer "attrname", userspace buffer * pointer "data", buffer length "nbytes", thread "td". * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname, void *data, size_t nbytes, struct thread *td) { struct uio auio, *auiop; struct iovec aiov; ssize_t cnt; size_t size, *sizep; int error; VFS_ASSERT_GIANT(vp->v_mount); VOP_LEASE(vp, td, td->td_ucred, LEASE_READ); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* * Slightly unusual semantics: if the user provides a NULL data * pointer, they don't want to receive the data, just the * maximum read length. */ auiop = NULL; sizep = NULL; cnt = 0; if (data != NULL) { aiov.iov_base = data; aiov.iov_len = nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; if (nbytes > INT_MAX) { error = EINVAL; goto done; } auio.uio_resid = nbytes; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auiop = &auio; cnt = nbytes; } else sizep = &size; #ifdef MAC error = mac_check_vnode_getextattr(td->td_ucred, vp, attrnamespace, attrname, &auio); if (error) goto done; #endif error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep, td->td_ucred, td); if (auiop != NULL) { cnt -= auio.uio_resid; td->td_retval[0] = cnt; } else td->td_retval[0] = size; done: VOP_UNLOCK(vp, 0, td); return (error); } int extattr_get_fd(td, uap) struct thread *td; struct extattr_get_fd_args /* { int fd; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct file *fp; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = extattr_get_vp(fp->f_vnode, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); fdrop(fp, td); VFS_UNLOCK_GIANT(vfslocked); return (error); } int extattr_get_file(td, uap) struct thread *td; struct extattr_get_file_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } int extattr_get_link(td, uap) struct thread *td; struct extattr_get_link_args /* { const char *path; int attrnamespace; const char *attrname; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname, uap->data, uap->nbytes, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * extattr_delete_vp(): Delete a named extended attribute on a file or * directory * * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", * kernelspace string pointer "attrname", proc "p" * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname, struct thread *td) { struct mount *mp; int error; VFS_ASSERT_GIANT(vp->v_mount); error = vn_start_write(vp, &mp, V_WAIT | PCATCH); if (error) return (error); VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); #ifdef MAC error = mac_check_vnode_deleteextattr(td->td_ucred, vp, attrnamespace, attrname); if (error) goto done; #endif error = VOP_DELETEEXTATTR(vp, attrnamespace, attrname, td->td_ucred, td); if (error == EOPNOTSUPP) error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred, td); #ifdef MAC done: #endif VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); return (error); } int extattr_delete_fd(td, uap) struct thread *td; struct extattr_delete_fd_args /* { int fd; int attrnamespace; const char *attrname; } */ *uap; { struct file *fp; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return (error); error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = extattr_delete_vp(fp->f_vnode, uap->attrnamespace, attrname, td); fdrop(fp, td); VFS_UNLOCK_GIANT(vfslocked); return (error); } int extattr_delete_file(td, uap) struct thread *td; struct extattr_delete_file_args /* { const char *path; int attrnamespace; const char *attrname; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return(error); NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return(error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return(error); } int extattr_delete_link(td, uap) struct thread *td; struct extattr_delete_link_args /* { const char *path; int attrnamespace; const char *attrname; } */ *uap; { struct nameidata nd; char attrname[EXTATTR_MAXNAMELEN]; int vfslocked, error; error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); if (error) return(error); NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return(error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return(error); } /*- * Retrieve a list of extended attributes on a file or directory. * * Arguments: unlocked vnode "vp", attribute namespace 'attrnamespace", * userspace buffer pointer "data", buffer length "nbytes", * thread "td". * Returns: 0 on success, an error number otherwise * Locks: none * References: vp must be a valid reference for the duration of the call */ static int extattr_list_vp(struct vnode *vp, int attrnamespace, void *data, size_t nbytes, struct thread *td) { struct uio auio, *auiop; size_t size, *sizep; struct iovec aiov; ssize_t cnt; int error; VFS_ASSERT_GIANT(vp->v_mount); VOP_LEASE(vp, td, td->td_ucred, LEASE_READ); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); auiop = NULL; sizep = NULL; cnt = 0; if (data != NULL) { aiov.iov_base = data; aiov.iov_len = nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; if (nbytes > INT_MAX) { error = EINVAL; goto done; } auio.uio_resid = nbytes; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; auiop = &auio; cnt = nbytes; } else sizep = &size; #ifdef MAC error = mac_check_vnode_listextattr(td->td_ucred, vp, attrnamespace); if (error) goto done; #endif error = VOP_LISTEXTATTR(vp, attrnamespace, auiop, sizep, td->td_ucred, td); if (auiop != NULL) { cnt -= auio.uio_resid; td->td_retval[0] = cnt; } else td->td_retval[0] = size; done: VOP_UNLOCK(vp, 0, td); return (error); } int extattr_list_fd(td, uap) struct thread *td; struct extattr_list_fd_args /* { int fd; int attrnamespace; void *data; size_t nbytes; } */ *uap; { struct file *fp; int vfslocked, error; error = getvnode(td->td_proc->p_fd, uap->fd, &fp); if (error) return (error); vfslocked = VFS_LOCK_GIANT(fp->f_vnode->v_mount); error = extattr_list_vp(fp->f_vnode, uap->attrnamespace, uap->data, uap->nbytes, td); fdrop(fp, td); VFS_UNLOCK_GIANT(vfslocked); return (error); } int extattr_list_file(td, uap) struct thread*td; struct extattr_list_file_args /* { const char *path; int attrnamespace; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; int vfslocked, error; NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data, uap->nbytes, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); } int extattr_list_link(td, uap) struct thread*td; struct extattr_list_link_args /* { const char *path; int attrnamespace; void *data; size_t nbytes; } */ *uap; { struct nameidata nd; int vfslocked, error; NDINIT(&nd, LOOKUP, MPSAFE | NOFOLLOW, UIO_USERSPACE, uap->path, td); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); vfslocked = NDHASGIANT(&nd); error = extattr_list_vp(nd.ni_vp, uap->attrnamespace, uap->data, uap->nbytes, td); vrele(nd.ni_vp); VFS_UNLOCK_GIANT(vfslocked); return (error); }