diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 0b4172b34300..3df7bf8e8596 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -1,804 +1,817 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95 */ #include __FBSDID("$FreeBSD$"); +#include "opt_ufs.h" #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include +#ifdef UFS_DIRHASH +#include +#endif #include #include #include static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t, ufs2_daddr_t, int, ufs2_daddr_t *); static void ffs_inode_bwrite(struct vnode *vp, struct buf *bp, int flags) { if ((flags & IO_SYNC) != 0) bwrite(bp); else if (DOINGASYNC(vp)) bdwrite(bp); else bawrite(bp); } /* * Update the access, modified, and inode change times as specified by the * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode * to disk if the IN_MODIFIED flag is set (it may be set initially, or by * the timestamp update). The IN_LAZYMOD flag is set to force a write * later if not now. The IN_LAZYACCESS is set instead of IN_MODIFIED if the fs * is currently being suspended (or is suspended) and vnode has been accessed. * If we write now, then clear IN_MODIFIED, IN_LAZYACCESS and IN_LAZYMOD to * reflect the presumably successful write, and if waitfor is set, then wait * for the write to complete. */ int ffs_update(vp, waitfor) struct vnode *vp; int waitfor; { struct fs *fs; struct buf *bp; struct inode *ip; daddr_t bn; int flags, error; ASSERT_VOP_ELOCKED(vp, "ffs_update"); ufs_itimes(vp); ip = VTOI(vp); if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0) return (0); ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED); /* * The IN_SIZEMOD and IN_IBLKDATA flags indicate changes to the * file size and block pointer fields in the inode. When these * fields have been changed, the fsync() and fsyncdata() system * calls must write the inode to ensure their semantics that the * file is on stable store. * * The IN_SIZEMOD and IN_IBLKDATA flags cannot be cleared until * a synchronous write of the inode is done. If they are cleared * on an asynchronous write, then the inode may not yet have been * written to the disk when an fsync() or fsyncdata() call is done. * Absent these flags, these calls would not know that they needed * to write the inode. Thus, these flags only can be cleared on * synchronous writes of the inode. Since the inode will be locked * for the duration of the I/O that writes it to disk, no fsync() * or fsyncdata() will be able to run before the on-disk inode * is complete. */ if (waitfor) ip->i_flag &= ~(IN_SIZEMOD | IN_IBLKDATA); fs = ITOFS(ip); if (fs->fs_ronly && ITOUMP(ip)->um_fsckpid == 0) return (0); /* * If we are updating a snapshot and another process is currently * writing the buffer containing the inode for this snapshot then * a deadlock can occur when it tries to check the snapshot to see * if that block needs to be copied. Thus when updating a snapshot * we check to see if the buffer is already locked, and if it is * we drop the snapshot lock until the buffer has been written * and is available to us. We have to grab a reference to the * snapshot vnode to prevent it from being removed while we are * waiting for the buffer. */ flags = 0; if (IS_SNAPSHOT(ip)) flags = GB_LOCK_NOWAIT; loop: bn = fsbtodb(fs, ino_to_fsba(fs, ip->i_number)); error = ffs_breadz(VFSTOUFS(vp->v_mount), ITODEVVP(ip), bn, bn, (int) fs->fs_bsize, NULL, NULL, 0, NOCRED, flags, NULL, &bp); if (error != 0) { if (error != EBUSY) return (error); KASSERT((IS_SNAPSHOT(ip)), ("EBUSY from non-snapshot")); /* * Wait for our inode block to become available. * * Hold a reference to the vnode to protect against * ffs_snapgone(). Since we hold a reference, it can only * get reclaimed (VIRF_DOOMED flag) in a forcible downgrade * or unmount. For an unmount, the entire filesystem will be * gone, so we cannot attempt to touch anything associated * with it while the vnode is unlocked; all we can do is * pause briefly and try again. If when we relock the vnode * we discover that it has been reclaimed, updating it is no * longer necessary and we can just return an error. */ vref(vp); VOP_UNLOCK(vp); pause("ffsupd", 1); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vrele(vp); if (VN_IS_DOOMED(vp)) return (ENOENT); goto loop; } if (DOINGSOFTDEP(vp)) softdep_update_inodeblock(ip, bp, waitfor); else if (ip->i_effnlink != ip->i_nlink) panic("ffs_update: bad link cnt"); if (I_IS_UFS1(ip)) { *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; /* * XXX: FIX? The entropy here is desirable, * but the harvesting may be expensive */ random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1), RANDOM_FS_ATIME); } else { ffs_update_dinode_ckhash(fs, ip->i_din2); *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; /* * XXX: FIX? The entropy here is desirable, * but the harvesting may be expensive */ random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), RANDOM_FS_ATIME); } if (waitfor) { error = bwrite(bp); if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error)) error = 0; } else if (vm_page_count_severe() || buf_dirty_count_severe()) { bawrite(bp); error = 0; } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); error = 0; } return (error); } #define SINGLE 0 /* index of single indirect block */ #define DOUBLE 1 /* index of double indirect block */ #define TRIPLE 2 /* index of triple indirect block */ /* * Truncate the inode ip to at most length size, freeing the * disk blocks. */ int ffs_truncate(vp, length, flags, cred) struct vnode *vp; off_t length; int flags; struct ucred *cred; { struct inode *ip; ufs2_daddr_t bn, lbn, lastblock, lastiblock[UFS_NIADDR]; ufs2_daddr_t indir_lbn[UFS_NIADDR], oldblks[UFS_NDADDR + UFS_NIADDR]; ufs2_daddr_t newblks[UFS_NDADDR + UFS_NIADDR]; ufs2_daddr_t count, blocksreleased = 0, datablocks, blkno; struct bufobj *bo; struct fs *fs; struct buf *bp; struct ufsmount *ump; int softdeptrunc, journaltrunc; int needextclean, extblocks; int offset, size, level, nblocks; int i, error, allerror, indiroff, waitforupdate; u_long key; off_t osize; ip = VTOI(vp); ump = VFSTOUFS(vp->v_mount); fs = ump->um_fs; bo = &vp->v_bufobj; ASSERT_VOP_LOCKED(vp, "ffs_truncate"); if (length < 0) return (EINVAL); if (length > fs->fs_maxfilesize) return (EFBIG); #ifdef QUOTA error = getinoquota(ip); if (error) return (error); #endif /* * Historically clients did not have to specify which data * they were truncating. So, if not specified, we assume * traditional behavior, e.g., just the normal data. */ if ((flags & (IO_EXT | IO_NORMAL)) == 0) flags |= IO_NORMAL; if (!DOINGSOFTDEP(vp) && !DOINGASYNC(vp)) flags |= IO_SYNC; waitforupdate = (flags & IO_SYNC) != 0 || !DOINGASYNC(vp); /* * If we are truncating the extended-attributes, and cannot * do it with soft updates, then do it slowly here. If we are * truncating both the extended attributes and the file contents * (e.g., the file is being unlinked), then pick it off with * soft updates below. */ allerror = 0; needextclean = 0; softdeptrunc = 0; journaltrunc = DOINGSUJ(vp); journaltrunc = 0; /* XXX temp patch until bug found */ if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0) softdeptrunc = !softdep_slowdown(vp); extblocks = 0; datablocks = DIP(ip, i_blocks); if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) { extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); datablocks -= extblocks; } if ((flags & IO_EXT) && extblocks > 0) { if (length != 0) panic("ffs_truncate: partial trunc of extdata"); if (softdeptrunc || journaltrunc) { if ((flags & IO_NORMAL) == 0) goto extclean; needextclean = 1; } else { if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); #ifdef QUOTA (void) chkdq(ip, -extblocks, NOCRED, FORCE); #endif vinvalbuf(vp, V_ALT, 0, 0); vn_pages_remove(vp, OFF_TO_IDX(lblktosize(fs, -extblocks)), 0); osize = ip->i_din2->di_extsize; ip->i_din2->di_blocks -= extblocks; ip->i_din2->di_extsize = 0; for (i = 0; i < UFS_NXADDR; i++) { oldblks[i] = ip->i_din2->di_extb[i]; ip->i_din2->di_extb[i] = 0; } UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE); if ((error = ffs_update(vp, waitforupdate))) return (error); for (i = 0; i < UFS_NXADDR; i++) { if (oldblks[i] == 0) continue; ffs_blkfree(ump, fs, ITODEVVP(ip), oldblks[i], sblksize(fs, osize, i), ip->i_number, vp->v_type, NULL, SINGLETON_KEY); } } } if ((flags & IO_NORMAL) == 0) return (0); if (vp->v_type == VLNK && (ip->i_size < vp->v_mount->mnt_maxsymlinklen || datablocks == 0)) { #ifdef INVARIANTS if (length != 0) panic("ffs_truncate: partial truncate of symlink"); #endif bzero(SHORTLINK(ip), (u_int)ip->i_size); ip->i_size = 0; DIP_SET(ip, i_size, 0); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); if (needextclean) goto extclean; return (ffs_update(vp, waitforupdate)); } if (ip->i_size == length) { UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE); if (needextclean) goto extclean; return (ffs_update(vp, 0)); } if (fs->fs_ronly) panic("ffs_truncate: read-only filesystem"); if (IS_SNAPSHOT(ip)) ffs_snapremove(vp); vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; osize = ip->i_size; /* * Lengthen the size of the file. We must ensure that the * last byte of the file is allocated. Since the smallest * value of osize is 0, length will be at least 1. */ if (osize < length) { vnode_pager_setsize(vp, length); flags |= BA_CLRBUF; error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); if (error) { vnode_pager_setsize(vp, osize); return (error); } ip->i_size = length; DIP_SET(ip, i_size, length); if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; ffs_inode_bwrite(vp, bp, flags); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); return (ffs_update(vp, waitforupdate)); } /* * Lookup block number for a given offset. Zero length files * have no blocks, so return a blkno of -1. */ lbn = lblkno(fs, length - 1); if (length == 0) { blkno = -1; } else if (lbn < UFS_NDADDR) { blkno = DIP(ip, i_db[lbn]); } else { error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize, cred, BA_METAONLY, &bp); if (error) return (error); indiroff = (lbn - UFS_NDADDR) % NINDIR(fs); if (I_IS_UFS1(ip)) blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff]; else blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff]; /* * If the block number is non-zero, then the indirect block * must have been previously allocated and need not be written. * If the block number is zero, then we may have allocated * the indirect block and hence need to write it out. */ if (blkno != 0) brelse(bp); else if (flags & IO_SYNC) bwrite(bp); else bdwrite(bp); } /* * If the block number at the new end of the file is zero, * then we must allocate it to ensure that the last block of * the file is allocated. Soft updates does not handle this * case, so here we have to clean up the soft updates data * structures describing the allocation past the truncation * point. Finding and deallocating those structures is a lot of * work. Since partial truncation with a hole at the end occurs * rarely, we solve the problem by syncing the file so that it * will have no soft updates data structures left. */ if (blkno == 0 && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); if (blkno != 0 && DOINGSOFTDEP(vp)) { if (softdeptrunc == 0 && journaltrunc == 0) { /* * If soft updates cannot handle this truncation, * clean up soft dependency data structures and * fall through to the synchronous truncation. */ if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); } else { flags = IO_NORMAL | (needextclean ? IO_EXT: 0); if (journaltrunc) softdep_journal_freeblocks(ip, cred, length, flags); else softdep_setup_freeblocks(ip, length, flags); ASSERT_VOP_LOCKED(vp, "ffs_truncate1"); if (journaltrunc == 0) { UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE); error = ffs_update(vp, 0); } return (error); } } /* * Shorten the size of the file. If the last block of the * shortened file is unallocated, we must allocate it. * Additionally, if the file is not being truncated to a * block boundary, the contents of the partial block * following the end of the file must be zero'ed in * case it ever becomes accessible again because of * subsequent file growth. Directories however are not * zero'ed as they should grow back initialized to empty. */ offset = blkoff(fs, length); if (blkno != 0 && offset == 0) { ip->i_size = length; DIP_SET(ip, i_size, length); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); +#ifdef UFS_DIRHASH + if (vp->v_type == VDIR && ip->i_dirhash != NULL) + ufsdirhash_dirtrunc(ip, length); +#endif } else { lbn = lblkno(fs, length); flags |= BA_CLRBUF; error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); if (error) return (error); ffs_inode_bwrite(vp, bp, flags); /* * When we are doing soft updates and the UFS_BALLOC * above fills in a direct block hole with a full sized * block that will be truncated down to a fragment below, * we must flush out the block dependency with an FSYNC * so that we do not get a soft updates inconsistency * when we create the fragment below. */ if (DOINGSOFTDEP(vp) && lbn < UFS_NDADDR && fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); if (error) return (error); ip->i_size = length; DIP_SET(ip, i_size, length); +#ifdef UFS_DIRHASH + if (vp->v_type == VDIR && ip->i_dirhash != NULL) + ufsdirhash_dirtrunc(ip, length); +#endif size = blksize(fs, ip, lbn); if (vp->v_type != VDIR && offset != 0) bzero((char *)bp->b_data + offset, (u_int)(size - offset)); /* Kirk's code has reallocbuf(bp, size, 1) here */ allocbuf(bp, size); if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; ffs_inode_bwrite(vp, bp, flags); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); } /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) * which we want to keep. Lastblock is -1 when * the file is truncated to 0. */ lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; lastiblock[SINGLE] = lastblock - UFS_NDADDR; lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); nblocks = btodb(fs->fs_bsize); /* * Update file and block pointers on disk before we start freeing * blocks. If we crash before free'ing blocks below, the blocks * will be returned to the free list. lastiblock values are also * normalized to -1 for calls to ffs_indirtrunc below. */ for (level = TRIPLE; level >= SINGLE; level--) { oldblks[UFS_NDADDR + level] = DIP(ip, i_ib[level]); if (lastiblock[level] < 0) { DIP_SET(ip, i_ib[level], 0); lastiblock[level] = -1; } } for (i = 0; i < UFS_NDADDR; i++) { oldblks[i] = DIP(ip, i_db[i]); if (i > lastblock) DIP_SET(ip, i_db[i], 0); } UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE); allerror = ffs_update(vp, waitforupdate); /* * Having written the new inode to disk, save its new configuration * and put back the old block pointers long enough to process them. * Note that we save the new block configuration so we can check it * when we are done. */ for (i = 0; i < UFS_NDADDR; i++) { newblks[i] = DIP(ip, i_db[i]); DIP_SET(ip, i_db[i], oldblks[i]); } for (i = 0; i < UFS_NIADDR; i++) { newblks[UFS_NDADDR + i] = DIP(ip, i_ib[i]); DIP_SET(ip, i_ib[i], oldblks[UFS_NDADDR + i]); } ip->i_size = osize; DIP_SET(ip, i_size, osize); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); error = vtruncbuf(vp, length, fs->fs_bsize); if (error && (allerror == 0)) allerror = error; /* * Indirect blocks first. */ indir_lbn[SINGLE] = -UFS_NDADDR; indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; for (level = TRIPLE; level >= SINGLE; level--) { bn = DIP(ip, i_ib[level]); if (bn != 0) { error = ffs_indirtrunc(ip, indir_lbn[level], fsbtodb(fs, bn), lastiblock[level], level, &count); if (error) allerror = error; blocksreleased += count; if (lastiblock[level] < 0) { DIP_SET(ip, i_ib[level], 0); ffs_blkfree(ump, fs, ump->um_devvp, bn, fs->fs_bsize, ip->i_number, vp->v_type, NULL, SINGLETON_KEY); blocksreleased += nblocks; } } if (lastiblock[level] >= 0) goto done; } /* * All whole direct blocks or frags. */ key = ffs_blkrelease_start(ump, ump->um_devvp, ip->i_number); for (i = UFS_NDADDR - 1; i > lastblock; i--) { long bsize; bn = DIP(ip, i_db[i]); if (bn == 0) continue; DIP_SET(ip, i_db[i], 0); bsize = blksize(fs, ip, i); ffs_blkfree(ump, fs, ump->um_devvp, bn, bsize, ip->i_number, vp->v_type, NULL, key); blocksreleased += btodb(bsize); } ffs_blkrelease_finish(ump, key); if (lastblock < 0) goto done; /* * Finally, look for a change in size of the * last direct block; release any frags. */ bn = DIP(ip, i_db[lastblock]); if (bn != 0) { long oldspace, newspace; /* * Calculate amount of space we're giving * back as old block size minus new block size. */ oldspace = blksize(fs, ip, lastblock); ip->i_size = length; DIP_SET(ip, i_size, length); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); newspace = blksize(fs, ip, lastblock); if (newspace == 0) panic("ffs_truncate: newspace"); if (oldspace - newspace > 0) { /* * Block number of space to be free'd is * the old block # plus the number of frags * required for the storage we're keeping. */ bn += numfrags(fs, newspace); ffs_blkfree(ump, fs, ump->um_devvp, bn, oldspace - newspace, ip->i_number, vp->v_type, NULL, SINGLETON_KEY); blocksreleased += btodb(oldspace - newspace); } } done: #ifdef INVARIANTS for (level = SINGLE; level <= TRIPLE; level++) if (newblks[UFS_NDADDR + level] != DIP(ip, i_ib[level])) panic("ffs_truncate1: level %d newblks %jd != i_ib %jd", level, (intmax_t)newblks[UFS_NDADDR + level], (intmax_t)DIP(ip, i_ib[level])); for (i = 0; i < UFS_NDADDR; i++) if (newblks[i] != DIP(ip, i_db[i])) panic("ffs_truncate2: blkno %d newblks %jd != i_db %jd", i, (intmax_t)newblks[UFS_NDADDR + level], (intmax_t)DIP(ip, i_ib[level])); BO_LOCK(bo); if (length == 0 && (fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("ffs_truncate3: vp = %p, buffers: dirty = %d, clean = %d", vp, bo->bo_dirty.bv_cnt, bo->bo_clean.bv_cnt); BO_UNLOCK(bo); #endif /* INVARIANTS */ /* * Put back the real size. */ ip->i_size = length; DIP_SET(ip, i_size, length); if (DIP(ip, i_blocks) >= blocksreleased) DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - blocksreleased); else /* sanity */ DIP_SET(ip, i_blocks, 0); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE); #ifdef QUOTA (void) chkdq(ip, -blocksreleased, NOCRED, FORCE); #endif return (allerror); extclean: if (journaltrunc) softdep_journal_freeblocks(ip, cred, length, IO_EXT); else softdep_setup_freeblocks(ip, length, IO_EXT); return (ffs_update(vp, waitforupdate)); } /* * Release blocks associated with the inode ip and stored in the indirect * block bn. Blocks are free'd in LIFO order up to (but not including) * lastbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. */ static int ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) struct inode *ip; ufs2_daddr_t lbn, lastbn; ufs2_daddr_t dbn; int level; ufs2_daddr_t *countp; { struct buf *bp; struct fs *fs; struct ufsmount *ump; struct vnode *vp; caddr_t copy = NULL; u_long key; int i, nblocks, error = 0, allerror = 0; ufs2_daddr_t nb, nlbn, last; ufs2_daddr_t blkcount, factor, blocksreleased = 0; ufs1_daddr_t *bap1 = NULL; ufs2_daddr_t *bap2 = NULL; #define BAP(ip, i) (I_IS_UFS1(ip) ? bap1[i] : bap2[i]) fs = ITOFS(ip); ump = ITOUMP(ip); /* * Calculate index in current block of last * block to be kept. -1 indicates the entire * block so we need not calculate the index. */ factor = lbn_offset(fs, level); last = lastbn; if (lastbn > 0) last /= factor; nblocks = btodb(fs->fs_bsize); /* * Get buffer of block pointers, zero those entries corresponding * to blocks to be free'd, and update on disk copy first. Since * double(triple) indirect before single(double) indirect, calls * to VOP_BMAP() on these blocks will fail. However, we already * have the on-disk address, so we just pass it to bread() instead * of having bread() attempt to calculate it using VOP_BMAP(). */ vp = ITOV(ip); error = ffs_breadz(ump, vp, lbn, dbn, (int)fs->fs_bsize, NULL, NULL, 0, NOCRED, 0, NULL, &bp); if (error) { *countp = 0; return (error); } if (I_IS_UFS1(ip)) bap1 = (ufs1_daddr_t *)bp->b_data; else bap2 = (ufs2_daddr_t *)bp->b_data; if (lastbn != -1) { copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK); bcopy((caddr_t)bp->b_data, copy, (u_int)fs->fs_bsize); for (i = last + 1; i < NINDIR(fs); i++) if (I_IS_UFS1(ip)) bap1[i] = 0; else bap2[i] = 0; if (DOINGASYNC(vp)) { bdwrite(bp); } else { error = bwrite(bp); if (error) allerror = error; } if (I_IS_UFS1(ip)) bap1 = (ufs1_daddr_t *)copy; else bap2 = (ufs2_daddr_t *)copy; } /* * Recursively free totally unused blocks. */ key = ffs_blkrelease_start(ump, ITODEVVP(ip), ip->i_number); for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; i--, nlbn += factor) { nb = BAP(ip, i); if (nb == 0) continue; if (level > SINGLE) { if ((error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), (ufs2_daddr_t)-1, level - 1, &blkcount)) != 0) allerror = error; blocksreleased += blkcount; } ffs_blkfree(ump, fs, ITODEVVP(ip), nb, fs->fs_bsize, ip->i_number, vp->v_type, NULL, key); blocksreleased += nblocks; } ffs_blkrelease_finish(ump, key); /* * Recursively free last partial block. */ if (level > SINGLE && lastbn >= 0) { last = lastbn % factor; nb = BAP(ip, i); if (nb != 0) { error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), last, level - 1, &blkcount); if (error) allerror = error; blocksreleased += blkcount; } } if (copy != NULL) { free(copy, M_TEMP); } else { bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); } *countp = blocksreleased; return (allerror); } int ffs_rdonly(struct inode *ip) { return (ITOFS(ip)->fs_ronly != 0); } diff --git a/sys/ufs/ufs/ufs_lookup.c b/sys/ufs/ufs/ufs_lookup.c index 7dbd58f795cc..2f5fbf3d8001 100644 --- a/sys/ufs/ufs/ufs_lookup.c +++ b/sys/ufs/ufs/ufs_lookup.c @@ -1,1630 +1,1626 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ufs.h" #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef UFS_DIRHASH #include #endif #include #include #include #ifdef DIAGNOSTIC static int dirchk = 1; #else static int dirchk = 0; #endif SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, ""); /* true if old FS format...*/ #define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) static int ufs_delete_denied(struct vnode *vdp, struct vnode *tdp, struct ucred *cred, struct thread *td) { int error; #ifdef UFS_ACL /* * NFSv4 Minor Version 1, draft-ietf-nfsv4-minorversion1-03.txt * * 3.16.2.1. ACE4_DELETE vs. ACE4_DELETE_CHILD */ /* * XXX: Is this check required? */ error = VOP_ACCESS(vdp, VEXEC, cred, td); if (error) return (error); error = VOP_ACCESSX(tdp, VDELETE, cred, td); if (error == 0) return (0); error = VOP_ACCESSX(vdp, VDELETE_CHILD, cred, td); if (error == 0) return (0); error = VOP_ACCESSX(vdp, VEXPLICIT_DENY | VDELETE_CHILD, cred, td); if (error) return (error); #endif /* !UFS_ACL */ /* * Standard Unix access control - delete access requires VWRITE. */ error = VOP_ACCESS(vdp, VWRITE, cred, td); if (error) return (error); /* * If directory is "sticky", then user must own * the directory, or the file in it, else she * may not delete it (unless she's root). This * implements append-only directories. */ if ((VTOI(vdp)->i_mode & ISVTX) && VOP_ACCESS(vdp, VADMIN, cred, td) && VOP_ACCESS(tdp, VADMIN, cred, td)) return (EPERM); return (0); } /* * Convert a component of a pathname into a pointer to a locked inode. * This is a very central and rather complicated routine. * If the filesystem is not maintained in a strict tree hierarchy, * this can result in a deadlock situation (see comments in code below). * * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending * on whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it and the target of the pathname * exists, lookup returns both the target and its parent directory locked. * When creating or renaming and LOCKPARENT is specified, the target may * not be ".". When deleting and LOCKPARENT is specified, the target may * be "."., but the caller must check to ensure it does an vrele and vput * instead of two vputs. * * This routine is actually used as VOP_CACHEDLOOKUP method, and the * filesystem employs the generic vfs_cache_lookup() as VOP_LOOKUP * method. * * vfs_cache_lookup() performs the following for us: * check that it is a directory * check accessibility of directory * check for modification attempts on read-only mounts * if name found in cache * if at end of path and deleting or creating * drop it * else * return name. * return VOP_CACHEDLOOKUP() * * Overall outline of ufs_lookup: * * search for name in directory, to found or notfound * notfound: * if creating, return locked directory, leaving info on available slots * else return error * found: * if at end of path and deleting, return information to allow delete * if at end of path and rewriting (RENAME and LOCKPARENT), lock target * inode and return info to allow rewrite * if not at end, add name to cache; if at end and neither creating * nor deleting, add name to cache */ int ufs_lookup(ap) struct vop_cachedlookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { return (ufs_lookup_ino(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL)); } int ufs_lookup_ino(struct vnode *vdp, struct vnode **vpp, struct componentname *cnp, ino_t *dd_ino) { struct inode *dp; /* inode for directory being searched */ struct buf *bp; /* a buffer of directory entries */ struct direct *ep; /* the current directory entry */ int entryoffsetinblock; /* offset of ep in bp's buffer */ enum {NONE, COMPACT, FOUND} slotstatus; doff_t slotoffset; /* offset of area with free space */ doff_t i_diroff; /* cached i_diroff value. */ doff_t i_offset; /* cached i_offset value. */ int slotsize; /* size of area at slotoffset */ int slotfreespace; /* amount of space free in slot */ int slotneeded; /* size of the entry we're seeking */ int numdirpasses; /* strategy for directory search */ doff_t endsearch; /* offset to end directory search */ doff_t prevoff; /* prev entry dp->i_offset */ struct vnode *pdp; /* saved dp during symlink work */ struct vnode *tdp; /* returned by VFS_VGET */ doff_t enduseful; /* pointer past last used dir slot */ u_long bmask; /* block offset mask */ int namlen, error; struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; int nameiop = cnp->cn_nameiop; ino_t ino, ino1; int ltype; if (vpp != NULL) *vpp = NULL; dp = VTOI(vdp); if (dp->i_effnlink == 0) return (ENOENT); /* * Create a vm object if vmiodirenable is enabled. * Alternatively we could call vnode_create_vobject * in VFS_VGET but we could end up creating objects * that are never used. */ vnode_create_vobject(vdp, DIP(dp, i_size), cnp->cn_thread); bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; #ifdef DEBUG_VFS_LOCKS /* * Assert that the directory vnode is locked, and locked * exclusively for the last component lookup for modifying * operations. * * The directory-modifying operations need to save * intermediate state in the inode between namei() call and * actual directory manipulations. See fields in the struct * inode marked as 'used during directory lookup'. We must * ensure that upgrade in namei() does not happen, since * upgrade might need to unlock vdp. If quotas are enabled, * getinoquota() also requires exclusive lock to modify inode. */ ASSERT_VOP_LOCKED(vdp, "ufs_lookup1"); if ((nameiop == CREATE || nameiop == DELETE || nameiop == RENAME) && (flags & (LOCKPARENT | ISLASTCN)) == (LOCKPARENT | ISLASTCN)) ASSERT_VOP_ELOCKED(vdp, "ufs_lookup2"); #endif restart: bp = NULL; slotoffset = -1; /* * We now have a segment name to search for, and a directory to search. * * Suppress search for slots unless creating * file and at end of pathname, in which case * we watch for a place to put the new file in * case it doesn't already exist. */ ino = 0; i_diroff = dp->i_diroff; slotstatus = FOUND; slotfreespace = slotsize = slotneeded = 0; if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN)) { slotstatus = NONE; slotneeded = DIRECTSIZ(cnp->cn_namelen); } #ifdef UFS_DIRHASH /* * Use dirhash for fast operations on large directories. The logic * to determine whether to hash the directory is contained within * ufsdirhash_build(); a zero return means that it decided to hash * this directory and it successfully built up the hash table. */ if (ufsdirhash_build(dp) == 0) { /* Look for a free slot if needed. */ enduseful = dp->i_size; if (slotstatus != FOUND) { slotoffset = ufsdirhash_findfree(dp, slotneeded, &slotsize); if (slotoffset >= 0) { slotstatus = COMPACT; enduseful = ufsdirhash_enduseful(dp); if (enduseful < 0) enduseful = dp->i_size; } } /* Look up the component. */ numdirpasses = 1; entryoffsetinblock = 0; /* silence compiler warning */ switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, &i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) { case 0: ep = (struct direct *)((char *)bp->b_data + (i_offset & bmask)); goto foundentry; case ENOENT: i_offset = roundup2(dp->i_size, DIRBLKSIZ); goto notfound; default: /* Something failed; just do a linear search. */ break; } } #endif /* UFS_DIRHASH */ /* * If there is cached information on a previous search of * this directory, pick up where we last left off. * We cache only lookups as these are the most common * and have the greatest payoff. Caching CREATE has little * benefit as it usually must search the entire directory * to determine that the entry does not exist. Caching the * location of the last DELETE or RENAME has not reduced * profiling time and hence has been removed in the interest * of simplicity. */ if (nameiop != LOOKUP || i_diroff == 0 || i_diroff >= dp->i_size) { entryoffsetinblock = 0; i_offset = 0; numdirpasses = 1; } else { i_offset = i_diroff; if ((entryoffsetinblock = i_offset & bmask) && (error = UFS_BLKATOFF(vdp, (off_t)i_offset, NULL, &bp))) return (error); numdirpasses = 2; nchstats.ncs_2passes++; } prevoff = i_offset; endsearch = roundup2(dp->i_size, DIRBLKSIZ); enduseful = 0; searchloop: while (i_offset < endsearch) { /* * If necessary, get the next directory block. */ if ((i_offset & bmask) == 0) { if (bp != NULL) brelse(bp); error = UFS_BLKATOFF(vdp, (off_t)i_offset, NULL, &bp); if (error) return (error); entryoffsetinblock = 0; } /* * If still looking for a slot, and at a DIRBLKSIZE * boundary, have to start looking for free space again. */ if (slotstatus == NONE && (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { slotoffset = -1; slotfreespace = 0; } /* * Get pointer to next entry. * Full validation checks are slow, so we only check * enough to insure forward progress through the * directory. Complete checks can be run by patching * "dirchk" to be true. */ ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); if (ep->d_reclen == 0 || ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { int i; ufs_dirbad(dp, i_offset, "mangled entry"); i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); i_offset += i; entryoffsetinblock += i; continue; } /* * If an appropriate sized slot has not yet been found, * check to see if one is available. Also accumulate space * in the current block so that we can determine if * compaction is viable. */ if (slotstatus != FOUND) { int size = ep->d_reclen; if (ep->d_ino != 0) size -= DIRSIZ(OFSFMT(vdp), ep); if (size > 0) { if (size >= slotneeded) { slotstatus = FOUND; slotoffset = i_offset; slotsize = ep->d_reclen; } else if (slotstatus == NONE) { slotfreespace += size; if (slotoffset == -1) slotoffset = i_offset; if (slotfreespace >= slotneeded) { slotstatus = COMPACT; slotsize = i_offset + ep->d_reclen - slotoffset; } } } } /* * Check for a name match. */ if (ep->d_ino) { # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(vdp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if (namlen == cnp->cn_namelen && (cnp->cn_nameptr[0] == ep->d_name[0]) && !bcmp(cnp->cn_nameptr, ep->d_name, (unsigned)namlen)) { #ifdef UFS_DIRHASH foundentry: #endif /* * Save directory entry's inode number and * reclen in ndp->ni_ufs area, and release * directory buffer. */ if (vdp->v_mount->mnt_maxsymlinklen > 0 && ep->d_type == DT_WHT) { slotstatus = FOUND; slotoffset = i_offset; slotsize = ep->d_reclen; enduseful = dp->i_size; cnp->cn_flags |= ISWHITEOUT; numdirpasses--; goto notfound; } ino = ep->d_ino; goto found; } } prevoff = i_offset; i_offset += ep->d_reclen; entryoffsetinblock += ep->d_reclen; if (ep->d_ino) enduseful = i_offset; } notfound: /* * If we started in the middle of the directory and failed * to find our target, we must check the beginning as well. */ if (numdirpasses == 2) { numdirpasses--; i_offset = 0; endsearch = i_diroff; goto searchloop; } if (bp != NULL) brelse(bp); /* * If creating, and at end of pathname and current * directory has not been removed, then can consider * allowing file to be created. */ if ((nameiop == CREATE || nameiop == RENAME || (nameiop == DELETE && (cnp->cn_flags & DOWHITEOUT) && (cnp->cn_flags & ISWHITEOUT))) && (flags & ISLASTCN) && dp->i_effnlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. * * XXX: Fix the comment above. */ if (flags & WILLBEDIR) error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, cnp->cn_thread); else error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread); if (error) return (error); /* * Return an indication of where the new directory * entry should be put. If we didn't find a slot, * then set dp->i_count to 0 indicating * that the new slot belongs at the end of the * directory. If we found a slot, then the new entry * can be put in the range from dp->i_offset to * dp->i_offset + dp->i_count. */ if (slotstatus == NONE) { SET_I_OFFSET(dp, roundup2(dp->i_size, DIRBLKSIZ)); SET_I_COUNT(dp, 0); enduseful = I_OFFSET(dp); } else if (nameiop == DELETE) { SET_I_OFFSET(dp, slotoffset); if ((I_OFFSET(dp) & (DIRBLKSIZ - 1)) == 0) SET_I_COUNT(dp, 0); else SET_I_COUNT(dp, I_OFFSET(dp) - prevoff); } else { SET_I_OFFSET(dp, slotoffset); SET_I_COUNT(dp, slotsize); if (enduseful < slotoffset + slotsize) enduseful = slotoffset + slotsize; } SET_I_ENDOFF(dp, roundup2(enduseful, DIRBLKSIZ)); /* * We return with the directory locked, so that * the parameters we set up above will still be * valid if we actually decide to do a direnter(). * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory inode in ndp->ni_dvp. * The pathname buffer is saved so that the name * can be obtained later. * * NB - if the directory is unlocked, then this * information cannot be used. */ cnp->cn_flags |= SAVENAME; return (EJUSTRETURN); } /* * Insert name into cache (as non-existent) if appropriate. */ if ((cnp->cn_flags & MAKEENTRY) != 0) cache_enter(vdp, NULL, cnp); return (ENOENT); found: if (dd_ino != NULL) *dd_ino = ino; if (numdirpasses == 2) nchstats.ncs_pass2++; /* * Check that directory length properly reflects presence * of this entry. */ if (i_offset + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) { ufs_dirbad(dp, i_offset, "i_size too small"); dp->i_size = i_offset + DIRSIZ(OFSFMT(vdp), ep); DIP_SET(dp, i_size, dp->i_size); UFS_INODE_SET_FLAG(dp, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); } brelse(bp); /* * Found component in pathname. * If the final component of path name, save information * in the cache as to where the entry was found. */ if ((flags & ISLASTCN) && nameiop == LOOKUP) dp->i_diroff = rounddown2(i_offset, DIRBLKSIZ); /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. */ if (nameiop == DELETE && (flags & ISLASTCN)) { if (flags & LOCKPARENT) ASSERT_VOP_ELOCKED(vdp, __FUNCTION__); if (VOP_ISLOCKED(vdp) == LK_EXCLUSIVE) { /* * Return pointer to current entry in * dp->i_offset, and distance past previous * entry (if there is a previous entry in this * block) in dp->i_count. * * We shouldn't be setting these in the * WANTPARENT case (first lookup in rename()), but any * lookups that will result in directory changes will * overwrite these. */ SET_I_OFFSET(dp, i_offset); if ((I_OFFSET(dp) & (DIRBLKSIZ - 1)) == 0) SET_I_COUNT(dp, 0); else SET_I_COUNT(dp, I_OFFSET(dp) - prevoff); } if (dd_ino != NULL) return (0); /* * Save directory inode pointer in ndp->ni_dvp for * dirremove(). */ if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE, &tdp)) != 0) return (error); error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread); if (error) { vput(tdp); return (error); } if (dp->i_number == ino) { VREF(vdp); *vpp = vdp; vput(tdp); return (0); } *vpp = tdp; return (0); } /* * If rewriting (RENAME), return the inode and the * information required to rewrite the present directory * Must get inode of directory entry to verify it's a * regular file, or empty directory. */ if (nameiop == RENAME && (flags & ISLASTCN)) { if (flags & WILLBEDIR) error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, cnp->cn_thread); else error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread); if (error) return (error); /* * Careful about locking second inode. * This can only occur if the target is ".". */ SET_I_OFFSET(dp, i_offset); if (dp->i_number == ino) return (EISDIR); if (dd_ino != NULL) return (0); if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE, &tdp)) != 0) return (error); error = ufs_delete_denied(vdp, tdp, cred, cnp->cn_thread); if (error) { vput(tdp); return (error); } #ifdef SunOS_doesnt_do_that /* * The only purpose of this check is to return the correct * error. Assume that we want to rename directory "a" * to a file "b", and that we have no ACL_WRITE_DATA on * a containing directory, but we _do_ have ACL_APPEND_DATA. * In that case, the VOP_ACCESS check above will return 0, * and the operation will fail with ENOTDIR instead * of EACCESS. */ if (tdp->v_type == VDIR) error = VOP_ACCESSX(vdp, VWRITE | VAPPEND, cred, cnp->cn_thread); else error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread); if (error) { vput(tdp); return (error); } #endif *vpp = tdp; cnp->cn_flags |= SAVENAME; return (0); } if (dd_ino != NULL) return (0); /* * Step through the translation in the name. We do not `vput' the * directory because we may need it again if a symbolic link * is relative to the current directory. Instead we save it * unlocked as "pdp". We must get the target inode before unlocking * the directory to insure that the inode will not be removed * before we get it. We prevent deadlock by always fetching * inodes from the root, moving down the directory tree. Thus * when following backward pointers ".." we must unlock the * parent directory before getting the requested directory. * There is a potential race condition here if both the current * and parent directories are removed before the VFS_VGET for the * inode associated with ".." returns. We hope that this occurs * infrequently since we cannot avoid this race condition without * implementing a sophisticated deadlock detection algorithm. * Note also that this simple deadlock detection scheme will not * work if the filesystem has any hard links other than ".." * that point backwards in the directory structure. */ pdp = vdp; if (flags & ISDOTDOT) { error = vn_vget_ino(pdp, ino, cnp->cn_lkflags, &tdp); if (error) return (error); /* * Recheck that ".." entry in the vdp directory points * to the inode we looked up before vdp lock was * dropped. */ error = ufs_lookup_ino(pdp, NULL, cnp, &ino1); if (error) { vput(tdp); return (error); } if (ino1 != ino) { vput(tdp); goto restart; } *vpp = tdp; } else if (dp->i_number == ino) { VREF(vdp); /* we want ourself, ie "." */ /* * When we lookup "." we still can be asked to lock it * differently. */ ltype = cnp->cn_lkflags & LK_TYPE_MASK; if (ltype != VOP_ISLOCKED(vdp)) { if (ltype == LK_EXCLUSIVE) vn_lock(vdp, LK_UPGRADE | LK_RETRY); else /* if (ltype == LK_SHARED) */ vn_lock(vdp, LK_DOWNGRADE | LK_RETRY); /* * Relock for the "." case may left us with * reclaimed vnode. */ if (VN_IS_DOOMED(vdp)) { vrele(vdp); return (ENOENT); } } *vpp = vdp; } else { error = VFS_VGET(pdp->v_mount, ino, cnp->cn_lkflags, &tdp); if (error == 0 && VTOI(tdp)->i_mode == 0) { vgone(tdp); vput(tdp); error = ENOENT; } if (error) return (error); *vpp = tdp; } /* * Insert name into cache if appropriate. */ if (cnp->cn_flags & MAKEENTRY) cache_enter(vdp, *vpp, cnp); return (0); } void ufs_dirbad(ip, offset, how) struct inode *ip; doff_t offset; char *how; { struct mount *mp; mp = ITOV(ip)->v_mount; if ((mp->mnt_flag & MNT_RDONLY) == 0) panic("ufs_dirbad: %s: bad dir ino %ju at offset %ld: %s", mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number, (long)offset, how); else (void)printf("%s: bad dir ino %ju at offset %ld: %s\n", mp->mnt_stat.f_mntonname, (uintmax_t)ip->i_number, (long)offset, how); } /* * Do consistency checking on a directory entry: * record length must be multiple of 4 * entry must fit in rest of its DIRBLKSIZ block * record must be large enough to contain entry * name is not longer than UFS_MAXNAMLEN * name must be as long as advertised, and null terminated */ int ufs_dirbadentry(dp, ep, entryoffsetinblock) struct vnode *dp; struct direct *ep; int entryoffsetinblock; { int i, namlen; # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(dp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if ((ep->d_reclen & 0x3) != 0 || ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > UFS_MAXNAMLEN) { /*return (1); */ printf("First bad\n"); goto bad; } if (ep->d_ino == 0) return (0); for (i = 0; i < namlen; i++) if (ep->d_name[i] == '\0') { /*return (1); */ printf("Second bad\n"); goto bad; } if (ep->d_name[i]) goto bad; return (0); bad: return (1); } /* * Construct a new directory entry after a call to namei, using the * parameters that it left in the componentname argument cnp. The * argument ip is the inode to which the new directory entry will refer. */ void ufs_makedirentry(ip, cnp, newdirp) struct inode *ip; struct componentname *cnp; struct direct *newdirp; { u_int namelen; namelen = (unsigned)cnp->cn_namelen; KASSERT((cnp->cn_flags & SAVENAME) != 0, ("ufs_makedirentry: missing name")); KASSERT(namelen <= UFS_MAXNAMLEN, ("ufs_makedirentry: name too long")); newdirp->d_ino = ip->i_number; newdirp->d_namlen = namelen; /* Zero out after-name padding */ *(u_int32_t *)(&newdirp->d_name[namelen & ~(DIR_ROUNDUP - 1)]) = 0; bcopy(cnp->cn_nameptr, newdirp->d_name, namelen); if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) newdirp->d_type = IFTODT(ip->i_mode); else { newdirp->d_type = 0; # if (BYTE_ORDER == LITTLE_ENDIAN) { u_char tmp = newdirp->d_namlen; newdirp->d_namlen = newdirp->d_type; newdirp->d_type = tmp; } # endif } } /* * Write a directory entry after a call to namei, using the parameters * that it left in nameidata. The argument dirp is the new directory * entry contents. Dvp is a pointer to the directory to be written, * which was left locked by namei. Remaining parameters (dp->i_offset, * dp->i_count) indicate how the space for the new entry is to be obtained. * Non-null bp indicates that a directory is being created (for the * soft dependency code). */ int ufs_direnter(dvp, tvp, dirp, cnp, newdirbp, isrename) struct vnode *dvp; struct vnode *tvp; struct direct *dirp; struct componentname *cnp; struct buf *newdirbp; int isrename; { struct ucred *cr; struct thread *td; int newentrysize; struct inode *dp; struct buf *bp; u_int dsize; struct direct *ep, *nep; u_int64_t old_isize; int error, ret, blkoff, loc, spacefree, flags, namlen; char *dirbuf; td = curthread; /* XXX */ cr = td->td_ucred; dp = VTOI(dvp); newentrysize = DIRSIZ(OFSFMT(dvp), dirp); if (I_COUNT(dp) == 0) { /* * If dp->i_count is 0, then namei could find no * space in the directory. Here, dp->i_offset will * be on a directory block boundary and we will write the * new entry into a fresh block. */ if (I_OFFSET(dp) & (DIRBLKSIZ - 1)) panic("ufs_direnter: newblk"); flags = BA_CLRBUF; if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)) flags |= IO_SYNC; #ifdef QUOTA if ((error = getinoquota(dp)) != 0) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); return (error); } #endif old_isize = dp->i_size; vnode_pager_setsize(dvp, (u_long)I_OFFSET(dp) + DIRBLKSIZ); if ((error = UFS_BALLOC(dvp, (off_t)I_OFFSET(dp), DIRBLKSIZ, cr, flags, &bp)) != 0) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); vnode_pager_setsize(dvp, (u_long)old_isize); return (error); } dp->i_size = I_OFFSET(dp) + DIRBLKSIZ; DIP_SET(dp, i_size, dp->i_size); SET_I_ENDOFF(dp, dp->i_size); UFS_INODE_SET_FLAG(dp, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); dirp->d_reclen = DIRBLKSIZ; blkoff = I_OFFSET(dp) & (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) { ufsdirhash_newblk(dp, I_OFFSET(dp)); ufsdirhash_add(dp, dirp, I_OFFSET(dp)); ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, I_OFFSET(dp)); } #endif if (DOINGSOFTDEP(dvp)) { /* * Ensure that the entire newly allocated block is a * valid directory so that future growth within the * block does not have to ensure that the block is * written before the inode. */ blkoff += DIRBLKSIZ; while (blkoff < bp->b_bcount) { ((struct direct *) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } if (softdep_setup_directory_add(bp, dp, I_OFFSET(dp), dirp->d_ino, newdirbp, 1)) UFS_INODE_SET_FLAG(dp, IN_NEEDSYNC); if (newdirbp) bdwrite(newdirbp); bdwrite(bp); if ((dp->i_flag & IN_NEEDSYNC) == 0) return (UFS_UPDATE(dvp, 0)); return (0); } if (DOINGASYNC(dvp)) { bdwrite(bp); return (UFS_UPDATE(dvp, 0)); } error = bwrite(bp); ret = UFS_UPDATE(dvp, 1); if (error == 0) return (ret); return (error); } /* * If dp->i_count is non-zero, then namei found space for the new * entry in the range dp->i_offset to dp->i_offset + dp->i_count * in the directory. To use this space, we may have to compact * the entries located there, by copying them together towards the * beginning of the block, leaving the free space in one usable * chunk at the end. */ /* * Increase size of directory if entry eats into new space. * This should never push the size past a new multiple of * DIRBLKSIZE. * * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. */ if (I_OFFSET(dp) + I_COUNT(dp) > dp->i_size) { dp->i_size = I_OFFSET(dp) + I_COUNT(dp); DIP_SET(dp, i_size, dp->i_size); UFS_INODE_SET_FLAG(dp, IN_SIZEMOD | IN_MODIFIED); } /* * Get the block containing the space for the new directory entry. */ error = UFS_BLKATOFF(dvp, (off_t)I_OFFSET(dp), &dirbuf, &bp); if (error) { if (DOINGSOFTDEP(dvp) && newdirbp != NULL) bdwrite(newdirbp); return (error); } /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. If it does not, then namei * arranged that compacting the region dp->i_offset to * dp->i_offset + dp->i_count would yield the space. */ ep = (struct direct *)dirbuf; dsize = ep->d_ino ? DIRSIZ(OFSFMT(dvp), ep) : 0; spacefree = ep->d_reclen - dsize; for (loc = ep->d_reclen; loc < I_COUNT(dp); ) { nep = (struct direct *)(dirbuf + loc); /* Trim the existing slot (NB: dsize may be zero). */ ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); /* Read nep->d_reclen now as the bcopy() may clobber it. */ loc += nep->d_reclen; if (nep->d_ino == 0) { /* * A mid-block unused entry. Such entries are * never created by the kernel, but fsck_ffs * can create them (and it doesn't fix them). * * Add up the free space, and initialise the * relocated entry since we don't bcopy it. */ spacefree += nep->d_reclen; ep->d_ino = 0; dsize = 0; continue; } dsize = DIRSIZ(OFSFMT(dvp), nep); spacefree += nep->d_reclen - dsize; #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_move(dp, nep, I_OFFSET(dp) + ((char *)nep - dirbuf), I_OFFSET(dp) + ((char *)ep - dirbuf)); #endif if (DOINGSOFTDEP(dvp)) softdep_change_directoryentry_offset(bp, dp, dirbuf, (caddr_t)nep, (caddr_t)ep, dsize); else bcopy((caddr_t)nep, (caddr_t)ep, dsize); } /* * Here, `ep' points to a directory entry containing `dsize' in-use * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, * then the entry is completely unused (dsize == 0). The value * of ep->d_reclen is always indeterminate. * * Update the pointer fields in the previous entry (if any), * copy in the new entry, and write out the block. */ # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(dvp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if (ep->d_ino == 0 || (ep->d_ino == UFS_WINO && namlen == dirp->d_namlen && bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { if (spacefree + dsize < newentrysize) panic("ufs_direnter: compact1"); dirp->d_reclen = spacefree + dsize; } else { if (spacefree < newentrysize) panic("ufs_direnter: compact2"); dirp->d_reclen = spacefree; ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL && (ep->d_ino == 0 || dirp->d_reclen == spacefree)) ufsdirhash_add(dp, dirp, I_OFFSET(dp) + ((char *)ep - dirbuf)); #endif bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_checkblock(dp, dirbuf - (I_OFFSET(dp) & (DIRBLKSIZ - 1)), rounddown2(I_OFFSET(dp), DIRBLKSIZ)); #endif if (DOINGSOFTDEP(dvp)) { (void) softdep_setup_directory_add(bp, dp, I_OFFSET(dp) + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp, 0); if (newdirbp != NULL) bdwrite(newdirbp); bdwrite(bp); } else { if (DOINGASYNC(dvp)) { bdwrite(bp); error = 0; } else { error = bwrite(bp); } } UFS_INODE_SET_FLAG(dp, IN_CHANGE | IN_UPDATE); /* * If all went well, and the directory can be shortened, proceed * with the truncation. Note that we have to unlock the inode for * the entry that we just entered, as the truncation may need to * lock other inodes which can lead to deadlock if we also hold a * lock on the newly entered node. */ if (isrename == 0 && error == 0 && I_ENDOFF(dp) != 0 && I_ENDOFF(dp) < dp->i_size) { if (tvp != NULL) VOP_UNLOCK(tvp); error = UFS_TRUNCATE(dvp, (off_t)I_ENDOFF(dp), IO_NORMAL | (DOINGASYNC(dvp) ? 0 : IO_SYNC), cr); if (error != 0) vn_printf(dvp, "ufs_direnter: failed to truncate, error %d\n", error); -#ifdef UFS_DIRHASH - if (error == 0 && dp->i_dirhash != NULL) - ufsdirhash_dirtrunc(dp, I_ENDOFF(dp)); -#endif error = 0; if (tvp != NULL) vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); } return (error); } /* * Remove a directory entry after a call to namei, using * the parameters which it left in nameidata. The entry * dp->i_offset contains the offset into the directory of the * entry to be eliminated. The dp->i_count field contains the * size of the previous record in the directory. If this * is 0, the first entry is being deleted, so we need only * zero the inode number to mark the entry as free. If the * entry is not the first in the directory, we must reclaim * the space of the now empty record by adding the record size * to the size of the previous entry. */ int ufs_dirremove(dvp, ip, flags, isrmdir) struct vnode *dvp; struct inode *ip; int flags; int isrmdir; { struct inode *dp; struct direct *ep, *rep; struct buf *bp; off_t offset; int error; dp = VTOI(dvp); /* * Adjust the link count early so softdep can block if necessary. */ if (ip) { ip->i_effnlink--; UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(dvp)) { softdep_setup_unlink(dp, ip); } else { ip->i_nlink--; DIP_SET(ip, i_nlink, ip->i_nlink); UFS_INODE_SET_FLAG(ip, IN_CHANGE); } } if (flags & DOWHITEOUT) offset = I_OFFSET(dp); else offset = I_OFFSET(dp) - I_COUNT(dp); if ((error = UFS_BLKATOFF(dvp, offset, (char **)&ep, &bp)) != 0) { if (ip) { ip->i_effnlink++; UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(dvp)) { softdep_change_linkcnt(ip); } else { ip->i_nlink++; DIP_SET(ip, i_nlink, ip->i_nlink); UFS_INODE_SET_FLAG(ip, IN_CHANGE); } } return (error); } if (flags & DOWHITEOUT) { /* * Whiteout entry: set d_ino to UFS_WINO. */ ep->d_ino = UFS_WINO; ep->d_type = DT_WHT; goto out; } /* Set 'rep' to the entry being removed. */ if (I_COUNT(dp) == 0) rep = ep; else rep = (struct direct *)((char *)ep + ep->d_reclen); #ifdef UFS_DIRHASH /* * Remove the dirhash entry. This is complicated by the fact * that `ep' is the previous entry when dp->i_count != 0. */ if (dp->i_dirhash != NULL) ufsdirhash_remove(dp, rep, I_OFFSET(dp)); #endif if (ip && rep->d_ino != ip->i_number) panic("ufs_dirremove: ip %ju does not match dirent ino %ju\n", (uintmax_t)ip->i_number, (uintmax_t)rep->d_ino); /* * Zero out the file directory entry metadata to reduce disk * scavenging disclosure. */ bzero(&rep->d_name[0], rep->d_namlen); rep->d_namlen = 0; rep->d_type = 0; rep->d_ino = 0; if (I_COUNT(dp) != 0) { /* * Collapse new free space into previous entry. */ ep->d_reclen += rep->d_reclen; rep->d_reclen = 0; } #ifdef UFS_DIRHASH if (dp->i_dirhash != NULL) ufsdirhash_checkblock(dp, (char *)ep - ((I_OFFSET(dp) - I_COUNT(dp)) & (DIRBLKSIZ - 1)), rounddown2(I_OFFSET(dp), DIRBLKSIZ)); #endif out: error = 0; if (DOINGSOFTDEP(dvp)) { if (ip) softdep_setup_remove(bp, dp, ip, isrmdir); if (softdep_slowdown(dvp)) error = bwrite(bp); else bdwrite(bp); } else { if (flags & DOWHITEOUT) error = bwrite(bp); else if (DOINGASYNC(dvp)) bdwrite(bp); else error = bwrite(bp); } UFS_INODE_SET_FLAG(dp, IN_CHANGE | IN_UPDATE); /* * If the last named reference to a snapshot goes away, * drop its snapshot reference so that it will be reclaimed * when last open reference goes away. */ if (ip != NULL && (ip->i_flags & SF_SNAPSHOT) != 0 && ip->i_effnlink == 0) UFS_SNAPGONE(ip); return (error); } /* * Rewrite an existing directory entry to point at the inode * supplied. The parameters describing the directory entry are * set up by a call to namei. */ int ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) struct inode *dp, *oip; ino_t newinum; int newtype; int isrmdir; { struct buf *bp; struct direct *ep; struct vnode *vdp = ITOV(dp); int error; /* * Drop the link before we lock the buf so softdep can block if * necessary. */ oip->i_effnlink--; UFS_INODE_SET_FLAG(oip, IN_CHANGE); if (DOINGSOFTDEP(vdp)) { softdep_setup_unlink(dp, oip); } else { oip->i_nlink--; DIP_SET(oip, i_nlink, oip->i_nlink); UFS_INODE_SET_FLAG(oip, IN_CHANGE); } error = UFS_BLKATOFF(vdp, (off_t)I_OFFSET(dp), (char **)&ep, &bp); if (error == 0 && ep->d_namlen == 2 && ep->d_name[1] == '.' && ep->d_name[0] == '.' && ep->d_ino != oip->i_number) { brelse(bp); error = EIDRM; } if (error) { oip->i_effnlink++; UFS_INODE_SET_FLAG(oip, IN_CHANGE); if (DOINGSOFTDEP(vdp)) { softdep_change_linkcnt(oip); } else { oip->i_nlink++; DIP_SET(oip, i_nlink, oip->i_nlink); UFS_INODE_SET_FLAG(oip, IN_CHANGE); } return (error); } ep->d_ino = newinum; if (!OFSFMT(vdp)) ep->d_type = newtype; if (DOINGSOFTDEP(vdp)) { softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); bdwrite(bp); } else { if (DOINGASYNC(vdp)) { bdwrite(bp); error = 0; } else { error = bwrite(bp); } } UFS_INODE_SET_FLAG(dp, IN_CHANGE | IN_UPDATE); /* * If the last named reference to a snapshot goes away, * drop its snapshot reference so that it will be reclaimed * when last open reference goes away. */ if ((oip->i_flags & SF_SNAPSHOT) != 0 && oip->i_effnlink == 0) UFS_SNAPGONE(oip); return (error); } /* * Check if a directory is empty or not. * Inode supplied must be locked. * * Using a struct dirtemplate here is not precisely * what we want, but better than using a struct direct. * * NB: does not handle corrupted directories. */ int ufs_dirempty(ip, parentino, cred) struct inode *ip; ino_t parentino; struct ucred *cred; { doff_t off; struct dirtemplate dbuf; struct direct *dp = (struct direct *)&dbuf; int error, namlen; ssize_t count; #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) for (off = 0; off < ip->i_size; off += dp->d_reclen) { error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, &count, (struct thread *)0); /* * Since we read MINDIRSIZ, residual must * be 0 unless we're at end of file. */ if (error || count != 0) return (0); /* avoid infinite loops */ if (dp->d_reclen == 0) return (0); /* skip empty entries */ if (dp->d_ino == 0 || dp->d_ino == UFS_WINO) continue; /* accept only "." and ".." */ # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(ITOV(ip))) namlen = dp->d_type; else namlen = dp->d_namlen; # else namlen = dp->d_namlen; # endif if (namlen > 2) return (0); if (dp->d_name[0] != '.') return (0); /* * At this point namlen must be 1 or 2. * 1 implies ".", 2 implies ".." if second * char is also "." */ if (namlen == 1 && dp->d_ino == ip->i_number) continue; if (dp->d_name[1] == '.' && dp->d_ino == parentino) continue; return (0); } return (1); } static int ufs_dir_dd_ino(struct vnode *vp, struct ucred *cred, ino_t *dd_ino, struct vnode **dd_vp) { struct dirtemplate dirbuf; struct vnode *ddvp; int error, namlen; ASSERT_VOP_LOCKED(vp, "ufs_dir_dd_ino"); *dd_vp = NULL; if (vp->v_type != VDIR) return (ENOTDIR); /* * First check to see if we have it in the name cache. */ if ((ddvp = vn_dir_dd_ino(vp)) != NULL) { KASSERT(ddvp->v_mount == vp->v_mount, ("ufs_dir_dd_ino: Unexpected mount point crossing")); *dd_ino = VTOI(ddvp)->i_number; *dd_vp = ddvp; return (0); } /* * Have to read the directory. */ error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, NULL, NULL); if (error != 0) return (error); #if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(vp)) namlen = dirbuf.dotdot_type; else namlen = dirbuf.dotdot_namlen; #else namlen = dirbuf.dotdot_namlen; #endif if (namlen != 2 || dirbuf.dotdot_name[0] != '.' || dirbuf.dotdot_name[1] != '.') return (ENOTDIR); *dd_ino = dirbuf.dotdot_ino; return (0); } /* * Check if source directory is in the path of the target directory. */ int ufs_checkpath(ino_t source_ino, ino_t parent_ino, struct inode *target, struct ucred *cred, ino_t *wait_ino) { struct mount *mp; struct vnode *tvp, *vp, *vp1; int error; ino_t dd_ino; vp = tvp = ITOV(target); mp = vp->v_mount; *wait_ino = 0; if (target->i_number == source_ino) return (EEXIST); if (target->i_number == parent_ino) return (0); if (target->i_number == UFS_ROOTINO) return (0); for (;;) { error = ufs_dir_dd_ino(vp, cred, &dd_ino, &vp1); if (error != 0) break; if (dd_ino == source_ino) { error = EINVAL; break; } if (dd_ino == UFS_ROOTINO) break; if (dd_ino == parent_ino) break; if (vp1 == NULL) { error = VFS_VGET(mp, dd_ino, LK_SHARED | LK_NOWAIT, &vp1); if (error != 0) { *wait_ino = dd_ino; break; } } KASSERT(dd_ino == VTOI(vp1)->i_number, ("directory %ju reparented\n", (uintmax_t)VTOI(vp1)->i_number)); if (vp != tvp) vput(vp); vp = vp1; } if (error == ENOTDIR) panic("checkpath: .. not a directory\n"); if (vp1 != NULL) vput(vp1); if (vp != tvp) vput(vp); return (error); } #ifdef DIAGNOSTIC static void ufs_assert_inode_offset_owner(struct inode *ip, struct iown_tracker *tr, const char *name, const char *file, int line) { char msg[128]; snprintf(msg, sizeof(msg), "at %s@%d", file, line); ASSERT_VOP_ELOCKED(ITOV(ip), msg); MPASS((ip->i_mode & IFMT) == IFDIR); if (curthread == tr->tr_owner && ip->i_lock_gen == tr->tr_gen) return; printf("locked at\n"); stack_print(&tr->tr_st); printf("unlocked at\n"); stack_print(&tr->tr_unlock); panic("%s ip %p %jd offset owner %p %d gen %d " "curthread %p %d gen %d at %s@%d\n", name, ip, (uintmax_t)ip->i_number, tr->tr_owner, tr->tr_owner->td_tid, tr->tr_gen, curthread, curthread->td_tid, ip->i_lock_gen, file, line); } static void ufs_set_inode_offset_owner(struct inode *ip, struct iown_tracker *tr, const char *file, int line) { char msg[128]; snprintf(msg, sizeof(msg), "at %s@%d", file, line); ASSERT_VOP_ELOCKED(ITOV(ip), msg); MPASS((ip->i_mode & IFMT) == IFDIR); tr->tr_owner = curthread; tr->tr_gen = ip->i_lock_gen; stack_save(&tr->tr_st); } static void ufs_init_one_tracker(struct iown_tracker *tr) { tr->tr_owner = NULL; stack_zero(&tr->tr_st); } void ufs_init_trackers(struct inode *ip) { ufs_init_one_tracker(&ip->i_offset_tracker); ufs_init_one_tracker(&ip->i_count_tracker); ufs_init_one_tracker(&ip->i_endoff_tracker); } void ufs_unlock_tracker(struct inode *ip) { if (ip->i_count_tracker.tr_gen == ip->i_lock_gen) stack_save(&ip->i_count_tracker.tr_unlock); if (ip->i_offset_tracker.tr_gen == ip->i_lock_gen) stack_save(&ip->i_offset_tracker.tr_unlock); if (ip->i_endoff_tracker.tr_gen == ip->i_lock_gen) stack_save(&ip->i_endoff_tracker.tr_unlock); ip->i_lock_gen++; } doff_t ufs_get_i_offset(struct inode *ip, const char *file, int line) { ufs_assert_inode_offset_owner(ip, &ip->i_offset_tracker, "i_offset", file, line); return (ip->i_offset); } void ufs_set_i_offset(struct inode *ip, doff_t off, const char *file, int line) { ufs_set_inode_offset_owner(ip, &ip->i_offset_tracker, file, line); ip->i_offset = off; } int32_t ufs_get_i_count(struct inode *ip, const char *file, int line) { ufs_assert_inode_offset_owner(ip, &ip->i_count_tracker, "i_count", file, line); return (ip->i_count); } void ufs_set_i_count(struct inode *ip, int32_t cnt, const char *file, int line) { ufs_set_inode_offset_owner(ip, &ip->i_count_tracker, file, line); ip->i_count = cnt; } doff_t ufs_get_i_endoff(struct inode *ip, const char *file, int line) { ufs_assert_inode_offset_owner(ip, &ip->i_endoff_tracker, "i_endoff", file, line); return (ip->i_endoff); } void ufs_set_i_endoff(struct inode *ip, doff_t off, const char *file, int line) { ufs_set_inode_offset_owner(ip, &ip->i_endoff_tracker, file, line); ip->i_endoff = off; } #endif diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 0e7ec7ae5453..70b5a44ca21d 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1,3039 +1,3037 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_quota.h" #include "opt_suiddir.h" #include "opt_ufs.h" #include "opt_ffs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* XXX */ #include #include #include #include #include #include #include #include #include #ifdef UFS_DIRHASH #include #endif #ifdef UFS_GJOURNAL #include FEATURE(ufs_gjournal, "Journaling support through GEOM for UFS"); #endif #ifdef QUOTA FEATURE(ufs_quota, "UFS disk quotas support"); FEATURE(ufs_quota64, "64bit UFS disk quotas support"); #endif #ifdef SUIDDIR FEATURE(suiddir, "Give all new files in directory the same ownership as the directory"); #endif VFS_SMR_DECLARE; #include static vop_accessx_t ufs_accessx; static vop_fplookup_vexec_t ufs_fplookup_vexec; static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *); static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *); static vop_close_t ufs_close; static vop_create_t ufs_create; static vop_stat_t ufs_stat; static vop_getattr_t ufs_getattr; static vop_ioctl_t ufs_ioctl; static vop_link_t ufs_link; static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *, const char *); static vop_mmapped_t ufs_mmapped; static vop_mkdir_t ufs_mkdir; static vop_mknod_t ufs_mknod; static vop_open_t ufs_open; static vop_pathconf_t ufs_pathconf; static vop_print_t ufs_print; static vop_readlink_t ufs_readlink; static vop_remove_t ufs_remove; static vop_rename_t ufs_rename; static vop_rmdir_t ufs_rmdir; static vop_setattr_t ufs_setattr; static vop_strategy_t ufs_strategy; static vop_symlink_t ufs_symlink; static vop_whiteout_t ufs_whiteout; static vop_close_t ufsfifo_close; static vop_kqfilter_t ufsfifo_kqfilter; SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "UFS filesystem"); /* * A virgin directory (no blushing please). */ static struct dirtemplate mastertemplate = { 0, 12, DT_DIR, 1, ".", 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." }; static struct odirtemplate omastertemplate = { 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." }; static void ufs_itimes_locked(struct vnode *vp) { struct inode *ip; struct timespec ts; ASSERT_VI_LOCKED(vp, __func__); ip = VTOI(vp); if (UFS_RDONLY(ip)) goto out; if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) return; if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp)) UFS_INODE_SET_FLAG(ip, IN_LAZYMOD); else if (((vp->v_mount->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || (ip->i_flag & (IN_CHANGE | IN_UPDATE))) UFS_INODE_SET_FLAG(ip, IN_MODIFIED); else if (ip->i_flag & IN_ACCESS) UFS_INODE_SET_FLAG(ip, IN_LAZYACCESS); vfs_timestamp(&ts); if (ip->i_flag & IN_ACCESS) { DIP_SET(ip, i_atime, ts.tv_sec); DIP_SET(ip, i_atimensec, ts.tv_nsec); } if (ip->i_flag & IN_UPDATE) { DIP_SET(ip, i_mtime, ts.tv_sec); DIP_SET(ip, i_mtimensec, ts.tv_nsec); } if (ip->i_flag & IN_CHANGE) { DIP_SET(ip, i_ctime, ts.tv_sec); DIP_SET(ip, i_ctimensec, ts.tv_nsec); DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1); } out: ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); } void ufs_itimes(struct vnode *vp) { VI_LOCK(vp); ufs_itimes_locked(vp); VI_UNLOCK(vp); } /* * Create a regular file */ static int ufs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { int error; error = ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), ap->a_dvp, ap->a_vpp, ap->a_cnp, "ufs_create"); if (error != 0) return (error); if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0) cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp); return (0); } /* * Mknod vnode call */ /* ARGSUSED */ static int ufs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode **vpp = ap->a_vpp; struct inode *ip; ino_t ino; int error; error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), ap->a_dvp, vpp, ap->a_cnp, "ufs_mknod"); if (error) return (error); ip = VTOI(*vpp); UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); if (vap->va_rdev != VNOVAL) { /* * Want to be able to use this to make badblock * inodes, so don't truncate the dev number. */ DIP_SET(ip, i_rdev, vap->va_rdev); } /* * Remove inode, then reload it through VFS_VGET so it is * checked to see if it is an alias of an existing entry in * the inode cache. XXX I don't believe this is necessary now. */ (*vpp)->v_type = VNON; ino = ip->i_number; /* Save this before vgone() invalidates ip. */ vgone(*vpp); vput(*vpp); error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); if (error) { *vpp = NULL; return (error); } return (0); } /* * Open called. */ /* ARGSUSED */ static int ufs_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; struct inode *ip; if (vp->v_type == VCHR || vp->v_type == VBLK) return (EOPNOTSUPP); ip = VTOI(vp); vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td); if (vp->v_type == VREG && (vn_irflag_read(vp) & VIRF_PGREAD) == 0) { vn_irflag_set_cond(vp, VIRF_PGREAD); } /* * Files marked append-only must be opened for appending. */ if ((ip->i_flags & APPEND) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); return (0); } /* * Close called. * * Update the times on the inode. */ /* ARGSUSED */ static int ufs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; int usecount; VI_LOCK(vp); usecount = vp->v_usecount; if (usecount > 1) ufs_itimes_locked(vp); VI_UNLOCK(vp); return (0); } static int ufs_accessx(ap) struct vop_accessx_args /* { struct vnode *a_vp; accmode_t a_accmode; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); accmode_t accmode = ap->a_accmode; int error; #ifdef UFS_ACL struct acl *acl; acl_type_t type; #endif /* * Disallow write attempts on read-only filesystems; * unless the file is a socket, fifo, or a block or * character device resident on the filesystem. */ if (accmode & VMODIFY_PERMS) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); #ifdef QUOTA /* * Inode is accounted in the quotas only if struct * dquot is attached to it. VOP_ACCESS() is called * from vn_open_cred() and provides a convenient * point to call getinoquota(). The lock mode is * exclusive when the file is opening for write. */ if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { error = getinoquota(ip); if (error != 0) return (error); } #endif break; default: break; } } /* * If immutable bit set, nobody gets to write it. "& ~VADMIN_PERMS" * permits the owner of the file to remove the IMMUTABLE flag. */ if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) && (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT))) return (EPERM); #ifdef UFS_ACL if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) { if (vp->v_mount->mnt_flag & MNT_NFS4ACLS) type = ACL_TYPE_NFS4; else type = ACL_TYPE_ACCESS; acl = acl_alloc(M_WAITOK); if (type == ACL_TYPE_NFS4) error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td); else error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td); switch (error) { case 0: if (type == ACL_TYPE_NFS4) { error = vaccess_acl_nfs4(vp->v_type, ip->i_uid, ip->i_gid, acl, accmode, ap->a_cred); } else { error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess_acl_posix1e(vp->v_type, ip->i_uid, ip->i_gid, acl, accmode, ap->a_cred); } break; default: if (error != EOPNOTSUPP) printf( "ufs_accessx(): Error retrieving ACL on object (%d).\n", error); /* * XXX: Fall back until debugged. Should * eventually possibly log an error, and return * EPERM for safety. */ error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, accmode, ap->a_cred); } acl_free(acl); return (error); } #endif /* !UFS_ACL */ error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, accmode, ap->a_cred); return (error); } /* * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see * the comment above cache_fplookup for details. */ static int ufs_fplookup_vexec(ap) struct vop_fplookup_vexec_args /* { struct vnode *a_vp; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp; struct inode *ip; struct ucred *cred; mode_t all_x, mode; vp = ap->a_vp; ip = VTOI_SMR(vp); if (__predict_false(ip == NULL)) return (EAGAIN); /* * XXX ACL race * * ACLs are not supported and UFS clears/sets this flag on mount and * remount. However, we may still be racing with seeing them and there * is no provision to make sure they were accounted for. This matches * the behavior of the locked case, since the lookup there is also * racy: mount takes no measures to block anyone from progressing. */ all_x = S_IXUSR | S_IXGRP | S_IXOTH; mode = atomic_load_short(&ip->i_mode); if (__predict_true((mode & all_x) == all_x)) return (0); cred = ap->a_cred; return (vaccess_vexec_smr(mode, ip->i_uid, ip->i_gid, cred)); } /* ARGSUSED */ static int ufs_stat(struct vop_stat_args *ap) { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct stat *sb = ap->a_sb; int error; error = vop_stat_helper_pre(ap); if (__predict_false(error)) return (error); VI_LOCK(vp); ufs_itimes_locked(vp); if (I_IS_UFS1(ip)) { sb->st_atim.tv_sec = ip->i_din1->di_atime; sb->st_atim.tv_nsec = ip->i_din1->di_atimensec; } else { sb->st_atim.tv_sec = ip->i_din2->di_atime; sb->st_atim.tv_nsec = ip->i_din2->di_atimensec; } VI_UNLOCK(vp); sb->st_dev = dev2udev(ITOUMP(ip)->um_dev); sb->st_ino = ip->i_number; sb->st_mode = (ip->i_mode & ~IFMT) | VTTOIF(vp->v_type); sb->st_nlink = ip->i_effnlink; sb->st_uid = ip->i_uid; sb->st_gid = ip->i_gid; if (I_IS_UFS1(ip)) { sb->st_rdev = ip->i_din1->di_rdev; sb->st_size = ip->i_din1->di_size; sb->st_mtim.tv_sec = ip->i_din1->di_mtime; sb->st_mtim.tv_nsec = ip->i_din1->di_mtimensec; sb->st_ctim.tv_sec = ip->i_din1->di_ctime; sb->st_ctim.tv_nsec = ip->i_din1->di_ctimensec; sb->st_birthtim.tv_sec = -1; sb->st_birthtim.tv_nsec = 0; sb->st_blocks = dbtob((u_quad_t)ip->i_din1->di_blocks) / S_BLKSIZE; } else { sb->st_rdev = ip->i_din2->di_rdev; sb->st_size = ip->i_din2->di_size; sb->st_mtim.tv_sec = ip->i_din2->di_mtime; sb->st_mtim.tv_nsec = ip->i_din2->di_mtimensec; sb->st_ctim.tv_sec = ip->i_din2->di_ctime; sb->st_ctim.tv_nsec = ip->i_din2->di_ctimensec; sb->st_birthtim.tv_sec = ip->i_din2->di_birthtime; sb->st_birthtim.tv_nsec = ip->i_din2->di_birthnsec; sb->st_blocks = dbtob((u_quad_t)ip->i_din2->di_blocks) / S_BLKSIZE; } sb->st_blksize = max(PAGE_SIZE, vp->v_mount->mnt_stat.f_iosize); sb->st_flags = ip->i_flags; sb->st_gen = ip->i_gen; return (vop_stat_helper_post(ap, error)); } /* ARGSUSED */ static int ufs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct vattr *vap = ap->a_vap; VI_LOCK(vp); ufs_itimes_locked(vp); if (I_IS_UFS1(ip)) { vap->va_atime.tv_sec = ip->i_din1->di_atime; vap->va_atime.tv_nsec = ip->i_din1->di_atimensec; } else { vap->va_atime.tv_sec = ip->i_din2->di_atime; vap->va_atime.tv_nsec = ip->i_din2->di_atimensec; } VI_UNLOCK(vp); /* * Copy from inode table */ vap->va_fsid = dev2udev(ITOUMP(ip)->um_dev); vap->va_fileid = ip->i_number; vap->va_mode = ip->i_mode & ~IFMT; vap->va_nlink = ip->i_effnlink; vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; if (I_IS_UFS1(ip)) { vap->va_rdev = ip->i_din1->di_rdev; vap->va_size = ip->i_din1->di_size; vap->va_mtime.tv_sec = ip->i_din1->di_mtime; vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec; vap->va_ctime.tv_sec = ip->i_din1->di_ctime; vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec; vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks); vap->va_filerev = ip->i_din1->di_modrev; } else { vap->va_rdev = ip->i_din2->di_rdev; vap->va_size = ip->i_din2->di_size; vap->va_mtime.tv_sec = ip->i_din2->di_mtime; vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec; vap->va_ctime.tv_sec = ip->i_din2->di_ctime; vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec; vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime; vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec; vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks); vap->va_filerev = ip->i_din2->di_modrev; } vap->va_flags = ip->i_flags; vap->va_gen = ip->i_gen; vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; vap->va_type = IFTOVT(ip->i_mode); return (0); } /* * Set attribute vnode op. called from several syscalls */ static int ufs_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ucred *cred = ap->a_cred; struct thread *td = curthread; int error; /* * Check for unsettable attributes. */ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } if (vap->va_flags != VNOVAL) { if ((vap->va_flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK | SF_SNAPSHOT | UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE | UF_SPARSE | UF_SYSTEM)) != 0) return (EOPNOTSUPP); if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * Callers may only modify the file flags on objects they * have VADMIN rights for. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) return (error); /* * Unprivileged processes are not permitted to unset system * flags, or modify flags if any system flags are set. * Privileged non-jail processes may not modify system flags * if securelevel > 0 and any existing system flags are set. * Privileged jail processes behave like privileged non-jail * processes if the PR_ALLOW_CHFLAGS permission bit is set; * otherwise, they behave like unprivileged processes. */ if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) { if (ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { error = securelevel_gt(cred, 0); if (error) return (error); } /* The snapshot flag cannot be toggled. */ if ((vap->va_flags ^ ip->i_flags) & SF_SNAPSHOT) return (EPERM); } else { if (ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) return (EPERM); } ip->i_flags = vap->va_flags; DIP_SET(ip, i_flags, vap->va_flags); UFS_INODE_SET_FLAG(ip, IN_CHANGE); error = UFS_UPDATE(vp, 0); if (ip->i_flags & (IMMUTABLE | APPEND)) return (error); } /* * If immutable or append, no one can change any of its attributes * except the ones already handled (in some cases, file flags * including the immutability flags themselves for the superuser). */ if (ip->i_flags & (IMMUTABLE | APPEND)) return (EPERM); /* * Go through the fields and update iff not VNOVAL. */ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, td)) != 0) return (error); } if (vap->va_size != VNOVAL) { /* * XXX most of the following special cases should be in * callers instead of in N filesystems. The VDIR check * mostly already is. */ switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: /* * Truncation should have an effect in these cases. * Disallow it if the filesystem is read-only or * the file is being snapshotted. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((ip->i_flags & SF_SNAPSHOT) != 0) return (EPERM); break; default: /* * According to POSIX, the result is unspecified * for file types other than regular files, * directories and shared memory objects. We * don't support shared memory objects in the file * system, and have dubious support for truncating * symlinks. Just ignore the request in other cases. */ return (0); } if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL | ((vap->va_vaflags & VA_SYNC) != 0 ? IO_SYNC : 0), cred)) != 0) return (error); } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_birthtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((ip->i_flags & SF_SNAPSHOT) != 0) return (EPERM); error = vn_utimes_perm(vp, vap, cred, td); if (error != 0) return (error); UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_MODIFIED); if (vap->va_atime.tv_sec != VNOVAL) { ip->i_flag &= ~IN_ACCESS; DIP_SET(ip, i_atime, vap->va_atime.tv_sec); DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec); } if (vap->va_mtime.tv_sec != VNOVAL) { ip->i_flag &= ~IN_UPDATE; DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec); DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec); } if (vap->va_birthtime.tv_sec != VNOVAL && I_IS_UFS2(ip)) { ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec; ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec; } error = UFS_UPDATE(vp, 0); if (error) return (error); } error = 0; if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode & (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH))) return (EPERM); error = ufs_chmod(vp, (int)vap->va_mode, cred, td); } return (error); } #ifdef UFS_ACL static int ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode, int file_owner_id, struct ucred *cred, struct thread *td) { int error; struct acl *aclp; aclp = acl_alloc(M_WAITOK); error = ufs_getacl_nfs4_internal(vp, aclp, td); /* * We don't have to handle EOPNOTSUPP here, as the filesystem claims * it supports ACLs. */ if (error) goto out; acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id); error = ufs_setacl_nfs4_internal(vp, aclp, td); out: acl_free(aclp); return (error); } #endif /* UFS_ACL */ static int ufs_mmapped(ap) struct vop_mmapped_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp; struct inode *ip; struct mount *mp; vp = ap->a_vp; ip = VTOI(vp); mp = vp->v_mount; if ((mp->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) UFS_INODE_SET_FLAG_SHARED(ip, IN_ACCESS); /* * XXXKIB No UFS_UPDATE(ap->a_vp, 0) there. */ return (0); } /* * Change the mode on a file. * Inode must be locked before calling. */ static int ufs_chmod(vp, mode, cred, td) struct vnode *vp; int mode; struct ucred *cred; struct thread *td; { struct inode *ip = VTOI(vp); int newmode, error; /* * To modify the permissions on a file, must possess VADMIN * for that file. */ if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td))) return (error); /* * Privileged processes may set the sticky bit on non-directories, * as well as set the setgid bit on a file with a group that the * process is not a member of. Both of these are allowed in * jail(8). */ if (vp->v_type != VDIR && (mode & S_ISTXT)) { if (priv_check_cred(cred, PRIV_VFS_STICKYFILE)) return (EFTYPE); } if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { error = priv_check_cred(cred, PRIV_VFS_SETGID); if (error) return (error); } /* * Deny setting setuid if we are not the file owner. */ if ((mode & ISUID) && ip->i_uid != cred->cr_uid) { error = priv_check_cred(cred, PRIV_VFS_ADMIN); if (error) return (error); } newmode = ip->i_mode & ~ALLPERMS; newmode |= (mode & ALLPERMS); UFS_INODE_SET_MODE(ip, newmode); DIP_SET(ip, i_mode, ip->i_mode); UFS_INODE_SET_FLAG(ip, IN_CHANGE); #ifdef UFS_ACL if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0) error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td); #endif if (error == 0 && (ip->i_flag & IN_CHANGE) != 0) error = UFS_UPDATE(vp, 0); return (error); } /* * Perform chown operation on inode ip; * inode must be locked prior to call. */ static int ufs_chown(vp, uid, gid, cred, td) struct vnode *vp; uid_t uid; gid_t gid; struct ucred *cred; struct thread *td; { struct inode *ip = VTOI(vp); uid_t ouid; gid_t ogid; int error = 0; #ifdef QUOTA int i; ufs2_daddr_t change; #endif if (uid == (uid_t)VNOVAL) uid = ip->i_uid; if (gid == (gid_t)VNOVAL) gid = ip->i_gid; /* * To modify the ownership of a file, must possess VADMIN for that * file. */ if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td))) return (error); /* * To change the owner of a file, or change the group of a file to a * group of which we are not a member, the caller must have * privilege. */ if (((uid != ip->i_uid && uid != cred->cr_uid) || (gid != ip->i_gid && !groupmember(gid, cred))) && (error = priv_check_cred(cred, PRIV_VFS_CHOWN))) return (error); ogid = ip->i_gid; ouid = ip->i_uid; #ifdef QUOTA if ((error = getinoquota(ip)) != 0) return (error); if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } change = DIP(ip, i_blocks); (void) chkdq(ip, -change, cred, CHOWN|FORCE); (void) chkiq(ip, -1, cred, CHOWN|FORCE); for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } #endif ip->i_gid = gid; DIP_SET(ip, i_gid, gid); ip->i_uid = uid; DIP_SET(ip, i_uid, uid); #ifdef QUOTA if ((error = getinoquota(ip)) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) goto good; else (void) chkdq(ip, -change, cred, CHOWN|FORCE); } for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } } ip->i_gid = ogid; DIP_SET(ip, i_gid, ogid); ip->i_uid = ouid; DIP_SET(ip, i_uid, ouid); if (getinoquota(ip) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } (void) chkdq(ip, change, cred, FORCE|CHOWN); (void) chkiq(ip, 1, cred, FORCE|CHOWN); (void) getinoquota(ip); } return (error); good: if (getinoquota(ip)) panic("ufs_chown: lost quota"); #endif /* QUOTA */ UFS_INODE_SET_FLAG(ip, IN_CHANGE); if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID)); DIP_SET(ip, i_mode, ip->i_mode); } } error = UFS_UPDATE(vp, 0); return (error); } static int ufs_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct inode *ip; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; int error; struct thread *td; td = curthread; ip = VTOI(vp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(dvp)->i_flags & APPEND)) return (EPERM); if (DOINGSOFTDEP(dvp)) { error = softdep_prelink(dvp, vp, true); if (error != 0) { MPASS(error == ERELOOKUP); return (error); } } #ifdef UFS_GJOURNAL ufs_gjournal_orphan(vp); #endif error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); if (ip->i_nlink <= 0) vp->v_vflag |= VV_NOSYNC; if ((ip->i_flags & SF_SNAPSHOT) != 0) { /* * Avoid deadlock where another thread is trying to * update the inodeblock for dvp and is waiting on * snaplk. Temporary unlock the vnode lock for the * unlinked file and sync the directory. This should * allow vput() of the directory to not block later on * while holding the snapshot vnode locked, assuming * that the directory hasn't been unlinked too. */ VOP_UNLOCK(vp); (void) VOP_FSYNC(dvp, MNT_WAIT, td); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } return (error); } static void print_bad_link_count(const char *funcname, struct vnode *dvp) { struct inode *dip; dip = VTOI(dvp); uprintf("%s: Bad link count %d on parent inode %jd in file system %s\n", funcname, dip->i_effnlink, (intmax_t)dip->i_number, dvp->v_mount->mnt_stat.f_mntonname); } /* * link vnode call */ static int ufs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct inode *ip; struct direct newdir; int error; #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_link: no name"); #endif if (DOINGSOFTDEP(tdvp)) { error = softdep_prelink(tdvp, vp, true); if (error != 0) { MPASS(error == ERELOOKUP); return (error); } } if (VTOI(tdvp)->i_effnlink < 2) { print_bad_link_count("ufs_link", tdvp); error = EINVAL; goto out; } ip = VTOI(vp); if (ip->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto out; } /* * The file may have been removed after namei droped the original * lock. */ if (ip->i_effnlink == 0) { error = ENOENT; goto out; } if (ip->i_flags & (IMMUTABLE | APPEND)) { error = EPERM; goto out; } ip->i_effnlink++; ip->i_nlink++; DIP_SET(ip, i_nlink, ip->i_nlink); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(vp)) softdep_setup_link(VTOI(tdvp), ip); error = UFS_UPDATE(vp, !DOINGSOFTDEP(vp) && !DOINGASYNC(vp)); if (!error) { ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0); } if (error) { ip->i_effnlink--; ip->i_nlink--; DIP_SET(ip, i_nlink, ip->i_nlink); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(vp)) softdep_revert_link(VTOI(tdvp), ip); } out: return (error); } /* * whiteout vnode call */ static int ufs_whiteout(ap) struct vop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct direct newdir; int error = 0; if (DOINGSOFTDEP(dvp) && (ap->a_flags == CREATE || ap->a_flags == DELETE)) { error = softdep_prelink(dvp, NULL, true); if (error != 0) { MPASS(error == ERELOOKUP); return (error); } } switch (ap->a_flags) { case LOOKUP: /* 4.4 format directories support whiteout operations */ if (dvp->v_mount->mnt_maxsymlinklen > 0) return (0); return (EOPNOTSUPP); case CREATE: /* create a new directory whiteout */ #ifdef INVARIANTS if ((cnp->cn_flags & SAVENAME) == 0) panic("ufs_whiteout: missing name"); if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif newdir.d_ino = UFS_WINO; newdir.d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); newdir.d_type = DT_WHT; error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0); break; case DELETE: /* remove an existing directory whiteout */ #ifdef INVARIANTS if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif cnp->cn_flags &= ~DOWHITEOUT; error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); break; default: panic("ufs_whiteout: unknown op"); } return (error); } static volatile int rename_restarts; SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD, __DEVOLATILE(int *, &rename_restarts), 0, "Times rename had to restart due to lock contention"); /* * Rename system call. * rename("foo", "bar"); * is essentially * unlink("bar"); * link("foo", "bar"); * unlink("foo"); * but ``atomically''. Can't do full commit without saving state in the * inode on disk which isn't feasible at this time. Best we can do is * always guarantee the target exists. * * Basic algorithm is: * * 1) Bump link count on source while we're linking it to the * target. This also ensure the inode won't be deleted out * from underneath us while we work (it may be truncated by * a concurrent `trunc' or `open' for creation). * 2) Link source to destination. If destination already exists, * delete it first. * 3) Unlink source reference to inode if still around. If a * directory was moved and the parent of the destination * is different from the source, patch the ".." entry in the * directory. */ static int ufs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *tvp = ap->a_tvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *nvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct thread *td = fcnp->cn_thread; struct inode *fip, *tip, *tdp, *fdp; struct direct newdir; off_t endoff; int doingdirectory, newparent; int error = 0; struct mount *mp; ino_t ino; bool want_seqc_end; want_seqc_end = false; #ifdef INVARIANTS if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ufs_rename: no name"); #endif endoff = 0; mp = tdvp->v_mount; VOP_UNLOCK(tdvp); if (tvp && tvp != tdvp) VOP_UNLOCK(tvp); /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; mp = NULL; goto releout; } relock: /* * We need to acquire 2 to 4 locks depending on whether tvp is NULL * and fdvp and tdvp are the same directory. Subsequently we need * to double-check all paths and in the directory rename case we * need to verify that we are not creating a directory loop. To * handle this we acquire all but fdvp using non-blocking * acquisitions. If we fail to acquire any lock in the path we will * drop all held locks, acquire the new lock in a blocking fashion, * and then release it and restart the rename. This acquire/release * step ensures that we do not spin on a lock waiting for release. */ error = vn_lock(fdvp, LK_EXCLUSIVE); if (error) goto releout; if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { VOP_UNLOCK(fdvp); error = vn_lock(tdvp, LK_EXCLUSIVE); if (error) goto releout; VOP_UNLOCK(tdvp); atomic_add_int(&rename_restarts, 1); goto relock; } /* * Re-resolve fvp to be certain it still exists and fetch the * correct vnode. */ error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); if (error) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); goto releout; } error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); if (error) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); if (error != EBUSY) goto releout; error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); if (error != 0) goto releout; VOP_UNLOCK(nvp); vrele(fvp); fvp = nvp; atomic_add_int(&rename_restarts, 1); goto relock; } vrele(fvp); fvp = nvp; /* * Re-resolve tvp and acquire the vnode lock if present. */ error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino); if (error != 0 && error != EJUSTRETURN) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); VOP_UNLOCK(fvp); goto releout; } /* * If tvp disappeared we just carry on. */ if (error == EJUSTRETURN && tvp != NULL) { vrele(tvp); tvp = NULL; } /* * Get the tvp ino if the lookup succeeded. We may have to restart * if the non-blocking acquire fails. */ if (error == 0) { nvp = NULL; error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); if (tvp) vrele(tvp); tvp = nvp; if (error) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); VOP_UNLOCK(fvp); if (error != EBUSY) goto releout; error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); if (error != 0) goto releout; vput(nvp); atomic_add_int(&rename_restarts, 1); goto relock; } } if (DOINGSOFTDEP(fdvp)) { error = softdep_prerename(fdvp, fvp, tdvp, tvp); if (error != 0) { if (error == ERELOOKUP) { atomic_add_int(&rename_restarts, 1); goto relock; } goto releout; } } fdp = VTOI(fdvp); fip = VTOI(fvp); tdp = VTOI(tdvp); tip = NULL; if (tvp) tip = VTOI(tvp); if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(tdvp)->i_flags & APPEND))) { error = EPERM; goto unlockout; } /* * Renaming a file to itself has no effect. The upper layers should * not call us in that case. However, things could change after * we drop the locks above. */ if (fvp == tvp) { error = 0; goto unlockout; } doingdirectory = 0; newparent = 0; ino = fip->i_number; if (fip->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto unlockout; } if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (fdp->i_flags & APPEND)) { error = EPERM; goto unlockout; } if ((fip->i_mode & IFMT) == IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || fdp == fip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { error = EINVAL; goto unlockout; } if (fdp->i_number != tdp->i_number) newparent = tdp->i_number; doingdirectory = 1; } if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) || (tvp != NULL && tvp->v_type == VDIR && tvp->v_mountedhere != NULL)) { error = EXDEV; goto unlockout; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory hierarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". */ if (doingdirectory && newparent) { error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); if (error) goto unlockout; error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, &ino); /* * We encountered a lock that we have to wait for. Unlock * everything else and VGET before restarting. */ if (ino) { VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); VOP_UNLOCK(tdvp); if (tvp) VOP_UNLOCK(tvp); error = VFS_VGET(mp, ino, LK_SHARED, &nvp); if (error == 0) vput(nvp); atomic_add_int(&rename_restarts, 1); goto relock; } if (error) goto unlockout; if ((tcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost to startdir"); } if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 || tdp->i_effnlink == 0) panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp); if (tvp != NULL) vn_seqc_write_begin(tvp); vn_seqc_write_begin(tdvp); vn_seqc_write_begin(fvp); vn_seqc_write_begin(fdvp); want_seqc_end = true; /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, the link count * may be wrong, but correctable. */ fip->i_effnlink++; fip->i_nlink++; DIP_SET(fip, i_nlink, fip->i_nlink); UFS_INODE_SET_FLAG(fip, IN_CHANGE); if (DOINGSOFTDEP(fvp)) softdep_setup_link(tdp, fip); error = UFS_UPDATE(fvp, !DOINGSOFTDEP(fvp) && !DOINGASYNC(fvp)); if (error) goto bad; /* * 2) If target doesn't exist, link the target * to the source and unlink the source. * Otherwise, rewrite the target directory * entry to reference the source inode and * expunge the original entry's existence. */ if (tip == NULL) { if (ITODEV(tdp) != ITODEV(fip)) panic("ufs_rename: EXDEV"); if (doingdirectory && newparent) { /* * Account for ".." in new directory. * When source and destination have the same * parent we don't adjust the link count. The * actual link modification is completed when * .. is rewritten below. */ if (tdp->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto bad; } } ufs_makedirentry(fip, tcnp, &newdir); error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1); if (error) goto bad; /* Setup tdvp for directory compaction if needed. */ if (I_COUNT(tdp) != 0 && I_ENDOFF(tdp) != 0 && I_ENDOFF(tdp) < tdp->i_size) endoff = I_ENDOFF(tdp); } else { if (ITODEV(tip) != ITODEV(tdp) || ITODEV(tip) != ITODEV(fip)) panic("ufs_rename: EXDEV"); /* * Short circuit rename(foo, foo). */ if (tip->i_number == fip->i_number) panic("ufs_rename: same file"); /* * If the parent directory is "sticky", then the caller * must possess VADMIN for the parent directory, or the * destination of the rename. This implements append-only * directories. */ if ((tdp->i_mode & S_ISTXT) && VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) && VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((tip->i_mode & IFMT) == IFDIR) { if ((tip->i_effnlink > 2) || !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } if (doingdirectory) { if (!newparent) { tdp->i_effnlink--; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(tdp); } tip->i_effnlink--; if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(tip); } error = ufs_dirrewrite(tdp, tip, fip->i_number, IFTODT(fip->i_mode), (doingdirectory && newparent) ? newparent : doingdirectory); if (error) { if (doingdirectory) { if (!newparent) { tdp->i_effnlink++; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(tdp); } tip->i_effnlink++; if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(tip); } goto bad; } if (doingdirectory && !DOINGSOFTDEP(tvp)) { /* * The only stuff left in the directory is "." * and "..". The "." reference is inconsequential * since we are quashing it. We have removed the "." * reference and the reference in the parent directory, * but there may be other hard links. The soft * dependency code will arrange to do these operations * after the parent directory entry has been deleted on * disk, so when running with that code we avoid doing * them now. */ if (!newparent) { tdp->i_nlink--; DIP_SET(tdp, i_nlink, tdp->i_nlink); UFS_INODE_SET_FLAG(tdp, IN_CHANGE); } tip->i_nlink--; DIP_SET(tip, i_nlink, tip->i_nlink); UFS_INODE_SET_FLAG(tip, IN_CHANGE); } } /* * 3) Unlink the source. We have to resolve the path again to * fixup the directory offset and count for ufs_dirremove. */ if (fdvp == tdvp) { error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); if (error) panic("ufs_rename: from entry went away!"); if (ino != fip->i_number) panic("ufs_rename: ino mismatch %ju != %ju\n", (uintmax_t)ino, (uintmax_t)fip->i_number); } /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. */ if (doingdirectory && newparent) { /* * If tip exists we simply use its link, otherwise we must * add a new one. */ if (tip == NULL) { tdp->i_effnlink++; tdp->i_nlink++; DIP_SET(tdp, i_nlink, tdp->i_nlink); UFS_INODE_SET_FLAG(tdp, IN_CHANGE); if (DOINGSOFTDEP(tdvp)) softdep_setup_dotdot_link(tdp, fip); error = UFS_UPDATE(tdvp, !DOINGSOFTDEP(tdvp) && !DOINGASYNC(tdvp)); /* Don't go to bad here as the new link exists. */ if (error) goto unlockout; } else if (DOINGSUJ(tdvp)) /* Journal must account for each new link. */ softdep_setup_dotdot_link(tdp, fip); SET_I_OFFSET(fip, mastertemplate.dot_reclen); ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0); cache_purge(fdvp); } error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0); /* * The kern_renameat() looks up the fvp using the DELETE flag, which * causes the removal of the name cache entry for fvp. * As the relookup of the fvp is done in two steps: * ufs_lookup_ino() and then VFS_VGET(), another thread might do a * normal lookup of the from name just before the VFS_VGET() call, * causing the cache entry to be re-instantiated. * * The same issue also applies to tvp if it exists as * otherwise we may have a stale name cache entry for the new * name that references the old i-node if it has other links * or open file descriptors. */ cache_vop_rename(fdvp, fvp, tdvp, tvp, fcnp, tcnp); unlockout: if (want_seqc_end) { if (tvp != NULL) vn_seqc_write_end(tvp); vn_seqc_write_end(tdvp); vn_seqc_write_end(fvp); vn_seqc_write_end(fdvp); } vput(fdvp); vput(fvp); if (tvp) vput(tvp); /* * If compaction or fsync was requested do it now that other locks * are no longer needed. */ if (error == 0 && endoff != 0) { do { error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | (DOINGASYNC(tdvp) ? 0 : IO_SYNC), tcnp->cn_cred); } while (error == ERELOOKUP); if (error != 0 && !ffs_fsfail_cleanup(VFSTOUFS(mp), error)) vn_printf(tdvp, "ufs_rename: failed to truncate, error %d\n", error); #ifdef UFS_DIRHASH if (error != 0) ufsdirhash_free(tdp); - else if (tdp->i_dirhash != NULL) - ufsdirhash_dirtrunc(tdp, endoff); #endif /* * Even if the directory compaction failed, rename was * succesful. Do not propagate a UFS_TRUNCATE() error * to the caller. */ error = 0; } if (error == 0 && tdp->i_flag & IN_NEEDSYNC) { do { error = VOP_FSYNC(tdvp, MNT_WAIT, td); } while (error == ERELOOKUP); } vput(tdvp); return (error); bad: fip->i_effnlink--; fip->i_nlink--; DIP_SET(fip, i_nlink, fip->i_nlink); UFS_INODE_SET_FLAG(fip, IN_CHANGE); if (DOINGSOFTDEP(fvp)) softdep_revert_link(tdp, fip); goto unlockout; releout: if (want_seqc_end) { if (tvp != NULL) vn_seqc_write_end(tvp); vn_seqc_write_end(tdvp); vn_seqc_write_end(fvp); vn_seqc_write_end(fdvp); } vrele(fdvp); vrele(fvp); vrele(tdvp); if (tvp) vrele(tvp); return (error); } #ifdef UFS_ACL static int ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp, mode_t dmode, struct ucred *cred, struct thread *td) { int error; struct inode *ip = VTOI(tvp); struct acl *dacl, *acl; acl = acl_alloc(M_WAITOK); dacl = acl_alloc(M_WAITOK); /* * Retrieve default ACL from parent, if any. */ error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); switch (error) { case 0: /* * Retrieved a default ACL, so merge mode and ACL if * necessary. If the ACL is empty, fall through to * the "not defined or available" case. */ if (acl->acl_cnt != 0) { dmode = acl_posix1e_newfilemode(dmode, acl); UFS_INODE_SET_MODE(ip, dmode); DIP_SET(ip, i_mode, dmode); *dacl = *acl; ufs_sync_acl_from_inode(ip, acl); break; } /* FALLTHROUGH */ case EOPNOTSUPP: /* * Just use the mode as-is. */ UFS_INODE_SET_MODE(ip, dmode); DIP_SET(ip, i_mode, dmode); error = 0; goto out; default: goto out; } /* * XXX: If we abort now, will Soft Updates notify the extattr * code that the EAs for the file need to be released? */ error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); if (error == 0) error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td); switch (error) { case 0: break; case EOPNOTSUPP: /* * XXX: This should not happen, as EOPNOTSUPP above * was supposed to free acl. */ printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); /* panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()"); */ break; default: goto out; } out: acl_free(acl); acl_free(dacl); return (error); } static int ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp, mode_t mode, struct ucred *cred, struct thread *td) { int error; struct inode *ip = VTOI(tvp); struct acl *acl; acl = acl_alloc(M_WAITOK); /* * Retrieve default ACL for parent, if any. */ error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); switch (error) { case 0: /* * Retrieved a default ACL, so merge mode and ACL if * necessary. */ if (acl->acl_cnt != 0) { /* * Two possible ways for default ACL to not * be present. First, the EA can be * undefined, or second, the default ACL can * be blank. If it's blank, fall through to * the it's not defined case. */ mode = acl_posix1e_newfilemode(mode, acl); UFS_INODE_SET_MODE(ip, mode); DIP_SET(ip, i_mode, mode); ufs_sync_acl_from_inode(ip, acl); break; } /* FALLTHROUGH */ case EOPNOTSUPP: /* * Just use the mode as-is. */ UFS_INODE_SET_MODE(ip, mode); DIP_SET(ip, i_mode, mode); error = 0; goto out; default: goto out; } /* * XXX: If we abort now, will Soft Updates notify the extattr * code that the EAs for the file need to be released? */ error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); switch (error) { case 0: break; case EOPNOTSUPP: /* * XXX: This should not happen, as EOPNOTSUPP above was * supposed to free acl. */ printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " "but no VOP_SETACL()\n"); /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " "but no VOP_SETACL()"); */ break; default: goto out; } out: acl_free(acl); return (error); } static int ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp, mode_t child_mode, struct ucred *cred, struct thread *td) { int error; struct acl *parent_aclp, *child_aclp; parent_aclp = acl_alloc(M_WAITOK); child_aclp = acl_alloc(M_WAITOK | M_ZERO); error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td); if (error) goto out; acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp, child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR); error = ufs_setacl_nfs4_internal(tvp, child_aclp, td); if (error) goto out; out: acl_free(parent_aclp); acl_free(child_aclp); return (error); } #endif /* * Mkdir system call */ static int ufs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; struct vnode *tvp; struct buf *bp; struct dirtemplate dirtemplate, *dtp; struct direct newdir; int error, dmode; long blkoff; #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_mkdir: no name"); #endif dp = VTOI(dvp); if (dp->i_nlink >= UFS_LINK_MAX) { error = EMLINK; goto out; } dmode = vap->va_mode & 0777; dmode |= IFDIR; /* * Must simulate part of ufs_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries. */ if (dp->i_effnlink < 2) { print_bad_link_count("ufs_mkdir", dvp); error = EINVAL; goto out; } if (DOINGSOFTDEP(dvp)) { error = softdep_prelink(dvp, NULL, true); if (error != 0) { MPASS(error == ERELOOKUP); return (error); } } error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); if (error) goto out; vn_seqc_write_begin(tvp); ip = VTOI(tvp); ip->i_gid = dp->i_gid; DIP_SET(ip, i_gid, dp->i_gid); #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; gid_t ucred_group; ucp = cnp->cn_cred; #endif /* * If we are hacking owners here, (only do this where told to) * and we are not giving it TO root, (would subvert quotas) * then go ahead and give it to the other user. * The new directory also inherits the SUID bit. * If user's UID and dir UID are the same, * 'give it away' so that the SUID is still forced on. */ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (dp->i_mode & ISUID) && dp->i_uid) { dmode |= ISUID; ip->i_uid = dp->i_uid; DIP_SET(ip, i_uid, dp->i_uid); #ifdef QUOTA if (dp->i_uid != cnp->cn_cred->cr_uid) { /* * Make sure the correct user gets charged * for the space. * Make a dummy credential for the victim. * XXX This seems to never be accessed out of * our context so a stack variable is ok. */ refcount_init(&ucred.cr_ref, 1); ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups = &ucred_group; ucred.cr_groups[0] = dp->i_gid; ucp = &ucred; } #endif } else { ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); } #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); vn_seqc_write_end(tvp); vgone(tvp); vput(tvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); vn_seqc_write_end(tvp); vgone(tvp); vput(tvp); return (error); } #endif #endif /* !SUIDDIR */ UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); UFS_INODE_SET_MODE(ip, dmode); DIP_SET(ip, i_mode, dmode); tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 2; ip->i_nlink = 2; DIP_SET(ip, i_nlink, 2); if (cnp->cn_flags & ISWHITEOUT) { ip->i_flags |= UF_OPAQUE; DIP_SET(ip, i_flags, ip->i_flags); } /* * Bump link count in parent directory to reflect work done below. * Should be done before reference is created so cleanup is * possible if we crash. */ dp->i_effnlink++; dp->i_nlink++; DIP_SET(dp, i_nlink, dp->i_nlink); UFS_INODE_SET_FLAG(dp, IN_CHANGE); if (DOINGSOFTDEP(dvp)) softdep_setup_mkdir(dp, ip); error = UFS_UPDATE(dvp, !DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)); if (error) goto bad; #ifdef MAC if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, dvp, tvp, cnp); if (error) goto bad; } #endif #ifdef UFS_ACL if (dvp->v_mount->mnt_flag & MNT_ACLS) { error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } #endif /* !UFS_ACL */ /* * Initialize directory with "." and ".." from static template. */ if (dvp->v_mount->mnt_maxsymlinklen > 0) dtp = &mastertemplate; else dtp = (struct dirtemplate *)&omastertemplate; dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; vnode_pager_setsize(tvp, DIRBLKSIZ); if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, BA_CLRBUF, &bp)) != 0) goto bad; ip->i_size = DIRBLKSIZ; DIP_SET(ip, i_size, DIRBLKSIZ); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); if (DOINGSOFTDEP(tvp)) { /* * Ensure that the entire newly allocated block is a * valid directory so that future growth within the * block does not have to ensure that the block is * written before the inode. */ blkoff = DIRBLKSIZ; while (blkoff < bp->b_bcount) { ((struct direct *) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } } if ((error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp))) != 0) { (void)bwrite(bp); goto bad; } /* * Directory set up, now install its entry in the parent directory. * * If we are not doing soft dependencies, then we must write out the * buffer containing the new directory body before entering the new * name in the parent. If we are doing soft dependencies, then the * buffer containing the new directory body will be passed to and * released in the soft dependency code after the code has attached * an appropriate ordering dependency to the buffer which ensures that * the buffer is written before the new name is written in the parent. */ if (DOINGASYNC(dvp)) bdwrite(bp); else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp)))) goto bad; ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, bp, 0); bad: if (error == 0) { *ap->a_vpp = tvp; vn_seqc_write_end(tvp); } else { dp->i_effnlink--; dp->i_nlink--; DIP_SET(dp, i_nlink, dp->i_nlink); UFS_INODE_SET_FLAG(dp, IN_CHANGE); /* * No need to do an explicit VOP_TRUNCATE here, vrele will * do this for us because we set the link count to 0. */ ip->i_effnlink = 0; ip->i_nlink = 0; DIP_SET(ip, i_nlink, 0); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(tvp)) softdep_revert_mkdir(dp, ip); vn_seqc_write_end(tvp); vgone(tvp); vput(tvp); } out: return (error); } /* * Rmdir system call. */ static int ufs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; int error; ip = VTOI(vp); dp = VTOI(dvp); /* * Do not remove a directory that is in the process of being renamed. * Verify the directory is empty (and valid). Rmdir ".." will not be * valid since ".." will contain a reference to the current directory * and thus be non-empty. Do not allow the removal of mounted on * directories (this can happen when an NFS exported filesystem * tries to remove a locally mounted on directory). */ error = 0; if (dp->i_effnlink <= 2) { if (dp->i_effnlink == 2) print_bad_link_count("ufs_rmdir", dvp); error = EINVAL; goto out; } if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; } if ((dp->i_flags & APPEND) || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } if (vp->v_mountedhere != 0) { error = EINVAL; goto out; } if (DOINGSOFTDEP(dvp)) { error = softdep_prelink(dvp, vp, false); if (error != 0) { MPASS(error == ERELOOKUP); return (error); } } #ifdef UFS_GJOURNAL ufs_gjournal_orphan(vp); #endif /* * Delete reference to directory before purging * inode. If we crash in between, the directory * will be reattached to lost+found, */ dp->i_effnlink--; ip->i_effnlink--; if (DOINGSOFTDEP(vp)) softdep_setup_rmdir(dp, ip); error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); if (error) { dp->i_effnlink++; ip->i_effnlink++; if (DOINGSOFTDEP(vp)) softdep_revert_rmdir(dp, ip); goto out; } /* * The only stuff left in the directory is "." and "..". The "." * reference is inconsequential since we are quashing it. The soft * dependency code will arrange to do these operations after * the parent directory entry has been deleted on disk, so * when running with that code we avoid doing them now. */ if (!DOINGSOFTDEP(vp)) { dp->i_nlink--; DIP_SET(dp, i_nlink, dp->i_nlink); UFS_INODE_SET_FLAG(dp, IN_CHANGE); error = UFS_UPDATE(dvp, 0); ip->i_nlink--; DIP_SET(ip, i_nlink, ip->i_nlink); UFS_INODE_SET_FLAG(ip, IN_CHANGE); } cache_vop_rmdir(dvp, vp); #ifdef UFS_DIRHASH /* Kill any active hash; i_effnlink == 0, so it will not come back. */ if (ip->i_dirhash != NULL) ufsdirhash_free(ip); #endif out: return (error); } /* * symlink -- make a symbolic link */ static int ufs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; const char *a_target; } */ *ap; { struct vnode *vp, **vpp = ap->a_vpp; struct inode *ip; int len, error; error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, vpp, ap->a_cnp, "ufs_symlink"); if (error) return (error); vp = *vpp; len = strlen(ap->a_target); if (len < vp->v_mount->mnt_maxsymlinklen) { ip = VTOI(vp); bcopy(ap->a_target, SHORTLINK(ip), len); ip->i_size = len; DIP_SET(ip, i_size, len); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); error = UFS_UPDATE(vp, 0); } else error = vn_rdwr(UIO_WRITE, vp, __DECONST(void *, ap->a_target), len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, ap->a_cnp->cn_cred, NOCRED, NULL, NULL); if (error) vput(vp); return (error); } /* * Vnode op for reading directories. */ int ufs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long **a_cookies; } */ *ap; { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct buf *bp; struct inode *ip; struct direct *dp, *edp; u_long *cookies; struct dirent dstdp; off_t offset, startoffset; size_t readcnt, skipcnt; ssize_t startresid; u_int ncookies; int error; if (uio->uio_offset < 0) return (EINVAL); ip = VTOI(vp); if (ip->i_effnlink == 0) return (0); if (ap->a_ncookies != NULL) { if (uio->uio_resid < 0) ncookies = 0; else ncookies = uio->uio_resid; if (uio->uio_offset >= ip->i_size) ncookies = 0; else if (ip->i_size - uio->uio_offset < ncookies) ncookies = ip->i_size - uio->uio_offset; ncookies = ncookies / (offsetof(struct direct, d_name) + 4) + 1; cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } else { ncookies = 0; cookies = NULL; } offset = startoffset = uio->uio_offset; startresid = uio->uio_resid; error = 0; while (error == 0 && uio->uio_resid > 0 && uio->uio_offset < ip->i_size) { error = UFS_BLKATOFF(vp, uio->uio_offset, NULL, &bp); if (error) break; if (bp->b_offset + bp->b_bcount > ip->i_size) readcnt = ip->i_size - bp->b_offset; else readcnt = bp->b_bcount; skipcnt = (size_t)(uio->uio_offset - bp->b_offset) & ~(size_t)(DIRBLKSIZ - 1); offset = bp->b_offset + skipcnt; dp = (struct direct *)&bp->b_data[skipcnt]; edp = (struct direct *)&bp->b_data[readcnt]; while (error == 0 && uio->uio_resid > 0 && dp < edp) { if (dp->d_reclen <= offsetof(struct direct, d_name) || (caddr_t)dp + dp->d_reclen > (caddr_t)edp) { error = EIO; break; } #if BYTE_ORDER == LITTLE_ENDIAN /* Old filesystem format. */ if (vp->v_mount->mnt_maxsymlinklen <= 0) { dstdp.d_namlen = dp->d_type; dstdp.d_type = dp->d_namlen; } else #endif { dstdp.d_namlen = dp->d_namlen; dstdp.d_type = dp->d_type; } if (offsetof(struct direct, d_name) + dstdp.d_namlen > dp->d_reclen) { error = EIO; break; } if (offset < startoffset || dp->d_ino == 0) goto nextentry; dstdp.d_fileno = dp->d_ino; dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp); bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); /* NOTE: d_off is the offset of the *next* entry. */ dstdp.d_off = offset + dp->d_reclen; dirent_terminate(&dstdp); if (dstdp.d_reclen > uio->uio_resid) { if (uio->uio_resid == startresid) error = EINVAL; else error = EJUSTRETURN; break; } /* Advance dp. */ error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio); if (error) break; if (cookies != NULL) { KASSERT(ncookies > 0, ("ufs_readdir: cookies buffer too small")); *cookies = offset + dp->d_reclen; cookies++; ncookies--; } nextentry: offset += dp->d_reclen; dp = (struct direct *)((caddr_t)dp + dp->d_reclen); } bqrelse(bp); uio->uio_offset = offset; } /* We need to correct uio_offset. */ uio->uio_offset = offset; if (error == EJUSTRETURN) error = 0; if (ap->a_ncookies != NULL) { if (error == 0) { ap->a_ncookies -= ncookies; } else { free(*ap->a_cookies, M_TEMP); *ap->a_ncookies = 0; *ap->a_cookies = NULL; } } if (error == 0 && ap->a_eofflag) *ap->a_eofflag = ip->i_size <= uio->uio_offset; return (error); } /* * Return target name of a symbolic link */ static int ufs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); doff_t isize; isize = ip->i_size; if ((isize < vp->v_mount->mnt_maxsymlinklen) || DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */ return (uiomove(SHORTLINK(ip), isize, ap->a_uio)); } return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); } /* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. * * In order to be able to swap to a file, the ufs_bmaparray() operation may not * deadlock on memory. See ufs_bmap() for details. */ static int ufs_strategy(ap) struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap; { struct buf *bp = ap->a_bp; struct vnode *vp = ap->a_vp; ufs2_daddr_t blkno; int error; if (bp->b_blkno == bp->b_lblkno) { error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL); bp->b_blkno = blkno; if (error) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return (0); } if ((long)bp->b_blkno == -1) vfs_bio_clrbuf(bp); } if ((long)bp->b_blkno == -1) { bufdone(bp); return (0); } bp->b_iooffset = dbtob(bp->b_blkno); BO_STRATEGY(VFSTOUFS(vp->v_mount)->um_bo, bp); return (0); } /* * Print out the contents of an inode. */ static int ufs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); printf("\tnlink=%d, effnlink=%d, size=%jd", ip->i_nlink, ip->i_effnlink, (intmax_t)ip->i_size); if (I_IS_UFS2(ip)) printf(", extsize %d", ip->i_din2->di_extsize); printf("\n\tgeneration=%jx, uid=%d, gid=%d, flags=0x%b\n", (uintmax_t)ip->i_gen, ip->i_uid, ip->i_gid, (u_int)ip->i_flags, PRINT_INODE_FLAGS); printf("\tino %lu, on dev %s", (u_long)ip->i_number, devtoname(ITODEV(ip))); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * Close wrapper for fifos. * * Update the times on the inode then do device close. */ static int ufsfifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; int usecount; VI_LOCK(vp); usecount = vp->v_usecount; if (usecount > 1) ufs_itimes_locked(vp); VI_UNLOCK(vp); return (fifo_specops.vop_close(ap)); } /* * Kqfilter wrapper for fifos. * * Fall through to ufs kqfilter routines if needed */ static int ufsfifo_kqfilter(ap) struct vop_kqfilter_args *ap; { int error; error = fifo_specops.vop_kqfilter(ap); if (error) error = vfs_kqfilter(ap); return (error); } /* * Return POSIX pathconf information applicable to ufs filesystems. */ static int ufs_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { int error; error = 0; switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = UFS_LINK_MAX; break; case _PC_NAME_MAX: *ap->a_retval = UFS_MAXNAMLEN; break; case _PC_PIPE_BUF: if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) *ap->a_retval = PIPE_BUF; else error = EINVAL; break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; break; case _PC_NO_TRUNC: *ap->a_retval = 1; break; #ifdef UFS_ACL case _PC_ACL_EXTENDED: if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) *ap->a_retval = 1; else *ap->a_retval = 0; break; case _PC_ACL_NFS4: if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS) *ap->a_retval = 1; else *ap->a_retval = 0; break; #endif case _PC_ACL_PATH_MAX: #ifdef UFS_ACL if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) *ap->a_retval = ACL_MAX_ENTRIES; else *ap->a_retval = 3; #else *ap->a_retval = 3; #endif break; #ifdef MAC case _PC_MAC_PRESENT: if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL) *ap->a_retval = 1; else *ap->a_retval = 0; break; #endif case _PC_MIN_HOLE_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_PRIO_IO: *ap->a_retval = 0; break; case _PC_SYNC_IO: *ap->a_retval = 0; break; case _PC_ALLOC_SIZE_MIN: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; break; case _PC_FILESIZEBITS: *ap->a_retval = 64; break; case _PC_REC_INCR_XFER_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_MAX_XFER_SIZE: *ap->a_retval = -1; /* means ``unlimited'' */ break; case _PC_REC_MIN_XFER_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_XFER_ALIGN: *ap->a_retval = PAGE_SIZE; break; case _PC_SYMLINK_MAX: *ap->a_retval = MAXPATHLEN; break; default: error = vop_stdpathconf(ap); break; } return (error); } /* * Initialize the vnode associated with a new inode, handle aliased * vnodes. */ int ufs_vinit(mntp, fifoops, vpp) struct mount *mntp; struct vop_vector *fifoops; struct vnode **vpp; { struct inode *ip; struct vnode *vp; vp = *vpp; ASSERT_VOP_LOCKED(vp, "ufs_vinit"); ip = VTOI(vp); vp->v_type = IFTOVT(ip->i_mode); /* * Only unallocated inodes should be of type VNON. */ if (ip->i_mode != 0 && vp->v_type == VNON) return (EINVAL); if (vp->v_type == VFIFO) vp->v_op = fifoops; if (ip->i_number == UFS_ROOTINO) vp->v_vflag |= VV_ROOT; *vpp = vp; return (0); } /* * Allocate a new inode. * Vnode dvp must be locked. */ static int ufs_makeinode(mode, dvp, vpp, cnp, callfunc) int mode; struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; const char *callfunc; { struct inode *ip, *pdir; struct direct newdir; struct vnode *tvp; int error; pdir = VTOI(dvp); #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("%s: no name", callfunc); #endif *vpp = NULL; if ((mode & IFMT) == 0) mode |= IFREG; if (pdir->i_effnlink < 2) { print_bad_link_count(callfunc, dvp); return (EINVAL); } if (DOINGSOFTDEP(dvp)) { error = softdep_prelink(dvp, NULL, true); if (error != 0) { MPASS(error == ERELOOKUP); return (error); } } error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); if (error) return (error); ip = VTOI(tvp); ip->i_gid = pdir->i_gid; DIP_SET(ip, i_gid, pdir->i_gid); #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; gid_t ucred_group; ucp = cnp->cn_cred; #endif /* * If we are not the owner of the directory, * and we are hacking owners here, (only do this where told to) * and we are not giving it TO root, (would subvert quotas) * then go ahead and give it to the other user. * Note that this drops off the execute bits for security. */ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (pdir->i_mode & ISUID) && (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { ip->i_uid = pdir->i_uid; DIP_SET(ip, i_uid, ip->i_uid); mode &= ~07111; #ifdef QUOTA /* * Make sure the correct user gets charged * for the space. * Quickly knock up a dummy credential for the victim. * XXX This seems to never be accessed out of our * context so a stack variable is ok. */ refcount_init(&ucred.cr_ref, 1); ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups = &ucred_group; ucred.cr_groups[0] = pdir->i_gid; ucp = &ucred; #endif } else { ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); } #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(pdir, ip); UFS_VFREE(tvp, ip->i_number, mode); vgone(tvp); vput(tvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(pdir, ip); UFS_VFREE(tvp, ip->i_number, mode); vgone(tvp); vput(tvp); return (error); } #endif #endif /* !SUIDDIR */ vn_seqc_write_begin(tvp); /* Mostly to cover asserts */ UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); UFS_INODE_SET_MODE(ip, mode); DIP_SET(ip, i_mode, mode); tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 1; ip->i_nlink = 1; DIP_SET(ip, i_nlink, 1); if (DOINGSOFTDEP(tvp)) softdep_setup_create(VTOI(dvp), ip); if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID)) { UFS_INODE_SET_MODE(ip, ip->i_mode & ~ISGID); DIP_SET(ip, i_mode, ip->i_mode); } if (cnp->cn_flags & ISWHITEOUT) { ip->i_flags |= UF_OPAQUE; DIP_SET(ip, i_flags, ip->i_flags); } /* * Make sure inode goes to disk before directory entry. */ error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp)); if (error) goto bad; #ifdef MAC if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, dvp, tvp, cnp); if (error) goto bad; } #endif #ifdef UFS_ACL if (dvp->v_mount->mnt_flag & MNT_ACLS) { error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } #endif /* !UFS_ACL */ ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0); if (error) goto bad; vn_seqc_write_end(tvp); *vpp = tvp; return (0); bad: /* * Write error occurred trying to update the inode * or the directory so must deallocate the inode. */ ip->i_effnlink = 0; ip->i_nlink = 0; DIP_SET(ip, i_nlink, 0); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(tvp)) softdep_revert_create(VTOI(dvp), ip); vn_seqc_write_end(tvp); vgone(tvp); vput(tvp); return (error); } static int ufs_ioctl(struct vop_ioctl_args *ap) { struct vnode *vp; int error; vp = ap->a_vp; switch (ap->a_command) { case FIOSEEKDATA: error = vn_lock(vp, LK_SHARED); if (error == 0) { error = ufs_bmap_seekdata(vp, (off_t *)ap->a_data); VOP_UNLOCK(vp); } else error = EBADF; return (error); case FIOSEEKHOLE: return (vn_bmap_seekhole(vp, ap->a_command, (off_t *)ap->a_data, ap->a_cred)); default: return (ENOTTY); } } static int ufs_read_pgcache(struct vop_read_pgcache_args *ap) { struct uio *uio; struct vnode *vp; uio = ap->a_uio; vp = ap->a_vp; VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) != 0, vp); if (uio->uio_resid > ptoa(io_hold_cnt) || uio->uio_offset < 0 || (ap->a_ioflag & IO_DIRECT) != 0) return (EJUSTRETURN); return (vn_read_from_obj(vp, uio)); } /* Global vfs data structures for ufs. */ struct vop_vector ufs_vnodeops = { .vop_default = &default_vnodeops, .vop_fsync = VOP_PANIC, .vop_read = VOP_PANIC, .vop_reallocblks = VOP_PANIC, .vop_write = VOP_PANIC, .vop_accessx = ufs_accessx, .vop_bmap = ufs_bmap, .vop_fplookup_vexec = ufs_fplookup_vexec, .vop_fplookup_symlink = VOP_EAGAIN, .vop_cachedlookup = ufs_lookup, .vop_close = ufs_close, .vop_create = ufs_create, .vop_stat = ufs_stat, .vop_getattr = ufs_getattr, .vop_inactive = ufs_inactive, .vop_ioctl = ufs_ioctl, .vop_link = ufs_link, .vop_lookup = vfs_cache_lookup, .vop_mmapped = ufs_mmapped, .vop_mkdir = ufs_mkdir, .vop_mknod = ufs_mknod, .vop_need_inactive = ufs_need_inactive, .vop_open = ufs_open, .vop_pathconf = ufs_pathconf, .vop_poll = vop_stdpoll, .vop_print = ufs_print, .vop_read_pgcache = ufs_read_pgcache, .vop_readdir = ufs_readdir, .vop_readlink = ufs_readlink, .vop_reclaim = ufs_reclaim, .vop_remove = ufs_remove, .vop_rename = ufs_rename, .vop_rmdir = ufs_rmdir, .vop_setattr = ufs_setattr, #ifdef MAC .vop_setlabel = vop_stdsetlabel_ea, #endif .vop_strategy = ufs_strategy, .vop_symlink = ufs_symlink, .vop_whiteout = ufs_whiteout, #ifdef UFS_EXTATTR .vop_getextattr = ufs_getextattr, .vop_deleteextattr = ufs_deleteextattr, .vop_setextattr = ufs_setextattr, #endif #ifdef UFS_ACL .vop_getacl = ufs_getacl, .vop_setacl = ufs_setacl, .vop_aclcheck = ufs_aclcheck, #endif }; VFS_VOP_VECTOR_REGISTER(ufs_vnodeops); struct vop_vector ufs_fifoops = { .vop_default = &fifo_specops, .vop_fsync = VOP_PANIC, .vop_accessx = ufs_accessx, .vop_close = ufsfifo_close, .vop_getattr = ufs_getattr, .vop_inactive = ufs_inactive, .vop_kqfilter = ufsfifo_kqfilter, .vop_pathconf = ufs_pathconf, .vop_print = ufs_print, .vop_read = VOP_PANIC, .vop_reclaim = ufs_reclaim, .vop_setattr = ufs_setattr, #ifdef MAC .vop_setlabel = vop_stdsetlabel_ea, #endif .vop_write = VOP_PANIC, #ifdef UFS_EXTATTR .vop_getextattr = ufs_getextattr, .vop_deleteextattr = ufs_deleteextattr, .vop_setextattr = ufs_setextattr, #endif #ifdef UFS_ACL .vop_getacl = ufs_getacl, .vop_setacl = ufs_setacl, .vop_aclcheck = ufs_aclcheck, #endif }; VFS_VOP_VECTOR_REGISTER(ufs_fifoops);