diff --git a/sbin/dump/traverse.c b/sbin/dump/traverse.c index 3630d2240f58..08e902667759 100644 --- a/sbin/dump/traverse.c +++ b/sbin/dump/traverse.c @@ -1,1014 +1,1010 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint #if 0 static char sccsid[] = "@(#)traverse.c 8.7 (Berkeley) 6/15/95"; #endif static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "dump.h" union dinode { struct ufs1_dinode dp1; struct ufs2_dinode dp2; }; #define DIP(dp, field) \ ((sblock->fs_magic == FS_UFS1_MAGIC) ? \ (dp)->dp1.field : (dp)->dp2.field) #define DIP_SET(dp, field, val) do {\ if (sblock->fs_magic == FS_UFS1_MAGIC) \ (dp)->dp1.field = (val); \ else \ (dp)->dp2.field = (val); \ } while (0) #define HASDUMPEDFILE 0x1 #define HASSUBDIRS 0x2 static int dirindir(ino_t ino, ufs2_daddr_t blkno, int level, long *size, long *tapesize, int nodump, ino_t maxino); static void dmpindir(union dinode *dp, ino_t ino, ufs2_daddr_t blk, int level, off_t *size); static void ufs1_blksout(ufs1_daddr_t *blkp, int frags, ino_t ino); static void ufs2_blksout(union dinode *dp, ufs2_daddr_t *blkp, int frags, ino_t ino, int last); static int appendextdata(union dinode *dp); static void writeextdata(union dinode *dp, ino_t ino, int added); static int searchdir(ino_t ino, ufs2_daddr_t blkno, long size, long filesize, long *tapesize, int nodump, ino_t maxino); static long blockest(union dinode *dp); /* * This is an estimation of the number of TP_BSIZE blocks in the file. * It estimates the number of blocks in files with holes by assuming * that all of the blocks accounted for by di_blocks are data blocks * (when some of the blocks are usually used for indirect pointers); * hence the estimate may be high. */ static long blockest(union dinode *dp) { long blkest, sizeest; /* * dp->di_size is the size of the file in bytes. * dp->di_blocks stores the number of sectors actually in the file. * If there are more sectors than the size would indicate, this just * means that there are indirect blocks in the file or unused * sectors in the last file block; we can safely ignore these * (blkest = sizeest below). * If the file is bigger than the number of sectors would indicate, * then the file has holes in it. In this case we must use the * block count to estimate the number of data blocks used, but * we use the actual size for estimating the number of indirect * dump blocks (sizeest vs. blkest in the indirect block * calculation). */ if ((DIP(dp, di_flags) & SF_SNAPSHOT) != 0) return (1); blkest = howmany(dbtob(DIP(dp, di_blocks)), TP_BSIZE); sizeest = howmany(DIP(dp, di_size), TP_BSIZE); if (blkest > sizeest) blkest = sizeest; if (DIP(dp, di_size) > sblock->fs_bsize * UFS_NDADDR) { /* calculate the number of indirect blocks on the dump tape */ blkest += howmany(sizeest - UFS_NDADDR * sblock->fs_bsize / TP_BSIZE, TP_NINDIR); } return (blkest + 1); } /* Auxiliary macro to pick up files changed since previous dump. */ #define CHANGEDSINCE(dp, t) \ (DIP(dp, di_mtime) >= (t) || DIP(dp, di_ctime) >= (t)) /* The WANTTODUMP macro decides whether a file should be dumped. */ #ifdef UF_NODUMP #define WANTTODUMP(dp) \ (CHANGEDSINCE(dp, spcl.c_ddate) && \ (nonodump || (DIP(dp, di_flags) & UF_NODUMP) != UF_NODUMP)) #else #define WANTTODUMP(dp) CHANGEDSINCE(dp, spcl.c_ddate) #endif /* * Dump pass 1. * * Walk the inode list for a file system to find all allocated inodes * that have been modified since the previous dump time. Also, find all * the directories in the file system. */ int mapfiles(ino_t maxino, long *tapesize) { int i, cg, mode, inosused; int anydirskipped = 0; union dinode *dp; struct cg *cgp; ino_t ino; u_char *cp; if ((cgp = malloc(sblock->fs_cgsize)) == NULL) quit("mapfiles: cannot allocate memory.\n"); for (cg = 0; cg < sblock->fs_ncg; cg++) { ino = cg * sblock->fs_ipg; blkread(fsbtodb(sblock, cgtod(sblock, cg)), (char *)cgp, sblock->fs_cgsize); if (sblock->fs_magic == FS_UFS2_MAGIC) inosused = cgp->cg_initediblk; else inosused = sblock->fs_ipg; /* * If we are using soft updates, then we can trust the * cylinder group inode allocation maps to tell us which * inodes are allocated. We will scan the used inode map * to find the inodes that are really in use, and then * read only those inodes in from disk. */ if (sblock->fs_flags & FS_DOSOFTDEP) { if (!cg_chkmagic(cgp)) quit("mapfiles: cg %d: bad magic number\n", cg); cp = &cg_inosused(cgp)[(inosused - 1) / CHAR_BIT]; for ( ; inosused > 0; inosused -= CHAR_BIT, cp--) { if (*cp == 0) continue; for (i = 1 << (CHAR_BIT - 1); i > 0; i >>= 1) { if (*cp & i) break; inosused--; } break; } if (inosused <= 0) continue; } for (i = 0; i < inosused; i++, ino++) { if (ino < UFS_ROOTINO || (dp = getino(ino, &mode)) == NULL || (mode & IFMT) == 0) continue; if (ino >= maxino) { msg("Skipping inode %ju >= maxino %ju\n", (uintmax_t)ino, (uintmax_t)maxino); continue; } /* * Everything must go in usedinomap so that a check * for "in dumpdirmap but not in usedinomap" to detect * dirs with nodump set has a chance of succeeding * (this is used in mapdirs()). */ SETINO(ino, usedinomap); if (mode == IFDIR) SETINO(ino, dumpdirmap); if (WANTTODUMP(dp)) { SETINO(ino, dumpinomap); if (mode != IFREG && mode != IFDIR && mode != IFLNK) *tapesize += 1; else *tapesize += blockest(dp); continue; } if (mode == IFDIR) { if (!nonodump && (DIP(dp, di_flags) & UF_NODUMP)) CLRINO(ino, usedinomap); anydirskipped = 1; } } } /* * Restore gets very upset if the root is not dumped, * so ensure that it always is dumped. */ SETINO(UFS_ROOTINO, dumpinomap); return (anydirskipped); } /* * Dump pass 2. * * Scan each directory on the file system to see if it has any modified * files in it. If it does, and has not already been added to the dump * list (because it was itself modified), then add it. If a directory * has not been modified itself, contains no modified files and has no * subdirectories, then it can be deleted from the dump list and from * the list of directories. By deleting it from the list of directories, * its parent may now qualify for the same treatment on this or a later * pass using this algorithm. */ int mapdirs(ino_t maxino, long *tapesize) { union dinode *dp; int i, isdir, nodump; char *map; ino_t ino; union dinode di; long filesize; int ret, change = 0; isdir = 0; /* XXX just to get gcc to shut up */ for (map = dumpdirmap, ino = 1; ino < maxino; ino++) { if (((ino - 1) % CHAR_BIT) == 0) /* map is offset by 1 */ isdir = *map++; else isdir >>= 1; /* * If a directory has been removed from usedinomap, it * either has the nodump flag set, or has inherited * it. Although a directory can't be in dumpinomap if * it isn't in usedinomap, we have to go through it to * propagate the nodump flag. */ nodump = !nonodump && (TSTINO(ino, usedinomap) == 0); if ((isdir & 1) == 0 || (TSTINO(ino, dumpinomap) && !nodump)) continue; dp = getino(ino, &i); /* * inode buf may change in searchdir(). */ if (sblock->fs_magic == FS_UFS1_MAGIC) di.dp1 = dp->dp1; else di.dp2 = dp->dp2; filesize = DIP(&di, di_size); for (ret = 0, i = 0; filesize > 0 && i < UFS_NDADDR; i++) { if (DIP(&di, di_db[i]) != 0) ret |= searchdir(ino, DIP(&di, di_db[i]), (long)sblksize(sblock, DIP(&di, di_size), i), filesize, tapesize, nodump, maxino); if (ret & HASDUMPEDFILE) filesize = 0; else filesize -= sblock->fs_bsize; } for (i = 0; filesize > 0 && i < UFS_NIADDR; i++) { if (DIP(&di, di_ib[i]) == 0) continue; ret |= dirindir(ino, DIP(&di, di_ib[i]), i, &filesize, tapesize, nodump, maxino); } if (ret & HASDUMPEDFILE) { SETINO(ino, dumpinomap); *tapesize += blockest(&di); change = 1; continue; } if (nodump) { if (ret & HASSUBDIRS) change = 1; /* subdirs inherit nodump */ CLRINO(ino, dumpdirmap); } else if ((ret & HASSUBDIRS) == 0) if (!TSTINO(ino, dumpinomap)) { CLRINO(ino, dumpdirmap); change = 1; } } return (change); } /* * Read indirect blocks, and pass the data blocks to be searched * as directories. Quit as soon as any entry is found that will * require the directory to be dumped. */ static int dirindir( ino_t ino, ufs2_daddr_t blkno, int ind_level, long *filesize, long *tapesize, int nodump, ino_t maxino) { union { ufs1_daddr_t ufs1[MAXBSIZE / sizeof(ufs1_daddr_t)]; ufs2_daddr_t ufs2[MAXBSIZE / sizeof(ufs2_daddr_t)]; } idblk; int ret = 0; int i; blkread(fsbtodb(sblock, blkno), (char *)&idblk, (int)sblock->fs_bsize); if (ind_level <= 0) { for (i = 0; *filesize > 0 && i < NINDIR(sblock); i++) { if (sblock->fs_magic == FS_UFS1_MAGIC) blkno = idblk.ufs1[i]; else blkno = idblk.ufs2[i]; if (blkno != 0) ret |= searchdir(ino, blkno, sblock->fs_bsize, *filesize, tapesize, nodump, maxino); if (ret & HASDUMPEDFILE) *filesize = 0; else *filesize -= sblock->fs_bsize; } return (ret); } ind_level--; for (i = 0; *filesize > 0 && i < NINDIR(sblock); i++) { if (sblock->fs_magic == FS_UFS1_MAGIC) blkno = idblk.ufs1[i]; else blkno = idblk.ufs2[i]; if (blkno != 0) ret |= dirindir(ino, blkno, ind_level, filesize, tapesize, nodump, maxino); } return (ret); } /* * Scan a disk block containing directory information looking to see if * any of the entries are on the dump list and to see if the directory * contains any subdirectories. */ static int searchdir( ino_t ino, ufs2_daddr_t blkno, long size, long filesize, long *tapesize, int nodump, ino_t maxino) { int mode; struct direct *dp; union dinode *ip; long loc, ret = 0; static caddr_t dblk; if (dblk == NULL && (dblk = malloc(sblock->fs_bsize)) == NULL) quit("searchdir: cannot allocate indirect memory.\n"); blkread(fsbtodb(sblock, blkno), dblk, (int)size); if (filesize < size) size = filesize; for (loc = 0; loc < size; ) { dp = (struct direct *)(dblk + loc); if (dp->d_reclen == 0) { msg("corrupted directory, inumber %ju\n", (uintmax_t)ino); break; } loc += dp->d_reclen; if (dp->d_ino == 0) continue; if (dp->d_ino >= maxino) { msg("corrupted directory entry, d_ino %ju >= %ju\n", (uintmax_t)dp->d_ino, (uintmax_t)maxino); break; } if (dp->d_name[0] == '.') { if (dp->d_name[1] == '\0') continue; if (dp->d_name[1] == '.' && dp->d_name[2] == '\0') continue; } if (nodump) { ip = getino(dp->d_ino, &mode); if (TSTINO(dp->d_ino, dumpinomap)) { CLRINO(dp->d_ino, dumpinomap); *tapesize -= blockest(ip); } /* * Add back to dumpdirmap and remove from usedinomap * to propagate nodump. */ if (mode == IFDIR) { SETINO(dp->d_ino, dumpdirmap); CLRINO(dp->d_ino, usedinomap); ret |= HASSUBDIRS; } } else { if (TSTINO(dp->d_ino, dumpinomap)) { ret |= HASDUMPEDFILE; if (ret & HASSUBDIRS) break; } if (TSTINO(dp->d_ino, dumpdirmap)) { ret |= HASSUBDIRS; if (ret & HASDUMPEDFILE) break; } } } return (ret); } /* * Dump passes 3 and 4. * * Dump the contents of an inode to tape. */ void dumpino(union dinode *dp, ino_t ino) { int ind_level, cnt, last, added; off_t size; char buf[TP_BSIZE]; if (newtape) { newtape = 0; dumpmap(dumpinomap, TS_BITS, ino); } CLRINO(ino, dumpinomap); /* * Zero out the size of a snapshot so that it will be dumped * as a zero length file. */ if ((DIP(dp, di_flags) & SF_SNAPSHOT) != 0) { DIP_SET(dp, di_size, 0); DIP_SET(dp, di_flags, DIP(dp, di_flags) & ~SF_SNAPSHOT); } if (sblock->fs_magic == FS_UFS1_MAGIC) { spcl.c_mode = dp->dp1.di_mode; spcl.c_size = dp->dp1.di_size; spcl.c_extsize = 0; spcl.c_atime = _time32_to_time(dp->dp1.di_atime); spcl.c_atimensec = dp->dp1.di_atimensec; spcl.c_mtime = _time32_to_time(dp->dp1.di_mtime); spcl.c_mtimensec = dp->dp1.di_mtimensec; spcl.c_birthtime = 0; spcl.c_birthtimensec = 0; spcl.c_rdev = dp->dp1.di_rdev; spcl.c_file_flags = dp->dp1.di_flags; spcl.c_uid = dp->dp1.di_uid; spcl.c_gid = dp->dp1.di_gid; } else { spcl.c_mode = dp->dp2.di_mode; spcl.c_size = dp->dp2.di_size; spcl.c_extsize = dp->dp2.di_extsize; spcl.c_atime = _time64_to_time(dp->dp2.di_atime); spcl.c_atimensec = dp->dp2.di_atimensec; spcl.c_mtime = _time64_to_time(dp->dp2.di_mtime); spcl.c_mtimensec = dp->dp2.di_mtimensec; spcl.c_birthtime = _time64_to_time(dp->dp2.di_birthtime); spcl.c_birthtimensec = dp->dp2.di_birthnsec; spcl.c_rdev = dp->dp2.di_rdev; spcl.c_file_flags = dp->dp2.di_flags; spcl.c_uid = dp->dp2.di_uid; spcl.c_gid = dp->dp2.di_gid; } spcl.c_type = TS_INODE; spcl.c_count = 0; switch (DIP(dp, di_mode) & S_IFMT) { case 0: /* * Freed inode. */ return; case S_IFLNK: /* * Check for short symbolic link. */ if (DIP(dp, di_size) > 0 && DIP(dp, di_size) < sblock->fs_maxsymlinklen) { spcl.c_addr[0] = 1; spcl.c_count = 1; added = appendextdata(dp); writeheader(ino); - if (sblock->fs_magic == FS_UFS1_MAGIC) - memmove(buf, (caddr_t)dp->dp1.di_db, - (u_long)DIP(dp, di_size)); - else - memmove(buf, (caddr_t)dp->dp2.di_db, - (u_long)DIP(dp, di_size)); + memmove(buf, DIP(dp, di_shortlink), + (u_long)DIP(dp, di_size)); buf[DIP(dp, di_size)] = '\0'; writerec(buf, 0); writeextdata(dp, ino, added); return; } /* FALLTHROUGH */ case S_IFDIR: case S_IFREG: if (DIP(dp, di_size) > 0) break; /* FALLTHROUGH */ case S_IFIFO: case S_IFSOCK: case S_IFCHR: case S_IFBLK: added = appendextdata(dp); writeheader(ino); writeextdata(dp, ino, added); return; default: msg("Warning: undefined file type 0%o\n", DIP(dp, di_mode) & IFMT); return; } if (DIP(dp, di_size) > UFS_NDADDR * sblock->fs_bsize) { cnt = UFS_NDADDR * sblock->fs_frag; last = 0; } else { cnt = howmany(DIP(dp, di_size), sblock->fs_fsize); last = 1; } if (sblock->fs_magic == FS_UFS1_MAGIC) ufs1_blksout(&dp->dp1.di_db[0], cnt, ino); else ufs2_blksout(dp, &dp->dp2.di_db[0], cnt, ino, last); if ((size = DIP(dp, di_size) - UFS_NDADDR * sblock->fs_bsize) <= 0) return; for (ind_level = 0; ind_level < UFS_NIADDR; ind_level++) { dmpindir(dp, ino, DIP(dp, di_ib[ind_level]), ind_level, &size); if (size <= 0) return; } } /* * Read indirect blocks, and pass the data blocks to be dumped. */ static void dmpindir(union dinode *dp, ino_t ino, ufs2_daddr_t blk, int ind_level, off_t *size) { union { ufs1_daddr_t ufs1[MAXBSIZE / sizeof(ufs1_daddr_t)]; ufs2_daddr_t ufs2[MAXBSIZE / sizeof(ufs2_daddr_t)]; } idblk; int i, cnt, last; if (blk != 0) blkread(fsbtodb(sblock, blk), (char *)&idblk, (int)sblock->fs_bsize); else memset(&idblk, 0, sblock->fs_bsize); if (ind_level <= 0) { if (*size > NINDIR(sblock) * sblock->fs_bsize) { cnt = NINDIR(sblock) * sblock->fs_frag; last = 0; } else { cnt = howmany(*size, sblock->fs_fsize); last = 1; } *size -= NINDIR(sblock) * sblock->fs_bsize; if (sblock->fs_magic == FS_UFS1_MAGIC) ufs1_blksout(idblk.ufs1, cnt, ino); else ufs2_blksout(dp, idblk.ufs2, cnt, ino, last); return; } ind_level--; for (i = 0; i < NINDIR(sblock); i++) { if (sblock->fs_magic == FS_UFS1_MAGIC) dmpindir(dp, ino, idblk.ufs1[i], ind_level, size); else dmpindir(dp, ino, idblk.ufs2[i], ind_level, size); if (*size <= 0) return; } } /* * Collect up the data into tape record sized buffers and output them. */ static void ufs1_blksout(ufs1_daddr_t *blkp, int frags, ino_t ino) { ufs1_daddr_t *bp; int i, j, count, blks, tbperdb; blks = howmany(frags * sblock->fs_fsize, TP_BSIZE); tbperdb = sblock->fs_bsize >> tp_bshift; for (i = 0; i < blks; i += TP_NINDIR) { if (i + TP_NINDIR > blks) count = blks; else count = i + TP_NINDIR; assert(count <= TP_NINDIR + i); for (j = i; j < count; j++) if (blkp[j / tbperdb] != 0) spcl.c_addr[j - i] = 1; else spcl.c_addr[j - i] = 0; spcl.c_count = count - i; writeheader(ino); bp = &blkp[i / tbperdb]; for (j = i; j < count; j += tbperdb, bp++) if (*bp != 0) { if (j + tbperdb <= count) dumpblock(*bp, (int)sblock->fs_bsize); else dumpblock(*bp, (count - j) * TP_BSIZE); } spcl.c_type = TS_ADDR; } } /* * Collect up the data into tape record sized buffers and output them. */ static void ufs2_blksout(union dinode *dp, ufs2_daddr_t *blkp, int frags, ino_t ino, int last) { ufs2_daddr_t *bp; int i, j, count, resid, blks, tbperdb, added; static int writingextdata = 0; /* * Calculate the number of TP_BSIZE blocks to be dumped. * For filesystems with a fragment size bigger than TP_BSIZE, * only part of the final fragment may need to be dumped. */ blks = howmany(frags * sblock->fs_fsize, TP_BSIZE); if (last) { if (writingextdata) resid = howmany(fragoff(sblock, spcl.c_extsize), TP_BSIZE); else resid = howmany(fragoff(sblock, dp->dp2.di_size), TP_BSIZE); if (resid > 0) blks -= howmany(sblock->fs_fsize, TP_BSIZE) - resid; } tbperdb = sblock->fs_bsize >> tp_bshift; for (i = 0; i < blks; i += TP_NINDIR) { if (i + TP_NINDIR > blks) count = blks; else count = i + TP_NINDIR; assert(count <= TP_NINDIR + i); for (j = i; j < count; j++) if (blkp[j / tbperdb] != 0) spcl.c_addr[j - i] = 1; else spcl.c_addr[j - i] = 0; spcl.c_count = count - i; if (last && count == blks && !writingextdata) added = appendextdata(dp); writeheader(ino); bp = &blkp[i / tbperdb]; for (j = i; j < count; j += tbperdb, bp++) if (*bp != 0) { if (j + tbperdb <= count) dumpblock(*bp, (int)sblock->fs_bsize); else dumpblock(*bp, (count - j) * TP_BSIZE); } spcl.c_type = TS_ADDR; spcl.c_count = 0; if (last && count == blks && !writingextdata) { writingextdata = 1; writeextdata(dp, ino, added); writingextdata = 0; } } } /* * If there is room in the current block for the extended attributes * as well as the file data, update the header to reflect the added * attribute data at the end. Attributes are placed at the end so that * old versions of restore will correctly restore the file and simply * discard the extra data at the end that it does not understand. * The attribute data is dumped following the file data by the * writeextdata() function (below). */ static int appendextdata(union dinode *dp) { int i, blks, tbperdb; /* * If no extended attributes, there is nothing to do. */ if (spcl.c_extsize == 0) return (0); /* * If there is not enough room at the end of this block * to add the extended attributes, then rather than putting * part of them here, we simply push them entirely into a * new block rather than putting some here and some later. */ if (spcl.c_extsize > UFS_NXADDR * sblock->fs_bsize) blks = howmany(UFS_NXADDR * sblock->fs_bsize, TP_BSIZE); else blks = howmany(spcl.c_extsize, TP_BSIZE); if (spcl.c_count + blks > TP_NINDIR) return (0); /* * Update the block map in the header to indicate the added * extended attribute. They will be appended after the file * data by the writeextdata() routine. */ tbperdb = sblock->fs_bsize >> tp_bshift; assert(spcl.c_count + blks <= TP_NINDIR); for (i = 0; i < blks; i++) if (&dp->dp2.di_extb[i / tbperdb] != 0) spcl.c_addr[spcl.c_count + i] = 1; else spcl.c_addr[spcl.c_count + i] = 0; spcl.c_count += blks; return (blks); } /* * Dump the extended attribute data. If there was room in the file * header, then all we need to do is output the data blocks. If there * was not room in the file header, then an additional TS_ADDR header * is created to hold the attribute data. */ static void writeextdata(union dinode *dp, ino_t ino, int added) { int i, frags, blks, tbperdb, last; ufs2_daddr_t *bp; off_t size; /* * If no extended attributes, there is nothing to do. */ if (spcl.c_extsize == 0) return; /* * If there was no room in the file block for the attributes, * dump them out in a new block, otherwise just dump the data. */ if (added == 0) { if (spcl.c_extsize > UFS_NXADDR * sblock->fs_bsize) { frags = UFS_NXADDR * sblock->fs_frag; last = 0; } else { frags = howmany(spcl.c_extsize, sblock->fs_fsize); last = 1; } ufs2_blksout(dp, &dp->dp2.di_extb[0], frags, ino, last); } else { if (spcl.c_extsize > UFS_NXADDR * sblock->fs_bsize) blks = howmany(UFS_NXADDR * sblock->fs_bsize, TP_BSIZE); else blks = howmany(spcl.c_extsize, TP_BSIZE); tbperdb = sblock->fs_bsize >> tp_bshift; for (i = 0; i < blks; i += tbperdb) { bp = &dp->dp2.di_extb[i / tbperdb]; if (*bp != 0) { if (i + tbperdb <= blks) dumpblock(*bp, (int)sblock->fs_bsize); else dumpblock(*bp, (blks - i) * TP_BSIZE); } } } /* * If an indirect block is added for extended attributes, then * di_exti below should be changed to the structure element * that references the extended attribute indirect block. This * definition is here only to make it compile without complaint. */ #define di_exti di_spare[0] /* * If the extended attributes fall into an indirect block, * dump it as well. */ if ((size = spcl.c_extsize - UFS_NXADDR * sblock->fs_bsize) > 0) dmpindir(dp, ino, dp->dp2.di_exti, 0, &size); } /* * Dump a map to the tape. */ void dumpmap(char *map, int type, ino_t ino) { int i; char *cp; spcl.c_type = type; spcl.c_count = howmany(mapsize * sizeof(char), TP_BSIZE); writeheader(ino); for (i = 0, cp = map; i < spcl.c_count; i++, cp += TP_BSIZE) writerec(cp, 0); } /* * Write a header record to the dump tape. */ void writeheader(ino_t ino) { int32_t sum, cnt, *lp; if (rsync_friendly >= 2) { /* don't track changes to access time */ spcl.c_atime = spcl.c_mtime; spcl.c_atimensec = spcl.c_mtimensec; } spcl.c_inumber = ino; spcl.c_magic = FS_UFS2_MAGIC; spcl.c_checksum = 0; lp = (int32_t *)&spcl; sum = 0; cnt = sizeof(union u_spcl) / (4 * sizeof(int32_t)); while (--cnt >= 0) { sum += *lp++; sum += *lp++; sum += *lp++; sum += *lp++; } spcl.c_checksum = CHECKSUM - sum; writerec((char *)&spcl, 1); } union dinode * getino(ino_t inum, int *modep) { static ino_t minino, maxino; static caddr_t inoblock; struct ufs1_dinode *dp1; struct ufs2_dinode *dp2; if (inoblock == NULL && (inoblock = malloc(sblock->fs_bsize)) == NULL) quit("cannot allocate inode memory.\n"); curino = inum; if (inum >= minino && inum < maxino) goto gotit; blkread(fsbtodb(sblock, ino_to_fsba(sblock, inum)), inoblock, (int)sblock->fs_bsize); minino = inum - (inum % INOPB(sblock)); maxino = minino + INOPB(sblock); gotit: if (sblock->fs_magic == FS_UFS1_MAGIC) { dp1 = &((struct ufs1_dinode *)inoblock)[inum - minino]; *modep = (dp1->di_mode & IFMT); return ((union dinode *)dp1); } dp2 = &((struct ufs2_dinode *)inoblock)[inum - minino]; *modep = (dp2->di_mode & IFMT); return ((union dinode *)dp2); } /* * Read a chunk of data from the disk. * Try to recover from hard errors by reading in sector sized pieces. * Error recovery is attempted at most BREADEMAX times before seeking * consent from the operator to continue. */ int breaderrors = 0; #define BREADEMAX 32 void blkread(ufs2_daddr_t blkno, char *buf, int size) { int secsize, bytes, resid, xfer, base, cnt, i; static char *tmpbuf; off_t offset; loop: offset = blkno << dev_bshift; secsize = sblock->fs_fsize; base = offset % secsize; resid = size % secsize; /* * If the transfer request starts or ends on a non-sector * boundary, we must read the entire sector and copy out * just the part that we need. */ if (base == 0 && resid == 0) { cnt = cread(diskfd, buf, size, offset); if (cnt == size) return; } else { if (tmpbuf == NULL && (tmpbuf = malloc(secsize)) == NULL) quit("buffer malloc failed\n"); xfer = 0; bytes = size; if (base != 0) { cnt = cread(diskfd, tmpbuf, secsize, offset - base); if (cnt != secsize) goto bad; xfer = MIN(secsize - base, size); offset += xfer; bytes -= xfer; resid = bytes % secsize; memcpy(buf, &tmpbuf[base], xfer); } if (bytes >= secsize) { cnt = cread(diskfd, &buf[xfer], bytes - resid, offset); if (cnt != bytes - resid) goto bad; xfer += cnt; offset += cnt; } if (resid == 0) return; cnt = cread(diskfd, tmpbuf, secsize, offset); if (cnt == secsize) { memcpy(&buf[xfer], tmpbuf, resid); return; } } bad: if (blkno + (size / dev_bsize) > fsbtodb(sblock, sblock->fs_size)) { /* * Trying to read the final fragment. * * NB - dump only works in TP_BSIZE blocks, hence * rounds `dev_bsize' fragments up to TP_BSIZE pieces. * It should be smarter about not actually trying to * read more than it can get, but for the time being * we punt and scale back the read only when it gets * us into trouble. (mkm 9/25/83) */ size -= dev_bsize; goto loop; } if (cnt == -1) msg("read error from %s: %s: [block %jd]: count=%d\n", disk, strerror(errno), (intmax_t)blkno, size); else msg("short read error from %s: [block %jd]: count=%d, got=%d\n", disk, (intmax_t)blkno, size, cnt); if (++breaderrors > BREADEMAX) { msg("More than %d block read errors from %s\n", BREADEMAX, disk); broadcast("DUMP IS AILING!\n"); msg("This is an unrecoverable error.\n"); if (!query("Do you want to attempt to continue?")){ dumpabort(0); /*NOTREACHED*/ } else breaderrors = 0; } /* * Zero buffer, then try to read each sector of buffer separately, * and bypass the cache. */ memset(buf, 0, size); for (i = 0; i < size; i += dev_bsize, buf += dev_bsize, blkno++) { if ((cnt = pread(diskfd, buf, (int)dev_bsize, ((off_t)blkno << dev_bshift))) == dev_bsize) continue; if (cnt == -1) { msg("read error from %s: %s: [sector %jd]: count=%ld\n", disk, strerror(errno), (intmax_t)blkno, dev_bsize); continue; } msg("short read from %s: [sector %jd]: count=%ld, got=%d\n", disk, (intmax_t)blkno, dev_bsize, cnt); } } diff --git a/sbin/fsdb/fsdbutil.c b/sbin/fsdb/fsdbutil.c index 2c18d9910c6f..c8a3a8a525e3 100644 --- a/sbin/fsdb/fsdbutil.c +++ b/sbin/fsdb/fsdbutil.c @@ -1,253 +1,250 @@ /* $NetBSD: fsdbutil.c,v 1.2 1995/10/08 23:18:12 thorpej Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1995 John T. Kohl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR `AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef lint static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include #include #include "fsdb.h" #include "fsck.h" void prtblknos(struct fs *fs, union dinode *dp); char ** crack(char *line, int *argc) { static char *argv[8]; int i; char *p, *val; for (p = line, i = 0; p != NULL && i < 8; i++) { while ((val = strsep(&p, " \t\n")) != NULL && *val == '\0') /**/; if (val) argv[i] = val; else break; } *argc = i; return argv; } char ** recrack(char *line, int *argc, int argc_max) { static char *argv[8]; int i; char *p, *val; for (p = line, i = 0; p != NULL && i < 8 && i < argc_max - 1; i++) { while ((val = strsep(&p, " \t\n")) != NULL && *val == '\0') /**/; if (val) argv[i] = val; else break; } argv[i] = argv[i - 1] + strlen(argv[i - 1]) + 1; argv[i][strcspn(argv[i], "\n")] = '\0'; *argc = i + 1; return argv; } int argcount(struct cmdtable *cmdp, int argc, char *argv[]) { if (cmdp->minargc == cmdp->maxargc) warnx("command `%s' takes %u arguments, got %u", cmdp->cmd, cmdp->minargc-1, argc-1); else warnx("command `%s' takes from %u to %u arguments", cmdp->cmd, cmdp->minargc-1, cmdp->maxargc-1); warnx("usage: %s: %s", cmdp->cmd, cmdp->helptxt); return 1; } void printstat(const char *cp, ino_t inum, union dinode *dp) { struct group *grp; struct passwd *pw; ufs2_daddr_t blocks; int64_t gen; char *p; time_t t; printf("%s: ", cp); switch (DIP(dp, di_mode) & IFMT) { case IFDIR: puts("directory"); break; case IFREG: puts("regular file"); break; case IFBLK: printf("block special (%#jx)", (uintmax_t)DIP(dp, di_rdev)); break; case IFCHR: printf("character special (%#jx)", DIP(dp, di_rdev)); break; case IFLNK: fputs("symlink",stdout); if (DIP(dp, di_size) > 0 && DIP(dp, di_size) < sblock.fs_maxsymlinklen && DIP(dp, di_blocks) == 0) { - if (sblock.fs_magic == FS_UFS1_MAGIC) - p = (caddr_t)dp->dp1.di_db; - else - p = (caddr_t)dp->dp2.di_db; - printf(" to `%.*s'\n", (int) DIP(dp, di_size), p); + printf(" to `%.*s'\n", (int) DIP(dp, di_size), + DIP(dp, di_shortlink)); } else { putchar('\n'); } break; case IFSOCK: puts("socket"); break; case IFIFO: puts("fifo"); break; } printf("I=%ju MODE=%o SIZE=%ju", (uintmax_t)inum, DIP(dp, di_mode), (uintmax_t)DIP(dp, di_size)); if (sblock.fs_magic != FS_UFS1_MAGIC) { t = _time64_to_time(dp->dp2.di_birthtime); p = ctime(&t); printf("\n\tBTIME=%15.15s %4.4s [%d nsec]", &p[4], &p[20], dp->dp2.di_birthnsec); } if (sblock.fs_magic == FS_UFS1_MAGIC) t = _time32_to_time(dp->dp1.di_mtime); else t = _time64_to_time(dp->dp2.di_mtime); p = ctime(&t); printf("\n\tMTIME=%15.15s %4.4s [%d nsec]", &p[4], &p[20], DIP(dp, di_mtimensec)); if (sblock.fs_magic == FS_UFS1_MAGIC) t = _time32_to_time(dp->dp1.di_ctime); else t = _time64_to_time(dp->dp2.di_ctime); p = ctime(&t); printf("\n\tCTIME=%15.15s %4.4s [%d nsec]", &p[4], &p[20], DIP(dp, di_ctimensec)); if (sblock.fs_magic == FS_UFS1_MAGIC) t = _time32_to_time(dp->dp1.di_atime); else t = _time64_to_time(dp->dp2.di_atime); p = ctime(&t); printf("\n\tATIME=%15.15s %4.4s [%d nsec]\n", &p[4], &p[20], DIP(dp, di_atimensec)); if ((pw = getpwuid(DIP(dp, di_uid)))) printf("OWNER=%s ", pw->pw_name); else printf("OWNUID=%u ", DIP(dp, di_uid)); if ((grp = getgrgid(DIP(dp, di_gid)))) printf("GRP=%s ", grp->gr_name); else printf("GID=%u ", DIP(dp, di_gid)); blocks = DIP(dp, di_blocks); gen = DIP(dp, di_gen); printf("LINKCNT=%d FLAGS=%#x BLKCNT=%jx GEN=%jx\n", DIP(dp, di_nlink), DIP(dp, di_flags), (intmax_t)blocks, (intmax_t)gen); } int checkactive(void) { if (!curinode) { warnx("no current inode\n"); return 0; } return 1; } int checkactivedir(void) { if (!curinode) { warnx("no current inode\n"); return 0; } if ((DIP(curinode, di_mode) & IFMT) != IFDIR) { warnx("inode %ju not a directory", (uintmax_t)curinum); return 0; } return 1; } int printactive(int doblocks) { if (!checkactive()) return 1; switch (DIP(curinode, di_mode) & IFMT) { case IFDIR: case IFREG: case IFBLK: case IFCHR: case IFLNK: case IFSOCK: case IFIFO: if (doblocks) prtblknos(&sblock, curinode); else printstat("current inode", curinum, curinode); break; case 0: printf("current inode %ju: unallocated inode\n", (uintmax_t)curinum); break; default: printf("current inode %ju: screwy itype 0%o (mode 0%o)?\n", (uintmax_t)curinum, DIP(curinode, di_mode) & IFMT, DIP(curinode, di_mode)); break; } return 0; } diff --git a/stand/libsa/ufs.c b/stand/libsa/ufs.c index a4015dea74c2..fa3820617860 100644 --- a/stand/libsa/ufs.c +++ b/stand/libsa/ufs.c @@ -1,966 +1,963 @@ /* $NetBSD: ufs.c,v 1.20 1998/03/01 07:15:39 ross Exp $ */ /*- * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and Network Associates Laboratories, the Security * Research Division of Network Associates, Inc. under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS * research program * * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * * Copyright (c) 1990, 1991 Carnegie Mellon University * All Rights Reserved. * * Author: David Golub * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ #include __FBSDID("$FreeBSD$"); /* * Stand-alone file reading package. */ #include #include #include #include #include #include #include "stand.h" #include "disk.h" #include "string.h" static int ufs_open(const char *path, struct open_file *f); static int ufs_write(struct open_file *f, const void *buf, size_t size, size_t *resid); static int ufs_close(struct open_file *f); static int ufs_read(struct open_file *f, void *buf, size_t size, size_t *resid); static off_t ufs_seek(struct open_file *f, off_t offset, int where); static int ufs_stat(struct open_file *f, struct stat *sb); static int ufs_readdir(struct open_file *f, struct dirent *d); static int ufs_mount(const char *dev, const char *path, void **data); static int ufs_unmount(const char *dev, void *data); struct fs_ops ufs_fsops = { .fs_name = "ufs", .fo_open = ufs_open, .fo_close = ufs_close, .fo_read = ufs_read, .fo_write = ufs_write, .fo_seek = ufs_seek, .fo_stat = ufs_stat, .fo_readdir = ufs_readdir, .fo_mount = ufs_mount, .fo_unmount = ufs_unmount }; /* * In-core open file. */ struct file { off_t f_seekp; /* seek pointer */ struct fs *f_fs; /* pointer to super-block */ union dinode { struct ufs1_dinode di1; struct ufs2_dinode di2; } f_di; /* copy of on-disk inode */ int f_nindir[UFS_NIADDR]; /* number of blocks mapped by indirect block at level i */ char *f_blk[UFS_NIADDR]; /* buffer for indirect block at level i */ size_t f_blksize[UFS_NIADDR]; /* size of buffer */ ufs2_daddr_t f_blkno[UFS_NIADDR];/* disk address of block in buffer */ ufs2_daddr_t f_buf_blkno; /* block number of data block */ char *f_buf; /* buffer for data block */ size_t f_buf_size; /* size of data block */ int f_inumber; /* inumber */ }; #define DIP(fp, field) \ ((fp)->f_fs->fs_magic == FS_UFS1_MAGIC ? \ (fp)->f_di.di1.field : (fp)->f_di.di2.field) typedef struct ufs_mnt { char *um_dev; int um_fd; STAILQ_ENTRY(ufs_mnt) um_link; } ufs_mnt_t; typedef STAILQ_HEAD(ufs_mnt_list, ufs_mnt) ufs_mnt_list_t; static ufs_mnt_list_t mnt_list = STAILQ_HEAD_INITIALIZER(mnt_list); static int read_inode(ino_t, struct open_file *); static int block_map(struct open_file *, ufs2_daddr_t, ufs2_daddr_t *); static int buf_read_file(struct open_file *, char **, size_t *); static int buf_write_file(struct open_file *, const char *, size_t *); static int search_directory(char *, struct open_file *, ino_t *); static int ufs_use_sa_read(void *, off_t, void **, int); /* from ffs_subr.c */ int ffs_sbget(void *, struct fs **, off_t, char *, int (*)(void *, off_t, void **, int)); /* * Request standard superblock location in ffs_sbget */ #define STDSB -1 /* Fail if check-hash is bad */ #define STDSB_NOHASHFAIL -2 /* Ignore check-hash failure */ /* * Read a new inode into a file structure. */ static int read_inode(ino_t inumber, struct open_file *f) { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; char *buf; size_t rsize; int rc; if (fs == NULL) panic("fs == NULL"); /* * Read inode and save it. */ buf = malloc(fs->fs_bsize); twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, ino_to_fsba(fs, inumber)), fs->fs_bsize, buf, &rsize); if (rc) goto out; if (rsize != fs->fs_bsize) { rc = EIO; goto out; } if (fp->f_fs->fs_magic == FS_UFS1_MAGIC) fp->f_di.di1 = ((struct ufs1_dinode *)buf) [ino_to_fsbo(fs, inumber)]; else fp->f_di.di2 = ((struct ufs2_dinode *)buf) [ino_to_fsbo(fs, inumber)]; /* * Clear out the old buffers */ { int level; for (level = 0; level < UFS_NIADDR; level++) fp->f_blkno[level] = -1; fp->f_buf_blkno = -1; } fp->f_seekp = 0; fp->f_inumber = inumber; out: free(buf); return (rc); } /* * Given an offset in a file, find the disk block number that * contains that block. */ static int block_map(struct open_file *f, ufs2_daddr_t file_block, ufs2_daddr_t *disk_block_p) { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; int level; int idx; ufs2_daddr_t ind_block_num; int rc; /* * Index structure of an inode: * * di_db[0..UFS_NDADDR-1] hold block numbers for blocks * 0..UFS_NDADDR-1 * * di_ib[0] index block 0 is the single indirect block * holds block numbers for blocks * UFS_NDADDR .. UFS_NDADDR + NINDIR(fs)-1 * * di_ib[1] index block 1 is the double indirect block * holds block numbers for INDEX blocks for blocks * UFS_NDADDR + NINDIR(fs) .. * UFS_NDADDR + NINDIR(fs) + NINDIR(fs)**2 - 1 * * di_ib[2] index block 2 is the triple indirect block * holds block numbers for double-indirect * blocks for blocks * UFS_NDADDR + NINDIR(fs) + NINDIR(fs)**2 .. * UFS_NDADDR + NINDIR(fs) + NINDIR(fs)**2 * + NINDIR(fs)**3 - 1 */ if (file_block < UFS_NDADDR) { /* Direct block. */ *disk_block_p = DIP(fp, di_db[file_block]); return (0); } file_block -= UFS_NDADDR; /* * nindir[0] = NINDIR * nindir[1] = NINDIR**2 * nindir[2] = NINDIR**3 * etc */ for (level = 0; level < UFS_NIADDR; level++) { if (file_block < fp->f_nindir[level]) break; file_block -= fp->f_nindir[level]; } if (level == UFS_NIADDR) { /* Block number too high */ return (EFBIG); } ind_block_num = DIP(fp, di_ib[level]); for (; level >= 0; level--) { if (ind_block_num == 0) { *disk_block_p = 0; /* missing */ return (0); } if (fp->f_blkno[level] != ind_block_num) { if (fp->f_blk[level] == (char *)0) fp->f_blk[level] = malloc(fs->fs_bsize); twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fp->f_fs, ind_block_num), fs->fs_bsize, fp->f_blk[level], &fp->f_blksize[level]); if (rc) return (rc); if (fp->f_blksize[level] != fs->fs_bsize) return (EIO); fp->f_blkno[level] = ind_block_num; } if (level > 0) { idx = file_block / fp->f_nindir[level - 1]; file_block %= fp->f_nindir[level - 1]; } else idx = file_block; if (fp->f_fs->fs_magic == FS_UFS1_MAGIC) ind_block_num = ((ufs1_daddr_t *)fp->f_blk[level])[idx]; else ind_block_num = ((ufs2_daddr_t *)fp->f_blk[level])[idx]; } *disk_block_p = ind_block_num; return (0); } /* * Write a portion of a file from an internal buffer. */ static int buf_write_file(struct open_file *f, const char *buf_p, size_t *size_p) { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; long off; ufs_lbn_t file_block; ufs2_daddr_t disk_block; size_t block_size; int rc; /* * Calculate the starting block address and offset. */ off = blkoff(fs, fp->f_seekp); file_block = lblkno(fs, fp->f_seekp); block_size = sblksize(fs, DIP(fp, di_size), file_block); rc = block_map(f, file_block, &disk_block); if (rc) return (rc); if (disk_block == 0) /* Because we can't allocate space on the drive */ return (EFBIG); /* * Truncate buffer at end of file, and at the end of * this block. */ if (*size_p > DIP(fp, di_size) - fp->f_seekp) *size_p = DIP(fp, di_size) - fp->f_seekp; if (*size_p > block_size - off) *size_p = block_size - off; /* * If we don't entirely occlude the block and it's not * in memory already, read it in first. */ if (((off > 0) || (*size_p + off < block_size)) && (file_block != fp->f_buf_blkno)) { if (fp->f_buf == (char *)0) fp->f_buf = malloc(fs->fs_bsize); twiddle(4); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, disk_block), block_size, fp->f_buf, &fp->f_buf_size); if (rc) return (rc); fp->f_buf_blkno = file_block; } /* * Copy the user data into the cached block. */ bcopy(buf_p, fp->f_buf + off, *size_p); /* * Write the block out to storage. */ twiddle(4); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_WRITE, fsbtodb(fs, disk_block), block_size, fp->f_buf, &fp->f_buf_size); return (rc); } /* * Read a portion of a file into an internal buffer. Return * the location in the buffer and the amount in the buffer. */ static int buf_read_file(struct open_file *f, char **buf_p, size_t *size_p) { struct file *fp = (struct file *)f->f_fsdata; struct fs *fs = fp->f_fs; long off; ufs_lbn_t file_block; ufs2_daddr_t disk_block; size_t block_size; int rc; off = blkoff(fs, fp->f_seekp); file_block = lblkno(fs, fp->f_seekp); block_size = sblksize(fs, DIP(fp, di_size), file_block); if (file_block != fp->f_buf_blkno) { if (fp->f_buf == (char *)0) fp->f_buf = malloc(fs->fs_bsize); rc = block_map(f, file_block, &disk_block); if (rc) return (rc); if (disk_block == 0) { bzero(fp->f_buf, block_size); fp->f_buf_size = block_size; } else { twiddle(4); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, disk_block), block_size, fp->f_buf, &fp->f_buf_size); if (rc) return (rc); } fp->f_buf_blkno = file_block; } /* * Return address of byte in buffer corresponding to * offset, and size of remainder of buffer after that * byte. */ *buf_p = fp->f_buf + off; *size_p = block_size - off; /* * But truncate buffer at end of file. */ if (*size_p > DIP(fp, di_size) - fp->f_seekp) *size_p = DIP(fp, di_size) - fp->f_seekp; return (0); } /* * Search a directory for a name and return its * i_number. */ static int search_directory(char *name, struct open_file *f, ino_t *inumber_p) { struct file *fp = (struct file *)f->f_fsdata; struct direct *dp; struct direct *edp; char *buf; size_t buf_size; int namlen, length; int rc; length = strlen(name); fp->f_seekp = 0; while (fp->f_seekp < DIP(fp, di_size)) { rc = buf_read_file(f, &buf, &buf_size); if (rc) return (rc); dp = (struct direct *)buf; edp = (struct direct *)(buf + buf_size); while (dp < edp) { if (dp->d_ino == (ino_t)0) goto next; #if BYTE_ORDER == LITTLE_ENDIAN if (fp->f_fs->fs_maxsymlinklen <= 0) namlen = dp->d_type; else #endif namlen = dp->d_namlen; if (namlen == length && !strcmp(name, dp->d_name)) { /* found entry */ *inumber_p = dp->d_ino; return (0); } next: dp = (struct direct *)((char *)dp + dp->d_reclen); } fp->f_seekp += buf_size; } return (ENOENT); } /* * Open a file. */ static int ufs_open(const char *upath, struct open_file *f) { char *cp, *ncp; int c; ino_t inumber, parent_inumber; struct file *fp; struct fs *fs; int rc; int nlinks = 0; char namebuf[MAXPATHLEN+1]; char *buf = NULL; char *path = NULL; const char *dev; ufs_mnt_t *mnt; /* allocate file system specific data structure */ errno = 0; fp = calloc(1, sizeof(struct file)); if (fp == NULL) return (errno); f->f_fsdata = (void *)fp; dev = disk_fmtdev(f->f_devdata); /* Is this device mounted? */ STAILQ_FOREACH(mnt, &mnt_list, um_link) { if (strcmp(dev, mnt->um_dev) == 0) break; } if (mnt == NULL) { /* read super block */ twiddle(1); if ((rc = ffs_sbget(f, &fs, STDSB_NOHASHFAIL, "stand", ufs_use_sa_read)) != 0) { goto out; } } else { struct open_file *sbf; struct file *sfp; /* get superblock from mounted file system */ sbf = fd2open_file(mnt->um_fd); sfp = sbf->f_fsdata; fs = sfp->f_fs; } fp->f_fs = fs; /* * Calculate indirect block levels. */ { ufs2_daddr_t mult; int level; mult = 1; for (level = 0; level < UFS_NIADDR; level++) { mult *= NINDIR(fs); fp->f_nindir[level] = mult; } } inumber = UFS_ROOTINO; if ((rc = read_inode(inumber, f)) != 0) goto out; cp = path = strdup(upath); if (path == NULL) { rc = ENOMEM; goto out; } while (*cp) { /* * Remove extra separators */ while (*cp == '/') cp++; if (*cp == '\0') break; /* * Check that current node is a directory. */ if ((DIP(fp, di_mode) & IFMT) != IFDIR) { rc = ENOTDIR; goto out; } /* * Get next component of path name. */ { int len = 0; ncp = cp; while ((c = *cp) != '\0' && c != '/') { if (++len > UFS_MAXNAMLEN) { rc = ENOENT; goto out; } cp++; } *cp = '\0'; } /* * Look up component in current directory. * Save directory inumber in case we find a * symbolic link. */ parent_inumber = inumber; rc = search_directory(ncp, f, &inumber); *cp = c; if (rc) goto out; /* * Open next component. */ if ((rc = read_inode(inumber, f)) != 0) goto out; /* * Check for symbolic link. */ if ((DIP(fp, di_mode) & IFMT) == IFLNK) { int link_len = DIP(fp, di_size); int len; len = strlen(cp); if (link_len + len > MAXPATHLEN || ++nlinks > MAXSYMLINKS) { rc = ENOENT; goto out; } bcopy(cp, &namebuf[link_len], len + 1); if (link_len < fs->fs_maxsymlinklen) { - if (fp->f_fs->fs_magic == FS_UFS1_MAGIC) - cp = (caddr_t)(fp->f_di.di1.di_db); - else - cp = (caddr_t)(fp->f_di.di2.di_db); - bcopy(cp, namebuf, (unsigned) link_len); + bcopy(DIP(fp, di_shortlink), namebuf, + (unsigned) link_len); } else { /* * Read file for symbolic link */ size_t buf_size; ufs2_daddr_t disk_block; struct fs *fs = fp->f_fs; if (!buf) buf = malloc(fs->fs_bsize); rc = block_map(f, (ufs2_daddr_t)0, &disk_block); if (rc) goto out; twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, fsbtodb(fs, disk_block), fs->fs_bsize, buf, &buf_size); if (rc) goto out; bcopy((char *)buf, namebuf, (unsigned)link_len); } /* * If relative pathname, restart at parent directory. * If absolute pathname, restart at root. */ cp = namebuf; if (*cp != '/') inumber = parent_inumber; else inumber = (ino_t)UFS_ROOTINO; if ((rc = read_inode(inumber, f)) != 0) goto out; } } /* * Found terminal component. */ rc = 0; fp->f_seekp = 0; out: free(buf); free(path); if (rc) { free(fp->f_buf); if (mnt == NULL && fp->f_fs != NULL) { free(fp->f_fs->fs_csp); free(fp->f_fs->fs_si); free(fp->f_fs); } free(fp); } return (rc); } /* * A read function for use by standalone-layer routines. */ static int ufs_use_sa_read(void *devfd, off_t loc, void **bufp, int size) { struct open_file *f; size_t buf_size; int error; f = (struct open_file *)devfd; if ((*bufp = malloc(size)) == NULL) return (ENOSPC); error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, loc / DEV_BSIZE, size, *bufp, &buf_size); if (error != 0) return (error); if (buf_size != size) return (EIO); return (0); } static int ufs_close(struct open_file *f) { ufs_mnt_t *mnt; struct file *fp = (struct file *)f->f_fsdata; int level; char *dev; f->f_fsdata = NULL; if (fp == NULL) return (0); for (level = 0; level < UFS_NIADDR; level++) { free(fp->f_blk[level]); } free(fp->f_buf); dev = disk_fmtdev(f->f_devdata); STAILQ_FOREACH(mnt, &mnt_list, um_link) { if (strcmp(dev, mnt->um_dev) == 0) break; } if (mnt == NULL && fp->f_fs != NULL) { free(fp->f_fs->fs_csp); free(fp->f_fs->fs_si); free(fp->f_fs); } free(fp); return (0); } /* * Copy a portion of a file into kernel memory. * Cross block boundaries when necessary. */ static int ufs_read(struct open_file *f, void *start, size_t size, size_t *resid) { struct file *fp = (struct file *)f->f_fsdata; size_t csize; char *buf; size_t buf_size; int rc = 0; char *addr = start; while (size != 0) { if (fp->f_seekp >= DIP(fp, di_size)) break; rc = buf_read_file(f, &buf, &buf_size); if (rc) break; csize = size; if (csize > buf_size) csize = buf_size; bcopy(buf, addr, csize); fp->f_seekp += csize; addr += csize; size -= csize; } if (resid) *resid = size; return (rc); } /* * Write to a portion of an already allocated file. * Cross block boundaries when necessary. Can not * extend the file. */ static int ufs_write(struct open_file *f, const void *start, size_t size, size_t *resid) { struct file *fp = (struct file *)f->f_fsdata; size_t csize; int rc = 0; const char *addr = start; csize = size; while ((size != 0) && (csize != 0)) { if (fp->f_seekp >= DIP(fp, di_size)) break; if (csize >= 512) csize = 512; /* XXX */ rc = buf_write_file(f, addr, &csize); if (rc) break; fp->f_seekp += csize; addr += csize; size -= csize; } if (resid) *resid = size; return (rc); } static off_t ufs_seek(struct open_file *f, off_t offset, int where) { struct file *fp = (struct file *)f->f_fsdata; switch (where) { case SEEK_SET: fp->f_seekp = offset; break; case SEEK_CUR: fp->f_seekp += offset; break; case SEEK_END: fp->f_seekp = DIP(fp, di_size) - offset; break; default: errno = EINVAL; return (-1); } return (fp->f_seekp); } static int ufs_stat(struct open_file *f, struct stat *sb) { struct file *fp = (struct file *)f->f_fsdata; /* only important stuff */ sb->st_mode = DIP(fp, di_mode); sb->st_uid = DIP(fp, di_uid); sb->st_gid = DIP(fp, di_gid); sb->st_size = DIP(fp, di_size); sb->st_mtime = DIP(fp, di_mtime); /* * The items below are ufs specific! * Other fs types will need their own solution * if these fields are needed. */ sb->st_ino = fp->f_inumber; /* * We need something to differentiate devs. * fs_id is unique but 64bit, we xor the two * halves to squeeze it into 32bits. */ sb->st_dev = (dev_t)(fp->f_fs->fs_id[0] ^ fp->f_fs->fs_id[1]); return (0); } static int ufs_readdir(struct open_file *f, struct dirent *d) { struct file *fp = (struct file *)f->f_fsdata; struct direct *dp; char *buf; size_t buf_size; int error; /* * assume that a directory entry will not be split across blocks */ again: if (fp->f_seekp >= DIP(fp, di_size)) return (ENOENT); error = buf_read_file(f, &buf, &buf_size); if (error) return (error); dp = (struct direct *)buf; fp->f_seekp += dp->d_reclen; if (dp->d_ino == (ino_t)0) goto again; d->d_type = dp->d_type; strcpy(d->d_name, dp->d_name); return (0); } static int ufs_mount(const char *dev, const char *path, void **data) { char *fs; ufs_mnt_t *mnt; struct open_file *f; errno = 0; mnt = calloc(1, sizeof(*mnt)); if (mnt == NULL) return (errno); mnt->um_fd = -1; mnt->um_dev = strdup(dev); if (mnt->um_dev == NULL) goto done; if (asprintf(&fs, "%s%s", dev, path) < 0) goto done; mnt->um_fd = open(fs, O_RDONLY); free(fs); if (mnt->um_fd == -1) goto done; /* Is it ufs file system? */ f = fd2open_file(mnt->um_fd); if (strcmp(f->f_ops->fs_name, "ufs") == 0) STAILQ_INSERT_TAIL(&mnt_list, mnt, um_link); else errno = ENXIO; done: if (errno != 0) { free(mnt->um_dev); if (mnt->um_fd >= 0) close(mnt->um_fd); free(mnt); } else { *data = mnt; } return (errno); } static int ufs_unmount(const char *dev __unused, void *data) { ufs_mnt_t *mnt = data; STAILQ_REMOVE(&mnt_list, mnt, ufs_mnt, um_link); free(mnt->um_dev); close(mnt->um_fd); free(mnt); return (0); } diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 1aef2bb57930..d9d4f3c4a155 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -1,820 +1,820 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_ufs.h" #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef UFS_DIRHASH #include #endif #include #include #include static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t, ufs2_daddr_t, int, ufs2_daddr_t *); static void ffs_inode_bwrite(struct vnode *vp, struct buf *bp, int flags) { if ((flags & IO_SYNC) != 0) bwrite(bp); else if (DOINGASYNC(vp)) bdwrite(bp); else bawrite(bp); } /* * Update the access, modified, and inode change times as specified by the * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode * to disk if the IN_MODIFIED flag is set (it may be set initially, or by * the timestamp update). The IN_LAZYMOD flag is set to force a write * later if not now. The IN_LAZYACCESS is set instead of IN_MODIFIED if the fs * is currently being suspended (or is suspended) and vnode has been accessed. * If we write now, then clear IN_MODIFIED, IN_LAZYACCESS and IN_LAZYMOD to * reflect the presumably successful write, and if waitfor is set, then wait * for the write to complete. */ int ffs_update(struct vnode *vp, int waitfor) { struct fs *fs; struct buf *bp; struct inode *ip; daddr_t bn; int flags, error; ASSERT_VOP_ELOCKED(vp, "ffs_update"); ufs_itimes(vp); ip = VTOI(vp); if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0) return (0); ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED); /* * The IN_SIZEMOD and IN_IBLKDATA flags indicate changes to the * file size and block pointer fields in the inode. When these * fields have been changed, the fsync() and fsyncdata() system * calls must write the inode to ensure their semantics that the * file is on stable store. * * The IN_SIZEMOD and IN_IBLKDATA flags cannot be cleared until * a synchronous write of the inode is done. If they are cleared * on an asynchronous write, then the inode may not yet have been * written to the disk when an fsync() or fsyncdata() call is done. * Absent these flags, these calls would not know that they needed * to write the inode. Thus, these flags only can be cleared on * synchronous writes of the inode. Since the inode will be locked * for the duration of the I/O that writes it to disk, no fsync() * or fsyncdata() will be able to run before the on-disk inode * is complete. */ if (waitfor) ip->i_flag &= ~(IN_SIZEMOD | IN_IBLKDATA); fs = ITOFS(ip); if (fs->fs_ronly) return (0); /* * If we are updating a snapshot and another process is currently * writing the buffer containing the inode for this snapshot then * a deadlock can occur when it tries to check the snapshot to see * if that block needs to be copied. Thus when updating a snapshot * we check to see if the buffer is already locked, and if it is * we drop the snapshot lock until the buffer has been written * and is available to us. We have to grab a reference to the * snapshot vnode to prevent it from being removed while we are * waiting for the buffer. */ loop: flags = 0; if (IS_SNAPSHOT(ip)) flags = GB_LOCK_NOWAIT; bn = fsbtodb(fs, ino_to_fsba(fs, ip->i_number)); error = ffs_breadz(VFSTOUFS(vp->v_mount), ITODEVVP(ip), bn, bn, (int) fs->fs_bsize, NULL, NULL, 0, NOCRED, flags, NULL, &bp); if (error != 0) { /* * If EBUSY was returned without GB_LOCK_NOWAIT (which * requests trylock for buffer lock), it is for some * other reason and we should not handle it specially. */ if (error != EBUSY || (flags & GB_LOCK_NOWAIT) == 0) return (error); /* * Wait for our inode block to become available. * * Hold a reference to the vnode to protect against * ffs_snapgone(). Since we hold a reference, it can only * get reclaimed (VIRF_DOOMED flag) in a forcible downgrade * or unmount. For an unmount, the entire filesystem will be * gone, so we cannot attempt to touch anything associated * with it while the vnode is unlocked; all we can do is * pause briefly and try again. If when we relock the vnode * we discover that it has been reclaimed, updating it is no * longer necessary and we can just return an error. */ vref(vp); VOP_UNLOCK(vp); pause("ffsupd", 1); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); vrele(vp); if (!IS_UFS(vp)) return (ENOENT); /* * Recalculate flags, because the vnode was relocked and * could no longer be a snapshot. */ goto loop; } if (DOINGSOFTDEP(vp)) softdep_update_inodeblock(ip, bp, waitfor); else if (ip->i_effnlink != ip->i_nlink) panic("ffs_update: bad link cnt"); if (I_IS_UFS1(ip)) { *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; /* * XXX: FIX? The entropy here is desirable, * but the harvesting may be expensive */ random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1), RANDOM_FS_ATIME); } else { ffs_update_dinode_ckhash(fs, ip->i_din2); *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; /* * XXX: FIX? The entropy here is desirable, * but the harvesting may be expensive */ random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), RANDOM_FS_ATIME); } if (waitfor) { error = bwrite(bp); if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error)) error = 0; } else if (vm_page_count_severe() || buf_dirty_count_severe()) { bawrite(bp); error = 0; } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); error = 0; } return (error); } #define SINGLE 0 /* index of single indirect block */ #define DOUBLE 1 /* index of double indirect block */ #define TRIPLE 2 /* index of triple indirect block */ /* * Truncate the inode ip to at most length size, freeing the * disk blocks. */ int ffs_truncate(struct vnode *vp, off_t length, int flags, struct ucred *cred) { struct inode *ip; ufs2_daddr_t bn, lbn, lastblock, lastiblock[UFS_NIADDR]; ufs2_daddr_t indir_lbn[UFS_NIADDR], oldblks[UFS_NDADDR + UFS_NIADDR]; ufs2_daddr_t newblks[UFS_NDADDR + UFS_NIADDR]; ufs2_daddr_t count, blocksreleased = 0, blkno; struct bufobj *bo __diagused; struct fs *fs; struct buf *bp; struct ufsmount *ump; int softdeptrunc, journaltrunc; int needextclean, extblocks; int offset, size, level, nblocks; int i, error, allerror, indiroff, waitforupdate; u_long key; off_t osize; ip = VTOI(vp); ump = VFSTOUFS(vp->v_mount); fs = ump->um_fs; bo = &vp->v_bufobj; ASSERT_VOP_LOCKED(vp, "ffs_truncate"); if (length < 0) return (EINVAL); if (length > fs->fs_maxfilesize) return (EFBIG); #ifdef QUOTA error = getinoquota(ip); if (error) return (error); #endif /* * Historically clients did not have to specify which data * they were truncating. So, if not specified, we assume * traditional behavior, e.g., just the normal data. */ if ((flags & (IO_EXT | IO_NORMAL)) == 0) flags |= IO_NORMAL; if (!DOINGSOFTDEP(vp) && !DOINGASYNC(vp)) flags |= IO_SYNC; waitforupdate = (flags & IO_SYNC) != 0 || !DOINGASYNC(vp); /* * If we are truncating the extended-attributes, and cannot * do it with soft updates, then do it slowly here. If we are * truncating both the extended attributes and the file contents * (e.g., the file is being unlinked), then pick it off with * soft updates below. */ allerror = 0; needextclean = 0; softdeptrunc = 0; journaltrunc = DOINGSUJ(vp); journaltrunc = 0; /* XXX temp patch until bug found */ if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0) softdeptrunc = !softdep_slowdown(vp); extblocks = 0; if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) { extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize)); } if ((flags & IO_EXT) && extblocks > 0) { if (length != 0) panic("ffs_truncate: partial trunc of extdata"); if (softdeptrunc || journaltrunc) { if ((flags & IO_NORMAL) == 0) goto extclean; needextclean = 1; } else { if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); #ifdef QUOTA (void) chkdq(ip, -extblocks, NOCRED, FORCE); #endif vinvalbuf(vp, V_ALT, 0, 0); vn_pages_remove(vp, OFF_TO_IDX(lblktosize(fs, -extblocks)), 0); osize = ip->i_din2->di_extsize; ip->i_din2->di_blocks -= extblocks; ip->i_din2->di_extsize = 0; for (i = 0; i < UFS_NXADDR; i++) { oldblks[i] = ip->i_din2->di_extb[i]; ip->i_din2->di_extb[i] = 0; } UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE); if ((error = ffs_update(vp, waitforupdate))) return (error); for (i = 0; i < UFS_NXADDR; i++) { if (oldblks[i] == 0) continue; ffs_blkfree(ump, fs, ITODEVVP(ip), oldblks[i], sblksize(fs, osize, i), ip->i_number, vp->v_type, NULL, SINGLETON_KEY); } } } if ((flags & IO_NORMAL) == 0) return (0); if (vp->v_type == VLNK && ip->i_size < ump->um_maxsymlinklen) { #ifdef INVARIANTS if (length != 0) panic("ffs_truncate: partial truncate of symlink"); #endif - bzero(SHORTLINK(ip), (u_int)ip->i_size); + bzero(DIP(ip, i_shortlink), (u_int)ip->i_size); ip->i_size = 0; DIP_SET(ip, i_size, 0); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); if (needextclean) goto extclean; return (ffs_update(vp, waitforupdate)); } if (ip->i_size == length) { UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE); if (needextclean) goto extclean; return (ffs_update(vp, 0)); } if (fs->fs_ronly) panic("ffs_truncate: read-only filesystem"); if (IS_SNAPSHOT(ip)) ffs_snapremove(vp); vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; osize = ip->i_size; /* * Lengthen the size of the file. We must ensure that the * last byte of the file is allocated. Since the smallest * value of osize is 0, length will be at least 1. */ if (osize < length) { vnode_pager_setsize(vp, length); flags |= BA_CLRBUF; error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); if (error) { vnode_pager_setsize(vp, osize); return (error); } ip->i_size = length; DIP_SET(ip, i_size, length); if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; ffs_inode_bwrite(vp, bp, flags); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); return (ffs_update(vp, waitforupdate)); } /* * Lookup block number for a given offset. Zero length files * have no blocks, so return a blkno of -1. */ lbn = lblkno(fs, length - 1); if (length == 0) { blkno = -1; } else if (lbn < UFS_NDADDR) { blkno = DIP(ip, i_db[lbn]); } else { error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize, cred, BA_METAONLY, &bp); if (error) return (error); indiroff = (lbn - UFS_NDADDR) % NINDIR(fs); if (I_IS_UFS1(ip)) blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff]; else blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff]; /* * If the block number is non-zero, then the indirect block * must have been previously allocated and need not be written. * If the block number is zero, then we may have allocated * the indirect block and hence need to write it out. */ if (blkno != 0) brelse(bp); else if (flags & IO_SYNC) bwrite(bp); else bdwrite(bp); } /* * If the block number at the new end of the file is zero, * then we must allocate it to ensure that the last block of * the file is allocated. Soft updates does not handle this * case, so here we have to clean up the soft updates data * structures describing the allocation past the truncation * point. Finding and deallocating those structures is a lot of * work. Since partial truncation with a hole at the end occurs * rarely, we solve the problem by syncing the file so that it * will have no soft updates data structures left. */ if (blkno == 0 && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); if (blkno != 0 && DOINGSOFTDEP(vp)) { if (softdeptrunc == 0 && journaltrunc == 0) { /* * If soft updates cannot handle this truncation, * clean up soft dependency data structures and * fall through to the synchronous truncation. */ if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); } else { flags = IO_NORMAL | (needextclean ? IO_EXT: 0); if (journaltrunc) softdep_journal_freeblocks(ip, cred, length, flags); else softdep_setup_freeblocks(ip, length, flags); ASSERT_VOP_LOCKED(vp, "ffs_truncate1"); if (journaltrunc == 0) { UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE); error = ffs_update(vp, 0); } return (error); } } /* * Shorten the size of the file. If the last block of the * shortened file is unallocated, we must allocate it. * Additionally, if the file is not being truncated to a * block boundary, the contents of the partial block * following the end of the file must be zero'ed in * case it ever becomes accessible again because of * subsequent file growth. Directories however are not * zero'ed as they should grow back initialized to empty. */ offset = blkoff(fs, length); if (blkno != 0 && offset == 0) { ip->i_size = length; DIP_SET(ip, i_size, length); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); #ifdef UFS_DIRHASH if (vp->v_type == VDIR && ip->i_dirhash != NULL) ufsdirhash_dirtrunc(ip, length); #endif } else { lbn = lblkno(fs, length); flags |= BA_CLRBUF; error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); if (error) return (error); ffs_inode_bwrite(vp, bp, flags); /* * When we are doing soft updates and the UFS_BALLOC * above fills in a direct block hole with a full sized * block that will be truncated down to a fragment below, * we must flush out the block dependency with an FSYNC * so that we do not get a soft updates inconsistency * when we create the fragment below. */ if (DOINGSOFTDEP(vp) && lbn < UFS_NDADDR && fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0) return (error); error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp); if (error) return (error); ip->i_size = length; DIP_SET(ip, i_size, length); #ifdef UFS_DIRHASH if (vp->v_type == VDIR && ip->i_dirhash != NULL) ufsdirhash_dirtrunc(ip, length); #endif size = blksize(fs, ip, lbn); if (vp->v_type != VDIR && offset != 0) bzero((char *)bp->b_data + offset, (u_int)(size - offset)); /* Kirk's code has reallocbuf(bp, size, 1) here */ allocbuf(bp, size); if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; ffs_inode_bwrite(vp, bp, flags); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); } /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) * which we want to keep. Lastblock is -1 when * the file is truncated to 0. */ lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; lastiblock[SINGLE] = lastblock - UFS_NDADDR; lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); nblocks = btodb(fs->fs_bsize); /* * Update file and block pointers on disk before we start freeing * blocks. If we crash before free'ing blocks below, the blocks * will be returned to the free list. lastiblock values are also * normalized to -1 for calls to ffs_indirtrunc below. */ for (level = TRIPLE; level >= SINGLE; level--) { oldblks[UFS_NDADDR + level] = DIP(ip, i_ib[level]); if (lastiblock[level] < 0) { DIP_SET(ip, i_ib[level], 0); lastiblock[level] = -1; } } for (i = 0; i < UFS_NDADDR; i++) { oldblks[i] = DIP(ip, i_db[i]); if (i > lastblock) DIP_SET(ip, i_db[i], 0); } UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE); allerror = ffs_update(vp, waitforupdate); /* * Having written the new inode to disk, save its new configuration * and put back the old block pointers long enough to process them. * Note that we save the new block configuration so we can check it * when we are done. */ for (i = 0; i < UFS_NDADDR; i++) { newblks[i] = DIP(ip, i_db[i]); DIP_SET(ip, i_db[i], oldblks[i]); } for (i = 0; i < UFS_NIADDR; i++) { newblks[UFS_NDADDR + i] = DIP(ip, i_ib[i]); DIP_SET(ip, i_ib[i], oldblks[UFS_NDADDR + i]); } ip->i_size = osize; DIP_SET(ip, i_size, osize); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); error = vtruncbuf(vp, length, fs->fs_bsize); if (error && (allerror == 0)) allerror = error; /* * Indirect blocks first. */ indir_lbn[SINGLE] = -UFS_NDADDR; indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; for (level = TRIPLE; level >= SINGLE; level--) { bn = DIP(ip, i_ib[level]); if (bn != 0) { error = ffs_indirtrunc(ip, indir_lbn[level], fsbtodb(fs, bn), lastiblock[level], level, &count); if (error) allerror = error; blocksreleased += count; if (lastiblock[level] < 0) { DIP_SET(ip, i_ib[level], 0); ffs_blkfree(ump, fs, ump->um_devvp, bn, fs->fs_bsize, ip->i_number, vp->v_type, NULL, SINGLETON_KEY); blocksreleased += nblocks; } } if (lastiblock[level] >= 0) goto done; } /* * All whole direct blocks or frags. */ key = ffs_blkrelease_start(ump, ump->um_devvp, ip->i_number); for (i = UFS_NDADDR - 1; i > lastblock; i--) { long bsize; bn = DIP(ip, i_db[i]); if (bn == 0) continue; DIP_SET(ip, i_db[i], 0); bsize = blksize(fs, ip, i); ffs_blkfree(ump, fs, ump->um_devvp, bn, bsize, ip->i_number, vp->v_type, NULL, key); blocksreleased += btodb(bsize); } ffs_blkrelease_finish(ump, key); if (lastblock < 0) goto done; /* * Finally, look for a change in size of the * last direct block; release any frags. */ bn = DIP(ip, i_db[lastblock]); if (bn != 0) { long oldspace, newspace; /* * Calculate amount of space we're giving * back as old block size minus new block size. */ oldspace = blksize(fs, ip, lastblock); ip->i_size = length; DIP_SET(ip, i_size, length); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); newspace = blksize(fs, ip, lastblock); if (newspace == 0) panic("ffs_truncate: newspace"); if (oldspace - newspace > 0) { /* * Block number of space to be free'd is * the old block # plus the number of frags * required for the storage we're keeping. */ bn += numfrags(fs, newspace); ffs_blkfree(ump, fs, ump->um_devvp, bn, oldspace - newspace, ip->i_number, vp->v_type, NULL, SINGLETON_KEY); blocksreleased += btodb(oldspace - newspace); } } done: #ifdef INVARIANTS for (level = SINGLE; level <= TRIPLE; level++) if (newblks[UFS_NDADDR + level] != DIP(ip, i_ib[level])) panic("ffs_truncate1: level %d newblks %jd != i_ib %jd", level, (intmax_t)newblks[UFS_NDADDR + level], (intmax_t)DIP(ip, i_ib[level])); for (i = 0; i < UFS_NDADDR; i++) if (newblks[i] != DIP(ip, i_db[i])) panic("ffs_truncate2: blkno %d newblks %jd != i_db %jd", i, (intmax_t)newblks[UFS_NDADDR + level], (intmax_t)DIP(ip, i_ib[level])); BO_LOCK(bo); if (length == 0 && (fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("ffs_truncate3: vp = %p, buffers: dirty = %d, clean = %d", vp, bo->bo_dirty.bv_cnt, bo->bo_clean.bv_cnt); BO_UNLOCK(bo); #endif /* INVARIANTS */ /* * Put back the real size. */ ip->i_size = length; DIP_SET(ip, i_size, length); if (DIP(ip, i_blocks) >= blocksreleased) DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - blocksreleased); else /* sanity */ DIP_SET(ip, i_blocks, 0); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE); #ifdef QUOTA (void) chkdq(ip, -blocksreleased, NOCRED, FORCE); #endif return (allerror); extclean: if (journaltrunc) softdep_journal_freeblocks(ip, cred, length, IO_EXT); else softdep_setup_freeblocks(ip, length, IO_EXT); return (ffs_update(vp, waitforupdate)); } /* * Release blocks associated with the inode ip and stored in the indirect * block bn. Blocks are free'd in LIFO order up to (but not including) * lastbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. */ static int ffs_indirtrunc(struct inode *ip, ufs2_daddr_t lbn, ufs2_daddr_t dbn, ufs2_daddr_t lastbn, int level, ufs2_daddr_t *countp) { struct buf *bp; struct fs *fs; struct ufsmount *ump; struct vnode *vp; caddr_t copy = NULL; u_long key; int i, nblocks, error = 0, allerror = 0; ufs2_daddr_t nb, nlbn, last; ufs2_daddr_t blkcount, factor, blocksreleased = 0; ufs1_daddr_t *bap1 = NULL; ufs2_daddr_t *bap2 = NULL; #define BAP(ip, i) (I_IS_UFS1(ip) ? bap1[i] : bap2[i]) fs = ITOFS(ip); ump = ITOUMP(ip); /* * Calculate index in current block of last * block to be kept. -1 indicates the entire * block so we need not calculate the index. */ factor = lbn_offset(fs, level); last = lastbn; if (lastbn > 0) last /= factor; nblocks = btodb(fs->fs_bsize); /* * Get buffer of block pointers, zero those entries corresponding * to blocks to be free'd, and update on disk copy first. Since * double(triple) indirect before single(double) indirect, calls * to VOP_BMAP() on these blocks will fail. However, we already * have the on-disk address, so we just pass it to bread() instead * of having bread() attempt to calculate it using VOP_BMAP(). */ vp = ITOV(ip); error = ffs_breadz(ump, vp, lbn, dbn, (int)fs->fs_bsize, NULL, NULL, 0, NOCRED, 0, NULL, &bp); if (error) { *countp = 0; return (error); } if (I_IS_UFS1(ip)) bap1 = (ufs1_daddr_t *)bp->b_data; else bap2 = (ufs2_daddr_t *)bp->b_data; if (lastbn != -1) { copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK); bcopy((caddr_t)bp->b_data, copy, (u_int)fs->fs_bsize); for (i = last + 1; i < NINDIR(fs); i++) if (I_IS_UFS1(ip)) bap1[i] = 0; else bap2[i] = 0; if (DOINGASYNC(vp)) { bdwrite(bp); } else { error = bwrite(bp); if (error) allerror = error; } if (I_IS_UFS1(ip)) bap1 = (ufs1_daddr_t *)copy; else bap2 = (ufs2_daddr_t *)copy; } /* * Recursively free totally unused blocks. */ key = ffs_blkrelease_start(ump, ITODEVVP(ip), ip->i_number); for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; i--, nlbn += factor) { nb = BAP(ip, i); if (nb == 0) continue; if (level > SINGLE) { if ((error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), (ufs2_daddr_t)-1, level - 1, &blkcount)) != 0) allerror = error; blocksreleased += blkcount; } ffs_blkfree(ump, fs, ITODEVVP(ip), nb, fs->fs_bsize, ip->i_number, vp->v_type, NULL, key); blocksreleased += nblocks; } ffs_blkrelease_finish(ump, key); /* * Recursively free last partial block. */ if (level > SINGLE && lastbn >= 0) { last = lastbn % factor; nb = BAP(ip, i); if (nb != 0) { error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), last, level - 1, &blkcount); if (error) allerror = error; blocksreleased += blkcount; } } if (copy != NULL) { free(copy, M_TEMP); } else { bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); } *countp = blocksreleased; return (allerror); } int ffs_rdonly(struct inode *ip) { return (ITOFS(ip)->fs_ronly != 0); } diff --git a/sys/ufs/ufs/dinode.h b/sys/ufs/ufs/dinode.h index 1f0f25c4d5ec..840a4cc7d40f 100644 --- a/sys/ufs/ufs/dinode.h +++ b/sys/ufs/ufs/dinode.h @@ -1,194 +1,210 @@ /*- * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause) * * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and Network Associates Laboratories, the Security * Research Division of Network Associates, Inc. under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS * research program * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)dinode.h 8.3 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _UFS_UFS_DINODE_H_ #define _UFS_UFS_DINODE_H_ /* * The root inode is the root of the filesystem. Inode 0 can't be used for * normal purposes and historically bad blocks were linked to inode 1, thus * the root inode is 2. (Inode 1 is no longer used for this purpose, however * numerous dump tapes make this assumption, so we are stuck with it). */ #define UFS_ROOTINO ((ino_t)2) /* * The Whiteout inode# is a dummy non-zero inode number which will * never be allocated to a real file. It is used as a place holder * in the directory entry which has been tagged as a DT_WHT entry. * See the comments about UFS_ROOTINO above. */ #define UFS_WINO ((ino_t)1) /* * The size of physical and logical block numbers and time fields in UFS. */ typedef int32_t ufs1_daddr_t; typedef int64_t ufs2_daddr_t; typedef int64_t ufs_lbn_t; typedef int64_t ufs_time_t; /* File permissions. */ #define IEXEC 0000100 /* Executable. */ #define IWRITE 0000200 /* Writeable. */ #define IREAD 0000400 /* Readable. */ #define ISVTX 0001000 /* Sticky bit. */ #define ISGID 0002000 /* Set-gid. */ #define ISUID 0004000 /* Set-uid. */ /* File types. */ #define IFMT 0170000 /* Mask of file type. */ #define IFIFO 0010000 /* Named pipe (fifo). */ #define IFCHR 0020000 /* Character device. */ #define IFDIR 0040000 /* Directory file. */ #define IFBLK 0060000 /* Block device. */ #define IFREG 0100000 /* Regular file. */ #define IFLNK 0120000 /* Symbolic link. */ #define IFSOCK 0140000 /* UNIX domain socket. */ #define IFWHT 0160000 /* Whiteout. */ /* * A dinode contains all the meta-data associated with a UFS2 file. * This structure defines the on-disk format of a dinode. Since * this structure describes an on-disk structure, all its fields * are defined by types with precise widths. */ #define UFS_NXADDR 2 /* External addresses in inode. */ #define UFS_NDADDR 12 /* Direct addresses in inode. */ #define UFS_NIADDR 3 /* Indirect addresses in inode. */ struct ufs2_dinode { u_int16_t di_mode; /* 0: IFMT, permissions; see below. */ int16_t di_nlink; /* 2: File link count. */ u_int32_t di_uid; /* 4: File owner. */ u_int32_t di_gid; /* 8: File group. */ u_int32_t di_blksize; /* 12: Inode blocksize. */ u_int64_t di_size; /* 16: File byte count. */ u_int64_t di_blocks; /* 24: Blocks actually held. */ ufs_time_t di_atime; /* 32: Last access time. */ ufs_time_t di_mtime; /* 40: Last modified time. */ ufs_time_t di_ctime; /* 48: Last inode change time. */ ufs_time_t di_birthtime; /* 56: Inode creation time. */ int32_t di_mtimensec; /* 64: Last modified time. */ int32_t di_atimensec; /* 68: Last access time. */ int32_t di_ctimensec; /* 72: Last inode change time. */ int32_t di_birthnsec; /* 76: Inode creation time. */ u_int32_t di_gen; /* 80: Generation number. */ u_int32_t di_kernflags; /* 84: Kernel flags. */ u_int32_t di_flags; /* 88: Status flags (chflags). */ u_int32_t di_extsize; /* 92: External attributes size. */ ufs2_daddr_t di_extb[UFS_NXADDR];/* 96: External attributes block. */ - ufs2_daddr_t di_db[UFS_NDADDR]; /* 112: Direct disk blocks. */ - ufs2_daddr_t di_ib[UFS_NIADDR]; /* 208: Indirect disk blocks. */ + union { + struct { + ufs2_daddr_t di_db /* 112: Direct disk blocks. */ + [UFS_NDADDR]; + ufs2_daddr_t di_ib /* 208: Indirect disk blocks. */ + [UFS_NIADDR]; + }; + char di_shortlink /* 112: Embedded symbolic link. */ + [(UFS_NDADDR + UFS_NIADDR) * sizeof(ufs2_daddr_t)]; + }; u_int64_t di_modrev; /* 232: i_modrev for NFSv4 */ uint32_t di_freelink; /* 240: SUJ: Next unlinked inode. */ uint32_t di_ckhash; /* 244: if CK_INODE, its check-hash */ uint32_t di_spare[2]; /* 248: Reserved; currently unused */ }; /* * The di_db fields may be overlaid with other information for * file types that do not have associated disk storage. Block * and character devices overlay the first data block with their * dev_t value. Short symbolic links place their path in the * di_db area. */ #define di_rdev di_db[0] /* * A UFS1 dinode contains all the meta-data associated with a UFS1 file. * This structure defines the on-disk format of a UFS1 dinode. Since * this structure describes an on-disk structure, all its fields * are defined by types with precise widths. */ struct ufs1_dinode { u_int16_t di_mode; /* 0: IFMT, permissions; see below. */ int16_t di_nlink; /* 2: File link count. */ uint32_t di_freelink; /* 4: SUJ: Next unlinked inode. */ u_int64_t di_size; /* 8: File byte count. */ int32_t di_atime; /* 16: Last access time. */ int32_t di_atimensec; /* 20: Last access time. */ int32_t di_mtime; /* 24: Last modified time. */ int32_t di_mtimensec; /* 28: Last modified time. */ int32_t di_ctime; /* 32: Last inode change time. */ int32_t di_ctimensec; /* 36: Last inode change time. */ - ufs1_daddr_t di_db[UFS_NDADDR]; /* 40: Direct disk blocks. */ - ufs1_daddr_t di_ib[UFS_NIADDR]; /* 88: Indirect disk blocks. */ + union { + struct { + ufs1_daddr_t di_db /* 40: Direct disk blocks. */ + [UFS_NDADDR]; + ufs1_daddr_t di_ib /* 88: Indirect disk blocks. */ + [UFS_NIADDR]; + }; + char di_shortlink /* 40: Embedded symbolic link. */ + [(UFS_NDADDR + UFS_NIADDR) * sizeof(ufs1_daddr_t)]; + }; u_int32_t di_flags; /* 100: Status flags (chflags). */ u_int32_t di_blocks; /* 104: Blocks actually held. */ u_int32_t di_gen; /* 108: Generation number. */ u_int32_t di_uid; /* 112: File owner. */ u_int32_t di_gid; /* 116: File group. */ u_int64_t di_modrev; /* 120: i_modrev for NFSv4 */ }; #define UFS_LINK_MAX 32767 #endif /* _UFS_UFS_DINODE_H_ */ diff --git a/sys/ufs/ufs/inode.h b/sys/ufs/ufs/inode.h index 3e555dc52a32..8cdd40894608 100644 --- a/sys/ufs/ufs/inode.h +++ b/sys/ufs/ufs/inode.h @@ -1,309 +1,307 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)inode.h 8.9 (Berkeley) 5/14/95 * $FreeBSD$ */ #ifndef _UFS_UFS_INODE_H_ #define _UFS_UFS_INODE_H_ #include #include #include #include #ifdef DIAGNOSTIC #include #endif /* * This must agree with the definition in . */ #define doff_t int32_t #ifdef DIAGNOSTIC struct iown_tracker { struct thread *tr_owner; struct stack tr_st; struct stack tr_unlock; int tr_gen; }; #endif /* * The inode is used to describe each active (or recently active) file in the * UFS filesystem. It is composed of two types of information. The first part * is the information that is needed only while the file is active (such as * the identity of the file and linkage to speed its lookup). The second part * is the permanent meta-data associated with the file which is read in * from the permanent dinode from long term storage when the file becomes * active, and is put back when the file is no longer being used. * * An inode may only be changed while holding either the exclusive * vnode lock or the shared vnode lock and the vnode interlock. We use * the latter only for "read" and "get" operations that require * changing i_flag, or a timestamp. This locking protocol allows executing * those operations without having to upgrade the vnode lock from shared to * exclusive. */ struct inode { TAILQ_ENTRY(inode) i_nextsnap; /* snapshot file list. */ struct vnode *i_vnode;/* Vnode associated with this inode. */ struct ufsmount *i_ump;/* Ufsmount point associated with this inode. */ struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ union { struct dirhash *dirhash; /* Hashing for large directories. */ daddr_t *snapblklist; /* Collect expunged snapshot blocks. */ } i_un; /* * The real copy of the on-disk inode. */ union { struct ufs1_dinode *din1; /* UFS1 on-disk dinode. */ struct ufs2_dinode *din2; /* UFS2 on-disk dinode. */ } dinode_u; ino_t i_number; /* The identity of the inode. */ u_int32_t i_flag; /* flags, see below */ int i_effnlink; /* i_nlink when I/O completes */ /* * Side effects; used during directory lookup. */ int32_t i_count; /* Size of free slot in directory. */ doff_t i_endoff; /* End of useful stuff in directory. */ doff_t i_diroff; /* Offset in dir, where we found last entry. */ doff_t i_offset; /* Offset of free space in directory. */ #ifdef DIAGNOSTIC int i_lock_gen; struct iown_tracker i_count_tracker; struct iown_tracker i_endoff_tracker; struct iown_tracker i_offset_tracker; #endif int i_nextclustercg; /* last cg searched for cluster */ /* * Data for extended attribute modification. */ u_char *i_ea_area; /* Pointer to malloced copy of EA area */ unsigned i_ea_len; /* Length of i_ea_area */ int i_ea_error; /* First errno in transaction */ int i_ea_refs; /* Number of users of EA area */ /* * Copies from the on-disk dinode itself. */ u_int64_t i_size; /* File byte count. */ u_int64_t i_gen; /* Generation number. */ u_int32_t i_flags; /* Status flags (chflags). */ u_int32_t i_uid; /* File owner. */ u_int32_t i_gid; /* File group. */ u_int16_t i_mode; /* IFMT, permissions; see below. */ int16_t i_nlink; /* File link count. */ }; /* * These flags are kept in i_flag. */ #define IN_ACCESS 0x0001 /* Access time update request. */ #define IN_CHANGE 0x0002 /* Inode change time update request. */ #define IN_UPDATE 0x0004 /* Modification time update request. */ #define IN_MODIFIED 0x0008 /* Inode has been modified. */ #define IN_NEEDSYNC 0x0010 /* Inode requires fsync. */ #define IN_LAZYMOD 0x0020 /* Modified, but don't write yet. */ #define IN_LAZYACCESS 0x0040 /* Process IN_ACCESS after the suspension finished */ #define IN_EA_LOCKED 0x0080 /* Extended attributes locked */ #define IN_EA_LOCKWAIT 0x0100 /* Want extended attributes lock */ #define IN_TRUNCATED 0x0200 /* Journaled truncation pending. */ #define IN_UFS2 0x0400 /* UFS2 vs UFS1 */ #define IN_IBLKDATA 0x0800 /* datasync requires inode block update */ #define IN_SIZEMOD 0x1000 /* Inode size has been modified */ #define IN_ENDOFF 0x2000 /* Free space at the end of directory, try to truncate when possible */ #define PRINT_INODE_FLAGS "\20\20b16\17b15\16b14\15sizemod" \ "\14iblkdata\13is_ufs2\12truncated\11ea_lockwait\10ea_locked" \ "\7lazyaccess\6lazymod\5needsync\4modified\3update\2change\1access" #define UFS_INODE_FLAG_LAZY_MASK \ (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE | IN_LAZYMOD | \ IN_LAZYACCESS) /* * Some flags can persist a vnode transitioning to 0 hold count and being tkaen * off the list. */ #define UFS_INODE_FLAG_LAZY_MASK_ASSERTABLE \ (UFS_INODE_FLAG_LAZY_MASK & ~(IN_LAZYMOD | IN_LAZYACCESS)) #define UFS_INODE_SET_MODE(ip, mode) do { \ struct inode *_ip = (ip); \ int _mode = (mode); \ \ ASSERT_VOP_IN_SEQC(ITOV(_ip)); \ atomic_store_short(&(_ip)->i_mode, _mode); \ } while (0) #define UFS_INODE_SET_FLAG(ip, flags) do { \ struct inode *_ip = (ip); \ struct vnode *_vp = ITOV(_ip); \ int _flags = (flags); \ \ _ip->i_flag |= _flags; \ if (_flags & UFS_INODE_FLAG_LAZY_MASK) \ vlazy(_vp); \ } while (0) #define UFS_INODE_SET_FLAG_SHARED(ip, flags) do { \ struct inode *_ip = (ip); \ struct vnode *_vp = ITOV(_ip); \ int _flags = (flags); \ \ ASSERT_VI_UNLOCKED(_vp, __func__); \ if ((_ip->i_flag & (_flags)) != _flags) { \ VI_LOCK(_vp); \ _ip->i_flag |= _flags; \ if (_flags & UFS_INODE_FLAG_LAZY_MASK) \ vlazy(_vp); \ VI_UNLOCK(_vp); \ } \ } while (0) #define i_dirhash i_un.dirhash #define i_snapblklist i_un.snapblklist #define i_din1 dinode_u.din1 #define i_din2 dinode_u.din2 #ifdef _KERNEL #define ITOUMP(ip) ((ip)->i_ump) #define ITODEV(ip) (ITOUMP(ip)->um_dev) #define ITODEVVP(ip) (ITOUMP(ip)->um_devvp) #define ITOFS(ip) (ITOUMP(ip)->um_fs) #define ITOVFS(ip) ((ip)->i_vnode->v_mount) static inline _Bool I_IS_UFS1(const struct inode *ip) { return ((ip->i_flag & IN_UFS2) == 0); } static inline _Bool I_IS_UFS2(const struct inode *ip) { return ((ip->i_flag & IN_UFS2) != 0); } /* * The DIP macro is used to access fields in the dinode that are * not cached in the inode itself. */ #define DIP(ip, field) (I_IS_UFS1(ip) ? (ip)->i_din1->d##field : \ (ip)->i_din2->d##field) #define DIP_SET(ip, field, val) do { \ if (I_IS_UFS1(ip)) \ (ip)->i_din1->d##field = (val); \ else \ (ip)->i_din2->d##field = (val); \ } while (0) -#define SHORTLINK(ip) (I_IS_UFS1(ip) ? \ - (caddr_t)(ip)->i_din1->di_db : (caddr_t)(ip)->i_din2->di_db) #define IS_SNAPSHOT(ip) ((ip)->i_flags & SF_SNAPSHOT) #define IS_UFS(vp) ((vp)->v_data != NULL) /* * Structure used to pass around logical block paths generated by * ufs_getlbns and used by truncate and bmap code. */ struct indir { ufs2_daddr_t in_lbn; /* Logical block number. */ int in_off; /* Offset in buffer. */ }; /* Convert between inode pointers and vnode pointers. */ #define VTOI(vp) ((struct inode *)(vp)->v_data) #define VTOI_SMR(vp) ((struct inode *)vn_load_v_data_smr(vp)) #define ITOV(ip) ((ip)->i_vnode) /* Determine if soft dependencies are being done */ #define MOUNTEDSOFTDEP(mp) (((mp)->mnt_flag & MNT_SOFTDEP) != 0) #define DOINGSOFTDEP(vp) MOUNTEDSOFTDEP((vp)->v_mount) #define MOUNTEDSUJ(mp) (((mp)->mnt_flag & (MNT_SOFTDEP | MNT_SUJ)) == \ (MNT_SOFTDEP | MNT_SUJ)) #define DOINGSUJ(vp) MOUNTEDSUJ((vp)->v_mount) /* This overlays the fid structure (see mount.h). */ struct ufid { u_int16_t ufid_len; /* Length of structure. */ u_int16_t ufid_pad; /* Force 32-bit alignment. */ uint32_t ufid_ino; /* File number (ino). */ uint32_t ufid_gen; /* Generation number. */ }; #ifdef DIAGNOSTIC void ufs_init_trackers(struct inode *ip); void ufs_unlock_tracker(struct inode *ip); doff_t ufs_get_i_offset(struct inode *ip, const char *file, int line); void ufs_set_i_offset(struct inode *ip, doff_t off, const char *file, int line); #define I_OFFSET(ip) ufs_get_i_offset(ip, __FILE__, __LINE__) #define SET_I_OFFSET(ip, off) ufs_set_i_offset(ip, off, __FILE__, __LINE__) int32_t ufs_get_i_count(struct inode *ip, const char *file, int line); void ufs_set_i_count(struct inode *ip, int32_t cnt, const char *file, int line); #define I_COUNT(ip) ufs_get_i_count(ip, __FILE__, __LINE__) #define SET_I_COUNT(ip, cnt) ufs_set_i_count(ip, cnt, __FILE__, __LINE__) doff_t ufs_get_i_endoff(struct inode *ip, const char *file, int line); void ufs_set_i_endoff(struct inode *ip, doff_t off, const char *file, int line); #define I_ENDOFF(ip) ufs_get_i_endoff(ip, __FILE__, __LINE__) #define SET_I_ENDOFF(ip, off) ufs_set_i_endoff(ip, off, __FILE__, __LINE__) #else #define I_OFFSET(ip) ((ip)->i_offset) #define SET_I_OFFSET(ip, off) ((ip)->i_offset = (off)) #define I_COUNT(ip) ((ip)->i_count) #define SET_I_COUNT(ip, cnt) ((ip)->i_count = cnt) #define I_ENDOFF(ip) ((ip)->i_endoff) #define SET_I_ENDOFF(ip, off) ((ip)->i_endoff = off) #endif #endif /* _KERNEL */ #endif /* !_UFS_UFS_INODE_H_ */ diff --git a/sys/ufs/ufs/ufs_vnops.c b/sys/ufs/ufs/ufs_vnops.c index 93ac04941db7..57560efffbbf 100644 --- a/sys/ufs/ufs/ufs_vnops.c +++ b/sys/ufs/ufs/ufs_vnops.c @@ -1,3087 +1,3087 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_quota.h" #include "opt_suiddir.h" #include "opt_ufs.h" #include "opt_ffs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* XXX */ #include #include #include #include #include #include #include #include #include #ifdef UFS_DIRHASH #include #endif #ifdef UFS_GJOURNAL #include FEATURE(ufs_gjournal, "Journaling support through GEOM for UFS"); #endif #ifdef QUOTA FEATURE(ufs_quota, "UFS disk quotas support"); FEATURE(ufs_quota64, "64bit UFS disk quotas support"); #endif #ifdef SUIDDIR FEATURE(suiddir, "Give all new files in directory the same ownership as the directory"); #endif VFS_SMR_DECLARE; #include static vop_accessx_t ufs_accessx; static vop_fplookup_vexec_t ufs_fplookup_vexec; static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *); static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *); static vop_close_t ufs_close; static vop_create_t ufs_create; static vop_stat_t ufs_stat; static vop_getattr_t ufs_getattr; static vop_ioctl_t ufs_ioctl; static vop_link_t ufs_link; static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *, const char *); static vop_mmapped_t ufs_mmapped; static vop_mkdir_t ufs_mkdir; static vop_mknod_t ufs_mknod; static vop_open_t ufs_open; static vop_pathconf_t ufs_pathconf; static vop_print_t ufs_print; static vop_readlink_t ufs_readlink; static vop_remove_t ufs_remove; static vop_rename_t ufs_rename; static vop_rmdir_t ufs_rmdir; static vop_setattr_t ufs_setattr; static vop_strategy_t ufs_strategy; static vop_symlink_t ufs_symlink; static vop_whiteout_t ufs_whiteout; static vop_close_t ufsfifo_close; SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "UFS filesystem"); /* * A virgin directory (no blushing please). */ static struct dirtemplate mastertemplate = { 0, 12, DT_DIR, 1, ".", 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." }; static struct odirtemplate omastertemplate = { 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." }; static void ufs_itimes_locked(struct vnode *vp) { struct inode *ip; struct timespec ts; ASSERT_VI_LOCKED(vp, __func__); ip = VTOI(vp); if (UFS_RDONLY(ip)) goto out; if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) return; if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp)) UFS_INODE_SET_FLAG(ip, IN_LAZYMOD); else if (((vp->v_mount->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || (ip->i_flag & (IN_CHANGE | IN_UPDATE))) UFS_INODE_SET_FLAG(ip, IN_MODIFIED); else if (ip->i_flag & IN_ACCESS) UFS_INODE_SET_FLAG(ip, IN_LAZYACCESS); vfs_timestamp(&ts); if (ip->i_flag & IN_ACCESS) { DIP_SET(ip, i_atime, ts.tv_sec); DIP_SET(ip, i_atimensec, ts.tv_nsec); } if (ip->i_flag & IN_UPDATE) { DIP_SET(ip, i_mtime, ts.tv_sec); DIP_SET(ip, i_mtimensec, ts.tv_nsec); } if (ip->i_flag & IN_CHANGE) { DIP_SET(ip, i_ctime, ts.tv_sec); DIP_SET(ip, i_ctimensec, ts.tv_nsec); DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1); } out: ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); } void ufs_itimes(struct vnode *vp) { VI_LOCK(vp); ufs_itimes_locked(vp); VI_UNLOCK(vp); } static int ufs_sync_nlink1(struct mount *mp) { int error; error = vfs_busy(mp, 0); if (error == 0) { VFS_SYNC(mp, MNT_WAIT); vfs_unbusy(mp); error = ERELOOKUP; } vfs_rel(mp); return (error); } static int ufs_sync_nlink(struct vnode *vp, struct vnode *vp1) { struct inode *ip; struct mount *mp; int error; ip = VTOI(vp); if (ip->i_nlink < UFS_LINK_MAX) return (0); if (!DOINGSOFTDEP(vp) || ip->i_effnlink >= UFS_LINK_MAX) return (EMLINK); mp = vp->v_mount; vfs_ref(mp); VOP_UNLOCK(vp); if (vp1 != NULL) VOP_UNLOCK(vp1); error = ufs_sync_nlink1(mp); vn_lock_pair(vp, false, vp1, false); return (error); } /* * Create a regular file */ static int ufs_create( struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap) { int error; error = ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), ap->a_dvp, ap->a_vpp, ap->a_cnp, "ufs_create"); if (error != 0) return (error); if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0) cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp); return (0); } /* * Mknod vnode call */ /* ARGSUSED */ static int ufs_mknod( struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap) { struct vattr *vap = ap->a_vap; struct vnode **vpp = ap->a_vpp; struct inode *ip; ino_t ino; int error; error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), ap->a_dvp, vpp, ap->a_cnp, "ufs_mknod"); if (error) return (error); ip = VTOI(*vpp); UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); if (vap->va_rdev != VNOVAL) { /* * Want to be able to use this to make badblock * inodes, so don't truncate the dev number. */ DIP_SET(ip, i_rdev, vap->va_rdev); } /* * Remove inode, then reload it through VFS_VGET(). This is * needed to do further inode initialization, for instance * fifo, which was too early for VFS_VGET() done as part of * UFS_VALLOC(). */ (*vpp)->v_type = VNON; ino = ip->i_number; /* Save this before vgone() invalidates ip. */ vgone(*vpp); vput(*vpp); error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); if (error) { *vpp = NULL; return (error); } return (0); } /* * Open called. */ /* ARGSUSED */ static int ufs_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; struct inode *ip; if (vp->v_type == VCHR || vp->v_type == VBLK) return (EOPNOTSUPP); ip = VTOI(vp); vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td); if (vp->v_type == VREG && (vn_irflag_read(vp) & VIRF_PGREAD) == 0) { vn_irflag_set_cond(vp, VIRF_PGREAD); } /* * Files marked append-only must be opened for appending. */ if ((ip->i_flags & APPEND) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); return (0); } /* * Close called. * * Update the times on the inode. */ /* ARGSUSED */ static int ufs_close( struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap) { struct vnode *vp = ap->a_vp; int usecount; VI_LOCK(vp); usecount = vp->v_usecount; if (usecount > 1) ufs_itimes_locked(vp); VI_UNLOCK(vp); return (0); } static int ufs_accessx( struct vop_accessx_args /* { struct vnode *a_vp; accmode_t a_accmode; struct ucred *a_cred; struct thread *a_td; } */ *ap) { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); accmode_t accmode = ap->a_accmode; int error; #ifdef UFS_ACL struct acl *acl; acl_type_t type; #endif /* * Disallow write attempts on read-only filesystems; * unless the file is a socket, fifo, or a block or * character device resident on the filesystem. */ if (accmode & VMODIFY_PERMS) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); #ifdef QUOTA /* * Inode is accounted in the quotas only if struct * dquot is attached to it. VOP_ACCESS() is called * from vn_open_cred() and provides a convenient * point to call getinoquota(). The lock mode is * exclusive when the file is opening for write. */ if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { error = getinoquota(ip); if (error != 0) return (error); } #endif break; default: break; } } /* * If immutable bit set, nobody gets to write it. "& ~VADMIN_PERMS" * permits the owner of the file to remove the IMMUTABLE flag. */ if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) && (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT))) return (EPERM); #ifdef UFS_ACL if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) { if (vp->v_mount->mnt_flag & MNT_NFS4ACLS) type = ACL_TYPE_NFS4; else type = ACL_TYPE_ACCESS; acl = acl_alloc(M_WAITOK); if (type == ACL_TYPE_NFS4) error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td); else error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td); switch (error) { case 0: if (type == ACL_TYPE_NFS4) { error = vaccess_acl_nfs4(vp->v_type, ip->i_uid, ip->i_gid, acl, accmode, ap->a_cred); } else { error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess_acl_posix1e(vp->v_type, ip->i_uid, ip->i_gid, acl, accmode, ap->a_cred); } break; default: if (error != EOPNOTSUPP) printf( "ufs_accessx(): Error retrieving ACL on object (%d).\n", error); /* * XXX: Fall back until debugged. Should * eventually possibly log an error, and return * EPERM for safety. */ error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, accmode, ap->a_cred); } acl_free(acl); return (error); } #endif /* !UFS_ACL */ error = vfs_unixify_accmode(&accmode); if (error == 0) error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, accmode, ap->a_cred); return (error); } /* * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see * the comment above cache_fplookup for details. */ static int ufs_fplookup_vexec( struct vop_fplookup_vexec_args /* { struct vnode *a_vp; struct ucred *a_cred; struct thread *a_td; } */ *ap) { struct vnode *vp; struct inode *ip; struct ucred *cred; mode_t all_x, mode; vp = ap->a_vp; ip = VTOI_SMR(vp); if (__predict_false(ip == NULL)) return (EAGAIN); /* * XXX ACL race * * ACLs are not supported and UFS clears/sets this flag on mount and * remount. However, we may still be racing with seeing them and there * is no provision to make sure they were accounted for. This matches * the behavior of the locked case, since the lookup there is also * racy: mount takes no measures to block anyone from progressing. */ all_x = S_IXUSR | S_IXGRP | S_IXOTH; mode = atomic_load_short(&ip->i_mode); if (__predict_true((mode & all_x) == all_x)) return (0); cred = ap->a_cred; return (vaccess_vexec_smr(mode, ip->i_uid, ip->i_gid, cred)); } /* ARGSUSED */ static int ufs_stat(struct vop_stat_args *ap) { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct stat *sb = ap->a_sb; int error; error = vop_stat_helper_pre(ap); if (__predict_false(error)) return (error); VI_LOCK(vp); ufs_itimes_locked(vp); if (I_IS_UFS1(ip)) { sb->st_atim.tv_sec = ip->i_din1->di_atime; sb->st_atim.tv_nsec = ip->i_din1->di_atimensec; } else { sb->st_atim.tv_sec = ip->i_din2->di_atime; sb->st_atim.tv_nsec = ip->i_din2->di_atimensec; } VI_UNLOCK(vp); sb->st_dev = dev2udev(ITOUMP(ip)->um_dev); sb->st_ino = ip->i_number; sb->st_mode = (ip->i_mode & ~IFMT) | VTTOIF(vp->v_type); sb->st_nlink = ip->i_effnlink; sb->st_uid = ip->i_uid; sb->st_gid = ip->i_gid; if (I_IS_UFS1(ip)) { sb->st_rdev = ip->i_din1->di_rdev; sb->st_size = ip->i_din1->di_size; sb->st_mtim.tv_sec = ip->i_din1->di_mtime; sb->st_mtim.tv_nsec = ip->i_din1->di_mtimensec; sb->st_ctim.tv_sec = ip->i_din1->di_ctime; sb->st_ctim.tv_nsec = ip->i_din1->di_ctimensec; sb->st_birthtim.tv_sec = -1; sb->st_birthtim.tv_nsec = 0; sb->st_blocks = dbtob((u_quad_t)ip->i_din1->di_blocks) / S_BLKSIZE; } else { sb->st_rdev = ip->i_din2->di_rdev; sb->st_size = ip->i_din2->di_size; sb->st_mtim.tv_sec = ip->i_din2->di_mtime; sb->st_mtim.tv_nsec = ip->i_din2->di_mtimensec; sb->st_ctim.tv_sec = ip->i_din2->di_ctime; sb->st_ctim.tv_nsec = ip->i_din2->di_ctimensec; sb->st_birthtim.tv_sec = ip->i_din2->di_birthtime; sb->st_birthtim.tv_nsec = ip->i_din2->di_birthnsec; sb->st_blocks = dbtob((u_quad_t)ip->i_din2->di_blocks) / S_BLKSIZE; } sb->st_blksize = max(PAGE_SIZE, vp->v_mount->mnt_stat.f_iosize); sb->st_flags = ip->i_flags; sb->st_gen = ip->i_gen; return (vop_stat_helper_post(ap, error)); } /* ARGSUSED */ static int ufs_getattr( struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap) { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct vattr *vap = ap->a_vap; VI_LOCK(vp); ufs_itimes_locked(vp); if (I_IS_UFS1(ip)) { vap->va_atime.tv_sec = ip->i_din1->di_atime; vap->va_atime.tv_nsec = ip->i_din1->di_atimensec; } else { vap->va_atime.tv_sec = ip->i_din2->di_atime; vap->va_atime.tv_nsec = ip->i_din2->di_atimensec; } VI_UNLOCK(vp); /* * Copy from inode table */ vap->va_fsid = dev2udev(ITOUMP(ip)->um_dev); vap->va_fileid = ip->i_number; vap->va_mode = ip->i_mode & ~IFMT; vap->va_nlink = ip->i_effnlink; vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; if (I_IS_UFS1(ip)) { vap->va_rdev = ip->i_din1->di_rdev; vap->va_size = ip->i_din1->di_size; vap->va_mtime.tv_sec = ip->i_din1->di_mtime; vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec; vap->va_ctime.tv_sec = ip->i_din1->di_ctime; vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec; vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks); vap->va_filerev = ip->i_din1->di_modrev; } else { vap->va_rdev = ip->i_din2->di_rdev; vap->va_size = ip->i_din2->di_size; vap->va_mtime.tv_sec = ip->i_din2->di_mtime; vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec; vap->va_ctime.tv_sec = ip->i_din2->di_ctime; vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec; vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime; vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec; vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks); vap->va_filerev = ip->i_din2->di_modrev; } vap->va_flags = ip->i_flags; vap->va_gen = ip->i_gen; vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; vap->va_type = IFTOVT(ip->i_mode); return (0); } /* * Set attribute vnode op. called from several syscalls */ static int ufs_setattr( struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; } */ *ap) { struct vattr *vap = ap->a_vap; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ucred *cred = ap->a_cred; struct thread *td = curthread; int error; /* * Check for unsettable attributes. */ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } if (vap->va_flags != VNOVAL) { if ((vap->va_flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK | SF_SNAPSHOT | UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE | UF_SPARSE | UF_SYSTEM)) != 0) return (EOPNOTSUPP); if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * Callers may only modify the file flags on objects they * have VADMIN rights for. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) return (error); /* * Unprivileged processes are not permitted to unset system * flags, or modify flags if any system flags are set. * Privileged non-jail processes may not modify system flags * if securelevel > 0 and any existing system flags are set. * Privileged jail processes behave like privileged non-jail * processes if the PR_ALLOW_CHFLAGS permission bit is set; * otherwise, they behave like unprivileged processes. */ if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS)) { if (ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { error = securelevel_gt(cred, 0); if (error) return (error); } /* The snapshot flag cannot be toggled. */ if ((vap->va_flags ^ ip->i_flags) & SF_SNAPSHOT) return (EPERM); } else { if (ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE)) return (EPERM); } ip->i_flags = vap->va_flags; DIP_SET(ip, i_flags, vap->va_flags); UFS_INODE_SET_FLAG(ip, IN_CHANGE); error = UFS_UPDATE(vp, 0); if (ip->i_flags & (IMMUTABLE | APPEND)) return (error); } /* * If immutable or append, no one can change any of its attributes * except the ones already handled (in some cases, file flags * including the immutability flags themselves for the superuser). */ if (ip->i_flags & (IMMUTABLE | APPEND)) return (EPERM); /* * Go through the fields and update iff not VNOVAL. */ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, td)) != 0) return (error); } if (vap->va_size != VNOVAL) { /* * XXX most of the following special cases should be in * callers instead of in N filesystems. The VDIR check * mostly already is. */ switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: /* * Truncation should have an effect in these cases. * Disallow it if the filesystem is read-only or * the file is being snapshotted. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (IS_SNAPSHOT(ip)) return (EPERM); break; default: /* * According to POSIX, the result is unspecified * for file types other than regular files, * directories and shared memory objects. We * don't support shared memory objects in the file * system, and have dubious support for truncating * symlinks. Just ignore the request in other cases. */ return (0); } error = vn_rlimit_trunc(vap->va_size, td); if (error != 0) return (error); if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL | ((vap->va_vaflags & VA_SYNC) != 0 ? IO_SYNC : 0), cred)) != 0) return (error); } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_birthtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (IS_SNAPSHOT(ip)) return (EPERM); error = vn_utimes_perm(vp, vap, cred, td); if (error != 0) return (error); UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_MODIFIED); if (vap->va_atime.tv_sec != VNOVAL) { ip->i_flag &= ~IN_ACCESS; DIP_SET(ip, i_atime, vap->va_atime.tv_sec); DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec); } if (vap->va_mtime.tv_sec != VNOVAL) { ip->i_flag &= ~IN_UPDATE; DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec); DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec); } if (vap->va_birthtime.tv_sec != VNOVAL && I_IS_UFS2(ip)) { ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec; ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec; } error = UFS_UPDATE(vp, 0); if (error) return (error); } error = 0; if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (IS_SNAPSHOT(ip) && (vap->va_mode & (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH)) != 0) return (EPERM); error = ufs_chmod(vp, (int)vap->va_mode, cred, td); } return (error); } #ifdef UFS_ACL static int ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode, int file_owner_id, struct ucred *cred, struct thread *td) { int error; struct acl *aclp; aclp = acl_alloc(M_WAITOK); error = ufs_getacl_nfs4_internal(vp, aclp, td); /* * We don't have to handle EOPNOTSUPP here, as the filesystem claims * it supports ACLs. */ if (error) goto out; acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id); error = ufs_setacl_nfs4_internal(vp, aclp, td); out: acl_free(aclp); return (error); } #endif /* UFS_ACL */ static int ufs_mmapped( struct vop_mmapped_args /* { struct vnode *a_vp; } */ *ap) { struct vnode *vp; struct inode *ip; struct mount *mp; vp = ap->a_vp; ip = VTOI(vp); mp = vp->v_mount; if ((mp->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) UFS_INODE_SET_FLAG_SHARED(ip, IN_ACCESS); /* * XXXKIB No UFS_UPDATE(ap->a_vp, 0) there. */ return (0); } /* * Change the mode on a file. * Inode must be locked before calling. */ static int ufs_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td) { struct inode *ip = VTOI(vp); int newmode, error; /* * To modify the permissions on a file, must possess VADMIN * for that file. */ if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td))) return (error); /* * Privileged processes may set the sticky bit on non-directories, * as well as set the setgid bit on a file with a group that the * process is not a member of. Both of these are allowed in * jail(8). */ if (vp->v_type != VDIR && (mode & S_ISTXT)) { if (priv_check_cred(cred, PRIV_VFS_STICKYFILE)) return (EFTYPE); } if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { error = priv_check_cred(cred, PRIV_VFS_SETGID); if (error) return (error); } /* * Deny setting setuid if we are not the file owner. */ if ((mode & ISUID) && ip->i_uid != cred->cr_uid) { error = priv_check_cred(cred, PRIV_VFS_ADMIN); if (error) return (error); } newmode = ip->i_mode & ~ALLPERMS; newmode |= (mode & ALLPERMS); UFS_INODE_SET_MODE(ip, newmode); DIP_SET(ip, i_mode, ip->i_mode); UFS_INODE_SET_FLAG(ip, IN_CHANGE); #ifdef UFS_ACL if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0) error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td); #endif if (error == 0 && (ip->i_flag & IN_CHANGE) != 0) error = UFS_UPDATE(vp, 0); return (error); } /* * Perform chown operation on inode ip; * inode must be locked prior to call. */ static int ufs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, struct thread *td) { struct inode *ip = VTOI(vp); uid_t ouid; gid_t ogid; int error = 0; #ifdef QUOTA int i; ufs2_daddr_t change; #endif if (uid == (uid_t)VNOVAL) uid = ip->i_uid; if (gid == (gid_t)VNOVAL) gid = ip->i_gid; /* * To modify the ownership of a file, must possess VADMIN for that * file. */ if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td))) return (error); /* * To change the owner of a file, or change the group of a file to a * group of which we are not a member, the caller must have * privilege. */ if (((uid != ip->i_uid && uid != cred->cr_uid) || (gid != ip->i_gid && !groupmember(gid, cred))) && (error = priv_check_cred(cred, PRIV_VFS_CHOWN))) return (error); ogid = ip->i_gid; ouid = ip->i_uid; #ifdef QUOTA if ((error = getinoquota(ip)) != 0) return (error); if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } change = DIP(ip, i_blocks); (void) chkdq(ip, -change, cred, CHOWN|FORCE); (void) chkiq(ip, -1, cred, CHOWN|FORCE); for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } #endif ip->i_gid = gid; DIP_SET(ip, i_gid, gid); ip->i_uid = uid; DIP_SET(ip, i_uid, uid); #ifdef QUOTA if ((error = getinoquota(ip)) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) goto good; else (void) chkdq(ip, -change, cred, CHOWN|FORCE); } for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } } ip->i_gid = ogid; DIP_SET(ip, i_gid, ogid); ip->i_uid = ouid; DIP_SET(ip, i_uid, ouid); if (getinoquota(ip) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } (void) chkdq(ip, change, cred, FORCE|CHOWN); (void) chkiq(ip, 1, cred, FORCE|CHOWN); (void) getinoquota(ip); } return (error); good: if (getinoquota(ip)) panic("ufs_chown: lost quota"); #endif /* QUOTA */ UFS_INODE_SET_FLAG(ip, IN_CHANGE); if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) { UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID)); DIP_SET(ip, i_mode, ip->i_mode); } } error = UFS_UPDATE(vp, 0); return (error); } static int ufs_remove( struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap) { struct inode *ip; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; int error; struct thread *td; td = curthread; ip = VTOI(vp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(dvp)->i_flags & APPEND)) return (EPERM); if (DOINGSUJ(dvp)) { error = softdep_prelink(dvp, vp, ap->a_cnp); if (error != 0) { MPASS(error == ERELOOKUP); return (error); } } #ifdef UFS_GJOURNAL ufs_gjournal_orphan(vp); #endif error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); if (ip->i_nlink <= 0) vp->v_vflag |= VV_NOSYNC; if (IS_SNAPSHOT(ip)) { /* * Avoid deadlock where another thread is trying to * update the inodeblock for dvp and is waiting on * snaplk. Temporary unlock the vnode lock for the * unlinked file and sync the directory. This should * allow vput() of the directory to not block later on * while holding the snapshot vnode locked, assuming * that the directory hasn't been unlinked too. */ VOP_UNLOCK(vp); (void) VOP_FSYNC(dvp, MNT_WAIT, td); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } return (error); } static void print_bad_link_count(const char *funcname, struct vnode *dvp) { struct inode *dip; dip = VTOI(dvp); uprintf("%s: Bad link count %d on parent inode %jd in file system %s\n", funcname, dip->i_effnlink, (intmax_t)dip->i_number, dvp->v_mount->mnt_stat.f_mntonname); } /* * link vnode call */ static int ufs_link( struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap) { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct inode *ip; struct direct newdir; int error; #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_link: no name"); #endif if (DOINGSUJ(tdvp)) { error = softdep_prelink(tdvp, vp, cnp); if (error != 0) { MPASS(error == ERELOOKUP); return (error); } } if (VTOI(tdvp)->i_effnlink < 2) { print_bad_link_count("ufs_link", tdvp); error = EINVAL; goto out; } error = ufs_sync_nlink(vp, tdvp); if (error != 0) goto out; ip = VTOI(vp); /* * The file may have been removed after namei dropped the original * lock. */ if (ip->i_effnlink == 0) { error = ENOENT; goto out; } if (ip->i_flags & (IMMUTABLE | APPEND)) { error = EPERM; goto out; } ip->i_effnlink++; ip->i_nlink++; DIP_SET(ip, i_nlink, ip->i_nlink); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(vp)) softdep_setup_link(VTOI(tdvp), ip); error = UFS_UPDATE(vp, !DOINGSOFTDEP(vp) && !DOINGASYNC(vp)); if (!error) { ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL); } if (error) { ip->i_effnlink--; ip->i_nlink--; DIP_SET(ip, i_nlink, ip->i_nlink); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(vp)) softdep_revert_link(VTOI(tdvp), ip); } out: return (error); } /* * whiteout vnode call */ static int ufs_whiteout( struct vop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; } */ *ap) { struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct direct newdir; int error = 0; if (DOINGSUJ(dvp) && (ap->a_flags == CREATE || ap->a_flags == DELETE)) { error = softdep_prelink(dvp, NULL, cnp); if (error != 0) { MPASS(error == ERELOOKUP); return (error); } } switch (ap->a_flags) { case LOOKUP: /* 4.4 format directories support whiteout operations */ if (!OFSFMT(dvp)) return (0); return (EOPNOTSUPP); case CREATE: /* create a new directory whiteout */ #ifdef INVARIANTS if ((cnp->cn_flags & SAVENAME) == 0) panic("ufs_whiteout: missing name"); if (OFSFMT(dvp)) panic("ufs_whiteout: old format filesystem"); #endif newdir.d_ino = UFS_WINO; newdir.d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); newdir.d_type = DT_WHT; error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL); break; case DELETE: /* remove an existing directory whiteout */ #ifdef INVARIANTS if (OFSFMT(dvp)) panic("ufs_whiteout: old format filesystem"); #endif cnp->cn_flags &= ~DOWHITEOUT; error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); break; default: panic("ufs_whiteout: unknown op"); } return (error); } static volatile int rename_restarts; SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD, __DEVOLATILE(int *, &rename_restarts), 0, "Times rename had to restart due to lock contention"); /* * Rename system call. * rename("foo", "bar"); * is essentially * unlink("bar"); * link("foo", "bar"); * unlink("foo"); * but ``atomically''. Can't do full commit without saving state in the * inode on disk which isn't feasible at this time. Best we can do is * always guarantee the target exists. * * Basic algorithm is: * * 1) Bump link count on source while we're linking it to the * target. This also ensure the inode won't be deleted out * from underneath us while we work (it may be truncated by * a concurrent `trunc' or `open' for creation). * 2) Link source to destination. If destination already exists, * delete it first. * 3) Unlink source reference to inode if still around. If a * directory was moved and the parent of the destination * is different from the source, patch the ".." entry in the * directory. */ static int ufs_rename( struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap) { struct vnode *tvp = ap->a_tvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *nvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct thread *td = fcnp->cn_thread; struct inode *fip, *tip, *tdp, *fdp; struct direct newdir; off_t endoff; int doingdirectory, newparent; int error = 0; struct mount *mp; ino_t ino; seqc_t fdvp_s, fvp_s, tdvp_s, tvp_s; bool checkpath_locked, want_seqc_end; checkpath_locked = want_seqc_end = false; #ifdef INVARIANTS if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ufs_rename: no name"); #endif endoff = 0; mp = tdvp->v_mount; VOP_UNLOCK(tdvp); if (tvp && tvp != tdvp) VOP_UNLOCK(tvp); /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; mp = NULL; goto releout; } fdvp_s = fvp_s = tdvp_s = tvp_s = SEQC_MOD; relock: /* * We need to acquire 2 to 4 locks depending on whether tvp is NULL * and fdvp and tdvp are the same directory. Subsequently we need * to double-check all paths and in the directory rename case we * need to verify that we are not creating a directory loop. To * handle this we acquire all but fdvp using non-blocking * acquisitions. If we fail to acquire any lock in the path we will * drop all held locks, acquire the new lock in a blocking fashion, * and then release it and restart the rename. This acquire/release * step ensures that we do not spin on a lock waiting for release. */ error = vn_lock(fdvp, LK_EXCLUSIVE); if (error) goto releout; if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) { VOP_UNLOCK(fdvp); error = vn_lock(tdvp, LK_EXCLUSIVE); if (error) goto releout; VOP_UNLOCK(tdvp); atomic_add_int(&rename_restarts, 1); goto relock; } /* * Re-resolve fvp to be certain it still exists and fetch the * correct vnode. */ error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); if (error) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); goto releout; } error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); if (error) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); if (error != EBUSY) goto releout; error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); if (error != 0) goto releout; VOP_UNLOCK(nvp); vrele(fvp); fvp = nvp; atomic_add_int(&rename_restarts, 1); goto relock; } vrele(fvp); fvp = nvp; /* * Re-resolve tvp and acquire the vnode lock if present. */ error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino); if (error != 0 && error != EJUSTRETURN) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); VOP_UNLOCK(fvp); goto releout; } /* * If tvp disappeared we just carry on. */ if (error == EJUSTRETURN && tvp != NULL) { vrele(tvp); tvp = NULL; } /* * Get the tvp ino if the lookup succeeded. We may have to restart * if the non-blocking acquire fails. */ if (error == 0) { nvp = NULL; error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp); if (tvp) vrele(tvp); tvp = nvp; if (error) { VOP_UNLOCK(fdvp); VOP_UNLOCK(tdvp); VOP_UNLOCK(fvp); if (error != EBUSY) goto releout; error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp); if (error != 0) goto releout; vput(nvp); atomic_add_int(&rename_restarts, 1); goto relock; } } if (DOINGSUJ(fdvp) && (seqc_in_modify(fdvp_s) || !vn_seqc_consistent(fdvp, fdvp_s) || seqc_in_modify(fvp_s) || !vn_seqc_consistent(fvp, fvp_s) || seqc_in_modify(tdvp_s) || !vn_seqc_consistent(tdvp, tdvp_s) || (tvp != NULL && (seqc_in_modify(tvp_s) || !vn_seqc_consistent(tvp, tvp_s))))) { error = softdep_prerename(fdvp, fvp, tdvp, tvp); if (error != 0) goto releout; } fdp = VTOI(fdvp); fip = VTOI(fvp); tdp = VTOI(tdvp); tip = NULL; if (tvp) tip = VTOI(tvp); if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(tdvp)->i_flags & APPEND))) { error = EPERM; goto unlockout; } /* * Renaming a file to itself has no effect. The upper layers should * not call us in that case. However, things could change after * we drop the locks above. */ if (fvp == tvp) { error = 0; goto unlockout; } doingdirectory = 0; newparent = 0; ino = fip->i_number; if (fip->i_nlink >= UFS_LINK_MAX) { if (!DOINGSOFTDEP(fvp) || fip->i_effnlink >= UFS_LINK_MAX) { error = EMLINK; goto unlockout; } vfs_ref(mp); MPASS(!want_seqc_end); if (checkpath_locked) { sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); checkpath_locked = false; } VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); vref(tdvp); if (tvp != NULL) vref(tvp); VOP_VPUT_PAIR(tdvp, &tvp, true); error = ufs_sync_nlink1(mp); vrele(fdvp); vrele(fvp); vrele(tdvp); if (tvp != NULL) vrele(tvp); return (error); } if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (fdp->i_flags & APPEND)) { error = EPERM; goto unlockout; } if ((fip->i_mode & IFMT) == IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || fdp == fip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { error = EINVAL; goto unlockout; } if (fdp->i_number != tdp->i_number) newparent = tdp->i_number; doingdirectory = 1; } if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) || (tvp != NULL && tvp->v_type == VDIR && tvp->v_mountedhere != NULL)) { error = EXDEV; goto unlockout; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory hierarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". */ if (doingdirectory && newparent) { error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); if (error) goto unlockout; sx_xlock(&VFSTOUFS(mp)->um_checkpath_lock); checkpath_locked = true; error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred, &ino); /* * We encountered a lock that we have to wait for. Unlock * everything else and VGET before restarting. */ if (ino) { sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); checkpath_locked = false; VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); VOP_UNLOCK(tdvp); if (tvp) VOP_UNLOCK(tvp); error = VFS_VGET(mp, ino, LK_SHARED, &nvp); if (error == 0) vput(nvp); atomic_add_int(&rename_restarts, 1); goto relock; } if (error) goto unlockout; if ((tcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost to startdir"); } if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 || tdp->i_effnlink == 0) panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp); if (tvp != NULL) vn_seqc_write_begin(tvp); vn_seqc_write_begin(tdvp); vn_seqc_write_begin(fvp); vn_seqc_write_begin(fdvp); want_seqc_end = true; /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, the link count * may be wrong, but correctable. */ fip->i_effnlink++; fip->i_nlink++; DIP_SET(fip, i_nlink, fip->i_nlink); UFS_INODE_SET_FLAG(fip, IN_CHANGE); if (DOINGSOFTDEP(fvp)) softdep_setup_link(tdp, fip); error = UFS_UPDATE(fvp, !DOINGSOFTDEP(fvp) && !DOINGASYNC(fvp)); if (error) goto bad; /* * 2) If target doesn't exist, link the target * to the source and unlink the source. * Otherwise, rewrite the target directory * entry to reference the source inode and * expunge the original entry's existence. */ if (tip == NULL) { if (ITODEV(tdp) != ITODEV(fip)) panic("ufs_rename: EXDEV"); if (doingdirectory && newparent) { /* * Account for ".." in new directory. * When source and destination have the same * parent we don't adjust the link count. The * actual link modification is completed when * .. is rewritten below. */ if (tdp->i_nlink >= UFS_LINK_MAX) { fip->i_effnlink--; fip->i_nlink--; DIP_SET(fip, i_nlink, fip->i_nlink); UFS_INODE_SET_FLAG(fip, IN_CHANGE); if (DOINGSOFTDEP(fvp)) softdep_revert_link(tdp, fip); if (!DOINGSOFTDEP(tdvp) || tdp->i_effnlink >= UFS_LINK_MAX) { error = EMLINK; goto unlockout; } MPASS(want_seqc_end); if (tvp != NULL) vn_seqc_write_end(tvp); vn_seqc_write_end(tdvp); vn_seqc_write_end(fvp); vn_seqc_write_end(fdvp); want_seqc_end = false; vfs_ref(mp); MPASS(checkpath_locked); sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); checkpath_locked = false; VOP_UNLOCK(fdvp); VOP_UNLOCK(fvp); vref(tdvp); if (tvp != NULL) vref(tvp); VOP_VPUT_PAIR(tdvp, &tvp, true); error = ufs_sync_nlink1(mp); vrele(fdvp); vrele(fvp); vrele(tdvp); if (tvp != NULL) vrele(tvp); return (error); } } ufs_makedirentry(fip, tcnp, &newdir); error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL); if (error) goto bad; /* Setup tdvp for directory compaction if needed. */ if (I_COUNT(tdp) != 0 && I_ENDOFF(tdp) != 0 && I_ENDOFF(tdp) < tdp->i_size) endoff = I_ENDOFF(tdp); } else { if (ITODEV(tip) != ITODEV(tdp) || ITODEV(tip) != ITODEV(fip)) panic("ufs_rename: EXDEV"); /* * Short circuit rename(foo, foo). */ if (tip->i_number == fip->i_number) panic("ufs_rename: same file"); /* * If the parent directory is "sticky", then the caller * must possess VADMIN for the parent directory, or the * destination of the rename. This implements append-only * directories. */ if ((tdp->i_mode & S_ISTXT) && VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) && VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((tip->i_mode & IFMT) == IFDIR) { if ((tip->i_effnlink > 2) || !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } if (doingdirectory) { if (!newparent) { tdp->i_effnlink--; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(tdp); } tip->i_effnlink--; if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(tip); } error = ufs_dirrewrite(tdp, tip, fip->i_number, IFTODT(fip->i_mode), (doingdirectory && newparent) ? newparent : doingdirectory); if (error) { if (doingdirectory) { if (!newparent) { tdp->i_effnlink++; if (DOINGSOFTDEP(tdvp)) softdep_change_linkcnt(tdp); } tip->i_effnlink++; if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(tip); } goto bad; } if (doingdirectory && !DOINGSOFTDEP(tvp)) { /* * The only stuff left in the directory is "." * and "..". The "." reference is inconsequential * since we are quashing it. We have removed the "." * reference and the reference in the parent directory, * but there may be other hard links. The soft * dependency code will arrange to do these operations * after the parent directory entry has been deleted on * disk, so when running with that code we avoid doing * them now. */ if (!newparent) { tdp->i_nlink--; DIP_SET(tdp, i_nlink, tdp->i_nlink); UFS_INODE_SET_FLAG(tdp, IN_CHANGE); } tip->i_nlink--; DIP_SET(tip, i_nlink, tip->i_nlink); UFS_INODE_SET_FLAG(tip, IN_CHANGE); } } /* * 3) Unlink the source. We have to resolve the path again to * fixup the directory offset and count for ufs_dirremove. */ if (fdvp == tdvp) { error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino); if (error) panic("ufs_rename: from entry went away!"); if (ino != fip->i_number) panic("ufs_rename: ino mismatch %ju != %ju\n", (uintmax_t)ino, (uintmax_t)fip->i_number); } /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. */ if (doingdirectory && newparent) { /* * If tip exists we simply use its link, otherwise we must * add a new one. */ if (tip == NULL) { tdp->i_effnlink++; tdp->i_nlink++; DIP_SET(tdp, i_nlink, tdp->i_nlink); UFS_INODE_SET_FLAG(tdp, IN_CHANGE); if (DOINGSOFTDEP(tdvp)) softdep_setup_dotdot_link(tdp, fip); error = UFS_UPDATE(tdvp, !DOINGSOFTDEP(tdvp) && !DOINGASYNC(tdvp)); /* Don't go to bad here as the new link exists. */ if (error) goto unlockout; } else if (DOINGSUJ(tdvp)) /* Journal must account for each new link. */ softdep_setup_dotdot_link(tdp, fip); SET_I_OFFSET(fip, mastertemplate.dot_reclen); ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0); cache_purge(fdvp); } error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0); /* * The kern_renameat() looks up the fvp using the DELETE flag, which * causes the removal of the name cache entry for fvp. * As the relookup of the fvp is done in two steps: * ufs_lookup_ino() and then VFS_VGET(), another thread might do a * normal lookup of the from name just before the VFS_VGET() call, * causing the cache entry to be re-instantiated. * * The same issue also applies to tvp if it exists as * otherwise we may have a stale name cache entry for the new * name that references the old i-node if it has other links * or open file descriptors. */ cache_vop_rename(fdvp, fvp, tdvp, tvp, fcnp, tcnp); unlockout: if (want_seqc_end) { if (tvp != NULL) vn_seqc_write_end(tvp); vn_seqc_write_end(tdvp); vn_seqc_write_end(fvp); vn_seqc_write_end(fdvp); } if (checkpath_locked) sx_xunlock(&VFSTOUFS(mp)->um_checkpath_lock); vput(fdvp); vput(fvp); /* * If compaction or fsync was requested do it in * ffs_vput_pair() now that other locks are no longer needed. */ if (error == 0 && endoff != 0) { UFS_INODE_SET_FLAG(tdp, IN_ENDOFF); SET_I_ENDOFF(tdp, endoff); } VOP_VPUT_PAIR(tdvp, &tvp, true); return (error); bad: fip->i_effnlink--; fip->i_nlink--; DIP_SET(fip, i_nlink, fip->i_nlink); UFS_INODE_SET_FLAG(fip, IN_CHANGE); if (DOINGSOFTDEP(fvp)) softdep_revert_link(tdp, fip); goto unlockout; releout: if (want_seqc_end) { if (tvp != NULL) vn_seqc_write_end(tvp); vn_seqc_write_end(tdvp); vn_seqc_write_end(fvp); vn_seqc_write_end(fdvp); } vrele(fdvp); vrele(fvp); vrele(tdvp); if (tvp) vrele(tvp); return (error); } #ifdef UFS_ACL static int ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp, mode_t dmode, struct ucred *cred, struct thread *td) { int error; struct inode *ip = VTOI(tvp); struct acl *dacl, *acl; acl = acl_alloc(M_WAITOK); dacl = acl_alloc(M_WAITOK); /* * Retrieve default ACL from parent, if any. */ error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); switch (error) { case 0: /* * Retrieved a default ACL, so merge mode and ACL if * necessary. If the ACL is empty, fall through to * the "not defined or available" case. */ if (acl->acl_cnt != 0) { dmode = acl_posix1e_newfilemode(dmode, acl); UFS_INODE_SET_MODE(ip, dmode); DIP_SET(ip, i_mode, dmode); *dacl = *acl; ufs_sync_acl_from_inode(ip, acl); break; } /* FALLTHROUGH */ case EOPNOTSUPP: /* * Just use the mode as-is. */ UFS_INODE_SET_MODE(ip, dmode); DIP_SET(ip, i_mode, dmode); error = 0; goto out; default: goto out; } /* * XXX: If we abort now, will Soft Updates notify the extattr * code that the EAs for the file need to be released? */ error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); if (error == 0) error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td); switch (error) { case 0: break; case EOPNOTSUPP: /* * XXX: This should not happen, as EOPNOTSUPP above * was supposed to free acl. */ printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n"); /* panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()"); */ break; default: goto out; } out: acl_free(acl); acl_free(dacl); return (error); } static int ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp, mode_t mode, struct ucred *cred, struct thread *td) { int error; struct inode *ip = VTOI(tvp); struct acl *acl; acl = acl_alloc(M_WAITOK); /* * Retrieve default ACL for parent, if any. */ error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td); switch (error) { case 0: /* * Retrieved a default ACL, so merge mode and ACL if * necessary. */ if (acl->acl_cnt != 0) { /* * Two possible ways for default ACL to not * be present. First, the EA can be * undefined, or second, the default ACL can * be blank. If it's blank, fall through to * the it's not defined case. */ mode = acl_posix1e_newfilemode(mode, acl); UFS_INODE_SET_MODE(ip, mode); DIP_SET(ip, i_mode, mode); ufs_sync_acl_from_inode(ip, acl); break; } /* FALLTHROUGH */ case EOPNOTSUPP: /* * Just use the mode as-is. */ UFS_INODE_SET_MODE(ip, mode); DIP_SET(ip, i_mode, mode); error = 0; goto out; default: goto out; } /* * XXX: If we abort now, will Soft Updates notify the extattr * code that the EAs for the file need to be released? */ error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td); switch (error) { case 0: break; case EOPNOTSUPP: /* * XXX: This should not happen, as EOPNOTSUPP above was * supposed to free acl. */ printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " "but no VOP_SETACL()\n"); /* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() " "but no VOP_SETACL()"); */ break; default: goto out; } out: acl_free(acl); return (error); } static int ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp, mode_t child_mode, struct ucred *cred, struct thread *td) { int error; struct acl *parent_aclp, *child_aclp; parent_aclp = acl_alloc(M_WAITOK); child_aclp = acl_alloc(M_WAITOK | M_ZERO); error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td); if (error) goto out; acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp, child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR); error = ufs_setacl_nfs4_internal(tvp, child_aclp, td); if (error) goto out; out: acl_free(parent_aclp); acl_free(child_aclp); return (error); } #endif /* * Mkdir system call */ static int ufs_mkdir( struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; struct vnode *tvp; struct buf *bp; struct dirtemplate dirtemplate, *dtp; struct direct newdir; int error, dmode; long blkoff; #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_mkdir: no name"); #endif dp = VTOI(dvp); error = ufs_sync_nlink(dvp, NULL); if (error != 0) goto out; dmode = vap->va_mode & 0777; dmode |= IFDIR; /* * Must simulate part of ufs_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries. */ if (dp->i_effnlink < 2) { print_bad_link_count("ufs_mkdir", dvp); error = EINVAL; goto out; } if (DOINGSUJ(dvp)) { error = softdep_prelink(dvp, NULL, cnp); if (error != 0) { MPASS(error == ERELOOKUP); return (error); } } error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); if (error) goto out; vn_seqc_write_begin(tvp); ip = VTOI(tvp); ip->i_gid = dp->i_gid; DIP_SET(ip, i_gid, dp->i_gid); #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; gid_t ucred_group; ucp = cnp->cn_cred; #endif /* * If we are hacking owners here, (only do this where told to) * and we are not giving it TO root, (would subvert quotas) * then go ahead and give it to the other user. * The new directory also inherits the SUID bit. * If user's UID and dir UID are the same, * 'give it away' so that the SUID is still forced on. */ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (dp->i_mode & ISUID) && dp->i_uid) { dmode |= ISUID; ip->i_uid = dp->i_uid; DIP_SET(ip, i_uid, dp->i_uid); #ifdef QUOTA if (dp->i_uid != cnp->cn_cred->cr_uid) { /* * Make sure the correct user gets charged * for the space. * Make a dummy credential for the victim. * XXX This seems to never be accessed out of * our context so a stack variable is ok. */ refcount_init(&ucred.cr_ref, 1); ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups = &ucred_group; ucred.cr_groups[0] = dp->i_gid; ucp = &ucred; } #endif } else { ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); } #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); vn_seqc_write_end(tvp); vgone(tvp); vput(tvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(dp, ip); UFS_VFREE(tvp, ip->i_number, dmode); vn_seqc_write_end(tvp); vgone(tvp); vput(tvp); return (error); } #endif #endif /* !SUIDDIR */ UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); UFS_INODE_SET_MODE(ip, dmode); DIP_SET(ip, i_mode, dmode); tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 2; ip->i_nlink = 2; DIP_SET(ip, i_nlink, 2); if (cnp->cn_flags & ISWHITEOUT) { ip->i_flags |= UF_OPAQUE; DIP_SET(ip, i_flags, ip->i_flags); } /* * Bump link count in parent directory to reflect work done below. * Should be done before reference is created so cleanup is * possible if we crash. */ dp->i_effnlink++; dp->i_nlink++; DIP_SET(dp, i_nlink, dp->i_nlink); UFS_INODE_SET_FLAG(dp, IN_CHANGE); if (DOINGSOFTDEP(dvp)) softdep_setup_mkdir(dp, ip); error = UFS_UPDATE(dvp, !DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)); if (error) goto bad; #ifdef MAC if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, dvp, tvp, cnp); if (error) goto bad; } #endif #ifdef UFS_ACL if (dvp->v_mount->mnt_flag & MNT_ACLS) { error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } #endif /* !UFS_ACL */ /* * Initialize directory with "." and ".." from static template. */ if (!OFSFMT(dvp)) dtp = &mastertemplate; else dtp = (struct dirtemplate *)&omastertemplate; dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; vnode_pager_setsize(tvp, DIRBLKSIZ); if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, BA_CLRBUF, &bp)) != 0) goto bad; ip->i_size = DIRBLKSIZ; DIP_SET(ip, i_size, DIRBLKSIZ); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); if (DOINGSOFTDEP(tvp)) { /* * Ensure that the entire newly allocated block is a * valid directory so that future growth within the * block does not have to ensure that the block is * written before the inode. */ blkoff = DIRBLKSIZ; while (blkoff < bp->b_bcount) { ((struct direct *) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } } if ((error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp))) != 0) { (void)bwrite(bp); goto bad; } /* * Directory set up, now install its entry in the parent directory. * * If we are not doing soft dependencies, then we must write out the * buffer containing the new directory body before entering the new * name in the parent. If we are doing soft dependencies, then the * buffer containing the new directory body will be passed to and * released in the soft dependency code after the code has attached * an appropriate ordering dependency to the buffer which ensures that * the buffer is written before the new name is written in the parent. */ if (DOINGASYNC(dvp)) bdwrite(bp); else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp)))) goto bad; ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, bp); bad: if (error == 0) { *ap->a_vpp = tvp; vn_seqc_write_end(tvp); } else { dp->i_effnlink--; dp->i_nlink--; DIP_SET(dp, i_nlink, dp->i_nlink); UFS_INODE_SET_FLAG(dp, IN_CHANGE); /* * No need to do an explicit VOP_TRUNCATE here, vrele will * do this for us because we set the link count to 0. */ ip->i_effnlink = 0; ip->i_nlink = 0; DIP_SET(ip, i_nlink, 0); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(tvp)) softdep_revert_mkdir(dp, ip); vn_seqc_write_end(tvp); vgone(tvp); vput(tvp); } out: return (error); } /* * Rmdir system call. */ static int ufs_rmdir( struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; int error; ip = VTOI(vp); dp = VTOI(dvp); /* * Do not remove a directory that is in the process of being renamed. * Verify the directory is empty (and valid). Rmdir ".." will not be * valid since ".." will contain a reference to the current directory * and thus be non-empty. Do not allow the removal of mounted on * directories (this can happen when an NFS exported filesystem * tries to remove a locally mounted on directory). */ error = 0; if (dp->i_effnlink <= 2) { if (dp->i_effnlink == 2) print_bad_link_count("ufs_rmdir", dvp); error = EINVAL; goto out; } if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; } if ((dp->i_flags & APPEND) || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } if (vp->v_mountedhere != 0) { error = EINVAL; goto out; } if (DOINGSUJ(dvp)) { error = softdep_prelink(dvp, vp, cnp); if (error != 0) { MPASS(error == ERELOOKUP); return (error); } } #ifdef UFS_GJOURNAL ufs_gjournal_orphan(vp); #endif /* * Delete reference to directory before purging * inode. If we crash in between, the directory * will be reattached to lost+found, */ dp->i_effnlink--; ip->i_effnlink--; if (DOINGSOFTDEP(vp)) softdep_setup_rmdir(dp, ip); error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); if (error) { dp->i_effnlink++; ip->i_effnlink++; if (DOINGSOFTDEP(vp)) softdep_revert_rmdir(dp, ip); goto out; } /* * The only stuff left in the directory is "." and "..". The "." * reference is inconsequential since we are quashing it. The soft * dependency code will arrange to do these operations after * the parent directory entry has been deleted on disk, so * when running with that code we avoid doing them now. */ if (!DOINGSOFTDEP(vp)) { dp->i_nlink--; DIP_SET(dp, i_nlink, dp->i_nlink); UFS_INODE_SET_FLAG(dp, IN_CHANGE); error = UFS_UPDATE(dvp, 0); ip->i_nlink--; DIP_SET(ip, i_nlink, ip->i_nlink); UFS_INODE_SET_FLAG(ip, IN_CHANGE); } cache_vop_rmdir(dvp, vp); #ifdef UFS_DIRHASH /* Kill any active hash; i_effnlink == 0, so it will not come back. */ if (ip->i_dirhash != NULL) ufsdirhash_free(ip); #endif out: return (error); } /* * symlink -- make a symbolic link */ static int ufs_symlink( struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; const char *a_target; } */ *ap) { struct vnode *vp, **vpp = ap->a_vpp; struct inode *ip; int len, error; error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, vpp, ap->a_cnp, "ufs_symlink"); if (error) return (error); vp = *vpp; len = strlen(ap->a_target); if (len < VFSTOUFS(vp->v_mount)->um_maxsymlinklen) { ip = VTOI(vp); - bcopy(ap->a_target, SHORTLINK(ip), len); + bcopy(ap->a_target, DIP(ip, i_shortlink), len); ip->i_size = len; DIP_SET(ip, i_size, len); UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE); error = UFS_UPDATE(vp, 0); } else error = vn_rdwr(UIO_WRITE, vp, __DECONST(void *, ap->a_target), len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, ap->a_cnp->cn_cred, NOCRED, NULL, NULL); if (error) vput(vp); return (error); } /* * Vnode op for reading directories. */ int ufs_readdir( struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long **a_cookies; } */ *ap) { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct buf *bp; struct inode *ip; struct direct *dp, *edp; u_long *cookies; struct dirent dstdp; off_t offset, startoffset; size_t readcnt, skipcnt; ssize_t startresid; u_int ncookies; int error; if (uio->uio_offset < 0) return (EINVAL); ip = VTOI(vp); if (ip->i_effnlink == 0) return (0); if (ap->a_ncookies != NULL) { if (uio->uio_resid < 0) ncookies = 0; else ncookies = uio->uio_resid; if (uio->uio_offset >= ip->i_size) ncookies = 0; else if (ip->i_size - uio->uio_offset < ncookies) ncookies = ip->i_size - uio->uio_offset; ncookies = ncookies / (offsetof(struct direct, d_name) + 4) + 1; cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK); *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } else { ncookies = 0; cookies = NULL; } offset = startoffset = uio->uio_offset; startresid = uio->uio_resid; error = 0; while (error == 0 && uio->uio_resid > 0 && uio->uio_offset < ip->i_size) { error = UFS_BLKATOFF(vp, uio->uio_offset, NULL, &bp); if (error) break; if (bp->b_offset + bp->b_bcount > ip->i_size) readcnt = ip->i_size - bp->b_offset; else readcnt = bp->b_bcount; skipcnt = (size_t)(uio->uio_offset - bp->b_offset) & ~(size_t)(DIRBLKSIZ - 1); offset = bp->b_offset + skipcnt; dp = (struct direct *)&bp->b_data[skipcnt]; edp = (struct direct *)&bp->b_data[readcnt]; while (error == 0 && uio->uio_resid > 0 && dp < edp) { if (dp->d_reclen <= offsetof(struct direct, d_name) || (caddr_t)dp + dp->d_reclen > (caddr_t)edp) { error = EIO; break; } #if BYTE_ORDER == LITTLE_ENDIAN /* Old filesystem format. */ if (OFSFMT(vp)) { dstdp.d_namlen = dp->d_type; dstdp.d_type = dp->d_namlen; } else #endif { dstdp.d_namlen = dp->d_namlen; dstdp.d_type = dp->d_type; } if (offsetof(struct direct, d_name) + dstdp.d_namlen > dp->d_reclen) { error = EIO; break; } if (offset < startoffset || dp->d_ino == 0) goto nextentry; dstdp.d_fileno = dp->d_ino; dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp); bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); /* NOTE: d_off is the offset of the *next* entry. */ dstdp.d_off = offset + dp->d_reclen; dirent_terminate(&dstdp); if (dstdp.d_reclen > uio->uio_resid) { if (uio->uio_resid == startresid) error = EINVAL; else error = EJUSTRETURN; break; } /* Advance dp. */ error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio); if (error) break; if (cookies != NULL) { KASSERT(ncookies > 0, ("ufs_readdir: cookies buffer too small")); *cookies = offset + dp->d_reclen; cookies++; ncookies--; } nextentry: offset += dp->d_reclen; dp = (struct direct *)((caddr_t)dp + dp->d_reclen); } bqrelse(bp); uio->uio_offset = offset; } /* We need to correct uio_offset. */ uio->uio_offset = offset; if (error == EJUSTRETURN) error = 0; if (ap->a_ncookies != NULL) { if (error == 0) { *ap->a_ncookies -= ncookies; } else { free(*ap->a_cookies, M_TEMP); *ap->a_ncookies = 0; *ap->a_cookies = NULL; } } if (error == 0 && ap->a_eofflag) *ap->a_eofflag = ip->i_size <= uio->uio_offset; return (error); } /* * Return target name of a symbolic link */ static int ufs_readlink( struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap) { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); doff_t isize; isize = ip->i_size; if (isize < VFSTOUFS(vp->v_mount)->um_maxsymlinklen) - return (uiomove(SHORTLINK(ip), isize, ap->a_uio)); + return (uiomove(DIP(ip, i_shortlink), isize, ap->a_uio)); return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); } /* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. * * In order to be able to swap to a file, the ufs_bmaparray() operation may not * deadlock on memory. See ufs_bmap() for details. */ static int ufs_strategy( struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap) { struct buf *bp = ap->a_bp; struct vnode *vp = ap->a_vp; ufs2_daddr_t blkno; int error; if (bp->b_blkno == bp->b_lblkno) { error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL); bp->b_blkno = blkno; if (error) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return (0); } if ((long)bp->b_blkno == -1) vfs_bio_clrbuf(bp); } if ((long)bp->b_blkno == -1) { bufdone(bp); return (0); } bp->b_iooffset = dbtob(bp->b_blkno); BO_STRATEGY(VFSTOUFS(vp->v_mount)->um_bo, bp); return (0); } /* * Print out the contents of an inode. */ static int ufs_print( struct vop_print_args /* { struct vnode *a_vp; } */ *ap) { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); printf("\tnlink=%d, effnlink=%d, size=%jd", ip->i_nlink, ip->i_effnlink, (intmax_t)ip->i_size); if (I_IS_UFS2(ip)) printf(", extsize %d", ip->i_din2->di_extsize); printf("\n\tgeneration=%jx, uid=%d, gid=%d, flags=0x%b\n", (uintmax_t)ip->i_gen, ip->i_uid, ip->i_gid, (u_int)ip->i_flags, PRINT_INODE_FLAGS); printf("\tino %lu, on dev %s", (u_long)ip->i_number, devtoname(ITODEV(ip))); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * Close wrapper for fifos. * * Update the times on the inode then do device close. */ static int ufsfifo_close( struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap) { struct vnode *vp = ap->a_vp; int usecount; VI_LOCK(vp); usecount = vp->v_usecount; if (usecount > 1) ufs_itimes_locked(vp); VI_UNLOCK(vp); return (fifo_specops.vop_close(ap)); } /* * Return POSIX pathconf information applicable to ufs filesystems. */ static int ufs_pathconf( struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap) { int error; error = 0; switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = UFS_LINK_MAX; break; case _PC_NAME_MAX: *ap->a_retval = UFS_MAXNAMLEN; break; case _PC_PIPE_BUF: if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) *ap->a_retval = PIPE_BUF; else error = EINVAL; break; case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; break; case _PC_NO_TRUNC: *ap->a_retval = 1; break; #ifdef UFS_ACL case _PC_ACL_EXTENDED: if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS) *ap->a_retval = 1; else *ap->a_retval = 0; break; case _PC_ACL_NFS4: if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS) *ap->a_retval = 1; else *ap->a_retval = 0; break; #endif case _PC_ACL_PATH_MAX: #ifdef UFS_ACL if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) *ap->a_retval = ACL_MAX_ENTRIES; else *ap->a_retval = 3; #else *ap->a_retval = 3; #endif break; #ifdef MAC case _PC_MAC_PRESENT: if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL) *ap->a_retval = 1; else *ap->a_retval = 0; break; #endif case _PC_MIN_HOLE_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_PRIO_IO: *ap->a_retval = 0; break; case _PC_SYNC_IO: *ap->a_retval = 0; break; case _PC_ALLOC_SIZE_MIN: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize; break; case _PC_FILESIZEBITS: *ap->a_retval = 64; break; case _PC_REC_INCR_XFER_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_MAX_XFER_SIZE: *ap->a_retval = -1; /* means ``unlimited'' */ break; case _PC_REC_MIN_XFER_SIZE: *ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize; break; case _PC_REC_XFER_ALIGN: *ap->a_retval = PAGE_SIZE; break; case _PC_SYMLINK_MAX: *ap->a_retval = MAXPATHLEN; break; default: error = vop_stdpathconf(ap); break; } return (error); } /* * Initialize the vnode associated with a new inode, handle aliased * vnodes. */ int ufs_vinit(struct mount *mntp, struct vop_vector *fifoops, struct vnode **vpp) { struct inode *ip; struct vnode *vp; vp = *vpp; ASSERT_VOP_LOCKED(vp, "ufs_vinit"); ip = VTOI(vp); vp->v_type = IFTOVT(ip->i_mode); /* * Only unallocated inodes should be of type VNON. */ if (ip->i_mode != 0 && vp->v_type == VNON) return (EINVAL); if (vp->v_type == VFIFO) vp->v_op = fifoops; if (ip->i_number == UFS_ROOTINO) vp->v_vflag |= VV_ROOT; *vpp = vp; return (0); } /* * Allocate a new inode. * Vnode dvp must be locked. */ static int ufs_makeinode(int mode, struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, const char *callfunc) { struct inode *ip, *pdir; struct direct newdir; struct vnode *tvp; int error; pdir = VTOI(dvp); #ifdef INVARIANTS if ((cnp->cn_flags & HASBUF) == 0) panic("%s: no name", callfunc); #endif *vpp = NULL; if ((mode & IFMT) == 0) mode |= IFREG; if (pdir->i_effnlink < 2) { print_bad_link_count(callfunc, dvp); return (EINVAL); } if (DOINGSUJ(dvp)) { error = softdep_prelink(dvp, NULL, cnp); if (error != 0) { MPASS(error == ERELOOKUP); return (error); } } error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); if (error) return (error); ip = VTOI(tvp); ip->i_gid = pdir->i_gid; DIP_SET(ip, i_gid, pdir->i_gid); #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; gid_t ucred_group; ucp = cnp->cn_cred; #endif /* * If we are not the owner of the directory, * and we are hacking owners here, (only do this where told to) * and we are not giving it TO root, (would subvert quotas) * then go ahead and give it to the other user. * Note that this drops off the execute bits for security. */ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (pdir->i_mode & ISUID) && (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { ip->i_uid = pdir->i_uid; DIP_SET(ip, i_uid, ip->i_uid); mode &= ~07111; #ifdef QUOTA /* * Make sure the correct user gets charged * for the space. * Quickly knock up a dummy credential for the victim. * XXX This seems to never be accessed out of our * context so a stack variable is ok. */ refcount_init(&ucred.cr_ref, 1); ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups = &ucred_group; ucred.cr_groups[0] = pdir->i_gid; ucp = &ucred; #endif } else { ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); } #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(pdir, ip); UFS_VFREE(tvp, ip->i_number, mode); vgone(tvp); vput(tvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; DIP_SET(ip, i_uid, ip->i_uid); #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { if (DOINGSOFTDEP(tvp)) softdep_revert_link(pdir, ip); UFS_VFREE(tvp, ip->i_number, mode); vgone(tvp); vput(tvp); return (error); } #endif #endif /* !SUIDDIR */ vn_seqc_write_begin(tvp); /* Mostly to cover asserts */ UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE); UFS_INODE_SET_MODE(ip, mode); DIP_SET(ip, i_mode, mode); tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 1; ip->i_nlink = 1; DIP_SET(ip, i_nlink, 1); if (DOINGSOFTDEP(tvp)) softdep_setup_create(VTOI(dvp), ip); if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID)) { UFS_INODE_SET_MODE(ip, ip->i_mode & ~ISGID); DIP_SET(ip, i_mode, ip->i_mode); } if (cnp->cn_flags & ISWHITEOUT) { ip->i_flags |= UF_OPAQUE; DIP_SET(ip, i_flags, ip->i_flags); } /* * Make sure inode goes to disk before directory entry. */ error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp)); if (error) goto bad; #ifdef MAC if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) { error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount, dvp, tvp, cnp); if (error) goto bad; } #endif #ifdef UFS_ACL if (dvp->v_mount->mnt_flag & MNT_ACLS) { error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) { error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode, cnp->cn_cred, cnp->cn_thread); if (error) goto bad; } #endif /* !UFS_ACL */ ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL); if (error) goto bad; vn_seqc_write_end(tvp); *vpp = tvp; return (0); bad: /* * Write error occurred trying to update the inode * or the directory so must deallocate the inode. */ ip->i_effnlink = 0; ip->i_nlink = 0; DIP_SET(ip, i_nlink, 0); UFS_INODE_SET_FLAG(ip, IN_CHANGE); if (DOINGSOFTDEP(tvp)) softdep_revert_create(VTOI(dvp), ip); vn_seqc_write_end(tvp); vgone(tvp); vput(tvp); return (error); } static int ufs_ioctl(struct vop_ioctl_args *ap) { struct vnode *vp; int error; vp = ap->a_vp; switch (ap->a_command) { case FIOSEEKDATA: error = vn_lock(vp, LK_SHARED); if (error == 0) { error = ufs_bmap_seekdata(vp, (off_t *)ap->a_data); VOP_UNLOCK(vp); } else error = EBADF; return (error); case FIOSEEKHOLE: return (vn_bmap_seekhole(vp, ap->a_command, (off_t *)ap->a_data, ap->a_cred)); default: return (ENOTTY); } } static int ufs_read_pgcache(struct vop_read_pgcache_args *ap) { struct uio *uio; struct vnode *vp; uio = ap->a_uio; vp = ap->a_vp; VNPASS((vn_irflag_read(vp) & VIRF_PGREAD) != 0, vp); if (uio->uio_resid > ptoa(io_hold_cnt) || uio->uio_offset < 0 || (ap->a_ioflag & IO_DIRECT) != 0) return (EJUSTRETURN); return (vn_read_from_obj(vp, uio)); } /* Global vfs data structures for ufs. */ struct vop_vector ufs_vnodeops = { .vop_default = &default_vnodeops, .vop_fsync = VOP_PANIC, .vop_read = VOP_PANIC, .vop_reallocblks = VOP_PANIC, .vop_write = VOP_PANIC, .vop_accessx = ufs_accessx, .vop_bmap = ufs_bmap, .vop_fplookup_vexec = ufs_fplookup_vexec, .vop_fplookup_symlink = VOP_EAGAIN, .vop_cachedlookup = ufs_lookup, .vop_close = ufs_close, .vop_create = ufs_create, .vop_stat = ufs_stat, .vop_getattr = ufs_getattr, .vop_inactive = ufs_inactive, .vop_ioctl = ufs_ioctl, .vop_link = ufs_link, .vop_lookup = vfs_cache_lookup, .vop_mmapped = ufs_mmapped, .vop_mkdir = ufs_mkdir, .vop_mknod = ufs_mknod, .vop_need_inactive = ufs_need_inactive, .vop_open = ufs_open, .vop_pathconf = ufs_pathconf, .vop_poll = vop_stdpoll, .vop_print = ufs_print, .vop_read_pgcache = ufs_read_pgcache, .vop_readdir = ufs_readdir, .vop_readlink = ufs_readlink, .vop_reclaim = ufs_reclaim, .vop_remove = ufs_remove, .vop_rename = ufs_rename, .vop_rmdir = ufs_rmdir, .vop_setattr = ufs_setattr, #ifdef MAC .vop_setlabel = vop_stdsetlabel_ea, #endif .vop_strategy = ufs_strategy, .vop_symlink = ufs_symlink, .vop_whiteout = ufs_whiteout, #ifdef UFS_EXTATTR .vop_getextattr = ufs_getextattr, .vop_deleteextattr = ufs_deleteextattr, .vop_setextattr = ufs_setextattr, #endif #ifdef UFS_ACL .vop_getacl = ufs_getacl, .vop_setacl = ufs_setacl, .vop_aclcheck = ufs_aclcheck, #endif }; VFS_VOP_VECTOR_REGISTER(ufs_vnodeops); struct vop_vector ufs_fifoops = { .vop_default = &fifo_specops, .vop_fsync = VOP_PANIC, .vop_accessx = ufs_accessx, .vop_close = ufsfifo_close, .vop_getattr = ufs_getattr, .vop_inactive = ufs_inactive, .vop_pathconf = ufs_pathconf, .vop_print = ufs_print, .vop_read = VOP_PANIC, .vop_reclaim = ufs_reclaim, .vop_setattr = ufs_setattr, #ifdef MAC .vop_setlabel = vop_stdsetlabel_ea, #endif .vop_write = VOP_PANIC, #ifdef UFS_EXTATTR .vop_getextattr = ufs_getextattr, .vop_deleteextattr = ufs_deleteextattr, .vop_setextattr = ufs_setextattr, #endif #ifdef UFS_ACL .vop_getacl = ufs_getacl, .vop_setacl = ufs_setacl, .vop_aclcheck = ufs_aclcheck, #endif }; VFS_VOP_VECTOR_REGISTER(ufs_fifoops); diff --git a/tools/diag/prtblknos/prtblknos.c b/tools/diag/prtblknos/prtblknos.c index 771d79e43ec9..ae53471156a6 100644 --- a/tools/diag/prtblknos/prtblknos.c +++ b/tools/diag/prtblknos/prtblknos.c @@ -1,322 +1,322 @@ /* * Copyright (c) 1998, 2003, 2013, 2018 Marshall Kirk McKusick. * All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #ifdef PRTBLKNOS union dinode { struct ufs1_dinode dp1; struct ufs2_dinode dp2; }; extern struct uufsd disk; #else /* used by fsdb */ #include static struct bufarea *bp; #endif void prtblknos(struct fs *fs, union dinode *dp); static const char *distance(struct fs *, ufs2_daddr_t, ufs2_daddr_t); static void printblk(struct fs *, ufs_lbn_t, ufs2_daddr_t, int, ufs_lbn_t); static void indirprt(struct fs *, int, ufs_lbn_t, ufs_lbn_t, ufs2_daddr_t, ufs_lbn_t); void prtblknos(fs, dp) struct fs *fs; union dinode *dp; { int i, mode, frags; ufs_lbn_t lbn, lastlbn, len, blksperindir; ufs2_daddr_t blkno; off_t size; if (fs->fs_magic == FS_UFS1_MAGIC) { size = dp->dp1.di_size; mode = dp->dp1.di_mode; } else { size = dp->dp2.di_size; mode = dp->dp2.di_mode; } switch (mode & IFMT) { default: printf("unknown inode type 0%d\n", (mode & IFMT)); return; case 0: printf("unallocated inode\n"); return; case IFIFO: printf("fifo\n"); return; case IFCHR: printf("character device\n"); return; case IFBLK: printf("block device\n"); return; case IFSOCK: printf("socket\n"); return; case IFWHT: printf("whiteout\n"); return; case IFLNK: if (size == 0) { printf("empty symbolic link\n"); return; } if (size < fs->fs_maxsymlinklen) { printf("symbolic link referencing %s\n", (fs->fs_magic == FS_UFS1_MAGIC) ? - (char *)dp->dp1.di_db : - (char *)dp->dp2.di_db); + dp->dp1.di_shortlink : + dp->dp2.di_shortlink); return; } printf("symbolic link\n"); break; case IFREG: if (size == 0) { printf("empty file\n"); return; } printf("regular file, size %jd\n", (intmax_t)size); break; case IFDIR: if (size == 0) { printf("empty directory\n"); return; } printf("directory, size %jd\n", (intmax_t)size); break; } lastlbn = howmany(size, fs->fs_bsize); len = lastlbn < UFS_NDADDR ? lastlbn : UFS_NDADDR; for (i = 0; i < len; i++) { if (i < lastlbn - 1) frags = fs->fs_frag; else frags = howmany(size - (lastlbn - 1) * fs->fs_bsize, fs->fs_fsize); if (fs->fs_magic == FS_UFS1_MAGIC) blkno = dp->dp1.di_db[i]; else blkno = dp->dp2.di_db[i]; printblk(fs, i, blkno, frags, lastlbn); } blksperindir = 1; len = lastlbn - UFS_NDADDR; lbn = UFS_NDADDR; for (i = 0; len > 0 && i < UFS_NIADDR; i++) { if (fs->fs_magic == FS_UFS1_MAGIC) blkno = dp->dp1.di_ib[i]; else blkno = dp->dp2.di_ib[i]; indirprt(fs, i, blksperindir, lbn, blkno, lastlbn); blksperindir *= NINDIR(fs); lbn += blksperindir; len -= blksperindir; } /* dummy print to flush out last extent */ printblk(fs, lastlbn, 0, frags, 0); } static void indirprt(fs, level, blksperindir, lbn, blkno, lastlbn) struct fs *fs; int level; ufs_lbn_t blksperindir; ufs_lbn_t lbn; ufs2_daddr_t blkno; ufs_lbn_t lastlbn; { char indir[MAXBSIZE]; ufs_lbn_t i, last; if (blkno == 0) { printblk(fs, lbn, blkno, blksperindir * NINDIR(fs) * fs->fs_frag, lastlbn); return; } printblk(fs, lbn, blkno, fs->fs_frag, -level); /* read in the indirect block. */ #ifdef PRTBLKNOS if (bread(&disk, fsbtodb(fs, blkno), indir, fs->fs_bsize) == -1) { #else /* used by fsdb */ bp = getdatablk(blkno, fs->fs_bsize, BT_LEVEL1 + level); if (bp->b_errs == 0) { memcpy(indir, bp->b_un.b_buf, fs->fs_bsize); } else { #endif warn("Read of indirect block %jd failed", (intmax_t)blkno); /* List the unreadable part as a hole */ printblk(fs, lbn, 0, blksperindir * NINDIR(fs) * fs->fs_frag, lastlbn); return; } last = howmany(lastlbn - lbn, blksperindir) < NINDIR(fs) ? howmany(lastlbn - lbn, blksperindir) : NINDIR(fs); if (blksperindir == 1) { for (i = 0; i < last; i++) { if (fs->fs_magic == FS_UFS1_MAGIC) blkno = ((ufs1_daddr_t *)indir)[i]; else blkno = ((ufs2_daddr_t *)indir)[i]; printblk(fs, lbn + i, blkno, fs->fs_frag, lastlbn); } return; } for (i = 0; i < last; i++) { if (fs->fs_magic == FS_UFS1_MAGIC) blkno = ((ufs1_daddr_t *)indir)[i]; else blkno = ((ufs2_daddr_t *)indir)[i]; indirprt(fs, level - 1, blksperindir / NINDIR(fs), lbn + blksperindir * i, blkno, lastlbn); } } static const char * distance(fs, lastblk, firstblk) struct fs *fs; ufs2_daddr_t lastblk; ufs2_daddr_t firstblk; { ufs2_daddr_t delta; int firstcg, lastcg; static char buf[100]; if (lastblk == 0) return (""); delta = firstblk - lastblk - 1; firstcg = dtog(fs, firstblk); lastcg = dtog(fs, lastblk); if (firstcg == lastcg) { snprintf(buf, 100, " distance %jd", (intmax_t)delta); return (&buf[0]); } snprintf(buf, 100, " cg %d blk %jd to cg %d blk %jd", lastcg, (intmax_t)dtogd(fs, lastblk), firstcg, (intmax_t)dtogd(fs, firstblk)); return (&buf[0]); } static const char *indirname[UFS_NIADDR] = { "First", "Second", "Third" }; static void printblk(fs, lbn, blkno, numfrags, lastlbn) struct fs *fs; ufs_lbn_t lbn; ufs2_daddr_t blkno; int numfrags; ufs_lbn_t lastlbn; { static int seq; static ufs2_daddr_t totfrags, lastindirblk, lastblk, firstblk; if (lastlbn <= 0) goto flush; if (seq == 0) { seq = howmany(numfrags, fs->fs_frag); totfrags = numfrags; firstblk = blkno; return; } if (lbn == 0) { seq = howmany(numfrags, fs->fs_frag); totfrags = numfrags; lastblk = 0; firstblk = blkno; lastindirblk = 0; return; } if (lbn < lastlbn && ((firstblk == 0 && blkno == 0) || (firstblk == BLK_NOCOPY && blkno == BLK_NOCOPY) || (firstblk == BLK_SNAP && blkno == BLK_SNAP) || blkno == firstblk + seq * fs->fs_frag)) { seq += howmany(numfrags, fs->fs_frag); totfrags += numfrags; return; } flush: if (seq == 0) goto prtindir; if (firstblk <= BLK_SNAP) { if (seq == 1) printf("\tlbn %jd %s\n", (intmax_t)(lbn - seq), firstblk == 0 ? "hole" : firstblk == BLK_NOCOPY ? "nocopy" : "snapblk"); else printf("\tlbn %jd-%jd %s\n", (intmax_t)lbn - seq, (intmax_t)lbn - 1, firstblk == 0 ? "hole" : firstblk == BLK_NOCOPY ? "nocopy" : "snapblk"); } else if (seq == 1) { if (totfrags == 1) printf("\tlbn %jd blkno %jd%s\n", (intmax_t)(lbn - seq), (intmax_t)firstblk, distance(fs, lastblk, firstblk)); else printf("\tlbn %jd blkno %jd-%jd%s\n", (intmax_t)(lbn - seq), (intmax_t)firstblk, (intmax_t)(firstblk + totfrags - 1), distance(fs, lastblk, firstblk)); lastblk = firstblk + totfrags - 1; } else { printf("\tlbn %jd-%jd blkno %jd-%jd%s\n", (intmax_t)(lbn - seq), (intmax_t)(lbn - 1), (intmax_t)firstblk, (intmax_t)(firstblk + totfrags - 1), distance(fs, lastblk, firstblk)); lastblk = firstblk + totfrags - 1; } if (lastlbn > 0 || blkno == 0) { seq = 1; totfrags = numfrags; firstblk = blkno; return; } prtindir: if (seq != 0 && (fs->fs_metaspace == 0 || lastindirblk == 0)) lastindirblk = lastblk; printf("%s-level indirect, blkno %jd-%jd%s\n", indirname[-lastlbn], (intmax_t)blkno, (intmax_t)(blkno + numfrags - 1), distance(fs, lastindirblk, blkno)); lastindirblk = blkno + numfrags - 1; if (fs->fs_metaspace == 0) lastblk = lastindirblk; seq = 0; } diff --git a/usr.sbin/makefs/ffs.c b/usr.sbin/makefs/ffs.c index d91ed156c9e0..1911f75fb4fd 100644 --- a/usr.sbin/makefs/ffs.c +++ b/usr.sbin/makefs/ffs.c @@ -1,1205 +1,1205 @@ /* $NetBSD: ffs.c,v 1.45 2011/10/09 22:49:26 christos Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2001 Wasabi Systems, Inc. * All rights reserved. * * Written by Luke Mewburn for Wasabi Systems, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the NetBSD Project by * Wasabi Systems, Inc. * 4. The name of Wasabi Systems, Inc. may not be used to endorse * or promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_alloc.c 8.19 (Berkeley) 7/13/95 */ #include __FBSDID("$FreeBSD$"); #if HAVE_NBTOOL_CONFIG_H #include "nbtool_config.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include "makefs.h" #include "ffs.h" #if HAVE_STRUCT_STATVFS_F_IOSIZE && HAVE_FSTATVFS #include #endif #include #include #include #include "ffs/ufs_bswap.h" #include "ffs/ufs_inode.h" #include "ffs/newfs_extern.h" #include "ffs/ffs_extern.h" #undef DIP #define DIP(dp, field) \ ((ffs_opts->version == 1) ? \ (dp)->ffs1_din.di_##field : (dp)->ffs2_din.di_##field) /* * Various file system defaults (cribbed from newfs(8)). */ #define DFL_FRAGSIZE 4096 /* fragment size */ #define DFL_BLKSIZE 32768 /* block size */ #define DFL_SECSIZE 512 /* sector size */ #define DFL_CYLSPERGROUP 65536 /* cylinders per group */ #define DFL_FRAGSPERINODE 4 /* fragments per inode */ #define DFL_ROTDELAY 0 /* rotational delay */ #define DFL_NRPOS 1 /* rotational positions */ #define DFL_RPM 3600 /* rpm of disk */ #define DFL_NSECTORS 64 /* # of sectors */ #define DFL_NTRACKS 16 /* # of tracks */ typedef struct { u_char *buf; /* buf for directory */ doff_t size; /* full size of buf */ doff_t cur; /* offset of current entry */ } dirbuf_t; static int ffs_create_image(const char *, fsinfo_t *); static void ffs_dump_fsinfo(fsinfo_t *); static void ffs_dump_dirbuf(dirbuf_t *, const char *, int); static void ffs_make_dirbuf(dirbuf_t *, const char *, fsnode *, int); static int ffs_populate_dir(const char *, fsnode *, fsinfo_t *); static void ffs_size_dir(fsnode *, fsinfo_t *); static void ffs_validate(const char *, fsnode *, fsinfo_t *); static void ffs_write_file(union dinode *, uint32_t, void *, fsinfo_t *); static void ffs_write_inode(union dinode *, uint32_t, const fsinfo_t *); static void *ffs_build_dinode1(struct ufs1_dinode *, dirbuf_t *, fsnode *, fsnode *, fsinfo_t *); static void *ffs_build_dinode2(struct ufs2_dinode *, dirbuf_t *, fsnode *, fsnode *, fsinfo_t *); /* publicly visible functions */ void ffs_prep_opts(fsinfo_t *fsopts) { ffs_opt_t *ffs_opts = ecalloc(1, sizeof(*ffs_opts)); const option_t ffs_options[] = { { 'b', "bsize", &ffs_opts->bsize, OPT_INT32, 1, INT_MAX, "block size" }, { 'f', "fsize", &ffs_opts->fsize, OPT_INT32, 1, INT_MAX, "fragment size" }, { 'd', "density", &ffs_opts->density, OPT_INT32, 1, INT_MAX, "bytes per inode" }, { 'm', "minfree", &ffs_opts->minfree, OPT_INT32, 0, 99, "minfree" }, { 'M', "maxbpg", &ffs_opts->maxbpg, OPT_INT32, 1, INT_MAX, "max blocks per file in a cg" }, { 'a', "avgfilesize", &ffs_opts->avgfilesize, OPT_INT32, 1, INT_MAX, "expected average file size" }, { 'n', "avgfpdir", &ffs_opts->avgfpdir, OPT_INT32, 1, INT_MAX, "expected # of files per directory" }, { 'x', "extent", &ffs_opts->maxbsize, OPT_INT32, 1, INT_MAX, "maximum # extent size" }, { 'g', "maxbpcg", &ffs_opts->maxblkspercg, OPT_INT32, 1, INT_MAX, "max # of blocks per group" }, { 'v', "version", &ffs_opts->version, OPT_INT32, 1, 2, "UFS version" }, { 'o', "optimization", NULL, OPT_STRBUF, 0, 0, "Optimization (time|space)" }, { 'l', "label", ffs_opts->label, OPT_STRARRAY, 1, sizeof(ffs_opts->label), "UFS label" }, { 's', "softupdates", &ffs_opts->softupdates, OPT_INT32, 0, 1, "enable softupdates" }, { .name = NULL } }; ffs_opts->bsize= -1; ffs_opts->fsize= -1; ffs_opts->cpg= -1; ffs_opts->density= -1; ffs_opts->min_inodes= false; ffs_opts->minfree= -1; ffs_opts->optimization= -1; ffs_opts->maxcontig= -1; ffs_opts->maxbpg= -1; ffs_opts->avgfilesize= -1; ffs_opts->avgfpdir= -1; ffs_opts->version = 1; ffs_opts->softupdates = 0; fsopts->fs_specific = ffs_opts; fsopts->fs_options = copy_opts(ffs_options); } void ffs_cleanup_opts(fsinfo_t *fsopts) { free(fsopts->fs_specific); free(fsopts->fs_options); } int ffs_parse_opts(const char *option, fsinfo_t *fsopts) { ffs_opt_t *ffs_opts = fsopts->fs_specific; option_t *ffs_options = fsopts->fs_options; char buf[1024]; int rv; assert(option != NULL); assert(fsopts != NULL); assert(ffs_opts != NULL); if (debug & DEBUG_FS_PARSE_OPTS) printf("ffs_parse_opts: got `%s'\n", option); rv = set_option(ffs_options, option, buf, sizeof(buf)); if (rv == -1) return 0; if (ffs_options[rv].name == NULL) abort(); switch (ffs_options[rv].letter) { case 'o': if (strcmp(buf, "time") == 0) { ffs_opts->optimization = FS_OPTTIME; } else if (strcmp(buf, "space") == 0) { ffs_opts->optimization = FS_OPTSPACE; } else { warnx("Invalid optimization `%s'", buf); return 0; } break; default: break; } return 1; } void ffs_makefs(const char *image, const char *dir, fsnode *root, fsinfo_t *fsopts) { struct fs *superblock; struct timeval start; assert(image != NULL); assert(dir != NULL); assert(root != NULL); assert(fsopts != NULL); if (debug & DEBUG_FS_MAKEFS) printf("ffs_makefs: image %s directory %s root %p\n", image, dir, root); /* if user wants no free space, use minimum number of inodes */ if (fsopts->minsize == 0 && fsopts->freeblockpc == 0 && fsopts->freeblocks == 0) ((ffs_opt_t *)fsopts->fs_specific)->min_inodes = true; /* validate tree and options */ TIMER_START(start); ffs_validate(dir, root, fsopts); TIMER_RESULTS(start, "ffs_validate"); printf("Calculated size of `%s': %lld bytes, %lld inodes\n", image, (long long)fsopts->size, (long long)fsopts->inodes); /* create image */ TIMER_START(start); if (ffs_create_image(image, fsopts) == -1) errx(1, "Image file `%s' not created.", image); TIMER_RESULTS(start, "ffs_create_image"); fsopts->curinode = UFS_ROOTINO; if (debug & DEBUG_FS_MAKEFS) putchar('\n'); /* populate image */ printf("Populating `%s'\n", image); TIMER_START(start); if (! ffs_populate_dir(dir, root, fsopts)) errx(1, "Image file `%s' not populated.", image); TIMER_RESULTS(start, "ffs_populate_dir"); /* ensure no outstanding buffers remain */ if (debug & DEBUG_FS_MAKEFS) bcleanup(); /* update various superblock parameters */ superblock = fsopts->superblock; superblock->fs_fmod = 0; superblock->fs_old_cstotal.cs_ndir = superblock->fs_cstotal.cs_ndir; superblock->fs_old_cstotal.cs_nbfree = superblock->fs_cstotal.cs_nbfree; superblock->fs_old_cstotal.cs_nifree = superblock->fs_cstotal.cs_nifree; superblock->fs_old_cstotal.cs_nffree = superblock->fs_cstotal.cs_nffree; /* write out superblock; image is now complete */ ffs_write_superblock(fsopts->superblock, fsopts); if (close(fsopts->fd) == -1) err(1, "Closing `%s'", image); fsopts->fd = -1; printf("Image `%s' complete\n", image); } /* end of public functions */ static void ffs_validate(const char *dir, fsnode *root, fsinfo_t *fsopts) { #ifdef notyet int32_t spc, nspf, ncyl, fssize; #endif ffs_opt_t *ffs_opts = fsopts->fs_specific; assert(dir != NULL); assert(root != NULL); assert(fsopts != NULL); assert(ffs_opts != NULL); if (debug & DEBUG_FS_VALIDATE) { printf("ffs_validate: before defaults set:\n"); ffs_dump_fsinfo(fsopts); } /* set FFS defaults */ if (fsopts->sectorsize == -1) fsopts->sectorsize = DFL_SECSIZE; if (ffs_opts->fsize == -1) ffs_opts->fsize = MAX(DFL_FRAGSIZE, fsopts->sectorsize); if (ffs_opts->bsize == -1) ffs_opts->bsize = MIN(DFL_BLKSIZE, 8 * ffs_opts->fsize); if (ffs_opts->cpg == -1) ffs_opts->cpg = DFL_CYLSPERGROUP; else ffs_opts->cpgflg = 1; /* fsopts->density is set below */ if (ffs_opts->nsectors == -1) ffs_opts->nsectors = DFL_NSECTORS; if (ffs_opts->minfree == -1) ffs_opts->minfree = MINFREE; if (ffs_opts->optimization == -1) ffs_opts->optimization = DEFAULTOPT; if (ffs_opts->maxcontig == -1) ffs_opts->maxcontig = MAX(1, MIN(MAXPHYS, FFS_MAXBSIZE) / ffs_opts->bsize); /* XXX ondisk32 */ if (ffs_opts->maxbpg == -1) ffs_opts->maxbpg = ffs_opts->bsize / sizeof(int32_t); if (ffs_opts->avgfilesize == -1) ffs_opts->avgfilesize = AVFILESIZ; if (ffs_opts->avgfpdir == -1) ffs_opts->avgfpdir = AFPDIR; if (fsopts->maxsize > 0 && roundup(fsopts->minsize, ffs_opts->bsize) > fsopts->maxsize) errx(1, "`%s' minsize of %lld rounded up to ffs bsize of %d " "exceeds maxsize %lld. Lower bsize, or round the minimum " "and maximum sizes to bsize.", dir, (long long)fsopts->minsize, ffs_opts->bsize, (long long)fsopts->maxsize); /* calculate size of tree */ ffs_size_dir(root, fsopts); fsopts->inodes += UFS_ROOTINO; /* include first two inodes */ if (debug & DEBUG_FS_VALIDATE) printf("ffs_validate: size of tree: %lld bytes, %lld inodes\n", (long long)fsopts->size, (long long)fsopts->inodes); /* add requested slop */ fsopts->size += fsopts->freeblocks; fsopts->inodes += fsopts->freefiles; if (fsopts->freefilepc > 0) fsopts->inodes = fsopts->inodes * (100 + fsopts->freefilepc) / 100; if (fsopts->freeblockpc > 0) fsopts->size = fsopts->size * (100 + fsopts->freeblockpc) / 100; /* * Add space needed for superblock, cylblock and to store inodes. * All of those segments are aligned to the block size. * XXX: This has to match calculations done in ffs_mkfs. */ if (ffs_opts->version == 1) { fsopts->size += roundup(SBLOCK_UFS1 + SBLOCKSIZE, ffs_opts->bsize); fsopts->size += roundup(SBLOCKSIZE, ffs_opts->bsize); fsopts->size += ffs_opts->bsize; fsopts->size += DINODE1_SIZE * roundup(fsopts->inodes, ffs_opts->bsize / DINODE1_SIZE); } else { fsopts->size += roundup(SBLOCK_UFS2 + SBLOCKSIZE, ffs_opts->bsize); fsopts->size += roundup(SBLOCKSIZE, ffs_opts->bsize); fsopts->size += ffs_opts->bsize; fsopts->size += DINODE2_SIZE * roundup(fsopts->inodes, ffs_opts->bsize / DINODE2_SIZE); } /* add minfree */ if (ffs_opts->minfree > 0) fsopts->size = fsopts->size * (100 + ffs_opts->minfree) / 100; /* * XXX any other fs slop to add, such as csum's, bitmaps, etc ?? */ if (fsopts->size < fsopts->minsize) /* ensure meets minimum size */ fsopts->size = fsopts->minsize; /* round up to the next block */ fsopts->size = roundup(fsopts->size, ffs_opts->bsize); /* round up to requested block size, if any */ if (fsopts->roundup > 0) fsopts->size = roundup(fsopts->size, fsopts->roundup); /* calculate density to just fit inodes if no free space */ if (ffs_opts->density == -1) ffs_opts->density = fsopts->size / fsopts->inodes + 1; if (debug & DEBUG_FS_VALIDATE) { printf("ffs_validate: after defaults set:\n"); ffs_dump_fsinfo(fsopts); printf("ffs_validate: dir %s; %lld bytes, %lld inodes\n", dir, (long long)fsopts->size, (long long)fsopts->inodes); } /* now check calculated sizes vs requested sizes */ if (fsopts->maxsize > 0 && fsopts->size > fsopts->maxsize) { errx(1, "`%s' size of %lld is larger than the maxsize of %lld.", dir, (long long)fsopts->size, (long long)fsopts->maxsize); } } static void ffs_dump_fsinfo(fsinfo_t *f) { ffs_opt_t *fs = f->fs_specific; printf("fsopts at %p\n", f); printf("\tsize %lld, inodes %lld, curinode %u\n", (long long)f->size, (long long)f->inodes, f->curinode); printf("\tminsize %lld, maxsize %lld\n", (long long)f->minsize, (long long)f->maxsize); printf("\tfree files %lld, freefile %% %d\n", (long long)f->freefiles, f->freefilepc); printf("\tfree blocks %lld, freeblock %% %d\n", (long long)f->freeblocks, f->freeblockpc); printf("\tneedswap %d, sectorsize %d\n", f->needswap, f->sectorsize); printf("\tbsize %d, fsize %d, cpg %d, density %d\n", fs->bsize, fs->fsize, fs->cpg, fs->density); printf("\tnsectors %d, rpm %d, minfree %d\n", fs->nsectors, fs->rpm, fs->minfree); printf("\tmaxcontig %d, maxbpg %d\n", fs->maxcontig, fs->maxbpg); printf("\toptimization %s\n", fs->optimization == FS_OPTSPACE ? "space" : "time"); } static int ffs_create_image(const char *image, fsinfo_t *fsopts) { #if HAVE_STRUCT_STATVFS_F_IOSIZE && HAVE_FSTATVFS struct statvfs sfs; #endif struct fs *fs; char *buf; int i, bufsize; off_t bufrem; int oflags = O_RDWR | O_CREAT; time_t tstamp; assert (image != NULL); assert (fsopts != NULL); /* create image */ if (fsopts->offset == 0) oflags |= O_TRUNC; if ((fsopts->fd = open(image, oflags, 0666)) == -1) { warn("Can't open `%s' for writing", image); return (-1); } /* zero image */ #if HAVE_STRUCT_STATVFS_F_IOSIZE && HAVE_FSTATVFS if (fstatvfs(fsopts->fd, &sfs) == -1) { #endif bufsize = 8192; #if HAVE_STRUCT_STATVFS_F_IOSIZE && HAVE_FSTATVFS warn("can't fstatvfs `%s', using default %d byte chunk", image, bufsize); } else bufsize = sfs.f_iosize; #endif bufrem = fsopts->size; if (fsopts->sparse) { if (ftruncate(fsopts->fd, bufrem) == -1) { warn("sparse option disabled."); fsopts->sparse = 0; } } if (fsopts->sparse) { /* File truncated at bufrem. Remaining is 0 */ bufrem = 0; buf = NULL; } else { if (debug & DEBUG_FS_CREATE_IMAGE) printf("zero-ing image `%s', %lld sectors, " "using %d byte chunks\n", image, (long long)bufrem, bufsize); buf = ecalloc(1, bufsize); } if (fsopts->offset != 0) if (lseek(fsopts->fd, fsopts->offset, SEEK_SET) == -1) { warn("can't seek"); free(buf); return -1; } while (bufrem > 0) { i = write(fsopts->fd, buf, MIN(bufsize, bufrem)); if (i == -1) { warn("zeroing image, %lld bytes to go", (long long)bufrem); free(buf); return (-1); } bufrem -= i; } if (buf) free(buf); /* make the file system */ if (debug & DEBUG_FS_CREATE_IMAGE) printf("calling mkfs(\"%s\", ...)\n", image); if (stampst.st_ino != 0) tstamp = stampst.st_ctime; else tstamp = start_time.tv_sec; srandom(tstamp); fs = ffs_mkfs(image, fsopts, tstamp); fsopts->superblock = (void *)fs; if (debug & DEBUG_FS_CREATE_IMAGE) { time_t t; t = (time_t)((struct fs *)fsopts->superblock)->fs_time; printf("mkfs returned %p; fs_time %s", fsopts->superblock, ctime(&t)); printf("fs totals: nbfree %lld, nffree %lld, nifree %lld, ndir %lld\n", (long long)fs->fs_cstotal.cs_nbfree, (long long)fs->fs_cstotal.cs_nffree, (long long)fs->fs_cstotal.cs_nifree, (long long)fs->fs_cstotal.cs_ndir); } if (fs->fs_cstotal.cs_nifree + (off_t)UFS_ROOTINO < fsopts->inodes) { warnx( "Image file `%s' has %lld free inodes; %lld are required.", image, (long long)(fs->fs_cstotal.cs_nifree + UFS_ROOTINO), (long long)fsopts->inodes); return (-1); } return (fsopts->fd); } static void ffs_size_dir(fsnode *root, fsinfo_t *fsopts) { struct direct tmpdir; fsnode * node; int curdirsize, this; ffs_opt_t *ffs_opts = fsopts->fs_specific; /* node may be NULL (empty directory) */ assert(fsopts != NULL); assert(ffs_opts != NULL); if (debug & DEBUG_FS_SIZE_DIR) printf("ffs_size_dir: entry: bytes %lld inodes %lld\n", (long long)fsopts->size, (long long)fsopts->inodes); #define ADDDIRENT(e) do { \ tmpdir.d_namlen = strlen((e)); \ this = DIRSIZ_SWAP(0, &tmpdir, 0); \ if (debug & DEBUG_FS_SIZE_DIR_ADD_DIRENT) \ printf("ADDDIRENT: was: %s (%d) this %d cur %d\n", \ e, tmpdir.d_namlen, this, curdirsize); \ if (this + curdirsize > roundup(curdirsize, DIRBLKSIZ)) \ curdirsize = roundup(curdirsize, DIRBLKSIZ); \ curdirsize += this; \ if (debug & DEBUG_FS_SIZE_DIR_ADD_DIRENT) \ printf("ADDDIRENT: now: %s (%d) this %d cur %d\n", \ e, tmpdir.d_namlen, this, curdirsize); \ } while (0); #define ADDSIZE(x) do { \ if ((size_t)(x) < UFS_NDADDR * (size_t)ffs_opts->bsize) { \ fsopts->size += roundup((x), ffs_opts->fsize); \ } else { \ /* Count space consumed by indirecttion blocks. */ \ fsopts->size += ffs_opts->bsize * \ (howmany((x), UFS_NDADDR * ffs_opts->bsize) - 1); \ /* \ * If the file is big enough to use indirect blocks, \ * we allocate bsize block for trailing data. \ */ \ fsopts->size += roundup((x), ffs_opts->bsize); \ } \ } while (0); curdirsize = 0; for (node = root; node != NULL; node = node->next) { ADDDIRENT(node->name); if (node == root) { /* we're at "." */ assert(strcmp(node->name, ".") == 0); ADDDIRENT(".."); } else if ((node->inode->flags & FI_SIZED) == 0) { /* don't count duplicate names */ node->inode->flags |= FI_SIZED; if (debug & DEBUG_FS_SIZE_DIR_NODE) printf("ffs_size_dir: `%s' size %lld\n", node->name, (long long)node->inode->st.st_size); fsopts->inodes++; if (node->type == S_IFREG) ADDSIZE(node->inode->st.st_size); if (node->type == S_IFLNK) { size_t slen; slen = strlen(node->symlink) + 1; if (slen >= (ffs_opts->version == 1 ? UFS1_MAXSYMLINKLEN : UFS2_MAXSYMLINKLEN)) ADDSIZE(slen); } } if (node->type == S_IFDIR) ffs_size_dir(node->child, fsopts); } ADDSIZE(curdirsize); if (debug & DEBUG_FS_SIZE_DIR) printf("ffs_size_dir: exit: size %lld inodes %lld\n", (long long)fsopts->size, (long long)fsopts->inodes); } static void * ffs_build_dinode1(struct ufs1_dinode *dinp, dirbuf_t *dbufp, fsnode *cur, fsnode *root, fsinfo_t *fsopts) { size_t slen; void *membuf; struct stat *st = stampst.st_ino != 0 ? &stampst : &cur->inode->st; memset(dinp, 0, sizeof(*dinp)); dinp->di_mode = cur->inode->st.st_mode; dinp->di_nlink = cur->inode->nlink; dinp->di_size = cur->inode->st.st_size; #if HAVE_STRUCT_STAT_ST_FLAGS dinp->di_flags = cur->inode->st.st_flags; #endif dinp->di_gen = random(); dinp->di_uid = cur->inode->st.st_uid; dinp->di_gid = cur->inode->st.st_gid; dinp->di_atime = st->st_atime; dinp->di_mtime = st->st_mtime; dinp->di_ctime = st->st_ctime; #if HAVE_STRUCT_STAT_ST_MTIMENSEC dinp->di_atimensec = st->st_atimensec; dinp->di_mtimensec = st->st_mtimensec; dinp->di_ctimensec = st->st_ctimensec; #endif /* not set: di_db, di_ib, di_blocks, di_spare */ membuf = NULL; if (cur == root) { /* "."; write dirbuf */ membuf = dbufp->buf; dinp->di_size = dbufp->size; } else if (S_ISBLK(cur->type) || S_ISCHR(cur->type)) { dinp->di_size = 0; /* a device */ dinp->di_rdev = ufs_rw32(cur->inode->st.st_rdev, fsopts->needswap); } else if (S_ISLNK(cur->type)) { /* symlink */ slen = strlen(cur->symlink); if (slen < UFS1_MAXSYMLINKLEN) { /* short link */ - memcpy(dinp->di_db, cur->symlink, slen); + memcpy(dinp->di_shortlink, cur->symlink, slen); } else membuf = cur->symlink; dinp->di_size = slen; } return membuf; } static void * ffs_build_dinode2(struct ufs2_dinode *dinp, dirbuf_t *dbufp, fsnode *cur, fsnode *root, fsinfo_t *fsopts) { size_t slen; void *membuf; struct stat *st = stampst.st_ino != 0 ? &stampst : &cur->inode->st; memset(dinp, 0, sizeof(*dinp)); dinp->di_mode = cur->inode->st.st_mode; dinp->di_nlink = cur->inode->nlink; dinp->di_size = cur->inode->st.st_size; #if HAVE_STRUCT_STAT_ST_FLAGS dinp->di_flags = cur->inode->st.st_flags; #endif dinp->di_gen = random(); dinp->di_uid = cur->inode->st.st_uid; dinp->di_gid = cur->inode->st.st_gid; dinp->di_atime = st->st_atime; dinp->di_mtime = st->st_mtime; dinp->di_ctime = st->st_ctime; #if HAVE_STRUCT_STAT_BIRTHTIME dinp->di_birthtime = st->st_birthtime; #else dinp->di_birthtime = st->st_ctime; #endif #if HAVE_STRUCT_STAT_ST_MTIMENSEC dinp->di_atimensec = st->st_atimensec; dinp->di_mtimensec = st->st_mtimensec; dinp->di_ctimensec = st->st_ctimensec; #if HAVE_STRUCT_STAT_BIRTHTIME dinp->di_birthnsec = st->st_birthtimensec; #else dinp->di_birthnsec = st->st_ctimensec; #endif #endif /* not set: di_db, di_ib, di_blocks, di_spare */ membuf = NULL; if (cur == root) { /* "."; write dirbuf */ membuf = dbufp->buf; dinp->di_size = dbufp->size; } else if (S_ISBLK(cur->type) || S_ISCHR(cur->type)) { dinp->di_size = 0; /* a device */ dinp->di_rdev = ufs_rw64(cur->inode->st.st_rdev, fsopts->needswap); } else if (S_ISLNK(cur->type)) { /* symlink */ slen = strlen(cur->symlink); if (slen < UFS2_MAXSYMLINKLEN) { /* short link */ - memcpy(dinp->di_db, cur->symlink, slen); + memcpy(dinp->di_shortlink, cur->symlink, slen); } else membuf = cur->symlink; dinp->di_size = slen; } return membuf; } static int ffs_populate_dir(const char *dir, fsnode *root, fsinfo_t *fsopts) { fsnode *cur; dirbuf_t dirbuf; union dinode din; void *membuf; char path[MAXPATHLEN + 1]; ffs_opt_t *ffs_opts = fsopts->fs_specific; assert(dir != NULL); assert(root != NULL); assert(fsopts != NULL); assert(ffs_opts != NULL); (void)memset(&dirbuf, 0, sizeof(dirbuf)); if (debug & DEBUG_FS_POPULATE) printf("ffs_populate_dir: PASS 1 dir %s node %p\n", dir, root); /* * pass 1: allocate inode numbers, build directory `file' */ for (cur = root; cur != NULL; cur = cur->next) { if ((cur->inode->flags & FI_ALLOCATED) == 0) { cur->inode->flags |= FI_ALLOCATED; if (cur == root && cur->parent != NULL) cur->inode->ino = cur->parent->inode->ino; else { cur->inode->ino = fsopts->curinode; fsopts->curinode++; } } ffs_make_dirbuf(&dirbuf, cur->name, cur, fsopts->needswap); if (cur == root) { /* we're at "."; add ".." */ ffs_make_dirbuf(&dirbuf, "..", cur->parent == NULL ? cur : cur->parent->first, fsopts->needswap); root->inode->nlink++; /* count my parent's link */ } else if (cur->child != NULL) root->inode->nlink++; /* count my child's link */ /* * XXX possibly write file and long symlinks here, * ensuring that blocks get written before inodes? * otoh, this isn't a real filesystem, so who * cares about ordering? :-) */ } if (debug & DEBUG_FS_POPULATE_DIRBUF) ffs_dump_dirbuf(&dirbuf, dir, fsopts->needswap); /* * pass 2: write out dirbuf, then non-directories at this level */ if (debug & DEBUG_FS_POPULATE) printf("ffs_populate_dir: PASS 2 dir %s\n", dir); for (cur = root; cur != NULL; cur = cur->next) { if (cur->inode->flags & FI_WRITTEN) continue; /* skip hard-linked entries */ cur->inode->flags |= FI_WRITTEN; if (cur->contents == NULL) { if (snprintf(path, sizeof(path), "%s/%s/%s", cur->root, cur->path, cur->name) >= (int)sizeof(path)) errx(1, "Pathname too long."); } if (cur->child != NULL) continue; /* child creates own inode */ /* build on-disk inode */ if (ffs_opts->version == 1) membuf = ffs_build_dinode1(&din.ffs1_din, &dirbuf, cur, root, fsopts); else membuf = ffs_build_dinode2(&din.ffs2_din, &dirbuf, cur, root, fsopts); if (debug & DEBUG_FS_POPULATE_NODE) { printf("ffs_populate_dir: writing ino %d, %s", cur->inode->ino, inode_type(cur->type)); if (cur->inode->nlink > 1) printf(", nlink %d", cur->inode->nlink); putchar('\n'); } if (membuf != NULL) { ffs_write_file(&din, cur->inode->ino, membuf, fsopts); } else if (S_ISREG(cur->type)) { ffs_write_file(&din, cur->inode->ino, (cur->contents) ? cur->contents : path, fsopts); } else { assert (! S_ISDIR(cur->type)); ffs_write_inode(&din, cur->inode->ino, fsopts); } } /* * pass 3: write out sub-directories */ if (debug & DEBUG_FS_POPULATE) printf("ffs_populate_dir: PASS 3 dir %s\n", dir); for (cur = root; cur != NULL; cur = cur->next) { if (cur->child == NULL) continue; if ((size_t)snprintf(path, sizeof(path), "%s/%s", dir, cur->name) >= sizeof(path)) errx(1, "Pathname too long."); if (! ffs_populate_dir(path, cur->child, fsopts)) return (0); } if (debug & DEBUG_FS_POPULATE) printf("ffs_populate_dir: DONE dir %s\n", dir); /* cleanup */ if (dirbuf.buf != NULL) free(dirbuf.buf); return (1); } static void ffs_write_file(union dinode *din, uint32_t ino, void *buf, fsinfo_t *fsopts) { int isfile, ffd; char *fbuf, *p; off_t bufleft, chunk, offset; ssize_t nread; struct inode in; struct buf * bp; ffs_opt_t *ffs_opts = fsopts->fs_specific; struct vnode vp = { fsopts, NULL }; assert (din != NULL); assert (buf != NULL); assert (fsopts != NULL); assert (ffs_opts != NULL); isfile = S_ISREG(DIP(din, mode)); fbuf = NULL; ffd = -1; p = NULL; in.i_fs = (struct fs *)fsopts->superblock; in.i_devvp = &vp; if (debug & DEBUG_FS_WRITE_FILE) { printf( "ffs_write_file: ino %u, din %p, isfile %d, %s, size %lld", ino, din, isfile, inode_type(DIP(din, mode) & S_IFMT), (long long)DIP(din, size)); if (isfile) printf(", file '%s'\n", (char *)buf); else printf(", buffer %p\n", buf); } in.i_number = ino; in.i_size = DIP(din, size); if (ffs_opts->version == 1) memcpy(&in.i_din.ffs1_din, &din->ffs1_din, sizeof(in.i_din.ffs1_din)); else memcpy(&in.i_din.ffs2_din, &din->ffs2_din, sizeof(in.i_din.ffs2_din)); if (DIP(din, size) == 0) goto write_inode_and_leave; /* mmm, cheating */ if (isfile) { fbuf = emalloc(ffs_opts->bsize); if ((ffd = open((char *)buf, O_RDONLY, 0444)) == -1) { err(EXIT_FAILURE, "Can't open `%s' for reading", (char *)buf); } } else { p = buf; } chunk = 0; for (bufleft = DIP(din, size); bufleft > 0; bufleft -= chunk) { chunk = MIN(bufleft, ffs_opts->bsize); if (!isfile) ; else if ((nread = read(ffd, fbuf, chunk)) == -1) err(EXIT_FAILURE, "Reading `%s', %lld bytes to go", (char *)buf, (long long)bufleft); else if (nread != chunk) errx(EXIT_FAILURE, "Reading `%s', %lld bytes to go, " "read %zd bytes, expected %ju bytes, does " "metalog size= attribute mismatch source size?", (char *)buf, (long long)bufleft, nread, (uintmax_t)chunk); else p = fbuf; offset = DIP(din, size) - bufleft; if (debug & DEBUG_FS_WRITE_FILE_BLOCK) printf( "ffs_write_file: write %p offset %lld size %lld left %lld\n", p, (long long)offset, (long long)chunk, (long long)bufleft); /* * XXX if holey support is desired, do the check here * * XXX might need to write out last bit in fragroundup * sized chunk. however, ffs_balloc() handles this for us */ errno = ffs_balloc(&in, offset, chunk, &bp); bad_ffs_write_file: if (errno != 0) err(1, "Writing inode %d (%s), bytes %lld + %lld", ino, isfile ? (char *)buf : inode_type(DIP(din, mode) & S_IFMT), (long long)offset, (long long)chunk); memcpy(bp->b_data, p, chunk); errno = bwrite(bp); if (errno != 0) goto bad_ffs_write_file; brelse(bp); if (!isfile) p += chunk; } write_inode_and_leave: ffs_write_inode(&in.i_din, in.i_number, fsopts); if (fbuf) free(fbuf); if (ffd != -1) close(ffd); } static void ffs_dump_dirbuf(dirbuf_t *dbuf, const char *dir, int needswap) { doff_t i; struct direct *de; uint16_t reclen; assert (dbuf != NULL); assert (dir != NULL); printf("ffs_dump_dirbuf: dir %s size %d cur %d\n", dir, dbuf->size, dbuf->cur); for (i = 0; i < dbuf->size; ) { de = (struct direct *)(dbuf->buf + i); reclen = ufs_rw16(de->d_reclen, needswap); printf( " inode %4d %7s offset %4d reclen %3d namlen %3d name %s\n", ufs_rw32(de->d_ino, needswap), inode_type(DTTOIF(de->d_type)), i, reclen, de->d_namlen, de->d_name); i += reclen; assert(reclen > 0); } } static void ffs_make_dirbuf(dirbuf_t *dbuf, const char *name, fsnode *node, int needswap) { struct direct de, *dp; uint16_t llen, reclen; u_char *newbuf; assert (dbuf != NULL); assert (name != NULL); assert (node != NULL); /* create direct entry */ (void)memset(&de, 0, sizeof(de)); de.d_ino = ufs_rw32(node->inode->ino, needswap); de.d_type = IFTODT(node->type); de.d_namlen = (uint8_t)strlen(name); strcpy(de.d_name, name); reclen = DIRSIZ_SWAP(0, &de, needswap); de.d_reclen = ufs_rw16(reclen, needswap); dp = (struct direct *)(dbuf->buf + dbuf->cur); llen = 0; if (dp != NULL) llen = DIRSIZ_SWAP(0, dp, needswap); if (debug & DEBUG_FS_MAKE_DIRBUF) printf( "ffs_make_dirbuf: dbuf siz %d cur %d lastlen %d\n" " ino %d type %d reclen %d namlen %d name %.30s\n", dbuf->size, dbuf->cur, llen, ufs_rw32(de.d_ino, needswap), de.d_type, reclen, de.d_namlen, de.d_name); if (reclen + dbuf->cur + llen > roundup(dbuf->size, DIRBLKSIZ)) { if (debug & DEBUG_FS_MAKE_DIRBUF) printf("ffs_make_dirbuf: growing buf to %d\n", dbuf->size + DIRBLKSIZ); newbuf = erealloc(dbuf->buf, dbuf->size + DIRBLKSIZ); dbuf->buf = newbuf; dbuf->size += DIRBLKSIZ; memset(dbuf->buf + dbuf->size - DIRBLKSIZ, 0, DIRBLKSIZ); dbuf->cur = dbuf->size - DIRBLKSIZ; } else if (dp) { /* shrink end of previous */ dp->d_reclen = ufs_rw16(llen,needswap); dbuf->cur += llen; } dp = (struct direct *)(dbuf->buf + dbuf->cur); memcpy(dp, &de, reclen); dp->d_reclen = ufs_rw16(dbuf->size - dbuf->cur, needswap); } /* * cribbed from sys/ufs/ffs/ffs_alloc.c */ static void ffs_write_inode(union dinode *dp, uint32_t ino, const fsinfo_t *fsopts) { char *buf; struct ufs1_dinode *dp1; struct ufs2_dinode *dp2, *dip; struct cg *cgp; struct fs *fs; int cg, cgino; uint32_t i; daddr_t d; char sbbuf[FFS_MAXBSIZE]; uint32_t initediblk; ffs_opt_t *ffs_opts = fsopts->fs_specific; assert (dp != NULL); assert (ino > 0); assert (fsopts != NULL); assert (ffs_opts != NULL); fs = (struct fs *)fsopts->superblock; cg = ino_to_cg(fs, ino); cgino = ino % fs->fs_ipg; if (debug & DEBUG_FS_WRITE_INODE) printf("ffs_write_inode: din %p ino %u cg %d cgino %d\n", dp, ino, cg, cgino); ffs_rdfs(fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, &sbbuf, fsopts); cgp = (struct cg *)sbbuf; if (!cg_chkmagic_swap(cgp, fsopts->needswap)) errx(1, "ffs_write_inode: cg %d: bad magic number", cg); assert (isclr(cg_inosused_swap(cgp, fsopts->needswap), cgino)); buf = emalloc(fs->fs_bsize); dp1 = (struct ufs1_dinode *)buf; dp2 = (struct ufs2_dinode *)buf; if (fs->fs_cstotal.cs_nifree == 0) errx(1, "ffs_write_inode: fs out of inodes for ino %u", ino); if (fs->fs_cs(fs, cg).cs_nifree == 0) errx(1, "ffs_write_inode: cg %d out of inodes for ino %u", cg, ino); setbit(cg_inosused_swap(cgp, fsopts->needswap), cgino); ufs_add32(cgp->cg_cs.cs_nifree, -1, fsopts->needswap); fs->fs_cstotal.cs_nifree--; fs->fs_cs(fs, cg).cs_nifree--; if (S_ISDIR(DIP(dp, mode))) { ufs_add32(cgp->cg_cs.cs_ndir, 1, fsopts->needswap); fs->fs_cstotal.cs_ndir++; fs->fs_cs(fs, cg).cs_ndir++; } /* * Initialize inode blocks on the fly for UFS2. */ initediblk = ufs_rw32(cgp->cg_initediblk, fsopts->needswap); while (ffs_opts->version == 2 && cgino + INOPB(fs) > initediblk && initediblk < ufs_rw32(cgp->cg_niblk, fsopts->needswap)) { memset(buf, 0, fs->fs_bsize); dip = (struct ufs2_dinode *)buf; for (i = 0; i < INOPB(fs); i++) { dip->di_gen = random(); dip++; } ffs_wtfs(fsbtodb(fs, ino_to_fsba(fs, cg * fs->fs_ipg + initediblk)), fs->fs_bsize, buf, fsopts); initediblk += INOPB(fs); cgp->cg_initediblk = ufs_rw32(initediblk, fsopts->needswap); } ffs_wtfs(fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, &sbbuf, fsopts); /* now write inode */ d = fsbtodb(fs, ino_to_fsba(fs, ino)); ffs_rdfs(d, fs->fs_bsize, buf, fsopts); if (fsopts->needswap) { if (ffs_opts->version == 1) ffs_dinode1_swap(&dp->ffs1_din, &dp1[ino_to_fsbo(fs, ino)]); else ffs_dinode2_swap(&dp->ffs2_din, &dp2[ino_to_fsbo(fs, ino)]); } else { if (ffs_opts->version == 1) dp1[ino_to_fsbo(fs, ino)] = dp->ffs1_din; else dp2[ino_to_fsbo(fs, ino)] = dp->ffs2_din; } ffs_wtfs(d, fs->fs_bsize, buf, fsopts); free(buf); } void panic(const char *fmt, ...) { va_list ap; va_start(ap, fmt); vwarnx(fmt, ap); va_end(ap); exit(1); } diff --git a/usr.sbin/makefs/ffs/ufs_inode.h b/usr.sbin/makefs/ffs/ufs_inode.h index 07c1ff63b865..e7baff23cbfc 100644 --- a/usr.sbin/makefs/ffs/ufs_inode.h +++ b/usr.sbin/makefs/ffs/ufs_inode.h @@ -1,99 +1,99 @@ /* $NetBSD: ufs_inode.h,v 1.3 2003/08/07 11:25:34 agc Exp $ */ /* From: NetBSD: inode.h,v 1.27 2001/12/18 10:57:23 fvdl Exp $ */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)inode.h 8.9 (Berkeley) 5/14/95 * $FreeBSD$ */ union dinode { struct ufs1_dinode ffs1_din; struct ufs2_dinode ffs2_din; }; struct inode { ino_t i_number; /* The identity of the inode. */ struct vnode *i_devvp; /* vnode pointer (contains fsopts) */ struct fs *i_fs; /* File system */ union dinode i_din; uint64_t i_size; }; #define i_ffs1_atime i_din.ffs1_din.di_atime #define i_ffs1_atimensec i_din.ffs1_din.di_atimensec #define i_ffs1_blocks i_din.ffs1_din.di_blocks #define i_ffs1_ctime i_din.ffs1_din.di_ctime #define i_ffs1_ctimensec i_din.ffs1_din.di_ctimensec #define i_ffs1_db i_din.ffs1_din.di_db #define i_ffs1_flags i_din.ffs1_din.di_flags #define i_ffs1_gen i_din.ffs1_din.di_gen #define i_ffs11_gid i_din.ffs1_din.di_gid #define i_ffs1_ib i_din.ffs1_din.di_ib #define i_ffs1_mode i_din.ffs1_din.di_mode #define i_ffs1_mtime i_din.ffs1_din.di_mtime #define i_ffs1_mtimensec i_din.ffs1_din.di_mtimensec #define i_ffs1_nlink i_din.ffs1_din.di_nlink #define i_ffs1_rdev i_din.ffs1_din.di_rdev -#define i_ffs1_shortlink i_din.ffs1_din.db +#define i_ffs1_shortlink i_din.ffs1_din.di_shortlink #define i_ffs1_size i_din.ffs1_din.di_size #define i_ffs1_uid i_din.ffs1_din.di_uid #define i_ffs2_atime i_din.ffs2_din.di_atime #define i_ffs2_atimensec i_din.ffs2_din.di_atimensec #define i_ffs2_blocks i_din.ffs2_din.di_blocks #define i_ffs2_ctime i_din.ffs2_din.di_ctime #define i_ffs2_ctimensec i_din.ffs2_din.di_ctimensec #define i_ffs2_birthtime i_din.ffs2_din.di_birthtime #define i_ffs2_birthnsec i_din.ffs2_din.di_birthnsec #define i_ffs2_db i_din.ffs2_din.di_db #define i_ffs2_flags i_din.ffs2_din.di_flags #define i_ffs2_gen i_din.ffs2_din.di_gen #define i_ffs21_gid i_din.ffs2_din.di_gid #define i_ffs2_ib i_din.ffs2_din.di_ib #define i_ffs2_mode i_din.ffs2_din.di_mode #define i_ffs2_mtime i_din.ffs2_din.di_mtime #define i_ffs2_mtimensec i_din.ffs2_din.di_mtimensec #define i_ffs2_nlink i_din.ffs2_din.di_nlink #define i_ffs2_rdev i_din.ffs2_din.di_rdev -#define i_ffs2_shortlink i_din.ffs2_din.db +#define i_ffs2_shortlink i_din.ffs2_din.di_shortlink #define i_ffs2_size i_din.ffs2_din.di_size #define i_ffs2_uid i_din.ffs2_din.di_uid #undef DIP #define DIP(ip, field) \ (((ip)->i_fs->fs_magic == FS_UFS1_MAGIC) ? \ (ip)->i_ffs1_##field : (ip)->i_ffs2_##field)