diff --git a/lib/libufs/inode.c b/lib/libufs/inode.c index 9ddb4d7d4e83..863e71867daa 100644 --- a/lib/libufs/inode.c +++ b/lib/libufs/inode.c @@ -1,107 +1,119 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Juli Mallett. All rights reserved. * * This software was written by Juli Mallett for the * FreeBSD project. Redistribution and use in source and binary forms, with * or without modification, are permitted provided that the following * conditions are met: * * 1. Redistribution of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistribution in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include int getinode(struct uufsd *disk, union dinodep *dp, ino_t inum) { ino_t min, max; caddr_t inoblock; struct fs *fs; + struct timespec now; ERROR(disk, NULL); fs = &disk->d_fs; if (inum >= (ino_t)fs->fs_ipg * fs->fs_ncg) { ERROR(disk, "inode number out of range"); return (-1); } inoblock = (caddr_t)&disk->d_inos[0]; min = disk->d_inomin; max = disk->d_inomax; + if (clock_gettime(CLOCK_REALTIME_FAST, &now) != 0) { + ERROR(disk, "cannot get current time of day"); + return (-1); + } if (inum >= min && inum < max) goto gotit; bread(disk, fsbtodb(fs, ino_to_fsba(fs, inum)), inoblock, fs->fs_bsize); disk->d_inomin = min = inum - (inum % INOPB(fs)); disk->d_inomax = max = min + INOPB(fs); gotit: switch (disk->d_ufs) { case 1: disk->d_dp.dp1 = &((struct ufs1_dinode *)inoblock)[inum - min]; + if (ffs_oldfscompat_inode_read(fs, disk->d_dp, now.tv_sec)) + putinode(disk); if (dp != NULL) *dp = disk->d_dp; return (0); case 2: disk->d_dp.dp2 = &((struct ufs2_dinode *)inoblock)[inum - min]; if (dp != NULL) *dp = disk->d_dp; - if (ffs_verify_dinode_ckhash(fs, disk->d_dp.dp2) == 0) + if (ffs_verify_dinode_ckhash(fs, disk->d_dp.dp2) == 0) { + if (ffs_oldfscompat_inode_read(fs, disk->d_dp, + now.tv_sec)) + putinode(disk); return (0); + } ERROR(disk, "check-hash failed for inode read from disk"); return (-1); default: break; } ERROR(disk, "unknown UFS filesystem type"); return (-1); } int putinode(struct uufsd *disk) { struct fs *fs; fs = &disk->d_fs; if (disk->d_ufs == 2) ffs_update_dinode_ckhash(fs, disk->d_dp.dp2); if (bwrite(disk, fsbtodb(fs, ino_to_fsba(&disk->d_fs, disk->d_inomin)), (caddr_t)&disk->d_inos[0], disk->d_fs.fs_bsize) <= 0) return (-1); return (0); } diff --git a/lib/libufs/libufs.h b/lib/libufs/libufs.h index b91866164e64..bb92e082a875 100644 --- a/lib/libufs/libufs.h +++ b/lib/libufs/libufs.h @@ -1,185 +1,187 @@ 
/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Juli Mallett. All rights reserved. * * This software was written by Juli Mallett for the * FreeBSD project. Redistribution and use in source and binary forms, with * or without modification, are permitted provided that the following * conditions are met: * * 1. Redistribution of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistribution in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef __LIBUFS_H__ #define __LIBUFS_H__ +#include /* * Various disk controllers require their buffers to be aligned to the size * of a cache line. The LIBUFS_BUFALIGN defines the required alignment size. * The alignment must be a power of 2. */ #define LIBUFS_BUFALIGN 128 /* * userland ufs disk. 
*/ struct uufsd { union { struct fs d_fs; /* filesystem information */ char d_sb[SBLOCKSIZE]; /* superblock as buffer */ } d_sbunion __aligned(LIBUFS_BUFALIGN); union { struct cg d_cg; /* cylinder group */ char d_buf[MAXBSIZE]; /* cylinder group storage */ } d_cgunion __aligned(LIBUFS_BUFALIGN); union { union dinodep d_ino[1]; /* inode block */ char d_inos[MAXBSIZE]; /* inode block as buffer */ } d_inosunion __aligned(LIBUFS_BUFALIGN); const char *d_name; /* disk name */ const char *d_error; /* human readable disk error */ ufs2_daddr_t d_sblock; /* superblock location */ struct fs_summary_info *d_si; /* Superblock summary info */ union dinodep d_dp; /* pointer to currently active inode */ ino_t d_inomin; /* low ino */ ino_t d_inomax; /* high ino */ off_t d_sblockloc; /* where to look for the superblock */ int64_t d_bsize; /* device bsize */ int64_t d_lookupflags; /* flags to superblock lookup */ int64_t d_mine; /* internal flags */ int32_t d_ccg; /* current cylinder group */ int32_t d_ufs; /* decimal UFS version */ int32_t d_fd; /* raw device file descriptor */ int32_t d_lcg; /* last cylinder group (in d_cg) */ }; #define d_inos d_inosunion.d_inos #define d_fs d_sbunion.d_fs #define d_cg d_cgunion.d_cg /* * libufs macros (internal, non-exported). */ #ifdef _LIBUFS /* * Ensure that the buffer is aligned to the I/O subsystem requirements. */ #define BUF_MALLOC(newbufpp, data, size) { \ if (data != NULL && (((intptr_t)data) & (LIBUFS_BUFALIGN - 1)) == 0) \ *newbufpp = (void *)data; \ else \ *newbufpp = aligned_alloc(LIBUFS_BUFALIGN, size); \ } /* * Trace steps through libufs, to be used at entry and erroneous return. */ static inline void ERROR(struct uufsd *u, const char *str) { #ifdef _LIBUFS_DEBUGGING if (str != NULL) { fprintf(stderr, "libufs: %s", str); if (errno != 0) fprintf(stderr, ": %s", strerror(errno)); fprintf(stderr, "\n"); } #endif if (u != NULL) u->d_error = str; } #endif /* _LIBUFS */ __BEGIN_DECLS /* * libufs prototypes. 
*/ /* * ffs_subr.c */ void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t); void ffs_clusteracct(struct fs *, struct cg *, ufs1_daddr_t, int); void ffs_fragacct(struct fs *, int, int32_t [], int); int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t); +bool ffs_oldfscompat_inode_read(struct fs *, union dinodep, time_t); int ffs_sbsearch(void *, struct fs **, int, char *, int (*)(void *, off_t, void **, int)); void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t); int ffs_sbget(void *, struct fs **, off_t, int, char *, int (*)(void *, off_t, void **, int)); int ffs_sbput(void *, struct fs *, off_t, int (*)(void *, off_t, void *, int)); void ffs_update_dinode_ckhash(struct fs *, struct ufs2_dinode *); int ffs_verify_dinode_ckhash(struct fs *, struct ufs2_dinode *); /* * block.c */ ssize_t bread(struct uufsd *, ufs2_daddr_t, void *, size_t); ssize_t bwrite(struct uufsd *, ufs2_daddr_t, const void *, size_t); int berase(struct uufsd *, ufs2_daddr_t, ufs2_daddr_t); /* * cgroup.c */ ufs2_daddr_t cgballoc(struct uufsd *); int cgbfree(struct uufsd *, ufs2_daddr_t, long); ino_t cgialloc(struct uufsd *); int cgget(int, struct fs *, int, struct cg *); int cgput(int, struct fs *, struct cg *); int cgread(struct uufsd *); int cgread1(struct uufsd *, int); int cgwrite(struct uufsd *); int cgwrite1(struct uufsd *, int); /* * inode.c */ int getinode(struct uufsd *, union dinodep *, ino_t); int putinode(struct uufsd *); /* * sblock.c */ int sbread(struct uufsd *); int sbfind(struct uufsd *, int); int sbwrite(struct uufsd *, int); /* low level superblock read/write functions */ int sbget(int, struct fs **, off_t, int); int sbsearch(int, struct fs **, int); int sbput(int, struct fs *, int); /* * type.c */ int ufs_disk_close(struct uufsd *); int ufs_disk_fillout(struct uufsd *, const char *); int ufs_disk_fillout_blank(struct uufsd *, const char *); int ufs_disk_write(struct uufsd *); /* * crc32c.c */ uint32_t 
calculate_crc32c(uint32_t, const void *, size_t); __END_DECLS #endif /* __LIBUFS_H__ */ diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c index c3ae283b7198..b30e3aa5068b 100644 --- a/sbin/fsck_ffs/inode.c +++ b/sbin/fsck_ffs/inode.c @@ -1,1468 +1,1492 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" struct bufarea *icachebp; /* inode cache buffer */ +static time_t now; /* current time of day */ static int iblock(struct inodesc *, off_t isize, int type); static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t, struct bufarea **); static int snapclean(struct inodesc *idesc); static void chkcopyonwrite(struct fs *, ufs2_daddr_t, ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long)); int ckinode(union dinode *dp, struct inodesc *idesc) { off_t remsize, sizepb; int i, offset, ret; struct inode ip; union dinode dino; ufs2_daddr_t ndb; mode_t mode; char pathbuf[MAXPATHLEN + 1]; if (idesc->id_fix != IGNORE) idesc->id_fix = DONTKNOW; idesc->id_dp = dp; idesc->id_lbn = -1; idesc->id_lballoc = -1; idesc->id_level = 0; idesc->id_entryno = 0; idesc->id_filesize = DIP(dp, di_size); mode = DIP(dp, di_mode) & IFMT; if (mode == IFBLK || mode == IFCHR || (mode == IFLNK && DIP(dp, di_size) < (unsigned)sblock.fs_maxsymlinklen)) return (KEEPON); if (sblock.fs_magic == FS_UFS1_MAGIC) dino.dp1 = dp->dp1; else dino.dp2 = dp->dp2; if (DIP(&dino, di_size) < 0) { pfatal("NEGATIVE INODE SIZE %jd\n", DIP(&dino, di_size)); return (STOP); } ndb = howmany(DIP(&dino, di_size), sblock.fs_bsize); for (i = 0; i < UFS_NDADDR; i++) { idesc->id_lbn++; if (--ndb == 0 && (offset = blkoff(&sblock, DIP(&dino, di_size))) != 0) idesc->id_numfrags = numfrags(&sblock, fragroundup(&sblock, offset)); else idesc->id_numfrags = sblock.fs_frag; if (DIP(&dino, di_db[i]) == 0) { if (idesc->id_type == DATA && ndb >= 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { ginode(idesc->id_number, &ip); DIP_SET(ip.i_dp, di_size, i * sblock.fs_bsize); printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(&ip); irelse(&ip); } return 
(STOP); } continue; } idesc->id_blkno = DIP(&dino, di_db[i]); if (idesc->id_type != DATA) ret = (*idesc->id_func)(idesc); else ret = dirscan(idesc); if (ret & STOP) return (ret); } idesc->id_numfrags = sblock.fs_frag; remsize = DIP(&dino, di_size) - sblock.fs_bsize * UFS_NDADDR; sizepb = sblock.fs_bsize; for (i = 0; i < UFS_NIADDR; i++) { sizepb *= NINDIR(&sblock); idesc->id_level = i + 1; if (DIP(&dino, di_ib[i])) { idesc->id_blkno = DIP(&dino, di_ib[i]); ret = iblock(idesc, remsize, BT_LEVEL1 + i); if (ret & STOP) return (ret); } else if (remsize > 0) { idesc->id_lbn += sizepb / sblock.fs_bsize; if (idesc->id_type == DATA) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { ginode(idesc->id_number, &ip); DIP_SET(ip.i_dp, di_size, DIP(ip.i_dp, di_size) - remsize); remsize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(&ip); irelse(&ip); break; } } } remsize -= sizepb; } return (KEEPON); } static int iblock(struct inodesc *idesc, off_t isize, int type) { struct inode ip; struct bufarea *bp; int i, n, (*func)(struct inodesc *), nif; off_t sizepb; char buf[BUFSIZ]; char pathbuf[MAXPATHLEN + 1]; if (idesc->id_type != DATA) { func = idesc->id_func; if (((n = (*func)(idesc)) & KEEPON) == 0) return (n); } else func = dirscan; bp = getdatablk(idesc->id_blkno, sblock.fs_bsize, type); if (bp->b_errs != 0) { brelse(bp); return (SKIP); } idesc->id_bp = bp; idesc->id_level--; for (sizepb = sblock.fs_bsize, i = 0; i < idesc->id_level; i++) sizepb *= NINDIR(&sblock); if (howmany(isize, sizepb) > NINDIR(&sblock)) nif = NINDIR(&sblock); else nif = howmany(isize, sizepb); if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) { for (i = nif; i < NINDIR(&sblock); i++) { if (IBLK(bp, i) == 0) continue; (void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu", (u_long)idesc->id_number); if (preen) { pfatal("%s", buf); } else 
if (dofix(idesc, buf)) { IBLK_SET(bp, i, 0); dirty(bp); } } flush(fswritefd, bp); } for (i = 0; i < nif; i++) { if (IBLK(bp, i)) { idesc->id_blkno = IBLK(bp, i); bp->b_index = i; if (idesc->id_level == 0) { idesc->id_lbn++; n = (*func)(idesc); } else { n = iblock(idesc, isize, type - 1); idesc->id_level++; } if (n & STOP) { brelse(bp); return (n); } } else { idesc->id_lbn += sizepb / sblock.fs_bsize; if (idesc->id_type == DATA && isize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { ginode(idesc->id_number, &ip); DIP_SET(ip.i_dp, di_size, DIP(ip.i_dp, di_size) - isize); isize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(&ip); brelse(bp); return(STOP); } } } isize -= sizepb; } brelse(bp); return (KEEPON); } /* * Finds the disk block address at the specified lbn within the inode * specified by dp. This follows the whole tree and honors di_size and * di_extsize so it is a true test of reachability. The lbn may be * negative if an extattr or indirect block is requested. */ ufs2_daddr_t ino_blkatoff(union dinode *dp, ino_t ino, ufs_lbn_t lbn, int *frags, struct bufarea **bpp) { ufs_lbn_t tmpval; ufs_lbn_t cur; ufs_lbn_t next; int i; *frags = 0; if (bpp != NULL) *bpp = NULL; /* * Handle extattr blocks first. */ if (lbn < 0 && lbn >= -UFS_NXADDR) { lbn = -1 - lbn; if (lbn > lblkno(&sblock, dp->dp2.di_extsize - 1)) return (0); *frags = numfrags(&sblock, sblksize(&sblock, dp->dp2.di_extsize, lbn)); return (dp->dp2.di_extb[lbn]); } /* * Now direct and indirect. 
*/ if (DIP(dp, di_mode) == IFLNK && DIP(dp, di_size) < sblock.fs_maxsymlinklen) return (0); if (lbn >= 0 && lbn < UFS_NDADDR) { *frags = numfrags(&sblock, sblksize(&sblock, DIP(dp, di_size), lbn)); return (DIP(dp, di_db[lbn])); } *frags = sblock.fs_frag; for (i = 0, tmpval = NINDIR(&sblock), cur = UFS_NDADDR; i < UFS_NIADDR; i++, tmpval *= NINDIR(&sblock), cur = next) { next = cur + tmpval; if (lbn == -cur - i) return (DIP(dp, di_ib[i])); /* * Determine whether the lbn in question is within this tree. */ if (lbn < 0 && -lbn >= next) continue; if (lbn > 0 && lbn >= next) continue; if (DIP(dp, di_ib[i]) == 0) return (0); return (indir_blkatoff(DIP(dp, di_ib[i]), ino, -cur - i, lbn, bpp)); } pfatal("lbn %jd not in ino %ju\n", lbn, (uintmax_t)ino); return (0); } /* * Fetch an indirect block to find the block at a given lbn. The lbn * may be negative to fetch a specific indirect block pointer or positive * to fetch a specific block. */ static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn, struct bufarea **bpp) { struct bufarea *bp; ufs_lbn_t lbnadd; ufs_lbn_t base; int i, level; level = lbn_level(cur); if (level == -1) pfatal("Invalid indir lbn %jd in ino %ju\n", lbn, (uintmax_t)ino); if (level == 0 && lbn < 0) pfatal("Invalid lbn %jd in ino %ju\n", lbn, (uintmax_t)ino); lbnadd = 1; base = -(cur + level); for (i = level; i > 0; i--) lbnadd *= NINDIR(&sblock); if (lbn > 0) i = (lbn - base) / lbnadd; else i = (-lbn - base) / lbnadd; if (i < 0 || i >= NINDIR(&sblock)) { pfatal("Invalid indirect index %d produced by lbn %jd " "in ino %ju\n", i, lbn, (uintmax_t)ino); return (0); } if (level == 0) cur = base + (i * lbnadd); else cur = -(base + (i * lbnadd)) - (level - 1); bp = getdatablk(blk, sblock.fs_bsize, BT_LEVEL1 + level); if (bp->b_errs != 0) return (0); blk = IBLK(bp, i); bp->b_index = i; if (cur == lbn || blk == 0) { if (bpp != NULL) *bpp = bp; else brelse(bp); return (blk); } brelse(bp); if (level == 0) pfatal("Invalid lbn %jd 
at level 0 for ino %ju\n", lbn, (uintmax_t)ino); return (indir_blkatoff(blk, ino, cur, lbn, bpp)); } /* * Check that a block in a legal block number. * Return 0 if in range, 1 if out of range. */ int chkrange(ufs2_daddr_t blk, int cnt) { int c; if (cnt <= 0 || blk <= 0 || blk >= maxfsblock || cnt > maxfsblock - blk) { if (debug) printf("out of range: blk %ld, offset %i, size %d\n", (long)blk, (int)fragnum(&sblock, blk), cnt); return (1); } if (cnt > sblock.fs_frag || fragnum(&sblock, blk) + cnt > sblock.fs_frag) { if (debug) printf("bad size: blk %ld, offset %i, size %d\n", (long)blk, (int)fragnum(&sblock, blk), cnt); return (1); } c = dtog(&sblock, blk); if (blk < cgdmin(&sblock, c)) { if ((blk + cnt) > cgsblock(&sblock, c)) { if (debug) { printf("blk %ld < cgdmin %ld;", (long)blk, (long)cgdmin(&sblock, c)); printf(" blk + cnt %ld > cgsbase %ld\n", (long)(blk + cnt), (long)cgsblock(&sblock, c)); } return (1); } } else { if ((blk + cnt) > cgbase(&sblock, c+1)) { if (debug) { printf("blk %ld >= cgdmin %ld;", (long)blk, (long)cgdmin(&sblock, c)); printf(" blk + cnt %ld > sblock.fs_fpg %ld\n", (long)(blk + cnt), (long)sblock.fs_fpg); } return (1); } } return (0); } /* * General purpose interface for reading inodes. * * firstinum and lastinum track contents of getnextino() cache (below). 
*/ static ino_t firstinum, lastinum; static struct bufarea inobuf; void ginode(ino_t inumber, struct inode *ip) { ufs2_daddr_t iblk; + union dinodep dpp; struct ufs2_dinode *dp; if (inumber < UFS_ROOTINO || inumber >= maxino) errx(EEXIT, "bad inode number %ju to ginode", (uintmax_t)inumber); ip->i_number = inumber; if (inumber >= firstinum && inumber < lastinum) { /* contents in getnextino() cache */ ip->i_bp = &inobuf; inobuf.b_refcnt++; inobuf.b_index = firstinum; } else if (icachebp != NULL && inumber >= icachebp->b_index && inumber < icachebp->b_index + INOPB(&sblock)) { /* take an additional reference for the returned inode */ icachebp->b_refcnt++; ip->i_bp = icachebp; } else { iblk = ino_to_fsba(&sblock, inumber); /* release our cache-hold reference on old icachebp */ if (icachebp != NULL) brelse(icachebp); icachebp = getdatablk(iblk, sblock.fs_bsize, BT_INODES); if (icachebp->b_errs != 0) { icachebp = NULL; ip->i_bp = NULL; ip->i_dp = &zino; return; } /* take a cache-hold reference on new icachebp */ icachebp->b_refcnt++; icachebp->b_index = rounddown(inumber, INOPB(&sblock)); ip->i_bp = icachebp; } if (sblock.fs_magic == FS_UFS1_MAGIC) { ip->i_dp = (union dinode *) &ip->i_bp->b_un.b_dinode1[inumber - ip->i_bp->b_index]; + dpp.dp1 = (struct ufs1_dinode *)ip->i_dp; + if (ffs_oldfscompat_inode_read(&sblock, dpp, now)) + inodirty(ip); return; } ip->i_dp = (union dinode *) &ip->i_bp->b_un.b_dinode2[inumber - ip->i_bp->b_index]; + dpp.dp2 = dp = (struct ufs2_dinode *)ip->i_dp; /* Do not check hash of inodes being created */ if (dp->di_mode != 0 && ffs_verify_dinode_ckhash(&sblock, dp)) { pwarn("INODE CHECK-HASH FAILED"); prtinode(ip); if (preen || reply("FIX") != 0) { if (preen) printf(" (FIXED)\n"); ffs_update_dinode_ckhash(&sblock, dp); inodirty(ip); } } + if (ffs_oldfscompat_inode_read(&sblock, dpp, now)) + inodirty(ip); } /* * Release a held inode. 
*/ void irelse(struct inode *ip) { /* Check for failed inode read */ if (ip->i_bp == NULL) return; if (debug && sblock.fs_magic == FS_UFS2_MAGIC && ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)ip->i_dp)) { pwarn("irelse: releasing inode with bad check-hash"); prtinode(ip); } if (ip->i_bp->b_refcnt <= 0) pfatal("irelse: releasing unreferenced ino %ju\n", (uintmax_t) ip->i_number); brelse(ip->i_bp); } /* * Special purpose version of ginode used to optimize first pass * over all the inodes in numerical order. */ static ino_t nextinum, lastvalidinum; static long readcount, readpercg, fullcnt, inobufsize, partialcnt, partialsize; union dinode * getnextinode(ino_t inumber, int rebuiltcg) { int j; long size; mode_t mode; ufs2_daddr_t ndb, blk; union dinode *dp; + union dinodep dpp; struct inode ip; static caddr_t nextinop; if (inumber != nextinum++ || inumber > lastvalidinum) errx(EEXIT, "bad inode number %ju to nextinode", (uintmax_t)inumber); if (inumber >= lastinum) { readcount++; firstinum = lastinum; blk = ino_to_fsba(&sblock, lastinum); if (readcount % readpercg == 0) { size = partialsize; lastinum += partialcnt; } else { size = inobufsize; lastinum += fullcnt; } /* * Flush old contents in case they have been updated. * If getblk encounters an error, it will already have zeroed * out the buffer, so we do not need to do so here. 
*/ if (inobuf.b_refcnt != 0) pfatal("Non-zero getnextinode() ref count %d\n", inobuf.b_refcnt); flush(fswritefd, &inobuf); getblk(&inobuf, blk, size); nextinop = inobuf.b_un.b_buf; } dp = (union dinode *)nextinop; if (sblock.fs_magic == FS_UFS1_MAGIC) { nextinop += sizeof(struct ufs1_dinode); + dpp.dp1 = (struct ufs1_dinode *)dp; } else { nextinop += sizeof(struct ufs2_dinode); + dpp.dp2 = (struct ufs2_dinode *)dp; } if ((ckhashadd & CK_INODE) != 0) { ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp); dirty(&inobuf); } if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp) != 0) { pwarn("INODE CHECK-HASH FAILED"); ip.i_bp = NULL; ip.i_dp = dp; ip.i_number = inumber; prtinode(&ip); if (preen || reply("FIX") != 0) { if (preen) printf(" (FIXED)\n"); ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp); dirty(&inobuf); } } + if (ffs_oldfscompat_inode_read(&sblock, dpp, now)) + dirty(&inobuf); if (rebuiltcg && (char *)dp == inobuf.b_un.b_buf) { /* * Try to determine if we have reached the end of the * allocated inodes. */ mode = DIP(dp, di_mode) & IFMT; if (mode == 0) { if (memcmp(dp->dp2.di_db, zino.dp2.di_db, UFS_NDADDR * sizeof(ufs2_daddr_t)) || memcmp(dp->dp2.di_ib, zino.dp2.di_ib, UFS_NIADDR * sizeof(ufs2_daddr_t)) || dp->dp2.di_mode || dp->dp2.di_size) return (NULL); return (dp); } if (!ftypeok(dp)) return (NULL); ndb = howmany(DIP(dp, di_size), sblock.fs_bsize); if (ndb < 0) return (NULL); if (mode == IFBLK || mode == IFCHR) ndb++; if (mode == IFLNK) { /* * Fake ndb value so direct/indirect block checks below * will detect any garbage after symlink string. 
*/ if (DIP(dp, di_size) < (off_t)sblock.fs_maxsymlinklen) { ndb = howmany(DIP(dp, di_size), sizeof(ufs2_daddr_t)); if (ndb > UFS_NDADDR) { j = ndb - UFS_NDADDR; for (ndb = 1; j > 1; j--) ndb *= NINDIR(&sblock); ndb += UFS_NDADDR; } } } for (j = ndb; ndb < UFS_NDADDR && j < UFS_NDADDR; j++) if (DIP(dp, di_db[j]) != 0) return (NULL); for (j = 0, ndb -= UFS_NDADDR; ndb > 0; j++) ndb /= NINDIR(&sblock); for (; j < UFS_NIADDR; j++) if (DIP(dp, di_ib[j]) != 0) return (NULL); } return (dp); } void setinodebuf(int cg, ino_t inosused) { + struct timespec time; ino_t inum; + /* + * Get the current value of the present time. + * This will happen before each cylinder group is scanned. + * If for some reason getting the time fails, we will use + * the last time that the superblock was updated. + */ + if (clock_gettime(CLOCK_REALTIME_FAST, &time) == 0) + now = time.tv_sec; + else + now = sblock.fs_time; inum = cg * sblock.fs_ipg; lastvalidinum = inum + inosused - 1; nextinum = inum; lastinum = inum; readcount = 0; /* Flush old contents in case they have been updated */ flush(fswritefd, &inobuf); inobuf.b_bno = 0; if (inobuf.b_un.b_buf == NULL) { inobufsize = blkroundup(&sblock, MAX(INOBUFSIZE, sblock.fs_bsize)); initbarea(&inobuf, BT_INODES); if ((inobuf.b_un.b_buf = Balloc((unsigned)inobufsize)) == NULL) errx(EEXIT, "cannot allocate space for inode buffer"); } fullcnt = inobufsize / ((sblock.fs_magic == FS_UFS1_MAGIC) ? sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)); readpercg = inosused / fullcnt; partialcnt = inosused % fullcnt; partialsize = fragroundup(&sblock, partialcnt * ((sblock.fs_magic == FS_UFS1_MAGIC) ? 
sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode))); if (partialcnt != 0) { readpercg++; } else { partialcnt = fullcnt; partialsize = inobufsize; } } int freeblock(struct inodesc *idesc) { struct dups *dlp; struct bufarea *cgbp; struct cg *cgp; ufs2_daddr_t blkno; long size, nfrags; blkno = idesc->id_blkno; if (idesc->id_type == SNAP) { pfatal("clearing a snapshot dinode\n"); return (STOP); } size = lfragtosize(&sblock, idesc->id_numfrags); if (snapblkfree(&sblock, blkno, size, idesc->id_number, std_checkblkavail)) return (KEEPON); for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) { if (chkrange(blkno, 1)) { return (SKIP); } else if (testbmap(blkno)) { for (dlp = duplist; dlp; dlp = dlp->next) { if (dlp->dup != blkno) continue; dlp->dup = duplist->dup; dlp = duplist; duplist = duplist->next; free((char *)dlp); break; } if (dlp == NULL) { clrbmap(blkno); n_blks--; } } } /* * If all successfully returned, account for them. */ if (nfrags == 0) { cgbp = cglookup(dtog(&sblock, idesc->id_blkno)); cgp = cgbp->b_un.b_cg; if (idesc->id_numfrags == sblock.fs_frag) cgp->cg_cs.cs_nbfree++; else cgp->cg_cs.cs_nffree += idesc->id_numfrags; cgdirty(cgbp); } return (KEEPON); } /* * Prepare a snapshot file for being removed. */ void snapremove(ino_t inum) { struct inodesc idesc; struct inode ip; int i; for (i = 0; i < snapcnt; i++) if (snaplist[i].i_number == inum) break; if (i == snapcnt) ginode(inum, &ip); else ip = snaplist[i]; if ((DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) { printf("snapremove: inode %jd is not a snapshot\n", (intmax_t)inum); if (i == snapcnt) irelse(&ip); return; } if (debug) printf("snapremove: remove %sactive snapshot %jd\n", i == snapcnt ? "in" : "", (intmax_t)inum); /* * If on active snapshot list, remove it. 
*/ if (i < snapcnt) { for (i++; i < FSMAXSNAP; i++) { if (sblock.fs_snapinum[i] == 0) break; snaplist[i - 1] = snaplist[i]; sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i]; } sblock.fs_snapinum[i - 1] = 0; bzero(&snaplist[i - 1], sizeof(struct inode)); snapcnt--; } memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = SNAP; idesc.id_func = snapclean; idesc.id_number = inum; (void)ckinode(ip.i_dp, &idesc); DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT); inodirty(&ip); irelse(&ip); } static int snapclean(struct inodesc *idesc) { ufs2_daddr_t blkno; struct bufarea *bp; union dinode *dp; blkno = idesc->id_blkno; if (blkno == 0) return (KEEPON); dp = idesc->id_dp; if (blkno == BLK_NOCOPY || blkno == BLK_SNAP) { if (idesc->id_lbn < UFS_NDADDR) { DIP_SET(dp, di_db[idesc->id_lbn], 0); } else { bp = idesc->id_bp; IBLK_SET(bp, bp->b_index, 0); dirty(bp); } } return (KEEPON); } /* * Notification that a block is being freed. Return zero if the free * should be allowed to proceed. Return non-zero if the snapshot file * wants to claim the block. The block will be claimed if it is an * uncopied part of one of the snapshots. It will be freed if it is * either a BLK_NOCOPY or has already been copied in all of the snapshots. * If a fragment is being freed, then all snapshots that care about * it must make a copy since a snapshot file can only claim full sized * blocks. Note that if more than one snapshot file maps the block, * we can pick one at random to claim it. Since none of the snapshots * can change, we are assurred that they will all see the same unmodified * image. When deleting a snapshot file (see ino_trunc above), we * must push any of these claimed blocks to one of the other snapshots * that maps it. These claimed blocks are easily identified as they will * have a block number equal to their logical block number within the * snapshot. 
A copied block can never have this property because they * must always have been allocated from a BLK_NOCOPY location. */ int snapblkfree(struct fs *fs, ufs2_daddr_t bno, long size, ino_t inum, ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags)) { union dinode *dp; struct inode ip; struct bufarea *snapbp; ufs_lbn_t lbn; ufs2_daddr_t blkno, relblkno; int i, frags, claimedblk, copydone; /* If no snapshots, nothing to do */ if (snapcnt == 0) return (0); if (debug) printf("snapblkfree: in ino %jd free blkno %jd, size %jd\n", (intmax_t)inum, (intmax_t)bno, (intmax_t)size); relblkno = blknum(fs, bno); lbn = fragstoblks(fs, relblkno); /* Direct blocks are always pre-copied */ if (lbn < UFS_NDADDR) return (0); copydone = 0; claimedblk = 0; for (i = 0; i < snapcnt; i++) { /* * Lookup block being freed. */ ip = snaplist[i]; dp = ip.i_dp; blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number, lbn, &frags, &snapbp); /* * Check to see if block needs to be copied. */ if (blkno == 0) { /* * A block that we map is being freed. If it has not * been claimed yet, we will claim or copy it (below). */ claimedblk = 1; } else if (blkno == BLK_SNAP) { /* * No previous snapshot claimed the block, * so it will be freed and become a BLK_NOCOPY * (don't care) for us. */ if (claimedblk) pfatal("snapblkfree: inconsistent block type"); IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY); dirty(snapbp); brelse(snapbp); continue; } else /* BLK_NOCOPY or default */ { /* * If the snapshot has already copied the block * (default), or does not care about the block, * it is not needed. */ brelse(snapbp); continue; } /* * If this is a full size block, we will just grab it * and assign it to the snapshot inode. Otherwise we * will proceed to copy it. See explanation for this * routine as to why only a single snapshot needs to * claim this block. 
*/ if (size == fs->fs_bsize) { if (debug) printf("Grabonremove snapshot %ju lbn %jd " "from inum %ju\n", (intmax_t)ip.i_number, (intmax_t)lbn, (uintmax_t)inum); IBLK_SET(snapbp, snapbp->b_index, relblkno); dirty(snapbp); brelse(snapbp); DIP_SET(dp, di_blocks, DIP(dp, di_blocks) + btodb(size)); inodirty(&ip); return (1); } /* First time through, read the contents of the old block. */ if (copydone == 0) { copydone = 1; if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno), fs->fs_bsize) != 0) { pfatal("Could not read snapshot %ju block " "%jd\n", (intmax_t)ip.i_number, (intmax_t)relblkno); continue; } } /* * This allocation will never require any additional * allocations for the snapshot inode. */ blkno = allocblk(dtog(fs, relblkno), fs->fs_frag, checkblkavail); if (blkno == 0) { pfatal("Could not allocate block for snapshot %ju\n", (intmax_t)ip.i_number); continue; } if (debug) printf("Copyonremove: snapino %jd lbn %jd for inum %ju " "size %ld new blkno %jd\n", (intmax_t)ip.i_number, (intmax_t)lbn, (uintmax_t)inum, size, (intmax_t)blkno); blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize); IBLK_SET(snapbp, snapbp->b_index, blkno); dirty(snapbp); brelse(snapbp); DIP_SET(dp, di_blocks, DIP(dp, di_blocks) + btodb(fs->fs_bsize)); inodirty(&ip); } return (0); } /* * Notification that a block is being written. Return if the block * is part of a snapshot as snapshots never track other snapshots. * The block will be copied in all of the snapshots that are tracking * it and have not yet copied it. Some buffers may hold more than one * block. Here we need to check each block in the buffer. */ void copyonwrite(struct fs *fs, struct bufarea *bp, ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags)) { ufs2_daddr_t copyblkno; long i, numblks; /* If no snapshots, nothing to do. */ if (snapcnt == 0) return; numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize; if (debug) prtbuf(bp, "copyonwrite: checking %jd block%s in buffer", (intmax_t)numblks, numblks > 1 ? 
"s" : ""); copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno)); for (i = 0; i < numblks; i++) { chkcopyonwrite(fs, copyblkno, checkblkavail); copyblkno += fs->fs_frag; } } static void chkcopyonwrite(struct fs *fs, ufs2_daddr_t copyblkno, ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags)) { struct inode ip; union dinode *dp; struct bufarea *snapbp; ufs2_daddr_t blkno; int i, frags, copydone; ufs_lbn_t lbn; lbn = fragstoblks(fs, copyblkno); /* Direct blocks are always pre-copied */ if (lbn < UFS_NDADDR) return; copydone = 0; for (i = 0; i < snapcnt; i++) { /* * Lookup block being freed. */ ip = snaplist[i]; dp = ip.i_dp; blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp); /* * Check to see if block needs to be copied. */ if (blkno != 0) { /* * A block that we have already copied or don't track. */ brelse(snapbp); continue; } /* First time through, read the contents of the old block. */ if (copydone == 0) { copydone = 1; if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno), fs->fs_bsize) != 0) { pfatal("Could not read snapshot %ju block " "%jd\n", (intmax_t)ip.i_number, (intmax_t)copyblkno); continue; } } /* * This allocation will never require any additional * allocations for the snapshot inode. */ if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag, checkblkavail)) == 0) { pfatal("Could not allocate block for snapshot %ju\n", (intmax_t)ip.i_number); continue; } if (debug) prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using " "blkno %ju setting in buffer", (intmax_t)ip.i_number, (intmax_t)lbn, (intmax_t)blkno); blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize); IBLK_SET(snapbp, snapbp->b_index, blkno); dirty(snapbp); brelse(snapbp); DIP_SET(dp, di_blocks, DIP(dp, di_blocks) + btodb(fs->fs_bsize)); inodirty(&ip); } return; } /* * Traverse an inode and check that its block count is correct * fixing it if necessary. 
*/ void check_blkcnt(struct inode *ip) { struct inodesc idesc; union dinode *dp; ufs2_daddr_t ndb; int j, ret, offset; dp = ip->i_dp; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_func = pass1check; idesc.id_number = ip->i_number; idesc.id_type = (DIP(dp, di_flags) & SF_SNAPSHOT) == 0 ? ADDR : SNAP; (void)ckinode(dp, &idesc); if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) { ndb = howmany(dp->dp2.di_extsize, sblock.fs_bsize); for (j = 0; j < UFS_NXADDR; j++) { if (--ndb == 0 && (offset = blkoff(&sblock, dp->dp2.di_extsize)) != 0) idesc.id_numfrags = numfrags(&sblock, fragroundup(&sblock, offset)); else idesc.id_numfrags = sblock.fs_frag; if (dp->dp2.di_extb[j] == 0) continue; idesc.id_blkno = dp->dp2.di_extb[j]; ret = (*idesc.id_func)(&idesc); if (ret & STOP) break; } } idesc.id_entryno *= btodb(sblock.fs_fsize); if (DIP(dp, di_blocks) != idesc.id_entryno) { if (!(sujrecovery && preen)) { pwarn("INCORRECT BLOCK COUNT I=%lu (%ju should be %ju)", (u_long)idesc.id_number, (uintmax_t)DIP(dp, di_blocks), (uintmax_t)idesc.id_entryno); if (preen) printf(" (CORRECTED)\n"); else if (reply("CORRECT") == 0) return; } if (bkgrdflag == 0) { DIP_SET(dp, di_blocks, idesc.id_entryno); inodirty(ip); } else { cmd.value = idesc.id_number; cmd.size = idesc.id_entryno - DIP(dp, di_blocks); if (debug) printf("adjblkcnt ino %ju amount %lld\n", (uintmax_t)cmd.value, (long long)cmd.size); if (sysctl(adjblkcnt, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST INODE BLOCK COUNT", cmd.value); } } } void freeinodebuf(void) { struct bufarea *bp; int i; /* * Flush old contents in case they have been updated. */ flush(fswritefd, &inobuf); if (inobuf.b_un.b_buf != NULL) free((char *)inobuf.b_un.b_buf); inobuf.b_un.b_buf = NULL; firstinum = lastinum = 0; /* * Reload the snapshot inodes in case any of them changed. 
*/ for (i = 0; i < snapcnt; i++) { bp = snaplist[i].i_bp; bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno, bp->b_size); } } /* * Routines to maintain information about directory inodes. * This is built during the first pass and used during the * second and third passes. * * Enter inodes into the cache. */ struct inoinfo * cacheino(union dinode *dp, ino_t inumber) { struct inoinfo *inp; int i, blks; if (getinoinfo(inumber) != NULL) pfatal("cacheino: duplicate entry for ino %jd\n", (intmax_t)inumber); if (howmany(DIP(dp, di_size), sblock.fs_bsize) > UFS_NDADDR) blks = UFS_NDADDR + UFS_NIADDR; else if (DIP(dp, di_size) > 0) blks = howmany(DIP(dp, di_size), sblock.fs_bsize); else blks = 1; inp = (struct inoinfo *) Malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs2_daddr_t)); if (inp == NULL) errx(EEXIT, "cannot increase directory list"); SLIST_INSERT_HEAD(&inphash[inumber % dirhash], inp, i_hash); inp->i_flags = 0; inp->i_parent = inumber == UFS_ROOTINO ? UFS_ROOTINO : (ino_t)0; inp->i_dotdot = (ino_t)0; inp->i_number = inumber; inp->i_isize = DIP(dp, di_size); inp->i_depth = DIP(dp, di_dirdepth); inp->i_numblks = blks; for (i = 0; i < MIN(blks, UFS_NDADDR); i++) inp->i_blks[i] = DIP(dp, di_db[i]); if (blks > UFS_NDADDR) for (i = 0; i < UFS_NIADDR; i++) inp->i_blks[UFS_NDADDR + i] = DIP(dp, di_ib[i]); if (inplast == listmax) { listmax += 100; inpsort = (struct inoinfo **)reallocarray((char *)inpsort, listmax, sizeof(struct inoinfo *)); if (inpsort == NULL) errx(EEXIT, "cannot increase directory list"); } inpsort[inplast++] = inp; return (inp); } /* * Look up an inode cache structure. */ struct inoinfo * getinoinfo(ino_t inumber) { struct inoinfo *inp; SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) { if (inp->i_number != inumber) continue; return (inp); } return (NULL); } /* * Remove an entry from the inode cache and disk-order sorted list. * Return 0 on success and 1 on failure. 
*/ int removecachedino(ino_t inumber) { struct inoinfo *inp, **inpp; char *listtype; listtype = "hash"; SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) { if (inp->i_number != inumber) continue; SLIST_REMOVE(&inphash[inumber % dirhash], inp, inoinfo, i_hash); for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) { if (*inpp != inp) continue; *inpp = inpsort[inplast - 1]; inplast--; free(inp); return (0); } listtype = "sort"; break; } pfatal("removecachedino: entry for ino %jd not found on %s list\n", (intmax_t)inumber, listtype); return (1); } /* * Clean up all the inode cache structure. */ void inocleanup(void) { struct inoinfo **inpp; if (inphash == NULL) return; for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) free((char *)(*inpp)); free((char *)inphash); inphash = NULL; free((char *)inpsort); inpsort = NULL; } void inodirty(struct inode *ip) { if (sblock.fs_magic == FS_UFS2_MAGIC) ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)ip->i_dp); dirty(ip->i_bp); } void clri(struct inodesc *idesc, const char *type, int flag) { union dinode *dp; struct inode ip; ginode(idesc->id_number, &ip); dp = ip.i_dp; if (flag == 1) { pwarn("%s %s", type, (DIP(dp, di_mode) & IFMT) == IFDIR ? 
"DIR" : "FILE"); prtinode(&ip); printf("\n"); } if (preen || reply("CLEAR") == 1) { if (preen) printf(" (CLEARED)\n"); n_files--; if (bkgrdflag == 0) { if (idesc->id_type == SNAP) { snapremove(idesc->id_number); idesc->id_type = ADDR; } (void)ckinode(dp, idesc); inoinfo(idesc->id_number)->ino_state = USTATE; clearinode(dp); inodirty(&ip); } else { cmd.value = idesc->id_number; cmd.size = -DIP(dp, di_nlink); if (debug) printf("adjrefcnt ino %ld amt %lld\n", (long)cmd.value, (long long)cmd.size); if (sysctl(adjrefcnt, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST INODE", cmd.value); } } irelse(&ip); } int findname(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) { idesc->id_entryno++; return (KEEPON); } memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1); return (STOP|FOUND); } int findino(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (dirp->d_ino == 0) return (KEEPON); if (strcmp(dirp->d_name, idesc->id_name) == 0 && dirp->d_ino >= UFS_ROOTINO && dirp->d_ino < maxino) { idesc->id_parent = dirp->d_ino; return (STOP|FOUND); } return (KEEPON); } int clearentry(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) { idesc->id_entryno++; return (KEEPON); } dirp->d_ino = 0; return (STOP|FOUND|ALTERED); } void prtinode(struct inode *ip) { char *p; union dinode *dp; struct passwd *pw; time_t t; dp = ip->i_dp; printf(" I=%lu ", (u_long)ip->i_number); if (ip->i_number < UFS_ROOTINO || ip->i_number >= maxino) return; printf(" OWNER="); if ((pw = getpwuid((int)DIP(dp, di_uid))) != NULL) printf("%s ", pw->pw_name); else printf("%u ", (unsigned)DIP(dp, di_uid)); printf("MODE=%o\n", DIP(dp, di_mode)); if (preen) printf("%s: ", cdevname); printf("SIZE=%ju ", (uintmax_t)DIP(dp, di_size)); t = DIP(dp, di_mtime); if ((p = ctime(&t)) != NULL) printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]); } void 
blkerror(ino_t ino, const char *type, ufs2_daddr_t blk) { pfatal("%jd %s I=%ju", (intmax_t)blk, type, (uintmax_t)ino); printf("\n"); switch (inoinfo(ino)->ino_state) { case FSTATE: case FZLINK: inoinfo(ino)->ino_state = FCLEAR; return; case DSTATE: case DZLINK: inoinfo(ino)->ino_state = DCLEAR; return; case FCLEAR: case DCLEAR: return; default: errx(EEXIT, "BAD STATE %d TO BLKERR", inoinfo(ino)->ino_state); /* NOTREACHED */ } } /* * allocate an unused inode */ ino_t allocino(ino_t request, int type) { ino_t ino; struct inode ip; union dinode *dp; struct bufarea *cgbp; struct cg *cgp; int cg, anyino; anyino = 0; if (request == 0) { request = UFS_ROOTINO; anyino = 1; } else if (inoinfo(request)->ino_state != USTATE) return (0); retry: for (ino = request; ino < maxino; ino++) if (inoinfo(ino)->ino_state == USTATE) break; if (ino >= maxino) return (0); cg = ino_to_cg(&sblock, ino); cgbp = cglookup(cg); cgp = cgbp->b_un.b_cg; if (!check_cgmagic(cg, cgbp)) { if (anyino == 0) return (0); request = (cg + 1) * sblock.fs_ipg; goto retry; } setbit(cg_inosused(cgp), ino % sblock.fs_ipg); cgp->cg_cs.cs_nifree--; switch (type & IFMT) { case IFDIR: inoinfo(ino)->ino_state = DSTATE; cgp->cg_cs.cs_ndir++; break; case IFREG: case IFLNK: inoinfo(ino)->ino_state = FSTATE; break; default: return (0); } cgdirty(cgbp); ginode(ino, &ip); dp = ip.i_dp; memset(dp, 0, ((sblock.fs_magic == FS_UFS1_MAGIC) ? 
sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode))); DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1, std_checkblkavail)); if (DIP(dp, di_db[0]) == 0) { inoinfo(ino)->ino_state = USTATE; inodirty(&ip); irelse(&ip); return (0); } DIP_SET(dp, di_mode, type); DIP_SET(dp, di_atime, time(NULL)); DIP_SET(dp, di_ctime, DIP(dp, di_atime)); DIP_SET(dp, di_mtime, DIP(dp, di_ctime)); DIP_SET(dp, di_size, sblock.fs_fsize); DIP_SET(dp, di_blocks, btodb(sblock.fs_fsize)); n_files++; inodirty(&ip); irelse(&ip); inoinfo(ino)->ino_type = IFTODT(type); return (ino); } /* * deallocate an inode */ void freeino(ino_t ino) { struct inodesc idesc; union dinode *dp; struct inode ip; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = freeblock; idesc.id_number = ino; ginode(ino, &ip); dp = ip.i_dp; (void)ckinode(dp, &idesc); clearinode(dp); inodirty(&ip); irelse(&ip); inoinfo(ino)->ino_state = USTATE; n_files--; } diff --git a/share/man/man4/ffs.4 b/share/man/man4/ffs.4 index fa0cf9be1510..8f18dacffcc4 100644 --- a/share/man/man4/ffs.4 +++ b/share/man/man4/ffs.4 @@ -1,328 +1,332 @@ .\" Copyright (c) 2001 Networks Associates Technology, Inc. .\" All rights reserved. .\" .\" This software was developed for the FreeBSD Project by Chris .\" Costello at Safeport Network Services and NAI Labs, the Security .\" Research Division of Network Associates, Inc. under DARPA/SPAWAR .\" contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS .\" research program. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. 
Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .Dd May 3, 2020 .Dt FFS 4 .Os .Sh NAME .Nm ffs .Nd Berkeley fast file system .Sh SYNOPSIS In the kernel configuration file: .Cd "options FFS" .Cd "options QUOTA" .Cd "options SOFTUPDATES" .Cd "options SUIDDIR" .Cd "options UFS_ACL" .Cd "options UFS_DIRHASH" .Cd "options UFS_EXTATTR" .Cd "options UFS_EXTATTR_AUTOSTART" .Cd "options UFS_GJOURNAL" .Pp In .Xr fstab 5 : .Bd -literal -compact /dev/disk0a /mnt ufs rw 1 1 .Ed .Sh DESCRIPTION The Berkeley fast file system provides facilities to store file system data onto a disk device. .Nm has been optimized over the years for speed and reliability and is the default .Fx file system. .Ss Quotas .Bl -tag -width 2n .It Cd "options QUOTA" This option allows system administrators to set limits on disk usage on a per-user basis. Quotas can be used only on file systems mounted with the .Cm quota option; see .Xr quota 1 and .Xr edquota 8 . 
.El .Ss Soft Updates .Bl -tag -width 2n .It Cd "options SOFTUPDATES" The soft updates feature tracks writes to the disk and enforces metadata update dependencies (e.g., updating free block maps) to ensure that the file system remains consistent. .Pp To create a new file system with soft updates enabled, use the .Xr newfs 8 command: .Pp .D1 Nm newfs Fl U Ar fs .Pp .Ar fs can be either a mount point listed in .Xr fstab 5 .Pq e.g. , Pa /usr , or a disk device .Pq e.g., Pa /dev/da0a . .Pp It is possible to enable soft updates on an .Em unmounted file system by using the .Xr tunefs 8 command: .Pp .D1 Nm tunefs Fl n Cm enable Ar fs .Pp Soft updates can also add journaling that reduces the time spent by .Xr fsck_ffs 8 cleaning up a filesystem after a crash from several minutes to a few seconds. The journal is placed in an inode named .Pa .sujournal , and is kept as a circular log of segments containing records that describe metadata operations. .Pp To create a new file system with both soft updates and soft updates journaling enabled, use the following command: .Pp .D1 Nm newfs Fl j Ar fs .Pp This runs the .Xr tunefs 8 command after the .Xr newfs 8 command with the .Fl U flag enabled. It is possible to enable soft updates journaling on an .Em unmounted file system by using the .Xr tunefs 8 command: .Pp .D1 Nm tunefs Fl j Cm enable Ar fs .Pp This flag automatically enables the soft updates feature when it is not enabled. Note that this .Xr tunefs 8 command will fail if a file .Pa .sujournal already exists before enabling the soft updates journaling. .El .Ss File Ownership Inheritance .Bl -tag -width 2n .It Cd "options SUIDDIR" For use in file sharing environments on networks including .Tn "Microsoft Windows" and .Tn "Apple Macintosh" computers, this option allows files on file systems mounted with the .Cm suiddir option to inherit the ownership of their directory, i.e., .Dq "if it's my directory, it must be my file."
.El .Ss Access Control Lists .Bl -tag -width 2n .It Cd "options UFS_ACL" Access control lists allow the association of fine-grained discretionary access control information with files and directories. This option requires the presence of the .Dv UFS_EXTATTR option, and it is recommended that .Dv UFS_EXTATTR_AUTOSTART is included as well, so that ACLs are enabled atomically upon mounting the file system. .El .Pp In order to enable support for ACLs, two extended attributes must be available in the .Dv EXTATTR_NAMESPACE_SYSTEM namespace: .Pa posix1e.acl_access , which holds the access ACL, and .Pa posix1e.acl_default , which holds the default ACL for directories. If you are using file system extended attributes, the following commands may be used to allocate space for and create the necessary EA backing files for ACLs in the root of each file system. In these examples, the root file system is used; see .Sx "Extended Attributes" for more details. .Bd -literal -offset indent mkdir -p /.attribute/system cd /.attribute/system extattrctl initattr -p / 388 posix1e.acl_access extattrctl initattr -p / 388 posix1e.acl_default .Ed .Pp On the next mount of the root file system, the attributes will be automatically started if .Dv UFS_EXTATTR_AUTOSTART is included in the kernel configuration, and ACLs will be enabled. .Ss Directory Hashing .Bl -tag -width 2n .It Cd "options UFS_DIRHASH" Implements a hash-based lookup scheme for directories in order to speed up accesses to very large directories. .El .Ss Extended Attributes .Bl -tag -width 2n .It Cd "options UFS_EXTATTR" Extended attributes allow the association of additional arbitrary metadata with files and directories, which can be assigned and retrieved from userland as well as from within the kernel; see .Xr extattrctl 8 . .It Cd "options UFS_EXTATTR_AUTOSTART" If this option is defined, .Nm will search for a .Pa .attribute subdirectory of the file system root during the mount operation. 
If found, extended attribute support will be automatically started for that file system. .El .Ss GEOM-based Journaling .Bl -tag -width 2n .It Cd "options UFS_GJOURNAL" Implements a block level journaling of a UFS file system, which is for both data and metadata. To enable this, create a .Xr gjournal 8 GEOM provider for a block device by using the following command: .Pp .D1 Nm gjournal label Ar da0 .Pp In this example, .Pa /dev/da0 is used as the target block device, and .Pa /dev/da0.journal is created. Then create a new file system by using .Xr newfs 8 with the block level journaling flag and mount it: .Pp .D1 Nm newfs Fl J Ar /dev/da0.journal .D1 Nm mount Fl o Cm async Ar /dev/da0.journal Ar /mnt .Pp .Cm async option is not mandatory but recommended for better performance because the journaling guarantees the consistency of an .Cm async mount. .Pp It is also possible to enable the block level journaling on an existing file system. To do so, use .Xr gjournal 8 utility to label the underlying block device and .Xr tunefs 8 utility to enable the block level journaling flag: .Pp .D1 Nm gjournal label Ar da0 .D1 Nm tunefs Fl J Cm enable Ar /dev/da0.journal .D1 Nm mount Fl o Cm async Ar /dev/da0.journal Ar /mnt .El .Ss Xr sysctl 8 MIBs The following .Xr sysctl 8 MIBs are defined for use with .Nm : .Bl -hang -width ".Va vfs.ffs.doreallocblk" .It Va vfs.ffs.doasyncfree Asynchronously write out modified i-node and indirect blocks upon reallocating file system blocks to be contiguous. .Pq Default: 1 . .It Va vfs.ffs.doreallocblks Enable support for the rearrangement of blocks to be contiguous. .Pq Default: 1 . +.It Va vfs.ffs.prttimechgs +Print a console message when timestamps for UFS1 filesystems are found +to be in the future and are changed to be the present time. +.Pq Default: 0 . .El .Sh HISTORY The .Nm manual page first appeared in .Fx 4.5 . 
.Sh SEE ALSO .Xr quota 1 , .Xr acl 3 , .Xr extattr 3 , .Xr edquota 8 , .Xr extattrctl 8 , .Xr fsck_ffs 8 , .Xr sysctl 8 , .Xr tunefs 8 .Rs .%A M. McKusick .%A W. Joy .%A S. Leffler .%A R. Fabry .%D August 1984 .%T "A Fast File System for UNIX" .%J "ACM Transactions on Computer Systems" .%N 2 .%V 3 .%P 181-197 .Re .Rs .%A M. McKusick .%D June 2000 .%T "Soft Updates: A Technique for Eliminating Most Synchronous Writes in the Fast Filesystem" .%J "Proceedings of the Freenix Track at the 1999 Usenix Annual Technical Conference" .%P 71-84 .Re .Rs .%A M. McKusick .%A J. Roberson .%D May 2010 .%T "Journaled Soft-updates" .%J "BSD Canada Conference 2010 (BSDCan)" .Re diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index 119de616003d..0dc52ee48b1e 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -1,243 +1,244 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _UFS_FFS_EXTERN_H #define _UFS_FFS_EXTERN_H #ifndef _KERNEL #error "No user-serving parts inside" #else struct buf; struct cg; struct fid; struct fs; struct inode; struct malloc_type; struct mount; struct thread; struct sockaddr; struct statfs; struct ucred; struct vnode; struct vop_fsync_args; struct vop_reallocblks_args; struct workhead; int ffs_alloc(struct inode *, ufs2_daddr_t, ufs2_daddr_t, int, int, struct ucred *, ufs2_daddr_t *); int ffs_balloc_ufs1(struct vnode *a_vp, off_t a_startoffset, int a_size, struct ucred *a_cred, int a_flags, struct buf **a_bpp); int ffs_balloc_ufs2(struct vnode *a_vp, off_t a_startoffset, int a_size, struct ucred *a_cred, int a_flags, struct buf **a_bpp); void ffs_blkfree(struct ufsmount *, struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t, __enum_uint8(vtype), struct workhead *, uint64_t); ufs2_daddr_t ffs_blkpref_ufs1(struct inode *, ufs_lbn_t, int, ufs1_daddr_t *); ufs2_daddr_t ffs_blkpref_ufs2(struct inode *, ufs_lbn_t, int, ufs2_daddr_t *); void ffs_blkrelease_finish(struct ufsmount *, uint64_t); uint64_t ffs_blkrelease_start(struct ufsmount *, struct vnode *, ino_t); uint32_t ffs_calc_sbhash(struct fs *); int ffs_checkfreefile(struct fs *, struct vnode *, ino_t); void ffs_clrblock(struct fs *, uint8_t *, ufs1_daddr_t); void ffs_clusteracct(struct fs *, struct cg *, ufs1_daddr_t, int); void ffs_bdflush(struct bufobj *, struct buf *); int ffs_copyonwrite(struct vnode *, struct buf *); int 
ffs_flushfiles(struct mount *, int, struct thread *); void ffs_fragacct(struct fs *, int, int32_t [], int); int ffs_freefile(struct ufsmount *, struct fs *, struct vnode *, ino_t, int, struct workhead *); void ffs_fserr(struct fs *, ino_t, char *); int ffs_getcg(struct fs *, struct vnode *, uint64_t, int, struct buf **, struct cg **); int ffs_inotovp(struct mount *, ino_t, uint64_t, int, struct vnode **, int); int ffs_isblock(struct fs *, uint8_t *, ufs1_daddr_t); int ffs_isfreeblock(struct fs *, uint8_t *, ufs1_daddr_t); void ffs_oldfscompat_write(struct fs *); +bool ffs_oldfscompat_inode_read(struct fs *, union dinodep, time_t); int ffs_own_mount(const struct mount *mp); int ffs_sbsearch(void *, struct fs **, int, struct malloc_type *, int (*)(void *, off_t, void **, int)); int ffs_reallocblks(struct vop_reallocblks_args *); int ffs_realloccg(struct inode *, ufs2_daddr_t, ufs2_daddr_t, ufs2_daddr_t, int, int, int, struct ucred *, struct buf **); int ffs_reload(struct mount *, int); int ffs_sbget(void *, struct fs **, off_t, int, struct malloc_type *, int (*)(void *, off_t, void **, int)); int ffs_sbput(void *, struct fs *, off_t, int (*)(void *, off_t, void *, int)); int ffs_sbupdate(struct ufsmount *, int, int); void ffs_setblock(struct fs *, uint8_t *, ufs1_daddr_t); int ffs_snapblkfree(struct fs *, struct vnode *, ufs2_daddr_t, long, ino_t, __enum_uint8(vtype), struct workhead *); void ffs_snapremove(struct vnode *vp); int ffs_snapshot(struct mount *mp, char *snapfile); void ffs_snapshot_mount(struct mount *mp); void ffs_snapshot_unmount(struct mount *mp); void ffs_susp_initialize(void); void ffs_susp_uninitialize(void); void ffs_sync_snap(struct mount *, int); int ffs_syncvnode(struct vnode *vp, int waitfor, int flags); int ffs_truncate(struct vnode *, off_t, int, struct ucred *); int ffs_update(struct vnode *, int); void ffs_update_dinode_ckhash(struct fs *, struct ufs2_dinode *); int ffs_verify_dinode_ckhash(struct fs *, struct ufs2_dinode *); int 
ffs_valloc(struct vnode *, int, struct ucred *, struct vnode **); int ffs_vfree(struct vnode *, ino_t, int); vfs_vget_t ffs_vget; int ffs_vgetf(struct mount *, ino_t, int, struct vnode **, int); void process_deferred_inactive(struct mount *mp); int ffs_fsfail_cleanup(struct ufsmount *, int); int ffs_fsfail_cleanup_locked(struct ufsmount *, int); int ffs_breadz(struct ufsmount *, struct vnode *, daddr_t, daddr_t, int, daddr_t *, int *, int, struct ucred *, int, void (*)(struct buf *), struct buf **); /* * Flags to ffs_vgetf */ #define FFSV_FORCEINSMQ 0x0001 /* Force insertion into mount list */ #define FFSV_REPLACE 0x0002 /* Replace existing vnode */ #define FFSV_REPLACE_DOOMED 0x0004 /* Replace existing vnode if it is doomed */ #define FFSV_FORCEINODEDEP 0x0008 /* Force allocation of inodedep, ignore MNT_SOFTDEP */ #define FFSV_NEWINODE 0x0010 /* Newly allocated inode */ /* * Flags to ffs_reload */ #define FFSR_FORCE 0x0001 #define FFSR_UNSUSPEND 0x0002 /* * Definitions for TRIM interface * * Special keys and recommended hash table size */ #define NOTRIM_KEY 1 /* never written, so don't call trim for it */ #define SINGLETON_KEY 2 /* only block being freed, so trim it now */ #define FIRST_VALID_KEY 3 /* first valid key describing a block range */ #define MAXTRIMIO 1024 /* maximum expected outstanding trim requests */ extern struct vop_vector ffs_vnodeops1; extern struct vop_vector ffs_fifoops1; extern struct vop_vector ffs_vnodeops2; extern struct vop_vector ffs_fifoops2; /* * Soft update function prototypes. 
*/ int softdep_check_suspend(struct mount *, struct vnode *, int, int, int, int); void softdep_get_depcounts(struct mount *, int *, int *); void softdep_initialize(void); void softdep_uninitialize(void); int softdep_mount(struct vnode *, struct mount *, struct fs *, struct ucred *); void softdep_unmount(struct mount *); void softdep_handle_error(struct buf *); int softdep_move_dependencies(struct buf *, struct buf *); int softdep_flushworklist(struct mount *, int *, struct thread *); int softdep_flushfiles(struct mount *, int, struct thread *); void softdep_update_inodeblock(struct inode *, struct buf *, int); void softdep_load_inodeblock(struct inode *); void softdep_freefile(struct vnode *, ino_t, int); int softdep_request_cleanup(struct fs *, struct vnode *, struct ucred *, int); int softdep_prerename(struct vnode *, struct vnode *, struct vnode *, struct vnode *); int softdep_prelink(struct vnode *, struct vnode *, struct componentname *); void softdep_setup_freeblocks(struct inode *, off_t, int); void softdep_setup_inomapdep(struct buf *, struct inode *, ino_t, int); void softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t, int, int); void softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t, ufs2_daddr_t, long, long, struct buf *); void softdep_setup_allocext(struct inode *, ufs_lbn_t, ufs2_daddr_t, ufs2_daddr_t, long, long, struct buf *); void softdep_setup_allocindir_meta(struct buf *, struct inode *, struct buf *, int, ufs2_daddr_t); void softdep_setup_allocindir_page(struct inode *, ufs_lbn_t, struct buf *, int, ufs2_daddr_t, ufs2_daddr_t, struct buf *); void softdep_setup_blkfree(struct mount *, struct buf *, ufs2_daddr_t, int, struct workhead *, bool); void softdep_setup_inofree(struct mount *, struct buf *, ino_t, struct workhead *, bool); void softdep_setup_sbupdate(struct ufsmount *, struct fs *, struct buf *); void softdep_fsync_mountdev(struct vnode *); int softdep_sync_metadata(struct vnode *); int 
softdep_sync_buf(struct vnode *, struct buf *, int); int softdep_fsync(struct vnode *); int softdep_prealloc(struct vnode *, int); int softdep_journal_lookup(struct mount *, struct vnode **); void softdep_journal_freeblocks(struct inode *, struct ucred *, off_t, int); void softdep_journal_fsync(struct inode *); void softdep_buf_append(struct buf *, struct workhead *); void softdep_inode_append(struct inode *, struct ucred *, struct workhead *); void softdep_freework(struct workhead *); /* * Things to request flushing in softdep_request_cleanup() */ #define FLUSH_INODES 1 #define FLUSH_INODES_WAIT 2 #define FLUSH_BLOCKS 3 #define FLUSH_BLOCKS_WAIT 4 /* * Flag to ffs_syncvnode() to request flushing of data only, * but skip the ffs_update() on the inode itself. Used to avoid * deadlock when flushing snapshot inodes while holding snaplk. */ #define NO_INO_UPDT 0x00000001 /* * Request data sync only from ffs_syncvnode(), not touching even more * metadata than NO_INO_UPDT. */ #define DATA_ONLY 0x00000002 int ffs_rdonly(struct inode *); TAILQ_HEAD(snaphead, inode); struct snapdata { LIST_ENTRY(snapdata) sn_link; struct snaphead sn_head; daddr_t sn_listsize; daddr_t *sn_blklist; struct lock sn_lock; }; #endif /* _KERNEL */ #endif /* !_UFS_FFS_EXTERN_H */ diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c index 7c2971d885ea..e2b09da86ae5 100644 --- a/sys/ufs/ffs/ffs_subr.c +++ b/sys/ufs/ffs/ffs_subr.c @@ -1,1181 +1,1240 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #ifndef _KERNEL +#include #include #include #include #include #include #include #include uint32_t calculate_crc32c(uint32_t, const void *, size_t); uint32_t ffs_calc_sbhash(struct fs *); struct malloc_type; #define UFS_MALLOC(size, type, flags) malloc(size) #define UFS_FREE(ptr, type) free(ptr) #define maxphys MAXPHYS #else /* _KERNEL */ #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #define UFS_MALLOC(size, type, flags) malloc(size, type, flags) #define UFS_FREE(ptr, type) free(ptr, type) #endif /* _KERNEL */ /* * Verify an inode check-hash. 
*/ int ffs_verify_dinode_ckhash(struct fs *fs, struct ufs2_dinode *dip) { uint32_t ckhash, save_ckhash; /* * Return success if unallocated or we are not doing inode check-hash. */ if (dip->di_mode == 0 || (fs->fs_metackhash & CK_INODE) == 0) return (0); /* * Exclude di_ckhash from the crc32 calculation, e.g., always use * a check-hash value of zero when calculating the check-hash. */ save_ckhash = dip->di_ckhash; dip->di_ckhash = 0; ckhash = calculate_crc32c(~0L, (void *)dip, sizeof(*dip)); dip->di_ckhash = save_ckhash; if (save_ckhash == ckhash) return (0); return (EINVAL); } /* * Update an inode check-hash. */ void ffs_update_dinode_ckhash(struct fs *fs, struct ufs2_dinode *dip) { if (dip->di_mode == 0 || (fs->fs_metackhash & CK_INODE) == 0) return; /* * Exclude old di_ckhash from the crc32 calculation, e.g., always use * a check-hash value of zero when calculating the new check-hash. */ dip->di_ckhash = 0; dip->di_ckhash = calculate_crc32c(~0L, (void *)dip, sizeof(*dip)); } /* * These are the low-level functions that actually read and write * the superblock and its associated data. */ static off_t sblock_try[] = SBLOCKSEARCH; static int readsuper(void *, struct fs **, off_t, int, int (*)(void *, off_t, void **, int)); static void ffs_oldfscompat_read(struct fs *, ufs2_daddr_t); static int validate_sblock(struct fs *, int); /* * Read a superblock from the devfd device. * * If an alternate superblock is specified, it is read. Otherwise the * set of locations given in the SBLOCKSEARCH list is searched for a * superblock. Memory is allocated for the superblock by the readfunc and * is returned. If filltype is non-NULL, additional memory is allocated * of type filltype and filled in with the superblock summary information. * All memory is freed when any error is returned. * * If a superblock is found, zero is returned. Otherwise one of the * following error values is returned: * EIO: non-existent or truncated superblock. * EIO: error reading summary information. 
*     ENOENT: no usable known superblock found.
 *     EILSEQ: filesystem with wrong byte order found.
 *     ENOMEM: failed to allocate space for the superblock.
 *     EINVAL: The previous newfs operation on this volume did not complete.
 *         The administrator must complete newfs before using this volume.
 */
int
ffs_sbget(void *devfd, struct fs **fsp, off_t sblock, int flags,
    struct malloc_type *filltype,
    int (*readfunc)(void *devfd, off_t loc, void **bufp, int size))
{
	struct fs *fs;
	struct fs_summary_info *fs_si;
	int i, error;
	uint64_t size, blks;
	uint8_t *space;
	int32_t *lp;
	char *buf;

	fs = NULL;
	*fsp = NULL;
	if (sblock != UFS_STDSB) {
		/* A specific location was requested: read only that one. */
		if ((error = readsuper(devfd, &fs, sblock,
		    flags | UFS_ALTSBLK, readfunc)) != 0) {
			if (fs != NULL)
				UFS_FREE(fs, filltype);
			return (error);
		}
	} else {
		/* Walk the standard locations until one can be read. */
		for (i = 0; sblock_try[i] != -1; i++) {
			if ((error = readsuper(devfd, &fs, sblock_try[i],
			     flags, readfunc)) == 0) {
				/* Caller wants only the superblock itself. */
				if ((flags & UFS_NOCSUM) != 0) {
					*fsp = fs;
					return (0);
				}
				break;
			}
			if (fs != NULL) {
				UFS_FREE(fs, filltype);
				fs = NULL;
			}
			/* ENOENT: try the next location; else give up. */
			if (error == ENOENT)
				continue;
			return (error);
		}
		/* Ran off the end of the list without a usable superblock. */
		if (sblock_try[i] == -1)
			return (ENOENT);
	}
	/*
	 * Read in the superblock summary information.
	 */
	size = fs->fs_cssize;
	blks = howmany(size, fs->fs_fsize);
	/*
	 * The single allocation below holds the csum area followed by the
	 * per-cylinder-group fs_maxcluster array (only when
	 * fs_contigsumsize > 0) and the per-cylinder-group fs_contigdirs
	 * array.
	 */
	if (fs->fs_contigsumsize > 0)
		size += fs->fs_ncg * sizeof(int32_t);
	size += fs->fs_ncg * sizeof(uint8_t);
	if ((fs_si = UFS_MALLOC(sizeof(*fs_si), filltype, M_NOWAIT)) == NULL) {
		UFS_FREE(fs, filltype);
		return (ENOMEM);
	}
	bzero(fs_si, sizeof(*fs_si));
	fs->fs_si = fs_si;
	if ((space = UFS_MALLOC(size, filltype, M_NOWAIT)) == NULL) {
		UFS_FREE(fs->fs_si, filltype);
		UFS_FREE(fs, filltype);
		return (ENOMEM);
	}
	fs->fs_csp = (struct csum *)space;
	/* Read the csum area one block at a time; the last may be short. */
	for (i = 0; i < blks; i += fs->fs_frag) {
		size = fs->fs_bsize;
		if (i + fs->fs_frag > blks)
			size = (blks - i) * fs->fs_fsize;
		buf = NULL;
		error = (*readfunc)(devfd,
		    dbtob(fsbtodb(fs, fs->fs_csaddr + i)), (void **)&buf, size);
		if (error) {
			/* Release everything allocated so far. */
			if (buf != NULL)
				UFS_FREE(buf, filltype);
			UFS_FREE(fs->fs_csp, filltype);
			UFS_FREE(fs->fs_si, filltype);
			UFS_FREE(fs, filltype);
			return (error);
		}
		memcpy(space, buf, size);
		UFS_FREE(buf, filltype);
		space += size;
	}
	if (fs->fs_contigsumsize > 0) {
		/* Start every cylinder group at the maximum cluster size. */
		fs->fs_maxcluster = lp = (int32_t *)space;
		for (i = 0; i < fs->fs_ncg; i++)
			*lp++ = fs->fs_contigsumsize;
		space = (uint8_t *)lp;
	}
	size = fs->fs_ncg * sizeof(uint8_t);
	fs->fs_contigdirs = (uint8_t *)space;
	bzero(fs->fs_contigdirs, size);
	*fsp = fs;
	return (0);
}

/*
 * Try to read a superblock from the location specified by sblockloc.
 * Return zero on success or an errno on failure.
 *
 * The buffer obtained from readfunc is stored in *fsp even when an error
 * is returned after the read itself succeeded, so the caller is the one
 * that frees it.
 */
static int
readsuper(void *devfd, struct fs **fsp, off_t sblockloc, int flags,
    int (*readfunc)(void *devfd, off_t loc, void **bufp, int size))
{
	struct fs *fs;
	int error, res;
	uint32_t ckhash;

	error = (*readfunc)(devfd, sblockloc, (void **)fsp, SBLOCKSIZE);
	if (error != 0)
		return (error);
	fs = *fsp;
	if (fs->fs_magic == FS_BAD_MAGIC)
		return (EINVAL);
	/*
	 * For UFS1 with a 65536 block size, the first backup superblock
	 * is at the same location as the UFS2 superblock. Since SBLOCK_UFS2
	 * is the first location checked, the first backup is the superblock
	 * that will be accessed. Here we fail the lookup so that we can
	 * retry with the correct location for the UFS1 superblock.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC && (flags & UFS_ALTSBLK) == 0 &&
	    fs->fs_bsize == SBLOCK_UFS2 && sblockloc == SBLOCK_UFS2)
		return (ENOENT);
	ffs_oldfscompat_read(fs, sblockloc);
	/* Only positive values from validate_sblock() are failures here. */
	if ((error = validate_sblock(fs, flags)) > 0)
		return (error);
	/*
	 * If the filesystem has been run on a kernel without
	 * metadata check hashes, disable them.
	 */
	if ((fs->fs_flags & FS_METACKHASH) == 0)
		fs->fs_metackhash = 0;
	/*
	 * Clear any check-hashes that are not maintained
	 * by this kernel. Also clear any unsupported flags.
	 */
	fs->fs_metackhash &= CK_SUPPORTED;
	fs->fs_flags &= FS_SUPPORTED;
	if (fs->fs_ckhash != (ckhash = ffs_calc_sbhash(fs))) {
		/* Silent and tolerant: accept the superblock as is. */
		if ((flags & (UFS_NOMSG | UFS_NOHASHFAIL)) ==
		    (UFS_NOMSG | UFS_NOHASHFAIL))
			return (0);
		/* Silent but not tolerant: fail without printing. */
		if ((flags & UFS_NOMSG) != 0)
			return (EINTEGRITY);
#ifdef _KERNEL
		res = uprintf("Superblock check-hash failed: recorded "
		    "check-hash 0x%x != computed check-hash 0x%x%s\n",
		    fs->fs_ckhash, ckhash,
		    (flags & UFS_NOHASHFAIL) != 0 ? " (Ignored)" : "");
#else
		res = 0;
#endif
		/*
		 * Print check-hash failure if no controlling terminal
		 * in kernel or always if in user-mode (libufs).
		 */
		if (res == 0)
			printf("Superblock check-hash failed: recorded "
			    "check-hash 0x%x != computed check-hash "
			    "0x%x%s\n", fs->fs_ckhash, ckhash,
			    (flags & UFS_NOHASHFAIL) ? " (Ignored)" : "");
		if ((flags & UFS_NOHASHFAIL) != 0)
			return (0);
		return (EINTEGRITY);
	}
	/* Have to set for old filesystems that predate this field */
	fs->fs_sblockactualloc = sblockloc;
	/* Not yet any summary information */
	fs->fs_si = NULL;
	return (0);
}

/*
 * Sanity checks for loading old filesystem superblocks.
 * See ffs_oldfscompat_write below for unwound actions.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 */
static void
ffs_oldfscompat_read(struct fs *fs, ufs2_daddr_t sblockloc)
{
	uint64_t maxfilesize;

	/*
	 * If not yet done, update fs_flags location and value of fs_sblockloc.
	 */
	if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) {
		fs->fs_flags = fs->fs_old_flags;
		fs->fs_old_flags |= FS_FLAGS_UPDATED;
		fs->fs_sblockloc = sblockloc;
	}
	/*
	 * If not yet done, update UFS1 superblock with new wider fields.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) {
		fs->fs_maxbsize = fs->fs_bsize;
		fs->fs_time = fs->fs_old_time;
		fs->fs_size = fs->fs_old_size;
		fs->fs_dsize = fs->fs_old_dsize;
		fs->fs_csaddr = fs->fs_old_csaddr;
		fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir;
		fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree;
		fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree;
		fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree;
	}
	/* Pre-4.4BSD inode format: 2^31 - 1 file size limit, derived masks. */
	if (fs->fs_magic == FS_UFS1_MAGIC &&
	    fs->fs_old_inodefmt < FS_44INODEFMT) {
		fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1;
		fs->fs_qbmask = ~fs->fs_bmask;
		fs->fs_qfmask = ~fs->fs_fmask;
	}
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		/*
		 * Remember the on-disk value (ffs_oldfscompat_write() puts
		 * it back) and clamp the in-memory limit.
		 */
		fs->fs_save_maxfilesize = fs->fs_maxfilesize;
		maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1;
		if (fs->fs_maxfilesize > maxfilesize)
			fs->fs_maxfilesize = maxfilesize;
	}
	/* Compatibility for old filesystems */
	if (fs->fs_avgfilesize <= 0)
		fs->fs_avgfilesize = AVFILESIZ;
	if (fs->fs_avgfpdir <= 0)
		fs->fs_avgfpdir = AFPDIR;
}

/*
 * Unwinding superblock updates for old filesystems.
 * See ffs_oldfscompat_read above for details.
 *
 * XXX - Parts get retired eventually.
 * Unfortunately new bits get added.
 */
void
ffs_oldfscompat_write(struct fs *fs)
{

	/*
	 * Copy back UFS2 updated fields that UFS1 inspects.
	 */
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		fs->fs_old_time = fs->fs_time;
		fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir;
		fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree;
		fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree;
		fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree;
		/* Undo the clamp applied by ffs_oldfscompat_read(). */
		fs->fs_maxfilesize = fs->fs_save_maxfilesize;
	}
}

+/*
+ * Sanity checks for loading old filesystem inodes.
+ *
+ * XXX - Parts get retired eventually.
+ * Unfortunately new bits get added. + */ +static int prttimechgs = 0; +#ifdef _KERNEL +SYSCTL_DECL(_vfs_ffs); +SYSCTL_INT(_vfs_ffs, OID_AUTO, prttimechgs, CTLFLAG_RWTUN, &prttimechgs, 0, + "print UFS1 time changes made to inodes"); +#endif /* _KERNEL */ +bool +ffs_oldfscompat_inode_read(struct fs *fs, union dinodep dp, time_t now) +{ + bool change; + + change = false; + switch (fs->fs_magic) { + case FS_UFS2_MAGIC: + /* No changes for now */ + break; + + case FS_UFS1_MAGIC: + /* + * With the change to unsigned time values in UFS1, times set + * before Jan 1, 1970 will appear to be in the future. Check + * for future times and set them to be the current time. + */ + if (dp.dp1->di_ctime > now) { + if (prttimechgs) + printf("ctime %ud changed to %ld\n", + dp.dp1->di_ctime, (long)now); + dp.dp1->di_ctime = now; + change = true; + } + if (dp.dp1->di_mtime > now) { + if (prttimechgs) + printf("mtime %ud changed to %ld\n", + dp.dp1->di_mtime, (long)now); + dp.dp1->di_mtime = now; + dp.dp1->di_ctime = now; + change = true; + } + if (dp.dp1->di_atime > now) { + if (prttimechgs) + printf("atime %ud changed to %ld\n", + dp.dp1->di_atime, (long)now); + dp.dp1->di_atime = now; + dp.dp1->di_ctime = now; + change = true; + } + break; + } + return (change); +} + /* * Verify the filesystem values. */ #define ILOG2(num) (fls(num) - 1) #ifdef STANDALONE_SMALL #define MPRINT(...) do { } while (0) #else #define MPRINT(...) if (prtmsg) printf(__VA_ARGS__) #endif #define FCHK(lhs, op, rhs, fmt) \ if (lhs op rhs) { \ MPRINT("UFS%d superblock failed: %s (" #fmt ") %s %s (" \ #fmt ")\n", fs->fs_magic == FS_UFS1_MAGIC ? 1 : 2, \ #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs); \ if (error < 0) \ return (ENOENT); \ if (error == 0) \ error = ENOENT; \ } #define WCHK(lhs, op, rhs, fmt) \ if (lhs op rhs) { \ MPRINT("UFS%d superblock failed: %s (" #fmt ") %s %s (" \ #fmt ")%s\n", fs->fs_magic == FS_UFS1_MAGIC ? 
1 : 2,\
		    #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs, wmsg);\
		if (error == 0) \
			error = warnerr; \
		if (warnerr == 0) \
			lhs = rhs; \
	}
#define FCHK2(lhs1, op1, rhs1, lhs2, op2, rhs2, fmt) \
	if (lhs1 op1 rhs1 && lhs2 op2 rhs2) { \
		MPRINT("UFS%d superblock failed: %s (" #fmt ") %s %s (" \
		    #fmt ") && %s (" #fmt ") %s %s (" #fmt ")\n", \
		    fs->fs_magic == FS_UFS1_MAGIC ? 1 : 2, #lhs1, \
		    (intmax_t)lhs1, #op1, #rhs1, (intmax_t)rhs1, #lhs2, \
		    (intmax_t)lhs2, #op2, #rhs2, (intmax_t)rhs2); \
		if (error < 0) \
			return (ENOENT); \
		if (error == 0) \
			error = ENOENT; \
	}

/*
 * Check the fields of a superblock for consistency.
 *
 * The checking macros work on the local variables of this function:
 * a failed FCHK/FCHK2 sets error to ENOENT, or returns ENOENT at once
 * when error is negative; a failed WCHK sets error to warnerr and, when
 * warnerr is zero (UFS_NOWARNFAIL given), replaces the offending field
 * with the expected value. Messages are printed unless UFS_NOMSG is set.
 *
 * Returns the final value of error, EILSEQ for a byte-swapped magic
 * number, or ENOENT for an unknown magic number. Note that in the
 * UFS_FSRONLY case error starts at -1 and that is the value returned
 * when no check fails.
 */
static int
validate_sblock(struct fs *fs, int flags)
{
	uint64_t i, sectorsize;
	uint64_t maxfilesize, sizepb;
	int error, prtmsg, warnerr;
	char *wmsg;

	error = 0;
	sectorsize = dbtob(1);
	prtmsg = ((flags & UFS_NOMSG) == 0);
	warnerr = (flags & UFS_NOWARNFAIL) == UFS_NOWARNFAIL ? 0 : ENOENT;
	wmsg = warnerr ? "" : " (Ignored)";
	/*
	 * Check for endian mismatch between machine and filesystem.
	 */
	if (((fs->fs_magic != FS_UFS2_MAGIC) &&
	    (bswap32(fs->fs_magic) == FS_UFS2_MAGIC)) ||
	    ((fs->fs_magic != FS_UFS1_MAGIC) &&
	    (bswap32(fs->fs_magic) == FS_UFS1_MAGIC))) {
		MPRINT("UFS superblock failed due to endian mismatch "
		    "between machine and filesystem\n");
		return(EILSEQ);
	}
	/*
	 * If just validating for recovery, then do just the minimal
	 * checks needed for the superblock fields needed to find
	 * alternate superblocks.
	 */
	if ((flags & UFS_FSRONLY) == UFS_FSRONLY &&
	    (fs->fs_magic == FS_UFS1_MAGIC || fs->fs_magic == FS_UFS2_MAGIC)) {
		error = -1; /* fail on first error */
		if (fs->fs_magic == FS_UFS2_MAGIC) {
			FCHK(fs->fs_sblockloc, !=, SBLOCK_UFS2, %#jx);
		} else if (fs->fs_magic == FS_UFS1_MAGIC) {
			FCHK(fs->fs_sblockloc, <, 0, %jd);
			FCHK(fs->fs_sblockloc, >, SBLOCK_UFS1, %jd);
		}
		FCHK(fs->fs_frag, <, 1, %jd);
		FCHK(fs->fs_frag, >, MAXFRAG, %jd);
		FCHK(fs->fs_bsize, <, MINBSIZE, %jd);
		FCHK(fs->fs_bsize, >, MAXBSIZE, %jd);
		FCHK(fs->fs_bsize, <, roundup(sizeof(struct fs), DEV_BSIZE),
		    %jd);
		FCHK(fs->fs_fsize, <, sectorsize, %jd);
		FCHK(fs->fs_fsize * fs->fs_frag, !=, fs->fs_bsize, %jd);
		FCHK(powerof2(fs->fs_fsize), ==, 0, %jd);
		FCHK(fs->fs_sbsize, >, SBLOCKSIZE, %jd);
		FCHK(fs->fs_sbsize, <, (signed)sizeof(struct fs), %jd);
		FCHK(fs->fs_sbsize % sectorsize, !=, 0, %jd);
		FCHK(fs->fs_fpg, <, 3 * fs->fs_frag, %jd);
		FCHK(fs->fs_ncg, <, 1, %jd);
		FCHK(fs->fs_fsbtodb, !=, ILOG2(fs->fs_fsize / sectorsize), %jd);
		FCHK(fs->fs_old_cgoffset, <, 0, %jd);
		FCHK2(fs->fs_old_cgoffset, >, 0, ~fs->fs_old_cgmask, <, 0, %jd);
		FCHK(fs->fs_old_cgoffset * (~fs->fs_old_cgmask), >, fs->fs_fpg,
		    %jd);
		FCHK(fs->fs_sblkno, !=, roundup(
		    howmany(fs->fs_sblockloc + SBLOCKSIZE, fs->fs_fsize),
		    fs->fs_frag), %jd);
		FCHK(CGSIZE(fs), >, fs->fs_bsize, %jd);
		/* Only need to validate these if reading in csum data */
		if ((flags & UFS_NOCSUM) != 0)
			return (error);
		FCHK((uint64_t)fs->fs_ipg * fs->fs_ncg, >,
		    (((int64_t)(1)) << 32) - INOPB(fs), %jd);
		FCHK(fs->fs_cstotal.cs_nifree, <, 0, %jd);
		FCHK(fs->fs_cstotal.cs_nifree, >,
		    (uint64_t)fs->fs_ipg * fs->fs_ncg, %jd);
		FCHK(fs->fs_cstotal.cs_ndir, >,
		    ((uint64_t)fs->fs_ipg * fs->fs_ncg) -
		    fs->fs_cstotal.cs_nifree, %jd);
		FCHK(fs->fs_size, <, 8 * fs->fs_frag, %jd);
		FCHK(fs->fs_size, <=, ((int64_t)fs->fs_ncg - 1) * fs->fs_fpg,
		    %jd);
		FCHK(fs->fs_size, >, (int64_t)fs->fs_ncg * fs->fs_fpg, %jd);
		FCHK(fs->fs_csaddr, <, 0, %jd);
		FCHK(fs->fs_cssize, !=,
		    fragroundup(fs, fs->fs_ncg * sizeof(struct csum)), %jd);
		FCHK(fs->fs_csaddr + howmany(fs->fs_cssize, fs->fs_fsize), >,
		    fs->fs_size, %jd);
		FCHK(fs->fs_csaddr, <, cgdmin(fs, dtog(fs, fs->fs_csaddr)),
		    %jd);
		FCHK(dtog(fs, fs->fs_csaddr + howmany(fs->fs_cssize,
		    fs->fs_fsize)), >, dtog(fs, fs->fs_csaddr), %jd);
		return (error);
	}
	if (fs->fs_magic == FS_UFS2_MAGIC) {
		if ((flags & UFS_ALTSBLK) == 0)
			FCHK2(fs->fs_sblockactualloc, !=, SBLOCK_UFS2,
			    fs->fs_sblockactualloc, !=, 0, %jd);
		FCHK(fs->fs_sblockloc, !=, SBLOCK_UFS2, %#jx);
		FCHK(fs->fs_maxsymlinklen, !=, ((UFS_NDADDR + UFS_NIADDR) *
			sizeof(ufs2_daddr_t)), %jd);
		FCHK(fs->fs_nindir, !=, fs->fs_bsize / sizeof(ufs2_daddr_t),
		    %jd);
		FCHK(fs->fs_inopb, !=,
		    fs->fs_bsize / sizeof(struct ufs2_dinode), %jd);
	} else if (fs->fs_magic == FS_UFS1_MAGIC) {
		if ((flags & UFS_ALTSBLK) == 0)
			FCHK(fs->fs_sblockactualloc, >, SBLOCK_UFS1, %jd);
		FCHK(fs->fs_sblockloc, <, 0, %jd);
		FCHK(fs->fs_sblockloc, >, SBLOCK_UFS1, %jd);
		FCHK(fs->fs_nindir, !=, fs->fs_bsize / sizeof(ufs1_daddr_t),
		    %jd);
		FCHK(fs->fs_inopb, !=,
		    fs->fs_bsize / sizeof(struct ufs1_dinode), %jd);
		FCHK(fs->fs_maxsymlinklen, !=, ((UFS_NDADDR + UFS_NIADDR) *
			sizeof(ufs1_daddr_t)), %jd);
		WCHK(fs->fs_old_inodefmt, !=, FS_44INODEFMT, %jd);
		WCHK(fs->fs_old_rotdelay, !=, 0, %jd);
		WCHK(fs->fs_old_rps, !=, 60, %jd);
		WCHK(fs->fs_old_nspf, !=, fs->fs_fsize / sectorsize, %jd);
		WCHK(fs->fs_old_interleave, !=, 1, %jd);
		WCHK(fs->fs_old_trackskew, !=, 0, %jd);
		WCHK(fs->fs_old_cpc, !=, 0, %jd);
		WCHK(fs->fs_old_postblformat, !=, 1, %jd);
		FCHK(fs->fs_old_nrpos, !=, 1, %jd);
		WCHK(fs->fs_old_nsect, !=, fs->fs_old_spc, %jd);
		WCHK(fs->fs_old_npsect, !=, fs->fs_old_spc, %jd);
	} else {
		/* Bad magic number, so assume not a superblock */
		return (ENOENT);
	}
	FCHK(fs->fs_bsize, <, MINBSIZE, %jd);
	FCHK(fs->fs_bsize, >, MAXBSIZE, %jd);
	FCHK(fs->fs_bsize, <, roundup(sizeof(struct fs), DEV_BSIZE), %jd);
	FCHK(powerof2(fs->fs_bsize), ==, 0, %jd);
	FCHK(fs->fs_frag, <, 1, %jd);
	FCHK(fs->fs_frag, >, MAXFRAG, %jd);
	FCHK(fs->fs_frag, !=, numfrags(fs, fs->fs_bsize), %jd);
	FCHK(fs->fs_fsize, <, sectorsize, %jd);
	FCHK(fs->fs_fsize * fs->fs_frag, !=, fs->fs_bsize, %jd);
	FCHK(powerof2(fs->fs_fsize), ==, 0, %jd);
	FCHK(fs->fs_fpg, <, 3 * fs->fs_frag, %jd);
	FCHK(fs->fs_ncg, <, 1, %jd);
	FCHK(fs->fs_ipg, <, fs->fs_inopb, %jd);
	FCHK((uint64_t)fs->fs_ipg * fs->fs_ncg, >,
	    (((int64_t)(1)) << 32) - INOPB(fs), %jd);
	FCHK(fs->fs_cstotal.cs_nifree, <, 0, %jd);
	FCHK(fs->fs_cstotal.cs_nifree, >, (uint64_t)fs->fs_ipg * fs->fs_ncg,
	    %jd);
	FCHK(fs->fs_cstotal.cs_ndir, <, 0, %jd);
	FCHK(fs->fs_cstotal.cs_ndir, >,
	    ((uint64_t)fs->fs_ipg * fs->fs_ncg) - fs->fs_cstotal.cs_nifree,
	    %jd);
	FCHK(fs->fs_sbsize, >, SBLOCKSIZE, %jd);
	FCHK(fs->fs_sbsize, <, (signed)sizeof(struct fs), %jd);
	/* fix for misconfigured filesystems */
	if (fs->fs_maxbsize == 0)
		fs->fs_maxbsize = fs->fs_bsize;
	FCHK(fs->fs_maxbsize, <, fs->fs_bsize, %jd);
	FCHK(powerof2(fs->fs_maxbsize), ==, 0, %jd);
	FCHK(fs->fs_maxbsize, >, FS_MAXCONTIG * fs->fs_bsize, %jd);
	FCHK(fs->fs_bmask, !=, ~(fs->fs_bsize - 1), %#jx);
	FCHK(fs->fs_fmask, !=, ~(fs->fs_fsize - 1), %#jx);
	FCHK(fs->fs_qbmask, !=, ~fs->fs_bmask, %#jx);
	FCHK(fs->fs_qfmask, !=, ~fs->fs_fmask, %#jx);
	FCHK(fs->fs_bshift, !=, ILOG2(fs->fs_bsize), %jd);
	FCHK(fs->fs_fshift, !=, ILOG2(fs->fs_fsize), %jd);
	FCHK(fs->fs_fragshift, !=, ILOG2(fs->fs_frag), %jd);
	FCHK(fs->fs_fsbtodb, !=, ILOG2(fs->fs_fsize / sectorsize), %jd);
	FCHK(fs->fs_old_cgoffset, <, 0, %jd);
	FCHK2(fs->fs_old_cgoffset, >, 0, ~fs->fs_old_cgmask, <, 0, %jd);
	FCHK(fs->fs_old_cgoffset * (~fs->fs_old_cgmask), >, fs->fs_fpg, %jd);
	FCHK(CGSIZE(fs), >, fs->fs_bsize, %jd);
	/*
	 * If anything has failed up to this point, it is unsafe to proceed
	 * as checks below may divide by zero or make other fatal calculations.
	 * So if we have any errors at this point, give up.
	 */
	if (error)
		return (error);
	FCHK(fs->fs_sbsize % sectorsize, !=, 0, %jd);
	FCHK(fs->fs_ipg % fs->fs_inopb, !=, 0, %jd);
	FCHK(fs->fs_sblkno, !=, roundup(
	    howmany(fs->fs_sblockloc + SBLOCKSIZE, fs->fs_fsize),
	    fs->fs_frag), %jd);
	FCHK(fs->fs_cblkno, !=, fs->fs_sblkno +
	    roundup(howmany(SBLOCKSIZE, fs->fs_fsize), fs->fs_frag), %jd);
	FCHK(fs->fs_iblkno, !=, fs->fs_cblkno + fs->fs_frag, %jd);
	FCHK(fs->fs_dblkno, !=, fs->fs_iblkno + fs->fs_ipg / INOPF(fs), %jd);
	FCHK(fs->fs_cgsize, >, fs->fs_bsize, %jd);
	FCHK(fs->fs_cgsize, <, fs->fs_fsize, %jd);
	FCHK(fs->fs_cgsize % fs->fs_fsize, !=, 0, %jd);
	/*
	 * This test is valid, however older versions of growfs failed
	 * to correctly update fs_dsize so will fail this test. Thus we
	 * exclude it from the requirements.
	 */
#ifdef notdef
	WCHK(fs->fs_dsize, !=, fs->fs_size - fs->fs_sblkno -
		fs->fs_ncg * (fs->fs_dblkno - fs->fs_sblkno) -
		howmany(fs->fs_cssize, fs->fs_fsize), %jd);
#endif
	WCHK(fs->fs_metaspace, <, 0, %jd);
	WCHK(fs->fs_metaspace, >, fs->fs_fpg / 2, %jd);
	WCHK(fs->fs_minfree, >, 99, %jd%%);
	/*
	 * Upper bound on fs_maxfilesize: the direct blocks plus the blocks
	 * reachable through each level of indirection.
	 */
	maxfilesize = fs->fs_bsize * UFS_NDADDR - 1;
	for (sizepb = fs->fs_bsize, i = 0; i < UFS_NIADDR; i++) {
		sizepb *= NINDIR(fs);
		maxfilesize += sizepb;
	}
	WCHK(fs->fs_maxfilesize, >, maxfilesize, %jd);
	/*
	 * These values have a tight interaction with each other that
	 * makes it hard to tightly bound them. So we can only check
	 * that they are within a broader possible range.
	 *
	 * The size cannot always be accurately determined, but ensure
	 * that it is consistent with the number of cylinder groups (fs_ncg)
	 * and the number of fragments per cylinder group (fs_fpg). Ensure
	 * that the summary information size is correct and that it starts
	 * and ends in the data area of the same cylinder group.
	 */
	FCHK(fs->fs_size, <, 8 * fs->fs_frag, %jd);
	FCHK(fs->fs_size, <=, ((int64_t)fs->fs_ncg - 1) * fs->fs_fpg, %jd);
	FCHK(fs->fs_size, >, (int64_t)fs->fs_ncg * fs->fs_fpg, %jd);
	/*
	 * If we are not requested to read in the csum data stop here
	 * as the correctness of the remaining values is only important
	 * to bound the space needed to be allocated to hold the csum data.
	 */
	if ((flags & UFS_NOCSUM) != 0)
		return (error);
	FCHK(fs->fs_csaddr, <, 0, %jd);
	FCHK(fs->fs_cssize, !=,
	    fragroundup(fs, fs->fs_ncg * sizeof(struct csum)), %jd);
	FCHK(fs->fs_csaddr + howmany(fs->fs_cssize, fs->fs_fsize), >,
	    fs->fs_size, %jd);
	FCHK(fs->fs_csaddr, <, cgdmin(fs, dtog(fs, fs->fs_csaddr)), %jd);
	FCHK(dtog(fs, fs->fs_csaddr + howmany(fs->fs_cssize, fs->fs_fsize)), >,
	    dtog(fs, fs->fs_csaddr), %jd);
	/*
	 * With file system clustering it is possible to allocate
	 * many contiguous blocks. The kernel variable maxphys defines
	 * the maximum transfer size permitted by the controller and/or
	 * buffering. The fs_maxcontig parameter controls the maximum
	 * number of blocks that the filesystem will read or write
	 * in a single transfer. It is calculated when the filesystem
	 * is created as maxphys / fs_bsize. The loader uses a maxphys
	 * of 128K even when running on a system that supports larger
	 * values. If the filesystem was built on a system that supports
	 * a larger maxphys (1M is typical) it will have configured
	 * fs_maxcontig for that larger system. So we bound the upper
	 * allowable limit for fs_maxcontig to be able to at least
	 * work with a 1M maxphys on the smallest block size filesystem:
	 * 1M / 4096 == 256. There is no harm in allowing the mounting of
	 * filesystems that make larger than maxphys I/O requests because
	 * those (mostly 32-bit machines) can (very slowly) handle I/O
	 * requests that exceed maxphys.
	 */
	WCHK(fs->fs_maxcontig, <, 0, %jd);
	WCHK(fs->fs_maxcontig, >, MAX(256, maxphys / fs->fs_bsize), %jd);
	FCHK2(fs->fs_maxcontig, ==, 0, fs->fs_contigsumsize, !=, 0, %jd);
	FCHK2(fs->fs_maxcontig, >, 1, fs->fs_contigsumsize, !=,
	    MIN(fs->fs_maxcontig, FS_MAXCONTIG), %jd);
	return (error);
}

/*
 * Make an extensive search to find a superblock. If the superblock
 * in the standard place cannot be used, try looking for one of the
 * backup superblocks.
 *
 * Flags are made up of the following or'ed together options:
 *
 * UFS_NOMSG indicates that superblock inconsistency error messages
 *    should not be printed.
 *
 * UFS_NOCSUM causes only the superblock itself to be returned, but does
 *    not read in any auxiliary data structures like the cylinder group
 *    summary information.
 *
 * Returns zero with the superblock in *fsp on success, EILSEQ when the
 * first read reports a byte-order mismatch, and ENOENT when every
 * strategy below has failed.
 */
int
ffs_sbsearch(void *devfd, struct fs **fsp, int reqflags,
    struct malloc_type *filltype,
    int (*readfunc)(void *devfd, off_t loc, void **bufp, int size))
{
	struct fsrecovery *fsr;
	struct fs *protofs;
	void *fsrbuf;
	char *cp;
	long nocsum, flags, msg, cg;
	off_t sblk, secsize;
	int error;

	msg = (reqflags & UFS_NOMSG) == 0;
	nocsum = reqflags & UFS_NOCSUM;
	/*
	 * Try normal superblock read and return it if it works.
	 *
	 * Suppress messages if it fails until we find out if
	 * failure can be avoided.
	 */
	flags = UFS_NOMSG | nocsum;
	error = ffs_sbget(devfd, fsp, UFS_STDSB, flags, filltype, readfunc);
	/*
	 * If successful or endian error, no need to try further.
	 */
	if (error == 0 || error == EILSEQ) {
		if (msg && error == EILSEQ)
			printf("UFS superblock failed due to endian mismatch "
			    "between machine and filesystem\n");
		return (error);
	}
	/*
	 * First try: ignoring hash failures.
	 */
	flags |= UFS_NOHASHFAIL;
	if (msg)
		flags &= ~UFS_NOMSG;
	if (ffs_sbget(devfd, fsp, UFS_STDSB, flags, filltype, readfunc) == 0)
		return (0);
	/*
	 * Next up is to check if fields of the superblock that are
	 * needed to find backup superblocks are usable.
	 */
	if (msg)
		printf("Attempted recovery for standard superblock: failed\n");
	flags = UFS_FSRONLY | UFS_NOHASHFAIL | UFS_NOCSUM | UFS_NOMSG;
	if (ffs_sbget(devfd, &protofs, UFS_STDSB, flags, filltype,
	    readfunc) == 0) {
		if (msg)
			printf("Attempt extraction of recovery data from "
			    "standard superblock.\n");
	} else {
		/*
		 * Final desperation is to see if alternate superblock
		 * parameters have been saved in the boot area.
		 */
		if (msg)
			printf("Attempted extraction of recovery data from "
			    "standard superblock: failed\nAttempt to find "
			    "boot zone recovery data.\n");
		/*
		 * Look to see if recovery information has been saved.
		 * If so we can generate a prototype superblock based
		 * on that information.
		 *
		 * We need fragments-per-group, number of cylinder groups,
		 * location of the superblock within the cylinder group, and
		 * the conversion from filesystem fragments to disk blocks.
		 *
		 * When building a UFS2 filesystem, newfs(8) stores these
		 * details at the end of the boot block area at the start
		 * of the filesystem partition. If they have been overwritten
		 * by a boot block, we fail. But usually they are there
		 * and we can use them.
		 *
		 * We could ask the underlying device for its sector size,
		 * but some devices lie. So we just try a plausible range.
		 */
		error = ENOENT;
		fsrbuf = NULL;
		for (secsize = dbtob(1); secsize <= SBLOCKSIZE; secsize *= 2)
			if ((error = (*readfunc)(devfd, (SBLOCK_UFS2 - secsize),
			    &fsrbuf, secsize)) == 0)
				break;
		if (error != 0)
			goto trynowarn;
		cp = fsrbuf; /* type change to keep compiler happy */
		/* The recovery record occupies the end of the sector read. */
		fsr = (struct fsrecovery *)&cp[secsize - sizeof *fsr];
		if (fsr->fsr_magic != FS_UFS2_MAGIC ||
		    (protofs = UFS_MALLOC(SBLOCKSIZE, filltype, M_NOWAIT))
		    == NULL) {
			UFS_FREE(fsrbuf, filltype);
			goto trynowarn;
		}
		/* Build a prototype holding only the fields used below. */
		memset(protofs, 0, sizeof(struct fs));
		protofs->fs_fpg = fsr->fsr_fpg;
		protofs->fs_fsbtodb = fsr->fsr_fsbtodb;
		protofs->fs_sblkno = fsr->fsr_sblkno;
		protofs->fs_magic = fsr->fsr_magic;
		protofs->fs_ncg = fsr->fsr_ncg;
		UFS_FREE(fsrbuf, filltype);
	}
	/*
	 * Scan looking for alternative superblocks.
	 */
	flags = nocsum;
	if (!msg)
		flags |= UFS_NOMSG;
	for (cg = 0; cg < protofs->fs_ncg; cg++) {
		sblk = fsbtodb(protofs, cgsblock(protofs, cg));
		if (msg)
			printf("Try cg %ld at sblock loc %jd\n", cg,
			    (intmax_t)sblk);
		if (ffs_sbget(devfd, fsp, dbtob(sblk), flags, filltype,
		    readfunc) == 0) {
			if (msg)
				printf("Succeeded with alternate superblock "
				    "at %jd\n", (intmax_t)sblk);
			UFS_FREE(protofs, filltype);
			return (0);
		}
	}
	UFS_FREE(protofs, filltype);
	/*
	 * Our alternate superblock strategies failed. Our last ditch effort
	 * is to see if the standard superblock has only non-critical errors.
	 */
trynowarn:
	flags = UFS_NOWARNFAIL | UFS_NOMSG | nocsum;
	if (msg) {
		printf("Finding an alternate superblock failed.\nCheck for "
		    "only non-critical errors in standard superblock\n");
		flags &= ~UFS_NOMSG;
	}
	if (ffs_sbget(devfd, fsp, UFS_STDSB, flags, filltype, readfunc) != 0) {
		if (msg)
			printf("Failed, superblock has critical errors\n");
		return (ENOENT);
	}
	if (msg)
		printf("Success, using standard superblock with "
		    "non-critical errors.\n");
	return (0);
}

/*
 * Write a superblock to the devfd device from the memory pointed to by fs.
 * Write out the superblock summary information if it is present.
* * If the write is successful, zero is returned. Otherwise one of the * following error values is returned: * EIO: failed to write superblock. * EIO: failed to write superblock summary information. */ int ffs_sbput(void *devfd, struct fs *fs, off_t loc, int (*writefunc)(void *devfd, off_t loc, void *buf, int size)) { int i, error, blks, size; uint8_t *space; /* * If there is summary information, write it first, so if there * is an error, the superblock will not be marked as clean. */ if (fs->fs_si != NULL && fs->fs_csp != NULL) { blks = howmany(fs->fs_cssize, fs->fs_fsize); space = (uint8_t *)fs->fs_csp; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; if ((error = (*writefunc)(devfd, dbtob(fsbtodb(fs, fs->fs_csaddr + i)), space, size)) != 0) return (error); space += size; } } fs->fs_fmod = 0; #ifndef _KERNEL { struct fs_summary_info *fs_si; fs->fs_time = time(NULL); /* Clear the pointers for the duration of writing. */ fs_si = fs->fs_si; fs->fs_si = NULL; fs->fs_ckhash = ffs_calc_sbhash(fs); error = (*writefunc)(devfd, loc, fs, fs->fs_sbsize); fs->fs_si = fs_si; } #else /* _KERNEL */ fs->fs_time = time_second; fs->fs_ckhash = ffs_calc_sbhash(fs); error = (*writefunc)(devfd, loc, fs, fs->fs_sbsize); #endif /* _KERNEL */ return (error); } /* * Calculate the check-hash for a superblock. */ uint32_t ffs_calc_sbhash(struct fs *fs) { uint32_t ckhash, save_ckhash; /* * A filesystem that was using a superblock ckhash may be moved * to an older kernel that does not support ckhashes. The * older kernel will clear the FS_METACKHASH flag indicating * that it does not update hashes. When the disk is moved back * to a kernel capable of ckhashes it disables them on mount: * * if ((fs->fs_flags & FS_METACKHASH) == 0) * fs->fs_metackhash = 0; * * This leaves (fs->fs_metackhash & CK_SUPERBLOCK) == 0) with an * old stale value in the fs->fs_ckhash field. Thus the need to * just accept what is there. 
*/ if ((fs->fs_metackhash & CK_SUPERBLOCK) == 0) return (fs->fs_ckhash); save_ckhash = fs->fs_ckhash; fs->fs_ckhash = 0; /* * If newly read from disk, the caller is responsible for * verifying that fs->fs_sbsize <= SBLOCKSIZE. */ ckhash = calculate_crc32c(~0L, (void *)fs, fs->fs_sbsize); fs->fs_ckhash = save_ckhash; return (ckhash); } /* * Update the frsum fields to reflect addition or deletion * of some frags. */ void ffs_fragacct(struct fs *fs, int fragmap, int32_t fraglist[], int cnt) { int inblk; int field, subfield; int siz, pos; inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1; fragmap <<= 1; for (siz = 1; siz < fs->fs_frag; siz++) { if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0) continue; field = around[siz]; subfield = inside[siz]; for (pos = siz; pos <= fs->fs_frag; pos++) { if ((fragmap & field) == subfield) { fraglist[siz] += cnt; pos += siz; field <<= siz; subfield <<= siz; } field <<= 1; subfield <<= 1; } } } /* * block operations * * check if a block is available */ int ffs_isblock(struct fs *fs, unsigned char *cp, ufs1_daddr_t h) { unsigned char mask; switch ((int)fs->fs_frag) { case 8: return (cp[h] == 0xff); case 4: mask = 0x0f << ((h & 0x1) << 2); return ((cp[h >> 1] & mask) == mask); case 2: mask = 0x03 << ((h & 0x3) << 1); return ((cp[h >> 2] & mask) == mask); case 1: mask = 0x01 << (h & 0x7); return ((cp[h >> 3] & mask) == mask); default: #ifdef _KERNEL panic("ffs_isblock"); #endif break; } return (0); } /* * check if a block is free */ int ffs_isfreeblock(struct fs *fs, uint8_t *cp, ufs1_daddr_t h) { switch ((int)fs->fs_frag) { case 8: return (cp[h] == 0); case 4: return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); case 2: return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); case 1: return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); default: #ifdef _KERNEL panic("ffs_isfreeblock"); #endif break; } return (0); } /* * take a block out of the map */ void ffs_clrblock(struct fs *fs, uint8_t *cp, ufs1_daddr_t h) { switch 
((int)fs->fs_frag) { case 8: cp[h] = 0; return; case 4: cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); return; case 2: cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); return; case 1: cp[h >> 3] &= ~(0x01 << (h & 0x7)); return; default: #ifdef _KERNEL panic("ffs_clrblock"); #endif break; } } /* * put a block into the map */ void ffs_setblock(struct fs *fs, unsigned char *cp, ufs1_daddr_t h) { switch ((int)fs->fs_frag) { case 8: cp[h] = 0xff; return; case 4: cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); return; case 2: cp[h >> 2] |= (0x03 << ((h & 0x3) << 1)); return; case 1: cp[h >> 3] |= (0x01 << (h & 0x7)); return; default: #ifdef _KERNEL panic("ffs_setblock"); #endif break; } } /* * Update the cluster map because of an allocation or free. * * Cnt == 1 means free; cnt == -1 means allocating. */ void ffs_clusteracct(struct fs *fs, struct cg *cgp, ufs1_daddr_t blkno, int cnt) { int32_t *sump; int32_t *lp; uint8_t *freemapp, *mapp; int i, start, end, forw, back, map; uint64_t bit; if (fs->fs_contigsumsize <= 0) return; freemapp = cg_clustersfree(cgp); sump = cg_clustersum(cgp); /* * Allocate or clear the actual block. */ if (cnt > 0) setbit(freemapp, blkno); else clrbit(freemapp, blkno); /* * Find the size of the cluster going forward. */ start = blkno + 1; end = start + fs->fs_contigsumsize; if (end >= cgp->cg_nclusterblks) end = cgp->cg_nclusterblks; mapp = &freemapp[start / NBBY]; map = *mapp++; bit = 1U << (start % NBBY); for (i = start; i < end; i++) { if ((map & bit) == 0) break; if ((i & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } forw = i - start; /* * Find the size of the cluster going backward. 
*/ start = blkno - 1; end = start - fs->fs_contigsumsize; if (end < 0) end = -1; mapp = &freemapp[start / NBBY]; map = *mapp--; bit = 1U << (start % NBBY); for (i = start; i > end; i--) { if ((map & bit) == 0) break; if ((i & (NBBY - 1)) != 0) { bit >>= 1; } else { map = *mapp--; bit = 1U << (NBBY - 1); } } back = start - i; /* * Account for old cluster and the possibly new forward and * back clusters. */ i = back + forw + 1; if (i > fs->fs_contigsumsize) i = fs->fs_contigsumsize; sump[i] += cnt; if (back > 0) sump[back] -= cnt; if (forw > 0) sump[forw] -= cnt; /* * Update cluster summary information. */ lp = &sump[fs->fs_contigsumsize]; for (i = fs->fs_contigsumsize; i > 0; i--) if (*lp-- > 0) break; fs->fs_maxcluster[cgp->cg_cgx] = i; } diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index da079ce1efb6..498ef61d7cf1 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1,2472 +1,2478 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_quota.h" #include "opt_ufs.h" #include "opt_ffs.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static uma_zone_t uma_inode, uma_ufs1, uma_ufs2; VFS_SMR_DECLARE; static int ffs_mountfs(struct vnode *, struct mount *, struct thread *); static void ffs_ifree(struct ufsmount *ump, struct inode *ip); static int ffs_sync_lazy(struct mount *mp); static int ffs_use_bread(void *devfd, off_t loc, void **bufp, int size); static int ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size); static vfs_init_t ffs_init; static vfs_uninit_t ffs_uninit; static vfs_extattrctl_t ffs_extattrctl; static vfs_cmount_t ffs_cmount; static vfs_unmount_t ffs_unmount; static vfs_mount_t ffs_mount; static vfs_statfs_t ffs_statfs; static vfs_fhtovp_t ffs_fhtovp; static vfs_sync_t ffs_sync; static struct vfsops ufs_vfsops = { .vfs_extattrctl = ffs_extattrctl, .vfs_fhtovp = ffs_fhtovp, .vfs_init = ffs_init, 
.vfs_mount = ffs_mount, .vfs_cmount = ffs_cmount, .vfs_quotactl = ufs_quotactl, .vfs_root = vfs_cache_root, .vfs_cachedroot = ufs_root, .vfs_statfs = ffs_statfs, .vfs_sync = ffs_sync, .vfs_uninit = ffs_uninit, .vfs_unmount = ffs_unmount, .vfs_vget = ffs_vget, .vfs_susp_clean = process_deferred_inactive, }; VFS_SET(ufs_vfsops, ufs, VFCF_FILEREVINC); MODULE_VERSION(ufs, 1); static b_strategy_t ffs_geom_strategy; static b_write_t ffs_bufwrite; static struct buf_ops ffs_ops = { .bop_name = "FFS", .bop_write = ffs_bufwrite, .bop_strategy = ffs_geom_strategy, .bop_sync = bufsync, #ifdef NO_FFS_SNAPSHOT .bop_bdflush = bufbdflush, #else .bop_bdflush = ffs_bdflush, #endif }; /* * Note that userquota and groupquota options are not currently used * by UFS/FFS code and generally mount(8) does not pass those options * from userland, but they can be passed by loader(8) via * vfs.root.mountfrom.options. */ static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr", "noclusterw", "noexec", "export", "force", "from", "groupquota", "multilabel", "nfsv4acls", "snapshot", "nosuid", "suiddir", "nosymfollow", "sync", "union", "userquota", "untrusted", NULL }; static int ffs_enxio_enable = 1; SYSCTL_DECL(_vfs_ffs); SYSCTL_INT(_vfs_ffs, OID_AUTO, enxio_enable, CTLFLAG_RWTUN, &ffs_enxio_enable, 0, "enable mapping of other disk I/O errors to ENXIO"); /* * Return buffer with the contents of block "offset" from the beginning of * directory "ip". If "res" is non-zero, fill it in with a pointer to the * remaining space in the directory. 
*/ static int ffs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp) { struct inode *ip; struct fs *fs; struct buf *bp; ufs_lbn_t lbn; int bsize, error; ip = VTOI(vp); fs = ITOFS(ip); lbn = lblkno(fs, offset); bsize = blksize(fs, ip, lbn); *bpp = NULL; error = bread(vp, lbn, bsize, NOCRED, &bp); if (error) { return (error); } if (res) *res = (char *)bp->b_data + blkoff(fs, offset); *bpp = bp; return (0); } /* * Load up the contents of an inode and copy the appropriate pieces * to the incore copy. */ static int ffs_load_inode(struct buf *bp, struct inode *ip, struct fs *fs, ino_t ino) { struct ufs1_dinode *dip1; struct ufs2_dinode *dip2; int error; if (I_IS_UFS1(ip)) { dip1 = ip->i_din1; *dip1 = *((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ino)); ip->i_mode = dip1->di_mode; ip->i_nlink = dip1->di_nlink; ip->i_effnlink = dip1->di_nlink; ip->i_size = dip1->di_size; ip->i_flags = dip1->di_flags; ip->i_gen = dip1->di_gen; ip->i_uid = dip1->di_uid; ip->i_gid = dip1->di_gid; + if (ffs_oldfscompat_inode_read(fs, ip->i_dp, time_second) && + fs->fs_ronly == 0) + UFS_INODE_SET_FLAG(ip, IN_MODIFIED); return (0); } dip2 = ((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ino)); if ((error = ffs_verify_dinode_ckhash(fs, dip2)) != 0 && !ffs_fsfail_cleanup(ITOUMP(ip), error)) { printf("%s: inode %jd: check-hash failed\n", fs->fs_fsmnt, (intmax_t)ino); return (error); } *ip->i_din2 = *dip2; dip2 = ip->i_din2; ip->i_mode = dip2->di_mode; ip->i_nlink = dip2->di_nlink; ip->i_effnlink = dip2->di_nlink; ip->i_size = dip2->di_size; ip->i_flags = dip2->di_flags; ip->i_gen = dip2->di_gen; ip->i_uid = dip2->di_uid; ip->i_gid = dip2->di_gid; + if (ffs_oldfscompat_inode_read(fs, ip->i_dp, time_second) && + fs->fs_ronly == 0) + UFS_INODE_SET_FLAG(ip, IN_MODIFIED); return (0); } /* * Verify that a filesystem block number is a valid data block. * This routine is only called on untrusted filesystems. 
*/
static int
ffs_check_blkno(struct mount *mp, ino_t inum, ufs2_daddr_t daddr, int blksize)
{
	struct fs *fs;
	struct ufsmount *ump;
	ufs2_daddr_t end_daddr;
	int cg, havemtx;

	KASSERT((mp->mnt_flag & MNT_UNTRUSTED) != 0,
	    ("ffs_check_blkno called on a trusted file system"));
	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	cg = dtog(fs, daddr);
	end_daddr = daddr + numfrags(fs, blksize);
	/*
	 * Verify that the block number is a valid data block. Also check
	 * that it does not point to an inode block or a superblock. Accept
	 * blocks that are unallocated (0) or part of snapshot metadata
	 * (BLK_NOCOPY or BLK_SNAP).
	 *
	 * Thus, the block must be in a valid range for the filesystem and
	 * either in the space before a backup superblock (except the first
	 * cylinder group where that space is used by the bootstrap code) or
	 * after the inode blocks and before the end of the cylinder group.
	 */
	if ((uint64_t)daddr <= BLK_SNAP ||
	    ((uint64_t)end_daddr <= fs->fs_size &&
	    ((cg > 0 && end_daddr <= cgsblock(fs, cg)) ||
	    (daddr >= cgdmin(fs, cg) &&
	    end_daddr <= cgbase(fs, cg) + fs->fs_fpg))))
		return (0);
	/*
	 * The block is out of range.  Report it, rate-limited through
	 * ppsratecheck(), and fail with EINTEGRITY.  The UFS mutex is
	 * taken for the rate check if the caller does not already hold
	 * it, is dropped around uprintf(), and on return is held exactly
	 * when it was held on entry.
	 */
	if ((havemtx = mtx_owned(UFS_MTX(ump))) == 0)
		UFS_LOCK(ump);
	if (ppsratecheck(&ump->um_last_integritymsg,
	    &ump->um_secs_integritymsg, 1)) {
		UFS_UNLOCK(ump);
		/* %jd takes an intmax_t, so cast explicitly. */
		uprintf("\n%s: inode %jd, out-of-range indirect block "
		    "number %jd\n", mp->mnt_stat.f_mntonname,
		    (intmax_t)inum, (intmax_t)daddr);
		if (havemtx)
			UFS_LOCK(ump);
	} else if (!havemtx)
		UFS_UNLOCK(ump);
	return (EINTEGRITY);
}

/*
 * On first ENXIO error, initiate an asynchronous forcible unmount.
 * Used to unmount filesystems whose underlying media has gone away.
 *
 * Return true if a cleanup is in progress.
*/
int
ffs_fsfail_cleanup(struct ufsmount *ump, int error)
{
	int retval;

	/* Locking wrapper around ffs_fsfail_cleanup_locked(). */
	UFS_LOCK(ump);
	retval = ffs_fsfail_cleanup_locked(ump, error);
	UFS_UNLOCK(ump);
	return (retval);
}

/*
 * Same as ffs_fsfail_cleanup() but for callers that already hold the
 * UFS mutex (asserted below).  On the first ENXIO the UM_FSFAIL_CLEANUP
 * flag is set and a deferred forced unmount is queued; the kernel
 * panics if the affected mount is the root filesystem.  Returns
 * non-zero whenever UM_FSFAIL_CLEANUP is set on return.
 */
int
ffs_fsfail_cleanup_locked(struct ufsmount *ump, int error)
{
	mtx_assert(UFS_MTX(ump), MA_OWNED);
	if (error == ENXIO && (ump->um_flags & UM_FSFAIL_CLEANUP) == 0) {
		ump->um_flags |= UM_FSFAIL_CLEANUP;
		if (ump->um_mountp == rootvnode->v_mount)
			panic("UFS: root fs would be forcibly unmounted");

		/*
		 * Queue an async forced unmount.
		 */
		vfs_ref(ump->um_mountp);
		dounmount(ump->um_mountp,
		    MNT_FORCE | MNT_RECURSE | MNT_DEFERRED, curthread);
		printf("UFS: forcibly unmounting %s from %s\n",
		    ump->um_mountp->mnt_stat.f_mntfromname,
		    ump->um_mountp->mnt_stat.f_mntonname);
	}
	return ((ump->um_flags & UM_FSFAIL_CLEANUP) != 0);
}

/*
 * Wrapper used during ENXIO cleanup to allocate empty buffers when
 * the kernel is unable to read the real one. They are needed so that
 * the soft updates code can use them to unwind its dependencies.
 *
 * The read is issued through breadn_flags() with GB_CVTENXIO added to
 * "flags".  If it fails while a cleanup is in progress, a buffer is
 * obtained with getblkx() and zero-filled, and the getblkx() result is
 * returned instead of the read error.
 */
int
ffs_breadz(struct ufsmount *ump, struct vnode *vp, daddr_t lblkno,
    daddr_t dblkno, int size, daddr_t *rablkno, int *rabsize, int cnt,
    struct ucred *cred, int flags, void (*ckhashfunc)(struct buf *),
    struct buf **bpp)
{
	int error;

	flags |= GB_CVTENXIO;
	error = breadn_flags(vp, lblkno, dblkno, size, rablkno, rabsize, cnt,
	    cred, flags, ckhashfunc, bpp);
	if (error != 0 && ffs_fsfail_cleanup(ump, error)) {
		error = getblkx(vp, lblkno, dblkno, size, 0, 0, flags, bpp);
		KASSERT(error == 0, ("getblkx failed"));
		vfs_bio_bzero_buf(*bpp, 0, size);
	}
	return (error);
}

/*
 * VFS mount entry point (installed as .vfs_mount in ufs_vfsops).
 *
 * Parses the mount options in mp->mnt_optnew, handles snapshot
 * requests, looks up and permission-checks the device named by the
 * "from" option, and then either performs a new mount through
 * ffs_mountfs() or, for MNT_UPDATE, applies read-write <-> read-only
 * transitions and an optional reload to the existing mount.
 *
 * Returns 0 on success or an errno value.
 */
static int
ffs_mount(struct mount *mp)
{
	struct vnode *devvp, *odevvp;
	struct thread *td;
	struct ufsmount *ump = NULL;
	struct fs *fs;
	int error, flags;
	int error1 __diagused;
	uint64_t mntorflags, saved_mnt_flag;
	accmode_t accmode;
	struct nameidata ndp;
	char *fspec;
	bool mounted_softdep;

	td = curthread;
	if (vfs_filteropt(mp->mnt_optnew, ffs_opts))
		return (EINVAL);
	/* Create the inode and dinode UMA zones on first use. */
	if (uma_inode == NULL) {
		uma_inode = uma_zcreate("FFS inode",
		    sizeof(struct inode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs1 = uma_zcreate("FFS1 dinode",
		    sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		uma_ufs2 = uma_zcreate("FFS2 dinode",
		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
		    UMA_ALIGN_PTR, 0);
		VFS_SMR_ZONE_SET(uma_inode);
	}

	vfs_deleteopt(mp->mnt_optnew, "groupquota");
	vfs_deleteopt(mp->mnt_optnew, "userquota");

	fspec = vfs_getopts(mp->mnt_optnew, "from", &error);
	if (error)
		return (error);

	/* Collect the mount flags implied by the options. */
	mntorflags = 0;
	if (vfs_getopt(mp->mnt_optnew, "untrusted", NULL, NULL) == 0)
		mntorflags |= MNT_UNTRUSTED;

	if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0)
		mntorflags |= MNT_ACLS;

	if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) {
		mntorflags |= MNT_SNAPSHOT;
		/*
		 * Once we have set the MNT_SNAPSHOT flag, do not
		 * persist "snapshot" in the options list.
		 */
		vfs_deleteopt(mp->mnt_optnew, "snapshot");
		vfs_deleteopt(mp->mnt_opt, "snapshot");
	}

	if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) {
		if (mntorflags & MNT_ACLS) {
			vfs_mount_error(mp,
			    "\"acls\" and \"nfsv4acls\" options "
			    "are mutually exclusive");
			return (EINVAL);
		}
		mntorflags |= MNT_NFS4ACLS;
	}

	/* MNTK_FPLOOKUP is cleared here and re-evaluated before return. */
	MNT_ILOCK(mp);
	mp->mnt_kern_flag &= ~MNTK_FPLOOKUP;
	mp->mnt_flag |= mntorflags;
	MNT_IUNLOCK(mp);

	/*
	 * If this is a snapshot request, take the snapshot.
	 */
	if (mp->mnt_flag & MNT_SNAPSHOT) {
		if ((mp->mnt_flag & MNT_UPDATE) == 0)
			return (EINVAL);
		return (ffs_snapshot(mp, fspec));
	}

	/*
	 * Must not call namei() while owning busy ref.
	 */
	if (mp->mnt_flag & MNT_UPDATE)
		vfs_unbusy(mp);

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible disk device.
	 */
	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec);
	error = namei(&ndp);
	if ((mp->mnt_flag & MNT_UPDATE) != 0) {
		/*
		 * Unmount does not start if MNT_UPDATE is set.  Mount
		 * update busies mp before setting MNT_UPDATE.  We
		 * must be able to retain our busy ref successfully,
		 * without sleep.
		 */
		error1 = vfs_busy(mp, MBF_NOWAIT);
		MPASS(error1 == 0);
	}
	if (error != 0)
		return (error);
	NDFREE_PNBUF(&ndp);
	if (!vn_isdisk_error(ndp.ni_vp, &error)) {
		vput(ndp.ni_vp);
		return (error);
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	accmode = VREAD;
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		accmode |= VWRITE;
	error = VOP_ACCESS(ndp.ni_vp, accmode, td->td_ucred, td);
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(ndp.ni_vp);
		return (error);
	}

	/*
	 * New mount
	 *
	 * We need the name for the mount point (also used for
	 * "last mounted on") copied in. If an error occurs,
	 * the mount point is discarded by the upper level code.
	 * Note that vfs_mount_alloc() populates f_mntonname for us.
	 */
	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
		if ((error = ffs_mountfs(ndp.ni_vp, mp, td)) != 0) {
			vrele(ndp.ni_vp);
			return (error);
		}
	} else {
		/*
		 * When updating, check whether changing from read-only to
		 * read/write; if there is no device name, that's all we do.
		 */
		ump = VFSTOUFS(mp);
		fs = ump->um_fs;
		odevvp = ump->um_odevvp;
		devvp = ump->um_devvp;

		/*
		 * If it's not the same vnode, or at least the same device
		 * then it's not correct.
		 */
		if (ndp.ni_vp->v_rdev != ump->um_odevvp->v_rdev)
			error = EINVAL; /* needs translation */
		vput(ndp.ni_vp);
		if (error)
			return (error);
		/* Read-write -> read-only downgrade. */
		if (fs->fs_ronly == 0 &&
		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * Flush any dirty data and suspend filesystem.
			 */
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			error = vfs_write_suspend_umnt(mp);
			if (error != 0)
				return (error);

			fs->fs_ronly = 1;
			if (MOUNTEDSOFTDEP(mp)) {
				MNT_ILOCK(mp);
				mp->mnt_flag &= ~MNT_SOFTDEP;
				MNT_IUNLOCK(mp);
				mounted_softdep = true;
			} else
				mounted_softdep = false;

			/*
			 * Check for and optionally get rid of files open
			 * for writing.
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			if (mounted_softdep) {
				error = softdep_flushfiles(mp, flags, td);
			} else {
				error = ffs_flushfiles(mp, flags, td);
			}
			if (error) {
				/* Roll back to the read-write state. */
				fs->fs_ronly = 0;
				if (mounted_softdep) {
					MNT_ILOCK(mp);
					mp->mnt_flag |= MNT_SOFTDEP;
					MNT_IUNLOCK(mp);
				}
				vfs_write_resume(mp, 0);
				return (error);
			}

			if (fs->fs_pendingblocks != 0 ||
			    fs->fs_pendinginodes != 0) {
				printf("WARNING: %s Update error: blocks %jd "
				    "files %d\n", fs->fs_fsmnt,
				    (intmax_t)fs->fs_pendingblocks,
				    fs->fs_pendinginodes);
				fs->fs_pendingblocks = 0;
				fs->fs_pendinginodes = 0;
			}
			if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
				fs->fs_clean = 1;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				/* Superblock write failed: roll back. */
				fs->fs_ronly = 0;
				fs->fs_clean = 0;
				if (mounted_softdep) {
					MNT_ILOCK(mp);
					mp->mnt_flag |= MNT_SOFTDEP;
					MNT_IUNLOCK(mp);
				}
				vfs_write_resume(mp, 0);
				return (error);
			}
			if (mounted_softdep)
				softdep_unmount(mp);
			g_topology_lock();
			/*
			 * Drop our write and exclusive access.
			 */
			g_access(ump->um_cp, 0, -1, -1);
			g_topology_unlock();
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_RDONLY;
			MNT_IUNLOCK(mp);
			/*
			 * Allow the writers to note that filesystem
			 * is ro now.
			 */
			vfs_write_resume(mp, 0);
		}
		if ((mp->mnt_flag & MNT_RELOAD) &&
		    (error = ffs_reload(mp, 0)) != 0) {
			return (error);
		} else {
			/* ffs_reload replaces the superblock structure */
			fs = ump->um_fs;
		}
		/* Read-only -> read-write upgrade. */
		if (fs->fs_ronly &&
		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			vn_lock(odevvp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_ACCESS(odevvp, VREAD | VWRITE,
			    td->td_ucred, td);
			if (error)
				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
			VOP_UNLOCK(odevvp);
			if (error) {
				return (error);
			}
			fs->fs_flags &= ~FS_UNCLEAN;
			if (fs->fs_clean == 0) {
				fs->fs_flags |= FS_UNCLEAN;
				if ((mp->mnt_flag & MNT_FORCE) ||
				    ((fs->fs_flags &
				     (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
				     (fs->fs_flags & FS_DOSOFTDEP))) {
					printf("WARNING: %s was not properly "
					    "dismounted\n",
					    mp->mnt_stat.f_mntonname);
				} else {
					vfs_mount_error(mp,
					    "R/W mount of %s denied. %s.%s",
					    mp->mnt_stat.f_mntonname,
					    "Filesystem is not clean - run fsck",
					    (fs->fs_flags & FS_SUJ) == 0 ? "" :
					    " Forced mount will invalidate"
					    " journal contents");
					return (EPERM);
				}
			}
			g_topology_lock();
			/*
			 * Request exclusive write access.
			 */
			error = g_access(ump->um_cp, 0, 1, 1);
			g_topology_unlock();
			if (error)
				return (error);
			if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0)
				return (error);
			error = vfs_write_suspend_umnt(mp);
			if (error != 0)
				return (error);
			fs->fs_ronly = 0;
			/*
			 * Remember which flags are cleared here so that the
			 * error paths below can restore them.
			 */
			MNT_ILOCK(mp);
			saved_mnt_flag = MNT_RDONLY;
			if (MOUNTEDSOFTDEP(mp) && (mp->mnt_flag &
			    MNT_ASYNC) != 0)
				saved_mnt_flag |= MNT_ASYNC;
			mp->mnt_flag &= ~saved_mnt_flag;
			MNT_IUNLOCK(mp);
			fs->fs_mtime = time_second;
			/* check to see if we need to start softdep */
			if ((fs->fs_flags & FS_DOSOFTDEP) &&
			    (error = softdep_mount(devvp, mp, fs, td->td_ucred))){
				fs->fs_ronly = 1;
				MNT_ILOCK(mp);
				mp->mnt_flag |= saved_mnt_flag;
				MNT_IUNLOCK(mp);
				vfs_write_resume(mp, 0);
				return (error);
			}
			fs->fs_clean = 0;
			if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) {
				fs->fs_ronly = 1;
				if ((fs->fs_flags & FS_DOSOFTDEP) != 0)
					softdep_unmount(mp);
				MNT_ILOCK(mp);
				mp->mnt_flag |= saved_mnt_flag;
				MNT_IUNLOCK(mp);
				vfs_write_resume(mp, 0);
				return (error);
			}
			if (fs->fs_snapinum[0] != 0)
				ffs_snapshot_mount(mp);
			vfs_write_resume(mp, 0);
		}
		/*
		 * Soft updates is incompatible with "async",
		 * so if we are doing softupdates stop the user
		 * from setting the async flag in an update.
		 * Softdep_mount() clears it in an initial mount
		 * or ro->rw remount.
		 */
		if (MOUNTEDSOFTDEP(mp)) {
			/* XXX: Reset too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_ASYNC;
			MNT_IUNLOCK(mp);
		}
		/*
		 * Keep MNT_ACLS flag if it is stored in superblock.
		 */
		if ((fs->fs_flags & FS_ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_ACLS;
			MNT_IUNLOCK(mp);
		}

		if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
			/* XXX: Set too late ? */
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_NFS4ACLS;
			MNT_IUNLOCK(mp);
		}
	}

	MNT_ILOCK(mp);
	/*
	 * This is racy versus lookup, see ufs_fplookup_vexec for details.
	 */
	if ((mp->mnt_kern_flag & MNTK_FPLOOKUP) != 0)
		panic("MNTK_FPLOOKUP set on mount %p when it should not be", mp);
	if ((mp->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS | MNT_UNION)) == 0)
		mp->mnt_kern_flag |= MNTK_FPLOOKUP;
	MNT_IUNLOCK(mp);

	vfs_mountedfrom(mp, fspec);
	return (0);
}

/*
 * Compatibility with old mount system call.
 *
 * Copies the struct ufs_args from "data", converts its fspec and
 * export members into "from" and "export" mount arguments and hands
 * them to kernel_mount().
 */

static int
ffs_cmount(struct mntarg *ma, void *data, uint64_t flags)
{
	struct ufs_args args;
	int error;

	if (data == NULL)
		return (EINVAL);
	error = copyin(data, &args, sizeof args);
	if (error)
		return (error);

	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
	ma = mount_arg(ma, "export", &args.export, sizeof(args.export));
	error = kernel_mount(ma, flags);

	return (error);
}

/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). If the 'force' flag
 * is 0, the filesystem must be mounted read-only.
 *
 * Things to do to update the mount:
 *	1) invalidate all cached meta-data.
 *	2) re-read superblock from disk.
 *	3) If requested, clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags
 *	   to allow secondary writers.
 *	4) invalidate all cached file data.
 *	5) re-read inode data for all active vnodes.
*/
int
ffs_reload(struct mount *mp, int flags)
{
	struct vnode *vp, *mvp, *devvp;
	struct inode *ip;
	struct buf *bp;
	struct fs *fs, *newfs;
	struct ufsmount *ump;
	int error;

	ump = VFSTOUFS(mp);

	/* A read-write filesystem may only be reloaded with FFSR_FORCE. */
	MNT_ILOCK(mp);
	if ((mp->mnt_flag & MNT_RDONLY) == 0 && (flags & FFSR_FORCE) == 0) {
		MNT_IUNLOCK(mp);
		return (EINVAL);
	}
	MNT_IUNLOCK(mp);

	/*
	 * Step 1: invalidate all cached meta-data.
	 */
	devvp = VFSTOUFS(mp)->um_devvp;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	if (vinvalbuf(devvp, 0, 0, 0) != 0)
		panic("ffs_reload: dirty1");
	VOP_UNLOCK(devvp);

	/*
	 * Step 2: re-read superblock from disk.
	 */
	if ((error = ffs_sbget(devvp, &newfs, UFS_STDSB, 0, M_UFSMNT,
	    ffs_use_bread)) != 0)
		return (error);
	/*
	 * Replace our superblock with the new superblock. Preserve
	 * our read-only status.
	 */
	fs = VFSTOUFS(mp)->um_fs;
	newfs->fs_ronly = fs->fs_ronly;
	free(fs->fs_csp, M_UFSMNT);
	free(fs->fs_si, M_UFSMNT);
	free(fs, M_UFSMNT);
	fs = VFSTOUFS(mp)->um_fs = newfs;
	ump->um_bsize = fs->fs_bsize;
	ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
	UFS_LOCK(ump);
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("WARNING: %s: reload pending error: blocks %jd "
		    "files %d\n", mp->mnt_stat.f_mntonname,
		    (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);
	/*
	 * Step 3: If requested, clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags
	 * to allow secondary writers.
	 */
	if ((flags & FFSR_UNSUSPEND) != 0) {
		MNT_ILOCK(mp);
		mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2);
		wakeup(&mp->mnt_flag);
		MNT_IUNLOCK(mp);
	}

loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		/*
		 * Skip syncer vnode.
		 */
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		/*
		 * Step 4: invalidate all cached file data.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			/* Could not get the vnode: restart the whole scan. */
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			goto loop;
		}
		if (vinvalbuf(vp, 0, 0, 0))
			panic("ffs_reload: dirty2");
		/*
		 * Step 5: re-read inode data for all active vnodes.
		 */
		ip = VTOI(vp);
		error =
		    bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (error) {
			vput(vp);
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			return (error);
		}
		if ((error = ffs_load_inode(bp, ip, fs, ip->i_number)) != 0) {
			brelse(bp);
			vput(vp);
			MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
			return (error);
		}
		ip->i_effnlink = ip->i_nlink;
		brelse(bp);
		vput(vp);
	}
	return (0);
}

/*
 * Common code for mount and mountroot
 *
 * "odevvp" is the device vnode supplied by the caller; a private vnode
 * (devvp) is allocated from it with mntfs_allocvp() and used for the
 * mount.  The device is claimed via si_mountpt, opened through GEOM,
 * the superblock is read and validated, and a struct ufsmount is
 * allocated, filled in and attached to "mp".
 *
 * Returns 0 on success.  On failure everything acquired so far is
 * released (see the "out" label) and an errno value is returned.
 */
static int
ffs_mountfs(struct vnode *odevvp, struct mount *mp, struct thread *td)
{
	struct ufsmount *ump;
	struct fs *fs;
	struct cdev *dev;
	int error, i, len, ronly;
	struct ucred *cred;
	struct g_consumer *cp;
	struct mount *nmp;
	struct vnode *devvp;
	int candelete, canspeedup;

	fs = NULL;
	ump = NULL;
	cred = td ? td->td_ucred : NOCRED;
	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;

	devvp = mntfs_allocvp(mp, odevvp);
	KASSERT(devvp->v_type == VCHR, ("reclaimed devvp"));
	dev = devvp->v_rdev;
	KASSERT(dev->si_snapdata == NULL, ("non-NULL snapshot data"));
	/* Claim the device; fail with EBUSY if si_mountpt is already set. */
	if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0,
	    (uintptr_t)mp) == 0) {
		mntfs_freevp(devvp);
		return (EBUSY);
	}
	g_topology_lock();
	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
	g_topology_unlock();
	if (error != 0) {
		atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
		mntfs_freevp(devvp);
		return (error);
	}
	dev_ref(dev);
	devvp->v_bufobj.bo_ops = &ffs_ops;
	BO_LOCK(&odevvp->v_bufobj);
	odevvp->v_bufobj.bo_flag |= BO_NOBUFS;
	BO_UNLOCK(&odevvp->v_bufobj);
	VOP_UNLOCK(devvp);
	if (dev->si_iosize_max != 0)
		mp->mnt_iosize_max = dev->si_iosize_max;
	if (mp->mnt_iosize_max > maxphys)
		mp->mnt_iosize_max = maxphys;
	if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) {
		error = EINVAL;
		vfs_mount_error(mp,
		    "Invalid sectorsize %d for superblock size %d",
		    cp->provider->sectorsize, SBLOCKSIZE);
		goto out;
	}
	/* fetch the superblock and summary information */
	if ((mp->mnt_flag & (MNT_ROOTFS | MNT_FORCE)) != 0)
		error = ffs_sbsearch(devvp, &fs, 0, M_UFSMNT, ffs_use_bread);
	else
		error = ffs_sbget(devvp, &fs, UFS_STDSB, 0, M_UFSMNT,
		    ffs_use_bread);
	if (error != 0)
		goto out;
	fs->fs_flags &= ~FS_UNCLEAN;
	if (fs->fs_clean == 0) {
		fs->fs_flags |= FS_UNCLEAN;
		if (ronly || (mp->mnt_flag & MNT_FORCE) ||
		    ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 &&
		     (fs->fs_flags & FS_DOSOFTDEP))) {
			printf("WARNING: %s was not properly dismounted\n",
			    mp->mnt_stat.f_mntonname);
		} else {
			vfs_mount_error(mp, "R/W mount on %s denied. "
			    "Filesystem is not clean - run fsck.%s",
			    mp->mnt_stat.f_mntonname,
			    (fs->fs_flags & FS_SUJ) == 0 ? "" :
			    " Forced mount will invalidate journal contents");
			error = EPERM;
			goto out;
		}
		if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) &&
		    (mp->mnt_flag & MNT_FORCE)) {
			printf("WARNING: %s: lost blocks %jd files %d\n",
			    mp->mnt_stat.f_mntonname,
			    (intmax_t)fs->fs_pendingblocks,
			    fs->fs_pendinginodes);
			fs->fs_pendingblocks = 0;
			fs->fs_pendinginodes = 0;
		}
	}
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("WARNING: %s: mount pending error: blocks %jd "
		    "files %d\n", mp->mnt_stat.f_mntonname,
		    (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	if ((fs->fs_flags & FS_GJOURNAL) != 0) {
#ifdef UFS_GJOURNAL
		/*
		 * Get journal provider name.
		 */
		len = 1024;
		mp->mnt_gjprovider = malloc((uint64_t)len, M_UFSMNT, M_WAITOK);
		if (g_io_getattr("GJOURNAL::provider", cp, &len,
		    mp->mnt_gjprovider) == 0) {
			mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, len,
			    M_UFSMNT, M_WAITOK);
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_GJOURNAL;
			MNT_IUNLOCK(mp);
		} else {
			if ((mp->mnt_flag & MNT_RDONLY) == 0)
				printf("WARNING: %s: GJOURNAL flag on fs "
				    "but no gjournal provider below\n",
				    mp->mnt_stat.f_mntonname);
			free(mp->mnt_gjprovider, M_UFSMNT);
			mp->mnt_gjprovider = NULL;
		}
#else
		printf("WARNING: %s: GJOURNAL flag on fs but no "
		    "UFS_GJOURNAL support\n", mp->mnt_stat.f_mntonname);
#endif
	} else {
		mp->mnt_gjprovider = NULL;
	}
	/* Allocate the per-mount structure and install the FFS methods. */
	ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO);
	ump->um_cp = cp;
	ump->um_bo = &devvp->v_bufobj;
	ump->um_fs = fs;
	if (fs->fs_magic == FS_UFS1_MAGIC) {
		ump->um_fstype = UFS1;
		ump->um_balloc = ffs_balloc_ufs1;
	} else {
		ump->um_fstype = UFS2;
		ump->um_balloc = ffs_balloc_ufs2;
	}
	ump->um_blkatoff = ffs_blkatoff;
	ump->um_truncate = ffs_truncate;
	ump->um_update = ffs_update;
	ump->um_valloc = ffs_valloc;
	ump->um_vfree = ffs_vfree;
	ump->um_ifree = ffs_ifree;
	ump->um_rdonly = ffs_rdonly;
	ump->um_snapgone = ffs_snapgone;
	/* Block-number checking is only installed for untrusted mounts. */
	if ((mp->mnt_flag & MNT_UNTRUSTED) != 0)
		ump->um_check_blkno = ffs_check_blkno;
	else
		ump->um_check_blkno = NULL;
	mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF);
	sx_init(&ump->um_checkpath_lock, "uchpth");
	fs->fs_ronly = ronly;
	fs->fs_active = NULL;
	mp->mnt_data = ump;
	mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0];
	mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1];
	nmp = NULL;
	/* Pick a fresh fsid if the stored one is unset or already in use. */
	if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 ||
	    (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) {
		if (nmp)
			vfs_rel(nmp);
		vfs_getnewfsid(mp);
	}
	ump->um_bsize = fs->fs_bsize;
	ump->um_maxsymlinklen = fs->fs_maxsymlinklen;
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_LOCAL;
	MNT_IUNLOCK(mp);
	if ((fs->fs_flags & FS_MULTILABEL) != 0) {
#ifdef MAC
		MNT_ILOCK(mp);
		mp->mnt_flag |= MNT_MULTILABEL;
		MNT_IUNLOCK(mp);
#else
		printf("WARNING: %s: multilabel flag on fs but "
		    "no MAC support\n", mp->mnt_stat.f_mntonname);
#endif
	}
	if ((fs->fs_flags & FS_ACLS) != 0) {
#ifdef UFS_ACL
		MNT_ILOCK(mp);

		if (mp->mnt_flag & MNT_NFS4ACLS)
			printf("WARNING: %s: ACLs flag on fs conflicts with "
			    "\"nfsv4acls\" mount option; option ignored\n",
			    mp->mnt_stat.f_mntonname);
		mp->mnt_flag &= ~MNT_NFS4ACLS;
		mp->mnt_flag |= MNT_ACLS;

		MNT_IUNLOCK(mp);
#else
		printf("WARNING: %s: ACLs flag on fs but no ACLs support\n",
		    mp->mnt_stat.f_mntonname);
#endif
	}
	if ((fs->fs_flags & FS_NFS4ACLS) != 0) {
#ifdef UFS_ACL
		MNT_ILOCK(mp);

		if (mp->mnt_flag & MNT_ACLS)
			printf("WARNING: %s: NFSv4 ACLs flag on fs conflicts "
			    "with \"acls\" mount option; option ignored\n",
			    mp->mnt_stat.f_mntonname);
		mp->mnt_flag &= ~MNT_ACLS;
		mp->mnt_flag |= MNT_NFS4ACLS;

		MNT_IUNLOCK(mp);
#else
		printf("WARNING: %s: NFSv4 ACLs flag on fs but no "
		    "ACLs support\n", mp->mnt_stat.f_mntonname);
#endif
	}
	if ((fs->fs_flags & FS_TRIM) != 0) {
		len = sizeof(int);
		if (g_io_getattr("GEOM::candelete", cp, &len,
		    &candelete) == 0) {
			if (candelete)
				ump->um_flags |= UM_CANDELETE;
			else
				printf("WARNING: %s: TRIM flag on fs but disk "
				    "does not support TRIM\n",
				    mp->mnt_stat.f_mntonname);
		} else {
			printf("WARNING: %s: TRIM flag on fs but disk does "
			    "not confirm that it supports TRIM\n",
			    mp->mnt_stat.f_mntonname);
		}
		/* Set up the TRIM taskqueue and hash only when usable. */
		if (((ump->um_flags) & UM_CANDELETE) != 0) {
			ump->um_trim_tq = taskqueue_create("trim", M_WAITOK,
			    taskqueue_thread_enqueue, &ump->um_trim_tq);
			taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS,
			    "%s trim", mp->mnt_stat.f_mntonname);
			ump->um_trimhash = hashinit(MAXTRIMIO, M_TRIM,
			    &ump->um_trimlisthashsize);
		}
	}

	len = sizeof(int);
	if (g_io_getattr("GEOM::canspeedup", cp, &len, &canspeedup) == 0) {
		if (canspeedup)
			ump->um_flags |= UM_CANSPEEDUP;
	}

	ump->um_mountp = mp;
	ump->um_dev = dev;
	ump->um_devvp = devvp;
	ump->um_odevvp = odevvp;
	ump->um_nindir = fs->fs_nindir;
	ump->um_bptrtodb = fs->fs_fsbtodb;
	ump->um_seqinc = fs->fs_frag;
	for (i = 0; i < MAXQUOTAS; i++)
		ump->um_quotas[i] = NULLVP;
#ifdef UFS_EXTATTR
	ufs_extattr_uepm_init(&ump->um_extattr);
#endif
	/*
	 * Set FS local "last mounted on" information (NULL pad)
	 */
	bzero(fs->fs_fsmnt, MAXMNTLEN);
	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
	mp->mnt_stat.f_iosize = fs->fs_bsize;

	if (mp->mnt_flag & MNT_ROOTFS) {
		/*
		 * Root mount; update timestamp in mount structure.
		 * this will be used by the common root mount code
		 * to update the system clock.
		 */
		mp->mnt_time = fs->fs_time;
	}

	if (ronly == 0) {
		fs->fs_mtime = time_second;
		if ((fs->fs_flags & FS_DOSOFTDEP) &&
		    (error = softdep_mount(devvp, mp, fs, cred)) != 0) {
			ffs_flushfiles(mp, FORCECLOSE, td);
			goto out;
		}
		if (fs->fs_snapinum[0] != 0)
			ffs_snapshot_mount(mp);
		fs->fs_fmod = 1;
		fs->fs_clean = 0;
		/* The result of this superblock write is ignored. */
		(void) ffs_sbupdate(ump, MNT_WAIT, 0);
	}
	/*
	 * Initialize filesystem state information in mount struct.
	 */
	MNT_ILOCK(mp);
	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED |
	    MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS | MNTK_USES_BCACHE;
	MNT_IUNLOCK(mp);
#ifdef UFS_EXTATTR
#ifdef UFS_EXTATTR_AUTOSTART
	/*
	 *
	 * Auto-starting does the following:
	 *	- check for /.attribute in the fs, and extattr_start if so
	 *	- for each file in .attribute, enable that file with
	 *	  an attribute of the same name.
	 * Not clear how to report errors -- probably eat them.
	 * This would all happen while the filesystem was busy/not
	 * available, so would effectively be "atomic".
	 */
	(void) ufs_extattr_autostart(mp, td);
#endif /* !UFS_EXTATTR_AUTOSTART */
#endif /* !UFS_EXTATTR */
	return (0);
out:
	/* Error exit: release whatever was acquired above. */
	if (fs != NULL) {
		free(fs->fs_csp, M_UFSMNT);
		free(fs->fs_si, M_UFSMNT);
		free(fs, M_UFSMNT);
	}
	if (cp != NULL) {
		g_topology_lock();
		g_vfs_close(cp);
		g_topology_unlock();
	}
	if (ump != NULL) {
		mtx_destroy(UFS_MTX(ump));
		sx_destroy(&ump->um_checkpath_lock);
		if (mp->mnt_gjprovider != NULL) {
			free(mp->mnt_gjprovider, M_UFSMNT);
			mp->mnt_gjprovider = NULL;
		}
		MPASS(ump->um_softdep == NULL);
		free(ump, M_UFSMNT);
		mp->mnt_data = NULL;
	}
	BO_LOCK(&odevvp->v_bufobj);
	odevvp->v_bufobj.bo_flag &= ~BO_NOBUFS;
	BO_UNLOCK(&odevvp->v_bufobj);
	atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0);
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	mntfs_freevp(devvp);
	dev_rel(dev);
	return (error);
}

/*
 * A read function for use by filesystem-layer routines.
*/
/*
 * ffs_use_bread() allocates a `size'-byte buffer with malloc(9) and stores
 * it in *bufp (which must be NULL on entry), then calls bread() on the
 * vnode passed as `devfd' for device block btodb(loc) and copies `size'
 * bytes of the result into that buffer.  The buf is marked
 * B_INVAL | B_NOCACHE before it is released.
 *
 * Returns 0 on success or the error from bread().  When bread() fails,
 * *bufp is left pointing at the allocated buffer; it is not freed here.
 * NOTE(review): presumably the caller frees *bufp on error -- confirm.
 */
static int
ffs_use_bread(void *devfd, off_t loc, void **bufp, int size)
{
	struct buf *bp;
	int error;

	KASSERT(*bufp == NULL, ("ffs_use_bread: non-NULL *bufp %p\n", *bufp));
	*bufp = malloc(size, M_UFSMNT, M_WAITOK);
	if ((error = bread((struct vnode *)devfd, btodb(loc), size, NOCRED,
	    &bp)) != 0)
		return (error);
	bcopy(bp->b_data, *bufp, size);
	/* The data was copied out; do not keep this buf cached. */
	bp->b_flags |= B_INVAL | B_NOCACHE;
	brelse(bp);
	return (0);
}

/*
 * unmount system call
 *
 * Flushes the files of the mount (through softdep_flushfiles() or
 * ffs_flushfiles()), writes the superblock back on a read-write mount,
 * and then releases the GEOM consumer, the device vnodes, the locks and
 * the memory hanging off the ufsmount.
 *
 * mntflags: MNT_FORCE is translated into FORCECLOSE for the flush.
 *
 * Returns 0 on success.  On failure the error is returned, suspended
 * writes are resumed, and extended attributes are re-initialized if they
 * had been stopped successfully at the top of the function.
 */
static int
ffs_unmount(struct mount *mp, int mntflags)
{
	struct thread *td;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, flags, susp;
#ifdef UFS_EXTATTR
	int e_restart;
#endif

	flags = 0;
	td = curthread;
	fs = ump->um_fs;
	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
	/* Writes are only suspended for a mount that is not read-only. */
	susp = fs->fs_ronly == 0;
#ifdef UFS_EXTATTR
	if ((error = ufs_extattr_stop(mp, td))) {
		if (error != EOPNOTSUPP)
			printf("WARNING: unmount %s: ufs_extattr_stop "
			    "returned errno %d\n", mp->mnt_stat.f_mntonname,
			    error);
		/* Nothing was stopped, so nothing to restart on failure. */
		e_restart = 0;
	} else {
		ufs_extattr_uepm_destroy(&ump->um_extattr);
		e_restart = 1;
	}
#endif
	if (susp) {
		error = vfs_write_suspend_umnt(mp);
		if (error != 0)
			goto fail1;
	}
	if (MOUNTEDSOFTDEP(mp))
		error = softdep_flushfiles(mp, flags, td);
	else
		error = ffs_flushfiles(mp, flags, td);
	/* An error accepted by ffs_fsfail_cleanup() does not stop the unmount. */
	if (error != 0 && !ffs_fsfail_cleanup(ump, error))
		goto fail;

	UFS_LOCK(ump);
	if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) {
		printf("WARNING: unmount %s: pending error: blocks %jd "
		    "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks,
		    fs->fs_pendinginodes);
		fs->fs_pendingblocks = 0;
		fs->fs_pendinginodes = 0;
	}
	UFS_UNLOCK(ump);
	if (MOUNTEDSOFTDEP(mp))
		softdep_unmount(mp);
	MPASS(ump->um_softdep == NULL);
	if (fs->fs_ronly == 0) {
		/* Mark clean only if neither FS_UNCLEAN nor FS_NEEDSFSCK is set. */
		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
		error = ffs_sbupdate(ump, MNT_WAIT, 0);
		if (ffs_fsfail_cleanup(ump, error))
			error = 0;
		/*
		 * NOTE(review): error was already cleared above whenever
		 * ffs_fsfail_cleanup() returned true, so the second call in
		 * this condition looks redundant -- confirm.
		 */
		if (error != 0 && !ffs_fsfail_cleanup(ump, error)) {
			fs->fs_clean = 0;
			goto fail;
		}
	}
	if (susp)
		vfs_write_resume(mp, VR_START_WRITE);
	if (ump->um_trim_tq != NULL) {
		MPASS(ump->um_trim_inflight == 0);
		taskqueue_free(ump->um_trim_tq);
		free (ump->um_trimhash, M_TRIM);
	}
	/* Past this point the unmount cannot fail; tear everything down. */
	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
	g_topology_lock();
	g_vfs_close(ump->um_cp);
	g_topology_unlock();
	BO_LOCK(&ump->um_odevvp->v_bufobj);
	ump->um_odevvp->v_bufobj.bo_flag &= ~BO_NOBUFS;
	BO_UNLOCK(&ump->um_odevvp->v_bufobj);
	atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0);
	mntfs_freevp(ump->um_devvp);
	vrele(ump->um_odevvp);
	dev_rel(ump->um_dev);
	mtx_destroy(UFS_MTX(ump));
	sx_destroy(&ump->um_checkpath_lock);
	if (mp->mnt_gjprovider != NULL) {
		free(mp->mnt_gjprovider, M_UFSMNT);
		mp->mnt_gjprovider = NULL;
	}
	free(fs->fs_csp, M_UFSMNT);
	free(fs->fs_si, M_UFSMNT);
	free(fs, M_UFSMNT);
	free(ump, M_UFSMNT);
	mp->mnt_data = NULL;
	if (td->td_su == mp) {
		td->td_su = NULL;
		vfs_rel(mp);
	}
	return (error);

fail:
	if (susp)
		vfs_write_resume(mp, VR_START_WRITE);
fail1:
#ifdef UFS_EXTATTR
	/* Bring extended attributes back if they were stopped above. */
	if (e_restart) {
		ufs_extattr_uepm_init(&ump->um_extattr);
#ifdef UFS_EXTATTR_AUTOSTART
		(void) ufs_extattr_autostart(mp, td);
#endif
	}
#endif

	return (error);
}

/*
 * Flush out all the files in a filesystem.
 *
 * mp:    mount whose vnodes are flushed.
 * flags: vflush() flags; EARLYFLUSH is also tested here to decide whether
 *        a quotaoff() failure is fatal, and FORCECLOSE is added once
 *        snapshots have been unmounted.
 * td:    thread passed through to vflush(), quotaoff() and VOP_FSYNC().
 *
 * Returns 0 on success, or the first error from vflush(), quotaoff()
 * (unless EARLYFLUSH is set) or the final VOP_FSYNC() of the device vnode.
 */
int
ffs_flushfiles(struct mount *mp, int flags, struct thread *td)
{
	struct ufsmount *ump;
	int qerror, error;

	ump = VFSTOUFS(mp);
	qerror = 0;
#ifdef QUOTA
	if (mp->mnt_flag & MNT_QUOTA) {
		int i;
		error = vflush(mp, 0, SKIPSYSTEM|flags, td);
		if (error)
			return (error);
		for (i = 0; i < MAXQUOTAS; i++) {
			error = quotaoff(td, mp, i);
			if (error != 0) {
				if ((flags & EARLYFLUSH) == 0)
					return (error);
				else
					/* Remember the failure; see below. */
					qerror = error;
			}
		}

		/*
		 * Here we fall through to vflush again to ensure that
		 * we have gotten rid of all the system vnodes, unless
		 * quotas must not be closed.
		 */
	}
#endif
	/* devvp is not locked there */
	if (ump->um_devvp->v_vflag & VV_COPYONWRITE) {
		if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0)
			return (error);
		ffs_snapshot_unmount(mp);
		flags |= FORCECLOSE;
		/*
		 * Here we fall through to vflush again to ensure
		 * that we have gotten rid of all the system vnodes.
		 */
	}

	/*
	 * Do not close system files if quotas were not closed, to be
	 * able to sync the remaining dquots. The freeblks softupdate
	 * workitems might hold a reference on a dquot, preventing
	 * quotaoff() from completing. Next round of
	 * softdep_flushworklist() iteration should process the
	 * blockers, allowing the next run of quotaoff() to finally
	 * flush held dquots.
	 *
	 * Otherwise, flush all the files.
	 */
	if (qerror == 0 && (error = vflush(mp, 0, flags, td)) != 0)
		return (error);

	/*
	 * If this is a forcible unmount and there were any files that
	 * were unlinked but still open, then vflush() will have
	 * truncated and freed those files, which might have started
	 * some trim work. Wait here for any trims to complete
	 * and process the blkfrees which follow the trims.
	 * This may create more dirty devvp buffers and softdep deps.
	 */
	if (ump->um_trim_tq != NULL) {
		while (ump->um_trim_inflight != 0)
			pause("ufsutr", hz);
		taskqueue_drain_all(ump->um_trim_tq);
	}

	/*
	 * Flush filesystem metadata.
	 */
	vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY);
	error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td);
	VOP_UNLOCK(ump->um_devvp);
	return (error);
}

/*
 * Get filesystem statistics.
*/
/*
 * ffs_statfs() fills *sbp from the in-core superblock of `mp'.  The free
 * block and inode counts are read while holding UFS_LOCK.  Panics if the
 * superblock magic is neither FS_UFS1_MAGIC nor FS_UFS2_MAGIC.
 * Always returns 0.
 */
static int
ffs_statfs(struct mount *mp, struct statfs *sbp)
{
	struct ufsmount *ump;
	struct fs *fs;

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC)
		panic("ffs_statfs");
	sbp->f_version = STATFS_VERSION;
	sbp->f_bsize = fs->fs_fsize;
	sbp->f_iosize = fs->fs_bsize;
	sbp->f_blocks = fs->fs_dsize;
	UFS_LOCK(ump);
	/* Blocks pending release are counted as free. */
	sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag +
	    fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks);
	sbp->f_bavail = freespace(fs, fs->fs_minfree) +
	    dbtofsb(fs, fs->fs_pendingblocks);
	/*
	 * NOTE(review): the product is evaluated in the width of
	 * fs_ncg/fs_ipg before the ino_t-typed UFS_ROOTINO is subtracted;
	 * confirm that it cannot wrap for a mountable filesystem.
	 */
	sbp->f_files = fs->fs_ncg * fs->fs_ipg - UFS_ROOTINO;
	sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes;
	UFS_UNLOCK(ump);
	sbp->f_namemax = UFS_MAXNAMLEN;
	return (0);
}

/*
 * Return true if any of IN_ACCESS, IN_CHANGE, IN_MODIFIED or IN_UPDATE
 * is set in ip->i_flag.
 */
static bool
sync_doupdate(struct inode *ip)
{

	return ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED |
	    IN_UPDATE)) != 0);
}

/*
 * Filter handed to MNT_VNODE_FOREACH_LAZY() by ffs_sync_lazy().
 * Returns nonzero for a vnode that is not VNON and either has one of the
 * sync_doupdate() flags set or has VI_OWEINACT set in v_iflag.
 */
static int
ffs_sync_lazy_filter(struct vnode *vp, void *arg __unused)
{
	struct inode *ip;

	/*
	 * Flags are safe to access because ->v_data invalidation
	 * is held off by listmtx.
	 */
	if (vp->v_type == VNON)
		return (false);
	ip = VTOI(vp);
	if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0)
		return (false);
	return (true);
}

/*
 * For a lazy sync, we only care about access times, quotas and the
 * superblock. Other filesystem changes are already converted to
 * cylinder group blocks or inode blocks updates and are written to
 * disk by syncer.
 *
 * Returns 0, or the last error seen from vget()/ffs_update() or from
 * ffs_sbupdate().
 */
static int
ffs_sync_lazy(struct mount *mp)
{
	struct vnode *mvp, *vp;
	struct inode *ip;
	int allerror, error;

	allerror = 0;
	/* With MNT_NOATIME only quotas and the superblock are handled. */
	if ((mp->mnt_flag & MNT_NOATIME) != 0) {
#ifdef QUOTA
		qsync(mp);
#endif
		goto sbupdate;
	}
	MNT_VNODE_FOREACH_LAZY(vp, mp, mvp, ffs_sync_lazy_filter, NULL) {
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		ip = VTOI(vp);

		/*
		 * The IN_ACCESS flag is converted to IN_MODIFIED by
		 * ufs_close() and ufs_getattr() by the calls to
		 * ufs_itimes_locked(), without subsequent UFS_UPDATE().
		 * Test also all the other timestamp flags too, to pick up
		 * any other cases that could be missed.
		 */
		if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		/* LK_NOWAIT: a vnode that cannot be locked right away is skipped. */
		if ((error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT |
		    LK_INTERLOCK)) != 0)
			continue;
#ifdef QUOTA
		qsyncvp(vp);
#endif
		if (sync_doupdate(ip))
			error = ffs_update(vp, 0);
		if (error != 0)
			allerror = error;
		vput(vp);
	}
sbupdate:
	if (VFSTOUFS(mp)->um_fs->fs_fmod != 0 &&
	    (error = ffs_sbupdate(VFSTOUFS(mp), MNT_LAZY, 0)) != 0)
		allerror = error;
	return (allerror);
}

/*
 * Go through the disk queues to initiate sandbagged IO;
 * go through the inodes to write those that have been modified;
 * initiate the writing of the super block if it has been modified.
 *
 * Note: we are always called with the filesystem marked busy using
 * vfs_busy().
 *
 * waitfor: MNT_LAZY is delegated to ffs_sync_lazy() unless the system is
 *          rebooting (then it is treated as MNT_NOWAIT); MNT_SUSPEND is
 *          handled as MNT_WAIT with the additional suspension step below.
 *
 * Returns 0, or the last error recorded in allerror that
 * ffs_fsfail_cleanup() did not accept.
 */
static int
ffs_sync(struct mount *mp, int waitfor)
{
	struct vnode *mvp, *vp, *devvp;
	struct thread *td;
	struct inode *ip;
	struct ufsmount *ump = VFSTOUFS(mp);
	struct fs *fs;
	int error, count, lockreq, allerror = 0;
	int suspend;
	int suspended;
	int secondary_writes;
	int secondary_accwrites;
	int softdep_deps;
	int softdep_accdeps;
	struct bufobj *bo;

	suspend = 0;
	suspended = 0;
	td = curthread;
	fs = ump->um_fs;
	if (fs->fs_fmod != 0 && fs->fs_ronly != 0)
		panic("%s: ffs_sync: modification on read-only filesystem",
		    fs->fs_fsmnt);
	if (waitfor == MNT_LAZY) {
		if (!rebooting)
			return (ffs_sync_lazy(mp));
		waitfor = MNT_NOWAIT;
	}

	/*
	 * Write back each (modified) inode.
	 */
	lockreq = LK_EXCLUSIVE | LK_NOWAIT;
	if (waitfor == MNT_SUSPEND) {
		suspend = 1;
		waitfor = MNT_WAIT;
	}
	/* A waiting sync may block on each vnode lock. */
	if (waitfor == MNT_WAIT)
		lockreq = LK_EXCLUSIVE;
	lockreq |= LK_INTERLOCK;
loop:
	/* Grab snapshot of secondary write counts */
	MNT_ILOCK(mp);
	secondary_writes = mp->mnt_secondary_writes;
	secondary_accwrites = mp->mnt_secondary_accwrites;
	MNT_IUNLOCK(mp);

	/* Grab snapshot of softdep dependency counts */
	softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps);

	MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
		/*
		 * Depend on the vnode interlock to keep things stable enough
		 * for a quick test. Since there might be hundreds of
		 * thousands of vnodes, we cannot afford even a subroutine
		 * call unless there's a good chance that we have work to do.
		 */
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		ip = VTOI(vp);
		if ((ip->i_flag &
		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
		    vp->v_bufobj.bo_dirty.bv_cnt == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		if ((error = vget(vp, lockreq)) != 0) {
			/* On ENOENT abandon this iteration and rescan. */
			if (error == ENOENT) {
				MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
				goto loop;
			}
			continue;
		}
#ifdef QUOTA
		qsyncvp(vp);
#endif
		/* Retry the vnode sync for as long as it reports ERELOOKUP. */
		for (;;) {
			error = ffs_syncvnode(vp, waitfor, 0);
			if (error == ERELOOKUP)
				continue;
			if (error != 0)
				allerror = error;
			break;
		}
		vput(vp);
	}
	/*
	 * Force stale filesystem control information to be flushed.
	 */
	if (waitfor == MNT_WAIT || rebooting) {
		if ((error = softdep_flushworklist(ump->um_mountp, &count, td)))
			allerror = error;
		if (ffs_fsfail_cleanup(ump, allerror))
			allerror = 0;
		/* Flushed work items may create new vnodes to clean */
		if (allerror == 0 && count)
			goto loop;
	}

	devvp = ump->um_devvp;
	bo = &devvp->v_bufobj;
	BO_LOCK(bo);
	if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) {
		BO_UNLOCK(bo);
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(devvp, waitfor, td);
		VOP_UNLOCK(devvp);
		if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN))
			error = ffs_sbupdate(ump, waitfor, 0);
		if (error != 0)
			allerror = error;
		if (ffs_fsfail_cleanup(ump, allerror))
			allerror = 0;
		if (allerror == 0 && waitfor == MNT_WAIT)
			goto loop;
	} else if (suspend != 0) {
		/*
		 * Both outcomes below release the mount interlock, and the
		 * mtx_assert() requires it to be owned after the call.
		 */
		if (softdep_check_suspend(mp, devvp, softdep_deps,
		    softdep_accdeps, secondary_writes,
		    secondary_accwrites) != 0) {
			MNT_IUNLOCK(mp);
			goto loop;	/* More work needed */
		}
		mtx_assert(MNT_MTX(mp), MA_OWNED);
		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
		MNT_IUNLOCK(mp);
		suspended = 1;
	} else
		BO_UNLOCK(bo);
	/*
	 * Write back modified superblock.
	 */
	if (fs->fs_fmod != 0 &&
	    (error = ffs_sbupdate(ump, waitfor, suspended)) != 0)
		allerror = error;
	if (ffs_fsfail_cleanup(ump, allerror))
		allerror = 0;
	return (allerror);
}

/*
 * Get the vnode for inode `ino' on `mp': ffs_vgetf() with no FFSV_* flags.
 */
int
ffs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
{
	return (ffs_vgetf(mp, ino, flags, vpp, 0));
}

/*
 * Look up the vnode for inode `ino' on `mp' in the vfs hash and return it
 * in *vpp; if it is not there, create a new vnode and in-core inode,
 * fill the inode (zeroed for FFSV_NEWINODE, otherwise read from disk)
 * and finish vnode initialization.
 *
 * flags:     lock flags; a shared lock request is promoted to exclusive
 *            when a new vnode has to be created.
 * ffs_flags: FFSV_* bits.  The ones tested here are FFSV_REPLACE,
 *            FFSV_REPLACE_DOOMED, FFSV_FORCEINSMQ, FFSV_NEWINODE and
 *            FFSV_FORCEINODEDEP.
 *
 * Returns 0 with *vpp set on success.  On the failure paths handled here
 * *vpp is set to NULL and the error is returned; errors coming directly
 * from vfs_hash_get() or vfs_hash_insert() are returned without touching
 * *vpp in this function.
 */
int
ffs_vgetf(struct mount *mp, ino_t ino, int flags, struct vnode **vpp,
    int ffs_flags)
{
	struct fs *fs;
	struct inode *ip;
	struct ufsmount *ump;
	struct buf *bp;
	struct vnode *vp;
	daddr_t dbn;
	int error;

	/* The replace variants require an exclusive lock request. */
	MPASS((ffs_flags & (FFSV_REPLACE | FFSV_REPLACE_DOOMED)) == 0 ||
	    (flags & LK_EXCLUSIVE) != 0);

	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
	if (error != 0)
		return (error);
	if (*vpp != NULL) {
		/*
		 * Found in the hash.  Return it unless FFSV_REPLACE is set
		 * and, with FFSV_REPLACE_DOOMED, the vnode is doomed; in
		 * that case get rid of it and build a new one below.
		 */
		if ((ffs_flags & FFSV_REPLACE) == 0 ||
		    ((ffs_flags & FFSV_REPLACE_DOOMED) == 0 ||
		    !VN_IS_DOOMED(*vpp)))
			return (0);
		vgone(*vpp);
		vput(*vpp);
	}

	/*
	 * We must promote to an exclusive lock for vnode creation. This
	 * can happen if lookup is passed LOCKSHARED.
	 */
	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
		flags &= ~LK_TYPE_MASK;
		flags |= LK_EXCLUSIVE;
	}

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such rare case as simultaneous creation of vnode
	 * for same ino by different processes. We just allow them to race
	 * and check later to decide who wins. Let the race begin!
	 */

	ump = VFSTOUFS(mp);
	fs = ump->um_fs;
	ip = uma_zalloc_smr(uma_inode, M_WAITOK | M_ZERO);

	/* Allocate a new vnode/inode. */
	error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ?
	    &ffs_vnodeops1 : &ffs_vnodeops2, &vp);
	if (error) {
		*vpp = NULL;
		uma_zfree_smr(uma_inode, ip);
		return (error);
	}
	/*
	 * FFS supports recursive locking.
	 */
	lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_NOWITNESS, NULL);
	VN_LOCK_AREC(vp);
	vp->v_data = ip;
	vp->v_bufobj.bo_bsize = fs->fs_bsize;
	ip->i_vnode = vp;
	ip->i_ump = ump;
	ip->i_number = ino;
	ip->i_ea_refs = 0;
	ip->i_nextclustercg = -1;
	ip->i_flag = fs->fs_magic == FS_UFS1_MAGIC ? 0 : IN_UFS2;
	ip->i_mode = 0; /* ensure error cases below throw away vnode */
	cluster_init_vn(&ip->i_clusterw);
#ifdef DIAGNOSTIC
	ufs_init_trackers(ip);
#endif
#ifdef QUOTA
	{
		int i;
		for (i = 0; i < MAXQUOTAS; i++)
			ip->i_dquot[i] = NODQUOT;
	}
#endif

	if (ffs_flags & FFSV_FORCEINSMQ)
		vp->v_vflag |= VV_FORCEINSMQ;
	error = insmntque(vp, mp);
	if (error != 0) {
		uma_zfree_smr(uma_inode, ip);
		*vpp = NULL;
		return (error);
	}
	vp->v_vflag &= ~VV_FORCEINSMQ;
	error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL);
	if (error != 0)
		return (error);
	if (*vpp != NULL) {
		/*
		 * Calls from ffs_valloc() (i.e. FFSV_REPLACE set)
		 * operate on empty inode, which must not be found by
		 * other threads until fully filled. Vnode for empty
		 * inode must be not re-inserted on the hash by other
		 * thread, after removal by us at the beginning.
		 */
		MPASS((ffs_flags & FFSV_REPLACE) == 0);
		return (0);
	}
	/* Allocate the dinode copy that matches the filesystem format. */
	if (I_IS_UFS1(ip))
		ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
	else
		ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);

	if ((ffs_flags & FFSV_NEWINODE) != 0) {
		/* New inode, just zero out its contents. */
		if (I_IS_UFS1(ip))
			memset(ip->i_din1, 0, sizeof(struct ufs1_dinode));
		else
			memset(ip->i_din2, 0, sizeof(struct ufs2_dinode));
	} else {
		/* Read the disk contents for the inode, copy into the inode. */
		dbn = fsbtodb(fs, ino_to_fsba(fs, ino));
		error = ffs_breadz(ump, ump->um_devvp, dbn, dbn,
		    (int)fs->fs_bsize, NULL, NULL, 0, NOCRED, 0, NULL, &bp);
		if (error != 0) {
			/*
			 * The inode does not contain anything useful, so it
			 * would be misleading to leave it on its hash chain.
			 * With mode still zero, it will be unlinked and
			 * returned to the free list by vput().
			 */
			vgone(vp);
			vput(vp);
			*vpp = NULL;
			return (error);
		}
		if ((error = ffs_load_inode(bp, ip, fs, ino)) != 0) {
			bqrelse(bp);
			vgone(vp);
			vput(vp);
			*vpp = NULL;
			return (error);
		}
		bqrelse(bp);
	}
	if (DOINGSOFTDEP(vp) && (!fs->fs_ronly ||
	    (ffs_flags & FFSV_FORCEINODEDEP) != 0))
		softdep_load_inodeblock(ip);
	else
		ip->i_effnlink = ip->i_nlink;

	/*
	 * Initialize the vnode from the inode, check for aliases.
	 * Note that the underlying vnode may have changed.
	 */
	error = ufs_vinit(mp, I_IS_UFS1(ip) ? &ffs_fifoops1 : &ffs_fifoops2,
	    &vp);
	if (error) {
		vgone(vp);
		vput(vp);
		*vpp = NULL;
		return (error);
	}

	/*
	 * Finish inode initialization.
	 */
	if (vp->v_type != VFIFO) {
		/* FFS supports shared locking for all files except fifos. */
		VN_LOCK_ASHARE(vp);
	}

	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		while (ip->i_gen == 0)
			ip->i_gen = arc4random();
		/* Only record the new number in the dinode if not read-only. */
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
			UFS_INODE_SET_FLAG(ip, IN_MODIFIED);
			DIP_SET(ip, i_gen, ip->i_gen);
		}
	}
#ifdef MAC
	if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) {
		/*
		 * If this vnode is already allocated, and we're running
		 * multi-label, attempt to perform a label association
		 * from the extended attributes on the inode.
		 */
		error = mac_vnode_associate_extattr(mp, vp);
		if (error) {
			/* ufs_inactive will release ip->i_devvp ref. */
			vgone(vp);
			vput(vp);
			*vpp = NULL;
			return (error);
		}
	}
#endif

	vn_set_state(vp, VSTATE_CONSTRUCTED);
	*vpp = vp;
	return (0);
}

/*
 * File handle to vnode
 *
 * Have to be really careful about stale file handles:
 * - check that the inode number is valid
 * - for UFS2 check that the inode number is initialized
 * - call ffs_vget() to get the locked inode
 * - check for an unallocated inode (i_mode == 0)
 * - check that the given client host has export rights and return
 * those rights via.
exflagsp and credanonp */ static int ffs_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) { struct ufid *ufhp; ufhp = (struct ufid *)fhp; return (ffs_inotovp(mp, ufhp->ufid_ino, ufhp->ufid_gen, flags, vpp, 0)); } /* * Return a vnode from a mounted filesystem for inode with specified * generation number. Return ESTALE if the inode with given generation * number no longer exists on that filesystem. */ int ffs_inotovp(struct mount *mp, ino_t ino, uint64_t gen, int lflags, struct vnode **vpp, int ffs_flags) { struct ufsmount *ump; struct vnode *nvp; struct inode *ip; struct fs *fs; struct cg *cgp; struct buf *bp; uint64_t cg; ump = VFSTOUFS(mp); fs = ump->um_fs; *vpp = NULL; if (ino < UFS_ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg) return (ESTALE); /* * Need to check if inode is initialized because UFS2 does lazy * initialization and nfs_fhtovp can offer arbitrary inode numbers. */ if (fs->fs_magic == FS_UFS2_MAGIC) { cg = ino_to_cg(fs, ino); if (ffs_getcg(fs, ump->um_devvp, cg, 0, &bp, &cgp) != 0) return (ESTALE); if (ino >= cg * fs->fs_ipg + cgp->cg_initediblk) { brelse(bp); return (ESTALE); } brelse(bp); } if (ffs_vgetf(mp, ino, lflags, &nvp, ffs_flags) != 0) return (ESTALE); ip = VTOI(nvp); if (ip->i_mode == 0 || ip->i_gen != gen || ip->i_effnlink <= 0) { if (ip->i_mode == 0) vgone(nvp); vput(nvp); return (ESTALE); } vnode_create_vobject(nvp, DIP(ip, i_size), curthread); *vpp = nvp; return (0); } /* * Initialize the filesystem. */ static int ffs_init(struct vfsconf *vfsp) { ffs_susp_initialize(); softdep_initialize(); return (ufs_init(vfsp)); } /* * Undo the work of ffs_init(). */ static int ffs_uninit(struct vfsconf *vfsp) { int ret; ret = ufs_uninit(vfsp); softdep_uninitialize(); ffs_susp_uninitialize(); taskqueue_drain_all(taskqueue_thread); return (ret); } /* * Structure used to pass information from ffs_sbupdate to its * helper routine ffs_use_bwrite. 
*/ struct devfd { struct ufsmount *ump; struct buf *sbbp; int waitfor; int suspended; int error; }; /* * Write a superblock and associated information back to disk. */ int ffs_sbupdate(struct ufsmount *ump, int waitfor, int suspended) { struct fs *fs; struct buf *sbbp; struct devfd devfd; fs = ump->um_fs; if (fs->fs_ronly == 1 && (ump->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) != (MNT_RDONLY | MNT_UPDATE)) panic("ffs_sbupdate: write read-only filesystem"); /* * We use the superblock's buf to serialize calls to ffs_sbupdate(). */ sbbp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize, 0, 0, 0); /* * Initialize info needed for write function. */ devfd.ump = ump; devfd.sbbp = sbbp; devfd.waitfor = waitfor; devfd.suspended = suspended; devfd.error = 0; return (ffs_sbput(&devfd, fs, fs->fs_sblockloc, ffs_use_bwrite)); } /* * Write function for use by filesystem-layer routines. */ static int ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size) { struct devfd *devfdp; struct ufsmount *ump; struct buf *bp; struct fs *fs; int error; devfdp = devfd; ump = devfdp->ump; fs = ump->um_fs; /* * Writing the superblock summary information. */ if (loc != fs->fs_sblockloc) { bp = getblk(ump->um_devvp, btodb(loc), size, 0, 0, 0); bcopy(buf, bp->b_data, (uint64_t)size); if (devfdp->suspended) bp->b_flags |= B_VALIDSUSPWRT; if (devfdp->waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) devfdp->error = error; return (0); } /* * Writing the superblock itself. We need to do special checks for it. 
*/ bp = devfdp->sbbp; if (ffs_fsfail_cleanup(ump, devfdp->error)) devfdp->error = 0; if (devfdp->error != 0) { brelse(bp); return (devfdp->error); } if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 && (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n", fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1); fs->fs_sblockloc = SBLOCK_UFS1; } if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 && (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n", fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2); fs->fs_sblockloc = SBLOCK_UFS2; } if (MOUNTEDSOFTDEP(ump->um_mountp)) softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp); UFS_LOCK(ump); bcopy((caddr_t)fs, bp->b_data, (uint64_t)fs->fs_sbsize); UFS_UNLOCK(ump); fs = (struct fs *)bp->b_data; fs->fs_fmod = 0; ffs_oldfscompat_write(fs); fs->fs_si = NULL; /* Recalculate the superblock hash */ fs->fs_ckhash = ffs_calc_sbhash(fs); if (devfdp->suspended) bp->b_flags |= B_VALIDSUSPWRT; if (devfdp->waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) devfdp->error = error; return (devfdp->error); } static int ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp, int attrnamespace, const char *attrname) { #ifdef UFS_EXTATTR return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace, attrname)); #else return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)); #endif } static void ffs_ifree(struct ufsmount *ump, struct inode *ip) { if (ump->um_fstype == UFS1 && ip->i_din1 != NULL) uma_zfree(uma_ufs1, ip->i_din1); else if (ip->i_din2 != NULL) uma_zfree(uma_ufs2, ip->i_din2); uma_zfree_smr(uma_inode, ip); } static int dobkgrdwrite = 1; SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0, "Do background writes (honoring the BV_BKGRDWRITE flag)?"); /* * Complete a background write started from bwrite. 
*/
/*
 * ffs_backgroundwritedone() is installed as b_iodone on the copy that
 * ffs_bufwrite() makes for a background write.  `bp' is that copy; the
 * original buffer is found again through gbincore() on the same bufobj
 * and logical block number.  A write error is recorded on the original
 * as BV_BKGRDERR, the copy is disposed of, and finally BV_BKGRDINPROG is
 * cleared on the original, waking a thread waiting on BV_BKGRDWAIT.
 */
static void
ffs_backgroundwritedone(struct buf *bp)
{
	struct bufobj *bufobj;
	struct buf *origbp;

#ifdef SOFTUPDATES
	if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) != 0)
		softdep_handle_error(bp);
#endif

	/*
	 * Find the original buffer that we are writing.
	 */
	bufobj = bp->b_bufobj;
	BO_LOCK(bufobj);
	if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL)
		panic("backgroundwritedone: lost buffer");

	/*
	 * We should mark the cylinder group buffer origbp as
	 * dirty, to not lose the failed write.
	 */
	if ((bp->b_ioflags & BIO_ERROR) != 0)
		origbp->b_vflags |= BV_BKGRDERR;
	BO_UNLOCK(bufobj);
	/*
	 * Process dependencies then return any unfinished ones.
	 */
	if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0)
		buf_complete(bp);
#ifdef SOFTUPDATES
	if (!LIST_EMPTY(&bp->b_dep))
		softdep_move_dependencies(bp, origbp);
#endif
	/*
	 * This buffer is marked B_NOCACHE so when it is released
	 * by biodone it will be tossed. Clear B_IOSTARTED in case of error.
	 */
	bp->b_flags |= B_NOCACHE;
	bp->b_flags &= ~(B_CACHE | B_IOSTARTED);
	pbrelvp(bp);

	/*
	 * Prevent brelse() from trying to keep and re-dirtying bp on
	 * errors. It causes b_bufobj dereference in
	 * bdirty()/reassignbuf(), and b_bufobj was cleared in
	 * pbrelvp() above.
	 */
	if ((bp->b_ioflags & BIO_ERROR) != 0)
		bp->b_flags |= B_INVAL;
	bufdone(bp);
	/* `bufobj' was saved above because pbrelvp() cleared bp->b_bufobj. */
	BO_LOCK(bufobj);
	/*
	 * Clear the BV_BKGRDINPROG flag in the original buffer
	 * and awaken it if it is waiting for the write to complete.
	 * If BV_BKGRDINPROG is not set in the original buffer it must
	 * have been released and re-instantiated - which is not legal.
	 */
	KASSERT((origbp->b_vflags & BV_BKGRDINPROG),
	    ("backgroundwritedone: lost buffer2"));
	origbp->b_vflags &= ~BV_BKGRDINPROG;
	if (origbp->b_vflags & BV_BKGRDWAIT) {
		origbp->b_vflags &= ~BV_BKGRDWAIT;
		/* Matches the msleep() on &bp->b_xflags in ffs_bufwrite(). */
		wakeup(&origbp->b_xflags);
	}
	BO_UNLOCK(bufobj);
}

/*
 * Write, release buffer on completion. (Done by iodone
 * if async). Do not bother writing anything if the buffer
 * is invalid.
*
 * Note that we set B_CACHE here, indicating that buffer is
 * fully valid and thus cacheable. This is true even of NFS
 * now so we set it generally. This could be set either here
 * or in biodone() since the I/O is synchronous. We put it
 * here.
 *
 * Returns 0 when the buffer is invalid (it is released unwritten) or
 * when the write is deferred with bdwrite() because a background write
 * is still in progress; otherwise returns the result of bufwrite().
 */
static int
ffs_bufwrite(struct buf *bp)
{
	struct buf *newbp;
	struct cg *cgp;

	CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags);
	if (bp->b_flags & B_INVAL) {
		brelse(bp);
		return (0);
	}

	if (!BUF_ISLOCKED(bp))
		panic("bufwrite: buffer is not busy???");
	/*
	 * If a background write is already in progress, delay
	 * writing this block if it is asynchronous. Otherwise
	 * wait for the background write to complete.
	 */
	BO_LOCK(bp->b_bufobj);
	if (bp->b_vflags & BV_BKGRDINPROG) {
		if (bp->b_flags & B_ASYNC) {
			BO_UNLOCK(bp->b_bufobj);
			bdwrite(bp);
			return (0);
		}
		bp->b_vflags |= BV_BKGRDWAIT;
		/* Woken by ffs_backgroundwritedone() via wakeup(&b_xflags). */
		msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), PRIBIO,
		    "bwrbg", 0);
		if (bp->b_vflags & BV_BKGRDINPROG)
			panic("bufwrite: still writing");
	}
	bp->b_vflags &= ~BV_BKGRDERR;
	BO_UNLOCK(bp->b_bufobj);

	/*
	 * If this buffer is marked for background writing and we
	 * do not have to wait for it, make a copy and write the
	 * copy so as to leave this buffer ready for further use.
	 *
	 * This optimization eats a lot of memory. If we have a page
	 * or buffer shortfall we can't do it.
	 */
	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) &&
	    (bp->b_flags & B_ASYNC) &&
	    !vm_page_count_severe() &&
	    !buf_dirty_count_severe()) {
		KASSERT(bp->b_iodone == NULL,
		    ("bufwrite: needs chained iodone (%p)", bp->b_iodone));

		/* get a new block */
		newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD);
		/* No spare buffer available: write the original directly. */
		if (newbp == NULL)
			goto normal_write;

		KASSERT(buf_mapped(bp), ("Unmapped cg"));
		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
		BO_LOCK(bp->b_bufobj);
		bp->b_vflags |= BV_BKGRDINPROG;
		BO_UNLOCK(bp->b_bufobj);
		newbp->b_xflags |=
		    (bp->b_xflags & BX_FSPRIV) | BX_BKGRDMARKER;
		newbp->b_lblkno = bp->b_lblkno;
		newbp->b_blkno = bp->b_blkno;
		newbp->b_offset = bp->b_offset;
		newbp->b_iodone = ffs_backgroundwritedone;
		newbp->b_flags |= B_ASYNC;
		newbp->b_flags &= ~B_INVAL;
		pbgetvp(bp->b_vp, newbp);

#ifdef SOFTUPDATES
		/*
		 * Move over the dependencies. If there are rollbacks,
		 * leave the parent buffer dirtied as it will need to
		 * be written again.
		 */
		if (LIST_EMPTY(&bp->b_dep) ||
		    softdep_move_dependencies(bp, newbp) == 0)
			bundirty(bp);
#else
		bundirty(bp);
#endif

		/*
		 * Initiate write on the copy, release the original. The
		 * BKGRDINPROG flag prevents it from going away until
		 * the background write completes. We have to recalculate
		 * its check hash in case the buffer gets freed and then
		 * reconstituted from the buffer cache during a later read.
		 */
		if ((bp->b_xflags & BX_CYLGRP) != 0) {
			cgp = (struct cg *)bp->b_data;
			/* The hash is computed with cg_ckhash itself zeroed. */
			cgp->cg_ckhash = 0;
			cgp->cg_ckhash =
			    calculate_crc32c(~0L, bp->b_data, bp->b_bcount);
		}
		bqrelse(bp);
		bp = newbp;
	} else
		/* Mark the buffer clean */
		bundirty(bp);

	/* Let the normal bufwrite do the rest for us */
normal_write:
	/*
	 * If we are writing a cylinder group, update its time.
	 */
	if ((bp->b_xflags & BX_CYLGRP) != 0) {
		cgp = (struct cg *)bp->b_data;
		cgp->cg_old_time = cgp->cg_time = time_second;
	}
	return (bufwrite(bp));
}

/*
 * Strategy routine for the device bufobj.  For writes it performs the
 * snapshot copy-on-write step when the device vnode has VV_COPYONWRITE,
 * starts soft updates dependency processing (SOFTUPDATES kernels) and
 * refreshes the cylinder group check-hash, then hands every request to
 * g_vfs_strategy().  A copy-on-write error other than EOPNOTSUPP
 * completes the buf with BIO_ERROR instead of issuing the I/O.
 */
static void
ffs_geom_strategy(struct bufobj *bo, struct buf *bp)
{
	struct vnode *vp;
	struct buf *tbp;
	int error, nocopy;

	/*
	 * This is the bufobj strategy for the private VCHR vnodes
	 * used by FFS to access the underlying storage device.
	 * We override the default bufobj strategy and thus bypass
	 * VOP_STRATEGY() for these vnodes.
	 */
	vp = bo2vnode(bo);
	KASSERT(bp->b_vp == NULL || bp->b_vp->v_type != VCHR ||
	    bp->b_vp->v_rdev == NULL ||
	    bp->b_vp->v_rdev->si_mountpt == NULL ||
	    VFSTOUFS(bp->b_vp->v_rdev->si_mountpt) == NULL ||
	    vp == VFSTOUFS(bp->b_vp->v_rdev->si_mountpt)->um_devvp,
	    ("ffs_geom_strategy() with wrong vp"));
	if (bp->b_iocmd == BIO_WRITE) {
		/* On a suspended mount only B_VALIDSUSPWRT writes are allowed. */
		if ((bp->b_flags & B_VALIDSUSPWRT) == 0 &&
		    bp->b_vp != NULL && bp->b_vp->v_mount != NULL &&
		    (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0)
			panic("ffs_geom_strategy: bad I/O");
		/* Remember B_NOCOPY, then strip both one-shot flags. */
		nocopy = bp->b_flags & B_NOCOPY;
		bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY);
		if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 &&
		    vp->v_rdev->si_snapdata != NULL) {
			if ((bp->b_flags & B_CLUSTER) != 0) {
				runningbufwakeup(bp);
				/* Run copy-on-write for every buf of the cluster. */
				TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
					      b_cluster.cluster_entry) {
					error = ffs_copyonwrite(vp, tbp);
					if (error != 0 &&
					    error != EOPNOTSUPP) {
						bp->b_error = error;
						bp->b_ioflags |= BIO_ERROR;
						bp->b_flags &= ~B_BARRIER;
						bufdone(bp);
						return;
					}
				}
				(void)runningbufclaim(bp, bp->b_bufsize);
			} else {
				error = ffs_copyonwrite(vp, bp);
				if (error != 0 && error != EOPNOTSUPP) {
					bp->b_error = error;
					bp->b_ioflags |= BIO_ERROR;
					bp->b_flags &= ~B_BARRIER;
					bufdone(bp);
					return;
				}
			}
		}
#ifdef SOFTUPDATES
		if ((bp->b_flags & B_CLUSTER) != 0) {
			TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head,
				      b_cluster.cluster_entry) {
				if (!LIST_EMPTY(&tbp->b_dep))
					buf_start(tbp);
			}
		} else {
			if (!LIST_EMPTY(&bp->b_dep))
				buf_start(bp);
		}

#endif
		/*
		 * Check for metadata that needs check-hashes and update them.
		 */
		switch (bp->b_xflags & BX_FSPRIV) {
		case BX_CYLGRP:
			((struct cg *)bp->b_data)->cg_ckhash = 0;
			((struct cg *)bp->b_data)->cg_ckhash =
			    calculate_crc32c(~0L, bp->b_data, bp->b_bcount);
			break;

		case BX_SUPERBLOCK:
		case BX_INODE:
		case BX_INDIR:
		case BX_DIR:
			printf("Check-hash write is unimplemented!!!\n");
			break;

		case 0:
			break;

		default:
			printf("multiple buffer types 0x%b\n",
			    (bp->b_xflags & BX_FSPRIV), PRINT_UFS_BUF_XFLAGS);
			break;
		}
	}
	/* Every request except a read is tagged when ffs_enxio_enable is set. */
	if (bp->b_iocmd != BIO_READ && ffs_enxio_enable)
		bp->b_xflags |= BX_CVTENXIO;
	g_vfs_strategy(bo, bp);
}

/*
 * Return 1 if `mp' uses the UFS vfsops vector (mnt_op == &ufs_vfsops),
 * 0 otherwise.
 */
int
ffs_own_mount(const struct mount *mp)
{

	if (mp->mnt_op == &ufs_vfsops)
		return (1);
	return (0);
}

#ifdef	DDB
#ifdef SOFTUPDATES

/* defined in ffs_softdep.c */
extern void db_print_ffs(struct ufsmount *ump);

/*
 * DDB "show ffs": with an address argument, print the ufsmount of that
 * mount; without one, print it for every mount on mountlist whose
 * filesystem type name matches ufs_vfsconf.vfc_name.
 */
DB_SHOW_COMMAND(ffs, db_show_ffs)
{
	struct mount *mp;
	struct ufsmount *ump;

	if (have_addr) {
		ump = VFSTOUFS((struct mount *)addr);
		db_print_ffs(ump);
		return;
	}

	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name))
			db_print_ffs(VFSTOUFS(mp));
	}
}

#endif	/* SOFTUPDATES */
#endif	/* DDB */
diff --git a/sys/ufs/ufs/dinode.h b/sys/ufs/ufs/dinode.h
index 5265326b0b5b..2d41a45f5752 100644
--- a/sys/ufs/ufs/dinode.h
+++ b/sys/ufs/ufs/dinode.h
@@ -1,227 +1,227 @@
 /*-
  * SPDX-License-Identifier: (BSD-2-Clause AND BSD-3-Clause)
  *
  * Copyright (c) 2002 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Marshall
  * Kirk McKusick and Network Associates Laboratories, the Security
  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
  * research program
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef _UFS_UFS_DINODE_H_ #define _UFS_UFS_DINODE_H_ /* * The root inode is the root of the filesystem. Inode 0 can't be used for * normal purposes and historically bad blocks were linked to inode 1, thus * the root inode is 2. (Inode 1 is no longer used for this purpose, however * numerous dump tapes make this assumption, so we are stuck with it). */ #define UFS_ROOTINO ((ino_t)2) /* * The Whiteout inode# is a dummy non-zero inode number which will * never be allocated to a real file. It is used as a place holder * in the directory entry which has been tagged as a DT_WHT entry. * See the comments about UFS_ROOTINO above. */ #define UFS_WINO ((ino_t)1) /* * The size of physical and logical block numbers and time fields in UFS. * UFS1 disk addresses (ufs1_daddr_t) are 32 bits wide; UFS2 disk addresses * (ufs2_daddr_t), logical block numbers (ufs_lbn_t) and time values * (ufs_time_t) are 64 bits wide. All four types are signed. */ typedef int32_t ufs1_daddr_t; typedef int64_t ufs2_daddr_t; typedef int64_t ufs_lbn_t; typedef int64_t ufs_time_t; /* File permissions (values are written in octal). */ #define IEXEC 0000100 /* Executable. */ #define IWRITE 0000200 /* Writeable. */ #define IREAD 0000400 /* Readable. */ #define ISVTX 0001000 /* Sticky bit. */ #define ISGID 0002000 /* Set-gid.
*/ #define ISUID 0004000 /* Set-uid. */ /* File types (values are written in octal; IFMT selects the type bits). */ #define IFMT 0170000 /* Mask of file type. */ #define IFIFO 0010000 /* Named pipe (fifo). */ #define IFCHR 0020000 /* Character device. */ #define IFDIR 0040000 /* Directory file. */ #define IFBLK 0060000 /* Block device. */ #define IFREG 0100000 /* Regular file. */ #define IFLNK 0120000 /* Symbolic link. */ #define IFSOCK 0140000 /* UNIX domain socket. */ #define IFWHT 0160000 /* Whiteout. */ /* * Each UFS filesystem version defines the on-disk format of its dinode. * * A UFS2 dinode contains all the meta-data associated with a UFS2 file. * This structure defines the on-disk format of a UFS2 dinode. Since * this structure describes an on-disk structure, all its fields * are defined by types with precise widths. * * The number at the start of each field comment is the field's byte * offset; going by those offsets the structure is 256 bytes long. */ #define UFS_NXADDR 2 /* External addresses in inode. */ #define UFS_NDADDR 12 /* Direct addresses in inode. */ #define UFS_NIADDR 3 /* Indirect addresses in inode. */ struct ufs2_dinode { uint16_t di_mode; /* 0: IFMT, permissions; see above. */ uint16_t di_nlink; /* 2: File link count. */ uint32_t di_uid; /* 4: File owner. */ uint32_t di_gid; /* 8: File group. */ uint32_t di_blksize; /* 12: Inode blocksize. */ uint64_t di_size; /* 16: File byte count. */ uint64_t di_blocks; /* 24: Blocks actually held. */ ufs_time_t di_atime; /* 32: Last access time. */ ufs_time_t di_mtime; /* 40: Last modified time. */ ufs_time_t di_ctime; /* 48: Last inode change time. */ ufs_time_t di_birthtime; /* 56: Inode creation time. */ int32_t di_mtimensec; /* 64: Last modified time. */ int32_t di_atimensec; /* 68: Last access time. */ int32_t di_ctimensec; /* 72: Last inode change time. */ int32_t di_birthnsec; /* 76: Inode creation time. */ uint32_t di_gen; /* 80: Generation number. */ uint32_t di_kernflags; /* 84: Kernel flags. */ uint32_t di_flags; /* 88: Status flags (chflags). */ uint32_t di_extsize; /* 92: External attributes size. */ ufs2_daddr_t di_extb[UFS_NXADDR];/* 96: External attributes block.
*/ /* The block pointers and the embedded symlink text share the same storage. */ union { struct { ufs2_daddr_t di_db /* 112: Direct disk blocks. */ [UFS_NDADDR]; ufs2_daddr_t di_ib /* 208: Indirect disk blocks. */ [UFS_NIADDR]; }; char di_shortlink /* 112: Embedded symbolic link. */ [(UFS_NDADDR + UFS_NIADDR) * sizeof(ufs2_daddr_t)]; }; uint64_t di_modrev; /* 232: i_modrev for NFSv4 */ union { uint32_t di_freelink; /* 240: SUJ: Next unlinked inode. */ uint32_t di_dirdepth; /* 240: IFDIR: depth from root dir */ }; uint32_t di_ckhash; /* 244: if CK_INODE, its check-hash */ uint32_t di_spare[2]; /* 248: Reserved; currently unused */ }; /* * The di_db fields may be overlaid with other information for * file types that do not have associated disk storage. Block * and character devices overlay the first data block with their * dev_t value. Short symbolic links place their path in the * di_db area. */ #define di_rdev di_db[0] /* * A UFS1 dinode contains all the meta-data associated with a UFS1 file. * This structure defines the on-disk format of a UFS1 dinode. Since * this structure describes an on-disk structure, all its fields * are defined by types with precise widths. * * The number at the start of each field comment is the field's byte * offset; going by those offsets the structure is 128 bytes long. * The '-' and '+' lines of this hunk change di_atime, di_mtime and * di_ctime from int32_t to uint32_t; the three nsec fields stay int32_t. */ struct ufs1_dinode { uint16_t di_mode; /* 0: IFMT, permissions; see above. */ uint16_t di_nlink; /* 2: File link count. */ union { uint32_t di_freelink; /* 4: SUJ: Next unlinked inode. */ uint32_t di_dirdepth; /* 4: IFDIR: depth from root dir */ }; uint64_t di_size; /* 8: File byte count. */ - int32_t di_atime; /* 16: Last access time. */ + uint32_t di_atime; /* 16: Last access time. */ int32_t di_atimensec; /* 20: Last access time. */ - int32_t di_mtime; /* 24: Last modified time. */ + uint32_t di_mtime; /* 24: Last modified time. */ int32_t di_mtimensec; /* 28: Last modified time. */ - int32_t di_ctime; /* 32: Last inode change time. */ + uint32_t di_ctime; /* 32: Last inode change time. */ int32_t di_ctimensec; /* 36: Last inode change time. */ union { struct { ufs1_daddr_t di_db /* 40: Direct disk blocks. */ [UFS_NDADDR]; ufs1_daddr_t di_ib /* 88: Indirect disk blocks.
*/ [UFS_NIADDR]; }; char di_shortlink /* 40: Embedded symbolic link. */ [(UFS_NDADDR + UFS_NIADDR) * sizeof(ufs1_daddr_t)]; }; uint32_t di_flags; /* 100: Status flags (chflags). */ uint32_t di_blocks; /* 104: Blocks actually held. */ uint32_t di_gen; /* 108: Generation number. */ uint32_t di_uid; /* 112: File owner. */ uint32_t di_gid; /* 116: File group. */ uint64_t di_modrev; /* 120: i_modrev for NFSv4 */ }; /* * NOTE(review): presumably the upper bound for di_nlink, which is a * uint16_t in both dinode formats - confirm against the users of this macro. */ #define UFS_LINK_MAX 65500 /* leave a few spare for special values */ /* * These structures hold or reference an on-disk dinode. * union dinode stores a dinode of either format by value, while * union dinodep stores a pointer to a dinode of either format. * Neither union records which member is in use, so the code using * them has to track the filesystem format separately. */ union dinode { struct ufs1_dinode dp1; struct ufs2_dinode dp2; }; union dinodep { struct ufs1_dinode *dp1; struct ufs2_dinode *dp2; }; #endif /* _UFS_UFS_DINODE_H_ */