diff --git a/sbin/fsck_ffs/fsck.h b/sbin/fsck_ffs/fsck.h index a32e6feb3bea..df09a373bdd2 100644 --- a/sbin/fsck_ffs/fsck.h +++ b/sbin/fsck_ffs/fsck.h @@ -1,545 +1,545 @@ /*- * SPDX-License-Identifier: BSD-3-Clause and BSD-2-Clause * * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and Network Associates Laboratories, the Security * Research Division of Network Associates, Inc. under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS * research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)fsck.h 8.4 (Berkeley) 5/9/95 * $FreeBSD$ */ #ifndef _FSCK_H_ #define _FSCK_H_ #include #include #include #include #define MAXDUP 10 /* limit on dup blks (per inode) */ #define MAXBAD 10 /* limit on bad blks (per inode) */ #define MINBUFS 100 /* minimum number of buffers required */ #define INOBUFSIZE 64*1024 /* size of buffer to read inodes in pass1 */ #define ZEROBUFSIZE (dev_bsize * 128) /* size of zero buffer used by -Z */ union dinode { struct ufs1_dinode dp1; struct ufs2_dinode dp2; }; #define DIP(dp, field) \ ((sblock.fs_magic == FS_UFS1_MAGIC) ? \ (dp)->dp1.field : (dp)->dp2.field) #define DIP_SET(dp, field, val) do { \ if (sblock.fs_magic == FS_UFS1_MAGIC) \ (dp)->dp1.field = (val); \ else \ (dp)->dp2.field = (val); \ } while (0) /* * Each inode on the file system is described by the following structure. * The linkcnt is initially set to the value in the inode. Each time it * is found during the descent in passes 2, 3, and 4 the count is * decremented. Any inodes whose count is non-zero after pass 4 needs to * have its link count adjusted by the value remaining in ino_linkcnt. */ struct inostat { u_char ino_state; /* state of inode, see below */ u_char ino_type:4; /* type of inode */ u_char ino_idtype:4; /* idesc id_type, SNAP or ADDR */ u_short ino_linkcnt; /* number of links not found */ }; /* * Inode states. 
*/ #define USTATE 0x1 /* inode not allocated */ #define FSTATE 0x2 /* inode is file */ #define FZLINK 0x3 /* inode is file with a link count of zero */ #define DSTATE 0x4 /* inode is directory */ -#define DZLINK 0x5 /* inode is directory with a zero link count */ +#define DZLINK 0x5 /* inode is directory with a zero link count */ #define DFOUND 0x6 /* directory found during descent */ /* 0x7 UNUSED - see S_IS_DVALID() definition */ #define DCLEAR 0x8 /* directory is to be cleared */ #define FCLEAR 0x9 /* file is to be cleared */ /* DUNFOUND === (state == DSTATE || state == DZLINK) */ #define S_IS_DUNFOUND(state) (((state) & ~0x1) == DSTATE) /* DVALID === (state == DSTATE || state == DZLINK || state == DFOUND) */ #define S_IS_DVALID(state) (((state) & ~0x3) == DSTATE) #define INO_IS_DUNFOUND(ino) S_IS_DUNFOUND(inoinfo(ino)->ino_state) #define INO_IS_DVALID(ino) S_IS_DVALID(inoinfo(ino)->ino_state) /* * Inode state information is contained on per cylinder group lists * which are described by the following structure. */ extern struct inostatlist { long il_numalloced; /* number of inodes allocated in this cg */ struct inostat *il_stat;/* inostat info for this cylinder group */ } *inostathead; /* * Structure to reference a dinode. */ struct inode { struct bufarea *i_bp; /* buffer containing the dinode */ union dinode *i_dp; /* pointer to dinode in buffer */ ino_t i_number; /* inode number */ }; /* * Size of hash tables */ #define HASHSIZE 2048 #define HASH(x) ((x * 2654435761) & (HASHSIZE - 1)) /* * buffer cache structure. 
*/ struct bufarea { TAILQ_ENTRY(bufarea) b_list; /* LRU buffer queue */ LIST_ENTRY(bufarea) b_hash; /* hash list */ ufs2_daddr_t b_bno; /* disk block number */ int b_size; /* size of I/O */ int b_errs; /* I/O error */ int b_flags; /* B_ flags below */ int b_type; /* BT_ type below */ int b_refcnt; /* ref count of users */ int b_index; /* for BT_LEVEL, ptr index */ /* for BT_INODES, first inum */ union { char *b_buf; /* buffer space */ ufs1_daddr_t *b_indir1; /* UFS1 indirect block */ ufs2_daddr_t *b_indir2; /* UFS2 indirect block */ struct fs *b_fs; /* super block */ struct cg *b_cg; /* cylinder group */ struct ufs1_dinode *b_dinode1; /* UFS1 inode block */ struct ufs2_dinode *b_dinode2; /* UFS2 inode block */ } b_un; }; #define IBLK(bp, i) \ ((sblock.fs_magic == FS_UFS1_MAGIC) ? \ (bp)->b_un.b_indir1[i] : (bp)->b_un.b_indir2[i]) #define IBLK_SET(bp, i, val) do { \ if (sblock.fs_magic == FS_UFS1_MAGIC) \ (bp)->b_un.b_indir1[i] = (val); \ else \ (bp)->b_un.b_indir2[i] = (val); \ } while (0) /* * Buffer flags */ #define B_DIRTY 0x00000001 /* Buffer is dirty */ /* * Type of data in buffer */ #define BT_UNKNOWN 0 /* Buffer type is unknown */ #define BT_SUPERBLK 1 /* Buffer holds a superblock */ #define BT_CYLGRP 2 /* Buffer holds a cylinder group map */ #define BT_LEVEL1 3 /* Buffer holds single level indirect */ #define BT_LEVEL2 4 /* Buffer holds double level indirect */ #define BT_LEVEL3 5 /* Buffer holds triple level indirect */ #define BT_EXTATTR 6 /* Buffer holds external attribute data */ #define BT_INODES 7 /* Buffer holds inodes */ #define BT_DIRDATA 8 /* Buffer holds directory data */ #define BT_DATA 9 /* Buffer holds user data */ #define BT_NUMBUFTYPES 10 #define BT_NAMES { \ "unknown", \ "Superblock", \ "Cylinder Group", \ "Single Level Indirect", \ "Double Level Indirect", \ "Triple Level Indirect", \ "External Attribute", \ "Inode Block", \ "Directory Contents", \ "User Data" } extern char *buftype[]; #define BT_BUFTYPE(type) \ type < BT_NUMBUFTYPES ? 
buftype[type] : buftype[BT_UNKNOWN] extern long readcnt[BT_NUMBUFTYPES]; extern long totalreadcnt[BT_NUMBUFTYPES]; extern struct timespec readtime[BT_NUMBUFTYPES]; extern struct timespec totalreadtime[BT_NUMBUFTYPES]; extern struct timespec startprog; extern struct bufarea *icachebp; /* inode cache buffer */ extern struct bufarea sblk; /* file system superblock */ extern struct bufarea *pdirbp; /* current directory contents */ #define dirty(bp) do { \ if (fswritefd < 0) \ pfatal("SETTING DIRTY FLAG IN READ_ONLY MODE\n"); \ else \ (bp)->b_flags |= B_DIRTY; \ } while (0) #define initbarea(bp, type) do { \ (bp)->b_bno = (ufs2_daddr_t)-4; \ (bp)->b_size = 0; \ (bp)->b_errs = 0; \ (bp)->b_flags = 0; \ (bp)->b_type = type; \ (bp)->b_refcnt = 0; \ (bp)->b_index = 0; \ } while (0) #define sbdirty() dirty(&sblk) #define sblock (*sblk.b_un.b_fs) enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE}; extern ino_t cursnapshot; struct inodesc { enum fixstate id_fix; /* policy on fixing errors */ int (*id_func)(struct inodesc *); /* function to be applied to blocks of inode */ struct bufarea *id_bp; /* ckinode: buffer with indirect pointers */ union dinode *id_dp; /* ckinode: dinode being traversed */ ino_t id_number; /* inode number described */ ino_t id_parent; /* for DATA nodes, their parent */ ufs_lbn_t id_lbn; /* logical block number of current block */ ufs2_daddr_t id_blkno; /* current block number being examined */ int id_level; /* level of indirection of this block */ int id_numfrags; /* number of frags contained in block */ ufs_lbn_t id_lballoc; /* pass1: last LBN that is allocated */ off_t id_filesize; /* for DATA nodes, the size of the directory */ ufs2_daddr_t id_entryno;/* for DATA nodes, current entry number */ int id_loc; /* for DATA nodes, current location in dir */ struct direct *id_dirp; /* for DATA nodes, ptr to current entry */ char *id_name; /* for DATA nodes, name to find or enter */ char id_type; /* type of descriptor, DATA, ADDR, or SNAP */ }; /* file types */ 
#define DATA 1 /* a directory */ #define SNAP 2 /* a snapshot */ #define ADDR 3 /* anything but a directory or a snapshot */ /* * Linked list of duplicate blocks. * * The list is composed of two parts. The first part of the * list (from duplist through the node pointed to by muldup) * contains a single copy of each duplicate block that has been * found. The second part of the list (from muldup to the end) * contains duplicate blocks that have been found more than once. * To check if a block has been found as a duplicate it is only * necessary to search from duplist through muldup. To find the * total number of times that a block has been found as a duplicate * the entire list must be searched for occurrences of the block * in question. The following diagram shows a sample list where * w (found twice), x (found once), y (found three times), and z * (found once) are duplicate block numbers: * * w -> y -> x -> z -> y -> w -> y * ^ ^ * | | * duplist muldup */ struct dups { struct dups *next; ufs2_daddr_t dup; }; -extern struct dups *duplist; /* head of dup list */ -extern struct dups *muldup; /* end of unique duplicate dup block numbers */ +extern struct dups *duplist; /* head of dup list */ +extern struct dups *muldup; /* end of unique duplicate dup block numbers */ /* * Inode cache data structures. */ struct inoinfo { SLIST_ENTRY(inoinfo) i_hash; /* hash list */ ino_t i_number; /* inode number of this entry */ ino_t i_parent; /* inode number of parent */ ino_t i_dotdot; /* inode number of `..' 
*/ size_t i_isize; /* size of inode */ u_int i_depth; /* depth of directory from root */ u_int i_flags; /* flags, see below */ u_int i_numblks; /* size of block array in bytes */ ufs2_daddr_t i_blks[1]; /* actually longer */ }; extern SLIST_HEAD(inohash, inoinfo) *inphash; extern struct inoinfo **inpsort; /* * flags for struct inoinfo */ #define INFO_NEW 0x0000001 /* replaced broken directory */ extern long dirhash, inplast; extern unsigned long numdirs, listmax; extern long countdirs; /* number of directories we actually found */ #define MIBSIZE 3 /* size of fsck sysctl MIBs */ extern int adjblkcnt[MIBSIZE]; /* MIB cmd to adjust inode block count */ extern int adjrefcnt[MIBSIZE]; /* MIB cmd to adjust inode reference count */ extern int adjndir[MIBSIZE]; /* MIB cmd to adjust number of directories */ extern int adjnbfree[MIBSIZE]; /* MIB cmd to adjust number of free blocks */ extern int adjnifree[MIBSIZE]; /* MIB cmd to adjust number of free inodes */ extern int adjnffree[MIBSIZE]; /* MIB cmd to adjust number of free frags */ extern int adjnumclusters[MIBSIZE]; /* MIB cmd adjust number of free clusters */ extern int adjdepth[MIBSIZE]; /* MIB cmd to adjust directory depth count */ extern int freefiles[MIBSIZE]; /* MIB cmd to free a set of files */ extern int freedirs[MIBSIZE]; /* MIB cmd to free a set of directories */ extern int freeblks[MIBSIZE]; /* MIB cmd to free a set of data blocks */ extern int setsize[MIBSIZE]; /* MIB cmd to set inode size */ extern struct fsck_cmd cmd; /* sysctl file system update commands */ extern int bkgrdcheck; /* determine if background check is possible */ extern int bkgrdsumadj; /* whether the kernel has the ability to adjust the superblock summary fields */ extern off_t bflag; /* location of alternate super block */ extern int bkgrdflag; /* use a snapshot to run on an active system */ extern char *blockmap; /* ptr to primary blk allocation map */ extern char *cdevname; /* name of device being checked */ extern int cgheader_corrupt; 
/* one or more CG headers are corrupt */ extern char ckclean; /* only do work if not cleanly unmounted */ extern int ckhashadd; /* check hashes to be added */ extern char *copybuf; /* buffer to copy snapshot blocks */ extern int cvtlevel; /* convert to newer file system format */ extern long dev_bsize; /* computed value of DEV_BSIZE */ extern u_int real_dev_bsize; /* actual disk sector size, not overridden */ extern int debug; /* output debugging info */ extern int Eflag; /* delete empty data blocks */ extern int fsmodified; /* 1 => write done to file system */ extern int fsreadfd; /* file descriptor for reading file system */ extern int fswritefd; /* file descriptor for writing file system */ extern char havesb; /* superblock has been read */ extern int inoopt; /* trim out unused inodes */ extern ino_t lfdir; /* lost & found directory inode number */ extern int lfmode; /* lost & found directory creation mode */ extern const char *lfname; /* lost & found directory name */ extern ufs2_daddr_t maxfsblock; /* number of blocks in the file system */ extern ino_t maxino; /* number of inodes in file system */ extern ufs2_daddr_t n_blks; /* number of blocks in use */ extern ino_t n_files; /* number of files in use */ extern char nflag; /* assume a no response */ extern char preen; /* just fix normal inconsistencies */ extern char rerun; /* rerun fsck. 
Only used in non-preen mode */ extern char resolved; /* cleared if unresolved changes => not clean */ extern int returntosingle; /* 1 => return to single user mode on exit */ extern int sbhashfailed; /* when reading superblock check hash failed */ extern long secsize; /* actual disk sector size */ extern char skipclean; /* skip clean file systems if preening */ extern int snapcnt; /* number of active snapshots */ extern struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */ extern char snapname[BUFSIZ]; /* when doing snapshots, the name of the file */ extern int sujrecovery; /* 1 => doing check using the journal */ extern int surrender; /* Give up if reads fail */ extern char usedsoftdep; /* just fix soft dependency inconsistencies */ extern int wantrestart; /* Restart fsck on early termination */ extern char yflag; /* assume a yes response */ extern int zflag; /* zero unused directory space */ extern int Zflag; /* zero empty data blocks */ extern volatile sig_atomic_t got_siginfo; /* received a SIGINFO */ extern volatile sig_atomic_t got_sigalarm; /* received a SIGALRM */ #define clearinode(dp) \ if (sblock.fs_magic == FS_UFS1_MAGIC) { \ (dp)->dp1 = zino.dp1; \ } else { \ (dp)->dp2 = zino.dp2; \ } extern union dinode zino; #define setbmap(blkno) setbit(blockmap, blkno) #define testbmap(blkno) isset(blockmap, blkno) #define clrbmap(blkno) clrbit(blockmap, blkno) #define STOP 0x01 #define SKIP 0x02 #define KEEPON 0x04 #define ALTERED 0x08 #define FOUND 0x10 #define EEXIT 8 /* Standard error exit. */ #define ERERUN 16 /* fsck needs to be re-run. */ #define ERESTART -1 int flushentry(void); /* * Wrapper for malloc() that flushes the cylinder group cache to try * to get space. */ static inline void* Malloc(size_t size) { void *retval; while ((retval = malloc(size)) == NULL) if (flushentry() == 0) break; return (retval); } /* * Wrapper for calloc() that flushes the cylinder group cache to try * to get space. 
*/ static inline void* Calloc(size_t cnt, size_t size) { void *retval; while ((retval = calloc(cnt, size)) == NULL) if (flushentry() == 0) break; return (retval); } struct fstab; void adjust(struct inodesc *, int lcnt); void alarmhandler(int sig); ufs2_daddr_t allocblk(long cg, long frags, ufs2_daddr_t (*checkblkavail) (ufs2_daddr_t blkno, long frags)); ino_t allocdir(ino_t parent, ino_t request, int mode); ino_t allocino(ino_t request, int type); void binval(struct bufarea *); void blkerror(ino_t ino, const char *type, ufs2_daddr_t blk); char *blockcheck(char *name); int blread(int fd, char *buf, ufs2_daddr_t blk, long size); void bufinit(void); void blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size); void blerase(int fd, ufs2_daddr_t blk, long size); void blzero(int fd, ufs2_daddr_t blk, long size); void brelse(struct bufarea *); struct inoinfo *cacheino(union dinode *dp, ino_t inumber); void catch(int); void catchquit(int); void cgdirty(struct bufarea *); struct bufarea *cglookup(int cg); int changeino(ino_t dir, const char *name, ino_t newnum, int depth); void check_blkcnt(struct inode *ip); int check_cgmagic(int cg, struct bufarea *cgbp); void rebuild_cg(int cg, struct bufarea *cgbp); void check_dirdepth(struct inoinfo *inp); int chkfilesize(mode_t mode, u_int64_t filesize); int chkrange(ufs2_daddr_t blk, int cnt); void ckfini(int markclean); int ckinode(union dinode *dp, struct inodesc *); void clri(struct inodesc *, const char *type, int flag); int clearentry(struct inodesc *); void copyonwrite(struct fs *, struct bufarea *, ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long)); void direrror(ino_t ino, const char *errmesg); int dirscan(struct inodesc *); int dofix(struct inodesc *, const char *msg); int eascan(struct inodesc *, struct ufs2_dinode *dp); void fileerror(ino_t cwd, ino_t ino, const char *errmesg); void finalIOstats(void); int findino(struct inodesc *); int findname(struct inodesc *); void flush(int fd, struct bufarea *bp); int 
freeblock(struct inodesc *); void freedirino(ino_t ino, ino_t parent); void freeino(ino_t ino); void freeinodebuf(void); void fsckinit(void); void fsutilinit(void); int ftypeok(union dinode *dp); void getblk(struct bufarea *bp, ufs2_daddr_t blk, long size); struct bufarea *getdatablk(ufs2_daddr_t blkno, long size, int type); struct inoinfo *getinoinfo(ino_t inumber); union dinode *getnextinode(ino_t inumber, int rebuiltcg); void getpathname(char *namebuf, ino_t curdir, ino_t ino); void ginode(ino_t, struct inode *); void gjournal_check(const char *filesys); void infohandler(int sig); void irelse(struct inode *); ufs2_daddr_t ino_blkatoff(union dinode *, ino_t, ufs_lbn_t, int *, struct bufarea **); void inocleanup(void); void inodirty(struct inode *); struct inostat *inoinfo(ino_t inum); void IOstats(char *what); int linkup(ino_t orphan, ino_t parentdir, char *name); int makeentry(ino_t parent, ino_t ino, const char *name); int openfilesys(char *dev); void panic(const char *fmt, ...) __printflike(1, 2); void pass1(void); void pass1b(void); int pass1check(struct inodesc *); void pass2(void); void pass3(void); void pass4(void); void pass5(void); void pfatal(const char *fmt, ...) __printflike(1, 2); void propagate(void); void prtbuf(struct bufarea *, const char *, ...) __printflike(2, 3); void prtinode(struct inode *); void pwarn(const char *fmt, ...) 
__printflike(1, 2); int readsb(int listerr); int removecachedino(ino_t); int reply(const char *question); void rwerror(const char *mesg, ufs2_daddr_t blk); void sblock_init(void); void setinodebuf(int, ino_t); int setup(char *dev); int snapblkfree(struct fs *, ufs2_daddr_t, long, ino_t, ufs2_daddr_t (*)(ufs2_daddr_t, long)); void snapremove(ino_t); void snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long)); ufs2_daddr_t std_checkblkavail(ufs2_daddr_t blkno, long frags); ufs2_daddr_t suj_checkblkavail(ufs2_daddr_t, long); int suj_check(const char *filesys); void update_maps(struct cg *, struct cg*, int); #endif /* !_FSCK_H_ */ diff --git a/sbin/fsck_ffs/gjournal.c b/sbin/fsck_ffs/gjournal.c index f3bdd5a66907..03bf7f08b662 100644 --- a/sbin/fsck_ffs/gjournal.c +++ b/sbin/fsck_ffs/gjournal.c @@ -1,171 +1,164 @@ /*- * SPDX-License-Identifier: BSD-3-Clause AND BSD-2-Clause * * Copyright (c) 2006 Pawel Jakub Dawidek * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include "fsck.h" void gjournal_check(const char *filesys) { struct fs *fs; struct inode ip; union dinode *dp; struct bufarea *cgbp; struct cg *cgp; struct inodesc idesc; uint8_t *inosused; ino_t cino, ino; int cg; fs = &sblock; /* Are there any unreferenced inodes in this file system? */ if (fs->fs_unrefs == 0) { - //printf("No unreferenced inodes.\n"); sbdirty(); ckfini(1); return; } for (cg = 0; cg < fs->fs_ncg; cg++) { /* Show progress if requested. */ if (got_siginfo) { printf("%s: phase j: cyl group %d of %d (%d%%)\n", cdevname, cg, fs->fs_ncg, cg * 100 / fs->fs_ncg); got_siginfo = 0; } if (got_sigalarm) { setproctitle("%s pj %d%%", cdevname, cg * 100 / fs->fs_ncg); got_sigalarm = 0; } cgbp = cglookup(cg); cgp = cgbp->b_un.b_cg; if (!check_cgmagic(cg, cgbp)) { rerun = 1; ckfini(0); return; } /* Are there any unreferenced inodes in this cylinder group? */ if (cgp->cg_unrefs == 0) continue; - //printf("Analizing cylinder group %d (count=%d)\n", cg, cgp->cg_unrefs); /* * Now go through the list of all inodes in this cylinder group * to find unreferenced ones. */ inosused = cg_inosused(cgp); for (cino = 0; cino < fs->fs_ipg; cino++) { ino = fs->fs_ipg * cg + cino; /* Unallocated? Skip it. */ if (isclr(inosused, cino)) continue; ginode(ino, &ip); dp = ip.i_dp; /* Not a regular file nor directory? Skip it. 
*/ if (!S_ISREG(dp->dp2.di_mode) && !S_ISDIR(dp->dp2.di_mode)) { irelse(&ip); continue; } /* Has reference(s)? Skip it. */ if (dp->dp2.di_nlink > 0) { irelse(&ip); continue; } /* printf("Clearing inode=%d (size=%jd)\n", ino, (intmax_t)dp->dp2->di_size); */ /* Deallocate it. */ memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = freeblock; idesc.id_number = ino; clri(&idesc, "UNREF", 1); clrbit(inosused, cino); /* Update position of last used inode. */ if (ino < cgp->cg_irotor) cgp->cg_irotor = ino; /* Update statistics. */ cgp->cg_unrefs--; fs->fs_unrefs--; /* Zero-fill the inode. */ dp->dp2 = zino.dp2; /* Write the inode back. */ inodirty(&ip); irelse(&ip); cgdirty(cgbp); - if (cgp->cg_unrefs == 0) { - //printf("No more unreferenced inodes in cg=%d.\n", cg); + if (cgp->cg_unrefs == 0) break; - } } /* - * If there are no more unreferenced inodes, there is no need to - * check other cylinder groups. + * If there are no more unreferenced inodes, there is no + * need to check other cylinder groups. */ - if (fs->fs_unrefs == 0) { - //printf("No more unreferenced inodes (cg=%d/%d).\n", cg, - // fs->fs_ncg); + if (fs->fs_unrefs == 0) break; - } } /* Write back updated statistics and super-block. */ sbdirty(); ckfini(1); } diff --git a/sbin/fsck_ffs/globs.c b/sbin/fsck_ffs/globs.c index 2340636fe3a9..4e1b4a266baf 100644 --- a/sbin/fsck_ffs/globs.c +++ b/sbin/fsck_ffs/globs.c @@ -1,177 +1,177 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1986, 1993\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include "fsck.h" long readcnt[BT_NUMBUFTYPES]; long totalreadcnt[BT_NUMBUFTYPES]; struct timespec readtime[BT_NUMBUFTYPES]; struct timespec totalreadtime[BT_NUMBUFTYPES]; struct timespec startprog; struct bufarea sblk; /* file system superblock */ struct bufarea *pdirbp; /* current directory contents */ ino_t cursnapshot; long dirhash, inplast; unsigned long numdirs, listmax; long countdirs; /* number of directories we actually found */ -int adjrefcnt[MIBSIZE]; /* MIB command to adjust inode reference cnt */ -int adjblkcnt[MIBSIZE]; /* MIB command to adjust inode block count */ -int setsize[MIBSIZE]; /* MIB command to set inode size */ -int adjndir[MIBSIZE]; /* MIB command to adjust number of directories */ -int adjnbfree[MIBSIZE]; /* MIB command to adjust number of free blocks */ -int adjnifree[MIBSIZE]; /* MIB command to adjust number of free inodes */ -int adjnffree[MIBSIZE]; /* MIB command to adjust number of free frags */ -int adjnumclusters[MIBSIZE]; /* MIB command to adjust number of free clusters */ +int adjrefcnt[MIBSIZE]; /* MIB cmd to adjust inode reference cnt */ +int adjblkcnt[MIBSIZE]; /* MIB cmd to adjust inode block count */ +int setsize[MIBSIZE]; /* MIB cmd to set inode size */ +int adjndir[MIBSIZE]; /* MIB cmd to adjust number of directories */ +int adjnbfree[MIBSIZE]; /* MIB cmd to adjust number of free blocks */ +int adjnifree[MIBSIZE]; /* MIB cmd to adjust number of free inodes */ +int adjnffree[MIBSIZE]; /* MIB cmd to adjust number of free frags */ +int adjnumclusters[MIBSIZE]; /* MIB cmd to adjust number of free clusters */ int adjdepth[MIBSIZE]; /* MIB cmd to adjust directory depth count */ -int freefiles[MIBSIZE]; /* MIB command to free a set of files */ -int freedirs[MIBSIZE]; /* MIB command to free a set of directories */ 
-int freeblks[MIBSIZE]; /* MIB command to free a set of data blocks */ +int freefiles[MIBSIZE]; /* MIB cmd to free a set of files */ +int freedirs[MIBSIZE]; /* MIB cmd to free a set of directories */ +int freeblks[MIBSIZE]; /* MIB cmd to free a set of data blocks */ struct fsck_cmd cmd; /* sysctl file system update commands */ char snapname[BUFSIZ]; /* when doing snapshots, the name of the file */ char *cdevname; /* name of device being checked */ long dev_bsize; /* computed value of DEV_BSIZE */ long secsize; /* actual disk sector size */ u_int real_dev_bsize; /* actual disk sector size, not overridden */ char nflag; /* assume a no response */ char yflag; /* assume a yes response */ int bkgrdflag; /* use a snapshot to run on an active system */ off_t bflag; /* location of alternate super block */ int debug; /* output debugging info */ int Eflag; /* delete empty data blocks */ int Zflag; /* zero empty data blocks */ int zflag; /* zero unused directory space */ int inoopt; /* trim out unused inodes */ char ckclean; /* only do work if not cleanly unmounted */ int cvtlevel; /* convert to newer file system format */ int ckhashadd; /* check hashes to be added */ int bkgrdcheck; /* determine if background check is possible */ -int bkgrdsumadj; /* whether the kernel have ability to adjust superblock summary */ +int bkgrdsumadj; /* kernel able to adjust superblock summary */ char usedsoftdep; /* just fix soft dependency inconsistencies */ char preen; /* just fix normal inconsistencies */ char rerun; /* rerun fsck. 
Only used in non-preen mode */ int returntosingle; /* 1 => return to single user mode on exit */ char resolved; /* cleared if unresolved changes => not clean */ int sbhashfailed; /* when reading superblock check hash failed */ char havesb; /* superblock has been read */ char skipclean; /* skip clean file systems if preening */ int fsmodified; /* 1 => write done to file system */ int fsreadfd; /* file descriptor for reading file system */ int fswritefd; /* file descriptor for writing file system */ int surrender; /* Give up if reads fail */ int wantrestart; /* Restart fsck on early termination */ ufs2_daddr_t maxfsblock; /* number of blocks in the file system */ char *blockmap; /* ptr to primary blk allocation map */ ino_t maxino; /* number of inodes in file system */ ino_t lfdir; /* lost & found directory inode number */ const char *lfname; /* lost & found directory name */ int lfmode; /* lost & found directory creation mode */ ufs2_daddr_t n_blks; /* number of blocks in use */ int cgheader_corrupt; /* one or more CG headers are corrupt */ ino_t n_files; /* number of files in use */ volatile sig_atomic_t got_siginfo; /* received a SIGINFO */ volatile sig_atomic_t got_sigalarm; /* received a SIGALRM */ union dinode zino; struct dups *duplist; struct dups *muldup; struct inostatlist *inostathead; void fsckinit(void) { bzero(readcnt, sizeof(long) * BT_NUMBUFTYPES); bzero(totalreadcnt, sizeof(long) * BT_NUMBUFTYPES); bzero(readtime, sizeof(struct timespec) * BT_NUMBUFTYPES); bzero(totalreadtime, sizeof(struct timespec) * BT_NUMBUFTYPES); bzero(&startprog, sizeof(struct timespec)); bzero(&sblk, sizeof(struct bufarea)); cursnapshot = 0; listmax = numdirs = dirhash = inplast = 0; countdirs = 0; bzero(adjrefcnt, sizeof(int) * MIBSIZE); bzero(adjblkcnt, sizeof(int) * MIBSIZE); bzero(setsize, sizeof(int) * MIBSIZE); bzero(adjndir, sizeof(int) * MIBSIZE); bzero(adjnbfree, sizeof(int) * MIBSIZE); bzero(adjnifree, sizeof(int) * MIBSIZE); bzero(adjnffree, sizeof(int) * MIBSIZE); 
bzero(adjnumclusters, sizeof(int) * MIBSIZE); bzero(adjdepth, sizeof(int) * MIBSIZE); bzero(freefiles, sizeof(int) * MIBSIZE); bzero(freedirs, sizeof(int) * MIBSIZE); bzero(freeblks, sizeof(int) * MIBSIZE); bzero(&cmd, sizeof(struct fsck_cmd)); bzero(snapname, sizeof(char) * BUFSIZ); cdevname = NULL; dev_bsize = 0; secsize = 0; real_dev_bsize = 0; bkgrdsumadj = 0; usedsoftdep = 0; rerun = 0; returntosingle = 0; resolved = 0; havesb = 0; fsmodified = 0; sbhashfailed = 0; fsreadfd = -1; fswritefd = -1; maxfsblock = 0; maxino = 0; lfdir = 0; lfname = "lost+found"; lfmode = 0700; n_blks = 0; n_files = 0; cgheader_corrupt = 0; got_siginfo = 0; got_sigalarm = 0; bzero(&zino.dp1, sizeof(struct ufs1_dinode)); bzero(&zino.dp2, sizeof(struct ufs2_dinode)); } diff --git a/sbin/fsck_ffs/main.c b/sbin/fsck_ffs/main.c index df86cb5fb107..dab614d61bf4 100644 --- a/sbin/fsck_ffs/main.c +++ b/sbin/fsck_ffs/main.c @@ -1,740 +1,742 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1986, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #define _WANT_P_OSREL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" static int restarts; static void usage(void) __dead2; static intmax_t argtoimax(int flag, const char *req, const char *str, int base); static int checkfilesys(char *filesys); static int setup_bkgrdchk(struct statfs *mntp, int sbrdfailed, char **filesys); int main(int argc, char *argv[]) { int ch; struct rlimit rlimit; struct itimerval itimerval; int fsret; int ret = 0; sync(); skipclean = 1; inoopt = 0; while ((ch = getopt(argc, argv, "b:Bc:CdEfFm:npRrSyZz")) != -1) { switch (ch) { case 'b': skipclean = 0; bflag = argtoimax('b', "number", optarg, 10); printf("Alternate super block location: %jd\n", bflag); break; case 'B': bkgrdflag = 1; break; case 'c': skipclean = 0; cvtlevel 
= argtoimax('c', "conversion level", optarg, 10); if (cvtlevel < 3) errx(EEXIT, "cannot do level %d conversion", cvtlevel); break; case 'd': debug++; break; case 'E': Eflag++; break; case 'f': skipclean = 0; break; case 'F': bkgrdcheck = 1; break; case 'm': lfmode = argtoimax('m', "mode", optarg, 8); if (lfmode &~ 07777) errx(EEXIT, "bad mode to -m: %o", lfmode); printf("** lost+found creation mode %o\n", lfmode); break; case 'n': nflag++; yflag = 0; break; case 'p': preen++; /*FALLTHROUGH*/ case 'C': ckclean++; break; case 'R': wantrestart = 1; break; case 'r': inoopt++; break; case 'S': surrender = 1; break; case 'y': yflag++; nflag = 0; break; case 'Z': Zflag++; break; case 'z': zflag++; break; default: usage(); } } argc -= optind; argv += optind; if (!argc) usage(); if (bkgrdflag && cvtlevel > 0) { pfatal("CANNOT CONVERT A SNAPSHOT\n"); exit(EEXIT); } if (signal(SIGINT, SIG_IGN) != SIG_IGN) (void)signal(SIGINT, catch); if (ckclean) (void)signal(SIGQUIT, catchquit); signal(SIGINFO, infohandler); if (bkgrdflag) { signal(SIGALRM, alarmhandler); itimerval.it_interval.tv_sec = 5; itimerval.it_interval.tv_usec = 0; itimerval.it_value.tv_sec = 5; itimerval.it_value.tv_usec = 0; setitimer(ITIMER_REAL, &itimerval, NULL); } /* * Push up our allowed memory limit so we can cope * with huge file systems. */ if (getrlimit(RLIMIT_DATA, &rlimit) == 0) { rlimit.rlim_cur = rlimit.rlim_max; (void)setrlimit(RLIMIT_DATA, &rlimit); } while (argc > 0) { if ((fsret = checkfilesys(*argv)) == ERESTART) continue; ret |= fsret; argc--; argv++; } if (returntosingle) ret = 2; exit(ret); } static intmax_t argtoimax(int flag, const char *req, const char *str, int base) { char *cp; intmax_t ret; ret = strtoimax(str, &cp, base); if (cp == str || *cp) errx(EEXIT, "-%c flag requires a %s", flag, req); return (ret); } /* * Check the specified file system. 
*/ /* ARGSUSED */ static int checkfilesys(char *filesys) { ufs2_daddr_t n_ffree, n_bfree; struct dups *dp; struct statfs *mntp; intmax_t blks, files; size_t size; int sbreadfailed, ofsmodified; fsutilinit(); fsckinit(); cdevname = filesys; if (debug && ckclean) pwarn("starting\n"); /* * Make best effort to get the disk name. Check first to see * if it is listed among the mounted file systems. Failing that * check to see if it is listed in /etc/fstab. */ mntp = getmntpoint(filesys); if (mntp != NULL) filesys = mntp->f_mntfromname; else filesys = blockcheck(filesys); /* * If -F flag specified, check to see whether a background check * is possible and needed. If possible and needed, exit with * status zero. Otherwise exit with status non-zero. A non-zero * exit status will cause a foreground check to be run. */ sblock_init(); sbreadfailed = 0; if (openfilesys(filesys) == 0 || readsb(0) == 0) sbreadfailed = 1; if (bkgrdcheck) { if (sbreadfailed) exit(3); /* Cannot read superblock */ /* Earlier background failed or journaled */ if (sblock.fs_flags & (FS_NEEDSFSCK | FS_SUJ)) exit(4); if ((sblock.fs_flags & FS_DOSOFTDEP) == 0) exit(5); /* Not running soft updates */ size = MIBSIZE; if (sysctlnametomib("vfs.ffs.adjrefcnt", adjrefcnt, &size) < 0) exit(6); /* Lacks kernel support */ if ((mntp == NULL && sblock.fs_clean == 1) || (mntp != NULL && (sblock.fs_flags & FS_UNCLEAN) == 0)) exit(7); /* Filesystem clean, report it now */ exit(0); } if (ckclean && skipclean) { /* * If file system is gjournaled, check it here. 
*/ if (sbreadfailed) exit(3); /* Cannot read superblock */ if (bkgrdflag == 0 && (nflag || (fswritefd = open(filesys, O_WRONLY)) < 0)) { fswritefd = -1; if (preen) pfatal("NO WRITE ACCESS"); printf(" (NO WRITE)"); } if ((sblock.fs_flags & FS_GJOURNAL) != 0) { if (sblock.fs_clean == 1) { pwarn("FILE SYSTEM CLEAN; SKIPPING CHECKS\n"); exit(0); } if ((sblock.fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) { bufinit(); gjournal_check(filesys); if (chkdoreload(mntp, pwarn) == 0) exit(0); exit(4); } else { pfatal("FULL FSCK NEEDED, CANNOT RUN FAST " "FSCK\n"); } } close(fswritefd); fswritefd = -1; } if (bkgrdflag) { switch (setup_bkgrdchk(mntp, sbreadfailed, &filesys)) { case -1: /* filesystem clean */ goto clean; case 0: /* cannot do background, give up */ exit(EEXIT); case 1: /* doing background check, preen rules apply */ preen = 1; break; } } switch (setup(filesys)) { case 0: if (preen) pfatal("CAN'T CHECK FILE SYSTEM."); return (EEXIT); case -1: clean: pwarn("clean, %ld free ", (long)(sblock.fs_cstotal.cs_nffree + sblock.fs_frag * sblock.fs_cstotal.cs_nbfree)); printf("(%jd frags, %jd blocks, %.1f%% fragmentation)\n", (intmax_t)sblock.fs_cstotal.cs_nffree, (intmax_t)sblock.fs_cstotal.cs_nbfree, sblock.fs_cstotal.cs_nffree * 100.0 / sblock.fs_dsize); return (0); } /* * Determine if we can and should do journal recovery. */ if ((sblock.fs_flags & FS_SUJ) == FS_SUJ) { - if ((sblock.fs_flags & FS_NEEDSFSCK) != FS_NEEDSFSCK && skipclean) { + if ((sblock.fs_flags & FS_NEEDSFSCK) != FS_NEEDSFSCK && + skipclean) { sujrecovery = 1; if (suj_check(filesys) == 0) { - printf("\n***** FILE SYSTEM MARKED CLEAN *****\n"); + pwarn("\n**** FILE SYSTEM MARKED CLEAN ****\n"); if (chkdoreload(mntp, pwarn) == 0) exit(0); exit(4); } sujrecovery = 0; - printf("** Skipping journal, falling through to full fsck\n\n"); + pwarn("Skipping journal, " + "falling through to full fsck\n"); } if (fswritefd != -1) { /* * Write the superblock so we don't try to recover the * journal on another pass. 
If this is the only change * to the filesystem, we do not want it to be called * out as modified. */ sblock.fs_mtime = time(NULL); sbdirty(); ofsmodified = fsmodified; flush(fswritefd, &sblk); fsmodified = ofsmodified; } } /* * If the filesystem was run on an old kernel that did not * support check hashes, clear the check-hash flags so that * we do not try to verify them. */ if ((sblock.fs_flags & FS_METACKHASH) == 0) sblock.fs_metackhash = 0; /* * If we are running on a kernel that can provide check hashes * that are not yet enabled for the filesystem and we are * running manually without the -y flag, offer to add any * supported check hashes that are not already enabled. */ ckhashadd = 0; if (preen == 0 && yflag == 0 && sblock.fs_magic != FS_UFS1_MAGIC && fswritefd != -1 && getosreldate() >= P_OSREL_CK_CYLGRP) { if ((sblock.fs_metackhash & CK_CYLGRP) == 0 && reply("ADD CYLINDER GROUP CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_CYLGRP; sblock.fs_metackhash |= CK_CYLGRP; } if ((sblock.fs_metackhash & CK_SUPERBLOCK) == 0 && getosreldate() >= P_OSREL_CK_SUPERBLOCK && reply("ADD SUPERBLOCK CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_SUPERBLOCK; sblock.fs_metackhash |= CK_SUPERBLOCK; } if ((sblock.fs_metackhash & CK_INODE) == 0 && getosreldate() >= P_OSREL_CK_INODE && reply("ADD INODE CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_INODE; sblock.fs_metackhash |= CK_INODE; } #ifdef notyet if ((sblock.fs_metackhash & CK_INDIR) == 0 && getosreldate() >= P_OSREL_CK_INDIR && reply("ADD INDIRECT BLOCK CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_INDIR; sblock.fs_metackhash |= CK_INDIR; } if ((sblock.fs_metackhash & CK_DIR) == 0 && getosreldate() >= P_OSREL_CK_DIR && reply("ADD DIRECTORY CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_DIR; sblock.fs_metackhash |= CK_DIR; } #endif /* notyet */ if (ckhashadd != 0) { sblock.fs_flags |= FS_METACKHASH; sbdirty(); } } /* * Cleared if any questions answered no. Used to decide if * the superblock should be marked clean. 
*/ resolved = 1; /* * 1: scan inodes tallying blocks used */ if (preen == 0) { printf("** Last Mounted on %s\n", sblock.fs_fsmnt); if (mntp != NULL && mntp->f_flags & MNT_ROOTFS) printf("** Root file system\n"); printf("** Phase 1 - Check Blocks and Sizes\n"); } clock_gettime(CLOCK_REALTIME_PRECISE, &startprog); pass1(); IOstats("Pass1"); /* * 1b: locate first references to duplicates, if any */ if (duplist) { if (preen || usedsoftdep) pfatal("INTERNAL ERROR: DUPS WITH %s%s%s", preen ? "-p" : "", (preen && usedsoftdep) ? " AND " : "", usedsoftdep ? "SOFTUPDATES" : ""); printf("** Phase 1b - Rescan For More DUPS\n"); pass1b(); IOstats("Pass1b"); } /* * 2: traverse directories from root to mark all connected directories */ if (preen == 0) printf("** Phase 2 - Check Pathnames\n"); pass2(); IOstats("Pass2"); /* * 3: scan inodes looking for disconnected directories */ if (preen == 0) printf("** Phase 3 - Check Connectivity\n"); pass3(); IOstats("Pass3"); /* * 4: scan inodes looking for disconnected files; check reference counts */ if (preen == 0) printf("** Phase 4 - Check Reference Counts\n"); pass4(); IOstats("Pass4"); /* * 5: check and repair resource counts in cylinder groups */ if (preen == 0) printf("** Phase 5 - Check Cyl groups\n"); snapflush(std_checkblkavail); if (cgheader_corrupt) { printf("PHASE 5 SKIPPED DUE TO CORRUPT CYLINDER GROUP " "HEADER(S)\n\n"); } else { pass5(); IOstats("Pass5"); } /* * print out summary statistics */ n_ffree = sblock.fs_cstotal.cs_nffree; n_bfree = sblock.fs_cstotal.cs_nbfree; files = maxino - UFS_ROOTINO - sblock.fs_cstotal.cs_nifree - n_files; blks = n_blks + sblock.fs_ncg * (cgdmin(&sblock, 0) - cgsblock(&sblock, 0)); blks += cgsblock(&sblock, 0) - cgbase(&sblock, 0); blks += howmany(sblock.fs_cssize, sblock.fs_fsize); blks = maxfsblock - (n_ffree + sblock.fs_frag * n_bfree) - blks; if (bkgrdflag && (files > 0 || blks > 0)) { countdirs = sblock.fs_cstotal.cs_ndir - countdirs; pwarn("Reclaimed: %ld directories, %jd files, %jd 
fragments\n", countdirs, files - countdirs, blks); } pwarn("%ld files, %jd used, %ju free ", (long)n_files, (intmax_t)n_blks, (uintmax_t)n_ffree + sblock.fs_frag * n_bfree); printf("(%ju frags, %ju blocks, %.1f%% fragmentation)\n", (uintmax_t)n_ffree, (uintmax_t)n_bfree, n_ffree * 100.0 / sblock.fs_dsize); if (debug) { if (files < 0) printf("%jd inodes missing\n", -files); if (blks < 0) printf("%jd blocks missing\n", -blks); if (duplist != NULL) { printf("The following duplicate blocks remain:"); for (dp = duplist; dp; dp = dp->next) printf(" %jd,", (intmax_t)dp->dup); printf("\n"); } } duplist = (struct dups *)0; muldup = (struct dups *)0; inocleanup(); if (fsmodified) { sblock.fs_time = time(NULL); sbdirty(); } if (cvtlevel && (sblk.b_flags & B_DIRTY) != 0) { /* * Write out the duplicate super blocks */ if (sbput(fswritefd, &sblock, sblock.fs_ncg) == 0) fsmodified = 1; } if (rerun) resolved = 0; /* * Check to see if the file system is mounted read-write. */ if (bkgrdflag == 0 && mntp != NULL && (mntp->f_flags & MNT_RDONLY) == 0) resolved = 0; ckfini(resolved); if (fsmodified && !preen) printf("\n***** FILE SYSTEM WAS MODIFIED *****\n"); if (rerun) { if (wantrestart && (restarts++ < 10) && (preen || reply("RESTART"))) return (ERESTART); printf("\n***** PLEASE RERUN FSCK *****\n"); } if (chkdoreload(mntp, pwarn) != 0) { if (!fsmodified) return (0); if (!preen) printf("\n***** REBOOT NOW *****\n"); sync(); return (4); } return (rerun ? 
ERERUN : 0); } /* * If we are to do a background check: * Get the mount point information of the file system * If already clean, return -1 * Check that kernel supports background fsck * Find or create the snapshot directory * Create the snapshot file * Open snapshot * If anything fails print reason and return 0 which exits */ static int setup_bkgrdchk(struct statfs *mntp, int sbreadfailed, char **filesys) { struct stat snapdir; struct group *grp; struct iovec *iov; char errmsg[255]; int iovlen; size_t size; /* Get the mount point information of the file system */ if (mntp == NULL) { pwarn("NOT MOUNTED, CANNOT RUN IN BACKGROUND\n"); return (0); } if ((mntp->f_flags & MNT_RDONLY) != 0) { pwarn("MOUNTED READ-ONLY, CANNOT RUN IN BACKGROUND\n"); return (0); } if ((mntp->f_flags & MNT_SOFTDEP) == 0) { pwarn("NOT USING SOFT UPDATES, CANNOT RUN IN BACKGROUND\n"); return (0); } if (sbreadfailed) { pwarn("SUPERBLOCK READ FAILED, CANNOT RUN IN BACKGROUND\n"); return (0); } if ((sblock.fs_flags & FS_NEEDSFSCK) != 0) { pwarn("FULL FSCK NEEDED, CANNOT RUN IN BACKGROUND\n"); return (0); } if ((sblock.fs_flags & FS_SUJ) != 0) { pwarn("JOURNALED FILESYSTEM, CANNOT RUN IN BACKGROUND\n"); return (0); } if (skipclean && ckclean && (sblock.fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK)) == 0) { /* * file system is clean; * skip snapshot and report it clean */ pwarn("FILE SYSTEM CLEAN; SKIPPING CHECKS\n"); return (-1); } /* Check that kernel supports background fsck */ size = MIBSIZE; if (sysctlnametomib("vfs.ffs.adjrefcnt", adjrefcnt, &size) < 0|| sysctlnametomib("vfs.ffs.adjblkcnt", adjblkcnt, &size) < 0|| sysctlnametomib("vfs.ffs.setsize", setsize, &size) < 0 || sysctlnametomib("vfs.ffs.freefiles", freefiles, &size) < 0|| sysctlnametomib("vfs.ffs.freedirs", freedirs, &size) < 0 || sysctlnametomib("vfs.ffs.freeblks", freeblks, &size) < 0) { pwarn("KERNEL LACKS BACKGROUND FSCK SUPPORT\n"); return (0); } /* * When kernel lacks runtime bgfsck superblock summary * adjustment functionality, it does 
not mean we can not * continue, as old kernels will recompute the summary at * mount time. However, it will be an unexpected softupdates * inconsistency if it turns out that the summary is still * incorrect. Set a flag so subsequent operation can know this. */ bkgrdsumadj = 1; if (sysctlnametomib("vfs.ffs.adjndir", adjndir, &size) < 0 || sysctlnametomib("vfs.ffs.adjnbfree", adjnbfree, &size) < 0 || sysctlnametomib("vfs.ffs.adjnifree", adjnifree, &size) < 0 || sysctlnametomib("vfs.ffs.adjnffree", adjnffree, &size) < 0 || sysctlnametomib("vfs.ffs.adjnumclusters", adjnumclusters, &size) < 0) { bkgrdsumadj = 0; pwarn("KERNEL LACKS RUNTIME SUPERBLOCK SUMMARY ADJUSTMENT " "SUPPORT\n"); } /* Find or create the snapshot directory */ snprintf(snapname, sizeof snapname, "%s/.snap", mntp->f_mntonname); if (stat(snapname, &snapdir) < 0) { if (errno != ENOENT) { pwarn("CANNOT FIND SNAPSHOT DIRECTORY %s: %s, CANNOT " "RUN IN BACKGROUND\n", snapname, strerror(errno)); return (0); } if ((grp = getgrnam("operator")) == NULL || mkdir(snapname, 0770) < 0 || chown(snapname, -1, grp->gr_gid) < 0 || chmod(snapname, 0770) < 0) { pwarn("CANNOT CREATE SNAPSHOT DIRECTORY %s: %s, " "CANNOT RUN IN BACKGROUND\n", snapname, strerror(errno)); return (0); } } else if (!S_ISDIR(snapdir.st_mode)) { pwarn("%s IS NOT A DIRECTORY, CANNOT RUN IN BACKGROUND\n", snapname); return (0); } /* Create the snapshot file */ iov = NULL; iovlen = 0; errmsg[0] = '\0'; snprintf(snapname, sizeof snapname, "%s/.snap/fsck_snapshot", mntp->f_mntonname); build_iovec(&iov, &iovlen, "fstype", "ffs", 4); build_iovec(&iov, &iovlen, "from", snapname, (size_t)-1); build_iovec(&iov, &iovlen, "fspath", mntp->f_mntonname, (size_t)-1); build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg)); build_iovec(&iov, &iovlen, "update", NULL, 0); build_iovec(&iov, &iovlen, "snapshot", NULL, 0); /* Create snapshot, removing old snapshot if it exists */ while (nmount(iov, iovlen, mntp->f_flags) < 0) { if (errno == EEXIST && 
unlink(snapname) == 0) continue; pwarn("CANNOT CREATE SNAPSHOT %s: %s %s\n", snapname, strerror(errno), errmsg); return (0); } /* Open snapshot */ if (openfilesys(snapname) == 0) { unlink(snapname); pwarn("CANNOT OPEN SNAPSHOT %s: %s, CANNOT RUN IN " "BACKGROUND\n", snapname, strerror(errno)); return (0); } free(sblock.fs_csp); free(sblock.fs_si); havesb = 0; *filesys = snapname; cmd.version = FFS_CMD_VERSION; cmd.handle = fsreadfd; return (1); } static void usage(void) { (void) fprintf(stderr, "usage: %s [-BCdEFfnpRrSyZ] [-b block] [-c level] [-m mode] filesystem ...\n", getprogname()); exit(1); } void infohandler(int sig __unused) { got_siginfo = 1; } void alarmhandler(int sig __unused) { got_sigalarm = 1; } diff --git a/sbin/fsck_ffs/pass1.c b/sbin/fsck_ffs/pass1.c index d328234220ad..5b27cf8cbfbe 100644 --- a/sbin/fsck_ffs/pass1.c +++ b/sbin/fsck_ffs/pass1.c @@ -1,614 +1,614 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char sccsid[] = "@(#)pass1.c 8.6 (Berkeley) 4/28/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include "fsck.h" static ufs2_daddr_t badblk; static ufs2_daddr_t dupblk; static ino_t lastino; /* last inode in use */ static int checkinode(ino_t inumber, struct inodesc *, int rebuiltcg); void pass1(void) { struct inostat *info; struct inodesc idesc; struct bufarea *cgbp; struct cg *cgp; ino_t inumber, inosused, mininos; ufs2_daddr_t i, cgd; u_int8_t *cp; int c, rebuiltcg; badblk = dupblk = lastino = 0; /* * Set file system reserved blocks in used block map. */ for (c = 0; c < sblock.fs_ncg; c++) { cgd = cgdmin(&sblock, c); if (c == 0) { i = cgbase(&sblock, c); } else i = cgsblock(&sblock, c); for (; i < cgd; i++) setbmap(i); } i = sblock.fs_csaddr; cgd = i + howmany(sblock.fs_cssize, sblock.fs_fsize); for (; i < cgd; i++) setbmap(i); /* * Find all allocated blocks. 
*/ memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_func = pass1check; n_files = n_blks = 0; for (c = 0; c < sblock.fs_ncg; c++) { inumber = c * sblock.fs_ipg; cgbp = cglookup(c); cgp = cgbp->b_un.b_cg; rebuiltcg = 0; if (!check_cgmagic(c, cgbp)) { if (!reply("REBUILD CYLINDER GROUP")) { cgheader_corrupt = 1; if (!nflag) { - printf("YOU WILL NEED TO RERUN FSCK.\n"); + pwarn("YOU WILL NEED TO RERUN FSCK.\n"); rerun = 1; } } else { rebuild_cg(c, cgbp); rebuiltcg = 1; } } if (!rebuiltcg && sblock.fs_magic == FS_UFS2_MAGIC) { inosused = cgp->cg_initediblk; if (inosused > sblock.fs_ipg) { pfatal("Too many initialized inodes (%ju > %d) " "in cylinder group %d\nReset to %d\n", (uintmax_t)inosused, sblock.fs_ipg, c, sblock.fs_ipg); inosused = sblock.fs_ipg; } } else { inosused = sblock.fs_ipg; } if (got_siginfo) { printf("%s: phase 1: cyl group %d of %d (%d%%)\n", cdevname, c, sblock.fs_ncg, c * 100 / sblock.fs_ncg); got_siginfo = 0; } if (got_sigalarm) { setproctitle("%s p1 %d%%", cdevname, c * 100 / sblock.fs_ncg); got_sigalarm = 0; } /* * If we are using soft updates, then we can trust the * cylinder group inode allocation maps to tell us which * inodes are allocated. We will scan the used inode map * to find the inodes that are really in use, and then * read only those inodes in from disk. */ if ((preen || inoopt) && usedsoftdep && !rebuiltcg) { cp = &cg_inosused(cgp)[(inosused - 1) / CHAR_BIT]; for ( ; inosused != 0; cp--) { if (*cp == 0) { if (inosused > CHAR_BIT) inosused -= CHAR_BIT; else inosused = 0; continue; } for (i = 1 << (CHAR_BIT - 1); i > 0; i >>= 1) { if (*cp & i) break; inosused--; } break; } } /* * Allocate inoinfo structures for the allocated inodes. 
*/ inostathead[c].il_numalloced = inosused; if (inosused == 0) { inostathead[c].il_stat = NULL; continue; } info = Calloc((unsigned)inosused, sizeof(struct inostat)); if (info == NULL) errx(EEXIT, "cannot alloc %u bytes for inoinfo", (unsigned)(sizeof(struct inostat) * inosused)); inostathead[c].il_stat = info; /* * Scan the allocated inodes. */ setinodebuf(c, inosused); for (i = 0; i < inosused; i++, inumber++) { if (inumber < UFS_ROOTINO) { (void)getnextinode(inumber, rebuiltcg); continue; } /* * NULL return indicates probable end of allocated * inodes during cylinder group rebuild attempt. * We always keep trying until we get to the minimum * valid number for this cylinder group. */ if (checkinode(inumber, &idesc, rebuiltcg) == 0 && i > cgp->cg_initediblk) break; } /* * This optimization speeds up future runs of fsck * by trimming down the number of inodes in cylinder * groups that formerly had many inodes but now have * fewer in use. */ mininos = roundup(inosused + INOPB(&sblock), INOPB(&sblock)); if (inoopt && !preen && !rebuiltcg && sblock.fs_magic == FS_UFS2_MAGIC && cgp->cg_initediblk > 2 * INOPB(&sblock) && mininos < cgp->cg_initediblk) { i = cgp->cg_initediblk; if (mininos < 2 * INOPB(&sblock)) cgp->cg_initediblk = 2 * INOPB(&sblock); else cgp->cg_initediblk = mininos; pwarn("CYLINDER GROUP %d: RESET FROM %ju TO %d %s\n", c, i, cgp->cg_initediblk, "VALID INODES"); cgdirty(cgbp); } if (inosused < sblock.fs_ipg) continue; lastino += 1; if (lastino < (c * sblock.fs_ipg)) inosused = 0; else inosused = lastino - (c * sblock.fs_ipg); if (rebuiltcg && inosused > cgp->cg_initediblk && sblock.fs_magic == FS_UFS2_MAGIC) { cgp->cg_initediblk = roundup(inosused, INOPB(&sblock)); pwarn("CYLINDER GROUP %d: FOUND %d VALID INODES\n", c, cgp->cg_initediblk); } /* * If we were not able to determine in advance which inodes * were in use, then reduce the size of the inoinfo structure * to the size necessary to describe the inodes that we * really found. 
Always leave map space in the first cylinder * group in case we need to a root or lost+found directory. */ if (inumber == lastino || c == 0) continue; inostathead[c].il_numalloced = inosused; if (inosused == 0) { free(inostathead[c].il_stat); inostathead[c].il_stat = NULL; continue; } info = Calloc((unsigned)inosused, sizeof(struct inostat)); if (info == NULL) errx(EEXIT, "cannot alloc %u bytes for inoinfo", (unsigned)(sizeof(struct inostat) * inosused)); memmove(info, inostathead[c].il_stat, inosused * sizeof(*info)); free(inostathead[c].il_stat); inostathead[c].il_stat = info; } freeinodebuf(); } static int checkinode(ino_t inumber, struct inodesc *idesc, int rebuiltcg) { struct inode ip; union dinode *dp; ufs2_daddr_t ndb; mode_t mode; intmax_t size, fixsize; int j, ret, offset; if ((dp = getnextinode(inumber, rebuiltcg)) == NULL) { pfatal("INVALID INODE"); goto unknown; } mode = DIP(dp, di_mode) & IFMT; if (mode == 0) { if ((sblock.fs_magic == FS_UFS1_MAGIC && (memcmp(dp->dp1.di_db, zino.dp1.di_db, UFS_NDADDR * sizeof(ufs1_daddr_t)) || memcmp(dp->dp1.di_ib, zino.dp1.di_ib, UFS_NIADDR * sizeof(ufs1_daddr_t)) || dp->dp1.di_mode || dp->dp1.di_size)) || (sblock.fs_magic == FS_UFS2_MAGIC && (memcmp(dp->dp2.di_db, zino.dp2.di_db, UFS_NDADDR * sizeof(ufs2_daddr_t)) || memcmp(dp->dp2.di_ib, zino.dp2.di_ib, UFS_NIADDR * sizeof(ufs2_daddr_t)) || dp->dp2.di_mode || dp->dp2.di_size))) { pfatal("PARTIALLY ALLOCATED INODE I=%lu", (u_long)inumber); if (reply("CLEAR") == 1) { ginode(inumber, &ip); clearinode(ip.i_dp); inodirty(&ip); irelse(&ip); } } inoinfo(inumber)->ino_state = USTATE; return (1); } lastino = inumber; if (chkfilesize(mode, DIP(dp, di_size)) == 0) { pfatal("BAD FILE SIZE"); goto unknown; } if (!preen && mode == IFMT && reply("HOLD BAD BLOCK") == 1) { ginode(inumber, &ip); dp = ip.i_dp; DIP_SET(dp, di_size, sblock.fs_fsize); DIP_SET(dp, di_mode, IFREG|0600); inodirty(&ip); irelse(&ip); } if ((mode == IFBLK || mode == IFCHR || mode == IFIFO || mode == IFSOCK) && 
DIP(dp, di_size) != 0) { if (debug) printf("bad special-file size %ju:", (uintmax_t)DIP(dp, di_size)); pfatal("BAD SPECIAL-FILE SIZE"); goto unknown; } if ((mode == IFBLK || mode == IFCHR) && (dev_t)DIP(dp, di_rdev) == NODEV) { if (debug) printf("bad special-file rdev NODEV:"); pfatal("BAD SPECIAL-FILE RDEV"); goto unknown; } ndb = howmany(DIP(dp, di_size), sblock.fs_bsize); if (ndb < 0) { if (debug) printf("negative size %ju ndb %ju:", (uintmax_t)DIP(dp, di_size), (uintmax_t)ndb); pfatal("NEGATIVE FILE SIZE"); goto unknown; } if (mode == IFBLK || mode == IFCHR) ndb++; if (mode == IFLNK) { /* * Fake ndb value so direct/indirect block checks below * will detect any garbage after symlink string. */ if (DIP(dp, di_size) < (off_t)sblock.fs_maxsymlinklen) { if (sblock.fs_magic == FS_UFS1_MAGIC) ndb = howmany(DIP(dp, di_size), sizeof(ufs1_daddr_t)); else ndb = howmany(DIP(dp, di_size), sizeof(ufs2_daddr_t)); if (ndb > UFS_NDADDR) { j = ndb - UFS_NDADDR; for (ndb = 1; j > 1; j--) ndb *= NINDIR(&sblock); ndb += UFS_NDADDR; } } } for (j = ndb; ndb < UFS_NDADDR && j < UFS_NDADDR; j++) { if (DIP(dp, di_db[j]) == 0) continue; if (debug) printf("invalid direct addr[%d]: %ju\n", j, (uintmax_t)DIP(dp, di_db[j])); pfatal("INVALID DIRECT BLOCK"); ginode(inumber, &ip); prtinode(&ip); if (reply("CLEAR") == 1) { DIP_SET(ip.i_dp, di_db[j], 0); inodirty(&ip); } irelse(&ip); } for (j = 0, ndb -= UFS_NDADDR; ndb > 0; j++) ndb /= NINDIR(&sblock); for (; j < UFS_NIADDR; j++) { if (DIP(dp, di_ib[j]) == 0) continue; if (debug) printf("invalid indirect addr: %ju\n", (uintmax_t)DIP(dp, di_ib[j])); pfatal("INVALID INDIRECT BLOCK"); ginode(inumber, &ip); prtinode(&ip); if (reply("CLEAR") == 1) { DIP_SET(ip.i_dp, di_ib[j], 0); inodirty(&ip); } irelse(&ip); } if (ftypeok(dp) == 0) { pfatal("UNKNOWN FILE TYPE"); goto unknown; } n_files++; inoinfo(inumber)->ino_linkcnt = DIP(dp, di_nlink); if (mode == IFDIR) { if (DIP(dp, di_size) == 0) { inoinfo(inumber)->ino_state = DCLEAR; } else if (DIP(dp, 
di_nlink) == 0) { inoinfo(inumber)->ino_state = DZLINK; } else { inoinfo(inumber)->ino_state = DSTATE; } cacheino(dp, inumber); countdirs++; } else if (DIP(dp, di_nlink) <= 0) inoinfo(inumber)->ino_state = FZLINK; else inoinfo(inumber)->ino_state = FSTATE; inoinfo(inumber)->ino_type = IFTODT(mode); badblk = dupblk = 0; idesc->id_number = inumber; if (DIP(dp, di_flags) & SF_SNAPSHOT) inoinfo(inumber)->ino_idtype = SNAP; else inoinfo(inumber)->ino_idtype = ADDR; idesc->id_type = inoinfo(inumber)->ino_idtype; (void)ckinode(dp, idesc); if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) { ndb = howmany(dp->dp2.di_extsize, sblock.fs_bsize); for (j = 0; j < UFS_NXADDR; j++) { if (--ndb == 0 && (offset = blkoff(&sblock, dp->dp2.di_extsize)) != 0) idesc->id_numfrags = numfrags(&sblock, fragroundup(&sblock, offset)); else idesc->id_numfrags = sblock.fs_frag; if (dp->dp2.di_extb[j] == 0) continue; idesc->id_blkno = dp->dp2.di_extb[j]; ret = (*idesc->id_func)(idesc); if (ret & STOP) break; } } if (sblock.fs_magic == FS_UFS2_MAGIC) eascan(idesc, &dp->dp2); idesc->id_entryno *= btodb(sblock.fs_fsize); if (DIP(dp, di_blocks) != idesc->id_entryno) { pwarn("INCORRECT BLOCK COUNT I=%lu (%ju should be %ju)", (u_long)inumber, (uintmax_t)DIP(dp, di_blocks), (uintmax_t)idesc->id_entryno); if (preen) printf(" (CORRECTED)\n"); else if (reply("CORRECT") == 0) return (1); if (bkgrdflag == 0) { ginode(inumber, &ip); DIP_SET(ip.i_dp, di_blocks, idesc->id_entryno); inodirty(&ip); irelse(&ip); } else { cmd.value = idesc->id_number; cmd.size = idesc->id_entryno - DIP(dp, di_blocks); if (debug) printf("adjblkcnt ino %ju amount %lld\n", (uintmax_t)cmd.value, (long long)cmd.size); if (sysctl(adjblkcnt, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST INODE BLOCK COUNT", cmd.value); } } /* * UFS does not allow files to end with a hole; it requires that * the last block of a file be allocated. The last allocated block * in a file is tracked in id_lballoc. 
Here, we check for a size * past the last allocated block of the file and if that is found, * shorten the file to reference the last allocated block to avoid * having it reference a hole at its end. * * Soft updates will always ensure that the file size is correct * for files that contain only direct block pointers. However * soft updates does not roll back sizes for files with indirect * blocks that it has set to unallocated because their contents * have not yet been written to disk. Hence, the file can appear * to have a hole at its end because the block pointer has been * rolled back to zero. Thus finding a hole at the end of a file * that is located in an indirect block receives only a warning * while finding a hole at the end of a file in a direct block * receives a fatal error message. */ size = DIP(dp, di_size); if (idesc->id_lballoc < lblkno(&sblock, size - 1) && /* exclude embedded symbolic links */ ((mode != IFLNK) || size >= sblock.fs_maxsymlinklen)) { fixsize = lblktosize(&sblock, idesc->id_lballoc + 1); if (size > UFS_NDADDR * sblock.fs_bsize) pwarn("INODE %lu: FILE SIZE %ju BEYOND END OF " "ALLOCATED FILE, SIZE SHOULD BE %ju", (u_long)inumber, size, fixsize); else pfatal("INODE %lu: FILE SIZE %ju BEYOND END OF " "ALLOCATED FILE, SIZE SHOULD BE %ju", (u_long)inumber, size, fixsize); if (preen) printf(" (ADJUSTED)\n"); else if (reply("ADJUST") == 0) return (1); if (bkgrdflag == 0) { ginode(inumber, &ip); DIP_SET(ip.i_dp, di_size, fixsize); inodirty(&ip); irelse(&ip); } else { cmd.value = idesc->id_number; cmd.size = fixsize; if (debug) printf("setsize ino %ju size set to %ju\n", (uintmax_t)cmd.value, (uintmax_t)cmd.size); if (sysctl(setsize, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("SET INODE SIZE", cmd.value); } } return (1); unknown: ginode(inumber, &ip); prtinode(&ip); inoinfo(inumber)->ino_state = USTATE; if (reply("CLEAR") == 1) { clearinode(ip.i_dp); inodirty(&ip); } irelse(&ip); return (1); } int pass1check(struct inodesc *idesc) { int res 
= KEEPON; int anyout, nfrags; ufs2_daddr_t blkno = idesc->id_blkno; struct dups *dlp; struct dups *new; if (idesc->id_type == SNAP) { if (blkno == BLK_NOCOPY) return (KEEPON); if (idesc->id_number == cursnapshot) { if (blkno == blkstofrags(&sblock, idesc->id_lbn)) return (KEEPON); if (blkno == BLK_SNAP) { blkno = blkstofrags(&sblock, idesc->id_lbn); idesc->id_entryno -= idesc->id_numfrags; } } else { if (blkno == BLK_SNAP) return (KEEPON); } } if ((anyout = chkrange(blkno, idesc->id_numfrags)) != 0) { blkerror(idesc->id_number, "BAD", blkno); if (badblk++ >= MAXBAD) { pwarn("EXCESSIVE BAD BLKS I=%lu", (u_long)idesc->id_number); if (preen) printf(" (SKIPPING)\n"); else if (reply("CONTINUE") == 0) { ckfini(0); exit(EEXIT); } rerun = 1; return (STOP); } } for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) { if (anyout && chkrange(blkno, 1)) { res = SKIP; } else if (!testbmap(blkno)) { n_blks++; setbmap(blkno); } else { blkerror(idesc->id_number, "DUP", blkno); if (dupblk++ >= MAXDUP) { pwarn("EXCESSIVE DUP BLKS I=%lu", (u_long)idesc->id_number); if (preen) printf(" (SKIPPING)\n"); else if (reply("CONTINUE") == 0) { ckfini(0); exit(EEXIT); } rerun = 1; return (STOP); } new = (struct dups *)Malloc(sizeof(struct dups)); if (new == NULL) { pfatal("DUP TABLE OVERFLOW."); if (reply("CONTINUE") == 0) { ckfini(0); exit(EEXIT); } rerun = 1; return (STOP); } new->dup = blkno; if (muldup == NULL) { duplist = muldup = new; new->next = NULL; } else { new->next = muldup->next; muldup->next = new; } for (dlp = duplist; dlp != muldup; dlp = dlp->next) if (dlp->dup == blkno) break; if (dlp == muldup && dlp->dup != blkno) muldup = new; } /* * count the number of blocks found in id_entryno */ idesc->id_entryno++; } if (idesc->id_level == 0 && idesc->id_lballoc < idesc->id_lbn) idesc->id_lballoc = idesc->id_lbn; return (res); } diff --git a/sbin/fsck_ffs/pass5.c b/sbin/fsck_ffs/pass5.c index 58143a0e8211..d973183eb36a 100644 --- a/sbin/fsck_ffs/pass5.c +++ 
b/sbin/fsck_ffs/pass5.c @@ -1,640 +1,650 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #if 0 #ifndef lint static const char sccsid[] = "@(#)pass5.c 8.9 (Berkeley) 4/28/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include "fsck.h" static void check_maps(u_char *, u_char *, int, ufs2_daddr_t, const char *, int *, int, int, int); static void clear_blocks(ufs2_daddr_t start, ufs2_daddr_t end); void pass5(void) { int c, i, j, blk, frags, basesize, mapsize; int inomapsize, blkmapsize; struct fs *fs = &sblock; ufs2_daddr_t d, dbase, dmax, start; int rewritecg = 0; ino_t inum; struct csum *cs; struct csum_total cstotal; struct inodesc idesc[3]; char buf[MAXBSIZE]; struct cg *cg, *newcg = (struct cg *)buf; struct bufarea *cgbp; inoinfo(UFS_WINO)->ino_state = USTATE; memset(newcg, 0, (size_t)fs->fs_cgsize); newcg->cg_niblk = fs->fs_ipg; /* check to see if we are to add a cylinder group check hash */ if ((ckhashadd & CK_CYLGRP) != 0) rewritecg = 1; if (cvtlevel >= 3) { if (fs->fs_maxcontig < 2 && fs->fs_contigsumsize > 0) { if (preen) pwarn("DELETING CLUSTERING MAPS\n"); if (preen || reply("DELETE CLUSTERING MAPS")) { fs->fs_contigsumsize = 0; rewritecg = 1; sbdirty(); } } if (fs->fs_maxcontig > 1) { const char *doit = NULL; if (fs->fs_contigsumsize < 1) { doit = "CREAT"; } else if (fs->fs_contigsumsize < fs->fs_maxcontig && fs->fs_contigsumsize < FS_MAXCONTIG) { doit = "EXPAND"; } if (doit) { i = fs->fs_contigsumsize; fs->fs_contigsumsize = MIN(fs->fs_maxcontig, FS_MAXCONTIG); if (CGSIZE(fs) > (u_int)fs->fs_bsize) { pwarn("CANNOT %s CLUSTER MAPS\n", doit); fs->fs_contigsumsize = i; } else if (preen || reply("CREATE CLUSTER MAPS")) { if (preen) pwarn("%sING CLUSTER MAPS\n", doit); fs->fs_cgsize = fragroundup(fs, CGSIZE(fs)); rewritecg = 1; sbdirty(); } } } } basesize = sizeof(*newcg); if (sblock.fs_magic == FS_UFS2_MAGIC) { newcg->cg_iusedoff = basesize; } else { /* * We reserve the space for the old rotation summary * tables for the benefit of old kernels, 
but do not * maintain them in modern kernels. In time, they can * go away. */ newcg->cg_old_btotoff = basesize; newcg->cg_old_boff = newcg->cg_old_btotoff + fs->fs_old_cpg * sizeof(int32_t); newcg->cg_iusedoff = newcg->cg_old_boff + fs->fs_old_cpg * fs->fs_old_nrpos * sizeof(u_int16_t); memset(&newcg[1], 0, newcg->cg_iusedoff - basesize); } inomapsize = howmany(fs->fs_ipg, CHAR_BIT); newcg->cg_freeoff = newcg->cg_iusedoff + inomapsize; blkmapsize = howmany(fs->fs_fpg, CHAR_BIT); newcg->cg_nextfreeoff = newcg->cg_freeoff + blkmapsize; if (fs->fs_contigsumsize > 0) { newcg->cg_clustersumoff = newcg->cg_nextfreeoff - sizeof(u_int32_t); newcg->cg_clustersumoff = roundup(newcg->cg_clustersumoff, sizeof(u_int32_t)); newcg->cg_clusteroff = newcg->cg_clustersumoff + (fs->fs_contigsumsize + 1) * sizeof(u_int32_t); newcg->cg_nextfreeoff = newcg->cg_clusteroff + howmany(fragstoblks(fs, fs->fs_fpg), CHAR_BIT); } newcg->cg_magic = CG_MAGIC; mapsize = newcg->cg_nextfreeoff - newcg->cg_iusedoff; memset(&idesc[0], 0, sizeof idesc); for (i = 0; i < 3; i++) idesc[i].id_type = ADDR; memset(&cstotal, 0, sizeof(struct csum_total)); dmax = blknum(fs, fs->fs_size + fs->fs_frag - 1); for (d = fs->fs_size; d < dmax; d++) setbmap(d); for (c = 0; c < fs->fs_ncg; c++) { if (got_siginfo) { printf("%s: phase 5: cyl group %d of %d (%d%%)\n", cdevname, c, sblock.fs_ncg, c * 100 / sblock.fs_ncg); got_siginfo = 0; } if (got_sigalarm) { setproctitle("%s p5 %d%%", cdevname, c * 100 / sblock.fs_ncg); got_sigalarm = 0; } cgbp = cglookup(c); cg = cgbp->b_un.b_cg; if (!cg_chkmagic(cg)) pfatal("CG %d: BAD MAGIC NUMBER\n", c); /* * If we have a cylinder group check hash and are not adding * it for the first time, verify that it is good. 
*/ if ((fs->fs_metackhash & CK_CYLGRP) != 0 && (ckhashadd & CK_CYLGRP) == 0) { uint32_t ckhash, thishash; ckhash = cg->cg_ckhash; cg->cg_ckhash = 0; thishash = calculate_crc32c(~0L, cg, fs->fs_cgsize); if (ckhash == thishash) { cg->cg_ckhash = ckhash; } else { pwarn("CG %d: BAD CHECK-HASH %#x vs %#x\n", c, ckhash, thishash); cg->cg_ckhash = thishash; cgdirty(cgbp); } } newcg->cg_time = cg->cg_time; newcg->cg_old_time = cg->cg_old_time; newcg->cg_unrefs = cg->cg_unrefs; newcg->cg_ckhash = cg->cg_ckhash; newcg->cg_cgx = c; dbase = cgbase(fs, c); dmax = dbase + fs->fs_fpg; if (dmax > fs->fs_size) dmax = fs->fs_size; newcg->cg_ndblk = dmax - dbase; if (fs->fs_magic == FS_UFS1_MAGIC) { if (c == fs->fs_ncg - 1) newcg->cg_old_ncyl = howmany(newcg->cg_ndblk, fs->fs_fpg / fs->fs_old_cpg); else newcg->cg_old_ncyl = fs->fs_old_cpg; newcg->cg_old_niblk = fs->fs_ipg; newcg->cg_niblk = 0; } if (fs->fs_contigsumsize > 0) newcg->cg_nclusterblks = newcg->cg_ndblk / fs->fs_frag; newcg->cg_cs.cs_ndir = 0; newcg->cg_cs.cs_nffree = 0; newcg->cg_cs.cs_nbfree = 0; newcg->cg_cs.cs_nifree = fs->fs_ipg; if (cg->cg_rotor >= 0 && cg->cg_rotor < newcg->cg_ndblk) newcg->cg_rotor = cg->cg_rotor; else newcg->cg_rotor = 0; if (cg->cg_frotor >= 0 && cg->cg_frotor < newcg->cg_ndblk) newcg->cg_frotor = cg->cg_frotor; else newcg->cg_frotor = 0; if (cg->cg_irotor >= 0 && cg->cg_irotor < fs->fs_ipg) newcg->cg_irotor = cg->cg_irotor; else newcg->cg_irotor = 0; if (fs->fs_magic == FS_UFS1_MAGIC) { newcg->cg_initediblk = 0; } else { if ((unsigned)cg->cg_initediblk > fs->fs_ipg) newcg->cg_initediblk = fs->fs_ipg; else newcg->cg_initediblk = cg->cg_initediblk; } memset(&newcg->cg_frsum[0], 0, sizeof newcg->cg_frsum); memset(cg_inosused(newcg), 0, (size_t)(mapsize)); inum = fs->fs_ipg * c; for (i = 0; i < inostathead[c].il_numalloced; inum++, i++) { switch (inoinfo(inum)->ino_state) { case USTATE: break; case DSTATE: case DCLEAR: case DFOUND: case DZLINK: newcg->cg_cs.cs_ndir++; /* FALLTHROUGH */ case FSTATE: 
case FCLEAR: case FZLINK: newcg->cg_cs.cs_nifree--; setbit(cg_inosused(newcg), i); break; default: if (inum < UFS_ROOTINO) break; errx(EEXIT, "BAD STATE %d FOR INODE I=%ju", inoinfo(inum)->ino_state, (uintmax_t)inum); } } if (c == 0) for (i = 0; i < (int)UFS_ROOTINO; i++) { setbit(cg_inosused(newcg), i); newcg->cg_cs.cs_nifree--; } start = -1; for (i = 0, d = dbase; d < dmax; d += fs->fs_frag, i += fs->fs_frag) { frags = 0; for (j = 0; j < fs->fs_frag; j++) { if (testbmap(d + j)) { if ((Eflag || Zflag) && start != -1) { clear_blocks(start, d + j - 1); start = -1; } continue; } if (start == -1) start = d + j; setbit(cg_blksfree(newcg), i + j); frags++; } if (frags == fs->fs_frag) { newcg->cg_cs.cs_nbfree++; if (fs->fs_contigsumsize > 0) setbit(cg_clustersfree(newcg), i / fs->fs_frag); } else if (frags > 0) { newcg->cg_cs.cs_nffree += frags; blk = blkmap(fs, cg_blksfree(newcg), i); ffs_fragacct(fs, blk, newcg->cg_frsum, 1); } } if ((Eflag || Zflag) && start != -1) clear_blocks(start, d - 1); if (fs->fs_contigsumsize > 0) { int32_t *sump = cg_clustersum(newcg); u_char *mapp = cg_clustersfree(newcg); int map = *mapp++; int bit = 1; int run = 0; for (i = 0; i < newcg->cg_nclusterblks; i++) { if ((map & bit) != 0) { run++; } else if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; run = 0; } if ((i & (CHAR_BIT - 1)) != (CHAR_BIT - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; } } if (bkgrdflag != 0) { cstotal.cs_nffree += cg->cg_cs.cs_nffree; cstotal.cs_nbfree += cg->cg_cs.cs_nbfree; cstotal.cs_nifree += cg->cg_cs.cs_nifree; cstotal.cs_ndir += cg->cg_cs.cs_ndir; } else { cstotal.cs_nffree += newcg->cg_cs.cs_nffree; cstotal.cs_nbfree += newcg->cg_cs.cs_nbfree; cstotal.cs_nifree += newcg->cg_cs.cs_nifree; cstotal.cs_ndir += newcg->cg_cs.cs_ndir; } cs = &fs->fs_cs(fs, c); if (cursnapshot == 0 && memcmp(&newcg->cg_cs, cs, sizeof *cs) != 0 && 
dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(cs, &newcg->cg_cs, sizeof *cs); sbdirty(); } if (rewritecg) { memmove(cg, newcg, (size_t)fs->fs_cgsize); cgdirty(cgbp); continue; } if (cursnapshot == 0 && memcmp(newcg, cg, basesize) != 0 && dofix(&idesc[2], "SUMMARY INFORMATION BAD")) { memmove(cg, newcg, (size_t)basesize); cgdirty(cgbp); } if (bkgrdflag != 0 || usedsoftdep || debug) update_maps(cg, newcg, bkgrdflag); if (cursnapshot == 0 && memcmp(cg_inosused(newcg), cg_inosused(cg), mapsize) != 0 && dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { memmove(cg_inosused(cg), cg_inosused(newcg), (size_t)mapsize); cgdirty(cgbp); } } if (cursnapshot == 0 && memcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal) != 0 && dofix(&idesc[0], "SUMMARY BLK COUNT(S) WRONG IN SUPERBLK")) { if (debug) { printf("cstotal is currently: %jd dirs, %jd blks free, " "%jd frags free, %jd inos free, %jd clusters\n", (intmax_t)fs->fs_cstotal.cs_ndir, (intmax_t)fs->fs_cstotal.cs_nbfree, (intmax_t)fs->fs_cstotal.cs_nffree, (intmax_t)fs->fs_cstotal.cs_nifree, (intmax_t)fs->fs_cstotal.cs_numclusters); printf("cstotal ought to be: %jd dirs, %jd blks free, " "%jd frags free, %jd inos free, %jd clusters\n", (intmax_t)cstotal.cs_ndir, (intmax_t)cstotal.cs_nbfree, (intmax_t)cstotal.cs_nffree, (intmax_t)cstotal.cs_nifree, (intmax_t)cstotal.cs_numclusters); } memmove(&fs->fs_cstotal, &cstotal, sizeof cstotal); fs->fs_ronly = 0; fs->fs_fmod = 0; sbdirty(); } /* * When doing background fsck on a snapshot, figure out whether * the superblock summary is inaccurate and correct it when * necessary. 
*/ if (cursnapshot != 0) { cmd.size = 1; cmd.value = cstotal.cs_ndir - fs->fs_cstotal.cs_ndir; if (cmd.value != 0) { if (debug) printf("adjndir by %+" PRIi64 "\n", cmd.value); if (bkgrdsumadj == 0 || sysctl(adjndir, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) - rwerror("ADJUST NUMBER OF DIRECTORIES", cmd.value); + rwerror("ADJUST NUMBER OF DIRECTORIES", + cmd.value); } cmd.value = cstotal.cs_nbfree - fs->fs_cstotal.cs_nbfree; if (cmd.value != 0) { if (debug) - printf("adjnbfree by %+" PRIi64 "\n", cmd.value); + printf("adjnbfree by %+" PRIi64 "\n", + cmd.value); if (bkgrdsumadj == 0 || sysctl(adjnbfree, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) - rwerror("ADJUST NUMBER OF FREE BLOCKS", cmd.value); + rwerror("ADJUST NUMBER OF FREE BLOCKS", + cmd.value); } cmd.value = cstotal.cs_nifree - fs->fs_cstotal.cs_nifree; if (cmd.value != 0) { if (debug) - printf("adjnifree by %+" PRIi64 "\n", cmd.value); + printf("adjnifree by %+" PRIi64 "\n", + cmd.value); if (bkgrdsumadj == 0 || sysctl(adjnifree, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) - rwerror("ADJUST NUMBER OF FREE INODES", cmd.value); + rwerror("ADJUST NUMBER OF FREE INODES", + cmd.value); } cmd.value = cstotal.cs_nffree - fs->fs_cstotal.cs_nffree; if (cmd.value != 0) { if (debug) - printf("adjnffree by %+" PRIi64 "\n", cmd.value); + printf("adjnffree by %+" PRIi64 "\n", + cmd.value); if (bkgrdsumadj == 0 || sysctl(adjnffree, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) - rwerror("ADJUST NUMBER OF FREE FRAGS", cmd.value); + rwerror("ADJUST NUMBER OF FREE FRAGS", + cmd.value); } - cmd.value = cstotal.cs_numclusters - fs->fs_cstotal.cs_numclusters; + cmd.value = cstotal.cs_numclusters - + fs->fs_cstotal.cs_numclusters; if (cmd.value != 0) { if (debug) - printf("adjnumclusters by %+" PRIi64 "\n", cmd.value); - if (bkgrdsumadj == 0 || sysctl(adjnumclusters, MIBSIZE, 0, 0, - &cmd, sizeof cmd) == -1) - rwerror("ADJUST NUMBER OF FREE CLUSTERS", cmd.value); + printf("adjnumclusters by %+" PRIi64 "\n", + cmd.value); + if (bkgrdsumadj == 0 
|| sysctl(adjnumclusters, MIBSIZE, + 0, 0, &cmd, sizeof cmd) == -1) + rwerror("ADJUST NUMBER OF FREE CLUSTERS", + cmd.value); } } } /* * Compare the original cylinder group inode and block bitmaps with the * updated cylinder group inode and block bitmaps. Free inodes and blocks * that have been added. Complain if any previously freed inodes blocks * are now allocated. */ void update_maps( struct cg *oldcg, /* cylinder group of claimed allocations */ struct cg *newcg, /* cylinder group of determined allocations */ int usesysctl) /* 1 => use sysctl interface to update maps */ { int inomapsize, excessdirs; struct fs *fs = &sblock; inomapsize = howmany(fs->fs_ipg, CHAR_BIT); excessdirs = oldcg->cg_cs.cs_ndir - newcg->cg_cs.cs_ndir; if (excessdirs < 0) { pfatal("LOST %d DIRECTORIES\n", -excessdirs); excessdirs = 0; } if (excessdirs > 0) check_maps(cg_inosused(newcg), cg_inosused(oldcg), inomapsize, oldcg->cg_cgx * (ufs2_daddr_t)fs->fs_ipg, "DIR", freedirs, 0, excessdirs, usesysctl); check_maps(cg_inosused(newcg), cg_inosused(oldcg), inomapsize, oldcg->cg_cgx * (ufs2_daddr_t)fs->fs_ipg, "FILE", freefiles, excessdirs, fs->fs_ipg, usesysctl); check_maps(cg_blksfree(oldcg), cg_blksfree(newcg), howmany(fs->fs_fpg, CHAR_BIT), oldcg->cg_cgx * (ufs2_daddr_t)fs->fs_fpg, "FRAG", freeblks, 0, fs->fs_fpg, usesysctl); } static void check_maps( u_char *map1, /* map of claimed allocations */ u_char *map2, /* map of determined allocations */ int mapsize, /* size of above two maps */ ufs2_daddr_t startvalue, /* resource value for first element in map */ const char *name, /* name of resource found in maps */ int *opcode, /* sysctl opcode to free resource */ int skip, /* number of entries to skip before starting to free */ int limit, /* limit on number of entries to free */ int usesysctl) /* 1 => use sysctl interface to update maps */ { # define BUFSIZE 16 char buf[BUFSIZE]; long i, j, k, l, m, size; ufs2_daddr_t n, astart, aend, ustart, uend; void (*msg)(const char *fmt, ...); if 
(usesysctl) msg = pfatal; else msg = pwarn; astart = ustart = aend = uend = -1; for (i = 0; i < mapsize; i++) { j = *map1++; k = *map2++; if (j == k) continue; for (m = 0, l = 1; m < CHAR_BIT; m++, l <<= 1) { if ((j & l) == (k & l)) continue; n = startvalue + i * CHAR_BIT + m; if ((j & l) != 0) { if (astart == -1) { astart = aend = n; continue; } if (aend + 1 == n) { aend = n; continue; } if (astart == aend) (*msg)("ALLOCATED %s %" PRId64 " MARKED FREE\n", name, astart); else (*msg)("%s %sS %" PRId64 "-%" PRId64 " MARKED FREE\n", "ALLOCATED", name, astart, aend); astart = aend = n; } else { if (ustart == -1) { ustart = uend = n; continue; } if (uend + 1 == n) { uend = n; continue; } size = uend - ustart + 1; if (size <= skip) { skip -= size; ustart = uend = n; continue; } if (skip > 0) { ustart += skip; size -= skip; skip = 0; } if (size > limit) size = limit; if (debug && size == 1) pwarn("%s %s %" PRId64 " MARKED USED\n", "UNALLOCATED", name, ustart); else if (debug) pwarn("%s %sS %" PRId64 "-%" PRId64 " MARKED USED\n", "UNALLOCATED", name, ustart, ustart + size - 1); if (usesysctl != 0) { cmd.value = ustart; cmd.size = size; if (sysctl(opcode, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) { snprintf(buf, BUFSIZE, "FREE %s", name); rwerror(buf, cmd.value); } } limit -= size; if (limit <= 0) return; ustart = uend = n; } } } if (astart != -1) { if (astart == aend) (*msg)("ALLOCATED %s %" PRId64 " MARKED FREE\n", name, astart); else (*msg)("ALLOCATED %sS %" PRId64 "-%" PRId64 " MARKED FREE\n", name, astart, aend); } if (ustart != -1) { size = uend - ustart + 1; if (size <= skip) return; if (skip > 0) { ustart += skip; size -= skip; } if (size > limit) size = limit; if (debug) { if (size == 1) pwarn("UNALLOCATED %s %" PRId64 " MARKED USED\n", name, ustart); else pwarn("UNALLOCATED %sS %" PRId64 "-%" PRId64 " MARKED USED\n", name, ustart, ustart + size - 1); } if (usesysctl != 0) { cmd.value = ustart; cmd.size = size; if (sysctl(opcode, MIBSIZE, 0, 0, &cmd, sizeof cmd) == 
-1) { snprintf(buf, BUFSIZE, "FREE %s", name); rwerror(buf, cmd.value); } } } } static void clear_blocks(ufs2_daddr_t start, ufs2_daddr_t end) { if (debug) printf("Zero frags %jd to %jd\n", start, end); if (Zflag) blzero(fswritefd, fsbtodb(&sblock, start), lfragtosize(&sblock, end - start + 1)); if (Eflag) blerase(fswritefd, fsbtodb(&sblock, start), lfragtosize(&sblock, end - start + 1)); } diff --git a/sbin/fsck_ffs/suj.c b/sbin/fsck_ffs/suj.c index c56d6f131af1..0b1202866fe5 100644 --- a/sbin/fsck_ffs/suj.c +++ b/sbin/fsck_ffs/suj.c @@ -1,2536 +1,2539 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright 2009, 2010 Jeffrey W. Roberson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" #define DOTDOT_OFFSET DIRECTSIZ(1) struct suj_seg { TAILQ_ENTRY(suj_seg) ss_next; struct jsegrec ss_rec; uint8_t *ss_blk; }; struct suj_rec { TAILQ_ENTRY(suj_rec) sr_next; union jrec *sr_rec; }; TAILQ_HEAD(srechd, suj_rec); struct suj_ino { LIST_ENTRY(suj_ino) si_next; struct srechd si_recs; struct srechd si_newrecs; struct srechd si_movs; struct jtrncrec *si_trunc; ino_t si_ino; char si_skipparent; char si_hasrecs; char si_blkadj; char si_linkadj; int si_mode; nlink_t si_nlinkadj; nlink_t si_nlink; nlink_t si_dotlinks; }; LIST_HEAD(inohd, suj_ino); struct suj_blk { LIST_ENTRY(suj_blk) sb_next; struct srechd sb_recs; ufs2_daddr_t sb_blk; }; LIST_HEAD(blkhd, suj_blk); struct suj_cg { LIST_ENTRY(suj_cg) sc_next; struct blkhd sc_blkhash[HASHSIZE]; struct inohd sc_inohash[HASHSIZE]; struct ino_blk *sc_lastiblk; struct suj_ino *sc_lastino; struct suj_blk *sc_lastblk; struct bufarea *sc_cgbp; struct cg *sc_cgp; int sc_cgx; }; static LIST_HEAD(cghd, suj_cg) cghash[HASHSIZE]; static struct suj_cg *lastcg; static TAILQ_HEAD(seghd, suj_seg) allsegs; static uint64_t oldseq; static struct fs *fs = NULL; static ino_t sujino; /* * Summary statistics. */ static uint64_t freefrags; static uint64_t freeblocks; static uint64_t freeinos; static uint64_t freedir; static uint64_t jbytes; static uint64_t jrecs; static jmp_buf jmpbuf; typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int); static void err_suj(const char *, ...) 
__dead2; static void ino_trunc(ino_t, off_t); static void ino_decr(ino_t); static void ino_adjust(struct suj_ino *); static void ino_build(struct suj_ino *); static int blk_isfree(ufs2_daddr_t); static void initsuj(void); static void * errmalloc(size_t n) { void *a; a = Malloc(n); if (a == NULL) err(EX_OSERR, "malloc(%zu)", n); return (a); } /* * When hit a fatal error in journalling check, print out * the error and then offer to fallback to normal fsck. */ static void err_suj(const char * restrict fmt, ...) { va_list ap; if (preen) (void)fprintf(stdout, "%s: ", cdevname); va_start(ap, fmt); (void)vfprintf(stdout, fmt, ap); va_end(ap); longjmp(jmpbuf, -1); } /* * Lookup a cg by number in the hash so we can keep track of which cgs * need stats rebuilt. */ static struct suj_cg * cg_lookup(int cgx) { struct cghd *hd; struct suj_cg *sc; struct bufarea *cgbp; if (cgx < 0 || cgx >= fs->fs_ncg) err_suj("Bad cg number %d\n", cgx); if (lastcg && lastcg->sc_cgx == cgx) return (lastcg); cgbp = cglookup(cgx); if (!check_cgmagic(cgx, cgbp)) err_suj("UNABLE TO REBUILD CYLINDER GROUP %d", cgx); hd = &cghash[HASH(cgx)]; LIST_FOREACH(sc, hd, sc_next) if (sc->sc_cgx == cgx) { sc->sc_cgbp = cgbp; sc->sc_cgp = sc->sc_cgbp->b_un.b_cg; lastcg = sc; return (sc); } sc = errmalloc(sizeof(*sc)); bzero(sc, sizeof(*sc)); sc->sc_cgbp = cgbp; sc->sc_cgp = sc->sc_cgbp->b_un.b_cg; sc->sc_cgx = cgx; LIST_INSERT_HEAD(hd, sc, sc_next); return (sc); } /* * Lookup an inode number in the hash and allocate a suj_ino if it does * not exist. 
*/ static struct suj_ino * ino_lookup(ino_t ino, int creat) { struct suj_ino *sino; struct inohd *hd; struct suj_cg *sc; sc = cg_lookup(ino_to_cg(fs, ino)); if (sc->sc_lastino && sc->sc_lastino->si_ino == ino) return (sc->sc_lastino); hd = &sc->sc_inohash[HASH(ino)]; LIST_FOREACH(sino, hd, si_next) if (sino->si_ino == ino) return (sino); if (creat == 0) return (NULL); sino = errmalloc(sizeof(*sino)); bzero(sino, sizeof(*sino)); sino->si_ino = ino; TAILQ_INIT(&sino->si_recs); TAILQ_INIT(&sino->si_newrecs); TAILQ_INIT(&sino->si_movs); LIST_INSERT_HEAD(hd, sino, si_next); return (sino); } /* * Lookup a block number in the hash and allocate a suj_blk if it does * not exist. */ static struct suj_blk * blk_lookup(ufs2_daddr_t blk, int creat) { struct suj_blk *sblk; struct suj_cg *sc; struct blkhd *hd; sc = cg_lookup(dtog(fs, blk)); if (sc->sc_lastblk && sc->sc_lastblk->sb_blk == blk) return (sc->sc_lastblk); hd = &sc->sc_blkhash[HASH(fragstoblks(fs, blk))]; LIST_FOREACH(sblk, hd, sb_next) if (sblk->sb_blk == blk) return (sblk); if (creat == 0) return (NULL); sblk = errmalloc(sizeof(*sblk)); bzero(sblk, sizeof(*sblk)); sblk->sb_blk = blk; TAILQ_INIT(&sblk->sb_recs); LIST_INSERT_HEAD(hd, sblk, sb_next); return (sblk); } static int blk_overlaps(struct jblkrec *brec, ufs2_daddr_t start, int frags) { ufs2_daddr_t bstart; ufs2_daddr_t bend; ufs2_daddr_t end; end = start + frags; bstart = brec->jb_blkno + brec->jb_oldfrags; bend = bstart + brec->jb_frags; if (start < bend && end > bstart) return (1); return (0); } static int blk_equals(struct jblkrec *brec, ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t start, int frags) { if (brec->jb_ino != ino || brec->jb_lbn != lbn) return (0); if (brec->jb_blkno + brec->jb_oldfrags != start) return (0); if (brec->jb_frags < frags) return (0); return (1); } static void blk_setmask(struct jblkrec *brec, int *mask) { int i; for (i = brec->jb_oldfrags; i < brec->jb_oldfrags + brec->jb_frags; i++) *mask |= 1 << i; } /* * Determine whether a given block 
has been reallocated to a new location.
 * Returns a mask of overlapping bits if any frags have been reused or
 * zero if the block has not been re-used and the contents can be trusted.
 *
 * This is used to ensure that an orphaned pointer due to truncate is safe
 * to be freed.  The mask value can be used to free partial blocks.
 */
static int
blk_freemask(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags)
{
	struct suj_blk *sblk;
	struct suj_rec *srec;
	struct jblkrec *brec;
	int mask;
	int off;

	/*
	 * To be certain we're not freeing a reallocated block we lookup
	 * this block in the blk hash and see if there is an allocation
	 * journal record that overlaps with any fragments in the block
	 * we're concerned with.  If any fragments have been reallocated
	 * the block has already been freed and re-used for another purpose.
	 */
	sblk = blk_lookup(blknum(fs, blk), 0);
	if (sblk == NULL)
		return (0);
	mask = 0;
	/* Fragment offset of 'blk' inside its enclosing full block. */
	off = blk - sblk->sb_blk;
	TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) {
		brec = (struct jblkrec *)srec->sr_rec;
		/*
		 * If the block overlaps but does not match
		 * exactly this record refers to the current
		 * location.
		 */
		if (blk_overlaps(brec, blk, frags) == 0)
			continue;
		/*
		 * Records are visited in list order: an exact match
		 * discards the bits gathered so far, any other overlap
		 * adds that record's fragments to the mask.
		 */
		if (blk_equals(brec, ino, lbn, blk, frags) == 1)
			mask = 0;
		else
			blk_setmask(brec, &mask);
	}
	/*
	 * %jd consumes an intmax_t and %X an unsigned int, so convert the
	 * arguments explicitly instead of relying on the typedefs having
	 * the same representation.
	 */
	if (debug)
		printf("blk_freemask: blk %jd sblk %jd off %d mask 0x%X\n",
		    (intmax_t)blk, (intmax_t)sblk->sb_blk, off,
		    (unsigned int)mask);
	/* Shift so that bit 0 corresponds to the first fragment of 'blk'. */
	return (mask >> off);
}

/*
 * Determine whether it is safe to follow an indirect.  It is not safe
 * if any part of the indirect has been reallocated or the last journal
 * entry was an allocation.  Just allocated indirects may not have valid
 * pointers yet and all of their children will have their own records.
 * It is also not safe to follow an indirect if the cg bitmap has been
 * cleared as a new allocation may write to the block prior to the journal
 * being written.
 *
 * Returns 1 if it's safe to follow the indirect and 0 otherwise.
*/ static int blk_isindir(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn) { struct suj_blk *sblk; struct jblkrec *brec; sblk = blk_lookup(blk, 0); if (sblk == NULL) return (1); if (TAILQ_EMPTY(&sblk->sb_recs)) return (1); brec = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec; if (blk_equals(brec, ino, lbn, blk, fs->fs_frag)) if (brec->jb_op == JOP_FREEBLK) return (!blk_isfree(blk)); return (0); } /* * Check to see if the requested block is available. * We can just check in the cylinder-group maps as * they will only have usable blocks in them. */ ufs2_daddr_t suj_checkblkavail(ufs2_daddr_t blkno, long frags) { struct bufarea *cgbp; struct cg *cgp; ufs2_daddr_t j, k, baseblk; long cg; if ((u_int64_t)blkno > sblock.fs_size) return (0); cg = dtog(&sblock, blkno); cgbp = cglookup(cg); cgp = cgbp->b_un.b_cg; if (!check_cgmagic(cg, cgbp)) return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag)); baseblk = dtogd(&sblock, blkno); for (j = 0; j <= sblock.fs_frag - frags; j++) { if (!isset(cg_blksfree(cgp), baseblk + j)) continue; for (k = 1; k < frags; k++) if (!isset(cg_blksfree(cgp), baseblk + j + k)) break; if (k < frags) { j += k; continue; } for (k = 0; k < frags; k++) clrbit(cg_blksfree(cgp), baseblk + j + k); n_blks += frags; if (frags == sblock.fs_frag) cgp->cg_cs.cs_nbfree--; else cgp->cg_cs.cs_nffree -= frags; cgdirty(cgbp); return ((cg * sblock.fs_fpg) + baseblk + j); } return (0); } /* * Clear an inode from the cg bitmap. If the inode was already clear return * 0 so the caller knows it does not have to check the inode contents. */ static int ino_free(ino_t ino, int mode) { struct suj_cg *sc; uint8_t *inosused; struct cg *cgp; int cg; cg = ino_to_cg(fs, ino); ino = ino % fs->fs_ipg; sc = cg_lookup(cg); cgp = sc->sc_cgp; inosused = cg_inosused(cgp); /* * The bitmap may never have made it to the disk so we have to * conditionally clear. We can avoid writing the cg in this case. 
*/ if (isclr(inosused, ino)) return (0); freeinos++; clrbit(inosused, ino); if (ino < cgp->cg_irotor) cgp->cg_irotor = ino; cgp->cg_cs.cs_nifree++; if ((mode & IFMT) == IFDIR) { freedir++; cgp->cg_cs.cs_ndir--; } cgdirty(sc->sc_cgbp); return (1); } /* * Free 'frags' frags starting at filesystem block 'bno' skipping any frags * set in the mask. */ static void blk_free(ino_t ino, ufs2_daddr_t bno, int mask, int frags) { ufs1_daddr_t fragno, cgbno; struct suj_cg *sc; struct cg *cgp; int i, cg; uint8_t *blksfree; if (debug) printf("Freeing %d frags at blk %jd mask 0x%x\n", frags, bno, mask); /* * Check to see if the block needs to be claimed by a snapshot. * If wanted, the snapshot references it. Otherwise we free it. */ if (snapblkfree(fs, bno, lfragtosize(fs, frags), ino, suj_checkblkavail)) return; cg = dtog(fs, bno); sc = cg_lookup(cg); cgp = sc->sc_cgp; cgbno = dtogd(fs, bno); blksfree = cg_blksfree(cgp); /* * If it's not allocated we only wrote the journal entry * and never the bitmaps. Here we unconditionally clear and * resolve the cg summary later. */ if (frags == fs->fs_frag && mask == 0) { fragno = fragstoblks(fs, cgbno); ffs_setblock(fs, blksfree, fragno); freeblocks++; } else { /* * deallocate the fragment */ for (i = 0; i < frags; i++) - if ((mask & (1 << i)) == 0 && isclr(blksfree, cgbno +i)) { + if ((mask & (1 << i)) == 0 && + isclr(blksfree, cgbno +i)) { freefrags++; setbit(blksfree, cgbno + i); } } cgdirty(sc->sc_cgbp); } /* * Returns 1 if the whole block starting at 'bno' is marked free and 0 * otherwise. */ static int blk_isfree(ufs2_daddr_t bno) { struct suj_cg *sc; sc = cg_lookup(dtog(fs, bno)); return ffs_isblock(fs, cg_blksfree(sc->sc_cgp), dtogd(fs, bno)); } /* * Determine whether a block exists at a particular lbn in an inode. * Returns 1 if found, 0 if not. lbn may be negative for indirects * or ext blocks. 
*/
static int
blk_isat(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int *frags)
{
	struct inode ip;
	union dinode *dp;
	ufs2_daddr_t nblk;

	ginode(ino, &ip);
	dp = ip.i_dp;
	/* An inode with no links or a zero mode cannot own the block. */
	if (DIP(dp, di_nlink) == 0 || DIP(dp, di_mode) == 0) {
		irelse(&ip);
		return (0);
	}
	nblk = ino_blkatoff(dp, ino, lbn, frags, NULL);
	irelse(&ip);
	return (nblk == blk);
}

/*
 * Clear the directory entry at diroff that should point to child.  Minimal
 * checking is done and it is assumed that this path was verified with isat.
 * Exits via errx() if the entry at diroff does not name 'child'.
 */
static void
ino_clrat(ino_t parent, off_t diroff, ino_t child)
{
	union dinode *dip;
	struct direct *dp;
	struct inode ip;
	ufs2_daddr_t blk;
	struct bufarea *bp;
	ufs_lbn_t lbn;
	int blksize;
	int frags;
	int doff;

	if (debug)
		printf("Clearing inode %ju from parent %ju at offset %jd\n",
		    (uintmax_t)child, (uintmax_t)parent, diroff);

	/* Split diroff into a logical block and an offset inside it. */
	lbn = lblkno(fs, diroff);
	doff = blkoff(fs, diroff);
	ginode(parent, &ip);
	dip = ip.i_dp;
	blk = ino_blkatoff(dip, parent, lbn, &frags, NULL);
	blksize = sblksize(fs, DIP(dip, di_size), lbn);
	irelse(&ip);
	bp = getdatablk(blk, blksize, BT_DIRDATA);
	if (bp->b_errs != 0)
		err_suj("ino_clrat: UNRECOVERABLE I/O ERROR");
	dp = (struct direct *)&bp->b_un.b_buf[doff];
	if (dp->d_ino != child)
		errx(1, "Inode %ju does not exist in %ju at %jd",
		    (uintmax_t)child, (uintmax_t)parent, diroff);
	/* Zeroing d_ino is what clears the entry. */
	dp->d_ino = 0;
	dirty(bp);
	brelse(bp);
	/*
	 * The actual .. reference count will already have been removed
	 * from the parent by the .. remref record.
	 */
}

/*
 * Determines whether a pointer to an inode exists within a directory
 * at a specified offset.  Returns 1 if it does and 0 otherwise.  On a
 * match *mode is set from the entry's d_type and *isdot is set when the
 * entry is a "." or ".." link; when the parent is not a directory *mode
 * is left holding the parent's di_mode.
 */
static int
ino_isat(ino_t parent, off_t diroff, ino_t child, int *mode, int *isdot)
{
	struct inode ip;
	union dinode *dip;
	struct bufarea *bp;
	struct direct *dp;
	ufs2_daddr_t blk;
	ufs_lbn_t lbn;
	int blksize;
	int frags;
	int dpoff;
	int doff;

	*isdot = 0;
	ginode(parent, &ip);
	dip = ip.i_dp;
	*mode = DIP(dip, di_mode);
	if ((*mode & IFMT) != IFDIR) {
		if (debug) {
			/*
			 * This can happen if the parent inode
			 * was reallocated.
			 */
			if (*mode != 0)
				printf("Directory %ju has bad mode %o\n",
				    (uintmax_t)parent, *mode);
			else
				printf("Directory %ju has zero mode\n",
				    (uintmax_t)parent);
		}
		irelse(&ip);
		return (0);
	}
	lbn = lblkno(fs, diroff);
	doff = blkoff(fs, diroff);
	blksize = sblksize(fs, DIP(dip, di_size), lbn);
	/* Reject offsets past the directory size or the block's size. */
	if (diroff + DIRECTSIZ(1) > DIP(dip, di_size) || doff >= blksize) {
		if (debug)
			printf("ino %ju absent from %ju due to offset %jd"
			    " exceeding size %jd\n",
			    (uintmax_t)child, (uintmax_t)parent, diroff,
			    DIP(dip, di_size));
		irelse(&ip);
		return (0);
	}
	blk = ino_blkatoff(dip, parent, lbn, &frags, NULL);
	irelse(&ip);
	if (blk <= 0) {
		/* NOTE(review): this debug message has no trailing newline. */
		if (debug)
			printf("Sparse directory %ju", (uintmax_t)parent);
		return (0);
	}
	bp = getdatablk(blk, blksize, BT_DIRDATA);
	if (bp->b_errs != 0)
		err_suj("ino_isat: UNRECOVERABLE I/O ERROR");
	/*
	 * Walk through the records from the start of the block to be
	 * certain we hit a valid record and not some junk in the middle
	 * of a file name.  Stop when we reach or pass the expected offset.
	 */
	dpoff = rounddown(doff, DIRBLKSIZ);
	do {
		dp = (struct direct *)&bp->b_un.b_buf[dpoff];
		if (dpoff == doff)
			break;
		/* A zero record length would never advance; stop here. */
		if (dp->d_reclen == 0)
			break;
		dpoff += dp->d_reclen;
	} while (dpoff <= doff);
	if (dpoff > fs->fs_bsize)
		err_suj("Corrupt directory block in dir ino %ju\n",
		    (uintmax_t)parent);
	/* Not found. */
	if (dpoff != doff) {
		if (debug)
			printf("ino %ju not found in %ju, lbn %jd, dpoff %d\n",
			    (uintmax_t)child, (uintmax_t)parent, lbn, dpoff);
		brelse(bp);
		return (0);
	}
	/*
	 * We found the item in question.  Record the mode and whether it's
	 * a . or .. link for the caller.
	 */
	if (dp->d_ino == child) {
		if (child == parent)
			*isdot = 1;
		else if (dp->d_namlen == 2 &&
		    dp->d_name[0] == '.' && dp->d_name[1] == '.')
			*isdot = 1;
		*mode = DTTOIF(dp->d_type);
		brelse(bp);
		return (1);
	}
	if (debug)
		printf("ino %ju doesn't match dirent ino %ju in parent %ju\n",
		    (uintmax_t)child, (uintmax_t)dp->d_ino, (uintmax_t)parent);
	brelse(bp);
	return (0);
}

/* Flags accepted by ino_visit() and indir_visit(). */
#define	VISIT_INDIR	0x0001
#define	VISIT_EXT	0x0002
#define	VISIT_ROOT	0x0004	/* Operation came via root & valid pointers. */

/*
 * Read an indirect level which may or may not be linked into an inode.
 * Each non-zero pointer is either handed to 'visitor' (level 0) or
 * descended into recursively.  With VISIT_INDIR the indirect block itself
 * is also passed to 'visitor' after its children.  *frags is advanced by
 * fs_frag for every block reported.
 */
static void
indir_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, uint64_t *frags,
    ino_visitor visitor, int flags)
{
	struct bufarea *bp;
	ufs_lbn_t lbnadd;
	ufs2_daddr_t nblk;
	ufs_lbn_t nlbn;
	int level;
	int i;

	/*
	 * Don't visit indirect blocks with contents we can't trust.  This
	 * should only happen when indir_visit() is called to complete a
	 * truncate that never finished and not when a pointer is found via
	 * an inode.
	 */
	if (blk == 0)
		return;
	level = lbn_level(lbn);
	if (level == -1)
		err_suj("Invalid level for lbn %jd\n", lbn);
	if ((flags & VISIT_ROOT) == 0 && blk_isindir(blk, ino, lbn) == 0) {
		if (debug)
			printf("blk %jd ino %ju lbn %jd(%d) is not indir.\n",
			    blk, (uintmax_t)ino, lbn, level);
		/* Skip the contents but still report the block itself. */
		goto out;
	}
	/* lbnadd = NINDIR(fs)^level: lbns covered by each pointer. */
	lbnadd = 1;
	for (i = level; i > 0; i--)
		lbnadd *= NINDIR(fs);
	bp = getdatablk(blk, fs->fs_bsize, BT_LEVEL1 + level);
	if (bp->b_errs != 0)
		err_suj("indir_visit: UNRECOVERABLE I/O ERROR\n");
	for (i = 0; i < NINDIR(fs); i++) {
		if ((nblk = IBLK(bp, i)) == 0)
			continue;
		if (level == 0) {
			nlbn = -lbn + i * lbnadd;
			(*frags) += fs->fs_frag;
			visitor(ino, nlbn, nblk, fs->fs_frag);
		} else {
			nlbn = (lbn + 1) - (i * lbnadd);
			indir_visit(ino, nlbn, nblk, frags, visitor, flags);
		}
	}
	brelse(bp);
out:
	if (flags & VISIT_INDIR) {
		(*frags) += fs->fs_frag;
		visitor(ino, lbn, blk, fs->fs_frag);
	}
}

/*
 * Visit each block in an inode as specified by 'flags' and call a
 * callback function.  The callback may inspect or free blocks.  The
 * count of frags found according to the size in the file is returned.
* This is not valid for sparse files but may be used to determine * the correct di_blocks for a file. */ static uint64_t ino_visit(union dinode *dp, ino_t ino, ino_visitor visitor, int flags) { ufs_lbn_t nextlbn; ufs_lbn_t tmpval; ufs_lbn_t lbn; uint64_t size; uint64_t fragcnt; int mode; int frags; int i; size = DIP(dp, di_size); mode = DIP(dp, di_mode) & IFMT; fragcnt = 0; if ((flags & VISIT_EXT) && fs->fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize) { for (i = 0; i < UFS_NXADDR; i++) { if (dp->dp2.di_extb[i] == 0) continue; frags = sblksize(fs, dp->dp2.di_extsize, i); frags = numfrags(fs, frags); fragcnt += frags; visitor(ino, -1 - i, dp->dp2.di_extb[i], frags); } } /* Skip datablocks for short links and devices. */ if (mode == IFBLK || mode == IFCHR || (mode == IFLNK && size < fs->fs_maxsymlinklen)) return (fragcnt); for (i = 0; i < UFS_NDADDR; i++) { if (DIP(dp, di_db[i]) == 0) continue; frags = sblksize(fs, size, i); frags = numfrags(fs, frags); fragcnt += frags; visitor(ino, i, DIP(dp, di_db[i]), frags); } /* * We know the following indirects are real as we're following * real pointers to them. */ flags |= VISIT_ROOT; for (i = 0, tmpval = NINDIR(fs), lbn = UFS_NDADDR; i < UFS_NIADDR; i++, lbn = nextlbn) { nextlbn = lbn + tmpval; tmpval *= NINDIR(fs); if (DIP(dp, di_ib[i]) == 0) continue; indir_visit(ino, -lbn - i, DIP(dp, di_ib[i]), &fragcnt, visitor, flags); } return (fragcnt); } /* * Null visitor function used when we just want to count blocks and * record the lbn. */ ufs_lbn_t visitlbn; static void null_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { if (lbn > 0) visitlbn = lbn; } /* * Recalculate di_blocks when we discover that a block allocation or * free was not successfully completed. The kernel does not roll this back * because it would be too expensive to compute which indirects were * reachable at the time the inode was written. 
*/ static void ino_adjblks(struct suj_ino *sino) { struct inode ip; union dinode *dp; uint64_t blocks; uint64_t frags; off_t isize; off_t size; ino_t ino; ino = sino->si_ino; ginode(ino, &ip); dp = ip.i_dp; /* No need to adjust zero'd inodes. */ if (DIP(dp, di_mode) == 0) { irelse(&ip); return; } /* * Visit all blocks and count them as well as recording the last * valid lbn in the file. If the file size doesn't agree with the * last lbn we need to truncate to fix it. Otherwise just adjust * the blocks count. */ visitlbn = 0; frags = ino_visit(dp, ino, null_visit, VISIT_INDIR | VISIT_EXT); blocks = fsbtodb(fs, frags); /* * We assume the size and direct block list is kept coherent by * softdep. For files that have extended into indirects we truncate * to the size in the inode or the maximum size permitted by * populated indirects. */ if (visitlbn >= UFS_NDADDR) { isize = DIP(dp, di_size); size = lblktosize(fs, visitlbn + 1); if (isize > size) isize = size; /* Always truncate to free any unpopulated indirects. */ ino_trunc(ino, isize); irelse(&ip); return; } if (blocks == DIP(dp, di_blocks)) { irelse(&ip); return; } if (debug) printf("ino %ju adjusting block count from %jd to %jd\n", (uintmax_t)ino, DIP(dp, di_blocks), blocks); DIP_SET(dp, di_blocks, blocks); inodirty(&ip); irelse(&ip); } static void blk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { blk_free(ino, blk, blk_freemask(blk, ino, lbn, frags), frags); } /* * Free a block or tree of blocks that was previously rooted in ino at * the given lbn. If the lbn is an indirect all children are freed * recursively. 
*/ static void blk_free_lbn(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags, int follow) { uint64_t resid; int mask; mask = blk_freemask(blk, ino, lbn, frags); resid = 0; if (lbn <= -UFS_NDADDR && follow && mask == 0) indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR); else blk_free(ino, blk, mask, frags); } static void ino_setskip(struct suj_ino *sino, ino_t parent) { int isdot; int mode; if (ino_isat(sino->si_ino, DOTDOT_OFFSET, parent, &mode, &isdot)) sino->si_skipparent = 1; } static void ino_remref(ino_t parent, ino_t child, uint64_t diroff, int isdotdot) { struct suj_ino *sino; struct suj_rec *srec; struct jrefrec *rrec; /* * Lookup this inode to see if we have a record for it. */ sino = ino_lookup(child, 0); /* * Tell any child directories we've already removed their * parent link cnt. Don't try to adjust our link down again. */ if (sino != NULL && isdotdot == 0) ino_setskip(sino, parent); /* * No valid record for this inode. Just drop the on-disk * link by one. */ if (sino == NULL || sino->si_hasrecs == 0) { ino_decr(child); return; } /* * Use ino_adjust() if ino_check() has already processed this * child. If we lose the last non-dot reference to a * directory it will be discarded. */ if (sino->si_linkadj) { if (sino->si_nlink == 0) err_suj("ino_remref: ino %ld mode 0%o about to go " "negative\n", sino->si_ino, sino->si_mode); sino->si_nlink--; if (isdotdot) sino->si_dotlinks--; ino_adjust(sino); return; } /* * If we haven't yet processed this inode we need to make * sure we will successfully discover the lost path. If not * use nlinkadj to remember. */ TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; if (rrec->jr_parent == parent && rrec->jr_diroff == diroff) return; } sino->si_nlinkadj++; } /* * Free the children of a directory when the directory is discarded. 
*/ static void ino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { struct suj_ino *sino; struct bufarea *bp; struct direct *dp; off_t diroff; int skipparent; int isdotdot; int dpoff; int size; sino = ino_lookup(ino, 0); if (sino) skipparent = sino->si_skipparent; else skipparent = 0; size = lfragtosize(fs, frags); bp = getdatablk(blk, size, BT_DIRDATA); if (bp->b_errs != 0) err_suj("ino_free_children: UNRECOVERABLE I/O ERROR"); dp = (struct direct *)&bp->b_un.b_buf[0]; for (dpoff = 0; dpoff < size && dp->d_reclen; dpoff += dp->d_reclen) { dp = (struct direct *)&bp->b_un.b_buf[dpoff]; if (dp->d_ino == 0 || dp->d_ino == UFS_WINO) continue; if (dp->d_namlen == 1 && dp->d_name[0] == '.') continue; isdotdot = dp->d_namlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.'; if (isdotdot && skipparent == 1) continue; if (debug) printf("Directory %ju removing ino %ju name %s\n", (uintmax_t)ino, (uintmax_t)dp->d_ino, dp->d_name); diroff = lblktosize(fs, lbn) + dpoff; ino_remref(ino, dp->d_ino, diroff, isdotdot); } brelse(bp); } /* * Reclaim an inode, freeing all blocks and decrementing all children's * link counts. Free the inode back to the cg. */ static void ino_reclaim(struct inode *ip, ino_t ino, int mode) { union dinode *dp; uint32_t gen; dp = ip->i_dp; if (ino == UFS_ROOTINO) err_suj("Attempting to free UFS_ROOTINO\n"); if (debug) printf("Truncating and freeing ino %ju, nlink %d, mode %o\n", (uintmax_t)ino, DIP(dp, di_nlink), DIP(dp, di_mode)); /* We are freeing an inode or directory. */ if ((DIP(dp, di_mode) & IFMT) == IFDIR) ino_visit(dp, ino, ino_free_children, 0); DIP_SET(dp, di_nlink, 0); if ((DIP(dp, di_flags) & SF_SNAPSHOT) != 0) snapremove(ino); ino_visit(dp, ino, blk_free_visit, VISIT_EXT | VISIT_INDIR); /* Here we have to clear the inode and release any blocks it holds. 
*/ gen = DIP(dp, di_gen); if (fs->fs_magic == FS_UFS1_MAGIC) bzero(dp, sizeof(struct ufs1_dinode)); else bzero(dp, sizeof(struct ufs2_dinode)); DIP_SET(dp, di_gen, gen); inodirty(ip); ino_free(ino, mode); return; } /* * Adjust an inode's link count down by one when a directory goes away. */ static void ino_decr(ino_t ino) { struct inode ip; union dinode *dp; int reqlink; int nlink; int mode; ginode(ino, &ip); dp = ip.i_dp; nlink = DIP(dp, di_nlink); mode = DIP(dp, di_mode); if (nlink < 1) err_suj("Inode %d link count %d invalid\n", ino, nlink); if (mode == 0) err_suj("Inode %d has a link of %d with 0 mode\n", ino, nlink); nlink--; if ((mode & IFMT) == IFDIR) reqlink = 2; else reqlink = 1; if (nlink < reqlink) { if (debug) printf("ino %ju not enough links to live %d < %d\n", (uintmax_t)ino, nlink, reqlink); ino_reclaim(&ip, ino, mode); irelse(&ip); return; } DIP_SET(dp, di_nlink, nlink); inodirty(&ip); irelse(&ip); } /* * Adjust the inode link count to 'nlink'. If the count reaches zero * free it. */ static void ino_adjust(struct suj_ino *sino) { struct jrefrec *rrec; struct suj_rec *srec; struct suj_ino *stmp; union dinode *dp; struct inode ip; nlink_t nlink; nlink_t reqlink; int recmode; int isdot; int mode; ino_t ino; nlink = sino->si_nlink; ino = sino->si_ino; mode = sino->si_mode & IFMT; /* * If it's a directory with no dot links, it was truncated before * the name was cleared. We need to clear the dirent that * points at it. */ if (mode == IFDIR && nlink == 1 && sino->si_dotlinks == 0) { sino->si_nlink = nlink = 0; TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; if (ino_isat(rrec->jr_parent, rrec->jr_diroff, ino, &recmode, &isdot) == 0) continue; ino_clrat(rrec->jr_parent, rrec->jr_diroff, ino); break; } if (srec == NULL) errx(1, "Directory %ju name not found", (uintmax_t)ino); } /* * If it's a directory with no real names pointing to it go ahead * and truncate it. This will free any children. 
*/ if (mode == IFDIR && nlink - sino->si_dotlinks == 0) { sino->si_nlink = nlink = 0; /* * Mark any .. links so they know not to free this inode * when they are removed. */ TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; if (rrec->jr_diroff == DOTDOT_OFFSET) { stmp = ino_lookup(rrec->jr_parent, 0); if (stmp) ino_setskip(stmp, ino); } } } ginode(ino, &ip); dp = ip.i_dp; mode = DIP(dp, di_mode) & IFMT; if (nlink > UFS_LINK_MAX) err_suj("ino %ju nlink manipulation error, new %ju, old %d\n", (uintmax_t)ino, (uintmax_t)nlink, DIP(dp, di_nlink)); if (debug) printf("Adjusting ino %ju, nlink %ju, old link %d lastmode %o\n", (uintmax_t)ino, (uintmax_t)nlink, DIP(dp, di_nlink), sino->si_mode); if (mode == 0) { if (debug) printf("ino %ju, zero inode freeing bitmap\n", (uintmax_t)ino); ino_free(ino, sino->si_mode); irelse(&ip); return; } /* XXX Should be an assert? */ if (mode != sino->si_mode && debug) printf("ino %ju, mode %o != %o\n", (uintmax_t)ino, mode, sino->si_mode); if ((mode & IFMT) == IFDIR) reqlink = 2; else reqlink = 1; /* If the inode doesn't have enough links to live, free it. */ if (nlink < reqlink) { if (debug) printf("ino %ju not enough links to live %ju < %ju\n", (uintmax_t)ino, (uintmax_t)nlink, (uintmax_t)reqlink); ino_reclaim(&ip, ino, mode); irelse(&ip); return; } /* If required write the updated link count. */ if (DIP(dp, di_nlink) == nlink) { if (debug) printf("ino %ju, link matches, skipping.\n", (uintmax_t)ino); irelse(&ip); return; } DIP_SET(dp, di_nlink, nlink); inodirty(&ip); irelse(&ip); } /* * Truncate some or all blocks in an indirect, freeing any that are required * and zeroing the indirect. 
*/
static void
indir_trunc(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, ufs_lbn_t lastlbn,
    union dinode *dp)
{
	struct bufarea *bp;
	ufs_lbn_t lbnadd;
	ufs2_daddr_t nblk;
	ufs_lbn_t next;
	ufs_lbn_t nlbn;
	int isdirty;
	int level;
	int i;

	/* A zero block pointer means there is nothing to truncate. */
	if (blk == 0)
		return;
	isdirty = 0;
	level = lbn_level(lbn);
	if (level == -1)
		err_suj("Invalid level for lbn %jd\n", lbn);
	/* lbnadd = NINDIR(fs)^level: lbns covered by each pointer. */
	lbnadd = 1;
	for (i = level; i > 0; i--)
		lbnadd *= NINDIR(fs);
	bp = getdatablk(blk, fs->fs_bsize, BT_LEVEL1 + level);
	if (bp->b_errs != 0)
		err_suj("indir_trunc: UNRECOVERABLE I/O ERROR");
	for (i = 0; i < NINDIR(fs); i++) {
		if ((nblk = IBLK(bp, i)) == 0)
			continue;
		if (level != 0) {
			nlbn = (lbn + 1) - (i * lbnadd);
			/*
			 * Calculate the lbn of the next indirect to
			 * determine if any of this indirect must be
			 * reclaimed.
			 */
			next = -(lbn + level) + ((i+1) * lbnadd);
			if (next <= lastlbn)
				continue;
			indir_trunc(ino, nlbn, nblk, lastlbn, dp);
			/* If all of this indirect was reclaimed, free it. */
			nlbn = next - lbnadd;
			if (nlbn < lastlbn)
				continue;
		} else {
			/* Data block: keep it if it lies before lastlbn. */
			nlbn = -lbn + i * lbnadd;
			if (nlbn < lastlbn)
				continue;
		}
		/* Free the child and zero its slot in this indirect. */
		isdirty = 1;
		blk_free(ino, nblk, 0, fs->fs_frag);
		IBLK_SET(bp, i, 0);
	}
	/* Only mark the indirect dirty when a pointer was cleared. */
	if (isdirty)
		dirty(bp);
	brelse(bp);
}

/*
 * Truncate an inode to the minimum of the given size or the last populated
 * block after any over size have been discarded.  The kernel would allocate
 * the last block in the file but fsck does not and neither do we.  This
 * code never extends files, only shrinks them.
*/
static void
ino_trunc(ino_t ino, off_t size)
{
	struct inode ip;
	union dinode *dp;
	struct bufarea *bp;
	ufs2_daddr_t bn;
	uint64_t totalfrags;
	ufs_lbn_t nextlbn;
	ufs_lbn_t lastlbn;
	ufs_lbn_t tmpval;
	ufs_lbn_t lbn;
	ufs_lbn_t i;
	int blksize, frags;
	off_t cursize;
	off_t off;
	int mode;

	ginode(ino, &ip);
	dp = ip.i_dp;
	mode = DIP(dp, di_mode) & IFMT;
	cursize = DIP(dp, di_size);
	/* If no size change, nothing to do */
	if (size == cursize) {
		irelse(&ip);
		return;
	}
	if (debug)
		printf("Truncating ino %ju, mode %o to size %jd from "
		    "size %jd\n", (uintmax_t)ino, mode, size, cursize);

	/* Skip datablocks for short links and devices. */
	if (mode == 0 || mode == IFBLK || mode == IFCHR ||
	    (mode == IFLNK && cursize < fs->fs_maxsymlinklen)) {
		irelse(&ip);
		return;
	}
	/* Don't extend. */
	if (size > cursize) {
		irelse(&ip);
		return;
	}
	/* A snapshot file may only be truncated to zero length. */
	if ((DIP(dp, di_flags) & SF_SNAPSHOT) != 0) {
		if (size > 0)
			err_suj("Partial truncation of ino %ju snapshot file\n",
			    (uintmax_t)ino);
		snapremove(ino);
	}
	/* Free every direct block from the first lbn past 'size' onward. */
	lastlbn = lblkno(fs, blkroundup(fs, size));
	for (i = lastlbn; i < UFS_NDADDR; i++) {
		if ((bn = DIP(dp, di_db[i])) == 0)
			continue;
		blksize = sblksize(fs, cursize, i);
		blk_free(ino, bn, 0, numfrags(fs, blksize));
		DIP_SET(dp, di_db[i], 0);
	}
	/*
	 * Follow indirect blocks, freeing anything required.
	 */
	for (i = 0, tmpval = NINDIR(fs), lbn = UFS_NDADDR; i < UFS_NIADDR;
	    i++, lbn = nextlbn) {
		nextlbn = lbn + tmpval;
		tmpval *= NINDIR(fs);
		/* If we're not freeing any in this indirect range skip it. */
		if (lastlbn >= nextlbn)
			continue;
		if ((bn = DIP(dp, di_ib[i])) == 0)
			continue;
		indir_trunc(ino, -lbn - i, bn, lastlbn, dp);
		/* If we freed everything in this indirect free the indir. */
		if (lastlbn > lbn)
			continue;
		blk_free(ino, bn, 0, fs->fs_frag);
		DIP_SET(dp, di_ib[i], 0);
	}
	/*
	 * Now that we've freed any whole blocks that exceed the desired
	 * truncation size, figure out how many blocks remain and what the
	 * last populated lbn is.  We will set the size to this last lbn
	 * rather than worrying about allocating the final lbn as the kernel
	 * would've done.  This is consistent with normal fsck behavior.
	 */
	visitlbn = 0;
	totalfrags = ino_visit(dp, ino, null_visit, VISIT_INDIR | VISIT_EXT);
	if (size > lblktosize(fs, visitlbn + 1))
		size = lblktosize(fs, visitlbn + 1);
	/*
	 * If we're truncating direct blocks we have to adjust frags
	 * accordingly.
	 */
	if (visitlbn < UFS_NDADDR && totalfrags) {
		long oldspace, newspace;

		bn = DIP(dp, di_db[visitlbn]);
		if (bn == 0)
			err_suj("Bad blk at ino %ju lbn %jd\n",
			    (uintmax_t)ino, visitlbn);
		oldspace = sblksize(fs, cursize, visitlbn);
		newspace = sblksize(fs, size, visitlbn);
		if (oldspace != newspace) {
			/* Release the frags past the new end of the block. */
			bn += numfrags(fs, newspace);
			frags = numfrags(fs, oldspace - newspace);
			blk_free(ino, bn, 0, frags);
			totalfrags -= frags;
		}
	}
	DIP_SET(dp, di_blocks, fsbtodb(fs, totalfrags));
	DIP_SET(dp, di_size, size);
	inodirty(&ip);
	/*
	 * If we've truncated into the middle of a block or frag we have
	 * to zero it here.  Otherwise the file could extend into
	 * uninitialized space later.
	 *
	 * NOTE(review): di_mode is compared to IFDIR without masking by
	 * IFMT, unlike 'mode' above -- confirm whether directories are
	 * meant to be skipped here.
	 */
	off = blkoff(fs, size);
	if (off && DIP(dp, di_mode) != IFDIR) {
		long clrsize;

		bn = ino_blkatoff(dp, ino, visitlbn, &frags, NULL);
		if (bn == 0)
			err_suj("Block missing from ino %ju at lbn %jd\n",
			    (uintmax_t)ino, visitlbn);
		clrsize = frags * fs->fs_fsize;
		bp = getdatablk(bn, clrsize, BT_DATA);
		if (bp->b_errs != 0)
			err_suj("ino_trunc: UNRECOVERABLE I/O ERROR");
		clrsize -= off;
		bzero(&bp->b_un.b_buf[off], clrsize);
		dirty(bp);
		brelse(bp);
	}
	irelse(&ip);
	return;
}

/*
 * Process records available for one inode and determine whether the
 * link count is correct or needs adjusting.
*/
static void
ino_check(struct suj_ino *sino)
{
	struct jrefrec *ref;
	struct suj_rec *cur;
	nlink_t nlink;
	nlink_t found;		/* names actually present on disk */
	nlink_t dots;		/* of those, how many are . or .. links */
	nlink_t gone;		/* removes, journaled or noted earlier */
	ino_t ino;
	int present;
	int isdot;
	int mode;

	/* Nothing to reconcile without reference records. */
	if (sino->si_hasrecs == 0)
		return;

	ino = sino->si_ino;
	found = 0;
	dots = 0;
	gone = sino->si_nlinkadj;
	/* The first record supplies the starting link count. */
	ref = (struct jrefrec *)TAILQ_FIRST(&sino->si_recs)->sr_rec;
	nlink = ref->jr_nlink;

	TAILQ_FOREACH(cur, &sino->si_recs, sr_next) {
		ref = (struct jrefrec *)cur->sr_rec;
		present = ino_isat(ref->jr_parent, ref->jr_diroff,
		    ref->jr_ino, &mode, &isdot);
		if (present && (mode & IFMT) != (ref->jr_mode & IFMT))
			err_suj("Inode mode/directory type mismatch %o != %o\n",
			    mode, ref->jr_mode);
		if (debug)
			printf("jrefrec: op %d ino %ju, nlink %ju, parent %ju, "
			    "diroff %jd, mode %o, isat %d, isdot %d\n",
			    ref->jr_op, (uintmax_t)ref->jr_ino,
			    (uintmax_t)ref->jr_nlink,
			    (uintmax_t)ref->jr_parent,
			    (uintmax_t)ref->jr_diroff,
			    ref->jr_mode, present, isdot);
		/* The mode of the last record examined is kept. */
		mode = ref->jr_mode & IFMT;
		if (ref->jr_op == JOP_REMREF)
			gone++;
		found += present;
		if (isdot)
			dots += present;
	}

	/*
	 * Remaining links are the starting count, less every remove, plus
	 * every name found on disk.  An incomplete remove therefore leaves
	 * the count unchanged while an add raises it by one.
	 */
	if (debug)
		printf(
		    "ino %ju nlink %ju newlinks %ju removes %ju dotlinks %ju\n",
		    (uintmax_t)ino, (uintmax_t)nlink, (uintmax_t)found,
		    (uintmax_t)gone, (uintmax_t)dots);
	nlink = nlink + found - gone;

	sino->si_linkadj = 1;
	sino->si_nlink = nlink;
	sino->si_dotlinks = dots;
	sino->si_mode = mode;
	ino_adjust(sino);
}

/*
 * Process records available for one block and determine whether it is
 * still allocated and whether the owning inode needs to be updated or
 * a free completed.
*/
static void
blk_check(struct suj_blk *sblk)
{
	struct suj_rec *srec;
	struct jblkrec *brec;
	struct suj_ino *sino;
	ufs2_daddr_t blk;
	int mask;
	int frags;
	int isat;

	/*
	 * Each suj_blk actually contains records for any fragments in that
	 * block.  As a result we must evaluate each record individually.
	 */
	sino = NULL;
	TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) {
		brec = (struct jblkrec *)srec->sr_rec;
		frags = brec->jb_frags;
		blk = brec->jb_blkno + brec->jb_oldfrags;
		isat = blk_isat(brec->jb_ino, brec->jb_lbn, blk, &frags);
		/* Flag the owning inode so its block count is rechecked. */
		if (sino == NULL || sino->si_ino != brec->jb_ino) {
			sino = ino_lookup(brec->jb_ino, 1);
			sino->si_blkadj = 1;
		}
		if (debug)
			printf("op %d blk %jd ino %ju lbn %jd frags %d isat %d "
			    "(%d)\n", brec->jb_op, blk, (uintmax_t)brec->jb_ino,
			    brec->jb_lbn, brec->jb_frags, isat, frags);
		/*
		 * If we found the block at this address we still have to
		 * determine if we need to free the tail end that was
		 * added by adding contiguous fragments from the same block.
		 */
		if (isat == 1) {
			if (frags == brec->jb_frags)
				continue;
			mask = blk_freemask(blk, brec->jb_ino, brec->jb_lbn,
			    brec->jb_frags);
			/* Drop the mask bits for the frags still in use. */
			mask >>= frags;
			blk += frags;
			frags = brec->jb_frags - frags;
			blk_free(brec->jb_ino, blk, mask, frags);
			continue;
		}
		/*
		 * The block wasn't found, attempt to free it.  It won't be
		 * freed if it was actually reallocated.  If this was an
		 * allocation we don't want to follow indirects as they
		 * may not be written yet.  Any children of the indirect will
		 * have their own records.  If it's a free we need to
		 * recursively free children.
		 */
		blk_free_lbn(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags,
		    brec->jb_op == JOP_FREEBLK);
	}
}

/*
 * Walk the list of inode records for this cg and resolve moved and duplicate
 * inode references now that we have a complete picture.
*/
static void
cg_build(struct suj_cg *sc)
{
	struct suj_ino *sino;
	int i;

	for (i = 0; i < HASHSIZE; i++)
		LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
			ino_build(sino);
}

/*
 * Handle inodes requiring truncation.  This must be done prior to
 * looking up any inodes in directories.
 */
static void
cg_trunc(struct suj_cg *sc)
{
	struct suj_ino *sino;
	int i;

	for (i = 0; i < HASHSIZE; i++) {
		LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) {
			if (sino->si_trunc) {
				ino_trunc(sino->si_ino,
				    sino->si_trunc->jt_size);
				/* ino_trunc() already rewrote di_blocks. */
				sino->si_blkadj = 0;
				sino->si_trunc = NULL;
			}
			if (sino->si_blkadj)
				ino_adjblks(sino);
		}
	}
}

/*
 * Run ino_adjblks() on every inode in this cg that has si_blkadj set.
 */
static void
cg_adj_blk(struct suj_cg *sc)
{
	struct suj_ino *sino;
	int i;

	for (i = 0; i < HASHSIZE; i++) {
		LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) {
			if (sino->si_blkadj)
				ino_adjblks(sino);
		}
	}
}

/*
 * Free any partially allocated blocks and then resolve inode block
 * counts.
 */
static void
cg_check_blk(struct suj_cg *sc)
{
	struct suj_blk *sblk;
	int i;

	for (i = 0; i < HASHSIZE; i++)
		LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next)
			blk_check(sblk);
}

/*
 * Walk the list of inode records for this cg, recovering any
 * changes which were not complete at the time of crash.
 */
static void
cg_check_ino(struct suj_cg *sc)
{
	struct suj_ino *sino;
	int i;

	for (i = 0; i < HASHSIZE; i++)
		LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
			ino_check(sino);
}

/*
 * Call 'apply' once for every suj_cg held in cghash.
 */
static void
cg_apply(void (*apply)(struct suj_cg *))
{
	struct suj_cg *scg;
	int i;

	for (i = 0; i < HASHSIZE; i++)
		LIST_FOREACH(scg, &cghash[i], sc_next)
			apply(scg);
}

/*
 * Process the unlinked but referenced file list.  Freeing all inodes.
 * The list starts at fs_sujfree and is chained through di_freelink; both
 * are cleared as the list is walked.
 */
static void
ino_unlinked(void)
{
	struct inode ip;
	union dinode *dp;
	uint16_t mode;
	ino_t inon;
	ino_t ino;

	ino = fs->fs_sujfree;
	fs->fs_sujfree = 0;
	while (ino != 0) {
		ginode(ino, &ip);
		dp = ip.i_dp;
		mode = DIP(dp, di_mode) & IFMT;
		/* Fetch the next inode before the link is cleared. */
		inon = DIP(dp, di_freelink);
		DIP_SET(dp, di_freelink, 0);
		inodirty(&ip);
		/*
		 * XXX Should this be an errx?
		 */
		if (DIP(dp, di_nlink) == 0) {
			if (debug)
				printf("Freeing unlinked ino %ju mode %o\n",
				    (uintmax_t)ino, mode);
			ino_reclaim(&ip, ino, mode);
		} else if (debug)
			printf("Skipping ino %ju mode %o with link %d\n",
			    (uintmax_t)ino, mode, DIP(dp, di_nlink));
		ino = inon;
		irelse(&ip);
	}
}

/*
 * Append a new record to the list of records requiring processing.
 * The record is queued on the si_newrecs list of the inode it names.
 */
static void
ino_append(union jrec *rec)
{
	struct jrefrec *refrec;
	struct jmvrec *mvrec;
	struct suj_ino *sino;
	struct suj_rec *srec;

	/* Two views of the same record; the op field selects which applies. */
	mvrec = &rec->rec_jmvrec;
	refrec = &rec->rec_jrefrec;
	if (debug && mvrec->jm_op == JOP_MVREF)
		printf("ino move: ino %ju, parent %ju, "
		    "diroff %jd, oldoff %jd\n",
		    (uintmax_t)mvrec->jm_ino, (uintmax_t)mvrec->jm_parent,
		    (uintmax_t)mvrec->jm_newoff, (uintmax_t)mvrec->jm_oldoff);
	else if (debug &&
	    (refrec->jr_op == JOP_ADDREF || refrec->jr_op == JOP_REMREF))
		printf("ino ref: op %d, ino %ju, nlink %ju, "
		    "parent %ju, diroff %jd\n",
		    refrec->jr_op, (uintmax_t)refrec->jr_ino,
		    (uintmax_t)refrec->jr_nlink,
		    (uintmax_t)refrec->jr_parent, (uintmax_t)refrec->jr_diroff);
	sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1);
	sino->si_hasrecs = 1;
	srec = errmalloc(sizeof(*srec));
	srec->sr_rec = rec;
	TAILQ_INSERT_TAIL(&sino->si_newrecs, srec, sr_next);
}

/*
 * Add a reference adjustment to the sino list and eliminate dups.  The
 * primary loop in ino_build_ref() checks for dups but new ones may be
 * created as a result of offset adjustments.
 */
static void
ino_add_ref(struct suj_ino *sino, struct suj_rec *srec)
{
	struct jrefrec *refrec;
	struct suj_rec *srn;
	struct jrefrec *rrn;

	refrec = (struct jrefrec *)srec->sr_rec;
	/*
	 * We walk backwards so that the oldest link count is preserved.  If
	 * an add record conflicts with a remove keep the remove.  Redundant
	 * removes are eliminated in ino_build_ref.  Otherwise we keep the
	 * oldest record at a given location.
	 */
	for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn;
	    srn = TAILQ_PREV(srn, srechd, sr_next)) {
		rrn = (struct jrefrec *)srn->sr_rec;
		if (rrn->jr_parent != refrec->jr_parent ||
		    rrn->jr_diroff != refrec->jr_diroff)
			continue;
		/* Keep the existing record, adopting only the new mode. */
		if (rrn->jr_op == JOP_REMREF || refrec->jr_op == JOP_ADDREF) {
			rrn->jr_mode = refrec->jr_mode;
			return;
		}
		/*
		 * Adding a remove.
		 *
		 * Replace the record in place with the old nlink in case
		 * we replace the head of the list.  Abandon srec as a dup.
		 */
		refrec->jr_nlink = rrn->jr_nlink;
		srn->sr_rec = srec->sr_rec;
		return;
	}
	TAILQ_INSERT_TAIL(&sino->si_recs, srec, sr_next);
}

/*
 * Create a duplicate of a reference at a previous location.
 * The copy is always an add (JOP_ADDREF) at 'diroff'.
 */
static void
ino_dup_ref(struct suj_ino *sino, struct jrefrec *refrec, off_t diroff)
{
	struct jrefrec *rrn;
	struct suj_rec *srn;

	rrn = errmalloc(sizeof(*refrec));
	*rrn = *refrec;
	rrn->jr_op = JOP_ADDREF;
	rrn->jr_diroff = diroff;
	srn = errmalloc(sizeof(*srn));
	srn->sr_rec = (union jrec *)rrn;
	ino_add_ref(sino, srn);
}

/*
 * Add a reference to the list at all known locations.  We follow the offset
 * changes for a single instance and create duplicate add refs at each so
 * that we can tolerate any version of the directory block.  Eliminate
 * removes which collide with adds that are seen in the journal.  They should
 * not adjust the link count down.
 */
static void
ino_build_ref(struct suj_ino *sino, struct suj_rec *srec)
{
	struct jrefrec *refrec;
	struct jmvrec *mvrec;
	struct suj_rec *srp;
	struct suj_rec *srn;
	struct jrefrec *rrn;
	off_t diroff;

	refrec = (struct jrefrec *)srec->sr_rec;
	/*
	 * Search for a mvrec that matches this offset.  Whether it's an add
	 * or a remove we can delete the mvref after creating a dup record in
	 * the old location.
	 */
	if (!TAILQ_EMPTY(&sino->si_movs)) {
		diroff = refrec->jr_diroff;
		/* srp is fetched first because srn may be freed below. */
		for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn; srn = srp) {
			srp = TAILQ_PREV(srn, srechd, sr_next);
			mvrec = (struct jmvrec *)srn->sr_rec;
			if (mvrec->jm_parent != refrec->jr_parent ||
			    mvrec->jm_newoff != diroff)
				continue;
			diroff = mvrec->jm_oldoff;
			TAILQ_REMOVE(&sino->si_movs, srn, sr_next);
			free(srn);
			ino_dup_ref(sino, refrec, diroff);
		}
	}
	/*
	 * If a remove wasn't eliminated by an earlier add just append it to
	 * the list.
	 */
	if (refrec->jr_op == JOP_REMREF) {
		ino_add_ref(sino, srec);
		return;
	}
	/*
	 * Walk the list of records waiting to be added to the list.  We
	 * must check for moves that apply to our current offset and remove
	 * them from the list.  Remove any duplicates to eliminate removes
	 * with corresponding adds.
	 */
	TAILQ_FOREACH_SAFE(srn, &sino->si_newrecs, sr_next, srp) {
		switch (srn->sr_rec->rec_jrefrec.jr_op) {
		case JOP_ADDREF:
			/*
			 * This should actually be an error we should
			 * have a remove for every add journaled.
			 */
			rrn = (struct jrefrec *)srn->sr_rec;
			if (rrn->jr_parent != refrec->jr_parent ||
			    rrn->jr_diroff != refrec->jr_diroff)
				break;
			TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next);
			break;
		case JOP_REMREF:
			/*
			 * Once we remove the current iteration of the
			 * record at this address we're done.
			 */
			rrn = (struct jrefrec *)srn->sr_rec;
			if (rrn->jr_parent != refrec->jr_parent ||
			    rrn->jr_diroff != refrec->jr_diroff)
				break;
			TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next);
			ino_add_ref(sino, srec);
			return;
		case JOP_MVREF:
			/*
			 * Update our diroff based on any moves that match
			 * and remove the move.
			 */
			mvrec = (struct jmvrec *)srn->sr_rec;
			if (mvrec->jm_parent != refrec->jr_parent ||
			    mvrec->jm_oldoff != refrec->jr_diroff)
				break;
			ino_dup_ref(sino, refrec, mvrec->jm_oldoff);
			refrec->jr_diroff = mvrec->jm_newoff;
			TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next);
			break;
		default:
			err_suj("ino_build_ref: Unknown op %d\n",
			    srn->sr_rec->rec_jrefrec.jr_op);
		}
	}
	ino_add_ref(sino, srec);
}

/*
 * Walk the list of new records and add them in-order resolving any
 * dups and adjusted offsets.  si_hasrecs is cleared if no record
 * survives onto si_recs.
 */
static void
ino_build(struct suj_ino *sino)
{
	struct suj_rec *srec;

	while ((srec = TAILQ_FIRST(&sino->si_newrecs)) != NULL) {
		TAILQ_REMOVE(&sino->si_newrecs, srec, sr_next);
		switch (srec->sr_rec->rec_jrefrec.jr_op) {
		case JOP_ADDREF:
		case JOP_REMREF:
			ino_build_ref(sino, srec);
			break;
		case JOP_MVREF:
			/*
			 * Add this mvrec to the queue of pending mvs.
			 */
			TAILQ_INSERT_TAIL(&sino->si_movs, srec, sr_next);
			break;
		default:
			err_suj("ino_build: Unknown op %d\n",
			    srec->sr_rec->rec_jrefrec.jr_op);
		}
	}
	if (TAILQ_EMPTY(&sino->si_recs))
		sino->si_hasrecs = 0;
}

/*
 * Modify journal records so they refer to the base block number
 * and a start and end frag range.  This is to facilitate the discovery
 * of overlapping fragment allocations.
 */
static void
blk_build(struct jblkrec *blkrec)
{
	struct suj_rec *srec;
	struct suj_blk *sblk;
	struct jblkrec *blkrn;
	ufs2_daddr_t blk;
	int frag;

	if (debug)
		printf("blk_build: op %d blkno %jd frags %d oldfrags %d "
		    "ino %ju lbn %jd\n",
		    blkrec->jb_op, (uintmax_t)blkrec->jb_blkno,
		    blkrec->jb_frags, blkrec->jb_oldfrags,
		    (uintmax_t)blkrec->jb_ino, (uintmax_t)blkrec->jb_lbn);

	/* Split the address into its block base and frag offset. */
	blk = blknum(fs, blkrec->jb_blkno);
	frag = fragnum(fs, blkrec->jb_blkno);
	if (blkrec->jb_blkno < 0 || blk + fs->fs_frag - frag > fs->fs_size)
		err_suj("Out-of-bounds journal block number %jd\n",
		    blkrec->jb_blkno);
	sblk = blk_lookup(blk, 1);
	/*
	 * Rewrite the record using oldfrags to indicate the offset into
	 * the block.  Leave jb_frags as the actual allocated count.
	 */
	blkrec->jb_blkno -= frag;
	blkrec->jb_oldfrags = frag;
	if (blkrec->jb_oldfrags + blkrec->jb_frags > fs->fs_frag)
		err_suj("Invalid fragment count %d oldfrags %d\n",
		    blkrec->jb_frags, frag);
	/*
	 * Detect dups.  If we detect a dup we always discard the oldest
	 * record as it is superseded by the new record.  This speeds up
	 * later stages but also eliminates free records which are used
	 * to indicate that the contents of indirects can be trusted.
	 */
	TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) {
		blkrn = (struct jblkrec *)srec->sr_rec;
		if (blkrn->jb_ino != blkrec->jb_ino ||
		    blkrn->jb_lbn != blkrec->jb_lbn ||
		    blkrn->jb_blkno != blkrec->jb_blkno ||
		    blkrn->jb_frags != blkrec->jb_frags ||
		    blkrn->jb_oldfrags != blkrec->jb_oldfrags)
			continue;
		if (debug)
			printf("Removed dup.\n");
		/* Discard the free which is a dup with an alloc. */
		if (blkrec->jb_op == JOP_FREEBLK)
			return;
		TAILQ_REMOVE(&sblk->sb_recs, srec, sr_next);
		free(srec);
		break;
	}
	srec = errmalloc(sizeof(*srec));
	srec->sr_rec = (union jrec *)blkrec;
	TAILQ_INSERT_TAIL(&sblk->sb_recs, srec, sr_next);
}

/*
 * Record a truncation request against its inode.  A JOP_SYNC record
 * cancels any pending request; otherwise the record with the smallest
 * jt_size is the one kept in si_trunc.
 */
static void
ino_build_trunc(struct jtrncrec *rec)
{
	struct suj_ino *sino;

	if (debug)
		printf("ino_build_trunc: op %d ino %ju, size %jd\n",
		    rec->jt_op, (uintmax_t)rec->jt_ino,
		    (uintmax_t)rec->jt_size);
	/* NOTE(review): the message below says "ino_build", not this function. */
	if (chkfilesize(IFREG, rec->jt_size) == 0)
		err_suj("ino_build: truncation size too large %ju\n",
		    (intmax_t)rec->jt_size);
	sino = ino_lookup(rec->jt_ino, 1);
	if (rec->jt_op == JOP_SYNC) {
		sino->si_trunc = NULL;
		return;
	}
	if (sino->si_trunc == NULL || sino->si_trunc->jt_size > rec->jt_size)
		sino->si_trunc = rec;
}

/*
 * Build up tables of the operations we need to recover.
*/
/*
 * Every segment on allsegs is walked in JREC_SIZE steps and each record
 * is dispatched on its op: reference records to ino_append(), block
 * records to blk_build(), truncation records to ino_build_trunc().
 * Any other op is reported through err_suj().
 */
static void
suj_build(void)
{
	struct suj_seg *seg;
	union jrec *rec;
	int off;
	int i;

	TAILQ_FOREACH(seg, &allsegs, ss_next) {
		if (debug)
			printf("seg %jd has %d records, oldseq %jd.\n",
			    seg->ss_rec.jsr_seq, seg->ss_rec.jsr_cnt,
			    seg->ss_rec.jsr_oldest);
		off = 0;
		rec = (union jrec *)seg->ss_blk;
		/*
		 * i counts dispatched records only: it is incremented at the
		 * bottom of the body, so the slots skipped below do not count
		 * towards jsr_cnt, while off and rec advance on every pass.
		 */
		for (i = 0; i < seg->ss_rec.jsr_cnt; off += JREC_SIZE, rec++) {
			/* skip the segrec. */
			if ((off % real_dev_bsize) == 0)
				continue;
			switch (rec->rec_jrefrec.jr_op) {
			case JOP_ADDREF:
			case JOP_REMREF:
			case JOP_MVREF:
				ino_append(rec);
				break;
			case JOP_NEWBLK:
			case JOP_FREEBLK:
				blk_build((struct jblkrec *)rec);
				break;
			case JOP_TRUNC:
			case JOP_SYNC:
				ino_build_trunc((struct jtrncrec *)rec);
				break;
			default:
				err_suj("Unknown journal operation %d (%d)\n",
				    rec->rec_jrefrec.jr_op, off);
			}
			i++;
		}
	}
}

/*
 * Prune the journal segments to those we care about based on the
 * oldest sequence in the newest segment. Order the segment list
 * based on sequence number.
 *
 * Works on the global allsegs list and the global oldseq, and adds the
 * record and byte counts of the retained segments to jrecs and jbytes.
 */
static void
suj_prune(void)
{
	struct suj_seg *seg;
	struct suj_seg *segn;
	uint64_t newseq;
	int discard;

	if (debug)
		printf("Pruning up to %jd\n", oldseq);
	/* First free the expired segments. */
	TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) {
		if (seg->ss_rec.jsr_seq >= oldseq)
			continue;
		TAILQ_REMOVE(&allsegs, seg, ss_next);
		free(seg->ss_blk);
		free(seg);
	}
	/* Next ensure that segments are ordered properly. */
	seg = TAILQ_FIRST(&allsegs);
	if (seg == NULL) {
		if (debug)
			printf("Empty journal\n");
		return;
	}
	newseq = seg->ss_rec.jsr_seq;
	/*
	 * Rotate the list: while the tail segment has a lower sequence
	 * number than the current head, move it to the head.
	 */
	for (;;) {
		seg = TAILQ_LAST(&allsegs, seghd);
		if (seg->ss_rec.jsr_seq >= newseq)
			break;
		TAILQ_REMOVE(&allsegs, seg, ss_next);
		TAILQ_INSERT_HEAD(&allsegs, seg, ss_next);
		newseq = seg->ss_rec.jsr_seq;
	}
	/* After rotation the head must be exactly the oldest needed segment. */
	if (newseq != oldseq) {
		TAILQ_FOREACH(seg, &allsegs, ss_next) {
			printf("%jd, ", seg->ss_rec.jsr_seq);
		}
		printf("\n");
		err_suj("Journal file sequence mismatch %jd != %jd\n",
		    newseq, oldseq);
	}
	/*
	 * The kernel may asynchronously write segments which can create
	 * gaps in the sequence space. Throw away any segments after the
	 * gap as the kernel guarantees only those that are contiguously
	 * reachable are marked as completed.
	 */
	discard = 0;
	TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) {
		/*
		 * newseq++ is only evaluated while discard is clear, so it
		 * stops advancing after the first mismatch.
		 */
		if (!discard && newseq++ == seg->ss_rec.jsr_seq) {
			jrecs += seg->ss_rec.jsr_cnt;
			jbytes += seg->ss_rec.jsr_blocks * real_dev_bsize;
			continue;
		}
		discard = 1;
		if (debug)
			printf("Journal order mismatch %jd != %jd pruning\n",
			    newseq-1, seg->ss_rec.jsr_seq);
		TAILQ_REMOVE(&allsegs, seg, ss_next);
		free(seg->ss_blk);
		free(seg);
	}
	if (debug)
		printf("Processing journal segments from %jd to %jd\n",
		    oldseq, newseq-1);
}

/*
 * Verify the journal inode before attempting to read records.
 *
 * Returns 0 when every check passes.  Otherwise prints the reason and
 * returns -1.  Checked in order: link count is 1, both SF_IMMUTABLE and
 * SF_NOUNLINK are set, mode is exactly IFREG | IREAD, size is at least
 * SUJ_MIN, and di_modrev equals fs->fs_mtime.
 */
static int
suj_verifyino(union dinode *dp)
{

	if (DIP(dp, di_nlink) != 1) {
		printf("Invalid link count %d for journal inode %ju\n",
		    DIP(dp, di_nlink), (uintmax_t)sujino);
		return (-1);
	}

	if ((DIP(dp, di_flags) & (SF_IMMUTABLE | SF_NOUNLINK)) !=
	    (SF_IMMUTABLE | SF_NOUNLINK)) {
		printf("Invalid flags 0x%X for journal inode %ju\n",
		    DIP(dp, di_flags), (uintmax_t)sujino);
		return (-1);
	}

	if (DIP(dp, di_mode) != (IFREG | IREAD)) {
		printf("Invalid mode %o for journal inode %ju\n",
		    DIP(dp, di_mode), (uintmax_t)sujino);
		return (-1);
	}

	if (DIP(dp, di_size) < SUJ_MIN) {
		printf("Invalid size %jd for journal inode %ju\n",
		    DIP(dp, di_size), (uintmax_t)sujino);
		return (-1);
	}

	if (DIP(dp, di_modrev) != fs->fs_mtime) {
		printf("Journal timestamp does not match fs mount time\n");
		return (-1);
	}

	return (0);
}

/*
 * Extent map of the journal's disk blocks plus a read cursor.
 * jblocks_add() fills jb_extent; jblocks_next() and jblocks_advance()
 * move the cursor (jb_head, jb_off) through it.
 */
struct jblocks {
	struct jextent *jb_extent;	/* Extent array. */
	int jb_avail;			/* Available extents. */
	int jb_used;			/* Last used extent. */
	int jb_head;			/* Allocator head. */
	int jb_off;			/* Allocator extent offset. */
};
/* One contiguous run of disk blocks. */
struct jextent {
	ufs2_daddr_t je_daddr;	/* Disk block address. */
	int je_blocks;		/* Disk block count. */
};

/* Extent map used by suj_add_block() and suj_read(). */
static struct jblocks *suj_jblocks;

/*
 * Allocate an empty extent map with room for 10 extents.  The extent
 * array is zeroed; jblocks_add() treats a zero je_daddr in the current
 * slot as "nothing stored yet".
 */
static struct jblocks *
jblocks_create(void)
{
	struct jblocks *jblocks;
	int size;

	jblocks = errmalloc(sizeof(*jblocks));
	jblocks->jb_avail = 10;
	jblocks->jb_used = 0;
	jblocks->jb_head = 0;
	jblocks->jb_off = 0;
	size = sizeof(struct jextent) * jblocks->jb_avail;
	jblocks->jb_extent = errmalloc(size);
	bzero(jblocks->jb_extent, size);

	return (jblocks);
}

/*
 * Return the next available disk block and the amount of contiguous
 * free space it contains.
 *
 * bytes is the largest amount the caller wants.  *actual receives the
 * byte count available at the returned address, capped at bytes.
 * Returns 0, leaving *actual unwritten, once the cursor has moved past
 * the last used extent.  The cursor offset is only moved by
 * jblocks_advance().
 */
static ufs2_daddr_t
jblocks_next(struct jblocks *jblocks, int bytes, int *actual)
{
	struct jextent *jext;
	ufs2_daddr_t daddr;
	int freecnt;
	int blocks;

	blocks = btodb(bytes);
	jext = &jblocks->jb_extent[jblocks->jb_head];
	freecnt = jext->je_blocks - jblocks->jb_off;
	/* Current extent fully consumed: step to the next one. */
	if (freecnt == 0) {
		jblocks->jb_off = 0;
		if (++jblocks->jb_head > jblocks->jb_used)
			return (0);
		jext = &jblocks->jb_extent[jblocks->jb_head];
		freecnt = jext->je_blocks;
	}
	if (freecnt > blocks)
		freecnt = blocks;
	*actual = dbtob(freecnt);
	daddr = jext->je_daddr + jblocks->jb_off;

	return (daddr);
}

/*
 * Advance the allocation head by a specified number of bytes, consuming
 * one journal segment.
 */
static void
jblocks_advance(struct jblocks *jblocks, int bytes)
{

	jblocks->jb_off += btodb(bytes);
}

/*
 * Free the extent array and the map itself.
 */
static void
jblocks_destroy(struct jblocks *jblocks)
{

	free(jblocks->jb_extent);
	free(jblocks);
}

/*
 * Append a run of blocks disk blocks starting at daddr to the map.
 * A run that begins exactly where the last extent ends is merged into
 * it; otherwise a new extent is started, doubling the array when it is
 * full.
 */
static void
jblocks_add(struct jblocks *jblocks, ufs2_daddr_t daddr, int blocks)
{
	struct jextent *jext;
	int size;

	jext = &jblocks->jb_extent[jblocks->jb_used];
	/* Adding the first block. */
	if (jext->je_daddr == 0) {
		jext->je_daddr = daddr;
		jext->je_blocks = blocks;
		return;
	}
	/* Extending the last extent. */
	if (jext->je_daddr + jext->je_blocks == daddr) {
		jext->je_blocks += blocks;
		return;
	}
	/* Adding a new extent. */
	if (++jblocks->jb_used == jblocks->jb_avail) {
		/* Grow into a zeroed array of twice the size and copy over. */
		jblocks->jb_avail *= 2;
		size = sizeof(struct jextent) * jblocks->jb_avail;
		jext = errmalloc(size);
		bzero(jext, size);
		bcopy(jblocks->jb_extent, jext,
		    sizeof(struct jextent) * jblocks->jb_used);
		free(jblocks->jb_extent);
		jblocks->jb_extent = jext;
	}
	jext = &jblocks->jb_extent[jblocks->jb_used];
	jext->je_daddr = daddr;
	jext->je_blocks = blocks;
	return;
}

/*
 * Add a file block from the journal to the extent map. We can't read
 * each file block individually because the kernel treats it as a circular
 * buffer and segments may span multiple contiguous blocks.
 *
 * Passed to ino_visit() by suj_check().  ino and lbn are not used; blk
 * and frags are converted with fsbtodb() before being stored.
 */
static void
suj_add_block(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
{

	jblocks_add(suj_jblocks, fsbtodb(fs, blk), fsbtodb(fs, frags));
}

/*
 * Scan the journal extents in suj_jblocks for valid segments.
 *
 * Each accepted segment is copied into a new suj_seg and appended to
 * allsegs, and oldseq is raised to the largest jsr_oldest seen.
 * Returns when jblocks_next() reports no more journal space.
 *
 * NOTE(review): block is a 1MB automatic array -- confirm the stack
 * limit allows this wherever the function runs.
 */
static void
suj_read(void)
{
	uint8_t block[1 * 1024 * 1024];
	struct suj_seg *seg;
	struct jsegrec *recn;
	struct jsegrec *rec;
	ufs2_daddr_t blk;
	int readsize;
	int blocks;
	int recsize;
	int size;
	int i;

	/*
	 * Read records until we exhaust the journal space. If we find
	 * an invalid record we start searching for a valid segment header
	 * at the next block. This is because we don't have a head/tail
	 * pointer and must recover the information indirectly. At the gap
	 * between the head and tail we won't necessarily have a valid
	 * segment.
	 */
restart:
	for (;;) {
		size = sizeof(block);
		blk = jblocks_next(suj_jblocks, size, &readsize);
		if (blk == 0)
			return;
		size = readsize;
		/*
		 * Read 1MB at a time and scan for records within this block.
		 */
		if (pread(fsreadfd, &block, size, dbtob(blk)) != size) {
			err_suj("Error reading journal block %jd\n",
			    (intmax_t)blk);
		}
		/*
		 * recsize is the step for this iteration: one device block
		 * unless a plausible segment header says the segment is
		 * longer.  The cursor in suj_jblocks is advanced by the same
		 * amount so a restart resumes at the right place.
		 */
		for (rec = (void *)block; size; size -= recsize,
		    rec = (struct jsegrec *)((uintptr_t)rec + recsize)) {
			recsize = real_dev_bsize;
			/* A header from another mount instance is skipped. */
			if (rec->jsr_time != fs->fs_mtime) {
#ifdef notdef
				if (debug)
					printf("Rec time %jd != fs mtime %jd\n",
					    rec->jsr_time, fs->fs_mtime);
#endif
				jblocks_advance(suj_jblocks, recsize);
				continue;
			}
			if (rec->jsr_cnt == 0) {
				if (debug)
					printf("Found illegal count %d\n",
					    rec->jsr_cnt);
				jblocks_advance(suj_jblocks, recsize);
				continue;
			}
			/*
			 * NOTE(review): if jsr_blocks were 0, recsize would
			 * be 0 and neither size nor rec would advance --
			 * confirm that value cannot reach this point.
			 */
			blocks = rec->jsr_blocks;
			recsize = blocks * real_dev_bsize;
			if (recsize > size) {
				/*
				 * We may just have run out of buffer, restart
				 * the loop to re-read from this spot.
				 */
				if (size < fs->fs_bsize &&
				    size != readsize &&
				    recsize <= fs->fs_bsize)
					goto restart;
				/*
				 * The '-' and '+' prefixed lines below are
				 * unified-diff records: the old and the new
				 * line wrapping of the same printf call.
				 */
				if (debug)
-					printf("Found invalid segsize %d > %d\n",
-					    recsize, size);
+					printf("Found invalid segsize "
+					    "%d > %d\n", recsize, size);
				recsize = real_dev_bsize;
				jblocks_advance(suj_jblocks, recsize);
				continue;
			}
			/*
			 * Verify that all blocks in the segment are present.
			 *
			 * NOTE(review): the addition below is applied to a
			 * void pointer (the cast closes before the +), which
			 * relies on a compiler extension -- confirm.
			 */
			for (i = 1; i < blocks; i++) {
				recn = (void *)((uintptr_t)rec) + i *
				    real_dev_bsize;
				if (recn->jsr_seq == rec->jsr_seq &&
				    recn->jsr_time == rec->jsr_time)
					continue;
				if (debug)
					printf("Incomplete record %jd (%d)\n",
					    rec->jsr_seq, i);
				/* Skip only the blocks that did match. */
				recsize = i * real_dev_bsize;
				jblocks_advance(suj_jblocks, recsize);
				goto restart;
			}
			seg = errmalloc(sizeof(*seg));
			seg->ss_blk = errmalloc(recsize);
			seg->ss_rec = *rec;
			bcopy((void *)rec, seg->ss_blk, recsize);
			if (rec->jsr_oldest > oldseq)
				oldseq = rec->jsr_oldest;
			TAILQ_INSERT_TAIL(&allsegs, seg, ss_next);
			jblocks_advance(suj_jblocks, recsize);
		}
	}
}

/*
 * Orchestrate the verification of a filesystem via the softupdates journal.
*/
/*
 * filesys is only used in the progress message.
 *
 * Returns 0 once the journal has been processed, and also when reply()
 * returns 0 for "WRITE CHANGES".  Returns -1 when the journal is not
 * used: SUJ_FILE is not found in the root directory, suj_verifyino()
 * fails, "USE JOURNAL" is declined, the journal inode is sparse, or
 * setjmp(jmpbuf) returns nonzero and reply() is nonzero for
 * "FALLBACK TO FULL FSCK".  When that last reply() is 0 the process
 * exits with EEXIT instead.
 */
int
suj_check(const char *filesys)
{
	struct inodesc idesc;
	struct csum *cgsum;
	union dinode *dp, *jip;
	struct inode ip;
	uint64_t blocks;
	int i, retval;
	struct suj_seg *seg;
	struct suj_seg *segn;

	initsuj();
	fs = &sblock;
	/* Fall back to secsize when the sector size ioctl fails. */
	if (real_dev_bsize == 0 && ioctl(fsreadfd, DIOCGSECTORSIZE,
	    &real_dev_bsize) == -1)
		real_dev_bsize = secsize;
	if (debug)
		printf("dev_bsize %u\n", real_dev_bsize);

	/*
	 * Set an exit point when SUJ check failed
	 *
	 * presumably re-entered through a longjmp() issued by err_suj() --
	 * confirm.  Only the segments on allsegs are released here.
	 */
	retval = setjmp(jmpbuf);
	if (retval != 0) {
		pwarn("UNEXPECTED SU+J INCONSISTENCY\n");
		TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) {
			TAILQ_REMOVE(&allsegs, seg, ss_next);
			free(seg->ss_blk);
			free(seg);
		}
		if (reply("FALLBACK TO FULL FSCK") == 0) {
			ckfini(0);
			exit(EEXIT);
		} else
			return (-1);
	}

	/*
	 * Search the root directory for the SUJ_FILE.
	 */
	idesc.id_type = DATA;
	idesc.id_fix = IGNORE;
	idesc.id_number = UFS_ROOTINO;
	idesc.id_func = findino;
	idesc.id_name = SUJ_FILE;
	ginode(UFS_ROOTINO, &ip);
	dp = ip.i_dp;
	if ((DIP(dp, di_mode) & IFMT) != IFDIR) {
		irelse(&ip);
		err_suj("root inode is not a directory\n");
	}
	if (DIP(dp, di_size) < 0 || DIP(dp, di_size) > MAXDIRSIZE) {
		irelse(&ip);
		err_suj("negative or oversized root directory %jd\n",
		    (uintmax_t)DIP(dp, di_size));
	}
	if ((ckinode(dp, &idesc) & FOUND) == FOUND) {
		/* The inode number found for SUJ_FILE comes back in id_parent. */
		sujino = idesc.id_parent;
		irelse(&ip);
	} else {
		/* No journal file: turn the SUJ flag off in the superblock. */
		printf("Journal inode removed. Use tunefs to re-create.\n");
		sblock.fs_flags &= ~FS_SUJ;
		sblock.fs_sujfree = 0;
		irelse(&ip);
		return (-1);
	}
	/*
	 * Fetch the journal inode and verify it.
	 */
	ginode(sujino, &ip);
	jip = ip.i_dp;
	printf("** SU+J Recovering %s\n", filesys);
	/* In preen mode the "USE JOURNAL" question is not asked. */
	if (suj_verifyino(jip) != 0 || (!preen && !reply("USE JOURNAL"))) {
		irelse(&ip);
		return (-1);
	}
	/*
	 * Build a list of journal blocks in jblocks before parsing the
	 * available journal blocks in with suj_read().
	 */
	printf("** Reading %jd byte journal from inode %ju.\n",
	    DIP(jip, di_size), (uintmax_t)sujino);
	suj_jblocks = jblocks_create();
	blocks = ino_visit(jip, sujino, suj_add_block, 0);
	/*
	 * NOTE(review): this return leaves suj_jblocks allocated, unlike
	 * the normal path below which calls jblocks_destroy() -- confirm
	 * whether that is intentional.
	 */
	if (blocks != numfrags(fs, DIP(jip, di_size))) {
		printf("Sparse journal inode %ju.\n", (uintmax_t)sujino);
		irelse(&ip);
		return (-1);
	}
	irelse(&ip);
	suj_read();
	jblocks_destroy(suj_jblocks);
	suj_jblocks = NULL;
	if (preen || reply("RECOVER")) {
		printf("** Building recovery table.\n");
		suj_prune();
		suj_build();
		cg_apply(cg_build);
		printf("** Resolving unreferenced inode list.\n");
		ino_unlinked();
		printf("** Processing journal entries.\n");
		cg_apply(cg_trunc);
		cg_apply(cg_check_blk);
		cg_apply(cg_adj_blk);
		cg_apply(cg_check_ino);
	}
	/*
	 * The '-' and '+' prefixed lines below are unified-diff records:
	 * the old and the new line wrapping of the same condition.
	 */
-	if (preen == 0 && (jrecs > 0 || jbytes > 0) && reply("WRITE CHANGES") == 0)
+	if (preen == 0 && (jrecs > 0 || jbytes > 0) &&
+	    reply("WRITE CHANGES") == 0)
		return (0);
	/*
	 * Check block counts of snapshot inodes and
	 * make copies of any needed snapshot blocks.
	 */
	for (i = 0; i < snapcnt; i++)
		check_blkcnt(&snaplist[i]);
	snapflush(suj_checkblkavail);
	/*
	 * Recompute the fs summary info from correct cs summaries.
	 */
	bzero(&fs->fs_cstotal, sizeof(struct csum_total));
	for (i = 0; i < fs->fs_ncg; i++) {
		cgsum = &fs->fs_cs(fs, i);
		fs->fs_cstotal.cs_nffree += cgsum->cs_nffree;
		fs->fs_cstotal.cs_nbfree += cgsum->cs_nbfree;
		fs->fs_cstotal.cs_nifree += cgsum->cs_nifree;
		fs->fs_cstotal.cs_ndir += cgsum->cs_ndir;
	}

	/* Mark the superblock clean and stamp it with the current time. */
	fs->fs_pendinginodes = 0;
	fs->fs_pendingblocks = 0;
	fs->fs_clean = 1;
	fs->fs_time = time(NULL);
	fs->fs_mtime = time(NULL);
	sbdirty();
	ckfini(1);
	/*
	 * The '-' and '+' prefixed lines below are unified-diff records:
	 * the old and the new line wrapping of the same two printf calls.
	 */
	if (jrecs > 0 || jbytes > 0) {
-		printf("** %jd journal records in %jd bytes for %.2f%% utilization\n",
-		    jrecs, jbytes, ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100);
-		printf("** Freed %jd inodes (%jd dirs) %jd blocks, and %jd frags.\n",
-		    freeinos, freedir, freeblocks, freefrags);
+		printf("** %jd journal records in %jd bytes for %.2f%% "
+		    "utilization\n", jrecs, jbytes,
+		    ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100);
+		printf("** Freed %jd inodes (%jd dirs) %jd blocks, and %jd "
+		    "frags.\n", freeinos, freedir, freeblocks, freefrags);
	}

	return (0);
}

/*
 * Reset the file-scope state used by a journal check: the cghash
 * buckets, lastcg, the allsegs list, oldseq, fs, sujino, the freed
 * object counters, the journal byte/record counters and suj_jblocks.
 * Called first thing by suj_check().
 */
static void
initsuj(void)
{
	int i;

	for (i = 0; i < HASHSIZE; i++)
		LIST_INIT(&cghash[i]);
	lastcg = NULL;
	TAILQ_INIT(&allsegs);
	oldseq = 0;
	fs = NULL;
	sujino = 0;
	freefrags = 0;
	freeblocks = 0;
	freeinos = 0;
	freedir = 0;
	jbytes = 0;
	jrecs = 0;
	suj_jblocks = NULL;
}