diff --git a/lib/libufs/Makefile b/lib/libufs/Makefile index e37a285bb509..42d76c6061d2 100644 --- a/lib/libufs/Makefile +++ b/lib/libufs/Makefile @@ -1,39 +1,39 @@ PACKAGE= ufs LIB= ufs SHLIBDIR?= /lib -SHLIB_MAJOR= 7 +SHLIB_MAJOR= 8 SRCS= block.c cgroup.c gsb_crc32.c inode.c sblock.c type.c ffs_subr.c SRCS+= ffs_tables.c INCS= libufs.h MAN= bread.3 cgread.3 getinode.3 libufs.3 sbread.3 ufs_disk_close.3 MLINKS+= bread.3 bwrite.3 MLINKS+= bread.3 berase.3 MLINKS+= cgread.3 cgread1.3 MLINKS+= cgread.3 cgget.3 MLINKS+= cgread.3 cgwrite.3 MLINKS+= cgread.3 cgwrite1.3 MLINKS+= cgread.3 cgput.3 MLINKS+= getinode.3 putinode.3 MLINKS+= sbread.3 sbwrite.3 MLINKS+= sbread.3 sbget.3 MLINKS+= sbread.3 sbsearch.3 MLINKS+= sbread.3 sbfind.3 MLINKS+= sbread.3 sbput.3 MLINKS+= ufs_disk_close.3 ufs_disk_fillout.3 MLINKS+= ufs_disk_close.3 ufs_disk_fillout_blank.3 MLINKS+= ufs_disk_close.3 ufs_disk_write.3 .PATH: ${SRCTOP}/sys/libkern ${SRCTOP}/sys/ufs/ffs WARNS?= 2 CFLAGS+= -D_LIBUFS .if defined(LIBUFS_DEBUG) CFLAGS+= -D_LIBUFS_DEBUGGING .endif CFLAGS+= -I${.CURDIR} .include diff --git a/lib/libufs/block.c b/lib/libufs/block.c index 20002fc4df1b..e15216aefa8d 100644 --- a/lib/libufs/block.c +++ b/lib/libufs/block.c @@ -1,203 +1,184 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Juli Mallett. All rights reserved. * * This software was written by Juli Mallett for the * FreeBSD project. Redistribution and use in source and binary forms, with * or without modification, are permitted provided that the following * conditions are met: * * 1. Redistribution of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistribution in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include ssize_t bread(struct uufsd *disk, ufs2_daddr_t blockno, void *data, size_t size) { void *p2; ssize_t cnt; ERROR(disk, NULL); - p2 = data; - /* - * XXX: various disk controllers require alignment of our buffer - * XXX: which is stricter than struct alignment. - * XXX: Bounce the buffer if not 64 byte aligned. - * XXX: this can be removed if/when the kernel is fixed - */ - if (((intptr_t)data) & 0x3f) { - p2 = malloc(size); - if (p2 == NULL) { - ERROR(disk, "allocate bounce buffer"); - goto fail; - } + BUF_MALLOC(&p2, data, size); + if (p2 == NULL) { + ERROR(disk, "allocate bounce buffer"); + goto fail; } cnt = pread(disk->d_fd, p2, size, (off_t)(blockno * disk->d_bsize)); if (cnt == -1) { ERROR(disk, "read error from block device"); goto fail; } if (cnt == 0) { ERROR(disk, "end of file from block device"); goto fail; } if ((size_t)cnt != size) { ERROR(disk, "short read or read error from block device"); goto fail; } if (p2 != data) { memcpy(data, p2, size); free(p2); } return (cnt); fail: memset(data, 0, size); if (p2 != data) { free(p2); } return (-1); } ssize_t bwrite(struct uufsd *disk, ufs2_daddr_t blockno, const void *data, size_t size) { ssize_t cnt; int rv; - void *p2 = NULL; + void *p2; ERROR(disk, NULL); rv = ufs_disk_write(disk); if (rv == -1) { ERROR(disk, "failed to open disk for writing"); return (-1); } - - /* - * XXX: various disk controllers require alignment of our buffer - * XXX: which is stricter than struct alignment. - * XXX: Bounce the buffer if not 64 byte aligned. - * XXX: this can be removed if/when the kernel is fixed - */ - if (((intptr_t)data) & 0x3f) { - p2 = malloc(size); - if (p2 == NULL) { - ERROR(disk, "allocate bounce buffer"); - return (-1); - } - memcpy(p2, data, size); - data = p2; + BUF_MALLOC(&p2, data, size); + if (p2 == NULL) { + ERROR(disk, "allocate bounce buffer"); + return (-1); } - cnt = pwrite(disk->d_fd, data, size, (off_t)(blockno * disk->d_bsize)); - if (p2 != NULL) + if (p2 != data) + memcpy(p2, data, size); + cnt = pwrite(disk->d_fd, p2, size, (off_t)(blockno * disk->d_bsize)); + if (p2 != data) free(p2); if (cnt == -1) { ERROR(disk, "write error to block device"); return (-1); } if ((size_t)cnt != size) { ERROR(disk, "short write to block device"); return (-1); } - return (cnt); } #ifdef __FreeBSD_kernel__ static int berase_helper(struct uufsd *disk, ufs2_daddr_t blockno, ufs2_daddr_t size) { off_t ioarg[2]; ioarg[0] = blockno * disk->d_bsize; ioarg[1] = size; return (ioctl(disk->d_fd, DIOCGDELETE, ioarg)); } #else static int berase_helper(struct uufsd *disk, ufs2_daddr_t blockno, ufs2_daddr_t size) { char *zero_chunk; off_t offset, zero_chunk_size, pwrite_size; int rv; offset = blockno * disk->d_bsize; zero_chunk_size = 65536 * disk->d_bsize; zero_chunk = calloc(1, zero_chunk_size); if (zero_chunk == NULL) { ERROR(disk, "failed to allocate memory"); return (-1); } while (size > 0) { pwrite_size = size; if (pwrite_size > zero_chunk_size) pwrite_size = zero_chunk_size; rv = pwrite(disk->d_fd, zero_chunk, pwrite_size, offset); if (rv == -1) { ERROR(disk, "failed writing to disk"); break; } size -= rv; offset += rv; rv = 0; } free(zero_chunk); return (rv); } #endif int berase(struct uufsd *disk, ufs2_daddr_t blockno, ufs2_daddr_t size) { int rv; ERROR(disk, NULL); rv = ufs_disk_write(disk); if (rv == -1) { ERROR(disk, "failed to open disk for writing"); return(rv); } return (berase_helper(disk, blockno, size)); } diff --git a/lib/libufs/inode.c b/lib/libufs/inode.c index fe34fd45b815..3865e50591de 100644 --- a/lib/libufs/inode.c +++ b/lib/libufs/inode.c @@ -1,120 +1,108 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Juli Mallett. All rights reserved. * * This software was written by Juli Mallett for the * FreeBSD project. Redistribution and use in source and binary forms, with * or without modification, are permitted provided that the following * conditions are met: * * 1. Redistribution of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistribution in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int getinode(struct uufsd *disk, union dinodep *dp, ino_t inum) { ino_t min, max; caddr_t inoblock; struct fs *fs; ERROR(disk, NULL); fs = &disk->d_fs; if (inum >= (ino_t)fs->fs_ipg * fs->fs_ncg) { ERROR(disk, "inode number out of range"); return (-1); } - inoblock = disk->d_inoblock; + inoblock = (caddr_t)&disk->d_inos[0]; min = disk->d_inomin; max = disk->d_inomax; - if (inoblock == NULL) { - inoblock = malloc(fs->fs_bsize); - if (inoblock == NULL) { - ERROR(disk, "unable to allocate inode block"); - return (-1); - } - disk->d_inoblock = inoblock; - } if (inum >= min && inum < max) goto gotit; bread(disk, fsbtodb(fs, ino_to_fsba(fs, inum)), inoblock, fs->fs_bsize); disk->d_inomin = min = inum - (inum % INOPB(fs)); disk->d_inomax = max = min + INOPB(fs); gotit: switch (disk->d_ufs) { case 1: disk->d_dp.dp1 = &((struct ufs1_dinode *)inoblock)[inum - min]; if (dp != NULL) *dp = disk->d_dp; return (0); case 2: disk->d_dp.dp2 = &((struct ufs2_dinode *)inoblock)[inum - min]; if (dp != NULL) *dp = disk->d_dp; if (ffs_verify_dinode_ckhash(fs, disk->d_dp.dp2) == 0) return (0); ERROR(disk, "check-hash failed for inode read from disk"); return (-1); default: break; } ERROR(disk, "unknown UFS filesystem type"); return (-1); } int putinode(struct uufsd *disk) { struct fs *fs; fs = &disk->d_fs; - if (disk->d_inoblock == NULL) { - ERROR(disk, "No inode block allocated"); - return (-1); - } if (disk->d_ufs == 2) ffs_update_dinode_ckhash(fs, disk->d_dp.dp2); if (bwrite(disk, fsbtodb(fs, ino_to_fsba(&disk->d_fs, disk->d_inomin)), - disk->d_inoblock, disk->d_fs.fs_bsize) <= 0) + (caddr_t)&disk->d_inos[0], disk->d_fs.fs_bsize) <= 0) return (-1); return (0); } diff --git a/lib/libufs/libufs.h b/lib/libufs/libufs.h index 4c6242e9daef..45ac97f43c06 100644 --- a/lib/libufs/libufs.h +++ b/lib/libufs/libufs.h @@ -1,174 +1,193 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Juli Mallett. All rights reserved. * * This software was written by Juli Mallett for the * FreeBSD project. Redistribution and use in source and binary forms, with * or without modification, are permitted provided that the following * conditions are met: * * 1. Redistribution of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistribution in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef __LIBUFS_H__ #define __LIBUFS_H__ +/* + * Various disk controllers require their buffers to be aligned to the size + * of a cache line. The LIBUFS_BUFALIGN defines the required alignment size. + * The alignment must be a power of 2. + */ +#define LIBUFS_BUFALIGN 128 + /* * libufs structures. */ union dinodep { struct ufs1_dinode *dp1; struct ufs2_dinode *dp2; }; /* * userland ufs disk. */ struct uufsd { - const char *d_name; /* disk name */ - int d_ufs; /* decimal UFS version */ - int d_fd; /* raw device file descriptor */ - long d_bsize; /* device bsize */ - ufs2_daddr_t d_sblock; /* superblock location */ - struct fs_summary_info *d_si; /* Superblock summary info */ - caddr_t d_inoblock; /* inode block */ - uint32_t d_inomin; /* low ino, not ino_t for ABI compat */ - uint32_t d_inomax; /* high ino, not ino_t for ABI compat */ - union dinodep d_dp; /* pointer to currently active inode */ union { struct fs d_fs; /* filesystem information */ - char d_sb[MAXBSIZE]; /* superblock as buffer */ - } d_sbunion; + char d_sb[SBLOCKSIZE]; /* superblock as buffer */ + } d_sbunion __aligned(LIBUFS_BUFALIGN); union { struct cg d_cg; /* cylinder group */ char d_buf[MAXBSIZE]; /* cylinder group storage */ - } d_cgunion; - int d_ccg; /* current cylinder group */ - int d_lcg; /* last cylinder group (in d_cg) */ + } d_cgunion __aligned(LIBUFS_BUFALIGN); + union { + union dinodep d_ino[1]; /* inode block */ + char d_inos[MAXBSIZE]; /* inode block as buffer */ + } d_inosunion __aligned(LIBUFS_BUFALIGN); + const char *d_name; /* disk name */ const char *d_error; /* human readable disk error */ + ufs2_daddr_t d_sblock; /* superblock location */ + struct fs_summary_info *d_si; /* Superblock summary info */ + union dinodep d_dp; /* pointer to currently active inode */ + ino_t d_inomin; /* low ino */ + ino_t d_inomax; /* high ino */ off_t d_sblockloc; /* where to look for the superblock */ - int d_lookupflags; /* flags to superblock lookup */ - int d_mine; /* internal flags */ + int64_t d_bsize; /* device bsize */ + int64_t d_lookupflags; /* flags to superblock lookup */ + int64_t d_mine; /* internal flags */ + int32_t d_ccg; /* current cylinder group */ + int32_t d_ufs; /* decimal UFS version */ + int32_t d_fd; /* raw device file descriptor */ + int32_t d_lcg; /* last cylinder group (in d_cg) */ +}; +#define d_inos d_inosunion.d_inos #define d_fs d_sbunion.d_fs -#define d_sb d_sbunion.d_sb #define d_cg d_cgunion.d_cg -}; /* * libufs macros (internal, non-exported). */ #ifdef _LIBUFS +/* + * Ensure that the buffer is aligned to the I/O subsystem requirements. + */ +#define BUF_MALLOC(newbufpp, data, size) { \ + if (data != NULL && (((intptr_t)data) & (LIBUFS_BUFALIGN - 1)) == 0) \ + *newbufpp = (void *)data; \ + else \ + *newbufpp = aligned_alloc(LIBUFS_BUFALIGN, size); \ +} /* * Trace steps through libufs, to be used at entry and erroneous return. */ static inline void ERROR(struct uufsd *u, const char *str) { #ifdef _LIBUFS_DEBUGGING if (str != NULL) { fprintf(stderr, "libufs: %s", str); if (errno != 0) fprintf(stderr, ": %s", strerror(errno)); fprintf(stderr, "\n"); } #endif if (u != NULL) u->d_error = str; } #endif /* _LIBUFS */ __BEGIN_DECLS /* * libufs prototypes. */ /* * ffs_subr.c */ void ffs_clrblock(struct fs *, u_char *, ufs1_daddr_t); void ffs_clusteracct(struct fs *, struct cg *, ufs1_daddr_t, int); void ffs_fragacct(struct fs *, int, int32_t [], int); int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t); int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t); int ffs_sbsearch(void *, struct fs **, int, char *, int (*)(void *, off_t, void **, int)); void ffs_setblock(struct fs *, u_char *, ufs1_daddr_t); int ffs_sbget(void *, struct fs **, off_t, int, char *, int (*)(void *, off_t, void **, int)); int ffs_sbput(void *, struct fs *, off_t, int (*)(void *, off_t, void *, int)); void ffs_update_dinode_ckhash(struct fs *, struct ufs2_dinode *); int ffs_verify_dinode_ckhash(struct fs *, struct ufs2_dinode *); /* * block.c */ ssize_t bread(struct uufsd *, ufs2_daddr_t, void *, size_t); ssize_t bwrite(struct uufsd *, ufs2_daddr_t, const void *, size_t); int berase(struct uufsd *, ufs2_daddr_t, ufs2_daddr_t); /* * cgroup.c */ ufs2_daddr_t cgballoc(struct uufsd *); int cgbfree(struct uufsd *, ufs2_daddr_t, long); ino_t cgialloc(struct uufsd *); int cgget(int, struct fs *, int, struct cg *); int cgput(int, struct fs *, struct cg *); int cgread(struct uufsd *); int cgread1(struct uufsd *, int); int cgwrite(struct uufsd *); int cgwrite1(struct uufsd *, int); /* * inode.c */ int getinode(struct uufsd *, union dinodep *, ino_t); int putinode(struct uufsd *); /* * sblock.c */ int sbread(struct uufsd *); int sbfind(struct uufsd *, int); int sbwrite(struct uufsd *, int); /* low level superblock read/write functions */ int sbget(int, struct fs **, off_t, int); int sbsearch(int, struct fs **, int); int sbput(int, struct fs *, int); /* * type.c */ int ufs_disk_close(struct uufsd *); int ufs_disk_fillout(struct uufsd *, const char *); int ufs_disk_fillout_blank(struct uufsd *, const char *); int ufs_disk_write(struct uufsd *); /* * crc32c.c */ uint32_t calculate_crc32c(uint32_t, const void *, size_t); __END_DECLS #endif /* __LIBUFS_H__ */ diff --git a/lib/libufs/sblock.c b/lib/libufs/sblock.c index 9f1d8a2485bd..59cd44de04ab 100644 --- a/lib/libufs/sblock.c +++ b/lib/libufs/sblock.c @@ -1,291 +1,292 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Juli Mallett. All rights reserved. * * This software was written by Juli Mallett for the * FreeBSD project. Redistribution and use in source and binary forms, with * or without modification, are permitted provided that the following * conditions are met: * * 1. Redistribution of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistribution in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int handle_disk_read(struct uufsd *, struct fs *, int); /* * Read the standard superblock. * * The following option flags can be or'ed into disk->d_lookupflags: * * UFS_NOMSG indicates that superblock inconsistency error messages * should not be printed. * * UFS_NOCSUM causes only the superblock itself to be returned, but does * not read in any auxillary data structures like the cylinder group * summary information. */ int sbread(struct uufsd *disk) { struct fs *fs; int error; error = sbget(disk->d_fd, &fs, disk->d_sblockloc, disk->d_lookupflags); return (handle_disk_read(disk, fs, error)); } /* * Make an extensive search to find a superblock. If the superblock * in the standard place cannot be used, try looking for one of the * backup superblocks. * * The flags parameter is made up of the following or'ed together options: * * UFS_NOMSG indicates that superblock inconsistency error messages * should not be printed. * * UFS_NOCSUM causes only the superblock itself to be returned, but does * not read in any auxillary data structures like the cylinder group * summary information. */ int sbfind(struct uufsd *disk, int flags) { struct fs *fs; int error; error = sbsearch(disk->d_fd, &fs, flags); return (handle_disk_read(disk, fs, error)); } static int handle_disk_read(struct uufsd *disk, struct fs *fs, int error) { ERROR(disk, NULL); if (error != 0) { switch (error) { case EIO: ERROR(disk, "non-existent or truncated superblock"); break; case ENOENT: ERROR(disk, "no usable known superblock found"); break; case EINTEGRITY: ERROR(disk, "superblock check-hash failure"); break; case ENOSPC: ERROR(disk, "failed to allocate space for superblock " "information"); break; case EINVAL: ERROR(disk, "The previous newfs operation on this " "volume did not complete.\nYou must complete " "newfs before using this volume."); break; default: ERROR(disk, "unknown superblock read error"); errno = EIO; break; } disk->d_ufs = 0; return (-1); } memcpy(&disk->d_fs, fs, fs->fs_sbsize); free(fs); fs = &disk->d_fs; if (fs->fs_magic == FS_UFS1_MAGIC) disk->d_ufs = 1; if (fs->fs_magic == FS_UFS2_MAGIC) disk->d_ufs = 2; disk->d_bsize = fs->fs_fsize / fsbtodb(fs, 1); disk->d_sblock = fs->fs_sblockloc / disk->d_bsize; disk->d_si = fs->fs_si; return (0); } int sbwrite(struct uufsd *disk, int all) { struct fs *fs; int rv; ERROR(disk, NULL); rv = ufs_disk_write(disk); if (rv == -1) { ERROR(disk, "failed to open disk for writing"); return (-1); } fs = &disk->d_fs; if ((errno = sbput(disk->d_fd, fs, all ? fs->fs_ncg : 0)) != 0) { switch (errno) { case EIO: ERROR(disk, "failed to write superblock"); break; default: ERROR(disk, "unknown superblock write error"); errno = EIO; break; } return (-1); } return (0); } /* * These are the low-level functions that actually read and write * the superblock and its associated data. The actual work is done by * the functions ffs_sbget and ffs_sbput in /sys/ufs/ffs/ffs_subr.c. */ static int use_pread(void *devfd, off_t loc, void **bufp, int size); static int use_pwrite(void *devfd, off_t loc, void *buf, int size); /* * The following two functions read a superblock. Their flags * parameter are made up of the following or'ed together options: * * UFS_NOMSG indicates that superblock inconsistency error messages * should not be printed. * * UFS_NOCSUM causes only the superblock itself to be returned, but does * not read in any auxillary data structures like the cylinder group * summary information. * * Read a superblock from the devfd device allocating memory returned * in fsp. */ int sbget(int devfd, struct fs **fsp, off_t sblockloc, int flags) { int error; error = ffs_sbget(&devfd, fsp, sblockloc, flags, "user", use_pread); fflush(NULL); /* flush any messages */ return (error); } /* * Make an extensive search of the devfd device to find a superblock. * If the superblock in the standard place cannot be used, try looking * for one of the backup superblocks. If found, memory is allocated and * returned in fsp. */ int sbsearch(int devfd, struct fs **fsp, int flags) { int error; error = ffs_sbsearch(&devfd, fsp, flags, "user", use_pread); fflush(NULL); /* flush any messages */ return (error); } /* * A read function for use by user-level programs using libufs. */ static int use_pread(void *devfd, off_t loc, void **bufp, int size) { int fd; fd = *(int *)devfd; - if ((*bufp = malloc(size)) == NULL) + BUF_MALLOC(bufp, NULL, size); + if (*bufp == NULL) return (ENOSPC); if (pread(fd, *bufp, size, loc) != size) return (EIO); return (0); } /* * Write a superblock to the devfd device from the memory pointed to by fs. * Also write out the superblock summary information but do not free the * summary information memory. * * Additionally write out numaltwrite of the alternate superblocks. Use * fs->fs_ncg to write out all of the alternate superblocks. */ int sbput(int devfd, struct fs *fs, int numaltwrite) { struct csum *savedcsp; off_t savedactualloc; int i, error; error = ffs_sbput(&devfd, fs, fs->fs_sblockactualloc, use_pwrite); fflush(NULL); /* flush any messages */ if (error != 0 || numaltwrite == 0) return (error); savedactualloc = fs->fs_sblockactualloc; if (fs->fs_si != NULL) { savedcsp = fs->fs_csp; fs->fs_csp = NULL; } for (i = 0; i < numaltwrite; i++) { fs->fs_sblockactualloc = dbtob(fsbtodb(fs, cgsblock(fs, i))); if ((error = ffs_sbput(&devfd, fs, fs->fs_sblockactualloc, use_pwrite)) != 0) { fflush(NULL); /* flush any messages */ fs->fs_sblockactualloc = savedactualloc; fs->fs_csp = savedcsp; return (error); } } fs->fs_sblockactualloc = savedactualloc; if (fs->fs_si != NULL) fs->fs_csp = savedcsp; fflush(NULL); /* flush any messages */ return (0); } /* * A write function for use by user-level programs using sbput in libufs. */ static int use_pwrite(void *devfd, off_t loc, void *buf, int size) { int fd; fd = *(int *)devfd; if (pwrite(fd, buf, size, loc) != size) return (EIO); return (0); } diff --git a/lib/libufs/type.c b/lib/libufs/type.c index 1d0c4c0200eb..99aec0a57e9a 100644 --- a/lib/libufs/type.c +++ b/lib/libufs/type.c @@ -1,206 +1,208 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Juli Mallett. All rights reserved. * * This software was written by Juli Mallett for the * FreeBSD project. Redistribution and use in source and binary forms, with * or without modification, are permitted provided that the following * conditions are met: * * 1. Redistribution of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistribution in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Internally, track the 'name' value, it's ours. */ #define MINE_NAME 0x01 /* Track if its fd points to a writable device. */ #define MINE_WRITE 0x02 int ufs_disk_close(struct uufsd *disk) { ERROR(disk, NULL); close(disk->d_fd); disk->d_fd = -1; - if (disk->d_inoblock != NULL) { - free(disk->d_inoblock); - disk->d_inoblock = NULL; - } if (disk->d_mine & MINE_NAME) { free((char *)(uintptr_t)disk->d_name); disk->d_name = NULL; } if (disk->d_si != NULL) { free(disk->d_si->si_csp); free(disk->d_si); disk->d_si = NULL; } return (0); } int ufs_disk_fillout(struct uufsd *disk, const char *name) { if (ufs_disk_fillout_blank(disk, name) == -1) { return (-1); } if (sbread(disk) == -1) { ERROR(disk, "could not read superblock to fill out disk"); ufs_disk_close(disk); return (-1); } return (0); } int ufs_disk_fillout_blank(struct uufsd *disk, const char *name) { struct stat st; struct fstab *fs; struct statfs sfs; const char *oname; char dev[MAXPATHLEN]; int fd, ret; ERROR(disk, NULL); oname = name; again: if ((ret = stat(name, &st)) < 0) { if (*name != '/') { snprintf(dev, sizeof(dev), "%s%s", _PATH_DEV, name); name = dev; goto again; } /* * The given object doesn't exist, but don't panic just yet - * it may be still mount point listed in /etc/fstab, but without * existing corresponding directory. */ name = oname; } if (ret >= 0 && S_ISREG(st.st_mode)) { /* Possibly a disk image, give it a try. */ ; } else if (ret >= 0 && S_ISCHR(st.st_mode)) { /* This is what we need, do nothing. */ ; } else if ((fs = getfsfile(name)) != NULL) { /* * The given mount point is listed in /etc/fstab. * It is possible that someone unmounted file system by hand * and different file system is mounted on this mount point, * but we still prefer /etc/fstab entry, because on the other * hand, there could be /etc/fstab entry for this mount * point, but file system is not mounted yet (eg. noauto) and * statfs(2) will point us at different file system. */ name = fs->fs_spec; } else if (ret >= 0 && S_ISDIR(st.st_mode)) { /* * The mount point is not listed in /etc/fstab, so it may be * file system mounted by hand. */ if (statfs(name, &sfs) < 0) { ERROR(disk, "could not find special device"); return (-1); } strlcpy(dev, sfs.f_mntfromname, sizeof(dev)); name = dev; } else { ERROR(disk, "could not find special device"); return (-1); } fd = open(name, O_RDONLY); if (fd == -1) { ERROR(disk, "could not open special device"); return (-1); } + if (((uintptr_t)disk & ~(LIBUFS_BUFALIGN - 1)) != (uintptr_t)disk) { + ERROR(disk, "uufsd structure must be aligned to " + "LIBUFS_BUFALIGN byte boundry, see ufs_disk_fillout(3)"); + close(fd); + return (-1); + } + disk->d_bsize = 1; disk->d_ccg = 0; disk->d_fd = fd; - disk->d_inoblock = NULL; disk->d_inomin = 0; disk->d_inomax = 0; disk->d_lcg = 0; disk->d_mine = 0; disk->d_ufs = 0; disk->d_error = NULL; disk->d_si = NULL; disk->d_sblockloc = UFS_STDSB; disk->d_lookupflags = 0; if (oname != name) { name = strdup(name); if (name == NULL) { ERROR(disk, "could not allocate memory for disk name"); return (-1); } disk->d_mine |= MINE_NAME; } disk->d_name = name; return (0); } int ufs_disk_write(struct uufsd *disk) { int fd; ERROR(disk, NULL); if (disk->d_mine & MINE_WRITE) return (0); fd = open(disk->d_name, O_RDWR); if (fd < 0) { ERROR(disk, "failed to open disk for writing"); return (-1); } close(disk->d_fd); disk->d_fd = fd; disk->d_mine |= MINE_WRITE; return (0); } diff --git a/lib/libufs/ufs_disk_close.3 b/lib/libufs/ufs_disk_close.3 index d0d93d05838c..f332a9bb5de9 100644 --- a/lib/libufs/ufs_disk_close.3 +++ b/lib/libufs/ufs_disk_close.3 @@ -1,113 +1,120 @@ .\" Author: Juli Mallett .\" Date: June 04, 2003 .\" Description: .\" Manual page for libufs functions: .\" ufs_disk_close(3) .\" ufs_disk_fillout(3) .\" ufs_disk_fillout_blank(3) .\" ufs_disk_write(3) .\" .\" This file is in the public domain. .\" -.Dd June 4, 2003 +.Dd November 17, 2023 .Dt UFS_DISK_CLOSE 3 .Os .Sh NAME .Nm ufs_disk_close , .Nm ufs_disk_fillout , .Nm ufs_disk_fillout_blank , .Nm ufs_disk_write .Nd open and close userland UFS disks .Sh LIBRARY .Lb libufs .Sh SYNOPSIS .In sys/param.h .In sys/mount.h .In ufs/ufs/ufsmount.h .In ufs/ufs/dinode.h .In ufs/ffs/fs.h .In libufs.h .Ft int .Fn ufs_disk_close "struct uufsd *disk" .Ft int .Fn ufs_disk_fillout "struct uufsd *disk" "const char *name" .Ft int .Fn ufs_disk_fillout_blank "struct uufsd *disk" "const char *name" .Ft int .Fn ufs_disk_write "struct uufsd *disk" .Sh DESCRIPTION The .Fn ufs_disk_close function closes a disk and frees internal memory related to it. It does not free the .Fa disk structure. .Pp The .Fn ufs_disk_fillout and .Fn ufs_disk_fillout_blank functions open a disk specified by .Fa name and populate the structure pointed to by .Fa disk . +The structure referenced by the +.Fa disk +pointer must be aligned to at least the alignment specified by +.Dv LIBUFS_ALIGN +that is defined in the +.Lb libufs.h +header file. The disk is opened read-only. The specified .Fa name may be either a mountpoint, a device name or a filesystem image. The .Fn ufs_disk_fillout function assumes there is a valid superblock and will fail if not, whereas the .Fn ufs_disk_fillout_blank function makes no assumptions of that sort. .Pp The .Fn ufs_disk_write function attempts to re-open a disk as writable if it is not currently. .Sh ERRORS The function .Fn ufs_disk_close has no failure points. .Pp The function .Fn ufs_disk_fillout may fail for any of the reasons .Fn ufs_disk_fillout_blank might, as well as for any reason .Xr sbread 3 might. .Pp The .Fn ufs_disk_fillout_blank may fail and set .Va errno for any of the errors specified for the library functions .Xr open 2 , .Xr strdup 3 . Additionally, it may follow the .Xr libufs 3 error methodologies in situations where no device could be found to open. .Pp The function .Fn ufs_disk_write may fail and set .Va errno for any of the errors specified for the library functions .Xr open 2 and .Xr stat 2 . Namely, it will fail if the disk in question may not be written to. .Sh SEE ALSO .Xr open 2 , .Xr getfsfile 3 , .Xr libufs 3 , .Xr sbread 3 .Sh HISTORY These functions first appeared as part of .Xr libufs 3 in .Fx 5.0 . .Sh AUTHORS .An Juli Mallett Aq Mt jmallett@FreeBSD.org diff --git a/sbin/fsck_ffs/fsck.h b/sbin/fsck_ffs/fsck.h index 3b795783f39c..827336a77d67 100644 --- a/sbin/fsck_ffs/fsck.h +++ b/sbin/fsck_ffs/fsck.h @@ -1,542 +1,557 @@ /*- * SPDX-License-Identifier: BSD-3-Clause and BSD-2-Clause * * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and Network Associates Laboratories, the Security * Research Division of Network Associates, Inc. under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS * research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fsck.h 8.4 (Berkeley) 5/9/95 */ #ifndef _FSCK_H_ #define _FSCK_H_ #include #include #include +#include #include #define MAXDUP 10 /* limit on dup blks (per inode) */ #define MAXBAD 10 /* limit on bad blks (per inode) */ #define MINBUFS 100 /* minimum number of buffers required */ #define INOBUFSIZE 64*1024 /* size of buffer to read inodes in pass1 */ #define ZEROBUFSIZE (dev_bsize * 128) /* size of zero buffer used by -Z */ union dinode { struct ufs1_dinode dp1; struct ufs2_dinode dp2; }; #define DIP(dp, field) \ ((sblock.fs_magic == FS_UFS1_MAGIC) ? \ (dp)->dp1.field : (dp)->dp2.field) #define DIP_SET(dp, field, val) do { \ if (sblock.fs_magic == FS_UFS1_MAGIC) \ (dp)->dp1.field = (val); \ else \ (dp)->dp2.field = (val); \ } while (0) /* * Each inode on the file system is described by the following structure. * The linkcnt is initially set to the value in the inode. Each time it * is found during the descent in passes 2, 3, and 4 the count is * decremented. Any inodes whose count is non-zero after pass 4 needs to * have its link count adjusted by the value remaining in ino_linkcnt. */ struct inostat { u_char ino_state; /* state of inode, see below */ u_char ino_type:4; /* type of inode */ u_char ino_idtype:4; /* idesc id_type, SNAP or ADDR */ u_short ino_linkcnt; /* number of links not found */ }; /* * Inode states. */ #define USTATE 0x1 /* inode not allocated */ #define FSTATE 0x2 /* inode is file */ #define FZLINK 0x3 /* inode is file with a link count of zero */ #define DSTATE 0x4 /* inode is directory */ #define DZLINK 0x5 /* inode is directory with a zero link count */ #define DFOUND 0x6 /* directory found during descent */ /* 0x7 UNUSED - see S_IS_DVALID() definition */ #define DCLEAR 0x8 /* directory is to be cleared */ #define FCLEAR 0x9 /* file is to be cleared */ /* DUNFOUND === (state == DSTATE || state == DZLINK) */ #define S_IS_DUNFOUND(state) (((state) & ~0x1) == DSTATE) /* DVALID === (state == DSTATE || state == DZLINK || state == DFOUND) */ #define S_IS_DVALID(state) (((state) & ~0x3) == DSTATE) #define INO_IS_DUNFOUND(ino) S_IS_DUNFOUND(inoinfo(ino)->ino_state) #define INO_IS_DVALID(ino) S_IS_DVALID(inoinfo(ino)->ino_state) /* * Inode state information is contained on per cylinder group lists * which are described by the following structure. */ extern struct inostatlist { long il_numalloced; /* number of inodes allocated in this cg */ struct inostat *il_stat;/* inostat info for this cylinder group */ } *inostathead; /* * Structure to reference a dinode. */ struct inode { struct bufarea *i_bp; /* buffer containing the dinode */ union dinode *i_dp; /* pointer to dinode in buffer */ ino_t i_number; /* inode number */ }; /* * Size of hash tables */ #define HASHSIZE 2048 #define HASH(x) ((x * 2654435761) & (HASHSIZE - 1)) /* * buffer cache structure. */ struct bufarea { TAILQ_ENTRY(bufarea) b_list; /* LRU buffer queue */ LIST_ENTRY(bufarea) b_hash; /* hash list */ ufs2_daddr_t b_bno; /* disk block number */ int b_size; /* size of I/O */ int b_errs; /* I/O error */ int b_flags; /* B_ flags below */ int b_type; /* BT_ type below */ int b_refcnt; /* ref count of users */ int b_index; /* for BT_LEVEL, ptr index */ /* for BT_INODES, first inum */ union { char *b_buf; /* buffer space */ ufs1_daddr_t *b_indir1; /* UFS1 indirect block */ ufs2_daddr_t *b_indir2; /* UFS2 indirect block */ struct fs *b_fs; /* super block */ struct cg *b_cg; /* cylinder group */ struct ufs1_dinode *b_dinode1; /* UFS1 inode block */ struct ufs2_dinode *b_dinode2; /* UFS2 inode block */ } b_un; }; #define IBLK(bp, i) \ ((sblock.fs_magic == FS_UFS1_MAGIC) ? \ (bp)->b_un.b_indir1[i] : (bp)->b_un.b_indir2[i]) #define IBLK_SET(bp, i, val) do { \ if (sblock.fs_magic == FS_UFS1_MAGIC) \ (bp)->b_un.b_indir1[i] = (val); \ else \ (bp)->b_un.b_indir2[i] = (val); \ } while (0) /* * Buffer flags */ #define B_DIRTY 0x00000001 /* Buffer is dirty */ /* * Type of data in buffer */ #define BT_UNKNOWN 0 /* Buffer type is unknown */ #define BT_SUPERBLK 1 /* Buffer holds a superblock */ #define BT_CYLGRP 2 /* Buffer holds a cylinder group map */ #define BT_LEVEL1 3 /* Buffer holds single level indirect */ #define BT_LEVEL2 4 /* Buffer holds double level indirect */ #define BT_LEVEL3 5 /* Buffer holds triple level indirect */ #define BT_EXTATTR 6 /* Buffer holds external attribute data */ #define BT_INODES 7 /* Buffer holds inodes */ #define BT_DIRDATA 8 /* Buffer holds directory data */ #define BT_DATA 9 /* Buffer holds user data */ #define BT_NUMBUFTYPES 10 #define BT_NAMES { \ "unknown", \ "Superblock", \ "Cylinder Group", \ "Single Level Indirect", \ "Double Level Indirect", \ "Triple Level Indirect", \ "External Attribute", \ "Inode Block", \ "Directory Contents", \ "User Data" } extern char *buftype[]; #define BT_BUFTYPE(type) \ type < BT_NUMBUFTYPES ? buftype[type] : buftype[BT_UNKNOWN] extern long readcnt[BT_NUMBUFTYPES]; extern long totalreadcnt[BT_NUMBUFTYPES]; extern struct timespec readtime[BT_NUMBUFTYPES]; extern struct timespec totalreadtime[BT_NUMBUFTYPES]; extern struct timespec startprog; extern struct bufarea *icachebp; /* inode cache buffer */ extern struct bufarea sblk; /* file system superblock */ extern struct bufarea *pdirbp; /* current directory contents */ #define dirty(bp) do { \ if (fswritefd < 0) \ pfatal("SETTING DIRTY FLAG IN READ_ONLY MODE\n"); \ else \ (bp)->b_flags |= B_DIRTY; \ } while (0) #define initbarea(bp, type) do { \ (bp)->b_bno = (ufs2_daddr_t)-4; \ (bp)->b_size = 0; \ (bp)->b_errs = 0; \ (bp)->b_flags = 0; \ (bp)->b_type = type; \ (bp)->b_refcnt = 0; \ (bp)->b_index = 0; \ } while (0) #define sbdirty() dirty(&sblk) #define sblock (*sblk.b_un.b_fs) enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE}; extern ino_t cursnapshot; struct inodesc { enum fixstate id_fix; /* policy on fixing errors */ int (*id_func)(struct inodesc *); /* function to be applied to blocks of inode */ struct bufarea *id_bp; /* ckinode: buffer with indirect pointers */ union dinode *id_dp; /* ckinode: dinode being traversed */ ino_t id_number; /* inode number described */ ino_t id_parent; /* for DATA nodes, their parent */ ufs_lbn_t id_lbn; /* logical block number of current block */ ufs2_daddr_t id_blkno; /* current block number being examined */ int id_level; /* level of indirection of this block */ int id_numfrags; /* number of frags contained in block */ ufs_lbn_t id_lballoc; /* pass1: last LBN that is allocated */ off_t id_filesize; /* for DATA nodes, the size of the directory */ ufs2_daddr_t id_entryno;/* for DATA nodes, current entry number */ int id_loc; /* for DATA nodes, current location in dir */ struct direct *id_dirp; /* for DATA nodes, ptr to current entry */ char *id_name; /* for DATA nodes, name to find or enter */ char id_type; /* type of descriptor, DATA, ADDR, or SNAP */ }; /* file types */ #define DATA 1 /* a directory */ #define SNAP 2 /* a snapshot */ #define ADDR 3 /* anything but a directory or a snapshot */ /* * Linked list of duplicate blocks. * * The list is composed of two parts. The first part of the * list (from duplist through the node pointed to by muldup) * contains a single copy of each duplicate block that has been * found. The second part of the list (from muldup to the end) * contains duplicate blocks that have been found more than once. * To check if a block has been found as a duplicate it is only * necessary to search from duplist through muldup. To find the * total number of times that a block has been found as a duplicate * the entire list must be searched for occurrences of the block * in question. The following diagram shows a sample list where * w (found twice), x (found once), y (found three times), and z * (found once) are duplicate block numbers: * * w -> y -> x -> z -> y -> w -> y * ^ ^ * | | * duplist muldup */ struct dups { struct dups *next; ufs2_daddr_t dup; }; extern struct dups *duplist; /* head of dup list */ extern struct dups *muldup; /* end of unique duplicate dup block numbers */ /* * Inode cache data structures. */ struct inoinfo { SLIST_ENTRY(inoinfo) i_hash; /* hash list */ ino_t i_number; /* inode number of this entry */ ino_t i_parent; /* inode number of parent */ ino_t i_dotdot; /* inode number of `..' */ size_t i_isize; /* size of inode */ u_int i_depth; /* depth of directory from root */ u_int i_flags; /* flags, see below */ u_int i_numblks; /* size of block array in bytes */ ufs2_daddr_t i_blks[1]; /* actually longer */ }; extern SLIST_HEAD(inohash, inoinfo) *inphash; extern struct inoinfo **inpsort; /* * flags for struct inoinfo */ #define INFO_NEW 0x0000001 /* replaced broken directory */ extern long dirhash, inplast; extern unsigned long numdirs, listmax; extern long countdirs; /* number of directories we actually found */ #define MIBSIZE 3 /* size of fsck sysctl MIBs */ extern int adjblkcnt[MIBSIZE]; /* MIB cmd to adjust inode block count */ extern int adjrefcnt[MIBSIZE]; /* MIB cmd to adjust inode reference count */ extern int adjndir[MIBSIZE]; /* MIB cmd to adjust number of directories */ extern int adjnbfree[MIBSIZE]; /* MIB cmd to adjust number of free blocks */ extern int adjnifree[MIBSIZE]; /* MIB cmd to adjust number of free inodes */ extern int adjnffree[MIBSIZE]; /* MIB cmd to adjust number of free frags */ extern int adjnumclusters[MIBSIZE]; /* MIB cmd adjust number of free clusters */ extern int adjdepth[MIBSIZE]; /* MIB cmd to adjust directory depth count */ extern int freefiles[MIBSIZE]; /* MIB cmd to free a set of files */ extern int freedirs[MIBSIZE]; /* MIB cmd to free a set of directories */ extern int freeblks[MIBSIZE]; /* MIB cmd to free a set of data blocks */ extern int setsize[MIBSIZE]; /* MIB cmd to set inode size */ extern struct fsck_cmd cmd; /* sysctl file system update commands */ extern int bkgrdcheck; /* determine if background check is possible */ extern int bkgrdsumadj; /* whether the kernel has the ability to adjust the superblock summary fields */ extern off_t bflag; /* location of alternate super block */ extern int bkgrdflag; /* use a snapshot to run on an active system */ extern char *blockmap; /* ptr to primary blk allocation map */ extern char *cdevname; /* name of device being checked */ extern int cgheader_corrupt; /* one or more CG headers are corrupt */ extern char ckclean; /* only do work if not cleanly unmounted */ extern int ckhashadd; /* check hashes to be added */ extern char *copybuf; /* buffer to copy snapshot blocks */ extern int cvtlevel; /* convert to newer file system format */ extern long dev_bsize; /* computed value of DEV_BSIZE */ extern u_int real_dev_bsize; /* actual disk sector size, not overridden */ extern int debug; /* output debugging info */ extern int Eflag; /* delete empty data blocks */ extern int fsmodified; /* 1 => write done to file system */ extern int fsreadfd; /* file descriptor for reading file system */ extern int fswritefd; /* file descriptor for writing file system */ extern char havesb; /* superblock has been read */ extern int inoopt; /* trim out unused inodes */ extern ino_t lfdir; /* lost & found directory inode number */ extern int lfmode; /* lost & found directory creation mode */ extern const char *lfname; /* lost & found directory name */ extern ufs2_daddr_t maxfsblock; /* number of blocks in the file system */ extern ino_t maxino; /* number of inodes in file system */ extern ufs2_daddr_t n_blks; /* number of blocks in use */ extern ino_t n_files; /* number of files in use */ extern char nflag; /* assume a no response */ extern char preen; /* just fix normal inconsistencies */ extern char rerun; /* rerun fsck. Only used in non-preen mode */ extern char resolved; /* cleared if unresolved changes => not clean */ extern int returntosingle; /* 1 => return to single user mode on exit */ extern long secsize; /* actual disk sector size */ extern char skipclean; /* skip clean file systems if preening */ extern int snapcnt; /* number of active snapshots */ extern struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */ extern int sujrecovery; /* 1 => doing check using the journal */ extern int surrender; /* Give up if reads fail */ extern char usedsoftdep; /* just fix soft dependency inconsistencies */ extern int wantrestart; /* Restart fsck on early termination */ extern char yflag; /* assume a yes response */ extern int zflag; /* zero unused directory space */ extern int Zflag; /* zero empty data blocks */ extern volatile sig_atomic_t got_siginfo; /* received a SIGINFO */ extern volatile sig_atomic_t got_sigalarm; /* received a SIGALRM */ #define clearinode(dp) \ if (sblock.fs_magic == FS_UFS1_MAGIC) { \ (dp)->dp1 = zino.dp1; \ } else { \ (dp)->dp2 = zino.dp2; \ } extern union dinode zino; #define setbmap(blkno) setbit(blockmap, blkno) #define testbmap(blkno) isset(blockmap, blkno) #define clrbmap(blkno) clrbit(blockmap, blkno) #define STOP 0x01 #define SKIP 0x02 #define KEEPON 0x04 #define ALTERED 0x08 #define FOUND 0x10 #define EEXIT 8 /* Standard error exit. */ #define ERERUN 16 /* fsck needs to be re-run. */ #define ERESTART -1 int flushentry(void); /* * Wrapper for malloc() that flushes the cylinder group cache to try * to get space. */ static inline void* Malloc(size_t size) { void *retval; while ((retval = malloc(size)) == NULL) if (flushentry() == 0) break; return (retval); } +/* + * Allocate a block of memory to be used as an I/O buffer. + * Ensure that the buffer is aligned to the I/O subsystem requirements. + */ +static inline void* +Balloc(size_t size) +{ + void *retval; + + while ((retval = aligned_alloc(LIBUFS_BUFALIGN, size)) == NULL) + if (flushentry() == 0) + break; + return (retval); +} /* * Wrapper for calloc() that flushes the cylinder group cache to try * to get space. */ static inline void* Calloc(size_t cnt, size_t size) { void *retval; while ((retval = calloc(cnt, size)) == NULL) if (flushentry() == 0) break; return (retval); } struct fstab; void adjust(struct inodesc *, int lcnt); void alarmhandler(int sig); ufs2_daddr_t allocblk(long cg, long frags, ufs2_daddr_t (*checkblkavail) (ufs2_daddr_t blkno, long frags)); ino_t allocdir(ino_t parent, ino_t request, int mode); ino_t allocino(ino_t request, int type); void binval(struct bufarea *); void blkerror(ino_t ino, const char *type, ufs2_daddr_t blk); char *blockcheck(char *name); int blread(int fd, char *buf, ufs2_daddr_t blk, long size); void bufinit(void); void blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size); void blerase(int fd, ufs2_daddr_t blk, long size); void blzero(int fd, ufs2_daddr_t blk, long size); void brelse(struct bufarea *); struct inoinfo *cacheino(union dinode *dp, ino_t inumber); void catch(int); void catchquit(int); void cgdirty(struct bufarea *); struct bufarea *cglookup(int cg); int changeino(ino_t dir, const char *name, ino_t newnum, int depth); void check_blkcnt(struct inode *ip); int check_cgmagic(int cg, struct bufarea *cgbp); void rebuild_cg(int cg, struct bufarea *cgbp); void check_dirdepth(struct inoinfo *inp); int chkfilesize(mode_t mode, u_int64_t filesize); int chkrange(ufs2_daddr_t blk, int cnt); void ckfini(int markclean); int ckinode(union dinode *dp, struct inodesc *); void clri(struct inodesc *, const char *type, int flag); int clearentry(struct inodesc *); void copyonwrite(struct fs *, struct bufarea *, ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long)); void direrror(ino_t ino, const char *errmesg); int dirscan(struct inodesc *); int dofix(struct inodesc *, const char *msg); int eascan(struct inodesc *, struct ufs2_dinode *dp); void fileerror(ino_t cwd, ino_t ino, const char *errmesg); void finalIOstats(void); int findino(struct inodesc *); int findname(struct inodesc *); void flush(int fd, struct bufarea *bp); int freeblock(struct inodesc *); void freedirino(ino_t ino, ino_t parent); void freeino(ino_t ino); void freeinodebuf(void); void fsckinit(void); void fsutilinit(void); int ftypeok(union dinode *dp); void getblk(struct bufarea *bp, ufs2_daddr_t blk, long size); struct bufarea *getdatablk(ufs2_daddr_t blkno, long size, int type); struct inoinfo *getinoinfo(ino_t inumber); union dinode *getnextinode(ino_t inumber, int rebuiltcg); void getpathname(char *namebuf, ino_t curdir, ino_t ino); void ginode(ino_t, struct inode *); void gjournal_check(const char *filesys); void infohandler(int sig); void irelse(struct inode *); ufs2_daddr_t ino_blkatoff(union dinode *, ino_t, ufs_lbn_t, int *, struct bufarea **); void inocleanup(void); void inodirty(struct inode *); struct inostat *inoinfo(ino_t inum); void IOstats(char *what); int linkup(ino_t orphan, ino_t parentdir, char *name); int makeentry(ino_t parent, ino_t ino, const char *name); int openfilesys(char *dev); void panic(const char *fmt, ...) __printflike(1, 2); void pass1(void); void pass1b(void); int pass1check(struct inodesc *); void pass2(void); void pass3(void); void pass4(void); void pass5(void); void pfatal(const char *fmt, ...) __printflike(1, 2); void propagate(void); void prtbuf(struct bufarea *, const char *, ...) __printflike(2, 3); void prtinode(struct inode *); void pwarn(const char *fmt, ...) __printflike(1, 2); int readsb(void); int removecachedino(ino_t); int reply(const char *question); void rwerror(const char *mesg, ufs2_daddr_t blk); void sblock_init(void); void setinodebuf(int, ino_t); int setup(char *dev); int snapblkfree(struct fs *, ufs2_daddr_t, long, ino_t, ufs2_daddr_t (*)(ufs2_daddr_t, long)); void snapremove(ino_t); void snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long)); ufs2_daddr_t std_checkblkavail(ufs2_daddr_t blkno, long frags); ufs2_daddr_t suj_checkblkavail(ufs2_daddr_t, long); int suj_check(const char *filesys); void update_maps(struct cg *, struct cg*, int); #endif /* !_FSCK_H_ */ diff --git a/sbin/fsck_ffs/fsutil.c b/sbin/fsck_ffs/fsutil.c index 2583e324e94c..05f83789236e 100644 --- a/sbin/fsck_ffs/fsutil.c +++ b/sbin/fsck_ffs/fsutil.c @@ -1,1496 +1,1495 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; #endif /* not lint */ #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include "fsck.h" int sujrecovery = 0; static struct bufarea *allocbuf(const char *); static void cg_write(struct bufarea *); static void slowio_start(void); static void slowio_end(void); static void printIOstats(void); static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */ static struct timespec startpass, finishpass; struct timeval slowio_starttime; int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ int slowio_pollcnt; static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ static struct bufarea failedbuf; /* returned by failed getdatablk() */ static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */ static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */ static struct bufhash freebufs; /* unused buffers */ static int numbufs; /* size of buffer cache */ static int cachelookups; /* number of cache lookups */ static int cachereads; /* number of cache reads */ static int flushtries; /* number of tries to reclaim memory */ char *buftype[BT_NUMBUFTYPES] = BT_NAMES; void fsutilinit(void) { diskreads = totaldiskreads = totalreads = 0; bzero(&startpass, sizeof(struct timespec)); bzero(&finishpass, sizeof(struct timespec)); bzero(&slowio_starttime, sizeof(struct timeval)); slowio_delay_usec = 10000; slowio_pollcnt = 0; flushtries = 0; } int ftypeok(union dinode *dp) { switch (DIP(dp, di_mode) & IFMT) { case IFDIR: case IFREG: case IFBLK: case IFCHR: case IFLNK: case IFSOCK: case IFIFO: return (1); default: if (debug) printf("bad file type 0%o\n", DIP(dp, di_mode)); return (0); } } int reply(const char *question) { int persevere; char c; if (preen) pfatal("INTERNAL ERROR: GOT TO reply()"); persevere = strcmp(question, "CONTINUE") == 0 || strcmp(question, "LOOK FOR ALTERNATE SUPERBLOCKS") == 0; printf("\n"); if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) { printf("%s? no\n\n", question); resolved = 0; return (0); } if (yflag || (persevere && nflag)) { printf("%s? yes\n\n", question); return (1); } do { printf("%s? [yn] ", question); (void) fflush(stdout); c = getc(stdin); while (c != '\n' && getc(stdin) != '\n') { if (feof(stdin)) { resolved = 0; return (0); } } } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); printf("\n"); if (c == 'y' || c == 'Y') return (1); resolved = 0; return (0); } /* * Look up state information for an inode. */ struct inostat * inoinfo(ino_t inum) { static struct inostat unallocated = { USTATE, 0, 0, 0 }; struct inostatlist *ilp; int iloff; if (inum >= maxino) errx(EEXIT, "inoinfo: inumber %ju out of range", (uintmax_t)inum); ilp = &inostathead[inum / sblock.fs_ipg]; iloff = inum % sblock.fs_ipg; if (iloff >= ilp->il_numalloced) return (&unallocated); return (&ilp->il_stat[iloff]); } /* * Malloc buffers and set up cache. */ void bufinit(void) { int i; initbarea(&failedbuf, BT_UNKNOWN); failedbuf.b_errs = -1; failedbuf.b_un.b_buf = NULL; - if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL) + if ((cgblk.b_un.b_buf = Balloc((unsigned int)sblock.fs_bsize)) == NULL) errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize); initbarea(&cgblk, BT_CYLGRP); numbufs = cachelookups = cachereads = 0; TAILQ_INIT(&bufqueuehd); LIST_INIT(&freebufs); for (i = 0; i < HASHSIZE; i++) LIST_INIT(&bufhashhd[i]); for (i = 0; i < BT_NUMBUFTYPES; i++) { readtime[i].tv_sec = totalreadtime[i].tv_sec = 0; readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0; readcnt[i] = totalreadcnt[i] = 0; } } static struct bufarea * allocbuf(const char *failreason) { struct bufarea *bp; char *bufp; bp = (struct bufarea *)Malloc(sizeof(struct bufarea)); - bufp = Malloc((unsigned int)sblock.fs_bsize); + bufp = Balloc((unsigned int)sblock.fs_bsize); if (bp == NULL || bufp == NULL) { errx(EEXIT, "%s", failreason); /* NOTREACHED */ } numbufs++; bp->b_un.b_buf = bufp; TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); initbarea(bp, BT_UNKNOWN); return (bp); } /* * Manage cylinder group buffers. * * Use getblk() here rather than cgget() because the cylinder group * may be corrupted but we want it anyway so we can fix it. */ static struct bufarea *cgbufs; /* header for cylinder group cache */ static int flushtries; /* number of tries to reclaim memory */ struct bufarea * cglookup(int cg) { struct bufarea *cgbp; struct cg *cgp; if ((unsigned) cg >= sblock.fs_ncg) errx(EEXIT, "cglookup: out of range cylinder group %d", cg); if (cgbufs == NULL) { - cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea)); + cgbufs = Calloc(sblock.fs_ncg, sizeof(struct bufarea)); if (cgbufs == NULL) errx(EEXIT, "Cannot allocate cylinder group buffers"); } cgbp = &cgbufs[cg]; if (cgbp->b_un.b_cg != NULL) return (cgbp); cgp = NULL; if (flushtries == 0) - cgp = Malloc((unsigned int)sblock.fs_cgsize); + cgp = Balloc((unsigned int)sblock.fs_cgsize); if (cgp == NULL) { if (sujrecovery) errx(EEXIT,"Ran out of memory during journal recovery"); flush(fswritefd, &cgblk); getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); return (&cgblk); } cgbp->b_un.b_cg = cgp; initbarea(cgbp, BT_CYLGRP); getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize); return (cgbp); } /* * Mark a cylinder group buffer as dirty. * Update its check-hash if they are enabled. */ void cgdirty(struct bufarea *cgbp) { struct cg *cg; cg = cgbp->b_un.b_cg; if ((sblock.fs_metackhash & CK_CYLGRP) != 0) { cg->cg_ckhash = 0; cg->cg_ckhash = calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize); } dirty(cgbp); } /* * Attempt to flush a cylinder group cache entry. * Return whether the flush was successful. */ int flushentry(void) { struct bufarea *cgbp; if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL) return (0); cgbp = &cgbufs[flushtries++]; if (cgbp->b_un.b_cg == NULL) return (0); flush(fswritefd, cgbp); free(cgbp->b_un.b_buf); cgbp->b_un.b_buf = NULL; return (1); } /* * Manage a cache of filesystem disk blocks. */ struct bufarea * getdatablk(ufs2_daddr_t blkno, long size, int type) { struct bufarea *bp; struct bufhash *bhdp; cachelookups++; /* * If out of range, return empty buffer with b_err == -1 * * Skip check for inodes because chkrange() considers * metadata areas invalid to write data. */ if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) { failedbuf.b_refcnt++; return (&failedbuf); } bhdp = &bufhashhd[HASH(blkno)]; LIST_FOREACH(bp, bhdp, b_hash) if (bp->b_bno == fsbtodb(&sblock, blkno)) { if (debug && bp->b_size != size) { prtbuf(bp, "getdatablk: size mismatch"); pfatal("getdatablk: b_size %d != size %ld\n", bp->b_size, size); } TAILQ_REMOVE(&bufqueuehd, bp, b_list); goto foundit; } /* * Move long-term busy buffer back to the front of the LRU so we * do not endless inspect them for recycling. */ bp = TAILQ_LAST(&bufqueuehd, bufqueue); if (bp != NULL && bp->b_refcnt != 0) { TAILQ_REMOVE(&bufqueuehd, bp, b_list); TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); } /* * Allocate up to the minimum number of buffers before * considering recycling any of them. */ if (size > sblock.fs_bsize) errx(EEXIT, "Excessive buffer size %ld > %d\n", size, sblock.fs_bsize); if ((bp = LIST_FIRST(&freebufs)) != NULL) { LIST_REMOVE(bp, b_hash); } else if (numbufs < MINBUFS) { bp = allocbuf("cannot create minimal buffer pool"); } else if (sujrecovery) { /* * SUJ recovery does not want anything written until it * has successfully completed (so it can fail back to * full fsck). Thus, we can only recycle clean buffers. */ TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0) break; if (bp == NULL) bp = allocbuf("Ran out of memory during " "journal recovery"); else LIST_REMOVE(bp, b_hash); } else { /* * Recycle oldest non-busy buffer. */ TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) if (bp->b_refcnt == 0) break; if (bp == NULL) bp = allocbuf("Ran out of memory for buffers"); else LIST_REMOVE(bp, b_hash); } TAILQ_REMOVE(&bufqueuehd, bp, b_list); flush(fswritefd, bp); bp->b_type = type; LIST_INSERT_HEAD(bhdp, bp, b_hash); getblk(bp, blkno, size); cachereads++; /* fall through */ foundit: TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); if (debug && bp->b_type != type) { printf("getdatablk: buffer type changed to %s", BT_BUFTYPE(type)); prtbuf(bp, ""); } if (bp->b_errs == 0) bp->b_refcnt++; return (bp); } void getblk(struct bufarea *bp, ufs2_daddr_t blk, long size) { ufs2_daddr_t dblk; struct timespec start, finish; dblk = fsbtodb(&sblock, blk); if (bp->b_bno == dblk) { totalreads++; } else { if (debug) { readcnt[bp->b_type]++; clock_gettime(CLOCK_REALTIME_PRECISE, &start); } bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size); if (debug) { clock_gettime(CLOCK_REALTIME_PRECISE, &finish); timespecsub(&finish, &start, &finish); timespecadd(&readtime[bp->b_type], &finish, &readtime[bp->b_type]); } bp->b_bno = dblk; bp->b_size = size; } } void brelse(struct bufarea *bp) { if (bp->b_refcnt <= 0) prtbuf(bp, "brelse: buffer with negative reference count"); bp->b_refcnt--; } void binval(struct bufarea *bp) { bp->b_flags &= ~B_DIRTY; LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&freebufs, bp, b_hash); } void flush(int fd, struct bufarea *bp) { struct inode ip; if ((bp->b_flags & B_DIRTY) == 0) return; bp->b_flags &= ~B_DIRTY; if (fswritefd < 0) { pfatal("WRITING IN READ_ONLY MODE.\n"); return; } if (bp->b_errs != 0) pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n", (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ", (long long)bp->b_bno); bp->b_errs = 0; /* * Write using the appropriate function. */ switch (bp->b_type) { case BT_SUPERBLK: if (bp != &sblk) pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n", bp, &sblk); /* * Superblocks are always pre-copied so we do not need * to check them for copy-on-write. */ if (sbput(fd, bp->b_un.b_fs, 0) == 0) fsmodified = 1; break; case BT_CYLGRP: /* * Cylinder groups are always pre-copied so we do not * need to check them for copy-on-write. */ if (sujrecovery) cg_write(bp); if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0) fsmodified = 1; break; case BT_INODES: if (debug && sblock.fs_magic == FS_UFS2_MAGIC) { struct ufs2_dinode *dp = bp->b_un.b_dinode2; int i; for (i = 0; i < bp->b_size; dp++, i += sizeof(*dp)) { if (ffs_verify_dinode_ckhash(&sblock, dp) == 0) continue; pwarn("flush: INODE CHECK-HASH FAILED"); ip.i_bp = bp; ip.i_dp = (union dinode *)dp; ip.i_number = bp->b_index + (i / sizeof(*dp)); prtinode(&ip); if (preen || reply("FIX") != 0) { if (preen) printf(" (FIXED)\n"); ffs_update_dinode_ckhash(&sblock, dp); inodirty(&ip); } } } /* FALLTHROUGH */ default: copyonwrite(&sblock, bp, std_checkblkavail); blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size); break; } } /* * If there are any snapshots, ensure that all the blocks that they * care about have been copied, then release the snapshot inodes. * These operations need to be done before we rebuild the cylinder * groups so that any block allocations are properly recorded. * Since all the cylinder group maps have already been copied in * the snapshots, no further snapshot copies will need to be done. */ void snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long)) { struct bufarea *bp; int cnt; if (snapcnt > 0) { if (debug) printf("Check for snapshot copies\n"); TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) if ((bp->b_flags & B_DIRTY) != 0) copyonwrite(&sblock, bp, checkblkavail); for (cnt = 0; cnt < snapcnt; cnt++) irelse(&snaplist[cnt]); snapcnt = 0; } } /* * Journaled soft updates does not maintain cylinder group summary * information during cleanup, so this routine recalculates the summary * information and updates the superblock summary in preparation for * writing out the cylinder group. */ static void cg_write(struct bufarea *bp) { ufs1_daddr_t fragno, cgbno, maxbno; u_int8_t *blksfree; struct csum *csp; struct cg *cgp; int blk; int i; /* * Fix the frag and cluster summary. */ cgp = bp->b_un.b_cg; cgp->cg_cs.cs_nbfree = 0; cgp->cg_cs.cs_nffree = 0; bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum)); maxbno = fragstoblks(&sblock, sblock.fs_fpg); if (sblock.fs_contigsumsize > 0) { for (i = 1; i <= sblock.fs_contigsumsize; i++) cg_clustersum(cgp)[i] = 0; bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT)); } blksfree = cg_blksfree(cgp); for (cgbno = 0; cgbno < maxbno; cgbno++) { if (ffs_isfreeblock(&sblock, blksfree, cgbno)) continue; if (ffs_isblock(&sblock, blksfree, cgbno)) { ffs_clusteracct(&sblock, cgp, cgbno, 1); cgp->cg_cs.cs_nbfree++; continue; } fragno = blkstofrags(&sblock, cgbno); blk = blkmap(&sblock, blksfree, fragno); ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1); for (i = 0; i < sblock.fs_frag; i++) if (isset(blksfree, fragno + i)) cgp->cg_cs.cs_nffree++; } /* * Update the superblock cg summary from our now correct values * before writing the block. */ csp = &sblock.fs_cs(&sblock, cgp->cg_cgx); sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir; sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree; sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree; sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree; sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs; } void rwerror(const char *mesg, ufs2_daddr_t blk) { if (bkgrdcheck) exit(EEXIT); if (preen == 0) printf("\n"); pfatal("CANNOT %s: %ld", mesg, (long)blk); if (reply("CONTINUE") == 0) exit(EEXIT); } void ckfini(int markclean) { struct bufarea *bp, *nbp; int ofsmodified, cnt, cg; if (bkgrdflag) { if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) { cmd.value = FS_UNCLEAN; cmd.size = markclean ? -1 : 1; if (sysctlbyname("vfs.ffs.setflags", 0, 0, &cmd, sizeof cmd) == -1) pwarn("CANNOT SET FILE SYSTEM DIRTY FLAG\n"); if (!preen) { printf("\n***** FILE SYSTEM MARKED %s *****\n", markclean ? "CLEAN" : "DIRTY"); if (!markclean) rerun = 1; } } else if (!preen && !markclean) { printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); rerun = 1; } bkgrdflag = 0; } if (debug && cachelookups > 0) printf("cache with %d buffers missed %d of %d (%d%%)\n", numbufs, cachereads, cachelookups, (int)(cachereads * 100 / cachelookups)); if (fswritefd < 0) { (void)close(fsreadfd); return; } /* * To remain idempotent with partial truncations the buffers * must be flushed in this order: * 1) cylinder groups (bitmaps) * 2) indirect, directory, external attribute, and data blocks * 3) inode blocks * 4) superblock * This ordering preserves access to the modified pointers * until they are freed. */ /* Step 1: cylinder groups */ if (debug) printf("Flush Cylinder groups\n"); if (cgbufs != NULL) { for (cnt = 0; cnt < sblock.fs_ncg; cnt++) { if (cgbufs[cnt].b_un.b_cg == NULL) continue; flush(fswritefd, &cgbufs[cnt]); free(cgbufs[cnt].b_un.b_cg); } free(cgbufs); cgbufs = NULL; } flush(fswritefd, &cgblk); free(cgblk.b_un.b_buf); cgblk.b_un.b_buf = NULL; cnt = 0; /* Step 2: indirect, directory, external attribute, and data blocks */ if (debug) printf("Flush indirect, directory, external attribute, " "and data blocks\n"); if (pdirbp != NULL) { brelse(pdirbp); pdirbp = NULL; } TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { switch (bp->b_type) { /* These should not be in the buffer cache list */ case BT_UNKNOWN: case BT_SUPERBLK: case BT_CYLGRP: default: prtbuf(bp,"ckfini: improper buffer type on cache list"); continue; /* These are the ones to flush in this step */ case BT_LEVEL1: case BT_LEVEL2: case BT_LEVEL3: case BT_EXTATTR: case BT_DIRDATA: case BT_DATA: break; /* These are the ones to flush in the next step */ case BT_INODES: continue; } if (debug && bp->b_refcnt != 0) prtbuf(bp, "ckfini: clearing in-use buffer"); TAILQ_REMOVE(&bufqueuehd, bp, b_list); LIST_REMOVE(bp, b_hash); cnt++; flush(fswritefd, bp); free(bp->b_un.b_buf); free((char *)bp); } /* Step 3: inode blocks */ if (debug) printf("Flush inode blocks\n"); if (icachebp != NULL) { brelse(icachebp); icachebp = NULL; } TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { if (debug && bp->b_refcnt != 0) prtbuf(bp, "ckfini: clearing in-use buffer"); TAILQ_REMOVE(&bufqueuehd, bp, b_list); LIST_REMOVE(bp, b_hash); cnt++; flush(fswritefd, bp); free(bp->b_un.b_buf); free((char *)bp); } if (numbufs != cnt) errx(EEXIT, "panic: lost %d buffers", numbufs - cnt); /* Step 4: superblock */ if (debug) printf("Flush the superblock\n"); flush(fswritefd, &sblk); if (havesb && cursnapshot == 0 && sblk.b_bno != sblock.fs_sblockloc / dev_bsize) { if (preen || reply("UPDATE STANDARD SUPERBLOCK")) { /* Change write destination to standard superblock */ sblock.fs_sblockactualloc = sblock.fs_sblockloc; sblk.b_bno = sblock.fs_sblockloc / dev_bsize; sbdirty(); flush(fswritefd, &sblk); } else { markclean = 0; } } if (cursnapshot == 0 && sblock.fs_clean != markclean) { if ((sblock.fs_clean = markclean) != 0) { sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK); sblock.fs_pendingblocks = 0; sblock.fs_pendinginodes = 0; } sbdirty(); ofsmodified = fsmodified; flush(fswritefd, &sblk); fsmodified = ofsmodified; if (!preen) { printf("\n***** FILE SYSTEM MARKED %s *****\n", markclean ? "CLEAN" : "DIRTY"); if (!markclean) rerun = 1; } } else if (!preen) { if (markclean) { printf("\n***** FILE SYSTEM IS CLEAN *****\n"); } else { printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); rerun = 1; } } /* * Free allocated tracking structures. */ if (blockmap != NULL) free(blockmap); blockmap = NULL; if (inostathead != NULL) { for (cg = 0; cg < sblock.fs_ncg; cg++) if (inostathead[cg].il_stat != NULL) free((char *)inostathead[cg].il_stat); free(inostathead); } inostathead = NULL; inocleanup(); finalIOstats(); (void)close(fsreadfd); (void)close(fswritefd); } /* * Print out I/O statistics. */ void IOstats(char *what) { int i; if (debug == 0) return; if (diskreads == 0) { printf("%s: no I/O\n\n", what); return; } if (startpass.tv_sec == 0) startpass = startprog; printf("%s: I/O statistics\n", what); printIOstats(); totaldiskreads += diskreads; diskreads = 0; for (i = 0; i < BT_NUMBUFTYPES; i++) { timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]); totalreadcnt[i] += readcnt[i]; readtime[i].tv_sec = readtime[i].tv_nsec = 0; readcnt[i] = 0; } clock_gettime(CLOCK_REALTIME_PRECISE, &startpass); } void finalIOstats(void) { int i; if (debug == 0) return; printf("Final I/O statistics\n"); totaldiskreads += diskreads; diskreads = totaldiskreads; startpass = startprog; for (i = 0; i < BT_NUMBUFTYPES; i++) { timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]); totalreadcnt[i] += readcnt[i]; readtime[i] = totalreadtime[i]; readcnt[i] = totalreadcnt[i]; } printIOstats(); } static void printIOstats(void) { long long msec, totalmsec; int i; clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass); timespecsub(&finishpass, &startpass, &finishpass); printf("Running time: %jd.%03ld sec\n", (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000); printf("buffer reads by type:\n"); for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++) totalmsec += readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000; if (totalmsec == 0) totalmsec = 1; for (i = 0; i < BT_NUMBUFTYPES; i++) { if (readcnt[i] == 0) continue; msec = readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000; printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n", buftype[i], readcnt[i], readcnt[i] * 100 / diskreads, (readcnt[i] * 1000 / diskreads) % 10, (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec / 1000000, msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10); } printf("\n"); } int blread(int fd, char *buf, ufs2_daddr_t blk, long size) { char *cp; int i, errs; off_t offset; offset = blk; offset *= dev_bsize; if (bkgrdflag) slowio_start(); totalreads++; diskreads++; if (pread(fd, buf, (int)size, offset) == size) { if (bkgrdflag) slowio_end(); return (0); } /* * This is handled specially here instead of in rwerror because * rwerror is used for all sorts of errors, not just true read/write * errors. It should be refactored and fixed. */ if (surrender) { pfatal("CANNOT READ_BLK: %ld", (long)blk); errx(EEXIT, "ABORTING DUE TO READ ERRORS"); } else rwerror("READ BLK", blk); errs = 0; memset(buf, 0, (size_t)size); printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { if (pread(fd, cp, (int)secsize, offset + i) != secsize) { if (secsize != dev_bsize && dev_bsize != 1) printf(" %jd (%jd),", (intmax_t)(blk * dev_bsize + i) / secsize, (intmax_t)blk + i / dev_bsize); else printf(" %jd,", (intmax_t)blk + i / dev_bsize); errs++; } } printf("\n"); if (errs) resolved = 0; return (errs); } void blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size) { int i; char *cp; off_t offset; if (fd < 0) return; offset = blk; offset *= dev_bsize; if (pwrite(fd, buf, size, offset) == size) { fsmodified = 1; return; } resolved = 0; rwerror("WRITE BLK", blk); printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize) printf(" %jd,", (intmax_t)blk + i / dev_bsize); printf("\n"); return; } void blerase(int fd, ufs2_daddr_t blk, long size) { off_t ioarg[2]; if (fd < 0) return; ioarg[0] = blk * dev_bsize; ioarg[1] = size; ioctl(fd, DIOCGDELETE, ioarg); /* we don't really care if we succeed or not */ return; } /* * Fill a contiguous region with all-zeroes. Note ZEROBUFSIZE is by * definition a multiple of dev_bsize. */ void blzero(int fd, ufs2_daddr_t blk, long size) { static char *zero; off_t offset, len; if (fd < 0) return; if (zero == NULL) { - zero = calloc(ZEROBUFSIZE, 1); + zero = Balloc(ZEROBUFSIZE); if (zero == NULL) errx(EEXIT, "cannot allocate buffer pool"); } offset = blk * dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK BLK", blk); while (size > 0) { len = MIN(ZEROBUFSIZE, size); if (write(fd, zero, len) != len) rwerror("WRITE BLK", blk); blk += len / dev_bsize; size -= len; } } /* * Verify cylinder group's magic number and other parameters. If the * test fails, offer an option to rebuild the whole cylinder group. * * Return 1 if the cylinder group is good or return 0 if it is bad. */ #undef CHK #define CHK(lhs, op, rhs, fmt) \ if (lhs op rhs) { \ pwarn("UFS%d cylinder group %d failed: " \ "%s (" #fmt ") %s %s (" #fmt ")\n", \ sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, cg, \ #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs); \ error = 1; \ } int check_cgmagic(int cg, struct bufarea *cgbp) { struct cg *cgp = cgbp->b_un.b_cg; uint32_t cghash, calchash; static int prevfailcg = -1; long start; int error; /* * Extended cylinder group checks. */ calchash = cgp->cg_ckhash; if ((sblock.fs_metackhash & CK_CYLGRP) != 0 && (ckhashadd & CK_CYLGRP) == 0) { cghash = cgp->cg_ckhash; cgp->cg_ckhash = 0; calchash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); cgp->cg_ckhash = cghash; } error = 0; CHK(cgp->cg_ckhash, !=, calchash, "%jd"); CHK(cg_chkmagic(cgp), ==, 0, "%jd"); CHK(cgp->cg_cgx, !=, cg, "%jd"); CHK(cgp->cg_ndblk, >, sblock.fs_fpg, "%jd"); if (sblock.fs_magic == FS_UFS1_MAGIC) { CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd"); CHK(cgp->cg_old_ncyl, >, sblock.fs_old_cpg, "%jd"); } else if (sblock.fs_magic == FS_UFS2_MAGIC) { CHK(cgp->cg_niblk, !=, sblock.fs_ipg, "%jd"); CHK(cgp->cg_initediblk, >, sblock.fs_ipg, "%jd"); } if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) { CHK(cgp->cg_ndblk, !=, sblock.fs_fpg, "%jd"); } else { CHK(cgp->cg_ndblk, !=, sblock.fs_size - cgbase(&sblock, cg), "%jd"); } start = sizeof(*cgp); if (sblock.fs_magic == FS_UFS2_MAGIC) { CHK(cgp->cg_iusedoff, !=, start, "%jd"); } else if (sblock.fs_magic == FS_UFS1_MAGIC) { CHK(cgp->cg_niblk, !=, 0, "%jd"); CHK(cgp->cg_initediblk, !=, 0, "%jd"); CHK(cgp->cg_old_ncyl, !=, sblock.fs_old_cpg, "%jd"); CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd"); CHK(cgp->cg_old_btotoff, !=, start, "%jd"); CHK(cgp->cg_old_boff, !=, cgp->cg_old_btotoff + sblock.fs_old_cpg * sizeof(int32_t), "%jd"); CHK(cgp->cg_iusedoff, !=, cgp->cg_old_boff + sblock.fs_old_cpg * sizeof(u_int16_t), "%jd"); } CHK(cgp->cg_freeoff, !=, cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT), "%jd"); if (sblock.fs_contigsumsize == 0) { CHK(cgp->cg_nextfreeoff, !=, cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), "%jd"); } else { CHK(cgp->cg_nclusterblks, !=, cgp->cg_ndblk / sblock.fs_frag, "%jd"); CHK(cgp->cg_clustersumoff, !=, roundup(cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), sizeof(u_int32_t)) - sizeof(u_int32_t), "%jd"); CHK(cgp->cg_clusteroff, !=, cgp->cg_clustersumoff + (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t), "%jd"); CHK(cgp->cg_nextfreeoff, !=, cgp->cg_clusteroff + howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT), "%jd"); } if (error == 0) return (1); if (prevfailcg == cg) return (0); prevfailcg = cg; pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg); printf("\n"); return (0); } void rebuild_cg(int cg, struct bufarea *cgbp) { struct cg *cgp = cgbp->b_un.b_cg; long start; /* * Zero out the cylinder group and then initialize critical fields. * Bit maps and summaries will be recalculated by later passes. */ memset(cgp, 0, (size_t)sblock.fs_cgsize); cgp->cg_magic = CG_MAGIC; cgp->cg_cgx = cg; cgp->cg_niblk = sblock.fs_ipg; cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock)); if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) cgp->cg_ndblk = sblock.fs_fpg; else cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg); start = sizeof(*cgp); if (sblock.fs_magic == FS_UFS2_MAGIC) { cgp->cg_iusedoff = start; } else if (sblock.fs_magic == FS_UFS1_MAGIC) { cgp->cg_niblk = 0; cgp->cg_initediblk = 0; cgp->cg_old_ncyl = sblock.fs_old_cpg; cgp->cg_old_niblk = sblock.fs_ipg; cgp->cg_old_btotoff = start; cgp->cg_old_boff = cgp->cg_old_btotoff + sblock.fs_old_cpg * sizeof(int32_t); cgp->cg_iusedoff = cgp->cg_old_boff + sblock.fs_old_cpg * sizeof(u_int16_t); } cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT); cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT); if (sblock.fs_contigsumsize > 0) { cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag; cgp->cg_clustersumoff = roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t)); cgp->cg_clustersumoff -= sizeof(u_int32_t); cgp->cg_clusteroff = cgp->cg_clustersumoff + (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t); cgp->cg_nextfreeoff = cgp->cg_clusteroff + howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT); } cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); cgdirty(cgbp); } /* * allocate a data block with the specified number of fragments */ ufs2_daddr_t allocblk(long startcg, long frags, ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags)) { ufs2_daddr_t blkno, newblk; if (sujrecovery && checkblkavail == std_checkblkavail) { pfatal("allocblk: std_checkblkavail used for SUJ recovery\n"); return (0); } if (frags <= 0 || frags > sblock.fs_frag) return (0); for (blkno = MAX(cgdata(&sblock, startcg), 0); blkno < maxfsblock - sblock.fs_frag; blkno += sblock.fs_frag) { if ((newblk = (*checkblkavail)(blkno, frags)) == 0) continue; if (newblk > 0) return (newblk); if (newblk < 0) blkno = -newblk; } for (blkno = MAX(cgdata(&sblock, 0), 0); blkno < cgbase(&sblock, startcg) - sblock.fs_frag; blkno += sblock.fs_frag) { if ((newblk = (*checkblkavail)(blkno, frags)) == 0) continue; if (newblk > 0) return (newblk); if (newblk < 0) blkno = -newblk; } return (0); } ufs2_daddr_t std_checkblkavail(ufs2_daddr_t blkno, long frags) { struct bufarea *cgbp; struct cg *cgp; ufs2_daddr_t j, k, baseblk; long cg; if ((u_int64_t)blkno > sblock.fs_size) return (0); for (j = 0; j <= sblock.fs_frag - frags; j++) { if (testbmap(blkno + j)) continue; for (k = 1; k < frags; k++) if (testbmap(blkno + j + k)) break; if (k < frags) { j += k; continue; } cg = dtog(&sblock, blkno + j); cgbp = cglookup(cg); cgp = cgbp->b_un.b_cg; if (!check_cgmagic(cg, cgbp)) return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag)); baseblk = dtogd(&sblock, blkno + j); for (k = 0; k < frags; k++) { setbmap(blkno + j + k); clrbit(cg_blksfree(cgp), baseblk + k); } n_blks += frags; if (frags == sblock.fs_frag) cgp->cg_cs.cs_nbfree--; else cgp->cg_cs.cs_nffree -= frags; cgdirty(cgbp); return (blkno + j); } return (0); } /* * Check whether a file size is within the limits for the filesystem. * Return 1 when valid and 0 when too big. * * This should match the file size limit in ffs_mountfs(). */ int chkfilesize(mode_t mode, u_int64_t filesize) { u_int64_t kernmaxfilesize; if (sblock.fs_magic == FS_UFS1_MAGIC) kernmaxfilesize = (off_t)0x40000000 * sblock.fs_bsize - 1; else kernmaxfilesize = sblock.fs_maxfilesize; if (filesize > kernmaxfilesize || filesize > sblock.fs_maxfilesize || (mode == IFDIR && filesize > MAXDIRSIZE)) { if (debug) printf("bad file size %ju:", (uintmax_t)filesize); return (0); } return (1); } /* * Slow down IO so as to leave some disk bandwidth for other processes */ void slowio_start() { /* Delay one in every 8 operations */ slowio_pollcnt = (slowio_pollcnt + 1) & 7; if (slowio_pollcnt == 0) { gettimeofday(&slowio_starttime, NULL); } } void slowio_end() { struct timeval tv; int delay_usec; if (slowio_pollcnt != 0) return; /* Update the slowdown interval. */ gettimeofday(&tv, NULL); delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 + (tv.tv_usec - slowio_starttime.tv_usec); if (delay_usec < 64) delay_usec = 64; if (delay_usec > 2500000) delay_usec = 2500000; slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6; /* delay by 8 times the average IO delay */ if (slowio_delay_usec > 64) usleep(slowio_delay_usec * 8); } /* * Find a pathname */ void getpathname(char *namebuf, ino_t curdir, ino_t ino) { int len; char *cp; struct inode ip; struct inodesc idesc; static int busy = 0; if (curdir == ino && ino == UFS_ROOTINO) { (void)strcpy(namebuf, "/"); return; } if (busy || !INO_IS_DVALID(curdir)) { (void)strcpy(namebuf, "?"); return; } busy = 1; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_fix = IGNORE; cp = &namebuf[MAXPATHLEN - 1]; *cp = '\0'; if (curdir != ino) { idesc.id_parent = curdir; goto namelookup; } while (ino != UFS_ROOTINO) { idesc.id_number = ino; idesc.id_func = findino; idesc.id_name = strdup(".."); ginode(ino, &ip); if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { irelse(&ip); free(idesc.id_name); break; } irelse(&ip); free(idesc.id_name); namelookup: idesc.id_number = idesc.id_parent; idesc.id_parent = ino; idesc.id_func = findname; idesc.id_name = namebuf; ginode(idesc.id_number, &ip); if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { irelse(&ip); break; } irelse(&ip); len = strlen(namebuf); cp -= len; memmove(cp, namebuf, (size_t)len); *--cp = '/'; if (cp < &namebuf[UFS_MAXNAMLEN]) break; ino = idesc.id_number; } busy = 0; if (ino != UFS_ROOTINO) *--cp = '?'; memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); } void catch(int sig __unused) { ckfini(0); exit(12); } /* * When preening, allow a single quit to signal * a special exit after file system checks complete * so that reboot sequence may be interrupted. */ void catchquit(int sig __unused) { printf("returning to single-user after file system check\n"); returntosingle = 1; (void)signal(SIGQUIT, SIG_DFL); } /* * determine whether an inode should be fixed. */ int dofix(struct inodesc *idesc, const char *msg) { switch (idesc->id_fix) { case DONTKNOW: if (idesc->id_type == DATA) direrror(idesc->id_number, msg); else pwarn("%s", msg); if (preen) { printf(" (SALVAGED)\n"); idesc->id_fix = FIX; return (ALTERED); } if (reply("SALVAGE") == 0) { idesc->id_fix = NOFIX; return (0); } idesc->id_fix = FIX; return (ALTERED); case FIX: return (ALTERED); case NOFIX: case IGNORE: return (0); default: errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); } /* NOTREACHED */ return (0); } #include /* * Print details about a buffer. */ void prtbuf(struct bufarea *bp, const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (preen) (void)fprintf(stdout, "%s: ", cdevname); (void)vfprintf(stdout, fmt, ap); va_end(ap); printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, " "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno, bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean", (intmax_t) bp->b_index); } /* * An unexpected inconsistency occurred. * Die if preening or file system is running with soft dependency protocol, * otherwise just print message and continue. */ void pfatal(const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (!preen) { (void)vfprintf(stdout, fmt, ap); va_end(ap); if (usedsoftdep) (void)fprintf(stdout, "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n"); /* * Force foreground fsck to clean up inconsistency. */ if (bkgrdflag) { cmd.value = FS_NEEDSFSCK; cmd.size = 1; if (sysctlbyname("vfs.ffs.setflags", 0, 0, &cmd, sizeof cmd) == -1) pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); fprintf(stdout, "CANNOT RUN IN BACKGROUND\n"); ckfini(0); exit(EEXIT); } return; } if (cdevname == NULL) cdevname = strdup("fsck"); (void)fprintf(stdout, "%s: ", cdevname); (void)vfprintf(stdout, fmt, ap); (void)fprintf(stdout, "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", cdevname, usedsoftdep ? " SOFT UPDATE " : " "); /* * Force foreground fsck to clean up inconsistency. */ if (bkgrdflag) { cmd.value = FS_NEEDSFSCK; cmd.size = 1; if (sysctlbyname("vfs.ffs.setflags", 0, 0, &cmd, sizeof cmd) == -1) pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); } ckfini(0); exit(EEXIT); } /* * Pwarn just prints a message when not preening or running soft dependency * protocol, or a warning (preceded by filename) when preening. */ void pwarn(const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (preen) (void)fprintf(stdout, "%s: ", cdevname); (void)vfprintf(stdout, fmt, ap); va_end(ap); } /* * Stub for routines from kernel. */ void panic(const char *fmt, ...) { va_list ap; va_start(ap, fmt); pfatal("INTERNAL INCONSISTENCY:"); (void)vfprintf(stdout, fmt, ap); va_end(ap); exit(EEXIT); } diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c index 3db8a5e5c23d..e4349ff97088 100644 --- a/sbin/fsck_ffs/inode.c +++ b/sbin/fsck_ffs/inode.c @@ -1,1475 +1,1474 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char sccsid[] = "@(#)inode.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #endif #include #include #include #include #include #include #include #include #include #include #include #include -#include #include "fsck.h" struct bufarea *icachebp; /* inode cache buffer */ static int iblock(struct inodesc *, off_t isize, int type); static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t, struct bufarea **); static int snapclean(struct inodesc *idesc); static void chkcopyonwrite(struct fs *, ufs2_daddr_t, ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long)); int ckinode(union dinode *dp, struct inodesc *idesc) { off_t remsize, sizepb; int i, offset, ret; struct inode ip; union dinode dino; ufs2_daddr_t ndb; mode_t mode; char pathbuf[MAXPATHLEN + 1]; if (idesc->id_fix != IGNORE) idesc->id_fix = DONTKNOW; idesc->id_dp = dp; idesc->id_lbn = -1; idesc->id_lballoc = -1; idesc->id_level = 0; idesc->id_entryno = 0; idesc->id_filesize = DIP(dp, di_size); mode = DIP(dp, di_mode) & IFMT; if (mode == IFBLK || mode == IFCHR || (mode == IFLNK && DIP(dp, di_size) < (unsigned)sblock.fs_maxsymlinklen)) return (KEEPON); if (sblock.fs_magic == FS_UFS1_MAGIC) dino.dp1 = dp->dp1; else dino.dp2 = dp->dp2; if (DIP(&dino, di_size) < 0) { pfatal("NEGATIVE INODE SIZE %jd\n", DIP(&dino, di_size)); return (STOP); } ndb = howmany(DIP(&dino, di_size), sblock.fs_bsize); for (i = 0; i < UFS_NDADDR; i++) { idesc->id_lbn++; if (--ndb == 0 && (offset = blkoff(&sblock, DIP(&dino, di_size))) != 0) idesc->id_numfrags = numfrags(&sblock, fragroundup(&sblock, offset)); else idesc->id_numfrags = sblock.fs_frag; if (DIP(&dino, di_db[i]) == 0) { if (idesc->id_type == DATA && ndb >= 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { ginode(idesc->id_number, &ip); DIP_SET(ip.i_dp, di_size, i * sblock.fs_bsize); printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(&ip); irelse(&ip); } return (STOP); } continue; } idesc->id_blkno = DIP(&dino, di_db[i]); if (idesc->id_type != DATA) ret = (*idesc->id_func)(idesc); else ret = dirscan(idesc); if (ret & STOP) return (ret); } idesc->id_numfrags = sblock.fs_frag; remsize = DIP(&dino, di_size) - sblock.fs_bsize * UFS_NDADDR; sizepb = sblock.fs_bsize; for (i = 0; i < UFS_NIADDR; i++) { sizepb *= NINDIR(&sblock); idesc->id_level = i + 1; if (DIP(&dino, di_ib[i])) { idesc->id_blkno = DIP(&dino, di_ib[i]); ret = iblock(idesc, remsize, BT_LEVEL1 + i); if (ret & STOP) return (ret); } else if (remsize > 0) { idesc->id_lbn += sizepb / sblock.fs_bsize; if (idesc->id_type == DATA) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { ginode(idesc->id_number, &ip); DIP_SET(ip.i_dp, di_size, DIP(ip.i_dp, di_size) - remsize); remsize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(&ip); irelse(&ip); break; } } } remsize -= sizepb; } return (KEEPON); } static int iblock(struct inodesc *idesc, off_t isize, int type) { struct inode ip; struct bufarea *bp; int i, n, (*func)(struct inodesc *), nif; off_t sizepb; char buf[BUFSIZ]; char pathbuf[MAXPATHLEN + 1]; if (idesc->id_type != DATA) { func = idesc->id_func; if (((n = (*func)(idesc)) & KEEPON) == 0) return (n); } else func = dirscan; bp = getdatablk(idesc->id_blkno, sblock.fs_bsize, type); if (bp->b_errs != 0) { brelse(bp); return (SKIP); } idesc->id_bp = bp; idesc->id_level--; for (sizepb = sblock.fs_bsize, i = 0; i < idesc->id_level; i++) sizepb *= NINDIR(&sblock); if (howmany(isize, sizepb) > NINDIR(&sblock)) nif = NINDIR(&sblock); else nif = howmany(isize, sizepb); if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) { for (i = nif; i < NINDIR(&sblock); i++) { if (IBLK(bp, i) == 0) continue; (void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu", (u_long)idesc->id_number); if (preen) { pfatal("%s", buf); } else if (dofix(idesc, buf)) { IBLK_SET(bp, i, 0); dirty(bp); } } flush(fswritefd, bp); } for (i = 0; i < nif; i++) { if (IBLK(bp, i)) { idesc->id_blkno = IBLK(bp, i); bp->b_index = i; if (idesc->id_level == 0) { idesc->id_lbn++; n = (*func)(idesc); } else { n = iblock(idesc, isize, type - 1); idesc->id_level++; } if (n & STOP) { brelse(bp); return (n); } } else { idesc->id_lbn += sizepb / sblock.fs_bsize; if (idesc->id_type == DATA && isize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { ginode(idesc->id_number, &ip); DIP_SET(ip.i_dp, di_size, DIP(ip.i_dp, di_size) - isize); isize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(&ip); brelse(bp); return(STOP); } } } isize -= sizepb; } brelse(bp); return (KEEPON); } /* * Finds the disk block address at the specified lbn within the inode * specified by dp. This follows the whole tree and honors di_size and * di_extsize so it is a true test of reachability. The lbn may be * negative if an extattr or indirect block is requested. */ ufs2_daddr_t ino_blkatoff(union dinode *dp, ino_t ino, ufs_lbn_t lbn, int *frags, struct bufarea **bpp) { ufs_lbn_t tmpval; ufs_lbn_t cur; ufs_lbn_t next; int i; *frags = 0; if (bpp != NULL) *bpp = NULL; /* * Handle extattr blocks first. */ if (lbn < 0 && lbn >= -UFS_NXADDR) { lbn = -1 - lbn; if (lbn > lblkno(&sblock, dp->dp2.di_extsize - 1)) return (0); *frags = numfrags(&sblock, sblksize(&sblock, dp->dp2.di_extsize, lbn)); return (dp->dp2.di_extb[lbn]); } /* * Now direct and indirect. */ if (DIP(dp, di_mode) == IFLNK && DIP(dp, di_size) < sblock.fs_maxsymlinklen) return (0); if (lbn >= 0 && lbn < UFS_NDADDR) { *frags = numfrags(&sblock, sblksize(&sblock, DIP(dp, di_size), lbn)); return (DIP(dp, di_db[lbn])); } *frags = sblock.fs_frag; for (i = 0, tmpval = NINDIR(&sblock), cur = UFS_NDADDR; i < UFS_NIADDR; i++, tmpval *= NINDIR(&sblock), cur = next) { next = cur + tmpval; if (lbn == -cur - i) return (DIP(dp, di_ib[i])); /* * Determine whether the lbn in question is within this tree. */ if (lbn < 0 && -lbn >= next) continue; if (lbn > 0 && lbn >= next) continue; if (DIP(dp, di_ib[i]) == 0) return (0); return (indir_blkatoff(DIP(dp, di_ib[i]), ino, -cur - i, lbn, bpp)); } pfatal("lbn %jd not in ino %ju\n", lbn, (uintmax_t)ino); return (0); } /* * Fetch an indirect block to find the block at a given lbn. The lbn * may be negative to fetch a specific indirect block pointer or positive * to fetch a specific block. */ static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn, struct bufarea **bpp) { struct bufarea *bp; ufs_lbn_t lbnadd; ufs_lbn_t base; int i, level; level = lbn_level(cur); if (level == -1) pfatal("Invalid indir lbn %jd in ino %ju\n", lbn, (uintmax_t)ino); if (level == 0 && lbn < 0) pfatal("Invalid lbn %jd in ino %ju\n", lbn, (uintmax_t)ino); lbnadd = 1; base = -(cur + level); for (i = level; i > 0; i--) lbnadd *= NINDIR(&sblock); if (lbn > 0) i = (lbn - base) / lbnadd; else i = (-lbn - base) / lbnadd; if (i < 0 || i >= NINDIR(&sblock)) { pfatal("Invalid indirect index %d produced by lbn %jd " "in ino %ju\n", i, lbn, (uintmax_t)ino); return (0); } if (level == 0) cur = base + (i * lbnadd); else cur = -(base + (i * lbnadd)) - (level - 1); bp = getdatablk(blk, sblock.fs_bsize, BT_LEVEL1 + level); if (bp->b_errs != 0) return (0); blk = IBLK(bp, i); bp->b_index = i; if (cur == lbn || blk == 0) { if (bpp != NULL) *bpp = bp; else brelse(bp); return (blk); } brelse(bp); if (level == 0) pfatal("Invalid lbn %jd at level 0 for ino %ju\n", lbn, (uintmax_t)ino); return (indir_blkatoff(blk, ino, cur, lbn, bpp)); } /* * Check that a block in a legal block number. * Return 0 if in range, 1 if out of range. */ int chkrange(ufs2_daddr_t blk, int cnt) { int c; if (cnt <= 0 || blk <= 0 || blk >= maxfsblock || cnt > maxfsblock - blk) { if (debug) printf("out of range: blk %ld, offset %i, size %d\n", (long)blk, (int)fragnum(&sblock, blk), cnt); return (1); } if (cnt > sblock.fs_frag || fragnum(&sblock, blk) + cnt > sblock.fs_frag) { if (debug) printf("bad size: blk %ld, offset %i, size %d\n", (long)blk, (int)fragnum(&sblock, blk), cnt); return (1); } c = dtog(&sblock, blk); if (blk < cgdmin(&sblock, c)) { if ((blk + cnt) > cgsblock(&sblock, c)) { if (debug) { printf("blk %ld < cgdmin %ld;", (long)blk, (long)cgdmin(&sblock, c)); printf(" blk + cnt %ld > cgsbase %ld\n", (long)(blk + cnt), (long)cgsblock(&sblock, c)); } return (1); } } else { if ((blk + cnt) > cgbase(&sblock, c+1)) { if (debug) { printf("blk %ld >= cgdmin %ld;", (long)blk, (long)cgdmin(&sblock, c)); printf(" blk + cnt %ld > sblock.fs_fpg %ld\n", (long)(blk + cnt), (long)sblock.fs_fpg); } return (1); } } return (0); } /* * General purpose interface for reading inodes. * * firstinum and lastinum track contents of getnextino() cache (below). */ static ino_t firstinum, lastinum; static struct bufarea inobuf; void ginode(ino_t inumber, struct inode *ip) { ufs2_daddr_t iblk; struct ufs2_dinode *dp; if (inumber < UFS_ROOTINO || inumber >= maxino) errx(EEXIT, "bad inode number %ju to ginode", (uintmax_t)inumber); ip->i_number = inumber; if (inumber >= firstinum && inumber < lastinum) { /* contents in getnextino() cache */ ip->i_bp = &inobuf; inobuf.b_refcnt++; inobuf.b_index = firstinum; } else if (icachebp != NULL && inumber >= icachebp->b_index && inumber < icachebp->b_index + INOPB(&sblock)) { /* take an additional reference for the returned inode */ icachebp->b_refcnt++; ip->i_bp = icachebp; } else { iblk = ino_to_fsba(&sblock, inumber); /* release our cache-hold reference on old icachebp */ if (icachebp != NULL) brelse(icachebp); icachebp = getdatablk(iblk, sblock.fs_bsize, BT_INODES); if (icachebp->b_errs != 0) { icachebp = NULL; ip->i_bp = NULL; ip->i_dp = &zino; return; } /* take a cache-hold reference on new icachebp */ icachebp->b_refcnt++; icachebp->b_index = rounddown(inumber, INOPB(&sblock)); ip->i_bp = icachebp; } if (sblock.fs_magic == FS_UFS1_MAGIC) { ip->i_dp = (union dinode *) &ip->i_bp->b_un.b_dinode1[inumber - ip->i_bp->b_index]; return; } ip->i_dp = (union dinode *) &ip->i_bp->b_un.b_dinode2[inumber - ip->i_bp->b_index]; dp = (struct ufs2_dinode *)ip->i_dp; /* Do not check hash of inodes being created */ if (dp->di_mode != 0 && ffs_verify_dinode_ckhash(&sblock, dp)) { pwarn("INODE CHECK-HASH FAILED"); prtinode(ip); if (preen || reply("FIX") != 0) { if (preen) printf(" (FIXED)\n"); ffs_update_dinode_ckhash(&sblock, dp); inodirty(ip); } } } /* * Release a held inode. */ void irelse(struct inode *ip) { /* Check for failed inode read */ if (ip->i_bp == NULL) return; if (debug && sblock.fs_magic == FS_UFS2_MAGIC && ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)ip->i_dp)) { pwarn("irelse: releasing inode with bad check-hash"); prtinode(ip); } if (ip->i_bp->b_refcnt <= 0) pfatal("irelse: releasing unreferenced ino %ju\n", (uintmax_t) ip->i_number); brelse(ip->i_bp); } /* * Special purpose version of ginode used to optimize first pass * over all the inodes in numerical order. */ static ino_t nextinum, lastvalidinum; static long readcount, readpercg, fullcnt, inobufsize, partialcnt, partialsize; union dinode * getnextinode(ino_t inumber, int rebuiltcg) { int j; long size; mode_t mode; ufs2_daddr_t ndb, blk; union dinode *dp; struct inode ip; static caddr_t nextinop; if (inumber != nextinum++ || inumber > lastvalidinum) errx(EEXIT, "bad inode number %ju to nextinode", (uintmax_t)inumber); if (inumber >= lastinum) { readcount++; firstinum = lastinum; blk = ino_to_fsba(&sblock, lastinum); if (readcount % readpercg == 0) { size = partialsize; lastinum += partialcnt; } else { size = inobufsize; lastinum += fullcnt; } /* * Flush old contents in case they have been updated. * If getblk encounters an error, it will already have zeroed * out the buffer, so we do not need to do so here. */ if (inobuf.b_refcnt != 0) pfatal("Non-zero getnextinode() ref count %d\n", inobuf.b_refcnt); flush(fswritefd, &inobuf); getblk(&inobuf, blk, size); nextinop = inobuf.b_un.b_buf; } dp = (union dinode *)nextinop; if (sblock.fs_magic == FS_UFS1_MAGIC) nextinop += sizeof(struct ufs1_dinode); else nextinop += sizeof(struct ufs2_dinode); if ((ckhashadd & CK_INODE) != 0) { ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp); dirty(&inobuf); } if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp) != 0) { pwarn("INODE CHECK-HASH FAILED"); ip.i_bp = NULL; ip.i_dp = dp; ip.i_number = inumber; prtinode(&ip); if (preen || reply("FIX") != 0) { if (preen) printf(" (FIXED)\n"); ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp); dirty(&inobuf); } } if (rebuiltcg && (char *)dp == inobuf.b_un.b_buf) { /* * Try to determine if we have reached the end of the * allocated inodes. */ mode = DIP(dp, di_mode) & IFMT; if (mode == 0) { if (memcmp(dp->dp2.di_db, zino.dp2.di_db, UFS_NDADDR * sizeof(ufs2_daddr_t)) || memcmp(dp->dp2.di_ib, zino.dp2.di_ib, UFS_NIADDR * sizeof(ufs2_daddr_t)) || dp->dp2.di_mode || dp->dp2.di_size) return (NULL); return (dp); } if (!ftypeok(dp)) return (NULL); ndb = howmany(DIP(dp, di_size), sblock.fs_bsize); if (ndb < 0) return (NULL); if (mode == IFBLK || mode == IFCHR) ndb++; if (mode == IFLNK) { /* * Fake ndb value so direct/indirect block checks below * will detect any garbage after symlink string. */ if (DIP(dp, di_size) < (off_t)sblock.fs_maxsymlinklen) { ndb = howmany(DIP(dp, di_size), sizeof(ufs2_daddr_t)); if (ndb > UFS_NDADDR) { j = ndb - UFS_NDADDR; for (ndb = 1; j > 1; j--) ndb *= NINDIR(&sblock); ndb += UFS_NDADDR; } } } for (j = ndb; ndb < UFS_NDADDR && j < UFS_NDADDR; j++) if (DIP(dp, di_db[j]) != 0) return (NULL); for (j = 0, ndb -= UFS_NDADDR; ndb > 0; j++) ndb /= NINDIR(&sblock); for (; j < UFS_NIADDR; j++) if (DIP(dp, di_ib[j]) != 0) return (NULL); } return (dp); } void setinodebuf(int cg, ino_t inosused) { ino_t inum; inum = cg * sblock.fs_ipg; lastvalidinum = inum + inosused - 1; nextinum = inum; lastinum = inum; readcount = 0; /* Flush old contents in case they have been updated */ flush(fswritefd, &inobuf); inobuf.b_bno = 0; if (inobuf.b_un.b_buf == NULL) { inobufsize = blkroundup(&sblock, MAX(INOBUFSIZE, sblock.fs_bsize)); initbarea(&inobuf, BT_INODES); - if ((inobuf.b_un.b_buf = Malloc((unsigned)inobufsize)) == NULL) + if ((inobuf.b_un.b_buf = Balloc((unsigned)inobufsize)) == NULL) errx(EEXIT, "cannot allocate space for inode buffer"); } fullcnt = inobufsize / ((sblock.fs_magic == FS_UFS1_MAGIC) ? sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)); readpercg = inosused / fullcnt; partialcnt = inosused % fullcnt; partialsize = fragroundup(&sblock, partialcnt * ((sblock.fs_magic == FS_UFS1_MAGIC) ? sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode))); if (partialcnt != 0) { readpercg++; } else { partialcnt = fullcnt; partialsize = inobufsize; } } int freeblock(struct inodesc *idesc) { struct dups *dlp; struct bufarea *cgbp; struct cg *cgp; ufs2_daddr_t blkno; long size, nfrags; blkno = idesc->id_blkno; if (idesc->id_type == SNAP) { pfatal("clearing a snapshot dinode\n"); return (STOP); } size = lfragtosize(&sblock, idesc->id_numfrags); if (snapblkfree(&sblock, blkno, size, idesc->id_number, std_checkblkavail)) return (KEEPON); for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) { if (chkrange(blkno, 1)) { return (SKIP); } else if (testbmap(blkno)) { for (dlp = duplist; dlp; dlp = dlp->next) { if (dlp->dup != blkno) continue; dlp->dup = duplist->dup; dlp = duplist; duplist = duplist->next; free((char *)dlp); break; } if (dlp == NULL) { clrbmap(blkno); n_blks--; } } } /* * If all successfully returned, account for them. */ if (nfrags == 0) { cgbp = cglookup(dtog(&sblock, idesc->id_blkno)); cgp = cgbp->b_un.b_cg; if (idesc->id_numfrags == sblock.fs_frag) cgp->cg_cs.cs_nbfree++; else cgp->cg_cs.cs_nffree += idesc->id_numfrags; cgdirty(cgbp); } return (KEEPON); } /* * Prepare a snapshot file for being removed. */ void snapremove(ino_t inum) { struct inodesc idesc; struct inode ip; int i; for (i = 0; i < snapcnt; i++) if (snaplist[i].i_number == inum) break; if (i == snapcnt) ginode(inum, &ip); else ip = snaplist[i]; if ((DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) { printf("snapremove: inode %jd is not a snapshot\n", (intmax_t)inum); if (i == snapcnt) irelse(&ip); return; } if (debug) printf("snapremove: remove %sactive snapshot %jd\n", i == snapcnt ? "in" : "", (intmax_t)inum); /* * If on active snapshot list, remove it. */ if (i < snapcnt) { for (i++; i < FSMAXSNAP; i++) { if (sblock.fs_snapinum[i] == 0) break; snaplist[i - 1] = snaplist[i]; sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i]; } sblock.fs_snapinum[i - 1] = 0; bzero(&snaplist[i - 1], sizeof(struct inode)); snapcnt--; } memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = SNAP; idesc.id_func = snapclean; idesc.id_number = inum; (void)ckinode(ip.i_dp, &idesc); DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT); inodirty(&ip); irelse(&ip); } static int snapclean(struct inodesc *idesc) { ufs2_daddr_t blkno; struct bufarea *bp; union dinode *dp; blkno = idesc->id_blkno; if (blkno == 0) return (KEEPON); dp = idesc->id_dp; if (blkno == BLK_NOCOPY || blkno == BLK_SNAP) { if (idesc->id_lbn < UFS_NDADDR) { DIP_SET(dp, di_db[idesc->id_lbn], 0); } else { bp = idesc->id_bp; IBLK_SET(bp, bp->b_index, 0); dirty(bp); } } return (KEEPON); } /* * Notification that a block is being freed. Return zero if the free * should be allowed to proceed. Return non-zero if the snapshot file * wants to claim the block. The block will be claimed if it is an * uncopied part of one of the snapshots. It will be freed if it is * either a BLK_NOCOPY or has already been copied in all of the snapshots. * If a fragment is being freed, then all snapshots that care about * it must make a copy since a snapshot file can only claim full sized * blocks. Note that if more than one snapshot file maps the block, * we can pick one at random to claim it. Since none of the snapshots * can change, we are assurred that they will all see the same unmodified * image. When deleting a snapshot file (see ino_trunc above), we * must push any of these claimed blocks to one of the other snapshots * that maps it. These claimed blocks are easily identified as they will * have a block number equal to their logical block number within the * snapshot. A copied block can never have this property because they * must always have been allocated from a BLK_NOCOPY location. */ int snapblkfree(struct fs *fs, ufs2_daddr_t bno, long size, ino_t inum, ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags)) { union dinode *dp; struct inode ip; struct bufarea *snapbp; ufs_lbn_t lbn; ufs2_daddr_t blkno, relblkno; int i, frags, claimedblk, copydone; /* If no snapshots, nothing to do */ if (snapcnt == 0) return (0); if (debug) printf("snapblkfree: in ino %jd free blkno %jd, size %jd\n", (intmax_t)inum, (intmax_t)bno, (intmax_t)size); relblkno = blknum(fs, bno); lbn = fragstoblks(fs, relblkno); /* Direct blocks are always pre-copied */ if (lbn < UFS_NDADDR) return (0); copydone = 0; claimedblk = 0; for (i = 0; i < snapcnt; i++) { /* * Lookup block being freed. */ ip = snaplist[i]; dp = ip.i_dp; blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number, lbn, &frags, &snapbp); /* * Check to see if block needs to be copied. */ if (blkno == 0) { /* * A block that we map is being freed. If it has not * been claimed yet, we will claim or copy it (below). */ claimedblk = 1; } else if (blkno == BLK_SNAP) { /* * No previous snapshot claimed the block, * so it will be freed and become a BLK_NOCOPY * (don't care) for us. */ if (claimedblk) pfatal("snapblkfree: inconsistent block type"); IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY); dirty(snapbp); brelse(snapbp); continue; } else /* BLK_NOCOPY or default */ { /* * If the snapshot has already copied the block * (default), or does not care about the block, * it is not needed. */ brelse(snapbp); continue; } /* * If this is a full size block, we will just grab it * and assign it to the snapshot inode. Otherwise we * will proceed to copy it. See explanation for this * routine as to why only a single snapshot needs to * claim this block. */ if (size == fs->fs_bsize) { if (debug) printf("Grabonremove snapshot %ju lbn %jd " "from inum %ju\n", (intmax_t)ip.i_number, (intmax_t)lbn, (uintmax_t)inum); IBLK_SET(snapbp, snapbp->b_index, relblkno); dirty(snapbp); brelse(snapbp); DIP_SET(dp, di_blocks, DIP(dp, di_blocks) + btodb(size)); inodirty(&ip); return (1); } /* First time through, read the contents of the old block. */ if (copydone == 0) { copydone = 1; if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno), fs->fs_bsize) != 0) { pfatal("Could not read snapshot %ju block " "%jd\n", (intmax_t)ip.i_number, (intmax_t)relblkno); continue; } } /* * This allocation will never require any additional * allocations for the snapshot inode. */ blkno = allocblk(dtog(fs, relblkno), fs->fs_frag, checkblkavail); if (blkno == 0) { pfatal("Could not allocate block for snapshot %ju\n", (intmax_t)ip.i_number); continue; } if (debug) printf("Copyonremove: snapino %jd lbn %jd for inum %ju " "size %ld new blkno %jd\n", (intmax_t)ip.i_number, (intmax_t)lbn, (uintmax_t)inum, size, (intmax_t)blkno); blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize); IBLK_SET(snapbp, snapbp->b_index, blkno); dirty(snapbp); brelse(snapbp); DIP_SET(dp, di_blocks, DIP(dp, di_blocks) + btodb(fs->fs_bsize)); inodirty(&ip); } return (0); } /* * Notification that a block is being written. Return if the block * is part of a snapshot as snapshots never track other snapshots. * The block will be copied in all of the snapshots that are tracking * it and have not yet copied it. Some buffers may hold more than one * block. Here we need to check each block in the buffer. */ void copyonwrite(struct fs *fs, struct bufarea *bp, ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags)) { ufs2_daddr_t copyblkno; long i, numblks; /* If no snapshots, nothing to do. */ if (snapcnt == 0) return; numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize; if (debug) prtbuf(bp, "copyonwrite: checking %jd block%s in buffer", (intmax_t)numblks, numblks > 1 ? "s" : ""); copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno)); for (i = 0; i < numblks; i++) { chkcopyonwrite(fs, copyblkno, checkblkavail); copyblkno += fs->fs_frag; } } static void chkcopyonwrite(struct fs *fs, ufs2_daddr_t copyblkno, ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags)) { struct inode ip; union dinode *dp; struct bufarea *snapbp; ufs2_daddr_t blkno; int i, frags, copydone; ufs_lbn_t lbn; lbn = fragstoblks(fs, copyblkno); /* Direct blocks are always pre-copied */ if (lbn < UFS_NDADDR) return; copydone = 0; for (i = 0; i < snapcnt; i++) { /* * Lookup block being freed. */ ip = snaplist[i]; dp = ip.i_dp; blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp); /* * Check to see if block needs to be copied. */ if (blkno != 0) { /* * A block that we have already copied or don't track. */ brelse(snapbp); continue; } /* First time through, read the contents of the old block. */ if (copydone == 0) { copydone = 1; if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno), fs->fs_bsize) != 0) { pfatal("Could not read snapshot %ju block " "%jd\n", (intmax_t)ip.i_number, (intmax_t)copyblkno); continue; } } /* * This allocation will never require any additional * allocations for the snapshot inode. */ if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag, checkblkavail)) == 0) { pfatal("Could not allocate block for snapshot %ju\n", (intmax_t)ip.i_number); continue; } if (debug) prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using " "blkno %ju setting in buffer", (intmax_t)ip.i_number, (intmax_t)lbn, (intmax_t)blkno); blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize); IBLK_SET(snapbp, snapbp->b_index, blkno); dirty(snapbp); brelse(snapbp); DIP_SET(dp, di_blocks, DIP(dp, di_blocks) + btodb(fs->fs_bsize)); inodirty(&ip); } return; } /* * Traverse an inode and check that its block count is correct * fixing it if necessary. */ void check_blkcnt(struct inode *ip) { struct inodesc idesc; union dinode *dp; ufs2_daddr_t ndb; int j, ret, offset; dp = ip->i_dp; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_func = pass1check; idesc.id_number = ip->i_number; idesc.id_type = (DIP(dp, di_flags) & SF_SNAPSHOT) == 0 ? ADDR : SNAP; (void)ckinode(dp, &idesc); if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) { ndb = howmany(dp->dp2.di_extsize, sblock.fs_bsize); for (j = 0; j < UFS_NXADDR; j++) { if (--ndb == 0 && (offset = blkoff(&sblock, dp->dp2.di_extsize)) != 0) idesc.id_numfrags = numfrags(&sblock, fragroundup(&sblock, offset)); else idesc.id_numfrags = sblock.fs_frag; if (dp->dp2.di_extb[j] == 0) continue; idesc.id_blkno = dp->dp2.di_extb[j]; ret = (*idesc.id_func)(&idesc); if (ret & STOP) break; } } idesc.id_entryno *= btodb(sblock.fs_fsize); if (DIP(dp, di_blocks) != idesc.id_entryno) { if (!(sujrecovery && preen)) { pwarn("INCORRECT BLOCK COUNT I=%lu (%ju should be %ju)", (u_long)idesc.id_number, (uintmax_t)DIP(dp, di_blocks), (uintmax_t)idesc.id_entryno); if (preen) printf(" (CORRECTED)\n"); else if (reply("CORRECT") == 0) return; } if (bkgrdflag == 0) { DIP_SET(dp, di_blocks, idesc.id_entryno); inodirty(ip); } else { cmd.value = idesc.id_number; cmd.size = idesc.id_entryno - DIP(dp, di_blocks); if (debug) printf("adjblkcnt ino %ju amount %lld\n", (uintmax_t)cmd.value, (long long)cmd.size); if (sysctl(adjblkcnt, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST INODE BLOCK COUNT", cmd.value); } } } void freeinodebuf(void) { struct bufarea *bp; int i; /* * Flush old contents in case they have been updated. */ flush(fswritefd, &inobuf); if (inobuf.b_un.b_buf != NULL) free((char *)inobuf.b_un.b_buf); inobuf.b_un.b_buf = NULL; firstinum = lastinum = 0; /* * Reload the snapshot inodes in case any of them changed. */ for (i = 0; i < snapcnt; i++) { bp = snaplist[i].i_bp; bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno, bp->b_size); } } /* * Routines to maintain information about directory inodes. * This is built during the first pass and used during the * second and third passes. * * Enter inodes into the cache. */ struct inoinfo * cacheino(union dinode *dp, ino_t inumber) { struct inoinfo *inp; int i, blks; if (getinoinfo(inumber) != NULL) pfatal("cacheino: duplicate entry for ino %jd\n", (intmax_t)inumber); if (howmany(DIP(dp, di_size), sblock.fs_bsize) > UFS_NDADDR) blks = UFS_NDADDR + UFS_NIADDR; else if (DIP(dp, di_size) > 0) blks = howmany(DIP(dp, di_size), sblock.fs_bsize); else blks = 1; inp = (struct inoinfo *) Malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs2_daddr_t)); if (inp == NULL) errx(EEXIT, "cannot increase directory list"); SLIST_INSERT_HEAD(&inphash[inumber % dirhash], inp, i_hash); inp->i_flags = 0; inp->i_parent = inumber == UFS_ROOTINO ? UFS_ROOTINO : (ino_t)0; inp->i_dotdot = (ino_t)0; inp->i_number = inumber; inp->i_isize = DIP(dp, di_size); inp->i_depth = DIP(dp, di_dirdepth); inp->i_numblks = blks; for (i = 0; i < MIN(blks, UFS_NDADDR); i++) inp->i_blks[i] = DIP(dp, di_db[i]); if (blks > UFS_NDADDR) for (i = 0; i < UFS_NIADDR; i++) inp->i_blks[UFS_NDADDR + i] = DIP(dp, di_ib[i]); if (inplast == listmax) { listmax += 100; inpsort = (struct inoinfo **)reallocarray((char *)inpsort, listmax, sizeof(struct inoinfo *)); if (inpsort == NULL) errx(EEXIT, "cannot increase directory list"); } inpsort[inplast++] = inp; return (inp); } /* * Look up an inode cache structure. */ struct inoinfo * getinoinfo(ino_t inumber) { struct inoinfo *inp; SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) { if (inp->i_number != inumber) continue; return (inp); } return (NULL); } /* * Remove an entry from the inode cache and disk-order sorted list. * Return 0 on success and 1 on failure. */ int removecachedino(ino_t inumber) { struct inoinfo *inp, **inpp; char *listtype; listtype = "hash"; SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) { if (inp->i_number != inumber) continue; SLIST_REMOVE(&inphash[inumber % dirhash], inp, inoinfo, i_hash); for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) { if (*inpp != inp) continue; *inpp = inpsort[inplast - 1]; inplast--; free(inp); return (0); } listtype = "sort"; break; } pfatal("removecachedino: entry for ino %jd not found on %s list\n", (intmax_t)inumber, listtype); return (1); } /* * Clean up all the inode cache structure. */ void inocleanup(void) { struct inoinfo **inpp; if (inphash == NULL) return; for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) free((char *)(*inpp)); free((char *)inphash); inphash = NULL; free((char *)inpsort); inpsort = NULL; } void inodirty(struct inode *ip) { if (sblock.fs_magic == FS_UFS2_MAGIC) ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)ip->i_dp); dirty(ip->i_bp); } void clri(struct inodesc *idesc, const char *type, int flag) { union dinode *dp; struct inode ip; ginode(idesc->id_number, &ip); dp = ip.i_dp; if (flag == 1) { pwarn("%s %s", type, (DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE"); prtinode(&ip); printf("\n"); } if (preen || reply("CLEAR") == 1) { if (preen) printf(" (CLEARED)\n"); n_files--; if (bkgrdflag == 0) { if (idesc->id_type == SNAP) { snapremove(idesc->id_number); idesc->id_type = ADDR; } (void)ckinode(dp, idesc); inoinfo(idesc->id_number)->ino_state = USTATE; clearinode(dp); inodirty(&ip); } else { cmd.value = idesc->id_number; cmd.size = -DIP(dp, di_nlink); if (debug) printf("adjrefcnt ino %ld amt %lld\n", (long)cmd.value, (long long)cmd.size); if (sysctl(adjrefcnt, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST INODE", cmd.value); } } irelse(&ip); } int findname(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) { idesc->id_entryno++; return (KEEPON); } memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1); return (STOP|FOUND); } int findino(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (dirp->d_ino == 0) return (KEEPON); if (strcmp(dirp->d_name, idesc->id_name) == 0 && dirp->d_ino >= UFS_ROOTINO && dirp->d_ino < maxino) { idesc->id_parent = dirp->d_ino; return (STOP|FOUND); } return (KEEPON); } int clearentry(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) { idesc->id_entryno++; return (KEEPON); } dirp->d_ino = 0; return (STOP|FOUND|ALTERED); } void prtinode(struct inode *ip) { char *p; union dinode *dp; struct passwd *pw; time_t t; dp = ip->i_dp; printf(" I=%lu ", (u_long)ip->i_number); if (ip->i_number < UFS_ROOTINO || ip->i_number >= maxino) return; printf(" OWNER="); if ((pw = getpwuid((int)DIP(dp, di_uid))) != NULL) printf("%s ", pw->pw_name); else printf("%u ", (unsigned)DIP(dp, di_uid)); printf("MODE=%o\n", DIP(dp, di_mode)); if (preen) printf("%s: ", cdevname); printf("SIZE=%ju ", (uintmax_t)DIP(dp, di_size)); t = DIP(dp, di_mtime); if ((p = ctime(&t)) != NULL) printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]); } void blkerror(ino_t ino, const char *type, ufs2_daddr_t blk) { pfatal("%jd %s I=%ju", (intmax_t)blk, type, (uintmax_t)ino); printf("\n"); switch (inoinfo(ino)->ino_state) { case FSTATE: case FZLINK: inoinfo(ino)->ino_state = FCLEAR; return; case DSTATE: case DZLINK: inoinfo(ino)->ino_state = DCLEAR; return; case FCLEAR: case DCLEAR: return; default: errx(EEXIT, "BAD STATE %d TO BLKERR", inoinfo(ino)->ino_state); /* NOTREACHED */ } } /* * allocate an unused inode */ ino_t allocino(ino_t request, int type) { ino_t ino; struct inode ip; union dinode *dp; struct bufarea *cgbp; struct cg *cgp; int cg, anyino; anyino = 0; if (request == 0) { request = UFS_ROOTINO; anyino = 1; } else if (inoinfo(request)->ino_state != USTATE) return (0); retry: for (ino = request; ino < maxino; ino++) if (inoinfo(ino)->ino_state == USTATE) break; if (ino >= maxino) return (0); cg = ino_to_cg(&sblock, ino); cgbp = cglookup(cg); cgp = cgbp->b_un.b_cg; if (!check_cgmagic(cg, cgbp)) { if (anyino == 0) return (0); request = (cg + 1) * sblock.fs_ipg; goto retry; } setbit(cg_inosused(cgp), ino % sblock.fs_ipg); cgp->cg_cs.cs_nifree--; switch (type & IFMT) { case IFDIR: inoinfo(ino)->ino_state = DSTATE; cgp->cg_cs.cs_ndir++; break; case IFREG: case IFLNK: inoinfo(ino)->ino_state = FSTATE; break; default: return (0); } cgdirty(cgbp); ginode(ino, &ip); dp = ip.i_dp; memset(dp, 0, ((sblock.fs_magic == FS_UFS1_MAGIC) ? sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode))); DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1, std_checkblkavail)); if (DIP(dp, di_db[0]) == 0) { inoinfo(ino)->ino_state = USTATE; inodirty(&ip); irelse(&ip); return (0); } DIP_SET(dp, di_mode, type); DIP_SET(dp, di_atime, time(NULL)); DIP_SET(dp, di_ctime, DIP(dp, di_atime)); DIP_SET(dp, di_mtime, DIP(dp, di_ctime)); DIP_SET(dp, di_size, sblock.fs_fsize); DIP_SET(dp, di_blocks, btodb(sblock.fs_fsize)); n_files++; inodirty(&ip); irelse(&ip); inoinfo(ino)->ino_type = IFTODT(type); return (ino); } /* * deallocate an inode */ void freeino(ino_t ino) { struct inodesc idesc; union dinode *dp; struct inode ip; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = freeblock; idesc.id_number = ino; ginode(ino, &ip); dp = ip.i_dp; (void)ckinode(dp, &idesc); clearinode(dp); inodirty(&ip); irelse(&ip); inoinfo(ino)->ino_state = USTATE; n_files--; } diff --git a/sbin/fsck_ffs/main.c b/sbin/fsck_ffs/main.c index ee39760f4dd2..4189af1ba517 100644 --- a/sbin/fsck_ffs/main.c +++ b/sbin/fsck_ffs/main.c @@ -1,749 +1,748 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1986, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif /* not lint */ #endif #include #define _WANT_P_OSREL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include "fsck.h" static int restarts; static char snapname[BUFSIZ]; /* when doing snapshots, the name of the file */ static void usage(void) __dead2; static intmax_t argtoimax(int flag, const char *req, const char *str, int base); static int checkfilesys(char *filesys); static int setup_bkgrdchk(struct statfs *mntp, int sbrdfailed, char **filesys); int main(int argc, char *argv[]) { int ch; struct rlimit rlimit; struct itimerval itimerval; int fsret; int ret = 0; sync(); skipclean = 1; inoopt = 0; while ((ch = getopt(argc, argv, "b:Bc:CdEfFm:npRrSyZz")) != -1) { switch (ch) { case 'b': skipclean = 0; bflag = argtoimax('b', "number", optarg, 10); printf("Alternate super block location: %jd\n", bflag); break; case 'B': bkgrdflag = 1; break; case 'c': skipclean = 0; cvtlevel = argtoimax('c', "conversion level", optarg, 10); if (cvtlevel < 3) errx(EEXIT, "cannot do level %d conversion", cvtlevel); break; case 'd': debug++; break; case 'E': Eflag++; break; case 'f': skipclean = 0; break; case 'F': bkgrdcheck = 1; break; case 'm': lfmode = argtoimax('m', "mode", optarg, 8); if (lfmode &~ 07777) errx(EEXIT, "bad mode to -m: %o", lfmode); printf("** lost+found creation mode %o\n", lfmode); break; case 'n': nflag++; yflag = 0; break; case 'p': preen++; /*FALLTHROUGH*/ case 'C': ckclean++; break; case 'R': wantrestart = 1; break; case 'r': inoopt++; break; case 'S': surrender = 1; break; case 'y': yflag++; nflag = 0; break; case 'Z': Zflag++; break; case 'z': zflag++; break; default: usage(); } } argc -= optind; argv += optind; if (!argc) usage(); if (bkgrdflag && cvtlevel > 0) { pfatal("CANNOT CONVERT A SNAPSHOT\n"); exit(EEXIT); } if (signal(SIGINT, SIG_IGN) != SIG_IGN) (void)signal(SIGINT, catch); if (ckclean) (void)signal(SIGQUIT, catchquit); signal(SIGINFO, infohandler); if (bkgrdflag) { signal(SIGALRM, alarmhandler); itimerval.it_interval.tv_sec = 5; itimerval.it_interval.tv_usec = 0; itimerval.it_value.tv_sec = 5; itimerval.it_value.tv_usec = 0; setitimer(ITIMER_REAL, &itimerval, NULL); } /* * Push up our allowed memory limit so we can cope * with huge file systems. */ if (getrlimit(RLIMIT_DATA, &rlimit) == 0) { rlimit.rlim_cur = rlimit.rlim_max; (void)setrlimit(RLIMIT_DATA, &rlimit); } while (argc > 0) { if ((fsret = checkfilesys(*argv)) == ERESTART) continue; ret |= fsret; argc--; argv++; } if (returntosingle) ret = 2; exit(ret); } static intmax_t argtoimax(int flag, const char *req, const char *str, int base) { char *cp; intmax_t ret; ret = strtoimax(str, &cp, base); if (cp == str || *cp) errx(EEXIT, "-%c flag requires a %s", flag, req); return (ret); } /* * Check the specified file system. */ /* ARGSUSED */ static int checkfilesys(char *filesys) { ufs2_daddr_t n_ffree, n_bfree; struct dups *dp; struct statfs *mntp; intmax_t blks, files; size_t size; int sbreadfailed, ofsmodified; fsutilinit(); fsckinit(); cdevname = filesys; if (debug && ckclean) pwarn("starting\n"); /* * Make best effort to get the disk name. Check first to see * if it is listed among the mounted file systems. Failing that * check to see if it is listed in /etc/fstab. */ mntp = getmntpoint(filesys); if (mntp != NULL) filesys = mntp->f_mntfromname; else filesys = blockcheck(filesys); /* * If -F flag specified, check to see whether a background check * is possible and needed. If possible and needed, exit with * status zero. Otherwise exit with status non-zero. A non-zero * exit status will cause a foreground check to be run. */ sblock_init(); sbreadfailed = 0; if (openfilesys(filesys) == 0 || readsb() == 0) sbreadfailed = 1; if (bkgrdcheck) { if (sbreadfailed) exit(3); /* Cannot read superblock */ if ((sblock.fs_flags & FS_NEEDSFSCK) == FS_NEEDSFSCK) exit(4); /* Earlier background failed */ if ((sblock.fs_flags & FS_SUJ) == FS_SUJ) { maxino = sblock.fs_ncg * sblock.fs_ipg; maxfsblock = sblock.fs_size; bufinit(); preen = 1; if (suj_check(filesys) == 0) exit(4); /* Journal good, run it now */ } if ((sblock.fs_flags & FS_DOSOFTDEP) == 0) exit(5); /* Not running soft updates */ size = MIBSIZE; if (sysctlnametomib("vfs.ffs.adjrefcnt", adjrefcnt, &size) < 0) exit(6); /* Lacks kernel support */ if ((mntp == NULL && sblock.fs_clean == 1) || (mntp != NULL && (sblock.fs_flags & FS_UNCLEAN) == 0)) exit(7); /* Filesystem clean, report it now */ exit(0); } if (ckclean && skipclean) { /* * If file system is gjournaled, check it here. */ if (sbreadfailed) exit(3); /* Cannot read superblock */ if (bkgrdflag == 0 && (nflag || (fswritefd = open(filesys, O_WRONLY)) < 0)) { fswritefd = -1; if (preen) pfatal("NO WRITE ACCESS"); printf(" (NO WRITE)"); } if ((sblock.fs_flags & FS_GJOURNAL) != 0) { if (sblock.fs_clean == 1) { pwarn("FILE SYSTEM CLEAN; SKIPPING CHECKS\n"); exit(0); } if ((sblock.fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) { bufinit(); gjournal_check(filesys); if (chkdoreload(mntp, pwarn) == 0) exit(0); exit(4); } else { pfatal("FULL FSCK NEEDED, CANNOT RUN FAST " "FSCK\n"); } } close(fswritefd); fswritefd = -1; } if (bkgrdflag) { switch (setup_bkgrdchk(mntp, sbreadfailed, &filesys)) { case -1: /* filesystem clean */ goto clean; case 0: /* cannot do background, give up */ exit(EEXIT); case 1: /* doing background check, preen rules apply */ preen = 1; break; } } switch (setup(filesys)) { case 0: if (preen) pfatal("CAN'T CHECK FILE SYSTEM."); return (EEXIT); case -1: clean: pwarn("clean, %ld free ", (long)(sblock.fs_cstotal.cs_nffree + sblock.fs_frag * sblock.fs_cstotal.cs_nbfree)); printf("(%jd frags, %jd blocks, %.1f%% fragmentation)\n", (intmax_t)sblock.fs_cstotal.cs_nffree, (intmax_t)sblock.fs_cstotal.cs_nbfree, sblock.fs_cstotal.cs_nffree * 100.0 / sblock.fs_dsize); return (0); } /* * Determine if we can and should do journal recovery. */ if (bkgrdflag == 0 && (sblock.fs_flags & FS_SUJ) == FS_SUJ) { if ((sblock.fs_flags & FS_NEEDSFSCK) != FS_NEEDSFSCK && skipclean) { sujrecovery = 1; if (suj_check(filesys) == 0) { pwarn("\n**** FILE SYSTEM MARKED CLEAN ****\n"); if (chkdoreload(mntp, pwarn) == 0) exit(0); exit(4); } sujrecovery = 0; pwarn("Skipping journal, " "falling through to full fsck\n"); } if (fswritefd != -1) { /* * Write the superblock so we don't try to recover the * journal on another pass. If this is the only change * to the filesystem, we do not want it to be called * out as modified. */ sblock.fs_mtime = time(NULL); sbdirty(); ofsmodified = fsmodified; flush(fswritefd, &sblk); fsmodified = ofsmodified; } } /* * If the filesystem was run on an old kernel that did not * support check hashes, clear the check-hash flags so that * we do not try to verify them. */ if ((sblock.fs_flags & FS_METACKHASH) == 0) sblock.fs_metackhash = 0; /* * If we are running on a kernel that can provide check hashes * that are not yet enabled for the filesystem and we are * running manually without the -y flag, offer to add any * supported check hashes that are not already enabled. */ ckhashadd = 0; if (preen == 0 && yflag == 0 && sblock.fs_magic != FS_UFS1_MAGIC && fswritefd != -1 && getosreldate() >= P_OSREL_CK_CYLGRP) { if ((sblock.fs_metackhash & CK_CYLGRP) == 0 && reply("ADD CYLINDER GROUP CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_CYLGRP; sblock.fs_metackhash |= CK_CYLGRP; } if ((sblock.fs_metackhash & CK_SUPERBLOCK) == 0 && getosreldate() >= P_OSREL_CK_SUPERBLOCK && reply("ADD SUPERBLOCK CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_SUPERBLOCK; sblock.fs_metackhash |= CK_SUPERBLOCK; } if ((sblock.fs_metackhash & CK_INODE) == 0 && getosreldate() >= P_OSREL_CK_INODE && reply("ADD INODE CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_INODE; sblock.fs_metackhash |= CK_INODE; } #ifdef notyet if ((sblock.fs_metackhash & CK_INDIR) == 0 && getosreldate() >= P_OSREL_CK_INDIR && reply("ADD INDIRECT BLOCK CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_INDIR; sblock.fs_metackhash |= CK_INDIR; } if ((sblock.fs_metackhash & CK_DIR) == 0 && getosreldate() >= P_OSREL_CK_DIR && reply("ADD DIRECTORY CHECK-HASH PROTECTION") != 0) { ckhashadd |= CK_DIR; sblock.fs_metackhash |= CK_DIR; } #endif /* notyet */ if (ckhashadd != 0) { sblock.fs_flags |= FS_METACKHASH; sbdirty(); } } /* * Cleared if any questions answered no. Used to decide if * the superblock should be marked clean. */ resolved = 1; /* * 1: scan inodes tallying blocks used */ if (preen == 0 || debug) { printf("** Last Mounted on %s\n", sblock.fs_fsmnt); if (mntp != NULL && mntp->f_flags & MNT_ROOTFS) printf("** Root file system\n"); printf("** Phase 1 - Check Blocks and Sizes\n"); } clock_gettime(CLOCK_REALTIME_PRECISE, &startprog); pass1(); IOstats("Pass1"); /* * 1b: locate first references to duplicates, if any */ if (duplist) { if (preen || usedsoftdep) pfatal("INTERNAL ERROR: DUPS WITH %s%s%s", preen ? "-p" : "", (preen && usedsoftdep) ? " AND " : "", usedsoftdep ? "SOFTUPDATES" : ""); if (preen == 0 || debug) printf("** Phase 1b - Rescan For More DUPS\n"); pass1b(); IOstats("Pass1b"); } /* * 2: traverse directories from root to mark all connected directories */ if (preen == 0 || debug) printf("** Phase 2 - Check Pathnames\n"); pass2(); IOstats("Pass2"); /* * 3: scan inodes looking for disconnected directories */ if (preen == 0 || debug) printf("** Phase 3 - Check Connectivity\n"); pass3(); IOstats("Pass3"); /* * 4: scan inodes looking for disconnected files; check reference counts */ if (preen == 0 || debug) printf("** Phase 4 - Check Reference Counts\n"); pass4(); IOstats("Pass4"); /* * 5: check and repair resource counts in cylinder groups */ if (preen == 0 || debug) printf("** Phase 5 - Check Cyl groups\n"); snapflush(std_checkblkavail); if (cgheader_corrupt) { printf("PHASE 5 SKIPPED DUE TO CORRUPT CYLINDER GROUP " "HEADER(S)\n\n"); } else { pass5(); IOstats("Pass5"); } /* * print out summary statistics */ n_ffree = sblock.fs_cstotal.cs_nffree; n_bfree = sblock.fs_cstotal.cs_nbfree; files = maxino - UFS_ROOTINO - sblock.fs_cstotal.cs_nifree - n_files; blks = n_blks + sblock.fs_ncg * (cgdmin(&sblock, 0) - cgsblock(&sblock, 0)); blks += cgsblock(&sblock, 0) - cgbase(&sblock, 0); blks += howmany(sblock.fs_cssize, sblock.fs_fsize); blks = maxfsblock - (n_ffree + sblock.fs_frag * n_bfree) - blks; if (bkgrdflag && (files > 0 || blks > 0)) { countdirs = sblock.fs_cstotal.cs_ndir - countdirs; pwarn("Reclaimed: %ld directories, %jd files, %jd fragments\n", countdirs, files - countdirs, blks); } pwarn("%ld files, %jd used, %ju free ", (long)n_files, (intmax_t)n_blks, (uintmax_t)n_ffree + sblock.fs_frag * n_bfree); printf("(%ju frags, %ju blocks, %.1f%% fragmentation)\n", (uintmax_t)n_ffree, (uintmax_t)n_bfree, n_ffree * 100.0 / sblock.fs_dsize); if (debug) { if (files < 0) printf("%jd inodes missing\n", -files); if (blks < 0) printf("%jd blocks missing\n", -blks); if (duplist != NULL) { printf("The following duplicate blocks remain:"); for (dp = duplist; dp; dp = dp->next) printf(" %jd,", (intmax_t)dp->dup); printf("\n"); } } duplist = (struct dups *)0; muldup = (struct dups *)0; inocleanup(); if (fsmodified) { sblock.fs_time = time(NULL); sbdirty(); } if (cvtlevel && (sblk.b_flags & B_DIRTY) != 0) { /* * Write out the duplicate super blocks */ if (sbput(fswritefd, &sblock, sblock.fs_ncg) == 0) fsmodified = 1; } if (rerun) resolved = 0; /* * Check to see if the file system is mounted read-write. */ if (bkgrdflag == 0 && mntp != NULL && (mntp->f_flags & MNT_RDONLY) == 0) resolved = 0; ckfini(resolved); if (fsmodified && !preen) printf("\n***** FILE SYSTEM WAS MODIFIED *****\n"); if (rerun) { if (wantrestart && (restarts++ < 10) && (preen || reply("RESTART"))) return (ERESTART); printf("\n***** PLEASE RERUN FSCK *****\n"); } if (chkdoreload(mntp, pwarn) != 0) { if (!fsmodified) return (0); if (!preen) printf("\n***** REBOOT NOW *****\n"); sync(); return (4); } return (rerun ? ERERUN : 0); } /* * If we are to do a background check: * Get the mount point information of the file system * If already clean, return -1 * Check that kernel supports background fsck * Find or create the snapshot directory * Create the snapshot file * Open snapshot * If anything fails print reason and return 0 which exits */ static int setup_bkgrdchk(struct statfs *mntp, int sbreadfailed, char **filesys) { struct stat snapdir; struct group *grp; struct iovec *iov; char errmsg[255]; int iovlen; size_t size; /* Get the mount point information of the file system */ if (mntp == NULL) { pwarn("NOT MOUNTED, CANNOT RUN IN BACKGROUND\n"); return (0); } if ((mntp->f_flags & MNT_RDONLY) != 0) { pwarn("MOUNTED READ-ONLY, CANNOT RUN IN BACKGROUND\n"); return (0); } if ((mntp->f_flags & MNT_SOFTDEP) == 0) { pwarn("NOT USING SOFT UPDATES, CANNOT RUN IN BACKGROUND\n"); return (0); } if (sbreadfailed) { pwarn("SUPERBLOCK READ FAILED, CANNOT RUN IN BACKGROUND\n"); return (0); } if ((sblock.fs_flags & FS_NEEDSFSCK) != 0) { pwarn("FULL FSCK NEEDED, CANNOT RUN IN BACKGROUND\n"); return (0); } if (skipclean && ckclean && (sblock.fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK)) == 0) { /* * file system is clean; * skip snapshot and report it clean */ pwarn("FILE SYSTEM CLEAN; SKIPPING CHECKS\n"); return (-1); } /* Check that kernel supports background fsck */ size = MIBSIZE; if (sysctlnametomib("vfs.ffs.adjrefcnt", adjrefcnt, &size) < 0|| sysctlnametomib("vfs.ffs.adjblkcnt", adjblkcnt, &size) < 0|| sysctlnametomib("vfs.ffs.setsize", setsize, &size) < 0 || sysctlnametomib("vfs.ffs.freefiles", freefiles, &size) < 0|| sysctlnametomib("vfs.ffs.freedirs", freedirs, &size) < 0 || sysctlnametomib("vfs.ffs.freeblks", freeblks, &size) < 0) { pwarn("KERNEL LACKS BACKGROUND FSCK SUPPORT\n"); return (0); } /* * When kernel lacks runtime bgfsck superblock summary * adjustment functionality, it does not mean we can not * continue, as old kernels will recompute the summary at * mount time. However, it will be an unexpected softupdates * inconsistency if it turns out that the summary is still * incorrect. Set a flag so subsequent operation can know this. */ bkgrdsumadj = 1; if (sysctlnametomib("vfs.ffs.adjndir", adjndir, &size) < 0 || sysctlnametomib("vfs.ffs.adjnbfree", adjnbfree, &size) < 0 || sysctlnametomib("vfs.ffs.adjnifree", adjnifree, &size) < 0 || sysctlnametomib("vfs.ffs.adjnffree", adjnffree, &size) < 0 || sysctlnametomib("vfs.ffs.adjnumclusters", adjnumclusters, &size) < 0) { bkgrdsumadj = 0; pwarn("KERNEL LACKS RUNTIME SUPERBLOCK SUMMARY ADJUSTMENT " "SUPPORT\n"); } /* Find or create the snapshot directory */ snprintf(snapname, sizeof snapname, "%s/.snap", mntp->f_mntonname); if (stat(snapname, &snapdir) < 0) { if (errno != ENOENT) { pwarn("CANNOT FIND SNAPSHOT DIRECTORY %s: %s, CANNOT " "RUN IN BACKGROUND\n", snapname, strerror(errno)); return (0); } if ((grp = getgrnam("operator")) == NULL || mkdir(snapname, 0770) < 0 || chown(snapname, -1, grp->gr_gid) < 0 || chmod(snapname, 0770) < 0) { pwarn("CANNOT CREATE SNAPSHOT DIRECTORY %s: %s, " "CANNOT RUN IN BACKGROUND\n", snapname, strerror(errno)); return (0); } } else if (!S_ISDIR(snapdir.st_mode)) { pwarn("%s IS NOT A DIRECTORY, CANNOT RUN IN BACKGROUND\n", snapname); return (0); } /* Create the snapshot file */ iov = NULL; iovlen = 0; errmsg[0] = '\0'; snprintf(snapname, sizeof snapname, "%s/.snap/fsck_snapshot", mntp->f_mntonname); build_iovec(&iov, &iovlen, "fstype", "ffs", 4); build_iovec(&iov, &iovlen, "from", snapname, (size_t)-1); build_iovec(&iov, &iovlen, "fspath", mntp->f_mntonname, (size_t)-1); build_iovec(&iov, &iovlen, "errmsg", errmsg, sizeof(errmsg)); build_iovec(&iov, &iovlen, "update", NULL, 0); build_iovec(&iov, &iovlen, "snapshot", NULL, 0); /* Create snapshot, removing old snapshot if it exists */ while (nmount(iov, iovlen, mntp->f_flags) < 0) { if (errno == EEXIST && unlink(snapname) == 0) continue; pwarn("CANNOT CREATE SNAPSHOT %s: %s %s\n", snapname, strerror(errno), errmsg); return (0); } /* Open snapshot */ if (openfilesys(snapname) == 0) { unlink(snapname); pwarn("CANNOT OPEN SNAPSHOT %s: %s, CANNOT RUN IN " "BACKGROUND\n", snapname, strerror(errno)); return (0); } /* Immediately unlink snapshot so that it will be deleted when closed */ unlink(snapname); free(sblock.fs_csp); free(sblock.fs_si); if (readsb() == 0) { pwarn("CANNOT READ SNAPSHOT SUPERBLOCK\n"); return (0); } *filesys = snapname; cmd.version = FFS_CMD_VERSION; cmd.handle = fsreadfd; return (1); } static void usage(void) { (void) fprintf(stderr, "usage: %s [-BCdEFfnpRrSyZ] [-b block] [-c level] [-m mode] filesystem ...\n", getprogname()); exit(1); } void infohandler(int sig __unused) { got_siginfo = 1; } void alarmhandler(int sig __unused) { got_sigalarm = 1; } diff --git a/sbin/fsck_ffs/pass5.c b/sbin/fsck_ffs/pass5.c index 58de6791903f..8980ba60e03a 100644 --- a/sbin/fsck_ffs/pass5.c +++ b/sbin/fsck_ffs/pass5.c @@ -1,648 +1,647 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char sccsid[] = "@(#)pass5.c 8.9 (Berkeley) 4/28/95"; #endif /* not lint */ #endif #include #include #include #include #include #include #include #include #include -#include #include "fsck.h" static void check_maps(u_char *, u_char *, int, ufs2_daddr_t, const char *, int *, int, int, int); static void clear_blocks(ufs2_daddr_t start, ufs2_daddr_t end); void pass5(void) { int c, i, j, blk, frags, basesize, mapsize; int inomapsize, blkmapsize; struct fs *fs = &sblock; ufs2_daddr_t d, dbase, dmax, start; int rewritecg = 0; ino_t inum; struct csum *cs; struct csum_total cstotal; struct inodesc idesc[3]; char buf[MAXBSIZE]; struct cg *cg, *newcg = (struct cg *)buf; struct bufarea *cgbp; inoinfo(UFS_WINO)->ino_state = USTATE; memset(newcg, 0, (size_t)fs->fs_cgsize); newcg->cg_niblk = fs->fs_ipg; /* check to see if we are to add a cylinder group check hash */ if ((ckhashadd & CK_CYLGRP) != 0) rewritecg = 1; if (cvtlevel >= 3) { if (fs->fs_maxcontig < 2 && fs->fs_contigsumsize > 0) { if (preen) pwarn("DELETING CLUSTERING MAPS\n"); if (preen || reply("DELETE CLUSTERING MAPS")) { fs->fs_contigsumsize = 0; rewritecg = 1; sbdirty(); } } if (fs->fs_maxcontig > 1) { const char *doit = NULL; if (fs->fs_contigsumsize < 1) { doit = "CREAT"; } else if (fs->fs_contigsumsize < fs->fs_maxcontig && fs->fs_contigsumsize < FS_MAXCONTIG) { doit = "EXPAND"; } if (doit) { i = fs->fs_contigsumsize; fs->fs_contigsumsize = MIN(fs->fs_maxcontig, FS_MAXCONTIG); if (CGSIZE(fs) > (u_int)fs->fs_bsize) { pwarn("CANNOT %s CLUSTER MAPS\n", doit); fs->fs_contigsumsize = i; } else if (preen || reply("CREATE CLUSTER MAPS")) { if (preen) pwarn("%sING CLUSTER MAPS\n", doit); fs->fs_cgsize = fragroundup(fs, CGSIZE(fs)); rewritecg = 1; sbdirty(); } } } } basesize = sizeof(*newcg); if (sblock.fs_magic == FS_UFS2_MAGIC) { newcg->cg_iusedoff = basesize; } else { /* * We reserve the space for the old rotation summary * tables for the benefit of old kernels, but do not * maintain them in modern kernels. In time, they can * go away. */ newcg->cg_old_btotoff = basesize; newcg->cg_old_boff = newcg->cg_old_btotoff + fs->fs_old_cpg * sizeof(int32_t); newcg->cg_iusedoff = newcg->cg_old_boff + fs->fs_old_cpg * fs->fs_old_nrpos * sizeof(u_int16_t); memset(&newcg[1], 0, newcg->cg_iusedoff - basesize); } inomapsize = howmany(fs->fs_ipg, CHAR_BIT); newcg->cg_freeoff = newcg->cg_iusedoff + inomapsize; blkmapsize = howmany(fs->fs_fpg, CHAR_BIT); newcg->cg_nextfreeoff = newcg->cg_freeoff + blkmapsize; if (fs->fs_contigsumsize > 0) { newcg->cg_clustersumoff = newcg->cg_nextfreeoff - sizeof(u_int32_t); newcg->cg_clustersumoff = roundup(newcg->cg_clustersumoff, sizeof(u_int32_t)); newcg->cg_clusteroff = newcg->cg_clustersumoff + (fs->fs_contigsumsize + 1) * sizeof(u_int32_t); newcg->cg_nextfreeoff = newcg->cg_clusteroff + howmany(fragstoblks(fs, fs->fs_fpg), CHAR_BIT); } newcg->cg_magic = CG_MAGIC; mapsize = newcg->cg_nextfreeoff - newcg->cg_iusedoff; memset(&idesc[0], 0, sizeof idesc); for (i = 0; i < 3; i++) idesc[i].id_type = ADDR; memset(&cstotal, 0, sizeof(struct csum_total)); dmax = blknum(fs, fs->fs_size + fs->fs_frag - 1); for (d = fs->fs_size; d < dmax; d++) setbmap(d); for (c = 0; c < fs->fs_ncg; c++) { if (got_siginfo) { printf("%s: phase 5: cyl group %d of %d (%d%%)\n", cdevname, c, sblock.fs_ncg, c * 100 / sblock.fs_ncg); got_siginfo = 0; } if (got_sigalarm) { setproctitle("%s p5 %d%%", cdevname, c * 100 / sblock.fs_ncg); got_sigalarm = 0; } cgbp = cglookup(c); cg = cgbp->b_un.b_cg; if (!cg_chkmagic(cg)) pfatal("CG %d: BAD MAGIC NUMBER\n", c); /* * If we have a cylinder group check hash and are not adding * it for the first time, verify that it is good. */ if ((fs->fs_metackhash & CK_CYLGRP) != 0 && (ckhashadd & CK_CYLGRP) == 0) { uint32_t ckhash, thishash; ckhash = cg->cg_ckhash; cg->cg_ckhash = 0; thishash = calculate_crc32c(~0L, cg, fs->fs_cgsize); if (ckhash == thishash) { cg->cg_ckhash = ckhash; } else { pwarn("CG %d: BAD CHECK-HASH %#x vs %#x\n", c, ckhash, thishash); cg->cg_ckhash = thishash; cgdirty(cgbp); } } newcg->cg_time = cg->cg_time; newcg->cg_old_time = cg->cg_old_time; newcg->cg_unrefs = cg->cg_unrefs; newcg->cg_ckhash = cg->cg_ckhash; newcg->cg_cgx = c; dbase = cgbase(fs, c); dmax = dbase + fs->fs_fpg; if (dmax > fs->fs_size) dmax = fs->fs_size; newcg->cg_ndblk = dmax - dbase; if (fs->fs_magic == FS_UFS1_MAGIC) { if (c == fs->fs_ncg - 1) newcg->cg_old_ncyl = howmany(newcg->cg_ndblk, fs->fs_fpg / fs->fs_old_cpg); else newcg->cg_old_ncyl = fs->fs_old_cpg; newcg->cg_old_niblk = fs->fs_ipg; newcg->cg_niblk = 0; } if (fs->fs_contigsumsize > 0) newcg->cg_nclusterblks = newcg->cg_ndblk / fs->fs_frag; newcg->cg_cs.cs_ndir = 0; newcg->cg_cs.cs_nffree = 0; newcg->cg_cs.cs_nbfree = 0; newcg->cg_cs.cs_nifree = fs->fs_ipg; if (cg->cg_rotor >= 0 && cg->cg_rotor < newcg->cg_ndblk) newcg->cg_rotor = cg->cg_rotor; else newcg->cg_rotor = 0; if (cg->cg_frotor >= 0 && cg->cg_frotor < newcg->cg_ndblk) newcg->cg_frotor = cg->cg_frotor; else newcg->cg_frotor = 0; if (cg->cg_irotor >= 0 && cg->cg_irotor < fs->fs_ipg) newcg->cg_irotor = cg->cg_irotor; else newcg->cg_irotor = 0; if (fs->fs_magic == FS_UFS1_MAGIC) { newcg->cg_initediblk = 0; } else { if ((unsigned)cg->cg_initediblk > fs->fs_ipg) newcg->cg_initediblk = fs->fs_ipg; else newcg->cg_initediblk = cg->cg_initediblk; } memset(&newcg->cg_frsum[0], 0, sizeof newcg->cg_frsum); memset(cg_inosused(newcg), 0, (size_t)(mapsize)); inum = fs->fs_ipg * c; for (i = 0; i < inostathead[c].il_numalloced; inum++, i++) { switch (inoinfo(inum)->ino_state) { case USTATE: break; case DSTATE: case DCLEAR: case DFOUND: case DZLINK: newcg->cg_cs.cs_ndir++; /* FALLTHROUGH */ case FSTATE: case FCLEAR: case FZLINK: newcg->cg_cs.cs_nifree--; setbit(cg_inosused(newcg), i); break; default: if (inum < UFS_ROOTINO) break; errx(EEXIT, "BAD STATE %d FOR INODE I=%ju", inoinfo(inum)->ino_state, (uintmax_t)inum); } } if (c == 0) for (i = 0; i < (int)UFS_ROOTINO; i++) { setbit(cg_inosused(newcg), i); newcg->cg_cs.cs_nifree--; } start = -1; for (i = 0, d = dbase; d < dmax; d += fs->fs_frag, i += fs->fs_frag) { frags = 0; for (j = 0; j < fs->fs_frag; j++) { if (testbmap(d + j)) { if ((Eflag || Zflag) && start != -1) { clear_blocks(start, d + j - 1); start = -1; } continue; } if (start == -1) start = d + j; setbit(cg_blksfree(newcg), i + j); frags++; } if (frags == fs->fs_frag) { newcg->cg_cs.cs_nbfree++; if (fs->fs_contigsumsize > 0) setbit(cg_clustersfree(newcg), i / fs->fs_frag); } else if (frags > 0) { newcg->cg_cs.cs_nffree += frags; blk = blkmap(fs, cg_blksfree(newcg), i); ffs_fragacct(fs, blk, newcg->cg_frsum, 1); } } if ((Eflag || Zflag) && start != -1) clear_blocks(start, d - 1); if (fs->fs_contigsumsize > 0) { int32_t *sump = cg_clustersum(newcg); u_char *mapp = cg_clustersfree(newcg); int map = *mapp++; int bit = 1; int run = 0; for (i = 0; i < newcg->cg_nclusterblks; i++) { if ((map & bit) != 0) { run++; } else if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; run = 0; } if ((i & (CHAR_BIT - 1)) != (CHAR_BIT - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; } } if (bkgrdflag != 0) { cstotal.cs_nffree += cg->cg_cs.cs_nffree; cstotal.cs_nbfree += cg->cg_cs.cs_nbfree; cstotal.cs_nifree += cg->cg_cs.cs_nifree; cstotal.cs_ndir += cg->cg_cs.cs_ndir; } else { cstotal.cs_nffree += newcg->cg_cs.cs_nffree; cstotal.cs_nbfree += newcg->cg_cs.cs_nbfree; cstotal.cs_nifree += newcg->cg_cs.cs_nifree; cstotal.cs_ndir += newcg->cg_cs.cs_ndir; } cs = &fs->fs_cs(fs, c); if (cursnapshot == 0 && memcmp(&newcg->cg_cs, cs, sizeof *cs) != 0 && dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(cs, &newcg->cg_cs, sizeof *cs); sbdirty(); } if (rewritecg) { memmove(cg, newcg, (size_t)fs->fs_cgsize); cgdirty(cgbp); continue; } if (cursnapshot == 0 && memcmp(newcg, cg, basesize) != 0 && dofix(&idesc[2], "SUMMARY INFORMATION BAD")) { memmove(cg, newcg, (size_t)basesize); cgdirty(cgbp); } if (bkgrdflag != 0 || usedsoftdep || debug) update_maps(cg, newcg, bkgrdflag); if (cursnapshot == 0 && memcmp(cg_inosused(newcg), cg_inosused(cg), mapsize) != 0 && dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { memmove(cg_inosused(cg), cg_inosused(newcg), (size_t)mapsize); cgdirty(cgbp); } } if (cursnapshot == 0 && memcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal) != 0 && dofix(&idesc[0], "SUMMARY BLK COUNT(S) WRONG IN SUPERBLK")) { if (debug) { printf("cstotal is currently: %jd dirs, %jd blks free, " "%jd frags free, %jd inos free, %jd clusters\n", (intmax_t)fs->fs_cstotal.cs_ndir, (intmax_t)fs->fs_cstotal.cs_nbfree, (intmax_t)fs->fs_cstotal.cs_nffree, (intmax_t)fs->fs_cstotal.cs_nifree, (intmax_t)fs->fs_cstotal.cs_numclusters); printf("cstotal ought to be: %jd dirs, %jd blks free, " "%jd frags free, %jd inos free, %jd clusters\n", (intmax_t)cstotal.cs_ndir, (intmax_t)cstotal.cs_nbfree, (intmax_t)cstotal.cs_nffree, (intmax_t)cstotal.cs_nifree, (intmax_t)cstotal.cs_numclusters); } memmove(&fs->fs_cstotal, &cstotal, sizeof cstotal); fs->fs_ronly = 0; fs->fs_fmod = 0; sbdirty(); } /* * When doing background fsck on a snapshot, figure out whether * the superblock summary is inaccurate and correct it when * necessary. */ if (cursnapshot != 0) { cmd.size = 1; cmd.value = cstotal.cs_ndir - fs->fs_cstotal.cs_ndir; if (cmd.value != 0) { if (debug) printf("adjndir by %+" PRIi64 "\n", cmd.value); if (bkgrdsumadj == 0 || sysctl(adjndir, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST NUMBER OF DIRECTORIES", cmd.value); } cmd.value = cstotal.cs_nbfree - fs->fs_cstotal.cs_nbfree; if (cmd.value != 0) { if (debug) printf("adjnbfree by %+" PRIi64 "\n", cmd.value); if (bkgrdsumadj == 0 || sysctl(adjnbfree, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST NUMBER OF FREE BLOCKS", cmd.value); } cmd.value = cstotal.cs_nifree - fs->fs_cstotal.cs_nifree; if (cmd.value != 0) { if (debug) printf("adjnifree by %+" PRIi64 "\n", cmd.value); if (bkgrdsumadj == 0 || sysctl(adjnifree, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST NUMBER OF FREE INODES", cmd.value); } cmd.value = cstotal.cs_nffree - fs->fs_cstotal.cs_nffree; if (cmd.value != 0) { if (debug) printf("adjnffree by %+" PRIi64 "\n", cmd.value); if (bkgrdsumadj == 0 || sysctl(adjnffree, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST NUMBER OF FREE FRAGS", cmd.value); } cmd.value = cstotal.cs_numclusters - fs->fs_cstotal.cs_numclusters; if (cmd.value != 0) { if (debug) printf("adjnumclusters by %+" PRIi64 "\n", cmd.value); if (bkgrdsumadj == 0 || sysctl(adjnumclusters, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST NUMBER OF FREE CLUSTERS", cmd.value); } } } /* * Compare the original cylinder group inode and block bitmaps with the * updated cylinder group inode and block bitmaps. Free inodes and blocks * that have been added. Complain if any previously freed inodes blocks * are now allocated. */ void update_maps( struct cg *oldcg, /* cylinder group of claimed allocations */ struct cg *newcg, /* cylinder group of determined allocations */ int usesysctl) /* 1 => use sysctl interface to update maps */ { int inomapsize, excessdirs; struct fs *fs = &sblock; inomapsize = howmany(fs->fs_ipg, CHAR_BIT); excessdirs = oldcg->cg_cs.cs_ndir - newcg->cg_cs.cs_ndir; if (excessdirs < 0) { pfatal("LOST %d DIRECTORIES\n", -excessdirs); excessdirs = 0; } if (excessdirs > 0) check_maps(cg_inosused(newcg), cg_inosused(oldcg), inomapsize, oldcg->cg_cgx * (ufs2_daddr_t)fs->fs_ipg, "DIR", freedirs, 0, excessdirs, usesysctl); check_maps(cg_inosused(newcg), cg_inosused(oldcg), inomapsize, oldcg->cg_cgx * (ufs2_daddr_t)fs->fs_ipg, "FILE", freefiles, excessdirs, fs->fs_ipg, usesysctl); check_maps(cg_blksfree(oldcg), cg_blksfree(newcg), howmany(fs->fs_fpg, CHAR_BIT), oldcg->cg_cgx * (ufs2_daddr_t)fs->fs_fpg, "FRAG", freeblks, 0, fs->fs_fpg, usesysctl); } static void check_maps( u_char *map1, /* map of claimed allocations */ u_char *map2, /* map of determined allocations */ int mapsize, /* size of above two maps */ ufs2_daddr_t startvalue, /* resource value for first element in map */ const char *name, /* name of resource found in maps */ int *opcode, /* sysctl opcode to free resource */ int skip, /* number of entries to skip before starting to free */ int limit, /* limit on number of entries to free */ int usesysctl) /* 1 => use sysctl interface to update maps */ { # define BUFSIZE 16 char buf[BUFSIZE]; long i, j, k, l, m, size; ufs2_daddr_t n, astart, aend, ustart, uend; void (*msg)(const char *fmt, ...); if (usesysctl) msg = pfatal; else msg = pwarn; astart = ustart = aend = uend = -1; for (i = 0; i < mapsize; i++) { j = *map1++; k = *map2++; if (j == k) continue; for (m = 0, l = 1; m < CHAR_BIT; m++, l <<= 1) { if ((j & l) == (k & l)) continue; n = startvalue + i * CHAR_BIT + m; if ((j & l) != 0) { if (astart == -1) { astart = aend = n; continue; } if (aend + 1 == n) { aend = n; continue; } if (astart == aend) (*msg)("ALLOCATED %s %" PRId64 " MARKED FREE\n", name, astart); else (*msg)("%s %sS %" PRId64 "-%" PRId64 " MARKED FREE\n", "ALLOCATED", name, astart, aend); astart = aend = n; } else { if (ustart == -1) { ustart = uend = n; continue; } if (uend + 1 == n) { uend = n; continue; } size = uend - ustart + 1; if (size <= skip) { skip -= size; ustart = uend = n; continue; } if (skip > 0) { ustart += skip; size -= skip; skip = 0; } if (size > limit) size = limit; if (debug && size == 1) pwarn("%s %s %" PRId64 " MARKED USED\n", "UNALLOCATED", name, ustart); else if (debug) pwarn("%s %sS %" PRId64 "-%" PRId64 " MARKED USED\n", "UNALLOCATED", name, ustart, ustart + size - 1); if (usesysctl != 0) { cmd.value = ustart; cmd.size = size; if (sysctl(opcode, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) { snprintf(buf, BUFSIZE, "FREE %s", name); rwerror(buf, cmd.value); } } limit -= size; if (limit <= 0) return; ustart = uend = n; } } } if (astart != -1) { if (astart == aend) (*msg)("ALLOCATED %s %" PRId64 " MARKED FREE\n", name, astart); else (*msg)("ALLOCATED %sS %" PRId64 "-%" PRId64 " MARKED FREE\n", name, astart, aend); } if (ustart != -1) { size = uend - ustart + 1; if (size <= skip) return; if (skip > 0) { ustart += skip; size -= skip; } if (size > limit) size = limit; if (debug) { if (size == 1) pwarn("UNALLOCATED %s %" PRId64 " MARKED USED\n", name, ustart); else pwarn("UNALLOCATED %sS %" PRId64 "-%" PRId64 " MARKED USED\n", name, ustart, ustart + size - 1); } if (usesysctl != 0) { cmd.value = ustart; cmd.size = size; if (sysctl(opcode, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) { snprintf(buf, BUFSIZE, "FREE %s", name); rwerror(buf, cmd.value); } } } } static void clear_blocks(ufs2_daddr_t start, ufs2_daddr_t end) { if (debug) printf("Zero frags %jd to %jd\n", start, end); if (Zflag) blzero(fswritefd, fsbtodb(&sblock, start), lfragtosize(&sblock, end - start + 1)); if (Eflag) blerase(fswritefd, fsbtodb(&sblock, start), lfragtosize(&sblock, end - start + 1)); } diff --git a/sbin/fsck_ffs/setup.c b/sbin/fsck_ffs/setup.c index c9aa19c7eded..b3d58749015e 100644 --- a/sbin/fsck_ffs/setup.c +++ b/sbin/fsck_ffs/setup.c @@ -1,630 +1,629 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char sccsid[] = "@(#)setup.c 8.10 (Berkeley) 5/9/95"; #endif /* not lint */ #endif #include #include #include #include #define FSTYPENAMES #include #include #include #include #include #include #include #include #include #include #include -#include #include "fsck.h" struct inohash *inphash; /* hash list of directory inode info */ struct inoinfo **inpsort; /* disk order list of directory inodes */ struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */ int snapcnt; /* number of active snapshots */ char *copybuf; /* buffer to copy snapshot blocks */ static int sbhashfailed; #define POWEROF2(num) (((num) & ((num) - 1)) == 0) static int calcsb(char *dev, int devfd, struct fs *fs); static void saverecovery(int readfd, int writefd); static int chkrecovery(int devfd); static int getlbnblkno(struct inodesc *); static int checksnapinfo(struct inode *); /* * Read in a superblock finding an alternate if necessary. * Return 1 if successful, 0 if unsuccessful, -1 if file system * is already clean (ckclean and preen mode only). */ int setup(char *dev) { long i, bmapsize; struct inode ip; /* * We are expected to have an open file descriptor and a superblock. */ if (fsreadfd < 0 || havesb == 0) { if (debug) { if (fsreadfd < 0) printf("setup: missing fsreadfd\n"); else printf("setup: missing superblock\n"); } return (0); } if (preen == 0) printf("** %s", dev); if (bkgrdflag == 0 && (nflag || (fswritefd = open(dev, O_WRONLY)) < 0)) { fswritefd = -1; if (preen) pfatal("NO WRITE ACCESS"); printf(" (NO WRITE)"); } if (preen == 0) printf("\n"); if (sbhashfailed != 0) { pwarn("SUPERBLOCK CHECK HASH FAILED"); if (fswritefd == -1) pwarn("OPENED READONLY SO CANNOT CORRECT CHECK HASH\n"); else if (preen || reply("CORRECT CHECK HASH") != 0) { if (preen) printf(" (CORRECTED)\n"); sblock.fs_clean = 0; sbdirty(); } } if (skipclean && ckclean && sblock.fs_clean) { pwarn("FILE SYSTEM CLEAN; SKIPPING CHECKS\n"); return (-1); } maxfsblock = sblock.fs_size; maxino = sblock.fs_ncg * sblock.fs_ipg; /* * Check and potentially fix certain fields in the super block. */ if (sblock.fs_optim != FS_OPTTIME && sblock.fs_optim != FS_OPTSPACE) { pfatal("UNDEFINED OPTIMIZATION IN SUPERBLOCK"); if (reply("SET TO DEFAULT") == 1) { sblock.fs_optim = FS_OPTTIME; sbdirty(); } } if ((sblock.fs_minfree < 0 || sblock.fs_minfree > 99)) { pfatal("IMPOSSIBLE MINFREE=%d IN SUPERBLOCK", sblock.fs_minfree); if (reply("SET TO DEFAULT") == 1) { sblock.fs_minfree = 10; sbdirty(); } } if (sblock.fs_magic == FS_UFS1_MAGIC && sblock.fs_old_inodefmt < FS_44INODEFMT) { pwarn("Format of file system is too old.\n"); pwarn("Must update to modern format using a version of fsck\n"); pfatal("from before 2002 with the command ``fsck -c 2''\n"); exit(EEXIT); } if (preen == 0 && yflag == 0 && sblock.fs_magic == FS_UFS2_MAGIC && fswritefd != -1 && chkrecovery(fsreadfd) == 0 && reply("SAVE DATA TO FIND ALTERNATE SUPERBLOCKS") != 0) saverecovery(fsreadfd, fswritefd); /* * allocate and initialize the necessary maps */ bufinit(); bmapsize = roundup(howmany(maxfsblock, CHAR_BIT), sizeof(short)); blockmap = Calloc((unsigned)bmapsize, sizeof (char)); if (blockmap == NULL) { printf("cannot alloc %u bytes for blockmap\n", (unsigned)bmapsize); goto badsb; } inostathead = Calloc(sblock.fs_ncg, sizeof(struct inostatlist)); if (inostathead == NULL) { printf("cannot alloc %u bytes for inostathead\n", (unsigned)(sizeof(struct inostatlist) * (sblock.fs_ncg))); goto badsb; } numdirs = sblock.fs_cstotal.cs_ndir; dirhash = MAX(numdirs / 2, 1); inplast = 0; listmax = numdirs + 10; inpsort = (struct inoinfo **)Calloc(listmax, sizeof(struct inoinfo *)); inphash = (struct inohash *)Calloc(dirhash, sizeof(struct inohash)); if (inpsort == NULL || inphash == NULL) { printf("cannot alloc %ju bytes for inphash\n", (uintmax_t)numdirs * sizeof(struct inoinfo *)); goto badsb; } if (sblock.fs_flags & FS_DOSOFTDEP) usedsoftdep = 1; else usedsoftdep = 0; /* * Collect any snapshot inodes so that we can allow them to * claim any blocks that we free. The code for doing this is * imported here and into inode.c from sys/ufs/ffs/ffs_snapshot.c. */ for (snapcnt = 0; snapcnt < FSMAXSNAP; snapcnt++) { if (sblock.fs_snapinum[snapcnt] == 0) break; ginode(sblock.fs_snapinum[snapcnt], &ip); if ((DIP(ip.i_dp, di_mode) & IFMT) == IFREG && (DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) != 0 && checksnapinfo(&ip)) { if (debug) printf("Load snapshot %jd\n", (intmax_t)sblock.fs_snapinum[snapcnt]); snaplist[snapcnt] = ip; continue; } printf("Removing non-snapshot inode %ju from snapshot list\n", (uintmax_t)sblock.fs_snapinum[snapcnt]); irelse(&ip); for (i = snapcnt + 1; i < FSMAXSNAP; i++) { if (sblock.fs_snapinum[i] == 0) break; sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i]; } sblock.fs_snapinum[i - 1] = 0; snapcnt--; sbdirty(); } if (snapcnt > 0 && copybuf == NULL) { - copybuf = Malloc(sblock.fs_bsize); + copybuf = Balloc(sblock.fs_bsize); if (copybuf == NULL) errx(EEXIT, "cannot allocate space for snapshot " "copy buffer"); } return (1); badsb: ckfini(0); return (0); } /* * Check for valid snapshot information. * * Each snapshot has a list of blocks that have been copied. This list * is consulted before checking the snapshot inode. Its purpose is to * speed checking of commonly checked blocks and to avoid recursive * checks of the snapshot inode. In particular, the list must contain * the superblock, the superblock summary information, and all the * cylinder group blocks. The list may contain other commonly checked * pointers such as those of the blocks that contain the snapshot inodes. * The list is sorted into block order to allow binary search lookup. * * The twelve direct direct block pointers of the snapshot are always * copied, so we test for them first before checking the list itself * (i.e., they are not in the list). * * The checksnapinfo() routine needs to ensure that the list contains at * least the super block, its summary information, and the cylinder groups. * Here we check the list first for the superblock, zero or more cylinder * groups up to the location of the superblock summary information, the * summary group information, and any remaining cylinder group maps that * follow it. We skip over any other entries in the list. */ #define CHKBLKINLIST(chkblk) \ /* All UFS_NDADDR blocks are copied */ \ if ((chkblk) >= UFS_NDADDR) { \ /* Skip over blocks that are not of interest */ \ while (*blkp < (chkblk) && blkp < lastblkp) \ blkp++; \ /* Fail if end of list and not all blocks found */ \ if (blkp >= lastblkp) { \ pwarn("UFS%d snapshot inode %jd failed: " \ "improper block list length (%jd)\n", \ sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, \ (intmax_t)snapip->i_number, \ (intmax_t)(lastblkp - &snapblklist[0])); \ status = 0; \ } \ /* Fail if block we seek is missing */ \ else if (*blkp++ != (chkblk)) { \ pwarn("UFS%d snapshot inode %jd failed: " \ "block list (%jd) != %s (%jd)\n", \ sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, \ (intmax_t)snapip->i_number, \ (intmax_t)blkp[-1], #chkblk, \ (intmax_t)chkblk); \ status = 0; \ } \ } static int checksnapinfo(struct inode *snapip) { struct fs *fs; struct bufarea *bp; struct inodesc idesc; daddr_t *snapblklist, *blkp, *lastblkp, csblkno; int cg, loc, len, status; ufs_lbn_t lbn; size_t size; fs = &sblock; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = getlbnblkno; idesc.id_number = snapip->i_number; lbn = howmany(fs->fs_size, fs->fs_frag); idesc.id_parent = lbn; /* sought after blkno */ if ((ckinode(snapip->i_dp, &idesc) & FOUND) == 0) return (0); size = fragroundup(fs, DIP(snapip->i_dp, di_size) - lblktosize(fs, lbn)); bp = getdatablk(idesc.id_parent, size, BT_DATA); if (bp->b_errs != 0) return (0); snapblklist = (daddr_t *)bp->b_un.b_buf; /* * snapblklist[0] is the size of the list * snapblklist[1] is the first element of the list * * We need to be careful to bound the size of the list and verify * that we have not run off the end of it if it or its size has * been corrupted. */ blkp = &snapblklist[1]; lastblkp = &snapblklist[MAX(0, MIN(snapblklist[0] + 1, size / sizeof(daddr_t)))]; status = 1; /* Check that the superblock is listed. */ CHKBLKINLIST(lblkno(fs, fs->fs_sblockloc)); if (status == 0) goto out; /* * Calculate where the summary information is located. * Usually it is in the first cylinder group, but growfs * may move it to the first cylinder group that it adds. * * Check all cylinder groups up to the summary information. */ csblkno = fragstoblks(fs, fs->fs_csaddr); for (cg = 0; cg < fs->fs_ncg; cg++) { if (fragstoblks(fs, cgtod(fs, cg)) > csblkno) break; CHKBLKINLIST(fragstoblks(fs, cgtod(fs, cg))); if (status == 0) goto out; } /* Check the summary information block(s). */ len = howmany(fs->fs_cssize, fs->fs_bsize); for (loc = 0; loc < len; loc++) { CHKBLKINLIST(csblkno + loc); if (status == 0) goto out; } /* Check the remaining cylinder groups. */ for (; cg < fs->fs_ncg; cg++) { CHKBLKINLIST(fragstoblks(fs, cgtod(fs, cg))); if (status == 0) goto out; } out: brelse(bp); return (status); } /* * Return the block number associated with a specified inode lbn. * Requested lbn is in id_parent. If found, block is returned in * id_parent. */ static int getlbnblkno(struct inodesc *idesc) { if (idesc->id_lbn < idesc->id_parent) return (KEEPON); idesc->id_parent = idesc->id_blkno; return (STOP | FOUND); } /* * Open a device or file to be checked by fsck. */ int openfilesys(char *dev) { struct stat statb; int saved_fsreadfd; if (stat(dev, &statb) < 0) return (0); if ((statb.st_mode & S_IFMT) != S_IFCHR && (statb.st_mode & S_IFMT) != S_IFBLK) { if (bkgrdflag != 0 && (statb.st_flags & SF_SNAPSHOT) == 0) { pwarn("BACKGROUND FSCK LACKS A SNAPSHOT\n"); return (0); } if (bkgrdflag != 0) { cursnapshot = statb.st_ino; } else { pwarn("%s IS NOT A DISK DEVICE\n", dev); if (preen || reply("CONTINUE") == 0) return (0); } } saved_fsreadfd = fsreadfd; if ((fsreadfd = open(dev, O_RDONLY)) < 0) { fsreadfd = saved_fsreadfd; return (0); } if (saved_fsreadfd != -1) close(saved_fsreadfd); return (1); } /* * Read in the super block and its summary info. */ int readsb(void) { struct fs *fs; sbhashfailed = 0; readcnt[sblk.b_type]++; /* * If bflag is given, then check just that superblock. */ if (bflag) { switch (sbget(fsreadfd, &fs, bflag * dev_bsize, 0)) { case 0: goto goodsb; case EINTEGRITY: printf("Check hash failed for superblock at %jd\n", bflag); return (0); case ENOENT: printf("%jd is not a file system superblock\n", bflag); return (0); case EIO: default: printf("I/O error reading %jd\n", bflag); return (0); } } /* * Check for the standard superblock and use it if good. */ if (sbget(fsreadfd, &fs, UFS_STDSB, UFS_NOMSG) == 0) goto goodsb; /* * Check if the only problem is a check-hash failure. */ skipclean = 0; if (sbget(fsreadfd, &fs, UFS_STDSB, UFS_NOMSG | UFS_NOHASHFAIL) == 0) { sbhashfailed = 1; goto goodsb; } /* * Do an exhaustive search for a usable superblock. */ switch (sbsearch(fsreadfd, &fs, 0)) { case 0: goto goodsb; case ENOENT: printf("SEARCH FOR ALTERNATE SUPER-BLOCK FAILED. " "YOU MUST USE THE\n-b OPTION TO FSCK TO SPECIFY " "THE LOCATION OF AN ALTERNATE\nSUPER-BLOCK TO " "SUPPLY NEEDED INFORMATION; SEE fsck_ffs(8).\n"); return (0); case EIO: default: printf("I/O error reading a usable superblock\n"); return (0); } goodsb: memcpy(&sblock, fs, fs->fs_sbsize); free(fs); /* * Compute block size that the file system is based on, * according to fsbtodb, and adjust superblock block number * so we can tell if this is an alternate later. */ dev_bsize = sblock.fs_fsize / fsbtodb(&sblock, 1); sblk.b_bno = sblock.fs_sblockactualloc / dev_bsize; sblk.b_size = SBLOCKSIZE; /* * If not yet done, update UFS1 superblock with new wider fields. */ if (sblock.fs_magic == FS_UFS1_MAGIC && sblock.fs_maxbsize != sblock.fs_bsize) { sblock.fs_maxbsize = sblock.fs_bsize; sblock.fs_time = sblock.fs_old_time; sblock.fs_size = sblock.fs_old_size; sblock.fs_dsize = sblock.fs_old_dsize; sblock.fs_csaddr = sblock.fs_old_csaddr; sblock.fs_cstotal.cs_ndir = sblock.fs_old_cstotal.cs_ndir; sblock.fs_cstotal.cs_nbfree = sblock.fs_old_cstotal.cs_nbfree; sblock.fs_cstotal.cs_nifree = sblock.fs_old_cstotal.cs_nifree; sblock.fs_cstotal.cs_nffree = sblock.fs_old_cstotal.cs_nffree; } havesb = 1; return (1); } void sblock_init(void) { fsreadfd = -1; fswritefd = -1; fsmodified = 0; lfdir = 0; initbarea(&sblk, BT_SUPERBLK); - sblk.b_un.b_buf = Malloc(SBLOCKSIZE); + sblk.b_un.b_buf = Balloc(SBLOCKSIZE); if (sblk.b_un.b_buf == NULL) errx(EEXIT, "cannot allocate space for superblock"); dev_bsize = secsize = DEV_BSIZE; } /* * Calculate a prototype superblock based on information in the boot area. * When done the cgsblock macro can be calculated and the fs_ncg field * can be used. Do NOT attempt to use other macros without verifying that * their needed information is available! */ static int calcsb(char *dev, int devfd, struct fs *fs) { struct fsrecovery *fsr; char *fsrbuf; u_int secsize; /* * We need fragments-per-group and the partition-size. * * Newfs stores these details at the end of the boot block area * at the start of the filesystem partition. If they have been * overwritten by a boot block, we fail. But usually they are * there and we can use them. */ if (ioctl(devfd, DIOCGSECTORSIZE, &secsize) == -1) return (0); - fsrbuf = Malloc(secsize); + fsrbuf = Balloc(secsize); if (fsrbuf == NULL) errx(EEXIT, "calcsb: cannot allocate recovery buffer"); if (blread(devfd, fsrbuf, (SBLOCK_UFS2 - secsize) / dev_bsize, secsize) != 0) { free(fsrbuf); return (0); } fsr = (struct fsrecovery *)&fsrbuf[secsize - sizeof *fsr]; if (fsr->fsr_magic != FS_UFS2_MAGIC) { free(fsrbuf); return (0); } memset(fs, 0, sizeof(struct fs)); fs->fs_fpg = fsr->fsr_fpg; fs->fs_fsbtodb = fsr->fsr_fsbtodb; fs->fs_sblkno = fsr->fsr_sblkno; fs->fs_magic = fsr->fsr_magic; fs->fs_ncg = fsr->fsr_ncg; free(fsrbuf); return (1); } /* * Check to see if recovery information exists. * Return 1 if it exists or cannot be created. * Return 0 if it does not exist and can be created. */ static int chkrecovery(int devfd) { struct fsrecovery *fsr; char *fsrbuf; u_int secsize, rdsize; /* * Could not determine if backup material exists, so do not * offer to create it. */ fsrbuf = NULL; rdsize = sblock.fs_fsize; if (ioctl(devfd, DIOCGSECTORSIZE, &secsize) == -1 || rdsize % secsize != 0 || - (fsrbuf = Malloc(rdsize)) == NULL || + (fsrbuf = Balloc(rdsize)) == NULL || blread(devfd, fsrbuf, (SBLOCK_UFS2 - rdsize) / dev_bsize, rdsize) != 0) { free(fsrbuf); return (1); } /* * Recovery material has already been created, so do not * need to create it again. */ fsr = (struct fsrecovery *)&fsrbuf[rdsize - sizeof *fsr]; if (fsr->fsr_magic == FS_UFS2_MAGIC) { free(fsrbuf); return (1); } /* * Recovery material has not been created and can be if desired. */ free(fsrbuf); return (0); } /* * Read the last filesystem-size piece of the boot block, replace the * last 20 bytes with the recovery information, then write it back. * The recovery information only works for UFS2 filesystems. */ static void saverecovery(int readfd, int writefd) { struct fsrecovery *fsr; char *fsrbuf; u_int secsize, rdsize; fsrbuf = NULL; rdsize = sblock.fs_fsize; if (sblock.fs_magic != FS_UFS2_MAGIC || ioctl(readfd, DIOCGSECTORSIZE, &secsize) == -1 || rdsize % secsize != 0 || - (fsrbuf = Malloc(rdsize)) == NULL || + (fsrbuf = Balloc(rdsize)) == NULL || blread(readfd, fsrbuf, (SBLOCK_UFS2 - rdsize) / dev_bsize, rdsize) != 0) { printf("RECOVERY DATA COULD NOT BE CREATED\n"); free(fsrbuf); return; } fsr = (struct fsrecovery *)&fsrbuf[rdsize - sizeof *fsr]; fsr->fsr_magic = sblock.fs_magic; fsr->fsr_fpg = sblock.fs_fpg; fsr->fsr_fsbtodb = sblock.fs_fsbtodb; fsr->fsr_sblkno = sblock.fs_sblkno; fsr->fsr_ncg = sblock.fs_ncg; blwrite(writefd, fsrbuf, (SBLOCK_UFS2 - rdsize) / dev_bsize, rdsize); free(fsrbuf); } diff --git a/sbin/fsck_ffs/suj.c b/sbin/fsck_ffs/suj.c index c66b605bd69d..e1fd54229d69 100644 --- a/sbin/fsck_ffs/suj.c +++ b/sbin/fsck_ffs/suj.c @@ -1,2554 +1,2553 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright 2009, 2010 Jeffrey W. Roberson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include "fsck.h" #define DOTDOT_OFFSET DIRECTSIZ(1) struct suj_seg { TAILQ_ENTRY(suj_seg) ss_next; struct jsegrec ss_rec; uint8_t *ss_blk; }; struct suj_rec { TAILQ_ENTRY(suj_rec) sr_next; union jrec *sr_rec; }; TAILQ_HEAD(srechd, suj_rec); struct suj_ino { LIST_ENTRY(suj_ino) si_next; struct srechd si_recs; struct srechd si_newrecs; struct srechd si_movs; struct jtrncrec *si_trunc; ino_t si_ino; char si_skipparent; char si_hasrecs; char si_blkadj; char si_linkadj; int si_mode; nlink_t si_nlinkadj; nlink_t si_nlink; nlink_t si_dotlinks; }; LIST_HEAD(inohd, suj_ino); struct suj_blk { LIST_ENTRY(suj_blk) sb_next; struct srechd sb_recs; ufs2_daddr_t sb_blk; }; LIST_HEAD(blkhd, suj_blk); struct suj_cg { LIST_ENTRY(suj_cg) sc_next; struct blkhd sc_blkhash[HASHSIZE]; struct inohd sc_inohash[HASHSIZE]; struct ino_blk *sc_lastiblk; struct suj_ino *sc_lastino; struct suj_blk *sc_lastblk; struct bufarea *sc_cgbp; struct cg *sc_cgp; int sc_cgx; }; static LIST_HEAD(cghd, suj_cg) cghash[HASHSIZE]; static struct suj_cg *lastcg; static TAILQ_HEAD(seghd, suj_seg) allsegs; static uint64_t oldseq; static struct fs *fs = NULL; static ino_t sujino; static char *joptype[JOP_NUMJOPTYPES] = JOP_NAMES; /* * Summary statistics. */ static uint64_t freefrags; static uint64_t freeblocks; static uint64_t freeinos; static uint64_t freedir; static uint64_t jbytes; static uint64_t jrecs; static jmp_buf jmpbuf; typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int); static void err_suj(const char *, ...) __dead2; static void ino_trunc(ino_t, off_t); static void ino_decr(ino_t); static void ino_adjust(struct suj_ino *); static void ino_build(struct suj_ino *); static int blk_isfree(ufs2_daddr_t); static void initsuj(void); static void * errmalloc(size_t n) { void *a; a = Malloc(n); if (a == NULL) err(EX_OSERR, "malloc(%zu)", n); return (a); } /* * When hit a fatal error in journalling check, print out * the error and then offer to fallback to normal fsck. */ static void err_suj(const char * restrict fmt, ...) { va_list ap; if (preen) (void)fprintf(stdout, "%s: ", cdevname); va_start(ap, fmt); (void)vfprintf(stdout, fmt, ap); va_end(ap); longjmp(jmpbuf, -1); } /* * Lookup a cg by number in the hash so we can keep track of which cgs * need stats rebuilt. */ static struct suj_cg * cg_lookup(int cgx) { struct cghd *hd; struct suj_cg *sc; struct bufarea *cgbp; if (cgx < 0 || cgx >= fs->fs_ncg) err_suj("Bad cg number %d\n", cgx); if (lastcg && lastcg->sc_cgx == cgx) return (lastcg); cgbp = cglookup(cgx); if (!check_cgmagic(cgx, cgbp)) err_suj("UNABLE TO REBUILD CYLINDER GROUP %d", cgx); hd = &cghash[HASH(cgx)]; LIST_FOREACH(sc, hd, sc_next) if (sc->sc_cgx == cgx) { sc->sc_cgbp = cgbp; sc->sc_cgp = sc->sc_cgbp->b_un.b_cg; lastcg = sc; return (sc); } sc = errmalloc(sizeof(*sc)); bzero(sc, sizeof(*sc)); sc->sc_cgbp = cgbp; sc->sc_cgp = sc->sc_cgbp->b_un.b_cg; sc->sc_cgx = cgx; LIST_INSERT_HEAD(hd, sc, sc_next); return (sc); } /* * Lookup an inode number in the hash and allocate a suj_ino if it does * not exist. */ static struct suj_ino * ino_lookup(ino_t ino, int creat) { struct suj_ino *sino; struct inohd *hd; struct suj_cg *sc; sc = cg_lookup(ino_to_cg(fs, ino)); if (sc->sc_lastino && sc->sc_lastino->si_ino == ino) return (sc->sc_lastino); hd = &sc->sc_inohash[HASH(ino)]; LIST_FOREACH(sino, hd, si_next) if (sino->si_ino == ino) return (sino); if (creat == 0) return (NULL); sino = errmalloc(sizeof(*sino)); bzero(sino, sizeof(*sino)); sino->si_ino = ino; TAILQ_INIT(&sino->si_recs); TAILQ_INIT(&sino->si_newrecs); TAILQ_INIT(&sino->si_movs); LIST_INSERT_HEAD(hd, sino, si_next); return (sino); } /* * Lookup a block number in the hash and allocate a suj_blk if it does * not exist. */ static struct suj_blk * blk_lookup(ufs2_daddr_t blk, int creat) { struct suj_blk *sblk; struct suj_cg *sc; struct blkhd *hd; sc = cg_lookup(dtog(fs, blk)); if (sc->sc_lastblk && sc->sc_lastblk->sb_blk == blk) return (sc->sc_lastblk); hd = &sc->sc_blkhash[HASH(fragstoblks(fs, blk))]; LIST_FOREACH(sblk, hd, sb_next) if (sblk->sb_blk == blk) return (sblk); if (creat == 0) return (NULL); sblk = errmalloc(sizeof(*sblk)); bzero(sblk, sizeof(*sblk)); sblk->sb_blk = blk; TAILQ_INIT(&sblk->sb_recs); LIST_INSERT_HEAD(hd, sblk, sb_next); return (sblk); } static int blk_overlaps(struct jblkrec *brec, ufs2_daddr_t start, int frags) { ufs2_daddr_t bstart; ufs2_daddr_t bend; ufs2_daddr_t end; end = start + frags; bstart = brec->jb_blkno + brec->jb_oldfrags; bend = bstart + brec->jb_frags; if (start < bend && end > bstart) return (1); return (0); } static int blk_equals(struct jblkrec *brec, ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t start, int frags) { if (brec->jb_ino != ino || brec->jb_lbn != lbn) return (0); if (brec->jb_blkno + brec->jb_oldfrags != start) return (0); if (brec->jb_frags < frags) return (0); return (1); } static void blk_setmask(struct jblkrec *brec, int *mask) { int i; for (i = brec->jb_oldfrags; i < brec->jb_oldfrags + brec->jb_frags; i++) *mask |= 1 << i; } /* * Determine whether a given block has been reallocated to a new location. * Returns a mask of overlapping bits if any frags have been reused or * zero if the block has not been re-used and the contents can be trusted. * * This is used to ensure that an orphaned pointer due to truncate is safe * to be freed. The mask value can be used to free partial blocks. */ static int blk_freemask(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags) { struct suj_blk *sblk; struct suj_rec *srec; struct jblkrec *brec; int mask; int off; /* * To be certain we're not freeing a reallocated block we lookup * this block in the blk hash and see if there is an allocation * journal record that overlaps with any fragments in the block * we're concerned with. If any fragments have been reallocated * the block has already been freed and re-used for another purpose. */ mask = 0; sblk = blk_lookup(blknum(fs, blk), 0); if (sblk == NULL) return (0); off = blk - sblk->sb_blk; TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { brec = (struct jblkrec *)srec->sr_rec; /* * If the block overlaps but does not match * exactly this record refers to the current * location. */ if (blk_overlaps(brec, blk, frags) == 0) continue; if (blk_equals(brec, ino, lbn, blk, frags) == 1) mask = 0; else blk_setmask(brec, &mask); } if (debug) printf("blk_freemask: blk %jd sblk %jd off %d mask 0x%X\n", blk, sblk->sb_blk, off, mask); return (mask >> off); } /* * Determine whether it is safe to follow an indirect. It is not safe * if any part of the indirect has been reallocated or the last journal * entry was an allocation. Just allocated indirects may not have valid * pointers yet and all of their children will have their own records. * It is also not safe to follow an indirect if the cg bitmap has been * cleared as a new allocation may write to the block prior to the journal * being written. * * Returns 1 if it's safe to follow the indirect and 0 otherwise. */ static int blk_isindir(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn) { struct suj_blk *sblk; struct jblkrec *brec; sblk = blk_lookup(blk, 0); if (sblk == NULL) return (1); if (TAILQ_EMPTY(&sblk->sb_recs)) return (1); brec = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec; if (blk_equals(brec, ino, lbn, blk, fs->fs_frag)) if (brec->jb_op == JOP_FREEBLK) return (!blk_isfree(blk)); return (0); } /* * Check to see if the requested block is available. * We can just check in the cylinder-group maps as * they will only have usable blocks in them. */ ufs2_daddr_t suj_checkblkavail(ufs2_daddr_t blkno, long frags) { struct bufarea *cgbp; struct cg *cgp; ufs2_daddr_t j, k, baseblk; long cg; if ((u_int64_t)blkno > sblock.fs_size) return (0); cg = dtog(&sblock, blkno); cgbp = cglookup(cg); cgp = cgbp->b_un.b_cg; if (!check_cgmagic(cg, cgbp)) return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag)); baseblk = dtogd(&sblock, blkno); for (j = 0; j <= sblock.fs_frag - frags; j++) { if (!isset(cg_blksfree(cgp), baseblk + j)) continue; for (k = 1; k < frags; k++) if (!isset(cg_blksfree(cgp), baseblk + j + k)) break; if (k < frags) { j += k; continue; } for (k = 0; k < frags; k++) clrbit(cg_blksfree(cgp), baseblk + j + k); n_blks += frags; if (frags == sblock.fs_frag) cgp->cg_cs.cs_nbfree--; else cgp->cg_cs.cs_nffree -= frags; cgdirty(cgbp); return ((cg * sblock.fs_fpg) + baseblk + j); } return (0); } /* * Clear an inode from the cg bitmap. If the inode was already clear return * 0 so the caller knows it does not have to check the inode contents. */ static int ino_free(ino_t ino, int mode) { struct suj_cg *sc; uint8_t *inosused; struct cg *cgp; int cg; cg = ino_to_cg(fs, ino); ino = ino % fs->fs_ipg; sc = cg_lookup(cg); cgp = sc->sc_cgp; inosused = cg_inosused(cgp); /* * The bitmap may never have made it to the disk so we have to * conditionally clear. We can avoid writing the cg in this case. */ if (isclr(inosused, ino)) return (0); freeinos++; clrbit(inosused, ino); if (ino < cgp->cg_irotor) cgp->cg_irotor = ino; cgp->cg_cs.cs_nifree++; if ((mode & IFMT) == IFDIR) { freedir++; cgp->cg_cs.cs_ndir--; } cgdirty(sc->sc_cgbp); return (1); } /* * Free 'frags' frags starting at filesystem block 'bno' skipping any frags * set in the mask. */ static void blk_free(ino_t ino, ufs2_daddr_t bno, int mask, int frags) { ufs1_daddr_t fragno, cgbno; struct suj_cg *sc; struct cg *cgp; int i, cg; uint8_t *blksfree; if (debug) printf("Freeing %d frags at blk %jd mask 0x%x\n", frags, bno, mask); /* * Check to see if the block needs to be claimed by a snapshot. * If wanted, the snapshot references it. Otherwise we free it. */ if (snapblkfree(fs, bno, lfragtosize(fs, frags), ino, suj_checkblkavail)) return; cg = dtog(fs, bno); sc = cg_lookup(cg); cgp = sc->sc_cgp; cgbno = dtogd(fs, bno); blksfree = cg_blksfree(cgp); /* * If it's not allocated we only wrote the journal entry * and never the bitmaps. Here we unconditionally clear and * resolve the cg summary later. */ if (frags == fs->fs_frag && mask == 0) { fragno = fragstoblks(fs, cgbno); ffs_setblock(fs, blksfree, fragno); freeblocks++; } else { /* * deallocate the fragment */ for (i = 0; i < frags; i++) if ((mask & (1 << i)) == 0 && isclr(blksfree, cgbno +i)) { freefrags++; setbit(blksfree, cgbno + i); } } cgdirty(sc->sc_cgbp); } /* * Returns 1 if the whole block starting at 'bno' is marked free and 0 * otherwise. */ static int blk_isfree(ufs2_daddr_t bno) { struct suj_cg *sc; sc = cg_lookup(dtog(fs, bno)); return ffs_isblock(fs, cg_blksfree(sc->sc_cgp), dtogd(fs, bno)); } /* * Determine whether a block exists at a particular lbn in an inode. * Returns 1 if found, 0 if not. lbn may be negative for indirects * or ext blocks. */ static int blk_isat(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int *frags) { struct inode ip; union dinode *dp; ufs2_daddr_t nblk; ginode(ino, &ip); dp = ip.i_dp; if (DIP(dp, di_nlink) == 0 || DIP(dp, di_mode) == 0) { irelse(&ip); return (0); } nblk = ino_blkatoff(dp, ino, lbn, frags, NULL); irelse(&ip); return (nblk == blk); } /* * Clear the directory entry at diroff that should point to child. Minimal * checking is done and it is assumed that this path was verified with isat. */ static void ino_clrat(ino_t parent, off_t diroff, ino_t child) { union dinode *dip; struct direct *dp; struct inode ip; ufs2_daddr_t blk; struct bufarea *bp; ufs_lbn_t lbn; int blksize; int frags; int doff; if (debug) printf("Clearing inode %ju from parent %ju at offset %jd\n", (uintmax_t)child, (uintmax_t)parent, diroff); lbn = lblkno(fs, diroff); doff = blkoff(fs, diroff); ginode(parent, &ip); dip = ip.i_dp; blk = ino_blkatoff(dip, parent, lbn, &frags, NULL); blksize = sblksize(fs, DIP(dip, di_size), lbn); irelse(&ip); bp = getdatablk(blk, blksize, BT_DIRDATA); if (bp->b_errs != 0) err_suj("ino_clrat: UNRECOVERABLE I/O ERROR"); dp = (struct direct *)&bp->b_un.b_buf[doff]; if (dp->d_ino != child) errx(1, "Inode %ju does not exist in %ju at %jd", (uintmax_t)child, (uintmax_t)parent, diroff); dp->d_ino = 0; dirty(bp); brelse(bp); /* * The actual .. reference count will already have been removed * from the parent by the .. remref record. */ } /* * Determines whether a pointer to an inode exists within a directory * at a specified offset. Returns the mode of the found entry. */ static int ino_isat(ino_t parent, off_t diroff, ino_t child, int *mode, int *isdot) { struct inode ip; union dinode *dip; struct bufarea *bp; struct direct *dp; ufs2_daddr_t blk; ufs_lbn_t lbn; int blksize; int frags; int dpoff; int doff; *isdot = 0; ginode(parent, &ip); dip = ip.i_dp; *mode = DIP(dip, di_mode); if ((*mode & IFMT) != IFDIR) { if (debug) { /* * This can happen if the parent inode * was reallocated. */ if (*mode != 0) printf("Directory %ju has bad mode %o\n", (uintmax_t)parent, *mode); else printf("Directory %ju has zero mode\n", (uintmax_t)parent); } irelse(&ip); return (0); } lbn = lblkno(fs, diroff); doff = blkoff(fs, diroff); blksize = sblksize(fs, DIP(dip, di_size), lbn); if (diroff + DIRECTSIZ(1) > DIP(dip, di_size) || doff >= blksize) { if (debug) printf("ino %ju absent from %ju due to offset %jd" " exceeding size %jd\n", (uintmax_t)child, (uintmax_t)parent, diroff, DIP(dip, di_size)); irelse(&ip); return (0); } blk = ino_blkatoff(dip, parent, lbn, &frags, NULL); irelse(&ip); if (blk <= 0) { if (debug) printf("Sparse directory %ju", (uintmax_t)parent); return (0); } bp = getdatablk(blk, blksize, BT_DIRDATA); if (bp->b_errs != 0) err_suj("ino_isat: UNRECOVERABLE I/O ERROR"); /* * Walk through the records from the start of the block to be * certain we hit a valid record and not some junk in the middle * of a file name. Stop when we reach or pass the expected offset. */ dpoff = rounddown(doff, DIRBLKSIZ); do { dp = (struct direct *)&bp->b_un.b_buf[dpoff]; if (dpoff == doff) break; if (dp->d_reclen == 0) break; dpoff += dp->d_reclen; } while (dpoff <= doff); if (dpoff > fs->fs_bsize) err_suj("Corrupt directory block in dir ino %ju\n", (uintmax_t)parent); /* Not found. */ if (dpoff != doff) { if (debug) printf("ino %ju not found in %ju, lbn %jd, dpoff %d\n", (uintmax_t)child, (uintmax_t)parent, lbn, dpoff); brelse(bp); return (0); } /* * We found the item in question. Record the mode and whether it's * a . or .. link for the caller. */ if (dp->d_ino == child) { if (child == parent) *isdot = 1; else if (dp->d_namlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.') *isdot = 1; *mode = DTTOIF(dp->d_type); brelse(bp); return (1); } if (debug) printf("ino %ju doesn't match dirent ino %ju in parent %ju\n", (uintmax_t)child, (uintmax_t)dp->d_ino, (uintmax_t)parent); brelse(bp); return (0); } #define VISIT_INDIR 0x0001 #define VISIT_EXT 0x0002 #define VISIT_ROOT 0x0004 /* Operation came via root & valid pointers. */ /* * Read an indirect level which may or may not be linked into an inode. */ static void indir_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, uint64_t *frags, ino_visitor visitor, int flags) { struct bufarea *bp; ufs_lbn_t lbnadd; ufs2_daddr_t nblk; ufs_lbn_t nlbn; int level; int i; /* * Don't visit indirect blocks with contents we can't trust. This * should only happen when indir_visit() is called to complete a * truncate that never finished and not when a pointer is found via * an inode. */ if (blk == 0) return; level = lbn_level(lbn); if (level == -1) err_suj("Invalid level for lbn %jd\n", lbn); if ((flags & VISIT_ROOT) == 0 && blk_isindir(blk, ino, lbn) == 0) { if (debug) printf("blk %jd ino %ju lbn %jd(%d) is not indir.\n", blk, (uintmax_t)ino, lbn, level); goto out; } lbnadd = 1; for (i = level; i > 0; i--) lbnadd *= NINDIR(fs); bp = getdatablk(blk, fs->fs_bsize, BT_LEVEL1 + level); if (bp->b_errs != 0) err_suj("indir_visit: UNRECOVERABLE I/O ERROR\n"); for (i = 0; i < NINDIR(fs); i++) { if ((nblk = IBLK(bp, i)) == 0) continue; if (level == 0) { nlbn = -lbn + i * lbnadd; (*frags) += fs->fs_frag; visitor(ino, nlbn, nblk, fs->fs_frag); } else { nlbn = (lbn + 1) - (i * lbnadd); indir_visit(ino, nlbn, nblk, frags, visitor, flags); } } brelse(bp); out: if (flags & VISIT_INDIR) { (*frags) += fs->fs_frag; visitor(ino, lbn, blk, fs->fs_frag); } } /* * Visit each block in an inode as specified by 'flags' and call a * callback function. The callback may inspect or free blocks. The * count of frags found according to the size in the file is returned. * This is not valid for sparse files but may be used to determine * the correct di_blocks for a file. */ static uint64_t ino_visit(union dinode *dp, ino_t ino, ino_visitor visitor, int flags) { ufs_lbn_t nextlbn; ufs_lbn_t tmpval; ufs_lbn_t lbn; uint64_t size; uint64_t fragcnt; int mode; int frags; int i; size = DIP(dp, di_size); mode = DIP(dp, di_mode) & IFMT; fragcnt = 0; if ((flags & VISIT_EXT) && fs->fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize) { for (i = 0; i < UFS_NXADDR; i++) { if (dp->dp2.di_extb[i] == 0) continue; frags = sblksize(fs, dp->dp2.di_extsize, i); frags = numfrags(fs, frags); fragcnt += frags; visitor(ino, -1 - i, dp->dp2.di_extb[i], frags); } } /* Skip datablocks for short links and devices. */ if (mode == IFBLK || mode == IFCHR || (mode == IFLNK && size < fs->fs_maxsymlinklen)) return (fragcnt); for (i = 0; i < UFS_NDADDR; i++) { if (DIP(dp, di_db[i]) == 0) continue; frags = sblksize(fs, size, i); frags = numfrags(fs, frags); fragcnt += frags; visitor(ino, i, DIP(dp, di_db[i]), frags); } /* * We know the following indirects are real as we're following * real pointers to them. */ flags |= VISIT_ROOT; for (i = 0, tmpval = NINDIR(fs), lbn = UFS_NDADDR; i < UFS_NIADDR; i++, lbn = nextlbn) { nextlbn = lbn + tmpval; tmpval *= NINDIR(fs); if (DIP(dp, di_ib[i]) == 0) continue; indir_visit(ino, -lbn - i, DIP(dp, di_ib[i]), &fragcnt, visitor, flags); } return (fragcnt); } /* * Null visitor function used when we just want to count blocks and * record the lbn. */ ufs_lbn_t visitlbn; static void null_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { if (lbn > 0) visitlbn = lbn; } /* * Recalculate di_blocks when we discover that a block allocation or * free was not successfully completed. The kernel does not roll this back * because it would be too expensive to compute which indirects were * reachable at the time the inode was written. */ static void ino_adjblks(struct suj_ino *sino) { struct inode ip; union dinode *dp; uint64_t blocks; uint64_t frags; off_t isize; off_t size; ino_t ino; ino = sino->si_ino; ginode(ino, &ip); dp = ip.i_dp; /* No need to adjust zero'd inodes. */ if (DIP(dp, di_mode) == 0) { irelse(&ip); return; } /* * Visit all blocks and count them as well as recording the last * valid lbn in the file. If the file size doesn't agree with the * last lbn we need to truncate to fix it. Otherwise just adjust * the blocks count. */ visitlbn = 0; frags = ino_visit(dp, ino, null_visit, VISIT_INDIR | VISIT_EXT); blocks = fsbtodb(fs, frags); /* * We assume the size and direct block list is kept coherent by * softdep. For files that have extended into indirects we truncate * to the size in the inode or the maximum size permitted by * populated indirects. */ if (visitlbn >= UFS_NDADDR) { isize = DIP(dp, di_size); size = lblktosize(fs, visitlbn + 1); if (isize > size) isize = size; /* Always truncate to free any unpopulated indirects. */ ino_trunc(ino, isize); irelse(&ip); return; } if (blocks == DIP(dp, di_blocks)) { irelse(&ip); return; } if (debug) printf("ino %ju adjusting block count from %jd to %jd\n", (uintmax_t)ino, DIP(dp, di_blocks), blocks); DIP_SET(dp, di_blocks, blocks); inodirty(&ip); irelse(&ip); } static void blk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { blk_free(ino, blk, blk_freemask(blk, ino, lbn, frags), frags); } /* * Free a block or tree of blocks that was previously rooted in ino at * the given lbn. If the lbn is an indirect all children are freed * recursively. */ static void blk_free_lbn(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags, int follow) { uint64_t resid; int mask; mask = blk_freemask(blk, ino, lbn, frags); resid = 0; if (lbn <= -UFS_NDADDR && follow && mask == 0) indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR); else blk_free(ino, blk, mask, frags); } static void ino_setskip(struct suj_ino *sino, ino_t parent) { int isdot; int mode; if (ino_isat(sino->si_ino, DOTDOT_OFFSET, parent, &mode, &isdot)) sino->si_skipparent = 1; } static void ino_remref(ino_t parent, ino_t child, uint64_t diroff, int isdotdot) { struct suj_ino *sino; struct suj_rec *srec; struct jrefrec *rrec; /* * Lookup this inode to see if we have a record for it. */ sino = ino_lookup(child, 0); /* * Tell any child directories we've already removed their * parent link cnt. Don't try to adjust our link down again. */ if (sino != NULL && isdotdot == 0) ino_setskip(sino, parent); /* * No valid record for this inode. Just drop the on-disk * link by one. */ if (sino == NULL || sino->si_hasrecs == 0) { ino_decr(child); return; } /* * Use ino_adjust() if ino_check() has already processed this * child. If we lose the last non-dot reference to a * directory it will be discarded. */ if (sino->si_linkadj) { if (sino->si_nlink == 0) err_suj("ino_remref: ino %ld mode 0%o about to go " "negative\n", sino->si_ino, sino->si_mode); sino->si_nlink--; if (isdotdot) sino->si_dotlinks--; ino_adjust(sino); return; } /* * If we haven't yet processed this inode we need to make * sure we will successfully discover the lost path. If not * use nlinkadj to remember. */ TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; if (rrec->jr_parent == parent && rrec->jr_diroff == diroff) return; } sino->si_nlinkadj++; } /* * Free the children of a directory when the directory is discarded. */ static void ino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { struct suj_ino *sino; struct bufarea *bp; struct direct *dp; off_t diroff; int skipparent; int isdotdot; int dpoff; int size; sino = ino_lookup(ino, 0); if (sino) skipparent = sino->si_skipparent; else skipparent = 0; size = lfragtosize(fs, frags); bp = getdatablk(blk, size, BT_DIRDATA); if (bp->b_errs != 0) err_suj("ino_free_children: UNRECOVERABLE I/O ERROR"); dp = (struct direct *)&bp->b_un.b_buf[0]; for (dpoff = 0; dpoff < size && dp->d_reclen; dpoff += dp->d_reclen) { dp = (struct direct *)&bp->b_un.b_buf[dpoff]; if (dp->d_ino == 0 || dp->d_ino == UFS_WINO) continue; if (dp->d_namlen == 1 && dp->d_name[0] == '.') continue; isdotdot = dp->d_namlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.'; if (isdotdot && skipparent == 1) continue; if (debug) printf("Directory %ju removing ino %ju name %s\n", (uintmax_t)ino, (uintmax_t)dp->d_ino, dp->d_name); diroff = lblktosize(fs, lbn) + dpoff; ino_remref(ino, dp->d_ino, diroff, isdotdot); } brelse(bp); } /* * Reclaim an inode, freeing all blocks and decrementing all children's * link counts. Free the inode back to the cg. */ static void ino_reclaim(struct inode *ip, ino_t ino, int mode) { union dinode *dp; uint32_t gen; dp = ip->i_dp; if (ino == UFS_ROOTINO) err_suj("Attempting to free UFS_ROOTINO\n"); if (debug) printf("Truncating and freeing ino %ju, nlink %d, mode %o\n", (uintmax_t)ino, DIP(dp, di_nlink), DIP(dp, di_mode)); /* We are freeing an inode or directory. */ if ((DIP(dp, di_mode) & IFMT) == IFDIR) ino_visit(dp, ino, ino_free_children, 0); DIP_SET(dp, di_nlink, 0); if ((DIP(dp, di_flags) & SF_SNAPSHOT) != 0) snapremove(ino); ino_visit(dp, ino, blk_free_visit, VISIT_EXT | VISIT_INDIR); /* Here we have to clear the inode and release any blocks it holds. */ gen = DIP(dp, di_gen); if (fs->fs_magic == FS_UFS1_MAGIC) bzero(dp, sizeof(struct ufs1_dinode)); else bzero(dp, sizeof(struct ufs2_dinode)); DIP_SET(dp, di_gen, gen); inodirty(ip); ino_free(ino, mode); return; } /* * Adjust an inode's link count down by one when a directory goes away. */ static void ino_decr(ino_t ino) { struct inode ip; union dinode *dp; int reqlink; int nlink; int mode; ginode(ino, &ip); dp = ip.i_dp; nlink = DIP(dp, di_nlink); mode = DIP(dp, di_mode); if (nlink < 1) err_suj("Inode %d link count %d invalid\n", ino, nlink); if (mode == 0) err_suj("Inode %d has a link of %d with 0 mode\n", ino, nlink); nlink--; if ((mode & IFMT) == IFDIR) reqlink = 2; else reqlink = 1; if (nlink < reqlink) { if (debug) printf("ino %ju not enough links to live %d < %d\n", (uintmax_t)ino, nlink, reqlink); ino_reclaim(&ip, ino, mode); irelse(&ip); return; } DIP_SET(dp, di_nlink, nlink); inodirty(&ip); irelse(&ip); } /* * Adjust the inode link count to 'nlink'. If the count reaches zero * free it. */ static void ino_adjust(struct suj_ino *sino) { struct jrefrec *rrec; struct suj_rec *srec; struct suj_ino *stmp; union dinode *dp; struct inode ip; nlink_t nlink; nlink_t reqlink; int recmode; int isdot; int mode; ino_t ino; nlink = sino->si_nlink; ino = sino->si_ino; mode = sino->si_mode & IFMT; /* * If it's a directory with no dot links, it was truncated before * the name was cleared. We need to clear the dirent that * points at it. */ if (mode == IFDIR && nlink == 1 && sino->si_dotlinks == 0) { sino->si_nlink = nlink = 0; TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; if (ino_isat(rrec->jr_parent, rrec->jr_diroff, ino, &recmode, &isdot) == 0) continue; ino_clrat(rrec->jr_parent, rrec->jr_diroff, ino); break; } if (srec == NULL) errx(1, "Directory %ju name not found", (uintmax_t)ino); } /* * If it's a directory with no real names pointing to it go ahead * and truncate it. This will free any children. */ if (mode == IFDIR && nlink - sino->si_dotlinks == 0) { sino->si_nlink = nlink = 0; /* * Mark any .. links so they know not to free this inode * when they are removed. */ TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; if (rrec->jr_diroff == DOTDOT_OFFSET) { stmp = ino_lookup(rrec->jr_parent, 0); if (stmp) ino_setskip(stmp, ino); } } } ginode(ino, &ip); dp = ip.i_dp; mode = DIP(dp, di_mode) & IFMT; if (nlink > UFS_LINK_MAX) err_suj("ino %ju nlink manipulation error, new %ju, old %d\n", (uintmax_t)ino, (uintmax_t)nlink, DIP(dp, di_nlink)); if (debug) printf("Adjusting ino %ju, nlink %ju, old link %d lastmode %o\n", (uintmax_t)ino, (uintmax_t)nlink, DIP(dp, di_nlink), sino->si_mode); if (mode == 0) { if (debug) printf("ino %ju, zero inode freeing bitmap\n", (uintmax_t)ino); ino_free(ino, sino->si_mode); irelse(&ip); return; } /* XXX Should be an assert? */ if (mode != sino->si_mode && debug) printf("ino %ju, mode %o != %o\n", (uintmax_t)ino, mode, sino->si_mode); if ((mode & IFMT) == IFDIR) reqlink = 2; else reqlink = 1; /* If the inode doesn't have enough links to live, free it. */ if (nlink < reqlink) { if (debug) printf("ino %ju not enough links to live %ju < %ju\n", (uintmax_t)ino, (uintmax_t)nlink, (uintmax_t)reqlink); ino_reclaim(&ip, ino, mode); irelse(&ip); return; } /* If required write the updated link count. */ if (DIP(dp, di_nlink) == nlink) { if (debug) printf("ino %ju, link matches, skipping.\n", (uintmax_t)ino); irelse(&ip); return; } DIP_SET(dp, di_nlink, nlink); inodirty(&ip); irelse(&ip); } /* * Truncate some or all blocks in an indirect, freeing any that are required * and zeroing the indirect. */ static void indir_trunc(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, ufs_lbn_t lastlbn, union dinode *dp) { struct bufarea *bp; ufs_lbn_t lbnadd; ufs2_daddr_t nblk; ufs_lbn_t next; ufs_lbn_t nlbn; int isdirty; int level; int i; if (blk == 0) return; isdirty = 0; level = lbn_level(lbn); if (level == -1) err_suj("Invalid level for lbn %jd\n", lbn); lbnadd = 1; for (i = level; i > 0; i--) lbnadd *= NINDIR(fs); bp = getdatablk(blk, fs->fs_bsize, BT_LEVEL1 + level); if (bp->b_errs != 0) err_suj("indir_trunc: UNRECOVERABLE I/O ERROR"); for (i = 0; i < NINDIR(fs); i++) { if ((nblk = IBLK(bp, i)) == 0) continue; if (level != 0) { nlbn = (lbn + 1) - (i * lbnadd); /* * Calculate the lbn of the next indirect to * determine if any of this indirect must be * reclaimed. */ next = -(lbn + level) + ((i+1) * lbnadd); if (next <= lastlbn) continue; indir_trunc(ino, nlbn, nblk, lastlbn, dp); /* If all of this indirect was reclaimed, free it. */ nlbn = next - lbnadd; if (nlbn < lastlbn) continue; } else { nlbn = -lbn + i * lbnadd; if (nlbn < lastlbn) continue; } isdirty = 1; blk_free(ino, nblk, 0, fs->fs_frag); IBLK_SET(bp, i, 0); } if (isdirty) dirty(bp); brelse(bp); } /* * Truncate an inode to the minimum of the given size or the last populated * block after any over size have been discarded. The kernel would allocate * the last block in the file but fsck does not and neither do we. This * code never extends files, only shrinks them. */ static void ino_trunc(ino_t ino, off_t size) { struct inode ip; union dinode *dp; struct bufarea *bp; ufs2_daddr_t bn; uint64_t totalfrags; ufs_lbn_t nextlbn; ufs_lbn_t lastlbn; ufs_lbn_t tmpval; ufs_lbn_t lbn; ufs_lbn_t i; int blksize, frags; off_t cursize; off_t off; int mode; ginode(ino, &ip); dp = ip.i_dp; mode = DIP(dp, di_mode) & IFMT; cursize = DIP(dp, di_size); /* If no size change, nothing to do */ if (size == cursize) { irelse(&ip); return; } if (debug) printf("Truncating ino %ju, mode %o to size %jd from " "size %jd\n", (uintmax_t)ino, mode, size, cursize); /* Skip datablocks for short links and devices. */ if (mode == 0 || mode == IFBLK || mode == IFCHR || (mode == IFLNK && cursize < fs->fs_maxsymlinklen)) { irelse(&ip); return; } /* Don't extend. */ if (size > cursize) { irelse(&ip); return; } if ((DIP(dp, di_flags) & SF_SNAPSHOT) != 0) { if (size > 0) err_suj("Partial truncation of ino %ju snapshot file\n", (uintmax_t)ino); snapremove(ino); } lastlbn = lblkno(fs, blkroundup(fs, size)); for (i = lastlbn; i < UFS_NDADDR; i++) { if ((bn = DIP(dp, di_db[i])) == 0) continue; blksize = sblksize(fs, cursize, i); blk_free(ino, bn, 0, numfrags(fs, blksize)); DIP_SET(dp, di_db[i], 0); } /* * Follow indirect blocks, freeing anything required. */ for (i = 0, tmpval = NINDIR(fs), lbn = UFS_NDADDR; i < UFS_NIADDR; i++, lbn = nextlbn) { nextlbn = lbn + tmpval; tmpval *= NINDIR(fs); /* If we're not freeing any in this indirect range skip it. */ if (lastlbn >= nextlbn) continue; if ((bn = DIP(dp, di_ib[i])) == 0) continue; indir_trunc(ino, -lbn - i, bn, lastlbn, dp); /* If we freed everything in this indirect free the indir. */ if (lastlbn > lbn) continue; blk_free(ino, bn, 0, fs->fs_frag); DIP_SET(dp, di_ib[i], 0); } /* * Now that we've freed any whole blocks that exceed the desired * truncation size, figure out how many blocks remain and what the * last populated lbn is. We will set the size to this last lbn * rather than worrying about allocating the final lbn as the kernel * would've done. This is consistent with normal fsck behavior. */ visitlbn = 0; totalfrags = ino_visit(dp, ino, null_visit, VISIT_INDIR | VISIT_EXT); if (size > lblktosize(fs, visitlbn + 1)) size = lblktosize(fs, visitlbn + 1); /* * If we're truncating direct blocks we have to adjust frags * accordingly. */ if (visitlbn < UFS_NDADDR && totalfrags) { long oldspace, newspace; bn = DIP(dp, di_db[visitlbn]); if (bn == 0) err_suj("Bad blk at ino %ju lbn %jd\n", (uintmax_t)ino, visitlbn); oldspace = sblksize(fs, cursize, visitlbn); newspace = sblksize(fs, size, visitlbn); if (oldspace != newspace) { bn += numfrags(fs, newspace); frags = numfrags(fs, oldspace - newspace); blk_free(ino, bn, 0, frags); totalfrags -= frags; } } DIP_SET(dp, di_blocks, fsbtodb(fs, totalfrags)); DIP_SET(dp, di_size, size); inodirty(&ip); /* * If we've truncated into the middle of a block or frag we have * to zero it here. Otherwise the file could extend into * uninitialized space later. */ off = blkoff(fs, size); if (off && DIP(dp, di_mode) != IFDIR) { long clrsize; bn = ino_blkatoff(dp, ino, visitlbn, &frags, NULL); if (bn == 0) err_suj("Block missing from ino %ju at lbn %jd\n", (uintmax_t)ino, visitlbn); clrsize = frags * fs->fs_fsize; bp = getdatablk(bn, clrsize, BT_DATA); if (bp->b_errs != 0) err_suj("ino_trunc: UNRECOVERABLE I/O ERROR"); clrsize -= off; bzero(&bp->b_un.b_buf[off], clrsize); dirty(bp); brelse(bp); } irelse(&ip); return; } /* * Process records available for one inode and determine whether the * link count is correct or needs adjusting. */ static void ino_check(struct suj_ino *sino) { struct suj_rec *srec; struct jrefrec *rrec; nlink_t dotlinks; nlink_t newlinks; nlink_t removes; nlink_t nlink; ino_t ino; int isdot; int isat; int mode; if (sino->si_hasrecs == 0) return; ino = sino->si_ino; rrec = (struct jrefrec *)TAILQ_FIRST(&sino->si_recs)->sr_rec; nlink = rrec->jr_nlink; newlinks = 0; dotlinks = 0; removes = sino->si_nlinkadj; TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; isat = ino_isat(rrec->jr_parent, rrec->jr_diroff, rrec->jr_ino, &mode, &isdot); if (isat && (mode & IFMT) != (rrec->jr_mode & IFMT)) err_suj("Inode mode/directory type mismatch %o != %o\n", mode, rrec->jr_mode); if (debug) printf("jrefrec: op %s ino %ju, nlink %ju, parent %ju, " "diroff %jd, mode %o, isat %d, isdot %d\n", JOP_OPTYPE(rrec->jr_op), (uintmax_t)rrec->jr_ino, (uintmax_t)rrec->jr_nlink, (uintmax_t)rrec->jr_parent, (uintmax_t)rrec->jr_diroff, rrec->jr_mode, isat, isdot); mode = rrec->jr_mode & IFMT; if (rrec->jr_op == JOP_REMREF) removes++; newlinks += isat; if (isdot) dotlinks += isat; } /* * The number of links that remain are the starting link count * subtracted by the total number of removes with the total * links discovered back in. An incomplete remove thus * makes no change to the link count but an add increases * by one. */ if (debug) printf( "ino %ju nlink %ju newlinks %ju removes %ju dotlinks %ju\n", (uintmax_t)ino, (uintmax_t)nlink, (uintmax_t)newlinks, (uintmax_t)removes, (uintmax_t)dotlinks); nlink += newlinks; nlink -= removes; sino->si_linkadj = 1; sino->si_nlink = nlink; sino->si_dotlinks = dotlinks; sino->si_mode = mode; ino_adjust(sino); } /* * Process records available for one block and determine whether it is * still allocated and whether the owning inode needs to be updated or * a free completed. */ static void blk_check(struct suj_blk *sblk) { struct suj_rec *srec; struct jblkrec *brec; struct suj_ino *sino; ufs2_daddr_t blk; int mask; int frags; int isat; /* * Each suj_blk actually contains records for any fragments in that * block. As a result we must evaluate each record individually. */ sino = NULL; TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { brec = (struct jblkrec *)srec->sr_rec; frags = brec->jb_frags; blk = brec->jb_blkno + brec->jb_oldfrags; isat = blk_isat(brec->jb_ino, brec->jb_lbn, blk, &frags); if (sino == NULL || sino->si_ino != brec->jb_ino) { sino = ino_lookup(brec->jb_ino, 1); sino->si_blkadj = 1; } if (debug) printf("op %s blk %jd ino %ju lbn %jd frags %d isat %d " "(%d)\n", JOP_OPTYPE(brec->jb_op), blk, (uintmax_t)brec->jb_ino, brec->jb_lbn, brec->jb_frags, isat, frags); /* * If we found the block at this address we still have to * determine if we need to free the tail end that was * added by adding contiguous fragments from the same block. */ if (isat == 1) { if (frags == brec->jb_frags) continue; mask = blk_freemask(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags); mask >>= frags; blk += frags; frags = brec->jb_frags - frags; blk_free(brec->jb_ino, blk, mask, frags); continue; } /* * The block wasn't found, attempt to free it. It won't be * freed if it was actually reallocated. If this was an * allocation we don't want to follow indirects as they * may not be written yet. Any children of the indirect will * have their own records. If it's a free we need to * recursively free children. */ blk_free_lbn(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags, brec->jb_op == JOP_FREEBLK); } } /* * Walk the list of inode records for this cg and resolve moved and duplicate * inode references now that we have a complete picture. */ static void cg_build(struct suj_cg *sc) { struct suj_ino *sino; int i; for (i = 0; i < HASHSIZE; i++) LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) ino_build(sino); } /* * Handle inodes requiring truncation. This must be done prior to * looking up any inodes in directories. */ static void cg_trunc(struct suj_cg *sc) { struct suj_ino *sino; int i; for (i = 0; i < HASHSIZE; i++) { LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { if (sino->si_trunc) { ino_trunc(sino->si_ino, sino->si_trunc->jt_size); sino->si_blkadj = 0; sino->si_trunc = NULL; } if (sino->si_blkadj) ino_adjblks(sino); } } } static void cg_adj_blk(struct suj_cg *sc) { struct suj_ino *sino; int i; for (i = 0; i < HASHSIZE; i++) { LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { if (sino->si_blkadj) ino_adjblks(sino); } } } /* * Free any partially allocated blocks and then resolve inode block * counts. */ static void cg_check_blk(struct suj_cg *sc) { struct suj_blk *sblk; int i; for (i = 0; i < HASHSIZE; i++) LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) blk_check(sblk); } /* * Walk the list of inode records for this cg, recovering any * changes which were not complete at the time of crash. */ static void cg_check_ino(struct suj_cg *sc) { struct suj_ino *sino; int i; for (i = 0; i < HASHSIZE; i++) LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) ino_check(sino); } static void cg_apply(void (*apply)(struct suj_cg *)) { struct suj_cg *scg; int i; for (i = 0; i < HASHSIZE; i++) LIST_FOREACH(scg, &cghash[i], sc_next) apply(scg); } /* * Process the unlinked but referenced file list. Freeing all inodes. */ static void ino_unlinked(void) { struct inode ip; union dinode *dp; uint16_t mode; ino_t inon; ino_t ino; ino = fs->fs_sujfree; fs->fs_sujfree = 0; while (ino != 0) { ginode(ino, &ip); dp = ip.i_dp; mode = DIP(dp, di_mode) & IFMT; inon = DIP(dp, di_freelink); DIP_SET(dp, di_freelink, 0); inodirty(&ip); /* * XXX Should this be an errx? */ if (DIP(dp, di_nlink) == 0) { if (debug) printf("Freeing unlinked ino %ju mode %o\n", (uintmax_t)ino, mode); ino_reclaim(&ip, ino, mode); } else if (debug) printf("Skipping ino %ju mode %o with link %d\n", (uintmax_t)ino, mode, DIP(dp, di_nlink)); ino = inon; irelse(&ip); } } /* * Append a new record to the list of records requiring processing. */ static void ino_append(union jrec *rec) { struct jrefrec *refrec; struct jmvrec *mvrec; struct suj_ino *sino; struct suj_rec *srec; mvrec = &rec->rec_jmvrec; refrec = &rec->rec_jrefrec; if (debug && mvrec->jm_op == JOP_MVREF) printf("ino move: ino %ju, parent %ju, " "diroff %jd, oldoff %jd\n", (uintmax_t)mvrec->jm_ino, (uintmax_t)mvrec->jm_parent, (uintmax_t)mvrec->jm_newoff, (uintmax_t)mvrec->jm_oldoff); else if (debug && (refrec->jr_op == JOP_ADDREF || refrec->jr_op == JOP_REMREF)) printf("ino ref: op %s, ino %ju, nlink %ju, " "parent %ju, diroff %jd\n", JOP_OPTYPE(refrec->jr_op), (uintmax_t)refrec->jr_ino, (uintmax_t)refrec->jr_nlink, (uintmax_t)refrec->jr_parent, (uintmax_t)refrec->jr_diroff); sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1); sino->si_hasrecs = 1; srec = errmalloc(sizeof(*srec)); srec->sr_rec = rec; TAILQ_INSERT_TAIL(&sino->si_newrecs, srec, sr_next); } /* * Add a reference adjustment to the sino list and eliminate dups. The * primary loop in ino_build_ref() checks for dups but new ones may be * created as a result of offset adjustments. */ static void ino_add_ref(struct suj_ino *sino, struct suj_rec *srec) { struct jrefrec *refrec; struct suj_rec *srn; struct jrefrec *rrn; refrec = (struct jrefrec *)srec->sr_rec; /* * We walk backwards so that the oldest link count is preserved. If * an add record conflicts with a remove keep the remove. Redundant * removes are eliminated in ino_build_ref. Otherwise we keep the * oldest record at a given location. */ for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn; srn = TAILQ_PREV(srn, srechd, sr_next)) { rrn = (struct jrefrec *)srn->sr_rec; if (rrn->jr_parent != refrec->jr_parent || rrn->jr_diroff != refrec->jr_diroff) continue; if (rrn->jr_op == JOP_REMREF || refrec->jr_op == JOP_ADDREF) { rrn->jr_mode = refrec->jr_mode; return; } /* * Adding a remove. * * Replace the record in place with the old nlink in case * we replace the head of the list. Abandon srec as a dup. */ refrec->jr_nlink = rrn->jr_nlink; srn->sr_rec = srec->sr_rec; return; } TAILQ_INSERT_TAIL(&sino->si_recs, srec, sr_next); } /* * Create a duplicate of a reference at a previous location. */ static void ino_dup_ref(struct suj_ino *sino, struct jrefrec *refrec, off_t diroff) { struct jrefrec *rrn; struct suj_rec *srn; rrn = errmalloc(sizeof(*refrec)); *rrn = *refrec; rrn->jr_op = JOP_ADDREF; rrn->jr_diroff = diroff; srn = errmalloc(sizeof(*srn)); srn->sr_rec = (union jrec *)rrn; ino_add_ref(sino, srn); } /* * Add a reference to the list at all known locations. We follow the offset * changes for a single instance and create duplicate add refs at each so * that we can tolerate any version of the directory block. Eliminate * removes which collide with adds that are seen in the journal. They should * not adjust the link count down. */ static void ino_build_ref(struct suj_ino *sino, struct suj_rec *srec) { struct jrefrec *refrec; struct jmvrec *mvrec; struct suj_rec *srp; struct suj_rec *srn; struct jrefrec *rrn; off_t diroff; refrec = (struct jrefrec *)srec->sr_rec; /* * Search for a mvrec that matches this offset. Whether it's an add * or a remove we can delete the mvref after creating a dup record in * the old location. */ if (!TAILQ_EMPTY(&sino->si_movs)) { diroff = refrec->jr_diroff; for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn; srn = srp) { srp = TAILQ_PREV(srn, srechd, sr_next); mvrec = (struct jmvrec *)srn->sr_rec; if (mvrec->jm_parent != refrec->jr_parent || mvrec->jm_newoff != diroff) continue; diroff = mvrec->jm_oldoff; TAILQ_REMOVE(&sino->si_movs, srn, sr_next); free(srn); ino_dup_ref(sino, refrec, diroff); } } /* * If a remove wasn't eliminated by an earlier add just append it to * the list. */ if (refrec->jr_op == JOP_REMREF) { ino_add_ref(sino, srec); return; } /* * Walk the list of records waiting to be added to the list. We * must check for moves that apply to our current offset and remove * them from the list. Remove any duplicates to eliminate removes * with corresponding adds. */ TAILQ_FOREACH_SAFE(srn, &sino->si_newrecs, sr_next, srp) { switch (srn->sr_rec->rec_jrefrec.jr_op) { case JOP_ADDREF: /* * This should actually be an error we should * have a remove for every add journaled. */ rrn = (struct jrefrec *)srn->sr_rec; if (rrn->jr_parent != refrec->jr_parent || rrn->jr_diroff != refrec->jr_diroff) break; TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); break; case JOP_REMREF: /* * Once we remove the current iteration of the * record at this address we're done. */ rrn = (struct jrefrec *)srn->sr_rec; if (rrn->jr_parent != refrec->jr_parent || rrn->jr_diroff != refrec->jr_diroff) break; TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); ino_add_ref(sino, srec); return; case JOP_MVREF: /* * Update our diroff based on any moves that match * and remove the move. */ mvrec = (struct jmvrec *)srn->sr_rec; if (mvrec->jm_parent != refrec->jr_parent || mvrec->jm_oldoff != refrec->jr_diroff) break; ino_dup_ref(sino, refrec, mvrec->jm_oldoff); refrec->jr_diroff = mvrec->jm_newoff; TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); break; default: err_suj("ino_build_ref: Unknown op %s\n", JOP_OPTYPE(srn->sr_rec->rec_jrefrec.jr_op)); } } ino_add_ref(sino, srec); } /* * Walk the list of new records and add them in-order resolving any * dups and adjusted offsets. */ static void ino_build(struct suj_ino *sino) { struct suj_rec *srec; while ((srec = TAILQ_FIRST(&sino->si_newrecs)) != NULL) { TAILQ_REMOVE(&sino->si_newrecs, srec, sr_next); switch (srec->sr_rec->rec_jrefrec.jr_op) { case JOP_ADDREF: case JOP_REMREF: ino_build_ref(sino, srec); break; case JOP_MVREF: /* * Add this mvrec to the queue of pending mvs. */ TAILQ_INSERT_TAIL(&sino->si_movs, srec, sr_next); break; default: err_suj("ino_build: Unknown op %s\n", JOP_OPTYPE(srec->sr_rec->rec_jrefrec.jr_op)); } } if (TAILQ_EMPTY(&sino->si_recs)) sino->si_hasrecs = 0; } /* * Modify journal records so they refer to the base block number * and a start and end frag range. This is to facilitate the discovery * of overlapping fragment allocations. */ static void blk_build(struct jblkrec *blkrec) { struct suj_rec *srec; struct suj_blk *sblk; struct jblkrec *blkrn; ufs2_daddr_t blk; int frag; if (debug) printf("blk_build: op %s blkno %jd frags %d oldfrags %d " "ino %ju lbn %jd\n", JOP_OPTYPE(blkrec->jb_op), (uintmax_t)blkrec->jb_blkno, blkrec->jb_frags, blkrec->jb_oldfrags, (uintmax_t)blkrec->jb_ino, (uintmax_t)blkrec->jb_lbn); blk = blknum(fs, blkrec->jb_blkno); frag = fragnum(fs, blkrec->jb_blkno); if (blkrec->jb_blkno < 0 || blk + fs->fs_frag - frag > fs->fs_size) err_suj("Out-of-bounds journal block number %jd\n", blkrec->jb_blkno); sblk = blk_lookup(blk, 1); /* * Rewrite the record using oldfrags to indicate the offset into * the block. Leave jb_frags as the actual allocated count. */ blkrec->jb_blkno -= frag; blkrec->jb_oldfrags = frag; if (blkrec->jb_oldfrags + blkrec->jb_frags > fs->fs_frag) err_suj("Invalid fragment count %d oldfrags %d\n", blkrec->jb_frags, frag); /* * Detect dups. If we detect a dup we always discard the oldest * record as it is superseded by the new record. This speeds up * later stages but also eliminates free records which are used * to indicate that the contents of indirects can be trusted. */ TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { blkrn = (struct jblkrec *)srec->sr_rec; if (blkrn->jb_ino != blkrec->jb_ino || blkrn->jb_lbn != blkrec->jb_lbn || blkrn->jb_blkno != blkrec->jb_blkno || blkrn->jb_frags != blkrec->jb_frags || blkrn->jb_oldfrags != blkrec->jb_oldfrags) continue; if (debug) printf("Removed dup.\n"); /* Discard the free which is a dup with an alloc. */ if (blkrec->jb_op == JOP_FREEBLK) return; TAILQ_REMOVE(&sblk->sb_recs, srec, sr_next); free(srec); break; } srec = errmalloc(sizeof(*srec)); srec->sr_rec = (union jrec *)blkrec; TAILQ_INSERT_TAIL(&sblk->sb_recs, srec, sr_next); } static void ino_build_trunc(struct jtrncrec *rec) { struct suj_ino *sino; if (debug) printf("ino_build_trunc: op %d ino %ju, size %jd\n", rec->jt_op, (uintmax_t)rec->jt_ino, (uintmax_t)rec->jt_size); if (chkfilesize(IFREG, rec->jt_size) == 0) err_suj("ino_build: truncation size too large %ju\n", (intmax_t)rec->jt_size); sino = ino_lookup(rec->jt_ino, 1); if (rec->jt_op == JOP_SYNC) { sino->si_trunc = NULL; return; } if (sino->si_trunc == NULL || sino->si_trunc->jt_size > rec->jt_size) sino->si_trunc = rec; } /* * Build up tables of the operations we need to recover. */ static void suj_build(void) { struct suj_seg *seg; union jrec *rec; int off; int i; TAILQ_FOREACH(seg, &allsegs, ss_next) { if (debug) printf("seg %jd has %d records, oldseq %jd.\n", seg->ss_rec.jsr_seq, seg->ss_rec.jsr_cnt, seg->ss_rec.jsr_oldest); off = 0; rec = (union jrec *)seg->ss_blk; for (i = 0; i < seg->ss_rec.jsr_cnt; off += JREC_SIZE, rec++) { /* skip the segrec. */ if ((off % real_dev_bsize) == 0) continue; switch (rec->rec_jrefrec.jr_op) { case JOP_ADDREF: case JOP_REMREF: case JOP_MVREF: ino_append(rec); break; case JOP_NEWBLK: case JOP_FREEBLK: blk_build((struct jblkrec *)rec); break; case JOP_TRUNC: case JOP_SYNC: ino_build_trunc((struct jtrncrec *)rec); break; default: err_suj("Unknown journal operation %s at %d\n", JOP_OPTYPE(rec->rec_jrefrec.jr_op), off); } i++; } } } /* * Prune the journal segments to those we care about based on the * oldest sequence in the newest segment. Order the segment list * based on sequence number. */ static void suj_prune(void) { struct suj_seg *seg; struct suj_seg *segn; uint64_t newseq; int discard; if (debug) printf("Pruning up to %jd\n", oldseq); /* First free the expired segments. */ TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { if (seg->ss_rec.jsr_seq >= oldseq) continue; TAILQ_REMOVE(&allsegs, seg, ss_next); free(seg->ss_blk); free(seg); } /* Next ensure that segments are ordered properly. */ seg = TAILQ_FIRST(&allsegs); if (seg == NULL) { if (debug) printf("Empty journal\n"); return; } newseq = seg->ss_rec.jsr_seq; for (;;) { seg = TAILQ_LAST(&allsegs, seghd); if (seg->ss_rec.jsr_seq >= newseq) break; TAILQ_REMOVE(&allsegs, seg, ss_next); TAILQ_INSERT_HEAD(&allsegs, seg, ss_next); newseq = seg->ss_rec.jsr_seq; } if (newseq != oldseq) { TAILQ_FOREACH(seg, &allsegs, ss_next) { printf("%jd, ", seg->ss_rec.jsr_seq); } printf("\n"); err_suj("Journal file sequence mismatch %jd != %jd\n", newseq, oldseq); } /* * The kernel may asynchronously write segments which can create * gaps in the sequence space. Throw away any segments after the * gap as the kernel guarantees only those that are contiguously * reachable are marked as completed. */ discard = 0; TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { if (!discard && newseq++ == seg->ss_rec.jsr_seq) { jrecs += seg->ss_rec.jsr_cnt; jbytes += seg->ss_rec.jsr_blocks * real_dev_bsize; continue; } discard = 1; if (debug) printf("Journal order mismatch %jd != %jd pruning\n", newseq-1, seg->ss_rec.jsr_seq); TAILQ_REMOVE(&allsegs, seg, ss_next); free(seg->ss_blk); free(seg); } if (debug) printf("Processing journal segments from %jd to %jd\n", oldseq, newseq-1); } /* * Verify the journal inode before attempting to read records. */ static int suj_verifyino(union dinode *dp) { if (DIP(dp, di_nlink) != 1) { printf("Invalid link count %d for journal inode %ju\n", DIP(dp, di_nlink), (uintmax_t)sujino); return (-1); } if ((DIP(dp, di_flags) & (SF_IMMUTABLE | SF_NOUNLINK)) != (SF_IMMUTABLE | SF_NOUNLINK)) { printf("Invalid flags 0x%X for journal inode %ju\n", DIP(dp, di_flags), (uintmax_t)sujino); return (-1); } if (DIP(dp, di_mode) != (IFREG | IREAD)) { printf("Invalid mode %o for journal inode %ju\n", DIP(dp, di_mode), (uintmax_t)sujino); return (-1); } if (DIP(dp, di_size) < SUJ_MIN) { printf("Invalid size %jd for journal inode %ju\n", DIP(dp, di_size), (uintmax_t)sujino); return (-1); } if (DIP(dp, di_modrev) != fs->fs_mtime) { if (!bkgrdcheck || debug) printf("Journal timestamp does not match " "fs mount time\n"); return (-1); } return (0); } struct jblocks { struct jextent *jb_extent; /* Extent array. */ int jb_avail; /* Available extents. */ int jb_used; /* Last used extent. */ int jb_head; /* Allocator head. */ int jb_off; /* Allocator extent offset. */ }; struct jextent { ufs2_daddr_t je_daddr; /* Disk block address. */ int je_blocks; /* Disk block count. */ }; static struct jblocks *suj_jblocks; static struct jblocks * jblocks_create(void) { struct jblocks *jblocks; int size; jblocks = errmalloc(sizeof(*jblocks)); jblocks->jb_avail = 10; jblocks->jb_used = 0; jblocks->jb_head = 0; jblocks->jb_off = 0; size = sizeof(struct jextent) * jblocks->jb_avail; jblocks->jb_extent = errmalloc(size); bzero(jblocks->jb_extent, size); return (jblocks); } /* * Return the next available disk block and the amount of contiguous * free space it contains. */ static ufs2_daddr_t jblocks_next(struct jblocks *jblocks, int bytes, int *actual) { struct jextent *jext; ufs2_daddr_t daddr; int freecnt; int blocks; blocks = btodb(bytes); jext = &jblocks->jb_extent[jblocks->jb_head]; freecnt = jext->je_blocks - jblocks->jb_off; if (freecnt == 0) { jblocks->jb_off = 0; if (++jblocks->jb_head > jblocks->jb_used) return (0); jext = &jblocks->jb_extent[jblocks->jb_head]; freecnt = jext->je_blocks; } if (freecnt > blocks) freecnt = blocks; *actual = dbtob(freecnt); daddr = jext->je_daddr + jblocks->jb_off; return (daddr); } /* * Advance the allocation head by a specified number of bytes, consuming * one journal segment. */ static void jblocks_advance(struct jblocks *jblocks, int bytes) { jblocks->jb_off += btodb(bytes); } static void jblocks_destroy(struct jblocks *jblocks) { free(jblocks->jb_extent); free(jblocks); } static void jblocks_add(struct jblocks *jblocks, ufs2_daddr_t daddr, int blocks) { struct jextent *jext; int size; jext = &jblocks->jb_extent[jblocks->jb_used]; /* Adding the first block. */ if (jext->je_daddr == 0) { jext->je_daddr = daddr; jext->je_blocks = blocks; return; } /* Extending the last extent. */ if (jext->je_daddr + jext->je_blocks == daddr) { jext->je_blocks += blocks; return; } /* Adding a new extent. */ if (++jblocks->jb_used == jblocks->jb_avail) { jblocks->jb_avail *= 2; size = sizeof(struct jextent) * jblocks->jb_avail; jext = errmalloc(size); bzero(jext, size); bcopy(jblocks->jb_extent, jext, sizeof(struct jextent) * jblocks->jb_used); free(jblocks->jb_extent); jblocks->jb_extent = jext; } jext = &jblocks->jb_extent[jblocks->jb_used]; jext->je_daddr = daddr; jext->je_blocks = blocks; return; } /* * Add a file block from the journal to the extent map. We can't read * each file block individually because the kernel treats it as a circular * buffer and segments may span multiple contiguous blocks. */ static void suj_add_block(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { jblocks_add(suj_jblocks, fsbtodb(fs, blk), fsbtodb(fs, frags)); } static void suj_read(void) { - uint8_t block[1 * 1024 * 1024]; + uint8_t block[1 * 1024 * 1024] __aligned(LIBUFS_BUFALIGN); struct suj_seg *seg; struct jsegrec *recn; struct jsegrec *rec; ufs2_daddr_t blk; int readsize; int blocks; int recsize; int size; int i; /* * Read records until we exhaust the journal space. If we find * an invalid record we start searching for a valid segment header * at the next block. This is because we don't have a head/tail * pointer and must recover the information indirectly. At the gap * between the head and tail we won't necessarily have a valid * segment. */ restart: for (;;) { size = sizeof(block); blk = jblocks_next(suj_jblocks, size, &readsize); if (blk == 0) return; size = readsize; /* * Read 1MB at a time and scan for records within this block. */ if (pread(fsreadfd, &block, size, dbtob(blk)) != size) { err_suj("Error reading journal block %jd\n", (intmax_t)blk); } for (rec = (void *)block; size; size -= recsize, rec = (struct jsegrec *)((uintptr_t)rec + recsize)) { recsize = real_dev_bsize; if (rec->jsr_time != fs->fs_mtime) { #ifdef notdef if (debug) printf("Rec time %jd != fs mtime %jd\n", rec->jsr_time, fs->fs_mtime); #endif jblocks_advance(suj_jblocks, recsize); continue; } if (rec->jsr_cnt == 0) { if (debug) printf("Found illegal count %d\n", rec->jsr_cnt); jblocks_advance(suj_jblocks, recsize); continue; } blocks = rec->jsr_blocks; recsize = blocks * real_dev_bsize; if (recsize > size) { /* * We may just have run out of buffer, restart * the loop to re-read from this spot. */ if (size < fs->fs_bsize && size != readsize && recsize <= fs->fs_bsize) goto restart; if (debug) printf("Found invalid segsize " "%d > %d\n", recsize, size); recsize = real_dev_bsize; jblocks_advance(suj_jblocks, recsize); continue; } /* * Verify that all blocks in the segment are present. */ for (i = 1; i < blocks; i++) { recn = (void *)((uintptr_t)rec) + i * real_dev_bsize; if (recn->jsr_seq == rec->jsr_seq && recn->jsr_time == rec->jsr_time) continue; if (debug) printf("Incomplete record %jd (%d)\n", rec->jsr_seq, i); recsize = i * real_dev_bsize; jblocks_advance(suj_jblocks, recsize); goto restart; } seg = errmalloc(sizeof(*seg)); seg->ss_blk = errmalloc(recsize); seg->ss_rec = *rec; bcopy((void *)rec, seg->ss_blk, recsize); if (rec->jsr_oldest > oldseq) oldseq = rec->jsr_oldest; TAILQ_INSERT_TAIL(&allsegs, seg, ss_next); jblocks_advance(suj_jblocks, recsize); } } } /* * Orchestrate the verification of a filesystem via the softupdates journal. */ int suj_check(const char *filesys) { struct inodesc idesc; struct csum *cgsum; union dinode *dp, *jip; struct inode ip; uint64_t blocks; int i, retval; struct suj_seg *seg; struct suj_seg *segn; initsuj(); fs = &sblock; if (real_dev_bsize == 0 && ioctl(fsreadfd, DIOCGSECTORSIZE, &real_dev_bsize) == -1) real_dev_bsize = secsize; if (debug) printf("dev_bsize %u\n", real_dev_bsize); /* * Set an exit point when SUJ check failed */ retval = setjmp(jmpbuf); if (retval != 0) { pwarn("UNEXPECTED SU+J INCONSISTENCY\n"); TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { TAILQ_REMOVE(&allsegs, seg, ss_next); free(seg->ss_blk); free(seg); } if (reply("FALLBACK TO FULL FSCK") == 0) { ckfini(0); exit(EEXIT); } else return (-1); } /* * Search the root directory for the SUJ_FILE. */ idesc.id_type = DATA; idesc.id_fix = IGNORE; idesc.id_number = UFS_ROOTINO; idesc.id_func = findino; idesc.id_name = SUJ_FILE; ginode(UFS_ROOTINO, &ip); dp = ip.i_dp; if ((DIP(dp, di_mode) & IFMT) != IFDIR) { irelse(&ip); err_suj("root inode is not a directory\n"); } if (DIP(dp, di_size) < 0 || DIP(dp, di_size) > MAXDIRSIZE) { irelse(&ip); err_suj("negative or oversized root directory %jd\n", (uintmax_t)DIP(dp, di_size)); } if ((ckinode(dp, &idesc) & FOUND) == FOUND) { sujino = idesc.id_parent; irelse(&ip); } else { if (!bkgrdcheck || debug) printf("Journal inode removed. " "Use tunefs to re-create.\n"); sblock.fs_flags &= ~FS_SUJ; sblock.fs_sujfree = 0; irelse(&ip); return (-1); } /* * Fetch the journal inode and verify it. */ ginode(sujino, &ip); jip = ip.i_dp; if (!bkgrdcheck || debug) printf("** SU+J Recovering %s\n", filesys); if (suj_verifyino(jip) != 0 || (!preen && !reply("USE JOURNAL"))) { irelse(&ip); return (-1); } /* * Build a list of journal blocks in jblocks before parsing the * available journal blocks in with suj_read(). */ if (!bkgrdcheck || debug) printf("** Reading %jd byte journal from inode %ju.\n", DIP(jip, di_size), (uintmax_t)sujino); suj_jblocks = jblocks_create(); blocks = ino_visit(jip, sujino, suj_add_block, 0); if (blocks != numfrags(fs, DIP(jip, di_size))) { if (!bkgrdcheck || debug) printf("Sparse journal inode %ju.\n", (uintmax_t)sujino); irelse(&ip); return (-1); } /* If journal is valid then do journal check rather than background */ if (bkgrdcheck) { irelse(&ip); return (0); } irelse(&ip); suj_read(); jblocks_destroy(suj_jblocks); suj_jblocks = NULL; if (preen || reply("RECOVER")) { printf("** Building recovery table.\n"); suj_prune(); suj_build(); cg_apply(cg_build); printf("** Resolving unreferenced inode list.\n"); ino_unlinked(); printf("** Processing journal entries.\n"); cg_apply(cg_trunc); cg_apply(cg_check_blk); cg_apply(cg_adj_blk); cg_apply(cg_check_ino); } if (preen == 0 && (jrecs > 0 || jbytes > 0) && reply("WRITE CHANGES") == 0) return (0); /* * Check block counts of snapshot inodes and * make copies of any needed snapshot blocks. */ for (i = 0; i < snapcnt; i++) check_blkcnt(&snaplist[i]); snapflush(suj_checkblkavail); /* * Recompute the fs summary info from correct cs summaries. */ bzero(&fs->fs_cstotal, sizeof(struct csum_total)); for (i = 0; i < fs->fs_ncg; i++) { cgsum = &fs->fs_cs(fs, i); fs->fs_cstotal.cs_nffree += cgsum->cs_nffree; fs->fs_cstotal.cs_nbfree += cgsum->cs_nbfree; fs->fs_cstotal.cs_nifree += cgsum->cs_nifree; fs->fs_cstotal.cs_ndir += cgsum->cs_ndir; } fs->fs_pendinginodes = 0; fs->fs_pendingblocks = 0; fs->fs_clean = 1; fs->fs_time = time(NULL); fs->fs_mtime = time(NULL); sbdirty(); ckfini(1); if (jrecs > 0 || jbytes > 0) { printf("** %jd journal records in %jd bytes for %.2f%% " "utilization\n", jrecs, jbytes, ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100); printf("** Freed %jd inodes (%jd dirs) %jd blocks, and %jd " "frags.\n", freeinos, freedir, freeblocks, freefrags); } return (0); } static void initsuj(void) { int i; for (i = 0; i < HASHSIZE; i++) LIST_INIT(&cghash[i]); lastcg = NULL; TAILQ_INIT(&allsegs); oldseq = 0; fs = NULL; sujino = 0; freefrags = 0; freeblocks = 0; freeinos = 0; freedir = 0; jbytes = 0; jrecs = 0; suj_jblocks = NULL; }