diff --git a/sbin/fsck_ffs/fsutil.c b/sbin/fsck_ffs/fsutil.c index d58527231240..2b915d28aa26 100644 --- a/sbin/fsck_ffs/fsutil.c +++ b/sbin/fsck_ffs/fsutil.c @@ -1,1470 +1,1472 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" int sujrecovery = 0; static struct bufarea *allocbuf(const char *); static void cg_write(struct bufarea *); static void slowio_start(void); static void slowio_end(void); static void printIOstats(void); static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */ static struct timespec startpass, finishpass; struct timeval slowio_starttime; int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ int slowio_pollcnt; static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ static struct bufarea failedbuf; /* returned by failed getdatablk() */ static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */ static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */ static struct bufhash freebufs; /* unused buffers */ static int numbufs; /* size of buffer cache */ static int cachelookups; /* number of cache lookups */ static int cachereads; /* number of cache reads */ static int flushtries; /* number of tries to reclaim memory */ char *buftype[BT_NUMBUFTYPES] = BT_NAMES; void fsutilinit(void) { diskreads = totaldiskreads = totalreads = 0; bzero(&startpass, sizeof(struct timespec)); bzero(&finishpass, sizeof(struct timespec)); bzero(&slowio_starttime, sizeof(struct timeval)); slowio_delay_usec = 10000; slowio_pollcnt = 0; flushtries = 0; } int ftypeok(union dinode *dp) { switch (DIP(dp, di_mode) & IFMT) 
{ case IFDIR: case IFREG: case IFBLK: case IFCHR: case IFLNK: case IFSOCK: case IFIFO: return (1); default: if (debug) printf("bad file type 0%o\n", DIP(dp, di_mode)); return (0); } } int reply(const char *question) { int persevere; char c; if (preen) pfatal("INTERNAL ERROR: GOT TO reply()"); persevere = strcmp(question, "CONTINUE") == 0 || strcmp(question, "LOOK FOR ALTERNATE SUPERBLOCKS") == 0; printf("\n"); if (!persevere && (nflag || (fswritefd < 0 && bkgrdflag == 0))) { printf("%s? no\n\n", question); resolved = 0; return (0); } if (yflag || (persevere && nflag)) { printf("%s? yes\n\n", question); return (1); } do { printf("%s? [yn] ", question); (void) fflush(stdout); c = getc(stdin); while (c != '\n' && getc(stdin) != '\n') { if (feof(stdin)) { resolved = 0; return (0); } } } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); printf("\n"); if (c == 'y' || c == 'Y') return (1); resolved = 0; return (0); } /* * Look up state information for an inode. */ struct inostat * inoinfo(ino_t inum) { static struct inostat unallocated = { USTATE, 0, 0, 0 }; struct inostatlist *ilp; int iloff; if (inum > maxino) errx(EEXIT, "inoinfo: inumber %ju out of range", (uintmax_t)inum); ilp = &inostathead[inum / sblock.fs_ipg]; iloff = inum % sblock.fs_ipg; if (iloff >= ilp->il_numalloced) return (&unallocated); return (&ilp->il_stat[iloff]); } /* * Malloc buffers and set up cache. */ void bufinit(void) { int i; initbarea(&failedbuf, BT_UNKNOWN); failedbuf.b_errs = -1; failedbuf.b_un.b_buf = NULL; if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL) errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize); initbarea(&cgblk, BT_CYLGRP); numbufs = cachelookups = cachereads = 0; TAILQ_INIT(&bufqueuehd); LIST_INIT(&freebufs); for (i = 0; i < HASHSIZE; i++) LIST_INIT(&bufhashhd[i]); for (i = 0; i < BT_NUMBUFTYPES; i++) { readtime[i].tv_sec = totalreadtime[i].tv_sec = 0; readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0; readcnt[i] = totalreadcnt[i] = 0; } } static struct bufarea * allocbuf(const char *failreason) { struct bufarea *bp; char *bufp; bp = (struct bufarea *)Malloc(sizeof(struct bufarea)); bufp = Malloc((unsigned int)sblock.fs_bsize); if (bp == NULL || bufp == NULL) { errx(EEXIT, "%s", failreason); /* NOTREACHED */ } numbufs++; bp->b_un.b_buf = bufp; TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); initbarea(bp, BT_UNKNOWN); return (bp); } /* * Manage cylinder group buffers. * * Use getblk() here rather than cgget() because the cylinder group * may be corrupted but we want it anyway so we can fix it. */ static struct bufarea *cgbufs; /* header for cylinder group cache */ static int flushtries; /* number of tries to reclaim memory */ struct bufarea * cglookup(int cg) { struct bufarea *cgbp; struct cg *cgp; if ((unsigned) cg >= sblock.fs_ncg) errx(EEXIT, "cglookup: out of range cylinder group %d", cg); if (cgbufs == NULL) { cgbufs = calloc(sblock.fs_ncg, sizeof(struct bufarea)); if (cgbufs == NULL) errx(EEXIT, "Cannot allocate cylinder group buffers"); } cgbp = &cgbufs[cg]; if (cgbp->b_un.b_cg != NULL) return (cgbp); cgp = NULL; if (flushtries == 0) cgp = Malloc((unsigned int)sblock.fs_cgsize); if (cgp == NULL) { if (sujrecovery) errx(EEXIT,"Ran out of memory during journal recovery"); flush(fswritefd, &cgblk); getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); return (&cgblk); } cgbp->b_un.b_cg = cgp; initbarea(cgbp, BT_CYLGRP); getblk(cgbp, cgtod(&sblock, cg), sblock.fs_cgsize); return (cgbp); } /* * Mark a cylinder group buffer as dirty. 
 * Update its check-hash if check-hashes are enabled.
 */
void
cgdirty(struct bufarea *cgbp)
{
	struct cg *cg;

	cg = cgbp->b_un.b_cg;
	if ((sblock.fs_metackhash & CK_CYLGRP) != 0) {
		cg->cg_ckhash = 0;
		cg->cg_ckhash =
		    calculate_crc32c(~0L, (void *)cg, sblock.fs_cgsize);
	}
	dirty(cgbp);
}

/*
 * Attempt to flush a cylinder group cache entry.
 * Return whether the flush was successful.
 */
int
flushentry(void)
{
	struct bufarea *cgbp;

	if (sujrecovery || flushtries == sblock.fs_ncg || cgbufs == NULL)
		return (0);
	cgbp = &cgbufs[flushtries++];
	if (cgbp->b_un.b_cg == NULL)
		return (0);
	flush(fswritefd, cgbp);
	free(cgbp->b_un.b_buf);
	cgbp->b_un.b_buf = NULL;
	return (1);
}

/*
 * Manage a cache of filesystem disk blocks.
 */
struct bufarea *
getdatablk(ufs2_daddr_t blkno, long size, int type)
{
	struct bufarea *bp;
	struct bufhash *bhdp;

	cachelookups++;
	/*
	 * If out of range, return empty buffer with b_err == -1
	 *
	 * Skip check for inodes because chkrange() considers
	 * metadata areas invalid to write data.
	 */
-	if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize))
+	if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) {
+		failedbuf.b_refcnt++;
		return (&failedbuf);
+	}
	bhdp = &bufhashhd[HASH(blkno)];
	LIST_FOREACH(bp, bhdp, b_hash)
		if (bp->b_bno == fsbtodb(&sblock, blkno)) {
			if (debug && bp->b_size != size) {
				prtbuf(bp, "getdatablk: size mismatch");
				pfatal("getdatablk: b_size %d != size %ld\n",
				    bp->b_size, size);
			}
			TAILQ_REMOVE(&bufqueuehd, bp, b_list);
			goto foundit;
		}
	/*
	 * Move long-term busy buffers back to the front of the LRU so we
	 * do not endlessly inspect them for recycling.
	 */
	bp = TAILQ_LAST(&bufqueuehd, bufqueue);
	if (bp != NULL && bp->b_refcnt != 0) {
		TAILQ_REMOVE(&bufqueuehd, bp, b_list);
		TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list);
	}
	/*
	 * Allocate up to the minimum number of buffers before
	 * considering recycling any of them.
	 */
	if (size > sblock.fs_bsize)
		errx(EEXIT, "Excessive buffer size %ld > %d\n", size,
		    sblock.fs_bsize);
	if ((bp = LIST_FIRST(&freebufs)) != NULL) {
		LIST_REMOVE(bp, b_hash);
	} else if (numbufs < MINBUFS) {
		bp = allocbuf("cannot create minimal buffer pool");
	} else if (sujrecovery) {
		/*
		 * SUJ recovery does not want anything written until it
		 * has successfully completed (so it can fail back to
		 * full fsck). Thus, we can only recycle clean buffers.
		 */
		TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list)
			if ((bp->b_flags & B_DIRTY) == 0 && bp->b_refcnt == 0)
				break;
		if (bp == NULL)
			bp = allocbuf("Ran out of memory during "
			    "journal recovery");
		else
			LIST_REMOVE(bp, b_hash);
	} else {
		/*
		 * Recycle oldest non-busy buffer.
*/ TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) if (bp->b_refcnt == 0) break; if (bp == NULL) bp = allocbuf("Ran out of memory for buffers"); else LIST_REMOVE(bp, b_hash); } TAILQ_REMOVE(&bufqueuehd, bp, b_list); flush(fswritefd, bp); bp->b_type = type; LIST_INSERT_HEAD(bhdp, bp, b_hash); getblk(bp, blkno, size); cachereads++; /* fall through */ foundit: TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); if (debug && bp->b_type != type) { printf("getdatablk: buffer type changed to %s", BT_BUFTYPE(type)); prtbuf(bp, ""); } if (bp->b_errs == 0) bp->b_refcnt++; return (bp); } void getblk(struct bufarea *bp, ufs2_daddr_t blk, long size) { ufs2_daddr_t dblk; struct timespec start, finish; dblk = fsbtodb(&sblock, blk); if (bp->b_bno == dblk) { totalreads++; } else { if (debug) { readcnt[bp->b_type]++; clock_gettime(CLOCK_REALTIME_PRECISE, &start); } bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size); if (debug) { clock_gettime(CLOCK_REALTIME_PRECISE, &finish); timespecsub(&finish, &start, &finish); timespecadd(&readtime[bp->b_type], &finish, &readtime[bp->b_type]); } bp->b_bno = dblk; bp->b_size = size; } } void brelse(struct bufarea *bp) { if (bp->b_refcnt <= 0) prtbuf(bp, "brelse: buffer with negative reference count"); bp->b_refcnt--; } void binval(struct bufarea *bp) { bp->b_flags &= ~B_DIRTY; LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&freebufs, bp, b_hash); } void flush(int fd, struct bufarea *bp) { struct inode ip; if ((bp->b_flags & B_DIRTY) == 0) return; bp->b_flags &= ~B_DIRTY; if (fswritefd < 0) { pfatal("WRITING IN READ_ONLY MODE.\n"); return; } if (bp->b_errs != 0) pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n", (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ", (long long)bp->b_bno); bp->b_errs = 0; /* * Write using the appropriate function. */ switch (bp->b_type) { case BT_SUPERBLK: if (bp != &sblk) pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n", bp, &sblk); /* * Superblocks are always pre-copied so we do not need * to check them for copy-on-write. */ if (sbput(fd, bp->b_un.b_fs, 0) == 0) fsmodified = 1; break; case BT_CYLGRP: /* * Cylinder groups are always pre-copied so we do not * need to check them for copy-on-write. */ if (sujrecovery) cg_write(bp); if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0) fsmodified = 1; break; case BT_INODES: if (debug && sblock.fs_magic == FS_UFS2_MAGIC) { struct ufs2_dinode *dp = bp->b_un.b_dinode2; int i; for (i = 0; i < bp->b_size; dp++, i += sizeof(*dp)) { if (ffs_verify_dinode_ckhash(&sblock, dp) == 0) continue; pwarn("flush: INODE CHECK-HASH FAILED"); ip.i_bp = bp; ip.i_dp = (union dinode *)dp; ip.i_number = bp->b_index + (i / sizeof(*dp)); prtinode(&ip); if (preen || reply("FIX") != 0) { if (preen) printf(" (FIXED)\n"); ffs_update_dinode_ckhash(&sblock, dp); inodirty(&ip); } } } /* FALLTHROUGH */ default: copyonwrite(&sblock, bp, std_checkblkavail); blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size); break; } } /* * If there are any snapshots, ensure that all the blocks that they * care about have been copied, then release the snapshot inodes. * These operations need to be done before we rebuild the cylinder * groups so that any block allocations are properly recorded. * Since all the cylinder group maps have already been copied in * the snapshots, no further snapshot copies will need to be done. 
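/*
 * The getdatablk() hunk above bumps the reference count on the shared
 * `failedbuf' sentinel before handing it out, so the brelse() that every
 * caller eventually performs stays balanced.  The standalone sketch below
 * illustrates that discipline with invented names (struct xbuf, xbuf_get,
 * xbuf_release); it is not the fsck_ffs buffer code itself.
 */
#include <stdio.h>

struct xbuf {
	int	refcnt;		/* references held by callers */
	int	errs;		/* nonzero marks the failure sentinel */
};

static struct xbuf failed_sentinel = { 0, -1 };	/* shared "read failed" buffer */

/* Lookup that may fail: every return, successful or not, is referenced. */
static struct xbuf *
xbuf_get(int blkno_ok, struct xbuf *real)
{
	if (!blkno_ok) {
		/* Without this increment the caller's release would underflow. */
		failed_sentinel.refcnt++;
		return (&failed_sentinel);
	}
	real->refcnt++;
	return (real);
}

static void
xbuf_release(struct xbuf *bp)
{
	if (bp->refcnt <= 0)
		fprintf(stderr, "release of unreferenced buffer\n");
	bp->refcnt--;
}

int
main(void)
{
	struct xbuf good = { 0, 0 };
	struct xbuf *bp;

	bp = xbuf_get(0, &good);	/* failed lookup returns the sentinel */
	if (bp->errs != 0)
		printf("lookup failed, sentinel refcnt %d\n", bp->refcnt);
	xbuf_release(bp);		/* balanced: sentinel back to zero */
	return (0);
}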
*/ void snapflush(ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long)) { struct bufarea *bp; int cnt; if (snapcnt > 0) { if (debug) printf("Check for snapshot copies\n"); TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) if ((bp->b_flags & B_DIRTY) != 0) copyonwrite(&sblock, bp, checkblkavail); for (cnt = 0; cnt < snapcnt; cnt++) irelse(&snaplist[cnt]); snapcnt = 0; } } /* * Journaled soft updates does not maintain cylinder group summary * information during cleanup, so this routine recalculates the summary * information and updates the superblock summary in preparation for * writing out the cylinder group. */ static void cg_write(struct bufarea *bp) { ufs1_daddr_t fragno, cgbno, maxbno; u_int8_t *blksfree; struct csum *csp; struct cg *cgp; int blk; int i; /* * Fix the frag and cluster summary. */ cgp = bp->b_un.b_cg; cgp->cg_cs.cs_nbfree = 0; cgp->cg_cs.cs_nffree = 0; bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum)); maxbno = fragstoblks(&sblock, sblock.fs_fpg); if (sblock.fs_contigsumsize > 0) { for (i = 1; i <= sblock.fs_contigsumsize; i++) cg_clustersum(cgp)[i] = 0; bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT)); } blksfree = cg_blksfree(cgp); for (cgbno = 0; cgbno < maxbno; cgbno++) { if (ffs_isfreeblock(&sblock, blksfree, cgbno)) continue; if (ffs_isblock(&sblock, blksfree, cgbno)) { ffs_clusteracct(&sblock, cgp, cgbno, 1); cgp->cg_cs.cs_nbfree++; continue; } fragno = blkstofrags(&sblock, cgbno); blk = blkmap(&sblock, blksfree, fragno); ffs_fragacct(&sblock, blk, cgp->cg_frsum, 1); for (i = 0; i < sblock.fs_frag; i++) if (isset(blksfree, fragno + i)) cgp->cg_cs.cs_nffree++; } /* * Update the superblock cg summary from our now correct values * before writing the block. */ csp = &sblock.fs_cs(&sblock, cgp->cg_cgx); sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir; sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree; sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree; sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree; sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs; } void rwerror(const char *mesg, ufs2_daddr_t blk) { if (bkgrdcheck) exit(EEXIT); if (preen == 0) printf("\n"); pfatal("CANNOT %s: %ld", mesg, (long)blk); if (reply("CONTINUE") == 0) exit(EEXIT); } void ckfini(int markclean) { struct bufarea *bp, *nbp; int ofsmodified, cnt, cg; if (bkgrdflag) { unlink(snapname); if ((!(sblock.fs_flags & FS_UNCLEAN)) != markclean) { cmd.value = FS_UNCLEAN; cmd.size = markclean ? -1 : 1; if (sysctlbyname("vfs.ffs.setflags", 0, 0, &cmd, sizeof cmd) == -1) pwarn("CANNOT SET FILE SYSTEM DIRTY FLAG\n"); if (!preen) { printf("\n***** FILE SYSTEM MARKED %s *****\n", markclean ? "CLEAN" : "DIRTY"); if (!markclean) rerun = 1; } } else if (!preen && !markclean) { printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); rerun = 1; } bkgrdflag = 0; } if (debug && cachelookups > 0) printf("cache with %d buffers missed %d of %d (%d%%)\n", numbufs, cachereads, cachelookups, (int)(cachereads * 100 / cachelookups)); if (fswritefd < 0) { (void)close(fsreadfd); return; } /* * To remain idempotent with partial truncations the buffers * must be flushed in this order: * 1) cylinder groups (bitmaps) * 2) indirect, directory, external attribute, and data blocks * 3) inode blocks * 4) superblock * This ordering preserves access to the modified pointers * until they are freed. 
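/*
 * cg_write() above rebuilds the cylinder group's cs_nbfree/cs_nffree counts
 * and its free-fragment histogram directly from the free-block bitmap.  The
 * sketch below performs the same style of recomputation on a toy bitmap;
 * the sizes and names (FRAG, NBLOCKS, rebuild_summary) are invented, and the
 * real routine additionally maintains cluster summaries.
 */
#include <stdio.h>
#include <string.h>

#define FRAG	8		/* fragments per block (assumed) */
#define NBLOCKS	4		/* blocks covered by the sketch map */

static int
isfree(const unsigned char *map, int fragno)
{
	return ((map[fragno / 8] >> (fragno % 8)) & 1);
}

static void
rebuild_summary(const unsigned char *map, long *nbfree, long *nffree,
    long frsum[FRAG + 1])
{
	int b, f, run, allfree;

	*nbfree = *nffree = 0;
	memset(frsum, 0, (FRAG + 1) * sizeof(frsum[0]));
	for (b = 0; b < NBLOCKS; b++) {
		allfree = 1;
		for (f = 0; f < FRAG; f++)
			if (!isfree(map, b * FRAG + f))
				allfree = 0;
		if (allfree) {			/* whole block free */
			(*nbfree)++;
			continue;
		}
		for (f = 0, run = 0; f <= FRAG; f++) {
			if (f < FRAG && isfree(map, b * FRAG + f)) {
				run++;
				(*nffree)++;
			} else if (run > 0) {	/* close a run of free frags */
				frsum[run]++;
				run = 0;
			}
		}
	}
}

int
main(void)
{
	/* 32 fragments: block 0 all free, block 1 half free, rest allocated. */
	unsigned char map[NBLOCKS] = { 0xff, 0x0f, 0x00, 0x00 };
	long nbfree, nffree, frsum[FRAG + 1];

	rebuild_summary(map, &nbfree, &nffree, frsum);
	printf("nbfree %ld nffree %ld frsum[4] %ld\n", nbfree, nffree, frsum[4]);
	return (0);
}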
*/ /* Step 1: cylinder groups */ if (debug) printf("Flush Cylinder groups\n"); if (cgbufs != NULL) { for (cnt = 0; cnt < sblock.fs_ncg; cnt++) { if (cgbufs[cnt].b_un.b_cg == NULL) continue; flush(fswritefd, &cgbufs[cnt]); free(cgbufs[cnt].b_un.b_cg); } free(cgbufs); cgbufs = NULL; } flush(fswritefd, &cgblk); free(cgblk.b_un.b_buf); cgblk.b_un.b_buf = NULL; cnt = 0; /* Step 2: indirect, directory, external attribute, and data blocks */ if (debug) printf("Flush indirect, directory, external attribute, " "and data blocks\n"); if (pdirbp != NULL) { brelse(pdirbp); pdirbp = NULL; } TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { switch (bp->b_type) { /* These should not be in the buffer cache list */ case BT_UNKNOWN: case BT_SUPERBLK: case BT_CYLGRP: default: prtbuf(bp,"ckfini: improper buffer type on cache list"); continue; /* These are the ones to flush in this step */ case BT_LEVEL1: case BT_LEVEL2: case BT_LEVEL3: case BT_EXTATTR: case BT_DIRDATA: case BT_DATA: break; /* These are the ones to flush in the next step */ case BT_INODES: continue; } if (debug && bp->b_refcnt != 0) prtbuf(bp, "ckfini: clearing in-use buffer"); TAILQ_REMOVE(&bufqueuehd, bp, b_list); LIST_REMOVE(bp, b_hash); cnt++; flush(fswritefd, bp); free(bp->b_un.b_buf); free((char *)bp); } /* Step 3: inode blocks */ if (debug) printf("Flush inode blocks\n"); if (icachebp != NULL) { brelse(icachebp); icachebp = NULL; } TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { if (debug && bp->b_refcnt != 0) prtbuf(bp, "ckfini: clearing in-use buffer"); TAILQ_REMOVE(&bufqueuehd, bp, b_list); LIST_REMOVE(bp, b_hash); cnt++; flush(fswritefd, bp); free(bp->b_un.b_buf); free((char *)bp); } if (numbufs != cnt) errx(EEXIT, "panic: lost %d buffers", numbufs - cnt); /* Step 4: superblock */ if (debug) printf("Flush the superblock\n"); flush(fswritefd, &sblk); if (havesb && cursnapshot == 0 && sblk.b_bno != sblock.fs_sblockloc / dev_bsize) { if (preen || reply("UPDATE STANDARD SUPERBLOCK")) { /* Change write destination to standard superblock */ sblock.fs_sblockactualloc = sblock.fs_sblockloc; sblk.b_bno = sblock.fs_sblockloc / dev_bsize; sbdirty(); flush(fswritefd, &sblk); } else { markclean = 0; } } if (cursnapshot == 0 && sblock.fs_clean != markclean) { if ((sblock.fs_clean = markclean) != 0) { sblock.fs_flags &= ~(FS_UNCLEAN | FS_NEEDSFSCK); sblock.fs_pendingblocks = 0; sblock.fs_pendinginodes = 0; } sbdirty(); ofsmodified = fsmodified; flush(fswritefd, &sblk); fsmodified = ofsmodified; if (!preen) { printf("\n***** FILE SYSTEM MARKED %s *****\n", markclean ? "CLEAN" : "DIRTY"); if (!markclean) rerun = 1; } } else if (!preen) { if (markclean) { printf("\n***** FILE SYSTEM IS CLEAN *****\n"); } else { printf("\n***** FILE SYSTEM STILL DIRTY *****\n"); rerun = 1; } } /* * Free allocated tracking structures. */ if (blockmap != NULL) free(blockmap); blockmap = NULL; if (inostathead != NULL) { for (cg = 0; cg < sblock.fs_ncg; cg++) if (inostathead[cg].il_stat != NULL) free((char *)inostathead[cg].il_stat); free(inostathead); } inostathead = NULL; inocleanup(); finalIOstats(); (void)close(fsreadfd); (void)close(fswritefd); } /* * Print out I/O statistics. 
*/ void IOstats(char *what) { int i; if (debug == 0) return; if (diskreads == 0) { printf("%s: no I/O\n\n", what); return; } if (startpass.tv_sec == 0) startpass = startprog; printf("%s: I/O statistics\n", what); printIOstats(); totaldiskreads += diskreads; diskreads = 0; for (i = 0; i < BT_NUMBUFTYPES; i++) { timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]); totalreadcnt[i] += readcnt[i]; readtime[i].tv_sec = readtime[i].tv_nsec = 0; readcnt[i] = 0; } clock_gettime(CLOCK_REALTIME_PRECISE, &startpass); } void finalIOstats(void) { int i; if (debug == 0) return; printf("Final I/O statistics\n"); totaldiskreads += diskreads; diskreads = totaldiskreads; startpass = startprog; for (i = 0; i < BT_NUMBUFTYPES; i++) { timespecadd(&totalreadtime[i], &readtime[i], &totalreadtime[i]); totalreadcnt[i] += readcnt[i]; readtime[i] = totalreadtime[i]; readcnt[i] = totalreadcnt[i]; } printIOstats(); } static void printIOstats(void) { long long msec, totalmsec; int i; clock_gettime(CLOCK_REALTIME_PRECISE, &finishpass); timespecsub(&finishpass, &startpass, &finishpass); printf("Running time: %jd.%03ld sec\n", (intmax_t)finishpass.tv_sec, finishpass.tv_nsec / 1000000); printf("buffer reads by type:\n"); for (totalmsec = 0, i = 0; i < BT_NUMBUFTYPES; i++) totalmsec += readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000; if (totalmsec == 0) totalmsec = 1; for (i = 0; i < BT_NUMBUFTYPES; i++) { if (readcnt[i] == 0) continue; msec = readtime[i].tv_sec * 1000 + readtime[i].tv_nsec / 1000000; printf("%21s:%8ld %2ld.%ld%% %4jd.%03ld sec %2lld.%lld%%\n", buftype[i], readcnt[i], readcnt[i] * 100 / diskreads, (readcnt[i] * 1000 / diskreads) % 10, (intmax_t)readtime[i].tv_sec, readtime[i].tv_nsec / 1000000, msec * 100 / totalmsec, (msec * 1000 / totalmsec) % 10); } printf("\n"); } int blread(int fd, char *buf, ufs2_daddr_t blk, long size) { char *cp; int i, errs; off_t offset; offset = blk; offset *= dev_bsize; if (bkgrdflag) slowio_start(); totalreads++; diskreads++; if (pread(fd, buf, (int)size, offset) == size) { if (bkgrdflag) slowio_end(); return (0); } /* * This is handled specially here instead of in rwerror because * rwerror is used for all sorts of errors, not just true read/write * errors. It should be refactored and fixed. 
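/*
 * getblk() above brackets each cache miss with clock_gettime() and
 * accumulates the elapsed time per buffer type, which printIOstats() later
 * reports in milliseconds.  A portable sketch of that accounting follows;
 * it uses CLOCK_MONOTONIC and a hand-rolled timespec add instead of the
 * CLOCK_REALTIME_PRECISE/timespecadd() used in this file, and the type
 * names are invented.
 */
#include <stdio.h>
#include <time.h>
#include <unistd.h>

#define NTYPES	3
static const char *typename[NTYPES] = { "inode", "dir", "data" };
static struct timespec readtime[NTYPES];	/* accumulated time per type */
static long readcnt[NTYPES];			/* reads per type */

/* Add (finish - start) into the running total for this buffer type. */
static void
account_read(int type, const struct timespec *start,
    const struct timespec *finish)
{
	long nsec;

	readtime[type].tv_sec += finish->tv_sec - start->tv_sec;
	nsec = finish->tv_nsec - start->tv_nsec;
	if (nsec < 0) {
		readtime[type].tv_sec--;
		nsec += 1000000000L;
	}
	readtime[type].tv_nsec += nsec;
	if (readtime[type].tv_nsec >= 1000000000L) {
		readtime[type].tv_sec++;
		readtime[type].tv_nsec -= 1000000000L;
	}
	readcnt[type]++;
}

int
main(void)
{
	struct timespec start, finish;
	int i;

	for (i = 0; i < NTYPES; i++) {
		clock_gettime(CLOCK_MONOTONIC, &start);
		usleep(1000 * (i + 1));		/* stand-in for a disk read */
		clock_gettime(CLOCK_MONOTONIC, &finish);
		account_read(i, &start, &finish);
	}
	for (i = 0; i < NTYPES; i++)
		printf("%8s: %ld read(s), %lld.%03ld sec\n", typename[i],
		    readcnt[i], (long long)readtime[i].tv_sec,
		    readtime[i].tv_nsec / 1000000);
	return (0);
}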
*/ if (surrender) { pfatal("CANNOT READ_BLK: %ld", (long)blk); errx(EEXIT, "ABORTING DUE TO READ ERRORS"); } else rwerror("READ BLK", blk); errs = 0; memset(buf, 0, (size_t)size); printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { if (pread(fd, cp, (int)secsize, offset + i) != secsize) { if (secsize != dev_bsize && dev_bsize != 1) printf(" %jd (%jd),", (intmax_t)(blk * dev_bsize + i) / secsize, (intmax_t)blk + i / dev_bsize); else printf(" %jd,", (intmax_t)blk + i / dev_bsize); errs++; } } printf("\n"); if (errs) resolved = 0; return (errs); } void blwrite(int fd, char *buf, ufs2_daddr_t blk, ssize_t size) { int i; char *cp; off_t offset; if (fd < 0) return; offset = blk; offset *= dev_bsize; if (pwrite(fd, buf, size, offset) == size) { fsmodified = 1; return; } resolved = 0; rwerror("WRITE BLK", blk); printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) if (pwrite(fd, cp, dev_bsize, offset + i) != dev_bsize) printf(" %jd,", (intmax_t)blk + i / dev_bsize); printf("\n"); return; } void blerase(int fd, ufs2_daddr_t blk, long size) { off_t ioarg[2]; if (fd < 0) return; ioarg[0] = blk * dev_bsize; ioarg[1] = size; ioctl(fd, DIOCGDELETE, ioarg); /* we don't really care if we succeed or not */ return; } /* * Fill a contiguous region with all-zeroes. Note ZEROBUFSIZE is by * definition a multiple of dev_bsize. */ void blzero(int fd, ufs2_daddr_t blk, long size) { static char *zero; off_t offset, len; if (fd < 0) return; if (zero == NULL) { zero = calloc(ZEROBUFSIZE, 1); if (zero == NULL) errx(EEXIT, "cannot allocate buffer pool"); } offset = blk * dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK BLK", blk); while (size > 0) { len = MIN(ZEROBUFSIZE, size); if (write(fd, zero, len) != len) rwerror("WRITE BLK", blk); blk += len / dev_bsize; size -= len; } } /* * Verify cylinder group's magic number and other parameters. If the * test fails, offer an option to rebuild the whole cylinder group. */ #undef CHK #define CHK(lhs, op, rhs, fmt) \ if (lhs op rhs) { \ pwarn("UFS%d cylinder group %d failed: " \ "%s (" #fmt ") %s %s (" #fmt ")\n", \ sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, cg, \ #lhs, (intmax_t)lhs, #op, #rhs, (intmax_t)rhs); \ error = 1; \ } int check_cgmagic(int cg, struct bufarea *cgbp, int request_rebuild) { struct cg *cgp = cgbp->b_un.b_cg; uint32_t cghash, calchash; static int prevfailcg = -1; long start; int error; /* * Extended cylinder group checks. 
*/ calchash = cgp->cg_ckhash; if ((sblock.fs_metackhash & CK_CYLGRP) != 0 && (ckhashadd & CK_CYLGRP) == 0) { cghash = cgp->cg_ckhash; cgp->cg_ckhash = 0; calchash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); cgp->cg_ckhash = cghash; } error = 0; CHK(cgp->cg_ckhash, !=, calchash, "%jd"); CHK(cg_chkmagic(cgp), ==, 0, "%jd"); CHK(cgp->cg_cgx, !=, cg, "%jd"); CHK(cgp->cg_ndblk, >, sblock.fs_fpg, "%jd"); if (sblock.fs_magic == FS_UFS1_MAGIC) { CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd"); CHK(cgp->cg_old_ncyl, >, sblock.fs_old_cpg, "%jd"); } else if (sblock.fs_magic == FS_UFS2_MAGIC) { CHK(cgp->cg_niblk, !=, sblock.fs_ipg, "%jd"); CHK(cgp->cg_initediblk, >, sblock.fs_ipg, "%jd"); } if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) { CHK(cgp->cg_ndblk, !=, sblock.fs_fpg, "%jd"); } else { CHK(cgp->cg_ndblk, !=, sblock.fs_size - cgbase(&sblock, cg), "%jd"); } start = &cgp->cg_space[0] - (u_char *)(&cgp->cg_firstfield); if (sblock.fs_magic == FS_UFS2_MAGIC) { CHK(cgp->cg_iusedoff, !=, start, "%jd"); } else if (sblock.fs_magic == FS_UFS1_MAGIC) { CHK(cgp->cg_niblk, !=, 0, "%jd"); CHK(cgp->cg_initediblk, !=, 0, "%jd"); CHK(cgp->cg_old_ncyl, !=, sblock.fs_old_cpg, "%jd"); CHK(cgp->cg_old_niblk, !=, sblock.fs_ipg, "%jd"); CHK(cgp->cg_old_btotoff, !=, start, "%jd"); CHK(cgp->cg_old_boff, !=, cgp->cg_old_btotoff + sblock.fs_old_cpg * sizeof(int32_t), "%jd"); CHK(cgp->cg_iusedoff, !=, cgp->cg_old_boff + sblock.fs_old_cpg * sizeof(u_int16_t), "%jd"); } CHK(cgp->cg_freeoff, !=, cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT), "%jd"); if (sblock.fs_contigsumsize == 0) { CHK(cgp->cg_nextfreeoff, !=, cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), "%jd"); } else { CHK(cgp->cg_nclusterblks, !=, cgp->cg_ndblk / sblock.fs_frag, "%jd"); CHK(cgp->cg_clustersumoff, !=, roundup(cgp->cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT), sizeof(u_int32_t)) - sizeof(u_int32_t), "%jd"); CHK(cgp->cg_clusteroff, !=, cgp->cg_clustersumoff + (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t), "%jd"); CHK(cgp->cg_nextfreeoff, !=, cgp->cg_clusteroff + howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT), "%jd"); } if (error == 0) return (1); if (prevfailcg == cg) return (0); prevfailcg = cg; pfatal("CYLINDER GROUP %d: INTEGRITY CHECK FAILED", cg); if (!request_rebuild) { printf("\n"); return (0); } if (!reply("REBUILD CYLINDER GROUP")) { printf("YOU WILL NEED TO RERUN FSCK.\n"); rerun = 1; return (1); } /* * Zero out the cylinder group and then initialize critical fields. * Bit maps and summaries will be recalculated by later passes. 
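/*
 * check_cgmagic() verifies cg_ckhash by zeroing the stored hash, recomputing
 * a CRC-32C over the whole cylinder group, and comparing; cgdirty() refreshes
 * the hash the same way before a write.  The sketch below shows that
 * zero-then-hash pattern with a generic bitwise CRC-32C (Castagnoli
 * polynomial); it is not the kernel's calculate_crc32c(), and struct record
 * is invented for the example.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t
crc32c(uint32_t crc, const void *buf, size_t len)
{
	const unsigned char *p = buf;
	int i;

	while (len--) {
		crc ^= *p++;
		for (i = 0; i < 8; i++)
			crc = (crc & 1) ? (crc >> 1) ^ 0x82F63B78 : crc >> 1;
	}
	return (crc);
}

struct record {
	uint32_t	ckhash;		/* embedded check-hash */
	char		payload[64];
};

/* Hash is computed with the stored hash field zeroed, as cgdirty() does. */
static uint32_t
record_hash(struct record *rp)
{
	uint32_t saved, hash;

	saved = rp->ckhash;
	rp->ckhash = 0;
	hash = crc32c(~0U, rp, sizeof(*rp));
	rp->ckhash = saved;
	return (hash);
}

int
main(void)
{
	struct record r;

	memset(&r, 0, sizeof(r));
	snprintf(r.payload, sizeof(r.payload), "cylinder group 7");
	r.ckhash = record_hash(&r);		/* store */
	printf("stored ckhash 0x%08x, verify %s\n", r.ckhash,
	    record_hash(&r) == r.ckhash ? "ok" : "FAILED");
	return (0);
}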
*/ memset(cgp, 0, (size_t)sblock.fs_cgsize); cgp->cg_magic = CG_MAGIC; cgp->cg_cgx = cg; cgp->cg_niblk = sblock.fs_ipg; cgp->cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock)); if (cgbase(&sblock, cg) + sblock.fs_fpg < sblock.fs_size) cgp->cg_ndblk = sblock.fs_fpg; else cgp->cg_ndblk = sblock.fs_size - cgbase(&sblock, cg); start = &cgp->cg_space[0] - (u_char *)(&cgp->cg_firstfield); if (sblock.fs_magic == FS_UFS2_MAGIC) { cgp->cg_iusedoff = start; } else if (sblock.fs_magic == FS_UFS1_MAGIC) { cgp->cg_niblk = 0; cgp->cg_initediblk = 0; cgp->cg_old_ncyl = sblock.fs_old_cpg; cgp->cg_old_niblk = sblock.fs_ipg; cgp->cg_old_btotoff = start; cgp->cg_old_boff = cgp->cg_old_btotoff + sblock.fs_old_cpg * sizeof(int32_t); cgp->cg_iusedoff = cgp->cg_old_boff + sblock.fs_old_cpg * sizeof(u_int16_t); } cgp->cg_freeoff = cgp->cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT); cgp->cg_nextfreeoff = cgp->cg_freeoff + howmany(sblock.fs_fpg,CHAR_BIT); if (sblock.fs_contigsumsize > 0) { cgp->cg_nclusterblks = cgp->cg_ndblk / sblock.fs_frag; cgp->cg_clustersumoff = roundup(cgp->cg_nextfreeoff, sizeof(u_int32_t)); cgp->cg_clustersumoff -= sizeof(u_int32_t); cgp->cg_clusteroff = cgp->cg_clustersumoff + (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t); cgp->cg_nextfreeoff = cgp->cg_clusteroff + howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT); } cgp->cg_ckhash = calculate_crc32c(~0L, (void *)cgp, sblock.fs_cgsize); cgdirty(cgbp); return (0); } /* * allocate a data block with the specified number of fragments */ ufs2_daddr_t allocblk(long startcg, long frags, ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags)) { ufs2_daddr_t blkno, newblk; if (sujrecovery && checkblkavail == std_checkblkavail) { pfatal("allocblk: std_checkblkavail used for SUJ recovery\n"); return (0); } if (frags <= 0 || frags > sblock.fs_frag) return (0); for (blkno = MAX(cgdata(&sblock, startcg), 0); blkno < maxfsblock - sblock.fs_frag; blkno += sblock.fs_frag) { if ((newblk = (*checkblkavail)(blkno, frags)) == 0) continue; if (newblk > 0) return (newblk); if (newblk < 0) blkno = -newblk; } for (blkno = MAX(cgdata(&sblock, 0), 0); blkno < cgbase(&sblock, startcg) - sblock.fs_frag; blkno += sblock.fs_frag) { if ((newblk = (*checkblkavail)(blkno, frags)) == 0) continue; if (newblk > 0) return (newblk); if (newblk < 0) blkno = -newblk; } return (0); } ufs2_daddr_t std_checkblkavail(ufs2_daddr_t blkno, long frags) { struct bufarea *cgbp; struct cg *cgp; ufs2_daddr_t j, k, baseblk; long cg; if ((u_int64_t)blkno > sblock.fs_size) return (0); for (j = 0; j <= sblock.fs_frag - frags; j++) { if (testbmap(blkno + j)) continue; for (k = 1; k < frags; k++) if (testbmap(blkno + j + k)) break; if (k < frags) { j += k; continue; } cg = dtog(&sblock, blkno + j); cgbp = cglookup(cg); cgp = cgbp->b_un.b_cg; if (!check_cgmagic(cg, cgbp, 0)) return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag)); baseblk = dtogd(&sblock, blkno + j); for (k = 0; k < frags; k++) { setbmap(blkno + j + k); clrbit(cg_blksfree(cgp), baseblk + k); } n_blks += frags; if (frags == sblock.fs_frag) cgp->cg_cs.cs_nbfree--; else cgp->cg_cs.cs_nffree -= frags; cgdirty(cgbp); return (blkno + j); } return (0); } /* * Slow down IO so as to leave some disk bandwidth for other processes */ void slowio_start() { /* Delay one in every 8 operations */ slowio_pollcnt = (slowio_pollcnt + 1) & 7; if (slowio_pollcnt == 0) { gettimeofday(&slowio_starttime, NULL); } } void slowio_end() { struct timeval tv; int delay_usec; if (slowio_pollcnt != 0) return; /* Update the slowdown interval. 
*/ gettimeofday(&tv, NULL); delay_usec = (tv.tv_sec - slowio_starttime.tv_sec) * 1000000 + (tv.tv_usec - slowio_starttime.tv_usec); if (delay_usec < 64) delay_usec = 64; if (delay_usec > 2500000) delay_usec = 2500000; slowio_delay_usec = (slowio_delay_usec * 63 + delay_usec) >> 6; /* delay by 8 times the average IO delay */ if (slowio_delay_usec > 64) usleep(slowio_delay_usec * 8); } /* * Find a pathname */ void getpathname(char *namebuf, ino_t curdir, ino_t ino) { int len; char *cp; struct inode ip; struct inodesc idesc; static int busy = 0; if (curdir == ino && ino == UFS_ROOTINO) { (void)strcpy(namebuf, "/"); return; } if (busy || !INO_IS_DVALID(curdir)) { (void)strcpy(namebuf, "?"); return; } busy = 1; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_fix = IGNORE; cp = &namebuf[MAXPATHLEN - 1]; *cp = '\0'; if (curdir != ino) { idesc.id_parent = curdir; goto namelookup; } while (ino != UFS_ROOTINO) { idesc.id_number = ino; idesc.id_func = findino; idesc.id_name = strdup(".."); ginode(ino, &ip); if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { irelse(&ip); free(idesc.id_name); break; } irelse(&ip); free(idesc.id_name); namelookup: idesc.id_number = idesc.id_parent; idesc.id_parent = ino; idesc.id_func = findname; idesc.id_name = namebuf; ginode(idesc.id_number, &ip); if ((ckinode(ip.i_dp, &idesc) & FOUND) == 0) { irelse(&ip); break; } irelse(&ip); len = strlen(namebuf); cp -= len; memmove(cp, namebuf, (size_t)len); *--cp = '/'; if (cp < &namebuf[UFS_MAXNAMLEN]) break; ino = idesc.id_number; } busy = 0; if (ino != UFS_ROOTINO) *--cp = '?'; memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); } void catch(int sig __unused) { ckfini(0); exit(12); } /* * When preening, allow a single quit to signal * a special exit after file system checks complete * so that reboot sequence may be interrupted. */ void catchquit(int sig __unused) { printf("returning to single-user after file system check\n"); returntosingle = 1; (void)signal(SIGQUIT, SIG_DFL); } /* * determine whether an inode should be fixed. */ int dofix(struct inodesc *idesc, const char *msg) { switch (idesc->id_fix) { case DONTKNOW: if (idesc->id_type == DATA) direrror(idesc->id_number, msg); else pwarn("%s", msg); if (preen) { printf(" (SALVAGED)\n"); idesc->id_fix = FIX; return (ALTERED); } if (reply("SALVAGE") == 0) { idesc->id_fix = NOFIX; return (0); } idesc->id_fix = FIX; return (ALTERED); case FIX: return (ALTERED); case NOFIX: case IGNORE: return (0); default: errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); } /* NOTREACHED */ return (0); } #include /* * Print details about a buffer. */ void prtbuf(struct bufarea *bp, const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (preen) (void)fprintf(stdout, "%s: ", cdevname); (void)vfprintf(stdout, fmt, ap); va_end(ap); printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, " "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno, bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean", (intmax_t) bp->b_index); } /* * An unexpected inconsistency occurred. * Die if preening or file system is running with soft dependency protocol, * otherwise just print message and continue. */ void pfatal(const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (!preen) { (void)vfprintf(stdout, fmt, ap); va_end(ap); if (usedsoftdep) (void)fprintf(stdout, "\nUNEXPECTED SOFT UPDATE INCONSISTENCY\n"); /* * Force foreground fsck to clean up inconsistency. 
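/*
 * slowio_start()/slowio_end() above sample one read in eight, clamp the
 * measured cost, fold it into a 1/64-weight moving average, and then sleep
 * for eight times that average so a background fsck leaves bandwidth for
 * other consumers.  A self-contained sketch of the same throttle follows;
 * the function names and the usleep() stand-in for a disk read are invented.
 */
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>

static struct timeval io_start;
static int avg_delay_usec = 10000;	/* running average, seeded like fsck */
static int pollcnt;

static void
throttle_start(void)
{
	pollcnt = (pollcnt + 1) & 7;		/* measure one op in eight */
	if (pollcnt == 0)
		gettimeofday(&io_start, NULL);
}

/* Fold the cost of the sampled I/O into the average, then back off. */
static void
throttle_end(void)
{
	struct timeval tv;
	int sample;

	if (pollcnt != 0)
		return;
	gettimeofday(&tv, NULL);
	sample = (tv.tv_sec - io_start.tv_sec) * 1000000 +
	    (tv.tv_usec - io_start.tv_usec);
	if (sample < 64)
		sample = 64;
	if (sample > 2500000)
		sample = 2500000;
	avg_delay_usec = (avg_delay_usec * 63 + sample) >> 6;
	if (avg_delay_usec > 64)
		usleep(avg_delay_usec * 8);	/* give other I/O 8x headroom */
}

int
main(void)
{
	int i;

	for (i = 0; i < 16; i++) {
		throttle_start();
		usleep(2000);			/* stand-in for a disk read */
		throttle_end();
	}
	printf("average I/O delay estimate: %d usec\n", avg_delay_usec);
	return (0);
}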
*/ if (bkgrdflag) { cmd.value = FS_NEEDSFSCK; cmd.size = 1; if (sysctlbyname("vfs.ffs.setflags", 0, 0, &cmd, sizeof cmd) == -1) pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); fprintf(stdout, "CANNOT RUN IN BACKGROUND\n"); ckfini(0); exit(EEXIT); } return; } if (cdevname == NULL) cdevname = strdup("fsck"); (void)fprintf(stdout, "%s: ", cdevname); (void)vfprintf(stdout, fmt, ap); (void)fprintf(stdout, "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", cdevname, usedsoftdep ? " SOFT UPDATE " : " "); /* * Force foreground fsck to clean up inconsistency. */ if (bkgrdflag) { cmd.value = FS_NEEDSFSCK; cmd.size = 1; if (sysctlbyname("vfs.ffs.setflags", 0, 0, &cmd, sizeof cmd) == -1) pwarn("CANNOT SET FS_NEEDSFSCK FLAG\n"); } ckfini(0); exit(EEXIT); } /* * Pwarn just prints a message when not preening or running soft dependency * protocol, or a warning (preceded by filename) when preening. */ void pwarn(const char *fmt, ...) { va_list ap; va_start(ap, fmt); if (preen) (void)fprintf(stdout, "%s: ", cdevname); (void)vfprintf(stdout, fmt, ap); va_end(ap); } /* * Stub for routines from kernel. */ void panic(const char *fmt, ...) { va_list ap; va_start(ap, fmt); pfatal("INTERNAL INCONSISTENCY:"); (void)vfprintf(stdout, fmt, ap); va_end(ap); exit(EEXIT); } diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c index a46fea0607a0..37b0f9ebc1b1 100644 --- a/sbin/fsck_ffs/inode.c +++ b/sbin/fsck_ffs/inode.c @@ -1,1464 +1,1466 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #if 0 #ifndef lint static const char sccsid[] = "@(#)inode.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" struct bufarea *icachebp; /* inode cache buffer */ static int iblock(struct inodesc *, off_t isize, int type); static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t, struct bufarea **); static int snapclean(struct inodesc *idesc); static void chkcopyonwrite(struct fs *, ufs2_daddr_t, ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t, long)); int ckinode(union dinode *dp, struct inodesc *idesc) { off_t remsize, sizepb; int i, offset, ret; struct inode ip; union dinode dino; ufs2_daddr_t ndb; mode_t mode; char pathbuf[MAXPATHLEN + 1]; if (idesc->id_fix != IGNORE) idesc->id_fix = DONTKNOW; idesc->id_dp = dp; idesc->id_lbn = -1; idesc->id_lballoc = -1; idesc->id_level = 0; idesc->id_entryno = 0; idesc->id_filesize = DIP(dp, di_size); mode = DIP(dp, di_mode) & IFMT; if (mode == IFBLK || mode == IFCHR || (mode == IFLNK && DIP(dp, di_size) < (unsigned)sblock.fs_maxsymlinklen)) return (KEEPON); if (sblock.fs_magic == FS_UFS1_MAGIC) dino.dp1 = dp->dp1; else dino.dp2 = dp->dp2; ndb = howmany(DIP(&dino, di_size), sblock.fs_bsize); for (i = 0; i < UFS_NDADDR; i++) { idesc->id_lbn++; if (--ndb == 0 && (offset = blkoff(&sblock, DIP(&dino, di_size))) != 0) idesc->id_numfrags = numfrags(&sblock, fragroundup(&sblock, offset)); else idesc->id_numfrags = sblock.fs_frag; if (DIP(&dino, di_db[i]) == 0) { if (idesc->id_type == DATA && ndb >= 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { ginode(idesc->id_number, &ip); DIP_SET(ip.i_dp, di_size, i * sblock.fs_bsize); printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(&ip); irelse(&ip); } } continue; } idesc->id_blkno = DIP(&dino, di_db[i]); if (idesc->id_type != DATA) ret = (*idesc->id_func)(idesc); else ret = dirscan(idesc); if (ret & STOP) return (ret); } idesc->id_numfrags = sblock.fs_frag; remsize = DIP(&dino, di_size) - sblock.fs_bsize * UFS_NDADDR; sizepb = sblock.fs_bsize; for (i = 0; i < UFS_NIADDR; i++) { sizepb *= NINDIR(&sblock); idesc->id_level = i + 1; if (DIP(&dino, di_ib[i])) { idesc->id_blkno = DIP(&dino, di_ib[i]); ret = iblock(idesc, remsize, BT_LEVEL1 + i); if (ret & STOP) return (ret); } else if (remsize > 0) { idesc->id_lbn += sizepb / sblock.fs_bsize; if (idesc->id_type == DATA) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { ginode(idesc->id_number, &ip); DIP_SET(ip.i_dp, di_size, DIP(ip.i_dp, di_size) - remsize); remsize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(&ip); irelse(&ip); break; } } } remsize -= sizepb; } return (KEEPON); } static int iblock(struct inodesc *idesc, off_t isize, int type) { struct inode ip; struct bufarea *bp; int i, n, (*func)(struct inodesc *), nif; off_t sizepb; char buf[BUFSIZ]; char pathbuf[MAXPATHLEN + 1]; if (idesc->id_type != DATA) { func = idesc->id_func; if (((n = (*func)(idesc)) & KEEPON) == 0) return (n); } else func = dirscan; bp = getdatablk(idesc->id_blkno, sblock.fs_bsize, type); if (bp->b_errs != 0) { brelse(bp); return (SKIP); } idesc->id_bp = bp; idesc->id_level--; for (sizepb = 
sblock.fs_bsize, i = 0; i < idesc->id_level; i++) sizepb *= NINDIR(&sblock); if (howmany(isize, sizepb) > NINDIR(&sblock)) nif = NINDIR(&sblock); else nif = howmany(isize, sizepb); if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) { for (i = nif; i < NINDIR(&sblock); i++) { if (IBLK(bp, i) == 0) continue; (void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu", (u_long)idesc->id_number); if (preen) { pfatal("%s", buf); } else if (dofix(idesc, buf)) { IBLK_SET(bp, i, 0); dirty(bp); } } flush(fswritefd, bp); } for (i = 0; i < nif; i++) { if (IBLK(bp, i)) { idesc->id_blkno = IBLK(bp, i); bp->b_index = i; if (idesc->id_level == 0) { idesc->id_lbn++; n = (*func)(idesc); } else { n = iblock(idesc, isize, type - 1); idesc->id_level++; } if (n & STOP) { brelse(bp); return (n); } } else { idesc->id_lbn += sizepb / sblock.fs_bsize; if (idesc->id_type == DATA && isize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { ginode(idesc->id_number, &ip); DIP_SET(ip.i_dp, di_size, DIP(ip.i_dp, di_size) - isize); isize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(&ip); brelse(bp); return(STOP); } } } isize -= sizepb; } brelse(bp); return (KEEPON); } /* * Finds the disk block address at the specified lbn within the inode * specified by dp. This follows the whole tree and honors di_size and * di_extsize so it is a true test of reachability. The lbn may be * negative if an extattr or indirect block is requested. */ ufs2_daddr_t ino_blkatoff(union dinode *dp, ino_t ino, ufs_lbn_t lbn, int *frags, struct bufarea **bpp) { ufs_lbn_t tmpval; ufs_lbn_t cur; ufs_lbn_t next; int i; *frags = 0; if (bpp != NULL) *bpp = NULL; /* * Handle extattr blocks first. */ if (lbn < 0 && lbn >= -UFS_NXADDR) { lbn = -1 - lbn; if (lbn > lblkno(&sblock, dp->dp2.di_extsize - 1)) return (0); *frags = numfrags(&sblock, sblksize(&sblock, dp->dp2.di_extsize, lbn)); return (dp->dp2.di_extb[lbn]); } /* * Now direct and indirect. */ if (DIP(dp, di_mode) == IFLNK && DIP(dp, di_size) < sblock.fs_maxsymlinklen) return (0); if (lbn >= 0 && lbn < UFS_NDADDR) { *frags = numfrags(&sblock, sblksize(&sblock, DIP(dp, di_size), lbn)); return (DIP(dp, di_db[lbn])); } *frags = sblock.fs_frag; for (i = 0, tmpval = NINDIR(&sblock), cur = UFS_NDADDR; i < UFS_NIADDR; i++, tmpval *= NINDIR(&sblock), cur = next) { next = cur + tmpval; if (lbn == -cur - i) return (DIP(dp, di_ib[i])); /* * Determine whether the lbn in question is within this tree. */ if (lbn < 0 && -lbn >= next) continue; if (lbn > 0 && lbn >= next) continue; if (DIP(dp, di_ib[i]) == 0) return (0); return (indir_blkatoff(DIP(dp, di_ib[i]), ino, -cur - i, lbn, bpp)); } pfatal("lbn %jd not in ino %ju\n", lbn, (uintmax_t)ino); return (0); } /* * Fetch an indirect block to find the block at a given lbn. The lbn * may be negative to fetch a specific indirect block pointer or positive * to fetch a specific block. 
*/ static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn, struct bufarea **bpp) { struct bufarea *bp; ufs_lbn_t lbnadd; ufs_lbn_t base; int i, level; level = lbn_level(cur); if (level == -1) pfatal("Invalid indir lbn %jd in ino %ju\n", lbn, (uintmax_t)ino); if (level == 0 && lbn < 0) pfatal("Invalid lbn %jd in ino %ju\n", lbn, (uintmax_t)ino); lbnadd = 1; base = -(cur + level); for (i = level; i > 0; i--) lbnadd *= NINDIR(&sblock); if (lbn > 0) i = (lbn - base) / lbnadd; else i = (-lbn - base) / lbnadd; if (i < 0 || i >= NINDIR(&sblock)) { pfatal("Invalid indirect index %d produced by lbn %jd " "in ino %ju\n", i, lbn, (uintmax_t)ino); return (0); } if (level == 0) cur = base + (i * lbnadd); else cur = -(base + (i * lbnadd)) - (level - 1); bp = getdatablk(blk, sblock.fs_bsize, BT_LEVEL1 + level); if (bp->b_errs != 0) return (0); blk = IBLK(bp, i); bp->b_index = i; if (cur == lbn || blk == 0) { if (bpp != NULL) *bpp = bp; else brelse(bp); return (blk); } brelse(bp); if (level == 0) pfatal("Invalid lbn %jd at level 0 for ino %ju\n", lbn, (uintmax_t)ino); return (indir_blkatoff(blk, ino, cur, lbn, bpp)); } /* * Check that a block in a legal block number. * Return 0 if in range, 1 if out of range. */ int chkrange(ufs2_daddr_t blk, int cnt) { int c; if (cnt <= 0 || blk <= 0 || blk > maxfsblock || cnt - 1 > maxfsblock - blk) { if (debug) printf("out of range: blk %ld, offset %i, size %d\n", (long)blk, (int)fragnum(&sblock, blk), cnt); return (1); } if (cnt > sblock.fs_frag || fragnum(&sblock, blk) + cnt > sblock.fs_frag) { if (debug) printf("bad size: blk %ld, offset %i, size %d\n", (long)blk, (int)fragnum(&sblock, blk), cnt); return (1); } c = dtog(&sblock, blk); if (blk < cgdmin(&sblock, c)) { if ((blk + cnt) > cgsblock(&sblock, c)) { if (debug) { printf("blk %ld < cgdmin %ld;", (long)blk, (long)cgdmin(&sblock, c)); printf(" blk + cnt %ld > cgsbase %ld\n", (long)(blk + cnt), (long)cgsblock(&sblock, c)); } return (1); } } else { if ((blk + cnt) > cgbase(&sblock, c+1)) { if (debug) { printf("blk %ld >= cgdmin %ld;", (long)blk, (long)cgdmin(&sblock, c)); printf(" blk + cnt %ld > sblock.fs_fpg %ld\n", (long)(blk + cnt), (long)sblock.fs_fpg); } return (1); } } return (0); } /* * General purpose interface for reading inodes. * * firstinum and lastinum track contents of getnextino() cache (below). 
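/*
 * indir_blkatoff() above turns a logical block number into a pointer index
 * at each level of the indirect-block tree by dividing by powers of NINDIR.
 * The sketch below walks the same arithmetic for positive lbns with made-up
 * NDADDR and NINDIR values; it only computes the path and reads no blocks.
 */
#include <stdio.h>

#define NDADDR	12		/* direct pointers (assumed) */
#define NINDIR	2048		/* pointers per indirect block (assumed) */

static int
indir_path(long long lbn, int idx[3])
{
	long long base, span;
	int level, i;

	if (lbn < NDADDR)
		return (0);			/* direct block, no path */
	base = NDADDR;
	span = NINDIR;				/* blocks mapped by level 1 */
	for (level = 1; level <= 3; level++) {
		if (lbn < base + span)
			break;
		base += span;
		span *= NINDIR;
	}
	if (level > 3)
		return (-1);			/* beyond a triple indirect */
	lbn -= base;
	for (i = level - 1; i >= 0; i--) {	/* top of the tree first */
		span /= NINDIR;
		idx[level - 1 - i] = (int)(lbn / span);
		lbn %= span;
	}
	return (level);
}

int
main(void)
{
	int idx[3], level, i;
	long long lbn = NDADDR + NINDIR + 5 * NINDIR + 7; /* double indirect */

	level = indir_path(lbn, idx);
	printf("lbn %lld: level %d, indices", lbn, level);
	for (i = 0; i < level; i++)
		printf(" %d", idx[i]);
	printf("\n");
	return (0);
}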
*/ static ino_t firstinum, lastinum; static struct bufarea inobuf; void ginode(ino_t inumber, struct inode *ip) { ufs2_daddr_t iblk; if (inumber < UFS_ROOTINO || inumber > maxino) errx(EEXIT, "bad inode number %ju to ginode", (uintmax_t)inumber); ip->i_number = inumber; if (inumber >= firstinum && inumber < lastinum) { /* contents in getnextino() cache */ ip->i_bp = &inobuf; inobuf.b_refcnt++; inobuf.b_index = firstinum; } else if (icachebp != NULL && inumber >= icachebp->b_index && inumber < icachebp->b_index + INOPB(&sblock)) { /* take an additional reference for the returned inode */ icachebp->b_refcnt++; ip->i_bp = icachebp; } else { iblk = ino_to_fsba(&sblock, inumber); /* release our cache-hold reference on old icachebp */ if (icachebp != NULL) brelse(icachebp); icachebp = getdatablk(iblk, sblock.fs_bsize, BT_INODES); if (icachebp->b_errs != 0) { icachebp = NULL; ip->i_bp = NULL; ip->i_dp = &zino; return; } /* take a cache-hold reference on new icachebp */ icachebp->b_refcnt++; icachebp->b_index = rounddown(inumber, INOPB(&sblock)); ip->i_bp = icachebp; } if (sblock.fs_magic == FS_UFS1_MAGIC) { ip->i_dp = (union dinode *) &ip->i_bp->b_un.b_dinode1[inumber - ip->i_bp->b_index]; return; } ip->i_dp = (union dinode *) &ip->i_bp->b_un.b_dinode2[inumber - ip->i_bp->b_index]; if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)ip->i_dp)) { pwarn("INODE CHECK-HASH FAILED"); prtinode(ip); if (preen || reply("FIX") != 0) { if (preen) printf(" (FIXED)\n"); ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)ip->i_dp); inodirty(ip); } } } /* * Release a held inode. */ void irelse(struct inode *ip) { /* Check for failed inode read */ if (ip->i_bp == NULL) return; if (ip->i_bp->b_refcnt <= 0) pfatal("irelse: releasing unreferenced ino %ju\n", (uintmax_t) ip->i_number); brelse(ip->i_bp); } /* * Special purpose version of ginode used to optimize first pass * over all the inodes in numerical order. */ static ino_t nextinum, lastvalidinum; static long readcount, readpercg, fullcnt, inobufsize, partialcnt, partialsize; union dinode * getnextinode(ino_t inumber, int rebuildcg) { int j; long size; mode_t mode; ufs2_daddr_t ndb, blk; union dinode *dp; struct inode ip; static caddr_t nextinop; if (inumber != nextinum++ || inumber > lastvalidinum) errx(EEXIT, "bad inode number %ju to nextinode", (uintmax_t)inumber); if (inumber >= lastinum) { readcount++; firstinum = lastinum; blk = ino_to_fsba(&sblock, lastinum); if (readcount % readpercg == 0) { size = partialsize; lastinum += partialcnt; } else { size = inobufsize; lastinum += fullcnt; } /* * Flush old contents in case they have been updated. * If getblk encounters an error, it will already have zeroed * out the buffer, so we do not need to do so here. 
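/*
 * The sequential inode scan here streams each cylinder group through a large
 * buffer: setinodebuf() divides the in-use inodes into full-sized reads plus
 * one trailing partial read.  The sketch below shows that sizing arithmetic
 * with invented sizes; the real code also rounds the partial read up to a
 * fragment boundary with fragroundup().
 */
#include <stdio.h>

int
main(void)
{
	long inobufsize = 128 * 1024;	/* bytes per read (assumed) */
	long inodesize = 256;		/* bytes per on-disk inode (assumed) */
	long inosused = 1843;		/* inodes allocated in this cg */
	long fullcnt, readpercg, partialcnt, partialsize;

	fullcnt = inobufsize / inodesize;	/* inodes per full read */
	readpercg = inosused / fullcnt;		/* full reads needed */
	partialcnt = inosused % fullcnt;	/* inodes left over */
	partialsize = partialcnt * inodesize;
	if (partialcnt != 0) {
		readpercg++;			/* one short read at the end */
	} else {
		partialcnt = fullcnt;		/* last read is a full one */
		partialsize = inobufsize;
	}
	printf("%ld inodes: %ld read(s), %ld inodes (%ld bytes) in the last\n",
	    inosused, readpercg, partialcnt, partialsize);
	return (0);
}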
*/ if (inobuf.b_refcnt != 0) pfatal("Non-zero getnextinode() ref count %d\n", inobuf.b_refcnt); flush(fswritefd, &inobuf); getblk(&inobuf, blk, size); nextinop = inobuf.b_un.b_buf; } dp = (union dinode *)nextinop; if (sblock.fs_magic == FS_UFS1_MAGIC) nextinop += sizeof(struct ufs1_dinode); else nextinop += sizeof(struct ufs2_dinode); if ((ckhashadd & CK_INODE) != 0) { ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp); dirty(&inobuf); } if (ffs_verify_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp) != 0) { pwarn("INODE CHECK-HASH FAILED"); ip.i_bp = NULL; ip.i_dp = dp; ip.i_number = inumber; prtinode(&ip); if (preen || reply("FIX") != 0) { if (preen) printf(" (FIXED)\n"); ffs_update_dinode_ckhash(&sblock, (struct ufs2_dinode *)dp); dirty(&inobuf); } } if (rebuildcg && (char *)dp == inobuf.b_un.b_buf) { /* * Try to determine if we have reached the end of the * allocated inodes. */ mode = DIP(dp, di_mode) & IFMT; if (mode == 0) { if (memcmp(dp->dp2.di_db, zino.dp2.di_db, UFS_NDADDR * sizeof(ufs2_daddr_t)) || memcmp(dp->dp2.di_ib, zino.dp2.di_ib, UFS_NIADDR * sizeof(ufs2_daddr_t)) || dp->dp2.di_mode || dp->dp2.di_size) return (NULL); return (dp); } if (!ftypeok(dp)) return (NULL); ndb = howmany(DIP(dp, di_size), sblock.fs_bsize); if (ndb < 0) return (NULL); if (mode == IFBLK || mode == IFCHR) ndb++; if (mode == IFLNK) { /* * Fake ndb value so direct/indirect block checks below * will detect any garbage after symlink string. */ if (DIP(dp, di_size) < (off_t)sblock.fs_maxsymlinklen) { ndb = howmany(DIP(dp, di_size), sizeof(ufs2_daddr_t)); if (ndb > UFS_NDADDR) { j = ndb - UFS_NDADDR; for (ndb = 1; j > 1; j--) ndb *= NINDIR(&sblock); ndb += UFS_NDADDR; } } } for (j = ndb; ndb < UFS_NDADDR && j < UFS_NDADDR; j++) if (DIP(dp, di_db[j]) != 0) return (NULL); for (j = 0, ndb -= UFS_NDADDR; ndb > 0; j++) ndb /= NINDIR(&sblock); for (; j < UFS_NIADDR; j++) if (DIP(dp, di_ib[j]) != 0) return (NULL); } return (dp); } void setinodebuf(int cg, ino_t inosused) { ino_t inum; inum = cg * sblock.fs_ipg; lastvalidinum = inum + inosused - 1; nextinum = inum; lastinum = inum; readcount = 0; /* Flush old contents in case they have been updated */ flush(fswritefd, &inobuf); inobuf.b_bno = 0; if (inobuf.b_un.b_buf == NULL) { inobufsize = blkroundup(&sblock, MAX(INOBUFSIZE, sblock.fs_bsize)); initbarea(&inobuf, BT_INODES); if ((inobuf.b_un.b_buf = Malloc((unsigned)inobufsize)) == NULL) errx(EEXIT, "cannot allocate space for inode buffer"); } fullcnt = inobufsize / ((sblock.fs_magic == FS_UFS1_MAGIC) ? sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)); readpercg = inosused / fullcnt; partialcnt = inosused % fullcnt; partialsize = fragroundup(&sblock, partialcnt * ((sblock.fs_magic == FS_UFS1_MAGIC) ? 
	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode)));
	if (partialcnt != 0) {
		readpercg++;
	} else {
		partialcnt = fullcnt;
		partialsize = inobufsize;
	}
}

int
freeblock(struct inodesc *idesc)
{
	struct dups *dlp;
	struct bufarea *cgbp;
	struct cg *cgp;
	ufs2_daddr_t blkno;
	long size, nfrags;

	blkno = idesc->id_blkno;
	if (idesc->id_type == SNAP) {
		pfatal("clearing a snapshot dinode\n");
		return (STOP);
	}
	size = lfragtosize(&sblock, idesc->id_numfrags);
	if (snapblkfree(&sblock, blkno, size, idesc->id_number,
	    std_checkblkavail))
		return (KEEPON);
	for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) {
		if (chkrange(blkno, 1)) {
			return (SKIP);
		} else if (testbmap(blkno)) {
			for (dlp = duplist; dlp; dlp = dlp->next) {
				if (dlp->dup != blkno)
					continue;
				dlp->dup = duplist->dup;
				dlp = duplist;
				duplist = duplist->next;
				free((char *)dlp);
				break;
			}
			if (dlp == NULL) {
				clrbmap(blkno);
				n_blks--;
			}
		}
	}
	/*
	 * If all successfully returned, account for them.
	 */
	if (nfrags == 0) {
		cgbp = cglookup(dtog(&sblock, idesc->id_blkno));
		cgp = cgbp->b_un.b_cg;
		if (idesc->id_numfrags == sblock.fs_frag)
			cgp->cg_cs.cs_nbfree++;
		else
			cgp->cg_cs.cs_nffree += idesc->id_numfrags;
		cgdirty(cgbp);
	}
	return (KEEPON);
}

/*
 * Prepare a snapshot file for being removed.
 */
void
snapremove(ino_t inum)
{
	struct inodesc idesc;
	struct inode ip;
	int i;

	for (i = 0; i < snapcnt; i++)
		if (snaplist[i].i_number == inum)
			break;
	if (i == snapcnt)
		ginode(inum, &ip);
	else
		ip = snaplist[i];
	if ((DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) {
		printf("snapremove: inode %jd is not a snapshot\n",
		    (intmax_t)inum);
		if (i == snapcnt)
			irelse(&ip);
		return;
	}
	if (debug)
		printf("snapremove: remove %sactive snapshot %jd\n",
		    i == snapcnt ? "in" : "", (intmax_t)inum);
	/*
	 * If on active snapshot list, remove it.
	 */
	if (i < snapcnt) {
		for (i++; i < FSMAXSNAP; i++) {
			if (sblock.fs_snapinum[i] == 0)
				break;
			snaplist[i - 1] = snaplist[i];
			sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
		}
		sblock.fs_snapinum[i - 1] = 0;
		bzero(&snaplist[i - 1], sizeof(struct inode));
		snapcnt--;
	}
+	memset(&idesc, 0, sizeof(struct inodesc));
	idesc.id_type = SNAP;
	idesc.id_func = snapclean;
	idesc.id_number = inum;
	(void)ckinode(ip.i_dp, &idesc);
	DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT);
	inodirty(&ip);
	irelse(&ip);
}

static int
snapclean(struct inodesc *idesc)
{
	ufs2_daddr_t blkno;
	struct bufarea *bp;
	union dinode *dp;

	blkno = idesc->id_blkno;
	if (blkno == 0)
		return (KEEPON);

-	bp = idesc->id_bp;
	dp = idesc->id_dp;
	if (blkno == BLK_NOCOPY || blkno == BLK_SNAP) {
-		if (idesc->id_lbn < UFS_NDADDR)
+		if (idesc->id_lbn < UFS_NDADDR) {
			DIP_SET(dp, di_db[idesc->id_lbn], 0);
-		else
+		} else {
+			bp = idesc->id_bp;
			IBLK_SET(bp, bp->b_index, 0);
-		dirty(bp);
+			dirty(bp);
+		}
	}
	return (KEEPON);
}

/*
 * Notification that a block is being freed. Return zero if the free
 * should be allowed to proceed. Return non-zero if the snapshot file
 * wants to claim the block. The block will be claimed if it is an
 * uncopied part of one of the snapshots. It will be freed if it is
 * either a BLK_NOCOPY or has already been copied in all of the snapshots.
 * If a fragment is being freed, then all snapshots that care about
 * it must make a copy since a snapshot file can only claim full sized
 * blocks. Note that if more than one snapshot file maps the block,
 * we can pick one at random to claim it. Since none of the snapshots
 * can change, we are assured that they will all see the same unmodified
 * image.
When deleting a snapshot file (see ino_trunc above), we * must push any of these claimed blocks to one of the other snapshots * that maps it. These claimed blocks are easily identified as they will * have a block number equal to their logical block number within the * snapshot. A copied block can never have this property because they * must always have been allocated from a BLK_NOCOPY location. */ int snapblkfree(struct fs *fs, ufs2_daddr_t bno, long size, ino_t inum, ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags)) { union dinode *dp; struct inode ip; struct bufarea *snapbp; ufs_lbn_t lbn; ufs2_daddr_t blkno, relblkno; int i, frags, claimedblk, copydone; /* If no snapshots, nothing to do */ if (snapcnt == 0) return (0); if (debug) printf("snapblkfree: in ino %jd free blkno %jd, size %jd\n", (intmax_t)inum, (intmax_t)bno, (intmax_t)size); relblkno = blknum(fs, bno); lbn = fragstoblks(fs, relblkno); /* Direct blocks are always pre-copied */ if (lbn < UFS_NDADDR) return (0); copydone = 0; claimedblk = 0; for (i = 0; i < snapcnt; i++) { /* * Lookup block being freed. */ ip = snaplist[i]; dp = ip.i_dp; blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number, lbn, &frags, &snapbp); /* * Check to see if block needs to be copied. */ if (blkno == 0) { /* * A block that we map is being freed. If it has not * been claimed yet, we will claim or copy it (below). */ claimedblk = 1; } else if (blkno == BLK_SNAP) { /* * No previous snapshot claimed the block, * so it will be freed and become a BLK_NOCOPY * (don't care) for us. */ if (claimedblk) pfatal("snapblkfree: inconsistent block type"); IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY); dirty(snapbp); brelse(snapbp); continue; } else /* BLK_NOCOPY or default */ { /* * If the snapshot has already copied the block * (default), or does not care about the block, * it is not needed. */ brelse(snapbp); continue; } /* * If this is a full size block, we will just grab it * and assign it to the snapshot inode. Otherwise we * will proceed to copy it. See explanation for this * routine as to why only a single snapshot needs to * claim this block. */ if (size == fs->fs_bsize) { if (debug) printf("Grabonremove snapshot %ju lbn %jd " "from inum %ju\n", (intmax_t)ip.i_number, (intmax_t)lbn, (uintmax_t)inum); IBLK_SET(snapbp, snapbp->b_index, relblkno); dirty(snapbp); brelse(snapbp); DIP_SET(dp, di_blocks, DIP(dp, di_blocks) + btodb(size)); inodirty(&ip); return (1); } /* First time through, read the contents of the old block. */ if (copydone == 0) { copydone = 1; if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno), fs->fs_bsize) != 0) { pfatal("Could not read snapshot %ju block " "%jd\n", (intmax_t)ip.i_number, (intmax_t)relblkno); continue; } } /* * This allocation will never require any additional * allocations for the snapshot inode. */ blkno = allocblk(dtog(fs, relblkno), fs->fs_frag, checkblkavail); if (blkno == 0) { pfatal("Could not allocate block for snapshot %ju\n", (intmax_t)ip.i_number); continue; } if (debug) printf("Copyonremove: snapino %jd lbn %jd for inum %ju " "size %ld new blkno %jd\n", (intmax_t)ip.i_number, (intmax_t)lbn, (uintmax_t)inum, size, (intmax_t)blkno); blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize); IBLK_SET(snapbp, snapbp->b_index, blkno); dirty(snapbp); brelse(snapbp); DIP_SET(dp, di_blocks, DIP(dp, di_blocks) + btodb(fs->fs_bsize)); inodirty(&ip); } return (0); } /* * Notification that a block is being written. Return if the block * is part of a snapshot as snapshots never track other snapshots. 
 * The block will be copied in all of the snapshots that are tracking
 * it and have not yet copied it. Some buffers may hold more than one
 * block. Here we need to check each block in the buffer.
 */
void
copyonwrite(struct fs *fs, struct bufarea *bp,
	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
{
	ufs2_daddr_t copyblkno;
	long i, numblks;

	/* If no snapshots, nothing to do. */
	if (snapcnt == 0)
		return;
	numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize;
	if (debug)
		prtbuf(bp, "copyonwrite: checking %jd block%s in buffer",
		    (intmax_t)numblks, numblks > 1 ? "s" : "");
	copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno));
	for (i = 0; i < numblks; i++) {
		chkcopyonwrite(fs, copyblkno, checkblkavail);
		copyblkno += fs->fs_frag;
	}
}

static void
chkcopyonwrite(struct fs *fs, ufs2_daddr_t copyblkno,
	ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
{
	struct inode ip;
	union dinode *dp;
	struct bufarea *snapbp;
	ufs2_daddr_t blkno;
	int i, frags, copydone;
	ufs_lbn_t lbn;

	lbn = fragstoblks(fs, copyblkno);
	/* Direct blocks are always pre-copied */
	if (lbn < UFS_NDADDR)
		return;
	copydone = 0;
	for (i = 0; i < snapcnt; i++) {
		/*
		 * Lookup block being written.
		 */
		ip = snaplist[i];
		dp = ip.i_dp;
		blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp);
		/*
		 * Check to see if block needs to be copied.
		 */
		if (blkno != 0) {
			/*
			 * A block that we have already copied or don't track.
			 */
			brelse(snapbp);
			continue;
		}
		/* First time through, read the contents of the old block. */
		if (copydone == 0) {
			copydone = 1;
			if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno),
			    fs->fs_bsize) != 0) {
				pfatal("Could not read snapshot %ju block "
				    "%jd\n", (uintmax_t)ip.i_number,
				    (intmax_t)copyblkno);
				continue;
			}
		}
		/*
		 * This allocation will never require any additional
		 * allocations for the snapshot inode.
		 */
		if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag,
		    checkblkavail)) == 0) {
			pfatal("Could not allocate block for snapshot %ju\n",
			    (uintmax_t)ip.i_number);
			continue;
		}
		if (debug)
			prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using "
			    "blkno %ju setting in buffer",
			    (intmax_t)ip.i_number, (intmax_t)lbn,
			    (uintmax_t)blkno);
		blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize);
		IBLK_SET(snapbp, snapbp->b_index, blkno);
		dirty(snapbp);
		brelse(snapbp);
		DIP_SET(dp, di_blocks,
		    DIP(dp, di_blocks) + btodb(fs->fs_bsize));
		inodirty(&ip);
	}
	return;
}

/*
 * Traverse an inode and check that its block count is correct,
 * fixing it if necessary.
 */
void
check_blkcnt(struct inode *ip)
{
	struct inodesc idesc;
	union dinode *dp;
	ufs2_daddr_t ndb;
	int j, ret, offset;

	dp = ip->i_dp;
	memset(&idesc, 0, sizeof(struct inodesc));
	idesc.id_func = pass1check;
	idesc.id_number = ip->i_number;
	idesc.id_type = (DIP(dp, di_flags) & SF_SNAPSHOT) == 0 ?
	    ADDR : SNAP;
	(void)ckinode(dp, &idesc);
	if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) {
		ndb = howmany(dp->dp2.di_extsize, sblock.fs_bsize);
		for (j = 0; j < UFS_NXADDR; j++) {
			if (--ndb == 0 &&
			    (offset = blkoff(&sblock,
			    dp->dp2.di_extsize)) != 0)
				idesc.id_numfrags = numfrags(&sblock,
				    fragroundup(&sblock, offset));
			else
				idesc.id_numfrags = sblock.fs_frag;
			if (dp->dp2.di_extb[j] == 0)
				continue;
			idesc.id_blkno = dp->dp2.di_extb[j];
			ret = (*idesc.id_func)(&idesc);
			if (ret & STOP)
				break;
		}
	}
	idesc.id_entryno *= btodb(sblock.fs_fsize);
	if (DIP(dp, di_blocks) != idesc.id_entryno) {
		if (!(sujrecovery && preen)) {
			pwarn("INCORRECT BLOCK COUNT I=%lu (%ju should be %ju)",
			    (u_long)idesc.id_number,
			    (uintmax_t)DIP(dp, di_blocks),
			    (uintmax_t)idesc.id_entryno);
			if (preen)
				printf(" (CORRECTED)\n");
			else if (reply("CORRECT") == 0)
				return;
		}
		if (bkgrdflag == 0) {
			DIP_SET(dp, di_blocks, idesc.id_entryno);
			inodirty(ip);
		} else {
			cmd.value = idesc.id_number;
			cmd.size = idesc.id_entryno - DIP(dp, di_blocks);
			if (debug)
				printf("adjblkcnt ino %ju amount %lld\n",
				    (uintmax_t)cmd.value, (long long)cmd.size);
			if (sysctl(adjblkcnt, MIBSIZE, 0, 0,
			    &cmd, sizeof cmd) == -1)
				rwerror("ADJUST INODE BLOCK COUNT", cmd.value);
		}
	}
}

void
freeinodebuf(void)
{
	struct bufarea *bp;
	int i;

	/*
	 * Flush old contents in case they have been updated.
	 */
	flush(fswritefd, &inobuf);
	if (inobuf.b_un.b_buf != NULL)
		free((char *)inobuf.b_un.b_buf);
	inobuf.b_un.b_buf = NULL;
	firstinum = lastinum = 0;
	/*
	 * Reload the snapshot inodes in case any of them changed.
	 */
	for (i = 0; i < snapcnt; i++) {
		bp = snaplist[i].i_bp;
		bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno,
		    bp->b_size);
	}
}

/*
 * Routines to maintain information about directory inodes.
 * This is built during the first pass and used during the
 * second and third passes.
 *
 * Enter inodes into the cache.
 */
struct inoinfo *
cacheino(union dinode *dp, ino_t inumber)
{
	struct inoinfo *inp;
	int i, blks;

	if (getinoinfo(inumber) != NULL)
		pfatal("cacheino: duplicate entry for ino %jd\n",
		    (intmax_t)inumber);
	if (howmany(DIP(dp, di_size), sblock.fs_bsize) > UFS_NDADDR)
		blks = UFS_NDADDR + UFS_NIADDR;
	else if (DIP(dp, di_size) > 0)
		blks = howmany(DIP(dp, di_size), sblock.fs_bsize);
	else
		blks = 1;
	inp = (struct inoinfo *)
	    Malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs2_daddr_t));
	if (inp == NULL)
		errx(EEXIT, "cannot increase directory list");
	SLIST_INSERT_HEAD(&inphash[inumber % dirhash], inp, i_hash);
	inp->i_flags = 0;
	inp->i_parent = inumber == UFS_ROOTINO ? UFS_ROOTINO : (ino_t)0;
	inp->i_dotdot = (ino_t)0;
	inp->i_number = inumber;
	inp->i_isize = DIP(dp, di_size);
	inp->i_depth = DIP(dp, di_dirdepth);
	inp->i_numblks = blks;
	for (i = 0; i < MIN(blks, UFS_NDADDR); i++)
		inp->i_blks[i] = DIP(dp, di_db[i]);
	if (blks > UFS_NDADDR)
		for (i = 0; i < UFS_NIADDR; i++)
			inp->i_blks[UFS_NDADDR + i] = DIP(dp, di_ib[i]);
	if (inplast == listmax) {
		listmax += 100;
		inpsort = (struct inoinfo **)reallocarray((char *)inpsort,
		    listmax, sizeof(struct inoinfo *));
		if (inpsort == NULL)
			errx(EEXIT, "cannot increase directory list");
	}
	inpsort[inplast++] = inp;
	return (inp);
}

/*
 * Look up an inode cache structure.
 */
struct inoinfo *
getinoinfo(ino_t inumber)
{
	struct inoinfo *inp;

	SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
		if (inp->i_number != inumber)
			continue;
		return (inp);
	}
	return (NULL);
}

/*
 * Remove an entry from the inode cache and disk-order sorted list.
 * Return 0 on success and 1 on failure.
 */
int
removecachedino(ino_t inumber)
{
	struct inoinfo *inp, **inpp;
	char *listtype;

	listtype = "hash";
	SLIST_FOREACH(inp, &inphash[inumber % dirhash], i_hash) {
		if (inp->i_number != inumber)
			continue;
		SLIST_REMOVE(&inphash[inumber % dirhash], inp, inoinfo,
		    i_hash);
		for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
			if (*inpp != inp)
				continue;
			*inpp = inpsort[inplast - 1];
			inplast--;
			free(inp);
			return (0);
		}
		listtype = "sort";
		break;
	}
	pfatal("removecachedino: entry for ino %jd not found on %s list\n",
	    (intmax_t)inumber, listtype);
	return (1);
}

/*
 * Clean up all the inode cache structures.
 */
void
inocleanup(void)
{
	struct inoinfo **inpp;

	if (inphash == NULL)
		return;
	for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--)
		free((char *)(*inpp));
	free((char *)inphash);
	inphash = NULL;
	free((char *)inpsort);
	inpsort = NULL;
}

void
inodirty(struct inode *ip)
{
	if (sblock.fs_magic == FS_UFS2_MAGIC)
		ffs_update_dinode_ckhash(&sblock,
		    (struct ufs2_dinode *)ip->i_dp);
	dirty(ip->i_bp);
}

void
clri(struct inodesc *idesc, const char *type, int flag)
{
	union dinode *dp;
	struct inode ip;

	ginode(idesc->id_number, &ip);
	dp = ip.i_dp;
	if (flag == 1) {
		pwarn("%s %s", type,
		    (DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE");
		prtinode(&ip);
		printf("\n");
	}
	if (preen || reply("CLEAR") == 1) {
		if (preen)
			printf(" (CLEARED)\n");
		n_files--;
		if (bkgrdflag == 0) {
			if (idesc->id_type == SNAP) {
				snapremove(idesc->id_number);
				idesc->id_type = ADDR;
			}
			(void)ckinode(dp, idesc);
			inoinfo(idesc->id_number)->ino_state = USTATE;
			clearinode(dp);
			inodirty(&ip);
		} else {
			cmd.value = idesc->id_number;
			cmd.size = -DIP(dp, di_nlink);
			if (debug)
				printf("adjrefcnt ino %ld amt %lld\n",
				    (long)cmd.value, (long long)cmd.size);
			if (sysctl(adjrefcnt, MIBSIZE, 0, 0,
			    &cmd, sizeof cmd) == -1)
				rwerror("ADJUST INODE", cmd.value);
		}
	}
	irelse(&ip);
}

int
findname(struct inodesc *idesc)
{
	struct direct *dirp = idesc->id_dirp;

	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
		idesc->id_entryno++;
		return (KEEPON);
	}
	memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1);
	return (STOP|FOUND);
}

int
findino(struct inodesc *idesc)
{
	struct direct *dirp = idesc->id_dirp;

	if (dirp->d_ino == 0)
		return (KEEPON);
	if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
	    dirp->d_ino >= UFS_ROOTINO && dirp->d_ino <= maxino) {
		idesc->id_parent = dirp->d_ino;
		return (STOP|FOUND);
	}
	return (KEEPON);
}

int
clearentry(struct inodesc *idesc)
{
	struct direct *dirp = idesc->id_dirp;

	if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
		idesc->id_entryno++;
		return (KEEPON);
	}
	dirp->d_ino = 0;
	return (STOP|FOUND|ALTERED);
}

void
prtinode(struct inode *ip)
{
	char *p;
	union dinode *dp;
	struct passwd *pw;
	time_t t;

	dp = ip->i_dp;
	printf(" I=%lu ", (u_long)ip->i_number);
	if (ip->i_number < UFS_ROOTINO || ip->i_number > maxino)
		return;
	printf(" OWNER=");
	if ((pw = getpwuid((int)DIP(dp, di_uid))) != NULL)
		printf("%s ", pw->pw_name);
	else
		printf("%u ", (unsigned)DIP(dp, di_uid));
	printf("MODE=%o\n", DIP(dp, di_mode));
	if (preen)
		printf("%s: ", cdevname);
	printf("SIZE=%ju ", (uintmax_t)DIP(dp, di_size));
	t = DIP(dp, di_mtime);
	if ((p = ctime(&t)) != NULL)
		printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]);
}

void
blkerror(ino_t ino, const char *type, ufs2_daddr_t blk)
{
	pfatal("%jd %s I=%ju", (intmax_t)blk, type, (uintmax_t)ino);
	printf("\n");
	switch (inoinfo(ino)->ino_state) {
	case FSTATE:
	case FZLINK:
		inoinfo(ino)->ino_state = FCLEAR;
		return;
	case DSTATE:
	case DZLINK:
		inoinfo(ino)->ino_state = DCLEAR;
		return;
	case FCLEAR:
	case DCLEAR:
		return;
	default:
		errx(EEXIT, "BAD STATE %d TO BLKERR",
		    inoinfo(ino)->ino_state);
		/* NOTREACHED */
	}
}

/*
 * allocate an unused inode
 */
ino_t
allocino(ino_t request, int type)
{
	ino_t ino;
	struct inode ip;
	union dinode *dp;
	struct bufarea *cgbp;
	struct cg *cgp;
	int cg, anyino;

	anyino = 0;
	if (request == 0) {
		request = UFS_ROOTINO;
		anyino = 1;
	} else if (inoinfo(request)->ino_state != USTATE)
		return (0);
retry:
	for (ino = request; ino < maxino; ino++)
		if (inoinfo(ino)->ino_state == USTATE)
			break;
	if (ino >= maxino)
		return (0);
	cg = ino_to_cg(&sblock, ino);
	cgbp = cglookup(cg);
	cgp = cgbp->b_un.b_cg;
	if (!check_cgmagic(cg, cgbp, 0)) {
		if (anyino == 0)
			return (0);
		request = (cg + 1) * sblock.fs_ipg;
		goto retry;
	}
	setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
	cgp->cg_cs.cs_nifree--;
	switch (type & IFMT) {
	case IFDIR:
		inoinfo(ino)->ino_state = DSTATE;
		cgp->cg_cs.cs_ndir++;
		break;
	case IFREG:
	case IFLNK:
		inoinfo(ino)->ino_state = FSTATE;
		break;
	default:
		return (0);
	}
	cgdirty(cgbp);
	ginode(ino, &ip);
	dp = ip.i_dp;
	DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1,
	    std_checkblkavail));
	if (DIP(dp, di_db[0]) == 0) {
		inoinfo(ino)->ino_state = USTATE;
		irelse(&ip);
		return (0);
	}
	DIP_SET(dp, di_mode, type);
	DIP_SET(dp, di_flags, 0);
	DIP_SET(dp, di_atime, time(NULL));
	DIP_SET(dp, di_ctime, DIP(dp, di_atime));
	DIP_SET(dp, di_mtime, DIP(dp, di_ctime));
	DIP_SET(dp, di_mtimensec, 0);
	DIP_SET(dp, di_ctimensec, 0);
	DIP_SET(dp, di_atimensec, 0);
	DIP_SET(dp, di_size, sblock.fs_fsize);
	DIP_SET(dp, di_blocks, btodb(sblock.fs_fsize));
	n_files++;
	inodirty(&ip);
	irelse(&ip);
	inoinfo(ino)->ino_type = IFTODT(type);
	return (ino);
}

/*
 * deallocate an inode
 */
void
freeino(ino_t ino)
{
	struct inodesc idesc;
	union dinode *dp;
	struct inode ip;

	memset(&idesc, 0, sizeof(struct inodesc));
	idesc.id_type = ADDR;
	idesc.id_func = freeblock;
	idesc.id_number = ino;
	ginode(ino, &ip);
	dp = ip.i_dp;
	(void)ckinode(dp, &idesc);
	clearinode(dp);
	inodirty(&ip);
	irelse(&ip);
	inoinfo(ino)->ino_state = USTATE;
	n_files--;
}