Index: head/sbin/dump/main.c =================================================================== --- head/sbin/dump/main.c (revision 298871) +++ head/sbin/dump/main.c (revision 298872) @@ -1,778 +1,778 @@ /*- * Copyright (c) 1980, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1991, 1993, 1994\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/1/95"; #endif static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "dump.h" #include "pathnames.h" int notify = 0; /* notify operator flag */ int snapdump = 0; /* dumping live filesystem, so use snapshot */ int blockswritten = 0; /* number of blocks written on current tape */ int tapeno = 0; /* current tape number */ int density = 0; /* density in bytes/0.1" " <- this is for hilit19 */ int ntrec = NTREC; /* # tape blocks in each tape record */ int cartridge = 0; /* Assume non-cartridge tape */ int cachesize = 0; /* block cache size (in bytes), defaults to 0 */ long dev_bsize = 1; /* recalculated below */ long blocksperfile; /* output blocks per file */ char *host = NULL; /* remote host (if any) */ /* * Possible superblock locations ordered from most to least likely. 
 */
static int sblock_try[] = SBLOCKSEARCH;

static char *getmntpt(char *, int *);
static long numarg(const char *, long, long);
static void obsolete(int *, char **[]);
static void usage(void) __dead2;

/*
 * dump entry point.
 *
 * Rewrites old-style key-letter arguments with obsolete(), parses the
 * options, resolves the output target (tape, file, pipe or remote host)
 * and the disk to dump, optionally creates a snapshot for -L, locates the
 * superblock, runs the two mapping passes to estimate the number of tape
 * blocks, and then writes the inode maps, the directories (pass 3) and
 * the remaining inodes (pass 4) followed by the trailer records.
 *
 * Does not return: every path ends in exit(), Exit(), quit(), errx() or
 * usage().
 */
int
main(int argc, char *argv[])
{
	struct stat sb;
	ino_t ino;
	int dirty;
	union dinode *dp;
	struct fstab *dt;
	char *map, *mntpt;
	int ch, mode, mntflags;
	int i, anydirskipped, bflag = 0, Tflag = 0, honorlevel = 1;
	int just_estimate = 0;
	ino_t maxino;
	char *tmsg;

	spcl.c_date = _time_to_time64(time(NULL));

	tsize = 0;	/* Default later, based on 'c' option for cart tapes */
	dumpdates = _PATH_DUMPDATES;
	popenout = NULL;
	tape = NULL;
	temp = _PATH_DTMP;
	if (TP_BSIZE / DEV_BSIZE == 0 || TP_BSIZE % DEV_BSIZE != 0)
		quit("TP_BSIZE must be a multiple of DEV_BSIZE\n");
	level = 0;
	rsync_friendly = 0;

	if (argc < 2)
		usage();

	/* Convert "dump 0uf /dev/x" style arguments into getopt(3) form. */
	obsolete(&argc, &argv);
	while ((ch = getopt(argc, argv,
	    "0123456789aB:b:C:cD:d:f:h:LnP:RrSs:T:uWw")) != -1)
		switch (ch) {
		/* dump level */
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/* Digits accumulate, so "-1 -2" yields level 12. */
			level = 10 * level + ch - '0';
			break;

		case 'a':		/* `auto-size', Write to EOM. */
			unlimited = 1;
			break;

		case 'B':		/* blocks per output file */
			blocksperfile = numarg("number of blocks per file",
			    1L, 0L);
			break;

		case 'b':		/* blocks per tape write */
			ntrec = numarg("number of blocks per write",
			    1L, 1000L);
			break;

		case 'C':		/* cache size: argument is scaled by 1024 * 1024 */
			cachesize = numarg("cachesize", 0, 0) * 1024 * 1024;
			break;

		case 'c':		/* Tape is cart. not 9-track */
			cartridge = 1;
			break;

		case 'D':
			dumpdates = optarg;
			break;

		case 'd':		/* density, in bits per inch */
			density = numarg("density", 10L, 327670L) / 10;
			/*
			 * NOTE(review): bflag is initialised to 0 and is not
			 * assigned anywhere in this function, so !bflag is
			 * always true here.
			 */
			if (density >= 625 && !bflag)
				ntrec = HIGHDENSITYTREC;
			break;

		case 'f':		/* output file */
			if (popenout != NULL)
				errx(X_STARTUP, "You cannot use the P and f "
				    "flags together.\n");
			tape = optarg;
			break;

		case 'h':
			honorlevel = numarg("honor level", 0L, 10L);
			break;

		case 'L':
			snapdump = 1;
			break;

		case 'n':		/* notify operators */
			notify = 1;
			break;

		case 'P':
			if (tape != NULL)
				errx(X_STARTUP, "You cannot use the P and f "
				    "flags together.\n");
			popenout = optarg;
			break;

		case 'r': /* store slightly less data to be friendly to rsync */
			if (rsync_friendly < 1)
				rsync_friendly = 1;
			break;

		case 'R': /* store even less data to be friendlier to rsync */
			if (rsync_friendly < 2)
				rsync_friendly = 2;
			break;

		case 'S':		/* exit after estimating # of tapes */
			just_estimate = 1;
			break;

		case 's':		/* tape size, feet */
			/* feet * 12 * 10: stored in tenths of an inch */
			tsize = numarg("tape size", 1L, 0L) * 12 * 10;
			break;

		case 'T':		/* time of last dump */
			spcl.c_ddate = unctime(optarg);
			if (spcl.c_ddate < 0) {
				(void)fprintf(stderr, "bad time \"%s\"\n",
				    optarg);
				exit(X_STARTUP);
			}
			Tflag = 1;
			lastlevel = -1;
			break;

		case 'u':		/* update /etc/dumpdates */
			uflag = 1;
			break;

		case 'W':		/* what to do */
		case 'w':
			lastdump(ch);
			exit(X_FINOK);	/* do nothing else */

		default:
			usage();
		}
	argc -= optind;
	argv += optind;

	/* Exactly one operand (the disk or file system) must remain. */
	if (argc < 1) {
		(void)fprintf(stderr, "Must specify disk or file system\n");
		exit(X_STARTUP);
	}
	disk = *argv++;
	argc--;
	if (argc >= 1) {
		(void)fprintf(stderr, "Unknown arguments to dump:");
		while (argc--)
			(void)fprintf(stderr, " %s", *argv++);
		(void)fprintf(stderr, "\n");
		exit(X_STARTUP);
	}
	if (rsync_friendly && (level > 0)) {
		(void)fprintf(stderr, "%s %s\n", "rsync friendly options",
		    "can be used only with level 0 dumps.");
		exit(X_STARTUP);
	}
	if (Tflag && uflag) {
		(void)fprintf(stderr,
		    "You cannot use the T and u flags together.\n");
		exit(X_STARTUP);
	}

	/*
	 * Pick the output target: -P pipeline, -f file, $TAPE, or the
	 * compiled-in default.  "-" selects standard output.
	 */
	if (popenout) {
		tape = "child pipeline process";
	} else if (tape == NULL && (tape = getenv("TAPE")) == NULL)
		tape = _PATH_DEFTAPE;
	if (strcmp(tape, "-") == 0) {
		pipeout++;
		tape = "standard output";
	}

	/*
	 * The two marked lines below are the change hunk carried by this
	 * diff: the open-coded round-down to a multiple of ntrec is replaced
	 * by the rounddown() macro.
	 */
	if (blocksperfile)
-		blocksperfile = blocksperfile / ntrec * ntrec; /* round down */
+		blocksperfile = rounddown(blocksperfile, ntrec);
	else if (!unlimited) {
		/*
		 * Determine how to default tape size and density
		 *
		 *		density				tape size
		 * 9-track	1600 bpi (160 bytes/.1")	2300 ft.
		 * 9-track	6250 bpi (625 bytes/.1")	2300 ft.
		 * cartridge	8000 bpi (100 bytes/.1")	1700 ft.
		 *						(450*4 - slop)
		 * hilit19 hits again: "
		 */
		if (density == 0)
			density = cartridge ? 100 : 160;
		if (tsize == 0)
			tsize = cartridge ? 1700L*120L : 2300L*120L;
	}

	/* "host:tape" selects a remote dump; split the string in place. */
	if (strchr(tape, ':')) {
		host = tape;
		tape = strchr(host, ':');
		*tape++ = '\0';
#ifdef RDUMP
		if (strchr(tape, '\n')) {
			(void)fprintf(stderr, "invalid characters in tape\n");
			exit(X_STARTUP);
		}
		if (rmthost(host) == 0)
			exit(X_STARTUP);
#else
		(void)fprintf(stderr, "remote dump not enabled\n");
		exit(X_STARTUP);
#endif
	}
	(void)setuid(getuid()); /* rmthost() is the only reason to be setuid */

	/*
	 * Install sig() for each signal unless its previous disposition,
	 * as returned by signal(), was SIG_IGN.
	 */
	if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
		signal(SIGHUP, sig);
	if (signal(SIGTRAP, SIG_IGN) != SIG_IGN)
		signal(SIGTRAP, sig);
	if (signal(SIGFPE, SIG_IGN) != SIG_IGN)
		signal(SIGFPE, sig);
	if (signal(SIGBUS, SIG_IGN) != SIG_IGN)
		signal(SIGBUS, sig);
	if (signal(SIGSEGV, SIG_IGN) != SIG_IGN)
		signal(SIGSEGV, sig);
	if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
		signal(SIGTERM, sig);
	if (signal(SIGINT, interrupt) == SIG_IGN)
		signal(SIGINT, SIG_IGN);

	dump_getfstab();		/* /etc/fstab snarfed */
	/*
	 *	disk can be either the full special file name,
	 *	the suffix of the special file name,
	 *	the special name missing the leading '/',
	 *	the file system name with or without the leading '/'.
	 */
	dt = fstabsearch(disk);
	if (dt != NULL) {
		disk = rawname(dt->fs_spec);
		if (disk == NULL)
			errx(X_STARTUP, "%s: unknown file system", dt->fs_spec);
		(void)strncpy(spcl.c_dev, dt->fs_spec, NAMELEN);
		(void)strncpy(spcl.c_filesys, dt->fs_file, NAMELEN);
	} else {
		(void)strncpy(spcl.c_dev, disk, NAMELEN);
		(void)strncpy(spcl.c_filesys, "an unlisted file system",
		    NAMELEN);
	}
	/* strncpy() does not terminate on truncation; do it by hand. */
	spcl.c_dev[NAMELEN-1]='\0';
	spcl.c_filesys[NAMELEN-1]='\0';

	/*
	 * Snapshot handling: -L only takes effect on a file system that is
	 * mounted read-write and has a .snap directory at its mount point.
	 */
	if ((mntpt = getmntpt(disk, &mntflags)) != NULL) {
		if (mntflags & MNT_RDONLY) {
			if (snapdump != 0) {
				msg("WARNING: %s\n",
				    "-L ignored for read-only filesystem.");
				snapdump = 0;
			}
		} else if (snapdump == 0) {
			msg("WARNING: %s\n",
			    "should use -L when dumping live read-write "
			    "filesystems!");
		} else {
			char snapname[BUFSIZ], snapcmd[BUFSIZ];

			snprintf(snapname, sizeof snapname, "%s/.snap", mntpt);
			if ((stat(snapname, &sb) < 0) || !S_ISDIR(sb.st_mode)) {
				msg("WARNING: %s %s\n",
				    "-L requested but snapshot location",
				    snapname);
				msg(" %s: %s\n",
				    "is not a directory",
				    "dump downgraded, -L ignored");
				snapdump = 0;
			} else {
				snprintf(snapname, sizeof snapname,
				    "%s/.snap/dump_snapshot", mntpt);
				snprintf(snapcmd, sizeof snapcmd, "%s %s %s",
				    _PATH_MKSNAP_FFS, mntpt, snapname);
				unlink(snapname);
				if (system(snapcmd) != 0)
					errx(X_STARTUP, "Cannot create %s: %s\n",
					    snapname, strerror(errno));
				if ((diskfd = open(snapname, O_RDONLY)) < 0) {
					unlink(snapname);
					errx(X_STARTUP, "Cannot open %s: %s\n",
					    snapname, strerror(errno));
				}
				/*
				 * The snapshot is read through diskfd from
				 * here on; its name is removed right away.
				 */
				unlink(snapname);
				if (fstat(diskfd, &sb) != 0)
					err(X_STARTUP, "%s: stat", snapname);
				/* Date the dump from the snapshot's mtime. */
				spcl.c_date = _time_to_time64(sb.st_mtime);
			}
		}
	} else if (snapdump != 0) {
		msg("WARNING: Cannot use -L on an unmounted filesystem.\n");
		snapdump = 0;
	}
	if (snapdump == 0) {
		if ((diskfd = open(disk, O_RDONLY)) < 0)
			err(X_STARTUP, "Cannot open %s", disk);
		if (fstat(diskfd, &sb) != 0)
			err(X_STARTUP, "%s: stat", disk);
		if (S_ISDIR(sb.st_mode))
			errx(X_STARTUP, "%s: unknown file system", disk);
	}

	(void)strcpy(spcl.c_label, "none");
	(void)gethostname(spcl.c_host, NAMELEN);
	spcl.c_level = level;
	spcl.c_type = TS_TAPE;
	if (rsync_friendly) {
		/* don't store real dump times */
		spcl.c_date = 0;
		spcl.c_ddate = 0;
	}
	if (spcl.c_date == 0) {
		tmsg = "the epoch\n";
	} else {
		time_t t = _time64_to_time(spcl.c_date);
		tmsg = ctime(&t);
	}
	msg("Date of this level %d dump: %s", level, tmsg);

	if (!Tflag && (!rsync_friendly))
		getdumptime();		/* /etc/dumpdates snarfed */
	if (spcl.c_ddate == 0) {
		tmsg = "the epoch\n";
	} else {
		time_t t = _time64_to_time(spcl.c_ddate);
		tmsg = ctime(&t);
	}
	if (lastlevel < 0)
		msg("Date of last (level unknown) dump: %s", tmsg);
	else
		msg("Date of last level %d dump: %s", lastlevel, tmsg);

	msg("Dumping %s%s ", snapdump ? "snapshot of ": "", disk);
	if (dt != NULL)
		msgtail("(%s) ", dt->fs_file);
	if (host)
		msgtail("to %s on host %s\n", tape, host);
	else
		msgtail("to %s\n", tape);

	/* Probe each candidate superblock location until one validates. */
	sync();
	sblock = (struct fs *)sblock_buf;
	for (i = 0; sblock_try[i] != -1; i++) {
		sblock->fs_fsize = SBLOCKSIZE; /* needed in bread */
		bread(sblock_try[i] >> dev_bshift, (char *) sblock, SBLOCKSIZE);
		if ((sblock->fs_magic == FS_UFS1_MAGIC ||
		     (sblock->fs_magic == FS_UFS2_MAGIC &&
		      sblock->fs_sblockloc == sblock_try[i])) &&
		    sblock->fs_bsize <= MAXBSIZE &&
		    sblock->fs_bsize >= sizeof(struct fs))
			break;
	}
	if (sblock_try[i] == -1)
		quit("Cannot find file system superblock\n");
	dev_bsize = sblock->fs_fsize / fsbtodb(sblock, 1);
	dev_bshift = ffs(dev_bsize) - 1;
	if (dev_bsize != (1 << dev_bshift))
		quit("dev_bsize (%ld) is not a power of 2", dev_bsize);
	tp_bshift = ffs(TP_BSIZE) - 1;
	if (TP_BSIZE != (1 << tp_bshift))
		quit("TP_BSIZE (%d) is not a power of 2", TP_BSIZE);

	/* One bit per inode, rounded up to whole tape blocks. */
	maxino = sblock->fs_ipg * sblock->fs_ncg;
	mapsize = roundup(howmany(maxino, CHAR_BIT), TP_BSIZE);
	/* NOTE(review): the three calloc() results are not checked here. */
	usedinomap = (char *)calloc((unsigned) mapsize, sizeof(char));
	dumpdirmap = (char *)calloc((unsigned) mapsize, sizeof(char));
	dumpinomap = (char *)calloc((unsigned) mapsize, sizeof(char));
	tapesize = 3 * (howmany(mapsize * sizeof(char), TP_BSIZE) + 1);

	nonodump = spcl.c_level < honorlevel;

	passno = 1;
	setproctitle("%s: pass 1: regular files", disk);
	msg("mapping (Pass I) [regular files]\n");
	anydirskipped = mapfiles(maxino, &tapesize);

	passno = 2;
	setproctitle("%s: pass 2: directories", disk);
	msg("mapping (Pass II) [directories]\n");
	/* Repeat until mapdirs() reports that no directory was skipped. */
	while (anydirskipped) {
		anydirskipped = mapdirs(maxino, &tapesize);
	}

	if (pipeout || unlimited) {
		tapesize += 10;	/* 10 trailer blocks */
		msg("estimated %ld tape blocks.\n", tapesize);
	} else {
		double fetapes;

		if (blocksperfile)
			fetapes = (double) tapesize / blocksperfile;
		else if (cartridge) {
			/* Estimate number of tapes, assuming streaming stops at
			   the end of each block written, and not in mid-block.
			   Assume no erroneous blocks; this can be compensated
			   for with an artificially low tape size. */
			fetapes =
			(	  (double) tapesize	/* blocks */
				* TP_BSIZE	/* bytes/block */
				* (1.0/density)	/* 0.1" / byte " */
			  +
				  (double) tapesize	/* blocks */
				* (1.0/ntrec)	/* streaming-stops per block */
				* 15.48		/* 0.1" / streaming-stop " */
			) * (1.0 / tsize );	/* tape / 0.1" " */
		} else {
			/* Estimate number of tapes, for old fashioned 9-track
			   tape */
			int tenthsperirg = (density == 625) ? 3 : 7;
			fetapes =
			(	  (double) tapesize	/* blocks */
				* TP_BSIZE	/* bytes / block */
				* (1.0/density)	/* 0.1" / byte " */
			  +
				  (double) tapesize	/* blocks */
				* (1.0/ntrec)	/* IRG's / block */
				* tenthsperirg	/* 0.1" / IRG " */
			) * (1.0 / tsize );	/* tape / 0.1" " */
		}
		etapes = fetapes;		/* truncating assignment */
		etapes++;
		/* count the dumped inodes map on each additional tape */
		tapesize += (etapes - 1) *
			(howmany(mapsize * sizeof(char), TP_BSIZE) + 1);
		tapesize += etapes + 10;	/* headers + 10 trailer blks */
		msg("estimated %ld tape blocks on %3.2f tape(s).\n",
		    tapesize, fetapes);
	}

	/*
	 * If the user only wants an estimate of the number of
	 * tapes, exit now.
	 */
	if (just_estimate)
		exit(0);

	/*
	 * Allocate tape buffer.
	 */
	if (!alloctape())
		quit(
	"can't allocate tape buffers - try a smaller blocking factor.\n");

	startnewtape(1);
	(void)time((time_t *)&(tstart_writing));
	dumpmap(usedinomap, TS_CLRI, maxino - 1);

	passno = 3;
	setproctitle("%s: pass 3: directories", disk);
	msg("dumping (Pass III) [directories]\n");
	dirty = 0;		/* XXX just to get gcc to shut up */
	/*
	 * Walk the directory bitmap one bit per inode: a fresh byte is
	 * loaded every CHAR_BIT inodes and shifted right in between, so
	 * bit 0 of dirty always describes the current inode.
	 */
	for (map = dumpdirmap, ino = 1; ino < maxino; ino++) {
		if (((ino - 1) % CHAR_BIT) == 0)	/* map is offset by 1 */
			dirty = *map++;
		else
			dirty >>= 1;
		if ((dirty & 1) == 0)
			continue;
		/*
		 * Skip directory inodes deleted and maybe reallocated
		 */
		dp = getino(ino, &mode);
		if (mode != IFDIR)
			continue;
		(void)dumpino(dp, ino);
	}

	passno = 4;
	setproctitle("%s: pass 4: regular files", disk);
	msg("dumping (Pass IV) [regular files]\n");
	/* Same bitmap walk as pass 3, over dumpinomap. */
	for (map = dumpinomap, ino = 1; ino < maxino; ino++) {
		if (((ino - 1) % CHAR_BIT) == 0)	/* map is offset by 1 */
			dirty = *map++;
		else
			dirty >>= 1;
		if ((dirty & 1) == 0)
			continue;
		/*
		 * Skip inodes deleted and reallocated as directories.
		 */
		dp = getino(ino, &mode);
		if (mode == IFDIR)
			continue;
		(void)dumpino(dp, ino);
	}

	(void)time((time_t *)&(tend_writing));
	/* Write ntrec TS_END headers as the trailer. */
	spcl.c_type = TS_END;
	for (i = 0; i < ntrec; i++)
		writeheader(maxino - 1);
	if (pipeout)
		msg("DUMP: %jd tape blocks\n", (intmax_t)spcl.c_tapea);
	else
		msg("DUMP: %jd tape blocks on %d volume%s\n",
		    (intmax_t)spcl.c_tapea, spcl.c_volume,
		    (spcl.c_volume == 1) ? "" : "s");

	/* report dump performance, avoid division through zero */
	if (tend_writing - tstart_writing == 0)
		msg("finished in less than a second\n");
	else
		msg("finished in %jd seconds, throughput %jd KBytes/sec\n",
		    (intmax_t)tend_writing - tstart_writing,
		    (intmax_t)(spcl.c_tapea /
		    (tend_writing - tstart_writing)));

	putdumptime();
	trewind();
	broadcast("DUMP IS DONE!\a\a\n");
	msg("DUMP IS DONE\n");
	Exit(X_FINOK);
	/* NOTREACHED */
}

/*
 * Print the command synopsis on stderr and exit with X_STARTUP.
 */
static void
usage(void)
{
	fprintf(stderr,
		"usage: dump [-0123456789acLnSu] [-B records] [-b blocksize] [-C cachesize]\n"
		" [-D dumpdates] [-d density] [-f file | -P pipecommand] [-h level]\n"
		" [-s feet] [-T date] filesystem\n"
		" dump -W | -w\n");
	exit(X_STARTUP);
}

/*
 * Check to see if a disk is currently mounted.
 *
 * Scans the table returned by getmntinfo() for an entry whose
 * f_mntfromname equals name.  On a match, stores that entry's f_flags in
 * *mntflagsp and returns its f_mntonname; otherwise returns a null
 * pointer and leaves *mntflagsp untouched.
 */
static char *
getmntpt(char *name, int *mntflagsp)
{
	long mntsize, i;
	struct statfs *mntbuf;

	mntsize = getmntinfo(&mntbuf, MNT_NOWAIT);
	for (i = 0; i < mntsize; i++) {
		if (!strcmp(mntbuf[i].f_mntfromname, name)) {
			*mntflagsp = mntbuf[i].f_flags;
			return (mntbuf[i].f_mntonname);
		}
	}
	return (0);
}

/*
 * Pick up a numeric argument.  It must be nonnegative and in the given
 * range (except that a vmax of 0 means unlimited).
*/ static long numarg(const char *meaning, long vmin, long vmax) { char *p; long val; val = strtol(optarg, &p, 10); if (*p) errx(1, "illegal %s -- %s", meaning, optarg); if (val < vmin || (vmax && val > vmax)) errx(1, "%s must be between %ld and %ld", meaning, vmin, vmax); return (val); } void sig(int signo) { switch(signo) { case SIGALRM: case SIGBUS: case SIGFPE: case SIGHUP: case SIGTERM: case SIGTRAP: if (pipeout) quit("Signal on pipe: cannot recover\n"); msg("Rewriting attempted as response to unknown signal.\n"); (void)fflush(stderr); (void)fflush(stdout); close_rewind(); exit(X_REWRITE); /* NOTREACHED */ case SIGSEGV: msg("SIGSEGV: ABORTING!\n"); (void)signal(SIGSEGV, SIG_DFL); (void)kill(0, SIGSEGV); /* NOTREACHED */ } } char * rawname(char *cp) { struct stat sb; /* * Ensure that the device passed in is a raw device. */ if (stat(cp, &sb) == 0 && (sb.st_mode & S_IFMT) == S_IFCHR) return (cp); /* * Since there's only one device type now, we can't construct any * better name, so we have to return NULL. */ return (NULL); } /* * obsolete -- * Change set of key letters and ordered arguments into something * getopt(3) will like. */ static void obsolete(int *argcp, char **argvp[]) { int argc, flags; char *ap, **argv, *flagsp, **nargv, *p; /* Setup. */ argv = *argvp; argc = *argcp; /* * Return if no arguments or first argument has leading * dash or slash. */ ap = argv[1]; if (argc == 1 || *ap == '-' || *ap == '/') return; /* Allocate space for new arguments. 
*/ if ((*argvp = nargv = malloc((argc + 1) * sizeof(char *))) == NULL || (p = flagsp = malloc(strlen(ap) + 2)) == NULL) err(1, NULL); *nargv++ = *argv; argv += 2; for (flags = 0; *ap; ++ap) { switch (*ap) { case 'B': case 'b': case 'd': case 'f': case 'D': case 'C': case 'h': case 's': case 'T': if (*argv == NULL) { warnx("option requires an argument -- %c", *ap); usage(); } if ((nargv[0] = malloc(strlen(*argv) + 2 + 1)) == NULL) err(1, NULL); nargv[0][0] = '-'; nargv[0][1] = *ap; (void)strcpy(&nargv[0][2], *argv); ++argv; ++nargv; break; default: if (!flags) { *p++ = '-'; flags = 1; } *p++ = *ap; break; } } /* Terminate flags. */ if (flags) { *p = '\0'; *nargv++ = flagsp; } else free(flagsp); /* Copy remaining arguments. */ while ((*nargv++ = *argv++)); /* Update argument count. */ *argcp = nargv - *argvp - 1; } Index: head/sbin/fsck_ffs/inode.c =================================================================== --- head/sbin/fsck_ffs/inode.c (revision 298871) +++ head/sbin/fsck_ffs/inode.c (revision 298872) @@ -1,734 +1,734 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#if 0
#ifndef lint
static const char sccsid[] = "@(#)inode.c 8.8 (Berkeley) 4/28/95";
#endif /* not lint */
#endif
/*
 * NOTE(review): the #include directives below carry no header name in
 * this copy of the file; the names appear to have been stripped.
 */
#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "fsck.h"

/*
 * First inode number of the inode block currently cached by ginode();
 * 0 means nothing is cached.
 */
static ino_t startinum;

static int iblock(struct inodesc *, long ilevel, off_t isize, int type);

/*
 * Walk every block of inode dp and apply the descriptor's action to it.
 *
 * For each non-zero direct block, and then for each non-zero indirect
 * block (via iblock()), idesc->id_blkno is set and either
 * idesc->id_func or, for DATA descriptors, dirscan() is invoked.  A hole
 * inside a directory is reported and, if the operator agrees, the
 * directory length is trimmed and a rerun is requested.
 *
 * Block and character devices, and symbolic links shorter than
 * fs_maxsymlinklen, are skipped entirely.
 *
 * Returns KEEPON when the walk ran to completion, or the callback's
 * result as soon as it has the STOP bit set.
 */
int
ckinode(union dinode *dp, struct inodesc *idesc)
{
	off_t remsize, sizepb;
	int i, offset, ret;
	union dinode dino;
	ufs2_daddr_t ndb;
	mode_t mode;
	char pathbuf[MAXPATHLEN + 1];

	if (idesc->id_fix != IGNORE)
		idesc->id_fix = DONTKNOW;
	idesc->id_lbn = -1;
	idesc->id_entryno = 0;
	idesc->id_filesize = DIP(dp, di_size);
	mode = DIP(dp, di_mode) & IFMT;
	if (mode == IFBLK || mode == IFCHR || (mode == IFLNK &&
	    DIP(dp, di_size) < (unsigned)sblock.fs_maxsymlinklen))
		return (KEEPON);
	/* Work on a local copy of the on-disk inode. */
	if (sblock.fs_magic == FS_UFS1_MAGIC)
		dino.dp1 = dp->dp1;
	else
		dino.dp2 = dp->dp2;
	/* ndb counts down the blocks still expected from the file size. */
	ndb = howmany(DIP(&dino, di_size), sblock.fs_bsize);
	for (i = 0; i < NDADDR; i++) {
		idesc->id_lbn++;
		/* The last block may be a partial run of fragments. */
		if (--ndb == 0 &&
		    (offset = blkoff(&sblock, DIP(&dino, di_size))) != 0)
			idesc->id_numfrags =
				numfrags(&sblock, fragroundup(&sblock, offset));
		else
			idesc->id_numfrags = sblock.fs_frag;
		if (DIP(&dino, di_db[i]) == 0) {
			if (idesc->id_type == DATA && ndb >= 0) {
				/* An empty block in a directory XXX */
				getpathname(pathbuf, idesc->id_number,
						idesc->id_number);
				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
					pathbuf);
				if (reply("ADJUST LENGTH") == 1) {
					dp = ginode(idesc->id_number);
					DIP_SET(dp, di_size,
					    i * sblock.fs_bsize);
					printf(
					    "YOU MUST RERUN FSCK AFTERWARDS\n");
					rerun = 1;
					inodirty();

				}
			}
			continue;
		}
		idesc->id_blkno = DIP(&dino, di_db[i]);
		if (idesc->id_type != DATA)
			ret = (*idesc->id_func)(idesc);
		else
			ret = dirscan(idesc);
		if (ret & STOP)
			return (ret);
	}
	idesc->id_numfrags = sblock.fs_frag;
	/* remsize: bytes of the file not covered by the direct blocks. */
	remsize = DIP(&dino, di_size) - sblock.fs_bsize * NDADDR;
	/* sizepb: bytes mapped by one indirect block at the current level. */
	sizepb = sblock.fs_bsize;
	for (i = 0; i < NIADDR; i++) {
		sizepb *= NINDIR(&sblock);
		if (DIP(&dino, di_ib[i])) {
			idesc->id_blkno = DIP(&dino, di_ib[i]);
			ret = iblock(idesc, i + 1, remsize, BT_LEVEL1 + i);
			if (ret & STOP)
				return (ret);
		} else {
			/* Skip the logical blocks this level would cover. */
			idesc->id_lbn += sizepb / sblock.fs_bsize;
			if (idesc->id_type == DATA && remsize > 0) {
				/* An empty block in a directory XXX */
				getpathname(pathbuf, idesc->id_number,
						idesc->id_number);
				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
					pathbuf);
				if (reply("ADJUST LENGTH") == 1) {
					dp = ginode(idesc->id_number);
					DIP_SET(dp, di_size,
					    DIP(dp, di_size) - remsize);
					remsize = 0;
					printf(
					    "YOU MUST RERUN FSCK AFTERWARDS\n");
					rerun = 1;
					inodirty();
					break;
				}
			}
		}
		remsize -= sizepb;
	}
	return (KEEPON);
}

/*
 * Process the indirect block idesc->id_blkno.
 *
 * ilevel is the indirection depth of this block (1 = its entries are data
 * blocks), isize the number of file bytes still to be mapped from here
 * on, and type the buffer type handed to getdatablk().  For non-DATA
 * descriptors the callback is first applied to the indirect block itself.
 * Entries are then visited in order: at the lowest level the callback is
 * applied to each, otherwise iblock() recurses.
 *
 * Returns SKIP if the block number is out of range, the callback's result
 * if it lacks KEEPON on the indirect block or has STOP on an entry, STOP
 * after a directory length adjustment, and KEEPON otherwise.
 */
static int
iblock(struct inodesc *idesc, long ilevel, off_t isize, int type)
{
	struct bufarea *bp;
	int i, n, (*func)(struct inodesc *), nif;
	off_t sizepb;
	char buf[BUFSIZ];
	char pathbuf[MAXPATHLEN + 1];
	union dinode *dp;

	if (idesc->id_type != DATA) {
		func = idesc->id_func;
		if (((n = (*func)(idesc)) & KEEPON) == 0)
			return (n);
	} else
		func = dirscan;
	if (chkrange(idesc->id_blkno, idesc->id_numfrags))
		return (SKIP);
	bp = getdatablk(idesc->id_blkno, sblock.fs_bsize, type);
	ilevel--;
	/* sizepb: bytes mapped by each entry of this indirect block. */
	for (sizepb = sblock.fs_bsize, i = 0; i < ilevel; i++)
		sizepb *= NINDIR(&sblock);
	/* nif: number of entries that the file size says are in use. */
	if (howmany(isize, sizepb) > NINDIR(&sblock))
		nif = NINDIR(&sblock);
	else
		nif = howmany(isize, sizepb);
	/* In pass 1, entries past nif must be zero; offer to clear them. */
	if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) {
		for (i = nif; i < NINDIR(&sblock); i++) {
			if (IBLK(bp, i) == 0)
				continue;
			(void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
			    (u_long)idesc->id_number);
			if (preen) {
				pfatal("%s", buf);
			} else if (dofix(idesc, buf)) {
				IBLK_SET(bp, i, 0);
				dirty(bp);
			}
		}
		flush(fswritefd, bp);
	}
	for (i = 0; i < nif; i++) {
		if (ilevel == 0)
			idesc->id_lbn++;
		if (IBLK(bp, i)) {
			idesc->id_blkno = IBLK(bp, i);
			if (ilevel == 0)
				n = (*func)(idesc);
			else
				n = iblock(idesc, ilevel, isize, type);
			if (n & STOP) {
				bp->b_flags &= ~B_INUSE;
				return (n);
			}
		} else {
			if (idesc->id_type == DATA && isize > 0) {
				/* An empty block in a directory XXX */
				getpathname(pathbuf, idesc->id_number,
						idesc->id_number);
				pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS",
					pathbuf);
				if (reply("ADJUST LENGTH") == 1) {
					dp = ginode(idesc->id_number);
					DIP_SET(dp, di_size,
					    DIP(dp, di_size) - isize);
					isize = 0;
					printf(
					    "YOU MUST RERUN FSCK AFTERWARDS\n");
					rerun = 1;
					inodirty();
					bp->b_flags &= ~B_INUSE;
					return(STOP);
				}
			}
		}
		isize -= sizepb;
	}
	bp->b_flags &= ~B_INUSE;
	return (KEEPON);
}

/*
 * Check that a block is a legal block number.
 * Return 0 if in range, 1 if out of range.
 *
 * blk is the first fragment and cnt the number of fragments.  The run is
 * rejected when cnt or blk is not positive, when it extends past
 * maxfsblock, when it is longer than a block or crosses a block boundary,
 * or when it crosses out of the area of its cylinder group that it
 * starts in (before cgdmin: must end at or before cgsblock; otherwise:
 * must end at or before the base of the next group).
 */
int
chkrange(ufs2_daddr_t blk, int cnt)
{
	int c;

	if (cnt <= 0 || blk <= 0 || blk > maxfsblock ||
	    cnt - 1 > maxfsblock - blk)
		return (1);
	if (cnt > sblock.fs_frag ||
	    fragnum(&sblock, blk) + cnt > sblock.fs_frag) {
		if (debug)
			printf("bad size: blk %ld, offset %i, size %d\n",
			    (long)blk, (int)fragnum(&sblock, blk), cnt);
		return (1);
	}
	c = dtog(&sblock, blk);
	if (blk < cgdmin(&sblock, c)) {
		if ((blk + cnt) > cgsblock(&sblock, c)) {
			if (debug) {
				printf("blk %ld < cgdmin %ld;",
				    (long)blk, (long)cgdmin(&sblock, c));
				/* The value printed as "cgsbase" is cgsblock(). */
				printf(" blk + cnt %ld > cgsbase %ld\n",
				    (long)(blk + cnt),
				    (long)cgsblock(&sblock, c));
			}
			return (1);
		}
	} else {
		if ((blk + cnt) > cgbase(&sblock, c+1)) {
			if (debug)  {
				printf("blk %ld >= cgdmin %ld;",
				    (long)blk, (long)cgdmin(&sblock, c));
				/*
				 * The limit tested is cgbase(c+1); the value
				 * printed is sblock.fs_fpg.
				 */
				printf(" blk + cnt %ld > sblock.fs_fpg %ld\n",
				    (long)(blk + cnt), (long)sblock.fs_fpg);
			}
			return (1);
		}
	}
	return (0);
}

/*
 * General purpose interface for reading inodes.
 *
 * Returns a pointer to inode inumber inside the cached inode block pbp,
 * fetching a new block when inumber lies outside the range
 * [startinum, startinum + INOPB).  An inode number below ROOTINO or above
 * maxino is fatal.  The returned pointer refers to the shared buffer and
 * is only meaningful until the next call that replaces pbp.
 */
union dinode *
ginode(ino_t inumber)
{
	ufs2_daddr_t iblk;

	if (inumber < ROOTINO || inumber > maxino)
		errx(EEXIT, "bad inode number %ju to ginode",
		    (uintmax_t)inumber);
	if (startinum == 0 ||
	    inumber < startinum || inumber >= startinum + INOPB(&sblock)) {
		iblk = ino_to_fsba(&sblock, inumber);
		/* Release the previously cached block before replacing it. */
		if (pbp != NULL)
			pbp->b_flags &= ~B_INUSE;
		pbp = getdatablk(iblk, sblock.fs_bsize, BT_INODES);
		/*
		 * The two marked lines are the change hunk carried by this
		 * diff: the open-coded round-down to a multiple of INOPB is
		 * replaced by the rounddown() macro.
		 */
-		startinum = (inumber / INOPB(&sblock)) * INOPB(&sblock);
+		startinum = rounddown(inumber, INOPB(&sblock));
	}
	if (sblock.fs_magic == FS_UFS1_MAGIC)
		return ((union dinode *)
		    &pbp->b_un.b_dinode1[inumber % INOPB(&sblock)]);
	return ((union dinode *)&pbp->b_un.b_dinode2[inumber % INOPB(&sblock)]);
}

/*
 * Special purpose version of ginode used to optimize first pass
 * over all the inodes in numerical order.
 *
 * State shared by setinodebuf(), getnextinode() and freeinodebuf():
 *   nextino        inode number the next getnextinode() call must pass
 *   lastinum       first inode number beyond the data currently buffered
 *   lastvalidinum  last inode number of the cylinder group being read
 *   readcount      reads issued so far for this cylinder group
 *   readpercg      reads needed per cylinder group
 *   fullcnt / inobufsize      inodes / bytes in a full read
 *   partialcnt / partialsize  inodes / bytes in the last read of a group
 */
static ino_t nextino, lastinum, lastvalidinum;
static long readcount, readpercg, fullcnt, inobufsize, partialcnt, partialsize;
static struct bufarea inobuf;

/*
 * Return the next inode in sequence; inumber must equal the expected
 * number and must not exceed lastvalidinum, otherwise the call is fatal.
 *
 * The inode buffer is refilled whenever inumber reaches lastinum.  When
 * rebuildcg is set, the first inode of each freshly read buffer is sanity
 * checked (zeroed if unallocated, plausible type and size, and no block
 * pointers beyond what its size needs); NULL is returned if it fails.
 * Otherwise the inode pointer is returned and the cursor advances by one
 * on-disk inode.
 */
union dinode *
getnextinode(ino_t inumber, int rebuildcg)
{
	int j;
	long size;
	mode_t mode;
	ufs2_daddr_t ndb, blk;
	union dinode *dp;
	static caddr_t nextinop;	/* cursor into inobuf */

	if (inumber != nextino++ || inumber > lastvalidinum)
		errx(EEXIT, "bad inode number %ju to nextinode",
		    (uintmax_t)inumber);
	if (inumber >= lastinum) {
		readcount++;
		blk = ino_to_fsba(&sblock, lastinum);
		/* Every readpercg-th read is the (possibly short) last one. */
		if (readcount % readpercg == 0) {
			size = partialsize;
			lastinum += partialcnt;
		} else {
			size = inobufsize;
			lastinum += fullcnt;
		}
		/*
		 * If getblk encounters an error, it will already have zeroed
		 * out the buffer, so we do not need to do so here.
		 */
		getblk(&inobuf, blk, size);
		nextinop = inobuf.b_un.b_buf;
	}
	dp = (union dinode *)nextinop;
	if (rebuildcg && nextinop == inobuf.b_un.b_buf) {
		/*
		 * Try to determine if we have reached the end of the
		 * allocated inodes.
		 */
		mode = DIP(dp, di_mode) & IFMT;
		if (mode == 0) {
			/*
			 * NOTE(review): this comparison reads the dp2 (UFS2)
			 * layout without testing fs_magic — confirm that
			 * rebuildcg is only used on UFS2 file systems.
			 */
			if (memcmp(dp->dp2.di_db, ufs2_zino.di_db,
				NDADDR * sizeof(ufs2_daddr_t)) ||
			      memcmp(dp->dp2.di_ib, ufs2_zino.di_ib,
				NIADDR * sizeof(ufs2_daddr_t)) ||
			      dp->dp2.di_mode || dp->dp2.di_size)
				return (NULL);
			goto inodegood;
		}
		if (!ftypeok(dp))
			return (NULL);
		ndb = howmany(DIP(dp, di_size), sblock.fs_bsize);
		if (ndb < 0)
			return (NULL);
		if (mode == IFBLK || mode == IFCHR)
			ndb++;
		if (mode == IFLNK) {
			/*
			 * Fake ndb value so direct/indirect block checks below
			 * will detect any garbage after symlink string.
			 */
			if (DIP(dp, di_size) < (off_t)sblock.fs_maxsymlinklen) {
				ndb = howmany(DIP(dp, di_size),
				    sizeof(ufs2_daddr_t));
				if (ndb > NDADDR) {
					j = ndb - NDADDR;
					for (ndb = 1; j > 1; j--)
						ndb *= NINDIR(&sblock);
					ndb += NDADDR;
				}
			}
		}
		/* Direct pointers beyond the needed count must be zero. */
		for (j = ndb; ndb < NDADDR && j < NDADDR; j++)
			if (DIP(dp, di_db[j]) != 0)
				return (NULL);
		/* j becomes the number of indirect levels the size needs. */
		for (j = 0, ndb -= NDADDR; ndb > 0; j++)
			ndb /= NINDIR(&sblock);
		/* Indirect pointers beyond those levels must be zero. */
		for (; j < NIADDR; j++)
			if (DIP(dp, di_ib[j]) != 0)
				return (NULL);
	}
inodegood:
	if (sblock.fs_magic == FS_UFS1_MAGIC)
		nextinop += sizeof(struct ufs1_dinode);
	else
		nextinop += sizeof(struct ufs2_dinode);
	return (dp);
}

/*
 * Prepare getnextinode() to read the cylinder group whose first inode is
 * inum, which must be a multiple of fs_ipg (fatal otherwise).
 *
 * The sequential-read state is reset on every call, and startinum is
 * zeroed so that ginode() refetches its block.  The buffer and the read
 * geometry are set up only on the first call, while inobuf has no buffer.
 */
void
setinodebuf(ino_t inum)
{

	if (inum % sblock.fs_ipg != 0)
		errx(EEXIT, "bad inode number %ju to setinodebuf",
		    (uintmax_t)inum);
	lastvalidinum = inum + sblock.fs_ipg - 1;
	startinum = 0;
	nextino = inum;
	lastinum = inum;
	readcount = 0;
	if (inobuf.b_un.b_buf != NULL)
		return;
	inobufsize = blkroundup(&sblock, INOBUFSIZE);
	fullcnt = inobufsize / ((sblock.fs_magic == FS_UFS1_MAGIC) ?
	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode));
	readpercg = sblock.fs_ipg / fullcnt;
	partialcnt = sblock.fs_ipg % fullcnt;
	partialsize = partialcnt * ((sblock.fs_magic == FS_UFS1_MAGIC) ?
	    sizeof(struct ufs1_dinode) : sizeof(struct ufs2_dinode));
	if (partialcnt != 0) {
		/* A short final read is needed for the remainder. */
		readpercg++;
	} else {
		/* The group divides evenly: the last read is a full one. */
		partialcnt = fullcnt;
		partialsize = inobufsize;
	}
	initbarea(&inobuf, BT_INODES);
	if ((inobuf.b_un.b_buf = Malloc((unsigned)inobufsize)) == NULL)
		errx(EEXIT, "cannot allocate space for inode buffer");
}

/*
 * Release the buffer allocated by setinodebuf(), if any, and clear the
 * pointer so that a later setinodebuf() allocates a fresh one.
 */
void
freeinodebuf(void)
{

	if (inobuf.b_un.b_buf != NULL)
		free((char *)inobuf.b_un.b_buf);
	inobuf.b_un.b_buf = NULL;
}

/*
 * Routines to maintain information about directory inodes.
 * This is built during the first pass and used during the
 * second and third passes.
 *
 * Enter inodes into the cache.
*/ void cacheino(union dinode *dp, ino_t inumber) { struct inoinfo *inp, **inpp; int i, blks; if (howmany(DIP(dp, di_size), sblock.fs_bsize) > NDADDR) blks = NDADDR + NIADDR; else blks = howmany(DIP(dp, di_size), sblock.fs_bsize); inp = (struct inoinfo *) Malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs2_daddr_t)); if (inp == NULL) errx(EEXIT, "cannot increase directory list"); inpp = &inphead[inumber % dirhash]; inp->i_nexthash = *inpp; *inpp = inp; inp->i_parent = inumber == ROOTINO ? ROOTINO : (ino_t)0; inp->i_dotdot = (ino_t)0; inp->i_number = inumber; inp->i_isize = DIP(dp, di_size); inp->i_numblks = blks; for (i = 0; i < (blks < NDADDR ? blks : NDADDR); i++) inp->i_blks[i] = DIP(dp, di_db[i]); if (blks > NDADDR) for (i = 0; i < NIADDR; i++) inp->i_blks[NDADDR + i] = DIP(dp, di_ib[i]); if (inplast == listmax) { listmax += 100; inpsort = (struct inoinfo **)realloc((char *)inpsort, (unsigned)listmax * sizeof(struct inoinfo *)); if (inpsort == NULL) errx(EEXIT, "cannot increase directory list"); } inpsort[inplast++] = inp; } /* * Look up an inode cache structure. */ struct inoinfo * getinoinfo(ino_t inumber) { struct inoinfo *inp; for (inp = inphead[inumber % dirhash]; inp; inp = inp->i_nexthash) { if (inp->i_number != inumber) continue; return (inp); } errx(EEXIT, "cannot find inode %ju", (uintmax_t)inumber); return ((struct inoinfo *)0); } /* * Clean up all the inode cache structure. */ void inocleanup(void) { struct inoinfo **inpp; if (inphead == NULL) return; for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) free((char *)(*inpp)); free((char *)inphead); free((char *)inpsort); inphead = inpsort = NULL; } void inodirty(void) { dirty(pbp); } void clri(struct inodesc *idesc, const char *type, int flag) { union dinode *dp; dp = ginode(idesc->id_number); if (flag == 1) { pwarn("%s %s", type, (DIP(dp, di_mode) & IFMT) == IFDIR ? 
"DIR" : "FILE"); pinode(idesc->id_number); } if (preen || reply("CLEAR") == 1) { if (preen) printf(" (CLEARED)\n"); n_files--; if (bkgrdflag == 0) { (void)ckinode(dp, idesc); inoinfo(idesc->id_number)->ino_state = USTATE; clearinode(dp); inodirty(); } else { cmd.value = idesc->id_number; cmd.size = -DIP(dp, di_nlink); if (debug) printf("adjrefcnt ino %ld amt %lld\n", (long)cmd.value, (long long)cmd.size); if (sysctl(adjrefcnt, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST INODE", cmd.value); } } } int findname(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) { idesc->id_entryno++; return (KEEPON); } memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1); return (STOP|FOUND); } int findino(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (dirp->d_ino == 0) return (KEEPON); if (strcmp(dirp->d_name, idesc->id_name) == 0 && dirp->d_ino >= ROOTINO && dirp->d_ino <= maxino) { idesc->id_parent = dirp->d_ino; return (STOP|FOUND); } return (KEEPON); } int clearentry(struct inodesc *idesc) { struct direct *dirp = idesc->id_dirp; if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) { idesc->id_entryno++; return (KEEPON); } dirp->d_ino = 0; return (STOP|FOUND|ALTERED); } void pinode(ino_t ino) { union dinode *dp; char *p; struct passwd *pw; time_t t; printf(" I=%lu ", (u_long)ino); if (ino < ROOTINO || ino > maxino) return; dp = ginode(ino); printf(" OWNER="); if ((pw = getpwuid((int)DIP(dp, di_uid))) != NULL) printf("%s ", pw->pw_name); else printf("%u ", (unsigned)DIP(dp, di_uid)); printf("MODE=%o\n", DIP(dp, di_mode)); if (preen) printf("%s: ", cdevname); printf("SIZE=%ju ", (uintmax_t)DIP(dp, di_size)); t = DIP(dp, di_mtime); p = ctime(&t); printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]); } void blkerror(ino_t ino, const char *type, ufs2_daddr_t blk) { pfatal("%jd %s I=%ju", (intmax_t)blk, type, (uintmax_t)ino); printf("\n"); switch 
(inoinfo(ino)->ino_state) { case FSTATE: case FZLINK: inoinfo(ino)->ino_state = FCLEAR; return; case DSTATE: case DZLINK: inoinfo(ino)->ino_state = DCLEAR; return; case FCLEAR: case DCLEAR: return; default: errx(EEXIT, "BAD STATE %d TO BLKERR", inoinfo(ino)->ino_state); /* NOTREACHED */ } } /* * allocate an unused inode */ ino_t allocino(ino_t request, int type) { ino_t ino; union dinode *dp; struct bufarea *cgbp; struct cg *cgp; int cg; if (request == 0) request = ROOTINO; else if (inoinfo(request)->ino_state != USTATE) return (0); for (ino = request; ino < maxino; ino++) if (inoinfo(ino)->ino_state == USTATE) break; if (ino == maxino) return (0); cg = ino_to_cg(&sblock, ino); cgbp = cgget(cg); cgp = cgbp->b_un.b_cg; if (!check_cgmagic(cg, cgbp)) return (0); setbit(cg_inosused(cgp), ino % sblock.fs_ipg); cgp->cg_cs.cs_nifree--; switch (type & IFMT) { case IFDIR: inoinfo(ino)->ino_state = DSTATE; cgp->cg_cs.cs_ndir++; break; case IFREG: case IFLNK: inoinfo(ino)->ino_state = FSTATE; break; default: return (0); } dirty(cgbp); dp = ginode(ino); DIP_SET(dp, di_db[0], allocblk((long)1)); if (DIP(dp, di_db[0]) == 0) { inoinfo(ino)->ino_state = USTATE; return (0); } DIP_SET(dp, di_mode, type); DIP_SET(dp, di_flags, 0); DIP_SET(dp, di_atime, time(NULL)); DIP_SET(dp, di_ctime, DIP(dp, di_atime)); DIP_SET(dp, di_mtime, DIP(dp, di_ctime)); DIP_SET(dp, di_mtimensec, 0); DIP_SET(dp, di_ctimensec, 0); DIP_SET(dp, di_atimensec, 0); DIP_SET(dp, di_size, sblock.fs_fsize); DIP_SET(dp, di_blocks, btodb(sblock.fs_fsize)); n_files++; inodirty(); inoinfo(ino)->ino_type = IFTODT(type); return (ino); } /* * deallocate an inode */ void freeino(ino_t ino) { struct inodesc idesc; union dinode *dp; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = ino; dp = ginode(ino); (void)ckinode(dp, &idesc); clearinode(dp); inodirty(); inoinfo(ino)->ino_state = USTATE; n_files--; } Index: head/sbin/fsck_ffs/suj.c 
=================================================================== --- head/sbin/fsck_ffs/suj.c (revision 298871) +++ head/sbin/fsck_ffs/suj.c (revision 298872) @@ -1,2791 +1,2791 @@ /*- * Copyright 2009, 2010 Jeffrey W. Roberson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" #define DOTDOT_OFFSET DIRECTSIZ(1) #define SUJ_HASHSIZE 2048 #define SUJ_HASHMASK (SUJ_HASHSIZE - 1) #define SUJ_HASH(x) ((x * 2654435761) & SUJ_HASHMASK) struct suj_seg { TAILQ_ENTRY(suj_seg) ss_next; struct jsegrec ss_rec; uint8_t *ss_blk; }; struct suj_rec { TAILQ_ENTRY(suj_rec) sr_next; union jrec *sr_rec; }; TAILQ_HEAD(srechd, suj_rec); struct suj_ino { LIST_ENTRY(suj_ino) si_next; struct srechd si_recs; struct srechd si_newrecs; struct srechd si_movs; struct jtrncrec *si_trunc; ino_t si_ino; char si_skipparent; char si_hasrecs; char si_blkadj; char si_linkadj; int si_mode; nlink_t si_nlinkadj; nlink_t si_nlink; nlink_t si_dotlinks; }; LIST_HEAD(inohd, suj_ino); struct suj_blk { LIST_ENTRY(suj_blk) sb_next; struct srechd sb_recs; ufs2_daddr_t sb_blk; }; LIST_HEAD(blkhd, suj_blk); struct data_blk { LIST_ENTRY(data_blk) db_next; uint8_t *db_buf; ufs2_daddr_t db_blk; int db_size; int db_dirty; }; struct ino_blk { LIST_ENTRY(ino_blk) ib_next; uint8_t *ib_buf; int ib_dirty; ufs2_daddr_t ib_blk; }; LIST_HEAD(iblkhd, ino_blk); struct suj_cg { LIST_ENTRY(suj_cg) sc_next; struct blkhd sc_blkhash[SUJ_HASHSIZE]; struct inohd sc_inohash[SUJ_HASHSIZE]; struct iblkhd sc_iblkhash[SUJ_HASHSIZE]; struct ino_blk *sc_lastiblk; struct suj_ino *sc_lastino; struct suj_blk *sc_lastblk; uint8_t *sc_cgbuf; struct cg *sc_cgp; int sc_dirty; int sc_cgx; }; static LIST_HEAD(cghd, suj_cg) cghash[SUJ_HASHSIZE]; static LIST_HEAD(dblkhd, data_blk) dbhash[SUJ_HASHSIZE]; static struct suj_cg *lastcg; static struct data_blk *lastblk; static TAILQ_HEAD(seghd, suj_seg) allsegs; static uint64_t oldseq; static struct uufsd *disk = NULL; static struct fs *fs = NULL; static ino_t sujino; /* * Summary statistics. 
*/
static uint64_t freefrags;
static uint64_t freeblocks;
static uint64_t freeinos;
static uint64_t freedir;
static uint64_t jbytes;
static uint64_t jrecs;

/* Target of the longjmp() performed by err_suj(). */
static jmp_buf	jmpbuf;

/* Callback type invoked per block by the visit routines. */
typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int);
static void err_suj(const char *, ...) __dead2;
static void ino_trunc(ino_t, off_t);
static void ino_decr(ino_t);
static void ino_adjust(struct suj_ino *);
static void ino_build(struct suj_ino *);
static int blk_isfree(ufs2_daddr_t);
static void initsuj(void);

/*
 * Allocate n bytes with Malloc(); terminate the program through err()
 * if the allocation fails, so callers never see NULL.
 */
static void *
errmalloc(size_t n)
{
	void *a;

	a = Malloc(n);
	if (a == NULL)
		err(EX_OSERR, "malloc(%zu)", n);
	return (a);
}

/*
 * When we hit a fatal error in the journalling check, print the error
 * (prefixed with the device name when preening) and longjmp() back to
 * the jmpbuf site; according to the original note that is where the
 * fallback to a normal fsck is offered.
 */
static void
err_suj(const char * restrict fmt, ...)
{
	va_list ap;

	if (preen)
		(void)fprintf(stdout, "%s: ", cdevname);

	va_start(ap, fmt);
	(void)vfprintf(stdout, fmt, ap);
	va_end(ap);

	longjmp(jmpbuf, -1);
}

/*
 * Open the given provider, load superblock.
 * Does nothing if a disk is already open.
 */
static void
opendisk(const char *devnam)
{
	if (disk != NULL)
		return;
	disk = Malloc(sizeof(*disk));
	if (disk == NULL)
		err(EX_OSERR, "malloc(%zu)", sizeof(*disk));
	if (ufs_disk_fillout(disk, devnam) == -1) {
		err(EX_OSERR, "ufs_disk_fillout(%s) failed: %s", devnam,
		    disk->d_error);
	}
	fs = &disk->d_fs;
	/* Fall back to secsize when the sector size ioctl fails. */
	if (real_dev_bsize == 0 && ioctl(disk->d_fd, DIOCGSECTORSIZE,
	    &real_dev_bsize) == -1)
		real_dev_bsize = secsize;
	if (debug)
		printf("dev_bsize %u\n", real_dev_bsize);
}

/*
 * Mark file system as clean, write the super-block back, close the disk.
 */
static void
closedisk(const char *devnam)
{
	struct csum *cgsum;
	uint32_t i;

	/*
	 * Recompute the fs summary info from correct cs summaries.
	 */
	bzero(&fs->fs_cstotal, sizeof(struct csum_total));
	for (i = 0; i < fs->fs_ncg; i++) {
		cgsum = &fs->fs_cs(fs, i);
		fs->fs_cstotal.cs_nffree += cgsum->cs_nffree;
		fs->fs_cstotal.cs_nbfree += cgsum->cs_nbfree;
		fs->fs_cstotal.cs_nifree += cgsum->cs_nifree;
		fs->fs_cstotal.cs_ndir += cgsum->cs_ndir;
	}
	fs->fs_pendinginodes = 0;
	fs->fs_pendingblocks = 0;
	fs->fs_clean = 1;
	fs->fs_time = time(NULL);
	fs->fs_mtime = time(NULL);
	if (sbwrite(disk, 0) == -1)
		err(EX_OSERR, "sbwrite(%s)", devnam);
	if (ufs_disk_close(disk) == -1)
		err(EX_OSERR, "ufs_disk_close(%s)", devnam);
	free(disk);
	disk = NULL;
	fs = NULL;
}

/*
 * Lookup a cg by number in the hash so we can keep track of which cgs
 * need stats rebuilt.  A cg seen for the first time is allocated, linked
 * into the hash and read from disk.
 */
static struct suj_cg *
cg_lookup(int cgx)
{
	struct cghd *hd;
	struct suj_cg *sc;

	if (cgx < 0 || cgx >= fs->fs_ncg)
		err_suj("Bad cg number %d\n", cgx);
	/* One-entry cache of the most recent hash hit. */
	if (lastcg && lastcg->sc_cgx == cgx)
		return (lastcg);
	hd = &cghash[SUJ_HASH(cgx)];
	LIST_FOREACH(sc, hd, sc_next)
		if (sc->sc_cgx == cgx) {
			lastcg = sc;
			return (sc);
		}
	sc = errmalloc(sizeof(*sc));
	bzero(sc, sizeof(*sc));
	sc->sc_cgbuf = errmalloc(fs->fs_bsize);
	sc->sc_cgp = (struct cg *)sc->sc_cgbuf;
	sc->sc_cgx = cgx;
	LIST_INSERT_HEAD(hd, sc, sc_next);
	if (bread(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf,
	    fs->fs_bsize) == -1)
		err_suj("Unable to read cylinder group %d\n", sc->sc_cgx);

	return (sc);
}

/*
 * Lookup an inode number in the hash and allocate a suj_ino if it does
 * not exist.
*/
static struct suj_ino *
ino_lookup(ino_t ino, int creat)
{
	struct suj_ino *sino;
	struct inohd *hd;
	struct suj_cg *sc;

	sc = cg_lookup(ino_to_cg(fs, ino));
	if (sc->sc_lastino && sc->sc_lastino->si_ino == ino)
		return (sc->sc_lastino);
	hd = &sc->sc_inohash[SUJ_HASH(ino)];
	LIST_FOREACH(sino, hd, si_next)
		if (sino->si_ino == ino)
			return (sino);
	/* Not found: only allocate when the caller asked for it. */
	if (creat == 0)
		return (NULL);
	sino = errmalloc(sizeof(*sino));
	bzero(sino, sizeof(*sino));
	sino->si_ino = ino;
	TAILQ_INIT(&sino->si_recs);
	TAILQ_INIT(&sino->si_newrecs);
	TAILQ_INIT(&sino->si_movs);
	LIST_INSERT_HEAD(hd, sino, si_next);

	return (sino);
}

/*
 * Lookup a block number in the hash and allocate a suj_blk if it does
 * not exist.  With creat == 0 a miss returns NULL instead.
 */
static struct suj_blk *
blk_lookup(ufs2_daddr_t blk, int creat)
{
	struct suj_blk *sblk;
	struct suj_cg *sc;
	struct blkhd *hd;

	sc = cg_lookup(dtog(fs, blk));
	if (sc->sc_lastblk && sc->sc_lastblk->sb_blk == blk)
		return (sc->sc_lastblk);
	hd = &sc->sc_blkhash[SUJ_HASH(fragstoblks(fs, blk))];
	LIST_FOREACH(sblk, hd, sb_next)
		if (sblk->sb_blk == blk)
			return (sblk);
	if (creat == 0)
		return (NULL);
	sblk = errmalloc(sizeof(*sblk));
	bzero(sblk, sizeof(*sblk));
	sblk->sb_blk = blk;
	TAILQ_INIT(&sblk->sb_recs);
	LIST_INSERT_HEAD(hd, sblk, sb_next);

	return (sblk);
}

/*
 * Find the cached data block record for 'blk', creating an empty one
 * (no buffer, size 0) when none exists yet.
 */
static struct data_blk *
dblk_lookup(ufs2_daddr_t blk)
{
	struct data_blk *dblk;
	struct dblkhd *hd;

	hd = &dbhash[SUJ_HASH(fragstoblks(fs, blk))];
	if (lastblk && lastblk->db_blk == blk)
		return (lastblk);
	LIST_FOREACH(dblk, hd, db_next)
		if (dblk->db_blk == blk)
			return (dblk);
	/*
	 * The data block wasn't located, allocate a new one.
	 */
	dblk = errmalloc(sizeof(*dblk));
	bzero(dblk, sizeof(*dblk));
	LIST_INSERT_HEAD(hd, dblk, db_next);
	dblk->db_blk = blk;
	return (dblk);
}

/*
 * Return the cached contents of data block 'blk', reading 'size' bytes
 * from disk whenever the cached size differs from the requested one
 * (which includes the first access, when the cached size is 0).
 */
static uint8_t *
dblk_read(ufs2_daddr_t blk, int size)
{
	struct data_blk *dblk;

	dblk = dblk_lookup(blk);
	/*
	 * I doubt size mismatches can happen in practice but it is trivial
	 * to handle.
	 */
	if (size != dblk->db_size) {
		if (dblk->db_buf)
			free(dblk->db_buf);
		dblk->db_buf = errmalloc(size);
		dblk->db_size = size;
		if (bread(disk, fsbtodb(fs, blk), dblk->db_buf, size) == -1)
			err_suj("Failed to read data block %jd\n", blk);
	}
	return (dblk->db_buf);
}

/*
 * Flag the cached data block 'blk' as needing to be written back.
 */
static void
dblk_dirty(ufs2_daddr_t blk)
{
	struct data_blk *dblk;

	dblk = dblk_lookup(blk);
	dblk->db_dirty = 1;
}

/*
 * Write every dirty cached data block that has a buffer back to disk.
 */
static void
dblk_write(void)
{
	struct data_blk *dblk;
	int i;

	for (i = 0; i < SUJ_HASHSIZE; i++) {
		LIST_FOREACH(dblk, &dbhash[i], db_next) {
			if (dblk->db_dirty == 0 || dblk->db_size == 0)
				continue;
			if (bwrite(disk, fsbtodb(fs, dblk->db_blk),
			    dblk->db_buf, dblk->db_size) == -1)
				err_suj("Unable to write block %jd\n",
				    dblk->db_blk);
		}
	}
}

/*
 * Return a pointer to the on-disk inode 'ino' inside its cached inode
 * block, reading the block from disk on first use.
 */
static union dinode *
ino_read(ino_t ino)
{
	struct ino_blk *iblk;
	struct iblkhd *hd;
	struct suj_cg *sc;
	ufs2_daddr_t blk;
	int off;

	blk = ino_to_fsba(fs, ino);
	sc = cg_lookup(ino_to_cg(fs, ino));
	iblk = sc->sc_lastiblk;
	if (iblk && iblk->ib_blk == blk)
		goto found;
	hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))];
	LIST_FOREACH(iblk, hd, ib_next)
		if (iblk->ib_blk == blk)
			goto found;
	/*
	 * The inode block wasn't located, allocate a new one.
	 */
	iblk = errmalloc(sizeof(*iblk));
	bzero(iblk, sizeof(*iblk));
	iblk->ib_buf = errmalloc(fs->fs_bsize);
	iblk->ib_blk = blk;
	LIST_INSERT_HEAD(hd, iblk, ib_next);
	if (bread(disk, fsbtodb(fs, blk), iblk->ib_buf, fs->fs_bsize) == -1)
		err_suj("Failed to read inode block %jd\n", blk);
found:
	/* Remember the block so the next lookup in this cg is cheap. */
	sc->sc_lastiblk = iblk;
	off = ino_to_fsbo(fs, ino);
	if (fs->fs_magic == FS_UFS1_MAGIC)
		return (union dinode *)&((struct ufs1_dinode *)iblk->ib_buf)[off];
	else
		return (union dinode *)&((struct ufs2_dinode *)iblk->ib_buf)[off];
}

/*
 * Flag the inode block holding 'ino' as dirty.  If the block is not
 * cached yet it is read in first and the function retries.
 */
static void
ino_dirty(ino_t ino)
{
	struct ino_blk *iblk;
	struct iblkhd *hd;
	struct suj_cg *sc;
	ufs2_daddr_t blk;

	blk = ino_to_fsba(fs, ino);
	sc = cg_lookup(ino_to_cg(fs, ino));
	iblk = sc->sc_lastiblk;
	if (iblk && iblk->ib_blk == blk) {
		iblk->ib_dirty = 1;
		return;
	}
	hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))];
	LIST_FOREACH(iblk, hd, ib_next) {
		if (iblk->ib_blk == blk) {
			iblk->ib_dirty = 1;
			return;
		}
	}
	/* ino_read() caches the block, so the retry hits the fast path. */
	ino_read(ino);
	ino_dirty(ino);
}

/*
 * Write an inode block back to disk if it has been dirtied.
 */
static void
iblk_write(struct ino_blk *iblk)
{

	if (iblk->ib_dirty == 0)
		return;
	if (bwrite(disk, fsbtodb(fs, iblk->ib_blk), iblk->ib_buf,
	    fs->fs_bsize) == -1)
		err_suj("Failed to write inode block %jd\n", iblk->ib_blk);
}

/*
 * Return 1 if the frag range [start, start + frags) intersects the
 * range described by the journal block record, 0 otherwise.
 */
static int
blk_overlaps(struct jblkrec *brec, ufs2_daddr_t start, int frags)
{
	ufs2_daddr_t bstart;
	ufs2_daddr_t bend;
	ufs2_daddr_t end;

	end = start + frags;
	bstart = brec->jb_blkno + brec->jb_oldfrags;
	bend = bstart + brec->jb_frags;
	if (start < bend && end > bstart)
		return (1);
	return (0);
}

/*
 * Return 1 if the journal block record names the same inode and lbn,
 * starts at 'start' and covers at least 'frags' frags; 0 otherwise.
 */
static int
blk_equals(struct jblkrec *brec, ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t start,
    int frags)
{

	if (brec->jb_ino != ino || brec->jb_lbn != lbn)
		return (0);
	if (brec->jb_blkno + brec->jb_oldfrags != start)
		return (0);
	if (brec->jb_frags < frags)
		return (0);
	return (1);
}

/*
 * Set one bit in *mask for each frag covered by the journal record.
 */
static void
blk_setmask(struct jblkrec *brec, int *mask)
{
	int i;

	for (i = brec->jb_oldfrags; i < brec->jb_oldfrags + brec->jb_frags; i++)
		*mask |= 1 << i;
}

/*
 * Determine whether a given block has been reallocated to a new location.
* Returns a mask of overlapping bits if any frags have been reused or
 * zero if the block has not been re-used and the contents can be trusted.
 *
 * This is used to ensure that an orphaned pointer due to truncate is safe
 * to be freed.  The mask value can be used to free partial blocks.
 */
static int
blk_freemask(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags)
{
	struct suj_blk *owner;
	struct suj_rec *entry;
	struct jblkrec *alloc;
	int reused;
	int shift;

	/*
	 * To be certain we're not freeing a reallocated block we look up
	 * the containing block in the blk hash and scan its journal
	 * records for any that overlap the fragments we're concerned
	 * with.  If any fragments have been reallocated the block has
	 * already been freed and re-used for another purpose.
	 */
	owner = blk_lookup(blknum(fs, blk), 0);
	if (owner == NULL)
		return (0);
	reused = 0;
	shift = blk - owner->sb_blk;
	TAILQ_FOREACH(entry, &owner->sb_recs, sr_next) {
		alloc = (struct jblkrec *)entry->sr_rec;
		/* Records that do not touch our frags are irrelevant. */
		if (!blk_overlaps(alloc, blk, frags))
			continue;
		/*
		 * An overlapping record that is not an exact match marks
		 * reused frags; an exact match refers to the current
		 * location and resets the mask.
		 */
		if (blk_equals(alloc, ino, lbn, blk, frags) != 1)
			blk_setmask(alloc, &reused);
		else
			reused = 0;
	}
	if (debug)
		printf("blk_freemask: blk %jd sblk %jd off %d mask 0x%X\n",
		    blk, owner->sb_blk, shift, reused);
	return (reused >> shift);
}

/*
 * Determine whether it is safe to follow an indirect.  It is not safe
 * if any part of the indirect has been reallocated or the last journal
 * entry was an allocation.  Just allocated indirects may not have valid
 * pointers yet and all of their children will have their own records.
 * It is also not safe to follow an indirect if the cg bitmap has been
 * cleared as a new allocation may write to the block prior to the journal
 * being written.
 *
 * Returns 1 if it's safe to follow the indirect and 0 otherwise.
*/ static int blk_isindir(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn) { struct suj_blk *sblk; struct jblkrec *brec; sblk = blk_lookup(blk, 0); if (sblk == NULL) return (1); if (TAILQ_EMPTY(&sblk->sb_recs)) return (1); brec = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec; if (blk_equals(brec, ino, lbn, blk, fs->fs_frag)) if (brec->jb_op == JOP_FREEBLK) return (!blk_isfree(blk)); return (0); } /* * Clear an inode from the cg bitmap. If the inode was already clear return * 0 so the caller knows it does not have to check the inode contents. */ static int ino_free(ino_t ino, int mode) { struct suj_cg *sc; uint8_t *inosused; struct cg *cgp; int cg; cg = ino_to_cg(fs, ino); ino = ino % fs->fs_ipg; sc = cg_lookup(cg); cgp = sc->sc_cgp; inosused = cg_inosused(cgp); /* * The bitmap may never have made it to the disk so we have to * conditionally clear. We can avoid writing the cg in this case. */ if (isclr(inosused, ino)) return (0); freeinos++; clrbit(inosused, ino); if (ino < cgp->cg_irotor) cgp->cg_irotor = ino; cgp->cg_cs.cs_nifree++; if ((mode & IFMT) == IFDIR) { freedir++; cgp->cg_cs.cs_ndir--; } sc->sc_dirty = 1; return (1); } /* * Free 'frags' frags starting at filesystem block 'bno' skipping any frags * set in the mask. */ static void blk_free(ufs2_daddr_t bno, int mask, int frags) { ufs1_daddr_t fragno, cgbno; struct suj_cg *sc; struct cg *cgp; int i, cg; uint8_t *blksfree; if (debug) printf("Freeing %d frags at blk %jd mask 0x%x\n", frags, bno, mask); cg = dtog(fs, bno); sc = cg_lookup(cg); cgp = sc->sc_cgp; cgbno = dtogd(fs, bno); blksfree = cg_blksfree(cgp); /* * If it's not allocated we only wrote the journal entry * and never the bitmaps. Here we unconditionally clear and * resolve the cg summary later. 
*/ if (frags == fs->fs_frag && mask == 0) { fragno = fragstoblks(fs, cgbno); ffs_setblock(fs, blksfree, fragno); freeblocks++; } else { /* * deallocate the fragment */ for (i = 0; i < frags; i++) if ((mask & (1 << i)) == 0 && isclr(blksfree, cgbno +i)) { freefrags++; setbit(blksfree, cgbno + i); } } sc->sc_dirty = 1; } /* * Returns 1 if the whole block starting at 'bno' is marked free and 0 * otherwise. */ static int blk_isfree(ufs2_daddr_t bno) { struct suj_cg *sc; sc = cg_lookup(dtog(fs, bno)); return ffs_isblock(fs, cg_blksfree(sc->sc_cgp), dtogd(fs, bno)); } /* * Fetch an indirect block to find the block at a given lbn. The lbn * may be negative to fetch a specific indirect block pointer or positive * to fetch a specific block. */ static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn) { ufs2_daddr_t *bap2; ufs2_daddr_t *bap1; ufs_lbn_t lbnadd; ufs_lbn_t base; int level; int i; if (blk == 0) return (0); level = lbn_level(cur); if (level == -1) err_suj("Invalid indir lbn %jd\n", lbn); if (level == 0 && lbn < 0) err_suj("Invalid lbn %jd\n", lbn); bap2 = (void *)dblk_read(blk, fs->fs_bsize); bap1 = (void *)bap2; lbnadd = 1; base = -(cur + level); for (i = level; i > 0; i--) lbnadd *= NINDIR(fs); if (lbn > 0) i = (lbn - base) / lbnadd; else i = (-lbn - base) / lbnadd; if (i < 0 || i >= NINDIR(fs)) err_suj("Invalid indirect index %d produced by lbn %jd\n", i, lbn); if (level == 0) cur = base + (i * lbnadd); else cur = -(base + (i * lbnadd)) - (level - 1); if (fs->fs_magic == FS_UFS1_MAGIC) blk = bap1[i]; else blk = bap2[i]; if (cur == lbn) return (blk); if (level == 0) err_suj("Invalid lbn %jd at level 0\n", lbn); return indir_blkatoff(blk, ino, cur, lbn); } /* * Finds the disk block address at the specified lbn within the inode * specified by ip. This follows the whole tree and honors di_size and * di_extsize so it is a true test of reachability. The lbn may be * negative if an extattr or indirect block is requested. 
*/ static ufs2_daddr_t ino_blkatoff(union dinode *ip, ino_t ino, ufs_lbn_t lbn, int *frags) { ufs_lbn_t tmpval; ufs_lbn_t cur; ufs_lbn_t next; int i; /* * Handle extattr blocks first. */ if (lbn < 0 && lbn >= -NXADDR) { lbn = -1 - lbn; if (lbn > lblkno(fs, ip->dp2.di_extsize - 1)) return (0); *frags = numfrags(fs, sblksize(fs, ip->dp2.di_extsize, lbn)); return (ip->dp2.di_extb[lbn]); } /* * Now direct and indirect. */ if (DIP(ip, di_mode) == IFLNK && DIP(ip, di_size) < fs->fs_maxsymlinklen) return (0); if (lbn >= 0 && lbn < NDADDR) { *frags = numfrags(fs, sblksize(fs, DIP(ip, di_size), lbn)); return (DIP(ip, di_db[lbn])); } *frags = fs->fs_frag; for (i = 0, tmpval = NINDIR(fs), cur = NDADDR; i < NIADDR; i++, tmpval *= NINDIR(fs), cur = next) { next = cur + tmpval; if (lbn == -cur - i) return (DIP(ip, di_ib[i])); /* * Determine whether the lbn in question is within this tree. */ if (lbn < 0 && -lbn >= next) continue; if (lbn > 0 && lbn >= next) continue; return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn); } err_suj("lbn %jd not in ino\n", lbn); /* NOTREACHED */ } /* * Determine whether a block exists at a particular lbn in an inode. * Returns 1 if found, 0 if not. lbn may be negative for indirects * or ext blocks. */ static int blk_isat(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int *frags) { union dinode *ip; ufs2_daddr_t nblk; ip = ino_read(ino); if (DIP(ip, di_nlink) == 0 || DIP(ip, di_mode) == 0) return (0); nblk = ino_blkatoff(ip, ino, lbn, frags); return (nblk == blk); } /* * Clear the directory entry at diroff that should point to child. Minimal * checking is done and it is assumed that this path was verified with isat. 
*/ static void ino_clrat(ino_t parent, off_t diroff, ino_t child) { union dinode *dip; struct direct *dp; ufs2_daddr_t blk; uint8_t *block; ufs_lbn_t lbn; int blksize; int frags; int doff; if (debug) printf("Clearing inode %ju from parent %ju at offset %jd\n", (uintmax_t)child, (uintmax_t)parent, diroff); lbn = lblkno(fs, diroff); doff = blkoff(fs, diroff); dip = ino_read(parent); blk = ino_blkatoff(dip, parent, lbn, &frags); blksize = sblksize(fs, DIP(dip, di_size), lbn); block = dblk_read(blk, blksize); dp = (struct direct *)&block[doff]; if (dp->d_ino != child) errx(1, "Inode %ju does not exist in %ju at %jd", (uintmax_t)child, (uintmax_t)parent, diroff); dp->d_ino = 0; dblk_dirty(blk); /* * The actual .. reference count will already have been removed * from the parent by the .. remref record. */ } /* * Determines whether a pointer to an inode exists within a directory * at a specified offset. Returns the mode of the found entry. */ static int ino_isat(ino_t parent, off_t diroff, ino_t child, int *mode, int *isdot) { union dinode *dip; struct direct *dp; ufs2_daddr_t blk; uint8_t *block; ufs_lbn_t lbn; int blksize; int frags; int dpoff; int doff; *isdot = 0; dip = ino_read(parent); *mode = DIP(dip, di_mode); if ((*mode & IFMT) != IFDIR) { if (debug) { /* * This can happen if the parent inode * was reallocated. 
*/ if (*mode != 0) printf("Directory %ju has bad mode %o\n", (uintmax_t)parent, *mode); else printf("Directory %ju has zero mode\n", (uintmax_t)parent); } return (0); } lbn = lblkno(fs, diroff); doff = blkoff(fs, diroff); blksize = sblksize(fs, DIP(dip, di_size), lbn); if (diroff + DIRECTSIZ(1) > DIP(dip, di_size) || doff >= blksize) { if (debug) printf("ino %ju absent from %ju due to offset %jd" " exceeding size %jd\n", (uintmax_t)child, (uintmax_t)parent, diroff, DIP(dip, di_size)); return (0); } blk = ino_blkatoff(dip, parent, lbn, &frags); if (blk <= 0) { if (debug) printf("Sparse directory %ju", (uintmax_t)parent); return (0); } block = dblk_read(blk, blksize); /* * Walk through the records from the start of the block to be * certain we hit a valid record and not some junk in the middle * of a file name. Stop when we reach or pass the expected offset. */ - dpoff = (doff / DIRBLKSIZ) * DIRBLKSIZ; + dpoff = rounddown(doff, DIRBLKSIZ); do { dp = (struct direct *)&block[dpoff]; if (dpoff == doff) break; if (dp->d_reclen == 0) break; dpoff += dp->d_reclen; } while (dpoff <= doff); if (dpoff > fs->fs_bsize) err_suj("Corrupt directory block in dir ino %ju\n", (uintmax_t)parent); /* Not found. */ if (dpoff != doff) { if (debug) printf("ino %ju not found in %ju, lbn %jd, dpoff %d\n", (uintmax_t)child, (uintmax_t)parent, lbn, dpoff); return (0); } /* * We found the item in question. Record the mode and whether it's * a . or .. link for the caller. */ if (dp->d_ino == child) { if (child == parent) *isdot = 1; else if (dp->d_namlen == 2 && dp->d_name[0] == '.' && dp->d_name[1] == '.') *isdot = 1; *mode = DTTOIF(dp->d_type); return (1); } if (debug) printf("ino %ju doesn't match dirent ino %ju in parent %ju\n", (uintmax_t)child, (uintmax_t)dp->d_ino, (uintmax_t)parent); return (0); } #define VISIT_INDIR 0x0001 #define VISIT_EXT 0x0002 #define VISIT_ROOT 0x0004 /* Operation came via root & valid pointers. 
*/

/*
 * Read an indirect level which may or may not be linked into an inode.
 *
 * Calls 'visitor' for every non-zero pointer reachable from the indirect
 * block 'blk' (whose own lbn is 'lbn'), recursing through lower levels,
 * and adds fs_frag to *frags for each block reported.  With VISIT_INDIR
 * set the indirect block itself is reported after its children.
 */
static void
indir_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, uint64_t *frags,
    ino_visitor visitor, int flags)
{
	ufs2_daddr_t *bap2;
	ufs1_daddr_t *bap1;
	ufs_lbn_t lbnadd;
	ufs2_daddr_t nblk;
	ufs_lbn_t nlbn;
	int level;
	int i;

	/*
	 * Don't visit indirect blocks with contents we can't trust.  This
	 * should only happen when indir_visit() is called to complete a
	 * truncate that never finished and not when a pointer is found via
	 * an inode.
	 */
	if (blk == 0)
		return;
	level = lbn_level(lbn);
	if (level == -1)
		err_suj("Invalid level for lbn %jd\n", lbn);
	if ((flags & VISIT_ROOT) == 0 && blk_isindir(blk, ino, lbn) == 0) {
		if (debug)
			printf("blk %jd ino %ju lbn %jd(%d) is not indir.\n",
			    blk, (uintmax_t)ino, lbn, level);
		/* Skip the children but still report the block itself. */
		goto out;
	}
	/* lbnadd is the number of lbns spanned by one entry at this level. */
	lbnadd = 1;
	for (i = level; i > 0; i--)
		lbnadd *= NINDIR(fs);
	/* Two views of one buffer: 32-bit entries (UFS1) or 64-bit (UFS2). */
	bap1 = (void *)dblk_read(blk, fs->fs_bsize);
	bap2 = (void *)bap1;
	for (i = 0; i < NINDIR(fs); i++) {
		if (fs->fs_magic == FS_UFS1_MAGIC)
			nblk = *bap1++;
		else
			nblk = *bap2++;
		if (nblk == 0)
			continue;
		if (level == 0) {
			/* Entries of a level 0 indirect are data blocks. */
			nlbn = -lbn + i * lbnadd;
			(*frags) += fs->fs_frag;
			visitor(ino, nlbn, nblk, fs->fs_frag);
		} else {
			nlbn = (lbn + 1) - (i * lbnadd);
			indir_visit(ino, nlbn, nblk, frags, visitor, flags);
		}
	}
out:
	if (flags & VISIT_INDIR) {
		(*frags) += fs->fs_frag;
		visitor(ino, lbn, blk, fs->fs_frag);
	}
}

/*
 * Visit each block in an inode as specified by 'flags' and call a
 * callback function.  The callback may inspect or free blocks.  The
 * count of frags found according to the size in the file is returned.
 * This is not valid for sparse files but may be used to determine
 * the correct di_blocks for a file.
*/
static uint64_t
ino_visit(union dinode *ip, ino_t ino, ino_visitor visitor, int flags)
{
	ufs_lbn_t nextlbn;
	ufs_lbn_t tmpval;
	ufs_lbn_t lbn;
	uint64_t size;
	uint64_t fragcnt;
	int mode;
	int frags;
	int i;

	size = DIP(ip, di_size);
	mode = DIP(ip, di_mode) & IFMT;
	fragcnt = 0;
	/* Extended attribute blocks are reported with lbns -1, -2, ... */
	if ((flags & VISIT_EXT) &&
	    fs->fs_magic == FS_UFS2_MAGIC && ip->dp2.di_extsize) {
		for (i = 0; i < NXADDR; i++) {
			if (ip->dp2.di_extb[i] == 0)
				continue;
			frags = sblksize(fs, ip->dp2.di_extsize, i);
			frags = numfrags(fs, frags);
			fragcnt += frags;
			visitor(ino, -1 - i, ip->dp2.di_extb[i], frags);
		}
	}
	/* Skip datablocks for short links and devices. */
	if (mode == IFBLK || mode == IFCHR ||
	    (mode == IFLNK && size < fs->fs_maxsymlinklen))
		return (fragcnt);
	/* Direct blocks. */
	for (i = 0; i < NDADDR; i++) {
		if (DIP(ip, di_db[i]) == 0)
			continue;
		frags = sblksize(fs, size, i);
		frags = numfrags(fs, frags);
		fragcnt += frags;
		visitor(ino, i, DIP(ip, di_db[i]), frags);
	}
	/*
	 * We know the following indirects are real as we're following
	 * real pointers to them.
	 */
	flags |= VISIT_ROOT;
	for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++,
	    lbn = nextlbn) {
		nextlbn = lbn + tmpval;
		tmpval *= NINDIR(fs);
		if (DIP(ip, di_ib[i]) == 0)
			continue;
		indir_visit(ino, -lbn - i, DIP(ip, di_ib[i]), &fragcnt, visitor,
		    flags);
	}
	return (fragcnt);
}

/*
 * Null visitor function used when we just want to count blocks and
 * record the lbn.  The most recent positive lbn seen is left in visitlbn.
 */
ufs_lbn_t visitlbn;
static void
null_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
{
	if (lbn > 0)
		visitlbn = lbn;
}

/*
 * Recalculate di_blocks when we discover that a block allocation or
 * free was not successfully completed.  The kernel does not roll this back
 * because it would be too expensive to compute which indirects were
 * reachable at the time the inode was written.
 */
static void
ino_adjblks(struct suj_ino *sino)
{
	union dinode *ip;
	uint64_t blocks;
	uint64_t frags;
	off_t isize;
	off_t size;
	ino_t ino;

	ino = sino->si_ino;
	ip = ino_read(ino);
	/* No need to adjust zero'd inodes. */
	if (DIP(ip, di_mode) == 0)
		return;
	/*
	 * Visit all blocks and count them as well as recording the last
	 * valid lbn in the file.  If the file size doesn't agree with the
	 * last lbn we need to truncate to fix it.  Otherwise just adjust
	 * the blocks count.
	 */
	visitlbn = 0;
	frags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT);
	blocks = fsbtodb(fs, frags);
	/*
	 * We assume the size and direct block list is kept coherent by
	 * softdep.  For files that have extended into indirects we truncate
	 * to the size in the inode or the maximum size permitted by
	 * populated indirects.
	 */
	if (visitlbn >= NDADDR) {
		isize = DIP(ip, di_size);
		size = lblktosize(fs, visitlbn + 1);
		if (isize > size)
			isize = size;
		/* Always truncate to free any unpopulated indirects. */
		ino_trunc(sino->si_ino, isize);
		return;
	}
	if (blocks == DIP(ip, di_blocks))
		return;
	if (debug)
		printf("ino %ju adjusting block count from %jd to %jd\n",
		    (uintmax_t)ino, DIP(ip, di_blocks), blocks);
	DIP_SET(ip, di_blocks, blocks);
	ino_dirty(ino);
}

/*
 * Visitor that frees the visited block, sparing any frags that
 * blk_freemask() reports as reused.
 */
static void
blk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
{

	blk_free(blk, blk_freemask(blk, ino, lbn, frags), frags);
}

/*
 * Free a block or tree of blocks that was previously rooted in ino at
 * the given lbn.  If the lbn is an indirect all children are freed
 * recursively.
*/ static void blk_free_lbn(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags, int follow) { uint64_t resid; int mask; mask = blk_freemask(blk, ino, lbn, frags); resid = 0; if (lbn <= -NDADDR && follow && mask == 0) indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR); else blk_free(blk, mask, frags); } static void ino_setskip(struct suj_ino *sino, ino_t parent) { int isdot; int mode; if (ino_isat(sino->si_ino, DOTDOT_OFFSET, parent, &mode, &isdot)) sino->si_skipparent = 1; } static void ino_remref(ino_t parent, ino_t child, uint64_t diroff, int isdotdot) { struct suj_ino *sino; struct suj_rec *srec; struct jrefrec *rrec; /* * Lookup this inode to see if we have a record for it. */ sino = ino_lookup(child, 0); /* * Tell any child directories we've already removed their * parent link cnt. Don't try to adjust our link down again. */ if (sino != NULL && isdotdot == 0) ino_setskip(sino, parent); /* * No valid record for this inode. Just drop the on-disk * link by one. */ if (sino == NULL || sino->si_hasrecs == 0) { ino_decr(child); return; } /* * Use ino_adjust() if ino_check() has already processed this * child. If we lose the last non-dot reference to a * directory it will be discarded. */ if (sino->si_linkadj) { sino->si_nlink--; if (isdotdot) sino->si_dotlinks--; ino_adjust(sino); return; } /* * If we haven't yet processed this inode we need to make * sure we will successfully discover the lost path. If not * use nlinkadj to remember. */ TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; if (rrec->jr_parent == parent && rrec->jr_diroff == diroff) return; } sino->si_nlinkadj++; } /* * Free the children of a directory when the directory is discarded. 
*/
static void
ino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
{
	struct suj_ino *sino;
	struct direct *dp;
	off_t diroff;
	uint8_t *block;
	int skipparent;
	int isdotdot;
	int dpoff;
	int size;

	/*
	 * si_skipparent means the ".." reference was already accounted for,
	 * so it must not be dropped a second time below.
	 */
	sino = ino_lookup(ino, 0);
	if (sino)
		skipparent = sino->si_skipparent;
	else
		skipparent = 0;
	size = lfragtosize(fs, frags);
	block = dblk_read(blk, size);
	dp = (struct direct *)&block[0];
	/*
	 * Walk the entries of this directory block.  Note the loop condition
	 * inspects d_reclen of the entry loaded by the previous iteration
	 * (or the first entry on entry), so a zero d_reclen ends the walk.
	 */
	for (dpoff = 0; dpoff < size && dp->d_reclen; dpoff += dp->d_reclen) {
		dp = (struct direct *)&block[dpoff];
		/* Skip unused slots and whiteouts. */
		if (dp->d_ino == 0 || dp->d_ino == WINO)
			continue;
		/* "." is a self reference; nothing to drop. */
		if (dp->d_namlen == 1 && dp->d_name[0] == '.')
			continue;
		isdotdot = dp->d_namlen == 2 && dp->d_name[0] == '.' &&
		    dp->d_name[1] == '.';
		if (isdotdot && skipparent == 1)
			continue;
		if (debug)
			printf("Directory %ju removing ino %ju name %s\n",
			    (uintmax_t)ino, (uintmax_t)dp->d_ino, dp->d_name);
		/* Byte offset of this entry within the whole directory. */
		diroff = lblktosize(fs, lbn) + dpoff;
		ino_remref(ino, dp->d_ino, diroff, isdotdot);
	}
}

/*
 * Reclaim an inode, freeing all blocks and decrementing all children's
 * link counts.  Free the inode back to the cg.
 *
 * 'ip' is the in-memory dinode for 'ino'; 'mode' is handed to ino_free().
 * Attempting to reclaim ROOTINO is reported through err_suj().
 */
static void
ino_reclaim(union dinode *ip, ino_t ino, int mode)
{
	uint32_t gen;

	if (ino == ROOTINO)
		err_suj("Attempting to free ROOTINO\n");
	if (debug)
		printf("Truncating and freeing ino %ju, nlink %d, mode %o\n",
		    (uintmax_t)ino, DIP(ip, di_nlink), DIP(ip, di_mode));

	/* For a directory, first drop the references its entries hold. */
	if ((DIP(ip, di_mode) & IFMT) == IFDIR)
		ino_visit(ip, ino, ino_free_children, 0);
	DIP_SET(ip, di_nlink, 0);
	/* Release every data, indirect and extended-attribute block. */
	ino_visit(ip, ino, blk_free_visit, VISIT_EXT | VISIT_INDIR);
	/*
	 * Clear the on-disk inode, sized per filesystem format, but carry
	 * the generation number across the wipe.
	 */
	gen = DIP(ip, di_gen);
	if (fs->fs_magic == FS_UFS1_MAGIC)
		bzero(ip, sizeof(struct ufs1_dinode));
	else
		bzero(ip, sizeof(struct ufs2_dinode));
	DIP_SET(ip, di_gen, gen);
	ino_dirty(ino);
	ino_free(ino, mode);
	return;
}

/*
 * Adjust an inode's link count down by one when a directory goes away.
*/ static void ino_decr(ino_t ino) { union dinode *ip; int reqlink; int nlink; int mode; ip = ino_read(ino); nlink = DIP(ip, di_nlink); mode = DIP(ip, di_mode); if (nlink < 1) err_suj("Inode %d link count %d invalid\n", ino, nlink); if (mode == 0) err_suj("Inode %d has a link of %d with 0 mode\n", ino, nlink); nlink--; if ((mode & IFMT) == IFDIR) reqlink = 2; else reqlink = 1; if (nlink < reqlink) { if (debug) printf("ino %ju not enough links to live %d < %d\n", (uintmax_t)ino, nlink, reqlink); ino_reclaim(ip, ino, mode); return; } DIP_SET(ip, di_nlink, nlink); ino_dirty(ino); } /* * Adjust the inode link count to 'nlink'. If the count reaches zero * free it. */ static void ino_adjust(struct suj_ino *sino) { struct jrefrec *rrec; struct suj_rec *srec; struct suj_ino *stmp; union dinode *ip; nlink_t nlink; int recmode; int reqlink; int isdot; int mode; ino_t ino; nlink = sino->si_nlink; ino = sino->si_ino; mode = sino->si_mode & IFMT; /* * If it's a directory with no dot links, it was truncated before * the name was cleared. We need to clear the dirent that * points at it. */ if (mode == IFDIR && nlink == 1 && sino->si_dotlinks == 0) { sino->si_nlink = nlink = 0; TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; if (ino_isat(rrec->jr_parent, rrec->jr_diroff, ino, &recmode, &isdot) == 0) continue; ino_clrat(rrec->jr_parent, rrec->jr_diroff, ino); break; } if (srec == NULL) errx(1, "Directory %ju name not found", (uintmax_t)ino); } /* * If it's a directory with no real names pointing to it go ahead * and truncate it. This will free any children. */ if (mode == IFDIR && nlink - sino->si_dotlinks == 0) { sino->si_nlink = nlink = 0; /* * Mark any .. links so they know not to free this inode * when they are removed. 
*/ TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; if (rrec->jr_diroff == DOTDOT_OFFSET) { stmp = ino_lookup(rrec->jr_parent, 0); if (stmp) ino_setskip(stmp, ino); } } } ip = ino_read(ino); mode = DIP(ip, di_mode) & IFMT; if (nlink > LINK_MAX) err_suj("ino %ju nlink manipulation error, new %d, old %d\n", (uintmax_t)ino, nlink, DIP(ip, di_nlink)); if (debug) printf("Adjusting ino %ju, nlink %d, old link %d lastmode %o\n", (uintmax_t)ino, nlink, DIP(ip, di_nlink), sino->si_mode); if (mode == 0) { if (debug) printf("ino %ju, zero inode freeing bitmap\n", (uintmax_t)ino); ino_free(ino, sino->si_mode); return; } /* XXX Should be an assert? */ if (mode != sino->si_mode && debug) printf("ino %ju, mode %o != %o\n", (uintmax_t)ino, mode, sino->si_mode); if ((mode & IFMT) == IFDIR) reqlink = 2; else reqlink = 1; /* If the inode doesn't have enough links to live, free it. */ if (nlink < reqlink) { if (debug) printf("ino %ju not enough links to live %d < %d\n", (uintmax_t)ino, nlink, reqlink); ino_reclaim(ip, ino, mode); return; } /* If required write the updated link count. */ if (DIP(ip, di_nlink) == nlink) { if (debug) printf("ino %ju, link matches, skipping.\n", (uintmax_t)ino); return; } DIP_SET(ip, di_nlink, nlink); ino_dirty(ino); } /* * Truncate some or all blocks in an indirect, freeing any that are required * and zeroing the indirect. 
*/ static void indir_trunc(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, ufs_lbn_t lastlbn) { ufs2_daddr_t *bap2; ufs1_daddr_t *bap1; ufs_lbn_t lbnadd; ufs2_daddr_t nblk; ufs_lbn_t next; ufs_lbn_t nlbn; int dirty; int level; int i; if (blk == 0) return; dirty = 0; level = lbn_level(lbn); if (level == -1) err_suj("Invalid level for lbn %jd\n", lbn); lbnadd = 1; for (i = level; i > 0; i--) lbnadd *= NINDIR(fs); bap1 = (void *)dblk_read(blk, fs->fs_bsize); bap2 = (void *)bap1; for (i = 0; i < NINDIR(fs); i++) { if (fs->fs_magic == FS_UFS1_MAGIC) nblk = *bap1++; else nblk = *bap2++; if (nblk == 0) continue; if (level != 0) { nlbn = (lbn + 1) - (i * lbnadd); /* * Calculate the lbn of the next indirect to * determine if any of this indirect must be * reclaimed. */ next = -(lbn + level) + ((i+1) * lbnadd); if (next <= lastlbn) continue; indir_trunc(ino, nlbn, nblk, lastlbn); /* If all of this indirect was reclaimed, free it. */ nlbn = next - lbnadd; if (nlbn < lastlbn) continue; } else { nlbn = -lbn + i * lbnadd; if (nlbn < lastlbn) continue; } dirty = 1; blk_free(nblk, 0, fs->fs_frag); if (fs->fs_magic == FS_UFS1_MAGIC) *(bap1 - 1) = 0; else *(bap2 - 1) = 0; } if (dirty) dblk_dirty(blk); } /* * Truncate an inode to the minimum of the given size or the last populated * block after any over size have been discarded. The kernel would allocate * the last block in the file but fsck does not and neither do we. This * code never extends files, only shrinks them. */ static void ino_trunc(ino_t ino, off_t size) { union dinode *ip; ufs2_daddr_t bn; uint64_t totalfrags; ufs_lbn_t nextlbn; ufs_lbn_t lastlbn; ufs_lbn_t tmpval; ufs_lbn_t lbn; ufs_lbn_t i; int frags; off_t cursize; off_t off; int mode; ip = ino_read(ino); mode = DIP(ip, di_mode) & IFMT; cursize = DIP(ip, di_size); if (debug) printf("Truncating ino %ju, mode %o to size %jd from size %jd\n", (uintmax_t)ino, mode, size, cursize); /* Skip datablocks for short links and devices. 
*/ if (mode == 0 || mode == IFBLK || mode == IFCHR || (mode == IFLNK && cursize < fs->fs_maxsymlinklen)) return; /* Don't extend. */ if (size > cursize) size = cursize; lastlbn = lblkno(fs, blkroundup(fs, size)); for (i = lastlbn; i < NDADDR; i++) { if (DIP(ip, di_db[i]) == 0) continue; frags = sblksize(fs, cursize, i); frags = numfrags(fs, frags); blk_free(DIP(ip, di_db[i]), 0, frags); DIP_SET(ip, di_db[i], 0); } /* * Follow indirect blocks, freeing anything required. */ for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++, lbn = nextlbn) { nextlbn = lbn + tmpval; tmpval *= NINDIR(fs); /* If we're not freeing any in this indirect range skip it. */ if (lastlbn >= nextlbn) continue; if (DIP(ip, di_ib[i]) == 0) continue; indir_trunc(ino, -lbn - i, DIP(ip, di_ib[i]), lastlbn); /* If we freed everything in this indirect free the indir. */ if (lastlbn > lbn) continue; blk_free(DIP(ip, di_ib[i]), 0, frags); DIP_SET(ip, di_ib[i], 0); } ino_dirty(ino); /* * Now that we've freed any whole blocks that exceed the desired * truncation size, figure out how many blocks remain and what the * last populated lbn is. We will set the size to this last lbn * rather than worrying about allocating the final lbn as the kernel * would've done. This is consistent with normal fsck behavior. */ visitlbn = 0; totalfrags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT); if (size > lblktosize(fs, visitlbn + 1)) size = lblktosize(fs, visitlbn + 1); /* * If we're truncating direct blocks we have to adjust frags * accordingly. 
*/ if (visitlbn < NDADDR && totalfrags) { long oldspace, newspace; bn = DIP(ip, di_db[visitlbn]); if (bn == 0) err_suj("Bad blk at ino %ju lbn %jd\n", (uintmax_t)ino, visitlbn); oldspace = sblksize(fs, cursize, visitlbn); newspace = sblksize(fs, size, visitlbn); if (oldspace != newspace) { bn += numfrags(fs, newspace); frags = numfrags(fs, oldspace - newspace); blk_free(bn, 0, frags); totalfrags -= frags; } } DIP_SET(ip, di_blocks, fsbtodb(fs, totalfrags)); DIP_SET(ip, di_size, size); /* * If we've truncated into the middle of a block or frag we have * to zero it here. Otherwise the file could extend into * uninitialized space later. */ off = blkoff(fs, size); if (off && DIP(ip, di_mode) != IFDIR) { uint8_t *buf; long clrsize; bn = ino_blkatoff(ip, ino, visitlbn, &frags); if (bn == 0) err_suj("Block missing from ino %ju at lbn %jd\n", (uintmax_t)ino, visitlbn); clrsize = frags * fs->fs_fsize; buf = dblk_read(bn, clrsize); clrsize -= off; buf += off; bzero(buf, clrsize); dblk_dirty(bn); } return; } /* * Process records available for one inode and determine whether the * link count is correct or needs adjusting. 
*/ static void ino_check(struct suj_ino *sino) { struct suj_rec *srec; struct jrefrec *rrec; nlink_t dotlinks; int newlinks; int removes; int nlink; ino_t ino; int isdot; int isat; int mode; if (sino->si_hasrecs == 0) return; ino = sino->si_ino; rrec = (struct jrefrec *)TAILQ_FIRST(&sino->si_recs)->sr_rec; nlink = rrec->jr_nlink; newlinks = 0; dotlinks = 0; removes = sino->si_nlinkadj; TAILQ_FOREACH(srec, &sino->si_recs, sr_next) { rrec = (struct jrefrec *)srec->sr_rec; isat = ino_isat(rrec->jr_parent, rrec->jr_diroff, rrec->jr_ino, &mode, &isdot); if (isat && (mode & IFMT) != (rrec->jr_mode & IFMT)) err_suj("Inode mode/directory type mismatch %o != %o\n", mode, rrec->jr_mode); if (debug) printf("jrefrec: op %d ino %ju, nlink %d, parent %d, " "diroff %jd, mode %o, isat %d, isdot %d\n", rrec->jr_op, (uintmax_t)rrec->jr_ino, rrec->jr_nlink, rrec->jr_parent, rrec->jr_diroff, rrec->jr_mode, isat, isdot); mode = rrec->jr_mode & IFMT; if (rrec->jr_op == JOP_REMREF) removes++; newlinks += isat; if (isdot) dotlinks += isat; } /* * The number of links that remain are the starting link count * subtracted by the total number of removes with the total * links discovered back in. An incomplete remove thus * makes no change to the link count but an add increases * by one. */ if (debug) printf("ino %ju nlink %d newlinks %d removes %d dotlinks %d\n", (uintmax_t)ino, nlink, newlinks, removes, dotlinks); nlink += newlinks; nlink -= removes; sino->si_linkadj = 1; sino->si_nlink = nlink; sino->si_dotlinks = dotlinks; sino->si_mode = mode; ino_adjust(sino); } /* * Process records available for one block and determine whether it is * still allocated and whether the owning inode needs to be updated or * a free completed. */ static void blk_check(struct suj_blk *sblk) { struct suj_rec *srec; struct jblkrec *brec; struct suj_ino *sino; ufs2_daddr_t blk; int mask; int frags; int isat; /* * Each suj_blk actually contains records for any fragments in that * block. 
As a result we must evaluate each record individually. */ sino = NULL; TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { brec = (struct jblkrec *)srec->sr_rec; frags = brec->jb_frags; blk = brec->jb_blkno + brec->jb_oldfrags; isat = blk_isat(brec->jb_ino, brec->jb_lbn, blk, &frags); if (sino == NULL || sino->si_ino != brec->jb_ino) { sino = ino_lookup(brec->jb_ino, 1); sino->si_blkadj = 1; } if (debug) printf("op %d blk %jd ino %ju lbn %jd frags %d isat %d (%d)\n", brec->jb_op, blk, (uintmax_t)brec->jb_ino, brec->jb_lbn, brec->jb_frags, isat, frags); /* * If we found the block at this address we still have to * determine if we need to free the tail end that was * added by adding contiguous fragments from the same block. */ if (isat == 1) { if (frags == brec->jb_frags) continue; mask = blk_freemask(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags); mask >>= frags; blk += frags; frags = brec->jb_frags - frags; blk_free(blk, mask, frags); continue; } /* * The block wasn't found, attempt to free it. It won't be * freed if it was actually reallocated. If this was an * allocation we don't want to follow indirects as they * may not be written yet. Any children of the indirect will * have their own records. If it's a free we need to * recursively free children. */ blk_free_lbn(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags, brec->jb_op == JOP_FREEBLK); } } /* * Walk the list of inode records for this cg and resolve moved and duplicate * inode references now that we have a complete picture. */ static void cg_build(struct suj_cg *sc) { struct suj_ino *sino; int i; for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) ino_build(sino); } /* * Handle inodes requiring truncation. This must be done prior to * looking up any inodes in directories. 
*/ static void cg_trunc(struct suj_cg *sc) { struct suj_ino *sino; int i; for (i = 0; i < SUJ_HASHSIZE; i++) { LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { if (sino->si_trunc) { ino_trunc(sino->si_ino, sino->si_trunc->jt_size); sino->si_blkadj = 0; sino->si_trunc = NULL; } if (sino->si_blkadj) ino_adjblks(sino); } } } static void cg_adj_blk(struct suj_cg *sc) { struct suj_ino *sino; int i; for (i = 0; i < SUJ_HASHSIZE; i++) { LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) { if (sino->si_blkadj) ino_adjblks(sino); } } } /* * Free any partially allocated blocks and then resolve inode block * counts. */ static void cg_check_blk(struct suj_cg *sc) { struct suj_blk *sblk; int i; for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next) blk_check(sblk); } /* * Walk the list of inode records for this cg, recovering any * changes which were not complete at the time of crash. */ static void cg_check_ino(struct suj_cg *sc) { struct suj_ino *sino; int i; for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) ino_check(sino); } /* * Write a potentially dirty cg. Recalculate the summary information and * update the superblock summary. */ static void cg_write(struct suj_cg *sc) { ufs1_daddr_t fragno, cgbno, maxbno; u_int8_t *blksfree; struct cg *cgp; int blk; int i; if (sc->sc_dirty == 0) return; /* * Fix the frag and cluster summary. 
*/ cgp = sc->sc_cgp; cgp->cg_cs.cs_nbfree = 0; cgp->cg_cs.cs_nffree = 0; bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum)); maxbno = fragstoblks(fs, fs->fs_fpg); if (fs->fs_contigsumsize > 0) { for (i = 1; i <= fs->fs_contigsumsize; i++) cg_clustersum(cgp)[i] = 0; bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT)); } blksfree = cg_blksfree(cgp); for (cgbno = 0; cgbno < maxbno; cgbno++) { if (ffs_isfreeblock(fs, blksfree, cgbno)) continue; if (ffs_isblock(fs, blksfree, cgbno)) { ffs_clusteracct(fs, cgp, cgbno, 1); cgp->cg_cs.cs_nbfree++; continue; } fragno = blkstofrags(fs, cgbno); blk = blkmap(fs, blksfree, fragno); ffs_fragacct(fs, blk, cgp->cg_frsum, 1); for (i = 0; i < fs->fs_frag; i++) if (isset(blksfree, fragno + i)) cgp->cg_cs.cs_nffree++; } /* * Update the superblock cg summary from our now correct values * before writing the block. */ fs->fs_cs(fs, sc->sc_cgx) = cgp->cg_cs; if (bwrite(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf, fs->fs_bsize) == -1) err_suj("Unable to write cylinder group %d\n", sc->sc_cgx); } /* * Write out any modified inodes. */ static void cg_write_inos(struct suj_cg *sc) { struct ino_blk *iblk; int i; for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(iblk, &sc->sc_iblkhash[i], ib_next) if (iblk->ib_dirty) iblk_write(iblk); } static void cg_apply(void (*apply)(struct suj_cg *)) { struct suj_cg *scg; int i; for (i = 0; i < SUJ_HASHSIZE; i++) LIST_FOREACH(scg, &cghash[i], sc_next) apply(scg); } /* * Process the unlinked but referenced file list. Freeing all inodes. */ static void ino_unlinked(void) { union dinode *ip; uint16_t mode; ino_t inon; ino_t ino; ino = fs->fs_sujfree; fs->fs_sujfree = 0; while (ino != 0) { ip = ino_read(ino); mode = DIP(ip, di_mode) & IFMT; inon = DIP(ip, di_freelink); DIP_SET(ip, di_freelink, 0); /* * XXX Should this be an errx? 
*/ if (DIP(ip, di_nlink) == 0) { if (debug) printf("Freeing unlinked ino %ju mode %o\n", (uintmax_t)ino, mode); ino_reclaim(ip, ino, mode); } else if (debug) printf("Skipping ino %ju mode %o with link %d\n", (uintmax_t)ino, mode, DIP(ip, di_nlink)); ino = inon; } } /* * Append a new record to the list of records requiring processing. */ static void ino_append(union jrec *rec) { struct jrefrec *refrec; struct jmvrec *mvrec; struct suj_ino *sino; struct suj_rec *srec; mvrec = &rec->rec_jmvrec; refrec = &rec->rec_jrefrec; if (debug && mvrec->jm_op == JOP_MVREF) printf("ino move: ino %d, parent %d, diroff %jd, oldoff %jd\n", mvrec->jm_ino, mvrec->jm_parent, mvrec->jm_newoff, mvrec->jm_oldoff); else if (debug && (refrec->jr_op == JOP_ADDREF || refrec->jr_op == JOP_REMREF)) printf("ino ref: op %d, ino %d, nlink %d, " "parent %d, diroff %jd\n", refrec->jr_op, refrec->jr_ino, refrec->jr_nlink, refrec->jr_parent, refrec->jr_diroff); sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1); sino->si_hasrecs = 1; srec = errmalloc(sizeof(*srec)); srec->sr_rec = rec; TAILQ_INSERT_TAIL(&sino->si_newrecs, srec, sr_next); } /* * Add a reference adjustment to the sino list and eliminate dups. The * primary loop in ino_build_ref() checks for dups but new ones may be * created as a result of offset adjustments. */ static void ino_add_ref(struct suj_ino *sino, struct suj_rec *srec) { struct jrefrec *refrec; struct suj_rec *srn; struct jrefrec *rrn; refrec = (struct jrefrec *)srec->sr_rec; /* * We walk backwards so that the oldest link count is preserved. If * an add record conflicts with a remove keep the remove. Redundant * removes are eliminated in ino_build_ref. Otherwise we keep the * oldest record at a given location. 
*/ for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn; srn = TAILQ_PREV(srn, srechd, sr_next)) { rrn = (struct jrefrec *)srn->sr_rec; if (rrn->jr_parent != refrec->jr_parent || rrn->jr_diroff != refrec->jr_diroff) continue; if (rrn->jr_op == JOP_REMREF || refrec->jr_op == JOP_ADDREF) { rrn->jr_mode = refrec->jr_mode; return; } /* * Adding a remove. * * Replace the record in place with the old nlink in case * we replace the head of the list. Abandon srec as a dup. */ refrec->jr_nlink = rrn->jr_nlink; srn->sr_rec = srec->sr_rec; return; } TAILQ_INSERT_TAIL(&sino->si_recs, srec, sr_next); } /* * Create a duplicate of a reference at a previous location. */ static void ino_dup_ref(struct suj_ino *sino, struct jrefrec *refrec, off_t diroff) { struct jrefrec *rrn; struct suj_rec *srn; rrn = errmalloc(sizeof(*refrec)); *rrn = *refrec; rrn->jr_op = JOP_ADDREF; rrn->jr_diroff = diroff; srn = errmalloc(sizeof(*srn)); srn->sr_rec = (union jrec *)rrn; ino_add_ref(sino, srn); } /* * Add a reference to the list at all known locations. We follow the offset * changes for a single instance and create duplicate add refs at each so * that we can tolerate any version of the directory block. Eliminate * removes which collide with adds that are seen in the journal. They should * not adjust the link count down. */ static void ino_build_ref(struct suj_ino *sino, struct suj_rec *srec) { struct jrefrec *refrec; struct jmvrec *mvrec; struct suj_rec *srp; struct suj_rec *srn; struct jrefrec *rrn; off_t diroff; refrec = (struct jrefrec *)srec->sr_rec; /* * Search for a mvrec that matches this offset. Whether it's an add * or a remove we can delete the mvref after creating a dup record in * the old location. 
*/ if (!TAILQ_EMPTY(&sino->si_movs)) { diroff = refrec->jr_diroff; for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn; srn = srp) { srp = TAILQ_PREV(srn, srechd, sr_next); mvrec = (struct jmvrec *)srn->sr_rec; if (mvrec->jm_parent != refrec->jr_parent || mvrec->jm_newoff != diroff) continue; diroff = mvrec->jm_oldoff; TAILQ_REMOVE(&sino->si_movs, srn, sr_next); free(srn); ino_dup_ref(sino, refrec, diroff); } } /* * If a remove wasn't eliminated by an earlier add just append it to * the list. */ if (refrec->jr_op == JOP_REMREF) { ino_add_ref(sino, srec); return; } /* * Walk the list of records waiting to be added to the list. We * must check for moves that apply to our current offset and remove * them from the list. Remove any duplicates to eliminate removes * with corresponding adds. */ TAILQ_FOREACH_SAFE(srn, &sino->si_newrecs, sr_next, srp) { switch (srn->sr_rec->rec_jrefrec.jr_op) { case JOP_ADDREF: /* * This should actually be an error we should * have a remove for every add journaled. */ rrn = (struct jrefrec *)srn->sr_rec; if (rrn->jr_parent != refrec->jr_parent || rrn->jr_diroff != refrec->jr_diroff) break; TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); break; case JOP_REMREF: /* * Once we remove the current iteration of the * record at this address we're done. */ rrn = (struct jrefrec *)srn->sr_rec; if (rrn->jr_parent != refrec->jr_parent || rrn->jr_diroff != refrec->jr_diroff) break; TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); ino_add_ref(sino, srec); return; case JOP_MVREF: /* * Update our diroff based on any moves that match * and remove the move. 
*/ mvrec = (struct jmvrec *)srn->sr_rec; if (mvrec->jm_parent != refrec->jr_parent || mvrec->jm_oldoff != refrec->jr_diroff) break; ino_dup_ref(sino, refrec, mvrec->jm_oldoff); refrec->jr_diroff = mvrec->jm_newoff; TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next); break; default: err_suj("ino_build_ref: Unknown op %d\n", srn->sr_rec->rec_jrefrec.jr_op); } } ino_add_ref(sino, srec); } /* * Walk the list of new records and add them in-order resolving any * dups and adjusted offsets. */ static void ino_build(struct suj_ino *sino) { struct suj_rec *srec; while ((srec = TAILQ_FIRST(&sino->si_newrecs)) != NULL) { TAILQ_REMOVE(&sino->si_newrecs, srec, sr_next); switch (srec->sr_rec->rec_jrefrec.jr_op) { case JOP_ADDREF: case JOP_REMREF: ino_build_ref(sino, srec); break; case JOP_MVREF: /* * Add this mvrec to the queue of pending mvs. */ TAILQ_INSERT_TAIL(&sino->si_movs, srec, sr_next); break; default: err_suj("ino_build: Unknown op %d\n", srec->sr_rec->rec_jrefrec.jr_op); } } if (TAILQ_EMPTY(&sino->si_recs)) sino->si_hasrecs = 0; } /* * Modify journal records so they refer to the base block number * and a start and end frag range. This is to facilitate the discovery * of overlapping fragment allocations. */ static void blk_build(struct jblkrec *blkrec) { struct suj_rec *srec; struct suj_blk *sblk; struct jblkrec *blkrn; ufs2_daddr_t blk; int frag; if (debug) printf("blk_build: op %d blkno %jd frags %d oldfrags %d " "ino %d lbn %jd\n", blkrec->jb_op, blkrec->jb_blkno, blkrec->jb_frags, blkrec->jb_oldfrags, blkrec->jb_ino, blkrec->jb_lbn); blk = blknum(fs, blkrec->jb_blkno); frag = fragnum(fs, blkrec->jb_blkno); sblk = blk_lookup(blk, 1); /* * Rewrite the record using oldfrags to indicate the offset into * the block. Leave jb_frags as the actual allocated count. */ blkrec->jb_blkno -= frag; blkrec->jb_oldfrags = frag; if (blkrec->jb_oldfrags + blkrec->jb_frags > fs->fs_frag) err_suj("Invalid fragment count %d oldfrags %d\n", blkrec->jb_frags, frag); /* * Detect dups. 
If we detect a dup we always discard the oldest * record as it is superseded by the new record. This speeds up * later stages but also eliminates free records which are used * to indicate that the contents of indirects can be trusted. */ TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) { blkrn = (struct jblkrec *)srec->sr_rec; if (blkrn->jb_ino != blkrec->jb_ino || blkrn->jb_lbn != blkrec->jb_lbn || blkrn->jb_blkno != blkrec->jb_blkno || blkrn->jb_frags != blkrec->jb_frags || blkrn->jb_oldfrags != blkrec->jb_oldfrags) continue; if (debug) printf("Removed dup.\n"); /* Discard the free which is a dup with an alloc. */ if (blkrec->jb_op == JOP_FREEBLK) return; TAILQ_REMOVE(&sblk->sb_recs, srec, sr_next); free(srec); break; } srec = errmalloc(sizeof(*srec)); srec->sr_rec = (union jrec *)blkrec; TAILQ_INSERT_TAIL(&sblk->sb_recs, srec, sr_next); } static void ino_build_trunc(struct jtrncrec *rec) { struct suj_ino *sino; if (debug) printf("ino_build_trunc: op %d ino %d, size %jd\n", rec->jt_op, rec->jt_ino, rec->jt_size); sino = ino_lookup(rec->jt_ino, 1); if (rec->jt_op == JOP_SYNC) { sino->si_trunc = NULL; return; } if (sino->si_trunc == NULL || sino->si_trunc->jt_size > rec->jt_size) sino->si_trunc = rec; } /* * Build up tables of the operations we need to recover. */ static void suj_build(void) { struct suj_seg *seg; union jrec *rec; int off; int i; TAILQ_FOREACH(seg, &allsegs, ss_next) { if (debug) printf("seg %jd has %d records, oldseq %jd.\n", seg->ss_rec.jsr_seq, seg->ss_rec.jsr_cnt, seg->ss_rec.jsr_oldest); off = 0; rec = (union jrec *)seg->ss_blk; for (i = 0; i < seg->ss_rec.jsr_cnt; off += JREC_SIZE, rec++) { /* skip the segrec. 
*/ if ((off % real_dev_bsize) == 0) continue; switch (rec->rec_jrefrec.jr_op) { case JOP_ADDREF: case JOP_REMREF: case JOP_MVREF: ino_append(rec); break; case JOP_NEWBLK: case JOP_FREEBLK: blk_build((struct jblkrec *)rec); break; case JOP_TRUNC: case JOP_SYNC: ino_build_trunc((struct jtrncrec *)rec); break; default: err_suj("Unknown journal operation %d (%d)\n", rec->rec_jrefrec.jr_op, off); } i++; } } } /* * Prune the journal segments to those we care about based on the * oldest sequence in the newest segment. Order the segment list * based on sequence number. */ static void suj_prune(void) { struct suj_seg *seg; struct suj_seg *segn; uint64_t newseq; int discard; if (debug) printf("Pruning up to %jd\n", oldseq); /* First free the expired segments. */ TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { if (seg->ss_rec.jsr_seq >= oldseq) continue; TAILQ_REMOVE(&allsegs, seg, ss_next); free(seg->ss_blk); free(seg); } /* Next ensure that segments are ordered properly. */ seg = TAILQ_FIRST(&allsegs); if (seg == NULL) { if (debug) printf("Empty journal\n"); return; } newseq = seg->ss_rec.jsr_seq; for (;;) { seg = TAILQ_LAST(&allsegs, seghd); if (seg->ss_rec.jsr_seq >= newseq) break; TAILQ_REMOVE(&allsegs, seg, ss_next); TAILQ_INSERT_HEAD(&allsegs, seg, ss_next); newseq = seg->ss_rec.jsr_seq; } if (newseq != oldseq) { TAILQ_FOREACH(seg, &allsegs, ss_next) { printf("%jd, ", seg->ss_rec.jsr_seq); } printf("\n"); err_suj("Journal file sequence mismatch %jd != %jd\n", newseq, oldseq); } /* * The kernel may asynchronously write segments which can create * gaps in the sequence space. Throw away any segments after the * gap as the kernel guarantees only those that are contiguously * reachable are marked as completed. 
*/ discard = 0; TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) { if (!discard && newseq++ == seg->ss_rec.jsr_seq) { jrecs += seg->ss_rec.jsr_cnt; jbytes += seg->ss_rec.jsr_blocks * real_dev_bsize; continue; } discard = 1; if (debug) printf("Journal order mismatch %jd != %jd pruning\n", newseq-1, seg->ss_rec.jsr_seq); TAILQ_REMOVE(&allsegs, seg, ss_next); free(seg->ss_blk); free(seg); } if (debug) printf("Processing journal segments from %jd to %jd\n", oldseq, newseq-1); } /* * Verify the journal inode before attempting to read records. */ static int suj_verifyino(union dinode *ip) { if (DIP(ip, di_nlink) != 1) { printf("Invalid link count %d for journal inode %ju\n", DIP(ip, di_nlink), (uintmax_t)sujino); return (-1); } if ((DIP(ip, di_flags) & (SF_IMMUTABLE | SF_NOUNLINK)) != (SF_IMMUTABLE | SF_NOUNLINK)) { printf("Invalid flags 0x%X for journal inode %ju\n", DIP(ip, di_flags), (uintmax_t)sujino); return (-1); } if (DIP(ip, di_mode) != (IFREG | IREAD)) { printf("Invalid mode %o for journal inode %ju\n", DIP(ip, di_mode), (uintmax_t)sujino); return (-1); } if (DIP(ip, di_size) < SUJ_MIN) { printf("Invalid size %jd for journal inode %ju\n", DIP(ip, di_size), (uintmax_t)sujino); return (-1); } if (DIP(ip, di_modrev) != fs->fs_mtime) { printf("Journal timestamp does not match fs mount time\n"); return (-1); } return (0); } struct jblocks { struct jextent *jb_extent; /* Extent array. */ int jb_avail; /* Available extents. */ int jb_used; /* Last used extent. */ int jb_head; /* Allocator head. */ int jb_off; /* Allocator extent offset. */ }; struct jextent { ufs2_daddr_t je_daddr; /* Disk block address. */ int je_blocks; /* Disk block count. 
*/
};
static struct jblocks *suj_jblocks;

/*
 * Allocate and initialize an empty extent map.  The map starts with room
 * for 10 zeroed extents and with its used/head/offset cursors set to 0.
 */
static struct jblocks *
jblocks_create(void)
{
	struct jblocks *jblocks;
	int size;

	jblocks = errmalloc(sizeof(*jblocks));
	jblocks->jb_avail = 10;
	jblocks->jb_used = 0;
	jblocks->jb_head = 0;
	jblocks->jb_off = 0;
	size = sizeof(struct jextent) * jblocks->jb_avail;
	jblocks->jb_extent = errmalloc(size);
	bzero(jblocks->jb_extent, size);

	return (jblocks);
}

/*
 * Return the next available disk block and the amount of contiguous
 * free space it contains.
 *
 * 'bytes' is the amount wanted; '*actual' receives the number of bytes
 * available at the returned address, capped at 'bytes'.  Returns 0 once
 * the head has moved past the last used extent ('*actual' is then left
 * untouched).
 */
static ufs2_daddr_t
jblocks_next(struct jblocks *jblocks, int bytes, int *actual)
{
	struct jextent *jext;
	ufs2_daddr_t daddr;
	int freecnt;
	int blocks;

	blocks = bytes / disk->d_bsize;
	jext = &jblocks->jb_extent[jblocks->jb_head];
	freecnt = jext->je_blocks - jblocks->jb_off;
	if (freecnt == 0) {
		/* The current extent is used up; step to the next one. */
		jblocks->jb_off = 0;
		if (++jblocks->jb_head > jblocks->jb_used)
			return (0);
		jext = &jblocks->jb_extent[jblocks->jb_head];
		freecnt = jext->je_blocks;
	}
	if (freecnt > blocks)
		freecnt = blocks;
	*actual = freecnt * disk->d_bsize;
	daddr = jext->je_daddr + jblocks->jb_off;

	return (daddr);
}

/*
 * Advance the allocation head by a specified number of bytes, consuming
 * one journal segment.
 */
static void
jblocks_advance(struct jblocks *jblocks, int bytes)
{

	jblocks->jb_off += bytes / disk->d_bsize;
}

/*
 * Release the extent array and then the map itself.
 */
static void
jblocks_destroy(struct jblocks *jblocks)
{

	free(jblocks->jb_extent);
	free(jblocks);
}

/*
 * Record a run of 'blocks' disk blocks starting at 'daddr' in the map.
 * A run that begins exactly where the last extent ends is merged into
 * it; anything else becomes a new extent, doubling the array when it is
 * full.  A slot whose je_daddr is 0 is treated as not yet used.
 */
static void
jblocks_add(struct jblocks *jblocks, ufs2_daddr_t daddr, int blocks)
{
	struct jextent *jext;
	int size;

	jext = &jblocks->jb_extent[jblocks->jb_used];
	/* Adding the first block. */
	if (jext->je_daddr == 0) {
		jext->je_daddr = daddr;
		jext->je_blocks = blocks;
		return;
	}
	/* Extending the last extent. */
	if (jext->je_daddr + jext->je_blocks == daddr) {
		jext->je_blocks += blocks;
		return;
	}
	/* Adding a new extent.
	*/
	if (++jblocks->jb_used == jblocks->jb_avail) {
		/* Grow into a zeroed array twice as large, keeping old data. */
		jblocks->jb_avail *= 2;
		size = sizeof(struct jextent) * jblocks->jb_avail;
		jext = errmalloc(size);
		bzero(jext, size);
		bcopy(jblocks->jb_extent, jext,
		    sizeof(struct jextent) * jblocks->jb_used);
		free(jblocks->jb_extent);
		jblocks->jb_extent = jext;
	}
	jext = &jblocks->jb_extent[jblocks->jb_used];
	jext->je_daddr = daddr;
	jext->je_blocks = blocks;
	return;
}

/*
 * Add a file block from the journal to the extent map.  We can't read
 * each file block individually because the kernel treats it as a circular
 * buffer and segments may span multiple contiguous blocks.
 *
 * 'ino' and 'lbn' are not used here; 'blk' and 'frags' are converted
 * with fsbtodb() before being added to suj_jblocks.
 */
static void
suj_add_block(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
{

	jblocks_add(suj_jblocks, fsbtodb(fs, blk), fsbtodb(fs, frags));
}

/*
 * Walk the extents in suj_jblocks, and append a copy of every complete
 * journal segment found to the allsegs list.  oldseq is raised to the
 * largest jsr_oldest seen among the accepted segments.
 */
static void
suj_read(void)
{
	uint8_t block[1 * 1024 * 1024];
	struct suj_seg *seg;
	struct jsegrec *recn;
	struct jsegrec *rec;
	ufs2_daddr_t blk;
	int readsize;
	int blocks;
	int recsize;
	int size;
	int i;

	/*
	 * Read records until we exhaust the journal space.  If we find
	 * an invalid record we start searching for a valid segment header
	 * at the next block.  This is because we don't have a head/tail
	 * pointer and must recover the information indirectly.  At the gap
	 * between the head and tail we won't necessarily have a valid
	 * segment.
	 */
restart:
	for (;;) {
		size = sizeof(block);
		blk = jblocks_next(suj_jblocks, size, &readsize);
		if (blk == 0)
			return;
		size = readsize;
		/*
		 * Read 1MB at a time and scan for records within this block.
		*/
		if (bread(disk, blk, &block, size) == -1) {
			err_suj("Error reading journal block %jd\n",
			    (intmax_t)blk);
		}
		for (rec = (void *)block; size; size -= recsize,
		    rec = (struct jsegrec *)((uintptr_t)rec + recsize)) {
			/* Until proven otherwise, skip one device block. */
			recsize = real_dev_bsize;
			if (rec->jsr_time != fs->fs_mtime) {
				if (debug)
					printf("Rec time %jd != fs mtime %jd\n",
					    rec->jsr_time, fs->fs_mtime);
				jblocks_advance(suj_jblocks, recsize);
				continue;
			}
			if (rec->jsr_cnt == 0) {
				if (debug)
					printf("Found illegal count %d\n",
					    rec->jsr_cnt);
				jblocks_advance(suj_jblocks, recsize);
				continue;
			}
			blocks = rec->jsr_blocks;
			recsize = blocks * real_dev_bsize;
			if (recsize > size) {
				/*
				 * We may just have run out of buffer, restart
				 * the loop to re-read from this spot.
				 */
				if (size < fs->fs_bsize &&
				    size != readsize &&
				    recsize <= fs->fs_bsize)
					goto restart;
				if (debug)
					printf("Found invalid segsize %d > %d\n",
					    recsize, size);
				recsize = real_dev_bsize;
				jblocks_advance(suj_jblocks, recsize);
				continue;
			}
			/*
			 * Verify that all blocks in the segment are present.
			 */
			for (i = 1; i < blocks; i++) {
				recn = (void *)((uintptr_t)rec) + i *
				    real_dev_bsize;
				if (recn->jsr_seq == rec->jsr_seq &&
				    recn->jsr_time == rec->jsr_time)
					continue;
				if (debug)
					printf("Incomplete record %jd (%d)\n",
					    rec->jsr_seq, i);
				/* Skip only the blocks that did match. */
				recsize = i * real_dev_bsize;
				jblocks_advance(suj_jblocks, recsize);
				goto restart;
			}
			/* Keep a private copy of the whole segment. */
			seg = errmalloc(sizeof(*seg));
			seg->ss_blk = errmalloc(recsize);
			seg->ss_rec = *rec;
			bcopy((void *)rec, seg->ss_blk, recsize);
			if (rec->jsr_oldest > oldseq)
				oldseq = rec->jsr_oldest;
			TAILQ_INSERT_TAIL(&allsegs, seg, ss_next);
			jblocks_advance(suj_jblocks, recsize);
		}
	}
}

/*
 * Search a directory block for the SUJ_FILE.
*/
/*
 * Does nothing once sujino is already set.  Otherwise reads the 'frags'
 * fragments at 'blk' and walks the directory entries in them; the first
 * entry whose name equals SUJ_FILE has its inode number stored in sujino.
 * 'ino' and 'lbn' are not used.
 */
static void
suj_find(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
{
	char block[MAXBSIZE];
	struct direct *dp;
	int bytes;
	int off;

	if (sujino)
		return;
	bytes = lfragtosize(fs, frags);
	if (bread(disk, fsbtodb(fs, blk), block, bytes) <= 0)
		err_suj("Failed to read ROOTINO directory block %jd\n", blk);
	for (off = 0; off < bytes; off += dp->d_reclen) {
		dp = (struct direct *)&block[off];
		/* A zero record length would never advance; stop here. */
		if (dp->d_reclen == 0)
			break;
		if (dp->d_ino == 0)
			continue;
		if (dp->d_namlen != strlen(SUJ_FILE))
			continue;
		if (bcmp(dp->d_name, SUJ_FILE, dp->d_namlen) != 0)
			continue;
		sujino = dp->d_ino;
		return;
	}
}

/*
 * Orchestrate the verification of a filesystem via the softupdates journal.
 *
 * Returns 0 when the journal was processed (or the operator declined to
 * write the changes), and -1 when the caller should fall back to a full
 * check: journal inode missing, journal inode failing verification,
 * sparse journal, or an inconsistency reported through jmpbuf.
 */
int
suj_check(const char *filesys)
{
	union dinode *jip;
	union dinode *ip;
	uint64_t blocks;
	int retval;
	struct suj_seg *seg;
	struct suj_seg *segn;

	initsuj();
	opendisk(filesys);

	/*
	 * Set an exit point when SUJ check failed
	 */
	retval = setjmp(jmpbuf);
	if (retval != 0) {
		pwarn("UNEXPECTED SU+J INCONSISTENCY\n");
		/* Drop every segment collected so far. */
		TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) {
			TAILQ_REMOVE(&allsegs, seg, ss_next);
			free(seg->ss_blk);
			free(seg);
		}
		if (reply("FALLBACK TO FULL FSCK") == 0) {
			ckfini(0);
			exit(EEXIT);
		} else
			return (-1);
	}

	/*
	 * Find the journal inode.
	 */
	ip = ino_read(ROOTINO);
	sujino = 0;
	ino_visit(ip, ROOTINO, suj_find, 0);
	if (sujino == 0) {
		printf("Journal inode removed. Use tunefs to re-create.\n");
		sblock.fs_flags &= ~FS_SUJ;
		sblock.fs_sujfree = 0;
		return (-1);
	}
	/*
	 * Fetch the journal inode and verify it.
	 */
	jip = ino_read(sujino);
	printf("** SU+J Recovering %s\n", filesys);
	if (suj_verifyino(jip) != 0)
		return (-1);
	/*
	 * Build a list of journal blocks in jblocks before parsing the
	 * available journal blocks in with suj_read().
	*/
	printf("** Reading %jd byte journal from inode %ju.\n",
	    DIP(jip, di_size), (uintmax_t)sujino);
	suj_jblocks = jblocks_create();
	blocks = ino_visit(jip, sujino, suj_add_block, 0);
	if (blocks != numfrags(fs, DIP(jip, di_size))) {
		/*
		 * NOTE(review): suj_jblocks is not destroyed on this
		 * return path -- confirm whether that is intentional.
		 */
		printf("Sparse journal inode %ju.\n", (uintmax_t)sujino);
		return (-1);
	}
	suj_read();
	jblocks_destroy(suj_jblocks);
	suj_jblocks = NULL;
	if (preen || reply("RECOVER")) {
		printf("** Building recovery table.\n");
		suj_prune();
		suj_build();
		cg_apply(cg_build);
		printf("** Resolving unreferenced inode list.\n");
		ino_unlinked();
		printf("** Processing journal entries.\n");
		cg_apply(cg_trunc);
		cg_apply(cg_check_blk);
		cg_apply(cg_adj_blk);
		cg_apply(cg_check_ino);
	}
	/* Interactive runs may decline to write the computed changes. */
	if (preen == 0 && (jrecs > 0 || jbytes > 0) &&
	    reply("WRITE CHANGES") == 0)
		return (0);
	/*
	 * To remain idempotent with partial truncations the free bitmaps
	 * must be written followed by indirect blocks and lastly inode
	 * blocks.  This preserves access to the modified pointers until
	 * they are freed.
	 */
	cg_apply(cg_write);
	dblk_write();
	cg_apply(cg_write_inos);
	/* Write back superblock.
	*/
	closedisk(filesys);
	if (jrecs > 0 || jbytes > 0) {
		printf("** %jd journal records in %jd bytes for %.2f%% utilization\n",
		    jrecs, jbytes,
		    ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100);
		printf("** Freed %jd inodes (%jd dirs) %jd blocks, and %jd frags.\n",
		    freeinos, freedir, freeblocks, freefrags);
	}

	return (0);
}

/*
 * Reset the file-scope SUJ state: empty both hash tables and the segment
 * list, clear the cached pointers, and zero every counter.
 */
static void
initsuj(void)
{
	int i;

	for (i = 0; i < SUJ_HASHSIZE; i++) {
		LIST_INIT(&cghash[i]);
		LIST_INIT(&dbhash[i]);
	}
	lastcg = NULL;
	lastblk = NULL;
	TAILQ_INIT(&allsegs);
	oldseq = 0;
	disk = NULL;
	fs = NULL;
	sujino = 0;
	freefrags = 0;
	freeblocks = 0;
	freeinos = 0;
	freedir = 0;
	jbytes = 0;
	jrecs = 0;
	suj_jblocks = NULL;
}
Index: head/sbin/geom/class/virstor/geom_virstor.c
===================================================================
--- head/sbin/geom/class/virstor/geom_virstor.c (revision 298871)
+++ head/sbin/geom/class/virstor/geom_virstor.c (revision 298872)
@@ -1,583 +1,583 @@
/*-
 * Copyright (c) 2005 Ivan Voras
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include
__FBSDID("$FreeBSD$");
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
uint32_t lib_version = G_LIB_VERSION;
uint32_t version = G_VIRSTOR_VERSION;

/* Default values, as strings, for the "label" -m and -s options below. */
#define GVIRSTOR_CHUNK_SIZE "4M"
#define GVIRSTOR_VIR_SIZE "2T"

#if G_LIB_VERSION == 1
/* Support RELENG_6 */
#define G_TYPE_BOOL G_TYPE_NONE
#endif

/*
 * virstor_main gets called by the geom(8) utility
 */
static void virstor_main(struct gctl_req *req, unsigned flags);

/*
 * Command table for this class.  "clear", "dump" and "label" are routed
 * to virstor_main(); the remaining verbs have a NULL handler here.
 */
struct g_command class_commands[] = {
	{ "clear", G_FLAG_VERBOSE, virstor_main, G_NULL_OPTS,
	    "[-v] prov ..."
	},
	{ "dump", 0, virstor_main, G_NULL_OPTS,
	    "prov ..."
	},
	{ "label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, virstor_main,
	    {
		{ 'h', "hardcode", NULL, G_TYPE_BOOL},
		{ 'm', "chunk_size", GVIRSTOR_CHUNK_SIZE, G_TYPE_NUMBER},
		{ 's', "vir_size", GVIRSTOR_VIR_SIZE, G_TYPE_NUMBER},
		G_OPT_SENTINEL
	    },
	    "[-h] [-v] [-m chunk_size] [-s vir_size] name provider0 [provider1 ...]"
	},
	{ "destroy", G_FLAG_VERBOSE, NULL,
	    {
		{ 'f', "force", NULL, G_TYPE_BOOL},
		G_OPT_SENTINEL
	    },
	    "[-fv] name ..."
	},
	{ "stop", G_FLAG_VERBOSE, NULL,
	    {
		{ 'f', "force", NULL, G_TYPE_BOOL},
		G_OPT_SENTINEL
	    },
	    "[-fv] name ... (alias for \"destroy\")"
	},
	{ "add", G_FLAG_VERBOSE, NULL,
	    {
		{ 'h', "hardcode", NULL, G_TYPE_BOOL},
		G_OPT_SENTINEL
	    },
	    "[-vh] name prov [prov ...]"
	},
	{ "remove", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
	    "[-v] name ..."
	},
	G_CMD_SENTINEL
};

/* Set by virstor_main() when G_FLAG_VERBOSE is passed; read by the helpers. */
static int verbose = 0;

/* Helper functions' declarations */
static void virstor_clear(struct gctl_req *req);
static void virstor_dump(struct gctl_req *req);
static void virstor_label(struct gctl_req *req);

/* Dispatcher function (no real work done here, only verbose flag recorder) */
static void
virstor_main(struct gctl_req *req, unsigned flags)
{
	const char *name;

	if ((flags & G_FLAG_VERBOSE) != 0)
		verbose = 1;

	/* The "verb" request argument selects which helper runs. */
	name = gctl_get_ascii(req, "verb");
	if (name == NULL) {
		gctl_error(req, "No '%s' argument.", "verb");
		return;
	}
	if (strcmp(name, "label") == 0)
		virstor_label(req);
	else if (strcmp(name, "clear") == 0)
		virstor_clear(req);
	else if (strcmp(name, "dump") == 0)
		virstor_dump(req);
	else
		gctl_error(req, "%s: Unknown command: %s.", __func__, name);

	/* No CTASSERT in userland
	CTASSERT(VIRSTOR_MAP_BLOCK_ENTRIES*VIRSTOR_MAP_ENTRY_SIZE == MAXPHYS);
	*/
}

/*
 * Build the device path for 'name' in 'path' (a buffer of 'size' bytes):
 * _PATH_DEV is prepended unless 'name' already starts with it.
 */
static void
pathgen(const char *name, char *path, size_t size)
{

	if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) != 0)
		snprintf(path, size, "%s%s", _PATH_DEV, name);
	else
		strlcpy(path, name, size);
}

/*
 * Write 'size' bytes of metadata from 'md' into the last sector of
 * provider 'name'.  The data is copied into a sector-sized buffer and
 * that whole buffer is written at (mediasize - sectorsize).
 *
 * Returns 0 on success, otherwise an errno value (ENOMEM when the sector
 * buffer cannot be allocated).  'size' must not exceed the sector size;
 * this is only checked with assert().
 */
static int
my_g_metadata_store(const char *name, u_char *md, size_t size)
{
	char path[MAXPATHLEN];
	unsigned sectorsize;
	off_t mediasize;
	u_char *sector;
	int error, fd;

	pathgen(name, path, sizeof(path));
	sector = NULL;
	error = 0;

	fd = open(path, O_RDWR);
	if (fd == -1)
		return (errno);
	mediasize = g_get_mediasize(name);
	if (mediasize == 0) {
		error = errno;
		goto out;
	}
	sectorsize = g_get_sectorsize(name);
	if (sectorsize == 0) {
		error = errno;
		goto out;
	}
	assert(sectorsize >= size);
	sector = malloc(sectorsize);
	if (sector == NULL) {
		error = ENOMEM;
		goto out;
	}
	bcopy(md, sector, size);
	if (pwrite(fd, sector, sectorsize, mediasize - sectorsize) !=
	    (ssize_t)sectorsize) {
		error = errno;
		goto out;
	}
out:
	/* Common exit: release the buffer (if any) and the descriptor. */
	if (sector != NULL)
		free(sector);
	close(fd);
	return (error);
}

/*
 * Labels a new geom Meaning: parses and checks the parameters, calculates &
 * writes metadata to the relevant providers so when the next round
of * "tasting" comes (which will be just after the provider(s) are closed) geom * can be instantiated with the tasted metadata. */ static void virstor_label(struct gctl_req *req) { struct g_virstor_metadata md; off_t msize; unsigned char *sect; unsigned int i; size_t ssize, secsize; const char *name; char param[32]; int hardcode, nargs, error; struct virstor_map_entry *map; size_t total_chunks; /* We'll run out of memory if this needs to be bigger. */ unsigned int map_chunks; /* Chunks needed by the map (map size). */ size_t map_size; /* In bytes. */ ssize_t written; int fd; nargs = gctl_get_int(req, "nargs"); if (nargs < 2) { gctl_error(req, "Too few arguments (%d): expecting: name " "provider0 [provider1 ...]", nargs); return; } hardcode = gctl_get_int(req, "hardcode"); /* * Initialize constant parts of metadata: magic signature, version, * name. */ bzero(&md, sizeof(md)); strlcpy(md.md_magic, G_VIRSTOR_MAGIC, sizeof(md.md_magic)); md.md_version = G_VIRSTOR_VERSION; name = gctl_get_ascii(req, "arg0"); if (name == NULL) { gctl_error(req, "No 'arg%u' argument.", 0); return; } strlcpy(md.md_name, name, sizeof(md.md_name)); md.md_virsize = (off_t)gctl_get_intmax(req, "vir_size"); md.md_chunk_size = gctl_get_intmax(req, "chunk_size"); md.md_count = nargs - 1; if (md.md_virsize == 0 || md.md_chunk_size == 0) { gctl_error(req, "Virtual size and chunk size must be non-zero"); return; } if (md.md_chunk_size % MAXPHYS != 0) { /* XXX: This is not strictly needed, but it's convenient to * impose some limitations on it, so why not MAXPHYS. 
*/ - size_t new_size = (md.md_chunk_size / MAXPHYS) * MAXPHYS; + size_t new_size = rounddown(md.md_chunk_size, MAXPHYS); if (new_size < md.md_chunk_size) new_size += MAXPHYS; fprintf(stderr, "Resizing chunk size to be a multiple of " "MAXPHYS (%d kB).\n", MAXPHYS / 1024); fprintf(stderr, "New chunk size: %zu kB\n", new_size / 1024); md.md_chunk_size = new_size; } if (md.md_virsize % md.md_chunk_size != 0) { off_t chunk_count = md.md_virsize / md.md_chunk_size; md.md_virsize = chunk_count * md.md_chunk_size; fprintf(stderr, "Resizing virtual size to be a multiple of " "chunk size.\n"); fprintf(stderr, "New virtual size: %zu MB\n", (size_t)(md.md_virsize/(1024 * 1024))); } msize = secsize = 0; for (i = 1; i < (unsigned)nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_ascii(req, "%s", param); ssize = g_get_sectorsize(name); if (ssize == 0) fprintf(stderr, "%s for %s\n", strerror(errno), name); msize += g_get_mediasize(name); if (secsize == 0) secsize = ssize; else if (secsize != ssize) { gctl_error(req, "Devices need to have same sector size " "(%u on %s needs to be %u).", (u_int)ssize, name, (u_int)secsize); return; } } if (secsize == 0) { gctl_error(req, "Device not specified"); return; } if (md.md_chunk_size % secsize != 0) { fprintf(stderr, "Error: chunk size is not a multiple of sector " "size."); gctl_error(req, "Chunk size (in bytes) must be multiple of %u.", (unsigned int)secsize); return; } total_chunks = md.md_virsize / md.md_chunk_size; map_size = total_chunks * sizeof(*map); assert(md.md_virsize % md.md_chunk_size == 0); ssize = map_size % secsize; if (ssize != 0) { size_t add_chunks = (secsize - ssize) / sizeof(*map); total_chunks += add_chunks; md.md_virsize = (off_t)total_chunks * (off_t)md.md_chunk_size; map_size = total_chunks * sizeof(*map); fprintf(stderr, "Resizing virtual size to fit virstor " "structures.\n"); fprintf(stderr, "New virtual size: %ju MB (%zu new chunks)\n", (uintmax_t)(md.md_virsize / (1024 * 1024)), 
add_chunks); } if (verbose) printf("Total virtual chunks: %zu (%zu MB each), %ju MB total " "virtual size.\n", total_chunks, (size_t)(md.md_chunk_size / (1024 * 1024)), md.md_virsize/(1024 * 1024)); if ((off_t)md.md_virsize < msize) fprintf(stderr, "WARNING: Virtual storage size < Physical " "available storage (%ju < %ju)\n", md.md_virsize, msize); /* Clear last sector first to spoil all components if device exists. */ if (verbose) printf("Clearing metadata on"); for (i = 1; i < (unsigned)nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_ascii(req, "%s", param); if (verbose) printf(" %s", name); msize = g_get_mediasize(name); ssize = g_get_sectorsize(name); if (msize == 0 || ssize == 0) { gctl_error(req, "Can't retrieve information about " "%s: %s.", name, strerror(errno)); return; } if (msize < (off_t) MAX(md.md_chunk_size*4, map_size)) gctl_error(req, "Device %s is too small", name); error = g_metadata_clear(name, NULL); if (error != 0) { gctl_error(req, "Can't clear metadata on %s: %s.", name, strerror(error)); return; } } /* Write allocation table to the first provider - this needs to be done * before metadata is written because when kernel tastes it it's too * late */ name = gctl_get_ascii(req, "arg1"); /* device with metadata */ if (verbose) printf(".\nWriting allocation table to %s...", name); /* How many chunks does the map occupy? 
*/ map_chunks = map_size/md.md_chunk_size; if (map_size % md.md_chunk_size != 0) map_chunks++; if (verbose) { printf(" (%zu MB, %d chunks) ", map_size/(1024*1024), map_chunks); fflush(stdout); } if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) fd = open(name, O_RDWR); else { sprintf(param, "%s%s", _PATH_DEV, name); fd = open(param, O_RDWR); } if (fd < 0) gctl_error(req, "Cannot open provider %s to write map", name); /* Do it with calloc because there might be a need to set up chunk flags * in the future */ map = calloc(total_chunks, sizeof(*map)); if (map == NULL) { gctl_error(req, "Out of memory (need %zu bytes for allocation map)", map_size); } written = pwrite(fd, map, map_size, 0); free(map); if ((size_t)written != map_size) { if (verbose) { fprintf(stderr, "\nTried to write %zu, written %zd (%s)\n", map_size, written, strerror(errno)); } gctl_error(req, "Error writing out allocation map!"); return; } close (fd); if (verbose) printf("\nStoring metadata on "); /* * ID is randomly generated, unique for a geom. This is used to * recognize all providers belonging to one geom. */ md.md_id = arc4random(); /* Ok, store metadata. 
*/ for (i = 1; i < (unsigned)nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_ascii(req, "%s", param); msize = g_get_mediasize(name); ssize = g_get_sectorsize(name); if (verbose) printf("%s ", name); /* this provider's position/type in geom */ md.no = i - 1; /* this provider's size */ md.provsize = msize; /* chunk allocation info */ md.chunk_count = md.provsize / md.md_chunk_size; if (verbose) printf("(%u chunks) ", md.chunk_count); /* Check to make sure last sector is unused */ if ((off_t)(md.chunk_count * md.md_chunk_size) > (off_t)(msize-ssize)) md.chunk_count--; md.chunk_next = 0; if (i != 1) { md.chunk_reserved = 0; md.flags = 0; } else { md.chunk_reserved = map_chunks * 2; md.flags = VIRSTOR_PROVIDER_ALLOCATED | VIRSTOR_PROVIDER_CURRENT; md.chunk_next = md.chunk_reserved; if (verbose) printf("(%u reserved) ", md.chunk_reserved); } if (!hardcode) bzero(md.provider, sizeof(md.provider)); else { /* convert "/dev/something" to "something" */ if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) { strlcpy(md.provider, name + sizeof(_PATH_DEV) - 1, sizeof(md.provider)); } else strlcpy(md.provider, name, sizeof(md.provider)); } sect = malloc(ssize); if (sect == NULL) err(1, "Cannot allocate sector of %zu bytes", ssize); bzero(sect, ssize); virstor_metadata_encode(&md, sect); error = my_g_metadata_store(name, sect, ssize); free(sect); if (error != 0) { if (verbose) printf("\n"); fprintf(stderr, "Can't store metadata on %s: %s.\n", name, strerror(error)); gctl_error(req, "Not fully done (error storing metadata)."); return; } } #if 0 if (verbose) printf("\n"); #endif } /* Clears metadata on given provider(s) IF it's owned by us */ static void virstor_clear(struct gctl_req *req) { const char *name; char param[32]; unsigned i; int nargs, error; int fd; nargs = gctl_get_int(req, "nargs"); if (nargs < 1) { gctl_error(req, "Too few arguments."); return; } for (i = 0; i < (unsigned)nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = 
gctl_get_ascii(req, "%s", param); error = g_metadata_clear(name, G_VIRSTOR_MAGIC); if (error != 0) { fprintf(stderr, "Can't clear metadata on %s: %s " "(do I own it?)\n", name, strerror(error)); gctl_error(req, "Not fully done (can't clear metadata)."); continue; } if (strncmp(name, _PATH_DEV, sizeof(_PATH_DEV) - 1) == 0) fd = open(name, O_RDWR); else { sprintf(param, "%s%s", _PATH_DEV, name); fd = open(param, O_RDWR); } if (fd < 0) { gctl_error(req, "Cannot clear header sector for %s", name); continue; } if (verbose) printf("Metadata cleared on %s.\n", name); } } /* Print some metadata information */ static void virstor_metadata_dump(const struct g_virstor_metadata *md) { printf(" Magic string: %s\n", md->md_magic); printf(" Metadata version: %u\n", (u_int) md->md_version); printf(" Device name: %s\n", md->md_name); printf(" Device ID: %u\n", (u_int) md->md_id); printf(" Provider index: %u\n", (u_int) md->no); printf(" Active providers: %u\n", (u_int) md->md_count); printf(" Hardcoded provider: %s\n", md->provider[0] != '\0' ? 
md->provider : "(not hardcoded)"); printf(" Virtual size: %u MB\n", (unsigned int)(md->md_virsize/(1024 * 1024))); printf(" Chunk size: %u kB\n", md->md_chunk_size / 1024); printf(" Chunks on provider: %u\n", md->chunk_count); printf(" Chunks free: %u\n", md->chunk_count - md->chunk_next); printf(" Reserved chunks: %u\n", md->chunk_reserved); } /* Called by geom(8) via gvirstor_main() to dump metadata information */ static void virstor_dump(struct gctl_req *req) { struct g_virstor_metadata md; u_char tmpmd[512]; /* temporary buffer */ const char *name; char param[16]; int nargs, error, i; assert(sizeof(tmpmd) >= sizeof(md)); nargs = gctl_get_int(req, "nargs"); if (nargs < 1) { gctl_error(req, "Too few arguments."); return; } for (i = 0; i < nargs; i++) { snprintf(param, sizeof(param), "arg%u", i); name = gctl_get_ascii(req, "%s", param); error = g_metadata_read(name, (u_char *) & tmpmd, sizeof(tmpmd), G_VIRSTOR_MAGIC); if (error != 0) { fprintf(stderr, "Can't read metadata from %s: %s.\n", name, strerror(error)); gctl_error(req, "Not fully done (error reading metadata)."); continue; } virstor_metadata_decode((u_char *) & tmpmd, &md); printf("Metadata on %s:\n", name); virstor_metadata_dump(&md); printf("\n"); } } Index: head/sbin/recoverdisk/recoverdisk.c =================================================================== --- head/sbin/recoverdisk/recoverdisk.c (revision 298871) +++ head/sbin/recoverdisk/recoverdisk.c (revision 298872) @@ -1,322 +1,322 @@ /*- * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. 
Poul-Henning Kamp * ---------------------------------------------------------------------------- * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static volatile sig_atomic_t aborting = 0; static size_t bigsize = 1024 * 1024; static size_t medsize; static size_t minsize = 512; struct lump { off_t start; off_t len; int state; TAILQ_ENTRY(lump) list; }; static TAILQ_HEAD(, lump) lumps = TAILQ_HEAD_INITIALIZER(lumps); static void new_lump(off_t start, off_t len, int state) { struct lump *lp; lp = malloc(sizeof *lp); if (lp == NULL) err(1, "Malloc failed"); lp->start = start; lp->len = len; lp->state = state; TAILQ_INSERT_TAIL(&lumps, lp, list); } static struct lump *lp; static char *wworklist = NULL; static char *rworklist = NULL; #define PRINT_HEADER \ printf("%13s %7s %13s %5s %13s %13s %9s\n", \ "start", "size", "block-len", "state", "done", "remaining", "% done") #define PRINT_STATUS(start, i, len, state, d, t) \ printf("\r%13jd %7zu %13jd %5d %13jd %13jd %9.5f", \ (intmax_t)start, \ i, \ (intmax_t)len, \ state, \ (intmax_t)d, \ (intmax_t)(t - d), \ 100*(double)d/(double)t) /* Save the worklist if -w was given */ static void save_worklist(void) { FILE *file; struct lump *llp; if (wworklist != NULL) { (void)fprintf(stderr, "\nSaving worklist ..."); fflush(stderr); file = fopen(wworklist, "w"); if (file == NULL) err(1, "Error opening file %s", wworklist); TAILQ_FOREACH(llp, &lumps, list) fprintf(file, "%jd %jd %d\n", (intmax_t)llp->start, (intmax_t)llp->len, llp->state); fclose(file); (void)fprintf(stderr, " done.\n"); } } /* Read the worklist if -r was given */ static off_t read_worklist(off_t t) { off_t s, l, d; int state, lines; FILE *file; (void)fprintf(stderr, "Reading worklist ..."); fflush(stderr); file = fopen(rworklist, "r"); if (file == NULL) err(1, "Error opening file %s", rworklist); lines = 0; d = t; for (;;) { ++lines; if (3 != fscanf(file, "%jd %jd %d\n", &s, 
&l, &state)) { if (!feof(file)) err(1, "Error parsing file %s at line %d", rworklist, lines); else break; } new_lump(s, l, state); d -= l; } (void)fprintf(stderr, " done.\n"); /* * Return the number of bytes already read * (at least not in worklist). */ return (d); } static void usage(void) { (void)fprintf(stderr, "usage: recoverdisk [-b bigsize] [-r readlist] " "[-s interval] [-w writelist] source [destination]\n"); exit(1); } static void sighandler(__unused int sig) { aborting = 1; } int main(int argc, char * const argv[]) { int ch; int fdr, fdw; off_t t, d, start, len; size_t i, j; int error, state; u_char *buf; u_int sectorsize; off_t stripesize; time_t t1, t2; struct stat sb; u_int n, snapshot = 60; while ((ch = getopt(argc, argv, "b:r:w:s:")) != -1) { switch (ch) { case 'b': bigsize = strtoul(optarg, NULL, 0); break; case 'r': rworklist = strdup(optarg); if (rworklist == NULL) err(1, "Cannot allocate enough memory"); break; case 's': snapshot = strtoul(optarg, NULL, 0); break; case 'w': wworklist = strdup(optarg); if (wworklist == NULL) err(1, "Cannot allocate enough memory"); break; default: usage(); /* NOTREACHED */ } } argc -= optind; argv += optind; if (argc < 1 || argc > 2) usage(); fdr = open(argv[0], O_RDONLY); if (fdr < 0) err(1, "Cannot open read descriptor %s", argv[0]); error = fstat(fdr, &sb); if (error < 0) err(1, "fstat failed"); if (S_ISBLK(sb.st_mode) || S_ISCHR(sb.st_mode)) { error = ioctl(fdr, DIOCGSECTORSIZE, §orsize); if (error < 0) err(1, "DIOCGSECTORSIZE failed"); error = ioctl(fdr, DIOCGSTRIPESIZE, &stripesize); if (error == 0 && stripesize > sectorsize) sectorsize = stripesize; minsize = sectorsize; - bigsize = (bigsize / sectorsize) * sectorsize; + bigsize = rounddown(bigsize, sectorsize); error = ioctl(fdr, DIOCGMEDIASIZE, &t); if (error < 0) err(1, "DIOCGMEDIASIZE failed"); } else { t = sb.st_size; } if (bigsize < minsize) bigsize = minsize; for (ch = 0; (bigsize >> ch) > minsize; ch++) continue; medsize = bigsize >> (ch / 2); - 
medsize = (medsize / minsize) * minsize; + medsize = rounddown(medsize, minsize); fprintf(stderr, "Bigsize = %zu, medsize = %zu, minsize = %zu\n", bigsize, medsize, minsize); buf = malloc(bigsize); if (buf == NULL) err(1, "Cannot allocate %zu bytes buffer", bigsize); if (argc > 1) { fdw = open(argv[1], O_WRONLY | O_CREAT, DEFFILEMODE); if (fdw < 0) err(1, "Cannot open write descriptor %s", argv[1]); if (ftruncate(fdw, t) < 0) err(1, "Cannot truncate output %s to %jd bytes", argv[1], (intmax_t)t); } else fdw = -1; if (rworklist != NULL) { d = read_worklist(t); } else { new_lump(0, t, 0); d = 0; } if (wworklist != NULL) signal(SIGINT, sighandler); t1 = 0; start = len = i = state = 0; PRINT_HEADER; n = 0; for (;;) { lp = TAILQ_FIRST(&lumps); if (lp == NULL) break; while (lp->len > 0 && !aborting) { /* These are only copied for printing stats */ start = lp->start; len = lp->len; state = lp->state; i = MIN(lp->len, (off_t)bigsize); if (lp->state == 1) i = MIN(lp->len, (off_t)medsize); if (lp->state > 1) i = MIN(lp->len, (off_t)minsize); time(&t2); if (t1 != t2 || lp->len < (off_t)bigsize) { PRINT_STATUS(start, i, len, state, d, t); t1 = t2; if (++n == snapshot) { save_worklist(); n = 0; } } if (i == 0) { errx(1, "BOGUS i %10jd", (intmax_t)i); } fflush(stdout); j = pread(fdr, buf, i, lp->start); if (j == i) { d += i; if (fdw >= 0) j = pwrite(fdw, buf, i, lp->start); else j = i; if (j != i) printf("\nWrite error at %jd/%zu\n", lp->start, i); lp->start += i; lp->len -= i; continue; } printf("\n%jd %zu failed (%s)\n", lp->start, i, strerror(errno)); if (errno == EINVAL) { printf("read() size too big? Try with -b 131072"); aborting = 1; } if (errno == ENXIO) aborting = 1; new_lump(lp->start, i, lp->state + 1); lp->start += i; lp->len -= i; } if (aborting) { save_worklist(); return (0); } TAILQ_REMOVE(&lumps, lp, list); free(lp); } PRINT_STATUS(start, i, len, state, d, t); save_worklist(); printf("\nCompleted\n"); return (0); }