Index: head/sbin/dumpfs/dumpfs.c =================================================================== --- head/sbin/dumpfs/dumpfs.c (revision 329050) +++ head/sbin/dumpfs/dumpfs.c (revision 329051) @@ -1,529 +1,531 @@ /* * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2009 Robert N. M. Watson * All rights reserved. * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and Network Associates Laboratories, the Security * Research Division of Network Associates, Inc. under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS * research program. * * Copyright (c) 1983, 1992, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1983, 1992, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)dumpfs.c 8.5 (Berkeley) 4/29/95"; #endif static const char rcsid[] = "$FreeBSD$"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define afs disk.d_fs #define acg disk.d_cg static struct uufsd disk; static int dumpfs(const char *); static int dumpfsid(void); static int dumpcg(void); static int dumpfreespace(const char *, int); static void dumpfreespacecg(int); static int marshal(const char *); static void pbits(void *, int); static void pblklist(void *, int, off_t, int); static void ufserr(const char *); static void usage(void) __dead2; int main(int argc, char *argv[]) { const char *name; int ch, dofreespace, domarshal, dolabel, eval; dofreespace = domarshal = dolabel = eval = 0; while ((ch = getopt(argc, argv, "lfm")) != -1) { switch (ch) { case 'f': dofreespace++; break; case 'm': domarshal = 1; break; case 'l': dolabel = 1; break; case '?': default: usage(); } } argc -= optind; argv += optind; if (argc < 1) usage(); if (dofreespace && domarshal) usage(); if (dofreespace > 2) usage(); while ((name = *argv++) != NULL) { if (ufs_disk_fillout(&disk, name) == -1) { ufserr(name); eval |= 1; continue; } if (dofreespace) eval |= dumpfreespace(name, dofreespace); else if (domarshal) eval |= marshal(name); else if (dolabel) eval |= dumpfsid(); else eval |= dumpfs(name); ufs_disk_close(&disk); } exit(eval); } static int dumpfsid(void) { printf("%sufsid/%08x%08x\n", _PATH_DEV, afs.fs_id[0], afs.fs_id[1]); return 0; } static int dumpfs(const char *name) { time_t fstime; int64_t fssize; int32_t fsflags; int i; switch (disk.d_ufs) { case 2: fssize = afs.fs_size; fstime = afs.fs_time; printf("magic\t%x (UFS2)\ttime\t%s", afs.fs_magic, ctime(&fstime)); printf("superblock location\t%jd\tid\t[ %08x %08x ]\n", (intmax_t)afs.fs_sblockloc, afs.fs_id[0], afs.fs_id[1]); printf("ncg\t%d\tsize\t%jd\tblocks\t%jd\n", afs.fs_ncg, (intmax_t)fssize, (intmax_t)afs.fs_dsize); break; case 1: fssize = afs.fs_old_size; fstime = afs.fs_old_time; printf("magic\t%x (UFS1)\ttime\t%s", afs.fs_magic, ctime(&fstime)); printf("id\t[ %08x %08x ]\n", afs.fs_id[0], afs.fs_id[1]); printf("ncg\t%d\tsize\t%jd\tblocks\t%jd\n", afs.fs_ncg, (intmax_t)fssize, (intmax_t)afs.fs_dsize); break; default: goto err; } printf("bsize\t%d\tshift\t%d\tmask\t0x%08x\n", afs.fs_bsize, afs.fs_bshift, afs.fs_bmask); printf("fsize\t%d\tshift\t%d\tmask\t0x%08x\n", afs.fs_fsize, afs.fs_fshift, afs.fs_fmask); printf("frag\t%d\tshift\t%d\tfsbtodb\t%d\n", afs.fs_frag, afs.fs_fragshift, afs.fs_fsbtodb); printf("minfree\t%d%%\toptim\t%s\tsymlinklen %d\n", afs.fs_minfree, afs.fs_optim == FS_OPTSPACE ? "space" : "time", afs.fs_maxsymlinklen); switch (disk.d_ufs) { case 2: printf("%s %d\tmaxbpg\t%d\tmaxcontig %d\tcontigsumsize %d\n", "maxbsize", afs.fs_maxbsize, afs.fs_maxbpg, afs.fs_maxcontig, afs.fs_contigsumsize); printf("nbfree\t%jd\tndir\t%jd\tnifree\t%jd\tnffree\t%jd\n", (intmax_t)afs.fs_cstotal.cs_nbfree, (intmax_t)afs.fs_cstotal.cs_ndir, (intmax_t)afs.fs_cstotal.cs_nifree, (intmax_t)afs.fs_cstotal.cs_nffree); printf("bpg\t%d\tfpg\t%d\tipg\t%d\tunrefs\t%jd\n", afs.fs_fpg / afs.fs_frag, afs.fs_fpg, afs.fs_ipg, (intmax_t)afs.fs_unrefs); printf("nindir\t%d\tinopb\t%d\tmaxfilesize\t%ju\n", afs.fs_nindir, afs.fs_inopb, (uintmax_t)afs.fs_maxfilesize); printf("sbsize\t%d\tcgsize\t%d\tcsaddr\t%jd\tcssize\t%d\n", afs.fs_sbsize, afs.fs_cgsize, (intmax_t)afs.fs_csaddr, afs.fs_cssize); break; case 1: printf("maxbpg\t%d\tmaxcontig %d\tcontigsumsize %d\n", afs.fs_maxbpg, afs.fs_maxcontig, afs.fs_contigsumsize); printf("nbfree\t%d\tndir\t%d\tnifree\t%d\tnffree\t%d\n", afs.fs_old_cstotal.cs_nbfree, afs.fs_old_cstotal.cs_ndir, afs.fs_old_cstotal.cs_nifree, afs.fs_old_cstotal.cs_nffree); printf("cpg\t%d\tbpg\t%d\tfpg\t%d\tipg\t%d\n", afs.fs_old_cpg, afs.fs_fpg / afs.fs_frag, afs.fs_fpg, afs.fs_ipg); printf("nindir\t%d\tinopb\t%d\tnspf\t%d\tmaxfilesize\t%ju\n", afs.fs_nindir, afs.fs_inopb, afs.fs_old_nspf, (uintmax_t)afs.fs_maxfilesize); printf("sbsize\t%d\tcgsize\t%d\tcgoffset %d\tcgmask\t0x%08x\n", afs.fs_sbsize, afs.fs_cgsize, afs.fs_old_cgoffset, afs.fs_old_cgmask); printf("csaddr\t%d\tcssize\t%d\n", afs.fs_old_csaddr, afs.fs_cssize); printf("rotdelay %dms\trps\t%d\ttrackskew %d\tinterleave %d\n", afs.fs_old_rotdelay, afs.fs_old_rps, afs.fs_old_trackskew, afs.fs_old_interleave); printf("nsect\t%d\tnpsect\t%d\tspc\t%d\n", afs.fs_old_nsect, afs.fs_old_npsect, afs.fs_old_spc); break; default: goto err; } printf("sblkno\t%d\tcblkno\t%d\tiblkno\t%d\tdblkno\t%d\n", afs.fs_sblkno, afs.fs_cblkno, afs.fs_iblkno, afs.fs_dblkno); printf("cgrotor\t%d\tfmod\t%d\tronly\t%d\tclean\t%d\n", afs.fs_cgrotor, afs.fs_fmod, afs.fs_ronly, afs.fs_clean); printf("metaspace %jd\tavgfpdir %d\tavgfilesize %d\n", afs.fs_metaspace, afs.fs_avgfpdir, afs.fs_avgfilesize); printf("flags\t"); if (afs.fs_old_flags & FS_FLAGS_UPDATED) fsflags = afs.fs_flags; else fsflags = afs.fs_old_flags; if (fsflags == 0) printf("none"); if (fsflags & FS_UNCLEAN) printf("unclean "); if (fsflags & FS_DOSOFTDEP) printf("soft-updates%s ", (fsflags & FS_SUJ) ? "+journal" : ""); if (fsflags & FS_NEEDSFSCK) - printf("needs fsck run "); + printf("needs-fsck-run "); if (fsflags & FS_INDEXDIRS) - printf("indexed directories "); + printf("indexed-directories "); if (fsflags & FS_ACLS) printf("acls "); if (fsflags & FS_MULTILABEL) printf("multilabel "); if (fsflags & FS_GJOURNAL) printf("gjournal "); if (fsflags & FS_FLAGS_UPDATED) - printf("fs_flags expanded "); + printf("fs_flags-expanded "); if (fsflags & FS_NFS4ACLS) printf("nfsv4acls "); if (fsflags & FS_TRIM) printf("trim "); - fsflags &= ~(FS_UNCLEAN | FS_DOSOFTDEP | FS_NEEDSFSCK | FS_INDEXDIRS | + fsflags &= ~(FS_UNCLEAN | FS_DOSOFTDEP | FS_NEEDSFSCK | FS_METACKHASH | FS_ACLS | FS_MULTILABEL | FS_GJOURNAL | FS_FLAGS_UPDATED | - FS_NFS4ACLS | FS_SUJ | FS_TRIM); + FS_NFS4ACLS | FS_SUJ | FS_TRIM | FS_INDEXDIRS); if (fsflags != 0) - printf("unknown flags (%#x)", fsflags); + printf("unknown-flags (%#x)", fsflags); putchar('\n'); - printf("check hashes\t"); - fsflags = afs.fs_metackhash; - if (fsflags == 0) - printf("none"); - if (fsflags & CK_SUPERBLOCK) - printf("superblock "); - if (fsflags & CK_CYLGRP) - printf("cylinder-groups "); - if (fsflags & CK_INODE) - printf("inodes "); - if (fsflags & CK_INDIR) - printf("indirect-blocks "); - if (fsflags & CK_DIR) - printf("directories "); + if (afs.fs_flags & FS_METACKHASH) { + printf("check hashes\t"); + fsflags = afs.fs_metackhash; + if (fsflags == 0) + printf("none"); + if (fsflags & CK_SUPERBLOCK) + printf("superblock "); + if (fsflags & CK_CYLGRP) + printf("cylinder-groups "); + if (fsflags & CK_INODE) + printf("inodes "); + if (fsflags & CK_INDIR) + printf("indirect-blocks "); + if (fsflags & CK_DIR) + printf("directories "); + } fsflags &= ~(CK_SUPERBLOCK | CK_CYLGRP | CK_INODE | CK_INDIR | CK_DIR); if (fsflags != 0) printf("unknown flags (%#x)", fsflags); putchar('\n'); printf("fsmnt\t%s\n", afs.fs_fsmnt); printf("volname\t%s\tswuid\t%ju\tprovidersize\t%ju\n", afs.fs_volname, (uintmax_t)afs.fs_swuid, (uintmax_t)afs.fs_providersize); printf("\ncs[].cs_(nbfree,ndir,nifree,nffree):\n\t"); afs.fs_csp = calloc(1, afs.fs_cssize); if (bread(&disk, fsbtodb(&afs, afs.fs_csaddr), afs.fs_csp, afs.fs_cssize) == -1) goto err; for (i = 0; i < afs.fs_ncg; i++) { struct csum *cs = &afs.fs_cs(&afs, i); if (i && i % 4 == 0) printf("\n\t"); printf("(%d,%d,%d,%d) ", cs->cs_nbfree, cs->cs_ndir, cs->cs_nifree, cs->cs_nffree); } printf("\n"); if (fssize % afs.fs_fpg) { if (disk.d_ufs == 1) printf("cylinders in last group %d\n", howmany(afs.fs_old_size % afs.fs_fpg, afs.fs_old_spc / afs.fs_old_nspf)); printf("blocks in last group %ld\n\n", (long)((fssize % afs.fs_fpg) / afs.fs_frag)); } while ((i = cgread(&disk)) != 0) { if (i == -1 || dumpcg()) goto err; } return (0); err: ufserr(name); return (1); } static int dumpcg(void) { time_t cgtime; off_t cur; int i, j; printf("\ncg %d:\n", disk.d_lcg); cur = fsbtodb(&afs, cgtod(&afs, disk.d_lcg)) * disk.d_bsize; switch (disk.d_ufs) { case 2: cgtime = acg.cg_time; printf("magic\t%x\ttell\t%jx\ttime\t%s", acg.cg_magic, (intmax_t)cur, ctime(&cgtime)); printf("cgx\t%d\tndblk\t%d\tniblk\t%d\tinitiblk %d\tunrefs %d\n", acg.cg_cgx, acg.cg_ndblk, acg.cg_niblk, acg.cg_initediblk, acg.cg_unrefs); break; case 1: cgtime = acg.cg_old_time; printf("magic\t%x\ttell\t%jx\ttime\t%s", acg.cg_magic, (intmax_t)cur, ctime(&cgtime)); printf("cgx\t%d\tncyl\t%d\tniblk\t%d\tndblk\t%d\n", acg.cg_cgx, acg.cg_old_ncyl, acg.cg_old_niblk, acg.cg_ndblk); break; default: break; } printf("nbfree\t%d\tndir\t%d\tnifree\t%d\tnffree\t%d\n", acg.cg_cs.cs_nbfree, acg.cg_cs.cs_ndir, acg.cg_cs.cs_nifree, acg.cg_cs.cs_nffree); printf("rotor\t%d\tirotor\t%d\tfrotor\t%d\nfrsum", acg.cg_rotor, acg.cg_irotor, acg.cg_frotor); for (i = 1, j = 0; i < afs.fs_frag; i++) { printf("\t%d", acg.cg_frsum[i]); j += i * acg.cg_frsum[i]; } printf("\nsum of frsum: %d", j); if (afs.fs_contigsumsize > 0) { for (i = 1; i < afs.fs_contigsumsize; i++) { if ((i - 1) % 8 == 0) printf("\nclusters %d-%d:", i, MIN(afs.fs_contigsumsize - 1, i + 7)); printf("\t%d", cg_clustersum(&acg)[i]); } printf("\nclusters size %d and over: %d\n", afs.fs_contigsumsize, cg_clustersum(&acg)[afs.fs_contigsumsize]); printf("clusters free:\t"); pbits(cg_clustersfree(&acg), acg.cg_nclusterblks); } else printf("\n"); printf("inodes used:\t"); pbits(cg_inosused(&acg), afs.fs_ipg); printf("blks free:\t"); pbits(cg_blksfree(&acg), afs.fs_fpg); return (0); } static int dumpfreespace(const char *name, int fflag) { int i; while ((i = cgread(&disk)) != 0) { if (i == -1) goto err; dumpfreespacecg(fflag); } return (0); err: ufserr(name); return (1); } static void dumpfreespacecg(int fflag) { pblklist(cg_blksfree(&acg), afs.fs_fpg, disk.d_lcg * afs.fs_fpg, fflag); } static int marshal(const char *name) { struct fs *fs; fs = &disk.d_fs; printf("# newfs command for %s (%s)\n", name, disk.d_name); printf("newfs "); if (fs->fs_volname[0] != '\0') printf("-L %s ", fs->fs_volname); printf("-O %d ", disk.d_ufs); if (fs->fs_flags & FS_DOSOFTDEP) printf("-U "); printf("-a %d ", fs->fs_maxcontig); printf("-b %d ", fs->fs_bsize); /* -c is dumb */ printf("-d %d ", fs->fs_maxbsize); printf("-e %d ", fs->fs_maxbpg); printf("-f %d ", fs->fs_fsize); printf("-g %d ", fs->fs_avgfilesize); printf("-h %d ", fs->fs_avgfpdir); printf("-i %jd ", fragroundup(fs, lblktosize(fs, fragstoblks(fs, fs->fs_fpg)) / fs->fs_ipg)); if (fs->fs_flags & FS_SUJ) printf("-j "); if (fs->fs_flags & FS_GJOURNAL) printf("-J "); printf("-k %jd ", fs->fs_metaspace); if (fs->fs_flags & FS_MULTILABEL) printf("-l "); printf("-m %d ", fs->fs_minfree); /* -n unimplemented */ printf("-o "); switch (fs->fs_optim) { case FS_OPTSPACE: printf("space "); break; case FS_OPTTIME: printf("time "); break; default: printf("unknown "); break; } /* -p..r unimplemented */ printf("-s %jd ", (intmax_t)fsbtodb(fs, fs->fs_size)); if (fs->fs_flags & FS_TRIM) printf("-t "); printf("%s ", disk.d_name); printf("\n"); return 0; } static void pbits(void *vp, int max) { int i; char *p; int count, j; for (count = i = 0, p = vp; i < max; i++) if (isset(p, i)) { if (count) printf(",%s", count % 6 ? " " : "\n\t"); count++; printf("%d", i); j = i; while ((i+1) __FBSDID("$FreeBSD$"); -#define IN_RTLD /* So we pickup the P_OSREL defines */ #include #include #include #include #include #include #include #include #include #include "fsck.h" static void check_maps(u_char *, u_char *, int, ufs2_daddr_t, const char *, int *, int, int, int); static void clear_blocks(ufs2_daddr_t start, ufs2_daddr_t end); void pass5(void) { int c, i, j, blk, frags, basesize, mapsize; int inomapsize, blkmapsize; struct fs *fs = &sblock; ufs2_daddr_t d, dbase, dmax, start; - int rewritecg = 0, cgckadd = 0; + int rewritecg = 0; struct csum *cs; struct csum_total cstotal; struct inodesc idesc[3]; char buf[MAXBSIZE]; struct cg *cg, *newcg = (struct cg *)buf; struct bufarea *cgbp; inoinfo(UFS_WINO)->ino_state = USTATE; memset(newcg, 0, (size_t)fs->fs_cgsize); newcg->cg_niblk = fs->fs_ipg; - if (preen == 0 && yflag == 0 && fs->fs_magic == FS_UFS2_MAGIC && - fswritefd != -1 && (fs->fs_metackhash & CK_CYLGRP) == 0 && - getosreldate() >= P_OSREL_CK_CYLGRP && - reply("ADD CYLINDER GROUP CHECKSUM PROTECTION") != 0) { + /* check to see if we are to add a cylinder group check hash */ + if ((ckhashadd & CK_CYLGRP) != 0) { fs->fs_metackhash |= CK_CYLGRP; rewritecg = 1; - cgckadd = 1; sbdirty(); } if (cvtlevel >= 3) { if (fs->fs_maxcontig < 2 && fs->fs_contigsumsize > 0) { if (preen) pwarn("DELETING CLUSTERING MAPS\n"); if (preen || reply("DELETE CLUSTERING MAPS")) { fs->fs_contigsumsize = 0; rewritecg = 1; sbdirty(); } } if (fs->fs_maxcontig > 1) { const char *doit = NULL; if (fs->fs_contigsumsize < 1) { doit = "CREAT"; } else if (fs->fs_contigsumsize < fs->fs_maxcontig && fs->fs_contigsumsize < FS_MAXCONTIG) { doit = "EXPAND"; } if (doit) { i = fs->fs_contigsumsize; fs->fs_contigsumsize = MIN(fs->fs_maxcontig, FS_MAXCONTIG); if (CGSIZE(fs) > (u_int)fs->fs_bsize) { pwarn("CANNOT %s CLUSTER MAPS\n", doit); fs->fs_contigsumsize = i; } else if (preen || reply("CREATE CLUSTER MAPS")) { if (preen) pwarn("%sING CLUSTER MAPS\n", doit); fs->fs_cgsize = fragroundup(fs, CGSIZE(fs)); rewritecg = 1; sbdirty(); } } } } basesize = &newcg->cg_space[0] - (u_char *)(&newcg->cg_firstfield); if (sblock.fs_magic == FS_UFS2_MAGIC) { newcg->cg_iusedoff = basesize; } else { /* * We reserve the space for the old rotation summary * tables for the benefit of old kernels, but do not * maintain them in modern kernels. In time, they can * go away. */ newcg->cg_old_btotoff = basesize; newcg->cg_old_boff = newcg->cg_old_btotoff + fs->fs_old_cpg * sizeof(int32_t); newcg->cg_iusedoff = newcg->cg_old_boff + fs->fs_old_cpg * fs->fs_old_nrpos * sizeof(u_int16_t); memset(&newcg->cg_space[0], 0, newcg->cg_iusedoff - basesize); } inomapsize = howmany(fs->fs_ipg, CHAR_BIT); newcg->cg_freeoff = newcg->cg_iusedoff + inomapsize; blkmapsize = howmany(fs->fs_fpg, CHAR_BIT); newcg->cg_nextfreeoff = newcg->cg_freeoff + blkmapsize; if (fs->fs_contigsumsize > 0) { newcg->cg_clustersumoff = newcg->cg_nextfreeoff - sizeof(u_int32_t); newcg->cg_clustersumoff = roundup(newcg->cg_clustersumoff, sizeof(u_int32_t)); newcg->cg_clusteroff = newcg->cg_clustersumoff + (fs->fs_contigsumsize + 1) * sizeof(u_int32_t); newcg->cg_nextfreeoff = newcg->cg_clusteroff + howmany(fragstoblks(fs, fs->fs_fpg), CHAR_BIT); } newcg->cg_magic = CG_MAGIC; mapsize = newcg->cg_nextfreeoff - newcg->cg_iusedoff; memset(&idesc[0], 0, sizeof idesc); for (i = 0; i < 3; i++) idesc[i].id_type = ADDR; memset(&cstotal, 0, sizeof(struct csum_total)); dmax = blknum(fs, fs->fs_size + fs->fs_frag - 1); for (d = fs->fs_size; d < dmax; d++) setbmap(d); for (c = 0; c < fs->fs_ncg; c++) { if (got_siginfo) { printf("%s: phase 5: cyl group %d of %d (%d%%)\n", cdevname, c, sblock.fs_ncg, c * 100 / sblock.fs_ncg); got_siginfo = 0; } if (got_sigalarm) { setproctitle("%s p5 %d%%", cdevname, c * 100 / sblock.fs_ncg); got_sigalarm = 0; } cgbp = cglookup(c); cg = cgbp->b_un.b_cg; if (!cg_chkmagic(cg)) pfatal("CG %d: BAD MAGIC NUMBER\n", c); - if ((fs->fs_metackhash & CK_CYLGRP) != 0 && cgckadd == 0) { + /* + * If we have a cylinder group check hash and are not adding + * it for the first time, verify that it is good. + */ + if ((fs->fs_metackhash & CK_CYLGRP) != 0 && + (ckhashadd & CK_CYLGRP) == 0) { uint32_t ckhash, thishash; ckhash = cg->cg_ckhash; cg->cg_ckhash = 0; thishash = calculate_crc32c(~0L, cg, fs->fs_cgsize); if (ckhash != thishash) - pwarn("CG %d: BAD CHECKSUM %#x vs %#x", c, ckhash, thishash); + pwarn("CG %d: BAD CHECK-HASH %#x vs %#x", + c, ckhash, thishash); cg->cg_ckhash = ckhash; } newcg->cg_time = cg->cg_time; newcg->cg_old_time = cg->cg_old_time; newcg->cg_unrefs = cg->cg_unrefs; newcg->cg_cgx = c; dbase = cgbase(fs, c); dmax = dbase + fs->fs_fpg; if (dmax > fs->fs_size) dmax = fs->fs_size; newcg->cg_ndblk = dmax - dbase; if (fs->fs_magic == FS_UFS1_MAGIC) { if (c == fs->fs_ncg - 1) newcg->cg_old_ncyl = howmany(newcg->cg_ndblk, fs->fs_fpg / fs->fs_old_cpg); else newcg->cg_old_ncyl = fs->fs_old_cpg; newcg->cg_old_niblk = fs->fs_ipg; newcg->cg_niblk = 0; } if (fs->fs_contigsumsize > 0) newcg->cg_nclusterblks = newcg->cg_ndblk / fs->fs_frag; newcg->cg_cs.cs_ndir = 0; newcg->cg_cs.cs_nffree = 0; newcg->cg_cs.cs_nbfree = 0; newcg->cg_cs.cs_nifree = fs->fs_ipg; if (cg->cg_rotor >= 0 && cg->cg_rotor < newcg->cg_ndblk) newcg->cg_rotor = cg->cg_rotor; else newcg->cg_rotor = 0; if (cg->cg_frotor >= 0 && cg->cg_frotor < newcg->cg_ndblk) newcg->cg_frotor = cg->cg_frotor; else newcg->cg_frotor = 0; if (cg->cg_irotor >= 0 && cg->cg_irotor < fs->fs_ipg) newcg->cg_irotor = cg->cg_irotor; else newcg->cg_irotor = 0; if (fs->fs_magic == FS_UFS1_MAGIC) { newcg->cg_initediblk = 0; } else { if ((unsigned)cg->cg_initediblk > fs->fs_ipg) newcg->cg_initediblk = fs->fs_ipg; else newcg->cg_initediblk = cg->cg_initediblk; } memset(&newcg->cg_frsum[0], 0, sizeof newcg->cg_frsum); memset(cg_inosused(newcg), 0, (size_t)(mapsize)); j = fs->fs_ipg * c; for (i = 0; i < inostathead[c].il_numalloced; j++, i++) { switch (inoinfo(j)->ino_state) { case USTATE: break; case DSTATE: case DCLEAR: case DFOUND: case DZLINK: newcg->cg_cs.cs_ndir++; /* FALLTHROUGH */ case FSTATE: case FCLEAR: case FZLINK: newcg->cg_cs.cs_nifree--; setbit(cg_inosused(newcg), i); break; default: if (j < (int)UFS_ROOTINO) break; errx(EEXIT, "BAD STATE %d FOR INODE I=%d", inoinfo(j)->ino_state, j); } } if (c == 0) for (i = 0; i < (int)UFS_ROOTINO; i++) { setbit(cg_inosused(newcg), i); newcg->cg_cs.cs_nifree--; } start = -1; for (i = 0, d = dbase; d < dmax; d += fs->fs_frag, i += fs->fs_frag) { frags = 0; for (j = 0; j < fs->fs_frag; j++) { if (testbmap(d + j)) { if ((Eflag || Zflag) && start != -1) { clear_blocks(start, d + j - 1); start = -1; } continue; } if (start == -1) start = d + j; setbit(cg_blksfree(newcg), i + j); frags++; } if (frags == fs->fs_frag) { newcg->cg_cs.cs_nbfree++; if (fs->fs_contigsumsize > 0) setbit(cg_clustersfree(newcg), i / fs->fs_frag); } else if (frags > 0) { newcg->cg_cs.cs_nffree += frags; blk = blkmap(fs, cg_blksfree(newcg), i); ffs_fragacct(fs, blk, newcg->cg_frsum, 1); } } if ((Eflag || Zflag) && start != -1) clear_blocks(start, d - 1); if (fs->fs_contigsumsize > 0) { int32_t *sump = cg_clustersum(newcg); u_char *mapp = cg_clustersfree(newcg); int map = *mapp++; int bit = 1; int run = 0; for (i = 0; i < newcg->cg_nclusterblks; i++) { if ((map & bit) != 0) { run++; } else if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; run = 0; } if ((i & (CHAR_BIT - 1)) != (CHAR_BIT - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; } } if ((fs->fs_metackhash & CK_CYLGRP) != 0) { newcg->cg_ckhash = 0; newcg->cg_ckhash = calculate_crc32c(~0L, (void *)newcg, fs->fs_cgsize); } if (bkgrdflag != 0) { cstotal.cs_nffree += cg->cg_cs.cs_nffree; cstotal.cs_nbfree += cg->cg_cs.cs_nbfree; cstotal.cs_nifree += cg->cg_cs.cs_nifree; cstotal.cs_ndir += cg->cg_cs.cs_ndir; } else { cstotal.cs_nffree += newcg->cg_cs.cs_nffree; cstotal.cs_nbfree += newcg->cg_cs.cs_nbfree; cstotal.cs_nifree += newcg->cg_cs.cs_nifree; cstotal.cs_ndir += newcg->cg_cs.cs_ndir; } cs = &fs->fs_cs(fs, c); if (cursnapshot == 0 && memcmp(&newcg->cg_cs, cs, sizeof *cs) != 0 && dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(cs, &newcg->cg_cs, sizeof *cs); sbdirty(); } if (rewritecg) { memmove(cg, newcg, (size_t)fs->fs_cgsize); dirty(cgbp); continue; } if (cursnapshot == 0 && memcmp(newcg, cg, basesize) != 0 && dofix(&idesc[2], "SUMMARY INFORMATION BAD")) { memmove(cg, newcg, (size_t)basesize); dirty(cgbp); } if (bkgrdflag != 0 || usedsoftdep || debug) update_maps(cg, newcg, bkgrdflag); if (cursnapshot == 0 && memcmp(cg_inosused(newcg), cg_inosused(cg), mapsize) != 0 && dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { memmove(cg_inosused(cg), cg_inosused(newcg), (size_t)mapsize); dirty(cgbp); } } if (cursnapshot == 0 && memcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal) != 0 && dofix(&idesc[0], "SUMMARY BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(&fs->fs_cstotal, &cstotal, sizeof cstotal); fs->fs_ronly = 0; fs->fs_fmod = 0; sbdirty(); } /* * When doing background fsck on a snapshot, figure out whether * the superblock summary is inaccurate and correct it when * necessary. */ if (cursnapshot != 0) { cmd.size = 1; cmd.value = cstotal.cs_ndir - fs->fs_cstotal.cs_ndir; if (cmd.value != 0) { if (debug) printf("adjndir by %+" PRIi64 "\n", cmd.value); if (bkgrdsumadj == 0 || sysctl(adjndir, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST NUMBER OF DIRECTORIES", cmd.value); } cmd.value = cstotal.cs_nbfree - fs->fs_cstotal.cs_nbfree; if (cmd.value != 0) { if (debug) printf("adjnbfree by %+" PRIi64 "\n", cmd.value); if (bkgrdsumadj == 0 || sysctl(adjnbfree, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST NUMBER OF FREE BLOCKS", cmd.value); } cmd.value = cstotal.cs_nifree - fs->fs_cstotal.cs_nifree; if (cmd.value != 0) { if (debug) printf("adjnifree by %+" PRIi64 "\n", cmd.value); if (bkgrdsumadj == 0 || sysctl(adjnifree, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST NUMBER OF FREE INODES", cmd.value); } cmd.value = cstotal.cs_nffree - fs->fs_cstotal.cs_nffree; if (cmd.value != 0) { if (debug) printf("adjnffree by %+" PRIi64 "\n", cmd.value); if (bkgrdsumadj == 0 || sysctl(adjnffree, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST NUMBER OF FREE FRAGS", cmd.value); } cmd.value = cstotal.cs_numclusters - fs->fs_cstotal.cs_numclusters; if (cmd.value != 0) { if (debug) printf("adjnumclusters by %+" PRIi64 "\n", cmd.value); if (bkgrdsumadj == 0 || sysctl(adjnumclusters, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) rwerror("ADJUST NUMBER OF FREE CLUSTERS", cmd.value); } } } /* * Compare the original cylinder group inode and block bitmaps with the * updated cylinder group inode and block bitmaps. Free inodes and blocks * that have been added. Complain if any previously freed inodes blocks * are now allocated. */ void update_maps( struct cg *oldcg, /* cylinder group of claimed allocations */ struct cg *newcg, /* cylinder group of determined allocations */ int usesysctl) /* 1 => use sysctl interface to update maps */ { int inomapsize, excessdirs; struct fs *fs = &sblock; inomapsize = howmany(fs->fs_ipg, CHAR_BIT); excessdirs = oldcg->cg_cs.cs_ndir - newcg->cg_cs.cs_ndir; if (excessdirs < 0) { pfatal("LOST %d DIRECTORIES\n", -excessdirs); excessdirs = 0; } if (excessdirs > 0) check_maps(cg_inosused(newcg), cg_inosused(oldcg), inomapsize, oldcg->cg_cgx * (ufs2_daddr_t)fs->fs_ipg, "DIR", freedirs, 0, excessdirs, usesysctl); check_maps(cg_inosused(newcg), cg_inosused(oldcg), inomapsize, oldcg->cg_cgx * (ufs2_daddr_t)fs->fs_ipg, "FILE", freefiles, excessdirs, fs->fs_ipg, usesysctl); check_maps(cg_blksfree(oldcg), cg_blksfree(newcg), howmany(fs->fs_fpg, CHAR_BIT), oldcg->cg_cgx * (ufs2_daddr_t)fs->fs_fpg, "FRAG", freeblks, 0, fs->fs_fpg, usesysctl); } static void check_maps( u_char *map1, /* map of claimed allocations */ u_char *map2, /* map of determined allocations */ int mapsize, /* size of above two maps */ ufs2_daddr_t startvalue, /* resource value for first element in map */ const char *name, /* name of resource found in maps */ int *opcode, /* sysctl opcode to free resource */ int skip, /* number of entries to skip before starting to free */ int limit, /* limit on number of entries to free */ int usesysctl) /* 1 => use sysctl interface to update maps */ { # define BUFSIZE 16 char buf[BUFSIZE]; long i, j, k, l, m, size; ufs2_daddr_t n, astart, aend, ustart, uend; void (*msg)(const char *fmt, ...); if (usesysctl) msg = pfatal; else msg = pwarn; astart = ustart = aend = uend = -1; for (i = 0; i < mapsize; i++) { j = *map1++; k = *map2++; if (j == k) continue; for (m = 0, l = 1; m < CHAR_BIT; m++, l <<= 1) { if ((j & l) == (k & l)) continue; n = startvalue + i * CHAR_BIT + m; if ((j & l) != 0) { if (astart == -1) { astart = aend = n; continue; } if (aend + 1 == n) { aend = n; continue; } if (astart == aend) (*msg)("ALLOCATED %s %" PRId64 " MARKED FREE\n", name, astart); else (*msg)("%s %sS %" PRId64 "-%" PRId64 " MARKED FREE\n", "ALLOCATED", name, astart, aend); astart = aend = n; } else { if (ustart == -1) { ustart = uend = n; continue; } if (uend + 1 == n) { uend = n; continue; } size = uend - ustart + 1; if (size <= skip) { skip -= size; ustart = uend = n; continue; } if (skip > 0) { ustart += skip; size -= skip; skip = 0; } if (size > limit) size = limit; if (debug && size == 1) pwarn("%s %s %" PRId64 " MARKED USED\n", "UNALLOCATED", name, ustart); else if (debug) pwarn("%s %sS %" PRId64 "-%" PRId64 " MARKED USED\n", "UNALLOCATED", name, ustart, ustart + size - 1); if (usesysctl != 0) { cmd.value = ustart; cmd.size = size; if (sysctl(opcode, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) { snprintf(buf, BUFSIZE, "FREE %s", name); rwerror(buf, cmd.value); } } limit -= size; if (limit <= 0) return; ustart = uend = n; } } } if (astart != -1) { if (astart == aend) (*msg)("ALLOCATED %s %" PRId64 " MARKED FREE\n", name, astart); else (*msg)("ALLOCATED %sS %" PRId64 "-%" PRId64 " MARKED FREE\n", name, astart, aend); } if (ustart != -1) { size = uend - ustart + 1; if (size <= skip) return; if (skip > 0) { ustart += skip; size -= skip; } if (size > limit) size = limit; if (debug) { if (size == 1) pwarn("UNALLOCATED %s %" PRId64 " MARKED USED\n", name, ustart); else pwarn("UNALLOCATED %sS %" PRId64 "-%" PRId64 " MARKED USED\n", name, ustart, ustart + size - 1); } if (usesysctl != 0) { cmd.value = ustart; cmd.size = size; if (sysctl(opcode, MIBSIZE, 0, 0, &cmd, sizeof cmd) == -1) { snprintf(buf, BUFSIZE, "FREE %s", name); rwerror(buf, cmd.value); } } } } static void clear_blocks(ufs2_daddr_t start, ufs2_daddr_t end) { if (debug) printf("Zero frags %jd to %jd\n", start, end); if (Zflag) blzero(fswritefd, fsbtodb(&sblock, start), lfragtosize(&sblock, end - start + 1)); if (Eflag) blerase(fswritefd, fsbtodb(&sblock, start), lfragtosize(&sblock, end - start + 1)); } Index: head/sbin/newfs/mkfs.c =================================================================== --- head/sbin/newfs/mkfs.c (revision 329050) +++ head/sbin/newfs/mkfs.c (revision 329051) @@ -1,1187 +1,1193 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2002 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Marshall * Kirk McKusick and Network Associates Laboratories, the Security * Research Division of Network Associates, Inc. under DARPA/SPAWAR * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS * research program. * * Copyright (c) 1980, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)mkfs.c 8.11 (Berkeley) 5/3/95"; #endif /* not lint */ #endif #include __FBSDID("$FreeBSD$"); #define IN_RTLD /* So we pickup the P_OSREL defines */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "newfs.h" /* * make file system for cylinder-group style file systems */ #define UMASK 0755 #define POWEROF2(num) (((num) & ((num) - 1)) == 0) static struct csum *fscs; #define sblock disk.d_fs #define acg disk.d_cg union dinode { struct ufs1_dinode dp1; struct ufs2_dinode dp2; }; #define DIP(dp, field) \ ((sblock.fs_magic == FS_UFS1_MAGIC) ? \ (dp)->dp1.field : (dp)->dp2.field) static caddr_t iobuf; static long iobufsize; static ufs2_daddr_t alloc(int size, int mode); static int charsperline(void); static void clrblock(struct fs *, unsigned char *, int); static void fsinit(time_t); static int ilog2(int); static void initcg(int, time_t); static int isblock(struct fs *, unsigned char *, int); static void iput(union dinode *, ino_t); static int makedir(struct direct *, int); static void setblock(struct fs *, unsigned char *, int); static void wtfs(ufs2_daddr_t, int, char *); static u_int32_t newfs_random(void); void mkfs(struct partition *pp, char *fsys) { int fragsperinode, optimalfpg, origdensity, minfpg, lastminfpg; long i, j, csfrags; uint cg; time_t utime; quad_t sizepb; int width; ino_t maxinum; int minfragsperinode; /* minimum ratio of frags to inodes */ char tmpbuf[100]; /* XXX this will break in about 2,500 years */ struct fsrecovery *fsr; char *fsrbuf; union { struct fs fdummy; char cdummy[SBLOCKSIZE]; } dummy; #define fsdummy dummy.fdummy #define chdummy dummy.cdummy /* * Our blocks == sector size, and the version of UFS we are using is * specified by Oflag. */ disk.d_bsize = sectorsize; disk.d_ufs = Oflag; if (Rflag) utime = 1000000000; else time(&utime); sblock.fs_old_flags = FS_FLAGS_UPDATED; sblock.fs_flags = 0; if (Uflag) sblock.fs_flags |= FS_DOSOFTDEP; if (Lflag) strlcpy(sblock.fs_volname, volumelabel, MAXVOLLEN); if (Jflag) sblock.fs_flags |= FS_GJOURNAL; if (lflag) sblock.fs_flags |= FS_MULTILABEL; if (tflag) sblock.fs_flags |= FS_TRIM; /* * Validate the given file system size. * Verify that its last block can actually be accessed. * Convert to file system fragment sized units. */ if (fssize <= 0) { printf("preposterous size %jd\n", (intmax_t)fssize); exit(13); } wtfs(fssize - (realsectorsize / DEV_BSIZE), realsectorsize, (char *)&sblock); /* * collect and verify the file system density info */ sblock.fs_avgfilesize = avgfilesize; sblock.fs_avgfpdir = avgfilesperdir; if (sblock.fs_avgfilesize <= 0) printf("illegal expected average file size %d\n", sblock.fs_avgfilesize), exit(14); if (sblock.fs_avgfpdir <= 0) printf("illegal expected number of files per directory %d\n", sblock.fs_avgfpdir), exit(15); restart: /* * collect and verify the block and fragment sizes */ sblock.fs_bsize = bsize; sblock.fs_fsize = fsize; if (!POWEROF2(sblock.fs_bsize)) { printf("block size must be a power of 2, not %d\n", sblock.fs_bsize); exit(16); } if (!POWEROF2(sblock.fs_fsize)) { printf("fragment size must be a power of 2, not %d\n", sblock.fs_fsize); exit(17); } if (sblock.fs_fsize < sectorsize) { printf("increasing fragment size from %d to sector size (%d)\n", sblock.fs_fsize, sectorsize); sblock.fs_fsize = sectorsize; } if (sblock.fs_bsize > MAXBSIZE) { printf("decreasing block size from %d to maximum (%d)\n", sblock.fs_bsize, MAXBSIZE); sblock.fs_bsize = MAXBSIZE; } if (sblock.fs_bsize < MINBSIZE) { printf("increasing block size from %d to minimum (%d)\n", sblock.fs_bsize, MINBSIZE); sblock.fs_bsize = MINBSIZE; } if (sblock.fs_fsize > MAXBSIZE) { printf("decreasing fragment size from %d to maximum (%d)\n", sblock.fs_fsize, MAXBSIZE); sblock.fs_fsize = MAXBSIZE; } if (sblock.fs_bsize < sblock.fs_fsize) { printf("increasing block size from %d to fragment size (%d)\n", sblock.fs_bsize, sblock.fs_fsize); sblock.fs_bsize = sblock.fs_fsize; } if (sblock.fs_fsize * MAXFRAG < sblock.fs_bsize) { printf( "increasing fragment size from %d to block size / %d (%d)\n", sblock.fs_fsize, MAXFRAG, sblock.fs_bsize / MAXFRAG); sblock.fs_fsize = sblock.fs_bsize / MAXFRAG; } if (maxbsize == 0) maxbsize = bsize; if (maxbsize < bsize || !POWEROF2(maxbsize)) { sblock.fs_maxbsize = sblock.fs_bsize; printf("Extent size set to %d\n", sblock.fs_maxbsize); } else if (sblock.fs_maxbsize > FS_MAXCONTIG * sblock.fs_bsize) { sblock.fs_maxbsize = FS_MAXCONTIG * sblock.fs_bsize; printf("Extent size reduced to %d\n", sblock.fs_maxbsize); } else { sblock.fs_maxbsize = maxbsize; } /* * Maxcontig sets the default for the maximum number of blocks * that may be allocated sequentially. With file system clustering * it is possible to allocate contiguous blocks up to the maximum * transfer size permitted by the controller or buffering. */ if (maxcontig == 0) maxcontig = MAX(1, MAXPHYS / bsize); sblock.fs_maxcontig = maxcontig; if (sblock.fs_maxcontig < sblock.fs_maxbsize / sblock.fs_bsize) { sblock.fs_maxcontig = sblock.fs_maxbsize / sblock.fs_bsize; printf("Maxcontig raised to %d\n", sblock.fs_maxbsize); } if (sblock.fs_maxcontig > 1) sblock.fs_contigsumsize = MIN(sblock.fs_maxcontig,FS_MAXCONTIG); sblock.fs_bmask = ~(sblock.fs_bsize - 1); sblock.fs_fmask = ~(sblock.fs_fsize - 1); sblock.fs_qbmask = ~sblock.fs_bmask; sblock.fs_qfmask = ~sblock.fs_fmask; sblock.fs_bshift = ilog2(sblock.fs_bsize); sblock.fs_fshift = ilog2(sblock.fs_fsize); sblock.fs_frag = numfrags(&sblock, sblock.fs_bsize); sblock.fs_fragshift = ilog2(sblock.fs_frag); if (sblock.fs_frag > MAXFRAG) { printf("fragment size %d is still too small (can't happen)\n", sblock.fs_bsize / MAXFRAG); exit(21); } sblock.fs_fsbtodb = ilog2(sblock.fs_fsize / sectorsize); sblock.fs_size = fssize = dbtofsb(&sblock, fssize); sblock.fs_providersize = dbtofsb(&sblock, mediasize / sectorsize); /* * Before the filesystem is finally initialized, mark it * as incompletely initialized. */ sblock.fs_magic = FS_BAD_MAGIC; if (Oflag == 1) { sblock.fs_sblockloc = SBLOCK_UFS1; sblock.fs_sblockactualloc = SBLOCK_UFS1; sblock.fs_nindir = sblock.fs_bsize / sizeof(ufs1_daddr_t); sblock.fs_inopb = sblock.fs_bsize / sizeof(struct ufs1_dinode); sblock.fs_maxsymlinklen = ((UFS_NDADDR + UFS_NIADDR) * sizeof(ufs1_daddr_t)); sblock.fs_old_inodefmt = FS_44INODEFMT; sblock.fs_old_cgoffset = 0; sblock.fs_old_cgmask = 0xffffffff; sblock.fs_old_size = sblock.fs_size; sblock.fs_old_rotdelay = 0; sblock.fs_old_rps = 60; sblock.fs_old_nspf = sblock.fs_fsize / sectorsize; sblock.fs_old_cpg = 1; sblock.fs_old_interleave = 1; sblock.fs_old_trackskew = 0; sblock.fs_old_cpc = 0; sblock.fs_old_postblformat = 1; sblock.fs_old_nrpos = 1; } else { sblock.fs_sblockloc = SBLOCK_UFS2; sblock.fs_sblockactualloc = SBLOCK_UFS2; sblock.fs_nindir = sblock.fs_bsize / sizeof(ufs2_daddr_t); sblock.fs_inopb = sblock.fs_bsize / sizeof(struct ufs2_dinode); sblock.fs_maxsymlinklen = ((UFS_NDADDR + UFS_NIADDR) * sizeof(ufs2_daddr_t)); } sblock.fs_sblkno = roundup(howmany(sblock.fs_sblockloc + SBLOCKSIZE, sblock.fs_fsize), sblock.fs_frag); sblock.fs_cblkno = sblock.fs_sblkno + roundup(howmany(SBLOCKSIZE, sblock.fs_fsize), sblock.fs_frag); sblock.fs_iblkno = sblock.fs_cblkno + sblock.fs_frag; sblock.fs_maxfilesize = sblock.fs_bsize * UFS_NDADDR - 1; for (sizepb = sblock.fs_bsize, i = 0; i < UFS_NIADDR; i++) { sizepb *= NINDIR(&sblock); sblock.fs_maxfilesize += sizepb; } /* * It's impossible to create a snapshot in case that fs_maxfilesize * is smaller than the fssize. */ if (sblock.fs_maxfilesize < (u_quad_t)fssize) { warnx("WARNING: You will be unable to create snapshots on this " "file system. Correct by using a larger blocksize."); } /* * Calculate the number of blocks to put into each cylinder group. * * This algorithm selects the number of blocks per cylinder * group. The first goal is to have at least enough data blocks * in each cylinder group to meet the density requirement. Once * this goal is achieved we try to expand to have at least * MINCYLGRPS cylinder groups. Once this goal is achieved, we * pack as many blocks into each cylinder group map as will fit. * * We start by calculating the smallest number of blocks that we * can put into each cylinder group. If this is too big, we reduce * the density until it fits. */ maxinum = (((int64_t)(1)) << 32) - INOPB(&sblock); minfragsperinode = 1 + fssize / maxinum; if (density == 0) { density = MAX(NFPI, minfragsperinode) * fsize; } else if (density < minfragsperinode * fsize) { origdensity = density; density = minfragsperinode * fsize; fprintf(stderr, "density increased from %d to %d\n", origdensity, density); } origdensity = density; for (;;) { fragsperinode = MAX(numfrags(&sblock, density), 1); if (fragsperinode < minfragsperinode) { bsize <<= 1; fsize <<= 1; printf("Block size too small for a file system %s %d\n", "of this size. Increasing blocksize to", bsize); goto restart; } minfpg = fragsperinode * INOPB(&sblock); if (minfpg > sblock.fs_size) minfpg = sblock.fs_size; sblock.fs_ipg = INOPB(&sblock); sblock.fs_fpg = roundup(sblock.fs_iblkno + sblock.fs_ipg / INOPF(&sblock), sblock.fs_frag); if (sblock.fs_fpg < minfpg) sblock.fs_fpg = minfpg; sblock.fs_ipg = roundup(howmany(sblock.fs_fpg, fragsperinode), INOPB(&sblock)); sblock.fs_fpg = roundup(sblock.fs_iblkno + sblock.fs_ipg / INOPF(&sblock), sblock.fs_frag); if (sblock.fs_fpg < minfpg) sblock.fs_fpg = minfpg; sblock.fs_ipg = roundup(howmany(sblock.fs_fpg, fragsperinode), INOPB(&sblock)); if (CGSIZE(&sblock) < (unsigned long)sblock.fs_bsize) break; density -= sblock.fs_fsize; } if (density != origdensity) printf("density reduced from %d to %d\n", origdensity, density); /* * Start packing more blocks into the cylinder group until * it cannot grow any larger, the number of cylinder groups * drops below MINCYLGRPS, or we reach the size requested. * For UFS1 inodes per cylinder group are stored in an int16_t * so fs_ipg is limited to 2^15 - 1. */ for ( ; sblock.fs_fpg < maxblkspercg; sblock.fs_fpg += sblock.fs_frag) { sblock.fs_ipg = roundup(howmany(sblock.fs_fpg, fragsperinode), INOPB(&sblock)); if (Oflag > 1 || (Oflag == 1 && sblock.fs_ipg <= 0x7fff)) { if (sblock.fs_size / sblock.fs_fpg < MINCYLGRPS) break; if (CGSIZE(&sblock) < (unsigned long)sblock.fs_bsize) continue; if (CGSIZE(&sblock) == (unsigned long)sblock.fs_bsize) break; } sblock.fs_fpg -= sblock.fs_frag; sblock.fs_ipg = roundup(howmany(sblock.fs_fpg, fragsperinode), INOPB(&sblock)); break; } /* * Check to be sure that the last cylinder group has enough blocks * to be viable. If it is too small, reduce the number of blocks * per cylinder group which will have the effect of moving more * blocks into the last cylinder group. */ optimalfpg = sblock.fs_fpg; for (;;) { sblock.fs_ncg = howmany(sblock.fs_size, sblock.fs_fpg); lastminfpg = roundup(sblock.fs_iblkno + sblock.fs_ipg / INOPF(&sblock), sblock.fs_frag); if (sblock.fs_size < lastminfpg) { printf("Filesystem size %jd < minimum size of %d\n", (intmax_t)sblock.fs_size, lastminfpg); exit(28); } if (sblock.fs_size % sblock.fs_fpg >= lastminfpg || sblock.fs_size % sblock.fs_fpg == 0) break; sblock.fs_fpg -= sblock.fs_frag; sblock.fs_ipg = roundup(howmany(sblock.fs_fpg, fragsperinode), INOPB(&sblock)); } if (optimalfpg != sblock.fs_fpg) printf("Reduced frags per cylinder group from %d to %d %s\n", optimalfpg, sblock.fs_fpg, "to enlarge last cyl group"); sblock.fs_cgsize = fragroundup(&sblock, CGSIZE(&sblock)); sblock.fs_dblkno = sblock.fs_iblkno + sblock.fs_ipg / INOPF(&sblock); if (Oflag == 1) { sblock.fs_old_spc = sblock.fs_fpg * sblock.fs_old_nspf; sblock.fs_old_nsect = sblock.fs_old_spc; sblock.fs_old_npsect = sblock.fs_old_spc; sblock.fs_old_ncyl = sblock.fs_ncg; } /* * fill in remaining fields of the super block */ sblock.fs_csaddr = cgdmin(&sblock, 0); sblock.fs_cssize = fragroundup(&sblock, sblock.fs_ncg * sizeof(struct csum)); fscs = (struct csum *)calloc(1, sblock.fs_cssize); if (fscs == NULL) errx(31, "calloc failed"); sblock.fs_sbsize = fragroundup(&sblock, sizeof(struct fs)); if (sblock.fs_sbsize > SBLOCKSIZE) sblock.fs_sbsize = SBLOCKSIZE; if (sblock.fs_sbsize < realsectorsize) sblock.fs_sbsize = realsectorsize; sblock.fs_minfree = minfree; if (metaspace > 0 && metaspace < sblock.fs_fpg / 2) sblock.fs_metaspace = blknum(&sblock, metaspace); else if (metaspace != -1) /* reserve half of minfree for metadata blocks */ sblock.fs_metaspace = blknum(&sblock, (sblock.fs_fpg * minfree) / 200); if (maxbpg == 0) sblock.fs_maxbpg = MAXBLKPG(sblock.fs_bsize); else sblock.fs_maxbpg = maxbpg; sblock.fs_optim = opt; sblock.fs_cgrotor = 0; sblock.fs_pendingblocks = 0; sblock.fs_pendinginodes = 0; sblock.fs_fmod = 0; sblock.fs_ronly = 0; sblock.fs_state = 0; sblock.fs_clean = 1; sblock.fs_id[0] = (long)utime; sblock.fs_id[1] = newfs_random(); sblock.fs_fsmnt[0] = '\0'; csfrags = howmany(sblock.fs_cssize, sblock.fs_fsize); sblock.fs_dsize = sblock.fs_size - sblock.fs_sblkno - sblock.fs_ncg * (sblock.fs_dblkno - sblock.fs_sblkno); sblock.fs_cstotal.cs_nbfree = fragstoblks(&sblock, sblock.fs_dsize) - howmany(csfrags, sblock.fs_frag); sblock.fs_cstotal.cs_nffree = fragnum(&sblock, sblock.fs_size) + (fragnum(&sblock, csfrags) > 0 ? sblock.fs_frag - fragnum(&sblock, csfrags) : 0); sblock.fs_cstotal.cs_nifree = sblock.fs_ncg * sblock.fs_ipg - UFS_ROOTINO; sblock.fs_cstotal.cs_ndir = 0; sblock.fs_dsize -= csfrags; sblock.fs_time = utime; if (Oflag == 1) { sblock.fs_old_time = utime; sblock.fs_old_dsize = sblock.fs_dsize; sblock.fs_old_csaddr = sblock.fs_csaddr; sblock.fs_old_cstotal.cs_ndir = sblock.fs_cstotal.cs_ndir; sblock.fs_old_cstotal.cs_nbfree = sblock.fs_cstotal.cs_nbfree; sblock.fs_old_cstotal.cs_nifree = sblock.fs_cstotal.cs_nifree; sblock.fs_old_cstotal.cs_nffree = sblock.fs_cstotal.cs_nffree; } /* * Set flags for metadata that is being check-hashed. + * + * Metadata check hashes are not supported in the UFS version 1 + * filesystem to keep it as small and simple as possible. */ - if (Oflag > 1 && getosreldate() >= P_OSREL_CK_CYLGRP) - sblock.fs_metackhash = CK_CYLGRP; + if (Oflag > 1) { + sblock.fs_flags |= FS_METACKHASH; + if (getosreldate() >= P_OSREL_CK_CYLGRP) + sblock.fs_metackhash = CK_CYLGRP; + } /* * Dump out summary information about file system. */ # define B2MBFACTOR (1 / (1024.0 * 1024.0)) printf("%s: %.1fMB (%jd sectors) block size %d, fragment size %d\n", fsys, (float)sblock.fs_size * sblock.fs_fsize * B2MBFACTOR, (intmax_t)fsbtodb(&sblock, sblock.fs_size), sblock.fs_bsize, sblock.fs_fsize); printf("\tusing %d cylinder groups of %.2fMB, %d blks, %d inodes.\n", sblock.fs_ncg, (float)sblock.fs_fpg * sblock.fs_fsize * B2MBFACTOR, sblock.fs_fpg / sblock.fs_frag, sblock.fs_ipg); if (sblock.fs_flags & FS_DOSOFTDEP) printf("\twith soft updates\n"); # undef B2MBFACTOR if (Eflag && !Nflag) { printf("Erasing sectors [%jd...%jd]\n", sblock.fs_sblockloc / disk.d_bsize, fsbtodb(&sblock, sblock.fs_size) - 1); berase(&disk, sblock.fs_sblockloc / disk.d_bsize, sblock.fs_size * sblock.fs_fsize - sblock.fs_sblockloc); } /* * Wipe out old UFS1 superblock(s) if necessary. */ if (!Nflag && Oflag != 1 && realsectorsize <= SBLOCK_UFS1) { i = bread(&disk, part_ofs + SBLOCK_UFS1 / disk.d_bsize, chdummy, SBLOCKSIZE); if (i == -1) err(1, "can't read old UFS1 superblock: %s", disk.d_error); if (fsdummy.fs_magic == FS_UFS1_MAGIC) { fsdummy.fs_magic = 0; bwrite(&disk, part_ofs + SBLOCK_UFS1 / disk.d_bsize, chdummy, SBLOCKSIZE); for (cg = 0; cg < fsdummy.fs_ncg; cg++) { if (fsbtodb(&fsdummy, cgsblock(&fsdummy, cg)) > fssize) break; bwrite(&disk, part_ofs + fsbtodb(&fsdummy, cgsblock(&fsdummy, cg)), chdummy, SBLOCKSIZE); } } } if (!Nflag && sbput(disk.d_fd, &disk.d_fs, 0) != 0) err(1, "sbput: %s", disk.d_error); if (Xflag == 1) { printf("** Exiting on Xflag 1\n"); exit(0); } if (Xflag == 2) printf("** Leaving BAD MAGIC on Xflag 2\n"); else sblock.fs_magic = (Oflag != 1) ? FS_UFS2_MAGIC : FS_UFS1_MAGIC; /* * Now build the cylinders group blocks and * then print out indices of cylinder groups. */ printf("super-block backups (for fsck_ffs -b #) at:\n"); i = 0; width = charsperline(); /* * Allocate space for two sets of inode blocks. */ iobufsize = 2 * sblock.fs_bsize; if ((iobuf = calloc(1, iobufsize)) == 0) { printf("Cannot allocate I/O buffer\n"); exit(38); } /* * Write out all the cylinder groups and backup superblocks. */ for (cg = 0; cg < sblock.fs_ncg; cg++) { if (!Nflag) initcg(cg, utime); j = snprintf(tmpbuf, sizeof(tmpbuf), " %jd%s", (intmax_t)fsbtodb(&sblock, cgsblock(&sblock, cg)), cg < (sblock.fs_ncg-1) ? "," : ""); if (j < 0) tmpbuf[j = 0] = '\0'; if (i + j >= width) { printf("\n"); i = 0; } i += j; printf("%s", tmpbuf); fflush(stdout); } printf("\n"); if (Nflag) exit(0); /* * Now construct the initial file system, * then write out the super-block. */ fsinit(utime); if (Oflag == 1) { sblock.fs_old_cstotal.cs_ndir = sblock.fs_cstotal.cs_ndir; sblock.fs_old_cstotal.cs_nbfree = sblock.fs_cstotal.cs_nbfree; sblock.fs_old_cstotal.cs_nifree = sblock.fs_cstotal.cs_nifree; sblock.fs_old_cstotal.cs_nffree = sblock.fs_cstotal.cs_nffree; } if (Xflag == 3) { printf("** Exiting on Xflag 3\n"); exit(0); } /* * Reference the summary information so it will also be written. */ sblock.fs_csp = fscs; if (sbput(disk.d_fd, &disk.d_fs, 0) != 0) err(1, "sbput: %s", disk.d_error); /* * For UFS1 filesystems with a blocksize of 64K, the first * alternate superblock resides at the location used for * the default UFS2 superblock. As there is a valid * superblock at this location, the boot code will use * it as its first choice. Thus we have to ensure that * all of its statistcs on usage are correct. */ if (Oflag == 1 && sblock.fs_bsize == 65536) wtfs(fsbtodb(&sblock, cgsblock(&sblock, 0)), sblock.fs_bsize, (char *)&sblock); /* * Read the last sector of the boot block, replace the last * 20 bytes with the recovery information, then write it back. * The recovery information only works for UFS2 filesystems. */ if (sblock.fs_magic == FS_UFS2_MAGIC) { if ((fsrbuf = malloc(realsectorsize)) == NULL || bread(&disk, part_ofs + (SBLOCK_UFS2 - realsectorsize) / disk.d_bsize, fsrbuf, realsectorsize) == -1) err(1, "can't read recovery area: %s", disk.d_error); fsr = (struct fsrecovery *)&fsrbuf[realsectorsize - sizeof *fsr]; fsr->fsr_magic = sblock.fs_magic; fsr->fsr_fpg = sblock.fs_fpg; fsr->fsr_fsbtodb = sblock.fs_fsbtodb; fsr->fsr_sblkno = sblock.fs_sblkno; fsr->fsr_ncg = sblock.fs_ncg; wtfs((SBLOCK_UFS2 - realsectorsize) / disk.d_bsize, realsectorsize, fsrbuf); free(fsrbuf); } /* * Update information about this partition in pack * label, to that it may be updated on disk. */ if (pp != NULL) { pp->p_fstype = FS_BSDFFS; pp->p_fsize = sblock.fs_fsize; pp->p_frag = sblock.fs_frag; pp->p_cpg = sblock.fs_fpg; } } /* * Initialize a cylinder group. */ void initcg(int cylno, time_t utime) { long blkno, start; off_t savedactualloc; uint i, j, d, dlower, dupper; ufs2_daddr_t cbase, dmax; struct ufs1_dinode *dp1; struct ufs2_dinode *dp2; struct csum *cs; /* * Determine block bounds for cylinder group. * Allow space for super block summary information in first * cylinder group. */ cbase = cgbase(&sblock, cylno); dmax = cbase + sblock.fs_fpg; if (dmax > sblock.fs_size) dmax = sblock.fs_size; dlower = cgsblock(&sblock, cylno) - cbase; dupper = cgdmin(&sblock, cylno) - cbase; if (cylno == 0) dupper += howmany(sblock.fs_cssize, sblock.fs_fsize); cs = &fscs[cylno]; memset(&acg, 0, sblock.fs_cgsize); acg.cg_time = utime; acg.cg_magic = CG_MAGIC; acg.cg_cgx = cylno; acg.cg_niblk = sblock.fs_ipg; acg.cg_initediblk = MIN(sblock.fs_ipg, 2 * INOPB(&sblock)); acg.cg_ndblk = dmax - cbase; if (sblock.fs_contigsumsize > 0) acg.cg_nclusterblks = acg.cg_ndblk / sblock.fs_frag; start = &acg.cg_space[0] - (u_char *)(&acg.cg_firstfield); if (Oflag == 2) { acg.cg_iusedoff = start; } else { acg.cg_old_ncyl = sblock.fs_old_cpg; acg.cg_old_time = acg.cg_time; acg.cg_time = 0; acg.cg_old_niblk = acg.cg_niblk; acg.cg_niblk = 0; acg.cg_initediblk = 0; acg.cg_old_btotoff = start; acg.cg_old_boff = acg.cg_old_btotoff + sblock.fs_old_cpg * sizeof(int32_t); acg.cg_iusedoff = acg.cg_old_boff + sblock.fs_old_cpg * sizeof(u_int16_t); } acg.cg_freeoff = acg.cg_iusedoff + howmany(sblock.fs_ipg, CHAR_BIT); acg.cg_nextfreeoff = acg.cg_freeoff + howmany(sblock.fs_fpg, CHAR_BIT); if (sblock.fs_contigsumsize > 0) { acg.cg_clustersumoff = roundup(acg.cg_nextfreeoff, sizeof(u_int32_t)); acg.cg_clustersumoff -= sizeof(u_int32_t); acg.cg_clusteroff = acg.cg_clustersumoff + (sblock.fs_contigsumsize + 1) * sizeof(u_int32_t); acg.cg_nextfreeoff = acg.cg_clusteroff + howmany(fragstoblks(&sblock, sblock.fs_fpg), CHAR_BIT); } if (acg.cg_nextfreeoff > (unsigned)sblock.fs_cgsize) { printf("Panic: cylinder group too big\n"); exit(37); } acg.cg_cs.cs_nifree += sblock.fs_ipg; if (cylno == 0) for (i = 0; i < (long)UFS_ROOTINO; i++) { setbit(cg_inosused(&acg), i); acg.cg_cs.cs_nifree--; } if (cylno > 0) { /* * In cylno 0, beginning space is reserved * for boot and super blocks. */ for (d = 0; d < dlower; d += sblock.fs_frag) { blkno = d / sblock.fs_frag; setblock(&sblock, cg_blksfree(&acg), blkno); if (sblock.fs_contigsumsize > 0) setbit(cg_clustersfree(&acg), blkno); acg.cg_cs.cs_nbfree++; } } if ((i = dupper % sblock.fs_frag)) { acg.cg_frsum[sblock.fs_frag - i]++; for (d = dupper + sblock.fs_frag - i; dupper < d; dupper++) { setbit(cg_blksfree(&acg), dupper); acg.cg_cs.cs_nffree++; } } for (d = dupper; d + sblock.fs_frag <= acg.cg_ndblk; d += sblock.fs_frag) { blkno = d / sblock.fs_frag; setblock(&sblock, cg_blksfree(&acg), blkno); if (sblock.fs_contigsumsize > 0) setbit(cg_clustersfree(&acg), blkno); acg.cg_cs.cs_nbfree++; } if (d < acg.cg_ndblk) { acg.cg_frsum[acg.cg_ndblk - d]++; for (; d < acg.cg_ndblk; d++) { setbit(cg_blksfree(&acg), d); acg.cg_cs.cs_nffree++; } } if (sblock.fs_contigsumsize > 0) { int32_t *sump = cg_clustersum(&acg); u_char *mapp = cg_clustersfree(&acg); int map = *mapp++; int bit = 1; int run = 0; for (i = 0; i < acg.cg_nclusterblks; i++) { if ((map & bit) != 0) run++; else if (run != 0) { if (run > sblock.fs_contigsumsize) run = sblock.fs_contigsumsize; sump[run]++; run = 0; } if ((i & (CHAR_BIT - 1)) != CHAR_BIT - 1) bit <<= 1; else { map = *mapp++; bit = 1; } } if (run != 0) { if (run > sblock.fs_contigsumsize) run = sblock.fs_contigsumsize; sump[run]++; } } *cs = acg.cg_cs; /* * Write out the duplicate super block. Then write the cylinder * group map and two blocks worth of inodes in a single write. */ savedactualloc = sblock.fs_sblockactualloc; sblock.fs_sblockactualloc = dbtob(fsbtodb(&sblock, cgsblock(&sblock, cylno))); if (sbput(disk.d_fd, &disk.d_fs, 0) != 0) err(1, "sbput: %s", disk.d_error); sblock.fs_sblockactualloc = savedactualloc; if (cgput(&disk, &acg) != 0) err(1, "initcg: cgput: %s", disk.d_error); start = 0; dp1 = (struct ufs1_dinode *)(&iobuf[start]); dp2 = (struct ufs2_dinode *)(&iobuf[start]); for (i = 0; i < acg.cg_initediblk; i++) { if (sblock.fs_magic == FS_UFS1_MAGIC) { dp1->di_gen = newfs_random(); dp1++; } else { dp2->di_gen = newfs_random(); dp2++; } } wtfs(fsbtodb(&sblock, cgimin(&sblock, cylno)), iobufsize, iobuf); /* * For the old file system, we have to initialize all the inodes. */ if (Oflag == 1) { for (i = 2 * sblock.fs_frag; i < sblock.fs_ipg / INOPF(&sblock); i += sblock.fs_frag) { dp1 = (struct ufs1_dinode *)(&iobuf[start]); for (j = 0; j < INOPB(&sblock); j++) { dp1->di_gen = newfs_random(); dp1++; } wtfs(fsbtodb(&sblock, cgimin(&sblock, cylno) + i), sblock.fs_bsize, &iobuf[start]); } } } /* * initialize the file system */ #define ROOTLINKCNT 3 static struct direct root_dir[] = { { UFS_ROOTINO, sizeof(struct direct), DT_DIR, 1, "." }, { UFS_ROOTINO, sizeof(struct direct), DT_DIR, 2, ".." }, { UFS_ROOTINO + 1, sizeof(struct direct), DT_DIR, 5, ".snap" }, }; #define SNAPLINKCNT 2 static struct direct snap_dir[] = { { UFS_ROOTINO + 1, sizeof(struct direct), DT_DIR, 1, "." }, { UFS_ROOTINO, sizeof(struct direct), DT_DIR, 2, ".." }, }; void fsinit(time_t utime) { union dinode node; struct group *grp; gid_t gid; int entries; memset(&node, 0, sizeof node); if ((grp = getgrnam("operator")) != NULL) { gid = grp->gr_gid; } else { warnx("Cannot retrieve operator gid, using gid 0."); gid = 0; } entries = (nflag) ? ROOTLINKCNT - 1: ROOTLINKCNT; if (sblock.fs_magic == FS_UFS1_MAGIC) { /* * initialize the node */ node.dp1.di_atime = utime; node.dp1.di_mtime = utime; node.dp1.di_ctime = utime; /* * create the root directory */ node.dp1.di_mode = IFDIR | UMASK; node.dp1.di_nlink = entries; node.dp1.di_size = makedir(root_dir, entries); node.dp1.di_db[0] = alloc(sblock.fs_fsize, node.dp1.di_mode); node.dp1.di_blocks = btodb(fragroundup(&sblock, node.dp1.di_size)); wtfs(fsbtodb(&sblock, node.dp1.di_db[0]), sblock.fs_fsize, iobuf); iput(&node, UFS_ROOTINO); if (!nflag) { /* * create the .snap directory */ node.dp1.di_mode |= 020; node.dp1.di_gid = gid; node.dp1.di_nlink = SNAPLINKCNT; node.dp1.di_size = makedir(snap_dir, SNAPLINKCNT); node.dp1.di_db[0] = alloc(sblock.fs_fsize, node.dp1.di_mode); node.dp1.di_blocks = btodb(fragroundup(&sblock, node.dp1.di_size)); wtfs(fsbtodb(&sblock, node.dp1.di_db[0]), sblock.fs_fsize, iobuf); iput(&node, UFS_ROOTINO + 1); } } else { /* * initialize the node */ node.dp2.di_atime = utime; node.dp2.di_mtime = utime; node.dp2.di_ctime = utime; node.dp2.di_birthtime = utime; /* * create the root directory */ node.dp2.di_mode = IFDIR | UMASK; node.dp2.di_nlink = entries; node.dp2.di_size = makedir(root_dir, entries); node.dp2.di_db[0] = alloc(sblock.fs_fsize, node.dp2.di_mode); node.dp2.di_blocks = btodb(fragroundup(&sblock, node.dp2.di_size)); wtfs(fsbtodb(&sblock, node.dp2.di_db[0]), sblock.fs_fsize, iobuf); iput(&node, UFS_ROOTINO); if (!nflag) { /* * create the .snap directory */ node.dp2.di_mode |= 020; node.dp2.di_gid = gid; node.dp2.di_nlink = SNAPLINKCNT; node.dp2.di_size = makedir(snap_dir, SNAPLINKCNT); node.dp2.di_db[0] = alloc(sblock.fs_fsize, node.dp2.di_mode); node.dp2.di_blocks = btodb(fragroundup(&sblock, node.dp2.di_size)); wtfs(fsbtodb(&sblock, node.dp2.di_db[0]), sblock.fs_fsize, iobuf); iput(&node, UFS_ROOTINO + 1); } } } /* * construct a set of directory entries in "iobuf". * return size of directory. */ int makedir(struct direct *protodir, int entries) { char *cp; int i, spcleft; spcleft = DIRBLKSIZ; memset(iobuf, 0, DIRBLKSIZ); for (cp = iobuf, i = 0; i < entries - 1; i++) { protodir[i].d_reclen = DIRSIZ(0, &protodir[i]); memmove(cp, &protodir[i], protodir[i].d_reclen); cp += protodir[i].d_reclen; spcleft -= protodir[i].d_reclen; } protodir[i].d_reclen = spcleft; memmove(cp, &protodir[i], DIRSIZ(0, &protodir[i])); return (DIRBLKSIZ); } /* * allocate a block or frag */ ufs2_daddr_t alloc(int size, int mode) { int i, blkno, frag; uint d; bread(&disk, part_ofs + fsbtodb(&sblock, cgtod(&sblock, 0)), (char *)&acg, sblock.fs_cgsize); if (acg.cg_magic != CG_MAGIC) { printf("cg 0: bad magic number\n"); exit(38); } if (acg.cg_cs.cs_nbfree == 0) { printf("first cylinder group ran out of space\n"); exit(39); } for (d = 0; d < acg.cg_ndblk; d += sblock.fs_frag) if (isblock(&sblock, cg_blksfree(&acg), d / sblock.fs_frag)) goto goth; printf("internal error: can't find block in cyl 0\n"); exit(40); goth: blkno = fragstoblks(&sblock, d); clrblock(&sblock, cg_blksfree(&acg), blkno); if (sblock.fs_contigsumsize > 0) clrbit(cg_clustersfree(&acg), blkno); acg.cg_cs.cs_nbfree--; sblock.fs_cstotal.cs_nbfree--; fscs[0].cs_nbfree--; if (mode & IFDIR) { acg.cg_cs.cs_ndir++; sblock.fs_cstotal.cs_ndir++; fscs[0].cs_ndir++; } if (size != sblock.fs_bsize) { frag = howmany(size, sblock.fs_fsize); fscs[0].cs_nffree += sblock.fs_frag - frag; sblock.fs_cstotal.cs_nffree += sblock.fs_frag - frag; acg.cg_cs.cs_nffree += sblock.fs_frag - frag; acg.cg_frsum[sblock.fs_frag - frag]++; for (i = frag; i < sblock.fs_frag; i++) setbit(cg_blksfree(&acg), d + i); } if (cgput(&disk, &acg) != 0) err(1, "alloc: cgput: %s", disk.d_error); return ((ufs2_daddr_t)d); } /* * Allocate an inode on the disk */ void iput(union dinode *ip, ino_t ino) { ufs2_daddr_t d; bread(&disk, part_ofs + fsbtodb(&sblock, cgtod(&sblock, 0)), (char *)&acg, sblock.fs_cgsize); if (acg.cg_magic != CG_MAGIC) { printf("cg 0: bad magic number\n"); exit(31); } acg.cg_cs.cs_nifree--; setbit(cg_inosused(&acg), ino); if (cgput(&disk, &acg) != 0) err(1, "iput: cgput: %s", disk.d_error); sblock.fs_cstotal.cs_nifree--; fscs[0].cs_nifree--; if (ino >= (unsigned long)sblock.fs_ipg * sblock.fs_ncg) { printf("fsinit: inode value out of range (%ju).\n", (uintmax_t)ino); exit(32); } d = fsbtodb(&sblock, ino_to_fsba(&sblock, ino)); bread(&disk, part_ofs + d, (char *)iobuf, sblock.fs_bsize); if (sblock.fs_magic == FS_UFS1_MAGIC) ((struct ufs1_dinode *)iobuf)[ino_to_fsbo(&sblock, ino)] = ip->dp1; else ((struct ufs2_dinode *)iobuf)[ino_to_fsbo(&sblock, ino)] = ip->dp2; wtfs(d, sblock.fs_bsize, (char *)iobuf); } /* * possibly write to disk */ static void wtfs(ufs2_daddr_t bno, int size, char *bf) { if (Nflag) return; if (bwrite(&disk, part_ofs + bno, bf, size) < 0) err(36, "wtfs: %d bytes at sector %jd", size, (intmax_t)bno); } /* * check if a block is available */ static int isblock(struct fs *fs, unsigned char *cp, int h) { unsigned char mask; switch (fs->fs_frag) { case 8: return (cp[h] == 0xff); case 4: mask = 0x0f << ((h & 0x1) << 2); return ((cp[h >> 1] & mask) == mask); case 2: mask = 0x03 << ((h & 0x3) << 1); return ((cp[h >> 2] & mask) == mask); case 1: mask = 0x01 << (h & 0x7); return ((cp[h >> 3] & mask) == mask); default: fprintf(stderr, "isblock bad fs_frag %d\n", fs->fs_frag); return (0); } } /* * take a block out of the map */ static void clrblock(struct fs *fs, unsigned char *cp, int h) { switch ((fs)->fs_frag) { case 8: cp[h] = 0; return; case 4: cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); return; case 2: cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); return; case 1: cp[h >> 3] &= ~(0x01 << (h & 0x7)); return; default: fprintf(stderr, "clrblock bad fs_frag %d\n", fs->fs_frag); return; } } /* * put a block into the map */ static void setblock(struct fs *fs, unsigned char *cp, int h) { switch (fs->fs_frag) { case 8: cp[h] = 0xff; return; case 4: cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); return; case 2: cp[h >> 2] |= (0x03 << ((h & 0x3) << 1)); return; case 1: cp[h >> 3] |= (0x01 << (h & 0x7)); return; default: fprintf(stderr, "setblock bad fs_frag %d\n", fs->fs_frag); return; } } /* * Determine the number of characters in a * single line. */ static int charsperline(void) { int columns; char *cp; struct winsize ws; columns = 0; if (ioctl(0, TIOCGWINSZ, &ws) != -1) columns = ws.ws_col; if (columns == 0 && (cp = getenv("COLUMNS"))) columns = atoi(cp); if (columns == 0) columns = 80; /* last resort */ return (columns); } static int ilog2(int val) { u_int n; for (n = 0; n < sizeof(n) * CHAR_BIT; n++) if (1 << n == val) return (n); errx(1, "ilog2: %d is not a power of 2\n", val); } /* * For the regression test, return predictable random values. * Otherwise use a true random number generator. */ static u_int32_t newfs_random(void) { static int nextnum = 1; if (Rflag) return (nextnum++); return (arc4random()); } Index: head/sys/ufs/ffs/ffs_vfsops.c =================================================================== --- head/sys/ufs/ffs/ffs_vfsops.c (revision 329050) +++ head/sys/ufs/ffs/ffs_vfsops.c (revision 329051) @@ -1,2316 +1,2318 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_quota.h" #include "opt_ufs.h" #include "opt_ffs.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static uma_zone_t uma_inode, uma_ufs1, uma_ufs2; static int ffs_mountfs(struct vnode *, struct mount *, struct thread *); static void ffs_oldfscompat_read(struct fs *, struct ufsmount *, ufs2_daddr_t); static void ffs_ifree(struct ufsmount *ump, struct inode *ip); static int ffs_sync_lazy(struct mount *mp); static int ffs_use_bread(void *devfd, off_t loc, void **bufp, int size); static int ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size); static vfs_init_t ffs_init; static vfs_uninit_t ffs_uninit; static vfs_extattrctl_t ffs_extattrctl; static vfs_cmount_t ffs_cmount; static vfs_unmount_t ffs_unmount; static vfs_mount_t ffs_mount; static vfs_statfs_t ffs_statfs; static vfs_fhtovp_t ffs_fhtovp; static vfs_sync_t ffs_sync; static struct vfsops ufs_vfsops = { .vfs_extattrctl = ffs_extattrctl, .vfs_fhtovp = ffs_fhtovp, .vfs_init = ffs_init, .vfs_mount = ffs_mount, .vfs_cmount = ffs_cmount, .vfs_quotactl = ufs_quotactl, .vfs_root = ufs_root, .vfs_statfs = ffs_statfs, .vfs_sync = ffs_sync, .vfs_uninit = ffs_uninit, .vfs_unmount = ffs_unmount, .vfs_vget = ffs_vget, .vfs_susp_clean = process_deferred_inactive, }; VFS_SET(ufs_vfsops, ufs, 0); MODULE_VERSION(ufs, 1); static b_strategy_t ffs_geom_strategy; static b_write_t ffs_bufwrite; static struct buf_ops ffs_ops = { .bop_name = "FFS", .bop_write = ffs_bufwrite, .bop_strategy = ffs_geom_strategy, .bop_sync = bufsync, #ifdef NO_FFS_SNAPSHOT .bop_bdflush = bufbdflush, #else .bop_bdflush = ffs_bdflush, #endif }; /* * Note that userquota and groupquota options are not currently used * by UFS/FFS code and generally mount(8) does not pass those options * from userland, but they can be passed by loader(8) via * vfs.root.mountfrom.options. */ static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr", "noclusterw", "noexec", "export", "force", "from", "groupquota", "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir", "nosymfollow", "sync", "union", "userquota", NULL }; static int ffs_mount(struct mount *mp) { struct vnode *devvp; struct thread *td; struct ufsmount *ump = NULL; struct fs *fs; pid_t fsckpid = 0; int error, error1, flags; uint64_t mntorflags; accmode_t accmode; struct nameidata ndp; char *fspec; td = curthread; if (vfs_filteropt(mp->mnt_optnew, ffs_opts)) return (EINVAL); if (uma_inode == NULL) { uma_inode = uma_zcreate("FFS inode", sizeof(struct inode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_ufs1 = uma_zcreate("FFS1 dinode", sizeof(struct ufs1_dinode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); uma_ufs2 = uma_zcreate("FFS2 dinode", sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); } vfs_deleteopt(mp->mnt_optnew, "groupquota"); vfs_deleteopt(mp->mnt_optnew, "userquota"); fspec = vfs_getopts(mp->mnt_optnew, "from", &error); if (error) return (error); mntorflags = 0; if (vfs_getopt(mp->mnt_optnew, "acls", NULL, NULL) == 0) mntorflags |= MNT_ACLS; if (vfs_getopt(mp->mnt_optnew, "snapshot", NULL, NULL) == 0) { mntorflags |= MNT_SNAPSHOT; /* * Once we have set the MNT_SNAPSHOT flag, do not * persist "snapshot" in the options list. */ vfs_deleteopt(mp->mnt_optnew, "snapshot"); vfs_deleteopt(mp->mnt_opt, "snapshot"); } if (vfs_getopt(mp->mnt_optnew, "fsckpid", NULL, NULL) == 0 && vfs_scanopt(mp->mnt_optnew, "fsckpid", "%d", &fsckpid) == 1) { /* * Once we have set the restricted PID, do not * persist "fsckpid" in the options list. */ vfs_deleteopt(mp->mnt_optnew, "fsckpid"); vfs_deleteopt(mp->mnt_opt, "fsckpid"); if (mp->mnt_flag & MNT_UPDATE) { if (VFSTOUFS(mp)->um_fs->fs_ronly == 0 && vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) { vfs_mount_error(mp, "Checker enable: Must be read-only"); return (EINVAL); } } else if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) { vfs_mount_error(mp, "Checker enable: Must be read-only"); return (EINVAL); } /* Set to -1 if we are done */ if (fsckpid == 0) fsckpid = -1; } if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) { if (mntorflags & MNT_ACLS) { vfs_mount_error(mp, "\"acls\" and \"nfsv4acls\" options " "are mutually exclusive"); return (EINVAL); } mntorflags |= MNT_NFS4ACLS; } MNT_ILOCK(mp); mp->mnt_flag |= mntorflags; MNT_IUNLOCK(mp); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_fs; devvp = ump->um_devvp; if (fsckpid == -1 && ump->um_fsckpid > 0) { if ((error = ffs_flushfiles(mp, WRITECLOSE, td)) != 0 || (error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) return (error); g_topology_lock(); /* * Return to normal read-only mode. */ error = g_access(ump->um_cp, 0, -1, 0); g_topology_unlock(); ump->um_fsckpid = 0; } if (fs->fs_ronly == 0 && vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) { /* * Flush any dirty data and suspend filesystem. */ if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) return (error); error = vfs_write_suspend_umnt(mp); if (error != 0) return (error); /* * Check for and optionally get rid of files open * for writing. */ flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (MOUNTEDSOFTDEP(mp)) { error = softdep_flushfiles(mp, flags, td); } else { error = ffs_flushfiles(mp, flags, td); } if (error) { vfs_write_resume(mp, 0); return (error); } if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: %s Update error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) fs->fs_clean = 1; if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { fs->fs_ronly = 0; fs->fs_clean = 0; vfs_write_resume(mp, 0); return (error); } if (MOUNTEDSOFTDEP(mp)) softdep_unmount(mp); g_topology_lock(); /* * Drop our write and exclusive access. */ g_access(ump->um_cp, 0, -1, -1); g_topology_unlock(); fs->fs_ronly = 1; MNT_ILOCK(mp); mp->mnt_flag |= MNT_RDONLY; MNT_IUNLOCK(mp); /* * Allow the writers to note that filesystem * is ro now. */ vfs_write_resume(mp, 0); } if ((mp->mnt_flag & MNT_RELOAD) && (error = ffs_reload(mp, td, 0)) != 0) return (error); if (fs->fs_ronly && !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) { /* * If we are running a checker, do not allow upgrade. */ if (ump->um_fsckpid > 0) { vfs_mount_error(mp, "Active checker, cannot upgrade to write"); return (EINVAL); } /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_ACCESS(devvp, VREAD | VWRITE, td->td_ucred, td); if (error) error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { VOP_UNLOCK(devvp, 0); return (error); } VOP_UNLOCK(devvp, 0); fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if ((mp->mnt_flag & MNT_FORCE) || ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 && (fs->fs_flags & FS_DOSOFTDEP))) { printf("WARNING: %s was not properly " "dismounted\n", fs->fs_fsmnt); } else { vfs_mount_error(mp, "R/W mount of %s denied. %s.%s", fs->fs_fsmnt, "Filesystem is not clean - run fsck", (fs->fs_flags & FS_SUJ) == 0 ? "" : " Forced mount will invalidate" " journal contents"); return (EPERM); } } g_topology_lock(); /* * Request exclusive write access. */ error = g_access(ump->um_cp, 0, 1, 1); g_topology_unlock(); if (error) return (error); if ((error = vn_start_write(NULL, &mp, V_WAIT)) != 0) return (error); fs->fs_ronly = 0; MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); fs->fs_mtime = time_second; /* check to see if we need to start softdep */ if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, td->td_ucred))){ vn_finished_write(mp); return (error); } fs->fs_clean = 0; if ((error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) { vn_finished_write(mp); return (error); } if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); vn_finished_write(mp); } /* * Soft updates is incompatible with "async", * so if we are doing softupdates stop the user * from setting the async flag in an update. * Softdep_mount() clears it in an initial mount * or ro->rw remount. */ if (MOUNTEDSOFTDEP(mp)) { /* XXX: Reset too late ? */ MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_ASYNC; MNT_IUNLOCK(mp); } /* * Keep MNT_ACLS flag if it is stored in superblock. */ if ((fs->fs_flags & FS_ACLS) != 0) { /* XXX: Set too late ? */ MNT_ILOCK(mp); mp->mnt_flag |= MNT_ACLS; MNT_IUNLOCK(mp); } if ((fs->fs_flags & FS_NFS4ACLS) != 0) { /* XXX: Set too late ? */ MNT_ILOCK(mp); mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); } /* * If this is a request from fsck to clean up the filesystem, * then allow the specified pid to proceed. */ if (fsckpid > 0) { if (ump->um_fsckpid != 0) { vfs_mount_error(mp, "Active checker already running on %s", fs->fs_fsmnt); return (EINVAL); } KASSERT(MOUNTEDSOFTDEP(mp) == 0, ("soft updates enabled on read-only file system")); g_topology_lock(); /* * Request write access. */ error = g_access(ump->um_cp, 0, 1, 0); g_topology_unlock(); if (error) { vfs_mount_error(mp, "Checker activation failed on %s", fs->fs_fsmnt); return (error); } ump->um_fsckpid = fsckpid; if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); fs->fs_mtime = time_second; fs->fs_fmod = 1; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT, 0); } /* * If this is a snapshot request, take the snapshot. */ if (mp->mnt_flag & MNT_SNAPSHOT) return (ffs_snapshot(mp, fspec)); /* * Must not call namei() while owning busy ref. */ vfs_unbusy(mp); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible disk device. */ NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); error = namei(&ndp); if ((mp->mnt_flag & MNT_UPDATE) != 0) { /* * Unmount does not start if MNT_UPDATE is set. Mount * update busies mp before setting MNT_UPDATE. We * must be able to retain our busy ref succesfully, * without sleep. */ error1 = vfs_busy(mp, MBF_NOWAIT); MPASS(error1 == 0); } if (error != 0) return (error); NDFREE(&ndp, NDF_ONLY_PNBUF); devvp = ndp.ni_vp; if (!vn_isdisk(devvp, &error)) { vput(devvp); return (error); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ accmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accmode |= VWRITE; error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); if (error) error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { vput(devvp); return (error); } if (mp->mnt_flag & MNT_UPDATE) { /* * Update only * * If it's not the same vnode, or at least the same device * then it's not correct. */ if (devvp->v_rdev != ump->um_devvp->v_rdev) error = EINVAL; /* needs translation */ vput(devvp); if (error) return (error); } else { /* * New mount * * We need the name for the mount point (also used for * "last mounted on") copied in. If an error occurs, * the mount point is discarded by the upper level code. * Note that vfs_mount_alloc() populates f_mntonname for us. */ if ((error = ffs_mountfs(devvp, mp, td)) != 0) { vrele(devvp); return (error); } if (fsckpid > 0) { KASSERT(MOUNTEDSOFTDEP(mp) == 0, ("soft updates enabled on read-only file system")); ump = VFSTOUFS(mp); fs = ump->um_fs; g_topology_lock(); /* * Request write access. */ error = g_access(ump->um_cp, 0, 1, 0); g_topology_unlock(); if (error) { printf("WARNING: %s: Checker activation " "failed\n", fs->fs_fsmnt); } else { ump->um_fsckpid = fsckpid; if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); fs->fs_mtime = time_second; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT, 0); } } } vfs_mountedfrom(mp, fspec); return (0); } /* * Compatibility with old mount system call. */ static int ffs_cmount(struct mntarg *ma, void *data, uint64_t flags) { struct ufs_args args; struct export_args exp; int error; if (data == NULL) return (EINVAL); error = copyin(data, &args, sizeof args); if (error) return (error); vfs_oexport_conv(&args.export, &exp); ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN); ma = mount_arg(ma, "export", &exp, sizeof(exp)); error = kernel_mount(ma, flags); return (error); } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). If the 'force' flag * is 0, the filesystem must be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) clear MNTK_SUSPEND2 and MNTK_SUSPENDED flags, allowing secondary * writers, if requested. * 6) invalidate all cached file data. * 7) re-read inode data for all active vnodes. */ int ffs_reload(struct mount *mp, struct thread *td, int flags) { struct vnode *vp, *mvp, *devvp; struct inode *ip; void *space; struct buf *bp; struct fs *fs, *newfs; struct ufsmount *ump; ufs2_daddr_t sblockloc; int i, blks, error; u_long size; int32_t *lp; ump = VFSTOUFS(mp); MNT_ILOCK(mp); if ((mp->mnt_flag & MNT_RDONLY) == 0 && (flags & FFSR_FORCE) == 0) { MNT_IUNLOCK(mp); return (EINVAL); } MNT_IUNLOCK(mp); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOUFS(mp)->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); if (vinvalbuf(devvp, 0, 0, 0) != 0) panic("ffs_reload: dirty1"); VOP_UNLOCK(devvp, 0); /* * Step 2: re-read superblock from disk. */ fs = VFSTOUFS(mp)->um_fs; if ((error = bread(devvp, btodb(fs->fs_sblockloc), fs->fs_sbsize, NOCRED, &bp)) != 0) return (error); newfs = (struct fs *)bp->b_data; if ((newfs->fs_magic != FS_UFS1_MAGIC && newfs->fs_magic != FS_UFS2_MAGIC) || newfs->fs_bsize > MAXBSIZE || newfs->fs_bsize < sizeof(struct fs)) { brelse(bp); return (EIO); /* XXX needs translation */ } /* * Copy pointer fields back into superblock before copying in XXX * new superblock. These should really be in the ufsmount. XXX * Note that important parameters (eg fs_ncg) are unchanged. */ newfs->fs_csp = fs->fs_csp; newfs->fs_maxcluster = fs->fs_maxcluster; newfs->fs_contigdirs = fs->fs_contigdirs; newfs->fs_active = fs->fs_active; newfs->fs_ronly = fs->fs_ronly; sblockloc = fs->fs_sblockloc; bcopy(newfs, fs, (u_int)fs->fs_sbsize); brelse(bp); mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; ffs_oldfscompat_read(fs, VFSTOUFS(mp), sblockloc); UFS_LOCK(ump); if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: %s: reload pending error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } UFS_UNLOCK(ump); /* * Step 3: re-read summary information from disk. */ size = fs->fs_cssize; blks = howmany(size, fs->fs_fsize); if (fs->fs_contigsumsize > 0) size += fs->fs_ncg * sizeof(int32_t); size += fs->fs_ncg * sizeof(u_int8_t); free(fs->fs_csp, M_UFSMNT); space = malloc(size, M_UFSMNT, M_WAITOK); fs->fs_csp = space; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, NOCRED, &bp); if (error) return (error); bcopy(bp->b_data, space, (u_int)size); space = (char *)space + size; brelse(bp); } /* * We no longer know anything about clusters per cylinder group. */ if (fs->fs_contigsumsize > 0) { fs->fs_maxcluster = lp = space; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; space = lp; } size = fs->fs_ncg * sizeof(u_int8_t); fs->fs_contigdirs = (u_int8_t *)space; bzero(fs->fs_contigdirs, size); if ((flags & FFSR_UNSUSPEND) != 0) { MNT_ILOCK(mp); mp->mnt_kern_flag &= ~(MNTK_SUSPENDED | MNTK_SUSPEND2); wakeup(&mp->mnt_flag); MNT_IUNLOCK(mp); } loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* * Skip syncer vnode. */ if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } /* * Step 4: invalidate all cached file data. */ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } if (vinvalbuf(vp, 0, 0, 0)) panic("ffs_reload: dirty2"); /* * Step 5: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { VOP_UNLOCK(vp, 0); vrele(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } ffs_load_inode(bp, ip, fs, ip->i_number); ip->i_effnlink = ip->i_nlink; brelse(bp); VOP_UNLOCK(vp, 0); vrele(vp); } return (0); } /* * Common code for mount and mountroot */ static int ffs_mountfs(devvp, mp, td) struct vnode *devvp; struct mount *mp; struct thread *td; { struct ufsmount *ump; struct fs *fs; struct cdev *dev; int error, i, len, ronly; struct ucred *cred; struct g_consumer *cp; struct mount *nmp; fs = NULL; ump = NULL; cred = td ? td->td_ucred : NOCRED; ronly = (mp->mnt_flag & MNT_RDONLY) != 0; KASSERT(devvp->v_type == VCHR, ("reclaimed devvp")); dev = devvp->v_rdev; if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0, (uintptr_t)mp) == 0) { VOP_UNLOCK(devvp, 0); return (EBUSY); } g_topology_lock(); error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1); g_topology_unlock(); if (error != 0) { atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0); VOP_UNLOCK(devvp, 0); return (error); } dev_ref(dev); devvp->v_bufobj.bo_ops = &ffs_ops; VOP_UNLOCK(devvp, 0); if (dev->si_iosize_max != 0) mp->mnt_iosize_max = dev->si_iosize_max; if (mp->mnt_iosize_max > MAXPHYS) mp->mnt_iosize_max = MAXPHYS; if ((SBLOCKSIZE % cp->provider->sectorsize) != 0) { error = EINVAL; vfs_mount_error(mp, "Invalid sectorsize %d for superblock size %d", cp->provider->sectorsize, SBLOCKSIZE); goto out; } /* fetch the superblock and summary information */ if ((error = ffs_sbget(devvp, &fs, -1, M_UFSMNT, ffs_use_bread)) != 0) goto out; fs->fs_fmod = 0; - /* none of these types of check-hashes are maintained */ + /* if we ran on a kernel without metadata check hashes, disable them */ + if ((fs->fs_flags & FS_METACKHASH) == 0) + fs->fs_metackhash = 0; + /* none of these types of check-hashes are maintained by this kernel */ fs->fs_metackhash &= ~(CK_SUPERBLOCK | CK_INODE | CK_INDIR | CK_DIR); - /* no support for directory indices or any other undefined flags */ - fs->fs_flags &= ~FS_INDEXDIRS; + /* no support for any undefined flags */ fs->fs_flags &= FS_SUPPORTED; fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if (ronly || (mp->mnt_flag & MNT_FORCE) || ((fs->fs_flags & (FS_SUJ | FS_NEEDSFSCK)) == 0 && (fs->fs_flags & FS_DOSOFTDEP))) { printf("WARNING: %s was not properly dismounted\n", fs->fs_fsmnt); } else { vfs_mount_error(mp, "R/W mount of %s denied. %s%s", fs->fs_fsmnt, "Filesystem is not clean - run fsck.", (fs->fs_flags & FS_SUJ) == 0 ? "" : " Forced mount will invalidate journal contents"); error = EPERM; goto out; } if ((fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) && (mp->mnt_flag & MNT_FORCE)) { printf("WARNING: %s: lost blocks %jd files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } } if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: %s: mount pending error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } if ((fs->fs_flags & FS_GJOURNAL) != 0) { #ifdef UFS_GJOURNAL /* * Get journal provider name. */ len = 1024; mp->mnt_gjprovider = malloc((u_long)len, M_UFSMNT, M_WAITOK); if (g_io_getattr("GJOURNAL::provider", cp, &len, mp->mnt_gjprovider) == 0) { mp->mnt_gjprovider = realloc(mp->mnt_gjprovider, len, M_UFSMNT, M_WAITOK); MNT_ILOCK(mp); mp->mnt_flag |= MNT_GJOURNAL; MNT_IUNLOCK(mp); } else { printf("WARNING: %s: GJOURNAL flag on fs " "but no gjournal provider below\n", mp->mnt_stat.f_mntonname); free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; } #else printf("WARNING: %s: GJOURNAL flag on fs but no " "UFS_GJOURNAL support\n", mp->mnt_stat.f_mntonname); #endif } else { mp->mnt_gjprovider = NULL; } ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK | M_ZERO); ump->um_cp = cp; ump->um_bo = &devvp->v_bufobj; ump->um_fs = fs; if (fs->fs_magic == FS_UFS1_MAGIC) { ump->um_fstype = UFS1; ump->um_balloc = ffs_balloc_ufs1; } else { ump->um_fstype = UFS2; ump->um_balloc = ffs_balloc_ufs2; } ump->um_blkatoff = ffs_blkatoff; ump->um_truncate = ffs_truncate; ump->um_update = ffs_update; ump->um_valloc = ffs_valloc; ump->um_vfree = ffs_vfree; ump->um_ifree = ffs_ifree; ump->um_rdonly = ffs_rdonly; ump->um_snapgone = ffs_snapgone; mtx_init(UFS_MTX(ump), "FFS", "FFS Lock", MTX_DEF); ffs_oldfscompat_read(fs, ump, fs->fs_sblockloc); fs->fs_ronly = ronly; fs->fs_active = NULL; mp->mnt_data = ump; mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0]; mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; nmp = NULL; if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || (nmp = vfs_getvfs(&mp->mnt_stat.f_fsid))) { if (nmp) vfs_rel(nmp); vfs_getnewfsid(mp); } mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; MNT_ILOCK(mp); mp->mnt_flag |= MNT_LOCAL; MNT_IUNLOCK(mp); if ((fs->fs_flags & FS_MULTILABEL) != 0) { #ifdef MAC MNT_ILOCK(mp); mp->mnt_flag |= MNT_MULTILABEL; MNT_IUNLOCK(mp); #else printf("WARNING: %s: multilabel flag on fs but " "no MAC support\n", mp->mnt_stat.f_mntonname); #endif } if ((fs->fs_flags & FS_ACLS) != 0) { #ifdef UFS_ACL MNT_ILOCK(mp); if (mp->mnt_flag & MNT_NFS4ACLS) printf("WARNING: %s: ACLs flag on fs conflicts with " "\"nfsv4acls\" mount option; option ignored\n", mp->mnt_stat.f_mntonname); mp->mnt_flag &= ~MNT_NFS4ACLS; mp->mnt_flag |= MNT_ACLS; MNT_IUNLOCK(mp); #else printf("WARNING: %s: ACLs flag on fs but no ACLs support\n", mp->mnt_stat.f_mntonname); #endif } if ((fs->fs_flags & FS_NFS4ACLS) != 0) { #ifdef UFS_ACL MNT_ILOCK(mp); if (mp->mnt_flag & MNT_ACLS) printf("WARNING: %s: NFSv4 ACLs flag on fs conflicts " "with \"acls\" mount option; option ignored\n", mp->mnt_stat.f_mntonname); mp->mnt_flag &= ~MNT_ACLS; mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); #else printf("WARNING: %s: NFSv4 ACLs flag on fs but no " "ACLs support\n", mp->mnt_stat.f_mntonname); #endif } if ((fs->fs_flags & FS_TRIM) != 0) { len = sizeof(int); if (g_io_getattr("GEOM::candelete", cp, &len, &ump->um_candelete) == 0) { if (!ump->um_candelete) printf("WARNING: %s: TRIM flag on fs but disk " "does not support TRIM\n", mp->mnt_stat.f_mntonname); } else { printf("WARNING: %s: TRIM flag on fs but disk does " "not confirm that it supports TRIM\n", mp->mnt_stat.f_mntonname); ump->um_candelete = 0; } if (ump->um_candelete) { ump->um_trim_tq = taskqueue_create("trim", M_WAITOK, taskqueue_thread_enqueue, &ump->um_trim_tq); taskqueue_start_threads(&ump->um_trim_tq, 1, PVFS, "%s trim", mp->mnt_stat.f_mntonname); } } ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = fs->fs_nindir; ump->um_bptrtodb = fs->fs_fsbtodb; ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; #ifdef UFS_EXTATTR ufs_extattr_uepm_init(&ump->um_extattr); #endif /* * Set FS local "last mounted on" information (NULL pad) */ bzero(fs->fs_fsmnt, MAXMNTLEN); strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN); mp->mnt_stat.f_iosize = fs->fs_bsize; if (mp->mnt_flag & MNT_ROOTFS) { /* * Root mount; update timestamp in mount structure. * this will be used by the common root mount code * to update the system clock. */ mp->mnt_time = fs->fs_time; } if (ronly == 0) { fs->fs_mtime = time_second; if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, cred)) != 0) { ffs_flushfiles(mp, FORCECLOSE, td); goto out; } if (fs->fs_snapinum[0] != 0) ffs_snapshot_mount(mp); fs->fs_fmod = 1; fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT, 0); } /* * Initialize filesystem state information in mount struct. */ MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | MNTK_NO_IOPF | MNTK_UNMAPPED_BUFS | MNTK_USES_BCACHE; MNT_IUNLOCK(mp); #ifdef UFS_EXTATTR #ifdef UFS_EXTATTR_AUTOSTART /* * * Auto-starting does the following: * - check for /.attribute in the fs, and extattr_start if so * - for each file in .attribute, enable that file with * an attribute of the same name. * Not clear how to report errors -- probably eat them. * This would all happen while the filesystem was busy/not * available, so would effectively be "atomic". */ (void) ufs_extattr_autostart(mp, td); #endif /* !UFS_EXTATTR_AUTOSTART */ #endif /* !UFS_EXTATTR */ return (0); out: if (fs != NULL) { free(fs->fs_csp, M_UFSMNT); free(fs, M_UFSMNT); } if (cp != NULL) { g_topology_lock(); g_vfs_close(cp); g_topology_unlock(); } if (ump) { mtx_destroy(UFS_MTX(ump)); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; } free(ump, M_UFSMNT); mp->mnt_data = NULL; } atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0); dev_rel(dev); return (error); } /* * A read function for use by filesystem-layer routines. */ static int ffs_use_bread(void *devfd, off_t loc, void **bufp, int size) { struct buf *bp; int error; *bufp = malloc(size, M_UFSMNT, M_WAITOK); if ((error = bread((struct vnode *)devfd, btodb(loc), size, NOCRED, &bp)) != 0) { free(*bufp, M_UFSMNT); *bufp = NULL; return (error); } bcopy(bp->b_data, *bufp, size); bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); return (0); } #include static int bigcgs = 0; SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, ""); /* * Sanity checks for loading old filesystem superblocks. * See ffs_oldfscompat_write below for unwound actions. * * XXX - Parts get retired eventually. * Unfortunately new bits get added. */ static void ffs_oldfscompat_read(fs, ump, sblockloc) struct fs *fs; struct ufsmount *ump; ufs2_daddr_t sblockloc; { off_t maxfilesize; /* * If not yet done, update fs_flags location and value of fs_sblockloc. */ if ((fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { fs->fs_flags = fs->fs_old_flags; fs->fs_old_flags |= FS_FLAGS_UPDATED; fs->fs_sblockloc = sblockloc; } /* * If not yet done, update UFS1 superblock with new wider fields. */ if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_maxbsize != fs->fs_bsize) { fs->fs_maxbsize = fs->fs_bsize; fs->fs_time = fs->fs_old_time; fs->fs_size = fs->fs_old_size; fs->fs_dsize = fs->fs_old_dsize; fs->fs_csaddr = fs->fs_old_csaddr; fs->fs_cstotal.cs_ndir = fs->fs_old_cstotal.cs_ndir; fs->fs_cstotal.cs_nbfree = fs->fs_old_cstotal.cs_nbfree; fs->fs_cstotal.cs_nifree = fs->fs_old_cstotal.cs_nifree; fs->fs_cstotal.cs_nffree = fs->fs_old_cstotal.cs_nffree; } if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_old_inodefmt < FS_44INODEFMT) { fs->fs_maxfilesize = ((uint64_t)1 << 31) - 1; fs->fs_qbmask = ~fs->fs_bmask; fs->fs_qfmask = ~fs->fs_fmask; } if (fs->fs_magic == FS_UFS1_MAGIC) { ump->um_savedmaxfilesize = fs->fs_maxfilesize; maxfilesize = (uint64_t)0x80000000 * fs->fs_bsize - 1; if (fs->fs_maxfilesize > maxfilesize) fs->fs_maxfilesize = maxfilesize; } /* Compatibility for old filesystems */ if (fs->fs_avgfilesize <= 0) fs->fs_avgfilesize = AVFILESIZ; if (fs->fs_avgfpdir <= 0) fs->fs_avgfpdir = AFPDIR; if (bigcgs) { fs->fs_save_cgsize = fs->fs_cgsize; fs->fs_cgsize = fs->fs_bsize; } } /* * Unwinding superblock updates for old filesystems. * See ffs_oldfscompat_read above for details. * * XXX - Parts get retired eventually. * Unfortunately new bits get added. */ void ffs_oldfscompat_write(fs, ump) struct fs *fs; struct ufsmount *ump; { /* * Copy back UFS2 updated fields that UFS1 inspects. */ if (fs->fs_magic == FS_UFS1_MAGIC) { fs->fs_old_time = fs->fs_time; fs->fs_old_cstotal.cs_ndir = fs->fs_cstotal.cs_ndir; fs->fs_old_cstotal.cs_nbfree = fs->fs_cstotal.cs_nbfree; fs->fs_old_cstotal.cs_nifree = fs->fs_cstotal.cs_nifree; fs->fs_old_cstotal.cs_nffree = fs->fs_cstotal.cs_nffree; fs->fs_maxfilesize = ump->um_savedmaxfilesize; } if (bigcgs) { fs->fs_cgsize = fs->fs_save_cgsize; fs->fs_save_cgsize = 0; } } /* * unmount system call */ static int ffs_unmount(mp, mntflags) struct mount *mp; int mntflags; { struct thread *td; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, flags, susp; #ifdef UFS_EXTATTR int e_restart; #endif flags = 0; td = curthread; fs = ump->um_fs; susp = 0; if (mntflags & MNT_FORCE) { flags |= FORCECLOSE; susp = fs->fs_ronly == 0; } #ifdef UFS_EXTATTR if ((error = ufs_extattr_stop(mp, td))) { if (error != EOPNOTSUPP) printf("WARNING: unmount %s: ufs_extattr_stop " "returned errno %d\n", mp->mnt_stat.f_mntonname, error); e_restart = 0; } else { ufs_extattr_uepm_destroy(&ump->um_extattr); e_restart = 1; } #endif if (susp) { error = vfs_write_suspend_umnt(mp); if (error != 0) goto fail1; } if (MOUNTEDSOFTDEP(mp)) error = softdep_flushfiles(mp, flags, td); else error = ffs_flushfiles(mp, flags, td); if (error != 0 && error != ENXIO) goto fail; UFS_LOCK(ump); if (fs->fs_pendingblocks != 0 || fs->fs_pendinginodes != 0) { printf("WARNING: unmount %s: pending error: blocks %jd " "files %d\n", fs->fs_fsmnt, (intmax_t)fs->fs_pendingblocks, fs->fs_pendinginodes); fs->fs_pendingblocks = 0; fs->fs_pendinginodes = 0; } UFS_UNLOCK(ump); if (MOUNTEDSOFTDEP(mp)) softdep_unmount(mp); if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) { fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1; error = ffs_sbupdate(ump, MNT_WAIT, 0); if (error && error != ENXIO) { fs->fs_clean = 0; goto fail; } } if (susp) vfs_write_resume(mp, VR_START_WRITE); if (ump->um_trim_tq != NULL) { while (ump->um_trim_inflight != 0) pause("ufsutr", hz); taskqueue_drain_all(ump->um_trim_tq); taskqueue_free(ump->um_trim_tq); } g_topology_lock(); if (ump->um_fsckpid > 0) { /* * Return to normal read-only mode. */ error = g_access(ump->um_cp, 0, -1, 0); ump->um_fsckpid = 0; } g_vfs_close(ump->um_cp); g_topology_unlock(); atomic_store_rel_ptr((uintptr_t *)&ump->um_dev->si_mountpt, 0); vrele(ump->um_devvp); dev_rel(ump->um_dev); mtx_destroy(UFS_MTX(ump)); if (mp->mnt_gjprovider != NULL) { free(mp->mnt_gjprovider, M_UFSMNT); mp->mnt_gjprovider = NULL; } free(fs->fs_csp, M_UFSMNT); free(fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = NULL; MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_LOCAL; MNT_IUNLOCK(mp); if (td->td_su == mp) { td->td_su = NULL; vfs_rel(mp); } return (error); fail: if (susp) vfs_write_resume(mp, VR_START_WRITE); fail1: #ifdef UFS_EXTATTR if (e_restart) { ufs_extattr_uepm_init(&ump->um_extattr); #ifdef UFS_EXTATTR_AUTOSTART (void) ufs_extattr_autostart(mp, td); #endif } #endif return (error); } /* * Flush out all the files in a filesystem. */ int ffs_flushfiles(mp, flags, td) struct mount *mp; int flags; struct thread *td; { struct ufsmount *ump; int qerror, error; ump = VFSTOUFS(mp); qerror = 0; #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { int i; error = vflush(mp, 0, SKIPSYSTEM|flags, td); if (error) return (error); for (i = 0; i < MAXQUOTAS; i++) { error = quotaoff(td, mp, i); if (error != 0) { if ((flags & EARLYFLUSH) == 0) return (error); else qerror = error; } } /* * Here we fall through to vflush again to ensure that * we have gotten rid of all the system vnodes, unless * quotas must not be closed. */ } #endif ASSERT_VOP_LOCKED(ump->um_devvp, "ffs_flushfiles"); if (ump->um_devvp->v_vflag & VV_COPYONWRITE) { if ((error = vflush(mp, 0, SKIPSYSTEM | flags, td)) != 0) return (error); ffs_snapshot_unmount(mp); flags |= FORCECLOSE; /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. */ } /* * Do not close system files if quotas were not closed, to be * able to sync the remaining dquots. The freeblks softupdate * workitems might hold a reference on a dquot, preventing * quotaoff() from completing. Next round of * softdep_flushworklist() iteration should process the * blockers, allowing the next run of quotaoff() to finally * flush held dquots. * * Otherwise, flush all the files. */ if (qerror == 0 && (error = vflush(mp, 0, flags, td)) != 0) return (error); /* * Flush filesystem metadata. */ vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(ump->um_devvp, MNT_WAIT, td); VOP_UNLOCK(ump->um_devvp, 0); return (error); } /* * Get filesystem statistics. */ static int ffs_statfs(mp, sbp) struct mount *mp; struct statfs *sbp; { struct ufsmount *ump; struct fs *fs; ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_magic != FS_UFS1_MAGIC && fs->fs_magic != FS_UFS2_MAGIC) panic("ffs_statfs"); sbp->f_version = STATFS_VERSION; sbp->f_bsize = fs->fs_fsize; sbp->f_iosize = fs->fs_bsize; sbp->f_blocks = fs->fs_dsize; UFS_LOCK(ump); sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + fs->fs_cstotal.cs_nffree + dbtofsb(fs, fs->fs_pendingblocks); sbp->f_bavail = freespace(fs, fs->fs_minfree) + dbtofsb(fs, fs->fs_pendingblocks); sbp->f_files = fs->fs_ncg * fs->fs_ipg - UFS_ROOTINO; sbp->f_ffree = fs->fs_cstotal.cs_nifree + fs->fs_pendinginodes; UFS_UNLOCK(ump); sbp->f_namemax = UFS_MAXNAMLEN; return (0); } static bool sync_doupdate(struct inode *ip) { return ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) != 0); } /* * For a lazy sync, we only care about access times, quotas and the * superblock. Other filesystem changes are already converted to * cylinder group blocks or inode blocks updates and are written to * disk by syncer. */ static int ffs_sync_lazy(mp) struct mount *mp; { struct vnode *mvp, *vp; struct inode *ip; struct thread *td; int allerror, error; allerror = 0; td = curthread; if ((mp->mnt_flag & MNT_NOATIME) != 0) goto qupdate; MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) { if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } ip = VTOI(vp); /* * The IN_ACCESS flag is converted to IN_MODIFIED by * ufs_close() and ufs_getattr() by the calls to * ufs_itimes_locked(), without subsequent UFS_UPDATE(). * Test also all the other timestamp flags too, to pick up * any other cases that could be missed. */ if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) { VI_UNLOCK(vp); continue; } if ((error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td)) != 0) continue; if (sync_doupdate(ip)) error = ffs_update(vp, 0); if (error != 0) allerror = error; vput(vp); } qupdate: #ifdef QUOTA qsync(mp); #endif if (VFSTOUFS(mp)->um_fs->fs_fmod != 0 && (error = ffs_sbupdate(VFSTOUFS(mp), MNT_LAZY, 0)) != 0) allerror = error; return (allerror); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked busy using * vfs_busy(). */ static int ffs_sync(mp, waitfor) struct mount *mp; int waitfor; { struct vnode *mvp, *vp, *devvp; struct thread *td; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, count, lockreq, allerror = 0; int suspend; int suspended; int secondary_writes; int secondary_accwrites; int softdep_deps; int softdep_accdeps; struct bufobj *bo; suspend = 0; suspended = 0; td = curthread; fs = ump->um_fs; if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0) panic("%s: ffs_sync: modification on read-only filesystem", fs->fs_fsmnt); if (waitfor == MNT_LAZY) { if (!rebooting) return (ffs_sync_lazy(mp)); waitfor = MNT_NOWAIT; } /* * Write back each (modified) inode. */ lockreq = LK_EXCLUSIVE | LK_NOWAIT; if (waitfor == MNT_SUSPEND) { suspend = 1; waitfor = MNT_WAIT; } if (waitfor == MNT_WAIT) lockreq = LK_EXCLUSIVE; lockreq |= LK_INTERLOCK | LK_SLEEPFAIL; loop: /* Grab snapshot of secondary write counts */ MNT_ILOCK(mp); secondary_writes = mp->mnt_secondary_writes; secondary_accwrites = mp->mnt_secondary_accwrites; MNT_IUNLOCK(mp); /* Grab snapshot of softdep dependency counts */ softdep_get_depcounts(mp, &softdep_deps, &softdep_accdeps); MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* * Depend on the vnode interlock to keep things stable enough * for a quick test. Since there might be hundreds of * thousands of vnodes, we cannot afford even a subroutine * call unless there's a good chance that we have work to do. */ if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } ip = VTOI(vp); if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && vp->v_bufobj.bo_dirty.bv_cnt == 0) { VI_UNLOCK(vp); continue; } if ((error = vget(vp, lockreq, td)) != 0) { if (error == ENOENT || error == ENOLCK) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } continue; } if ((error = ffs_syncvnode(vp, waitfor, 0)) != 0) allerror = error; vput(vp); } /* * Force stale filesystem control information to be flushed. */ if (waitfor == MNT_WAIT || rebooting) { if ((error = softdep_flushworklist(ump->um_mountp, &count, td))) allerror = error; /* Flushed work items may create new vnodes to clean */ if (allerror == 0 && count) goto loop; } #ifdef QUOTA qsync(mp); #endif devvp = ump->um_devvp; bo = &devvp->v_bufobj; BO_LOCK(bo); if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) { BO_UNLOCK(bo); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(devvp, waitfor, td); VOP_UNLOCK(devvp, 0); if (MOUNTEDSOFTDEP(mp) && (error == 0 || error == EAGAIN)) error = ffs_sbupdate(ump, waitfor, 0); if (error != 0) allerror = error; if (allerror == 0 && waitfor == MNT_WAIT) goto loop; } else if (suspend != 0) { if (softdep_check_suspend(mp, devvp, softdep_deps, softdep_accdeps, secondary_writes, secondary_accwrites) != 0) { MNT_IUNLOCK(mp); goto loop; /* More work needed */ } mtx_assert(MNT_MTX(mp), MA_OWNED); mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED; MNT_IUNLOCK(mp); suspended = 1; } else BO_UNLOCK(bo); /* * Write back modified superblock. */ if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor, suspended)) != 0) allerror = error; return (allerror); } int ffs_vget(mp, ino, flags, vpp) struct mount *mp; ino_t ino; int flags; struct vnode **vpp; { return (ffs_vgetf(mp, ino, flags, vpp, 0)); } int ffs_vgetf(mp, ino, flags, vpp, ffs_flags) struct mount *mp; ino_t ino; int flags; struct vnode **vpp; int ffs_flags; { struct fs *fs; struct inode *ip; struct ufsmount *ump; struct buf *bp; struct vnode *vp; int error; error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL); if (error || *vpp != NULL) return (error); /* * We must promote to an exclusive lock for vnode creation. This * can happen if lookup is passed LOCKSHARED. */ if ((flags & LK_TYPE_MASK) == LK_SHARED) { flags &= ~LK_TYPE_MASK; flags |= LK_EXCLUSIVE; } /* * We do not lock vnode creation as it is believed to be too * expensive for such rare case as simultaneous creation of vnode * for same ino by different processes. We just allow them to race * and check later to decide who wins. Let the race begin! */ ump = VFSTOUFS(mp); fs = ump->um_fs; ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO); /* Allocate a new vnode/inode. */ error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ? &ffs_vnodeops1 : &ffs_vnodeops2, &vp); if (error) { *vpp = NULL; uma_zfree(uma_inode, ip); return (error); } /* * FFS supports recursive locking. */ lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); VN_LOCK_AREC(vp); vp->v_data = ip; vp->v_bufobj.bo_bsize = fs->fs_bsize; ip->i_vnode = vp; ip->i_ump = ump; ip->i_number = ino; ip->i_ea_refs = 0; ip->i_nextclustercg = -1; ip->i_flag = fs->fs_magic == FS_UFS1_MAGIC ? 0 : IN_UFS2; #ifdef QUOTA { int i; for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; } #endif if (ffs_flags & FFSV_FORCEINSMQ) vp->v_vflag |= VV_FORCEINSMQ; error = insmntque(vp, mp); if (error != 0) { uma_zfree(uma_inode, ip); *vpp = NULL; return (error); } vp->v_vflag &= ~VV_FORCEINSMQ; error = vfs_hash_insert(vp, ino, flags, curthread, vpp, NULL, NULL); if (error || *vpp != NULL) return (error); /* Read in the disk contents for the inode, copy into the inode. */ error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ brelse(bp); vput(vp); *vpp = NULL; return (error); } if (I_IS_UFS1(ip)) ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK); else ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK); ffs_load_inode(bp, ip, fs, ino); if (DOINGSOFTDEP(vp)) softdep_load_inodeblock(ip); else ip->i_effnlink = ip->i_nlink; bqrelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ error = ufs_vinit(mp, I_IS_UFS1(ip) ? &ffs_fifoops1 : &ffs_fifoops2, &vp); if (error) { vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization. */ if (vp->v_type != VFIFO) { /* FFS supports shared locking for all files except fifos. */ VN_LOCK_ASHARE(vp); } /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_gen == 0) { while (ip->i_gen == 0) ip->i_gen = arc4random(); if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { ip->i_flag |= IN_MODIFIED; DIP_SET(ip, i_gen, ip->i_gen); } } #ifdef MAC if ((mp->mnt_flag & MNT_MULTILABEL) && ip->i_mode) { /* * If this vnode is already allocated, and we're running * multi-label, attempt to perform a label association * from the extended attributes on the inode. */ error = mac_vnode_associate_extattr(mp, vp); if (error) { /* ufs_inactive will release ip->i_devvp ref. */ vput(vp); *vpp = NULL; return (error); } } #endif *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - for UFS2 check that the inode number is initialized * - call ffs_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via. exflagsp and credanonp */ static int ffs_fhtovp(mp, fhp, flags, vpp) struct mount *mp; struct fid *fhp; int flags; struct vnode **vpp; { struct ufid *ufhp; struct ufsmount *ump; struct fs *fs; struct cg *cgp; struct buf *bp; ino_t ino; u_int cg; int error; ufhp = (struct ufid *)fhp; ino = ufhp->ufid_ino; ump = VFSTOUFS(mp); fs = ump->um_fs; if (ino < UFS_ROOTINO || ino >= fs->fs_ncg * fs->fs_ipg) return (ESTALE); /* * Need to check if inode is initialized because UFS2 does lazy * initialization and nfs_fhtovp can offer arbitrary inode numbers. */ if (fs->fs_magic != FS_UFS2_MAGIC) return (ufs_fhtovp(mp, ufhp, flags, vpp)); cg = ino_to_cg(fs, ino); if ((error = ffs_getcg(fs, ump->um_devvp, cg, &bp, &cgp)) != 0) return (error); if (ino >= cg * fs->fs_ipg + cgp->cg_initediblk) { brelse(bp); return (ESTALE); } brelse(bp); return (ufs_fhtovp(mp, ufhp, flags, vpp)); } /* * Initialize the filesystem. */ static int ffs_init(vfsp) struct vfsconf *vfsp; { ffs_susp_initialize(); softdep_initialize(); return (ufs_init(vfsp)); } /* * Undo the work of ffs_init(). */ static int ffs_uninit(vfsp) struct vfsconf *vfsp; { int ret; ret = ufs_uninit(vfsp); softdep_uninitialize(); ffs_susp_uninitialize(); return (ret); } /* * Structure used to pass information from ffs_sbupdate to its * helper routine ffs_use_bwrite. */ struct devfd { struct ufsmount *ump; struct buf *sbbp; int waitfor; int suspended; int error; }; /* * Write a superblock and associated information back to disk. */ int ffs_sbupdate(ump, waitfor, suspended) struct ufsmount *ump; int waitfor; int suspended; { struct fs *fs; struct buf *sbbp; struct devfd devfd; fs = ump->um_fs; if (fs->fs_ronly == 1 && (ump->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) != (MNT_RDONLY | MNT_UPDATE) && ump->um_fsckpid == 0) panic("ffs_sbupdate: write read-only filesystem"); /* * We use the superblock's buf to serialize calls to ffs_sbupdate(). */ sbbp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize, 0, 0, 0); /* * Initialize info needed for write function. */ devfd.ump = ump; devfd.sbbp = sbbp; devfd.waitfor = waitfor; devfd.suspended = suspended; devfd.error = 0; return (ffs_sbput(&devfd, fs, fs->fs_sblockloc, ffs_use_bwrite)); } /* * Write function for use by filesystem-layer routines. */ static int ffs_use_bwrite(void *devfd, off_t loc, void *buf, int size) { struct devfd *devfdp; struct ufsmount *ump; struct buf *bp; struct fs *fs; int error; devfdp = devfd; ump = devfdp->ump; fs = ump->um_fs; /* * Writing the superblock summary information. */ if (loc != fs->fs_sblockloc) { bp = getblk(ump->um_devvp, btodb(loc), size, 0, 0, 0); bcopy(buf, bp->b_data, (u_int)size); if (devfdp->suspended) bp->b_flags |= B_VALIDSUSPWRT; if (devfdp->waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) devfdp->error = error; return (0); } /* * Writing the superblock itself. We need to do special checks for it. */ bp = devfdp->sbbp; if (devfdp->error != 0) { brelse(bp); return (devfdp->error); } if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_sblockloc != SBLOCK_UFS1 && (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n", fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS1); fs->fs_sblockloc = SBLOCK_UFS1; } if (fs->fs_magic == FS_UFS2_MAGIC && fs->fs_sblockloc != SBLOCK_UFS2 && (fs->fs_old_flags & FS_FLAGS_UPDATED) == 0) { printf("WARNING: %s: correcting fs_sblockloc from %jd to %d\n", fs->fs_fsmnt, fs->fs_sblockloc, SBLOCK_UFS2); fs->fs_sblockloc = SBLOCK_UFS2; } if (MOUNTEDSOFTDEP(ump->um_mountp)) softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp); bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); ffs_oldfscompat_write((struct fs *)bp->b_data, ump); if (devfdp->suspended) bp->b_flags |= B_VALIDSUSPWRT; if (devfdp->waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) devfdp->error = error; return (devfdp->error); } static int ffs_extattrctl(struct mount *mp, int cmd, struct vnode *filename_vp, int attrnamespace, const char *attrname) { #ifdef UFS_EXTATTR return (ufs_extattrctl(mp, cmd, filename_vp, attrnamespace, attrname)); #else return (vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname)); #endif } static void ffs_ifree(struct ufsmount *ump, struct inode *ip) { if (ump->um_fstype == UFS1 && ip->i_din1 != NULL) uma_zfree(uma_ufs1, ip->i_din1); else if (ip->i_din2 != NULL) uma_zfree(uma_ufs2, ip->i_din2); uma_zfree(uma_inode, ip); } static int dobkgrdwrite = 1; SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0, "Do background writes (honoring the BV_BKGRDWRITE flag)?"); /* * Complete a background write started from bwrite. */ static void ffs_backgroundwritedone(struct buf *bp) { struct bufobj *bufobj; struct buf *origbp; /* * Find the original buffer that we are writing. */ bufobj = bp->b_bufobj; BO_LOCK(bufobj); if ((origbp = gbincore(bp->b_bufobj, bp->b_lblkno)) == NULL) panic("backgroundwritedone: lost buffer"); /* * We should mark the cylinder group buffer origbp as * dirty, to not loose the failed write. */ if ((bp->b_ioflags & BIO_ERROR) != 0) origbp->b_vflags |= BV_BKGRDERR; BO_UNLOCK(bufobj); /* * Process dependencies then return any unfinished ones. */ if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) == 0) buf_complete(bp); #ifdef SOFTUPDATES if (!LIST_EMPTY(&bp->b_dep)) softdep_move_dependencies(bp, origbp); #endif /* * This buffer is marked B_NOCACHE so when it is released * by biodone it will be tossed. */ bp->b_flags |= B_NOCACHE; bp->b_flags &= ~B_CACHE; pbrelvp(bp); /* * Prevent brelse() from trying to keep and re-dirtying bp on * errors. It causes b_bufobj dereference in * bdirty()/reassignbuf(), and b_bufobj was cleared in * pbrelvp() above. */ if ((bp->b_ioflags & BIO_ERROR) != 0) bp->b_flags |= B_INVAL; bufdone(bp); BO_LOCK(bufobj); /* * Clear the BV_BKGRDINPROG flag in the original buffer * and awaken it if it is waiting for the write to complete. * If BV_BKGRDINPROG is not set in the original buffer it must * have been released and re-instantiated - which is not legal. */ KASSERT((origbp->b_vflags & BV_BKGRDINPROG), ("backgroundwritedone: lost buffer2")); origbp->b_vflags &= ~BV_BKGRDINPROG; if (origbp->b_vflags & BV_BKGRDWAIT) { origbp->b_vflags &= ~BV_BKGRDWAIT; wakeup(&origbp->b_xflags); } BO_UNLOCK(bufobj); } /* * Write, release buffer on completion. (Done by iodone * if async). Do not bother writing anything if the buffer * is invalid. * * Note that we set B_CACHE here, indicating that buffer is * fully valid and thus cacheable. This is true even of NFS * now so we set it generally. This could be set either here * or in biodone() since the I/O is synchronous. We put it * here. */ static int ffs_bufwrite(struct buf *bp) { struct buf *newbp; struct cg *cgp; CTR3(KTR_BUF, "bufwrite(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); if (bp->b_flags & B_INVAL) { brelse(bp); return (0); } if (!BUF_ISLOCKED(bp)) panic("bufwrite: buffer is not busy???"); /* * If a background write is already in progress, delay * writing this block if it is asynchronous. Otherwise * wait for the background write to complete. */ BO_LOCK(bp->b_bufobj); if (bp->b_vflags & BV_BKGRDINPROG) { if (bp->b_flags & B_ASYNC) { BO_UNLOCK(bp->b_bufobj); bdwrite(bp); return (0); } bp->b_vflags |= BV_BKGRDWAIT; msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj), PRIBIO, "bwrbg", 0); if (bp->b_vflags & BV_BKGRDINPROG) panic("bufwrite: still writing"); } bp->b_vflags &= ~BV_BKGRDERR; BO_UNLOCK(bp->b_bufobj); /* * If this buffer is marked for background writing and we * do not have to wait for it, make a copy and write the * copy so as to leave this buffer ready for further use. * * This optimization eats a lot of memory. If we have a page * or buffer shortfall we can't do it. */ if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) && (bp->b_flags & B_ASYNC) && !vm_page_count_severe() && !buf_dirty_count_severe()) { KASSERT(bp->b_iodone == NULL, ("bufwrite: needs chained iodone (%p)", bp->b_iodone)); /* get a new block */ newbp = geteblk(bp->b_bufsize, GB_NOWAIT_BD); if (newbp == NULL) goto normal_write; KASSERT(buf_mapped(bp), ("Unmapped cg")); memcpy(newbp->b_data, bp->b_data, bp->b_bufsize); BO_LOCK(bp->b_bufobj); bp->b_vflags |= BV_BKGRDINPROG; BO_UNLOCK(bp->b_bufobj); newbp->b_xflags |= (bp->b_xflags & BX_FSPRIV) | BX_BKGRDMARKER; newbp->b_lblkno = bp->b_lblkno; newbp->b_blkno = bp->b_blkno; newbp->b_offset = bp->b_offset; newbp->b_iodone = ffs_backgroundwritedone; newbp->b_flags |= B_ASYNC; newbp->b_flags &= ~B_INVAL; pbgetvp(bp->b_vp, newbp); #ifdef SOFTUPDATES /* * Move over the dependencies. If there are rollbacks, * leave the parent buffer dirtied as it will need to * be written again. */ if (LIST_EMPTY(&bp->b_dep) || softdep_move_dependencies(bp, newbp) == 0) bundirty(bp); #else bundirty(bp); #endif /* * Initiate write on the copy, release the original. The * BKGRDINPROG flag prevents it from going away until * the background write completes. We have to recalculate * its check hash in case the buffer gets freed and then * reconstituted from the buffer cache during a later read. */ if ((bp->b_xflags & BX_CYLGRP) != 0) { cgp = (struct cg *)bp->b_data; cgp->cg_ckhash = 0; cgp->cg_ckhash = calculate_crc32c(~0L, bp->b_data, bp->b_bcount); } bqrelse(bp); bp = newbp; } else /* Mark the buffer clean */ bundirty(bp); /* Let the normal bufwrite do the rest for us */ normal_write: /* * If we are writing a cylinder group, update its time. */ if ((bp->b_xflags & BX_CYLGRP) != 0) { cgp = (struct cg *)bp->b_data; cgp->cg_old_time = cgp->cg_time = time_second; } return (bufwrite(bp)); } static void ffs_geom_strategy(struct bufobj *bo, struct buf *bp) { struct vnode *vp; struct buf *tbp; int error, nocopy; vp = bo2vnode(bo); if (bp->b_iocmd == BIO_WRITE) { if ((bp->b_flags & B_VALIDSUSPWRT) == 0 && bp->b_vp != NULL && bp->b_vp->v_mount != NULL && (bp->b_vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED) != 0) panic("ffs_geom_strategy: bad I/O"); nocopy = bp->b_flags & B_NOCOPY; bp->b_flags &= ~(B_VALIDSUSPWRT | B_NOCOPY); if ((vp->v_vflag & VV_COPYONWRITE) && nocopy == 0 && vp->v_rdev->si_snapdata != NULL) { if ((bp->b_flags & B_CLUSTER) != 0) { runningbufwakeup(bp); TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head, b_cluster.cluster_entry) { error = ffs_copyonwrite(vp, tbp); if (error != 0 && error != EOPNOTSUPP) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } } bp->b_runningbufspace = bp->b_bufsize; atomic_add_long(&runningbufspace, bp->b_runningbufspace); } else { error = ffs_copyonwrite(vp, bp); if (error != 0 && error != EOPNOTSUPP) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; bufdone(bp); return; } } } #ifdef SOFTUPDATES if ((bp->b_flags & B_CLUSTER) != 0) { TAILQ_FOREACH(tbp, &bp->b_cluster.cluster_head, b_cluster.cluster_entry) { if (!LIST_EMPTY(&tbp->b_dep)) buf_start(tbp); } } else { if (!LIST_EMPTY(&bp->b_dep)) buf_start(bp); } #endif /* * Check for metadata that needs check-hashes and update them. */ switch (bp->b_xflags & BX_FSPRIV) { case BX_CYLGRP: ((struct cg *)bp->b_data)->cg_ckhash = 0; ((struct cg *)bp->b_data)->cg_ckhash = calculate_crc32c(~0L, bp->b_data, bp->b_bcount); break; case BX_SUPERBLOCK: case BX_INODE: case BX_INDIR: case BX_DIR: printf("Check-hash write is unimplemented!!!\n"); break; case 0: break; default: printf("multiple buffer types 0x%b\n", (u_int)(bp->b_xflags & BX_FSPRIV), PRINT_UFS_BUF_XFLAGS); break; } } g_vfs_strategy(bo, bp); } int ffs_own_mount(const struct mount *mp) { if (mp->mnt_op == &ufs_vfsops) return (1); return (0); } #ifdef DDB #ifdef SOFTUPDATES /* defined in ffs_softdep.c */ extern void db_print_ffs(struct ufsmount *ump); DB_SHOW_COMMAND(ffs, db_show_ffs) { struct mount *mp; struct ufsmount *ump; if (have_addr) { ump = VFSTOUFS((struct mount *)addr); db_print_ffs(ump); return; } TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (!strcmp(mp->mnt_stat.f_fstypename, ufs_vfsconf.vfc_name)) db_print_ffs(VFSTOUFS(mp)); } } #endif /* SOFTUPDATES */ #endif /* DDB */ Index: head/sys/ufs/ffs/fs.h =================================================================== --- head/sys/ufs/ffs/fs.h (revision 329050) +++ head/sys/ufs/ffs/fs.h (revision 329051) @@ -1,821 +1,834 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fs.h 8.13 (Berkeley) 3/21/95 * $FreeBSD$ */ #ifndef _UFS_FFS_FS_H_ #define _UFS_FFS_FS_H_ #include #include /* * Each disk drive contains some number of filesystems. * A filesystem consists of a number of cylinder groups. * Each cylinder group has inodes and data. * * A filesystem is described by its super-block, which in turn * describes the cylinder groups. The super-block is critical * data and is replicated in each cylinder group to protect against * catastrophic loss. This is done at `newfs' time and the critical * super-block data does not change, so the copies need not be * referenced further unless disaster strikes. * * For filesystem fs, the offsets of the various blocks of interest * are given in the super block as: * [fs->fs_sblkno] Super-block * [fs->fs_cblkno] Cylinder group block * [fs->fs_iblkno] Inode blocks * [fs->fs_dblkno] Data blocks * The beginning of cylinder group cg in fs, is given by * the ``cgbase(fs, cg)'' macro. * * Depending on the architecture and the media, the superblock may * reside in any one of four places. For tiny media where every block * counts, it is placed at the very front of the partition. Historically, * UFS1 placed it 8K from the front to leave room for the disk label and * a small bootstrap. For UFS2 it got moved to 64K from the front to leave * room for the disk label and a bigger bootstrap, and for really piggy * systems we check at 256K from the front if the first three fail. In * all cases the size of the superblock will be SBLOCKSIZE. All values are * given in byte-offset form, so they do not imply a sector size. The * SBLOCKSEARCH specifies the order in which the locations should be searched. */ #define SBLOCK_FLOPPY 0 #define SBLOCK_UFS1 8192 #define SBLOCK_UFS2 65536 #define SBLOCK_PIGGY 262144 #define SBLOCKSIZE 8192 #define SBLOCKSEARCH \ { SBLOCK_UFS2, SBLOCK_UFS1, SBLOCK_FLOPPY, SBLOCK_PIGGY, -1 } /* * Max number of fragments per block. This value is NOT tweakable. */ #define MAXFRAG 8 /* * Addresses stored in inodes are capable of addressing fragments * of `blocks'. File system blocks of at most size MAXBSIZE can * be optionally broken into 2, 4, or 8 pieces, each of which is * addressable; these pieces may be DEV_BSIZE, or some multiple of * a DEV_BSIZE unit. * * Large files consist of exclusively large data blocks. To avoid * undue wasted disk space, the last data block of a small file may be * allocated as only as many fragments of a large block as are * necessary. The filesystem format retains only a single pointer * to such a fragment, which is a piece of a single large block that * has been divided. The size of such a fragment is determinable from * information in the inode, using the ``blksize(fs, ip, lbn)'' macro. * * The filesystem records space availability at the fragment level; * to determine block availability, aligned fragments are examined. */ /* * MINBSIZE is the smallest allowable block size. * In order to insure that it is possible to create files of size * 2^32 with only two levels of indirection, MINBSIZE is set to 4096. * MINBSIZE must be big enough to hold a cylinder group block, * thus changes to (struct cg) must keep its size within MINBSIZE. * Note that super blocks are always of size SBLOCKSIZE, * and that both SBLOCKSIZE and MAXBSIZE must be >= MINBSIZE. */ #define MINBSIZE 4096 /* * The path name on which the filesystem is mounted is maintained * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in * the super block for this name. */ #define MAXMNTLEN 468 /* * The volume name for this filesystem is maintained in fs_volname. * MAXVOLLEN defines the length of the buffer allocated. */ #define MAXVOLLEN 32 /* * There is a 128-byte region in the superblock reserved for in-core * pointers to summary information. Originally this included an array * of pointers to blocks of struct csum; now there are just a few * pointers and the remaining space is padded with fs_ocsp[]. * * NOCSPTRS determines the size of this padding. One pointer (fs_csp) * is taken away to point to a contiguous array of struct csum for * all cylinder groups; a second (fs_maxcluster) points to an array * of cluster sizes that is computed as cylinder groups are inspected, * and the third points to an array that tracks the creation of new * directories. A fourth pointer, fs_active, is used when creating * snapshots; it points to a bitmap of cylinder groups for which the * free-block bitmap has changed since the snapshot operation began. */ #define NOCSPTRS ((128 / sizeof(void *)) - 4) /* * A summary of contiguous blocks of various sizes is maintained * in each cylinder group. Normally this is set by the initial * value of fs_maxcontig. To conserve space, a maximum summary size * is set by FS_MAXCONTIG. */ #define FS_MAXCONTIG 16 /* * MINFREE gives the minimum acceptable percentage of filesystem * blocks which may be free. If the freelist drops below this level * only the superuser may continue to allocate blocks. This may * be set to 0 if no reserve of free blocks is deemed necessary, * however throughput drops by fifty percent if the filesystem * is run at between 95% and 100% full; thus the minimum default * value of fs_minfree is 5%. However, to get good clustering * performance, 10% is a better choice. hence we use 10% as our * default value. With 10% free space, fragmentation is not a * problem, so we choose to optimize for time. */ #define MINFREE 8 #define DEFAULTOPT FS_OPTTIME /* * Grigoriy Orlov has done some extensive work to fine * tune the layout preferences for directories within a filesystem. * His algorithm can be tuned by adjusting the following parameters * which tell the system the average file size and the average number * of files per directory. These defaults are well selected for typical * filesystems, but may need to be tuned for odd cases like filesystems * being used for squid caches or news spools. */ #define AVFILESIZ 16384 /* expected average file size */ #define AFPDIR 64 /* expected number of files per directory */ /* * The maximum number of snapshot nodes that can be associated * with each filesystem. This limit affects only the number of * snapshot files that can be recorded within the superblock so * that they can be found when the filesystem is mounted. However, * maintaining too many will slow the filesystem performance, so * having this limit is a good idea. */ #define FSMAXSNAP 20 /* * Used to identify special blocks in snapshots: * * BLK_NOCOPY - A block that was unallocated at the time the snapshot * was taken, hence does not need to be copied when written. * BLK_SNAP - A block held by another snapshot that is not needed by this * snapshot. When the other snapshot is freed, the BLK_SNAP entries * are converted to BLK_NOCOPY. These are needed to allow fsck to * identify blocks that are in use by other snapshots (which are * expunged from this snapshot). */ #define BLK_NOCOPY ((ufs2_daddr_t)(1)) #define BLK_SNAP ((ufs2_daddr_t)(2)) /* * Sysctl values for the fast filesystem. */ #define FFS_ADJ_REFCNT 1 /* adjust inode reference count */ #define FFS_ADJ_BLKCNT 2 /* adjust inode used block count */ #define FFS_BLK_FREE 3 /* free range of blocks in map */ #define FFS_DIR_FREE 4 /* free specified dir inodes in map */ #define FFS_FILE_FREE 5 /* free specified file inodes in map */ #define FFS_SET_FLAGS 6 /* set filesystem flags */ #define FFS_ADJ_NDIR 7 /* adjust number of directories */ #define FFS_ADJ_NBFREE 8 /* adjust number of free blocks */ #define FFS_ADJ_NIFREE 9 /* adjust number of free inodes */ #define FFS_ADJ_NFFREE 10 /* adjust number of free frags */ #define FFS_ADJ_NUMCLUSTERS 11 /* adjust number of free clusters */ #define FFS_SET_CWD 12 /* set current directory */ #define FFS_SET_DOTDOT 13 /* set inode number for ".." */ #define FFS_UNLINK 14 /* remove a name in the filesystem */ #define FFS_SET_INODE 15 /* update an on-disk inode */ #define FFS_SET_BUFOUTPUT 16 /* set buffered writing on descriptor */ #define FFS_MAXID 16 /* number of valid ffs ids */ /* * Command structure passed in to the filesystem to adjust filesystem values. */ #define FFS_CMD_VERSION 0x19790518 /* version ID */ struct fsck_cmd { int32_t version; /* version of command structure */ int32_t handle; /* reference to filesystem to be changed */ int64_t value; /* inode or block number to be affected */ int64_t size; /* amount or range to be adjusted */ int64_t spare; /* reserved for future use */ }; /* * A recovery structure placed at the end of the boot block area by newfs * that can be used by fsck to search for alternate superblocks. */ struct fsrecovery { int32_t fsr_magic; /* magic number */ int32_t fsr_fsbtodb; /* fsbtodb and dbtofsb shift constant */ int32_t fsr_sblkno; /* offset of super-block in filesys */ int32_t fsr_fpg; /* blocks per group * fs_frag */ u_int32_t fsr_ncg; /* number of cylinder groups */ }; /* * Per cylinder group information; summarized in blocks allocated * from first cylinder group data blocks. These blocks have to be * read in from fs_csaddr (size fs_cssize) in addition to the * super block. */ struct csum { int32_t cs_ndir; /* number of directories */ int32_t cs_nbfree; /* number of free blocks */ int32_t cs_nifree; /* number of free inodes */ int32_t cs_nffree; /* number of free frags */ }; struct csum_total { int64_t cs_ndir; /* number of directories */ int64_t cs_nbfree; /* number of free blocks */ int64_t cs_nifree; /* number of free inodes */ int64_t cs_nffree; /* number of free frags */ int64_t cs_numclusters; /* number of free clusters */ int64_t cs_spare[3]; /* future expansion */ }; /* * Super block for an FFS filesystem. */ struct fs { int32_t fs_firstfield; /* historic filesystem linked list, */ int32_t fs_unused_1; /* used for incore super blocks */ int32_t fs_sblkno; /* offset of super-block in filesys */ int32_t fs_cblkno; /* offset of cyl-block in filesys */ int32_t fs_iblkno; /* offset of inode-blocks in filesys */ int32_t fs_dblkno; /* offset of first data after cg */ int32_t fs_old_cgoffset; /* cylinder group offset in cylinder */ int32_t fs_old_cgmask; /* used to calc mod fs_ntrak */ int32_t fs_old_time; /* last time written */ int32_t fs_old_size; /* number of blocks in fs */ int32_t fs_old_dsize; /* number of data blocks in fs */ u_int32_t fs_ncg; /* number of cylinder groups */ int32_t fs_bsize; /* size of basic blocks in fs */ int32_t fs_fsize; /* size of frag blocks in fs */ int32_t fs_frag; /* number of frags in a block in fs */ /* these are configuration parameters */ int32_t fs_minfree; /* minimum percentage of free blocks */ int32_t fs_old_rotdelay; /* num of ms for optimal next block */ int32_t fs_old_rps; /* disk revolutions per second */ /* these fields can be computed from the others */ int32_t fs_bmask; /* ``blkoff'' calc of blk offsets */ int32_t fs_fmask; /* ``fragoff'' calc of frag offsets */ int32_t fs_bshift; /* ``lblkno'' calc of logical blkno */ int32_t fs_fshift; /* ``numfrags'' calc number of frags */ /* these are configuration parameters */ int32_t fs_maxcontig; /* max number of contiguous blks */ int32_t fs_maxbpg; /* max number of blks per cyl group */ /* these fields can be computed from the others */ int32_t fs_fragshift; /* block to frag shift */ int32_t fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ int32_t fs_sbsize; /* actual size of super block */ int32_t fs_spare1[2]; /* old fs_csmask */ /* old fs_csshift */ int32_t fs_nindir; /* value of NINDIR */ u_int32_t fs_inopb; /* value of INOPB */ int32_t fs_old_nspf; /* value of NSPF */ /* yet another configuration parameter */ int32_t fs_optim; /* optimization preference, see below */ int32_t fs_old_npsect; /* # sectors/track including spares */ int32_t fs_old_interleave; /* hardware sector interleave */ int32_t fs_old_trackskew; /* sector 0 skew, per track */ int32_t fs_id[2]; /* unique filesystem id */ /* sizes determined by number of cylinder groups and their sizes */ int32_t fs_old_csaddr; /* blk addr of cyl grp summary area */ int32_t fs_cssize; /* size of cyl grp summary area */ int32_t fs_cgsize; /* cylinder group size */ int32_t fs_spare2; /* old fs_ntrak */ int32_t fs_old_nsect; /* sectors per track */ int32_t fs_old_spc; /* sectors per cylinder */ int32_t fs_old_ncyl; /* cylinders in filesystem */ int32_t fs_old_cpg; /* cylinders per group */ u_int32_t fs_ipg; /* inodes per group */ int32_t fs_fpg; /* blocks per group * fs_frag */ /* this data must be re-computed after crashes */ struct csum fs_old_cstotal; /* cylinder summary information */ /* these fields are cleared at mount time */ int8_t fs_fmod; /* super block modified flag */ int8_t fs_clean; /* filesystem is clean flag */ int8_t fs_ronly; /* mounted read-only flag */ int8_t fs_old_flags; /* old FS_ flags */ u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ u_char fs_volname[MAXVOLLEN]; /* volume name */ u_int64_t fs_swuid; /* system-wide uid */ int32_t fs_pad; /* due to alignment of fs_swuid */ /* these fields retain the current block allocation info */ int32_t fs_cgrotor; /* last cg searched */ void *fs_ocsp[NOCSPTRS]; /* padding; was list of fs_cs buffers */ u_int8_t *fs_contigdirs; /* (u) # of contig. allocated dirs */ struct csum *fs_csp; /* (u) cg summary info buffer */ int32_t *fs_maxcluster; /* (u) max cluster in each cyl group */ u_int *fs_active; /* (u) used by snapshots to track fs */ int32_t fs_old_cpc; /* cyl per cycle in postbl */ int32_t fs_maxbsize; /* maximum blocking factor permitted */ int64_t fs_unrefs; /* number of unreferenced inodes */ int64_t fs_providersize; /* size of underlying GEOM provider */ int64_t fs_metaspace; /* size of area reserved for metadata */ int64_t fs_sparecon64[13]; /* old rotation block list head */ int64_t fs_sblockactualloc; /* byte offset of this superblock */ int64_t fs_sblockloc; /* byte offset of standard superblock */ struct csum_total fs_cstotal; /* (u) cylinder summary information */ ufs_time_t fs_time; /* last time written */ int64_t fs_size; /* number of blocks in fs */ int64_t fs_dsize; /* number of data blocks in fs */ ufs2_daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ int64_t fs_pendingblocks; /* (u) blocks being freed */ u_int32_t fs_pendinginodes; /* (u) inodes being freed */ uint32_t fs_snapinum[FSMAXSNAP];/* list of snapshot inode numbers */ u_int32_t fs_avgfilesize; /* expected average file size */ u_int32_t fs_avgfpdir; /* expected # of files per directory */ int32_t fs_save_cgsize; /* save real cg size to use fs_bsize */ ufs_time_t fs_mtime; /* Last mount or fsck time. */ int32_t fs_sujfree; /* SUJ free list */ int32_t fs_sparecon32[22]; /* reserved for future constants */ u_int32_t fs_metackhash; /* metadata check-hash, see CK_ below */ int32_t fs_flags; /* see FS_ flags below */ int32_t fs_contigsumsize; /* size of cluster summary array */ int32_t fs_maxsymlinklen; /* max length of an internal symlink */ int32_t fs_old_inodefmt; /* format of on-disk inodes */ u_int64_t fs_maxfilesize; /* maximum representable file size */ int64_t fs_qbmask; /* ~fs_bmask for use with 64-bit size */ int64_t fs_qfmask; /* ~fs_fmask for use with 64-bit size */ int32_t fs_state; /* validate fs_clean field */ int32_t fs_old_postblformat; /* format of positional layout tables */ int32_t fs_old_nrpos; /* number of rotational positions */ int32_t fs_spare5[2]; /* old fs_postbloff */ /* old fs_rotbloff */ int32_t fs_magic; /* magic number */ }; /* Sanity checking. */ #ifdef CTASSERT CTASSERT(sizeof(struct fs) == 1376); #endif /* * Filesystem identification */ #define FS_UFS1_MAGIC 0x011954 /* UFS1 fast filesystem magic number */ #define FS_UFS2_MAGIC 0x19540119 /* UFS2 fast filesystem magic number */ #define FS_BAD_MAGIC 0x19960408 /* UFS incomplete newfs magic number */ #define FS_42INODEFMT -1 /* 4.2BSD inode format */ #define FS_44INODEFMT 2 /* 4.4BSD inode format */ /* * Preference for optimization. */ #define FS_OPTTIME 0 /* minimize allocation time */ #define FS_OPTSPACE 1 /* minimize disk fragmentation */ /* * Filesystem flags. * * The FS_UNCLEAN flag is set by the kernel when the filesystem was * mounted with fs_clean set to zero. The FS_DOSOFTDEP flag indicates * that the filesystem should be managed by the soft updates code. * Note that the FS_NEEDSFSCK flag is set and cleared only by the * fsck utility. It is set when background fsck finds an unexpected * inconsistency which requires a traditional foreground fsck to be * run. Such inconsistencies should only be found after an uncorrectable * disk error. A foreground fsck will clear the FS_NEEDSFSCK flag when * it has successfully cleaned up the filesystem. The kernel uses this * flag to enforce that inconsistent filesystems be mounted read-only. * The FS_INDEXDIRS flag when set indicates that the kernel maintains * on-disk auxiliary indexes (such as B-trees) for speeding directory * accesses. Kernels that do not support auxiliary indices clear the * flag to indicate that the indices need to be rebuilt (by fsck) before * they can be used. When a filesystem is mounted, any flags not * included in FS_SUPPORTED are cleared. This lets newer features * know that the filesystem has been run on an older version of the * filesystem and thus that data structures associated with those * features are out-of-date and need to be rebuilt. * * FS_ACLS indicates that POSIX.1e ACLs are administratively enabled * for the file system, so they should be loaded from extended attributes, * observed for access control purposes, and be administered by object * owners. FS_NFS4ACLS indicates that NFSv4 ACLs are administratively * enabled. This flag is mutually exclusive with FS_ACLS. FS_MULTILABEL * indicates that the TrustedBSD MAC Framework should attempt to back MAC * labels into extended attributes on the file system rather than maintain * a single mount label for all objects. */ -#define FS_UNCLEAN 0x0001 /* filesystem not clean at mount */ -#define FS_DOSOFTDEP 0x0002 /* filesystem using soft dependencies */ -#define FS_NEEDSFSCK 0x0004 /* filesystem needs sync fsck before mount */ -#define FS_SUJ 0x0008 /* Filesystem using softupdate journal */ -#define FS_ACLS 0x0010 /* file system has POSIX.1e ACLs enabled */ -#define FS_MULTILABEL 0x0020 /* file system is MAC multi-label */ -#define FS_GJOURNAL 0x0040 /* gjournaled file system */ -#define FS_FLAGS_UPDATED 0x0080 /* flags have been moved to new location */ -#define FS_NFS4ACLS 0x0100 /* file system has NFSv4 ACLs enabled */ -#define FS_INDEXDIRS 0x0200 /* kernel supports indexed directories */ -#define FS_TRIM 0x0400 /* issue BIO_DELETE for deleted blocks */ -#define FS_SUPPORTED 0xFFFF /* supported flags, others cleared at mount */ +#define FS_UNCLEAN 0x00000001 /* filesystem not clean at mount */ +#define FS_DOSOFTDEP 0x00000002 /* filesystem using soft dependencies */ +#define FS_NEEDSFSCK 0x00000004 /* filesystem needs sync fsck before mount */ +#define FS_SUJ 0x00000008 /* Filesystem using softupdate journal */ +#define FS_ACLS 0x00000010 /* file system has POSIX.1e ACLs enabled */ +#define FS_MULTILABEL 0x00000020 /* file system is MAC multi-label */ +#define FS_GJOURNAL 0x00000040 /* gjournaled file system */ +#define FS_FLAGS_UPDATED 0x0000080 /* flags have been moved to new location */ +#define FS_NFS4ACLS 0x00000100 /* file system has NFSv4 ACLs enabled */ +#define FS_METACKHASH 0x00000200 /* kernel supports metadata check hashes */ +#define FS_TRIM 0x00000400 /* issue BIO_DELETE for deleted blocks */ +#define FS_SUPPORTED 0x00FFFFFF /* supported flags, others cleared at mount*/ +/* + * Things that we may someday support, but currently do not. + * These flags are all cleared so we know if we ran on a kernel + * that does not support them. + */ +#define FS_INDEXDIRS 0x01000000 /* kernel supports indexed directories */ +#define FS_VARBLKSIZE 0x02000000 /* kernel supports variable block sizes */ +#define FS_COOLOPT1 0x04000000 /* kernel supports cool option 1 */ +#define FS_COOLOPT2 0x08000000 /* kernel supports cool option 2 */ +#define FS_COOLOPT3 0x10000000 /* kernel supports cool option 3 */ +#define FS_COOLOPT4 0x20000000 /* kernel supports cool option 4 */ +#define FS_COOLOPT5 0x40000000 /* kernel supports cool option 5 */ +#define FS_COOLOPT6 0x80000000 /* kernel supports cool option 6 */ /* * The fs_metackhash field indicates the types of metadata check-hash * that are maintained for a filesystem. Not all filesystems check-hash * all metadata. */ #define CK_SUPERBLOCK 0x0001 /* the superblock */ #define CK_CYLGRP 0x0002 /* the cylinder groups */ #define CK_INODE 0x0004 /* inodes */ #define CK_INDIR 0x0008 /* indirect blocks */ #define CK_DIR 0x0010 /* directory contents */ /* * The BX_FSPRIV buffer b_xflags are used to track types of data in buffers. */ #define BX_SUPERBLOCK 0x00010000 /* superblock */ #define BX_CYLGRP 0x00020000 /* cylinder groups */ #define BX_INODE 0x00040000 /* inodes */ #define BX_INDIR 0x00080000 /* indirect blocks */ #define BX_DIR 0x00100000 /* directory contents */ #define PRINT_UFS_BUF_XFLAGS "\20\25dir\24indir\23inode\22cylgrp\21superblock" /* * Macros to access bits in the fs_active array. */ #define ACTIVECGNUM(fs, cg) ((fs)->fs_active[(cg) / (NBBY * sizeof(int))]) #define ACTIVECGOFF(cg) (1 << ((cg) % (NBBY * sizeof(int)))) #define ACTIVESET(fs, cg) do { \ if ((fs)->fs_active) \ ACTIVECGNUM((fs), (cg)) |= ACTIVECGOFF((cg)); \ } while (0) #define ACTIVECLEAR(fs, cg) do { \ if ((fs)->fs_active) \ ACTIVECGNUM((fs), (cg)) &= ~ACTIVECGOFF((cg)); \ } while (0) /* * The size of a cylinder group is calculated by CGSIZE. The maximum size * is limited by the fact that cylinder groups are at most one block. * Its size is derived from the size of the maps maintained in the * cylinder group and the (struct cg) size. */ #define CGSIZE(fs) \ /* base cg */ (sizeof(struct cg) + sizeof(int32_t) + \ /* old btotoff */ (fs)->fs_old_cpg * sizeof(int32_t) + \ /* old boff */ (fs)->fs_old_cpg * sizeof(u_int16_t) + \ /* inode map */ howmany((fs)->fs_ipg, NBBY) + \ /* block map */ howmany((fs)->fs_fpg, NBBY) +\ /* if present */ ((fs)->fs_contigsumsize <= 0 ? 0 : \ /* cluster sum */ (fs)->fs_contigsumsize * sizeof(int32_t) + \ /* cluster map */ howmany(fragstoblks(fs, (fs)->fs_fpg), NBBY))) /* * The minimal number of cylinder groups that should be created. */ #define MINCYLGRPS 4 /* * Convert cylinder group to base address of its global summary info. */ #define fs_cs(fs, indx) fs_csp[indx] /* * Cylinder group block for a filesystem. */ #define CG_MAGIC 0x090255 struct cg { int32_t cg_firstfield; /* historic cyl groups linked list */ int32_t cg_magic; /* magic number */ int32_t cg_old_time; /* time last written */ u_int32_t cg_cgx; /* we are the cgx'th cylinder group */ int16_t cg_old_ncyl; /* number of cyl's this cg */ int16_t cg_old_niblk; /* number of inode blocks this cg */ u_int32_t cg_ndblk; /* number of data blocks this cg */ struct csum cg_cs; /* cylinder summary information */ u_int32_t cg_rotor; /* position of last used block */ u_int32_t cg_frotor; /* position of last used frag */ u_int32_t cg_irotor; /* position of last used inode */ u_int32_t cg_frsum[MAXFRAG]; /* counts of available frags */ int32_t cg_old_btotoff; /* (int32) block totals per cylinder */ int32_t cg_old_boff; /* (u_int16) free block positions */ u_int32_t cg_iusedoff; /* (u_int8) used inode map */ u_int32_t cg_freeoff; /* (u_int8) free block map */ u_int32_t cg_nextfreeoff; /* (u_int8) next available space */ u_int32_t cg_clustersumoff; /* (u_int32) counts of avail clusters */ u_int32_t cg_clusteroff; /* (u_int8) free cluster map */ u_int32_t cg_nclusterblks; /* number of clusters this cg */ u_int32_t cg_niblk; /* number of inode blocks this cg */ u_int32_t cg_initediblk; /* last initialized inode */ u_int32_t cg_unrefs; /* number of unreferenced inodes */ int32_t cg_sparecon32[1]; /* reserved for future use */ u_int32_t cg_ckhash; /* check-hash of this cg */ ufs_time_t cg_time; /* time last written */ int64_t cg_sparecon64[3]; /* reserved for future use */ u_int8_t cg_space[1]; /* space for cylinder group maps */ /* actually longer */ }; /* * Macros for access to cylinder group array structures */ #define cg_chkmagic(cgp) ((cgp)->cg_magic == CG_MAGIC) #define cg_inosused(cgp) \ ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_iusedoff)) #define cg_blksfree(cgp) \ ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_freeoff)) #define cg_clustersfree(cgp) \ ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_clusteroff)) #define cg_clustersum(cgp) \ ((int32_t *)((uintptr_t)(cgp) + (cgp)->cg_clustersumoff)) /* * Turn filesystem block numbers into disk block addresses. * This maps filesystem blocks to device size blocks. */ #define fsbtodb(fs, b) ((daddr_t)(b) << (fs)->fs_fsbtodb) #define dbtofsb(fs, b) ((b) >> (fs)->fs_fsbtodb) /* * Cylinder group macros to locate things in cylinder groups. * They calc filesystem addresses of cylinder group data structures. */ #define cgbase(fs, c) (((ufs2_daddr_t)(fs)->fs_fpg) * (c)) #define cgdata(fs, c) (cgdmin(fs, c) + (fs)->fs_metaspace) /* data zone */ #define cgmeta(fs, c) (cgdmin(fs, c)) /* meta data */ #define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */ #define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */ #define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */ #define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */ #define cgstart(fs, c) \ ((fs)->fs_magic == FS_UFS2_MAGIC ? cgbase(fs, c) : \ (cgbase(fs, c) + (fs)->fs_old_cgoffset * ((c) & ~((fs)->fs_old_cgmask)))) /* * Macros for handling inode numbers: * inode number to filesystem block offset. * inode number to cylinder group number. * inode number to filesystem block address. */ #define ino_to_cg(fs, x) (((ino_t)(x)) / (fs)->fs_ipg) #define ino_to_fsba(fs, x) \ ((ufs2_daddr_t)(cgimin(fs, ino_to_cg(fs, (ino_t)(x))) + \ (blkstofrags((fs), ((((ino_t)(x)) % (fs)->fs_ipg) / INOPB(fs)))))) #define ino_to_fsbo(fs, x) (((ino_t)(x)) % INOPB(fs)) /* * Give cylinder group number for a filesystem block. * Give cylinder group block number for a filesystem block. */ #define dtog(fs, d) ((d) / (fs)->fs_fpg) #define dtogd(fs, d) ((d) % (fs)->fs_fpg) /* * Extract the bits for a block from a map. * Compute the cylinder and rotational position of a cyl block addr. */ #define blkmap(fs, map, loc) \ (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag))) /* * The following macros optimize certain frequently calculated * quantities by using shifts and masks in place of divisions * modulos and multiplications. */ #define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \ ((loc) & (fs)->fs_qbmask) #define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \ ((loc) & (fs)->fs_qfmask) #define lfragtosize(fs, frag) /* calculates ((off_t)frag * fs->fs_fsize) */ \ (((off_t)(frag)) << (fs)->fs_fshift) #define lblktosize(fs, blk) /* calculates ((off_t)blk * fs->fs_bsize) */ \ (((off_t)(blk)) << (fs)->fs_bshift) /* Use this only when `blk' is known to be small, e.g., < UFS_NDADDR. */ #define smalllblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \ ((blk) << (fs)->fs_bshift) #define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ ((loc) >> (fs)->fs_bshift) #define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ ((loc) >> (fs)->fs_fshift) #define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \ (((size) + (fs)->fs_qbmask) & (fs)->fs_bmask) #define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \ (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask) #define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \ ((frags) >> (fs)->fs_fragshift) #define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \ ((blks) << (fs)->fs_fragshift) #define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \ ((fsb) & ((fs)->fs_frag - 1)) #define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \ ((fsb) &~ ((fs)->fs_frag - 1)) /* * Determine the number of available frags given a * percentage to hold in reserve. */ #define freespace(fs, percentreserved) \ (blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \ (fs)->fs_cstotal.cs_nffree - \ (((off_t)((fs)->fs_dsize)) * (percentreserved) / 100)) /* * Determining the size of a file block in the filesystem. */ #define blksize(fs, ip, lbn) \ (((lbn) >= UFS_NDADDR || (ip)->i_size >= \ (uint64_t)smalllblktosize(fs, (lbn) + 1)) \ ? (fs)->fs_bsize \ : (fragroundup(fs, blkoff(fs, (ip)->i_size)))) #define sblksize(fs, size, lbn) \ (((lbn) >= UFS_NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \ ? (fs)->fs_bsize \ : (fragroundup(fs, blkoff(fs, (size))))) /* * Number of indirects in a filesystem block. */ #define NINDIR(fs) ((fs)->fs_nindir) /* * Indirect lbns are aligned on UFS_NDADDR addresses where single indirects * are the negated address of the lowest lbn reachable, double indirects * are this lbn - 1 and triple indirects are this lbn - 2. This yields * an unusual bit order to determine level. */ static inline int lbn_level(ufs_lbn_t lbn) { if (lbn >= 0) return 0; switch (lbn & 0x3) { case 0: return (0); case 1: break; case 2: return (2); case 3: return (1); default: break; } return (-1); } static inline ufs_lbn_t lbn_offset(struct fs *fs, int level) { ufs_lbn_t res; for (res = 1; level > 0; level--) res *= NINDIR(fs); return (res); } /* * Number of inodes in a secondary storage block/fragment. */ #define INOPB(fs) ((fs)->fs_inopb) #define INOPF(fs) ((fs)->fs_inopb >> (fs)->fs_fragshift) /* * Softdep journal record format. */ #define JOP_ADDREF 1 /* Add a reference to an inode. */ #define JOP_REMREF 2 /* Remove a reference from an inode. */ #define JOP_NEWBLK 3 /* Allocate a block. */ #define JOP_FREEBLK 4 /* Free a block or a tree of blocks. */ #define JOP_MVREF 5 /* Move a reference from one off to another. */ #define JOP_TRUNC 6 /* Partial truncation record. */ #define JOP_SYNC 7 /* fsync() complete record. */ #define JREC_SIZE 32 /* Record and segment header size. */ #define SUJ_MIN (4 * 1024 * 1024) /* Minimum journal size */ #define SUJ_MAX (32 * 1024 * 1024) /* Maximum journal size */ #define SUJ_FILE ".sujournal" /* Journal file name */ /* * Size of the segment record header. There is at most one for each disk * block in the journal. The segment header is followed by an array of * records. fsck depends on the first element in each record being 'op' * and the second being 'ino'. Segments may span multiple disk blocks but * the header is present on each. */ struct jsegrec { uint64_t jsr_seq; /* Our sequence number */ uint64_t jsr_oldest; /* Oldest valid sequence number */ uint16_t jsr_cnt; /* Count of valid records */ uint16_t jsr_blocks; /* Count of device bsize blocks. */ uint32_t jsr_crc; /* 32bit crc of the valid space */ ufs_time_t jsr_time; /* timestamp for mount instance */ }; /* * Reference record. Records a single link count modification. */ struct jrefrec { uint32_t jr_op; uint32_t jr_ino; uint32_t jr_parent; uint16_t jr_nlink; uint16_t jr_mode; int64_t jr_diroff; uint64_t jr_unused; }; /* * Move record. Records a reference moving within a directory block. The * nlink is unchanged but we must search both locations. */ struct jmvrec { uint32_t jm_op; uint32_t jm_ino; uint32_t jm_parent; uint16_t jm_unused; int64_t jm_oldoff; int64_t jm_newoff; }; /* * Block record. A set of frags or tree of blocks starting at an indirect are * freed or a set of frags are allocated. */ struct jblkrec { uint32_t jb_op; uint32_t jb_ino; ufs2_daddr_t jb_blkno; ufs_lbn_t jb_lbn; uint16_t jb_frags; uint16_t jb_oldfrags; uint32_t jb_unused; }; /* * Truncation record. Records a partial truncation so that it may be * completed at check time. Also used for sync records. */ struct jtrncrec { uint32_t jt_op; uint32_t jt_ino; int64_t jt_size; uint32_t jt_extsize; uint32_t jt_pad[3]; }; union jrec { struct jsegrec rec_jsegrec; struct jrefrec rec_jrefrec; struct jmvrec rec_jmvrec; struct jblkrec rec_jblkrec; struct jtrncrec rec_jtrncrec; }; #ifdef CTASSERT CTASSERT(sizeof(struct jsegrec) == JREC_SIZE); CTASSERT(sizeof(struct jrefrec) == JREC_SIZE); CTASSERT(sizeof(struct jmvrec) == JREC_SIZE); CTASSERT(sizeof(struct jblkrec) == JREC_SIZE); CTASSERT(sizeof(struct jtrncrec) == JREC_SIZE); CTASSERT(sizeof(union jrec) == JREC_SIZE); #endif extern int inside[], around[]; extern u_char *fragtbl[]; /* * IOCTLs used for filesystem write suspension. */ #define UFSSUSPEND _IOW('U', 1, fsid_t) #define UFSRESUME _IO('U', 2) #endif