Changeset View
Standalone View
sbin/fsck_ffs/fsutil.c
| Show First 20 Lines • Show All 65 Lines • ▼ Show 20 Lines | |||||||||
| int sujrecovery = 0; | int sujrecovery = 0; | ||||||||
| static struct bufarea *allocbuf(const char *); | static struct bufarea *allocbuf(const char *); | ||||||||
| static void cg_write(struct bufarea *); | static void cg_write(struct bufarea *); | ||||||||
| static void slowio_start(void); | static void slowio_start(void); | ||||||||
| static void slowio_end(void); | static void slowio_end(void); | ||||||||
| static void printIOstats(void); | static void printIOstats(void); | ||||||||
| static void prtbuf(const char *, struct bufarea *); | |||||||||
| static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */ | static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */ | ||||||||
| static struct timespec startpass, finishpass; | static struct timespec startpass, finishpass; | ||||||||
| struct timeval slowio_starttime; | struct timeval slowio_starttime; | ||||||||
| int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ | int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ | ||||||||
| int slowio_pollcnt; | int slowio_pollcnt; | ||||||||
| static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ | static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ | ||||||||
| static struct bufarea failedbuf; /* returned by failed getdatablk() */ | |||||||||
| static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */ | static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */ | ||||||||
| static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */ | static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */ | ||||||||
| static struct bufhash freebufs; /* unused buffers */ | |||||||||
| static int numbufs; /* size of buffer cache */ | static int numbufs; /* size of buffer cache */ | ||||||||
| static int cachelookups; /* number of cache lookups */ | static int cachelookups; /* number of cache lookups */ | ||||||||
| static int cachereads; /* number of cache reads */ | static int cachereads; /* number of cache reads */ | ||||||||
| static int flushtries; /* number of tries to reclaim memory */ | static int flushtries; /* number of tries to reclaim memory */ | ||||||||
| char *buftype[BT_NUMBUFTYPES] = BT_NAMES; | char *buftype[BT_NUMBUFTYPES] = BT_NAMES; | ||||||||
| void | void | ||||||||
| ▲ Show 20 Lines • Show All 90 Lines • ▼ Show 20 Lines | |||||||||
| /* | /* | ||||||||
| * Malloc buffers and set up cache. | * Malloc buffers and set up cache. | ||||||||
| */ | */ | ||||||||
| void | void | ||||||||
| bufinit(void) | bufinit(void) | ||||||||
| { | { | ||||||||
| int i; | int i; | ||||||||
| initbarea(&failedbuf, BT_UNKNOWN); | |||||||||
| failedbuf.b_errs = -1; | |||||||||
| failedbuf.b_un.b_buf = NULL; | |||||||||
| if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL) | if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL) | ||||||||
| errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize); | errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize); | ||||||||
| initbarea(&cgblk, BT_CYLGRP); | initbarea(&cgblk, BT_CYLGRP); | ||||||||
| numbufs = cachelookups = cachereads = 0; | numbufs = cachelookups = cachereads = 0; | ||||||||
| TAILQ_INIT(&bufqueuehd); | TAILQ_INIT(&bufqueuehd); | ||||||||
| LIST_INIT(&freebufs); | |||||||||
| for (i = 0; i < HASHSIZE; i++) | for (i = 0; i < HASHSIZE; i++) | ||||||||
| LIST_INIT(&bufhashhd[i]); | LIST_INIT(&bufhashhd[i]); | ||||||||
| for (i = 0; i < BT_NUMBUFTYPES; i++) { | for (i = 0; i < BT_NUMBUFTYPES; i++) { | ||||||||
| readtime[i].tv_sec = totalreadtime[i].tv_sec = 0; | readtime[i].tv_sec = totalreadtime[i].tv_sec = 0; | ||||||||
| readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0; | readtime[i].tv_nsec = totalreadtime[i].tv_nsec = 0; | ||||||||
| readcnt[i] = totalreadcnt[i] = 0; | readcnt[i] = totalreadcnt[i] = 0; | ||||||||
| } | } | ||||||||
| } | } | ||||||||
| ▲ Show 20 Lines • Show All 92 Lines • ▼ Show 20 Lines | if (cgbp->b_un.b_cg == NULL) | ||||||||
| return (0); | return (0); | ||||||||
| flush(fswritefd, cgbp); | flush(fswritefd, cgbp); | ||||||||
| free(cgbp->b_un.b_buf); | free(cgbp->b_un.b_buf); | ||||||||
| cgbp->b_un.b_buf = NULL; | cgbp->b_un.b_buf = NULL; | ||||||||
| return (1); | return (1); | ||||||||
| } | } | ||||||||
| /* | /* | ||||||||
| * Manage a cache of directory blocks. | * Manage a cache of filesystem disk blocks. | ||||||||
| */ | */ | ||||||||
| struct bufarea * | struct bufarea * | ||||||||
| getdatablk(ufs2_daddr_t blkno, long size, int type) | getdatablk(ufs2_daddr_t blkno, long size, int type) | ||||||||
| { | { | ||||||||
| struct bufarea *bp; | struct bufarea *bp; | ||||||||
| struct bufhash *bhdp; | struct bufhash *bhdp; | ||||||||
| cachelookups++; | cachelookups++; | ||||||||
| /* If out of range, return empty buffer with b_err == -1 */ | /* | ||||||||
| if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) { | * If out of range, return empty buffer with b_err == -1 | ||||||||
| blkno = -1; | * | ||||||||
| type = BT_EMPTY; | * Skip check for inodes because chkrange() considers | ||||||||
| } | * metadata areas invalid to write data. | ||||||||
| */ | |||||||||
| if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) | |||||||||
| return (&failedbuf); | |||||||||
kibUnsubmitted Done Inline Actions
kib: | |||||||||
| bhdp = &bufhashhd[HASH(blkno)]; | bhdp = &bufhashhd[HASH(blkno)]; | ||||||||
| LIST_FOREACH(bp, bhdp, b_hash) | LIST_FOREACH(bp, bhdp, b_hash) | ||||||||
| if (bp->b_bno == fsbtodb(&sblock, blkno)) { | if (bp->b_bno == fsbtodb(&sblock, blkno)) { | ||||||||
| if (debug && bp->b_size != size) { | if (debug && bp->b_size != size) { | ||||||||
| prtbuf("getdatablk: size mismatch", bp); | prtbuf(bp, "getdatablk: size mismatch"); | ||||||||
| pfatal("getdatablk: b_size %d != size %ld\n", | pfatal("getdatablk: b_size %d != size %ld\n", | ||||||||
| bp->b_size, size); | bp->b_size, size); | ||||||||
| } | } | ||||||||
| TAILQ_REMOVE(&bufqueuehd, bp, b_list); | |||||||||
| goto foundit; | goto foundit; | ||||||||
| } | } | ||||||||
| /* | /* | ||||||||
| * Move long-term busy buffer back to the front of the LRU so we | * Move long-term busy buffer back to the front of the LRU so we | ||||||||
| * do not endlessly inspect them for recycling. | * do not endlessly inspect them for recycling. | ||||||||
| */ | */ | ||||||||
| bp = TAILQ_LAST(&bufqueuehd, bufqueue); | bp = TAILQ_LAST(&bufqueuehd, bufqueue); | ||||||||
| if (bp != NULL && bp->b_refcnt != 0) { | if (bp != NULL && bp->b_refcnt != 0) { | ||||||||
| TAILQ_REMOVE(&bufqueuehd, bp, b_list); | TAILQ_REMOVE(&bufqueuehd, bp, b_list); | ||||||||
| TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); | TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); | ||||||||
| } | } | ||||||||
| /* | /* | ||||||||
| * Allocate up to the minimum number of buffers before | * Allocate up to the minimum number of buffers before | ||||||||
| * considering recycling any of them. | * considering recycling any of them. | ||||||||
| */ | */ | ||||||||
| if (size > sblock.fs_bsize) | if (size > sblock.fs_bsize) | ||||||||
| errx(EEXIT, "Excessive buffer size %ld > %d\n", size, | errx(EEXIT, "Excessive buffer size %ld > %d\n", size, | ||||||||
| sblock.fs_bsize); | sblock.fs_bsize); | ||||||||
| if (numbufs < MINBUFS) { | if ((bp = LIST_FIRST(&freebufs)) != NULL) { | ||||||||
| LIST_REMOVE(bp, b_hash); | |||||||||
| } else if (numbufs < MINBUFS) { | |||||||||
| bp = allocbuf("cannot create minimal buffer pool"); | bp = allocbuf("cannot create minimal buffer pool"); | ||||||||
| } else if (sujrecovery) { | } else if (sujrecovery) { | ||||||||
| /* | /* | ||||||||
| * SUJ recovery does not want anything written until it | * SUJ recovery does not want anything written until it | ||||||||
| * has successfully completed (so it can fail back to | * has successfully completed (so it can fail back to | ||||||||
| * full fsck). Thus, we can only recycle clean buffers. | * full fsck). Thus, we can only recycle clean buffers. | ||||||||
| */ | */ | ||||||||
| TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) | TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) | ||||||||
| Show All 11 Lines | if ((bp = LIST_FIRST(&freebufs)) != NULL) { | ||||||||
| TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) | TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) | ||||||||
| if (bp->b_refcnt == 0) | if (bp->b_refcnt == 0) | ||||||||
| break; | break; | ||||||||
| if (bp == NULL) | if (bp == NULL) | ||||||||
| bp = allocbuf("Ran out of memory for buffers"); | bp = allocbuf("Ran out of memory for buffers"); | ||||||||
| else | else | ||||||||
| LIST_REMOVE(bp, b_hash); | LIST_REMOVE(bp, b_hash); | ||||||||
| } | } | ||||||||
| TAILQ_REMOVE(&bufqueuehd, bp, b_list); | |||||||||
| flush(fswritefd, bp); | flush(fswritefd, bp); | ||||||||
| bp->b_type = type; | bp->b_type = type; | ||||||||
| LIST_INSERT_HEAD(bhdp, bp, b_hash); | LIST_INSERT_HEAD(bhdp, bp, b_hash); | ||||||||
| getblk(bp, blkno, size); | getblk(bp, blkno, size); | ||||||||
| cachereads++; | cachereads++; | ||||||||
| /* fall through */ | /* fall through */ | ||||||||
| foundit: | foundit: | ||||||||
| TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); | |||||||||
| if (debug && bp->b_type != type) { | if (debug && bp->b_type != type) { | ||||||||
| printf("getdatablk: buffer type changed to %s", | printf("getdatablk: buffer type changed to %s", | ||||||||
| BT_BUFTYPE(type)); | BT_BUFTYPE(type)); | ||||||||
| prtbuf("", bp); | prtbuf(bp, ""); | ||||||||
| } | } | ||||||||
| TAILQ_REMOVE(&bufqueuehd, bp, b_list); | |||||||||
| TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); | |||||||||
| if (bp->b_errs == 0) | if (bp->b_errs == 0) | ||||||||
| bp->b_refcnt++; | bp->b_refcnt++; | ||||||||
| return (bp); | return (bp); | ||||||||
| } | } | ||||||||
| void | void | ||||||||
| getblk(struct bufarea *bp, ufs2_daddr_t blk, long size) | getblk(struct bufarea *bp, ufs2_daddr_t blk, long size) | ||||||||
| { | { | ||||||||
| ufs2_daddr_t dblk; | ufs2_daddr_t dblk; | ||||||||
| struct timespec start, finish; | struct timespec start, finish; | ||||||||
| dblk = fsbtodb(&sblock, blk); | dblk = fsbtodb(&sblock, blk); | ||||||||
| if (bp->b_bno == dblk) { | if (bp->b_bno == dblk) { | ||||||||
| totalreads++; | totalreads++; | ||||||||
| } else { | } else { | ||||||||
| if (debug) { | if (debug) { | ||||||||
| readcnt[bp->b_type]++; | readcnt[bp->b_type]++; | ||||||||
| clock_gettime(CLOCK_REALTIME_PRECISE, &start); | clock_gettime(CLOCK_REALTIME_PRECISE, &start); | ||||||||
| } | } | ||||||||
| if (bp->b_type != BT_EMPTY) | bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size); | ||||||||
| bp->b_errs = | |||||||||
| blread(fsreadfd, bp->b_un.b_buf, dblk, size); | |||||||||
| else | |||||||||
| bp->b_errs = -1; | |||||||||
| if (debug) { | if (debug) { | ||||||||
| clock_gettime(CLOCK_REALTIME_PRECISE, &finish); | clock_gettime(CLOCK_REALTIME_PRECISE, &finish); | ||||||||
| timespecsub(&finish, &start, &finish); | timespecsub(&finish, &start, &finish); | ||||||||
| timespecadd(&readtime[bp->b_type], &finish, | timespecadd(&readtime[bp->b_type], &finish, | ||||||||
| &readtime[bp->b_type]); | &readtime[bp->b_type]); | ||||||||
| } | } | ||||||||
| bp->b_bno = dblk; | bp->b_bno = dblk; | ||||||||
| bp->b_size = size; | bp->b_size = size; | ||||||||
| } | } | ||||||||
| } | } | ||||||||
| void | void | ||||||||
| brelse(struct bufarea *bp) | brelse(struct bufarea *bp) | ||||||||
| { | { | ||||||||
| if (bp->b_refcnt <= 0) | if (bp->b_refcnt <= 0) | ||||||||
| prtbuf("brelse: buffer with negative reference count", bp); | prtbuf(bp, "brelse: buffer with negative reference count"); | ||||||||
| bp->b_refcnt--; | bp->b_refcnt--; | ||||||||
| } | } | ||||||||
| void | void | ||||||||
| binval(struct bufarea *bp) | |||||||||
| { | |||||||||
| bp->b_flags &= ~B_DIRTY; | |||||||||
| LIST_REMOVE(bp, b_hash); | |||||||||
| LIST_INSERT_HEAD(&freebufs, bp, b_hash); | |||||||||
| } | |||||||||
| void | |||||||||
| flush(int fd, struct bufarea *bp) | flush(int fd, struct bufarea *bp) | ||||||||
| { | { | ||||||||
| struct inode ip; | struct inode ip; | ||||||||
| if ((bp->b_flags & B_DIRTY) == 0) | if ((bp->b_flags & B_DIRTY) == 0) | ||||||||
| return; | return; | ||||||||
| bp->b_flags &= ~B_DIRTY; | bp->b_flags &= ~B_DIRTY; | ||||||||
| if (fswritefd < 0) { | if (fswritefd < 0) { | ||||||||
| pfatal("WRITING IN READ_ONLY MODE.\n"); | pfatal("WRITING IN READ_ONLY MODE.\n"); | ||||||||
| return; | return; | ||||||||
| } | } | ||||||||
| if (bp->b_errs != 0) | if (bp->b_errs != 0) | ||||||||
| pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n", | pfatal("WRITING %sZERO'ED BLOCK %lld TO DISK\n", | ||||||||
| (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ", | (bp->b_errs == bp->b_size / dev_bsize) ? "" : "PARTIALLY ", | ||||||||
| (long long)bp->b_bno); | (long long)bp->b_bno); | ||||||||
| bp->b_errs = 0; | bp->b_errs = 0; | ||||||||
| /* | /* | ||||||||
| * Write using the appropriate function. | * Write using the appropriate function. | ||||||||
| */ | */ | ||||||||
| switch (bp->b_type) { | switch (bp->b_type) { | ||||||||
| case BT_SUPERBLK: | case BT_SUPERBLK: | ||||||||
| if (bp != &sblk) | if (bp != &sblk) | ||||||||
| pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n", | pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n", | ||||||||
| bp, &sblk); | bp, &sblk); | ||||||||
| /* | |||||||||
| * Superblocks are always pre-copied so we do not need | |||||||||
Not Done Inline Actions Can those assumptions be broken in a corrupted filesystem? kib: Can those assumptions be broken in a corrupted filesystem? | |||||||||
Done Inline Actions The superblock and all cylinder groups are copied as part of the creation of the snapshot, and it will not be marked as a snapshot until all the copies have been done and committed. We create a list of all the blocks that were copied as part of the snapshot creation, which we check at the top of ffs_copyonwrite() to avoid needing to look them up in the snapshot vnode. The table is never modified after the creation, so absent disk corruption it cannot get broken. I could get fsck to recreate the list and make sure that it matches if you think that is useful or necessary. I could also check to see that the blocks have actually been copied in the snapshot (that is, they point to a disk block rather than being 0 (uncopied), BLK_SNAP (owned by another snapshot), or BLK_UNCOPIED (unallocated when we were created)). But like the above-mentioned list of copied blocks, they are set up as part of creating the snapshot, so they will only get corrupted by disk corruption. mckusick: The superblock and all cylinder groups are copied as part of the creation of the snapshot, and… | |||||||||
Not Done Inline Actions As I understand it, users' expectation (myself included) is that after fsck has reported that the filesystem is clean, it is usable for safe mounting. Failures that fsck is supposed to fix include both kernel and storage corruption. For instance, I once saw a problem where a hardware RAID overwrote a number of blocks with other data from the same disk, as if the write request had incorrectly calculated the starting block. As a result, one cg block was destroyed. I expect that fsck would be able to do something that would give me access to the data from the other cgs. So I think that fsck_ffs must check the consistency there, and either try to correct it or try to safely remove the corrupted snapshot. kib: As I understand it, users' expectation (myself included) is that after fsck has reported that the… | |||||||||
Done Inline ActionsHow about I add a check-hash to the list that tracks what has been pre-copied and if the check-hash fails offer to remove the offending snapshot file? mckusick: How about I add a check-hash to the list that tracks what has been pre-copied and if the check… | |||||||||
Not Done Inline Actions I would say that this is a step in the right direction, but not the ultimate solution. Also, I suppose by removal you mean zeroing out the snapshot inode, instead of proper removal. Then, the unreferenced blocks from the snapshot are garbage-collected by fsck. Also, I think that when removing the snapshot, we need to remove all later snapshots as well, since AFAIR, blocks are not CoW if they are already owned by an earlier snapshot. Also, please note that removing a snapshot causes user data loss, which sometimes can be avoided. The user might rely on the data caught by the snapshot, which is later modified (which is the reason to take the snapshot, after all). But regardless, even with all limitations, this would be a step toward better fsck handling of corrupted snapshots. kib: I would say that this is a step in the right direction, but not the ultimate solution.
Also… | |||||||||
| * to check them for copy-on-write. | |||||||||
| */ | |||||||||
| if (sbput(fd, bp->b_un.b_fs, 0) == 0) | if (sbput(fd, bp->b_un.b_fs, 0) == 0) | ||||||||
| fsmodified = 1; | fsmodified = 1; | ||||||||
| break; | break; | ||||||||
| case BT_CYLGRP: | case BT_CYLGRP: | ||||||||
| /* | |||||||||
| * Cylinder groups are always pre-copied so we do not | |||||||||
Not Done Inline ActionsSame kib: Same | |||||||||
Done Inline ActionsSee above comment on line 458 about blocks copied during snapshot creation. mckusick: See above comment on line 458 about blocks copied during snapshot creation. | |||||||||
| * need to check them for copy-on-write. | |||||||||
| */ | |||||||||
| if (sujrecovery) | if (sujrecovery) | ||||||||
| cg_write(bp); | cg_write(bp); | ||||||||
| if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0) | if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0) | ||||||||
| fsmodified = 1; | fsmodified = 1; | ||||||||
| break; | break; | ||||||||
| case BT_INODES: | case BT_INODES: | ||||||||
| if (debug && sblock.fs_magic == FS_UFS2_MAGIC) { | if (debug && sblock.fs_magic == FS_UFS2_MAGIC) { | ||||||||
| struct ufs2_dinode *dp = bp->b_un.b_dinode2; | struct ufs2_dinode *dp = bp->b_un.b_dinode2; | ||||||||
| Show All 12 Lines | if (debug && sblock.fs_magic == FS_UFS2_MAGIC) { | ||||||||
| printf(" (FIXED)\n"); | printf(" (FIXED)\n"); | ||||||||
| ffs_update_dinode_ckhash(&sblock, dp); | ffs_update_dinode_ckhash(&sblock, dp); | ||||||||
| inodirty(&ip); | inodirty(&ip); | ||||||||
| } | } | ||||||||
| } | } | ||||||||
| } | } | ||||||||
| /* FALLTHROUGH */ | /* FALLTHROUGH */ | ||||||||
| default: | default: | ||||||||
| copyonwrite(&sblock, bp, std_checkblkavail); | |||||||||
| blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size); | blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size); | ||||||||
| break; | break; | ||||||||
| } | } | ||||||||
| } | } | ||||||||
| /* | /* | ||||||||
| * If there are any snapshots, ensure that all the blocks that they | |||||||||
| * care about have been copied, then release the snapshot inodes. | |||||||||
| * These operations need to be done before we rebuild the cylinder | |||||||||
| * groups so that any block allocations are properly recorded. | |||||||||
| * Since all the cylinder group maps have already been copied in | |||||||||
| * the snapshots, no further snapshot copies will need to be done. | |||||||||
Not Done Inline ActionsSame kib: Same | |||||||||
Done Inline ActionsSee above comment on line 458 about blocks copied during snapshot creation. mckusick: See above comment on line 458 about blocks copied during snapshot creation. | |||||||||
| */ | |||||||||
| void | |||||||||
| snapflush(ufs2_daddr_t (*checkblkavail)(long, long)) | |||||||||
| { | |||||||||
| struct bufarea *bp; | |||||||||
| int cnt; | |||||||||
| if (snapcnt > 0) { | |||||||||
| if (debug) | |||||||||
| printf("Check for snapshot copies\n"); | |||||||||
| TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) | |||||||||
| if ((bp->b_flags & B_DIRTY) != 0) | |||||||||
| copyonwrite(&sblock, bp, checkblkavail); | |||||||||
| for (cnt = 0; cnt < snapcnt; cnt++) | |||||||||
| irelse(&snaplist[cnt]); | |||||||||
| snapcnt = 0; | |||||||||
| } | |||||||||
| } | |||||||||
| /* | |||||||||
| * Journaled soft updates does not maintain cylinder group summary | * Journaled soft updates does not maintain cylinder group summary | ||||||||
| * information during cleanup, so this routine recalculates the summary | * information during cleanup, so this routine recalculates the summary | ||||||||
| * information and updates the superblock summary in preparation for | * information and updates the superblock summary in preparation for | ||||||||
| * writing out the cylinder group. | * writing out the cylinder group. | ||||||||
| */ | */ | ||||||||
| static void | static void | ||||||||
| cg_write(struct bufarea *bp) | cg_write(struct bufarea *bp) | ||||||||
| { | { | ||||||||
| ufs1_daddr_t fragno, cgbno, maxbno; | ufs1_daddr_t fragno, cgbno, maxbno; | ||||||||
| u_int8_t *blksfree; | u_int8_t *blksfree; | ||||||||
| struct csum *csp; | |||||||||
| struct cg *cgp; | struct cg *cgp; | ||||||||
| int blk; | int blk; | ||||||||
| int i; | int i; | ||||||||
| /* | /* | ||||||||
| * Fix the frag and cluster summary. | * Fix the frag and cluster summary. | ||||||||
| */ | */ | ||||||||
| cgp = bp->b_un.b_cg; | cgp = bp->b_un.b_cg; | ||||||||
| Show All 21 Lines | for (cgbno = 0; cgbno < maxbno; cgbno++) { | ||||||||
| for (i = 0; i < sblock.fs_frag; i++) | for (i = 0; i < sblock.fs_frag; i++) | ||||||||
| if (isset(blksfree, fragno + i)) | if (isset(blksfree, fragno + i)) | ||||||||
| cgp->cg_cs.cs_nffree++; | cgp->cg_cs.cs_nffree++; | ||||||||
| } | } | ||||||||
| /* | /* | ||||||||
| * Update the superblock cg summary from our now correct values | * Update the superblock cg summary from our now correct values | ||||||||
| * before writing the block. | * before writing the block. | ||||||||
| */ | */ | ||||||||
| csp = &sblock.fs_cs(&sblock, cgp->cg_cgx); | |||||||||
| sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir; | |||||||||
| sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree; | |||||||||
| sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree; | |||||||||
| sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree; | |||||||||
| sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs; | sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs; | ||||||||
| } | } | ||||||||
| void | void | ||||||||
| rwerror(const char *mesg, ufs2_daddr_t blk) | rwerror(const char *mesg, ufs2_daddr_t blk) | ||||||||
| { | { | ||||||||
| if (bkgrdcheck) | if (bkgrdcheck) | ||||||||
| Show All 35 Lines | ckfini(int markclean) | ||||||||
| if (debug && cachelookups > 0) | if (debug && cachelookups > 0) | ||||||||
| printf("cache with %d buffers missed %d of %d (%d%%)\n", | printf("cache with %d buffers missed %d of %d (%d%%)\n", | ||||||||
| numbufs, cachereads, cachelookups, | numbufs, cachereads, cachelookups, | ||||||||
| (int)(cachereads * 100 / cachelookups)); | (int)(cachereads * 100 / cachelookups)); | ||||||||
| if (fswritefd < 0) { | if (fswritefd < 0) { | ||||||||
| (void)close(fsreadfd); | (void)close(fsreadfd); | ||||||||
| return; | return; | ||||||||
| } | } | ||||||||
| /* | /* | ||||||||
| * To remain idempotent with partial truncations the buffers | * To remain idempotent with partial truncations the buffers | ||||||||
| * must be flushed in this order: | * must be flushed in this order: | ||||||||
| * 1) cylinder groups (bitmaps) | * 1) cylinder groups (bitmaps) | ||||||||
| * 2) indirect, directory, external attribute, and data blocks | * 2) indirect, directory, external attribute, and data blocks | ||||||||
| * 3) inode blocks | * 3) inode blocks | ||||||||
| * 4) superblock | * 4) superblock | ||||||||
| * This ordering preserves access to the modified pointers | * This ordering preserves access to the modified pointers | ||||||||
| Show All 26 Lines | ckfini(int markclean) | ||||||||
| } | } | ||||||||
| TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { | TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { | ||||||||
| switch (bp->b_type) { | switch (bp->b_type) { | ||||||||
| /* These should not be in the buffer cache list */ | /* These should not be in the buffer cache list */ | ||||||||
| case BT_UNKNOWN: | case BT_UNKNOWN: | ||||||||
| case BT_SUPERBLK: | case BT_SUPERBLK: | ||||||||
| case BT_CYLGRP: | case BT_CYLGRP: | ||||||||
| default: | default: | ||||||||
| prtbuf("ckfini: improper buffer type on cache list",bp); | prtbuf(bp,"ckfini: improper buffer type on cache list"); | ||||||||
| continue; | continue; | ||||||||
| /* These are the ones to flush in this step */ | /* These are the ones to flush in this step */ | ||||||||
| case BT_EMPTY: | |||||||||
| if (bp->b_bno >= 0) | |||||||||
| pfatal("Unused BT_EMPTY buffer for block %jd\n", | |||||||||
| (intmax_t)bp->b_bno); | |||||||||
| /* FALLTHROUGH */ | |||||||||
| case BT_LEVEL1: | case BT_LEVEL1: | ||||||||
| case BT_LEVEL2: | case BT_LEVEL2: | ||||||||
| case BT_LEVEL3: | case BT_LEVEL3: | ||||||||
| case BT_EXTATTR: | case BT_EXTATTR: | ||||||||
| case BT_DIRDATA: | case BT_DIRDATA: | ||||||||
| case BT_DATA: | case BT_DATA: | ||||||||
| break; | break; | ||||||||
| /* These are the ones to flush in the next step */ | /* These are the ones to flush in the next step */ | ||||||||
| case BT_INODES: | case BT_INODES: | ||||||||
| continue; | continue; | ||||||||
| } | } | ||||||||
| if (debug && bp->b_refcnt != 0) { | if (debug && bp->b_refcnt != 0) | ||||||||
| prtbuf("ckfini: clearing in-use buffer", bp); | prtbuf(bp, "ckfini: clearing in-use buffer"); | ||||||||
| pfatal("ckfini: clearing in-use buffer\n"); | |||||||||
| } | |||||||||
| TAILQ_REMOVE(&bufqueuehd, bp, b_list); | TAILQ_REMOVE(&bufqueuehd, bp, b_list); | ||||||||
| LIST_REMOVE(bp, b_hash); | |||||||||
| cnt++; | cnt++; | ||||||||
| flush(fswritefd, bp); | flush(fswritefd, bp); | ||||||||
| free(bp->b_un.b_buf); | free(bp->b_un.b_buf); | ||||||||
| free((char *)bp); | free((char *)bp); | ||||||||
| } | } | ||||||||
| /* Step 3: inode blocks */ | /* Step 3: inode blocks */ | ||||||||
| if (debug) | if (debug) | ||||||||
| printf("Flush inode blocks\n"); | printf("Flush inode blocks\n"); | ||||||||
| if (icachebp != NULL) { | if (icachebp != NULL) { | ||||||||
| brelse(icachebp); | brelse(icachebp); | ||||||||
| icachebp = NULL; | icachebp = NULL; | ||||||||
| } | } | ||||||||
| TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { | TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { | ||||||||
| if (debug && bp->b_refcnt != 0) { | if (debug && bp->b_refcnt != 0) | ||||||||
| prtbuf("ckfini: clearing in-use buffer", bp); | prtbuf(bp, "ckfini: clearing in-use buffer"); | ||||||||
| pfatal("ckfini: clearing in-use buffer\n"); | |||||||||
| } | |||||||||
| TAILQ_REMOVE(&bufqueuehd, bp, b_list); | TAILQ_REMOVE(&bufqueuehd, bp, b_list); | ||||||||
| LIST_REMOVE(bp, b_hash); | |||||||||
| cnt++; | cnt++; | ||||||||
| flush(fswritefd, bp); | flush(fswritefd, bp); | ||||||||
| free(bp->b_un.b_buf); | free(bp->b_un.b_buf); | ||||||||
| free((char *)bp); | free((char *)bp); | ||||||||
| } | } | ||||||||
| if (numbufs != cnt) | if (numbufs != cnt) | ||||||||
| errx(EEXIT, "panic: lost %d buffers", numbufs - cnt); | errx(EEXIT, "panic: lost %d buffers", numbufs - cnt); | ||||||||
| /* Step 4: superblock */ | /* Step 4: superblock */ | ||||||||
| ▲ Show 20 Lines • Show All 363 Lines • ▼ Show 20 Lines | check_cgmagic(int cg, struct bufarea *cgbp, int request_rebuild) | ||||||||
| cgdirty(cgbp); | cgdirty(cgbp); | ||||||||
| return (0); | return (0); | ||||||||
| } | } | ||||||||
| /* | /* | ||||||||
| * allocate a data block with the specified number of fragments | * allocate a data block with the specified number of fragments | ||||||||
| */ | */ | ||||||||
| ufs2_daddr_t | ufs2_daddr_t | ||||||||
| allocblk(long frags) | allocblk(long startcg, long frags, | ||||||||
| ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags)) | |||||||||
| { | { | ||||||||
| int i, j, k, cg, baseblk; | ufs2_daddr_t blkno, newblk; | ||||||||
| struct bufarea *cgbp; | |||||||||
| struct cg *cgp; | |||||||||
| if (sujrecovery && checkblkavail == std_checkblkavail) { | |||||||||
| pfatal("allocblk: std_checkblkavail used for SUJ recovery\n"); | |||||||||
| return (0); | |||||||||
| } | |||||||||
| if (frags <= 0 || frags > sblock.fs_frag) | if (frags <= 0 || frags > sblock.fs_frag) | ||||||||
| return (0); | return (0); | ||||||||
| for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) { | for (blkno = cgdata(&sblock, startcg); | ||||||||
| blkno < maxfsblock - sblock.fs_frag; | |||||||||
| blkno += sblock.fs_frag) { | |||||||||
| if ((newblk = (*checkblkavail)(blkno, frags)) == 0) | |||||||||
| continue; | |||||||||
| if (newblk > 0) | |||||||||
| return (newblk); | |||||||||
| if (newblk < 0) | |||||||||
| blkno = -newblk; | |||||||||
| } | |||||||||
| for (blkno = cgdata(&sblock, 0); | |||||||||
| blkno < cgbase(&sblock, startcg) - sblock.fs_frag; | |||||||||
| blkno += sblock.fs_frag) { | |||||||||
| if ((newblk = (*checkblkavail)(blkno, frags)) == 0) | |||||||||
| continue; | |||||||||
| if (newblk > 0) | |||||||||
| return (newblk); | |||||||||
| if (newblk < 0) | |||||||||
| blkno = -newblk; | |||||||||
| } | |||||||||
| return (0); | |||||||||
| } | |||||||||
| ufs2_daddr_t | |||||||||
| std_checkblkavail(blkno, frags) | |||||||||
| ufs2_daddr_t blkno; | |||||||||
| long frags; | |||||||||
| { | |||||||||
| struct bufarea *cgbp; | |||||||||
| struct cg *cgp; | |||||||||
| ufs2_daddr_t j, k, baseblk; | |||||||||
| long cg; | |||||||||
| for (j = 0; j <= sblock.fs_frag - frags; j++) { | for (j = 0; j <= sblock.fs_frag - frags; j++) { | ||||||||
| if (testbmap(i + j)) | if (testbmap(blkno + j)) | ||||||||
| continue; | continue; | ||||||||
| for (k = 1; k < frags; k++) | for (k = 1; k < frags; k++) | ||||||||
| if (testbmap(i + j + k)) | if (testbmap(blkno + j + k)) | ||||||||
| break; | break; | ||||||||
| if (k < frags) { | if (k < frags) { | ||||||||
| j += k; | j += k; | ||||||||
| continue; | continue; | ||||||||
| } | } | ||||||||
| cg = dtog(&sblock, i + j); | cg = dtog(&sblock, blkno + j); | ||||||||
| cgbp = cglookup(cg); | cgbp = cglookup(cg); | ||||||||
| cgp = cgbp->b_un.b_cg; | cgp = cgbp->b_un.b_cg; | ||||||||
| if (!check_cgmagic(cg, cgbp, 0)) { | if (!check_cgmagic(cg, cgbp, 0)) | ||||||||
| i = (cg + 1) * sblock.fs_fpg - sblock.fs_frag; | return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag)); | ||||||||
| continue; | baseblk = dtogd(&sblock, blkno + j); | ||||||||
| } | |||||||||
| baseblk = dtogd(&sblock, i + j); | |||||||||
| for (k = 0; k < frags; k++) { | for (k = 0; k < frags; k++) { | ||||||||
| setbmap(i + j + k); | setbmap(blkno + j + k); | ||||||||
| clrbit(cg_blksfree(cgp), baseblk + k); | clrbit(cg_blksfree(cgp), baseblk + k); | ||||||||
| } | } | ||||||||
| n_blks += frags; | n_blks += frags; | ||||||||
| if (frags == sblock.fs_frag) | if (frags == sblock.fs_frag) | ||||||||
| cgp->cg_cs.cs_nbfree--; | cgp->cg_cs.cs_nbfree--; | ||||||||
| else | else | ||||||||
| cgp->cg_cs.cs_nffree -= frags; | cgp->cg_cs.cs_nffree -= frags; | ||||||||
| cgdirty(cgbp); | cgdirty(cgbp); | ||||||||
| return (i + j); | return (blkno + j); | ||||||||
| } | } | ||||||||
| } | |||||||||
| return (0); | return (0); | ||||||||
| } | } | ||||||||
| /* | /* | ||||||||
| * Slow down IO so as to leave some disk bandwidth for other processes | * Slow down IO so as to leave some disk bandwidth for other processes | ||||||||
| */ | */ | ||||||||
| void | void | ||||||||
| slowio_start() | slowio_start() | ||||||||
| ▲ Show 20 Lines • Show All 155 Lines • ▼ Show 20 Lines | dofix(struct inodesc *idesc, const char *msg) | ||||||||
| return (0); | return (0); | ||||||||
| } | } | ||||||||
| #include <stdarg.h> | #include <stdarg.h> | ||||||||
| /* | /* | ||||||||
| * Print details about a buffer. | * Print details about a buffer. | ||||||||
| */ | */ | ||||||||
| static void | void | ||||||||
| prtbuf(const char *msg, struct bufarea *bp) | prtbuf(struct bufarea *bp, const char *fmt, ...) | ||||||||
| { | { | ||||||||
| va_list ap; | |||||||||
| printf("%s: bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, " | va_start(ap, fmt); | ||||||||
| "index %jd\n", msg, bp, BT_BUFTYPE(bp->b_type), | if (preen) | ||||||||
| (intmax_t) bp->b_bno, bp->b_size, bp->b_refcnt, | (void)fprintf(stdout, "%s: ", cdevname); | ||||||||
| bp->b_flags & B_DIRTY ? "dirty" : "clean", (intmax_t) bp->b_index); | (void)vfprintf(stdout, fmt, ap); | ||||||||
| va_end(ap); | |||||||||
| printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, " | |||||||||
| "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno, | |||||||||
| bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? "dirty" : "clean", | |||||||||
| (intmax_t) bp->b_index); | |||||||||
| } | } | ||||||||
| /* | /* | ||||||||
| * An unexpected inconsistency occurred. | * An unexpected inconsistency occurred. | ||||||||
| * Die if preening or file system is running with soft dependency protocol, | * Die if preening or file system is running with soft dependency protocol, | ||||||||
| * otherwise just print message and continue. | * otherwise just print message and continue. | ||||||||
| */ | */ | ||||||||
| void | void | ||||||||
| ▲ Show 20 Lines • Show All 74 Lines • Show Last 20 Lines | |||||||||