diff --git a/sbin/fsck_ffs/dir.c b/sbin/fsck_ffs/dir.c --- a/sbin/fsck_ffs/dir.c +++ b/sbin/fsck_ffs/dir.c @@ -679,7 +679,7 @@ struct bufarea *bp, *nbp; struct inodesc idesc; union dinode *dp; - int indiralloced; + long cg, indiralloced; char *cp; nbp = NULL; @@ -687,6 +687,7 @@ pwarn("NO SPACE LEFT IN %s", name); if (!preen && reply("EXPAND") == 0) return (0); + cg = ino_to_cg(&sblock, ip->i_number); dp = ip->i_dp; filesize = DIP(dp, di_size); lastlbn = lblkno(&sblock, filesize); @@ -705,7 +706,7 @@ bp = getdirblk(oldblk, lastlbnsize); if (bp->b_errs) goto bad; - if ((newblk = allocblk(sblock.fs_frag)) == 0) + if ((newblk = allocblk(cg, sblock.fs_frag)) == 0) goto bad; nbp = getdatablk(newblk, sblock.fs_bsize, BT_DIRDATA); if (nbp->b_errs) @@ -731,7 +732,7 @@ printf(" (EXPANDED)\n"); return (1); } - if ((newblk = allocblk(sblock.fs_frag)) == 0) + if ((newblk = allocblk(cg, sblock.fs_frag)) == 0) goto bad; bp = getdirblk(newblk, sblock.fs_bsize); if (bp->b_errs) @@ -749,7 +750,7 @@ * Allocate indirect block if needed. 
*/ if ((indirblk = DIP(dp, di_ib[0])) == 0) { - if ((indirblk = allocblk(sblock.fs_frag)) == 0) + if ((indirblk = allocblk(cg, sblock.fs_frag)) == 0) goto bad; indiralloced = 1; } diff --git a/sbin/fsck_ffs/ea.c b/sbin/fsck_ffs/ea.c --- a/sbin/fsck_ffs/ea.c +++ b/sbin/fsck_ffs/ea.c @@ -74,8 +74,10 @@ blksiz = sblock.fs_fsize; else blksiz = sblock.fs_bsize; - printf("blksiz = %ju\n", (intmax_t)blksiz); bp = getdatablk(dp->di_extb[0], blksiz, BT_EXTATTR); + if (bp->b_errs) + return (STOP); + printf("blksiz = %ju\n", (intmax_t)blksiz); cp = (u_char *)bp->b_un.b_buf; for (n = 0; n < blksiz; n++) { printf("%02x", cp[n]); diff --git a/sbin/fsck_ffs/fsck.h b/sbin/fsck_ffs/fsck.h --- a/sbin/fsck_ffs/fsck.h +++ b/sbin/fsck_ffs/fsck.h @@ -200,8 +200,7 @@ #define BT_INODES 7 /* Buffer holds inodes */ #define BT_DIRDATA 8 /* Buffer holds directory data */ #define BT_DATA 9 /* Buffer holds user data */ -#define BT_EMPTY 10 /* Buffer allocated but not filled */ -#define BT_NUMBUFTYPES 11 +#define BT_NUMBUFTYPES 10 #define BT_NAMES { \ "unknown", \ "Superblock", \ @@ -212,8 +211,7 @@ "External Attribute", \ "Inode Block", \ "Directory Contents", \ - "User Data", \ - "Allocated but not filled" } + "User Data" } extern char *buftype[]; #define BT_BUFTYPE(type) \ type < BT_NUMBUFTYPES ? 
buftype[type] : buftype[BT_UNKNOWN] @@ -234,7 +232,7 @@ (bp)->b_flags |= B_DIRTY; \ } while (0) #define initbarea(bp, type) do { \ - (bp)->b_bno = (ufs2_daddr_t)-1; \ + (bp)->b_bno = (ufs2_daddr_t)-4; \ (bp)->b_size = 0; \ (bp)->b_errs = 0; \ (bp)->b_flags = 0; \ @@ -347,6 +345,7 @@ extern char *cdevname; /* name of device being checked */ extern char ckclean; /* only do work if not cleanly unmounted */ extern int ckhashadd; /* check hashes to be added */ +extern char *copybuf; /* buffer to copy snapshot blocks */ extern int cvtlevel; /* convert to newer file system format */ extern long dev_bsize; /* computed value of DEV_BSIZE */ extern u_int real_dev_bsize; /* actual disk sector size, not overridden */ @@ -371,6 +370,8 @@ extern int returntosingle; /* 1 => return to single user mode on exit */ extern long secsize; /* actual disk sector size */ extern char skipclean; /* skip clean file systems if preening */ +extern int snapcnt; /* number of active snapshots */ +extern struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */ extern char snapname[BUFSIZ]; /* when doing snapshots, the name of the file */ extern int sujrecovery; /* 1 => doing check using the journal */ extern int surrender; /* Give up if reads fail */ @@ -441,7 +442,7 @@ void adjust(struct inodesc *, int lcnt); void alarmhandler(int sig); -ufs2_daddr_t allocblk(long frags); +ufs2_daddr_t allocblk(long cg, long frags); ino_t allocdir(ino_t parent, ino_t request, int mode); ino_t allocino(ino_t request, int type); void blkerror(ino_t ino, const char *type, ufs2_daddr_t blk); @@ -464,6 +465,7 @@ int ckinode(union dinode *dp, struct inodesc *); void clri(struct inodesc *, const char *type, int flag); int clearentry(struct inodesc *); +void copyonwrite(struct fs *, struct bufarea *); void direrror(ino_t ino, const char *errmesg); int dirscan(struct inodesc *); int dofix(struct inodesc *, const char *msg); @@ -505,6 +507,7 @@ void pass5(void); void pfatal(const char *fmt, ...) 
__printflike(1, 2); void propagate(void); +void prtbuf(const char *, struct bufarea *); void prtinode(struct inode *); void pwarn(const char *fmt, ...) __printflike(1, 2); int readsb(void); @@ -513,6 +516,9 @@ void sblock_init(void); void setinodebuf(int, ino_t); int setup(char *dev); +int snapblkfree(struct fs *, ufs2_daddr_t, long, ino_t); +void snapremove(ino_t); +void snapflush(void); void gjournal_check(const char *filesys); int suj_check(const char *filesys); void update_maps(struct cg *, struct cg*, int); diff --git a/sbin/fsck_ffs/fsutil.c b/sbin/fsck_ffs/fsutil.c --- a/sbin/fsck_ffs/fsutil.c +++ b/sbin/fsck_ffs/fsutil.c @@ -71,7 +71,7 @@ static void slowio_start(void); static void slowio_end(void); static void printIOstats(void); -static void prtbuf(const char *, struct bufarea *); +static ufs2_daddr_t checkblkavail(ufs2_daddr_t blkno, long frags); static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */ static struct timespec startpass, finishpass; @@ -79,6 +79,7 @@ int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ int slowio_pollcnt; static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ +static struct bufarea failedbuf; /* returned by failed getdatablk() */ static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */ static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */ static int numbufs; /* size of buffer cache */ @@ -187,6 +188,9 @@ { int i; + initbarea(&failedbuf, BT_UNKNOWN); + failedbuf.b_errs = -1; + failedbuf.b_un.b_buf = NULL; if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL) errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize); initbarea(&cgblk, BT_CYLGRP); @@ -309,11 +313,14 @@ struct bufhash *bhdp; cachelookups++; - /* If out of range, return empty buffer with b_err == -1 */ - if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) { - blkno = -1; - type = BT_EMPTY; - } + /* + * If out of range, 
return empty buffer with b_errs == -1 + * + * Skip check for inodes because chkrange() considers + * metadata areas invalid to write data. + */ + if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) + return(&failedbuf); bhdp = &bufhashhd[HASH(blkno)]; LIST_FOREACH(bp, bhdp, b_hash) if (bp->b_bno == fsbtodb(&sblock, blkno)) { @@ -401,11 +408,7 @@ readcnt[bp->b_type]++; clock_gettime(CLOCK_REALTIME_PRECISE, &start); } - if (bp->b_type != BT_EMPTY) - bp->b_errs = - blread(fsreadfd, bp->b_un.b_buf, dblk, size); - else - bp->b_errs = -1; + bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size); if (debug) { clock_gettime(CLOCK_REALTIME_PRECISE, &finish); timespecsub(&finish, &start, &finish); @@ -451,10 +454,18 @@ if (bp != &sblk) pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n", bp, &sblk); + /* + * Superblocks are always pre-copied so we do not need + * to check them for copy-on-write. + */ if (sbput(fd, bp->b_un.b_fs, 0) == 0) fsmodified = 1; break; case BT_CYLGRP: + /* + * Cylinder groups are always pre-copied so we do not + * need to check them for copy-on-write. + */ if (sujrecovery) cg_write(bp); if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0) @@ -483,11 +494,38 @@ } /* FALLTHROUGH */ default: + copyonwrite(&sblock, bp); blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size); break; } } +/* + * If there are any snapshots, ensure that all the blocks that they + * care about have been copied, then release the snapshot inodes. + * These operations need to be done before we rebuild the cylinder + * groups so that any block allocations are properly recorded. + * Since all the cylinder group maps have already been copied in + * the snapshots, no further snapshot copies will need to be done. 
+ */ +void +snapflush(void) +{ + struct bufarea *bp; + int cnt; + + if (snapcnt > 0) { + if (debug) + printf("Check for snapshot copies\n"); + TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) + if ((bp->b_flags & B_DIRTY) != 0) + copyonwrite(&sblock, bp); + for (cnt = 0; cnt < snapcnt; cnt++) + irelse(&snaplist[cnt]); + snapcnt = 0; + } +} + /* * Journaled soft updates does not maintain cylinder group summary * information during cleanup, so this routine recalculates the summary @@ -503,6 +541,7 @@ int blk; int i; + snapflush(); /* * Fix the frag and cluster summary. */ @@ -587,6 +626,7 @@ (void)close(fsreadfd); return; } + /* * To remain idempotent with partial truncations the buffers * must be flushed in this order: @@ -632,11 +672,6 @@ prtbuf("ckfini: improper buffer type on cache list",bp); continue; /* These are the ones to flush in this step */ - case BT_EMPTY: - if (bp->b_bno >= 0) - pfatal("Unused BT_EMPTY buffer for block %jd\n", - (intmax_t)bp->b_bno); - /* FALLTHROUGH */ case BT_LEVEL1: case BT_LEVEL2: case BT_LEVEL3: @@ -1050,45 +1085,72 @@ * allocate a data block with the specified number of fragments */ ufs2_daddr_t -allocblk(long frags) +allocblk(long startcg, long frags) { - int i, j, k, cg, baseblk; - struct bufarea *cgbp; - struct cg *cgp; + ufs2_daddr_t blkno, newblk; if (frags <= 0 || frags > sblock.fs_frag) return (0); - for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) { - for (j = 0; j <= sblock.fs_frag - frags; j++) { - if (testbmap(i + j)) - continue; - for (k = 1; k < frags; k++) - if (testbmap(i + j + k)) - break; - if (k < frags) { - j += k; - continue; - } - cg = dtog(&sblock, i + j); - cgbp = cglookup(cg); - cgp = cgbp->b_un.b_cg; - if (!check_cgmagic(cg, cgbp, 0)) { - i = (cg + 1) * sblock.fs_fpg - sblock.fs_frag; - continue; - } - baseblk = dtogd(&sblock, i + j); - for (k = 0; k < frags; k++) { - setbmap(i + j + k); - clrbit(cg_blksfree(cgp), baseblk + k); - } - n_blks += frags; - if (frags == sblock.fs_frag) 
- cgp->cg_cs.cs_nbfree--; - else - cgp->cg_cs.cs_nffree -= frags; - cgdirty(cgbp); - return (i + j); + for (blkno = cgdata(&sblock, startcg); + blkno < maxfsblock - sblock.fs_frag; + blkno += sblock.fs_frag) { + if ((newblk = checkblkavail(blkno, frags)) == 0) + continue; + if (newblk > 0) + return (newblk); + if (newblk < 0) + blkno = -newblk; + } + for (blkno = cgdata(&sblock, 0); + blkno < cgbase(&sblock, startcg) - sblock.fs_frag; + blkno += sblock.fs_frag) { + if ((newblk = checkblkavail(blkno, frags)) == 0) + continue; + if (newblk > 0) + return (newblk); + if (newblk < 0) + blkno = -newblk; + } + return (0); +} + +static ufs2_daddr_t +checkblkavail(blkno, frags) + ufs2_daddr_t blkno; + long frags; +{ + struct bufarea *cgbp; + struct cg *cgp; + ufs2_daddr_t j, k, baseblk; + long cg; + + for (j = 0; j <= sblock.fs_frag - frags; j++) { + if (testbmap(blkno + j)) + continue; + for (k = 1; k < frags; k++) + if (testbmap(blkno + j + k)) + break; + if (k < frags) { + j += k; + continue; + } + cg = dtog(&sblock, blkno + j); + cgbp = cglookup(cg); + cgp = cgbp->b_un.b_cg; + if (!check_cgmagic(cg, cgbp, 0)) + return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag)); + baseblk = dtogd(&sblock, blkno + j); + for (k = 0; k < frags; k++) { + setbmap(blkno + j + k); + clrbit(cg_blksfree(cgp), baseblk + k); } + n_blks += frags; + if (frags == sblock.fs_frag) + cgp->cg_cs.cs_nbfree--; + else + cgp->cg_cs.cs_nffree -= frags; + cgdirty(cgbp); + return (blkno + j); } return (0); } @@ -1261,7 +1323,7 @@ /* * Print details about a buffer. 
*/ -static void +void prtbuf(const char *msg, struct bufarea *bp) { diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c --- a/sbin/fsck_ffs/inode.c +++ b/sbin/fsck_ffs/inode.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include @@ -58,6 +59,7 @@ static int iblock(struct inodesc *, off_t isize, int type); static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t, struct bufarea **); +static int snapclean(struct inodesc *idesc); int ckinode(union dinode *dp, struct inodesc *idesc) @@ -378,8 +380,12 @@ int c; if (cnt <= 0 || blk <= 0 || blk > maxfsblock || - cnt - 1 > maxfsblock - blk) + cnt - 1 > maxfsblock - blk) { + if (debug) + printf("out of range: blk %ld, offset %i, size %d\n", + (long)blk, (int)fragnum(&sblock, blk), cnt); return (1); + } if (cnt > sblock.fs_frag || fragnum(&sblock, blk) + cnt > sblock.fs_frag) { if (debug) @@ -651,10 +657,17 @@ { struct dups *dlp; ufs2_daddr_t blkno; - long nfrags, res; + long size, nfrags, res; res = KEEPON; blkno = idesc->id_blkno; + if (idesc->id_type == SNAP) { + pfatal("clearing a snapshot dinode\n"); + return (STOP); + } + size = lfragtosize(&sblock, idesc->id_numfrags); + if (snapblkfree(&sblock, blkno, size, idesc->id_number)) + return (res); for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) { if (chkrange(blkno, 1)) { res = SKIP; @@ -677,6 +690,297 @@ return (res); } +/* + * Prepare a snapshot file for being removed. + */ +void +snapremove(ino_t inum) +{ + struct inodesc idesc; + struct inode ip; + int i; + + for (i = 0; i < snapcnt; i++) + if (snaplist[i].i_number == inum) + break; + ip = snaplist[i]; + if (i == snapcnt || (DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) { + printf("snapremove: inode %jd is not a snapshot\n", + (intmax_t)inum); + return; + } + /* + * Remove from active snapshot list. 
+ */ + for (i++; i < FSMAXSNAP; i++) { + if (sblock.fs_snapinum[i] == 0) + break; + snaplist[i - 1] = snaplist[i]; + sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i]; + } + sblock.fs_snapinum[i - 1] = 0; + bzero(&snaplist[i - 1], sizeof(struct inode)); + snapcnt--; + idesc.id_type = SNAP; + idesc.id_func = snapclean; + idesc.id_number = inum; + (void)ckinode(ip.i_dp, &idesc); + DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT); + inodirty(&ip); + irelse(&ip); +} + +static int +snapclean(struct inodesc *idesc) +{ + ufs2_daddr_t blkno; + struct bufarea *bp; + union dinode *dp; + + blkno = idesc->id_blkno; + if (blkno == 0) + return (KEEPON); + + bp = idesc->id_bp; + dp = idesc->id_dp; + if (blkno == BLK_NOCOPY || blkno == BLK_SNAP || + (blkno == blkstofrags(&sblock, idesc->id_lbn) && + snapblkfree(&sblock, blkno, sblock.fs_bsize, idesc->id_number))) { + if (idesc->id_lbn < UFS_NDADDR) + DIP_SET(dp, di_db[idesc->id_lbn], 0); + else + IBLK_SET(bp, bp->b_index, 0); + if (blkno == blkstofrags(&sblock, idesc->id_lbn)) + DIP_SET(dp, di_blocks, DIP(dp, di_blocks) - + btodb(sblock.fs_bsize)); + dirty(bp); + } + return (KEEPON); +} + +/* + * Notification that a block is being freed. Return zero if the free + * should be allowed to proceed. Return non-zero if the snapshot file + * wants to claim the block. The block will be claimed if it is an + * uncopied part of one of the snapshots. It will be freed if it is + * either a BLK_NOCOPY or has already been copied in all of the snapshots. + * If a fragment is being freed, then all snapshots that care about + * it must make a copy since a snapshot file can only claim full sized + * blocks. Note that if more than one snapshot file maps the block, + * we can pick one at random to claim it. Since none of the snapshots + * can change, we are assured that they will all see the same unmodified + * image. 
When deleting a snapshot file (see ino_trunc above), we + * must push any of these claimed blocks to one of the other snapshots + * that maps it. These claimed blocks are easily identified as they will + * have a block number equal to their logical block number within the + * snapshot. A copied block can never have this property because they + * must always have been allocated from a BLK_NOCOPY location. + */ +int +snapblkfree(fs, bno, size, inum) + struct fs *fs; + ufs2_daddr_t bno; + long size; + ino_t inum; +{ + union dinode *dp; + struct inode ip; + struct bufarea *snapbp; + ufs_lbn_t lbn; + ufs2_daddr_t blkno, relblkno; + int i, frags, claimedblk, copydone; + + /* If no snapshots, nothing to do */ + if (snapcnt == 0) + return (0); + if (debug) + printf("snapblkfree: in ino %ld free blkno %ld, size %ld\n", + inum, bno, size); + relblkno = blknum(fs, bno); + lbn = fragstoblks(fs, relblkno); + /* Direct blocks are always pre-copied */ + if (lbn < UFS_NDADDR) + return(0); + copydone = 0; + claimedblk = 0; + for (i = 0; i < snapcnt; i++) { + /* + * Lookup block being freed. + */ + ip = snaplist[i]; + dp = ip.i_dp; + blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number, + lbn, &frags, &snapbp); + /* + * Check to see if block needs to be copied. + */ + if (blkno == 0) { + /* + * A block that we map is being freed. If it has not + * been claimed yet, we will claim or copy it (below). + */ + claimedblk = 1; + } else if (blkno == BLK_SNAP) { + /* + * No previous snapshot claimed the block, + * so it will be freed and become a BLK_NOCOPY + * (don't care) for us. + */ + if (claimedblk) + pfatal("snapblkfree: inconsistent block type"); + IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY); + dirty(snapbp); + brelse(snapbp); + continue; + } else /* BLK_NOCOPY or default */ { + /* + * If the snapshot has already copied the block + * (default), or does not care about the block, + * it is not needed. 
+ */ + brelse(snapbp); + continue; + } + /* + * If this is a full size block, we will just grab it + * and assign it to the snapshot inode. Otherwise we + * will proceed to copy it. See explanation for this + * routine as to why only a single snapshot needs to + * claim this block. + */ + if (size == fs->fs_bsize) { + if (debug) + printf("Grabonremove snapshot %ju lbn %jd " + "from inum %ju\n", (intmax_t)ip.i_number, + (intmax_t)lbn, (uintmax_t)inum); + IBLK_SET(snapbp, snapbp->b_index, relblkno); + dirty(snapbp); + brelse(snapbp); + DIP_SET(dp, di_blocks, + DIP(dp, di_blocks) + btodb(size)); + inodirty(&ip); + return (1); + } + + /* First time through, read the contents of the old block. */ + if (copydone == 0) { + copydone = 1; + if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno), + fs->fs_bsize) != 0) { + pfatal("Could not read snapshot %ju block " + "%jd\n", (intmax_t)ip.i_number, + (intmax_t)relblkno); + continue; + } + } + /* + * This allocation will never require any additional + * allocations for the snapshot inode. + */ + if ((blkno = allocblk(dtog(fs, relblkno), fs->fs_frag)) == 0) { + pfatal("Could not allocate block for snapshot %ju\n", + (intmax_t)ip.i_number); + continue; + } + if (debug) + printf("Copyonremove: snapino %jd lbn %jd for inum %ju " + "size %ld to blkno %jd\n", (intmax_t)ip.i_number, + (intmax_t)lbn, (uintmax_t)inum, size, + (intmax_t)blkno); + blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize); + IBLK_SET(snapbp, snapbp->b_index, blkno); + dirty(snapbp); + brelse(snapbp); + DIP_SET(dp, di_blocks, + DIP(dp, di_blocks) + btodb(fs->fs_bsize)); + inodirty(&ip); + } + return (0); +} + +/* + * Notification that a block is being written. Return if the block + * is part of a snapshot as snapshots never track other snapshots. + * The block will be copied in all of the snapshots that are tracking + * it and have not yet copied it. 
+ */ +void +copyonwrite(fs, bp) + struct fs *fs; + struct bufarea *bp; +{ + struct inode ip; + union dinode *dp; + struct bufarea *snapbp; + ufs2_daddr_t blkno, copyblkno; + int i, frags, copydone; + ufs_lbn_t lbn; + + /* If no snapshots, nothing to do. */ + if (snapcnt == 0) + return; + if (debug) + prtbuf("copyonwrite: checking buffer", bp); + copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno)); + lbn = fragstoblks(fs, copyblkno); + /* Direct blocks are always pre-copied */ + if (lbn < UFS_NDADDR) + return; + copydone = 0; + for (i = 0; i < snapcnt; i++) { + /* + * Lookup block being written. + */ + ip = snaplist[i]; + dp = ip.i_dp; + blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp); + /* + * Check to see if block needs to be copied. + */ + if (blkno != 0) { + /* + * A block that we have already copied or don't track. + */ + brelse(snapbp); + continue; + } + /* First time through, read the contents of the old block. */ + if (copydone == 0) { + copydone = 1; + if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno), + fs->fs_bsize) != 0) { + pfatal("Could not read snapshot %ju block " + "%jd\n", (intmax_t)ip.i_number, + (intmax_t)copyblkno); + continue; + } + } + /* + * This allocation will never require any additional + * allocations for the snapshot inode. 
+ */ + if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag)) == 0) { + pfatal("Could not allocate block for snapshot %ju\n", + (intmax_t)ip.i_number); + continue; + } + if (debug) { + printf("Copyonwrite: snapino %jd lbn %jd using " + "blkno %ju\n", (intmax_t)ip.i_number, (intmax_t)lbn, + (intmax_t)blkno); + prtbuf(" setting in buffer", snapbp); + } + blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize); + IBLK_SET(snapbp, snapbp->b_index, blkno); + dirty(snapbp); + brelse(snapbp); + DIP_SET(dp, di_blocks, + DIP(dp, di_blocks) + btodb(fs->fs_bsize)); + inodirty(&ip); + } + return; +} + void freeinodebuf(void) { @@ -803,6 +1107,10 @@ printf(" (CLEARED)\n"); n_files--; if (bkgrdflag == 0) { + if (idesc->id_type == SNAP) { + snapremove(idesc->id_number); + idesc->id_type = ADDR; + } (void)ckinode(dp, idesc); inoinfo(idesc->id_number)->ino_state = USTATE; clearinode(dp); @@ -967,7 +1275,7 @@ cgdirty(cgbp); ginode(ino, &ip); dp = ip.i_dp; - DIP_SET(dp, di_db[0], allocblk((long)1)); + DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1)); if (DIP(dp, di_db[0]) == 0) { inoinfo(ino)->ino_state = USTATE; irelse(&ip); diff --git a/sbin/fsck_ffs/main.c b/sbin/fsck_ffs/main.c --- a/sbin/fsck_ffs/main.c +++ b/sbin/fsck_ffs/main.c @@ -491,6 +491,7 @@ */ if (preen == 0) printf("** Phase 5 - Check Cyl groups\n"); + snapflush(); pass5(); IOstats("Pass5"); diff --git a/sbin/fsck_ffs/setup.c b/sbin/fsck_ffs/setup.c --- a/sbin/fsck_ffs/setup.c +++ b/sbin/fsck_ffs/setup.c @@ -59,6 +59,9 @@ #include "fsck.h" struct inoinfo **inphead, **inpsort; /* info about all inodes */ +struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */ +int snapcnt; /* number of active snapshots */ +char *copybuf; /* buffer to copy snapshot blocks */ static int sbhashfailed; #define POWEROF2(num) (((num) & ((num) - 1)) == 0) @@ -75,7 +78,8 @@ int setup(char *dev) { - long bmapsize; + long i, bmapsize; + struct inode ip; /* * We are expected to have an open file descriptor 
and a superblock. @@ -174,6 +178,37 @@ usedsoftdep = 1; else usedsoftdep = 0; + /* + * Collect any snapshot inodes so that we can allow them to + * claim any blocks that we free. The code for doing this is + * imported here and into inode.c from sys/ufs/ffs/ffs_snapshot.c. + */ + for (snapcnt = 0; snapcnt < FSMAXSNAP; snapcnt++) { + if (sblock.fs_snapinum[snapcnt] == 0) + break; + ginode(sblock.fs_snapinum[snapcnt], &ip); + if ((DIP(ip.i_dp, di_mode) & IFMT) == IFREG && + (DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) != 0) { + snaplist[snapcnt] = ip; + continue; + } + printf("Removing non-snapshot inode %ju from snapshot list\n", + (uintmax_t)sblock.fs_snapinum[snapcnt]); + irelse(&ip); + for (i = snapcnt + 1; i < FSMAXSNAP; i++) { + if (sblock.fs_snapinum[i] == 0) + break; + sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i]; + } + sblock.fs_snapinum[i - 1] = 0; + snapcnt--; + } + if (snapcnt > 0 && copybuf == NULL) { + copybuf = Malloc(sblock.fs_bsize); + if (copybuf == NULL) + errx(EEXIT, "cannot allocate space for snapshot " + "copy buffer"); + } return (1); badsb: diff --git a/sbin/fsck_ffs/suj.c b/sbin/fsck_ffs/suj.c --- a/sbin/fsck_ffs/suj.c +++ b/sbin/fsck_ffs/suj.c @@ -431,6 +431,12 @@ if (debug) printf("Freeing %d frags at blk %jd mask 0x%x\n", frags, bno, mask); + /* + * Check to see if the block needs to be claimed by a snapshot. + * If wanted, the snapshot references it. Otherwise we free it. + */ + if (snapblkfree(fs, bno, lfragtosize(fs, frags), 0)) + return; cg = dtog(fs, bno); sc = cg_lookup(cg); cgp = sc->sc_cgp; @@ -1264,6 +1270,7 @@ if (size > 0) err_suj("Partial truncation of ino %ju snapshot file\n", (uintmax_t)ino); + snapremove(ino); } lastlbn = lblkno(fs, blkroundup(fs, size)); for (i = lastlbn; i < UFS_NDADDR; i++) { @@ -1283,13 +1290,13 @@ /* If we're not freeing any in this indirect range skip it. 
*/ if (lastlbn >= nextlbn) continue; - if (DIP(dp, di_ib[i]) == 0) - continue; - indir_trunc(ino, -lbn - i, DIP(dp, di_ib[i]), lastlbn, dp); - /* If we freed everything in this indirect free the indir. */ - if (lastlbn > lbn) - continue; - blk_free(DIP(dp, di_ib[i]), 0, fs->fs_frag); + if ((bn = DIP(dp, di_ib[i])) == 0) + continue; + indir_trunc(ino, -lbn - i, bn, lastlbn, dp); + /* If we freed everything in this indirect free the indir. */ + if (lastlbn > lbn) + continue; + blk_free(bn, 0, fs->fs_frag); DIP_SET(dp, di_ib[i], 0); } /*