diff --git a/sbin/fsck_ffs/dir.c b/sbin/fsck_ffs/dir.c --- a/sbin/fsck_ffs/dir.c +++ b/sbin/fsck_ffs/dir.c @@ -679,14 +679,17 @@ struct bufarea *bp, *nbp; struct inodesc idesc; union dinode *dp; - int indiralloced; + long cg, indiralloced; char *cp; nbp = NULL; indiralloced = newblk = indirblk = 0; + memset(&idesc, 0, sizeof(struct inodesc)); + idesc.id_type = ADDR; pwarn("NO SPACE LEFT IN %s", name); if (!preen && reply("EXPAND") == 0) return (0); + cg = ino_to_cg(&sblock, ip->i_number); dp = ip->i_dp; filesize = DIP(dp, di_size); lastlbn = lblkno(&sblock, filesize); @@ -705,7 +708,8 @@ bp = getdirblk(oldblk, lastlbnsize); if (bp->b_errs) goto bad; - if ((newblk = allocblk(sblock.fs_frag)) == 0) + newblk = allocblk(cg, sblock.fs_frag, std_checkblkavail); + if (newblk == 0) goto bad; nbp = getdatablk(newblk, sblock.fs_bsize, BT_DIRDATA); if (nbp->b_errs) @@ -724,6 +728,7 @@ memmove(cp, &emptydir, sizeof emptydir); dirty(nbp); brelse(nbp); + binval(bp); idesc.id_blkno = oldblk; idesc.id_numfrags = numfrags(&sblock, lastlbnsize); (void)freeblock(&idesc); @@ -731,7 +736,7 @@ printf(" (EXPANDED)\n"); return (1); } - if ((newblk = allocblk(sblock.fs_frag)) == 0) + if ((newblk = allocblk(cg, sblock.fs_frag, std_checkblkavail)) == 0) goto bad; bp = getdirblk(newblk, sblock.fs_bsize); if (bp->b_errs) @@ -749,8 +754,12 @@ * Allocate indirect block if needed. 
*/ if ((indirblk = DIP(dp, di_ib[0])) == 0) { - if ((indirblk = allocblk(sblock.fs_frag)) == 0) + indirblk = allocblk(cg, sblock.fs_frag, + std_checkblkavail); + if (indirblk == 0) { + binval(bp); goto bad; + } indiralloced = 1; } nbp = getdatablk(indirblk, sblock.fs_bsize, BT_LEVEL1); @@ -774,8 +783,10 @@ return (1); bad: pfatal(" (EXPANSION FAILED)\n"); - if (nbp != NULL) + if (nbp != NULL) { + binval(bp); brelse(nbp); + } if (newblk != 0) { idesc.id_blkno = newblk; idesc.id_numfrags = sblock.fs_frag; diff --git a/sbin/fsck_ffs/fsck.h b/sbin/fsck_ffs/fsck.h --- a/sbin/fsck_ffs/fsck.h +++ b/sbin/fsck_ffs/fsck.h @@ -200,8 +200,7 @@ #define BT_INODES 7 /* Buffer holds inodes */ #define BT_DIRDATA 8 /* Buffer holds directory data */ #define BT_DATA 9 /* Buffer holds user data */ -#define BT_EMPTY 10 /* Buffer allocated but not filled */ -#define BT_NUMBUFTYPES 11 +#define BT_NUMBUFTYPES 10 #define BT_NAMES { \ "unknown", \ "Superblock", \ @@ -212,8 +211,7 @@ "External Attribute", \ "Inode Block", \ "Directory Contents", \ - "User Data", \ - "Allocated but not filled" } + "User Data" } extern char *buftype[]; #define BT_BUFTYPE(type) \ type < BT_NUMBUFTYPES ? 
buftype[type] : buftype[BT_UNKNOWN] @@ -234,7 +232,7 @@ (bp)->b_flags |= B_DIRTY; \ } while (0) #define initbarea(bp, type) do { \ - (bp)->b_bno = (ufs2_daddr_t)-1; \ + (bp)->b_bno = (ufs2_daddr_t)-4; \ (bp)->b_size = 0; \ (bp)->b_errs = 0; \ (bp)->b_flags = 0; \ @@ -347,6 +345,7 @@ extern char *cdevname; /* name of device being checked */ extern char ckclean; /* only do work if not cleanly unmounted */ extern int ckhashadd; /* check hashes to be added */ +extern char *copybuf; /* buffer to copy snapshot blocks */ extern int cvtlevel; /* convert to newer file system format */ extern long dev_bsize; /* computed value of DEV_BSIZE */ extern u_int real_dev_bsize; /* actual disk sector size, not overridden */ @@ -371,6 +370,8 @@ extern int returntosingle; /* 1 => return to single user mode on exit */ extern long secsize; /* actual disk sector size */ extern char skipclean; /* skip clean file systems if preening */ +extern int snapcnt; /* number of active snapshots */ +extern struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */ extern char snapname[BUFSIZ]; /* when doing snapshots, the name of the file */ extern int sujrecovery; /* 1 => doing check using the journal */ extern int surrender; /* Give up if reads fail */ @@ -441,9 +442,11 @@ void adjust(struct inodesc *, int lcnt); void alarmhandler(int sig); -ufs2_daddr_t allocblk(long frags); +ufs2_daddr_t allocblk(long cg, long frags, ufs2_daddr_t (*checkblkavail) + (ufs2_daddr_t blkno, long frags)); ino_t allocdir(ino_t parent, ino_t request, int mode); ino_t allocino(ino_t request, int type); +void binval(struct bufarea *); void blkerror(ino_t ino, const char *type, ufs2_daddr_t blk); char *blockcheck(char *name); int blread(int fd, char *buf, ufs2_daddr_t blk, long size); @@ -458,12 +461,15 @@ void cgdirty(struct bufarea *); struct bufarea *cglookup(int cg); int changeino(ino_t dir, const char *name, ino_t newnum); +void check_blkcnt(struct inode *ip); int check_cgmagic(int cg, struct bufarea *cgbp, 
int requestrebuild); int chkrange(ufs2_daddr_t blk, int cnt); void ckfini(int markclean); int ckinode(union dinode *dp, struct inodesc *); void clri(struct inodesc *, const char *type, int flag); int clearentry(struct inodesc *); +void copyonwrite(struct fs *, struct bufarea *, + ufs2_daddr_t (*checkblkavail)(long, long)); void direrror(ino_t ino, const char *errmesg); int dirscan(struct inodesc *); int dofix(struct inodesc *, const char *msg); @@ -476,6 +482,7 @@ int freeblock(struct inodesc *); void freeino(ino_t ino); void freeinodebuf(void); +void fsckinit(void); void fsutilinit(void); int ftypeok(union dinode *dp); void getblk(struct bufarea *bp, ufs2_daddr_t blk, long size); @@ -484,6 +491,7 @@ union dinode *getnextinode(ino_t inumber, int rebuildcg); void getpathname(char *namebuf, ino_t curdir, ino_t ino); void ginode(ino_t, struct inode *); +void gjournal_check(const char *filesys); void infohandler(int sig); void irelse(struct inode *); ufs2_daddr_t ino_blkatoff(union dinode *, ino_t, ufs_lbn_t, int *, @@ -505,6 +513,7 @@ void pass5(void); void pfatal(const char *fmt, ...) __printflike(1, 2); void propagate(void); +void prtbuf(struct bufarea *, const char *, ...) __printflike(2, 3); void prtinode(struct inode *); void pwarn(const char *fmt, ...) 
__printflike(1, 2); int readsb(void); @@ -513,9 +522,13 @@ void sblock_init(void); void setinodebuf(int, ino_t); int setup(char *dev); -void gjournal_check(const char *filesys); +int snapblkfree(struct fs *, ufs2_daddr_t, long, ino_t, + ufs2_daddr_t (*)(ufs2_daddr_t, long)); +void snapremove(ino_t); +void snapflush(ufs2_daddr_t (*checkblkavail)(long, long)); +ufs2_daddr_t std_checkblkavail(ufs2_daddr_t blkno, long frags); +ufs2_daddr_t suj_checkblkavail(ufs2_daddr_t, long); int suj_check(const char *filesys); void update_maps(struct cg *, struct cg*, int); -void fsckinit(void); #endif /* !_FSCK_H_ */ diff --git a/sbin/fsck_ffs/fsutil.c b/sbin/fsck_ffs/fsutil.c --- a/sbin/fsck_ffs/fsutil.c +++ b/sbin/fsck_ffs/fsutil.c @@ -71,7 +71,6 @@ static void slowio_start(void); static void slowio_end(void); static void printIOstats(void); -static void prtbuf(const char *, struct bufarea *); static long diskreads, totaldiskreads, totalreads; /* Disk cache statistics */ static struct timespec startpass, finishpass; @@ -79,8 +78,10 @@ int slowio_delay_usec = 10000; /* Initial IO delay for background fsck */ int slowio_pollcnt; static struct bufarea cgblk; /* backup buffer for cylinder group blocks */ +static struct bufarea failedbuf; /* returned by failed getdatablk() */ static TAILQ_HEAD(bufqueue, bufarea) bufqueuehd; /* head of buffer cache LRU */ static LIST_HEAD(bufhash, bufarea) bufhashhd[HASHSIZE]; /* buffer hash list */ +static struct bufhash freebufs; /* unused buffers */ static int numbufs; /* size of buffer cache */ static int cachelookups; /* number of cache lookups */ static int cachereads; /* number of cache reads */ @@ -187,11 +188,15 @@ { int i; + initbarea(&failedbuf, BT_UNKNOWN); + failedbuf.b_errs = -1; + failedbuf.b_un.b_buf = NULL; if ((cgblk.b_un.b_buf = Malloc((unsigned int)sblock.fs_bsize)) == NULL) errx(EEXIT, "Initial malloc(%d) failed", sblock.fs_bsize); initbarea(&cgblk, BT_CYLGRP); numbufs = cachelookups = cachereads = 0; TAILQ_INIT(&bufqueuehd); + 
LIST_INIT(&freebufs); for (i = 0; i < HASHSIZE; i++) LIST_INIT(&bufhashhd[i]); for (i = 0; i < BT_NUMBUFTYPES; i++) { @@ -300,7 +305,7 @@ } /* - * Manage a cache of directory blocks. + * Manage a cache of filesystem disk blocks. */ struct bufarea * getdatablk(ufs2_daddr_t blkno, long size, int type) @@ -309,19 +314,23 @@ struct bufhash *bhdp; cachelookups++; - /* If out of range, return empty buffer with b_err == -1 */ - if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) { - blkno = -1; - type = BT_EMPTY; - } + /* + * If out of range, return empty buffer with b_err == -1 + * + * Skip check for inodes because chkrange() considers + * metadata areas invalid to write data. + */ + if (type != BT_INODES && chkrange(blkno, size / sblock.fs_fsize)) + return (&failedbuf); bhdp = &bufhashhd[HASH(blkno)]; LIST_FOREACH(bp, bhdp, b_hash) if (bp->b_bno == fsbtodb(&sblock, blkno)) { if (debug && bp->b_size != size) { - prtbuf("getdatablk: size mismatch", bp); + prtbuf(bp, "getdatablk: size mismatch"); pfatal("getdatablk: b_size %d != size %ld\n", bp->b_size, size); } + TAILQ_REMOVE(&bufqueuehd, bp, b_list); goto foundit; } /* @@ -340,7 +349,9 @@ if (size > sblock.fs_bsize) errx(EEXIT, "Excessive buffer size %ld > %d\n", size, sblock.fs_bsize); - if (numbufs < MINBUFS) { + if ((bp = LIST_FIRST(&freebufs)) != NULL) { + LIST_REMOVE(bp, b_hash); + } else if (numbufs < MINBUFS) { bp = allocbuf("cannot create minimal buffer pool"); } else if (sujrecovery) { /* @@ -368,6 +379,7 @@ else LIST_REMOVE(bp, b_hash); } + TAILQ_REMOVE(&bufqueuehd, bp, b_list); flush(fswritefd, bp); bp->b_type = type; LIST_INSERT_HEAD(bhdp, bp, b_hash); @@ -375,13 +387,12 @@ cachereads++; /* fall through */ foundit: + TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); if (debug && bp->b_type != type) { printf("getdatablk: buffer type changed to %s", BT_BUFTYPE(type)); - prtbuf("", bp); + prtbuf(bp, ""); } - TAILQ_REMOVE(&bufqueuehd, bp, b_list); - TAILQ_INSERT_HEAD(&bufqueuehd, bp, b_list); if 
(bp->b_errs == 0) bp->b_refcnt++; return (bp); @@ -401,11 +412,7 @@ readcnt[bp->b_type]++; clock_gettime(CLOCK_REALTIME_PRECISE, &start); } - if (bp->b_type != BT_EMPTY) - bp->b_errs = - blread(fsreadfd, bp->b_un.b_buf, dblk, size); - else - bp->b_errs = -1; + bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, dblk, size); if (debug) { clock_gettime(CLOCK_REALTIME_PRECISE, &finish); timespecsub(&finish, &start, &finish); @@ -422,10 +429,19 @@ { if (bp->b_refcnt <= 0) - prtbuf("brelse: buffer with negative reference count", bp); + prtbuf(bp, "brelse: buffer with negative reference count"); bp->b_refcnt--; } +void +binval(struct bufarea *bp) +{ + + bp->b_flags &= ~B_DIRTY; + LIST_REMOVE(bp, b_hash); + LIST_INSERT_HEAD(&freebufs, bp, b_hash); +} + void flush(int fd, struct bufarea *bp) { @@ -451,10 +467,18 @@ if (bp != &sblk) pfatal("BUFFER %p DOES NOT MATCH SBLK %p\n", bp, &sblk); + /* + * Superblocks are always pre-copied so we do not need + * to check them for copy-on-write. + */ if (sbput(fd, bp->b_un.b_fs, 0) == 0) fsmodified = 1; break; case BT_CYLGRP: + /* + * Cylinder groups are always pre-copied so we do not + * need to check them for copy-on-write. + */ if (sujrecovery) cg_write(bp); if (cgput(fswritefd, &sblock, bp->b_un.b_cg) == 0) @@ -483,11 +507,38 @@ } /* FALLTHROUGH */ default: + copyonwrite(&sblock, bp, std_checkblkavail); blwrite(fd, bp->b_un.b_buf, bp->b_bno, bp->b_size); break; } } +/* + * If there are any snapshots, ensure that all the blocks that they + * care about have been copied, then release the snapshot inodes. + * These operations need to be done before we rebuild the cylinder + * groups so that any block allocations are properly recorded. + * Since all the cylinder group maps have already been copied in + * the snapshots, no further snapshot copies will need to be done. 
+ */ +void +snapflush(ufs2_daddr_t (*checkblkavail)(long, long)) +{ + struct bufarea *bp; + int cnt; + + if (snapcnt > 0) { + if (debug) + printf("Check for snapshot copies\n"); + TAILQ_FOREACH_REVERSE(bp, &bufqueuehd, bufqueue, b_list) + if ((bp->b_flags & B_DIRTY) != 0) + copyonwrite(&sblock, bp, checkblkavail); + for (cnt = 0; cnt < snapcnt; cnt++) + irelse(&snaplist[cnt]); + snapcnt = 0; + } +} + /* * Journaled soft updates does not maintain cylinder group summary * information during cleanup, so this routine recalculates the summary @@ -499,6 +550,7 @@ { ufs1_daddr_t fragno, cgbno, maxbno; u_int8_t *blksfree; + struct csum *csp; struct cg *cgp; int blk; int i; @@ -536,6 +588,11 @@ * Update the superblock cg summary from our now correct values * before writing the block. */ + csp = &sblock.fs_cs(&sblock, cgp->cg_cgx); + sblock.fs_cstotal.cs_ndir += cgp->cg_cs.cs_ndir - csp->cs_ndir; + sblock.fs_cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree - csp->cs_nbfree; + sblock.fs_cstotal.cs_nifree += cgp->cg_cs.cs_nifree - csp->cs_nifree; + sblock.fs_cstotal.cs_nffree += cgp->cg_cs.cs_nffree - csp->cs_nffree; sblock.fs_cs(&sblock, cgp->cg_cgx) = cgp->cg_cs; } @@ -587,6 +644,7 @@ (void)close(fsreadfd); return; } + /* * To remain idempotent with partial truncations the buffers * must be flushed in this order: @@ -629,14 +687,9 @@ case BT_SUPERBLK: case BT_CYLGRP: default: - prtbuf("ckfini: improper buffer type on cache list",bp); + prtbuf(bp,"ckfini: improper buffer type on cache list"); continue; /* These are the ones to flush in this step */ - case BT_EMPTY: - if (bp->b_bno >= 0) - pfatal("Unused BT_EMPTY buffer for block %jd\n", - (intmax_t)bp->b_bno); - /* FALLTHROUGH */ case BT_LEVEL1: case BT_LEVEL2: case BT_LEVEL3: @@ -648,11 +701,10 @@ case BT_INODES: continue; } - if (debug && bp->b_refcnt != 0) { - prtbuf("ckfini: clearing in-use buffer", bp); - pfatal("ckfini: clearing in-use buffer\n"); - } + if (debug && bp->b_refcnt != 0) + prtbuf(bp, "ckfini: clearing in-use 
buffer"); TAILQ_REMOVE(&bufqueuehd, bp, b_list); + LIST_REMOVE(bp, b_hash); cnt++; flush(fswritefd, bp); free(bp->b_un.b_buf); @@ -666,11 +718,10 @@ icachebp = NULL; } TAILQ_FOREACH_REVERSE_SAFE(bp, &bufqueuehd, bufqueue, b_list, nbp) { - if (debug && bp->b_refcnt != 0) { - prtbuf("ckfini: clearing in-use buffer", bp); - pfatal("ckfini: clearing in-use buffer\n"); - } + if (debug && bp->b_refcnt != 0) + prtbuf(bp, "ckfini: clearing in-use buffer"); TAILQ_REMOVE(&bufqueuehd, bp, b_list); + LIST_REMOVE(bp, b_hash); cnt++; flush(fswritefd, bp); free(bp->b_un.b_buf); @@ -1050,45 +1101,77 @@ * allocate a data block with the specified number of fragments */ ufs2_daddr_t -allocblk(long frags) +allocblk(long startcg, long frags, + ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags)) { - int i, j, k, cg, baseblk; - struct bufarea *cgbp; - struct cg *cgp; + ufs2_daddr_t blkno, newblk; + if (sujrecovery && checkblkavail == std_checkblkavail) { + pfatal("allocblk: std_checkblkavail used for SUJ recovery\n"); + return (0); + } if (frags <= 0 || frags > sblock.fs_frag) return (0); - for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) { - for (j = 0; j <= sblock.fs_frag - frags; j++) { - if (testbmap(i + j)) - continue; - for (k = 1; k < frags; k++) - if (testbmap(i + j + k)) - break; - if (k < frags) { - j += k; - continue; - } - cg = dtog(&sblock, i + j); - cgbp = cglookup(cg); - cgp = cgbp->b_un.b_cg; - if (!check_cgmagic(cg, cgbp, 0)) { - i = (cg + 1) * sblock.fs_fpg - sblock.fs_frag; - continue; - } - baseblk = dtogd(&sblock, i + j); - for (k = 0; k < frags; k++) { - setbmap(i + j + k); - clrbit(cg_blksfree(cgp), baseblk + k); - } - n_blks += frags; - if (frags == sblock.fs_frag) - cgp->cg_cs.cs_nbfree--; - else - cgp->cg_cs.cs_nffree -= frags; - cgdirty(cgbp); - return (i + j); + for (blkno = cgdata(&sblock, startcg); + blkno < maxfsblock - sblock.fs_frag; + blkno += sblock.fs_frag) { + if ((newblk = (*checkblkavail)(blkno, frags)) == 0) + continue; 
+ if (newblk > 0) + return (newblk); + if (newblk < 0) + blkno = -newblk; + } + for (blkno = cgdata(&sblock, 0); + blkno < cgbase(&sblock, startcg) - sblock.fs_frag; + blkno += sblock.fs_frag) { + if ((newblk = (*checkblkavail)(blkno, frags)) == 0) + continue; + if (newblk > 0) + return (newblk); + if (newblk < 0) + blkno = -newblk; + } + return (0); +} + +ufs2_daddr_t +std_checkblkavail(blkno, frags) + ufs2_daddr_t blkno; + long frags; +{ + struct bufarea *cgbp; + struct cg *cgp; + ufs2_daddr_t j, k, baseblk; + long cg; + + for (j = 0; j <= sblock.fs_frag - frags; j++) { + if (testbmap(blkno + j)) + continue; + for (k = 1; k < frags; k++) + if (testbmap(blkno + j + k)) + break; + if (k < frags) { + j += k; + continue; } + cg = dtog(&sblock, blkno + j); + cgbp = cglookup(cg); + cgp = cgbp->b_un.b_cg; + if (!check_cgmagic(cg, cgbp, 0)) + return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag)); + baseblk = dtogd(&sblock, blkno + j); + for (k = 0; k < frags; k++) { + setbmap(blkno + j + k); + clrbit(cg_blksfree(cgp), baseblk + k); + } + n_blks += frags; + if (frags == sblock.fs_frag) + cgp->cg_cs.cs_nbfree--; + else + cgp->cg_cs.cs_nffree -= frags; + cgdirty(cgbp); + return (blkno + j); } return (0); } @@ -1261,14 +1344,19 @@ /* * Print details about a buffer. */ -static void -prtbuf(const char *msg, struct bufarea *bp) +void +prtbuf(struct bufarea *bp, const char *fmt, ...) { - - printf("%s: bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, " - "index %jd\n", msg, bp, BT_BUFTYPE(bp->b_type), - (intmax_t) bp->b_bno, bp->b_size, bp->b_refcnt, - bp->b_flags & B_DIRTY ? "dirty" : "clean", (intmax_t) bp->b_index); + va_list ap; + va_start(ap, fmt); + if (preen) + (void)fprintf(stdout, "%s: ", cdevname); + (void)vfprintf(stdout, fmt, ap); + va_end(ap); + printf(": bp %p, type %s, bno %jd, size %d, refcnt %d, flags %s, " + "index %jd\n", bp, BT_BUFTYPE(bp->b_type), (intmax_t) bp->b_bno, + bp->b_size, bp->b_refcnt, bp->b_flags & B_DIRTY ? 
"dirty" : "clean", + (intmax_t) bp->b_index); } /* diff --git a/sbin/fsck_ffs/inode.c b/sbin/fsck_ffs/inode.c --- a/sbin/fsck_ffs/inode.c +++ b/sbin/fsck_ffs/inode.c @@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include @@ -58,6 +59,9 @@ static int iblock(struct inodesc *, off_t isize, int type); static ufs2_daddr_t indir_blkatoff(ufs2_daddr_t, ino_t, ufs_lbn_t, ufs_lbn_t, struct bufarea **); +static int snapclean(struct inodesc *idesc); +static void chkcopyonwrite(struct fs *, ufs2_daddr_t, + ufs2_daddr_t (*checkblkavail)(long, long)); int ckinode(union dinode *dp, struct inodesc *idesc) @@ -378,8 +382,12 @@ int c; if (cnt <= 0 || blk <= 0 || blk > maxfsblock || - cnt - 1 > maxfsblock - blk) + cnt - 1 > maxfsblock - blk) { + if (debug) + printf("out of range: blk %ld, offset %i, size %d\n", + (long)blk, (int)fragnum(&sblock, blk), cnt); return (1); + } if (cnt > sblock.fs_frag || fragnum(&sblock, blk) + cnt > sblock.fs_frag) { if (debug) @@ -650,11 +658,21 @@ freeblock(struct inodesc *idesc) { struct dups *dlp; + struct bufarea *cgbp; + struct cg *cgp; ufs2_daddr_t blkno; - long nfrags, res; + long size, nfrags, res; res = KEEPON; blkno = idesc->id_blkno; + if (idesc->id_type == SNAP) { + pfatal("clearing a snapshot dinode\n"); + return (STOP); + } + size = lfragtosize(&sblock, idesc->id_numfrags); + if (snapblkfree(&sblock, blkno, size, idesc->id_number, + std_checkblkavail)) + return (res); for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) { if (chkrange(blkno, 1)) { res = SKIP; @@ -674,12 +692,407 @@ } } } + /* + * If all successfully returned, account for them. + */ + if (nfrags == 0) { + cgbp = cglookup(dtog(&sblock, idesc->id_blkno)); + cgp = cgbp->b_un.b_cg; + if (idesc->id_numfrags == sblock.fs_frag) + cgp->cg_cs.cs_nbfree++; + else + cgp->cg_cs.cs_nffree += idesc->id_numfrags; + cgdirty(cgbp); + } return (res); } +/* + * Prepare a snapshot file for being removed. 
+ *
+ * When the inode is on the active list it is unlinked from both snaplist[]
+ * and sblock.fs_snapinum[]. Its BLK_NOCOPY and BLK_SNAP pointers are then
+ * zeroed by walking the inode with snapclean(), and SF_SNAPSHOT is cleared
+ * from the inode flags.
+ */
+void
+snapremove(ino_t inum)
+{
+	struct inodesc idesc;
+	struct inode ip;
+	int i;
+
+	for (i = 0; i < snapcnt; i++)
+		if (snaplist[i].i_number == inum)
+			break;
+	/* Not on the active list: fetch the inode ourselves. */
+	if (i == snapcnt)
+		ginode(inum, &ip);
+	else
+		ip = snaplist[i];
+	if ((DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) == 0) {
+		printf("snapremove: inode %jd is not a snapshot\n",
+		    (intmax_t)inum);
+		/* Only release what this function acquired via ginode(). */
+		if (i == snapcnt)
+			irelse(&ip);
+		return;
+	}
+	if (debug)
+		printf("snapremove: remove %sactive snapshot %jd\n",
+		    i == snapcnt ? "in" : "", (intmax_t)inum);
+	/*
+	 * If on active snapshot list, remove it.
+	 */
+	if (i < snapcnt) {
+		for (i++; i < FSMAXSNAP; i++) {
+			if (sblock.fs_snapinum[i] == 0)
+				break;
+			snaplist[i - 1] = snaplist[i];
+			sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i];
+		}
+		sblock.fs_snapinum[i - 1] = 0;
+		bzero(&snaplist[i - 1], sizeof(struct inode));
+		snapcnt--;
+	}
+	/*
+	 * Start from a zeroed descriptor: only three fields are assigned
+	 * below, and the rest would otherwise hold stack garbage when the
+	 * descriptor is handed to ckinode().
+	 */
+	memset(&idesc, 0, sizeof(struct inodesc));
+	idesc.id_type = SNAP;
+	idesc.id_func = snapclean;
+	idesc.id_number = inum;
+	(void)ckinode(ip.i_dp, &idesc);
+	DIP_SET(ip.i_dp, di_flags, DIP(ip.i_dp, di_flags) & ~SF_SNAPSHOT);
+	inodirty(&ip);
+	irelse(&ip);
+}
+
+/*
+ * Per-block callback installed by snapremove() for its ckinode() walk.
+ * Block pointers holding the BLK_NOCOPY or BLK_SNAP markers are reset to
+ * zero; every other pointer is left untouched. Always returns KEEPON.
+ */
+static int
+snapclean(struct inodesc *idesc)
+{
+	ufs2_daddr_t blkno;
+	struct bufarea *bp;
+	union dinode *dp;
+
+	blkno = idesc->id_blkno;
+	if (blkno == 0)
+		return (KEEPON);
+
+	dp = idesc->id_dp;
+	if (blkno == BLK_NOCOPY || blkno == BLK_SNAP) {
+		if (idesc->id_lbn < UFS_NDADDR) {
+			/*
+			 * Direct pointers live in the dinode itself, which
+			 * snapremove() marks dirty after the walk.
+			 */
+			DIP_SET(dp, di_db[idesc->id_lbn], 0);
+		} else {
+			/*
+			 * NOTE(review): id_bp is presumably only valid for
+			 * entries reached through an indirect block, so the
+			 * buffer is only touched here -- confirm in ckinode().
+			 */
+			bp = idesc->id_bp;
+			IBLK_SET(bp, bp->b_index, 0);
+			dirty(bp);
+		}
+	}
+	return (KEEPON);
+}
+
+/*
+ * Notification that a block is being freed. Return zero if the free
+ * should be allowed to proceed. Return non-zero if the snapshot file
+ * wants to claim the block. The block will be claimed if it is an
+ * uncopied part of one of the snapshots. It will be freed if it is
+ * either a BLK_NOCOPY or has already been copied in all of the snapshots.
+ * If a fragment is being freed, then all snapshots that care about
+ * it must make a copy since a snapshot file can only claim full sized
+ * blocks. Note that if more than one snapshot file maps the block,
+ * we can pick one at random to claim it. Since none of the snapshots
+ * can change, we are assured that they will all see the same unmodified
+ * image. When deleting a snapshot file (see ino_trunc above), we
+ * must push any of these claimed blocks to one of the other snapshots
+ * that maps it. These claimed blocks are easily identified as they will
+ * have a block number equal to their logical block number within the
+ * snapshot. A copied block can never have this property because they
+ * must always have been allocated from a BLK_NOCOPY location.
+ *
+ * bno is the block being freed and size its length in bytes. inum is
+ * the inode number supplied by the caller; when it is zero the snapshot's
+ * own inode number is used for the lookup. checkblkavail is handed to
+ * allocblk() when a copy has to be allocated, so its type matches the
+ * callback type allocblk() and the fsck.h prototype declare.
+ */
+int
+snapblkfree(struct fs *fs, ufs2_daddr_t bno, long size, ino_t inum,
+    ufs2_daddr_t (*checkblkavail)(ufs2_daddr_t blkno, long frags))
+{
+	union dinode *dp;
+	struct inode ip;
+	struct bufarea *snapbp;
+	ufs_lbn_t lbn;
+	ufs2_daddr_t blkno, relblkno;
+	int i, frags, claimedblk, copydone;
+
+	/* If no snapshots, nothing to do */
+	if (snapcnt == 0)
+		return (0);
+	/* Cast to intmax_t: ino_t and ufs2_daddr_t are not always long. */
+	if (debug)
+		printf("snapblkfree: in ino %jd free blkno %jd, size %jd\n",
+		    (intmax_t)inum, (intmax_t)bno, (intmax_t)size);
+	relblkno = blknum(fs, bno);
+	lbn = fragstoblks(fs, relblkno);
+	/* Direct blocks are always pre-copied */
+	if (lbn < UFS_NDADDR)
+		return (0);
+	copydone = 0;
+	claimedblk = 0;
+	for (i = 0; i < snapcnt; i++) {
+		/*
+		 * Lookup block being freed.
+		 */
+		ip = snaplist[i];
+		dp = ip.i_dp;
+		blkno = ino_blkatoff(dp, inum != 0 ? inum : ip.i_number,
+		    lbn, &frags, &snapbp);
+		/*
+		 * Check to see if block needs to be copied.
+		 */
+		if (blkno == 0) {
+			/*
+			 * A block that we map is being freed. If it has not
+			 * been claimed yet, we will claim or copy it (below).
+ */ + claimedblk = 1; + } else if (blkno == BLK_SNAP) { + /* + * No previous snapshot claimed the block, + * so it will be freed and become a BLK_NOCOPY + * (don't care) for us. + */ + if (claimedblk) + pfatal("snapblkfree: inconsistent block type"); + IBLK_SET(snapbp, snapbp->b_index, BLK_NOCOPY); + dirty(snapbp); + brelse(snapbp); + continue; + } else /* BLK_NOCOPY or default */ { + /* + * If the snapshot has already copied the block + * (default), or does not care about the block, + * it is not needed. + */ + brelse(snapbp); + continue; + } + /* + * If this is a full size block, we will just grab it + * and assign it to the snapshot inode. Otherwise we + * will proceed to copy it. See explanation for this + * routine as to why only a single snapshot needs to + * claim this block. + */ + if (size == fs->fs_bsize) { + if (debug) + printf("Grabonremove snapshot %ju lbn %jd " + "from inum %ju\n", (intmax_t)ip.i_number, + (intmax_t)lbn, (uintmax_t)inum); + IBLK_SET(snapbp, snapbp->b_index, relblkno); + dirty(snapbp); + brelse(snapbp); + DIP_SET(dp, di_blocks, + DIP(dp, di_blocks) + btodb(size)); + inodirty(&ip); + return (1); + } + + /* First time through, read the contents of the old block. */ + if (copydone == 0) { + copydone = 1; + if (blread(fsreadfd, copybuf, fsbtodb(fs, relblkno), + fs->fs_bsize) != 0) { + pfatal("Could not read snapshot %ju block " + "%jd\n", (intmax_t)ip.i_number, + (intmax_t)relblkno); + continue; + } + } + /* + * This allocation will never require any additional + * allocations for the snapshot inode. 
+ */ + blkno = allocblk(dtog(fs, relblkno), fs->fs_frag, + checkblkavail); + if (blkno == 0) { + pfatal("Could not allocate block for snapshot %ju\n", + (intmax_t)ip.i_number); + continue; + } + if (debug) + printf("Copyonremove: snapino %jd lbn %jd for inum %ju " + "size %ld new blkno %jd\n", (intmax_t)ip.i_number, + (intmax_t)lbn, (uintmax_t)inum, size, + (intmax_t)blkno); + blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize); + IBLK_SET(snapbp, snapbp->b_index, blkno); + dirty(snapbp); + brelse(snapbp); + DIP_SET(dp, di_blocks, + DIP(dp, di_blocks) + btodb(fs->fs_bsize)); + inodirty(&ip); + } + return (0); +} + +/* + * Notification that a block is being written. Return if the block + * is part of a snapshot as snapshots never track other snapshots. + * The block will be copied in all of the snapshots that are tracking + * it and have not yet copied it. Some buffers may hold more than one + * block. Here we need to check each block in the buffer. + */ +void +copyonwrite(fs, bp, checkblkavail) + struct fs *fs; + struct bufarea *bp; + ufs2_daddr_t (*checkblkavail)(long cg, long frags); +{ + ufs2_daddr_t copyblkno; + long i, numblks; + + /* If no snapshots, nothing to do. */ + if (snapcnt == 0) + return; + numblks = blkroundup(fs, bp->b_size) / fs->fs_bsize; + if (debug) + prtbuf(bp, "copyonwrite: checking %jd block%s in buffer", + numblks, numblks > 1 ? 
"s" : ""); + copyblkno = blknum(fs, dbtofsb(fs, bp->b_bno)); + for (i = 0; i < numblks; i++) { + chkcopyonwrite(fs, copyblkno, checkblkavail); + copyblkno += fs->fs_frag; + } +} + +static void +chkcopyonwrite(fs, copyblkno, checkblkavail) + struct fs *fs; + ufs2_daddr_t copyblkno; + ufs2_daddr_t (*checkblkavail)(long cg, long frags); +{ + struct inode ip; + union dinode *dp; + struct bufarea *snapbp; + ufs2_daddr_t blkno; + int i, frags, copydone; + ufs_lbn_t lbn; + + lbn = fragstoblks(fs, copyblkno); + /* Direct blocks are always pre-copied */ + if (lbn < UFS_NDADDR) + return; + copydone = 0; + for (i = 0; i < snapcnt; i++) { + /* + * Lookup block being freed. + */ + ip = snaplist[i]; + dp = ip.i_dp; + blkno = ino_blkatoff(dp, ip.i_number, lbn, &frags, &snapbp); + /* + * Check to see if block needs to be copied. + */ + if (blkno != 0) { + /* + * A block that we have already copied or don't track. + */ + brelse(snapbp); + continue; + } + /* First time through, read the contents of the old block. */ + if (copydone == 0) { + copydone = 1; + if (blread(fsreadfd, copybuf, fsbtodb(fs, copyblkno), + fs->fs_bsize) != 0) { + pfatal("Could not read snapshot %ju block " + "%jd\n", (intmax_t)ip.i_number, + (intmax_t)copyblkno); + continue; + } + } + /* + * This allocation will never require any additional + * allocations for the snapshot inode. 
+ */ + if ((blkno = allocblk(dtog(fs, copyblkno), fs->fs_frag, + checkblkavail)) == 0) { + pfatal("Could not allocate block for snapshot %ju\n", + (intmax_t)ip.i_number); + continue; + } + if (debug) + prtbuf(snapbp, "Copyonwrite: snapino %jd lbn %jd using " + "blkno %ju setting in buffer", + (intmax_t)ip.i_number, (intmax_t)lbn, + (intmax_t)blkno); + blwrite(fswritefd, copybuf, fsbtodb(fs, blkno), fs->fs_bsize); + IBLK_SET(snapbp, snapbp->b_index, blkno); + dirty(snapbp); + brelse(snapbp); + DIP_SET(dp, di_blocks, + DIP(dp, di_blocks) + btodb(fs->fs_bsize)); + inodirty(&ip); + } + return; +} + +/* + * Traverse an inode and check that its block count is correct + * fixing it if necessary. + */ +void +check_blkcnt(struct inode *ip) +{ + struct inodesc idesc; + union dinode *dp; + ufs2_daddr_t ndb; + int j, ret, offset; + + dp = ip->i_dp; + memset(&idesc, 0, sizeof(struct inodesc)); + idesc.id_func = pass1check; + idesc.id_number = ip->i_number; + idesc.id_type = (DIP(dp, di_flags) & SF_SNAPSHOT) == 0 ? 
ADDR : SNAP; + (void)ckinode(dp, &idesc); + if (sblock.fs_magic == FS_UFS2_MAGIC && dp->dp2.di_extsize > 0) { + ndb = howmany(dp->dp2.di_extsize, sblock.fs_bsize); + for (j = 0; j < UFS_NXADDR; j++) { + if (--ndb == 0 && + (offset = blkoff(&sblock, dp->dp2.di_extsize)) != 0) + idesc.id_numfrags = numfrags(&sblock, + fragroundup(&sblock, offset)); + else + idesc.id_numfrags = sblock.fs_frag; + if (dp->dp2.di_extb[j] == 0) + continue; + idesc.id_blkno = dp->dp2.di_extb[j]; + ret = (*idesc.id_func)(&idesc); + if (ret & STOP) + break; + } + } + idesc.id_entryno *= btodb(sblock.fs_fsize); + if (DIP(dp, di_blocks) != idesc.id_entryno) { + if (!(sujrecovery && preen)) { + pwarn("INCORRECT BLOCK COUNT I=%lu (%ju should be %ju)", + (u_long)idesc.id_number, + (uintmax_t)DIP(dp, di_blocks), + (uintmax_t)idesc.id_entryno); + if (preen) + printf(" (CORRECTED)\n"); + else if (reply("CORRECT") == 0) + return; + } + if (bkgrdflag == 0) { + DIP_SET(dp, di_blocks, idesc.id_entryno); + inodirty(ip); + } else { + cmd.value = idesc.id_number; + cmd.size = idesc.id_entryno - DIP(dp, di_blocks); + if (debug) + printf("adjblkcnt ino %ju amount %lld\n", + (uintmax_t)cmd.value, (long long)cmd.size); + if (sysctl(adjblkcnt, MIBSIZE, 0, 0, + &cmd, sizeof cmd) == -1) + rwerror("ADJUST INODE BLOCK COUNT", cmd.value); + } + } +} + void freeinodebuf(void) { + struct bufarea *bp; + int i; /* * Flush old contents in case they have been updated. @@ -689,6 +1102,14 @@ free((char *)inobuf.b_un.b_buf); inobuf.b_un.b_buf = NULL; firstinum = lastinum = 0; + /* + * Reload the snapshot inodes in case any of them changed. + */ + for (i = 0; i < snapcnt; i++) { + bp = snaplist[i].i_bp; + bp->b_errs = blread(fsreadfd, bp->b_un.b_buf, bp->b_bno, + bp->b_size); + } } /* @@ -720,6 +1141,7 @@ inpp = &inphead[inumber % dirhash]; inp->i_nexthash = *inpp; *inpp = inp; + inp->i_flags = 0; inp->i_parent = inumber == UFS_ROOTINO ? 
UFS_ROOTINO : (ino_t)0; inp->i_dotdot = (ino_t)0; inp->i_number = inumber; @@ -803,6 +1225,10 @@ printf(" (CLEARED)\n"); n_files--; if (bkgrdflag == 0) { + if (idesc->id_type == SNAP) { + snapremove(idesc->id_number); + idesc->id_type = ADDR; + } (void)ckinode(dp, idesc); inoinfo(idesc->id_number)->ino_state = USTATE; clearinode(dp); @@ -967,7 +1393,8 @@ cgdirty(cgbp); ginode(ino, &ip); dp = ip.i_dp; - DIP_SET(dp, di_db[0], allocblk((long)1)); + DIP_SET(dp, di_db[0], allocblk(ino_to_cg(&sblock, ino), (long)1, + std_checkblkavail)); if (DIP(dp, di_db[0]) == 0) { inoinfo(ino)->ino_state = USTATE; irelse(&ip); diff --git a/sbin/fsck_ffs/main.c b/sbin/fsck_ffs/main.c --- a/sbin/fsck_ffs/main.c +++ b/sbin/fsck_ffs/main.c @@ -491,6 +491,7 @@ */ if (preen == 0) printf("** Phase 5 - Check Cyl groups\n"); + snapflush(std_checkblkavail); pass5(); IOstats("Pass5"); diff --git a/sbin/fsck_ffs/setup.c b/sbin/fsck_ffs/setup.c --- a/sbin/fsck_ffs/setup.c +++ b/sbin/fsck_ffs/setup.c @@ -59,6 +59,9 @@ #include "fsck.h" struct inoinfo **inphead, **inpsort; /* info about all inodes */ +struct inode snaplist[FSMAXSNAP + 1]; /* list of active snapshots */ +int snapcnt; /* number of active snapshots */ +char *copybuf; /* buffer to copy snapshot blocks */ static int sbhashfailed; #define POWEROF2(num) (((num) & ((num) - 1)) == 0) @@ -66,6 +69,8 @@ static int calcsb(char *dev, int devfd, struct fs *fs); static void saverecovery(int readfd, int writefd); static int chkrecovery(int devfd); +static int getlbnblkno(struct inodesc *); +static int checksnapinfo(struct inode *); /* * Read in a superblock finding an alternate if necessary. @@ -75,7 +80,8 @@ int setup(char *dev) { - long bmapsize; + long i, bmapsize; + struct inode ip; /* * We are expected to have an open file descriptor and a superblock. @@ -174,6 +180,42 @@ usedsoftdep = 1; else usedsoftdep = 0; + /* + * Collect any snapshot inodes so that we can allow them to + * claim any blocks that we free. 
The code for doing this is + * imported here and into inode.c from sys/ufs/ffs/ffs_snapshot.c. + */ + for (snapcnt = 0; snapcnt < FSMAXSNAP; snapcnt++) { + if (sblock.fs_snapinum[snapcnt] == 0) + break; + ginode(sblock.fs_snapinum[snapcnt], &ip); + if ((DIP(ip.i_dp, di_mode) & IFMT) == IFREG && + (DIP(ip.i_dp, di_flags) & SF_SNAPSHOT) != 0 && + checksnapinfo(&ip)) { + if (debug) + printf("Load snapshot %jd\n", + (intmax_t)sblock.fs_snapinum[snapcnt]); + snaplist[snapcnt] = ip; + continue; + } + printf("Removing non-snapshot inode %ju from snapshot list\n", + (uintmax_t)sblock.fs_snapinum[snapcnt]); + irelse(&ip); + for (i = snapcnt + 1; i < FSMAXSNAP; i++) { + if (sblock.fs_snapinum[i] == 0) + break; + sblock.fs_snapinum[i - 1] = sblock.fs_snapinum[i]; + } + sblock.fs_snapinum[i - 1] = 0; + snapcnt--; + sbdirty(); + } + if (snapcnt > 0 && copybuf == NULL) { + copybuf = Malloc(sblock.fs_bsize); + if (copybuf == NULL) + errx(EEXIT, "cannot allocate space for snapshot " + "copy buffer"); + } return (1); badsb: @@ -181,6 +223,144 @@ return (0); } +/* + * Check for valid snapshot information. + * + * Each snapshot has a list of blocks that have been copied. This list + * is consulted before checking the snapshot inode. Its purpose is to + * speed checking of commonly checked blocks and to avoid recursive + * checks of the snapshot inode. In particular, the list must contain + * the superblock, the superblock summary information, and all the + * cylinder group blocks. The list may contain other commonly checked + * pointers such as those of the blocks that contain the snapshot inodes. + * The list is sorted into block order to allow binary search lookup. + * + * The twelve direct block pointers of the snapshot are always + * copied, so we test for them first before checking the list itself + * (i.e., they are not in the list). 
+ * + * The checksnapinfo() routine needs to ensure that the list contains at + * least the super block, its summary information, and the cylinder groups. + * Here we check the list first for the superblock, zero or more cylinder + * groups up to the location of the superblock summary information, the + * summary group information, and any remaining cylinder group maps that + * follow it. We skip over any other entries in the list. + */ +#define CHKBLKINLIST(chkblk) \ + /* All UFS_NDADDR blocks are copied */ \ + if ((chkblk) >= UFS_NDADDR) { \ + /* Skip over blocks that are not of interest */ \ + while (*blkp < (chkblk) && blkp < lastblkp) \ + blkp++; \ + /* Fail if end of list and not all blocks found */ \ + if (blkp >= lastblkp) { \ + pwarn("UFS%d snapshot inode %jd failed: " \ + "improper block list length (%jd)\n", \ + sblock.fs_magic == FS_UFS1_MAGIC ? 1 : 2, \ + (intmax_t)snapip->i_number, \ + (intmax_t)(lastblkp - &snapblklist[0])); \ + status = 0; \ + } \ + /* Fail if block we seek is missing */ \ + else if (*blkp++ != (chkblk)) { \ + pwarn("UFS%d snapshot inode %jd failed: " \ + "block list (%jd) != %s (%jd)\n", \ + sblock.fs_magic == FS_UFS1_MAGIC ? 
1 : 2, \ + (intmax_t)snapip->i_number, \ + (intmax_t)blkp[-1], #chkblk, \ + (intmax_t)chkblk); \ + status = 0; \ + } \ + } + +static int +checksnapinfo(struct inode *snapip) +{ + struct fs *fs; + struct bufarea *bp; + struct inodesc idesc; + daddr_t *snapblklist, *blkp, *lastblkp, csblkno; + int cg, loc, len, status; + ufs_lbn_t lbn; + size_t size; + + fs = &sblock; + memset(&idesc, 0, sizeof(struct inodesc)); + idesc.id_type = ADDR; + idesc.id_func = getlbnblkno; + idesc.id_number = snapip->i_number; + lbn = howmany(fs->fs_size, fs->fs_frag); + idesc.id_parent = lbn; /* sought after blkno */ + if ((ckinode(snapip->i_dp, &idesc) & FOUND) == 0) + return (0); + size = fragroundup(fs, + DIP(snapip->i_dp, di_size) - lblktosize(fs, lbn)); + bp = getdatablk(idesc.id_parent, size, BT_DATA); + snapblklist = (daddr_t *)bp->b_un.b_buf; + /* + * snapblklist[0] is the size of the list + * snapblklist[1] is the first element of the list + * + * We need to be careful to bound the size of the list and verify + * that we have not run off the end of it if it or its size has + * been corrupted. + */ + blkp = &snapblklist[1]; + lastblkp = &snapblklist[MAX(0, + MIN(snapblklist[0] + 1, size / sizeof(daddr_t)))]; + status = 1; + /* Check that the superblock is listed. */ + CHKBLKINLIST(lblkno(fs, fs->fs_sblockloc)); + if (status == 0) + goto out; + /* + * Calculate where the summary information is located. + * Usually it is in the first cylinder group, but growfs + * may move it to the first cylinder group that it adds. + * + * Check all cylinder groups up to the summary information. + */ + csblkno = fragstoblks(fs, fs->fs_csaddr); + for (cg = 0; cg < fs->fs_ncg; cg++) { + if (fragstoblks(fs, cgtod(fs, cg)) > csblkno) + break; + CHKBLKINLIST(fragstoblks(fs, cgtod(fs, cg))); + if (status == 0) + goto out; + } + /* Check the summary information block(s). 
*/ + len = howmany(fs->fs_cssize, fs->fs_bsize); + for (loc = 0; loc < len; loc++) { + CHKBLKINLIST(csblkno + loc); + if (status == 0) + goto out; + } + /* Check the remaining cylinder groups. */ + for (; cg < fs->fs_ncg; cg++) { + CHKBLKINLIST(fragstoblks(fs, cgtod(fs, cg))); + if (status == 0) + goto out; + } +out: + brelse(bp); + return (status); +} + +/* + * Return the block number associated with a specified inode lbn. + * Requested lbn is in id_parent. If found, block is returned in + * id_parent. + */ +static int +getlbnblkno(struct inodesc *idesc) +{ + + if (idesc->id_lbn < idesc->id_parent) + return (KEEPON); + idesc->id_parent = idesc->id_blkno; + return (STOP | FOUND); +} + /* * Open a device or file to be checked by fsck. */ diff --git a/sbin/fsck_ffs/suj.c b/sbin/fsck_ffs/suj.c --- a/sbin/fsck_ffs/suj.c +++ b/sbin/fsck_ffs/suj.c @@ -321,7 +321,7 @@ * To be certain we're not freeing a reallocated block we lookup * this block in the blk hash and see if there is an allocation * journal record that overlaps with any fragments in the block - * we're concerned with. If any fragments have ben reallocated + * we're concerned with. If any fragments have been reallocated * the block has already been freed and re-used for another purpose. */ mask = 0; @@ -378,6 +378,50 @@ return (0); } +/* + * Check to see if the requested block is available. + * We can just check in the cylinder-group maps as + * they will only have usable blocks in them. 
+ */ +ufs2_daddr_t +suj_checkblkavail(blkno, frags) + ufs2_daddr_t blkno; + long frags; +{ + struct bufarea *cgbp; + struct cg *cgp; + ufs2_daddr_t j, k, baseblk; + long cg; + + cg = dtog(&sblock, blkno); + cgbp = cglookup(cg); + cgp = cgbp->b_un.b_cg; + if (!check_cgmagic(cg, cgbp, 0)) + return (-((cg + 1) * sblock.fs_fpg - sblock.fs_frag)); + baseblk = dtogd(&sblock, blkno); + for (j = 0; j <= sblock.fs_frag - frags; j++) { + if (!isset(cg_blksfree(cgp), baseblk + j)) + continue; + for (k = 1; k < frags; k++) + if (!isset(cg_blksfree(cgp), baseblk + j + k)) + break; + if (k < frags) { + j += k; + continue; + } + for (k = 0; k < frags; k++) + clrbit(cg_blksfree(cgp), baseblk + j + k); + n_blks += frags; + if (frags == sblock.fs_frag) + cgp->cg_cs.cs_nbfree--; + else + cgp->cg_cs.cs_nffree -= frags; + cgdirty(cgbp); + return ((cg * sblock.fs_fpg) + baseblk + j); + } + return (0); +} + /* * Clear an inode from the cg bitmap. If the inode was already clear return * 0 so the caller knows it does not have to check the inode contents. @@ -420,7 +464,7 @@ * set in the mask. */ static void -blk_free(ufs2_daddr_t bno, int mask, int frags) +blk_free(ino_t ino, ufs2_daddr_t bno, int mask, int frags) { ufs1_daddr_t fragno, cgbno; struct suj_cg *sc; @@ -431,6 +475,13 @@ if (debug) printf("Freeing %d frags at blk %jd mask 0x%x\n", frags, bno, mask); + /* + * Check to see if the block needs to be claimed by a snapshot. + * If wanted, the snapshot references it. Otherwise we free it. 
+ */ + if (snapblkfree(fs, bno, lfragtosize(fs, frags), ino, + suj_checkblkavail)) + return; cg = dtog(fs, bno); sc = cg_lookup(cg); cgp = sc->sc_cgp; @@ -846,7 +897,7 @@ blk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags) { - blk_free(blk, blk_freemask(blk, ino, lbn, frags), frags); + blk_free(ino, blk, blk_freemask(blk, ino, lbn, frags), frags); } /* @@ -865,7 +916,7 @@ if (lbn <= -UFS_NDADDR && follow && mask == 0) indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR); else - blk_free(blk, mask, frags); + blk_free(ino, blk, mask, frags); } static void @@ -997,6 +1048,8 @@ if ((DIP(dp, di_mode) & IFMT) == IFDIR) ino_visit(dp, ino, ino_free_children, 0); DIP_SET(dp, di_nlink, 0); + if ((DIP(dp, di_flags) & SF_SNAPSHOT) != 0) + snapremove(ino); ino_visit(dp, ino, blk_free_visit, VISIT_EXT | VISIT_INDIR); /* Here we have to clear the inode and release any blocks it holds. */ gen = DIP(dp, di_gen); @@ -1209,7 +1262,7 @@ continue; } isdirty = 1; - blk_free(nblk, 0, fs->fs_frag); + blk_free(ino, nblk, 0, fs->fs_frag); IBLK_SET(bp, i, 0); } if (isdirty) @@ -1245,6 +1298,11 @@ dp = ip.i_dp; mode = DIP(dp, di_mode) & IFMT; cursize = DIP(dp, di_size); + /* If no size change, nothing to do */ + if (size == cursize) { + irelse(&ip); + return; + } if (debug) printf("Truncating ino %ju, mode %o to size %jd from size %jd\n", (uintmax_t)ino, mode, size, cursize); @@ -1264,13 +1322,14 @@ if (size > 0) err_suj("Partial truncation of ino %ju snapshot file\n", (uintmax_t)ino); + snapremove(ino); } lastlbn = lblkno(fs, blkroundup(fs, size)); for (i = lastlbn; i < UFS_NDADDR; i++) { if ((bn = DIP(dp, di_db[i])) == 0) continue; blksize = sblksize(fs, cursize, i); - blk_free(bn, 0, numfrags(fs, blksize)); + blk_free(ino, bn, 0, numfrags(fs, blksize)); DIP_SET(dp, di_db[i], 0); } /* @@ -1283,13 +1342,13 @@ /* If we're not freeing any in this indirect range skip it. 
*/ if (lastlbn >= nextlbn) continue; - if (DIP(dp, di_ib[i]) == 0) - continue; - indir_trunc(ino, -lbn - i, DIP(dp, di_ib[i]), lastlbn, dp); - /* If we freed everything in this indirect free the indir. */ - if (lastlbn > lbn) - continue; - blk_free(DIP(dp, di_ib[i]), 0, fs->fs_frag); + if ((bn = DIP(dp, di_ib[i])) == 0) + continue; + indir_trunc(ino, -lbn - i, bn, lastlbn, dp); + /* If we freed everything in this indirect free the indir. */ + if (lastlbn > lbn) + continue; + blk_free(ino, bn, 0, fs->fs_frag); DIP_SET(dp, di_ib[i], 0); } /* @@ -1319,7 +1378,7 @@ if (oldspace != newspace) { bn += numfrags(fs, newspace); frags = numfrags(fs, oldspace - newspace); - blk_free(bn, 0, frags); + blk_free(ino, bn, 0, frags); totalfrags -= frags; } } @@ -1468,7 +1527,7 @@ mask >>= frags; blk += frags; frags = brec->jb_frags - frags; - blk_free(blk, mask, frags); + blk_free(brec->jb_ino, blk, mask, frags); continue; } /* @@ -2406,6 +2465,13 @@ } if (preen == 0 && (jrecs > 0 || jbytes > 0) && reply("WRITE CHANGES") == 0) return (0); + /* + * Check block counts of snapshot inodes and + * make copies of any needed snapshot blocks. + */ + for (i = 0; i < snapcnt; i++) + check_blkcnt(&snaplist[i]); + snapflush(suj_checkblkavail); /* * Recompute the fs summary info from correct cs summaries. */