Index: sys/geom/geom_vfs.c =================================================================== --- sys/geom/geom_vfs.c +++ sys/geom/geom_vfs.c @@ -55,6 +55,7 @@ struct bufobj *sc_bo; int sc_active; int sc_orphaned; + int sc_enxio_active; }; static struct buf_ops __g_vfs_bufops = { @@ -139,9 +140,14 @@ cp = bip->bio_from; sc = cp->geom->softc; - if (bip->bio_error && bip->bio_error != EOPNOTSUPP) + if (bip->bio_error != 0 && bip->bio_error != EOPNOTSUPP) { + if ((bp->b_xflags & BX_CVTENXIO) != 0) + sc->sc_enxio_active = 1; + if (sc->sc_enxio_active) + bip->bio_error = ENXIO; g_print_bio("g_vfs_done():", bip, "error = %d", bip->bio_error); + } bp->b_error = bip->bio_error; bp->b_ioflags = bip->bio_flags; if (bip->bio_error) @@ -172,7 +178,7 @@ * If the provider has orphaned us, just return ENXIO. */ mtx_lock(&sc->sc_mtx); - if (sc->sc_orphaned) { + if (sc->sc_orphaned || sc->sc_enxio_active) { mtx_unlock(&sc->sc_mtx); bp->b_error = ENXIO; bp->b_ioflags |= BIO_ERROR; Index: sys/kern/vfs_bio.c =================================================================== --- sys/kern/vfs_bio.c +++ sys/kern/vfs_bio.c @@ -2176,6 +2176,8 @@ bp->b_flags |= B_CKHASH; bp->b_ckhashcalc = ckhashfunc; } + if ((flags & GB_CVTENXIO) != 0) + bp->b_xflags |= BX_CVTENXIO; bp->b_ioflags &= ~BIO_ERROR; if (bp->b_rcred == NOCRED && cred != NOCRED) bp->b_rcred = crhold(cred); @@ -2773,6 +2775,7 @@ panic("brelse: not dirty"); bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_RELBUF | B_DIRECT); + bp->b_xflags &= ~(BX_CVTENXIO); /* binsfree unlocks bp. 
*/ binsfree(bp, qindex); } @@ -2804,6 +2807,7 @@ return; } bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); + bp->b_xflags &= ~(BX_CVTENXIO); if (bp->b_flags & B_MANAGED) { if (bp->b_flags & B_REMFREE) Index: sys/sys/buf.h =================================================================== --- sys/sys/buf.h +++ sys/sys/buf.h @@ -261,12 +261,14 @@ */ #define BX_VNDIRTY 0x00000001 /* On vnode dirty list */ #define BX_VNCLEAN 0x00000002 /* On vnode clean list */ +#define BX_CVTENXIO 0x00000004 /* Convert errors to ENXIO */ #define BX_BKGRDWRITE 0x00000010 /* Do writes in background */ #define BX_BKGRDMARKER 0x00000020 /* Mark buffer for splay tree */ #define BX_ALTDATA 0x00000040 /* Holds extended data */ #define BX_FSPRIV 0x00FF0000 /* Filesystem-specific flags mask */ -#define PRINT_BUF_XFLAGS "\20\7altdata\6bkgrdmarker\5bkgrdwrite\2clean\1dirty" +#define PRINT_BUF_XFLAGS "\20\7altdata\6bkgrdmarker\5bkgrdwrite\3cvtenxio" \ + "\2clean\1dirty" #define NOOFFSET (-1LL) /* No buffer offset calculated yet */ @@ -487,6 +489,7 @@ #define GB_KVAALLOC 0x0010 /* But allocate KVA. 
*/ #define GB_CKHASH 0x0020 /* If reading, calc checksum hash */ #define GB_NOSPARSE 0x0040 /* Do not instantiate holes */ +#define GB_CVTENXIO 0x0080 /* Convert errors to ENXIO */ #ifdef _KERNEL extern int nbuf; /* The number of buffer headers */ Index: sys/ufs/ffs/ffs_alloc.c =================================================================== --- sys/ufs/ffs/ffs_alloc.c +++ sys/ufs/ffs/ffs_alloc.c @@ -222,6 +222,10 @@ softdep_request_cleanup(fs, ITOV(ip), cred, FLUSH_BLOCKS_WAIT); goto retry; } + if (ffs_fsfail_cleanup_locked(ump, 0)) { + UFS_UNLOCK(ump); + return (ENXIO); + } if (reclaimed > 0 && ppsratecheck(&ump->um_last_fullmsg, &ump->um_secs_fullmsg, 1)) { UFS_UNLOCK(ump); @@ -447,6 +451,12 @@ softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); goto retry; } + if (bp) + brelse(bp); + if (ffs_fsfail_cleanup_locked(ump, 0)) { + UFS_UNLOCK(ump); + return (ENXIO); + } if (reclaimed > 0 && ppsratecheck(&ump->um_last_fullmsg, &ump->um_secs_fullmsg, 1)) { UFS_UNLOCK(ump); @@ -456,8 +466,6 @@ } else { UFS_UNLOCK(ump); } - if (bp) - brelse(bp); return (ENOSPC); } @@ -1102,7 +1110,7 @@ struct ufsmount *ump; ino_t ino, ipref; u_int cg; - int error, error1, reclaimed; + int error, reclaimed; *vpp = NULL; pip = VTOI(pvp); @@ -1137,28 +1145,21 @@ (allocfcn_t *)ffs_nodealloccg); if (ino == 0) goto noinodes; - /* * Get rid of the cached old vnode, force allocation of a new vnode - * for this inode. + * for this inode. If this fails, release the allocated ino and + * return the error. 
*/ - error = ffs_vgetf(pvp->v_mount, ino, LK_EXCLUSIVE, vpp, FFSV_REPLACE); - if (error) { - error1 = ffs_vgetf(pvp->v_mount, ino, LK_EXCLUSIVE, vpp, - FFSV_FORCEINSMQ | FFSV_REPLACE); + if ((error = ffs_vgetf(pvp->v_mount, ino, LK_EXCLUSIVE, vpp, + FFSV_FORCEINSMQ | FFSV_REPLACE)) != 0) { ffs_vfree(pvp, ino, mode); - if (error1 == 0) { - ip = VTOI(*vpp); - if (ip->i_mode) - goto dup_alloc; - UFS_INODE_SET_FLAG(ip, IN_MODIFIED); - vput(*vpp); - } return (error); } + /* + * We got an inode, so check mode and panic if it is already allocated. + */ ip = VTOI(*vpp); if (ip->i_mode) { -dup_alloc: printf("mode = 0%o, inum = %ju, fs = %s\n", ip->i_mode, (uintmax_t)ip->i_number, fs->fs_fsmnt); panic("ffs_valloc: dup alloc"); @@ -1197,6 +1198,10 @@ softdep_request_cleanup(fs, pvp, cred, FLUSH_INODES_WAIT); goto retry; } + if (ffs_fsfail_cleanup_locked(ump, 0)) { + UFS_UNLOCK(ump); + return (ENXIO); + } if (ppsratecheck(&ump->um_last_fullmsg, &ump->um_secs_fullmsg, 1)) { UFS_UNLOCK(ump); ffs_fserr(fs, pip->i_number, "out of inodes"); @@ -2230,6 +2235,7 @@ struct mount *mp; struct cg *cgp; struct buf *bp; + daddr_t dbn; ufs1_daddr_t fragno, cgbno; int i, blk, frags, bbase, error; u_int cg; @@ -2262,8 +2268,27 @@ ffs_fserr(fs, inum, "bad block"); return; } - if ((error = ffs_getcg(fs, devvp, cg, 0, &bp, &cgp)) != 0) + if ((error = ffs_getcg(fs, devvp, cg, GB_CVTENXIO, &bp, &cgp)) != 0) { + if (!ffs_fsfail_cleanup(ump, error) || + !MOUNTEDSOFTDEP(UFSTOVFS(ump)) || devvp->v_type != VCHR) + return; + if (devvp->v_type == VREG) + dbn = fragstoblks(fs, cgtod(fs, cg)); + else + dbn = fsbtodb(fs, cgtod(fs, cg)); + error = getblkx(devvp, dbn, dbn, fs->fs_cgsize, 0, 0, 0, &bp); + if (error != 0) { + printf("%s: unexpected error %d from getblkx\n", + __func__, error); + return; + } + softdep_setup_blkfree(UFSTOVFS(ump), bp, bno, + numfrags(fs, size), dephd); + bp->b_flags |= B_RELBUF | B_NOCACHE; + bp->b_flags &= ~B_CACHE; + bawrite(bp); return; + } cgbno = dtogd(fs, bno); blksfree = 
cg_blksfree(cgp); UFS_LOCK(ump); @@ -2783,6 +2808,7 @@ { struct cg *cgp; struct buf *bp; + daddr_t dbn; int error; u_int cg; u_int8_t *inosused; @@ -2804,8 +2830,26 @@ if (ino >= fs->fs_ipg * fs->fs_ncg) panic("ffs_freefile: range: dev = %s, ino = %ju, fs = %s", devtoname(dev), (uintmax_t)ino, fs->fs_fsmnt); - if ((error = ffs_getcg(fs, devvp, cg, 0, &bp, &cgp)) != 0) + if ((error = ffs_getcg(fs, devvp, cg, GB_CVTENXIO, &bp, &cgp)) != 0) { + if (!ffs_fsfail_cleanup(ump, error) || + !MOUNTEDSOFTDEP(UFSTOVFS(ump)) || devvp->v_type != VCHR) + return (error); + if (devvp->v_type == VREG) + dbn = fragstoblks(fs, cgtod(fs, cg)); + else + dbn = fsbtodb(fs, cgtod(fs, cg)); + error = getblkx(devvp, dbn, dbn, fs->fs_cgsize, 0, 0, 0, &bp); + if (error != 0) { + printf("%s: unexpected error %d from getblkx\n", + __func__, error); + return (error); + } + softdep_setup_inofree(UFSTOVFS(ump), bp, ino, wkhd); + bp->b_flags |= B_RELBUF | B_NOCACHE; + bp->b_flags &= ~B_CACHE; + bawrite(bp); return (error); + } inosused = cg_inosused(cgp); cgino = ino % fs->fs_ipg; if (isclr(inosused, cgino)) { Index: sys/ufs/ffs/ffs_balloc.c =================================================================== --- sys/ufs/ffs/ffs_balloc.c +++ sys/ufs/ffs/ffs_balloc.c @@ -324,7 +324,8 @@ UFS_UNLOCK(ump); goto retry; } - if (ppsratecheck(&ump->um_last_fullmsg, + if (!ffs_fsfail_cleanup_locked(ump, error) && + ppsratecheck(&ump->um_last_fullmsg, &ump->um_secs_fullmsg, 1)) { UFS_UNLOCK(ump); ffs_fserr(fs, ip->i_number, "filesystem full"); @@ -407,7 +408,8 @@ UFS_UNLOCK(ump); goto retry; } - if (ppsratecheck(&ump->um_last_fullmsg, + if (!ffs_fsfail_cleanup_locked(ump, error) && + ppsratecheck(&ump->um_last_fullmsg, &ump->um_secs_fullmsg, 1)) { UFS_UNLOCK(ump); ffs_fserr(fs, ip->i_number, "filesystem full"); @@ -919,7 +921,8 @@ UFS_UNLOCK(ump); goto retry; } - if (ppsratecheck(&ump->um_last_fullmsg, + if (!ffs_fsfail_cleanup_locked(ump, error) && + ppsratecheck(&ump->um_last_fullmsg, &ump->um_secs_fullmsg, 
1)) { UFS_UNLOCK(ump); ffs_fserr(fs, ip->i_number, "filesystem full"); @@ -1003,7 +1006,8 @@ UFS_UNLOCK(ump); goto retry; } - if (ppsratecheck(&ump->um_last_fullmsg, + if (!ffs_fsfail_cleanup_locked(ump, error) && + ppsratecheck(&ump->um_last_fullmsg, &ump->um_secs_fullmsg, 1)) { UFS_UNLOCK(ump); ffs_fserr(fs, ip->i_number, "filesystem full"); Index: sys/ufs/ffs/ffs_extern.h =================================================================== --- sys/ufs/ffs/ffs_extern.h +++ sys/ufs/ffs/ffs_extern.h @@ -116,6 +116,11 @@ vfs_vget_t ffs_vget; int ffs_vgetf(struct mount *, ino_t, int, struct vnode **, int); void process_deferred_inactive(struct mount *mp); +int ffs_fsfail_cleanup(struct ufsmount *, int); +int ffs_fsfail_cleanup_locked(struct ufsmount *, int); +int ffs_breadz(struct ufsmount *, struct vnode *, daddr_t, daddr_t, int, + daddr_t *, int *, int, struct ucred *, int, void (*)(struct buf *), + struct buf **); /* * Flags to ffs_vgetf @@ -162,6 +167,7 @@ int softdep_mount(struct vnode *, struct mount *, struct fs *, struct ucred *); void softdep_unmount(struct mount *); +void softdep_handle_error(struct buf *); int softdep_move_dependencies(struct buf *, struct buf *); int softdep_flushworklist(struct mount *, int *, struct thread *); int softdep_flushfiles(struct mount *, int, struct thread *); Index: sys/ufs/ffs/ffs_inode.c =================================================================== --- sys/ufs/ffs/ffs_inode.c +++ sys/ufs/ffs/ffs_inode.c @@ -86,6 +86,7 @@ struct fs *fs; struct buf *bp; struct inode *ip; + daddr_t bn; int flags, error; ASSERT_VOP_ELOCKED(vp, "ffs_update"); @@ -112,9 +113,9 @@ if (IS_SNAPSHOT(ip)) flags = GB_LOCK_NOWAIT; loop: - error = bread_gb(ITODEVVP(ip), - fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), - (int) fs->fs_bsize, NOCRED, flags, &bp); + bn = fsbtodb(fs, ino_to_fsba(fs, ip->i_number)); + error = ffs_breadz(VFSTOUFS(vp->v_mount), ITODEVVP(ip), bn, bn, + (int) fs->fs_bsize, NULL, NULL, 0, NOCRED, flags, NULL, &bp); if (error != 
0) { if (error != EBUSY) return (error); @@ -163,9 +164,11 @@ */ random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), RANDOM_FS_ATIME); } - if (waitfor) + if (waitfor) { error = bwrite(bp); - else if (vm_page_count_severe() || buf_dirty_count_severe()) { + if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error)) + error = 0; + } else if (vm_page_count_severe() || buf_dirty_count_severe()) { bawrite(bp); error = 0; } else { @@ -684,7 +687,7 @@ * of having bread() attempt to calculate it using VOP_BMAP(). */ vp = ITOV(ip); - error = breadn_flags(vp, lbn, dbn, (int)fs->fs_bsize, NULL, NULL, 0, + error = ffs_breadz(ump, vp, lbn, dbn, (int)fs->fs_bsize, NULL, NULL, 0, NOCRED, 0, NULL, &bp); if (error) { *countp = 0; Index: sys/ufs/ffs/ffs_softdep.c =================================================================== --- sys/ufs/ffs/ffs_softdep.c +++ sys/ufs/ffs/ffs_softdep.c @@ -2271,6 +2271,7 @@ inodedep->id_ino = inum; inodedep->id_state = ALLCOMPLETE; inodedep->id_nlinkdelta = 0; + inodedep->id_nlinkwrote = -1; inodedep->id_savedino1 = NULL; inodedep->id_savedsize = -1; inodedep->id_savedextsize = -1; @@ -3606,6 +3607,7 @@ jblocks->jb_needseg = 0; WORKLIST_INSERT(&bp->b_dep, &jseg->js_list); FREE_LOCK(ump); + bp->b_xflags |= BX_CVTENXIO; pbgetvp(ump->um_devvp, bp); /* * We only do the blocking wait once we find the journal @@ -6334,7 +6336,7 @@ * the on-disk address, so we just pass it to bread() instead of * having bread() attempt to calculate it using VOP_BMAP(). 
*/ - error = breadn_flags(ITOV(ip), lbn, blkptrtodb(ump, blkno), + error = ffs_breadz(ump, ITOV(ip), lbn, blkptrtodb(ump, blkno), (int)mp->mnt_stat.f_iosize, NULL, NULL, 0, NOCRED, 0, NULL, &bp); if (error) return (error); @@ -6485,6 +6487,15 @@ else WORKLIST_INSERT(&indirdep->ir_freeblks->fb_freeworkhd, &freework->fw_list); + if (fwn == NULL) { + freework->fw_indir = (void *)0x0000deadbeef0000; + bp = indirdep->ir_savebp; + indirdep->ir_savebp = NULL; + free_indirdep(indirdep); + FREE_LOCK(ump); + brelse(bp); + ACQUIRE_LOCK(ump); + } } else { /* Complete when the real copy is written. */ WORKLIST_INSERT(&bp->b_dep, &freework->fw_list); @@ -6589,6 +6600,7 @@ struct buf *bp; struct vnode *vp; struct mount *mp; + daddr_t dbn; ufs2_daddr_t extblocks, datablocks; ufs_lbn_t tmpval, lbn, lastlbn; int frags, lastoff, iboff, allocblock, needj, error, i; @@ -6726,8 +6738,9 @@ */ ufs_itimes(vp); ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED); - error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), - (int)fs->fs_bsize, cred, &bp); + dbn = fsbtodb(fs, ino_to_fsba(fs, ip->i_number)); + error = ffs_breadz(ump, ump->um_devvp, dbn, dbn, (int)fs->fs_bsize, + NULL, NULL, 0, cred, 0, NULL, &bp); if (error) { softdep_error("softdep_journal_freeblocks", error); return; @@ -6828,13 +6841,13 @@ */ size = sblksize(fs, length, lastlbn); error = bread(vp, lastlbn, size, cred, &bp); - if (error) { + if (error == 0) { + bzero((char *)bp->b_data + lastoff, size - lastoff); + bawrite(bp); + } else if (!ffs_fsfail_cleanup(ump, error)) { softdep_error("softdep_journal_freeblks", error); return; } - bzero((char *)bp->b_data + lastoff, size - lastoff); - bawrite(bp); - } ACQUIRE_LOCK(ump); inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep); @@ -6945,8 +6958,8 @@ if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp)) != 0) { - brelse(bp); - softdep_error("softdep_setup_freeblocks", error); + if 
(!ffs_fsfail_cleanup(ump, error)) + softdep_error("softdep_setup_freeblocks", error); return; } freeblks = newfreeblks(mp, ip); @@ -8161,7 +8174,7 @@ ufs_lbn_t lbnadd, nlbn; u_long key; int nblocks, ufs1fmt, freedblocks; - int goingaway, freedeps, needj, level, cnt, i; + int goingaway, freedeps, needj, level, cnt, i, error; freeblks = freework->fw_freeblks; mp = freeblks->fb_list.wk_mp; @@ -8199,10 +8212,11 @@ if (indirdep == NULL || (indirdep->ir_state & GOINGAWAY) == 0) panic("indir_trunc: Bad indirdep %p from buf %p", indirdep, bp); - } else if (bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize, - NOCRED, &bp) != 0) { - brelse(bp); - return; + } else { + error = ffs_breadz(ump, freeblks->fb_devvp, dbn, dbn, + (int)fs->fs_bsize, NULL, NULL, 0, NOCRED, 0, NULL, &bp); + if (error) + return; } ACQUIRE_LOCK(ump); /* Protects against a race with complete_trunc_indir(). */ @@ -9700,6 +9714,7 @@ struct inodedep *idn; struct fs *fs, *bpfs; struct buf *bp; + daddr_t dbn; ino_t ino; ino_t nino; ino_t pino; @@ -9753,11 +9768,10 @@ bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize, 0, 0, 0); } else { - error = bread(ump->um_devvp, - fsbtodb(fs, ino_to_fsba(fs, pino)), - (int)fs->fs_bsize, NOCRED, &bp); - if (error) - brelse(bp); + dbn = fsbtodb(fs, ino_to_fsba(fs, pino)); + error = ffs_breadz(ump, ump->um_devvp, dbn, dbn, + (int)fs->fs_bsize, NULL, NULL, 0, NOCRED, 0, NULL, + &bp); } ACQUIRE_LOCK(ump); if (error) @@ -10578,14 +10592,16 @@ if ((adp->ad_state & ATTACHED) == 0) panic("inodedep %p and adp %p not attached", inodedep, adp); prevlbn = adp->ad_offset; - if (adp->ad_offset < UFS_NDADDR && + if (!ffs_fsfail_cleanup(ump, 0) && + adp->ad_offset < UFS_NDADDR && dp->di_db[adp->ad_offset] != adp->ad_newblkno) panic("initiate_write_inodeblock_ufs2: " "direct pointer #%jd mismatch %jd != %jd", (intmax_t)adp->ad_offset, (intmax_t)dp->di_db[adp->ad_offset], (intmax_t)adp->ad_newblkno); - if (adp->ad_offset >= UFS_NDADDR && + if (!ffs_fsfail_cleanup(ump, 0) 
&& + adp->ad_offset >= UFS_NDADDR && dp->di_ib[adp->ad_offset - UFS_NDADDR] != adp->ad_newblkno) panic("initiate_write_inodeblock_ufs2: " "indirect pointer #%jd mismatch %jd != %jd", @@ -10817,12 +10833,14 @@ ("softdep_setup_inofree called on non-softdep filesystem")); ump = VFSTOUFS(mp); ACQUIRE_LOCK(ump); - fs = ump->um_fs; - cgp = (struct cg *)bp->b_data; - inosused = cg_inosused(cgp); - if (isset(inosused, ino % fs->fs_ipg)) - panic("softdep_setup_inofree: inode %ju not freed.", - (uintmax_t)ino); + if (!ffs_fsfail_cleanup(ump, 0)) { + fs = ump->um_fs; + cgp = (struct cg *)bp->b_data; + inosused = cg_inosused(cgp); + if (isset(inosused, ino % fs->fs_ipg)) + panic("softdep_setup_inofree: inode %ju not freed.", + (uintmax_t)ino); + } if (inodedep_lookup(mp, ino, 0, &inodedep)) panic("softdep_setup_inofree: ino %ju has existing inodedep %p", (uintmax_t)ino, inodedep); @@ -11091,6 +11109,26 @@ wk_list); } +void +softdep_handle_error(struct buf *bp) +{ + struct ufsmount *ump; + + ump = softdep_bp_to_mp(bp); + if (ump == NULL) + return; + + if (ffs_fsfail_cleanup(ump, bp->b_error)) { + /* + * No future writes will succeed, so the on-disk image is safe. + * Pretend that this write succeeded so that the softdep state + * will be cleaned up naturally. + */ + bp->b_ioflags &= ~BIO_ERROR; + bp->b_error = 0; + } +} + /* * This routine is called during the completion interrupt * service routine for a disk write (from the procedure called @@ -11117,6 +11155,8 @@ "with outstanding dependencies for buffer %p", bp)); if (ump == NULL) return; + if ((bp->b_ioflags & BIO_ERROR) != 0) + softdep_handle_error(bp); /* * If an error occurred while doing the write, then the data * has not hit the disk and the dependencies cannot be processed. 
@@ -12305,6 +12345,13 @@ FREE_LOCK(ump); return; } + if (ip->i_nlink != inodedep->id_nlinkwrote && + inodedep->id_nlinkwrote != -1) { + KASSERT(ip->i_nlink == 0 && + (ump->um_flags & UM_FSFAIL_CLEANUP) != 0, + ("read bad i_nlink value")); + ip->i_effnlink = ip->i_nlink = inodedep->id_nlinkwrote; + } ip->i_effnlink -= inodedep->id_nlinkdelta; KASSERT(ip->i_effnlink >= 0, ("softdep_load_inodeblock: negative i_effnlink")); @@ -12367,6 +12414,11 @@ panic("softdep_update_inodeblock: bad link count"); return; } + KASSERT(ip->i_nlink >= inodedep->id_nlinkdelta, + ("softdep_update_inodeblock inconsistent ip %p i_nlink %d " + "inodedep %p id_nlinkdelta %jd", + ip, ip->i_nlink, inodedep, (intmax_t)inodedep->id_nlinkdelta)); + inodedep->id_nlinkwrote = ip->i_nlink; if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink) panic("softdep_update_inodeblock: bad delta"); /* @@ -12642,7 +12694,7 @@ else brelse(bp); vput(pvp); - if (error != 0) + if (!ffs_fsfail_cleanup(ump, error)) return (error); ACQUIRE_LOCK(ump); if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) Index: sys/ufs/ffs/ffs_subr.c =================================================================== --- sys/ufs/ffs/ffs_subr.c +++ sys/ufs/ffs/ffs_subr.c @@ -136,7 +136,8 @@ return (0); } dip2 = ((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ino)); - if ((error = ffs_verify_dinode_ckhash(fs, dip2)) != 0) { + if ((error = ffs_verify_dinode_ckhash(fs, dip2)) != 0 && + !ffs_fsfail_cleanup(ITOUMP(ip), error)) { printf("%s: inode %jd: check-hash failed\n", fs->fs_fsmnt, (intmax_t)ino); return (error); @@ -201,6 +202,101 @@ } else if (!havemtx) UFS_UNLOCK(ump); return (EINTEGRITY); +} + +/* + * Initiate a forcible unmount. + * Used to unmount filesystems whose underlying media has gone away. + */ +static void +ffs_fsfail_unmount(void *v, int pending) +{ + struct fsfail_task *etp; + struct mount *mp; + int error; + + etp = v; + + /* + * Find our mount and get a ref on it, then try to unmount. 
+ */ + do { + mp = vfs_getvfs(&etp->fsid); + if (mp == NULL) + break; + error = dounmount(mp, MNT_FORCE, curthread); + } while (error != 0); + free(etp, M_UFSMNT); +} + +/* + * On first ENXIO error, start a task that forcibly unmounts the filesystem. + * + * Return true whenever a cleanup is in progress, even if error is not ENXIO. + */ +int +ffs_fsfail_cleanup(struct ufsmount *ump, int error) +{ + int retval; + + UFS_LOCK(ump); + retval = ffs_fsfail_cleanup_locked(ump, error); + UFS_UNLOCK(ump); + return (retval); +} + +int +ffs_fsfail_cleanup_locked(struct ufsmount *ump, int error) +{ + struct fsfail_task *etp; + struct task *tp; + + mtx_assert(UFS_MTX(ump), MA_OWNED); + if (error == ENXIO && (ump->um_flags & UM_FSFAIL_CLEANUP) == 0) { + ump->um_flags |= UM_FSFAIL_CLEANUP; + /* + * Queue an async forced unmount; the task owns and frees etp. + */ + etp = ump->um_fsfail_task; + ump->um_fsfail_task = NULL; + if (etp != NULL) { + tp = &etp->task; + TASK_INIT(tp, 0, ffs_fsfail_unmount, etp); + taskqueue_enqueue(taskqueue_thread, tp); + printf("UFS: forcibly unmounting %s from %s\n", + ump->um_mountp->mnt_stat.f_mntfromname, + ump->um_mountp->mnt_stat.f_mntonname); + } + } + return ((ump->um_flags & UM_FSFAIL_CLEANUP) != 0); +} + +/* + * Wrapper used during ENXIO cleanup to allocate empty buffers when + * the kernel is unable to read the real one. They are needed so that + * the soft updates code can use them to unwind its dependencies. 
+ */ +int +ffs_breadz(struct ufsmount *ump, struct vnode *vp, daddr_t lblkno, + daddr_t dblkno, int size, daddr_t *rablkno, int *rabsize, int cnt, + struct ucred *cred, int flags, void (*ckhashfunc)(struct buf *), + struct buf **bpp) +{ + int error; + + flags |= GB_CVTENXIO; + error = breadn_flags(vp, lblkno, dblkno, size, rablkno, rabsize, cnt, + cred, flags, ckhashfunc, bpp); + if (error != 0 && ffs_fsfail_cleanup(ump, error)) { + error = getblkx(vp, lblkno, dblkno, size, 0, 0, flags, bpp); + if (error != 0) { + printf("%s: unexpected error %d from getblkx\n", + __func__, error); + return (ENXIO); + } + bzero((*bpp)->b_data, size); + } + return (error); } #endif /* _KERNEL */ Index: sys/ufs/ffs/ffs_vfsops.c =================================================================== --- sys/ufs/ffs/ffs_vfsops.c +++ sys/ufs/ffs/ffs_vfsops.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -148,6 +149,13 @@ "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir", "nosymfollow", "sync", "union", "userquota", "untrusted", NULL }; +static int ffs_enxio_enable = 1; +TUNABLE_INT("vfs.ffs.enxio_enable", &ffs_enxio_enable); +SYSCTL_DECL(_vfs_ffs); +SYSCTL_INT(_vfs_ffs, OID_AUTO, enxio_enable, CTLFLAG_RW, + &ffs_enxio_enable, 0, + "enable mapping of other disk I/O errors to ENXIO"); + static int ffs_mount(struct mount *mp) { @@ -795,6 +803,7 @@ struct g_consumer *cp; struct mount *nmp; struct vnode *devvp; + struct fsfail_task *etp; int candelete, canspeedup; off_t loc; @@ -1085,6 +1094,9 @@ (void) ufs_extattr_autostart(mp, td); #endif /* !UFS_EXTATTR_AUTOSTART */ #endif /* !UFS_EXTATTR */ + etp = malloc(sizeof *ump->um_fsfail_task, M_UFSMNT, M_WAITOK | M_ZERO); + etp->fsid = mp->mnt_stat.f_fsid; + ump->um_fsfail_task = etp; return (0); out: if (fs != NULL) { @@ -1134,7 +1146,6 @@ return (0); } -#include static int bigcgs = 0; SYSCTL_INT(_debug, OID_AUTO, bigcgs, CTLFLAG_RW, &bigcgs, 0, ""); @@ -1271,7 +1282,7 @@ error = softdep_flushfiles(mp, 
flags, td); else error = ffs_flushfiles(mp, flags, td); - if (error != 0 && error != ENXIO) + if (error != 0 && !ffs_fsfail_cleanup(ump, error)) goto fail; UFS_LOCK(ump); @@ -1288,7 +1299,9 @@ if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) { fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1; error = ffs_sbupdate(ump, MNT_WAIT, 0); - if (error && error != ENXIO) { + if (ffs_fsfail_cleanup(ump, error)) + error = 0; + if (error != 0 && !ffs_fsfail_cleanup(ump, error)) { fs->fs_clean = 0; goto fail; } @@ -1326,6 +1339,8 @@ } free(fs->fs_csp, M_UFSMNT); free(fs, M_UFSMNT); + if (ump->um_fsfail_task != NULL) + free(ump->um_fsfail_task, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = NULL; MNT_ILOCK(mp); @@ -1640,6 +1655,8 @@ if (waitfor == MNT_WAIT || rebooting) { if ((error = softdep_flushworklist(ump->um_mountp, &count, td))) allerror = error; + if (ffs_fsfail_cleanup(ump, allerror)) + allerror = 0; /* Flushed work items may create new vnodes to clean */ if (allerror == 0 && count) goto loop; @@ -1657,6 +1674,8 @@ error = ffs_sbupdate(ump, waitfor, 0); if (error != 0) allerror = error; + if (ffs_fsfail_cleanup(ump, allerror)) + allerror = 0; if (allerror == 0 && waitfor == MNT_WAIT) goto loop; } else if (suspend != 0) { @@ -1681,6 +1700,8 @@ if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor, suspended)) != 0) allerror = error; + if (ffs_fsfail_cleanup(ump, allerror)) + allerror = 0; return (allerror); } @@ -1707,6 +1728,7 @@ struct ufsmount *ump; struct buf *bp; struct vnode *vp; + daddr_t dbn; int error; MPASS((ffs_flags & FFSV_REPLACE) == 0 || (flags & LK_EXCLUSIVE) != 0); @@ -1796,9 +1818,10 @@ } /* Read in the disk contents for the inode, copy into the inode. 
*/ - error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), - (int)fs->fs_bsize, NOCRED, &bp); - if (error) { + dbn = fsbtodb(fs, ino_to_fsba(fs, ino)); + error = ffs_breadz(ump, ump->um_devvp, dbn, dbn, (int)fs->fs_bsize, + NULL, NULL, 0, NOCRED, 0, NULL, &bp); + if (error != 0) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode @@ -2039,6 +2062,8 @@ * Writing the superblock itself. We need to do special checks for it. */ bp = devfdp->sbbp; + if (ffs_fsfail_cleanup(ump, devfdp->error)) + devfdp->error = 0; if (devfdp->error != 0) { brelse(bp); return (devfdp->error); @@ -2112,6 +2137,11 @@ struct bufobj *bufobj; struct buf *origbp; +#ifdef SOFTUPDATES + if (!LIST_EMPTY(&bp->b_dep) && (bp->b_ioflags & BIO_ERROR) != 0) + softdep_handle_error(bp); +#endif + /* * Find the original buffer that we are writing. */ @@ -2122,7 +2152,7 @@ /* * We should mark the cylinder group buffer origbp as - * dirty, to not loose the failed write. + * dirty, to not lose the failed write. */ if ((bp->b_ioflags & BIO_ERROR) != 0) origbp->b_vflags |= BV_BKGRDERR; @@ -2393,6 +2423,8 @@ break; } } + if (bp->b_iocmd != BIO_READ && ffs_enxio_enable) + bp->b_xflags |= BX_CVTENXIO; g_vfs_strategy(bo, bp); } Index: sys/ufs/ffs/ffs_vnops.c =================================================================== --- sys/ufs/ffs/ffs_vnops.c +++ sys/ufs/ffs/ffs_vnops.c @@ -239,6 +239,8 @@ } BO_UNLOCK(bo); } + if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), 0)) + return (ENXIO); return (0); } @@ -247,6 +249,7 @@ { struct inode *ip; struct bufobj *bo; + struct ufsmount *ump; struct buf *bp, *nbp; ufs_lbn_t lbn; int error, passes; @@ -255,14 +258,18 @@ ip = VTOI(vp); ip->i_flag &= ~IN_NEEDSYNC; bo = &vp->v_bufobj; + ump = VFSTOUFS(vp->v_mount); /* * When doing MNT_WAIT we must first flush all dependencies * on the inode. 
*/ if (DOINGSOFTDEP(vp) && waitfor == MNT_WAIT && - (error = softdep_sync_metadata(vp)) != 0) + (error = softdep_sync_metadata(vp)) != 0) { + if (ffs_fsfail_cleanup(ump, error)) + error = 0; return (error); + } /* * Flush all dirty buffers associated with a vnode. @@ -332,7 +339,10 @@ } if (wait) { bremfree(bp); - if ((error = bwrite(bp)) != 0) + error = bwrite(bp); + if (ffs_fsfail_cleanup(ump, error)) + error = 0; + if (error != 0) return (error); } else if ((bp->b_flags & B_CLUSTEROK)) { (void) vfs_bio_awrite(bp); Index: sys/ufs/ffs/softdep.h =================================================================== --- sys/ufs/ffs/softdep.h +++ sys/ufs/ffs/softdep.h @@ -358,6 +358,7 @@ struct fs *id_fs; /* associated filesystem */ ino_t id_ino; /* dependent inode */ nlink_t id_nlinkdelta; /* saved effective link count */ + nlink_t id_nlinkwrote; /* i_nlink that we wrote to disk */ nlink_t id_savednlink; /* Link saved during rollback */ LIST_ENTRY(inodedep) id_deps; /* bmsafemap's list of inodedep's */ struct bmsafemap *id_bmsafemap; /* related bmsafemap (if pending) */ Index: sys/ufs/ufs/ufs_vnops.c =================================================================== --- sys/ufs/ufs/ufs_vnops.c +++ sys/ufs/ufs/ufs_vnops.c @@ -1426,6 +1426,7 @@ if (DOINGSOFTDEP(tvp)) softdep_change_linkcnt(tip); } + goto bad; } if (doingdirectory && !DOINGSOFTDEP(tvp)) { /* @@ -1523,11 +1524,13 @@ if (error == 0 && endoff != 0) { error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL | (DOINGASYNC(tdvp) ? 
0 : IO_SYNC), tcnp->cn_cred); - if (error != 0) + if (error != 0 && !ffs_fsfail_cleanup(VFSTOUFS(mp), error)) vn_printf(tdvp, "ufs_rename: failed to truncate, error %d\n", error); #ifdef UFS_DIRHASH + if (error != 0) + ufsdirhash_free(tdp); else if (tdp->i_dirhash != NULL) ufsdirhash_dirtrunc(tdp, endoff); #endif @@ -2728,6 +2731,17 @@ } } +static int +ufs_putpages(struct vop_putpages_args *ap) +{ + int error; + + error = vop_stdputpages(ap); + if (ffs_fsfail_cleanup(VFSTOUFS(ap->a_vp->v_mount), error)) + error = ENXIO; + return (error); +} + /* Global vfs data structures for ufs. */ struct vop_vector ufs_vnodeops = { .vop_default = &default_vnodeops, @@ -2753,6 +2767,7 @@ .vop_pathconf = ufs_pathconf, .vop_poll = vop_stdpoll, .vop_print = ufs_print, + .vop_putpages = ufs_putpages, .vop_readdir = ufs_readdir, .vop_readlink = ufs_readlink, .vop_reclaim = ufs_reclaim, Index: sys/ufs/ufs/ufsmount.h =================================================================== --- sys/ufs/ufs/ufsmount.h +++ sys/ufs/ufs/ufsmount.h @@ -45,6 +45,8 @@ #ifdef _KERNEL +#include + #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_UFSMNT); MALLOC_DECLARE(M_TRIM); @@ -65,6 +67,10 @@ TAILQ_HEAD(inodedeplst, inodedep); LIST_HEAD(bmsafemaphd, bmsafemap); LIST_HEAD(trimlist_hashhead, ffs_blkfree_trim_params); +struct fsfail_task { + struct task task; + fsid_t fsid; +}; /* * This structure describes the UFS specific mount structure data. 
@@ -112,6 +118,7 @@ struct taskqueue *um_trim_tq; /* (c) trim request queue */ struct trimlist_hashhead *um_trimhash; /* (i) trimlist hash table */ u_long um_trimlisthashsize; /* (i) trim hash table size-1 */ + struct fsfail_task *um_fsfail_task; /* (i) fsfail cleanup task */ /* (c) - below function ptrs */ int (*um_balloc)(struct vnode *, off_t, int, struct ucred *, int, struct buf **); @@ -133,7 +140,8 @@ #define UM_CANDELETE 0x00000001 /* devvp supports TRIM */ #define UM_WRITESUSPENDED 0x00000002 /* suspension in progress */ #define UM_CANSPEEDUP 0x00000004 /* devvp supports SPEEDUP */ - +#define UM_FSFAIL_CLEANUP 0x00000008 /* need cleanup after + unrecoverable error */ /* * function prototypes */