diff --git a/sys/fs/msdosfs/fat.h b/sys/fs/msdosfs/fat.h --- a/sys/fs/msdosfs/fat.h +++ b/sys/fs/msdosfs/fat.h @@ -97,7 +97,7 @@ #define DE_CLEAR 1 /* Zero out the blocks allocated */ int pcbmap(struct denode *dep, u_long findcn, daddr_t *bnp, u_long *cnp, int* sp); -int clusterfree(struct msdosfsmount *pmp, u_long cn, u_long *oldcnp); +void clusterfree(struct msdosfsmount *pmp, u_long cn); int clusteralloc(struct msdosfsmount *pmp, u_long start, u_long count, u_long fillwith, u_long *retcluster, u_long *got); int fatentry(int function, struct msdosfsmount *pmp, u_long cluster, u_long *oldcontents, u_long newcontents); int freeclusterchain(struct msdosfsmount *pmp, u_long startchain); diff --git a/sys/fs/msdosfs/msdosfs_denode.c b/sys/fs/msdosfs/msdosfs_denode.c --- a/sys/fs/msdosfs/msdosfs_denode.c +++ b/sys/fs/msdosfs/msdosfs_denode.c @@ -142,9 +142,24 @@ return (error); if (nvp != NULL) { *depp = VTODE(nvp); - KASSERT((*depp)->de_dirclust == dirclust, ("wrong dirclust")); - KASSERT((*depp)->de_diroffset == diroffset, ("wrong diroffset")); + if ((*depp)->de_dirclust != dirclust) { + printf("wrong dir cluster %s %lu %lu\n", + pmp->pm_mountp->mnt_stat.f_mntonname, + (*depp)->de_dirclust, dirclust); + goto badoff; + } + if ((*depp)->de_diroffset != diroffset) { + printf("wrong dir offset %s %lu %lu\n", + pmp->pm_mountp->mnt_stat.f_mntonname, + (*depp)->de_diroffset, diroffset); + goto badoff; + } return (0); +badoff: + vgone(nvp); + vput(nvp); + msdosfs_integrity_error(pmp); + return (EBADF); } ldep = malloc(sizeof(struct denode), M_MSDOSFSNODE, M_WAITOK | M_ZERO); diff --git a/sys/fs/msdosfs/msdosfs_fat.c b/sys/fs/msdosfs/msdosfs_fat.c --- a/sys/fs/msdosfs/msdosfs_fat.c +++ b/sys/fs/msdosfs/msdosfs_fat.c @@ -80,8 +80,7 @@ u_long fatbn); static __inline void usemap_alloc(struct msdosfsmount *pmp, u_long cn); -static __inline void - usemap_free(struct msdosfsmount *pmp, u_long cn); +static int usemap_free(struct msdosfsmount *pmp, u_long cn); static int 
clusteralloc1(struct msdosfsmount *pmp, u_long start, u_long count, u_long fillwith, u_long *retcluster, u_long *got); @@ -398,7 +397,7 @@ pmp->pm_flags |= MSDOSFS_FSIMOD; } -static __inline void +static int usemap_free(struct msdosfsmount *pmp, u_long cn) { @@ -408,35 +407,37 @@ pmp->pm_maxcluster)); KASSERT((pmp->pm_flags & MSDOSFSMNT_RONLY) == 0, ("usemap_free on ro msdosfs mount")); + if ((pmp->pm_inusemap[cn / N_INUSEBITS] & + (1U << (cn % N_INUSEBITS))) == 0) { + printf("%s: Freeing unused sector %ld %ld %x\n", + pmp->pm_mountp->mnt_stat.f_mntonname, cn, cn % N_INUSEBITS, + (unsigned)pmp->pm_inusemap[cn / N_INUSEBITS]); + msdosfs_integrity_error(pmp); + return (EINTEGRITY); + } pmp->pm_freeclustercount++; pmp->pm_flags |= MSDOSFS_FSIMOD; - KASSERT((pmp->pm_inusemap[cn / N_INUSEBITS] & - (1U << (cn % N_INUSEBITS))) != 0, - ("Freeing unused sector %ld %ld %x", cn, cn % N_INUSEBITS, - (unsigned)pmp->pm_inusemap[cn / N_INUSEBITS])); pmp->pm_inusemap[cn / N_INUSEBITS] &= ~(1U << (cn % N_INUSEBITS)); + return (0); } -int -clusterfree(struct msdosfsmount *pmp, u_long cluster, u_long *oldcnp) +void +clusterfree(struct msdosfsmount *pmp, u_long cluster) { int error; u_long oldcn; error = fatentry(FAT_GET_AND_SET, pmp, cluster, &oldcn, MSDOSFSFREE); - if (error) - return (error); + if (error != 0) + return; /* * If the cluster was successfully marked free, then update * the count of free clusters, and turn off the "allocated" * bit in the "in use" cluster bit map. 
*/ MSDOSFS_LOCK_MP(pmp); - usemap_free(pmp, cluster); + error = usemap_free(pmp, cluster); MSDOSFS_UNLOCK_MP(pmp); - if (oldcnp) - *oldcnp = oldcn; - return (0); } /* @@ -712,7 +713,7 @@ error = fatchain(pmp, start, count, fillwith); if (error != 0) { for (cl = start, n = count; n-- > 0;) - usemap_free(pmp, cl++); + (void)usemap_free(pmp, cl++); return (error); } #ifdef MSDOSFS_DEBUG @@ -846,7 +847,12 @@ } lbn = bn; } - usemap_free(pmp, cluster); + error = usemap_free(pmp, cluster); + if (error != 0) { + updatefats(pmp, bp, lbn); + MSDOSFS_UNLOCK_MP(pmp); + return (error); + } switch (pmp->pm_fatmask) { case FAT12_MASK: readcn = getushort(bp->b_data + bo); @@ -940,8 +946,13 @@ #endif brelse(bp); return (EINVAL); - } else if (readcn == CLUST_FREE) - usemap_free(pmp, cn); + } else if (readcn == CLUST_FREE) { + error = usemap_free(pmp, cn); + if (error != 0) { + brelse(bp); + return (error); + } + } } if (bp != NULL) brelse(bp); @@ -1043,7 +1054,7 @@ dep->de_fc[FC_LASTFC].fc_fsrcn, 0, cn); if (error) { - clusterfree(pmp, cn, NULL); + clusterfree(pmp, cn); return (error); } frcn = dep->de_fc[FC_LASTFC].fc_frcn + 1; diff --git a/sys/fs/msdosfs/msdosfs_lookup.c b/sys/fs/msdosfs/msdosfs_lookup.c --- a/sys/fs/msdosfs/msdosfs_lookup.c +++ b/sys/fs/msdosfs/msdosfs_lookup.c @@ -63,6 +63,29 @@ #include #include +static int +msdosfs_lookup_checker(struct msdosfsmount *pmp, struct vnode *dvp, + struct denode *tdp, struct vnode **vpp) +{ + struct vnode *vp; + + vp = DETOV(tdp); + + /* + * Lookup assumes that directory cannot be hardlinked. + * Corrupted msdosfs filesystem could break this assumption. 
+ */ + if (vp == dvp) { + vput(vp); + msdosfs_integrity_error(pmp); + *vpp = NULL; + return (EBADF); + } + + *vpp = vp; + return (0); +} + int msdosfs_lookup(struct vop_cachedlookup_args *ap) { @@ -501,8 +524,7 @@ error = deget(pmp, cluster, blkoff, LK_EXCLUSIVE, &tdp); if (error) return (error); - *vpp = DETOV(tdp); - return (0); + return (msdosfs_lookup_checker(pmp, vdp, tdp, vpp)); } /* @@ -529,7 +551,9 @@ if ((error = deget(pmp, cluster, blkoff, LK_EXCLUSIVE, &tdp)) != 0) return (error); - *vpp = DETOV(tdp); + if ((error = msdosfs_lookup_checker(pmp, vdp, tdp, vpp)) + != 0) + return (error); cnp->cn_flags |= SAVENAME; return (0); } @@ -572,14 +596,23 @@ vput(*vpp); goto restart; } + error = msdosfs_lookup_checker(pmp, vdp, VTODE(*vpp), vpp); + if (error != 0) + return (error); } else if (dp->de_StartCluster == scn && isadir) { + if (cnp->cn_namelen != 1 || cnp->cn_nameptr[0] != '.') { + /* fs is corrupted, non-dot lookup returned dvp */ + msdosfs_integrity_error(pmp); + return (EBADF); + } VREF(vdp); /* we want ourself, ie "." */ *vpp = vdp; } else { if ((error = deget(pmp, cluster, blkoff, LK_EXCLUSIVE, &tdp)) != 0) return (error); - *vpp = DETOV(tdp); + if ((error = msdosfs_lookup_checker(pmp, vdp, tdp, vpp)) != 0) + return (error); } /* diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c --- a/sys/fs/msdosfs/msdosfs_vfsops.c +++ b/sys/fs/msdosfs/msdosfs_vfsops.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include @@ -112,6 +113,7 @@ static int update_mp(struct mount *mp, struct thread *td); static int mountmsdosfs(struct vnode *devvp, struct mount *mp); +static void msdosfs_remount_ro(void *arg, int pending); static vfs_fhtovp_t msdosfs_fhtovp; static vfs_mount_t msdosfs_mount; static vfs_root_t msdosfs_root; @@ -337,6 +339,13 @@ mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); } + + /* + * Avoid namei() below. The "from" option is not set. + * Update of the devvp is pointless for this case. 
+ */ + if ((pmp->pm_flags & MSDOSFS_ERR_RO) != 0) + return (0); } /* * Not an update, or updating the name: look up the name @@ -471,6 +480,8 @@ lockinit(&pmp->pm_fatlock, 0, msdosfs_lock_msg, 0, 0); lockinit(&pmp->pm_checkpath_lock, 0, "msdoscp", 0, 0); + TASK_INIT(&pmp->pm_rw2ro_task, 0, msdosfs_remount_ro, pmp); + /* * Initialize ownerships and permissions, since nothing else will * initialize them iff we are mounting root. @@ -558,6 +569,14 @@ } pmp->pm_HugeSectors *= pmp->pm_BlkPerSec; + if ((off_t)pmp->pm_HugeSectors * pmp->pm_BytesPerSec < + pmp->pm_HugeSectors /* overflow */ || + (off_t)pmp->pm_HugeSectors * pmp->pm_BytesPerSec > + cp->provider->mediasize /* past end of vol */) { + error = EINVAL; + goto error_exit; + } + pmp->pm_HiddenSects *= pmp->pm_BlkPerSec; /* XXX not used? */ pmp->pm_FATsecs *= pmp->pm_BlkPerSec; SecPerClust *= pmp->pm_BlkPerSec; @@ -577,6 +596,10 @@ pmp->pm_firstcluster = pmp->pm_rootdirblk + pmp->pm_rootdirsize; } + if (pmp->pm_HugeSectors <= pmp->pm_firstcluster) { + error = EINVAL; + goto error_exit; + } pmp->pm_maxcluster = (pmp->pm_HugeSectors - pmp->pm_firstcluster) / SecPerClust + 1; pmp->pm_fatsize = pmp->pm_FATsecs * DEV_BSIZE; /* XXX not used? 
*/ @@ -835,6 +858,47 @@ return (error); } +static void +msdosfs_remount_ro(void *arg, int pending) +{ + struct msdosfsmount *pmp; + int error; + + pmp = arg; + + MSDOSFS_LOCK_MP(pmp); + if ((pmp->pm_flags & MSDOSFS_ERR_RO) != 0) { + while ((pmp->pm_flags & MSDOSFS_ERR_RO) != 0) + msleep(&pmp->pm_flags, &pmp->pm_fatlock, PVFS, + "msdoserrro", hz); + } else if ((pmp->pm_mountp->mnt_flag & MNT_RDONLY) == 0) { + pmp->pm_flags |= MSDOSFS_ERR_RO; + MSDOSFS_UNLOCK_MP(pmp); + printf("remounting %s read-only due to corruption\n", + pmp->pm_mountp->mnt_stat.f_mntfromname); + error = vfs_remount_ro(pmp->pm_mountp); + if (error != 0) + printf("remounting %s read-only failed: error %d\n", + pmp->pm_mountp->mnt_stat.f_mntfromname, error); + else + printf("remounted %s read-only\n", + pmp->pm_mountp->mnt_stat.f_mntfromname); + MSDOSFS_LOCK_MP(pmp); + pmp->pm_flags &= ~MSDOSFS_ERR_RO; + wakeup(&pmp->pm_flags); + } + MSDOSFS_UNLOCK_MP(pmp); + + vfs_unbusy(pmp->pm_mountp); +} + +void +msdosfs_integrity_error(struct msdosfsmount *pmp) +{ + if (vfs_busy(pmp->pm_mountp, MBF_NOWAIT) == 0) + taskqueue_enqueue(taskqueue_thread, &pmp->pm_rw2ro_task); +} + static int msdosfs_root(struct mount *mp, int flags, struct vnode **vpp) { diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c --- a/sys/fs/msdosfs/msdosfs_vnops.c +++ b/sys/fs/msdosfs/msdosfs_vnops.c @@ -1433,7 +1433,7 @@ return (0); bad: - clusterfree(pmp, newcluster, NULL); + clusterfree(pmp, newcluster); bad2: return (error); } diff --git a/sys/fs/msdosfs/msdosfsmount.h b/sys/fs/msdosfs/msdosfsmount.h --- a/sys/fs/msdosfs/msdosfsmount.h +++ b/sys/fs/msdosfs/msdosfsmount.h @@ -59,6 +59,7 @@ #ifndef MAKEFS #include #include +#include #endif #include @@ -116,6 +117,7 @@ struct lock pm_fatlock; /* lockmgr protecting allocations */ struct lock pm_checkpath_lock; /* protects doscheckpath result */ #endif + struct task pm_rw2ro_task; }; /* @@ -263,5 +265,10 @@ #define MSDOSFSMNT_WAITONFAT 0x40000000 /* mounted 
synchronous */ #define MSDOSFS_FATMIRROR 0x20000000 /* FAT is mirrored */ #define MSDOSFS_FSIMOD 0x01000000 +#define MSDOSFS_ERR_RO 0x00800000 /* remounting ro due to error */ + +#ifdef _KERNEL +void msdosfs_integrity_error(struct msdosfsmount *pmp); +#endif #endif /* !_MSDOSFS_MSDOSFSMOUNT_H_ */ diff --git a/sys/geom/label/g_label_msdosfs.c b/sys/geom/label/g_label_msdosfs.c --- a/sys/geom/label/g_label_msdosfs.c +++ b/sys/geom/label/g_label_msdosfs.c @@ -156,6 +156,12 @@ G_LABEL_DEBUG(2, "MSDOSFS: FAT_FirstDataSector=0x%x, FAT_BytesPerSector=%d", fat_FirstDataSector, fat_BytesPerSector); + if (fat_BytesPerSector == 0 || + fat_BytesPerSector % pp->sectorsize != 0) { + G_LABEL_DEBUG(1, "MSDOSFS: %s: corrupted BPB", + pp->name); + goto error; + } for (offset = fat_BytesPerSector * fat_FirstDataSector;; offset += fat_BytesPerSector) { diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -2868,6 +2868,97 @@ free(buf, M_MOUNT); } +/* + * Force remount of the specified mount point to read-only. The argument + * must be busied to avoid parallel unmount attempts. + * + * Intended use is to prevent further writes if some metadata + * inconsistency is detected. Note that the function still flushes + * all cached metadata and data for the mount point, which might + * not always be suitable. 
+ */ +int +vfs_remount_ro(struct mount *mp) +{ + struct vfsoptlist *opts; + struct vfsopt *opt; + struct vnode *vp_covered, *rootvp; + int error; + + KASSERT(mp->mnt_lockref > 0, + ("vfs_remount_ro: mp %p is not busied", mp)); + KASSERT((mp->mnt_kern_flag & MNTK_UNMOUNT) == 0, + ("vfs_remount_ro: mp %p is being unmounted (and busy?)", mp)); + + rootvp = NULL; + vp_covered = mp->mnt_vnodecovered; + error = vget(vp_covered, LK_EXCLUSIVE | LK_NOWAIT); + if (error != 0) + return (error); + VI_LOCK(vp_covered); + if ((vp_covered->v_iflag & VI_MOUNT) != 0) { + VI_UNLOCK(vp_covered); + vput(vp_covered); + return (EBUSY); + } + vp_covered->v_iflag |= VI_MOUNT; + VI_UNLOCK(vp_covered); + vfs_op_enter(mp); + vn_start_write(NULL, &mp, V_WAIT); + vn_seqc_write_begin(vp_covered); + + MNT_ILOCK(mp); + if ((mp->mnt_flag & MNT_RDONLY) != 0) { + MNT_IUNLOCK(mp); + error = EBUSY; + goto out; + } + mp->mnt_flag |= MNT_UPDATE | MNT_FORCE | MNT_RDONLY; + rootvp = vfs_cache_root_clear(mp); + MNT_IUNLOCK(mp); + + opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK | M_ZERO); + TAILQ_INIT(opts); + opt = malloc(sizeof(struct vfsopt), M_MOUNT, M_WAITOK | M_ZERO); + opt->name = strdup("ro", M_MOUNT); + opt->value = NULL; + TAILQ_INSERT_TAIL(opts, opt, link); + vfs_mergeopts(opts, mp->mnt_opt); + mp->mnt_optnew = opts; + + error = VFS_MOUNT(mp); + + if (error == 0) { + MNT_ILOCK(mp); + mp->mnt_flag &= ~(MNT_UPDATE | MNT_FORCE); + MNT_IUNLOCK(mp); + vfs_deallocate_syncvnode(mp); + if (mp->mnt_opt != NULL) + vfs_freeopts(mp->mnt_opt); + mp->mnt_opt = mp->mnt_optnew; + } else { + MNT_ILOCK(mp); + mp->mnt_flag &= ~(MNT_UPDATE | MNT_FORCE | MNT_RDONLY); + MNT_IUNLOCK(mp); + vfs_freeopts(mp->mnt_optnew); + } + mp->mnt_optnew = NULL; + +out: + vfs_op_exit(mp); + vn_finished_write(mp); + VI_LOCK(vp_covered); + vp_covered->v_iflag &= ~VI_MOUNT; + VI_UNLOCK(vp_covered); + vput(vp_covered); + vn_seqc_write_end(vp_covered); + if (rootvp != NULL) { + vn_seqc_write_end(rootvp); + vrele(rootvp); + } 
+ return (error); +} + /* * Suspend write operations on all local writeable filesystems. Does * full sync of them in the process. diff --git a/sys/sys/mount.h b/sys/sys/mount.h --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -1036,6 +1036,8 @@ void vfs_unregister_for_notification(struct mount *, struct mount_upper_node *); void vfs_unregister_upper(struct mount *, struct mount_upper_node *); +int vfs_remount_ro(struct mount *mp); + extern TAILQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct mtx_padalign mountlist_mtx; extern struct nfs_public nfs_pub;