Index: head/sys/fs/ext2fs/ext2_alloc.c =================================================================== --- head/sys/fs/ext2fs/ext2_alloc.c (revision 351925) +++ head/sys/fs/ext2fs/ext2_alloc.c (revision 351926) @@ -1,1558 +1,1555 @@ /*- * modified for Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SDT_PROVIDER_DEFINE(ext2fs); /* * ext2fs trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(ext2fs, , alloc, trace, "int", "char*"); SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_reallocblks_realloc, "ino_t", "e2fs_lbn_t", "e2fs_lbn_t"); SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_bap, "uint32_t"); SDT_PROBE_DEFINE1(ext2fs, , alloc, ext2_reallocblks_blkno, "e2fs_daddr_t"); SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "char*", "int"); SDT_PROBE_DEFINE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted, "int", "daddr_t", "char*"); SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_blkfree_bad_block, "ino_t", "e4fs_daddr_t"); SDT_PROBE_DEFINE2(ext2fs, , alloc, ext2_vfree_doublefree, "char*", "ino_t"); static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int); static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int); static u_long ext2_dirpref(struct inode *); static e4fs_daddr_t ext2_hashalloc(struct inode *, int, long, int, daddr_t (*)(struct inode *, int, daddr_t, int)); static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int); static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t); /* * Allocate a block in the filesystem. * * A preference may be optionally specified. If a preference is given * the following hierarchy is used to allocate a block: * 1) allocate the requested block. * 2) allocate a rotationally optimal block in the same cylinder. * 3) allocate a block in the same cylinder group. * 4) quadradically rehash into other cylinder groups, until an * available block is located. * If no block preference is given the following hierarchy is used * to allocate a block: * 1) allocate a block in the cylinder group that contains the * inode for the file. * 2) quadradically rehash into other cylinder groups, until an * available block is located. */ int ext2_alloc(struct inode *ip, daddr_t lbn, e4fs_daddr_t bpref, int size, struct ucred *cred, e4fs_daddr_t *bnp) { struct m_ext2fs *fs; struct ext2mount *ump; e4fs_daddr_t bno; int cg; *bnp = 0; fs = ip->i_e2fs; ump = ip->i_ump; mtx_assert(EXT2_MTX(ump), MA_OWNED); #ifdef INVARIANTS if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) { vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n", (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt); panic("ext2_alloc: bad size"); } if (cred == NOCRED) panic("ext2_alloc: missing credential"); #endif /* INVARIANTS */ if (size == fs->e2fs_bsize && fs->e2fs_fbcount == 0) goto nospace; if (cred->cr_uid != 0 && fs->e2fs_fbcount < fs->e2fs_rbcount) goto nospace; if (bpref >= fs->e2fs_bcount) bpref = 0; if (bpref == 0) cg = ino_to_cg(fs, ip->i_number); else cg = dtog(fs, bpref); bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize, ext2_alloccg); if (bno > 0) { /* set next_alloc fields as done in block_getblk */ ip->i_next_alloc_block = lbn; ip->i_next_alloc_goal = bno; ip->i_blocks += btodb(fs->e2fs_bsize); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bnp = bno; return (0); } nospace: EXT2_UNLOCK(ump); SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate data block"); return (ENOSPC); } /* * Allocate EA's block for inode. */ e4fs_daddr_t ext2_alloc_meta(struct inode *ip) { struct m_ext2fs *fs; daddr_t blk; fs = ip->i_e2fs; EXT2_LOCK(ip->i_ump); blk = ext2_hashalloc(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize, ext2_alloccg); if (0 == blk) { EXT2_UNLOCK(ip->i_ump); SDT_PROBE2(ext2fs, , alloc, trace, 1, "cannot allocate meta block"); } return (blk); } /* * Reallocate a sequence of blocks into a contiguous sequence of blocks. * * The vnode and an array of buffer pointers for a range of sequential * logical blocks to be made contiguous is given. The allocator attempts * to find a range of sequential blocks starting as close as possible to * an fs_rotdelay offset from the end of the allocation for the logical * block immediately preceding the current range. If successful, the * physical block numbers in the buffer pointers and in the inode are * changed to reflect the new allocation. If unsuccessful, the allocation * is left unchanged. The success in doing the reallocation is returned. * Note that the error return is not reflected back to the user. Rather * the previous block allocation will be used. */ static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem"); static int doasyncfree = 1; SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, "Use asychronous writes to update block pointers when freeing blocks"); static int doreallocblks = 0; SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); int ext2_reallocblks(struct vop_reallocblks_args *ap) { struct m_ext2fs *fs; struct inode *ip; struct vnode *vp; struct buf *sbp, *ebp; uint32_t *bap, *sbap, *ebap; struct ext2mount *ump; struct cluster_save *buflist; struct indir start_ap[EXT2_NIADDR + 1], end_ap[EXT2_NIADDR + 1], *idp; e2fs_lbn_t start_lbn, end_lbn; int soff; e2fs_daddr_t newblk, blkno; int i, len, start_lvl, end_lvl, pref, ssize; if (doreallocblks == 0) return (ENOSPC); vp = ap->a_vp; ip = VTOI(vp); fs = ip->i_e2fs; ump = ip->i_ump; if (fs->e2fs_contigsumsize <= 0 || ip->i_flag & IN_E4EXTENTS) return (ENOSPC); buflist = ap->a_buflist; len = buflist->bs_nchildren; start_lbn = buflist->bs_children[0]->b_lblkno; end_lbn = start_lbn + len - 1; #ifdef INVARIANTS for (i = 1; i < len; i++) if (buflist->bs_children[i]->b_lblkno != start_lbn + i) panic("ext2_reallocblks: non-cluster"); #endif /* * If the cluster crosses the boundary for the first indirect * block, leave space for the indirect block. Indirect blocks * are initially laid out in a position after the last direct * block. Block reallocation would usually destroy locality by * moving the indirect block out of the way to make room for * data blocks if we didn't compensate here. We should also do * this for other indirect block boundaries, but it is only * important for the first one. */ if (start_lbn < EXT2_NDADDR && end_lbn >= EXT2_NDADDR) return (ENOSPC); /* * If the latest allocation is in a new cylinder group, assume that * the filesystem has decided to move and do not force it back to * the previous cylinder group. */ if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) return (ENOSPC); if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) || ext2_getlbns(vp, end_lbn, end_ap, &end_lvl)) return (ENOSPC); /* * Get the starting offset and block map for the first block. */ if (start_lvl == 0) { sbap = &ip->i_db[0]; soff = start_lbn; } else { idp = &start_ap[start_lvl - 1]; if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) { brelse(sbp); return (ENOSPC); } sbap = (u_int *)sbp->b_data; soff = idp->in_off; } /* * If the block range spans two block maps, get the second map. */ ebap = NULL; if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { ssize = len; } else { #ifdef INVARIANTS if (start_ap[start_lvl - 1].in_lbn == idp->in_lbn) panic("ext2_reallocblks: start == end"); #endif ssize = len - (idp->in_off + 1); if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)) goto fail; ebap = (u_int *)ebp->b_data; } /* * Find the preferred location for the cluster. */ EXT2_LOCK(ump); pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0); /* * Search the block map looking for an allocation of the desired size. */ if ((newblk = (e2fs_daddr_t)ext2_hashalloc(ip, dtog(fs, pref), pref, len, ext2_clusteralloc)) == 0) { EXT2_UNLOCK(ump); goto fail; } /* * We have found a new contiguous block. * * First we have to replace the old block pointers with the new * block pointers in the inode and indirect blocks associated * with the file. */ SDT_PROBE3(ext2fs, , alloc, ext2_reallocblks_realloc, ip->i_number, start_lbn, end_lbn); blkno = newblk; for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) { if (i == ssize) { bap = ebap; soff = -i; } #ifdef INVARIANTS if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) panic("ext2_reallocblks: alloc mismatch"); #endif SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_bap, *bap); *bap++ = blkno; } /* * Next we must write out the modified inode and indirect blocks. * For strict correctness, the writes should be synchronous since * the old block values may have been written to disk. In practise * they are almost never written, but if we are concerned about * strict correctness, the `doasyncfree' flag should be set to zero. * * The test on `doasyncfree' should be changed to test a flag * that shows whether the associated buffers and inodes have * been written. The flag should be set when the cluster is * started and cleared whenever the buffer or inode is flushed. * We can then check below to see if it is set, and do the * synchronous write only when it has been cleared. */ if (sbap != &ip->i_db[0]) { if (doasyncfree) bdwrite(sbp); else bwrite(sbp); } else { ip->i_flag |= IN_CHANGE | IN_UPDATE; if (!doasyncfree) ext2_update(vp, 1); } if (ssize < len) { if (doasyncfree) bdwrite(ebp); else bwrite(ebp); } /* * Last, free the old blocks and assign the new blocks to the buffers. */ for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) { ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->e2fs_bsize); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); SDT_PROBE1(ext2fs, , alloc, ext2_reallocblks_blkno, blkno); } return (0); fail: if (ssize < len) brelse(ebp); if (sbap != &ip->i_db[0]) brelse(sbp); return (ENOSPC); } /* * Allocate an inode in the filesystem. * */ int ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) { struct timespec ts; struct m_ext2fs *fs; struct ext2mount *ump; struct inode *pip; struct inode *ip; struct vnode *vp; struct thread *td; ino_t ino, ipref; int error, cg; *vpp = NULL; pip = VTOI(pvp); fs = pip->i_e2fs; ump = pip->i_ump; EXT2_LOCK(ump); if (fs->e2fs->e2fs_ficount == 0) goto noinodes; /* * If it is a directory then obtain a cylinder group based on * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is * always the next inode. */ if ((mode & IFMT) == IFDIR) { cg = ext2_dirpref(pip); if (fs->e2fs_contigdirs[cg] < 255) fs->e2fs_contigdirs[cg]++; } else { cg = ino_to_cg(fs, pip->i_number); if (fs->e2fs_contigdirs[cg] > 0) fs->e2fs_contigdirs[cg]--; } ipref = cg * fs->e2fs->e2fs_ipg + 1; ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg); if (ino == 0) goto noinodes; td = curthread; error = vfs_hash_get(ump->um_mountp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL); if (error || *vpp != NULL) { return (error); } ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); if (ip == NULL) { return (ENOMEM); } /* Allocate a new vnode/inode. */ if ((error = getnewvnode("ext2fs", ump->um_mountp, &ext2_vnodeops, &vp)) != 0) { free(ip, M_EXT2NODE); return (error); } lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); vp->v_data = ip; ip->i_vnode = vp; ip->i_e2fs = fs = ump->um_e2fs; ip->i_ump = ump; ip->i_number = ino; ip->i_block_group = ino_to_cg(fs, ino); ip->i_next_alloc_block = 0; ip->i_next_alloc_goal = 0; error = insmntque(vp, ump->um_mountp); if (error) { free(ip, M_EXT2NODE); return (error); } error = vfs_hash_insert(vp, ino, LK_EXCLUSIVE, td, vpp, NULL, NULL); if (error || *vpp != NULL) { *vpp = NULL; free(ip, M_EXT2NODE); return (error); } if ((error = ext2_vinit(ump->um_mountp, &ext2_fifoops, &vp)) != 0) { vput(vp); *vpp = NULL; free(ip, M_EXT2NODE); return (error); } if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_EXTENTS) && (S_ISREG(mode) || S_ISDIR(mode))) ext4_ext_tree_init(ip); else memset(ip->i_data, 0, sizeof(ip->i_data)); /* * Set up a new generation number for this inode. * Avoid zero values. */ do { ip->i_gen = arc4random(); } while (ip->i_gen == 0); vfs_timestamp(&ts); ip->i_birthtime = ts.tv_sec; ip->i_birthnsec = ts.tv_nsec; *vpp = vp; return (0); noinodes: EXT2_UNLOCK(ump); SDT_PROBE2(ext2fs, , alloc, trace, 1, "out of inodes"); return (ENOSPC); } /* * 64-bit compatible getters and setters for struct ext2_gd from ext2fs.h */ uint64_t e2fs_gd_get_b_bitmap(struct ext2_gd *gd) { return (((uint64_t)(gd->ext4bgd_b_bitmap_hi) << 32) | gd->ext2bgd_b_bitmap); } uint64_t e2fs_gd_get_i_bitmap(struct ext2_gd *gd) { return (((uint64_t)(gd->ext4bgd_i_bitmap_hi) << 32) | gd->ext2bgd_i_bitmap); } uint64_t e2fs_gd_get_i_tables(struct ext2_gd *gd) { return (((uint64_t)(gd->ext4bgd_i_tables_hi) << 32) | gd->ext2bgd_i_tables); } static uint32_t e2fs_gd_get_nbfree(struct ext2_gd *gd) { return (((uint32_t)(gd->ext4bgd_nbfree_hi) << 16) | gd->ext2bgd_nbfree); } static void e2fs_gd_set_nbfree(struct ext2_gd *gd, uint32_t val) { gd->ext2bgd_nbfree = val & 0xffff; gd->ext4bgd_nbfree_hi = val >> 16; } static uint32_t e2fs_gd_get_nifree(struct ext2_gd *gd) { return (((uint32_t)(gd->ext4bgd_nifree_hi) << 16) | gd->ext2bgd_nifree); } static void e2fs_gd_set_nifree(struct ext2_gd *gd, uint32_t val) { gd->ext2bgd_nifree = val & 0xffff; gd->ext4bgd_nifree_hi = val >> 16; } uint32_t e2fs_gd_get_ndirs(struct ext2_gd *gd) { return (((uint32_t)(gd->ext4bgd_ndirs_hi) << 16) | gd->ext2bgd_ndirs); } static void e2fs_gd_set_ndirs(struct ext2_gd *gd, uint32_t val) { gd->ext2bgd_ndirs = val & 0xffff; gd->ext4bgd_ndirs_hi = val >> 16; } static uint32_t e2fs_gd_get_i_unused(struct ext2_gd *gd) { return (((uint32_t)(gd->ext4bgd_i_unused_hi) << 16) | gd->ext4bgd_i_unused); } static void e2fs_gd_set_i_unused(struct ext2_gd *gd, uint32_t val) { gd->ext4bgd_i_unused = val & 0xffff; gd->ext4bgd_i_unused_hi = val >> 16; } /* * Find a cylinder to place a directory. * * The policy implemented by this algorithm is to allocate a * directory inode in the same cylinder group as its parent * directory, but also to reserve space for its files inodes * and data. Restrict the number of directories which may be * allocated one after another in the same cylinder group * without intervening allocation of files. * * If we allocate a first level directory then force allocation * in another cylinder group. * */ static u_long ext2_dirpref(struct inode *pip) { struct m_ext2fs *fs; int cg, prefcg, cgsize; uint64_t avgbfree, minbfree; u_int avgifree, avgndir, curdirsize; u_int minifree, maxndir; u_int mincg, minndir; u_int dirsize, maxcontigdirs; mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED); fs = pip->i_e2fs; avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount; avgbfree = fs->e2fs_fbcount / fs->e2fs_gcount; avgndir = fs->e2fs_total_dir / fs->e2fs_gcount; /* * Force allocation in another cg if creating a first level dir. */ ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref"); if (ITOV(pip)->v_vflag & VV_ROOT) { prefcg = arc4random() % fs->e2fs_gcount; mincg = prefcg; minndir = fs->e2fs_ipg; for (cg = prefcg; cg < fs->e2fs_gcount; cg++) if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { mincg = cg; minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); } for (cg = 0; cg < prefcg; cg++) if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < minndir && e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree && e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= avgbfree) { mincg = cg; minndir = e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]); } return (mincg); } /* * Count various limits which used for * optimal allocation of a directory inode. */ maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg); minifree = avgifree - avgifree / 4; if (minifree < 1) minifree = 1; minbfree = avgbfree - avgbfree / 4; if (minbfree < 1) minbfree = 1; cgsize = fs->e2fs_fsize * fs->e2fs_fpg; dirsize = AVGDIRSIZE; curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0; if (dirsize < curdirsize) dirsize = curdirsize; maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255); maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR); if (maxcontigdirs == 0) maxcontigdirs = 1; /* * Limit number of dirs in one cg and reserve space for * regular files, but only if we have no deficit in * inodes or space. */ prefcg = ino_to_cg(fs, pip->i_number); for (cg = prefcg; cg < fs->e2fs_gcount; cg++) if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { if (fs->e2fs_contigdirs[cg] < maxcontigdirs) return (cg); } for (cg = 0; cg < prefcg; cg++) if (e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) < maxndir && e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= minifree && e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) >= minbfree) { if (fs->e2fs_contigdirs[cg] < maxcontigdirs) return (cg); } /* * This is a backstop when we have deficit in space. */ for (cg = prefcg; cg < fs->e2fs_gcount; cg++) if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) return (cg); for (cg = 0; cg < prefcg; cg++) if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) >= avgifree) break; return (cg); } /* * Select the desired position for the next block in a file. * * we try to mimic what Remy does in inode_getblk/block_getblk * * we note: blocknr == 0 means that we're about to allocate either * a direct block or a pointer block at the first level of indirection * (In other words, stuff that will go in i_db[] or i_ib[]) * * blocknr != 0 means that we're allocating a block that is none * of the above. Then, blocknr tells us the number of the block * that will hold the pointer */ e4fs_daddr_t ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap, e2fs_daddr_t blocknr) { struct m_ext2fs *fs; int tmp; fs = ip->i_e2fs; mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); /* * If the next block is actually what we thought it is, then set the * goal to what we thought it should be. */ if (ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0) return ip->i_next_alloc_goal; /* * Now check whether we were provided with an array that basically * tells us previous blocks to which we want to stay close. */ if (bap) for (tmp = indx - 1; tmp >= 0; tmp--) if (bap[tmp]) return bap[tmp]; /* * Else lets fall back to the blocknr or, if there is none, follow * the rule that a block should be allocated near its inode. */ return (blocknr ? blocknr : (e2fs_daddr_t)(ip->i_block_group * EXT2_BLOCKS_PER_GROUP(fs)) + fs->e2fs->e2fs_first_dblock); } /* * Implement the cylinder overflow algorithm. * * The policy implemented by this algorithm is: * 1) allocate the block in its requested cylinder group. * 2) quadradically rehash on the cylinder group number. * 3) brute force search for a free block. */ static e4fs_daddr_t ext2_hashalloc(struct inode *ip, int cg, long pref, int size, daddr_t (*allocator) (struct inode *, int, daddr_t, int)) { struct m_ext2fs *fs; e4fs_daddr_t result; int i, icg = cg; mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); fs = ip->i_e2fs; /* * 1: preferred cylinder group */ result = (*allocator)(ip, cg, pref, size); if (result) return (result); /* * 2: quadratic rehash */ for (i = 1; i < fs->e2fs_gcount; i *= 2) { cg += i; if (cg >= fs->e2fs_gcount) cg -= fs->e2fs_gcount; result = (*allocator)(ip, cg, 0, size); if (result) return (result); } /* * 3: brute force search * Note that we start at i == 2, since 0 was checked initially, * and 1 is always checked in the quadratic rehash. */ cg = (icg + 2) % fs->e2fs_gcount; for (i = 2; i < fs->e2fs_gcount; i++) { result = (*allocator)(ip, cg, 0, size); if (result) return (result); cg++; if (cg == fs->e2fs_gcount) cg = 0; } return (0); } static uint64_t ext2_cg_number_gdb_nometa(struct m_ext2fs *fs, int cg) { if (!ext2_cg_has_sb(fs, cg)) return (0); if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG)) return (fs->e2fs->e3fs_first_meta_bg); return ((fs->e2fs_gcount + EXT2_DESCS_PER_BLOCK(fs) - 1) / EXT2_DESCS_PER_BLOCK(fs)); } static uint64_t ext2_cg_number_gdb_meta(struct m_ext2fs *fs, int cg) { unsigned long metagroup; int first, last; metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); first = metagroup * EXT2_DESCS_PER_BLOCK(fs); last = first + EXT2_DESCS_PER_BLOCK(fs) - 1; if (cg == first || cg == first + 1 || cg == last) return (1); return (0); } uint64_t ext2_cg_number_gdb(struct m_ext2fs *fs, int cg) { unsigned long first_meta_bg, metagroup; first_meta_bg = fs->e2fs->e3fs_first_meta_bg; metagroup = cg / EXT2_DESCS_PER_BLOCK(fs); if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || metagroup < first_meta_bg) return (ext2_cg_number_gdb_nometa(fs, cg)); return ext2_cg_number_gdb_meta(fs, cg); } static int ext2_number_base_meta_blocks(struct m_ext2fs *fs, int cg) { int number; number = ext2_cg_has_sb(fs, cg); if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || cg < fs->e2fs->e3fs_first_meta_bg * EXT2_DESCS_PER_BLOCK(fs)) { if (number) { number += ext2_cg_number_gdb(fs, cg); number += fs->e2fs->e2fs_reserved_ngdb; } } else { number += ext2_cg_number_gdb(fs, cg); } return (number); } static void ext2_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) { int i; if (start_bit >= end_bit) return; for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) setbit(bitmap, i); if (i < end_bit) memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); } static int ext2_get_group_number(struct m_ext2fs *fs, e4fs_daddr_t block) { return ((block - fs->e2fs->e2fs_first_dblock) / fs->e2fs_bsize); } static int ext2_block_in_group(struct m_ext2fs *fs, e4fs_daddr_t block, int cg) { return ((ext2_get_group_number(fs, block) == cg) ? 1 : 0); } static int ext2_cg_block_bitmap_init(struct m_ext2fs *fs, int cg, struct buf *bp) { int bit, bit_max, inodes_per_block; uint64_t start, tmp; if (!(fs->e2fs_gd[cg].ext4bgd_flags & EXT2_BG_BLOCK_UNINIT)) return (0); memset(bp->b_data, 0, fs->e2fs_bsize); bit_max = ext2_number_base_meta_blocks(fs, cg); if ((bit_max >> 3) >= fs->e2fs_bsize) return (EINVAL); for (bit = 0; bit < bit_max; bit++) setbit(bp->b_data, bit); start = (uint64_t)cg * fs->e2fs->e2fs_bpg + fs->e2fs->e2fs_first_dblock; /* Set bits for block and inode bitmaps, and inode table. */ tmp = e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg]); if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || ext2_block_in_group(fs, tmp, cg)) setbit(bp->b_data, tmp - start); tmp = e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg]); if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || ext2_block_in_group(fs, tmp, cg)) setbit(bp->b_data, tmp - start); tmp = e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]); inodes_per_block = fs->e2fs_bsize/EXT2_INODE_SIZE(fs); while( tmp < e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + fs->e2fs->e2fs_ipg / inodes_per_block ) { if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || ext2_block_in_group(fs, tmp, cg)) setbit(bp->b_data, tmp - start); tmp++; } /* * Also if the number of blocks within the group is less than * the blocksize * 8 ( which is the size of bitmap ), set rest * of the block bitmap to 1 */ ext2_mark_bitmap_end(fs->e2fs->e2fs_bpg, fs->e2fs_bsize * 8, bp->b_data); /* Clean the flag */ fs->e2fs_gd[cg].ext4bgd_flags &= ~EXT2_BG_BLOCK_UNINIT; return (0); } static int ext2_b_bitmap_validate(struct m_ext2fs *fs, struct buf *bp, int cg) { struct ext2_gd *gd; uint64_t group_first_block; unsigned int offset, max_bit; if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) { /* * It is not possible to check block bitmap in case of this feature, * because the inode and block bitmaps and inode table * blocks may not be in the group at all. * So, skip check in this case. */ return (0); } gd = &fs->e2fs_gd[cg]; max_bit = fs->e2fs_fpg; group_first_block = ((uint64_t)cg) * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock; /* Check block bitmap block number */ offset = e2fs_gd_get_b_bitmap(gd) - group_first_block; if (offset >= max_bit || !isset(bp->b_data, offset)) { SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "bad block bitmap, group", cg); return (EINVAL); } /* Check inode bitmap block number */ offset = e2fs_gd_get_i_bitmap(gd) - group_first_block; if (offset >= max_bit || !isset(bp->b_data, offset)) { SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "bad inode bitmap", cg); return (EINVAL); } /* Check inode table */ offset = e2fs_gd_get_i_tables(gd) - group_first_block; if (offset >= max_bit || offset + fs->e2fs_itpg >= max_bit) { SDT_PROBE2(ext2fs, , alloc, ext2_b_bitmap_validate_error, "bad inode table, group", cg); return (EINVAL); } return (0); } /* * Determine whether a block can be allocated. * * Check to see if a block of the appropriate size is available, * and if it is, allocate it. */ static daddr_t ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size) { struct m_ext2fs *fs; struct buf *bp; struct ext2mount *ump; daddr_t bno, runstart, runlen; int bit, loc, end, error, start; char *bbp; /* XXX ondisk32 */ fs = ip->i_e2fs; ump = ip->i_ump; if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) return (0); EXT2_UNLOCK(ump); error = bread(ip->i_devvp, fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), (int)fs->e2fs_bsize, NOCRED, &bp); if (error) goto fail; if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { error = ext2_cg_block_bitmap_init(fs, cg, bp); if (error) goto fail; ext2_gd_b_bitmap_csum_set(fs, cg, bp); } error = ext2_gd_b_bitmap_csum_verify(fs, cg, bp); if (error) goto fail; error = ext2_b_bitmap_validate(fs,bp, cg); if (error) goto fail; /* * Check, that another thread did not not allocate the last block in this * group while we were waiting for the buffer. */ if (e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) == 0) goto fail; bbp = (char *)bp->b_data; if (dtog(fs, bpref) != cg) bpref = 0; if (bpref != 0) { bpref = dtogd(fs, bpref); /* * if the requested block is available, use it */ if (isclr(bbp, bpref)) { bno = bpref; goto gotit; } } /* * no blocks in the requested cylinder, so take next * available one in this cylinder group. * first try to get 8 contigous blocks, then fall back to a single * block. */ if (bpref) start = dtogd(fs, bpref) / NBBY; else start = 0; end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start; retry: runlen = 0; runstart = 0; for (loc = start; loc < end; loc++) { if (bbp[loc] == (char)0xff) { runlen = 0; continue; } /* Start of a run, find the number of high clear bits. */ if (runlen == 0) { bit = fls(bbp[loc]); runlen = NBBY - bit; runstart = loc * NBBY + bit; } else if (bbp[loc] == 0) { /* Continue a run. */ runlen += NBBY; } else { /* * Finish the current run. If it isn't long * enough, start a new one. */ bit = ffs(bbp[loc]) - 1; runlen += bit; if (runlen >= 8) { bno = runstart; goto gotit; } /* Run was too short, start a new one. */ bit = fls(bbp[loc]); runlen = NBBY - bit; runstart = loc * NBBY + bit; } /* If the current run is long enough, use it. */ if (runlen >= 8) { bno = runstart; goto gotit; } } if (start != 0) { end = start; start = 0; goto retry; } bno = ext2_mapsearch(fs, bbp, bpref); if (bno < 0) goto fail; gotit: #ifdef INVARIANTS if (isset(bbp, bno)) { printf("ext2fs_alloccgblk: cg=%d bno=%jd fs=%s\n", cg, (intmax_t)bno, fs->e2fs_fsmnt); panic("ext2fs_alloccg: dup alloc"); } #endif setbit(bbp, bno); EXT2_LOCK(ump); ext2_clusteracct(fs, bbp, cg, bno, -1); fs->e2fs_fbcount--; e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); fs->e2fs_fmod = 1; EXT2_UNLOCK(ump); ext2_gd_b_bitmap_csum_set(fs, cg, bp); bdwrite(bp); return (((uint64_t)cg) * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno); fail: brelse(bp); EXT2_LOCK(ump); return (0); } /* * Determine whether a cluster can be allocated. */ static daddr_t ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len) { struct m_ext2fs *fs; struct ext2mount *ump; struct buf *bp; char *bbp; int bit, error, got, i, loc, run; int32_t *lp; daddr_t bno; fs = ip->i_e2fs; ump = ip->i_ump; if (fs->e2fs_maxcluster[cg] < len) return (0); EXT2_UNLOCK(ump); error = bread(ip->i_devvp, fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), (int)fs->e2fs_bsize, NOCRED, &bp); if (error) goto fail_lock; bbp = (char *)bp->b_data; EXT2_LOCK(ump); /* * Check to see if a cluster of the needed size (or bigger) is * available in this cylinder group. */ lp = &fs->e2fs_clustersum[cg].cs_sum[len]; for (i = len; i <= fs->e2fs_contigsumsize; i++) if (*lp++ > 0) break; if (i > fs->e2fs_contigsumsize) { /* * Update the cluster summary information to reflect * the true maximum-sized cluster so that future cluster * allocation requests can avoid reading the bitmap only * to find no cluster. */ lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1]; for (i = len - 1; i > 0; i--) if (*lp-- > 0) break; fs->e2fs_maxcluster[cg] = i; goto fail; } EXT2_UNLOCK(ump); /* Search the bitmap to find a big enough cluster like in FFS. */ if (dtog(fs, bpref) != cg) bpref = 0; if (bpref != 0) bpref = dtogd(fs, bpref); loc = bpref / NBBY; bit = 1 << (bpref % NBBY); for (run = 0, got = bpref; got < fs->e2fs->e2fs_fpg; got++) { if ((bbp[loc] & bit) != 0) run = 0; else { run++; if (run == len) break; } if ((got & (NBBY - 1)) != (NBBY - 1)) bit <<= 1; else { loc++; bit = 1; } } if (got >= fs->e2fs->e2fs_fpg) goto fail_lock; /* Allocate the cluster that we found. */ for (i = 1; i < len; i++) if (!isclr(bbp, got - run + i)) panic("ext2_clusteralloc: map mismatch"); bno = got - run + 1; if (bno >= fs->e2fs->e2fs_fpg) panic("ext2_clusteralloc: allocated out of group"); EXT2_LOCK(ump); for (i = 0; i < len; i += fs->e2fs_fpb) { setbit(bbp, bno + i); ext2_clusteracct(fs, bbp, cg, bno + i, -1); fs->e2fs_fbcount--; e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) - 1); } fs->e2fs_fmod = 1; EXT2_UNLOCK(ump); bdwrite(bp); return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno); fail_lock: EXT2_LOCK(ump); fail: brelse(bp); return (0); } static int ext2_zero_inode_table(struct inode *ip, int cg) { struct m_ext2fs *fs; struct buf *bp; int i, all_blks, used_blks; fs = ip->i_e2fs; if (fs->e2fs_gd[cg].ext4bgd_flags & EXT2_BG_INODE_ZEROED) return (0); all_blks = fs->e2fs->e2fs_inode_size * fs->e2fs->e2fs_ipg / fs->e2fs_bsize; used_blks = howmany(fs->e2fs->e2fs_ipg - e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]), fs->e2fs_bsize / EXT2_INODE_SIZE(fs)); for (i = 0; i < all_blks - used_blks; i++) { bp = getblk(ip->i_devvp, fsbtodb(fs, e2fs_gd_get_i_tables(&fs->e2fs_gd[cg]) + used_blks + i), fs->e2fs_bsize, 0, 0, 0); if (!bp) return (EIO); vfs_bio_bzero_buf(bp, 0, fs->e2fs_bsize); bawrite(bp); } fs->e2fs_gd[cg].ext4bgd_flags |= EXT2_BG_INODE_ZEROED; return (0); } /* * Determine whether an inode can be allocated. * * Check to see if an inode is available, and if it is, * allocate it using tode in the specified cylinder group. */ static daddr_t ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode) { struct m_ext2fs *fs; struct buf *bp; struct ext2mount *ump; int error, start, len, ifree; char *ibp, *loc; ipref--; /* to avoid a lot of (ipref -1) */ if (ipref == -1) ipref = 0; fs = ip->i_e2fs; ump = ip->i_ump; if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) return (0); EXT2_UNLOCK(ump); error = bread(ip->i_devvp, fsbtodb(fs, e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); EXT2_LOCK(ump); return (0); } if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { if (fs->e2fs_gd[cg].ext4bgd_flags & EXT2_BG_INODE_UNINIT) { memset(bp->b_data, 0, fs->e2fs_bsize); fs->e2fs_gd[cg].ext4bgd_flags &= ~EXT2_BG_INODE_UNINIT; } ext2_gd_i_bitmap_csum_set(fs, cg, bp); error = ext2_zero_inode_table(ip, cg); if (error) { brelse(bp); EXT2_LOCK(ump); return (0); } } error = ext2_gd_i_bitmap_csum_verify(fs, cg, bp); if (error) { brelse(bp); EXT2_LOCK(ump); return (0); } if (e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) == 0) { /* * Another thread allocated the last i-node in this * group while we were waiting for the buffer. */ brelse(bp); EXT2_LOCK(ump); return (0); } ibp = (char *)bp->b_data; if (ipref) { ipref %= fs->e2fs->e2fs_ipg; if (isclr(ibp, ipref)) goto gotit; } start = ipref / NBBY; len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY); loc = memcchr(&ibp[start], 0xff, len); if (loc == NULL) { len = start + 1; start = 0; loc = memcchr(&ibp[start], 0xff, len); if (loc == NULL) { SDT_PROBE3(ext2fs, , alloc, ext2_nodealloccg_bmap_corrupted, cg, ipref, fs->e2fs_fsmnt); brelse(bp); EXT2_LOCK(ump); return (0); } } ipref = (loc - ibp) * NBBY + ffs(~*loc) - 1; gotit: setbit(ibp, ipref); EXT2_LOCK(ump); e2fs_gd_set_nifree(&fs->e2fs_gd[cg], e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) - 1); if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { ifree = fs->e2fs->e2fs_ipg - e2fs_gd_get_i_unused(&fs->e2fs_gd[cg]); if (ipref + 1 > ifree) e2fs_gd_set_i_unused(&fs->e2fs_gd[cg], fs->e2fs->e2fs_ipg - (ipref + 1)); } fs->e2fs->e2fs_ficount--; fs->e2fs_fmod = 1; if ((mode & IFMT) == IFDIR) { e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) + 1); fs->e2fs_total_dir++; } EXT2_UNLOCK(ump); ext2_gd_i_bitmap_csum_set(fs, cg, bp); bdwrite(bp); return ((uint64_t)cg * fs->e2fs_ipg + ipref + 1); } /* * Free a block or fragment. * */ void ext2_blkfree(struct inode *ip, e4fs_daddr_t bno, long size) { struct m_ext2fs *fs; struct buf *bp; struct ext2mount *ump; int cg, error; char *bbp; fs = ip->i_e2fs; ump = ip->i_ump; cg = dtog(fs, bno); if (bno >= fs->e2fs_bcount) { SDT_PROBE2(ext2fs, , alloc, ext2_blkfree_bad_block, ip->i_number, bno); return; } error = bread(ip->i_devvp, fsbtodb(fs, e2fs_gd_get_b_bitmap(&fs->e2fs_gd[cg])), (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); return; } bbp = (char *)bp->b_data; bno = dtogd(fs, bno); if (isclr(bbp, bno)) { panic("ext2_blkfree: freeing free block %lld, fs=%s", (long long)bno, fs->e2fs_fsmnt); } clrbit(bbp, bno); EXT2_LOCK(ump); ext2_clusteracct(fs, bbp, cg, bno, 1); fs->e2fs_fbcount++; e2fs_gd_set_nbfree(&fs->e2fs_gd[cg], e2fs_gd_get_nbfree(&fs->e2fs_gd[cg]) + 1); fs->e2fs_fmod = 1; EXT2_UNLOCK(ump); ext2_gd_b_bitmap_csum_set(fs, cg, bp); bdwrite(bp); } /* * Free an inode. * */ int ext2_vfree(struct vnode *pvp, ino_t ino, int mode) { struct m_ext2fs *fs; struct inode *pip; struct buf *bp; struct ext2mount *ump; int error, cg; char *ibp; pip = VTOI(pvp); fs = pip->i_e2fs; ump = pip->i_ump; if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount) panic("ext2_vfree: range: devvp = %p, ino = %ju, fs = %s", pip->i_devvp, (uintmax_t)ino, fs->e2fs_fsmnt); cg = ino_to_cg(fs, ino); error = bread(pip->i_devvp, fsbtodb(fs, e2fs_gd_get_i_bitmap(&fs->e2fs_gd[cg])), (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); return (0); } ibp = (char *)bp->b_data; ino = (ino - 1) % fs->e2fs->e2fs_ipg; if (isclr(ibp, ino)) { SDT_PROBE2(ext2fs, , alloc, ext2_vfree_doublefree, fs->e2fs_fsmnt, ino); if (fs->e2fs_ronly == 0) panic("ext2_vfree: freeing free inode"); } clrbit(ibp, ino); EXT2_LOCK(ump); fs->e2fs->e2fs_ficount++; e2fs_gd_set_nifree(&fs->e2fs_gd[cg], e2fs_gd_get_nifree(&fs->e2fs_gd[cg]) + 1); if ((mode & IFMT) == IFDIR) { e2fs_gd_set_ndirs(&fs->e2fs_gd[cg], e2fs_gd_get_ndirs(&fs->e2fs_gd[cg]) - 1); fs->e2fs_total_dir--; } fs->e2fs_fmod = 1; EXT2_UNLOCK(ump); ext2_gd_i_bitmap_csum_set(fs, cg, bp); bdwrite(bp); return (0); } /* * Find a block in the specified cylinder group. * * It is a panic if a request is made to find a block if none are * available. */ static daddr_t ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref) { char *loc; int start, len; /* * find the fragment by searching through the free block * map for an appropriate bit pattern */ if (bpref) start = dtogd(fs, bpref) / NBBY; else start = 0; len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start; loc = memcchr(&bbp[start], 0xff, len); if (loc == NULL) { len = start + 1; start = 0; loc = memcchr(&bbp[start], 0xff, len); if (loc == NULL) { panic("ext2_mapsearch: map corrupted: start=%d, len=%d, fs=%s", start, len, fs->e2fs_fsmnt); /* NOTREACHED */ } } return ((loc - bbp) * NBBY + ffs(~*loc) - 1); } int ext2_cg_has_sb(struct m_ext2fs *fs, int cg) { int a3, a5, a7; if (cg == 0) return (1); if (EXT2_HAS_COMPAT_FEATURE(fs, EXT2F_COMPAT_SPARSESUPER2)) { if (cg == fs->e2fs->e4fs_backup_bgs[0] || cg == fs->e2fs->e4fs_backup_bgs[1]) return (1); return (0); } if ((cg <= 1) || !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_SPARSESUPER)) return (1); if (!(cg & 1)) return (0); for (a3 = 3, a5 = 5, a7 = 7; a3 <= cg || a5 <= cg || a7 <= cg; a3 *= 3, a5 *= 5, a7 *= 7) if (cg == a3 || cg == a5 || cg == a7) return (1); return (0); } Index: head/sys/fs/ext2fs/ext2_balloc.c =================================================================== --- head/sys/fs/ext2fs/ext2_balloc.c (revision 351925) +++ head/sys/fs/ext2fs/ext2_balloc.c (revision 351926) @@ -1,326 +1,323 @@ /*- * modified for Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_balloc.c 8.4 (Berkeley) 9/23/93 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int ext2_ext_balloc(struct inode *ip, uint32_t lbn, int size, struct ucred *cred, struct buf **bpp, int flags) { struct m_ext2fs *fs; struct buf *bp = NULL; struct vnode *vp = ITOV(ip); daddr_t newblk; int blks, error, allocated; fs = ip->i_e2fs; blks = howmany(size, fs->e2fs_bsize); error = ext4_ext_get_blocks(ip, lbn, blks, cred, NULL, &allocated, &newblk); if (error) return (error); if (allocated) { bp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0, 0); if(!bp) return (EIO); } else { error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); return (error); } } bp->b_blkno = fsbtodb(fs, newblk); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); *bpp = bp; return (error); } /* * Balloc defines the structure of filesystem storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. */ int ext2_balloc(struct inode *ip, e2fs_lbn_t lbn, int size, struct ucred *cred, struct buf **bpp, int flags) { struct m_ext2fs *fs; struct ext2mount *ump; struct buf *bp, *nbp; struct vnode *vp = ITOV(ip); struct indir indirs[EXT2_NIADDR + 2]; e4fs_daddr_t nb, newb; e2fs_daddr_t *bap, pref; int num, i, error; *bpp = NULL; if (lbn < 0) return (EFBIG); fs = ip->i_e2fs; ump = ip->i_ump; /* * check if this is a sequential block allocation. * If so, increment next_alloc fields to allow ext2_blkpref * to make a good guess */ if (lbn == ip->i_next_alloc_block + 1) { ip->i_next_alloc_block++; ip->i_next_alloc_goal++; } if (ip->i_flag & IN_E4EXTENTS) return (ext2_ext_balloc(ip, lbn, size, cred, bpp, flags)); /* * The first EXT2_NDADDR blocks are direct blocks */ if (lbn < EXT2_NDADDR) { nb = ip->i_db[lbn]; /* * no new block is to be allocated, and no need to expand * the file */ if (nb != 0) { error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); if (ip->i_size >= (lbn + 1) * fs->e2fs_bsize) { *bpp = bp; return (0); } } else { EXT2_LOCK(ump); error = ext2_alloc(ip, lbn, ext2_blkpref(ip, lbn, (int)lbn, &ip->i_db[0], 0), fs->e2fs_bsize, cred, &newb); if (error) return (error); /* * If the newly allocated block exceeds 32-bit limit, * we can not use it in file block maps. */ if (newb > UINT_MAX) return (EFBIG); bp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0, 0); bp->b_blkno = fsbtodb(fs, newb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(bp); } ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if ((error = ext2_getlbns(vp, lbn, indirs, &num)) != 0) return (error); #ifdef INVARIANTS if (num < 1) panic("ext2_balloc: ext2_getlbns returned indirect block"); #endif /* * Fetch the first indirect block allocating if necessary. */ --num; nb = ip->i_ib[indirs[0].in_off]; if (nb == 0) { EXT2_LOCK(ump); pref = ext2_blkpref(ip, lbn, indirs[0].in_off + EXT2_NDIR_BLOCKS, &ip->i_db[0], 0); if ((error = ext2_alloc(ip, lbn, pref, fs->e2fs_bsize, cred, &newb))) return (error); if (newb > UINT_MAX) return (EFBIG); nb = newb; bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0, 0); bp->b_blkno = fsbtodb(fs, newb); vfs_bio_clrbuf(bp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(bp)) != 0) { ext2_blkfree(ip, nb, fs->e2fs_bsize); return (error); } ip->i_ib[indirs[0].in_off] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); return (error); } bap = (e2fs_daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { bqrelse(bp); continue; } EXT2_LOCK(ump); if (pref == 0) pref = ext2_blkpref(ip, lbn, indirs[i].in_off, bap, bp->b_lblkno); error = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newb); if (error) { brelse(bp); return (error); } if (newb > UINT_MAX) return (EFBIG); nb = newb; nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if ((error = bwrite(nbp)) != 0) { ext2_blkfree(ip, nb, fs->e2fs_bsize); brelse(bp); return (error); } bap[indirs[i - 1].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->e2fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { EXT2_LOCK(ump); pref = ext2_blkpref(ip, lbn, indirs[i].in_off, &bap[0], bp->b_lblkno); if ((error = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newb)) != 0) { brelse(bp); return (error); } if (newb > UINT_MAX) return (EFBIG); nb = newb; nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & BA_CLRBUF) vfs_bio_clrbuf(nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & IO_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->e2fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } *bpp = nbp; return (0); } brelse(bp); if (flags & BA_CLRBUF) { int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { error = cluster_read(vp, ip->i_size, lbn, (int)fs->e2fs_bsize, NOCRED, MAXBSIZE, seqcount, 0, &nbp); } else { error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, &nbp); } if (error) { brelse(nbp); return (error); } } else { nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); } *bpp = nbp; return (0); } Index: head/sys/fs/ext2fs/ext2_extattr.c =================================================================== --- head/sys/fs/ext2fs/ext2_extattr.c (revision 351925) +++ head/sys/fs/ext2fs/ext2_extattr.c (revision 351926) @@ -1,1255 +1,1250 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2017, Fedor Uporov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SDT_PROVIDER_DECLARE(ext2fs); /* * ext2fs trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(ext2fs, , trace, extattr, "int", "char*"); static int ext2_extattr_attrnamespace_to_bsd(int attrnamespace) { switch (attrnamespace) { case EXT4_XATTR_INDEX_SYSTEM: return (EXTATTR_NAMESPACE_SYSTEM); case EXT4_XATTR_INDEX_USER: return (EXTATTR_NAMESPACE_USER); case EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT: return (POSIX1E_ACL_DEFAULT_EXTATTR_NAMESPACE); case EXT4_XATTR_INDEX_POSIX_ACL_ACCESS: return (POSIX1E_ACL_ACCESS_EXTATTR_NAMESPACE); } return (EXTATTR_NAMESPACE_EMPTY); } static const char * ext2_extattr_name_to_bsd(int attrnamespace, const char *name, int* name_len) { if (attrnamespace == EXT4_XATTR_INDEX_SYSTEM) return (name); else if (attrnamespace == EXT4_XATTR_INDEX_USER) return (name); else if (attrnamespace == EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT) { *name_len = strlen(POSIX1E_ACL_DEFAULT_EXTATTR_NAME); return (POSIX1E_ACL_DEFAULT_EXTATTR_NAME); } else if (attrnamespace == EXT4_XATTR_INDEX_POSIX_ACL_ACCESS) { *name_len = strlen(POSIX1E_ACL_ACCESS_EXTATTR_NAME); return (POSIX1E_ACL_ACCESS_EXTATTR_NAME); } /* * XXX: Not all linux namespaces are mapped to bsd for now, * return NULL, which will be converted to ENOTSUP on upper layer. */ SDT_PROBE2(ext2fs, , trace, extattr, 1, "can not convert ext2fs name to bsd namespace"); return (NULL); } static int ext2_extattr_attrnamespace_to_linux(int attrnamespace, const char *name) { if (attrnamespace == POSIX1E_ACL_DEFAULT_EXTATTR_NAMESPACE && !strcmp(name, POSIX1E_ACL_DEFAULT_EXTATTR_NAME)) return (EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT); if (attrnamespace == POSIX1E_ACL_ACCESS_EXTATTR_NAMESPACE && !strcmp(name, POSIX1E_ACL_ACCESS_EXTATTR_NAME)) return (EXT4_XATTR_INDEX_POSIX_ACL_ACCESS); switch (attrnamespace) { case EXTATTR_NAMESPACE_SYSTEM: return (EXT4_XATTR_INDEX_SYSTEM); case EXTATTR_NAMESPACE_USER: return (EXT4_XATTR_INDEX_USER); } /* * In this case namespace conversion should be unique, * so this point is unreachable. */ return (-1); } static const char * ext2_extattr_name_to_linux(int attrnamespace, const char *name) { if (attrnamespace == POSIX1E_ACL_DEFAULT_EXTATTR_NAMESPACE || attrnamespace == POSIX1E_ACL_ACCESS_EXTATTR_NAMESPACE) return (""); else return (name); } int ext2_extattr_valid_attrname(int attrnamespace, const char *attrname) { if (attrnamespace == EXTATTR_NAMESPACE_EMPTY) return (EINVAL); if (strlen(attrname) == 0) return (EINVAL); if (strlen(attrname) + 1 > EXT2_EXTATTR_NAMELEN_MAX) return (ENAMETOOLONG); return (0); } static int ext2_extattr_check(struct ext2fs_extattr_entry *entry, char *end) { struct ext2fs_extattr_entry *next; while (!EXT2_IS_LAST_ENTRY(entry)) { next = EXT2_EXTATTR_NEXT(entry); if ((char *)next >= end) return (EIO); entry = next; } return (0); } static int ext2_extattr_block_check(struct inode *ip, struct buf *bp) { struct ext2fs_extattr_header *header; int error; header = (struct ext2fs_extattr_header *)bp->b_data; error = ext2_extattr_check(EXT2_IFIRST(header), bp->b_data + bp->b_bufsize); if (error) return (error); return (ext2_extattr_blk_csum_verify(ip, bp)); } int ext2_extattr_inode_list(struct inode *ip, int attrnamespace, struct uio *uio, size_t *size) { struct m_ext2fs *fs; struct buf *bp; struct ext2fs_extattr_dinode_header *header; struct ext2fs_extattr_entry *entry; const char *attr_name; int name_len; int error; fs = ip->i_e2fs; if ((error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { brelse(bp); return (error); } struct ext2fs_dinode *dinode = (struct ext2fs_dinode *) ((char *)bp->b_data + EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)); /* Check attributes magic value */ header = (struct ext2fs_extattr_dinode_header *)((char *)dinode + E2FS_REV0_INODE_SIZE + dinode->e2di_extra_isize); if (header->h_magic != EXTATTR_MAGIC) { brelse(bp); return (0); } error = ext2_extattr_check(EXT2_IFIRST(header), (char *)dinode + EXT2_INODE_SIZE(fs)); if (error) { brelse(bp); return (error); } for (entry = EXT2_IFIRST(header); !EXT2_IS_LAST_ENTRY(entry); entry = EXT2_EXTATTR_NEXT(entry)) { if (ext2_extattr_attrnamespace_to_bsd(entry->e_name_index) != attrnamespace) continue; name_len = entry->e_name_len; attr_name = ext2_extattr_name_to_bsd(entry->e_name_index, entry->e_name, &name_len); if (!attr_name) { brelse(bp); return (ENOTSUP); } if (size != NULL) *size += name_len + 1; if (uio != NULL) { char *name = malloc(name_len + 1, M_TEMP, M_WAITOK); name[0] = name_len; memcpy(&name[1], attr_name, name_len); error = uiomove(name, name_len + 1, uio); free(name, M_TEMP); if (error) break; } } brelse(bp); return (error); } int ext2_extattr_block_list(struct inode *ip, int attrnamespace, struct uio *uio, size_t *size) { struct m_ext2fs *fs; struct buf *bp; struct ext2fs_extattr_header *header; struct ext2fs_extattr_entry *entry; const char *attr_name; int name_len; int error; fs = ip->i_e2fs; error = bread(ip->i_devvp, fsbtodb(fs, ip->i_facl), fs->e2fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); return (error); } /* Check attributes magic value */ header = EXT2_HDR(bp); if (header->h_magic != EXTATTR_MAGIC || header->h_blocks != 1) { brelse(bp); return (EINVAL); } error = ext2_extattr_block_check(ip, bp); if (error) { brelse(bp); return (error); } for (entry = EXT2_FIRST_ENTRY(bp); !EXT2_IS_LAST_ENTRY(entry); entry = EXT2_EXTATTR_NEXT(entry)) { if (ext2_extattr_attrnamespace_to_bsd(entry->e_name_index) != attrnamespace) continue; name_len = entry->e_name_len; attr_name = ext2_extattr_name_to_bsd(entry->e_name_index, entry->e_name, &name_len); if (!attr_name) { brelse(bp); return (ENOTSUP); } if (size != NULL) *size += name_len + 1; if (uio != NULL) { char *name = malloc(name_len + 1, M_TEMP, M_WAITOK); name[0] = name_len; memcpy(&name[1], attr_name, name_len); error = uiomove(name, name_len + 1, uio); free(name, M_TEMP); if (error) break; } } brelse(bp); return (error); } int ext2_extattr_inode_get(struct inode *ip, int attrnamespace, const char *name, struct uio *uio, size_t *size) { struct m_ext2fs *fs; struct buf *bp; struct ext2fs_extattr_dinode_header *header; struct ext2fs_extattr_entry *entry; const char *attr_name; int name_len; int error; fs = ip->i_e2fs; if ((error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { brelse(bp); return (error); } struct ext2fs_dinode *dinode = (struct ext2fs_dinode *) ((char *)bp->b_data + EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)); /* Check attributes magic value */ header = (struct ext2fs_extattr_dinode_header *)((char *)dinode + E2FS_REV0_INODE_SIZE + dinode->e2di_extra_isize); if (header->h_magic != EXTATTR_MAGIC) { brelse(bp); return (ENOATTR); } error = ext2_extattr_check(EXT2_IFIRST(header), (char *)dinode + EXT2_INODE_SIZE(fs)); if (error) { brelse(bp); return (error); } for (entry = EXT2_IFIRST(header); !EXT2_IS_LAST_ENTRY(entry); entry = EXT2_EXTATTR_NEXT(entry)) { if (ext2_extattr_attrnamespace_to_bsd(entry->e_name_index) != attrnamespace) continue; name_len = entry->e_name_len; attr_name = ext2_extattr_name_to_bsd(entry->e_name_index, entry->e_name, &name_len); if (!attr_name) { brelse(bp); return (ENOTSUP); } if (strlen(name) == name_len && 0 == strncmp(attr_name, name, name_len)) { if (size != NULL) *size += entry->e_value_size; if (uio != NULL) error = uiomove(((char *)EXT2_IFIRST(header)) + entry->e_value_offs, entry->e_value_size, uio); brelse(bp); return (error); } } brelse(bp); return (ENOATTR); } int ext2_extattr_block_get(struct inode *ip, int attrnamespace, const char *name, struct uio *uio, size_t *size) { struct m_ext2fs *fs; struct buf *bp; struct ext2fs_extattr_header *header; struct ext2fs_extattr_entry *entry; const char *attr_name; int name_len; int error; fs = ip->i_e2fs; error = bread(ip->i_devvp, fsbtodb(fs, ip->i_facl), fs->e2fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); return (error); } /* Check attributes magic value */ header = EXT2_HDR(bp); if (header->h_magic != EXTATTR_MAGIC || header->h_blocks != 1) { brelse(bp); return (EINVAL); } error = ext2_extattr_block_check(ip, bp); if (error) { brelse(bp); return (error); } for (entry = EXT2_FIRST_ENTRY(bp); !EXT2_IS_LAST_ENTRY(entry); entry = EXT2_EXTATTR_NEXT(entry)) { if (ext2_extattr_attrnamespace_to_bsd(entry->e_name_index) != attrnamespace) continue; name_len = entry->e_name_len; attr_name = ext2_extattr_name_to_bsd(entry->e_name_index, entry->e_name, &name_len); if (!attr_name) { brelse(bp); return (ENOTSUP); } if (strlen(name) == name_len && 0 == strncmp(attr_name, name, name_len)) { if (size != NULL) *size += entry->e_value_size; if (uio != NULL) error = uiomove(bp->b_data + entry->e_value_offs, entry->e_value_size, uio); brelse(bp); return (error); } } brelse(bp); return (ENOATTR); } static uint16_t ext2_extattr_delete_value(char *off, struct ext2fs_extattr_entry *first_entry, struct ext2fs_extattr_entry *entry, char *end) { uint16_t min_offs; struct ext2fs_extattr_entry *next; min_offs = end - off; next = first_entry; while (!EXT2_IS_LAST_ENTRY(next)) { if (min_offs > next->e_value_offs && next->e_value_offs > 0) min_offs = next->e_value_offs; next = EXT2_EXTATTR_NEXT(next); } if (entry->e_value_size == 0) return (min_offs); memmove(off + min_offs + EXT2_EXTATTR_SIZE(entry->e_value_size), off + min_offs, entry->e_value_offs - min_offs); /* Adjust all value offsets */ next = first_entry; while (!EXT2_IS_LAST_ENTRY(next)) { if (next->e_value_offs > 0 && next->e_value_offs < entry->e_value_offs) next->e_value_offs += EXT2_EXTATTR_SIZE(entry->e_value_size); next = EXT2_EXTATTR_NEXT(next); } min_offs += EXT2_EXTATTR_SIZE(entry->e_value_size); return (min_offs); } static void ext2_extattr_delete_entry(char *off, struct ext2fs_extattr_entry *first_entry, struct ext2fs_extattr_entry *entry, char *end) { char *pad; struct ext2fs_extattr_entry *next; /* Clean entry value */ ext2_extattr_delete_value(off, first_entry, entry, end); /* Clean the entry */ next = first_entry; while (!EXT2_IS_LAST_ENTRY(next)) next = EXT2_EXTATTR_NEXT(next); pad = (char*)next + sizeof(uint32_t); memmove(entry, (char *)entry + EXT2_EXTATTR_LEN(entry->e_name_len), pad - ((char *)entry + EXT2_EXTATTR_LEN(entry->e_name_len))); } int ext2_extattr_inode_delete(struct inode *ip, int attrnamespace, const char *name) { struct m_ext2fs *fs; struct buf *bp; struct ext2fs_extattr_dinode_header *header; struct ext2fs_extattr_entry *entry; const char *attr_name; int name_len; int error; fs = ip->i_e2fs; if ((error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { brelse(bp); return (error); } struct ext2fs_dinode *dinode = (struct ext2fs_dinode *) ((char *)bp->b_data + EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)); /* Check attributes magic value */ header = (struct ext2fs_extattr_dinode_header *)((char *)dinode + E2FS_REV0_INODE_SIZE + dinode->e2di_extra_isize); if (header->h_magic != EXTATTR_MAGIC) { brelse(bp); return (ENOATTR); } error = ext2_extattr_check(EXT2_IFIRST(header), (char *)dinode + EXT2_INODE_SIZE(fs)); if (error) { brelse(bp); return (error); } /* If I am last entry, just make magic zero */ entry = EXT2_IFIRST(header); if ((EXT2_IS_LAST_ENTRY(EXT2_EXTATTR_NEXT(entry))) && (ext2_extattr_attrnamespace_to_bsd(entry->e_name_index) == attrnamespace)) { name_len = entry->e_name_len; attr_name = ext2_extattr_name_to_bsd(entry->e_name_index, entry->e_name, &name_len); if (!attr_name) { brelse(bp); return (ENOTSUP); } if (strlen(name) == name_len && 0 == strncmp(attr_name, name, name_len)) { memset(header, 0, sizeof(struct ext2fs_extattr_dinode_header)); return (bwrite(bp)); } } for (entry = EXT2_IFIRST(header); !EXT2_IS_LAST_ENTRY(entry); entry = EXT2_EXTATTR_NEXT(entry)) { if (ext2_extattr_attrnamespace_to_bsd(entry->e_name_index) != attrnamespace) continue; name_len = entry->e_name_len; attr_name = ext2_extattr_name_to_bsd(entry->e_name_index, entry->e_name, &name_len); if (!attr_name) { brelse(bp); return (ENOTSUP); } if (strlen(name) == name_len && 0 == strncmp(attr_name, name, name_len)) { ext2_extattr_delete_entry((char *)EXT2_IFIRST(header), EXT2_IFIRST(header), entry, (char *)dinode + EXT2_INODE_SIZE(fs)); return (bwrite(bp)); } } brelse(bp); return (ENOATTR); } static int ext2_extattr_block_clone(struct inode *ip, struct buf **bpp) { struct m_ext2fs *fs; struct buf *sbp; struct buf *cbp; struct ext2fs_extattr_header *header; uint64_t facl; fs = ip->i_e2fs; sbp = *bpp; header = EXT2_HDR(sbp); if (header->h_magic != EXTATTR_MAGIC || header->h_refcount == 1) return (EINVAL); facl = ext2_alloc_meta(ip); if (!facl) return (ENOSPC); cbp = getblk(ip->i_devvp, fsbtodb(fs, facl), fs->e2fs_bsize, 0, 0, 0); if (!cbp) { ext2_blkfree(ip, facl, fs->e2fs_bsize); return (EIO); } memcpy(cbp->b_data, sbp->b_data, fs->e2fs_bsize); header->h_refcount--; bwrite(sbp); ip->i_facl = facl; ext2_update(ip->i_vnode, 1); header = EXT2_HDR(cbp); header->h_refcount = 1; *bpp = cbp; return (0); } int ext2_extattr_block_delete(struct inode *ip, int attrnamespace, const char *name) { struct m_ext2fs *fs; struct buf *bp; struct ext2fs_extattr_header *header; struct ext2fs_extattr_entry *entry; const char *attr_name; int name_len; int error; fs = ip->i_e2fs; error = bread(ip->i_devvp, fsbtodb(fs, ip->i_facl), fs->e2fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); return (error); } /* Check attributes magic value */ header = EXT2_HDR(bp); if (header->h_magic != EXTATTR_MAGIC || header->h_blocks != 1) { brelse(bp); return (EINVAL); } error = ext2_extattr_block_check(ip, bp); if (error) { brelse(bp); return (error); } if (header->h_refcount > 1) { error = ext2_extattr_block_clone(ip, &bp); if (error) { brelse(bp); return (error); } } /* If I am last entry, clean me and free the block */ entry = EXT2_FIRST_ENTRY(bp); if (EXT2_IS_LAST_ENTRY(EXT2_EXTATTR_NEXT(entry)) && (ext2_extattr_attrnamespace_to_bsd(entry->e_name_index) == attrnamespace)) { name_len = entry->e_name_len; attr_name = ext2_extattr_name_to_bsd(entry->e_name_index, entry->e_name, &name_len); if (!attr_name) { brelse(bp); return (ENOTSUP); } if (strlen(name) == name_len && 0 == strncmp(attr_name, name, name_len)) { ip->i_blocks -= btodb(fs->e2fs_bsize); ext2_blkfree(ip, ip->i_facl, fs->e2fs_bsize); ip->i_facl = 0; error = ext2_update(ip->i_vnode, 1); brelse(bp); return (error); } } for (entry = EXT2_FIRST_ENTRY(bp); !EXT2_IS_LAST_ENTRY(entry); entry = EXT2_EXTATTR_NEXT(entry)) { if (ext2_extattr_attrnamespace_to_bsd(entry->e_name_index) != attrnamespace) continue; name_len = entry->e_name_len; attr_name = ext2_extattr_name_to_bsd(entry->e_name_index, entry->e_name, &name_len); if (!attr_name) { brelse(bp); return (ENOTSUP); } if (strlen(name) == name_len && 0 == strncmp(attr_name, name, name_len)) { ext2_extattr_delete_entry(bp->b_data, EXT2_FIRST_ENTRY(bp), entry, bp->b_data + bp->b_bufsize); return (bwrite(bp)); } } brelse(bp); return (ENOATTR); } static struct ext2fs_extattr_entry * allocate_entry(const char *name, int attrnamespace, uint16_t offs, uint32_t size, uint32_t hash) { const char *attr_name; int name_len; struct ext2fs_extattr_entry *entry; attr_name = ext2_extattr_name_to_linux(attrnamespace, name); name_len = strlen(attr_name); entry = malloc(sizeof(struct ext2fs_extattr_entry) + name_len, M_TEMP, M_WAITOK); entry->e_name_len = name_len; entry->e_name_index = ext2_extattr_attrnamespace_to_linux(attrnamespace, name); entry->e_value_offs = offs; entry->e_value_block = 0; entry->e_value_size = size; entry->e_hash = hash; memcpy(entry->e_name, name, name_len); return (entry); } static void free_entry(struct ext2fs_extattr_entry *entry) { free(entry, M_TEMP); } static int ext2_extattr_get_size(struct ext2fs_extattr_entry *first_entry, struct ext2fs_extattr_entry *exist_entry, int header_size, int name_len, int new_size) { struct ext2fs_extattr_entry *entry; int size; size = header_size; size += sizeof(uint32_t); if (NULL == exist_entry) { size += EXT2_EXTATTR_LEN(name_len); size += EXT2_EXTATTR_SIZE(new_size); } if (first_entry) for (entry = first_entry; !EXT2_IS_LAST_ENTRY(entry); entry = EXT2_EXTATTR_NEXT(entry)) { if (entry != exist_entry) size += EXT2_EXTATTR_LEN(entry->e_name_len) + EXT2_EXTATTR_SIZE(entry->e_value_size); else size += EXT2_EXTATTR_LEN(entry->e_name_len) + EXT2_EXTATTR_SIZE(new_size); } return (size); } static void ext2_extattr_set_exist_entry(char *off, struct ext2fs_extattr_entry *first_entry, struct ext2fs_extattr_entry *entry, char *end, struct uio *uio) { uint16_t min_offs; min_offs = ext2_extattr_delete_value(off, first_entry, entry, end); entry->e_value_size = uio->uio_resid; if (entry->e_value_size) entry->e_value_offs = min_offs - EXT2_EXTATTR_SIZE(uio->uio_resid); else entry->e_value_offs = 0; uiomove(off + entry->e_value_offs, entry->e_value_size, uio); } static struct ext2fs_extattr_entry * ext2_extattr_set_new_entry(char *off, struct ext2fs_extattr_entry *first_entry, const char *name, int attrnamespace, char *end, struct uio *uio) { int name_len; char *pad; uint16_t min_offs; struct ext2fs_extattr_entry *entry; struct ext2fs_extattr_entry *new_entry; /* Find pad's */ min_offs = end - off; entry = first_entry; while (!EXT2_IS_LAST_ENTRY(entry)) { if (min_offs > entry->e_value_offs && entry->e_value_offs > 0) min_offs = entry->e_value_offs; entry = EXT2_EXTATTR_NEXT(entry); } pad = (char*)entry + sizeof(uint32_t); /* Find entry insert position */ name_len = strlen(name); entry = first_entry; while (!EXT2_IS_LAST_ENTRY(entry)) { if (!(attrnamespace - entry->e_name_index) && !(name_len - entry->e_name_len)) if (memcmp(name, entry->e_name, name_len) <= 0) break; entry = EXT2_EXTATTR_NEXT(entry); } /* Create new entry and insert it */ new_entry = allocate_entry(name, attrnamespace, 0, uio->uio_resid, 0); memmove((char *)entry + EXT2_EXTATTR_LEN(new_entry->e_name_len), entry, pad - (char*)entry); memcpy(entry, new_entry, EXT2_EXTATTR_LEN(new_entry->e_name_len)); free_entry(new_entry); new_entry = entry; if (new_entry->e_value_size > 0) new_entry->e_value_offs = min_offs - EXT2_EXTATTR_SIZE(new_entry->e_value_size); uiomove(off + new_entry->e_value_offs, new_entry->e_value_size, uio); return (new_entry); } int ext2_extattr_inode_set(struct inode *ip, int attrnamespace, const char *name, struct uio *uio) { struct m_ext2fs *fs; struct buf *bp; struct ext2fs_extattr_dinode_header *header; struct ext2fs_extattr_entry *entry; const char *attr_name; int name_len; size_t size = 0, max_size; int error; fs = ip->i_e2fs; if ((error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { brelse(bp); return (error); } struct ext2fs_dinode *dinode = (struct ext2fs_dinode *) ((char *)bp->b_data + EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)); /* Check attributes magic value */ header = (struct ext2fs_extattr_dinode_header *)((char *)dinode + E2FS_REV0_INODE_SIZE + dinode->e2di_extra_isize); if (header->h_magic != EXTATTR_MAGIC) { brelse(bp); return (ENOSPC); } error = ext2_extattr_check(EXT2_IFIRST(header), (char *)dinode + EXT2_INODE_SIZE(fs)); if (error) { brelse(bp); return (error); } /* Find if entry exist */ for (entry = EXT2_IFIRST(header); !EXT2_IS_LAST_ENTRY(entry); entry = EXT2_EXTATTR_NEXT(entry)) { if (ext2_extattr_attrnamespace_to_bsd(entry->e_name_index) != attrnamespace) continue; name_len = entry->e_name_len; attr_name = ext2_extattr_name_to_bsd(entry->e_name_index, entry->e_name, &name_len); if (!attr_name) { brelse(bp); return (ENOTSUP); } if (strlen(name) == name_len && 0 == strncmp(attr_name, name, name_len)) break; } max_size = EXT2_INODE_SIZE(fs) - E2FS_REV0_INODE_SIZE - dinode->e2di_extra_isize; if (!EXT2_IS_LAST_ENTRY(entry)) { size = ext2_extattr_get_size(EXT2_IFIRST(header), entry, sizeof(struct ext2fs_extattr_dinode_header), entry->e_name_len, uio->uio_resid); if (size > max_size) { brelse(bp); return (ENOSPC); } ext2_extattr_set_exist_entry((char *)EXT2_IFIRST(header), EXT2_IFIRST(header), entry, (char *)header + max_size, uio); } else { /* Ensure that the same entry does not exist in the block */ if (ip->i_facl) { error = ext2_extattr_block_get(ip, attrnamespace, name, NULL, &size); if (error != ENOATTR || size > 0) { brelse(bp); if (size > 0) error = ENOSPC; return (error); } } size = ext2_extattr_get_size(EXT2_IFIRST(header), NULL, sizeof(struct ext2fs_extattr_dinode_header), entry->e_name_len, uio->uio_resid); if (size > max_size) { brelse(bp); return (ENOSPC); } ext2_extattr_set_new_entry((char *)EXT2_IFIRST(header), EXT2_IFIRST(header), name, attrnamespace, (char *)header + max_size, uio); } return (bwrite(bp)); } static void ext2_extattr_hash_entry(struct ext2fs_extattr_header *header, struct ext2fs_extattr_entry *entry) { uint32_t hash = 0; char *name = entry->e_name; int n; for (n=0; n < entry->e_name_len; n++) { hash = (hash << EXT2_EXTATTR_NAME_HASH_SHIFT) ^ (hash >> (8*sizeof(hash) - EXT2_EXTATTR_NAME_HASH_SHIFT)) ^ (*name++); } if (entry->e_value_block == 0 && entry->e_value_size != 0) { uint32_t *value = (uint32_t *)((char *)header + entry->e_value_offs); for (n = (entry->e_value_size + EXT2_EXTATTR_ROUND) >> EXT2_EXTATTR_PAD_BITS; n; n--) { hash = (hash << EXT2_EXTATTR_VALUE_HASH_SHIFT) ^ (hash >> (8*sizeof(hash) - EXT2_EXTATTR_VALUE_HASH_SHIFT)) ^ (*value++); } } entry->e_hash = hash; } static void ext2_extattr_rehash(struct ext2fs_extattr_header *header, struct ext2fs_extattr_entry *entry) { struct ext2fs_extattr_entry *here; uint32_t hash = 0; ext2_extattr_hash_entry(header, entry); here = EXT2_ENTRY(header+1); while (!EXT2_IS_LAST_ENTRY(here)) { if (!here->e_hash) { /* Block is not shared if an entry's hash value == 0 */ hash = 0; break; } hash = (hash << EXT2_EXTATTR_BLOCK_HASH_SHIFT) ^ (hash >> (8*sizeof(hash) - EXT2_EXTATTR_BLOCK_HASH_SHIFT)) ^ here->e_hash; here = EXT2_EXTATTR_NEXT(here); } header->h_hash = hash; } int ext2_extattr_block_set(struct inode *ip, int attrnamespace, const char *name, struct uio *uio) { struct m_ext2fs *fs; struct buf *bp; struct ext2fs_extattr_header *header; struct ext2fs_extattr_entry *entry; const char *attr_name; int name_len; size_t size; int error; fs = ip->i_e2fs; if (ip->i_facl) { error = bread(ip->i_devvp, fsbtodb(fs, ip->i_facl), fs->e2fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); return (error); } /* Check attributes magic value */ header = EXT2_HDR(bp); if (header->h_magic != EXTATTR_MAGIC || header->h_blocks != 1) { brelse(bp); return (EINVAL); } error = ext2_extattr_block_check(ip, bp); if (error) { brelse(bp); return (error); } if (header->h_refcount > 1) { error = ext2_extattr_block_clone(ip, &bp); if (error) { brelse(bp); return (error); } header = EXT2_HDR(bp); } /* Find if entry exist */ for (entry = EXT2_FIRST_ENTRY(bp); !EXT2_IS_LAST_ENTRY(entry); entry = EXT2_EXTATTR_NEXT(entry)) { if (ext2_extattr_attrnamespace_to_bsd(entry->e_name_index) != attrnamespace) continue; name_len = entry->e_name_len; attr_name = ext2_extattr_name_to_bsd(entry->e_name_index, entry->e_name, &name_len); if (!attr_name) { brelse(bp); return (ENOTSUP); } if (strlen(name) == name_len && 0 == strncmp(attr_name, name, name_len)) break; } if (!EXT2_IS_LAST_ENTRY(entry)) { size = ext2_extattr_get_size(EXT2_FIRST_ENTRY(bp), entry, sizeof(struct ext2fs_extattr_header), entry->e_name_len, uio->uio_resid); if (size > bp->b_bufsize) { brelse(bp); return (ENOSPC); } ext2_extattr_set_exist_entry(bp->b_data, EXT2_FIRST_ENTRY(bp), entry, bp->b_data + bp->b_bufsize, uio); } else { size = ext2_extattr_get_size(EXT2_FIRST_ENTRY(bp), NULL, sizeof(struct ext2fs_extattr_header), strlen(name), uio->uio_resid); if (size > bp->b_bufsize) { brelse(bp); return (ENOSPC); } entry = ext2_extattr_set_new_entry(bp->b_data, EXT2_FIRST_ENTRY(bp), name, attrnamespace, bp->b_data + bp->b_bufsize, uio); /* Clean the same entry in the inode */ error = ext2_extattr_inode_delete(ip, attrnamespace, name); if (error && error != ENOATTR) { brelse(bp); return (error); } } ext2_extattr_rehash(header, entry); ext2_extattr_blk_csum_set(ip, bp); return (bwrite(bp)); } size = ext2_extattr_get_size(NULL, NULL, sizeof(struct ext2fs_extattr_header), strlen(ext2_extattr_name_to_linux(attrnamespace, name)), uio->uio_resid); if (size > fs->e2fs_bsize) return (ENOSPC); /* Allocate block, fill EA header and insert entry */ ip->i_facl = ext2_alloc_meta(ip); if (0 == ip->i_facl) return (ENOSPC); ip->i_blocks += btodb(fs->e2fs_bsize); ext2_update(ip->i_vnode, 1); bp = getblk(ip->i_devvp, fsbtodb(fs, ip->i_facl), fs->e2fs_bsize, 0, 0, 0); if (!bp) { ext2_blkfree(ip, ip->i_facl, fs->e2fs_bsize); ip->i_blocks -= btodb(fs->e2fs_bsize); ip->i_facl = 0; ext2_update(ip->i_vnode, 1); return (EIO); } header = EXT2_HDR(bp); header->h_magic = EXTATTR_MAGIC; header->h_refcount = 1; header->h_blocks = 1; header->h_hash = 0; memset(header->h_reserved, 0, sizeof(header->h_reserved)); memcpy(bp->b_data, header, sizeof(struct ext2fs_extattr_header)); memset(EXT2_FIRST_ENTRY(bp), 0, sizeof(uint32_t)); entry = ext2_extattr_set_new_entry(bp->b_data, EXT2_FIRST_ENTRY(bp), name, attrnamespace, bp->b_data + bp->b_bufsize, uio); /* Clean the same entry in the inode */ error = ext2_extattr_inode_delete(ip, attrnamespace, name); if (error && error != ENOATTR) { brelse(bp); return (error); } ext2_extattr_rehash(header, entry); ext2_extattr_blk_csum_set(ip, bp); return (bwrite(bp)); } int ext2_extattr_free(struct inode *ip) { struct m_ext2fs *fs; struct buf *bp; struct ext2fs_extattr_header *header; int error; fs = ip->i_e2fs; if (!ip->i_facl) return (0); error = bread(ip->i_devvp, fsbtodb(fs, ip->i_facl), fs->e2fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); return (error); } /* Check attributes magic value */ header = EXT2_HDR(bp); if (header->h_magic != EXTATTR_MAGIC || header->h_blocks != 1) { brelse(bp); return (EINVAL); } error = ext2_extattr_check(EXT2_FIRST_ENTRY(bp), bp->b_data + bp->b_bufsize); if (error) { brelse(bp); return (error); } if (header->h_refcount > 1) { header->h_refcount--; bwrite(bp); } else { ext2_blkfree(ip, ip->i_facl, ip->i_e2fs->e2fs_bsize); brelse(bp); } ip->i_blocks -= btodb(ip->i_e2fs->e2fs_bsize); ip->i_facl = 0; ext2_update(ip->i_vnode, 1); return (0); } Index: head/sys/fs/ext2fs/ext2_extents.c =================================================================== --- head/sys/fs/ext2fs/ext2_extents.c (revision 351925) +++ head/sys/fs/ext2fs/ext2_extents.c (revision 351926) @@ -1,1584 +1,1581 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010 Zheng Liu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SDT_PROVIDER_DECLARE(ext2fs); /* * ext2fs trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(ext2fs, , trace, extents, "int", "char*"); static MALLOC_DEFINE(M_EXT2EXTENTS, "ext2_extents", "EXT2 extents"); #ifdef EXT2FS_PRINT_EXTENTS static void ext4_ext_print_extent(struct ext4_extent *ep) { printf(" ext %p => (blk %u len %u start %ju)\n", ep, ep->e_blk, ep->e_len, (uint64_t)ep->e_start_hi << 32 | ep->e_start_lo); } static void ext4_ext_print_header(struct inode *ip, struct ext4_extent_header *ehp); static void ext4_ext_print_index(struct inode *ip, struct ext4_extent_index *ex, int do_walk) { struct m_ext2fs *fs; struct buf *bp; int error; fs = ip->i_e2fs; printf(" index %p => (blk %u pblk %ju)\n", ex, ex->ei_blk, (uint64_t)ex->ei_leaf_hi << 32 | ex->ei_leaf_lo); if(!do_walk) return; if ((error = bread(ip->i_devvp, fsbtodb(fs, ((uint64_t)ex->ei_leaf_hi << 32 | ex->ei_leaf_lo)), (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { brelse(bp); return; } ext4_ext_print_header(ip, (struct ext4_extent_header *)bp->b_data); brelse(bp); } static void ext4_ext_print_header(struct inode *ip, struct ext4_extent_header *ehp) { int i; printf("header %p => (magic 0x%x entries %d max %d depth %d gen %d)\n", ehp, ehp->eh_magic, ehp->eh_ecount, ehp->eh_max, ehp->eh_depth, ehp->eh_gen); for (i = 0; i < ehp->eh_ecount; i++) if (ehp->eh_depth != 0) ext4_ext_print_index(ip, (struct ext4_extent_index *)(ehp + 1 + i), 1); else ext4_ext_print_extent((struct ext4_extent *)(ehp + 1 + i)); } static void ext4_ext_print_path(struct inode *ip, struct ext4_extent_path *path) { int k, l; l = path->ep_depth; printf("ip=%ju, Path:\n", ip->i_number); for (k = 0; k <= l; k++, path++) { if (path->ep_index) { ext4_ext_print_index(ip, path->ep_index, 0); } else if (path->ep_ext) { ext4_ext_print_extent(path->ep_ext); } } } void ext4_ext_print_extent_tree_status(struct inode *ip) { struct ext4_extent_header *ehp; ehp = (struct ext4_extent_header *)(char *)ip->i_db; printf("Extent status:ip=%ju\n", ip->i_number); if (!(ip->i_flag & IN_E4EXTENTS)) return; ext4_ext_print_header(ip, ehp); return; } #endif static inline struct ext4_extent_header * ext4_ext_inode_header(struct inode *ip) { return ((struct ext4_extent_header *)ip->i_db); } static inline struct ext4_extent_header * ext4_ext_block_header(char *bdata) { return ((struct ext4_extent_header *)bdata); } static inline unsigned short ext4_ext_inode_depth(struct inode *ip) { struct ext4_extent_header *ehp; ehp = (struct ext4_extent_header *)ip->i_data; return (ehp->eh_depth); } static inline e4fs_daddr_t ext4_ext_index_pblock(struct ext4_extent_index *index) { e4fs_daddr_t blk; blk = index->ei_leaf_lo; blk |= (e4fs_daddr_t)index->ei_leaf_hi << 32; return (blk); } static inline void ext4_index_store_pblock(struct ext4_extent_index *index, e4fs_daddr_t pb) { index->ei_leaf_lo = pb & 0xffffffff; index->ei_leaf_hi = (pb >> 32) & 0xffff; } static inline e4fs_daddr_t ext4_ext_extent_pblock(struct ext4_extent *extent) { e4fs_daddr_t blk; blk = extent->e_start_lo; blk |= (e4fs_daddr_t)extent->e_start_hi << 32; return (blk); } static inline void ext4_ext_store_pblock(struct ext4_extent *ex, e4fs_daddr_t pb) { ex->e_start_lo = pb & 0xffffffff; ex->e_start_hi = (pb >> 32) & 0xffff; } int ext4_ext_in_cache(struct inode *ip, daddr_t lbn, struct ext4_extent *ep) { struct ext4_extent_cache *ecp; int ret = EXT4_EXT_CACHE_NO; ecp = &ip->i_ext_cache; if (ecp->ec_type == EXT4_EXT_CACHE_NO) return (ret); if (lbn >= ecp->ec_blk && lbn < ecp->ec_blk + ecp->ec_len) { ep->e_blk = ecp->ec_blk; ep->e_start_lo = ecp->ec_start & 0xffffffff; ep->e_start_hi = ecp->ec_start >> 32 & 0xffff; ep->e_len = ecp->ec_len; ret = ecp->ec_type; } return (ret); } static int ext4_ext_check_header(struct inode *ip, struct ext4_extent_header *eh) { struct m_ext2fs *fs; char *error_msg; fs = ip->i_e2fs; if (eh->eh_magic != EXT4_EXT_MAGIC) { error_msg = "header: invalid magic"; goto corrupted; } if (eh->eh_max == 0) { error_msg = "header: invalid eh_max"; goto corrupted; } if (eh->eh_ecount > eh->eh_max) { error_msg = "header: invalid eh_entries"; goto corrupted; } return (0); corrupted: SDT_PROBE2(ext2fs, , trace, extents, 1, error_msg); return (EIO); } static void ext4_ext_binsearch_index(struct ext4_extent_path *path, int blk) { struct ext4_extent_header *eh; struct ext4_extent_index *r, *l, *m; eh = path->ep_header; KASSERT(eh->eh_ecount <= eh->eh_max && eh->eh_ecount > 0, ("ext4_ext_binsearch_index: bad args")); l = EXT_FIRST_INDEX(eh) + 1; r = EXT_FIRST_INDEX(eh) + eh->eh_ecount - 1; while (l <= r) { m = l + (r - l) / 2; if (blk < m->ei_blk) r = m - 1; else l = m + 1; } path->ep_index = l - 1; } static void ext4_ext_binsearch_ext(struct ext4_extent_path *path, int blk) { struct ext4_extent_header *eh; struct ext4_extent *r, *l, *m; eh = path->ep_header; KASSERT(eh->eh_ecount <= eh->eh_max, ("ext4_ext_binsearch_ext: bad args")); if (eh->eh_ecount == 0) return; l = EXT_FIRST_EXTENT(eh) + 1; r = EXT_FIRST_EXTENT(eh) + eh->eh_ecount - 1; while (l <= r) { m = l + (r - l) / 2; if (blk < m->e_blk) r = m - 1; else l = m + 1; } path->ep_ext = l - 1; } static int ext4_ext_fill_path_bdata(struct ext4_extent_path *path, struct buf *bp, uint64_t blk) { KASSERT(path->ep_data == NULL, ("ext4_ext_fill_path_bdata: bad ep_data")); path->ep_data = malloc(bp->b_bufsize, M_EXT2EXTENTS, M_WAITOK); if (!path->ep_data) return (ENOMEM); memcpy(path->ep_data, bp->b_data, bp->b_bufsize); path->ep_blk = blk; return (0); } static void ext4_ext_fill_path_buf(struct ext4_extent_path *path, struct buf *bp) { KASSERT(path->ep_data != NULL, ("ext4_ext_fill_path_buf: bad ep_data")); memcpy(bp->b_data, path->ep_data, bp->b_bufsize); } static void ext4_ext_drop_refs(struct ext4_extent_path *path) { int depth, i; if (!path) return; depth = path->ep_depth; for (i = 0; i <= depth; i++, path++) if (path->ep_data) { free(path->ep_data, M_EXT2EXTENTS); path->ep_data = NULL; } } void ext4_ext_path_free(struct ext4_extent_path *path) { if (!path) return; ext4_ext_drop_refs(path); free(path, M_EXT2EXTENTS); } int ext4_ext_find_extent(struct inode *ip, daddr_t block, struct ext4_extent_path **ppath) { struct m_ext2fs *fs; struct ext4_extent_header *eh; struct ext4_extent_path *path; struct buf *bp; uint64_t blk; int error, depth, i, ppos, alloc; fs = ip->i_e2fs; eh = ext4_ext_inode_header(ip); depth = ext4_ext_inode_depth(ip); ppos = 0; alloc = 0; error = ext4_ext_check_header(ip, eh); if (error) return (error); if (ppath == NULL) return (EINVAL); path = *ppath; if (path == NULL) { path = malloc(EXT4_EXT_DEPTH_MAX * sizeof(struct ext4_extent_path), M_EXT2EXTENTS, M_WAITOK | M_ZERO); if (!path) return (ENOMEM); *ppath = path; alloc = 1; } path[0].ep_header = eh; path[0].ep_data = NULL; /* Walk through the tree. */ i = depth; while (i) { ext4_ext_binsearch_index(&path[ppos], block); blk = ext4_ext_index_pblock(path[ppos].ep_index); path[ppos].ep_depth = i; path[ppos].ep_ext = NULL; error = bread(ip->i_devvp, fsbtodb(ip->i_e2fs, blk), ip->i_e2fs->e2fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); goto error; } ppos++; if (ppos > depth) { SDT_PROBE2(ext2fs, , trace, extents, 1, "ppos > depth => extent corrupted"); error = EIO; brelse(bp); goto error; } ext4_ext_fill_path_bdata(&path[ppos], bp, blk); bqrelse(bp); eh = ext4_ext_block_header(path[ppos].ep_data); if (ext4_ext_check_header(ip, eh) || ext2_extent_blk_csum_verify(ip, path[ppos].ep_data)) { error = EIO; goto error; } path[ppos].ep_header = eh; i--; } error = ext4_ext_check_header(ip, eh); if (error) goto error; /* Find extent. */ path[ppos].ep_depth = i; path[ppos].ep_header = eh; path[ppos].ep_ext = NULL; path[ppos].ep_index = NULL; ext4_ext_binsearch_ext(&path[ppos], block); return (0); error: ext4_ext_drop_refs(path); if (alloc) free(path, M_EXT2EXTENTS); *ppath = NULL; return (error); } static inline int ext4_ext_space_root(struct inode *ip) { int size; size = sizeof(ip->i_data); size -= sizeof(struct ext4_extent_header); size /= sizeof(struct ext4_extent); return (size); } static inline int ext4_ext_space_block(struct inode *ip) { struct m_ext2fs *fs; int size; fs = ip->i_e2fs; size = (fs->e2fs_bsize - sizeof(struct ext4_extent_header)) / sizeof(struct ext4_extent); return (size); } static inline int ext4_ext_space_block_index(struct inode *ip) { struct m_ext2fs *fs; int size; fs = ip->i_e2fs; size = (fs->e2fs_bsize - sizeof(struct ext4_extent_header)) / sizeof(struct ext4_extent_index); return (size); } void ext4_ext_tree_init(struct inode *ip) { struct ext4_extent_header *ehp; ip->i_flag |= IN_E4EXTENTS; memset(ip->i_data, 0, EXT2_NDADDR + EXT2_NIADDR); ehp = (struct ext4_extent_header *)ip->i_data; ehp->eh_magic = EXT4_EXT_MAGIC; ehp->eh_max = ext4_ext_space_root(ip); ip->i_ext_cache.ec_type = EXT4_EXT_CACHE_NO; ip->i_flag |= IN_CHANGE | IN_UPDATE; ext2_update(ip->i_vnode, 1); } static inline void ext4_ext_put_in_cache(struct inode *ip, uint32_t blk, uint32_t len, uint32_t start, int type) { KASSERT(len != 0, ("ext4_ext_put_in_cache: bad input")); ip->i_ext_cache.ec_type = type; ip->i_ext_cache.ec_blk = blk; ip->i_ext_cache.ec_len = len; ip->i_ext_cache.ec_start = start; } static e4fs_daddr_t ext4_ext_blkpref(struct inode *ip, struct ext4_extent_path *path, e4fs_daddr_t block) { struct m_ext2fs *fs; struct ext4_extent *ex; e4fs_daddr_t bg_start; int depth; fs = ip->i_e2fs; if (path) { depth = path->ep_depth; ex = path[depth].ep_ext; if (ex) { e4fs_daddr_t pblk = ext4_ext_extent_pblock(ex); e2fs_daddr_t blk = ex->e_blk; if (block > blk) return (pblk + (block - blk)); else return (pblk - (blk - block)); } /* Try to get block from index itself. */ if (path[depth].ep_data) return (path[depth].ep_blk); } /* Use inode's group. */ bg_start = (ip->i_block_group * EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) + fs->e2fs->e2fs_first_dblock; return (bg_start + block); } static int inline ext4_can_extents_be_merged(struct ext4_extent *ex1, struct ext4_extent *ex2) { if (ex1->e_blk + ex1->e_len != ex2->e_blk) return (0); if (ex1->e_len + ex2->e_len > EXT4_MAX_LEN) return (0); if (ext4_ext_extent_pblock(ex1) + ex1->e_len == ext4_ext_extent_pblock(ex2)) return (1); return (0); } static unsigned ext4_ext_next_leaf_block(struct inode *ip, struct ext4_extent_path *path) { int depth = path->ep_depth; /* Empty tree */ if (depth == 0) return (EXT4_MAX_BLOCKS); /* Go to indexes. */ depth--; while (depth >= 0) { if (path[depth].ep_index != EXT_LAST_INDEX(path[depth].ep_header)) return (path[depth].ep_index[1].ei_blk); depth--; } return (EXT4_MAX_BLOCKS); } static int ext4_ext_dirty(struct inode *ip, struct ext4_extent_path *path) { struct m_ext2fs *fs; struct buf *bp; uint64_t blk; int error; fs = ip->i_e2fs; if (!path) return (EINVAL); if (path->ep_data) { blk = path->ep_blk; bp = getblk(ip->i_devvp, fsbtodb(fs, blk), fs->e2fs_bsize, 0, 0, 0); if (!bp) return (EIO); ext4_ext_fill_path_buf(path, bp); ext2_extent_blk_csum_set(ip, bp->b_data); error = bwrite(bp); } else { ip->i_flag |= IN_CHANGE | IN_UPDATE; error = ext2_update(ip->i_vnode, 1); } return (error); } static int ext4_ext_insert_index(struct inode *ip, struct ext4_extent_path *path, uint32_t lblk, e4fs_daddr_t blk) { struct m_ext2fs *fs; struct ext4_extent_index *idx; int len; fs = ip->i_e2fs; if (lblk == path->ep_index->ei_blk) { SDT_PROBE2(ext2fs, , trace, extents, 1, "lblk == index blk => extent corrupted"); return (EIO); } if (path->ep_header->eh_ecount >= path->ep_header->eh_max) { SDT_PROBE2(ext2fs, , trace, extents, 1, "ecout > maxcount => extent corrupted"); return (EIO); } if (lblk > path->ep_index->ei_blk) { /* Insert after. */ idx = path->ep_index + 1; } else { /* Insert before. */ idx = path->ep_index; } len = EXT_LAST_INDEX(path->ep_header) - idx + 1; if (len > 0) memmove(idx + 1, idx, len * sizeof(struct ext4_extent_index)); if (idx > EXT_MAX_INDEX(path->ep_header)) { SDT_PROBE2(ext2fs, , trace, extents, 1, "index is out of range => extent corrupted"); return (EIO); } idx->ei_blk = lblk; ext4_index_store_pblock(idx, blk); path->ep_header->eh_ecount++; return (ext4_ext_dirty(ip, path)); } static e4fs_daddr_t ext4_ext_alloc_meta(struct inode *ip) { e4fs_daddr_t blk = ext2_alloc_meta(ip); if (blk) { ip->i_blocks += btodb(ip->i_e2fs->e2fs_bsize); ip->i_flag |= IN_CHANGE | IN_UPDATE; ext2_update(ip->i_vnode, 1); } return (blk); } static void ext4_ext_blkfree(struct inode *ip, uint64_t blk, int count, int flags) { struct m_ext2fs *fs; int i, blocksreleased; fs = ip->i_e2fs; blocksreleased = count; for(i = 0; i < count; i++) ext2_blkfree(ip, blk + i, fs->e2fs_bsize); if (ip->i_blocks >= blocksreleased) ip->i_blocks -= (btodb(fs->e2fs_bsize)*blocksreleased); else ip->i_blocks = 0; ip->i_flag |= IN_CHANGE | IN_UPDATE; ext2_update(ip->i_vnode, 1); } static int ext4_ext_split(struct inode *ip, struct ext4_extent_path *path, struct ext4_extent *newext, int at) { struct m_ext2fs *fs; struct buf *bp; int depth = ext4_ext_inode_depth(ip); struct ext4_extent_header *neh; struct ext4_extent_index *fidx; struct ext4_extent *ex; int i = at, k, m, a; e4fs_daddr_t newblk, oldblk; uint32_t border; e4fs_daddr_t *ablks = NULL; int error = 0; fs = ip->i_e2fs; bp = NULL; /* * We will split at current extent for now. */ if (path[depth].ep_ext > EXT_MAX_EXTENT(path[depth].ep_header)) { SDT_PROBE2(ext2fs, , trace, extents, 1, "extent is out of range => extent corrupted"); return (EIO); } if (path[depth].ep_ext != EXT_MAX_EXTENT(path[depth].ep_header)) border = path[depth].ep_ext[1].e_blk; else border = newext->e_blk; /* Allocate new blocks. */ ablks = malloc(sizeof(e4fs_daddr_t) * depth, M_EXT2EXTENTS, M_WAITOK | M_ZERO); if (!ablks) return (ENOMEM); for (a = 0; a < depth - at; a++) { newblk = ext4_ext_alloc_meta(ip); if (newblk == 0) goto cleanup; ablks[a] = newblk; } newblk = ablks[--a]; bp = getblk(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, 0, 0, 0); if (!bp) { error = EIO; goto cleanup; } neh = ext4_ext_block_header(bp->b_data); neh->eh_ecount = 0; neh->eh_max = ext4_ext_space_block(ip); neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_depth = 0; ex = EXT_FIRST_EXTENT(neh); if (path[depth].ep_header->eh_ecount != path[depth].ep_header->eh_max) { SDT_PROBE2(ext2fs, , trace, extents, 1, "extents count out of range => extent corrupted"); error = EIO; goto cleanup; } /* Start copy from next extent. */ m = 0; path[depth].ep_ext++; while (path[depth].ep_ext <= EXT_MAX_EXTENT(path[depth].ep_header)) { path[depth].ep_ext++; m++; } if (m) { memmove(ex, path[depth].ep_ext - m, sizeof(struct ext4_extent) * m); neh->eh_ecount = neh->eh_ecount + m; } ext2_extent_blk_csum_set(ip, bp->b_data); bwrite(bp); bp = NULL; /* Fix old leaf. */ if (m) { path[depth].ep_header->eh_ecount = path[depth].ep_header->eh_ecount - m; ext4_ext_dirty(ip, path + depth); } /* Create intermediate indexes. */ k = depth - at - 1; KASSERT(k >= 0, ("ext4_ext_split: negative k")); /* Insert new index into current index block. */ i = depth - 1; while (k--) { oldblk = newblk; newblk = ablks[--a]; error = bread(ip->i_devvp, fsbtodb(fs, newblk), (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); goto cleanup; } neh = (struct ext4_extent_header *)bp->b_data; neh->eh_ecount = 1; neh->eh_magic = EXT4_EXT_MAGIC; neh->eh_max = ext4_ext_space_block_index(ip); neh->eh_depth = depth - i; fidx = EXT_FIRST_INDEX(neh); fidx->ei_blk = border; ext4_index_store_pblock(fidx, oldblk); m = 0; path[i].ep_index++; while (path[i].ep_index <= EXT_MAX_INDEX(path[i].ep_header)) { path[i].ep_index++; m++; } if (m) { memmove(++fidx, path[i].ep_index - m, sizeof(struct ext4_extent_index) * m); neh->eh_ecount = neh->eh_ecount + m; } ext2_extent_blk_csum_set(ip, bp->b_data); bwrite(bp); bp = NULL; /* Fix old index. */ if (m) { path[i].ep_header->eh_ecount = path[i].ep_header->eh_ecount - m; ext4_ext_dirty(ip, path + i); } i--; } error = ext4_ext_insert_index(ip, path + at, border, newblk); cleanup: if (bp) brelse(bp); if (error) { for (i = 0; i < depth; i++) { if (!ablks[i]) continue; ext4_ext_blkfree(ip, ablks[i], 1, 0); } } free(ablks, M_EXT2EXTENTS); return (error); } static int ext4_ext_grow_indepth(struct inode *ip, struct ext4_extent_path *path, struct ext4_extent *newext) { struct m_ext2fs *fs; struct ext4_extent_path *curpath; struct ext4_extent_header *neh; struct buf *bp; e4fs_daddr_t newblk; int error = 0; fs = ip->i_e2fs; curpath = path; newblk = ext4_ext_alloc_meta(ip); if (newblk == 0) return (error); bp = getblk(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, 0, 0, 0); if (!bp) return (EIO); /* Move top-level index/leaf into new block. */ memmove(bp->b_data, curpath->ep_header, sizeof(ip->i_data)); /* Set size of new block */ neh = ext4_ext_block_header(bp->b_data); neh->eh_magic = EXT4_EXT_MAGIC; if (ext4_ext_inode_depth(ip)) neh->eh_max = ext4_ext_space_block_index(ip); else neh->eh_max = ext4_ext_space_block(ip); ext2_extent_blk_csum_set(ip, bp->b_data); error = bwrite(bp); if (error) goto out; bp = NULL; curpath->ep_header->eh_magic = EXT4_EXT_MAGIC; curpath->ep_header->eh_max = ext4_ext_space_root(ip); curpath->ep_header->eh_ecount = 1; curpath->ep_index = EXT_FIRST_INDEX(curpath->ep_header); curpath->ep_index->ei_blk = EXT_FIRST_EXTENT(path[0].ep_header)->e_blk; ext4_index_store_pblock(curpath->ep_index, newblk); neh = ext4_ext_inode_header(ip); neh->eh_depth = path->ep_depth + 1; ext4_ext_dirty(ip, curpath); out: brelse(bp); return (error); } static int ext4_ext_create_new_leaf(struct inode *ip, struct ext4_extent_path *path, struct ext4_extent *newext) { struct ext4_extent_path *curpath; int depth, i, error; repeat: i = depth = ext4_ext_inode_depth(ip); /* Look for free index entry int the tree */ curpath = path + depth; while (i > 0 && !EXT_HAS_FREE_INDEX(curpath)) { i--; curpath--; } /* * We use already allocated block for index block, * so subsequent data blocks should be contiguous. */ if (EXT_HAS_FREE_INDEX(curpath)) { error = ext4_ext_split(ip, path, newext, i); if (error) goto out; /* Refill path. */ ext4_ext_drop_refs(path); error = ext4_ext_find_extent(ip, newext->e_blk, &path); if (error) goto out; } else { /* Tree is full, do grow in depth. */ error = ext4_ext_grow_indepth(ip, path, newext); if (error) goto out; /* Refill path. */ ext4_ext_drop_refs(path); error = ext4_ext_find_extent(ip, newext->e_blk, &path); if (error) goto out; /* Check and split tree if required. */ depth = ext4_ext_inode_depth(ip); if (path[depth].ep_header->eh_ecount == path[depth].ep_header->eh_max) goto repeat; } out: return (error); } static int ext4_ext_correct_indexes(struct inode *ip, struct ext4_extent_path *path) { struct ext4_extent_header *eh; struct ext4_extent *ex; int32_t border; int depth, k; depth = ext4_ext_inode_depth(ip); eh = path[depth].ep_header; ex = path[depth].ep_ext; if (ex == NULL || eh == NULL) return (EIO); if (!depth) return (0); /* We will correct tree if first leaf got modified only. */ if (ex != EXT_FIRST_EXTENT(eh)) return (0); k = depth - 1; border = path[depth].ep_ext->e_blk; path[k].ep_index->ei_blk = border; ext4_ext_dirty(ip, path + k); while (k--) { /* Change all left-side indexes. */ if (path[k+1].ep_index != EXT_FIRST_INDEX(path[k+1].ep_header)) break; path[k].ep_index->ei_blk = border; ext4_ext_dirty(ip, path + k); } return (0); } static int ext4_ext_insert_extent(struct inode *ip, struct ext4_extent_path *path, struct ext4_extent *newext) { struct ext4_extent_header * eh; struct ext4_extent *ex, *nex, *nearex; struct ext4_extent_path *npath; int depth, len, error, next; depth = ext4_ext_inode_depth(ip); ex = path[depth].ep_ext; npath = NULL; if (newext->e_len == 0 || path[depth].ep_header == NULL) return (EINVAL); /* Insert block into found extent. */ if (ex && ext4_can_extents_be_merged(ex, newext)) { ex->e_len = ex->e_len + newext->e_len; eh = path[depth].ep_header; nearex = ex; goto merge; } repeat: depth = ext4_ext_inode_depth(ip); eh = path[depth].ep_header; if (eh->eh_ecount < eh->eh_max) goto has_space; /* Try next leaf */ nex = EXT_LAST_EXTENT(eh); next = ext4_ext_next_leaf_block(ip, path); if (newext->e_blk > nex->e_blk && next != EXT4_MAX_BLOCKS) { KASSERT(npath == NULL, ("ext4_ext_insert_extent: bad path")); error = ext4_ext_find_extent(ip, next, &npath); if (error) goto cleanup; if (npath->ep_depth != path->ep_depth) { error = EIO; goto cleanup; } eh = npath[depth].ep_header; if (eh->eh_ecount < eh->eh_max) { path = npath; goto repeat; } } /* * There is no free space in the found leaf, * try to add a new leaf to the tree. */ error = ext4_ext_create_new_leaf(ip, path, newext); if (error) goto cleanup; depth = ext4_ext_inode_depth(ip); eh = path[depth].ep_header; has_space: nearex = path[depth].ep_ext; if (!nearex) { /* Create new extent in the leaf. */ path[depth].ep_ext = EXT_FIRST_EXTENT(eh); } else if (newext->e_blk > nearex->e_blk) { if (nearex != EXT_LAST_EXTENT(eh)) { len = EXT_MAX_EXTENT(eh) - nearex; len = (len - 1) * sizeof(struct ext4_extent); len = len < 0 ? 0 : len; memmove(nearex + 2, nearex + 1, len); } path[depth].ep_ext = nearex + 1; } else { len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); len = len < 0 ? 0 : len; memmove(nearex + 1, nearex, len); path[depth].ep_ext = nearex; } eh->eh_ecount = eh->eh_ecount + 1; nearex = path[depth].ep_ext; nearex->e_blk = newext->e_blk; nearex->e_start_lo = newext->e_start_lo; nearex->e_start_hi = newext->e_start_hi; nearex->e_len = newext->e_len; merge: /* Try to merge extents to the right. */ while (nearex < EXT_LAST_EXTENT(eh)) { if (!ext4_can_extents_be_merged(nearex, nearex + 1)) break; /* Merge with next extent. */ nearex->e_len = nearex->e_len + nearex[1].e_len; if (nearex + 1 < EXT_LAST_EXTENT(eh)) { len = (EXT_LAST_EXTENT(eh) - nearex - 1) * sizeof(struct ext4_extent); memmove(nearex + 1, nearex + 2, len); } eh->eh_ecount = eh->eh_ecount - 1; KASSERT(eh->eh_ecount != 0, ("ext4_ext_insert_extent: bad ecount")); } /* * Try to merge extents to the left, * start from inexes correction. */ error = ext4_ext_correct_indexes(ip, path); if (error) goto cleanup; ext4_ext_dirty(ip, path + depth); cleanup: if (npath) { ext4_ext_drop_refs(npath); free(npath, M_EXT2EXTENTS); } ip->i_ext_cache.ec_type = EXT4_EXT_CACHE_NO; return (error); } static e4fs_daddr_t ext4_new_blocks(struct inode *ip, daddr_t lbn, e4fs_daddr_t pref, struct ucred *cred, unsigned long *count, int *perror) { struct m_ext2fs *fs; e4fs_daddr_t newblk; /* * We will allocate only single block for now. */ if (*count > 1) return (0); fs = ip->i_e2fs; EXT2_LOCK(ip->i_ump); *perror = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newblk); if (*perror) return (0); if (newblk) { ip->i_flag |= IN_CHANGE | IN_UPDATE; ext2_update(ip->i_vnode, 1); } return (newblk); } int ext4_ext_get_blocks(struct inode *ip, e4fs_daddr_t iblk, unsigned long max_blocks, struct ucred *cred, struct buf **bpp, int *pallocated, daddr_t *nb) { struct m_ext2fs *fs; struct buf *bp = NULL; struct ext4_extent_path *path; struct ext4_extent newex, *ex; e4fs_daddr_t bpref, newblk = 0; unsigned long allocated = 0; int error = 0, depth; if(bpp) *bpp = NULL; *pallocated = 0; /* Check cache. */ path = NULL; if ((bpref = ext4_ext_in_cache(ip, iblk, &newex))) { if (bpref == EXT4_EXT_CACHE_IN) { /* Block is already allocated. */ newblk = iblk - newex.e_blk + ext4_ext_extent_pblock(&newex); allocated = newex.e_len - (iblk - newex.e_blk); goto out; } else { error = EIO; goto out2; } } error = ext4_ext_find_extent(ip, iblk, &path); if (error) { goto out2; } depth = ext4_ext_inode_depth(ip); if (path[depth].ep_ext == NULL && depth != 0) { error = EIO; goto out2; } if ((ex = path[depth].ep_ext)) { uint64_t lblk = ex->e_blk; uint16_t e_len = ex->e_len; e4fs_daddr_t e_start = ext4_ext_extent_pblock(ex); if (e_len > EXT4_MAX_LEN) goto out2; /* If we found extent covers block, simply return it. */ if (iblk >= lblk && iblk < lblk + e_len) { newblk = iblk - lblk + e_start; allocated = e_len - (iblk - lblk); ext4_ext_put_in_cache(ip, lblk, e_len, e_start, EXT4_EXT_CACHE_IN); goto out; } } /* Allocate the new block. */ if (S_ISREG(ip->i_mode) && (!ip->i_next_alloc_block)) { ip->i_next_alloc_goal = 0; } bpref = ext4_ext_blkpref(ip, path, iblk); allocated = max_blocks; newblk = ext4_new_blocks(ip, iblk, bpref, cred, &allocated, &error); if (!newblk) goto out2; /* Try to insert new extent into found leaf and return. */ newex.e_blk = iblk; ext4_ext_store_pblock(&newex, newblk); newex.e_len = allocated; error = ext4_ext_insert_extent(ip, path, &newex); if (error) goto out2; newblk = ext4_ext_extent_pblock(&newex); ext4_ext_put_in_cache(ip, iblk, allocated, newblk, EXT4_EXT_CACHE_IN); *pallocated = 1; out: if (allocated > max_blocks) allocated = max_blocks; if (bpp) { fs = ip->i_e2fs; error = bread(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, cred, &bp); if (error) { brelse(bp); } else { *bpp = bp; } } out2: if (path) { ext4_ext_drop_refs(path); free(path, M_EXT2EXTENTS); } if (nb) *nb = newblk; return (error); } static inline uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext) { return (ext->e_len <= EXT_INIT_MAX_LEN ? ext->e_len : (ext->e_len - EXT_INIT_MAX_LEN)); } static inline struct ext4_extent_header * ext4_ext_header(struct inode *ip) { return ((struct ext4_extent_header *)ip->i_db); } static int ext4_remove_blocks(struct inode *ip, struct ext4_extent *ex, unsigned long from, unsigned long to) { unsigned long num, start; if (from >= ex->e_blk && to == ex->e_blk + ext4_ext_get_actual_len(ex) - 1) { /* Tail cleanup. */ num = ex->e_blk + ext4_ext_get_actual_len(ex) - from; start = ext4_ext_extent_pblock(ex) + ext4_ext_get_actual_len(ex) - num; ext4_ext_blkfree(ip, start, num, 0); } return (0); } static int ext4_ext_rm_index(struct inode *ip, struct ext4_extent_path *path) { e4fs_daddr_t leaf; /* Free index block. */ path--; leaf = ext4_ext_index_pblock(path->ep_index); KASSERT(path->ep_header->eh_ecount != 0, ("ext4_ext_rm_index: bad ecount")); path->ep_header->eh_ecount--; ext4_ext_dirty(ip, path); ext4_ext_blkfree(ip, leaf, 1, 0); return (0); } static int ext4_ext_rm_leaf(struct inode *ip, struct ext4_extent_path *path, uint64_t start) { struct ext4_extent_header *eh; struct ext4_extent *ex; unsigned int a, b, block, num; unsigned long ex_blk; unsigned short ex_len; int depth; int error, correct_index; depth = ext4_ext_inode_depth(ip); if (!path[depth].ep_header) { if (path[depth].ep_data == NULL) return (EINVAL); path[depth].ep_header = (struct ext4_extent_header* )path[depth].ep_data; } eh = path[depth].ep_header; if (!eh) { SDT_PROBE2(ext2fs, , trace, extents, 1, "bad header => extent corrupted"); return (EIO); } ex = EXT_LAST_EXTENT(eh); ex_blk = ex->e_blk; ex_len = ext4_ext_get_actual_len(ex); error = 0; correct_index = 0; while (ex >= EXT_FIRST_EXTENT(eh) && ex_blk + ex_len > start) { path[depth].ep_ext = ex; a = ex_blk > start ? ex_blk : start; b = (uint64_t)ex_blk + ex_len - 1 < EXT4_MAX_BLOCKS ? ex_blk + ex_len - 1 : EXT4_MAX_BLOCKS; if (a != ex_blk && b != ex_blk + ex_len - 1) return (EINVAL); else if (a != ex_blk) { /* Remove tail of the extent. */ block = ex_blk; num = a - block; } else if (b != ex_blk + ex_len - 1) { /* Remove head of the extent, not implemented. */ return (EINVAL); } else { /* Remove whole extent. */ block = ex_blk; num = 0; } if (ex == EXT_FIRST_EXTENT(eh)) correct_index = 1; error = ext4_remove_blocks(ip, ex, a, b); if (error) goto out; if (num == 0) { ext4_ext_store_pblock(ex, 0); eh->eh_ecount--; } ex->e_blk = block; ex->e_len = num; ext4_ext_dirty(ip, path + depth); ex--; ex_blk = ex->e_blk; ex_len = ext4_ext_get_actual_len(ex); }; if (correct_index && eh->eh_ecount) error = ext4_ext_correct_indexes(ip, path); /* * If this leaf is free, we should * remove it from index block above. */ if (error == 0 && eh->eh_ecount == 0 && path[depth].ep_data != NULL) error = ext4_ext_rm_index(ip, path + depth); out: return (error); } static struct buf * ext4_read_extent_tree_block(struct inode *ip, e4fs_daddr_t pblk, int depth, int flags) { struct m_ext2fs *fs; struct ext4_extent_header *eh; struct buf *bp; int error; fs = ip->i_e2fs; error = bread(ip->i_devvp, fsbtodb(fs, pblk), fs->e2fs_bsize, NOCRED, &bp); if (error) { - brelse(bp); return (NULL); } eh = ext4_ext_block_header(bp->b_data); if (eh->eh_depth != depth) { SDT_PROBE2(ext2fs, , trace, extents, 1, "unexpected eh_depth"); goto err; } error = ext4_ext_check_header(ip, eh); if (error) goto err; return (bp); err: brelse(bp); return (NULL); } static int inline ext4_ext_more_to_rm(struct ext4_extent_path *path) { KASSERT(path->ep_index != NULL, ("ext4_ext_more_to_rm: bad index from path")); if (path->ep_index < EXT_FIRST_INDEX(path->ep_header)) return (0); if (path->ep_header->eh_ecount == path->index_count) return (0); return (1); } int ext4_ext_remove_space(struct inode *ip, off_t length, int flags, struct ucred *cred, struct thread *td) { struct buf *bp; struct ext4_extent_header *ehp; struct ext4_extent_path *path; int depth; int i, error; ehp = (struct ext4_extent_header *)ip->i_db; depth = ext4_ext_inode_depth(ip); error = ext4_ext_check_header(ip, ehp); if(error) return (error); path = malloc(sizeof(struct ext4_extent_path) * (depth + 1), M_EXT2EXTENTS, M_WAITOK | M_ZERO); if (!path) return (ENOMEM); path[0].ep_header = ehp; path[0].ep_depth = depth; i = 0; while (error == 0 && i >= 0) { if (i == depth) { /* This is leaf. */ error = ext4_ext_rm_leaf(ip, path, length); if (error) break; free(path[i].ep_data, M_EXT2EXTENTS); path[i].ep_data = NULL; i--; continue; } /* This is index. */ if (!path[i].ep_header) path[i].ep_header = (struct ext4_extent_header *)path[i].ep_data; if (!path[i].ep_index) { /* This level hasn't touched yet. */ path[i].ep_index = EXT_LAST_INDEX(path[i].ep_header); path[i].index_count = path[i].ep_header->eh_ecount + 1; } else { /* We've already was here, see at next index. */ path[i].ep_index--; } if (ext4_ext_more_to_rm(path + i)) { memset(path + i + 1, 0, sizeof(*path)); bp = ext4_read_extent_tree_block(ip, ext4_ext_index_pblock(path[i].ep_index), path[0].ep_depth - (i + 1), 0); if (!bp) { error = EIO; break; } ext4_ext_fill_path_bdata(&path[i+1], bp, ext4_ext_index_pblock(path[i].ep_index)); brelse(bp); path[i].index_count = path[i].ep_header->eh_ecount; i++; } else { if (path[i].ep_header->eh_ecount == 0 && i > 0) { /* Index is empty, remove it. */ error = ext4_ext_rm_index(ip, path + i); } free(path[i].ep_data, M_EXT2EXTENTS); path[i].ep_data = NULL; i--; } } if (path->ep_header->eh_ecount == 0) { /* * Truncate the tree to zero. */ ext4_ext_header(ip)->eh_depth = 0; ext4_ext_header(ip)->eh_max = ext4_ext_space_root(ip); ext4_ext_dirty(ip, path); } ext4_ext_drop_refs(path); free(path, M_EXT2EXTENTS); return (error); } Index: head/sys/fs/ext2fs/ext2_vfsops.c =================================================================== --- head/sys/fs/ext2fs/ext2_vfsops.c (revision 351925) +++ head/sys/fs/ext2fs/ext2_vfsops.c (revision 351926) @@ -1,1431 +1,1430 @@ /*- * modified for EXT2FS support in Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SDT_PROVIDER_DECLARE(ext2fs); /* * ext2fs trace probe: * arg0: verbosity. Higher numbers give more verbose messages * arg1: Textual message */ SDT_PROBE_DEFINE2(ext2fs, , vfsops, trace, "int", "char*"); SDT_PROBE_DEFINE2(ext2fs, , vfsops, ext2_cg_validate_error, "char*", "int"); SDT_PROBE_DEFINE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "char*"); static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); static int ext2_mountfs(struct vnode *, struct mount *); static int ext2_reload(struct mount *mp, struct thread *td); static int ext2_sbupdate(struct ext2mount *, int); static int ext2_cgupdate(struct ext2mount *, int); static vfs_unmount_t ext2_unmount; static vfs_root_t ext2_root; static vfs_statfs_t ext2_statfs; static vfs_sync_t ext2_sync; static vfs_vget_t ext2_vget; static vfs_fhtovp_t ext2_fhtovp; static vfs_mount_t ext2_mount; MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); static struct vfsops ext2fs_vfsops = { .vfs_fhtovp = ext2_fhtovp, .vfs_mount = ext2_mount, .vfs_root = ext2_root, /* root inode via vget */ .vfs_statfs = ext2_statfs, .vfs_sync = ext2_sync, .vfs_unmount = ext2_unmount, .vfs_vget = ext2_vget, }; VFS_SET(ext2fs_vfsops, ext2fs, 0); static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly); static int ext2_compute_sb_data(struct vnode * devvp, struct ext2fs * es, struct m_ext2fs * fs); static const char *ext2_opts[] = { "acls", "async", "noatime", "noclusterr", "noclusterw", "noexec", "export", "force", "from", "multilabel", "suiddir", "nosymfollow", "sync", "union", NULL }; /* * VFS Operations. * * mount system call */ static int ext2_mount(struct mount *mp) { struct vfsoptlist *opts; struct vnode *devvp; struct thread *td; struct ext2mount *ump = NULL; struct m_ext2fs *fs; struct nameidata nd, *ndp = &nd; accmode_t accmode; char *path, *fspec; int error, flags, len; td = curthread; opts = mp->mnt_optnew; if (vfs_filteropt(opts, ext2_opts)) return (EINVAL); vfs_getopt(opts, "fspath", (void **)&path, NULL); /* Double-check the length of path.. */ if (strlen(path) >= MAXMNTLEN) return (ENAMETOOLONG); fspec = NULL; error = vfs_getopt(opts, "from", (void **)&fspec, &len); if (!error && fspec[len - 1] != '\0') return (EINVAL); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOEXT2(mp); fs = ump->um_e2fs; error = 0; if (fs->e2fs_ronly == 0 && vfs_flagopt(opts, "ro", NULL, 0)) { error = VFS_SYNC(mp, MNT_WAIT); if (error) return (error); flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; error = ext2_flushfiles(mp, flags, td); if (error == 0 && fs->e2fs_wasvalid && ext2_cgupdate(ump, MNT_WAIT) == 0) { fs->e2fs->e2fs_state |= E2FS_ISCLEAN; ext2_sbupdate(ump, MNT_WAIT); } fs->e2fs_ronly = 1; vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); g_topology_lock(); g_access(ump->um_cp, 0, -1, 0); g_topology_unlock(); } if (!error && (mp->mnt_flag & MNT_RELOAD)) error = ext2_reload(mp, td); if (error) return (error); devvp = ump->um_devvp; if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) { if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0)) return (EPERM); /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_ACCESS(devvp, VREAD | VWRITE, td->td_ucred, td); if (error) error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { VOP_UNLOCK(devvp, 0); return (error); } VOP_UNLOCK(devvp, 0); g_topology_lock(); error = g_access(ump->um_cp, 0, 1, 0); g_topology_unlock(); if (error) return (error); if ((fs->e2fs->e2fs_state & E2FS_ISCLEAN) == 0 || (fs->e2fs->e2fs_state & E2FS_ERRORS)) { if (mp->mnt_flag & MNT_FORCE) { printf( "WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt); } else { printf( "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", fs->e2fs_fsmnt); return (EPERM); } } fs->e2fs->e2fs_state &= ~E2FS_ISCLEAN; (void)ext2_cgupdate(ump, MNT_WAIT); fs->e2fs_ronly = 0; MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); } if (vfs_flagopt(opts, "export", NULL, 0)) { /* Process export requests in vfs_mount.c. */ return (error); } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible disk device. */ if (fspec == NULL) return (EINVAL); NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); if ((error = namei(ndp)) != 0) return (error); NDFREE(ndp, NDF_ONLY_PNBUF); devvp = ndp->ni_vp; if (!vn_isdisk(devvp, &error)) { vput(devvp); return (error); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. * * XXXRW: VOP_ACCESS() enough? */ accmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accmode |= VWRITE; error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); if (error) error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { vput(devvp); return (error); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { error = ext2_mountfs(devvp, mp); } else { if (devvp != ump->um_devvp) { vput(devvp); return (EINVAL); /* needs translation */ } else vput(devvp); } if (error) { vrele(devvp); return (error); } ump = VFSTOEXT2(mp); fs = ump->um_e2fs; /* * Note that this strncpy() is ok because of a check at the start * of ext2_mount(). */ strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN); fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0'; vfs_mountedfrom(mp, fspec); return (0); } static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly) { uint32_t i, mask; if (es->e2fs_magic != E2FS_MAGIC) { printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", devtoname(dev), es->e2fs_magic, E2FS_MAGIC); return (1); } if (es->e2fs_rev > E2FS_REV0) { mask = es->e2fs_features_incompat & ~(EXT2F_INCOMPAT_SUPP); if (mask) { printf("WARNING: mount of %s denied due to " "unsupported optional features:\n", devtoname(dev)); for (i = 0; i < sizeof(incompat)/sizeof(struct ext2_feature); i++) if (mask & incompat[i].mask) printf("%s ", incompat[i].name); printf("\n"); return (1); } mask = es->e2fs_features_rocompat & ~EXT2F_ROCOMPAT_SUPP; if (!ronly && mask) { printf("WARNING: R/W mount of %s denied due to " "unsupported optional features:\n", devtoname(dev)); for (i = 0; i < sizeof(ro_compat)/sizeof(struct ext2_feature); i++) if (mask & ro_compat[i].mask) printf("%s ", ro_compat[i].name); printf("\n"); return (1); } } return (0); } static e4fs_daddr_t ext2_cg_location(struct m_ext2fs *fs, int number) { int cg, descpb, logical_sb, has_super = 0; /* * Adjust logical superblock block number. * Godmar thinks: if the blocksize is greater than 1024, then * the superblock is logically part of block zero. */ logical_sb = fs->e2fs_bsize > SBSIZE ? 0 : 1; if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_META_BG) || number < fs->e2fs->e3fs_first_meta_bg) return (logical_sb + number + 1); if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) descpb = fs->e2fs_bsize / sizeof(struct ext2_gd); else descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; cg = descpb * number; if (ext2_cg_has_sb(fs, cg)) has_super = 1; return (has_super + cg * (e4fs_daddr_t)EXT2_BLOCKS_PER_GROUP(fs) + fs->e2fs->e2fs_first_dblock); } static int ext2_cg_validate(struct m_ext2fs *fs) { uint64_t b_bitmap; uint64_t i_bitmap; uint64_t i_tables; uint64_t first_block, last_block, last_cg_block; struct ext2_gd *gd; unsigned int i, cg_count; first_block = fs->e2fs->e2fs_first_dblock; last_cg_block = ext2_cg_number_gdb(fs, 0); cg_count = fs->e2fs_gcount; for (i = 0; i < fs->e2fs_gcount; i++) { gd = &fs->e2fs_gd[i]; if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG) || i == fs->e2fs_gcount - 1) { last_block = fs->e2fs_bcount - 1; } else { last_block = first_block + (EXT2_BLOCKS_PER_GROUP(fs) - 1); } if ((cg_count == fs->e2fs_gcount) && !(gd->ext4bgd_flags & EXT2_BG_INODE_ZEROED)) cg_count = i; b_bitmap = e2fs_gd_get_b_bitmap(gd); if (b_bitmap == 0) { SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, "block bitmap is zero", i); return (EINVAL); } if (b_bitmap <= last_cg_block) { SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, "block bitmap overlaps gds", i); return (EINVAL); } if (b_bitmap < first_block || b_bitmap > last_block) { SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, "block bitmap not in group", i); return (EINVAL); } i_bitmap = e2fs_gd_get_i_bitmap(gd); if (i_bitmap == 0) { SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, "inode bitmap is zero", i); return (EINVAL); } if (i_bitmap <= last_cg_block) { SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, "inode bitmap overlaps gds", i); return (EINVAL); } if (i_bitmap < first_block || i_bitmap > last_block) { SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, "inode bitmap not in group blk", i); return (EINVAL); } i_tables = e2fs_gd_get_i_tables(gd); if (i_tables == 0) { SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, "inode table is zero", i); return (EINVAL); } if (i_tables <= last_cg_block) { SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, "inode talbes overlaps gds", i); return (EINVAL); } if (i_tables < first_block || i_tables + fs->e2fs_itpg - 1 > last_block) { SDT_PROBE2(ext2fs, , vfsops, ext2_cg_validate_error, "inode tables not in group blk", i); return (EINVAL); } if (!EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_FLEX_BG)) first_block += EXT2_BLOCKS_PER_GROUP(fs); } return (0); } /* * This computes the fields of the m_ext2fs structure from the * data in the ext2fs structure read in. */ static int ext2_compute_sb_data(struct vnode *devvp, struct ext2fs *es, struct m_ext2fs *fs) { struct buf *bp; uint32_t e2fs_descpb, e2fs_gdbcount_alloc; int i, j; int g_count = 0; int error; /* Check checksum features */ if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) && EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "incorrect checksum features combination"); return (EINVAL); } /* Precompute checksum seed for all metadata */ ext2_sb_csum_set_seed(fs); /* Verify sb csum if possible */ if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { error = ext2_sb_csum_verify(fs); if (error) { return (error); } } /* Check for block size = 1K|2K|4K */ if (es->e2fs_log_bsize > 2) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "bad block size"); return (EINVAL); } fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->e2fs_log_bsize; fs->e2fs_bsize = 1U << fs->e2fs_bshift; fs->e2fs_fsbtodb = es->e2fs_log_bsize + 1; fs->e2fs_qbmask = fs->e2fs_bsize - 1; /* Check for fragment size */ if (es->e2fs_log_fsize > (EXT2_MAX_FRAG_LOG_SIZE - EXT2_MIN_BLOCK_LOG_SIZE)) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "invalid log cluster size"); return (EINVAL); } fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << es->e2fs_log_fsize; if (fs->e2fs_fsize != fs->e2fs_bsize) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "fragment size != block size"); return (EINVAL); } fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize; /* Check reserved gdt blocks for future filesystem expansion */ if (es->e2fs_reserved_ngdb > (fs->e2fs_bsize / 4)) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "number of reserved GDT blocks too large"); return (EINVAL); } if (es->e2fs_rev == E2FS_REV0) { fs->e2fs_isize = E2FS_REV0_INODE_SIZE; } else { fs->e2fs_isize = es->e2fs_inode_size; /* * Check first ino. */ if (es->e2fs_first_ino < EXT2_FIRSTINO) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "invalid first ino"); return (EINVAL); } /* * Simple sanity check for superblock inode size value. */ if (EXT2_INODE_SIZE(fs) < E2FS_REV0_INODE_SIZE || EXT2_INODE_SIZE(fs) > fs->e2fs_bsize || (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "invalid inode size"); return (EINVAL); } } /* Check group descriptors */ if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT) && es->e3fs_desc_size != E2FS_64BIT_GD_SIZE) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "unsupported 64bit descriptor size"); return (EINVAL); } fs->e2fs_bpg = es->e2fs_bpg; fs->e2fs_fpg = es->e2fs_fpg; if (fs->e2fs_bpg == 0 || fs->e2fs_fpg == 0) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "zero blocks/fragments per group"); return (EINVAL); } else if (fs->e2fs_bpg != fs->e2fs_fpg) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "blocks per group not equal fragments per group"); return (EINVAL); } if (fs->e2fs_bpg != fs->e2fs_bsize * 8) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "non-standard group size unsupported"); return (EINVAL); } fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs); if (fs->e2fs_ipb == 0 || fs->e2fs_ipb > fs->e2fs_bsize / E2FS_REV0_INODE_SIZE) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "bad inodes per block size"); return (EINVAL); } fs->e2fs_ipg = es->e2fs_ipg; if (fs->e2fs_ipg < fs->e2fs_ipb || fs->e2fs_ipg > fs->e2fs_bsize * 8) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "invalid inodes per group"); return (EINVAL); } fs->e2fs_itpg = fs->e2fs_ipg / fs->e2fs_ipb; fs->e2fs_bcount = es->e2fs_bcount; fs->e2fs_rbcount = es->e2fs_rbcount; fs->e2fs_fbcount = es->e2fs_fbcount; if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { fs->e2fs_bcount |= (uint64_t)(es->e4fs_bcount_hi) << 32; fs->e2fs_rbcount |= (uint64_t)(es->e4fs_rbcount_hi) << 32; fs->e2fs_fbcount |= (uint64_t)(es->e4fs_fbcount_hi) << 32; } if (fs->e2fs_rbcount > fs->e2fs_bcount || fs->e2fs_fbcount > fs->e2fs_bcount) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "invalid block count"); return (EINVAL); } if (es->e2fs_first_dblock >= fs->e2fs_bcount) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "first data block out of range"); return (EINVAL); } fs->e2fs_gcount = howmany(fs->e2fs_bcount - es->e2fs_first_dblock, EXT2_BLOCKS_PER_GROUP(fs)); if (fs->e2fs_gcount > ((uint64_t)1 << 32) - EXT2_DESCS_PER_BLOCK(fs)) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "groups count too large"); return (EINVAL); } /* Check for extra isize in big inodes. */ if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_EXTRA_ISIZE) && EXT2_INODE_SIZE(fs) < sizeof(struct ext2fs_dinode)) { SDT_PROBE1(ext2fs, , vfsops, ext2_compute_sb_data_error, "no space for extra inode timestamps"); return (EINVAL); } /* s_resuid / s_resgid ? */ if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { e2fs_descpb = fs->e2fs_bsize / E2FS_64BIT_GD_SIZE; e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, e2fs_descpb); } else { e2fs_descpb = fs->e2fs_bsize / E2FS_REV0_GD_SIZE; e2fs_gdbcount_alloc = howmany(fs->e2fs_gcount, fs->e2fs_bsize / sizeof(struct ext2_gd)); } fs->e2fs_gdbcount = howmany(fs->e2fs_gcount, e2fs_descpb); fs->e2fs_gd = malloc(e2fs_gdbcount_alloc * fs->e2fs_bsize, M_EXT2MNT, M_WAITOK | M_ZERO); fs->e2fs_contigdirs = malloc(fs->e2fs_gcount * sizeof(*fs->e2fs_contigdirs), M_EXT2MNT, M_WAITOK | M_ZERO); for (i = 0; i < fs->e2fs_gdbcount; i++) { error = bread(devvp, fsbtodb(fs, ext2_cg_location(fs, i)), fs->e2fs_bsize, NOCRED, &bp); if (error) { /* * fs->e2fs_gd and fs->e2fs_contigdirs * will be freed later by the caller, * because this function could be called from * MNT_UPDATE path. */ - brelse(bp); return (error); } if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { memcpy(&fs->e2fs_gd[ i * fs->e2fs_bsize / sizeof(struct ext2_gd)], bp->b_data, fs->e2fs_bsize); } else { for (j = 0; j < e2fs_descpb && g_count < fs->e2fs_gcount; j++, g_count++) memcpy(&fs->e2fs_gd[g_count], bp->b_data + j * E2FS_REV0_GD_SIZE, E2FS_REV0_GD_SIZE); } brelse(bp); bp = NULL; } /* Validate cgs consistency */ error = ext2_cg_validate(fs); if (error) return (error); /* Verfy cgs csum */ if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) { error = ext2_gd_csum_verify(fs, devvp->v_rdev); if (error) return (error); } /* Initialization for the ext2 Orlov allocator variant. */ fs->e2fs_total_dir = 0; for (i = 0; i < fs->e2fs_gcount; i++) fs->e2fs_total_dir += e2fs_gd_get_ndirs(&fs->e2fs_gd[i]); if (es->e2fs_rev == E2FS_REV0 || !EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_LARGEFILE)) fs->e2fs_maxfilesize = 0x7fffffff; else { fs->e2fs_maxfilesize = 0xffffffffffff; if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_HUGE_FILE)) fs->e2fs_maxfilesize = 0x7fffffffffffffff; } if (es->e4fs_flags & E2FS_UNSIGNED_HASH) { fs->e2fs_uhash = 3; } else if ((es->e4fs_flags & E2FS_SIGNED_HASH) == 0) { #ifdef __CHAR_UNSIGNED__ es->e4fs_flags |= E2FS_UNSIGNED_HASH; fs->e2fs_uhash = 3; #else es->e4fs_flags |= E2FS_SIGNED_HASH; #endif } if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) error = ext2_sb_csum_verify(fs); return (error); } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) invalidate all cluster summary information. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. * XXX we are missing some steps, in particular # 3, this has to be reviewed. */ static int ext2_reload(struct mount *mp, struct thread *td) { struct vnode *vp, *mvp, *devvp; struct inode *ip; struct buf *bp; struct ext2fs *es; struct m_ext2fs *fs; struct csum *sump; int error, i; int32_t *lp; if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOEXT2(mp)->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); if (vinvalbuf(devvp, 0, 0, 0) != 0) panic("ext2_reload: dirty1"); VOP_UNLOCK(devvp, 0); /* * Step 2: re-read superblock from disk. * constants have been adjusted for ext2 */ if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) return (error); es = (struct ext2fs *)bp->b_data; if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { brelse(bp); return (EIO); /* XXX needs translation */ } fs = VFSTOEXT2(mp)->um_e2fs; bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); if ((error = ext2_compute_sb_data(devvp, es, fs)) != 0) { brelse(bp); return (error); } #ifdef UNKLAR if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; #endif brelse(bp); /* * Step 3: invalidate all cluster summary information. */ if (fs->e2fs_contigsumsize > 0) { lp = fs->e2fs_maxcluster; sump = fs->e2fs_clustersum; for (i = 0; i < fs->e2fs_gcount; i++, sump++) { *lp++ = fs->e2fs_contigsumsize; sump->cs_init = 0; bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); } } loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { /* * Step 4: invalidate all cached file data. */ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } if (vinvalbuf(vp, 0, 0, 0)) panic("ext2_reload: dirty2"); /* * Step 5: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->e2fs_bsize, NOCRED, &bp); if (error) { VOP_UNLOCK(vp, 0); vrele(vp); MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip); brelse(bp); VOP_UNLOCK(vp, 0); vrele(vp); if (error) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); return (error); } } return (0); } /* * Common code for mount and mountroot. */ static int ext2_mountfs(struct vnode *devvp, struct mount *mp) { struct ext2mount *ump; struct buf *bp; struct m_ext2fs *fs; struct ext2fs *es; struct cdev *dev = devvp->v_rdev; struct g_consumer *cp; struct bufobj *bo; struct csum *sump; int error; int ronly; int i; u_long size; int32_t *lp; int32_t e2fs_maxcontig; ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); /* XXX: use VOP_ACESS to check FS perms */ g_topology_lock(); error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 0 : 1); g_topology_unlock(); VOP_UNLOCK(devvp, 0); if (error) return (error); /* XXX: should we check for some sectorsize or 512 instead? */ if (((SBSIZE % cp->provider->sectorsize) != 0) || (SBSIZE < cp->provider->sectorsize)) { g_topology_lock(); g_vfs_close(cp); g_topology_unlock(); return (EINVAL); } bo = &devvp->v_bufobj; bo->bo_private = cp; bo->bo_ops = g_vfs_bufops; if (devvp->v_rdev->si_iosize_max != 0) mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; if (mp->mnt_iosize_max > MAXPHYS) mp->mnt_iosize_max = MAXPHYS; bp = NULL; ump = NULL; if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) goto out; es = (struct ext2fs *)bp->b_data; if (ext2_check_sb_compat(es, dev, ronly) != 0) { error = EINVAL; /* XXX needs translation */ goto out; } if ((es->e2fs_state & E2FS_ISCLEAN) == 0 || (es->e2fs_state & E2FS_ERRORS)) { if (ronly || (mp->mnt_flag & MNT_FORCE)) { printf( "WARNING: Filesystem was not properly dismounted\n"); } else { printf( "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); error = EPERM; goto out; } } ump = malloc(sizeof(*ump), M_EXT2MNT, M_WAITOK | M_ZERO); /* * I don't know whether this is the right strategy. Note that * we dynamically allocate both an m_ext2fs and an ext2fs * while Linux keeps the super block in a locked buffer. */ ump->um_e2fs = malloc(sizeof(struct m_ext2fs), M_EXT2MNT, M_WAITOK | M_ZERO); ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), M_EXT2MNT, M_WAITOK); mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF); bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs)); if ((error = ext2_compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) goto out; /* * Calculate the maximum contiguous blocks and size of cluster summary * array. In FFS this is done by newfs; however, the superblock * in ext2fs doesn't have these variables, so we can calculate * them here. */ e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize); ump->um_e2fs->e2fs_contigsumsize = MIN(e2fs_maxcontig, EXT2_MAXCONTIG); if (ump->um_e2fs->e2fs_contigsumsize > 0) { size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t); ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK); size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum); ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK); lp = ump->um_e2fs->e2fs_maxcluster; sump = ump->um_e2fs->e2fs_clustersum; for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) { *lp++ = ump->um_e2fs->e2fs_contigsumsize; sump->cs_init = 0; sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) * sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO); } } brelse(bp); bp = NULL; fs = ump->um_e2fs; fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */ /* * If the fs is not mounted read-only, make sure the super block is * always written back on a sync(). */ fs->e2fs_wasvalid = fs->e2fs->e2fs_state & E2FS_ISCLEAN ? 1 : 0; if (ronly == 0) { fs->e2fs_fmod = 1; /* mark it modified */ fs->e2fs->e2fs_state &= ~E2FS_ISCLEAN; /* set fs invalid */ } mp->mnt_data = ump; mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; MNT_ILOCK(mp); mp->mnt_flag |= MNT_LOCAL; MNT_IUNLOCK(mp); ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_bo = &devvp->v_bufobj; ump->um_cp = cp; /* * Setting those two parameters allowed us to use * ufs_bmap w/o changse! */ ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); ump->um_bptrtodb = fs->e2fs->e2fs_log_bsize + 1; ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); if (ronly == 0) ext2_sbupdate(ump, MNT_WAIT); /* * Initialize filesystem stat information in mount struct. */ MNT_ILOCK(mp); mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED | MNTK_USES_BCACHE; MNT_IUNLOCK(mp); return (0); out: if (bp) brelse(bp); if (cp != NULL) { g_topology_lock(); g_vfs_close(cp); g_topology_unlock(); } if (ump) { mtx_destroy(EXT2_MTX(ump)); free(ump->um_e2fs->e2fs_gd, M_EXT2MNT); free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT); free(ump->um_e2fs->e2fs, M_EXT2MNT); free(ump->um_e2fs, M_EXT2MNT); free(ump, M_EXT2MNT); mp->mnt_data = NULL; } return (error); } /* * Unmount system call. */ static int ext2_unmount(struct mount *mp, int mntflags) { struct ext2mount *ump; struct m_ext2fs *fs; struct csum *sump; int error, flags, i, ronly; flags = 0; if (mntflags & MNT_FORCE) { if (mp->mnt_flag & MNT_ROOTFS) return (EINVAL); flags |= FORCECLOSE; } if ((error = ext2_flushfiles(mp, flags, curthread)) != 0) return (error); ump = VFSTOEXT2(mp); fs = ump->um_e2fs; ronly = fs->e2fs_ronly; if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) { if (fs->e2fs_wasvalid) fs->e2fs->e2fs_state |= E2FS_ISCLEAN; ext2_sbupdate(ump, MNT_WAIT); } g_topology_lock(); g_vfs_close(ump->um_cp); g_topology_unlock(); vrele(ump->um_devvp); sump = fs->e2fs_clustersum; for (i = 0; i < fs->e2fs_gcount; i++, sump++) free(sump->cs_sum, M_EXT2MNT); free(fs->e2fs_clustersum, M_EXT2MNT); free(fs->e2fs_maxcluster, M_EXT2MNT); free(fs->e2fs_gd, M_EXT2MNT); free(fs->e2fs_contigdirs, M_EXT2MNT); free(fs->e2fs, M_EXT2MNT); free(fs, M_EXT2MNT); free(ump, M_EXT2MNT); mp->mnt_data = NULL; MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_LOCAL; MNT_IUNLOCK(mp); return (error); } /* * Flush out all the files in a filesystem. */ static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td) { int error; error = vflush(mp, 0, flags, td); return (error); } /* * Get filesystem statistics. */ int ext2_statfs(struct mount *mp, struct statfs *sbp) { struct ext2mount *ump; struct m_ext2fs *fs; uint32_t overhead, overhead_per_group, ngdb; int i, ngroups; ump = VFSTOEXT2(mp); fs = ump->um_e2fs; if (fs->e2fs->e2fs_magic != E2FS_MAGIC) panic("ext2_statfs"); /* * Compute the overhead (FS structures) */ overhead_per_group = 1 /* block bitmap */ + 1 /* inode bitmap */ + fs->e2fs_itpg; overhead = fs->e2fs->e2fs_first_dblock + fs->e2fs_gcount * overhead_per_group; if (fs->e2fs->e2fs_rev > E2FS_REV0 && fs->e2fs->e2fs_features_rocompat & EXT2F_ROCOMPAT_SPARSESUPER) { for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { if (ext2_cg_has_sb(fs, i)) ngroups++; } } else { ngroups = fs->e2fs_gcount; } ngdb = fs->e2fs_gdbcount; if (fs->e2fs->e2fs_rev > E2FS_REV0 && fs->e2fs->e2fs_features_compat & EXT2F_COMPAT_RESIZE) ngdb += fs->e2fs->e2fs_reserved_ngdb; overhead += ngroups * (1 /* superblock */ + ngdb); sbp->f_bsize = EXT2_FRAG_SIZE(fs); sbp->f_iosize = EXT2_BLOCK_SIZE(fs); sbp->f_blocks = fs->e2fs_bcount - overhead; sbp->f_bfree = fs->e2fs_fbcount; sbp->f_bavail = sbp->f_bfree - fs->e2fs_rbcount; sbp->f_files = fs->e2fs->e2fs_icount; sbp->f_ffree = fs->e2fs->e2fs_ficount; return (0); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked `MPBUSY'. */ static int ext2_sync(struct mount *mp, int waitfor) { struct vnode *mvp, *vp; struct thread *td; struct inode *ip; struct ext2mount *ump = VFSTOEXT2(mp); struct m_ext2fs *fs; int error, allerror = 0; td = curthread; fs = ump->um_e2fs; if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ panic("ext2_sync: rofs mod fs=%s", fs->e2fs_fsmnt); } /* * Write back each (modified) inode. */ loop: MNT_VNODE_FOREACH_ALL(vp, mp, mvp) { if (vp->v_type == VNON) { VI_UNLOCK(vp); continue; } ip = VTOI(vp); if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && (vp->v_bufobj.bo_dirty.bv_cnt == 0 || waitfor == MNT_LAZY)) { VI_UNLOCK(vp); continue; } error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); if (error) { if (error == ENOENT) { MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp); goto loop; } continue; } if ((error = VOP_FSYNC(vp, waitfor, td)) != 0) allerror = error; VOP_UNLOCK(vp, 0); vrele(vp); } /* * Force stale filesystem control information to be flushed. */ if (waitfor != MNT_LAZY) { vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0) allerror = error; VOP_UNLOCK(ump->um_devvp, 0); } /* * Write back modified superblock. */ if (fs->e2fs_fmod != 0) { fs->e2fs_fmod = 0; fs->e2fs->e2fs_wtime = time_second; if ((error = ext2_cgupdate(ump, waitfor)) != 0) allerror = error; } return (allerror); } /* * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it * in from disk. If it is in core, wait for the lock bit to clear, then * return the inode locked. Detection and handling of mount points must be * done by the calling routine. */ static int ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) { struct m_ext2fs *fs; struct inode *ip; struct ext2mount *ump; struct buf *bp; struct vnode *vp; struct thread *td; unsigned int i, used_blocks; int error; td = curthread; error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL); if (error || *vpp != NULL) return (error); ump = VFSTOEXT2(mp); ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); /* Allocate a new vnode/inode. */ if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { *vpp = NULL; free(ip, M_EXT2NODE); return (error); } vp->v_data = ip; ip->i_vnode = vp; ip->i_e2fs = fs = ump->um_e2fs; ip->i_ump = ump; ip->i_number = ino; lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); error = insmntque(vp, mp); if (error != 0) { free(ip, M_EXT2NODE); *vpp = NULL; return (error); } error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL); if (error || *vpp != NULL) return (error); /* Read in the disk contents for the inode, copy into the inode. */ if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ brelse(bp); vput(vp); *vpp = NULL; return (error); } /* convert ext2 inode to dinode */ error = ext2_ei2i((struct ext2fs_dinode *)((char *)bp->b_data + EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ino)), ip); if (error) { brelse(bp); vput(vp); *vpp = NULL; return (error); } ip->i_block_group = ino_to_cg(fs, ino); ip->i_next_alloc_block = 0; ip->i_next_alloc_goal = 0; /* * Now we want to make sure that block pointers for unused * blocks are zeroed out - ext2_balloc depends on this * although for regular files and directories only * * If IN_E4EXTENTS is enabled, unused blocks are not zeroed * out because we could corrupt the extent tree. */ if (!(ip->i_flag & IN_E4EXTENTS) && (S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode))) { used_blocks = howmany(ip->i_size, fs->e2fs_bsize); for (i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) ip->i_db[i] = 0; } #ifdef EXT2FS_PRINT_EXTENTS ext2_print_inode(ip); ext4_ext_print_extent_tree_status(ip); #endif bqrelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization. */ *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - call ext2_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via. exflagsp and credanonp */ static int ext2_fhtovp(struct mount *mp, struct fid *fhp, int flags, struct vnode **vpp) { struct inode *ip; struct ufid *ufhp; struct vnode *nvp; struct m_ext2fs *fs; int error; ufhp = (struct ufid *)fhp; fs = VFSTOEXT2(mp)->um_e2fs; if (ufhp->ufid_ino < EXT2_ROOTINO || ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs->e2fs_ipg) return (ESTALE); error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); if (error) { *vpp = NULLVP; return (error); } ip = VTOI(nvp); if (ip->i_mode == 0 || ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { vput(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; vnode_create_vobject(*vpp, 0, curthread); return (0); } /* * Write a superblock and associated information back to disk. */ static int ext2_sbupdate(struct ext2mount *mp, int waitfor) { struct m_ext2fs *fs = mp->um_e2fs; struct ext2fs *es = fs->e2fs; struct buf *bp; int error = 0; es->e2fs_bcount = fs->e2fs_bcount & 0xffffffff; es->e2fs_rbcount = fs->e2fs_rbcount & 0xffffffff; es->e2fs_fbcount = fs->e2fs_fbcount & 0xffffffff; if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { es->e4fs_bcount_hi = fs->e2fs_bcount >> 32; es->e4fs_rbcount_hi = fs->e2fs_rbcount >> 32; es->e4fs_fbcount_hi = fs->e2fs_fbcount >> 32; } if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) ext2_sb_csum_set(fs); bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs)); if (waitfor == MNT_WAIT) error = bwrite(bp); else bawrite(bp); /* * The buffers for group descriptors, inode bitmaps and block bitmaps * are not busy at this point and are (hopefully) written by the * usual sync mechanism. No need to write them here. */ return (error); } int ext2_cgupdate(struct ext2mount *mp, int waitfor) { struct m_ext2fs *fs = mp->um_e2fs; struct buf *bp; int i, j, g_count = 0, error = 0, allerror = 0; allerror = ext2_sbupdate(mp, waitfor); /* Update gd csums */ if (EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_GDT_CSUM) || EXT2_HAS_RO_COMPAT_FEATURE(fs, EXT2F_ROCOMPAT_METADATA_CKSUM)) ext2_gd_csum_set(fs); for (i = 0; i < fs->e2fs_gdbcount; i++) { bp = getblk(mp->um_devvp, fsbtodb(fs, ext2_cg_location(fs, i)), fs->e2fs_bsize, 0, 0, 0); if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_64BIT)) { memcpy(bp->b_data, &fs->e2fs_gd[ i * fs->e2fs_bsize / sizeof(struct ext2_gd)], fs->e2fs_bsize); } else { for (j = 0; j < fs->e2fs_bsize / E2FS_REV0_GD_SIZE && g_count < fs->e2fs_gcount; j++, g_count++) memcpy(bp->b_data + j * E2FS_REV0_GD_SIZE, &fs->e2fs_gd[g_count], E2FS_REV0_GD_SIZE); } if (waitfor == MNT_WAIT) error = bwrite(bp); else bawrite(bp); } if (!allerror && error) allerror = error; return (allerror); } /* * Return the root of a filesystem. */ static int ext2_root(struct mount *mp, int flags, struct vnode **vpp) { struct vnode *nvp; int error; error = VFS_VGET(mp, EXT2_ROOTINO, LK_EXCLUSIVE, &nvp); if (error) return (error); *vpp = nvp; return (0); }