Index: sys/fs/ext2fs/ext2_alloc.c =================================================================== --- sys/fs/ext2fs/ext2_alloc.c +++ sys/fs/ext2fs/ext2_alloc.c @@ -56,7 +56,6 @@ static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int); static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int); static u_long ext2_dirpref(struct inode *); -static void ext2_fserr(struct m_ext2fs *, uid_t, char *); static u_long ext2_hashalloc(struct inode *, int, long, int, daddr_t (*)(struct inode *, int, daddr_t, int)); @@ -136,19 +135,20 @@ * Allocate EA's block for inode. */ daddr_t -ext2_allocfacl(struct inode *ip) +ext2_allocmeta(struct inode *ip) { struct m_ext2fs *fs; - daddr_t facl; + daddr_t blk; fs = ip->i_e2fs; EXT2_LOCK(ip->i_ump); - facl = ext2_alloccg(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize); - if (0 == facl) + blk = ext2_hashalloc(ip, ino_to_cg(fs, ip->i_number), 0, fs->e2fs_bsize, + ext2_alloccg); + if (0 == blk) EXT2_UNLOCK(ip->i_ump); - return (facl); + return (blk); } /* @@ -201,7 +201,7 @@ fs = ip->i_e2fs; ump = ip->i_ump; - if (fs->e2fs_contigsumsize <= 0) + if (fs->e2fs_contigsumsize <= 0 || ip->i_flag & IN_E4EXTENTS) return (ENOSPC); buflist = ap->a_buflist; @@ -376,7 +376,7 @@ struct inode *ip; struct ext2mount *ump; ino_t ino, ipref; - int i, error, cg; + int error, cg; *vpp = NULL; pip = VTOI(pvp); @@ -422,11 +422,11 @@ ip->i_blocks = 0; ip->i_mode = 0; ip->i_flags = 0; - /* now we want to make sure that the block pointers are zeroed out */ - for (i = 0; i < EXT2_NDADDR; i++) - ip->i_db[i] = 0; - for (i = 0; i < EXT2_NIADDR; i++) - ip->i_ib[i] = 0; + if (EXT2_HAS_INCOMPAT_FEATURE(fs, EXT2F_INCOMPAT_EXTENTS) + && (S_ISREG(mode) || S_ISDIR(mode))) + ext4_ext_tree_init(ip); + else + memset(ip->i_data, 0, sizeof(ip->i_data)); /* * Set up a new generation number for this inode. 
@@ -576,8 +576,11 @@ ext2_blkpref(struct inode *ip, e2fs_lbn_t lbn, int indx, e2fs_daddr_t *bap, e2fs_daddr_t blocknr) { + struct m_ext2fs *fs; int tmp; + fs = ip->i_e2fs; + mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); /* @@ -600,10 +603,9 @@ * Else lets fall back to the blocknr or, if there is none, follow * the rule that a block should be allocated near its inode. */ - return blocknr ? blocknr : + return (blocknr ? blocknr : (e2fs_daddr_t)(ip->i_block_group * - EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) + - ip->i_e2fs->e2fs->e2fs_first_dblock; + EXT2_BLOCKS_PER_GROUP(fs)) + fs->e2fs->e2fs_first_dblock); } /* @@ -1303,7 +1305,7 @@ * The form of the error message is: * fs: error message */ -static void +void ext2_fserr(struct m_ext2fs *fs, uid_t uid, char *cp) { Index: sys/fs/ext2fs/ext2_balloc.c =================================================================== --- sys/fs/ext2fs/ext2_balloc.c +++ sys/fs/ext2fs/ext2_balloc.c @@ -51,6 +51,76 @@ #include #include +static int +ext2_ext_balloc(struct inode *ip, uint32_t lbn, int size, + struct ucred *cred, struct buf **bpp, int flags) +{ + struct m_ext2fs *fs; + struct buf *bp = NULL; + struct vnode *vp = ITOV(ip); + uint32_t nb; + int osize, nsize, blks, error, allocated; + + fs = ip->i_e2fs; + blks = howmany(size, fs->e2fs_bsize); + + error = ext4_ext_get_blocks(ip, lbn, blks, cred, NULL, &allocated, &nb); + if (error) + return (error); + + if (allocated) { + if (ip->i_size < (lbn + 1) * fs->e2fs_bsize) + nsize = fragroundup(fs, size); + else + nsize = fs->e2fs_bsize; + + bp = getblk(vp, lbn, nsize, 0, 0, 0); + if(!bp) + return (EIO); + + bp->b_blkno = fsbtodb(fs, nb); + if (flags & BA_CLRBUF) + vfs_bio_clrbuf(bp); + } else { + if (ip->i_size >= (lbn + 1) * fs->e2fs_bsize) { + + error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + bp->b_blkno = fsbtodb(fs, nb); + *bpp = bp; + return (0); + } + + /* + * Consider need to reallocate a fragment. 
+ */ + osize = fragroundup(fs, blkoff(fs, ip->i_size)); + nsize = fragroundup(fs, size); + if (nsize <= osize) { + error = bread(vp, lbn, osize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + bp->b_blkno = fsbtodb(fs, nb); + } else { + error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + bp->b_blkno = fsbtodb(fs, nb); + } + } + + *bpp = bp; + + return (error); +} + /* * Balloc defines the structure of filesystem storage * by allocating the physical blocks on a device given @@ -84,6 +154,10 @@ ip->i_next_alloc_block++; ip->i_next_alloc_goal++; } + + if (ip->i_flag & IN_E4EXTENTS) + return (ext2_ext_balloc(ip, lbn, size, cred, bpp, flags)); + /* * The first EXT2_NDADDR blocks are direct blocks */ Index: sys/fs/ext2fs/ext2_bmap.c =================================================================== --- sys/fs/ext2fs/ext2_bmap.c +++ sys/fs/ext2fs/ext2_bmap.c @@ -53,8 +53,6 @@ #include #include -static int ext4_bmapext(struct vnode *, int32_t, int64_t *, int *, int *); - /* * Bmap converts the logical block number of a file to its physical block * number on the disk. The conversion is done by using the logical block @@ -89,55 +87,52 @@ * Convert the logical block number of a file to its physical block number * on the disk within ext4 extents. 
*/ -static int +int ext4_bmapext(struct vnode *vp, int32_t bn, int64_t *bnp, int *runp, int *runb) { struct inode *ip; struct m_ext2fs *fs; + struct ext4_extent_header *ehp; struct ext4_extent *ep; - struct ext4_extent_path path = {.ep_bp = NULL}; + struct ext4_extent_path *path = NULL; daddr_t lbn; - int error; + int error, depth; ip = VTOI(vp); fs = ip->i_e2fs; lbn = bn; + ehp = (struct ext4_extent_header *)ip->i_data; + depth = ehp->eh_depth; + *bnp = -1; if (runp != NULL) *runp = 0; if (runb != NULL) *runb = 0; - error = 0; - - ext4_ext_find_extent(fs, ip, lbn, &path); - if (path.ep_is_sparse) { - *bnp = -1; - if (runp != NULL) - *runp = path.ep_sparse_ext.e_len - - (lbn - path.ep_sparse_ext.e_blk) - 1; - if (runb != NULL) - *runb = lbn - path.ep_sparse_ext.e_blk; - } else { - if (path.ep_ext == NULL) { - error = EIO; - goto out; - } - ep = path.ep_ext; - *bnp = fsbtodb(fs, lbn - ep->e_blk + - (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32)); - if (*bnp == 0) - *bnp = -1; + error = ext4_ext_find_extent(ip, lbn, &path); + if (error) + return (error); - if (runp != NULL) - *runp = ep->e_len - (lbn - ep->e_blk) - 1; - if (runb != NULL) - *runb = lbn - ep->e_blk; + ep = path[depth].ep_ext; + if(ep) { + if (lbn < ep->e_blk) { + if (runp != NULL) + *runp = ep->e_blk - lbn - 1; + } else if (ep->e_blk <= lbn && lbn < ep->e_blk + ep->e_len) { + *bnp = fsbtodb(fs, lbn - ep->e_blk + + (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32)); + if (runp != NULL) + *runp = ep->e_len - (lbn - ep->e_blk) - 1; + if (runb != NULL) + *runb = lbn - ep->e_blk; + } else { + if (runb != NULL) + *runb = ep->e_blk + lbn - ep->e_len; + } } -out: - if (path.ep_bp != NULL) - brelse(path.ep_bp); + ext4_ext_path_free(path); return (error); } Index: sys/fs/ext2fs/ext2_extattr.c =================================================================== --- sys/fs/ext2fs/ext2_extattr.c +++ sys/fs/ext2fs/ext2_extattr.c @@ -612,7 +612,7 @@ if (header->h_magic != EXTATTR_MAGIC || header->h_refcount == 1) 
return (EINVAL); - facl = ext2_allocfacl(ip); + facl = ext2_allocmeta(ip); if (!facl) return (ENOSPC); @@ -1137,7 +1137,7 @@ return (ENOSPC); /* Allocate block, fill EA header and insert entry */ - ip->i_facl = ext2_allocfacl(ip); + ip->i_facl = ext2_allocmeta(ip); if (0 == ip->i_facl) return (ENOSPC); Index: sys/fs/ext2fs/ext2_extents.h =================================================================== --- sys/fs/ext2fs/ext2_extents.h +++ sys/fs/ext2fs/ext2_extents.h @@ -31,6 +31,10 @@ #include #define EXT4_EXT_MAGIC 0xf30a +#define EXT4_MAX_BLOCKS 0xffffffff +#define EXT_INIT_MAX_LEN (1UL << 15) +#define EXT4_MAX_LEN (EXT_INIT_MAX_LEN - 1) +#define EXT4_EXT_DEPTH_MAX 5 #define EXT4_EXT_CACHE_NO 0 #define EXT4_EXT_CACHE_GAP 1 @@ -82,23 +86,41 @@ * Save path to some extent. */ struct ext4_extent_path { + int index_count; uint16_t ep_depth; - struct buf *ep_bp; - bool ep_is_sparse; - union { - struct ext4_extent ep_sparse_ext; - struct ext4_extent *ep_ext; - }; + uint64_t ep_blk; + char *ep_data; + struct ext4_extent *ep_ext; struct ext4_extent_index *ep_index; struct ext4_extent_header *ep_header; }; +#define EXT_FIRST_EXTENT(hdr) ((struct ext4_extent *)(((char *)(hdr)) + \ + sizeof(struct ext4_extent_header))) +#define EXT_FIRST_INDEX(hdr) ((struct ext4_extent_index *)(((char *)(hdr)) + \ + sizeof(struct ext4_extent_header))) +#define EXT_LAST_EXTENT(hdr) (EXT_FIRST_EXTENT((hdr)) + (hdr)->eh_ecount - 1) +#define EXT_LAST_INDEX(hdr) (EXT_FIRST_INDEX((hdr)) + (hdr)->eh_ecount - 1) +#define EXT4_EXTENT_TAIL_OFFSET(hdr) (sizeof(struct ext4_extent_header) + \ + (sizeof(struct ext4_extent) * (hdr)->eh_max)) +#define EXT_HAS_FREE_INDEX(path) \ + ((path)->ep_header->eh_ecount < (path)->ep_header->eh_max) +#define EXT_MAX_EXTENT(hdr) (EXT_FIRST_EXTENT(hdr) + ((hdr)->eh_max) - 1) +#define EXT_MAX_INDEX(hdr) (EXT_FIRST_INDEX((hdr)) + (hdr)->eh_max - 1) + struct inode; struct m_ext2fs; +void ext4_ext_tree_init(struct inode *ip); int ext4_ext_in_cache(struct inode *, daddr_t, 
struct ext4_extent *); void ext4_ext_put_cache(struct inode *, struct ext4_extent *, int); -struct ext4_extent_path * -ext4_ext_find_extent(struct m_ext2fs *fs, - struct inode *, daddr_t, struct ext4_extent_path *); +int ext4_ext_find_extent(struct inode *, daddr_t, struct ext4_extent_path **); +void ext4_ext_path_free(struct ext4_extent_path *path); +int ext4_ext_remove_space(struct inode *ip, off_t length, int flags, + struct ucred *cred, struct thread *td); +int ext4_ext_get_blocks(struct inode *ip, int64_t iblock, + unsigned long max_blocks, struct ucred *cred, struct buf **bpp, int *allocate, uint32_t *); +#ifdef EXT2FS_DEBUG +void ext4_ext_print_extent_tree_status(struct inode * ip); +#endif #endif /* !_FS_EXT2FS_EXT2_EXTENTS_H_ */ Index: sys/fs/ext2fs/ext2_extents.c =================================================================== --- sys/fs/ext2fs/ext2_extents.c +++ sys/fs/ext2fs/ext2_extents.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -43,87 +44,163 @@ #include #include -static bool -ext4_ext_binsearch_index(struct inode *ip, struct ext4_extent_path *path, - daddr_t lbn, daddr_t *first_lbn, daddr_t *last_lbn){ - struct ext4_extent_header *ehp = path->ep_header; - struct ext4_extent_index *first, *last, *l, *r, *m; +static MALLOC_DEFINE(M_EXT2EXTENTS, "ext2_extents", "EXT2 extents"); - first = (struct ext4_extent_index *)(char *)(ehp + 1); - last = first + ehp->eh_ecount - 1; - l = first; - r = last; - while (l <= r) { - m = l + (r - l) / 2; - if (lbn < m->ei_blk) - r = m - 1; - else - l = m + 1; - } +#ifdef EXT2FS_DEBUG +static void +ext4_ext_print_extent(struct ext4_extent *ep) +{ - if (l == first) { - path->ep_sparse_ext.e_blk = *first_lbn; - path->ep_sparse_ext.e_len = first->ei_blk - *first_lbn; - path->ep_sparse_ext.e_start_hi = 0; - path->ep_sparse_ext.e_start_lo = 0; - path->ep_is_sparse = true; - return (true); - } - path->ep_index = l - 1; - *first_lbn = path->ep_index->ei_blk; - if (path->ep_index < last) - 
*last_lbn = l->ei_blk - 1; - return (false); + printf(" ext %p => (blk %u len %u start %lu)\n", + ep, ep->e_blk, ep->e_len, + (uint64_t)ep->e_start_hi << 32 | ep->e_start_lo); } +static void ext4_ext_print_header(struct inode *ip, struct ext4_extent_header *ehp); + static void -ext4_ext_binsearch(struct inode *ip, struct ext4_extent_path *path, daddr_t lbn, - daddr_t first_lbn, daddr_t last_lbn) +ext4_ext_print_index(struct inode *ip, struct ext4_extent_index *ex, int do_walk) { - struct ext4_extent_header *ehp = path->ep_header; - struct ext4_extent *first, *l, *r, *m; + struct m_ext2fs *fs; + struct buf *bp; + int error; - if (ehp->eh_ecount == 0) - return; + fs = ip->i_e2fs; - first = (struct ext4_extent *)(char *)(ehp + 1); - l = first; - r = first + ehp->eh_ecount - 1; - while (l <= r) { - m = l + (r - l) / 2; - if (lbn < m->e_blk) - r = m - 1; - else - l = m + 1; - } + printf(" index %p => (blk %u pblk %lu)\n", + ex, ex->ei_blk, (uint64_t)ex->ei_leaf_hi << 32 | ex->ei_leaf_lo); + + if(!do_walk) + return; - if (l == first) { - path->ep_sparse_ext.e_blk = first_lbn; - path->ep_sparse_ext.e_len = first->e_blk - first_lbn; - path->ep_sparse_ext.e_start_hi = 0; - path->ep_sparse_ext.e_start_lo = 0; - path->ep_is_sparse = true; + if ((error = bread(ip->i_devvp, + fsbtodb(fs, ((uint64_t)ex->ei_leaf_hi << 32 | ex->ei_leaf_lo)), + (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { + brelse(bp); return; } - path->ep_ext = l - 1; - if (path->ep_ext->e_blk + path->ep_ext->e_len <= lbn) { - path->ep_sparse_ext.e_blk = path->ep_ext->e_blk + - path->ep_ext->e_len; - if (l <= (first + ehp->eh_ecount - 1)) - path->ep_sparse_ext.e_len = l->e_blk - - path->ep_sparse_ext.e_blk; + + ext4_ext_print_header(ip, (struct ext4_extent_header *)bp->b_data); + + brelse(bp); + +} + +static void +ext4_ext_print_header(struct inode *ip, struct ext4_extent_header *ehp) +{ + int i; + + printf("header %p => (magic 0x%x entries %d max %d depth %d gen %d)\n", + ehp, ehp->eh_magic, ehp->eh_ecount, 
ehp->eh_max, ehp->eh_depth, + ehp->eh_gen); + + for (i = 0; i < ehp->eh_ecount; i++) + if (ehp->eh_depth != 0) + ext4_ext_print_index(ip, + (struct ext4_extent_index *)(ehp + 1 + i), 1); else - path->ep_sparse_ext.e_len = last_lbn - - path->ep_sparse_ext.e_blk + 1; - path->ep_sparse_ext.e_start_hi = 0; - path->ep_sparse_ext.e_start_lo = 0; - path->ep_is_sparse = true; + ext4_ext_print_extent((struct ext4_extent *)(ehp + 1 + i)); +} + +static void +ext4_ext_print_path(struct inode *ip, struct ext4_extent_path *path) +{ + int k, l; + + /* The loop below visits path entries 0..ep_depth of the first entry. */ + l = path->ep_depth; + + printf("ip=%d, Path:\n", ip->i_number); + for (k = 0; k <= l; k++, path++) { + if (path->ep_index) { + ext4_ext_print_index(ip, path->ep_index, 0); + } else if (path->ep_ext) { + ext4_ext_print_extent(path->ep_ext); + } } } -/* - * Find a block in ext4 extent cache. - */ +void +ext4_ext_print_extent_tree_status(struct inode * ip) +{ + struct m_ext2fs *fs; + struct ext4_extent_header *ehp; + + fs = ip->i_e2fs; + ehp = (struct ext4_extent_header *)(char *)ip->i_db; + + printf("Extent status:ip=%d\n", ip->i_number); + if (!(ip->i_flag & IN_E4EXTENTS)) + return; + + ext4_ext_print_header(ip, ehp); + + return; +} +#endif + +static inline struct ext4_extent_header * +ext4_ext_inode_header(struct inode *ip) +{ + + return ((struct ext4_extent_header *)ip->i_db); +} + +static inline struct ext4_extent_header * +ext4_ext_block_header(char *bdata) +{ + + return ((struct ext4_extent_header *)bdata); +} + +static inline unsigned short +ext4_ext_inode_depth(struct inode *ip) +{ + struct ext4_extent_header *ehp; + + ehp = (struct ext4_extent_header *)ip->i_data; + return (ehp->eh_depth); +} + +static inline e4fs_daddr_t +ext4_ext_index_pblock(struct ext4_extent_index *index) +{ + e4fs_daddr_t blk; + + blk = index->ei_leaf_lo; + blk |= (e4fs_daddr_t)index->ei_leaf_hi << 32; + + return (blk); +} + +static inline void +ext4_index_store_pblock(struct ext4_extent_index *index, e4fs_daddr_t pb) +{ + + index->ei_leaf_lo = pb &
0xffffffff; + index->ei_leaf_hi = (pb >> 32) & 0xffff; +} + + +static inline e4fs_daddr_t +ext4_ext_extent_pblock(struct ext4_extent *extent) +{ + e4fs_daddr_t blk; + + blk = extent->e_start_lo; + blk |= (e4fs_daddr_t)extent->e_start_hi << 32; + + return (blk); +} + +static inline void +ext4_ext_store_pblock(struct ext4_extent *ex, e4fs_daddr_t pb) +{ + + ex->e_start_lo = pb & 0xffffffff; + ex->e_start_hi = (pb >> 32) & 0xffff; +} + int ext4_ext_in_cache(struct inode *ip, daddr_t lbn, struct ext4_extent *ep) { @@ -131,8 +208,6 @@ int ret = EXT4_EXT_CACHE_NO; ecp = &ip->i_ext_cache; - - /* cache is invalid */ if (ecp->ec_type == EXT4_EXT_CACHE_NO) return (ret); @@ -146,74 +221,1367 @@ return (ret); } -/* - * Put an ext4_extent structure in ext4 cache. - */ +static int +ext4_ext_check_header(struct inode *ip, struct ext4_extent_header *eh) +{ + struct m_ext2fs *fs; + char *error_msg; + + fs = ip->i_e2fs; + + if (eh->eh_magic != EXT4_EXT_MAGIC) { + error_msg = "invalid magic"; + goto corrupted; + } + if (eh->eh_max == 0) { + error_msg = "invalid eh_max"; + goto corrupted; + } + if (eh->eh_ecount > eh->eh_max) { + error_msg = "invalid eh_entries"; + goto corrupted; + } + + return (0); + +corrupted: + ext2_fserr(fs, ip->i_uid, error_msg); + return (EIO); +} + +static void +ext4_ext_binsearch_index(struct ext4_extent_path *path, int blk) +{ + struct ext4_extent_header *eh; + struct ext4_extent_index *r, *l, *m; + + eh = path->ep_header; + + KASSERT(eh->eh_ecount <= eh->eh_max && eh->eh_ecount > 0, + ("ext4_ext_binsearch_index: bad args")); + + l = EXT_FIRST_INDEX(eh) + 1; + r = EXT_FIRST_INDEX(eh) + eh->eh_ecount - 1; + while (l <= r) { + m = l + (r - l) / 2; + if (blk < m->ei_blk) + r = m - 1; + else + l = m + 1; + } + + path->ep_index = l - 1; +} + +static void +ext4_ext_binsearch_ext(struct ext4_extent_path *path, int blk) +{ + struct ext4_extent_header *eh; + struct ext4_extent *r, *l, *m; + + eh = path->ep_header; + + KASSERT(eh->eh_ecount <= eh->eh_max, + 
("ext4_ext_binsearch_ext: bad args")); + + if (eh->eh_ecount == 0) + return; + + l = EXT_FIRST_EXTENT(eh) + 1; + r = EXT_FIRST_EXTENT(eh) + eh->eh_ecount - 1; + + while (l <= r) { + m = l + (r - l) / 2; + if (blk < m->e_blk) + r = m - 1; + else + l = m + 1; + } + + path->ep_ext = l - 1; +} + +static int +ext4_ext_fill_path_bdata(struct ext4_extent_path *path, + struct buf *bp, uint64_t blk) +{ + + KASSERT(path->ep_data == NULL, + ("ext4_ext_fill_path_bdata: bad ep_data")); + + path->ep_data = malloc(bp->b_bufsize, M_EXT2EXTENTS, M_WAITOK); + if (!path->ep_data) + return (ENOMEM); + + memcpy(path->ep_data, bp->b_data, bp->b_bufsize); + path->ep_blk = blk; + + return (0); +} + +static void +ext4_ext_fill_path_buf(struct ext4_extent_path *path, struct buf *bp) +{ + + KASSERT(path->ep_data != NULL, + ("ext4_ext_fill_path_buf: bad ep_data")); + + memcpy(bp->b_data, path->ep_data, bp->b_bufsize); +} + +static void +ext4_ext_drop_refs(struct ext4_extent_path *path) +{ + int depth, i; + + if (!path) + return; + + depth = path->ep_depth; + for (i = 0; i <= depth; i++, path++) + if (path->ep_data) { + free(path->ep_data, M_EXT2EXTENTS); + path->ep_data = NULL; + } +} + void -ext4_ext_put_cache(struct inode *ip, struct ext4_extent *ep, int type) +ext4_ext_path_free(struct ext4_extent_path *path) { - struct ext4_extent_cache *ecp; - ecp = &ip->i_ext_cache; - ecp->ec_type = type; - ecp->ec_blk = ep->e_blk; - ecp->ec_len = ep->e_len; - ecp->ec_start = (daddr_t)ep->e_start_hi << 32 | ep->e_start_lo; + if (!path) + return; + + ext4_ext_drop_refs(path); + free(path, M_EXT2EXTENTS); } -/* - * Find an extent. 
- */ -struct ext4_extent_path * -ext4_ext_find_extent(struct m_ext2fs *fs, struct inode *ip, - daddr_t lbn, struct ext4_extent_path *path) +int +ext4_ext_find_extent(struct inode *ip, daddr_t block, + struct ext4_extent_path **ppath) +{ + struct m_ext2fs *fs; + struct ext4_extent_header *eh; + struct ext4_extent_path *path; + struct buf *bp; + uint64_t blk; + int error, depth, i, ppos, alloc; + + fs = ip->i_e2fs; + eh = ext4_ext_inode_header(ip); + depth = ext4_ext_inode_depth(ip); + ppos = 0; + alloc = 0; + + error = ext4_ext_check_header(ip, eh); + if (error) + return (error); + + if (!ppath) + return (EINVAL); + + path = *ppath; + if (!path) { + path = malloc(EXT4_EXT_DEPTH_MAX * + sizeof(struct ext4_extent_path), + M_EXT2EXTENTS, M_WAITOK | M_ZERO); + if (!path) + return (ENOMEM); + + *ppath = path; + alloc = 1; + } + + path[0].ep_header = eh; + path[0].ep_data = NULL; + + /* Walk through the tree. */ + i = depth; + while (i) { + ext4_ext_binsearch_index(&path[ppos], block); + blk = ext4_ext_index_pblock(path[ppos].ep_index); + path[ppos].ep_depth = i; + path[ppos].ep_ext = NULL; + + error = bread(ip->i_devvp, fsbtodb(ip->i_e2fs, blk), + ip->i_e2fs->e2fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + goto error; + } + + ppos++; + if (ppos > depth) { + ext2_fserr(fs, ip->i_uid, + "ppos > depth => extent corrupted"); + error = EIO; + brelse(bp); + goto error; + } + + ext4_ext_fill_path_bdata(&path[ppos], bp, blk); + brelse(bp); + + eh = ext4_ext_block_header(path[ppos].ep_data); + error = ext4_ext_check_header(ip, eh); + if (error) + goto error; + + path[ppos].ep_header = eh; + + i--; + } + + error = ext4_ext_check_header(ip, eh); + if (error) + goto error; + + /* Find extent. 
*/ + path[ppos].ep_depth = i; + path[ppos].ep_header = eh; + path[ppos].ep_ext = NULL; + path[ppos].ep_index = NULL; + ext4_ext_binsearch_ext(&path[ppos], block); + return (0); + +error: + ext4_ext_drop_refs(path); + if (alloc) + free(path, M_EXT2EXTENTS); + + *ppath = NULL; + + return (error); +} + +static inline int +ext4_ext_space_root(struct inode *ip) +{ + int size; + + size = sizeof(ip->i_data); + size -= sizeof(struct ext4_extent_header); + size /= sizeof(struct ext4_extent); + + return (size); +} + +static inline int +ext4_ext_space_block(struct inode *ip) +{ + struct m_ext2fs *fs; + int size; + + fs = ip->i_e2fs; + + size = (fs->e2fs_bsize - sizeof(struct ext4_extent_header)) / + sizeof(struct ext4_extent); + + return (size); +} + +static inline int +ext4_ext_space_block_index(struct inode *ip) +{ + struct m_ext2fs *fs; + int size; + + fs = ip->i_e2fs; + + size = (fs->e2fs_bsize - sizeof(struct ext4_extent_header)) / + sizeof(struct ext4_extent_index); + + return (size); +} + +void +ext4_ext_tree_init(struct inode *ip) { struct ext4_extent_header *ehp; - uint16_t i; - int error, size; - daddr_t nblk; - ehp = (struct ext4_extent_header *)(char *)ip->i_db; + ip->i_flag |= IN_E4EXTENTS; - if (ehp->eh_magic != EXT4_EXT_MAGIC) - return (NULL); + /* memset() takes a byte count: clear all of i_data, not one byte per block pointer. */ + memset(ip->i_data, 0, sizeof(ip->i_data)); + ehp = (struct ext4_extent_header *)ip->i_data; + ehp->eh_magic = EXT4_EXT_MAGIC; + ehp->eh_max = ext4_ext_space_root(ip); + ip->i_ext_cache.ec_type = EXT4_EXT_CACHE_NO; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + ext2_update(ip->i_vnode, 1); +} + +static inline void +ext4_ext_put_in_cache(struct inode *ip, uint32_t blk, + uint32_t len, uint32_t start, int type) +{ + + KASSERT(len != 0, ("ext4_ext_put_in_cache: bad input")); + + ip->i_ext_cache.ec_type = type; + ip->i_ext_cache.ec_blk = blk; + ip->i_ext_cache.ec_len = len; + ip->i_ext_cache.ec_start = start; +} + +static e4fs_daddr_t +ext4_ext_blkpref(struct inode *ip, struct ext4_extent_path *path, + e4fs_daddr_t block)
+{ + struct m_ext2fs *fs; + struct ext4_extent *ex; + e4fs_daddr_t bg_start; + int depth; + + fs = ip->i_e2fs; + + if (path) { + depth = path->ep_depth; + ex = path[depth].ep_ext; + if (ex) { + e4fs_daddr_t pblk = ext4_ext_extent_pblock(ex); + e2fs_daddr_t blk = ex->e_blk; + + if (block > blk) + return (pblk + (block - blk)); + else + return (pblk - (blk - block)); + } + + /* Try to get block from index itself. */ + if (path[depth].ep_data) + return (path[depth].ep_blk); + } + + /* Use inode's group. */ + bg_start = (ip->i_block_group * EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) + + fs->e2fs->e2fs_first_dblock; + + return (bg_start + block); +} + +static int inline +ext4_can_extents_be_merged(struct ext4_extent *ex1, + struct ext4_extent *ex2) +{ + + if (ex1->e_blk + ex1->e_len != ex2->e_blk) + return (0); + + if (ex1->e_len + ex2->e_len > EXT4_MAX_LEN) + return (0); + + if (ext4_ext_extent_pblock(ex1) + ex1->e_len == + ext4_ext_extent_pblock(ex2)) + return (1); + + return (0); +} + +static unsigned +ext4_ext_next_leaf_block(struct inode *ip, struct ext4_extent_path *path) +{ + int depth = path->ep_depth; + + /* Empty tree */ + if (depth == 0) + return (EXT4_MAX_BLOCKS); + + /* Go to indexes. 
*/ + depth--; + + while (depth >= 0) { + if (path[depth].ep_index != + EXT_LAST_INDEX(path[depth].ep_header)) + return (path[depth].ep_index[1].ei_blk); + + depth--; + } + + return (EXT4_MAX_BLOCKS); +} + +static int +ext4_ext_dirty(struct inode *ip, struct ext4_extent_path *path) +{ + struct m_ext2fs *fs; + struct buf *bp; + uint64_t blk; + int error; + + fs = ip->i_e2fs; + + if (!path) + return (EINVAL); + + if (path->ep_data) { + blk = path->ep_blk; + bp = getblk(ip->i_devvp, fsbtodb(fs, blk), + fs->e2fs_bsize, 0, 0, 0); + if (!bp) + return (EIO); + ext4_ext_fill_path_buf(path, bp); + error = bwrite(bp); + } else { + ip->i_flag |= IN_CHANGE | IN_UPDATE; + error = ext2_update(ip->i_vnode, 1); + } + + return (error); +} + +static int +ext4_ext_insert_index(struct inode *ip, struct ext4_extent_path *path, + uint32_t lblk, e4fs_daddr_t blk) +{ + struct m_ext2fs *fs; + struct ext4_extent_index *idx; + int len; + + fs = ip->i_e2fs; + + if (lblk == path->ep_index->ei_blk) { + ext2_fserr(fs, ip->i_uid, + "lblk == index blk => extent corrupted"); + return (EIO); + } + + if (path->ep_header->eh_ecount >= path->ep_header->eh_max) { + ext2_fserr(fs, ip->i_uid, + "ecout > maxcount => extent corrupted"); + return (EIO); + } + + if (lblk > path->ep_index->ei_blk) { + /* Insert after. */ + idx = path->ep_index + 1; + } else { + /* Insert before. 
*/ + idx = path->ep_index; + } + + len = EXT_LAST_INDEX(path->ep_header) - idx + 1; + if (len > 0) + memmove(idx + 1, idx, len * sizeof(struct ext4_extent_index)); + + if (idx > EXT_MAX_INDEX(path->ep_header)) { + ext2_fserr(fs, ip->i_uid, + "index is out of range => extent corrupted"); + return (EIO); + } + + idx->ei_blk = lblk; + ext4_index_store_pblock(idx, blk); + path->ep_header->eh_ecount++; + + return (ext4_ext_dirty(ip, path)); +} + +static e4fs_daddr_t +ext4_ext_alloc_meta(struct inode *ip) +{ + e4fs_daddr_t blk = ext2_allocmeta(ip); + if (blk) { + ip->i_blocks += btodb(ip->i_e2fs->e2fs_bsize); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + ext2_update(ip->i_vnode, 1); + } + + return (blk); +} + +static void +ext4_ext_blkfree(struct inode *ip, uint64_t blk, int count, int flags) +{ + struct m_ext2fs *fs; + int i, blocksreleased; + + fs = ip->i_e2fs; + /* i_blocks is accounted in btodb() units (see ext4_ext_alloc_meta), so compare and subtract in the same units. */ + blocksreleased = btodb(fs->e2fs_bsize) * count; + + for(i = 0; i < count; i++) + ext2_blkfree(ip, blk + i, fs->e2fs_bsize); + + if (ip->i_blocks >= blocksreleased) + ip->i_blocks -= blocksreleased; + else + ip->i_blocks = 0; + + ip->i_flag |= IN_CHANGE | IN_UPDATE; + ext2_update(ip->i_vnode, 1); +} + +static int +ext4_ext_split(struct inode *ip, struct ext4_extent_path *path, + struct ext4_extent *newext, int at) +{ + struct m_ext2fs *fs; + struct buf *bp; + int depth = ext4_ext_inode_depth(ip); + struct ext4_extent_header *neh; + struct ext4_extent_index *fidx; + struct ext4_extent *ex; + int i = at, k, m, a; + e4fs_daddr_t newblk, oldblk; + uint32_t border; + e4fs_daddr_t *ablks = NULL; + int error = 0; + + fs = ip->i_e2fs; + bp = NULL; + + /* + * We will split at current extent for now.
+ */ + if (path[depth].ep_ext > EXT_MAX_EXTENT(path[depth].ep_header)) { + ext2_fserr(fs, ip->i_uid, + "extent is out of range => extent corrupted"); + return (EIO); + } + + if (path[depth].ep_ext != EXT_MAX_EXTENT(path[depth].ep_header)) + border = path[depth].ep_ext[1].e_blk; + else + border = newext->e_blk; + + /* Allocate new blocks. */ + ablks = malloc(sizeof(e4fs_daddr_t) * depth, + M_EXT2EXTENTS, M_WAITOK | M_ZERO); + if (!ablks) + return (ENOMEM); + for (a = 0; a < depth - at; a++) { + newblk = ext4_ext_alloc_meta(ip); + if (newblk == 0) { + /* Report the failure so cleanup frees the blocks already taken and the caller does not see success. */ + error = ENOSPC; + goto cleanup; + } + ablks[a] = newblk; + } + + newblk = ablks[--a]; + bp = getblk(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, 0, 0, 0); + if (!bp) { + error = EIO; + goto cleanup; + } + + neh = ext4_ext_block_header(bp->b_data); + neh->eh_ecount = 0; + neh->eh_max = ext4_ext_space_block(ip); + neh->eh_magic = EXT4_EXT_MAGIC; + neh->eh_depth = 0; + ex = EXT_FIRST_EXTENT(neh); + + if (path[depth].ep_header->eh_ecount != path[depth].ep_header->eh_max) { + ext2_fserr(fs, ip->i_uid, + "extents count out of range => extent corrupted"); + error = EIO; + goto cleanup; + } + + /* Start copy from next extent. */ + m = 0; + path[depth].ep_ext++; + while (path[depth].ep_ext <= EXT_MAX_EXTENT(path[depth].ep_header)) { + path[depth].ep_ext++; + m++; + } + if (m) { + memmove(ex, path[depth].ep_ext - m, + sizeof(struct ext4_extent) * m); + neh->eh_ecount = neh->eh_ecount + m; + } + + bwrite(bp); + bp = NULL; + + /* Fix old leaf. */ + if (m) { + path[depth].ep_header->eh_ecount = + path[depth].ep_header->eh_ecount - m; + ext4_ext_dirty(ip, path + depth); + } + + /* Create intermediate indexes. */ + k = depth - at - 1; + KASSERT(k >= 0, ("ext4_ext_split: negative k")); + + /* Insert new index into current index block.
*/ + i = depth - 1; + while (k--) { + oldblk = newblk; + newblk = ablks[--a]; + error = bread(ip->i_devvp, fsbtodb(fs, newblk), + (int)fs->e2fs_bsize, NOCRED, &bp); + if (error) { + /* cleanup releases bp when it is set; releasing it here as well would do it twice. */ + goto cleanup; + } + + neh = (struct ext4_extent_header *)bp->b_data; + neh->eh_ecount = 1; + neh->eh_magic = EXT4_EXT_MAGIC; + neh->eh_max = ext4_ext_space_block_index(ip); + neh->eh_depth = depth - i; + fidx = EXT_FIRST_INDEX(neh); + fidx->ei_blk = border; + ext4_index_store_pblock(fidx, oldblk); + + m = 0; + path[i].ep_index++; + while (path[i].ep_index <= EXT_MAX_INDEX(path[i].ep_header)) { + path[i].ep_index++; + m++; + } + if (m) { + memmove(++fidx, path[i].ep_index - m, + sizeof(struct ext4_extent_index) * m); + neh->eh_ecount = neh->eh_ecount + m; + } + + bwrite(bp); + bp = NULL; + + /* Fix old index. */ + if (m) { + path[i].ep_header->eh_ecount = + path[i].ep_header->eh_ecount - m; + ext4_ext_dirty(ip, path + i); + } + + i--; + } + + error = ext4_ext_insert_index(ip, path + at, border, newblk); + +cleanup: + if (bp) + brelse(bp); + + if (error) { + for (i = 0; i < depth; i++) { + if (!ablks[i]) + continue; + ext4_ext_blkfree(ip, ablks[i], 1, 0); + } + } + + free(ablks, M_EXT2EXTENTS); + + return (error); +} + +static int +ext4_ext_grow_indepth(struct inode *ip, struct ext4_extent_path *path, + struct ext4_extent *newext) +{ + struct m_ext2fs *fs; + struct ext4_extent_path *curpath; + struct ext4_extent_header *neh; + struct ext4_extent_index *fidx; + struct buf *bp; + e4fs_daddr_t newblk; + int error = 0; + + fs = ip->i_e2fs; + curpath = path; + + newblk = ext4_ext_alloc_meta(ip); + /* error is still 0 here; a failed allocation must not be reported as success. */ + if (newblk == 0) + return (ENOSPC); + + bp = getblk(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, 0, 0, 0); + if (!bp) + return (EIO); + + /* Move top-level index/leaf into new block.
*/ + memmove(bp->b_data, curpath->ep_header, sizeof(ip->i_data)); + + /* Set size of new block */ + neh = ext4_ext_block_header(bp->b_data); + neh->eh_magic = EXT4_EXT_MAGIC; + + if (ext4_ext_inode_depth(ip)) + neh->eh_max = ext4_ext_space_block_index(ip); + else + neh->eh_max = ext4_ext_space_block(ip); + + error = bwrite(bp); + /* NOTE(review): bwrite() is treated as consuming the buffer (as in ext4_ext_split), so forget it before any exit path. */ + bp = NULL; + if (error) + goto out; + + curpath->ep_header->eh_magic = EXT4_EXT_MAGIC; + curpath->ep_header->eh_max = ext4_ext_space_root(ip); + curpath->ep_header->eh_ecount = 1; + curpath->ep_index = EXT_FIRST_INDEX(curpath->ep_header); + curpath->ep_index->ei_blk = EXT_FIRST_EXTENT(path[0].ep_header)->e_blk; + ext4_index_store_pblock(curpath->ep_index, newblk); + + neh = ext4_ext_inode_header(ip); + fidx = EXT_FIRST_INDEX(neh); + neh->eh_depth = path->ep_depth + 1; + ext4_ext_dirty(ip, curpath); +out: + /* Never hand a NULL buffer to brelse(). */ + if (bp != NULL) + brelse(bp); + + return (error); +} + +static int +ext4_ext_create_new_leaf(struct inode *ip, struct ext4_extent_path *path, + struct ext4_extent *newext) +{ + struct m_ext2fs *fs; + struct ext4_extent_path *curpath; + int depth, i, error; + + fs = ip->i_e2fs; + +repeat: + i = depth = ext4_ext_inode_depth(ip); + + /* Look for free index entry in the tree */ + curpath = path + depth; + while (i > 0 && !EXT_HAS_FREE_INDEX(curpath)) { + i--; + curpath--; + } + + /* + * We use already allocated block for index block, + * so subsequent data blocks should be contiguous. + */ + if (EXT_HAS_FREE_INDEX(curpath)) { + error = ext4_ext_split(ip, path, newext, i); + if (error) + goto out; + + /* Refill path. */ + ext4_ext_drop_refs(path); + error = ext4_ext_find_extent(ip, newext->e_blk, &path); + if (error) + goto out; + } else { + /* Tree is full, do grow in depth. */ + error = ext4_ext_grow_indepth(ip, path, newext); + if (error) + goto out; + + /* Refill path. */ + ext4_ext_drop_refs(path); + error = ext4_ext_find_extent(ip, newext->e_blk, &path); + if (error) + goto out; + + /* Check and split tree if required.
*/ + depth = ext4_ext_inode_depth(ip); + if (path[depth].ep_header->eh_ecount == + path[depth].ep_header->eh_max) + goto repeat; + } + +out: + return (error); +} + +static int +ext4_ext_correct_indexes(struct inode *ip, struct ext4_extent_path *path) +{ + struct ext4_extent_header *eh; + struct ext4_extent *ex; + int32_t border; + int depth, k; + + depth = ext4_ext_inode_depth(ip); + eh = path[depth].ep_header; + ex = path[depth].ep_ext; + + if (ex == NULL || eh == NULL) + return (EIO); + + if (!depth) + return (0); + + /* We will correct tree if first leaf got modified only. */ + if (ex != EXT_FIRST_EXTENT(eh)) + return (0); + + k = depth - 1; + border = path[depth].ep_ext->e_blk; + path[k].ep_index->ei_blk = border; + ext4_ext_dirty(ip, path + k); + while (k--) { + /* Change all left-side indexes. */ + if (path[k+1].ep_index != EXT_FIRST_INDEX(path[k+1].ep_header)) + break; + + path[k].ep_index->ei_blk = border; + ext4_ext_dirty(ip, path + k); + } + + return (0); +} + +static int +ext4_ext_insert_extent(struct inode *ip, struct ext4_extent_path *path, + struct ext4_extent *newext) +{ + struct m_ext2fs *fs; + struct ext4_extent_header * eh; + struct ext4_extent *ex, *nex, *nearex; + struct ext4_extent_path *npath; + int depth, len, error, next; + + fs = ip->i_e2fs; + depth = ext4_ext_inode_depth(ip); + ex = path[depth].ep_ext; + npath = NULL; + + if (newext->e_len == 0 || path[depth].ep_header == NULL) + return (EINVAL); + + /* Insert block into found extent. 
*/ + if (ex && ext4_can_extents_be_merged(ex, newext)) { + ex->e_len = ex->e_len + newext->e_len; + eh = path[depth].ep_header; + nearex = ex; + goto merge; + } + +repeat: + depth = ext4_ext_inode_depth(ip); + eh = path[depth].ep_header; + if (eh->eh_ecount < eh->eh_max) + goto has_space; + + /* Try next leaf */ + nex = EXT_LAST_EXTENT(eh); + next = ext4_ext_next_leaf_block(ip, path); + if (newext->e_blk > nex->e_blk && next != EXT4_MAX_BLOCKS) { + KASSERT(npath == NULL, + ("ext4_ext_insert_extent: bad path")); + + error = ext4_ext_find_extent(ip, next, &npath); + if (error) + goto cleanup; + + if (npath->ep_depth != path->ep_depth) { + error = EIO; + goto cleanup; + } + + eh = npath[depth].ep_header; + if (eh->eh_ecount < eh->eh_max) { + path = npath; + goto repeat; + } + } + + /* + * There is no free space in the found leaf, + * try to add a new leaf to the tree. + */ + error = ext4_ext_create_new_leaf(ip, path, newext); + if (error) + goto cleanup; + + depth = ext4_ext_inode_depth(ip); + eh = path[depth].ep_header; + +has_space: + nearex = path[depth].ep_ext; + if (!nearex) { + /* Create new extent in the leaf. */ + path[depth].ep_ext = EXT_FIRST_EXTENT(eh); + } else if (newext->e_blk > nearex->e_blk) { + if (nearex != EXT_LAST_EXTENT(eh)) { + len = EXT_MAX_EXTENT(eh) - nearex; + len = (len - 1) * sizeof(struct ext4_extent); + len = len < 0 ? 0 : len; + memmove(nearex + 2, nearex + 1, len); + } + path[depth].ep_ext = nearex + 1; + } else { + len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); + len = len < 0 ? 0 : len; + memmove(nearex + 1, nearex, len); + path[depth].ep_ext = nearex; + } + + eh->eh_ecount = eh->eh_ecount + 1; + nearex = path[depth].ep_ext; + nearex->e_blk = newext->e_blk; + nearex->e_start_lo = newext->e_start_lo; + nearex->e_start_hi = newext->e_start_hi; + nearex->e_len = newext->e_len; - path->ep_header = ehp; +merge: + /* Try to merge extents to the right. 
*/ + while (nearex < EXT_LAST_EXTENT(eh)) { + if (!ext4_can_extents_be_merged(nearex, nearex + 1)) + break; - daddr_t first_lbn = 0; - daddr_t last_lbn = lblkno(ip->i_e2fs, ip->i_size); + /* Merge with next extent. */ + nearex->e_len = nearex->e_len + nearex[1].e_len; + if (nearex + 1 < EXT_LAST_EXTENT(eh)) { + len = (EXT_LAST_EXTENT(eh) - nearex - 1) * + sizeof(struct ext4_extent); + memmove(nearex + 1, nearex + 2, len); + } + + eh->eh_ecount = eh->eh_ecount - 1; + KASSERT(eh->eh_ecount != 0, + ("ext4_ext_insert_extent: bad ecount")); + } + + /* + * Try to merge extents to the left, + * start from inexes correction. + */ + error = ext4_ext_correct_indexes(ip, path); + if (error) + goto cleanup; + + ext4_ext_dirty(ip, path + depth); + +cleanup: + if (npath) { + ext4_ext_drop_refs(npath); + free(npath, M_EXT2EXTENTS); + } + + ip->i_ext_cache.ec_type = EXT4_EXT_CACHE_NO; + return (error); +} + +static e4fs_daddr_t +ext4_new_blocks(struct inode *ip, daddr_t lbn, e4fs_daddr_t pref, + struct ucred *cred, unsigned long *count, int *perror) +{ + struct m_ext2fs *fs; + struct ext2mount *ump; + e4fs_daddr_t newblk; + + fs = ip->i_e2fs; + ump = ip->i_ump; + + /* + * We will allocate only single block for now. 
+	 */
+	if (*count > 1)
+		*count = 1;	/* tell the caller only one block is allocated */
+
+	EXT2_LOCK(ip->i_ump);
+	*perror = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newblk);
+	if (*perror)
+		return (0);
+
+	if (newblk) {
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		ext2_update(ip->i_vnode, 1);
+	}
-	for (i = ehp->eh_depth; i != 0; --i) {
-		path->ep_depth = i;
-		path->ep_ext = NULL;
-		if (ext4_ext_binsearch_index(ip, path, lbn, &first_lbn,
-		    &last_lbn)) {
-			return (path);
+	return (newblk);
+}
+
+/*
+ * Map logical block iblk of the inode to a physical block.  When neither
+ * the extent cache nor the extent tree covers iblk, a new block is
+ * allocated and an extent describing it is inserted into the tree.
+ *
+ * *pallocated is set to 1 only when a new block was allocated.  If nb is
+ * not NULL, *nb receives the physical block number.  If bpp is not NULL,
+ * the block is read and its buffer is returned through *bpp.
+ * Returns 0 on success or an errno value.
+ */
+int
+ext4_ext_get_blocks(struct inode *ip, e4fs_daddr_t iblk,
+    unsigned long max_blocks, struct ucred *cred, struct buf **bpp,
+    int *pallocated, uint32_t *nb)
+{
+	struct m_ext2fs *fs;
+	struct buf *bp = NULL;
+	struct ext4_extent_path *path;
+	struct ext4_extent newex, *ex;
+	e4fs_daddr_t bpref, newblk = 0;
+	unsigned long allocated = 0;
+	int error = 0, depth;
+
+	fs = ip->i_e2fs;
+	*pallocated = 0;
+	path = NULL;
+	if(bpp)
+		*bpp = NULL;
+
+	/* Check cache. */
+	if ((bpref = ext4_ext_in_cache(ip, iblk, &newex))) {
+		if (bpref == EXT4_EXT_CACHE_IN) {
+			/* Block is already allocated. */
+			newblk = iblk - newex.e_blk +
+			    ext4_ext_extent_pblock(&newex);
+			allocated = newex.e_len - (iblk - newex.e_blk);
+			goto out;
+		} else {
+			error = EIO;
+			goto out2;
 		}
+	}
-		nblk = (daddr_t)path->ep_index->ei_leaf_hi << 32 |
-		    path->ep_index->ei_leaf_lo;
-		size = blksize(fs, ip, nblk);
-		if (path->ep_bp != NULL) {
-			brelse(path->ep_bp);
-			path->ep_bp = NULL;
+	error = ext4_ext_find_extent(ip, iblk, &path);
+	if (error) {
+		goto out2;
+	}
+
+	depth = ext4_ext_inode_depth(ip);
+	if (path[depth].ep_ext == NULL && depth != 0) {
+		error = EIO;
+		goto out2;
+	}
+
+	if ((ex = path[depth].ep_ext)) {
+		uint64_t lblk = ex->e_blk;
+		uint16_t e_len = ex->e_len;
+		e4fs_daddr_t e_start = ext4_ext_extent_pblock(ex);
+
+		/*
+		 * Lengths above EXT4_MAX_LEN are not handled here.  Fail
+		 * explicitly: leaving error at 0 would report success with
+		 * physical block 0.
+		 */
+		if (e_len > EXT4_MAX_LEN) {
+			error = EIO;
+			goto out2;
+		}
+
+		/* If we found extent covers block, simply return it. */
+		if (iblk >= lblk && iblk < lblk + e_len) {
+			newblk = iblk - lblk + e_start;
+			allocated = e_len - (iblk - lblk);
+			ext4_ext_put_in_cache(ip, lblk, e_len,
+			    e_start, EXT4_EXT_CACHE_IN);
+			goto out;
 		}
-		error = bread(ip->i_devvp, fsbtodb(fs, nblk), size, NOCRED,
-		    &path->ep_bp);
+	}
+
+	/* Allocate the new block. */
+	if (S_ISREG(ip->i_mode) && (!ip->i_next_alloc_block)) {
+		ip->i_next_alloc_goal = 0;
+	}
+
+	bpref = ext4_ext_blkpref(ip, path, iblk);
+	allocated = max_blocks;
+	newblk = ext4_new_blocks(ip, iblk, bpref, cred, &allocated, &error);
+	if (!newblk)
+		goto out2;
+
+	/* Try to insert new extent into found leaf and return. */
+	newex.e_blk = iblk;
+	ext4_ext_store_pblock(&newex, newblk);
+	newex.e_len = allocated;
+	error = ext4_ext_insert_extent(ip, path, &newex);
+	if (error)
+		goto out2;
+
+	newblk = ext4_ext_extent_pblock(&newex);
+	ext4_ext_put_in_cache(ip, iblk, allocated, newblk, EXT4_EXT_CACHE_IN);
+	*pallocated = 1;
+
+out:
+	if (allocated > max_blocks)
+		allocated = max_blocks;
+
+	if (bpp)
+	{
+		error = bread(ip->i_devvp, fsbtodb(fs, newblk),
+		    fs->e2fs_bsize, cred, &bp);
 		if (error) {
-			brelse(path->ep_bp);
-			path->ep_bp = NULL;
-			return (NULL);
+			brelse(bp);
+		} else {
+			*bpp = bp;
+		}
+	}
+
+out2:
+	if (path) {
+		ext4_ext_drop_refs(path);
+		free(path, M_EXT2EXTENTS);
+	}
+
+	if (nb)
+		*nb = newblk;
+
+	return (error);
+}
+
+static inline uint16_t
+ext4_ext_get_actual_len(struct ext4_extent *ext)
+{
+
+	return (ext->e_len <= EXT_INIT_MAX_LEN ?
+	    ext->e_len : (ext->e_len - EXT_INIT_MAX_LEN));
+}
+
+static inline struct ext4_extent_header *
+ext4_ext_header(struct inode *ip)
+{
+
+	return (struct ext4_extent_header *)ip->i_db;
+}
+
+static int
+ext4_remove_blocks(struct inode *ip, struct ext4_extent *ex,
+    unsigned long from, unsigned long to)
+{
+	unsigned long num, start;
+
+	if (from >= ex->e_blk &&
+	    to == ex->e_blk + ext4_ext_get_actual_len(ex) - 1) {
+		/* Tail cleanup.
*/
+		num = ex->e_blk + ext4_ext_get_actual_len(ex) - from;
+		start = ext4_ext_extent_pblock(ex) +
+		    ext4_ext_get_actual_len(ex) - num;
+		ext4_ext_blkfree(ip, start, num, 0);
+	}
+
+	return (0);
+}
+
+static int
+ext4_ext_rm_index(struct inode *ip, struct ext4_extent_path *path)
+{
+	e4fs_daddr_t leaf;
+
+	/* Free index block. */
+	path--;
+	leaf = ext4_ext_index_pblock(path->ep_index);
+	KASSERT(path->ep_header->eh_ecount != 0,
+	    ("ext4_ext_rm_index: bad ecount"));
+	path->ep_header->eh_ecount--;
+	ext4_ext_dirty(ip, path);
+	ext4_ext_blkfree(ip, leaf, 1, 0);
+	return (0);
+}
+
+/*
+ * Remove from the leaf at the bottom of path every extent, or extent tail,
+ * that reaches logical block start or beyond, walking from the last extent
+ * towards the first.  Removing the head or the middle of an extent is not
+ * implemented and yields EINVAL.  When the leaf ends up empty and has a
+ * data block of its own, its entry in the index above is removed as well.
+ * Returns 0 on success or an errno value.
+ */
+static int
+ext4_ext_rm_leaf(struct inode *ip, struct ext4_extent_path *path,
+    uint64_t start)
+{
+	struct m_ext2fs *fs;
+	int depth, credits;
+	struct ext4_extent_header *eh;
+	unsigned int a, b, block, num;
+	unsigned long ex_blk;
+	unsigned short ex_len;
+	struct ext4_extent *ex;
+	int error, correct_index;
+
+	fs = ip->i_e2fs;
+	depth = ext4_ext_inode_depth(ip);
+	correct_index = 0;
+	/* The loop below may run zero times; error must not stay unset. */
+	error = 0;
+
+	if (!path[depth].ep_header) {
+		if (path[depth].ep_data == NULL)
+			return (EINVAL);
+		path[depth].ep_header =
+		    (struct ext4_extent_header* )path[depth].ep_data;
+	}
+
+	eh = path[depth].ep_header;
+	if (!eh) {
+		ext2_fserr(fs, ip->i_uid, "bad header => extent corrupted");
+		return (EIO);
+	}
+
+	ex = EXT_LAST_EXTENT(eh);
+	ex_blk = ex->e_blk;
+	ex_len = ext4_ext_get_actual_len(ex);
+
+	while (ex >= EXT_FIRST_EXTENT(eh) && ex_blk + ex_len > start) {
+		path[depth].ep_ext = ex;
+		a = ex_blk > start ? ex_blk : start;
+		b = (uint64_t)ex_blk + ex_len - 1 <
+		    EXT4_MAX_BLOCKS ? ex_blk + ex_len - 1 : EXT4_MAX_BLOCKS;
+
+		if (a != ex_blk && b != ex_blk + ex_len - 1)
+			return (EINVAL);
+		else if (a != ex_blk) {
+			/* Remove tail of the extent. */
+			block = ex_blk;
+			num = a - block;
+		} else if (b != ex_blk + ex_len - 1) {
+			/* Remove head of the extent, not implemented. */
+			return (EINVAL);
+		} else {
+			/*
+			 * Remove whole extent.  The branches above leave only
+			 * the case where the range spans the extent exactly.
+			 */
+			block = ex_blk;
+			num = 0;
+			KASSERT(a == ex_blk, ("ext4_ext_rm_leaf: bad a"));
+			KASSERT(b == ex_blk + ex_len - 1,
+			    ("ext4_ext_rm_leaf: bad b"));
+		}
+
+		credits = EXT4_EXT_DEPTH_MAX;
+		if (ex == EXT_FIRST_EXTENT(eh)) {
+			correct_index = 1;
+			credits += (ext4_ext_inode_depth(ip)) + 1;
+		}
+
+		error = ext4_remove_blocks(ip, ex, a, b);
+		if (error)
+			goto out;
+
+		if (num == 0) {
+			ext4_ext_store_pblock(ex, 0);
+			eh->eh_ecount--;
 		}
-		ehp = (struct ext4_extent_header *)path->ep_bp->b_data;
-		path->ep_header = ehp;
+
+		ex->e_blk = block;
+		ex->e_len = num;
+
+		ext4_ext_dirty(ip, path + depth);
+
+		ex--;
+		ex_blk = ex->e_blk;
+		ex_len = ext4_ext_get_actual_len(ex);
+	}
+
+	if (correct_index && eh->eh_ecount)
+		error = ext4_ext_correct_indexes(ip, path);
+
+	/*
+	 * If this leaf is free, we should
+	 * remove it from index block above.
+	 */
+	if (error == 0 && eh->eh_ecount == 0 && path[depth].ep_data != NULL)
+		error = ext4_ext_rm_index(ip, path + depth);
+
+out:
+	return (error);
+}
+
+static struct buf *
+ext4_read_extent_tree_block(struct inode *ip, e4fs_daddr_t pblk,
+    int depth, int flags)
+{
+	struct m_ext2fs *fs;
+	struct ext4_extent_header *eh;
+	struct buf *bp;
+	int error;
+
+	fs = ip->i_e2fs;
+
+	error = bread(ip->i_devvp, fsbtodb(fs, pblk),
+	    fs->e2fs_bsize, NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		return (NULL);
+	}
+
+	eh = ext4_ext_block_header(bp->b_data);
+	if (eh->eh_depth != depth) {
+		ext2_fserr(fs, ip->i_uid, "unexpected eh_depth");
+		goto err;
+	}
+
+	error = ext4_ext_check_header(ip, eh);
+	if (error)
+		goto err;
+
+	return (bp);
+
+err:
+	brelse(bp);
+	return (NULL);
+
+}
+
+/*
+ * Returns 0 when path->ep_index lies before the first index entry of the
+ * node or when eh_ecount equals the saved index_count; returns 1 otherwise.
+ */
+static inline int
+ext4_ext_more_to_rm(struct ext4_extent_path *path)
+{
+
+	KASSERT(path->ep_index != NULL,
+	    ("ext4_ext_more_to_rm: bad index from path"));
+
+	if (path->ep_index < EXT_FIRST_INDEX(path->ep_header))
+		return (0);
+
+	if (path->ep_header->eh_ecount == path->index_count)
+		return (0);
+
+	return (1);
+}
+
+int
+ext4_ext_remove_space(struct inode *ip,
off_t length, int flags, + struct ucred *cred, struct thread *td) +{ + struct buf *bp; + struct ext4_extent_header *ehp; + struct ext4_extent_path *path; + int depth; + int i, error; + + ehp = (struct ext4_extent_header *)ip->i_db; + depth = ext4_ext_inode_depth(ip); + + error = ext4_ext_check_header(ip, ehp); + if(error) + return (error); + + path = malloc(sizeof(struct ext4_extent_path) * (depth + 1), + M_EXT2EXTENTS, M_WAITOK | M_ZERO); + if (!path) + return (ENOMEM); + + i = 0; + path[0].ep_header = ehp; + path[0].ep_depth = depth; + while (i >= 0 && error == 0) { + if (i == depth) { + /* This is leaf. */ + error = ext4_ext_rm_leaf(ip, path, length); + if (error) + break; + free(path[i].ep_data, M_EXT2EXTENTS); + path[i].ep_data = NULL; + i--; + continue; + } + + /* This is index. */ + if (!path[i].ep_header) + path[i].ep_header = + (struct ext4_extent_header *)path[i].ep_data; + + if (!path[i].ep_index) { + /* This level hasn't touched yet. */ + path[i].ep_index = EXT_LAST_INDEX(path[i].ep_header); + path[i].index_count = path[i].ep_header->eh_ecount + 1; + } else { + /* We've already was here, see at next index. */ + path[i].ep_index--; + } + + if (ext4_ext_more_to_rm(path + i)) { + memset(path + i + 1, 0, sizeof(*path)); + bp = ext4_read_extent_tree_block(ip, + ext4_ext_index_pblock(path[i].ep_index), + path[0].ep_depth - (i + 1), 0); + if (!bp) { + error = EIO; + break; + } + + ext4_ext_fill_path_bdata(&path[i+1], bp, + ext4_ext_index_pblock(path[i].ep_index)); + brelse(bp); + path[i].index_count = path[i].ep_header->eh_ecount; + i++; + } else { + if (path[i].ep_header->eh_ecount == 0 && i > 0) { + /* Index is empty, remove it. */ + error = ext4_ext_rm_index(ip, path + i); + } + free(path[i].ep_data, M_EXT2EXTENTS); + path[i].ep_data = NULL; + i--; + } + } + + if (path->ep_header->eh_ecount == 0) { + /* + * Truncate the tree to zero. 
+ */ + ext4_ext_header(ip)->eh_depth = 0; + ext4_ext_header(ip)->eh_max = ext4_ext_space_root(ip); + ext4_ext_dirty(ip, path); + } - path->ep_depth = i; - path->ep_ext = NULL; - path->ep_index = NULL; - path->ep_is_sparse = false; + ext4_ext_drop_refs(path); + free(path, M_EXT2EXTENTS); - ext4_ext_binsearch(ip, path, lbn, first_lbn, last_lbn); - return (path); + return (error); } Index: sys/fs/ext2fs/ext2_extern.h =================================================================== --- sys/fs/ext2fs/ext2_extern.h +++ sys/fs/ext2fs/ext2_extern.h @@ -51,7 +51,7 @@ int ext2_add_entry(struct vnode *, struct ext2fs_direct_2 *); int ext2_alloc(struct inode *, daddr_t, e4fs_daddr_t, int, struct ucred *, e4fs_daddr_t *); -daddr_t ext2_allocfacl(struct inode *ip); +daddr_t ext2_allocmeta(struct inode *ip); int ext2_balloc(struct inode *, e2fs_lbn_t, int, struct ucred *, struct buf **, int); int ext2_blkatoff(struct vnode *, off_t, char **, struct buf **); @@ -60,6 +60,7 @@ e2fs_daddr_t); int ext2_bmap(struct vop_bmap_args *); int ext2_bmaparray(struct vnode *, daddr_t, daddr_t *, int *, int *); +int ext4_bmapext(struct vnode *, int32_t, int64_t *, int *, int *); void ext2_clusteracct(struct m_ext2fs *, char *, int, daddr_t, int); void ext2_dirbad(struct inode *ip, doff_t offset, char *how); void ext2_ei2i(struct ext2fs_dinode *, struct inode *); @@ -100,6 +101,7 @@ int *, doff_t *, doff_t *, doff_t *, struct ext2fs_searchslot *); int ext2_gd_csum_verify(struct m_ext2fs *fs, struct cdev *dev); void ext2_gd_csum_set(struct m_ext2fs *fs); +void ext2_fserr(struct m_ext2fs *, uid_t, char *); /* Flags to low-level allocation routines. 
Index: sys/fs/ext2fs/ext2_inode.c =================================================================== --- sys/fs/ext2fs/ext2_inode.c +++ sys/fs/ext2fs/ext2_inode.c @@ -55,9 +55,6 @@ #include #include -static int ext2_indirtrunc(struct inode *, daddr_t, daddr_t, - daddr_t, int, e4fs_daddr_t *); - /* * Update the access, modified, and inode change times as specified by the * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode @@ -103,12 +100,123 @@ #define SINGLE 0 /* index of single indirect block */ #define DOUBLE 1 /* index of double indirect block */ #define TRIPLE 2 /* index of triple indirect block */ + +/* + * Release blocks associated with the inode ip and stored in the indirect + * block bn. Blocks are free'd in LIFO order up to (but not including) + * lastbn. If level is greater than SINGLE, the block is an indirect block + * and recursive calls to indirtrunc must be used to cleanse other indirect + * blocks. + * + * NB: triple indirect blocks are untested. + */ +static int +ext2_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, + daddr_t lastbn, int level, e4fs_daddr_t *countp) +{ + struct buf *bp; + struct m_ext2fs *fs = ip->i_e2fs; + struct vnode *vp; + e2fs_daddr_t *bap, *copy; + int i, nblocks, error = 0, allerror = 0; + e2fs_lbn_t nb, nlbn, last; + e4fs_daddr_t blkcount, factor, blocksreleased = 0; + + /* + * Calculate index in current block of last + * block to be kept. -1 indicates the entire + * block so we need not calculate the index. + */ + factor = 1; + for (i = SINGLE; i < level; i++) + factor *= NINDIR(fs); + last = lastbn; + if (lastbn > 0) + last /= factor; + nblocks = btodb(fs->e2fs_bsize); + /* + * Get buffer of block pointers, zero those entries corresponding + * to blocks to be free'd, and update on disk copy first. Since + * double(triple) indirect before single(double) indirect, calls + * to bmap on these blocks will fail. 
However, we already have + * the on disk address, so we have to set the b_blkno field + * explicitly instead of letting bread do everything for us. + */ + vp = ITOV(ip); + bp = getblk(vp, lbn, (int)fs->e2fs_bsize, 0, 0, 0); + if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) { + bp->b_iocmd = BIO_READ; + if (bp->b_bcount > bp->b_bufsize) + panic("ext2_indirtrunc: bad buffer size"); + bp->b_blkno = dbn; + vfs_busy_pages(bp, 0); + bp->b_iooffset = dbtob(bp->b_blkno); + bstrategy(bp); + error = bufwait(bp); + } + if (error) { + brelse(bp); + *countp = 0; + return (error); + } + bap = (e2fs_daddr_t *)bp->b_data; + copy = malloc(fs->e2fs_bsize, M_TEMP, M_WAITOK); + bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->e2fs_bsize); + bzero((caddr_t)&bap[last + 1], + (NINDIR(fs) - (last + 1)) * sizeof(e2fs_daddr_t)); + if (last == -1) + bp->b_flags |= B_INVAL; + if (DOINGASYNC(vp)) { + bdwrite(bp); + } else { + error = bwrite(bp); + if (error) + allerror = error; + } + bap = copy; + + /* + * Recursively free totally unused blocks. + */ + for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; + i--, nlbn += factor) { + nb = bap[i]; + if (nb == 0) + continue; + if (level > SINGLE) { + if ((error = ext2_indirtrunc(ip, nlbn, + fsbtodb(fs, nb), (int32_t)-1, level - 1, &blkcount)) != 0) + allerror = error; + blocksreleased += blkcount; + } + ext2_blkfree(ip, nb, fs->e2fs_bsize); + blocksreleased += nblocks; + } + + /* + * Recursively free last partial block. + */ + if (level > SINGLE && lastbn >= 0) { + last = lastbn % factor; + nb = bap[i]; + if (nb != 0) { + if ((error = ext2_indirtrunc(ip, nlbn, fsbtodb(fs, nb), + last, level - 1, &blkcount)) != 0) + allerror = error; + blocksreleased += blkcount; + } + } + free(copy, M_TEMP); + *countp = blocksreleased; + return (allerror); +} + /* * Truncate the inode oip to at most length size, freeing the * disk blocks. 
*/ -int -ext2_truncate(struct vnode *vp, off_t length, int flags, struct ucred *cred, +static int +ext2_ind_truncate(struct vnode *vp, off_t length, int flags, struct ucred *cred, struct thread *td) { struct vnode *ovp = vp; @@ -132,26 +240,6 @@ bo = &ovp->v_bufobj; #endif - ASSERT_VOP_LOCKED(vp, "ext2_truncate"); - - if (length < 0) - return (EINVAL); - - if (ovp->v_type == VLNK && - oip->i_size < ovp->v_mount->mnt_maxsymlinklen) { -#ifdef INVARIANTS - if (length != 0) - panic("ext2_truncate: partial truncate of symlink"); -#endif - bzero((char *)&oip->i_shortlink, (u_int)oip->i_size); - oip->i_size = 0; - oip->i_flag |= IN_CHANGE | IN_UPDATE; - return (ext2_update(ovp, 1)); - } - if (oip->i_size == length) { - oip->i_flag |= IN_CHANGE | IN_UPDATE; - return (ext2_update(ovp, 0)); - } fs = oip->i_e2fs; osize = oip->i_size; /* @@ -361,115 +449,131 @@ return (allerror); } -/* - * Release blocks associated with the inode ip and stored in the indirect - * block bn. Blocks are free'd in LIFO order up to (but not including) - * lastbn. If level is greater than SINGLE, the block is an indirect block - * and recursive calls to indirtrunc must be used to cleanse other indirect - * blocks. - * - * NB: triple indirect blocks are untested. - */ - static int -ext2_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, - daddr_t lastbn, int level, e4fs_daddr_t *countp) +ext2_ext_truncate(struct vnode *vp, off_t length, int flags, + struct ucred *cred, struct thread *td) { + struct vnode *ovp = vp; + int32_t lastblock; + struct m_ext2fs *fs; + struct inode *oip; struct buf *bp; - struct m_ext2fs *fs = ip->i_e2fs; - struct vnode *vp; - e2fs_daddr_t *bap, *copy; - int i, nblocks, error = 0, allerror = 0; - e2fs_lbn_t nb, nlbn, last; - e4fs_daddr_t blkcount, factor, blocksreleased = 0; + uint32_t lbn, offset; + int error, size; + off_t osize; - /* - * Calculate index in current block of last - * block to be kept. -1 indicates the entire - * block so we need not calculate the index. 
- */ - factor = 1; - for (i = SINGLE; i < level; i++) - factor *= NINDIR(fs); - last = lastbn; - if (lastbn > 0) - last /= factor; - nblocks = btodb(fs->e2fs_bsize); - /* - * Get buffer of block pointers, zero those entries corresponding - * to blocks to be free'd, and update on disk copy first. Since - * double(triple) indirect before single(double) indirect, calls - * to bmap on these blocks will fail. However, we already have - * the on disk address, so we have to set the b_blkno field - * explicitly instead of letting bread do everything for us. - */ - vp = ITOV(ip); - bp = getblk(vp, lbn, (int)fs->e2fs_bsize, 0, 0, 0); - if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) { - bp->b_iocmd = BIO_READ; - if (bp->b_bcount > bp->b_bufsize) - panic("ext2_indirtrunc: bad buffer size"); - bp->b_blkno = dbn; - vfs_busy_pages(bp, 0); - bp->b_iooffset = dbtob(bp->b_blkno); - bstrategy(bp); - error = bufwait(bp); + oip = VTOI(ovp); + fs = oip->i_e2fs; + osize = oip->i_size; + + if (osize < length) { + if (length > oip->i_e2fs->e2fs_maxfilesize) { + return (EFBIG); + } + vnode_pager_setsize(ovp, length); + offset = blkoff(fs, length - 1); + lbn = lblkno(fs, length - 1); + flags |= BA_CLRBUF; + error = ext2_balloc(oip, lbn, offset + 1, cred, &bp, flags); + if (error) { + vnode_pager_setsize(vp, osize); + return (error); + } + oip->i_size = length; + if (bp->b_bufsize == fs->e2fs_bsize) + bp->b_flags |= B_CLUSTEROK; + if (flags & IO_SYNC) + bwrite(bp); + else if (DOINGASYNC(ovp)) + bdwrite(bp); + else + bawrite(bp); + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (ext2_update(ovp, !DOINGASYNC(ovp))); } - if (error) { - brelse(bp); - *countp = 0; + + lastblock = (length + fs->e2fs_bsize - 1) / fs->e2fs_bsize; + error = ext4_ext_remove_space(oip, lastblock, flags, cred, td); + if (error) return (error); - } - bap = (e2fs_daddr_t *)bp->b_data; - copy = malloc(fs->e2fs_bsize, M_TEMP, M_WAITOK); - bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->e2fs_bsize); - bzero((caddr_t)&bap[last + 
1], - (NINDIR(fs) - (last + 1)) * sizeof(e2fs_daddr_t)); - if (last == -1) - bp->b_flags |= B_INVAL; - if (DOINGASYNC(vp)) { - bdwrite(bp); - } else { - error = bwrite(bp); - if (error) - allerror = error; - } - bap = copy; - /* - * Recursively free totally unused blocks. - */ - for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; - i--, nlbn += factor) { - nb = bap[i]; - if (nb == 0) - continue; - if (level > SINGLE) { - if ((error = ext2_indirtrunc(ip, nlbn, - fsbtodb(fs, nb), (int32_t)-1, level - 1, &blkcount)) != 0) - allerror = error; - blocksreleased += blkcount; + offset = blkoff(fs, length); + if (offset == 0) { + oip->i_size = length; + } else { + lbn = lblkno(fs, length); + flags |= BA_CLRBUF; + error = ext2_balloc(oip, lbn, offset, cred, &bp, flags); + if (error) { + return (error); } - ext2_blkfree(ip, nb, fs->e2fs_bsize); - blocksreleased += nblocks; + oip->i_size = length; + size = blksize(fs, oip, lbn); + bzero((char *)bp->b_data + offset, (u_int)(size - offset)); + allocbuf(bp, size); + if (bp->b_bufsize == fs->e2fs_bsize) + bp->b_flags |= B_CLUSTEROK; + if (flags & IO_SYNC) + bwrite(bp); + else if (DOINGASYNC(ovp)) + bdwrite(bp); + else + bawrite(bp); } - /* - * Recursively free last partial block. - */ - if (level > SINGLE && lastbn >= 0) { - last = lastbn % factor; - nb = bap[i]; - if (nb != 0) { - if ((error = ext2_indirtrunc(ip, nlbn, fsbtodb(fs, nb), - last, level - 1, &blkcount)) != 0) - allerror = error; - blocksreleased += blkcount; - } + oip->i_size = osize; + error = vtruncbuf(ovp, cred, length, (int)fs->e2fs_bsize); + if (error) + return (error); + + vnode_pager_setsize(ovp, length); + + oip->i_size = length; + oip->i_flag |= IN_CHANGE | IN_UPDATE; + error = ext2_update(ovp, !DOINGASYNC(ovp)); + + return (error); +} + +/* + * Truncate the inode ip to at most length size, freeing the + * disk blocks. 
+ */
+int
+ext2_truncate(struct vnode *vp, off_t length, int flags, struct ucred *cred,
+    struct thread *td)
+{
+	struct inode *ip;
+	int error;
+
+	ASSERT_VOP_LOCKED(vp, "ext2_truncate");
+
+	if (length < 0)
+		return (EINVAL);
+
+	ip = VTOI(vp);
+	if (vp->v_type == VLNK &&
+	    ip->i_size < vp->v_mount->mnt_maxsymlinklen) {
+#ifdef INVARIANTS
+		if (length != 0)
+			panic("ext2_truncate: partial truncate of symlink");
+#endif
+		bzero((char *)&ip->i_shortlink, (u_int)ip->i_size);
+		ip->i_size = 0;
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		return (ext2_update(vp, 1));
 	}
-	free(copy, M_TEMP);
-	*countp = blocksreleased;
-	return (allerror);
+	if (ip->i_size == length) {
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		return (ext2_update(vp, 0));
+	}
+
+	if (ip->i_flag & IN_E4EXTENTS)
+		error = ext2_ext_truncate(vp, length, flags, cred, td);
+	else
+		error = ext2_ind_truncate(vp, length, flags, cred, td);
+
+	return (error);
 }

 /*
@@ -491,7 +595,8 @@
 	if (ip->i_nlink <= 0) {
 		ext2_extattr_free(ip);
 		error = ext2_truncate(vp, (off_t)0, 0, NOCRED, td);
-		ip->i_rdev = 0;
+		if (!(ip->i_flag & IN_E4EXTENTS))
+			ip->i_rdev = 0;
 		mode = ip->i_mode;
 		ip->i_mode = 0;
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
Index: sys/fs/ext2fs/ext2_inode_cnv.c
===================================================================
--- sys/fs/ext2fs/ext2_inode_cnv.c
+++ sys/fs/ext2fs/ext2_inode_cnv.c
@@ -51,7 +51,7 @@

 	printf("Inode: %5ju", (uintmax_t)in->i_number);
 	printf( /* "Inode: %5d" */
-	    " Type: %10s Mode: 0x%o Flags: 0x%x Version: %d acl: 0x%llx\n",
-	    "n/a", in->i_mode, in->i_flags, in->i_gen, in->i_facl);
+	    " Type: %10s Mode: 0x%o Flags: 0x%x Version: %d acl: 0x%jx\n",
+	    "n/a", in->i_mode, in->i_flags, in->i_gen, (uintmax_t)in->i_facl);
 	printf("User: %5u Group: %5u Size: %ju\n",
 	    in->i_uid, in->i_gid, (uintmax_t)in->i_size);
@@ -62,19 +62,22 @@
 	printf("mtime: 0x%x", in->i_mtime);
 	if (E2DI_HAS_XTIME(in))
 		printf("crtime %#x ", in->i_birthtime);
-	printf("BLOCKS:");
-	for (i = 0; i < (in->i_blocks <= 24 ?
(in->i_blocks + 1) / 2 : 12); i++) - printf(" %d", in->i_db[i]); - printf("\n"); - printf("Extents:\n"); - ehp = (struct ext4_extent_header *)in->i_db; - printf("Header (magic 0x%x entries %d max %d depth %d gen %d)\n", - ehp->eh_magic, ehp->eh_ecount, ehp->eh_max, ehp->eh_depth, - ehp->eh_gen); - ep = (struct ext4_extent *)(char *)(ehp + 1); - printf("Index (blk %d len %d start_lo %d start_hi %d)\n", ep->e_blk, - ep->e_len, ep->e_start_lo, ep->e_start_hi); - printf("\n"); + if (in->i_flag & IN_E4EXTENTS) { + printf("Extents:\n"); + ehp = (struct ext4_extent_header *)in->i_db; + printf("Header (magic 0x%x entries %d max %d depth %d gen %d)\n", + ehp->eh_magic, ehp->eh_ecount, ehp->eh_max, ehp->eh_depth, + ehp->eh_gen); + ep = (struct ext4_extent *)(char *)(ehp + 1); + printf("Index (blk %d len %d start_lo %d start_hi %d)\n", ep->e_blk, + ep->e_len, ep->e_start_lo, ep->e_start_hi); + printf("\n"); + } else { + printf("BLOCKS:"); + for (i = 0; i < (in->i_blocks <= 24 ? (in->i_blocks + 1) / 2 : 12); i++) + printf(" %d", in->i_db[i]); + printf("\n"); + } } #endif /* EXT2FS_DEBUG */ @@ -84,8 +87,6 @@ void ext2_ei2i(struct ext2fs_dinode *ei, struct inode *ip) { - int i; - ip->i_nlink = ei->e2di_nlink; /* * Godmar thinks - if the link count is zero, then the inode is @@ -127,10 +128,7 @@ ip->i_uid |= (uint32_t)ei->e2di_uid_high << 16; ip->i_gid |= (uint32_t)ei->e2di_gid_high << 16; - for (i = 0; i < EXT2_NDADDR; i++) - ip->i_db[i] = ei->e2di_blocks[i]; - for (i = 0; i < EXT2_NIADDR; i++) - ip->i_ib[i] = ei->e2di_blocks[EXT2_NDIR_BLOCKS + i]; + memcpy(ip->i_data, ei->e2di_blocks, sizeof(ei->e2di_blocks)); } /* @@ -139,8 +137,6 @@ void ext2_i2ei(struct inode *ip, struct ext2fs_dinode *ei) { - int i; - ei->e2di_mode = ip->i_mode; ei->e2di_nlink = ip->i_nlink; /* @@ -177,8 +173,5 @@ ei->e2di_gid = ip->i_gid & 0xffff; ei->e2di_gid_high = ip->i_gid >> 16 & 0xffff; - for (i = 0; i < EXT2_NDADDR; i++) - ei->e2di_blocks[i] = ip->i_db[i]; - for (i = 0; i < EXT2_NIADDR; i++) - 
ei->e2di_blocks[EXT2_NDIR_BLOCKS + i] = ip->i_ib[i]; + memcpy(ei->e2di_blocks, ip->i_data, sizeof(ei->e2di_blocks)); } /* Review note (ext2_subr.c): the hunk below deletes the IN_E4EXTENTS special case (the ext4_ext_find_extent() lookup, the bread() against ip->i_devvp and the b_offset fix-up) together with the normal: label, leaving a single bread(vp, lbn, bsize, NOCRED, &bp) for every inode. The up-front reset of *bpp to NULL is removed as well, so *bpp is assigned only after a successful read. */ Index: sys/fs/ext2fs/ext2_subr.c =================================================================== --- sys/fs/ext2fs/ext2_subr.c +++ sys/fs/ext2fs/ext2_subr.c @@ -66,63 +66,22 @@ struct m_ext2fs *fs; struct buf *bp; e2fs_lbn_t lbn; - int bsize, error; - daddr_t newblk; - struct ext4_extent *ep; - struct ext4_extent_path path; + int error, bsize; ip = VTOI(vp); fs = ip->i_e2fs; lbn = lblkno(fs, offset); bsize = blksize(fs, ip, lbn); - *bpp = NULL; - /* - * IN_E4EXTENTS requires special treatment as we can otherwise fall - * back to the normal path. - */ - if (!(ip->i_flag & IN_E4EXTENTS)) - goto normal; - - memset(&path, 0, sizeof(path)); - if (ext4_ext_find_extent(fs, ip, lbn, &path) == NULL) - goto normal; - ep = path.ep_ext; - if (ep == NULL) - goto normal; - - newblk = lbn - ep->e_blk + - (ep->e_start_lo | (daddr_t)ep->e_start_hi << 32); - - if (path.ep_bp != NULL) { - brelse(path.ep_bp); - path.ep_bp = NULL; - } - error = bread(ip->i_devvp, fsbtodb(fs, newblk), bsize, NOCRED, &bp); - if (error != 0) { + if ((error = bread(vp, lbn, bsize, NOCRED, &bp)) != 0) { brelse(bp); return (error); } if (res) *res = (char *)bp->b_data + blkoff(fs, offset); - /* - * If IN_E4EXTENTS is enabled we would get a wrong offset so - * reset b_offset here. 
- */ - bp->b_offset = lbn * bsize; + *bpp = bp; - return (0); -normal: - if (*bpp == NULL) { - if ((error = bread(vp, lbn, bsize, NOCRED, &bp)) != 0) { - brelse(bp); - return (error); - } - if (res) - *res = (char *)bp->b_data + blkoff(fs, offset); - *bpp = bp; - } return (0); } /* Review note (ext2_vfsops.c): inside the EXT2FS_DEBUG section only, ext4_ext_print_extent_tree_status(ip) is now called right after ext2_print_inode(ip). */ Index: sys/fs/ext2fs/ext2_vfsops.c =================================================================== --- sys/fs/ext2fs/ext2_vfsops.c +++ sys/fs/ext2fs/ext2_vfsops.c @@ -992,6 +992,7 @@ } #ifdef EXT2FS_DEBUG ext2_print_inode(ip); + ext4_ext_print_extent_tree_status(ip); #endif bqrelse(bp); /* Review note (ext2_vnops.c): (1) adds a block, bracketed by its own DEBUG: REMOVE IT markers, that locally defines IO_NOREUSE, b_io_dismiss(), vfs_bio_brelse() and vfs_bio_set_flags(); both vfs_bio functions have external linkage. NOTE(review): presumably a stopgap for a tree that lacks them; confirm and drop before commit. (2) assigns ip->i_rdev from vap->va_rdev only when IN_E4EXTENTS is clear. (3) next to the ext2_strategy: spec panic, maps the block with ext4_bmapext() for IN_E4EXTENTS inodes and with ext2_bmaparray() otherwise. (4) deletes ext4_ext_read() and the dispatch between it and ext2_ind_read(), so the former ext2_ind_read() body becomes the body of the function that used to dispatch. */ Index: sys/fs/ext2fs/ext2_vnops.c =================================================================== --- sys/fs/ext2fs/ext2_vnops.c +++ sys/fs/ext2fs/ext2_vnops.c @@ -89,10 +89,48 @@ #include #include +//!!!==================================== DEBUG: REMOVE IT: VVVVVV +#define IO_NOREUSE 0x0200 /* VMIO data won't be reused */ + +static void +b_io_dismiss(struct buf *bp, int ioflag, bool release) +{ + + KASSERT((ioflag & IO_NOREUSE) == 0 || (ioflag & IO_VMIO) != 0, + ("buf %p non-VMIO noreuse", bp)); + + if ((ioflag & IO_DIRECT) != 0) + bp->b_flags |= B_DIRECT; + if ((ioflag & (IO_VMIO | IO_DIRECT)) != 0 && LIST_EMPTY(&bp->b_dep)) { + bp->b_flags |= B_RELBUF; + if ((ioflag & IO_NOREUSE) != 0) + bp->b_flags |= B_NOREUSE; + if (release) + brelse(bp); + } else if (release) + bqrelse(bp); +} + +void vfs_bio_brelse(struct buf *bp, int ioflag); +void +vfs_bio_brelse(struct buf *bp, int ioflag) +{ + + b_io_dismiss(bp, ioflag, true); +} + +void vfs_bio_set_flags(struct buf *bp, int ioflag); +void +vfs_bio_set_flags(struct buf *bp, int ioflag) +{ + + b_io_dismiss(bp, ioflag, false); +} +//!!!==================================== DEBUG: REMOVE IT: ^^^^^^ + + static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); static void ext2_itimes_locked(struct vnode *); -static int ext4_ext_read(struct vop_read_args *); -static int ext2_ind_read(struct vop_read_args *); static 
vop_access_t ext2_access; static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *); @@ -630,7 +668,8 @@ * Want to be able to use this to make badblock * inodes, so don't truncate the dev number. */ - ip->i_rdev = vap->va_rdev; + if (!(ip->i_flag & IN_E4EXTENTS)) + ip->i_rdev = vap->va_rdev; } /* * Remove inode, then reload it through VFS_VGET so it is @@ -1542,7 +1581,12 @@ if (vp->v_type == VBLK || vp->v_type == VCHR) panic("ext2_strategy: spec"); if (bp->b_blkno == bp->b_lblkno) { - error = ext2_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL); + + if (VTOI(ap->a_vp)->i_flag & IN_E4EXTENTS) + error = ext4_bmapext(vp, bp->b_lblkno, &blkno, NULL, NULL); + else + error = ext2_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL); + bp->b_blkno = blkno; if (error) { bp->b_error = error; @@ -1990,28 +2034,6 @@ { struct vnode *vp; struct inode *ip; - int error; - - vp = ap->a_vp; - ip = VTOI(vp); - - /* EXT4_EXT_LOCK(ip); */ - if (ip->i_flag & IN_E4EXTENTS) - error = ext4_ext_read(ap); - else - error = ext2_ind_read(ap); - /* EXT4_EXT_UNLOCK(ip); */ - return (error); -} - -/* - * Vnode op for reading. 
- */ -static int -ext2_ind_read(struct vop_read_args *ap) -{ - struct vnode *vp; - struct inode *ip; struct uio *uio; struct m_ext2fs *fs; struct buf *bp; @@ -2131,122 +2153,6 @@ } /* - * this function handles ext4 extents block mapping - */ -static int -ext4_ext_read(struct vop_read_args *ap) -{ - static unsigned char zeroes[EXT2_MAX_BLOCK_SIZE]; - struct vnode *vp; - struct inode *ip; - struct uio *uio; - struct m_ext2fs *fs; - struct buf *bp; - struct ext4_extent nex, *ep; - struct ext4_extent_path path; - daddr_t lbn, newblk; - off_t bytesinfile; - int cache_type; - ssize_t orig_resid; - int error; - long size, xfersize, blkoffset; - - vp = ap->a_vp; - ip = VTOI(vp); - uio = ap->a_uio; - memset(&path, 0, sizeof(path)); - - orig_resid = uio->uio_resid; - KASSERT(orig_resid >= 0, ("%s: uio->uio_resid < 0", __func__)); - if (orig_resid == 0) - return (0); - KASSERT(uio->uio_offset >= 0, ("%s: uio->uio_offset < 0", __func__)); - fs = ip->i_e2fs; - if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize) - return (EOVERFLOW); - - while (uio->uio_resid > 0) { - if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) - break; - lbn = lblkno(fs, uio->uio_offset); - size = blksize(fs, ip, lbn); - blkoffset = blkoff(fs, uio->uio_offset); - - xfersize = fs->e2fs_fsize - blkoffset; - xfersize = MIN(xfersize, uio->uio_resid); - xfersize = MIN(xfersize, bytesinfile); - - /* get block from ext4 extent cache */ - cache_type = ext4_ext_in_cache(ip, lbn, &nex); - switch (cache_type) { - case EXT4_EXT_CACHE_NO: - ext4_ext_find_extent(fs, ip, lbn, &path); - if (path.ep_is_sparse) - ep = &path.ep_sparse_ext; - else - ep = path.ep_ext; - if (ep == NULL) - return (EIO); - - ext4_ext_put_cache(ip, ep, - path.ep_is_sparse ? 
EXT4_EXT_CACHE_GAP : EXT4_EXT_CACHE_IN); - - newblk = lbn - ep->e_blk + (ep->e_start_lo | - (daddr_t)ep->e_start_hi << 32); - - if (path.ep_bp != NULL) { - brelse(path.ep_bp); - path.ep_bp = NULL; - } - break; - - case EXT4_EXT_CACHE_GAP: - /* block has not been allocated yet */ - break; - - case EXT4_EXT_CACHE_IN: - newblk = lbn - nex.e_blk + (nex.e_start_lo | - (daddr_t)nex.e_start_hi << 32); - break; - - default: - panic("%s: invalid cache type", __func__); - } - - if (cache_type == EXT4_EXT_CACHE_GAP || - (cache_type == EXT4_EXT_CACHE_NO && path.ep_is_sparse)) { - if (xfersize > sizeof(zeroes)) - xfersize = sizeof(zeroes); - error = uiomove(zeroes, xfersize, uio); - if (error) - return (error); - } else { - error = bread(ip->i_devvp, fsbtodb(fs, newblk), size, - NOCRED, &bp); - if (error) { - brelse(bp); - return (error); - } - - size -= bp->b_resid; - if (size < xfersize) { - if (size == 0) { - bqrelse(bp); - break; - } - xfersize = size; - } - error = uiomove(bp->b_data + blkoffset, xfersize, uio); - bqrelse(bp); - if (error) - return (error); - } - } - - return (0); -} - -/* * Vnode op for writing. */ static int /* Review note (ext2fs.h): removes EXT2_MAX_BLOCK_SIZE; the only use visible in this part of the patch is the zeroes[] buffer of the deleted ext4_ext_read(). NOTE(review): confirm nothing else in the tree still references the macro. */ Index: sys/fs/ext2fs/ext2fs.h =================================================================== --- sys/fs/ext2fs/ext2fs.h +++ sys/fs/ext2fs/ext2fs.h @@ -385,7 +385,6 @@ /* * Macro-instructions used to manage several block sizes */ -#define EXT2_MAX_BLOCK_SIZE 4096 #define EXT2_MIN_BLOCK_LOG_SIZE 10 #define EXT2_BLOCK_SIZE(s) ((s)->e2fs_bsize) #define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof(uint32_t)) /* Review note (inode.h): i_db[] and i_ib[] move into an unnamed struct inside an unnamed union alongside i_data[EXT2_NDADDR + EXT2_NIADDR], so i_data overlays both arrays; the memcpy() from ip->i_data earlier in this patch copies through that overlay. */ Index: sys/fs/ext2fs/inode.h =================================================================== --- sys/fs/ext2fs/inode.h +++ sys/fs/ext2fs/inode.h @@ -107,8 +107,13 @@ uint32_t i_gen; /* Generation number. */ uint64_t i_facl; /* EA block number. */ uint32_t i_flags; /* Status flags (chflags). */ - uint32_t i_db[EXT2_NDADDR]; /* Direct disk blocks. */ - uint32_t i_ib[EXT2_NIADDR]; /* Indirect disk blocks. 
*/ + union { + struct { + uint32_t i_db[EXT2_NDADDR]; /* Direct disk blocks. */ + uint32_t i_ib[EXT2_NIADDR]; /* Indirect disk blocks. */ + }; + uint32_t i_data[EXT2_NDADDR + EXT2_NIADDR]; + }; struct ext4_extent_cache i_ext_cache; /* cache for ext4 extent */ };