Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c @@ -1518,6 +1518,173 @@ dmu_buf_rele_array(dbp, numbufs, FTAG); return (err); } + +int +dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, + int *rbehind, int *rahead, int last_size) +{ + struct sf_buf *sf; + vm_object_t vmobj; + vm_page_t m; + dmu_buf_t **dbp; + dmu_buf_t *db; + caddr_t va; + int numbufs, i; + int bufoff, pgoff, tocpy; + int mi, di; + int err; + + ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex); + ASSERT(last_size <= PAGE_SIZE); + + err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex), + IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp); + if (err != 0) + return (err); + +#ifdef DEBUG + IMPLY(last_size < PAGE_SIZE, *rahead == 0); + if (dbp[0]->db_offset != 0 || numbufs > 1) { + for (i = 0; i < numbufs; i++) { + ASSERT(ISP2(dbp[i]->db_size)); + ASSERT((dbp[i]->db_offset % dbp[i]->db_size) == 0); + ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size); + } + } +#endif + + vmobj = ma[0]->object; + zfs_vmobject_wlock(vmobj); + + db = dbp[0]; + for (i = 0; i < *rbehind; i++) { + m = vm_page_grab(vmobj, ma[0]->pindex - 1 - i, + VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT); + if (m == NULL) + break; + if (m->valid != 0) { + ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); + vm_page_xunbusy(m); + break; + } + + ASSERT(db->db_size > PAGE_SIZE); + bufoff = IDX_TO_OFF(m->pindex) % db->db_size; + va = zfs_map_page(m, &sf); + bcopy((char *)db->db_data + bufoff, va, PAGESIZE); + zfs_unmap_page(sf); + m->valid = VM_PAGE_BITS_ALL; + vm_page_readahead_finish(m); + } + *rbehind = i; + + bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size; + pgoff = 0; + for (mi = 0, di = 0; mi < count && di < numbufs; ) { + if (pgoff == 0) { + m = ma[mi]; + vm_page_assert_xbusied(m); + va = zfs_map_page(m, &sf); + } + if (bufoff == 0) + db = dbp[di]; + + ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==, + db->db_offset + bufoff); + + /* + * We do not need to clamp the copy size by the file + * size as the last block is zero-filled beyond the + * end of file anyway. + */ + tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff); + bcopy((char *)db->db_data + bufoff, va + pgoff, tocpy); + + pgoff += tocpy; + ASSERT(pgoff <= PAGESIZE); + if (pgoff == PAGESIZE) { + zfs_unmap_page(sf); + m->valid = VM_PAGE_BITS_ALL; + ASSERT(mi < count); + mi++; + pgoff = 0; + } + + bufoff += tocpy; + ASSERT(bufoff <= db->db_size); + if (bufoff == db->db_size) { + ASSERT(di < numbufs); + di++; + bufoff = 0; + } + } + +#ifdef DEBUG + /* + * Three possibilities: + * - last requested page ends at a buffer boundary and , thus, + * all pages and buffers have been iterated; + * - all requested pages are filled, but the last buffer + * has not been exhausted; + * the read-ahead is possible only in this case; + * - all buffers have been read, but the last page has not been + * fully filled; + * this is only possible if the file has only a single buffer + * with a size that is not a multiple of the page size. + */ + if (mi == count) { + ASSERT(di >= numbufs - 1); + IMPLY(*rahead != 0, di == numbufs - 1); + IMPLY(*rahead != 0, bufoff != 0); + ASSERT(pgoff == 0); + } + if (di == numbufs) { + ASSERT(mi >= count - 1); + ASSERT(*rahead == 0); + IMPLY(pgoff == 0, mi == count); + if (pgoff != 0) { + ASSERT(mi == count - 1); + ASSERT((dbp[0]->db_size & PAGE_MASK) != 0); + } + } +#endif + if (pgoff != 0) { + bzero(va + pgoff, PAGESIZE - pgoff); + zfs_unmap_page(sf); + m->valid = VM_PAGE_BITS_ALL; + } + + for (i = 0; i < *rahead; i++) { + m = vm_page_grab(vmobj, ma[count - 1]->pindex + 1 + i, + VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT); + if (m == NULL) + break; + if (m->valid != 0) { + ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL); + vm_page_xunbusy(m); + break; + } + + ASSERT(db->db_size > PAGE_SIZE); + bufoff = IDX_TO_OFF(m->pindex) % db->db_size; + tocpy = MIN(db->db_size - bufoff, PAGESIZE); + va = zfs_map_page(m, &sf); + bcopy((char *)db->db_data + bufoff, va, tocpy); + if (tocpy < PAGESIZE) { + ASSERT(i == *rahead - 1); + ASSERT((db->db_size & PAGE_MASK) != 0); + bzero(va + tocpy, PAGESIZE - tocpy); + } + zfs_unmap_page(sf); + m->valid = VM_PAGE_BITS_ALL; + vm_page_readahead_finish(m); + } + *rahead = i; + zfs_vmobject_wunlock(vmobj); + + dmu_buf_rele_array(dbp, numbufs, FTAG); + return (0); +} #endif /* illumos */ #endif /* _KERNEL */ Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h @@ -750,6 +750,8 @@ #else int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, struct vm_page **ppa, dmu_tx_t *tx); +int dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count, + int *rbehind, int *rahead, int last_size); #endif #endif struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size); Index: sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c =================================================================== --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c +++ sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c @@ -4514,81 +4514,71 @@ } static int -zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int *rbehind, +zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind, int *rahead) { znode_t *zp = VTOZ(vp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os = zp->z_zfsvfs->z_os; - vm_page_t mlast; vm_object_t object; - caddr_t va; - struct sf_buf *sf; - off_t startoff, endoff; - int i, error; - vm_pindex_t reqstart, reqend; - int lsize, size; + off_t start, end, obj_size; + uint_t blksz; + int pgsin_b, pgsin_a; + int error; - object = m[0]->object; - error = 0; ZFS_ENTER(zfsvfs); ZFS_VERIFY_ZP(zp); + object = ma[0]->object; zfs_vmobject_wlock(object); - if (m[count - 1]->valid != 0 && --count == 0) { - zfs_vmobject_wunlock(object); - goto out; - } - - mlast = m[count - 1]; - - if (IDX_TO_OFF(mlast->pindex) >= - object->un_pager.vnp.vnp_size) { - zfs_vmobject_wunlock(object); + obj_size = object->un_pager.vnp.vnp_size; + zfs_vmobject_wunlock(object); + if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) { ZFS_EXIT(zfsvfs); return (zfs_vm_pagerret_bad); } - VM_CNT_INC(v_vnodein); - VM_CNT_ADD(v_vnodepgsin, count); + start = IDX_TO_OFF(ma[0]->pindex); + end = IDX_TO_OFF(ma[count - 1]->pindex + 1); + blksz = zp->z_blksz; - lsize = PAGE_SIZE; - if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size) - lsize = object->un_pager.vnp.vnp_size - - IDX_TO_OFF(mlast->pindex); - zfs_vmobject_wunlock(object); + pgsin_b = 0; + if (rbehind != NULL) { + pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz)); + pgsin_b = MIN(*rbehind, pgsin_b); + } - for (i = 0; i < count; i++) { - size = PAGE_SIZE; - if (i == count - 1) - size = lsize; - va = zfs_map_page(m[i], &sf); - error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex), - size, va, DMU_READ_PREFETCH); - if (size != PAGE_SIZE) - bzero(va + size, PAGE_SIZE - size); - zfs_unmap_page(sf); - if (error != 0) - goto out; + pgsin_a = 0; + if (rahead != NULL) { + pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end); + if (end + IDX_TO_OFF(pgsin_a) >= obj_size) + pgsin_a = OFF_TO_IDX(round_page(obj_size) - end); + pgsin_a = MIN(*rahead, pgsin_a); } - zfs_vmobject_wlock(object); - for (i = 0; i < count; i++) - m[i]->valid = VM_PAGE_BITS_ALL; - zfs_vmobject_wunlock(object); + /* + * NB: we need to pass the exact byte size of the data that we expect + * to read after accounting for the file size. This is required because + * ZFS will panic if we request DMU to read beyond the end of the last + * allocated block. + */ + error = dmu_read_pages(os, zp->z_id, ma, count, &pgsin_b, &pgsin_a, + MIN(end, obj_size) - (end - PAGE_SIZE)); -out: ZFS_ACCESSTIME_STAMP(zfsvfs, zp); ZFS_EXIT(zfsvfs); - if (error == 0) { - if (rbehind) - *rbehind = 0; - if (rahead) - *rahead = 0; - return (zfs_vm_pagerret_ok); - } else + + if (error != 0) return (zfs_vm_pagerret_error); + + VM_CNT_INC(v_vnodein); + VM_CNT_ADD(v_vnodepgsin, count + pgsin_b + pgsin_a); + if (rbehind != NULL) + *rbehind = pgsin_b; + if (rahead != NULL) + *rahead = pgsin_a; + return (zfs_vm_pagerret_ok); } static int