diff --git a/include/sys/abd.h b/include/sys/abd.h index 750f9986c1da..b48dc36423f7 100644 --- a/include/sys/abd.h +++ b/include/sys/abd.h @@ -1,226 +1,226 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2014 by Chunwei Chen. All rights reserved. * Copyright (c) 2016, 2019 by Delphix. All rights reserved. */ #ifndef _ABD_H #define _ABD_H #include #include #include #include #ifdef __cplusplus extern "C" { #endif typedef enum abd_flags { ABD_FLAG_LINEAR = 1 << 0, /* is buffer linear (or scattered)? */ ABD_FLAG_OWNER = 1 << 1, /* does it own its data buffers? */ ABD_FLAG_META = 1 << 2, /* does this represent FS metadata? 
*/ ABD_FLAG_MULTI_ZONE = 1 << 3, /* pages split over memory zones */ ABD_FLAG_MULTI_CHUNK = 1 << 4, /* pages split over multiple chunks */ ABD_FLAG_LINEAR_PAGE = 1 << 5, /* linear but allocd from page */ ABD_FLAG_GANG = 1 << 6, /* mult ABDs chained together */ ABD_FLAG_GANG_FREE = 1 << 7, /* gang ABD is responsible for mem */ ABD_FLAG_ZEROS = 1 << 8, /* ABD for zero-filled buffer */ ABD_FLAG_ALLOCD = 1 << 9, /* we allocated the abd_t */ } abd_flags_t; typedef struct abd { abd_flags_t abd_flags; uint_t abd_size; /* excludes scattered abd_offset */ list_node_t abd_gang_link; #ifdef ZFS_DEBUG struct abd *abd_parent; zfs_refcount_t abd_children; #endif kmutex_t abd_mtx; union { struct abd_scatter { uint_t abd_offset; #if defined(__FreeBSD__) && defined(_KERNEL) void *abd_chunks[1]; /* actually variable-length */ #else uint_t abd_nents; struct scatterlist *abd_sgl; #endif } abd_scatter; struct abd_linear { void *abd_buf; struct scatterlist *abd_sgl; /* for LINEAR_PAGE */ } abd_linear; struct abd_gang { list_t abd_gang_chain; } abd_gang; } abd_u; } abd_t; typedef int abd_iter_func_t(void *buf, size_t len, void *priv); typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *priv); extern int zfs_abd_scatter_enabled; /* * Allocations and deallocations */ __attribute__((malloc)) abd_t *abd_alloc(size_t, boolean_t); __attribute__((malloc)) abd_t *abd_alloc_linear(size_t, boolean_t); __attribute__((malloc)) abd_t *abd_alloc_gang(void); __attribute__((malloc)) abd_t *abd_alloc_for_io(size_t, boolean_t); __attribute__((malloc)) abd_t *abd_alloc_sametype(abd_t *, size_t); boolean_t abd_size_alloc_linear(size_t); void abd_gang_add(abd_t *, abd_t *, boolean_t); void abd_free(abd_t *); abd_t *abd_get_offset(abd_t *, size_t); abd_t *abd_get_offset_size(abd_t *, size_t, size_t); abd_t *abd_get_offset_struct(abd_t *, abd_t *, size_t, size_t); abd_t *abd_get_zeros(size_t); abd_t *abd_get_from_buf(void *, size_t); void abd_cache_reap_now(void); /* * Conversion to and from 
a normal buffer */ void *abd_to_buf(abd_t *); void *abd_borrow_buf(abd_t *, size_t); void *abd_borrow_buf_copy(abd_t *, size_t); void abd_return_buf(abd_t *, void *, size_t); void abd_return_buf_copy(abd_t *, void *, size_t); void abd_take_ownership_of_buf(abd_t *, boolean_t); void abd_release_ownership_of_buf(abd_t *); /* * ABD operations */ int abd_iterate_func(abd_t *, size_t, size_t, abd_iter_func_t *, void *); int abd_iterate_func2(abd_t *, abd_t *, size_t, size_t, size_t, abd_iter_func2_t *, void *); void abd_copy_off(abd_t *, abd_t *, size_t, size_t, size_t); void abd_copy_from_buf_off(abd_t *, const void *, size_t, size_t); void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t); int abd_cmp(abd_t *, abd_t *); int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t); void abd_zero_off(abd_t *, size_t, size_t); void abd_verify(abd_t *); -void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, - ssize_t csize, ssize_t dsize, const unsigned parity, +void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off, + size_t csize, size_t dsize, const unsigned parity, void (*func_raidz_gen)(void **, const void *, size_t, size_t)); void abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds, - ssize_t tsize, const unsigned parity, + size_t tsize, const unsigned parity, void (*func_raidz_rec)(void **t, const size_t tsize, void **c, const unsigned *mul), const unsigned *mul); /* * Wrappers for calls with offsets of 0 */ static inline void abd_copy(abd_t *dabd, abd_t *sabd, size_t size) { abd_copy_off(dabd, sabd, 0, 0, size); } static inline void abd_copy_from_buf(abd_t *abd, const void *buf, size_t size) { abd_copy_from_buf_off(abd, buf, 0, size); } static inline void abd_copy_to_buf(void* buf, abd_t *abd, size_t size) { abd_copy_to_buf_off(buf, abd, 0, size); } static inline int abd_cmp_buf(abd_t *abd, const void *buf, size_t size) { return (abd_cmp_buf_off(abd, buf, 0, size)); } static inline void abd_zero(abd_t *abd, size_t size) { abd_zero_off(abd, 0, size); } 
/* * ABD type check functions */ static inline boolean_t abd_is_linear(abd_t *abd) { return ((abd->abd_flags & ABD_FLAG_LINEAR) ? B_TRUE : B_FALSE); } static inline boolean_t abd_is_linear_page(abd_t *abd) { return ((abd->abd_flags & ABD_FLAG_LINEAR_PAGE) ? B_TRUE : B_FALSE); } static inline boolean_t abd_is_gang(abd_t *abd) { return ((abd->abd_flags & ABD_FLAG_GANG) ? B_TRUE : B_FALSE); } static inline uint_t abd_get_size(abd_t *abd) { return (abd->abd_size); } /* * Module lifecycle * Defined in each specific OS's abd_os.c */ void abd_init(void); void abd_fini(void); /* * Linux ABD bio functions */ #if defined(__linux__) && defined(_KERNEL) unsigned int abd_bio_map_off(struct bio *, abd_t *, unsigned int, size_t); unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t); #endif #ifdef __cplusplus } #endif #endif /* _ABD_H */ diff --git a/module/zfs/abd.c b/module/zfs/abd.c index d982f201c930..bcc6ddd5e81b 100644 --- a/module/zfs/abd.c +++ b/module/zfs/abd.c @@ -1,1177 +1,1173 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2014 by Chunwei Chen. All rights reserved. * Copyright (c) 2019 by Delphix. All rights reserved. */ /* * ARC buffer data (ABD). 
* * ABDs are an abstract data structure for the ARC which can use two * different ways of storing the underlying data: * * (a) Linear buffer. In this case, all the data in the ABD is stored in one * contiguous buffer in memory (from a zio_[data_]buf_* kmem cache). * * +-------------------+ * | ABD (linear) | * | abd_flags = ... | * | abd_size = ... | +--------------------------------+ * | abd_buf ------------->| raw buffer of size abd_size | * +-------------------+ +--------------------------------+ * no abd_chunks * * (b) Scattered buffer. In this case, the data in the ABD is split into * equal-sized chunks (from the abd_chunk_cache kmem_cache), with pointers * to the chunks recorded in an array at the end of the ABD structure. * * +-------------------+ * | ABD (scattered) | * | abd_flags = ... | * | abd_size = ... | * | abd_offset = 0 | +-----------+ * | abd_chunks[0] ----------------------------->| chunk 0 | * | abd_chunks[1] ---------------------+ +-----------+ * | ... | | +-----------+ * | abd_chunks[N-1] ---------+ +------->| chunk 1 | * +-------------------+ | +-----------+ * | ... * | +-----------+ * +----------------->| chunk N-1 | * +-----------+ * * In addition to directly allocating a linear or scattered ABD, it is also * possible to create an ABD by requesting the "sub-ABD" starting at an offset * within an existing ABD. In linear buffers this is simple (set abd_buf of * the new ABD to the starting point within the original raw buffer), but * scattered ABDs are a little more complex. The new ABD makes a copy of the * relevant abd_chunks pointers (but not the underlying data). However, to * provide arbitrary rather than only chunk-aligned starting offsets, it also * tracks an abd_offset field which represents the starting point of the data * within the first chunk in abd_chunks. For both linear and scattered ABDs, * creating an offset ABD marks the original ABD as the offset's parent, and the * original ABD's abd_children refcount is incremented. 
This data allows us to * ensure the root ABD isn't deleted before its children. * * Most consumers should never need to know what type of ABD they're using -- * the ABD public API ensures that it's possible to transparently switch from * using a linear ABD to a scattered one when doing so would be beneficial. * * If you need to use the data within an ABD directly, if you know it's linear * (because you allocated it) you can use abd_to_buf() to access the underlying * raw buffer. Otherwise, you should use one of the abd_borrow_buf* functions * which will allocate a raw buffer if necessary. Use the abd_return_buf* * functions to return any raw buffers that are no longer necessary when you're * done using them. * * There are a variety of ABD APIs that implement basic buffer operations: * compare, copy, read, write, and fill with zeroes. If you need a custom * function which progressively accesses the whole ABD, use the abd_iterate_* * functions. * * As an additional feature, linear and scatter ABD's can be stitched together * by using the gang ABD type (abd_alloc_gang_abd()). This allows for * multiple ABDs to be viewed as a singular ABD. * * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to * B_FALSE. 
*/ #include #include #include #include #include /* see block comment above for description */ int zfs_abd_scatter_enabled = B_TRUE; void abd_verify(abd_t *abd) { #ifdef ZFS_DEBUG ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE); ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR | ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE | ABD_FLAG_MULTI_CHUNK | ABD_FLAG_LINEAR_PAGE | ABD_FLAG_GANG | ABD_FLAG_GANG_FREE | ABD_FLAG_ZEROS | ABD_FLAG_ALLOCD)); IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER)); IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER); if (abd_is_linear(abd)) { ASSERT3U(abd->abd_size, >, 0); ASSERT3P(ABD_LINEAR_BUF(abd), !=, NULL); } else if (abd_is_gang(abd)) { uint_t child_sizes = 0; for (abd_t *cabd = list_head(&ABD_GANG(abd).abd_gang_chain); cabd != NULL; cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) { ASSERT(list_link_active(&cabd->abd_gang_link)); child_sizes += cabd->abd_size; abd_verify(cabd); } ASSERT3U(abd->abd_size, ==, child_sizes); } else { ASSERT3U(abd->abd_size, >, 0); abd_verify_scatter(abd); } #endif } static void abd_init_struct(abd_t *abd) { list_link_init(&abd->abd_gang_link); mutex_init(&abd->abd_mtx, NULL, MUTEX_DEFAULT, NULL); abd->abd_flags = 0; #ifdef ZFS_DEBUG zfs_refcount_create(&abd->abd_children); abd->abd_parent = NULL; #endif abd->abd_size = 0; } static void abd_fini_struct(abd_t *abd) { mutex_destroy(&abd->abd_mtx); ASSERT(!list_link_active(&abd->abd_gang_link)); #ifdef ZFS_DEBUG zfs_refcount_destroy(&abd->abd_children); #endif } abd_t * abd_alloc_struct(size_t size) { abd_t *abd = abd_alloc_struct_impl(size); abd_init_struct(abd); abd->abd_flags |= ABD_FLAG_ALLOCD; return (abd); } void abd_free_struct(abd_t *abd) { abd_fini_struct(abd); abd_free_struct_impl(abd); } /* * Allocate an ABD, along with its own underlying data buffers. Use this if you * don't care whether the ABD is linear or not. 
*/ abd_t * abd_alloc(size_t size, boolean_t is_metadata) { if (abd_size_alloc_linear(size)) return (abd_alloc_linear(size, is_metadata)); VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); abd_t *abd = abd_alloc_struct(size); abd->abd_flags |= ABD_FLAG_OWNER; abd->abd_u.abd_scatter.abd_offset = 0; abd_alloc_chunks(abd, size); if (is_metadata) { abd->abd_flags |= ABD_FLAG_META; } abd->abd_size = size; abd_update_scatter_stats(abd, ABDSTAT_INCR); return (abd); } /* * Allocate an ABD that must be linear, along with its own underlying data * buffer. Only use this when it would be very annoying to write your ABD * consumer with a scattered ABD. */ abd_t * abd_alloc_linear(size_t size, boolean_t is_metadata) { abd_t *abd = abd_alloc_struct(0); VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); abd->abd_flags |= ABD_FLAG_LINEAR | ABD_FLAG_OWNER; if (is_metadata) { abd->abd_flags |= ABD_FLAG_META; } abd->abd_size = size; if (is_metadata) { ABD_LINEAR_BUF(abd) = zio_buf_alloc(size); } else { ABD_LINEAR_BUF(abd) = zio_data_buf_alloc(size); } abd_update_linear_stats(abd, ABDSTAT_INCR); return (abd); } static void abd_free_linear(abd_t *abd) { if (abd_is_linear_page(abd)) { abd_free_linear_page(abd); return; } if (abd->abd_flags & ABD_FLAG_META) { zio_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size); } else { zio_data_buf_free(ABD_LINEAR_BUF(abd), abd->abd_size); } abd_update_linear_stats(abd, ABDSTAT_DECR); } static void abd_free_gang(abd_t *abd) { ASSERT(abd_is_gang(abd)); abd_t *cabd; while ((cabd = list_head(&ABD_GANG(abd).abd_gang_chain)) != NULL) { /* * We must acquire the child ABDs mutex to ensure that if it * is being added to another gang ABD we will set the link * as inactive when removing it from this gang ABD and before * adding it to the other gang ABD. 
*/ mutex_enter(&cabd->abd_mtx); ASSERT(list_link_active(&cabd->abd_gang_link)); list_remove(&ABD_GANG(abd).abd_gang_chain, cabd); mutex_exit(&cabd->abd_mtx); if (cabd->abd_flags & ABD_FLAG_GANG_FREE) abd_free(cabd); } list_destroy(&ABD_GANG(abd).abd_gang_chain); } static void abd_free_scatter(abd_t *abd) { abd_free_chunks(abd); abd_update_scatter_stats(abd, ABDSTAT_DECR); } /* * Free an ABD. Use with any kind of abd: those created with abd_alloc_*() * and abd_get_*(), including abd_get_offset_struct(). * * If the ABD was created with abd_alloc_*(), the underlying data * (scatterlist or linear buffer) will also be freed. (Subject to ownership * changes via abd_*_ownership_of_buf().) * * Unless the ABD was created with abd_get_offset_struct(), the abd_t will * also be freed. */ void abd_free(abd_t *abd) { if (abd == NULL) return; abd_verify(abd); #ifdef ZFS_DEBUG IMPLY(abd->abd_flags & ABD_FLAG_OWNER, abd->abd_parent == NULL); #endif if (abd_is_gang(abd)) { abd_free_gang(abd); } else if (abd_is_linear(abd)) { if (abd->abd_flags & ABD_FLAG_OWNER) abd_free_linear(abd); } else { if (abd->abd_flags & ABD_FLAG_OWNER) abd_free_scatter(abd); } #ifdef ZFS_DEBUG if (abd->abd_parent != NULL) { (void) zfs_refcount_remove_many(&abd->abd_parent->abd_children, abd->abd_size, abd); } #endif abd_fini_struct(abd); if (abd->abd_flags & ABD_FLAG_ALLOCD) abd_free_struct_impl(abd); } /* * Allocate an ABD of the same format (same metadata flag, same scatterize * setting) as another ABD. */ abd_t * abd_alloc_sametype(abd_t *sabd, size_t size) { boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0; if (abd_is_linear(sabd) && !abd_is_linear_page(sabd)) { return (abd_alloc_linear(size, is_metadata)); } else { return (abd_alloc(size, is_metadata)); } } /* * Create gang ABD that will be the head of a list of ABD's. This is used * to "chain" scatter/gather lists together when constructing aggregated * IO's. To free this abd, abd_free() must be called. 
*/ abd_t * abd_alloc_gang(void) { abd_t *abd = abd_alloc_struct(0); abd->abd_flags |= ABD_FLAG_GANG | ABD_FLAG_OWNER; list_create(&ABD_GANG(abd).abd_gang_chain, sizeof (abd_t), offsetof(abd_t, abd_gang_link)); return (abd); } /* * Add a child gang ABD to a parent gang ABDs chained list. */ static void abd_gang_add_gang(abd_t *pabd, abd_t *cabd, boolean_t free_on_free) { ASSERT(abd_is_gang(pabd)); ASSERT(abd_is_gang(cabd)); if (free_on_free) { /* * If the parent is responsible for freeing the child gang * ABD we will just splice the child's children ABD list to * the parent's list and immediately free the child gang ABD * struct. The parent gang ABDs children from the child gang * will retain all the free_on_free settings after being * added to the parents list. */ #ifdef ZFS_DEBUG /* * If cabd had abd_parent, we have to drop it here. We can't * transfer it to pabd, nor we can clear abd_size leaving it. */ if (cabd->abd_parent != NULL) { (void) zfs_refcount_remove_many( &cabd->abd_parent->abd_children, cabd->abd_size, cabd); cabd->abd_parent = NULL; } #endif pabd->abd_size += cabd->abd_size; cabd->abd_size = 0; list_move_tail(&ABD_GANG(pabd).abd_gang_chain, &ABD_GANG(cabd).abd_gang_chain); ASSERT(list_is_empty(&ABD_GANG(cabd).abd_gang_chain)); abd_verify(pabd); abd_free(cabd); } else { for (abd_t *child = list_head(&ABD_GANG(cabd).abd_gang_chain); child != NULL; child = list_next(&ABD_GANG(cabd).abd_gang_chain, child)) { /* * We always pass B_FALSE for free_on_free as it is the * original child gang ABDs responsibility to determine * if any of its child ABDs should be free'd on the call * to abd_free(). */ abd_gang_add(pabd, child, B_FALSE); } abd_verify(pabd); } } /* * Add a child ABD to a gang ABD's chained list. */ void abd_gang_add(abd_t *pabd, abd_t *cabd, boolean_t free_on_free) { ASSERT(abd_is_gang(pabd)); abd_t *child_abd = NULL; /* * If the child being added is a gang ABD, we will add the * child's ABDs to the parent gang ABD. 
This allows us to account * for the offset correctly in the parent gang ABD. */ if (abd_is_gang(cabd)) { ASSERT(!list_link_active(&cabd->abd_gang_link)); return (abd_gang_add_gang(pabd, cabd, free_on_free)); } ASSERT(!abd_is_gang(cabd)); /* * In order to verify that an ABD is not already part of * another gang ABD, we must lock the child ABD's abd_mtx * to check its abd_gang_link status. We unlock the abd_mtx * only after it is has been added to a gang ABD, which * will update the abd_gang_link's status. See comment below * for how an ABD can be in multiple gang ABD's simultaneously. */ mutex_enter(&cabd->abd_mtx); if (list_link_active(&cabd->abd_gang_link)) { /* * If the child ABD is already part of another * gang ABD then we must allocate a new * ABD to use a separate link. We mark the newly * allocated ABD with ABD_FLAG_GANG_FREE, before * adding it to the gang ABD's list, to make the * gang ABD aware that it is responsible to call * abd_free(). We use abd_get_offset() in order * to just allocate a new ABD but avoid copying the * data over into the newly allocated ABD. * * An ABD may become part of multiple gang ABD's. For * example, when writing ditto bocks, the same ABD * is used to write 2 or 3 locations with 2 or 3 * zio_t's. Each of the zio's may be aggregated with * different adjacent zio's. zio aggregation uses gang * zio's, so the single ABD can become part of multiple * gang zio's. * * The ASSERT below is to make sure that if * free_on_free is passed as B_TRUE, the ABD can * not be in multiple gang ABD's. The gang ABD * can not be responsible for cleaning up the child * ABD memory allocation if the ABD can be in * multiple gang ABD's at one time. 
*/ ASSERT3B(free_on_free, ==, B_FALSE); child_abd = abd_get_offset(cabd, 0); child_abd->abd_flags |= ABD_FLAG_GANG_FREE; } else { child_abd = cabd; if (free_on_free) child_abd->abd_flags |= ABD_FLAG_GANG_FREE; } ASSERT3P(child_abd, !=, NULL); list_insert_tail(&ABD_GANG(pabd).abd_gang_chain, child_abd); mutex_exit(&cabd->abd_mtx); pabd->abd_size += child_abd->abd_size; } /* * Locate the ABD for the supplied offset in the gang ABD. * Return a new offset relative to the returned ABD. */ abd_t * abd_gang_get_offset(abd_t *abd, size_t *off) { abd_t *cabd; ASSERT(abd_is_gang(abd)); ASSERT3U(*off, <, abd->abd_size); for (cabd = list_head(&ABD_GANG(abd).abd_gang_chain); cabd != NULL; cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd)) { if (*off >= cabd->abd_size) *off -= cabd->abd_size; else return (cabd); } VERIFY3P(cabd, !=, NULL); return (cabd); } /* * Allocate a new ABD, using the provided struct (if non-NULL, and if * circumstances allow - otherwise allocate the struct). The returned ABD will * point to offset off of sabd. It shares the underlying buffer data with sabd. * Use abd_free() to free. sabd must not be freed while any derived ABDs exist. */ static abd_t * abd_get_offset_impl(abd_t *abd, abd_t *sabd, size_t off, size_t size) { abd_verify(sabd); ASSERT3U(off + size, <=, sabd->abd_size); if (abd_is_linear(sabd)) { if (abd == NULL) abd = abd_alloc_struct(0); /* * Even if this buf is filesystem metadata, we only track that * if we own the underlying data buffer, which is not true in * this case. Therefore, we don't ever use ABD_FLAG_META here. 
*/ abd->abd_flags |= ABD_FLAG_LINEAR; ABD_LINEAR_BUF(abd) = (char *)ABD_LINEAR_BUF(sabd) + off; } else if (abd_is_gang(sabd)) { size_t left = size; if (abd == NULL) { abd = abd_alloc_gang(); } else { abd->abd_flags |= ABD_FLAG_GANG; list_create(&ABD_GANG(abd).abd_gang_chain, sizeof (abd_t), offsetof(abd_t, abd_gang_link)); } abd->abd_flags &= ~ABD_FLAG_OWNER; for (abd_t *cabd = abd_gang_get_offset(sabd, &off); cabd != NULL && left > 0; cabd = list_next(&ABD_GANG(sabd).abd_gang_chain, cabd)) { int csize = MIN(left, cabd->abd_size - off); abd_t *nabd = abd_get_offset_size(cabd, off, csize); abd_gang_add(abd, nabd, B_TRUE); left -= csize; off = 0; } ASSERT3U(left, ==, 0); } else { abd = abd_get_offset_scatter(abd, sabd, off, size); } ASSERT3P(abd, !=, NULL); abd->abd_size = size; #ifdef ZFS_DEBUG abd->abd_parent = sabd; (void) zfs_refcount_add_many(&sabd->abd_children, abd->abd_size, abd); #endif return (abd); } /* * Like abd_get_offset_size(), but memory for the abd_t is provided by the * caller. Using this routine can improve performance by avoiding the cost * of allocating memory for the abd_t struct, and updating the abd stats. * Usually, the provided abd is returned, but in some circumstances (FreeBSD, * if sabd is scatter and size is more than 2 pages) a new abd_t may need to * be allocated. Therefore callers should be careful to use the returned * abd_t*. */ abd_t * abd_get_offset_struct(abd_t *abd, abd_t *sabd, size_t off, size_t size) { abd_t *result; abd_init_struct(abd); result = abd_get_offset_impl(abd, sabd, off, size); if (result != abd) abd_fini_struct(abd); return (result); } abd_t * abd_get_offset(abd_t *sabd, size_t off) { size_t size = sabd->abd_size > off ? 
sabd->abd_size - off : 0; VERIFY3U(size, >, 0); return (abd_get_offset_impl(NULL, sabd, off, size)); } abd_t * abd_get_offset_size(abd_t *sabd, size_t off, size_t size) { ASSERT3U(off + size, <=, sabd->abd_size); return (abd_get_offset_impl(NULL, sabd, off, size)); } /* * Return a size scatter ABD containing only zeros. */ abd_t * abd_get_zeros(size_t size) { ASSERT3P(abd_zero_scatter, !=, NULL); ASSERT3U(size, <=, SPA_MAXBLOCKSIZE); return (abd_get_offset_size(abd_zero_scatter, 0, size)); } /* * Allocate a linear ABD structure for buf. */ abd_t * abd_get_from_buf(void *buf, size_t size) { abd_t *abd = abd_alloc_struct(0); VERIFY3U(size, <=, SPA_MAXBLOCKSIZE); /* * Even if this buf is filesystem metadata, we only track that if we * own the underlying data buffer, which is not true in this case. * Therefore, we don't ever use ABD_FLAG_META here. */ abd->abd_flags |= ABD_FLAG_LINEAR; abd->abd_size = size; ABD_LINEAR_BUF(abd) = buf; return (abd); } /* * Get the raw buffer associated with a linear ABD. */ void * abd_to_buf(abd_t *abd) { ASSERT(abd_is_linear(abd)); abd_verify(abd); return (ABD_LINEAR_BUF(abd)); } /* * Borrow a raw buffer from an ABD without copying the contents of the ABD * into the buffer. If the ABD is scattered, this will allocate a raw buffer * whose contents are undefined. To copy over the existing data in the ABD, use * abd_borrow_buf_copy() instead. */ void * abd_borrow_buf(abd_t *abd, size_t n) { void *buf; abd_verify(abd); ASSERT3U(abd->abd_size, >=, n); if (abd_is_linear(abd)) { buf = abd_to_buf(abd); } else { buf = zio_buf_alloc(n); } #ifdef ZFS_DEBUG (void) zfs_refcount_add_many(&abd->abd_children, n, buf); #endif return (buf); } void * abd_borrow_buf_copy(abd_t *abd, size_t n) { void *buf = abd_borrow_buf(abd, n); if (!abd_is_linear(abd)) { abd_copy_to_buf(buf, abd, n); } return (buf); } /* * Return a borrowed raw buffer to an ABD. 
If the ABD is scattered, this will * not change the contents of the ABD and will ASSERT that you didn't modify * the buffer since it was borrowed. If you want any changes you made to buf to * be copied back to abd, use abd_return_buf_copy() instead. */ void abd_return_buf(abd_t *abd, void *buf, size_t n) { abd_verify(abd); ASSERT3U(abd->abd_size, >=, n); #ifdef ZFS_DEBUG (void) zfs_refcount_remove_many(&abd->abd_children, n, buf); #endif if (abd_is_linear(abd)) { ASSERT3P(buf, ==, abd_to_buf(abd)); } else { ASSERT0(abd_cmp_buf(abd, buf, n)); zio_buf_free(buf, n); } } void abd_return_buf_copy(abd_t *abd, void *buf, size_t n) { if (!abd_is_linear(abd)) { abd_copy_from_buf(abd, buf, n); } abd_return_buf(abd, buf, n); } void abd_release_ownership_of_buf(abd_t *abd) { ASSERT(abd_is_linear(abd)); ASSERT(abd->abd_flags & ABD_FLAG_OWNER); /* * abd_free() needs to handle LINEAR_PAGE ABD's specially. * Since that flag does not survive the * abd_release_ownership_of_buf() -> abd_get_from_buf() -> * abd_take_ownership_of_buf() sequence, we don't allow releasing * these "linear but not zio_[data_]buf_alloc()'ed" ABD's. */ ASSERT(!abd_is_linear_page(abd)); abd_verify(abd); abd->abd_flags &= ~ABD_FLAG_OWNER; /* Disable this flag since we no longer own the data buffer */ abd->abd_flags &= ~ABD_FLAG_META; abd_update_linear_stats(abd, ABDSTAT_DECR); } /* * Give this ABD ownership of the buffer that it's storing. Can only be used on * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated * with abd_alloc_linear() which subsequently released ownership of their buf * with abd_release_ownership_of_buf(). 
*/ void abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata) { ASSERT(abd_is_linear(abd)); ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER)); abd_verify(abd); abd->abd_flags |= ABD_FLAG_OWNER; if (is_metadata) { abd->abd_flags |= ABD_FLAG_META; } abd_update_linear_stats(abd, ABDSTAT_INCR); } /* * Initializes an abd_iter based on whether the abd is a gang ABD * or just a single ABD. */ static inline abd_t * abd_init_abd_iter(abd_t *abd, struct abd_iter *aiter, size_t off) { abd_t *cabd = NULL; if (abd_is_gang(abd)) { cabd = abd_gang_get_offset(abd, &off); if (cabd) { abd_iter_init(aiter, cabd); abd_iter_advance(aiter, off); } } else { abd_iter_init(aiter, abd); abd_iter_advance(aiter, off); } return (cabd); } /* * Advances an abd_iter. We have to be careful with gang ABD as * advancing could mean that we are at the end of a particular ABD and * must grab the ABD in the gang ABD's list. */ static inline abd_t * abd_advance_abd_iter(abd_t *abd, abd_t *cabd, struct abd_iter *aiter, size_t len) { abd_iter_advance(aiter, len); if (abd_is_gang(abd) && abd_iter_at_end(aiter)) { ASSERT3P(cabd, !=, NULL); cabd = list_next(&ABD_GANG(abd).abd_gang_chain, cabd); if (cabd) { abd_iter_init(aiter, cabd); abd_iter_advance(aiter, 0); } } return (cabd); } int abd_iterate_func(abd_t *abd, size_t off, size_t size, abd_iter_func_t *func, void *private) { struct abd_iter aiter; int ret = 0; if (size == 0) return (0); abd_verify(abd); ASSERT3U(off + size, <=, abd->abd_size); abd_t *c_abd = abd_init_abd_iter(abd, &aiter, off); while (size > 0) { IMPLY(abd_is_gang(abd), c_abd != NULL); abd_iter_map(&aiter); size_t len = MIN(aiter.iter_mapsize, size); ASSERT3U(len, >, 0); ret = func(aiter.iter_mapaddr, len, private); abd_iter_unmap(&aiter); if (ret != 0) break; size -= len; c_abd = abd_advance_abd_iter(abd, c_abd, &aiter, len); } return (ret); } struct buf_arg { void *arg_buf; }; static int abd_copy_to_buf_off_cb(void *buf, size_t size, void *private) { struct buf_arg *ba_ptr = private; 
(void) memcpy(ba_ptr->arg_buf, buf, size); ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; return (0); } /* * Copy abd to buf. (off is the offset in abd.) */ void abd_copy_to_buf_off(void *buf, abd_t *abd, size_t off, size_t size) { struct buf_arg ba_ptr = { buf }; (void) abd_iterate_func(abd, off, size, abd_copy_to_buf_off_cb, &ba_ptr); } static int abd_cmp_buf_off_cb(void *buf, size_t size, void *private) { int ret; struct buf_arg *ba_ptr = private; ret = memcmp(buf, ba_ptr->arg_buf, size); ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; return (ret); } /* * Compare the contents of abd to buf. (off is the offset in abd.) */ int abd_cmp_buf_off(abd_t *abd, const void *buf, size_t off, size_t size) { struct buf_arg ba_ptr = { (void *) buf }; return (abd_iterate_func(abd, off, size, abd_cmp_buf_off_cb, &ba_ptr)); } static int abd_copy_from_buf_off_cb(void *buf, size_t size, void *private) { struct buf_arg *ba_ptr = private; (void) memcpy(buf, ba_ptr->arg_buf, size); ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size; return (0); } /* * Copy from buf to abd. (off is the offset in abd.) */ void abd_copy_from_buf_off(abd_t *abd, const void *buf, size_t off, size_t size) { struct buf_arg ba_ptr = { (void *) buf }; (void) abd_iterate_func(abd, off, size, abd_copy_from_buf_off_cb, &ba_ptr); } static int abd_zero_off_cb(void *buf, size_t size, void *private) { (void) private; (void) memset(buf, 0, size); return (0); } /* * Zero out the abd from a particular offset to the end. */ void abd_zero_off(abd_t *abd, size_t off, size_t size) { (void) abd_iterate_func(abd, off, size, abd_zero_off_cb, NULL); } /* * Iterate over two ABDs and call func incrementally on the two ABDs' data in * equal-sized chunks (passed to func as raw buffers). func could be called many * times during this iteration. 
*/ int abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, size_t size, abd_iter_func2_t *func, void *private) { int ret = 0; struct abd_iter daiter, saiter; abd_t *c_dabd, *c_sabd; if (size == 0) return (0); abd_verify(dabd); abd_verify(sabd); ASSERT3U(doff + size, <=, dabd->abd_size); ASSERT3U(soff + size, <=, sabd->abd_size); c_dabd = abd_init_abd_iter(dabd, &daiter, doff); c_sabd = abd_init_abd_iter(sabd, &saiter, soff); while (size > 0) { IMPLY(abd_is_gang(dabd), c_dabd != NULL); IMPLY(abd_is_gang(sabd), c_sabd != NULL); abd_iter_map(&daiter); abd_iter_map(&saiter); size_t dlen = MIN(daiter.iter_mapsize, size); size_t slen = MIN(saiter.iter_mapsize, size); size_t len = MIN(dlen, slen); ASSERT(dlen > 0 || slen > 0); ret = func(daiter.iter_mapaddr, saiter.iter_mapaddr, len, private); abd_iter_unmap(&saiter); abd_iter_unmap(&daiter); if (ret != 0) break; size -= len; c_dabd = abd_advance_abd_iter(dabd, c_dabd, &daiter, len); c_sabd = abd_advance_abd_iter(sabd, c_sabd, &saiter, len); } return (ret); } static int abd_copy_off_cb(void *dbuf, void *sbuf, size_t size, void *private) { (void) private; (void) memcpy(dbuf, sbuf, size); return (0); } /* * Copy from sabd to dabd starting from soff and doff. */ void abd_copy_off(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, size_t size) { (void) abd_iterate_func2(dabd, sabd, doff, soff, size, abd_copy_off_cb, NULL); } static int abd_cmp_cb(void *bufa, void *bufb, size_t size, void *private) { (void) private; return (memcmp(bufa, bufb, size)); } /* * Compares the contents of two ABDs. */ int abd_cmp(abd_t *dabd, abd_t *sabd) { ASSERT3U(dabd->abd_size, ==, sabd->abd_size); return (abd_iterate_func2(dabd, sabd, 0, 0, dabd->abd_size, abd_cmp_cb, NULL)); } /* * Iterate over code ABDs and a data ABD and call @func_raidz_gen. * * @cabds parity ABDs, must have equal size * @dabd data ABD. 
Can be NULL (in this case @dsize = 0) * @func_raidz_gen should be implemented so that its behaviour * is the same when taking linear and when taking scatter */ void -abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, - ssize_t csize, ssize_t dsize, const unsigned parity, +abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd, size_t off, + size_t csize, size_t dsize, const unsigned parity, void (*func_raidz_gen)(void **, const void *, size_t, size_t)) { int i; - ssize_t len, dlen; + size_t len, dlen; struct abd_iter caiters[3]; struct abd_iter daiter; void *caddrs[3]; unsigned long flags __maybe_unused = 0; abd_t *c_cabds[3]; abd_t *c_dabd = NULL; ASSERT3U(parity, <=, 3); for (i = 0; i < parity; i++) { abd_verify(cabds[i]); - ASSERT3U(csize, <=, cabds[i]->abd_size); - c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], 0); + ASSERT3U(off + csize, <=, cabds[i]->abd_size); + c_cabds[i] = abd_init_abd_iter(cabds[i], &caiters[i], off); } - ASSERT3S(dsize, >=, 0); if (dsize > 0) { ASSERT(dabd); abd_verify(dabd); - ASSERT3U(dsize, <=, dabd->abd_size); - c_dabd = abd_init_abd_iter(dabd, &daiter, 0); + ASSERT3U(off + dsize, <=, dabd->abd_size); + c_dabd = abd_init_abd_iter(dabd, &daiter, off); } abd_enter_critical(flags); while (csize > 0) { len = csize; for (i = 0; i < parity; i++) { IMPLY(abd_is_gang(cabds[i]), c_cabds[i] != NULL); abd_iter_map(&caiters[i]); caddrs[i] = caiters[i].iter_mapaddr; len = MIN(caiters[i].iter_mapsize, len); } if (dsize > 0) { IMPLY(abd_is_gang(dabd), c_dabd != NULL); abd_iter_map(&daiter); len = MIN(daiter.iter_mapsize, len); dlen = len; } else dlen = 0; /* must be progressive */ - ASSERT3S(len, >, 0); + ASSERT3U(len, >, 0); /* * The iterated function likely will not do well if each * segment except the last one is not multiple of 512 (raidz). 
*/ ASSERT3U(((uint64_t)len & 511ULL), ==, 0); func_raidz_gen(caddrs, daiter.iter_mapaddr, len, dlen); for (i = parity-1; i >= 0; i--) { abd_iter_unmap(&caiters[i]); c_cabds[i] = abd_advance_abd_iter(cabds[i], c_cabds[i], &caiters[i], len); } if (dsize > 0) { abd_iter_unmap(&daiter); c_dabd = abd_advance_abd_iter(dabd, c_dabd, &daiter, dlen); dsize -= dlen; } csize -= len; - - ASSERT3S(dsize, >=, 0); - ASSERT3S(csize, >=, 0); } abd_exit_critical(flags); } /* * Iterate over code ABDs and data reconstruction target ABDs and call * @func_raidz_rec. Function maps at most 6 pages atomically. * * @cabds parity ABDs, must have equal size * @tabds rec target ABDs, at most 3 * @tsize size of data target columns * @func_raidz_rec expects syndrome data in target columns. Function * reconstructs data and overwrites target columns. */ void abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds, - ssize_t tsize, const unsigned parity, + size_t tsize, const unsigned parity, void (*func_raidz_rec)(void **t, const size_t tsize, void **c, const unsigned *mul), const unsigned *mul) { int i; - ssize_t len; + size_t len; struct abd_iter citers[3]; struct abd_iter xiters[3]; void *caddrs[3], *xaddrs[3]; unsigned long flags __maybe_unused = 0; abd_t *c_cabds[3]; abd_t *c_tabds[3]; ASSERT3U(parity, <=, 3); for (i = 0; i < parity; i++) { abd_verify(cabds[i]); abd_verify(tabds[i]); ASSERT3U(tsize, <=, cabds[i]->abd_size); ASSERT3U(tsize, <=, tabds[i]->abd_size); c_cabds[i] = abd_init_abd_iter(cabds[i], &citers[i], 0); c_tabds[i] = abd_init_abd_iter(tabds[i], &xiters[i], 0); } abd_enter_critical(flags); while (tsize > 0) { len = tsize; for (i = 0; i < parity; i++) { IMPLY(abd_is_gang(cabds[i]), c_cabds[i] != NULL); IMPLY(abd_is_gang(tabds[i]), c_tabds[i] != NULL); abd_iter_map(&citers[i]); abd_iter_map(&xiters[i]); caddrs[i] = citers[i].iter_mapaddr; xaddrs[i] = xiters[i].iter_mapaddr; len = MIN(citers[i].iter_mapsize, len); len = MIN(xiters[i].iter_mapsize, len); } /* must be progressive */ 
ASSERT3S(len, >, 0); /* * The iterated function likely will not do well if each * segment except the last one is not multiple of 512 (raidz). */ ASSERT3U(((uint64_t)len & 511ULL), ==, 0); func_raidz_rec(xaddrs, len, caddrs, mul); for (i = parity-1; i >= 0; i--) { abd_iter_unmap(&xiters[i]); abd_iter_unmap(&citers[i]); c_tabds[i] = abd_advance_abd_iter(tabds[i], c_tabds[i], &xiters[i], len); c_cabds[i] = abd_advance_abd_iter(cabds[i], c_cabds[i], &citers[i], len); } tsize -= len; ASSERT3S(tsize, >=, 0); } abd_exit_critical(flags); } diff --git a/module/zfs/vdev_raidz_math_impl.h b/module/zfs/vdev_raidz_math_impl.h index 8ba7e0cd769d..5d77c5d046d5 100644 --- a/module/zfs/vdev_raidz_math_impl.h +++ b/module/zfs/vdev_raidz_math_impl.h @@ -1,1502 +1,1528 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (C) 2016 Gvozden Nešković. All rights reserved. */ #ifndef _VDEV_RAIDZ_MATH_IMPL_H #define _VDEV_RAIDZ_MATH_IMPL_H #include #include #define raidz_inline inline __attribute__((always_inline)) #ifndef noinline #define noinline __attribute__((noinline)) #endif /* * Functions calculate multiplication constants for data reconstruction. 
 * Coefficients depend on RAIDZ geometry, indexes of failed child vdevs, and
 * used parity columns for reconstruction.
 * @rr		RAIDZ row
 * @tgtidx	array of missing data indexes
 * @coeff	output array of coefficients. Array must be provided by
 *		user and must hold minimum MUL_CNT values.
 */
static noinline void
raidz_rec_q_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
{
	const unsigned ncols = rr->rr_cols;
	const unsigned x = tgtidx[TARGET_X];

	/* Inverse of the generator power applied to column x in Q. */
	coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1));
}

static noinline void
raidz_rec_r_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
{
	const unsigned ncols = rr->rr_cols;
	const unsigned x = tgtidx[TARGET_X];

	/* Same as Q, but with the 4^i generator used by the R column. */
	coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1));
}

static noinline void
raidz_rec_pq_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
{
	const unsigned ncols = rr->rr_cols;
	const unsigned x = tgtidx[TARGET_X];
	const unsigned y = tgtidx[TARGET_Y];
	gf_t a, b, e;

	a = gf_exp2(x + 255 - y);
	b = gf_exp2(255 - (ncols - x - 1));
	e = a ^ 0x01;

	coeff[MUL_PQ_X] = gf_div(a, e);
	coeff[MUL_PQ_Y] = gf_div(b, e);
}

static noinline void
raidz_rec_pr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
{
	const unsigned ncols = rr->rr_cols;
	const unsigned x = tgtidx[TARGET_X];
	const unsigned y = tgtidx[TARGET_Y];
	gf_t a, b, e;

	/* Mirrors raidz_rec_pq_coeff() with the R-column 4^i generator. */
	a = gf_exp4(x + 255 - y);
	b = gf_exp4(255 - (ncols - x - 1));
	e = a ^ 0x01;

	coeff[MUL_PR_X] = gf_div(a, e);
	coeff[MUL_PR_Y] = gf_div(b, e);
}

static noinline void
raidz_rec_qr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
{
	const unsigned ncols = rr->rr_cols;
	const unsigned x = tgtidx[TARGET_X];
	const unsigned y = tgtidx[TARGET_Y];
	gf_t nx, ny, nxxy, nxyy, d;

	/* nx/ny are the generator powers of the two missing columns. */
	nx = gf_exp2(ncols - x - 1);
	ny = gf_exp2(ncols - y - 1);
	nxxy = gf_mul(gf_mul(nx, nx), ny);
	nxyy = gf_mul(gf_mul(nx, ny), ny);
	d = nxxy ^ nxyy;

	coeff[MUL_QR_XQ] = ny;
	coeff[MUL_QR_X] = gf_div(ny, d);
	coeff[MUL_QR_YQ] = nx;
	coeff[MUL_QR_Y] = gf_div(nx, d);
}

static noinline void
raidz_rec_pqr_coeff(const raidz_row_t *rr, const int *tgtidx, unsigned *coeff)
{
	const unsigned ncols = rr->rr_cols;
	const unsigned x = tgtidx[TARGET_X];
	const unsigned y = tgtidx[TARGET_Y];
	const unsigned z = tgtidx[TARGET_Z];
	gf_t nx, ny, nz, nxx, nyy, nzz, nyyz, nyzz, xd, yd;

	/* 2^i and 4^i generator powers of the three missing columns. */
	nx = gf_exp2(ncols - x - 1);
	ny = gf_exp2(ncols - y - 1);
	nz = gf_exp2(ncols - z - 1);
	nxx = gf_exp4(ncols - x - 1);
	nyy = gf_exp4(ncols - y - 1);
	nzz = gf_exp4(ncols - z - 1);

	nyyz = gf_mul(gf_mul(ny, nz), ny);
	nyzz = gf_mul(nzz, ny);

	/* Denominators of the 3x3 system solved for Dx, Dy, Dz. */
	xd = gf_mul(nxx, ny) ^ gf_mul(nx, nyy) ^ nyyz ^
	    gf_mul(nxx, nz) ^ gf_mul(nzz, nx) ^ nyzz;

	yd = gf_inv(ny ^ nz);

	coeff[MUL_PQR_XP] = gf_div(nyyz ^ nyzz, xd);
	coeff[MUL_PQR_XQ] = gf_div(nyy ^ nzz, xd);
	coeff[MUL_PQR_XR] = gf_div(ny ^ nz, xd);
	coeff[MUL_PQR_YU] = nx;
	coeff[MUL_PQR_YP] = gf_mul(nz, yd);
	coeff[MUL_PQR_YQ] = yd;
}

/*
 * Method for zeroing a buffer (can be implemented using SIMD).
 * This method is used by multiple gen/rec functions.
 *
 * @dc		Destination buffer
 * @dsize	Destination buffer size
 * @private	Unused
 */
static int
raidz_zero_abd_cb(void *dc, size_t dsize, void *private)
{
	v_t *dst = (v_t *)dc;
	size_t i;
	ZERO_DEFINE();

	(void) private; /* unused */

	ZERO(ZERO_D);

	/*
	 * Two strided stores per iteration; dsize is presumably always a
	 * multiple of 2 * ZERO_STRIDE * sizeof (v_t) -- TODO confirm at
	 * the call sites (raidz buffers are 512-byte aligned).
	 */
	for (i = 0; i < dsize / sizeof (v_t); i += (2 * ZERO_STRIDE)) {
		STORE(dst + i, ZERO_D);
		STORE(dst + i + ZERO_STRIDE, ZERO_D);
	}

	return (0);
}

#define	raidz_zero(dabd, size)						\
{									\
	abd_iterate_func(dabd, 0, size, raidz_zero_abd_cb, NULL);	\
}

/*
 * Method for copying two buffers (can be implemented using SIMD).
 * This method is used by multiple gen/rec functions.
* * @dc Destination buffer * @sc Source buffer * @dsize Destination buffer size * @ssize Source buffer size * @private Unused */ static int raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private) { v_t *dst = (v_t *)dc; const v_t *src = (v_t *)sc; size_t i; COPY_DEFINE(); (void) private; /* unused */ for (i = 0; i < size / sizeof (v_t); i += (2 * COPY_STRIDE)) { LOAD(src + i, COPY_D); STORE(dst + i, COPY_D); LOAD(src + i + COPY_STRIDE, COPY_D); STORE(dst + i + COPY_STRIDE, COPY_D); } return (0); } -#define raidz_copy(dabd, sabd, size) \ +#define raidz_copy(dabd, sabd, off, size) \ { \ - abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_copy_abd_cb, NULL);\ + abd_iterate_func2(dabd, sabd, off, off, size, raidz_copy_abd_cb, \ + NULL); \ } /* * Method for adding (XORing) two buffers. * Source and destination are XORed together and result is stored in * destination buffer. This method is used by multiple for gen/rec functions. * * @dc Destination buffer * @sc Source buffer * @dsize Destination buffer size * @ssize Source buffer size * @private Unused */ static int raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private) { v_t *dst = (v_t *)dc; const v_t *src = (v_t *)sc; size_t i; ADD_DEFINE(); (void) private; /* unused */ for (i = 0; i < size / sizeof (v_t); i += (2 * ADD_STRIDE)) { LOAD(dst + i, ADD_D); XOR_ACC(src + i, ADD_D); STORE(dst + i, ADD_D); LOAD(dst + i + ADD_STRIDE, ADD_D); XOR_ACC(src + i + ADD_STRIDE, ADD_D); STORE(dst + i + ADD_STRIDE, ADD_D); } return (0); } -#define raidz_add(dabd, sabd, size) \ +#define raidz_add(dabd, sabd, off, size) \ { \ - abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_add_abd_cb, NULL);\ + abd_iterate_func2(dabd, sabd, off, off, size, raidz_add_abd_cb, \ + NULL); \ } /* * Method for multiplying a buffer with a constant in GF(2^8). * Symbols from buffer are multiplied by a constant and result is stored * back in the same buffer. * * @dc In/Out data buffer. 
* @size Size of the buffer * @private pointer to the multiplication constant (unsigned) */ static int raidz_mul_abd_cb(void *dc, size_t size, void *private) { const unsigned mul = *((unsigned *)private); v_t *d = (v_t *)dc; size_t i; MUL_DEFINE(); for (i = 0; i < size / sizeof (v_t); i += (2 * MUL_STRIDE)) { LOAD(d + i, MUL_D); MUL(mul, MUL_D); STORE(d + i, MUL_D); LOAD(d + i + MUL_STRIDE, MUL_D); MUL(mul, MUL_D); STORE(d + i + MUL_STRIDE, MUL_D); } return (0); } /* * Syndrome generation/update macros * * Require LOAD(), XOR(), STORE(), MUL2(), and MUL4() macros */ #define P_D_SYNDROME(D, T, t) \ { \ LOAD((t), T); \ XOR(D, T); \ STORE((t), T); \ } #define Q_D_SYNDROME(D, T, t) \ { \ LOAD((t), T); \ MUL2(T); \ XOR(D, T); \ STORE((t), T); \ } #define Q_SYNDROME(T, t) \ { \ LOAD((t), T); \ MUL2(T); \ STORE((t), T); \ } #define R_D_SYNDROME(D, T, t) \ { \ LOAD((t), T); \ MUL4(T); \ XOR(D, T); \ STORE((t), T); \ } #define R_SYNDROME(T, t) \ { \ LOAD((t), T); \ MUL4(T); \ STORE((t), T); \ } /* * PARITY CALCULATION * * Macros *_SYNDROME are used for parity/syndrome calculation. * *_D_SYNDROME() macros are used to calculate syndrome between 0 and * length of data column, and *_SYNDROME() macros are only for updating * the parity/syndrome if data column is shorter. * * P parity is calculated using raidz_add_abd(). + * + * For CPU L2 cache blocking we process 64KB at a time. 
*/ +#define CHUNK 65536 /* * Generate P parity (RAIDZ1) * * @rr RAIDZ row */ static raidz_inline void raidz_generate_p_impl(raidz_row_t * const rr) { size_t c; const size_t ncols = rr->rr_cols; const size_t psize = rr->rr_col[CODE_P].rc_size; abd_t *pabd = rr->rr_col[CODE_P].rc_abd; - size_t size; - abd_t *dabd; + size_t off, size; raidz_math_begin(); - /* start with first data column */ - raidz_copy(pabd, rr->rr_col[1].rc_abd, psize); + for (off = 0; off < psize; off += CHUNK) { - for (c = 2; c < ncols; c++) { - dabd = rr->rr_col[c].rc_abd; - size = rr->rr_col[c].rc_size; + /* start with first data column */ + size = MIN(CHUNK, psize - off); + raidz_copy(pabd, rr->rr_col[1].rc_abd, off, size); - /* add data column */ - raidz_add(pabd, dabd, size); + for (c = 2; c < ncols; c++) { + size = rr->rr_col[c].rc_size; + if (size <= off) + continue; + + /* add data column */ + size = MIN(CHUNK, size - off); + abd_t *dabd = rr->rr_col[c].rc_abd; + raidz_add(pabd, dabd, off, size); + } } raidz_math_end(); } /* * Generate PQ parity (RAIDZ2) * The function is called per data column. 
* * @c array of pointers to parity (code) columns * @dc pointer to data column * @csize size of parity columns * @dsize size of data column */ static void raidz_gen_pq_add(void **c, const void *dc, const size_t csize, const size_t dsize) { v_t *p = (v_t *)c[0]; v_t *q = (v_t *)c[1]; const v_t *d = (const v_t *)dc; const v_t * const dend = d + (dsize / sizeof (v_t)); const v_t * const qend = q + (csize / sizeof (v_t)); GEN_PQ_DEFINE(); MUL2_SETUP(); for (; d < dend; d += GEN_PQ_STRIDE, p += GEN_PQ_STRIDE, q += GEN_PQ_STRIDE) { LOAD(d, GEN_PQ_D); P_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, p); Q_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, q); } for (; q < qend; q += GEN_PQ_STRIDE) { Q_SYNDROME(GEN_PQ_C, q); } } /* * Generate PQ parity (RAIDZ2) * * @rr RAIDZ row */ static raidz_inline void raidz_generate_pq_impl(raidz_row_t * const rr) { size_t c; const size_t ncols = rr->rr_cols; const size_t csize = rr->rr_col[CODE_P].rc_size; - size_t dsize; + size_t off, size, dsize; abd_t *dabd; abd_t *cabds[] = { rr->rr_col[CODE_P].rc_abd, rr->rr_col[CODE_Q].rc_abd }; raidz_math_begin(); - raidz_copy(cabds[CODE_P], rr->rr_col[2].rc_abd, csize); - raidz_copy(cabds[CODE_Q], rr->rr_col[2].rc_abd, csize); + for (off = 0; off < csize; off += CHUNK) { + + size = MIN(CHUNK, csize - off); + raidz_copy(cabds[CODE_P], rr->rr_col[2].rc_abd, off, size); + raidz_copy(cabds[CODE_Q], rr->rr_col[2].rc_abd, off, size); - for (c = 3; c < ncols; c++) { - dabd = rr->rr_col[c].rc_abd; - dsize = rr->rr_col[c].rc_size; + for (c = 3; c < ncols; c++) { + dabd = rr->rr_col[c].rc_abd; + dsize = rr->rr_col[c].rc_size; + dsize = (dsize > off) ? MIN(CHUNK, dsize - off) : 0; - abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 2, - raidz_gen_pq_add); + abd_raidz_gen_iterate(cabds, dabd, off, size, dsize, 2, + raidz_gen_pq_add); + } } raidz_math_end(); } /* * Generate PQR parity (RAIDZ3) * The function is called per data column. 
* * @c array of pointers to parity (code) columns * @dc pointer to data column * @csize size of parity columns * @dsize size of data column */ static void raidz_gen_pqr_add(void **c, const void *dc, const size_t csize, const size_t dsize) { v_t *p = (v_t *)c[CODE_P]; v_t *q = (v_t *)c[CODE_Q]; v_t *r = (v_t *)c[CODE_R]; const v_t *d = (const v_t *)dc; const v_t * const dend = d + (dsize / sizeof (v_t)); const v_t * const qend = q + (csize / sizeof (v_t)); GEN_PQR_DEFINE(); MUL2_SETUP(); for (; d < dend; d += GEN_PQR_STRIDE, p += GEN_PQR_STRIDE, q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) { LOAD(d, GEN_PQR_D); P_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, p); Q_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, q); R_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, r); } for (; q < qend; q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) { Q_SYNDROME(GEN_PQR_C, q); R_SYNDROME(GEN_PQR_C, r); } } /* * Generate PQR parity (RAIDZ3) * * @rr RAIDZ row */ static raidz_inline void raidz_generate_pqr_impl(raidz_row_t * const rr) { size_t c; const size_t ncols = rr->rr_cols; const size_t csize = rr->rr_col[CODE_P].rc_size; - size_t dsize; + size_t off, size, dsize; abd_t *dabd; abd_t *cabds[] = { rr->rr_col[CODE_P].rc_abd, rr->rr_col[CODE_Q].rc_abd, rr->rr_col[CODE_R].rc_abd }; raidz_math_begin(); - raidz_copy(cabds[CODE_P], rr->rr_col[3].rc_abd, csize); - raidz_copy(cabds[CODE_Q], rr->rr_col[3].rc_abd, csize); - raidz_copy(cabds[CODE_R], rr->rr_col[3].rc_abd, csize); + for (off = 0; off < csize; off += CHUNK) { - for (c = 4; c < ncols; c++) { - dabd = rr->rr_col[c].rc_abd; - dsize = rr->rr_col[c].rc_size; + size = MIN(CHUNK, csize - off); + raidz_copy(cabds[CODE_P], rr->rr_col[3].rc_abd, off, size); + raidz_copy(cabds[CODE_Q], rr->rr_col[3].rc_abd, off, size); + raidz_copy(cabds[CODE_R], rr->rr_col[3].rc_abd, off, size); + + for (c = 4; c < ncols; c++) { + dabd = rr->rr_col[c].rc_abd; + dsize = rr->rr_col[c].rc_size; + dsize = (dsize > off) ? 
MIN(CHUNK, dsize - off) : 0; - abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 3, - raidz_gen_pqr_add); + abd_raidz_gen_iterate(cabds, dabd, off, size, dsize, 3, + raidz_gen_pqr_add); + } } raidz_math_end(); } /* * DATA RECONSTRUCTION * * Data reconstruction process consists of two phases: * - Syndrome calculation * - Data reconstruction * * Syndrome is calculated by generating parity using available data columns * and zeros in places of erasure. Existing parity is added to corresponding * syndrome value to obtain the [P|Q|R]syn values from equation: * P = Psyn + Dx + Dy + Dz * Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz * R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz * * For data reconstruction phase, the corresponding equations are solved * for missing data (Dx, Dy, Dz). This generally involves multiplying known * symbols by an coefficient and adding them together. The multiplication * constant coefficients are calculated ahead of the operation in * raidz_rec_[q|r|pq|pq|qr|pqr]_coeff() functions. * * IMPLEMENTATION NOTE: RAID-Z block can have complex geometry, with "big" * and "short" columns. * For this reason, reconstruction is performed in minimum of * two steps. First, from offset 0 to short_size, then from short_size to * short_size. Calculation functions REC_[*]_BLOCK() are implemented to work * over both ranges. The split also enables removal of conditional expressions * from loop bodies, improving throughput of SIMD implementations. * For the best performance, all functions marked with raidz_inline attribute * must be inlined by compiler. 
* * parity data * columns columns * <----------> <------------------> * x y <----+ missing columns (x, y) * | | * +---+---+---+---+-v-+---+-v-+---+ ^ 0 * | | | | | | | | | | * | | | | | | | | | | * | P | Q | R | D | D | D | D | D | | * | | | | 0 | 1 | 2 | 3 | 4 | | * | | | | | | | | | v * | | | | | +---+---+---+ ^ short_size * | | | | | | | * +---+---+---+---+---+ v big_size * <------------------> <----------> * big columns short columns * */ /* * Reconstruct single data column using P parity * * @syn_method raidz_add_abd() * @rec_method not applicable * * @rr RAIDZ row * @tgtidx array of missing data indexes */ static raidz_inline int raidz_reconstruct_p_impl(raidz_row_t *rr, const int *tgtidx) { size_t c; const size_t firstdc = rr->rr_firstdatacol; const size_t ncols = rr->rr_cols; const size_t x = tgtidx[TARGET_X]; const size_t xsize = rr->rr_col[x].rc_size; abd_t *xabd = rr->rr_col[x].rc_abd; - size_t size; - abd_t *dabd; + size_t off, size; if (xabd == NULL) return (1 << CODE_P); raidz_math_begin(); - /* copy P into target */ - raidz_copy(xabd, rr->rr_col[CODE_P].rc_abd, xsize); + for (off = 0; off < xsize; off += CHUNK) { - /* generate p_syndrome */ - for (c = firstdc; c < ncols; c++) { - if (c == x) - continue; + /* copy P into target */ + size = MIN(CHUNK, xsize - off); + raidz_copy(xabd, rr->rr_col[CODE_P].rc_abd, off, size); - dabd = rr->rr_col[c].rc_abd; - size = MIN(rr->rr_col[c].rc_size, xsize); + /* generate p_syndrome */ + for (c = firstdc; c < ncols; c++) { + if (c == x) + continue; + size = rr->rr_col[c].rc_size; + if (size <= off) + continue; - raidz_add(xabd, dabd, size); + size = MIN(CHUNK, MIN(size, xsize) - off); + abd_t *dabd = rr->rr_col[c].rc_abd; + raidz_add(xabd, dabd, off, size); + } } raidz_math_end(); return (1 << CODE_P); } /* * Generate Q syndrome (Qsyn) * * @xc array of pointers to syndrome columns * @dc data column (NULL if missing) * @xsize size of syndrome columns * @dsize size of data column (0 if missing) */ static void 
raidz_syn_q_abd(void **xc, const void *dc, const size_t xsize, const size_t dsize) { v_t *x = (v_t *)xc[TARGET_X]; const v_t *d = (const v_t *)dc; const v_t * const dend = d + (dsize / sizeof (v_t)); const v_t * const xend = x + (xsize / sizeof (v_t)); SYN_Q_DEFINE(); MUL2_SETUP(); for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) { LOAD(d, SYN_Q_D); Q_D_SYNDROME(SYN_Q_D, SYN_Q_X, x); } for (; x < xend; x += SYN_STRIDE) { Q_SYNDROME(SYN_Q_X, x); } } /* * Reconstruct single data column using Q parity * * @syn_method raidz_add_abd() * @rec_method raidz_mul_abd_cb() * * @rr RAIDZ row * @tgtidx array of missing data indexes */ static raidz_inline int raidz_reconstruct_q_impl(raidz_row_t *rr, const int *tgtidx) { size_t c; size_t dsize; abd_t *dabd; const size_t firstdc = rr->rr_firstdatacol; const size_t ncols = rr->rr_cols; const size_t x = tgtidx[TARGET_X]; abd_t *xabd = rr->rr_col[x].rc_abd; const size_t xsize = rr->rr_col[x].rc_size; abd_t *tabds[] = { xabd }; if (xabd == NULL) return (1 << CODE_Q); unsigned coeff[MUL_CNT]; raidz_rec_q_coeff(rr, tgtidx, coeff); raidz_math_begin(); /* Start with first data column if present */ if (firstdc != x) { - raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize); + raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize); } else { raidz_zero(xabd, xsize); } /* generate q_syndrome */ for (c = firstdc+1; c < ncols; c++) { if (c == x) { dabd = NULL; dsize = 0; } else { dabd = rr->rr_col[c].rc_abd; dsize = rr->rr_col[c].rc_size; } - abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1, + abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 1, raidz_syn_q_abd); } /* add Q to the syndrome */ - raidz_add(xabd, rr->rr_col[CODE_Q].rc_abd, xsize); + raidz_add(xabd, rr->rr_col[CODE_Q].rc_abd, 0, xsize); /* transform the syndrome */ abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void*) coeff); raidz_math_end(); return (1 << CODE_Q); } /* * Generate R syndrome (Rsyn) * * @xc array of pointers to syndrome columns * @dc data column (NULL if 
missing) * @tsize size of syndrome columns * @dsize size of data column (0 if missing) */ static void raidz_syn_r_abd(void **xc, const void *dc, const size_t tsize, const size_t dsize) { v_t *x = (v_t *)xc[TARGET_X]; const v_t *d = (const v_t *)dc; const v_t * const dend = d + (dsize / sizeof (v_t)); const v_t * const xend = x + (tsize / sizeof (v_t)); SYN_R_DEFINE(); MUL2_SETUP(); for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) { LOAD(d, SYN_R_D); R_D_SYNDROME(SYN_R_D, SYN_R_X, x); } for (; x < xend; x += SYN_STRIDE) { R_SYNDROME(SYN_R_X, x); } } /* * Reconstruct single data column using R parity * * @syn_method raidz_add_abd() * @rec_method raidz_mul_abd_cb() * * @rr RAIDZ rr * @tgtidx array of missing data indexes */ static raidz_inline int raidz_reconstruct_r_impl(raidz_row_t *rr, const int *tgtidx) { size_t c; size_t dsize; abd_t *dabd; const size_t firstdc = rr->rr_firstdatacol; const size_t ncols = rr->rr_cols; const size_t x = tgtidx[TARGET_X]; const size_t xsize = rr->rr_col[x].rc_size; abd_t *xabd = rr->rr_col[x].rc_abd; abd_t *tabds[] = { xabd }; if (xabd == NULL) return (1 << CODE_R); unsigned coeff[MUL_CNT]; raidz_rec_r_coeff(rr, tgtidx, coeff); raidz_math_begin(); /* Start with first data column if present */ if (firstdc != x) { - raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize); + raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize); } else { raidz_zero(xabd, xsize); } /* generate q_syndrome */ for (c = firstdc+1; c < ncols; c++) { if (c == x) { dabd = NULL; dsize = 0; } else { dabd = rr->rr_col[c].rc_abd; dsize = rr->rr_col[c].rc_size; } - abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1, + abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 1, raidz_syn_r_abd); } /* add R to the syndrome */ - raidz_add(xabd, rr->rr_col[CODE_R].rc_abd, xsize); + raidz_add(xabd, rr->rr_col[CODE_R].rc_abd, 0, xsize); /* transform the syndrome */ abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff); raidz_math_end(); return (1 << CODE_R); } /* * 
Generate P and Q syndromes * * @xc array of pointers to syndrome columns * @dc data column (NULL if missing) * @tsize size of syndrome columns * @dsize size of data column (0 if missing) */ static void raidz_syn_pq_abd(void **tc, const void *dc, const size_t tsize, const size_t dsize) { v_t *x = (v_t *)tc[TARGET_X]; v_t *y = (v_t *)tc[TARGET_Y]; const v_t *d = (const v_t *)dc; const v_t * const dend = d + (dsize / sizeof (v_t)); const v_t * const yend = y + (tsize / sizeof (v_t)); SYN_PQ_DEFINE(); MUL2_SETUP(); for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) { LOAD(d, SYN_PQ_D); P_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, x); Q_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, y); } for (; y < yend; y += SYN_STRIDE) { Q_SYNDROME(SYN_PQ_X, y); } } /* * Reconstruct data using PQ parity and PQ syndromes * * @tc syndrome/result columns * @tsize size of syndrome/result columns * @c parity columns * @mul array of multiplication constants */ static void raidz_rec_pq_abd(void **tc, const size_t tsize, void **c, const unsigned *mul) { v_t *x = (v_t *)tc[TARGET_X]; v_t *y = (v_t *)tc[TARGET_Y]; const v_t * const xend = x + (tsize / sizeof (v_t)); const v_t *p = (v_t *)c[CODE_P]; const v_t *q = (v_t *)c[CODE_Q]; REC_PQ_DEFINE(); for (; x < xend; x += REC_PQ_STRIDE, y += REC_PQ_STRIDE, p += REC_PQ_STRIDE, q += REC_PQ_STRIDE) { LOAD(x, REC_PQ_X); LOAD(y, REC_PQ_Y); XOR_ACC(p, REC_PQ_X); XOR_ACC(q, REC_PQ_Y); /* Save Pxy */ COPY(REC_PQ_X, REC_PQ_T); /* Calc X */ MUL(mul[MUL_PQ_X], REC_PQ_X); MUL(mul[MUL_PQ_Y], REC_PQ_Y); XOR(REC_PQ_Y, REC_PQ_X); STORE(x, REC_PQ_X); /* Calc Y */ XOR(REC_PQ_T, REC_PQ_X); STORE(y, REC_PQ_X); } } /* * Reconstruct two data columns using PQ parity * * @syn_method raidz_syn_pq_abd() * @rec_method raidz_rec_pq_abd() * * @rr RAIDZ row * @tgtidx array of missing data indexes */ static raidz_inline int raidz_reconstruct_pq_impl(raidz_row_t *rr, const int *tgtidx) { size_t c; size_t dsize; abd_t *dabd; const size_t firstdc = rr->rr_firstdatacol; const size_t ncols = 
rr->rr_cols; const size_t x = tgtidx[TARGET_X]; const size_t y = tgtidx[TARGET_Y]; const size_t xsize = rr->rr_col[x].rc_size; const size_t ysize = rr->rr_col[y].rc_size; abd_t *xabd = rr->rr_col[x].rc_abd; abd_t *yabd = rr->rr_col[y].rc_abd; abd_t *tabds[2] = { xabd, yabd }; abd_t *cabds[] = { rr->rr_col[CODE_P].rc_abd, rr->rr_col[CODE_Q].rc_abd }; if (xabd == NULL) return ((1 << CODE_P) | (1 << CODE_Q)); unsigned coeff[MUL_CNT]; raidz_rec_pq_coeff(rr, tgtidx, coeff); /* * Check if some of targets is shorter then others * In this case, shorter target needs to be replaced with * new buffer so that syndrome can be calculated. */ if (ysize < xsize) { yabd = abd_alloc(xsize, B_FALSE); tabds[1] = yabd; } raidz_math_begin(); /* Start with first data column if present */ if (firstdc != x) { - raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize); - raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize); + raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize); + raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize); } else { raidz_zero(xabd, xsize); raidz_zero(yabd, xsize); } /* generate q_syndrome */ for (c = firstdc+1; c < ncols; c++) { if (c == x || c == y) { dabd = NULL; dsize = 0; } else { dabd = rr->rr_col[c].rc_abd; dsize = rr->rr_col[c].rc_size; } - abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2, + abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2, raidz_syn_pq_abd); } abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pq_abd, coeff); /* Copy shorter targets back to the original abd buffer */ if (ysize < xsize) - raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize); + raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize); raidz_math_end(); if (ysize < xsize) abd_free(yabd); return ((1 << CODE_P) | (1 << CODE_Q)); } /* * Generate P and R syndromes * * @xc array of pointers to syndrome columns * @dc data column (NULL if missing) * @tsize size of syndrome columns * @dsize size of data column (0 if missing) */ static void raidz_syn_pr_abd(void **c, const void *dc, const 
size_t tsize, const size_t dsize) { v_t *x = (v_t *)c[TARGET_X]; v_t *y = (v_t *)c[TARGET_Y]; const v_t *d = (const v_t *)dc; const v_t * const dend = d + (dsize / sizeof (v_t)); const v_t * const yend = y + (tsize / sizeof (v_t)); SYN_PR_DEFINE(); MUL2_SETUP(); for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) { LOAD(d, SYN_PR_D); P_D_SYNDROME(SYN_PR_D, SYN_PR_X, x); R_D_SYNDROME(SYN_PR_D, SYN_PR_X, y); } for (; y < yend; y += SYN_STRIDE) { R_SYNDROME(SYN_PR_X, y); } } /* * Reconstruct data using PR parity and PR syndromes * * @tc syndrome/result columns * @tsize size of syndrome/result columns * @c parity columns * @mul array of multiplication constants */ static void raidz_rec_pr_abd(void **t, const size_t tsize, void **c, const unsigned *mul) { v_t *x = (v_t *)t[TARGET_X]; v_t *y = (v_t *)t[TARGET_Y]; const v_t * const xend = x + (tsize / sizeof (v_t)); const v_t *p = (v_t *)c[CODE_P]; const v_t *q = (v_t *)c[CODE_Q]; REC_PR_DEFINE(); for (; x < xend; x += REC_PR_STRIDE, y += REC_PR_STRIDE, p += REC_PR_STRIDE, q += REC_PR_STRIDE) { LOAD(x, REC_PR_X); LOAD(y, REC_PR_Y); XOR_ACC(p, REC_PR_X); XOR_ACC(q, REC_PR_Y); /* Save Pxy */ COPY(REC_PR_X, REC_PR_T); /* Calc X */ MUL(mul[MUL_PR_X], REC_PR_X); MUL(mul[MUL_PR_Y], REC_PR_Y); XOR(REC_PR_Y, REC_PR_X); STORE(x, REC_PR_X); /* Calc Y */ XOR(REC_PR_T, REC_PR_X); STORE(y, REC_PR_X); } } /* * Reconstruct two data columns using PR parity * * @syn_method raidz_syn_pr_abd() * @rec_method raidz_rec_pr_abd() * * @rr RAIDZ row * @tgtidx array of missing data indexes */ static raidz_inline int raidz_reconstruct_pr_impl(raidz_row_t *rr, const int *tgtidx) { size_t c; size_t dsize; abd_t *dabd; const size_t firstdc = rr->rr_firstdatacol; const size_t ncols = rr->rr_cols; const size_t x = tgtidx[0]; const size_t y = tgtidx[1]; const size_t xsize = rr->rr_col[x].rc_size; const size_t ysize = rr->rr_col[y].rc_size; abd_t *xabd = rr->rr_col[x].rc_abd; abd_t *yabd = rr->rr_col[y].rc_abd; abd_t *tabds[2] = { xabd, 
yabd }; abd_t *cabds[] = { rr->rr_col[CODE_P].rc_abd, rr->rr_col[CODE_R].rc_abd }; if (xabd == NULL) return ((1 << CODE_P) | (1 << CODE_R)); unsigned coeff[MUL_CNT]; raidz_rec_pr_coeff(rr, tgtidx, coeff); /* * Check if some of targets are shorter then others. * They need to be replaced with a new buffer so that syndrome can * be calculated on full length. */ if (ysize < xsize) { yabd = abd_alloc(xsize, B_FALSE); tabds[1] = yabd; } raidz_math_begin(); /* Start with first data column if present */ if (firstdc != x) { - raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize); - raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize); + raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize); + raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize); } else { raidz_zero(xabd, xsize); raidz_zero(yabd, xsize); } /* generate q_syndrome */ for (c = firstdc+1; c < ncols; c++) { if (c == x || c == y) { dabd = NULL; dsize = 0; } else { dabd = rr->rr_col[c].rc_abd; dsize = rr->rr_col[c].rc_size; } - abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2, + abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2, raidz_syn_pr_abd); } abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pr_abd, coeff); /* * Copy shorter targets back to the original abd buffer */ if (ysize < xsize) - raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize); + raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize); raidz_math_end(); if (ysize < xsize) abd_free(yabd); return ((1 << CODE_P) | (1 << CODE_R)); } /* * Generate Q and R syndromes * * @xc array of pointers to syndrome columns * @dc data column (NULL if missing) * @tsize size of syndrome columns * @dsize size of data column (0 if missing) */ static void raidz_syn_qr_abd(void **c, const void *dc, const size_t tsize, const size_t dsize) { v_t *x = (v_t *)c[TARGET_X]; v_t *y = (v_t *)c[TARGET_Y]; const v_t * const xend = x + (tsize / sizeof (v_t)); const v_t *d = (const v_t *)dc; const v_t * const dend = d + (dsize / sizeof (v_t)); SYN_QR_DEFINE(); MUL2_SETUP(); for (; d 
< dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) { LOAD(d, SYN_PQ_D); Q_D_SYNDROME(SYN_QR_D, SYN_QR_X, x); R_D_SYNDROME(SYN_QR_D, SYN_QR_X, y); } for (; x < xend; x += SYN_STRIDE, y += SYN_STRIDE) { Q_SYNDROME(SYN_QR_X, x); R_SYNDROME(SYN_QR_X, y); } } /* * Reconstruct data using QR parity and QR syndromes * * @tc syndrome/result columns * @tsize size of syndrome/result columns * @c parity columns * @mul array of multiplication constants */ static void raidz_rec_qr_abd(void **t, const size_t tsize, void **c, const unsigned *mul) { v_t *x = (v_t *)t[TARGET_X]; v_t *y = (v_t *)t[TARGET_Y]; const v_t * const xend = x + (tsize / sizeof (v_t)); const v_t *p = (v_t *)c[CODE_P]; const v_t *q = (v_t *)c[CODE_Q]; REC_QR_DEFINE(); for (; x < xend; x += REC_QR_STRIDE, y += REC_QR_STRIDE, p += REC_QR_STRIDE, q += REC_QR_STRIDE) { LOAD(x, REC_QR_X); LOAD(y, REC_QR_Y); XOR_ACC(p, REC_QR_X); XOR_ACC(q, REC_QR_Y); /* Save Pxy */ COPY(REC_QR_X, REC_QR_T); /* Calc X */ MUL(mul[MUL_QR_XQ], REC_QR_X); /* X = Q * xqm */ XOR(REC_QR_Y, REC_QR_X); /* X = R ^ X */ MUL(mul[MUL_QR_X], REC_QR_X); /* X = X * xm */ STORE(x, REC_QR_X); /* Calc Y */ MUL(mul[MUL_QR_YQ], REC_QR_T); /* X = Q * xqm */ XOR(REC_QR_Y, REC_QR_T); /* X = R ^ X */ MUL(mul[MUL_QR_Y], REC_QR_T); /* X = X * xm */ STORE(y, REC_QR_T); } } /* * Reconstruct two data columns using QR parity * * @syn_method raidz_syn_qr_abd() * @rec_method raidz_rec_qr_abd() * * @rr RAIDZ row * @tgtidx array of missing data indexes */ static raidz_inline int raidz_reconstruct_qr_impl(raidz_row_t *rr, const int *tgtidx) { size_t c; size_t dsize; abd_t *dabd; const size_t firstdc = rr->rr_firstdatacol; const size_t ncols = rr->rr_cols; const size_t x = tgtidx[TARGET_X]; const size_t y = tgtidx[TARGET_Y]; const size_t xsize = rr->rr_col[x].rc_size; const size_t ysize = rr->rr_col[y].rc_size; abd_t *xabd = rr->rr_col[x].rc_abd; abd_t *yabd = rr->rr_col[y].rc_abd; abd_t *tabds[2] = { xabd, yabd }; abd_t *cabds[] = { rr->rr_col[CODE_Q].rc_abd, 
rr->rr_col[CODE_R].rc_abd }; if (xabd == NULL) return ((1 << CODE_Q) | (1 << CODE_R)); unsigned coeff[MUL_CNT]; raidz_rec_qr_coeff(rr, tgtidx, coeff); /* * Check if some of targets is shorter then others * In this case, shorter target needs to be replaced with * new buffer so that syndrome can be calculated. */ if (ysize < xsize) { yabd = abd_alloc(xsize, B_FALSE); tabds[1] = yabd; } raidz_math_begin(); /* Start with first data column if present */ if (firstdc != x) { - raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize); - raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize); + raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize); + raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize); } else { raidz_zero(xabd, xsize); raidz_zero(yabd, xsize); } /* generate q_syndrome */ for (c = firstdc+1; c < ncols; c++) { if (c == x || c == y) { dabd = NULL; dsize = 0; } else { dabd = rr->rr_col[c].rc_abd; dsize = rr->rr_col[c].rc_size; } - abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2, + abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 2, raidz_syn_qr_abd); } abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_qr_abd, coeff); /* * Copy shorter targets back to the original abd buffer */ if (ysize < xsize) - raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize); + raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize); raidz_math_end(); if (ysize < xsize) abd_free(yabd); return ((1 << CODE_Q) | (1 << CODE_R)); } /* * Generate P, Q, and R syndromes * * @xc array of pointers to syndrome columns * @dc data column (NULL if missing) * @tsize size of syndrome columns * @dsize size of data column (0 if missing) */ static void raidz_syn_pqr_abd(void **c, const void *dc, const size_t tsize, const size_t dsize) { v_t *x = (v_t *)c[TARGET_X]; v_t *y = (v_t *)c[TARGET_Y]; v_t *z = (v_t *)c[TARGET_Z]; const v_t * const yend = y + (tsize / sizeof (v_t)); const v_t *d = (const v_t *)dc; const v_t * const dend = d + (dsize / sizeof (v_t)); SYN_PQR_DEFINE(); MUL2_SETUP(); for (; d < dend; d += 
SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE, z += SYN_STRIDE) { LOAD(d, SYN_PQR_D); P_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, x) Q_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, y); R_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, z); } for (; y < yend; y += SYN_STRIDE, z += SYN_STRIDE) { Q_SYNDROME(SYN_PQR_X, y); R_SYNDROME(SYN_PQR_X, z); } } /* * Reconstruct data using PRQ parity and PQR syndromes * * @tc syndrome/result columns * @tsize size of syndrome/result columns * @c parity columns * @mul array of multiplication constants */ static void raidz_rec_pqr_abd(void **t, const size_t tsize, void **c, const unsigned * const mul) { v_t *x = (v_t *)t[TARGET_X]; v_t *y = (v_t *)t[TARGET_Y]; v_t *z = (v_t *)t[TARGET_Z]; const v_t * const xend = x + (tsize / sizeof (v_t)); const v_t *p = (v_t *)c[CODE_P]; const v_t *q = (v_t *)c[CODE_Q]; const v_t *r = (v_t *)c[CODE_R]; REC_PQR_DEFINE(); for (; x < xend; x += REC_PQR_STRIDE, y += REC_PQR_STRIDE, z += REC_PQR_STRIDE, p += REC_PQR_STRIDE, q += REC_PQR_STRIDE, r += REC_PQR_STRIDE) { LOAD(x, REC_PQR_X); LOAD(y, REC_PQR_Y); LOAD(z, REC_PQR_Z); XOR_ACC(p, REC_PQR_X); XOR_ACC(q, REC_PQR_Y); XOR_ACC(r, REC_PQR_Z); /* Save Pxyz and Qxyz */ COPY(REC_PQR_X, REC_PQR_XS); COPY(REC_PQR_Y, REC_PQR_YS); /* Calc X */ MUL(mul[MUL_PQR_XP], REC_PQR_X); /* Xp = Pxyz * xp */ MUL(mul[MUL_PQR_XQ], REC_PQR_Y); /* Xq = Qxyz * xq */ XOR(REC_PQR_Y, REC_PQR_X); MUL(mul[MUL_PQR_XR], REC_PQR_Z); /* Xr = Rxyz * xr */ XOR(REC_PQR_Z, REC_PQR_X); /* X = Xp + Xq + Xr */ STORE(x, REC_PQR_X); /* Calc Y */ XOR(REC_PQR_X, REC_PQR_XS); /* Pyz = Pxyz + X */ MUL(mul[MUL_PQR_YU], REC_PQR_X); /* Xq = X * upd_q */ XOR(REC_PQR_X, REC_PQR_YS); /* Qyz = Qxyz + Xq */ COPY(REC_PQR_XS, REC_PQR_X); /* restore Pyz */ MUL(mul[MUL_PQR_YP], REC_PQR_X); /* Yp = Pyz * yp */ MUL(mul[MUL_PQR_YQ], REC_PQR_YS); /* Yq = Qyz * yq */ XOR(REC_PQR_X, REC_PQR_YS); /* Y = Yp + Yq */ STORE(y, REC_PQR_YS); /* Calc Z */ XOR(REC_PQR_XS, REC_PQR_YS); /* Z = Pz = Pyz + Y */ STORE(z, REC_PQR_YS); } } /* * Reconstruct three data 
columns using PQR parity * * @syn_method raidz_syn_pqr_abd() * @rec_method raidz_rec_pqr_abd() * * @rr RAIDZ row * @tgtidx array of missing data indexes */ static raidz_inline int raidz_reconstruct_pqr_impl(raidz_row_t *rr, const int *tgtidx) { size_t c; size_t dsize; abd_t *dabd; const size_t firstdc = rr->rr_firstdatacol; const size_t ncols = rr->rr_cols; const size_t x = tgtidx[TARGET_X]; const size_t y = tgtidx[TARGET_Y]; const size_t z = tgtidx[TARGET_Z]; const size_t xsize = rr->rr_col[x].rc_size; const size_t ysize = rr->rr_col[y].rc_size; const size_t zsize = rr->rr_col[z].rc_size; abd_t *xabd = rr->rr_col[x].rc_abd; abd_t *yabd = rr->rr_col[y].rc_abd; abd_t *zabd = rr->rr_col[z].rc_abd; abd_t *tabds[] = { xabd, yabd, zabd }; abd_t *cabds[] = { rr->rr_col[CODE_P].rc_abd, rr->rr_col[CODE_Q].rc_abd, rr->rr_col[CODE_R].rc_abd }; if (xabd == NULL) return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R)); unsigned coeff[MUL_CNT]; raidz_rec_pqr_coeff(rr, tgtidx, coeff); /* * Check if some of targets is shorter then others * In this case, shorter target needs to be replaced with * new buffer so that syndrome can be calculated. 
*/ if (ysize < xsize) { yabd = abd_alloc(xsize, B_FALSE); tabds[1] = yabd; } if (zsize < xsize) { zabd = abd_alloc(xsize, B_FALSE); tabds[2] = zabd; } raidz_math_begin(); /* Start with first data column if present */ if (firstdc != x) { - raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, xsize); - raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, xsize); - raidz_copy(zabd, rr->rr_col[firstdc].rc_abd, xsize); + raidz_copy(xabd, rr->rr_col[firstdc].rc_abd, 0, xsize); + raidz_copy(yabd, rr->rr_col[firstdc].rc_abd, 0, xsize); + raidz_copy(zabd, rr->rr_col[firstdc].rc_abd, 0, xsize); } else { raidz_zero(xabd, xsize); raidz_zero(yabd, xsize); raidz_zero(zabd, xsize); } /* generate q_syndrome */ for (c = firstdc+1; c < ncols; c++) { if (c == x || c == y || c == z) { dabd = NULL; dsize = 0; } else { dabd = rr->rr_col[c].rc_abd; dsize = rr->rr_col[c].rc_size; } - abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 3, + abd_raidz_gen_iterate(tabds, dabd, 0, xsize, dsize, 3, raidz_syn_pqr_abd); } abd_raidz_rec_iterate(cabds, tabds, xsize, 3, raidz_rec_pqr_abd, coeff); /* * Copy shorter targets back to the original abd buffer */ if (ysize < xsize) - raidz_copy(rr->rr_col[y].rc_abd, yabd, ysize); + raidz_copy(rr->rr_col[y].rc_abd, yabd, 0, ysize); if (zsize < xsize) - raidz_copy(rr->rr_col[z].rc_abd, zabd, zsize); + raidz_copy(rr->rr_col[z].rc_abd, zabd, 0, zsize); raidz_math_end(); if (ysize < xsize) abd_free(yabd); if (zsize < xsize) abd_free(zabd); return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R)); } #endif /* _VDEV_RAIDZ_MATH_IMPL_H */