diff --git a/module/icp/algs/aes/aes_impl.c b/module/icp/algs/aes/aes_impl.c
index 9d90914aacfa..9daa975226fe 100644
--- a/module/icp/algs/aes/aes_impl.c
+++ b/module/icp/algs/aes/aes_impl.c
@@ -1,444 +1,444 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
 #include <sys/crypto/icp.h>
 #include <sys/crypto/spi.h>
 #include <sys/simd.h>
 #include <modes/modes.h>
 #include <aes/aes_impl.h>
 
 /*
  * Initialize AES encryption and decryption key schedules.
  *
  * The raw key is first copied into a local buffer that is suitably aligned
  * for 32-bit and 64-bit access.  When the selected implementation sets
  * needs_byteswap, every 32-bit word of that copy is passed through htonl()
  * before ops->generate() expands it into the key schedule.
  *
  * Parameters:
  * cipherKey	User key; no particular alignment is required
  * keyBits	AES key size (128, 192, or 256 bits).  For any other value
  *		the function returns without modifying keysched.
  * keysched	AES key schedule to be initialized, of type aes_key_t.
  *		Allocated by aes_alloc_keysched().
  */
 void
 aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched)
 {
 	const aes_impl_ops_t *ops = aes_impl_get_ops();
 	aes_key_t *newbie = keysched;
 	uint_t keysize, i;
 	/* ka64 is kept only to force 64-bit alignment of the staging area */
 	union {
 		uint64_t	ka64[4];
 		uint32_t	ka32[8];
 	} keyarr;
 
 	switch (keyBits) {
 	case 128:
 		newbie->nr = 10;
 		break;
 
 	case 192:
 		newbie->nr = 12;
 		break;
 
 	case 256:
 		newbie->nr = 14;
 		break;
 
 	default:
 		/* should never get here */
 		return;
 	}
 	keysize = CRYPTO_BITS2BYTES(keyBits);
 
 	/*
 	 * Stage the key with memcpy() rather than loading it through a
 	 * casted uint32_t/uint64_t pointer: the caller's buffer is a plain
 	 * byte array, so it may be unaligned and must not be accessed
 	 * through a wider type.
 	 */
 	memcpy(keyarr.ka32, cipherKey, keysize);
 
 	/*
 	 * Generic C implementation requires byteswap for little endian
 	 * machines, various accelerated implementations for various
 	 * architectures may not.
 	 */
 	if (ops->needs_byteswap) {
 		for (i = 0; i < keysize / sizeof (uint32_t); i++)
 			keyarr.ka32[i] = htonl(keyarr.ka32[i]);
 	}
 
 	ops->generate(newbie, keyarr.ka32, keyBits);
 	newbie->ops = ops;
 
 	/*
 	 * Note: if there are systems that need the AES_64BIT_KS type in the
 	 * future, move setting key schedule type to individual implementations
 	 */
 	newbie->type = AES_32BIT_KS;
 }
 
 
 /*
  * Encrypt one block using AES.
  * Align if needed and (for x86 32-bit only) byte-swap.
  *
  * Parameters:
  * ks	Key schedule, of type aes_key_t
  * pt	Input block (plain text)
  * ct	Output block (crypto text).  Can overlap with pt
  */
 int
 aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct)
 {
 	aes_key_t	*ksch = (aes_key_t *)ks;
 	const aes_impl_ops_t	*ops = ksch->ops;
 
 	if (IS_P2ALIGNED2(pt, ct, sizeof (uint32_t)) && !ops->needs_byteswap) {
 		/* LINTED:  pointer alignment */
 		ops->encrypt(&ksch->encr_ks.ks32[0], ksch->nr,
 		    /* LINTED:  pointer alignment */
 		    (uint32_t *)pt, (uint32_t *)ct);
 	} else {
 		uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)];
 
 		/* Copy input block into buffer */
 		if (ops->needs_byteswap) {
 			buffer[0] = htonl(*(uint32_t *)(void *)&pt[0]);
 			buffer[1] = htonl(*(uint32_t *)(void *)&pt[4]);
 			buffer[2] = htonl(*(uint32_t *)(void *)&pt[8]);
 			buffer[3] = htonl(*(uint32_t *)(void *)&pt[12]);
 		} else
 			memcpy(&buffer, pt, AES_BLOCK_LEN);
 
 		ops->encrypt(&ksch->encr_ks.ks32[0], ksch->nr, buffer, buffer);
 
 		/* Copy result from buffer to output block */
 		if (ops->needs_byteswap) {
 			*(uint32_t *)(void *)&ct[0] = htonl(buffer[0]);
 			*(uint32_t *)(void *)&ct[4] = htonl(buffer[1]);
 			*(uint32_t *)(void *)&ct[8] = htonl(buffer[2]);
 			*(uint32_t *)(void *)&ct[12] = htonl(buffer[3]);
 		} else
 			memcpy(ct, &buffer, AES_BLOCK_LEN);
 	}
 	return (CRYPTO_SUCCESS);
 }
 
 
 /*
  * Decrypt one block using AES.
  * Align and byte-swap if needed.
  *
  * Parameters:
  * ks	Key schedule, of type aes_key_t
  * ct	Input block (crypto text)
  * pt	Output block (plain text). Can overlap with pt
  */
 int
 aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt)
 {
 	aes_key_t	*ksch = (aes_key_t *)ks;
 	const aes_impl_ops_t	*ops = ksch->ops;
 
 	if (IS_P2ALIGNED2(ct, pt, sizeof (uint32_t)) && !ops->needs_byteswap) {
 		/* LINTED:  pointer alignment */
 		ops->decrypt(&ksch->decr_ks.ks32[0], ksch->nr,
 		    /* LINTED:  pointer alignment */
 		    (uint32_t *)ct, (uint32_t *)pt);
 	} else {
 		uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)];
 
 		/* Copy input block into buffer */
 		if (ops->needs_byteswap) {
 			buffer[0] = htonl(*(uint32_t *)(void *)&ct[0]);
 			buffer[1] = htonl(*(uint32_t *)(void *)&ct[4]);
 			buffer[2] = htonl(*(uint32_t *)(void *)&ct[8]);
 			buffer[3] = htonl(*(uint32_t *)(void *)&ct[12]);
 		} else
 			memcpy(&buffer, ct, AES_BLOCK_LEN);
 
 		ops->decrypt(&ksch->decr_ks.ks32[0], ksch->nr, buffer, buffer);
 
 		/* Copy result from buffer to output block */
 		if (ops->needs_byteswap) {
 			*(uint32_t *)(void *)&pt[0] = htonl(buffer[0]);
 			*(uint32_t *)(void *)&pt[4] = htonl(buffer[1]);
 			*(uint32_t *)(void *)&pt[8] = htonl(buffer[2]);
 			*(uint32_t *)(void *)&pt[12] = htonl(buffer[3]);
 		} else
 			memcpy(pt, &buffer, AES_BLOCK_LEN);
 	}
 	return (CRYPTO_SUCCESS);
 }
 
 
 /*
  * Allocate key schedule for AES.
  *
  * Return the pointer and set size to the number of bytes allocated.
  * Memory allocated must be freed by the caller when done.
  * Returns NULL, leaving *size untouched, when the allocation fails.
  *
  * Parameters:
  * size		Size of key schedule allocated, in bytes
  * kmflag	Flag passed to kmem_alloc(9F); ignored in userland.
  */
 void *
 aes_alloc_keysched(size_t *size, int kmflag)
 {
 	aes_key_t *keysched;
 
-	keysched = (aes_key_t *)kmem_alloc(sizeof (aes_key_t), kmflag);
+	keysched = kmem_alloc(sizeof (aes_key_t), kmflag);
 	/* Report the size only when there is a buffer to go with it. */
 	if (keysched != NULL) {
 		*size = sizeof (aes_key_t);
 		return (keysched);
 	}
 	return (NULL);
 }
 
 /*
  * AES implementation that contains the fastest methods.
  * Overwritten by aes_impl_init() with a copy of the chosen implementation;
  * only the name is preset here.
  */
 static aes_impl_ops_t aes_fastest_impl = {
 	.name = "fastest"
 };
 
 /*
  * All compiled in implementations.  Which entries exist depends on the
  * target (__x86_64) and on HAVE_AES; the generic one is always present.
  */
 static const aes_impl_ops_t *aes_all_impl[] = {
 	&aes_generic_impl,
 #if defined(__x86_64)
 	&aes_x86_64_impl,
 #endif
 #if defined(__x86_64) && defined(HAVE_AES)
 	&aes_aesni_impl,
 #endif
 };
 
 /* Set to B_TRUE as the last step of aes_impl_init() */
 static boolean_t aes_impl_initialized = B_FALSE;
 
 /*
  * Select aes implementation.  These two values are reserved selectors;
  * any other value is treated as an index into aes_supp_impl[].
  */
 #define	IMPL_FASTEST	(UINT32_MAX)
 #define	IMPL_CYCLE	(UINT32_MAX-1)
 
 /* Read a selector through a volatile-qualified lvalue */
 #define	AES_IMPL_READ(i) (*(volatile uint32_t *) &(i))
 
 /*
  * icp_aes_impl is the selector consulted by aes_impl_get_ops();
  * user_sel_impl holds a selection made before aes_impl_init() ran and is
  * copied into icp_aes_impl by aes_impl_init().
  */
 static uint32_t icp_aes_impl = IMPL_FASTEST;
 static uint32_t user_sel_impl = IMPL_FASTEST;
 
 /*
  * Hold all supported implementations: the first aes_supp_impl_cnt entries
  * are filled in by aes_impl_init(), the rest stay NULL.
  */
 static size_t aes_supp_impl_cnt = 0;
 static aes_impl_ops_t *aes_supp_impl[ARRAY_SIZE(aes_all_impl)];
 
 /*
  * Pick the AES operations vector (encrypt/decrypt/key setup) to use.
  *
  * If kfpu_allowed() reports that SIMD may not be used in the current
  * context, the generic implementation is returned unconditionally.
  * Otherwise the current icp_aes_impl selector decides: "fastest",
  * "cycle" (advance to the next supported implementation on every call),
  * or a direct index into aes_supp_impl[].
  */
 const aes_impl_ops_t *
 aes_impl_get_ops(void)
 {
 	if (!kfpu_allowed())
 		return (&aes_generic_impl);
 
 	const aes_impl_ops_t *selected = NULL;
 	const uint32_t sel = AES_IMPL_READ(icp_aes_impl);
 
 	if (sel == IMPL_FASTEST) {
 		ASSERT(aes_impl_initialized);
 		selected = &aes_fastest_impl;
 	} else if (sel == IMPL_CYCLE) {
 		/* Rotate through the supported implementations */
 		static size_t cycle_impl_idx = 0;
 
 		ASSERT(aes_impl_initialized);
 		ASSERT3U(aes_supp_impl_cnt, >, 0);
 		cycle_impl_idx++;
 		selected = aes_supp_impl[cycle_impl_idx % aes_supp_impl_cnt];
 	} else {
 		/* Anything else is an index into the supported table */
 		ASSERT3U(sel, <, aes_supp_impl_cnt);
 		ASSERT3U(aes_supp_impl_cnt, >, 0);
 		if (sel < ARRAY_SIZE(aes_all_impl))
 			selected = aes_supp_impl[sel];
 	}
 
 	ASSERT3P(selected, !=, NULL);
 
 	return (selected);
 }
 
 /*
  * Initialize all supported implementations.
  */
 void
 aes_impl_init(void)
 {
 	aes_impl_ops_t *curr_impl;
 	int i, c;
 
 	/* Move supported implementations into aes_supp_impls */
 	for (i = 0, c = 0; i < ARRAY_SIZE(aes_all_impl); i++) {
 		curr_impl = (aes_impl_ops_t *)aes_all_impl[i];
 
 		if (curr_impl->is_supported())
 			aes_supp_impl[c++] = (aes_impl_ops_t *)curr_impl;
 	}
 	aes_supp_impl_cnt = c;
 
 	/*
 	 * Set the fastest implementation given the assumption that the
 	 * hardware accelerated version is the fastest.
 	 */
 #if defined(__x86_64)
 #if defined(HAVE_AES)
 	if (aes_aesni_impl.is_supported()) {
 		memcpy(&aes_fastest_impl, &aes_aesni_impl,
 		    sizeof (aes_fastest_impl));
 	} else
 #endif
 	{
 		memcpy(&aes_fastest_impl, &aes_x86_64_impl,
 		    sizeof (aes_fastest_impl));
 	}
 #else
 	memcpy(&aes_fastest_impl, &aes_generic_impl,
 	    sizeof (aes_fastest_impl));
 #endif
 
 	strlcpy(aes_fastest_impl.name, "fastest", AES_IMPL_NAME_MAX);
 
 	/* Finish initialization */
 	atomic_swap_32(&icp_aes_impl, user_sel_impl);
 	aes_impl_initialized = B_TRUE;
 }
 
 /*
  * Selector names that aes_impl_set() accepts regardless of which
  * implementations are supported, mapped to their reserved selector values.
  */
 static const struct {
 	const char *name;
 	uint32_t sel;
 } aes_impl_opts[] = {
 		{ "cycle",	IMPL_CYCLE },
 		{ "fastest",	IMPL_FASTEST },
 };
 
 /*
  * Function sets desired aes implementation.
  *
  * If we are called before init(), user preference will be saved in
  * user_sel_impl, and applied in later init() call. This occurs when module
  * parameter is specified on module load. Otherwise, directly update
  * icp_aes_impl.
  *
  * @val		Name of aes implementation to use
  * @param	Unused.
  */
 int
 aes_impl_set(const char *val)
 {
 	int err = -EINVAL;
 	char req_name[AES_IMPL_NAME_MAX];
 	uint32_t impl = AES_IMPL_READ(user_sel_impl);
 	size_t i;
 
 	/* sanitize input */
 	i = strnlen(val, AES_IMPL_NAME_MAX);
 	if (i == 0 || i >= AES_IMPL_NAME_MAX)
 		return (err);
 
 	strlcpy(req_name, val, AES_IMPL_NAME_MAX);
 	while (i > 0 && isspace(req_name[i-1]))
 		i--;
 	req_name[i] = '\0';
 
 	/* Check mandatory options */
 	for (i = 0; i < ARRAY_SIZE(aes_impl_opts); i++) {
 		if (strcmp(req_name, aes_impl_opts[i].name) == 0) {
 			impl = aes_impl_opts[i].sel;
 			err = 0;
 			break;
 		}
 	}
 
 	/* check all supported impl if init() was already called */
 	if (err != 0 && aes_impl_initialized) {
 		/* check all supported implementations */
 		for (i = 0; i < aes_supp_impl_cnt; i++) {
 			if (strcmp(req_name, aes_supp_impl[i]->name) == 0) {
 				impl = i;
 				err = 0;
 				break;
 			}
 		}
 	}
 
 	if (err == 0) {
 		if (aes_impl_initialized)
 			atomic_swap_32(&icp_aes_impl, impl);
 		else
 			atomic_swap_32(&user_sel_impl, impl);
 	}
 
 	return (err);
 }
 
 #if defined(_KERNEL) && defined(__linux__)
 
 /*
  * Module parameter "set" handler: forwards the requested name to
  * aes_impl_set() and returns its result.  The kernel param handle is not
  * needed.
  */
 static int
 icp_aes_impl_set(const char *val, zfs_kernel_param_t *kp)
 {
 	(void) kp;
 
 	return (aes_impl_set(val));
 }
 
 /*
  * Module parameter "get" handler: writes the names of the mandatory
  * options followed by the names of all supported implementations into
  * buffer, separated by spaces, with the currently selected one wrapped in
  * square brackets.  Output is bounded by PAGE_SIZE.  Returns the number of
  * characters written.
  */
 static int
 icp_aes_impl_get(char *buffer, zfs_kernel_param_t *kp)
 {
 	const uint32_t cur = AES_IMPL_READ(icp_aes_impl);
 	const char *fmt;
 	int written = 0;
 	size_t n;
 
 	ASSERT(aes_impl_initialized);
 
 	/* list mandatory options */
 	for (n = 0; n < ARRAY_SIZE(aes_impl_opts); n++) {
 		if (cur == aes_impl_opts[n].sel)
 			fmt = "[%s] ";
 		else
 			fmt = "%s ";
 		written += kmem_scnprintf(buffer + written,
 		    PAGE_SIZE - written, fmt, aes_impl_opts[n].name);
 	}
 
 	/* list all supported implementations */
 	for (n = 0; n < aes_supp_impl_cnt; n++) {
 		if (n == cur)
 			fmt = "[%s] ";
 		else
 			fmt = "%s ";
 		written += kmem_scnprintf(buffer + written,
 		    PAGE_SIZE - written, fmt, aes_supp_impl[n]->name);
 	}
 
 	return (written);
 }
 
 /*
  * Register icp_aes_impl as a module parameter (mode 0644) whose writes go
  * through icp_aes_impl_set() and whose reads go through icp_aes_impl_get().
  */
 module_param_call(icp_aes_impl, icp_aes_impl_set, icp_aes_impl_get,
     NULL, 0644);
 MODULE_PARM_DESC(icp_aes_impl, "Select aes implementation.");
 #endif
diff --git a/module/icp/algs/modes/gcm.c b/module/icp/algs/modes/gcm.c
index 558a578090b2..a8792c4555e2 100644
--- a/module/icp/algs/modes/gcm.c
+++ b/module/icp/algs/modes/gcm.c
@@ -1,1596 +1,1596 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
 #include <modes/modes.h>
 #include <sys/crypto/common.h>
 #include <sys/crypto/icp.h>
 #include <sys/crypto/impl.h>
 #include <sys/byteorder.h>
 #include <sys/simd.h>
 #include <modes/gcm_impl.h>
 #ifdef CAN_USE_GCM_ASM
 #include <aes/aes_impl.h>
 #endif
 
 /*
  * One GHASH step: XOR block d into the running hash (c)->gcm_ghash, then
  * multiply the running hash by (c)->gcm_H using the ops vector o, storing
  * the product in t.
  *
  * Note that the macro expands to two statements and calls xor_block by
  * name, so it can only be used as a full statement in a scope where an
  * xor_block function (pointer) is visible.
  */
 #define	GHASH(c, d, t, o) \
 	xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
 	(o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
 	(uint64_t *)(void *)(t));
 
 /*
  * Select GCM implementation.  The values below are reserved selectors;
  * IMPL_AVX exists only when the assembly implementation can be used.
  */
 #define	IMPL_FASTEST	(UINT32_MAX)
 #define	IMPL_CYCLE	(UINT32_MAX-1)
 #ifdef CAN_USE_GCM_ASM
 #define	IMPL_AVX	(UINT32_MAX-2)
 #endif
 /* Read a selector through a volatile-qualified lvalue */
 #define	GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
 /* Current selector, and the selection requested by the user */
 static uint32_t icp_gcm_impl = IMPL_FASTEST;
 static uint32_t user_sel_impl = IMPL_FASTEST;
 
 #ifdef CAN_USE_GCM_ASM
 /* Does the architecture we run on support the MOVBE instruction? */
 boolean_t gcm_avx_can_use_movbe = B_FALSE;
 /*
  * Whether to use the optimized openssl gcm and ghash implementations.
  * Set to true if module parameter icp_gcm_impl == "avx".
  */
 static boolean_t gcm_use_avx = B_FALSE;
 /* Read gcm_use_avx through a volatile-qualified lvalue */
 #define	GCM_IMPL_USE_AVX	(*(volatile boolean_t *)&gcm_use_avx)
 
 /* Defined outside this file */
 extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);
 
 /* Forward declarations of the AVX helpers used by the generic entry points */
 static inline boolean_t gcm_avx_will_work(void);
 static inline void gcm_set_avx(boolean_t);
 static inline boolean_t gcm_toggle_avx(void);
 static inline size_t gcm_simd_get_htab_size(boolean_t);
 
 static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
     crypto_data_t *, size_t);
 
 static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
 static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
 static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *,
     size_t, size_t);
 #endif /* ifdef CAN_USE_GCM_ASM */
 
 /*
  * Encrypt multiple blocks of data in GCM mode.  Decrypt for GCM mode
  * is done in another function.
  *
  * Contexts flagged gcm_use_avx are handed to the AVX variant.  Otherwise
  * input shorter than a block (together with bytes left over from a
  * previous call) is only accumulated in ctx->gcm_remainder.  Once at
  * least one full block is available, each block is encrypted with the
  * incremented counter block, written to out, and folded into the running
  * GHASH.  A trailing partial block is saved in ctx->gcm_remainder for the
  * next call or for gcm_encrypt_final().
  *
  * Returns CRYPTO_SUCCESS, or CRYPTO_DATA_LEN_RANGE if the saved remainder
  * cannot be completed from the supplied data.
  */
 int
 gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
     crypto_data_t *out, size_t block_size,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
     void (*copy_block)(uint8_t *, uint8_t *),
     void (*xor_block)(uint8_t *, uint8_t *))
 {
 #ifdef CAN_USE_GCM_ASM
 	if (ctx->gcm_use_avx == B_TRUE)
 		return (gcm_mode_encrypt_contiguous_blocks_avx(
 		    ctx, data, length, out, block_size));
 #endif
 
 	const gcm_impl_ops_t *gops;
 	size_t remainder = length;
 	size_t need = 0;
 	uint8_t *datap = (uint8_t *)data;
 	uint8_t *blockp;
 	uint8_t *lastp;
 	void *iov_or_mp;
 	offset_t offset;
 	uint8_t *out_data_1;
 	uint8_t *out_data_2;
 	size_t out_data_1_len;
 	uint64_t counter;
 	/* Mask selecting the 32 counter bits within gcm_cb[1] as stored */
 	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
 
 	if (length + ctx->gcm_remainder_len < block_size) {
 		/* accumulate bytes here and return */
 		memcpy((uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
 		    datap,
 		    length);
 		ctx->gcm_remainder_len += length;
 		if (ctx->gcm_copy_to == NULL) {
 			ctx->gcm_copy_to = datap;
 		}
 		return (CRYPTO_SUCCESS);
 	}
 
 	crypto_init_ptrs(out, &iov_or_mp, &offset);
 
 	gops = gcm_impl_get_ops();
 	do {
 		/* Unprocessed data from last call. */
 		if (ctx->gcm_remainder_len > 0) {
 			/* Bytes still missing to complete the saved block */
 			need = block_size - ctx->gcm_remainder_len;
 
 			if (need > remainder)
 				return (CRYPTO_DATA_LEN_RANGE);
 
 			memcpy(&((uint8_t *)ctx->gcm_remainder)
 			    [ctx->gcm_remainder_len], datap, need);
 
 			blockp = (uint8_t *)ctx->gcm_remainder;
 		} else {
 			blockp = datap;
 		}
 
 		/*
 		 * Increment counter. Counter bits are confined
 		 * to the bottom 32 bits of the counter block.
 		 */
 		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
 		counter = htonll(counter + 1);
 		counter &= counter_mask;
 		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
 
 		/*
 		 * Encrypt the counter block into gcm_tmp, then XOR the input
 		 * block into it; gcm_tmp is what gets written out below.
 		 */
 		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
 		    (uint8_t *)ctx->gcm_tmp);
 		xor_block(blockp, (uint8_t *)ctx->gcm_tmp);
 
 		lastp = (uint8_t *)ctx->gcm_tmp;
 
 		ctx->gcm_processed_data_len += block_size;
 
 		crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
 		    &out_data_1_len, &out_data_2, block_size);
 
 		/* copy block to where it belongs */
 		if (out_data_1_len == block_size) {
 			copy_block(lastp, out_data_1);
 		} else {
 			/* Output block is split across two destinations */
 			memcpy(out_data_1, lastp, out_data_1_len);
 			if (out_data_2 != NULL) {
 				memcpy(out_data_2,
 				    lastp + out_data_1_len,
 				    block_size - out_data_1_len);
 			}
 		}
 		/* update offset */
 		out->cd_offset += block_size;
 
 		/* add ciphertext to the hash */
 		GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);
 
 		/* Update pointer to next block of data to be processed. */
 		if (ctx->gcm_remainder_len != 0) {
 			datap += need;
 			ctx->gcm_remainder_len = 0;
 		} else {
 			datap += block_size;
 		}
 
 		/* Input bytes not yet consumed */
 		remainder = (size_t)&data[length] - (size_t)datap;
 
 		/* Incomplete last block. */
 		if (remainder > 0 && remainder < block_size) {
 			memcpy(ctx->gcm_remainder, datap, remainder);
 			ctx->gcm_remainder_len = remainder;
 			ctx->gcm_copy_to = datap;
 			goto out;
 		}
 		ctx->gcm_copy_to = NULL;
 
 	} while (remainder > 0);
 out:
 	return (CRYPTO_SUCCESS);
 }
 
 /*
  * Finish a GCM encryption: encrypt and emit any bytes still held in
  * ctx->gcm_remainder, fold the length block into the GHASH, compute the
  * authentication tag and append ctx->gcm_tag_len bytes of it to out.
  *
  * Contexts flagged gcm_use_avx are handed to the AVX variant.  copy_block
  * is unused here.
  *
  * Returns CRYPTO_SUCCESS, CRYPTO_DATA_LEN_RANGE when out->cd_length cannot
  * hold the remainder plus the tag, or the error returned by
  * crypto_put_output_data().
  */
 int
 gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
     void (*copy_block)(uint8_t *, uint8_t *),
     void (*xor_block)(uint8_t *, uint8_t *))
 {
 	(void) copy_block;
 #ifdef CAN_USE_GCM_ASM
 	if (ctx->gcm_use_avx == B_TRUE)
 		return (gcm_encrypt_final_avx(ctx, out, block_size));
 #endif
 
 	const gcm_impl_ops_t *gops;
 	/* Mask selecting the 32 counter bits within gcm_cb[1] as stored */
 	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
 	uint8_t *ghash, *macp = NULL;
 	int i, rv;
 
 	if (out->cd_length <
 	    (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
 		return (CRYPTO_DATA_LEN_RANGE);
 	}
 
 	gops = gcm_impl_get_ops();
 	ghash = (uint8_t *)ctx->gcm_ghash;
 
 	if (ctx->gcm_remainder_len > 0) {
 		uint64_t counter;
 		uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;
 
 		/*
 		 * Here is where we deal with data that is not a
 		 * multiple of the block size.
 		 */
 
 		/*
 		 * Increment counter.
 		 */
 		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
 		counter = htonll(counter + 1);
 		counter &= counter_mask;
 		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
 
 		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
 		    (uint8_t *)ctx->gcm_tmp);
 
 		/* Zero-pad the partial block in place before hashing it */
 		macp = (uint8_t *)ctx->gcm_remainder;
 		memset(macp + ctx->gcm_remainder_len, 0,
 		    block_size - ctx->gcm_remainder_len);
 
 		/* XOR with counter block */
 		for (i = 0; i < ctx->gcm_remainder_len; i++) {
 			macp[i] ^= tmpp[i];
 		}
 
 		/* add ciphertext to the hash */
 		GHASH(ctx, macp, ghash, gops);
 
 		ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
 	}
 
 	/* Hash the length block, then XOR in the encrypted J0 to get the tag */
 	ctx->gcm_len_a_len_c[1] =
 	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
 	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
 	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
 	    (uint8_t *)ctx->gcm_J0);
 	xor_block((uint8_t *)ctx->gcm_J0, ghash);
 
 	/* Emit the final partial ciphertext block, if any, then the tag */
 	if (ctx->gcm_remainder_len > 0) {
 		rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
 		if (rv != CRYPTO_SUCCESS)
 			return (rv);
 	}
 	out->cd_offset += ctx->gcm_remainder_len;
 	ctx->gcm_remainder_len = 0;
 	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
 	if (rv != CRYPTO_SUCCESS)
 		return (rv);
 	out->cd_offset += ctx->gcm_tag_len;
 
 	return (CRYPTO_SUCCESS);
 }
 
 /*
  * Decrypt the trailing partial block of the input, i.e. the
  * ctx->gcm_remainder_len bytes held in ctx->gcm_remainder, writing the
  * plaintext to ctx->gcm_pt_buf starting at index.
  *
  * The zero-padded ciphertext is hashed before the keystream block is
  * generated because both share ctx->gcm_tmp as scratch space.  xor_block
  * is referenced by the GHASH() macro.
  */
 static void
 gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
     void (*xor_block)(uint8_t *, uint8_t *))
 {
 	const uint64_t ctr_mask = ntohll(0x00000000ffffffffULL);
 	uint8_t *const cipher = (uint8_t *)ctx->gcm_remainder;
 	uint8_t *const plain = &((ctx->gcm_pt_buf)[index]);
 	uint8_t *const scratch = (uint8_t *)ctx->gcm_tmp;
 	uint64_t ctr;
 	size_t n;
 
 	/* Advance the counter; only the bottom 32 bits take part */
 	ctr = ntohll(ctx->gcm_cb[1] & ctr_mask);
 	ctr = htonll(ctr + 1);
 	ctr &= ctr_mask;
 	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~ctr_mask) | ctr;
 
 	/* Zero-padded copy of the ciphertext for the hash */
 	memset(scratch, 0, block_size);
 	memcpy(scratch, cipher, ctx->gcm_remainder_len);
 
 	/* add ciphertext to the hash */
 	GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());
 
 	/* Keystream block overwrites the scratch area */
 	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, scratch);
 
 	/* Plaintext is ciphertext XOR keystream */
 	for (n = 0; n < ctx->gcm_remainder_len; n++)
 		plain[n] = cipher[n] ^ scratch[n];
 }
 
 /*
  * Buffer ciphertext for a GCM decryption.
  *
  * Nothing is decrypted here: the input is appended to ctx->gcm_pt_buf,
  * which is reallocated to fit, and the actual decryption happens in the
  * final step.  All parameters other than ctx, data and length are unused.
  *
  * Returns CRYPTO_SUCCESS, or CRYPTO_HOST_MEMORY if the larger buffer could
  * not be allocated (the previous buffer is released in that case).
  */
 int
 gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
     crypto_data_t *out, size_t block_size,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
     void (*copy_block)(uint8_t *, uint8_t *),
     void (*xor_block)(uint8_t *, uint8_t *))
 {
 	(void) out;
 	(void) block_size;
 	(void) encrypt_block;
 	(void) copy_block;
 	(void) xor_block;
 	uint8_t *grown;
 	size_t grown_len;
 
 	/* Nothing to buffer */
 	if (length == 0) {
 		ctx->gcm_remainder_len = 0;
 		return (CRYPTO_SUCCESS);
 	}
 
 	grown_len = ctx->gcm_pt_buf_len + length;
 	grown = vmem_alloc(grown_len, KM_SLEEP);
 	if (grown == NULL) {
 		vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
 		ctx->gcm_pt_buf = NULL;
 		return (CRYPTO_HOST_MEMORY);
 	}
 
 	/* Carry over what was buffered so far, then drop the old buffer */
 	if (ctx->gcm_pt_buf == NULL) {
 		ASSERT0(ctx->gcm_pt_buf_len);
 	} else {
 		memcpy(grown, ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
 		vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
 	}
 
 	ctx->gcm_pt_buf = grown;
 	ctx->gcm_pt_buf_len = grown_len;
 
 	/* Append the new ciphertext behind the bytes already collected */
 	memcpy(&ctx->gcm_pt_buf[ctx->gcm_processed_data_len], data,
 	    length);
 	ctx->gcm_processed_data_len += length;
 
 	ctx->gcm_remainder_len = 0;
 	return (CRYPTO_SUCCESS);
 }
 
 /*
  * Finish a GCM decryption: decrypt the ciphertext buffered in
  * ctx->gcm_pt_buf in place, recompute the authentication tag and compare
  * it with the tag stored in the last ctx->gcm_tag_len bytes of the buffer.
  * The plaintext is copied to out only if the tags match.
  *
  * Contexts flagged gcm_use_avx are handed to the AVX variant.
  *
  * Returns CRYPTO_SUCCESS, CRYPTO_DATA_LEN_RANGE when fewer bytes than one
  * tag were buffered, CRYPTO_INVALID_MAC when the tags differ, or the error
  * returned by crypto_put_output_data().
  */
 int
 gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
     void (*xor_block)(uint8_t *, uint8_t *))
 {
 #ifdef CAN_USE_GCM_ASM
 	if (ctx->gcm_use_avx == B_TRUE)
 		return (gcm_decrypt_final_avx(ctx, out, block_size));
 #endif
 
 	const gcm_impl_ops_t *gops;
 	size_t pt_len;
 	size_t remainder;
 	uint8_t *ghash;
 	uint8_t *blockp;
 	uint8_t *cbp;
 	uint64_t counter;
 	/* Mask selecting the 32 counter bits within gcm_cb[1] as stored */
 	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
 	int processed = 0, rv;
 
 	ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);
 
 	/*
 	 * The buffered input has to contain at least the tag.  Without this
 	 * check the subtraction below would wrap around and the loop would
 	 * walk far past the end of gcm_pt_buf.
 	 */
 	if (ctx->gcm_processed_data_len < ctx->gcm_tag_len)
 		return (CRYPTO_DATA_LEN_RANGE);
 
 	gops = gcm_impl_get_ops();
 	pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
 	ghash = (uint8_t *)ctx->gcm_ghash;
 	blockp = ctx->gcm_pt_buf;
 	remainder = pt_len;
 	while (remainder > 0) {
 		/* Incomplete last block */
 		if (remainder < block_size) {
 			memcpy(ctx->gcm_remainder, blockp, remainder);
 			ctx->gcm_remainder_len = remainder;
 			/*
 			 * not expecting anymore ciphertext, just
 			 * compute plaintext for the remaining input
 			 */
 			gcm_decrypt_incomplete_block(ctx, block_size,
 			    processed, encrypt_block, xor_block);
 			ctx->gcm_remainder_len = 0;
 			goto out;
 		}
 		/* add ciphertext to the hash */
 		GHASH(ctx, blockp, ghash, gops);
 
 		/*
 		 * Increment counter.
 		 * Counter bits are confined to the bottom 32 bits
 		 */
 		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
 		counter = htonll(counter + 1);
 		counter &= counter_mask;
 		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
 
 		cbp = (uint8_t *)ctx->gcm_tmp;
 		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);
 
 		/* XOR with ciphertext; the block is decrypted in place */
 		xor_block(cbp, blockp);
 
 		processed += block_size;
 		blockp += block_size;
 		remainder -= block_size;
 	}
 out:
 	/* Hash the length block, then XOR in the encrypted J0 to get the tag */
 	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
 	GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
 	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
 	    (uint8_t *)ctx->gcm_J0);
 	xor_block((uint8_t *)ctx->gcm_J0, ghash);
 
 	/* compare the input authentication tag with what we calculated */
 	if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
 		/* They don't match */
 		return (CRYPTO_INVALID_MAC);
 	} else {
 		rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
 		if (rv != CRYPTO_SUCCESS)
 			return (rv);
 		out->cd_offset += pt_len;
 	}
 	return (CRYPTO_SUCCESS);
 }
 
 /*
  * Validate caller-supplied GCM parameters.
  *
  * The tag length (in bits) must be 32, 64, 96, 104, 112, 120 or 128, and
  * the IV length must not be zero.  Returns CRYPTO_SUCCESS when both hold
  * and CRYPTO_MECHANISM_PARAM_INVALID otherwise.
  */
 static int
 gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
 {
 	const size_t tag_bits = gcm_param->ulTagBits;
 	boolean_t tag_ok;
 
 	/* 32 and 64, or any multiple of 8 from 96 up to and including 128 */
 	if (tag_bits == 32 || tag_bits == 64) {
 		tag_ok = B_TRUE;
 	} else if (tag_bits >= 96 && tag_bits <= 128 && (tag_bits % 8) == 0) {
 		tag_ok = B_TRUE;
 	} else {
 		tag_ok = B_FALSE;
 	}
 
 	if (!tag_ok || gcm_param->ulIvLen == 0)
 		return (CRYPTO_MECHANISM_PARAM_INVALID);
 
 	return (CRYPTO_SUCCESS);
 }
 
 /*
  * Derive the initial counter block from the IV and store it in both
  * ctx->gcm_cb and ctx->gcm_J0.
  *
  * A 12 byte IV is used directly, followed by the 32-bit big-endian value
  * 1.  Any other IV length is run through GHASH, zero-padded to a whole
  * block, followed by a block holding the IV length in bits.
  *
  * NOTE(review): the GHASH path accumulates into ctx->gcm_ghash without
  * clearing it first, so it appears to rely on the context having been
  * zeroed by the caller -- confirm.
  */
 static void
 gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
     gcm_ctx_t *ctx, size_t block_size,
     void (*copy_block)(uint8_t *, uint8_t *),
     void (*xor_block)(uint8_t *, uint8_t *))
 {
 	const gcm_impl_ops_t *gops;
 	uint8_t *cb;
 	ulong_t remainder = iv_len;
 	ulong_t processed = 0;
 	uint8_t *datap, *ghash;
 	uint64_t len_a_len_c[2];
 
 	gops = gcm_impl_get_ops();
 	ghash = (uint8_t *)ctx->gcm_ghash;
 	cb = (uint8_t *)ctx->gcm_cb;
 	if (iv_len == 12) {
 		/* Counter block is IV || 0x00000001 */
 		memcpy(cb, iv, 12);
 		cb[12] = 0;
 		cb[13] = 0;
 		cb[14] = 0;
 		cb[15] = 1;
 		/* J0 will be used again in the final */
 		copy_block(cb, (uint8_t *)ctx->gcm_J0);
 	} else {
 		/* GHASH the IV */
 		do {
 			if (remainder < block_size) {
 				/* Last, partial block: zero-pad it in cb */
 				memset(cb, 0, block_size);
 				memcpy(cb, &(iv[processed]), remainder);
 				datap = (uint8_t *)cb;
 				remainder = 0;
 			} else {
 				datap = (uint8_t *)(&(iv[processed]));
 				processed += block_size;
 				remainder -= block_size;
 			}
 			GHASH(ctx, datap, ghash, gops);
 		} while (remainder > 0);
 
 		/* Final block: 64 zero bits followed by the IV length in bits */
 		len_a_len_c[0] = 0;
 		len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
 		GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);
 
 		/* J0 will be used again in the final */
 		copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
 	}
 }
 
 /*
  * Non-AVX GCM context setup: compute the hash subkey H by encrypting an
  * all-zero block, derive the initial counter block from the IV, reset the
  * running hash and fold the additional authenticated data into it.
  *
  * auth_data may be NULL only when auth_data_len is smaller than
  * block_size (asserted to be 0 in that case).
  *
  * Always returns CRYPTO_SUCCESS.
  */
 static int
 gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
     unsigned char *auth_data, size_t auth_data_len, size_t block_size,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
     void (*copy_block)(uint8_t *, uint8_t *),
     void (*xor_block)(uint8_t *, uint8_t *))
 {
 	const gcm_impl_ops_t *gops;
 	uint8_t *ghash, *datap, *authp;
 	size_t remainder, processed;
 
 	/* encrypt zero block to get subkey H */
 	memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
 	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
 	    (uint8_t *)ctx->gcm_H);
 
 	gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
 	    copy_block, xor_block);
 
 	gops = gcm_impl_get_ops();
 	authp = (uint8_t *)ctx->gcm_tmp;
 	ghash = (uint8_t *)ctx->gcm_ghash;
 	/* Start the AAD hash from zero; gcm_tmp doubles as the pad buffer */
 	memset(authp, 0, block_size);
 	memset(ghash, 0, block_size);
 
 	processed = 0;
 	remainder = auth_data_len;
 	do {
 		if (remainder < block_size) {
 			/*
 			 * There's not a block full of data, pad rest of
 			 * buffer with zero
 			 */
 
 			if (auth_data != NULL) {
 				memset(authp, 0, block_size);
 				memcpy(authp, &(auth_data[processed]),
 				    remainder);
 			} else {
 				ASSERT0(remainder);
 			}
 
 			datap = (uint8_t *)authp;
 			remainder = 0;
 		} else {
 			datap = (uint8_t *)(&(auth_data[processed]));
 			processed += block_size;
 			remainder -= block_size;
 		}
 
 		/* add auth data to the hash */
 		GHASH(ctx, datap, ghash, gops);
 
 	} while (remainder > 0);
 
 	return (CRYPTO_SUCCESS);
 }
 
 /*
  * The following function is called at encrypt or decrypt init time
  * for AES GCM mode.
  *
  * Init the GCM context struct. Handle the cycle and avx implementations here.
  *
  * param is interpreted as a CK_AES_GCM_PARAMS.  Returns CRYPTO_SUCCESS, or
  * CRYPTO_MECHANISM_PARAM_INVALID when param is NULL, fails validation, no
  * Htable size is available, or the mode-specific init fails, or
  * CRYPTO_HOST_MEMORY when the Htable allocation returns NULL.
  */
 int
 gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
     void (*copy_block)(uint8_t *, uint8_t *),
     void (*xor_block)(uint8_t *, uint8_t *))
 {
 	int rv;
 	CK_AES_GCM_PARAMS *gcm_param;
 
 	if (param != NULL) {
 		gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
 
 		if ((rv = gcm_validate_args(gcm_param)) != 0) {
 			return (rv);
 		}
 
 		/* ulTagBits is in bits; gcm_tag_len is kept in bytes */
 		gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
 		gcm_ctx->gcm_tag_len >>= 3;
 		gcm_ctx->gcm_processed_data_len = 0;
 
 		/* these values are in bits */
 		gcm_ctx->gcm_len_a_len_c[0]
 		    = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
 
 		rv = CRYPTO_SUCCESS;
 		gcm_ctx->gcm_flags |= GCM_MODE;
 	} else {
 		return (CRYPTO_MECHANISM_PARAM_INVALID);
 	}
 
 #ifdef CAN_USE_GCM_ASM
 	if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
 		gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
 	} else {
 		/*
 		 * Handle the "cycle" implementation by creating avx and
 		 * non-avx contexts alternately.
 		 */
 		gcm_ctx->gcm_use_avx = gcm_toggle_avx();
 		/*
 		 * We don't handle byte swapped key schedules in the avx
 		 * code path.
 		 */
 		aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
 		if (ks->ops->needs_byteswap == B_TRUE) {
 			gcm_ctx->gcm_use_avx = B_FALSE;
 		}
 		/* Use the MOVBE and the BSWAP variants alternately. */
 		if (gcm_ctx->gcm_use_avx == B_TRUE &&
 		    zfs_movbe_available() == B_TRUE) {
 			(void) atomic_toggle_boolean_nv(
 			    (volatile boolean_t *)&gcm_avx_can_use_movbe);
 		}
 	}
 	/* Allocate Htab memory as needed. */
 	if (gcm_ctx->gcm_use_avx == B_TRUE) {
 		size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);
 
 		if (htab_len == 0) {
 			return (CRYPTO_MECHANISM_PARAM_INVALID);
 		}
 		gcm_ctx->gcm_htab_len = htab_len;
 		gcm_ctx->gcm_Htable =
-		    (uint64_t *)kmem_alloc(htab_len, KM_SLEEP);
+		    kmem_alloc(htab_len, KM_SLEEP);
 
 		if (gcm_ctx->gcm_Htable == NULL) {
 			return (CRYPTO_HOST_MEMORY);
 		}
 	}
 	/* Avx and non avx context initialization differs from here on. */
 	if (gcm_ctx->gcm_use_avx == B_FALSE) {
 #endif /* ifdef CAN_USE_GCM_ASM */
 		if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
 		    gcm_param->pAAD, gcm_param->ulAADLen, block_size,
 		    encrypt_block, copy_block, xor_block) != 0) {
 			rv = CRYPTO_MECHANISM_PARAM_INVALID;
 		}
 #ifdef CAN_USE_GCM_ASM
 	} else {
 		/*
 		 * NOTE(review): on failure the Htable allocated above stays
 		 * attached to the context; presumably it is released when the
 		 * context is torn down elsewhere -- confirm.
 		 */
 		if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
 		    gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) {
 			rv = CRYPTO_MECHANISM_PARAM_INVALID;
 		}
 	}
 #endif /* ifdef CAN_USE_GCM_ASM */
 
 	return (rv);
 }
 
 /*
  * Initialize a GCM context for GMAC operation.
  *
  * param is interpreted as a CK_AES_GMAC_PARAMS; a NULL param is rejected
  * with CRYPTO_MECHANISM_PARAM_INVALID. The tag length is derived from
  * AES_GMAC_TAG_BITS and the IV length passed on is AES_GMAC_IV_LEN.
  *
  * When CAN_USE_GCM_ASM is defined the context is set up for either the
  * avx or the non-avx code path; otherwise gcm_init() is always used.
  *
  * Returns CRYPTO_SUCCESS, CRYPTO_MECHANISM_PARAM_INVALID or
  * CRYPTO_HOST_MEMORY.
  */
 int
 gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
     void (*copy_block)(uint8_t *, uint8_t *),
     void (*xor_block)(uint8_t *, uint8_t *))
 {
 	int rv;
 	CK_AES_GMAC_PARAMS *gmac_param;
 
 	if (param != NULL) {
 		gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
 
 		gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
 		gcm_ctx->gcm_processed_data_len = 0;
 
 		/* these values are in bits */
 		gcm_ctx->gcm_len_a_len_c[0]
 		    = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));
 
 		rv = CRYPTO_SUCCESS;
 		gcm_ctx->gcm_flags |= GMAC_MODE;
 	} else {
 		return (CRYPTO_MECHANISM_PARAM_INVALID);
 	}
 
 #ifdef CAN_USE_GCM_ASM
 	/*
 	 * Handle the "cycle" implementation by creating avx and non avx
 	 * contexts alternately.
 	 */
 	if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
 		gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
 	} else {
 		gcm_ctx->gcm_use_avx = gcm_toggle_avx();
 	}
 	/* We don't handle byte swapped key schedules in the avx code path. */
 	aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
 	if (ks->ops->needs_byteswap == B_TRUE) {
 		gcm_ctx->gcm_use_avx = B_FALSE;
 	}
 	/* Allocate Htab memory as needed. */
 	if (gcm_ctx->gcm_use_avx == B_TRUE) {
 		size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx);
 
 		if (htab_len == 0) {
 			return (CRYPTO_MECHANISM_PARAM_INVALID);
 		}
 		/* Remember the size next to the pointer it belongs to. */
 		gcm_ctx->gcm_htab_len = htab_len;
 		gcm_ctx->gcm_Htable =
-		    (uint64_t *)kmem_alloc(htab_len, KM_SLEEP);
+		    kmem_alloc(htab_len, KM_SLEEP);
 
 		if (gcm_ctx->gcm_Htable == NULL) {
 			return (CRYPTO_HOST_MEMORY);
 		}
 	}
 
 	/* Avx and non avx context initialization differs from here on. */
 	if (gcm_ctx->gcm_use_avx == B_FALSE) {
 #endif	/* ifdef CAN_USE_GCM_ASM */
 		/* Generic path: uses the caller supplied block callbacks. */
 		if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
 		    gmac_param->pAAD, gmac_param->ulAADLen, block_size,
 		    encrypt_block, copy_block, xor_block) != 0) {
 			rv = CRYPTO_MECHANISM_PARAM_INVALID;
 		}
 #ifdef CAN_USE_GCM_ASM
 	} else {
 		/* Avx path: the block callbacks are not passed on. */
 		if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
 		    gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) {
 			rv = CRYPTO_MECHANISM_PARAM_INVALID;
 		}
 	}
 #endif /* ifdef CAN_USE_GCM_ASM */
 
 	return (rv);
 }
 
 void *
 gcm_alloc_ctx(int kmflag)
 {
 	gcm_ctx_t *gcm_ctx;
 
 	if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
 		return (NULL);
 
 	gcm_ctx->gcm_flags = GCM_MODE;
 	return (gcm_ctx);
 }
 
 void *
 gmac_alloc_ctx(int kmflag)
 {
 	gcm_ctx_t *gcm_ctx;
 
 	if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
 		return (NULL);
 
 	gcm_ctx->gcm_flags = GMAC_MODE;
 	return (gcm_ctx);
 }
 
 /*
  * GCM implementation that contains the fastest methods. Only the name is
  * set statically; gcm_impl_init() overwrites the structure with a copy of
  * the selected implementation and then restores the "fastest" name.
  */
 static gcm_impl_ops_t gcm_fastest_impl = {
 	.name = "fastest"
 };
 
 /* All compiled in implementations */
 static const gcm_impl_ops_t *gcm_all_impl[] = {
 	&gcm_generic_impl,
 #if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
 	&gcm_pclmulqdq_impl,
 #endif
 };
 
 /*
  * Indicate that benchmark has been completed. Set to B_TRUE as the last
  * step of gcm_impl_init().
  */
 static boolean_t gcm_impl_initialized = B_FALSE;
 
 /*
  * Hold all supported implementations. Filled by gcm_impl_init(); only the
  * first gcm_supp_impl_cnt entries are populated.
  */
 static size_t gcm_supp_impl_cnt = 0;
 static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
 
 /*
  * Pick the GCM operations vector (encrypt/decrypt/key setup) matching the
  * currently selected implementation. If SIMD may not be used in the
  * calling context the generic implementation is returned right away.
  */
 const gcm_impl_ops_t *
 gcm_impl_get_ops(void)
 {
 	const gcm_impl_ops_t *ops = NULL;
 	uint32_t impl;
 
 	if (!kfpu_allowed())
 		return (&gcm_generic_impl);
 
 	impl = GCM_IMPL_READ(icp_gcm_impl);
 
 	if (impl == IMPL_FASTEST) {
 		ASSERT(gcm_impl_initialized);
 		ops = &gcm_fastest_impl;
 	} else if (impl == IMPL_CYCLE) {
 		/* Step to the next supported implementation on each call. */
 		static size_t cycle_impl_idx = 0;
 
 		ASSERT(gcm_impl_initialized);
 		ASSERT3U(gcm_supp_impl_cnt, >, 0);
 		cycle_impl_idx++;
 		ops = gcm_supp_impl[cycle_impl_idx % gcm_supp_impl_cnt];
 #ifdef CAN_USE_GCM_ASM
 	} else if (impl == IMPL_AVX) {
 		/*
 		 * While switching to avx there may still be unfinished
 		 * non-avx contexts around, so hand out an implementation
 		 * that is valid for them.
 		 */
 		ops = &gcm_generic_impl;
 #endif
 	} else {
 		/* Any other value is an index into gcm_supp_impl[]. */
 		ASSERT3U(impl, <, gcm_supp_impl_cnt);
 		ASSERT3U(gcm_supp_impl_cnt, >, 0);
 		if (impl < ARRAY_SIZE(gcm_all_impl))
 			ops = gcm_supp_impl[impl];
 	}
 
 	ASSERT3P(ops, !=, NULL);
 
 	return (ops);
 }
 
 /*
  * Initialize all supported implementations.
  */
 void
 gcm_impl_init(void)
 {
 	gcm_impl_ops_t *curr_impl;
 	int i, c;
 
 	/* Move supported implementations into gcm_supp_impls */
 	for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
 		curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];
 
 		if (curr_impl->is_supported())
 			gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
 	}
 	gcm_supp_impl_cnt = c;
 
 	/*
 	 * Set the fastest implementation given the assumption that the
 	 * hardware accelerated version is the fastest.
 	 */
 #if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
 	if (gcm_pclmulqdq_impl.is_supported()) {
 		memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
 		    sizeof (gcm_fastest_impl));
 	} else
 #endif
 	{
 		memcpy(&gcm_fastest_impl, &gcm_generic_impl,
 		    sizeof (gcm_fastest_impl));
 	}
 
 	strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);
 
 #ifdef CAN_USE_GCM_ASM
 	/*
 	 * Use the avx implementation if it's available and the implementation
 	 * hasn't changed from its default value of fastest on module load.
 	 */
 	if (gcm_avx_will_work()) {
 #ifdef HAVE_MOVBE
 		if (zfs_movbe_available() == B_TRUE) {
 			atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
 		}
 #endif
 		if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
 			gcm_set_avx(B_TRUE);
 		}
 	}
 #endif
 	/* Finish initialization */
 	atomic_swap_32(&icp_gcm_impl, user_sel_impl);
 	gcm_impl_initialized = B_TRUE;
 }
 
 /*
  * Selectable options that are not entries of gcm_supp_impl[]: maps the
  * option name to the selector value stored in icp_gcm_impl/user_sel_impl.
  */
 static const struct {
 	const char *name;
 	uint32_t sel;
 } gcm_impl_opts[] = {
 		{ "cycle",	IMPL_CYCLE },
 		{ "fastest",	IMPL_FASTEST },
 #ifdef CAN_USE_GCM_ASM
 		{ "avx",	IMPL_AVX },
 #endif
 };
 
 /*
  * Function sets desired gcm implementation.
  *
  * If we are called before init(), user preference will be saved in
  * user_sel_impl, and applied in later init() call. This occurs when module
  * parameter is specified on module load. Otherwise, directly update
  * icp_gcm_impl.
  *
  * @val		Name of gcm implementation to use
  *
  * Returns 0 on success, or -EINVAL if the name is empty, too long or does
  * not match a selectable implementation. A rejected name leaves both the
  * selected implementation and the avx usage flag untouched.
  */
 int
 gcm_impl_set(const char *val)
 {
 	int err = -EINVAL;
 	char req_name[GCM_IMPL_NAME_MAX];
 	uint32_t impl = GCM_IMPL_READ(user_sel_impl);
 	size_t i;
 
 	/* sanitize input */
 	i = strnlen(val, GCM_IMPL_NAME_MAX);
 	if (i == 0 || i >= GCM_IMPL_NAME_MAX)
 		return (err);
 
 	/* Work on a private copy with trailing whitespace removed. */
 	strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
 	while (i > 0 && isspace(req_name[i-1]))
 		i--;
 	req_name[i] = '\0';
 
 	/* Check mandatory options */
 	for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
 #ifdef CAN_USE_GCM_ASM
 		/* Ignore avx implementation if it won't work. */
 		if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
 			continue;
 		}
 #endif
 		if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
 			impl = gcm_impl_opts[i].sel;
 			err = 0;
 			break;
 		}
 	}
 
 	/* check all supported impl if init() was already called */
 	if (err != 0 && gcm_impl_initialized) {
 		/* check all supported implementations */
 		for (i = 0; i < gcm_supp_impl_cnt; i++) {
 			if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
 				impl = i;
 				err = 0;
 				break;
 			}
 		}
 	}
 
 	/*
 	 * Bail out before touching any state if the name was not recognized.
 	 * Otherwise the avx flag below would be recomputed from the stale
 	 * user_sel_impl value and could contradict icp_gcm_impl.
 	 */
 	if (err != 0)
 		return (err);
 
 #ifdef CAN_USE_GCM_ASM
 	/*
 	 * Use the avx implementation if available and the requested one is
 	 * avx or fastest.
 	 */
 	if (gcm_avx_will_work() == B_TRUE &&
 	    (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
 		gcm_set_avx(B_TRUE);
 	} else {
 		gcm_set_avx(B_FALSE);
 	}
 #endif
 
 	/* Before init() only record the preference, afterwards apply it. */
 	if (gcm_impl_initialized)
 		atomic_swap_32(&icp_gcm_impl, impl);
 	else
 		atomic_swap_32(&user_sel_impl, impl);
 
 	return (0);
 }
 
 #if defined(_KERNEL) && defined(__linux__)
 
 /*
  * Module parameter "set" handler for icp_gcm_impl: forwards the value to
  * gcm_impl_set() and returns its result. kp is not used.
  */
 static int
 icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
 {
 	return (gcm_impl_set(val));
 }
 
 /*
  * Module parameter "get" handler for icp_gcm_impl: writes the names of all
  * selectable implementations into buffer, separated by spaces, with the
  * active one enclosed in brackets. Returns the number of bytes written.
  * kp is not used.
  */
 static int
 icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
 {
 	const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
 	const char *fmt;
 	int cnt = 0;
 
 	ASSERT(gcm_impl_initialized);
 
 	/* Mandatory options come first. */
 	for (size_t opt = 0; opt < ARRAY_SIZE(gcm_impl_opts); opt++) {
 		const uint32_t sel = gcm_impl_opts[opt].sel;
 
 #ifdef CAN_USE_GCM_ASM
 		/* Don't advertise avx if it won't work. */
 		if (sel == IMPL_AVX && !gcm_avx_will_work()) {
 			continue;
 		}
 #endif
 		if (sel == impl)
 			fmt = "[%s] ";
 		else
 			fmt = "%s ";
 		cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
 		    gcm_impl_opts[opt].name);
 	}
 
 	/* Followed by every supported implementation, selected by index. */
 	for (size_t idx = 0; idx < gcm_supp_impl_cnt; idx++) {
 		if (idx == impl)
 			fmt = "[%s] ";
 		else
 			fmt = "%s ";
 		cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
 		    gcm_supp_impl[idx]->name);
 	}
 
 	return (cnt);
 }
 
 module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
     NULL, 0644);
 MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
 #endif /* defined(_KERNEL) && defined(__linux__) */
 
 #ifdef CAN_USE_GCM_ASM
 #define	GCM_BLOCK_LEN 16
 /*
  * The openssl asm routines are 6x aggregated and need that many bytes
  * at minimum.
  */
 #define	GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6)
 #define	GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3)
 /*
  * Ensure the chunk size is reasonable since we are allocating a
  * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts.
  */
 #define	GCM_AVX_MAX_CHUNK_SIZE \
 	(((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)
 
 /* Clear the FPU registers since they hold sensitive internal state. */
 #define	clear_fpu_regs() clear_fpu_regs_avx()
 #define	GHASH_AVX(ctx, in, len) \
     gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \
     in, len)
 
 #define	gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)
 
 /* Get the chunk size module parameter. */
 #define	GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size
 
 /*
  * Module parameter: number of bytes to process at once while owning the FPU.
  * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is
  * ensured to be greater than or equal to GCM_AVX_MIN_DECRYPT_BYTES.
  */
 static uint32_t gcm_avx_chunk_size =
 	((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
 
 extern void clear_fpu_regs_avx(void);
 extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
 extern void aes_encrypt_intel(const uint32_t rk[], int nr,
     const uint32_t pt[4], uint32_t ct[4]);
 
 extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]);
 extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable,
     const uint8_t *in, size_t len);
 
 extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
     const void *, uint64_t *, uint64_t *);
 
 extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
     const void *, uint64_t *, uint64_t *);
 
 static inline boolean_t
 gcm_avx_will_work(void)
 {
 	/* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
 	return (kfpu_allowed() &&
 	    zfs_avx_available() && zfs_aes_available() &&
 	    zfs_pclmulqdq_available());
 }
 
 /*
  * Store val in gcm_use_avx, but only if the avx code path will work;
  * otherwise the flag is left alone.
  */
 static inline void
 gcm_set_avx(boolean_t val)
 {
 	if (gcm_avx_will_work() != B_TRUE)
 		return;
 
 	atomic_swap_32(&gcm_use_avx, val);
 }
 
 /*
  * Flip the avx usage flag and return what atomic_toggle_boolean_nv()
  * reports. If the avx code path won't work the flag is not touched and
  * B_FALSE is returned.
  */
 static inline boolean_t
 gcm_toggle_avx(void)
 {
 	if (gcm_avx_will_work() != B_TRUE)
 		return (B_FALSE);
 
 	return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX));
 }
 
 /*
  * Return the number of bytes to allocate for the hash table for the given
  * simd mode, or 0 if the mode has no hash table.
  */
 static inline size_t
 gcm_simd_get_htab_size(boolean_t simd_mode)
 {
 	if (simd_mode == B_TRUE)
 		return (2 * 6 * 2 * sizeof (uint64_t));
 
 	return (0);
 }
 
 /*
  * Clear sensitive data in the context.
  *
  * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and
  * ctx->gcm_Htable contain the hash sub key which protects authentication.
  *
  * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for
  * a known plaintext attack, they consist of the IV and the first and last
  * counter respectively. If they should be cleared is debatable.
  *
  * NOTE(review): ctx->gcm_Htable is named above but is not zeroed here;
  * presumably that happens where the table is freed -- confirm.
  */
 static inline void
 gcm_clear_ctx(gcm_ctx_t *ctx)
 {
 	memset(ctx->gcm_remainder, 0, sizeof (ctx->gcm_remainder));
 	memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H));
 	memset(ctx->gcm_J0, 0, sizeof (ctx->gcm_J0));
 	memset(ctx->gcm_tmp, 0, sizeof (ctx->gcm_tmp));
 }
 
 /*
  * Add n to the counter held in the bits of ctx->gcm_cb[1] selected by
  * ntohll(0x00000000ffffffff); all other bits of the word are preserved and
  * the counter wraps within its 32 bits.
  */
 static inline void
 gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
 {
 	const uint64_t mask = ntohll(0x00000000ffffffffULL);
 	uint64_t cnt;
 
 	/* Extract the counter in host order, bump it, convert it back. */
 	cnt = ntohll(ctx->gcm_cb[1] & mask);
 	cnt = htonll(cnt + n) & mask;
 
 	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~mask) | cnt;
 }
 
 /*
  * Encrypt multiple blocks of data in GCM mode.
  * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines
  * if possible. While processing a chunk the FPU is "locked".
  *
  * ctx		GCM context; counter block, ghash and remainder are updated.
  * data		Input to encrypt.
  * length	Number of input bytes.
  * out		Destination; cd_offset is advanced by the bytes written.
  * block_size	Must be GCM_BLOCK_LEN.
  *
  * Input that doesn't fill a whole block is kept in ctx->gcm_remainder for
  * a later call.
  *
  * Returns CRYPTO_SUCCESS, CRYPTO_HOST_MEMORY, CRYPTO_FAILED or the error
  * reported by crypto_put_output_data().
  */
 static int
 gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
     size_t length, crypto_data_t *out, size_t block_size)
 {
 	size_t bleft = length;
 	size_t need = 0;
 	size_t done = 0;
 	uint8_t *datap = (uint8_t *)data;
 	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
 	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
 	uint64_t *ghash = ctx->gcm_ghash;
 	uint64_t *cb = ctx->gcm_cb;
 	uint8_t *ct_buf = NULL;
 	uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
 	int rv = CRYPTO_SUCCESS;
 
 	ASSERT(block_size == GCM_BLOCK_LEN);
 	/*
 	 * If the last call left an incomplete block, try to fill
 	 * it first.
 	 */
 	if (ctx->gcm_remainder_len > 0) {
 		need = block_size - ctx->gcm_remainder_len;
 		if (length < need) {
 			/* Accumulate bytes here and return. */
 			memcpy((uint8_t *)ctx->gcm_remainder +
 			    ctx->gcm_remainder_len, datap, length);
 
 			ctx->gcm_remainder_len += length;
 			if (ctx->gcm_copy_to == NULL) {
 				ctx->gcm_copy_to = datap;
 			}
 			return (CRYPTO_SUCCESS);
 		} else {
 			/* Complete incomplete block. */
 			memcpy((uint8_t *)ctx->gcm_remainder +
 			    ctx->gcm_remainder_len, datap, need);
 
 			ctx->gcm_copy_to = NULL;
 		}
 	}
 
 	/* Allocate a buffer to encrypt to if there is enough input. */
 	if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
 		ct_buf = vmem_alloc(chunk_size, KM_SLEEP);
 		if (ct_buf == NULL) {
 			return (CRYPTO_HOST_MEMORY);
 		}
 	}
 
 	/* If we completed an incomplete block, encrypt and write it out. */
 	if (ctx->gcm_remainder_len > 0) {
 		kfpu_begin();
 		aes_encrypt_intel(key->encr_ks.ks32, key->nr,
 		    (const uint32_t *)cb, (uint32_t *)tmp);
 
 		gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp);
 		GHASH_AVX(ctx, tmp, block_size);
 		clear_fpu_regs();
 		kfpu_end();
 		rv = crypto_put_output_data(tmp, out, block_size);
 		if (rv != CRYPTO_SUCCESS) {
 			/*
 			 * Report the failed write instead of advancing the
 			 * output offset and counter past it. The FPU has
 			 * already been released at this point.
 			 */
 			goto out_nofpu;
 		}
 		out->cd_offset += block_size;
 		gcm_incr_counter_block(ctx);
 		ctx->gcm_processed_data_len += block_size;
 		bleft -= need;
 		datap += need;
 		ctx->gcm_remainder_len = 0;
 	}
 
 	/* Do the bulk encryption in chunk_size blocks. */
 	for (; bleft >= chunk_size; bleft -= chunk_size) {
 		kfpu_begin();
 		done = aesni_gcm_encrypt(
 		    datap, ct_buf, chunk_size, key, cb, ghash);
 
 		clear_fpu_regs();
 		kfpu_end();
 		if (done != chunk_size) {
 			rv = CRYPTO_FAILED;
 			goto out_nofpu;
 		}
 		rv = crypto_put_output_data(ct_buf, out, chunk_size);
 		if (rv != CRYPTO_SUCCESS) {
 			goto out_nofpu;
 		}
 		out->cd_offset += chunk_size;
 		datap += chunk_size;
 		ctx->gcm_processed_data_len += chunk_size;
 	}
 	/* Check if we are already done. */
 	if (bleft == 0) {
 		goto out_nofpu;
 	}
 	/* Bulk encrypt the remaining data. */
 	kfpu_begin();
 	if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
 		done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash);
 		if (done == 0) {
 			rv = CRYPTO_FAILED;
 			goto out;
 		}
 		rv = crypto_put_output_data(ct_buf, out, done);
 		if (rv != CRYPTO_SUCCESS) {
 			goto out;
 		}
 		out->cd_offset += done;
 		ctx->gcm_processed_data_len += done;
 		datap += done;
 		bleft -= done;
 
 	}
 	/* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
 	while (bleft > 0) {
 		if (bleft < block_size) {
 			/* Keep the partial block for the next call. */
 			memcpy(ctx->gcm_remainder, datap, bleft);
 			ctx->gcm_remainder_len = bleft;
 			ctx->gcm_copy_to = datap;
 			goto out;
 		}
 		/* Encrypt, hash and write out. */
 		aes_encrypt_intel(key->encr_ks.ks32, key->nr,
 		    (const uint32_t *)cb, (uint32_t *)tmp);
 
 		gcm_xor_avx(datap, tmp);
 		GHASH_AVX(ctx, tmp, block_size);
 		rv = crypto_put_output_data(tmp, out, block_size);
 		if (rv != CRYPTO_SUCCESS) {
 			goto out;
 		}
 		out->cd_offset += block_size;
 		gcm_incr_counter_block(ctx);
 		ctx->gcm_processed_data_len += block_size;
 		datap += block_size;
 		bleft -= block_size;
 	}
 out:
 	/* Reached with the FPU still held. */
 	clear_fpu_regs();
 	kfpu_end();
 out_nofpu:
 	if (ct_buf != NULL) {
 		vmem_free(ct_buf, chunk_size);
 	}
 	return (rv);
 }
 
 /*
  * Finalize the encryption: Zero fill, encrypt, hash and write out an eventual
  * incomplete last block. Encrypt the ICB. Calculate the tag and write it out.
  *
  * ctx		GCM context holding the remainder, ghash and J0.
  * out		Destination; must have room for the remainder plus the tag.
  *		cd_offset is advanced by the bytes written.
  * block_size	Must be GCM_BLOCK_LEN.
  *
  * Returns CRYPTO_SUCCESS, CRYPTO_DATA_LEN_RANGE if out is too small, or
  * the error reported by crypto_put_output_data(). Sensitive context data
  * is cleared on success only.
  */
 static int
 gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
 {
 	uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
 	uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
 	uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
 	size_t rem_len = ctx->gcm_remainder_len;
 	/*
 	 * Read both the round keys and the round count from the key itself,
 	 * so that this doesn't depend on where encr_ks sits in aes_key_t.
 	 */
 	const aes_key_t *key = (aes_key_t *)ctx->gcm_keysched;
 	const void *keysched = key->encr_ks.ks32;
 	int aes_rounds = key->nr;
 	int rv;
 
 	ASSERT(block_size == GCM_BLOCK_LEN);
 
 	if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
 		return (CRYPTO_DATA_LEN_RANGE);
 	}
 
 	kfpu_begin();
 	/* Pad last incomplete block with zeros, encrypt and hash. */
 	if (rem_len > 0) {
 		uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
 		const uint32_t *cb = (uint32_t *)ctx->gcm_cb;
 
 		aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
 		memset(remainder + rem_len, 0, block_size - rem_len);
 		/* Only the rem_len data bytes are encrypted, not the pad. */
 		for (size_t i = 0; i < rem_len; i++) {
 			remainder[i] ^= tmp[i];
 		}
 		GHASH_AVX(ctx, remainder, block_size);
 		ctx->gcm_processed_data_len += rem_len;
 		/* No need to increment counter_block, it's the last block. */
 	}
 	/* Finish tag. */
 	ctx->gcm_len_a_len_c[1] =
 	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
 	GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
 	aes_encrypt_intel(keysched, aes_rounds, J0, J0);
 
 	gcm_xor_avx((uint8_t *)J0, ghash);
 	clear_fpu_regs();
 	kfpu_end();
 
 	/* Output remainder. */
 	if (rem_len > 0) {
 		rv = crypto_put_output_data(remainder, out, rem_len);
 		if (rv != CRYPTO_SUCCESS)
 			return (rv);
 	}
 	out->cd_offset += rem_len;
 	ctx->gcm_remainder_len = 0;
 	/* ghash now holds the tag, write out gcm_tag_len bytes of it. */
 	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
 	if (rv != CRYPTO_SUCCESS)
 		return (rv);
 
 	out->cd_offset += ctx->gcm_tag_len;
 	/* Clear sensitive data in the context before returning. */
 	gcm_clear_ctx(ctx);
 	return (CRYPTO_SUCCESS);
 }
 
 /*
  * Finalize decryption: We just have accumulated crypto text, so now we
  * decrypt it here inplace.
  *
  * ctx		GCM context; ctx->gcm_pt_buf holds the cipher text followed
  *		by the tag and is decrypted in place.
  * out		Destination for the plain text; cd_offset is advanced by the
  *		number of plain text bytes written.
  * block_size	Must be 16.
  *
  * Returns CRYPTO_SUCCESS, CRYPTO_FAILED, CRYPTO_INVALID_MAC if the tag
  * doesn't match, or the error reported by crypto_put_output_data().
  * Sensitive context data is cleared on success only.
  */
 static int
 gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
 {
 	ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
 	ASSERT3U(block_size, ==, 16);
 
 	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
 	/* The tag is stored after the cipher text, it isn't decrypted. */
 	size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
 	uint8_t *datap = ctx->gcm_pt_buf;
 	const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
 	uint32_t *cb = (uint32_t *)ctx->gcm_cb;
 	uint64_t *ghash = ctx->gcm_ghash;
 	uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
 	int rv = CRYPTO_SUCCESS;
 	size_t bleft, done;
 
 	/*
 	 * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
 	 * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple
 	 * of GCM_AVX_MIN_DECRYPT_BYTES.
 	 */
 	for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
 		kfpu_begin();
 		done = aesni_gcm_decrypt(datap, datap, chunk_size,
 		    (const void *)key, ctx->gcm_cb, ghash);
 		clear_fpu_regs();
 		kfpu_end();
 		if (done != chunk_size) {
 			return (CRYPTO_FAILED);
 		}
 		datap += done;
 	}
 	/* Decrypt remainder, which is less than chunk size, in one go. */
 	kfpu_begin();
 	if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
 		done = aesni_gcm_decrypt(datap, datap, bleft,
 		    (const void *)key, ctx->gcm_cb, ghash);
 		if (done == 0) {
 			clear_fpu_regs();
 			kfpu_end();
 			return (CRYPTO_FAILED);
 		}
 		datap += done;
 		bleft -= done;
 	}
 	ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);
 
 	/*
 	 * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
 	 * decrypt them block by block.
 	 */
 	while (bleft > 0) {
 		/* Incomplete last block. */
 		if (bleft < block_size) {
 			uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;
 
 			/* Hash the zero padded cipher text block. */
 			memset(lastb, 0, block_size);
 			memcpy(lastb, datap, bleft);
 			/* The GCM processing. */
 			GHASH_AVX(ctx, lastb, block_size);
 			aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
 			/* Only the bleft data bytes are decrypted. */
 			for (size_t i = 0; i < bleft; i++) {
 				datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
 			}
 			break;
 		}
 		/* The GCM processing. */
 		GHASH_AVX(ctx, datap, block_size);
 		aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
 		gcm_xor_avx((uint8_t *)tmp, datap);
 		gcm_incr_counter_block(ctx);
 
 		datap += block_size;
 		bleft -= block_size;
 	}
 	/*
 	 * NOTE(review): rv is not assigned between its initialization and
 	 * this check, so this branch looks unreachable -- confirm.
 	 */
 	if (rv != CRYPTO_SUCCESS) {
 		clear_fpu_regs();
 		kfpu_end();
 		return (rv);
 	}
 	/* Decryption done, finish the tag. */
 	ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
 	GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
 	aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
 	    (uint32_t *)ctx->gcm_J0);
 
 	gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);
 
 	/* We are done with the FPU, restore its state. */
 	clear_fpu_regs();
 	kfpu_end();
 
 	/*
 	 * Compare the input authentication tag with what we calculated.
 	 * NOTE(review): memcmp() is not guaranteed to run in constant time;
 	 * check whether that matters for the tag comparison here.
 	 */
 	if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
 		/* They don't match. */
 		return (CRYPTO_INVALID_MAC);
 	}
 	rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
 	if (rv != CRYPTO_SUCCESS) {
 		return (rv);
 	}
 	out->cd_offset += pt_len;
 	gcm_clear_ctx(ctx);
 	return (CRYPTO_SUCCESS);
 }
 
 /*
  * Initialize the GCM params H, Htable and the counter block. Save the
  * initial counter block.
  *
  * ctx			GCM context; gcm_Htable must already be allocated.
  * iv, iv_len		Initialization vector. A 12 byte IV is handled
  *			here, any other length is passed on to
  *			gcm_format_initial_blocks().
  * auth_data,
  * auth_data_len	Additional authenticated data, hashed into ghash.
  * block_size		Must be GCM_BLOCK_LEN.
  *
  * Always returns CRYPTO_SUCCESS.
  */
 static int
 gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
     unsigned char *auth_data, size_t auth_data_len, size_t block_size)
 {
 	uint8_t *cb = (uint8_t *)ctx->gcm_cb;
 	uint64_t *H = ctx->gcm_H;
 	const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
 	int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
 	uint8_t *datap = auth_data;
 	size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
 	size_t bleft;
 
 	ASSERT(block_size == GCM_BLOCK_LEN);
 
 	/* Init H (encrypt zero block) and create the initial counter block. */
 	memset(ctx->gcm_ghash, 0, sizeof (ctx->gcm_ghash));
 	memset(H, 0, sizeof (ctx->gcm_H));
 	kfpu_begin();
 	aes_encrypt_intel(keysched, aes_rounds,
 	    (const uint32_t *)H, (uint32_t *)H);
 
 	gcm_init_htab_avx(ctx->gcm_Htable, H);
 
 	if (iv_len == 12) {
 		/* Counter block is the IV followed by a 32 bit counter of 1. */
 		memcpy(cb, iv, 12);
 		cb[12] = 0;
 		cb[13] = 0;
 		cb[14] = 0;
 		cb[15] = 1;
 		/* We need the ICB later. */
 		memcpy(ctx->gcm_J0, cb, sizeof (ctx->gcm_J0));
 	} else {
 		/*
 		 * Most consumers use 12 byte IVs, so it's OK to use the
 		 * original routines for other IV sizes, just avoid nesting
 		 * kfpu_begin calls.
 		 */
 		clear_fpu_regs();
 		kfpu_end();
 		gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
 		    aes_copy_block, aes_xor_block);
 		kfpu_begin();
 	}
 
 	/* Openssl post increments the counter, adjust for that. */
 	gcm_incr_counter_block(ctx);
 
 	/* Ghash AAD in chunk_size blocks. */
 	for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
 		GHASH_AVX(ctx, datap, chunk_size);
 		datap += chunk_size;
 		/* Release and re-acquire the FPU between chunks. */
 		clear_fpu_regs();
 		kfpu_end();
 		kfpu_begin();
 	}
 	/* Ghash the remainder and handle possible incomplete GCM block. */
 	if (bleft > 0) {
 		size_t incomp = bleft % block_size;
 
 		bleft -= incomp;
 		if (bleft > 0) {
 			GHASH_AVX(ctx, datap, bleft);
 			datap += bleft;
 		}
 		if (incomp > 0) {
 			/* Zero pad and hash incomplete last block. */
 			uint8_t *authp = (uint8_t *)ctx->gcm_tmp;
 
 			memset(authp, 0, block_size);
 			memcpy(authp, datap, incomp);
 			GHASH_AVX(ctx, authp, block_size);
 		}
 	}
 	clear_fpu_regs();
 	kfpu_end();
 	return (CRYPTO_SUCCESS);
 }
 
 #if defined(_KERNEL)
 /*
  * Module parameter "set" handler for icp_gcm_avx_chunk_size. The value is
  * parsed, rounded down to a multiple of GCM_AVX_MIN_DECRYPT_BYTES and
  * rejected with -EINVAL unless it lies between GCM_AVX_MIN_ENCRYPT_BYTES
  * and GCM_AVX_MAX_CHUNK_SIZE. The rounded value is stored through
  * param_set_uint().
  */
 static int
 icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
 {
 	char val_rounded[16];
 	unsigned long chunk;
 	int error;
 
 	error = kstrtoul(buf, 0, &chunk);
 	if (error)
 		return (error);
 
 	/* Drop whatever exceeds a whole number of minimal decrypt units. */
 	chunk -= chunk % GCM_AVX_MIN_DECRYPT_BYTES;
 
 	if (chunk < GCM_AVX_MIN_ENCRYPT_BYTES ||
 	    chunk > GCM_AVX_MAX_CHUNK_SIZE)
 		return (-EINVAL);
 
 	/* param_set_uint() takes a string, so format the rounded value. */
 	snprintf(val_rounded, sizeof (val_rounded), "%u", (uint32_t)chunk);
 
 	return (param_set_uint(val_rounded, kp));
 }
 
 module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
     param_get_uint, &gcm_avx_chunk_size, 0644);
 
 MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
 	"How many bytes to process while owning the FPU");
 
 #endif /* defined(_KERNEL) */
 #endif /* ifdef CAN_USE_GCM_ASM */
diff --git a/module/icp/api/kcf_ctxops.c b/module/icp/api/kcf_ctxops.c
index 4fa281676b81..b8cd67ea7f67 100644
--- a/module/icp/api/kcf_ctxops.c
+++ b/module/icp/api/kcf_ctxops.c
@@ -1,149 +1,149 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #include <sys/zfs_context.h>
 #include <sys/crypto/common.h>
 #include <sys/crypto/impl.h>
 #include <sys/crypto/api.h>
 #include <sys/crypto/spi.h>
 #include <sys/crypto/sched_impl.h>
 
 /*
  * Crypto contexts manipulation routines
  */
 
 /*
  * crypto_create_ctx_template()
  *
  * Arguments:
  *
  *	mech:	crypto_mechanism_t pointer.
  *		mech_type is a valid value previously returned by
  *		crypto_mech2id();
  *		When the mech's parameter is not NULL, its definition depends
  *		on the standard definition of the mechanism.
  *	key:	pointer to a crypto_key_t structure.
  *	ptmpl:	a storage for the opaque crypto_ctx_template_t, allocated and
  *		initialized by the software provider this routine is
  *		dispatched to.
  *
  * Description:
  *	Redirects the call to the software provider of the specified
  *	mechanism. That provider will allocate and pre-compute/pre-expand
  *	the context template, reusable by later calls to crypto_xxx_init().
  *	The size and address of that provider context template are stored
  *	in an internal structure, kcf_ctx_template_t. The address of that
  *	structure is given back to the caller in *ptmpl.
  *
  * Context:
  *	Process or interrupt.
  *
  * Returns:
  *	CRYPTO_SUCCESS when the context template is successfully created.
  *	CRYPTO_HOST_MEMORY: mem alloc failure
  *	CRYPTO_ARGUMENTS_BAD: NULL storage for the ctx template.
  *	CRYPTO_MECHANISM_INVALID: invalid mechanism 'mech'.
  */
 int
 crypto_create_ctx_template(crypto_mechanism_t *mech, crypto_key_t *key,
     crypto_ctx_template_t *ptmpl)
 {
 	int error;
 	kcf_mech_entry_t *me;
 	kcf_provider_desc_t *pd;
 	kcf_ctx_template_t *ctx_tmpl;
 	crypto_mechanism_t prov_mech;
 
 	/* A few args validation */
 
 	if (ptmpl == NULL)
 		return (CRYPTO_ARGUMENTS_BAD);
 
 	if (mech == NULL)
 		return (CRYPTO_MECHANISM_INVALID);
 
 	/* Look up the software provider for this mechanism. */
 	error = kcf_get_sw_prov(mech->cm_type, &pd, &me, B_TRUE);
 	if (error != CRYPTO_SUCCESS)
 		return (error);
 
 	/*
 	 * From here on pd is released with KCF_PROV_REFRELE() on every
 	 * path; presumably the reference was taken by kcf_get_sw_prov().
 	 */
-	if ((ctx_tmpl = (kcf_ctx_template_t *)kmem_alloc(
+	if ((ctx_tmpl = kmem_alloc(
 	    sizeof (kcf_ctx_template_t), KM_SLEEP)) == NULL) {
 		KCF_PROV_REFRELE(pd);
 		return (CRYPTO_HOST_MEMORY);
 	}
 
 	/* Pass a mechtype that the provider understands */
 	prov_mech.cm_type = KCF_TO_PROV_MECHNUM(pd, mech->cm_type);
 	prov_mech.cm_param = mech->cm_param;
 	prov_mech.cm_param_len = mech->cm_param_len;
 
 	error = KCF_PROV_CREATE_CTX_TEMPLATE(pd, &prov_mech, key,
 	    &(ctx_tmpl->ct_prov_tmpl), &(ctx_tmpl->ct_size));
 
 	/* Hand the template out on success only, free it otherwise. */
 	if (error == CRYPTO_SUCCESS) {
 		*ptmpl = ctx_tmpl;
 	} else {
 		kmem_free(ctx_tmpl, sizeof (kcf_ctx_template_t));
 	}
 	KCF_PROV_REFRELE(pd);
 
 	return (error);
 }
 
 /*
  * crypto_destroy_ctx_template()
  *
  * Arguments:
  *
  *	tmpl:	an opaque crypto_ctx_template_t previously created by
  *		crypto_create_ctx_template()
  *
  * Description:
  *	Frees the embedded crypto_spi_ctx_template_t, then the
  *	kcf_ctx_template_t.
  *
  * Context:
  *	Process or interrupt.
  *
  */
 void
 crypto_destroy_ctx_template(crypto_ctx_template_t tmpl)
 {
 	kcf_ctx_template_t *ctx_tmpl = (kcf_ctx_template_t *)tmpl;
 
 	/* Destroying a NULL template is a no-op. */
 	if (ctx_tmpl != NULL) {
 		ASSERT(ctx_tmpl->ct_prov_tmpl != NULL);
 
 		/* Wipe the provider template before giving it back. */
 		memset(ctx_tmpl->ct_prov_tmpl, 0, ctx_tmpl->ct_size);
 		kmem_free(ctx_tmpl->ct_prov_tmpl, ctx_tmpl->ct_size);
 
 		/* Finally release the wrapper itself. */
 		kmem_free(ctx_tmpl, sizeof (kcf_ctx_template_t));
 	}
 }
 
 #if defined(_KERNEL)
 EXPORT_SYMBOL(crypto_create_ctx_template);
 EXPORT_SYMBOL(crypto_destroy_ctx_template);
 #endif
diff --git a/module/os/freebsd/spl/callb.c b/module/os/freebsd/spl/callb.c
index 47f3ccc0c7fa..2bfd4ea169bb 100644
--- a/module/os/freebsd/spl/callb.c
+++ b/module/os/freebsd/spl/callb.c
@@ -1,372 +1,372 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/sysmacros.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/mutex.h>
 #include <sys/condvar.h>
 #include <sys/callb.h>
 #include <sys/kmem.h>
 #include <sys/cmn_err.h>
 #include <sys/debug.h>
 #include <sys/kobj.h>
 #include <sys/systm.h>	/* for delay() */
 #include <sys/taskq.h>  /* For TASKQ_NAMELEN */
 #include <sys/kernel.h>
 
 #define	CB_MAXNAME	TASKQ_NAMELEN
 
 /*
  * The callb mechanism provides generic event scheduling/echoing.
  * A callb function is registered and called on behalf of the event.
  */
 typedef struct callb {
 	struct callb	*c_next; 	/* next in class list or on freelist */
 	kthread_id_t	c_thread;	/* thread the callb was registered for */
 	char		c_flag;		/* CALLB_* state bits (see below) */
 	uchar_t		c_class;	/* this callb's class */
 	kcondvar_t	c_done_cv;	/* broadcast when c_func has returned */
 	boolean_t	(*c_func)(void *, int);
 					/* cb function: returns true if ok */
 	void		*c_arg;		/* arg to c_func */
 	char		c_name[CB_MAXNAME+1]; /* debug:max func name length */
 } callb_t;
 
 /*
  * callb c_flag bitmap definitions
  */
 #define	CALLB_FREE		0x0
 #define	CALLB_TAKEN		0x1
 #define	CALLB_EXECUTING		0x2
 
 /*
  * Basic structure for a callb table.
  * All callbs are organized into different class groups described
  * by ct_class array.
  * The callbs within a class are single-linked and normally run by a
  * serial execution.
  */
 typedef struct callb_table {
 	kmutex_t ct_lock;		/* protect all callb states */
 	callb_t	*ct_freelist; 		/* free callb structures */
 	boolean_t ct_busy;		/* B_TRUE prevents additions */
 	kcondvar_t ct_busy_cv;		/* broadcast when ct_busy is cleared */
 	int	ct_ncallb; 		/* callbs allocated and not yet freed */
 	callb_t	*ct_first_cb[NCBCLASS];	/* ptr to 1st callb in a class */
 } callb_table_t;
 
 int callb_timeout_sec = CPR_KTHREAD_TIMEOUT_SEC;
 
 static callb_id_t callb_add_common(boolean_t (*)(void *, int),
     void *, int, char *, kthread_id_t);
 
 static callb_table_t callb_table;	/* system level callback table */
 static callb_table_t *ct = &callb_table;
 static kmutex_t	callb_safe_mutex;
 callb_cpr_t	callb_cprinfo_safe = {
 	&callb_safe_mutex, CALLB_CPR_ALWAYS_SAFE, 0, {0, 0} };
 
 /*
  * Init all callb tables in the system.
  */
 static void
 callb_init(void *dummy __unused)
 {
 	/* Bring up both locks, then mark the table open for additions. */
 	mutex_init(&callb_table.ct_lock, NULL, MUTEX_DEFAULT, NULL);
 	mutex_init(&callb_safe_mutex, NULL, MUTEX_DEFAULT, NULL);
 	callb_table.ct_busy = B_FALSE;
 }
 
 static void
 callb_fini(void *dummy __unused)
 {
 	callb_t *entry;
 	int pass;
 
 	mutex_enter(&ct->ct_lock);
 	for (pass = 0; pass < 16; pass++) {
 		/* Release every callb currently parked on the freelist. */
 		for (entry = ct->ct_freelist; entry != NULL;
 		    entry = ct->ct_freelist) {
 			ct->ct_freelist = entry->c_next;
 			ct->ct_ncallb--;
 			kmem_free(entry, sizeof (*entry));
 		}
 
 		/* Done once every allocated callb has been freed. */
 		if (ct->ct_ncallb == 0)
 			break;
 
 		/*
 		 * Some callbs are still registered; drop the lock and
 		 * give their owners a quarter second to delete them.
 		 */
 		mutex_exit(&ct->ct_lock);
 		tsleep(ct, 0, "callb", hz / 4);
 		mutex_enter(&ct->ct_lock);
 	}
 
 	/* Anything still outstanding after all passes is reported. */
 	if (ct->ct_ncallb > 0)
 		printf("%s: Leaked %d callbacks!\n", __func__, ct->ct_ncallb);
 	mutex_exit(&ct->ct_lock);
 
 	mutex_destroy(&callb_safe_mutex);
 	mutex_destroy(&callb_table.ct_lock);
 }
 
 /*
  * callb_add_common() registers func() to be called later, via
  * callb_execute_class(), as a member of the given class and on behalf of
  * thread t.  A callb is taken from the freelist when one is available,
  * otherwise a new zeroed one is allocated.  Registration waits while the
  * table is locked (ct_busy).  The returned id is the callb pointer itself
  * and is what callb_delete() expects.
  */
 static callb_id_t
 callb_add_common(boolean_t (*func)(void *arg, int code),
     void *arg, int class, char *name, kthread_id_t t)
 {
 	callb_t *cp;
 
 	/* NOTE(review): only the upper bound of class is asserted here. */
 	ASSERT3S(class, <, NCBCLASS);
 
 	mutex_enter(&ct->ct_lock);
 	/* Additions are held off while callb_lock_table() is in effect. */
 	while (ct->ct_busy)
 		cv_wait(&ct->ct_busy_cv, &ct->ct_lock);
 	if ((cp = ct->ct_freelist) == NULL) {
 		ct->ct_ncallb++;
-		cp = (callb_t *)kmem_zalloc(sizeof (callb_t), KM_SLEEP);
+		cp = kmem_zalloc(sizeof (callb_t), KM_SLEEP);
 	}
 	/*
 	 * Pop cp off the freelist.  For a freshly zeroed callb c_next is
 	 * NULL, which matches the empty freelist it was allocated for.
 	 */
 	ct->ct_freelist = cp->c_next;
 	cp->c_thread = t;
 	cp->c_func = func;
 	cp->c_arg = arg;
 	cp->c_class = (uchar_t)class;
 	cp->c_flag |= CALLB_TAKEN;
 #ifdef ZFS_DEBUG
 	if (strlen(name) > CB_MAXNAME)
 		cmn_err(CE_WARN, "callb_add: name of callback function '%s' "
 		    "too long -- truncated to %d chars",
 		    name, CB_MAXNAME);
 #endif
 	/* c_name holds CB_MAXNAME characters plus the terminating NUL. */
 	(void) strlcpy(cp->c_name, name, sizeof (cp->c_name));
 
 	/*
 	 * Insert the new callb at the head of its class list.
 	 */
 	cp->c_next = ct->ct_first_cb[class];
 	ct->ct_first_cb[class] = cp;
 
 	mutex_exit(&ct->ct_lock);
 	return ((callb_id_t)cp);
 }
 
 /*
  * The default function to add an entry to the callback table.  Since
  * it uses curthread as the thread identifier to store in the table,
  * it should be used for the normal case of a thread which is calling
  * to add ITSELF to the table.
  */
 callb_id_t
 callb_add(boolean_t (*func)(void *arg, int code),
     void *arg, int class, char *name)
 {
 	/* The calling thread registers itself as the owner. */
 	kthread_id_t self = curthread;
 
 	return (callb_add_common(func, arg, class, name, self));
 }
 
 /*
  * A special version of callb_add() above for use by threads which
  * might be adding an entry to the table on behalf of some other
  * thread (for example, one which is constructed but not yet running).
  * In this version the thread id is an argument.
  */
 callb_id_t
 callb_add_thread(boolean_t (*func)(void *arg, int code),
     void *arg, int class, char *name, kthread_id_t t)
 {
 	callb_id_t id;
 
 	/* Same as callb_add(), but the owning thread is given by t. */
 	id = callb_add_common(func, arg, class, name, t);
 
 	return (id);
 }
 
 /*
  * callb_delete() is called to remove an entry identified by id
  * that was originally placed there by a call to callb_add() or
  * callb_add_thread().  The callb is unlinked from its class list and
  * returned to the head of the freelist; it is not freed here.
  *
  * Returns 0 on success.  With ZFS_DEBUG defined, -1 is returned if the
  * entry cannot be found on its class list.
  */
 int
 callb_delete(callb_id_t id)
 {
 	callb_t **pp;
 	callb_t *me = (callb_t *)id;
 
 	mutex_enter(&ct->ct_lock);
 
 	for (;;) {
 		/* Find the link in the class list that points at me. */
 		pp = &ct->ct_first_cb[me->c_class];
 		while (*pp != NULL && *pp != me)
 			pp = &(*pp)->c_next;
 
 		/*
 		 * NOTE(review): without ZFS_DEBUG there is no not-found
 		 * check, so pp would be the list's terminating link here
 		 * and the relink below would write me->c_next into it.
 		 */
 #ifdef ZFS_DEBUG
 		if (*pp != me) {
 			cmn_err(CE_WARN, "callb delete bogus entry 0x%p",
 			    (void *)me);
 			mutex_exit(&ct->ct_lock);
 			return (-1);
 		}
 #endif /* ZFS_DEBUG */
 
 		/*
 		 * It is not allowed to delete a callb in the middle of
 		 * executing otherwise, the callb_execute() will be confused.
 		 */
 		if (!(me->c_flag & CALLB_EXECUTING))
 			break;
 
 		/*
 		 * Wait for the callback to finish, then search again since
 		 * the list may have changed while the lock was dropped.
 		 */
 		cv_wait(&me->c_done_cv, &ct->ct_lock);
 	}
 	/* relink the class list */
 	*pp = me->c_next;
 
 	/* clean up myself and return the free callb to the head of freelist */
 	me->c_flag = CALLB_FREE;
 	me->c_next = ct->ct_freelist;
 	ct->ct_freelist = me;
 
 	mutex_exit(&ct->ct_lock);
 	return (0);
 }
 
 /*
  * Run the callbs registered in one class, in list order, stopping at the
  * first one whose function returns false.
  *
  * class:	indicates to execute all callbs in the same class;
  * code:	optional argument for the callb functions.
  * return:	 = 0: success
  *		!= 0: ptr to the c_name buffer of the callb that failed,
  *		      i.e. the (possibly truncated) name it was
  *		      registered with
  */
 void *
 callb_execute_class(int class, int code)
 {
 	callb_t *cp;
 	void *ret = NULL;
 
 	ASSERT3S(class, <, NCBCLASS);
 
 	mutex_enter(&ct->ct_lock);
 
 	for (cp = ct->ct_first_cb[class];
 	    cp != NULL && ret == 0; cp = cp->c_next) {
 		/* Wait out any other executor running this callb. */
 		while (cp->c_flag & CALLB_EXECUTING)
 			cv_wait(&cp->c_done_cv, &ct->ct_lock);
 		/*
 		 * cont if the callb is deleted while we're sleeping
 		 */
 		if (cp->c_flag == CALLB_FREE)
 			continue;
 		cp->c_flag |= CALLB_EXECUTING;
 
 #ifdef CALLB_DEBUG
 		printf("callb_execute: name=%s func=%p arg=%p\n",
 		    cp->c_name, (void *)cp->c_func, (void *)cp->c_arg);
 #endif /* CALLB_DEBUG */
 
 		/*
 		 * The table lock is dropped around the callback itself;
 		 * CALLB_EXECUTING keeps callb_delete() from recycling cp
 		 * in the meantime.
 		 */
 		mutex_exit(&ct->ct_lock);
 		/* If callback function fails, pass back client's name */
 		if (!(*cp->c_func)(cp->c_arg, code))
 			ret = cp->c_name;
 		mutex_enter(&ct->ct_lock);
 
 		/* Wake anyone waiting in callb_delete() or in this loop. */
 		cp->c_flag &= ~CALLB_EXECUTING;
 		cv_broadcast(&cp->c_done_cv);
 	}
 	mutex_exit(&ct->ct_lock);
 	return (ret);
 }
 
 /*
  * callers make sure no recursive entries to this func.
  * dp->cc_lockp is registered by callb_add to protect callb_cpr_t structure.
  *
  * When calling to stop a kernel thread (code == CB_CODE_CPR_CHKPT) and
  * CPR_NOT_THREAD_SAFE is defined, we use a cv_reltimedwait() in case the
  * kernel thread is blocked.  Without CPR_NOT_THREAD_SAFE the checkpoint
  * case only sets CALLB_CPR_START and does not wait.
  *
  * Returns B_FALSE only if that wait timed out, B_TRUE otherwise
  * (including for codes that are not handled below).
  *
  * Note that this is a generic callback handler for daemon CPR and
  * should NOT be changed to accommodate any specific requirement in a daemon.
  * Individual daemons that require changes to the handler shall write
  * callback routines in their own daemon modules.
  */
 boolean_t
 callb_generic_cpr(void *arg, int code)
 {
 	callb_cpr_t *cp = (callb_cpr_t *)arg;
 	clock_t ret = 0;			/* assume success */
 
 	mutex_enter(cp->cc_lockp);
 
 	switch (code) {
 	case CB_CODE_CPR_CHKPT:
 		cp->cc_events |= CALLB_CPR_START;
 #ifdef CPR_NOT_THREAD_SAFE
 		while (!(cp->cc_events & CALLB_CPR_SAFE))
 			/* cv_reltimedwait() returns -1 if it times out. */
 			if ((ret = cv_reltimedwait(&cp->cc_callb_cv,
 			    cp->cc_lockp, (callb_timeout_sec * hz),
 			    TR_CLOCK_TICK)) == -1)
 				break;
 #endif
 		break;
 
 	case CB_CODE_CPR_RESUME:
 		/* Clear the start request and wake a thread on cc_stop_cv. */
 		cp->cc_events &= ~CALLB_CPR_START;
 		cv_signal(&cp->cc_stop_cv);
 		break;
 	}
 	mutex_exit(cp->cc_lockp);
 	return (ret != -1);
 }
 
 /*
  * The generic callback function associated with kernel threads which
  * are always considered safe.
  */
 boolean_t
 callb_generic_cpr_safe(void *arg, int code)
 {
 	/* Neither argument is consulted; the answer is always "ok". */
 	(void) arg;
 	(void) code;
 
 	return (B_TRUE);
 }
 /*
  * Prevent additions to callback table.
  */
 void
 callb_lock_table(void)
 {
 	mutex_enter(&ct->ct_lock);
 	/* The table must not already be locked. */
 	ASSERT(!ct->ct_busy);
 	/* callb_add_common() waits on ct_busy_cv while this is set. */
 	ct->ct_busy = B_TRUE;
 	mutex_exit(&ct->ct_lock);
 }
 
 /*
  * Allow additions to callback table.
  */
 void
 callb_unlock_table(void)
 {
 	mutex_enter(&ct->ct_lock);
 	/* Must pair with an earlier callb_lock_table(). */
 	ASSERT(ct->ct_busy);
 	ct->ct_busy = B_FALSE;
 	/* Release every thread blocked in callb_add_common(). */
 	cv_broadcast(&ct->ct_busy_cv);
 	mutex_exit(&ct->ct_lock);
 }
 
 SYSINIT(sol_callb, SI_SUB_DRIVERS, SI_ORDER_FIRST, callb_init, NULL);
 SYSUNINIT(sol_callb, SI_SUB_DRIVERS, SI_ORDER_FIRST, callb_fini, NULL);
diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c
index edd04783b363..4cceeb8bdffd 100644
--- a/module/os/linux/spl/spl-kmem-cache.c
+++ b/module/os/linux/spl/spl-kmem-cache.c
@@ -1,1465 +1,1465 @@
 /*
  *  Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
  *  Copyright (C) 2007 The Regents of the University of California.
  *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  *  Written by Brian Behlendorf <behlendorf1@llnl.gov>.
  *  UCRL-CODE-235197
  *
  *  This file is part of the SPL, Solaris Porting Layer.
  *
  *  The SPL is free software; you can redistribute it and/or modify it
  *  under the terms of the GNU General Public License as published by the
  *  Free Software Foundation; either version 2 of the License, or (at your
  *  option) any later version.
  *
  *  The SPL is distributed in the hope that it will be useful, but WITHOUT
  *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  *  for more details.
  *
  *  You should have received a copy of the GNU General Public License along
  *  with the SPL.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/percpu_compat.h>
 #include <sys/kmem.h>
 #include <sys/kmem_cache.h>
 #include <sys/taskq.h>
 #include <sys/timer.h>
 #include <sys/vmem.h>
 #include <sys/wait.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/prefetch.h>
 
 /*
  * Within the scope of the spl-kmem-cache.c file the kmem_cache_* definitions
  * are removed to allow access to the real Linux slab allocator.
  */
 #undef kmem_cache_destroy
 #undef kmem_cache_create
 #undef kmem_cache_alloc
 #undef kmem_cache_free
 
 
 /*
  * Linux 3.16 replaced smp_mb__{before,after}_{atomic,clear}_{dec,inc,bit}()
  * with smp_mb__{before,after}_atomic() because they were redundant. This is
  * only used inside our SLAB allocator, so we implement an internal wrapper
  * here to give us smp_mb__{before,after}_atomic() on older kernels.
  */
 #ifndef smp_mb__before_atomic
 #define	smp_mb__before_atomic(x) smp_mb__before_clear_bit(x)
 #endif
 
 #ifndef smp_mb__after_atomic
 #define	smp_mb__after_atomic(x) smp_mb__after_clear_bit(x)
 #endif
 
 /* BEGIN CSTYLED */
 /*
  * Cache magazines are an optimization designed to minimize the cost of
  * allocating memory.  They do this by keeping a per-cpu cache of recently
  * freed objects, which can then be reallocated without taking a lock. This
  * can improve performance on highly contended caches.  However, because
  * objects in magazines will prevent otherwise empty slabs from being
  * immediately released this may not be ideal for low memory machines.
  *
  * For this reason spl_kmem_cache_magazine_size can be used to set a maximum
  * magazine size.  When this value is set to 0 the magazine size will be
  * automatically determined based on the object size.  Otherwise magazines
  * will be limited to 2-256 objects per magazine (i.e per cpu).  Magazines
  * may never be entirely disabled in this implementation.
  */
 static unsigned int spl_kmem_cache_magazine_size = 0;
 module_param(spl_kmem_cache_magazine_size, uint, 0444);
 MODULE_PARM_DESC(spl_kmem_cache_magazine_size,
 	"Default magazine size (2-256), set automatically (0)");
 
 /*
  * The default behavior is to report the number of objects remaining in the
  * cache.  This allows the Linux VM to repeatedly reclaim objects from the
  * cache when memory is low to satisfy other memory allocations.  Alternately,
  * setting this value to KMC_RECLAIM_ONCE limits how aggressively the cache
  * is reclaimed.  This may increase the likelihood of out of memory events.
  */
 static unsigned int spl_kmem_cache_reclaim = 0 /* KMC_RECLAIM_ONCE */;
 module_param(spl_kmem_cache_reclaim, uint, 0644);
 MODULE_PARM_DESC(spl_kmem_cache_reclaim, "Single reclaim pass (0x1)");
 
 static unsigned int spl_kmem_cache_obj_per_slab = SPL_KMEM_CACHE_OBJ_PER_SLAB;
 module_param(spl_kmem_cache_obj_per_slab, uint, 0644);
 MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab, "Number of objects per slab");
 
 static unsigned int spl_kmem_cache_max_size = SPL_KMEM_CACHE_MAX_SIZE;
 module_param(spl_kmem_cache_max_size, uint, 0644);
 MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB");
 
 /*
  * For small objects the Linux slab allocator should be used to make the most
  * efficient use of the memory.  However, large objects are not supported by
  * the Linux slab and therefore the SPL implementation is preferred.  A cutoff
  * of 16K was determined to be optimal for architectures using 4K pages and
  * to also work well on architectures using larger 64K page sizes.
  */
 static unsigned int spl_kmem_cache_slab_limit = 16384;
 module_param(spl_kmem_cache_slab_limit, uint, 0644);
 MODULE_PARM_DESC(spl_kmem_cache_slab_limit,
 	"Objects less than N bytes use the Linux slab");
 
 /*
  * The number of threads available to allocate new slabs for caches.  This
  * should not need to be tuned but it is available for performance analysis.
  */
 static unsigned int spl_kmem_cache_kmem_threads = 4;
 module_param(spl_kmem_cache_kmem_threads, uint, 0444);
 MODULE_PARM_DESC(spl_kmem_cache_kmem_threads,
 	"Number of spl_kmem_cache threads");
 /* END CSTYLED */
 
 /*
  * Slab allocation interfaces
  *
  * While the Linux slab implementation was inspired by the Solaris
  * implementation I cannot use it to emulate the Solaris APIs.  I
  * require two features which are not provided by the Linux slab.
  *
  * 1) Constructors AND destructors.  Recent versions of the Linux
  *    kernel have removed support for destructors.  This is a deal
  *    breaker for the SPL which contains particularly expensive
  *    initializers for mutex's, condition variables, etc.  We also
  *    require a minimal level of cleanup for these data types unlike
  *    many Linux data types which do not need to be explicitly destroyed.
  *
  * 2) Virtual address space backed slab.  Callers of the Solaris slab
  *    expect it to work well for both small and very large allocations.
  *    Because of memory fragmentation the Linux slab which is backed
  *    by kmalloc'ed memory performs very badly when confronted with
  *    large numbers of large allocations.  Basing the slab on the
  *    virtual address space removes the need for contiguous pages
  *    and greatly improve performance for large allocations.
  *
  * For these reasons, the SPL has its own slab implementation with
  * the needed features.  It is not as highly optimized as either the
  * Solaris or Linux slabs, but it should get me most of what is
  * needed until it can be optimized or obsoleted by another approach.
  *
  * One serious concern I do have about this method is the relatively
  * small virtual address space on 32bit arches.  This will seriously
  * constrain the size of the slab caches and their performance.
  */
 
 struct list_head spl_kmem_cache_list;   /* List of caches */
 struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */
 static taskq_t *spl_kmem_cache_taskq;   /* Task queue for aging / reclaim */
 
 static void spl_cache_shrink(spl_kmem_cache_t *skc, void *obj);
 
 static void *
 kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
 {
 	void *ptr;
 
 	/* Convert the caller's flags to gfp flags and permit highmem. */
 	ptr = spl_vmalloc(size, kmem_flags_convert(flags) | __GFP_HIGHMEM);
 
 	/* Resulting allocated memory will be page aligned */
 	ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE));
 
 	return (ptr);
 }
 
 static void
 kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
 {
 	struct reclaim_state *rs = current->reclaim_state;
 
 	ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE));
 
 	/*
 	 * Direct reclaim in Linux watches this out of band counter to
 	 * decide whether forward progress is being made.  The Linux slab
 	 * implementations normally bump it from kmem_freepages(); since
 	 * none of that infrastructure is used here, the pages released
 	 * by this free are accounted for by hand.
 	 */
 	if (rs != NULL)
 		rs->reclaimed_slab += size >> PAGE_SHIFT;
 
 	vfree(ptr);
 }
 
 /*
  * Required space for each aligned sks.
  */
 static inline uint32_t
 spl_sks_size(spl_kmem_cache_t *skc)
 {
 	/* Slab header size, rounded up to the cache's object alignment. */
 	uint32_t hdr_size = P2ROUNDUP_TYPED(sizeof (spl_kmem_slab_t),
 	    skc->skc_obj_align, uint32_t);
 
 	return (hdr_size);
 }
 
 /*
  * Required space for each aligned object.
  */
 static inline uint32_t
 spl_obj_size(spl_kmem_cache_t *skc)
 {
 	/* Each slot is the aligned payload followed by its aligned sko. */
 	uint32_t payload = P2ROUNDUP_TYPED(skc->skc_obj_size,
 	    skc->skc_obj_align, uint32_t);
 	uint32_t tracker = P2ROUNDUP_TYPED(sizeof (spl_kmem_obj_t),
 	    skc->skc_obj_align, uint32_t);
 
 	return (payload + tracker);
 }
 
 uint64_t
 spl_kmem_cache_inuse(kmem_cache_t *cache)
 {
 	/* Reported value is the cache's skc_obj_total counter. */
 	uint64_t objs = cache->skc_obj_total;
 
 	return (objs);
 }
 EXPORT_SYMBOL(spl_kmem_cache_inuse);
 
 uint64_t
 spl_kmem_cache_entry_size(kmem_cache_t *cache)
 {
 	/* Object size as recorded in the cache (skc_obj_size). */
 	uint64_t entry_size = cache->skc_obj_size;
 
 	return (entry_size);
 }
 EXPORT_SYMBOL(spl_kmem_cache_entry_size);
 
 /*
  * Lookup the spl_kmem_object_t for an object given that object.
  */
 static inline spl_kmem_obj_t *
 spl_sko_from_obj(spl_kmem_cache_t *skc, void *obj)
 {
 	/* The sko sits directly behind the aligned object payload. */
 	uint32_t payload = P2ROUNDUP_TYPED(skc->skc_obj_size,
 	    skc->skc_obj_align, uint32_t);
 
 	return (obj + payload);
 }
 
 /*
  * It's important that we pack the spl_kmem_obj_t structure and the
  * actual objects in to one large address space to minimize the number
  * of calls to the allocator.  It is far better to do a few large
  * allocations and then subdivide it ourselves.  Now which allocator
  * we use requires balancing a few trade offs.
  *
  * For small objects we use kmem_alloc() because as long as you are
  * only requesting a small number of pages (ideally just one) its cheap.
  * However, when you start requesting multiple pages with kmem_alloc()
  * it gets increasingly expensive since it requires contiguous pages.
  * For this reason we shift to vmem_alloc() for slabs of large objects
  * which removes the need for contiguous pages.  We do not use
  * vmem_alloc() in all cases because there is significant locking
  * overhead in __get_vm_area_node().  This function takes a single
  * global lock when acquiring an available virtual address range which
  * serializes all vmem_alloc()'s for all slab caches.  Using slightly
  * different allocation functions for small and large objects should
  * give us the best of both worlds.
  *
  * +------------------------+
  * | spl_kmem_slab_t --+-+  |
  * | skc_obj_size    <-+ |  |
  * | spl_kmem_obj_t      |  |
  * | skc_obj_size    <---+  |
  * | spl_kmem_obj_t      |  |
  * | ...                 v  |
  * +------------------------+
  */
 static spl_kmem_slab_t *
 spl_slab_alloc(spl_kmem_cache_t *skc, int flags)
 {
 	spl_kmem_slab_t *sks;
 	void *base, *first_obj;
 	uint32_t obj_size;
 
 	/* One allocation backs the slab header and all of its objects. */
 	base = kv_alloc(skc, skc->skc_slab_size, flags);
 	if (base == NULL)
 		return (NULL);
 
 	/* The slab header lives at the very start of the allocation. */
 	sks = (spl_kmem_slab_t *)base;
 	sks->sks_magic = SKS_MAGIC;
 	sks->sks_objs = skc->skc_slab_objs;
 	sks->sks_age = jiffies;
 	sks->sks_cache = skc;
 	INIT_LIST_HEAD(&sks->sks_list);
 	INIT_LIST_HEAD(&sks->sks_free_list);
 	sks->sks_ref = 0;
 
 	/*
 	 * Both sizes depend only on the cache, so compute them once
 	 * rather than on every pass through the loop below.
 	 */
 	obj_size = spl_obj_size(skc);
 	first_obj = base + spl_sks_size(skc);
 
 	/* Carve out each object slot and queue it on the free list. */
 	for (int i = 0; i < sks->sks_objs; i++) {
 		void *obj = first_obj + (i * obj_size);
 
 		ASSERT(IS_P2ALIGNED(obj, skc->skc_obj_align));
 		spl_kmem_obj_t *sko = spl_sko_from_obj(skc, obj);
 		sko->sko_addr = obj;
 		sko->sko_magic = SKO_MAGIC;
 		sko->sko_slab = sks;
 		INIT_LIST_HEAD(&sko->sko_list);
 		list_add_tail(&sko->sko_list, &sks->sks_free_list);
 	}
 
 	return (sks);
 }
 
 /*
  * Remove a slab from complete or partial list, it must be called with
  * the 'skc->skc_lock' held but the actual free must be performed
  * outside the lock to prevent deadlocking on vmem addresses.
  */
 static void
 spl_slab_free(spl_kmem_slab_t *sks,
     struct list_head *sks_list, struct list_head *sko_list)
 {
 	spl_kmem_cache_t *skc = sks->sks_cache;
 
 	ASSERT(sks->sks_magic == SKS_MAGIC);
 	ASSERT(sks->sks_ref == 0);
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 
 	/* The cache no longer owns this slab or any of its objects. */
 	skc->skc_slab_total--;
 	skc->skc_obj_total -= sks->sks_objs;
 
 	/*
 	 * Take the slab off the cache's list and park it, together with
 	 * all of its free objects, on the caller's private work lists.
 	 * The caller releases the memory from there.
 	 */
 	list_move(&sks->sks_list, sks_list);
 	list_splice_init(&sks->sks_free_list, sko_list);
 }
 
 /*
  * Reclaim empty slabs at the end of the partial list.
  */
 static void
 spl_slab_reclaim(spl_kmem_cache_t *skc)
 {
 	spl_kmem_slab_t *sks = NULL, *m = NULL;
 	spl_kmem_obj_t *sko = NULL, *n = NULL;
 	LIST_HEAD(sks_list);
 	LIST_HEAD(sko_list);
 
 	/*
 	 * Empty slabs and objects must be moved to a private list so they
 	 * can be safely freed outside the spin lock.  All empty slabs are
 	 * at the end of skc->skc_partial_list, therefore once a non-empty
 	 * slab is found we can stop scanning.
 	 */
 	spin_lock(&skc->skc_lock);
 	list_for_each_entry_safe_reverse(sks, m,
 	    &skc->skc_partial_list, sks_list) {
 
 		/* A referenced slab still has objects in use; stop here. */
 		if (sks->sks_ref > 0)
 			break;
 
 		spl_slab_free(sks, &sks_list, &sko_list);
 	}
 	spin_unlock(&skc->skc_lock);
 
 	/*
 	 * The following two loops walk the collected objects and free the
 	 * slabs themselves.  This is all done outside the skc->skc_lock.
 	 * As written, the object loop only sanity checks each object's
 	 * magic; no per-object work is performed here.
 	 */
 
 	list_for_each_entry_safe(sko, n, &sko_list, sko_list) {
 		ASSERT(sko->sko_magic == SKO_MAGIC);
 	}
 
 	/* Each slab is a single kv_alloc() region of skc_slab_size bytes. */
 	list_for_each_entry_safe(sks, m, &sks_list, sks_list) {
 		ASSERT(sks->sks_magic == SKS_MAGIC);
 		kv_free(skc, sks, skc->skc_slab_size);
 	}
 }
 
 static spl_kmem_emergency_t *
 spl_emergency_search(struct rb_root *root, void *obj)
 {
 	const unsigned long key = (unsigned long)obj;
 	struct rb_node *cur = root->rb_node;
 
 	/* Plain binary search keyed on the object's address. */
 	while (cur != NULL) {
 		spl_kmem_emergency_t *ske =
 		    container_of(cur, spl_kmem_emergency_t, ske_node);
 
 		if (key == ske->ske_obj)
 			return (ske);
 
 		cur = (key < ske->ske_obj) ? cur->rb_left : cur->rb_right;
 	}
 
 	/* No emergency object is tracked at this address. */
 	return (NULL);
 }
 
 static int
 spl_emergency_insert(struct rb_root *root, spl_kmem_emergency_t *ske)
 {
 	const unsigned long key = ske->ske_obj;
 	struct rb_node **link = &root->rb_node;
 	struct rb_node *parent = NULL;
 
 	/* Descend to the empty link where the new node belongs. */
 	while (*link != NULL) {
 		spl_kmem_emergency_t *cur =
 		    container_of(*link, spl_kmem_emergency_t, ske_node);
 
 		/* An entry with this address already exists: reject. */
 		if (key == cur->ske_obj)
 			return (0);
 
 		parent = *link;
 		if (key < cur->ske_obj)
 			link = &parent->rb_left;
 		else
 			link = &parent->rb_right;
 	}
 
 	/* Hook the node in and let the tree rebalance. */
 	rb_link_node(&ske->ske_node, parent, link);
 	rb_insert_color(&ske->ske_node, root);
 
 	return (1);
 }
 
 /*
  * Allocate a single emergency object and track it in a red black tree.
  */
 static int
 spl_emergency_alloc(spl_kmem_cache_t *skc, int flags, void **obj)
 {
 	gfp_t lflags = kmem_flags_convert(flags);
 	int order = get_order(skc->skc_obj_size);
 	spl_kmem_emergency_t *ske;
 	int no_partial, inserted;
 
 	/* Last chance use a partial slab if one now exists */
 	spin_lock(&skc->skc_lock);
 	no_partial = list_empty(&skc->skc_partial_list);
 	spin_unlock(&skc->skc_lock);
 	if (!no_partial)
 		return (-EEXIST);
 
 	/* Tracking node first, then the pages backing the object. */
 	ske = kmalloc(sizeof (*ske), lflags);
 	if (ske == NULL)
 		return (-ENOMEM);
 
 	ske->ske_obj = __get_free_pages(lflags, order);
 	if (ske->ske_obj == 0) {
 		kfree(ske);
 		return (-ENOMEM);
 	}
 
 	/* Publish the object in the tree and update the accounting. */
 	spin_lock(&skc->skc_lock);
 	inserted = spl_emergency_insert(&skc->skc_emergency_tree, ske);
 	if (likely(inserted)) {
 		skc->skc_obj_total++;
 		skc->skc_obj_emergency++;
 		if (skc->skc_obj_emergency > skc->skc_obj_emergency_max)
 			skc->skc_obj_emergency_max = skc->skc_obj_emergency;
 	}
 	spin_unlock(&skc->skc_lock);
 
 	/* The address was already tracked; undo both allocations. */
 	if (unlikely(!inserted)) {
 		free_pages(ske->ske_obj, order);
 		kfree(ske);
 		return (-EINVAL);
 	}
 
 	*obj = (void *)ske->ske_obj;
 
 	return (0);
 }
 
 /*
  * Locate the passed object in the red black tree and free it.
  */
 static int
 spl_emergency_free(spl_kmem_cache_t *skc, void *obj)
 {
 	int order = get_order(skc->skc_obj_size);
 	spl_kmem_emergency_t *ske;
 
 	/* Unlink the tracking node and fix the counters under the lock. */
 	spin_lock(&skc->skc_lock);
 	ske = spl_emergency_search(&skc->skc_emergency_tree, obj);
 	if (ske != NULL) {
 		rb_erase(&ske->ske_node, &skc->skc_emergency_tree);
 		skc->skc_obj_total--;
 		skc->skc_obj_emergency--;
 	}
 	spin_unlock(&skc->skc_lock);
 
 	/* Not an emergency object of this cache. */
 	if (ske == NULL)
 		return (-ENOENT);
 
 	/* The memory itself is released outside the lock. */
 	free_pages(ske->ske_obj, order);
 	kfree(ske);
 
 	return (0);
 }
 
 /*
  * Release objects from the per-cpu magazine back to their slab.  The flush
  * argument contains the max number of entries to remove from the magazine.
  */
 static void
 spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
 {
 	int nflush, idx;
 
 	spin_lock(&skc->skc_lock);
 
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 	ASSERT(skm->skm_magic == SKM_MAGIC);
 
 	/* Never flush more than the magazine currently holds. */
 	nflush = MIN(flush, skm->skm_avail);
 
 	/* Hand the oldest entries back to their slabs... */
 	for (idx = 0; idx < nflush; idx++)
 		spl_cache_shrink(skc, skm->skm_objs[idx]);
 
 	/* ...then slide the survivors down to the front of the array. */
 	skm->skm_avail -= nflush;
 	memmove(skm->skm_objs, skm->skm_objs + nflush,
 	    sizeof (void *) * skm->skm_avail);
 
 	spin_unlock(&skc->skc_lock);
 }
 
 /*
  * Size a slab based on the size of each aligned object plus spl_kmem_obj_t.
  * When on-slab we want to target spl_kmem_cache_obj_per_slab.  However,
  * for very small objects we may end up with more than this so as not
  * to waste space in the minimal allocation of a single page.
  */
 static int
 spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size)
 {
 	uint32_t sks_size, obj_size, max_size, tgt_size, tgt_objs;
 
 	sks_size = spl_sks_size(skc);
 	obj_size = spl_obj_size(skc);
 	max_size = (spl_kmem_cache_max_size * 1024 * 1024);
 	tgt_size = (spl_kmem_cache_obj_per_slab * obj_size + sks_size);
 
 	if (tgt_size <= max_size) {
 		/* The preferred object count fits under the size limit. */
 		tgt_objs = (tgt_size - sks_size) / obj_size;
 	} else {
 		/*
 		 * spl_kmem_cache_max_size is a writable tunable.  If it
 		 * is set so low that not even the slab header fits, the
 		 * unsigned subtraction below would wrap around and yield
 		 * a huge bogus object count, so reject that case here.
 		 */
 		if (max_size < sks_size)
 			return (-ENOSPC);
 
 		/* Otherwise pack as many objects as the limit allows. */
 		tgt_objs = (max_size - sks_size) / obj_size;
 		tgt_size = (tgt_objs * obj_size) + sks_size;
 	}
 
 	/* Not even one object fits in a slab of the permitted size. */
 	if (tgt_objs == 0)
 		return (-ENOSPC);
 
 	*objs = tgt_objs;
 	*size = tgt_size;
 
 	return (0);
 }
 
 /*
  * Make a guess at reasonable per-cpu magazine size based on the size of
  * each object and the cost of caching N of them in each magazine.  Long
  * term this should really adapt based on an observed usage heuristic.
  */
 static int
 spl_magazine_size(spl_kmem_cache_t *skc)
 {
 	uint32_t obj_size;
 
 	/* An explicit tunable wins, clamped to the 2-256 range. */
 	if (spl_kmem_cache_magazine_size > 0)
 		return (MAX(MIN(spl_kmem_cache_magazine_size, 256), 2));
 
 	obj_size = spl_obj_size(skc);
 
 	/* Per-magazine sizes below assume a 4Kib page size */
 	if (obj_size > (PAGE_SIZE * 256))
 		return (4);	/* Minimum 4Mib per-magazine */
 	if (obj_size > (PAGE_SIZE * 32))
 		return (16);	/* Minimum 2Mib per-magazine */
 	if (obj_size > (PAGE_SIZE))
 		return (64);	/* Minimum 256Kib per-magazine */
 	if (obj_size > (PAGE_SIZE / 4))
 		return (128);	/* Minimum 128Kib per-magazine */
 
 	/* Small objects get the largest magazine. */
 	return (256);
 }
 
 /*
  * Allocate a per-cpu magazine to associate with a specific core.
  */
 static spl_kmem_magazine_t *
 spl_magazine_alloc(spl_kmem_cache_t *skc, int cpu)
 {
 	/* Header followed by skc_mag_size object pointer slots. */
 	int size = sizeof (spl_kmem_magazine_t) +
 	    sizeof (void *) * skc->skc_mag_size;
 	spl_kmem_magazine_t *skm;
 
 	/* Allocate on the memory node of the cpu the magazine serves. */
 	skm = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
 	if (skm == NULL)
 		return (NULL);
 
 	skm->skm_magic = SKM_MAGIC;
 	skm->skm_cache = skc;
 	skm->skm_cpu = cpu;
 	skm->skm_size = skc->skc_mag_size;
 	skm->skm_refill = skc->skc_mag_refill;
 	skm->skm_avail = 0;
 
 	return (skm);
 }
 
 /*
  * Free a per-cpu magazine associated with a specific core.
  */
 static void
 spl_magazine_free(spl_kmem_magazine_t *skm)
 {
 	/* skm must be a valid magazine; NULL is not tolerated here. */
 	ASSERT(skm->skm_magic == SKM_MAGIC);
 	/* Callers are expected to have flushed the magazine already. */
 	ASSERT(skm->skm_avail == 0);
 	kfree(skm);
 }
 
 /*
  * Create all per-cpu magazines of reasonable sizes.
  */
 static int
 spl_magazine_create(spl_kmem_cache_t *skc)
 {
 	int i = 0;
 
 	ASSERT((skc->skc_flags & KMC_SLAB) == 0);
 
 	/*
 	 * Zeroed array of magazine pointers, indexed by cpu id.
 	 * NOTE(review): the array has num_possible_cpus() slots but is
 	 * indexed by cpu id; confirm ids cannot exceed that count when
 	 * the possible mask is sparse.
 	 */
 	skc->skc_mag = kzalloc(sizeof (spl_kmem_magazine_t *) *
 	    num_possible_cpus(), kmem_flags_convert(KM_SLEEP));
 	if (skc->skc_mag == NULL)
 		return (-ENOMEM);
 
 	skc->skc_mag_size = spl_magazine_size(skc);
 	skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2;
 
 	for_each_possible_cpu(i) {
 		skc->skc_mag[i] = spl_magazine_alloc(skc, i);
 		if (!skc->skc_mag[i]) {
 			/*
 			 * Unwind the magazines created so far.  Slots for
 			 * cpu ids that are not possible were never filled
 			 * and are still NULL, so they must be skipped:
 			 * spl_magazine_free() dereferences its argument.
 			 */
 			for (i--; i >= 0; i--) {
 				if (skc->skc_mag[i] != NULL)
 					spl_magazine_free(skc->skc_mag[i]);
 			}
 
 			kfree(skc->skc_mag);
 			skc->skc_mag = NULL;
 			return (-ENOMEM);
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Destroy all per-cpu magazines.
  */
 static void
 spl_magazine_destroy(spl_kmem_cache_t *skc)
 {
 	int cpu = 0;
 
 	ASSERT((skc->skc_flags & KMC_SLAB) == 0);
 
 	for_each_possible_cpu(cpu) {
 		spl_kmem_magazine_t *skm = skc->skc_mag[cpu];
 
 		/* Empty the magazine completely before releasing it. */
 		spl_cache_flush(skc, skm, skm->skm_avail);
 		spl_magazine_free(skm);
 	}
 
 	/* All magazines are gone; drop the pointer array as well. */
 	kfree(skc->skc_mag);
 }
 
 /*
  * Create a object cache based on the following arguments:
  * name		cache name
  * size		cache object size
  * align	cache object alignment
  * ctor		cache object constructor
  * dtor		cache object destructor
  * reclaim	cache object reclaim
  * priv		cache private data for ctor/dtor/reclaim
  * vmp		unused must be NULL
  * flags
  *	KMC_KVMEM       Force kvmem backed SPL cache
  *	KMC_SLAB        Force Linux slab backed cache
  *	KMC_NODEBUG	Disable debugging (unsupported)
  *
  * Returns the new cache, which has been added to the global cache list, or
  * NULL if any allocation or setup step fails.  On failure everything that
  * was allocated here is released again.
  */
 spl_kmem_cache_t *
 spl_kmem_cache_create(const char *name, size_t size, size_t align,
     spl_kmem_ctor_t ctor, spl_kmem_dtor_t dtor, void *reclaim,
     void *priv, void *vmp, int flags)
 {
 	gfp_t lflags = kmem_flags_convert(KM_SLEEP);
 	spl_kmem_cache_t *skc;
 	int rc;
 
 	/*
 	 * Unsupported flags
 	 */
 	ASSERT(vmp == NULL);
 	ASSERT(reclaim == NULL);
 
 	might_sleep();
 
 	skc = kzalloc(sizeof (*skc), lflags);
 	if (skc == NULL)
 		return (NULL);
 
 	skc->skc_magic = SKC_MAGIC;
 	skc->skc_name_size = strlen(name) + 1;
-	skc->skc_name = (char *)kmalloc(skc->skc_name_size, lflags);
+	skc->skc_name = kmalloc(skc->skc_name_size, lflags);
 	if (skc->skc_name == NULL) {
 		kfree(skc);
 		return (NULL);
 	}
 	strlcpy(skc->skc_name, name, skc->skc_name_size);
 
 	skc->skc_ctor = ctor;
 	skc->skc_dtor = dtor;
 	skc->skc_private = priv;
 	skc->skc_vmp = vmp;
 	skc->skc_linux_cache = NULL;
 	skc->skc_flags = flags;
 	skc->skc_obj_size = size;
 	skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN;
 	atomic_set(&skc->skc_ref, 0);
 
 	INIT_LIST_HEAD(&skc->skc_list);
 	INIT_LIST_HEAD(&skc->skc_complete_list);
 	INIT_LIST_HEAD(&skc->skc_partial_list);
 	skc->skc_emergency_tree = RB_ROOT;
 	spin_lock_init(&skc->skc_lock);
 	init_waitqueue_head(&skc->skc_waitq);
 	skc->skc_slab_fail = 0;
 	skc->skc_slab_create = 0;
 	skc->skc_slab_destroy = 0;
 	skc->skc_slab_total = 0;
 	skc->skc_slab_alloc = 0;
 	skc->skc_slab_max = 0;
 	skc->skc_obj_total = 0;
 	skc->skc_obj_alloc = 0;
 	skc->skc_obj_max = 0;
 	skc->skc_obj_deadlock = 0;
 	skc->skc_obj_emergency = 0;
 	skc->skc_obj_emergency_max = 0;
 
 	rc = percpu_counter_init_common(&skc->skc_linux_alloc, 0,
 	    GFP_KERNEL);
 	if (rc != 0) {
 		/*
 		 * The counter was never set up, so the common "out" path
 		 * (which destroys it) cannot be used; release the name
 		 * buffer here so it is not leaked along with the cache.
 		 */
 		kfree(skc->skc_name);
 		kfree(skc);
 		return (NULL);
 	}
 
 	/*
 	 * Verify the requested alignment restriction is sane.
 	 */
 	if (align) {
 		VERIFY(ISP2(align));
 		VERIFY3U(align, >=, SPL_KMEM_CACHE_ALIGN);
 		VERIFY3U(align, <=, PAGE_SIZE);
 		skc->skc_obj_align = align;
 	}
 
 	/*
 	 * When no specific type of slab is requested (kmem, vmem, or
 	 * linuxslab) then select a cache type based on the object size
 	 * and default tunables.
 	 */
 	if (!(skc->skc_flags & (KMC_SLAB | KMC_KVMEM))) {
 		if (spl_kmem_cache_slab_limit &&
 		    size <= (size_t)spl_kmem_cache_slab_limit) {
 			/*
 			 * Objects smaller than spl_kmem_cache_slab_limit can
 			 * use the Linux slab for better space-efficiency.
 			 */
 			skc->skc_flags |= KMC_SLAB;
 		} else {
 			/*
 			 * All other objects are considered large and are
 			 * placed on kvmem backed slabs.
 			 */
 			skc->skc_flags |= KMC_KVMEM;
 		}
 	}
 
 	/*
 	 * Given the type of slab allocate the required resources.
 	 */
 	if (skc->skc_flags & KMC_KVMEM) {
 		rc = spl_slab_size(skc,
 		    &skc->skc_slab_objs, &skc->skc_slab_size);
 		if (rc)
 			goto out;
 
 		rc = spl_magazine_create(skc);
 		if (rc)
 			goto out;
 	} else {
 		unsigned long slabflags = 0;
 
 		if (size > (SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE)) {
 			rc = EINVAL;
 			goto out;
 		}
 
 #if defined(SLAB_USERCOPY)
 		/*
 		 * Required for PAX-enabled kernels if the slab is to be
 		 * used for copying between user and kernel space.
 		 */
 		slabflags |= SLAB_USERCOPY;
 #endif
 
 #if defined(HAVE_KMEM_CACHE_CREATE_USERCOPY)
 		/*
 		 * Newer grsec patchset uses kmem_cache_create_usercopy()
 		 * instead of SLAB_USERCOPY flag
 		 */
 		skc->skc_linux_cache = kmem_cache_create_usercopy(
 		    skc->skc_name, size, align, slabflags, 0, size, NULL);
 #else
 		skc->skc_linux_cache = kmem_cache_create(
 		    skc->skc_name, size, align, slabflags, NULL);
 #endif
 		if (skc->skc_linux_cache == NULL) {
 			rc = ENOMEM;
 			goto out;
 		}
 	}
 
 	/* Publish the fully constructed cache on the global list */
 	down_write(&spl_kmem_cache_sem);
 	list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
 	up_write(&spl_kmem_cache_sem);
 
 	return (skc);
 out:
 	/* Common failure path once the per-cpu counter has been set up */
 	kfree(skc->skc_name);
 	percpu_counter_destroy(&skc->skc_linux_alloc);
 	kfree(skc);
 	return (NULL);
 }
 EXPORT_SYMBOL(spl_kmem_cache_create);
 
 /*
  * Register a move callback for cache defragmentation.
  * XXX: Unimplemented but harmless to stub out for now.
  *
  * The callback is only checked for being non-NULL; it is not stored and
  * the cache itself is left untouched.
  */
 void
 spl_kmem_cache_set_move(spl_kmem_cache_t *skc,
     kmem_cbrc_t (move)(void *, void *, size_t, void *))
 {
 	ASSERT(move != NULL);
 }
 EXPORT_SYMBOL(spl_kmem_cache_set_move);
 
 /*
  * Destroy a cache and all objects associated with the cache.
  *
  * The cache is unlinked from the global cache list, flagged with
  * KMC_BIT_DESTROY, and the task recorded in skc_taskqid is cancelled.
  * After skc_ref has dropped to zero the backing storage (magazines and
  * slabs, or the Linux slab cache) is released, the usage counters are
  * asserted to be zero, and the name buffer and cache structure are freed.
  */
 void
 spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
 {
 	DECLARE_WAIT_QUEUE_HEAD(wq);
 	taskqid_t id;
 
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 	ASSERT(skc->skc_flags & (KMC_KVMEM | KMC_SLAB));
 
 	/* Unlink from the global list while holding the list semaphore */
 	down_write(&spl_kmem_cache_sem);
 	list_del_init(&skc->skc_list);
 	up_write(&spl_kmem_cache_sem);
 
 	/* Cancel any and wait for any pending delayed tasks */
 	VERIFY(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags));
 
 	/* Snapshot the task id under the cache lock before cancelling it */
 	spin_lock(&skc->skc_lock);
 	id = skc->skc_taskqid;
 	spin_unlock(&skc->skc_lock);
 
 	taskq_cancel_id(spl_kmem_cache_taskq, id);
 
 	/*
 	 * Wait until all current callers complete, this is mainly
 	 * to catch the case where a low memory situation triggers a
 	 * cache reaping action which races with this destroy.
 	 */
 	wait_event(wq, atomic_read(&skc->skc_ref) == 0);
 
 	if (skc->skc_flags & KMC_KVMEM) {
 		/* SPL-managed cache: drain the magazines, then free slabs */
 		spl_magazine_destroy(skc);
 		spl_slab_reclaim(skc);
 	} else {
 		ASSERT(skc->skc_flags & KMC_SLAB);
 		kmem_cache_destroy(skc->skc_linux_cache);
 	}
 
 	spin_lock(&skc->skc_lock);
 
 	/*
 	 * Validate there are no objects in use and free all the
 	 * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers.
 	 */
 	ASSERT3U(skc->skc_slab_alloc, ==, 0);
 	ASSERT3U(skc->skc_obj_alloc, ==, 0);
 	ASSERT3U(skc->skc_slab_total, ==, 0);
 	ASSERT3U(skc->skc_obj_total, ==, 0);
 	ASSERT3U(skc->skc_obj_emergency, ==, 0);
 	ASSERT(list_empty(&skc->skc_complete_list));
 
 	ASSERT3U(percpu_counter_sum(&skc->skc_linux_alloc), ==, 0);
 	percpu_counter_destroy(&skc->skc_linux_alloc);
 
 	spin_unlock(&skc->skc_lock);
 
 	kfree(skc->skc_name);
 	kfree(skc);
 }
 EXPORT_SYMBOL(spl_kmem_cache_destroy);
 
 /*
  * Hand out one object from the given slab of the cache.  Used when the
  * per-cpu magazines are repopulated in batches.
  *
  * The first entry of the slab's free list is unlinked, the slab and cache
  * usage counters (including their high-water marks) are updated, and the
  * object's buffer address is returned.
  */
 static void *
 spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
 {
 	spl_kmem_obj_t *sko;
 
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 	ASSERT(sks->sks_magic == SKS_MAGIC);
 
 	/* Pick the first free object and take it off sks_free_list */
 	sko = list_entry(sks->sks_free_list.next, spl_kmem_obj_t, sko_list);
 	ASSERT(sko->sko_magic == SKO_MAGIC);
 	ASSERT(sko->sko_addr != NULL);
 	list_del_init(&sko->sko_list);
 
 	/* Slab accounting: the first reference puts the slab in use */
 	sks->sks_age = jiffies;
 	if (++sks->sks_ref == 1) {
 		skc->skc_slab_alloc++;
 
 		if (skc->skc_slab_max < skc->skc_slab_alloc)
 			skc->skc_slab_max = skc->skc_slab_alloc;
 	}
 
 	/* Object accounting, including the max usage statistic */
 	skc->skc_obj_alloc++;
 	if (skc->skc_obj_max < skc->skc_obj_alloc)
 		skc->skc_obj_max = skc->skc_obj_alloc;
 
 	return (sko->sko_addr);
 }
 
 /*
  * Generic slab allocation function to run by the global work queues.
  * It is responsible for allocating a new slab, linking it in to the list
  * of partial slabs, and then waking any waiters.
  *
  * Returns 0 when a slab was allocated and linked in, -ENOMEM otherwise.
  * A successful allocation also clears KMC_BIT_DEADLOCKED.
  */
 static int
 __spl_cache_grow(spl_kmem_cache_t *skc, int flags)
 {
 	spl_kmem_slab_t *sks;
 
 	/*
 	 * NOTE(review): the allocation is bracketed by spl_fstrans_mark() /
 	 * spl_fstrans_unmark(); presumably this keeps the allocation from
 	 * re-entering the filesystem - confirm against their definitions.
 	 */
 	fstrans_cookie_t cookie = spl_fstrans_mark();
 	sks = spl_slab_alloc(skc, flags);
 	spl_fstrans_unmark(cookie);
 
 	/* The slab lists and totals are only modified under skc_lock */
 	spin_lock(&skc->skc_lock);
 	if (sks) {
 		skc->skc_slab_total++;
 		skc->skc_obj_total += sks->sks_objs;
 		list_add_tail(&sks->sks_list, &skc->skc_partial_list);
 
 		smp_mb__before_atomic();
 		clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
 		smp_mb__after_atomic();
 	}
 	spin_unlock(&skc->skc_lock);
 
 	return (sks == NULL ? -ENOMEM : 0);
 }
 
 /*
  * Task function dispatched by spl_cache_grow() to allocate a new slab.
  *
  * 'data' is the spl_kmem_alloc_t request; it is freed here.  The cache
  * reference taken by the dispatcher is dropped, KMC_BIT_GROWING is cleared,
  * and waiters on skc_waitq are woken only when the slab allocation
  * succeeded.
  */
 static void
 spl_cache_grow_work(void *data)
 {
 	spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data;
 	spl_kmem_cache_t *skc = ska->ska_cache;
 
 	int error = __spl_cache_grow(skc, ska->ska_flags);
 
 	atomic_dec(&skc->skc_ref);
 	smp_mb__before_atomic();
 	clear_bit(KMC_BIT_GROWING, &skc->skc_flags);
 	smp_mb__after_atomic();
 	if (error == 0)
 		wake_up_all(&skc->skc_waitq);
 
 	kfree(ska);
 }
 
 /*
  * Wait condition used while a slab allocation is in flight: evaluates to
  * non-zero once KMC_BIT_GROWING is no longer set, i.e. when a new slab
  * should be available.
  */
 static int
 spl_cache_grow_wait(spl_kmem_cache_t *skc)
 {
 	return (test_bit(KMC_BIT_GROWING, &skc->skc_flags) == 0);
 }
 
 /*
  * No available objects on any slabs, create a new slab.  Note that this
  * functionality is disabled for KMC_SLAB caches which are backed by the
  * Linux slab.
  *
  * *obj is always reset to NULL first and is only set again through
  * spl_emergency_alloc() when the cache is flagged as deadlocked.
  *
  * Return values:
  *   -EAGAIN (or the spl_wait_on_bit() error) after waiting for a reap
  *   -ENOMEM if the request could not be allocated, or after waiting for
  *           the asynchronous slab allocation (the caller is expected to
  *           recheck the cache)
  *   otherwise the result of spl_emergency_alloc()
  */
 static int
 spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
 {
 	int remaining, rc = 0;
 
 	ASSERT0(flags & ~KM_PUBLIC_MASK);
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 	ASSERT((skc->skc_flags & KMC_SLAB) == 0);
 	might_sleep();
 	*obj = NULL;
 
 	/*
 	 * Before allocating a new slab wait for any reaping to complete and
 	 * then return so the local magazine can be rechecked for new objects.
 	 */
 	if (test_bit(KMC_BIT_REAPING, &skc->skc_flags)) {
 		rc = spl_wait_on_bit(&skc->skc_flags, KMC_BIT_REAPING,
 		    TASK_UNINTERRUPTIBLE);
 		return (rc ? rc : -EAGAIN);
 	}
 
 	/*
 	 * Note: It would be nice to reduce the overhead of context switch
 	 * and improve NUMA locality, by trying to allocate a new slab in the
 	 * current process context with KM_NOSLEEP flag.
 	 *
 	 * However, this can't be applied to vmem/kvmem due to a bug that
 	 * spl_vmalloc() doesn't honor gfp flags in page table allocation.
 	 */
 
 	/*
 	 * This is handled by dispatching a work request to the global work
 	 * queue.  This allows us to asynchronously allocate a new slab while
 	 * retaining the ability to safely fall back to smaller synchronous
 	 * allocations to ensure forward progress is always maintained.
 	 */
 	if (test_and_set_bit(KMC_BIT_GROWING, &skc->skc_flags) == 0) {
 		spl_kmem_alloc_t *ska;
 
 		ska = kmalloc(sizeof (*ska), kmem_flags_convert(flags));
 		if (ska == NULL) {
 			/* Undo the GROWING claim and let other waiters retry */
 			clear_bit_unlock(KMC_BIT_GROWING, &skc->skc_flags);
 			smp_mb__after_atomic();
 			wake_up_all(&skc->skc_waitq);
 			return (-ENOMEM);
 		}
 
 		/* Reference is dropped again by spl_cache_grow_work() */
 		atomic_inc(&skc->skc_ref);
 		ska->ska_cache = skc;
 		ska->ska_flags = flags;
 		taskq_init_ent(&ska->ska_tqe);
 		taskq_dispatch_ent(spl_kmem_cache_taskq,
 		    spl_cache_grow_work, ska, 0, &ska->ska_tqe);
 	}
 
 	/*
 	 * The goal here is to only detect the rare case where a virtual slab
 	 * allocation has deadlocked.  We must be careful to minimize the use
 	 * of emergency objects which are more expensive to track.  Therefore,
 	 * we set a very long timeout for the asynchronous allocation and if
 	 * the timeout is reached the cache is flagged as deadlocked.  From
 	 * this point only new emergency objects will be allocated until the
 	 * asynchronous allocation completes and clears the deadlocked flag.
 	 */
 	if (test_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags)) {
 		rc = spl_emergency_alloc(skc, flags, obj);
 	} else {
 		remaining = wait_event_timeout(skc->skc_waitq,
 		    spl_cache_grow_wait(skc), HZ / 10);
 
 		if (!remaining) {
 			/* Timed out: flag a deadlock if still growing */
 			spin_lock(&skc->skc_lock);
 			if (test_bit(KMC_BIT_GROWING, &skc->skc_flags)) {
 				set_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
 				skc->skc_obj_deadlock++;
 			}
 			spin_unlock(&skc->skc_lock);
 		}
 
 		rc = -ENOMEM;
 	}
 
 	return (rc);
 }
 
 /*
  * Refill a per-cpu magazine with objects from the slabs for this cache.
  * Ideally the magazine can be repopulated using existing objects which have
  * been released, however if we are unable to locate enough free objects new
  * slabs of objects will be created.  On success NULL is returned, otherwise
  * the address of a single emergency object is returned for use by the caller.
  *
  * NULL is also returned when growing the cache failed or when the thread
  * ended up on a different CPU; the caller must recheck its magazine.
  * Interrupts are enabled only around the call to spl_cache_grow() and are
  * disabled again before this function continues or returns.
  */
 static void *
 spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags)
 {
 	spl_kmem_slab_t *sks;
 	int count = 0, rc, refill;
 	void *obj = NULL;
 
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 	ASSERT(skm->skm_magic == SKM_MAGIC);
 
 	/* Never request more than the free space left in the magazine */
 	refill = MIN(skm->skm_refill, skm->skm_size - skm->skm_avail);
 	spin_lock(&skc->skc_lock);
 
 	while (refill > 0) {
 		/* No slabs available we may need to grow the cache */
 		if (list_empty(&skc->skc_partial_list)) {
 			spin_unlock(&skc->skc_lock);
 
 			local_irq_enable();
 			rc = spl_cache_grow(skc, flags, &obj);
 			local_irq_disable();
 
 			/* Emergency object for immediate use by caller */
 			if (rc == 0 && obj != NULL)
 				return (obj);
 
 			/* skc_lock is not held on any of the 'out' paths */
 			if (rc)
 				goto out;
 
 			/* Rescheduled to different CPU skm is not local */
 			if (skm != skc->skc_mag[smp_processor_id()])
 				goto out;
 
 			/*
 			 * Potentially rescheduled to the same CPU but
 			 * allocations may have occurred from this CPU while
 			 * we were sleeping so recalculate max refill.
 			 */
 			refill = MIN(refill, skm->skm_size - skm->skm_avail);
 
 			spin_lock(&skc->skc_lock);
 			continue;
 		}
 
 		/* Grab the next available slab */
 		sks = list_entry((&skc->skc_partial_list)->next,
 		    spl_kmem_slab_t, sks_list);
 		ASSERT(sks->sks_magic == SKS_MAGIC);
 		ASSERT(sks->sks_ref < sks->sks_objs);
 		ASSERT(!list_empty(&sks->sks_free_list));
 
 		/*
 		 * Consume as many objects as needed to refill the requested
 		 * cache.  We must also be careful not to overfill it.
 		 */
 		while (sks->sks_ref < sks->sks_objs && refill-- > 0 &&
 		    ++count) {
 			ASSERT(skm->skm_avail < skm->skm_size);
 			ASSERT(count < skm->skm_size);
 			skm->skm_objs[skm->skm_avail++] =
 			    spl_cache_obj(skc, sks);
 		}
 
 		/* Move slab to skc_complete_list when full */
 		if (sks->sks_ref == sks->sks_objs) {
 			list_del(&sks->sks_list);
 			list_add(&sks->sks_list, &skc->skc_complete_list);
 		}
 	}
 
 	spin_unlock(&skc->skc_lock);
 out:
 	return (NULL);
 }
 
 /*
  * Release an object back to the slab from which it came.
  *
  * The object is put back on its slab's free list, the slab and cache
  * reference counts are decremented, and the slab is repositioned on
  * skc_partial_list depending on how full it now is.
  */
 static void
 spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
 {
 	spl_kmem_slab_t *sks = NULL;
 	spl_kmem_obj_t *sko = NULL;
 
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 
 	/* Map the buffer address back to its tracking object and slab */
 	sko = spl_sko_from_obj(skc, obj);
 	ASSERT(sko->sko_magic == SKO_MAGIC);
 	sks = sko->sko_slab;
 	ASSERT(sks->sks_magic == SKS_MAGIC);
 	ASSERT(sks->sks_cache == skc);
 	list_add(&sko->sko_list, &sks->sks_free_list);
 
 	sks->sks_age = jiffies;
 	sks->sks_ref--;
 	skc->skc_obj_alloc--;
 
 	/*
 	 * Move slab to skc_partial_list when no longer full.  Slabs
 	 * are added to the head to keep the partial list in quasi-full
 	 * sorted order.  Fuller at the head, emptier at the tail.
 	 */
 	if (sks->sks_ref == (sks->sks_objs - 1)) {
 		list_del(&sks->sks_list);
 		list_add(&sks->sks_list, &skc->skc_partial_list);
 	}
 
 	/*
 	 * Move empty slabs to the end of the partial list so
 	 * they can be easily found and freed during reclamation.
 	 */
 	if (sks->sks_ref == 0) {
 		list_del(&sks->sks_list);
 		list_add_tail(&sks->sks_list, &skc->skc_partial_list);
 		skc->skc_slab_alloc--;
 	}
 }
 
 /*
  * Allocate one object from the cache.
  *
  * KMC_SLAB caches allocate straight from the backing Linux slab.  All
  * other caches take an object from the current CPU's magazine, refilling
  * the magazine from the slabs when it is empty.  Unless KM_NOSLEEP is
  * passed the allocation is retried until it succeeds; with KM_NOSLEEP
  * NULL may be returned.  The constructor, if any, is run on the object
  * before it is returned.
  */
 void *
 spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
 {
 	spl_kmem_magazine_t *skm;
 	void *obj = NULL;
 
 	ASSERT0(flags & ~KM_PUBLIC_MASK);
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 	ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
 
 	if (skc->skc_flags & KMC_SLAB) {
 		/*
 		 * Linux slab backed cache.  Every optimization is left to
 		 * the underlying cache; all that is guaranteed here is
 		 * that a KM_SLEEP caller never sees a failure.
 		 */
 		struct kmem_cache *slc = skc->skc_linux_cache;
 
 		for (;;) {
 			obj = kmem_cache_alloc(slc, kmem_flags_convert(flags));
 			if (obj != NULL || (flags & KM_NOSLEEP))
 				break;
 		}
 
 		/*
 		 * Count the objects handed out by the Linux cache; this
 		 * is kept purely for better debuggability.
 		 */
 		if (obj != NULL)
 			percpu_counter_inc(&skc->skc_linux_alloc);
 	} else {
 		local_irq_disable();
 
 		for (;;) {
 			/*
 			 * The per-cpu structure can be updated without a
 			 * lock, but the local magazine has to be looked up
 			 * again on every pass since it may have changed
 			 * while the cache was being grown.
 			 */
 			skm = skc->skc_mag[smp_processor_id()];
 			ASSERT(skm->skm_magic == SKM_MAGIC);
 
 			if (likely(skm->skm_avail)) {
 				/* Serve the request from the CPU cache */
 				obj = skm->skm_objs[--skm->skm_avail];
 				local_irq_enable();
 				ASSERT(obj);
 				ASSERT(IS_P2ALIGNED(obj, skc->skc_obj_align));
 				break;
 			}
 
 			/* Empty magazine: refill, or get an emergency object */
 			obj = spl_cache_refill(skc, skm, flags);
 			if (obj != NULL || (flags & KM_NOSLEEP)) {
 				local_irq_enable();
 				break;
 			}
 		}
 	}
 
 	if (obj == NULL)
 		return (NULL);
 
 	/* Construct the object, or else pull it toward the CPU cache */
 	if (skc->skc_ctor)
 		skc->skc_ctor(obj, skc->skc_private, flags);
 	else
 		prefetchw(obj);
 
 	return (obj);
 }
 EXPORT_SYMBOL(spl_kmem_cache_alloc);
 
 /*
  * Free an object back to the local per-cpu magazine, there is no
  * guarantee that this is the same magazine the object was originally
  * allocated from.  We may need to flush objects from the magazine
  * back to the slabs to make space.
  *
  * The destructor, if any, is always run first.  Objects of KMC_SLAB caches
  * are returned directly to the Linux slab, and emergency objects are
  * released through spl_emergency_free().
  */
 void
 spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
 {
 	spl_kmem_magazine_t *skm;
 	unsigned long flags;
 	int do_reclaim = 0;
 	int do_emergency = 0;
 
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 	ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
 
 	/*
 	 * Run the destructor
 	 */
 	if (skc->skc_dtor)
 		skc->skc_dtor(obj, skc->skc_private);
 
 	/*
 	 * Free the object back to the underlying Linux slab.
 	 */
 	if (skc->skc_flags & KMC_SLAB) {
 		kmem_cache_free(skc->skc_linux_cache, obj);
 		percpu_counter_dec(&skc->skc_linux_alloc);
 		return;
 	}
 
 	/*
 	 * While a cache has outstanding emergency objects all freed objects
 	 * must be checked.  However, since emergency objects will never use
 	 * a virtual address these objects can be safely excluded as an
 	 * optimization.
 	 */
 	if (!is_vmalloc_addr(obj)) {
 		spin_lock(&skc->skc_lock);
 		do_emergency = (skc->skc_obj_emergency > 0);
 		spin_unlock(&skc->skc_lock);
 
 		/* A zero return means obj was an emergency object */
 		if (do_emergency && (spl_emergency_free(skc, obj) == 0))
 			return;
 	}
 
 	local_irq_save(flags);
 
 	/*
 	 * Safe to update per-cpu structure without lock, but
 	 * no remote memory allocation tracking is being performed
 	 * it is entirely possible to allocate an object from one
 	 * CPU cache and return it to another.
 	 */
 	skm = skc->skc_mag[smp_processor_id()];
 	ASSERT(skm->skm_magic == SKM_MAGIC);
 
 	/*
 	 * Per-CPU cache full, flush it to make space for this object,
 	 * this may result in an empty slab which can be reclaimed once
 	 * interrupts are re-enabled.
 	 */
 	if (unlikely(skm->skm_avail >= skm->skm_size)) {
 		spl_cache_flush(skc, skm, skm->skm_refill);
 		do_reclaim = 1;
 	}
 
 	/* Available space in cache, use it */
 	skm->skm_objs[skm->skm_avail++] = obj;
 
 	local_irq_restore(flags);
 
 	/* Deferred until interrupts are enabled again, see above */
 	if (do_reclaim)
 		spl_slab_reclaim(skc);
 }
 EXPORT_SYMBOL(spl_kmem_cache_free);
 
 /*
  * Best-effort reap of a single cache.
  *
  * Released objects may merely repopulate the local magazine, which then
  * has to age out; objects that do not fit go back to their slabs, which
  * must age out as well before being freed.  The intent is to avoid
  * thrashing between creating and destroying slabs.
  *
  * Nothing is done for KMC_SLAB caches, or when another reap of this cache
  * is already in progress.  Only the magazine of the CPU this runs on is
  * flushed before the empty slabs are reclaimed.
  */
 void
 spl_kmem_cache_reap_now(spl_kmem_cache_t *skc)
 {
 	spl_kmem_magazine_t *skm;
 	unsigned long irq_flags;
 
 	ASSERT(skc->skc_magic == SKC_MAGIC);
 	ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags));
 
 	if (skc->skc_flags & KMC_SLAB)
 		return;
 
 	atomic_inc(&skc->skc_ref);
 
 	/* Whoever sets KMC_BIT_REAPING first does the reap; others skip */
 	if (!test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags)) {
 		/* Empty the local magazine, then free all now empty slabs */
 		local_irq_save(irq_flags);
 		skm = skc->skc_mag[smp_processor_id()];
 		spl_cache_flush(skc, skm, skm->skm_avail);
 		local_irq_restore(irq_flags);
 
 		spl_slab_reclaim(skc);
 
 		/* Release the reaping bit and wake anyone waiting on it */
 		clear_bit_unlock(KMC_BIT_REAPING, &skc->skc_flags);
 		smp_mb__after_atomic();
 		wake_up_bit(&skc->skc_flags, KMC_BIT_REAPING);
 	}
 
 	atomic_dec(&skc->skc_ref);
 }
 EXPORT_SYMBOL(spl_kmem_cache_reap_now);
 
 /*
  * This is stubbed out for code consistency with other platforms.  There
  * is existing logic to prevent concurrent reaping so while this is ugly
  * it should do no harm.
  *
  * Always returns 0.
  */
 int
 spl_kmem_cache_reap_active(void)
 {
 	return (0);
 }
 EXPORT_SYMBOL(spl_kmem_cache_reap_active);
 
 /*
  * Reap all free slabs from all registered caches.
  */
 void
 spl_kmem_reap(void)
 {
 	spl_kmem_cache_t *skc = NULL;
 
 	down_read(&spl_kmem_cache_sem);
 	list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
 		spl_kmem_cache_reap_now(skc);
 	}
 	up_read(&spl_kmem_cache_sem);
 }
 EXPORT_SYMBOL(spl_kmem_reap);
 
 /*
  * Set up the global cache list, its semaphore, and the task queue used
  * for cache work.  Returns 0 on success or -ENOMEM when the task queue
  * could not be created.
  */
 int
 spl_kmem_cache_init(void)
 {
 	init_rwsem(&spl_kmem_cache_sem);
 	INIT_LIST_HEAD(&spl_kmem_cache_list);
 
 	spl_kmem_cache_taskq = taskq_create("spl_kmem_cache",
 	    spl_kmem_cache_kmem_threads, maxclsyspri,
 	    spl_kmem_cache_kmem_threads * 8, INT_MAX,
 	    TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
 
 	return (spl_kmem_cache_taskq == NULL ? -ENOMEM : 0);
 }
 
 /*
  * Destroy the task queue created by spl_kmem_cache_init().
  */
 void
 spl_kmem_cache_fini(void)
 {
 	taskq_destroy(spl_kmem_cache_taskq);
 }
diff --git a/module/zfs/zfs_chksum.c b/module/zfs/zfs_chksum.c
index 4a9a36d87e66..91247f29278f 100644
--- a/module/zfs/zfs_chksum.c
+++ b/module/zfs/zfs_chksum.c
@@ -1,357 +1,357 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
  */
 
 #include <sys/types.h>
 #include <sys/spa.h>
 #include <sys/zio_checksum.h>
 #include <sys/zfs_context.h>
 #include <sys/zfs_chksum.h>
 
 #include <sys/blake3.h>
 
 /* limit benchmarking to max 256KiB, when EdonR is slower than this: */
 #define	LIMIT_PERF_MBS	300
 
 /*
  * Benchmark record for one checksum implementation.  The bs* members hold
  * the throughput in MiB/s that chksum_run() measured for that block size.
  */
 typedef struct {
 	const char *name;		/* algorithm, e.g. "sha256" */
 	const char *impl;		/* implementation, e.g. "generic" */
 	uint64_t bs1k;
 	uint64_t bs4k;
 	uint64_t bs16k;
 	uint64_t bs64k;
 	uint64_t bs256k;
 	uint64_t bs1m;
 	uint64_t bs4m;
 	uint64_t bs16m;
 	zio_cksum_salt_t salt;		/* zeroed, then handed to init */
 	zio_checksum_t *func;		/* checksum function under test */
 	zio_checksum_tmpl_init_t *init;	/* optional: creates the context */
 	zio_checksum_tmpl_free_t *free;	/* optional: frees the context */
 } chksum_stat_t;
 
 /* benchmark results: array of chksum_stat_cnt entries, and their kstat */
 static chksum_stat_t *chksum_stat_data = NULL;
 static int chksum_stat_cnt = 0;
 static kstat_t *chksum_kstat = NULL;
 
 /*
  * i3-1005G1 test output:
  *
  * implementation     1k      4k     16k     64k    256k      1m      4m
  * fletcher-4       5421   15001   26468   32555   34720   32801   18847
  * edonr-generic    1196    1602    1761    1749    1762    1759    1751
  * skein-generic     546     591     608     615     619     612     616
  * sha256-generic    246     270     274     274     277     275     276
  * sha256-avx        262     296     304     307     307     307     306
  * sha256-sha-ni     769    1072    1172    1220    1219    1232    1228
  * sha256-openssl    240     300     316     314     304     285     276
  * sha512-generic    333     374     385     392     391     393     392
  * sha512-openssl    353     441     467     476     472     467     426
  * sha512-avx        362     444     473     475     479     476     478
  * sha512-avx2       394     500     530     538     543     545     542
  * blake3-generic    308     313     313     313     312     313     312
  * blake3-sse2       402    1289    1423    1446    1432    1458    1413
  * blake3-sse41      427    1470    1625    1704    1679    1607    1629
  * blake3-avx2       428    1920    3095    3343    3356    3318    3204
  * blake3-avx512     473    2687    4905    5836    5844    5643    5374
  */
 
 /*
  * Write the kstat header line into buf (at most size bytes): a 23 wide
  * "implementation" column followed by one 8 wide column per block size.
  * Always returns 0.
  */
 static int
 chksum_kstat_headers(char *buf, size_t size)
 {
 	/* every column except the last one, which also ends the line */
 	static const char *const cols[] = {
 		"1k", "4k", "16k", "64k", "256k", "1m", "4m"
 	};
 	ssize_t off = 0;
 	size_t c;
 
 	off += kmem_scnprintf(buf + off, size - off, "%-23s",
 	    "implementation");
 	for (c = 0; c < sizeof (cols) / sizeof (cols[0]); c++)
 		off += kmem_scnprintf(buf + off, size - off, "%8s", cols[c]);
 	(void) kmem_scnprintf(buf + off, size - off, "%8s\n", "16m");
 
 	return (0);
 }
 
 /*
  * Write one kstat result line into buf (at most size bytes) for the
  * chksum_stat_t passed in data: the "name-impl" label followed by the
  * eight measured values.  Always returns 0.
  */
 static int
 chksum_kstat_data(char *buf, size_t size, void *data)
 {
 	chksum_stat_t *cs = (chksum_stat_t *)data;
 	/* every value except the last one, which also ends the line */
 	const uint64_t vals[] = {
 		cs->bs1k, cs->bs4k, cs->bs16k, cs->bs64k,
 		cs->bs256k, cs->bs1m, cs->bs4m
 	};
 	ssize_t off = 0;
 	char b[24];
 	size_t v;
 
 	kmem_scnprintf(b, 23, "%s-%s", cs->name, cs->impl);
 	off += kmem_scnprintf(buf + off, size - off, "%-23s", b);
 	for (v = 0; v < sizeof (vals) / sizeof (vals[0]); v++) {
 		off += kmem_scnprintf(buf + off, size - off, "%8llu",
 		    (u_longlong_t)vals[v]);
 	}
 	(void) kmem_scnprintf(buf + off, size - off, "%8llu\n",
 	    (u_longlong_t)cs->bs16m);
 
 	return (0);
 }
 
 /*
  * Map kstat row n to its benchmark record.  The record (or NULL once n is
  * past the last entry) is stored in ksp->ks_private and returned.
  */
 static void *
 chksum_kstat_addr(kstat_t *ksp, loff_t n)
 {
 	ksp->ks_private = (n < chksum_stat_cnt) ?
 	    (void *)(chksum_stat_data + n) : NULL;
 
 	return (ksp->ks_private);
 }
 
 /*
  * Run one benchmark round for cs and store the throughput in MiB/s in
  * *result.  The round number (1..8) selects the block size; batches of
  * checksum calls are repeated with preemption disabled until at least one
  * millisecond has elapsed.
  */
 static void
 chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
     uint64_t *result)
 {
 	/* indexed by round - 1: block size in bytes and calls per batch */
 	static const struct {
 		uint64_t size;
 		uint32_t loops;
 	} rounds[] = {
 		{ 1ULL << 10, 128 },	/* 1k */
 		{ 1ULL << 12, 64 },	/* 4k */
 		{ 1ULL << 14, 32 },	/* 16k */
 		{ 1ULL << 16, 16 },	/* 64k */
 		{ 1ULL << 18, 8 },	/* 256k */
 		{ 1ULL << 20, 4 },	/* 1m */
 		{ 1ULL << 22, 1 },	/* 4m */
 		{ 1ULL << 24, 1 },	/* 16m */
 	};
 	uint64_t run_bw, run_time_ns, run_count = 0, size = 0;
 	uint32_t l, loops = 0;
 	hrtime_t start;
 	zio_cksum_t zcp;
 
 	/* an unknown round leaves size and loops at zero */
 	if (round >= 1 &&
 	    round <= (int)(sizeof (rounds) / sizeof (rounds[0]))) {
 		size = rounds[round - 1].size;
 		loops = rounds[round - 1].loops;
 	}
 
 	kpreempt_disable();
 	start = gethrtime();
 	do {
 		for (l = 0; l < loops; l++, run_count++)
 			cs->func(abd, size, ctx, &zcp);
 
 		run_time_ns = gethrtime() - start;
 	} while (run_time_ns < MSEC2NSEC(1));
 	kpreempt_enable();
 
 	run_bw = size * run_count * NANOSEC;
 	run_bw /= run_time_ns;	/* B/s */
 	*result = run_bw/1024/1024; /* MiB/s */
 }
 
 /* states of the one-time slow CPU detection done by chksum_benchit() */
 #define	LIMIT_INIT	0
 #define	LIMIT_NEEDED	1
 #define	LIMIT_NOLIMIT	2
 
 /*
  * Benchmark a single implementation and fill in its bs* results.
  *
  * The 1k..256k rounds always run.  The very first implementation that is
  * benchmarked decides, via its 1k result, whether the CPU is too slow for
  * the larger rounds; that decision is kept for all later calls.
  */
 static void
 chksum_benchit(chksum_stat_t *cs)
 {
 	static int chksum_stat_limit = LIMIT_INIT;
 	void *salt = &cs->salt.zcs_bytes;
 	void *ctx = NULL;
 	abd_t *abd;
 
 	memset(salt, 0, sizeof (cs->salt.zcs_bytes));
 	if (cs->init)
 		ctx = cs->init(&cs->salt);
 
 	/* the rounds up to 1MiB use a linear abd as test memory */
 	abd = abd_alloc_linear(1<<20, B_FALSE);
 	chksum_run(cs, abd, ctx, 1, &cs->bs1k);
 	chksum_run(cs, abd, ctx, 2, &cs->bs4k);
 	chksum_run(cs, abd, ctx, 3, &cs->bs16k);
 	chksum_run(cs, abd, ctx, 4, &cs->bs64k);
 	chksum_run(cs, abd, ctx, 5, &cs->bs256k);
 
 	/* first call only: find out whether we run on a slow cpu */
 	if (chksum_stat_limit == LIMIT_INIT) {
 		chksum_stat_limit = (cs->bs1k < LIMIT_PERF_MBS) ?
 		    LIMIT_NEEDED : LIMIT_NOLIMIT;
 	}
 
 	/* benchmarks >= 1MiB are skipped when the CPU is too slow */
 	if (chksum_stat_limit != LIMIT_NEEDED) {
 		chksum_run(cs, abd, ctx, 6, &cs->bs1m);
 		abd_free(abd);
 
 		/* the largest rounds use a non linear abd as test memory */
 		abd = abd_alloc(1<<24, B_FALSE);
 		chksum_run(cs, abd, ctx, 7, &cs->bs4m);
 		chksum_run(cs, abd, ctx, 8, &cs->bs16m);
 	}
 
 	abd_free(abd);
 
 	/* release the temporary checksum context */
 	if (cs->free)
 		cs->free(ctx);
 }
 
 /*
  * Initialize and benchmark all supported implementations.
  *
  * Allocates chksum_stat_data with one entry for each of the four fixed
  * algorithms (edonr, skein, sha256, sha512) plus one per blake3
  * implementation, and benchmarks every entry.  The blake3 implementation
  * with the best 256k result is registered as the fastest one.
  */
 static void
 chksum_benchmark(void)
 {
 
 #ifndef _KERNEL
 	/* we need the benchmark only for the kernel module */
 	return;
 #endif
 
 	chksum_stat_t *cs;
 	int cbid = 0;
 	uint64_t max = 0;
 	uint32_t id, id_save;
 
 	/* space for the benchmark times */
 	chksum_stat_cnt = 4;
 	chksum_stat_cnt += blake3_impl_getcnt();
-	chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
+	chksum_stat_data = kmem_zalloc(
 	    sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
 
 	/* edonr - needs to be the first one here (slow CPU check) */
 	cs = &chksum_stat_data[cbid++];
 	cs->init = abd_checksum_edonr_tmpl_init;
 	cs->func = abd_checksum_edonr_native;
 	cs->free = abd_checksum_edonr_tmpl_free;
 	cs->name = "edonr";
 	cs->impl = "generic";
 	chksum_benchit(cs);
 
 	/* skein */
 	cs = &chksum_stat_data[cbid++];
 	cs->init = abd_checksum_skein_tmpl_init;
 	cs->func = abd_checksum_skein_native;
 	cs->free = abd_checksum_skein_tmpl_free;
 	cs->name = "skein";
 	cs->impl = "generic";
 	chksum_benchit(cs);
 
 	/* sha256 */
 	cs = &chksum_stat_data[cbid++];
 	cs->init = 0;
 	cs->func = abd_checksum_SHA256;
 	cs->free = 0;
 	cs->name = "sha256";
 	cs->impl = "generic";
 	chksum_benchit(cs);
 
 	/* sha512 */
 	cs = &chksum_stat_data[cbid++];
 	cs->init = 0;
 	cs->func = abd_checksum_SHA512_native;
 	cs->free = 0;
 	cs->name = "sha512";
 	cs->impl = "generic";
 	chksum_benchit(cs);
 
 	/* blake3 */
 	id_save = blake3_impl_getid();
 	for (id = 0; id < blake3_impl_getcnt(); id++) {
 		/* select the implementation that is benchmarked next */
 		blake3_impl_setid(id);
 		cs = &chksum_stat_data[cbid++];
 		cs->init = abd_checksum_blake3_tmpl_init;
 		cs->func = abd_checksum_blake3_native;
 		cs->free = abd_checksum_blake3_tmpl_free;
 		cs->name = "blake3";
 		cs->impl = blake3_impl_getname();
 		chksum_benchit(cs);
 		/* the 256k result decides which implementation is fastest */
 		if (cs->bs256k > max) {
 			max = cs->bs256k;
 			blake3_impl_set_fastest(id);
 		}
 	}
 
 	/* restore initial value */
 	blake3_impl_setid(id_save);
 }
 
 /*
  * Module setup: run the checksum benchmarks and publish the results as
  * the raw "chksum_bench" kstat.  A kstat that cannot be created is
  * silently skipped.
  */
 void
 chksum_init(void)
 {
 #ifdef _KERNEL
 	blake3_per_cpu_ctx_init();
 #endif
 
 	/* Benchmark supported implementations */
 	chksum_benchmark();
 
 	/* Install kstats for all implementations */
 	chksum_kstat = kstat_create("zfs", 0, "chksum_bench", "misc",
 	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
 	if (chksum_kstat == NULL)
 		return;
 
 	chksum_kstat->ks_data = NULL;
 	chksum_kstat->ks_ndata = UINT32_MAX;
 	kstat_set_raw_ops(chksum_kstat,
 	    chksum_kstat_headers,
 	    chksum_kstat_data,
 	    chksum_kstat_addr);
 	kstat_install(chksum_kstat);
 }
 
 /*
  * Module teardown: remove the kstat, free the benchmark results, and
  * reset the file-scope state so a later chksum_init() starts clean.
  */
 void
 chksum_fini(void)
 {
 	if (chksum_kstat != NULL) {
 		kstat_delete(chksum_kstat);
 		chksum_kstat = NULL;
 	}
 
 	if (chksum_stat_cnt != 0) {
 		kmem_free(chksum_stat_data,
 		    sizeof (chksum_stat_t) * chksum_stat_cnt);
 		chksum_stat_data = NULL;
 		chksum_stat_cnt = 0;
 	}
 
 #ifdef _KERNEL
 	blake3_per_cpu_ctx_fini();
 #endif
 }
diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
index 45ecb0773260..0c392b9da0fb 100644
--- a/module/zfs/zfs_vnops.c
+++ b/module/zfs/zfs_vnops.c
@@ -1,1005 +1,1005 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
  * Copyright (c) 2015 by Chunwei Chen. All rights reserved.
  * Copyright 2017 Nexenta Systems, Inc.
  */
 
 /* Portions Copyright 2007 Jeremy Teo */
 /* Portions Copyright 2010 Robert Milkowski */
 
 #include <sys/types.h>
 #include <sys/param.h>
 #include <sys/time.h>
 #include <sys/sysmacros.h>
 #include <sys/vfs.h>
 #include <sys/uio_impl.h>
 #include <sys/file.h>
 #include <sys/stat.h>
 #include <sys/kmem.h>
 #include <sys/cmn_err.h>
 #include <sys/errno.h>
 #include <sys/zfs_dir.h>
 #include <sys/zfs_acl.h>
 #include <sys/zfs_ioctl.h>
 #include <sys/fs/zfs.h>
 #include <sys/dmu.h>
 #include <sys/dmu_objset.h>
 #include <sys/spa.h>
 #include <sys/txg.h>
 #include <sys/dbuf.h>
 #include <sys/policy.h>
 #include <sys/zfs_vnops.h>
 #include <sys/zfs_quota.h>
 #include <sys/zfs_vfsops.h>
 #include <sys/zfs_znode.h>
 
 
 static ulong_t zfs_fsync_sync_cnt = 4;
 
 /*
  * Commit the intent log for a single znode.
  *
  * The thread-specific zfs_fsyncer_key is set to zfs_fsync_sync_cnt for
  * the duration of the call and cleared again before returning.  When the
  * objset's sync policy is ZFS_SYNC_DISABLED no commit is issued.
  *
  *	RETURN:	0 on success, or the error from zfs_enter_verify_zp().
  */
 int
 zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	int error = 0;
 
 	(void) tsd_set(zfs_fsyncer_key, (void *)(uintptr_t)zfs_fsync_sync_cnt);
 
 	if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
 		error = zfs_enter_verify_zp(zfsvfs, zp, FTAG);
 		if (error == 0) {
 			/* Count this znode as having a sync write in flight */
 			atomic_inc_32(&zp->z_sync_writes_cnt);
 			zil_commit(zfsvfs->z_log, zp->z_id);
 			atomic_dec_32(&zp->z_sync_writes_cnt);
 			zfs_exit(zfsvfs, FTAG);
 		}
 	}
 
 	(void) tsd_set(zfs_fsyncer_key, NULL);
 
 	return (error);
 }
 
 
 #if defined(SEEK_HOLE) && defined(SEEK_DATA)
 /*
  * Lseek support for finding holes (cmd == SEEK_HOLE) and
  * data (cmd == SEEK_DATA). "off" is an in/out parameter.
  *
  *	RETURN:	ENXIO if the starting offset is at or beyond the file size,
  *		or if dmu_offset_next() reports ESRCH.
  *		0 if dmu_offset_next() reports EBUSY; in that case *off is
  *		set to the file size when looking for a hole and is left
  *		untouched when looking for data.
  *		Otherwise the result of dmu_offset_next().
  */
 static int
 zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
 {
 	uint64_t noff = (uint64_t)*off; /* candidate new offset */
 	const uint64_t file_sz = zp->z_size;
 	const boolean_t hole = (cmd == F_SEEK_HOLE) ? B_TRUE : B_FALSE;
 	zfs_locked_range_t *lr;
 	int error;
 
 	if (noff >= file_sz)
 		return (SET_ERROR(ENXIO));
 
 	/* Flush any mmap()'d data to disk */
 	if (zn_has_cached_data(zp))
 		zn_flush_cached_data(zp, B_FALSE);
 
 	/* Search for the next hole/data while the whole file is read-locked */
 	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, file_sz, RL_READER);
 	error = dmu_offset_next(ZTOZSB(zp)->z_os, zp->z_id, hole, &noff);
 	zfs_rangelock_exit(lr);
 
 	switch (error) {
 	case ESRCH:
 		return (SET_ERROR(ENXIO));
 	case EBUSY:
 		/* File was dirty, so fall back to using generic logic */
 		if (hole)
 			*off = file_sz;
 		return (0);
 	default:
 		break;
 	}
 
 	/*
 	 * dmu_offset_next() only works on whole blocks, so the hole it
 	 * reports may begin after the logical end-of-file.  If the EOF
 	 * falls mid-block, report the "virtual hole" at the end of the
 	 * file as starting at the logical EOF rather than at the end of
 	 * the last block.
 	 */
 	if (noff > file_sz) {
 		ASSERT(hole);
 		noff = file_sz;
 	}
 
 	/* Never move the caller's offset backwards */
 	if (noff >= *off)
 		*off = noff;
 
 	return (error);
 }
 
 /*
  * Entry point for SEEK_HOLE/SEEK_DATA: runs zfs_holey_common() between
  * zfs_enter_verify_zp() and zfs_exit().
  *
  *	RETURN:	the error from zfs_enter_verify_zp() if it fails, otherwise
  *		the result of zfs_holey_common().
  */
 int
 zfs_holey(znode_t *zp, ulong_t cmd, loff_t *off)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	int error;
 
 	error = zfs_enter_verify_zp(zfsvfs, zp, FTAG);
 	if (error == 0) {
 		error = zfs_holey_common(zp, cmd, off);
 		zfs_exit(zfsvfs, FTAG);
 	}
 
 	return (error);
 }
 #endif /* SEEK_HOLE && SEEK_DATA */
 
 /*
  * Check whether the caller may access zp with the requested mode.
  *
  * When V_ACE_MASK is set in flag the check goes through zfs_zaccess();
  * otherwise it goes through zfs_zaccess_rwx().  On Linux kcred->user_ns
  * is passed as the final argument, on other platforms NULL.
  *
  *	RETURN:	0 if access is allowed, error code otherwise.
  */
 int
 zfs_access(znode_t *zp, int mode, int flag, cred_t *cr)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	int error;
 
 	error = zfs_enter_verify_zp(zfsvfs, zp, FTAG);
 	if (error != 0)
 		return (error);
 
 #if defined(__linux__)
 	if (flag & V_ACE_MASK) {
 		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr,
 		    kcred->user_ns);
 	} else {
 		error = zfs_zaccess_rwx(zp, mode, flag, cr, kcred->user_ns);
 	}
 #else
 	if (flag & V_ACE_MASK) {
 		error = zfs_zaccess(zp, mode, flag, B_FALSE, cr,
 		    NULL);
 	} else {
 		error = zfs_zaccess_rwx(zp, mode, flag, cr, NULL);
 	}
 #endif
 
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 static uint64_t zfs_vnops_read_chunk_size = 1024 * 1024; /* Tunable */
 
 /*
  * Read bytes from specified file into supplied buffer.
  *
  *	IN:	zp	- inode of file to be read from.
  *		uio	- structure supplying read location, range info,
  *			  and return buffer.
  *		ioflag	- O_SYNC flags; used to provide FRSYNC semantics.
  *			  O_DIRECT flag; used to bypass page cache.
  *		cr	- credentials of caller (currently unused).
  *
  *	OUT:	uio	- updated offset and range, buffer filled.
  *
  *	RETURN:	0 on success, error code on failure.
  *		EACCES	- the file is flagged ZFS_AV_QUARANTINED.
  *		EISDIR	- zp is a directory.
  *		EINVAL	- the uio offset is negative.
  *		EIO	- the underlying read failed with ECKSUM.
  *
  * Side Effects:
  *	inode - atime updated if byte count > 0
  */
 int
 zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 {
 	(void) cr;
 	int error = 0;
 	boolean_t frsync = B_FALSE;
 
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
 		return (error);
 
 	if (zp->z_pflags & ZFS_AV_QUARANTINED) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EACCES));
 	}
 
 	/* We don't copy out anything useful for directories. */
 	if (Z_ISDIR(ZTOTYPE(zp))) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EISDIR));
 	}
 
 	/*
 	 * Validate file offset
 	 */
 	if (zfs_uio_offset(uio) < (offset_t)0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	/*
 	 * Fasttrack empty reads
 	 */
 	if (zfs_uio_resid(uio) == 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (0);
 	}
 
 #ifdef FRSYNC
 	/*
 	 * If we're in FRSYNC mode, sync out this znode before reading it.
 	 * Only do this for non-snapshots.
 	 *
 	 * Some platforms do not support FRSYNC and instead map it
 	 * to O_SYNC, which results in unnecessary calls to zil_commit. We
 	 * only honor FRSYNC requests on platforms which support it.
 	 */
 	frsync = !!(ioflag & FRSYNC);
 #endif
 	/* A ZIL commit is also forced when the dataset is sync=always. */
 	if (zfsvfs->z_log &&
 	    (frsync || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
 		zil_commit(zfsvfs->z_log, zp->z_id);
 
 	/*
 	 * Lock the range against changes.
 	 */
 	zfs_locked_range_t *lr = zfs_rangelock_enter(&zp->z_rangelock,
 	    zfs_uio_offset(uio), zfs_uio_resid(uio), RL_READER);
 
 	/*
 	 * If we are reading past end-of-file we can skip
 	 * to the end; but we might still need to set atime.
 	 */
 	if (zfs_uio_offset(uio) >= zp->z_size) {
 		error = 0;
 		goto out;
 	}
 
 	ASSERT(zfs_uio_offset(uio) < zp->z_size);
 #if defined(__linux__)
 	/* Remembered so the EFAULT handling below can detect partial reads. */
 	ssize_t start_offset = zfs_uio_offset(uio);
 #endif
 	/* Never read beyond the current end of file. */
 	ssize_t n = MIN(zfs_uio_resid(uio), zp->z_size - zfs_uio_offset(uio));
 	ssize_t start_resid = n;
 
 	while (n > 0) {
 		/*
 		 * Limit each pass to what is left of the current
 		 * zfs_vnops_read_chunk_size sized chunk.
 		 */
 		ssize_t nbytes = MIN(n, zfs_vnops_read_chunk_size -
 		    P2PHASE(zfs_uio_offset(uio), zfs_vnops_read_chunk_size));
 #ifdef UIO_NOCOPY
 		if (zfs_uio_segflg(uio) == UIO_NOCOPY)
 			error = mappedread_sf(zp, nbytes, uio);
 		else
 #endif
 		if (zn_has_cached_data(zp) && !(ioflag & O_DIRECT)) {
 			error = mappedread(zp, nbytes, uio);
 		} else {
 			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
 			    uio, nbytes);
 		}
 
 		if (error) {
 			/* convert checksum errors into IO errors */
 			if (error == ECKSUM)
 				error = SET_ERROR(EIO);
 
 #if defined(__linux__)
 			/*
 			 * if we actually read some bytes, bubbling EFAULT
 			 * up to become EAGAIN isn't what we want here...
 			 *
 			 * ...on Linux, at least. On FBSD, doing this breaks.
 			 */
 			if (error == EFAULT &&
 			    (zfs_uio_offset(uio) - start_offset) != 0)
 				error = 0;
 #endif
 			break;
 		}
 
 		n -= nbytes;
 	}
 
 	/*
 	 * n is only decremented after a chunk completes without error, so
 	 * nread counts the bytes of fully completed chunks.
 	 */
 	int64_t nread = start_resid - n;
 	dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nread);
 	task_io_account_read(nread);
 out:
 	zfs_rangelock_exit(lr);
 
 	ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 /*
  * Strip the set-UID/set-GID bits from a znode as part of a write, when
  * the conditions described below hold.
  *
  *	IN:	zfsvfs	- filesystem the znode belongs to.
  *		zp	- znode being written.
  *		cr	- credentials of the writer.
  *		tx	- assigned transaction the mode update is made in.
  *
  *	IN/OUT:	clear_setid_bits_txgp
  *			- txg in which a TX_SETATTR was last logged by this
  *			  function; updated whenever a new record is logged.
  */
 static void
 zfs_clear_setid_bits_if_necessary(zfsvfs_t *zfsvfs, znode_t *zp, cred_t *cr,
     uint64_t *clear_setid_bits_txgp, dmu_tx_t *tx)
 {
 	zilog_t *zilog = zfsvfs->z_log;
 	const uint64_t uid = KUID_TO_SUID(ZTOUID(zp));
 
 	ASSERT(clear_setid_bits_txgp != NULL);
 	ASSERT(tx != NULL);
 
 	/*
 	 * Clear Set-UID/Set-GID bits on successful write if not
 	 * privileged and at least one of the execute bits is set.
 	 *
 	 * It would be nice to do this after all writes have
 	 * been done, but that would still expose the ISUID/ISGID
 	 * to another app after the partial write is committed.
 	 *
 	 * Note: we don't call zfs_fuid_map_id() here because
 	 * user 0 is not an ephemeral uid.
 	 */
 	mutex_enter(&zp->z_acl_lock);
 	if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) | (S_IXUSR >> 6))) != 0 &&
 	    (zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
 	    secpolicy_vnode_setid_retain(zp, cr,
 	    ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) {
 		uint64_t newmode;
 
 		zp->z_mode &= ~(S_ISUID | S_ISGID);
 		newmode = zp->z_mode;
 		(void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
 		    (void *)&newmode, sizeof (uint64_t), tx);
 
 		/* The lock is dropped before the change is logged. */
 		mutex_exit(&zp->z_acl_lock);
 
 		/*
 		 * Make sure SUID/SGID bits will be removed when we replay the
 		 * log. If the setid bits keep coming back, don't log more
 		 * than one TX_SETATTR per transaction group.
 		 */
 		if (*clear_setid_bits_txgp != dmu_tx_get_txg(tx)) {
 			vattr_t va = {0};
 
 			va.va_mask = ATTR_MODE;
 			va.va_nodeid = zp->z_id;
 			va.va_mode = newmode;
 			zfs_log_setattr(zilog, tx, TX_SETATTR, zp, &va,
 			    ATTR_MODE, NULL);
 			*clear_setid_bits_txgp = dmu_tx_get_txg(tx);
 		}
 	} else {
 		mutex_exit(&zp->z_acl_lock);
 	}
 }
 
 /*
  * Write the bytes to a file.
  *
  *	IN:	zp	- znode of file to be written to.
  *		uio	- structure supplying write location, range info,
  *			  and data buffer.
  *		ioflag	- O_APPEND flag set if in append mode.
  *			  O_DIRECT flag; used to bypass page cache.
  *			  O_SYNC/O_DSYNC flags; force a ZIL commit after
  *			  the write.
  *		cr	- credentials of caller.
  *
  *	OUT:	uio	- updated offset and range.
  *
  *	RETURN:	0 if success (including a partial write)
  *		error code if failure
  *		EROFS	- the filesystem is read-only.
  *		EPERM	- the file is immutable, or append-only and the
  *			  write is not an append.
  *		EINVAL	- the write offset is negative.
  *		EFAULT	- the uio data could not be faulted in.
  *		EFBIG	- the write exceeds the file size limit.
  *		EDQUOT	- a user, group or project block quota is exceeded.
  *
  * Timestamps:
  *	ip - ctime|mtime updated if byte count > 0
  */
 int
 zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 {
 	int error = 0, error1;
 	ssize_t start_resid = zfs_uio_resid(uio);
 	uint64_t clear_setid_bits_txg = 0;
 
 	/*
 	 * Fasttrack empty write
 	 */
 	ssize_t n = start_resid;
 	if (n == 0)
 		return (0);
 
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	if ((error = zfs_enter_verify_zp(zfsvfs, zp, FTAG)) != 0)
 		return (error);
 
 	/*
 	 * Attributes pushed out with sa_bulk_update() after each chunk:
 	 * mtime, ctime, size and flags.
 	 */
 	sa_bulk_attr_t bulk[4];
 	int count = 0;
 	uint64_t mtime[2], ctime[2];
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
 	    &zp->z_size, 8);
 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
 	    &zp->z_pflags, 8);
 
 	/*
 	 * Callers might not be able to detect properly that we are read-only,
 	 * so check it explicitly here.
 	 */
 	if (zfs_is_readonly(zfsvfs)) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EROFS));
 	}
 
 	/*
 	 * If immutable or not appending then return EPERM.
 	 * Intentionally allow ZFS_READONLY through here.
 	 * See zfs_zaccess_common()
 	 */
 	if ((zp->z_pflags & ZFS_IMMUTABLE) ||
 	    ((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & O_APPEND) &&
 	    (zfs_uio_offset(uio) < zp->z_size))) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EPERM));
 	}
 
 	/*
 	 * Validate file offset
 	 */
 	offset_t woff = ioflag & O_APPEND ? zp->z_size : zfs_uio_offset(uio);
 	if (woff < 0) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EINVAL));
 	}
 
 	const uint64_t max_blksz = zfsvfs->z_max_blksz;
 
 	/*
 	 * Pre-fault the pages to ensure slow (eg NFS) pages
 	 * don't hold up txg.
 	 * Skip this if uio contains loaned arc_buf.
 	 */
 	if (zfs_uio_prefaultpages(MIN(n, max_blksz), uio)) {
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EFAULT));
 	}
 
 	/*
 	 * If in append mode, set the io offset pointer to eof.
 	 */
 	zfs_locked_range_t *lr;
 	if (ioflag & O_APPEND) {
 		/*
 		 * Obtain an appending range lock to guarantee file append
 		 * semantics.  We reset the write offset once we have the lock.
 		 */
 		lr = zfs_rangelock_enter(&zp->z_rangelock, 0, n, RL_APPEND);
 		woff = lr->lr_offset;
 		if (lr->lr_length == UINT64_MAX) {
 			/*
 			 * We overlocked the file because this write will cause
 			 * the file block size to increase.
 			 * Note that zp->z_size cannot change with this lock
 			 * held.
 			 */
 			woff = zp->z_size;
 		}
 		zfs_uio_setoffset(uio, woff);
 	} else {
 		/*
 		 * Note that if the file block size will change as a result of
 		 * this write, then this range lock will lock the entire file
 		 * so that we can re-write the block safely.
 		 */
 		lr = zfs_rangelock_enter(&zp->z_rangelock, woff, n, RL_WRITER);
 	}
 
 	if (zn_rlimit_fsize(zp, uio)) {
 		zfs_rangelock_exit(lr);
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EFBIG));
 	}
 
 	const rlim64_t limit = MAXOFFSET_T;
 
 	if (woff >= limit) {
 		zfs_rangelock_exit(lr);
 		zfs_exit(zfsvfs, FTAG);
 		return (SET_ERROR(EFBIG));
 	}
 
 	/* Truncate the request so it does not extend past the limit. */
 	if (n > limit - woff)
 		n = limit - woff;
 
 	uint64_t end_size = MAX(zp->z_size, woff + n);
 	zilog_t *zilog = zfsvfs->z_log;
 
 	const uint64_t uid = KUID_TO_SUID(ZTOUID(zp));
 	const uint64_t gid = KGID_TO_SGID(ZTOGID(zp));
 	const uint64_t projid = zp->z_projid;
 
 	/*
 	 * Write the file in reasonable size chunks.  Each chunk is written
 	 * in a separate transaction; this keeps the intent log records small
 	 * and allows us to do more fine-grained space accounting.
 	 */
 	while (n > 0) {
 		woff = zfs_uio_offset(uio);
 
 		/* Quotas are re-checked before every chunk. */
 		if (zfs_id_overblockquota(zfsvfs, DMU_USERUSED_OBJECT, uid) ||
 		    zfs_id_overblockquota(zfsvfs, DMU_GROUPUSED_OBJECT, gid) ||
 		    (projid != ZFS_DEFAULT_PROJID &&
 		    zfs_id_overblockquota(zfsvfs, DMU_PROJECTUSED_OBJECT,
 		    projid))) {
 			error = SET_ERROR(EDQUOT);
 			break;
 		}
 
 		arc_buf_t *abuf = NULL;
 		if (n >= max_blksz && woff >= zp->z_size &&
 		    P2PHASE(woff, max_blksz) == 0 &&
 		    zp->z_blksz == max_blksz) {
 			/*
 			 * This write covers a full block.  "Borrow" a buffer
 			 * from the dmu so that we can fill it before we enter
 			 * a transaction.  This avoids the possibility of
 			 * holding up the transaction if the data copy hangs
 			 * up on a pagefault (e.g., from an NFS server mapping).
 			 */
 			size_t cbytes;
 
 			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
 			    max_blksz);
 			ASSERT(abuf != NULL);
 			ASSERT(arc_buf_size(abuf) == max_blksz);
 			if ((error = zfs_uiocopy(abuf->b_data, max_blksz,
 			    UIO_WRITE, uio, &cbytes))) {
 				dmu_return_arcbuf(abuf);
 				break;
 			}
 			ASSERT3S(cbytes, ==, max_blksz);
 		}
 
 		/*
 		 * Start a transaction.
 		 */
 		dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
 		dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
 		dmu_buf_impl_t *db = (dmu_buf_impl_t *)sa_get_db(zp->z_sa_hdl);
 		DB_DNODE_ENTER(db);
 		dmu_tx_hold_write_by_dnode(tx, DB_DNODE(db), woff,
 		    MIN(n, max_blksz));
 		DB_DNODE_EXIT(db);
 		zfs_sa_upgrade_txholds(tx, zp);
 		error = dmu_tx_assign(tx, TXG_WAIT);
 		if (error) {
 			dmu_tx_abort(tx);
 			if (abuf != NULL)
 				dmu_return_arcbuf(abuf);
 			break;
 		}
 
 		/*
 		 * NB: We must call zfs_clear_setid_bits_if_necessary before
 		 * committing the transaction!
 		 */
 
 		/*
 		 * If rangelock_enter() over-locked we grow the blocksize
 		 * and then reduce the lock range.  This will only happen
 		 * on the first iteration since rangelock_reduce() will
 		 * shrink down lr_length to the appropriate size.
 		 */
 		if (lr->lr_length == UINT64_MAX) {
 			uint64_t new_blksz;
 
 			if (zp->z_blksz > max_blksz) {
 				/*
 				 * File's blocksize is already larger than the
 				 * "recordsize" property.  Only let it grow to
 				 * the next power of 2.
 				 */
 				ASSERT(!ISP2(zp->z_blksz));
 				new_blksz = MIN(end_size,
 				    1 << highbit64(zp->z_blksz));
 			} else {
 				new_blksz = MIN(end_size, max_blksz);
 			}
 			zfs_grow_blocksize(zp, new_blksz, tx);
 			zfs_rangelock_reduce(lr, woff, n);
 		}
 
 		/*
 		 * XXX - should we really limit each write to z_max_blksz?
 		 * Perhaps we should use SPA_MAXBLOCKSIZE chunks?
 		 */
 		const ssize_t nbytes =
 		    MIN(n, max_blksz - P2PHASE(woff, max_blksz));
 
 		ssize_t tx_bytes;
 		if (abuf == NULL) {
 			/*
 			 * tx_bytes first holds the resid before the copy; it
 			 * is converted into the number of bytes written below.
 			 */
 			tx_bytes = zfs_uio_resid(uio);
 			zfs_uio_fault_disable(uio, B_TRUE);
 			error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl),
 			    uio, nbytes, tx);
 			zfs_uio_fault_disable(uio, B_FALSE);
 #ifdef __linux__
 			if (error == EFAULT) {
 				zfs_clear_setid_bits_if_necessary(zfsvfs, zp,
 				    cr, &clear_setid_bits_txg, tx);
 				dmu_tx_commit(tx);
 				/*
 				 * Account for partial writes before
 				 * continuing the loop.
 				 * Update needs to occur before the next
 				 * zfs_uio_prefaultpages, or prefaultpages may
 				 * error, and we may break the loop early.
 				 */
 				if (tx_bytes != zfs_uio_resid(uio))
 					n -= tx_bytes - zfs_uio_resid(uio);
 				if (zfs_uio_prefaultpages(MIN(n, max_blksz),
 				    uio)) {
 					break;
 				}
 				continue;
 			}
 #endif
 			/*
 			 * On FreeBSD, EFAULT should be propagated back to the
 			 * VFS, which will handle faulting and will retry.
 			 */
 			if (error != 0 && error != EFAULT) {
 				zfs_clear_setid_bits_if_necessary(zfsvfs, zp,
 				    cr, &clear_setid_bits_txg, tx);
 				dmu_tx_commit(tx);
 				break;
 			}
 			tx_bytes -= zfs_uio_resid(uio);
 		} else {
 			/* Implied by abuf != NULL: */
 			ASSERT3S(n, >=, max_blksz);
 			ASSERT0(P2PHASE(woff, max_blksz));
 			/*
 			 * We can simplify nbytes to MIN(n, max_blksz) since
 			 * P2PHASE(woff, max_blksz) is 0, and knowing
 			 * n >= max_blksz lets us simplify further:
 			 */
 			ASSERT3S(nbytes, ==, max_blksz);
 			/*
 			 * Thus, we're writing a full block at a block-aligned
 			 * offset and extending the file past EOF.
 			 *
 			 * dmu_assign_arcbuf_by_dbuf() will directly assign the
 			 * arc buffer to a dbuf.
 			 */
 			error = dmu_assign_arcbuf_by_dbuf(
 			    sa_get_db(zp->z_sa_hdl), woff, abuf, tx);
 			if (error != 0) {
 				/*
 				 * XXX This might not be necessary if
 				 * dmu_assign_arcbuf_by_dbuf is guaranteed
 				 * to be atomic.
 				 */
 				zfs_clear_setid_bits_if_necessary(zfsvfs, zp,
 				    cr, &clear_setid_bits_txg, tx);
 				dmu_return_arcbuf(abuf);
 				dmu_tx_commit(tx);
 				break;
 			}
 			ASSERT3S(nbytes, <=, zfs_uio_resid(uio));
 			zfs_uioskip(uio, nbytes);
 			tx_bytes = nbytes;
 		}
 		if (tx_bytes && zn_has_cached_data(zp) &&
 		    !(ioflag & O_DIRECT)) {
 			update_pages(zp, woff, tx_bytes, zfsvfs->z_os);
 		}
 
 		/*
 		 * If we made no progress, we're done.  If we made even
 		 * partial progress, update the znode and ZIL accordingly.
 		 */
 		if (tx_bytes == 0) {
 			(void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
 			    (void *)&zp->z_size, sizeof (uint64_t), tx);
 			dmu_tx_commit(tx);
 			ASSERT(error != 0);
 			break;
 		}
 
 		zfs_clear_setid_bits_if_necessary(zfsvfs, zp, cr,
 		    &clear_setid_bits_txg, tx);
 
 		zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
 
 		/*
 		 * Update the file size (z_size) if it has changed;
 		 * account for possible concurrent updates.
 		 */
 		while ((end_size = zp->z_size) < zfs_uio_offset(uio)) {
 			(void) atomic_cas_64(&zp->z_size, end_size,
 			    zfs_uio_offset(uio));
 			ASSERT(error == 0 || error == EFAULT);
 		}
 		/*
 		 * If we are replaying and eof is non zero then force
 		 * the file size to the specified eof. Note, there's no
 		 * concurrency during replay.
 		 */
 		if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
 			zp->z_size = zfsvfs->z_replay_eof;
 
 		error1 = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 		if (error1 != 0)
 			/* Avoid clobbering EFAULT. */
 			error = error1;
 
 		/*
 		 * NB: During replay, the TX_SETATTR record logged by
 		 * zfs_clear_setid_bits_if_necessary must precede any of
 		 * the TX_WRITE records logged here.
 		 */
 		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag,
 		    NULL, NULL);
 
 		dmu_tx_commit(tx);
 
 		if (error != 0)
 			break;
 		ASSERT3S(tx_bytes, ==, nbytes);
 		n -= nbytes;
 
 		/* Pre-fault the next chunk outside of any transaction. */
 		if (n > 0) {
 			if (zfs_uio_prefaultpages(MIN(n, max_blksz), uio)) {
 				error = SET_ERROR(EFAULT);
 				break;
 			}
 		}
 	}
 
 	zfs_znode_update_vfs(zp);
 	zfs_rangelock_exit(lr);
 
 	/*
 	 * If we're in replay mode, or we made no progress, or the
 	 * uio data is inaccessible return an error.  Otherwise, it's
 	 * at least a partial write, so it's successful.
 	 */
 	if (zfsvfs->z_replay || zfs_uio_resid(uio) == start_resid ||
 	    error == EFAULT) {
 		zfs_exit(zfsvfs, FTAG);
 		return (error);
 	}
 
 	if (ioflag & (O_SYNC | O_DSYNC) ||
 	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, zp->z_id);
 
 	const int64_t nwritten = start_resid - zfs_uio_resid(uio);
 	dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, nwritten);
 	task_io_account_write(nwritten);
 
 	zfs_exit(zfsvfs, FTAG);
 	return (0);
 }
 
 /*
  * Fetch the ACL of a znode via zfs_getacl().
  *
  * ATTR_NOACLCHECK in flag asks zfs_getacl() to skip its ACL check.
  *
  *	RETURN:	0 on success, error code on failure.
  */
 int
 zfs_getsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	boolean_t skipaclchk = B_FALSE;
 	int error;
 
 	if (flag & ATTR_NOACLCHECK)
 		skipaclchk = B_TRUE;
 
 	error = zfs_enter_verify_zp(zfsvfs, zp, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = zfs_getacl(zp, vsecp, skipaclchk, cr);
 
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 /*
  * Replace the ACL of a znode via zfs_setacl().
  *
  * ATTR_NOACLCHECK in flag asks zfs_setacl() to skip its ACL check.
  * When the dataset is sync=always the intent log is committed before
  * returning, whether or not zfs_setacl() succeeded.
  *
  *	RETURN:	0 on success, error code on failure.
  */
 int
 zfs_setsecattr(znode_t *zp, vsecattr_t *vsecp, int flag, cred_t *cr)
 {
 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
 	zilog_t	*zilog = zfsvfs->z_log;
 	boolean_t skipaclchk = B_FALSE;
 	int error;
 
 	if (flag & ATTR_NOACLCHECK)
 		skipaclchk = B_TRUE;
 
 	error = zfs_enter_verify_zp(zfsvfs, zp, FTAG);
 	if (error != 0)
 		return (error);
 
 	error = zfs_setacl(zp, vsecp, skipaclchk, cr);
 
 	if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, 0);
 
 	zfs_exit(zfsvfs, FTAG);
 	return (error);
 }
 
 #ifdef ZFS_DEBUG
 static int zil_fault_io = 0;
 #endif
 
 static void zfs_get_done(zgd_t *zgd, int error);
 
 /*
  * Get data to generate a TX_WRITE intent log record.
  *
  *	IN:	arg	- the zfsvfs_t of the filesystem.
  *		gen	- generation number the znode is expected to have.
  *		lr	- write log record; supplies the object, offset and
  *			  length, and for indirect writes holds the block
  *			  pointer handed to dmu_sync().
  *		buf	- destination buffer for an immediate write, or
  *			  NULL to request an indirect write.
  *		lwb	- stored in zgd->zgd_lwb.
  *		zio	- passed through to dmu_sync().
  *
  *	RETURN:	0 on success.
  *		ENOENT	- the znode cannot be found, is unlinked, has a
  *			  different generation, or the record's offset is
  *			  at or beyond the current file size.
  *		EIO	- the generation number could not be looked up.
  *		Otherwise the error from the DMU.
  */
 int
 zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
     struct lwb *lwb, zio_t *zio)
 {
 	zfsvfs_t *zfsvfs = arg;
 	objset_t *os = zfsvfs->z_os;
 	znode_t *zp;
 	uint64_t object = lr->lr_foid;
 	uint64_t offset = lr->lr_offset;
 	uint64_t size = lr->lr_length;
 	dmu_buf_t *db;
 	zgd_t *zgd;
 	int error = 0;
 	uint64_t zp_gen;
 
 	ASSERT3P(lwb, !=, NULL);
 	ASSERT3P(zio, !=, NULL);
 	ASSERT3U(size, !=, 0);
 
 	/*
 	 * Nothing to do if the file has been removed
 	 */
 	if (zfs_zget(zfsvfs, object, &zp) != 0)
 		return (SET_ERROR(ENOENT));
 	if (zp->z_unlinked) {
 		/*
 		 * Release the vnode asynchronously as we currently have the
 		 * txg stopped from syncing.
 		 */
 		zfs_zrele_async(zp);
 		return (SET_ERROR(ENOENT));
 	}
 	/* check if generation number matches */
 	if (sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
 	    sizeof (zp_gen)) != 0) {
 		zfs_zrele_async(zp);
 		return (SET_ERROR(EIO));
 	}
 	if (zp_gen != gen) {
 		zfs_zrele_async(zp);
 		return (SET_ERROR(ENOENT));
 	}
 
 	/*
 	 * From here on the zgd owns the znode hold and (once taken) the
 	 * range lock; both are released by zfs_get_done().
 	 */
-	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
+	zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
 	zgd->zgd_lwb = lwb;
 	zgd->zgd_private = zp;
 
 	/*
 	 * Write records come in two flavors: immediate and indirect.
 	 * For small writes it's cheaper to store the data with the
 	 * log record (immediate); for large writes it's cheaper to
 	 * sync the data and get a pointer to it (indirect) so that
 	 * we don't have to write the data twice.
 	 */
 	if (buf != NULL) { /* immediate write */
 		zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock,
 		    offset, size, RL_READER);
 		/* test for truncation needs to be done while range locked */
 		if (offset >= zp->z_size) {
 			error = SET_ERROR(ENOENT);
 		} else {
 			error = dmu_read(os, object, offset, size, buf,
 			    DMU_READ_NO_PREFETCH);
 		}
 		ASSERT(error == 0 || error == ENOENT);
 	} else { /* indirect write */
 		/*
 		 * Have to lock the whole block to ensure when it's
 		 * written out and its checksum is being calculated
 		 * that no one can change the data. We need to re-check
 		 * blocksize after we get the lock in case it's changed!
 		 */
 		for (;;) {
 			uint64_t blkoff;
 			size = zp->z_blksz;
 			/*
 			 * Round offset down to the start of its block; with
 			 * a non-power-of-2 block size the lock starts at
 			 * offset 0.
 			 */
 			blkoff = ISP2(size) ? P2PHASE(offset, size) : offset;
 			offset -= blkoff;
 			zgd->zgd_lr = zfs_rangelock_enter(&zp->z_rangelock,
 			    offset, size, RL_READER);
 			if (zp->z_blksz == size)
 				break;
 			/* Block size changed: undo the rounding and retry. */
 			offset += blkoff;
 			zfs_rangelock_exit(zgd->zgd_lr);
 		}
 		/* test for truncation needs to be done while range locked */
 		if (lr->lr_offset >= zp->z_size)
 			error = SET_ERROR(ENOENT);
 #ifdef ZFS_DEBUG
 		/* One-shot fault injection: fail once, then reset the flag. */
 		if (zil_fault_io) {
 			error = SET_ERROR(EIO);
 			zil_fault_io = 0;
 		}
 #endif
 		if (error == 0)
 			error = dmu_buf_hold(os, object, offset, zgd, &db,
 			    DMU_READ_NO_PREFETCH);
 
 		if (error == 0) {
 			blkptr_t *bp = &lr->lr_blkptr;
 
 			zgd->zgd_db = db;
 			zgd->zgd_bp = bp;
 
 			ASSERT(db->db_offset == offset);
 			ASSERT(db->db_size == size);
 
 			error = dmu_sync(zio, lr->lr_common.lrc_txg,
 			    zfs_get_done, zgd);
 			ASSERT(error || lr->lr_length <= size);
 
 			/*
 			 * On success, we need to wait for the write I/O
 			 * initiated by dmu_sync() to complete before we can
 			 * release this dbuf.  We will finish everything up
 			 * in the zfs_get_done() callback.
 			 */
 			if (error == 0)
 				return (0);
 
 			if (error == EALREADY) {
 				lr->lr_common.lrc_txtype = TX_WRITE2;
 				/*
 				 * TX_WRITE2 relies on the data previously
 				 * written by the TX_WRITE that caused
 				 * EALREADY.  We zero out the BP because
 				 * it is the old, currently-on-disk BP.
 				 */
 				zgd->zgd_bp = NULL;
 				BP_ZERO(bp);
 				error = 0;
 			}
 		}
 	}
 
 	/* Every path that reaches here cleans up synchronously. */
 	zfs_get_done(zgd, error);
 
 	return (error);
 }
 
 
 /*
  * Release everything a zgd_t set up by zfs_get_data() holds: the dbuf
  * hold (if one was taken), the range lock, the znode hold, and finally
  * the zgd itself.  Called directly by zfs_get_data(), and also handed to
  * dmu_sync() as its completion callback.  The error argument is unused.
  */
 static void
 zfs_get_done(zgd_t *zgd, int error)
 {
 	(void) error;
 	znode_t *zp = zgd->zgd_private;
 
 	/* zgd_db is only set on the indirect write path. */
 	if (zgd->zgd_db)
 		dmu_buf_rele(zgd->zgd_db, zgd);
 
 	zfs_rangelock_exit(zgd->zgd_lr);
 
 	/*
 	 * Release the vnode asynchronously as we currently have the
 	 * txg stopped from syncing.
 	 */
 	zfs_zrele_async(zp);
 
 	kmem_free(zgd, sizeof (zgd_t));
 }
 
 EXPORT_SYMBOL(zfs_access);
 EXPORT_SYMBOL(zfs_fsync);
 EXPORT_SYMBOL(zfs_holey);
 EXPORT_SYMBOL(zfs_read);
 EXPORT_SYMBOL(zfs_write);
 EXPORT_SYMBOL(zfs_getsecattr);
 EXPORT_SYMBOL(zfs_setsecattr);
 
 ZFS_MODULE_PARAM(zfs_vnops, zfs_vnops_, read_chunk_size, U64, ZMOD_RW,
 	"Bytes to read per chunk");
diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
index 20578a8223b2..1371e5187516 100644
--- a/module/zfs/zvol.c
+++ b/module/zfs/zvol.c
@@ -1,1739 +1,1739 @@
 /*
  * CDDL HEADER START
  *
  * The contents of this file are subject to the terms of the
  * Common Development and Distribution License (the "License").
  * You may not use this file except in compliance with the License.
  *
  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
  * or https://opensource.org/licenses/CDDL-1.0.
  * See the License for the specific language governing permissions
  * and limitations under the License.
  *
  * When distributing Covered Code, include this CDDL HEADER in each
  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  * If applicable, add the following below this CDDL HEADER, with the
  * fields enclosed by brackets "[]" replaced with your own identifying
  * information: Portions Copyright [yyyy] [name of copyright owner]
  *
  * CDDL HEADER END
  */
 /*
  * Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
  * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  * Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
  * LLNL-CODE-403049.
  *
  * ZFS volume emulation driver.
  *
  * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
  * Volumes are accessed through the symbolic links named:
  *
  * /dev/<pool_name>/<dataset_name>
  *
  * Volumes are persistent through reboot and module load.  No user command
  * needs to be run before opening and using a device.
  *
  * Copyright 2014 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
  * Copyright (c) 2012, 2019 by Delphix. All rights reserved.
  */
 
 /*
  * Note on locking of zvol state structures.
  *
  * These structures are used to maintain internal state used to emulate block
  * devices on top of zvols. In particular, management of device minor number
  * operations - create, remove, rename, and set_snapdev - involves access to
  * these structures. The zvol_state_lock is primarily used to protect the
  * zvol_state_list. The zv->zv_state_lock is used to protect the contents
  * of the zvol_state_t structures, as well as to make sure that when the
  * time comes to remove the structure from the list, it is not in use, and
  * therefore, it can be taken off zvol_state_list and freed.
  *
  * The zv_suspend_lock was introduced to allow for suspending I/O to a zvol,
  * e.g. for the duration of receive and rollback operations. This lock can be
  * held for significant periods of time. Given that it is undesirable to hold
  * mutexes for long periods of time, the following lock ordering applies:
  * - take zvol_state_lock if necessary, to protect zvol_state_list
  * - take zv_suspend_lock if necessary, by the code path in question
  * - take zv_state_lock to protect zvol_state_t
  *
  * The minor operations are issued to spa->spa_zvol_taskq queues, that are
  * single-threaded (to preserve order of minor operations), and are executed
  * through the zvol_task_cb that dispatches the specific operations. Therefore,
  * these operations are serialized per pool. Consequently, we can be certain
  * that for a given zvol, there is only one operation at a time in progress.
  * That is why one can be sure that first, zvol_state_t for a given zvol is
  * allocated and placed on zvol_state_list, and then other minor operations
  * for this zvol are going to proceed in the order of issue.
  *
  */
 
 #include <sys/dataset_kstats.h>
 #include <sys/dbuf.h>
 #include <sys/dmu_traverse.h>
 #include <sys/dsl_dataset.h>
 #include <sys/dsl_prop.h>
 #include <sys/dsl_dir.h>
 #include <sys/zap.h>
 #include <sys/zfeature.h>
 #include <sys/zil_impl.h>
 #include <sys/dmu_tx.h>
 #include <sys/zio.h>
 #include <sys/zfs_rlock.h>
 #include <sys/spa_impl.h>
 #include <sys/zvol.h>
 #include <sys/zvol_impl.h>
 
 /* When nonzero, the minor create/remove entry points return immediately. */
 unsigned int zvol_inhibit_dev = 0;
 /* Volume mode setting; initialized to ZFS_VOLMODE_GEOM. */
 unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;
 
 /* Hash buckets of zvol_state_t entries, selected with ZVOL_HT_HEAD(hash). */
 struct hlist_head *zvol_htable;
 /* List of zvol_state_t structures; see the locking note above. */
 static list_t zvol_state_list;
 /* Protects zvol_state_list (and is held while walking zvol_htable). */
 krwlock_t zvol_state_lock;
 
 /*
  * Kinds of asynchronous minor operations.  ZVOL_ASYNC_MAX comes last, so its
  * value equals the number of operation kinds listed before it.
  */
 typedef enum {
 	ZVOL_ASYNC_REMOVE_MINORS,
 	ZVOL_ASYNC_RENAME_MINORS,
 	ZVOL_ASYNC_SET_SNAPDEV,
 	ZVOL_ASYNC_SET_VOLMODE,
 	ZVOL_ASYNC_MAX
 } zvol_async_op_t;
 
 /*
  * Description of one asynchronous minor operation: the operation kind plus
  * its arguments.
  * NOTE(review): name1/name2/value presumably hold per-operation arguments
  * (e.g. old and new names for a rename, the property value for the set
  * operations) -- confirm against the code that fills and consumes the task.
  */
 typedef struct {
 	zvol_async_op_t op;
 	char name1[MAXNAMELEN];
 	char name2[MAXNAMELEN];
 	uint64_t value;
 } zvol_task_t;
 
 /*
  * Compute the 64-bit hash of a zvol name.  The name is folded one byte at a
  * time through zfs_crc64_table, starting from an all-ones seed; at most
  * MAXNAMELEN - 1 bytes are consumed and hashing stops at the terminating NUL.
  */
 uint64_t
 zvol_name_hash(const char *name)
 {
 	const uint8_t *cur = (const uint8_t *)name;
 	uint64_t hash = -1ULL;
 	int consumed = 0;
 
 	ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
 
 	while (consumed < MAXNAMELEN - 1 && *cur != 0) {
 		hash = (hash >> 8) ^ zfs_crc64_table[(hash ^ *cur) & 0xFF];
 		cur++;
 		consumed++;
 	}
 
 	return (hash);
 }
 
 /*
  * Find a zvol_state_t given the name and hash generated by zvol_name_hash.
  * If found, return with zv_suspend_lock and zv_state_lock taken, otherwise,
  * return (NULL) without taking the locks. The zv_suspend_lock is always taken
  * before zv_state_lock. The mode argument indicates the mode (including none)
  * for zv_suspend_lock to be taken.
  */
 zvol_state_t *
 zvol_find_by_name_hash(const char *name, uint64_t hash, int mode)
 {
 	zvol_state_t *zv;
 	struct hlist_node *p = NULL;
 
 	/* zvol_state_lock is held as reader for the whole bucket walk. */
 	rw_enter(&zvol_state_lock, RW_READER);
 	hlist_for_each(p, ZVOL_HT_HEAD(hash)) {
 		zv = hlist_entry(p, zvol_state_t, zv_hlink);
 		/* zv_hash and zv_name are compared under zv_state_lock. */
 		mutex_enter(&zv->zv_state_lock);
 		if (zv->zv_hash == hash &&
 		    strncmp(zv->zv_name, name, MAXNAMELEN) == 0) {
 			/*
 			 * this is the right zvol, take the locks in the
 			 * right order
 			 */
 			if (mode != RW_NONE &&
 			    !rw_tryenter(&zv->zv_suspend_lock, mode)) {
 				/*
 				 * The non-blocking attempt failed: drop
 				 * zv_state_lock, block on zv_suspend_lock,
 				 * then retake zv_state_lock.
 				 */
 				mutex_exit(&zv->zv_state_lock);
 				rw_enter(&zv->zv_suspend_lock, mode);
 				mutex_enter(&zv->zv_state_lock);
 				/*
 				 * zvol cannot be renamed as we continue
 				 * to hold zvol_state_lock
 				 */
 				ASSERT(zv->zv_hash == hash &&
 				    strncmp(zv->zv_name, name, MAXNAMELEN)
 				    == 0);
 			}
 			rw_exit(&zvol_state_lock);
 			return (zv);
 		}
 		mutex_exit(&zv->zv_state_lock);
 	}
 	rw_exit(&zvol_state_lock);
 
 	return (NULL);
 }
 
 /*
  * Look up a zvol_state_t by name only: the hash is computed here and the
  * search is delegated to zvol_find_by_name_hash().  Locking on return is
  * whatever zvol_find_by_name_hash() provides for the given mode; NULL is
  * returned when no zvol matches.
  */
 static zvol_state_t *
 zvol_find_by_name(const char *name, int mode)
 {
 	uint64_t hash = zvol_name_hash(name);
 
 	return (zvol_find_by_name_hash(name, hash, mode));
 }
 
 /*
  * ZFS_IOC_CREATE callback handles dmu zvol and zap object creation.
  *
  * os	objset in which ZVOL_OBJ and ZVOL_ZAP_OBJ are claimed
  * arg	zfs_creat_t whose zct_props nvlist supplies volsize (required) and
  *	volblocksize (optional, falls back to the property default)
  * cr	not referenced by this function
  * tx	transaction the claims and the zap update are made in
  *
  * Failures of the claim/update calls are only checked with ASSERT.
  */
 void
 zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
 {
 	zfs_creat_t *zct = arg;
 	nvlist_t *nvprops = zct->zct_props;
 	int error;
 	uint64_t volblocksize, volsize;
 
 	/* volsize must be present; volblocksize may be absent. */
 	VERIFY(nvlist_lookup_uint64(nvprops,
 	    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0);
 	if (nvlist_lookup_uint64(nvprops,
 	    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0)
 		volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
 
 	/*
 	 * These properties must be removed from the list so the generic
 	 * property setting step won't apply to them.
 	 */
 	VERIFY(nvlist_remove_all(nvprops,
 	    zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0);
 	(void) nvlist_remove_all(nvprops,
 	    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE));
 
 	/* The data object, created with the requested block size. */
 	error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize,
 	    DMU_OT_NONE, 0, tx);
 	ASSERT(error == 0);
 
 	/* The property zap, which records the volume size under "size". */
 	error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
 	    DMU_OT_NONE, 0, tx);
 	ASSERT(error == 0);
 
 	error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
 	ASSERT(error == 0);
 }
 
 /*
  * ZFS_IOC_OBJSET_STATS entry point.
  *
  * Adds the volume size (the "size" entry of ZVOL_ZAP_OBJ) to nv and, when
  * the object info of ZVOL_OBJ can be read, its data block size as well.
  * Returns 0 on success or the error from zap_lookup()/dmu_object_info().
  */
 int
 zvol_get_stats(objset_t *os, nvlist_t *nv)
 {
 	dmu_object_info_t *info;
 	uint64_t volsize;
 	int error;
 
 	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
 	if (error != 0)
 		return (SET_ERROR(error));
 
 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, volsize);
 
 	info = kmem_alloc(sizeof (*info), KM_SLEEP);
 	error = dmu_object_info(os, ZVOL_OBJ, info);
 	if (error == 0) {
 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE,
 		    info->doi_data_block_size);
 	}
 	kmem_free(info, sizeof (*info));
 
 	return (SET_ERROR(error));
 }
 
 /*
  * Sanity check volume size: it must be nonzero and a multiple of blocksize.
  * On _ILP32 builds, volsize - 1 must additionally not exceed
  * SPEC_MAXOFFSET_T.  Returns 0 if acceptable, EINVAL or EOVERFLOW otherwise.
  */
 int
 zvol_check_volsize(uint64_t volsize, uint64_t blocksize)
 {
 	if (volsize == 0 || (volsize % blocksize) != 0)
 		return (SET_ERROR(EINVAL));
 
 #ifdef _ILP32
 	if (volsize - 1 > SPEC_MAXOFFSET_T)
 		return (SET_ERROR(EOVERFLOW));
 #endif
 
 	return (0);
 }
 
 /*
  * Record a new volume size in the objset: update the "size" entry of
  * ZVOL_ZAP_OBJ in its own transaction, wait for that txg to be synced, and
  * then (if the update succeeded) free everything in ZVOL_OBJ from volsize
  * to DMU_OBJECT_END.  Returns 0 on success or the first error encountered.
  */
 static int
 zvol_update_volsize(uint64_t volsize, objset_t *os)
 {
 	dmu_tx_t *tx = dmu_tx_create(os);
 	uint64_t txg;
 	int error;
 
 	dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
 	dmu_tx_mark_netfree(tx);
 
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error != 0) {
 		dmu_tx_abort(tx);
 		return (SET_ERROR(error));
 	}
 
 	txg = dmu_tx_get_txg(tx);
 	error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
 	dmu_tx_commit(tx);
 
 	/* The wait happens whether or not zap_update() succeeded. */
 	txg_wait_synced(dmu_objset_pool(os), txg);
 
 	if (error != 0)
 		return (error);
 
 	return (dmu_free_long_range(os, ZVOL_OBJ, volsize, DMU_OBJECT_END));
 }
 
 /*
  * Set ZFS_PROP_VOLSIZE set entry point.  Note that modifying the volume
  * size will result in a udev "change" event being generated.
  *
  * name		dataset name of the zvol
  * volsize	new size; validated with zvol_check_volsize() against the
  *		data block size of ZVOL_OBJ
  *
  * Returns 0 on success, EROFS if the dataset's readonly property is set,
  * or the error from the property lookup, dmu_objset_own(),
  * dmu_object_info(), zvol_check_volsize() or zvol_update_volsize().
  */
 int
 zvol_set_volsize(const char *name, uint64_t volsize)
 {
 	objset_t *os = NULL;
 	uint64_t readonly;
 	int error;
 	boolean_t owned = B_FALSE;
 
 	error = dsl_prop_get_integer(name,
 	    zfs_prop_to_name(ZFS_PROP_READONLY), &readonly, NULL);
 	if (error != 0)
 		return (SET_ERROR(error));
 	if (readonly)
 		return (SET_ERROR(EROFS));
 
 	/* On a match, zv_state_lock and zv_suspend_lock (reader) are held. */
 	zvol_state_t *zv = zvol_find_by_name(name, RW_READER);
 
 	ASSERT(zv == NULL || (MUTEX_HELD(&zv->zv_state_lock) &&
 	    RW_READ_HELD(&zv->zv_suspend_lock)));
 
 	if (zv == NULL || zv->zv_objset == NULL) {
 		/*
 		 * No objset is available through a zvol_state_t, so own the
 		 * dataset for the duration of the update.  zv_suspend_lock
 		 * is dropped first; zv_state_lock (if any) stays held.
 		 */
 		if (zv != NULL)
 			rw_exit(&zv->zv_suspend_lock);
 		if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE, B_TRUE,
 		    FTAG, &os)) != 0) {
 			if (zv != NULL)
 				mutex_exit(&zv->zv_state_lock);
 			return (SET_ERROR(error));
 		}
 		owned = B_TRUE;
 		if (zv != NULL)
 			zv->zv_objset = os;
 	} else {
 		os = zv->zv_objset;
 	}
 
 	dmu_object_info_t *doi = kmem_alloc(sizeof (*doi), KM_SLEEP);
 
 	if ((error = dmu_object_info(os, ZVOL_OBJ, doi)) ||
 	    (error = zvol_check_volsize(volsize, doi->doi_data_block_size)))
 		goto out;
 
 	error = zvol_update_volsize(volsize, os);
 	if (error == 0 && zv != NULL) {
 		zv->zv_volsize = volsize;
 		zv->zv_changed = 1;
 	}
 out:
 	kmem_free(doi, sizeof (dmu_object_info_t));
 
 	if (owned) {
 		/* Undo the temporary ownership taken above. */
 		dmu_objset_disown(os, B_TRUE, FTAG);
 		if (zv != NULL)
 			zv->zv_objset = NULL;
 	} else {
 		/*
 		 * Not owned means the else branch above was taken, so zv is
 		 * non-NULL and zv_suspend_lock is still held.
 		 */
 		rw_exit(&zv->zv_suspend_lock);
 	}
 
 	if (zv != NULL)
 		mutex_exit(&zv->zv_state_lock);
 
 	/* Called after zv_state_lock has been dropped. */
 	if (error == 0 && zv != NULL)
 		zvol_os_update_volsize(zv, volsize);
 
 	return (SET_ERROR(error));
 }
 
 /*
  * Sanity check volume block size.
  *
  * name		pool/dataset name; used with spa_open() only when
  *		volblocksize exceeds SPA_OLD_MAXBLOCKSIZE
  * volblocksize	candidate block size
  *
  * Returns 0 if the size is acceptable.  Otherwise returns the spa_open()
  * error, ENOTSUP if the size exceeds SPA_OLD_MAXBLOCKSIZE while
  * SPA_FEATURE_LARGE_BLOCKS is not enabled, or EDOM if the size is above
  * zfs_max_recordsize, below SPA_MINBLOCKSIZE, above SPA_MAXBLOCKSIZE, or
  * fails the ISP2() check.
  */
 int
 zvol_check_volblocksize(const char *name, uint64_t volblocksize)
 {
 	/* Record sizes above 128k need the feature to be enabled */
 	if (volblocksize > SPA_OLD_MAXBLOCKSIZE) {
 		spa_t *spa;
 		int error;
 
 		if ((error = spa_open(name, &spa, FTAG)) != 0)
 			return (error);
 
 		if (!spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) {
 			spa_close(spa, FTAG);
 			return (SET_ERROR(ENOTSUP));
 		}
 
 		/*
 		 * We don't allow setting the property above 1MB,
 		 * unless the tunable has been changed.
 		 */
 		if (volblocksize > zfs_max_recordsize) {
 			/* Every exit after spa_open() must drop the hold. */
 			spa_close(spa, FTAG);
 			return (SET_ERROR(EDOM));
 		}
 
 		spa_close(spa, FTAG);
 	}
 
 	if (volblocksize < SPA_MINBLOCKSIZE ||
 	    volblocksize > SPA_MAXBLOCKSIZE ||
 	    !ISP2(volblocksize))
 		return (SET_ERROR(EDOM));
 
 	return (0);
 }
 
 /*
  * Replay a TX_TRUNCATE ZIL transaction if asked.  TX_TRUNCATE is how we
  * implement DKIOCFREE/free-long-range.
  *
  * arg1 is the zvol_state_t, arg2 the lr_truncate_t record (byteswapped in
  * place first when byteswap is set).  Returns 0 or the error from
  * dmu_tx_assign()/dmu_free_long_range().
  */
 static int
 zvol_replay_truncate(void *arg1, void *arg2, boolean_t byteswap)
 {
 	zvol_state_t *zv = arg1;
 	lr_truncate_t *lr = arg2;
 	uint64_t offset, length;
 	dmu_tx_t *tx;
 	int error;
 
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
 
 	offset = lr->lr_offset;
 	length = lr->lr_length;
 
 	tx = dmu_tx_create(zv->zv_objset);
 	dmu_tx_mark_netfree(tx);
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error != 0) {
 		dmu_tx_abort(tx);
 		return (error);
 	}
 
 	(void) zil_replaying(zv->zv_zilog, tx);
 	dmu_tx_commit(tx);
 
 	/* The range is freed only after the transaction is committed. */
 	return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length));
 }
 
 /*
  * Replay a TX_WRITE ZIL transaction that didn't get committed
  * after a system failure
  *
  * arg1 is the zvol_state_t, arg2 the lr_write_t record with its data
  * immediately following it; the record header is byteswapped in place when
  * byteswap is set.  Returns 0 or the error from dmu_tx_assign().
  */
 static int
 zvol_replay_write(void *arg1, void *arg2, boolean_t byteswap)
 {
 	zvol_state_t *zv = arg1;
 	lr_write_t *lr = arg2;
 	objset_t *os = zv->zv_objset;
 	char *data = (char *)(lr + 1);  /* data follows lr_write_t */
 	uint64_t offset, length;
 	dmu_tx_t *tx;
 	int error;
 
 	if (byteswap)
 		byteswap_uint64_array(lr, sizeof (*lr));
 
 	offset = lr->lr_offset;
 	length = lr->lr_length;
 
 	/* If it's a dmu_sync() block, write the whole block */
 	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
 		uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
 		if (length < blocksize) {
 			/* Round the offset down to the start of its block. */
 			offset -= offset % blocksize;
 			length = blocksize;
 		}
 	}
 
 	tx = dmu_tx_create(os);
 	dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length);
 	error = dmu_tx_assign(tx, TXG_WAIT);
 	if (error) {
 		dmu_tx_abort(tx);
 	} else {
 		dmu_write(os, ZVOL_OBJ, offset, length, data, tx);
 		(void) zil_replaying(zv->zv_zilog, tx);
 		dmu_tx_commit(tx);
 	}
 
 	return (error);
 }
 
 /*
  * Replay handler used for every transaction type a zvol does not replay.
  * All arguments are ignored; the result is always ENOTSUP.
  */
 static int
 zvol_replay_err(void *arg1, void *arg2, boolean_t byteswap)
 {
 	(void) arg1;
 	(void) arg2;
 	(void) byteswap;
 
 	return (SET_ERROR(ENOTSUP));
 }
 
 /*
  * Callback vectors for replaying records.
  * Only TX_WRITE and TX_TRUNCATE are needed for zvol.
  *
  * There is one entry per transaction type, in the order named by the
  * per-entry comments; every other type is routed to zvol_replay_err().
  */
 zil_replay_func_t *const zvol_replay_vector[TX_MAX_TYPE] = {
 	zvol_replay_err,	/* no such transaction type */
 	zvol_replay_err,	/* TX_CREATE */
 	zvol_replay_err,	/* TX_MKDIR */
 	zvol_replay_err,	/* TX_MKXATTR */
 	zvol_replay_err,	/* TX_SYMLINK */
 	zvol_replay_err,	/* TX_REMOVE */
 	zvol_replay_err,	/* TX_RMDIR */
 	zvol_replay_err,	/* TX_LINK */
 	zvol_replay_err,	/* TX_RENAME */
 	zvol_replay_write,	/* TX_WRITE */
 	zvol_replay_truncate,	/* TX_TRUNCATE */
 	zvol_replay_err,	/* TX_SETATTR */
 	zvol_replay_err,	/* TX_ACL */
 	zvol_replay_err,	/* TX_CREATE_ATTR */
 	zvol_replay_err,	/* TX_CREATE_ACL_ATTR */
 	zvol_replay_err,	/* TX_MKDIR_ACL */
 	zvol_replay_err,	/* TX_MKDIR_ATTR */
 	zvol_replay_err,	/* TX_MKDIR_ACL_ATTR */
 	zvol_replay_err,	/* TX_WRITE2 */
 	zvol_replay_err,	/* TX_SETSAXATTR */
 	zvol_replay_err,	/* TX_RENAME_EXCHANGE */
 	zvol_replay_err,	/* TX_RENAME_WHITEOUT */
 };
 
 /*
  * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions.
  *
  * We store data in the log buffers if it's small enough.
  * Otherwise we will later flush the data out via dmu_sync().
  */
 static const ssize_t zvol_immediate_write_sz = 32768;
 
 /*
  * Create and assign the TX_WRITE itx(s) describing a write of size bytes at
  * offset.  The request may be split into several records.  Nothing is done
  * when zil_replaying() reports true for this zilog/tx.
  *
  * zv		zvol being written
  * tx		transaction the write belongs to
  * offset	starting byte offset of the write
  * size		length of the write in bytes
  * sync		stored in each itx as itx_sync; also selects WR_COPIED over
  *		WR_NEED_COPY when no indirect write is chosen
  */
 void
 zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
     uint64_t size, int sync)
 {
 	uint32_t blocksize = zv->zv_volblocksize;
 	zilog_t *zilog = zv->zv_zilog;
 	itx_wr_state_t write_state;
 	uint64_t sz = size;	/* original size, for the accounting below */
 
 	if (zil_replaying(zilog, tx))
 		return;
 
 	/* Choose the default write state for the whole request. */
 	if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
 		write_state = WR_INDIRECT;
 	else if (!spa_has_slogs(zilog->zl_spa) &&
 	    size >= blocksize && blocksize > zvol_immediate_write_sz)
 		write_state = WR_INDIRECT;
 	else if (sync)
 		write_state = WR_COPIED;
 	else
 		write_state = WR_NEED_COPY;
 
 	while (size) {
 		itx_t *itx;
 		lr_write_t *lr;
 		itx_wr_state_t wr_state = write_state;
 		ssize_t len = size;
 
 		/*
 		 * A WR_COPIED request larger than zil_max_copied_data() is
 		 * logged as WR_NEED_COPY instead.  A WR_INDIRECT record is
 		 * clipped so that it ends no later than the end of the
 		 * blocksize-sized block containing offset.
 		 */
 		if (wr_state == WR_COPIED && size > zil_max_copied_data(zilog))
 			wr_state = WR_NEED_COPY;
 		else if (wr_state == WR_INDIRECT)
 			len = MIN(blocksize - P2PHASE(offset, blocksize), size);
 
 		itx = zil_itx_create(TX_WRITE, sizeof (*lr) +
 		    (wr_state == WR_COPIED ? len : 0));
 		lr = (lr_write_t *)&itx->itx_lr;
 		/*
 		 * For WR_COPIED the data is read into the space right after
 		 * the record; if that read fails, start over with a bare
 		 * record in WR_NEED_COPY state.
 		 */
 		if (wr_state == WR_COPIED && dmu_read_by_dnode(zv->zv_dn,
 		    offset, len, lr+1, DMU_READ_NO_PREFETCH) != 0) {
 			zil_itx_destroy(itx);
 			itx = zil_itx_create(TX_WRITE, sizeof (*lr));
 			lr = (lr_write_t *)&itx->itx_lr;
 			wr_state = WR_NEED_COPY;
 		}
 
 		itx->itx_wr_state = wr_state;
 		lr->lr_foid = ZVOL_OBJ;
 		lr->lr_offset = offset;
 		lr->lr_length = len;
 		lr->lr_blkoff = 0;
 		BP_ZERO(&lr->lr_blkptr);
 
 		itx->itx_private = zv;
 		itx->itx_sync = sync;
 
 		(void) zil_itx_assign(zilog, itx, tx);
 
 		offset += len;
 		size -= len;
 	}
 
 	/* Only non-indirect requests are reported to dsl_pool_wrlog_count(). */
 	if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
 		dsl_pool_wrlog_count(zilog->zl_dmu_pool, sz, tx->tx_txg);
 	}
 }
 
 /*
  * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE.
  *
  * A single TX_TRUNCATE record covering [off, off + len) of ZVOL_OBJ is
  * created and assigned to tx.  Nothing is logged when zil_replaying()
  * reports true for this zilog/tx.
  */
 void
 zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
     boolean_t sync)
 {
 	zilog_t *zilog = zv->zv_zilog;
 	lr_truncate_t *rec;
 	itx_t *itx;
 
 	if (zil_replaying(zilog, tx))
 		return;
 
 	itx = zil_itx_create(TX_TRUNCATE, sizeof (*rec));
 	itx->itx_sync = sync;
 
 	rec = (lr_truncate_t *)&itx->itx_lr;
 	rec->lr_foid = ZVOL_OBJ;
 	rec->lr_offset = off;
 	rec->lr_length = len;
 
 	zil_itx_assign(zilog, itx, tx);
 }
 
 
 /*
  * Tear down a zgd built by zvol_get_data(): release the dbuf hold if one
  * was taken, drop the range lock and free the structure.  The error
  * argument is ignored.
  */
 static void
 zvol_get_done(zgd_t *zgd, int error)
 {
 	(void) error;
 
 	if (zgd->zgd_db != NULL)
 		dmu_buf_rele(zgd->zgd_db, zgd);
 
 	zfs_rangelock_exit(zgd->zgd_lr);
 
 	kmem_free(zgd, sizeof (zgd_t));
 }
 
 /*
  * Get data to generate a TX_WRITE intent log record.
  *
  * arg	the zvol_state_t
  * arg2	not referenced by this function
  * lr	write record; lr_offset/lr_length select the data and lr_blkptr
  *	is handed to dmu_sync() for indirect writes
  * buf	destination for an immediate write, or NULL for an indirect write
  * lwb	stored in the zgd
  * zio	passed to dmu_sync() for indirect writes
  *
  * Returns 0 when dmu_sync() succeeds, in which case zvol_get_done() is left
  * to run as its completion callback.  On every other path zvol_get_done()
  * is called here and the error (possibly 0, for an immediate write) is
  * returned.
  */
 int
 zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
     struct lwb *lwb, zio_t *zio)
 {
 	zvol_state_t *zv = arg;
 	uint64_t offset = lr->lr_offset;
 	uint64_t size = lr->lr_length;
 	dmu_buf_t *db;
 	zgd_t *zgd;
 	int error;
 
 	ASSERT3P(lwb, !=, NULL);
 	ASSERT3P(zio, !=, NULL);
 	ASSERT3U(size, !=, 0);
 
-	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
+	zgd = kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
 	zgd->zgd_lwb = lwb;
 
 	/*
 	 * Write records come in two flavors: immediate and indirect.
 	 * For small writes it's cheaper to store the data with the
 	 * log record (immediate); for large writes it's cheaper to
 	 * sync the data and get a pointer to it (indirect) so that
 	 * we don't have to write the data twice.
 	 */
 	if (buf != NULL) { /* immediate write */
 		zgd->zgd_lr = zfs_rangelock_enter(&zv->zv_rangelock, offset,
 		    size, RL_READER);
 		error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf,
 		    DMU_READ_NO_PREFETCH);
 	} else { /* indirect write */
 		/*
 		 * Have to lock the whole block to ensure when it's written out
 		 * and its checksum is being calculated that no one can change
 		 * the data. Contrarily to zfs_get_data we need not re-check
 		 * blocksize after we get the lock because it cannot be changed.
 		 */
 		size = zv->zv_volblocksize;
 		offset = P2ALIGN_TYPED(offset, size, uint64_t);
 		zgd->zgd_lr = zfs_rangelock_enter(&zv->zv_rangelock, offset,
 		    size, RL_READER);
 		error = dmu_buf_hold_by_dnode(zv->zv_dn, offset, zgd, &db,
 		    DMU_READ_NO_PREFETCH);
 		if (error == 0) {
 			blkptr_t *bp = &lr->lr_blkptr;
 
 			zgd->zgd_db = db;
 			zgd->zgd_bp = bp;
 
 			ASSERT(db != NULL);
 			ASSERT(db->db_offset == offset);
 			ASSERT(db->db_size == size);
 
 			error = dmu_sync(zio, lr->lr_common.lrc_txg,
 			    zvol_get_done, zgd);
 
 			/* The zgd now belongs to the dmu_sync() callback. */
 			if (error == 0)
 				return (0);
 		}
 	}
 
 	zvol_get_done(zgd, error);
 
 	return (SET_ERROR(error));
 }
 
 /*
  * Register a zvol_state_t: it is put at the head of zvol_state_list and
  * linked into the zvol_htable bucket selected by zv_hash.  The caller must
  * hold zvol_state_lock as writer.
  */
 
 void
 zvol_insert(zvol_state_t *zv)
 {
 	struct hlist_head *bucket;
 
 	ASSERT(RW_WRITE_HELD(&zvol_state_lock));
 
 	list_insert_head(&zvol_state_list, zv);
 
 	bucket = ZVOL_HT_HEAD(zv->zv_hash);
 	hlist_add_head(&zv->zv_hlink, bucket);
 }
 
 /*
  * Simply remove the zvol from the list of zvols and from its hash bucket.
  * The caller must hold zvol_state_lock as writer; the zvol_state_t itself
  * is not freed here.
  */
 static void
 zvol_remove(zvol_state_t *zv)
 {
 	ASSERT(RW_WRITE_HELD(&zvol_state_lock));
 
 	list_remove(&zvol_state_list, zv);
 
 	hlist_del(&zv->zv_hlink);
 }
 
 /*
  * Setup zv after we just own the zv->objset
  *
  * Resets zv_zilog and ZVOL_WRITTEN_TO, holds the ZVOL_OBJ dnode in zv_dn,
  * publishes the volume size and sets the read-only state.  The caller must
  * hold zv_state_lock and zv_suspend_lock.  Returns 0 or the error from the
  * property lookup, zap_lookup() or dnode_hold().
  */
 static int
 zvol_setup_zv(zvol_state_t *zv)
 {
 	uint64_t volsize;
 	int error;
 	uint64_t ro;
 	objset_t *os = zv->zv_objset;
 
 	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 	ASSERT(RW_LOCK_HELD(&zv->zv_suspend_lock));
 
 	zv->zv_zilog = NULL;
 	zv->zv_flags &= ~ZVOL_WRITTEN_TO;
 
 	error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
 	if (error)
 		return (SET_ERROR(error));
 
 	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
 	if (error)
 		return (SET_ERROR(error));
 
 	/* This hold is dropped by dnode_rele() in zvol_shutdown_zv(). */
 	error = dnode_hold(os, ZVOL_OBJ, zv, &zv->zv_dn);
 	if (error)
 		return (SET_ERROR(error));
 
 	/* The capacity is passed as volsize >> 9, i.e. in 512-byte units. */
 	zvol_os_set_capacity(zv, volsize >> 9);
 	zv->zv_volsize = volsize;
 
 	/* Read-only property, snapshot, or non-writeable pool => read-only. */
 	if (ro || dmu_objset_is_snapshot(os) ||
 	    !spa_writeable(dmu_objset_spa(os))) {
 		zvol_os_set_disk_ro(zv, 1);
 		zv->zv_flags |= ZVOL_RDONLY;
 	} else {
 		zvol_os_set_disk_ro(zv, 0);
 		zv->zv_flags &= ~ZVOL_RDONLY;
 	}
 	return (0);
 }
 
 /*
  * Shutdown every zv_objset related stuff except zv_objset itself.
  * This is the reverse of zvol_setup_zv.
  *
  * The caller must hold zv_state_lock and zv_suspend_lock.
  */
 static void
 zvol_shutdown_zv(zvol_state_t *zv)
 {
 	ASSERT(MUTEX_HELD(&zv->zv_state_lock) &&
 	    RW_LOCK_HELD(&zv->zv_suspend_lock));
 
 	/* The zilog is closed only when ZVOL_WRITTEN_TO is set. */
 	if (zv->zv_flags & ZVOL_WRITTEN_TO) {
 		ASSERT(zv->zv_zilog != NULL);
 		zil_close(zv->zv_zilog);
 	}
 
 	zv->zv_zilog = NULL;
 
 	/* Drop the dnode hold taken in zvol_setup_zv(). */
 	dnode_rele(zv->zv_dn, zv);
 	zv->zv_dn = NULL;
 
 	/*
 	 * Evict cached data. We must write out any dirty data before
 	 * disowning the dataset.
 	 */
 	if (zv->zv_flags & ZVOL_WRITTEN_TO)
 		txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
 	(void) dmu_objset_evict_dbufs(zv->zv_objset);
 }
 
 /*
  * Return the proper tag for rollback and recv: the zvol_state_t itself
  * while the zvol is open, NULL otherwise.  The caller must hold
  * zv_suspend_lock as writer.
  */
 void *
 zvol_tag(zvol_state_t *zv)
 {
 	ASSERT(RW_WRITE_HELD(&zv->zv_suspend_lock));
 
 	if (zv->zv_open_count > 0)
 		return (zv);
 
 	return (NULL);
 }
 
 /*
  * Suspend the zvol for recv and rollback.
  *
  * Returns NULL if no zvol with this name exists.  Otherwise returns the
  * zvol_state_t with zv_suspend_lock held as writer and zv_suspend_ref
  * incremented; the caller is expected to undo both with zvol_resume().
  */
 zvol_state_t *
 zvol_suspend(const char *name)
 {
 	zvol_state_t *zv;
 
 	zv = zvol_find_by_name(name, RW_WRITER);
 
 	if (zv == NULL)
 		return (NULL);
 
 	/* block all I/O, release in zvol_resume. */
 	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 	ASSERT(RW_WRITE_HELD(&zv->zv_suspend_lock));
 
 	/* zvol_remove_minors_impl() skips zvols with a nonzero count. */
 	atomic_inc(&zv->zv_suspend_ref);
 
 	/* Only an open zvol has objset state to shut down. */
 	if (zv->zv_open_count > 0)
 		zvol_shutdown_zv(zv);
 
 	/*
 	 * do not hold zv_state_lock across suspend/resume to
 	 * avoid locking up zvol lookups
 	 */
 	mutex_exit(&zv->zv_state_lock);
 
 	/* zv_suspend_lock is released in zvol_resume() */
 	return (zv);
 }
 
 /*
  * Resume a zvol previously suspended with zvol_suspend().
  *
  * The caller must hold zv_suspend_lock as writer; it is released here and
  * zv_suspend_ref is decremented.  If the zvol is open, its objset pointer
  * is refreshed and zvol_setup_zv() is run again.  Returns 0 or the error
  * from zvol_setup_zv().
  */
 int
 zvol_resume(zvol_state_t *zv)
 {
 	int error = 0;
 
 	ASSERT(RW_WRITE_HELD(&zv->zv_suspend_lock));
 
 	mutex_enter(&zv->zv_state_lock);
 
 	if (zv->zv_open_count > 0) {
 		/*
 		 * The hold is only used to obtain the objset pointer and is
 		 * released again right away; the checks in between verify
 		 * that zv is still the dataset's owner and that the dataset
 		 * is long-held.
 		 */
 		VERIFY0(dmu_objset_hold(zv->zv_name, zv, &zv->zv_objset));
 		VERIFY3P(zv->zv_objset->os_dsl_dataset->ds_owner, ==, zv);
 		VERIFY(dsl_dataset_long_held(zv->zv_objset->os_dsl_dataset));
 		dmu_objset_rele(zv->zv_objset, zv);
 
 		error = zvol_setup_zv(zv);
 	}
 
 	mutex_exit(&zv->zv_state_lock);
 
 	rw_exit(&zv->zv_suspend_lock);
 	/*
 	 * We need this because we don't hold zvol_state_lock while releasing
 	 * zv_suspend_lock. zvol_remove_minors_impl thus cannot check
 	 * zv_suspend_lock to determine it is safe to free because rwlock is
 	 * not inherently atomic.
 	 */
 	atomic_dec(&zv->zv_suspend_ref);
 
 	return (SET_ERROR(error));
 }
 
 /*
  * Take ownership of the zvol's objset and initialize the zvol_state_t from
  * it with zvol_setup_zv().  The objset is owned read-only when readonly is
  * set or when the name contains '@'.  The caller must hold zv_suspend_lock
  * as reader, zv_state_lock and spa_namespace_lock.  Returns 0 on success;
  * on failure the objset is disowned again and zv_objset is left NULL.
  */
 int
 zvol_first_open(zvol_state_t *zv, boolean_t readonly)
 {
 	objset_t *os;
 	boolean_t ro;
 	int error;
 
 	ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
 	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 	ASSERT(mutex_owned(&spa_namespace_lock));
 
 	ro = (readonly || strchr(zv->zv_name, '@') != NULL);
 	error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, ro, B_TRUE, zv, &os);
 	if (error != 0)
 		return (SET_ERROR(error));
 
 	zv->zv_objset = os;
 
 	error = zvol_setup_zv(zv);
 	if (error == 0)
 		return (0);
 
 	/* Setup failed: give the objset back and forget it. */
 	dmu_objset_disown(os, 1, zv);
 	zv->zv_objset = NULL;
 
 	return (error);
 }
 
 /*
  * Counterpart of zvol_first_open(): shut down the objset-related state with
  * zvol_shutdown_zv(), then disown the objset and clear zv_objset.  The
  * caller must hold zv_suspend_lock as reader and zv_state_lock.
  */
 void
 zvol_last_close(zvol_state_t *zv)
 {
 	objset_t *os;
 
 	ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
 	ASSERT(MUTEX_HELD(&zv->zv_state_lock));
 
 	zvol_shutdown_zv(zv);
 
 	os = zv->zv_objset;
 	dmu_objset_disown(os, 1, zv);
 	zv->zv_objset = NULL;
 }
 
 /*
  * One dataset queued for minor creation.  Jobs are linked on a list, may be
  * handed to zvol_prefetch_minors_impl(), and are consumed by
  * zvol_create_minors_recursive(), which frees both the job and its name.
  */
 typedef struct minors_job {
 	list_t *list;		/* list this job was inserted on */
 	list_node_t link;	/* linkage for that list */
 	/* input */
 	char *name;		/* dataset name (kmem_strdup'ed copy) */
 	/* output */
 	int error;		/* 0, or the prefetch task's own error */
 } minors_job_t;
 
 /*
  * Prefetch zvol dnodes for the minors_job
  */
 static void
 zvol_prefetch_minors_impl(void *arg)
 {
 	minors_job_t *job = arg;
 	char *dsname = job->name;
 	objset_t *os = NULL;
 
 	job->error = dmu_objset_own(dsname, DMU_OST_ZVOL, B_TRUE, B_TRUE,
 	    FTAG, &os);
 	if (job->error == 0) {
 		dmu_prefetch(os, ZVOL_OBJ, 0, 0, 0, ZIO_PRIORITY_SYNC_READ);
 		dmu_objset_disown(os, B_TRUE, FTAG);
 	}
 }
 
 /*
  * dmu_objset_find() callback used for the snapshots of a zvol.  arg is the
  * minors_job_t of the parent zvol: its name is the dataset to skip and its
  * list receives a new job for every snapshot name seen.
  *
  * Mask errors to continue dmu_objset_find() traversal
  */
 static int
 zvol_create_snap_minor_cb(const char *dsname, void *arg)
 {
 	minors_job_t *parent = arg;
 	list_t *minors_list = parent->list;
 	const char *skip_name = parent->name;
 	minors_job_t *job;
 	char *name_copy;
 
 	ASSERT0(MUTEX_HELD(&spa_namespace_lock));
 
 	/* skip the designated dataset */
 	if (skip_name != NULL && strcmp(dsname, skip_name) == 0)
 		return (0);
 
 	/* at this point, the dsname should name a snapshot */
 	if (strchr(dsname, '@') == NULL) {
 		dprintf("zvol_create_snap_minor_cb(): "
 		    "%s is not a snapshot name\n", dsname);
 		return (0);
 	}
 
 	name_copy = kmem_strdup(dsname);
 	if (name_copy == NULL)
 		return (0);
 
 	job = kmem_alloc(sizeof (minors_job_t), KM_SLEEP);
 	job->name = name_copy;
 	job->list = minors_list;
 	job->error = 0;
 	list_insert_tail(minors_list, job);
 
 	/* don't care if dispatch fails, because job->error is 0 */
 	taskq_dispatch(system_taskq, zvol_prefetch_minors_impl, job,
 	    TQ_SLEEP);
 
 	return (0);
 }
 
 /*
  * If spa_keystore_load_wkey() is called for an encrypted zvol,
  * we need to look for any clones also using the key. This function
  * is "best effort" - so we just skip over it if there are failures.
  *
  * dsname	dataset whose dsl_dir is inspected for clones
  * minors_list	list that receives one minors_job_t per clone found; no
  *		prefetch task is dispatched for these jobs
  */
 static void
 zvol_add_clones(const char *dsname, list_t *minors_list)
 {
 	/* Also check if it has clones */
 	dsl_dir_t *dd = NULL;
 	dsl_pool_t *dp = NULL;
 
 	if (dsl_pool_hold(dsname, FTAG, &dp) != 0)
 		return;
 
 	/* Nothing to do unless the encryption feature is enabled. */
 	if (!spa_feature_is_enabled(dp->dp_spa,
 	    SPA_FEATURE_ENCRYPTION))
 		goto out;
 
 	if (dsl_dir_hold(dp, dsname, FTAG, &dd, NULL) != 0)
 		goto out;
 
 	/* dd_clones == 0 means there is no clones zap to walk. */
 	if (dsl_dir_phys(dd)->dd_clones == 0)
 		goto out;
 
 	zap_cursor_t *zc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
 	zap_attribute_t *za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
 	objset_t *mos = dd->dd_pool->dp_meta_objset;
 
 	/*
 	 * Walk the dd_clones zap; the first integer of each entry is used
 	 * as the object number of a clone dataset.
 	 */
 	for (zap_cursor_init(zc, mos, dsl_dir_phys(dd)->dd_clones);
 	    zap_cursor_retrieve(zc, za) == 0;
 	    zap_cursor_advance(zc)) {
 		dsl_dataset_t *clone;
 		minors_job_t *job;
 
 		/* Clones that cannot be held are silently skipped. */
 		if (dsl_dataset_hold_obj(dd->dd_pool,
 		    za->za_first_integer, FTAG, &clone) == 0) {
 
 			char name[ZFS_MAX_DATASET_NAME_LEN];
 			dsl_dataset_name(clone, name);
 
 			char *n = kmem_strdup(name);
 			job = kmem_alloc(sizeof (minors_job_t), KM_SLEEP);
 			job->name = n;
 			job->list = minors_list;
 			job->error = 0;
 			list_insert_tail(minors_list, job);
 
 			dsl_dataset_rele(clone, FTAG);
 		}
 	}
 	zap_cursor_fini(zc);
 	kmem_free(za, sizeof (zap_attribute_t));
 	kmem_free(zc, sizeof (zap_cursor_t));
 
 out:
 	/* dd is still NULL if we bailed out before dsl_dir_hold(). */
 	if (dd != NULL)
 		dsl_dir_rele(dd, FTAG);
 	dsl_pool_rele(dp, FTAG);
 }
 
 /*
  * dmu_objset_find() callback used while scanning for zvols.  arg is the
  * list that collects minors_job_t entries.  Always returns 0:
  *
  * Mask errors to continue dmu_objset_find() traversal
  */
 static int
 zvol_create_minors_cb(const char *dsname, void *arg)
 {
 	list_t *minors_list = arg;
 	minors_job_t *job;
 	uint64_t snapdev;
 	char *name_copy;
 
 	ASSERT0(MUTEX_HELD(&spa_namespace_lock));
 
 	if (dsl_prop_get_integer(dsname, "snapdev", &snapdev, NULL) != 0)
 		return (0);
 
 	/*
 	 * Given the name and the 'snapdev' property, create device minor nodes
 	 * with the linkages to zvols/snapshots as needed.
 	 * If the name represents a zvol, create a minor node for the zvol, then
 	 * check if its snapshots are 'visible', and if so, iterate over the
 	 * snapshots and create device minor nodes for those.
 	 */
 	if (strchr(dsname, '@') != NULL) {
 		dprintf("zvol_create_minors_cb(): %s is not a zvol name\n",
 		    dsname);
 		return (0);
 	}
 
 	name_copy = kmem_strdup(dsname);
 	if (name_copy == NULL)
 		return (0);
 
 	job = kmem_alloc(sizeof (minors_job_t), KM_SLEEP);
 	job->name = name_copy;
 	job->list = minors_list;
 	job->error = 0;
 	list_insert_tail(minors_list, job);
 
 	/* don't care if dispatch fails, because job->error is 0 */
 	taskq_dispatch(system_taskq, zvol_prefetch_minors_impl, job,
 	    TQ_SLEEP);
 
 	zvol_add_clones(dsname, minors_list);
 
 	if (snapdev == ZFS_SNAPDEV_VISIBLE) {
 		/*
 		 * traverse snapshots only, do not traverse children,
 		 * and skip the 'dsname'
 		 */
 		(void) dmu_objset_find(dsname, zvol_create_snap_minor_cb,
 		    job, DS_FIND_SNAPSHOTS);
 	}
 
 	return (0);
 }
 
 /*
  * Create minors for the specified dataset, including children and snapshots.
  * Pay attention to the 'snapdev' property and iterate over the snapshots
  * only if they are 'visible'. This approach allows one to assure that the
  * snapshot metadata is read from disk only if it is needed.
  *
  * The name can represent a dataset to be recursively scanned for zvols and
  * their snapshots, or a single zvol snapshot. If the name represents a
  * dataset, the scan is performed in two nested stages:
  * - scan the dataset for zvols, and
  * - for each zvol, create a minor node, then check if the zvol's snapshots
  *   are 'visible', and only then iterate over the snapshots if needed
  *
  * If the name represents a snapshot, a check is performed if the snapshot is
  * 'visible' (which also verifies that the parent is a zvol), and if so,
  * a minor node for that snapshot is created.
  *
  * Does nothing when zvol_inhibit_dev is set.  Errors are not reported to
  * the caller.
  */
 void
 zvol_create_minors_recursive(const char *name)
 {
 	list_t minors_list;
 	minors_job_t *job;
 
 	if (zvol_inhibit_dev)
 		return;
 
 	/*
 	 * This is the list for prefetch jobs. Whenever we found a match
 	 * during dmu_objset_find, we insert a minors_job to the list and do
 	 * taskq_dispatch to parallel prefetch zvol dnodes. Note we don't need
 	 * any lock because all list operation is done on the current thread.
 	 *
 	 * We will use this list to do zvol_os_create_minor after prefetch
 	 * so we don't have to traverse using dmu_objset_find again.
 	 */
 	list_create(&minors_list, sizeof (minors_job_t),
 	    offsetof(minors_job_t, link));
 
 
 	if (strchr(name, '@') != NULL) {
 		/* Snapshot name: handled directly, the list stays empty. */
 		uint64_t snapdev;
 
 		int error = dsl_prop_get_integer(name, "snapdev",
 		    &snapdev, NULL);
 
 		if (error == 0 && snapdev == ZFS_SNAPDEV_VISIBLE)
 			(void) zvol_os_create_minor(name);
 	} else {
 		/* Dataset name: collect jobs for it and its children. */
 		fstrans_cookie_t cookie = spl_fstrans_mark();
 		(void) dmu_objset_find(name, zvol_create_minors_cb,
 		    &minors_list, DS_FIND_CHILDREN);
 		spl_fstrans_unmark(cookie);
 	}
 
 	/* Wait for the prefetch tasks dispatched by the callbacks. */
 	taskq_wait_outstanding(system_taskq, 0);
 
 	/*
 	 * Prefetch is completed, we can do zvol_os_create_minor
 	 * sequentially.
 	 */
 	while ((job = list_head(&minors_list)) != NULL) {
 		list_remove(&minors_list, job);
 		/* Jobs whose prefetch reported an error get no minor. */
 		if (!job->error)
 			(void) zvol_os_create_minor(job->name);
 		kmem_strfree(job->name);
 		kmem_free(job, sizeof (minors_job_t));
 	}
 
 	list_destroy(&minors_list);
 }
 
 /*
  * Create the minor for a single zvol or zvol snapshot.  A snapshot (a name
  * containing '@') only gets a minor when its 'snapdev' property can be read
  * and is ZFS_SNAPDEV_VISIBLE.  Does nothing when zvol_inhibit_dev is set.
  */
 void
 zvol_create_minor(const char *name)
 {
 	uint64_t snapdev;
 
 	/*
 	 * Note: the dsl_pool_config_lock must not be held.
 	 * Minor node creation needs to obtain the zvol_state_lock.
 	 * zvol_open() obtains the zvol_state_lock and then the dsl pool
 	 * config lock.  Therefore, we can't have the config lock now if
 	 * we are going to wait for the zvol_state_lock, because it
 	 * would be a lock order inversion which could lead to deadlock.
 	 */
 
 	if (zvol_inhibit_dev)
 		return;
 
 	if (strchr(name, '@') == NULL) {
 		(void) zvol_os_create_minor(name);
 		return;
 	}
 
 	if (dsl_prop_get_integer(name, "snapdev", &snapdev, NULL) != 0)
 		return;
 
 	if (snapdev == ZFS_SNAPDEV_VISIBLE)
 		(void) zvol_os_create_minor(name);
 }
 
 /*
  * Remove minors for specified dataset including children and snapshots
  * (zvol_remove_minors_impl() below; zvol_free_task() is its taskq helper).
  */
 
 static void
 zvol_free_task(void *arg)
 {
 	/*
 	 * Taskq entry point: zvol_remove_minors_impl() dispatches this to
 	 * system_taskq with the zvol_state_t to free as "arg".
 	 */
 	zvol_os_free(arg);
 }
 
 /*
  * Tear down the minors of every zvol selected by "name":
  *   - name == NULL selects every zvol on zvol_state_list;
  *   - otherwise a zvol is selected when its name equals "name" or starts
  *     with "name" followed by '/' (child) or '@' (snapshot).
  * Selected zvols that are open or suspended are left alone.  Each removed
  * zvol is freed through system_taskq; if the dispatch fails it is queued on
  * a local list and freed in place once zvol_state_lock has been dropped.
  */
 void
 zvol_remove_minors_impl(const char *name)
 {
 	zvol_state_t *zv, *zv_next;
 	int namelen = ((name) ? strlen(name) : 0);
 	taskqid_t t;
 	list_t free_list;
 
 	if (zvol_inhibit_dev)
 		return;
 
 	list_create(&free_list, sizeof (zvol_state_t),
 	    offsetof(zvol_state_t, zv_next));
 
 	rw_enter(&zvol_state_lock, RW_WRITER);
 
 	/* zv_next is fetched up front because zv may leave the list below. */
 	for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
 		zv_next = list_next(&zvol_state_list, zv);
 
 		mutex_enter(&zv->zv_state_lock);
 		if (name == NULL || strcmp(zv->zv_name, name) == 0 ||
 		    (strncmp(zv->zv_name, name, namelen) == 0 &&
 		    (zv->zv_name[namelen] == '/' ||
 		    zv->zv_name[namelen] == '@'))) {
 			/*
 			 * By holding zv_state_lock here, we guarantee that no
 			 * one is currently using this zv
 			 */
 
 			/* If in use, leave alone */
 			if (zv->zv_open_count > 0 ||
 			    atomic_read(&zv->zv_suspend_ref)) {
 				mutex_exit(&zv->zv_state_lock);
 				continue;
 			}
 
 			zvol_remove(zv);
 
 			/*
 			 * Cleared while holding zvol_state_lock as a writer
 			 * which will prevent zvol_open() from opening it.
 			 */
 			zvol_os_clear_private(zv);
 
 			/* Drop zv_state_lock before zvol_free() */
 			mutex_exit(&zv->zv_state_lock);
 
 			/* Try parallel zv_free, if failed do it in place */
 			t = taskq_dispatch(system_taskq, zvol_free_task, zv,
 			    TQ_SLEEP);
 			if (t == TASKQID_INVALID)
 				list_insert_head(&free_list, zv);
 		} else {
 			mutex_exit(&zv->zv_state_lock);
 		}
 	}
 	rw_exit(&zvol_state_lock);
 
 	/* Drop zvol_state_lock before calling zvol_free() */
 	while ((zv = list_head(&free_list)) != NULL) {
 		list_remove(&free_list, zv);
 		zvol_os_free(zv);
 	}
 
 	/* The fallback list is empty again; pair list_create() with destroy. */
 	list_destroy(&free_list);
 }
 
 /*
  * Remove the minor of the single volume whose name is exactly "name".
  * A matching volume that is open or suspended is left untouched.
  */
 static void
 zvol_remove_minor_impl(const char *name)
 {
 	zvol_state_t *cur, *following;
 	zvol_state_t *victim = NULL;
 
 	if (zvol_inhibit_dev)
 		return;
 
 	rw_enter(&zvol_state_lock, RW_WRITER);
 
 	cur = list_head(&zvol_state_list);
 	while (cur != NULL && victim == NULL) {
 		following = list_next(&zvol_state_list, cur);
 
 		mutex_enter(&cur->zv_state_lock);
 		if (strcmp(cur->zv_name, name) == 0) {
 			/*
 			 * With zv_state_lock held nobody else can be using
 			 * this zvol; only detach it when it is idle.
 			 */
 			int busy = (cur->zv_open_count > 0 ||
 			    atomic_read(&cur->zv_suspend_ref));
 
 			if (!busy) {
 				zvol_remove(cur);
 				zvol_os_clear_private(cur);
 				victim = cur;
 			}
 		}
 		mutex_exit(&cur->zv_state_lock);
 
 		cur = following;
 	}
 
 	/* zvol_state_lock must be dropped before the zvol is freed. */
 	rw_exit(&zvol_state_lock);
 
 	if (victim != NULL)
 		zvol_os_free(victim);
 }
 
 /*
  * Rename the minor of "oldname" to "newname", and rewrite the leading
  * "oldname" of every child ('/') and snapshot ('@') minor to "newname".
  */
 static void
 zvol_rename_minors_impl(const char *oldname, const char *newname)
 {
 	zvol_state_t *cur, *following;
 	int prefixlen;
 
 	if (zvol_inhibit_dev)
 		return;
 
 	prefixlen = strlen(oldname);
 
 	rw_enter(&zvol_state_lock, RW_READER);
 
 	cur = list_head(&zvol_state_list);
 	while (cur != NULL) {
 		following = list_next(&zvol_state_list, cur);
 
 		mutex_enter(&cur->zv_state_lock);
 
 		if (strcmp(cur->zv_name, oldname) == 0) {
 			/* The dataset itself. */
 			zvol_os_rename_minor(cur, newname);
 		} else if (strncmp(cur->zv_name, oldname, prefixlen) == 0) {
 			char sep = cur->zv_name[prefixlen];
 
 			/* Only a real descendant, not a longer sibling name. */
 			if (sep == '/' || sep == '@') {
 				char *renamed = kmem_asprintf("%s%c%s",
 				    newname, sep,
 				    cur->zv_name + prefixlen + 1);
 
 				zvol_os_rename_minor(cur, renamed);
 				kmem_strfree(renamed);
 			}
 		}
 
 		mutex_exit(&cur->zv_state_lock);
 
 		cur = following;
 	}
 
 	rw_exit(&zvol_state_lock);
 }
 
 /* Argument handed to zvol_set_snapdev_cb() through dmu_objset_find(). */
 typedef struct zvol_snapdev_cb_arg {
 	uint64_t snapdev;	/* ZFS_SNAPDEV_* value to apply */
 } zvol_snapdev_cb_arg_t;
 
 /*
  * dmu_objset_find() callback: for a snapshot name, create its minor when
  * the requested snapdev is ZFS_SNAPDEV_VISIBLE or remove it when it is
  * ZFS_SNAPDEV_HIDDEN.  Non-snapshot names and other values are ignored.
  * Always returns 0.
  */
 static int
 zvol_set_snapdev_cb(const char *dsname, void *param)
 {
 	const zvol_snapdev_cb_arg_t *cbarg = param;
 
 	if (strchr(dsname, '@') != NULL) {
 		if (cbarg->snapdev == ZFS_SNAPDEV_VISIBLE)
 			(void) zvol_os_create_minor(dsname);
 		else if (cbarg->snapdev == ZFS_SNAPDEV_HIDDEN)
 			zvol_remove_minor_impl(dsname);
 	}
 
 	return (0);
 }
 
 static void
 zvol_set_snapdev_impl(char *name, uint64_t snapdev)
 {
 	zvol_snapdev_cb_arg_t arg = {snapdev};
 	fstrans_cookie_t cookie = spl_fstrans_mark();
 	/*
 	 * The zvol_set_snapdev_sync() sets snapdev appropriately
 	 * in the dataset hierarchy. Here, we only scan snapshots.
 	 */
 	dmu_objset_find(name, zvol_set_snapdev_cb, &arg, DS_FIND_SNAPSHOTS);
 	spl_fstrans_unmark(cookie);
 }
 
 static void
 zvol_set_volmode_impl(char *name, uint64_t volmode)
 {
 	fstrans_cookie_t cookie;
 	uint64_t old_volmode;
 	zvol_state_t *zv;
 
 	if (strchr(name, '@') != NULL)
 		return;
 
 	/*
 	 * It's unfortunate we need to remove minors before we create new ones:
 	 * this is necessary because our backing gendisk (zvol_state->zv_disk)
 	 * could be different when we set, for instance, volmode from "geom"
 	 * to "dev" (or vice versa).
 	 */
 	zv = zvol_find_by_name(name, RW_NONE);
 	if (zv == NULL && volmode == ZFS_VOLMODE_NONE)
 			return;
 	if (zv != NULL) {
 		old_volmode = zv->zv_volmode;
 		mutex_exit(&zv->zv_state_lock);
 		if (old_volmode == volmode)
 			return;
 		zvol_wait_close(zv);
 	}
 	cookie = spl_fstrans_mark();
 	switch (volmode) {
 		case ZFS_VOLMODE_NONE:
 			(void) zvol_remove_minor_impl(name);
 			break;
 		case ZFS_VOLMODE_GEOM:
 		case ZFS_VOLMODE_DEV:
 			(void) zvol_remove_minor_impl(name);
 			(void) zvol_os_create_minor(name);
 			break;
 		case ZFS_VOLMODE_DEFAULT:
 			(void) zvol_remove_minor_impl(name);
 			if (zvol_volmode == ZFS_VOLMODE_NONE)
 				break;
 			else /* if zvol_volmode is invalid defaults to "geom" */
 				(void) zvol_os_create_minor(name);
 			break;
 	}
 	spl_fstrans_unmark(cookie);
 }
 
 /*
  * Build a zeroed zvol_task_t describing an asynchronous operation.
  * Returns NULL (no task) when name1 starts with '$'.  name2 is optional.
  * The result is released with zvol_task_free().
  */
 static zvol_task_t *
 zvol_task_alloc(zvol_async_op_t op, const char *name1, const char *name2,
     uint64_t value)
 {
 	zvol_task_t *t;
 
 	/* Never allow tasks on hidden names. */
 	if (*name1 == '$')
 		return (NULL);
 
 	t = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
 	t->value = value;
 	t->op = op;
 
 	/* Copies are bounded by MAXNAMELEN; longer names are truncated. */
 	strlcpy(t->name1, name1, MAXNAMELEN);
 	if (name2 != NULL)
 		strlcpy(t->name2, name2, MAXNAMELEN);
 
 	return (t);
 }
 
 /* Release a task obtained from zvol_task_alloc(). */
 static void
 zvol_task_free(zvol_task_t *task)
 {
 	kmem_free(task, sizeof (zvol_task_t));
 }
 
 /*
  * Taskq worker: run the operation recorded in the zvol_task_t passed as
  * "arg", then free the task.  An unknown operation trips VERIFY().
  */
 static void
 zvol_task_cb(void *arg)
 {
 	zvol_task_t *job = arg;
 
 	if (job->op == ZVOL_ASYNC_REMOVE_MINORS) {
 		zvol_remove_minors_impl(job->name1);
 	} else if (job->op == ZVOL_ASYNC_RENAME_MINORS) {
 		zvol_rename_minors_impl(job->name1, job->name2);
 	} else if (job->op == ZVOL_ASYNC_SET_SNAPDEV) {
 		zvol_set_snapdev_impl(job->name1, job->value);
 	} else if (job->op == ZVOL_ASYNC_SET_VOLMODE) {
 		zvol_set_volmode_impl(job->name1, job->value);
 	} else {
 		VERIFY(0);
 	}
 
 	zvol_task_free(job);
 }
 
 /* Argument block shared by the snapdev/volmode check and sync functions. */
 typedef struct zvol_set_prop_int_arg {
 	const char *zsda_name;		/* dataset the property is set on */
 	uint64_t zsda_value;		/* new property value */
 	zprop_source_t zsda_source;	/* source given to the prop setter */
 	dmu_tx_t *zsda_tx;		/* filled in by the sync function */
 } zvol_set_prop_int_arg_t;
 
 /*
  * Sync-task check for zvol_set_snapdev(): succeeds when the dsl_dir named
  * in the argument can be held, otherwise returns the hold error.  No other
  * condition is imposed.
  */
 static int
 zvol_set_snapdev_check(void *arg, dmu_tx_t *tx)
 {
 	zvol_set_prop_int_arg_t *zsda = arg;
 	dsl_dir_t *dd;
 	int err;
 
 	err = dsl_dir_hold(dmu_tx_pool(tx), zsda->zsda_name, FTAG, &dd, NULL);
 	if (err == 0)
 		dsl_dir_rele(dd, FTAG);
 
 	return (err);
 }
 
 /*
  * dmu_objset_find_dp() callback: read the effective "snapdev" of "ds" and
  * queue a ZVOL_ASYNC_SET_SNAPDEV task for it on the pool's zvol taskq.
  * Datasets whose property cannot be read, or for which no task is created,
  * are skipped.  Always returns 0.
  */
 static int
 zvol_set_snapdev_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
 {
 	(void) arg;
 	char dsname[MAXNAMELEN];
 	zvol_task_t *task;
 	uint64_t snapdev;
 
 	dsl_dataset_name(ds, dsname);
 	if (dsl_prop_get_int_ds(ds, "snapdev", &snapdev) != 0)
 		return (0);
 	task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname, NULL, snapdev);
 	if (task == NULL)
 		return (0);
 
 	/*
 	 * zvol_task_cb() frees the task when it runs.  If the dispatch is
 	 * refused the callback never runs, so free the task here.
 	 */
 	if (taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb,
 	    task, TQ_SLEEP) == TASKQID_INVALID)
 		zvol_task_free(task);
 	return (0);
 }
 
 /*
  * Sync-task body for zvol_set_snapdev().
  *
  * The new value is written with dsl_prop_set_sync_impl() on the named
  * dataset only; the callback then reads the effective "snapdev" of every
  * child, because the value is not guaranteed to be the same in the whole
  * dataset hierarchy.
  */
 static void
 zvol_set_snapdev_sync(void *arg, dmu_tx_t *tx)
 {
 	zvol_set_prop_int_arg_t *zsda = arg;
 	dsl_pool_t *pool = dmu_tx_pool(tx);
 	dsl_dir_t *dir;
 	dsl_dataset_t *top;
 
 	VERIFY0(dsl_dir_hold(pool, zsda->zsda_name, FTAG, &dir, NULL));
 	zsda->zsda_tx = tx;
 
 	/* A failed dataset hold just skips the property write. */
 	if (dsl_dataset_hold(pool, zsda->zsda_name, FTAG, &top) == 0) {
 		dsl_prop_set_sync_impl(top,
 		    zfs_prop_to_name(ZFS_PROP_SNAPDEV), zsda->zsda_source,
 		    sizeof (zsda->zsda_value), 1, &zsda->zsda_value,
 		    zsda->zsda_tx);
 		dsl_dataset_rele(top, FTAG);
 	}
 
 	dmu_objset_find_dp(pool, dir->dd_object, zvol_set_snapdev_sync_cb,
 	    zsda, DS_FIND_CHILDREN);
 
 	dsl_dir_rele(dir, FTAG);
 }
 
 /*
  * Set "snapdev" on ddname through a DSL sync task; returns the result of
  * dsl_sync_task().
  */
 int
 zvol_set_snapdev(const char *ddname, zprop_source_t source, uint64_t snapdev)
 {
 	zvol_set_prop_int_arg_t zsda = {
 		.zsda_name = ddname,
 		.zsda_value = snapdev,
 		.zsda_source = source,
 	};
 
 	return (dsl_sync_task(ddname, zvol_set_snapdev_check,
 	    zvol_set_snapdev_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
 }
 
 /*
  * Sync-task check for zvol_set_volmode(): succeeds when the dsl_dir named
  * in the argument can be held, otherwise returns the hold error.  No other
  * condition is imposed.
  */
 static int
 zvol_set_volmode_check(void *arg, dmu_tx_t *tx)
 {
 	zvol_set_prop_int_arg_t *zsda = arg;
 	dsl_dir_t *dd;
 	int err;
 
 	err = dsl_dir_hold(dmu_tx_pool(tx), zsda->zsda_name, FTAG, &dd, NULL);
 	if (err == 0)
 		dsl_dir_rele(dd, FTAG);
 
 	return (err);
 }
 
 /*
  * dmu_objset_find_dp() callback: read the effective "volmode" of "ds" and
  * queue a ZVOL_ASYNC_SET_VOLMODE task for it on the pool's zvol taskq.
  * Datasets whose property cannot be read, or for which no task is created,
  * are skipped.  Always returns 0.
  */
 static int
 zvol_set_volmode_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
 {
 	(void) arg;
 	char dsname[MAXNAMELEN];
 	zvol_task_t *task;
 	uint64_t volmode;
 
 	dsl_dataset_name(ds, dsname);
 	if (dsl_prop_get_int_ds(ds, "volmode", &volmode) != 0)
 		return (0);
 	task = zvol_task_alloc(ZVOL_ASYNC_SET_VOLMODE, dsname, NULL, volmode);
 	if (task == NULL)
 		return (0);
 
 	/*
 	 * zvol_task_cb() frees the task when it runs.  If the dispatch is
 	 * refused the callback never runs, so free the task here.
 	 */
 	if (taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb,
 	    task, TQ_SLEEP) == TASKQID_INVALID)
 		zvol_task_free(task);
 	return (0);
 }
 
 /*
  * Sync-task body for zvol_set_volmode().
  *
  * The new value is written with dsl_prop_set_sync_impl() on the named
  * dataset only; the callback then reads the effective "volmode" of every
  * child, because the value is not guaranteed to be the same in the whole
  * dataset hierarchy.
  */
 static void
 zvol_set_volmode_sync(void *arg, dmu_tx_t *tx)
 {
 	zvol_set_prop_int_arg_t *zsda = arg;
 	dsl_pool_t *pool = dmu_tx_pool(tx);
 	dsl_dir_t *dir;
 	dsl_dataset_t *top;
 
 	VERIFY0(dsl_dir_hold(pool, zsda->zsda_name, FTAG, &dir, NULL));
 	zsda->zsda_tx = tx;
 
 	/* A failed dataset hold just skips the property write. */
 	if (dsl_dataset_hold(pool, zsda->zsda_name, FTAG, &top) == 0) {
 		dsl_prop_set_sync_impl(top,
 		    zfs_prop_to_name(ZFS_PROP_VOLMODE), zsda->zsda_source,
 		    sizeof (zsda->zsda_value), 1, &zsda->zsda_value,
 		    zsda->zsda_tx);
 		dsl_dataset_rele(top, FTAG);
 	}
 
 	dmu_objset_find_dp(pool, dir->dd_object, zvol_set_volmode_sync_cb,
 	    zsda, DS_FIND_CHILDREN);
 
 	dsl_dir_rele(dir, FTAG);
 }
 
 /*
  * Set "volmode" on ddname through a DSL sync task; returns the result of
  * dsl_sync_task().
  */
 int
 zvol_set_volmode(const char *ddname, zprop_source_t source, uint64_t volmode)
 {
 	zvol_set_prop_int_arg_t zsda = {
 		.zsda_name = ddname,
 		.zsda_value = volmode,
 		.zsda_source = source,
 	};
 
 	return (dsl_sync_task(ddname, zvol_set_volmode_check,
 	    zvol_set_volmode_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
 }
 
 /*
  * Queue removal of the minors for "name" (including children and
  * snapshots) on the pool's zvol taskq.  When "async" is B_FALSE, wait for
  * the queued task to finish before returning.  Nothing is queued when
  * zvol_task_alloc() declines the name.
  */
 void
 zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
 {
 	zvol_task_t *task;
 	taskqid_t id;
 
 	task = zvol_task_alloc(ZVOL_ASYNC_REMOVE_MINORS, name, NULL, ~0ULL);
 	if (task == NULL)
 		return;
 
 	id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
 	if (id == TASKQID_INVALID) {
 		/*
 		 * The callback, which normally frees the task, will never
 		 * run; release it here instead of leaking it.
 		 */
 		zvol_task_free(task);
 		return;
 	}
 
 	if (async == B_FALSE)
 		taskq_wait_id(spa->spa_zvol_taskq, id);
 }
 
 /*
  * Queue a rename of the minors under "name1" to "name2" on the pool's zvol
  * taskq.  When "async" is B_FALSE, wait for the queued task to finish
  * before returning.  Nothing is queued when zvol_task_alloc() declines the
  * name.
  */
 void
 zvol_rename_minors(spa_t *spa, const char *name1, const char *name2,
     boolean_t async)
 {
 	zvol_task_t *task;
 	taskqid_t id;
 
 	task = zvol_task_alloc(ZVOL_ASYNC_RENAME_MINORS, name1, name2, ~0ULL);
 	if (task == NULL)
 		return;
 
 	id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
 	if (id == TASKQID_INVALID) {
 		/*
 		 * The callback, which normally frees the task, will never
 		 * run; release it here instead of leaking it.
 		 */
 		zvol_task_free(task);
 		return;
 	}
 
 	if (async == B_FALSE)
 		taskq_wait_id(spa->spa_zvol_taskq, id);
 }
 
 /* Platform-independent entry point; defers entirely to zvol_os_is_zvol(). */
 boolean_t
 zvol_is_zvol(const char *name)
 {
 
 	return (zvol_os_is_zvol(name));
 }
 
 /*
  * Set up the global zvol bookkeeping: the state list, its rwlock and the
  * hash table of ZVOL_HT_SIZE empty buckets.  Always returns 0.
  */
 int
 zvol_init_impl(void)
 {
 	int bucket;
 
 	list_create(&zvol_state_list, sizeof (zvol_state_t),
 	    offsetof(zvol_state_t, zv_next));
 	rw_init(&zvol_state_lock, NULL, RW_DEFAULT, NULL);
 
 	zvol_htable = kmem_alloc(ZVOL_HT_SIZE * sizeof (struct hlist_head),
 	    KM_SLEEP);
 	bucket = 0;
 	while (bucket < ZVOL_HT_SIZE) {
 		INIT_HLIST_HEAD(&zvol_htable[bucket]);
 		bucket++;
 	}
 
 	return (0);
 }
 
 /*
  * Undo zvol_init_impl(): remove every minor, wait for the asynchronous
  * frees to finish, then release the hash table, state list and rwlock.
  */
 void
 zvol_fini_impl(void)
 {
 	/* NULL selects every zvol. */
 	zvol_remove_minors_impl(NULL);
 
 	/*
 	 * The call to "zvol_remove_minors_impl" may dispatch entries to
 	 * the system_taskq, but it doesn't wait for those entries to
 	 * complete before it returns. Thus, we must wait for all of the
 	 * removals to finish, before we can continue.
 	 */
 	taskq_wait_outstanding(system_taskq, 0);
 
 	/* Size must match the kmem_alloc() in zvol_init_impl(). */
 	kmem_free(zvol_htable, ZVOL_HT_SIZE * sizeof (struct hlist_head));
 	list_destroy(&zvol_state_list);
 	rw_destroy(&zvol_state_lock);
 }
diff --git a/module/zstd/zfs_zstd.c b/module/zstd/zfs_zstd.c
index 1bb95e460a81..ed0271a8d683 100644
--- a/module/zstd/zfs_zstd.c
+++ b/module/zstd/zfs_zstd.c
@@ -1,904 +1,904 @@
 /*
  * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html)
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *
  * 1. Redistributions of source code must retain the above copyright notice,
  * this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright notice,
  * this list of conditions and the following disclaimer in the documentation
  * and/or other materials provided with the distribution.
  *
  * 3. Neither the name of the copyright holder nor the names of its
  * contributors may be used to endorse or promote products derived from this
  * software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * Copyright (c) 2016-2018, Klara Inc.
  * Copyright (c) 2016-2018, Allan Jude
  * Copyright (c) 2018-2020, Sebastian Gottschall
  * Copyright (c) 2019-2020, Michael Niewöhner
  * Copyright (c) 2020, The FreeBSD Foundation [1]
  *
  * [1] Portions of this software were developed by Allan Jude
  *     under sponsorship from the FreeBSD Foundation.
  */
 
 #include <sys/param.h>
 #include <sys/sysmacros.h>
 #include <sys/zfs_context.h>
 #include <sys/zio_compress.h>
 #include <sys/spa.h>
 #include <sys/zstd/zstd.h>
 
 #define	ZSTD_STATIC_LINKING_ONLY
 #include "lib/zstd.h"
 #include "lib/common/zstd_errors.h"
 
 /* Non-zero enables the early-abort heuristic in zfs_zstd_compress_wrap(). */
 static uint_t zstd_earlyabort_pass = 1;
 /* Lowest zstd level for which the early-abort heuristic is attempted. */
 static int zstd_cutoff_level = ZIO_ZSTD_LEVEL_3;
 /* Smallest source length in bytes for which the heuristic is attempted. */
 static unsigned int zstd_abort_size = (128 * 1024);
 
 /* kstat handle; kstat_zstd_update() only acts on writes to this kstat. */
 static kstat_t *zstd_ksp = NULL;
 
 /*
  * Named kstat counters for the zstd module.  The static zstd_stats
  * instance is initialized positionally, so the member order here must
  * stay in step with that initializer.
  */
 typedef struct zstd_stats {
 	kstat_named_t	zstd_stat_alloc_fail;
 	kstat_named_t	zstd_stat_alloc_fallback;
 	kstat_named_t	zstd_stat_com_alloc_fail;
 	kstat_named_t	zstd_stat_dec_alloc_fail;
 	kstat_named_t	zstd_stat_com_inval;
 	kstat_named_t	zstd_stat_dec_inval;
 	kstat_named_t	zstd_stat_dec_header_inval;
 	kstat_named_t	zstd_stat_com_fail;
 	kstat_named_t	zstd_stat_dec_fail;
 	/*
 	 * LZ4 first-pass early abort verdict
 	 */
 	kstat_named_t	zstd_stat_lz4pass_allowed;
 	kstat_named_t	zstd_stat_lz4pass_rejected;
 	/*
 	 * zstd-1 second-pass early abort verdict
 	 */
 	kstat_named_t	zstd_stat_zstdpass_allowed;
 	kstat_named_t	zstd_stat_zstdpass_rejected;
 	/*
 	 * We excluded this from early abort for some reason
 	 */
 	kstat_named_t	zstd_stat_passignored;
 	kstat_named_t	zstd_stat_passignored_size;
 	/* Number and total byte size of buffers held by the memory pools */
 	kstat_named_t	zstd_stat_buffers;
 	kstat_named_t	zstd_stat_size;
 } zstd_stats_t;
 
 /*
  * The counter instance.  Entries are positional: each line names, in
  * zstd_stats_t member order, the kstat as exported and its data type.
  */
 static zstd_stats_t zstd_stats = {
 	{ "alloc_fail",			KSTAT_DATA_UINT64 },
 	{ "alloc_fallback",		KSTAT_DATA_UINT64 },
 	{ "compress_alloc_fail",	KSTAT_DATA_UINT64 },
 	{ "decompress_alloc_fail",	KSTAT_DATA_UINT64 },
 	{ "compress_level_invalid",	KSTAT_DATA_UINT64 },
 	{ "decompress_level_invalid",	KSTAT_DATA_UINT64 },
 	{ "decompress_header_invalid",	KSTAT_DATA_UINT64 },
 	{ "compress_failed",		KSTAT_DATA_UINT64 },
 	{ "decompress_failed",		KSTAT_DATA_UINT64 },
 	{ "lz4pass_allowed",		KSTAT_DATA_UINT64 },
 	{ "lz4pass_rejected",		KSTAT_DATA_UINT64 },
 	{ "zstdpass_allowed",		KSTAT_DATA_UINT64 },
 	{ "zstdpass_rejected",		KSTAT_DATA_UINT64 },
 	{ "passignored",		KSTAT_DATA_UINT64 },
 	{ "passignored_size",		KSTAT_DATA_UINT64 },
 	{ "buffers",			KSTAT_DATA_UINT64 },
 	{ "size",			KSTAT_DATA_UINT64 },
 };
 
 #ifdef _KERNEL
 /*
  * kstat update hook.  A write (KSTAT_WRITE) to the zstd kstat resets the
  * event counters listed below; reads and writes to any other kstat do
  * nothing.  The "buffers" and "size" counters are not reset here; they
  * are adjusted with ZSTDSTAT_ADD/ZSTDSTAT_SUB as pool buffers are allocated
  * and reaped.  Always returns 0.
  */
 static int
 kstat_zstd_update(kstat_t *ksp, int rw)
 {
 	ASSERT(ksp != NULL);
 
 	if (rw == KSTAT_WRITE && ksp == zstd_ksp) {
 		ZSTDSTAT_ZERO(zstd_stat_alloc_fail);
 		ZSTDSTAT_ZERO(zstd_stat_alloc_fallback);
 		ZSTDSTAT_ZERO(zstd_stat_com_alloc_fail);
 		ZSTDSTAT_ZERO(zstd_stat_dec_alloc_fail);
 		ZSTDSTAT_ZERO(zstd_stat_com_inval);
 		ZSTDSTAT_ZERO(zstd_stat_dec_inval);
 		ZSTDSTAT_ZERO(zstd_stat_dec_header_inval);
 		ZSTDSTAT_ZERO(zstd_stat_com_fail);
 		ZSTDSTAT_ZERO(zstd_stat_dec_fail);
 		ZSTDSTAT_ZERO(zstd_stat_lz4pass_allowed);
 		ZSTDSTAT_ZERO(zstd_stat_lz4pass_rejected);
 		ZSTDSTAT_ZERO(zstd_stat_zstdpass_allowed);
 		ZSTDSTAT_ZERO(zstd_stat_zstdpass_rejected);
 		ZSTDSTAT_ZERO(zstd_stat_passignored);
 		ZSTDSTAT_ZERO(zstd_stat_passignored_size);
 	}
 
 	return (0);
 }
 #endif
 
 /*
  * Allocator type recorded in the kmem_type field of struct zstd_kmem.
  * ZSTD_KMEM_UNKNOWN is 0, so a zeroed header carries no valid type.
  */
 enum zstd_kmem_type {
 	ZSTD_KMEM_UNKNOWN = 0,
 	/* Allocation type using kmem_vmalloc */
 	ZSTD_KMEM_DEFAULT,
 	/* Pool based allocation using mempool_alloc */
 	ZSTD_KMEM_POOL,
 	/* Reserved fallback memory for decompression only */
 	ZSTD_KMEM_DCTX,
 	/* Number of enumerators above; not an allocator type itself */
 	ZSTD_KMEM_COUNT,
 };
 
 /* Structure for pooled memory objects (one slot of a memory pool) */
 struct zstd_pool {
 	void *mem;		/* cached buffer, NULL when the slot is empty */
 	size_t size;		/* byte size of "mem" */
 	kmutex_t barrier;	/* held while the slot's buffer is in use */
 	hrtime_t timeout;	/* gethrestime_sec() time the buffer expires */
 };
 
 /*
  * Global structure for handling memory allocations.  zstd_mempool_alloc()
  * writes these fields at the start of each buffer it allocates.
  */
 struct zstd_kmem {
 	enum zstd_kmem_type kmem_type;	/* which allocator owns the buffer */
 	size_t kmem_size;		/* size passed to the allocator */
 	struct zstd_pool *pool;		/* owning slot, NULL if not pooled */
 };
 
 /* Fallback memory structure used for decompression only if memory runs out */
 struct zstd_fallback_mem {
 	size_t mem_size;	/* byte size of "mem" */
 	void *mem;		/* the reserved buffer */
 	kmutex_t barrier;	/* presumably serializes users - TODO confirm */
 };
 
 /* One row of the zstd_levels table: a zstd level and its ZFS enum value. */
 struct zstd_levelmap {
 	int16_t zstd_level;		/* level handed to the zstd library */
 	enum zio_zstd_levels level;	/* ZFS-internal level enum */
 };
 
 /*
  * ZSTD memory handlers
  *
  * For decompression we use a different handler which also provides fallback
  * memory allocation in case memory runs out.
  *
  * The ZSTD handlers were split up for the most simplified implementation.
  *
  * These are forward declarations; the ZSTD_customMem tables below refer to
  * the handlers before their definitions appear.
  */
 static void *zstd_alloc(void *opaque, size_t size);
 static void *zstd_dctx_alloc(void *opaque, size_t size);
 static void zstd_free(void *opaque, void *ptr);
 
 /*
  * Compression memory handler, passed to ZSTD_createCCtx_advanced().
  * Initialized positionally with the allocate hook, the free hook and NULL.
  */
 static const ZSTD_customMem zstd_malloc = {
 	zstd_alloc,
 	zstd_free,
 	NULL,
 };
 
 /*
  * Decompression memory handler.  Same layout as zstd_malloc, but it
  * allocates through zstd_dctx_alloc().
  */
 static const ZSTD_customMem zstd_dctx_malloc = {
 	zstd_dctx_alloc,
 	zstd_free,
 	NULL,
 };
 
 /*
  * Level map for converting ZFS internal levels to ZSTD levels and vice versa
  *
  * zstd_enum_to_level() indexes this table by position, not by search:
  * entries 0-18 are levels 1-19 and the fast levels follow in enum order.
  * Keep the rows in this order.  The first 19 rows use the enum constant as
  * the zstd level too, which presumably relies on ZIO_ZSTD_LEVEL_n having
  * the numeric value n (TODO confirm against the enum definition).
  */
 static struct zstd_levelmap zstd_levels[] = {
 	{ZIO_ZSTD_LEVEL_1, ZIO_ZSTD_LEVEL_1},
 	{ZIO_ZSTD_LEVEL_2, ZIO_ZSTD_LEVEL_2},
 	{ZIO_ZSTD_LEVEL_3, ZIO_ZSTD_LEVEL_3},
 	{ZIO_ZSTD_LEVEL_4, ZIO_ZSTD_LEVEL_4},
 	{ZIO_ZSTD_LEVEL_5, ZIO_ZSTD_LEVEL_5},
 	{ZIO_ZSTD_LEVEL_6, ZIO_ZSTD_LEVEL_6},
 	{ZIO_ZSTD_LEVEL_7, ZIO_ZSTD_LEVEL_7},
 	{ZIO_ZSTD_LEVEL_8, ZIO_ZSTD_LEVEL_8},
 	{ZIO_ZSTD_LEVEL_9, ZIO_ZSTD_LEVEL_9},
 	{ZIO_ZSTD_LEVEL_10, ZIO_ZSTD_LEVEL_10},
 	{ZIO_ZSTD_LEVEL_11, ZIO_ZSTD_LEVEL_11},
 	{ZIO_ZSTD_LEVEL_12, ZIO_ZSTD_LEVEL_12},
 	{ZIO_ZSTD_LEVEL_13, ZIO_ZSTD_LEVEL_13},
 	{ZIO_ZSTD_LEVEL_14, ZIO_ZSTD_LEVEL_14},
 	{ZIO_ZSTD_LEVEL_15, ZIO_ZSTD_LEVEL_15},
 	{ZIO_ZSTD_LEVEL_16, ZIO_ZSTD_LEVEL_16},
 	{ZIO_ZSTD_LEVEL_17, ZIO_ZSTD_LEVEL_17},
 	{ZIO_ZSTD_LEVEL_18, ZIO_ZSTD_LEVEL_18},
 	{ZIO_ZSTD_LEVEL_19, ZIO_ZSTD_LEVEL_19},
 	{-1, ZIO_ZSTD_LEVEL_FAST_1},
 	{-2, ZIO_ZSTD_LEVEL_FAST_2},
 	{-3, ZIO_ZSTD_LEVEL_FAST_3},
 	{-4, ZIO_ZSTD_LEVEL_FAST_4},
 	{-5, ZIO_ZSTD_LEVEL_FAST_5},
 	{-6, ZIO_ZSTD_LEVEL_FAST_6},
 	{-7, ZIO_ZSTD_LEVEL_FAST_7},
 	{-8, ZIO_ZSTD_LEVEL_FAST_8},
 	{-9, ZIO_ZSTD_LEVEL_FAST_9},
 	{-10, ZIO_ZSTD_LEVEL_FAST_10},
 	{-20, ZIO_ZSTD_LEVEL_FAST_20},
 	{-30, ZIO_ZSTD_LEVEL_FAST_30},
 	{-40, ZIO_ZSTD_LEVEL_FAST_40},
 	{-50, ZIO_ZSTD_LEVEL_FAST_50},
 	{-60, ZIO_ZSTD_LEVEL_FAST_60},
 	{-70, ZIO_ZSTD_LEVEL_FAST_70},
 	{-80, ZIO_ZSTD_LEVEL_FAST_80},
 	{-90, ZIO_ZSTD_LEVEL_FAST_90},
 	{-100, ZIO_ZSTD_LEVEL_FAST_100},
 	{-500, ZIO_ZSTD_LEVEL_FAST_500},
 	{-1000, ZIO_ZSTD_LEVEL_FAST_1000},
 };
 
 /*
  * This variable represents the maximum count of the pool based on the number
  * of CPUs plus some buffer. We default to cpu count * 4, see init_zstd.
  */
 static int pool_count = 16;
 
 /* Number of slots scanned in each memory pool. */
 #define	ZSTD_POOL_MAX		pool_count
 /*
  * Seconds an unused pool buffer is kept before it may be reaped (two
  * minutes).  Parenthesized so the macro expands as one value in any
  * expression, e.g. next to '/' or '%'.
  */
 #define	ZSTD_POOL_TIMEOUT	(60 * 2)
 
 /* Reserved decompression fallback buffer (see struct zstd_fallback_mem). */
 static struct zstd_fallback_mem zstd_dctx_fallback;
 /* Pool slot arrays; presumably one for compression and one for */
 /* decompression contexts, going by the names - TODO confirm. */
 static struct zstd_pool *zstd_mempool_cctx;
 static struct zstd_pool *zstd_mempool_dctx;
 
 /*
  * The library zstd code expects these if ADDRESS_SANITIZER gets defined,
  * and while ASAN does this, KASAN defines that and does not. So to avoid
  * changing the external code, we do this.
  *
  * In kernel builds with ADDRESS_SANITIZER defined, the two functions below
  * are declared and then defined as empty stubs, so the library's calls
  * resolve but do nothing.
  */
 #if defined(ZFS_ASAN_ENABLED)
 #define	ADDRESS_SANITIZER 1
 #endif
 #if defined(_KERNEL) && defined(ADDRESS_SANITIZER)
 void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
 void __asan_poison_memory_region(void const volatile *addr, size_t size);
 void __asan_unpoison_memory_region(void const volatile *addr, size_t size) {};
 void __asan_poison_memory_region(void const volatile *addr, size_t size) {};
 #endif
 
 
 /*
  * Walk one memory pool and free the buffer of every slot that is not in
  * use (its barrier can be taken) and whose timeout has passed.  Does
  * nothing for a NULL pool or while the "buffers" counter is zero.
  */
 static void
 zstd_mempool_reap(struct zstd_pool *zstd_mempool)
 {
 	if (!zstd_mempool || !ZSTDSTAT(zstd_stat_buffers)) {
 		return;
 	}
 
 	/* free obsolete slots */
 	for (int slot = 0; slot < ZSTD_POOL_MAX; slot++) {
 		struct zstd_pool *entry = &zstd_mempool[slot];
 
 		/* Skip empty slots and slots another thread is using. */
 		if (!entry->mem || !mutex_tryenter(&entry->barrier)) {
 			continue;
 		}
 
 		/*
 		 * Re-check under the lock, then free memory if the unused
 		 * object is older than 2 minutes.
 		 */
 		if (entry->mem && gethrestime_sec() > entry->timeout) {
 			vmem_free(entry->mem, entry->size);
 			ZSTDSTAT_SUB(zstd_stat_buffers, 1);
 			ZSTDSTAT_SUB(zstd_stat_size, entry->size);
 			entry->mem = NULL;
 			entry->size = 0;
 			entry->timeout = 0;
 		}
 
 		mutex_exit(&entry->barrier);
 	}
 }
 
 /*
  * Try to get a cached allocated buffer from memory pool or allocate a new one
  * if necessary. If a object is older than 2 minutes and does not fit the
  * requested size, it will be released and a new cached entry will be allocated.
  * If other pooled objects are detected without being used for 2 minutes, they
  * will be released, too.
  *
  * The concept is that high frequency memory allocations of bigger objects are
  * expensive. So if a lot of work is going on, allocations will be kept for a
  * while and can be reused in that time frame.
  *
  * The scheduled release will be updated every time a object is reused.
  *
  * Returns NULL when zstd_mempool is NULL or when the final non-sleeping
  * allocation fails.  When a pooled buffer is returned, the slot's barrier
  * mutex is still held by the caller; zstd_mempool_free() releases it.  A
  * buffer from the last-resort path is tagged ZSTD_KMEM_DEFAULT and has no
  * slot (pool == NULL).
  */
 
 static void *
 zstd_mempool_alloc(struct zstd_pool *zstd_mempool, size_t size)
 {
 	struct zstd_pool *pool;
 	struct zstd_kmem *mem = NULL;
 
 	if (!zstd_mempool) {
 		return (NULL);
 	}
 
 	/* Seek for preallocated memory slot and free obsolete slots */
 	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
 		pool = &zstd_mempool[i];
 		/*
 		 * This lock is simply a marker for a pool object being in use.
 		 * If it's already held, the slot will be skipped.
 		 *
 		 * We need to take it before checking the slot to avoid race
 		 * conditions caused by running in a threaded context.
 		 *
 		 * The lock is later released by zstd_mempool_free.
 		 */
 		if (mutex_tryenter(&pool->barrier)) {
 			/*
 			 * Check if objects fits the size, if so we take it and
 			 * update the timestamp.
 			 */
 			if (pool->mem && size <= pool->size) {
 				pool->timeout = gethrestime_sec() +
 				    ZSTD_POOL_TIMEOUT;
 				mem = pool->mem;
 				/* Returned with pool->barrier still held. */
 				return (mem);
 			}
 			mutex_exit(&pool->barrier);
 		}
 	}
 
 	/*
 	 * If no preallocated slot was found, try to fill in a new one.
 	 *
 	 * We run a similar algorithm twice here to avoid pool fragmentation.
 	 * The first one may generate holes in the list if objects get released.
 	 * We always make sure that these holes get filled instead of adding new
 	 * allocations constantly at the end.
 	 */
 	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
 		pool = &zstd_mempool[i];
 		if (mutex_tryenter(&pool->barrier)) {
 			/* Object is free, try to allocate new one */
 			if (!pool->mem) {
 				mem = vmem_alloc(size, KM_SLEEP);
 				if (mem) {
 					ZSTDSTAT_ADD(zstd_stat_buffers, 1);
 					ZSTDSTAT_ADD(zstd_stat_size, size);
 					pool->mem = mem;
 					pool->size = size;
 					/* Keep track for later release */
 					mem->pool = pool;
 					mem->kmem_type = ZSTD_KMEM_POOL;
 					mem->kmem_size = size;
 				}
 			}
 
 			/*
 			 * Covers both a slot filled just above and a slot
 			 * that another thread filled since the first scan.
 			 */
 			if (size <= pool->size) {
 				/* Update timestamp */
 				pool->timeout = gethrestime_sec() +
 				    ZSTD_POOL_TIMEOUT;
 
 				/* Returned with pool->barrier still held. */
 				return (pool->mem);
 			}
 
 			mutex_exit(&pool->barrier);
 		}
 	}
 
 	/*
 	 * If the pool is full or the allocation failed, try lazy allocation
 	 * instead.
 	 */
 	if (!mem) {
 		mem = vmem_alloc(size, KM_NOSLEEP);
 		if (mem) {
 			mem->pool = NULL;
 			mem->kmem_type = ZSTD_KMEM_DEFAULT;
 			mem->kmem_size = size;
 		}
 	}
 
 	return (mem);
 }
 
 /*
  * Mark object as released by releasing the barrier mutex.  The buffer
  * itself stays in its slot.  "z" must be a pooled buffer: z->pool is
  * dereferenced without a NULL check.
  */
 static void
 zstd_mempool_free(struct zstd_kmem *z)
 {
 	mutex_exit(&z->pool->barrier);
 }
 
 /*
  * Convert ZFS internal enum to ZSTD level.
  *
  * On success stores the zstd level in *zstd_level and returns 0.  Returns
  * 1, leaving *zstd_level untouched, for an enum value outside both the
  * regular (1..19) and the fast ranges.
  */
 static int
 zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
 {
 	int row;
 
 	if (level > 0 && level <= ZIO_ZSTD_LEVEL_19) {
 		/* Regular levels occupy the first rows of zstd_levels. */
 		row = level - 1;
 	} else if (level >= ZIO_ZSTD_LEVEL_FAST_1 &&
 	    level <= ZIO_ZSTD_LEVEL_FAST_1000) {
 		/* Fast levels follow directly after the regular ones. */
 		row = level - ZIO_ZSTD_LEVEL_FAST_1 + ZIO_ZSTD_LEVEL_19;
 	} else {
 		/*
 		 * Invalid/unknown zfs compression enum - this should never
 		 * happen.
 		 */
 		return (1);
 	}
 
 	*zstd_level = zstd_levels[row].zstd_level;
 	return (0);
 }
 
 
 /*
  * Compress a block with zstd, optionally running a cheap early-abort
  * heuristic first.
  *
  * Returns the value of zfs_zstd_compress() for the requested level, or
  * s_len (meaning "not compressed") when the level is invalid or the
  * heuristic rejects the block.
  */
 size_t
 zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len, size_t d_len,
     int level)
 {
 	int16_t zstd_level;
 	if (zstd_enum_to_level(level, &zstd_level)) {
 		ZSTDSTAT_BUMP(zstd_stat_com_inval);
 		return (s_len);
 	}
 	/*
 	 * A zstd early abort heuristic.
 	 *
 	 * - Zeroth, if this is below zstd_cutoff_level (zstd-3 by default),
 	 *   or smaller than zstd_abort_size (currently 128k), don't try any
 	 *   of this, just go.
 	 *   (because experimentally that was a reasonable cutoff for a perf win
 	 *   with tiny ratio change)
 	 * - First, we try LZ4 compression, and if it doesn't early abort, we
 	 *   jump directly to whatever compression level we intended to try.
 	 * - Second, we try zstd-1 - if that errors out (usually, but not
 	 *   exclusively, if it would overflow), we give up early.
 	 *
 	 *   If it works, instead we go on and compress anyway.
 	 *
 	 * Why two passes? LZ4 alone gets you a lot of the way, but on highly
 	 * compressible data, it was losing up to 8.5% of the compressed
 	 * savings versus no early abort, and all the zstd-fast levels are
 	 * worse indications on their own than LZ4, and don't improve the LZ4
 	 * pass noticeably if stacked like this.
 	 */
 	size_t actual_abort_size = zstd_abort_size;
 	if (zstd_earlyabort_pass > 0 && zstd_level >= zstd_cutoff_level &&
 	    s_len >= actual_abort_size) {
 		/*
 		 * Both probe passes report a size_t; keep it unsigned so the
 		 * comparisons with s_len/d_len do not mix signedness.
 		 */
 		size_t pass_len;
 
 		pass_len = lz4_compress_zfs(s_start, d_start, s_len, d_len, 0);
 		if (pass_len < d_len) {
 			ZSTDSTAT_BUMP(zstd_stat_lz4pass_allowed);
 			goto keep_trying;
 		}
 		ZSTDSTAT_BUMP(zstd_stat_lz4pass_rejected);
 
 		pass_len = zfs_zstd_compress(s_start, d_start, s_len, d_len,
 		    ZIO_ZSTD_LEVEL_1);
 		if (pass_len == s_len || pass_len == 0 || pass_len > d_len) {
 			ZSTDSTAT_BUMP(zstd_stat_zstdpass_rejected);
 			return (s_len);
 		}
 		ZSTDSTAT_BUMP(zstd_stat_zstdpass_allowed);
 	} else {
 		ZSTDSTAT_BUMP(zstd_stat_passignored);
 		if (s_len < actual_abort_size) {
 			ZSTDSTAT_BUMP(zstd_stat_passignored_size);
 		}
 	}
 keep_trying:
 	return (zfs_zstd_compress(s_start, d_start, s_len, d_len, level));
 
 }
 
 /*
  * Compress block using zstd
  *
  * The output in d_start is a zfs_zstdhdr_t followed by the compressed
  * payload.  Returns the payload length plus the header size on success.
  * Returns s_len when the level is invalid, when no compression context
  * can be created, or when the zstd library reports an error (including
  * "destination too small").
  */
 size_t
 zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
     int level)
 {
 	size_t c_len;
 	int16_t zstd_level;
 	zfs_zstdhdr_t *hdr;
 	ZSTD_CCtx *cctx;
 
 	/* The header is laid over the start of the destination buffer. */
 	hdr = (zfs_zstdhdr_t *)d_start;
 
 	/* Skip compression if the specified level is invalid */
 	if (zstd_enum_to_level(level, &zstd_level)) {
 		ZSTDSTAT_BUMP(zstd_stat_com_inval);
 		return (s_len);
 	}
 
 	ASSERT3U(d_len, >=, sizeof (*hdr));
 	ASSERT3U(d_len, <=, s_len);
 	ASSERT3U(zstd_level, !=, 0);
 
 	cctx = ZSTD_createCCtx_advanced(zstd_malloc);
 
 	/*
 	 * Out of kernel memory, gently fall through - this will disable
 	 * compression in zio_compress_data
 	 */
 	if (!cctx) {
 		ZSTDSTAT_BUMP(zstd_stat_com_alloc_fail);
 		return (s_len);
 	}
 
 	/* Set the compression level */
 	ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, zstd_level);
 
 	/* Use the "magicless" zstd header which saves us 4 header bytes */
 	ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless);
 
 	/*
 	 * Disable redundant checksum calculation and content size storage since
 	 * this is already done by ZFS itself.
 	 */
 	ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0);
 	ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0);
 
 	/* The payload goes after the header, so it has less room than d_len. */
 	c_len = ZSTD_compress2(cctx,
 	    hdr->data,
 	    d_len - sizeof (*hdr),
 	    s_start, s_len);
 
 	ZSTD_freeCCtx(cctx);
 
 	/* Error in the compression routine, disable compression. */
 	if (ZSTD_isError(c_len)) {
 		/*
 		 * If we are aborting the compression because the saves are
 		 * too small, that is not a failure. Everything else is a
 		 * failure, so increment the compression failure counter.
 		 */
 		int err = ZSTD_getErrorCode(c_len);
 		if (err != ZSTD_error_dstSize_tooSmall) {
 			ZSTDSTAT_BUMP(zstd_stat_com_fail);
 			dprintf("Error: %s", ZSTD_getErrorString(err));
 		}
 		return (s_len);
 	}
 
 	/*
 	 * Encode the compressed buffer size at the start. We'll need this in
 	 * decompression to counter the effects of padding which might be added
 	 * to the compressed buffer and which, if unhandled, would confuse the
 	 * hell out of our decompression function.
 	 */
 	hdr->c_len = BE_32(c_len);
 
 	/*
 	 * Check version for overflow.
 	 * The limit of 24 bits must not be exceeded. This allows a maximum
 	 * version 1677.72.15 which we don't expect to be ever reached.
 	 */
 	ASSERT3U(ZSTD_VERSION_NUMBER, <=, 0xFFFFFF);
 
 	/*
 	 * Encode the compression level as well. We may need to know the
 	 * original compression level if compressed_arc is disabled, to match
 	 * the compression settings to write this block to the L2ARC.
 	 *
 	 * Encode the actual level, so if the enum changes in the future, we
 	 * will be compatible.
 	 *
 	 * The upper 24 bits store the ZSTD version to be able to provide
 	 * future compatibility, since new versions might enhance the
 	 * compression algorithm in a way, where the compressed data will
 	 * change.
 	 *
 	 * As soon as such incompatibility occurs, handling code needs to be
 	 * added, differentiating between the versions.
 	 */
 	zfs_set_hdrversion(hdr, ZSTD_VERSION_NUMBER);
 	zfs_set_hdrlevel(hdr, level);
 	/* Byte-swap only after both fields are set in the raw word. */
 	hdr->raw_version_level = BE_32(hdr->raw_version_level);
 
 	return (c_len + sizeof (*hdr));
 }
 
 /*
  * Decompress a block using zstd and optionally return its stored level.
  *
  * Parameters:
  * s_start	Source buffer: a zfs_zstdhdr_t followed by the compressed
  *		payload (the buffer may be longer than the payload).
  * d_start	Destination buffer.
  * s_len	Size of the source buffer in bytes.
  * d_len	Capacity of the destination buffer in bytes.
  * level	If non-NULL, receives the level value stored in the header.
  *		Only written on success.
  *
  * Returns 0 on success and 1 on any failure (header too short or
  * inconsistent, invalid stored level, context allocation failure, or a
  * zstd decoding error). Each failure path bumps its own kstat counter.
  */
 int
 zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
     size_t d_len, uint8_t *level)
 {
 	ZSTD_DCtx *dctx;
 	size_t result;
 	int16_t zstd_level;
 	uint32_t c_len;
 	const zfs_zstdhdr_t *hdr;
 	zfs_zstdhdr_t hdr_copy;
 
 	/*
 	 * The header is read below, so the source buffer must be at least
 	 * large enough to contain it. This also guarantees that the
 	 * subtraction in the payload size check cannot underflow.
 	 */
 	if (s_len < sizeof (*hdr)) {
 		ZSTDSTAT_BUMP(zstd_stat_dec_header_inval);
 		return (1);
 	}
 
 	hdr = (const zfs_zstdhdr_t *)s_start;
 	c_len = BE_32(hdr->c_len);
 
 	/*
 	 * Make a copy instead of directly converting the header, since we must
 	 * not modify the original data that may be used again later.
 	 */
 	hdr_copy.raw_version_level = BE_32(hdr->raw_version_level);
 	uint8_t curlevel = zfs_get_hdrlevel(&hdr_copy);
 
 	/*
 	 * NOTE: We ignore the ZSTD version for now. As soon as any
 	 * incompatibility occurs, it has to be handled accordingly.
 	 * The version can be accessed via `hdr_copy.version`.
 	 */
 
 	/*
 	 * Convert and check the level
 	 * An invalid level is a strong indicator for data corruption! In such
 	 * case return an error so the upper layers can try to fix it.
 	 */
 	if (zstd_enum_to_level(curlevel, &zstd_level)) {
 		ZSTDSTAT_BUMP(zstd_stat_dec_inval);
 		return (1);
 	}
 
 	ASSERT3U(d_len, >=, s_len);
 	ASSERT3U(curlevel, !=, ZIO_COMPLEVEL_INHERIT);
 
 	/*
 	 * Invalid compressed buffer size encoded at start.
 	 *
 	 * Compare by subtraction rather than by `c_len + sizeof (*hdr)`:
 	 * where size_t is 32 bits wide the addition can wrap for a corrupt
 	 * c_len close to UINT32_MAX and let an out-of-bounds length through.
 	 */
 	if (c_len > s_len - sizeof (*hdr)) {
 		ZSTDSTAT_BUMP(zstd_stat_dec_header_inval);
 		return (1);
 	}
 
 	dctx = ZSTD_createDCtx_advanced(zstd_dctx_malloc);
 	if (!dctx) {
 		ZSTDSTAT_BUMP(zstd_stat_dec_alloc_fail);
 		return (1);
 	}
 
 	/* Set header type to "magicless" */
 	ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless);
 
 	/* Decompress the data and release the context */
 	result = ZSTD_decompressDCtx(dctx, d_start, d_len, hdr->data, c_len);
 	ZSTD_freeDCtx(dctx);
 
 	/*
 	 * Return 0 on success and non-zero on failure (the decompression
 	 * function reported an error code).
 	 */
 	if (ZSTD_isError(result)) {
 		ZSTDSTAT_BUMP(zstd_stat_dec_fail);
 		return (1);
 	}
 
 	if (level) {
 		*level = curlevel;
 	}
 
 	return (0);
 }
 
 /*
  * Decompress datablock using zstd.
  *
  * Thin wrapper around zfs_zstd_decompress_level() for callers that do not
  * need the stored compression level: NULL is passed as the level output.
  * The `level` argument is accepted but not used. The return value of
  * zfs_zstd_decompress_level() is passed through unchanged.
  */
 int
 zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len,
     int level __maybe_unused)
 {
 
 	return (zfs_zstd_decompress_level(s_start, d_start, s_len, d_len,
 	    NULL));
 }
 
 /*
  * Allocation callback for zstd compression contexts.
  *
  * Requests `size` bytes plus room for a leading struct zstd_kmem from the
  * compression memory pool. On success the returned pointer addresses the
  * bytes immediately after that struct; on failure the allocation-failure
  * counter is bumped and NULL is returned. `opaque` is unused.
  */
 static void *
 zstd_alloc(void *opaque __maybe_unused, size_t size)
 {
 	const size_t total = sizeof (struct zstd_kmem) + size;
 	struct zstd_kmem *kmem;
 
 	kmem = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_cctx,
 	    total);
 	if (kmem == NULL) {
 		ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
 		return (NULL);
 	}
 
 	/* Hand out the region that follows the bookkeeping struct. */
 	return ((char *)kmem + sizeof (struct zstd_kmem));
 }
 
 /*
  * Allocation callback for zstd decompression contexts.
  *
  * Three sources are tried in order:
  *   1. the decompression memory pool;
  *   2. vmem_alloc() with KM_SLEEP;
  *   3. the preallocated fallback buffer in zstd_dctx_fallback.
  *
  * The fallback buffer is guarded by zstd_dctx_fallback.barrier, which is
  * taken here and released by zstd_free() when the buffer is handed back.
  * For sources 2 and 3 the leading struct zstd_kmem is stamped with the
  * allocation type and size so that zstd_free() can tell how to release it.
  *
  * Returns a pointer to the bytes following the struct zstd_kmem, or NULL
  * if even the fallback buffer is unavailable. `opaque` is unused.
  */
 static void *
 zstd_dctx_alloc(void *opaque __maybe_unused, size_t size)
 {
 	const size_t total = sizeof (struct zstd_kmem) + size;
 	enum zstd_kmem_type kind = ZSTD_KMEM_DEFAULT;
 	struct zstd_kmem *kmem;
 
 	/* Fast path: the pool satisfied the request. */
 	kmem = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_dctx,
 	    total);
 	if (kmem != NULL) {
 		return ((char *)kmem + sizeof (struct zstd_kmem));
 	}
 
 	/* Try harder, decompression shall not fail */
 	kmem = vmem_alloc(total, KM_SLEEP);
 	if (kmem != NULL) {
 		kmem->pool = NULL;
 	}
 	ZSTDSTAT_BUMP(zstd_stat_alloc_fail);
 
 	if (kmem == NULL) {
 		/*
 		 * Last resort: the shared fallback buffer. Only one thread
 		 * can use it at a time, so later threads block on the barrier
 		 * until zstd_free() releases it.
 		 */
 		mutex_enter(&zstd_dctx_fallback.barrier);
 
 		kmem = zstd_dctx_fallback.mem;
 		kind = ZSTD_KMEM_DCTX;
 		ZSTDSTAT_BUMP(zstd_stat_alloc_fallback);
 
 		/* Allocation should always be successful */
 		if (kmem == NULL) {
 			return (NULL);
 		}
 	}
 
 	kmem->kmem_type = kind;
 	kmem->kmem_size = total;
 
 	return ((char *)kmem + sizeof (struct zstd_kmem));
 }
 
 /*
  * Free callback matching zstd_alloc() and zstd_dctx_alloc().
  *
  * `ptr` is a pointer previously handed out by one of the allocators; the
  * struct zstd_kmem that precedes it records how the memory was obtained
  * and therefore how it has to be released:
  *   ZSTD_KMEM_DEFAULT	vmem_free() with the recorded size
  *   ZSTD_KMEM_POOL	returned to the memory pool
  *   ZSTD_KMEM_DCTX	fallback buffer; only the barrier is released
  * Any other type is ignored. `opaque` is unused.
  */
 static void
 zstd_free(void *opaque __maybe_unused, void *ptr)
 {
 	void *base = (char *)ptr - sizeof (struct zstd_kmem);
 	struct zstd_kmem *kmem = base;
 	enum zstd_kmem_type kind;
 
 	ASSERT3U(kmem->kmem_type, <, ZSTD_KMEM_COUNT);
 	ASSERT3U(kmem->kmem_type, >, ZSTD_KMEM_UNKNOWN);
 
 	kind = kmem->kmem_type;
 	if (kind == ZSTD_KMEM_DEFAULT) {
 		vmem_free(kmem, kmem->kmem_size);
 	} else if (kind == ZSTD_KMEM_POOL) {
 		zstd_mempool_free(kmem);
 	} else if (kind == ZSTD_KMEM_DCTX) {
 		mutex_exit(&zstd_dctx_fallback.barrier);
 	}
 }
 
 /*
  * Set up a fallback memory descriptor.
  *
  * Initializes the barrier mutex of `mem`, records `size` as its buffer
  * size and allocates a zero-filled buffer of that size with KM_SLEEP.
  */
 static void __init
 create_fallback_mem(struct zstd_fallback_mem *mem, size_t size)
 {
 	mutex_init(&mem->barrier, NULL, MUTEX_DEFAULT, NULL);
 
 	mem->mem_size = size;
 	mem->mem = vmem_zalloc(mem->mem_size, KM_SLEEP);
 }
 
 /*
  * Allocate the memory pool arrays and initialize their barrier mutexes.
  *
  * Two zero-filled arrays of ZSTD_POOL_MAX struct zstd_pool entries are
  * allocated with KM_SLEEP, one for compression contexts
  * (zstd_mempool_cctx) and one for decompression contexts
  * (zstd_mempool_dctx). Every entry then gets its barrier mutex
  * initialized.
  */
 static void __init
 zstd_mempool_init(void)
 {
-	zstd_mempool_cctx = (struct zstd_pool *)
+	zstd_mempool_cctx =
 	    kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP);
-	zstd_mempool_dctx = (struct zstd_pool *)
+	zstd_mempool_dctx =
 	    kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP);
 
 	/* One mutex per pool slot, in both arrays. */
 	for (int i = 0; i < ZSTD_POOL_MAX; i++) {
 		mutex_init(&zstd_mempool_cctx[i].barrier, NULL,
 		    MUTEX_DEFAULT, NULL);
 		mutex_init(&zstd_mempool_dctx[i].barrier, NULL,
 		    MUTEX_DEFAULT, NULL);
 	}
 }
 
 /*
  * Initialize zstd-related memory handling.
  *
  * Sets up the memory pools and then the fallback decompression buffer.
  * Always returns 0.
  */
 static int __init
 zstd_meminit(void)
 {
 	size_t fallback_size;
 
 	zstd_mempool_init();
 
 	/*
 	 * The fallback buffer has to hold a decompression context plus the
 	 * struct zstd_kmem placed in front of it, rounded up to whole pages.
 	 * The expected size on x64 with current ZSTD should be about 160 KB.
 	 */
 	fallback_size = P2ROUNDUP(
 	    ZSTD_estimateDCtxSize() + sizeof (struct zstd_kmem), PAGESIZE);
 	create_fallback_mem(&zstd_dctx_fallback, fallback_size);
 
 	return (0);
 }
 
 /*
  * Tear down a single pool slot.
  *
  * Destroys the slot's barrier mutex, frees its buffer with the recorded
  * size and resets both fields so the slot reads as empty afterwards.
  */
 static void
 release_pool(struct zstd_pool *pool)
 {
 	mutex_destroy(&pool->barrier);
 	vmem_free(pool->mem, pool->size);
 
 	/* Leave no stale pointer or size behind. */
 	pool->size = 0;
 	pool->mem = NULL;
 }
 
 /*
  * Release all memory pool objects and the pool arrays themselves.
  *
  * Every slot of both the compression and the decompression pool is
  * released first; afterwards the two arrays are freed and the global
  * pointers are cleared.
  */
 static void
 zstd_mempool_deinit(void)
 {
 	const size_t array_bytes = ZSTD_POOL_MAX * sizeof (struct zstd_pool);
 	int slot = 0;
 
 	while (slot < ZSTD_POOL_MAX) {
 		release_pool(&zstd_mempool_cctx[slot]);
 		release_pool(&zstd_mempool_dctx[slot]);
 		slot++;
 	}
 
 	kmem_free(zstd_mempool_dctx, array_bytes);
 	kmem_free(zstd_mempool_cctx, array_bytes);
 	zstd_mempool_dctx = NULL;
 	zstd_mempool_cctx = NULL;
 }
 
 /*
  * Release unused memory from the pools.
  *
  * Runs zstd_mempool_reap() on the compression pool and then on the
  * decompression pool.
  */
 
 void
 zfs_zstd_cache_reap_now(void)
 {
 	/*
 	 * NOTE(review): presumably the reaper seeks and releases old,
 	 * unused pool objects - confirm against zstd_mempool_reap().
 	 */
 	zstd_mempool_reap(zstd_mempool_cctx);
 	zstd_mempool_reap(zstd_mempool_dctx);
 }
 
 /*
  * Module initialization for the zstd glue code.
  *
  * Sizes the memory pools, sets up pool and fallback memory, and registers
  * the "zstd" kstat. Failure to create the kstat is not treated as an
  * error: the function always returns 0.
  */
 extern int __init
 zstd_init(void)
 {
 	/* Set pool size by using maximum sane thread count * 4 */
 	pool_count = (boot_ncpus * 4);
 	zstd_meminit();
 
 	/* Initialize kstat */
 	zstd_ksp = kstat_create("zfs", 0, "zstd", "misc",
 	    KSTAT_TYPE_NAMED, sizeof (zstd_stats) / sizeof (kstat_named_t),
 	    KSTAT_FLAG_VIRTUAL);
 	if (zstd_ksp == NULL) {
 		/* No statistics available; carry on without them. */
 		return (0);
 	}
 
 	zstd_ksp->ks_data = &zstd_stats;
 	kstat_install(zstd_ksp);
 #ifdef _KERNEL
 	zstd_ksp->ks_update = kstat_zstd_update;
 #endif
 
 	return (0);
 }
 
 /*
  * Module teardown for the zstd glue code.
  *
  * Removes the kstat (if one was created), frees the fallback
  * decompression buffer and destroys its barrier mutex, and finally
  * releases the memory pools.
  */
 extern void
 zstd_fini(void)
 {
 	/* Deinitialize kstat */
 	if (zstd_ksp != NULL) {
 		kstat_delete(zstd_ksp);
 		zstd_ksp = NULL;
 	}
 
 	/* Release fallback memory */
 	vmem_free(zstd_dctx_fallback.mem, zstd_dctx_fallback.mem_size);
 	mutex_destroy(&zstd_dctx_fallback.barrier);
 
 	/* Deinit memory pool */
 	zstd_mempool_deinit();
 }
 
 /*
  * Kernel-only module glue: init/fini registration (FreeBSD only here) and
  * the zstd module parameters.
  */
 #if defined(_KERNEL)
 #ifdef __FreeBSD__
 /* On FreeBSD the init and fini hooks are registered from this file. */
 module_init(zstd_init);
 module_exit(zstd_fini);
 #endif
 
 /* Early-abort tunables, declared with the ZMOD_RW flag as UINT values. */
 ZFS_MODULE_PARAM(zfs, zstd_, earlyabort_pass, UINT, ZMOD_RW,
 	"Enable early abort attempts when using zstd");
 ZFS_MODULE_PARAM(zfs, zstd_, abort_size, UINT, ZMOD_RW,
 	"Minimal size of block to attempt early abort");
 #endif