diff --git a/module/icp/algs/aes/aes_impl.c b/module/icp/algs/aes/aes_impl.c index 037be0db60d7..a5b88b8aab25 100644 --- a/module/icp/algs/aes/aes_impl.c +++ b/module/icp/algs/aes/aes_impl.c @@ -1,443 +1,442 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ #include #include #include #include #include #include /* * Initialize AES encryption and decryption key schedules. * * Parameters: * cipherKey User key * keyBits AES key size (128, 192, or 256 bits) * keysched AES key schedule to be initialized, of type aes_key_t. * Allocated by aes_alloc_keysched(). */ void aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched) { const aes_impl_ops_t *ops = aes_impl_get_ops(); aes_key_t *newbie = keysched; uint_t keysize, i, j; union { uint64_t ka64[4]; uint32_t ka32[8]; } keyarr; switch (keyBits) { case 128: newbie->nr = 10; break; case 192: newbie->nr = 12; break; case 256: newbie->nr = 14; break; default: /* should never get here */ return; } keysize = CRYPTO_BITS2BYTES(keyBits); /* * Generic C implementation requires byteswap for little endian * machines, various accelerated implementations for various * architectures may not. */ if (!ops->needs_byteswap) { /* no byteswap needed */ if (IS_P2ALIGNED(cipherKey, sizeof (uint64_t))) { for (i = 0, j = 0; j < keysize; i++, j += 8) { /* LINTED: pointer alignment */ keyarr.ka64[i] = *((uint64_t *)&cipherKey[j]); } } else { bcopy(cipherKey, keyarr.ka32, keysize); } } else { /* byte swap */ for (i = 0, j = 0; j < keysize; i++, j += 4) { keyarr.ka32[i] = htonl(*(uint32_t *)(void *)&cipherKey[j]); } } ops->generate(newbie, keyarr.ka32, keyBits); newbie->ops = ops; /* * Note: if there are systems that need the AES_64BIT_KS type in the * future, move setting key schedule type to individual implementations */ newbie->type = AES_32BIT_KS; } /* * Encrypt one block using AES. * Align if needed and (for x86 32-bit only) byte-swap. * * Parameters: * ks Key schedule, of type aes_key_t * pt Input block (plain text) * ct Output block (crypto text). 
Can overlap with pt */ int aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct) { aes_key_t *ksch = (aes_key_t *)ks; const aes_impl_ops_t *ops = ksch->ops; if (IS_P2ALIGNED2(pt, ct, sizeof (uint32_t)) && !ops->needs_byteswap) { /* LINTED: pointer alignment */ ops->encrypt(&ksch->encr_ks.ks32[0], ksch->nr, /* LINTED: pointer alignment */ (uint32_t *)pt, (uint32_t *)ct); } else { uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)]; /* Copy input block into buffer */ if (ops->needs_byteswap) { buffer[0] = htonl(*(uint32_t *)(void *)&pt[0]); buffer[1] = htonl(*(uint32_t *)(void *)&pt[4]); buffer[2] = htonl(*(uint32_t *)(void *)&pt[8]); buffer[3] = htonl(*(uint32_t *)(void *)&pt[12]); } else bcopy(pt, &buffer, AES_BLOCK_LEN); ops->encrypt(&ksch->encr_ks.ks32[0], ksch->nr, buffer, buffer); /* Copy result from buffer to output block */ if (ops->needs_byteswap) { *(uint32_t *)(void *)&ct[0] = htonl(buffer[0]); *(uint32_t *)(void *)&ct[4] = htonl(buffer[1]); *(uint32_t *)(void *)&ct[8] = htonl(buffer[2]); *(uint32_t *)(void *)&ct[12] = htonl(buffer[3]); } else bcopy(&buffer, ct, AES_BLOCK_LEN); } return (CRYPTO_SUCCESS); } /* * Decrypt one block using AES. * Align and byte-swap if needed. * * Parameters: * ks Key schedule, of type aes_key_t * ct Input block (crypto text) * pt Output block (plain text). Can overlap with ct */ int aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt) { aes_key_t *ksch = (aes_key_t *)ks; const aes_impl_ops_t *ops = ksch->ops; if (IS_P2ALIGNED2(ct, pt, sizeof (uint32_t)) && !ops->needs_byteswap) { /* LINTED: pointer alignment */ ops->decrypt(&ksch->decr_ks.ks32[0], ksch->nr, /* LINTED: pointer alignment */ (uint32_t *)ct, (uint32_t *)pt); } else { uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)]; /* Copy input block into buffer */ if (ops->needs_byteswap) { buffer[0] = htonl(*(uint32_t *)(void *)&ct[0]); buffer[1] = htonl(*(uint32_t *)(void *)&ct[4]); buffer[2] = htonl(*(uint32_t *)(void *)&ct[8]); buffer[3] = htonl(*(uint32_t *)(void *)&ct[12]); } else bcopy(ct, &buffer, AES_BLOCK_LEN); ops->decrypt(&ksch->decr_ks.ks32[0], ksch->nr, buffer, buffer); /* Copy result from buffer to output block */ if (ops->needs_byteswap) { *(uint32_t *)(void *)&pt[0] = htonl(buffer[0]); *(uint32_t *)(void *)&pt[4] = htonl(buffer[1]); *(uint32_t *)(void *)&pt[8] = htonl(buffer[2]); *(uint32_t *)(void *)&pt[12] = htonl(buffer[3]); } else bcopy(&buffer, pt, AES_BLOCK_LEN); } return (CRYPTO_SUCCESS); } /* * Allocate key schedule for AES. * * Return the pointer and set size to the number of bytes allocated. * Memory allocated must be freed by the caller when done. * * Parameters: * size Size of key schedule allocated, in bytes * kmflag Flag passed to kmem_alloc(9F); ignored in userland. 
*/ -/* ARGSUSED */ void * aes_alloc_keysched(size_t *size, int kmflag) { aes_key_t *keysched; keysched = (aes_key_t *)kmem_alloc(sizeof (aes_key_t), kmflag); if (keysched != NULL) { *size = sizeof (aes_key_t); return (keysched); } return (NULL); } /* AES implementation that contains the fastest methods */ static aes_impl_ops_t aes_fastest_impl = { .name = "fastest" }; /* All compiled in implementations */ const aes_impl_ops_t *aes_all_impl[] = { &aes_generic_impl, #if defined(__x86_64) &aes_x86_64_impl, #endif #if defined(__x86_64) && defined(HAVE_AES) &aes_aesni_impl, #endif }; /* Indicate that benchmark has been completed */ static boolean_t aes_impl_initialized = B_FALSE; /* Select aes implementation */ #define IMPL_FASTEST (UINT32_MAX) #define IMPL_CYCLE (UINT32_MAX-1) #define AES_IMPL_READ(i) (*(volatile uint32_t *) &(i)) static uint32_t icp_aes_impl = IMPL_FASTEST; static uint32_t user_sel_impl = IMPL_FASTEST; /* Hold all supported implementations */ static size_t aes_supp_impl_cnt = 0; static aes_impl_ops_t *aes_supp_impl[ARRAY_SIZE(aes_all_impl)]; /* * Returns the AES operations for encrypt/decrypt/key setup. When a * SIMD implementation is not allowed in the current context, then * fallback to the fastest generic implementation. */ const aes_impl_ops_t * aes_impl_get_ops(void) { if (!kfpu_allowed()) return (&aes_generic_impl); const aes_impl_ops_t *ops = NULL; const uint32_t impl = AES_IMPL_READ(icp_aes_impl); switch (impl) { case IMPL_FASTEST: ASSERT(aes_impl_initialized); ops = &aes_fastest_impl; break; case IMPL_CYCLE: /* Cycle through supported implementations */ ASSERT(aes_impl_initialized); ASSERT3U(aes_supp_impl_cnt, >, 0); static size_t cycle_impl_idx = 0; size_t idx = (++cycle_impl_idx) % aes_supp_impl_cnt; ops = aes_supp_impl[idx]; break; default: ASSERT3U(impl, <, aes_supp_impl_cnt); ASSERT3U(aes_supp_impl_cnt, >, 0); if (impl < ARRAY_SIZE(aes_all_impl)) ops = aes_supp_impl[impl]; break; } ASSERT3P(ops, !=, NULL); return (ops); } /* * Initialize all supported implementations. */ void aes_impl_init(void) { aes_impl_ops_t *curr_impl; int i, c; /* Move supported implementations into aes_supp_impls */ for (i = 0, c = 0; i < ARRAY_SIZE(aes_all_impl); i++) { curr_impl = (aes_impl_ops_t *)aes_all_impl[i]; if (curr_impl->is_supported()) aes_supp_impl[c++] = (aes_impl_ops_t *)curr_impl; } aes_supp_impl_cnt = c; /* * Set the fastest implementation given the assumption that the * hardware accelerated version is the fastest. */ #if defined(__x86_64) #if defined(HAVE_AES) if (aes_aesni_impl.is_supported()) { memcpy(&aes_fastest_impl, &aes_aesni_impl, sizeof (aes_fastest_impl)); } else #endif { memcpy(&aes_fastest_impl, &aes_x86_64_impl, sizeof (aes_fastest_impl)); } #else memcpy(&aes_fastest_impl, &aes_generic_impl, sizeof (aes_fastest_impl)); #endif strlcpy(aes_fastest_impl.name, "fastest", AES_IMPL_NAME_MAX); /* Finish initialization */ atomic_swap_32(&icp_aes_impl, user_sel_impl); aes_impl_initialized = B_TRUE; } static const struct { char *name; uint32_t sel; } aes_impl_opts[] = { { "cycle", IMPL_CYCLE }, { "fastest", IMPL_FASTEST }, }; /* * Function sets desired aes implementation. * * If we are called before init(), user preference will be saved in * user_sel_impl, and applied in later init() call. This occurs when module * parameter is specified on module load. Otherwise, directly update * icp_aes_impl. * * @val Name of aes implementation to use * @param Unused. 
*/ int aes_impl_set(const char *val) { int err = -EINVAL; char req_name[AES_IMPL_NAME_MAX]; uint32_t impl = AES_IMPL_READ(user_sel_impl); size_t i; /* sanitize input */ i = strnlen(val, AES_IMPL_NAME_MAX); if (i == 0 || i >= AES_IMPL_NAME_MAX) return (err); strlcpy(req_name, val, AES_IMPL_NAME_MAX); while (i > 0 && isspace(req_name[i-1])) i--; req_name[i] = '\0'; /* Check mandatory options */ for (i = 0; i < ARRAY_SIZE(aes_impl_opts); i++) { if (strcmp(req_name, aes_impl_opts[i].name) == 0) { impl = aes_impl_opts[i].sel; err = 0; break; } } /* check all supported impl if init() was already called */ if (err != 0 && aes_impl_initialized) { /* check all supported implementations */ for (i = 0; i < aes_supp_impl_cnt; i++) { if (strcmp(req_name, aes_supp_impl[i]->name) == 0) { impl = i; err = 0; break; } } } if (err == 0) { if (aes_impl_initialized) atomic_swap_32(&icp_aes_impl, impl); else atomic_swap_32(&user_sel_impl, impl); } return (err); } #if defined(_KERNEL) && defined(__linux__) static int icp_aes_impl_set(const char *val, zfs_kernel_param_t *kp) { return (aes_impl_set(val)); } static int icp_aes_impl_get(char *buffer, zfs_kernel_param_t *kp) { int i, cnt = 0; char *fmt; const uint32_t impl = AES_IMPL_READ(icp_aes_impl); ASSERT(aes_impl_initialized); /* list mandatory options */ for (i = 0; i < ARRAY_SIZE(aes_impl_opts); i++) { fmt = (impl == aes_impl_opts[i].sel) ? "[%s] " : "%s "; cnt += sprintf(buffer + cnt, fmt, aes_impl_opts[i].name); } /* list all supported implementations */ for (i = 0; i < aes_supp_impl_cnt; i++) { fmt = (i == impl) ? "[%s] " : "%s "; cnt += sprintf(buffer + cnt, fmt, aes_supp_impl[i]->name); } return (cnt); } module_param_call(icp_aes_impl, icp_aes_impl_set, icp_aes_impl_get, NULL, 0644); MODULE_PARM_DESC(icp_aes_impl, "Select aes implementation."); #endif diff --git a/module/icp/algs/modes/cbc.c b/module/icp/algs/modes/cbc.c index 85864f56dead..bddb5b64ddd3 100644 --- a/module/icp/algs/modes/cbc.c +++ b/module/icp/algs/modes/cbc.c @@ -1,273 +1,271 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include /* * Algorithm independent CBC functions. 
*/ int cbc_encrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*encrypt)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; uint8_t *blockp; uint8_t *lastp; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; if (length + ctx->cbc_remainder_len < block_size) { /* accumulate bytes here and return */ bcopy(datap, (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len, length); ctx->cbc_remainder_len += length; ctx->cbc_copy_to = datap; return (CRYPTO_SUCCESS); } lastp = (uint8_t *)ctx->cbc_iv; crypto_init_ptrs(out, &iov_or_mp, &offset); do { /* Unprocessed data from last call. */ if (ctx->cbc_remainder_len > 0) { need = block_size - ctx->cbc_remainder_len; if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); bcopy(datap, &((uint8_t *)ctx->cbc_remainder) [ctx->cbc_remainder_len], need); blockp = (uint8_t *)ctx->cbc_remainder; } else { blockp = datap; } /* * XOR the previous cipher block or IV with the * current clear block. */ xor_block(blockp, lastp); encrypt(ctx->cbc_keysched, lastp, lastp); crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, block_size); /* copy block to where it belongs */ if (out_data_1_len == block_size) { copy_block(lastp, out_data_1); } else { bcopy(lastp, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy(lastp + out_data_1_len, out_data_2, block_size - out_data_1_len); } } /* update offset */ out->cd_offset += block_size; /* Update pointer to next block of data to be processed. */ if (ctx->cbc_remainder_len != 0) { datap += need; ctx->cbc_remainder_len = 0; } else { datap += block_size; } remainder = (size_t)&data[length] - (size_t)datap; /* Incomplete last block. */ if (remainder > 0 && remainder < block_size) { bcopy(datap, ctx->cbc_remainder, remainder); ctx->cbc_remainder_len = remainder; ctx->cbc_copy_to = datap; goto out; } ctx->cbc_copy_to = NULL; } while (remainder > 0); out: /* * Save the last encrypted block in the context. */ if (ctx->cbc_lastp != NULL) { copy_block((uint8_t *)ctx->cbc_lastp, (uint8_t *)ctx->cbc_iv); ctx->cbc_lastp = (uint8_t *)ctx->cbc_iv; } return (CRYPTO_SUCCESS); } #define OTHER(a, ctx) \ (((a) == (ctx)->cbc_lastblock) ? (ctx)->cbc_iv : (ctx)->cbc_lastblock) -/* ARGSUSED */ int cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*decrypt)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; uint8_t *blockp; uint8_t *lastp; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; if (length + ctx->cbc_remainder_len < block_size) { /* accumulate bytes here and return */ bcopy(datap, (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len, length); ctx->cbc_remainder_len += length; ctx->cbc_copy_to = datap; return (CRYPTO_SUCCESS); } lastp = ctx->cbc_lastp; crypto_init_ptrs(out, &iov_or_mp, &offset); do { /* Unprocessed data from last call. 
*/ if (ctx->cbc_remainder_len > 0) { need = block_size - ctx->cbc_remainder_len; if (need > remainder) return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); bcopy(datap, &((uint8_t *)ctx->cbc_remainder) [ctx->cbc_remainder_len], need); blockp = (uint8_t *)ctx->cbc_remainder; } else { blockp = datap; } /* LINTED: pointer alignment */ copy_block(blockp, (uint8_t *)OTHER((uint64_t *)lastp, ctx)); decrypt(ctx->cbc_keysched, blockp, (uint8_t *)ctx->cbc_remainder); blockp = (uint8_t *)ctx->cbc_remainder; /* * XOR the previous cipher block or IV with the * currently decrypted block. */ xor_block(lastp, blockp); /* LINTED: pointer alignment */ lastp = (uint8_t *)OTHER((uint64_t *)lastp, ctx); crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, block_size); bcopy(blockp, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy(blockp + out_data_1_len, out_data_2, block_size - out_data_1_len); } /* update offset */ out->cd_offset += block_size; /* Update pointer to next block of data to be processed. */ if (ctx->cbc_remainder_len != 0) { datap += need; ctx->cbc_remainder_len = 0; } else { datap += block_size; } remainder = (size_t)&data[length] - (size_t)datap; /* Incomplete last block. */ if (remainder > 0 && remainder < block_size) { bcopy(datap, ctx->cbc_remainder, remainder); ctx->cbc_remainder_len = remainder; ctx->cbc_lastp = lastp; ctx->cbc_copy_to = datap; return (CRYPTO_SUCCESS); } ctx->cbc_copy_to = NULL; } while (remainder > 0); ctx->cbc_lastp = lastp; return (CRYPTO_SUCCESS); } int cbc_init_ctx(cbc_ctx_t *cbc_ctx, char *param, size_t param_len, size_t block_size, void (*copy_block)(uint8_t *, uint64_t *)) { /* * Copy IV into context. * * If cm_param == NULL then the IV comes from the * cd_miscdata field in the crypto_data structure. */ if (param != NULL) { ASSERT(param_len == block_size); copy_block((uchar_t *)param, cbc_ctx->cbc_iv); } cbc_ctx->cbc_lastp = (uint8_t *)&cbc_ctx->cbc_iv[0]; cbc_ctx->cbc_flags |= CBC_MODE; return (CRYPTO_SUCCESS); } -/* ARGSUSED */ void * cbc_alloc_ctx(int kmflag) { cbc_ctx_t *cbc_ctx; if ((cbc_ctx = kmem_zalloc(sizeof (cbc_ctx_t), kmflag)) == NULL) return (NULL); cbc_ctx->cbc_flags = CBC_MODE; return (cbc_ctx); } diff --git a/module/icp/algs/modes/ccm.c b/module/icp/algs/modes/ccm.c index 5d6507c49db1..a41cbc395fd6 100644 --- a/module/icp/algs/modes/ccm.c +++ b/module/icp/algs/modes/ccm.c @@ -1,907 +1,906 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #ifdef HAVE_EFFICIENT_UNALIGNED_ACCESS #include #define UNALIGNED_POINTERS_PERMITTED #endif /* * Encrypt multiple blocks of data in CCM mode. Decrypt for CCM mode * is done in another function. 
*/ int ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; uint8_t *blockp; uint8_t *lastp; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; uint64_t counter; uint8_t *mac_buf; if (length + ctx->ccm_remainder_len < block_size) { /* accumulate bytes here and return */ bcopy(datap, (uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len, length); ctx->ccm_remainder_len += length; ctx->ccm_copy_to = datap; return (CRYPTO_SUCCESS); } lastp = (uint8_t *)ctx->ccm_cb; crypto_init_ptrs(out, &iov_or_mp, &offset); mac_buf = (uint8_t *)ctx->ccm_mac_buf; do { /* Unprocessed data from last call. */ if (ctx->ccm_remainder_len > 0) { need = block_size - ctx->ccm_remainder_len; if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); bcopy(datap, &((uint8_t *)ctx->ccm_remainder) [ctx->ccm_remainder_len], need); blockp = (uint8_t *)ctx->ccm_remainder; } else { blockp = datap; } /* * do CBC MAC * * XOR the previous cipher block with the current clear block. * mac_buf always contains the previous cipher block. */ xor_block(blockp, mac_buf); encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf); /* ccm_cb is the counter block */ encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, (uint8_t *)ctx->ccm_tmp); lastp = (uint8_t *)ctx->ccm_tmp; /* * Increment counter. Counter bits are confined * to the bottom 64 bits of the counter block. */ #ifdef _ZFS_LITTLE_ENDIAN counter = ntohll(ctx->ccm_cb[1] & ctx->ccm_counter_mask); counter = htonll(counter + 1); #else counter = ctx->ccm_cb[1] & ctx->ccm_counter_mask; counter++; #endif /* _ZFS_LITTLE_ENDIAN */ counter &= ctx->ccm_counter_mask; ctx->ccm_cb[1] = (ctx->ccm_cb[1] & ~(ctx->ccm_counter_mask)) | counter; /* * XOR encrypted counter block with the current clear block. */ xor_block(blockp, lastp); ctx->ccm_processed_data_len += block_size; crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, block_size); /* copy block to where it belongs */ if (out_data_1_len == block_size) { copy_block(lastp, out_data_1); } else { bcopy(lastp, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy(lastp + out_data_1_len, out_data_2, block_size - out_data_1_len); } } /* update offset */ out->cd_offset += block_size; /* Update pointer to next block of data to be processed. */ if (ctx->ccm_remainder_len != 0) { datap += need; ctx->ccm_remainder_len = 0; } else { datap += block_size; } remainder = (size_t)&data[length] - (size_t)datap; /* Incomplete last block. 
*/ if (remainder > 0 && remainder < block_size) { bcopy(datap, ctx->ccm_remainder, remainder); ctx->ccm_remainder_len = remainder; ctx->ccm_copy_to = datap; goto out; } ctx->ccm_copy_to = NULL; } while (remainder > 0); out: return (CRYPTO_SUCCESS); } void calculate_ccm_mac(ccm_ctx_t *ctx, uint8_t *ccm_mac, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *)) { uint64_t counter; uint8_t *counterp, *mac_buf; int i; mac_buf = (uint8_t *)ctx->ccm_mac_buf; /* first counter block start with index 0 */ counter = 0; ctx->ccm_cb[1] = (ctx->ccm_cb[1] & ~(ctx->ccm_counter_mask)) | counter; counterp = (uint8_t *)ctx->ccm_tmp; encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, counterp); /* calculate XOR of MAC with first counter block */ for (i = 0; i < ctx->ccm_mac_len; i++) { ccm_mac[i] = mac_buf[i] ^ counterp[i]; } } -/* ARGSUSED */ int ccm_encrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { uint8_t *lastp, *mac_buf, *ccm_mac_p, *macp = NULL; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; int i; if (out->cd_length < (ctx->ccm_remainder_len + ctx->ccm_mac_len)) { return (CRYPTO_DATA_LEN_RANGE); } /* * When we get here, the number of bytes of payload processed * plus whatever data remains, if any, * should be the same as the number of bytes that's being * passed in the argument during init time. */ if ((ctx->ccm_processed_data_len + ctx->ccm_remainder_len) != (ctx->ccm_data_len)) { return (CRYPTO_DATA_LEN_RANGE); } mac_buf = (uint8_t *)ctx->ccm_mac_buf; if (ctx->ccm_remainder_len > 0) { /* ccm_mac_input_buf is not used for encryption */ macp = (uint8_t *)ctx->ccm_mac_input_buf; bzero(macp, block_size); /* copy remainder to temporary buffer */ bcopy(ctx->ccm_remainder, macp, ctx->ccm_remainder_len); /* calculate the CBC MAC */ xor_block(macp, mac_buf); encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf); /* calculate the counter mode */ lastp = (uint8_t *)ctx->ccm_tmp; encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, lastp); /* XOR with counter block */ for (i = 0; i < ctx->ccm_remainder_len; i++) { macp[i] ^= lastp[i]; } ctx->ccm_processed_data_len += ctx->ccm_remainder_len; } /* Calculate the CCM MAC */ ccm_mac_p = (uint8_t *)ctx->ccm_tmp; calculate_ccm_mac(ctx, ccm_mac_p, encrypt_block); crypto_init_ptrs(out, &iov_or_mp, &offset); crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, ctx->ccm_remainder_len + ctx->ccm_mac_len); if (ctx->ccm_remainder_len > 0) { /* copy temporary block to where it belongs */ if (out_data_2 == NULL) { /* everything will fit in out_data_1 */ bcopy(macp, out_data_1, ctx->ccm_remainder_len); bcopy(ccm_mac_p, out_data_1 + ctx->ccm_remainder_len, ctx->ccm_mac_len); } else { if (out_data_1_len < ctx->ccm_remainder_len) { size_t data_2_len_used; bcopy(macp, out_data_1, out_data_1_len); data_2_len_used = ctx->ccm_remainder_len - out_data_1_len; bcopy((uint8_t *)macp + out_data_1_len, out_data_2, data_2_len_used); bcopy(ccm_mac_p, out_data_2 + data_2_len_used, ctx->ccm_mac_len); } else { bcopy(macp, out_data_1, out_data_1_len); if (out_data_1_len == ctx->ccm_remainder_len) { /* mac will be in out_data_2 */ bcopy(ccm_mac_p, out_data_2, ctx->ccm_mac_len); } else { size_t len_not_used = out_data_1_len - ctx->ccm_remainder_len; /* * part of mac in will be in * out_data_1, part of the mac will be * in out_data_2 */ bcopy(ccm_mac_p, out_data_1 + 
ctx->ccm_remainder_len, len_not_used); bcopy(ccm_mac_p + len_not_used, out_data_2, ctx->ccm_mac_len - len_not_used); } } } } else { /* copy block to where it belongs */ bcopy(ccm_mac_p, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy(ccm_mac_p + out_data_1_len, out_data_2, block_size - out_data_1_len); } } out->cd_offset += ctx->ccm_remainder_len + ctx->ccm_mac_len; ctx->ccm_remainder_len = 0; return (CRYPTO_SUCCESS); } /* * This will only deal with decrypting the last block of the input that * might not be a multiple of block length. */ static void ccm_decrypt_incomplete_block(ccm_ctx_t *ctx, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *)) { uint8_t *datap, *outp, *counterp; int i; datap = (uint8_t *)ctx->ccm_remainder; outp = &((ctx->ccm_pt_buf)[ctx->ccm_processed_data_len]); counterp = (uint8_t *)ctx->ccm_tmp; encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, counterp); /* XOR with counter block */ for (i = 0; i < ctx->ccm_remainder_len; i++) { outp[i] = datap[i] ^ counterp[i]; } } /* * This will decrypt the cipher text. However, the plaintext won't be * returned to the caller. It will be returned when decrypt_final() is * called if the MAC matches */ -/* ARGSUSED */ int ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { + (void) out; size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; uint8_t *blockp; uint8_t *cbp; uint64_t counter; size_t pt_len, total_decrypted_len, mac_len, pm_len, pd_len; uint8_t *resultp; pm_len = ctx->ccm_processed_mac_len; if (pm_len > 0) { uint8_t *tmp; /* * all ciphertext has been processed, just waiting for * part of the value of the mac */ if ((pm_len + length) > ctx->ccm_mac_len) { return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); } tmp = (uint8_t *)ctx->ccm_mac_input_buf; bcopy(datap, tmp + pm_len, length); ctx->ccm_processed_mac_len += length; return (CRYPTO_SUCCESS); } /* * If we decrypt the given data, what total amount of data would * have been decrypted? */ pd_len = ctx->ccm_processed_data_len; total_decrypted_len = pd_len + length + ctx->ccm_remainder_len; if (total_decrypted_len > (ctx->ccm_data_len + ctx->ccm_mac_len)) { return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); } pt_len = ctx->ccm_data_len; if (total_decrypted_len > pt_len) { /* * part of the input will be the MAC, need to isolate that * to be dealt with later. The left-over data in * ccm_remainder_len from last time will not be part of the * MAC. Otherwise, it would have already been taken out * when this call is made last time. 
*/ size_t pt_part = pt_len - pd_len - ctx->ccm_remainder_len; mac_len = length - pt_part; ctx->ccm_processed_mac_len = mac_len; bcopy(data + pt_part, ctx->ccm_mac_input_buf, mac_len); if (pt_part + ctx->ccm_remainder_len < block_size) { /* * since this is last of the ciphertext, will * just decrypt with it here */ bcopy(datap, &((uint8_t *)ctx->ccm_remainder) [ctx->ccm_remainder_len], pt_part); ctx->ccm_remainder_len += pt_part; ccm_decrypt_incomplete_block(ctx, encrypt_block); ctx->ccm_processed_data_len += ctx->ccm_remainder_len; ctx->ccm_remainder_len = 0; return (CRYPTO_SUCCESS); } else { /* let rest of the code handle this */ length = pt_part; } } else if (length + ctx->ccm_remainder_len < block_size) { /* accumulate bytes here and return */ bcopy(datap, (uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len, length); ctx->ccm_remainder_len += length; ctx->ccm_copy_to = datap; return (CRYPTO_SUCCESS); } do { /* Unprocessed data from last call. */ if (ctx->ccm_remainder_len > 0) { need = block_size - ctx->ccm_remainder_len; if (need > remainder) return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); bcopy(datap, &((uint8_t *)ctx->ccm_remainder) [ctx->ccm_remainder_len], need); blockp = (uint8_t *)ctx->ccm_remainder; } else { blockp = datap; } /* Calculate the counter mode, ccm_cb is the counter block */ cbp = (uint8_t *)ctx->ccm_tmp; encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, cbp); /* * Increment counter. * Counter bits are confined to the bottom 64 bits */ #ifdef _ZFS_LITTLE_ENDIAN counter = ntohll(ctx->ccm_cb[1] & ctx->ccm_counter_mask); counter = htonll(counter + 1); #else counter = ctx->ccm_cb[1] & ctx->ccm_counter_mask; counter++; #endif /* _ZFS_LITTLE_ENDIAN */ counter &= ctx->ccm_counter_mask; ctx->ccm_cb[1] = (ctx->ccm_cb[1] & ~(ctx->ccm_counter_mask)) | counter; /* XOR with the ciphertext */ xor_block(blockp, cbp); /* Copy the plaintext to the "holding buffer" */ resultp = (uint8_t *)ctx->ccm_pt_buf + ctx->ccm_processed_data_len; copy_block(cbp, resultp); ctx->ccm_processed_data_len += block_size; ctx->ccm_lastp = blockp; /* Update pointer to next block of data to be processed. 
*/ if (ctx->ccm_remainder_len != 0) { datap += need; ctx->ccm_remainder_len = 0; } else { datap += block_size; } remainder = (size_t)&data[length] - (size_t)datap; /* Incomplete last block */ if (remainder > 0 && remainder < block_size) { bcopy(datap, ctx->ccm_remainder, remainder); ctx->ccm_remainder_len = remainder; ctx->ccm_copy_to = datap; if (ctx->ccm_processed_mac_len > 0) { /* * not expecting anymore ciphertext, just * compute plaintext for the remaining input */ ccm_decrypt_incomplete_block(ctx, encrypt_block); ctx->ccm_processed_data_len += remainder; ctx->ccm_remainder_len = 0; } goto out; } ctx->ccm_copy_to = NULL; } while (remainder > 0); out: return (CRYPTO_SUCCESS); } int ccm_decrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { size_t mac_remain, pt_len; uint8_t *pt, *mac_buf, *macp, *ccm_mac_p; int rv; pt_len = ctx->ccm_data_len; /* Make sure output buffer can fit all of the plaintext */ if (out->cd_length < pt_len) { return (CRYPTO_DATA_LEN_RANGE); } pt = ctx->ccm_pt_buf; mac_remain = ctx->ccm_processed_data_len; mac_buf = (uint8_t *)ctx->ccm_mac_buf; macp = (uint8_t *)ctx->ccm_tmp; while (mac_remain > 0) { if (mac_remain < block_size) { bzero(macp, block_size); bcopy(pt, macp, mac_remain); mac_remain = 0; } else { copy_block(pt, macp); mac_remain -= block_size; pt += block_size; } /* calculate the CBC MAC */ xor_block(macp, mac_buf); encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf); } /* Calculate the CCM MAC */ ccm_mac_p = (uint8_t *)ctx->ccm_tmp; calculate_ccm_mac((ccm_ctx_t *)ctx, ccm_mac_p, encrypt_block); /* compare the input CCM MAC value with what we calculated */ if (bcmp(ctx->ccm_mac_input_buf, ccm_mac_p, ctx->ccm_mac_len)) { /* They don't match */ return (CRYPTO_INVALID_MAC); } else { rv = crypto_put_output_data(ctx->ccm_pt_buf, out, pt_len); if (rv != CRYPTO_SUCCESS) return (rv); out->cd_offset += pt_len; } return (CRYPTO_SUCCESS); } static int ccm_validate_args(CK_AES_CCM_PARAMS *ccm_param, boolean_t is_encrypt_init) { size_t macSize, nonceSize; uint8_t q; uint64_t maxValue; /* * Check the length of the MAC. The only valid * lengths for the MAC are: 4, 6, 8, 10, 12, 14, 16 */ macSize = ccm_param->ulMACSize; if ((macSize < 4) || (macSize > 16) || ((macSize % 2) != 0)) { return (CRYPTO_MECHANISM_PARAM_INVALID); } /* Check the nonce length. 
Valid values are 7, 8, 9, 10, 11, 12, 13 */ nonceSize = ccm_param->ulNonceSize; if ((nonceSize < 7) || (nonceSize > 13)) { return (CRYPTO_MECHANISM_PARAM_INVALID); } /* q is the length of the field storing the length, in bytes */ q = (uint8_t)((15 - nonceSize) & 0xFF); /* * If it is decrypt, need to make sure size of ciphertext is at least * bigger than MAC len */ if ((!is_encrypt_init) && (ccm_param->ulDataSize < macSize)) { return (CRYPTO_MECHANISM_PARAM_INVALID); } /* * Check to make sure the length of the payload is within the * range of values allowed by q */ if (q < 8) { maxValue = (1ULL << (q * 8)) - 1; } else { maxValue = ULONG_MAX; } if (ccm_param->ulDataSize > maxValue) { return (CRYPTO_MECHANISM_PARAM_INVALID); } return (CRYPTO_SUCCESS); } /* * Format the first block used in CBC-MAC (B0) and the initial counter * block based on formatting functions and counter generation functions * specified in RFC 3610 and NIST publication 800-38C, appendix A * * b0 is the first block used in CBC-MAC * cb0 is the first counter block * * It's assumed that the arguments b0 and cb0 are preallocated AES blocks * */ static void ccm_format_initial_blocks(uchar_t *nonce, ulong_t nonceSize, ulong_t authDataSize, uint8_t *b0, ccm_ctx_t *aes_ctx) { uint64_t payloadSize; uint8_t t, q, have_adata = 0; size_t limit; int i, j, k; uint64_t mask = 0; uint8_t *cb; q = (uint8_t)((15 - nonceSize) & 0xFF); t = (uint8_t)((aes_ctx->ccm_mac_len) & 0xFF); /* Construct the first octet of b0 */ if (authDataSize > 0) { have_adata = 1; } b0[0] = (have_adata << 6) | (((t - 2) / 2) << 3) | (q - 1); /* copy the nonce value into b0 */ bcopy(nonce, &(b0[1]), nonceSize); /* store the length of the payload into b0 */ bzero(&(b0[1+nonceSize]), q); payloadSize = aes_ctx->ccm_data_len; limit = 8 < q ? 8 : q; for (i = 0, j = 0, k = 15; i < limit; i++, j += 8, k--) { b0[k] = (uint8_t)((payloadSize >> j) & 0xFF); } /* format the counter block */ cb = (uint8_t *)aes_ctx->ccm_cb; cb[0] = 0x07 & (q-1); /* first byte */ /* copy the nonce value into the counter block */ bcopy(nonce, &(cb[1]), nonceSize); bzero(&(cb[1+nonceSize]), q); /* Create the mask for the counter field based on the size of nonce */ q <<= 3; while (q-- > 0) { mask |= (1ULL << q); } #ifdef _ZFS_LITTLE_ENDIAN mask = htonll(mask); #endif aes_ctx->ccm_counter_mask = mask; /* * During calculation, we start using counter block 1, we will * set it up right here. * We can just set the last byte to have the value 1, because * even with the biggest nonce of 13, the last byte of the * counter block will be used for the counter value. 
*/ cb[15] = 0x01; } /* * Encode the length of the associated data as * specified in RFC 3610 and NIST publication 800-38C, appendix A */ static void encode_adata_len(ulong_t auth_data_len, uint8_t *encoded, size_t *encoded_len) { #ifdef UNALIGNED_POINTERS_PERMITTED uint32_t *lencoded_ptr; #ifdef _LP64 uint64_t *llencoded_ptr; #endif #endif /* UNALIGNED_POINTERS_PERMITTED */ if (auth_data_len < ((1ULL<<16) - (1ULL<<8))) { /* 0 < a < (2^16-2^8) */ *encoded_len = 2; encoded[0] = (auth_data_len & 0xff00) >> 8; encoded[1] = auth_data_len & 0xff; } else if ((auth_data_len >= ((1ULL<<16) - (1ULL<<8))) && (auth_data_len < (1ULL << 31))) { /* (2^16-2^8) <= a < 2^32 */ *encoded_len = 6; encoded[0] = 0xff; encoded[1] = 0xfe; #ifdef UNALIGNED_POINTERS_PERMITTED lencoded_ptr = (uint32_t *)&encoded[2]; *lencoded_ptr = htonl(auth_data_len); #else encoded[2] = (auth_data_len & 0xff000000) >> 24; encoded[3] = (auth_data_len & 0xff0000) >> 16; encoded[4] = (auth_data_len & 0xff00) >> 8; encoded[5] = auth_data_len & 0xff; #endif /* UNALIGNED_POINTERS_PERMITTED */ #ifdef _LP64 } else { /* 2^32 <= a < 2^64 */ *encoded_len = 10; encoded[0] = 0xff; encoded[1] = 0xff; #ifdef UNALIGNED_POINTERS_PERMITTED llencoded_ptr = (uint64_t *)&encoded[2]; *llencoded_ptr = htonl(auth_data_len); #else encoded[2] = (auth_data_len & 0xff00000000000000) >> 56; encoded[3] = (auth_data_len & 0xff000000000000) >> 48; encoded[4] = (auth_data_len & 0xff0000000000) >> 40; encoded[5] = (auth_data_len & 0xff00000000) >> 32; encoded[6] = (auth_data_len & 0xff000000) >> 24; encoded[7] = (auth_data_len & 0xff0000) >> 16; encoded[8] = (auth_data_len & 0xff00) >> 8; encoded[9] = auth_data_len & 0xff; #endif /* UNALIGNED_POINTERS_PERMITTED */ #endif /* _LP64 */ } } static int ccm_init(ccm_ctx_t *ctx, unsigned char *nonce, size_t nonce_len, unsigned char *auth_data, size_t auth_data_len, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { uint8_t *mac_buf, *datap, *ivp, *authp; size_t remainder, processed; uint8_t encoded_a[10]; /* max encoded auth data length is 10 octets */ size_t encoded_a_len = 0; mac_buf = (uint8_t *)&(ctx->ccm_mac_buf); /* * Format the 1st block for CBC-MAC and construct the * 1st counter block. * * aes_ctx->ccm_iv is used for storing the counter block * mac_buf will store b0 at this time. 
*/ ccm_format_initial_blocks(nonce, nonce_len, auth_data_len, mac_buf, ctx); /* The IV for CBC MAC for AES CCM mode is always zero */ ivp = (uint8_t *)ctx->ccm_tmp; bzero(ivp, block_size); xor_block(ivp, mac_buf); /* encrypt the nonce */ encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf); /* take care of the associated data, if any */ if (auth_data_len == 0) { return (CRYPTO_SUCCESS); } encode_adata_len(auth_data_len, encoded_a, &encoded_a_len); remainder = auth_data_len; /* 1st block: it contains encoded associated data, and some data */ authp = (uint8_t *)ctx->ccm_tmp; bzero(authp, block_size); bcopy(encoded_a, authp, encoded_a_len); processed = block_size - encoded_a_len; if (processed > auth_data_len) { /* in case auth_data is very small */ processed = auth_data_len; } bcopy(auth_data, authp+encoded_a_len, processed); /* xor with previous buffer */ xor_block(authp, mac_buf); encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf); remainder -= processed; if (remainder == 0) { /* a small amount of associated data, it's all done now */ return (CRYPTO_SUCCESS); } do { if (remainder < block_size) { /* * There's not a block full of data, pad rest of * buffer with zero */ bzero(authp, block_size); bcopy(&(auth_data[processed]), authp, remainder); datap = (uint8_t *)authp; remainder = 0; } else { datap = (uint8_t *)(&(auth_data[processed])); processed += block_size; remainder -= block_size; } xor_block(datap, mac_buf); encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf); } while (remainder > 0); return (CRYPTO_SUCCESS); } /* * The following function should be call at encrypt or decrypt init time * for AES CCM mode. */ int ccm_init_ctx(ccm_ctx_t *ccm_ctx, char *param, int kmflag, boolean_t is_encrypt_init, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { int rv; CK_AES_CCM_PARAMS *ccm_param; if (param != NULL) { ccm_param = (CK_AES_CCM_PARAMS *)param; if ((rv = ccm_validate_args(ccm_param, is_encrypt_init)) != 0) { return (rv); } ccm_ctx->ccm_mac_len = ccm_param->ulMACSize; if (is_encrypt_init) { ccm_ctx->ccm_data_len = ccm_param->ulDataSize; } else { ccm_ctx->ccm_data_len = ccm_param->ulDataSize - ccm_ctx->ccm_mac_len; ccm_ctx->ccm_processed_mac_len = 0; } ccm_ctx->ccm_processed_data_len = 0; ccm_ctx->ccm_flags |= CCM_MODE; } else { return (CRYPTO_MECHANISM_PARAM_INVALID); } if (ccm_init(ccm_ctx, ccm_param->nonce, ccm_param->ulNonceSize, ccm_param->authData, ccm_param->ulAuthDataSize, block_size, encrypt_block, xor_block) != 0) { return (CRYPTO_MECHANISM_PARAM_INVALID); } if (!is_encrypt_init) { /* allocate buffer for storing decrypted plaintext */ ccm_ctx->ccm_pt_buf = vmem_alloc(ccm_ctx->ccm_data_len, kmflag); if (ccm_ctx->ccm_pt_buf == NULL) { rv = CRYPTO_HOST_MEMORY; } } return (rv); } void * ccm_alloc_ctx(int kmflag) { ccm_ctx_t *ccm_ctx; if ((ccm_ctx = kmem_zalloc(sizeof (ccm_ctx_t), kmflag)) == NULL) return (NULL); ccm_ctx->ccm_flags = CCM_MODE; return (ccm_ctx); } diff --git a/module/icp/algs/modes/ctr.c b/module/icp/algs/modes/ctr.c index 0188bdd395ff..82295cda877e 100644 --- a/module/icp/algs/modes/ctr.c +++ b/module/icp/algs/modes/ctr.c @@ -1,228 +1,227 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
* See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include /* * Encrypt and decrypt multiple blocks of data in counter mode. */ int ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*cipher)(const void *ks, const uint8_t *pt, uint8_t *ct), void (*xor_block)(uint8_t *, uint8_t *)) { size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; uint8_t *blockp; uint8_t *lastp; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; uint64_t lower_counter, upper_counter; if (length + ctx->ctr_remainder_len < block_size) { /* accumulate bytes here and return */ bcopy(datap, (uint8_t *)ctx->ctr_remainder + ctx->ctr_remainder_len, length); ctx->ctr_remainder_len += length; ctx->ctr_copy_to = datap; return (CRYPTO_SUCCESS); } lastp = (uint8_t *)ctx->ctr_cb; crypto_init_ptrs(out, &iov_or_mp, &offset); do { /* Unprocessed data from last call. */ if (ctx->ctr_remainder_len > 0) { need = block_size - ctx->ctr_remainder_len; if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); bcopy(datap, &((uint8_t *)ctx->ctr_remainder) [ctx->ctr_remainder_len], need); blockp = (uint8_t *)ctx->ctr_remainder; } else { blockp = datap; } /* ctr_cb is the counter block */ cipher(ctx->ctr_keysched, (uint8_t *)ctx->ctr_cb, (uint8_t *)ctx->ctr_tmp); lastp = (uint8_t *)ctx->ctr_tmp; /* * Increment Counter. */ lower_counter = ntohll(ctx->ctr_cb[1] & ctx->ctr_lower_mask); lower_counter = htonll(lower_counter + 1); lower_counter &= ctx->ctr_lower_mask; ctx->ctr_cb[1] = (ctx->ctr_cb[1] & ~(ctx->ctr_lower_mask)) | lower_counter; /* wrap around */ if (lower_counter == 0) { upper_counter = ntohll(ctx->ctr_cb[0] & ctx->ctr_upper_mask); upper_counter = htonll(upper_counter + 1); upper_counter &= ctx->ctr_upper_mask; ctx->ctr_cb[0] = (ctx->ctr_cb[0] & ~(ctx->ctr_upper_mask)) | upper_counter; } /* * XOR encrypted counter block with the current clear block. */ xor_block(blockp, lastp); crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, block_size); /* copy block to where it belongs */ bcopy(lastp, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy(lastp + out_data_1_len, out_data_2, block_size - out_data_1_len); } /* update offset */ out->cd_offset += block_size; /* Update pointer to next block of data to be processed. */ if (ctx->ctr_remainder_len != 0) { datap += need; ctx->ctr_remainder_len = 0; } else { datap += block_size; } remainder = (size_t)&data[length] - (size_t)datap; /* Incomplete last block. 
*/ if (remainder > 0 && remainder < block_size) { bcopy(datap, ctx->ctr_remainder, remainder); ctx->ctr_remainder_len = remainder; ctx->ctr_copy_to = datap; goto out; } ctx->ctr_copy_to = NULL; } while (remainder > 0); out: return (CRYPTO_SUCCESS); } int ctr_mode_final(ctr_ctx_t *ctx, crypto_data_t *out, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *)) { uint8_t *lastp; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; uint8_t *p; int i; if (out->cd_length < ctx->ctr_remainder_len) return (CRYPTO_DATA_LEN_RANGE); encrypt_block(ctx->ctr_keysched, (uint8_t *)ctx->ctr_cb, (uint8_t *)ctx->ctr_tmp); lastp = (uint8_t *)ctx->ctr_tmp; p = (uint8_t *)ctx->ctr_remainder; for (i = 0; i < ctx->ctr_remainder_len; i++) { p[i] ^= lastp[i]; } crypto_init_ptrs(out, &iov_or_mp, &offset); crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, ctx->ctr_remainder_len); bcopy(p, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy((uint8_t *)p + out_data_1_len, out_data_2, ctx->ctr_remainder_len - out_data_1_len); } out->cd_offset += ctx->ctr_remainder_len; ctx->ctr_remainder_len = 0; return (CRYPTO_SUCCESS); } int ctr_init_ctx(ctr_ctx_t *ctr_ctx, ulong_t count, uint8_t *cb, void (*copy_block)(uint8_t *, uint8_t *)) { uint64_t upper_mask = 0; uint64_t lower_mask = 0; if (count == 0 || count > 128) { return (CRYPTO_MECHANISM_PARAM_INVALID); } /* upper 64 bits of the mask */ if (count >= 64) { count -= 64; upper_mask = (count == 64) ? UINT64_MAX : (1ULL << count) - 1; lower_mask = UINT64_MAX; } else { /* now the lower 63 bits */ lower_mask = (1ULL << count) - 1; } ctr_ctx->ctr_lower_mask = htonll(lower_mask); ctr_ctx->ctr_upper_mask = htonll(upper_mask); copy_block(cb, (uchar_t *)ctr_ctx->ctr_cb); ctr_ctx->ctr_lastp = (uint8_t *)&ctr_ctx->ctr_cb[0]; ctr_ctx->ctr_flags |= CTR_MODE; return (CRYPTO_SUCCESS); } -/* ARGSUSED */ void * ctr_alloc_ctx(int kmflag) { ctr_ctx_t *ctr_ctx; if ((ctr_ctx = kmem_zalloc(sizeof (ctr_ctx_t), kmflag)) == NULL) return (NULL); ctr_ctx->ctr_flags = CTR_MODE; return (ctr_ctx); } diff --git a/module/icp/algs/modes/ecb.c b/module/icp/algs/modes/ecb.c index 025f5825cf04..ffbdb9d57d0a 100644 --- a/module/icp/algs/modes/ecb.c +++ b/module/icp/algs/modes/ecb.c @@ -1,128 +1,127 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include /* * Algorithm independent ECB functions. 
*/ int ecb_cipher_contiguous_blocks(ecb_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*cipher)(const void *ks, const uint8_t *pt, uint8_t *ct)) { size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; uint8_t *blockp; uint8_t *lastp; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; if (length + ctx->ecb_remainder_len < block_size) { /* accumulate bytes here and return */ bcopy(datap, (uint8_t *)ctx->ecb_remainder + ctx->ecb_remainder_len, length); ctx->ecb_remainder_len += length; ctx->ecb_copy_to = datap; return (CRYPTO_SUCCESS); } lastp = (uint8_t *)ctx->ecb_iv; crypto_init_ptrs(out, &iov_or_mp, &offset); do { /* Unprocessed data from last call. */ if (ctx->ecb_remainder_len > 0) { need = block_size - ctx->ecb_remainder_len; if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); bcopy(datap, &((uint8_t *)ctx->ecb_remainder) [ctx->ecb_remainder_len], need); blockp = (uint8_t *)ctx->ecb_remainder; } else { blockp = datap; } cipher(ctx->ecb_keysched, blockp, lastp); crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, block_size); /* copy block to where it belongs */ bcopy(lastp, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy(lastp + out_data_1_len, out_data_2, block_size - out_data_1_len); } /* update offset */ out->cd_offset += block_size; /* Update pointer to next block of data to be processed. */ if (ctx->ecb_remainder_len != 0) { datap += need; ctx->ecb_remainder_len = 0; } else { datap += block_size; } remainder = (size_t)&data[length] - (size_t)datap; /* Incomplete last block. */ if (remainder > 0 && remainder < block_size) { bcopy(datap, ctx->ecb_remainder, remainder); ctx->ecb_remainder_len = remainder; ctx->ecb_copy_to = datap; goto out; } ctx->ecb_copy_to = NULL; } while (remainder > 0); out: return (CRYPTO_SUCCESS); } -/* ARGSUSED */ void * ecb_alloc_ctx(int kmflag) { ecb_ctx_t *ecb_ctx; if ((ecb_ctx = kmem_zalloc(sizeof (ecb_ctx_t), kmflag)) == NULL) return (NULL); ecb_ctx->ecb_flags = ECB_MODE; return (ecb_ctx); } diff --git a/module/icp/algs/modes/gcm.c b/module/icp/algs/modes/gcm.c index 7332834cbe37..3a1660d93ab7 100644 --- a/module/icp/algs/modes/gcm.c +++ b/module/icp/algs/modes/gcm.c @@ -1,1587 +1,1588 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ #include #include #include #include #include #include #include #include #ifdef CAN_USE_GCM_ASM #include #endif #define GHASH(c, d, t, o) \ xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \ (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \ (uint64_t *)(void *)(t)); /* Select GCM implementation */ #define IMPL_FASTEST (UINT32_MAX) #define IMPL_CYCLE (UINT32_MAX-1) #ifdef CAN_USE_GCM_ASM #define IMPL_AVX (UINT32_MAX-2) #endif #define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i)) static uint32_t icp_gcm_impl = IMPL_FASTEST; static uint32_t user_sel_impl = IMPL_FASTEST; #ifdef CAN_USE_GCM_ASM /* Does the architecture we run on support the MOVBE instruction? */ boolean_t gcm_avx_can_use_movbe = B_FALSE; /* * Whether to use the optimized openssl gcm and ghash implementations. * Set to true if module parameter icp_gcm_impl == "avx". */ static boolean_t gcm_use_avx = B_FALSE; #define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx) extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); static inline boolean_t gcm_avx_will_work(void); static inline void gcm_set_avx(boolean_t); static inline boolean_t gcm_toggle_avx(void); static inline size_t gcm_simd_get_htab_size(boolean_t); static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t, crypto_data_t *, size_t); static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *, size_t, size_t); #endif /* ifdef CAN_USE_GCM_ASM */ /* * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode * is done in another function. */ int gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { #ifdef CAN_USE_GCM_ASM if (ctx->gcm_use_avx == B_TRUE) return (gcm_mode_encrypt_contiguous_blocks_avx( ctx, data, length, out, block_size)); #endif const gcm_impl_ops_t *gops; size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; uint8_t *blockp; uint8_t *lastp; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; uint64_t counter; uint64_t counter_mask = ntohll(0x00000000ffffffffULL); if (length + ctx->gcm_remainder_len < block_size) { /* accumulate bytes here and return */ bcopy(datap, (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len, length); ctx->gcm_remainder_len += length; if (ctx->gcm_copy_to == NULL) { ctx->gcm_copy_to = datap; } return (CRYPTO_SUCCESS); } lastp = (uint8_t *)ctx->gcm_cb; crypto_init_ptrs(out, &iov_or_mp, &offset); gops = gcm_impl_get_ops(); do { /* Unprocessed data from last call. */ if (ctx->gcm_remainder_len > 0) { need = block_size - ctx->gcm_remainder_len; if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); bcopy(datap, &((uint8_t *)ctx->gcm_remainder) [ctx->gcm_remainder_len], need); blockp = (uint8_t *)ctx->gcm_remainder; } else { blockp = datap; } /* * Increment counter. Counter bits are confined * to the bottom 32 bits of the counter block. 
*/ counter = ntohll(ctx->gcm_cb[1] & counter_mask); counter = htonll(counter + 1); counter &= counter_mask; ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, (uint8_t *)ctx->gcm_tmp); xor_block(blockp, (uint8_t *)ctx->gcm_tmp); lastp = (uint8_t *)ctx->gcm_tmp; ctx->gcm_processed_data_len += block_size; crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, block_size); /* copy block to where it belongs */ if (out_data_1_len == block_size) { copy_block(lastp, out_data_1); } else { bcopy(lastp, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy(lastp + out_data_1_len, out_data_2, block_size - out_data_1_len); } } /* update offset */ out->cd_offset += block_size; /* add ciphertext to the hash */ GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops); /* Update pointer to next block of data to be processed. */ if (ctx->gcm_remainder_len != 0) { datap += need; ctx->gcm_remainder_len = 0; } else { datap += block_size; } remainder = (size_t)&data[length] - (size_t)datap; /* Incomplete last block. */ if (remainder > 0 && remainder < block_size) { bcopy(datap, ctx->gcm_remainder, remainder); ctx->gcm_remainder_len = remainder; ctx->gcm_copy_to = datap; goto out; } ctx->gcm_copy_to = NULL; } while (remainder > 0); out: return (CRYPTO_SUCCESS); } -/* ARGSUSED */ int gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { + (void) copy_block; #ifdef CAN_USE_GCM_ASM if (ctx->gcm_use_avx == B_TRUE) return (gcm_encrypt_final_avx(ctx, out, block_size)); #endif const gcm_impl_ops_t *gops; uint64_t counter_mask = ntohll(0x00000000ffffffffULL); uint8_t *ghash, *macp = NULL; int i, rv; if (out->cd_length < (ctx->gcm_remainder_len + ctx->gcm_tag_len)) { return (CRYPTO_DATA_LEN_RANGE); } gops = gcm_impl_get_ops(); ghash = (uint8_t *)ctx->gcm_ghash; if (ctx->gcm_remainder_len > 0) { uint64_t counter; uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp; /* * Here is where we deal with data that is not a * multiple of the block size. */ /* * Increment counter. */ counter = ntohll(ctx->gcm_cb[1] & counter_mask); counter = htonll(counter + 1); counter &= counter_mask; ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, (uint8_t *)ctx->gcm_tmp); macp = (uint8_t *)ctx->gcm_remainder; bzero(macp + ctx->gcm_remainder_len, block_size - ctx->gcm_remainder_len); /* XOR with counter block */ for (i = 0; i < ctx->gcm_remainder_len; i++) { macp[i] ^= tmpp[i]; } /* add ciphertext to the hash */ GHASH(ctx, macp, ghash, gops); ctx->gcm_processed_data_len += ctx->gcm_remainder_len; } ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len)); GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops); encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0, (uint8_t *)ctx->gcm_J0); xor_block((uint8_t *)ctx->gcm_J0, ghash); if (ctx->gcm_remainder_len > 0) { rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len); if (rv != CRYPTO_SUCCESS) return (rv); } out->cd_offset += ctx->gcm_remainder_len; ctx->gcm_remainder_len = 0; rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len); if (rv != CRYPTO_SUCCESS) return (rv); out->cd_offset += ctx->gcm_tag_len; return (CRYPTO_SUCCESS); } /* * This will only deal with decrypting the last block of the input that * might not be a multiple of block length. 
*/ static void gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { uint8_t *datap, *outp, *counterp; uint64_t counter; uint64_t counter_mask = ntohll(0x00000000ffffffffULL); int i; /* * Increment counter. * Counter bits are confined to the bottom 32 bits */ counter = ntohll(ctx->gcm_cb[1] & counter_mask); counter = htonll(counter + 1); counter &= counter_mask; ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; datap = (uint8_t *)ctx->gcm_remainder; outp = &((ctx->gcm_pt_buf)[index]); counterp = (uint8_t *)ctx->gcm_tmp; /* authentication tag */ bzero((uint8_t *)ctx->gcm_tmp, block_size); bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len); /* add ciphertext to the hash */ GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops()); /* decrypt remaining ciphertext */ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp); /* XOR with counter block */ for (i = 0; i < ctx->gcm_remainder_len; i++) { outp[i] = datap[i] ^ counterp[i]; } } -/* ARGSUSED */ int gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { + (void) out, (void) block_size, (void) encrypt_block, (void) copy_block, + (void) xor_block; size_t new_len; uint8_t *new; /* * Copy contiguous ciphertext input blocks to plaintext buffer. * Ciphertext will be decrypted in the final. */ if (length > 0) { new_len = ctx->gcm_pt_buf_len + length; new = vmem_alloc(new_len, ctx->gcm_kmflag); if (new == NULL) { vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len); ctx->gcm_pt_buf = NULL; return (CRYPTO_HOST_MEMORY); } bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len); vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len); ctx->gcm_pt_buf = new; ctx->gcm_pt_buf_len = new_len; bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len], length); ctx->gcm_processed_data_len += length; } ctx->gcm_remainder_len = 0; return (CRYPTO_SUCCESS); } int gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { #ifdef CAN_USE_GCM_ASM if (ctx->gcm_use_avx == B_TRUE) return (gcm_decrypt_final_avx(ctx, out, block_size)); #endif const gcm_impl_ops_t *gops; size_t pt_len; size_t remainder; uint8_t *ghash; uint8_t *blockp; uint8_t *cbp; uint64_t counter; uint64_t counter_mask = ntohll(0x00000000ffffffffULL); int processed = 0, rv; ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len); gops = gcm_impl_get_ops(); pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; ghash = (uint8_t *)ctx->gcm_ghash; blockp = ctx->gcm_pt_buf; remainder = pt_len; while (remainder > 0) { /* Incomplete last block */ if (remainder < block_size) { bcopy(blockp, ctx->gcm_remainder, remainder); ctx->gcm_remainder_len = remainder; /* * not expecting anymore ciphertext, just * compute plaintext for the remaining input */ gcm_decrypt_incomplete_block(ctx, block_size, processed, encrypt_block, xor_block); ctx->gcm_remainder_len = 0; goto out; } /* add ciphertext to the hash */ GHASH(ctx, blockp, ghash, gops); /* * Increment counter. 
* Counter bits are confined to the bottom 32 bits */ counter = ntohll(ctx->gcm_cb[1] & counter_mask); counter = htonll(counter + 1); counter &= counter_mask; ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; cbp = (uint8_t *)ctx->gcm_tmp; encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp); /* XOR with ciphertext */ xor_block(cbp, blockp); processed += block_size; blockp += block_size; remainder -= block_size; } out: ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len)); GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops); encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0, (uint8_t *)ctx->gcm_J0); xor_block((uint8_t *)ctx->gcm_J0, ghash); /* compare the input authentication tag with what we calculated */ if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) { /* They don't match */ return (CRYPTO_INVALID_MAC); } else { rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len); if (rv != CRYPTO_SUCCESS) return (rv); out->cd_offset += pt_len; } return (CRYPTO_SUCCESS); } static int gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param) { size_t tag_len; /* * Check the length of the authentication tag (in bits). */ tag_len = gcm_param->ulTagBits; switch (tag_len) { case 32: case 64: case 96: case 104: case 112: case 120: case 128: break; default: return (CRYPTO_MECHANISM_PARAM_INVALID); } if (gcm_param->ulIvLen == 0) return (CRYPTO_MECHANISM_PARAM_INVALID); return (CRYPTO_SUCCESS); } static void gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len, gcm_ctx_t *ctx, size_t block_size, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { const gcm_impl_ops_t *gops; uint8_t *cb; ulong_t remainder = iv_len; ulong_t processed = 0; uint8_t *datap, *ghash; uint64_t len_a_len_c[2]; gops = gcm_impl_get_ops(); ghash = (uint8_t *)ctx->gcm_ghash; cb = (uint8_t *)ctx->gcm_cb; if (iv_len == 12) { bcopy(iv, cb, 12); cb[12] = 0; cb[13] = 0; cb[14] = 0; cb[15] = 1; /* J0 will be used again in the final */ copy_block(cb, (uint8_t *)ctx->gcm_J0); } else { /* GHASH the IV */ do { if (remainder < block_size) { bzero(cb, block_size); bcopy(&(iv[processed]), cb, remainder); datap = (uint8_t *)cb; remainder = 0; } else { datap = (uint8_t *)(&(iv[processed])); processed += block_size; remainder -= block_size; } GHASH(ctx, datap, ghash, gops); } while (remainder > 0); len_a_len_c[0] = 0; len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len)); GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops); /* J0 will be used again in the final */ copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb); } } static int gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, unsigned char *auth_data, size_t auth_data_len, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { const gcm_impl_ops_t *gops; uint8_t *ghash, *datap, *authp; size_t remainder, processed; /* encrypt zero block to get subkey H */ bzero(ctx->gcm_H, sizeof (ctx->gcm_H)); encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H, (uint8_t *)ctx->gcm_H); gcm_format_initial_blocks(iv, iv_len, ctx, block_size, copy_block, xor_block); gops = gcm_impl_get_ops(); authp = (uint8_t *)ctx->gcm_tmp; ghash = (uint8_t *)ctx->gcm_ghash; bzero(authp, block_size); bzero(ghash, block_size); processed = 0; remainder = auth_data_len; do { if (remainder < block_size) { /* * There's not a block full of data, pad rest of * buffer with zero */ bzero(authp, block_size); bcopy(&(auth_data[processed]), authp, remainder); datap = 
(uint8_t *)authp; remainder = 0; } else { datap = (uint8_t *)(&(auth_data[processed])); processed += block_size; remainder -= block_size; } /* add auth data to the hash */ GHASH(ctx, datap, ghash, gops); } while (remainder > 0); return (CRYPTO_SUCCESS); } /* * The following function is called at encrypt or decrypt init time * for AES GCM mode. * * Init the GCM context struct. Handle the cycle and avx implementations here. */ int gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { int rv; CK_AES_GCM_PARAMS *gcm_param; if (param != NULL) { gcm_param = (CK_AES_GCM_PARAMS *)(void *)param; if ((rv = gcm_validate_args(gcm_param)) != 0) { return (rv); } gcm_ctx->gcm_tag_len = gcm_param->ulTagBits; gcm_ctx->gcm_tag_len >>= 3; gcm_ctx->gcm_processed_data_len = 0; /* these values are in bits */ gcm_ctx->gcm_len_a_len_c[0] = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen)); rv = CRYPTO_SUCCESS; gcm_ctx->gcm_flags |= GCM_MODE; } else { return (CRYPTO_MECHANISM_PARAM_INVALID); } #ifdef CAN_USE_GCM_ASM if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; } else { /* * Handle the "cycle" implementation by creating avx and * non-avx contexts alternately. */ gcm_ctx->gcm_use_avx = gcm_toggle_avx(); /* * We don't handle byte swapped key schedules in the avx * code path. */ aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; if (ks->ops->needs_byteswap == B_TRUE) { gcm_ctx->gcm_use_avx = B_FALSE; } /* Use the MOVBE and the BSWAP variants alternately. */ if (gcm_ctx->gcm_use_avx == B_TRUE && zfs_movbe_available() == B_TRUE) { (void) atomic_toggle_boolean_nv( (volatile boolean_t *)&gcm_avx_can_use_movbe); } } /* Allocate Htab memory as needed. */ if (gcm_ctx->gcm_use_avx == B_TRUE) { size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx); if (htab_len == 0) { return (CRYPTO_MECHANISM_PARAM_INVALID); } gcm_ctx->gcm_htab_len = htab_len; gcm_ctx->gcm_Htable = (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag); if (gcm_ctx->gcm_Htable == NULL) { return (CRYPTO_HOST_MEMORY); } } /* Avx and non avx context initialization differs from here on. */ if (gcm_ctx->gcm_use_avx == B_FALSE) { #endif /* ifdef CAN_USE_GCM_ASM */ if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, gcm_param->pAAD, gcm_param->ulAADLen, block_size, encrypt_block, copy_block, xor_block) != 0) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } #ifdef CAN_USE_GCM_ASM } else { if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } } #endif /* ifdef CAN_USE_GCM_ASM */ return (rv); } int gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { int rv; CK_AES_GMAC_PARAMS *gmac_param; if (param != NULL) { gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param; gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS); gcm_ctx->gcm_processed_data_len = 0; /* these values are in bits */ gcm_ctx->gcm_len_a_len_c[0] = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen)); rv = CRYPTO_SUCCESS; gcm_ctx->gcm_flags |= GMAC_MODE; } else { return (CRYPTO_MECHANISM_PARAM_INVALID); } #ifdef CAN_USE_GCM_ASM /* * Handle the "cycle" implementation by creating avx and non avx * contexts alternately. 
*/ if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; } else { gcm_ctx->gcm_use_avx = gcm_toggle_avx(); } /* We don't handle byte swapped key schedules in the avx code path. */ aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; if (ks->ops->needs_byteswap == B_TRUE) { gcm_ctx->gcm_use_avx = B_FALSE; } /* Allocate Htab memory as needed. */ if (gcm_ctx->gcm_use_avx == B_TRUE) { size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx); if (htab_len == 0) { return (CRYPTO_MECHANISM_PARAM_INVALID); } gcm_ctx->gcm_htab_len = htab_len; gcm_ctx->gcm_Htable = (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag); if (gcm_ctx->gcm_Htable == NULL) { return (CRYPTO_HOST_MEMORY); } } /* Avx and non avx context initialization differs from here on. */ if (gcm_ctx->gcm_use_avx == B_FALSE) { #endif /* ifdef CAN_USE_GCM_ASM */ if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, gmac_param->pAAD, gmac_param->ulAADLen, block_size, encrypt_block, copy_block, xor_block) != 0) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } #ifdef CAN_USE_GCM_ASM } else { if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } } #endif /* ifdef CAN_USE_GCM_ASM */ return (rv); } void * gcm_alloc_ctx(int kmflag) { gcm_ctx_t *gcm_ctx; if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL) return (NULL); gcm_ctx->gcm_flags = GCM_MODE; return (gcm_ctx); } void * gmac_alloc_ctx(int kmflag) { gcm_ctx_t *gcm_ctx; if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL) return (NULL); gcm_ctx->gcm_flags = GMAC_MODE; return (gcm_ctx); } void gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag) { ctx->gcm_kmflag = kmflag; } /* GCM implementation that contains the fastest methods */ static gcm_impl_ops_t gcm_fastest_impl = { .name = "fastest" }; /* All compiled in implementations */ const gcm_impl_ops_t *gcm_all_impl[] = { &gcm_generic_impl, #if defined(__x86_64) && defined(HAVE_PCLMULQDQ) &gcm_pclmulqdq_impl, #endif }; /* Indicate that benchmark has been completed */ static boolean_t gcm_impl_initialized = B_FALSE; /* Hold all supported implementations */ static size_t gcm_supp_impl_cnt = 0; static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)]; /* * Returns the GCM operations for encrypt/decrypt/key setup. When a * SIMD implementation is not allowed in the current context, then * fallback to the fastest generic implementation. */ const gcm_impl_ops_t * gcm_impl_get_ops() { if (!kfpu_allowed()) return (&gcm_generic_impl); const gcm_impl_ops_t *ops = NULL; const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl); switch (impl) { case IMPL_FASTEST: ASSERT(gcm_impl_initialized); ops = &gcm_fastest_impl; break; case IMPL_CYCLE: /* Cycle through supported implementations */ ASSERT(gcm_impl_initialized); ASSERT3U(gcm_supp_impl_cnt, >, 0); static size_t cycle_impl_idx = 0; size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt; ops = gcm_supp_impl[idx]; break; #ifdef CAN_USE_GCM_ASM case IMPL_AVX: /* * Make sure that we return a valid implementation while * switching to the avx implementation since there still * may be unfinished non-avx contexts around. */ ops = &gcm_generic_impl; break; #endif default: ASSERT3U(impl, <, gcm_supp_impl_cnt); ASSERT3U(gcm_supp_impl_cnt, >, 0); if (impl < ARRAY_SIZE(gcm_all_impl)) ops = gcm_supp_impl[impl]; break; } ASSERT3P(ops, !=, NULL); return (ops); } /* * Initialize all supported implementations. 
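 * This walks gcm_all_impl[], records every implementation whose
 * is_supported() hook reports true in gcm_supp_impl[], synthesizes the
 * "fastest" alias, and finally applies any selection the user made
 * before the module finished loading. Adding another implementation
 * therefore only requires an ops vector plus an entry in
 * gcm_all_impl[]; as a purely illustrative sketch (all "myarch" names
 * are made up):
 *
 *	const gcm_impl_ops_t gcm_myarch_impl = {
 *		.mul = gcm_myarch_mul,
 *		.is_supported = gcm_myarch_will_work,
 *		.name = "myarch"
 *	};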
*/ void gcm_impl_init(void) { gcm_impl_ops_t *curr_impl; int i, c; /* Move supported implementations into gcm_supp_impls */ for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) { curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i]; if (curr_impl->is_supported()) gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl; } gcm_supp_impl_cnt = c; /* * Set the fastest implementation given the assumption that the * hardware accelerated version is the fastest. */ #if defined(__x86_64) && defined(HAVE_PCLMULQDQ) if (gcm_pclmulqdq_impl.is_supported()) { memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl, sizeof (gcm_fastest_impl)); } else #endif { memcpy(&gcm_fastest_impl, &gcm_generic_impl, sizeof (gcm_fastest_impl)); } strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX); #ifdef CAN_USE_GCM_ASM /* * Use the avx implementation if it's available and the implementation * hasn't changed from its default value of fastest on module load. */ if (gcm_avx_will_work()) { #ifdef HAVE_MOVBE if (zfs_movbe_available() == B_TRUE) { atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE); } #endif if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) { gcm_set_avx(B_TRUE); } } #endif /* Finish initialization */ atomic_swap_32(&icp_gcm_impl, user_sel_impl); gcm_impl_initialized = B_TRUE; } static const struct { char *name; uint32_t sel; } gcm_impl_opts[] = { { "cycle", IMPL_CYCLE }, { "fastest", IMPL_FASTEST }, #ifdef CAN_USE_GCM_ASM { "avx", IMPL_AVX }, #endif }; /* * Function sets desired gcm implementation. * * If we are called before init(), user preference will be saved in * user_sel_impl, and applied in later init() call. This occurs when module * parameter is specified on module load. Otherwise, directly update * icp_gcm_impl. * * @val Name of gcm implementation to use * @param Unused. */ int gcm_impl_set(const char *val) { int err = -EINVAL; char req_name[GCM_IMPL_NAME_MAX]; uint32_t impl = GCM_IMPL_READ(user_sel_impl); size_t i; /* sanitize input */ i = strnlen(val, GCM_IMPL_NAME_MAX); if (i == 0 || i >= GCM_IMPL_NAME_MAX) return (err); strlcpy(req_name, val, GCM_IMPL_NAME_MAX); while (i > 0 && isspace(req_name[i-1])) i--; req_name[i] = '\0'; /* Check mandatory options */ for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { #ifdef CAN_USE_GCM_ASM /* Ignore avx implementation if it won't work. */ if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { continue; } #endif if (strcmp(req_name, gcm_impl_opts[i].name) == 0) { impl = gcm_impl_opts[i].sel; err = 0; break; } } /* check all supported impl if init() was already called */ if (err != 0 && gcm_impl_initialized) { /* check all supported implementations */ for (i = 0; i < gcm_supp_impl_cnt; i++) { if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) { impl = i; err = 0; break; } } } #ifdef CAN_USE_GCM_ASM /* * Use the avx implementation if available and the requested one is * avx or fastest. 
*/ if (gcm_avx_will_work() == B_TRUE && (impl == IMPL_AVX || impl == IMPL_FASTEST)) { gcm_set_avx(B_TRUE); } else { gcm_set_avx(B_FALSE); } #endif if (err == 0) { if (gcm_impl_initialized) atomic_swap_32(&icp_gcm_impl, impl); else atomic_swap_32(&user_sel_impl, impl); } return (err); } #if defined(_KERNEL) && defined(__linux__) static int icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp) { return (gcm_impl_set(val)); } static int icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp) { int i, cnt = 0; char *fmt; const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl); ASSERT(gcm_impl_initialized); /* list mandatory options */ for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { #ifdef CAN_USE_GCM_ASM /* Ignore avx implementation if it won't work. */ if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { continue; } #endif fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s "; cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name); } /* list all supported implementations */ for (i = 0; i < gcm_supp_impl_cnt; i++) { fmt = (i == impl) ? "[%s] " : "%s "; cnt += sprintf(buffer + cnt, fmt, gcm_supp_impl[i]->name); } return (cnt); } module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get, NULL, 0644); MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); #endif /* defined(__KERNEL) */ #ifdef CAN_USE_GCM_ASM #define GCM_BLOCK_LEN 16 /* * The openssl asm routines are 6x aggregated and need that many bytes * at minimum. */ #define GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6) #define GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3) /* * Ensure the chunk size is reasonable since we are allocating a * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts. */ #define GCM_AVX_MAX_CHUNK_SIZE \ (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES) /* Get the chunk size module parameter. */ #define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size /* Clear the FPU registers since they hold sensitive internal state. */ #define clear_fpu_regs() clear_fpu_regs_avx() #define GHASH_AVX(ctx, in, len) \ gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \ in, len) #define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1) /* * Module parameter: number of bytes to process at once while owning the FPU. * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is * ensured to be greater or equal than GCM_AVX_MIN_DECRYPT_BYTES. */ static uint32_t gcm_avx_chunk_size = ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; extern void clear_fpu_regs_avx(void); extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst); extern void aes_encrypt_intel(const uint32_t rk[], int nr, const uint32_t pt[4], uint32_t ct[4]); extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]); extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable, const uint8_t *in, size_t len); extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t, const void *, uint64_t *, uint64_t *); extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t, const void *, uint64_t *, uint64_t *); static inline boolean_t gcm_avx_will_work(void) { /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. 
*/ return (kfpu_allowed() && zfs_avx_available() && zfs_aes_available() && zfs_pclmulqdq_available()); } static inline void gcm_set_avx(boolean_t val) { if (gcm_avx_will_work() == B_TRUE) { atomic_swap_32(&gcm_use_avx, val); } } static inline boolean_t gcm_toggle_avx(void) { if (gcm_avx_will_work() == B_TRUE) { return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX)); } else { return (B_FALSE); } } static inline size_t gcm_simd_get_htab_size(boolean_t simd_mode) { switch (simd_mode) { case B_TRUE: return (2 * 6 * 2 * sizeof (uint64_t)); default: return (0); } } /* * Clear sensitive data in the context. * * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and * ctx->gcm_Htable contain the hash sub key which protects authentication. * * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for * a known plaintext attack, they consists of the IV and the first and last * counter respectively. If they should be cleared is debatable. */ static inline void gcm_clear_ctx(gcm_ctx_t *ctx) { bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder)); bzero(ctx->gcm_H, sizeof (ctx->gcm_H)); bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0)); bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp)); } /* Increment the GCM counter block by n. */ static inline void gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n) { uint64_t counter_mask = ntohll(0x00000000ffffffffULL); uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask); counter = htonll(counter + n); counter &= counter_mask; ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; } /* * Encrypt multiple blocks of data in GCM mode. * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines * if possible. While processing a chunk the FPU is "locked". */ static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size) { size_t bleft = length; size_t need = 0; size_t done = 0; uint8_t *datap = (uint8_t *)data; size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); uint64_t *ghash = ctx->gcm_ghash; uint64_t *cb = ctx->gcm_cb; uint8_t *ct_buf = NULL; uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; int rv = CRYPTO_SUCCESS; ASSERT(block_size == GCM_BLOCK_LEN); /* * If the last call left an incomplete block, try to fill * it first. */ if (ctx->gcm_remainder_len > 0) { need = block_size - ctx->gcm_remainder_len; if (length < need) { /* Accumulate bytes here and return. */ bcopy(datap, (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len, length); ctx->gcm_remainder_len += length; if (ctx->gcm_copy_to == NULL) { ctx->gcm_copy_to = datap; } return (CRYPTO_SUCCESS); } else { /* Complete incomplete block. */ bcopy(datap, (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len, need); ctx->gcm_copy_to = NULL; } } /* Allocate a buffer to encrypt to if there is enough input. */ if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { ct_buf = vmem_alloc(chunk_size, ctx->gcm_kmflag); if (ct_buf == NULL) { return (CRYPTO_HOST_MEMORY); } } /* If we completed an incomplete block, encrypt and write it out. 
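 * That completed block goes through the single-block AES-NI primitive;
 * the bulk of the data is then handed to the 6x aggregated OpenSSL
 * routine in chunk_size pieces, releasing the FPU between chunks. With
 * the default module parameter the chunk size works out to
 *
 *	(32 * 1024 / 96) * 96 = 32736 bytes
 *
 * i.e. the largest multiple of GCM_AVX_MIN_DECRYPT_BYTES (6 blocks of
 * 16 bytes) not exceeding 32 KiB, so the FPU is never held for more
 * than one such chunk of work at a time.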
*/ if (ctx->gcm_remainder_len > 0) { kfpu_begin(); aes_encrypt_intel(key->encr_ks.ks32, key->nr, (const uint32_t *)cb, (uint32_t *)tmp); gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp); GHASH_AVX(ctx, tmp, block_size); clear_fpu_regs(); kfpu_end(); rv = crypto_put_output_data(tmp, out, block_size); out->cd_offset += block_size; gcm_incr_counter_block(ctx); ctx->gcm_processed_data_len += block_size; bleft -= need; datap += need; ctx->gcm_remainder_len = 0; } /* Do the bulk encryption in chunk_size blocks. */ for (; bleft >= chunk_size; bleft -= chunk_size) { kfpu_begin(); done = aesni_gcm_encrypt( datap, ct_buf, chunk_size, key, cb, ghash); clear_fpu_regs(); kfpu_end(); if (done != chunk_size) { rv = CRYPTO_FAILED; goto out_nofpu; } rv = crypto_put_output_data(ct_buf, out, chunk_size); if (rv != CRYPTO_SUCCESS) { goto out_nofpu; } out->cd_offset += chunk_size; datap += chunk_size; ctx->gcm_processed_data_len += chunk_size; } /* Check if we are already done. */ if (bleft == 0) { goto out_nofpu; } /* Bulk encrypt the remaining data. */ kfpu_begin(); if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash); if (done == 0) { rv = CRYPTO_FAILED; goto out; } rv = crypto_put_output_data(ct_buf, out, done); if (rv != CRYPTO_SUCCESS) { goto out; } out->cd_offset += done; ctx->gcm_processed_data_len += done; datap += done; bleft -= done; } /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */ while (bleft > 0) { if (bleft < block_size) { bcopy(datap, ctx->gcm_remainder, bleft); ctx->gcm_remainder_len = bleft; ctx->gcm_copy_to = datap; goto out; } /* Encrypt, hash and write out. */ aes_encrypt_intel(key->encr_ks.ks32, key->nr, (const uint32_t *)cb, (uint32_t *)tmp); gcm_xor_avx(datap, tmp); GHASH_AVX(ctx, tmp, block_size); rv = crypto_put_output_data(tmp, out, block_size); if (rv != CRYPTO_SUCCESS) { goto out; } out->cd_offset += block_size; gcm_incr_counter_block(ctx); ctx->gcm_processed_data_len += block_size; datap += block_size; bleft -= block_size; } out: clear_fpu_regs(); kfpu_end(); out_nofpu: if (ct_buf != NULL) { vmem_free(ct_buf, chunk_size); } return (rv); } /* * Finalize the encryption: Zero fill, encrypt, hash and write out an eventual * incomplete last block. Encrypt the ICB. Calculate the tag and write it out. */ static int gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) { uint8_t *ghash = (uint8_t *)ctx->gcm_ghash; uint32_t *J0 = (uint32_t *)ctx->gcm_J0; uint8_t *remainder = (uint8_t *)ctx->gcm_remainder; size_t rem_len = ctx->gcm_remainder_len; const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32; int aes_rounds = ((aes_key_t *)keysched)->nr; int rv; ASSERT(block_size == GCM_BLOCK_LEN); if (out->cd_length < (rem_len + ctx->gcm_tag_len)) { return (CRYPTO_DATA_LEN_RANGE); } kfpu_begin(); /* Pad last incomplete block with zeros, encrypt and hash. */ if (rem_len > 0) { uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; const uint32_t *cb = (uint32_t *)ctx->gcm_cb; aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp); bzero(remainder + rem_len, block_size - rem_len); for (int i = 0; i < rem_len; i++) { remainder[i] ^= tmp[i]; } GHASH_AVX(ctx, remainder, block_size); ctx->gcm_processed_data_len += rem_len; /* No need to increment counter_block, it's the last block. */ } /* Finish tag. 
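 * The tag is computed just as in the generic code path: the length
 * block len(A) || len(C), both lengths in bits, is the final GHASH
 * input, and the resulting hash S is combined with the saved
 * pre-counter block:
 *
 *	T = E(K, J0) XOR S
 *
 * The first gcm_tag_len bytes of T are appended to the output after any
 * remaining ciphertext.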
*/ ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len)); GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size); aes_encrypt_intel(keysched, aes_rounds, J0, J0); gcm_xor_avx((uint8_t *)J0, ghash); clear_fpu_regs(); kfpu_end(); /* Output remainder. */ if (rem_len > 0) { rv = crypto_put_output_data(remainder, out, rem_len); if (rv != CRYPTO_SUCCESS) return (rv); } out->cd_offset += rem_len; ctx->gcm_remainder_len = 0; rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len); if (rv != CRYPTO_SUCCESS) return (rv); out->cd_offset += ctx->gcm_tag_len; /* Clear sensitive data in the context before returning. */ gcm_clear_ctx(ctx); return (CRYPTO_SUCCESS); } /* * Finalize decryption: We just have accumulated crypto text, so now we * decrypt it here inplace. */ static int gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) { ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len); ASSERT3U(block_size, ==, 16); size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; uint8_t *datap = ctx->gcm_pt_buf; const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); uint32_t *cb = (uint32_t *)ctx->gcm_cb; uint64_t *ghash = ctx->gcm_ghash; uint32_t *tmp = (uint32_t *)ctx->gcm_tmp; int rv = CRYPTO_SUCCESS; size_t bleft, done; /* * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be * greater or equal than GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple of * GCM_AVX_MIN_DECRYPT_BYTES. */ for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) { kfpu_begin(); done = aesni_gcm_decrypt(datap, datap, chunk_size, (const void *)key, ctx->gcm_cb, ghash); clear_fpu_regs(); kfpu_end(); if (done != chunk_size) { return (CRYPTO_FAILED); } datap += done; } /* Decrypt remainder, which is less than chunk size, in one go. */ kfpu_begin(); if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) { done = aesni_gcm_decrypt(datap, datap, bleft, (const void *)key, ctx->gcm_cb, ghash); if (done == 0) { clear_fpu_regs(); kfpu_end(); return (CRYPTO_FAILED); } datap += done; bleft -= done; } ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES); /* * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain, * decrypt them block by block. */ while (bleft > 0) { /* Incomplete last block. */ if (bleft < block_size) { uint8_t *lastb = (uint8_t *)ctx->gcm_remainder; bzero(lastb, block_size); bcopy(datap, lastb, bleft); /* The GCM processing. */ GHASH_AVX(ctx, lastb, block_size); aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp); for (size_t i = 0; i < bleft; i++) { datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i]; } break; } /* The GCM processing. */ GHASH_AVX(ctx, datap, block_size); aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp); gcm_xor_avx((uint8_t *)tmp, datap); gcm_incr_counter_block(ctx); datap += block_size; bleft -= block_size; } if (rv != CRYPTO_SUCCESS) { clear_fpu_regs(); kfpu_end(); return (rv); } /* Decryption done, finish the tag. */ ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len)); GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size); aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0, (uint32_t *)ctx->gcm_J0); gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash); /* We are done with the FPU, restore its state. */ clear_fpu_regs(); kfpu_end(); /* Compare the input authentication tag with what we calculated. */ if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) { /* They don't match. 
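 * Since the ciphertext was decrypted in place into gcm_pt_buf and is
 * only copied to the caller's buffer below once the tags agree, a
 * mismatching (e.g. forged) tag never releases any plaintext.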
*/ return (CRYPTO_INVALID_MAC); } rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len); if (rv != CRYPTO_SUCCESS) { return (rv); } out->cd_offset += pt_len; gcm_clear_ctx(ctx); return (CRYPTO_SUCCESS); } /* * Initialize the GCM params H, Htabtle and the counter block. Save the * initial counter block. */ static int gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, unsigned char *auth_data, size_t auth_data_len, size_t block_size) { uint8_t *cb = (uint8_t *)ctx->gcm_cb; uint64_t *H = ctx->gcm_H; const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32; int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr; uint8_t *datap = auth_data; size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; size_t bleft; ASSERT(block_size == GCM_BLOCK_LEN); /* Init H (encrypt zero block) and create the initial counter block. */ bzero(ctx->gcm_ghash, sizeof (ctx->gcm_ghash)); bzero(H, sizeof (ctx->gcm_H)); kfpu_begin(); aes_encrypt_intel(keysched, aes_rounds, (const uint32_t *)H, (uint32_t *)H); gcm_init_htab_avx(ctx->gcm_Htable, H); if (iv_len == 12) { bcopy(iv, cb, 12); cb[12] = 0; cb[13] = 0; cb[14] = 0; cb[15] = 1; /* We need the ICB later. */ bcopy(cb, ctx->gcm_J0, sizeof (ctx->gcm_J0)); } else { /* * Most consumers use 12 byte IVs, so it's OK to use the * original routines for other IV sizes, just avoid nesting * kfpu_begin calls. */ clear_fpu_regs(); kfpu_end(); gcm_format_initial_blocks(iv, iv_len, ctx, block_size, aes_copy_block, aes_xor_block); kfpu_begin(); } /* Openssl post increments the counter, adjust for that. */ gcm_incr_counter_block(ctx); /* Ghash AAD in chunk_size blocks. */ for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) { GHASH_AVX(ctx, datap, chunk_size); datap += chunk_size; clear_fpu_regs(); kfpu_end(); kfpu_begin(); } /* Ghash the remainder and handle possible incomplete GCM block. */ if (bleft > 0) { size_t incomp = bleft % block_size; bleft -= incomp; if (bleft > 0) { GHASH_AVX(ctx, datap, bleft); datap += bleft; } if (incomp > 0) { /* Zero pad and hash incomplete last block. */ uint8_t *authp = (uint8_t *)ctx->gcm_tmp; bzero(authp, block_size); bcopy(datap, authp, incomp); GHASH_AVX(ctx, authp, block_size); } } clear_fpu_regs(); kfpu_end(); return (CRYPTO_SUCCESS); } #if defined(_KERNEL) static int icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp) { unsigned long val; char val_rounded[16]; int error = 0; error = kstrtoul(buf, 0, &val); if (error) return (error); val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE) return (-EINVAL); snprintf(val_rounded, 16, "%u", (uint32_t)val); error = param_set_uint(val_rounded, kp); return (error); } module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size, param_get_uint, &gcm_avx_chunk_size, 0644); MODULE_PARM_DESC(icp_gcm_avx_chunk_size, "How many bytes to process while owning the FPU"); #endif /* defined(__KERNEL) */ #endif /* ifdef CAN_USE_GCM_ASM */ diff --git a/module/icp/asm-x86_64/aes/aes_aesni.S b/module/icp/asm-x86_64/aes/aes_aesni.S index 4a80c62097ae..1a8669ccd1d6 100644 --- a/module/icp/asm-x86_64/aes/aes_aesni.S +++ b/module/icp/asm-x86_64/aes/aes_aesni.S @@ -1,748 +1,748 @@ /* * ==================================================================== * Written by Intel Corporation for the OpenSSL project to add support * for Intel AES-NI instructions. Rights for redistribution and usage * in source and binary forms are granted according to the OpenSSL * license. 
* * Author: Huang Ying * Vinodh Gopal * Kahraman Akdemir * * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD) * instructions that are going to be introduced in the next generation * of Intel processor, as of 2009. These instructions enable fast and * secure data encryption and decryption, using the Advanced Encryption * Standard (AES), defined by FIPS Publication number 197. The * architecture introduces six instructions that offer full hardware * support for AES. Four of them support high performance data * encryption and decryption, and the other two instructions support * the AES key expansion procedure. * ==================================================================== */ /* * ==================================================================== * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgment: * "This product includes software developed by the OpenSSL Project * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" * * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to * endorse or promote products derived from this software without * prior written permission. For written permission, please contact * openssl-core@openssl.org. * * 5. Products derived from this software may not be called "OpenSSL" * nor may "OpenSSL" appear in their names without prior written * permission of the OpenSSL Project. * * 6. Redistributions of any form whatsoever must retain the following * acknowledgment: * "This product includes software developed by the OpenSSL Project * for use in the OpenSSL Toolkit (http://www.openssl.org/)" * * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * ==================================================================== */ /* * ==================================================================== * OpenSolaris OS modifications * * This source originates as files aes-intel.S and eng_aesni_asm.pl, in * patches sent sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by * Huang Ying of Intel to the openssl-dev mailing list under the subject * of "Add support to Intel AES-NI instruction set for x86_64 platform". * * This OpenSolaris version has these major changes from the original source: * * 1. 
Added OpenSolaris ENTRY_NP/SET_SIZE macros from * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function * definitions for lint. * * 2. Formatted code, added comments, and added #includes and #defines. * * 3. If bit CR0.TS is set, clear and set the TS bit, after and before * calling kpreempt_disable() and kpreempt_enable(). * If the TS bit is not set, Save and restore %xmm registers at the beginning * and end of function calls (%xmm* registers are not saved and restored by * during kernel thread preemption). * * 4. Renamed functions, reordered parameters, and changed return value * to match OpenSolaris: * * OpenSSL interface: * int intel_AES_set_encrypt_key(const unsigned char *userKey, * const int bits, AES_KEY *key); * int intel_AES_set_decrypt_key(const unsigned char *userKey, * const int bits, AES_KEY *key); * Return values for above are non-zero on error, 0 on success. * * void intel_AES_encrypt(const unsigned char *in, unsigned char *out, * const AES_KEY *key); * void intel_AES_decrypt(const unsigned char *in, unsigned char *out, * const AES_KEY *key); * typedef struct aes_key_st { * unsigned int rd_key[4 *(AES_MAXNR + 1)]; * int rounds; * unsigned int pad[3]; * } AES_KEY; * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules * (ks32) instead of 64-bit (ks64). * Number of rounds (aka round count) is at offset 240 of AES_KEY. * * OpenSolaris OS interface (#ifdefs removed for readability): * int rijndael_key_setup_dec_intel(uint32_t rk[], * const uint32_t cipherKey[], uint64_t keyBits); * int rijndael_key_setup_enc_intel(uint32_t rk[], * const uint32_t cipherKey[], uint64_t keyBits); * Return values for above are 0 on error, number of rounds on success. * * void aes_encrypt_intel(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4]); * void aes_decrypt_intel(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4]); * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4]; * uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t; * * typedef union { * uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)]; * } aes_ks_t; * typedef struct aes_key { * aes_ks_t encr_ks, decr_ks; * long double align128; * int flags, nr, type; * } aes_key_t; * * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text, * ct is crypto text, and MAX_AES_NR is 14. * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64. * * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary. * * ==================================================================== */ #if defined(lint) || defined(__lint) #include -/* ARGSUSED */ void aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4], uint32_t ct[4]) { + (void) rk, (void) Nr, (void) pt, (void) ct; } -/* ARGSUSED */ void aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4], uint32_t pt[4]) { + (void) rk, (void) Nr, (void) ct, (void) pt; } -/* ARGSUSED */ int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[], uint64_t keyBits) { + (void) rk, (void) cipherKey, (void) keyBits; return (0); } -/* ARGSUSED */ int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[], uint64_t keyBits) { + (void) rk, (void) cipherKey, (void) keyBits; return (0); } #elif defined(HAVE_AES) /* guard by instruction set */ #define _ASM #include /* * _key_expansion_128(), * _key_expansion_192a(), _key_expansion_192b(), * _key_expansion_256a(), _key_expansion_256b() * * Helper functions called by rijndael_key_setup_inc_intel(). 
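 * Each helper consumes the result of one AESKEYGENASSIST instruction
 * (which provides the SubWord/RotWord values in hardware) and derives
 * the next round key(s) with a short chain of shuffles and XORs. For
 * the 128-bit schedule the step implemented is the usual FIPS-197
 * recurrence, shown here only for illustration (w[] is the 32-bit word
 * view of the schedule, rcon the round constant):
 *
 *	w[4*i]   = w[4*i-4] ^ SubWord(RotWord(w[4*i-1])) ^ rcon;
 *	w[4*i+1] = w[4*i-3] ^ w[4*i];
 *	w[4*i+2] = w[4*i-2] ^ w[4*i+1];
 *	w[4*i+3] = w[4*i-1] ^ w[4*i+2];
 *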
* Also used indirectly by rijndael_key_setup_dec_intel(). * * Input: * %xmm0 User-provided cipher key * %xmm1 Round constant * Output: * (%rcx) AES key */ ENTRY_NP2(_key_expansion_128, _key_expansion_256a) _key_expansion_128_local: _key_expansion_256a_local: pshufd $0b11111111, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 pxor %xmm4, %xmm0 shufps $0b10001100, %xmm0, %xmm4 pxor %xmm4, %xmm0 pxor %xmm1, %xmm0 movups %xmm0, (%rcx) add $0x10, %rcx ret nop SET_SIZE(_key_expansion_128) SET_SIZE(_key_expansion_256a) ENTRY_NP(_key_expansion_192a) _key_expansion_192a_local: pshufd $0b01010101, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 pxor %xmm4, %xmm0 shufps $0b10001100, %xmm0, %xmm4 pxor %xmm4, %xmm0 pxor %xmm1, %xmm0 movups %xmm2, %xmm5 movups %xmm2, %xmm6 pslldq $4, %xmm5 pshufd $0b11111111, %xmm0, %xmm3 pxor %xmm3, %xmm2 pxor %xmm5, %xmm2 movups %xmm0, %xmm1 shufps $0b01000100, %xmm0, %xmm6 movups %xmm6, (%rcx) shufps $0b01001110, %xmm2, %xmm1 movups %xmm1, 0x10(%rcx) add $0x20, %rcx ret SET_SIZE(_key_expansion_192a) ENTRY_NP(_key_expansion_192b) _key_expansion_192b_local: pshufd $0b01010101, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 pxor %xmm4, %xmm0 shufps $0b10001100, %xmm0, %xmm4 pxor %xmm4, %xmm0 pxor %xmm1, %xmm0 movups %xmm2, %xmm5 pslldq $4, %xmm5 pshufd $0b11111111, %xmm0, %xmm3 pxor %xmm3, %xmm2 pxor %xmm5, %xmm2 movups %xmm0, (%rcx) add $0x10, %rcx ret SET_SIZE(_key_expansion_192b) ENTRY_NP(_key_expansion_256b) _key_expansion_256b_local: pshufd $0b10101010, %xmm1, %xmm1 shufps $0b00010000, %xmm2, %xmm4 pxor %xmm4, %xmm2 shufps $0b10001100, %xmm2, %xmm4 pxor %xmm4, %xmm2 pxor %xmm1, %xmm2 movups %xmm2, (%rcx) add $0x10, %rcx ret SET_SIZE(_key_expansion_256b) /* * rijndael_key_setup_enc_intel() * Expand the cipher key into the encryption key schedule. * * For kernel code, caller is responsible for ensuring kpreempt_disable() * has been called. This is because %xmm registers are not saved/restored. * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set * on entry. Otherwise, if TS is not set, save and restore %xmm registers * on the stack. * * OpenSolaris interface: * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[], * uint64_t keyBits); * Return value is 0 on error, number of rounds on success. * * Original Intel OpenSSL interface: * int intel_AES_set_encrypt_key(const unsigned char *userKey, * const int bits, AES_KEY *key); * Return value is non-zero on error, 0 on success. 
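 * The round count follows FIPS-197: Nr = 10, 12 or 14 for 128, 192 or
 * 256-bit keys, so the expanded schedule holds 4 * (Nr + 1) 32-bit
 * words, i.e. 44, 52 or 60 words (which is where the KS_LENGTH of 60
 * used by the amd64 implementation comes from).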
*/ #ifdef OPENSSL_INTERFACE #define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key #define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key #define USERCIPHERKEY rdi /* P1, 64 bits */ #define KEYSIZE32 esi /* P2, 32 bits */ #define KEYSIZE64 rsi /* P2, 64 bits */ #define AESKEY rdx /* P3, 64 bits */ #else /* OpenSolaris Interface */ #define AESKEY rdi /* P1, 64 bits */ #define USERCIPHERKEY rsi /* P2, 64 bits */ #define KEYSIZE32 edx /* P3, 32 bits */ #define KEYSIZE64 rdx /* P3, 64 bits */ #endif /* OPENSSL_INTERFACE */ #define ROUNDS32 KEYSIZE32 /* temp */ #define ROUNDS64 KEYSIZE64 /* temp */ #define ENDAESKEY USERCIPHERKEY /* temp */ ENTRY_NP(rijndael_key_setup_enc_intel) rijndael_key_setup_enc_intel_local: FRAME_BEGIN // NULL pointer sanity check test %USERCIPHERKEY, %USERCIPHERKEY jz .Lenc_key_invalid_param test %AESKEY, %AESKEY jz .Lenc_key_invalid_param movups (%USERCIPHERKEY), %xmm0 // user key (first 16 bytes) movups %xmm0, (%AESKEY) lea 0x10(%AESKEY), %rcx // key addr pxor %xmm4, %xmm4 // xmm4 is assumed 0 in _key_expansion_x cmp $256, %KEYSIZE32 jnz .Lenc_key192 // AES 256: 14 rounds in encryption key schedule #ifdef OPENSSL_INTERFACE mov $14, %ROUNDS32 movl %ROUNDS32, 240(%AESKEY) // key.rounds = 14 #endif /* OPENSSL_INTERFACE */ movups 0x10(%USERCIPHERKEY), %xmm2 // other user key (2nd 16 bytes) movups %xmm2, (%rcx) add $0x10, %rcx aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key call _key_expansion_256a_local aeskeygenassist $0x1, %xmm0, %xmm1 call _key_expansion_256b_local aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key call _key_expansion_256a_local aeskeygenassist $0x2, %xmm0, %xmm1 call _key_expansion_256b_local aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key call _key_expansion_256a_local aeskeygenassist $0x4, %xmm0, %xmm1 call _key_expansion_256b_local aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key call _key_expansion_256a_local aeskeygenassist $0x8, %xmm0, %xmm1 call _key_expansion_256b_local aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key call _key_expansion_256a_local aeskeygenassist $0x10, %xmm0, %xmm1 call _key_expansion_256b_local aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key call _key_expansion_256a_local aeskeygenassist $0x20, %xmm0, %xmm1 call _key_expansion_256b_local aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key call _key_expansion_256a_local #ifdef OPENSSL_INTERFACE xor %rax, %rax // return 0 (OK) #else /* Open Solaris Interface */ mov $14, %rax // return # rounds = 14 #endif FRAME_END ret .align 4 .Lenc_key192: cmp $192, %KEYSIZE32 jnz .Lenc_key128 // AES 192: 12 rounds in encryption key schedule #ifdef OPENSSL_INTERFACE mov $12, %ROUNDS32 movl %ROUNDS32, 240(%AESKEY) // key.rounds = 12 #endif /* OPENSSL_INTERFACE */ movq 0x10(%USERCIPHERKEY), %xmm2 // other user key aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key call _key_expansion_192a_local aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key call _key_expansion_192b_local aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key call _key_expansion_192a_local aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key call _key_expansion_192b_local aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key call _key_expansion_192a_local aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key call _key_expansion_192b_local aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key call _key_expansion_192a_local aeskeygenassist $0x80, %xmm2, %xmm1 // expand the key call _key_expansion_192b_local #ifdef OPENSSL_INTERFACE xor %rax, %rax // return 0 (OK) #else /* 
OpenSolaris Interface */ mov $12, %rax // return # rounds = 12 #endif FRAME_END ret .align 4 .Lenc_key128: cmp $128, %KEYSIZE32 jnz .Lenc_key_invalid_key_bits // AES 128: 10 rounds in encryption key schedule #ifdef OPENSSL_INTERFACE mov $10, %ROUNDS32 movl %ROUNDS32, 240(%AESKEY) // key.rounds = 10 #endif /* OPENSSL_INTERFACE */ aeskeygenassist $0x1, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x2, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x4, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x8, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x10, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x20, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x40, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x80, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x1b, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x36, %xmm0, %xmm1 // expand the key call _key_expansion_128_local #ifdef OPENSSL_INTERFACE xor %rax, %rax // return 0 (OK) #else /* OpenSolaris Interface */ mov $10, %rax // return # rounds = 10 #endif FRAME_END ret .Lenc_key_invalid_param: #ifdef OPENSSL_INTERFACE mov $-1, %rax // user key or AES key pointer is NULL FRAME_END ret #else /* FALLTHROUGH */ #endif /* OPENSSL_INTERFACE */ .Lenc_key_invalid_key_bits: #ifdef OPENSSL_INTERFACE mov $-2, %rax // keysize is invalid #else /* Open Solaris Interface */ xor %rax, %rax // a key pointer is NULL or invalid keysize #endif /* OPENSSL_INTERFACE */ FRAME_END ret SET_SIZE(rijndael_key_setup_enc_intel) /* * rijndael_key_setup_dec_intel() * Expand the cipher key into the decryption key schedule. * * For kernel code, caller is responsible for ensuring kpreempt_disable() * has been called. This is because %xmm registers are not saved/restored. * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set * on entry. Otherwise, if TS is not set, save and restore %xmm registers * on the stack. * * OpenSolaris interface: * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[], * uint64_t keyBits); * Return value is 0 on error, number of rounds on success. * P1->P2, P2->P3, P3->P1 * * Original Intel OpenSSL interface: * int intel_AES_set_decrypt_key(const unsigned char *userKey, * const int bits, AES_KEY *key); * Return value is non-zero on error, 0 on success. 
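 * The routine first builds the encryption schedule ek[0..N] and then
 * converts it into the equivalent inverse-cipher schedule, which in
 * outline is
 *
 *	dk[0] = ek[N]
 *	dk[i] = InvMixColumns(ek[N - i]),  0 < i < N
 *	dk[N] = ek[0]
 *
 * i.e. the encryption round keys in reverse order with AESIMC applied
 * to every round key except the first and the last.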
*/ ENTRY_NP(rijndael_key_setup_dec_intel) FRAME_BEGIN // Generate round keys used for encryption call rijndael_key_setup_enc_intel_local test %rax, %rax #ifdef OPENSSL_INTERFACE jnz .Ldec_key_exit // Failed if returned non-0 #else /* OpenSolaris Interface */ jz .Ldec_key_exit // Failed if returned 0 #endif /* OPENSSL_INTERFACE */ /* * Convert round keys used for encryption * to a form usable for decryption */ #ifndef OPENSSL_INTERFACE /* OpenSolaris Interface */ mov %rax, %ROUNDS64 // set # rounds (10, 12, or 14) // (already set for OpenSSL) #endif lea 0x10(%AESKEY), %rcx // key addr shl $4, %ROUNDS32 add %AESKEY, %ROUNDS64 mov %ROUNDS64, %ENDAESKEY .align 4 .Ldec_key_reorder_loop: movups (%AESKEY), %xmm0 movups (%ROUNDS64), %xmm1 movups %xmm0, (%ROUNDS64) movups %xmm1, (%AESKEY) lea 0x10(%AESKEY), %AESKEY lea -0x10(%ROUNDS64), %ROUNDS64 cmp %AESKEY, %ROUNDS64 ja .Ldec_key_reorder_loop .align 4 .Ldec_key_inv_loop: movups (%rcx), %xmm0 // Convert an encryption round key to a form usable for decryption // with the "AES Inverse Mix Columns" instruction aesimc %xmm0, %xmm1 movups %xmm1, (%rcx) lea 0x10(%rcx), %rcx cmp %ENDAESKEY, %rcx jnz .Ldec_key_inv_loop .Ldec_key_exit: // OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error // OpenSSL: rax = 0 for OK, or non-zero for error FRAME_END ret SET_SIZE(rijndael_key_setup_dec_intel) /* * aes_encrypt_intel() * Encrypt a single block (in and out can overlap). * * For kernel code, caller is responsible for ensuring kpreempt_disable() * has been called. This is because %xmm registers are not saved/restored. * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set * on entry. Otherwise, if TS is not set, save and restore %xmm registers * on the stack. * * Temporary register usage: * %xmm0 State * %xmm1 Key * * Original OpenSolaris Interface: * void aes_encrypt_intel(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4]) * * Original Intel OpenSSL Interface: * void intel_AES_encrypt(const unsigned char *in, unsigned char *out, * const AES_KEY *key) */ #ifdef OPENSSL_INTERFACE #define aes_encrypt_intel intel_AES_encrypt #define aes_decrypt_intel intel_AES_decrypt #define INP rdi /* P1, 64 bits */ #define OUTP rsi /* P2, 64 bits */ #define KEYP rdx /* P3, 64 bits */ /* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */ #define NROUNDS32 ecx /* temporary, 32 bits */ #define NROUNDS cl /* temporary, 8 bits */ #else /* OpenSolaris Interface */ #define KEYP rdi /* P1, 64 bits */ #define NROUNDS esi /* P2, 32 bits */ #define INP rdx /* P3, 64 bits */ #define OUTP rcx /* P4, 64 bits */ #endif /* OPENSSL_INTERFACE */ #define STATE xmm0 /* temporary, 128 bits */ #define KEY xmm1 /* temporary, 128 bits */ ENTRY_NP(aes_encrypt_intel) movups (%INP), %STATE // input movups (%KEYP), %KEY // key #ifdef OPENSSL_INTERFACE mov 240(%KEYP), %NROUNDS32 // round count #else /* OpenSolaris Interface */ /* Round count is already present as P2 in %rsi/%esi */ #endif /* OPENSSL_INTERFACE */ pxor %KEY, %STATE // round 0 lea 0x30(%KEYP), %KEYP cmp $12, %NROUNDS jb .Lenc128 lea 0x20(%KEYP), %KEYP je .Lenc192 // AES 256 lea 0x20(%KEYP), %KEYP movups -0x60(%KEYP), %KEY aesenc %KEY, %STATE movups -0x50(%KEYP), %KEY aesenc %KEY, %STATE .align 4 .Lenc192: // AES 192 and 256 movups -0x40(%KEYP), %KEY aesenc %KEY, %STATE movups -0x30(%KEYP), %KEY aesenc %KEY, %STATE .align 4 .Lenc128: // AES 128, 192, and 256 movups -0x20(%KEYP), %KEY aesenc %KEY, %STATE movups -0x10(%KEYP), %KEY aesenc %KEY, %STATE movups (%KEYP), %KEY aesenc %KEY, %STATE 
movups 0x10(%KEYP), %KEY aesenc %KEY, %STATE movups 0x20(%KEYP), %KEY aesenc %KEY, %STATE movups 0x30(%KEYP), %KEY aesenc %KEY, %STATE movups 0x40(%KEYP), %KEY aesenc %KEY, %STATE movups 0x50(%KEYP), %KEY aesenc %KEY, %STATE movups 0x60(%KEYP), %KEY aesenc %KEY, %STATE movups 0x70(%KEYP), %KEY aesenclast %KEY, %STATE // last round movups %STATE, (%OUTP) // output ret SET_SIZE(aes_encrypt_intel) /* * aes_decrypt_intel() * Decrypt a single block (in and out can overlap). * * For kernel code, caller is responsible for ensuring kpreempt_disable() * has been called. This is because %xmm registers are not saved/restored. * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set * on entry. Otherwise, if TS is not set, save and restore %xmm registers * on the stack. * * Temporary register usage: * %xmm0 State * %xmm1 Key * * Original OpenSolaris Interface: * void aes_decrypt_intel(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4])/ * * Original Intel OpenSSL Interface: * void intel_AES_decrypt(const unsigned char *in, unsigned char *out, * const AES_KEY *key); */ ENTRY_NP(aes_decrypt_intel) movups (%INP), %STATE // input movups (%KEYP), %KEY // key #ifdef OPENSSL_INTERFACE mov 240(%KEYP), %NROUNDS32 // round count #else /* OpenSolaris Interface */ /* Round count is already present as P2 in %rsi/%esi */ #endif /* OPENSSL_INTERFACE */ pxor %KEY, %STATE // round 0 lea 0x30(%KEYP), %KEYP cmp $12, %NROUNDS jb .Ldec128 lea 0x20(%KEYP), %KEYP je .Ldec192 // AES 256 lea 0x20(%KEYP), %KEYP movups -0x60(%KEYP), %KEY aesdec %KEY, %STATE movups -0x50(%KEYP), %KEY aesdec %KEY, %STATE .align 4 .Ldec192: // AES 192 and 256 movups -0x40(%KEYP), %KEY aesdec %KEY, %STATE movups -0x30(%KEYP), %KEY aesdec %KEY, %STATE .align 4 .Ldec128: // AES 128, 192, and 256 movups -0x20(%KEYP), %KEY aesdec %KEY, %STATE movups -0x10(%KEYP), %KEY aesdec %KEY, %STATE movups (%KEYP), %KEY aesdec %KEY, %STATE movups 0x10(%KEYP), %KEY aesdec %KEY, %STATE movups 0x20(%KEYP), %KEY aesdec %KEY, %STATE movups 0x30(%KEYP), %KEY aesdec %KEY, %STATE movups 0x40(%KEYP), %KEY aesdec %KEY, %STATE movups 0x50(%KEYP), %KEY aesdec %KEY, %STATE movups 0x60(%KEYP), %KEY aesdec %KEY, %STATE movups 0x70(%KEYP), %KEY aesdeclast %KEY, %STATE // last round movups %STATE, (%OUTP) // output ret SET_SIZE(aes_decrypt_intel) #endif /* lint || __lint */ #ifdef __ELF__ .section .note.GNU-stack,"",%progbits #endif diff --git a/module/icp/asm-x86_64/aes/aes_amd64.S b/module/icp/asm-x86_64/aes/aes_amd64.S index 9db3a3179230..272720e517e3 100644 --- a/module/icp/asm-x86_64/aes/aes_amd64.S +++ b/module/icp/asm-x86_64/aes/aes_amd64.S @@ -1,906 +1,906 @@ /* * --------------------------------------------------------------------------- * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. * * LICENSE TERMS * * The free distribution and use of this software is allowed (with or without * changes) provided that: * * 1. source code distributions include the above copyright notice, this * list of conditions and the following disclaimer; * * 2. binary distributions include the above copyright notice, this list * of conditions and the following disclaimer in their documentation; * * 3. the name of the copyright holder is not used to endorse products * built using this software without specific written permission. * * DISCLAIMER * * This software is provided 'as is' with no explicit or implied warranties * in respect of its properties, including, but not limited to, correctness * and/or fitness for purpose. 
* --------------------------------------------------------------------------- * Issue 20/12/2007 * * I am grateful to Dag Arne Osvik for many discussions of the techniques that * can be used to optimise AES assembler code on AMD64/EM64T architectures. * Some of the techniques used in this implementation are the result of * suggestions made by him for which I am most grateful. * * An AES implementation for AMD64 processors using the YASM assembler. This * implementation provides only encryption, decryption and hence requires key * scheduling support in C. It uses 8k bytes of tables but its encryption and * decryption performance is very close to that obtained using large tables. * It can use either MS Windows or Gnu/Linux/OpenSolaris OS calling conventions, * which are as follows: * ms windows gnu/linux/opensolaris os * * in_blk rcx rdi * out_blk rdx rsi * context (cx) r8 rdx * * preserved rsi - + rbx, rbp, rsp, r12, r13, r14 & r15 * registers rdi - on both * * destroyed - rsi + rax, rcx, rdx, r8, r9, r10 & r11 * registers - rdi on both * * The convention used here is that for gnu/linux/opensolaris os. * * This code provides the standard AES block size (128 bits, 16 bytes) and the * three standard AES key sizes (128, 192 and 256 bits). It has the same call * interface as my C implementation. It uses the Microsoft C AMD64 calling * conventions in which the three parameters are placed in rcx, rdx and r8 * respectively. The rbx, rsi, rdi, rbp and r12..r15 registers are preserved. * * OpenSolaris Note: * Modified to use GNU/Linux/Solaris calling conventions. * That is parameters are placed in rdi, rsi, rdx, and rcx, respectively. * * AES_RETURN aes_encrypt(const unsigned char in_blk[], * unsigned char out_blk[], const aes_encrypt_ctx cx[1])/ * * AES_RETURN aes_decrypt(const unsigned char in_blk[], * unsigned char out_blk[], const aes_decrypt_ctx cx[1])/ * * AES_RETURN aes_encrypt_key(const unsigned char key[], * const aes_encrypt_ctx cx[1])/ * * AES_RETURN aes_decrypt_key(const unsigned char key[], * const aes_decrypt_ctx cx[1])/ * * AES_RETURN aes_encrypt_key(const unsigned char key[], * unsigned int len, const aes_decrypt_ctx cx[1])/ * * AES_RETURN aes_decrypt_key(const unsigned char key[], * unsigned int len, const aes_decrypt_ctx cx[1])/ * * where is 128, 102 or 256. In the last two calls the length can be in * either bits or bytes. * * Comment in/out the following lines to obtain the desired subroutines. These * selections MUST match those in the C header file aesopt.h */ #define AES_REV_DKS /* define if key decryption schedule is reversed */ #define LAST_ROUND_TABLES /* define for the faster version using extra tables */ /* * The encryption key schedule has the following in memory layout where N is the * number of rounds (10, 12 or 14): * * lo: | input key (round 0) | / each round is four 32-bit words * | encryption round 1 | * | encryption round 2 | * .... * | encryption round N-1 | * hi: | encryption round N | * * The decryption key schedule is normally set up so that it has the same * layout as above by actually reversing the order of the encryption key * schedule in memory (this happens when AES_REV_DKS is set): * * lo: | decryption round 0 | = | encryption round N | * | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ] * | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ] * .... .... 
* | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ] * hi: | decryption round N | = | input key (round 0) | * * with rounds except the first and last modified using inv_mix_column() * But if AES_REV_DKS is NOT set the order of keys is left as it is for * encryption so that it has to be accessed in reverse when used for * decryption (although the inverse mix column modifications are done) * * lo: | decryption round 0 | = | input key (round 0) | * | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ] * | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ] * .... .... * | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ] * hi: | decryption round N | = | encryption round N | * * This layout is faster when the assembler key scheduling provided here * is used. * * End of user defines */ /* * --------------------------------------------------------------------------- * OpenSolaris OS modifications * * This source originates from Brian Gladman file aes_amd64.asm * in http://fp.gladman.plus.com/AES/aes-src-04-03-08.zip * with these changes: * * 1. Removed MS Windows-specific code within DLL_EXPORT, _SEH_, and * !__GNUC__ ifdefs. Also removed ENCRYPTION, DECRYPTION, * AES_128, AES_192, AES_256, AES_VAR ifdefs. * * 2. Translate yasm/nasm %define and .macro definitions to cpp(1) #define * * 3. Translate yasm/nasm %ifdef/%ifndef to cpp(1) #ifdef * * 4. Translate Intel/yasm/nasm syntax to ATT/OpenSolaris as(1) syntax * (operands reversed, literals prefixed with "$", registers prefixed with "%", * and "[register+offset]", addressing changed to "offset(register)", * parenthesis in constant expressions "()" changed to square brackets "[]", * "." removed from local (numeric) labels, and other changes. * Examples: * Intel/yasm/nasm Syntax ATT/OpenSolaris Syntax * mov rax,(4*20h) mov $[4*0x20],%rax * mov rax,[ebx+20h] mov 0x20(%ebx),%rax * lea rax,[ebx+ecx] lea (%ebx,%ecx),%rax * sub rax,[ebx+ecx*4-20h] sub -0x20(%ebx,%ecx,4),%rax * * 5. Added OpenSolaris ENTRY_NP/SET_SIZE macros from * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function * definitions for lint. * * 6. Renamed functions and reordered parameters to match OpenSolaris: * Original Gladman interface: * int aes_encrypt(const unsigned char *in, * unsigned char *out, const aes_encrypt_ctx cx[1])/ * int aes_decrypt(const unsigned char *in, * unsigned char *out, const aes_encrypt_ctx cx[1])/ * Note: aes_encrypt_ctx contains ks, a 60 element array of uint32_t, * and a union type, inf., containing inf.l, a uint32_t and * inf.b, a 4-element array of uint32_t. Only b[0] in the array (aka "l") is * used and contains the key schedule length * 16 where key schedule length is * 10, 12, or 14 bytes. * * OpenSolaris OS interface: * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4])/ * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4])/ * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4]/ * uint32_t ks32[(MAX_AES_NR + 1) * 4]/ } aes_ks_t/ * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text, * ct is crypto text, and MAX_AES_NR is 14. * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64. 
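 *
 * Illustrative caller sketch (a minimal, hypothetical example, not part of
 * this file, assuming the OpenSolaris interface and aes_ks_t layout described
 * above; the key schedule itself is produced by the C key-scheduling code):
 *
 *	aes_ks_t ks;			// encryption key schedule, filled in C
 *	uint32_t pt[4], ct[4];		// one 16-byte block each
 *	int Nr = 10;			// 10, 12 or 14 rounds (AES-128/192/256)
 *	aes_encrypt_amd64(&ks, Nr, pt, ct);
 *
 * (aes_decrypt_amd64() is called the same way, but with the separately
 * prepared decryption key schedule.)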
*/ #if defined(lint) || defined(__lint) #include -/* ARGSUSED */ void aes_encrypt_amd64(const uint32_t rk[], int Nr, const uint32_t pt[4], uint32_t ct[4]) { + (void) rk, (void) Nr, (void) pt, (void) ct; } -/* ARGSUSED */ void aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4], uint32_t pt[4]) { + (void) rk, (void) Nr, (void) pt, (void) ct; } #else #define _ASM #include #define KS_LENGTH 60 #define raxd eax #define rdxd edx #define rcxd ecx #define rbxd ebx #define rsid esi #define rdid edi #define raxb al #define rdxb dl #define rcxb cl #define rbxb bl #define rsib sil #define rdib dil // finite field multiplies by {02}, {04} and {08} #define f2(x) [[x<<1]^[[[x>>7]&1]*0x11b]] #define f4(x) [[x<<2]^[[[x>>6]&1]*0x11b]^[[[x>>6]&2]*0x11b]] #define f8(x) [[x<<3]^[[[x>>5]&1]*0x11b]^[[[x>>5]&2]*0x11b]^[[[x>>5]&4]*0x11b]] // finite field multiplies required in table generation #define f3(x) [[f2(x)] ^ [x]] #define f9(x) [[f8(x)] ^ [x]] #define fb(x) [[f8(x)] ^ [f2(x)] ^ [x]] #define fd(x) [[f8(x)] ^ [f4(x)] ^ [x]] #define fe(x) [[f8(x)] ^ [f4(x)] ^ [f2(x)]] // macros for expanding S-box data #define u8(x) [f2(x)], [x], [x], [f3(x)], [f2(x)], [x], [x], [f3(x)] #define v8(x) [fe(x)], [f9(x)], [fd(x)], [fb(x)], [fe(x)], [f9(x)], [fd(x)], [x] #define w8(x) [x], 0, 0, 0, [x], 0, 0, 0 #define enc_vals(x) \ .byte x(0x63),x(0x7c),x(0x77),x(0x7b),x(0xf2),x(0x6b),x(0x6f),x(0xc5); \ .byte x(0x30),x(0x01),x(0x67),x(0x2b),x(0xfe),x(0xd7),x(0xab),x(0x76); \ .byte x(0xca),x(0x82),x(0xc9),x(0x7d),x(0xfa),x(0x59),x(0x47),x(0xf0); \ .byte x(0xad),x(0xd4),x(0xa2),x(0xaf),x(0x9c),x(0xa4),x(0x72),x(0xc0); \ .byte x(0xb7),x(0xfd),x(0x93),x(0x26),x(0x36),x(0x3f),x(0xf7),x(0xcc); \ .byte x(0x34),x(0xa5),x(0xe5),x(0xf1),x(0x71),x(0xd8),x(0x31),x(0x15); \ .byte x(0x04),x(0xc7),x(0x23),x(0xc3),x(0x18),x(0x96),x(0x05),x(0x9a); \ .byte x(0x07),x(0x12),x(0x80),x(0xe2),x(0xeb),x(0x27),x(0xb2),x(0x75); \ .byte x(0x09),x(0x83),x(0x2c),x(0x1a),x(0x1b),x(0x6e),x(0x5a),x(0xa0); \ .byte x(0x52),x(0x3b),x(0xd6),x(0xb3),x(0x29),x(0xe3),x(0x2f),x(0x84); \ .byte x(0x53),x(0xd1),x(0x00),x(0xed),x(0x20),x(0xfc),x(0xb1),x(0x5b); \ .byte x(0x6a),x(0xcb),x(0xbe),x(0x39),x(0x4a),x(0x4c),x(0x58),x(0xcf); \ .byte x(0xd0),x(0xef),x(0xaa),x(0xfb),x(0x43),x(0x4d),x(0x33),x(0x85); \ .byte x(0x45),x(0xf9),x(0x02),x(0x7f),x(0x50),x(0x3c),x(0x9f),x(0xa8); \ .byte x(0x51),x(0xa3),x(0x40),x(0x8f),x(0x92),x(0x9d),x(0x38),x(0xf5); \ .byte x(0xbc),x(0xb6),x(0xda),x(0x21),x(0x10),x(0xff),x(0xf3),x(0xd2); \ .byte x(0xcd),x(0x0c),x(0x13),x(0xec),x(0x5f),x(0x97),x(0x44),x(0x17); \ .byte x(0xc4),x(0xa7),x(0x7e),x(0x3d),x(0x64),x(0x5d),x(0x19),x(0x73); \ .byte x(0x60),x(0x81),x(0x4f),x(0xdc),x(0x22),x(0x2a),x(0x90),x(0x88); \ .byte x(0x46),x(0xee),x(0xb8),x(0x14),x(0xde),x(0x5e),x(0x0b),x(0xdb); \ .byte x(0xe0),x(0x32),x(0x3a),x(0x0a),x(0x49),x(0x06),x(0x24),x(0x5c); \ .byte x(0xc2),x(0xd3),x(0xac),x(0x62),x(0x91),x(0x95),x(0xe4),x(0x79); \ .byte x(0xe7),x(0xc8),x(0x37),x(0x6d),x(0x8d),x(0xd5),x(0x4e),x(0xa9); \ .byte x(0x6c),x(0x56),x(0xf4),x(0xea),x(0x65),x(0x7a),x(0xae),x(0x08); \ .byte x(0xba),x(0x78),x(0x25),x(0x2e),x(0x1c),x(0xa6),x(0xb4),x(0xc6); \ .byte x(0xe8),x(0xdd),x(0x74),x(0x1f),x(0x4b),x(0xbd),x(0x8b),x(0x8a); \ .byte x(0x70),x(0x3e),x(0xb5),x(0x66),x(0x48),x(0x03),x(0xf6),x(0x0e); \ .byte x(0x61),x(0x35),x(0x57),x(0xb9),x(0x86),x(0xc1),x(0x1d),x(0x9e); \ .byte x(0xe1),x(0xf8),x(0x98),x(0x11),x(0x69),x(0xd9),x(0x8e),x(0x94); \ .byte x(0x9b),x(0x1e),x(0x87),x(0xe9),x(0xce),x(0x55),x(0x28),x(0xdf); \ .byte 
x(0x8c),x(0xa1),x(0x89),x(0x0d),x(0xbf),x(0xe6),x(0x42),x(0x68); \ .byte x(0x41),x(0x99),x(0x2d),x(0x0f),x(0xb0),x(0x54),x(0xbb),x(0x16) #define dec_vals(x) \ .byte x(0x52),x(0x09),x(0x6a),x(0xd5),x(0x30),x(0x36),x(0xa5),x(0x38); \ .byte x(0xbf),x(0x40),x(0xa3),x(0x9e),x(0x81),x(0xf3),x(0xd7),x(0xfb); \ .byte x(0x7c),x(0xe3),x(0x39),x(0x82),x(0x9b),x(0x2f),x(0xff),x(0x87); \ .byte x(0x34),x(0x8e),x(0x43),x(0x44),x(0xc4),x(0xde),x(0xe9),x(0xcb); \ .byte x(0x54),x(0x7b),x(0x94),x(0x32),x(0xa6),x(0xc2),x(0x23),x(0x3d); \ .byte x(0xee),x(0x4c),x(0x95),x(0x0b),x(0x42),x(0xfa),x(0xc3),x(0x4e); \ .byte x(0x08),x(0x2e),x(0xa1),x(0x66),x(0x28),x(0xd9),x(0x24),x(0xb2); \ .byte x(0x76),x(0x5b),x(0xa2),x(0x49),x(0x6d),x(0x8b),x(0xd1),x(0x25); \ .byte x(0x72),x(0xf8),x(0xf6),x(0x64),x(0x86),x(0x68),x(0x98),x(0x16); \ .byte x(0xd4),x(0xa4),x(0x5c),x(0xcc),x(0x5d),x(0x65),x(0xb6),x(0x92); \ .byte x(0x6c),x(0x70),x(0x48),x(0x50),x(0xfd),x(0xed),x(0xb9),x(0xda); \ .byte x(0x5e),x(0x15),x(0x46),x(0x57),x(0xa7),x(0x8d),x(0x9d),x(0x84); \ .byte x(0x90),x(0xd8),x(0xab),x(0x00),x(0x8c),x(0xbc),x(0xd3),x(0x0a); \ .byte x(0xf7),x(0xe4),x(0x58),x(0x05),x(0xb8),x(0xb3),x(0x45),x(0x06); \ .byte x(0xd0),x(0x2c),x(0x1e),x(0x8f),x(0xca),x(0x3f),x(0x0f),x(0x02); \ .byte x(0xc1),x(0xaf),x(0xbd),x(0x03),x(0x01),x(0x13),x(0x8a),x(0x6b); \ .byte x(0x3a),x(0x91),x(0x11),x(0x41),x(0x4f),x(0x67),x(0xdc),x(0xea); \ .byte x(0x97),x(0xf2),x(0xcf),x(0xce),x(0xf0),x(0xb4),x(0xe6),x(0x73); \ .byte x(0x96),x(0xac),x(0x74),x(0x22),x(0xe7),x(0xad),x(0x35),x(0x85); \ .byte x(0xe2),x(0xf9),x(0x37),x(0xe8),x(0x1c),x(0x75),x(0xdf),x(0x6e); \ .byte x(0x47),x(0xf1),x(0x1a),x(0x71),x(0x1d),x(0x29),x(0xc5),x(0x89); \ .byte x(0x6f),x(0xb7),x(0x62),x(0x0e),x(0xaa),x(0x18),x(0xbe),x(0x1b); \ .byte x(0xfc),x(0x56),x(0x3e),x(0x4b),x(0xc6),x(0xd2),x(0x79),x(0x20); \ .byte x(0x9a),x(0xdb),x(0xc0),x(0xfe),x(0x78),x(0xcd),x(0x5a),x(0xf4); \ .byte x(0x1f),x(0xdd),x(0xa8),x(0x33),x(0x88),x(0x07),x(0xc7),x(0x31); \ .byte x(0xb1),x(0x12),x(0x10),x(0x59),x(0x27),x(0x80),x(0xec),x(0x5f); \ .byte x(0x60),x(0x51),x(0x7f),x(0xa9),x(0x19),x(0xb5),x(0x4a),x(0x0d); \ .byte x(0x2d),x(0xe5),x(0x7a),x(0x9f),x(0x93),x(0xc9),x(0x9c),x(0xef); \ .byte x(0xa0),x(0xe0),x(0x3b),x(0x4d),x(0xae),x(0x2a),x(0xf5),x(0xb0); \ .byte x(0xc8),x(0xeb),x(0xbb),x(0x3c),x(0x83),x(0x53),x(0x99),x(0x61); \ .byte x(0x17),x(0x2b),x(0x04),x(0x7e),x(0xba),x(0x77),x(0xd6),x(0x26); \ .byte x(0xe1),x(0x69),x(0x14),x(0x63),x(0x55),x(0x21),x(0x0c),x(0x7d) #define tptr %rbp /* table pointer */ #define kptr %r8 /* key schedule pointer */ #define fofs 128 /* adjust offset in key schedule to keep |disp| < 128 */ #define fk_ref(x, y) -16*x+fofs+4*y(kptr) #ifdef AES_REV_DKS #define rofs 128 #define ik_ref(x, y) -16*x+rofs+4*y(kptr) #else #define rofs -128 #define ik_ref(x, y) 16*x+rofs+4*y(kptr) #endif /* AES_REV_DKS */ #define tab_0(x) (tptr,x,8) #define tab_1(x) 3(tptr,x,8) #define tab_2(x) 2(tptr,x,8) #define tab_3(x) 1(tptr,x,8) #define tab_f(x) 1(tptr,x,8) #define tab_i(x) 7(tptr,x,8) #define ff_rnd(p1, p2, p3, p4, round) /* normal forward round */ \ mov fk_ref(round,0), p1; \ mov fk_ref(round,1), p2; \ mov fk_ref(round,2), p3; \ mov fk_ref(round,3), p4; \ \ movzx %al, %esi; \ movzx %ah, %edi; \ shr $16, %eax; \ xor tab_0(%rsi), p1; \ xor tab_1(%rdi), p4; \ movzx %al, %esi; \ movzx %ah, %edi; \ xor tab_2(%rsi), p3; \ xor tab_3(%rdi), p2; \ \ movzx %bl, %esi; \ movzx %bh, %edi; \ shr $16, %ebx; \ xor tab_0(%rsi), p2; \ xor tab_1(%rdi), p1; \ movzx %bl, %esi; \ movzx %bh, %edi; \ xor tab_2(%rsi), p4; 
\ xor tab_3(%rdi), p3; \ \ movzx %cl, %esi; \ movzx %ch, %edi; \ shr $16, %ecx; \ xor tab_0(%rsi), p3; \ xor tab_1(%rdi), p2; \ movzx %cl, %esi; \ movzx %ch, %edi; \ xor tab_2(%rsi), p1; \ xor tab_3(%rdi), p4; \ \ movzx %dl, %esi; \ movzx %dh, %edi; \ shr $16, %edx; \ xor tab_0(%rsi), p4; \ xor tab_1(%rdi), p3; \ movzx %dl, %esi; \ movzx %dh, %edi; \ xor tab_2(%rsi), p2; \ xor tab_3(%rdi), p1; \ \ mov p1, %eax; \ mov p2, %ebx; \ mov p3, %ecx; \ mov p4, %edx #ifdef LAST_ROUND_TABLES #define fl_rnd(p1, p2, p3, p4, round) /* last forward round */ \ add $2048, tptr; \ mov fk_ref(round,0), p1; \ mov fk_ref(round,1), p2; \ mov fk_ref(round,2), p3; \ mov fk_ref(round,3), p4; \ \ movzx %al, %esi; \ movzx %ah, %edi; \ shr $16, %eax; \ xor tab_0(%rsi), p1; \ xor tab_1(%rdi), p4; \ movzx %al, %esi; \ movzx %ah, %edi; \ xor tab_2(%rsi), p3; \ xor tab_3(%rdi), p2; \ \ movzx %bl, %esi; \ movzx %bh, %edi; \ shr $16, %ebx; \ xor tab_0(%rsi), p2; \ xor tab_1(%rdi), p1; \ movzx %bl, %esi; \ movzx %bh, %edi; \ xor tab_2(%rsi), p4; \ xor tab_3(%rdi), p3; \ \ movzx %cl, %esi; \ movzx %ch, %edi; \ shr $16, %ecx; \ xor tab_0(%rsi), p3; \ xor tab_1(%rdi), p2; \ movzx %cl, %esi; \ movzx %ch, %edi; \ xor tab_2(%rsi), p1; \ xor tab_3(%rdi), p4; \ \ movzx %dl, %esi; \ movzx %dh, %edi; \ shr $16, %edx; \ xor tab_0(%rsi), p4; \ xor tab_1(%rdi), p3; \ movzx %dl, %esi; \ movzx %dh, %edi; \ xor tab_2(%rsi), p2; \ xor tab_3(%rdi), p1 #else #define fl_rnd(p1, p2, p3, p4, round) /* last forward round */ \ mov fk_ref(round,0), p1; \ mov fk_ref(round,1), p2; \ mov fk_ref(round,2), p3; \ mov fk_ref(round,3), p4; \ \ movzx %al, %esi; \ movzx %ah, %edi; \ shr $16, %eax; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ xor %esi, p1; \ rol $8, %edi; \ xor %edi, p4; \ movzx %al, %esi; \ movzx %ah, %edi; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p3; \ xor %edi, p2; \ \ movzx %bl, %esi; \ movzx %bh, %edi; \ shr $16, %ebx; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ xor %esi, p2; \ rol $8, %edi; \ xor %edi, p1; \ movzx %bl, %esi; \ movzx %bh, %edi; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p4; \ xor %edi, p3; \ \ movzx %cl, %esi; \ movzx %ch, %edi; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ shr $16, %ecx; \ xor %esi, p3; \ rol $8, %edi; \ xor %edi, p2; \ movzx %cl, %esi; \ movzx %ch, %edi; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p1; \ xor %edi, p4; \ \ movzx %dl, %esi; \ movzx %dh, %edi; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ shr $16, %edx; \ xor %esi, p4; \ rol $8, %edi; \ xor %edi, p3; \ movzx %dl, %esi; \ movzx %dh, %edi; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p2; \ xor %edi, p1 #endif /* LAST_ROUND_TABLES */ #define ii_rnd(p1, p2, p3, p4, round) /* normal inverse round */ \ mov ik_ref(round,0), p1; \ mov ik_ref(round,1), p2; \ mov ik_ref(round,2), p3; \ mov ik_ref(round,3), p4; \ \ movzx %al, %esi; \ movzx %ah, %edi; \ shr $16, %eax; \ xor tab_0(%rsi), p1; \ xor tab_1(%rdi), p2; \ movzx %al, %esi; \ movzx %ah, %edi; \ xor tab_2(%rsi), p3; \ xor tab_3(%rdi), p4; \ \ movzx %bl, %esi; \ movzx %bh, %edi; \ shr $16, %ebx; \ xor tab_0(%rsi), p2; \ xor tab_1(%rdi), p3; \ movzx %bl, %esi; \ movzx %bh, %edi; \ xor tab_2(%rsi), p4; \ xor tab_3(%rdi), p1; \ \ movzx %cl, %esi; \ movzx %ch, %edi; \ shr $16, %ecx; \ xor tab_0(%rsi), p3; \ xor tab_1(%rdi), p4; \ 
movzx %cl, %esi; \ movzx %ch, %edi; \ xor tab_2(%rsi), p1; \ xor tab_3(%rdi), p2; \ \ movzx %dl, %esi; \ movzx %dh, %edi; \ shr $16, %edx; \ xor tab_0(%rsi), p4; \ xor tab_1(%rdi), p1; \ movzx %dl, %esi; \ movzx %dh, %edi; \ xor tab_2(%rsi), p2; \ xor tab_3(%rdi), p3; \ \ mov p1, %eax; \ mov p2, %ebx; \ mov p3, %ecx; \ mov p4, %edx #ifdef LAST_ROUND_TABLES #define il_rnd(p1, p2, p3, p4, round) /* last inverse round */ \ add $2048, tptr; \ mov ik_ref(round,0), p1; \ mov ik_ref(round,1), p2; \ mov ik_ref(round,2), p3; \ mov ik_ref(round,3), p4; \ \ movzx %al, %esi; \ movzx %ah, %edi; \ shr $16, %eax; \ xor tab_0(%rsi), p1; \ xor tab_1(%rdi), p2; \ movzx %al, %esi; \ movzx %ah, %edi; \ xor tab_2(%rsi), p3; \ xor tab_3(%rdi), p4; \ \ movzx %bl, %esi; \ movzx %bh, %edi; \ shr $16, %ebx; \ xor tab_0(%rsi), p2; \ xor tab_1(%rdi), p3; \ movzx %bl, %esi; \ movzx %bh, %edi; \ xor tab_2(%rsi), p4; \ xor tab_3(%rdi), p1; \ \ movzx %cl, %esi; \ movzx %ch, %edi; \ shr $16, %ecx; \ xor tab_0(%rsi), p3; \ xor tab_1(%rdi), p4; \ movzx %cl, %esi; \ movzx %ch, %edi; \ xor tab_2(%rsi), p1; \ xor tab_3(%rdi), p2; \ \ movzx %dl, %esi; \ movzx %dh, %edi; \ shr $16, %edx; \ xor tab_0(%rsi), p4; \ xor tab_1(%rdi), p1; \ movzx %dl, %esi; \ movzx %dh, %edi; \ xor tab_2(%rsi), p2; \ xor tab_3(%rdi), p3 #else #define il_rnd(p1, p2, p3, p4, round) /* last inverse round */ \ mov ik_ref(round,0), p1; \ mov ik_ref(round,1), p2; \ mov ik_ref(round,2), p3; \ mov ik_ref(round,3), p4; \ \ movzx %al, %esi; \ movzx %ah, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ shr $16, %eax; \ xor %esi, p1; \ rol $8, %edi; \ xor %edi, p2; \ movzx %al, %esi; \ movzx %ah, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p3; \ xor %edi, p4; \ \ movzx %bl, %esi; \ movzx %bh, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ shr $16, %ebx; \ xor %esi, p2; \ rol $8, %edi; \ xor %edi, p3; \ movzx %bl, %esi; \ movzx %bh, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p4; \ xor %edi, p1; \ \ movzx %cl, %esi; \ movzx %ch, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ shr $16, %ecx; \ xor %esi, p3; \ rol $8, %edi; \ xor %edi, p4; \ movzx %cl, %esi; \ movzx %ch, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p1; \ xor %edi, p2; \ \ movzx %dl, %esi; \ movzx %dh, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ shr $16, %edx; \ xor %esi, p4; \ rol $8, %edi; \ xor %edi, p1; \ movzx %dl, %esi; \ movzx %dh, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p2; \ xor %edi, p3 #endif /* LAST_ROUND_TABLES */ /* * OpenSolaris OS: * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4])/ * * Original interface: * int aes_encrypt(const unsigned char *in, * unsigned char *out, const aes_encrypt_ctx cx[1])/ */ .data .align 64 enc_tab: enc_vals(u8) #ifdef LAST_ROUND_TABLES // Last Round Tables: enc_vals(w8) #endif ENTRY_NP(aes_encrypt_amd64) #ifdef GLADMAN_INTERFACE // Original interface sub $[4*8], %rsp // gnu/linux/opensolaris binary interface mov %rsi, (%rsp) // output pointer (P2) mov %rdx, %r8 // context (P3) mov %rbx, 1*8(%rsp) // P1: input pointer in rdi mov %rbp, 2*8(%rsp) // P2: output pointer in (rsp) mov %r12, 3*8(%rsp) // P3: context in r8 movzx 4*KS_LENGTH(kptr), %esi // Get byte key length * 16 #else // OpenSolaris OS interface sub 
$[4*8], %rsp // Make room on stack to save registers mov %rcx, (%rsp) // Save output pointer (P4) on stack mov %rdi, %r8 // context (P1) mov %rdx, %rdi // P3: save input pointer shl $4, %esi // P2: esi byte key length * 16 mov %rbx, 1*8(%rsp) // Save registers mov %rbp, 2*8(%rsp) mov %r12, 3*8(%rsp) // P1: context in r8 // P2: byte key length * 16 in esi // P3: input pointer in rdi // P4: output pointer in (rsp) #endif /* GLADMAN_INTERFACE */ lea enc_tab(%rip), tptr sub $fofs, kptr // Load input block into registers mov (%rdi), %eax mov 1*4(%rdi), %ebx mov 2*4(%rdi), %ecx mov 3*4(%rdi), %edx xor fofs(kptr), %eax xor fofs+4(kptr), %ebx xor fofs+8(kptr), %ecx xor fofs+12(kptr), %edx lea (kptr,%rsi), kptr // Jump based on byte key length * 16: cmp $[10*16], %esi je 3f cmp $[12*16], %esi je 2f cmp $[14*16], %esi je 1f mov $-1, %rax // error jmp 4f // Perform normal forward rounds 1: ff_rnd(%r9d, %r10d, %r11d, %r12d, 13) ff_rnd(%r9d, %r10d, %r11d, %r12d, 12) 2: ff_rnd(%r9d, %r10d, %r11d, %r12d, 11) ff_rnd(%r9d, %r10d, %r11d, %r12d, 10) 3: ff_rnd(%r9d, %r10d, %r11d, %r12d, 9) ff_rnd(%r9d, %r10d, %r11d, %r12d, 8) ff_rnd(%r9d, %r10d, %r11d, %r12d, 7) ff_rnd(%r9d, %r10d, %r11d, %r12d, 6) ff_rnd(%r9d, %r10d, %r11d, %r12d, 5) ff_rnd(%r9d, %r10d, %r11d, %r12d, 4) ff_rnd(%r9d, %r10d, %r11d, %r12d, 3) ff_rnd(%r9d, %r10d, %r11d, %r12d, 2) ff_rnd(%r9d, %r10d, %r11d, %r12d, 1) fl_rnd(%r9d, %r10d, %r11d, %r12d, 0) // Copy results mov (%rsp), %rbx mov %r9d, (%rbx) mov %r10d, 4(%rbx) mov %r11d, 8(%rbx) mov %r12d, 12(%rbx) xor %rax, %rax 4: // Restore registers mov 1*8(%rsp), %rbx mov 2*8(%rsp), %rbp mov 3*8(%rsp), %r12 add $[4*8], %rsp ret SET_SIZE(aes_encrypt_amd64) /* * OpenSolaris OS: * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4])/ * * Original interface: * int aes_decrypt(const unsigned char *in, * unsigned char *out, const aes_encrypt_ctx cx[1])/ */ .data .align 64 dec_tab: dec_vals(v8) #ifdef LAST_ROUND_TABLES // Last Round Tables: dec_vals(w8) #endif ENTRY_NP(aes_decrypt_amd64) #ifdef GLADMAN_INTERFACE // Original interface sub $[4*8], %rsp // gnu/linux/opensolaris binary interface mov %rsi, (%rsp) // output pointer (P2) mov %rdx, %r8 // context (P3) mov %rbx, 1*8(%rsp) // P1: input pointer in rdi mov %rbp, 2*8(%rsp) // P2: output pointer in (rsp) mov %r12, 3*8(%rsp) // P3: context in r8 movzx 4*KS_LENGTH(kptr), %esi // Get byte key length * 16 #else // OpenSolaris OS interface sub $[4*8], %rsp // Make room on stack to save registers mov %rcx, (%rsp) // Save output pointer (P4) on stack mov %rdi, %r8 // context (P1) mov %rdx, %rdi // P3: save input pointer shl $4, %esi // P2: esi byte key length * 16 mov %rbx, 1*8(%rsp) // Save registers mov %rbp, 2*8(%rsp) mov %r12, 3*8(%rsp) // P1: context in r8 // P2: byte key length * 16 in esi // P3: input pointer in rdi // P4: output pointer in (rsp) #endif /* GLADMAN_INTERFACE */ lea dec_tab(%rip), tptr sub $rofs, kptr // Load input block into registers mov (%rdi), %eax mov 1*4(%rdi), %ebx mov 2*4(%rdi), %ecx mov 3*4(%rdi), %edx #ifdef AES_REV_DKS mov kptr, %rdi lea (kptr,%rsi), kptr #else lea (kptr,%rsi), %rdi #endif xor rofs(%rdi), %eax xor rofs+4(%rdi), %ebx xor rofs+8(%rdi), %ecx xor rofs+12(%rdi), %edx // Jump based on byte key length * 16: cmp $[10*16], %esi je 3f cmp $[12*16], %esi je 2f cmp $[14*16], %esi je 1f mov $-1, %rax // error jmp 4f // Perform normal inverse rounds 1: ii_rnd(%r9d, %r10d, %r11d, %r12d, 13) ii_rnd(%r9d, %r10d, %r11d, %r12d, 12) 2: ii_rnd(%r9d, %r10d, %r11d, %r12d, 11) ii_rnd(%r9d, 
%r10d, %r11d, %r12d, 10) 3: ii_rnd(%r9d, %r10d, %r11d, %r12d, 9) ii_rnd(%r9d, %r10d, %r11d, %r12d, 8) ii_rnd(%r9d, %r10d, %r11d, %r12d, 7) ii_rnd(%r9d, %r10d, %r11d, %r12d, 6) ii_rnd(%r9d, %r10d, %r11d, %r12d, 5) ii_rnd(%r9d, %r10d, %r11d, %r12d, 4) ii_rnd(%r9d, %r10d, %r11d, %r12d, 3) ii_rnd(%r9d, %r10d, %r11d, %r12d, 2) ii_rnd(%r9d, %r10d, %r11d, %r12d, 1) il_rnd(%r9d, %r10d, %r11d, %r12d, 0) // Copy results mov (%rsp), %rbx mov %r9d, (%rbx) mov %r10d, 4(%rbx) mov %r11d, 8(%rbx) mov %r12d, 12(%rbx) xor %rax, %rax 4: // Restore registers mov 1*8(%rsp), %rbx mov 2*8(%rsp), %rbp mov 3*8(%rsp), %r12 add $[4*8], %rsp ret SET_SIZE(aes_decrypt_amd64) #endif /* lint || __lint */ #ifdef __ELF__ .section .note.GNU-stack,"",%progbits #endif diff --git a/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S b/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S index 59edc4c8d56c..0e1e04b78c5d 100644 --- a/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S +++ b/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S @@ -1,254 +1,254 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2009 Intel Corporation * All Rights Reserved. */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Accelerated GHASH implementation with Intel PCLMULQDQ-NI * instructions. This file contains an accelerated * Galois Field Multiplication implementation. * * PCLMULQDQ is used to accelerate the most time-consuming part of GHASH, * carry-less multiplication. More information about PCLMULQDQ can be * found at: * http://software.intel.com/en-us/articles/ * carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ * */ /* * ==================================================================== * OpenSolaris OS modifications * * This source originates as file galois_hash_asm.c from * Intel Corporation dated September 21, 2009. * * This OpenSolaris version has these major changes from the original source: * * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from * /usr/include/sys/asm_linkage.h, lint(1B) guards, and a dummy C function * definition for lint. * * 2. Formatted code, added comments, and added #includes and #defines. * * 3. If bit CR0.TS is set, clear and set the TS bit, after and before * calling kpreempt_disable() and kpreempt_enable(). * If the TS bit is not set, Save and restore %xmm registers at the beginning * and end of function calls (%xmm* registers are not saved and restored by * during kernel thread preemption). * * 4. Removed code to perform hashing. This is already done with C macro * GHASH in gcm.c. For better performance, this removed code should be * reintegrated in the future to replace the C GHASH macro. * * 5. Added code to byte swap 16-byte input and output. * * 6. 
Folded in comments from the original C source with embedded assembly * (SB_w_shift_xor.c) * * 7. Renamed function and reordered parameters to match OpenSolaris: * Intel interface: * void galois_hash_asm(unsigned char *hk, unsigned char *s, * unsigned char *d, int length) * OpenSolaris OS interface: * void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res); * ==================================================================== */ #if defined(lint) || defined(__lint) /* lint */ #include -/* ARGSUSED */ void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) { + (void) x_in, (void) y, (void) res; } #elif defined(HAVE_PCLMULQDQ) /* guard by instruction set */ #define _ASM #include /* * Use this mask to byte-swap a 16-byte integer with the pshufb instruction */ // static uint8_t byte_swap16_mask[] = { // 15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2, 1, 0 }; .data .align XMM_ALIGN .Lbyte_swap16_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 /* * void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res); * * Perform a carry-less multiplication (that is, use XOR instead of the * multiply operator) on P1 and P2 and place the result in P3. * * Byte swap the input and the output. * * Note: x_in, y, and res all point to a block of 20-byte numbers * (an array of two 64-bit integers). * * Note2: For kernel code, caller is responsible for ensuring * kpreempt_disable() has been called. This is because %xmm registers are * not saved/restored. Clear and set the CR0.TS bit on entry and exit, * respectively, if TS is set on entry. Otherwise, if TS is not set, * save and restore %xmm registers on the stack. * * Note3: Original Intel definition: * void galois_hash_asm(unsigned char *hk, unsigned char *s, * unsigned char *d, int length) * * Note4: Register/parameter mapping: * Intel: * Parameter 1: %rcx (copied to %xmm0) hk or x_in * Parameter 2: %rdx (copied to %xmm1) s or y * Parameter 3: %rdi (result) d or res * OpenSolaris: * Parameter 1: %rdi (copied to %xmm0) x_in * Parameter 2: %rsi (copied to %xmm1) y * Parameter 3: %rdx (result) res */ ENTRY_NP(gcm_mul_pclmulqdq) // // Copy Parameters // movdqu (%rdi), %xmm0 // P1 movdqu (%rsi), %xmm1 // P2 // // Byte swap 16-byte input // lea .Lbyte_swap16_mask(%rip), %rax movups (%rax), %xmm10 pshufb %xmm10, %xmm0 pshufb %xmm10, %xmm1 // // Multiply with the hash key // movdqu %xmm0, %xmm3 pclmulqdq $0, %xmm1, %xmm3 // xmm3 holds a0*b0 movdqu %xmm0, %xmm4 pclmulqdq $16, %xmm1, %xmm4 // xmm4 holds a0*b1 movdqu %xmm0, %xmm5 pclmulqdq $1, %xmm1, %xmm5 // xmm5 holds a1*b0 movdqu %xmm0, %xmm6 pclmulqdq $17, %xmm1, %xmm6 // xmm6 holds a1*b1 pxor %xmm5, %xmm4 // xmm4 holds a0*b1 + a1*b0 movdqu %xmm4, %xmm5 // move the contents of xmm4 to xmm5 psrldq $8, %xmm4 // shift by xmm4 64 bits to the right pslldq $8, %xmm5 // shift by xmm5 64 bits to the left pxor %xmm5, %xmm3 pxor %xmm4, %xmm6 // Register pair holds the result // of the carry-less multiplication of // xmm0 by xmm1. // We shift the result of the multiplication by one bit position // to the left to cope for the fact that the bits are reversed. movdqu %xmm3, %xmm7 movdqu %xmm6, %xmm8 pslld $1, %xmm3 pslld $1, %xmm6 psrld $31, %xmm7 psrld $31, %xmm8 movdqu %xmm7, %xmm9 pslldq $4, %xmm8 pslldq $4, %xmm7 psrldq $12, %xmm9 por %xmm7, %xmm3 por %xmm8, %xmm6 por %xmm9, %xmm6 // // First phase of the reduction // // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts // independently. 
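	//
	// The reduction below is modulo the GCM field polynomial
	// x^128 + x^7 + x^2 + x + 1; the shift counts 31, 30, 25 here (and
	// 1, 2, 7 in the second phase) correspond to its x, x^2 and x^7
	// terms, applied to the bit-reflected representation.
	//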
movdqu %xmm3, %xmm7 movdqu %xmm3, %xmm8 movdqu %xmm3, %xmm9 pslld $31, %xmm7 // packed right shift shifting << 31 pslld $30, %xmm8 // packed right shift shifting << 30 pslld $25, %xmm9 // packed right shift shifting << 25 pxor %xmm8, %xmm7 // xor the shifted versions pxor %xmm9, %xmm7 movdqu %xmm7, %xmm8 pslldq $12, %xmm7 psrldq $4, %xmm8 pxor %xmm7, %xmm3 // first phase of the reduction complete // // Second phase of the reduction // // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these // shift operations. movdqu %xmm3, %xmm2 movdqu %xmm3, %xmm4 // packed left shifting >> 1 movdqu %xmm3, %xmm5 psrld $1, %xmm2 psrld $2, %xmm4 // packed left shifting >> 2 psrld $7, %xmm5 // packed left shifting >> 7 pxor %xmm4, %xmm2 // xor the shifted versions pxor %xmm5, %xmm2 pxor %xmm8, %xmm2 pxor %xmm2, %xmm3 pxor %xmm3, %xmm6 // the result is in xmm6 // // Byte swap 16-byte result // pshufb %xmm10, %xmm6 // %xmm10 has the swap mask // // Store the result // movdqu %xmm6, (%rdx) // P3 // // Return // ret SET_SIZE(gcm_mul_pclmulqdq) #endif /* lint || __lint */ #ifdef __ELF__ .section .note.GNU-stack,"",%progbits #endif diff --git a/module/icp/asm-x86_64/sha1/sha1-x86_64.S b/module/icp/asm-x86_64/sha1/sha1-x86_64.S index fc844cd8c74f..1d65e818dbfd 100644 --- a/module/icp/asm-x86_64/sha1/sha1-x86_64.S +++ b/module/icp/asm-x86_64/sha1/sha1-x86_64.S @@ -1,1369 +1,1369 @@ /* * !/usr/bin/env perl * * ==================================================================== * Written by Andy Polyakov for the OpenSSL * project. The module is, however, dual licensed under OpenSSL and * CRYPTOGAMS licenses depending on where you obtain it. For further * details see http://www.openssl.org/~appro/cryptogams/. * ==================================================================== * * sha1_block procedure for x86_64. * * It was brought to my attention that on EM64T compiler-generated code * was far behind 32-bit assembler implementation. This is unlike on * Opteron where compiler-generated code was only 15% behind 32-bit * assembler, which originally made it hard to motivate the effort. * There was suggestion to mechanically translate 32-bit code, but I * dismissed it, reasoning that x86_64 offers enough register bank * capacity to fully utilize SHA-1 parallelism. Therefore this fresh * implementation:-) However! While 64-bit code does performs better * on Opteron, I failed to beat 32-bit assembler on EM64T core. Well, * x86_64 does offer larger *addressable* bank, but out-of-order core * reaches for even more registers through dynamic aliasing, and EM64T * core must have managed to run-time optimize even 32-bit code just as * good as 64-bit one. Performance improvement is summarized in the * following table: * * gcc 3.4 32-bit asm cycles/byte * Opteron +45% +20% 6.8 * Xeon P4 +65% +0% 9.9 * Core2 +60% +10% 7.0 * * * OpenSolaris OS modifications * * Sun elects to use this software under the BSD license. * * This source originates from OpenSSL file sha1-x86_64.pl at * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz * (presumably for future OpenSSL release 0.9.8h), with these changes: * * 1. Added perl "use strict" and declared variables. * * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. * * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) * assemblers). * */ /* * This file was generated by a perl script (sha1-x86_64.pl). The comments from * the original file have been pasted above. 
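 *
 * Illustrative use (a minimal, hypothetical sketch, not part of this file):
 * the routine consumes whole 64-byte message blocks from "inpp" and updates
 * the chaining state in "ctx"; buffering of partial blocks and the final
 * padding are handled by the generic C SHA-1 code, e.g.:
 *
 *	SHA1_CTX ctx;			// initialized by the generic SHA-1 code
 *	uint8_t buf[2 * 64];		// two whole 64-byte message blocks
 *	sha1_block_data_order(&ctx, buf, 2);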
*/ #if defined(lint) || defined(__lint) #include #include -/* ARGSUSED */ void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t blocks) { + (void) ctx, (void) inpp, (void) blocks; } #else #define _ASM #include ENTRY_NP(sha1_block_data_order) .cfi_startproc mov %rsp,%rax .cfi_def_cfa_register %rax push %rbx .cfi_offset %rbx,-16 push %rbp .cfi_offset %rbp,-24 push %r12 .cfi_offset %r12,-32 mov %rdi,%r8 # reassigned argument .cfi_register %rdi, %r8 sub $72,%rsp mov %rsi,%r9 # reassigned argument .cfi_register %rsi, %r9 and $-64,%rsp mov %rdx,%r10 # reassigned argument .cfi_register %rdx, %r10 mov %rax,64(%rsp) # echo ".cfi_cfa_expression %rsp+64,deref,+8" | # openssl/crypto/perlasm/x86_64-xlate.pl .cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08 mov 0(%r8),%edx mov 4(%r8),%esi mov 8(%r8),%edi mov 12(%r8),%ebp mov 16(%r8),%r11d .align 4 .Lloop: mov 0(%r9),%eax bswap %eax mov %eax,0(%rsp) lea 0x5a827999(%eax,%r11d),%r12d mov %edi,%ebx mov 4(%r9),%eax mov %edx,%r11d xor %ebp,%ebx bswap %eax rol $5,%r11d and %esi,%ebx mov %eax,4(%rsp) add %r11d,%r12d xor %ebp,%ebx rol $30,%esi add %ebx,%r12d lea 0x5a827999(%eax,%ebp),%r11d mov %esi,%ebx mov 8(%r9),%eax mov %r12d,%ebp xor %edi,%ebx bswap %eax rol $5,%ebp and %edx,%ebx mov %eax,8(%rsp) add %ebp,%r11d xor %edi,%ebx rol $30,%edx add %ebx,%r11d lea 0x5a827999(%eax,%edi),%ebp mov %edx,%ebx mov 12(%r9),%eax mov %r11d,%edi xor %esi,%ebx bswap %eax rol $5,%edi and %r12d,%ebx mov %eax,12(%rsp) add %edi,%ebp xor %esi,%ebx rol $30,%r12d add %ebx,%ebp lea 0x5a827999(%eax,%esi),%edi mov %r12d,%ebx mov 16(%r9),%eax mov %ebp,%esi xor %edx,%ebx bswap %eax rol $5,%esi and %r11d,%ebx mov %eax,16(%rsp) add %esi,%edi xor %edx,%ebx rol $30,%r11d add %ebx,%edi lea 0x5a827999(%eax,%edx),%esi mov %r11d,%ebx mov 20(%r9),%eax mov %edi,%edx xor %r12d,%ebx bswap %eax rol $5,%edx and %ebp,%ebx mov %eax,20(%rsp) add %edx,%esi xor %r12d,%ebx rol $30,%ebp add %ebx,%esi lea 0x5a827999(%eax,%r12d),%edx mov %ebp,%ebx mov 24(%r9),%eax mov %esi,%r12d xor %r11d,%ebx bswap %eax rol $5,%r12d and %edi,%ebx mov %eax,24(%rsp) add %r12d,%edx xor %r11d,%ebx rol $30,%edi add %ebx,%edx lea 0x5a827999(%eax,%r11d),%r12d mov %edi,%ebx mov 28(%r9),%eax mov %edx,%r11d xor %ebp,%ebx bswap %eax rol $5,%r11d and %esi,%ebx mov %eax,28(%rsp) add %r11d,%r12d xor %ebp,%ebx rol $30,%esi add %ebx,%r12d lea 0x5a827999(%eax,%ebp),%r11d mov %esi,%ebx mov 32(%r9),%eax mov %r12d,%ebp xor %edi,%ebx bswap %eax rol $5,%ebp and %edx,%ebx mov %eax,32(%rsp) add %ebp,%r11d xor %edi,%ebx rol $30,%edx add %ebx,%r11d lea 0x5a827999(%eax,%edi),%ebp mov %edx,%ebx mov 36(%r9),%eax mov %r11d,%edi xor %esi,%ebx bswap %eax rol $5,%edi and %r12d,%ebx mov %eax,36(%rsp) add %edi,%ebp xor %esi,%ebx rol $30,%r12d add %ebx,%ebp lea 0x5a827999(%eax,%esi),%edi mov %r12d,%ebx mov 40(%r9),%eax mov %ebp,%esi xor %edx,%ebx bswap %eax rol $5,%esi and %r11d,%ebx mov %eax,40(%rsp) add %esi,%edi xor %edx,%ebx rol $30,%r11d add %ebx,%edi lea 0x5a827999(%eax,%edx),%esi mov %r11d,%ebx mov 44(%r9),%eax mov %edi,%edx xor %r12d,%ebx bswap %eax rol $5,%edx and %ebp,%ebx mov %eax,44(%rsp) add %edx,%esi xor %r12d,%ebx rol $30,%ebp add %ebx,%esi lea 0x5a827999(%eax,%r12d),%edx mov %ebp,%ebx mov 48(%r9),%eax mov %esi,%r12d xor %r11d,%ebx bswap %eax rol $5,%r12d and %edi,%ebx mov %eax,48(%rsp) add %r12d,%edx xor %r11d,%ebx rol $30,%edi add %ebx,%edx lea 0x5a827999(%eax,%r11d),%r12d mov %edi,%ebx mov 52(%r9),%eax mov %edx,%r11d xor %ebp,%ebx bswap %eax rol $5,%r11d and %esi,%ebx mov %eax,52(%rsp) add %r11d,%r12d xor %ebp,%ebx rol $30,%esi 
add %ebx,%r12d lea 0x5a827999(%eax,%ebp),%r11d mov %esi,%ebx mov 56(%r9),%eax mov %r12d,%ebp xor %edi,%ebx bswap %eax rol $5,%ebp and %edx,%ebx mov %eax,56(%rsp) add %ebp,%r11d xor %edi,%ebx rol $30,%edx add %ebx,%r11d lea 0x5a827999(%eax,%edi),%ebp mov %edx,%ebx mov 60(%r9),%eax mov %r11d,%edi xor %esi,%ebx bswap %eax rol $5,%edi and %r12d,%ebx mov %eax,60(%rsp) add %edi,%ebp xor %esi,%ebx rol $30,%r12d add %ebx,%ebp lea 0x5a827999(%eax,%esi),%edi mov 0(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 8(%rsp),%eax xor %edx,%ebx rol $5,%esi xor 32(%rsp),%eax and %r11d,%ebx add %esi,%edi xor 52(%rsp),%eax xor %edx,%ebx rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,0(%rsp) lea 0x5a827999(%eax,%edx),%esi mov 4(%rsp),%eax mov %r11d,%ebx mov %edi,%edx xor 12(%rsp),%eax xor %r12d,%ebx rol $5,%edx xor 36(%rsp),%eax and %ebp,%ebx add %edx,%esi xor 56(%rsp),%eax xor %r12d,%ebx rol $30,%ebp add %ebx,%esi rol $1,%eax mov %eax,4(%rsp) lea 0x5a827999(%eax,%r12d),%edx mov 8(%rsp),%eax mov %ebp,%ebx mov %esi,%r12d xor 16(%rsp),%eax xor %r11d,%ebx rol $5,%r12d xor 40(%rsp),%eax and %edi,%ebx add %r12d,%edx xor 60(%rsp),%eax xor %r11d,%ebx rol $30,%edi add %ebx,%edx rol $1,%eax mov %eax,8(%rsp) lea 0x5a827999(%eax,%r11d),%r12d mov 12(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 20(%rsp),%eax xor %ebp,%ebx rol $5,%r11d xor 44(%rsp),%eax and %esi,%ebx add %r11d,%r12d xor 0(%rsp),%eax xor %ebp,%ebx rol $30,%esi add %ebx,%r12d rol $1,%eax mov %eax,12(%rsp) lea 0x5a827999(%eax,%ebp),%r11d mov 16(%rsp),%eax mov %esi,%ebx mov %r12d,%ebp xor 24(%rsp),%eax xor %edi,%ebx rol $5,%ebp xor 48(%rsp),%eax and %edx,%ebx add %ebp,%r11d xor 4(%rsp),%eax xor %edi,%ebx rol $30,%edx add %ebx,%r11d rol $1,%eax mov %eax,16(%rsp) lea 0x6ed9eba1(%eax,%edi),%ebp mov 20(%rsp),%eax mov %edx,%ebx mov %r11d,%edi xor 28(%rsp),%eax xor %r12d,%ebx rol $5,%edi xor 52(%rsp),%eax xor %esi,%ebx add %edi,%ebp xor 8(%rsp),%eax rol $30,%r12d add %ebx,%ebp rol $1,%eax mov %eax,20(%rsp) lea 0x6ed9eba1(%eax,%esi),%edi mov 24(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 32(%rsp),%eax xor %r11d,%ebx rol $5,%esi xor 56(%rsp),%eax xor %edx,%ebx add %esi,%edi xor 12(%rsp),%eax rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,24(%rsp) lea 0x6ed9eba1(%eax,%edx),%esi mov 28(%rsp),%eax mov %r11d,%ebx mov %edi,%edx xor 36(%rsp),%eax xor %ebp,%ebx rol $5,%edx xor 60(%rsp),%eax xor %r12d,%ebx add %edx,%esi xor 16(%rsp),%eax rol $30,%ebp add %ebx,%esi rol $1,%eax mov %eax,28(%rsp) lea 0x6ed9eba1(%eax,%r12d),%edx mov 32(%rsp),%eax mov %ebp,%ebx mov %esi,%r12d xor 40(%rsp),%eax xor %edi,%ebx rol $5,%r12d xor 0(%rsp),%eax xor %r11d,%ebx add %r12d,%edx xor 20(%rsp),%eax rol $30,%edi add %ebx,%edx rol $1,%eax mov %eax,32(%rsp) lea 0x6ed9eba1(%eax,%r11d),%r12d mov 36(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 44(%rsp),%eax xor %esi,%ebx rol $5,%r11d xor 4(%rsp),%eax xor %ebp,%ebx add %r11d,%r12d xor 24(%rsp),%eax rol $30,%esi add %ebx,%r12d rol $1,%eax mov %eax,36(%rsp) lea 0x6ed9eba1(%eax,%ebp),%r11d mov 40(%rsp),%eax mov %esi,%ebx mov %r12d,%ebp xor 48(%rsp),%eax xor %edx,%ebx rol $5,%ebp xor 8(%rsp),%eax xor %edi,%ebx add %ebp,%r11d xor 28(%rsp),%eax rol $30,%edx add %ebx,%r11d rol $1,%eax mov %eax,40(%rsp) lea 0x6ed9eba1(%eax,%edi),%ebp mov 44(%rsp),%eax mov %edx,%ebx mov %r11d,%edi xor 52(%rsp),%eax xor %r12d,%ebx rol $5,%edi xor 12(%rsp),%eax xor %esi,%ebx add %edi,%ebp xor 32(%rsp),%eax rol $30,%r12d add %ebx,%ebp rol $1,%eax mov %eax,44(%rsp) lea 0x6ed9eba1(%eax,%esi),%edi mov 48(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 56(%rsp),%eax xor %r11d,%ebx rol 
$5,%esi xor 16(%rsp),%eax xor %edx,%ebx add %esi,%edi xor 36(%rsp),%eax rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,48(%rsp) lea 0x6ed9eba1(%eax,%edx),%esi mov 52(%rsp),%eax mov %r11d,%ebx mov %edi,%edx xor 60(%rsp),%eax xor %ebp,%ebx rol $5,%edx xor 20(%rsp),%eax xor %r12d,%ebx add %edx,%esi xor 40(%rsp),%eax rol $30,%ebp add %ebx,%esi rol $1,%eax mov %eax,52(%rsp) lea 0x6ed9eba1(%eax,%r12d),%edx mov 56(%rsp),%eax mov %ebp,%ebx mov %esi,%r12d xor 0(%rsp),%eax xor %edi,%ebx rol $5,%r12d xor 24(%rsp),%eax xor %r11d,%ebx add %r12d,%edx xor 44(%rsp),%eax rol $30,%edi add %ebx,%edx rol $1,%eax mov %eax,56(%rsp) lea 0x6ed9eba1(%eax,%r11d),%r12d mov 60(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 4(%rsp),%eax xor %esi,%ebx rol $5,%r11d xor 28(%rsp),%eax xor %ebp,%ebx add %r11d,%r12d xor 48(%rsp),%eax rol $30,%esi add %ebx,%r12d rol $1,%eax mov %eax,60(%rsp) lea 0x6ed9eba1(%eax,%ebp),%r11d mov 0(%rsp),%eax mov %esi,%ebx mov %r12d,%ebp xor 8(%rsp),%eax xor %edx,%ebx rol $5,%ebp xor 32(%rsp),%eax xor %edi,%ebx add %ebp,%r11d xor 52(%rsp),%eax rol $30,%edx add %ebx,%r11d rol $1,%eax mov %eax,0(%rsp) lea 0x6ed9eba1(%eax,%edi),%ebp mov 4(%rsp),%eax mov %edx,%ebx mov %r11d,%edi xor 12(%rsp),%eax xor %r12d,%ebx rol $5,%edi xor 36(%rsp),%eax xor %esi,%ebx add %edi,%ebp xor 56(%rsp),%eax rol $30,%r12d add %ebx,%ebp rol $1,%eax mov %eax,4(%rsp) lea 0x6ed9eba1(%eax,%esi),%edi mov 8(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 16(%rsp),%eax xor %r11d,%ebx rol $5,%esi xor 40(%rsp),%eax xor %edx,%ebx add %esi,%edi xor 60(%rsp),%eax rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,8(%rsp) lea 0x6ed9eba1(%eax,%edx),%esi mov 12(%rsp),%eax mov %r11d,%ebx mov %edi,%edx xor 20(%rsp),%eax xor %ebp,%ebx rol $5,%edx xor 44(%rsp),%eax xor %r12d,%ebx add %edx,%esi xor 0(%rsp),%eax rol $30,%ebp add %ebx,%esi rol $1,%eax mov %eax,12(%rsp) lea 0x6ed9eba1(%eax,%r12d),%edx mov 16(%rsp),%eax mov %ebp,%ebx mov %esi,%r12d xor 24(%rsp),%eax xor %edi,%ebx rol $5,%r12d xor 48(%rsp),%eax xor %r11d,%ebx add %r12d,%edx xor 4(%rsp),%eax rol $30,%edi add %ebx,%edx rol $1,%eax mov %eax,16(%rsp) lea 0x6ed9eba1(%eax,%r11d),%r12d mov 20(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 28(%rsp),%eax xor %esi,%ebx rol $5,%r11d xor 52(%rsp),%eax xor %ebp,%ebx add %r11d,%r12d xor 8(%rsp),%eax rol $30,%esi add %ebx,%r12d rol $1,%eax mov %eax,20(%rsp) lea 0x6ed9eba1(%eax,%ebp),%r11d mov 24(%rsp),%eax mov %esi,%ebx mov %r12d,%ebp xor 32(%rsp),%eax xor %edx,%ebx rol $5,%ebp xor 56(%rsp),%eax xor %edi,%ebx add %ebp,%r11d xor 12(%rsp),%eax rol $30,%edx add %ebx,%r11d rol $1,%eax mov %eax,24(%rsp) lea 0x6ed9eba1(%eax,%edi),%ebp mov 28(%rsp),%eax mov %edx,%ebx mov %r11d,%edi xor 36(%rsp),%eax xor %r12d,%ebx rol $5,%edi xor 60(%rsp),%eax xor %esi,%ebx add %edi,%ebp xor 16(%rsp),%eax rol $30,%r12d add %ebx,%ebp rol $1,%eax mov %eax,28(%rsp) lea 0x6ed9eba1(%eax,%esi),%edi mov 32(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 40(%rsp),%eax xor %r11d,%ebx rol $5,%esi xor 0(%rsp),%eax xor %edx,%ebx add %esi,%edi xor 20(%rsp),%eax rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,32(%rsp) lea -0x70e44324(%eax,%edx),%esi mov 36(%rsp),%eax mov %ebp,%ebx mov %ebp,%ecx xor 44(%rsp),%eax mov %edi,%edx and %r11d,%ebx xor 4(%rsp),%eax or %r11d,%ecx rol $5,%edx xor 24(%rsp),%eax and %r12d,%ecx add %edx,%esi rol $1,%eax or %ecx,%ebx rol $30,%ebp mov %eax,36(%rsp) add %ebx,%esi lea -0x70e44324(%eax,%r12d),%edx mov 40(%rsp),%eax mov %edi,%ebx mov %edi,%ecx xor 48(%rsp),%eax mov %esi,%r12d and %ebp,%ebx xor 8(%rsp),%eax or %ebp,%ecx rol $5,%r12d xor 28(%rsp),%eax and 
%r11d,%ecx add %r12d,%edx rol $1,%eax or %ecx,%ebx rol $30,%edi mov %eax,40(%rsp) add %ebx,%edx lea -0x70e44324(%eax,%r11d),%r12d mov 44(%rsp),%eax mov %esi,%ebx mov %esi,%ecx xor 52(%rsp),%eax mov %edx,%r11d and %edi,%ebx xor 12(%rsp),%eax or %edi,%ecx rol $5,%r11d xor 32(%rsp),%eax and %ebp,%ecx add %r11d,%r12d rol $1,%eax or %ecx,%ebx rol $30,%esi mov %eax,44(%rsp) add %ebx,%r12d lea -0x70e44324(%eax,%ebp),%r11d mov 48(%rsp),%eax mov %edx,%ebx mov %edx,%ecx xor 56(%rsp),%eax mov %r12d,%ebp and %esi,%ebx xor 16(%rsp),%eax or %esi,%ecx rol $5,%ebp xor 36(%rsp),%eax and %edi,%ecx add %ebp,%r11d rol $1,%eax or %ecx,%ebx rol $30,%edx mov %eax,48(%rsp) add %ebx,%r11d lea -0x70e44324(%eax,%edi),%ebp mov 52(%rsp),%eax mov %r12d,%ebx mov %r12d,%ecx xor 60(%rsp),%eax mov %r11d,%edi and %edx,%ebx xor 20(%rsp),%eax or %edx,%ecx rol $5,%edi xor 40(%rsp),%eax and %esi,%ecx add %edi,%ebp rol $1,%eax or %ecx,%ebx rol $30,%r12d mov %eax,52(%rsp) add %ebx,%ebp lea -0x70e44324(%eax,%esi),%edi mov 56(%rsp),%eax mov %r11d,%ebx mov %r11d,%ecx xor 0(%rsp),%eax mov %ebp,%esi and %r12d,%ebx xor 24(%rsp),%eax or %r12d,%ecx rol $5,%esi xor 44(%rsp),%eax and %edx,%ecx add %esi,%edi rol $1,%eax or %ecx,%ebx rol $30,%r11d mov %eax,56(%rsp) add %ebx,%edi lea -0x70e44324(%eax,%edx),%esi mov 60(%rsp),%eax mov %ebp,%ebx mov %ebp,%ecx xor 4(%rsp),%eax mov %edi,%edx and %r11d,%ebx xor 28(%rsp),%eax or %r11d,%ecx rol $5,%edx xor 48(%rsp),%eax and %r12d,%ecx add %edx,%esi rol $1,%eax or %ecx,%ebx rol $30,%ebp mov %eax,60(%rsp) add %ebx,%esi lea -0x70e44324(%eax,%r12d),%edx mov 0(%rsp),%eax mov %edi,%ebx mov %edi,%ecx xor 8(%rsp),%eax mov %esi,%r12d and %ebp,%ebx xor 32(%rsp),%eax or %ebp,%ecx rol $5,%r12d xor 52(%rsp),%eax and %r11d,%ecx add %r12d,%edx rol $1,%eax or %ecx,%ebx rol $30,%edi mov %eax,0(%rsp) add %ebx,%edx lea -0x70e44324(%eax,%r11d),%r12d mov 4(%rsp),%eax mov %esi,%ebx mov %esi,%ecx xor 12(%rsp),%eax mov %edx,%r11d and %edi,%ebx xor 36(%rsp),%eax or %edi,%ecx rol $5,%r11d xor 56(%rsp),%eax and %ebp,%ecx add %r11d,%r12d rol $1,%eax or %ecx,%ebx rol $30,%esi mov %eax,4(%rsp) add %ebx,%r12d lea -0x70e44324(%eax,%ebp),%r11d mov 8(%rsp),%eax mov %edx,%ebx mov %edx,%ecx xor 16(%rsp),%eax mov %r12d,%ebp and %esi,%ebx xor 40(%rsp),%eax or %esi,%ecx rol $5,%ebp xor 60(%rsp),%eax and %edi,%ecx add %ebp,%r11d rol $1,%eax or %ecx,%ebx rol $30,%edx mov %eax,8(%rsp) add %ebx,%r11d lea -0x70e44324(%eax,%edi),%ebp mov 12(%rsp),%eax mov %r12d,%ebx mov %r12d,%ecx xor 20(%rsp),%eax mov %r11d,%edi and %edx,%ebx xor 44(%rsp),%eax or %edx,%ecx rol $5,%edi xor 0(%rsp),%eax and %esi,%ecx add %edi,%ebp rol $1,%eax or %ecx,%ebx rol $30,%r12d mov %eax,12(%rsp) add %ebx,%ebp lea -0x70e44324(%eax,%esi),%edi mov 16(%rsp),%eax mov %r11d,%ebx mov %r11d,%ecx xor 24(%rsp),%eax mov %ebp,%esi and %r12d,%ebx xor 48(%rsp),%eax or %r12d,%ecx rol $5,%esi xor 4(%rsp),%eax and %edx,%ecx add %esi,%edi rol $1,%eax or %ecx,%ebx rol $30,%r11d mov %eax,16(%rsp) add %ebx,%edi lea -0x70e44324(%eax,%edx),%esi mov 20(%rsp),%eax mov %ebp,%ebx mov %ebp,%ecx xor 28(%rsp),%eax mov %edi,%edx and %r11d,%ebx xor 52(%rsp),%eax or %r11d,%ecx rol $5,%edx xor 8(%rsp),%eax and %r12d,%ecx add %edx,%esi rol $1,%eax or %ecx,%ebx rol $30,%ebp mov %eax,20(%rsp) add %ebx,%esi lea -0x70e44324(%eax,%r12d),%edx mov 24(%rsp),%eax mov %edi,%ebx mov %edi,%ecx xor 32(%rsp),%eax mov %esi,%r12d and %ebp,%ebx xor 56(%rsp),%eax or %ebp,%ecx rol $5,%r12d xor 12(%rsp),%eax and %r11d,%ecx add %r12d,%edx rol $1,%eax or %ecx,%ebx rol $30,%edi mov %eax,24(%rsp) add %ebx,%edx lea 
-0x70e44324(%eax,%r11d),%r12d mov 28(%rsp),%eax mov %esi,%ebx mov %esi,%ecx xor 36(%rsp),%eax mov %edx,%r11d and %edi,%ebx xor 60(%rsp),%eax or %edi,%ecx rol $5,%r11d xor 16(%rsp),%eax and %ebp,%ecx add %r11d,%r12d rol $1,%eax or %ecx,%ebx rol $30,%esi mov %eax,28(%rsp) add %ebx,%r12d lea -0x70e44324(%eax,%ebp),%r11d mov 32(%rsp),%eax mov %edx,%ebx mov %edx,%ecx xor 40(%rsp),%eax mov %r12d,%ebp and %esi,%ebx xor 0(%rsp),%eax or %esi,%ecx rol $5,%ebp xor 20(%rsp),%eax and %edi,%ecx add %ebp,%r11d rol $1,%eax or %ecx,%ebx rol $30,%edx mov %eax,32(%rsp) add %ebx,%r11d lea -0x70e44324(%eax,%edi),%ebp mov 36(%rsp),%eax mov %r12d,%ebx mov %r12d,%ecx xor 44(%rsp),%eax mov %r11d,%edi and %edx,%ebx xor 4(%rsp),%eax or %edx,%ecx rol $5,%edi xor 24(%rsp),%eax and %esi,%ecx add %edi,%ebp rol $1,%eax or %ecx,%ebx rol $30,%r12d mov %eax,36(%rsp) add %ebx,%ebp lea -0x70e44324(%eax,%esi),%edi mov 40(%rsp),%eax mov %r11d,%ebx mov %r11d,%ecx xor 48(%rsp),%eax mov %ebp,%esi and %r12d,%ebx xor 8(%rsp),%eax or %r12d,%ecx rol $5,%esi xor 28(%rsp),%eax and %edx,%ecx add %esi,%edi rol $1,%eax or %ecx,%ebx rol $30,%r11d mov %eax,40(%rsp) add %ebx,%edi lea -0x70e44324(%eax,%edx),%esi mov 44(%rsp),%eax mov %ebp,%ebx mov %ebp,%ecx xor 52(%rsp),%eax mov %edi,%edx and %r11d,%ebx xor 12(%rsp),%eax or %r11d,%ecx rol $5,%edx xor 32(%rsp),%eax and %r12d,%ecx add %edx,%esi rol $1,%eax or %ecx,%ebx rol $30,%ebp mov %eax,44(%rsp) add %ebx,%esi lea -0x70e44324(%eax,%r12d),%edx mov 48(%rsp),%eax mov %edi,%ebx mov %edi,%ecx xor 56(%rsp),%eax mov %esi,%r12d and %ebp,%ebx xor 16(%rsp),%eax or %ebp,%ecx rol $5,%r12d xor 36(%rsp),%eax and %r11d,%ecx add %r12d,%edx rol $1,%eax or %ecx,%ebx rol $30,%edi mov %eax,48(%rsp) add %ebx,%edx lea -0x359d3e2a(%eax,%r11d),%r12d mov 52(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 60(%rsp),%eax xor %esi,%ebx rol $5,%r11d xor 20(%rsp),%eax xor %ebp,%ebx add %r11d,%r12d xor 40(%rsp),%eax rol $30,%esi add %ebx,%r12d rol $1,%eax mov %eax,52(%rsp) lea -0x359d3e2a(%eax,%ebp),%r11d mov 56(%rsp),%eax mov %esi,%ebx mov %r12d,%ebp xor 0(%rsp),%eax xor %edx,%ebx rol $5,%ebp xor 24(%rsp),%eax xor %edi,%ebx add %ebp,%r11d xor 44(%rsp),%eax rol $30,%edx add %ebx,%r11d rol $1,%eax mov %eax,56(%rsp) lea -0x359d3e2a(%eax,%edi),%ebp mov 60(%rsp),%eax mov %edx,%ebx mov %r11d,%edi xor 4(%rsp),%eax xor %r12d,%ebx rol $5,%edi xor 28(%rsp),%eax xor %esi,%ebx add %edi,%ebp xor 48(%rsp),%eax rol $30,%r12d add %ebx,%ebp rol $1,%eax mov %eax,60(%rsp) lea -0x359d3e2a(%eax,%esi),%edi mov 0(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 8(%rsp),%eax xor %r11d,%ebx rol $5,%esi xor 32(%rsp),%eax xor %edx,%ebx add %esi,%edi xor 52(%rsp),%eax rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,0(%rsp) lea -0x359d3e2a(%eax,%edx),%esi mov 4(%rsp),%eax mov %r11d,%ebx mov %edi,%edx xor 12(%rsp),%eax xor %ebp,%ebx rol $5,%edx xor 36(%rsp),%eax xor %r12d,%ebx add %edx,%esi xor 56(%rsp),%eax rol $30,%ebp add %ebx,%esi rol $1,%eax mov %eax,4(%rsp) lea -0x359d3e2a(%eax,%r12d),%edx mov 8(%rsp),%eax mov %ebp,%ebx mov %esi,%r12d xor 16(%rsp),%eax xor %edi,%ebx rol $5,%r12d xor 40(%rsp),%eax xor %r11d,%ebx add %r12d,%edx xor 60(%rsp),%eax rol $30,%edi add %ebx,%edx rol $1,%eax mov %eax,8(%rsp) lea -0x359d3e2a(%eax,%r11d),%r12d mov 12(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 20(%rsp),%eax xor %esi,%ebx rol $5,%r11d xor 44(%rsp),%eax xor %ebp,%ebx add %r11d,%r12d xor 0(%rsp),%eax rol $30,%esi add %ebx,%r12d rol $1,%eax mov %eax,12(%rsp) lea -0x359d3e2a(%eax,%ebp),%r11d mov 16(%rsp),%eax mov %esi,%ebx mov %r12d,%ebp xor 24(%rsp),%eax xor %edx,%ebx 
rol $5,%ebp xor 48(%rsp),%eax xor %edi,%ebx add %ebp,%r11d xor 4(%rsp),%eax rol $30,%edx add %ebx,%r11d rol $1,%eax mov %eax,16(%rsp) lea -0x359d3e2a(%eax,%edi),%ebp mov 20(%rsp),%eax mov %edx,%ebx mov %r11d,%edi xor 28(%rsp),%eax xor %r12d,%ebx rol $5,%edi xor 52(%rsp),%eax xor %esi,%ebx add %edi,%ebp xor 8(%rsp),%eax rol $30,%r12d add %ebx,%ebp rol $1,%eax mov %eax,20(%rsp) lea -0x359d3e2a(%eax,%esi),%edi mov 24(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 32(%rsp),%eax xor %r11d,%ebx rol $5,%esi xor 56(%rsp),%eax xor %edx,%ebx add %esi,%edi xor 12(%rsp),%eax rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,24(%rsp) lea -0x359d3e2a(%eax,%edx),%esi mov 28(%rsp),%eax mov %r11d,%ebx mov %edi,%edx xor 36(%rsp),%eax xor %ebp,%ebx rol $5,%edx xor 60(%rsp),%eax xor %r12d,%ebx add %edx,%esi xor 16(%rsp),%eax rol $30,%ebp add %ebx,%esi rol $1,%eax mov %eax,28(%rsp) lea -0x359d3e2a(%eax,%r12d),%edx mov 32(%rsp),%eax mov %ebp,%ebx mov %esi,%r12d xor 40(%rsp),%eax xor %edi,%ebx rol $5,%r12d xor 0(%rsp),%eax xor %r11d,%ebx add %r12d,%edx xor 20(%rsp),%eax rol $30,%edi add %ebx,%edx rol $1,%eax mov %eax,32(%rsp) lea -0x359d3e2a(%eax,%r11d),%r12d mov 36(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 44(%rsp),%eax xor %esi,%ebx rol $5,%r11d xor 4(%rsp),%eax xor %ebp,%ebx add %r11d,%r12d xor 24(%rsp),%eax rol $30,%esi add %ebx,%r12d rol $1,%eax mov %eax,36(%rsp) lea -0x359d3e2a(%eax,%ebp),%r11d mov 40(%rsp),%eax mov %esi,%ebx mov %r12d,%ebp xor 48(%rsp),%eax xor %edx,%ebx rol $5,%ebp xor 8(%rsp),%eax xor %edi,%ebx add %ebp,%r11d xor 28(%rsp),%eax rol $30,%edx add %ebx,%r11d rol $1,%eax mov %eax,40(%rsp) lea -0x359d3e2a(%eax,%edi),%ebp mov 44(%rsp),%eax mov %edx,%ebx mov %r11d,%edi xor 52(%rsp),%eax xor %r12d,%ebx rol $5,%edi xor 12(%rsp),%eax xor %esi,%ebx add %edi,%ebp xor 32(%rsp),%eax rol $30,%r12d add %ebx,%ebp rol $1,%eax mov %eax,44(%rsp) lea -0x359d3e2a(%eax,%esi),%edi mov 48(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 56(%rsp),%eax xor %r11d,%ebx rol $5,%esi xor 16(%rsp),%eax xor %edx,%ebx add %esi,%edi xor 36(%rsp),%eax rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,48(%rsp) lea -0x359d3e2a(%eax,%edx),%esi mov 52(%rsp),%eax mov %r11d,%ebx mov %edi,%edx xor 60(%rsp),%eax xor %ebp,%ebx rol $5,%edx xor 20(%rsp),%eax xor %r12d,%ebx add %edx,%esi xor 40(%rsp),%eax rol $30,%ebp add %ebx,%esi rol $1,%eax lea -0x359d3e2a(%eax,%r12d),%edx mov 56(%rsp),%eax mov %ebp,%ebx mov %esi,%r12d xor 0(%rsp),%eax xor %edi,%ebx rol $5,%r12d xor 24(%rsp),%eax xor %r11d,%ebx add %r12d,%edx xor 44(%rsp),%eax rol $30,%edi add %ebx,%edx rol $1,%eax lea -0x359d3e2a(%eax,%r11d),%r12d mov 60(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 4(%rsp),%eax xor %esi,%ebx rol $5,%r11d xor 28(%rsp),%eax xor %ebp,%ebx add %r11d,%r12d xor 48(%rsp),%eax rol $30,%esi add %ebx,%r12d rol $1,%eax lea -0x359d3e2a(%eax,%ebp),%r11d mov %esi,%ebx mov %r12d,%ebp xor %edx,%ebx rol $5,%ebp xor %edi,%ebx add %ebp,%r11d rol $30,%edx add %ebx,%r11d // Update and save state information in SHA-1 context add 0(%r8),%r11d add 4(%r8),%r12d add 8(%r8),%edx add 12(%r8),%esi add 16(%r8),%edi mov %r11d,0(%r8) mov %r12d,4(%r8) mov %edx,8(%r8) mov %esi,12(%r8) mov %edi,16(%r8) xchg %r11d,%edx # mov %r11d,%edx xchg %r12d,%esi # mov %r12d,%esi xchg %r11d,%edi # mov %edx,%edi xchg %r12d,%ebp # mov %esi,%ebp # mov %edi,%r11d lea 64(%r9),%r9 sub $1,%r10 jnz .Lloop mov 64(%rsp),%rsp .cfi_def_cfa %rsp,8 movq -24(%rsp),%r12 .cfi_restore %r12 movq -16(%rsp),%rbp .cfi_restore %rbp movq -8(%rsp),%rbx .cfi_restore %rbx ret .cfi_endproc SET_SIZE(sha1_block_data_order) .data 
.asciz "SHA1 block transform for x86_64, CRYPTOGAMS by " #endif /* lint || __lint */ #ifdef __ELF__ .section .note.GNU-stack,"",%progbits #endif diff --git a/module/icp/asm-x86_64/sha2/sha256_impl.S b/module/icp/asm-x86_64/sha2/sha256_impl.S index 28b048d2db24..ccd4a3e6b3af 100644 --- a/module/icp/asm-x86_64/sha2/sha256_impl.S +++ b/module/icp/asm-x86_64/sha2/sha256_impl.S @@ -1,2089 +1,2089 @@ /* * ==================================================================== * Written by Andy Polyakov for the OpenSSL * project. Rights for redistribution and usage in source and binary * forms are granted according to the OpenSSL license. * ==================================================================== * * sha256/512_block procedure for x86_64. * * 40% improvement over compiler-generated code on Opteron. On EM64T * sha256 was observed to run >80% faster and sha512 - >40%. No magical * tricks, just straight implementation... I really wonder why gcc * [being armed with inline assembler] fails to generate as fast code. * The only thing which is cool about this module is that it's very * same instruction sequence used for both SHA-256 and SHA-512. In * former case the instructions operate on 32-bit operands, while in * latter - on 64-bit ones. All I had to do is to get one flavor right, * the other one passed the test right away:-) * * sha256_block runs in ~1005 cycles on Opteron, which gives you * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock * frequency in GHz. sha512_block runs in ~1275 cycles, which results * in 128*1000/1275=100MBps per GHz. Is there room for improvement? * Well, if you compare it to IA-64 implementation, which maintains * X[16] in register bank[!], tends to 4 instructions per CPU clock * cycle and runs in 1003 cycles, 1275 is very good result for 3-way * issue Opteron pipeline and X[16] maintained in memory. So that *if* * there is a way to improve it, *then* the only way would be to try to * offload X[16] updates to SSE unit, but that would require "deeper" * loop unroll, which in turn would naturally cause size blow-up, not * to mention increased complexity! And once again, only *if* it's * actually possible to noticeably improve overall ILP, instruction * level parallelism, on a given CPU implementation in this case. * * Special note on Intel EM64T. While Opteron CPU exhibits perfect * performance ratio of 1.5 between 64- and 32-bit flavors [see above], * [currently available] EM64T CPUs apparently are far from it. On the * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit * sha256_block:-( This is presumably because 64-bit shifts/rotates * apparently are not atomic instructions, but implemented in microcode. */ /* * OpenSolaris OS modifications * * Sun elects to use this software under the BSD license. * * This source originates from OpenSSL file sha512-x86_64.pl at * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz * (presumably for future OpenSSL release 0.9.8h), with these changes: * * 1. Added perl "use strict" and declared variables. * * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. * * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) * assemblers). Replaced the .picmeup macro with assembler code. * * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype", * at the beginning of SHA2_CTX (the next field is 8-byte aligned). 
*/ /* * This file was generated by a perl script (sha512-x86_64.pl) that were * used to generate sha256 and sha512 variants from the same code base. * The comments from the original file have been pasted above. */ #if defined(lint) || defined(__lint) #include #include -/* ARGSUSED */ void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) { + (void) ctx, (void) in, (void) num; } #else #define _ASM #include ENTRY_NP(SHA256TransformBlocks) .cfi_startproc movq %rsp, %rax .cfi_def_cfa_register %rax push %rbx .cfi_offset %rbx,-16 push %rbp .cfi_offset %rbp,-24 push %r12 .cfi_offset %r12,-32 push %r13 .cfi_offset %r13,-40 push %r14 .cfi_offset %r14,-48 push %r15 .cfi_offset %r15,-56 mov %rsp,%rbp # copy %rsp shl $4,%rdx # num*16 sub $16*4+4*8,%rsp lea (%rsi,%rdx,4),%rdx # inp+num*16*4 and $-64,%rsp # align stack frame add $8,%rdi # Skip OpenSolaris field, "algotype" mov %rdi,16*4+0*8(%rsp) # save ctx, 1st arg mov %rsi,16*4+1*8(%rsp) # save inp, 2nd arg mov %rdx,16*4+2*8(%rsp) # save end pointer, "3rd" arg mov %rbp,16*4+3*8(%rsp) # save copy of %rsp # echo ".cfi_cfa_expression %rsp+88,deref,+56" | # openssl/crypto/perlasm/x86_64-xlate.pl .cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x38 #.picmeup %rbp # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts # the address of the "next" instruction into the target register # (%rbp). This generates these 2 instructions: lea .Llea(%rip),%rbp #nop # .picmeup generates a nop for mod 8 alignment--not needed here .Llea: lea K256-.(%rbp),%rbp mov 4*0(%rdi),%eax mov 4*1(%rdi),%ebx mov 4*2(%rdi),%ecx mov 4*3(%rdi),%edx mov 4*4(%rdi),%r8d mov 4*5(%rdi),%r9d mov 4*6(%rdi),%r10d mov 4*7(%rdi),%r11d jmp .Lloop .align 16 .Lloop: xor %rdi,%rdi mov 4*0(%rsi),%r12d bswap %r12d mov %r8d,%r13d mov %r8d,%r14d mov %r9d,%r15d ror $6,%r13d ror $11,%r14d xor %r10d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r8d,%r15d # (f^g)&e mov %r12d,0(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r11d,%r12d # T1+=h mov %eax,%r11d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %eax,%r13d mov %eax,%r14d ror $2,%r11d ror $13,%r13d mov %eax,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r11d ror $9,%r13d or %ecx,%r14d # a|c xor %r13d,%r11d # h=Sigma0(a) and %ecx,%r15d # a&c add %r12d,%edx # d+=T1 and %ebx,%r14d # (a|c)&b add %r12d,%r11d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r11d # h+=Maj(a,b,c) mov 4*1(%rsi),%r12d bswap %r12d mov %edx,%r13d mov %edx,%r14d mov %r8d,%r15d ror $6,%r13d ror $11,%r14d xor %r9d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %edx,%r15d # (f^g)&e mov %r12d,4(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r10d,%r12d # T1+=h mov %r11d,%r10d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r11d,%r13d mov %r11d,%r14d ror $2,%r10d ror $13,%r13d mov %r11d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r10d ror $9,%r13d or %ebx,%r14d # a|c xor %r13d,%r10d # h=Sigma0(a) and %ebx,%r15d # a&c add %r12d,%ecx # d+=T1 and %eax,%r14d # (a|c)&b add %r12d,%r10d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r10d # h+=Maj(a,b,c) mov 4*2(%rsi),%r12d bswap %r12d mov %ecx,%r13d mov %ecx,%r14d mov %edx,%r15d ror $6,%r13d ror $11,%r14d xor %r8d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ecx,%r15d # (f^g)&e mov %r12d,8(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r9d,%r12d # T1+=h mov %r10d,%r9d add 
%r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r10d,%r13d mov %r10d,%r14d ror $2,%r9d ror $13,%r13d mov %r10d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r9d ror $9,%r13d or %eax,%r14d # a|c xor %r13d,%r9d # h=Sigma0(a) and %eax,%r15d # a&c add %r12d,%ebx # d+=T1 and %r11d,%r14d # (a|c)&b add %r12d,%r9d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r9d # h+=Maj(a,b,c) mov 4*3(%rsi),%r12d bswap %r12d mov %ebx,%r13d mov %ebx,%r14d mov %ecx,%r15d ror $6,%r13d ror $11,%r14d xor %edx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ebx,%r15d # (f^g)&e mov %r12d,12(%rsp) xor %r14d,%r13d # Sigma1(e) xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r8d,%r12d # T1+=h mov %r9d,%r8d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r9d,%r13d mov %r9d,%r14d ror $2,%r8d ror $13,%r13d mov %r9d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r8d ror $9,%r13d or %r11d,%r14d # a|c xor %r13d,%r8d # h=Sigma0(a) and %r11d,%r15d # a&c add %r12d,%eax # d+=T1 and %r10d,%r14d # (a|c)&b add %r12d,%r8d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r8d # h+=Maj(a,b,c) mov 4*4(%rsi),%r12d bswap %r12d mov %eax,%r13d mov %eax,%r14d mov %ebx,%r15d ror $6,%r13d ror $11,%r14d xor %ecx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %eax,%r15d # (f^g)&e mov %r12d,16(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %edx,%r12d # T1+=h mov %r8d,%edx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r8d,%r13d mov %r8d,%r14d ror $2,%edx ror $13,%r13d mov %r8d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%edx ror $9,%r13d or %r10d,%r14d # a|c xor %r13d,%edx # h=Sigma0(a) and %r10d,%r15d # a&c add %r12d,%r11d # d+=T1 and %r9d,%r14d # (a|c)&b add %r12d,%edx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%edx # h+=Maj(a,b,c) mov 4*5(%rsi),%r12d bswap %r12d mov %r11d,%r13d mov %r11d,%r14d mov %eax,%r15d ror $6,%r13d ror $11,%r14d xor %ebx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r11d,%r15d # (f^g)&e mov %r12d,20(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ecx,%r12d # T1+=h mov %edx,%ecx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %edx,%r13d mov %edx,%r14d ror $2,%ecx ror $13,%r13d mov %edx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ecx ror $9,%r13d or %r9d,%r14d # a|c xor %r13d,%ecx # h=Sigma0(a) and %r9d,%r15d # a&c add %r12d,%r10d # d+=T1 and %r8d,%r14d # (a|c)&b add %r12d,%ecx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ecx # h+=Maj(a,b,c) mov 4*6(%rsi),%r12d bswap %r12d mov %r10d,%r13d mov %r10d,%r14d mov %r11d,%r15d ror $6,%r13d ror $11,%r14d xor %eax,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r10d,%r15d # (f^g)&e mov %r12d,24(%rsp) xor %r14d,%r13d # Sigma1(e) xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ebx,%r12d # T1+=h mov %ecx,%ebx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ecx,%r13d mov %ecx,%r14d ror $2,%ebx ror $13,%r13d mov %ecx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ebx ror $9,%r13d or %r8d,%r14d # a|c xor %r13d,%ebx # h=Sigma0(a) and %r8d,%r15d # a&c add %r12d,%r9d # d+=T1 and %edx,%r14d # (a|c)&b add %r12d,%ebx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ebx # h+=Maj(a,b,c) mov 4*7(%rsi),%r12d bswap %r12d mov %r9d,%r13d mov %r9d,%r14d mov %r10d,%r15d ror $6,%r13d ror 
$11,%r14d xor %r11d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r9d,%r15d # (f^g)&e mov %r12d,28(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %eax,%r12d # T1+=h mov %ebx,%eax add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ebx,%r13d mov %ebx,%r14d ror $2,%eax ror $13,%r13d mov %ebx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%eax ror $9,%r13d or %edx,%r14d # a|c xor %r13d,%eax # h=Sigma0(a) and %edx,%r15d # a&c add %r12d,%r8d # d+=T1 and %ecx,%r14d # (a|c)&b add %r12d,%eax # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%eax # h+=Maj(a,b,c) mov 4*8(%rsi),%r12d bswap %r12d mov %r8d,%r13d mov %r8d,%r14d mov %r9d,%r15d ror $6,%r13d ror $11,%r14d xor %r10d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r8d,%r15d # (f^g)&e mov %r12d,32(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r11d,%r12d # T1+=h mov %eax,%r11d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %eax,%r13d mov %eax,%r14d ror $2,%r11d ror $13,%r13d mov %eax,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r11d ror $9,%r13d or %ecx,%r14d # a|c xor %r13d,%r11d # h=Sigma0(a) and %ecx,%r15d # a&c add %r12d,%edx # d+=T1 and %ebx,%r14d # (a|c)&b add %r12d,%r11d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r11d # h+=Maj(a,b,c) mov 4*9(%rsi),%r12d bswap %r12d mov %edx,%r13d mov %edx,%r14d mov %r8d,%r15d ror $6,%r13d ror $11,%r14d xor %r9d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %edx,%r15d # (f^g)&e mov %r12d,36(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r10d,%r12d # T1+=h mov %r11d,%r10d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r11d,%r13d mov %r11d,%r14d ror $2,%r10d ror $13,%r13d mov %r11d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r10d ror $9,%r13d or %ebx,%r14d # a|c xor %r13d,%r10d # h=Sigma0(a) and %ebx,%r15d # a&c add %r12d,%ecx # d+=T1 and %eax,%r14d # (a|c)&b add %r12d,%r10d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r10d # h+=Maj(a,b,c) mov 4*10(%rsi),%r12d bswap %r12d mov %ecx,%r13d mov %ecx,%r14d mov %edx,%r15d ror $6,%r13d ror $11,%r14d xor %r8d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ecx,%r15d # (f^g)&e mov %r12d,40(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r9d,%r12d # T1+=h mov %r10d,%r9d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r10d,%r13d mov %r10d,%r14d ror $2,%r9d ror $13,%r13d mov %r10d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r9d ror $9,%r13d or %eax,%r14d # a|c xor %r13d,%r9d # h=Sigma0(a) and %eax,%r15d # a&c add %r12d,%ebx # d+=T1 and %r11d,%r14d # (a|c)&b add %r12d,%r9d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r9d # h+=Maj(a,b,c) mov 4*11(%rsi),%r12d bswap %r12d mov %ebx,%r13d mov %ebx,%r14d mov %ecx,%r15d ror $6,%r13d ror $11,%r14d xor %edx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ebx,%r15d # (f^g)&e mov %r12d,44(%rsp) xor %r14d,%r13d # Sigma1(e) xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r8d,%r12d # T1+=h mov %r9d,%r8d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r9d,%r13d mov %r9d,%r14d ror $2,%r8d ror $13,%r13d mov %r9d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r8d ror $9,%r13d or %r11d,%r14d # a|c xor %r13d,%r8d # h=Sigma0(a) and %r11d,%r15d # a&c add %r12d,%eax # d+=T1 and %r10d,%r14d # (a|c)&b 
add %r12d,%r8d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r8d # h+=Maj(a,b,c) mov 4*12(%rsi),%r12d bswap %r12d mov %eax,%r13d mov %eax,%r14d mov %ebx,%r15d ror $6,%r13d ror $11,%r14d xor %ecx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %eax,%r15d # (f^g)&e mov %r12d,48(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %edx,%r12d # T1+=h mov %r8d,%edx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r8d,%r13d mov %r8d,%r14d ror $2,%edx ror $13,%r13d mov %r8d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%edx ror $9,%r13d or %r10d,%r14d # a|c xor %r13d,%edx # h=Sigma0(a) and %r10d,%r15d # a&c add %r12d,%r11d # d+=T1 and %r9d,%r14d # (a|c)&b add %r12d,%edx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%edx # h+=Maj(a,b,c) mov 4*13(%rsi),%r12d bswap %r12d mov %r11d,%r13d mov %r11d,%r14d mov %eax,%r15d ror $6,%r13d ror $11,%r14d xor %ebx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r11d,%r15d # (f^g)&e mov %r12d,52(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ecx,%r12d # T1+=h mov %edx,%ecx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %edx,%r13d mov %edx,%r14d ror $2,%ecx ror $13,%r13d mov %edx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ecx ror $9,%r13d or %r9d,%r14d # a|c xor %r13d,%ecx # h=Sigma0(a) and %r9d,%r15d # a&c add %r12d,%r10d # d+=T1 and %r8d,%r14d # (a|c)&b add %r12d,%ecx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ecx # h+=Maj(a,b,c) mov 4*14(%rsi),%r12d bswap %r12d mov %r10d,%r13d mov %r10d,%r14d mov %r11d,%r15d ror $6,%r13d ror $11,%r14d xor %eax,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r10d,%r15d # (f^g)&e mov %r12d,56(%rsp) xor %r14d,%r13d # Sigma1(e) xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ebx,%r12d # T1+=h mov %ecx,%ebx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ecx,%r13d mov %ecx,%r14d ror $2,%ebx ror $13,%r13d mov %ecx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ebx ror $9,%r13d or %r8d,%r14d # a|c xor %r13d,%ebx # h=Sigma0(a) and %r8d,%r15d # a&c add %r12d,%r9d # d+=T1 and %edx,%r14d # (a|c)&b add %r12d,%ebx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ebx # h+=Maj(a,b,c) mov 4*15(%rsi),%r12d bswap %r12d mov %r9d,%r13d mov %r9d,%r14d mov %r10d,%r15d ror $6,%r13d ror $11,%r14d xor %r11d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r9d,%r15d # (f^g)&e mov %r12d,60(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %eax,%r12d # T1+=h mov %ebx,%eax add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ebx,%r13d mov %ebx,%r14d ror $2,%eax ror $13,%r13d mov %ebx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%eax ror $9,%r13d or %edx,%r14d # a|c xor %r13d,%eax # h=Sigma0(a) and %edx,%r15d # a&c add %r12d,%r8d # d+=T1 and %ecx,%r14d # (a|c)&b add %r12d,%eax # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%eax # h+=Maj(a,b,c) jmp .Lrounds_16_xx .align 16 .Lrounds_16_xx: mov 4(%rsp),%r13d mov 56(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 36(%rsp),%r12d add 0(%rsp),%r12d mov %r8d,%r13d mov %r8d,%r14d mov %r9d,%r15d ror $6,%r13d 
ror $11,%r14d xor %r10d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r8d,%r15d # (f^g)&e mov %r12d,0(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r11d,%r12d # T1+=h mov %eax,%r11d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %eax,%r13d mov %eax,%r14d ror $2,%r11d ror $13,%r13d mov %eax,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r11d ror $9,%r13d or %ecx,%r14d # a|c xor %r13d,%r11d # h=Sigma0(a) and %ecx,%r15d # a&c add %r12d,%edx # d+=T1 and %ebx,%r14d # (a|c)&b add %r12d,%r11d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r11d # h+=Maj(a,b,c) mov 8(%rsp),%r13d mov 60(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 40(%rsp),%r12d add 4(%rsp),%r12d mov %edx,%r13d mov %edx,%r14d mov %r8d,%r15d ror $6,%r13d ror $11,%r14d xor %r9d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %edx,%r15d # (f^g)&e mov %r12d,4(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r10d,%r12d # T1+=h mov %r11d,%r10d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r11d,%r13d mov %r11d,%r14d ror $2,%r10d ror $13,%r13d mov %r11d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r10d ror $9,%r13d or %ebx,%r14d # a|c xor %r13d,%r10d # h=Sigma0(a) and %ebx,%r15d # a&c add %r12d,%ecx # d+=T1 and %eax,%r14d # (a|c)&b add %r12d,%r10d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r10d # h+=Maj(a,b,c) mov 12(%rsp),%r13d mov 0(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 44(%rsp),%r12d add 8(%rsp),%r12d mov %ecx,%r13d mov %ecx,%r14d mov %edx,%r15d ror $6,%r13d ror $11,%r14d xor %r8d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ecx,%r15d # (f^g)&e mov %r12d,8(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r9d,%r12d # T1+=h mov %r10d,%r9d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r10d,%r13d mov %r10d,%r14d ror $2,%r9d ror $13,%r13d mov %r10d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r9d ror $9,%r13d or %eax,%r14d # a|c xor %r13d,%r9d # h=Sigma0(a) and %eax,%r15d # a&c add %r12d,%ebx # d+=T1 and %r11d,%r14d # (a|c)&b add %r12d,%r9d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r9d # h+=Maj(a,b,c) mov 16(%rsp),%r13d mov 4(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 48(%rsp),%r12d add 12(%rsp),%r12d mov %ebx,%r13d mov %ebx,%r14d mov %ecx,%r15d ror $6,%r13d ror $11,%r14d xor %edx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ebx,%r15d # (f^g)&e mov %r12d,12(%rsp) xor %r14d,%r13d # Sigma1(e) xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r8d,%r12d # T1+=h mov %r9d,%r8d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r9d,%r13d mov %r9d,%r14d ror $2,%r8d ror $13,%r13d mov %r9d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r8d ror $9,%r13d or %r11d,%r14d # a|c xor 
%r13d,%r8d # h=Sigma0(a) and %r11d,%r15d # a&c add %r12d,%eax # d+=T1 and %r10d,%r14d # (a|c)&b add %r12d,%r8d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r8d # h+=Maj(a,b,c) mov 20(%rsp),%r13d mov 8(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 52(%rsp),%r12d add 16(%rsp),%r12d mov %eax,%r13d mov %eax,%r14d mov %ebx,%r15d ror $6,%r13d ror $11,%r14d xor %ecx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %eax,%r15d # (f^g)&e mov %r12d,16(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %edx,%r12d # T1+=h mov %r8d,%edx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r8d,%r13d mov %r8d,%r14d ror $2,%edx ror $13,%r13d mov %r8d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%edx ror $9,%r13d or %r10d,%r14d # a|c xor %r13d,%edx # h=Sigma0(a) and %r10d,%r15d # a&c add %r12d,%r11d # d+=T1 and %r9d,%r14d # (a|c)&b add %r12d,%edx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%edx # h+=Maj(a,b,c) mov 24(%rsp),%r13d mov 12(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 56(%rsp),%r12d add 20(%rsp),%r12d mov %r11d,%r13d mov %r11d,%r14d mov %eax,%r15d ror $6,%r13d ror $11,%r14d xor %ebx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r11d,%r15d # (f^g)&e mov %r12d,20(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ecx,%r12d # T1+=h mov %edx,%ecx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %edx,%r13d mov %edx,%r14d ror $2,%ecx ror $13,%r13d mov %edx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ecx ror $9,%r13d or %r9d,%r14d # a|c xor %r13d,%ecx # h=Sigma0(a) and %r9d,%r15d # a&c add %r12d,%r10d # d+=T1 and %r8d,%r14d # (a|c)&b add %r12d,%ecx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ecx # h+=Maj(a,b,c) mov 28(%rsp),%r13d mov 16(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 60(%rsp),%r12d add 24(%rsp),%r12d mov %r10d,%r13d mov %r10d,%r14d mov %r11d,%r15d ror $6,%r13d ror $11,%r14d xor %eax,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r10d,%r15d # (f^g)&e mov %r12d,24(%rsp) xor %r14d,%r13d # Sigma1(e) xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ebx,%r12d # T1+=h mov %ecx,%ebx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ecx,%r13d mov %ecx,%r14d ror $2,%ebx ror $13,%r13d mov %ecx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ebx ror $9,%r13d or %r8d,%r14d # a|c xor %r13d,%ebx # h=Sigma0(a) and %r8d,%r15d # a&c add %r12d,%r9d # d+=T1 and %edx,%r14d # (a|c)&b add %r12d,%ebx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ebx # h+=Maj(a,b,c) mov 32(%rsp),%r13d mov 20(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor 
%r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 0(%rsp),%r12d add 28(%rsp),%r12d mov %r9d,%r13d mov %r9d,%r14d mov %r10d,%r15d ror $6,%r13d ror $11,%r14d xor %r11d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r9d,%r15d # (f^g)&e mov %r12d,28(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %eax,%r12d # T1+=h mov %ebx,%eax add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ebx,%r13d mov %ebx,%r14d ror $2,%eax ror $13,%r13d mov %ebx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%eax ror $9,%r13d or %edx,%r14d # a|c xor %r13d,%eax # h=Sigma0(a) and %edx,%r15d # a&c add %r12d,%r8d # d+=T1 and %ecx,%r14d # (a|c)&b add %r12d,%eax # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%eax # h+=Maj(a,b,c) mov 36(%rsp),%r13d mov 24(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 4(%rsp),%r12d add 32(%rsp),%r12d mov %r8d,%r13d mov %r8d,%r14d mov %r9d,%r15d ror $6,%r13d ror $11,%r14d xor %r10d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r8d,%r15d # (f^g)&e mov %r12d,32(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r11d,%r12d # T1+=h mov %eax,%r11d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %eax,%r13d mov %eax,%r14d ror $2,%r11d ror $13,%r13d mov %eax,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r11d ror $9,%r13d or %ecx,%r14d # a|c xor %r13d,%r11d # h=Sigma0(a) and %ecx,%r15d # a&c add %r12d,%edx # d+=T1 and %ebx,%r14d # (a|c)&b add %r12d,%r11d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r11d # h+=Maj(a,b,c) mov 40(%rsp),%r13d mov 28(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 8(%rsp),%r12d add 36(%rsp),%r12d mov %edx,%r13d mov %edx,%r14d mov %r8d,%r15d ror $6,%r13d ror $11,%r14d xor %r9d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %edx,%r15d # (f^g)&e mov %r12d,36(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r10d,%r12d # T1+=h mov %r11d,%r10d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r11d,%r13d mov %r11d,%r14d ror $2,%r10d ror $13,%r13d mov %r11d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r10d ror $9,%r13d or %ebx,%r14d # a|c xor %r13d,%r10d # h=Sigma0(a) and %ebx,%r15d # a&c add %r12d,%ecx # d+=T1 and %eax,%r14d # (a|c)&b add %r12d,%r10d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r10d # h+=Maj(a,b,c) mov 44(%rsp),%r13d mov 32(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 12(%rsp),%r12d add 40(%rsp),%r12d mov %ecx,%r13d mov %ecx,%r14d mov %edx,%r15d ror $6,%r13d ror $11,%r14d xor %r8d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ecx,%r15d # (f^g)&e mov %r12d,40(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r9d,%r12d # T1+=h mov %r10d,%r9d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov 
%r10d,%r13d mov %r10d,%r14d ror $2,%r9d ror $13,%r13d mov %r10d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r9d ror $9,%r13d or %eax,%r14d # a|c xor %r13d,%r9d # h=Sigma0(a) and %eax,%r15d # a&c add %r12d,%ebx # d+=T1 and %r11d,%r14d # (a|c)&b add %r12d,%r9d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r9d # h+=Maj(a,b,c) mov 48(%rsp),%r13d mov 36(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 16(%rsp),%r12d add 44(%rsp),%r12d mov %ebx,%r13d mov %ebx,%r14d mov %ecx,%r15d ror $6,%r13d ror $11,%r14d xor %edx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ebx,%r15d # (f^g)&e mov %r12d,44(%rsp) xor %r14d,%r13d # Sigma1(e) xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r8d,%r12d # T1+=h mov %r9d,%r8d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r9d,%r13d mov %r9d,%r14d ror $2,%r8d ror $13,%r13d mov %r9d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r8d ror $9,%r13d or %r11d,%r14d # a|c xor %r13d,%r8d # h=Sigma0(a) and %r11d,%r15d # a&c add %r12d,%eax # d+=T1 and %r10d,%r14d # (a|c)&b add %r12d,%r8d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r8d # h+=Maj(a,b,c) mov 52(%rsp),%r13d mov 40(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 20(%rsp),%r12d add 48(%rsp),%r12d mov %eax,%r13d mov %eax,%r14d mov %ebx,%r15d ror $6,%r13d ror $11,%r14d xor %ecx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %eax,%r15d # (f^g)&e mov %r12d,48(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %edx,%r12d # T1+=h mov %r8d,%edx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r8d,%r13d mov %r8d,%r14d ror $2,%edx ror $13,%r13d mov %r8d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%edx ror $9,%r13d or %r10d,%r14d # a|c xor %r13d,%edx # h=Sigma0(a) and %r10d,%r15d # a&c add %r12d,%r11d # d+=T1 and %r9d,%r14d # (a|c)&b add %r12d,%edx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%edx # h+=Maj(a,b,c) mov 56(%rsp),%r13d mov 44(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 24(%rsp),%r12d add 52(%rsp),%r12d mov %r11d,%r13d mov %r11d,%r14d mov %eax,%r15d ror $6,%r13d ror $11,%r14d xor %ebx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r11d,%r15d # (f^g)&e mov %r12d,52(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ecx,%r12d # T1+=h mov %edx,%ecx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %edx,%r13d mov %edx,%r14d ror $2,%ecx ror $13,%r13d mov %edx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ecx ror $9,%r13d or %r9d,%r14d # a|c xor %r13d,%ecx # h=Sigma0(a) and %r9d,%r15d # a&c add %r12d,%r10d # d+=T1 and %r8d,%r14d # (a|c)&b add %r12d,%ecx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ecx # h+=Maj(a,b,c) mov 60(%rsp),%r13d mov 48(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d 
ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 28(%rsp),%r12d add 56(%rsp),%r12d mov %r10d,%r13d mov %r10d,%r14d mov %r11d,%r15d ror $6,%r13d ror $11,%r14d xor %eax,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r10d,%r15d # (f^g)&e mov %r12d,56(%rsp) xor %r14d,%r13d # Sigma1(e) xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ebx,%r12d # T1+=h mov %ecx,%ebx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ecx,%r13d mov %ecx,%r14d ror $2,%ebx ror $13,%r13d mov %ecx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ebx ror $9,%r13d or %r8d,%r14d # a|c xor %r13d,%ebx # h=Sigma0(a) and %r8d,%r15d # a&c add %r12d,%r9d # d+=T1 and %edx,%r14d # (a|c)&b add %r12d,%ebx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ebx # h+=Maj(a,b,c) mov 0(%rsp),%r13d mov 52(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 32(%rsp),%r12d add 60(%rsp),%r12d mov %r9d,%r13d mov %r9d,%r14d mov %r10d,%r15d ror $6,%r13d ror $11,%r14d xor %r11d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r9d,%r15d # (f^g)&e mov %r12d,60(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %eax,%r12d # T1+=h mov %ebx,%eax add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ebx,%r13d mov %ebx,%r14d ror $2,%eax ror $13,%r13d mov %ebx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%eax ror $9,%r13d or %edx,%r14d # a|c xor %r13d,%eax # h=Sigma0(a) and %edx,%r15d # a&c add %r12d,%r8d # d+=T1 and %ecx,%r14d # (a|c)&b add %r12d,%eax # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%eax # h+=Maj(a,b,c) cmp $64,%rdi jb .Lrounds_16_xx mov 16*4+0*8(%rsp),%rdi lea 16*4(%rsi),%rsi add 4*0(%rdi),%eax add 4*1(%rdi),%ebx add 4*2(%rdi),%ecx add 4*3(%rdi),%edx add 4*4(%rdi),%r8d add 4*5(%rdi),%r9d add 4*6(%rdi),%r10d add 4*7(%rdi),%r11d cmp 16*4+2*8(%rsp),%rsi mov %eax,4*0(%rdi) mov %ebx,4*1(%rdi) mov %ecx,4*2(%rdi) mov %edx,4*3(%rdi) mov %r8d,4*4(%rdi) mov %r9d,4*5(%rdi) mov %r10d,4*6(%rdi) mov %r11d,4*7(%rdi) jb .Lloop mov 16*4+3*8(%rsp),%rsp .cfi_def_cfa %rsp,56 pop %r15 .cfi_adjust_cfa_offset -8 .cfi_restore %r15 pop %r14 .cfi_adjust_cfa_offset -8 .cfi_restore %r14 pop %r13 .cfi_adjust_cfa_offset -8 .cfi_restore %r13 pop %r12 .cfi_adjust_cfa_offset -8 .cfi_restore %r12 pop %rbp .cfi_adjust_cfa_offset -8 .cfi_restore %rbp pop %rbx .cfi_adjust_cfa_offset -8 .cfi_restore %rbx ret .cfi_endproc SET_SIZE(SHA256TransformBlocks) .data .align 64 .type K256,@object K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 
0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 #endif /* !lint && !__lint */ #ifdef __ELF__ .section .note.GNU-stack,"",%progbits #endif diff --git a/module/icp/asm-x86_64/sha2/sha512_impl.S b/module/icp/asm-x86_64/sha2/sha512_impl.S index 746c85a98566..c6e7efd86038 100644 --- a/module/icp/asm-x86_64/sha2/sha512_impl.S +++ b/module/icp/asm-x86_64/sha2/sha512_impl.S @@ -1,2114 +1,2114 @@ /* * ==================================================================== * Written by Andy Polyakov for the OpenSSL * project. Rights for redistribution and usage in source and binary * forms are granted according to the OpenSSL license. * ==================================================================== * * sha256/512_block procedure for x86_64. * * 40% improvement over compiler-generated code on Opteron. On EM64T * sha256 was observed to run >80% faster and sha512 - >40%. No magical * tricks, just straight implementation... I really wonder why gcc * [being armed with inline assembler] fails to generate as fast code. * The only thing which is cool about this module is that it's very * same instruction sequence used for both SHA-256 and SHA-512. In * former case the instructions operate on 32-bit operands, while in * latter - on 64-bit ones. All I had to do is to get one flavor right, * the other one passed the test right away:-) * * sha256_block runs in ~1005 cycles on Opteron, which gives you * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock * frequency in GHz. sha512_block runs in ~1275 cycles, which results * in 128*1000/1275=100MBps per GHz. Is there room for improvement? * Well, if you compare it to IA-64 implementation, which maintains * X[16] in register bank[!], tends to 4 instructions per CPU clock * cycle and runs in 1003 cycles, 1275 is very good result for 3-way * issue Opteron pipeline and X[16] maintained in memory. So that *if* * there is a way to improve it, *then* the only way would be to try to * offload X[16] updates to SSE unit, but that would require "deeper" * loop unroll, which in turn would naturally cause size blow-up, not * to mention increased complexity! And once again, only *if* it's * actually possible to noticeably improve overall ILP, instruction * level parallelism, on a given CPU implementation in this case. * * Special note on Intel EM64T. While Opteron CPU exhibits perfect * performance ratio of 1.5 between 64- and 32-bit flavors [see above], * [currently available] EM64T CPUs apparently are far from it. On the * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit * sha256_block:-( This is presumably because 64-bit shifts/rotates * apparently are not atomic instructions, but implemented in microcode. */ /* * OpenSolaris OS modifications * * Sun elects to use this software under the BSD license. * * This source originates from OpenSSL file sha512-x86_64.pl at * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz * (presumably for future OpenSSL release 0.9.8h), with these changes: * * 1. Added perl "use strict" and declared variables. * * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. * * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) * assemblers). Replaced the .picmeup macro with assembler code. * * 4. 
Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype", * at the beginning of SHA2_CTX (the next field is 8-byte aligned). */ /* * This file was generated by a perl script (sha512-x86_64.pl) that were * used to generate sha256 and sha512 variants from the same code base. * The comments from the original file have been pasted above. */ #if defined(lint) || defined(__lint) #include #include -/* ARGSUSED */ void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) { + (void) ctx, (void) in, (void) num; } #else #define _ASM #include ENTRY_NP(SHA512TransformBlocks) .cfi_startproc movq %rsp, %rax .cfi_def_cfa_register %rax push %rbx .cfi_offset %rbx,-16 push %rbp .cfi_offset %rbp,-24 push %r12 .cfi_offset %r12,-32 push %r13 .cfi_offset %r13,-40 push %r14 .cfi_offset %r14,-48 push %r15 .cfi_offset %r15,-56 mov %rsp,%rbp # copy %rsp shl $4,%rdx # num*16 sub $16*8+4*8,%rsp lea (%rsi,%rdx,8),%rdx # inp+num*16*8 and $-64,%rsp # align stack frame add $8,%rdi # Skip OpenSolaris field, "algotype" mov %rdi,16*8+0*8(%rsp) # save ctx, 1st arg mov %rsi,16*8+1*8(%rsp) # save inp, 2nd arg mov %rdx,16*8+2*8(%rsp) # save end pointer, "3rd" arg mov %rbp,16*8+3*8(%rsp) # save copy of %rsp # echo ".cfi_cfa_expression %rsp+152,deref,+56" | # openssl/crypto/perlasm/x86_64-xlate.pl .cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x38 #.picmeup %rbp # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts # the address of the "next" instruction into the target register # (%rbp). This generates these 2 instructions: lea .Llea(%rip),%rbp #nop # .picmeup generates a nop for mod 8 alignment--not needed here .Llea: lea K512-.(%rbp),%rbp mov 8*0(%rdi),%rax mov 8*1(%rdi),%rbx mov 8*2(%rdi),%rcx mov 8*3(%rdi),%rdx mov 8*4(%rdi),%r8 mov 8*5(%rdi),%r9 mov 8*6(%rdi),%r10 mov 8*7(%rdi),%r11 jmp .Lloop .align 16 .Lloop: xor %rdi,%rdi mov 8*0(%rsi),%r12 bswap %r12 mov %r8,%r13 mov %r8,%r14 mov %r9,%r15 ror $14,%r13 ror $18,%r14 xor %r10,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r8,%r15 # (f^g)&e mov %r12,0(%rsp) xor %r14,%r13 # Sigma1(e) xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r11,%r12 # T1+=h mov %rax,%r11 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rax,%r13 mov %rax,%r14 ror $28,%r11 ror $34,%r13 mov %rax,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r11 ror $5,%r13 or %rcx,%r14 # a|c xor %r13,%r11 # h=Sigma0(a) and %rcx,%r15 # a&c add %r12,%rdx # d+=T1 and %rbx,%r14 # (a|c)&b add %r12,%r11 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r11 # h+=Maj(a,b,c) mov 8*1(%rsi),%r12 bswap %r12 mov %rdx,%r13 mov %rdx,%r14 mov %r8,%r15 ror $14,%r13 ror $18,%r14 xor %r9,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rdx,%r15 # (f^g)&e mov %r12,8(%rsp) xor %r14,%r13 # Sigma1(e) xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r10,%r12 # T1+=h mov %r11,%r10 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r11,%r13 mov %r11,%r14 ror $28,%r10 ror $34,%r13 mov %r11,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r10 ror $5,%r13 or %rbx,%r14 # a|c xor %r13,%r10 # h=Sigma0(a) and %rbx,%r15 # a&c add %r12,%rcx # d+=T1 and %rax,%r14 # (a|c)&b add %r12,%r10 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r10 # h+=Maj(a,b,c) mov 8*2(%rsi),%r12 bswap %r12 mov %rcx,%r13 mov %rcx,%r14 mov %rdx,%r15 ror $14,%r13 ror $18,%r14 xor %r8,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rcx,%r15 # (f^g)&e mov %r12,16(%rsp) xor %r14,%r13 # Sigma1(e) xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r9,%r12 # T1+=h mov 
%r10,%r9 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r10,%r13 mov %r10,%r14 ror $28,%r9 ror $34,%r13 mov %r10,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r9 ror $5,%r13 or %rax,%r14 # a|c xor %r13,%r9 # h=Sigma0(a) and %rax,%r15 # a&c add %r12,%rbx # d+=T1 and %r11,%r14 # (a|c)&b add %r12,%r9 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r9 # h+=Maj(a,b,c) mov 8*3(%rsi),%r12 bswap %r12 mov %rbx,%r13 mov %rbx,%r14 mov %rcx,%r15 ror $14,%r13 ror $18,%r14 xor %rdx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rbx,%r15 # (f^g)&e mov %r12,24(%rsp) xor %r14,%r13 # Sigma1(e) xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r8,%r12 # T1+=h mov %r9,%r8 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r9,%r13 mov %r9,%r14 ror $28,%r8 ror $34,%r13 mov %r9,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r8 ror $5,%r13 or %r11,%r14 # a|c xor %r13,%r8 # h=Sigma0(a) and %r11,%r15 # a&c add %r12,%rax # d+=T1 and %r10,%r14 # (a|c)&b add %r12,%r8 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r8 # h+=Maj(a,b,c) mov 8*4(%rsi),%r12 bswap %r12 mov %rax,%r13 mov %rax,%r14 mov %rbx,%r15 ror $14,%r13 ror $18,%r14 xor %rcx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rax,%r15 # (f^g)&e mov %r12,32(%rsp) xor %r14,%r13 # Sigma1(e) xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rdx,%r12 # T1+=h mov %r8,%rdx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r8,%r13 mov %r8,%r14 ror $28,%rdx ror $34,%r13 mov %r8,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rdx ror $5,%r13 or %r10,%r14 # a|c xor %r13,%rdx # h=Sigma0(a) and %r10,%r15 # a&c add %r12,%r11 # d+=T1 and %r9,%r14 # (a|c)&b add %r12,%rdx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rdx # h+=Maj(a,b,c) mov 8*5(%rsi),%r12 bswap %r12 mov %r11,%r13 mov %r11,%r14 mov %rax,%r15 ror $14,%r13 ror $18,%r14 xor %rbx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r11,%r15 # (f^g)&e mov %r12,40(%rsp) xor %r14,%r13 # Sigma1(e) xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rcx,%r12 # T1+=h mov %rdx,%rcx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rdx,%r13 mov %rdx,%r14 ror $28,%rcx ror $34,%r13 mov %rdx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rcx ror $5,%r13 or %r9,%r14 # a|c xor %r13,%rcx # h=Sigma0(a) and %r9,%r15 # a&c add %r12,%r10 # d+=T1 and %r8,%r14 # (a|c)&b add %r12,%rcx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rcx # h+=Maj(a,b,c) mov 8*6(%rsi),%r12 bswap %r12 mov %r10,%r13 mov %r10,%r14 mov %r11,%r15 ror $14,%r13 ror $18,%r14 xor %rax,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r10,%r15 # (f^g)&e mov %r12,48(%rsp) xor %r14,%r13 # Sigma1(e) xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rbx,%r12 # T1+=h mov %rcx,%rbx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rcx,%r13 mov %rcx,%r14 ror $28,%rbx ror $34,%r13 mov %rcx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rbx ror $5,%r13 or %r8,%r14 # a|c xor %r13,%rbx # h=Sigma0(a) and %r8,%r15 # a&c add %r12,%r9 # d+=T1 and %rdx,%r14 # (a|c)&b add %r12,%rbx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rbx # h+=Maj(a,b,c) mov 8*7(%rsi),%r12 bswap %r12 mov %r9,%r13 mov %r9,%r14 mov %r10,%r15 ror $14,%r13 ror $18,%r14 xor %r11,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r9,%r15 # (f^g)&e mov %r12,56(%rsp) xor %r14,%r13 # Sigma1(e) xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rax,%r12 # T1+=h mov %rbx,%rax add 
%r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rbx,%r13 mov %rbx,%r14 ror $28,%rax ror $34,%r13 mov %rbx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rax ror $5,%r13 or %rdx,%r14 # a|c xor %r13,%rax # h=Sigma0(a) and %rdx,%r15 # a&c add %r12,%r8 # d+=T1 and %rcx,%r14 # (a|c)&b add %r12,%rax # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rax # h+=Maj(a,b,c) mov 8*8(%rsi),%r12 bswap %r12 mov %r8,%r13 mov %r8,%r14 mov %r9,%r15 ror $14,%r13 ror $18,%r14 xor %r10,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r8,%r15 # (f^g)&e mov %r12,64(%rsp) xor %r14,%r13 # Sigma1(e) xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r11,%r12 # T1+=h mov %rax,%r11 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rax,%r13 mov %rax,%r14 ror $28,%r11 ror $34,%r13 mov %rax,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r11 ror $5,%r13 or %rcx,%r14 # a|c xor %r13,%r11 # h=Sigma0(a) and %rcx,%r15 # a&c add %r12,%rdx # d+=T1 and %rbx,%r14 # (a|c)&b add %r12,%r11 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r11 # h+=Maj(a,b,c) mov 8*9(%rsi),%r12 bswap %r12 mov %rdx,%r13 mov %rdx,%r14 mov %r8,%r15 ror $14,%r13 ror $18,%r14 xor %r9,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rdx,%r15 # (f^g)&e mov %r12,72(%rsp) xor %r14,%r13 # Sigma1(e) xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r10,%r12 # T1+=h mov %r11,%r10 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r11,%r13 mov %r11,%r14 ror $28,%r10 ror $34,%r13 mov %r11,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r10 ror $5,%r13 or %rbx,%r14 # a|c xor %r13,%r10 # h=Sigma0(a) and %rbx,%r15 # a&c add %r12,%rcx # d+=T1 and %rax,%r14 # (a|c)&b add %r12,%r10 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r10 # h+=Maj(a,b,c) mov 8*10(%rsi),%r12 bswap %r12 mov %rcx,%r13 mov %rcx,%r14 mov %rdx,%r15 ror $14,%r13 ror $18,%r14 xor %r8,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rcx,%r15 # (f^g)&e mov %r12,80(%rsp) xor %r14,%r13 # Sigma1(e) xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r9,%r12 # T1+=h mov %r10,%r9 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r10,%r13 mov %r10,%r14 ror $28,%r9 ror $34,%r13 mov %r10,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r9 ror $5,%r13 or %rax,%r14 # a|c xor %r13,%r9 # h=Sigma0(a) and %rax,%r15 # a&c add %r12,%rbx # d+=T1 and %r11,%r14 # (a|c)&b add %r12,%r9 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r9 # h+=Maj(a,b,c) mov 8*11(%rsi),%r12 bswap %r12 mov %rbx,%r13 mov %rbx,%r14 mov %rcx,%r15 ror $14,%r13 ror $18,%r14 xor %rdx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rbx,%r15 # (f^g)&e mov %r12,88(%rsp) xor %r14,%r13 # Sigma1(e) xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r8,%r12 # T1+=h mov %r9,%r8 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r9,%r13 mov %r9,%r14 ror $28,%r8 ror $34,%r13 mov %r9,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r8 ror $5,%r13 or %r11,%r14 # a|c xor %r13,%r8 # h=Sigma0(a) and %r11,%r15 # a&c add %r12,%rax # d+=T1 and %r10,%r14 # (a|c)&b add %r12,%r8 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r8 # h+=Maj(a,b,c) mov 8*12(%rsi),%r12 bswap %r12 mov %rax,%r13 mov %rax,%r14 mov %rbx,%r15 ror $14,%r13 ror $18,%r14 xor %rcx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rax,%r15 # (f^g)&e mov %r12,96(%rsp) xor %r14,%r13 # Sigma1(e) xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rdx,%r12 # T1+=h mov %r8,%rdx add %r13,%r12 # 
T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r8,%r13 mov %r8,%r14 ror $28,%rdx ror $34,%r13 mov %r8,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rdx ror $5,%r13 or %r10,%r14 # a|c xor %r13,%rdx # h=Sigma0(a) and %r10,%r15 # a&c add %r12,%r11 # d+=T1 and %r9,%r14 # (a|c)&b add %r12,%rdx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rdx # h+=Maj(a,b,c) mov 8*13(%rsi),%r12 bswap %r12 mov %r11,%r13 mov %r11,%r14 mov %rax,%r15 ror $14,%r13 ror $18,%r14 xor %rbx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r11,%r15 # (f^g)&e mov %r12,104(%rsp) xor %r14,%r13 # Sigma1(e) xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rcx,%r12 # T1+=h mov %rdx,%rcx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rdx,%r13 mov %rdx,%r14 ror $28,%rcx ror $34,%r13 mov %rdx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rcx ror $5,%r13 or %r9,%r14 # a|c xor %r13,%rcx # h=Sigma0(a) and %r9,%r15 # a&c add %r12,%r10 # d+=T1 and %r8,%r14 # (a|c)&b add %r12,%rcx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rcx # h+=Maj(a,b,c) mov 8*14(%rsi),%r12 bswap %r12 mov %r10,%r13 mov %r10,%r14 mov %r11,%r15 ror $14,%r13 ror $18,%r14 xor %rax,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r10,%r15 # (f^g)&e mov %r12,112(%rsp) xor %r14,%r13 # Sigma1(e) xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rbx,%r12 # T1+=h mov %rcx,%rbx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rcx,%r13 mov %rcx,%r14 ror $28,%rbx ror $34,%r13 mov %rcx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rbx ror $5,%r13 or %r8,%r14 # a|c xor %r13,%rbx # h=Sigma0(a) and %r8,%r15 # a&c add %r12,%r9 # d+=T1 and %rdx,%r14 # (a|c)&b add %r12,%rbx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rbx # h+=Maj(a,b,c) mov 8*15(%rsi),%r12 bswap %r12 mov %r9,%r13 mov %r9,%r14 mov %r10,%r15 ror $14,%r13 ror $18,%r14 xor %r11,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r9,%r15 # (f^g)&e mov %r12,120(%rsp) xor %r14,%r13 # Sigma1(e) xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rax,%r12 # T1+=h mov %rbx,%rax add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rbx,%r13 mov %rbx,%r14 ror $28,%rax ror $34,%r13 mov %rbx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rax ror $5,%r13 or %rdx,%r14 # a|c xor %r13,%rax # h=Sigma0(a) and %rdx,%r15 # a&c add %r12,%r8 # d+=T1 and %rcx,%r14 # (a|c)&b add %r12,%rax # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rax # h+=Maj(a,b,c) jmp .Lrounds_16_xx .align 16 .Lrounds_16_xx: mov 8(%rsp),%r13 mov 112(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 72(%rsp),%r12 add 0(%rsp),%r12 mov %r8,%r13 mov %r8,%r14 mov %r9,%r15 ror $14,%r13 ror $18,%r14 xor %r10,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r8,%r15 # (f^g)&e mov %r12,0(%rsp) xor %r14,%r13 # Sigma1(e) xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r11,%r12 # T1+=h mov %rax,%r11 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rax,%r13 mov %rax,%r14 ror $28,%r11 ror $34,%r13 mov %rax,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r11 ror $5,%r13 or %rcx,%r14 # a|c xor %r13,%r11 # h=Sigma0(a) and %rcx,%r15 # a&c add %r12,%rdx # d+=T1 and %rbx,%r14 # (a|c)&b add %r12,%r11 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r11 # h+=Maj(a,b,c) 
mov 16(%rsp),%r13 mov 120(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 80(%rsp),%r12 add 8(%rsp),%r12 mov %rdx,%r13 mov %rdx,%r14 mov %r8,%r15 ror $14,%r13 ror $18,%r14 xor %r9,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rdx,%r15 # (f^g)&e mov %r12,8(%rsp) xor %r14,%r13 # Sigma1(e) xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r10,%r12 # T1+=h mov %r11,%r10 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r11,%r13 mov %r11,%r14 ror $28,%r10 ror $34,%r13 mov %r11,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r10 ror $5,%r13 or %rbx,%r14 # a|c xor %r13,%r10 # h=Sigma0(a) and %rbx,%r15 # a&c add %r12,%rcx # d+=T1 and %rax,%r14 # (a|c)&b add %r12,%r10 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r10 # h+=Maj(a,b,c) mov 24(%rsp),%r13 mov 0(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 88(%rsp),%r12 add 16(%rsp),%r12 mov %rcx,%r13 mov %rcx,%r14 mov %rdx,%r15 ror $14,%r13 ror $18,%r14 xor %r8,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rcx,%r15 # (f^g)&e mov %r12,16(%rsp) xor %r14,%r13 # Sigma1(e) xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r9,%r12 # T1+=h mov %r10,%r9 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r10,%r13 mov %r10,%r14 ror $28,%r9 ror $34,%r13 mov %r10,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r9 ror $5,%r13 or %rax,%r14 # a|c xor %r13,%r9 # h=Sigma0(a) and %rax,%r15 # a&c add %r12,%rbx # d+=T1 and %r11,%r14 # (a|c)&b add %r12,%r9 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r9 # h+=Maj(a,b,c) mov 32(%rsp),%r13 mov 8(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 96(%rsp),%r12 add 24(%rsp),%r12 mov %rbx,%r13 mov %rbx,%r14 mov %rcx,%r15 ror $14,%r13 ror $18,%r14 xor %rdx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rbx,%r15 # (f^g)&e mov %r12,24(%rsp) xor %r14,%r13 # Sigma1(e) xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r8,%r12 # T1+=h mov %r9,%r8 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r9,%r13 mov %r9,%r14 ror $28,%r8 ror $34,%r13 mov %r9,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r8 ror $5,%r13 or %r11,%r14 # a|c xor %r13,%r8 # h=Sigma0(a) and %r11,%r15 # a&c add %r12,%rax # d+=T1 and %r10,%r14 # (a|c)&b add %r12,%r8 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r8 # h+=Maj(a,b,c) mov 40(%rsp),%r13 mov 16(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 104(%rsp),%r12 add 32(%rsp),%r12 mov %rax,%r13 mov %rax,%r14 mov %rbx,%r15 ror $14,%r13 ror $18,%r14 xor %rcx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rax,%r15 # (f^g)&e mov %r12,32(%rsp) xor %r14,%r13 # Sigma1(e) xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rdx,%r12 # T1+=h mov %r8,%rdx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r8,%r13 mov %r8,%r14 ror $28,%rdx ror 
$34,%r13 mov %r8,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rdx ror $5,%r13 or %r10,%r14 # a|c xor %r13,%rdx # h=Sigma0(a) and %r10,%r15 # a&c add %r12,%r11 # d+=T1 and %r9,%r14 # (a|c)&b add %r12,%rdx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rdx # h+=Maj(a,b,c) mov 48(%rsp),%r13 mov 24(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 112(%rsp),%r12 add 40(%rsp),%r12 mov %r11,%r13 mov %r11,%r14 mov %rax,%r15 ror $14,%r13 ror $18,%r14 xor %rbx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r11,%r15 # (f^g)&e mov %r12,40(%rsp) xor %r14,%r13 # Sigma1(e) xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rcx,%r12 # T1+=h mov %rdx,%rcx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rdx,%r13 mov %rdx,%r14 ror $28,%rcx ror $34,%r13 mov %rdx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rcx ror $5,%r13 or %r9,%r14 # a|c xor %r13,%rcx # h=Sigma0(a) and %r9,%r15 # a&c add %r12,%r10 # d+=T1 and %r8,%r14 # (a|c)&b add %r12,%rcx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rcx # h+=Maj(a,b,c) mov 56(%rsp),%r13 mov 32(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 120(%rsp),%r12 add 48(%rsp),%r12 mov %r10,%r13 mov %r10,%r14 mov %r11,%r15 ror $14,%r13 ror $18,%r14 xor %rax,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r10,%r15 # (f^g)&e mov %r12,48(%rsp) xor %r14,%r13 # Sigma1(e) xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rbx,%r12 # T1+=h mov %rcx,%rbx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rcx,%r13 mov %rcx,%r14 ror $28,%rbx ror $34,%r13 mov %rcx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rbx ror $5,%r13 or %r8,%r14 # a|c xor %r13,%rbx # h=Sigma0(a) and %r8,%r15 # a&c add %r12,%r9 # d+=T1 and %rdx,%r14 # (a|c)&b add %r12,%rbx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rbx # h+=Maj(a,b,c) mov 64(%rsp),%r13 mov 40(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 0(%rsp),%r12 add 56(%rsp),%r12 mov %r9,%r13 mov %r9,%r14 mov %r10,%r15 ror $14,%r13 ror $18,%r14 xor %r11,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r9,%r15 # (f^g)&e mov %r12,56(%rsp) xor %r14,%r13 # Sigma1(e) xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rax,%r12 # T1+=h mov %rbx,%rax add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rbx,%r13 mov %rbx,%r14 ror $28,%rax ror $34,%r13 mov %rbx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rax ror $5,%r13 or %rdx,%r14 # a|c xor %r13,%rax # h=Sigma0(a) and %rdx,%r15 # a&c add %r12,%r8 # d+=T1 and %rcx,%r14 # (a|c)&b add %r12,%rax # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rax # h+=Maj(a,b,c) mov 72(%rsp),%r13 mov 48(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 8(%rsp),%r12 add 64(%rsp),%r12 mov %r8,%r13 mov %r8,%r14 mov 
%r9,%r15 ror $14,%r13 ror $18,%r14 xor %r10,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r8,%r15 # (f^g)&e mov %r12,64(%rsp) xor %r14,%r13 # Sigma1(e) xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r11,%r12 # T1+=h mov %rax,%r11 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rax,%r13 mov %rax,%r14 ror $28,%r11 ror $34,%r13 mov %rax,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r11 ror $5,%r13 or %rcx,%r14 # a|c xor %r13,%r11 # h=Sigma0(a) and %rcx,%r15 # a&c add %r12,%rdx # d+=T1 and %rbx,%r14 # (a|c)&b add %r12,%r11 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r11 # h+=Maj(a,b,c) mov 80(%rsp),%r13 mov 56(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 16(%rsp),%r12 add 72(%rsp),%r12 mov %rdx,%r13 mov %rdx,%r14 mov %r8,%r15 ror $14,%r13 ror $18,%r14 xor %r9,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rdx,%r15 # (f^g)&e mov %r12,72(%rsp) xor %r14,%r13 # Sigma1(e) xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r10,%r12 # T1+=h mov %r11,%r10 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r11,%r13 mov %r11,%r14 ror $28,%r10 ror $34,%r13 mov %r11,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r10 ror $5,%r13 or %rbx,%r14 # a|c xor %r13,%r10 # h=Sigma0(a) and %rbx,%r15 # a&c add %r12,%rcx # d+=T1 and %rax,%r14 # (a|c)&b add %r12,%r10 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r10 # h+=Maj(a,b,c) mov 88(%rsp),%r13 mov 64(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 24(%rsp),%r12 add 80(%rsp),%r12 mov %rcx,%r13 mov %rcx,%r14 mov %rdx,%r15 ror $14,%r13 ror $18,%r14 xor %r8,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rcx,%r15 # (f^g)&e mov %r12,80(%rsp) xor %r14,%r13 # Sigma1(e) xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r9,%r12 # T1+=h mov %r10,%r9 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r10,%r13 mov %r10,%r14 ror $28,%r9 ror $34,%r13 mov %r10,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r9 ror $5,%r13 or %rax,%r14 # a|c xor %r13,%r9 # h=Sigma0(a) and %rax,%r15 # a&c add %r12,%rbx # d+=T1 and %r11,%r14 # (a|c)&b add %r12,%r9 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r9 # h+=Maj(a,b,c) mov 96(%rsp),%r13 mov 72(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 32(%rsp),%r12 add 88(%rsp),%r12 mov %rbx,%r13 mov %rbx,%r14 mov %rcx,%r15 ror $14,%r13 ror $18,%r14 xor %rdx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rbx,%r15 # (f^g)&e mov %r12,88(%rsp) xor %r14,%r13 # Sigma1(e) xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r8,%r12 # T1+=h mov %r9,%r8 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r9,%r13 mov %r9,%r14 ror $28,%r8 ror $34,%r13 mov %r9,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r8 ror $5,%r13 or %r11,%r14 # a|c xor %r13,%r8 # h=Sigma0(a) and %r11,%r15 # a&c add %r12,%rax # d+=T1 and %r10,%r14 # (a|c)&b add %r12,%r8 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r8 # h+=Maj(a,b,c) mov 
104(%rsp),%r13 mov 80(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 40(%rsp),%r12 add 96(%rsp),%r12 mov %rax,%r13 mov %rax,%r14 mov %rbx,%r15 ror $14,%r13 ror $18,%r14 xor %rcx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rax,%r15 # (f^g)&e mov %r12,96(%rsp) xor %r14,%r13 # Sigma1(e) xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rdx,%r12 # T1+=h mov %r8,%rdx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r8,%r13 mov %r8,%r14 ror $28,%rdx ror $34,%r13 mov %r8,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rdx ror $5,%r13 or %r10,%r14 # a|c xor %r13,%rdx # h=Sigma0(a) and %r10,%r15 # a&c add %r12,%r11 # d+=T1 and %r9,%r14 # (a|c)&b add %r12,%rdx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rdx # h+=Maj(a,b,c) mov 112(%rsp),%r13 mov 88(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 48(%rsp),%r12 add 104(%rsp),%r12 mov %r11,%r13 mov %r11,%r14 mov %rax,%r15 ror $14,%r13 ror $18,%r14 xor %rbx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r11,%r15 # (f^g)&e mov %r12,104(%rsp) xor %r14,%r13 # Sigma1(e) xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rcx,%r12 # T1+=h mov %rdx,%rcx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rdx,%r13 mov %rdx,%r14 ror $28,%rcx ror $34,%r13 mov %rdx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rcx ror $5,%r13 or %r9,%r14 # a|c xor %r13,%rcx # h=Sigma0(a) and %r9,%r15 # a&c add %r12,%r10 # d+=T1 and %r8,%r14 # (a|c)&b add %r12,%rcx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rcx # h+=Maj(a,b,c) mov 120(%rsp),%r13 mov 96(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 56(%rsp),%r12 add 112(%rsp),%r12 mov %r10,%r13 mov %r10,%r14 mov %r11,%r15 ror $14,%r13 ror $18,%r14 xor %rax,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r10,%r15 # (f^g)&e mov %r12,112(%rsp) xor %r14,%r13 # Sigma1(e) xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rbx,%r12 # T1+=h mov %rcx,%rbx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rcx,%r13 mov %rcx,%r14 ror $28,%rbx ror $34,%r13 mov %rcx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rbx ror $5,%r13 or %r8,%r14 # a|c xor %r13,%rbx # h=Sigma0(a) and %r8,%r15 # a&c add %r12,%r9 # d+=T1 and %rdx,%r14 # (a|c)&b add %r12,%rbx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rbx # h+=Maj(a,b,c) mov 0(%rsp),%r13 mov 104(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 64(%rsp),%r12 add 120(%rsp),%r12 mov %r9,%r13 mov %r9,%r14 mov %r10,%r15 ror $14,%r13 ror $18,%r14 xor %r11,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r9,%r15 # (f^g)&e mov %r12,120(%rsp) xor %r14,%r13 # Sigma1(e) xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rax,%r12 # T1+=h mov %rbx,%rax add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rbx,%r13 mov %rbx,%r14 ror 
$28,%rax ror $34,%r13 mov %rbx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rax ror $5,%r13 or %rdx,%r14 # a|c xor %r13,%rax # h=Sigma0(a) and %rdx,%r15 # a&c add %r12,%r8 # d+=T1 and %rcx,%r14 # (a|c)&b add %r12,%rax # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rax # h+=Maj(a,b,c) cmp $80,%rdi jb .Lrounds_16_xx mov 16*8+0*8(%rsp),%rdi lea 16*8(%rsi),%rsi add 8*0(%rdi),%rax add 8*1(%rdi),%rbx add 8*2(%rdi),%rcx add 8*3(%rdi),%rdx add 8*4(%rdi),%r8 add 8*5(%rdi),%r9 add 8*6(%rdi),%r10 add 8*7(%rdi),%r11 cmp 16*8+2*8(%rsp),%rsi mov %rax,8*0(%rdi) mov %rbx,8*1(%rdi) mov %rcx,8*2(%rdi) mov %rdx,8*3(%rdi) mov %r8,8*4(%rdi) mov %r9,8*5(%rdi) mov %r10,8*6(%rdi) mov %r11,8*7(%rdi) jb .Lloop mov 16*8+3*8(%rsp),%rsp .cfi_def_cfa %rsp,56 pop %r15 .cfi_adjust_cfa_offset -8 .cfi_restore %r15 pop %r14 .cfi_adjust_cfa_offset -8 .cfi_restore %r14 pop %r13 .cfi_adjust_cfa_offset -8 .cfi_restore %r13 pop %r12 .cfi_adjust_cfa_offset -8 .cfi_restore %r12 pop %rbp .cfi_adjust_cfa_offset -8 .cfi_restore %rbp pop %rbx .cfi_adjust_cfa_offset -8 .cfi_restore %rbx ret .cfi_endproc SET_SIZE(SHA512TransformBlocks) .data .align 64 .type K512,@object K512: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc .quad 0x3956c25bf348b538,0x59f111f1b605d019 .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 .quad 0xd807aa98a3030242,0x12835b0145706fbe .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 .quad 0x9bdc06a725c71235,0xc19bf174cf692694 .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 .quad 0x983e5152ee66dfab,0xa831c66d2db43210 .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 .quad 0x06ca6351e003826f,0x142929670a0e6e70 .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 .quad 0x81c2c92e47edaee6,0x92722c851482353b .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 .quad 0xd192e819d6ef5218,0xd69906245565a910 .quad 0xf40e35855771202a,0x106aa07032bbd1b8 .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec .quad 0x90befffa23631e28,0xa4506cebde82bde9 .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b .quad 0xca273eceea26619c,0xd186b8c721c0c207 .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 .quad 0x113f9804bef90dae,0x1b710b35131c471b .quad 0x28db77f523047d84,0x32caab7b40c72493 .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 #endif /* !lint && !__lint */ #ifdef __ELF__ .section .note.GNU-stack,"",%progbits #endif diff --git a/module/icp/core/kcf_mech_tabs.c b/module/icp/core/kcf_mech_tabs.c index 2642b317d698..e1ac7ffd5471 100644 --- a/module/icp/core/kcf_mech_tabs.c +++ b/module/icp/core/kcf_mech_tabs.c @@ -1,791 +1,791 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include /* Cryptographic mechanisms tables and their access functions */ /* * Internal numbers assigned to mechanisms are coded as follows: * * +----------------+----------------+ * | mech. class | mech. index | * <--- 32-bits --->+<--- 32-bits ---> * * the mech_class identifies the table the mechanism belongs to. * mech_index is the index for that mechanism in the table. * A mechanism belongs to exactly 1 table. * The tables are: * . digest_mechs_tab[] for the msg digest mechs. * . cipher_mechs_tab[] for encrypt/decrypt and wrap/unwrap mechs. * . mac_mechs_tab[] for MAC mechs. * . sign_mechs_tab[] for sign & verify mechs. * . keyops_mechs_tab[] for key/key pair generation, and key derivation. * . misc_mechs_tab[] for mechs that don't belong to any of the above. * * There are no holes in the tables. */ /* * Locking conventions: * -------------------- * A global mutex, kcf_mech_tabs_lock, serializes writes to the * mechanism table via kcf_create_mech_entry(). * * A mutex is associated with every entry of the tables. * The mutex is acquired whenever the entry is accessed for * 1) retrieving the mech_id (comparing the mech name) * 2) finding a provider for an xxx_init() or atomic operation. * 3) altering the mechs entry to add or remove a provider. * * In 2), after a provider is chosen, its prov_desc is held and the * entry's mutex must be dropped. The provider's working function (SPI) is * called outside the mech_entry's mutex. * * The number of providers for a particular mechanism is not expected to be * long enough to justify the cost of using rwlocks, so the per-mechanism * entry mutex won't be very *hot*. * * When both kcf_mech_tabs_lock and a mech_entry mutex need to be held, * kcf_mech_tabs_lock must always be acquired first. * */ /* Mechanisms tables */ /* RFE 4687834 Will deal with the extensibility of these tables later */ kcf_mech_entry_t kcf_digest_mechs_tab[KCF_MAXDIGEST]; kcf_mech_entry_t kcf_cipher_mechs_tab[KCF_MAXCIPHER]; kcf_mech_entry_t kcf_mac_mechs_tab[KCF_MAXMAC]; kcf_mech_entry_t kcf_sign_mechs_tab[KCF_MAXSIGN]; kcf_mech_entry_t kcf_keyops_mechs_tab[KCF_MAXKEYOPS]; kcf_mech_entry_t kcf_misc_mechs_tab[KCF_MAXMISC]; kcf_mech_entry_tab_t kcf_mech_tabs_tab[KCF_LAST_OPSCLASS + 1] = { {0, NULL}, /* No class zero */ {KCF_MAXDIGEST, kcf_digest_mechs_tab}, {KCF_MAXCIPHER, kcf_cipher_mechs_tab}, {KCF_MAXMAC, kcf_mac_mechs_tab}, {KCF_MAXSIGN, kcf_sign_mechs_tab}, {KCF_MAXKEYOPS, kcf_keyops_mechs_tab}, {KCF_MAXMISC, kcf_misc_mechs_tab} }; /* * Per-algorithm internal thresholds for the minimum input size of before * offloading to hardware provider. * Dispatching a crypto operation to a hardware provider entails paying the * cost of an additional context switch. Measurements with Sun Accelerator 4000 * shows that 512-byte jobs or smaller are better handled in software. 
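 * For illustration only (not part of this change): with
 * kcf_aes_threshold set to 512, a 256-byte AES request would be
 * expected to stay with the software provider, while a 4096-byte
 * request becomes a candidate for dispatch to a registered hardware
 * provider.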
* There is room for refinement here. * */ int kcf_md5_threshold = 512; int kcf_sha1_threshold = 512; int kcf_des_threshold = 512; int kcf_des3_threshold = 512; int kcf_aes_threshold = 512; int kcf_bf_threshold = 512; int kcf_rc4_threshold = 512; kmutex_t kcf_mech_tabs_lock; static uint32_t kcf_gen_swprov = 0; int kcf_mech_hash_size = 256; mod_hash_t *kcf_mech_hash; /* mech name to id hash */ static crypto_mech_type_t kcf_mech_hash_find(char *mechname) { mod_hash_val_t hv; crypto_mech_type_t mt; mt = CRYPTO_MECH_INVALID; if (mod_hash_find(kcf_mech_hash, (mod_hash_key_t)mechname, &hv) == 0) { mt = *(crypto_mech_type_t *)hv; ASSERT(mt != CRYPTO_MECH_INVALID); } return (mt); } void kcf_destroy_mech_tabs(void) { int i, max; kcf_ops_class_t class; kcf_mech_entry_t *me_tab; if (kcf_mech_hash) mod_hash_destroy_hash(kcf_mech_hash); mutex_destroy(&kcf_mech_tabs_lock); for (class = KCF_FIRST_OPSCLASS; class <= KCF_LAST_OPSCLASS; class++) { max = kcf_mech_tabs_tab[class].met_size; me_tab = kcf_mech_tabs_tab[class].met_tab; for (i = 0; i < max; i++) mutex_destroy(&(me_tab[i].me_mutex)); } } /* * kcf_init_mech_tabs() * * Called by the misc/kcf's _init() routine to initialize the tables * of mech_entry's. */ void kcf_init_mech_tabs(void) { int i, max; kcf_ops_class_t class; kcf_mech_entry_t *me_tab; /* Initializes the mutex locks. */ mutex_init(&kcf_mech_tabs_lock, NULL, MUTEX_DEFAULT, NULL); /* Then the pre-defined mechanism entries */ /* Two digests */ (void) strncpy(kcf_digest_mechs_tab[0].me_name, SUN_CKM_MD5, CRYPTO_MAX_MECH_NAME); kcf_digest_mechs_tab[0].me_threshold = kcf_md5_threshold; (void) strncpy(kcf_digest_mechs_tab[1].me_name, SUN_CKM_SHA1, CRYPTO_MAX_MECH_NAME); kcf_digest_mechs_tab[1].me_threshold = kcf_sha1_threshold; /* The symmetric ciphers in various modes */ (void) strncpy(kcf_cipher_mechs_tab[0].me_name, SUN_CKM_DES_CBC, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[0].me_threshold = kcf_des_threshold; (void) strncpy(kcf_cipher_mechs_tab[1].me_name, SUN_CKM_DES3_CBC, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[1].me_threshold = kcf_des3_threshold; (void) strncpy(kcf_cipher_mechs_tab[2].me_name, SUN_CKM_DES_ECB, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[2].me_threshold = kcf_des_threshold; (void) strncpy(kcf_cipher_mechs_tab[3].me_name, SUN_CKM_DES3_ECB, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[3].me_threshold = kcf_des3_threshold; (void) strncpy(kcf_cipher_mechs_tab[4].me_name, SUN_CKM_BLOWFISH_CBC, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[4].me_threshold = kcf_bf_threshold; (void) strncpy(kcf_cipher_mechs_tab[5].me_name, SUN_CKM_BLOWFISH_ECB, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[5].me_threshold = kcf_bf_threshold; (void) strncpy(kcf_cipher_mechs_tab[6].me_name, SUN_CKM_AES_CBC, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[6].me_threshold = kcf_aes_threshold; (void) strncpy(kcf_cipher_mechs_tab[7].me_name, SUN_CKM_AES_ECB, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[7].me_threshold = kcf_aes_threshold; (void) strncpy(kcf_cipher_mechs_tab[8].me_name, SUN_CKM_RC4, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[8].me_threshold = kcf_rc4_threshold; /* 4 HMACs */ (void) strncpy(kcf_mac_mechs_tab[0].me_name, SUN_CKM_MD5_HMAC, CRYPTO_MAX_MECH_NAME); kcf_mac_mechs_tab[0].me_threshold = kcf_md5_threshold; (void) strncpy(kcf_mac_mechs_tab[1].me_name, SUN_CKM_MD5_HMAC_GENERAL, CRYPTO_MAX_MECH_NAME); kcf_mac_mechs_tab[1].me_threshold = kcf_md5_threshold; (void) strncpy(kcf_mac_mechs_tab[2].me_name, SUN_CKM_SHA1_HMAC, CRYPTO_MAX_MECH_NAME); kcf_mac_mechs_tab[2].me_threshold = 
kcf_sha1_threshold; (void) strncpy(kcf_mac_mechs_tab[3].me_name, SUN_CKM_SHA1_HMAC_GENERAL, CRYPTO_MAX_MECH_NAME); kcf_mac_mechs_tab[3].me_threshold = kcf_sha1_threshold; /* 1 random number generation pseudo mechanism */ (void) strncpy(kcf_misc_mechs_tab[0].me_name, SUN_RANDOM, CRYPTO_MAX_MECH_NAME); kcf_mech_hash = mod_hash_create_strhash_nodtr("kcf mech2id hash", kcf_mech_hash_size, mod_hash_null_valdtor); for (class = KCF_FIRST_OPSCLASS; class <= KCF_LAST_OPSCLASS; class++) { max = kcf_mech_tabs_tab[class].met_size; me_tab = kcf_mech_tabs_tab[class].met_tab; for (i = 0; i < max; i++) { mutex_init(&(me_tab[i].me_mutex), NULL, MUTEX_DEFAULT, NULL); if (me_tab[i].me_name[0] != 0) { me_tab[i].me_mechid = KCF_MECHID(class, i); (void) mod_hash_insert(kcf_mech_hash, (mod_hash_key_t)me_tab[i].me_name, (mod_hash_val_t)&(me_tab[i].me_mechid)); } } } } /* * kcf_create_mech_entry() * * Arguments: * . The class of mechanism. * . the name of the new mechanism. * * Description: * Creates a new mech_entry for a mechanism not yet known to the * framework. * This routine is called by kcf_add_mech_provider, which is * in turn invoked for each mechanism supported by a provider. * The'class' argument depends on the crypto_func_group_t bitmask * in the registering provider's mech_info struct for this mechanism. * When there is ambiguity in the mapping between the crypto_func_group_t * and a class (dual ops, ...) the KCF_MISC_CLASS should be used. * * Context: * User context only. * * Returns: * KCF_INVALID_MECH_CLASS or KCF_INVALID_MECH_NAME if the class or * the mechname is bogus. * KCF_MECH_TAB_FULL when there is no room left in the mech. tabs. * KCF_SUCCESS otherwise. */ static int kcf_create_mech_entry(kcf_ops_class_t class, char *mechname) { crypto_mech_type_t mt; kcf_mech_entry_t *me_tab; int i = 0, size; if ((class < KCF_FIRST_OPSCLASS) || (class > KCF_LAST_OPSCLASS)) return (KCF_INVALID_MECH_CLASS); if ((mechname == NULL) || (mechname[0] == 0)) return (KCF_INVALID_MECH_NAME); /* * First check if the mechanism is already in one of the tables. * The mech_entry could be in another class. */ mutex_enter(&kcf_mech_tabs_lock); mt = kcf_mech_hash_find(mechname); if (mt != CRYPTO_MECH_INVALID) { /* Nothing to do, regardless the suggested class. */ mutex_exit(&kcf_mech_tabs_lock); return (KCF_SUCCESS); } /* Now take the next unused mech entry in the class's tab */ me_tab = kcf_mech_tabs_tab[class].met_tab; size = kcf_mech_tabs_tab[class].met_size; while (i < size) { mutex_enter(&(me_tab[i].me_mutex)); if (me_tab[i].me_name[0] == 0) { /* Found an empty spot */ (void) strlcpy(me_tab[i].me_name, mechname, CRYPTO_MAX_MECH_NAME); me_tab[i].me_name[CRYPTO_MAX_MECH_NAME-1] = '\0'; me_tab[i].me_mechid = KCF_MECHID(class, i); /* * No a-priori information about the new mechanism, so * the threshold is set to zero. */ me_tab[i].me_threshold = 0; mutex_exit(&(me_tab[i].me_mutex)); /* Add the new mechanism to the hash table */ (void) mod_hash_insert(kcf_mech_hash, (mod_hash_key_t)me_tab[i].me_name, (mod_hash_val_t)&(me_tab[i].me_mechid)); break; } mutex_exit(&(me_tab[i].me_mutex)); i++; } mutex_exit(&kcf_mech_tabs_lock); if (i == size) { return (KCF_MECH_TAB_FULL); } return (KCF_SUCCESS); } /* * kcf_add_mech_provider() * * Arguments: * . An index in to the provider mechanism array * . A pointer to the provider descriptor * . A storage for the kcf_prov_mech_desc_t the entry was added at. * * Description: * Adds a new provider of a mechanism to the mechanism's mech_entry * chain. * * Context: * User context only. 
* * Returns * KCF_SUCCESS on success * KCF_MECH_TAB_FULL otherwise. */ int kcf_add_mech_provider(short mech_indx, kcf_provider_desc_t *prov_desc, kcf_prov_mech_desc_t **pmdpp) { int error; kcf_mech_entry_t *mech_entry = NULL; crypto_mech_info_t *mech_info; crypto_mech_type_t kcf_mech_type, mt; kcf_prov_mech_desc_t *prov_mech, *prov_mech2; crypto_func_group_t simple_fg_mask, dual_fg_mask; crypto_mech_info_t *dmi; crypto_mech_info_list_t *mil, *mil2; kcf_mech_entry_t *me; int i; ASSERT(prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); mech_info = &prov_desc->pd_mechanisms[mech_indx]; /* * A mechanism belongs to exactly one mechanism table. * Find the class corresponding to the function group flag of * the mechanism. */ kcf_mech_type = kcf_mech_hash_find(mech_info->cm_mech_name); if (kcf_mech_type == CRYPTO_MECH_INVALID) { crypto_func_group_t fg = mech_info->cm_func_group_mask; kcf_ops_class_t class; if (fg & CRYPTO_FG_DIGEST || fg & CRYPTO_FG_DIGEST_ATOMIC) class = KCF_DIGEST_CLASS; else if (fg & CRYPTO_FG_ENCRYPT || fg & CRYPTO_FG_DECRYPT || fg & CRYPTO_FG_ENCRYPT_ATOMIC || fg & CRYPTO_FG_DECRYPT_ATOMIC) class = KCF_CIPHER_CLASS; else if (fg & CRYPTO_FG_MAC || fg & CRYPTO_FG_MAC_ATOMIC) class = KCF_MAC_CLASS; else if (fg & CRYPTO_FG_SIGN || fg & CRYPTO_FG_VERIFY || fg & CRYPTO_FG_SIGN_ATOMIC || fg & CRYPTO_FG_VERIFY_ATOMIC || fg & CRYPTO_FG_SIGN_RECOVER || fg & CRYPTO_FG_VERIFY_RECOVER) class = KCF_SIGN_CLASS; else if (fg & CRYPTO_FG_GENERATE || fg & CRYPTO_FG_GENERATE_KEY_PAIR || fg & CRYPTO_FG_WRAP || fg & CRYPTO_FG_UNWRAP || fg & CRYPTO_FG_DERIVE) class = KCF_KEYOPS_CLASS; else class = KCF_MISC_CLASS; /* * Attempt to create a new mech_entry for the specified * mechanism. kcf_create_mech_entry() can handle the case * where such an entry already exists. */ if ((error = kcf_create_mech_entry(class, mech_info->cm_mech_name)) != KCF_SUCCESS) { return (error); } /* get the KCF mech type that was assigned to the mechanism */ kcf_mech_type = kcf_mech_hash_find(mech_info->cm_mech_name); ASSERT(kcf_mech_type != CRYPTO_MECH_INVALID); } error = kcf_get_mech_entry(kcf_mech_type, &mech_entry); ASSERT(error == KCF_SUCCESS); /* allocate and initialize new kcf_prov_mech_desc */ prov_mech = kmem_zalloc(sizeof (kcf_prov_mech_desc_t), KM_SLEEP); bcopy(mech_info, &prov_mech->pm_mech_info, sizeof (crypto_mech_info_t)); prov_mech->pm_prov_desc = prov_desc; prov_desc->pd_mech_indx[KCF_MECH2CLASS(kcf_mech_type)] [KCF_MECH2INDEX(kcf_mech_type)] = mech_indx; KCF_PROV_REFHOLD(prov_desc); KCF_PROV_IREFHOLD(prov_desc); dual_fg_mask = mech_info->cm_func_group_mask & CRYPTO_FG_DUAL_MASK; if (dual_fg_mask == ((crypto_func_group_t)0)) goto add_entry; simple_fg_mask = (mech_info->cm_func_group_mask & CRYPTO_FG_SIMPLEOP_MASK) | CRYPTO_FG_RANDOM; for (i = 0; i < prov_desc->pd_mech_list_count; i++) { dmi = &prov_desc->pd_mechanisms[i]; /* skip self */ if (dmi->cm_mech_number == mech_info->cm_mech_number) continue; /* skip if not a dual operation mechanism */ if (!(dmi->cm_func_group_mask & dual_fg_mask) || (dmi->cm_func_group_mask & simple_fg_mask)) continue; mt = kcf_mech_hash_find(dmi->cm_mech_name); if (mt == CRYPTO_MECH_INVALID) continue; if (kcf_get_mech_entry(mt, &me) != KCF_SUCCESS) continue; mil = kmem_zalloc(sizeof (*mil), KM_SLEEP); mil2 = kmem_zalloc(sizeof (*mil2), KM_SLEEP); /* * Ignore hard-coded entries in the mech table * if the provider hasn't registered. 
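 * (That is, skip the pre-defined entries set up by
 * kcf_init_mech_tabs() that no provider has claimed yet; for such an
 * entry both me_hw_prov_chain and me_sw_prov are still NULL.)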
*/ mutex_enter(&me->me_mutex); if (me->me_hw_prov_chain == NULL && me->me_sw_prov == NULL) { mutex_exit(&me->me_mutex); kmem_free(mil, sizeof (*mil)); kmem_free(mil2, sizeof (*mil2)); continue; } /* * Add other dual mechanisms that have registered * with the framework to this mechanism's * cross-reference list. */ mil->ml_mech_info = *dmi; /* struct assignment */ mil->ml_kcf_mechid = mt; /* add to head of list */ mil->ml_next = prov_mech->pm_mi_list; prov_mech->pm_mi_list = mil; if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER) prov_mech2 = me->me_hw_prov_chain; else prov_mech2 = me->me_sw_prov; if (prov_mech2 == NULL) { kmem_free(mil2, sizeof (*mil2)); mutex_exit(&me->me_mutex); continue; } /* * Update all other cross-reference lists by * adding this new mechanism. */ while (prov_mech2 != NULL) { if (prov_mech2->pm_prov_desc == prov_desc) { /* struct assignment */ mil2->ml_mech_info = *mech_info; mil2->ml_kcf_mechid = kcf_mech_type; /* add to head of list */ mil2->ml_next = prov_mech2->pm_mi_list; prov_mech2->pm_mi_list = mil2; break; } prov_mech2 = prov_mech2->pm_next; } if (prov_mech2 == NULL) kmem_free(mil2, sizeof (*mil2)); mutex_exit(&me->me_mutex); } add_entry: /* * Add new kcf_prov_mech_desc at the front of HW providers * chain. */ switch (prov_desc->pd_prov_type) { case CRYPTO_HW_PROVIDER: mutex_enter(&mech_entry->me_mutex); prov_mech->pm_me = mech_entry; prov_mech->pm_next = mech_entry->me_hw_prov_chain; mech_entry->me_hw_prov_chain = prov_mech; mech_entry->me_num_hwprov++; mutex_exit(&mech_entry->me_mutex); break; case CRYPTO_SW_PROVIDER: mutex_enter(&mech_entry->me_mutex); if (mech_entry->me_sw_prov != NULL) { /* * There is already a SW provider for this mechanism. * Since we allow only one SW provider per mechanism, * report this condition. */ cmn_err(CE_WARN, "The cryptographic software provider " "\"%s\" will not be used for %s. The provider " "\"%s\" will be used for this mechanism " "instead.", prov_desc->pd_description, mech_info->cm_mech_name, mech_entry->me_sw_prov->pm_prov_desc-> pd_description); KCF_PROV_REFRELE(prov_desc); kmem_free(prov_mech, sizeof (kcf_prov_mech_desc_t)); prov_mech = NULL; } else { /* * Set the provider as the software provider for * this mechanism. */ mech_entry->me_sw_prov = prov_mech; /* We'll wrap around after 4 billion registrations! */ mech_entry->me_gen_swprov = kcf_gen_swprov++; } mutex_exit(&mech_entry->me_mutex); break; default: break; } *pmdpp = prov_mech; return (KCF_SUCCESS); } /* * kcf_remove_mech_provider() * * Arguments: * . mech_name: the name of the mechanism. * . prov_desc: The provider descriptor * * Description: * Removes a provider from chain of provider descriptors. * The provider is made unavailable to kernel consumers for the specified * mechanism. * * Context: * User context only. */ void kcf_remove_mech_provider(char *mech_name, kcf_provider_desc_t *prov_desc) { crypto_mech_type_t mech_type; kcf_prov_mech_desc_t *prov_mech = NULL, *prov_chain; kcf_prov_mech_desc_t **prev_entry_next; kcf_mech_entry_t *mech_entry; crypto_mech_info_list_t *mil, *mil2, *next, **prev_next; ASSERT(prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); /* get the KCF mech type that was assigned to the mechanism */ if ((mech_type = kcf_mech_hash_find(mech_name)) == CRYPTO_MECH_INVALID) { /* * Provider was not allowed for this mech due to policy or * configuration. 
*/ return; } /* get a ptr to the mech_entry that was created */ if (kcf_get_mech_entry(mech_type, &mech_entry) != KCF_SUCCESS) { /* * Provider was not allowed for this mech due to policy or * configuration. */ return; } mutex_enter(&mech_entry->me_mutex); switch (prov_desc->pd_prov_type) { case CRYPTO_HW_PROVIDER: /* find the provider in the mech_entry chain */ prev_entry_next = &mech_entry->me_hw_prov_chain; prov_mech = mech_entry->me_hw_prov_chain; while (prov_mech != NULL && prov_mech->pm_prov_desc != prov_desc) { prev_entry_next = &prov_mech->pm_next; prov_mech = prov_mech->pm_next; } if (prov_mech == NULL) { /* entry not found, simply return */ mutex_exit(&mech_entry->me_mutex); return; } /* remove provider entry from mech_entry chain */ *prev_entry_next = prov_mech->pm_next; ASSERT(mech_entry->me_num_hwprov > 0); mech_entry->me_num_hwprov--; break; case CRYPTO_SW_PROVIDER: if (mech_entry->me_sw_prov == NULL || mech_entry->me_sw_prov->pm_prov_desc != prov_desc) { /* not the software provider for this mechanism */ mutex_exit(&mech_entry->me_mutex); return; } prov_mech = mech_entry->me_sw_prov; mech_entry->me_sw_prov = NULL; break; default: /* unexpected crypto_provider_type_t */ mutex_exit(&mech_entry->me_mutex); return; } mutex_exit(&mech_entry->me_mutex); /* Free the dual ops cross-reference lists */ mil = prov_mech->pm_mi_list; while (mil != NULL) { next = mil->ml_next; if (kcf_get_mech_entry(mil->ml_kcf_mechid, &mech_entry) != KCF_SUCCESS) { mil = next; continue; } mutex_enter(&mech_entry->me_mutex); if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER) prov_chain = mech_entry->me_hw_prov_chain; else prov_chain = mech_entry->me_sw_prov; while (prov_chain != NULL) { if (prov_chain->pm_prov_desc == prov_desc) { prev_next = &prov_chain->pm_mi_list; mil2 = prov_chain->pm_mi_list; while (mil2 != NULL && mil2->ml_kcf_mechid != mech_type) { prev_next = &mil2->ml_next; mil2 = mil2->ml_next; } if (mil2 != NULL) { *prev_next = mil2->ml_next; kmem_free(mil2, sizeof (*mil2)); } break; } prov_chain = prov_chain->pm_next; } mutex_exit(&mech_entry->me_mutex); kmem_free(mil, sizeof (crypto_mech_info_list_t)); mil = next; } /* free entry */ KCF_PROV_REFRELE(prov_mech->pm_prov_desc); KCF_PROV_IREFRELE(prov_mech->pm_prov_desc); kmem_free(prov_mech, sizeof (kcf_prov_mech_desc_t)); } /* * kcf_get_mech_entry() * * Arguments: * . The framework mechanism type * . Storage for the mechanism entry * * Description: * Retrieves the mechanism entry for the mech. * * Context: * User and interrupt contexts. * * Returns: * KCF_MECHANISM_XXX appropriate error code. * KCF_SUCCESS otherwise. */ int kcf_get_mech_entry(crypto_mech_type_t mech_type, kcf_mech_entry_t **mep) { kcf_ops_class_t class; int index; kcf_mech_entry_tab_t *me_tab; ASSERT(mep != NULL); class = KCF_MECH2CLASS(mech_type); if ((class < KCF_FIRST_OPSCLASS) || (class > KCF_LAST_OPSCLASS)) { /* the caller won't need to know it's an invalid class */ return (KCF_INVALID_MECH_NUMBER); } me_tab = &kcf_mech_tabs_tab[class]; index = KCF_MECH2INDEX(mech_type); if ((index < 0) || (index >= me_tab->met_size)) { return (KCF_INVALID_MECH_NUMBER); } *mep = &((me_tab->met_tab)[index]); return (KCF_SUCCESS); } /* CURRENTLY UNSUPPORTED: attempting to load the module if it isn't found */ /* * Lookup the hash table for an entry that matches the mechname. * If there are no hardware or software providers for the mechanism, * but there is an unloaded software provider, this routine will attempt * to load it. 
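 * For illustration: a typical kernel consumer calls, e.g.,
 * crypto_mech2id(SUN_CKM_AES_CBC) once and caches the returned
 * crypto_mech_type_t for use in later framework calls.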
* * If the MOD_NOAUTOUNLOAD flag is not set, a software provider is * in constant danger of being unloaded. For consumers that call * crypto_mech2id() only once, the provider will not be reloaded * if it becomes unloaded. If a provider gets loaded elsewhere * without the MOD_NOAUTOUNLOAD flag being set, we set it now. */ crypto_mech_type_t crypto_mech2id_common(char *mechname, boolean_t load_module) { - crypto_mech_type_t mt = kcf_mech_hash_find(mechname); - return (mt); + (void) load_module; + return (kcf_mech_hash_find(mechname)); } diff --git a/module/icp/core/kcf_prov_lib.c b/module/icp/core/kcf_prov_lib.c index 1b115d976232..6e8853c56dc6 100644 --- a/module/icp/core/kcf_prov_lib.c +++ b/module/icp/core/kcf_prov_lib.c @@ -1,227 +1,228 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include /* * Utility routine to copy a buffer to a crypto_data structure. */ /* * Utility routine to apply the command, 'cmd', to the * data in the uio structure. */ int crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd, void *digest_ctx, void (*update)(void)) { + (void) digest_ctx, (void) update; zfs_uio_t *uiop = data->cd_uio; off_t offset = data->cd_offset; size_t length = len; uint_t vec_idx; size_t cur_len; uchar_t *datap; ASSERT(data->cd_format == CRYPTO_DATA_UIO); if (zfs_uio_segflg(uiop) != UIO_SYSSPACE) { return (CRYPTO_ARGUMENTS_BAD); } /* * Jump to the first iovec containing data to be * processed. */ offset = zfs_uio_index_at_offset(uiop, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) { /* * The caller specified an offset that is larger than * the total size of the buffers it provided. */ return (CRYPTO_DATA_LEN_RANGE); } while (vec_idx < zfs_uio_iovcnt(uiop) && length > 0) { cur_len = MIN(zfs_uio_iovlen(uiop, vec_idx) - offset, length); datap = (uchar_t *)(zfs_uio_iovbase(uiop, vec_idx) + offset); switch (cmd) { case COPY_FROM_DATA: bcopy(datap, buf, cur_len); buf += cur_len; break; case COPY_TO_DATA: bcopy(buf, datap, cur_len); buf += cur_len; break; case COMPARE_TO_DATA: if (bcmp(datap, buf, cur_len)) return (CRYPTO_SIGNATURE_INVALID); buf += cur_len; break; case MD5_DIGEST_DATA: case SHA1_DIGEST_DATA: case SHA2_DIGEST_DATA: case GHASH_DATA: return (CRYPTO_ARGUMENTS_BAD); } length -= cur_len; vec_idx++; offset = 0; } if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed. 
*/ switch (cmd) { case COPY_TO_DATA: data->cd_length = len; return (CRYPTO_BUFFER_TOO_SMALL); default: return (CRYPTO_DATA_LEN_RANGE); } } return (CRYPTO_SUCCESS); } int crypto_put_output_data(uchar_t *buf, crypto_data_t *output, int len) { switch (output->cd_format) { case CRYPTO_DATA_RAW: if (output->cd_raw.iov_len < len) { output->cd_length = len; return (CRYPTO_BUFFER_TOO_SMALL); } bcopy(buf, (uchar_t *)(output->cd_raw.iov_base + output->cd_offset), len); break; case CRYPTO_DATA_UIO: return (crypto_uio_data(output, buf, len, COPY_TO_DATA, NULL, NULL)); default: return (CRYPTO_ARGUMENTS_BAD); } return (CRYPTO_SUCCESS); } int crypto_update_iov(void *ctx, crypto_data_t *input, crypto_data_t *output, int (*cipher)(void *, caddr_t, size_t, crypto_data_t *), void (*copy_block)(uint8_t *, uint64_t *)) { common_ctx_t *common_ctx = ctx; int rv; ASSERT(input != output); if (input->cd_miscdata != NULL) { copy_block((uint8_t *)input->cd_miscdata, &common_ctx->cc_iv[0]); } if (input->cd_raw.iov_len < input->cd_length) return (CRYPTO_ARGUMENTS_BAD); rv = (cipher)(ctx, input->cd_raw.iov_base + input->cd_offset, input->cd_length, output); return (rv); } int crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output, int (*cipher)(void *, caddr_t, size_t, crypto_data_t *), void (*copy_block)(uint8_t *, uint64_t *)) { common_ctx_t *common_ctx = ctx; zfs_uio_t *uiop = input->cd_uio; off_t offset = input->cd_offset; size_t length = input->cd_length; uint_t vec_idx; size_t cur_len; ASSERT(input != output); if (input->cd_miscdata != NULL) { copy_block((uint8_t *)input->cd_miscdata, &common_ctx->cc_iv[0]); } if (zfs_uio_segflg(input->cd_uio) != UIO_SYSSPACE) { return (CRYPTO_ARGUMENTS_BAD); } /* * Jump to the first iovec containing data to be * processed. */ offset = zfs_uio_index_at_offset(uiop, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) { /* * The caller specified an offset that is larger than the * total size of the buffers it provided. */ return (CRYPTO_DATA_LEN_RANGE); } /* * Now process the iovecs. */ while (vec_idx < zfs_uio_iovcnt(uiop) && length > 0) { cur_len = MIN(zfs_uio_iovlen(uiop, vec_idx) - offset, length); int rv = (cipher)(ctx, zfs_uio_iovbase(uiop, vec_idx) + offset, cur_len, output); if (rv != CRYPTO_SUCCESS) { return (rv); } length -= cur_len; vec_idx++; offset = 0; } if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed, i.e. * The caller requested to digest more data than it provided. */ return (CRYPTO_DATA_LEN_RANGE); } return (CRYPTO_SUCCESS); } diff --git a/module/icp/core/kcf_sched.c b/module/icp/core/kcf_sched.c index 81fd15f8ea26..e4ccdbde9fb4 100644 --- a/module/icp/core/kcf_sched.c +++ b/module/icp/core/kcf_sched.c @@ -1,1780 +1,1780 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * This file contains the core framework routines for the * kernel cryptographic framework. These routines are at the * layer, between the kernel API/ioctls and the SPI. */ #include #include #include #include #include kcf_global_swq_t *gswq; /* Global software queue */ /* Thread pool related variables */ static kcf_pool_t *kcfpool; /* Thread pool of kcfd LWPs */ int kcf_maxthreads = 2; int kcf_minthreads = 1; int kcf_thr_multiple = 2; /* Boot-time tunable for experimentation */ static ulong_t kcf_idlethr_timeout; #define KCF_DEFAULT_THRTIMEOUT 60000000 /* 60 seconds */ /* kmem caches used by the scheduler */ static kmem_cache_t *kcf_sreq_cache; static kmem_cache_t *kcf_areq_cache; static kmem_cache_t *kcf_context_cache; /* Global request ID table */ static kcf_reqid_table_t *kcf_reqid_table[REQID_TABLES]; /* KCF stats. Not protected. */ static kcf_stats_t kcf_ksdata = { { "total threads in pool", KSTAT_DATA_UINT32}, { "idle threads in pool", KSTAT_DATA_UINT32}, { "min threads in pool", KSTAT_DATA_UINT32}, { "max threads in pool", KSTAT_DATA_UINT32}, { "requests in gswq", KSTAT_DATA_UINT32}, { "max requests in gswq", KSTAT_DATA_UINT32}, { "threads for HW taskq", KSTAT_DATA_UINT32}, { "minalloc for HW taskq", KSTAT_DATA_UINT32}, { "maxalloc for HW taskq", KSTAT_DATA_UINT32} }; static kstat_t *kcf_misc_kstat = NULL; ulong_t kcf_swprov_hndl = 0; static kcf_areq_node_t *kcf_areqnode_alloc(kcf_provider_desc_t *, kcf_context_t *, crypto_call_req_t *, kcf_req_params_t *, boolean_t); static int kcf_disp_sw_request(kcf_areq_node_t *); static void process_req_hwp(void *); static int kcf_enqueue(kcf_areq_node_t *); static void kcfpool_alloc(void); static void kcf_reqid_delete(kcf_areq_node_t *areq); static crypto_req_id_t kcf_reqid_insert(kcf_areq_node_t *areq); static int kcf_misc_kstat_update(kstat_t *ksp, int rw); /* * Create a new context. */ crypto_ctx_t * kcf_new_ctx(crypto_call_req_t *crq, kcf_provider_desc_t *pd, crypto_session_id_t sid) { crypto_ctx_t *ctx; kcf_context_t *kcf_ctx; kcf_ctx = kmem_cache_alloc(kcf_context_cache, (crq == NULL) ? KM_SLEEP : KM_NOSLEEP); if (kcf_ctx == NULL) return (NULL); /* initialize the context for the consumer */ kcf_ctx->kc_refcnt = 1; kcf_ctx->kc_req_chain_first = NULL; kcf_ctx->kc_req_chain_last = NULL; kcf_ctx->kc_secondctx = NULL; KCF_PROV_REFHOLD(pd); kcf_ctx->kc_prov_desc = pd; kcf_ctx->kc_sw_prov_desc = NULL; kcf_ctx->kc_mech = NULL; ctx = &kcf_ctx->kc_glbl_ctx; ctx->cc_provider = pd->pd_prov_handle; ctx->cc_session = sid; ctx->cc_provider_private = NULL; ctx->cc_framework_private = (void *)kcf_ctx; ctx->cc_flags = 0; ctx->cc_opstate = NULL; return (ctx); } /* * Allocate a new async request node. * * ictx - Framework private context pointer * crq - Has callback function and argument. Should be non NULL. 
* req - The parameters to pass to the SPI */ static kcf_areq_node_t * kcf_areqnode_alloc(kcf_provider_desc_t *pd, kcf_context_t *ictx, crypto_call_req_t *crq, kcf_req_params_t *req, boolean_t isdual) { kcf_areq_node_t *arptr, *areq; ASSERT(crq != NULL); arptr = kmem_cache_alloc(kcf_areq_cache, KM_NOSLEEP); if (arptr == NULL) return (NULL); arptr->an_state = REQ_ALLOCATED; arptr->an_reqarg = *crq; arptr->an_params = *req; arptr->an_context = ictx; arptr->an_isdual = isdual; arptr->an_next = arptr->an_prev = NULL; KCF_PROV_REFHOLD(pd); arptr->an_provider = pd; arptr->an_tried_plist = NULL; arptr->an_refcnt = 1; arptr->an_idnext = arptr->an_idprev = NULL; /* * Requests for context-less operations do not use the * fields - an_is_my_turn, and an_ctxchain_next. */ if (ictx == NULL) return (arptr); KCF_CONTEXT_REFHOLD(ictx); /* * Chain this request to the context. */ mutex_enter(&ictx->kc_in_use_lock); arptr->an_ctxchain_next = NULL; if ((areq = ictx->kc_req_chain_last) == NULL) { arptr->an_is_my_turn = B_TRUE; ictx->kc_req_chain_last = ictx->kc_req_chain_first = arptr; } else { ASSERT(ictx->kc_req_chain_first != NULL); arptr->an_is_my_turn = B_FALSE; /* Insert the new request to the end of the chain. */ areq->an_ctxchain_next = arptr; ictx->kc_req_chain_last = arptr; } mutex_exit(&ictx->kc_in_use_lock); return (arptr); } /* * Queue the request node and do one of the following: * - If there is an idle thread signal it to run. * - If there is no idle thread and max running threads is not * reached, signal the creator thread for more threads. * * If the two conditions above are not met, we don't need to do * anything. The request will be picked up by one of the * worker threads when it becomes available. */ static int kcf_disp_sw_request(kcf_areq_node_t *areq) { int err; int cnt = 0; if ((err = kcf_enqueue(areq)) != 0) return (err); if (kcfpool->kp_idlethreads > 0) { /* Signal an idle thread to run */ mutex_enter(&gswq->gs_lock); cv_signal(&gswq->gs_cv); mutex_exit(&gswq->gs_lock); return (CRYPTO_QUEUED); } /* * We keep the number of running threads to be at * kcf_minthreads to reduce gs_lock contention. */ cnt = kcf_minthreads - (kcfpool->kp_threads - kcfpool->kp_blockedthreads); if (cnt > 0) { /* * The following ensures the number of threads in pool * does not exceed kcf_maxthreads. */ cnt = MIN(cnt, kcf_maxthreads - (int)kcfpool->kp_threads); if (cnt > 0) { /* Signal the creator thread for more threads */ mutex_enter(&kcfpool->kp_user_lock); if (!kcfpool->kp_signal_create_thread) { kcfpool->kp_signal_create_thread = B_TRUE; kcfpool->kp_nthrs = cnt; cv_signal(&kcfpool->kp_user_cv); } mutex_exit(&kcfpool->kp_user_lock); } } return (CRYPTO_QUEUED); } /* * This routine is called by the taskq associated with * each hardware provider. We notify the kernel consumer * via the callback routine in case of CRYPTO_SUCCESS or * a failure. * * A request can be of type kcf_areq_node_t or of type * kcf_sreq_node_t. */ static void process_req_hwp(void *ireq) { int error = 0; crypto_ctx_t *ctx; kcf_call_type_t ctype; kcf_provider_desc_t *pd; kcf_areq_node_t *areq = (kcf_areq_node_t *)ireq; kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)ireq; pd = ((ctype = GET_REQ_TYPE(ireq)) == CRYPTO_SYNCH) ? sreq->sn_provider : areq->an_provider; /* * Wait if flow control is in effect for the provider. A * CRYPTO_PROVIDER_READY or CRYPTO_PROVIDER_FAILED * notification will signal us. We also get signaled if * the provider is unregistering. 
*/ if (pd->pd_state == KCF_PROV_BUSY) { mutex_enter(&pd->pd_lock); while (pd->pd_state == KCF_PROV_BUSY) cv_wait(&pd->pd_resume_cv, &pd->pd_lock); mutex_exit(&pd->pd_lock); } /* * Bump the internal reference count while the request is being * processed. This is how we know when it's safe to unregister * a provider. This step must precede the pd_state check below. */ KCF_PROV_IREFHOLD(pd); /* * Fail the request if the provider has failed. We return a * recoverable error and the notified clients attempt any * recovery. For async clients this is done in kcf_aop_done() * and for sync clients it is done in the k-api routines. */ if (pd->pd_state >= KCF_PROV_FAILED) { error = CRYPTO_DEVICE_ERROR; goto bail; } if (ctype == CRYPTO_SYNCH) { mutex_enter(&sreq->sn_lock); sreq->sn_state = REQ_INPROGRESS; mutex_exit(&sreq->sn_lock); ctx = sreq->sn_context ? &sreq->sn_context->kc_glbl_ctx : NULL; error = common_submit_request(sreq->sn_provider, ctx, sreq->sn_params, sreq); } else { kcf_context_t *ictx; ASSERT(ctype == CRYPTO_ASYNCH); /* * We are in the per-hardware provider thread context and * hence can sleep. Note that the caller would have done * a taskq_dispatch(..., TQ_NOSLEEP) and would have returned. */ ctx = (ictx = areq->an_context) ? &ictx->kc_glbl_ctx : NULL; mutex_enter(&areq->an_lock); /* * We need to maintain ordering for multi-part requests. * an_is_my_turn is set to B_TRUE initially for a request * when it is enqueued and there are no other requests * for that context. It is set later from kcf_aop_done() when * the request before us in the chain of requests for the * context completes. We get signaled at that point. */ if (ictx != NULL) { ASSERT(ictx->kc_prov_desc == areq->an_provider); while (areq->an_is_my_turn == B_FALSE) { cv_wait(&areq->an_turn_cv, &areq->an_lock); } } areq->an_state = REQ_INPROGRESS; mutex_exit(&areq->an_lock); error = common_submit_request(areq->an_provider, ctx, &areq->an_params, areq); } bail: if (error == CRYPTO_QUEUED) { /* * The request is queued by the provider and we should * get a crypto_op_notification() from the provider later. * We notify the consumer at that time. */ return; } else { /* CRYPTO_SUCCESS or other failure */ KCF_PROV_IREFRELE(pd); if (ctype == CRYPTO_SYNCH) kcf_sop_done(sreq, error); else kcf_aop_done(areq, error); } } /* * This routine checks if a request can be retried on another * provider. If true, mech1 is initialized to point to the mechanism * structure. mech2 is also initialized in case of a dual operation. fg * is initialized to the correct crypto_func_group_t bit flag. They are * initialized by this routine, so that the caller can pass them to a * kcf_get_mech_provider() or kcf_get_dual_provider() with no further change. * * We check that the request is for a init or atomic routine and that * it is for one of the operation groups used from k-api . */ static boolean_t can_resubmit(kcf_areq_node_t *areq, crypto_mechanism_t **mech1, crypto_mechanism_t **mech2, crypto_func_group_t *fg) { kcf_req_params_t *params; kcf_op_type_t optype; params = &areq->an_params; optype = params->rp_optype; if (!(IS_INIT_OP(optype) || IS_ATOMIC_OP(optype))) return (B_FALSE); switch (params->rp_opgrp) { case KCF_OG_DIGEST: { kcf_digest_ops_params_t *dops = ¶ms->rp_u.digest_params; dops->do_mech.cm_type = dops->do_framework_mechtype; *mech1 = &dops->do_mech; *fg = (optype == KCF_OP_INIT) ? 
CRYPTO_FG_DIGEST : CRYPTO_FG_DIGEST_ATOMIC; break; } case KCF_OG_MAC: { kcf_mac_ops_params_t *mops = ¶ms->rp_u.mac_params; mops->mo_mech.cm_type = mops->mo_framework_mechtype; *mech1 = &mops->mo_mech; *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_MAC : CRYPTO_FG_MAC_ATOMIC; break; } case KCF_OG_SIGN: { kcf_sign_ops_params_t *sops = ¶ms->rp_u.sign_params; sops->so_mech.cm_type = sops->so_framework_mechtype; *mech1 = &sops->so_mech; switch (optype) { case KCF_OP_INIT: *fg = CRYPTO_FG_SIGN; break; case KCF_OP_ATOMIC: *fg = CRYPTO_FG_SIGN_ATOMIC; break; default: ASSERT(optype == KCF_OP_SIGN_RECOVER_ATOMIC); *fg = CRYPTO_FG_SIGN_RECOVER_ATOMIC; } break; } case KCF_OG_VERIFY: { kcf_verify_ops_params_t *vops = ¶ms->rp_u.verify_params; vops->vo_mech.cm_type = vops->vo_framework_mechtype; *mech1 = &vops->vo_mech; switch (optype) { case KCF_OP_INIT: *fg = CRYPTO_FG_VERIFY; break; case KCF_OP_ATOMIC: *fg = CRYPTO_FG_VERIFY_ATOMIC; break; default: ASSERT(optype == KCF_OP_VERIFY_RECOVER_ATOMIC); *fg = CRYPTO_FG_VERIFY_RECOVER_ATOMIC; } break; } case KCF_OG_ENCRYPT: { kcf_encrypt_ops_params_t *eops = ¶ms->rp_u.encrypt_params; eops->eo_mech.cm_type = eops->eo_framework_mechtype; *mech1 = &eops->eo_mech; *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_ENCRYPT : CRYPTO_FG_ENCRYPT_ATOMIC; break; } case KCF_OG_DECRYPT: { kcf_decrypt_ops_params_t *dcrops = ¶ms->rp_u.decrypt_params; dcrops->dop_mech.cm_type = dcrops->dop_framework_mechtype; *mech1 = &dcrops->dop_mech; *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_DECRYPT : CRYPTO_FG_DECRYPT_ATOMIC; break; } case KCF_OG_ENCRYPT_MAC: { kcf_encrypt_mac_ops_params_t *eops = ¶ms->rp_u.encrypt_mac_params; eops->em_encr_mech.cm_type = eops->em_framework_encr_mechtype; *mech1 = &eops->em_encr_mech; eops->em_mac_mech.cm_type = eops->em_framework_mac_mechtype; *mech2 = &eops->em_mac_mech; *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_ENCRYPT_MAC : CRYPTO_FG_ENCRYPT_MAC_ATOMIC; break; } case KCF_OG_MAC_DECRYPT: { kcf_mac_decrypt_ops_params_t *dops = ¶ms->rp_u.mac_decrypt_params; dops->md_mac_mech.cm_type = dops->md_framework_mac_mechtype; *mech1 = &dops->md_mac_mech; dops->md_decr_mech.cm_type = dops->md_framework_decr_mechtype; *mech2 = &dops->md_decr_mech; *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_MAC_DECRYPT : CRYPTO_FG_MAC_DECRYPT_ATOMIC; break; } default: return (B_FALSE); } return (B_TRUE); } /* * This routine is called when a request to a provider has failed * with a recoverable error. This routine tries to find another provider * and dispatches the request to the new provider, if one is available. * We reuse the request structure. * * A return value of NULL from kcf_get_mech_provider() indicates * we have tried the last provider. */ static int kcf_resubmit_request(kcf_areq_node_t *areq) { int error = CRYPTO_FAILED; kcf_context_t *ictx; kcf_provider_desc_t *old_pd; kcf_provider_desc_t *new_pd; crypto_mechanism_t *mech1 = NULL, *mech2 = NULL; crypto_mech_type_t prov_mt1, prov_mt2; crypto_func_group_t fg = 0; if (!can_resubmit(areq, &mech1, &mech2, &fg)) return (error); old_pd = areq->an_provider; /* * Add old_pd to the list of providers already tried. We release * the hold on old_pd (from the earlier kcf_get_mech_provider()) in * kcf_free_triedlist(). 
*/ if (kcf_insert_triedlist(&areq->an_tried_plist, old_pd, KM_NOSLEEP) == NULL) return (error); if (mech1 && !mech2) { new_pd = kcf_get_mech_provider(mech1->cm_type, NULL, &error, areq->an_tried_plist, fg, (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), 0); } else { ASSERT(mech1 != NULL && mech2 != NULL); new_pd = kcf_get_dual_provider(mech1, mech2, NULL, &prov_mt1, &prov_mt2, &error, areq->an_tried_plist, fg, fg, (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), 0); } if (new_pd == NULL) return (error); /* * We reuse the old context by resetting provider specific * fields in it. */ if ((ictx = areq->an_context) != NULL) { crypto_ctx_t *ctx; ASSERT(old_pd == ictx->kc_prov_desc); KCF_PROV_REFRELE(ictx->kc_prov_desc); KCF_PROV_REFHOLD(new_pd); ictx->kc_prov_desc = new_pd; ctx = &ictx->kc_glbl_ctx; ctx->cc_provider = new_pd->pd_prov_handle; ctx->cc_session = new_pd->pd_sid; ctx->cc_provider_private = NULL; } /* We reuse areq. by resetting the provider and context fields. */ KCF_PROV_REFRELE(old_pd); KCF_PROV_REFHOLD(new_pd); areq->an_provider = new_pd; mutex_enter(&areq->an_lock); areq->an_state = REQ_WAITING; mutex_exit(&areq->an_lock); switch (new_pd->pd_prov_type) { case CRYPTO_SW_PROVIDER: error = kcf_disp_sw_request(areq); break; case CRYPTO_HW_PROVIDER: { taskq_t *taskq = new_pd->pd_sched_info.ks_taskq; if (taskq_dispatch(taskq, process_req_hwp, areq, TQ_NOSLEEP) == TASKQID_INVALID) { error = CRYPTO_HOST_MEMORY; } else { error = CRYPTO_QUEUED; } break; default: break; } } return (error); } static inline int EMPTY_TASKQ(taskq_t *tq) { #ifdef _KERNEL return (tq->tq_lowest_id == tq->tq_next_id); #else return (tq->tq_task.tqent_next == &tq->tq_task || tq->tq_active == 0); #endif } /* * Routine called by both ioctl and k-api. The consumer should * bundle the parameters into a kcf_req_params_t structure. A bunch * of macros are available in ops_impl.h for this bundling. They are: * * KCF_WRAP_DIGEST_OPS_PARAMS() * KCF_WRAP_MAC_OPS_PARAMS() * KCF_WRAP_ENCRYPT_OPS_PARAMS() * KCF_WRAP_DECRYPT_OPS_PARAMS() ... etc. * * It is the caller's responsibility to free the ctx argument when * appropriate. See the KCF_CONTEXT_COND_RELEASE macro for details. */ int kcf_submit_request(kcf_provider_desc_t *pd, crypto_ctx_t *ctx, crypto_call_req_t *crq, kcf_req_params_t *params, boolean_t cont) { int error = CRYPTO_SUCCESS; kcf_areq_node_t *areq; kcf_sreq_node_t *sreq; kcf_context_t *kcf_ctx; taskq_t *taskq = pd->pd_sched_info.ks_taskq; kcf_ctx = ctx ? (kcf_context_t *)ctx->cc_framework_private : NULL; /* Synchronous cases */ if (crq == NULL) { switch (pd->pd_prov_type) { case CRYPTO_SW_PROVIDER: error = common_submit_request(pd, ctx, params, KCF_RHNDL(KM_SLEEP)); break; case CRYPTO_HW_PROVIDER: /* * Special case for CRYPTO_SYNCHRONOUS providers that * never return a CRYPTO_QUEUED error. We skip any * request allocation and call the SPI directly. */ if ((pd->pd_flags & CRYPTO_SYNCHRONOUS) && EMPTY_TASKQ(taskq)) { KCF_PROV_IREFHOLD(pd); if (pd->pd_state == KCF_PROV_READY) { error = common_submit_request(pd, ctx, params, KCF_RHNDL(KM_SLEEP)); KCF_PROV_IREFRELE(pd); ASSERT(error != CRYPTO_QUEUED); break; } KCF_PROV_IREFRELE(pd); } sreq = kmem_cache_alloc(kcf_sreq_cache, KM_SLEEP); sreq->sn_state = REQ_ALLOCATED; sreq->sn_rv = CRYPTO_FAILED; sreq->sn_params = params; /* * Note that we do not need to hold the context * for synchronous case as the context will never * become invalid underneath us. We do not need to hold * the provider here either as the caller has a hold. 
*/ sreq->sn_context = kcf_ctx; ASSERT(KCF_PROV_REFHELD(pd)); sreq->sn_provider = pd; ASSERT(taskq != NULL); /* * Call the SPI directly if the taskq is empty and the * provider is not busy, else dispatch to the taskq. * Calling directly is fine as this is the synchronous * case. This is unlike the asynchronous case where we * must always dispatch to the taskq. */ if (EMPTY_TASKQ(taskq) && pd->pd_state == KCF_PROV_READY) { process_req_hwp(sreq); } else { /* * We can not tell from taskq_dispatch() return * value if we exceeded maxalloc. Hence the * check here. Since we are allowed to wait in * the synchronous case, we wait for the taskq * to become empty. */ if (taskq->tq_nalloc >= crypto_taskq_maxalloc) { taskq_wait(taskq); } (void) taskq_dispatch(taskq, process_req_hwp, sreq, TQ_SLEEP); } /* * Wait for the notification to arrive, * if the operation is not done yet. * Bug# 4722589 will make the wait a cv_wait_sig(). */ mutex_enter(&sreq->sn_lock); while (sreq->sn_state < REQ_DONE) cv_wait(&sreq->sn_cv, &sreq->sn_lock); mutex_exit(&sreq->sn_lock); error = sreq->sn_rv; kmem_cache_free(kcf_sreq_cache, sreq); break; default: error = CRYPTO_FAILED; break; } } else { /* Asynchronous cases */ switch (pd->pd_prov_type) { case CRYPTO_SW_PROVIDER: if (!(crq->cr_flag & CRYPTO_ALWAYS_QUEUE)) { /* * This case has less overhead since there is * no switching of context. */ error = common_submit_request(pd, ctx, params, KCF_RHNDL(KM_NOSLEEP)); } else { /* * CRYPTO_ALWAYS_QUEUE is set. We need to * queue the request and return. */ areq = kcf_areqnode_alloc(pd, kcf_ctx, crq, params, cont); if (areq == NULL) error = CRYPTO_HOST_MEMORY; else { if (!(crq->cr_flag & CRYPTO_SKIP_REQID)) { /* * Set the request handle. This handle * is used for any crypto_cancel_req(9f) * calls from the consumer. We have to * do this before dispatching the * request. */ crq->cr_reqid = kcf_reqid_insert(areq); } error = kcf_disp_sw_request(areq); /* * There is an error processing this * request. Remove the handle and * release the request structure. */ if (error != CRYPTO_QUEUED) { if (!(crq->cr_flag & CRYPTO_SKIP_REQID)) kcf_reqid_delete(areq); KCF_AREQ_REFRELE(areq); } } } break; case CRYPTO_HW_PROVIDER: /* * We need to queue the request and return. */ areq = kcf_areqnode_alloc(pd, kcf_ctx, crq, params, cont); if (areq == NULL) { error = CRYPTO_HOST_MEMORY; goto done; } ASSERT(taskq != NULL); /* * We can not tell from taskq_dispatch() return * value if we exceeded maxalloc. Hence the check * here. */ if (taskq->tq_nalloc >= crypto_taskq_maxalloc) { error = CRYPTO_BUSY; KCF_AREQ_REFRELE(areq); goto done; } if (!(crq->cr_flag & CRYPTO_SKIP_REQID)) { /* * Set the request handle. This handle is used * for any crypto_cancel_req(9f) calls from the * consumer. We have to do this before dispatching * the request. */ crq->cr_reqid = kcf_reqid_insert(areq); } if (taskq_dispatch(taskq, process_req_hwp, areq, TQ_NOSLEEP) == TASKQID_INVALID) { error = CRYPTO_HOST_MEMORY; if (!(crq->cr_flag & CRYPTO_SKIP_REQID)) kcf_reqid_delete(areq); KCF_AREQ_REFRELE(areq); } else { error = CRYPTO_QUEUED; } break; default: error = CRYPTO_FAILED; break; } } done: return (error); } /* * We're done with this framework context, so free it. Note that freeing * framework context (kcf_context) frees the global context (crypto_ctx). * * The provider is responsible for freeing provider private context after a * final or single operation and resetting the cc_provider_private field * to NULL. It should do this before it notifies the framework of the * completion. 
We still need to call KCF_PROV_FREE_CONTEXT to handle cases * like crypto_cancel_ctx(9f). */ void kcf_free_context(kcf_context_t *kcf_ctx) { kcf_provider_desc_t *pd = kcf_ctx->kc_prov_desc; crypto_ctx_t *gctx = &kcf_ctx->kc_glbl_ctx; kcf_context_t *kcf_secondctx = kcf_ctx->kc_secondctx; /* Release the second context, if any */ if (kcf_secondctx != NULL) KCF_CONTEXT_REFRELE(kcf_secondctx); if (gctx->cc_provider_private != NULL) { mutex_enter(&pd->pd_lock); if (!KCF_IS_PROV_REMOVED(pd)) { /* * Increment the provider's internal refcnt so it * doesn't unregister from the framework while * we're calling the entry point. */ KCF_PROV_IREFHOLD(pd); mutex_exit(&pd->pd_lock); (void) KCF_PROV_FREE_CONTEXT(pd, gctx); KCF_PROV_IREFRELE(pd); } else { mutex_exit(&pd->pd_lock); } } /* kcf_ctx->kc_prov_desc has a hold on pd */ KCF_PROV_REFRELE(kcf_ctx->kc_prov_desc); /* check if this context is shared with a software provider */ if ((gctx->cc_flags & CRYPTO_INIT_OPSTATE) && kcf_ctx->kc_sw_prov_desc != NULL) { KCF_PROV_REFRELE(kcf_ctx->kc_sw_prov_desc); } kmem_cache_free(kcf_context_cache, kcf_ctx); } /* * Free the request after releasing all the holds. */ void kcf_free_req(kcf_areq_node_t *areq) { KCF_PROV_REFRELE(areq->an_provider); if (areq->an_context != NULL) KCF_CONTEXT_REFRELE(areq->an_context); if (areq->an_tried_plist != NULL) kcf_free_triedlist(areq->an_tried_plist); kmem_cache_free(kcf_areq_cache, areq); } /* * Utility routine to remove a request from the chain of requests * hanging off a context. */ static void kcf_removereq_in_ctxchain(kcf_context_t *ictx, kcf_areq_node_t *areq) { kcf_areq_node_t *cur, *prev; /* * Get context lock, search for areq in the chain and remove it. */ ASSERT(ictx != NULL); mutex_enter(&ictx->kc_in_use_lock); prev = cur = ictx->kc_req_chain_first; while (cur != NULL) { if (cur == areq) { if (prev == cur) { if ((ictx->kc_req_chain_first = cur->an_ctxchain_next) == NULL) ictx->kc_req_chain_last = NULL; } else { if (cur == ictx->kc_req_chain_last) ictx->kc_req_chain_last = prev; prev->an_ctxchain_next = cur->an_ctxchain_next; } break; } prev = cur; cur = cur->an_ctxchain_next; } mutex_exit(&ictx->kc_in_use_lock); } /* * Remove the specified node from the global software queue. * * The caller must hold the queue lock and request lock (an_lock). */ static void kcf_remove_node(kcf_areq_node_t *node) { kcf_areq_node_t *nextp = node->an_next; kcf_areq_node_t *prevp = node->an_prev; if (nextp != NULL) nextp->an_prev = prevp; else gswq->gs_last = prevp; if (prevp != NULL) prevp->an_next = nextp; else gswq->gs_first = nextp; node->an_state = REQ_CANCELED; } /* * Add the request node to the end of the global software queue. * * The caller should not hold the queue lock. Returns 0 if the * request is successfully queued. Returns CRYPTO_BUSY if the limit * on the number of jobs is exceeded. */ static int kcf_enqueue(kcf_areq_node_t *node) { kcf_areq_node_t *tnode; mutex_enter(&gswq->gs_lock); if (gswq->gs_njobs >= gswq->gs_maxjobs) { mutex_exit(&gswq->gs_lock); return (CRYPTO_BUSY); } if (gswq->gs_last == NULL) { gswq->gs_first = gswq->gs_last = node; } else { ASSERT(gswq->gs_last->an_next == NULL); tnode = gswq->gs_last; tnode->an_next = node; gswq->gs_last = node; node->an_prev = tnode; } gswq->gs_njobs++; /* an_lock not needed here as we hold gs_lock */ node->an_state = REQ_WAITING; mutex_exit(&gswq->gs_lock); return (0); } /* * kmem_cache_alloc constructor for sync request structure. 
*/ -/* ARGSUSED */ static int kcf_sreq_cache_constructor(void *buf, void *cdrarg, int kmflags) { + (void) cdrarg, (void) kmflags; kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)buf; sreq->sn_type = CRYPTO_SYNCH; cv_init(&sreq->sn_cv, NULL, CV_DEFAULT, NULL); mutex_init(&sreq->sn_lock, NULL, MUTEX_DEFAULT, NULL); return (0); } -/* ARGSUSED */ static void kcf_sreq_cache_destructor(void *buf, void *cdrarg) { + (void) cdrarg; kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)buf; mutex_destroy(&sreq->sn_lock); cv_destroy(&sreq->sn_cv); } /* * kmem_cache_alloc constructor for async request structure. */ -/* ARGSUSED */ static int kcf_areq_cache_constructor(void *buf, void *cdrarg, int kmflags) { + (void) cdrarg, (void) kmflags; kcf_areq_node_t *areq = (kcf_areq_node_t *)buf; areq->an_type = CRYPTO_ASYNCH; areq->an_refcnt = 0; mutex_init(&areq->an_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&areq->an_done, NULL, CV_DEFAULT, NULL); cv_init(&areq->an_turn_cv, NULL, CV_DEFAULT, NULL); return (0); } -/* ARGSUSED */ static void kcf_areq_cache_destructor(void *buf, void *cdrarg) { + (void) cdrarg; kcf_areq_node_t *areq = (kcf_areq_node_t *)buf; ASSERT(areq->an_refcnt == 0); mutex_destroy(&areq->an_lock); cv_destroy(&areq->an_done); cv_destroy(&areq->an_turn_cv); } /* * kmem_cache_alloc constructor for kcf_context structure. */ -/* ARGSUSED */ static int kcf_context_cache_constructor(void *buf, void *cdrarg, int kmflags) { + (void) cdrarg, (void) kmflags; kcf_context_t *kctx = (kcf_context_t *)buf; kctx->kc_refcnt = 0; mutex_init(&kctx->kc_in_use_lock, NULL, MUTEX_DEFAULT, NULL); return (0); } -/* ARGSUSED */ static void kcf_context_cache_destructor(void *buf, void *cdrarg) { + (void) cdrarg; kcf_context_t *kctx = (kcf_context_t *)buf; ASSERT(kctx->kc_refcnt == 0); mutex_destroy(&kctx->kc_in_use_lock); } void kcf_sched_destroy(void) { int i; if (kcf_misc_kstat) kstat_delete(kcf_misc_kstat); if (kcfpool) { mutex_destroy(&kcfpool->kp_thread_lock); cv_destroy(&kcfpool->kp_nothr_cv); mutex_destroy(&kcfpool->kp_user_lock); cv_destroy(&kcfpool->kp_user_cv); kmem_free(kcfpool, sizeof (kcf_pool_t)); } for (i = 0; i < REQID_TABLES; i++) { if (kcf_reqid_table[i]) { mutex_destroy(&(kcf_reqid_table[i]->rt_lock)); kmem_free(kcf_reqid_table[i], sizeof (kcf_reqid_table_t)); } } if (gswq) { mutex_destroy(&gswq->gs_lock); cv_destroy(&gswq->gs_cv); kmem_free(gswq, sizeof (kcf_global_swq_t)); } if (kcf_context_cache) kmem_cache_destroy(kcf_context_cache); if (kcf_areq_cache) kmem_cache_destroy(kcf_areq_cache); if (kcf_sreq_cache) kmem_cache_destroy(kcf_sreq_cache); mutex_destroy(&ntfy_list_lock); cv_destroy(&ntfy_list_cv); } /* * Creates and initializes all the structures needed by the framework. */ void kcf_sched_init(void) { int i; kcf_reqid_table_t *rt; /* * Create all the kmem caches needed by the framework. We set the * align argument to 64, to get a slab aligned to 64-byte as well as * have the objects (cache_chunksize) to be a 64-byte multiple. * This helps to avoid false sharing as this is the size of the * CPU cache line. 
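 * (Illustration: with objects padded to a multiple of 64 bytes and
 * aligned on a 64-byte boundary, two request structures never share
 * a cache line, so CPUs completing different requests do not
 * invalidate each other's lines.)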
*/ kcf_sreq_cache = kmem_cache_create("kcf_sreq_cache", sizeof (struct kcf_sreq_node), 64, kcf_sreq_cache_constructor, kcf_sreq_cache_destructor, NULL, NULL, NULL, 0); kcf_areq_cache = kmem_cache_create("kcf_areq_cache", sizeof (struct kcf_areq_node), 64, kcf_areq_cache_constructor, kcf_areq_cache_destructor, NULL, NULL, NULL, 0); kcf_context_cache = kmem_cache_create("kcf_context_cache", sizeof (struct kcf_context), 64, kcf_context_cache_constructor, kcf_context_cache_destructor, NULL, NULL, NULL, 0); gswq = kmem_alloc(sizeof (kcf_global_swq_t), KM_SLEEP); mutex_init(&gswq->gs_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&gswq->gs_cv, NULL, CV_DEFAULT, NULL); gswq->gs_njobs = 0; gswq->gs_maxjobs = kcf_maxthreads * crypto_taskq_maxalloc; gswq->gs_first = gswq->gs_last = NULL; /* Initialize the global reqid table */ for (i = 0; i < REQID_TABLES; i++) { rt = kmem_zalloc(sizeof (kcf_reqid_table_t), KM_SLEEP); kcf_reqid_table[i] = rt; mutex_init(&rt->rt_lock, NULL, MUTEX_DEFAULT, NULL); rt->rt_curid = i; } /* Allocate and initialize the thread pool */ kcfpool_alloc(); /* Initialize the event notification list variables */ mutex_init(&ntfy_list_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&ntfy_list_cv, NULL, CV_DEFAULT, NULL); /* Create the kcf kstat */ kcf_misc_kstat = kstat_create("kcf", 0, "framework_stats", "crypto", KSTAT_TYPE_NAMED, sizeof (kcf_stats_t) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); if (kcf_misc_kstat != NULL) { kcf_misc_kstat->ks_data = &kcf_ksdata; kcf_misc_kstat->ks_update = kcf_misc_kstat_update; kstat_install(kcf_misc_kstat); } } /* * Signal the waiting sync client. */ void kcf_sop_done(kcf_sreq_node_t *sreq, int error) { mutex_enter(&sreq->sn_lock); sreq->sn_state = REQ_DONE; sreq->sn_rv = error; cv_signal(&sreq->sn_cv); mutex_exit(&sreq->sn_lock); } /* * Callback the async client with the operation status. * We free the async request node and possibly the context. * We also handle any chain of requests hanging off of * the context. */ void kcf_aop_done(kcf_areq_node_t *areq, int error) { kcf_op_type_t optype; boolean_t skip_notify = B_FALSE; kcf_context_t *ictx; kcf_areq_node_t *nextreq; /* * Handle recoverable errors. This has to be done first * before doing anything else in this routine so that * we do not change the state of the request. */ if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) { /* * We try another provider, if one is available. Else * we continue with the failure notification to the * client. */ if (kcf_resubmit_request(areq) == CRYPTO_QUEUED) return; } mutex_enter(&areq->an_lock); areq->an_state = REQ_DONE; mutex_exit(&areq->an_lock); optype = (&areq->an_params)->rp_optype; if ((ictx = areq->an_context) != NULL) { /* * A request after it is removed from the request * queue, still stays on a chain of requests hanging * of its context structure. It needs to be removed * from this chain at this point. */ mutex_enter(&ictx->kc_in_use_lock); nextreq = areq->an_ctxchain_next; if (nextreq != NULL) { mutex_enter(&nextreq->an_lock); nextreq->an_is_my_turn = B_TRUE; cv_signal(&nextreq->an_turn_cv); mutex_exit(&nextreq->an_lock); } ictx->kc_req_chain_first = nextreq; if (nextreq == NULL) ictx->kc_req_chain_last = NULL; mutex_exit(&ictx->kc_in_use_lock); if (IS_SINGLE_OP(optype) || IS_FINAL_OP(optype)) { ASSERT(nextreq == NULL); KCF_CONTEXT_REFRELE(ictx); } else if (error != CRYPTO_SUCCESS && IS_INIT_OP(optype)) { /* * NOTE - We do not release the context in case of update * operations. 
We require the consumer to free it explicitly, * in case it wants to abandon an update operation. This is done * as there may be mechanisms in ECB mode that can continue * even if an operation on a block fails. */ KCF_CONTEXT_REFRELE(ictx); } } /* Deal with the internal continuation to this request first */ if (areq->an_isdual) { kcf_dual_req_t *next_arg; next_arg = (kcf_dual_req_t *)areq->an_reqarg.cr_callback_arg; next_arg->kr_areq = areq; KCF_AREQ_REFHOLD(areq); areq->an_isdual = B_FALSE; NOTIFY_CLIENT(areq, error); return; } /* * If CRYPTO_NOTIFY_OPDONE flag is set, we should notify * always. If this flag is clear, we skip the notification * provided there are no errors. We check this flag for only * init or update operations. It is ignored for single, final or * atomic operations. */ skip_notify = (IS_UPDATE_OP(optype) || IS_INIT_OP(optype)) && (!(areq->an_reqarg.cr_flag & CRYPTO_NOTIFY_OPDONE)) && (error == CRYPTO_SUCCESS); if (!skip_notify) { NOTIFY_CLIENT(areq, error); } if (!(areq->an_reqarg.cr_flag & CRYPTO_SKIP_REQID)) kcf_reqid_delete(areq); KCF_AREQ_REFRELE(areq); } /* * Allocate the thread pool and initialize all the fields. */ static void kcfpool_alloc() { kcfpool = kmem_alloc(sizeof (kcf_pool_t), KM_SLEEP); kcfpool->kp_threads = kcfpool->kp_idlethreads = 0; kcfpool->kp_blockedthreads = 0; kcfpool->kp_signal_create_thread = B_FALSE; kcfpool->kp_nthrs = 0; kcfpool->kp_user_waiting = B_FALSE; mutex_init(&kcfpool->kp_thread_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&kcfpool->kp_nothr_cv, NULL, CV_DEFAULT, NULL); mutex_init(&kcfpool->kp_user_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&kcfpool->kp_user_cv, NULL, CV_DEFAULT, NULL); kcf_idlethr_timeout = KCF_DEFAULT_THRTIMEOUT; } /* * Insert the async request in the hash table after assigning it * an ID. Returns the ID. * * The ID is used by the caller to pass as an argument to a * cancel_req() routine later. */ static crypto_req_id_t kcf_reqid_insert(kcf_areq_node_t *areq) { int indx; crypto_req_id_t id; kcf_areq_node_t *headp; kcf_reqid_table_t *rt; rt = kcf_reqid_table[CPU_SEQID_UNSTABLE & REQID_TABLE_MASK]; mutex_enter(&rt->rt_lock); rt->rt_curid = id = (rt->rt_curid - REQID_COUNTER_LOW) | REQID_COUNTER_HIGH; SET_REQID(areq, id); indx = REQID_HASH(id); headp = areq->an_idnext = rt->rt_idhash[indx]; areq->an_idprev = NULL; if (headp != NULL) headp->an_idprev = areq; rt->rt_idhash[indx] = areq; mutex_exit(&rt->rt_lock); return (id); } /* * Delete the async request from the hash table. */ static void kcf_reqid_delete(kcf_areq_node_t *areq) { int indx; kcf_areq_node_t *nextp, *prevp; crypto_req_id_t id = GET_REQID(areq); kcf_reqid_table_t *rt; rt = kcf_reqid_table[id & REQID_TABLE_MASK]; indx = REQID_HASH(id); mutex_enter(&rt->rt_lock); nextp = areq->an_idnext; prevp = areq->an_idprev; if (nextp != NULL) nextp->an_idprev = prevp; if (prevp != NULL) prevp->an_idnext = nextp; else rt->rt_idhash[indx] = nextp; SET_REQID(areq, 0); cv_broadcast(&areq->an_done); mutex_exit(&rt->rt_lock); } /* * Cancel a single asynchronous request. * * We guarantee that no problems will result from calling * crypto_cancel_req() for a request which is either running, or * has already completed. We remove the request from any queues * if it is possible. We wait for request completion if the * request is dispatched to a provider. * * Calling context: * Can be called from user context only. 
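 * (one reason for the user-context requirement: if the request has
 * already been dispatched to a provider, this routine may block in
 * cv_wait() on an_done until the request completes)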
* * NOTE: We acquire the following locks in this routine (in order): * - rt_lock (kcf_reqid_table_t) * - gswq->gs_lock * - areq->an_lock * - ictx->kc_in_use_lock (from kcf_removereq_in_ctxchain()) * * This locking order MUST be maintained in code every where else. */ void crypto_cancel_req(crypto_req_id_t id) { int indx; kcf_areq_node_t *areq; kcf_provider_desc_t *pd; kcf_context_t *ictx; kcf_reqid_table_t *rt; rt = kcf_reqid_table[id & REQID_TABLE_MASK]; indx = REQID_HASH(id); mutex_enter(&rt->rt_lock); for (areq = rt->rt_idhash[indx]; areq; areq = areq->an_idnext) { if (GET_REQID(areq) == id) { /* * We found the request. It is either still waiting * in the framework queues or running at the provider. */ pd = areq->an_provider; ASSERT(pd != NULL); switch (pd->pd_prov_type) { case CRYPTO_SW_PROVIDER: mutex_enter(&gswq->gs_lock); mutex_enter(&areq->an_lock); /* This request can be safely canceled. */ if (areq->an_state <= REQ_WAITING) { /* Remove from gswq, global software queue. */ kcf_remove_node(areq); if ((ictx = areq->an_context) != NULL) kcf_removereq_in_ctxchain(ictx, areq); mutex_exit(&areq->an_lock); mutex_exit(&gswq->gs_lock); mutex_exit(&rt->rt_lock); /* Remove areq from hash table and free it. */ kcf_reqid_delete(areq); KCF_AREQ_REFRELE(areq); return; } mutex_exit(&areq->an_lock); mutex_exit(&gswq->gs_lock); break; case CRYPTO_HW_PROVIDER: /* * There is no interface to remove an entry * once it is on the taskq. So, we do not do * anything for a hardware provider. */ break; default: break; } /* * The request is running. Wait for the request completion * to notify us. */ KCF_AREQ_REFHOLD(areq); while (GET_REQID(areq) == id) cv_wait(&areq->an_done, &rt->rt_lock); KCF_AREQ_REFRELE(areq); break; } } mutex_exit(&rt->rt_lock); } /* * Cancel all asynchronous requests associated with the * passed in crypto context and free it. * * A client SHOULD NOT call this routine after calling a crypto_*_final * routine. This routine is called only during intermediate operations. * The client should not use the crypto context after this function returns * since we destroy it. * * Calling context: * Can be called from user context only. */ void crypto_cancel_ctx(crypto_context_t ctx) { kcf_context_t *ictx; kcf_areq_node_t *areq; if (ctx == NULL) return; ictx = (kcf_context_t *)((crypto_ctx_t *)ctx)->cc_framework_private; mutex_enter(&ictx->kc_in_use_lock); /* Walk the chain and cancel each request */ while ((areq = ictx->kc_req_chain_first) != NULL) { /* * We have to drop the lock here as we may have * to wait for request completion. We hold the * request before dropping the lock though, so that it * won't be freed underneath us. */ KCF_AREQ_REFHOLD(areq); mutex_exit(&ictx->kc_in_use_lock); crypto_cancel_req(GET_REQID(areq)); KCF_AREQ_REFRELE(areq); mutex_enter(&ictx->kc_in_use_lock); } mutex_exit(&ictx->kc_in_use_lock); KCF_CONTEXT_REFRELE(ictx); } /* * Update kstats. */ static int kcf_misc_kstat_update(kstat_t *ksp, int rw) { uint_t tcnt; kcf_stats_t *ks_data; if (rw == KSTAT_WRITE) return (EACCES); ks_data = ksp->ks_data; ks_data->ks_thrs_in_pool.value.ui32 = kcfpool->kp_threads; /* * The failover thread is counted in kp_idlethreads in * some corner cases. This is done to avoid doing more checks * when submitting a request. We account for those cases below. 
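 * Concretely, if kp_threads is 4 and kp_idlethreads momentarily reads
 * as 5, the extra count is the failover thread, so the ks_idle_thrs
 * value reported below is adjusted down to 4.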
*/ if ((tcnt = kcfpool->kp_idlethreads) == (kcfpool->kp_threads + 1)) tcnt--; ks_data->ks_idle_thrs.value.ui32 = tcnt; ks_data->ks_minthrs.value.ui32 = kcf_minthreads; ks_data->ks_maxthrs.value.ui32 = kcf_maxthreads; ks_data->ks_swq_njobs.value.ui32 = gswq->gs_njobs; ks_data->ks_swq_maxjobs.value.ui32 = gswq->gs_maxjobs; ks_data->ks_taskq_threads.value.ui32 = crypto_taskq_threads; ks_data->ks_taskq_minalloc.value.ui32 = crypto_taskq_minalloc; ks_data->ks_taskq_maxalloc.value.ui32 = crypto_taskq_maxalloc; return (0); } /* * Allocate and initialize a kcf_dual_req, used for saving the arguments of * a dual operation or an atomic operation that has to be internally * simulated with multiple single steps. * crq determines the memory allocation flags. */ kcf_dual_req_t * kcf_alloc_req(crypto_call_req_t *crq) { kcf_dual_req_t *kcr; kcr = kmem_alloc(sizeof (kcf_dual_req_t), KCF_KMFLAG(crq)); if (kcr == NULL) return (NULL); /* Copy the whole crypto_call_req struct, as it isn't persistent */ if (crq != NULL) kcr->kr_callreq = *crq; else bzero(&(kcr->kr_callreq), sizeof (crypto_call_req_t)); kcr->kr_areq = NULL; kcr->kr_saveoffset = 0; kcr->kr_savelen = 0; return (kcr); } /* * Callback routine for the next part of a simulated dual part. * Schedules the next step. * * This routine can be called from interrupt context. */ void kcf_next_req(void *next_req_arg, int status) { kcf_dual_req_t *next_req = (kcf_dual_req_t *)next_req_arg; kcf_req_params_t *params = &(next_req->kr_params); kcf_areq_node_t *areq = next_req->kr_areq; int error = status; kcf_provider_desc_t *pd = NULL; crypto_dual_data_t *ct = NULL; /* Stop the processing if an error occurred at this step */ if (error != CRYPTO_SUCCESS) { out: areq->an_reqarg = next_req->kr_callreq; KCF_AREQ_REFRELE(areq); kmem_free(next_req, sizeof (kcf_dual_req_t)); areq->an_isdual = B_FALSE; kcf_aop_done(areq, error); return; } switch (params->rp_opgrp) { case KCF_OG_MAC: { /* * The next req is submitted with the same reqid as the * first part. The consumer only got back that reqid, and * should still be able to cancel the operation during its * second step. 
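 * (this also explains why the existing kcf_areq_node_t is reused
 * further down rather than resubmitted through kcf_submit_request(),
 * which would allocate a new request structure)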
*/ kcf_mac_ops_params_t *mops = &(params->rp_u.mac_params); crypto_ctx_template_t mac_tmpl; kcf_mech_entry_t *me; ct = (crypto_dual_data_t *)mops->mo_data; mac_tmpl = (crypto_ctx_template_t)mops->mo_templ; /* No expected recoverable failures, so no retry list */ pd = kcf_get_mech_provider(mops->mo_framework_mechtype, &me, &error, NULL, CRYPTO_FG_MAC_ATOMIC, (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), ct->dd_len2); if (pd == NULL) { error = CRYPTO_MECH_NOT_SUPPORTED; goto out; } /* Validate the MAC context template here */ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) && (mac_tmpl != NULL)) { kcf_ctx_template_t *ctx_mac_tmpl; ctx_mac_tmpl = (kcf_ctx_template_t *)mac_tmpl; if (ctx_mac_tmpl->ct_generation != me->me_gen_swprov) { KCF_PROV_REFRELE(pd); error = CRYPTO_OLD_CTX_TEMPLATE; goto out; } mops->mo_templ = ctx_mac_tmpl->ct_prov_tmpl; } break; } case KCF_OG_DECRYPT: { kcf_decrypt_ops_params_t *dcrops = &(params->rp_u.decrypt_params); ct = (crypto_dual_data_t *)dcrops->dop_ciphertext; /* No expected recoverable failures, so no retry list */ pd = kcf_get_mech_provider(dcrops->dop_framework_mechtype, NULL, &error, NULL, CRYPTO_FG_DECRYPT_ATOMIC, (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), ct->dd_len1); if (pd == NULL) { error = CRYPTO_MECH_NOT_SUPPORTED; goto out; } break; } default: break; } /* The second step uses len2 and offset2 of the dual_data */ next_req->kr_saveoffset = ct->dd_offset1; next_req->kr_savelen = ct->dd_len1; ct->dd_offset1 = ct->dd_offset2; ct->dd_len1 = ct->dd_len2; /* preserve if the caller is restricted */ if (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED) { areq->an_reqarg.cr_flag = CRYPTO_RESTRICTED; } else { areq->an_reqarg.cr_flag = 0; } areq->an_reqarg.cr_callback_func = kcf_last_req; areq->an_reqarg.cr_callback_arg = next_req; areq->an_isdual = B_TRUE; /* * We would like to call kcf_submit_request() here. But, * that is not possible as that routine allocates a new * kcf_areq_node_t request structure, while we need to * reuse the existing request structure. */ switch (pd->pd_prov_type) { case CRYPTO_SW_PROVIDER: error = common_submit_request(pd, NULL, params, KCF_RHNDL(KM_NOSLEEP)); break; case CRYPTO_HW_PROVIDER: { kcf_provider_desc_t *old_pd; taskq_t *taskq = pd->pd_sched_info.ks_taskq; /* * Set the params for the second step in the * dual-ops. */ areq->an_params = *params; old_pd = areq->an_provider; KCF_PROV_REFRELE(old_pd); KCF_PROV_REFHOLD(pd); areq->an_provider = pd; /* * Note that we have to do a taskq_dispatch() * here as we may be in interrupt context. */ if (taskq_dispatch(taskq, process_req_hwp, areq, TQ_NOSLEEP) == (taskqid_t)0) { error = CRYPTO_HOST_MEMORY; } else { error = CRYPTO_QUEUED; } break; } default: break; } /* * We have to release the holds on the request and the provider * in all cases. */ KCF_AREQ_REFRELE(areq); KCF_PROV_REFRELE(pd); if (error != CRYPTO_QUEUED) { /* restore, clean up, and invoke the client's callback */ ct->dd_offset1 = next_req->kr_saveoffset; ct->dd_len1 = next_req->kr_savelen; areq->an_reqarg = next_req->kr_callreq; kmem_free(next_req, sizeof (kcf_dual_req_t)); areq->an_isdual = B_FALSE; kcf_aop_done(areq, error); } } /* * Last part of an emulated dual operation. * Clean up and restore ... 
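 * "Restore" here means putting back the first part's dd_offset1 and
 * dd_len1 of the crypto_dual_data_t from the values saved in
 * kr_saveoffset and kr_savelen before the client callback is invoked.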
*/ void kcf_last_req(void *last_req_arg, int status) { kcf_dual_req_t *last_req = (kcf_dual_req_t *)last_req_arg; kcf_req_params_t *params = &(last_req->kr_params); kcf_areq_node_t *areq = last_req->kr_areq; crypto_dual_data_t *ct = NULL; switch (params->rp_opgrp) { case KCF_OG_MAC: { kcf_mac_ops_params_t *mops = &(params->rp_u.mac_params); ct = (crypto_dual_data_t *)mops->mo_data; break; } case KCF_OG_DECRYPT: { kcf_decrypt_ops_params_t *dcrops = &(params->rp_u.decrypt_params); ct = (crypto_dual_data_t *)dcrops->dop_ciphertext; break; } default: { panic("invalid kcf_op_group_t %d", (int)params->rp_opgrp); return; } } ct->dd_offset1 = last_req->kr_saveoffset; ct->dd_len1 = last_req->kr_savelen; /* The submitter used kcf_last_req as its callback */ if (areq == NULL) { crypto_call_req_t *cr = &last_req->kr_callreq; (*(cr->cr_callback_func))(cr->cr_callback_arg, status); kmem_free(last_req, sizeof (kcf_dual_req_t)); return; } areq->an_reqarg = last_req->kr_callreq; KCF_AREQ_REFRELE(areq); kmem_free(last_req, sizeof (kcf_dual_req_t)); areq->an_isdual = B_FALSE; kcf_aop_done(areq, status); } diff --git a/module/icp/io/aes.c b/module/icp/io/aes.c index c47c7567b900..d50e3bdc15f1 100644 --- a/module/icp/io/aes.c +++ b/module/icp/io/aes.c @@ -1,1457 +1,1457 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ /* * AES provider for the Kernel Cryptographic Framework (KCF) */ #include #include #include #include #include #include #include #define _AES_IMPL #include #include #define CRYPTO_PROVIDER_NAME "aes" extern struct mod_ops mod_cryptoops; /* * Module linkage information for the kernel. */ static struct modlcrypto modlcrypto = { &mod_cryptoops, "AES Kernel SW Provider" }; static struct modlinkage modlinkage = { MODREV_1, { (void *)&modlcrypto, NULL } }; /* * Mechanism info structure passed to KCF during registration. 
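 * Each entry below lists the mechanism name and type, the crypto
 * function groups (CRYPTO_FG_*) this provider supports for it, and the
 * minimum and maximum key sizes together with the unit
 * (CRYPTO_KEYSIZE_UNIT_IN_BYTES) in which those sizes are expressed.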
*/ static crypto_mech_info_t aes_mech_info_tab[] = { /* AES_ECB */ {SUN_CKM_AES_ECB, AES_ECB_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* AES_CBC */ {SUN_CKM_AES_CBC, AES_CBC_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* AES_CTR */ {SUN_CKM_AES_CTR, AES_CTR_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* AES_CCM */ {SUN_CKM_AES_CCM, AES_CCM_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* AES_GCM */ {SUN_CKM_AES_GCM, AES_GCM_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* AES_GMAC */ {SUN_CKM_AES_GMAC, AES_GMAC_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC | CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC | CRYPTO_FG_SIGN | CRYPTO_FG_SIGN_ATOMIC | CRYPTO_FG_VERIFY | CRYPTO_FG_VERIFY_ATOMIC, AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES} }; static void aes_provider_status(crypto_provider_handle_t, uint_t *); static crypto_control_ops_t aes_control_ops = { aes_provider_status }; static int aes_encrypt_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int aes_decrypt_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int aes_common_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t, boolean_t); static int aes_common_init_ctx(aes_ctx_t *, crypto_spi_ctx_template_t *, crypto_mechanism_t *, crypto_key_t *, int, boolean_t); static int aes_encrypt_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int aes_decrypt_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int aes_encrypt(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static int aes_encrypt_update(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static int aes_encrypt_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int aes_decrypt(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static int aes_decrypt_update(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static int aes_decrypt_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static crypto_cipher_ops_t aes_cipher_ops = { .encrypt_init = aes_encrypt_init, .encrypt = aes_encrypt, .encrypt_update = aes_encrypt_update, .encrypt_final = aes_encrypt_final, .encrypt_atomic = aes_encrypt_atomic, .decrypt_init = aes_decrypt_init, .decrypt = aes_decrypt, .decrypt_update = aes_decrypt_update, .decrypt_final = aes_decrypt_final, .decrypt_atomic = aes_decrypt_atomic }; static int 
aes_mac_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int aes_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static crypto_mac_ops_t aes_mac_ops = { .mac_init = NULL, .mac = NULL, .mac_update = NULL, .mac_final = NULL, .mac_atomic = aes_mac_atomic, .mac_verify_atomic = aes_mac_verify_atomic }; static int aes_create_ctx_template(crypto_provider_handle_t, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *, size_t *, crypto_req_handle_t); static int aes_free_context(crypto_ctx_t *); static crypto_ctx_ops_t aes_ctx_ops = { .create_ctx_template = aes_create_ctx_template, .free_context = aes_free_context }; static crypto_ops_t aes_crypto_ops = {{{{{ &aes_control_ops, NULL, &aes_cipher_ops, &aes_mac_ops, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &aes_ctx_ops }}}}}; static crypto_provider_info_t aes_prov_info = {{{{ CRYPTO_SPI_VERSION_1, "AES Software Provider", CRYPTO_SW_PROVIDER, NULL, &aes_crypto_ops, sizeof (aes_mech_info_tab)/sizeof (crypto_mech_info_t), aes_mech_info_tab }}}}; static crypto_kcf_provider_handle_t aes_prov_handle = 0; static crypto_data_t null_crypto_data = { CRYPTO_DATA_RAW }; int aes_mod_init(void) { int ret; /* Determine the fastest available implementation. */ aes_impl_init(); gcm_impl_init(); if ((ret = mod_install(&modlinkage)) != 0) return (ret); /* Register with KCF. If the registration fails, remove the module. */ if (crypto_register_provider(&aes_prov_info, &aes_prov_handle)) { (void) mod_remove(&modlinkage); return (EACCES); } return (0); } int aes_mod_fini(void) { /* Unregister from KCF if module is registered */ if (aes_prov_handle != 0) { if (crypto_unregister_provider(aes_prov_handle)) return (EBUSY); aes_prov_handle = 0; } return (mod_remove(&modlinkage)); } static int aes_check_mech_param(crypto_mechanism_t *mechanism, aes_ctx_t **ctx, int kmflag) { void *p = NULL; boolean_t param_required = B_TRUE; size_t param_len; void *(*alloc_fun)(int); int rv = CRYPTO_SUCCESS; switch (mechanism->cm_type) { case AES_ECB_MECH_INFO_TYPE: param_required = B_FALSE; alloc_fun = ecb_alloc_ctx; break; case AES_CBC_MECH_INFO_TYPE: param_len = AES_BLOCK_LEN; alloc_fun = cbc_alloc_ctx; break; case AES_CTR_MECH_INFO_TYPE: param_len = sizeof (CK_AES_CTR_PARAMS); alloc_fun = ctr_alloc_ctx; break; case AES_CCM_MECH_INFO_TYPE: param_len = sizeof (CK_AES_CCM_PARAMS); alloc_fun = ccm_alloc_ctx; break; case AES_GCM_MECH_INFO_TYPE: param_len = sizeof (CK_AES_GCM_PARAMS); alloc_fun = gcm_alloc_ctx; break; case AES_GMAC_MECH_INFO_TYPE: param_len = sizeof (CK_AES_GMAC_PARAMS); alloc_fun = gmac_alloc_ctx; break; default: rv = CRYPTO_MECHANISM_INVALID; return (rv); } if (param_required && mechanism->cm_param != NULL && mechanism->cm_param_len != param_len) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } if (ctx != NULL) { p = (alloc_fun)(kmflag); *ctx = p; } return (rv); } /* * Initialize key schedules for AES */ static int init_keysched(crypto_key_t *key, void *newbie) { /* * Only keys by value are supported by this module. 
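 * A caller is therefore expected to pass a raw key along these lines
 * (an illustrative sketch; the variable names are made up):
 *
 *	crypto_key_t key;
 *
 *	key.ck_format = CRYPTO_KEY_RAW;
 *	key.ck_length = 256;		(length is in bits)
 *	key.ck_data = key_material;
 *
 * The length must lie in [AES_MINBITS, AES_MAXBITS] and be a multiple
 * of 64 bits, i.e. exactly 128, 192, or 256, as checked below.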
*/ switch (key->ck_format) { case CRYPTO_KEY_RAW: if (key->ck_length < AES_MINBITS || key->ck_length > AES_MAXBITS) { return (CRYPTO_KEY_SIZE_RANGE); } /* key length must be either 128, 192, or 256 */ if ((key->ck_length & 63) != 0) return (CRYPTO_KEY_SIZE_RANGE); break; default: return (CRYPTO_KEY_TYPE_INCONSISTENT); } aes_init_keysched(key->ck_data, key->ck_length, newbie); return (CRYPTO_SUCCESS); } /* * KCF software provider control entry points. */ -/* ARGSUSED */ static void aes_provider_status(crypto_provider_handle_t provider, uint_t *status) { + (void) provider; *status = CRYPTO_PROVIDER_READY; } static int aes_encrypt_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t template, crypto_req_handle_t req) { return (aes_common_init(ctx, mechanism, key, template, req, B_TRUE)); } static int aes_decrypt_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t template, crypto_req_handle_t req) { return (aes_common_init(ctx, mechanism, key, template, req, B_FALSE)); } /* * KCF software provider encrypt entry points. */ static int aes_common_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t template, crypto_req_handle_t req, boolean_t is_encrypt_init) { aes_ctx_t *aes_ctx; int rv; int kmflag; /* * Only keys by value are supported by this module. */ if (key->ck_format != CRYPTO_KEY_RAW) { return (CRYPTO_KEY_TYPE_INCONSISTENT); } kmflag = crypto_kmflag(req); if ((rv = aes_check_mech_param(mechanism, &aes_ctx, kmflag)) != CRYPTO_SUCCESS) return (rv); rv = aes_common_init_ctx(aes_ctx, template, mechanism, key, kmflag, is_encrypt_init); if (rv != CRYPTO_SUCCESS) { crypto_free_mode_ctx(aes_ctx); return (rv); } ctx->cc_provider_private = aes_ctx; return (CRYPTO_SUCCESS); } static void aes_copy_block64(uint8_t *in, uint64_t *out) { if (IS_P2ALIGNED(in, sizeof (uint64_t))) { /* LINTED: pointer alignment */ out[0] = *(uint64_t *)&in[0]; /* LINTED: pointer alignment */ out[1] = *(uint64_t *)&in[8]; } else { uint8_t *iv8 = (uint8_t *)&out[0]; AES_COPY_BLOCK(in, iv8); } } static int aes_encrypt(crypto_ctx_t *ctx, crypto_data_t *plaintext, crypto_data_t *ciphertext, crypto_req_handle_t req) { int ret = CRYPTO_FAILED; aes_ctx_t *aes_ctx; size_t saved_length, saved_offset, length_needed; ASSERT(ctx->cc_provider_private != NULL); aes_ctx = ctx->cc_provider_private; /* * For block ciphers, plaintext must be a multiple of AES block size. * This test is only valid for ciphers whose blocksize is a power of 2. */ if (((aes_ctx->ac_flags & (CTR_MODE|CCM_MODE|GCM_MODE|GMAC_MODE)) == 0) && (plaintext->cd_length & (AES_BLOCK_LEN - 1)) != 0) return (CRYPTO_DATA_LEN_RANGE); ASSERT(ciphertext != NULL); /* * We need to just return the length needed to store the output. * We should not destroy the context for the following case. */ switch (aes_ctx->ac_flags & (CCM_MODE|GCM_MODE|GMAC_MODE)) { case CCM_MODE: length_needed = plaintext->cd_length + aes_ctx->ac_mac_len; break; case GCM_MODE: length_needed = plaintext->cd_length + aes_ctx->ac_tag_len; break; case GMAC_MODE: if (plaintext->cd_length != 0) return (CRYPTO_ARGUMENTS_BAD); length_needed = aes_ctx->ac_tag_len; break; default: length_needed = plaintext->cd_length; } if (ciphertext->cd_length < length_needed) { ciphertext->cd_length = length_needed; return (CRYPTO_BUFFER_TOO_SMALL); } saved_length = ciphertext->cd_length; saved_offset = ciphertext->cd_offset; /* * Do an update on the specified input data. 
*/ ret = aes_encrypt_update(ctx, plaintext, ciphertext, req); if (ret != CRYPTO_SUCCESS) { return (ret); } /* * For CCM mode, aes_ccm_encrypt_final() will take care of any * left-over unprocessed data, and compute the MAC */ if (aes_ctx->ac_flags & CCM_MODE) { /* * ccm_encrypt_final() will compute the MAC and append * it to existing ciphertext. So, need to adjust the left over * length value accordingly */ /* order of following 2 lines MUST not be reversed */ ciphertext->cd_offset = ciphertext->cd_length; ciphertext->cd_length = saved_length - ciphertext->cd_length; ret = ccm_encrypt_final((ccm_ctx_t *)aes_ctx, ciphertext, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); if (ret != CRYPTO_SUCCESS) { return (ret); } if (plaintext != ciphertext) { ciphertext->cd_length = ciphertext->cd_offset - saved_offset; } ciphertext->cd_offset = saved_offset; } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) { /* * gcm_encrypt_final() will compute the MAC and append * it to existing ciphertext. So, need to adjust the left over * length value accordingly */ /* order of following 2 lines MUST not be reversed */ ciphertext->cd_offset = ciphertext->cd_length; ciphertext->cd_length = saved_length - ciphertext->cd_length; ret = gcm_encrypt_final((gcm_ctx_t *)aes_ctx, ciphertext, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); if (ret != CRYPTO_SUCCESS) { return (ret); } if (plaintext != ciphertext) { ciphertext->cd_length = ciphertext->cd_offset - saved_offset; } ciphertext->cd_offset = saved_offset; } ASSERT(aes_ctx->ac_remainder_len == 0); (void) aes_free_context(ctx); return (ret); } static int aes_decrypt(crypto_ctx_t *ctx, crypto_data_t *ciphertext, crypto_data_t *plaintext, crypto_req_handle_t req) { int ret = CRYPTO_FAILED; aes_ctx_t *aes_ctx; off_t saved_offset; size_t saved_length, length_needed; ASSERT(ctx->cc_provider_private != NULL); aes_ctx = ctx->cc_provider_private; /* * For block ciphers, plaintext must be a multiple of AES block size. * This test is only valid for ciphers whose blocksize is a power of 2. */ if (((aes_ctx->ac_flags & (CTR_MODE|CCM_MODE|GCM_MODE|GMAC_MODE)) == 0) && (ciphertext->cd_length & (AES_BLOCK_LEN - 1)) != 0) { return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); } ASSERT(plaintext != NULL); /* * Return length needed to store the output. * Do not destroy context when plaintext buffer is too small. * * CCM: plaintext is MAC len smaller than cipher text * GCM: plaintext is TAG len smaller than cipher text * GMAC: plaintext length must be zero */ switch (aes_ctx->ac_flags & (CCM_MODE|GCM_MODE|GMAC_MODE)) { case CCM_MODE: length_needed = aes_ctx->ac_processed_data_len; break; case GCM_MODE: length_needed = ciphertext->cd_length - aes_ctx->ac_tag_len; break; case GMAC_MODE: if (plaintext->cd_length != 0) return (CRYPTO_ARGUMENTS_BAD); length_needed = 0; break; default: length_needed = ciphertext->cd_length; } if (plaintext->cd_length < length_needed) { plaintext->cd_length = length_needed; return (CRYPTO_BUFFER_TOO_SMALL); } saved_offset = plaintext->cd_offset; saved_length = plaintext->cd_length; /* * Do an update on the specified input data. 
*/ ret = aes_decrypt_update(ctx, ciphertext, plaintext, req); if (ret != CRYPTO_SUCCESS) { goto cleanup; } if (aes_ctx->ac_flags & CCM_MODE) { ASSERT(aes_ctx->ac_processed_data_len == aes_ctx->ac_data_len); ASSERT(aes_ctx->ac_processed_mac_len == aes_ctx->ac_mac_len); /* order of following 2 lines MUST not be reversed */ plaintext->cd_offset = plaintext->cd_length; plaintext->cd_length = saved_length - plaintext->cd_length; ret = ccm_decrypt_final((ccm_ctx_t *)aes_ctx, plaintext, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); if (ret == CRYPTO_SUCCESS) { if (plaintext != ciphertext) { plaintext->cd_length = plaintext->cd_offset - saved_offset; } } else { plaintext->cd_length = saved_length; } plaintext->cd_offset = saved_offset; } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) { /* order of following 2 lines MUST not be reversed */ plaintext->cd_offset = plaintext->cd_length; plaintext->cd_length = saved_length - plaintext->cd_length; ret = gcm_decrypt_final((gcm_ctx_t *)aes_ctx, plaintext, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); if (ret == CRYPTO_SUCCESS) { if (plaintext != ciphertext) { plaintext->cd_length = plaintext->cd_offset - saved_offset; } } else { plaintext->cd_length = saved_length; } plaintext->cd_offset = saved_offset; } ASSERT(aes_ctx->ac_remainder_len == 0); cleanup: (void) aes_free_context(ctx); return (ret); } -/* ARGSUSED */ static int aes_encrypt_update(crypto_ctx_t *ctx, crypto_data_t *plaintext, crypto_data_t *ciphertext, crypto_req_handle_t req) { + (void) req; off_t saved_offset; size_t saved_length, out_len; int ret = CRYPTO_SUCCESS; aes_ctx_t *aes_ctx; ASSERT(ctx->cc_provider_private != NULL); aes_ctx = ctx->cc_provider_private; ASSERT(ciphertext != NULL); /* compute number of bytes that will hold the ciphertext */ out_len = aes_ctx->ac_remainder_len; out_len += plaintext->cd_length; out_len &= ~(AES_BLOCK_LEN - 1); /* return length needed to store the output */ if (ciphertext->cd_length < out_len) { ciphertext->cd_length = out_len; return (CRYPTO_BUFFER_TOO_SMALL); } saved_offset = ciphertext->cd_offset; saved_length = ciphertext->cd_length; /* * Do the AES update on the specified input data. */ switch (plaintext->cd_format) { case CRYPTO_DATA_RAW: ret = crypto_update_iov(ctx->cc_provider_private, plaintext, ciphertext, aes_encrypt_contiguous_blocks, aes_copy_block64); break; case CRYPTO_DATA_UIO: ret = crypto_update_uio(ctx->cc_provider_private, plaintext, ciphertext, aes_encrypt_contiguous_blocks, aes_copy_block64); break; default: ret = CRYPTO_ARGUMENTS_BAD; } /* * Since AES counter mode is a stream cipher, we call * ctr_mode_final() to pick up any remaining bytes. * It is an internal function that does not destroy * the context like *normal* final routines. */ if ((aes_ctx->ac_flags & CTR_MODE) && (aes_ctx->ac_remainder_len > 0)) { ret = ctr_mode_final((ctr_ctx_t *)aes_ctx, ciphertext, aes_encrypt_block); } if (ret == CRYPTO_SUCCESS) { if (plaintext != ciphertext) ciphertext->cd_length = ciphertext->cd_offset - saved_offset; } else { ciphertext->cd_length = saved_length; } ciphertext->cd_offset = saved_offset; return (ret); } static int aes_decrypt_update(crypto_ctx_t *ctx, crypto_data_t *ciphertext, crypto_data_t *plaintext, crypto_req_handle_t req) { off_t saved_offset; size_t saved_length, out_len; int ret = CRYPTO_SUCCESS; aes_ctx_t *aes_ctx; ASSERT(ctx->cc_provider_private != NULL); aes_ctx = ctx->cc_provider_private; ASSERT(plaintext != NULL); /* * Compute number of bytes that will hold the plaintext. 
* This is not necessary for CCM, GCM, and GMAC since these * mechanisms never return plaintext for update operations. */ if ((aes_ctx->ac_flags & (CCM_MODE|GCM_MODE|GMAC_MODE)) == 0) { out_len = aes_ctx->ac_remainder_len; out_len += ciphertext->cd_length; out_len &= ~(AES_BLOCK_LEN - 1); /* return length needed to store the output */ if (plaintext->cd_length < out_len) { plaintext->cd_length = out_len; return (CRYPTO_BUFFER_TOO_SMALL); } } saved_offset = plaintext->cd_offset; saved_length = plaintext->cd_length; if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) gcm_set_kmflag((gcm_ctx_t *)aes_ctx, crypto_kmflag(req)); /* * Do the AES update on the specified input data. */ switch (ciphertext->cd_format) { case CRYPTO_DATA_RAW: ret = crypto_update_iov(ctx->cc_provider_private, ciphertext, plaintext, aes_decrypt_contiguous_blocks, aes_copy_block64); break; case CRYPTO_DATA_UIO: ret = crypto_update_uio(ctx->cc_provider_private, ciphertext, plaintext, aes_decrypt_contiguous_blocks, aes_copy_block64); break; default: ret = CRYPTO_ARGUMENTS_BAD; } /* * Since AES counter mode is a stream cipher, we call * ctr_mode_final() to pick up any remaining bytes. * It is an internal function that does not destroy * the context like *normal* final routines. */ if ((aes_ctx->ac_flags & CTR_MODE) && (aes_ctx->ac_remainder_len > 0)) { ret = ctr_mode_final((ctr_ctx_t *)aes_ctx, plaintext, aes_encrypt_block); if (ret == CRYPTO_DATA_LEN_RANGE) ret = CRYPTO_ENCRYPTED_DATA_LEN_RANGE; } if (ret == CRYPTO_SUCCESS) { if (ciphertext != plaintext) plaintext->cd_length = plaintext->cd_offset - saved_offset; } else { plaintext->cd_length = saved_length; } plaintext->cd_offset = saved_offset; return (ret); } -/* ARGSUSED */ static int aes_encrypt_final(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) { + (void) req; aes_ctx_t *aes_ctx; int ret; ASSERT(ctx->cc_provider_private != NULL); aes_ctx = ctx->cc_provider_private; if (data->cd_format != CRYPTO_DATA_RAW && data->cd_format != CRYPTO_DATA_UIO) { return (CRYPTO_ARGUMENTS_BAD); } if (aes_ctx->ac_flags & CTR_MODE) { if (aes_ctx->ac_remainder_len > 0) { ret = ctr_mode_final((ctr_ctx_t *)aes_ctx, data, aes_encrypt_block); if (ret != CRYPTO_SUCCESS) return (ret); } } else if (aes_ctx->ac_flags & CCM_MODE) { ret = ccm_encrypt_final((ccm_ctx_t *)aes_ctx, data, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); if (ret != CRYPTO_SUCCESS) { return (ret); } } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) { size_t saved_offset = data->cd_offset; ret = gcm_encrypt_final((gcm_ctx_t *)aes_ctx, data, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); if (ret != CRYPTO_SUCCESS) { return (ret); } data->cd_length = data->cd_offset - saved_offset; data->cd_offset = saved_offset; } else { /* * There must be no unprocessed plaintext. * This happens if the length of the last data is * not a multiple of the AES block length. */ if (aes_ctx->ac_remainder_len > 0) { return (CRYPTO_DATA_LEN_RANGE); } data->cd_length = 0; } (void) aes_free_context(ctx); return (CRYPTO_SUCCESS); } -/* ARGSUSED */ static int aes_decrypt_final(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) { + (void) req; aes_ctx_t *aes_ctx; int ret; off_t saved_offset; size_t saved_length; ASSERT(ctx->cc_provider_private != NULL); aes_ctx = ctx->cc_provider_private; if (data->cd_format != CRYPTO_DATA_RAW && data->cd_format != CRYPTO_DATA_UIO) { return (CRYPTO_ARGUMENTS_BAD); } /* * There must be no unprocessed ciphertext. 
* This happens if the length of the last ciphertext is * not a multiple of the AES block length. */ if (aes_ctx->ac_remainder_len > 0) { if ((aes_ctx->ac_flags & CTR_MODE) == 0) return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); else { ret = ctr_mode_final((ctr_ctx_t *)aes_ctx, data, aes_encrypt_block); if (ret == CRYPTO_DATA_LEN_RANGE) ret = CRYPTO_ENCRYPTED_DATA_LEN_RANGE; if (ret != CRYPTO_SUCCESS) return (ret); } } if (aes_ctx->ac_flags & CCM_MODE) { /* * This is where all the plaintext is returned, make sure * the plaintext buffer is big enough */ size_t pt_len = aes_ctx->ac_data_len; if (data->cd_length < pt_len) { data->cd_length = pt_len; return (CRYPTO_BUFFER_TOO_SMALL); } ASSERT(aes_ctx->ac_processed_data_len == pt_len); ASSERT(aes_ctx->ac_processed_mac_len == aes_ctx->ac_mac_len); saved_offset = data->cd_offset; saved_length = data->cd_length; ret = ccm_decrypt_final((ccm_ctx_t *)aes_ctx, data, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); if (ret == CRYPTO_SUCCESS) { data->cd_length = data->cd_offset - saved_offset; } else { data->cd_length = saved_length; } data->cd_offset = saved_offset; if (ret != CRYPTO_SUCCESS) { return (ret); } } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) { /* * This is where all the plaintext is returned, make sure * the plaintext buffer is big enough */ gcm_ctx_t *ctx = (gcm_ctx_t *)aes_ctx; size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; if (data->cd_length < pt_len) { data->cd_length = pt_len; return (CRYPTO_BUFFER_TOO_SMALL); } saved_offset = data->cd_offset; saved_length = data->cd_length; ret = gcm_decrypt_final((gcm_ctx_t *)aes_ctx, data, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); if (ret == CRYPTO_SUCCESS) { data->cd_length = data->cd_offset - saved_offset; } else { data->cd_length = saved_length; } data->cd_offset = saved_offset; if (ret != CRYPTO_SUCCESS) { return (ret); } } if ((aes_ctx->ac_flags & (CTR_MODE|CCM_MODE|GCM_MODE|GMAC_MODE)) == 0) { data->cd_length = 0; } (void) aes_free_context(ctx); return (CRYPTO_SUCCESS); } -/* ARGSUSED */ static int aes_encrypt_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *plaintext, crypto_data_t *ciphertext, crypto_spi_ctx_template_t template, crypto_req_handle_t req) { + (void) provider, (void) session_id; aes_ctx_t aes_ctx; /* on the stack */ off_t saved_offset; size_t saved_length; size_t length_needed; int ret; ASSERT(ciphertext != NULL); /* * CTR, CCM, GCM, and GMAC modes do not require that plaintext * be a multiple of AES block size. 
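 * For the remaining modes the test below relies on AES_BLOCK_LEN being
 * a power of two: (cd_length & (AES_BLOCK_LEN - 1)) is equivalent to
 * cd_length % AES_BLOCK_LEN, so a non-zero result means the plaintext
 * is not an exact multiple of the block size.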
*/ switch (mechanism->cm_type) { case AES_CTR_MECH_INFO_TYPE: case AES_CCM_MECH_INFO_TYPE: case AES_GCM_MECH_INFO_TYPE: case AES_GMAC_MECH_INFO_TYPE: break; default: if ((plaintext->cd_length & (AES_BLOCK_LEN - 1)) != 0) return (CRYPTO_DATA_LEN_RANGE); } if ((ret = aes_check_mech_param(mechanism, NULL, 0)) != CRYPTO_SUCCESS) return (ret); bzero(&aes_ctx, sizeof (aes_ctx_t)); ret = aes_common_init_ctx(&aes_ctx, template, mechanism, key, crypto_kmflag(req), B_TRUE); if (ret != CRYPTO_SUCCESS) return (ret); switch (mechanism->cm_type) { case AES_CCM_MECH_INFO_TYPE: length_needed = plaintext->cd_length + aes_ctx.ac_mac_len; break; case AES_GMAC_MECH_INFO_TYPE: if (plaintext->cd_length != 0) return (CRYPTO_ARGUMENTS_BAD); fallthrough; case AES_GCM_MECH_INFO_TYPE: length_needed = plaintext->cd_length + aes_ctx.ac_tag_len; break; default: length_needed = plaintext->cd_length; } /* return size of buffer needed to store output */ if (ciphertext->cd_length < length_needed) { ciphertext->cd_length = length_needed; ret = CRYPTO_BUFFER_TOO_SMALL; goto out; } saved_offset = ciphertext->cd_offset; saved_length = ciphertext->cd_length; /* * Do an update on the specified input data. */ switch (plaintext->cd_format) { case CRYPTO_DATA_RAW: ret = crypto_update_iov(&aes_ctx, plaintext, ciphertext, aes_encrypt_contiguous_blocks, aes_copy_block64); break; case CRYPTO_DATA_UIO: ret = crypto_update_uio(&aes_ctx, plaintext, ciphertext, aes_encrypt_contiguous_blocks, aes_copy_block64); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) { if (mechanism->cm_type == AES_CCM_MECH_INFO_TYPE) { ret = ccm_encrypt_final((ccm_ctx_t *)&aes_ctx, ciphertext, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); if (ret != CRYPTO_SUCCESS) goto out; ASSERT(aes_ctx.ac_remainder_len == 0); } else if (mechanism->cm_type == AES_GCM_MECH_INFO_TYPE || mechanism->cm_type == AES_GMAC_MECH_INFO_TYPE) { ret = gcm_encrypt_final((gcm_ctx_t *)&aes_ctx, ciphertext, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); if (ret != CRYPTO_SUCCESS) goto out; ASSERT(aes_ctx.ac_remainder_len == 0); } else if (mechanism->cm_type == AES_CTR_MECH_INFO_TYPE) { if (aes_ctx.ac_remainder_len > 0) { ret = ctr_mode_final((ctr_ctx_t *)&aes_ctx, ciphertext, aes_encrypt_block); if (ret != CRYPTO_SUCCESS) goto out; } } else { ASSERT(aes_ctx.ac_remainder_len == 0); } if (plaintext != ciphertext) { ciphertext->cd_length = ciphertext->cd_offset - saved_offset; } } else { ciphertext->cd_length = saved_length; } ciphertext->cd_offset = saved_offset; out: if (aes_ctx.ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) { bzero(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len); kmem_free(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len); } #ifdef CAN_USE_GCM_ASM if (aes_ctx.ac_flags & (GCM_MODE|GMAC_MODE) && ((gcm_ctx_t *)&aes_ctx)->gcm_Htable != NULL) { gcm_ctx_t *ctx = (gcm_ctx_t *)&aes_ctx; bzero(ctx->gcm_Htable, ctx->gcm_htab_len); kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len); } #endif return (ret); } -/* ARGSUSED */ static int aes_decrypt_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *ciphertext, crypto_data_t *plaintext, crypto_spi_ctx_template_t template, crypto_req_handle_t req) { + (void) provider, (void) session_id; aes_ctx_t aes_ctx; /* on the stack */ off_t saved_offset; size_t saved_length; size_t length_needed; int ret; ASSERT(plaintext != NULL); /* * CCM, GCM, CTR, and GMAC modes do not require that ciphertext * be a multiple of AES block size. 
*/ switch (mechanism->cm_type) { case AES_CTR_MECH_INFO_TYPE: case AES_CCM_MECH_INFO_TYPE: case AES_GCM_MECH_INFO_TYPE: case AES_GMAC_MECH_INFO_TYPE: break; default: if ((ciphertext->cd_length & (AES_BLOCK_LEN - 1)) != 0) return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); } if ((ret = aes_check_mech_param(mechanism, NULL, 0)) != CRYPTO_SUCCESS) return (ret); bzero(&aes_ctx, sizeof (aes_ctx_t)); ret = aes_common_init_ctx(&aes_ctx, template, mechanism, key, crypto_kmflag(req), B_FALSE); if (ret != CRYPTO_SUCCESS) return (ret); switch (mechanism->cm_type) { case AES_CCM_MECH_INFO_TYPE: length_needed = aes_ctx.ac_data_len; break; case AES_GCM_MECH_INFO_TYPE: length_needed = ciphertext->cd_length - aes_ctx.ac_tag_len; break; case AES_GMAC_MECH_INFO_TYPE: if (plaintext->cd_length != 0) return (CRYPTO_ARGUMENTS_BAD); length_needed = 0; break; default: length_needed = ciphertext->cd_length; } /* return size of buffer needed to store output */ if (plaintext->cd_length < length_needed) { plaintext->cd_length = length_needed; ret = CRYPTO_BUFFER_TOO_SMALL; goto out; } saved_offset = plaintext->cd_offset; saved_length = plaintext->cd_length; if (mechanism->cm_type == AES_GCM_MECH_INFO_TYPE || mechanism->cm_type == AES_GMAC_MECH_INFO_TYPE) gcm_set_kmflag((gcm_ctx_t *)&aes_ctx, crypto_kmflag(req)); /* * Do an update on the specified input data. */ switch (ciphertext->cd_format) { case CRYPTO_DATA_RAW: ret = crypto_update_iov(&aes_ctx, ciphertext, plaintext, aes_decrypt_contiguous_blocks, aes_copy_block64); break; case CRYPTO_DATA_UIO: ret = crypto_update_uio(&aes_ctx, ciphertext, plaintext, aes_decrypt_contiguous_blocks, aes_copy_block64); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) { if (mechanism->cm_type == AES_CCM_MECH_INFO_TYPE) { ASSERT(aes_ctx.ac_processed_data_len == aes_ctx.ac_data_len); ASSERT(aes_ctx.ac_processed_mac_len == aes_ctx.ac_mac_len); ret = ccm_decrypt_final((ccm_ctx_t *)&aes_ctx, plaintext, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); ASSERT(aes_ctx.ac_remainder_len == 0); if ((ret == CRYPTO_SUCCESS) && (ciphertext != plaintext)) { plaintext->cd_length = plaintext->cd_offset - saved_offset; } else { plaintext->cd_length = saved_length; } } else if (mechanism->cm_type == AES_GCM_MECH_INFO_TYPE || mechanism->cm_type == AES_GMAC_MECH_INFO_TYPE) { ret = gcm_decrypt_final((gcm_ctx_t *)&aes_ctx, plaintext, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); ASSERT(aes_ctx.ac_remainder_len == 0); if ((ret == CRYPTO_SUCCESS) && (ciphertext != plaintext)) { plaintext->cd_length = plaintext->cd_offset - saved_offset; } else { plaintext->cd_length = saved_length; } } else if (mechanism->cm_type != AES_CTR_MECH_INFO_TYPE) { ASSERT(aes_ctx.ac_remainder_len == 0); if (ciphertext != plaintext) plaintext->cd_length = plaintext->cd_offset - saved_offset; } else { if (aes_ctx.ac_remainder_len > 0) { ret = ctr_mode_final((ctr_ctx_t *)&aes_ctx, plaintext, aes_encrypt_block); if (ret == CRYPTO_DATA_LEN_RANGE) ret = CRYPTO_ENCRYPTED_DATA_LEN_RANGE; if (ret != CRYPTO_SUCCESS) goto out; } if (ciphertext != plaintext) plaintext->cd_length = plaintext->cd_offset - saved_offset; } } else { plaintext->cd_length = saved_length; } plaintext->cd_offset = saved_offset; out: if (aes_ctx.ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) { bzero(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len); kmem_free(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len); } if (aes_ctx.ac_flags & CCM_MODE) { if (aes_ctx.ac_pt_buf != NULL) { vmem_free(aes_ctx.ac_pt_buf, aes_ctx.ac_data_len); } } else if 
(aes_ctx.ac_flags & (GCM_MODE|GMAC_MODE)) { if (((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf != NULL) { vmem_free(((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf, ((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf_len); } #ifdef CAN_USE_GCM_ASM if (((gcm_ctx_t *)&aes_ctx)->gcm_Htable != NULL) { gcm_ctx_t *ctx = (gcm_ctx_t *)&aes_ctx; bzero(ctx->gcm_Htable, ctx->gcm_htab_len); kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len); } #endif } return (ret); } /* * KCF software provider context template entry points. */ -/* ARGSUSED */ static int aes_create_ctx_template(crypto_provider_handle_t provider, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t *tmpl, size_t *tmpl_size, crypto_req_handle_t req) { + (void) provider; void *keysched; size_t size; int rv; if (mechanism->cm_type != AES_ECB_MECH_INFO_TYPE && mechanism->cm_type != AES_CBC_MECH_INFO_TYPE && mechanism->cm_type != AES_CTR_MECH_INFO_TYPE && mechanism->cm_type != AES_CCM_MECH_INFO_TYPE && mechanism->cm_type != AES_GCM_MECH_INFO_TYPE && mechanism->cm_type != AES_GMAC_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); if ((keysched = aes_alloc_keysched(&size, crypto_kmflag(req))) == NULL) { return (CRYPTO_HOST_MEMORY); } /* * Initialize key schedule. Key length information is stored * in the key. */ if ((rv = init_keysched(key, keysched)) != CRYPTO_SUCCESS) { bzero(keysched, size); kmem_free(keysched, size); return (rv); } *tmpl = keysched; *tmpl_size = size; return (CRYPTO_SUCCESS); } static int aes_free_context(crypto_ctx_t *ctx) { aes_ctx_t *aes_ctx = ctx->cc_provider_private; if (aes_ctx != NULL) { if (aes_ctx->ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) { ASSERT(aes_ctx->ac_keysched_len != 0); bzero(aes_ctx->ac_keysched, aes_ctx->ac_keysched_len); kmem_free(aes_ctx->ac_keysched, aes_ctx->ac_keysched_len); } crypto_free_mode_ctx(aes_ctx); ctx->cc_provider_private = NULL; } return (CRYPTO_SUCCESS); } static int aes_common_init_ctx(aes_ctx_t *aes_ctx, crypto_spi_ctx_template_t *template, crypto_mechanism_t *mechanism, crypto_key_t *key, int kmflag, boolean_t is_encrypt_init) { int rv = CRYPTO_SUCCESS; void *keysched; size_t size = 0; if (template == NULL) { if ((keysched = aes_alloc_keysched(&size, kmflag)) == NULL) return (CRYPTO_HOST_MEMORY); /* * Initialize key schedule. * Key length is stored in the key. 
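 * This branch is taken only when the caller did not supply a
 * precomputed context template.  The provider then allocates its own
 * schedule and sets PROVIDER_OWNS_KEY_SCHEDULE so that
 * aes_free_context() knows to zero and free it; when a template is
 * supplied it is used directly and remains owned by the caller.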
*/ if ((rv = init_keysched(key, keysched)) != CRYPTO_SUCCESS) { kmem_free(keysched, size); return (rv); } aes_ctx->ac_flags |= PROVIDER_OWNS_KEY_SCHEDULE; aes_ctx->ac_keysched_len = size; } else { keysched = template; } aes_ctx->ac_keysched = keysched; switch (mechanism->cm_type) { case AES_CBC_MECH_INFO_TYPE: rv = cbc_init_ctx((cbc_ctx_t *)aes_ctx, mechanism->cm_param, mechanism->cm_param_len, AES_BLOCK_LEN, aes_copy_block64); break; case AES_CTR_MECH_INFO_TYPE: { CK_AES_CTR_PARAMS *pp; if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (CK_AES_CTR_PARAMS)) { return (CRYPTO_MECHANISM_PARAM_INVALID); } pp = (CK_AES_CTR_PARAMS *)(void *)mechanism->cm_param; rv = ctr_init_ctx((ctr_ctx_t *)aes_ctx, pp->ulCounterBits, pp->cb, aes_copy_block); break; } case AES_CCM_MECH_INFO_TYPE: if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (CK_AES_CCM_PARAMS)) { return (CRYPTO_MECHANISM_PARAM_INVALID); } rv = ccm_init_ctx((ccm_ctx_t *)aes_ctx, mechanism->cm_param, kmflag, is_encrypt_init, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); break; case AES_GCM_MECH_INFO_TYPE: if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (CK_AES_GCM_PARAMS)) { return (CRYPTO_MECHANISM_PARAM_INVALID); } rv = gcm_init_ctx((gcm_ctx_t *)aes_ctx, mechanism->cm_param, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); break; case AES_GMAC_MECH_INFO_TYPE: if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (CK_AES_GMAC_PARAMS)) { return (CRYPTO_MECHANISM_PARAM_INVALID); } rv = gmac_init_ctx((gcm_ctx_t *)aes_ctx, mechanism->cm_param, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); break; case AES_ECB_MECH_INFO_TYPE: aes_ctx->ac_flags |= ECB_MODE; } if (rv != CRYPTO_SUCCESS) { if (aes_ctx->ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) { bzero(keysched, size); kmem_free(keysched, size); } } return (rv); } static int process_gmac_mech(crypto_mechanism_t *mech, crypto_data_t *data, CK_AES_GCM_PARAMS *gcm_params) { /* LINTED: pointer alignment */ CK_AES_GMAC_PARAMS *params = (CK_AES_GMAC_PARAMS *)mech->cm_param; if (mech->cm_type != AES_GMAC_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); if (mech->cm_param_len != sizeof (CK_AES_GMAC_PARAMS)) return (CRYPTO_MECHANISM_PARAM_INVALID); if (params->pIv == NULL) return (CRYPTO_MECHANISM_PARAM_INVALID); gcm_params->pIv = params->pIv; gcm_params->ulIvLen = AES_GMAC_IV_LEN; gcm_params->ulTagBits = AES_GMAC_TAG_BITS; if (data == NULL) return (CRYPTO_SUCCESS); if (data->cd_format != CRYPTO_DATA_RAW) return (CRYPTO_ARGUMENTS_BAD); gcm_params->pAAD = (uchar_t *)data->cd_raw.iov_base; gcm_params->ulAADLen = data->cd_length; return (CRYPTO_SUCCESS); } static int aes_mac_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, crypto_spi_ctx_template_t template, crypto_req_handle_t req) { CK_AES_GCM_PARAMS gcm_params; crypto_mechanism_t gcm_mech; int rv; if ((rv = process_gmac_mech(mechanism, data, &gcm_params)) != CRYPTO_SUCCESS) return (rv); gcm_mech.cm_type = AES_GCM_MECH_INFO_TYPE; gcm_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS); gcm_mech.cm_param = (char *)&gcm_params; return (aes_encrypt_atomic(provider, session_id, &gcm_mech, key, &null_crypto_data, mac, template, req)); } static int aes_mac_verify_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, 
crypto_spi_ctx_template_t template, crypto_req_handle_t req) { CK_AES_GCM_PARAMS gcm_params; crypto_mechanism_t gcm_mech; int rv; if ((rv = process_gmac_mech(mechanism, data, &gcm_params)) != CRYPTO_SUCCESS) return (rv); gcm_mech.cm_type = AES_GCM_MECH_INFO_TYPE; gcm_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS); gcm_mech.cm_param = (char *)&gcm_params; return (aes_decrypt_atomic(provider, session_id, &gcm_mech, key, mac, &null_crypto_data, template, req)); } diff --git a/module/icp/io/sha1_mod.c b/module/icp/io/sha1_mod.c index 6dcee6b2ecf2..5a372e3d3754 100644 --- a/module/icp/io/sha1_mod.c +++ b/module/icp/io/sha1_mod.c @@ -1,1230 +1,1230 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include /* * The sha1 module is created with two modlinkages: * - a modlmisc that allows consumers to directly call the entry points * SHA1Init, SHA1Update, and SHA1Final. * - a modlcrypto that allows the module to register with the Kernel * Cryptographic Framework (KCF) as a software provider for the SHA1 * mechanisms. */ static struct modlcrypto modlcrypto = { &mod_cryptoops, "SHA1 Kernel SW Provider 1.1" }; static struct modlinkage modlinkage = { MODREV_1, { &modlcrypto, NULL } }; /* * Macros to access the SHA1 or SHA1-HMAC contexts from a context passed * by KCF to one of the entry points. */ #define PROV_SHA1_CTX(ctx) ((sha1_ctx_t *)(ctx)->cc_provider_private) #define PROV_SHA1_HMAC_CTX(ctx) ((sha1_hmac_ctx_t *)(ctx)->cc_provider_private) /* to extract the digest length passed as mechanism parameter */ #define PROV_SHA1_GET_DIGEST_LEN(m, len) { \ if (IS_P2ALIGNED((m)->cm_param, sizeof (ulong_t))) \ (len) = (uint32_t)*((ulong_t *)(void *)mechanism->cm_param); \ else { \ ulong_t tmp_ulong; \ bcopy((m)->cm_param, &tmp_ulong, sizeof (ulong_t)); \ (len) = (uint32_t)tmp_ulong; \ } \ } #define PROV_SHA1_DIGEST_KEY(ctx, key, len, digest) { \ SHA1Init(ctx); \ SHA1Update(ctx, key, len); \ SHA1Final(digest, ctx); \ } /* * Mechanism info structure passed to KCF during registration. 
*/ static crypto_mech_info_t sha1_mech_info_tab[] = { /* SHA1 */ {SUN_CKM_SHA1, SHA1_MECH_INFO_TYPE, CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, /* SHA1-HMAC */ {SUN_CKM_SHA1_HMAC, SHA1_HMAC_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA1_HMAC_MIN_KEY_LEN, SHA1_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* SHA1-HMAC GENERAL */ {SUN_CKM_SHA1_HMAC_GENERAL, SHA1_HMAC_GEN_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA1_HMAC_MIN_KEY_LEN, SHA1_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES} }; static void sha1_provider_status(crypto_provider_handle_t, uint_t *); static crypto_control_ops_t sha1_control_ops = { sha1_provider_status }; static int sha1_digest_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_req_handle_t); static int sha1_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static int sha1_digest_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha1_digest_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha1_digest_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static crypto_digest_ops_t sha1_digest_ops = { .digest_init = sha1_digest_init, .digest = sha1_digest, .digest_update = sha1_digest_update, .digest_key = NULL, .digest_final = sha1_digest_final, .digest_atomic = sha1_digest_atomic }; static int sha1_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int sha1_mac_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha1_mac_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha1_mac_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int sha1_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static crypto_mac_ops_t sha1_mac_ops = { .mac_init = sha1_mac_init, .mac = NULL, .mac_update = sha1_mac_update, .mac_final = sha1_mac_final, .mac_atomic = sha1_mac_atomic, .mac_verify_atomic = sha1_mac_verify_atomic }; static int sha1_create_ctx_template(crypto_provider_handle_t, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *, size_t *, crypto_req_handle_t); static int sha1_free_context(crypto_ctx_t *); static crypto_ctx_ops_t sha1_ctx_ops = { .create_ctx_template = sha1_create_ctx_template, .free_context = sha1_free_context }; static crypto_ops_t sha1_crypto_ops = {{{{{ &sha1_control_ops, &sha1_digest_ops, NULL, &sha1_mac_ops, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &sha1_ctx_ops, }}}}}; static crypto_provider_info_t sha1_prov_info = {{{{ CRYPTO_SPI_VERSION_1, "SHA1 Software Provider", CRYPTO_SW_PROVIDER, NULL, &sha1_crypto_ops, sizeof (sha1_mech_info_tab)/sizeof (crypto_mech_info_t), sha1_mech_info_tab }}}}; static crypto_kcf_provider_handle_t sha1_prov_handle = 0; int sha1_mod_init(void) { int ret; if ((ret = mod_install(&modlinkage)) != 0) return (ret); /* * Register with KCF. If the registration fails, log an * error but do not uninstall the module, since the functionality * provided by misc/sha1 should still be available. 
*/ if ((ret = crypto_register_provider(&sha1_prov_info, &sha1_prov_handle)) != CRYPTO_SUCCESS) cmn_err(CE_WARN, "sha1 _init: " "crypto_register_provider() failed (0x%x)", ret); return (0); } int sha1_mod_fini(void) { int ret; if (sha1_prov_handle != 0) { if ((ret = crypto_unregister_provider(sha1_prov_handle)) != CRYPTO_SUCCESS) { cmn_err(CE_WARN, "sha1 _fini: crypto_unregister_provider() " "failed (0x%x)", ret); return (EBUSY); } sha1_prov_handle = 0; } return (mod_remove(&modlinkage)); } /* * KCF software provider control entry points. */ -/* ARGSUSED */ static void sha1_provider_status(crypto_provider_handle_t provider, uint_t *status) { + (void) provider, (void) status; *status = CRYPTO_PROVIDER_READY; } /* * KCF software provider digest entry points. */ static int sha1_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_req_handle_t req) { if (mechanism->cm_type != SHA1_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); /* * Allocate and initialize SHA1 context. */ ctx->cc_provider_private = kmem_alloc(sizeof (sha1_ctx_t), crypto_kmflag(req)); if (ctx->cc_provider_private == NULL) return (CRYPTO_HOST_MEMORY); PROV_SHA1_CTX(ctx)->sc_mech_type = SHA1_MECH_INFO_TYPE; SHA1Init(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx); return (CRYPTO_SUCCESS); } /* * Helper SHA1 digest update function for uio data. */ static int sha1_digest_update_uio(SHA1_CTX *sha1_ctx, crypto_data_t *data) { off_t offset = data->cd_offset; size_t length = data->cd_length; uint_t vec_idx = 0; size_t cur_len; /* we support only kernel buffer */ if (zfs_uio_segflg(data->cd_uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* * Jump to the first iovec containing data to be * digested. */ offset = zfs_uio_index_at_offset(data->cd_uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(data->cd_uio)) { /* * The caller specified an offset that is larger than the * total size of the buffers it provided. */ return (CRYPTO_DATA_LEN_RANGE); } /* * Now do the digesting on the iovecs. */ while (vec_idx < zfs_uio_iovcnt(data->cd_uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(data->cd_uio, vec_idx) - offset, length); SHA1Update(sha1_ctx, (uint8_t *)zfs_uio_iovbase(data->cd_uio, vec_idx) + offset, cur_len); length -= cur_len; vec_idx++; offset = 0; } if (vec_idx == zfs_uio_iovcnt(data->cd_uio) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed, i.e. * The caller requested to digest more data than it provided. */ return (CRYPTO_DATA_LEN_RANGE); } return (CRYPTO_SUCCESS); } /* * Helper SHA1 digest final function for uio data. * digest_len is the length of the desired digest. If digest_len * is smaller than the default SHA1 digest length, the caller * must pass a scratch buffer, digest_scratch, which must * be at least SHA1_DIGEST_LENGTH bytes. */ static int sha1_digest_final_uio(SHA1_CTX *sha1_ctx, crypto_data_t *digest, ulong_t digest_len, uchar_t *digest_scratch) { off_t offset = digest->cd_offset; uint_t vec_idx = 0; /* we support only kernel buffer */ if (zfs_uio_segflg(digest->cd_uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* * Jump to the first iovec containing ptr to the digest to * be returned. */ offset = zfs_uio_index_at_offset(digest->cd_uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(digest->cd_uio)) { /* * The caller specified an offset that is * larger than the total size of the buffers * it provided. 
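/*
 * Illustrative sketch (userland C, hypothetical names): the *_update_uio
 * helpers all follow the same pattern -- skip to the iovec that contains
 * the starting offset, then feed each following iovec (or the tail of the
 * first one) to the hash until the requested length is consumed, and fail
 * with a length-range error if the iovecs run out first.
 */
#include <stddef.h>
#include <sys/uio.h>

typedef void (*update_fn_t)(void *ctx, const void *buf, size_t len);

static int
scatter_update(void *ctx, update_fn_t update, const struct iovec *iov,
    int iovcnt, size_t offset, size_t length)
{
        int i = 0;

        /* find the iovec containing the starting offset */
        while (i < iovcnt && offset >= iov[i].iov_len)
                offset -= iov[i++].iov_len;
        if (i == iovcnt)
                return (-1);            /* offset past the end of the buffers */

        /* hash each iovec in turn, starting partway into the first one */
        while (i < iovcnt && length > 0) {
                size_t cur = iov[i].iov_len - offset;

                if (cur > length)
                        cur = length;
                update(ctx, (const char *)iov[i].iov_base + offset, cur);
                length -= cur;
                offset = 0;
                i++;
        }
        return (length == 0 ? 0 : -1);  /* -1: caller asked for more than it gave */
}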
*/ return (CRYPTO_DATA_LEN_RANGE); } if (offset + digest_len <= zfs_uio_iovlen(digest->cd_uio, vec_idx)) { /* * The computed SHA1 digest will fit in the current * iovec. */ if (digest_len != SHA1_DIGEST_LENGTH) { /* * The caller requested a short digest. Digest * into a scratch buffer and return to * the user only what was requested. */ SHA1Final(digest_scratch, sha1_ctx); bcopy(digest_scratch, (uchar_t *) zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset, digest_len); } else { SHA1Final((uchar_t *)zfs_uio_iovbase(digest-> cd_uio, vec_idx) + offset, sha1_ctx); } } else { /* * The computed digest will be crossing one or more iovec's. * This is bad performance-wise but we need to support it. * Allocate a small scratch buffer on the stack and * copy it piece meal to the specified digest iovec's. */ uchar_t digest_tmp[SHA1_DIGEST_LENGTH]; off_t scratch_offset = 0; size_t length = digest_len; size_t cur_len; SHA1Final(digest_tmp, sha1_ctx); while (vec_idx < zfs_uio_iovcnt(digest->cd_uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(digest->cd_uio, vec_idx) - offset, length); bcopy(digest_tmp + scratch_offset, zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset, cur_len); length -= cur_len; vec_idx++; scratch_offset += cur_len; offset = 0; } if (vec_idx == zfs_uio_iovcnt(digest->cd_uio) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed, i.e. * The caller requested to digest more data than it * provided. */ return (CRYPTO_DATA_LEN_RANGE); } } return (CRYPTO_SUCCESS); } -/* ARGSUSED */ static int sha1_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; ASSERT(ctx->cc_provider_private != NULL); /* * We need to just return the length needed to store the output. * We should not destroy the context for the following cases. */ if ((digest->cd_length == 0) || (digest->cd_length < SHA1_DIGEST_LENGTH)) { digest->cd_length = SHA1_DIGEST_LENGTH; return (CRYPTO_BUFFER_TOO_SMALL); } /* * Do the SHA1 update on the specified input data. */ switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA1Update(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha1_digest_update_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret != CRYPTO_SUCCESS) { /* the update failed, free context and bail */ kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t)); ctx->cc_provider_private = NULL; digest->cd_length = 0; return (ret); } /* * Do a SHA1 final, must be done separately since the digest * type can be different than the input data type. 
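/*
 * Illustrative sketch (userland C, hypothetical names): when the output
 * digest does not fit in a single iovec, the *_final_uio helpers finalize
 * into a small stack scratch buffer and copy it out piecemeal -- the
 * mirror image of the update loop above.
 */
#include <stddef.h>
#include <string.h>
#include <sys/uio.h>

static int
scatter_copy_out(const unsigned char *src, size_t len,
    const struct iovec *iov, int iovcnt, size_t offset)
{
        int i = 0;
        size_t done = 0;

        /* skip to the iovec containing the output offset */
        while (i < iovcnt && offset >= iov[i].iov_len)
                offset -= iov[i++].iov_len;

        /* copy the digest across as many iovecs as it spans */
        while (i < iovcnt && done < len) {
                size_t cur = iov[i].iov_len - offset;

                if (cur > len - done)
                        cur = len - done;
                memcpy((char *)iov[i].iov_base + offset, src + done, cur);
                done += cur;
                offset = 0;
                i++;
        }
        return (done == len ? 0 : -1);
}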
*/ switch (digest->cd_format) { case CRYPTO_DATA_RAW: SHA1Final((unsigned char *)digest->cd_raw.iov_base + digest->cd_offset, &PROV_SHA1_CTX(ctx)->sc_sha1_ctx); break; case CRYPTO_DATA_UIO: ret = sha1_digest_final_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, digest, SHA1_DIGEST_LENGTH, NULL); break; default: ret = CRYPTO_ARGUMENTS_BAD; } /* all done, free context and return */ if (ret == CRYPTO_SUCCESS) { digest->cd_length = SHA1_DIGEST_LENGTH; } else { digest->cd_length = 0; } kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t)); ctx->cc_provider_private = NULL; return (ret); } -/* ARGSUSED */ static int sha1_digest_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; ASSERT(ctx->cc_provider_private != NULL); /* * Do the SHA1 update on the specified input data. */ switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA1Update(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha1_digest_update_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } return (ret); } -/* ARGSUSED */ static int sha1_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; ASSERT(ctx->cc_provider_private != NULL); /* * We need to just return the length needed to store the output. * We should not destroy the context for the following cases. */ if ((digest->cd_length == 0) || (digest->cd_length < SHA1_DIGEST_LENGTH)) { digest->cd_length = SHA1_DIGEST_LENGTH; return (CRYPTO_BUFFER_TOO_SMALL); } /* * Do a SHA1 final. */ switch (digest->cd_format) { case CRYPTO_DATA_RAW: SHA1Final((unsigned char *)digest->cd_raw.iov_base + digest->cd_offset, &PROV_SHA1_CTX(ctx)->sc_sha1_ctx); break; case CRYPTO_DATA_UIO: ret = sha1_digest_final_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, digest, SHA1_DIGEST_LENGTH, NULL); break; default: ret = CRYPTO_ARGUMENTS_BAD; } /* all done, free context and return */ if (ret == CRYPTO_SUCCESS) { digest->cd_length = SHA1_DIGEST_LENGTH; } else { digest->cd_length = 0; } kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t)); ctx->cc_provider_private = NULL; return (ret); } -/* ARGSUSED */ static int sha1_digest_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req) { + (void) provider, (void) session_id, (void) req; int ret = CRYPTO_SUCCESS; SHA1_CTX sha1_ctx; if (mechanism->cm_type != SHA1_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); /* * Do the SHA1 init. */ SHA1Init(&sha1_ctx); /* * Do the SHA1 update on the specified input data. */ switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA1Update(&sha1_ctx, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha1_digest_update_uio(&sha1_ctx, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret != CRYPTO_SUCCESS) { /* the update failed, bail */ digest->cd_length = 0; return (ret); } /* * Do a SHA1 final, must be done separately since the digest * type can be different than the input data type. 
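/*
 * Illustrative sketch (hypothetical API, not the real KCF consumer
 * interface): the CRYPTO_BUFFER_TOO_SMALL convention used above lets a
 * caller size its output buffer in two passes -- probe with a zero (or
 * too-small) length, read back the required length, then retry.  The
 * provider must not tear down its context on the probe call, which is
 * why the length check happens before any SHA1Final().
 */
#include <stdlib.h>

#define	EX_OK           0
#define	EX_TOO_SMALL    1

/* hypothetical provider entry point: on EX_TOO_SMALL, *lenp is the needed size */
extern int provider_digest_final(void *ctx, unsigned char *buf, size_t *lenp);

static unsigned char *
get_digest(void *ctx, size_t *lenp)
{
        unsigned char *buf;

        *lenp = 0;
        if (provider_digest_final(ctx, NULL, lenp) != EX_TOO_SMALL)
                return (NULL);          /* probe was expected to "fail" */
        if ((buf = malloc(*lenp)) == NULL)
                return (NULL);
        if (provider_digest_final(ctx, buf, lenp) != EX_OK) {
                free(buf);
                return (NULL);
        }
        return (buf);
}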
*/ switch (digest->cd_format) { case CRYPTO_DATA_RAW: SHA1Final((unsigned char *)digest->cd_raw.iov_base + digest->cd_offset, &sha1_ctx); break; case CRYPTO_DATA_UIO: ret = sha1_digest_final_uio(&sha1_ctx, digest, SHA1_DIGEST_LENGTH, NULL); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) { digest->cd_length = SHA1_DIGEST_LENGTH; } else { digest->cd_length = 0; } return (ret); } /* * KCF software provider mac entry points. * * SHA1 HMAC is: SHA1(key XOR opad, SHA1(key XOR ipad, text)) * * Init: * The initialization routine initializes what we denote * as the inner and outer contexts by doing * - for inner context: SHA1(key XOR ipad) * - for outer context: SHA1(key XOR opad) * * Update: * Each subsequent SHA1 HMAC update will result in an * update of the inner context with the specified data. * * Final: * The SHA1 HMAC final will do a SHA1 final operation on the * inner context, and the resulting digest will be used * as the data for an update on the outer context. Last * but not least, a SHA1 final on the outer context will * be performed to obtain the SHA1 HMAC digest to return * to the user. */ /* * Initialize a SHA1-HMAC context. */ static void sha1_mac_init_ctx(sha1_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes) { uint32_t ipad[SHA1_HMAC_INTS_PER_BLOCK]; uint32_t opad[SHA1_HMAC_INTS_PER_BLOCK]; uint_t i; bzero(ipad, SHA1_HMAC_BLOCK_SIZE); bzero(opad, SHA1_HMAC_BLOCK_SIZE); bcopy(keyval, ipad, length_in_bytes); bcopy(keyval, opad, length_in_bytes); /* XOR key with ipad (0x36) and opad (0x5c) */ for (i = 0; i < SHA1_HMAC_INTS_PER_BLOCK; i++) { ipad[i] ^= 0x36363636; opad[i] ^= 0x5c5c5c5c; } /* perform SHA1 on ipad */ SHA1Init(&ctx->hc_icontext); SHA1Update(&ctx->hc_icontext, (uint8_t *)ipad, SHA1_HMAC_BLOCK_SIZE); /* perform SHA1 on opad */ SHA1Init(&ctx->hc_ocontext); SHA1Update(&ctx->hc_ocontext, (uint8_t *)opad, SHA1_HMAC_BLOCK_SIZE); } /* */ static int sha1_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { int ret = CRYPTO_SUCCESS; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE && mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); /* Add support for key by attributes (RFE 4706552) */ if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); ctx->cc_provider_private = kmem_alloc(sizeof (sha1_hmac_ctx_t), crypto_kmflag(req)); if (ctx->cc_provider_private == NULL) return (CRYPTO_HOST_MEMORY); if (ctx_template != NULL) { /* reuse context template */ bcopy(ctx_template, PROV_SHA1_HMAC_CTX(ctx), sizeof (sha1_hmac_ctx_t)); } else { /* no context template, compute context */ if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) { uchar_t digested_key[SHA1_DIGEST_LENGTH]; sha1_hmac_ctx_t *hmac_ctx = ctx->cc_provider_private; /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. */ PROV_SHA1_DIGEST_KEY(&hmac_ctx->hc_icontext, key->ck_data, keylen_in_bytes, digested_key); sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx), digested_key, SHA1_DIGEST_LENGTH); } else { sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx), key->ck_data, keylen_in_bytes); } } /* * Get the mechanism parameters, if applicable. 
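/*
 * Illustrative sketch (hypothetical hx_* hash API, single-shot form): the
 * comment block above describes the standard HMAC construction,
 *
 *	HMAC(K, m) = H((K ^ opad) || H((K ^ ipad) || m))
 *
 * with ipad = 0x36 and opad = 0x5c repeated across one hash block.  The
 * provider keeps the two partially-hashed pads as the "inner" and "outer"
 * contexts so that the work can be split across init/update/final calls;
 * here it is collapsed into one function for clarity.
 */
#include <stddef.h>
#include <string.h>

#define	HX_BLOCK	64	/* SHA1 block size in bytes */
#define	HX_DIGEST	20	/* SHA1 digest size in bytes */

/* hypothetical streaming hash interface; the state layout is a placeholder */
typedef struct hx_ctx { unsigned char state[128]; } hx_ctx_t;
extern void hx_init(hx_ctx_t *);
extern void hx_update(hx_ctx_t *, const void *, size_t);
extern void hx_final(unsigned char *, hx_ctx_t *);

static void
hmac_once(const unsigned char *key, size_t keylen,	/* keylen <= HX_BLOCK */
    const void *msg, size_t msglen, unsigned char mac[HX_DIGEST])
{
        unsigned char ipad[HX_BLOCK], opad[HX_BLOCK], inner[HX_DIGEST];
        hx_ctx_t ictx, octx;
        size_t i;

        /* zero-pad the key to one block, then derive the two pads */
        memset(ipad, 0, sizeof (ipad));
        memcpy(ipad, key, keylen);
        memcpy(opad, ipad, sizeof (opad));
        for (i = 0; i < HX_BLOCK; i++) {
                ipad[i] ^= 0x36;
                opad[i] ^= 0x5c;
        }

        hx_init(&ictx);				/* inner = H(K^ipad || m) */
        hx_update(&ictx, ipad, sizeof (ipad));
        hx_update(&ictx, msg, msglen);
        hx_final(inner, &ictx);

        hx_init(&octx);				/* outer = H(K^opad || inner) */
        hx_update(&octx, opad, sizeof (opad));
        hx_update(&octx, inner, sizeof (inner));
        hx_final(mac, &octx);
}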
*/ PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type; if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) { if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (ulong_t)) ret = CRYPTO_MECHANISM_PARAM_INVALID; PROV_SHA1_GET_DIGEST_LEN(mechanism, PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len); if (PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len > SHA1_DIGEST_LENGTH) ret = CRYPTO_MECHANISM_PARAM_INVALID; } if (ret != CRYPTO_SUCCESS) { bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t)); kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t)); ctx->cc_provider_private = NULL; } return (ret); } -/* ARGSUSED */ static int sha1_mac_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; ASSERT(ctx->cc_provider_private != NULL); /* * Do a SHA1 update of the inner context using the specified * data. */ switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA1Update(&PROV_SHA1_HMAC_CTX(ctx)->hc_icontext, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha1_digest_update_uio( &PROV_SHA1_HMAC_CTX(ctx)->hc_icontext, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } return (ret); } -/* ARGSUSED */ static int sha1_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; uchar_t digest[SHA1_DIGEST_LENGTH]; uint32_t digest_len = SHA1_DIGEST_LENGTH; ASSERT(ctx->cc_provider_private != NULL); if (PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) digest_len = PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len; /* * We need to just return the length needed to store the output. * We should not destroy the context for the following cases. */ if ((mac->cd_length == 0) || (mac->cd_length < digest_len)) { mac->cd_length = digest_len; return (CRYPTO_BUFFER_TOO_SMALL); } /* * Do a SHA1 final on the inner context. */ SHA1Final(digest, &PROV_SHA1_HMAC_CTX(ctx)->hc_icontext); /* * Do a SHA1 update on the outer context, feeding the inner * digest as data. */ SHA1Update(&PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext, digest, SHA1_DIGEST_LENGTH); /* * Do a SHA1 final on the outer context, storing the computing * digest in the users buffer. */ switch (mac->cd_format) { case CRYPTO_DATA_RAW: if (digest_len != SHA1_DIGEST_LENGTH) { /* * The caller requested a short digest. Digest * into a scratch buffer and return to * the user only what was requested. 
*/ SHA1Final(digest, &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext); bcopy(digest, (unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, digest_len); } else { SHA1Final((unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext); } break; case CRYPTO_DATA_UIO: ret = sha1_digest_final_uio( &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext, mac, digest_len, digest); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) { mac->cd_length = digest_len; } else { mac->cd_length = 0; } bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t)); kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t)); ctx->cc_provider_private = NULL; return (ret); } #define SHA1_MAC_UPDATE(data, ctx, ret) { \ switch (data->cd_format) { \ case CRYPTO_DATA_RAW: \ SHA1Update(&(ctx).hc_icontext, \ (uint8_t *)data->cd_raw.iov_base + \ data->cd_offset, data->cd_length); \ break; \ case CRYPTO_DATA_UIO: \ ret = sha1_digest_update_uio(&(ctx).hc_icontext, data); \ break; \ default: \ ret = CRYPTO_ARGUMENTS_BAD; \ } \ } -/* ARGSUSED */ static int sha1_mac_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { + (void) provider, (void) session_id, (void) req; int ret = CRYPTO_SUCCESS; uchar_t digest[SHA1_DIGEST_LENGTH]; sha1_hmac_ctx_t sha1_hmac_ctx; uint32_t digest_len = SHA1_DIGEST_LENGTH; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE && mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); /* Add support for key by attributes (RFE 4706552) */ if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); if (ctx_template != NULL) { /* reuse context template */ bcopy(ctx_template, &sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); } else { /* no context template, initialize context */ if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) { /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. */ PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx.hc_icontext, key->ck_data, keylen_in_bytes, digest); sha1_mac_init_ctx(&sha1_hmac_ctx, digest, SHA1_DIGEST_LENGTH); } else { sha1_mac_init_ctx(&sha1_hmac_ctx, key->ck_data, keylen_in_bytes); } } /* get the mechanism parameters, if applicable */ if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) { if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (ulong_t)) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } PROV_SHA1_GET_DIGEST_LEN(mechanism, digest_len); if (digest_len > SHA1_DIGEST_LENGTH) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } } /* do a SHA1 update of the inner context using the specified data */ SHA1_MAC_UPDATE(data, sha1_hmac_ctx, ret); if (ret != CRYPTO_SUCCESS) /* the update failed, free context and bail */ goto bail; /* * Do a SHA1 final on the inner context. */ SHA1Final(digest, &sha1_hmac_ctx.hc_icontext); /* * Do an SHA1 update on the outer context, feeding the inner * digest as data. */ SHA1Update(&sha1_hmac_ctx.hc_ocontext, digest, SHA1_DIGEST_LENGTH); /* * Do a SHA1 final on the outer context, storing the computed * digest in the users buffer. */ switch (mac->cd_format) { case CRYPTO_DATA_RAW: if (digest_len != SHA1_DIGEST_LENGTH) { /* * The caller requested a short digest. Digest * into a scratch buffer and return to * the user only what was requested. 
*/ SHA1Final(digest, &sha1_hmac_ctx.hc_ocontext); bcopy(digest, (unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, digest_len); } else { SHA1Final((unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, &sha1_hmac_ctx.hc_ocontext); } break; case CRYPTO_DATA_UIO: ret = sha1_digest_final_uio(&sha1_hmac_ctx.hc_ocontext, mac, digest_len, digest); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) { mac->cd_length = digest_len; } else { mac->cd_length = 0; } /* Extra paranoia: zeroize the context on the stack */ bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); return (ret); bail: bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); mac->cd_length = 0; return (ret); } -/* ARGSUSED */ static int sha1_mac_verify_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { + (void) provider, (void) session_id, (void) req; int ret = CRYPTO_SUCCESS; uchar_t digest[SHA1_DIGEST_LENGTH]; sha1_hmac_ctx_t sha1_hmac_ctx; uint32_t digest_len = SHA1_DIGEST_LENGTH; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE && mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); /* Add support for key by attributes (RFE 4706552) */ if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); if (ctx_template != NULL) { /* reuse context template */ bcopy(ctx_template, &sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); } else { /* no context template, initialize context */ if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) { /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. */ PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx.hc_icontext, key->ck_data, keylen_in_bytes, digest); sha1_mac_init_ctx(&sha1_hmac_ctx, digest, SHA1_DIGEST_LENGTH); } else { sha1_mac_init_ctx(&sha1_hmac_ctx, key->ck_data, keylen_in_bytes); } } /* get the mechanism parameters, if applicable */ if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) { if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (ulong_t)) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } PROV_SHA1_GET_DIGEST_LEN(mechanism, digest_len); if (digest_len > SHA1_DIGEST_LENGTH) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } } if (mac->cd_length != digest_len) { ret = CRYPTO_INVALID_MAC; goto bail; } /* do a SHA1 update of the inner context using the specified data */ SHA1_MAC_UPDATE(data, sha1_hmac_ctx, ret); if (ret != CRYPTO_SUCCESS) /* the update failed, free context and bail */ goto bail; /* do a SHA1 final on the inner context */ SHA1Final(digest, &sha1_hmac_ctx.hc_icontext); /* * Do an SHA1 update on the outer context, feeding the inner * digest as data. */ SHA1Update(&sha1_hmac_ctx.hc_ocontext, digest, SHA1_DIGEST_LENGTH); /* * Do a SHA1 final on the outer context, storing the computed * digest in the users buffer. */ SHA1Final(digest, &sha1_hmac_ctx.hc_ocontext); /* * Compare the computed digest against the expected digest passed * as argument. 
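/*
 * Illustrative sketch (hypothetical names, reusing hmac_once() from the
 * sketch above): mac_verify_atomic never inspects the tag directly -- it
 * recomputes the tag over the supplied data with the supplied key and
 * compares byte for byte, after first rejecting any length mismatch, and
 * it scrubs the keyed stack state on every exit path.
 */
#include <string.h>

extern void hmac_once(const unsigned char *, size_t, const void *, size_t,
    unsigned char [20]);

static int
verify_mac(const unsigned char *key, size_t keylen,
    const void *msg, size_t msglen,
    const unsigned char *tag, size_t taglen)
{
        unsigned char computed[20];
        int ok;

        if (taglen != sizeof (computed))
                return (0);			/* wrong length: reject */
        hmac_once(key, keylen, msg, msglen, computed);
        ok = (memcmp(computed, tag, taglen) == 0);
        memset(computed, 0, sizeof (computed));	/* scrub the scratch tag */
        return (ok);
}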
*/ switch (mac->cd_format) { case CRYPTO_DATA_RAW: if (bcmp(digest, (unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, digest_len) != 0) ret = CRYPTO_INVALID_MAC; break; case CRYPTO_DATA_UIO: { off_t offset = mac->cd_offset; uint_t vec_idx = 0; off_t scratch_offset = 0; size_t length = digest_len; size_t cur_len; /* we support only kernel buffer */ if (zfs_uio_segflg(mac->cd_uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* jump to the first iovec containing the expected digest */ offset = zfs_uio_index_at_offset(mac->cd_uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(mac->cd_uio)) { /* * The caller specified an offset that is * larger than the total size of the buffers * it provided. */ ret = CRYPTO_DATA_LEN_RANGE; break; } /* do the comparison of computed digest vs specified one */ while (vec_idx < zfs_uio_iovcnt(mac->cd_uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(mac->cd_uio, vec_idx) - offset, length); if (bcmp(digest + scratch_offset, zfs_uio_iovbase(mac->cd_uio, vec_idx) + offset, cur_len) != 0) { ret = CRYPTO_INVALID_MAC; break; } length -= cur_len; vec_idx++; scratch_offset += cur_len; offset = 0; } break; } default: ret = CRYPTO_ARGUMENTS_BAD; } bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); return (ret); bail: bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); mac->cd_length = 0; return (ret); } /* * KCF software provider context management entry points. */ -/* ARGSUSED */ static int sha1_create_ctx_template(crypto_provider_handle_t provider, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size, crypto_req_handle_t req) { + (void) provider; sha1_hmac_ctx_t *sha1_hmac_ctx_tmpl; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); if ((mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE) && (mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)) { return (CRYPTO_MECHANISM_INVALID); } /* Add support for key by attributes (RFE 4706552) */ if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); /* * Allocate and initialize SHA1 context. */ sha1_hmac_ctx_tmpl = kmem_alloc(sizeof (sha1_hmac_ctx_t), crypto_kmflag(req)); if (sha1_hmac_ctx_tmpl == NULL) return (CRYPTO_HOST_MEMORY); if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) { uchar_t digested_key[SHA1_DIGEST_LENGTH]; /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. */ PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx_tmpl->hc_icontext, key->ck_data, keylen_in_bytes, digested_key); sha1_mac_init_ctx(sha1_hmac_ctx_tmpl, digested_key, SHA1_DIGEST_LENGTH); } else { sha1_mac_init_ctx(sha1_hmac_ctx_tmpl, key->ck_data, keylen_in_bytes); } sha1_hmac_ctx_tmpl->hc_mech_type = mechanism->cm_type; *ctx_template = (crypto_spi_ctx_template_t)sha1_hmac_ctx_tmpl; *ctx_template_size = sizeof (sha1_hmac_ctx_t); return (CRYPTO_SUCCESS); } static int sha1_free_context(crypto_ctx_t *ctx) { uint_t ctx_len; sha1_mech_type_t mech_type; if (ctx->cc_provider_private == NULL) return (CRYPTO_SUCCESS); /* * We have to free either SHA1 or SHA1-HMAC contexts, which * have different lengths. 
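/*
 * Illustrative sketch (reusing the hypothetical hx_* interface from the
 * earlier sketch): a context template is just the keyed inner/outer hash
 * states computed once per key by create_ctx_template().  Each
 * per-message operation copies the template instead of re-hashing the
 * padded key, saving two block compressions per MAC when the same key is
 * reused many times.
 */
#include <string.h>

typedef struct {
        hx_ctx_t icontext;	/* H(K ^ ipad), ready to absorb message data */
        hx_ctx_t ocontext;	/* H(K ^ opad), ready to absorb the inner digest */
} hmac_tmpl_t;

static void
hmac_with_template(const hmac_tmpl_t *tmpl, const void *msg, size_t msglen,
    unsigned char mac[HX_DIGEST])
{
        hmac_tmpl_t t = *tmpl;		/* per-message working copy */
        unsigned char inner[HX_DIGEST];

        hx_update(&t.icontext, msg, msglen);
        hx_final(inner, &t.icontext);
        hx_update(&t.ocontext, inner, sizeof (inner));
        hx_final(mac, &t.ocontext);
        memset(&t, 0, sizeof (t));	/* scrub the keyed working copy */
}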
*/ mech_type = PROV_SHA1_CTX(ctx)->sc_mech_type; if (mech_type == SHA1_MECH_INFO_TYPE) ctx_len = sizeof (sha1_ctx_t); else { ASSERT(mech_type == SHA1_HMAC_MECH_INFO_TYPE || mech_type == SHA1_HMAC_GEN_MECH_INFO_TYPE); ctx_len = sizeof (sha1_hmac_ctx_t); } bzero(ctx->cc_provider_private, ctx_len); kmem_free(ctx->cc_provider_private, ctx_len); ctx->cc_provider_private = NULL; return (CRYPTO_SUCCESS); } diff --git a/module/icp/io/sha2_mod.c b/module/icp/io/sha2_mod.c index d690cd0bcb05..77957ee114d9 100644 --- a/module/icp/io/sha2_mod.c +++ b/module/icp/io/sha2_mod.c @@ -1,1399 +1,1399 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #define _SHA2_IMPL #include #include /* * The sha2 module is created with two modlinkages: * - a modlmisc that allows consumers to directly call the entry points * SHA2Init, SHA2Update, and SHA2Final. * - a modlcrypto that allows the module to register with the Kernel * Cryptographic Framework (KCF) as a software provider for the SHA2 * mechanisms. */ static struct modlcrypto modlcrypto = { &mod_cryptoops, "SHA2 Kernel SW Provider" }; static struct modlinkage modlinkage = { MODREV_1, {&modlcrypto, NULL} }; /* * Macros to access the SHA2 or SHA2-HMAC contexts from a context passed * by KCF to one of the entry points. */ #define PROV_SHA2_CTX(ctx) ((sha2_ctx_t *)(ctx)->cc_provider_private) #define PROV_SHA2_HMAC_CTX(ctx) ((sha2_hmac_ctx_t *)(ctx)->cc_provider_private) /* to extract the digest length passed as mechanism parameter */ #define PROV_SHA2_GET_DIGEST_LEN(m, len) { \ if (IS_P2ALIGNED((m)->cm_param, sizeof (ulong_t))) \ (len) = (uint32_t)*((ulong_t *)(m)->cm_param); \ else { \ ulong_t tmp_ulong; \ bcopy((m)->cm_param, &tmp_ulong, sizeof (ulong_t)); \ (len) = (uint32_t)tmp_ulong; \ } \ } #define PROV_SHA2_DIGEST_KEY(mech, ctx, key, len, digest) { \ SHA2Init(mech, ctx); \ SHA2Update(ctx, key, len); \ SHA2Final(digest, ctx); \ } /* * Mechanism info structure passed to KCF during registration. 
*/ static crypto_mech_info_t sha2_mech_info_tab[] = { /* SHA256 */ {SUN_CKM_SHA256, SHA256_MECH_INFO_TYPE, CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, /* SHA256-HMAC */ {SUN_CKM_SHA256_HMAC, SHA256_HMAC_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* SHA256-HMAC GENERAL */ {SUN_CKM_SHA256_HMAC_GENERAL, SHA256_HMAC_GEN_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* SHA384 */ {SUN_CKM_SHA384, SHA384_MECH_INFO_TYPE, CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, /* SHA384-HMAC */ {SUN_CKM_SHA384_HMAC, SHA384_HMAC_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* SHA384-HMAC GENERAL */ {SUN_CKM_SHA384_HMAC_GENERAL, SHA384_HMAC_GEN_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* SHA512 */ {SUN_CKM_SHA512, SHA512_MECH_INFO_TYPE, CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, /* SHA512-HMAC */ {SUN_CKM_SHA512_HMAC, SHA512_HMAC_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* SHA512-HMAC GENERAL */ {SUN_CKM_SHA512_HMAC_GENERAL, SHA512_HMAC_GEN_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES} }; static void sha2_provider_status(crypto_provider_handle_t, uint_t *); static crypto_control_ops_t sha2_control_ops = { sha2_provider_status }; static int sha2_digest_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_req_handle_t); static int sha2_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static int sha2_digest_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha2_digest_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha2_digest_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static crypto_digest_ops_t sha2_digest_ops = { .digest_init = sha2_digest_init, .digest = sha2_digest, .digest_update = sha2_digest_update, .digest_key = NULL, .digest_final = sha2_digest_final, .digest_atomic = sha2_digest_atomic }; static int sha2_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int sha2_mac_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha2_mac_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha2_mac_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int sha2_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static crypto_mac_ops_t sha2_mac_ops = { .mac_init = sha2_mac_init, .mac = NULL, .mac_update = sha2_mac_update, .mac_final = sha2_mac_final, .mac_atomic = sha2_mac_atomic, .mac_verify_atomic = sha2_mac_verify_atomic }; static int sha2_create_ctx_template(crypto_provider_handle_t, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *, size_t *, 
crypto_req_handle_t); static int sha2_free_context(crypto_ctx_t *); static crypto_ctx_ops_t sha2_ctx_ops = { .create_ctx_template = sha2_create_ctx_template, .free_context = sha2_free_context }; static crypto_ops_t sha2_crypto_ops = {{{{{ &sha2_control_ops, &sha2_digest_ops, NULL, &sha2_mac_ops, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &sha2_ctx_ops }}}}}; static crypto_provider_info_t sha2_prov_info = {{{{ CRYPTO_SPI_VERSION_1, "SHA2 Software Provider", CRYPTO_SW_PROVIDER, NULL, &sha2_crypto_ops, sizeof (sha2_mech_info_tab)/sizeof (crypto_mech_info_t), sha2_mech_info_tab }}}}; static crypto_kcf_provider_handle_t sha2_prov_handle = 0; int sha2_mod_init(void) { int ret; if ((ret = mod_install(&modlinkage)) != 0) return (ret); /* * Register with KCF. If the registration fails, log an * error but do not uninstall the module, since the functionality * provided by misc/sha2 should still be available. */ if ((ret = crypto_register_provider(&sha2_prov_info, &sha2_prov_handle)) != CRYPTO_SUCCESS) cmn_err(CE_WARN, "sha2 _init: " "crypto_register_provider() failed (0x%x)", ret); return (0); } int sha2_mod_fini(void) { int ret; if (sha2_prov_handle != 0) { if ((ret = crypto_unregister_provider(sha2_prov_handle)) != CRYPTO_SUCCESS) { cmn_err(CE_WARN, "sha2 _fini: crypto_unregister_provider() " "failed (0x%x)", ret); return (EBUSY); } sha2_prov_handle = 0; } return (mod_remove(&modlinkage)); } /* * KCF software provider control entry points. */ -/* ARGSUSED */ static void sha2_provider_status(crypto_provider_handle_t provider, uint_t *status) { + (void) provider; *status = CRYPTO_PROVIDER_READY; } /* * KCF software provider digest entry points. */ static int sha2_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_req_handle_t req) { /* * Allocate and initialize SHA2 context. */ ctx->cc_provider_private = kmem_alloc(sizeof (sha2_ctx_t), crypto_kmflag(req)); if (ctx->cc_provider_private == NULL) return (CRYPTO_HOST_MEMORY); PROV_SHA2_CTX(ctx)->sc_mech_type = mechanism->cm_type; SHA2Init(mechanism->cm_type, &PROV_SHA2_CTX(ctx)->sc_sha2_ctx); return (CRYPTO_SUCCESS); } /* * Helper SHA2 digest update function for uio data. */ static int sha2_digest_update_uio(SHA2_CTX *sha2_ctx, crypto_data_t *data) { off_t offset = data->cd_offset; size_t length = data->cd_length; uint_t vec_idx = 0; size_t cur_len; /* we support only kernel buffer */ if (zfs_uio_segflg(data->cd_uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* * Jump to the first iovec containing data to be * digested. */ offset = zfs_uio_index_at_offset(data->cd_uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(data->cd_uio)) { /* * The caller specified an offset that is larger than the * total size of the buffers it provided. */ return (CRYPTO_DATA_LEN_RANGE); } /* * Now do the digesting on the iovecs. */ while (vec_idx < zfs_uio_iovcnt(data->cd_uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(data->cd_uio, vec_idx) - offset, length); SHA2Update(sha2_ctx, (uint8_t *)zfs_uio_iovbase(data->cd_uio, vec_idx) + offset, cur_len); length -= cur_len; vec_idx++; offset = 0; } if (vec_idx == zfs_uio_iovcnt(data->cd_uio) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed, i.e. * The caller requested to digest more data than it provided. */ return (CRYPTO_DATA_LEN_RANGE); } return (CRYPTO_SUCCESS); } /* * Helper SHA2 digest final function for uio data. * digest_len is the length of the desired digest. 
If digest_len * is smaller than the default SHA2 digest length, the caller * must pass a scratch buffer, digest_scratch, which must * be at least the algorithm's digest length bytes. */ static int sha2_digest_final_uio(SHA2_CTX *sha2_ctx, crypto_data_t *digest, ulong_t digest_len, uchar_t *digest_scratch) { off_t offset = digest->cd_offset; uint_t vec_idx = 0; /* we support only kernel buffer */ if (zfs_uio_segflg(digest->cd_uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* * Jump to the first iovec containing ptr to the digest to * be returned. */ offset = zfs_uio_index_at_offset(digest->cd_uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(digest->cd_uio)) { /* * The caller specified an offset that is * larger than the total size of the buffers * it provided. */ return (CRYPTO_DATA_LEN_RANGE); } if (offset + digest_len <= zfs_uio_iovlen(digest->cd_uio, vec_idx)) { /* * The computed SHA2 digest will fit in the current * iovec. */ if (((sha2_ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) && (digest_len != SHA256_DIGEST_LENGTH)) || ((sha2_ctx->algotype > SHA256_HMAC_GEN_MECH_INFO_TYPE) && (digest_len != SHA512_DIGEST_LENGTH))) { /* * The caller requested a short digest. Digest * into a scratch buffer and return to * the user only what was requested. */ SHA2Final(digest_scratch, sha2_ctx); bcopy(digest_scratch, (uchar_t *) zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset, digest_len); } else { SHA2Final((uchar_t *)zfs_uio_iovbase(digest-> cd_uio, vec_idx) + offset, sha2_ctx); } } else { /* * The computed digest will be crossing one or more iovec's. * This is bad performance-wise but we need to support it. * Allocate a small scratch buffer on the stack and * copy it piece meal to the specified digest iovec's. */ uchar_t digest_tmp[SHA512_DIGEST_LENGTH]; off_t scratch_offset = 0; size_t length = digest_len; size_t cur_len; SHA2Final(digest_tmp, sha2_ctx); while (vec_idx < zfs_uio_iovcnt(digest->cd_uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(digest->cd_uio, vec_idx) - offset, length); bcopy(digest_tmp + scratch_offset, zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset, cur_len); length -= cur_len; vec_idx++; scratch_offset += cur_len; offset = 0; } if (vec_idx == zfs_uio_iovcnt(digest->cd_uio) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed, i.e. * The caller requested to digest more data than it * provided. */ return (CRYPTO_DATA_LEN_RANGE); } } return (CRYPTO_SUCCESS); } -/* ARGSUSED */ static int sha2_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; uint_t sha_digest_len; ASSERT(ctx->cc_provider_private != NULL); switch (PROV_SHA2_CTX(ctx)->sc_mech_type) { case SHA256_MECH_INFO_TYPE: sha_digest_len = SHA256_DIGEST_LENGTH; break; case SHA384_MECH_INFO_TYPE: sha_digest_len = SHA384_DIGEST_LENGTH; break; case SHA512_MECH_INFO_TYPE: sha_digest_len = SHA512_DIGEST_LENGTH; break; default: return (CRYPTO_MECHANISM_INVALID); } /* * We need to just return the length needed to store the output. * We should not destroy the context for the following cases. */ if ((digest->cd_length == 0) || (digest->cd_length < sha_digest_len)) { digest->cd_length = sha_digest_len; return (CRYPTO_BUFFER_TOO_SMALL); } /* * Do the SHA2 update on the specified input data. 
*/ switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA2Update(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha2_digest_update_uio(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret != CRYPTO_SUCCESS) { /* the update failed, free context and bail */ kmem_free(ctx->cc_provider_private, sizeof (sha2_ctx_t)); ctx->cc_provider_private = NULL; digest->cd_length = 0; return (ret); } /* * Do a SHA2 final, must be done separately since the digest * type can be different than the input data type. */ switch (digest->cd_format) { case CRYPTO_DATA_RAW: SHA2Final((unsigned char *)digest->cd_raw.iov_base + digest->cd_offset, &PROV_SHA2_CTX(ctx)->sc_sha2_ctx); break; case CRYPTO_DATA_UIO: ret = sha2_digest_final_uio(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx, digest, sha_digest_len, NULL); break; default: ret = CRYPTO_ARGUMENTS_BAD; } /* all done, free context and return */ if (ret == CRYPTO_SUCCESS) digest->cd_length = sha_digest_len; else digest->cd_length = 0; kmem_free(ctx->cc_provider_private, sizeof (sha2_ctx_t)); ctx->cc_provider_private = NULL; return (ret); } -/* ARGSUSED */ static int sha2_digest_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; ASSERT(ctx->cc_provider_private != NULL); /* * Do the SHA2 update on the specified input data. */ switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA2Update(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha2_digest_update_uio(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } return (ret); } -/* ARGSUSED */ static int sha2_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; uint_t sha_digest_len; ASSERT(ctx->cc_provider_private != NULL); switch (PROV_SHA2_CTX(ctx)->sc_mech_type) { case SHA256_MECH_INFO_TYPE: sha_digest_len = SHA256_DIGEST_LENGTH; break; case SHA384_MECH_INFO_TYPE: sha_digest_len = SHA384_DIGEST_LENGTH; break; case SHA512_MECH_INFO_TYPE: sha_digest_len = SHA512_DIGEST_LENGTH; break; default: return (CRYPTO_MECHANISM_INVALID); } /* * We need to just return the length needed to store the output. * We should not destroy the context for the following cases. */ if ((digest->cd_length == 0) || (digest->cd_length < sha_digest_len)) { digest->cd_length = sha_digest_len; return (CRYPTO_BUFFER_TOO_SMALL); } /* * Do a SHA2 final. */ switch (digest->cd_format) { case CRYPTO_DATA_RAW: SHA2Final((unsigned char *)digest->cd_raw.iov_base + digest->cd_offset, &PROV_SHA2_CTX(ctx)->sc_sha2_ctx); break; case CRYPTO_DATA_UIO: ret = sha2_digest_final_uio(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx, digest, sha_digest_len, NULL); break; default: ret = CRYPTO_ARGUMENTS_BAD; } /* all done, free context and return */ if (ret == CRYPTO_SUCCESS) digest->cd_length = sha_digest_len; else digest->cd_length = 0; kmem_free(ctx->cc_provider_private, sizeof (sha2_ctx_t)); ctx->cc_provider_private = NULL; return (ret); } -/* ARGSUSED */ static int sha2_digest_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req) { + (void) provider, (void) session_id, (void) req; int ret = CRYPTO_SUCCESS; SHA2_CTX sha2_ctx; uint32_t sha_digest_len; /* * Do the SHA inits. 
*/ SHA2Init(mechanism->cm_type, &sha2_ctx); switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA2Update(&sha2_ctx, (uint8_t *)data-> cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha2_digest_update_uio(&sha2_ctx, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } /* * Do the SHA updates on the specified input data. */ if (ret != CRYPTO_SUCCESS) { /* the update failed, bail */ digest->cd_length = 0; return (ret); } if (mechanism->cm_type <= SHA256_HMAC_GEN_MECH_INFO_TYPE) sha_digest_len = SHA256_DIGEST_LENGTH; else sha_digest_len = SHA512_DIGEST_LENGTH; /* * Do a SHA2 final, must be done separately since the digest * type can be different than the input data type. */ switch (digest->cd_format) { case CRYPTO_DATA_RAW: SHA2Final((unsigned char *)digest->cd_raw.iov_base + digest->cd_offset, &sha2_ctx); break; case CRYPTO_DATA_UIO: ret = sha2_digest_final_uio(&sha2_ctx, digest, sha_digest_len, NULL); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) digest->cd_length = sha_digest_len; else digest->cd_length = 0; return (ret); } /* * KCF software provider mac entry points. * * SHA2 HMAC is: SHA2(key XOR opad, SHA2(key XOR ipad, text)) * * Init: * The initialization routine initializes what we denote * as the inner and outer contexts by doing * - for inner context: SHA2(key XOR ipad) * - for outer context: SHA2(key XOR opad) * * Update: * Each subsequent SHA2 HMAC update will result in an * update of the inner context with the specified data. * * Final: * The SHA2 HMAC final will do a SHA2 final operation on the * inner context, and the resulting digest will be used * as the data for an update on the outer context. Last * but not least, a SHA2 final on the outer context will * be performed to obtain the SHA2 HMAC digest to return * to the user. */ /* * Initialize a SHA2-HMAC context. 
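/*
 * Illustrative sketch (hypothetical names): before the ipad/opad step,
 * HMAC keys longer than one hash block are first hashed down to a
 * digest-sized key, while shorter keys are simply zero-padded by the
 * bzero/bcopy in the init-ctx routine.  For SHA-2 the block size depends
 * on the family -- 64 bytes for SHA-256, 128 bytes for SHA-384/512 -- so
 * the same normalization is parameterized by block size.
 */
#include <stddef.h>
#include <string.h>

static size_t
hmac_normalize_key(const unsigned char *key, size_t keylen,
    size_t block_size, size_t digest_len,
    unsigned char *out,				/* at least block_size bytes */
    void (*hash_once)(const void *, size_t, unsigned char *))
{
        if (keylen > block_size) {
                hash_once(key, keylen, out);	/* K' = H(K) */
                return (digest_len);
        }
        memcpy(out, key, keylen);		/* K' = K, zero-padded by caller */
        return (keylen);
}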
*/ static void sha2_mac_init_ctx(sha2_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes) { uint64_t ipad[SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t)]; uint64_t opad[SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t)]; int i, block_size, blocks_per_int64; /* Determine the block size */ if (ctx->hc_mech_type <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { block_size = SHA256_HMAC_BLOCK_SIZE; blocks_per_int64 = SHA256_HMAC_BLOCK_SIZE / sizeof (uint64_t); } else { block_size = SHA512_HMAC_BLOCK_SIZE; blocks_per_int64 = SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t); } (void) bzero(ipad, block_size); (void) bzero(opad, block_size); (void) bcopy(keyval, ipad, length_in_bytes); (void) bcopy(keyval, opad, length_in_bytes); /* XOR key with ipad (0x36) and opad (0x5c) */ for (i = 0; i < blocks_per_int64; i ++) { ipad[i] ^= 0x3636363636363636; opad[i] ^= 0x5c5c5c5c5c5c5c5c; } /* perform SHA2 on ipad */ SHA2Init(ctx->hc_mech_type, &ctx->hc_icontext); SHA2Update(&ctx->hc_icontext, (uint8_t *)ipad, block_size); /* perform SHA2 on opad */ SHA2Init(ctx->hc_mech_type, &ctx->hc_ocontext); SHA2Update(&ctx->hc_ocontext, (uint8_t *)opad, block_size); } /* */ static int sha2_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { int ret = CRYPTO_SUCCESS; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); uint_t sha_digest_len, sha_hmac_block_size; /* * Set the digest length and block size to values appropriate to the * mechanism */ switch (mechanism->cm_type) { case SHA256_HMAC_MECH_INFO_TYPE: case SHA256_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = SHA256_DIGEST_LENGTH; sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE; break; case SHA384_HMAC_MECH_INFO_TYPE: case SHA384_HMAC_GEN_MECH_INFO_TYPE: case SHA512_HMAC_MECH_INFO_TYPE: case SHA512_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = SHA512_DIGEST_LENGTH; sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE; break; default: return (CRYPTO_MECHANISM_INVALID); } if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); ctx->cc_provider_private = kmem_alloc(sizeof (sha2_hmac_ctx_t), crypto_kmflag(req)); if (ctx->cc_provider_private == NULL) return (CRYPTO_HOST_MEMORY); PROV_SHA2_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type; if (ctx_template != NULL) { /* reuse context template */ bcopy(ctx_template, PROV_SHA2_HMAC_CTX(ctx), sizeof (sha2_hmac_ctx_t)); } else { /* no context template, compute context */ if (keylen_in_bytes > sha_hmac_block_size) { uchar_t digested_key[SHA512_DIGEST_LENGTH]; sha2_hmac_ctx_t *hmac_ctx = ctx->cc_provider_private; /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. */ PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3, &hmac_ctx->hc_icontext, key->ck_data, keylen_in_bytes, digested_key); sha2_mac_init_ctx(PROV_SHA2_HMAC_CTX(ctx), digested_key, sha_digest_len); } else { sha2_mac_init_ctx(PROV_SHA2_HMAC_CTX(ctx), key->ck_data, keylen_in_bytes); } } /* * Get the mechanism parameters, if applicable. 
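/*
 * Illustrative sketch (hypothetical enum, not the module's real mechanism
 * numbering): the cm_type / 3 and cm_type % 3 arithmetic used here and in
 * sha2_free_context() only works because the mechanism table lists each
 * family as <plain digest, HMAC, HMAC-general>, in that order.  With an
 * equivalent numbering of our own:
 */
enum ex_mech {
        EX_SHA256,	EX_SHA256_HMAC,		EX_SHA256_HMAC_GEN,
        EX_SHA384,	EX_SHA384_HMAC,		EX_SHA384_HMAC_GEN,
        EX_SHA512,	EX_SHA512_HMAC,		EX_SHA512_HMAC_GEN
};

#define	EX_IS_PLAIN_DIGEST(m)	((m) % 3 == 0)	/* no key, no HMAC state */
#define	EX_IS_GENERAL_HMAC(m)	((m) % 3 == 2)	/* takes a digest-length param */
#define	EX_FAMILY(m)		((m) / 3)	/* 0=SHA256, 1=SHA384, 2=SHA512 */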
*/ if (mechanism->cm_type % 3 == 2) { if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (ulong_t)) ret = CRYPTO_MECHANISM_PARAM_INVALID; PROV_SHA2_GET_DIGEST_LEN(mechanism, PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len); if (PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len > sha_digest_len) ret = CRYPTO_MECHANISM_PARAM_INVALID; } if (ret != CRYPTO_SUCCESS) { bzero(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t)); kmem_free(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t)); ctx->cc_provider_private = NULL; } return (ret); } -/* ARGSUSED */ static int sha2_mac_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; ASSERT(ctx->cc_provider_private != NULL); /* * Do a SHA2 update of the inner context using the specified * data. */ switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA2Update(&PROV_SHA2_HMAC_CTX(ctx)->hc_icontext, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha2_digest_update_uio( &PROV_SHA2_HMAC_CTX(ctx)->hc_icontext, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } return (ret); } -/* ARGSUSED */ static int sha2_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; uchar_t digest[SHA512_DIGEST_LENGTH]; uint32_t digest_len, sha_digest_len; ASSERT(ctx->cc_provider_private != NULL); /* Set the digest lengths to values appropriate to the mechanism */ switch (PROV_SHA2_HMAC_CTX(ctx)->hc_mech_type) { case SHA256_HMAC_MECH_INFO_TYPE: sha_digest_len = digest_len = SHA256_DIGEST_LENGTH; break; case SHA384_HMAC_MECH_INFO_TYPE: sha_digest_len = digest_len = SHA384_DIGEST_LENGTH; break; case SHA512_HMAC_MECH_INFO_TYPE: sha_digest_len = digest_len = SHA512_DIGEST_LENGTH; break; case SHA256_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = SHA256_DIGEST_LENGTH; digest_len = PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len; break; case SHA384_HMAC_GEN_MECH_INFO_TYPE: case SHA512_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = SHA512_DIGEST_LENGTH; digest_len = PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len; break; default: return (CRYPTO_ARGUMENTS_BAD); } /* * We need to just return the length needed to store the output. * We should not destroy the context for the following cases. */ if ((mac->cd_length == 0) || (mac->cd_length < digest_len)) { mac->cd_length = digest_len; return (CRYPTO_BUFFER_TOO_SMALL); } /* * Do a SHA2 final on the inner context. */ SHA2Final(digest, &PROV_SHA2_HMAC_CTX(ctx)->hc_icontext); /* * Do a SHA2 update on the outer context, feeding the inner * digest as data. */ SHA2Update(&PROV_SHA2_HMAC_CTX(ctx)->hc_ocontext, digest, sha_digest_len); /* * Do a SHA2 final on the outer context, storing the computing * digest in the users buffer. */ switch (mac->cd_format) { case CRYPTO_DATA_RAW: if (digest_len != sha_digest_len) { /* * The caller requested a short digest. Digest * into a scratch buffer and return to * the user only what was requested. 
*/ SHA2Final(digest, &PROV_SHA2_HMAC_CTX(ctx)->hc_ocontext); bcopy(digest, (unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, digest_len); } else { SHA2Final((unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, &PROV_SHA2_HMAC_CTX(ctx)->hc_ocontext); } break; case CRYPTO_DATA_UIO: ret = sha2_digest_final_uio( &PROV_SHA2_HMAC_CTX(ctx)->hc_ocontext, mac, digest_len, digest); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) mac->cd_length = digest_len; else mac->cd_length = 0; bzero(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t)); kmem_free(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t)); ctx->cc_provider_private = NULL; return (ret); } #define SHA2_MAC_UPDATE(data, ctx, ret) { \ switch (data->cd_format) { \ case CRYPTO_DATA_RAW: \ SHA2Update(&(ctx).hc_icontext, \ (uint8_t *)data->cd_raw.iov_base + \ data->cd_offset, data->cd_length); \ break; \ case CRYPTO_DATA_UIO: \ ret = sha2_digest_update_uio(&(ctx).hc_icontext, data); \ break; \ default: \ ret = CRYPTO_ARGUMENTS_BAD; \ } \ } -/* ARGSUSED */ static int sha2_mac_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { + (void) provider, (void) session_id, (void) req; int ret = CRYPTO_SUCCESS; uchar_t digest[SHA512_DIGEST_LENGTH]; sha2_hmac_ctx_t sha2_hmac_ctx; uint32_t sha_digest_len, digest_len, sha_hmac_block_size; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); /* * Set the digest length and block size to values appropriate to the * mechanism */ switch (mechanism->cm_type) { case SHA256_HMAC_MECH_INFO_TYPE: case SHA256_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = digest_len = SHA256_DIGEST_LENGTH; sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE; break; case SHA384_HMAC_MECH_INFO_TYPE: case SHA384_HMAC_GEN_MECH_INFO_TYPE: case SHA512_HMAC_MECH_INFO_TYPE: case SHA512_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = digest_len = SHA512_DIGEST_LENGTH; sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE; break; default: return (CRYPTO_MECHANISM_INVALID); } /* Add support for key by attributes (RFE 4706552) */ if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); if (ctx_template != NULL) { /* reuse context template */ bcopy(ctx_template, &sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t)); } else { sha2_hmac_ctx.hc_mech_type = mechanism->cm_type; /* no context template, initialize context */ if (keylen_in_bytes > sha_hmac_block_size) { /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. */ PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3, &sha2_hmac_ctx.hc_icontext, key->ck_data, keylen_in_bytes, digest); sha2_mac_init_ctx(&sha2_hmac_ctx, digest, sha_digest_len); } else { sha2_mac_init_ctx(&sha2_hmac_ctx, key->ck_data, keylen_in_bytes); } } /* get the mechanism parameters, if applicable */ if ((mechanism->cm_type % 3) == 2) { if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (ulong_t)) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } PROV_SHA2_GET_DIGEST_LEN(mechanism, digest_len); if (digest_len > sha_digest_len) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } } /* do a SHA2 update of the inner context using the specified data */ SHA2_MAC_UPDATE(data, sha2_hmac_ctx, ret); if (ret != CRYPTO_SUCCESS) /* the update failed, free context and bail */ goto bail; /* * Do a SHA2 final on the inner context. 
*/ SHA2Final(digest, &sha2_hmac_ctx.hc_icontext); /* * Do an SHA2 update on the outer context, feeding the inner * digest as data. * * HMAC-SHA384 needs special handling as the outer hash needs only 48 * bytes of the inner hash value. */ if (mechanism->cm_type == SHA384_HMAC_MECH_INFO_TYPE || mechanism->cm_type == SHA384_HMAC_GEN_MECH_INFO_TYPE) SHA2Update(&sha2_hmac_ctx.hc_ocontext, digest, SHA384_DIGEST_LENGTH); else SHA2Update(&sha2_hmac_ctx.hc_ocontext, digest, sha_digest_len); /* * Do a SHA2 final on the outer context, storing the computed * digest in the users buffer. */ switch (mac->cd_format) { case CRYPTO_DATA_RAW: if (digest_len != sha_digest_len) { /* * The caller requested a short digest. Digest * into a scratch buffer and return to * the user only what was requested. */ SHA2Final(digest, &sha2_hmac_ctx.hc_ocontext); bcopy(digest, (unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, digest_len); } else { SHA2Final((unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, &sha2_hmac_ctx.hc_ocontext); } break; case CRYPTO_DATA_UIO: ret = sha2_digest_final_uio(&sha2_hmac_ctx.hc_ocontext, mac, digest_len, digest); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) { mac->cd_length = digest_len; return (CRYPTO_SUCCESS); } bail: bzero(&sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t)); mac->cd_length = 0; return (ret); } -/* ARGSUSED */ static int sha2_mac_verify_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { + (void) provider, (void) session_id, (void) req; int ret = CRYPTO_SUCCESS; uchar_t digest[SHA512_DIGEST_LENGTH]; sha2_hmac_ctx_t sha2_hmac_ctx; uint32_t sha_digest_len, digest_len, sha_hmac_block_size; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); /* * Set the digest length and block size to values appropriate to the * mechanism */ switch (mechanism->cm_type) { case SHA256_HMAC_MECH_INFO_TYPE: case SHA256_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = digest_len = SHA256_DIGEST_LENGTH; sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE; break; case SHA384_HMAC_MECH_INFO_TYPE: case SHA384_HMAC_GEN_MECH_INFO_TYPE: case SHA512_HMAC_MECH_INFO_TYPE: case SHA512_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = digest_len = SHA512_DIGEST_LENGTH; sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE; break; default: return (CRYPTO_MECHANISM_INVALID); } /* Add support for key by attributes (RFE 4706552) */ if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); if (ctx_template != NULL) { /* reuse context template */ bcopy(ctx_template, &sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t)); } else { sha2_hmac_ctx.hc_mech_type = mechanism->cm_type; /* no context template, initialize context */ if (keylen_in_bytes > sha_hmac_block_size) { /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. 
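/*
 * Illustrative reference (sizes per FIPS 180-4): SHA-384 runs on the
 * SHA-512 block and state machinery but emits a truncated digest, which
 * is why sha_digest_len is lumped in with SHA-512 above for buffer
 * sizing while the outer-context update must feed exactly the inner
 * digest length -- 48 bytes -- for the 384 mechanisms.
 *
 *	family    digest bytes   HMAC block bytes
 *	SHA-256        32               64
 *	SHA-384        48              128
 *	SHA-512        64              128
 */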
*/ PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3, &sha2_hmac_ctx.hc_icontext, key->ck_data, keylen_in_bytes, digest); sha2_mac_init_ctx(&sha2_hmac_ctx, digest, sha_digest_len); } else { sha2_mac_init_ctx(&sha2_hmac_ctx, key->ck_data, keylen_in_bytes); } } /* get the mechanism parameters, if applicable */ if (mechanism->cm_type % 3 == 2) { if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (ulong_t)) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } PROV_SHA2_GET_DIGEST_LEN(mechanism, digest_len); if (digest_len > sha_digest_len) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } } if (mac->cd_length != digest_len) { ret = CRYPTO_INVALID_MAC; goto bail; } /* do a SHA2 update of the inner context using the specified data */ SHA2_MAC_UPDATE(data, sha2_hmac_ctx, ret); if (ret != CRYPTO_SUCCESS) /* the update failed, free context and bail */ goto bail; /* do a SHA2 final on the inner context */ SHA2Final(digest, &sha2_hmac_ctx.hc_icontext); /* * Do an SHA2 update on the outer context, feeding the inner * digest as data. * * HMAC-SHA384 needs special handling as the outer hash needs only 48 * bytes of the inner hash value. */ if (mechanism->cm_type == SHA384_HMAC_MECH_INFO_TYPE || mechanism->cm_type == SHA384_HMAC_GEN_MECH_INFO_TYPE) SHA2Update(&sha2_hmac_ctx.hc_ocontext, digest, SHA384_DIGEST_LENGTH); else SHA2Update(&sha2_hmac_ctx.hc_ocontext, digest, sha_digest_len); /* * Do a SHA2 final on the outer context, storing the computed * digest in the users buffer. */ SHA2Final(digest, &sha2_hmac_ctx.hc_ocontext); /* * Compare the computed digest against the expected digest passed * as argument. */ switch (mac->cd_format) { case CRYPTO_DATA_RAW: if (bcmp(digest, (unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, digest_len) != 0) ret = CRYPTO_INVALID_MAC; break; case CRYPTO_DATA_UIO: { off_t offset = mac->cd_offset; uint_t vec_idx = 0; off_t scratch_offset = 0; size_t length = digest_len; size_t cur_len; /* we support only kernel buffer */ if (zfs_uio_segflg(mac->cd_uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* jump to the first iovec containing the expected digest */ offset = zfs_uio_index_at_offset(mac->cd_uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(mac->cd_uio)) { /* * The caller specified an offset that is * larger than the total size of the buffers * it provided. */ ret = CRYPTO_DATA_LEN_RANGE; break; } /* do the comparison of computed digest vs specified one */ while (vec_idx < zfs_uio_iovcnt(mac->cd_uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(mac->cd_uio, vec_idx) - offset, length); if (bcmp(digest + scratch_offset, zfs_uio_iovbase(mac->cd_uio, vec_idx) + offset, cur_len) != 0) { ret = CRYPTO_INVALID_MAC; break; } length -= cur_len; vec_idx++; scratch_offset += cur_len; offset = 0; } break; } default: ret = CRYPTO_ARGUMENTS_BAD; } return (ret); bail: bzero(&sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t)); mac->cd_length = 0; return (ret); } /* * KCF software provider context management entry points. 
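 *
 * For this provider a context template is simply a pre-keyed
 * sha2_hmac_ctx_t: sha2_create_ctx_template() below performs the key
 * setup once, and the init/atomic entry points above start from a
 * bcopy() of the template instead of re-deriving the keyed state.  A
 * rough sketch of the provider-side reuse pattern (illustrative; error
 * handling and the digested-key case omitted):
 *
 *        sha2_hmac_ctx_t tmpl;                      built once per key
 *        tmpl.hc_mech_type = mechanism->cm_type;
 *        sha2_mac_init_ctx(&tmpl, key->ck_data, keylen_in_bytes);
 *
 *        sha2_hmac_ctx_t ctx;                       then, per message
 *        bcopy(&tmpl, &ctx, sizeof (ctx));
 *        SHA2Update(&ctx.hc_icontext, msg, msglen);
 *        ... finish as in sha2_mac_atomic() above ...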
*/ -/* ARGSUSED */ static int sha2_create_ctx_template(crypto_provider_handle_t provider, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size, crypto_req_handle_t req) { + (void) provider; sha2_hmac_ctx_t *sha2_hmac_ctx_tmpl; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); uint32_t sha_digest_len, sha_hmac_block_size; /* * Set the digest length and block size to values appropriate to the * mechanism */ switch (mechanism->cm_type) { case SHA256_HMAC_MECH_INFO_TYPE: case SHA256_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = SHA256_DIGEST_LENGTH; sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE; break; case SHA384_HMAC_MECH_INFO_TYPE: case SHA384_HMAC_GEN_MECH_INFO_TYPE: case SHA512_HMAC_MECH_INFO_TYPE: case SHA512_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = SHA512_DIGEST_LENGTH; sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE; break; default: return (CRYPTO_MECHANISM_INVALID); } /* Add support for key by attributes (RFE 4706552) */ if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); /* * Allocate and initialize SHA2 context. */ sha2_hmac_ctx_tmpl = kmem_alloc(sizeof (sha2_hmac_ctx_t), crypto_kmflag(req)); if (sha2_hmac_ctx_tmpl == NULL) return (CRYPTO_HOST_MEMORY); sha2_hmac_ctx_tmpl->hc_mech_type = mechanism->cm_type; if (keylen_in_bytes > sha_hmac_block_size) { uchar_t digested_key[SHA512_DIGEST_LENGTH]; /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. */ PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3, &sha2_hmac_ctx_tmpl->hc_icontext, key->ck_data, keylen_in_bytes, digested_key); sha2_mac_init_ctx(sha2_hmac_ctx_tmpl, digested_key, sha_digest_len); } else { sha2_mac_init_ctx(sha2_hmac_ctx_tmpl, key->ck_data, keylen_in_bytes); } *ctx_template = (crypto_spi_ctx_template_t)sha2_hmac_ctx_tmpl; *ctx_template_size = sizeof (sha2_hmac_ctx_t); return (CRYPTO_SUCCESS); } static int sha2_free_context(crypto_ctx_t *ctx) { uint_t ctx_len; if (ctx->cc_provider_private == NULL) return (CRYPTO_SUCCESS); /* * We have to free either SHA2 or SHA2-HMAC contexts, which * have different lengths. * * Note: Below is dependent on the mechanism ordering. */ if (PROV_SHA2_CTX(ctx)->sc_mech_type % 3 == 0) ctx_len = sizeof (sha2_ctx_t); else ctx_len = sizeof (sha2_hmac_ctx_t); bzero(ctx->cc_provider_private, ctx_len); kmem_free(ctx->cc_provider_private, ctx_len); ctx->cc_provider_private = NULL; return (CRYPTO_SUCCESS); } diff --git a/module/icp/io/skein_mod.c b/module/icp/io/skein_mod.c index ac7d201eb708..49dcbadd86f5 100644 --- a/module/icp/io/skein_mod.c +++ b/module/icp/io/skein_mod.c @@ -1,728 +1,727 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2013 Saso Kiselkov. All rights reserved. 
*/ #include #include #include #include #include #define SKEIN_MODULE_IMPL #include /* * Like the sha2 module, we create the skein module with two modlinkages: * - modlmisc to allow direct calls to Skein_* API functions. * - modlcrypto to integrate well into the Kernel Crypto Framework (KCF). */ static struct modlmisc modlmisc = { &mod_cryptoops, "Skein Message-Digest Algorithm" }; static struct modlcrypto modlcrypto = { &mod_cryptoops, "Skein Kernel SW Provider" }; static struct modlinkage modlinkage = { MODREV_1, {&modlmisc, &modlcrypto, NULL} }; static crypto_mech_info_t skein_mech_info_tab[] = { {CKM_SKEIN_256, SKEIN_256_MECH_INFO_TYPE, CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, {CKM_SKEIN_256_MAC, SKEIN_256_MAC_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, {CKM_SKEIN_512, SKEIN_512_MECH_INFO_TYPE, CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, {CKM_SKEIN_512_MAC, SKEIN_512_MAC_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, {CKM_SKEIN1024, SKEIN1024_MECH_INFO_TYPE, CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, {CKM_SKEIN1024_MAC, SKEIN1024_MAC_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX, CRYPTO_KEYSIZE_UNIT_IN_BYTES} }; static void skein_provider_status(crypto_provider_handle_t, uint_t *); static crypto_control_ops_t skein_control_ops = { skein_provider_status }; static int skein_digest_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_req_handle_t); static int skein_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static int skein_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int skein_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int skein_digest_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static crypto_digest_ops_t skein_digest_ops = { .digest_init = skein_digest_init, .digest = skein_digest, .digest_update = skein_update, .digest_key = NULL, .digest_final = skein_final, .digest_atomic = skein_digest_atomic }; static int skein_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int skein_mac_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static crypto_mac_ops_t skein_mac_ops = { .mac_init = skein_mac_init, .mac = NULL, .mac_update = skein_update, /* using regular digest update is OK here */ .mac_final = skein_final, /* using regular digest final is OK here */ .mac_atomic = skein_mac_atomic, .mac_verify_atomic = NULL }; static int skein_create_ctx_template(crypto_provider_handle_t, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *, size_t *, crypto_req_handle_t); static int skein_free_context(crypto_ctx_t *); static crypto_ctx_ops_t skein_ctx_ops = { .create_ctx_template = skein_create_ctx_template, .free_context = skein_free_context }; static crypto_ops_t skein_crypto_ops = {{{{{ &skein_control_ops, &skein_digest_ops, NULL, &skein_mac_ops, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &skein_ctx_ops, }}}}}; static crypto_provider_info_t skein_prov_info = {{{{ CRYPTO_SPI_VERSION_1, "Skein Software Provider", CRYPTO_SW_PROVIDER, NULL, &skein_crypto_ops, sizeof (skein_mech_info_tab) / 
sizeof (crypto_mech_info_t), skein_mech_info_tab }}}}; static crypto_kcf_provider_handle_t skein_prov_handle = 0; typedef struct skein_ctx { skein_mech_type_t sc_mech_type; size_t sc_digest_bitlen; /*LINTED(E_ANONYMOUS_UNION_DECL)*/ union { Skein_256_Ctxt_t sc_256; Skein_512_Ctxt_t sc_512; Skein1024_Ctxt_t sc_1024; }; } skein_ctx_t; #define SKEIN_CTX(_ctx_) ((skein_ctx_t *)((_ctx_)->cc_provider_private)) #define SKEIN_CTX_LVALUE(_ctx_) (_ctx_)->cc_provider_private #define SKEIN_OP(_skein_ctx, _op, ...) \ do { \ skein_ctx_t *sc = (_skein_ctx); \ switch (sc->sc_mech_type) { \ case SKEIN_256_MECH_INFO_TYPE: \ case SKEIN_256_MAC_MECH_INFO_TYPE: \ (void) Skein_256_ ## _op(&sc->sc_256, __VA_ARGS__);\ break; \ case SKEIN_512_MECH_INFO_TYPE: \ case SKEIN_512_MAC_MECH_INFO_TYPE: \ (void) Skein_512_ ## _op(&sc->sc_512, __VA_ARGS__);\ break; \ case SKEIN1024_MECH_INFO_TYPE: \ case SKEIN1024_MAC_MECH_INFO_TYPE: \ (void) Skein1024_ ## _op(&sc->sc_1024, __VA_ARGS__);\ break; \ } \ } while (0) static int skein_get_digest_bitlen(const crypto_mechanism_t *mechanism, size_t *result) { if (mechanism->cm_param != NULL) { /*LINTED(E_BAD_PTR_CAST_ALIGN)*/ skein_param_t *param = (skein_param_t *)mechanism->cm_param; if (mechanism->cm_param_len != sizeof (*param) || param->sp_digest_bitlen == 0) { return (CRYPTO_MECHANISM_PARAM_INVALID); } *result = param->sp_digest_bitlen; } else { switch (mechanism->cm_type) { case SKEIN_256_MECH_INFO_TYPE: *result = 256; break; case SKEIN_512_MECH_INFO_TYPE: *result = 512; break; case SKEIN1024_MECH_INFO_TYPE: *result = 1024; break; default: return (CRYPTO_MECHANISM_INVALID); } } return (CRYPTO_SUCCESS); } int skein_mod_init(void) { int error; if ((error = mod_install(&modlinkage)) != 0) return (error); /* * Try to register with KCF - failure shouldn't unload us, since we * still may want to continue providing misc/skein functionality. */ (void) crypto_register_provider(&skein_prov_info, &skein_prov_handle); return (0); } int skein_mod_fini(void) { int ret; if (skein_prov_handle != 0) { if ((ret = crypto_unregister_provider(skein_prov_handle)) != CRYPTO_SUCCESS) { cmn_err(CE_WARN, "skein _fini: crypto_unregister_provider() " "failed (0x%x)", ret); return (EBUSY); } skein_prov_handle = 0; } return (mod_remove(&modlinkage)); } /* * KCF software provider control entry points. */ -/* ARGSUSED */ static void skein_provider_status(crypto_provider_handle_t provider, uint_t *status) { + (void) provider; *status = CRYPTO_PROVIDER_READY; } /* * General Skein hashing helper functions. */ /* * Performs an Update on a context with uio input data. */ static int skein_digest_update_uio(skein_ctx_t *ctx, const crypto_data_t *data) { off_t offset = data->cd_offset; size_t length = data->cd_length; uint_t vec_idx = 0; size_t cur_len; zfs_uio_t *uio = data->cd_uio; /* we support only kernel buffer */ if (zfs_uio_segflg(uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* * Jump to the first iovec containing data to be * digested. */ offset = zfs_uio_index_at_offset(uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(uio)) { /* * The caller specified an offset that is larger than the * total size of the buffers it provided. */ return (CRYPTO_DATA_LEN_RANGE); } /* * Now do the digesting on the iovecs. 
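 *
 * zfs_uio_index_at_offset() above converts the absolute starting offset
 * into an iovec index plus an offset relative to that iovec; a rough
 * equivalent (illustrative sketch, not the real implementation):
 *
 *        uint_t idx = 0;
 *        while (idx < zfs_uio_iovcnt(uio) &&
 *            offset >= zfs_uio_iovlen(uio, idx))
 *                offset -= zfs_uio_iovlen(uio, idx++);
 *        vec_idx = idx;                (offset is now intra-iovec)
 *
 * The loop below then consumes up to cur_len bytes from each iovec in
 * turn, resetting the intra-iovec offset to zero after the first one.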
*/ while (vec_idx < zfs_uio_iovcnt(uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(uio, vec_idx) - offset, length); SKEIN_OP(ctx, Update, (uint8_t *)zfs_uio_iovbase(uio, vec_idx) + offset, cur_len); length -= cur_len; vec_idx++; offset = 0; } if (vec_idx == zfs_uio_iovcnt(uio) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed, i.e. * The caller requested to digest more data than it provided. */ return (CRYPTO_DATA_LEN_RANGE); } return (CRYPTO_SUCCESS); } /* * Performs a Final on a context and writes to a uio digest output. */ static int skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req) { off_t offset = digest->cd_offset; uint_t vec_idx = 0; zfs_uio_t *uio = digest->cd_uio; /* we support only kernel buffer */ if (zfs_uio_segflg(uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* * Jump to the first iovec containing ptr to the digest to be returned. */ offset = zfs_uio_index_at_offset(uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(uio)) { /* * The caller specified an offset that is larger than the * total size of the buffers it provided. */ return (CRYPTO_DATA_LEN_RANGE); } if (offset + CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen) <= zfs_uio_iovlen(uio, vec_idx)) { /* The computed digest will fit in the current iovec. */ SKEIN_OP(ctx, Final, (uchar_t *)zfs_uio_iovbase(uio, vec_idx) + offset); } else { uint8_t *digest_tmp; off_t scratch_offset = 0; size_t length = CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen); size_t cur_len; digest_tmp = kmem_alloc(CRYPTO_BITS2BYTES( ctx->sc_digest_bitlen), crypto_kmflag(req)); if (digest_tmp == NULL) return (CRYPTO_HOST_MEMORY); SKEIN_OP(ctx, Final, digest_tmp); while (vec_idx < zfs_uio_iovcnt(uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(uio, vec_idx) - offset, length); bcopy(digest_tmp + scratch_offset, zfs_uio_iovbase(uio, vec_idx) + offset, cur_len); length -= cur_len; vec_idx++; scratch_offset += cur_len; offset = 0; } kmem_free(digest_tmp, CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen)); if (vec_idx == zfs_uio_iovcnt(uio) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed, i.e. * The caller requested to digest more data than it * provided. */ return (CRYPTO_DATA_LEN_RANGE); } } return (CRYPTO_SUCCESS); } /* * KCF software provider digest entry points. */ /* * Initializes a skein digest context to the configuration in `mechanism'. * The mechanism cm_type must be one of SKEIN_*_MECH_INFO_TYPE. The cm_param * field may contain a skein_param_t structure indicating the length of the * digest the algorithm should produce. Otherwise the default output lengths * are applied (32 bytes for Skein-256, 64 bytes for Skein-512 and 128 bytes * for Skein-1024). 
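 *
 * A caller that wants a non-default digest length passes a skein_param_t
 * through the mechanism; a rough sketch (illustrative variable names,
 * kernel context):
 *
 *        skein_param_t sp = { .sp_digest_bitlen = 160 };
 *        crypto_mechanism_t mech;
 *
 *        mech.cm_type = SKEIN_512_MECH_INFO_TYPE;
 *        mech.cm_param = (char *)&sp;
 *        mech.cm_param_len = sizeof (sp);
 *
 * skein_get_digest_bitlen() above accepts any nonzero sp_digest_bitlen
 * and rejects a cm_param whose length does not match
 * sizeof (skein_param_t).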
*/ static int skein_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_req_handle_t req) { int error = CRYPTO_SUCCESS; if (!VALID_SKEIN_DIGEST_MECH(mechanism->cm_type)) return (CRYPTO_MECHANISM_INVALID); SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)), crypto_kmflag(req)); if (SKEIN_CTX(ctx) == NULL) return (CRYPTO_HOST_MEMORY); SKEIN_CTX(ctx)->sc_mech_type = mechanism->cm_type; error = skein_get_digest_bitlen(mechanism, &SKEIN_CTX(ctx)->sc_digest_bitlen); if (error != CRYPTO_SUCCESS) goto errout; SKEIN_OP(SKEIN_CTX(ctx), Init, SKEIN_CTX(ctx)->sc_digest_bitlen); return (CRYPTO_SUCCESS); errout: bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); SKEIN_CTX_LVALUE(ctx) = NULL; return (error); } /* * Executes a skein_update and skein_digest on a pre-initialized crypto * context in a single step. See the documentation to these functions to * see what to pass here. */ static int skein_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req) { int error = CRYPTO_SUCCESS; ASSERT(SKEIN_CTX(ctx) != NULL); if (digest->cd_length < CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen)) { digest->cd_length = CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen); return (CRYPTO_BUFFER_TOO_SMALL); } error = skein_update(ctx, data, req); if (error != CRYPTO_SUCCESS) { bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); SKEIN_CTX_LVALUE(ctx) = NULL; digest->cd_length = 0; return (error); } error = skein_final(ctx, digest, req); return (error); } /* * Performs a skein Update with the input message in `data' (successive calls * can push more data). This is used both for digest and MAC operation. * Supported input data formats are raw, uio and mblk. */ -/*ARGSUSED*/ static int skein_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) { + (void) req; int error = CRYPTO_SUCCESS; ASSERT(SKEIN_CTX(ctx) != NULL); switch (data->cd_format) { case CRYPTO_DATA_RAW: SKEIN_OP(SKEIN_CTX(ctx), Update, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: error = skein_digest_update_uio(SKEIN_CTX(ctx), data); break; default: error = CRYPTO_ARGUMENTS_BAD; } return (error); } /* * Performs a skein Final, writing the output to `digest'. This is used both * for digest and MAC operation. * Supported output digest formats are raw, uio and mblk. 
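 *
 * Note the output-sizing convention used here (and in skein_digest()
 * above): if digest->cd_length is too small, it is overwritten with the
 * required byte count and CRYPTO_BUFFER_TOO_SMALL is returned without
 * consuming the context, so a caller could size its buffer in two passes.
 * A rough caller-side sketch (do_final is a hypothetical wrapper name,
 * illustrative only):
 *
 *        out.cd_length = 0;
 *        if (do_final(ctx, &out) == CRYPTO_BUFFER_TOO_SMALL) {
 *                buf = kmem_alloc(out.cd_length, KM_SLEEP);
 *                out.cd_raw.iov_base = (char *)buf;
 *                out.cd_raw.iov_len = out.cd_length;
 *                ... retry the final ...
 *        }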
*/ -/*ARGSUSED*/ static int skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req) { int error = CRYPTO_SUCCESS; ASSERT(SKEIN_CTX(ctx) != NULL); if (digest->cd_length < CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen)) { digest->cd_length = CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen); return (CRYPTO_BUFFER_TOO_SMALL); } switch (digest->cd_format) { case CRYPTO_DATA_RAW: SKEIN_OP(SKEIN_CTX(ctx), Final, (uint8_t *)digest->cd_raw.iov_base + digest->cd_offset); break; case CRYPTO_DATA_UIO: error = skein_digest_final_uio(SKEIN_CTX(ctx), digest, req); break; default: error = CRYPTO_ARGUMENTS_BAD; } if (error == CRYPTO_SUCCESS) digest->cd_length = CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen); else digest->cd_length = 0; bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*(SKEIN_CTX(ctx)))); SKEIN_CTX_LVALUE(ctx) = NULL; return (error); } /* * Performs a full skein digest computation in a single call, configuring the * algorithm according to `mechanism', reading the input to be digested from * `data' and writing the output to `digest'. * Supported input/output formats are raw, uio and mblk. */ -/*ARGSUSED*/ static int skein_digest_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req) { - int error; - skein_ctx_t skein_ctx; - crypto_ctx_t ctx; + (void) provider, (void) session_id, (void) req; + int error; + skein_ctx_t skein_ctx; + crypto_ctx_t ctx; SKEIN_CTX_LVALUE(&ctx) = &skein_ctx; /* Init */ if (!VALID_SKEIN_DIGEST_MECH(mechanism->cm_type)) return (CRYPTO_MECHANISM_INVALID); skein_ctx.sc_mech_type = mechanism->cm_type; error = skein_get_digest_bitlen(mechanism, &skein_ctx.sc_digest_bitlen); if (error != CRYPTO_SUCCESS) goto out; SKEIN_OP(&skein_ctx, Init, skein_ctx.sc_digest_bitlen); if ((error = skein_update(&ctx, data, digest)) != CRYPTO_SUCCESS) goto out; if ((error = skein_final(&ctx, digest, digest)) != CRYPTO_SUCCESS) goto out; out: if (error == CRYPTO_SUCCESS) digest->cd_length = CRYPTO_BITS2BYTES(skein_ctx.sc_digest_bitlen); else digest->cd_length = 0; bzero(&skein_ctx, sizeof (skein_ctx)); return (error); } /* * Helper function that builds a Skein MAC context from the provided * mechanism and key. */ static int skein_mac_ctx_build(skein_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key) { int error; if (!VALID_SKEIN_MAC_MECH(mechanism->cm_type)) return (CRYPTO_MECHANISM_INVALID); if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); ctx->sc_mech_type = mechanism->cm_type; error = skein_get_digest_bitlen(mechanism, &ctx->sc_digest_bitlen); if (error != CRYPTO_SUCCESS) return (error); SKEIN_OP(ctx, InitExt, ctx->sc_digest_bitlen, 0, key->ck_data, CRYPTO_BITS2BYTES(key->ck_length)); return (CRYPTO_SUCCESS); } /* * KCF software provider mac entry points. */ /* * Initializes a skein MAC context. You may pass a ctx_template, in which * case the template will be reused to make initialization more efficient. * Otherwise a new context will be constructed. The mechanism cm_type must * be one of SKEIN_*_MAC_MECH_INFO_TYPE. Same as in skein_digest_init, you * may pass a skein_param_t in cm_param to configure the length of the * digest. The key must be in raw format.
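 *
 * "Raw format" means a crypto_key_t that carries the key bytes directly;
 * a rough sketch of building one (illustrative, kernel context; note that
 * ck_length is expressed in bits, hence the CRYPTO_BITS2BYTES()
 * conversion in skein_mac_ctx_build() above):
 *
 *        uint8_t keybuf[32];                filled in by the caller
 *        crypto_key_t key;
 *
 *        key.ck_format = CRYPTO_KEY_RAW;
 *        key.ck_data = keybuf;
 *        key.ck_length = 32 * 8;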
*/ static int skein_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { int error; SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)), crypto_kmflag(req)); if (SKEIN_CTX(ctx) == NULL) return (CRYPTO_HOST_MEMORY); if (ctx_template != NULL) { bcopy(ctx_template, SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); } else { error = skein_mac_ctx_build(SKEIN_CTX(ctx), mechanism, key); if (error != CRYPTO_SUCCESS) goto errout; } return (CRYPTO_SUCCESS); errout: bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); return (error); } /* * The MAC update and final calls are reused from the regular digest code. */ -/*ARGSUSED*/ /* * Same as skein_digest_atomic, performs an atomic Skein MAC operation in * one step. All the same properties apply to the arguments of this * function as to those of the partial operations above. */ static int skein_mac_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { /* faux crypto context just for skein_digest_{update,final} */ - int error; - crypto_ctx_t ctx; - skein_ctx_t skein_ctx; + (void) provider, (void) session_id; + int error; + crypto_ctx_t ctx; + skein_ctx_t skein_ctx; SKEIN_CTX_LVALUE(&ctx) = &skein_ctx; if (ctx_template != NULL) { bcopy(ctx_template, &skein_ctx, sizeof (skein_ctx)); } else { error = skein_mac_ctx_build(&skein_ctx, mechanism, key); if (error != CRYPTO_SUCCESS) goto errout; } if ((error = skein_update(&ctx, data, req)) != CRYPTO_SUCCESS) goto errout; if ((error = skein_final(&ctx, mac, req)) != CRYPTO_SUCCESS) goto errout; return (CRYPTO_SUCCESS); errout: bzero(&skein_ctx, sizeof (skein_ctx)); return (error); } /* * KCF software provider context management entry points. */ /* * Constructs a context template for the Skein MAC algorithm. The same * properties apply to the arguments of this function as to those of * skein_mac_init. */ -/*ARGSUSED*/ static int skein_create_ctx_template(crypto_provider_handle_t provider, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size, crypto_req_handle_t req) { - int error; - skein_ctx_t *ctx_tmpl; + (void) provider; + int error; + skein_ctx_t *ctx_tmpl; ctx_tmpl = kmem_alloc(sizeof (*ctx_tmpl), crypto_kmflag(req)); if (ctx_tmpl == NULL) return (CRYPTO_HOST_MEMORY); error = skein_mac_ctx_build(ctx_tmpl, mechanism, key); if (error != CRYPTO_SUCCESS) goto errout; *ctx_template = ctx_tmpl; *ctx_template_size = sizeof (*ctx_tmpl); return (CRYPTO_SUCCESS); errout: bzero(ctx_tmpl, sizeof (*ctx_tmpl)); kmem_free(ctx_tmpl, sizeof (*ctx_tmpl)); return (error); } /* * Frees a skein context in a parent crypto context. */ static int skein_free_context(crypto_ctx_t *ctx) { if (SKEIN_CTX(ctx) != NULL) { bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); SKEIN_CTX_LVALUE(ctx) = NULL; } return (CRYPTO_SUCCESS); } diff --git a/module/icp/os/modconf.c b/module/icp/os/modconf.c index 3743416ed951..f1822af4e266 100644 --- a/module/icp/os/modconf.c +++ b/module/icp/os/modconf.c @@ -1,173 +1,175 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). 
* You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include /* * Null operations; used for uninitialized and "misc" modules. */ static int mod_null(struct modlmisc *, struct modlinkage *); static int mod_infonull(void *, struct modlinkage *, int *); /* * Cryptographic Modules */ struct mod_ops mod_cryptoops = { .modm_install = mod_null, .modm_remove = mod_null, .modm_info = mod_infonull }; /* * Null operation; return 0. */ static int mod_null(struct modlmisc *modl, struct modlinkage *modlp) { + (void) modl, (void) modlp; return (0); } /* * Status for User modules. */ static int mod_infonull(void *modl, struct modlinkage *modlp, int *p0) { + (void) modl, (void) modlp; *p0 = -1; /* for modinfo display */ return (0); } /* * Install a module. * (This routine is in the Solaris SPARC DDI/DKI) */ int mod_install(struct modlinkage *modlp) { int retval = -1; /* No linkage structures */ struct modlmisc **linkpp; struct modlmisc **linkpp1; if (modlp->ml_rev != MODREV_1) { cmn_err(CE_WARN, "mod_install: " "modlinkage structure is not MODREV_1\n"); return (EINVAL); } linkpp = (struct modlmisc **)&modlp->ml_linkage[0]; while (*linkpp != NULL) { if ((retval = MODL_INSTALL(*linkpp, modlp)) != 0) { linkpp1 = (struct modlmisc **)&modlp->ml_linkage[0]; while (linkpp1 != linkpp) { MODL_REMOVE(*linkpp1, modlp); /* clean up */ linkpp1++; } break; } linkpp++; } return (retval); } static char *reins_err = "Could not reinstall %s\nReboot to correct the problem"; /* * Remove a module. This is called by the module wrapper routine. * (This routine is in the Solaris SPARC DDI/DKI) */ int mod_remove(struct modlinkage *modlp) { int retval = 0; struct modlmisc **linkpp, *last_linkp; linkpp = (struct modlmisc **)&modlp->ml_linkage[0]; while (*linkpp != NULL) { if ((retval = MODL_REMOVE(*linkpp, modlp)) != 0) { last_linkp = *linkpp; linkpp = (struct modlmisc **)&modlp->ml_linkage[0]; while (*linkpp != last_linkp) { if (MODL_INSTALL(*linkpp, modlp) != 0) { cmn_err(CE_WARN, reins_err, (*linkpp)->misc_linkinfo); break; } linkpp++; } break; } linkpp++; } return (retval); } /* * Get module status. 
* (This routine is in the Solaris SPARC DDI/DKI) */ int mod_info(struct modlinkage *modlp, struct modinfo *modinfop) { int i; int retval = 0; struct modspecific_info *msip; struct modlmisc **linkpp; modinfop->mi_rev = modlp->ml_rev; linkpp = (struct modlmisc **)modlp->ml_linkage; msip = &modinfop->mi_msinfo[0]; for (i = 0; i < MODMAXLINK; i++) { if (*linkpp == NULL) { msip->msi_linkinfo[0] = '\0'; } else { (void) strlcpy(msip->msi_linkinfo, (*linkpp)->misc_linkinfo, MODMAXLINKINFOLEN); retval = MODL_INFO(*linkpp, modlp, &msip->msi_p0); if (retval != 0) break; linkpp++; } msip++; } if (modinfop->mi_info == MI_INFO_LINKAGE) { /* * Slight kludge used to extract the address of the * modlinkage structure from the module (just after * loading a module for the very first time) */ modinfop->mi_base = (void *)modlp; } if (retval == 0) return (1); return (0); } diff --git a/module/icp/os/modhash.c b/module/icp/os/modhash.c index a897871001ce..8bd06973eff1 100644 --- a/module/icp/os/modhash.c +++ b/module/icp/os/modhash.c @@ -1,927 +1,927 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * mod_hash: flexible hash table implementation. * * This is a reasonably fast, reasonably flexible hash table implementation * which features pluggable hash algorithms to support storing arbitrary keys * and values. It is designed to handle small (< 100,000 items) amounts of * data. The hash uses chaining to resolve collisions, and does not feature a * mechanism to grow the hash. Care must be taken to pick nchains to be large * enough for the application at hand, or lots of time will be wasted searching * hash chains. * * The client of the hash is required to supply a number of items to support * the various hash functions: * * - Destructor functions for the key and value being hashed. * A destructor is responsible for freeing an object when the hash * table is no longer storing it. Since keys and values can be of * arbitrary type, separate destructors for keys & values are used. * These may be mod_hash_null_keydtor and mod_hash_null_valdtor if no * destructor is needed for either a key or value. * * - A hashing algorithm which returns a uint_t representing a hash index * The number returned need _not_ be between 0 and nchains. The mod_hash * code will take care of doing that. The second argument (after the * key) to the hashing function is a void * that represents * hash_alg_data-- this is provided so that the hashing algorithm can * maintain some state across calls, or keep algorithm-specific * constants associated with the hash table. * * A pointer-hashing and a string-hashing algorithm are supplied in * this file. * * - A key comparator (a la qsort). 
* This is used when searching the hash chain. The key comparator * determines if two keys match. It should follow the return value * semantics of strcmp. * * string and pointer comparators are supplied in this file. * * mod_hash_create_strhash() and mod_hash_create_ptrhash() provide good * examples of how to create a customized hash table. * * Basic hash operations: * * mod_hash_create_strhash(name, nchains, dtor), * create a hash using strings as keys. * NOTE: This create a hash which automatically cleans up the string * values it is given for keys. * * mod_hash_create_ptrhash(name, nchains, dtor, key_elem_size): * create a hash using pointers as keys. * * mod_hash_create_extended(name, nchains, kdtor, vdtor, * hash_alg, hash_alg_data, * keycmp, sleep) * create a customized hash table. * * mod_hash_destroy_hash(hash): * destroy the given hash table, calling the key and value destructors * on each key-value pair stored in the hash. * * mod_hash_insert(hash, key, val): * place a key, value pair into the given hash. * duplicate keys are rejected. * * mod_hash_insert_reserve(hash, key, val, handle): * place a key, value pair into the given hash, using handle to indicate * the reserved storage for the pair. (no memory allocation is needed * during a mod_hash_insert_reserve.) duplicate keys are rejected. * * mod_hash_reserve(hash, *handle): * reserve storage for a key-value pair using the memory allocation * policy of 'hash', returning the storage handle in 'handle'. * * mod_hash_reserve_nosleep(hash, *handle): reserve storage for a key-value * pair ignoring the memory allocation policy of 'hash' and always without * sleep, returning the storage handle in 'handle'. * * mod_hash_remove(hash, key, *val): * remove a key-value pair with key 'key' from 'hash', destroying the * stored key, and returning the value in val. * * mod_hash_replace(hash, key, val) * atomically remove an existing key-value pair from a hash, and replace * the key and value with the ones supplied. The removed key and value * (if any) are destroyed. * * mod_hash_destroy(hash, key): * remove a key-value pair with key 'key' from 'hash', destroying both * stored key and stored value. * * mod_hash_find(hash, key, val): * find a value in the hash table corresponding to the given key. * * mod_hash_find_cb(hash, key, val, found_callback) * find a value in the hash table corresponding to the given key. * If a value is found, call specified callback passing key and val to it. * The callback is called with the hash lock held. * It is intended to be used in situations where the act of locating the * data must also modify it - such as in reference counting schemes. * * mod_hash_walk(hash, callback(key, elem, arg), arg) * walks all the elements in the hashtable and invokes the callback * function with the key/value pair for each element. the hashtable * is locked for readers so the callback function should not attempt * to do any updates to the hashable. the callback function should * return MH_WALK_CONTINUE to continue walking the hashtable or * MH_WALK_TERMINATE to abort the walk of the hashtable. * * mod_hash_clear(hash): * clears the given hash table of entries, calling the key and value * destructors for every element in the hash. */ #include #include #include #include /* * MH_KEY_DESTROY() * Invoke the key destructor. */ #define MH_KEY_DESTROY(hash, key) ((hash->mh_kdtor)(key)) /* * MH_VAL_DESTROY() * Invoke the value destructor. 
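 *
 * The key and value destructors are supplied by the client when the hash
 * is created and are invoked from the remove/destroy/clear paths.  A
 * rough usage sketch (my_val_t and my_val_dtor are illustrative names,
 * not part of this file; the string keys handed to a strhash must be
 * kmem-allocated since the hash frees them):
 *
 *        static void
 *        my_val_dtor(mod_hash_val_t val)
 *        {
 *                kmem_free(val, sizeof (my_val_t));
 *        }
 *
 *        hash = mod_hash_create_strhash("demo hash", 64, my_val_dtor);
 *        (void) mod_hash_insert(hash, (mod_hash_key_t)namestr,
 *            (mod_hash_val_t)valp);
 *        ...
 *        mod_hash_destroy_strhash(hash);        destroys keys and values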
*/ #define MH_VAL_DESTROY(hash, val) ((hash->mh_vdtor)(val)) /* * MH_KEYCMP() * Call the key comparator for the given hash keys. */ #define MH_KEYCMP(hash, key1, key2) ((hash->mh_keycmp)(key1, key2)) /* * Cache for struct mod_hash_entry */ kmem_cache_t *mh_e_cache = NULL; mod_hash_t *mh_head = NULL; kmutex_t mh_head_lock; /* * mod_hash_null_keydtor() * mod_hash_null_valdtor() * no-op key and value destructors. */ -/*ARGSUSED*/ void mod_hash_null_keydtor(mod_hash_key_t key) { + (void) key; } -/*ARGSUSED*/ void mod_hash_null_valdtor(mod_hash_val_t val) { + (void) val; } /* * mod_hash_bystr() * mod_hash_strkey_cmp() * mod_hash_strkey_dtor() * mod_hash_strval_dtor() * Hash and key comparison routines for hashes with string keys. * * mod_hash_create_strhash() * Create a hash using strings as keys * * The string hashing algorithm is from the "Dragon Book" -- * "Compilers: Principles, Tools & Techniques", by Aho, Sethi, Ullman */ -/*ARGSUSED*/ uint_t mod_hash_bystr(void *hash_data, mod_hash_key_t key) { + (void) hash_data; uint_t hash = 0; uint_t g; char *p, *k = (char *)key; ASSERT(k); for (p = k; *p != '\0'; p++) { hash = (hash << 4) + *p; if ((g = (hash & 0xf0000000)) != 0) { hash ^= (g >> 24); hash ^= g; } } return (hash); } int mod_hash_strkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2) { return (strcmp((char *)key1, (char *)key2)); } void mod_hash_strkey_dtor(mod_hash_key_t key) { char *c = (char *)key; kmem_free(c, strlen(c) + 1); } void mod_hash_strval_dtor(mod_hash_val_t val) { char *c = (char *)val; kmem_free(c, strlen(c) + 1); } mod_hash_t * mod_hash_create_strhash_nodtr(char *name, size_t nchains, void (*val_dtor)(mod_hash_val_t)) { return mod_hash_create_extended(name, nchains, mod_hash_null_keydtor, val_dtor, mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); } mod_hash_t * mod_hash_create_strhash(char *name, size_t nchains, void (*val_dtor)(mod_hash_val_t)) { return mod_hash_create_extended(name, nchains, mod_hash_strkey_dtor, val_dtor, mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); } void mod_hash_destroy_strhash(mod_hash_t *strhash) { ASSERT(strhash); mod_hash_destroy_hash(strhash); } /* * mod_hash_byptr() * mod_hash_ptrkey_cmp() * Hash and key comparison routines for hashes with pointer keys. * * mod_hash_create_ptrhash() * mod_hash_destroy_ptrhash() * Create a hash that uses pointers as keys. This hash algorithm * picks an appropriate set of middle bits in the address to hash on * based on the size of the hash table and a hint about the size of * the items pointed at. */ uint_t mod_hash_byptr(void *hash_data, mod_hash_key_t key) { uintptr_t k = (uintptr_t)key; k >>= (int)(uintptr_t)hash_data; return ((uint_t)k); } int mod_hash_ptrkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2) { uintptr_t k1 = (uintptr_t)key1; uintptr_t k2 = (uintptr_t)key2; if (k1 > k2) return (-1); else if (k1 < k2) return (1); else return (0); } mod_hash_t * mod_hash_create_ptrhash(char *name, size_t nchains, void (*val_dtor)(mod_hash_val_t), size_t key_elem_size) { size_t rshift; /* * We want to hash on the bits in the middle of the address word * Bits far to the right in the word have little significance, and * are likely to all look the same (for example, an array of * 256-byte structures will have the bottom 8 bits of address * words the same). So we want to right-shift each address to * ignore the bottom bits. * * The high bits, which are also unused, will get taken out when * mod_hash takes hashkey % nchains. 
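 *
 * Concretely (illustrative numbers): for key_elem_size == 256,
 * highbit64(256) == 9, so mod_hash_byptr() computes
 *
 *        hash = (uint_t)((uintptr_t)key >> 9)
 *
 * and i_mod_hash() (below) reduces that modulo the chain count.  The
 * shift amount is passed to mod_hash_byptr() through the hash_alg_data
 * argument of mod_hash_create_extended(), cast to and from a pointer.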
*/ rshift = highbit64(key_elem_size); return mod_hash_create_extended(name, nchains, mod_hash_null_keydtor, val_dtor, mod_hash_byptr, (void *)rshift, mod_hash_ptrkey_cmp, KM_SLEEP); } void mod_hash_destroy_ptrhash(mod_hash_t *hash) { ASSERT(hash); mod_hash_destroy_hash(hash); } /* * mod_hash_byid() * mod_hash_idkey_cmp() * Hash and key comparison routines for hashes with 32-bit unsigned keys. * * mod_hash_create_idhash() * mod_hash_destroy_idhash() * mod_hash_iddata_gen() * Create a hash that uses numeric keys. * * The hash algorithm is documented in "Introduction to Algorithms" * (Cormen, Leiserson, Rivest); when the hash table is created, it * attempts to find the next largest prime above the number of hash * slots. The hash index is then this number times the key modulo * the hash size, or (key * prime) % nchains. */ uint_t mod_hash_byid(void *hash_data, mod_hash_key_t key) { uint_t kval = (uint_t)(uintptr_t)hash_data; return ((uint_t)(uintptr_t)key * (uint_t)kval); } int mod_hash_idkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2) { return ((uint_t)(uintptr_t)key1 - (uint_t)(uintptr_t)key2); } /* * Generate the next largest prime number greater than nchains; this value * is intended to be later passed in to mod_hash_create_extended() as the * hash_data. */ uint_t mod_hash_iddata_gen(size_t nchains) { uint_t kval, i, prime; /* * Pick the first (odd) prime greater than nchains. Make sure kval is * odd (so start with nchains +1 or +2 as appropriate). */ kval = (nchains % 2 == 0) ? nchains + 1 : nchains + 2; for (;;) { prime = 1; for (i = 3; i * i <= kval; i += 2) { if (kval % i == 0) prime = 0; } if (prime == 1) break; kval += 2; } return (kval); } mod_hash_t * mod_hash_create_idhash(char *name, size_t nchains, void (*val_dtor)(mod_hash_val_t)) { uint_t kval = mod_hash_iddata_gen(nchains); return (mod_hash_create_extended(name, nchains, mod_hash_null_keydtor, val_dtor, mod_hash_byid, (void *)(uintptr_t)kval, mod_hash_idkey_cmp, KM_SLEEP)); } void mod_hash_destroy_idhash(mod_hash_t *hash) { ASSERT(hash); mod_hash_destroy_hash(hash); } void mod_hash_fini(void) { mutex_destroy(&mh_head_lock); if (mh_e_cache) { kmem_cache_destroy(mh_e_cache); mh_e_cache = NULL; } } /* * mod_hash_init() * sets up globals, etc for mod_hash_* */ void mod_hash_init(void) { ASSERT(mh_e_cache == NULL); mh_e_cache = kmem_cache_create("mod_hash_entries", sizeof (struct mod_hash_entry), 0, NULL, NULL, NULL, NULL, NULL, 0); mutex_init(&mh_head_lock, NULL, MUTEX_DEFAULT, NULL); } /* * mod_hash_create_extended() * The full-blown hash creation function. * * notes: * nchains - how many hash slots to create. More hash slots will * result in shorter hash chains, but will consume * slightly more memory up front. * sleep - should be KM_SLEEP or KM_NOSLEEP, to indicate whether * to sleep for memory, or fail in low-memory conditions. * * Fails only if KM_NOSLEEP was specified, and no memory was available. 
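 *
 * A rough usage sketch (my_hash, my_cmp, my_kdtor and my_vdtor are
 * illustrative names for client-supplied functions with the signatures
 * shown in the definition below):
 *
 *        mod_hash_t *h;
 *
 *        h = mod_hash_create_extended("my hash", 128,
 *            my_kdtor, my_vdtor,        key and value destructors
 *            my_hash, NULL,             hash algorithm + pass-thru arg
 *            my_cmp, KM_SLEEP);
 *        if (h == NULL)
 *                return (ENOMEM);       only possible with KM_NOSLEEP
 *
 * The mod_hash_create_strhash(), _ptrhash() and _idhash() constructors
 * above are canned wrappers around this call.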
*/ mod_hash_t * mod_hash_create_extended( char *hname, /* descriptive name for hash */ size_t nchains, /* number of hash slots */ void (*kdtor)(mod_hash_key_t), /* key destructor */ void (*vdtor)(mod_hash_val_t), /* value destructor */ uint_t (*hash_alg)(void *, mod_hash_key_t), /* hash algorithm */ void *hash_alg_data, /* pass-thru arg for hash_alg */ int (*keycmp)(mod_hash_key_t, mod_hash_key_t), /* key comparator */ int sleep) /* whether to sleep for mem */ { mod_hash_t *mod_hash; size_t size; ASSERT(hname && keycmp && hash_alg && vdtor && kdtor); if ((mod_hash = kmem_zalloc(MH_SIZE(nchains), sleep)) == NULL) return (NULL); size = strlen(hname) + 1; mod_hash->mh_name = kmem_alloc(size, sleep); if (mod_hash->mh_name == NULL) { kmem_free(mod_hash, MH_SIZE(nchains)); return (NULL); } (void) strlcpy(mod_hash->mh_name, hname, size); rw_init(&mod_hash->mh_contents, NULL, RW_DEFAULT, NULL); mod_hash->mh_sleep = sleep; mod_hash->mh_nchains = nchains; mod_hash->mh_kdtor = kdtor; mod_hash->mh_vdtor = vdtor; mod_hash->mh_hashalg = hash_alg; mod_hash->mh_hashalg_data = hash_alg_data; mod_hash->mh_keycmp = keycmp; /* * Link the hash up on the list of hashes */ mutex_enter(&mh_head_lock); mod_hash->mh_next = mh_head; mh_head = mod_hash; mutex_exit(&mh_head_lock); return (mod_hash); } /* * mod_hash_destroy_hash() * destroy a hash table, destroying all of its stored keys and values * as well. */ void mod_hash_destroy_hash(mod_hash_t *hash) { mod_hash_t *mhp, *mhpp; mutex_enter(&mh_head_lock); /* * Remove the hash from the hash list */ if (hash == mh_head) { /* removing 1st list elem */ mh_head = mh_head->mh_next; } else { /* * mhpp can start out NULL since we know the 1st elem isn't the * droid we're looking for. */ mhpp = NULL; for (mhp = mh_head; mhp != NULL; mhp = mhp->mh_next) { if (mhp == hash) { mhpp->mh_next = mhp->mh_next; break; } mhpp = mhp; } } mutex_exit(&mh_head_lock); /* * Clean out keys and values. */ mod_hash_clear(hash); rw_destroy(&hash->mh_contents); kmem_free(hash->mh_name, strlen(hash->mh_name) + 1); kmem_free(hash, MH_SIZE(hash->mh_nchains)); } /* * i_mod_hash() * Call the hashing algorithm for this hash table, with the given key. */ uint_t i_mod_hash(mod_hash_t *hash, mod_hash_key_t key) { uint_t h; /* * Prevent div by 0 problems; * Also a nice shortcut when using a hash as a list */ if (hash->mh_nchains == 1) return (0); h = (hash->mh_hashalg)(hash->mh_hashalg_data, key); return (h % (hash->mh_nchains - 1)); } /* * i_mod_hash_insert_nosync() * mod_hash_insert() * mod_hash_insert_reserve() * insert 'val' into the hash table, using 'key' as its key. If 'key' is * already a key in the hash, an error will be returned, and the key-val * pair will not be inserted. i_mod_hash_insert_nosync() supports a simple * handle abstraction, allowing hash entry allocation to be separated from * the hash insertion. this abstraction allows simple use of the mod_hash * structure in situations where mod_hash_insert() with a KM_SLEEP * allocation policy would otherwise be unsafe. */ int i_mod_hash_insert_nosync(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t val, mod_hash_hndl_t handle) { uint_t hashidx; struct mod_hash_entry *entry; ASSERT(hash); /* * If we've not been given reserved storage, allocate storage directly, * using the hash's allocation policy. 
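 *
 * The handle path exists so a caller can pre-allocate the entry while it
 * is still safe (or permitted) to allocate, and perform the insert later,
 * e.g. from a context that must not sleep.  A rough usage sketch
 * (illustrative):
 *
 *        mod_hash_hndl_t hndl;
 *
 *        if (mod_hash_reserve_nosleep(hash, &hndl) != 0)
 *                return (ENOMEM);                could not pre-allocate
 *        ...
 *        if (mod_hash_insert_reserve(hash, key, val, hndl) != 0)
 *                mod_hash_cancel(hash, &hndl);   e.g. duplicate key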
*/ if (handle == (mod_hash_hndl_t)0) { entry = kmem_cache_alloc(mh_e_cache, hash->mh_sleep); if (entry == NULL) { hash->mh_stat.mhs_nomem++; return (MH_ERR_NOMEM); } } else { entry = (struct mod_hash_entry *)handle; } hashidx = i_mod_hash(hash, key); entry->mhe_key = key; entry->mhe_val = val; entry->mhe_next = hash->mh_entries[hashidx]; hash->mh_entries[hashidx] = entry; hash->mh_stat.mhs_nelems++; return (0); } int mod_hash_insert(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t val) { int res; mod_hash_val_t v; rw_enter(&hash->mh_contents, RW_WRITER); /* * Disallow duplicate keys in the hash */ if (i_mod_hash_find_nosync(hash, key, &v) == 0) { rw_exit(&hash->mh_contents); hash->mh_stat.mhs_coll++; return (MH_ERR_DUPLICATE); } res = i_mod_hash_insert_nosync(hash, key, val, (mod_hash_hndl_t)0); rw_exit(&hash->mh_contents); return (res); } int mod_hash_insert_reserve(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t val, mod_hash_hndl_t handle) { int res; mod_hash_val_t v; rw_enter(&hash->mh_contents, RW_WRITER); /* * Disallow duplicate keys in the hash */ if (i_mod_hash_find_nosync(hash, key, &v) == 0) { rw_exit(&hash->mh_contents); hash->mh_stat.mhs_coll++; return (MH_ERR_DUPLICATE); } res = i_mod_hash_insert_nosync(hash, key, val, handle); rw_exit(&hash->mh_contents); return (res); } /* * mod_hash_reserve() * mod_hash_reserve_nosleep() * mod_hash_cancel() * Make or cancel a mod_hash_entry_t reservation. Reservations are used in * mod_hash_insert_reserve() above. */ int mod_hash_reserve(mod_hash_t *hash, mod_hash_hndl_t *handlep) { *handlep = kmem_cache_alloc(mh_e_cache, hash->mh_sleep); if (*handlep == NULL) { hash->mh_stat.mhs_nomem++; return (MH_ERR_NOMEM); } return (0); } int mod_hash_reserve_nosleep(mod_hash_t *hash, mod_hash_hndl_t *handlep) { *handlep = kmem_cache_alloc(mh_e_cache, KM_NOSLEEP); if (*handlep == NULL) { hash->mh_stat.mhs_nomem++; return (MH_ERR_NOMEM); } return (0); } -/*ARGSUSED*/ void mod_hash_cancel(mod_hash_t *hash, mod_hash_hndl_t *handlep) { + (void) hash; kmem_cache_free(mh_e_cache, *handlep); *handlep = (mod_hash_hndl_t)0; } /* * i_mod_hash_remove_nosync() * mod_hash_remove() * Remove an element from the hash table. */ int i_mod_hash_remove_nosync(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val) { int hashidx; struct mod_hash_entry *e, *ep; hashidx = i_mod_hash(hash, key); ep = NULL; /* e's parent */ for (e = hash->mh_entries[hashidx]; e != NULL; e = e->mhe_next) { if (MH_KEYCMP(hash, e->mhe_key, key) == 0) break; ep = e; } if (e == NULL) { /* not found */ return (MH_ERR_NOTFOUND); } if (ep == NULL) /* special case 1st element in bucket */ hash->mh_entries[hashidx] = e->mhe_next; else ep->mhe_next = e->mhe_next; /* * Clean up resources used by the node's key. */ MH_KEY_DESTROY(hash, e->mhe_key); *val = e->mhe_val; kmem_cache_free(mh_e_cache, e); hash->mh_stat.mhs_nelems--; return (0); } int mod_hash_remove(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val) { int res; rw_enter(&hash->mh_contents, RW_WRITER); res = i_mod_hash_remove_nosync(hash, key, val); rw_exit(&hash->mh_contents); return (res); } /* * mod_hash_replace() * atomically remove an existing key-value pair from a hash, and replace * the key and value with the ones supplied. The removed key and value * (if any) are destroyed. 
*/ int mod_hash_replace(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t val) { int res; mod_hash_val_t v; rw_enter(&hash->mh_contents, RW_WRITER); if (i_mod_hash_remove_nosync(hash, key, &v) == 0) { /* * mod_hash_remove() takes care of freeing up the key resources. */ MH_VAL_DESTROY(hash, v); } res = i_mod_hash_insert_nosync(hash, key, val, (mod_hash_hndl_t)0); rw_exit(&hash->mh_contents); return (res); } /* * mod_hash_destroy() * Remove an element from the hash table matching 'key', and destroy it. */ int mod_hash_destroy(mod_hash_t *hash, mod_hash_key_t key) { mod_hash_val_t val; int rv; rw_enter(&hash->mh_contents, RW_WRITER); if ((rv = i_mod_hash_remove_nosync(hash, key, &val)) == 0) { /* * mod_hash_remove() takes care of freeing up the key resources. */ MH_VAL_DESTROY(hash, val); } rw_exit(&hash->mh_contents); return (rv); } /* * i_mod_hash_find_nosync() * mod_hash_find() * Find a value in the hash table corresponding to the given key. */ int i_mod_hash_find_nosync(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val) { uint_t hashidx; struct mod_hash_entry *e; hashidx = i_mod_hash(hash, key); for (e = hash->mh_entries[hashidx]; e != NULL; e = e->mhe_next) { if (MH_KEYCMP(hash, e->mhe_key, key) == 0) { *val = e->mhe_val; hash->mh_stat.mhs_hit++; return (0); } } hash->mh_stat.mhs_miss++; return (MH_ERR_NOTFOUND); } int mod_hash_find(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val) { int res; rw_enter(&hash->mh_contents, RW_READER); res = i_mod_hash_find_nosync(hash, key, val); rw_exit(&hash->mh_contents); return (res); } int mod_hash_find_cb(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val, void (*find_cb)(mod_hash_key_t, mod_hash_val_t)) { int res; rw_enter(&hash->mh_contents, RW_READER); res = i_mod_hash_find_nosync(hash, key, val); if (res == 0) { find_cb(key, *val); } rw_exit(&hash->mh_contents); return (res); } int mod_hash_find_cb_rval(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val, int (*find_cb)(mod_hash_key_t, mod_hash_val_t), int *cb_rval) { int res; rw_enter(&hash->mh_contents, RW_READER); res = i_mod_hash_find_nosync(hash, key, val); if (res == 0) { *cb_rval = find_cb(key, *val); } rw_exit(&hash->mh_contents); return (res); } void i_mod_hash_walk_nosync(mod_hash_t *hash, uint_t (*callback)(mod_hash_key_t, mod_hash_val_t *, void *), void *arg) { struct mod_hash_entry *e; uint_t hashidx; int res = MH_WALK_CONTINUE; for (hashidx = 0; (hashidx < (hash->mh_nchains - 1)) && (res == MH_WALK_CONTINUE); hashidx++) { e = hash->mh_entries[hashidx]; while ((e != NULL) && (res == MH_WALK_CONTINUE)) { res = callback(e->mhe_key, e->mhe_val, arg); e = e->mhe_next; } } } /* * mod_hash_walk() * Walks all the elements in the hashtable and invokes the callback * function with the key/value pair for each element. The hashtable * is locked for readers so the callback function should not attempt * to do any updates to the hashable. The callback function should * return MH_WALK_CONTINUE to continue walking the hashtable or * MH_WALK_TERMINATE to abort the walk of the hashtable. */ void mod_hash_walk(mod_hash_t *hash, uint_t (*callback)(mod_hash_key_t, mod_hash_val_t *, void *), void *arg) { rw_enter(&hash->mh_contents, RW_READER); i_mod_hash_walk_nosync(hash, callback, arg); rw_exit(&hash->mh_contents); } /* * i_mod_hash_clear_nosync() * mod_hash_clear() * Clears the given hash table by calling the destructor of every hash * element and freeing up all mod_hash_entry's. 
*/ void i_mod_hash_clear_nosync(mod_hash_t *hash) { int i; struct mod_hash_entry *e, *old_e; for (i = 0; i < hash->mh_nchains; i++) { e = hash->mh_entries[i]; while (e != NULL) { MH_KEY_DESTROY(hash, e->mhe_key); MH_VAL_DESTROY(hash, e->mhe_val); old_e = e; e = e->mhe_next; kmem_cache_free(mh_e_cache, old_e); } hash->mh_entries[i] = NULL; } hash->mh_stat.mhs_nelems = 0; } void mod_hash_clear(mod_hash_t *hash) { ASSERT(hash); rw_enter(&hash->mh_contents, RW_WRITER); i_mod_hash_clear_nosync(hash); rw_exit(&hash->mh_contents); }
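For completeness, a short, hedged sketch of how a client might drive the read-side walk interface documented above (kernel context; count_cb and count_entries are illustrative names, not part of the patched files):

#include <sys/modhash.h>

/*
 * Hypothetical walk callback: counts entries.  Returning MH_WALK_TERMINATE
 * instead would stop the walk early.
 */
static uint_t
count_cb(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	(void) key, (void) val;
	uint_t *countp = arg;

	(*countp)++;
	return (MH_WALK_CONTINUE);
}

static uint_t
count_entries(mod_hash_t *hash)
{
	uint_t nelems = 0;

	mod_hash_walk(hash, count_cb, &nelems);
	return (nelems);
}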