diff --git a/module/icp/algs/aes/aes_impl.c b/module/icp/algs/aes/aes_impl.c index 037be0db60d7..a5b88b8aab25 100644 --- a/module/icp/algs/aes/aes_impl.c +++ b/module/icp/algs/aes/aes_impl.c @@ -1,443 +1,442 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ #include #include #include #include #include #include /* * Initialize AES encryption and decryption key schedules. * * Parameters: * cipherKey User key * keyBits AES key size (128, 192, or 256 bits) * keysched AES key schedule to be initialized, of type aes_key_t. * Allocated by aes_alloc_keysched(). */ void aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched) { const aes_impl_ops_t *ops = aes_impl_get_ops(); aes_key_t *newbie = keysched; uint_t keysize, i, j; union { uint64_t ka64[4]; uint32_t ka32[8]; } keyarr; switch (keyBits) { case 128: newbie->nr = 10; break; case 192: newbie->nr = 12; break; case 256: newbie->nr = 14; break; default: /* should never get here */ return; } keysize = CRYPTO_BITS2BYTES(keyBits); /* * Generic C implementation requires byteswap for little endian * machines, various accelerated implementations for various * architectures may not. */ if (!ops->needs_byteswap) { /* no byteswap needed */ if (IS_P2ALIGNED(cipherKey, sizeof (uint64_t))) { for (i = 0, j = 0; j < keysize; i++, j += 8) { /* LINTED: pointer alignment */ keyarr.ka64[i] = *((uint64_t *)&cipherKey[j]); } } else { bcopy(cipherKey, keyarr.ka32, keysize); } } else { /* byte swap */ for (i = 0, j = 0; j < keysize; i++, j += 4) { keyarr.ka32[i] = htonl(*(uint32_t *)(void *)&cipherKey[j]); } } ops->generate(newbie, keyarr.ka32, keyBits); newbie->ops = ops; /* * Note: if there are systems that need the AES_64BIT_KS type in the * future, move setting key schedule type to individual implementations */ newbie->type = AES_32BIT_KS; } /* * Encrypt one block using AES. * Align if needed and (for x86 32-bit only) byte-swap. * * Parameters: * ks Key schedule, of type aes_key_t * pt Input block (plain text) * ct Output block (crypto text). 
Can overlap with pt */ int aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct) { aes_key_t *ksch = (aes_key_t *)ks; const aes_impl_ops_t *ops = ksch->ops; if (IS_P2ALIGNED2(pt, ct, sizeof (uint32_t)) && !ops->needs_byteswap) { /* LINTED: pointer alignment */ ops->encrypt(&ksch->encr_ks.ks32[0], ksch->nr, /* LINTED: pointer alignment */ (uint32_t *)pt, (uint32_t *)ct); } else { uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)]; /* Copy input block into buffer */ if (ops->needs_byteswap) { buffer[0] = htonl(*(uint32_t *)(void *)&pt[0]); buffer[1] = htonl(*(uint32_t *)(void *)&pt[4]); buffer[2] = htonl(*(uint32_t *)(void *)&pt[8]); buffer[3] = htonl(*(uint32_t *)(void *)&pt[12]); } else bcopy(pt, &buffer, AES_BLOCK_LEN); ops->encrypt(&ksch->encr_ks.ks32[0], ksch->nr, buffer, buffer); /* Copy result from buffer to output block */ if (ops->needs_byteswap) { *(uint32_t *)(void *)&ct[0] = htonl(buffer[0]); *(uint32_t *)(void *)&ct[4] = htonl(buffer[1]); *(uint32_t *)(void *)&ct[8] = htonl(buffer[2]); *(uint32_t *)(void *)&ct[12] = htonl(buffer[3]); } else bcopy(&buffer, ct, AES_BLOCK_LEN); } return (CRYPTO_SUCCESS); } /* * Decrypt one block using AES. * Align and byte-swap if needed. * * Parameters: * ks Key schedule, of type aes_key_t * ct Input block (crypto text) * pt Output block (plain text). Can overlap with ct */ int aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt) { aes_key_t *ksch = (aes_key_t *)ks; const aes_impl_ops_t *ops = ksch->ops; if (IS_P2ALIGNED2(ct, pt, sizeof (uint32_t)) && !ops->needs_byteswap) { /* LINTED: pointer alignment */ ops->decrypt(&ksch->decr_ks.ks32[0], ksch->nr, /* LINTED: pointer alignment */ (uint32_t *)ct, (uint32_t *)pt); } else { uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)]; /* Copy input block into buffer */ if (ops->needs_byteswap) { buffer[0] = htonl(*(uint32_t *)(void *)&ct[0]); buffer[1] = htonl(*(uint32_t *)(void *)&ct[4]); buffer[2] = htonl(*(uint32_t *)(void *)&ct[8]); buffer[3] = htonl(*(uint32_t *)(void *)&ct[12]); } else bcopy(ct, &buffer, AES_BLOCK_LEN); ops->decrypt(&ksch->decr_ks.ks32[0], ksch->nr, buffer, buffer); /* Copy result from buffer to output block */ if (ops->needs_byteswap) { *(uint32_t *)(void *)&pt[0] = htonl(buffer[0]); *(uint32_t *)(void *)&pt[4] = htonl(buffer[1]); *(uint32_t *)(void *)&pt[8] = htonl(buffer[2]); *(uint32_t *)(void *)&pt[12] = htonl(buffer[3]); } else bcopy(&buffer, pt, AES_BLOCK_LEN); } return (CRYPTO_SUCCESS); } /* * Allocate key schedule for AES. * * Return the pointer and set size to the number of bytes allocated. * Memory allocated must be freed by the caller when done. * * Parameters: * size Size of key schedule allocated, in bytes * kmflag Flag passed to kmem_alloc(9F); ignored in userland. 
*/ -/* ARGSUSED */ void * aes_alloc_keysched(size_t *size, int kmflag) { aes_key_t *keysched; keysched = (aes_key_t *)kmem_alloc(sizeof (aes_key_t), kmflag); if (keysched != NULL) { *size = sizeof (aes_key_t); return (keysched); } return (NULL); } /* AES implementation that contains the fastest methods */ static aes_impl_ops_t aes_fastest_impl = { .name = "fastest" }; /* All compiled in implementations */ const aes_impl_ops_t *aes_all_impl[] = { &aes_generic_impl, #if defined(__x86_64) &aes_x86_64_impl, #endif #if defined(__x86_64) && defined(HAVE_AES) &aes_aesni_impl, #endif }; /* Indicate that benchmark has been completed */ static boolean_t aes_impl_initialized = B_FALSE; /* Select aes implementation */ #define IMPL_FASTEST (UINT32_MAX) #define IMPL_CYCLE (UINT32_MAX-1) #define AES_IMPL_READ(i) (*(volatile uint32_t *) &(i)) static uint32_t icp_aes_impl = IMPL_FASTEST; static uint32_t user_sel_impl = IMPL_FASTEST; /* Hold all supported implementations */ static size_t aes_supp_impl_cnt = 0; static aes_impl_ops_t *aes_supp_impl[ARRAY_SIZE(aes_all_impl)]; /* * Returns the AES operations for encrypt/decrypt/key setup. When a * SIMD implementation is not allowed in the current context, then * fallback to the fastest generic implementation. */ const aes_impl_ops_t * aes_impl_get_ops(void) { if (!kfpu_allowed()) return (&aes_generic_impl); const aes_impl_ops_t *ops = NULL; const uint32_t impl = AES_IMPL_READ(icp_aes_impl); switch (impl) { case IMPL_FASTEST: ASSERT(aes_impl_initialized); ops = &aes_fastest_impl; break; case IMPL_CYCLE: /* Cycle through supported implementations */ ASSERT(aes_impl_initialized); ASSERT3U(aes_supp_impl_cnt, >, 0); static size_t cycle_impl_idx = 0; size_t idx = (++cycle_impl_idx) % aes_supp_impl_cnt; ops = aes_supp_impl[idx]; break; default: ASSERT3U(impl, <, aes_supp_impl_cnt); ASSERT3U(aes_supp_impl_cnt, >, 0); if (impl < ARRAY_SIZE(aes_all_impl)) ops = aes_supp_impl[impl]; break; } ASSERT3P(ops, !=, NULL); return (ops); } /* * Initialize all supported implementations. */ void aes_impl_init(void) { aes_impl_ops_t *curr_impl; int i, c; /* Move supported implementations into aes_supp_impls */ for (i = 0, c = 0; i < ARRAY_SIZE(aes_all_impl); i++) { curr_impl = (aes_impl_ops_t *)aes_all_impl[i]; if (curr_impl->is_supported()) aes_supp_impl[c++] = (aes_impl_ops_t *)curr_impl; } aes_supp_impl_cnt = c; /* * Set the fastest implementation given the assumption that the * hardware accelerated version is the fastest. */ #if defined(__x86_64) #if defined(HAVE_AES) if (aes_aesni_impl.is_supported()) { memcpy(&aes_fastest_impl, &aes_aesni_impl, sizeof (aes_fastest_impl)); } else #endif { memcpy(&aes_fastest_impl, &aes_x86_64_impl, sizeof (aes_fastest_impl)); } #else memcpy(&aes_fastest_impl, &aes_generic_impl, sizeof (aes_fastest_impl)); #endif strlcpy(aes_fastest_impl.name, "fastest", AES_IMPL_NAME_MAX); /* Finish initialization */ atomic_swap_32(&icp_aes_impl, user_sel_impl); aes_impl_initialized = B_TRUE; } static const struct { char *name; uint32_t sel; } aes_impl_opts[] = { { "cycle", IMPL_CYCLE }, { "fastest", IMPL_FASTEST }, }; /* * Function sets desired aes implementation. * * If we are called before init(), user preference will be saved in * user_sel_impl, and applied in later init() call. This occurs when module * parameter is specified on module load. Otherwise, directly update * icp_aes_impl. * * @val Name of aes implementation to use * @param Unused. 
*/ int aes_impl_set(const char *val) { int err = -EINVAL; char req_name[AES_IMPL_NAME_MAX]; uint32_t impl = AES_IMPL_READ(user_sel_impl); size_t i; /* sanitize input */ i = strnlen(val, AES_IMPL_NAME_MAX); if (i == 0 || i >= AES_IMPL_NAME_MAX) return (err); strlcpy(req_name, val, AES_IMPL_NAME_MAX); while (i > 0 && isspace(req_name[i-1])) i--; req_name[i] = '\0'; /* Check mandatory options */ for (i = 0; i < ARRAY_SIZE(aes_impl_opts); i++) { if (strcmp(req_name, aes_impl_opts[i].name) == 0) { impl = aes_impl_opts[i].sel; err = 0; break; } } /* check all supported impl if init() was already called */ if (err != 0 && aes_impl_initialized) { /* check all supported implementations */ for (i = 0; i < aes_supp_impl_cnt; i++) { if (strcmp(req_name, aes_supp_impl[i]->name) == 0) { impl = i; err = 0; break; } } } if (err == 0) { if (aes_impl_initialized) atomic_swap_32(&icp_aes_impl, impl); else atomic_swap_32(&user_sel_impl, impl); } return (err); } #if defined(_KERNEL) && defined(__linux__) static int icp_aes_impl_set(const char *val, zfs_kernel_param_t *kp) { return (aes_impl_set(val)); } static int icp_aes_impl_get(char *buffer, zfs_kernel_param_t *kp) { int i, cnt = 0; char *fmt; const uint32_t impl = AES_IMPL_READ(icp_aes_impl); ASSERT(aes_impl_initialized); /* list mandatory options */ for (i = 0; i < ARRAY_SIZE(aes_impl_opts); i++) { fmt = (impl == aes_impl_opts[i].sel) ? "[%s] " : "%s "; cnt += sprintf(buffer + cnt, fmt, aes_impl_opts[i].name); } /* list all supported implementations */ for (i = 0; i < aes_supp_impl_cnt; i++) { fmt = (i == impl) ? "[%s] " : "%s "; cnt += sprintf(buffer + cnt, fmt, aes_supp_impl[i]->name); } return (cnt); } module_param_call(icp_aes_impl, icp_aes_impl_set, icp_aes_impl_get, NULL, 0644); MODULE_PARM_DESC(icp_aes_impl, "Select aes implementation."); #endif diff --git a/module/icp/algs/modes/cbc.c b/module/icp/algs/modes/cbc.c index 85864f56dead..bddb5b64ddd3 100644 --- a/module/icp/algs/modes/cbc.c +++ b/module/icp/algs/modes/cbc.c @@ -1,273 +1,271 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include /* * Algorithm independent CBC functions. 
*/ int cbc_encrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*encrypt)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; uint8_t *blockp; uint8_t *lastp; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; if (length + ctx->cbc_remainder_len < block_size) { /* accumulate bytes here and return */ bcopy(datap, (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len, length); ctx->cbc_remainder_len += length; ctx->cbc_copy_to = datap; return (CRYPTO_SUCCESS); } lastp = (uint8_t *)ctx->cbc_iv; crypto_init_ptrs(out, &iov_or_mp, &offset); do { /* Unprocessed data from last call. */ if (ctx->cbc_remainder_len > 0) { need = block_size - ctx->cbc_remainder_len; if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); bcopy(datap, &((uint8_t *)ctx->cbc_remainder) [ctx->cbc_remainder_len], need); blockp = (uint8_t *)ctx->cbc_remainder; } else { blockp = datap; } /* * XOR the previous cipher block or IV with the * current clear block. */ xor_block(blockp, lastp); encrypt(ctx->cbc_keysched, lastp, lastp); crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, block_size); /* copy block to where it belongs */ if (out_data_1_len == block_size) { copy_block(lastp, out_data_1); } else { bcopy(lastp, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy(lastp + out_data_1_len, out_data_2, block_size - out_data_1_len); } } /* update offset */ out->cd_offset += block_size; /* Update pointer to next block of data to be processed. */ if (ctx->cbc_remainder_len != 0) { datap += need; ctx->cbc_remainder_len = 0; } else { datap += block_size; } remainder = (size_t)&data[length] - (size_t)datap; /* Incomplete last block. */ if (remainder > 0 && remainder < block_size) { bcopy(datap, ctx->cbc_remainder, remainder); ctx->cbc_remainder_len = remainder; ctx->cbc_copy_to = datap; goto out; } ctx->cbc_copy_to = NULL; } while (remainder > 0); out: /* * Save the last encrypted block in the context. */ if (ctx->cbc_lastp != NULL) { copy_block((uint8_t *)ctx->cbc_lastp, (uint8_t *)ctx->cbc_iv); ctx->cbc_lastp = (uint8_t *)ctx->cbc_iv; } return (CRYPTO_SUCCESS); } #define OTHER(a, ctx) \ (((a) == (ctx)->cbc_lastblock) ? (ctx)->cbc_iv : (ctx)->cbc_lastblock) -/* ARGSUSED */ int cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*decrypt)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; uint8_t *blockp; uint8_t *lastp; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; if (length + ctx->cbc_remainder_len < block_size) { /* accumulate bytes here and return */ bcopy(datap, (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len, length); ctx->cbc_remainder_len += length; ctx->cbc_copy_to = datap; return (CRYPTO_SUCCESS); } lastp = ctx->cbc_lastp; crypto_init_ptrs(out, &iov_or_mp, &offset); do { /* Unprocessed data from last call. 
*/ if (ctx->cbc_remainder_len > 0) { need = block_size - ctx->cbc_remainder_len; if (need > remainder) return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); bcopy(datap, &((uint8_t *)ctx->cbc_remainder) [ctx->cbc_remainder_len], need); blockp = (uint8_t *)ctx->cbc_remainder; } else { blockp = datap; } /* LINTED: pointer alignment */ copy_block(blockp, (uint8_t *)OTHER((uint64_t *)lastp, ctx)); decrypt(ctx->cbc_keysched, blockp, (uint8_t *)ctx->cbc_remainder); blockp = (uint8_t *)ctx->cbc_remainder; /* * XOR the previous cipher block or IV with the * currently decrypted block. */ xor_block(lastp, blockp); /* LINTED: pointer alignment */ lastp = (uint8_t *)OTHER((uint64_t *)lastp, ctx); crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, block_size); bcopy(blockp, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy(blockp + out_data_1_len, out_data_2, block_size - out_data_1_len); } /* update offset */ out->cd_offset += block_size; /* Update pointer to next block of data to be processed. */ if (ctx->cbc_remainder_len != 0) { datap += need; ctx->cbc_remainder_len = 0; } else { datap += block_size; } remainder = (size_t)&data[length] - (size_t)datap; /* Incomplete last block. */ if (remainder > 0 && remainder < block_size) { bcopy(datap, ctx->cbc_remainder, remainder); ctx->cbc_remainder_len = remainder; ctx->cbc_lastp = lastp; ctx->cbc_copy_to = datap; return (CRYPTO_SUCCESS); } ctx->cbc_copy_to = NULL; } while (remainder > 0); ctx->cbc_lastp = lastp; return (CRYPTO_SUCCESS); } int cbc_init_ctx(cbc_ctx_t *cbc_ctx, char *param, size_t param_len, size_t block_size, void (*copy_block)(uint8_t *, uint64_t *)) { /* * Copy IV into context. * * If cm_param == NULL then the IV comes from the * cd_miscdata field in the crypto_data structure. */ if (param != NULL) { ASSERT(param_len == block_size); copy_block((uchar_t *)param, cbc_ctx->cbc_iv); } cbc_ctx->cbc_lastp = (uint8_t *)&cbc_ctx->cbc_iv[0]; cbc_ctx->cbc_flags |= CBC_MODE; return (CRYPTO_SUCCESS); } -/* ARGSUSED */ void * cbc_alloc_ctx(int kmflag) { cbc_ctx_t *cbc_ctx; if ((cbc_ctx = kmem_zalloc(sizeof (cbc_ctx_t), kmflag)) == NULL) return (NULL); cbc_ctx->cbc_flags = CBC_MODE; return (cbc_ctx); } diff --git a/module/icp/algs/modes/ccm.c b/module/icp/algs/modes/ccm.c index 5d6507c49db1..a41cbc395fd6 100644 --- a/module/icp/algs/modes/ccm.c +++ b/module/icp/algs/modes/ccm.c @@ -1,907 +1,906 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #ifdef HAVE_EFFICIENT_UNALIGNED_ACCESS #include #define UNALIGNED_POINTERS_PERMITTED #endif /* * Encrypt multiple blocks of data in CCM mode. Decrypt for CCM mode * is done in another function. 
*/ int ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; uint8_t *blockp; uint8_t *lastp; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; uint64_t counter; uint8_t *mac_buf; if (length + ctx->ccm_remainder_len < block_size) { /* accumulate bytes here and return */ bcopy(datap, (uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len, length); ctx->ccm_remainder_len += length; ctx->ccm_copy_to = datap; return (CRYPTO_SUCCESS); } lastp = (uint8_t *)ctx->ccm_cb; crypto_init_ptrs(out, &iov_or_mp, &offset); mac_buf = (uint8_t *)ctx->ccm_mac_buf; do { /* Unprocessed data from last call. */ if (ctx->ccm_remainder_len > 0) { need = block_size - ctx->ccm_remainder_len; if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); bcopy(datap, &((uint8_t *)ctx->ccm_remainder) [ctx->ccm_remainder_len], need); blockp = (uint8_t *)ctx->ccm_remainder; } else { blockp = datap; } /* * do CBC MAC * * XOR the previous cipher block with the current clear block. * mac_buf always contains the previous cipher block. */ xor_block(blockp, mac_buf); encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf); /* ccm_cb is the counter block */ encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, (uint8_t *)ctx->ccm_tmp); lastp = (uint8_t *)ctx->ccm_tmp; /* * Increment counter. Counter bits are confined * to the bottom 64 bits of the counter block. */ #ifdef _ZFS_LITTLE_ENDIAN counter = ntohll(ctx->ccm_cb[1] & ctx->ccm_counter_mask); counter = htonll(counter + 1); #else counter = ctx->ccm_cb[1] & ctx->ccm_counter_mask; counter++; #endif /* _ZFS_LITTLE_ENDIAN */ counter &= ctx->ccm_counter_mask; ctx->ccm_cb[1] = (ctx->ccm_cb[1] & ~(ctx->ccm_counter_mask)) | counter; /* * XOR encrypted counter block with the current clear block. */ xor_block(blockp, lastp); ctx->ccm_processed_data_len += block_size; crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, block_size); /* copy block to where it belongs */ if (out_data_1_len == block_size) { copy_block(lastp, out_data_1); } else { bcopy(lastp, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy(lastp + out_data_1_len, out_data_2, block_size - out_data_1_len); } } /* update offset */ out->cd_offset += block_size; /* Update pointer to next block of data to be processed. */ if (ctx->ccm_remainder_len != 0) { datap += need; ctx->ccm_remainder_len = 0; } else { datap += block_size; } remainder = (size_t)&data[length] - (size_t)datap; /* Incomplete last block. 
*/ if (remainder > 0 && remainder < block_size) { bcopy(datap, ctx->ccm_remainder, remainder); ctx->ccm_remainder_len = remainder; ctx->ccm_copy_to = datap; goto out; } ctx->ccm_copy_to = NULL; } while (remainder > 0); out: return (CRYPTO_SUCCESS); } void calculate_ccm_mac(ccm_ctx_t *ctx, uint8_t *ccm_mac, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *)) { uint64_t counter; uint8_t *counterp, *mac_buf; int i; mac_buf = (uint8_t *)ctx->ccm_mac_buf; /* first counter block start with index 0 */ counter = 0; ctx->ccm_cb[1] = (ctx->ccm_cb[1] & ~(ctx->ccm_counter_mask)) | counter; counterp = (uint8_t *)ctx->ccm_tmp; encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, counterp); /* calculate XOR of MAC with first counter block */ for (i = 0; i < ctx->ccm_mac_len; i++) { ccm_mac[i] = mac_buf[i] ^ counterp[i]; } } -/* ARGSUSED */ int ccm_encrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { uint8_t *lastp, *mac_buf, *ccm_mac_p, *macp = NULL; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; int i; if (out->cd_length < (ctx->ccm_remainder_len + ctx->ccm_mac_len)) { return (CRYPTO_DATA_LEN_RANGE); } /* * When we get here, the number of bytes of payload processed * plus whatever data remains, if any, * should be the same as the number of bytes that's being * passed in the argument during init time. */ if ((ctx->ccm_processed_data_len + ctx->ccm_remainder_len) != (ctx->ccm_data_len)) { return (CRYPTO_DATA_LEN_RANGE); } mac_buf = (uint8_t *)ctx->ccm_mac_buf; if (ctx->ccm_remainder_len > 0) { /* ccm_mac_input_buf is not used for encryption */ macp = (uint8_t *)ctx->ccm_mac_input_buf; bzero(macp, block_size); /* copy remainder to temporary buffer */ bcopy(ctx->ccm_remainder, macp, ctx->ccm_remainder_len); /* calculate the CBC MAC */ xor_block(macp, mac_buf); encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf); /* calculate the counter mode */ lastp = (uint8_t *)ctx->ccm_tmp; encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, lastp); /* XOR with counter block */ for (i = 0; i < ctx->ccm_remainder_len; i++) { macp[i] ^= lastp[i]; } ctx->ccm_processed_data_len += ctx->ccm_remainder_len; } /* Calculate the CCM MAC */ ccm_mac_p = (uint8_t *)ctx->ccm_tmp; calculate_ccm_mac(ctx, ccm_mac_p, encrypt_block); crypto_init_ptrs(out, &iov_or_mp, &offset); crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, ctx->ccm_remainder_len + ctx->ccm_mac_len); if (ctx->ccm_remainder_len > 0) { /* copy temporary block to where it belongs */ if (out_data_2 == NULL) { /* everything will fit in out_data_1 */ bcopy(macp, out_data_1, ctx->ccm_remainder_len); bcopy(ccm_mac_p, out_data_1 + ctx->ccm_remainder_len, ctx->ccm_mac_len); } else { if (out_data_1_len < ctx->ccm_remainder_len) { size_t data_2_len_used; bcopy(macp, out_data_1, out_data_1_len); data_2_len_used = ctx->ccm_remainder_len - out_data_1_len; bcopy((uint8_t *)macp + out_data_1_len, out_data_2, data_2_len_used); bcopy(ccm_mac_p, out_data_2 + data_2_len_used, ctx->ccm_mac_len); } else { bcopy(macp, out_data_1, out_data_1_len); if (out_data_1_len == ctx->ccm_remainder_len) { /* mac will be in out_data_2 */ bcopy(ccm_mac_p, out_data_2, ctx->ccm_mac_len); } else { size_t len_not_used = out_data_1_len - ctx->ccm_remainder_len; /* * part of mac in will be in * out_data_1, part of the mac will be * in out_data_2 */ bcopy(ccm_mac_p, out_data_1 + 
ctx->ccm_remainder_len, len_not_used); bcopy(ccm_mac_p + len_not_used, out_data_2, ctx->ccm_mac_len - len_not_used); } } } } else { /* copy block to where it belongs */ bcopy(ccm_mac_p, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy(ccm_mac_p + out_data_1_len, out_data_2, block_size - out_data_1_len); } } out->cd_offset += ctx->ccm_remainder_len + ctx->ccm_mac_len; ctx->ccm_remainder_len = 0; return (CRYPTO_SUCCESS); } /* * This will only deal with decrypting the last block of the input that * might not be a multiple of block length. */ static void ccm_decrypt_incomplete_block(ccm_ctx_t *ctx, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *)) { uint8_t *datap, *outp, *counterp; int i; datap = (uint8_t *)ctx->ccm_remainder; outp = &((ctx->ccm_pt_buf)[ctx->ccm_processed_data_len]); counterp = (uint8_t *)ctx->ccm_tmp; encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, counterp); /* XOR with counter block */ for (i = 0; i < ctx->ccm_remainder_len; i++) { outp[i] = datap[i] ^ counterp[i]; } } /* * This will decrypt the cipher text. However, the plaintext won't be * returned to the caller. It will be returned when decrypt_final() is * called if the MAC matches */ -/* ARGSUSED */ int ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { + (void) out; size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; uint8_t *blockp; uint8_t *cbp; uint64_t counter; size_t pt_len, total_decrypted_len, mac_len, pm_len, pd_len; uint8_t *resultp; pm_len = ctx->ccm_processed_mac_len; if (pm_len > 0) { uint8_t *tmp; /* * all ciphertext has been processed, just waiting for * part of the value of the mac */ if ((pm_len + length) > ctx->ccm_mac_len) { return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); } tmp = (uint8_t *)ctx->ccm_mac_input_buf; bcopy(datap, tmp + pm_len, length); ctx->ccm_processed_mac_len += length; return (CRYPTO_SUCCESS); } /* * If we decrypt the given data, what total amount of data would * have been decrypted? */ pd_len = ctx->ccm_processed_data_len; total_decrypted_len = pd_len + length + ctx->ccm_remainder_len; if (total_decrypted_len > (ctx->ccm_data_len + ctx->ccm_mac_len)) { return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); } pt_len = ctx->ccm_data_len; if (total_decrypted_len > pt_len) { /* * part of the input will be the MAC, need to isolate that * to be dealt with later. The left-over data in * ccm_remainder_len from last time will not be part of the * MAC. Otherwise, it would have already been taken out * when this call is made last time. 
*/ size_t pt_part = pt_len - pd_len - ctx->ccm_remainder_len; mac_len = length - pt_part; ctx->ccm_processed_mac_len = mac_len; bcopy(data + pt_part, ctx->ccm_mac_input_buf, mac_len); if (pt_part + ctx->ccm_remainder_len < block_size) { /* * since this is last of the ciphertext, will * just decrypt with it here */ bcopy(datap, &((uint8_t *)ctx->ccm_remainder) [ctx->ccm_remainder_len], pt_part); ctx->ccm_remainder_len += pt_part; ccm_decrypt_incomplete_block(ctx, encrypt_block); ctx->ccm_processed_data_len += ctx->ccm_remainder_len; ctx->ccm_remainder_len = 0; return (CRYPTO_SUCCESS); } else { /* let rest of the code handle this */ length = pt_part; } } else if (length + ctx->ccm_remainder_len < block_size) { /* accumulate bytes here and return */ bcopy(datap, (uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len, length); ctx->ccm_remainder_len += length; ctx->ccm_copy_to = datap; return (CRYPTO_SUCCESS); } do { /* Unprocessed data from last call. */ if (ctx->ccm_remainder_len > 0) { need = block_size - ctx->ccm_remainder_len; if (need > remainder) return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); bcopy(datap, &((uint8_t *)ctx->ccm_remainder) [ctx->ccm_remainder_len], need); blockp = (uint8_t *)ctx->ccm_remainder; } else { blockp = datap; } /* Calculate the counter mode, ccm_cb is the counter block */ cbp = (uint8_t *)ctx->ccm_tmp; encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, cbp); /* * Increment counter. * Counter bits are confined to the bottom 64 bits */ #ifdef _ZFS_LITTLE_ENDIAN counter = ntohll(ctx->ccm_cb[1] & ctx->ccm_counter_mask); counter = htonll(counter + 1); #else counter = ctx->ccm_cb[1] & ctx->ccm_counter_mask; counter++; #endif /* _ZFS_LITTLE_ENDIAN */ counter &= ctx->ccm_counter_mask; ctx->ccm_cb[1] = (ctx->ccm_cb[1] & ~(ctx->ccm_counter_mask)) | counter; /* XOR with the ciphertext */ xor_block(blockp, cbp); /* Copy the plaintext to the "holding buffer" */ resultp = (uint8_t *)ctx->ccm_pt_buf + ctx->ccm_processed_data_len; copy_block(cbp, resultp); ctx->ccm_processed_data_len += block_size; ctx->ccm_lastp = blockp; /* Update pointer to next block of data to be processed. 
*/ if (ctx->ccm_remainder_len != 0) { datap += need; ctx->ccm_remainder_len = 0; } else { datap += block_size; } remainder = (size_t)&data[length] - (size_t)datap; /* Incomplete last block */ if (remainder > 0 && remainder < block_size) { bcopy(datap, ctx->ccm_remainder, remainder); ctx->ccm_remainder_len = remainder; ctx->ccm_copy_to = datap; if (ctx->ccm_processed_mac_len > 0) { /* * not expecting anymore ciphertext, just * compute plaintext for the remaining input */ ccm_decrypt_incomplete_block(ctx, encrypt_block); ctx->ccm_processed_data_len += remainder; ctx->ccm_remainder_len = 0; } goto out; } ctx->ccm_copy_to = NULL; } while (remainder > 0); out: return (CRYPTO_SUCCESS); } int ccm_decrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { size_t mac_remain, pt_len; uint8_t *pt, *mac_buf, *macp, *ccm_mac_p; int rv; pt_len = ctx->ccm_data_len; /* Make sure output buffer can fit all of the plaintext */ if (out->cd_length < pt_len) { return (CRYPTO_DATA_LEN_RANGE); } pt = ctx->ccm_pt_buf; mac_remain = ctx->ccm_processed_data_len; mac_buf = (uint8_t *)ctx->ccm_mac_buf; macp = (uint8_t *)ctx->ccm_tmp; while (mac_remain > 0) { if (mac_remain < block_size) { bzero(macp, block_size); bcopy(pt, macp, mac_remain); mac_remain = 0; } else { copy_block(pt, macp); mac_remain -= block_size; pt += block_size; } /* calculate the CBC MAC */ xor_block(macp, mac_buf); encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf); } /* Calculate the CCM MAC */ ccm_mac_p = (uint8_t *)ctx->ccm_tmp; calculate_ccm_mac((ccm_ctx_t *)ctx, ccm_mac_p, encrypt_block); /* compare the input CCM MAC value with what we calculated */ if (bcmp(ctx->ccm_mac_input_buf, ccm_mac_p, ctx->ccm_mac_len)) { /* They don't match */ return (CRYPTO_INVALID_MAC); } else { rv = crypto_put_output_data(ctx->ccm_pt_buf, out, pt_len); if (rv != CRYPTO_SUCCESS) return (rv); out->cd_offset += pt_len; } return (CRYPTO_SUCCESS); } static int ccm_validate_args(CK_AES_CCM_PARAMS *ccm_param, boolean_t is_encrypt_init) { size_t macSize, nonceSize; uint8_t q; uint64_t maxValue; /* * Check the length of the MAC. The only valid * lengths for the MAC are: 4, 6, 8, 10, 12, 14, 16 */ macSize = ccm_param->ulMACSize; if ((macSize < 4) || (macSize > 16) || ((macSize % 2) != 0)) { return (CRYPTO_MECHANISM_PARAM_INVALID); } /* Check the nonce length. 
Valid values are 7, 8, 9, 10, 11, 12, 13 */ nonceSize = ccm_param->ulNonceSize; if ((nonceSize < 7) || (nonceSize > 13)) { return (CRYPTO_MECHANISM_PARAM_INVALID); } /* q is the length of the field storing the length, in bytes */ q = (uint8_t)((15 - nonceSize) & 0xFF); /* * If it is decrypt, need to make sure size of ciphertext is at least * bigger than MAC len */ if ((!is_encrypt_init) && (ccm_param->ulDataSize < macSize)) { return (CRYPTO_MECHANISM_PARAM_INVALID); } /* * Check to make sure the length of the payload is within the * range of values allowed by q */ if (q < 8) { maxValue = (1ULL << (q * 8)) - 1; } else { maxValue = ULONG_MAX; } if (ccm_param->ulDataSize > maxValue) { return (CRYPTO_MECHANISM_PARAM_INVALID); } return (CRYPTO_SUCCESS); } /* * Format the first block used in CBC-MAC (B0) and the initial counter * block based on formatting functions and counter generation functions * specified in RFC 3610 and NIST publication 800-38C, appendix A * * b0 is the first block used in CBC-MAC * cb0 is the first counter block * * It's assumed that the arguments b0 and cb0 are preallocated AES blocks * */ static void ccm_format_initial_blocks(uchar_t *nonce, ulong_t nonceSize, ulong_t authDataSize, uint8_t *b0, ccm_ctx_t *aes_ctx) { uint64_t payloadSize; uint8_t t, q, have_adata = 0; size_t limit; int i, j, k; uint64_t mask = 0; uint8_t *cb; q = (uint8_t)((15 - nonceSize) & 0xFF); t = (uint8_t)((aes_ctx->ccm_mac_len) & 0xFF); /* Construct the first octet of b0 */ if (authDataSize > 0) { have_adata = 1; } b0[0] = (have_adata << 6) | (((t - 2) / 2) << 3) | (q - 1); /* copy the nonce value into b0 */ bcopy(nonce, &(b0[1]), nonceSize); /* store the length of the payload into b0 */ bzero(&(b0[1+nonceSize]), q); payloadSize = aes_ctx->ccm_data_len; limit = 8 < q ? 8 : q; for (i = 0, j = 0, k = 15; i < limit; i++, j += 8, k--) { b0[k] = (uint8_t)((payloadSize >> j) & 0xFF); } /* format the counter block */ cb = (uint8_t *)aes_ctx->ccm_cb; cb[0] = 0x07 & (q-1); /* first byte */ /* copy the nonce value into the counter block */ bcopy(nonce, &(cb[1]), nonceSize); bzero(&(cb[1+nonceSize]), q); /* Create the mask for the counter field based on the size of nonce */ q <<= 3; while (q-- > 0) { mask |= (1ULL << q); } #ifdef _ZFS_LITTLE_ENDIAN mask = htonll(mask); #endif aes_ctx->ccm_counter_mask = mask; /* * During calculation, we start using counter block 1, we will * set it up right here. * We can just set the last byte to have the value 1, because * even with the biggest nonce of 13, the last byte of the * counter block will be used for the counter value. 
*/ cb[15] = 0x01; } /* * Encode the length of the associated data as * specified in RFC 3610 and NIST publication 800-38C, appendix A */ static void encode_adata_len(ulong_t auth_data_len, uint8_t *encoded, size_t *encoded_len) { #ifdef UNALIGNED_POINTERS_PERMITTED uint32_t *lencoded_ptr; #ifdef _LP64 uint64_t *llencoded_ptr; #endif #endif /* UNALIGNED_POINTERS_PERMITTED */ if (auth_data_len < ((1ULL<<16) - (1ULL<<8))) { /* 0 < a < (2^16-2^8) */ *encoded_len = 2; encoded[0] = (auth_data_len & 0xff00) >> 8; encoded[1] = auth_data_len & 0xff; } else if ((auth_data_len >= ((1ULL<<16) - (1ULL<<8))) && (auth_data_len < (1ULL << 31))) { /* (2^16-2^8) <= a < 2^32 */ *encoded_len = 6; encoded[0] = 0xff; encoded[1] = 0xfe; #ifdef UNALIGNED_POINTERS_PERMITTED lencoded_ptr = (uint32_t *)&encoded[2]; *lencoded_ptr = htonl(auth_data_len); #else encoded[2] = (auth_data_len & 0xff000000) >> 24; encoded[3] = (auth_data_len & 0xff0000) >> 16; encoded[4] = (auth_data_len & 0xff00) >> 8; encoded[5] = auth_data_len & 0xff; #endif /* UNALIGNED_POINTERS_PERMITTED */ #ifdef _LP64 } else { /* 2^32 <= a < 2^64 */ *encoded_len = 10; encoded[0] = 0xff; encoded[1] = 0xff; #ifdef UNALIGNED_POINTERS_PERMITTED llencoded_ptr = (uint64_t *)&encoded[2]; *llencoded_ptr = htonl(auth_data_len); #else encoded[2] = (auth_data_len & 0xff00000000000000) >> 56; encoded[3] = (auth_data_len & 0xff000000000000) >> 48; encoded[4] = (auth_data_len & 0xff0000000000) >> 40; encoded[5] = (auth_data_len & 0xff00000000) >> 32; encoded[6] = (auth_data_len & 0xff000000) >> 24; encoded[7] = (auth_data_len & 0xff0000) >> 16; encoded[8] = (auth_data_len & 0xff00) >> 8; encoded[9] = auth_data_len & 0xff; #endif /* UNALIGNED_POINTERS_PERMITTED */ #endif /* _LP64 */ } } static int ccm_init(ccm_ctx_t *ctx, unsigned char *nonce, size_t nonce_len, unsigned char *auth_data, size_t auth_data_len, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { uint8_t *mac_buf, *datap, *ivp, *authp; size_t remainder, processed; uint8_t encoded_a[10]; /* max encoded auth data length is 10 octets */ size_t encoded_a_len = 0; mac_buf = (uint8_t *)&(ctx->ccm_mac_buf); /* * Format the 1st block for CBC-MAC and construct the * 1st counter block. * * aes_ctx->ccm_iv is used for storing the counter block * mac_buf will store b0 at this time. 
*/ ccm_format_initial_blocks(nonce, nonce_len, auth_data_len, mac_buf, ctx); /* The IV for CBC MAC for AES CCM mode is always zero */ ivp = (uint8_t *)ctx->ccm_tmp; bzero(ivp, block_size); xor_block(ivp, mac_buf); /* encrypt the nonce */ encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf); /* take care of the associated data, if any */ if (auth_data_len == 0) { return (CRYPTO_SUCCESS); } encode_adata_len(auth_data_len, encoded_a, &encoded_a_len); remainder = auth_data_len; /* 1st block: it contains encoded associated data, and some data */ authp = (uint8_t *)ctx->ccm_tmp; bzero(authp, block_size); bcopy(encoded_a, authp, encoded_a_len); processed = block_size - encoded_a_len; if (processed > auth_data_len) { /* in case auth_data is very small */ processed = auth_data_len; } bcopy(auth_data, authp+encoded_a_len, processed); /* xor with previous buffer */ xor_block(authp, mac_buf); encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf); remainder -= processed; if (remainder == 0) { /* a small amount of associated data, it's all done now */ return (CRYPTO_SUCCESS); } do { if (remainder < block_size) { /* * There's not a block full of data, pad rest of * buffer with zero */ bzero(authp, block_size); bcopy(&(auth_data[processed]), authp, remainder); datap = (uint8_t *)authp; remainder = 0; } else { datap = (uint8_t *)(&(auth_data[processed])); processed += block_size; remainder -= block_size; } xor_block(datap, mac_buf); encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf); } while (remainder > 0); return (CRYPTO_SUCCESS); } /* * The following function should be call at encrypt or decrypt init time * for AES CCM mode. */ int ccm_init_ctx(ccm_ctx_t *ccm_ctx, char *param, int kmflag, boolean_t is_encrypt_init, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { int rv; CK_AES_CCM_PARAMS *ccm_param; if (param != NULL) { ccm_param = (CK_AES_CCM_PARAMS *)param; if ((rv = ccm_validate_args(ccm_param, is_encrypt_init)) != 0) { return (rv); } ccm_ctx->ccm_mac_len = ccm_param->ulMACSize; if (is_encrypt_init) { ccm_ctx->ccm_data_len = ccm_param->ulDataSize; } else { ccm_ctx->ccm_data_len = ccm_param->ulDataSize - ccm_ctx->ccm_mac_len; ccm_ctx->ccm_processed_mac_len = 0; } ccm_ctx->ccm_processed_data_len = 0; ccm_ctx->ccm_flags |= CCM_MODE; } else { return (CRYPTO_MECHANISM_PARAM_INVALID); } if (ccm_init(ccm_ctx, ccm_param->nonce, ccm_param->ulNonceSize, ccm_param->authData, ccm_param->ulAuthDataSize, block_size, encrypt_block, xor_block) != 0) { return (CRYPTO_MECHANISM_PARAM_INVALID); } if (!is_encrypt_init) { /* allocate buffer for storing decrypted plaintext */ ccm_ctx->ccm_pt_buf = vmem_alloc(ccm_ctx->ccm_data_len, kmflag); if (ccm_ctx->ccm_pt_buf == NULL) { rv = CRYPTO_HOST_MEMORY; } } return (rv); } void * ccm_alloc_ctx(int kmflag) { ccm_ctx_t *ccm_ctx; if ((ccm_ctx = kmem_zalloc(sizeof (ccm_ctx_t), kmflag)) == NULL) return (NULL); ccm_ctx->ccm_flags = CCM_MODE; return (ccm_ctx); } diff --git a/module/icp/algs/modes/ctr.c b/module/icp/algs/modes/ctr.c index 0188bdd395ff..82295cda877e 100644 --- a/module/icp/algs/modes/ctr.c +++ b/module/icp/algs/modes/ctr.c @@ -1,228 +1,227 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
* See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include /* * Encrypt and decrypt multiple blocks of data in counter mode. */ int ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*cipher)(const void *ks, const uint8_t *pt, uint8_t *ct), void (*xor_block)(uint8_t *, uint8_t *)) { size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; uint8_t *blockp; uint8_t *lastp; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; uint64_t lower_counter, upper_counter; if (length + ctx->ctr_remainder_len < block_size) { /* accumulate bytes here and return */ bcopy(datap, (uint8_t *)ctx->ctr_remainder + ctx->ctr_remainder_len, length); ctx->ctr_remainder_len += length; ctx->ctr_copy_to = datap; return (CRYPTO_SUCCESS); } lastp = (uint8_t *)ctx->ctr_cb; crypto_init_ptrs(out, &iov_or_mp, &offset); do { /* Unprocessed data from last call. */ if (ctx->ctr_remainder_len > 0) { need = block_size - ctx->ctr_remainder_len; if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); bcopy(datap, &((uint8_t *)ctx->ctr_remainder) [ctx->ctr_remainder_len], need); blockp = (uint8_t *)ctx->ctr_remainder; } else { blockp = datap; } /* ctr_cb is the counter block */ cipher(ctx->ctr_keysched, (uint8_t *)ctx->ctr_cb, (uint8_t *)ctx->ctr_tmp); lastp = (uint8_t *)ctx->ctr_tmp; /* * Increment Counter. */ lower_counter = ntohll(ctx->ctr_cb[1] & ctx->ctr_lower_mask); lower_counter = htonll(lower_counter + 1); lower_counter &= ctx->ctr_lower_mask; ctx->ctr_cb[1] = (ctx->ctr_cb[1] & ~(ctx->ctr_lower_mask)) | lower_counter; /* wrap around */ if (lower_counter == 0) { upper_counter = ntohll(ctx->ctr_cb[0] & ctx->ctr_upper_mask); upper_counter = htonll(upper_counter + 1); upper_counter &= ctx->ctr_upper_mask; ctx->ctr_cb[0] = (ctx->ctr_cb[0] & ~(ctx->ctr_upper_mask)) | upper_counter; } /* * XOR encrypted counter block with the current clear block. */ xor_block(blockp, lastp); crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, block_size); /* copy block to where it belongs */ bcopy(lastp, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy(lastp + out_data_1_len, out_data_2, block_size - out_data_1_len); } /* update offset */ out->cd_offset += block_size; /* Update pointer to next block of data to be processed. */ if (ctx->ctr_remainder_len != 0) { datap += need; ctx->ctr_remainder_len = 0; } else { datap += block_size; } remainder = (size_t)&data[length] - (size_t)datap; /* Incomplete last block. 
*/ if (remainder > 0 && remainder < block_size) { bcopy(datap, ctx->ctr_remainder, remainder); ctx->ctr_remainder_len = remainder; ctx->ctr_copy_to = datap; goto out; } ctx->ctr_copy_to = NULL; } while (remainder > 0); out: return (CRYPTO_SUCCESS); } int ctr_mode_final(ctr_ctx_t *ctx, crypto_data_t *out, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *)) { uint8_t *lastp; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; uint8_t *p; int i; if (out->cd_length < ctx->ctr_remainder_len) return (CRYPTO_DATA_LEN_RANGE); encrypt_block(ctx->ctr_keysched, (uint8_t *)ctx->ctr_cb, (uint8_t *)ctx->ctr_tmp); lastp = (uint8_t *)ctx->ctr_tmp; p = (uint8_t *)ctx->ctr_remainder; for (i = 0; i < ctx->ctr_remainder_len; i++) { p[i] ^= lastp[i]; } crypto_init_ptrs(out, &iov_or_mp, &offset); crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, ctx->ctr_remainder_len); bcopy(p, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy((uint8_t *)p + out_data_1_len, out_data_2, ctx->ctr_remainder_len - out_data_1_len); } out->cd_offset += ctx->ctr_remainder_len; ctx->ctr_remainder_len = 0; return (CRYPTO_SUCCESS); } int ctr_init_ctx(ctr_ctx_t *ctr_ctx, ulong_t count, uint8_t *cb, void (*copy_block)(uint8_t *, uint8_t *)) { uint64_t upper_mask = 0; uint64_t lower_mask = 0; if (count == 0 || count > 128) { return (CRYPTO_MECHANISM_PARAM_INVALID); } /* upper 64 bits of the mask */ if (count >= 64) { count -= 64; upper_mask = (count == 64) ? UINT64_MAX : (1ULL << count) - 1; lower_mask = UINT64_MAX; } else { /* now the lower 63 bits */ lower_mask = (1ULL << count) - 1; } ctr_ctx->ctr_lower_mask = htonll(lower_mask); ctr_ctx->ctr_upper_mask = htonll(upper_mask); copy_block(cb, (uchar_t *)ctr_ctx->ctr_cb); ctr_ctx->ctr_lastp = (uint8_t *)&ctr_ctx->ctr_cb[0]; ctr_ctx->ctr_flags |= CTR_MODE; return (CRYPTO_SUCCESS); } -/* ARGSUSED */ void * ctr_alloc_ctx(int kmflag) { ctr_ctx_t *ctr_ctx; if ((ctr_ctx = kmem_zalloc(sizeof (ctr_ctx_t), kmflag)) == NULL) return (NULL); ctr_ctx->ctr_flags = CTR_MODE; return (ctr_ctx); } diff --git a/module/icp/algs/modes/ecb.c b/module/icp/algs/modes/ecb.c index 025f5825cf04..ffbdb9d57d0a 100644 --- a/module/icp/algs/modes/ecb.c +++ b/module/icp/algs/modes/ecb.c @@ -1,128 +1,127 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include /* * Algorithm independent ECB functions. 
*/ int ecb_cipher_contiguous_blocks(ecb_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*cipher)(const void *ks, const uint8_t *pt, uint8_t *ct)) { size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; uint8_t *blockp; uint8_t *lastp; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; if (length + ctx->ecb_remainder_len < block_size) { /* accumulate bytes here and return */ bcopy(datap, (uint8_t *)ctx->ecb_remainder + ctx->ecb_remainder_len, length); ctx->ecb_remainder_len += length; ctx->ecb_copy_to = datap; return (CRYPTO_SUCCESS); } lastp = (uint8_t *)ctx->ecb_iv; crypto_init_ptrs(out, &iov_or_mp, &offset); do { /* Unprocessed data from last call. */ if (ctx->ecb_remainder_len > 0) { need = block_size - ctx->ecb_remainder_len; if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); bcopy(datap, &((uint8_t *)ctx->ecb_remainder) [ctx->ecb_remainder_len], need); blockp = (uint8_t *)ctx->ecb_remainder; } else { blockp = datap; } cipher(ctx->ecb_keysched, blockp, lastp); crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, block_size); /* copy block to where it belongs */ bcopy(lastp, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy(lastp + out_data_1_len, out_data_2, block_size - out_data_1_len); } /* update offset */ out->cd_offset += block_size; /* Update pointer to next block of data to be processed. */ if (ctx->ecb_remainder_len != 0) { datap += need; ctx->ecb_remainder_len = 0; } else { datap += block_size; } remainder = (size_t)&data[length] - (size_t)datap; /* Incomplete last block. */ if (remainder > 0 && remainder < block_size) { bcopy(datap, ctx->ecb_remainder, remainder); ctx->ecb_remainder_len = remainder; ctx->ecb_copy_to = datap; goto out; } ctx->ecb_copy_to = NULL; } while (remainder > 0); out: return (CRYPTO_SUCCESS); } -/* ARGSUSED */ void * ecb_alloc_ctx(int kmflag) { ecb_ctx_t *ecb_ctx; if ((ecb_ctx = kmem_zalloc(sizeof (ecb_ctx_t), kmflag)) == NULL) return (NULL); ecb_ctx->ecb_flags = ECB_MODE; return (ecb_ctx); } diff --git a/module/icp/algs/modes/gcm.c b/module/icp/algs/modes/gcm.c index 7332834cbe37..3a1660d93ab7 100644 --- a/module/icp/algs/modes/gcm.c +++ b/module/icp/algs/modes/gcm.c @@ -1,1587 +1,1588 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 
*/ #include #include #include #include #include #include #include #include #ifdef CAN_USE_GCM_ASM #include #endif #define GHASH(c, d, t, o) \ xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \ (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \ (uint64_t *)(void *)(t)); /* Select GCM implementation */ #define IMPL_FASTEST (UINT32_MAX) #define IMPL_CYCLE (UINT32_MAX-1) #ifdef CAN_USE_GCM_ASM #define IMPL_AVX (UINT32_MAX-2) #endif #define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i)) static uint32_t icp_gcm_impl = IMPL_FASTEST; static uint32_t user_sel_impl = IMPL_FASTEST; #ifdef CAN_USE_GCM_ASM /* Does the architecture we run on support the MOVBE instruction? */ boolean_t gcm_avx_can_use_movbe = B_FALSE; /* * Whether to use the optimized openssl gcm and ghash implementations. * Set to true if module parameter icp_gcm_impl == "avx". */ static boolean_t gcm_use_avx = B_FALSE; #define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx) extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); static inline boolean_t gcm_avx_will_work(void); static inline void gcm_set_avx(boolean_t); static inline boolean_t gcm_toggle_avx(void); static inline size_t gcm_simd_get_htab_size(boolean_t); static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t, crypto_data_t *, size_t); static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *, size_t, size_t); #endif /* ifdef CAN_USE_GCM_ASM */ /* * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode * is done in another function. */ int gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { #ifdef CAN_USE_GCM_ASM if (ctx->gcm_use_avx == B_TRUE) return (gcm_mode_encrypt_contiguous_blocks_avx( ctx, data, length, out, block_size)); #endif const gcm_impl_ops_t *gops; size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; uint8_t *blockp; uint8_t *lastp; void *iov_or_mp; offset_t offset; uint8_t *out_data_1; uint8_t *out_data_2; size_t out_data_1_len; uint64_t counter; uint64_t counter_mask = ntohll(0x00000000ffffffffULL); if (length + ctx->gcm_remainder_len < block_size) { /* accumulate bytes here and return */ bcopy(datap, (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len, length); ctx->gcm_remainder_len += length; if (ctx->gcm_copy_to == NULL) { ctx->gcm_copy_to = datap; } return (CRYPTO_SUCCESS); } lastp = (uint8_t *)ctx->gcm_cb; crypto_init_ptrs(out, &iov_or_mp, &offset); gops = gcm_impl_get_ops(); do { /* Unprocessed data from last call. */ if (ctx->gcm_remainder_len > 0) { need = block_size - ctx->gcm_remainder_len; if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); bcopy(datap, &((uint8_t *)ctx->gcm_remainder) [ctx->gcm_remainder_len], need); blockp = (uint8_t *)ctx->gcm_remainder; } else { blockp = datap; } /* * Increment counter. Counter bits are confined * to the bottom 32 bits of the counter block. 
*/ counter = ntohll(ctx->gcm_cb[1] & counter_mask); counter = htonll(counter + 1); counter &= counter_mask; ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, (uint8_t *)ctx->gcm_tmp); xor_block(blockp, (uint8_t *)ctx->gcm_tmp); lastp = (uint8_t *)ctx->gcm_tmp; ctx->gcm_processed_data_len += block_size; crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, block_size); /* copy block to where it belongs */ if (out_data_1_len == block_size) { copy_block(lastp, out_data_1); } else { bcopy(lastp, out_data_1, out_data_1_len); if (out_data_2 != NULL) { bcopy(lastp + out_data_1_len, out_data_2, block_size - out_data_1_len); } } /* update offset */ out->cd_offset += block_size; /* add ciphertext to the hash */ GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops); /* Update pointer to next block of data to be processed. */ if (ctx->gcm_remainder_len != 0) { datap += need; ctx->gcm_remainder_len = 0; } else { datap += block_size; } remainder = (size_t)&data[length] - (size_t)datap; /* Incomplete last block. */ if (remainder > 0 && remainder < block_size) { bcopy(datap, ctx->gcm_remainder, remainder); ctx->gcm_remainder_len = remainder; ctx->gcm_copy_to = datap; goto out; } ctx->gcm_copy_to = NULL; } while (remainder > 0); out: return (CRYPTO_SUCCESS); } -/* ARGSUSED */ int gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { + (void) copy_block; #ifdef CAN_USE_GCM_ASM if (ctx->gcm_use_avx == B_TRUE) return (gcm_encrypt_final_avx(ctx, out, block_size)); #endif const gcm_impl_ops_t *gops; uint64_t counter_mask = ntohll(0x00000000ffffffffULL); uint8_t *ghash, *macp = NULL; int i, rv; if (out->cd_length < (ctx->gcm_remainder_len + ctx->gcm_tag_len)) { return (CRYPTO_DATA_LEN_RANGE); } gops = gcm_impl_get_ops(); ghash = (uint8_t *)ctx->gcm_ghash; if (ctx->gcm_remainder_len > 0) { uint64_t counter; uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp; /* * Here is where we deal with data that is not a * multiple of the block size. */ /* * Increment counter. */ counter = ntohll(ctx->gcm_cb[1] & counter_mask); counter = htonll(counter + 1); counter &= counter_mask; ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, (uint8_t *)ctx->gcm_tmp); macp = (uint8_t *)ctx->gcm_remainder; bzero(macp + ctx->gcm_remainder_len, block_size - ctx->gcm_remainder_len); /* XOR with counter block */ for (i = 0; i < ctx->gcm_remainder_len; i++) { macp[i] ^= tmpp[i]; } /* add ciphertext to the hash */ GHASH(ctx, macp, ghash, gops); ctx->gcm_processed_data_len += ctx->gcm_remainder_len; } ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len)); GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops); encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0, (uint8_t *)ctx->gcm_J0); xor_block((uint8_t *)ctx->gcm_J0, ghash); if (ctx->gcm_remainder_len > 0) { rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len); if (rv != CRYPTO_SUCCESS) return (rv); } out->cd_offset += ctx->gcm_remainder_len; ctx->gcm_remainder_len = 0; rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len); if (rv != CRYPTO_SUCCESS) return (rv); out->cd_offset += ctx->gcm_tag_len; return (CRYPTO_SUCCESS); } /* * This will only deal with decrypting the last block of the input that * might not be a multiple of block length. 
*/ static void gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { uint8_t *datap, *outp, *counterp; uint64_t counter; uint64_t counter_mask = ntohll(0x00000000ffffffffULL); int i; /* * Increment counter. * Counter bits are confined to the bottom 32 bits */ counter = ntohll(ctx->gcm_cb[1] & counter_mask); counter = htonll(counter + 1); counter &= counter_mask; ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; datap = (uint8_t *)ctx->gcm_remainder; outp = &((ctx->gcm_pt_buf)[index]); counterp = (uint8_t *)ctx->gcm_tmp; /* authentication tag */ bzero((uint8_t *)ctx->gcm_tmp, block_size); bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len); /* add ciphertext to the hash */ GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops()); /* decrypt remaining ciphertext */ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp); /* XOR with counter block */ for (i = 0; i < ctx->gcm_remainder_len; i++) { outp[i] = datap[i] ^ counterp[i]; } } -/* ARGSUSED */ int gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { + (void) out, (void) block_size, (void) encrypt_block, (void) copy_block, + (void) xor_block; size_t new_len; uint8_t *new; /* * Copy contiguous ciphertext input blocks to plaintext buffer. * Ciphertext will be decrypted in the final. */ if (length > 0) { new_len = ctx->gcm_pt_buf_len + length; new = vmem_alloc(new_len, ctx->gcm_kmflag); if (new == NULL) { vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len); ctx->gcm_pt_buf = NULL; return (CRYPTO_HOST_MEMORY); } bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len); vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len); ctx->gcm_pt_buf = new; ctx->gcm_pt_buf_len = new_len; bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len], length); ctx->gcm_processed_data_len += length; } ctx->gcm_remainder_len = 0; return (CRYPTO_SUCCESS); } int gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { #ifdef CAN_USE_GCM_ASM if (ctx->gcm_use_avx == B_TRUE) return (gcm_decrypt_final_avx(ctx, out, block_size)); #endif const gcm_impl_ops_t *gops; size_t pt_len; size_t remainder; uint8_t *ghash; uint8_t *blockp; uint8_t *cbp; uint64_t counter; uint64_t counter_mask = ntohll(0x00000000ffffffffULL); int processed = 0, rv; ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len); gops = gcm_impl_get_ops(); pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; ghash = (uint8_t *)ctx->gcm_ghash; blockp = ctx->gcm_pt_buf; remainder = pt_len; while (remainder > 0) { /* Incomplete last block */ if (remainder < block_size) { bcopy(blockp, ctx->gcm_remainder, remainder); ctx->gcm_remainder_len = remainder; /* * not expecting anymore ciphertext, just * compute plaintext for the remaining input */ gcm_decrypt_incomplete_block(ctx, block_size, processed, encrypt_block, xor_block); ctx->gcm_remainder_len = 0; goto out; } /* add ciphertext to the hash */ GHASH(ctx, blockp, ghash, gops); /* * Increment counter. 
* Counter bits are confined to the bottom 32 bits */ counter = ntohll(ctx->gcm_cb[1] & counter_mask); counter = htonll(counter + 1); counter &= counter_mask; ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; cbp = (uint8_t *)ctx->gcm_tmp; encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp); /* XOR with ciphertext */ xor_block(cbp, blockp); processed += block_size; blockp += block_size; remainder -= block_size; } out: ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len)); GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops); encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0, (uint8_t *)ctx->gcm_J0); xor_block((uint8_t *)ctx->gcm_J0, ghash); /* compare the input authentication tag with what we calculated */ if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) { /* They don't match */ return (CRYPTO_INVALID_MAC); } else { rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len); if (rv != CRYPTO_SUCCESS) return (rv); out->cd_offset += pt_len; } return (CRYPTO_SUCCESS); } static int gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param) { size_t tag_len; /* * Check the length of the authentication tag (in bits). */ tag_len = gcm_param->ulTagBits; switch (tag_len) { case 32: case 64: case 96: case 104: case 112: case 120: case 128: break; default: return (CRYPTO_MECHANISM_PARAM_INVALID); } if (gcm_param->ulIvLen == 0) return (CRYPTO_MECHANISM_PARAM_INVALID); return (CRYPTO_SUCCESS); } static void gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len, gcm_ctx_t *ctx, size_t block_size, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { const gcm_impl_ops_t *gops; uint8_t *cb; ulong_t remainder = iv_len; ulong_t processed = 0; uint8_t *datap, *ghash; uint64_t len_a_len_c[2]; gops = gcm_impl_get_ops(); ghash = (uint8_t *)ctx->gcm_ghash; cb = (uint8_t *)ctx->gcm_cb; if (iv_len == 12) { bcopy(iv, cb, 12); cb[12] = 0; cb[13] = 0; cb[14] = 0; cb[15] = 1; /* J0 will be used again in the final */ copy_block(cb, (uint8_t *)ctx->gcm_J0); } else { /* GHASH the IV */ do { if (remainder < block_size) { bzero(cb, block_size); bcopy(&(iv[processed]), cb, remainder); datap = (uint8_t *)cb; remainder = 0; } else { datap = (uint8_t *)(&(iv[processed])); processed += block_size; remainder -= block_size; } GHASH(ctx, datap, ghash, gops); } while (remainder > 0); len_a_len_c[0] = 0; len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len)); GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops); /* J0 will be used again in the final */ copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb); } } static int gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, unsigned char *auth_data, size_t auth_data_len, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { const gcm_impl_ops_t *gops; uint8_t *ghash, *datap, *authp; size_t remainder, processed; /* encrypt zero block to get subkey H */ bzero(ctx->gcm_H, sizeof (ctx->gcm_H)); encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H, (uint8_t *)ctx->gcm_H); gcm_format_initial_blocks(iv, iv_len, ctx, block_size, copy_block, xor_block); gops = gcm_impl_get_ops(); authp = (uint8_t *)ctx->gcm_tmp; ghash = (uint8_t *)ctx->gcm_ghash; bzero(authp, block_size); bzero(ghash, block_size); processed = 0; remainder = auth_data_len; do { if (remainder < block_size) { /* * There's not a block full of data, pad rest of * buffer with zero */ bzero(authp, block_size); bcopy(&(auth_data[processed]), authp, remainder); datap = 
(uint8_t *)authp; remainder = 0; } else { datap = (uint8_t *)(&(auth_data[processed])); processed += block_size; remainder -= block_size; } /* add auth data to the hash */ GHASH(ctx, datap, ghash, gops); } while (remainder > 0); return (CRYPTO_SUCCESS); } /* * The following function is called at encrypt or decrypt init time * for AES GCM mode. * * Init the GCM context struct. Handle the cycle and avx implementations here. */ int gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { int rv; CK_AES_GCM_PARAMS *gcm_param; if (param != NULL) { gcm_param = (CK_AES_GCM_PARAMS *)(void *)param; if ((rv = gcm_validate_args(gcm_param)) != 0) { return (rv); } gcm_ctx->gcm_tag_len = gcm_param->ulTagBits; gcm_ctx->gcm_tag_len >>= 3; gcm_ctx->gcm_processed_data_len = 0; /* these values are in bits */ gcm_ctx->gcm_len_a_len_c[0] = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen)); rv = CRYPTO_SUCCESS; gcm_ctx->gcm_flags |= GCM_MODE; } else { return (CRYPTO_MECHANISM_PARAM_INVALID); } #ifdef CAN_USE_GCM_ASM if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; } else { /* * Handle the "cycle" implementation by creating avx and * non-avx contexts alternately. */ gcm_ctx->gcm_use_avx = gcm_toggle_avx(); /* * We don't handle byte swapped key schedules in the avx * code path. */ aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; if (ks->ops->needs_byteswap == B_TRUE) { gcm_ctx->gcm_use_avx = B_FALSE; } /* Use the MOVBE and the BSWAP variants alternately. */ if (gcm_ctx->gcm_use_avx == B_TRUE && zfs_movbe_available() == B_TRUE) { (void) atomic_toggle_boolean_nv( (volatile boolean_t *)&gcm_avx_can_use_movbe); } } /* Allocate Htab memory as needed. */ if (gcm_ctx->gcm_use_avx == B_TRUE) { size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx); if (htab_len == 0) { return (CRYPTO_MECHANISM_PARAM_INVALID); } gcm_ctx->gcm_htab_len = htab_len; gcm_ctx->gcm_Htable = (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag); if (gcm_ctx->gcm_Htable == NULL) { return (CRYPTO_HOST_MEMORY); } } /* Avx and non avx context initialization differs from here on. */ if (gcm_ctx->gcm_use_avx == B_FALSE) { #endif /* ifdef CAN_USE_GCM_ASM */ if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, gcm_param->pAAD, gcm_param->ulAADLen, block_size, encrypt_block, copy_block, xor_block) != 0) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } #ifdef CAN_USE_GCM_ASM } else { if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } } #endif /* ifdef CAN_USE_GCM_ASM */ return (rv); } int gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { int rv; CK_AES_GMAC_PARAMS *gmac_param; if (param != NULL) { gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param; gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS); gcm_ctx->gcm_processed_data_len = 0; /* these values are in bits */ gcm_ctx->gcm_len_a_len_c[0] = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen)); rv = CRYPTO_SUCCESS; gcm_ctx->gcm_flags |= GMAC_MODE; } else { return (CRYPTO_MECHANISM_PARAM_INVALID); } #ifdef CAN_USE_GCM_ASM /* * Handle the "cycle" implementation by creating avx and non avx * contexts alternately. 
*/ if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; } else { gcm_ctx->gcm_use_avx = gcm_toggle_avx(); } /* We don't handle byte swapped key schedules in the avx code path. */ aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; if (ks->ops->needs_byteswap == B_TRUE) { gcm_ctx->gcm_use_avx = B_FALSE; } /* Allocate Htab memory as needed. */ if (gcm_ctx->gcm_use_avx == B_TRUE) { size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx); if (htab_len == 0) { return (CRYPTO_MECHANISM_PARAM_INVALID); } gcm_ctx->gcm_htab_len = htab_len; gcm_ctx->gcm_Htable = (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag); if (gcm_ctx->gcm_Htable == NULL) { return (CRYPTO_HOST_MEMORY); } } /* Avx and non avx context initialization differs from here on. */ if (gcm_ctx->gcm_use_avx == B_FALSE) { #endif /* ifdef CAN_USE_GCM_ASM */ if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, gmac_param->pAAD, gmac_param->ulAADLen, block_size, encrypt_block, copy_block, xor_block) != 0) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } #ifdef CAN_USE_GCM_ASM } else { if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } } #endif /* ifdef CAN_USE_GCM_ASM */ return (rv); } void * gcm_alloc_ctx(int kmflag) { gcm_ctx_t *gcm_ctx; if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL) return (NULL); gcm_ctx->gcm_flags = GCM_MODE; return (gcm_ctx); } void * gmac_alloc_ctx(int kmflag) { gcm_ctx_t *gcm_ctx; if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL) return (NULL); gcm_ctx->gcm_flags = GMAC_MODE; return (gcm_ctx); } void gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag) { ctx->gcm_kmflag = kmflag; } /* GCM implementation that contains the fastest methods */ static gcm_impl_ops_t gcm_fastest_impl = { .name = "fastest" }; /* All compiled in implementations */ const gcm_impl_ops_t *gcm_all_impl[] = { &gcm_generic_impl, #if defined(__x86_64) && defined(HAVE_PCLMULQDQ) &gcm_pclmulqdq_impl, #endif }; /* Indicate that benchmark has been completed */ static boolean_t gcm_impl_initialized = B_FALSE; /* Hold all supported implementations */ static size_t gcm_supp_impl_cnt = 0; static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)]; /* * Returns the GCM operations for encrypt/decrypt/key setup. When a * SIMD implementation is not allowed in the current context, then * fallback to the fastest generic implementation. */ const gcm_impl_ops_t * gcm_impl_get_ops() { if (!kfpu_allowed()) return (&gcm_generic_impl); const gcm_impl_ops_t *ops = NULL; const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl); switch (impl) { case IMPL_FASTEST: ASSERT(gcm_impl_initialized); ops = &gcm_fastest_impl; break; case IMPL_CYCLE: /* Cycle through supported implementations */ ASSERT(gcm_impl_initialized); ASSERT3U(gcm_supp_impl_cnt, >, 0); static size_t cycle_impl_idx = 0; size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt; ops = gcm_supp_impl[idx]; break; #ifdef CAN_USE_GCM_ASM case IMPL_AVX: /* * Make sure that we return a valid implementation while * switching to the avx implementation since there still * may be unfinished non-avx contexts around. */ ops = &gcm_generic_impl; break; #endif default: ASSERT3U(impl, <, gcm_supp_impl_cnt); ASSERT3U(gcm_supp_impl_cnt, >, 0); if (impl < ARRAY_SIZE(gcm_all_impl)) ops = gcm_supp_impl[impl]; break; } ASSERT3P(ops, !=, NULL); return (ops); } /* * Initialize all supported implementations. 
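 * This walks gcm_all_impl[], records every implementation whose
 * is_supported() hook reports true in gcm_supp_impl[], synthesizes the
 * "fastest" alias, and finally applies any selection the user made
 * before the module finished loading. Adding another implementation
 * therefore only requires an ops vector plus an entry in
 * gcm_all_impl[]; as a purely illustrative sketch (all "myarch" names
 * are made up):
 *
 *	const gcm_impl_ops_t gcm_myarch_impl = {
 *		.mul = gcm_myarch_mul,
 *		.is_supported = gcm_myarch_will_work,
 *		.name = "myarch"
 *	};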
*/ void gcm_impl_init(void) { gcm_impl_ops_t *curr_impl; int i, c; /* Move supported implementations into gcm_supp_impls */ for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) { curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i]; if (curr_impl->is_supported()) gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl; } gcm_supp_impl_cnt = c; /* * Set the fastest implementation given the assumption that the * hardware accelerated version is the fastest. */ #if defined(__x86_64) && defined(HAVE_PCLMULQDQ) if (gcm_pclmulqdq_impl.is_supported()) { memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl, sizeof (gcm_fastest_impl)); } else #endif { memcpy(&gcm_fastest_impl, &gcm_generic_impl, sizeof (gcm_fastest_impl)); } strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX); #ifdef CAN_USE_GCM_ASM /* * Use the avx implementation if it's available and the implementation * hasn't changed from its default value of fastest on module load. */ if (gcm_avx_will_work()) { #ifdef HAVE_MOVBE if (zfs_movbe_available() == B_TRUE) { atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE); } #endif if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) { gcm_set_avx(B_TRUE); } } #endif /* Finish initialization */ atomic_swap_32(&icp_gcm_impl, user_sel_impl); gcm_impl_initialized = B_TRUE; } static const struct { char *name; uint32_t sel; } gcm_impl_opts[] = { { "cycle", IMPL_CYCLE }, { "fastest", IMPL_FASTEST }, #ifdef CAN_USE_GCM_ASM { "avx", IMPL_AVX }, #endif }; /* * Function sets desired gcm implementation. * * If we are called before init(), user preference will be saved in * user_sel_impl, and applied in later init() call. This occurs when module * parameter is specified on module load. Otherwise, directly update * icp_gcm_impl. * * @val Name of gcm implementation to use * @param Unused. */ int gcm_impl_set(const char *val) { int err = -EINVAL; char req_name[GCM_IMPL_NAME_MAX]; uint32_t impl = GCM_IMPL_READ(user_sel_impl); size_t i; /* sanitize input */ i = strnlen(val, GCM_IMPL_NAME_MAX); if (i == 0 || i >= GCM_IMPL_NAME_MAX) return (err); strlcpy(req_name, val, GCM_IMPL_NAME_MAX); while (i > 0 && isspace(req_name[i-1])) i--; req_name[i] = '\0'; /* Check mandatory options */ for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { #ifdef CAN_USE_GCM_ASM /* Ignore avx implementation if it won't work. */ if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { continue; } #endif if (strcmp(req_name, gcm_impl_opts[i].name) == 0) { impl = gcm_impl_opts[i].sel; err = 0; break; } } /* check all supported impl if init() was already called */ if (err != 0 && gcm_impl_initialized) { /* check all supported implementations */ for (i = 0; i < gcm_supp_impl_cnt; i++) { if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) { impl = i; err = 0; break; } } } #ifdef CAN_USE_GCM_ASM /* * Use the avx implementation if available and the requested one is * avx or fastest. 
*/ if (gcm_avx_will_work() == B_TRUE && (impl == IMPL_AVX || impl == IMPL_FASTEST)) { gcm_set_avx(B_TRUE); } else { gcm_set_avx(B_FALSE); } #endif if (err == 0) { if (gcm_impl_initialized) atomic_swap_32(&icp_gcm_impl, impl); else atomic_swap_32(&user_sel_impl, impl); } return (err); } #if defined(_KERNEL) && defined(__linux__) static int icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp) { return (gcm_impl_set(val)); } static int icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp) { int i, cnt = 0; char *fmt; const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl); ASSERT(gcm_impl_initialized); /* list mandatory options */ for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { #ifdef CAN_USE_GCM_ASM /* Ignore avx implementation if it won't work. */ if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { continue; } #endif fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s "; cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name); } /* list all supported implementations */ for (i = 0; i < gcm_supp_impl_cnt; i++) { fmt = (i == impl) ? "[%s] " : "%s "; cnt += sprintf(buffer + cnt, fmt, gcm_supp_impl[i]->name); } return (cnt); } module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get, NULL, 0644); MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); #endif /* defined(__KERNEL) */ #ifdef CAN_USE_GCM_ASM #define GCM_BLOCK_LEN 16 /* * The openssl asm routines are 6x aggregated and need that many bytes * at minimum. */ #define GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6) #define GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3) /* * Ensure the chunk size is reasonable since we are allocating a * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts. */ #define GCM_AVX_MAX_CHUNK_SIZE \ (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES) /* Get the chunk size module parameter. */ #define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size /* Clear the FPU registers since they hold sensitive internal state. */ #define clear_fpu_regs() clear_fpu_regs_avx() #define GHASH_AVX(ctx, in, len) \ gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t *)(ctx)->gcm_Htable, \ in, len) #define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1) /* * Module parameter: number of bytes to process at once while owning the FPU. * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is * ensured to be greater or equal than GCM_AVX_MIN_DECRYPT_BYTES. */ static uint32_t gcm_avx_chunk_size = ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; extern void clear_fpu_regs_avx(void); extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst); extern void aes_encrypt_intel(const uint32_t rk[], int nr, const uint32_t pt[4], uint32_t ct[4]); extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]); extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable, const uint8_t *in, size_t len); extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t, const void *, uint64_t *, uint64_t *); extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t, const void *, uint64_t *, uint64_t *); static inline boolean_t gcm_avx_will_work(void) { /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. 
*/ return (kfpu_allowed() && zfs_avx_available() && zfs_aes_available() && zfs_pclmulqdq_available()); } static inline void gcm_set_avx(boolean_t val) { if (gcm_avx_will_work() == B_TRUE) { atomic_swap_32(&gcm_use_avx, val); } } static inline boolean_t gcm_toggle_avx(void) { if (gcm_avx_will_work() == B_TRUE) { return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX)); } else { return (B_FALSE); } } static inline size_t gcm_simd_get_htab_size(boolean_t simd_mode) { switch (simd_mode) { case B_TRUE: return (2 * 6 * 2 * sizeof (uint64_t)); default: return (0); } } /* * Clear sensitive data in the context. * * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and * ctx->gcm_Htable contain the hash sub key which protects authentication. * * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for * a known plaintext attack, they consists of the IV and the first and last * counter respectively. If they should be cleared is debatable. */ static inline void gcm_clear_ctx(gcm_ctx_t *ctx) { bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder)); bzero(ctx->gcm_H, sizeof (ctx->gcm_H)); bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0)); bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp)); } /* Increment the GCM counter block by n. */ static inline void gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n) { uint64_t counter_mask = ntohll(0x00000000ffffffffULL); uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask); counter = htonll(counter + n); counter &= counter_mask; ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; } /* * Encrypt multiple blocks of data in GCM mode. * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines * if possible. While processing a chunk the FPU is "locked". */ static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size) { size_t bleft = length; size_t need = 0; size_t done = 0; uint8_t *datap = (uint8_t *)data; size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); uint64_t *ghash = ctx->gcm_ghash; uint64_t *cb = ctx->gcm_cb; uint8_t *ct_buf = NULL; uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; int rv = CRYPTO_SUCCESS; ASSERT(block_size == GCM_BLOCK_LEN); /* * If the last call left an incomplete block, try to fill * it first. */ if (ctx->gcm_remainder_len > 0) { need = block_size - ctx->gcm_remainder_len; if (length < need) { /* Accumulate bytes here and return. */ bcopy(datap, (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len, length); ctx->gcm_remainder_len += length; if (ctx->gcm_copy_to == NULL) { ctx->gcm_copy_to = datap; } return (CRYPTO_SUCCESS); } else { /* Complete incomplete block. */ bcopy(datap, (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len, need); ctx->gcm_copy_to = NULL; } } /* Allocate a buffer to encrypt to if there is enough input. */ if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { ct_buf = vmem_alloc(chunk_size, ctx->gcm_kmflag); if (ct_buf == NULL) { return (CRYPTO_HOST_MEMORY); } } /* If we completed an incomplete block, encrypt and write it out. 
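 * That completed block goes through the single-block AES-NI primitive;
 * the bulk of the data is then handed to the 6x aggregated OpenSSL
 * routine in chunk_size pieces, releasing the FPU between chunks. With
 * the default module parameter the chunk size works out to
 *
 *	(32 * 1024 / 96) * 96 = 32736 bytes
 *
 * i.e. the largest multiple of GCM_AVX_MIN_DECRYPT_BYTES (6 blocks of
 * 16 bytes) not exceeding 32 KiB, so the FPU is never held for more
 * than one such chunk of work at a time.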
*/ if (ctx->gcm_remainder_len > 0) { kfpu_begin(); aes_encrypt_intel(key->encr_ks.ks32, key->nr, (const uint32_t *)cb, (uint32_t *)tmp); gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp); GHASH_AVX(ctx, tmp, block_size); clear_fpu_regs(); kfpu_end(); rv = crypto_put_output_data(tmp, out, block_size); out->cd_offset += block_size; gcm_incr_counter_block(ctx); ctx->gcm_processed_data_len += block_size; bleft -= need; datap += need; ctx->gcm_remainder_len = 0; } /* Do the bulk encryption in chunk_size blocks. */ for (; bleft >= chunk_size; bleft -= chunk_size) { kfpu_begin(); done = aesni_gcm_encrypt( datap, ct_buf, chunk_size, key, cb, ghash); clear_fpu_regs(); kfpu_end(); if (done != chunk_size) { rv = CRYPTO_FAILED; goto out_nofpu; } rv = crypto_put_output_data(ct_buf, out, chunk_size); if (rv != CRYPTO_SUCCESS) { goto out_nofpu; } out->cd_offset += chunk_size; datap += chunk_size; ctx->gcm_processed_data_len += chunk_size; } /* Check if we are already done. */ if (bleft == 0) { goto out_nofpu; } /* Bulk encrypt the remaining data. */ kfpu_begin(); if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash); if (done == 0) { rv = CRYPTO_FAILED; goto out; } rv = crypto_put_output_data(ct_buf, out, done); if (rv != CRYPTO_SUCCESS) { goto out; } out->cd_offset += done; ctx->gcm_processed_data_len += done; datap += done; bleft -= done; } /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */ while (bleft > 0) { if (bleft < block_size) { bcopy(datap, ctx->gcm_remainder, bleft); ctx->gcm_remainder_len = bleft; ctx->gcm_copy_to = datap; goto out; } /* Encrypt, hash and write out. */ aes_encrypt_intel(key->encr_ks.ks32, key->nr, (const uint32_t *)cb, (uint32_t *)tmp); gcm_xor_avx(datap, tmp); GHASH_AVX(ctx, tmp, block_size); rv = crypto_put_output_data(tmp, out, block_size); if (rv != CRYPTO_SUCCESS) { goto out; } out->cd_offset += block_size; gcm_incr_counter_block(ctx); ctx->gcm_processed_data_len += block_size; datap += block_size; bleft -= block_size; } out: clear_fpu_regs(); kfpu_end(); out_nofpu: if (ct_buf != NULL) { vmem_free(ct_buf, chunk_size); } return (rv); } /* * Finalize the encryption: Zero fill, encrypt, hash and write out an eventual * incomplete last block. Encrypt the ICB. Calculate the tag and write it out. */ static int gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) { uint8_t *ghash = (uint8_t *)ctx->gcm_ghash; uint32_t *J0 = (uint32_t *)ctx->gcm_J0; uint8_t *remainder = (uint8_t *)ctx->gcm_remainder; size_t rem_len = ctx->gcm_remainder_len; const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32; int aes_rounds = ((aes_key_t *)keysched)->nr; int rv; ASSERT(block_size == GCM_BLOCK_LEN); if (out->cd_length < (rem_len + ctx->gcm_tag_len)) { return (CRYPTO_DATA_LEN_RANGE); } kfpu_begin(); /* Pad last incomplete block with zeros, encrypt and hash. */ if (rem_len > 0) { uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; const uint32_t *cb = (uint32_t *)ctx->gcm_cb; aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp); bzero(remainder + rem_len, block_size - rem_len); for (int i = 0; i < rem_len; i++) { remainder[i] ^= tmp[i]; } GHASH_AVX(ctx, remainder, block_size); ctx->gcm_processed_data_len += rem_len; /* No need to increment counter_block, it's the last block. */ } /* Finish tag. 
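 * The tag is computed just as in the generic code path: the length
 * block len(A) || len(C), both lengths in bits, is the final GHASH
 * input, and the resulting hash S is combined with the saved
 * pre-counter block:
 *
 *	T = E(K, J0) XOR S
 *
 * The first gcm_tag_len bytes of T are appended to the output after any
 * remaining ciphertext.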
*/ ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len)); GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size); aes_encrypt_intel(keysched, aes_rounds, J0, J0); gcm_xor_avx((uint8_t *)J0, ghash); clear_fpu_regs(); kfpu_end(); /* Output remainder. */ if (rem_len > 0) { rv = crypto_put_output_data(remainder, out, rem_len); if (rv != CRYPTO_SUCCESS) return (rv); } out->cd_offset += rem_len; ctx->gcm_remainder_len = 0; rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len); if (rv != CRYPTO_SUCCESS) return (rv); out->cd_offset += ctx->gcm_tag_len; /* Clear sensitive data in the context before returning. */ gcm_clear_ctx(ctx); return (CRYPTO_SUCCESS); } /* * Finalize decryption: We just have accumulated crypto text, so now we * decrypt it here inplace. */ static int gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) { ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len); ASSERT3U(block_size, ==, 16); size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; uint8_t *datap = ctx->gcm_pt_buf; const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); uint32_t *cb = (uint32_t *)ctx->gcm_cb; uint64_t *ghash = ctx->gcm_ghash; uint32_t *tmp = (uint32_t *)ctx->gcm_tmp; int rv = CRYPTO_SUCCESS; size_t bleft, done; /* * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be * greater or equal than GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple of * GCM_AVX_MIN_DECRYPT_BYTES. */ for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) { kfpu_begin(); done = aesni_gcm_decrypt(datap, datap, chunk_size, (const void *)key, ctx->gcm_cb, ghash); clear_fpu_regs(); kfpu_end(); if (done != chunk_size) { return (CRYPTO_FAILED); } datap += done; } /* Decrypt remainder, which is less than chunk size, in one go. */ kfpu_begin(); if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) { done = aesni_gcm_decrypt(datap, datap, bleft, (const void *)key, ctx->gcm_cb, ghash); if (done == 0) { clear_fpu_regs(); kfpu_end(); return (CRYPTO_FAILED); } datap += done; bleft -= done; } ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES); /* * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain, * decrypt them block by block. */ while (bleft > 0) { /* Incomplete last block. */ if (bleft < block_size) { uint8_t *lastb = (uint8_t *)ctx->gcm_remainder; bzero(lastb, block_size); bcopy(datap, lastb, bleft); /* The GCM processing. */ GHASH_AVX(ctx, lastb, block_size); aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp); for (size_t i = 0; i < bleft; i++) { datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i]; } break; } /* The GCM processing. */ GHASH_AVX(ctx, datap, block_size); aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp); gcm_xor_avx((uint8_t *)tmp, datap); gcm_incr_counter_block(ctx); datap += block_size; bleft -= block_size; } if (rv != CRYPTO_SUCCESS) { clear_fpu_regs(); kfpu_end(); return (rv); } /* Decryption done, finish the tag. */ ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len)); GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size); aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0, (uint32_t *)ctx->gcm_J0); gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash); /* We are done with the FPU, restore its state. */ clear_fpu_regs(); kfpu_end(); /* Compare the input authentication tag with what we calculated. */ if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) { /* They don't match. 
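 * Since the ciphertext was decrypted in place into gcm_pt_buf and is
 * only copied to the caller's buffer below once the tags agree, a
 * mismatching (e.g. forged) tag never releases any plaintext.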
*/ return (CRYPTO_INVALID_MAC); } rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len); if (rv != CRYPTO_SUCCESS) { return (rv); } out->cd_offset += pt_len; gcm_clear_ctx(ctx); return (CRYPTO_SUCCESS); } /* * Initialize the GCM params H, Htabtle and the counter block. Save the * initial counter block. */ static int gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, unsigned char *auth_data, size_t auth_data_len, size_t block_size) { uint8_t *cb = (uint8_t *)ctx->gcm_cb; uint64_t *H = ctx->gcm_H; const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32; int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr; uint8_t *datap = auth_data; size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; size_t bleft; ASSERT(block_size == GCM_BLOCK_LEN); /* Init H (encrypt zero block) and create the initial counter block. */ bzero(ctx->gcm_ghash, sizeof (ctx->gcm_ghash)); bzero(H, sizeof (ctx->gcm_H)); kfpu_begin(); aes_encrypt_intel(keysched, aes_rounds, (const uint32_t *)H, (uint32_t *)H); gcm_init_htab_avx(ctx->gcm_Htable, H); if (iv_len == 12) { bcopy(iv, cb, 12); cb[12] = 0; cb[13] = 0; cb[14] = 0; cb[15] = 1; /* We need the ICB later. */ bcopy(cb, ctx->gcm_J0, sizeof (ctx->gcm_J0)); } else { /* * Most consumers use 12 byte IVs, so it's OK to use the * original routines for other IV sizes, just avoid nesting * kfpu_begin calls. */ clear_fpu_regs(); kfpu_end(); gcm_format_initial_blocks(iv, iv_len, ctx, block_size, aes_copy_block, aes_xor_block); kfpu_begin(); } /* Openssl post increments the counter, adjust for that. */ gcm_incr_counter_block(ctx); /* Ghash AAD in chunk_size blocks. */ for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) { GHASH_AVX(ctx, datap, chunk_size); datap += chunk_size; clear_fpu_regs(); kfpu_end(); kfpu_begin(); } /* Ghash the remainder and handle possible incomplete GCM block. */ if (bleft > 0) { size_t incomp = bleft % block_size; bleft -= incomp; if (bleft > 0) { GHASH_AVX(ctx, datap, bleft); datap += bleft; } if (incomp > 0) { /* Zero pad and hash incomplete last block. */ uint8_t *authp = (uint8_t *)ctx->gcm_tmp; bzero(authp, block_size); bcopy(datap, authp, incomp); GHASH_AVX(ctx, authp, block_size); } } clear_fpu_regs(); kfpu_end(); return (CRYPTO_SUCCESS); } #if defined(_KERNEL) static int icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp) { unsigned long val; char val_rounded[16]; int error = 0; error = kstrtoul(buf, 0, &val); if (error) return (error); val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE) return (-EINVAL); snprintf(val_rounded, 16, "%u", (uint32_t)val); error = param_set_uint(val_rounded, kp); return (error); } module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size, param_get_uint, &gcm_avx_chunk_size, 0644); MODULE_PARM_DESC(icp_gcm_avx_chunk_size, "How many bytes to process while owning the FPU"); #endif /* defined(__KERNEL) */ #endif /* ifdef CAN_USE_GCM_ASM */ diff --git a/module/icp/asm-x86_64/aes/aes_aesni.S b/module/icp/asm-x86_64/aes/aes_aesni.S index 4a80c62097ae..1a8669ccd1d6 100644 --- a/module/icp/asm-x86_64/aes/aes_aesni.S +++ b/module/icp/asm-x86_64/aes/aes_aesni.S @@ -1,748 +1,748 @@ /* * ==================================================================== * Written by Intel Corporation for the OpenSSL project to add support * for Intel AES-NI instructions. Rights for redistribution and usage * in source and binary forms are granted according to the OpenSSL * license. 
* * Author: Huang Ying * Vinodh Gopal * Kahraman Akdemir * * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD) * instructions that are going to be introduced in the next generation * of Intel processor, as of 2009. These instructions enable fast and * secure data encryption and decryption, using the Advanced Encryption * Standard (AES), defined by FIPS Publication number 197. The * architecture introduces six instructions that offer full hardware * support for AES. Four of them support high performance data * encryption and decryption, and the other two instructions support * the AES key expansion procedure. * ==================================================================== */ /* * ==================================================================== * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgment: * "This product includes software developed by the OpenSSL Project * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" * * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to * endorse or promote products derived from this software without * prior written permission. For written permission, please contact * openssl-core@openssl.org. * * 5. Products derived from this software may not be called "OpenSSL" * nor may "OpenSSL" appear in their names without prior written * permission of the OpenSSL Project. * * 6. Redistributions of any form whatsoever must retain the following * acknowledgment: * "This product includes software developed by the OpenSSL Project * for use in the OpenSSL Toolkit (http://www.openssl.org/)" * * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. * ==================================================================== */ /* * ==================================================================== * OpenSolaris OS modifications * * This source originates as files aes-intel.S and eng_aesni_asm.pl, in * patches sent sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by * Huang Ying of Intel to the openssl-dev mailing list under the subject * of "Add support to Intel AES-NI instruction set for x86_64 platform". * * This OpenSolaris version has these major changes from the original source: * * 1. 
Added OpenSolaris ENTRY_NP/SET_SIZE macros from * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function * definitions for lint. * * 2. Formatted code, added comments, and added #includes and #defines. * * 3. If bit CR0.TS is set, clear and set the TS bit, after and before * calling kpreempt_disable() and kpreempt_enable(). * If the TS bit is not set, Save and restore %xmm registers at the beginning * and end of function calls (%xmm* registers are not saved and restored by * during kernel thread preemption). * * 4. Renamed functions, reordered parameters, and changed return value * to match OpenSolaris: * * OpenSSL interface: * int intel_AES_set_encrypt_key(const unsigned char *userKey, * const int bits, AES_KEY *key); * int intel_AES_set_decrypt_key(const unsigned char *userKey, * const int bits, AES_KEY *key); * Return values for above are non-zero on error, 0 on success. * * void intel_AES_encrypt(const unsigned char *in, unsigned char *out, * const AES_KEY *key); * void intel_AES_decrypt(const unsigned char *in, unsigned char *out, * const AES_KEY *key); * typedef struct aes_key_st { * unsigned int rd_key[4 *(AES_MAXNR + 1)]; * int rounds; * unsigned int pad[3]; * } AES_KEY; * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules * (ks32) instead of 64-bit (ks64). * Number of rounds (aka round count) is at offset 240 of AES_KEY. * * OpenSolaris OS interface (#ifdefs removed for readability): * int rijndael_key_setup_dec_intel(uint32_t rk[], * const uint32_t cipherKey[], uint64_t keyBits); * int rijndael_key_setup_enc_intel(uint32_t rk[], * const uint32_t cipherKey[], uint64_t keyBits); * Return values for above are 0 on error, number of rounds on success. * * void aes_encrypt_intel(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4]); * void aes_decrypt_intel(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4]); * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4]; * uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t; * * typedef union { * uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)]; * } aes_ks_t; * typedef struct aes_key { * aes_ks_t encr_ks, decr_ks; * long double align128; * int flags, nr, type; * } aes_key_t; * * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text, * ct is crypto text, and MAX_AES_NR is 14. * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64. * * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary. * * ==================================================================== */ #if defined(lint) || defined(__lint) #include -/* ARGSUSED */ void aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4], uint32_t ct[4]) { + (void) rk, (void) Nr, (void) pt, (void) ct; } -/* ARGSUSED */ void aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4], uint32_t pt[4]) { + (void) rk, (void) Nr, (void) ct, (void) pt; } -/* ARGSUSED */ int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[], uint64_t keyBits) { + (void) rk, (void) cipherKey, (void) keyBits; return (0); } -/* ARGSUSED */ int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[], uint64_t keyBits) { + (void) rk, (void) cipherKey, (void) keyBits; return (0); } #elif defined(HAVE_AES) /* guard by instruction set */ #define _ASM #include /* * _key_expansion_128(), * _key_expansion_192a(), _key_expansion_192b(), * _key_expansion_256a(), _key_expansion_256b() * * Helper functions called by rijndael_key_setup_inc_intel(). 
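 * Each helper consumes the result of one AESKEYGENASSIST instruction
 * (which provides the SubWord/RotWord values in hardware) and derives
 * the next round key(s) with a short chain of shuffles and XORs. For
 * the 128-bit schedule the step implemented is the usual FIPS-197
 * recurrence, shown here only for illustration (w[] is the 32-bit word
 * view of the schedule, rcon the round constant):
 *
 *	w[4*i]   = w[4*i-4] ^ SubWord(RotWord(w[4*i-1])) ^ rcon;
 *	w[4*i+1] = w[4*i-3] ^ w[4*i];
 *	w[4*i+2] = w[4*i-2] ^ w[4*i+1];
 *	w[4*i+3] = w[4*i-1] ^ w[4*i+2];
 *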
* Also used indirectly by rijndael_key_setup_dec_intel(). * * Input: * %xmm0 User-provided cipher key * %xmm1 Round constant * Output: * (%rcx) AES key */ ENTRY_NP2(_key_expansion_128, _key_expansion_256a) _key_expansion_128_local: _key_expansion_256a_local: pshufd $0b11111111, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 pxor %xmm4, %xmm0 shufps $0b10001100, %xmm0, %xmm4 pxor %xmm4, %xmm0 pxor %xmm1, %xmm0 movups %xmm0, (%rcx) add $0x10, %rcx ret nop SET_SIZE(_key_expansion_128) SET_SIZE(_key_expansion_256a) ENTRY_NP(_key_expansion_192a) _key_expansion_192a_local: pshufd $0b01010101, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 pxor %xmm4, %xmm0 shufps $0b10001100, %xmm0, %xmm4 pxor %xmm4, %xmm0 pxor %xmm1, %xmm0 movups %xmm2, %xmm5 movups %xmm2, %xmm6 pslldq $4, %xmm5 pshufd $0b11111111, %xmm0, %xmm3 pxor %xmm3, %xmm2 pxor %xmm5, %xmm2 movups %xmm0, %xmm1 shufps $0b01000100, %xmm0, %xmm6 movups %xmm6, (%rcx) shufps $0b01001110, %xmm2, %xmm1 movups %xmm1, 0x10(%rcx) add $0x20, %rcx ret SET_SIZE(_key_expansion_192a) ENTRY_NP(_key_expansion_192b) _key_expansion_192b_local: pshufd $0b01010101, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 pxor %xmm4, %xmm0 shufps $0b10001100, %xmm0, %xmm4 pxor %xmm4, %xmm0 pxor %xmm1, %xmm0 movups %xmm2, %xmm5 pslldq $4, %xmm5 pshufd $0b11111111, %xmm0, %xmm3 pxor %xmm3, %xmm2 pxor %xmm5, %xmm2 movups %xmm0, (%rcx) add $0x10, %rcx ret SET_SIZE(_key_expansion_192b) ENTRY_NP(_key_expansion_256b) _key_expansion_256b_local: pshufd $0b10101010, %xmm1, %xmm1 shufps $0b00010000, %xmm2, %xmm4 pxor %xmm4, %xmm2 shufps $0b10001100, %xmm2, %xmm4 pxor %xmm4, %xmm2 pxor %xmm1, %xmm2 movups %xmm2, (%rcx) add $0x10, %rcx ret SET_SIZE(_key_expansion_256b) /* * rijndael_key_setup_enc_intel() * Expand the cipher key into the encryption key schedule. * * For kernel code, caller is responsible for ensuring kpreempt_disable() * has been called. This is because %xmm registers are not saved/restored. * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set * on entry. Otherwise, if TS is not set, save and restore %xmm registers * on the stack. * * OpenSolaris interface: * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[], * uint64_t keyBits); * Return value is 0 on error, number of rounds on success. * * Original Intel OpenSSL interface: * int intel_AES_set_encrypt_key(const unsigned char *userKey, * const int bits, AES_KEY *key); * Return value is non-zero on error, 0 on success. 
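 * The round count follows FIPS-197: Nr = 10, 12 or 14 for 128, 192 or
 * 256-bit keys, so the expanded schedule holds 4 * (Nr + 1) 32-bit
 * words, i.e. 44, 52 or 60 words (which is where the KS_LENGTH of 60
 * used by the amd64 implementation comes from).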
*/ #ifdef OPENSSL_INTERFACE #define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key #define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key #define USERCIPHERKEY rdi /* P1, 64 bits */ #define KEYSIZE32 esi /* P2, 32 bits */ #define KEYSIZE64 rsi /* P2, 64 bits */ #define AESKEY rdx /* P3, 64 bits */ #else /* OpenSolaris Interface */ #define AESKEY rdi /* P1, 64 bits */ #define USERCIPHERKEY rsi /* P2, 64 bits */ #define KEYSIZE32 edx /* P3, 32 bits */ #define KEYSIZE64 rdx /* P3, 64 bits */ #endif /* OPENSSL_INTERFACE */ #define ROUNDS32 KEYSIZE32 /* temp */ #define ROUNDS64 KEYSIZE64 /* temp */ #define ENDAESKEY USERCIPHERKEY /* temp */ ENTRY_NP(rijndael_key_setup_enc_intel) rijndael_key_setup_enc_intel_local: FRAME_BEGIN // NULL pointer sanity check test %USERCIPHERKEY, %USERCIPHERKEY jz .Lenc_key_invalid_param test %AESKEY, %AESKEY jz .Lenc_key_invalid_param movups (%USERCIPHERKEY), %xmm0 // user key (first 16 bytes) movups %xmm0, (%AESKEY) lea 0x10(%AESKEY), %rcx // key addr pxor %xmm4, %xmm4 // xmm4 is assumed 0 in _key_expansion_x cmp $256, %KEYSIZE32 jnz .Lenc_key192 // AES 256: 14 rounds in encryption key schedule #ifdef OPENSSL_INTERFACE mov $14, %ROUNDS32 movl %ROUNDS32, 240(%AESKEY) // key.rounds = 14 #endif /* OPENSSL_INTERFACE */ movups 0x10(%USERCIPHERKEY), %xmm2 // other user key (2nd 16 bytes) movups %xmm2, (%rcx) add $0x10, %rcx aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key call _key_expansion_256a_local aeskeygenassist $0x1, %xmm0, %xmm1 call _key_expansion_256b_local aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key call _key_expansion_256a_local aeskeygenassist $0x2, %xmm0, %xmm1 call _key_expansion_256b_local aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key call _key_expansion_256a_local aeskeygenassist $0x4, %xmm0, %xmm1 call _key_expansion_256b_local aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key call _key_expansion_256a_local aeskeygenassist $0x8, %xmm0, %xmm1 call _key_expansion_256b_local aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key call _key_expansion_256a_local aeskeygenassist $0x10, %xmm0, %xmm1 call _key_expansion_256b_local aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key call _key_expansion_256a_local aeskeygenassist $0x20, %xmm0, %xmm1 call _key_expansion_256b_local aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key call _key_expansion_256a_local #ifdef OPENSSL_INTERFACE xor %rax, %rax // return 0 (OK) #else /* Open Solaris Interface */ mov $14, %rax // return # rounds = 14 #endif FRAME_END ret .align 4 .Lenc_key192: cmp $192, %KEYSIZE32 jnz .Lenc_key128 // AES 192: 12 rounds in encryption key schedule #ifdef OPENSSL_INTERFACE mov $12, %ROUNDS32 movl %ROUNDS32, 240(%AESKEY) // key.rounds = 12 #endif /* OPENSSL_INTERFACE */ movq 0x10(%USERCIPHERKEY), %xmm2 // other user key aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key call _key_expansion_192a_local aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key call _key_expansion_192b_local aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key call _key_expansion_192a_local aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key call _key_expansion_192b_local aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key call _key_expansion_192a_local aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key call _key_expansion_192b_local aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key call _key_expansion_192a_local aeskeygenassist $0x80, %xmm2, %xmm1 // expand the key call _key_expansion_192b_local #ifdef OPENSSL_INTERFACE xor %rax, %rax // return 0 (OK) #else /* 
OpenSolaris Interface */ mov $12, %rax // return # rounds = 12 #endif FRAME_END ret .align 4 .Lenc_key128: cmp $128, %KEYSIZE32 jnz .Lenc_key_invalid_key_bits // AES 128: 10 rounds in encryption key schedule #ifdef OPENSSL_INTERFACE mov $10, %ROUNDS32 movl %ROUNDS32, 240(%AESKEY) // key.rounds = 10 #endif /* OPENSSL_INTERFACE */ aeskeygenassist $0x1, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x2, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x4, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x8, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x10, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x20, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x40, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x80, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x1b, %xmm0, %xmm1 // expand the key call _key_expansion_128_local aeskeygenassist $0x36, %xmm0, %xmm1 // expand the key call _key_expansion_128_local #ifdef OPENSSL_INTERFACE xor %rax, %rax // return 0 (OK) #else /* OpenSolaris Interface */ mov $10, %rax // return # rounds = 10 #endif FRAME_END ret .Lenc_key_invalid_param: #ifdef OPENSSL_INTERFACE mov $-1, %rax // user key or AES key pointer is NULL FRAME_END ret #else /* FALLTHROUGH */ #endif /* OPENSSL_INTERFACE */ .Lenc_key_invalid_key_bits: #ifdef OPENSSL_INTERFACE mov $-2, %rax // keysize is invalid #else /* Open Solaris Interface */ xor %rax, %rax // a key pointer is NULL or invalid keysize #endif /* OPENSSL_INTERFACE */ FRAME_END ret SET_SIZE(rijndael_key_setup_enc_intel) /* * rijndael_key_setup_dec_intel() * Expand the cipher key into the decryption key schedule. * * For kernel code, caller is responsible for ensuring kpreempt_disable() * has been called. This is because %xmm registers are not saved/restored. * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set * on entry. Otherwise, if TS is not set, save and restore %xmm registers * on the stack. * * OpenSolaris interface: * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[], * uint64_t keyBits); * Return value is 0 on error, number of rounds on success. * P1->P2, P2->P3, P3->P1 * * Original Intel OpenSSL interface: * int intel_AES_set_decrypt_key(const unsigned char *userKey, * const int bits, AES_KEY *key); * Return value is non-zero on error, 0 on success. 
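 * The routine first builds the encryption schedule ek[0..N] and then
 * converts it into the equivalent inverse-cipher schedule, which in
 * outline is
 *
 *	dk[0] = ek[N]
 *	dk[i] = InvMixColumns(ek[N - i]),  0 < i < N
 *	dk[N] = ek[0]
 *
 * i.e. the encryption round keys in reverse order with AESIMC applied
 * to every round key except the first and the last.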
*/ ENTRY_NP(rijndael_key_setup_dec_intel) FRAME_BEGIN // Generate round keys used for encryption call rijndael_key_setup_enc_intel_local test %rax, %rax #ifdef OPENSSL_INTERFACE jnz .Ldec_key_exit // Failed if returned non-0 #else /* OpenSolaris Interface */ jz .Ldec_key_exit // Failed if returned 0 #endif /* OPENSSL_INTERFACE */ /* * Convert round keys used for encryption * to a form usable for decryption */ #ifndef OPENSSL_INTERFACE /* OpenSolaris Interface */ mov %rax, %ROUNDS64 // set # rounds (10, 12, or 14) // (already set for OpenSSL) #endif lea 0x10(%AESKEY), %rcx // key addr shl $4, %ROUNDS32 add %AESKEY, %ROUNDS64 mov %ROUNDS64, %ENDAESKEY .align 4 .Ldec_key_reorder_loop: movups (%AESKEY), %xmm0 movups (%ROUNDS64), %xmm1 movups %xmm0, (%ROUNDS64) movups %xmm1, (%AESKEY) lea 0x10(%AESKEY), %AESKEY lea -0x10(%ROUNDS64), %ROUNDS64 cmp %AESKEY, %ROUNDS64 ja .Ldec_key_reorder_loop .align 4 .Ldec_key_inv_loop: movups (%rcx), %xmm0 // Convert an encryption round key to a form usable for decryption // with the "AES Inverse Mix Columns" instruction aesimc %xmm0, %xmm1 movups %xmm1, (%rcx) lea 0x10(%rcx), %rcx cmp %ENDAESKEY, %rcx jnz .Ldec_key_inv_loop .Ldec_key_exit: // OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error // OpenSSL: rax = 0 for OK, or non-zero for error FRAME_END ret SET_SIZE(rijndael_key_setup_dec_intel) /* * aes_encrypt_intel() * Encrypt a single block (in and out can overlap). * * For kernel code, caller is responsible for ensuring kpreempt_disable() * has been called. This is because %xmm registers are not saved/restored. * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set * on entry. Otherwise, if TS is not set, save and restore %xmm registers * on the stack. * * Temporary register usage: * %xmm0 State * %xmm1 Key * * Original OpenSolaris Interface: * void aes_encrypt_intel(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4]) * * Original Intel OpenSSL Interface: * void intel_AES_encrypt(const unsigned char *in, unsigned char *out, * const AES_KEY *key) */ #ifdef OPENSSL_INTERFACE #define aes_encrypt_intel intel_AES_encrypt #define aes_decrypt_intel intel_AES_decrypt #define INP rdi /* P1, 64 bits */ #define OUTP rsi /* P2, 64 bits */ #define KEYP rdx /* P3, 64 bits */ /* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */ #define NROUNDS32 ecx /* temporary, 32 bits */ #define NROUNDS cl /* temporary, 8 bits */ #else /* OpenSolaris Interface */ #define KEYP rdi /* P1, 64 bits */ #define NROUNDS esi /* P2, 32 bits */ #define INP rdx /* P3, 64 bits */ #define OUTP rcx /* P4, 64 bits */ #endif /* OPENSSL_INTERFACE */ #define STATE xmm0 /* temporary, 128 bits */ #define KEY xmm1 /* temporary, 128 bits */ ENTRY_NP(aes_encrypt_intel) movups (%INP), %STATE // input movups (%KEYP), %KEY // key #ifdef OPENSSL_INTERFACE mov 240(%KEYP), %NROUNDS32 // round count #else /* OpenSolaris Interface */ /* Round count is already present as P2 in %rsi/%esi */ #endif /* OPENSSL_INTERFACE */ pxor %KEY, %STATE // round 0 lea 0x30(%KEYP), %KEYP cmp $12, %NROUNDS jb .Lenc128 lea 0x20(%KEYP), %KEYP je .Lenc192 // AES 256 lea 0x20(%KEYP), %KEYP movups -0x60(%KEYP), %KEY aesenc %KEY, %STATE movups -0x50(%KEYP), %KEY aesenc %KEY, %STATE .align 4 .Lenc192: // AES 192 and 256 movups -0x40(%KEYP), %KEY aesenc %KEY, %STATE movups -0x30(%KEYP), %KEY aesenc %KEY, %STATE .align 4 .Lenc128: // AES 128, 192, and 256 movups -0x20(%KEYP), %KEY aesenc %KEY, %STATE movups -0x10(%KEYP), %KEY aesenc %KEY, %STATE movups (%KEYP), %KEY aesenc %KEY, %STATE 
movups 0x10(%KEYP), %KEY aesenc %KEY, %STATE movups 0x20(%KEYP), %KEY aesenc %KEY, %STATE movups 0x30(%KEYP), %KEY aesenc %KEY, %STATE movups 0x40(%KEYP), %KEY aesenc %KEY, %STATE movups 0x50(%KEYP), %KEY aesenc %KEY, %STATE movups 0x60(%KEYP), %KEY aesenc %KEY, %STATE movups 0x70(%KEYP), %KEY aesenclast %KEY, %STATE // last round movups %STATE, (%OUTP) // output ret SET_SIZE(aes_encrypt_intel) /* * aes_decrypt_intel() * Decrypt a single block (in and out can overlap). * * For kernel code, caller is responsible for ensuring kpreempt_disable() * has been called. This is because %xmm registers are not saved/restored. * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set * on entry. Otherwise, if TS is not set, save and restore %xmm registers * on the stack. * * Temporary register usage: * %xmm0 State * %xmm1 Key * * Original OpenSolaris Interface: * void aes_decrypt_intel(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4])/ * * Original Intel OpenSSL Interface: * void intel_AES_decrypt(const unsigned char *in, unsigned char *out, * const AES_KEY *key); */ ENTRY_NP(aes_decrypt_intel) movups (%INP), %STATE // input movups (%KEYP), %KEY // key #ifdef OPENSSL_INTERFACE mov 240(%KEYP), %NROUNDS32 // round count #else /* OpenSolaris Interface */ /* Round count is already present as P2 in %rsi/%esi */ #endif /* OPENSSL_INTERFACE */ pxor %KEY, %STATE // round 0 lea 0x30(%KEYP), %KEYP cmp $12, %NROUNDS jb .Ldec128 lea 0x20(%KEYP), %KEYP je .Ldec192 // AES 256 lea 0x20(%KEYP), %KEYP movups -0x60(%KEYP), %KEY aesdec %KEY, %STATE movups -0x50(%KEYP), %KEY aesdec %KEY, %STATE .align 4 .Ldec192: // AES 192 and 256 movups -0x40(%KEYP), %KEY aesdec %KEY, %STATE movups -0x30(%KEYP), %KEY aesdec %KEY, %STATE .align 4 .Ldec128: // AES 128, 192, and 256 movups -0x20(%KEYP), %KEY aesdec %KEY, %STATE movups -0x10(%KEYP), %KEY aesdec %KEY, %STATE movups (%KEYP), %KEY aesdec %KEY, %STATE movups 0x10(%KEYP), %KEY aesdec %KEY, %STATE movups 0x20(%KEYP), %KEY aesdec %KEY, %STATE movups 0x30(%KEYP), %KEY aesdec %KEY, %STATE movups 0x40(%KEYP), %KEY aesdec %KEY, %STATE movups 0x50(%KEYP), %KEY aesdec %KEY, %STATE movups 0x60(%KEYP), %KEY aesdec %KEY, %STATE movups 0x70(%KEYP), %KEY aesdeclast %KEY, %STATE // last round movups %STATE, (%OUTP) // output ret SET_SIZE(aes_decrypt_intel) #endif /* lint || __lint */ #ifdef __ELF__ .section .note.GNU-stack,"",%progbits #endif diff --git a/module/icp/asm-x86_64/aes/aes_amd64.S b/module/icp/asm-x86_64/aes/aes_amd64.S index 9db3a3179230..272720e517e3 100644 --- a/module/icp/asm-x86_64/aes/aes_amd64.S +++ b/module/icp/asm-x86_64/aes/aes_amd64.S @@ -1,906 +1,906 @@ /* * --------------------------------------------------------------------------- * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved. * * LICENSE TERMS * * The free distribution and use of this software is allowed (with or without * changes) provided that: * * 1. source code distributions include the above copyright notice, this * list of conditions and the following disclaimer; * * 2. binary distributions include the above copyright notice, this list * of conditions and the following disclaimer in their documentation; * * 3. the name of the copyright holder is not used to endorse products * built using this software without specific written permission. * * DISCLAIMER * * This software is provided 'as is' with no explicit or implied warranties * in respect of its properties, including, but not limited to, correctness * and/or fitness for purpose. 
* --------------------------------------------------------------------------- * Issue 20/12/2007 * * I am grateful to Dag Arne Osvik for many discussions of the techniques that * can be used to optimise AES assembler code on AMD64/EM64T architectures. * Some of the techniques used in this implementation are the result of * suggestions made by him for which I am most grateful. * * An AES implementation for AMD64 processors using the YASM assembler. This * implementation provides only encryption, decryption and hence requires key * scheduling support in C. It uses 8k bytes of tables but its encryption and * decryption performance is very close to that obtained using large tables. * It can use either MS Windows or Gnu/Linux/OpenSolaris OS calling conventions, * which are as follows: * ms windows gnu/linux/opensolaris os * * in_blk rcx rdi * out_blk rdx rsi * context (cx) r8 rdx * * preserved rsi - + rbx, rbp, rsp, r12, r13, r14 & r15 * registers rdi - on both * * destroyed - rsi + rax, rcx, rdx, r8, r9, r10 & r11 * registers - rdi on both * * The convention used here is that for gnu/linux/opensolaris os. * * This code provides the standard AES block size (128 bits, 16 bytes) and the * three standard AES key sizes (128, 192 and 256 bits). It has the same call * interface as my C implementation. It uses the Microsoft C AMD64 calling * conventions in which the three parameters are placed in rcx, rdx and r8 * respectively. The rbx, rsi, rdi, rbp and r12..r15 registers are preserved. * * OpenSolaris Note: * Modified to use GNU/Linux/Solaris calling conventions. * That is parameters are placed in rdi, rsi, rdx, and rcx, respectively. * * AES_RETURN aes_encrypt(const unsigned char in_blk[], * unsigned char out_blk[], const aes_encrypt_ctx cx[1])/ * * AES_RETURN aes_decrypt(const unsigned char in_blk[], * unsigned char out_blk[], const aes_decrypt_ctx cx[1])/ * * AES_RETURN aes_encrypt_key(const unsigned char key[], * const aes_encrypt_ctx cx[1])/ * * AES_RETURN aes_decrypt_key(const unsigned char key[], * const aes_decrypt_ctx cx[1])/ * * AES_RETURN aes_encrypt_key(const unsigned char key[], * unsigned int len, const aes_decrypt_ctx cx[1])/ * * AES_RETURN aes_decrypt_key(const unsigned char key[], * unsigned int len, const aes_decrypt_ctx cx[1])/ * * where is 128, 102 or 256. In the last two calls the length can be in * either bits or bytes. * * Comment in/out the following lines to obtain the desired subroutines. These * selections MUST match those in the C header file aesopt.h */ #define AES_REV_DKS /* define if key decryption schedule is reversed */ #define LAST_ROUND_TABLES /* define for the faster version using extra tables */ /* * The encryption key schedule has the following in memory layout where N is the * number of rounds (10, 12 or 14): * * lo: | input key (round 0) | / each round is four 32-bit words * | encryption round 1 | * | encryption round 2 | * .... * | encryption round N-1 | * hi: | encryption round N | * * The decryption key schedule is normally set up so that it has the same * layout as above by actually reversing the order of the encryption key * schedule in memory (this happens when AES_REV_DKS is set): * * lo: | decryption round 0 | = | encryption round N | * | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ] * | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ] * .... .... 
* | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ] * hi: | decryption round N | = | input key (round 0) | * * with rounds except the first and last modified using inv_mix_column() * But if AES_REV_DKS is NOT set the order of keys is left as it is for * encryption so that it has to be accessed in reverse when used for * decryption (although the inverse mix column modifications are done) * * lo: | decryption round 0 | = | input key (round 0) | * | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ] * | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ] * .... .... * | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ] * hi: | decryption round N | = | encryption round N | * * This layout is faster when the assembler key scheduling provided here * is used. * * End of user defines */ /* * --------------------------------------------------------------------------- * OpenSolaris OS modifications * * This source originates from Brian Gladman file aes_amd64.asm * in http://fp.gladman.plus.com/AES/aes-src-04-03-08.zip * with these changes: * * 1. Removed MS Windows-specific code within DLL_EXPORT, _SEH_, and * !__GNUC__ ifdefs. Also removed ENCRYPTION, DECRYPTION, * AES_128, AES_192, AES_256, AES_VAR ifdefs. * * 2. Translate yasm/nasm %define and .macro definitions to cpp(1) #define * * 3. Translate yasm/nasm %ifdef/%ifndef to cpp(1) #ifdef * * 4. Translate Intel/yasm/nasm syntax to ATT/OpenSolaris as(1) syntax * (operands reversed, literals prefixed with "$", registers prefixed with "%", * and "[register+offset]", addressing changed to "offset(register)", * parenthesis in constant expressions "()" changed to square brackets "[]", * "." removed from local (numeric) labels, and other changes. * Examples: * Intel/yasm/nasm Syntax ATT/OpenSolaris Syntax * mov rax,(4*20h) mov $[4*0x20],%rax * mov rax,[ebx+20h] mov 0x20(%ebx),%rax * lea rax,[ebx+ecx] lea (%ebx,%ecx),%rax * sub rax,[ebx+ecx*4-20h] sub -0x20(%ebx,%ecx,4),%rax * * 5. Added OpenSolaris ENTRY_NP/SET_SIZE macros from * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function * definitions for lint. * * 6. Renamed functions and reordered parameters to match OpenSolaris: * Original Gladman interface: * int aes_encrypt(const unsigned char *in, * unsigned char *out, const aes_encrypt_ctx cx[1])/ * int aes_decrypt(const unsigned char *in, * unsigned char *out, const aes_encrypt_ctx cx[1])/ * Note: aes_encrypt_ctx contains ks, a 60 element array of uint32_t, * and a union type, inf., containing inf.l, a uint32_t and * inf.b, a 4-element array of uint32_t. Only b[0] in the array (aka "l") is * used and contains the key schedule length * 16 where key schedule length is * 10, 12, or 14 bytes. * * OpenSolaris OS interface: * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4])/ * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4])/ * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4]/ * uint32_t ks32[(MAX_AES_NR + 1) * 4]/ } aes_ks_t/ * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text, * ct is crypto text, and MAX_AES_NR is 14. * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64. 
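 *
 * Illustrative caller sketch (a minimal, hypothetical example, not part of
 * this file, assuming the OpenSolaris interface and aes_ks_t layout described
 * above; the key schedule itself is produced by the C key-scheduling code):
 *
 *	aes_ks_t ks;			// encryption key schedule, filled in C
 *	uint32_t pt[4], ct[4];		// one 16-byte block each
 *	int Nr = 10;			// 10, 12 or 14 rounds (AES-128/192/256)
 *	aes_encrypt_amd64(&ks, Nr, pt, ct);
 *
 * (aes_decrypt_amd64() is called the same way, but with the separately
 * prepared decryption key schedule.)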
*/ #if defined(lint) || defined(__lint) #include -/* ARGSUSED */ void aes_encrypt_amd64(const uint32_t rk[], int Nr, const uint32_t pt[4], uint32_t ct[4]) { + (void) rk, (void) Nr, (void) pt, (void) ct; } -/* ARGSUSED */ void aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4], uint32_t pt[4]) { + (void) rk, (void) Nr, (void) pt, (void) ct; } #else #define _ASM #include #define KS_LENGTH 60 #define raxd eax #define rdxd edx #define rcxd ecx #define rbxd ebx #define rsid esi #define rdid edi #define raxb al #define rdxb dl #define rcxb cl #define rbxb bl #define rsib sil #define rdib dil // finite field multiplies by {02}, {04} and {08} #define f2(x) [[x<<1]^[[[x>>7]&1]*0x11b]] #define f4(x) [[x<<2]^[[[x>>6]&1]*0x11b]^[[[x>>6]&2]*0x11b]] #define f8(x) [[x<<3]^[[[x>>5]&1]*0x11b]^[[[x>>5]&2]*0x11b]^[[[x>>5]&4]*0x11b]] // finite field multiplies required in table generation #define f3(x) [[f2(x)] ^ [x]] #define f9(x) [[f8(x)] ^ [x]] #define fb(x) [[f8(x)] ^ [f2(x)] ^ [x]] #define fd(x) [[f8(x)] ^ [f4(x)] ^ [x]] #define fe(x) [[f8(x)] ^ [f4(x)] ^ [f2(x)]] // macros for expanding S-box data #define u8(x) [f2(x)], [x], [x], [f3(x)], [f2(x)], [x], [x], [f3(x)] #define v8(x) [fe(x)], [f9(x)], [fd(x)], [fb(x)], [fe(x)], [f9(x)], [fd(x)], [x] #define w8(x) [x], 0, 0, 0, [x], 0, 0, 0 #define enc_vals(x) \ .byte x(0x63),x(0x7c),x(0x77),x(0x7b),x(0xf2),x(0x6b),x(0x6f),x(0xc5); \ .byte x(0x30),x(0x01),x(0x67),x(0x2b),x(0xfe),x(0xd7),x(0xab),x(0x76); \ .byte x(0xca),x(0x82),x(0xc9),x(0x7d),x(0xfa),x(0x59),x(0x47),x(0xf0); \ .byte x(0xad),x(0xd4),x(0xa2),x(0xaf),x(0x9c),x(0xa4),x(0x72),x(0xc0); \ .byte x(0xb7),x(0xfd),x(0x93),x(0x26),x(0x36),x(0x3f),x(0xf7),x(0xcc); \ .byte x(0x34),x(0xa5),x(0xe5),x(0xf1),x(0x71),x(0xd8),x(0x31),x(0x15); \ .byte x(0x04),x(0xc7),x(0x23),x(0xc3),x(0x18),x(0x96),x(0x05),x(0x9a); \ .byte x(0x07),x(0x12),x(0x80),x(0xe2),x(0xeb),x(0x27),x(0xb2),x(0x75); \ .byte x(0x09),x(0x83),x(0x2c),x(0x1a),x(0x1b),x(0x6e),x(0x5a),x(0xa0); \ .byte x(0x52),x(0x3b),x(0xd6),x(0xb3),x(0x29),x(0xe3),x(0x2f),x(0x84); \ .byte x(0x53),x(0xd1),x(0x00),x(0xed),x(0x20),x(0xfc),x(0xb1),x(0x5b); \ .byte x(0x6a),x(0xcb),x(0xbe),x(0x39),x(0x4a),x(0x4c),x(0x58),x(0xcf); \ .byte x(0xd0),x(0xef),x(0xaa),x(0xfb),x(0x43),x(0x4d),x(0x33),x(0x85); \ .byte x(0x45),x(0xf9),x(0x02),x(0x7f),x(0x50),x(0x3c),x(0x9f),x(0xa8); \ .byte x(0x51),x(0xa3),x(0x40),x(0x8f),x(0x92),x(0x9d),x(0x38),x(0xf5); \ .byte x(0xbc),x(0xb6),x(0xda),x(0x21),x(0x10),x(0xff),x(0xf3),x(0xd2); \ .byte x(0xcd),x(0x0c),x(0x13),x(0xec),x(0x5f),x(0x97),x(0x44),x(0x17); \ .byte x(0xc4),x(0xa7),x(0x7e),x(0x3d),x(0x64),x(0x5d),x(0x19),x(0x73); \ .byte x(0x60),x(0x81),x(0x4f),x(0xdc),x(0x22),x(0x2a),x(0x90),x(0x88); \ .byte x(0x46),x(0xee),x(0xb8),x(0x14),x(0xde),x(0x5e),x(0x0b),x(0xdb); \ .byte x(0xe0),x(0x32),x(0x3a),x(0x0a),x(0x49),x(0x06),x(0x24),x(0x5c); \ .byte x(0xc2),x(0xd3),x(0xac),x(0x62),x(0x91),x(0x95),x(0xe4),x(0x79); \ .byte x(0xe7),x(0xc8),x(0x37),x(0x6d),x(0x8d),x(0xd5),x(0x4e),x(0xa9); \ .byte x(0x6c),x(0x56),x(0xf4),x(0xea),x(0x65),x(0x7a),x(0xae),x(0x08); \ .byte x(0xba),x(0x78),x(0x25),x(0x2e),x(0x1c),x(0xa6),x(0xb4),x(0xc6); \ .byte x(0xe8),x(0xdd),x(0x74),x(0x1f),x(0x4b),x(0xbd),x(0x8b),x(0x8a); \ .byte x(0x70),x(0x3e),x(0xb5),x(0x66),x(0x48),x(0x03),x(0xf6),x(0x0e); \ .byte x(0x61),x(0x35),x(0x57),x(0xb9),x(0x86),x(0xc1),x(0x1d),x(0x9e); \ .byte x(0xe1),x(0xf8),x(0x98),x(0x11),x(0x69),x(0xd9),x(0x8e),x(0x94); \ .byte x(0x9b),x(0x1e),x(0x87),x(0xe9),x(0xce),x(0x55),x(0x28),x(0xdf); \ .byte 
x(0x8c),x(0xa1),x(0x89),x(0x0d),x(0xbf),x(0xe6),x(0x42),x(0x68); \ .byte x(0x41),x(0x99),x(0x2d),x(0x0f),x(0xb0),x(0x54),x(0xbb),x(0x16) #define dec_vals(x) \ .byte x(0x52),x(0x09),x(0x6a),x(0xd5),x(0x30),x(0x36),x(0xa5),x(0x38); \ .byte x(0xbf),x(0x40),x(0xa3),x(0x9e),x(0x81),x(0xf3),x(0xd7),x(0xfb); \ .byte x(0x7c),x(0xe3),x(0x39),x(0x82),x(0x9b),x(0x2f),x(0xff),x(0x87); \ .byte x(0x34),x(0x8e),x(0x43),x(0x44),x(0xc4),x(0xde),x(0xe9),x(0xcb); \ .byte x(0x54),x(0x7b),x(0x94),x(0x32),x(0xa6),x(0xc2),x(0x23),x(0x3d); \ .byte x(0xee),x(0x4c),x(0x95),x(0x0b),x(0x42),x(0xfa),x(0xc3),x(0x4e); \ .byte x(0x08),x(0x2e),x(0xa1),x(0x66),x(0x28),x(0xd9),x(0x24),x(0xb2); \ .byte x(0x76),x(0x5b),x(0xa2),x(0x49),x(0x6d),x(0x8b),x(0xd1),x(0x25); \ .byte x(0x72),x(0xf8),x(0xf6),x(0x64),x(0x86),x(0x68),x(0x98),x(0x16); \ .byte x(0xd4),x(0xa4),x(0x5c),x(0xcc),x(0x5d),x(0x65),x(0xb6),x(0x92); \ .byte x(0x6c),x(0x70),x(0x48),x(0x50),x(0xfd),x(0xed),x(0xb9),x(0xda); \ .byte x(0x5e),x(0x15),x(0x46),x(0x57),x(0xa7),x(0x8d),x(0x9d),x(0x84); \ .byte x(0x90),x(0xd8),x(0xab),x(0x00),x(0x8c),x(0xbc),x(0xd3),x(0x0a); \ .byte x(0xf7),x(0xe4),x(0x58),x(0x05),x(0xb8),x(0xb3),x(0x45),x(0x06); \ .byte x(0xd0),x(0x2c),x(0x1e),x(0x8f),x(0xca),x(0x3f),x(0x0f),x(0x02); \ .byte x(0xc1),x(0xaf),x(0xbd),x(0x03),x(0x01),x(0x13),x(0x8a),x(0x6b); \ .byte x(0x3a),x(0x91),x(0x11),x(0x41),x(0x4f),x(0x67),x(0xdc),x(0xea); \ .byte x(0x97),x(0xf2),x(0xcf),x(0xce),x(0xf0),x(0xb4),x(0xe6),x(0x73); \ .byte x(0x96),x(0xac),x(0x74),x(0x22),x(0xe7),x(0xad),x(0x35),x(0x85); \ .byte x(0xe2),x(0xf9),x(0x37),x(0xe8),x(0x1c),x(0x75),x(0xdf),x(0x6e); \ .byte x(0x47),x(0xf1),x(0x1a),x(0x71),x(0x1d),x(0x29),x(0xc5),x(0x89); \ .byte x(0x6f),x(0xb7),x(0x62),x(0x0e),x(0xaa),x(0x18),x(0xbe),x(0x1b); \ .byte x(0xfc),x(0x56),x(0x3e),x(0x4b),x(0xc6),x(0xd2),x(0x79),x(0x20); \ .byte x(0x9a),x(0xdb),x(0xc0),x(0xfe),x(0x78),x(0xcd),x(0x5a),x(0xf4); \ .byte x(0x1f),x(0xdd),x(0xa8),x(0x33),x(0x88),x(0x07),x(0xc7),x(0x31); \ .byte x(0xb1),x(0x12),x(0x10),x(0x59),x(0x27),x(0x80),x(0xec),x(0x5f); \ .byte x(0x60),x(0x51),x(0x7f),x(0xa9),x(0x19),x(0xb5),x(0x4a),x(0x0d); \ .byte x(0x2d),x(0xe5),x(0x7a),x(0x9f),x(0x93),x(0xc9),x(0x9c),x(0xef); \ .byte x(0xa0),x(0xe0),x(0x3b),x(0x4d),x(0xae),x(0x2a),x(0xf5),x(0xb0); \ .byte x(0xc8),x(0xeb),x(0xbb),x(0x3c),x(0x83),x(0x53),x(0x99),x(0x61); \ .byte x(0x17),x(0x2b),x(0x04),x(0x7e),x(0xba),x(0x77),x(0xd6),x(0x26); \ .byte x(0xe1),x(0x69),x(0x14),x(0x63),x(0x55),x(0x21),x(0x0c),x(0x7d) #define tptr %rbp /* table pointer */ #define kptr %r8 /* key schedule pointer */ #define fofs 128 /* adjust offset in key schedule to keep |disp| < 128 */ #define fk_ref(x, y) -16*x+fofs+4*y(kptr) #ifdef AES_REV_DKS #define rofs 128 #define ik_ref(x, y) -16*x+rofs+4*y(kptr) #else #define rofs -128 #define ik_ref(x, y) 16*x+rofs+4*y(kptr) #endif /* AES_REV_DKS */ #define tab_0(x) (tptr,x,8) #define tab_1(x) 3(tptr,x,8) #define tab_2(x) 2(tptr,x,8) #define tab_3(x) 1(tptr,x,8) #define tab_f(x) 1(tptr,x,8) #define tab_i(x) 7(tptr,x,8) #define ff_rnd(p1, p2, p3, p4, round) /* normal forward round */ \ mov fk_ref(round,0), p1; \ mov fk_ref(round,1), p2; \ mov fk_ref(round,2), p3; \ mov fk_ref(round,3), p4; \ \ movzx %al, %esi; \ movzx %ah, %edi; \ shr $16, %eax; \ xor tab_0(%rsi), p1; \ xor tab_1(%rdi), p4; \ movzx %al, %esi; \ movzx %ah, %edi; \ xor tab_2(%rsi), p3; \ xor tab_3(%rdi), p2; \ \ movzx %bl, %esi; \ movzx %bh, %edi; \ shr $16, %ebx; \ xor tab_0(%rsi), p2; \ xor tab_1(%rdi), p1; \ movzx %bl, %esi; \ movzx %bh, %edi; \ xor tab_2(%rsi), p4; 
\ xor tab_3(%rdi), p3; \ \ movzx %cl, %esi; \ movzx %ch, %edi; \ shr $16, %ecx; \ xor tab_0(%rsi), p3; \ xor tab_1(%rdi), p2; \ movzx %cl, %esi; \ movzx %ch, %edi; \ xor tab_2(%rsi), p1; \ xor tab_3(%rdi), p4; \ \ movzx %dl, %esi; \ movzx %dh, %edi; \ shr $16, %edx; \ xor tab_0(%rsi), p4; \ xor tab_1(%rdi), p3; \ movzx %dl, %esi; \ movzx %dh, %edi; \ xor tab_2(%rsi), p2; \ xor tab_3(%rdi), p1; \ \ mov p1, %eax; \ mov p2, %ebx; \ mov p3, %ecx; \ mov p4, %edx #ifdef LAST_ROUND_TABLES #define fl_rnd(p1, p2, p3, p4, round) /* last forward round */ \ add $2048, tptr; \ mov fk_ref(round,0), p1; \ mov fk_ref(round,1), p2; \ mov fk_ref(round,2), p3; \ mov fk_ref(round,3), p4; \ \ movzx %al, %esi; \ movzx %ah, %edi; \ shr $16, %eax; \ xor tab_0(%rsi), p1; \ xor tab_1(%rdi), p4; \ movzx %al, %esi; \ movzx %ah, %edi; \ xor tab_2(%rsi), p3; \ xor tab_3(%rdi), p2; \ \ movzx %bl, %esi; \ movzx %bh, %edi; \ shr $16, %ebx; \ xor tab_0(%rsi), p2; \ xor tab_1(%rdi), p1; \ movzx %bl, %esi; \ movzx %bh, %edi; \ xor tab_2(%rsi), p4; \ xor tab_3(%rdi), p3; \ \ movzx %cl, %esi; \ movzx %ch, %edi; \ shr $16, %ecx; \ xor tab_0(%rsi), p3; \ xor tab_1(%rdi), p2; \ movzx %cl, %esi; \ movzx %ch, %edi; \ xor tab_2(%rsi), p1; \ xor tab_3(%rdi), p4; \ \ movzx %dl, %esi; \ movzx %dh, %edi; \ shr $16, %edx; \ xor tab_0(%rsi), p4; \ xor tab_1(%rdi), p3; \ movzx %dl, %esi; \ movzx %dh, %edi; \ xor tab_2(%rsi), p2; \ xor tab_3(%rdi), p1 #else #define fl_rnd(p1, p2, p3, p4, round) /* last forward round */ \ mov fk_ref(round,0), p1; \ mov fk_ref(round,1), p2; \ mov fk_ref(round,2), p3; \ mov fk_ref(round,3), p4; \ \ movzx %al, %esi; \ movzx %ah, %edi; \ shr $16, %eax; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ xor %esi, p1; \ rol $8, %edi; \ xor %edi, p4; \ movzx %al, %esi; \ movzx %ah, %edi; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p3; \ xor %edi, p2; \ \ movzx %bl, %esi; \ movzx %bh, %edi; \ shr $16, %ebx; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ xor %esi, p2; \ rol $8, %edi; \ xor %edi, p1; \ movzx %bl, %esi; \ movzx %bh, %edi; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p4; \ xor %edi, p3; \ \ movzx %cl, %esi; \ movzx %ch, %edi; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ shr $16, %ecx; \ xor %esi, p3; \ rol $8, %edi; \ xor %edi, p2; \ movzx %cl, %esi; \ movzx %ch, %edi; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p1; \ xor %edi, p4; \ \ movzx %dl, %esi; \ movzx %dh, %edi; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ shr $16, %edx; \ xor %esi, p4; \ rol $8, %edi; \ xor %edi, p3; \ movzx %dl, %esi; \ movzx %dh, %edi; \ movzx tab_f(%rsi), %esi; \ movzx tab_f(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p2; \ xor %edi, p1 #endif /* LAST_ROUND_TABLES */ #define ii_rnd(p1, p2, p3, p4, round) /* normal inverse round */ \ mov ik_ref(round,0), p1; \ mov ik_ref(round,1), p2; \ mov ik_ref(round,2), p3; \ mov ik_ref(round,3), p4; \ \ movzx %al, %esi; \ movzx %ah, %edi; \ shr $16, %eax; \ xor tab_0(%rsi), p1; \ xor tab_1(%rdi), p2; \ movzx %al, %esi; \ movzx %ah, %edi; \ xor tab_2(%rsi), p3; \ xor tab_3(%rdi), p4; \ \ movzx %bl, %esi; \ movzx %bh, %edi; \ shr $16, %ebx; \ xor tab_0(%rsi), p2; \ xor tab_1(%rdi), p3; \ movzx %bl, %esi; \ movzx %bh, %edi; \ xor tab_2(%rsi), p4; \ xor tab_3(%rdi), p1; \ \ movzx %cl, %esi; \ movzx %ch, %edi; \ shr $16, %ecx; \ xor tab_0(%rsi), p3; \ xor tab_1(%rdi), p4; \ 
movzx %cl, %esi; \ movzx %ch, %edi; \ xor tab_2(%rsi), p1; \ xor tab_3(%rdi), p2; \ \ movzx %dl, %esi; \ movzx %dh, %edi; \ shr $16, %edx; \ xor tab_0(%rsi), p4; \ xor tab_1(%rdi), p1; \ movzx %dl, %esi; \ movzx %dh, %edi; \ xor tab_2(%rsi), p2; \ xor tab_3(%rdi), p3; \ \ mov p1, %eax; \ mov p2, %ebx; \ mov p3, %ecx; \ mov p4, %edx #ifdef LAST_ROUND_TABLES #define il_rnd(p1, p2, p3, p4, round) /* last inverse round */ \ add $2048, tptr; \ mov ik_ref(round,0), p1; \ mov ik_ref(round,1), p2; \ mov ik_ref(round,2), p3; \ mov ik_ref(round,3), p4; \ \ movzx %al, %esi; \ movzx %ah, %edi; \ shr $16, %eax; \ xor tab_0(%rsi), p1; \ xor tab_1(%rdi), p2; \ movzx %al, %esi; \ movzx %ah, %edi; \ xor tab_2(%rsi), p3; \ xor tab_3(%rdi), p4; \ \ movzx %bl, %esi; \ movzx %bh, %edi; \ shr $16, %ebx; \ xor tab_0(%rsi), p2; \ xor tab_1(%rdi), p3; \ movzx %bl, %esi; \ movzx %bh, %edi; \ xor tab_2(%rsi), p4; \ xor tab_3(%rdi), p1; \ \ movzx %cl, %esi; \ movzx %ch, %edi; \ shr $16, %ecx; \ xor tab_0(%rsi), p3; \ xor tab_1(%rdi), p4; \ movzx %cl, %esi; \ movzx %ch, %edi; \ xor tab_2(%rsi), p1; \ xor tab_3(%rdi), p2; \ \ movzx %dl, %esi; \ movzx %dh, %edi; \ shr $16, %edx; \ xor tab_0(%rsi), p4; \ xor tab_1(%rdi), p1; \ movzx %dl, %esi; \ movzx %dh, %edi; \ xor tab_2(%rsi), p2; \ xor tab_3(%rdi), p3 #else #define il_rnd(p1, p2, p3, p4, round) /* last inverse round */ \ mov ik_ref(round,0), p1; \ mov ik_ref(round,1), p2; \ mov ik_ref(round,2), p3; \ mov ik_ref(round,3), p4; \ \ movzx %al, %esi; \ movzx %ah, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ shr $16, %eax; \ xor %esi, p1; \ rol $8, %edi; \ xor %edi, p2; \ movzx %al, %esi; \ movzx %ah, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p3; \ xor %edi, p4; \ \ movzx %bl, %esi; \ movzx %bh, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ shr $16, %ebx; \ xor %esi, p2; \ rol $8, %edi; \ xor %edi, p3; \ movzx %bl, %esi; \ movzx %bh, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p4; \ xor %edi, p1; \ \ movzx %cl, %esi; \ movzx %ch, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ shr $16, %ecx; \ xor %esi, p3; \ rol $8, %edi; \ xor %edi, p4; \ movzx %cl, %esi; \ movzx %ch, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p1; \ xor %edi, p2; \ \ movzx %dl, %esi; \ movzx %dh, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ shr $16, %edx; \ xor %esi, p4; \ rol $8, %edi; \ xor %edi, p1; \ movzx %dl, %esi; \ movzx %dh, %edi; \ movzx tab_i(%rsi), %esi; \ movzx tab_i(%rdi), %edi; \ rol $16, %esi; \ rol $24, %edi; \ xor %esi, p2; \ xor %edi, p3 #endif /* LAST_ROUND_TABLES */ /* * OpenSolaris OS: * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4])/ * * Original interface: * int aes_encrypt(const unsigned char *in, * unsigned char *out, const aes_encrypt_ctx cx[1])/ */ .data .align 64 enc_tab: enc_vals(u8) #ifdef LAST_ROUND_TABLES // Last Round Tables: enc_vals(w8) #endif ENTRY_NP(aes_encrypt_amd64) #ifdef GLADMAN_INTERFACE // Original interface sub $[4*8], %rsp // gnu/linux/opensolaris binary interface mov %rsi, (%rsp) // output pointer (P2) mov %rdx, %r8 // context (P3) mov %rbx, 1*8(%rsp) // P1: input pointer in rdi mov %rbp, 2*8(%rsp) // P2: output pointer in (rsp) mov %r12, 3*8(%rsp) // P3: context in r8 movzx 4*KS_LENGTH(kptr), %esi // Get byte key length * 16 #else // OpenSolaris OS interface sub 
$[4*8], %rsp // Make room on stack to save registers mov %rcx, (%rsp) // Save output pointer (P4) on stack mov %rdi, %r8 // context (P1) mov %rdx, %rdi // P3: save input pointer shl $4, %esi // P2: esi byte key length * 16 mov %rbx, 1*8(%rsp) // Save registers mov %rbp, 2*8(%rsp) mov %r12, 3*8(%rsp) // P1: context in r8 // P2: byte key length * 16 in esi // P3: input pointer in rdi // P4: output pointer in (rsp) #endif /* GLADMAN_INTERFACE */ lea enc_tab(%rip), tptr sub $fofs, kptr // Load input block into registers mov (%rdi), %eax mov 1*4(%rdi), %ebx mov 2*4(%rdi), %ecx mov 3*4(%rdi), %edx xor fofs(kptr), %eax xor fofs+4(kptr), %ebx xor fofs+8(kptr), %ecx xor fofs+12(kptr), %edx lea (kptr,%rsi), kptr // Jump based on byte key length * 16: cmp $[10*16], %esi je 3f cmp $[12*16], %esi je 2f cmp $[14*16], %esi je 1f mov $-1, %rax // error jmp 4f // Perform normal forward rounds 1: ff_rnd(%r9d, %r10d, %r11d, %r12d, 13) ff_rnd(%r9d, %r10d, %r11d, %r12d, 12) 2: ff_rnd(%r9d, %r10d, %r11d, %r12d, 11) ff_rnd(%r9d, %r10d, %r11d, %r12d, 10) 3: ff_rnd(%r9d, %r10d, %r11d, %r12d, 9) ff_rnd(%r9d, %r10d, %r11d, %r12d, 8) ff_rnd(%r9d, %r10d, %r11d, %r12d, 7) ff_rnd(%r9d, %r10d, %r11d, %r12d, 6) ff_rnd(%r9d, %r10d, %r11d, %r12d, 5) ff_rnd(%r9d, %r10d, %r11d, %r12d, 4) ff_rnd(%r9d, %r10d, %r11d, %r12d, 3) ff_rnd(%r9d, %r10d, %r11d, %r12d, 2) ff_rnd(%r9d, %r10d, %r11d, %r12d, 1) fl_rnd(%r9d, %r10d, %r11d, %r12d, 0) // Copy results mov (%rsp), %rbx mov %r9d, (%rbx) mov %r10d, 4(%rbx) mov %r11d, 8(%rbx) mov %r12d, 12(%rbx) xor %rax, %rax 4: // Restore registers mov 1*8(%rsp), %rbx mov 2*8(%rsp), %rbp mov 3*8(%rsp), %r12 add $[4*8], %rsp ret SET_SIZE(aes_encrypt_amd64) /* * OpenSolaris OS: * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr, * const uint32_t pt[4], uint32_t ct[4])/ * * Original interface: * int aes_decrypt(const unsigned char *in, * unsigned char *out, const aes_encrypt_ctx cx[1])/ */ .data .align 64 dec_tab: dec_vals(v8) #ifdef LAST_ROUND_TABLES // Last Round Tables: dec_vals(w8) #endif ENTRY_NP(aes_decrypt_amd64) #ifdef GLADMAN_INTERFACE // Original interface sub $[4*8], %rsp // gnu/linux/opensolaris binary interface mov %rsi, (%rsp) // output pointer (P2) mov %rdx, %r8 // context (P3) mov %rbx, 1*8(%rsp) // P1: input pointer in rdi mov %rbp, 2*8(%rsp) // P2: output pointer in (rsp) mov %r12, 3*8(%rsp) // P3: context in r8 movzx 4*KS_LENGTH(kptr), %esi // Get byte key length * 16 #else // OpenSolaris OS interface sub $[4*8], %rsp // Make room on stack to save registers mov %rcx, (%rsp) // Save output pointer (P4) on stack mov %rdi, %r8 // context (P1) mov %rdx, %rdi // P3: save input pointer shl $4, %esi // P2: esi byte key length * 16 mov %rbx, 1*8(%rsp) // Save registers mov %rbp, 2*8(%rsp) mov %r12, 3*8(%rsp) // P1: context in r8 // P2: byte key length * 16 in esi // P3: input pointer in rdi // P4: output pointer in (rsp) #endif /* GLADMAN_INTERFACE */ lea dec_tab(%rip), tptr sub $rofs, kptr // Load input block into registers mov (%rdi), %eax mov 1*4(%rdi), %ebx mov 2*4(%rdi), %ecx mov 3*4(%rdi), %edx #ifdef AES_REV_DKS mov kptr, %rdi lea (kptr,%rsi), kptr #else lea (kptr,%rsi), %rdi #endif xor rofs(%rdi), %eax xor rofs+4(%rdi), %ebx xor rofs+8(%rdi), %ecx xor rofs+12(%rdi), %edx // Jump based on byte key length * 16: cmp $[10*16], %esi je 3f cmp $[12*16], %esi je 2f cmp $[14*16], %esi je 1f mov $-1, %rax // error jmp 4f // Perform normal inverse rounds 1: ii_rnd(%r9d, %r10d, %r11d, %r12d, 13) ii_rnd(%r9d, %r10d, %r11d, %r12d, 12) 2: ii_rnd(%r9d, %r10d, %r11d, %r12d, 11) ii_rnd(%r9d, 
%r10d, %r11d, %r12d, 10) 3: ii_rnd(%r9d, %r10d, %r11d, %r12d, 9) ii_rnd(%r9d, %r10d, %r11d, %r12d, 8) ii_rnd(%r9d, %r10d, %r11d, %r12d, 7) ii_rnd(%r9d, %r10d, %r11d, %r12d, 6) ii_rnd(%r9d, %r10d, %r11d, %r12d, 5) ii_rnd(%r9d, %r10d, %r11d, %r12d, 4) ii_rnd(%r9d, %r10d, %r11d, %r12d, 3) ii_rnd(%r9d, %r10d, %r11d, %r12d, 2) ii_rnd(%r9d, %r10d, %r11d, %r12d, 1) il_rnd(%r9d, %r10d, %r11d, %r12d, 0) // Copy results mov (%rsp), %rbx mov %r9d, (%rbx) mov %r10d, 4(%rbx) mov %r11d, 8(%rbx) mov %r12d, 12(%rbx) xor %rax, %rax 4: // Restore registers mov 1*8(%rsp), %rbx mov 2*8(%rsp), %rbp mov 3*8(%rsp), %r12 add $[4*8], %rsp ret SET_SIZE(aes_decrypt_amd64) #endif /* lint || __lint */ #ifdef __ELF__ .section .note.GNU-stack,"",%progbits #endif diff --git a/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S b/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S index 59edc4c8d56c..0e1e04b78c5d 100644 --- a/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S +++ b/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S @@ -1,254 +1,254 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2009 Intel Corporation * All Rights Reserved. */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Accelerated GHASH implementation with Intel PCLMULQDQ-NI * instructions. This file contains an accelerated * Galois Field Multiplication implementation. * * PCLMULQDQ is used to accelerate the most time-consuming part of GHASH, * carry-less multiplication. More information about PCLMULQDQ can be * found at: * http://software.intel.com/en-us/articles/ * carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ * */ /* * ==================================================================== * OpenSolaris OS modifications * * This source originates as file galois_hash_asm.c from * Intel Corporation dated September 21, 2009. * * This OpenSolaris version has these major changes from the original source: * * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from * /usr/include/sys/asm_linkage.h, lint(1B) guards, and a dummy C function * definition for lint. * * 2. Formatted code, added comments, and added #includes and #defines. * * 3. If bit CR0.TS is set, clear and set the TS bit, after and before * calling kpreempt_disable() and kpreempt_enable(). * If the TS bit is not set, Save and restore %xmm registers at the beginning * and end of function calls (%xmm* registers are not saved and restored by * during kernel thread preemption). * * 4. Removed code to perform hashing. This is already done with C macro * GHASH in gcm.c. For better performance, this removed code should be * reintegrated in the future to replace the C GHASH macro. * * 5. Added code to byte swap 16-byte input and output. * * 6. 
Folded in comments from the original C source with embedded assembly * (SB_w_shift_xor.c) * * 7. Renamed function and reordered parameters to match OpenSolaris: * Intel interface: * void galois_hash_asm(unsigned char *hk, unsigned char *s, * unsigned char *d, int length) * OpenSolaris OS interface: * void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res); * ==================================================================== */ #if defined(lint) || defined(__lint) /* lint */ #include -/* ARGSUSED */ void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) { + (void) x_in, (void) y, (void) res; } #elif defined(HAVE_PCLMULQDQ) /* guard by instruction set */ #define _ASM #include /* * Use this mask to byte-swap a 16-byte integer with the pshufb instruction */ // static uint8_t byte_swap16_mask[] = { // 15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2, 1, 0 }; .data .align XMM_ALIGN .Lbyte_swap16_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 /* * void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res); * * Perform a carry-less multiplication (that is, use XOR instead of the * multiply operator) on P1 and P2 and place the result in P3. * * Byte swap the input and the output. * * Note: x_in, y, and res all point to a block of 20-byte numbers * (an array of two 64-bit integers). * * Note2: For kernel code, caller is responsible for ensuring * kpreempt_disable() has been called. This is because %xmm registers are * not saved/restored. Clear and set the CR0.TS bit on entry and exit, * respectively, if TS is set on entry. Otherwise, if TS is not set, * save and restore %xmm registers on the stack. * * Note3: Original Intel definition: * void galois_hash_asm(unsigned char *hk, unsigned char *s, * unsigned char *d, int length) * * Note4: Register/parameter mapping: * Intel: * Parameter 1: %rcx (copied to %xmm0) hk or x_in * Parameter 2: %rdx (copied to %xmm1) s or y * Parameter 3: %rdi (result) d or res * OpenSolaris: * Parameter 1: %rdi (copied to %xmm0) x_in * Parameter 2: %rsi (copied to %xmm1) y * Parameter 3: %rdx (result) res */ ENTRY_NP(gcm_mul_pclmulqdq) // // Copy Parameters // movdqu (%rdi), %xmm0 // P1 movdqu (%rsi), %xmm1 // P2 // // Byte swap 16-byte input // lea .Lbyte_swap16_mask(%rip), %rax movups (%rax), %xmm10 pshufb %xmm10, %xmm0 pshufb %xmm10, %xmm1 // // Multiply with the hash key // movdqu %xmm0, %xmm3 pclmulqdq $0, %xmm1, %xmm3 // xmm3 holds a0*b0 movdqu %xmm0, %xmm4 pclmulqdq $16, %xmm1, %xmm4 // xmm4 holds a0*b1 movdqu %xmm0, %xmm5 pclmulqdq $1, %xmm1, %xmm5 // xmm5 holds a1*b0 movdqu %xmm0, %xmm6 pclmulqdq $17, %xmm1, %xmm6 // xmm6 holds a1*b1 pxor %xmm5, %xmm4 // xmm4 holds a0*b1 + a1*b0 movdqu %xmm4, %xmm5 // move the contents of xmm4 to xmm5 psrldq $8, %xmm4 // shift by xmm4 64 bits to the right pslldq $8, %xmm5 // shift by xmm5 64 bits to the left pxor %xmm5, %xmm3 pxor %xmm4, %xmm6 // Register pair holds the result // of the carry-less multiplication of // xmm0 by xmm1. // We shift the result of the multiplication by one bit position // to the left to cope for the fact that the bits are reversed. movdqu %xmm3, %xmm7 movdqu %xmm6, %xmm8 pslld $1, %xmm3 pslld $1, %xmm6 psrld $31, %xmm7 psrld $31, %xmm8 movdqu %xmm7, %xmm9 pslldq $4, %xmm8 pslldq $4, %xmm7 psrldq $12, %xmm9 por %xmm7, %xmm3 por %xmm8, %xmm6 por %xmm9, %xmm6 // // First phase of the reduction // // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts // independently. 
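	//
	// The reduction below is modulo the GCM field polynomial
	// x^128 + x^7 + x^2 + x + 1; the shift counts 31, 30, 25 here (and
	// 1, 2, 7 in the second phase) correspond to its x, x^2 and x^7
	// terms, applied to the bit-reflected representation.
	//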
movdqu %xmm3, %xmm7 movdqu %xmm3, %xmm8 movdqu %xmm3, %xmm9 pslld $31, %xmm7 // packed right shift shifting << 31 pslld $30, %xmm8 // packed right shift shifting << 30 pslld $25, %xmm9 // packed right shift shifting << 25 pxor %xmm8, %xmm7 // xor the shifted versions pxor %xmm9, %xmm7 movdqu %xmm7, %xmm8 pslldq $12, %xmm7 psrldq $4, %xmm8 pxor %xmm7, %xmm3 // first phase of the reduction complete // // Second phase of the reduction // // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these // shift operations. movdqu %xmm3, %xmm2 movdqu %xmm3, %xmm4 // packed left shifting >> 1 movdqu %xmm3, %xmm5 psrld $1, %xmm2 psrld $2, %xmm4 // packed left shifting >> 2 psrld $7, %xmm5 // packed left shifting >> 7 pxor %xmm4, %xmm2 // xor the shifted versions pxor %xmm5, %xmm2 pxor %xmm8, %xmm2 pxor %xmm2, %xmm3 pxor %xmm3, %xmm6 // the result is in xmm6 // // Byte swap 16-byte result // pshufb %xmm10, %xmm6 // %xmm10 has the swap mask // // Store the result // movdqu %xmm6, (%rdx) // P3 // // Return // ret SET_SIZE(gcm_mul_pclmulqdq) #endif /* lint || __lint */ #ifdef __ELF__ .section .note.GNU-stack,"",%progbits #endif diff --git a/module/icp/asm-x86_64/sha1/sha1-x86_64.S b/module/icp/asm-x86_64/sha1/sha1-x86_64.S index fc844cd8c74f..1d65e818dbfd 100644 --- a/module/icp/asm-x86_64/sha1/sha1-x86_64.S +++ b/module/icp/asm-x86_64/sha1/sha1-x86_64.S @@ -1,1369 +1,1369 @@ /* * !/usr/bin/env perl * * ==================================================================== * Written by Andy Polyakov for the OpenSSL * project. The module is, however, dual licensed under OpenSSL and * CRYPTOGAMS licenses depending on where you obtain it. For further * details see http://www.openssl.org/~appro/cryptogams/. * ==================================================================== * * sha1_block procedure for x86_64. * * It was brought to my attention that on EM64T compiler-generated code * was far behind 32-bit assembler implementation. This is unlike on * Opteron where compiler-generated code was only 15% behind 32-bit * assembler, which originally made it hard to motivate the effort. * There was suggestion to mechanically translate 32-bit code, but I * dismissed it, reasoning that x86_64 offers enough register bank * capacity to fully utilize SHA-1 parallelism. Therefore this fresh * implementation:-) However! While 64-bit code does performs better * on Opteron, I failed to beat 32-bit assembler on EM64T core. Well, * x86_64 does offer larger *addressable* bank, but out-of-order core * reaches for even more registers through dynamic aliasing, and EM64T * core must have managed to run-time optimize even 32-bit code just as * good as 64-bit one. Performance improvement is summarized in the * following table: * * gcc 3.4 32-bit asm cycles/byte * Opteron +45% +20% 6.8 * Xeon P4 +65% +0% 9.9 * Core2 +60% +10% 7.0 * * * OpenSolaris OS modifications * * Sun elects to use this software under the BSD license. * * This source originates from OpenSSL file sha1-x86_64.pl at * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz * (presumably for future OpenSSL release 0.9.8h), with these changes: * * 1. Added perl "use strict" and declared variables. * * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. * * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) * assemblers). * */ /* * This file was generated by a perl script (sha1-x86_64.pl). The comments from * the original file have been pasted above. 
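 *
 * Illustrative use (a minimal, hypothetical sketch, not part of this file):
 * the routine consumes whole 64-byte message blocks from "inpp" and updates
 * the chaining state in "ctx"; buffering of partial blocks and the final
 * padding are handled by the generic C SHA-1 code, e.g.:
 *
 *	SHA1_CTX ctx;			// initialized by the generic SHA-1 code
 *	uint8_t buf[2 * 64];		// two whole 64-byte message blocks
 *	sha1_block_data_order(&ctx, buf, 2);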
*/ #if defined(lint) || defined(__lint) #include #include -/* ARGSUSED */ void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t blocks) { + (void) ctx, (void) inpp, (void) blocks; } #else #define _ASM #include ENTRY_NP(sha1_block_data_order) .cfi_startproc mov %rsp,%rax .cfi_def_cfa_register %rax push %rbx .cfi_offset %rbx,-16 push %rbp .cfi_offset %rbp,-24 push %r12 .cfi_offset %r12,-32 mov %rdi,%r8 # reassigned argument .cfi_register %rdi, %r8 sub $72,%rsp mov %rsi,%r9 # reassigned argument .cfi_register %rsi, %r9 and $-64,%rsp mov %rdx,%r10 # reassigned argument .cfi_register %rdx, %r10 mov %rax,64(%rsp) # echo ".cfi_cfa_expression %rsp+64,deref,+8" | # openssl/crypto/perlasm/x86_64-xlate.pl .cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08 mov 0(%r8),%edx mov 4(%r8),%esi mov 8(%r8),%edi mov 12(%r8),%ebp mov 16(%r8),%r11d .align 4 .Lloop: mov 0(%r9),%eax bswap %eax mov %eax,0(%rsp) lea 0x5a827999(%eax,%r11d),%r12d mov %edi,%ebx mov 4(%r9),%eax mov %edx,%r11d xor %ebp,%ebx bswap %eax rol $5,%r11d and %esi,%ebx mov %eax,4(%rsp) add %r11d,%r12d xor %ebp,%ebx rol $30,%esi add %ebx,%r12d lea 0x5a827999(%eax,%ebp),%r11d mov %esi,%ebx mov 8(%r9),%eax mov %r12d,%ebp xor %edi,%ebx bswap %eax rol $5,%ebp and %edx,%ebx mov %eax,8(%rsp) add %ebp,%r11d xor %edi,%ebx rol $30,%edx add %ebx,%r11d lea 0x5a827999(%eax,%edi),%ebp mov %edx,%ebx mov 12(%r9),%eax mov %r11d,%edi xor %esi,%ebx bswap %eax rol $5,%edi and %r12d,%ebx mov %eax,12(%rsp) add %edi,%ebp xor %esi,%ebx rol $30,%r12d add %ebx,%ebp lea 0x5a827999(%eax,%esi),%edi mov %r12d,%ebx mov 16(%r9),%eax mov %ebp,%esi xor %edx,%ebx bswap %eax rol $5,%esi and %r11d,%ebx mov %eax,16(%rsp) add %esi,%edi xor %edx,%ebx rol $30,%r11d add %ebx,%edi lea 0x5a827999(%eax,%edx),%esi mov %r11d,%ebx mov 20(%r9),%eax mov %edi,%edx xor %r12d,%ebx bswap %eax rol $5,%edx and %ebp,%ebx mov %eax,20(%rsp) add %edx,%esi xor %r12d,%ebx rol $30,%ebp add %ebx,%esi lea 0x5a827999(%eax,%r12d),%edx mov %ebp,%ebx mov 24(%r9),%eax mov %esi,%r12d xor %r11d,%ebx bswap %eax rol $5,%r12d and %edi,%ebx mov %eax,24(%rsp) add %r12d,%edx xor %r11d,%ebx rol $30,%edi add %ebx,%edx lea 0x5a827999(%eax,%r11d),%r12d mov %edi,%ebx mov 28(%r9),%eax mov %edx,%r11d xor %ebp,%ebx bswap %eax rol $5,%r11d and %esi,%ebx mov %eax,28(%rsp) add %r11d,%r12d xor %ebp,%ebx rol $30,%esi add %ebx,%r12d lea 0x5a827999(%eax,%ebp),%r11d mov %esi,%ebx mov 32(%r9),%eax mov %r12d,%ebp xor %edi,%ebx bswap %eax rol $5,%ebp and %edx,%ebx mov %eax,32(%rsp) add %ebp,%r11d xor %edi,%ebx rol $30,%edx add %ebx,%r11d lea 0x5a827999(%eax,%edi),%ebp mov %edx,%ebx mov 36(%r9),%eax mov %r11d,%edi xor %esi,%ebx bswap %eax rol $5,%edi and %r12d,%ebx mov %eax,36(%rsp) add %edi,%ebp xor %esi,%ebx rol $30,%r12d add %ebx,%ebp lea 0x5a827999(%eax,%esi),%edi mov %r12d,%ebx mov 40(%r9),%eax mov %ebp,%esi xor %edx,%ebx bswap %eax rol $5,%esi and %r11d,%ebx mov %eax,40(%rsp) add %esi,%edi xor %edx,%ebx rol $30,%r11d add %ebx,%edi lea 0x5a827999(%eax,%edx),%esi mov %r11d,%ebx mov 44(%r9),%eax mov %edi,%edx xor %r12d,%ebx bswap %eax rol $5,%edx and %ebp,%ebx mov %eax,44(%rsp) add %edx,%esi xor %r12d,%ebx rol $30,%ebp add %ebx,%esi lea 0x5a827999(%eax,%r12d),%edx mov %ebp,%ebx mov 48(%r9),%eax mov %esi,%r12d xor %r11d,%ebx bswap %eax rol $5,%r12d and %edi,%ebx mov %eax,48(%rsp) add %r12d,%edx xor %r11d,%ebx rol $30,%edi add %ebx,%edx lea 0x5a827999(%eax,%r11d),%r12d mov %edi,%ebx mov 52(%r9),%eax mov %edx,%r11d xor %ebp,%ebx bswap %eax rol $5,%r11d and %esi,%ebx mov %eax,52(%rsp) add %r11d,%r12d xor %ebp,%ebx rol $30,%esi 
add %ebx,%r12d lea 0x5a827999(%eax,%ebp),%r11d mov %esi,%ebx mov 56(%r9),%eax mov %r12d,%ebp xor %edi,%ebx bswap %eax rol $5,%ebp and %edx,%ebx mov %eax,56(%rsp) add %ebp,%r11d xor %edi,%ebx rol $30,%edx add %ebx,%r11d lea 0x5a827999(%eax,%edi),%ebp mov %edx,%ebx mov 60(%r9),%eax mov %r11d,%edi xor %esi,%ebx bswap %eax rol $5,%edi and %r12d,%ebx mov %eax,60(%rsp) add %edi,%ebp xor %esi,%ebx rol $30,%r12d add %ebx,%ebp lea 0x5a827999(%eax,%esi),%edi mov 0(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 8(%rsp),%eax xor %edx,%ebx rol $5,%esi xor 32(%rsp),%eax and %r11d,%ebx add %esi,%edi xor 52(%rsp),%eax xor %edx,%ebx rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,0(%rsp) lea 0x5a827999(%eax,%edx),%esi mov 4(%rsp),%eax mov %r11d,%ebx mov %edi,%edx xor 12(%rsp),%eax xor %r12d,%ebx rol $5,%edx xor 36(%rsp),%eax and %ebp,%ebx add %edx,%esi xor 56(%rsp),%eax xor %r12d,%ebx rol $30,%ebp add %ebx,%esi rol $1,%eax mov %eax,4(%rsp) lea 0x5a827999(%eax,%r12d),%edx mov 8(%rsp),%eax mov %ebp,%ebx mov %esi,%r12d xor 16(%rsp),%eax xor %r11d,%ebx rol $5,%r12d xor 40(%rsp),%eax and %edi,%ebx add %r12d,%edx xor 60(%rsp),%eax xor %r11d,%ebx rol $30,%edi add %ebx,%edx rol $1,%eax mov %eax,8(%rsp) lea 0x5a827999(%eax,%r11d),%r12d mov 12(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 20(%rsp),%eax xor %ebp,%ebx rol $5,%r11d xor 44(%rsp),%eax and %esi,%ebx add %r11d,%r12d xor 0(%rsp),%eax xor %ebp,%ebx rol $30,%esi add %ebx,%r12d rol $1,%eax mov %eax,12(%rsp) lea 0x5a827999(%eax,%ebp),%r11d mov 16(%rsp),%eax mov %esi,%ebx mov %r12d,%ebp xor 24(%rsp),%eax xor %edi,%ebx rol $5,%ebp xor 48(%rsp),%eax and %edx,%ebx add %ebp,%r11d xor 4(%rsp),%eax xor %edi,%ebx rol $30,%edx add %ebx,%r11d rol $1,%eax mov %eax,16(%rsp) lea 0x6ed9eba1(%eax,%edi),%ebp mov 20(%rsp),%eax mov %edx,%ebx mov %r11d,%edi xor 28(%rsp),%eax xor %r12d,%ebx rol $5,%edi xor 52(%rsp),%eax xor %esi,%ebx add %edi,%ebp xor 8(%rsp),%eax rol $30,%r12d add %ebx,%ebp rol $1,%eax mov %eax,20(%rsp) lea 0x6ed9eba1(%eax,%esi),%edi mov 24(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 32(%rsp),%eax xor %r11d,%ebx rol $5,%esi xor 56(%rsp),%eax xor %edx,%ebx add %esi,%edi xor 12(%rsp),%eax rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,24(%rsp) lea 0x6ed9eba1(%eax,%edx),%esi mov 28(%rsp),%eax mov %r11d,%ebx mov %edi,%edx xor 36(%rsp),%eax xor %ebp,%ebx rol $5,%edx xor 60(%rsp),%eax xor %r12d,%ebx add %edx,%esi xor 16(%rsp),%eax rol $30,%ebp add %ebx,%esi rol $1,%eax mov %eax,28(%rsp) lea 0x6ed9eba1(%eax,%r12d),%edx mov 32(%rsp),%eax mov %ebp,%ebx mov %esi,%r12d xor 40(%rsp),%eax xor %edi,%ebx rol $5,%r12d xor 0(%rsp),%eax xor %r11d,%ebx add %r12d,%edx xor 20(%rsp),%eax rol $30,%edi add %ebx,%edx rol $1,%eax mov %eax,32(%rsp) lea 0x6ed9eba1(%eax,%r11d),%r12d mov 36(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 44(%rsp),%eax xor %esi,%ebx rol $5,%r11d xor 4(%rsp),%eax xor %ebp,%ebx add %r11d,%r12d xor 24(%rsp),%eax rol $30,%esi add %ebx,%r12d rol $1,%eax mov %eax,36(%rsp) lea 0x6ed9eba1(%eax,%ebp),%r11d mov 40(%rsp),%eax mov %esi,%ebx mov %r12d,%ebp xor 48(%rsp),%eax xor %edx,%ebx rol $5,%ebp xor 8(%rsp),%eax xor %edi,%ebx add %ebp,%r11d xor 28(%rsp),%eax rol $30,%edx add %ebx,%r11d rol $1,%eax mov %eax,40(%rsp) lea 0x6ed9eba1(%eax,%edi),%ebp mov 44(%rsp),%eax mov %edx,%ebx mov %r11d,%edi xor 52(%rsp),%eax xor %r12d,%ebx rol $5,%edi xor 12(%rsp),%eax xor %esi,%ebx add %edi,%ebp xor 32(%rsp),%eax rol $30,%r12d add %ebx,%ebp rol $1,%eax mov %eax,44(%rsp) lea 0x6ed9eba1(%eax,%esi),%edi mov 48(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 56(%rsp),%eax xor %r11d,%ebx rol 
$5,%esi xor 16(%rsp),%eax xor %edx,%ebx add %esi,%edi xor 36(%rsp),%eax rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,48(%rsp) lea 0x6ed9eba1(%eax,%edx),%esi mov 52(%rsp),%eax mov %r11d,%ebx mov %edi,%edx xor 60(%rsp),%eax xor %ebp,%ebx rol $5,%edx xor 20(%rsp),%eax xor %r12d,%ebx add %edx,%esi xor 40(%rsp),%eax rol $30,%ebp add %ebx,%esi rol $1,%eax mov %eax,52(%rsp) lea 0x6ed9eba1(%eax,%r12d),%edx mov 56(%rsp),%eax mov %ebp,%ebx mov %esi,%r12d xor 0(%rsp),%eax xor %edi,%ebx rol $5,%r12d xor 24(%rsp),%eax xor %r11d,%ebx add %r12d,%edx xor 44(%rsp),%eax rol $30,%edi add %ebx,%edx rol $1,%eax mov %eax,56(%rsp) lea 0x6ed9eba1(%eax,%r11d),%r12d mov 60(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 4(%rsp),%eax xor %esi,%ebx rol $5,%r11d xor 28(%rsp),%eax xor %ebp,%ebx add %r11d,%r12d xor 48(%rsp),%eax rol $30,%esi add %ebx,%r12d rol $1,%eax mov %eax,60(%rsp) lea 0x6ed9eba1(%eax,%ebp),%r11d mov 0(%rsp),%eax mov %esi,%ebx mov %r12d,%ebp xor 8(%rsp),%eax xor %edx,%ebx rol $5,%ebp xor 32(%rsp),%eax xor %edi,%ebx add %ebp,%r11d xor 52(%rsp),%eax rol $30,%edx add %ebx,%r11d rol $1,%eax mov %eax,0(%rsp) lea 0x6ed9eba1(%eax,%edi),%ebp mov 4(%rsp),%eax mov %edx,%ebx mov %r11d,%edi xor 12(%rsp),%eax xor %r12d,%ebx rol $5,%edi xor 36(%rsp),%eax xor %esi,%ebx add %edi,%ebp xor 56(%rsp),%eax rol $30,%r12d add %ebx,%ebp rol $1,%eax mov %eax,4(%rsp) lea 0x6ed9eba1(%eax,%esi),%edi mov 8(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 16(%rsp),%eax xor %r11d,%ebx rol $5,%esi xor 40(%rsp),%eax xor %edx,%ebx add %esi,%edi xor 60(%rsp),%eax rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,8(%rsp) lea 0x6ed9eba1(%eax,%edx),%esi mov 12(%rsp),%eax mov %r11d,%ebx mov %edi,%edx xor 20(%rsp),%eax xor %ebp,%ebx rol $5,%edx xor 44(%rsp),%eax xor %r12d,%ebx add %edx,%esi xor 0(%rsp),%eax rol $30,%ebp add %ebx,%esi rol $1,%eax mov %eax,12(%rsp) lea 0x6ed9eba1(%eax,%r12d),%edx mov 16(%rsp),%eax mov %ebp,%ebx mov %esi,%r12d xor 24(%rsp),%eax xor %edi,%ebx rol $5,%r12d xor 48(%rsp),%eax xor %r11d,%ebx add %r12d,%edx xor 4(%rsp),%eax rol $30,%edi add %ebx,%edx rol $1,%eax mov %eax,16(%rsp) lea 0x6ed9eba1(%eax,%r11d),%r12d mov 20(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 28(%rsp),%eax xor %esi,%ebx rol $5,%r11d xor 52(%rsp),%eax xor %ebp,%ebx add %r11d,%r12d xor 8(%rsp),%eax rol $30,%esi add %ebx,%r12d rol $1,%eax mov %eax,20(%rsp) lea 0x6ed9eba1(%eax,%ebp),%r11d mov 24(%rsp),%eax mov %esi,%ebx mov %r12d,%ebp xor 32(%rsp),%eax xor %edx,%ebx rol $5,%ebp xor 56(%rsp),%eax xor %edi,%ebx add %ebp,%r11d xor 12(%rsp),%eax rol $30,%edx add %ebx,%r11d rol $1,%eax mov %eax,24(%rsp) lea 0x6ed9eba1(%eax,%edi),%ebp mov 28(%rsp),%eax mov %edx,%ebx mov %r11d,%edi xor 36(%rsp),%eax xor %r12d,%ebx rol $5,%edi xor 60(%rsp),%eax xor %esi,%ebx add %edi,%ebp xor 16(%rsp),%eax rol $30,%r12d add %ebx,%ebp rol $1,%eax mov %eax,28(%rsp) lea 0x6ed9eba1(%eax,%esi),%edi mov 32(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 40(%rsp),%eax xor %r11d,%ebx rol $5,%esi xor 0(%rsp),%eax xor %edx,%ebx add %esi,%edi xor 20(%rsp),%eax rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,32(%rsp) lea -0x70e44324(%eax,%edx),%esi mov 36(%rsp),%eax mov %ebp,%ebx mov %ebp,%ecx xor 44(%rsp),%eax mov %edi,%edx and %r11d,%ebx xor 4(%rsp),%eax or %r11d,%ecx rol $5,%edx xor 24(%rsp),%eax and %r12d,%ecx add %edx,%esi rol $1,%eax or %ecx,%ebx rol $30,%ebp mov %eax,36(%rsp) add %ebx,%esi lea -0x70e44324(%eax,%r12d),%edx mov 40(%rsp),%eax mov %edi,%ebx mov %edi,%ecx xor 48(%rsp),%eax mov %esi,%r12d and %ebp,%ebx xor 8(%rsp),%eax or %ebp,%ecx rol $5,%r12d xor 28(%rsp),%eax and 
%r11d,%ecx add %r12d,%edx rol $1,%eax or %ecx,%ebx rol $30,%edi mov %eax,40(%rsp) add %ebx,%edx lea -0x70e44324(%eax,%r11d),%r12d mov 44(%rsp),%eax mov %esi,%ebx mov %esi,%ecx xor 52(%rsp),%eax mov %edx,%r11d and %edi,%ebx xor 12(%rsp),%eax or %edi,%ecx rol $5,%r11d xor 32(%rsp),%eax and %ebp,%ecx add %r11d,%r12d rol $1,%eax or %ecx,%ebx rol $30,%esi mov %eax,44(%rsp) add %ebx,%r12d lea -0x70e44324(%eax,%ebp),%r11d mov 48(%rsp),%eax mov %edx,%ebx mov %edx,%ecx xor 56(%rsp),%eax mov %r12d,%ebp and %esi,%ebx xor 16(%rsp),%eax or %esi,%ecx rol $5,%ebp xor 36(%rsp),%eax and %edi,%ecx add %ebp,%r11d rol $1,%eax or %ecx,%ebx rol $30,%edx mov %eax,48(%rsp) add %ebx,%r11d lea -0x70e44324(%eax,%edi),%ebp mov 52(%rsp),%eax mov %r12d,%ebx mov %r12d,%ecx xor 60(%rsp),%eax mov %r11d,%edi and %edx,%ebx xor 20(%rsp),%eax or %edx,%ecx rol $5,%edi xor 40(%rsp),%eax and %esi,%ecx add %edi,%ebp rol $1,%eax or %ecx,%ebx rol $30,%r12d mov %eax,52(%rsp) add %ebx,%ebp lea -0x70e44324(%eax,%esi),%edi mov 56(%rsp),%eax mov %r11d,%ebx mov %r11d,%ecx xor 0(%rsp),%eax mov %ebp,%esi and %r12d,%ebx xor 24(%rsp),%eax or %r12d,%ecx rol $5,%esi xor 44(%rsp),%eax and %edx,%ecx add %esi,%edi rol $1,%eax or %ecx,%ebx rol $30,%r11d mov %eax,56(%rsp) add %ebx,%edi lea -0x70e44324(%eax,%edx),%esi mov 60(%rsp),%eax mov %ebp,%ebx mov %ebp,%ecx xor 4(%rsp),%eax mov %edi,%edx and %r11d,%ebx xor 28(%rsp),%eax or %r11d,%ecx rol $5,%edx xor 48(%rsp),%eax and %r12d,%ecx add %edx,%esi rol $1,%eax or %ecx,%ebx rol $30,%ebp mov %eax,60(%rsp) add %ebx,%esi lea -0x70e44324(%eax,%r12d),%edx mov 0(%rsp),%eax mov %edi,%ebx mov %edi,%ecx xor 8(%rsp),%eax mov %esi,%r12d and %ebp,%ebx xor 32(%rsp),%eax or %ebp,%ecx rol $5,%r12d xor 52(%rsp),%eax and %r11d,%ecx add %r12d,%edx rol $1,%eax or %ecx,%ebx rol $30,%edi mov %eax,0(%rsp) add %ebx,%edx lea -0x70e44324(%eax,%r11d),%r12d mov 4(%rsp),%eax mov %esi,%ebx mov %esi,%ecx xor 12(%rsp),%eax mov %edx,%r11d and %edi,%ebx xor 36(%rsp),%eax or %edi,%ecx rol $5,%r11d xor 56(%rsp),%eax and %ebp,%ecx add %r11d,%r12d rol $1,%eax or %ecx,%ebx rol $30,%esi mov %eax,4(%rsp) add %ebx,%r12d lea -0x70e44324(%eax,%ebp),%r11d mov 8(%rsp),%eax mov %edx,%ebx mov %edx,%ecx xor 16(%rsp),%eax mov %r12d,%ebp and %esi,%ebx xor 40(%rsp),%eax or %esi,%ecx rol $5,%ebp xor 60(%rsp),%eax and %edi,%ecx add %ebp,%r11d rol $1,%eax or %ecx,%ebx rol $30,%edx mov %eax,8(%rsp) add %ebx,%r11d lea -0x70e44324(%eax,%edi),%ebp mov 12(%rsp),%eax mov %r12d,%ebx mov %r12d,%ecx xor 20(%rsp),%eax mov %r11d,%edi and %edx,%ebx xor 44(%rsp),%eax or %edx,%ecx rol $5,%edi xor 0(%rsp),%eax and %esi,%ecx add %edi,%ebp rol $1,%eax or %ecx,%ebx rol $30,%r12d mov %eax,12(%rsp) add %ebx,%ebp lea -0x70e44324(%eax,%esi),%edi mov 16(%rsp),%eax mov %r11d,%ebx mov %r11d,%ecx xor 24(%rsp),%eax mov %ebp,%esi and %r12d,%ebx xor 48(%rsp),%eax or %r12d,%ecx rol $5,%esi xor 4(%rsp),%eax and %edx,%ecx add %esi,%edi rol $1,%eax or %ecx,%ebx rol $30,%r11d mov %eax,16(%rsp) add %ebx,%edi lea -0x70e44324(%eax,%edx),%esi mov 20(%rsp),%eax mov %ebp,%ebx mov %ebp,%ecx xor 28(%rsp),%eax mov %edi,%edx and %r11d,%ebx xor 52(%rsp),%eax or %r11d,%ecx rol $5,%edx xor 8(%rsp),%eax and %r12d,%ecx add %edx,%esi rol $1,%eax or %ecx,%ebx rol $30,%ebp mov %eax,20(%rsp) add %ebx,%esi lea -0x70e44324(%eax,%r12d),%edx mov 24(%rsp),%eax mov %edi,%ebx mov %edi,%ecx xor 32(%rsp),%eax mov %esi,%r12d and %ebp,%ebx xor 56(%rsp),%eax or %ebp,%ecx rol $5,%r12d xor 12(%rsp),%eax and %r11d,%ecx add %r12d,%edx rol $1,%eax or %ecx,%ebx rol $30,%edi mov %eax,24(%rsp) add %ebx,%edx lea 
-0x70e44324(%eax,%r11d),%r12d mov 28(%rsp),%eax mov %esi,%ebx mov %esi,%ecx xor 36(%rsp),%eax mov %edx,%r11d and %edi,%ebx xor 60(%rsp),%eax or %edi,%ecx rol $5,%r11d xor 16(%rsp),%eax and %ebp,%ecx add %r11d,%r12d rol $1,%eax or %ecx,%ebx rol $30,%esi mov %eax,28(%rsp) add %ebx,%r12d lea -0x70e44324(%eax,%ebp),%r11d mov 32(%rsp),%eax mov %edx,%ebx mov %edx,%ecx xor 40(%rsp),%eax mov %r12d,%ebp and %esi,%ebx xor 0(%rsp),%eax or %esi,%ecx rol $5,%ebp xor 20(%rsp),%eax and %edi,%ecx add %ebp,%r11d rol $1,%eax or %ecx,%ebx rol $30,%edx mov %eax,32(%rsp) add %ebx,%r11d lea -0x70e44324(%eax,%edi),%ebp mov 36(%rsp),%eax mov %r12d,%ebx mov %r12d,%ecx xor 44(%rsp),%eax mov %r11d,%edi and %edx,%ebx xor 4(%rsp),%eax or %edx,%ecx rol $5,%edi xor 24(%rsp),%eax and %esi,%ecx add %edi,%ebp rol $1,%eax or %ecx,%ebx rol $30,%r12d mov %eax,36(%rsp) add %ebx,%ebp lea -0x70e44324(%eax,%esi),%edi mov 40(%rsp),%eax mov %r11d,%ebx mov %r11d,%ecx xor 48(%rsp),%eax mov %ebp,%esi and %r12d,%ebx xor 8(%rsp),%eax or %r12d,%ecx rol $5,%esi xor 28(%rsp),%eax and %edx,%ecx add %esi,%edi rol $1,%eax or %ecx,%ebx rol $30,%r11d mov %eax,40(%rsp) add %ebx,%edi lea -0x70e44324(%eax,%edx),%esi mov 44(%rsp),%eax mov %ebp,%ebx mov %ebp,%ecx xor 52(%rsp),%eax mov %edi,%edx and %r11d,%ebx xor 12(%rsp),%eax or %r11d,%ecx rol $5,%edx xor 32(%rsp),%eax and %r12d,%ecx add %edx,%esi rol $1,%eax or %ecx,%ebx rol $30,%ebp mov %eax,44(%rsp) add %ebx,%esi lea -0x70e44324(%eax,%r12d),%edx mov 48(%rsp),%eax mov %edi,%ebx mov %edi,%ecx xor 56(%rsp),%eax mov %esi,%r12d and %ebp,%ebx xor 16(%rsp),%eax or %ebp,%ecx rol $5,%r12d xor 36(%rsp),%eax and %r11d,%ecx add %r12d,%edx rol $1,%eax or %ecx,%ebx rol $30,%edi mov %eax,48(%rsp) add %ebx,%edx lea -0x359d3e2a(%eax,%r11d),%r12d mov 52(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 60(%rsp),%eax xor %esi,%ebx rol $5,%r11d xor 20(%rsp),%eax xor %ebp,%ebx add %r11d,%r12d xor 40(%rsp),%eax rol $30,%esi add %ebx,%r12d rol $1,%eax mov %eax,52(%rsp) lea -0x359d3e2a(%eax,%ebp),%r11d mov 56(%rsp),%eax mov %esi,%ebx mov %r12d,%ebp xor 0(%rsp),%eax xor %edx,%ebx rol $5,%ebp xor 24(%rsp),%eax xor %edi,%ebx add %ebp,%r11d xor 44(%rsp),%eax rol $30,%edx add %ebx,%r11d rol $1,%eax mov %eax,56(%rsp) lea -0x359d3e2a(%eax,%edi),%ebp mov 60(%rsp),%eax mov %edx,%ebx mov %r11d,%edi xor 4(%rsp),%eax xor %r12d,%ebx rol $5,%edi xor 28(%rsp),%eax xor %esi,%ebx add %edi,%ebp xor 48(%rsp),%eax rol $30,%r12d add %ebx,%ebp rol $1,%eax mov %eax,60(%rsp) lea -0x359d3e2a(%eax,%esi),%edi mov 0(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 8(%rsp),%eax xor %r11d,%ebx rol $5,%esi xor 32(%rsp),%eax xor %edx,%ebx add %esi,%edi xor 52(%rsp),%eax rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,0(%rsp) lea -0x359d3e2a(%eax,%edx),%esi mov 4(%rsp),%eax mov %r11d,%ebx mov %edi,%edx xor 12(%rsp),%eax xor %ebp,%ebx rol $5,%edx xor 36(%rsp),%eax xor %r12d,%ebx add %edx,%esi xor 56(%rsp),%eax rol $30,%ebp add %ebx,%esi rol $1,%eax mov %eax,4(%rsp) lea -0x359d3e2a(%eax,%r12d),%edx mov 8(%rsp),%eax mov %ebp,%ebx mov %esi,%r12d xor 16(%rsp),%eax xor %edi,%ebx rol $5,%r12d xor 40(%rsp),%eax xor %r11d,%ebx add %r12d,%edx xor 60(%rsp),%eax rol $30,%edi add %ebx,%edx rol $1,%eax mov %eax,8(%rsp) lea -0x359d3e2a(%eax,%r11d),%r12d mov 12(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 20(%rsp),%eax xor %esi,%ebx rol $5,%r11d xor 44(%rsp),%eax xor %ebp,%ebx add %r11d,%r12d xor 0(%rsp),%eax rol $30,%esi add %ebx,%r12d rol $1,%eax mov %eax,12(%rsp) lea -0x359d3e2a(%eax,%ebp),%r11d mov 16(%rsp),%eax mov %esi,%ebx mov %r12d,%ebp xor 24(%rsp),%eax xor %edx,%ebx 
rol $5,%ebp xor 48(%rsp),%eax xor %edi,%ebx add %ebp,%r11d xor 4(%rsp),%eax rol $30,%edx add %ebx,%r11d rol $1,%eax mov %eax,16(%rsp) lea -0x359d3e2a(%eax,%edi),%ebp mov 20(%rsp),%eax mov %edx,%ebx mov %r11d,%edi xor 28(%rsp),%eax xor %r12d,%ebx rol $5,%edi xor 52(%rsp),%eax xor %esi,%ebx add %edi,%ebp xor 8(%rsp),%eax rol $30,%r12d add %ebx,%ebp rol $1,%eax mov %eax,20(%rsp) lea -0x359d3e2a(%eax,%esi),%edi mov 24(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 32(%rsp),%eax xor %r11d,%ebx rol $5,%esi xor 56(%rsp),%eax xor %edx,%ebx add %esi,%edi xor 12(%rsp),%eax rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,24(%rsp) lea -0x359d3e2a(%eax,%edx),%esi mov 28(%rsp),%eax mov %r11d,%ebx mov %edi,%edx xor 36(%rsp),%eax xor %ebp,%ebx rol $5,%edx xor 60(%rsp),%eax xor %r12d,%ebx add %edx,%esi xor 16(%rsp),%eax rol $30,%ebp add %ebx,%esi rol $1,%eax mov %eax,28(%rsp) lea -0x359d3e2a(%eax,%r12d),%edx mov 32(%rsp),%eax mov %ebp,%ebx mov %esi,%r12d xor 40(%rsp),%eax xor %edi,%ebx rol $5,%r12d xor 0(%rsp),%eax xor %r11d,%ebx add %r12d,%edx xor 20(%rsp),%eax rol $30,%edi add %ebx,%edx rol $1,%eax mov %eax,32(%rsp) lea -0x359d3e2a(%eax,%r11d),%r12d mov 36(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 44(%rsp),%eax xor %esi,%ebx rol $5,%r11d xor 4(%rsp),%eax xor %ebp,%ebx add %r11d,%r12d xor 24(%rsp),%eax rol $30,%esi add %ebx,%r12d rol $1,%eax mov %eax,36(%rsp) lea -0x359d3e2a(%eax,%ebp),%r11d mov 40(%rsp),%eax mov %esi,%ebx mov %r12d,%ebp xor 48(%rsp),%eax xor %edx,%ebx rol $5,%ebp xor 8(%rsp),%eax xor %edi,%ebx add %ebp,%r11d xor 28(%rsp),%eax rol $30,%edx add %ebx,%r11d rol $1,%eax mov %eax,40(%rsp) lea -0x359d3e2a(%eax,%edi),%ebp mov 44(%rsp),%eax mov %edx,%ebx mov %r11d,%edi xor 52(%rsp),%eax xor %r12d,%ebx rol $5,%edi xor 12(%rsp),%eax xor %esi,%ebx add %edi,%ebp xor 32(%rsp),%eax rol $30,%r12d add %ebx,%ebp rol $1,%eax mov %eax,44(%rsp) lea -0x359d3e2a(%eax,%esi),%edi mov 48(%rsp),%eax mov %r12d,%ebx mov %ebp,%esi xor 56(%rsp),%eax xor %r11d,%ebx rol $5,%esi xor 16(%rsp),%eax xor %edx,%ebx add %esi,%edi xor 36(%rsp),%eax rol $30,%r11d add %ebx,%edi rol $1,%eax mov %eax,48(%rsp) lea -0x359d3e2a(%eax,%edx),%esi mov 52(%rsp),%eax mov %r11d,%ebx mov %edi,%edx xor 60(%rsp),%eax xor %ebp,%ebx rol $5,%edx xor 20(%rsp),%eax xor %r12d,%ebx add %edx,%esi xor 40(%rsp),%eax rol $30,%ebp add %ebx,%esi rol $1,%eax lea -0x359d3e2a(%eax,%r12d),%edx mov 56(%rsp),%eax mov %ebp,%ebx mov %esi,%r12d xor 0(%rsp),%eax xor %edi,%ebx rol $5,%r12d xor 24(%rsp),%eax xor %r11d,%ebx add %r12d,%edx xor 44(%rsp),%eax rol $30,%edi add %ebx,%edx rol $1,%eax lea -0x359d3e2a(%eax,%r11d),%r12d mov 60(%rsp),%eax mov %edi,%ebx mov %edx,%r11d xor 4(%rsp),%eax xor %esi,%ebx rol $5,%r11d xor 28(%rsp),%eax xor %ebp,%ebx add %r11d,%r12d xor 48(%rsp),%eax rol $30,%esi add %ebx,%r12d rol $1,%eax lea -0x359d3e2a(%eax,%ebp),%r11d mov %esi,%ebx mov %r12d,%ebp xor %edx,%ebx rol $5,%ebp xor %edi,%ebx add %ebp,%r11d rol $30,%edx add %ebx,%r11d // Update and save state information in SHA-1 context add 0(%r8),%r11d add 4(%r8),%r12d add 8(%r8),%edx add 12(%r8),%esi add 16(%r8),%edi mov %r11d,0(%r8) mov %r12d,4(%r8) mov %edx,8(%r8) mov %esi,12(%r8) mov %edi,16(%r8) xchg %r11d,%edx # mov %r11d,%edx xchg %r12d,%esi # mov %r12d,%esi xchg %r11d,%edi # mov %edx,%edi xchg %r12d,%ebp # mov %esi,%ebp # mov %edi,%r11d lea 64(%r9),%r9 sub $1,%r10 jnz .Lloop mov 64(%rsp),%rsp .cfi_def_cfa %rsp,8 movq -24(%rsp),%r12 .cfi_restore %r12 movq -16(%rsp),%rbp .cfi_restore %rbp movq -8(%rsp),%rbx .cfi_restore %rbx ret .cfi_endproc SET_SIZE(sha1_block_data_order) .data 
.asciz "SHA1 block transform for x86_64, CRYPTOGAMS by " #endif /* lint || __lint */ #ifdef __ELF__ .section .note.GNU-stack,"",%progbits #endif diff --git a/module/icp/asm-x86_64/sha2/sha256_impl.S b/module/icp/asm-x86_64/sha2/sha256_impl.S index 28b048d2db24..ccd4a3e6b3af 100644 --- a/module/icp/asm-x86_64/sha2/sha256_impl.S +++ b/module/icp/asm-x86_64/sha2/sha256_impl.S @@ -1,2089 +1,2089 @@ /* * ==================================================================== * Written by Andy Polyakov for the OpenSSL * project. Rights for redistribution and usage in source and binary * forms are granted according to the OpenSSL license. * ==================================================================== * * sha256/512_block procedure for x86_64. * * 40% improvement over compiler-generated code on Opteron. On EM64T * sha256 was observed to run >80% faster and sha512 - >40%. No magical * tricks, just straight implementation... I really wonder why gcc * [being armed with inline assembler] fails to generate as fast code. * The only thing which is cool about this module is that it's very * same instruction sequence used for both SHA-256 and SHA-512. In * former case the instructions operate on 32-bit operands, while in * latter - on 64-bit ones. All I had to do is to get one flavor right, * the other one passed the test right away:-) * * sha256_block runs in ~1005 cycles on Opteron, which gives you * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock * frequency in GHz. sha512_block runs in ~1275 cycles, which results * in 128*1000/1275=100MBps per GHz. Is there room for improvement? * Well, if you compare it to IA-64 implementation, which maintains * X[16] in register bank[!], tends to 4 instructions per CPU clock * cycle and runs in 1003 cycles, 1275 is very good result for 3-way * issue Opteron pipeline and X[16] maintained in memory. So that *if* * there is a way to improve it, *then* the only way would be to try to * offload X[16] updates to SSE unit, but that would require "deeper" * loop unroll, which in turn would naturally cause size blow-up, not * to mention increased complexity! And once again, only *if* it's * actually possible to noticeably improve overall ILP, instruction * level parallelism, on a given CPU implementation in this case. * * Special note on Intel EM64T. While Opteron CPU exhibits perfect * performance ratio of 1.5 between 64- and 32-bit flavors [see above], * [currently available] EM64T CPUs apparently are far from it. On the * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit * sha256_block:-( This is presumably because 64-bit shifts/rotates * apparently are not atomic instructions, but implemented in microcode. */ /* * OpenSolaris OS modifications * * Sun elects to use this software under the BSD license. * * This source originates from OpenSSL file sha512-x86_64.pl at * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz * (presumably for future OpenSSL release 0.9.8h), with these changes: * * 1. Added perl "use strict" and declared variables. * * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. * * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) * assemblers). Replaced the .picmeup macro with assembler code. * * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype", * at the beginning of SHA2_CTX (the next field is 8-byte aligned). 
*/ /* * This file was generated by a perl script (sha512-x86_64.pl) that were * used to generate sha256 and sha512 variants from the same code base. * The comments from the original file have been pasted above. */ #if defined(lint) || defined(__lint) #include #include -/* ARGSUSED */ void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) { + (void) ctx, (void) in, (void) num; } #else #define _ASM #include ENTRY_NP(SHA256TransformBlocks) .cfi_startproc movq %rsp, %rax .cfi_def_cfa_register %rax push %rbx .cfi_offset %rbx,-16 push %rbp .cfi_offset %rbp,-24 push %r12 .cfi_offset %r12,-32 push %r13 .cfi_offset %r13,-40 push %r14 .cfi_offset %r14,-48 push %r15 .cfi_offset %r15,-56 mov %rsp,%rbp # copy %rsp shl $4,%rdx # num*16 sub $16*4+4*8,%rsp lea (%rsi,%rdx,4),%rdx # inp+num*16*4 and $-64,%rsp # align stack frame add $8,%rdi # Skip OpenSolaris field, "algotype" mov %rdi,16*4+0*8(%rsp) # save ctx, 1st arg mov %rsi,16*4+1*8(%rsp) # save inp, 2nd arg mov %rdx,16*4+2*8(%rsp) # save end pointer, "3rd" arg mov %rbp,16*4+3*8(%rsp) # save copy of %rsp # echo ".cfi_cfa_expression %rsp+88,deref,+56" | # openssl/crypto/perlasm/x86_64-xlate.pl .cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x38 #.picmeup %rbp # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts # the address of the "next" instruction into the target register # (%rbp). This generates these 2 instructions: lea .Llea(%rip),%rbp #nop # .picmeup generates a nop for mod 8 alignment--not needed here .Llea: lea K256-.(%rbp),%rbp mov 4*0(%rdi),%eax mov 4*1(%rdi),%ebx mov 4*2(%rdi),%ecx mov 4*3(%rdi),%edx mov 4*4(%rdi),%r8d mov 4*5(%rdi),%r9d mov 4*6(%rdi),%r10d mov 4*7(%rdi),%r11d jmp .Lloop .align 16 .Lloop: xor %rdi,%rdi mov 4*0(%rsi),%r12d bswap %r12d mov %r8d,%r13d mov %r8d,%r14d mov %r9d,%r15d ror $6,%r13d ror $11,%r14d xor %r10d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r8d,%r15d # (f^g)&e mov %r12d,0(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r11d,%r12d # T1+=h mov %eax,%r11d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %eax,%r13d mov %eax,%r14d ror $2,%r11d ror $13,%r13d mov %eax,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r11d ror $9,%r13d or %ecx,%r14d # a|c xor %r13d,%r11d # h=Sigma0(a) and %ecx,%r15d # a&c add %r12d,%edx # d+=T1 and %ebx,%r14d # (a|c)&b add %r12d,%r11d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r11d # h+=Maj(a,b,c) mov 4*1(%rsi),%r12d bswap %r12d mov %edx,%r13d mov %edx,%r14d mov %r8d,%r15d ror $6,%r13d ror $11,%r14d xor %r9d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %edx,%r15d # (f^g)&e mov %r12d,4(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r10d,%r12d # T1+=h mov %r11d,%r10d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r11d,%r13d mov %r11d,%r14d ror $2,%r10d ror $13,%r13d mov %r11d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r10d ror $9,%r13d or %ebx,%r14d # a|c xor %r13d,%r10d # h=Sigma0(a) and %ebx,%r15d # a&c add %r12d,%ecx # d+=T1 and %eax,%r14d # (a|c)&b add %r12d,%r10d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r10d # h+=Maj(a,b,c) mov 4*2(%rsi),%r12d bswap %r12d mov %ecx,%r13d mov %ecx,%r14d mov %edx,%r15d ror $6,%r13d ror $11,%r14d xor %r8d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ecx,%r15d # (f^g)&e mov %r12d,8(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r9d,%r12d # T1+=h mov %r10d,%r9d add 
%r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r10d,%r13d mov %r10d,%r14d ror $2,%r9d ror $13,%r13d mov %r10d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r9d ror $9,%r13d or %eax,%r14d # a|c xor %r13d,%r9d # h=Sigma0(a) and %eax,%r15d # a&c add %r12d,%ebx # d+=T1 and %r11d,%r14d # (a|c)&b add %r12d,%r9d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r9d # h+=Maj(a,b,c) mov 4*3(%rsi),%r12d bswap %r12d mov %ebx,%r13d mov %ebx,%r14d mov %ecx,%r15d ror $6,%r13d ror $11,%r14d xor %edx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ebx,%r15d # (f^g)&e mov %r12d,12(%rsp) xor %r14d,%r13d # Sigma1(e) xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r8d,%r12d # T1+=h mov %r9d,%r8d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r9d,%r13d mov %r9d,%r14d ror $2,%r8d ror $13,%r13d mov %r9d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r8d ror $9,%r13d or %r11d,%r14d # a|c xor %r13d,%r8d # h=Sigma0(a) and %r11d,%r15d # a&c add %r12d,%eax # d+=T1 and %r10d,%r14d # (a|c)&b add %r12d,%r8d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r8d # h+=Maj(a,b,c) mov 4*4(%rsi),%r12d bswap %r12d mov %eax,%r13d mov %eax,%r14d mov %ebx,%r15d ror $6,%r13d ror $11,%r14d xor %ecx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %eax,%r15d # (f^g)&e mov %r12d,16(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %edx,%r12d # T1+=h mov %r8d,%edx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r8d,%r13d mov %r8d,%r14d ror $2,%edx ror $13,%r13d mov %r8d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%edx ror $9,%r13d or %r10d,%r14d # a|c xor %r13d,%edx # h=Sigma0(a) and %r10d,%r15d # a&c add %r12d,%r11d # d+=T1 and %r9d,%r14d # (a|c)&b add %r12d,%edx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%edx # h+=Maj(a,b,c) mov 4*5(%rsi),%r12d bswap %r12d mov %r11d,%r13d mov %r11d,%r14d mov %eax,%r15d ror $6,%r13d ror $11,%r14d xor %ebx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r11d,%r15d # (f^g)&e mov %r12d,20(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ecx,%r12d # T1+=h mov %edx,%ecx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %edx,%r13d mov %edx,%r14d ror $2,%ecx ror $13,%r13d mov %edx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ecx ror $9,%r13d or %r9d,%r14d # a|c xor %r13d,%ecx # h=Sigma0(a) and %r9d,%r15d # a&c add %r12d,%r10d # d+=T1 and %r8d,%r14d # (a|c)&b add %r12d,%ecx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ecx # h+=Maj(a,b,c) mov 4*6(%rsi),%r12d bswap %r12d mov %r10d,%r13d mov %r10d,%r14d mov %r11d,%r15d ror $6,%r13d ror $11,%r14d xor %eax,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r10d,%r15d # (f^g)&e mov %r12d,24(%rsp) xor %r14d,%r13d # Sigma1(e) xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ebx,%r12d # T1+=h mov %ecx,%ebx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ecx,%r13d mov %ecx,%r14d ror $2,%ebx ror $13,%r13d mov %ecx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ebx ror $9,%r13d or %r8d,%r14d # a|c xor %r13d,%ebx # h=Sigma0(a) and %r8d,%r15d # a&c add %r12d,%r9d # d+=T1 and %edx,%r14d # (a|c)&b add %r12d,%ebx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ebx # h+=Maj(a,b,c) mov 4*7(%rsi),%r12d bswap %r12d mov %r9d,%r13d mov %r9d,%r14d mov %r10d,%r15d ror $6,%r13d ror 
$11,%r14d xor %r11d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r9d,%r15d # (f^g)&e mov %r12d,28(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %eax,%r12d # T1+=h mov %ebx,%eax add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ebx,%r13d mov %ebx,%r14d ror $2,%eax ror $13,%r13d mov %ebx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%eax ror $9,%r13d or %edx,%r14d # a|c xor %r13d,%eax # h=Sigma0(a) and %edx,%r15d # a&c add %r12d,%r8d # d+=T1 and %ecx,%r14d # (a|c)&b add %r12d,%eax # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%eax # h+=Maj(a,b,c) mov 4*8(%rsi),%r12d bswap %r12d mov %r8d,%r13d mov %r8d,%r14d mov %r9d,%r15d ror $6,%r13d ror $11,%r14d xor %r10d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r8d,%r15d # (f^g)&e mov %r12d,32(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r11d,%r12d # T1+=h mov %eax,%r11d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %eax,%r13d mov %eax,%r14d ror $2,%r11d ror $13,%r13d mov %eax,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r11d ror $9,%r13d or %ecx,%r14d # a|c xor %r13d,%r11d # h=Sigma0(a) and %ecx,%r15d # a&c add %r12d,%edx # d+=T1 and %ebx,%r14d # (a|c)&b add %r12d,%r11d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r11d # h+=Maj(a,b,c) mov 4*9(%rsi),%r12d bswap %r12d mov %edx,%r13d mov %edx,%r14d mov %r8d,%r15d ror $6,%r13d ror $11,%r14d xor %r9d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %edx,%r15d # (f^g)&e mov %r12d,36(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r10d,%r12d # T1+=h mov %r11d,%r10d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r11d,%r13d mov %r11d,%r14d ror $2,%r10d ror $13,%r13d mov %r11d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r10d ror $9,%r13d or %ebx,%r14d # a|c xor %r13d,%r10d # h=Sigma0(a) and %ebx,%r15d # a&c add %r12d,%ecx # d+=T1 and %eax,%r14d # (a|c)&b add %r12d,%r10d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r10d # h+=Maj(a,b,c) mov 4*10(%rsi),%r12d bswap %r12d mov %ecx,%r13d mov %ecx,%r14d mov %edx,%r15d ror $6,%r13d ror $11,%r14d xor %r8d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ecx,%r15d # (f^g)&e mov %r12d,40(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r9d,%r12d # T1+=h mov %r10d,%r9d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r10d,%r13d mov %r10d,%r14d ror $2,%r9d ror $13,%r13d mov %r10d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r9d ror $9,%r13d or %eax,%r14d # a|c xor %r13d,%r9d # h=Sigma0(a) and %eax,%r15d # a&c add %r12d,%ebx # d+=T1 and %r11d,%r14d # (a|c)&b add %r12d,%r9d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r9d # h+=Maj(a,b,c) mov 4*11(%rsi),%r12d bswap %r12d mov %ebx,%r13d mov %ebx,%r14d mov %ecx,%r15d ror $6,%r13d ror $11,%r14d xor %edx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ebx,%r15d # (f^g)&e mov %r12d,44(%rsp) xor %r14d,%r13d # Sigma1(e) xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r8d,%r12d # T1+=h mov %r9d,%r8d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r9d,%r13d mov %r9d,%r14d ror $2,%r8d ror $13,%r13d mov %r9d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r8d ror $9,%r13d or %r11d,%r14d # a|c xor %r13d,%r8d # h=Sigma0(a) and %r11d,%r15d # a&c add %r12d,%eax # d+=T1 and %r10d,%r14d # (a|c)&b 
add %r12d,%r8d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r8d # h+=Maj(a,b,c) mov 4*12(%rsi),%r12d bswap %r12d mov %eax,%r13d mov %eax,%r14d mov %ebx,%r15d ror $6,%r13d ror $11,%r14d xor %ecx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %eax,%r15d # (f^g)&e mov %r12d,48(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %edx,%r12d # T1+=h mov %r8d,%edx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r8d,%r13d mov %r8d,%r14d ror $2,%edx ror $13,%r13d mov %r8d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%edx ror $9,%r13d or %r10d,%r14d # a|c xor %r13d,%edx # h=Sigma0(a) and %r10d,%r15d # a&c add %r12d,%r11d # d+=T1 and %r9d,%r14d # (a|c)&b add %r12d,%edx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%edx # h+=Maj(a,b,c) mov 4*13(%rsi),%r12d bswap %r12d mov %r11d,%r13d mov %r11d,%r14d mov %eax,%r15d ror $6,%r13d ror $11,%r14d xor %ebx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r11d,%r15d # (f^g)&e mov %r12d,52(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ecx,%r12d # T1+=h mov %edx,%ecx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %edx,%r13d mov %edx,%r14d ror $2,%ecx ror $13,%r13d mov %edx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ecx ror $9,%r13d or %r9d,%r14d # a|c xor %r13d,%ecx # h=Sigma0(a) and %r9d,%r15d # a&c add %r12d,%r10d # d+=T1 and %r8d,%r14d # (a|c)&b add %r12d,%ecx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ecx # h+=Maj(a,b,c) mov 4*14(%rsi),%r12d bswap %r12d mov %r10d,%r13d mov %r10d,%r14d mov %r11d,%r15d ror $6,%r13d ror $11,%r14d xor %eax,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r10d,%r15d # (f^g)&e mov %r12d,56(%rsp) xor %r14d,%r13d # Sigma1(e) xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ebx,%r12d # T1+=h mov %ecx,%ebx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ecx,%r13d mov %ecx,%r14d ror $2,%ebx ror $13,%r13d mov %ecx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ebx ror $9,%r13d or %r8d,%r14d # a|c xor %r13d,%ebx # h=Sigma0(a) and %r8d,%r15d # a&c add %r12d,%r9d # d+=T1 and %edx,%r14d # (a|c)&b add %r12d,%ebx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ebx # h+=Maj(a,b,c) mov 4*15(%rsi),%r12d bswap %r12d mov %r9d,%r13d mov %r9d,%r14d mov %r10d,%r15d ror $6,%r13d ror $11,%r14d xor %r11d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r9d,%r15d # (f^g)&e mov %r12d,60(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %eax,%r12d # T1+=h mov %ebx,%eax add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ebx,%r13d mov %ebx,%r14d ror $2,%eax ror $13,%r13d mov %ebx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%eax ror $9,%r13d or %edx,%r14d # a|c xor %r13d,%eax # h=Sigma0(a) and %edx,%r15d # a&c add %r12d,%r8d # d+=T1 and %ecx,%r14d # (a|c)&b add %r12d,%eax # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%eax # h+=Maj(a,b,c) jmp .Lrounds_16_xx .align 16 .Lrounds_16_xx: mov 4(%rsp),%r13d mov 56(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 36(%rsp),%r12d add 0(%rsp),%r12d mov %r8d,%r13d mov %r8d,%r14d mov %r9d,%r15d ror $6,%r13d 
ror $11,%r14d xor %r10d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r8d,%r15d # (f^g)&e mov %r12d,0(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r11d,%r12d # T1+=h mov %eax,%r11d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %eax,%r13d mov %eax,%r14d ror $2,%r11d ror $13,%r13d mov %eax,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r11d ror $9,%r13d or %ecx,%r14d # a|c xor %r13d,%r11d # h=Sigma0(a) and %ecx,%r15d # a&c add %r12d,%edx # d+=T1 and %ebx,%r14d # (a|c)&b add %r12d,%r11d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r11d # h+=Maj(a,b,c) mov 8(%rsp),%r13d mov 60(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 40(%rsp),%r12d add 4(%rsp),%r12d mov %edx,%r13d mov %edx,%r14d mov %r8d,%r15d ror $6,%r13d ror $11,%r14d xor %r9d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %edx,%r15d # (f^g)&e mov %r12d,4(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r10d,%r12d # T1+=h mov %r11d,%r10d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r11d,%r13d mov %r11d,%r14d ror $2,%r10d ror $13,%r13d mov %r11d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r10d ror $9,%r13d or %ebx,%r14d # a|c xor %r13d,%r10d # h=Sigma0(a) and %ebx,%r15d # a&c add %r12d,%ecx # d+=T1 and %eax,%r14d # (a|c)&b add %r12d,%r10d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r10d # h+=Maj(a,b,c) mov 12(%rsp),%r13d mov 0(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 44(%rsp),%r12d add 8(%rsp),%r12d mov %ecx,%r13d mov %ecx,%r14d mov %edx,%r15d ror $6,%r13d ror $11,%r14d xor %r8d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ecx,%r15d # (f^g)&e mov %r12d,8(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r9d,%r12d # T1+=h mov %r10d,%r9d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r10d,%r13d mov %r10d,%r14d ror $2,%r9d ror $13,%r13d mov %r10d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r9d ror $9,%r13d or %eax,%r14d # a|c xor %r13d,%r9d # h=Sigma0(a) and %eax,%r15d # a&c add %r12d,%ebx # d+=T1 and %r11d,%r14d # (a|c)&b add %r12d,%r9d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r9d # h+=Maj(a,b,c) mov 16(%rsp),%r13d mov 4(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 48(%rsp),%r12d add 12(%rsp),%r12d mov %ebx,%r13d mov %ebx,%r14d mov %ecx,%r15d ror $6,%r13d ror $11,%r14d xor %edx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ebx,%r15d # (f^g)&e mov %r12d,12(%rsp) xor %r14d,%r13d # Sigma1(e) xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r8d,%r12d # T1+=h mov %r9d,%r8d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r9d,%r13d mov %r9d,%r14d ror $2,%r8d ror $13,%r13d mov %r9d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r8d ror $9,%r13d or %r11d,%r14d # a|c xor 
%r13d,%r8d # h=Sigma0(a) and %r11d,%r15d # a&c add %r12d,%eax # d+=T1 and %r10d,%r14d # (a|c)&b add %r12d,%r8d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r8d # h+=Maj(a,b,c) mov 20(%rsp),%r13d mov 8(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 52(%rsp),%r12d add 16(%rsp),%r12d mov %eax,%r13d mov %eax,%r14d mov %ebx,%r15d ror $6,%r13d ror $11,%r14d xor %ecx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %eax,%r15d # (f^g)&e mov %r12d,16(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %edx,%r12d # T1+=h mov %r8d,%edx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r8d,%r13d mov %r8d,%r14d ror $2,%edx ror $13,%r13d mov %r8d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%edx ror $9,%r13d or %r10d,%r14d # a|c xor %r13d,%edx # h=Sigma0(a) and %r10d,%r15d # a&c add %r12d,%r11d # d+=T1 and %r9d,%r14d # (a|c)&b add %r12d,%edx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%edx # h+=Maj(a,b,c) mov 24(%rsp),%r13d mov 12(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 56(%rsp),%r12d add 20(%rsp),%r12d mov %r11d,%r13d mov %r11d,%r14d mov %eax,%r15d ror $6,%r13d ror $11,%r14d xor %ebx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r11d,%r15d # (f^g)&e mov %r12d,20(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ecx,%r12d # T1+=h mov %edx,%ecx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %edx,%r13d mov %edx,%r14d ror $2,%ecx ror $13,%r13d mov %edx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ecx ror $9,%r13d or %r9d,%r14d # a|c xor %r13d,%ecx # h=Sigma0(a) and %r9d,%r15d # a&c add %r12d,%r10d # d+=T1 and %r8d,%r14d # (a|c)&b add %r12d,%ecx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ecx # h+=Maj(a,b,c) mov 28(%rsp),%r13d mov 16(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 60(%rsp),%r12d add 24(%rsp),%r12d mov %r10d,%r13d mov %r10d,%r14d mov %r11d,%r15d ror $6,%r13d ror $11,%r14d xor %eax,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r10d,%r15d # (f^g)&e mov %r12d,24(%rsp) xor %r14d,%r13d # Sigma1(e) xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ebx,%r12d # T1+=h mov %ecx,%ebx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ecx,%r13d mov %ecx,%r14d ror $2,%ebx ror $13,%r13d mov %ecx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ebx ror $9,%r13d or %r8d,%r14d # a|c xor %r13d,%ebx # h=Sigma0(a) and %r8d,%r15d # a&c add %r12d,%r9d # d+=T1 and %edx,%r14d # (a|c)&b add %r12d,%ebx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ebx # h+=Maj(a,b,c) mov 32(%rsp),%r13d mov 20(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor 
%r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 0(%rsp),%r12d add 28(%rsp),%r12d mov %r9d,%r13d mov %r9d,%r14d mov %r10d,%r15d ror $6,%r13d ror $11,%r14d xor %r11d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r9d,%r15d # (f^g)&e mov %r12d,28(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %eax,%r12d # T1+=h mov %ebx,%eax add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ebx,%r13d mov %ebx,%r14d ror $2,%eax ror $13,%r13d mov %ebx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%eax ror $9,%r13d or %edx,%r14d # a|c xor %r13d,%eax # h=Sigma0(a) and %edx,%r15d # a&c add %r12d,%r8d # d+=T1 and %ecx,%r14d # (a|c)&b add %r12d,%eax # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%eax # h+=Maj(a,b,c) mov 36(%rsp),%r13d mov 24(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 4(%rsp),%r12d add 32(%rsp),%r12d mov %r8d,%r13d mov %r8d,%r14d mov %r9d,%r15d ror $6,%r13d ror $11,%r14d xor %r10d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r8d,%r15d # (f^g)&e mov %r12d,32(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r11d,%r12d # T1+=h mov %eax,%r11d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %eax,%r13d mov %eax,%r14d ror $2,%r11d ror $13,%r13d mov %eax,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r11d ror $9,%r13d or %ecx,%r14d # a|c xor %r13d,%r11d # h=Sigma0(a) and %ecx,%r15d # a&c add %r12d,%edx # d+=T1 and %ebx,%r14d # (a|c)&b add %r12d,%r11d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r11d # h+=Maj(a,b,c) mov 40(%rsp),%r13d mov 28(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 8(%rsp),%r12d add 36(%rsp),%r12d mov %edx,%r13d mov %edx,%r14d mov %r8d,%r15d ror $6,%r13d ror $11,%r14d xor %r9d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %edx,%r15d # (f^g)&e mov %r12d,36(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r10d,%r12d # T1+=h mov %r11d,%r10d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r11d,%r13d mov %r11d,%r14d ror $2,%r10d ror $13,%r13d mov %r11d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r10d ror $9,%r13d or %ebx,%r14d # a|c xor %r13d,%r10d # h=Sigma0(a) and %ebx,%r15d # a&c add %r12d,%ecx # d+=T1 and %eax,%r14d # (a|c)&b add %r12d,%r10d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r10d # h+=Maj(a,b,c) mov 44(%rsp),%r13d mov 32(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 12(%rsp),%r12d add 40(%rsp),%r12d mov %ecx,%r13d mov %ecx,%r14d mov %edx,%r15d ror $6,%r13d ror $11,%r14d xor %r8d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ecx,%r15d # (f^g)&e mov %r12d,40(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r9d,%r12d # T1+=h mov %r10d,%r9d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov 
%r10d,%r13d mov %r10d,%r14d ror $2,%r9d ror $13,%r13d mov %r10d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r9d ror $9,%r13d or %eax,%r14d # a|c xor %r13d,%r9d # h=Sigma0(a) and %eax,%r15d # a&c add %r12d,%ebx # d+=T1 and %r11d,%r14d # (a|c)&b add %r12d,%r9d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r9d # h+=Maj(a,b,c) mov 48(%rsp),%r13d mov 36(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 16(%rsp),%r12d add 44(%rsp),%r12d mov %ebx,%r13d mov %ebx,%r14d mov %ecx,%r15d ror $6,%r13d ror $11,%r14d xor %edx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %ebx,%r15d # (f^g)&e mov %r12d,44(%rsp) xor %r14d,%r13d # Sigma1(e) xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %r8d,%r12d # T1+=h mov %r9d,%r8d add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r9d,%r13d mov %r9d,%r14d ror $2,%r8d ror $13,%r13d mov %r9d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%r8d ror $9,%r13d or %r11d,%r14d # a|c xor %r13d,%r8d # h=Sigma0(a) and %r11d,%r15d # a&c add %r12d,%eax # d+=T1 and %r10d,%r14d # (a|c)&b add %r12d,%r8d # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%r8d # h+=Maj(a,b,c) mov 52(%rsp),%r13d mov 40(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 20(%rsp),%r12d add 48(%rsp),%r12d mov %eax,%r13d mov %eax,%r14d mov %ebx,%r15d ror $6,%r13d ror $11,%r14d xor %ecx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %eax,%r15d # (f^g)&e mov %r12d,48(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %edx,%r12d # T1+=h mov %r8d,%edx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %r8d,%r13d mov %r8d,%r14d ror $2,%edx ror $13,%r13d mov %r8d,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%edx ror $9,%r13d or %r10d,%r14d # a|c xor %r13d,%edx # h=Sigma0(a) and %r10d,%r15d # a&c add %r12d,%r11d # d+=T1 and %r9d,%r14d # (a|c)&b add %r12d,%edx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%edx # h+=Maj(a,b,c) mov 56(%rsp),%r13d mov 44(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 24(%rsp),%r12d add 52(%rsp),%r12d mov %r11d,%r13d mov %r11d,%r14d mov %eax,%r15d ror $6,%r13d ror $11,%r14d xor %ebx,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r11d,%r15d # (f^g)&e mov %r12d,52(%rsp) xor %r14d,%r13d # Sigma1(e) xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ecx,%r12d # T1+=h mov %edx,%ecx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %edx,%r13d mov %edx,%r14d ror $2,%ecx ror $13,%r13d mov %edx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ecx ror $9,%r13d or %r9d,%r14d # a|c xor %r13d,%ecx # h=Sigma0(a) and %r9d,%r15d # a&c add %r12d,%r10d # d+=T1 and %r8d,%r14d # (a|c)&b add %r12d,%ecx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ecx # h+=Maj(a,b,c) mov 60(%rsp),%r13d mov 48(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d 
ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 28(%rsp),%r12d add 56(%rsp),%r12d mov %r10d,%r13d mov %r10d,%r14d mov %r11d,%r15d ror $6,%r13d ror $11,%r14d xor %eax,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r10d,%r15d # (f^g)&e mov %r12d,56(%rsp) xor %r14d,%r13d # Sigma1(e) xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g add %ebx,%r12d # T1+=h mov %ecx,%ebx add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ecx,%r13d mov %ecx,%r14d ror $2,%ebx ror $13,%r13d mov %ecx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%ebx ror $9,%r13d or %r8d,%r14d # a|c xor %r13d,%ebx # h=Sigma0(a) and %r8d,%r15d # a&c add %r12d,%r9d # d+=T1 and %edx,%r14d # (a|c)&b add %r12d,%ebx # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%ebx # h+=Maj(a,b,c) mov 0(%rsp),%r13d mov 52(%rsp),%r12d mov %r13d,%r15d shr $3,%r13d ror $7,%r15d xor %r15d,%r13d ror $11,%r15d xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) mov %r12d,%r14d shr $10,%r12d ror $17,%r14d xor %r14d,%r12d ror $2,%r14d xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) add %r13d,%r12d add 32(%rsp),%r12d add 60(%rsp),%r12d mov %r9d,%r13d mov %r9d,%r14d mov %r10d,%r15d ror $6,%r13d ror $11,%r14d xor %r11d,%r15d # f^g xor %r14d,%r13d ror $14,%r14d and %r9d,%r15d # (f^g)&e mov %r12d,60(%rsp) xor %r14d,%r13d # Sigma1(e) xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g add %eax,%r12d # T1+=h mov %ebx,%eax add %r13d,%r12d # T1+=Sigma1(e) add %r15d,%r12d # T1+=Ch(e,f,g) mov %ebx,%r13d mov %ebx,%r14d ror $2,%eax ror $13,%r13d mov %ebx,%r15d add (%rbp,%rdi,4),%r12d # T1+=K[round] xor %r13d,%eax ror $9,%r13d or %edx,%r14d # a|c xor %r13d,%eax # h=Sigma0(a) and %edx,%r15d # a&c add %r12d,%r8d # d+=T1 and %ecx,%r14d # (a|c)&b add %r12d,%eax # h+=T1 or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14d,%eax # h+=Maj(a,b,c) cmp $64,%rdi jb .Lrounds_16_xx mov 16*4+0*8(%rsp),%rdi lea 16*4(%rsi),%rsi add 4*0(%rdi),%eax add 4*1(%rdi),%ebx add 4*2(%rdi),%ecx add 4*3(%rdi),%edx add 4*4(%rdi),%r8d add 4*5(%rdi),%r9d add 4*6(%rdi),%r10d add 4*7(%rdi),%r11d cmp 16*4+2*8(%rsp),%rsi mov %eax,4*0(%rdi) mov %ebx,4*1(%rdi) mov %ecx,4*2(%rdi) mov %edx,4*3(%rdi) mov %r8d,4*4(%rdi) mov %r9d,4*5(%rdi) mov %r10d,4*6(%rdi) mov %r11d,4*7(%rdi) jb .Lloop mov 16*4+3*8(%rsp),%rsp .cfi_def_cfa %rsp,56 pop %r15 .cfi_adjust_cfa_offset -8 .cfi_restore %r15 pop %r14 .cfi_adjust_cfa_offset -8 .cfi_restore %r14 pop %r13 .cfi_adjust_cfa_offset -8 .cfi_restore %r13 pop %r12 .cfi_adjust_cfa_offset -8 .cfi_restore %r12 pop %rbp .cfi_adjust_cfa_offset -8 .cfi_restore %rbp pop %rbx .cfi_adjust_cfa_offset -8 .cfi_restore %rbx ret .cfi_endproc SET_SIZE(SHA256TransformBlocks) .data .align 64 .type K256,@object K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 .long 
0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 #endif /* !lint && !__lint */ #ifdef __ELF__ .section .note.GNU-stack,"",%progbits #endif diff --git a/module/icp/asm-x86_64/sha2/sha512_impl.S b/module/icp/asm-x86_64/sha2/sha512_impl.S index 746c85a98566..c6e7efd86038 100644 --- a/module/icp/asm-x86_64/sha2/sha512_impl.S +++ b/module/icp/asm-x86_64/sha2/sha512_impl.S @@ -1,2114 +1,2114 @@ /* * ==================================================================== * Written by Andy Polyakov for the OpenSSL * project. Rights for redistribution and usage in source and binary * forms are granted according to the OpenSSL license. * ==================================================================== * * sha256/512_block procedure for x86_64. * * 40% improvement over compiler-generated code on Opteron. On EM64T * sha256 was observed to run >80% faster and sha512 - >40%. No magical * tricks, just straight implementation... I really wonder why gcc * [being armed with inline assembler] fails to generate as fast code. * The only thing which is cool about this module is that it's very * same instruction sequence used for both SHA-256 and SHA-512. In * former case the instructions operate on 32-bit operands, while in * latter - on 64-bit ones. All I had to do is to get one flavor right, * the other one passed the test right away:-) * * sha256_block runs in ~1005 cycles on Opteron, which gives you * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock * frequency in GHz. sha512_block runs in ~1275 cycles, which results * in 128*1000/1275=100MBps per GHz. Is there room for improvement? * Well, if you compare it to IA-64 implementation, which maintains * X[16] in register bank[!], tends to 4 instructions per CPU clock * cycle and runs in 1003 cycles, 1275 is very good result for 3-way * issue Opteron pipeline and X[16] maintained in memory. So that *if* * there is a way to improve it, *then* the only way would be to try to * offload X[16] updates to SSE unit, but that would require "deeper" * loop unroll, which in turn would naturally cause size blow-up, not * to mention increased complexity! And once again, only *if* it's * actually possible to noticeably improve overall ILP, instruction * level parallelism, on a given CPU implementation in this case. * * Special note on Intel EM64T. While Opteron CPU exhibits perfect * performance ratio of 1.5 between 64- and 32-bit flavors [see above], * [currently available] EM64T CPUs apparently are far from it. On the * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit * sha256_block:-( This is presumably because 64-bit shifts/rotates * apparently are not atomic instructions, but implemented in microcode. */ /* * OpenSolaris OS modifications * * Sun elects to use this software under the BSD license. * * This source originates from OpenSSL file sha512-x86_64.pl at * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz * (presumably for future OpenSSL release 0.9.8h), with these changes: * * 1. Added perl "use strict" and declared variables. * * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. * * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) * assemblers). Replaced the .picmeup macro with assembler code. * * 4. 
Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype", * at the beginning of SHA2_CTX (the next field is 8-byte aligned). */ /* * This file was generated by a perl script (sha512-x86_64.pl) that were * used to generate sha256 and sha512 variants from the same code base. * The comments from the original file have been pasted above. */ #if defined(lint) || defined(__lint) #include #include -/* ARGSUSED */ void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) { + (void) ctx, (void) in, (void) num; } #else #define _ASM #include ENTRY_NP(SHA512TransformBlocks) .cfi_startproc movq %rsp, %rax .cfi_def_cfa_register %rax push %rbx .cfi_offset %rbx,-16 push %rbp .cfi_offset %rbp,-24 push %r12 .cfi_offset %r12,-32 push %r13 .cfi_offset %r13,-40 push %r14 .cfi_offset %r14,-48 push %r15 .cfi_offset %r15,-56 mov %rsp,%rbp # copy %rsp shl $4,%rdx # num*16 sub $16*8+4*8,%rsp lea (%rsi,%rdx,8),%rdx # inp+num*16*8 and $-64,%rsp # align stack frame add $8,%rdi # Skip OpenSolaris field, "algotype" mov %rdi,16*8+0*8(%rsp) # save ctx, 1st arg mov %rsi,16*8+1*8(%rsp) # save inp, 2nd arg mov %rdx,16*8+2*8(%rsp) # save end pointer, "3rd" arg mov %rbp,16*8+3*8(%rsp) # save copy of %rsp # echo ".cfi_cfa_expression %rsp+152,deref,+56" | # openssl/crypto/perlasm/x86_64-xlate.pl .cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x38 #.picmeup %rbp # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts # the address of the "next" instruction into the target register # (%rbp). This generates these 2 instructions: lea .Llea(%rip),%rbp #nop # .picmeup generates a nop for mod 8 alignment--not needed here .Llea: lea K512-.(%rbp),%rbp mov 8*0(%rdi),%rax mov 8*1(%rdi),%rbx mov 8*2(%rdi),%rcx mov 8*3(%rdi),%rdx mov 8*4(%rdi),%r8 mov 8*5(%rdi),%r9 mov 8*6(%rdi),%r10 mov 8*7(%rdi),%r11 jmp .Lloop .align 16 .Lloop: xor %rdi,%rdi mov 8*0(%rsi),%r12 bswap %r12 mov %r8,%r13 mov %r8,%r14 mov %r9,%r15 ror $14,%r13 ror $18,%r14 xor %r10,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r8,%r15 # (f^g)&e mov %r12,0(%rsp) xor %r14,%r13 # Sigma1(e) xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r11,%r12 # T1+=h mov %rax,%r11 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rax,%r13 mov %rax,%r14 ror $28,%r11 ror $34,%r13 mov %rax,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r11 ror $5,%r13 or %rcx,%r14 # a|c xor %r13,%r11 # h=Sigma0(a) and %rcx,%r15 # a&c add %r12,%rdx # d+=T1 and %rbx,%r14 # (a|c)&b add %r12,%r11 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r11 # h+=Maj(a,b,c) mov 8*1(%rsi),%r12 bswap %r12 mov %rdx,%r13 mov %rdx,%r14 mov %r8,%r15 ror $14,%r13 ror $18,%r14 xor %r9,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rdx,%r15 # (f^g)&e mov %r12,8(%rsp) xor %r14,%r13 # Sigma1(e) xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r10,%r12 # T1+=h mov %r11,%r10 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r11,%r13 mov %r11,%r14 ror $28,%r10 ror $34,%r13 mov %r11,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r10 ror $5,%r13 or %rbx,%r14 # a|c xor %r13,%r10 # h=Sigma0(a) and %rbx,%r15 # a&c add %r12,%rcx # d+=T1 and %rax,%r14 # (a|c)&b add %r12,%r10 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r10 # h+=Maj(a,b,c) mov 8*2(%rsi),%r12 bswap %r12 mov %rcx,%r13 mov %rcx,%r14 mov %rdx,%r15 ror $14,%r13 ror $18,%r14 xor %r8,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rcx,%r15 # (f^g)&e mov %r12,16(%rsp) xor %r14,%r13 # Sigma1(e) xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r9,%r12 # T1+=h mov 
%r10,%r9 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r10,%r13 mov %r10,%r14 ror $28,%r9 ror $34,%r13 mov %r10,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r9 ror $5,%r13 or %rax,%r14 # a|c xor %r13,%r9 # h=Sigma0(a) and %rax,%r15 # a&c add %r12,%rbx # d+=T1 and %r11,%r14 # (a|c)&b add %r12,%r9 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r9 # h+=Maj(a,b,c) mov 8*3(%rsi),%r12 bswap %r12 mov %rbx,%r13 mov %rbx,%r14 mov %rcx,%r15 ror $14,%r13 ror $18,%r14 xor %rdx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rbx,%r15 # (f^g)&e mov %r12,24(%rsp) xor %r14,%r13 # Sigma1(e) xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r8,%r12 # T1+=h mov %r9,%r8 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r9,%r13 mov %r9,%r14 ror $28,%r8 ror $34,%r13 mov %r9,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r8 ror $5,%r13 or %r11,%r14 # a|c xor %r13,%r8 # h=Sigma0(a) and %r11,%r15 # a&c add %r12,%rax # d+=T1 and %r10,%r14 # (a|c)&b add %r12,%r8 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r8 # h+=Maj(a,b,c) mov 8*4(%rsi),%r12 bswap %r12 mov %rax,%r13 mov %rax,%r14 mov %rbx,%r15 ror $14,%r13 ror $18,%r14 xor %rcx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rax,%r15 # (f^g)&e mov %r12,32(%rsp) xor %r14,%r13 # Sigma1(e) xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rdx,%r12 # T1+=h mov %r8,%rdx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r8,%r13 mov %r8,%r14 ror $28,%rdx ror $34,%r13 mov %r8,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rdx ror $5,%r13 or %r10,%r14 # a|c xor %r13,%rdx # h=Sigma0(a) and %r10,%r15 # a&c add %r12,%r11 # d+=T1 and %r9,%r14 # (a|c)&b add %r12,%rdx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rdx # h+=Maj(a,b,c) mov 8*5(%rsi),%r12 bswap %r12 mov %r11,%r13 mov %r11,%r14 mov %rax,%r15 ror $14,%r13 ror $18,%r14 xor %rbx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r11,%r15 # (f^g)&e mov %r12,40(%rsp) xor %r14,%r13 # Sigma1(e) xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rcx,%r12 # T1+=h mov %rdx,%rcx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rdx,%r13 mov %rdx,%r14 ror $28,%rcx ror $34,%r13 mov %rdx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rcx ror $5,%r13 or %r9,%r14 # a|c xor %r13,%rcx # h=Sigma0(a) and %r9,%r15 # a&c add %r12,%r10 # d+=T1 and %r8,%r14 # (a|c)&b add %r12,%rcx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rcx # h+=Maj(a,b,c) mov 8*6(%rsi),%r12 bswap %r12 mov %r10,%r13 mov %r10,%r14 mov %r11,%r15 ror $14,%r13 ror $18,%r14 xor %rax,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r10,%r15 # (f^g)&e mov %r12,48(%rsp) xor %r14,%r13 # Sigma1(e) xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rbx,%r12 # T1+=h mov %rcx,%rbx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rcx,%r13 mov %rcx,%r14 ror $28,%rbx ror $34,%r13 mov %rcx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rbx ror $5,%r13 or %r8,%r14 # a|c xor %r13,%rbx # h=Sigma0(a) and %r8,%r15 # a&c add %r12,%r9 # d+=T1 and %rdx,%r14 # (a|c)&b add %r12,%rbx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rbx # h+=Maj(a,b,c) mov 8*7(%rsi),%r12 bswap %r12 mov %r9,%r13 mov %r9,%r14 mov %r10,%r15 ror $14,%r13 ror $18,%r14 xor %r11,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r9,%r15 # (f^g)&e mov %r12,56(%rsp) xor %r14,%r13 # Sigma1(e) xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rax,%r12 # T1+=h mov %rbx,%rax add 
%r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rbx,%r13 mov %rbx,%r14 ror $28,%rax ror $34,%r13 mov %rbx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rax ror $5,%r13 or %rdx,%r14 # a|c xor %r13,%rax # h=Sigma0(a) and %rdx,%r15 # a&c add %r12,%r8 # d+=T1 and %rcx,%r14 # (a|c)&b add %r12,%rax # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rax # h+=Maj(a,b,c) mov 8*8(%rsi),%r12 bswap %r12 mov %r8,%r13 mov %r8,%r14 mov %r9,%r15 ror $14,%r13 ror $18,%r14 xor %r10,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r8,%r15 # (f^g)&e mov %r12,64(%rsp) xor %r14,%r13 # Sigma1(e) xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r11,%r12 # T1+=h mov %rax,%r11 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rax,%r13 mov %rax,%r14 ror $28,%r11 ror $34,%r13 mov %rax,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r11 ror $5,%r13 or %rcx,%r14 # a|c xor %r13,%r11 # h=Sigma0(a) and %rcx,%r15 # a&c add %r12,%rdx # d+=T1 and %rbx,%r14 # (a|c)&b add %r12,%r11 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r11 # h+=Maj(a,b,c) mov 8*9(%rsi),%r12 bswap %r12 mov %rdx,%r13 mov %rdx,%r14 mov %r8,%r15 ror $14,%r13 ror $18,%r14 xor %r9,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rdx,%r15 # (f^g)&e mov %r12,72(%rsp) xor %r14,%r13 # Sigma1(e) xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r10,%r12 # T1+=h mov %r11,%r10 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r11,%r13 mov %r11,%r14 ror $28,%r10 ror $34,%r13 mov %r11,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r10 ror $5,%r13 or %rbx,%r14 # a|c xor %r13,%r10 # h=Sigma0(a) and %rbx,%r15 # a&c add %r12,%rcx # d+=T1 and %rax,%r14 # (a|c)&b add %r12,%r10 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r10 # h+=Maj(a,b,c) mov 8*10(%rsi),%r12 bswap %r12 mov %rcx,%r13 mov %rcx,%r14 mov %rdx,%r15 ror $14,%r13 ror $18,%r14 xor %r8,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rcx,%r15 # (f^g)&e mov %r12,80(%rsp) xor %r14,%r13 # Sigma1(e) xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r9,%r12 # T1+=h mov %r10,%r9 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r10,%r13 mov %r10,%r14 ror $28,%r9 ror $34,%r13 mov %r10,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r9 ror $5,%r13 or %rax,%r14 # a|c xor %r13,%r9 # h=Sigma0(a) and %rax,%r15 # a&c add %r12,%rbx # d+=T1 and %r11,%r14 # (a|c)&b add %r12,%r9 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r9 # h+=Maj(a,b,c) mov 8*11(%rsi),%r12 bswap %r12 mov %rbx,%r13 mov %rbx,%r14 mov %rcx,%r15 ror $14,%r13 ror $18,%r14 xor %rdx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rbx,%r15 # (f^g)&e mov %r12,88(%rsp) xor %r14,%r13 # Sigma1(e) xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r8,%r12 # T1+=h mov %r9,%r8 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r9,%r13 mov %r9,%r14 ror $28,%r8 ror $34,%r13 mov %r9,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r8 ror $5,%r13 or %r11,%r14 # a|c xor %r13,%r8 # h=Sigma0(a) and %r11,%r15 # a&c add %r12,%rax # d+=T1 and %r10,%r14 # (a|c)&b add %r12,%r8 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r8 # h+=Maj(a,b,c) mov 8*12(%rsi),%r12 bswap %r12 mov %rax,%r13 mov %rax,%r14 mov %rbx,%r15 ror $14,%r13 ror $18,%r14 xor %rcx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rax,%r15 # (f^g)&e mov %r12,96(%rsp) xor %r14,%r13 # Sigma1(e) xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rdx,%r12 # T1+=h mov %r8,%rdx add %r13,%r12 # 
T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r8,%r13 mov %r8,%r14 ror $28,%rdx ror $34,%r13 mov %r8,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rdx ror $5,%r13 or %r10,%r14 # a|c xor %r13,%rdx # h=Sigma0(a) and %r10,%r15 # a&c add %r12,%r11 # d+=T1 and %r9,%r14 # (a|c)&b add %r12,%rdx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rdx # h+=Maj(a,b,c) mov 8*13(%rsi),%r12 bswap %r12 mov %r11,%r13 mov %r11,%r14 mov %rax,%r15 ror $14,%r13 ror $18,%r14 xor %rbx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r11,%r15 # (f^g)&e mov %r12,104(%rsp) xor %r14,%r13 # Sigma1(e) xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rcx,%r12 # T1+=h mov %rdx,%rcx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rdx,%r13 mov %rdx,%r14 ror $28,%rcx ror $34,%r13 mov %rdx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rcx ror $5,%r13 or %r9,%r14 # a|c xor %r13,%rcx # h=Sigma0(a) and %r9,%r15 # a&c add %r12,%r10 # d+=T1 and %r8,%r14 # (a|c)&b add %r12,%rcx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rcx # h+=Maj(a,b,c) mov 8*14(%rsi),%r12 bswap %r12 mov %r10,%r13 mov %r10,%r14 mov %r11,%r15 ror $14,%r13 ror $18,%r14 xor %rax,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r10,%r15 # (f^g)&e mov %r12,112(%rsp) xor %r14,%r13 # Sigma1(e) xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rbx,%r12 # T1+=h mov %rcx,%rbx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rcx,%r13 mov %rcx,%r14 ror $28,%rbx ror $34,%r13 mov %rcx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rbx ror $5,%r13 or %r8,%r14 # a|c xor %r13,%rbx # h=Sigma0(a) and %r8,%r15 # a&c add %r12,%r9 # d+=T1 and %rdx,%r14 # (a|c)&b add %r12,%rbx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rbx # h+=Maj(a,b,c) mov 8*15(%rsi),%r12 bswap %r12 mov %r9,%r13 mov %r9,%r14 mov %r10,%r15 ror $14,%r13 ror $18,%r14 xor %r11,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r9,%r15 # (f^g)&e mov %r12,120(%rsp) xor %r14,%r13 # Sigma1(e) xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rax,%r12 # T1+=h mov %rbx,%rax add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rbx,%r13 mov %rbx,%r14 ror $28,%rax ror $34,%r13 mov %rbx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rax ror $5,%r13 or %rdx,%r14 # a|c xor %r13,%rax # h=Sigma0(a) and %rdx,%r15 # a&c add %r12,%r8 # d+=T1 and %rcx,%r14 # (a|c)&b add %r12,%rax # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rax # h+=Maj(a,b,c) jmp .Lrounds_16_xx .align 16 .Lrounds_16_xx: mov 8(%rsp),%r13 mov 112(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 72(%rsp),%r12 add 0(%rsp),%r12 mov %r8,%r13 mov %r8,%r14 mov %r9,%r15 ror $14,%r13 ror $18,%r14 xor %r10,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r8,%r15 # (f^g)&e mov %r12,0(%rsp) xor %r14,%r13 # Sigma1(e) xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r11,%r12 # T1+=h mov %rax,%r11 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rax,%r13 mov %rax,%r14 ror $28,%r11 ror $34,%r13 mov %rax,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r11 ror $5,%r13 or %rcx,%r14 # a|c xor %r13,%r11 # h=Sigma0(a) and %rcx,%r15 # a&c add %r12,%rdx # d+=T1 and %rbx,%r14 # (a|c)&b add %r12,%r11 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r11 # h+=Maj(a,b,c) 
mov 16(%rsp),%r13 mov 120(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 80(%rsp),%r12 add 8(%rsp),%r12 mov %rdx,%r13 mov %rdx,%r14 mov %r8,%r15 ror $14,%r13 ror $18,%r14 xor %r9,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rdx,%r15 # (f^g)&e mov %r12,8(%rsp) xor %r14,%r13 # Sigma1(e) xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r10,%r12 # T1+=h mov %r11,%r10 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r11,%r13 mov %r11,%r14 ror $28,%r10 ror $34,%r13 mov %r11,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r10 ror $5,%r13 or %rbx,%r14 # a|c xor %r13,%r10 # h=Sigma0(a) and %rbx,%r15 # a&c add %r12,%rcx # d+=T1 and %rax,%r14 # (a|c)&b add %r12,%r10 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r10 # h+=Maj(a,b,c) mov 24(%rsp),%r13 mov 0(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 88(%rsp),%r12 add 16(%rsp),%r12 mov %rcx,%r13 mov %rcx,%r14 mov %rdx,%r15 ror $14,%r13 ror $18,%r14 xor %r8,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rcx,%r15 # (f^g)&e mov %r12,16(%rsp) xor %r14,%r13 # Sigma1(e) xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r9,%r12 # T1+=h mov %r10,%r9 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r10,%r13 mov %r10,%r14 ror $28,%r9 ror $34,%r13 mov %r10,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r9 ror $5,%r13 or %rax,%r14 # a|c xor %r13,%r9 # h=Sigma0(a) and %rax,%r15 # a&c add %r12,%rbx # d+=T1 and %r11,%r14 # (a|c)&b add %r12,%r9 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r9 # h+=Maj(a,b,c) mov 32(%rsp),%r13 mov 8(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 96(%rsp),%r12 add 24(%rsp),%r12 mov %rbx,%r13 mov %rbx,%r14 mov %rcx,%r15 ror $14,%r13 ror $18,%r14 xor %rdx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rbx,%r15 # (f^g)&e mov %r12,24(%rsp) xor %r14,%r13 # Sigma1(e) xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r8,%r12 # T1+=h mov %r9,%r8 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r9,%r13 mov %r9,%r14 ror $28,%r8 ror $34,%r13 mov %r9,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r8 ror $5,%r13 or %r11,%r14 # a|c xor %r13,%r8 # h=Sigma0(a) and %r11,%r15 # a&c add %r12,%rax # d+=T1 and %r10,%r14 # (a|c)&b add %r12,%r8 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r8 # h+=Maj(a,b,c) mov 40(%rsp),%r13 mov 16(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 104(%rsp),%r12 add 32(%rsp),%r12 mov %rax,%r13 mov %rax,%r14 mov %rbx,%r15 ror $14,%r13 ror $18,%r14 xor %rcx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rax,%r15 # (f^g)&e mov %r12,32(%rsp) xor %r14,%r13 # Sigma1(e) xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rdx,%r12 # T1+=h mov %r8,%rdx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r8,%r13 mov %r8,%r14 ror $28,%rdx ror 
$34,%r13 mov %r8,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rdx ror $5,%r13 or %r10,%r14 # a|c xor %r13,%rdx # h=Sigma0(a) and %r10,%r15 # a&c add %r12,%r11 # d+=T1 and %r9,%r14 # (a|c)&b add %r12,%rdx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rdx # h+=Maj(a,b,c) mov 48(%rsp),%r13 mov 24(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 112(%rsp),%r12 add 40(%rsp),%r12 mov %r11,%r13 mov %r11,%r14 mov %rax,%r15 ror $14,%r13 ror $18,%r14 xor %rbx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r11,%r15 # (f^g)&e mov %r12,40(%rsp) xor %r14,%r13 # Sigma1(e) xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rcx,%r12 # T1+=h mov %rdx,%rcx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rdx,%r13 mov %rdx,%r14 ror $28,%rcx ror $34,%r13 mov %rdx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rcx ror $5,%r13 or %r9,%r14 # a|c xor %r13,%rcx # h=Sigma0(a) and %r9,%r15 # a&c add %r12,%r10 # d+=T1 and %r8,%r14 # (a|c)&b add %r12,%rcx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rcx # h+=Maj(a,b,c) mov 56(%rsp),%r13 mov 32(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 120(%rsp),%r12 add 48(%rsp),%r12 mov %r10,%r13 mov %r10,%r14 mov %r11,%r15 ror $14,%r13 ror $18,%r14 xor %rax,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r10,%r15 # (f^g)&e mov %r12,48(%rsp) xor %r14,%r13 # Sigma1(e) xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rbx,%r12 # T1+=h mov %rcx,%rbx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rcx,%r13 mov %rcx,%r14 ror $28,%rbx ror $34,%r13 mov %rcx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rbx ror $5,%r13 or %r8,%r14 # a|c xor %r13,%rbx # h=Sigma0(a) and %r8,%r15 # a&c add %r12,%r9 # d+=T1 and %rdx,%r14 # (a|c)&b add %r12,%rbx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rbx # h+=Maj(a,b,c) mov 64(%rsp),%r13 mov 40(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 0(%rsp),%r12 add 56(%rsp),%r12 mov %r9,%r13 mov %r9,%r14 mov %r10,%r15 ror $14,%r13 ror $18,%r14 xor %r11,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r9,%r15 # (f^g)&e mov %r12,56(%rsp) xor %r14,%r13 # Sigma1(e) xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rax,%r12 # T1+=h mov %rbx,%rax add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rbx,%r13 mov %rbx,%r14 ror $28,%rax ror $34,%r13 mov %rbx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rax ror $5,%r13 or %rdx,%r14 # a|c xor %r13,%rax # h=Sigma0(a) and %rdx,%r15 # a&c add %r12,%r8 # d+=T1 and %rcx,%r14 # (a|c)&b add %r12,%rax # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rax # h+=Maj(a,b,c) mov 72(%rsp),%r13 mov 48(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 8(%rsp),%r12 add 64(%rsp),%r12 mov %r8,%r13 mov %r8,%r14 mov 
%r9,%r15 ror $14,%r13 ror $18,%r14 xor %r10,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r8,%r15 # (f^g)&e mov %r12,64(%rsp) xor %r14,%r13 # Sigma1(e) xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r11,%r12 # T1+=h mov %rax,%r11 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rax,%r13 mov %rax,%r14 ror $28,%r11 ror $34,%r13 mov %rax,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r11 ror $5,%r13 or %rcx,%r14 # a|c xor %r13,%r11 # h=Sigma0(a) and %rcx,%r15 # a&c add %r12,%rdx # d+=T1 and %rbx,%r14 # (a|c)&b add %r12,%r11 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r11 # h+=Maj(a,b,c) mov 80(%rsp),%r13 mov 56(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 16(%rsp),%r12 add 72(%rsp),%r12 mov %rdx,%r13 mov %rdx,%r14 mov %r8,%r15 ror $14,%r13 ror $18,%r14 xor %r9,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rdx,%r15 # (f^g)&e mov %r12,72(%rsp) xor %r14,%r13 # Sigma1(e) xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r10,%r12 # T1+=h mov %r11,%r10 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r11,%r13 mov %r11,%r14 ror $28,%r10 ror $34,%r13 mov %r11,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r10 ror $5,%r13 or %rbx,%r14 # a|c xor %r13,%r10 # h=Sigma0(a) and %rbx,%r15 # a&c add %r12,%rcx # d+=T1 and %rax,%r14 # (a|c)&b add %r12,%r10 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r10 # h+=Maj(a,b,c) mov 88(%rsp),%r13 mov 64(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 24(%rsp),%r12 add 80(%rsp),%r12 mov %rcx,%r13 mov %rcx,%r14 mov %rdx,%r15 ror $14,%r13 ror $18,%r14 xor %r8,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rcx,%r15 # (f^g)&e mov %r12,80(%rsp) xor %r14,%r13 # Sigma1(e) xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r9,%r12 # T1+=h mov %r10,%r9 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r10,%r13 mov %r10,%r14 ror $28,%r9 ror $34,%r13 mov %r10,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r9 ror $5,%r13 or %rax,%r14 # a|c xor %r13,%r9 # h=Sigma0(a) and %rax,%r15 # a&c add %r12,%rbx # d+=T1 and %r11,%r14 # (a|c)&b add %r12,%r9 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r9 # h+=Maj(a,b,c) mov 96(%rsp),%r13 mov 72(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 32(%rsp),%r12 add 88(%rsp),%r12 mov %rbx,%r13 mov %rbx,%r14 mov %rcx,%r15 ror $14,%r13 ror $18,%r14 xor %rdx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rbx,%r15 # (f^g)&e mov %r12,88(%rsp) xor %r14,%r13 # Sigma1(e) xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %r8,%r12 # T1+=h mov %r9,%r8 add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r9,%r13 mov %r9,%r14 ror $28,%r8 ror $34,%r13 mov %r9,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%r8 ror $5,%r13 or %r11,%r14 # a|c xor %r13,%r8 # h=Sigma0(a) and %r11,%r15 # a&c add %r12,%rax # d+=T1 and %r10,%r14 # (a|c)&b add %r12,%r8 # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%r8 # h+=Maj(a,b,c) mov 
104(%rsp),%r13 mov 80(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 40(%rsp),%r12 add 96(%rsp),%r12 mov %rax,%r13 mov %rax,%r14 mov %rbx,%r15 ror $14,%r13 ror $18,%r14 xor %rcx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %rax,%r15 # (f^g)&e mov %r12,96(%rsp) xor %r14,%r13 # Sigma1(e) xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rdx,%r12 # T1+=h mov %r8,%rdx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %r8,%r13 mov %r8,%r14 ror $28,%rdx ror $34,%r13 mov %r8,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rdx ror $5,%r13 or %r10,%r14 # a|c xor %r13,%rdx # h=Sigma0(a) and %r10,%r15 # a&c add %r12,%r11 # d+=T1 and %r9,%r14 # (a|c)&b add %r12,%rdx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rdx # h+=Maj(a,b,c) mov 112(%rsp),%r13 mov 88(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 48(%rsp),%r12 add 104(%rsp),%r12 mov %r11,%r13 mov %r11,%r14 mov %rax,%r15 ror $14,%r13 ror $18,%r14 xor %rbx,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r11,%r15 # (f^g)&e mov %r12,104(%rsp) xor %r14,%r13 # Sigma1(e) xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rcx,%r12 # T1+=h mov %rdx,%rcx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rdx,%r13 mov %rdx,%r14 ror $28,%rcx ror $34,%r13 mov %rdx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rcx ror $5,%r13 or %r9,%r14 # a|c xor %r13,%rcx # h=Sigma0(a) and %r9,%r15 # a&c add %r12,%r10 # d+=T1 and %r8,%r14 # (a|c)&b add %r12,%rcx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rcx # h+=Maj(a,b,c) mov 120(%rsp),%r13 mov 96(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 56(%rsp),%r12 add 112(%rsp),%r12 mov %r10,%r13 mov %r10,%r14 mov %r11,%r15 ror $14,%r13 ror $18,%r14 xor %rax,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r10,%r15 # (f^g)&e mov %r12,112(%rsp) xor %r14,%r13 # Sigma1(e) xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rbx,%r12 # T1+=h mov %rcx,%rbx add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rcx,%r13 mov %rcx,%r14 ror $28,%rbx ror $34,%r13 mov %rcx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rbx ror $5,%r13 or %r8,%r14 # a|c xor %r13,%rbx # h=Sigma0(a) and %r8,%r15 # a&c add %r12,%r9 # d+=T1 and %rdx,%r14 # (a|c)&b add %r12,%rbx # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rbx # h+=Maj(a,b,c) mov 0(%rsp),%r13 mov 104(%rsp),%r12 mov %r13,%r15 shr $7,%r13 ror $1,%r15 xor %r15,%r13 ror $7,%r15 xor %r15,%r13 # sigma0(X[(i+1)&0xf]) mov %r12,%r14 shr $6,%r12 ror $19,%r14 xor %r14,%r12 ror $42,%r14 xor %r14,%r12 # sigma1(X[(i+14)&0xf]) add %r13,%r12 add 64(%rsp),%r12 add 120(%rsp),%r12 mov %r9,%r13 mov %r9,%r14 mov %r10,%r15 ror $14,%r13 ror $18,%r14 xor %r11,%r15 # f^g xor %r14,%r13 ror $23,%r14 and %r9,%r15 # (f^g)&e mov %r12,120(%rsp) xor %r14,%r13 # Sigma1(e) xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g add %rax,%r12 # T1+=h mov %rbx,%rax add %r13,%r12 # T1+=Sigma1(e) add %r15,%r12 # T1+=Ch(e,f,g) mov %rbx,%r13 mov %rbx,%r14 ror 
$28,%rax ror $34,%r13 mov %rbx,%r15 add (%rbp,%rdi,8),%r12 # T1+=K[round] xor %r13,%rax ror $5,%r13 or %rdx,%r14 # a|c xor %r13,%rax # h=Sigma0(a) and %rdx,%r15 # a&c add %r12,%r8 # d+=T1 and %rcx,%r14 # (a|c)&b add %r12,%rax # h+=T1 or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) lea 1(%rdi),%rdi # round++ add %r14,%rax # h+=Maj(a,b,c) cmp $80,%rdi jb .Lrounds_16_xx mov 16*8+0*8(%rsp),%rdi lea 16*8(%rsi),%rsi add 8*0(%rdi),%rax add 8*1(%rdi),%rbx add 8*2(%rdi),%rcx add 8*3(%rdi),%rdx add 8*4(%rdi),%r8 add 8*5(%rdi),%r9 add 8*6(%rdi),%r10 add 8*7(%rdi),%r11 cmp 16*8+2*8(%rsp),%rsi mov %rax,8*0(%rdi) mov %rbx,8*1(%rdi) mov %rcx,8*2(%rdi) mov %rdx,8*3(%rdi) mov %r8,8*4(%rdi) mov %r9,8*5(%rdi) mov %r10,8*6(%rdi) mov %r11,8*7(%rdi) jb .Lloop mov 16*8+3*8(%rsp),%rsp .cfi_def_cfa %rsp,56 pop %r15 .cfi_adjust_cfa_offset -8 .cfi_restore %r15 pop %r14 .cfi_adjust_cfa_offset -8 .cfi_restore %r14 pop %r13 .cfi_adjust_cfa_offset -8 .cfi_restore %r13 pop %r12 .cfi_adjust_cfa_offset -8 .cfi_restore %r12 pop %rbp .cfi_adjust_cfa_offset -8 .cfi_restore %rbp pop %rbx .cfi_adjust_cfa_offset -8 .cfi_restore %rbx ret .cfi_endproc SET_SIZE(SHA512TransformBlocks) .data .align 64 .type K512,@object K512: .quad 0x428a2f98d728ae22,0x7137449123ef65cd .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc .quad 0x3956c25bf348b538,0x59f111f1b605d019 .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 .quad 0xd807aa98a3030242,0x12835b0145706fbe .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 .quad 0x9bdc06a725c71235,0xc19bf174cf692694 .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 .quad 0x983e5152ee66dfab,0xa831c66d2db43210 .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 .quad 0x06ca6351e003826f,0x142929670a0e6e70 .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 .quad 0x81c2c92e47edaee6,0x92722c851482353b .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 .quad 0xd192e819d6ef5218,0xd69906245565a910 .quad 0xf40e35855771202a,0x106aa07032bbd1b8 .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec .quad 0x90befffa23631e28,0xa4506cebde82bde9 .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b .quad 0xca273eceea26619c,0xd186b8c721c0c207 .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 .quad 0x113f9804bef90dae,0x1b710b35131c471b .quad 0x28db77f523047d84,0x32caab7b40c72493 .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 #endif /* !lint && !__lint */ #ifdef __ELF__ .section .note.GNU-stack,"",%progbits #endif diff --git a/module/icp/core/kcf_mech_tabs.c b/module/icp/core/kcf_mech_tabs.c index 2642b317d698..e1ac7ffd5471 100644 --- a/module/icp/core/kcf_mech_tabs.c +++ b/module/icp/core/kcf_mech_tabs.c @@ -1,791 +1,791 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include /* Cryptographic mechanisms tables and their access functions */ /* * Internal numbers assigned to mechanisms are coded as follows: * * +----------------+----------------+ * | mech. class | mech. index | * <--- 32-bits --->+<--- 32-bits ---> * * the mech_class identifies the table the mechanism belongs to. * mech_index is the index for that mechanism in the table. * A mechanism belongs to exactly 1 table. * The tables are: * . digest_mechs_tab[] for the msg digest mechs. * . cipher_mechs_tab[] for encrypt/decrypt and wrap/unwrap mechs. * . mac_mechs_tab[] for MAC mechs. * . sign_mechs_tab[] for sign & verify mechs. * . keyops_mechs_tab[] for key/key pair generation, and key derivation. * . misc_mechs_tab[] for mechs that don't belong to any of the above. * * There are no holes in the tables. */ /* * Locking conventions: * -------------------- * A global mutex, kcf_mech_tabs_lock, serializes writes to the * mechanism table via kcf_create_mech_entry(). * * A mutex is associated with every entry of the tables. * The mutex is acquired whenever the entry is accessed for * 1) retrieving the mech_id (comparing the mech name) * 2) finding a provider for an xxx_init() or atomic operation. * 3) altering the mechs entry to add or remove a provider. * * In 2), after a provider is chosen, its prov_desc is held and the * entry's mutex must be dropped. The provider's working function (SPI) is * called outside the mech_entry's mutex. * * The number of providers for a particular mechanism is not expected to be * long enough to justify the cost of using rwlocks, so the per-mechanism * entry mutex won't be very *hot*. * * When both kcf_mech_tabs_lock and a mech_entry mutex need to be held, * kcf_mech_tabs_lock must always be acquired first. * */ /* Mechanisms tables */ /* RFE 4687834 Will deal with the extensibility of these tables later */ kcf_mech_entry_t kcf_digest_mechs_tab[KCF_MAXDIGEST]; kcf_mech_entry_t kcf_cipher_mechs_tab[KCF_MAXCIPHER]; kcf_mech_entry_t kcf_mac_mechs_tab[KCF_MAXMAC]; kcf_mech_entry_t kcf_sign_mechs_tab[KCF_MAXSIGN]; kcf_mech_entry_t kcf_keyops_mechs_tab[KCF_MAXKEYOPS]; kcf_mech_entry_t kcf_misc_mechs_tab[KCF_MAXMISC]; kcf_mech_entry_tab_t kcf_mech_tabs_tab[KCF_LAST_OPSCLASS + 1] = { {0, NULL}, /* No class zero */ {KCF_MAXDIGEST, kcf_digest_mechs_tab}, {KCF_MAXCIPHER, kcf_cipher_mechs_tab}, {KCF_MAXMAC, kcf_mac_mechs_tab}, {KCF_MAXSIGN, kcf_sign_mechs_tab}, {KCF_MAXKEYOPS, kcf_keyops_mechs_tab}, {KCF_MAXMISC, kcf_misc_mechs_tab} }; /* * Per-algorithm internal thresholds for the minimum input size of before * offloading to hardware provider. * Dispatching a crypto operation to a hardware provider entails paying the * cost of an additional context switch. Measurements with Sun Accelerator 4000 * shows that 512-byte jobs or smaller are better handled in software. 
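 * For illustration only (not part of this change): with
 * kcf_aes_threshold set to 512, a 256-byte AES request would be
 * expected to stay with the software provider, while a 4096-byte
 * request becomes a candidate for dispatch to a registered hardware
 * provider.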
* There is room for refinement here. * */ int kcf_md5_threshold = 512; int kcf_sha1_threshold = 512; int kcf_des_threshold = 512; int kcf_des3_threshold = 512; int kcf_aes_threshold = 512; int kcf_bf_threshold = 512; int kcf_rc4_threshold = 512; kmutex_t kcf_mech_tabs_lock; static uint32_t kcf_gen_swprov = 0; int kcf_mech_hash_size = 256; mod_hash_t *kcf_mech_hash; /* mech name to id hash */ static crypto_mech_type_t kcf_mech_hash_find(char *mechname) { mod_hash_val_t hv; crypto_mech_type_t mt; mt = CRYPTO_MECH_INVALID; if (mod_hash_find(kcf_mech_hash, (mod_hash_key_t)mechname, &hv) == 0) { mt = *(crypto_mech_type_t *)hv; ASSERT(mt != CRYPTO_MECH_INVALID); } return (mt); } void kcf_destroy_mech_tabs(void) { int i, max; kcf_ops_class_t class; kcf_mech_entry_t *me_tab; if (kcf_mech_hash) mod_hash_destroy_hash(kcf_mech_hash); mutex_destroy(&kcf_mech_tabs_lock); for (class = KCF_FIRST_OPSCLASS; class <= KCF_LAST_OPSCLASS; class++) { max = kcf_mech_tabs_tab[class].met_size; me_tab = kcf_mech_tabs_tab[class].met_tab; for (i = 0; i < max; i++) mutex_destroy(&(me_tab[i].me_mutex)); } } /* * kcf_init_mech_tabs() * * Called by the misc/kcf's _init() routine to initialize the tables * of mech_entry's. */ void kcf_init_mech_tabs(void) { int i, max; kcf_ops_class_t class; kcf_mech_entry_t *me_tab; /* Initializes the mutex locks. */ mutex_init(&kcf_mech_tabs_lock, NULL, MUTEX_DEFAULT, NULL); /* Then the pre-defined mechanism entries */ /* Two digests */ (void) strncpy(kcf_digest_mechs_tab[0].me_name, SUN_CKM_MD5, CRYPTO_MAX_MECH_NAME); kcf_digest_mechs_tab[0].me_threshold = kcf_md5_threshold; (void) strncpy(kcf_digest_mechs_tab[1].me_name, SUN_CKM_SHA1, CRYPTO_MAX_MECH_NAME); kcf_digest_mechs_tab[1].me_threshold = kcf_sha1_threshold; /* The symmetric ciphers in various modes */ (void) strncpy(kcf_cipher_mechs_tab[0].me_name, SUN_CKM_DES_CBC, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[0].me_threshold = kcf_des_threshold; (void) strncpy(kcf_cipher_mechs_tab[1].me_name, SUN_CKM_DES3_CBC, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[1].me_threshold = kcf_des3_threshold; (void) strncpy(kcf_cipher_mechs_tab[2].me_name, SUN_CKM_DES_ECB, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[2].me_threshold = kcf_des_threshold; (void) strncpy(kcf_cipher_mechs_tab[3].me_name, SUN_CKM_DES3_ECB, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[3].me_threshold = kcf_des3_threshold; (void) strncpy(kcf_cipher_mechs_tab[4].me_name, SUN_CKM_BLOWFISH_CBC, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[4].me_threshold = kcf_bf_threshold; (void) strncpy(kcf_cipher_mechs_tab[5].me_name, SUN_CKM_BLOWFISH_ECB, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[5].me_threshold = kcf_bf_threshold; (void) strncpy(kcf_cipher_mechs_tab[6].me_name, SUN_CKM_AES_CBC, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[6].me_threshold = kcf_aes_threshold; (void) strncpy(kcf_cipher_mechs_tab[7].me_name, SUN_CKM_AES_ECB, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[7].me_threshold = kcf_aes_threshold; (void) strncpy(kcf_cipher_mechs_tab[8].me_name, SUN_CKM_RC4, CRYPTO_MAX_MECH_NAME); kcf_cipher_mechs_tab[8].me_threshold = kcf_rc4_threshold; /* 4 HMACs */ (void) strncpy(kcf_mac_mechs_tab[0].me_name, SUN_CKM_MD5_HMAC, CRYPTO_MAX_MECH_NAME); kcf_mac_mechs_tab[0].me_threshold = kcf_md5_threshold; (void) strncpy(kcf_mac_mechs_tab[1].me_name, SUN_CKM_MD5_HMAC_GENERAL, CRYPTO_MAX_MECH_NAME); kcf_mac_mechs_tab[1].me_threshold = kcf_md5_threshold; (void) strncpy(kcf_mac_mechs_tab[2].me_name, SUN_CKM_SHA1_HMAC, CRYPTO_MAX_MECH_NAME); kcf_mac_mechs_tab[2].me_threshold = 
kcf_sha1_threshold; (void) strncpy(kcf_mac_mechs_tab[3].me_name, SUN_CKM_SHA1_HMAC_GENERAL, CRYPTO_MAX_MECH_NAME); kcf_mac_mechs_tab[3].me_threshold = kcf_sha1_threshold; /* 1 random number generation pseudo mechanism */ (void) strncpy(kcf_misc_mechs_tab[0].me_name, SUN_RANDOM, CRYPTO_MAX_MECH_NAME); kcf_mech_hash = mod_hash_create_strhash_nodtr("kcf mech2id hash", kcf_mech_hash_size, mod_hash_null_valdtor); for (class = KCF_FIRST_OPSCLASS; class <= KCF_LAST_OPSCLASS; class++) { max = kcf_mech_tabs_tab[class].met_size; me_tab = kcf_mech_tabs_tab[class].met_tab; for (i = 0; i < max; i++) { mutex_init(&(me_tab[i].me_mutex), NULL, MUTEX_DEFAULT, NULL); if (me_tab[i].me_name[0] != 0) { me_tab[i].me_mechid = KCF_MECHID(class, i); (void) mod_hash_insert(kcf_mech_hash, (mod_hash_key_t)me_tab[i].me_name, (mod_hash_val_t)&(me_tab[i].me_mechid)); } } } } /* * kcf_create_mech_entry() * * Arguments: * . The class of mechanism. * . the name of the new mechanism. * * Description: * Creates a new mech_entry for a mechanism not yet known to the * framework. * This routine is called by kcf_add_mech_provider, which is * in turn invoked for each mechanism supported by a provider. * The'class' argument depends on the crypto_func_group_t bitmask * in the registering provider's mech_info struct for this mechanism. * When there is ambiguity in the mapping between the crypto_func_group_t * and a class (dual ops, ...) the KCF_MISC_CLASS should be used. * * Context: * User context only. * * Returns: * KCF_INVALID_MECH_CLASS or KCF_INVALID_MECH_NAME if the class or * the mechname is bogus. * KCF_MECH_TAB_FULL when there is no room left in the mech. tabs. * KCF_SUCCESS otherwise. */ static int kcf_create_mech_entry(kcf_ops_class_t class, char *mechname) { crypto_mech_type_t mt; kcf_mech_entry_t *me_tab; int i = 0, size; if ((class < KCF_FIRST_OPSCLASS) || (class > KCF_LAST_OPSCLASS)) return (KCF_INVALID_MECH_CLASS); if ((mechname == NULL) || (mechname[0] == 0)) return (KCF_INVALID_MECH_NAME); /* * First check if the mechanism is already in one of the tables. * The mech_entry could be in another class. */ mutex_enter(&kcf_mech_tabs_lock); mt = kcf_mech_hash_find(mechname); if (mt != CRYPTO_MECH_INVALID) { /* Nothing to do, regardless the suggested class. */ mutex_exit(&kcf_mech_tabs_lock); return (KCF_SUCCESS); } /* Now take the next unused mech entry in the class's tab */ me_tab = kcf_mech_tabs_tab[class].met_tab; size = kcf_mech_tabs_tab[class].met_size; while (i < size) { mutex_enter(&(me_tab[i].me_mutex)); if (me_tab[i].me_name[0] == 0) { /* Found an empty spot */ (void) strlcpy(me_tab[i].me_name, mechname, CRYPTO_MAX_MECH_NAME); me_tab[i].me_name[CRYPTO_MAX_MECH_NAME-1] = '\0'; me_tab[i].me_mechid = KCF_MECHID(class, i); /* * No a-priori information about the new mechanism, so * the threshold is set to zero. */ me_tab[i].me_threshold = 0; mutex_exit(&(me_tab[i].me_mutex)); /* Add the new mechanism to the hash table */ (void) mod_hash_insert(kcf_mech_hash, (mod_hash_key_t)me_tab[i].me_name, (mod_hash_val_t)&(me_tab[i].me_mechid)); break; } mutex_exit(&(me_tab[i].me_mutex)); i++; } mutex_exit(&kcf_mech_tabs_lock); if (i == size) { return (KCF_MECH_TAB_FULL); } return (KCF_SUCCESS); } /* * kcf_add_mech_provider() * * Arguments: * . An index in to the provider mechanism array * . A pointer to the provider descriptor * . A storage for the kcf_prov_mech_desc_t the entry was added at. * * Description: * Adds a new provider of a mechanism to the mechanism's mech_entry * chain. * * Context: * User context only. 
* * Returns * KCF_SUCCESS on success * KCF_MECH_TAB_FULL otherwise. */ int kcf_add_mech_provider(short mech_indx, kcf_provider_desc_t *prov_desc, kcf_prov_mech_desc_t **pmdpp) { int error; kcf_mech_entry_t *mech_entry = NULL; crypto_mech_info_t *mech_info; crypto_mech_type_t kcf_mech_type, mt; kcf_prov_mech_desc_t *prov_mech, *prov_mech2; crypto_func_group_t simple_fg_mask, dual_fg_mask; crypto_mech_info_t *dmi; crypto_mech_info_list_t *mil, *mil2; kcf_mech_entry_t *me; int i; ASSERT(prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); mech_info = &prov_desc->pd_mechanisms[mech_indx]; /* * A mechanism belongs to exactly one mechanism table. * Find the class corresponding to the function group flag of * the mechanism. */ kcf_mech_type = kcf_mech_hash_find(mech_info->cm_mech_name); if (kcf_mech_type == CRYPTO_MECH_INVALID) { crypto_func_group_t fg = mech_info->cm_func_group_mask; kcf_ops_class_t class; if (fg & CRYPTO_FG_DIGEST || fg & CRYPTO_FG_DIGEST_ATOMIC) class = KCF_DIGEST_CLASS; else if (fg & CRYPTO_FG_ENCRYPT || fg & CRYPTO_FG_DECRYPT || fg & CRYPTO_FG_ENCRYPT_ATOMIC || fg & CRYPTO_FG_DECRYPT_ATOMIC) class = KCF_CIPHER_CLASS; else if (fg & CRYPTO_FG_MAC || fg & CRYPTO_FG_MAC_ATOMIC) class = KCF_MAC_CLASS; else if (fg & CRYPTO_FG_SIGN || fg & CRYPTO_FG_VERIFY || fg & CRYPTO_FG_SIGN_ATOMIC || fg & CRYPTO_FG_VERIFY_ATOMIC || fg & CRYPTO_FG_SIGN_RECOVER || fg & CRYPTO_FG_VERIFY_RECOVER) class = KCF_SIGN_CLASS; else if (fg & CRYPTO_FG_GENERATE || fg & CRYPTO_FG_GENERATE_KEY_PAIR || fg & CRYPTO_FG_WRAP || fg & CRYPTO_FG_UNWRAP || fg & CRYPTO_FG_DERIVE) class = KCF_KEYOPS_CLASS; else class = KCF_MISC_CLASS; /* * Attempt to create a new mech_entry for the specified * mechanism. kcf_create_mech_entry() can handle the case * where such an entry already exists. */ if ((error = kcf_create_mech_entry(class, mech_info->cm_mech_name)) != KCF_SUCCESS) { return (error); } /* get the KCF mech type that was assigned to the mechanism */ kcf_mech_type = kcf_mech_hash_find(mech_info->cm_mech_name); ASSERT(kcf_mech_type != CRYPTO_MECH_INVALID); } error = kcf_get_mech_entry(kcf_mech_type, &mech_entry); ASSERT(error == KCF_SUCCESS); /* allocate and initialize new kcf_prov_mech_desc */ prov_mech = kmem_zalloc(sizeof (kcf_prov_mech_desc_t), KM_SLEEP); bcopy(mech_info, &prov_mech->pm_mech_info, sizeof (crypto_mech_info_t)); prov_mech->pm_prov_desc = prov_desc; prov_desc->pd_mech_indx[KCF_MECH2CLASS(kcf_mech_type)] [KCF_MECH2INDEX(kcf_mech_type)] = mech_indx; KCF_PROV_REFHOLD(prov_desc); KCF_PROV_IREFHOLD(prov_desc); dual_fg_mask = mech_info->cm_func_group_mask & CRYPTO_FG_DUAL_MASK; if (dual_fg_mask == ((crypto_func_group_t)0)) goto add_entry; simple_fg_mask = (mech_info->cm_func_group_mask & CRYPTO_FG_SIMPLEOP_MASK) | CRYPTO_FG_RANDOM; for (i = 0; i < prov_desc->pd_mech_list_count; i++) { dmi = &prov_desc->pd_mechanisms[i]; /* skip self */ if (dmi->cm_mech_number == mech_info->cm_mech_number) continue; /* skip if not a dual operation mechanism */ if (!(dmi->cm_func_group_mask & dual_fg_mask) || (dmi->cm_func_group_mask & simple_fg_mask)) continue; mt = kcf_mech_hash_find(dmi->cm_mech_name); if (mt == CRYPTO_MECH_INVALID) continue; if (kcf_get_mech_entry(mt, &me) != KCF_SUCCESS) continue; mil = kmem_zalloc(sizeof (*mil), KM_SLEEP); mil2 = kmem_zalloc(sizeof (*mil2), KM_SLEEP); /* * Ignore hard-coded entries in the mech table * if the provider hasn't registered. 
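 * (That is, skip the pre-defined entries set up by
 * kcf_init_mech_tabs() that no provider has claimed yet; for such an
 * entry both me_hw_prov_chain and me_sw_prov are still NULL.)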
*/ mutex_enter(&me->me_mutex); if (me->me_hw_prov_chain == NULL && me->me_sw_prov == NULL) { mutex_exit(&me->me_mutex); kmem_free(mil, sizeof (*mil)); kmem_free(mil2, sizeof (*mil2)); continue; } /* * Add other dual mechanisms that have registered * with the framework to this mechanism's * cross-reference list. */ mil->ml_mech_info = *dmi; /* struct assignment */ mil->ml_kcf_mechid = mt; /* add to head of list */ mil->ml_next = prov_mech->pm_mi_list; prov_mech->pm_mi_list = mil; if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER) prov_mech2 = me->me_hw_prov_chain; else prov_mech2 = me->me_sw_prov; if (prov_mech2 == NULL) { kmem_free(mil2, sizeof (*mil2)); mutex_exit(&me->me_mutex); continue; } /* * Update all other cross-reference lists by * adding this new mechanism. */ while (prov_mech2 != NULL) { if (prov_mech2->pm_prov_desc == prov_desc) { /* struct assignment */ mil2->ml_mech_info = *mech_info; mil2->ml_kcf_mechid = kcf_mech_type; /* add to head of list */ mil2->ml_next = prov_mech2->pm_mi_list; prov_mech2->pm_mi_list = mil2; break; } prov_mech2 = prov_mech2->pm_next; } if (prov_mech2 == NULL) kmem_free(mil2, sizeof (*mil2)); mutex_exit(&me->me_mutex); } add_entry: /* * Add new kcf_prov_mech_desc at the front of HW providers * chain. */ switch (prov_desc->pd_prov_type) { case CRYPTO_HW_PROVIDER: mutex_enter(&mech_entry->me_mutex); prov_mech->pm_me = mech_entry; prov_mech->pm_next = mech_entry->me_hw_prov_chain; mech_entry->me_hw_prov_chain = prov_mech; mech_entry->me_num_hwprov++; mutex_exit(&mech_entry->me_mutex); break; case CRYPTO_SW_PROVIDER: mutex_enter(&mech_entry->me_mutex); if (mech_entry->me_sw_prov != NULL) { /* * There is already a SW provider for this mechanism. * Since we allow only one SW provider per mechanism, * report this condition. */ cmn_err(CE_WARN, "The cryptographic software provider " "\"%s\" will not be used for %s. The provider " "\"%s\" will be used for this mechanism " "instead.", prov_desc->pd_description, mech_info->cm_mech_name, mech_entry->me_sw_prov->pm_prov_desc-> pd_description); KCF_PROV_REFRELE(prov_desc); kmem_free(prov_mech, sizeof (kcf_prov_mech_desc_t)); prov_mech = NULL; } else { /* * Set the provider as the software provider for * this mechanism. */ mech_entry->me_sw_prov = prov_mech; /* We'll wrap around after 4 billion registrations! */ mech_entry->me_gen_swprov = kcf_gen_swprov++; } mutex_exit(&mech_entry->me_mutex); break; default: break; } *pmdpp = prov_mech; return (KCF_SUCCESS); } /* * kcf_remove_mech_provider() * * Arguments: * . mech_name: the name of the mechanism. * . prov_desc: The provider descriptor * * Description: * Removes a provider from chain of provider descriptors. * The provider is made unavailable to kernel consumers for the specified * mechanism. * * Context: * User context only. */ void kcf_remove_mech_provider(char *mech_name, kcf_provider_desc_t *prov_desc) { crypto_mech_type_t mech_type; kcf_prov_mech_desc_t *prov_mech = NULL, *prov_chain; kcf_prov_mech_desc_t **prev_entry_next; kcf_mech_entry_t *mech_entry; crypto_mech_info_list_t *mil, *mil2, *next, **prev_next; ASSERT(prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); /* get the KCF mech type that was assigned to the mechanism */ if ((mech_type = kcf_mech_hash_find(mech_name)) == CRYPTO_MECH_INVALID) { /* * Provider was not allowed for this mech due to policy or * configuration. 
*/ return; } /* get a ptr to the mech_entry that was created */ if (kcf_get_mech_entry(mech_type, &mech_entry) != KCF_SUCCESS) { /* * Provider was not allowed for this mech due to policy or * configuration. */ return; } mutex_enter(&mech_entry->me_mutex); switch (prov_desc->pd_prov_type) { case CRYPTO_HW_PROVIDER: /* find the provider in the mech_entry chain */ prev_entry_next = &mech_entry->me_hw_prov_chain; prov_mech = mech_entry->me_hw_prov_chain; while (prov_mech != NULL && prov_mech->pm_prov_desc != prov_desc) { prev_entry_next = &prov_mech->pm_next; prov_mech = prov_mech->pm_next; } if (prov_mech == NULL) { /* entry not found, simply return */ mutex_exit(&mech_entry->me_mutex); return; } /* remove provider entry from mech_entry chain */ *prev_entry_next = prov_mech->pm_next; ASSERT(mech_entry->me_num_hwprov > 0); mech_entry->me_num_hwprov--; break; case CRYPTO_SW_PROVIDER: if (mech_entry->me_sw_prov == NULL || mech_entry->me_sw_prov->pm_prov_desc != prov_desc) { /* not the software provider for this mechanism */ mutex_exit(&mech_entry->me_mutex); return; } prov_mech = mech_entry->me_sw_prov; mech_entry->me_sw_prov = NULL; break; default: /* unexpected crypto_provider_type_t */ mutex_exit(&mech_entry->me_mutex); return; } mutex_exit(&mech_entry->me_mutex); /* Free the dual ops cross-reference lists */ mil = prov_mech->pm_mi_list; while (mil != NULL) { next = mil->ml_next; if (kcf_get_mech_entry(mil->ml_kcf_mechid, &mech_entry) != KCF_SUCCESS) { mil = next; continue; } mutex_enter(&mech_entry->me_mutex); if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER) prov_chain = mech_entry->me_hw_prov_chain; else prov_chain = mech_entry->me_sw_prov; while (prov_chain != NULL) { if (prov_chain->pm_prov_desc == prov_desc) { prev_next = &prov_chain->pm_mi_list; mil2 = prov_chain->pm_mi_list; while (mil2 != NULL && mil2->ml_kcf_mechid != mech_type) { prev_next = &mil2->ml_next; mil2 = mil2->ml_next; } if (mil2 != NULL) { *prev_next = mil2->ml_next; kmem_free(mil2, sizeof (*mil2)); } break; } prov_chain = prov_chain->pm_next; } mutex_exit(&mech_entry->me_mutex); kmem_free(mil, sizeof (crypto_mech_info_list_t)); mil = next; } /* free entry */ KCF_PROV_REFRELE(prov_mech->pm_prov_desc); KCF_PROV_IREFRELE(prov_mech->pm_prov_desc); kmem_free(prov_mech, sizeof (kcf_prov_mech_desc_t)); } /* * kcf_get_mech_entry() * * Arguments: * . The framework mechanism type * . Storage for the mechanism entry * * Description: * Retrieves the mechanism entry for the mech. * * Context: * User and interrupt contexts. * * Returns: * KCF_MECHANISM_XXX appropriate error code. * KCF_SUCCESS otherwise. */ int kcf_get_mech_entry(crypto_mech_type_t mech_type, kcf_mech_entry_t **mep) { kcf_ops_class_t class; int index; kcf_mech_entry_tab_t *me_tab; ASSERT(mep != NULL); class = KCF_MECH2CLASS(mech_type); if ((class < KCF_FIRST_OPSCLASS) || (class > KCF_LAST_OPSCLASS)) { /* the caller won't need to know it's an invalid class */ return (KCF_INVALID_MECH_NUMBER); } me_tab = &kcf_mech_tabs_tab[class]; index = KCF_MECH2INDEX(mech_type); if ((index < 0) || (index >= me_tab->met_size)) { return (KCF_INVALID_MECH_NUMBER); } *mep = &((me_tab->met_tab)[index]); return (KCF_SUCCESS); } /* CURRENTLY UNSUPPORTED: attempting to load the module if it isn't found */ /* * Lookup the hash table for an entry that matches the mechname. * If there are no hardware or software providers for the mechanism, * but there is an unloaded software provider, this routine will attempt * to load it. 
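 * For illustration: a typical kernel consumer calls, e.g.,
 * crypto_mech2id(SUN_CKM_AES_CBC) once and caches the returned
 * crypto_mech_type_t for use in later framework calls.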
* * If the MOD_NOAUTOUNLOAD flag is not set, a software provider is * in constant danger of being unloaded. For consumers that call * crypto_mech2id() only once, the provider will not be reloaded * if it becomes unloaded. If a provider gets loaded elsewhere * without the MOD_NOAUTOUNLOAD flag being set, we set it now. */ crypto_mech_type_t crypto_mech2id_common(char *mechname, boolean_t load_module) { - crypto_mech_type_t mt = kcf_mech_hash_find(mechname); - return (mt); + (void) load_module; + return (kcf_mech_hash_find(mechname)); } diff --git a/module/icp/core/kcf_prov_lib.c b/module/icp/core/kcf_prov_lib.c index 1b115d976232..6e8853c56dc6 100644 --- a/module/icp/core/kcf_prov_lib.c +++ b/module/icp/core/kcf_prov_lib.c @@ -1,227 +1,228 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include /* * Utility routine to copy a buffer to a crypto_data structure. */ /* * Utility routine to apply the command, 'cmd', to the * data in the uio structure. */ int crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd, void *digest_ctx, void (*update)(void)) { + (void) digest_ctx, (void) update; zfs_uio_t *uiop = data->cd_uio; off_t offset = data->cd_offset; size_t length = len; uint_t vec_idx; size_t cur_len; uchar_t *datap; ASSERT(data->cd_format == CRYPTO_DATA_UIO); if (zfs_uio_segflg(uiop) != UIO_SYSSPACE) { return (CRYPTO_ARGUMENTS_BAD); } /* * Jump to the first iovec containing data to be * processed. */ offset = zfs_uio_index_at_offset(uiop, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) { /* * The caller specified an offset that is larger than * the total size of the buffers it provided. */ return (CRYPTO_DATA_LEN_RANGE); } while (vec_idx < zfs_uio_iovcnt(uiop) && length > 0) { cur_len = MIN(zfs_uio_iovlen(uiop, vec_idx) - offset, length); datap = (uchar_t *)(zfs_uio_iovbase(uiop, vec_idx) + offset); switch (cmd) { case COPY_FROM_DATA: bcopy(datap, buf, cur_len); buf += cur_len; break; case COPY_TO_DATA: bcopy(buf, datap, cur_len); buf += cur_len; break; case COMPARE_TO_DATA: if (bcmp(datap, buf, cur_len)) return (CRYPTO_SIGNATURE_INVALID); buf += cur_len; break; case MD5_DIGEST_DATA: case SHA1_DIGEST_DATA: case SHA2_DIGEST_DATA: case GHASH_DATA: return (CRYPTO_ARGUMENTS_BAD); } length -= cur_len; vec_idx++; offset = 0; } if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed. 
*/ switch (cmd) { case COPY_TO_DATA: data->cd_length = len; return (CRYPTO_BUFFER_TOO_SMALL); default: return (CRYPTO_DATA_LEN_RANGE); } } return (CRYPTO_SUCCESS); } int crypto_put_output_data(uchar_t *buf, crypto_data_t *output, int len) { switch (output->cd_format) { case CRYPTO_DATA_RAW: if (output->cd_raw.iov_len < len) { output->cd_length = len; return (CRYPTO_BUFFER_TOO_SMALL); } bcopy(buf, (uchar_t *)(output->cd_raw.iov_base + output->cd_offset), len); break; case CRYPTO_DATA_UIO: return (crypto_uio_data(output, buf, len, COPY_TO_DATA, NULL, NULL)); default: return (CRYPTO_ARGUMENTS_BAD); } return (CRYPTO_SUCCESS); } int crypto_update_iov(void *ctx, crypto_data_t *input, crypto_data_t *output, int (*cipher)(void *, caddr_t, size_t, crypto_data_t *), void (*copy_block)(uint8_t *, uint64_t *)) { common_ctx_t *common_ctx = ctx; int rv; ASSERT(input != output); if (input->cd_miscdata != NULL) { copy_block((uint8_t *)input->cd_miscdata, &common_ctx->cc_iv[0]); } if (input->cd_raw.iov_len < input->cd_length) return (CRYPTO_ARGUMENTS_BAD); rv = (cipher)(ctx, input->cd_raw.iov_base + input->cd_offset, input->cd_length, output); return (rv); } int crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output, int (*cipher)(void *, caddr_t, size_t, crypto_data_t *), void (*copy_block)(uint8_t *, uint64_t *)) { common_ctx_t *common_ctx = ctx; zfs_uio_t *uiop = input->cd_uio; off_t offset = input->cd_offset; size_t length = input->cd_length; uint_t vec_idx; size_t cur_len; ASSERT(input != output); if (input->cd_miscdata != NULL) { copy_block((uint8_t *)input->cd_miscdata, &common_ctx->cc_iv[0]); } if (zfs_uio_segflg(input->cd_uio) != UIO_SYSSPACE) { return (CRYPTO_ARGUMENTS_BAD); } /* * Jump to the first iovec containing data to be * processed. */ offset = zfs_uio_index_at_offset(uiop, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) { /* * The caller specified an offset that is larger than the * total size of the buffers it provided. */ return (CRYPTO_DATA_LEN_RANGE); } /* * Now process the iovecs. */ while (vec_idx < zfs_uio_iovcnt(uiop) && length > 0) { cur_len = MIN(zfs_uio_iovlen(uiop, vec_idx) - offset, length); int rv = (cipher)(ctx, zfs_uio_iovbase(uiop, vec_idx) + offset, cur_len, output); if (rv != CRYPTO_SUCCESS) { return (rv); } length -= cur_len; vec_idx++; offset = 0; } if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed, i.e. * The caller requested to digest more data than it provided. */ return (CRYPTO_DATA_LEN_RANGE); } return (CRYPTO_SUCCESS); } diff --git a/module/icp/core/kcf_sched.c b/module/icp/core/kcf_sched.c index 81fd15f8ea26..e4ccdbde9fb4 100644 --- a/module/icp/core/kcf_sched.c +++ b/module/icp/core/kcf_sched.c @@ -1,1780 +1,1780 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
* If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * This file contains the core framework routines for the * kernel cryptographic framework. These routines are at the * layer, between the kernel API/ioctls and the SPI. */ #include #include #include #include #include kcf_global_swq_t *gswq; /* Global software queue */ /* Thread pool related variables */ static kcf_pool_t *kcfpool; /* Thread pool of kcfd LWPs */ int kcf_maxthreads = 2; int kcf_minthreads = 1; int kcf_thr_multiple = 2; /* Boot-time tunable for experimentation */ static ulong_t kcf_idlethr_timeout; #define KCF_DEFAULT_THRTIMEOUT 60000000 /* 60 seconds */ /* kmem caches used by the scheduler */ static kmem_cache_t *kcf_sreq_cache; static kmem_cache_t *kcf_areq_cache; static kmem_cache_t *kcf_context_cache; /* Global request ID table */ static kcf_reqid_table_t *kcf_reqid_table[REQID_TABLES]; /* KCF stats. Not protected. */ static kcf_stats_t kcf_ksdata = { { "total threads in pool", KSTAT_DATA_UINT32}, { "idle threads in pool", KSTAT_DATA_UINT32}, { "min threads in pool", KSTAT_DATA_UINT32}, { "max threads in pool", KSTAT_DATA_UINT32}, { "requests in gswq", KSTAT_DATA_UINT32}, { "max requests in gswq", KSTAT_DATA_UINT32}, { "threads for HW taskq", KSTAT_DATA_UINT32}, { "minalloc for HW taskq", KSTAT_DATA_UINT32}, { "maxalloc for HW taskq", KSTAT_DATA_UINT32} }; static kstat_t *kcf_misc_kstat = NULL; ulong_t kcf_swprov_hndl = 0; static kcf_areq_node_t *kcf_areqnode_alloc(kcf_provider_desc_t *, kcf_context_t *, crypto_call_req_t *, kcf_req_params_t *, boolean_t); static int kcf_disp_sw_request(kcf_areq_node_t *); static void process_req_hwp(void *); static int kcf_enqueue(kcf_areq_node_t *); static void kcfpool_alloc(void); static void kcf_reqid_delete(kcf_areq_node_t *areq); static crypto_req_id_t kcf_reqid_insert(kcf_areq_node_t *areq); static int kcf_misc_kstat_update(kstat_t *ksp, int rw); /* * Create a new context. */ crypto_ctx_t * kcf_new_ctx(crypto_call_req_t *crq, kcf_provider_desc_t *pd, crypto_session_id_t sid) { crypto_ctx_t *ctx; kcf_context_t *kcf_ctx; kcf_ctx = kmem_cache_alloc(kcf_context_cache, (crq == NULL) ? KM_SLEEP : KM_NOSLEEP); if (kcf_ctx == NULL) return (NULL); /* initialize the context for the consumer */ kcf_ctx->kc_refcnt = 1; kcf_ctx->kc_req_chain_first = NULL; kcf_ctx->kc_req_chain_last = NULL; kcf_ctx->kc_secondctx = NULL; KCF_PROV_REFHOLD(pd); kcf_ctx->kc_prov_desc = pd; kcf_ctx->kc_sw_prov_desc = NULL; kcf_ctx->kc_mech = NULL; ctx = &kcf_ctx->kc_glbl_ctx; ctx->cc_provider = pd->pd_prov_handle; ctx->cc_session = sid; ctx->cc_provider_private = NULL; ctx->cc_framework_private = (void *)kcf_ctx; ctx->cc_flags = 0; ctx->cc_opstate = NULL; return (ctx); } /* * Allocate a new async request node. * * ictx - Framework private context pointer * crq - Has callback function and argument. Should be non NULL. 
* req - The parameters to pass to the SPI */ static kcf_areq_node_t * kcf_areqnode_alloc(kcf_provider_desc_t *pd, kcf_context_t *ictx, crypto_call_req_t *crq, kcf_req_params_t *req, boolean_t isdual) { kcf_areq_node_t *arptr, *areq; ASSERT(crq != NULL); arptr = kmem_cache_alloc(kcf_areq_cache, KM_NOSLEEP); if (arptr == NULL) return (NULL); arptr->an_state = REQ_ALLOCATED; arptr->an_reqarg = *crq; arptr->an_params = *req; arptr->an_context = ictx; arptr->an_isdual = isdual; arptr->an_next = arptr->an_prev = NULL; KCF_PROV_REFHOLD(pd); arptr->an_provider = pd; arptr->an_tried_plist = NULL; arptr->an_refcnt = 1; arptr->an_idnext = arptr->an_idprev = NULL; /* * Requests for context-less operations do not use the * fields - an_is_my_turn, and an_ctxchain_next. */ if (ictx == NULL) return (arptr); KCF_CONTEXT_REFHOLD(ictx); /* * Chain this request to the context. */ mutex_enter(&ictx->kc_in_use_lock); arptr->an_ctxchain_next = NULL; if ((areq = ictx->kc_req_chain_last) == NULL) { arptr->an_is_my_turn = B_TRUE; ictx->kc_req_chain_last = ictx->kc_req_chain_first = arptr; } else { ASSERT(ictx->kc_req_chain_first != NULL); arptr->an_is_my_turn = B_FALSE; /* Insert the new request to the end of the chain. */ areq->an_ctxchain_next = arptr; ictx->kc_req_chain_last = arptr; } mutex_exit(&ictx->kc_in_use_lock); return (arptr); } /* * Queue the request node and do one of the following: * - If there is an idle thread signal it to run. * - If there is no idle thread and max running threads is not * reached, signal the creator thread for more threads. * * If the two conditions above are not met, we don't need to do * anything. The request will be picked up by one of the * worker threads when it becomes available. */ static int kcf_disp_sw_request(kcf_areq_node_t *areq) { int err; int cnt = 0; if ((err = kcf_enqueue(areq)) != 0) return (err); if (kcfpool->kp_idlethreads > 0) { /* Signal an idle thread to run */ mutex_enter(&gswq->gs_lock); cv_signal(&gswq->gs_cv); mutex_exit(&gswq->gs_lock); return (CRYPTO_QUEUED); } /* * We keep the number of running threads to be at * kcf_minthreads to reduce gs_lock contention. */ cnt = kcf_minthreads - (kcfpool->kp_threads - kcfpool->kp_blockedthreads); if (cnt > 0) { /* * The following ensures the number of threads in pool * does not exceed kcf_maxthreads. */ cnt = MIN(cnt, kcf_maxthreads - (int)kcfpool->kp_threads); if (cnt > 0) { /* Signal the creator thread for more threads */ mutex_enter(&kcfpool->kp_user_lock); if (!kcfpool->kp_signal_create_thread) { kcfpool->kp_signal_create_thread = B_TRUE; kcfpool->kp_nthrs = cnt; cv_signal(&kcfpool->kp_user_cv); } mutex_exit(&kcfpool->kp_user_lock); } } return (CRYPTO_QUEUED); } /* * This routine is called by the taskq associated with * each hardware provider. We notify the kernel consumer * via the callback routine in case of CRYPTO_SUCCESS or * a failure. * * A request can be of type kcf_areq_node_t or of type * kcf_sreq_node_t. */ static void process_req_hwp(void *ireq) { int error = 0; crypto_ctx_t *ctx; kcf_call_type_t ctype; kcf_provider_desc_t *pd; kcf_areq_node_t *areq = (kcf_areq_node_t *)ireq; kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)ireq; pd = ((ctype = GET_REQ_TYPE(ireq)) == CRYPTO_SYNCH) ? sreq->sn_provider : areq->an_provider; /* * Wait if flow control is in effect for the provider. A * CRYPTO_PROVIDER_READY or CRYPTO_PROVIDER_FAILED * notification will signal us. We also get signaled if * the provider is unregistering. 
*/ if (pd->pd_state == KCF_PROV_BUSY) { mutex_enter(&pd->pd_lock); while (pd->pd_state == KCF_PROV_BUSY) cv_wait(&pd->pd_resume_cv, &pd->pd_lock); mutex_exit(&pd->pd_lock); } /* * Bump the internal reference count while the request is being * processed. This is how we know when it's safe to unregister * a provider. This step must precede the pd_state check below. */ KCF_PROV_IREFHOLD(pd); /* * Fail the request if the provider has failed. We return a * recoverable error and the notified clients attempt any * recovery. For async clients this is done in kcf_aop_done() * and for sync clients it is done in the k-api routines. */ if (pd->pd_state >= KCF_PROV_FAILED) { error = CRYPTO_DEVICE_ERROR; goto bail; } if (ctype == CRYPTO_SYNCH) { mutex_enter(&sreq->sn_lock); sreq->sn_state = REQ_INPROGRESS; mutex_exit(&sreq->sn_lock); ctx = sreq->sn_context ? &sreq->sn_context->kc_glbl_ctx : NULL; error = common_submit_request(sreq->sn_provider, ctx, sreq->sn_params, sreq); } else { kcf_context_t *ictx; ASSERT(ctype == CRYPTO_ASYNCH); /* * We are in the per-hardware provider thread context and * hence can sleep. Note that the caller would have done * a taskq_dispatch(..., TQ_NOSLEEP) and would have returned. */ ctx = (ictx = areq->an_context) ? &ictx->kc_glbl_ctx : NULL; mutex_enter(&areq->an_lock); /* * We need to maintain ordering for multi-part requests. * an_is_my_turn is set to B_TRUE initially for a request * when it is enqueued and there are no other requests * for that context. It is set later from kcf_aop_done() when * the request before us in the chain of requests for the * context completes. We get signaled at that point. */ if (ictx != NULL) { ASSERT(ictx->kc_prov_desc == areq->an_provider); while (areq->an_is_my_turn == B_FALSE) { cv_wait(&areq->an_turn_cv, &areq->an_lock); } } areq->an_state = REQ_INPROGRESS; mutex_exit(&areq->an_lock); error = common_submit_request(areq->an_provider, ctx, &areq->an_params, areq); } bail: if (error == CRYPTO_QUEUED) { /* * The request is queued by the provider and we should * get a crypto_op_notification() from the provider later. * We notify the consumer at that time. */ return; } else { /* CRYPTO_SUCCESS or other failure */ KCF_PROV_IREFRELE(pd); if (ctype == CRYPTO_SYNCH) kcf_sop_done(sreq, error); else kcf_aop_done(areq, error); } } /* * This routine checks if a request can be retried on another * provider. If true, mech1 is initialized to point to the mechanism * structure. mech2 is also initialized in case of a dual operation. fg * is initialized to the correct crypto_func_group_t bit flag. They are * initialized by this routine, so that the caller can pass them to a * kcf_get_mech_provider() or kcf_get_dual_provider() with no further change. * * We check that the request is for a init or atomic routine and that * it is for one of the operation groups used from k-api . */ static boolean_t can_resubmit(kcf_areq_node_t *areq, crypto_mechanism_t **mech1, crypto_mechanism_t **mech2, crypto_func_group_t *fg) { kcf_req_params_t *params; kcf_op_type_t optype; params = &areq->an_params; optype = params->rp_optype; if (!(IS_INIT_OP(optype) || IS_ATOMIC_OP(optype))) return (B_FALSE); switch (params->rp_opgrp) { case KCF_OG_DIGEST: { kcf_digest_ops_params_t *dops = ¶ms->rp_u.digest_params; dops->do_mech.cm_type = dops->do_framework_mechtype; *mech1 = &dops->do_mech; *fg = (optype == KCF_OP_INIT) ? 
CRYPTO_FG_DIGEST : CRYPTO_FG_DIGEST_ATOMIC; break; } case KCF_OG_MAC: { kcf_mac_ops_params_t *mops = ¶ms->rp_u.mac_params; mops->mo_mech.cm_type = mops->mo_framework_mechtype; *mech1 = &mops->mo_mech; *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_MAC : CRYPTO_FG_MAC_ATOMIC; break; } case KCF_OG_SIGN: { kcf_sign_ops_params_t *sops = ¶ms->rp_u.sign_params; sops->so_mech.cm_type = sops->so_framework_mechtype; *mech1 = &sops->so_mech; switch (optype) { case KCF_OP_INIT: *fg = CRYPTO_FG_SIGN; break; case KCF_OP_ATOMIC: *fg = CRYPTO_FG_SIGN_ATOMIC; break; default: ASSERT(optype == KCF_OP_SIGN_RECOVER_ATOMIC); *fg = CRYPTO_FG_SIGN_RECOVER_ATOMIC; } break; } case KCF_OG_VERIFY: { kcf_verify_ops_params_t *vops = ¶ms->rp_u.verify_params; vops->vo_mech.cm_type = vops->vo_framework_mechtype; *mech1 = &vops->vo_mech; switch (optype) { case KCF_OP_INIT: *fg = CRYPTO_FG_VERIFY; break; case KCF_OP_ATOMIC: *fg = CRYPTO_FG_VERIFY_ATOMIC; break; default: ASSERT(optype == KCF_OP_VERIFY_RECOVER_ATOMIC); *fg = CRYPTO_FG_VERIFY_RECOVER_ATOMIC; } break; } case KCF_OG_ENCRYPT: { kcf_encrypt_ops_params_t *eops = ¶ms->rp_u.encrypt_params; eops->eo_mech.cm_type = eops->eo_framework_mechtype; *mech1 = &eops->eo_mech; *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_ENCRYPT : CRYPTO_FG_ENCRYPT_ATOMIC; break; } case KCF_OG_DECRYPT: { kcf_decrypt_ops_params_t *dcrops = ¶ms->rp_u.decrypt_params; dcrops->dop_mech.cm_type = dcrops->dop_framework_mechtype; *mech1 = &dcrops->dop_mech; *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_DECRYPT : CRYPTO_FG_DECRYPT_ATOMIC; break; } case KCF_OG_ENCRYPT_MAC: { kcf_encrypt_mac_ops_params_t *eops = ¶ms->rp_u.encrypt_mac_params; eops->em_encr_mech.cm_type = eops->em_framework_encr_mechtype; *mech1 = &eops->em_encr_mech; eops->em_mac_mech.cm_type = eops->em_framework_mac_mechtype; *mech2 = &eops->em_mac_mech; *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_ENCRYPT_MAC : CRYPTO_FG_ENCRYPT_MAC_ATOMIC; break; } case KCF_OG_MAC_DECRYPT: { kcf_mac_decrypt_ops_params_t *dops = ¶ms->rp_u.mac_decrypt_params; dops->md_mac_mech.cm_type = dops->md_framework_mac_mechtype; *mech1 = &dops->md_mac_mech; dops->md_decr_mech.cm_type = dops->md_framework_decr_mechtype; *mech2 = &dops->md_decr_mech; *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_MAC_DECRYPT : CRYPTO_FG_MAC_DECRYPT_ATOMIC; break; } default: return (B_FALSE); } return (B_TRUE); } /* * This routine is called when a request to a provider has failed * with a recoverable error. This routine tries to find another provider * and dispatches the request to the new provider, if one is available. * We reuse the request structure. * * A return value of NULL from kcf_get_mech_provider() indicates * we have tried the last provider. */ static int kcf_resubmit_request(kcf_areq_node_t *areq) { int error = CRYPTO_FAILED; kcf_context_t *ictx; kcf_provider_desc_t *old_pd; kcf_provider_desc_t *new_pd; crypto_mechanism_t *mech1 = NULL, *mech2 = NULL; crypto_mech_type_t prov_mt1, prov_mt2; crypto_func_group_t fg = 0; if (!can_resubmit(areq, &mech1, &mech2, &fg)) return (error); old_pd = areq->an_provider; /* * Add old_pd to the list of providers already tried. We release * the hold on old_pd (from the earlier kcf_get_mech_provider()) in * kcf_free_triedlist(). 
*/ if (kcf_insert_triedlist(&areq->an_tried_plist, old_pd, KM_NOSLEEP) == NULL) return (error); if (mech1 && !mech2) { new_pd = kcf_get_mech_provider(mech1->cm_type, NULL, &error, areq->an_tried_plist, fg, (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), 0); } else { ASSERT(mech1 != NULL && mech2 != NULL); new_pd = kcf_get_dual_provider(mech1, mech2, NULL, &prov_mt1, &prov_mt2, &error, areq->an_tried_plist, fg, fg, (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), 0); } if (new_pd == NULL) return (error); /* * We reuse the old context by resetting provider specific * fields in it. */ if ((ictx = areq->an_context) != NULL) { crypto_ctx_t *ctx; ASSERT(old_pd == ictx->kc_prov_desc); KCF_PROV_REFRELE(ictx->kc_prov_desc); KCF_PROV_REFHOLD(new_pd); ictx->kc_prov_desc = new_pd; ctx = &ictx->kc_glbl_ctx; ctx->cc_provider = new_pd->pd_prov_handle; ctx->cc_session = new_pd->pd_sid; ctx->cc_provider_private = NULL; } /* We reuse areq. by resetting the provider and context fields. */ KCF_PROV_REFRELE(old_pd); KCF_PROV_REFHOLD(new_pd); areq->an_provider = new_pd; mutex_enter(&areq->an_lock); areq->an_state = REQ_WAITING; mutex_exit(&areq->an_lock); switch (new_pd->pd_prov_type) { case CRYPTO_SW_PROVIDER: error = kcf_disp_sw_request(areq); break; case CRYPTO_HW_PROVIDER: { taskq_t *taskq = new_pd->pd_sched_info.ks_taskq; if (taskq_dispatch(taskq, process_req_hwp, areq, TQ_NOSLEEP) == TASKQID_INVALID) { error = CRYPTO_HOST_MEMORY; } else { error = CRYPTO_QUEUED; } break; default: break; } } return (error); } static inline int EMPTY_TASKQ(taskq_t *tq) { #ifdef _KERNEL return (tq->tq_lowest_id == tq->tq_next_id); #else return (tq->tq_task.tqent_next == &tq->tq_task || tq->tq_active == 0); #endif } /* * Routine called by both ioctl and k-api. The consumer should * bundle the parameters into a kcf_req_params_t structure. A bunch * of macros are available in ops_impl.h for this bundling. They are: * * KCF_WRAP_DIGEST_OPS_PARAMS() * KCF_WRAP_MAC_OPS_PARAMS() * KCF_WRAP_ENCRYPT_OPS_PARAMS() * KCF_WRAP_DECRYPT_OPS_PARAMS() ... etc. * * It is the caller's responsibility to free the ctx argument when * appropriate. See the KCF_CONTEXT_COND_RELEASE macro for details. */ int kcf_submit_request(kcf_provider_desc_t *pd, crypto_ctx_t *ctx, crypto_call_req_t *crq, kcf_req_params_t *params, boolean_t cont) { int error = CRYPTO_SUCCESS; kcf_areq_node_t *areq; kcf_sreq_node_t *sreq; kcf_context_t *kcf_ctx; taskq_t *taskq = pd->pd_sched_info.ks_taskq; kcf_ctx = ctx ? (kcf_context_t *)ctx->cc_framework_private : NULL; /* Synchronous cases */ if (crq == NULL) { switch (pd->pd_prov_type) { case CRYPTO_SW_PROVIDER: error = common_submit_request(pd, ctx, params, KCF_RHNDL(KM_SLEEP)); break; case CRYPTO_HW_PROVIDER: /* * Special case for CRYPTO_SYNCHRONOUS providers that * never return a CRYPTO_QUEUED error. We skip any * request allocation and call the SPI directly. */ if ((pd->pd_flags & CRYPTO_SYNCHRONOUS) && EMPTY_TASKQ(taskq)) { KCF_PROV_IREFHOLD(pd); if (pd->pd_state == KCF_PROV_READY) { error = common_submit_request(pd, ctx, params, KCF_RHNDL(KM_SLEEP)); KCF_PROV_IREFRELE(pd); ASSERT(error != CRYPTO_QUEUED); break; } KCF_PROV_IREFRELE(pd); } sreq = kmem_cache_alloc(kcf_sreq_cache, KM_SLEEP); sreq->sn_state = REQ_ALLOCATED; sreq->sn_rv = CRYPTO_FAILED; sreq->sn_params = params; /* * Note that we do not need to hold the context * for synchronous case as the context will never * become invalid underneath us. We do not need to hold * the provider here either as the caller has a hold. 
*/ sreq->sn_context = kcf_ctx; ASSERT(KCF_PROV_REFHELD(pd)); sreq->sn_provider = pd; ASSERT(taskq != NULL); /* * Call the SPI directly if the taskq is empty and the * provider is not busy, else dispatch to the taskq. * Calling directly is fine as this is the synchronous * case. This is unlike the asynchronous case where we * must always dispatch to the taskq. */ if (EMPTY_TASKQ(taskq) && pd->pd_state == KCF_PROV_READY) { process_req_hwp(sreq); } else { /* * We can not tell from taskq_dispatch() return * value if we exceeded maxalloc. Hence the * check here. Since we are allowed to wait in * the synchronous case, we wait for the taskq * to become empty. */ if (taskq->tq_nalloc >= crypto_taskq_maxalloc) { taskq_wait(taskq); } (void) taskq_dispatch(taskq, process_req_hwp, sreq, TQ_SLEEP); } /* * Wait for the notification to arrive, * if the operation is not done yet. * Bug# 4722589 will make the wait a cv_wait_sig(). */ mutex_enter(&sreq->sn_lock); while (sreq->sn_state < REQ_DONE) cv_wait(&sreq->sn_cv, &sreq->sn_lock); mutex_exit(&sreq->sn_lock); error = sreq->sn_rv; kmem_cache_free(kcf_sreq_cache, sreq); break; default: error = CRYPTO_FAILED; break; } } else { /* Asynchronous cases */ switch (pd->pd_prov_type) { case CRYPTO_SW_PROVIDER: if (!(crq->cr_flag & CRYPTO_ALWAYS_QUEUE)) { /* * This case has less overhead since there is * no switching of context. */ error = common_submit_request(pd, ctx, params, KCF_RHNDL(KM_NOSLEEP)); } else { /* * CRYPTO_ALWAYS_QUEUE is set. We need to * queue the request and return. */ areq = kcf_areqnode_alloc(pd, kcf_ctx, crq, params, cont); if (areq == NULL) error = CRYPTO_HOST_MEMORY; else { if (!(crq->cr_flag & CRYPTO_SKIP_REQID)) { /* * Set the request handle. This handle * is used for any crypto_cancel_req(9f) * calls from the consumer. We have to * do this before dispatching the * request. */ crq->cr_reqid = kcf_reqid_insert(areq); } error = kcf_disp_sw_request(areq); /* * There is an error processing this * request. Remove the handle and * release the request structure. */ if (error != CRYPTO_QUEUED) { if (!(crq->cr_flag & CRYPTO_SKIP_REQID)) kcf_reqid_delete(areq); KCF_AREQ_REFRELE(areq); } } } break; case CRYPTO_HW_PROVIDER: /* * We need to queue the request and return. */ areq = kcf_areqnode_alloc(pd, kcf_ctx, crq, params, cont); if (areq == NULL) { error = CRYPTO_HOST_MEMORY; goto done; } ASSERT(taskq != NULL); /* * We can not tell from taskq_dispatch() return * value if we exceeded maxalloc. Hence the check * here. */ if (taskq->tq_nalloc >= crypto_taskq_maxalloc) { error = CRYPTO_BUSY; KCF_AREQ_REFRELE(areq); goto done; } if (!(crq->cr_flag & CRYPTO_SKIP_REQID)) { /* * Set the request handle. This handle is used * for any crypto_cancel_req(9f) calls from the * consumer. We have to do this before dispatching * the request. */ crq->cr_reqid = kcf_reqid_insert(areq); } if (taskq_dispatch(taskq, process_req_hwp, areq, TQ_NOSLEEP) == TASKQID_INVALID) { error = CRYPTO_HOST_MEMORY; if (!(crq->cr_flag & CRYPTO_SKIP_REQID)) kcf_reqid_delete(areq); KCF_AREQ_REFRELE(areq); } else { error = CRYPTO_QUEUED; } break; default: error = CRYPTO_FAILED; break; } } done: return (error); } /* * We're done with this framework context, so free it. Note that freeing * framework context (kcf_context) frees the global context (crypto_ctx). * * The provider is responsible for freeing provider private context after a * final or single operation and resetting the cc_provider_private field * to NULL. It should do this before it notifies the framework of the * completion. 
We still need to call KCF_PROV_FREE_CONTEXT to handle cases * like crypto_cancel_ctx(9f). */ void kcf_free_context(kcf_context_t *kcf_ctx) { kcf_provider_desc_t *pd = kcf_ctx->kc_prov_desc; crypto_ctx_t *gctx = &kcf_ctx->kc_glbl_ctx; kcf_context_t *kcf_secondctx = kcf_ctx->kc_secondctx; /* Release the second context, if any */ if (kcf_secondctx != NULL) KCF_CONTEXT_REFRELE(kcf_secondctx); if (gctx->cc_provider_private != NULL) { mutex_enter(&pd->pd_lock); if (!KCF_IS_PROV_REMOVED(pd)) { /* * Increment the provider's internal refcnt so it * doesn't unregister from the framework while * we're calling the entry point. */ KCF_PROV_IREFHOLD(pd); mutex_exit(&pd->pd_lock); (void) KCF_PROV_FREE_CONTEXT(pd, gctx); KCF_PROV_IREFRELE(pd); } else { mutex_exit(&pd->pd_lock); } } /* kcf_ctx->kc_prov_desc has a hold on pd */ KCF_PROV_REFRELE(kcf_ctx->kc_prov_desc); /* check if this context is shared with a software provider */ if ((gctx->cc_flags & CRYPTO_INIT_OPSTATE) && kcf_ctx->kc_sw_prov_desc != NULL) { KCF_PROV_REFRELE(kcf_ctx->kc_sw_prov_desc); } kmem_cache_free(kcf_context_cache, kcf_ctx); } /* * Free the request after releasing all the holds. */ void kcf_free_req(kcf_areq_node_t *areq) { KCF_PROV_REFRELE(areq->an_provider); if (areq->an_context != NULL) KCF_CONTEXT_REFRELE(areq->an_context); if (areq->an_tried_plist != NULL) kcf_free_triedlist(areq->an_tried_plist); kmem_cache_free(kcf_areq_cache, areq); } /* * Utility routine to remove a request from the chain of requests * hanging off a context. */ static void kcf_removereq_in_ctxchain(kcf_context_t *ictx, kcf_areq_node_t *areq) { kcf_areq_node_t *cur, *prev; /* * Get context lock, search for areq in the chain and remove it. */ ASSERT(ictx != NULL); mutex_enter(&ictx->kc_in_use_lock); prev = cur = ictx->kc_req_chain_first; while (cur != NULL) { if (cur == areq) { if (prev == cur) { if ((ictx->kc_req_chain_first = cur->an_ctxchain_next) == NULL) ictx->kc_req_chain_last = NULL; } else { if (cur == ictx->kc_req_chain_last) ictx->kc_req_chain_last = prev; prev->an_ctxchain_next = cur->an_ctxchain_next; } break; } prev = cur; cur = cur->an_ctxchain_next; } mutex_exit(&ictx->kc_in_use_lock); } /* * Remove the specified node from the global software queue. * * The caller must hold the queue lock and request lock (an_lock). */ static void kcf_remove_node(kcf_areq_node_t *node) { kcf_areq_node_t *nextp = node->an_next; kcf_areq_node_t *prevp = node->an_prev; if (nextp != NULL) nextp->an_prev = prevp; else gswq->gs_last = prevp; if (prevp != NULL) prevp->an_next = nextp; else gswq->gs_first = nextp; node->an_state = REQ_CANCELED; } /* * Add the request node to the end of the global software queue. * * The caller should not hold the queue lock. Returns 0 if the * request is successfully queued. Returns CRYPTO_BUSY if the limit * on the number of jobs is exceeded. */ static int kcf_enqueue(kcf_areq_node_t *node) { kcf_areq_node_t *tnode; mutex_enter(&gswq->gs_lock); if (gswq->gs_njobs >= gswq->gs_maxjobs) { mutex_exit(&gswq->gs_lock); return (CRYPTO_BUSY); } if (gswq->gs_last == NULL) { gswq->gs_first = gswq->gs_last = node; } else { ASSERT(gswq->gs_last->an_next == NULL); tnode = gswq->gs_last; tnode->an_next = node; gswq->gs_last = node; node->an_prev = tnode; } gswq->gs_njobs++; /* an_lock not needed here as we hold gs_lock */ node->an_state = REQ_WAITING; mutex_exit(&gswq->gs_lock); return (0); } /* * kmem_cache_alloc constructor for sync request structure. 
*/ -/* ARGSUSED */ static int kcf_sreq_cache_constructor(void *buf, void *cdrarg, int kmflags) { + (void) cdrarg, (void) kmflags; kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)buf; sreq->sn_type = CRYPTO_SYNCH; cv_init(&sreq->sn_cv, NULL, CV_DEFAULT, NULL); mutex_init(&sreq->sn_lock, NULL, MUTEX_DEFAULT, NULL); return (0); } -/* ARGSUSED */ static void kcf_sreq_cache_destructor(void *buf, void *cdrarg) { + (void) cdrarg; kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)buf; mutex_destroy(&sreq->sn_lock); cv_destroy(&sreq->sn_cv); } /* * kmem_cache_alloc constructor for async request structure. */ -/* ARGSUSED */ static int kcf_areq_cache_constructor(void *buf, void *cdrarg, int kmflags) { + (void) cdrarg, (void) kmflags; kcf_areq_node_t *areq = (kcf_areq_node_t *)buf; areq->an_type = CRYPTO_ASYNCH; areq->an_refcnt = 0; mutex_init(&areq->an_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&areq->an_done, NULL, CV_DEFAULT, NULL); cv_init(&areq->an_turn_cv, NULL, CV_DEFAULT, NULL); return (0); } -/* ARGSUSED */ static void kcf_areq_cache_destructor(void *buf, void *cdrarg) { + (void) cdrarg; kcf_areq_node_t *areq = (kcf_areq_node_t *)buf; ASSERT(areq->an_refcnt == 0); mutex_destroy(&areq->an_lock); cv_destroy(&areq->an_done); cv_destroy(&areq->an_turn_cv); } /* * kmem_cache_alloc constructor for kcf_context structure. */ -/* ARGSUSED */ static int kcf_context_cache_constructor(void *buf, void *cdrarg, int kmflags) { + (void) cdrarg, (void) kmflags; kcf_context_t *kctx = (kcf_context_t *)buf; kctx->kc_refcnt = 0; mutex_init(&kctx->kc_in_use_lock, NULL, MUTEX_DEFAULT, NULL); return (0); } -/* ARGSUSED */ static void kcf_context_cache_destructor(void *buf, void *cdrarg) { + (void) cdrarg; kcf_context_t *kctx = (kcf_context_t *)buf; ASSERT(kctx->kc_refcnt == 0); mutex_destroy(&kctx->kc_in_use_lock); } void kcf_sched_destroy(void) { int i; if (kcf_misc_kstat) kstat_delete(kcf_misc_kstat); if (kcfpool) { mutex_destroy(&kcfpool->kp_thread_lock); cv_destroy(&kcfpool->kp_nothr_cv); mutex_destroy(&kcfpool->kp_user_lock); cv_destroy(&kcfpool->kp_user_cv); kmem_free(kcfpool, sizeof (kcf_pool_t)); } for (i = 0; i < REQID_TABLES; i++) { if (kcf_reqid_table[i]) { mutex_destroy(&(kcf_reqid_table[i]->rt_lock)); kmem_free(kcf_reqid_table[i], sizeof (kcf_reqid_table_t)); } } if (gswq) { mutex_destroy(&gswq->gs_lock); cv_destroy(&gswq->gs_cv); kmem_free(gswq, sizeof (kcf_global_swq_t)); } if (kcf_context_cache) kmem_cache_destroy(kcf_context_cache); if (kcf_areq_cache) kmem_cache_destroy(kcf_areq_cache); if (kcf_sreq_cache) kmem_cache_destroy(kcf_sreq_cache); mutex_destroy(&ntfy_list_lock); cv_destroy(&ntfy_list_cv); } /* * Creates and initializes all the structures needed by the framework. */ void kcf_sched_init(void) { int i; kcf_reqid_table_t *rt; /* * Create all the kmem caches needed by the framework. We set the * align argument to 64, to get a slab aligned to 64-byte as well as * have the objects (cache_chunksize) to be a 64-byte multiple. * This helps to avoid false sharing as this is the size of the * CPU cache line. 
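 * (Illustration: with objects padded to a multiple of 64 bytes and
 * aligned on a 64-byte boundary, two request structures never share
 * a cache line, so CPUs completing different requests do not
 * invalidate each other's lines.)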
*/ kcf_sreq_cache = kmem_cache_create("kcf_sreq_cache", sizeof (struct kcf_sreq_node), 64, kcf_sreq_cache_constructor, kcf_sreq_cache_destructor, NULL, NULL, NULL, 0); kcf_areq_cache = kmem_cache_create("kcf_areq_cache", sizeof (struct kcf_areq_node), 64, kcf_areq_cache_constructor, kcf_areq_cache_destructor, NULL, NULL, NULL, 0); kcf_context_cache = kmem_cache_create("kcf_context_cache", sizeof (struct kcf_context), 64, kcf_context_cache_constructor, kcf_context_cache_destructor, NULL, NULL, NULL, 0); gswq = kmem_alloc(sizeof (kcf_global_swq_t), KM_SLEEP); mutex_init(&gswq->gs_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&gswq->gs_cv, NULL, CV_DEFAULT, NULL); gswq->gs_njobs = 0; gswq->gs_maxjobs = kcf_maxthreads * crypto_taskq_maxalloc; gswq->gs_first = gswq->gs_last = NULL; /* Initialize the global reqid table */ for (i = 0; i < REQID_TABLES; i++) { rt = kmem_zalloc(sizeof (kcf_reqid_table_t), KM_SLEEP); kcf_reqid_table[i] = rt; mutex_init(&rt->rt_lock, NULL, MUTEX_DEFAULT, NULL); rt->rt_curid = i; } /* Allocate and initialize the thread pool */ kcfpool_alloc(); /* Initialize the event notification list variables */ mutex_init(&ntfy_list_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&ntfy_list_cv, NULL, CV_DEFAULT, NULL); /* Create the kcf kstat */ kcf_misc_kstat = kstat_create("kcf", 0, "framework_stats", "crypto", KSTAT_TYPE_NAMED, sizeof (kcf_stats_t) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); if (kcf_misc_kstat != NULL) { kcf_misc_kstat->ks_data = &kcf_ksdata; kcf_misc_kstat->ks_update = kcf_misc_kstat_update; kstat_install(kcf_misc_kstat); } } /* * Signal the waiting sync client. */ void kcf_sop_done(kcf_sreq_node_t *sreq, int error) { mutex_enter(&sreq->sn_lock); sreq->sn_state = REQ_DONE; sreq->sn_rv = error; cv_signal(&sreq->sn_cv); mutex_exit(&sreq->sn_lock); } /* * Callback the async client with the operation status. * We free the async request node and possibly the context. * We also handle any chain of requests hanging off of * the context. */ void kcf_aop_done(kcf_areq_node_t *areq, int error) { kcf_op_type_t optype; boolean_t skip_notify = B_FALSE; kcf_context_t *ictx; kcf_areq_node_t *nextreq; /* * Handle recoverable errors. This has to be done first * before doing anything else in this routine so that * we do not change the state of the request. */ if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) { /* * We try another provider, if one is available. Else * we continue with the failure notification to the * client. */ if (kcf_resubmit_request(areq) == CRYPTO_QUEUED) return; } mutex_enter(&areq->an_lock); areq->an_state = REQ_DONE; mutex_exit(&areq->an_lock); optype = (&areq->an_params)->rp_optype; if ((ictx = areq->an_context) != NULL) { /* * A request after it is removed from the request * queue, still stays on a chain of requests hanging * of its context structure. It needs to be removed * from this chain at this point. */ mutex_enter(&ictx->kc_in_use_lock); nextreq = areq->an_ctxchain_next; if (nextreq != NULL) { mutex_enter(&nextreq->an_lock); nextreq->an_is_my_turn = B_TRUE; cv_signal(&nextreq->an_turn_cv); mutex_exit(&nextreq->an_lock); } ictx->kc_req_chain_first = nextreq; if (nextreq == NULL) ictx->kc_req_chain_last = NULL; mutex_exit(&ictx->kc_in_use_lock); if (IS_SINGLE_OP(optype) || IS_FINAL_OP(optype)) { ASSERT(nextreq == NULL); KCF_CONTEXT_REFRELE(ictx); } else if (error != CRYPTO_SUCCESS && IS_INIT_OP(optype)) { /* * NOTE - We do not release the context in case of update * operations. 
We require the consumer to free it explicitly, * in case it wants to abandon an update operation. This is done * as there may be mechanisms in ECB mode that can continue * even if an operation on a block fails. */ KCF_CONTEXT_REFRELE(ictx); } } /* Deal with the internal continuation to this request first */ if (areq->an_isdual) { kcf_dual_req_t *next_arg; next_arg = (kcf_dual_req_t *)areq->an_reqarg.cr_callback_arg; next_arg->kr_areq = areq; KCF_AREQ_REFHOLD(areq); areq->an_isdual = B_FALSE; NOTIFY_CLIENT(areq, error); return; } /* * If CRYPTO_NOTIFY_OPDONE flag is set, we should notify * always. If this flag is clear, we skip the notification * provided there are no errors. We check this flag for only * init or update operations. It is ignored for single, final or * atomic operations. */ skip_notify = (IS_UPDATE_OP(optype) || IS_INIT_OP(optype)) && (!(areq->an_reqarg.cr_flag & CRYPTO_NOTIFY_OPDONE)) && (error == CRYPTO_SUCCESS); if (!skip_notify) { NOTIFY_CLIENT(areq, error); } if (!(areq->an_reqarg.cr_flag & CRYPTO_SKIP_REQID)) kcf_reqid_delete(areq); KCF_AREQ_REFRELE(areq); } /* * Allocate the thread pool and initialize all the fields. */ static void kcfpool_alloc() { kcfpool = kmem_alloc(sizeof (kcf_pool_t), KM_SLEEP); kcfpool->kp_threads = kcfpool->kp_idlethreads = 0; kcfpool->kp_blockedthreads = 0; kcfpool->kp_signal_create_thread = B_FALSE; kcfpool->kp_nthrs = 0; kcfpool->kp_user_waiting = B_FALSE; mutex_init(&kcfpool->kp_thread_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&kcfpool->kp_nothr_cv, NULL, CV_DEFAULT, NULL); mutex_init(&kcfpool->kp_user_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&kcfpool->kp_user_cv, NULL, CV_DEFAULT, NULL); kcf_idlethr_timeout = KCF_DEFAULT_THRTIMEOUT; } /* * Insert the async request in the hash table after assigning it * an ID. Returns the ID. * * The ID is used by the caller to pass as an argument to a * cancel_req() routine later. */ static crypto_req_id_t kcf_reqid_insert(kcf_areq_node_t *areq) { int indx; crypto_req_id_t id; kcf_areq_node_t *headp; kcf_reqid_table_t *rt; rt = kcf_reqid_table[CPU_SEQID_UNSTABLE & REQID_TABLE_MASK]; mutex_enter(&rt->rt_lock); rt->rt_curid = id = (rt->rt_curid - REQID_COUNTER_LOW) | REQID_COUNTER_HIGH; SET_REQID(areq, id); indx = REQID_HASH(id); headp = areq->an_idnext = rt->rt_idhash[indx]; areq->an_idprev = NULL; if (headp != NULL) headp->an_idprev = areq; rt->rt_idhash[indx] = areq; mutex_exit(&rt->rt_lock); return (id); } /* * Delete the async request from the hash table. */ static void kcf_reqid_delete(kcf_areq_node_t *areq) { int indx; kcf_areq_node_t *nextp, *prevp; crypto_req_id_t id = GET_REQID(areq); kcf_reqid_table_t *rt; rt = kcf_reqid_table[id & REQID_TABLE_MASK]; indx = REQID_HASH(id); mutex_enter(&rt->rt_lock); nextp = areq->an_idnext; prevp = areq->an_idprev; if (nextp != NULL) nextp->an_idprev = prevp; if (prevp != NULL) prevp->an_idnext = nextp; else rt->rt_idhash[indx] = nextp; SET_REQID(areq, 0); cv_broadcast(&areq->an_done); mutex_exit(&rt->rt_lock); } /* * Cancel a single asynchronous request. * * We guarantee that no problems will result from calling * crypto_cancel_req() for a request which is either running, or * has already completed. We remove the request from any queues * if it is possible. We wait for request completion if the * request is dispatched to a provider. * * Calling context: * Can be called from user context only. 
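 * (one reason for the user-context requirement: if the request has
 * already been dispatched to a provider, this routine may block in
 * cv_wait() on an_done until the request completes)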
* * NOTE: We acquire the following locks in this routine (in order): * - rt_lock (kcf_reqid_table_t) * - gswq->gs_lock * - areq->an_lock * - ictx->kc_in_use_lock (from kcf_removereq_in_ctxchain()) * * This locking order MUST be maintained in code every where else. */ void crypto_cancel_req(crypto_req_id_t id) { int indx; kcf_areq_node_t *areq; kcf_provider_desc_t *pd; kcf_context_t *ictx; kcf_reqid_table_t *rt; rt = kcf_reqid_table[id & REQID_TABLE_MASK]; indx = REQID_HASH(id); mutex_enter(&rt->rt_lock); for (areq = rt->rt_idhash[indx]; areq; areq = areq->an_idnext) { if (GET_REQID(areq) == id) { /* * We found the request. It is either still waiting * in the framework queues or running at the provider. */ pd = areq->an_provider; ASSERT(pd != NULL); switch (pd->pd_prov_type) { case CRYPTO_SW_PROVIDER: mutex_enter(&gswq->gs_lock); mutex_enter(&areq->an_lock); /* This request can be safely canceled. */ if (areq->an_state <= REQ_WAITING) { /* Remove from gswq, global software queue. */ kcf_remove_node(areq); if ((ictx = areq->an_context) != NULL) kcf_removereq_in_ctxchain(ictx, areq); mutex_exit(&areq->an_lock); mutex_exit(&gswq->gs_lock); mutex_exit(&rt->rt_lock); /* Remove areq from hash table and free it. */ kcf_reqid_delete(areq); KCF_AREQ_REFRELE(areq); return; } mutex_exit(&areq->an_lock); mutex_exit(&gswq->gs_lock); break; case CRYPTO_HW_PROVIDER: /* * There is no interface to remove an entry * once it is on the taskq. So, we do not do * anything for a hardware provider. */ break; default: break; } /* * The request is running. Wait for the request completion * to notify us. */ KCF_AREQ_REFHOLD(areq); while (GET_REQID(areq) == id) cv_wait(&areq->an_done, &rt->rt_lock); KCF_AREQ_REFRELE(areq); break; } } mutex_exit(&rt->rt_lock); } /* * Cancel all asynchronous requests associated with the * passed in crypto context and free it. * * A client SHOULD NOT call this routine after calling a crypto_*_final * routine. This routine is called only during intermediate operations. * The client should not use the crypto context after this function returns * since we destroy it. * * Calling context: * Can be called from user context only. */ void crypto_cancel_ctx(crypto_context_t ctx) { kcf_context_t *ictx; kcf_areq_node_t *areq; if (ctx == NULL) return; ictx = (kcf_context_t *)((crypto_ctx_t *)ctx)->cc_framework_private; mutex_enter(&ictx->kc_in_use_lock); /* Walk the chain and cancel each request */ while ((areq = ictx->kc_req_chain_first) != NULL) { /* * We have to drop the lock here as we may have * to wait for request completion. We hold the * request before dropping the lock though, so that it * won't be freed underneath us. */ KCF_AREQ_REFHOLD(areq); mutex_exit(&ictx->kc_in_use_lock); crypto_cancel_req(GET_REQID(areq)); KCF_AREQ_REFRELE(areq); mutex_enter(&ictx->kc_in_use_lock); } mutex_exit(&ictx->kc_in_use_lock); KCF_CONTEXT_REFRELE(ictx); } /* * Update kstats. */ static int kcf_misc_kstat_update(kstat_t *ksp, int rw) { uint_t tcnt; kcf_stats_t *ks_data; if (rw == KSTAT_WRITE) return (EACCES); ks_data = ksp->ks_data; ks_data->ks_thrs_in_pool.value.ui32 = kcfpool->kp_threads; /* * The failover thread is counted in kp_idlethreads in * some corner cases. This is done to avoid doing more checks * when submitting a request. We account for those cases below. 
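 * Concretely, if kp_threads is 4 and kp_idlethreads momentarily reads
 * as 5, the extra count is the failover thread, so the ks_idle_thrs
 * value reported below is adjusted down to 4.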
*/ if ((tcnt = kcfpool->kp_idlethreads) == (kcfpool->kp_threads + 1)) tcnt--; ks_data->ks_idle_thrs.value.ui32 = tcnt; ks_data->ks_minthrs.value.ui32 = kcf_minthreads; ks_data->ks_maxthrs.value.ui32 = kcf_maxthreads; ks_data->ks_swq_njobs.value.ui32 = gswq->gs_njobs; ks_data->ks_swq_maxjobs.value.ui32 = gswq->gs_maxjobs; ks_data->ks_taskq_threads.value.ui32 = crypto_taskq_threads; ks_data->ks_taskq_minalloc.value.ui32 = crypto_taskq_minalloc; ks_data->ks_taskq_maxalloc.value.ui32 = crypto_taskq_maxalloc; return (0); } /* * Allocate and initialize a kcf_dual_req, used for saving the arguments of * a dual operation or an atomic operation that has to be internally * simulated with multiple single steps. * crq determines the memory allocation flags. */ kcf_dual_req_t * kcf_alloc_req(crypto_call_req_t *crq) { kcf_dual_req_t *kcr; kcr = kmem_alloc(sizeof (kcf_dual_req_t), KCF_KMFLAG(crq)); if (kcr == NULL) return (NULL); /* Copy the whole crypto_call_req struct, as it isn't persistent */ if (crq != NULL) kcr->kr_callreq = *crq; else bzero(&(kcr->kr_callreq), sizeof (crypto_call_req_t)); kcr->kr_areq = NULL; kcr->kr_saveoffset = 0; kcr->kr_savelen = 0; return (kcr); } /* * Callback routine for the next part of a simulated dual part. * Schedules the next step. * * This routine can be called from interrupt context. */ void kcf_next_req(void *next_req_arg, int status) { kcf_dual_req_t *next_req = (kcf_dual_req_t *)next_req_arg; kcf_req_params_t *params = &(next_req->kr_params); kcf_areq_node_t *areq = next_req->kr_areq; int error = status; kcf_provider_desc_t *pd = NULL; crypto_dual_data_t *ct = NULL; /* Stop the processing if an error occurred at this step */ if (error != CRYPTO_SUCCESS) { out: areq->an_reqarg = next_req->kr_callreq; KCF_AREQ_REFRELE(areq); kmem_free(next_req, sizeof (kcf_dual_req_t)); areq->an_isdual = B_FALSE; kcf_aop_done(areq, error); return; } switch (params->rp_opgrp) { case KCF_OG_MAC: { /* * The next req is submitted with the same reqid as the * first part. The consumer only got back that reqid, and * should still be able to cancel the operation during its * second step. 
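 * (this also explains why the existing kcf_areq_node_t is reused
 * further down rather than resubmitted through kcf_submit_request(),
 * which would allocate a new request structure)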
*/ kcf_mac_ops_params_t *mops = &(params->rp_u.mac_params); crypto_ctx_template_t mac_tmpl; kcf_mech_entry_t *me; ct = (crypto_dual_data_t *)mops->mo_data; mac_tmpl = (crypto_ctx_template_t)mops->mo_templ; /* No expected recoverable failures, so no retry list */ pd = kcf_get_mech_provider(mops->mo_framework_mechtype, &me, &error, NULL, CRYPTO_FG_MAC_ATOMIC, (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), ct->dd_len2); if (pd == NULL) { error = CRYPTO_MECH_NOT_SUPPORTED; goto out; } /* Validate the MAC context template here */ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) && (mac_tmpl != NULL)) { kcf_ctx_template_t *ctx_mac_tmpl; ctx_mac_tmpl = (kcf_ctx_template_t *)mac_tmpl; if (ctx_mac_tmpl->ct_generation != me->me_gen_swprov) { KCF_PROV_REFRELE(pd); error = CRYPTO_OLD_CTX_TEMPLATE; goto out; } mops->mo_templ = ctx_mac_tmpl->ct_prov_tmpl; } break; } case KCF_OG_DECRYPT: { kcf_decrypt_ops_params_t *dcrops = &(params->rp_u.decrypt_params); ct = (crypto_dual_data_t *)dcrops->dop_ciphertext; /* No expected recoverable failures, so no retry list */ pd = kcf_get_mech_provider(dcrops->dop_framework_mechtype, NULL, &error, NULL, CRYPTO_FG_DECRYPT_ATOMIC, (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), ct->dd_len1); if (pd == NULL) { error = CRYPTO_MECH_NOT_SUPPORTED; goto out; } break; } default: break; } /* The second step uses len2 and offset2 of the dual_data */ next_req->kr_saveoffset = ct->dd_offset1; next_req->kr_savelen = ct->dd_len1; ct->dd_offset1 = ct->dd_offset2; ct->dd_len1 = ct->dd_len2; /* preserve if the caller is restricted */ if (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED) { areq->an_reqarg.cr_flag = CRYPTO_RESTRICTED; } else { areq->an_reqarg.cr_flag = 0; } areq->an_reqarg.cr_callback_func = kcf_last_req; areq->an_reqarg.cr_callback_arg = next_req; areq->an_isdual = B_TRUE; /* * We would like to call kcf_submit_request() here. But, * that is not possible as that routine allocates a new * kcf_areq_node_t request structure, while we need to * reuse the existing request structure. */ switch (pd->pd_prov_type) { case CRYPTO_SW_PROVIDER: error = common_submit_request(pd, NULL, params, KCF_RHNDL(KM_NOSLEEP)); break; case CRYPTO_HW_PROVIDER: { kcf_provider_desc_t *old_pd; taskq_t *taskq = pd->pd_sched_info.ks_taskq; /* * Set the params for the second step in the * dual-ops. */ areq->an_params = *params; old_pd = areq->an_provider; KCF_PROV_REFRELE(old_pd); KCF_PROV_REFHOLD(pd); areq->an_provider = pd; /* * Note that we have to do a taskq_dispatch() * here as we may be in interrupt context. */ if (taskq_dispatch(taskq, process_req_hwp, areq, TQ_NOSLEEP) == (taskqid_t)0) { error = CRYPTO_HOST_MEMORY; } else { error = CRYPTO_QUEUED; } break; } default: break; } /* * We have to release the holds on the request and the provider * in all cases. */ KCF_AREQ_REFRELE(areq); KCF_PROV_REFRELE(pd); if (error != CRYPTO_QUEUED) { /* restore, clean up, and invoke the client's callback */ ct->dd_offset1 = next_req->kr_saveoffset; ct->dd_len1 = next_req->kr_savelen; areq->an_reqarg = next_req->kr_callreq; kmem_free(next_req, sizeof (kcf_dual_req_t)); areq->an_isdual = B_FALSE; kcf_aop_done(areq, error); } } /* * Last part of an emulated dual operation. * Clean up and restore ... 
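 * "Restore" here means putting back the first part's dd_offset1 and
 * dd_len1 of the crypto_dual_data_t from the values saved in
 * kr_saveoffset and kr_savelen before the client callback is invoked.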
*/ void kcf_last_req(void *last_req_arg, int status) { kcf_dual_req_t *last_req = (kcf_dual_req_t *)last_req_arg; kcf_req_params_t *params = &(last_req->kr_params); kcf_areq_node_t *areq = last_req->kr_areq; crypto_dual_data_t *ct = NULL; switch (params->rp_opgrp) { case KCF_OG_MAC: { kcf_mac_ops_params_t *mops = &(params->rp_u.mac_params); ct = (crypto_dual_data_t *)mops->mo_data; break; } case KCF_OG_DECRYPT: { kcf_decrypt_ops_params_t *dcrops = &(params->rp_u.decrypt_params); ct = (crypto_dual_data_t *)dcrops->dop_ciphertext; break; } default: { panic("invalid kcf_op_group_t %d", (int)params->rp_opgrp); return; } } ct->dd_offset1 = last_req->kr_saveoffset; ct->dd_len1 = last_req->kr_savelen; /* The submitter used kcf_last_req as its callback */ if (areq == NULL) { crypto_call_req_t *cr = &last_req->kr_callreq; (*(cr->cr_callback_func))(cr->cr_callback_arg, status); kmem_free(last_req, sizeof (kcf_dual_req_t)); return; } areq->an_reqarg = last_req->kr_callreq; KCF_AREQ_REFRELE(areq); kmem_free(last_req, sizeof (kcf_dual_req_t)); areq->an_isdual = B_FALSE; kcf_aop_done(areq, status); } diff --git a/module/icp/io/aes.c b/module/icp/io/aes.c index c47c7567b900..d50e3bdc15f1 100644 --- a/module/icp/io/aes.c +++ b/module/icp/io/aes.c @@ -1,1457 +1,1457 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. */ /* * AES provider for the Kernel Cryptographic Framework (KCF) */ #include #include #include #include #include #include #include #define _AES_IMPL #include #include #define CRYPTO_PROVIDER_NAME "aes" extern struct mod_ops mod_cryptoops; /* * Module linkage information for the kernel. */ static struct modlcrypto modlcrypto = { &mod_cryptoops, "AES Kernel SW Provider" }; static struct modlinkage modlinkage = { MODREV_1, { (void *)&modlcrypto, NULL } }; /* * Mechanism info structure passed to KCF during registration. 
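 * Each entry below lists the mechanism name and type, the crypto
 * function groups (CRYPTO_FG_*) this provider supports for it, and the
 * minimum and maximum key sizes together with the unit
 * (CRYPTO_KEYSIZE_UNIT_IN_BYTES) in which those sizes are expressed.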
*/ static crypto_mech_info_t aes_mech_info_tab[] = { /* AES_ECB */ {SUN_CKM_AES_ECB, AES_ECB_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* AES_CBC */ {SUN_CKM_AES_CBC, AES_CBC_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* AES_CTR */ {SUN_CKM_AES_CTR, AES_CTR_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* AES_CCM */ {SUN_CKM_AES_CCM, AES_CCM_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* AES_GCM */ {SUN_CKM_AES_GCM, AES_GCM_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* AES_GMAC */ {SUN_CKM_AES_GMAC, AES_GMAC_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC | CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC | CRYPTO_FG_SIGN | CRYPTO_FG_SIGN_ATOMIC | CRYPTO_FG_VERIFY | CRYPTO_FG_VERIFY_ATOMIC, AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES} }; static void aes_provider_status(crypto_provider_handle_t, uint_t *); static crypto_control_ops_t aes_control_ops = { aes_provider_status }; static int aes_encrypt_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int aes_decrypt_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int aes_common_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t, boolean_t); static int aes_common_init_ctx(aes_ctx_t *, crypto_spi_ctx_template_t *, crypto_mechanism_t *, crypto_key_t *, int, boolean_t); static int aes_encrypt_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int aes_decrypt_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int aes_encrypt(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static int aes_encrypt_update(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static int aes_encrypt_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int aes_decrypt(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static int aes_decrypt_update(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static int aes_decrypt_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static crypto_cipher_ops_t aes_cipher_ops = { .encrypt_init = aes_encrypt_init, .encrypt = aes_encrypt, .encrypt_update = aes_encrypt_update, .encrypt_final = aes_encrypt_final, .encrypt_atomic = aes_encrypt_atomic, .decrypt_init = aes_decrypt_init, .decrypt = aes_decrypt, .decrypt_update = aes_decrypt_update, .decrypt_final = aes_decrypt_final, .decrypt_atomic = aes_decrypt_atomic }; static int 
aes_mac_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int aes_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static crypto_mac_ops_t aes_mac_ops = { .mac_init = NULL, .mac = NULL, .mac_update = NULL, .mac_final = NULL, .mac_atomic = aes_mac_atomic, .mac_verify_atomic = aes_mac_verify_atomic }; static int aes_create_ctx_template(crypto_provider_handle_t, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *, size_t *, crypto_req_handle_t); static int aes_free_context(crypto_ctx_t *); static crypto_ctx_ops_t aes_ctx_ops = { .create_ctx_template = aes_create_ctx_template, .free_context = aes_free_context }; static crypto_ops_t aes_crypto_ops = {{{{{ &aes_control_ops, NULL, &aes_cipher_ops, &aes_mac_ops, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &aes_ctx_ops }}}}}; static crypto_provider_info_t aes_prov_info = {{{{ CRYPTO_SPI_VERSION_1, "AES Software Provider", CRYPTO_SW_PROVIDER, NULL, &aes_crypto_ops, sizeof (aes_mech_info_tab)/sizeof (crypto_mech_info_t), aes_mech_info_tab }}}}; static crypto_kcf_provider_handle_t aes_prov_handle = 0; static crypto_data_t null_crypto_data = { CRYPTO_DATA_RAW }; int aes_mod_init(void) { int ret; /* Determine the fastest available implementation. */ aes_impl_init(); gcm_impl_init(); if ((ret = mod_install(&modlinkage)) != 0) return (ret); /* Register with KCF. If the registration fails, remove the module. */ if (crypto_register_provider(&aes_prov_info, &aes_prov_handle)) { (void) mod_remove(&modlinkage); return (EACCES); } return (0); } int aes_mod_fini(void) { /* Unregister from KCF if module is registered */ if (aes_prov_handle != 0) { if (crypto_unregister_provider(aes_prov_handle)) return (EBUSY); aes_prov_handle = 0; } return (mod_remove(&modlinkage)); } static int aes_check_mech_param(crypto_mechanism_t *mechanism, aes_ctx_t **ctx, int kmflag) { void *p = NULL; boolean_t param_required = B_TRUE; size_t param_len; void *(*alloc_fun)(int); int rv = CRYPTO_SUCCESS; switch (mechanism->cm_type) { case AES_ECB_MECH_INFO_TYPE: param_required = B_FALSE; alloc_fun = ecb_alloc_ctx; break; case AES_CBC_MECH_INFO_TYPE: param_len = AES_BLOCK_LEN; alloc_fun = cbc_alloc_ctx; break; case AES_CTR_MECH_INFO_TYPE: param_len = sizeof (CK_AES_CTR_PARAMS); alloc_fun = ctr_alloc_ctx; break; case AES_CCM_MECH_INFO_TYPE: param_len = sizeof (CK_AES_CCM_PARAMS); alloc_fun = ccm_alloc_ctx; break; case AES_GCM_MECH_INFO_TYPE: param_len = sizeof (CK_AES_GCM_PARAMS); alloc_fun = gcm_alloc_ctx; break; case AES_GMAC_MECH_INFO_TYPE: param_len = sizeof (CK_AES_GMAC_PARAMS); alloc_fun = gmac_alloc_ctx; break; default: rv = CRYPTO_MECHANISM_INVALID; return (rv); } if (param_required && mechanism->cm_param != NULL && mechanism->cm_param_len != param_len) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } if (ctx != NULL) { p = (alloc_fun)(kmflag); *ctx = p; } return (rv); } /* * Initialize key schedules for AES */ static int init_keysched(crypto_key_t *key, void *newbie) { /* * Only keys by value are supported by this module. 
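 * A caller is therefore expected to pass a raw key along these lines
 * (an illustrative sketch; the variable names are made up):
 *
 *	crypto_key_t key;
 *
 *	key.ck_format = CRYPTO_KEY_RAW;
 *	key.ck_length = 256;		(length is in bits)
 *	key.ck_data = key_material;
 *
 * The length must lie in [AES_MINBITS, AES_MAXBITS] and be a multiple
 * of 64 bits, i.e. exactly 128, 192, or 256, as checked below.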
*/ switch (key->ck_format) { case CRYPTO_KEY_RAW: if (key->ck_length < AES_MINBITS || key->ck_length > AES_MAXBITS) { return (CRYPTO_KEY_SIZE_RANGE); } /* key length must be either 128, 192, or 256 */ if ((key->ck_length & 63) != 0) return (CRYPTO_KEY_SIZE_RANGE); break; default: return (CRYPTO_KEY_TYPE_INCONSISTENT); } aes_init_keysched(key->ck_data, key->ck_length, newbie); return (CRYPTO_SUCCESS); } /* * KCF software provider control entry points. */ -/* ARGSUSED */ static void aes_provider_status(crypto_provider_handle_t provider, uint_t *status) { + (void) provider; *status = CRYPTO_PROVIDER_READY; } static int aes_encrypt_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t template, crypto_req_handle_t req) { return (aes_common_init(ctx, mechanism, key, template, req, B_TRUE)); } static int aes_decrypt_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t template, crypto_req_handle_t req) { return (aes_common_init(ctx, mechanism, key, template, req, B_FALSE)); } /* * KCF software provider encrypt entry points. */ static int aes_common_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t template, crypto_req_handle_t req, boolean_t is_encrypt_init) { aes_ctx_t *aes_ctx; int rv; int kmflag; /* * Only keys by value are supported by this module. */ if (key->ck_format != CRYPTO_KEY_RAW) { return (CRYPTO_KEY_TYPE_INCONSISTENT); } kmflag = crypto_kmflag(req); if ((rv = aes_check_mech_param(mechanism, &aes_ctx, kmflag)) != CRYPTO_SUCCESS) return (rv); rv = aes_common_init_ctx(aes_ctx, template, mechanism, key, kmflag, is_encrypt_init); if (rv != CRYPTO_SUCCESS) { crypto_free_mode_ctx(aes_ctx); return (rv); } ctx->cc_provider_private = aes_ctx; return (CRYPTO_SUCCESS); } static void aes_copy_block64(uint8_t *in, uint64_t *out) { if (IS_P2ALIGNED(in, sizeof (uint64_t))) { /* LINTED: pointer alignment */ out[0] = *(uint64_t *)&in[0]; /* LINTED: pointer alignment */ out[1] = *(uint64_t *)&in[8]; } else { uint8_t *iv8 = (uint8_t *)&out[0]; AES_COPY_BLOCK(in, iv8); } } static int aes_encrypt(crypto_ctx_t *ctx, crypto_data_t *plaintext, crypto_data_t *ciphertext, crypto_req_handle_t req) { int ret = CRYPTO_FAILED; aes_ctx_t *aes_ctx; size_t saved_length, saved_offset, length_needed; ASSERT(ctx->cc_provider_private != NULL); aes_ctx = ctx->cc_provider_private; /* * For block ciphers, plaintext must be a multiple of AES block size. * This test is only valid for ciphers whose blocksize is a power of 2. */ if (((aes_ctx->ac_flags & (CTR_MODE|CCM_MODE|GCM_MODE|GMAC_MODE)) == 0) && (plaintext->cd_length & (AES_BLOCK_LEN - 1)) != 0) return (CRYPTO_DATA_LEN_RANGE); ASSERT(ciphertext != NULL); /* * We need to just return the length needed to store the output. * We should not destroy the context for the following case. */ switch (aes_ctx->ac_flags & (CCM_MODE|GCM_MODE|GMAC_MODE)) { case CCM_MODE: length_needed = plaintext->cd_length + aes_ctx->ac_mac_len; break; case GCM_MODE: length_needed = plaintext->cd_length + aes_ctx->ac_tag_len; break; case GMAC_MODE: if (plaintext->cd_length != 0) return (CRYPTO_ARGUMENTS_BAD); length_needed = aes_ctx->ac_tag_len; break; default: length_needed = plaintext->cd_length; } if (ciphertext->cd_length < length_needed) { ciphertext->cd_length = length_needed; return (CRYPTO_BUFFER_TOO_SMALL); } saved_length = ciphertext->cd_length; saved_offset = ciphertext->cd_offset; /* * Do an update on the specified input data. 
*/ ret = aes_encrypt_update(ctx, plaintext, ciphertext, req); if (ret != CRYPTO_SUCCESS) { return (ret); } /* * For CCM mode, aes_ccm_encrypt_final() will take care of any * left-over unprocessed data, and compute the MAC */ if (aes_ctx->ac_flags & CCM_MODE) { /* * ccm_encrypt_final() will compute the MAC and append * it to existing ciphertext. So, need to adjust the left over * length value accordingly */ /* order of following 2 lines MUST not be reversed */ ciphertext->cd_offset = ciphertext->cd_length; ciphertext->cd_length = saved_length - ciphertext->cd_length; ret = ccm_encrypt_final((ccm_ctx_t *)aes_ctx, ciphertext, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); if (ret != CRYPTO_SUCCESS) { return (ret); } if (plaintext != ciphertext) { ciphertext->cd_length = ciphertext->cd_offset - saved_offset; } ciphertext->cd_offset = saved_offset; } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) { /* * gcm_encrypt_final() will compute the MAC and append * it to existing ciphertext. So, need to adjust the left over * length value accordingly */ /* order of following 2 lines MUST not be reversed */ ciphertext->cd_offset = ciphertext->cd_length; ciphertext->cd_length = saved_length - ciphertext->cd_length; ret = gcm_encrypt_final((gcm_ctx_t *)aes_ctx, ciphertext, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); if (ret != CRYPTO_SUCCESS) { return (ret); } if (plaintext != ciphertext) { ciphertext->cd_length = ciphertext->cd_offset - saved_offset; } ciphertext->cd_offset = saved_offset; } ASSERT(aes_ctx->ac_remainder_len == 0); (void) aes_free_context(ctx); return (ret); } static int aes_decrypt(crypto_ctx_t *ctx, crypto_data_t *ciphertext, crypto_data_t *plaintext, crypto_req_handle_t req) { int ret = CRYPTO_FAILED; aes_ctx_t *aes_ctx; off_t saved_offset; size_t saved_length, length_needed; ASSERT(ctx->cc_provider_private != NULL); aes_ctx = ctx->cc_provider_private; /* * For block ciphers, plaintext must be a multiple of AES block size. * This test is only valid for ciphers whose blocksize is a power of 2. */ if (((aes_ctx->ac_flags & (CTR_MODE|CCM_MODE|GCM_MODE|GMAC_MODE)) == 0) && (ciphertext->cd_length & (AES_BLOCK_LEN - 1)) != 0) { return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); } ASSERT(plaintext != NULL); /* * Return length needed to store the output. * Do not destroy context when plaintext buffer is too small. * * CCM: plaintext is MAC len smaller than cipher text * GCM: plaintext is TAG len smaller than cipher text * GMAC: plaintext length must be zero */ switch (aes_ctx->ac_flags & (CCM_MODE|GCM_MODE|GMAC_MODE)) { case CCM_MODE: length_needed = aes_ctx->ac_processed_data_len; break; case GCM_MODE: length_needed = ciphertext->cd_length - aes_ctx->ac_tag_len; break; case GMAC_MODE: if (plaintext->cd_length != 0) return (CRYPTO_ARGUMENTS_BAD); length_needed = 0; break; default: length_needed = ciphertext->cd_length; } if (plaintext->cd_length < length_needed) { plaintext->cd_length = length_needed; return (CRYPTO_BUFFER_TOO_SMALL); } saved_offset = plaintext->cd_offset; saved_length = plaintext->cd_length; /* * Do an update on the specified input data. 
*/ ret = aes_decrypt_update(ctx, ciphertext, plaintext, req); if (ret != CRYPTO_SUCCESS) { goto cleanup; } if (aes_ctx->ac_flags & CCM_MODE) { ASSERT(aes_ctx->ac_processed_data_len == aes_ctx->ac_data_len); ASSERT(aes_ctx->ac_processed_mac_len == aes_ctx->ac_mac_len); /* order of following 2 lines MUST not be reversed */ plaintext->cd_offset = plaintext->cd_length; plaintext->cd_length = saved_length - plaintext->cd_length; ret = ccm_decrypt_final((ccm_ctx_t *)aes_ctx, plaintext, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); if (ret == CRYPTO_SUCCESS) { if (plaintext != ciphertext) { plaintext->cd_length = plaintext->cd_offset - saved_offset; } } else { plaintext->cd_length = saved_length; } plaintext->cd_offset = saved_offset; } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) { /* order of following 2 lines MUST not be reversed */ plaintext->cd_offset = plaintext->cd_length; plaintext->cd_length = saved_length - plaintext->cd_length; ret = gcm_decrypt_final((gcm_ctx_t *)aes_ctx, plaintext, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); if (ret == CRYPTO_SUCCESS) { if (plaintext != ciphertext) { plaintext->cd_length = plaintext->cd_offset - saved_offset; } } else { plaintext->cd_length = saved_length; } plaintext->cd_offset = saved_offset; } ASSERT(aes_ctx->ac_remainder_len == 0); cleanup: (void) aes_free_context(ctx); return (ret); } -/* ARGSUSED */ static int aes_encrypt_update(crypto_ctx_t *ctx, crypto_data_t *plaintext, crypto_data_t *ciphertext, crypto_req_handle_t req) { + (void) req; off_t saved_offset; size_t saved_length, out_len; int ret = CRYPTO_SUCCESS; aes_ctx_t *aes_ctx; ASSERT(ctx->cc_provider_private != NULL); aes_ctx = ctx->cc_provider_private; ASSERT(ciphertext != NULL); /* compute number of bytes that will hold the ciphertext */ out_len = aes_ctx->ac_remainder_len; out_len += plaintext->cd_length; out_len &= ~(AES_BLOCK_LEN - 1); /* return length needed to store the output */ if (ciphertext->cd_length < out_len) { ciphertext->cd_length = out_len; return (CRYPTO_BUFFER_TOO_SMALL); } saved_offset = ciphertext->cd_offset; saved_length = ciphertext->cd_length; /* * Do the AES update on the specified input data. */ switch (plaintext->cd_format) { case CRYPTO_DATA_RAW: ret = crypto_update_iov(ctx->cc_provider_private, plaintext, ciphertext, aes_encrypt_contiguous_blocks, aes_copy_block64); break; case CRYPTO_DATA_UIO: ret = crypto_update_uio(ctx->cc_provider_private, plaintext, ciphertext, aes_encrypt_contiguous_blocks, aes_copy_block64); break; default: ret = CRYPTO_ARGUMENTS_BAD; } /* * Since AES counter mode is a stream cipher, we call * ctr_mode_final() to pick up any remaining bytes. * It is an internal function that does not destroy * the context like *normal* final routines. */ if ((aes_ctx->ac_flags & CTR_MODE) && (aes_ctx->ac_remainder_len > 0)) { ret = ctr_mode_final((ctr_ctx_t *)aes_ctx, ciphertext, aes_encrypt_block); } if (ret == CRYPTO_SUCCESS) { if (plaintext != ciphertext) ciphertext->cd_length = ciphertext->cd_offset - saved_offset; } else { ciphertext->cd_length = saved_length; } ciphertext->cd_offset = saved_offset; return (ret); } static int aes_decrypt_update(crypto_ctx_t *ctx, crypto_data_t *ciphertext, crypto_data_t *plaintext, crypto_req_handle_t req) { off_t saved_offset; size_t saved_length, out_len; int ret = CRYPTO_SUCCESS; aes_ctx_t *aes_ctx; ASSERT(ctx->cc_provider_private != NULL); aes_ctx = ctx->cc_provider_private; ASSERT(plaintext != NULL); /* * Compute number of bytes that will hold the plaintext. 
* This is not necessary for CCM, GCM, and GMAC since these * mechanisms never return plaintext for update operations. */ if ((aes_ctx->ac_flags & (CCM_MODE|GCM_MODE|GMAC_MODE)) == 0) { out_len = aes_ctx->ac_remainder_len; out_len += ciphertext->cd_length; out_len &= ~(AES_BLOCK_LEN - 1); /* return length needed to store the output */ if (plaintext->cd_length < out_len) { plaintext->cd_length = out_len; return (CRYPTO_BUFFER_TOO_SMALL); } } saved_offset = plaintext->cd_offset; saved_length = plaintext->cd_length; if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) gcm_set_kmflag((gcm_ctx_t *)aes_ctx, crypto_kmflag(req)); /* * Do the AES update on the specified input data. */ switch (ciphertext->cd_format) { case CRYPTO_DATA_RAW: ret = crypto_update_iov(ctx->cc_provider_private, ciphertext, plaintext, aes_decrypt_contiguous_blocks, aes_copy_block64); break; case CRYPTO_DATA_UIO: ret = crypto_update_uio(ctx->cc_provider_private, ciphertext, plaintext, aes_decrypt_contiguous_blocks, aes_copy_block64); break; default: ret = CRYPTO_ARGUMENTS_BAD; } /* * Since AES counter mode is a stream cipher, we call * ctr_mode_final() to pick up any remaining bytes. * It is an internal function that does not destroy * the context like *normal* final routines. */ if ((aes_ctx->ac_flags & CTR_MODE) && (aes_ctx->ac_remainder_len > 0)) { ret = ctr_mode_final((ctr_ctx_t *)aes_ctx, plaintext, aes_encrypt_block); if (ret == CRYPTO_DATA_LEN_RANGE) ret = CRYPTO_ENCRYPTED_DATA_LEN_RANGE; } if (ret == CRYPTO_SUCCESS) { if (ciphertext != plaintext) plaintext->cd_length = plaintext->cd_offset - saved_offset; } else { plaintext->cd_length = saved_length; } plaintext->cd_offset = saved_offset; return (ret); } -/* ARGSUSED */ static int aes_encrypt_final(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) { + (void) req; aes_ctx_t *aes_ctx; int ret; ASSERT(ctx->cc_provider_private != NULL); aes_ctx = ctx->cc_provider_private; if (data->cd_format != CRYPTO_DATA_RAW && data->cd_format != CRYPTO_DATA_UIO) { return (CRYPTO_ARGUMENTS_BAD); } if (aes_ctx->ac_flags & CTR_MODE) { if (aes_ctx->ac_remainder_len > 0) { ret = ctr_mode_final((ctr_ctx_t *)aes_ctx, data, aes_encrypt_block); if (ret != CRYPTO_SUCCESS) return (ret); } } else if (aes_ctx->ac_flags & CCM_MODE) { ret = ccm_encrypt_final((ccm_ctx_t *)aes_ctx, data, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); if (ret != CRYPTO_SUCCESS) { return (ret); } } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) { size_t saved_offset = data->cd_offset; ret = gcm_encrypt_final((gcm_ctx_t *)aes_ctx, data, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); if (ret != CRYPTO_SUCCESS) { return (ret); } data->cd_length = data->cd_offset - saved_offset; data->cd_offset = saved_offset; } else { /* * There must be no unprocessed plaintext. * This happens if the length of the last data is * not a multiple of the AES block length. */ if (aes_ctx->ac_remainder_len > 0) { return (CRYPTO_DATA_LEN_RANGE); } data->cd_length = 0; } (void) aes_free_context(ctx); return (CRYPTO_SUCCESS); } -/* ARGSUSED */ static int aes_decrypt_final(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) { + (void) req; aes_ctx_t *aes_ctx; int ret; off_t saved_offset; size_t saved_length; ASSERT(ctx->cc_provider_private != NULL); aes_ctx = ctx->cc_provider_private; if (data->cd_format != CRYPTO_DATA_RAW && data->cd_format != CRYPTO_DATA_UIO) { return (CRYPTO_ARGUMENTS_BAD); } /* * There must be no unprocessed ciphertext. 
* This happens if the length of the last ciphertext is * not a multiple of the AES block length. */ if (aes_ctx->ac_remainder_len > 0) { if ((aes_ctx->ac_flags & CTR_MODE) == 0) return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); else { ret = ctr_mode_final((ctr_ctx_t *)aes_ctx, data, aes_encrypt_block); if (ret == CRYPTO_DATA_LEN_RANGE) ret = CRYPTO_ENCRYPTED_DATA_LEN_RANGE; if (ret != CRYPTO_SUCCESS) return (ret); } } if (aes_ctx->ac_flags & CCM_MODE) { /* * This is where all the plaintext is returned, make sure * the plaintext buffer is big enough */ size_t pt_len = aes_ctx->ac_data_len; if (data->cd_length < pt_len) { data->cd_length = pt_len; return (CRYPTO_BUFFER_TOO_SMALL); } ASSERT(aes_ctx->ac_processed_data_len == pt_len); ASSERT(aes_ctx->ac_processed_mac_len == aes_ctx->ac_mac_len); saved_offset = data->cd_offset; saved_length = data->cd_length; ret = ccm_decrypt_final((ccm_ctx_t *)aes_ctx, data, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); if (ret == CRYPTO_SUCCESS) { data->cd_length = data->cd_offset - saved_offset; } else { data->cd_length = saved_length; } data->cd_offset = saved_offset; if (ret != CRYPTO_SUCCESS) { return (ret); } } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) { /* * This is where all the plaintext is returned, make sure * the plaintext buffer is big enough */ gcm_ctx_t *ctx = (gcm_ctx_t *)aes_ctx; size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; if (data->cd_length < pt_len) { data->cd_length = pt_len; return (CRYPTO_BUFFER_TOO_SMALL); } saved_offset = data->cd_offset; saved_length = data->cd_length; ret = gcm_decrypt_final((gcm_ctx_t *)aes_ctx, data, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); if (ret == CRYPTO_SUCCESS) { data->cd_length = data->cd_offset - saved_offset; } else { data->cd_length = saved_length; } data->cd_offset = saved_offset; if (ret != CRYPTO_SUCCESS) { return (ret); } } if ((aes_ctx->ac_flags & (CTR_MODE|CCM_MODE|GCM_MODE|GMAC_MODE)) == 0) { data->cd_length = 0; } (void) aes_free_context(ctx); return (CRYPTO_SUCCESS); } -/* ARGSUSED */ static int aes_encrypt_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *plaintext, crypto_data_t *ciphertext, crypto_spi_ctx_template_t template, crypto_req_handle_t req) { + (void) provider, (void) session_id; aes_ctx_t aes_ctx; /* on the stack */ off_t saved_offset; size_t saved_length; size_t length_needed; int ret; ASSERT(ciphertext != NULL); /* * CTR, CCM, GCM, and GMAC modes do not require that plaintext * be a multiple of AES block size. 
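 * For the remaining modes the test below relies on AES_BLOCK_LEN being
 * a power of two: (cd_length & (AES_BLOCK_LEN - 1)) is equivalent to
 * cd_length % AES_BLOCK_LEN, so a non-zero result means the plaintext
 * is not an exact multiple of the block size.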
*/ switch (mechanism->cm_type) { case AES_CTR_MECH_INFO_TYPE: case AES_CCM_MECH_INFO_TYPE: case AES_GCM_MECH_INFO_TYPE: case AES_GMAC_MECH_INFO_TYPE: break; default: if ((plaintext->cd_length & (AES_BLOCK_LEN - 1)) != 0) return (CRYPTO_DATA_LEN_RANGE); } if ((ret = aes_check_mech_param(mechanism, NULL, 0)) != CRYPTO_SUCCESS) return (ret); bzero(&aes_ctx, sizeof (aes_ctx_t)); ret = aes_common_init_ctx(&aes_ctx, template, mechanism, key, crypto_kmflag(req), B_TRUE); if (ret != CRYPTO_SUCCESS) return (ret); switch (mechanism->cm_type) { case AES_CCM_MECH_INFO_TYPE: length_needed = plaintext->cd_length + aes_ctx.ac_mac_len; break; case AES_GMAC_MECH_INFO_TYPE: if (plaintext->cd_length != 0) return (CRYPTO_ARGUMENTS_BAD); fallthrough; case AES_GCM_MECH_INFO_TYPE: length_needed = plaintext->cd_length + aes_ctx.ac_tag_len; break; default: length_needed = plaintext->cd_length; } /* return size of buffer needed to store output */ if (ciphertext->cd_length < length_needed) { ciphertext->cd_length = length_needed; ret = CRYPTO_BUFFER_TOO_SMALL; goto out; } saved_offset = ciphertext->cd_offset; saved_length = ciphertext->cd_length; /* * Do an update on the specified input data. */ switch (plaintext->cd_format) { case CRYPTO_DATA_RAW: ret = crypto_update_iov(&aes_ctx, plaintext, ciphertext, aes_encrypt_contiguous_blocks, aes_copy_block64); break; case CRYPTO_DATA_UIO: ret = crypto_update_uio(&aes_ctx, plaintext, ciphertext, aes_encrypt_contiguous_blocks, aes_copy_block64); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) { if (mechanism->cm_type == AES_CCM_MECH_INFO_TYPE) { ret = ccm_encrypt_final((ccm_ctx_t *)&aes_ctx, ciphertext, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); if (ret != CRYPTO_SUCCESS) goto out; ASSERT(aes_ctx.ac_remainder_len == 0); } else if (mechanism->cm_type == AES_GCM_MECH_INFO_TYPE || mechanism->cm_type == AES_GMAC_MECH_INFO_TYPE) { ret = gcm_encrypt_final((gcm_ctx_t *)&aes_ctx, ciphertext, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); if (ret != CRYPTO_SUCCESS) goto out; ASSERT(aes_ctx.ac_remainder_len == 0); } else if (mechanism->cm_type == AES_CTR_MECH_INFO_TYPE) { if (aes_ctx.ac_remainder_len > 0) { ret = ctr_mode_final((ctr_ctx_t *)&aes_ctx, ciphertext, aes_encrypt_block); if (ret != CRYPTO_SUCCESS) goto out; } } else { ASSERT(aes_ctx.ac_remainder_len == 0); } if (plaintext != ciphertext) { ciphertext->cd_length = ciphertext->cd_offset - saved_offset; } } else { ciphertext->cd_length = saved_length; } ciphertext->cd_offset = saved_offset; out: if (aes_ctx.ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) { bzero(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len); kmem_free(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len); } #ifdef CAN_USE_GCM_ASM if (aes_ctx.ac_flags & (GCM_MODE|GMAC_MODE) && ((gcm_ctx_t *)&aes_ctx)->gcm_Htable != NULL) { gcm_ctx_t *ctx = (gcm_ctx_t *)&aes_ctx; bzero(ctx->gcm_Htable, ctx->gcm_htab_len); kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len); } #endif return (ret); } -/* ARGSUSED */ static int aes_decrypt_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *ciphertext, crypto_data_t *plaintext, crypto_spi_ctx_template_t template, crypto_req_handle_t req) { + (void) provider, (void) session_id; aes_ctx_t aes_ctx; /* on the stack */ off_t saved_offset; size_t saved_length; size_t length_needed; int ret; ASSERT(plaintext != NULL); /* * CCM, GCM, CTR, and GMAC modes do not require that ciphertext * be a multiple of AES block size. 
*/ switch (mechanism->cm_type) { case AES_CTR_MECH_INFO_TYPE: case AES_CCM_MECH_INFO_TYPE: case AES_GCM_MECH_INFO_TYPE: case AES_GMAC_MECH_INFO_TYPE: break; default: if ((ciphertext->cd_length & (AES_BLOCK_LEN - 1)) != 0) return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); } if ((ret = aes_check_mech_param(mechanism, NULL, 0)) != CRYPTO_SUCCESS) return (ret); bzero(&aes_ctx, sizeof (aes_ctx_t)); ret = aes_common_init_ctx(&aes_ctx, template, mechanism, key, crypto_kmflag(req), B_FALSE); if (ret != CRYPTO_SUCCESS) return (ret); switch (mechanism->cm_type) { case AES_CCM_MECH_INFO_TYPE: length_needed = aes_ctx.ac_data_len; break; case AES_GCM_MECH_INFO_TYPE: length_needed = ciphertext->cd_length - aes_ctx.ac_tag_len; break; case AES_GMAC_MECH_INFO_TYPE: if (plaintext->cd_length != 0) return (CRYPTO_ARGUMENTS_BAD); length_needed = 0; break; default: length_needed = ciphertext->cd_length; } /* return size of buffer needed to store output */ if (plaintext->cd_length < length_needed) { plaintext->cd_length = length_needed; ret = CRYPTO_BUFFER_TOO_SMALL; goto out; } saved_offset = plaintext->cd_offset; saved_length = plaintext->cd_length; if (mechanism->cm_type == AES_GCM_MECH_INFO_TYPE || mechanism->cm_type == AES_GMAC_MECH_INFO_TYPE) gcm_set_kmflag((gcm_ctx_t *)&aes_ctx, crypto_kmflag(req)); /* * Do an update on the specified input data. */ switch (ciphertext->cd_format) { case CRYPTO_DATA_RAW: ret = crypto_update_iov(&aes_ctx, ciphertext, plaintext, aes_decrypt_contiguous_blocks, aes_copy_block64); break; case CRYPTO_DATA_UIO: ret = crypto_update_uio(&aes_ctx, ciphertext, plaintext, aes_decrypt_contiguous_blocks, aes_copy_block64); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) { if (mechanism->cm_type == AES_CCM_MECH_INFO_TYPE) { ASSERT(aes_ctx.ac_processed_data_len == aes_ctx.ac_data_len); ASSERT(aes_ctx.ac_processed_mac_len == aes_ctx.ac_mac_len); ret = ccm_decrypt_final((ccm_ctx_t *)&aes_ctx, plaintext, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); ASSERT(aes_ctx.ac_remainder_len == 0); if ((ret == CRYPTO_SUCCESS) && (ciphertext != plaintext)) { plaintext->cd_length = plaintext->cd_offset - saved_offset; } else { plaintext->cd_length = saved_length; } } else if (mechanism->cm_type == AES_GCM_MECH_INFO_TYPE || mechanism->cm_type == AES_GMAC_MECH_INFO_TYPE) { ret = gcm_decrypt_final((gcm_ctx_t *)&aes_ctx, plaintext, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); ASSERT(aes_ctx.ac_remainder_len == 0); if ((ret == CRYPTO_SUCCESS) && (ciphertext != plaintext)) { plaintext->cd_length = plaintext->cd_offset - saved_offset; } else { plaintext->cd_length = saved_length; } } else if (mechanism->cm_type != AES_CTR_MECH_INFO_TYPE) { ASSERT(aes_ctx.ac_remainder_len == 0); if (ciphertext != plaintext) plaintext->cd_length = plaintext->cd_offset - saved_offset; } else { if (aes_ctx.ac_remainder_len > 0) { ret = ctr_mode_final((ctr_ctx_t *)&aes_ctx, plaintext, aes_encrypt_block); if (ret == CRYPTO_DATA_LEN_RANGE) ret = CRYPTO_ENCRYPTED_DATA_LEN_RANGE; if (ret != CRYPTO_SUCCESS) goto out; } if (ciphertext != plaintext) plaintext->cd_length = plaintext->cd_offset - saved_offset; } } else { plaintext->cd_length = saved_length; } plaintext->cd_offset = saved_offset; out: if (aes_ctx.ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) { bzero(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len); kmem_free(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len); } if (aes_ctx.ac_flags & CCM_MODE) { if (aes_ctx.ac_pt_buf != NULL) { vmem_free(aes_ctx.ac_pt_buf, aes_ctx.ac_data_len); } } else if 
(aes_ctx.ac_flags & (GCM_MODE|GMAC_MODE)) { if (((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf != NULL) { vmem_free(((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf, ((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf_len); } #ifdef CAN_USE_GCM_ASM if (((gcm_ctx_t *)&aes_ctx)->gcm_Htable != NULL) { gcm_ctx_t *ctx = (gcm_ctx_t *)&aes_ctx; bzero(ctx->gcm_Htable, ctx->gcm_htab_len); kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len); } #endif } return (ret); } /* * KCF software provider context template entry points. */ -/* ARGSUSED */ static int aes_create_ctx_template(crypto_provider_handle_t provider, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t *tmpl, size_t *tmpl_size, crypto_req_handle_t req) { + (void) provider; void *keysched; size_t size; int rv; if (mechanism->cm_type != AES_ECB_MECH_INFO_TYPE && mechanism->cm_type != AES_CBC_MECH_INFO_TYPE && mechanism->cm_type != AES_CTR_MECH_INFO_TYPE && mechanism->cm_type != AES_CCM_MECH_INFO_TYPE && mechanism->cm_type != AES_GCM_MECH_INFO_TYPE && mechanism->cm_type != AES_GMAC_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); if ((keysched = aes_alloc_keysched(&size, crypto_kmflag(req))) == NULL) { return (CRYPTO_HOST_MEMORY); } /* * Initialize key schedule. Key length information is stored * in the key. */ if ((rv = init_keysched(key, keysched)) != CRYPTO_SUCCESS) { bzero(keysched, size); kmem_free(keysched, size); return (rv); } *tmpl = keysched; *tmpl_size = size; return (CRYPTO_SUCCESS); } static int aes_free_context(crypto_ctx_t *ctx) { aes_ctx_t *aes_ctx = ctx->cc_provider_private; if (aes_ctx != NULL) { if (aes_ctx->ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) { ASSERT(aes_ctx->ac_keysched_len != 0); bzero(aes_ctx->ac_keysched, aes_ctx->ac_keysched_len); kmem_free(aes_ctx->ac_keysched, aes_ctx->ac_keysched_len); } crypto_free_mode_ctx(aes_ctx); ctx->cc_provider_private = NULL; } return (CRYPTO_SUCCESS); } static int aes_common_init_ctx(aes_ctx_t *aes_ctx, crypto_spi_ctx_template_t *template, crypto_mechanism_t *mechanism, crypto_key_t *key, int kmflag, boolean_t is_encrypt_init) { int rv = CRYPTO_SUCCESS; void *keysched; size_t size = 0; if (template == NULL) { if ((keysched = aes_alloc_keysched(&size, kmflag)) == NULL) return (CRYPTO_HOST_MEMORY); /* * Initialize key schedule. * Key length is stored in the key. 
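 * This branch is taken only when the caller did not supply a
 * precomputed context template.  The provider then allocates its own
 * schedule and sets PROVIDER_OWNS_KEY_SCHEDULE so that
 * aes_free_context() knows to zero and free it; when a template is
 * supplied it is used directly and remains owned by the caller.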
*/ if ((rv = init_keysched(key, keysched)) != CRYPTO_SUCCESS) { kmem_free(keysched, size); return (rv); } aes_ctx->ac_flags |= PROVIDER_OWNS_KEY_SCHEDULE; aes_ctx->ac_keysched_len = size; } else { keysched = template; } aes_ctx->ac_keysched = keysched; switch (mechanism->cm_type) { case AES_CBC_MECH_INFO_TYPE: rv = cbc_init_ctx((cbc_ctx_t *)aes_ctx, mechanism->cm_param, mechanism->cm_param_len, AES_BLOCK_LEN, aes_copy_block64); break; case AES_CTR_MECH_INFO_TYPE: { CK_AES_CTR_PARAMS *pp; if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (CK_AES_CTR_PARAMS)) { return (CRYPTO_MECHANISM_PARAM_INVALID); } pp = (CK_AES_CTR_PARAMS *)(void *)mechanism->cm_param; rv = ctr_init_ctx((ctr_ctx_t *)aes_ctx, pp->ulCounterBits, pp->cb, aes_copy_block); break; } case AES_CCM_MECH_INFO_TYPE: if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (CK_AES_CCM_PARAMS)) { return (CRYPTO_MECHANISM_PARAM_INVALID); } rv = ccm_init_ctx((ccm_ctx_t *)aes_ctx, mechanism->cm_param, kmflag, is_encrypt_init, AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block); break; case AES_GCM_MECH_INFO_TYPE: if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (CK_AES_GCM_PARAMS)) { return (CRYPTO_MECHANISM_PARAM_INVALID); } rv = gcm_init_ctx((gcm_ctx_t *)aes_ctx, mechanism->cm_param, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); break; case AES_GMAC_MECH_INFO_TYPE: if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (CK_AES_GMAC_PARAMS)) { return (CRYPTO_MECHANISM_PARAM_INVALID); } rv = gmac_init_ctx((gcm_ctx_t *)aes_ctx, mechanism->cm_param, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block, aes_xor_block); break; case AES_ECB_MECH_INFO_TYPE: aes_ctx->ac_flags |= ECB_MODE; } if (rv != CRYPTO_SUCCESS) { if (aes_ctx->ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) { bzero(keysched, size); kmem_free(keysched, size); } } return (rv); } static int process_gmac_mech(crypto_mechanism_t *mech, crypto_data_t *data, CK_AES_GCM_PARAMS *gcm_params) { /* LINTED: pointer alignment */ CK_AES_GMAC_PARAMS *params = (CK_AES_GMAC_PARAMS *)mech->cm_param; if (mech->cm_type != AES_GMAC_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); if (mech->cm_param_len != sizeof (CK_AES_GMAC_PARAMS)) return (CRYPTO_MECHANISM_PARAM_INVALID); if (params->pIv == NULL) return (CRYPTO_MECHANISM_PARAM_INVALID); gcm_params->pIv = params->pIv; gcm_params->ulIvLen = AES_GMAC_IV_LEN; gcm_params->ulTagBits = AES_GMAC_TAG_BITS; if (data == NULL) return (CRYPTO_SUCCESS); if (data->cd_format != CRYPTO_DATA_RAW) return (CRYPTO_ARGUMENTS_BAD); gcm_params->pAAD = (uchar_t *)data->cd_raw.iov_base; gcm_params->ulAADLen = data->cd_length; return (CRYPTO_SUCCESS); } static int aes_mac_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, crypto_spi_ctx_template_t template, crypto_req_handle_t req) { CK_AES_GCM_PARAMS gcm_params; crypto_mechanism_t gcm_mech; int rv; if ((rv = process_gmac_mech(mechanism, data, &gcm_params)) != CRYPTO_SUCCESS) return (rv); gcm_mech.cm_type = AES_GCM_MECH_INFO_TYPE; gcm_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS); gcm_mech.cm_param = (char *)&gcm_params; return (aes_encrypt_atomic(provider, session_id, &gcm_mech, key, &null_crypto_data, mac, template, req)); } static int aes_mac_verify_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, 
crypto_spi_ctx_template_t template, crypto_req_handle_t req) { CK_AES_GCM_PARAMS gcm_params; crypto_mechanism_t gcm_mech; int rv; if ((rv = process_gmac_mech(mechanism, data, &gcm_params)) != CRYPTO_SUCCESS) return (rv); gcm_mech.cm_type = AES_GCM_MECH_INFO_TYPE; gcm_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS); gcm_mech.cm_param = (char *)&gcm_params; return (aes_decrypt_atomic(provider, session_id, &gcm_mech, key, mac, &null_crypto_data, template, req)); } diff --git a/module/icp/io/sha1_mod.c b/module/icp/io/sha1_mod.c index 6dcee6b2ecf2..5a372e3d3754 100644 --- a/module/icp/io/sha1_mod.c +++ b/module/icp/io/sha1_mod.c @@ -1,1230 +1,1230 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include /* * The sha1 module is created with two modlinkages: * - a modlmisc that allows consumers to directly call the entry points * SHA1Init, SHA1Update, and SHA1Final. * - a modlcrypto that allows the module to register with the Kernel * Cryptographic Framework (KCF) as a software provider for the SHA1 * mechanisms. */ static struct modlcrypto modlcrypto = { &mod_cryptoops, "SHA1 Kernel SW Provider 1.1" }; static struct modlinkage modlinkage = { MODREV_1, { &modlcrypto, NULL } }; /* * Macros to access the SHA1 or SHA1-HMAC contexts from a context passed * by KCF to one of the entry points. */ #define PROV_SHA1_CTX(ctx) ((sha1_ctx_t *)(ctx)->cc_provider_private) #define PROV_SHA1_HMAC_CTX(ctx) ((sha1_hmac_ctx_t *)(ctx)->cc_provider_private) /* to extract the digest length passed as mechanism parameter */ #define PROV_SHA1_GET_DIGEST_LEN(m, len) { \ if (IS_P2ALIGNED((m)->cm_param, sizeof (ulong_t))) \ (len) = (uint32_t)*((ulong_t *)(void *)mechanism->cm_param); \ else { \ ulong_t tmp_ulong; \ bcopy((m)->cm_param, &tmp_ulong, sizeof (ulong_t)); \ (len) = (uint32_t)tmp_ulong; \ } \ } #define PROV_SHA1_DIGEST_KEY(ctx, key, len, digest) { \ SHA1Init(ctx); \ SHA1Update(ctx, key, len); \ SHA1Final(digest, ctx); \ } /* * Mechanism info structure passed to KCF during registration. 
*/ static crypto_mech_info_t sha1_mech_info_tab[] = { /* SHA1 */ {SUN_CKM_SHA1, SHA1_MECH_INFO_TYPE, CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, /* SHA1-HMAC */ {SUN_CKM_SHA1_HMAC, SHA1_HMAC_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA1_HMAC_MIN_KEY_LEN, SHA1_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* SHA1-HMAC GENERAL */ {SUN_CKM_SHA1_HMAC_GENERAL, SHA1_HMAC_GEN_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA1_HMAC_MIN_KEY_LEN, SHA1_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES} }; static void sha1_provider_status(crypto_provider_handle_t, uint_t *); static crypto_control_ops_t sha1_control_ops = { sha1_provider_status }; static int sha1_digest_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_req_handle_t); static int sha1_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static int sha1_digest_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha1_digest_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha1_digest_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static crypto_digest_ops_t sha1_digest_ops = { .digest_init = sha1_digest_init, .digest = sha1_digest, .digest_update = sha1_digest_update, .digest_key = NULL, .digest_final = sha1_digest_final, .digest_atomic = sha1_digest_atomic }; static int sha1_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int sha1_mac_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha1_mac_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha1_mac_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int sha1_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static crypto_mac_ops_t sha1_mac_ops = { .mac_init = sha1_mac_init, .mac = NULL, .mac_update = sha1_mac_update, .mac_final = sha1_mac_final, .mac_atomic = sha1_mac_atomic, .mac_verify_atomic = sha1_mac_verify_atomic }; static int sha1_create_ctx_template(crypto_provider_handle_t, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *, size_t *, crypto_req_handle_t); static int sha1_free_context(crypto_ctx_t *); static crypto_ctx_ops_t sha1_ctx_ops = { .create_ctx_template = sha1_create_ctx_template, .free_context = sha1_free_context }; static crypto_ops_t sha1_crypto_ops = {{{{{ &sha1_control_ops, &sha1_digest_ops, NULL, &sha1_mac_ops, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &sha1_ctx_ops, }}}}}; static crypto_provider_info_t sha1_prov_info = {{{{ CRYPTO_SPI_VERSION_1, "SHA1 Software Provider", CRYPTO_SW_PROVIDER, NULL, &sha1_crypto_ops, sizeof (sha1_mech_info_tab)/sizeof (crypto_mech_info_t), sha1_mech_info_tab }}}}; static crypto_kcf_provider_handle_t sha1_prov_handle = 0; int sha1_mod_init(void) { int ret; if ((ret = mod_install(&modlinkage)) != 0) return (ret); /* * Register with KCF. If the registration fails, log an * error but do not uninstall the module, since the functionality * provided by misc/sha1 should still be available. 
*/ if ((ret = crypto_register_provider(&sha1_prov_info, &sha1_prov_handle)) != CRYPTO_SUCCESS) cmn_err(CE_WARN, "sha1 _init: " "crypto_register_provider() failed (0x%x)", ret); return (0); } int sha1_mod_fini(void) { int ret; if (sha1_prov_handle != 0) { if ((ret = crypto_unregister_provider(sha1_prov_handle)) != CRYPTO_SUCCESS) { cmn_err(CE_WARN, "sha1 _fini: crypto_unregister_provider() " "failed (0x%x)", ret); return (EBUSY); } sha1_prov_handle = 0; } return (mod_remove(&modlinkage)); } /* * KCF software provider control entry points. */ -/* ARGSUSED */ static void sha1_provider_status(crypto_provider_handle_t provider, uint_t *status) { + (void) provider, (void) status; *status = CRYPTO_PROVIDER_READY; } /* * KCF software provider digest entry points. */ static int sha1_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_req_handle_t req) { if (mechanism->cm_type != SHA1_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); /* * Allocate and initialize SHA1 context. */ ctx->cc_provider_private = kmem_alloc(sizeof (sha1_ctx_t), crypto_kmflag(req)); if (ctx->cc_provider_private == NULL) return (CRYPTO_HOST_MEMORY); PROV_SHA1_CTX(ctx)->sc_mech_type = SHA1_MECH_INFO_TYPE; SHA1Init(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx); return (CRYPTO_SUCCESS); } /* * Helper SHA1 digest update function for uio data. */ static int sha1_digest_update_uio(SHA1_CTX *sha1_ctx, crypto_data_t *data) { off_t offset = data->cd_offset; size_t length = data->cd_length; uint_t vec_idx = 0; size_t cur_len; /* we support only kernel buffer */ if (zfs_uio_segflg(data->cd_uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* * Jump to the first iovec containing data to be * digested. */ offset = zfs_uio_index_at_offset(data->cd_uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(data->cd_uio)) { /* * The caller specified an offset that is larger than the * total size of the buffers it provided. */ return (CRYPTO_DATA_LEN_RANGE); } /* * Now do the digesting on the iovecs. */ while (vec_idx < zfs_uio_iovcnt(data->cd_uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(data->cd_uio, vec_idx) - offset, length); SHA1Update(sha1_ctx, (uint8_t *)zfs_uio_iovbase(data->cd_uio, vec_idx) + offset, cur_len); length -= cur_len; vec_idx++; offset = 0; } if (vec_idx == zfs_uio_iovcnt(data->cd_uio) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed, i.e. * The caller requested to digest more data than it provided. */ return (CRYPTO_DATA_LEN_RANGE); } return (CRYPTO_SUCCESS); } /* * Helper SHA1 digest final function for uio data. * digest_len is the length of the desired digest. If digest_len * is smaller than the default SHA1 digest length, the caller * must pass a scratch buffer, digest_scratch, which must * be at least SHA1_DIGEST_LENGTH bytes. */ static int sha1_digest_final_uio(SHA1_CTX *sha1_ctx, crypto_data_t *digest, ulong_t digest_len, uchar_t *digest_scratch) { off_t offset = digest->cd_offset; uint_t vec_idx = 0; /* we support only kernel buffer */ if (zfs_uio_segflg(digest->cd_uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* * Jump to the first iovec containing ptr to the digest to * be returned. */ offset = zfs_uio_index_at_offset(digest->cd_uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(digest->cd_uio)) { /* * The caller specified an offset that is * larger than the total size of the buffers * it provided. 
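/*
 * Illustrative sketch (userland C, hypothetical names): the *_update_uio
 * helpers all follow the same pattern -- skip to the iovec that contains
 * the starting offset, then feed each following iovec (or the tail of the
 * first one) to the hash until the requested length is consumed, and fail
 * with a length-range error if the iovecs run out first.
 */
#include <stddef.h>
#include <sys/uio.h>

typedef void (*update_fn_t)(void *ctx, const void *buf, size_t len);

static int
scatter_update(void *ctx, update_fn_t update, const struct iovec *iov,
    int iovcnt, size_t offset, size_t length)
{
        int i = 0;

        /* find the iovec containing the starting offset */
        while (i < iovcnt && offset >= iov[i].iov_len)
                offset -= iov[i++].iov_len;
        if (i == iovcnt)
                return (-1);            /* offset past the end of the buffers */

        /* hash each iovec in turn, starting partway into the first one */
        while (i < iovcnt && length > 0) {
                size_t cur = iov[i].iov_len - offset;

                if (cur > length)
                        cur = length;
                update(ctx, (const char *)iov[i].iov_base + offset, cur);
                length -= cur;
                offset = 0;
                i++;
        }
        return (length == 0 ? 0 : -1);  /* -1: caller asked for more than it gave */
}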
*/ return (CRYPTO_DATA_LEN_RANGE); } if (offset + digest_len <= zfs_uio_iovlen(digest->cd_uio, vec_idx)) { /* * The computed SHA1 digest will fit in the current * iovec. */ if (digest_len != SHA1_DIGEST_LENGTH) { /* * The caller requested a short digest. Digest * into a scratch buffer and return to * the user only what was requested. */ SHA1Final(digest_scratch, sha1_ctx); bcopy(digest_scratch, (uchar_t *) zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset, digest_len); } else { SHA1Final((uchar_t *)zfs_uio_iovbase(digest-> cd_uio, vec_idx) + offset, sha1_ctx); } } else { /* * The computed digest will be crossing one or more iovec's. * This is bad performance-wise but we need to support it. * Allocate a small scratch buffer on the stack and * copy it piece meal to the specified digest iovec's. */ uchar_t digest_tmp[SHA1_DIGEST_LENGTH]; off_t scratch_offset = 0; size_t length = digest_len; size_t cur_len; SHA1Final(digest_tmp, sha1_ctx); while (vec_idx < zfs_uio_iovcnt(digest->cd_uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(digest->cd_uio, vec_idx) - offset, length); bcopy(digest_tmp + scratch_offset, zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset, cur_len); length -= cur_len; vec_idx++; scratch_offset += cur_len; offset = 0; } if (vec_idx == zfs_uio_iovcnt(digest->cd_uio) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed, i.e. * The caller requested to digest more data than it * provided. */ return (CRYPTO_DATA_LEN_RANGE); } } return (CRYPTO_SUCCESS); } -/* ARGSUSED */ static int sha1_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; ASSERT(ctx->cc_provider_private != NULL); /* * We need to just return the length needed to store the output. * We should not destroy the context for the following cases. */ if ((digest->cd_length == 0) || (digest->cd_length < SHA1_DIGEST_LENGTH)) { digest->cd_length = SHA1_DIGEST_LENGTH; return (CRYPTO_BUFFER_TOO_SMALL); } /* * Do the SHA1 update on the specified input data. */ switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA1Update(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha1_digest_update_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret != CRYPTO_SUCCESS) { /* the update failed, free context and bail */ kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t)); ctx->cc_provider_private = NULL; digest->cd_length = 0; return (ret); } /* * Do a SHA1 final, must be done separately since the digest * type can be different than the input data type. 
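/*
 * Illustrative sketch (userland C, hypothetical names): when the output
 * digest does not fit in a single iovec, the *_final_uio helpers finalize
 * into a small stack scratch buffer and copy it out piecemeal -- the
 * mirror image of the update loop above.
 */
#include <stddef.h>
#include <string.h>
#include <sys/uio.h>

static int
scatter_copy_out(const unsigned char *src, size_t len,
    const struct iovec *iov, int iovcnt, size_t offset)
{
        int i = 0;
        size_t done = 0;

        /* skip to the iovec containing the output offset */
        while (i < iovcnt && offset >= iov[i].iov_len)
                offset -= iov[i++].iov_len;

        /* copy the digest across as many iovecs as it spans */
        while (i < iovcnt && done < len) {
                size_t cur = iov[i].iov_len - offset;

                if (cur > len - done)
                        cur = len - done;
                memcpy((char *)iov[i].iov_base + offset, src + done, cur);
                done += cur;
                offset = 0;
                i++;
        }
        return (done == len ? 0 : -1);
}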
*/ switch (digest->cd_format) { case CRYPTO_DATA_RAW: SHA1Final((unsigned char *)digest->cd_raw.iov_base + digest->cd_offset, &PROV_SHA1_CTX(ctx)->sc_sha1_ctx); break; case CRYPTO_DATA_UIO: ret = sha1_digest_final_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, digest, SHA1_DIGEST_LENGTH, NULL); break; default: ret = CRYPTO_ARGUMENTS_BAD; } /* all done, free context and return */ if (ret == CRYPTO_SUCCESS) { digest->cd_length = SHA1_DIGEST_LENGTH; } else { digest->cd_length = 0; } kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t)); ctx->cc_provider_private = NULL; return (ret); } -/* ARGSUSED */ static int sha1_digest_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; ASSERT(ctx->cc_provider_private != NULL); /* * Do the SHA1 update on the specified input data. */ switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA1Update(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha1_digest_update_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } return (ret); } -/* ARGSUSED */ static int sha1_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; ASSERT(ctx->cc_provider_private != NULL); /* * We need to just return the length needed to store the output. * We should not destroy the context for the following cases. */ if ((digest->cd_length == 0) || (digest->cd_length < SHA1_DIGEST_LENGTH)) { digest->cd_length = SHA1_DIGEST_LENGTH; return (CRYPTO_BUFFER_TOO_SMALL); } /* * Do a SHA1 final. */ switch (digest->cd_format) { case CRYPTO_DATA_RAW: SHA1Final((unsigned char *)digest->cd_raw.iov_base + digest->cd_offset, &PROV_SHA1_CTX(ctx)->sc_sha1_ctx); break; case CRYPTO_DATA_UIO: ret = sha1_digest_final_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, digest, SHA1_DIGEST_LENGTH, NULL); break; default: ret = CRYPTO_ARGUMENTS_BAD; } /* all done, free context and return */ if (ret == CRYPTO_SUCCESS) { digest->cd_length = SHA1_DIGEST_LENGTH; } else { digest->cd_length = 0; } kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t)); ctx->cc_provider_private = NULL; return (ret); } -/* ARGSUSED */ static int sha1_digest_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req) { + (void) provider, (void) session_id, (void) req; int ret = CRYPTO_SUCCESS; SHA1_CTX sha1_ctx; if (mechanism->cm_type != SHA1_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); /* * Do the SHA1 init. */ SHA1Init(&sha1_ctx); /* * Do the SHA1 update on the specified input data. */ switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA1Update(&sha1_ctx, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha1_digest_update_uio(&sha1_ctx, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret != CRYPTO_SUCCESS) { /* the update failed, bail */ digest->cd_length = 0; return (ret); } /* * Do a SHA1 final, must be done separately since the digest * type can be different than the input data type. 
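/*
 * Illustrative sketch (hypothetical API, not the real KCF consumer
 * interface): the CRYPTO_BUFFER_TOO_SMALL convention used above lets a
 * caller size its output buffer in two passes -- probe with a zero (or
 * too-small) length, read back the required length, then retry.  The
 * provider must not tear down its context on the probe call, which is
 * why the length check happens before any SHA1Final().
 */
#include <stdlib.h>

#define	EX_OK           0
#define	EX_TOO_SMALL    1

/* hypothetical provider entry point: on EX_TOO_SMALL, *lenp is the needed size */
extern int provider_digest_final(void *ctx, unsigned char *buf, size_t *lenp);

static unsigned char *
get_digest(void *ctx, size_t *lenp)
{
        unsigned char *buf;

        *lenp = 0;
        if (provider_digest_final(ctx, NULL, lenp) != EX_TOO_SMALL)
                return (NULL);          /* probe was expected to "fail" */
        if ((buf = malloc(*lenp)) == NULL)
                return (NULL);
        if (provider_digest_final(ctx, buf, lenp) != EX_OK) {
                free(buf);
                return (NULL);
        }
        return (buf);
}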
*/ switch (digest->cd_format) { case CRYPTO_DATA_RAW: SHA1Final((unsigned char *)digest->cd_raw.iov_base + digest->cd_offset, &sha1_ctx); break; case CRYPTO_DATA_UIO: ret = sha1_digest_final_uio(&sha1_ctx, digest, SHA1_DIGEST_LENGTH, NULL); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) { digest->cd_length = SHA1_DIGEST_LENGTH; } else { digest->cd_length = 0; } return (ret); } /* * KCF software provider mac entry points. * * SHA1 HMAC is: SHA1(key XOR opad, SHA1(key XOR ipad, text)) * * Init: * The initialization routine initializes what we denote * as the inner and outer contexts by doing * - for inner context: SHA1(key XOR ipad) * - for outer context: SHA1(key XOR opad) * * Update: * Each subsequent SHA1 HMAC update will result in an * update of the inner context with the specified data. * * Final: * The SHA1 HMAC final will do a SHA1 final operation on the * inner context, and the resulting digest will be used * as the data for an update on the outer context. Last * but not least, a SHA1 final on the outer context will * be performed to obtain the SHA1 HMAC digest to return * to the user. */ /* * Initialize a SHA1-HMAC context. */ static void sha1_mac_init_ctx(sha1_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes) { uint32_t ipad[SHA1_HMAC_INTS_PER_BLOCK]; uint32_t opad[SHA1_HMAC_INTS_PER_BLOCK]; uint_t i; bzero(ipad, SHA1_HMAC_BLOCK_SIZE); bzero(opad, SHA1_HMAC_BLOCK_SIZE); bcopy(keyval, ipad, length_in_bytes); bcopy(keyval, opad, length_in_bytes); /* XOR key with ipad (0x36) and opad (0x5c) */ for (i = 0; i < SHA1_HMAC_INTS_PER_BLOCK; i++) { ipad[i] ^= 0x36363636; opad[i] ^= 0x5c5c5c5c; } /* perform SHA1 on ipad */ SHA1Init(&ctx->hc_icontext); SHA1Update(&ctx->hc_icontext, (uint8_t *)ipad, SHA1_HMAC_BLOCK_SIZE); /* perform SHA1 on opad */ SHA1Init(&ctx->hc_ocontext); SHA1Update(&ctx->hc_ocontext, (uint8_t *)opad, SHA1_HMAC_BLOCK_SIZE); } /* */ static int sha1_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { int ret = CRYPTO_SUCCESS; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE && mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); /* Add support for key by attributes (RFE 4706552) */ if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); ctx->cc_provider_private = kmem_alloc(sizeof (sha1_hmac_ctx_t), crypto_kmflag(req)); if (ctx->cc_provider_private == NULL) return (CRYPTO_HOST_MEMORY); if (ctx_template != NULL) { /* reuse context template */ bcopy(ctx_template, PROV_SHA1_HMAC_CTX(ctx), sizeof (sha1_hmac_ctx_t)); } else { /* no context template, compute context */ if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) { uchar_t digested_key[SHA1_DIGEST_LENGTH]; sha1_hmac_ctx_t *hmac_ctx = ctx->cc_provider_private; /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. */ PROV_SHA1_DIGEST_KEY(&hmac_ctx->hc_icontext, key->ck_data, keylen_in_bytes, digested_key); sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx), digested_key, SHA1_DIGEST_LENGTH); } else { sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx), key->ck_data, keylen_in_bytes); } } /* * Get the mechanism parameters, if applicable. 
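/*
 * Illustrative sketch (hypothetical hx_* hash API, single-shot form): the
 * comment block above describes the standard HMAC construction,
 *
 *	HMAC(K, m) = H((K ^ opad) || H((K ^ ipad) || m))
 *
 * with ipad = 0x36 and opad = 0x5c repeated across one hash block.  The
 * provider keeps the two partially-hashed pads as the "inner" and "outer"
 * contexts so that the work can be split across init/update/final calls;
 * here it is collapsed into one function for clarity.
 */
#include <stddef.h>
#include <string.h>

#define	HX_BLOCK	64	/* SHA1 block size in bytes */
#define	HX_DIGEST	20	/* SHA1 digest size in bytes */

/* hypothetical streaming hash interface; the state layout is a placeholder */
typedef struct hx_ctx { unsigned char state[128]; } hx_ctx_t;
extern void hx_init(hx_ctx_t *);
extern void hx_update(hx_ctx_t *, const void *, size_t);
extern void hx_final(unsigned char *, hx_ctx_t *);

static void
hmac_once(const unsigned char *key, size_t keylen,	/* keylen <= HX_BLOCK */
    const void *msg, size_t msglen, unsigned char mac[HX_DIGEST])
{
        unsigned char ipad[HX_BLOCK], opad[HX_BLOCK], inner[HX_DIGEST];
        hx_ctx_t ictx, octx;
        size_t i;

        /* zero-pad the key to one block, then derive the two pads */
        memset(ipad, 0, sizeof (ipad));
        memcpy(ipad, key, keylen);
        memcpy(opad, ipad, sizeof (opad));
        for (i = 0; i < HX_BLOCK; i++) {
                ipad[i] ^= 0x36;
                opad[i] ^= 0x5c;
        }

        hx_init(&ictx);				/* inner = H(K^ipad || m) */
        hx_update(&ictx, ipad, sizeof (ipad));
        hx_update(&ictx, msg, msglen);
        hx_final(inner, &ictx);

        hx_init(&octx);				/* outer = H(K^opad || inner) */
        hx_update(&octx, opad, sizeof (opad));
        hx_update(&octx, inner, sizeof (inner));
        hx_final(mac, &octx);
}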
*/ PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type; if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) { if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (ulong_t)) ret = CRYPTO_MECHANISM_PARAM_INVALID; PROV_SHA1_GET_DIGEST_LEN(mechanism, PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len); if (PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len > SHA1_DIGEST_LENGTH) ret = CRYPTO_MECHANISM_PARAM_INVALID; } if (ret != CRYPTO_SUCCESS) { bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t)); kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t)); ctx->cc_provider_private = NULL; } return (ret); } -/* ARGSUSED */ static int sha1_mac_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; ASSERT(ctx->cc_provider_private != NULL); /* * Do a SHA1 update of the inner context using the specified * data. */ switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA1Update(&PROV_SHA1_HMAC_CTX(ctx)->hc_icontext, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha1_digest_update_uio( &PROV_SHA1_HMAC_CTX(ctx)->hc_icontext, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } return (ret); } -/* ARGSUSED */ static int sha1_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; uchar_t digest[SHA1_DIGEST_LENGTH]; uint32_t digest_len = SHA1_DIGEST_LENGTH; ASSERT(ctx->cc_provider_private != NULL); if (PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) digest_len = PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len; /* * We need to just return the length needed to store the output. * We should not destroy the context for the following cases. */ if ((mac->cd_length == 0) || (mac->cd_length < digest_len)) { mac->cd_length = digest_len; return (CRYPTO_BUFFER_TOO_SMALL); } /* * Do a SHA1 final on the inner context. */ SHA1Final(digest, &PROV_SHA1_HMAC_CTX(ctx)->hc_icontext); /* * Do a SHA1 update on the outer context, feeding the inner * digest as data. */ SHA1Update(&PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext, digest, SHA1_DIGEST_LENGTH); /* * Do a SHA1 final on the outer context, storing the computing * digest in the users buffer. */ switch (mac->cd_format) { case CRYPTO_DATA_RAW: if (digest_len != SHA1_DIGEST_LENGTH) { /* * The caller requested a short digest. Digest * into a scratch buffer and return to * the user only what was requested. 
*/ SHA1Final(digest, &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext); bcopy(digest, (unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, digest_len); } else { SHA1Final((unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext); } break; case CRYPTO_DATA_UIO: ret = sha1_digest_final_uio( &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext, mac, digest_len, digest); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) { mac->cd_length = digest_len; } else { mac->cd_length = 0; } bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t)); kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t)); ctx->cc_provider_private = NULL; return (ret); } #define SHA1_MAC_UPDATE(data, ctx, ret) { \ switch (data->cd_format) { \ case CRYPTO_DATA_RAW: \ SHA1Update(&(ctx).hc_icontext, \ (uint8_t *)data->cd_raw.iov_base + \ data->cd_offset, data->cd_length); \ break; \ case CRYPTO_DATA_UIO: \ ret = sha1_digest_update_uio(&(ctx).hc_icontext, data); \ break; \ default: \ ret = CRYPTO_ARGUMENTS_BAD; \ } \ } -/* ARGSUSED */ static int sha1_mac_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { + (void) provider, (void) session_id, (void) req; int ret = CRYPTO_SUCCESS; uchar_t digest[SHA1_DIGEST_LENGTH]; sha1_hmac_ctx_t sha1_hmac_ctx; uint32_t digest_len = SHA1_DIGEST_LENGTH; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE && mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); /* Add support for key by attributes (RFE 4706552) */ if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); if (ctx_template != NULL) { /* reuse context template */ bcopy(ctx_template, &sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); } else { /* no context template, initialize context */ if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) { /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. */ PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx.hc_icontext, key->ck_data, keylen_in_bytes, digest); sha1_mac_init_ctx(&sha1_hmac_ctx, digest, SHA1_DIGEST_LENGTH); } else { sha1_mac_init_ctx(&sha1_hmac_ctx, key->ck_data, keylen_in_bytes); } } /* get the mechanism parameters, if applicable */ if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) { if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (ulong_t)) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } PROV_SHA1_GET_DIGEST_LEN(mechanism, digest_len); if (digest_len > SHA1_DIGEST_LENGTH) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } } /* do a SHA1 update of the inner context using the specified data */ SHA1_MAC_UPDATE(data, sha1_hmac_ctx, ret); if (ret != CRYPTO_SUCCESS) /* the update failed, free context and bail */ goto bail; /* * Do a SHA1 final on the inner context. */ SHA1Final(digest, &sha1_hmac_ctx.hc_icontext); /* * Do an SHA1 update on the outer context, feeding the inner * digest as data. */ SHA1Update(&sha1_hmac_ctx.hc_ocontext, digest, SHA1_DIGEST_LENGTH); /* * Do a SHA1 final on the outer context, storing the computed * digest in the users buffer. */ switch (mac->cd_format) { case CRYPTO_DATA_RAW: if (digest_len != SHA1_DIGEST_LENGTH) { /* * The caller requested a short digest. Digest * into a scratch buffer and return to * the user only what was requested. 
*/ SHA1Final(digest, &sha1_hmac_ctx.hc_ocontext); bcopy(digest, (unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, digest_len); } else { SHA1Final((unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, &sha1_hmac_ctx.hc_ocontext); } break; case CRYPTO_DATA_UIO: ret = sha1_digest_final_uio(&sha1_hmac_ctx.hc_ocontext, mac, digest_len, digest); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) { mac->cd_length = digest_len; } else { mac->cd_length = 0; } /* Extra paranoia: zeroize the context on the stack */ bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); return (ret); bail: bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); mac->cd_length = 0; return (ret); } -/* ARGSUSED */ static int sha1_mac_verify_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { + (void) provider, (void) session_id, (void) req; int ret = CRYPTO_SUCCESS; uchar_t digest[SHA1_DIGEST_LENGTH]; sha1_hmac_ctx_t sha1_hmac_ctx; uint32_t digest_len = SHA1_DIGEST_LENGTH; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE && mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); /* Add support for key by attributes (RFE 4706552) */ if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); if (ctx_template != NULL) { /* reuse context template */ bcopy(ctx_template, &sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); } else { /* no context template, initialize context */ if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) { /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. */ PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx.hc_icontext, key->ck_data, keylen_in_bytes, digest); sha1_mac_init_ctx(&sha1_hmac_ctx, digest, SHA1_DIGEST_LENGTH); } else { sha1_mac_init_ctx(&sha1_hmac_ctx, key->ck_data, keylen_in_bytes); } } /* get the mechanism parameters, if applicable */ if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) { if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (ulong_t)) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } PROV_SHA1_GET_DIGEST_LEN(mechanism, digest_len); if (digest_len > SHA1_DIGEST_LENGTH) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } } if (mac->cd_length != digest_len) { ret = CRYPTO_INVALID_MAC; goto bail; } /* do a SHA1 update of the inner context using the specified data */ SHA1_MAC_UPDATE(data, sha1_hmac_ctx, ret); if (ret != CRYPTO_SUCCESS) /* the update failed, free context and bail */ goto bail; /* do a SHA1 final on the inner context */ SHA1Final(digest, &sha1_hmac_ctx.hc_icontext); /* * Do an SHA1 update on the outer context, feeding the inner * digest as data. */ SHA1Update(&sha1_hmac_ctx.hc_ocontext, digest, SHA1_DIGEST_LENGTH); /* * Do a SHA1 final on the outer context, storing the computed * digest in the users buffer. */ SHA1Final(digest, &sha1_hmac_ctx.hc_ocontext); /* * Compare the computed digest against the expected digest passed * as argument. 
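/*
 * Illustrative sketch (hypothetical names, reusing hmac_once() from the
 * sketch above): mac_verify_atomic never inspects the tag directly -- it
 * recomputes the tag over the supplied data with the supplied key and
 * compares byte for byte, after first rejecting any length mismatch, and
 * it scrubs the keyed stack state on every exit path.
 */
#include <string.h>

extern void hmac_once(const unsigned char *, size_t, const void *, size_t,
    unsigned char [20]);

static int
verify_mac(const unsigned char *key, size_t keylen,
    const void *msg, size_t msglen,
    const unsigned char *tag, size_t taglen)
{
        unsigned char computed[20];
        int ok;

        if (taglen != sizeof (computed))
                return (0);			/* wrong length: reject */
        hmac_once(key, keylen, msg, msglen, computed);
        ok = (memcmp(computed, tag, taglen) == 0);
        memset(computed, 0, sizeof (computed));	/* scrub the scratch tag */
        return (ok);
}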
*/ switch (mac->cd_format) { case CRYPTO_DATA_RAW: if (bcmp(digest, (unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, digest_len) != 0) ret = CRYPTO_INVALID_MAC; break; case CRYPTO_DATA_UIO: { off_t offset = mac->cd_offset; uint_t vec_idx = 0; off_t scratch_offset = 0; size_t length = digest_len; size_t cur_len; /* we support only kernel buffer */ if (zfs_uio_segflg(mac->cd_uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* jump to the first iovec containing the expected digest */ offset = zfs_uio_index_at_offset(mac->cd_uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(mac->cd_uio)) { /* * The caller specified an offset that is * larger than the total size of the buffers * it provided. */ ret = CRYPTO_DATA_LEN_RANGE; break; } /* do the comparison of computed digest vs specified one */ while (vec_idx < zfs_uio_iovcnt(mac->cd_uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(mac->cd_uio, vec_idx) - offset, length); if (bcmp(digest + scratch_offset, zfs_uio_iovbase(mac->cd_uio, vec_idx) + offset, cur_len) != 0) { ret = CRYPTO_INVALID_MAC; break; } length -= cur_len; vec_idx++; scratch_offset += cur_len; offset = 0; } break; } default: ret = CRYPTO_ARGUMENTS_BAD; } bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); return (ret); bail: bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); mac->cd_length = 0; return (ret); } /* * KCF software provider context management entry points. */ -/* ARGSUSED */ static int sha1_create_ctx_template(crypto_provider_handle_t provider, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size, crypto_req_handle_t req) { + (void) provider; sha1_hmac_ctx_t *sha1_hmac_ctx_tmpl; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); if ((mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE) && (mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)) { return (CRYPTO_MECHANISM_INVALID); } /* Add support for key by attributes (RFE 4706552) */ if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); /* * Allocate and initialize SHA1 context. */ sha1_hmac_ctx_tmpl = kmem_alloc(sizeof (sha1_hmac_ctx_t), crypto_kmflag(req)); if (sha1_hmac_ctx_tmpl == NULL) return (CRYPTO_HOST_MEMORY); if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) { uchar_t digested_key[SHA1_DIGEST_LENGTH]; /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. */ PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx_tmpl->hc_icontext, key->ck_data, keylen_in_bytes, digested_key); sha1_mac_init_ctx(sha1_hmac_ctx_tmpl, digested_key, SHA1_DIGEST_LENGTH); } else { sha1_mac_init_ctx(sha1_hmac_ctx_tmpl, key->ck_data, keylen_in_bytes); } sha1_hmac_ctx_tmpl->hc_mech_type = mechanism->cm_type; *ctx_template = (crypto_spi_ctx_template_t)sha1_hmac_ctx_tmpl; *ctx_template_size = sizeof (sha1_hmac_ctx_t); return (CRYPTO_SUCCESS); } static int sha1_free_context(crypto_ctx_t *ctx) { uint_t ctx_len; sha1_mech_type_t mech_type; if (ctx->cc_provider_private == NULL) return (CRYPTO_SUCCESS); /* * We have to free either SHA1 or SHA1-HMAC contexts, which * have different lengths. 
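/*
 * Illustrative sketch (reusing the hypothetical hx_* interface from the
 * earlier sketch): a context template is just the keyed inner/outer hash
 * states computed once per key by create_ctx_template().  Each
 * per-message operation copies the template instead of re-hashing the
 * padded key, saving two block compressions per MAC when the same key is
 * reused many times.
 */
#include <string.h>

typedef struct {
        hx_ctx_t icontext;	/* H(K ^ ipad), ready to absorb message data */
        hx_ctx_t ocontext;	/* H(K ^ opad), ready to absorb the inner digest */
} hmac_tmpl_t;

static void
hmac_with_template(const hmac_tmpl_t *tmpl, const void *msg, size_t msglen,
    unsigned char mac[HX_DIGEST])
{
        hmac_tmpl_t t = *tmpl;		/* per-message working copy */
        unsigned char inner[HX_DIGEST];

        hx_update(&t.icontext, msg, msglen);
        hx_final(inner, &t.icontext);
        hx_update(&t.ocontext, inner, sizeof (inner));
        hx_final(mac, &t.ocontext);
        memset(&t, 0, sizeof (t));	/* scrub the keyed working copy */
}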
*/ mech_type = PROV_SHA1_CTX(ctx)->sc_mech_type; if (mech_type == SHA1_MECH_INFO_TYPE) ctx_len = sizeof (sha1_ctx_t); else { ASSERT(mech_type == SHA1_HMAC_MECH_INFO_TYPE || mech_type == SHA1_HMAC_GEN_MECH_INFO_TYPE); ctx_len = sizeof (sha1_hmac_ctx_t); } bzero(ctx->cc_provider_private, ctx_len); kmem_free(ctx->cc_provider_private, ctx_len); ctx->cc_provider_private = NULL; return (CRYPTO_SUCCESS); } diff --git a/module/icp/io/sha2_mod.c b/module/icp/io/sha2_mod.c index d690cd0bcb05..77957ee114d9 100644 --- a/module/icp/io/sha2_mod.c +++ b/module/icp/io/sha2_mod.c @@ -1,1399 +1,1399 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #define _SHA2_IMPL #include #include /* * The sha2 module is created with two modlinkages: * - a modlmisc that allows consumers to directly call the entry points * SHA2Init, SHA2Update, and SHA2Final. * - a modlcrypto that allows the module to register with the Kernel * Cryptographic Framework (KCF) as a software provider for the SHA2 * mechanisms. */ static struct modlcrypto modlcrypto = { &mod_cryptoops, "SHA2 Kernel SW Provider" }; static struct modlinkage modlinkage = { MODREV_1, {&modlcrypto, NULL} }; /* * Macros to access the SHA2 or SHA2-HMAC contexts from a context passed * by KCF to one of the entry points. */ #define PROV_SHA2_CTX(ctx) ((sha2_ctx_t *)(ctx)->cc_provider_private) #define PROV_SHA2_HMAC_CTX(ctx) ((sha2_hmac_ctx_t *)(ctx)->cc_provider_private) /* to extract the digest length passed as mechanism parameter */ #define PROV_SHA2_GET_DIGEST_LEN(m, len) { \ if (IS_P2ALIGNED((m)->cm_param, sizeof (ulong_t))) \ (len) = (uint32_t)*((ulong_t *)(m)->cm_param); \ else { \ ulong_t tmp_ulong; \ bcopy((m)->cm_param, &tmp_ulong, sizeof (ulong_t)); \ (len) = (uint32_t)tmp_ulong; \ } \ } #define PROV_SHA2_DIGEST_KEY(mech, ctx, key, len, digest) { \ SHA2Init(mech, ctx); \ SHA2Update(ctx, key, len); \ SHA2Final(digest, ctx); \ } /* * Mechanism info structure passed to KCF during registration. 
*/ static crypto_mech_info_t sha2_mech_info_tab[] = { /* SHA256 */ {SUN_CKM_SHA256, SHA256_MECH_INFO_TYPE, CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, /* SHA256-HMAC */ {SUN_CKM_SHA256_HMAC, SHA256_HMAC_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* SHA256-HMAC GENERAL */ {SUN_CKM_SHA256_HMAC_GENERAL, SHA256_HMAC_GEN_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* SHA384 */ {SUN_CKM_SHA384, SHA384_MECH_INFO_TYPE, CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, /* SHA384-HMAC */ {SUN_CKM_SHA384_HMAC, SHA384_HMAC_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* SHA384-HMAC GENERAL */ {SUN_CKM_SHA384_HMAC_GENERAL, SHA384_HMAC_GEN_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* SHA512 */ {SUN_CKM_SHA512, SHA512_MECH_INFO_TYPE, CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, /* SHA512-HMAC */ {SUN_CKM_SHA512_HMAC, SHA512_HMAC_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, /* SHA512-HMAC GENERAL */ {SUN_CKM_SHA512_HMAC_GENERAL, SHA512_HMAC_GEN_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, CRYPTO_KEYSIZE_UNIT_IN_BYTES} }; static void sha2_provider_status(crypto_provider_handle_t, uint_t *); static crypto_control_ops_t sha2_control_ops = { sha2_provider_status }; static int sha2_digest_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_req_handle_t); static int sha2_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static int sha2_digest_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha2_digest_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha2_digest_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static crypto_digest_ops_t sha2_digest_ops = { .digest_init = sha2_digest_init, .digest = sha2_digest, .digest_update = sha2_digest_update, .digest_key = NULL, .digest_final = sha2_digest_final, .digest_atomic = sha2_digest_atomic }; static int sha2_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int sha2_mac_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha2_mac_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int sha2_mac_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int sha2_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static crypto_mac_ops_t sha2_mac_ops = { .mac_init = sha2_mac_init, .mac = NULL, .mac_update = sha2_mac_update, .mac_final = sha2_mac_final, .mac_atomic = sha2_mac_atomic, .mac_verify_atomic = sha2_mac_verify_atomic }; static int sha2_create_ctx_template(crypto_provider_handle_t, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *, size_t *, 
crypto_req_handle_t); static int sha2_free_context(crypto_ctx_t *); static crypto_ctx_ops_t sha2_ctx_ops = { .create_ctx_template = sha2_create_ctx_template, .free_context = sha2_free_context }; static crypto_ops_t sha2_crypto_ops = {{{{{ &sha2_control_ops, &sha2_digest_ops, NULL, &sha2_mac_ops, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &sha2_ctx_ops }}}}}; static crypto_provider_info_t sha2_prov_info = {{{{ CRYPTO_SPI_VERSION_1, "SHA2 Software Provider", CRYPTO_SW_PROVIDER, NULL, &sha2_crypto_ops, sizeof (sha2_mech_info_tab)/sizeof (crypto_mech_info_t), sha2_mech_info_tab }}}}; static crypto_kcf_provider_handle_t sha2_prov_handle = 0; int sha2_mod_init(void) { int ret; if ((ret = mod_install(&modlinkage)) != 0) return (ret); /* * Register with KCF. If the registration fails, log an * error but do not uninstall the module, since the functionality * provided by misc/sha2 should still be available. */ if ((ret = crypto_register_provider(&sha2_prov_info, &sha2_prov_handle)) != CRYPTO_SUCCESS) cmn_err(CE_WARN, "sha2 _init: " "crypto_register_provider() failed (0x%x)", ret); return (0); } int sha2_mod_fini(void) { int ret; if (sha2_prov_handle != 0) { if ((ret = crypto_unregister_provider(sha2_prov_handle)) != CRYPTO_SUCCESS) { cmn_err(CE_WARN, "sha2 _fini: crypto_unregister_provider() " "failed (0x%x)", ret); return (EBUSY); } sha2_prov_handle = 0; } return (mod_remove(&modlinkage)); } /* * KCF software provider control entry points. */ -/* ARGSUSED */ static void sha2_provider_status(crypto_provider_handle_t provider, uint_t *status) { + (void) provider; *status = CRYPTO_PROVIDER_READY; } /* * KCF software provider digest entry points. */ static int sha2_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_req_handle_t req) { /* * Allocate and initialize SHA2 context. */ ctx->cc_provider_private = kmem_alloc(sizeof (sha2_ctx_t), crypto_kmflag(req)); if (ctx->cc_provider_private == NULL) return (CRYPTO_HOST_MEMORY); PROV_SHA2_CTX(ctx)->sc_mech_type = mechanism->cm_type; SHA2Init(mechanism->cm_type, &PROV_SHA2_CTX(ctx)->sc_sha2_ctx); return (CRYPTO_SUCCESS); } /* * Helper SHA2 digest update function for uio data. */ static int sha2_digest_update_uio(SHA2_CTX *sha2_ctx, crypto_data_t *data) { off_t offset = data->cd_offset; size_t length = data->cd_length; uint_t vec_idx = 0; size_t cur_len; /* we support only kernel buffer */ if (zfs_uio_segflg(data->cd_uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* * Jump to the first iovec containing data to be * digested. */ offset = zfs_uio_index_at_offset(data->cd_uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(data->cd_uio)) { /* * The caller specified an offset that is larger than the * total size of the buffers it provided. */ return (CRYPTO_DATA_LEN_RANGE); } /* * Now do the digesting on the iovecs. */ while (vec_idx < zfs_uio_iovcnt(data->cd_uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(data->cd_uio, vec_idx) - offset, length); SHA2Update(sha2_ctx, (uint8_t *)zfs_uio_iovbase(data->cd_uio, vec_idx) + offset, cur_len); length -= cur_len; vec_idx++; offset = 0; } if (vec_idx == zfs_uio_iovcnt(data->cd_uio) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed, i.e. * The caller requested to digest more data than it provided. */ return (CRYPTO_DATA_LEN_RANGE); } return (CRYPTO_SUCCESS); } /* * Helper SHA2 digest final function for uio data. * digest_len is the length of the desired digest. 
If digest_len * is smaller than the default SHA2 digest length, the caller * must pass a scratch buffer, digest_scratch, which must * be at least the algorithm's digest length bytes. */ static int sha2_digest_final_uio(SHA2_CTX *sha2_ctx, crypto_data_t *digest, ulong_t digest_len, uchar_t *digest_scratch) { off_t offset = digest->cd_offset; uint_t vec_idx = 0; /* we support only kernel buffer */ if (zfs_uio_segflg(digest->cd_uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* * Jump to the first iovec containing ptr to the digest to * be returned. */ offset = zfs_uio_index_at_offset(digest->cd_uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(digest->cd_uio)) { /* * The caller specified an offset that is * larger than the total size of the buffers * it provided. */ return (CRYPTO_DATA_LEN_RANGE); } if (offset + digest_len <= zfs_uio_iovlen(digest->cd_uio, vec_idx)) { /* * The computed SHA2 digest will fit in the current * iovec. */ if (((sha2_ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) && (digest_len != SHA256_DIGEST_LENGTH)) || ((sha2_ctx->algotype > SHA256_HMAC_GEN_MECH_INFO_TYPE) && (digest_len != SHA512_DIGEST_LENGTH))) { /* * The caller requested a short digest. Digest * into a scratch buffer and return to * the user only what was requested. */ SHA2Final(digest_scratch, sha2_ctx); bcopy(digest_scratch, (uchar_t *) zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset, digest_len); } else { SHA2Final((uchar_t *)zfs_uio_iovbase(digest-> cd_uio, vec_idx) + offset, sha2_ctx); } } else { /* * The computed digest will be crossing one or more iovec's. * This is bad performance-wise but we need to support it. * Allocate a small scratch buffer on the stack and * copy it piece meal to the specified digest iovec's. */ uchar_t digest_tmp[SHA512_DIGEST_LENGTH]; off_t scratch_offset = 0; size_t length = digest_len; size_t cur_len; SHA2Final(digest_tmp, sha2_ctx); while (vec_idx < zfs_uio_iovcnt(digest->cd_uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(digest->cd_uio, vec_idx) - offset, length); bcopy(digest_tmp + scratch_offset, zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset, cur_len); length -= cur_len; vec_idx++; scratch_offset += cur_len; offset = 0; } if (vec_idx == zfs_uio_iovcnt(digest->cd_uio) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed, i.e. * The caller requested to digest more data than it * provided. */ return (CRYPTO_DATA_LEN_RANGE); } } return (CRYPTO_SUCCESS); } -/* ARGSUSED */ static int sha2_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; uint_t sha_digest_len; ASSERT(ctx->cc_provider_private != NULL); switch (PROV_SHA2_CTX(ctx)->sc_mech_type) { case SHA256_MECH_INFO_TYPE: sha_digest_len = SHA256_DIGEST_LENGTH; break; case SHA384_MECH_INFO_TYPE: sha_digest_len = SHA384_DIGEST_LENGTH; break; case SHA512_MECH_INFO_TYPE: sha_digest_len = SHA512_DIGEST_LENGTH; break; default: return (CRYPTO_MECHANISM_INVALID); } /* * We need to just return the length needed to store the output. * We should not destroy the context for the following cases. */ if ((digest->cd_length == 0) || (digest->cd_length < sha_digest_len)) { digest->cd_length = sha_digest_len; return (CRYPTO_BUFFER_TOO_SMALL); } /* * Do the SHA2 update on the specified input data. 
*/ switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA2Update(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha2_digest_update_uio(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret != CRYPTO_SUCCESS) { /* the update failed, free context and bail */ kmem_free(ctx->cc_provider_private, sizeof (sha2_ctx_t)); ctx->cc_provider_private = NULL; digest->cd_length = 0; return (ret); } /* * Do a SHA2 final, must be done separately since the digest * type can be different than the input data type. */ switch (digest->cd_format) { case CRYPTO_DATA_RAW: SHA2Final((unsigned char *)digest->cd_raw.iov_base + digest->cd_offset, &PROV_SHA2_CTX(ctx)->sc_sha2_ctx); break; case CRYPTO_DATA_UIO: ret = sha2_digest_final_uio(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx, digest, sha_digest_len, NULL); break; default: ret = CRYPTO_ARGUMENTS_BAD; } /* all done, free context and return */ if (ret == CRYPTO_SUCCESS) digest->cd_length = sha_digest_len; else digest->cd_length = 0; kmem_free(ctx->cc_provider_private, sizeof (sha2_ctx_t)); ctx->cc_provider_private = NULL; return (ret); } -/* ARGSUSED */ static int sha2_digest_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; ASSERT(ctx->cc_provider_private != NULL); /* * Do the SHA2 update on the specified input data. */ switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA2Update(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha2_digest_update_uio(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } return (ret); } -/* ARGSUSED */ static int sha2_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; uint_t sha_digest_len; ASSERT(ctx->cc_provider_private != NULL); switch (PROV_SHA2_CTX(ctx)->sc_mech_type) { case SHA256_MECH_INFO_TYPE: sha_digest_len = SHA256_DIGEST_LENGTH; break; case SHA384_MECH_INFO_TYPE: sha_digest_len = SHA384_DIGEST_LENGTH; break; case SHA512_MECH_INFO_TYPE: sha_digest_len = SHA512_DIGEST_LENGTH; break; default: return (CRYPTO_MECHANISM_INVALID); } /* * We need to just return the length needed to store the output. * We should not destroy the context for the following cases. */ if ((digest->cd_length == 0) || (digest->cd_length < sha_digest_len)) { digest->cd_length = sha_digest_len; return (CRYPTO_BUFFER_TOO_SMALL); } /* * Do a SHA2 final. */ switch (digest->cd_format) { case CRYPTO_DATA_RAW: SHA2Final((unsigned char *)digest->cd_raw.iov_base + digest->cd_offset, &PROV_SHA2_CTX(ctx)->sc_sha2_ctx); break; case CRYPTO_DATA_UIO: ret = sha2_digest_final_uio(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx, digest, sha_digest_len, NULL); break; default: ret = CRYPTO_ARGUMENTS_BAD; } /* all done, free context and return */ if (ret == CRYPTO_SUCCESS) digest->cd_length = sha_digest_len; else digest->cd_length = 0; kmem_free(ctx->cc_provider_private, sizeof (sha2_ctx_t)); ctx->cc_provider_private = NULL; return (ret); } -/* ARGSUSED */ static int sha2_digest_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req) { + (void) provider, (void) session_id, (void) req; int ret = CRYPTO_SUCCESS; SHA2_CTX sha2_ctx; uint32_t sha_digest_len; /* * Do the SHA inits. 
*/ SHA2Init(mechanism->cm_type, &sha2_ctx); switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA2Update(&sha2_ctx, (uint8_t *)data-> cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha2_digest_update_uio(&sha2_ctx, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } /* * Do the SHA updates on the specified input data. */ if (ret != CRYPTO_SUCCESS) { /* the update failed, bail */ digest->cd_length = 0; return (ret); } if (mechanism->cm_type <= SHA256_HMAC_GEN_MECH_INFO_TYPE) sha_digest_len = SHA256_DIGEST_LENGTH; else sha_digest_len = SHA512_DIGEST_LENGTH; /* * Do a SHA2 final, must be done separately since the digest * type can be different than the input data type. */ switch (digest->cd_format) { case CRYPTO_DATA_RAW: SHA2Final((unsigned char *)digest->cd_raw.iov_base + digest->cd_offset, &sha2_ctx); break; case CRYPTO_DATA_UIO: ret = sha2_digest_final_uio(&sha2_ctx, digest, sha_digest_len, NULL); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) digest->cd_length = sha_digest_len; else digest->cd_length = 0; return (ret); } /* * KCF software provider mac entry points. * * SHA2 HMAC is: SHA2(key XOR opad, SHA2(key XOR ipad, text)) * * Init: * The initialization routine initializes what we denote * as the inner and outer contexts by doing * - for inner context: SHA2(key XOR ipad) * - for outer context: SHA2(key XOR opad) * * Update: * Each subsequent SHA2 HMAC update will result in an * update of the inner context with the specified data. * * Final: * The SHA2 HMAC final will do a SHA2 final operation on the * inner context, and the resulting digest will be used * as the data for an update on the outer context. Last * but not least, a SHA2 final on the outer context will * be performed to obtain the SHA2 HMAC digest to return * to the user. */ /* * Initialize a SHA2-HMAC context. 
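/*
 * Illustrative sketch (hypothetical names): before the ipad/opad step,
 * HMAC keys longer than one hash block are first hashed down to a
 * digest-sized key, while shorter keys are simply zero-padded by the
 * bzero/bcopy in the init-ctx routine.  For SHA-2 the block size depends
 * on the family -- 64 bytes for SHA-256, 128 bytes for SHA-384/512 -- so
 * the same normalization is parameterized by block size.
 */
#include <stddef.h>
#include <string.h>

static size_t
hmac_normalize_key(const unsigned char *key, size_t keylen,
    size_t block_size, size_t digest_len,
    unsigned char *out,				/* at least block_size bytes */
    void (*hash_once)(const void *, size_t, unsigned char *))
{
        if (keylen > block_size) {
                hash_once(key, keylen, out);	/* K' = H(K) */
                return (digest_len);
        }
        memcpy(out, key, keylen);		/* K' = K, zero-padded by caller */
        return (keylen);
}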
*/ static void sha2_mac_init_ctx(sha2_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes) { uint64_t ipad[SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t)]; uint64_t opad[SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t)]; int i, block_size, blocks_per_int64; /* Determine the block size */ if (ctx->hc_mech_type <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { block_size = SHA256_HMAC_BLOCK_SIZE; blocks_per_int64 = SHA256_HMAC_BLOCK_SIZE / sizeof (uint64_t); } else { block_size = SHA512_HMAC_BLOCK_SIZE; blocks_per_int64 = SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t); } (void) bzero(ipad, block_size); (void) bzero(opad, block_size); (void) bcopy(keyval, ipad, length_in_bytes); (void) bcopy(keyval, opad, length_in_bytes); /* XOR key with ipad (0x36) and opad (0x5c) */ for (i = 0; i < blocks_per_int64; i ++) { ipad[i] ^= 0x3636363636363636; opad[i] ^= 0x5c5c5c5c5c5c5c5c; } /* perform SHA2 on ipad */ SHA2Init(ctx->hc_mech_type, &ctx->hc_icontext); SHA2Update(&ctx->hc_icontext, (uint8_t *)ipad, block_size); /* perform SHA2 on opad */ SHA2Init(ctx->hc_mech_type, &ctx->hc_ocontext); SHA2Update(&ctx->hc_ocontext, (uint8_t *)opad, block_size); } /* */ static int sha2_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { int ret = CRYPTO_SUCCESS; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); uint_t sha_digest_len, sha_hmac_block_size; /* * Set the digest length and block size to values appropriate to the * mechanism */ switch (mechanism->cm_type) { case SHA256_HMAC_MECH_INFO_TYPE: case SHA256_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = SHA256_DIGEST_LENGTH; sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE; break; case SHA384_HMAC_MECH_INFO_TYPE: case SHA384_HMAC_GEN_MECH_INFO_TYPE: case SHA512_HMAC_MECH_INFO_TYPE: case SHA512_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = SHA512_DIGEST_LENGTH; sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE; break; default: return (CRYPTO_MECHANISM_INVALID); } if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); ctx->cc_provider_private = kmem_alloc(sizeof (sha2_hmac_ctx_t), crypto_kmflag(req)); if (ctx->cc_provider_private == NULL) return (CRYPTO_HOST_MEMORY); PROV_SHA2_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type; if (ctx_template != NULL) { /* reuse context template */ bcopy(ctx_template, PROV_SHA2_HMAC_CTX(ctx), sizeof (sha2_hmac_ctx_t)); } else { /* no context template, compute context */ if (keylen_in_bytes > sha_hmac_block_size) { uchar_t digested_key[SHA512_DIGEST_LENGTH]; sha2_hmac_ctx_t *hmac_ctx = ctx->cc_provider_private; /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. */ PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3, &hmac_ctx->hc_icontext, key->ck_data, keylen_in_bytes, digested_key); sha2_mac_init_ctx(PROV_SHA2_HMAC_CTX(ctx), digested_key, sha_digest_len); } else { sha2_mac_init_ctx(PROV_SHA2_HMAC_CTX(ctx), key->ck_data, keylen_in_bytes); } } /* * Get the mechanism parameters, if applicable. 
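/*
 * Illustrative sketch (hypothetical enum, not the module's real mechanism
 * numbering): the cm_type / 3 and cm_type % 3 arithmetic used here and in
 * sha2_free_context() only works because the mechanism table lists each
 * family as <plain digest, HMAC, HMAC-general>, in that order.  With an
 * equivalent numbering of our own:
 */
enum ex_mech {
        EX_SHA256,	EX_SHA256_HMAC,		EX_SHA256_HMAC_GEN,
        EX_SHA384,	EX_SHA384_HMAC,		EX_SHA384_HMAC_GEN,
        EX_SHA512,	EX_SHA512_HMAC,		EX_SHA512_HMAC_GEN
};

#define	EX_IS_PLAIN_DIGEST(m)	((m) % 3 == 0)	/* no key, no HMAC state */
#define	EX_IS_GENERAL_HMAC(m)	((m) % 3 == 2)	/* takes a digest-length param */
#define	EX_FAMILY(m)		((m) / 3)	/* 0=SHA256, 1=SHA384, 2=SHA512 */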
*/ if (mechanism->cm_type % 3 == 2) { if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (ulong_t)) ret = CRYPTO_MECHANISM_PARAM_INVALID; PROV_SHA2_GET_DIGEST_LEN(mechanism, PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len); if (PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len > sha_digest_len) ret = CRYPTO_MECHANISM_PARAM_INVALID; } if (ret != CRYPTO_SUCCESS) { bzero(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t)); kmem_free(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t)); ctx->cc_provider_private = NULL; } return (ret); } -/* ARGSUSED */ static int sha2_mac_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; ASSERT(ctx->cc_provider_private != NULL); /* * Do a SHA2 update of the inner context using the specified * data. */ switch (data->cd_format) { case CRYPTO_DATA_RAW: SHA2Update(&PROV_SHA2_HMAC_CTX(ctx)->hc_icontext, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: ret = sha2_digest_update_uio( &PROV_SHA2_HMAC_CTX(ctx)->hc_icontext, data); break; default: ret = CRYPTO_ARGUMENTS_BAD; } return (ret); } -/* ARGSUSED */ static int sha2_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req) { + (void) req; int ret = CRYPTO_SUCCESS; uchar_t digest[SHA512_DIGEST_LENGTH]; uint32_t digest_len, sha_digest_len; ASSERT(ctx->cc_provider_private != NULL); /* Set the digest lengths to values appropriate to the mechanism */ switch (PROV_SHA2_HMAC_CTX(ctx)->hc_mech_type) { case SHA256_HMAC_MECH_INFO_TYPE: sha_digest_len = digest_len = SHA256_DIGEST_LENGTH; break; case SHA384_HMAC_MECH_INFO_TYPE: sha_digest_len = digest_len = SHA384_DIGEST_LENGTH; break; case SHA512_HMAC_MECH_INFO_TYPE: sha_digest_len = digest_len = SHA512_DIGEST_LENGTH; break; case SHA256_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = SHA256_DIGEST_LENGTH; digest_len = PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len; break; case SHA384_HMAC_GEN_MECH_INFO_TYPE: case SHA512_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = SHA512_DIGEST_LENGTH; digest_len = PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len; break; default: return (CRYPTO_ARGUMENTS_BAD); } /* * We need to just return the length needed to store the output. * We should not destroy the context for the following cases. */ if ((mac->cd_length == 0) || (mac->cd_length < digest_len)) { mac->cd_length = digest_len; return (CRYPTO_BUFFER_TOO_SMALL); } /* * Do a SHA2 final on the inner context. */ SHA2Final(digest, &PROV_SHA2_HMAC_CTX(ctx)->hc_icontext); /* * Do a SHA2 update on the outer context, feeding the inner * digest as data. */ SHA2Update(&PROV_SHA2_HMAC_CTX(ctx)->hc_ocontext, digest, sha_digest_len); /* * Do a SHA2 final on the outer context, storing the computing * digest in the users buffer. */ switch (mac->cd_format) { case CRYPTO_DATA_RAW: if (digest_len != sha_digest_len) { /* * The caller requested a short digest. Digest * into a scratch buffer and return to * the user only what was requested. 
*/ SHA2Final(digest, &PROV_SHA2_HMAC_CTX(ctx)->hc_ocontext); bcopy(digest, (unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, digest_len); } else { SHA2Final((unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, &PROV_SHA2_HMAC_CTX(ctx)->hc_ocontext); } break; case CRYPTO_DATA_UIO: ret = sha2_digest_final_uio( &PROV_SHA2_HMAC_CTX(ctx)->hc_ocontext, mac, digest_len, digest); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) mac->cd_length = digest_len; else mac->cd_length = 0; bzero(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t)); kmem_free(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t)); ctx->cc_provider_private = NULL; return (ret); } #define SHA2_MAC_UPDATE(data, ctx, ret) { \ switch (data->cd_format) { \ case CRYPTO_DATA_RAW: \ SHA2Update(&(ctx).hc_icontext, \ (uint8_t *)data->cd_raw.iov_base + \ data->cd_offset, data->cd_length); \ break; \ case CRYPTO_DATA_UIO: \ ret = sha2_digest_update_uio(&(ctx).hc_icontext, data); \ break; \ default: \ ret = CRYPTO_ARGUMENTS_BAD; \ } \ } -/* ARGSUSED */ static int sha2_mac_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { + (void) provider, (void) session_id, (void) req; int ret = CRYPTO_SUCCESS; uchar_t digest[SHA512_DIGEST_LENGTH]; sha2_hmac_ctx_t sha2_hmac_ctx; uint32_t sha_digest_len, digest_len, sha_hmac_block_size; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); /* * Set the digest length and block size to values appropriate to the * mechanism */ switch (mechanism->cm_type) { case SHA256_HMAC_MECH_INFO_TYPE: case SHA256_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = digest_len = SHA256_DIGEST_LENGTH; sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE; break; case SHA384_HMAC_MECH_INFO_TYPE: case SHA384_HMAC_GEN_MECH_INFO_TYPE: case SHA512_HMAC_MECH_INFO_TYPE: case SHA512_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = digest_len = SHA512_DIGEST_LENGTH; sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE; break; default: return (CRYPTO_MECHANISM_INVALID); } /* Add support for key by attributes (RFE 4706552) */ if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); if (ctx_template != NULL) { /* reuse context template */ bcopy(ctx_template, &sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t)); } else { sha2_hmac_ctx.hc_mech_type = mechanism->cm_type; /* no context template, initialize context */ if (keylen_in_bytes > sha_hmac_block_size) { /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. */ PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3, &sha2_hmac_ctx.hc_icontext, key->ck_data, keylen_in_bytes, digest); sha2_mac_init_ctx(&sha2_hmac_ctx, digest, sha_digest_len); } else { sha2_mac_init_ctx(&sha2_hmac_ctx, key->ck_data, keylen_in_bytes); } } /* get the mechanism parameters, if applicable */ if ((mechanism->cm_type % 3) == 2) { if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (ulong_t)) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } PROV_SHA2_GET_DIGEST_LEN(mechanism, digest_len); if (digest_len > sha_digest_len) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } } /* do a SHA2 update of the inner context using the specified data */ SHA2_MAC_UPDATE(data, sha2_hmac_ctx, ret); if (ret != CRYPTO_SUCCESS) /* the update failed, free context and bail */ goto bail; /* * Do a SHA2 final on the inner context. 
*/ SHA2Final(digest, &sha2_hmac_ctx.hc_icontext); /* * Do an SHA2 update on the outer context, feeding the inner * digest as data. * * HMAC-SHA384 needs special handling as the outer hash needs only 48 * bytes of the inner hash value. */ if (mechanism->cm_type == SHA384_HMAC_MECH_INFO_TYPE || mechanism->cm_type == SHA384_HMAC_GEN_MECH_INFO_TYPE) SHA2Update(&sha2_hmac_ctx.hc_ocontext, digest, SHA384_DIGEST_LENGTH); else SHA2Update(&sha2_hmac_ctx.hc_ocontext, digest, sha_digest_len); /* * Do a SHA2 final on the outer context, storing the computed * digest in the users buffer. */ switch (mac->cd_format) { case CRYPTO_DATA_RAW: if (digest_len != sha_digest_len) { /* * The caller requested a short digest. Digest * into a scratch buffer and return to * the user only what was requested. */ SHA2Final(digest, &sha2_hmac_ctx.hc_ocontext); bcopy(digest, (unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, digest_len); } else { SHA2Final((unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, &sha2_hmac_ctx.hc_ocontext); } break; case CRYPTO_DATA_UIO: ret = sha2_digest_final_uio(&sha2_hmac_ctx.hc_ocontext, mac, digest_len, digest); break; default: ret = CRYPTO_ARGUMENTS_BAD; } if (ret == CRYPTO_SUCCESS) { mac->cd_length = digest_len; return (CRYPTO_SUCCESS); } bail: bzero(&sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t)); mac->cd_length = 0; return (ret); } -/* ARGSUSED */ static int sha2_mac_verify_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { + (void) provider, (void) session_id, (void) req; int ret = CRYPTO_SUCCESS; uchar_t digest[SHA512_DIGEST_LENGTH]; sha2_hmac_ctx_t sha2_hmac_ctx; uint32_t sha_digest_len, digest_len, sha_hmac_block_size; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); /* * Set the digest length and block size to values appropriate to the * mechanism */ switch (mechanism->cm_type) { case SHA256_HMAC_MECH_INFO_TYPE: case SHA256_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = digest_len = SHA256_DIGEST_LENGTH; sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE; break; case SHA384_HMAC_MECH_INFO_TYPE: case SHA384_HMAC_GEN_MECH_INFO_TYPE: case SHA512_HMAC_MECH_INFO_TYPE: case SHA512_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = digest_len = SHA512_DIGEST_LENGTH; sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE; break; default: return (CRYPTO_MECHANISM_INVALID); } /* Add support for key by attributes (RFE 4706552) */ if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); if (ctx_template != NULL) { /* reuse context template */ bcopy(ctx_template, &sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t)); } else { sha2_hmac_ctx.hc_mech_type = mechanism->cm_type; /* no context template, initialize context */ if (keylen_in_bytes > sha_hmac_block_size) { /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. 
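/*
 * Illustrative reference (sizes per FIPS 180-4): SHA-384 runs on the
 * SHA-512 block and state machinery but emits a truncated digest, which
 * is why sha_digest_len is lumped in with SHA-512 above for buffer
 * sizing while the outer-context update must feed exactly the inner
 * digest length -- 48 bytes -- for the 384 mechanisms.
 *
 *	family    digest bytes   HMAC block bytes
 *	SHA-256        32               64
 *	SHA-384        48              128
 *	SHA-512        64              128
 */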
*/ PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3, &sha2_hmac_ctx.hc_icontext, key->ck_data, keylen_in_bytes, digest); sha2_mac_init_ctx(&sha2_hmac_ctx, digest, sha_digest_len); } else { sha2_mac_init_ctx(&sha2_hmac_ctx, key->ck_data, keylen_in_bytes); } } /* get the mechanism parameters, if applicable */ if (mechanism->cm_type % 3 == 2) { if (mechanism->cm_param == NULL || mechanism->cm_param_len != sizeof (ulong_t)) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } PROV_SHA2_GET_DIGEST_LEN(mechanism, digest_len); if (digest_len > sha_digest_len) { ret = CRYPTO_MECHANISM_PARAM_INVALID; goto bail; } } if (mac->cd_length != digest_len) { ret = CRYPTO_INVALID_MAC; goto bail; } /* do a SHA2 update of the inner context using the specified data */ SHA2_MAC_UPDATE(data, sha2_hmac_ctx, ret); if (ret != CRYPTO_SUCCESS) /* the update failed, free context and bail */ goto bail; /* do a SHA2 final on the inner context */ SHA2Final(digest, &sha2_hmac_ctx.hc_icontext); /* * Do an SHA2 update on the outer context, feeding the inner * digest as data. * * HMAC-SHA384 needs special handling as the outer hash needs only 48 * bytes of the inner hash value. */ if (mechanism->cm_type == SHA384_HMAC_MECH_INFO_TYPE || mechanism->cm_type == SHA384_HMAC_GEN_MECH_INFO_TYPE) SHA2Update(&sha2_hmac_ctx.hc_ocontext, digest, SHA384_DIGEST_LENGTH); else SHA2Update(&sha2_hmac_ctx.hc_ocontext, digest, sha_digest_len); /* * Do a SHA2 final on the outer context, storing the computed * digest in the users buffer. */ SHA2Final(digest, &sha2_hmac_ctx.hc_ocontext); /* * Compare the computed digest against the expected digest passed * as argument. */ switch (mac->cd_format) { case CRYPTO_DATA_RAW: if (bcmp(digest, (unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, digest_len) != 0) ret = CRYPTO_INVALID_MAC; break; case CRYPTO_DATA_UIO: { off_t offset = mac->cd_offset; uint_t vec_idx = 0; off_t scratch_offset = 0; size_t length = digest_len; size_t cur_len; /* we support only kernel buffer */ if (zfs_uio_segflg(mac->cd_uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* jump to the first iovec containing the expected digest */ offset = zfs_uio_index_at_offset(mac->cd_uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(mac->cd_uio)) { /* * The caller specified an offset that is * larger than the total size of the buffers * it provided. */ ret = CRYPTO_DATA_LEN_RANGE; break; } /* do the comparison of computed digest vs specified one */ while (vec_idx < zfs_uio_iovcnt(mac->cd_uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(mac->cd_uio, vec_idx) - offset, length); if (bcmp(digest + scratch_offset, zfs_uio_iovbase(mac->cd_uio, vec_idx) + offset, cur_len) != 0) { ret = CRYPTO_INVALID_MAC; break; } length -= cur_len; vec_idx++; scratch_offset += cur_len; offset = 0; } break; } default: ret = CRYPTO_ARGUMENTS_BAD; } return (ret); bail: bzero(&sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t)); mac->cd_length = 0; return (ret); } /* * KCF software provider context management entry points. 
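 *
 * For this provider a context template is simply a pre-keyed
 * sha2_hmac_ctx_t: sha2_create_ctx_template() below performs the key
 * setup once, and the init/atomic entry points above start from a
 * bcopy() of the template instead of re-deriving the keyed state.  A
 * rough sketch of the provider-side reuse pattern (illustrative; error
 * handling and the digested-key case omitted):
 *
 *        sha2_hmac_ctx_t tmpl;                      built once per key
 *        tmpl.hc_mech_type = mechanism->cm_type;
 *        sha2_mac_init_ctx(&tmpl, key->ck_data, keylen_in_bytes);
 *
 *        sha2_hmac_ctx_t ctx;                       then, per message
 *        bcopy(&tmpl, &ctx, sizeof (ctx));
 *        SHA2Update(&ctx.hc_icontext, msg, msglen);
 *        ... finish as in sha2_mac_atomic() above ...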
*/ -/* ARGSUSED */ static int sha2_create_ctx_template(crypto_provider_handle_t provider, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size, crypto_req_handle_t req) { + (void) provider; sha2_hmac_ctx_t *sha2_hmac_ctx_tmpl; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); uint32_t sha_digest_len, sha_hmac_block_size; /* * Set the digest length and block size to values appropriate to the * mechanism */ switch (mechanism->cm_type) { case SHA256_HMAC_MECH_INFO_TYPE: case SHA256_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = SHA256_DIGEST_LENGTH; sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE; break; case SHA384_HMAC_MECH_INFO_TYPE: case SHA384_HMAC_GEN_MECH_INFO_TYPE: case SHA512_HMAC_MECH_INFO_TYPE: case SHA512_HMAC_GEN_MECH_INFO_TYPE: sha_digest_len = SHA512_DIGEST_LENGTH; sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE; break; default: return (CRYPTO_MECHANISM_INVALID); } /* Add support for key by attributes (RFE 4706552) */ if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); /* * Allocate and initialize SHA2 context. */ sha2_hmac_ctx_tmpl = kmem_alloc(sizeof (sha2_hmac_ctx_t), crypto_kmflag(req)); if (sha2_hmac_ctx_tmpl == NULL) return (CRYPTO_HOST_MEMORY); sha2_hmac_ctx_tmpl->hc_mech_type = mechanism->cm_type; if (keylen_in_bytes > sha_hmac_block_size) { uchar_t digested_key[SHA512_DIGEST_LENGTH]; /* * Hash the passed-in key to get a smaller key. * The inner context is used since it hasn't been * initialized yet. */ PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3, &sha2_hmac_ctx_tmpl->hc_icontext, key->ck_data, keylen_in_bytes, digested_key); sha2_mac_init_ctx(sha2_hmac_ctx_tmpl, digested_key, sha_digest_len); } else { sha2_mac_init_ctx(sha2_hmac_ctx_tmpl, key->ck_data, keylen_in_bytes); } *ctx_template = (crypto_spi_ctx_template_t)sha2_hmac_ctx_tmpl; *ctx_template_size = sizeof (sha2_hmac_ctx_t); return (CRYPTO_SUCCESS); } static int sha2_free_context(crypto_ctx_t *ctx) { uint_t ctx_len; if (ctx->cc_provider_private == NULL) return (CRYPTO_SUCCESS); /* * We have to free either SHA2 or SHA2-HMAC contexts, which * have different lengths. * * Note: Below is dependent on the mechanism ordering. */ if (PROV_SHA2_CTX(ctx)->sc_mech_type % 3 == 0) ctx_len = sizeof (sha2_ctx_t); else ctx_len = sizeof (sha2_hmac_ctx_t); bzero(ctx->cc_provider_private, ctx_len); kmem_free(ctx->cc_provider_private, ctx_len); ctx->cc_provider_private = NULL; return (CRYPTO_SUCCESS); } diff --git a/module/icp/io/skein_mod.c b/module/icp/io/skein_mod.c index ac7d201eb708..49dcbadd86f5 100644 --- a/module/icp/io/skein_mod.c +++ b/module/icp/io/skein_mod.c @@ -1,728 +1,727 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2013 Saso Kiselkov. All rights reserved. 
*/ #include #include #include #include #include #define SKEIN_MODULE_IMPL #include /* * Like the sha2 module, we create the skein module with two modlinkages: * - modlmisc to allow direct calls to Skein_* API functions. * - modlcrypto to integrate well into the Kernel Crypto Framework (KCF). */ static struct modlmisc modlmisc = { &mod_cryptoops, "Skein Message-Digest Algorithm" }; static struct modlcrypto modlcrypto = { &mod_cryptoops, "Skein Kernel SW Provider" }; static struct modlinkage modlinkage = { MODREV_1, {&modlmisc, &modlcrypto, NULL} }; static crypto_mech_info_t skein_mech_info_tab[] = { {CKM_SKEIN_256, SKEIN_256_MECH_INFO_TYPE, CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, {CKM_SKEIN_256_MAC, SKEIN_256_MAC_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, {CKM_SKEIN_512, SKEIN_512_MECH_INFO_TYPE, CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, {CKM_SKEIN_512_MAC, SKEIN_512_MAC_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, {CKM_SKEIN1024, SKEIN1024_MECH_INFO_TYPE, CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, {CKM_SKEIN1024_MAC, SKEIN1024_MAC_MECH_INFO_TYPE, CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX, CRYPTO_KEYSIZE_UNIT_IN_BYTES} }; static void skein_provider_status(crypto_provider_handle_t, uint_t *); static crypto_control_ops_t skein_control_ops = { skein_provider_status }; static int skein_digest_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_req_handle_t); static int skein_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static int skein_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int skein_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); static int skein_digest_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_data_t *, crypto_data_t *, crypto_req_handle_t); static crypto_digest_ops_t skein_digest_ops = { .digest_init = skein_digest_init, .digest = skein_digest, .digest_update = skein_update, .digest_key = NULL, .digest_final = skein_final, .digest_atomic = skein_digest_atomic }; static int skein_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static int skein_mac_atomic(crypto_provider_handle_t, crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); static crypto_mac_ops_t skein_mac_ops = { .mac_init = skein_mac_init, .mac = NULL, .mac_update = skein_update, /* using regular digest update is OK here */ .mac_final = skein_final, /* using regular digest final is OK here */ .mac_atomic = skein_mac_atomic, .mac_verify_atomic = NULL }; static int skein_create_ctx_template(crypto_provider_handle_t, crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *, size_t *, crypto_req_handle_t); static int skein_free_context(crypto_ctx_t *); static crypto_ctx_ops_t skein_ctx_ops = { .create_ctx_template = skein_create_ctx_template, .free_context = skein_free_context }; static crypto_ops_t skein_crypto_ops = {{{{{ &skein_control_ops, &skein_digest_ops, NULL, &skein_mac_ops, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &skein_ctx_ops, }}}}}; static crypto_provider_info_t skein_prov_info = {{{{ CRYPTO_SPI_VERSION_1, "Skein Software Provider", CRYPTO_SW_PROVIDER, NULL, &skein_crypto_ops, sizeof (skein_mech_info_tab) / 
sizeof (crypto_mech_info_t), skein_mech_info_tab }}}}; static crypto_kcf_provider_handle_t skein_prov_handle = 0; typedef struct skein_ctx { skein_mech_type_t sc_mech_type; size_t sc_digest_bitlen; /*LINTED(E_ANONYMOUS_UNION_DECL)*/ union { Skein_256_Ctxt_t sc_256; Skein_512_Ctxt_t sc_512; Skein1024_Ctxt_t sc_1024; }; } skein_ctx_t; #define SKEIN_CTX(_ctx_) ((skein_ctx_t *)((_ctx_)->cc_provider_private)) #define SKEIN_CTX_LVALUE(_ctx_) (_ctx_)->cc_provider_private #define SKEIN_OP(_skein_ctx, _op, ...) \ do { \ skein_ctx_t *sc = (_skein_ctx); \ switch (sc->sc_mech_type) { \ case SKEIN_256_MECH_INFO_TYPE: \ case SKEIN_256_MAC_MECH_INFO_TYPE: \ (void) Skein_256_ ## _op(&sc->sc_256, __VA_ARGS__);\ break; \ case SKEIN_512_MECH_INFO_TYPE: \ case SKEIN_512_MAC_MECH_INFO_TYPE: \ (void) Skein_512_ ## _op(&sc->sc_512, __VA_ARGS__);\ break; \ case SKEIN1024_MECH_INFO_TYPE: \ case SKEIN1024_MAC_MECH_INFO_TYPE: \ (void) Skein1024_ ## _op(&sc->sc_1024, __VA_ARGS__);\ break; \ } \ } while (0) static int skein_get_digest_bitlen(const crypto_mechanism_t *mechanism, size_t *result) { if (mechanism->cm_param != NULL) { /*LINTED(E_BAD_PTR_CAST_ALIGN)*/ skein_param_t *param = (skein_param_t *)mechanism->cm_param; if (mechanism->cm_param_len != sizeof (*param) || param->sp_digest_bitlen == 0) { return (CRYPTO_MECHANISM_PARAM_INVALID); } *result = param->sp_digest_bitlen; } else { switch (mechanism->cm_type) { case SKEIN_256_MECH_INFO_TYPE: *result = 256; break; case SKEIN_512_MECH_INFO_TYPE: *result = 512; break; case SKEIN1024_MECH_INFO_TYPE: *result = 1024; break; default: return (CRYPTO_MECHANISM_INVALID); } } return (CRYPTO_SUCCESS); } int skein_mod_init(void) { int error; if ((error = mod_install(&modlinkage)) != 0) return (error); /* * Try to register with KCF - failure shouldn't unload us, since we * still may want to continue providing misc/skein functionality. */ (void) crypto_register_provider(&skein_prov_info, &skein_prov_handle); return (0); } int skein_mod_fini(void) { int ret; if (skein_prov_handle != 0) { if ((ret = crypto_unregister_provider(skein_prov_handle)) != CRYPTO_SUCCESS) { cmn_err(CE_WARN, "skein _fini: crypto_unregister_provider() " "failed (0x%x)", ret); return (EBUSY); } skein_prov_handle = 0; } return (mod_remove(&modlinkage)); } /* * KCF software provider control entry points. */ -/* ARGSUSED */ static void skein_provider_status(crypto_provider_handle_t provider, uint_t *status) { + (void) provider; *status = CRYPTO_PROVIDER_READY; } /* * General Skein hashing helper functions. */ /* * Performs an Update on a context with uio input data. */ static int skein_digest_update_uio(skein_ctx_t *ctx, const crypto_data_t *data) { off_t offset = data->cd_offset; size_t length = data->cd_length; uint_t vec_idx = 0; size_t cur_len; zfs_uio_t *uio = data->cd_uio; /* we support only kernel buffer */ if (zfs_uio_segflg(uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* * Jump to the first iovec containing data to be * digested. */ offset = zfs_uio_index_at_offset(uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(uio)) { /* * The caller specified an offset that is larger than the * total size of the buffers it provided. */ return (CRYPTO_DATA_LEN_RANGE); } /* * Now do the digesting on the iovecs. 
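 *
 * zfs_uio_index_at_offset() above converts the absolute starting offset
 * into an iovec index plus an offset relative to that iovec; a rough
 * equivalent (illustrative sketch, not the real implementation):
 *
 *        uint_t idx = 0;
 *        while (idx < zfs_uio_iovcnt(uio) &&
 *            offset >= zfs_uio_iovlen(uio, idx))
 *                offset -= zfs_uio_iovlen(uio, idx++);
 *        vec_idx = idx;                (offset is now intra-iovec)
 *
 * The loop below then consumes up to cur_len bytes from each iovec in
 * turn, resetting the intra-iovec offset to zero after the first one.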
*/ while (vec_idx < zfs_uio_iovcnt(uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(uio, vec_idx) - offset, length); SKEIN_OP(ctx, Update, (uint8_t *)zfs_uio_iovbase(uio, vec_idx) + offset, cur_len); length -= cur_len; vec_idx++; offset = 0; } if (vec_idx == zfs_uio_iovcnt(uio) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed, i.e. * The caller requested to digest more data than it provided. */ return (CRYPTO_DATA_LEN_RANGE); } return (CRYPTO_SUCCESS); } /* * Performs a Final on a context and writes to a uio digest output. */ static int skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req) { off_t offset = digest->cd_offset; uint_t vec_idx = 0; zfs_uio_t *uio = digest->cd_uio; /* we support only kernel buffer */ if (zfs_uio_segflg(uio) != UIO_SYSSPACE) return (CRYPTO_ARGUMENTS_BAD); /* * Jump to the first iovec containing ptr to the digest to be returned. */ offset = zfs_uio_index_at_offset(uio, offset, &vec_idx); if (vec_idx == zfs_uio_iovcnt(uio)) { /* * The caller specified an offset that is larger than the * total size of the buffers it provided. */ return (CRYPTO_DATA_LEN_RANGE); } if (offset + CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen) <= zfs_uio_iovlen(uio, vec_idx)) { /* The computed digest will fit in the current iovec. */ SKEIN_OP(ctx, Final, (uchar_t *)zfs_uio_iovbase(uio, vec_idx) + offset); } else { uint8_t *digest_tmp; off_t scratch_offset = 0; size_t length = CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen); size_t cur_len; digest_tmp = kmem_alloc(CRYPTO_BITS2BYTES( ctx->sc_digest_bitlen), crypto_kmflag(req)); if (digest_tmp == NULL) return (CRYPTO_HOST_MEMORY); SKEIN_OP(ctx, Final, digest_tmp); while (vec_idx < zfs_uio_iovcnt(uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(uio, vec_idx) - offset, length); bcopy(digest_tmp + scratch_offset, zfs_uio_iovbase(uio, vec_idx) + offset, cur_len); length -= cur_len; vec_idx++; scratch_offset += cur_len; offset = 0; } kmem_free(digest_tmp, CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen)); if (vec_idx == zfs_uio_iovcnt(uio) && length > 0) { /* * The end of the specified iovec's was reached but * the length requested could not be processed, i.e. * The caller requested to digest more data than it * provided. */ return (CRYPTO_DATA_LEN_RANGE); } } return (CRYPTO_SUCCESS); } /* * KCF software provider digest entry points. */ /* * Initializes a skein digest context to the configuration in `mechanism'. * The mechanism cm_type must be one of SKEIN_*_MECH_INFO_TYPE. The cm_param * field may contain a skein_param_t structure indicating the length of the * digest the algorithm should produce. Otherwise the default output lengths * are applied (32 bytes for Skein-256, 64 bytes for Skein-512 and 128 bytes * for Skein-1024). 
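 *
 * A caller that wants a non-default digest length passes a skein_param_t
 * through the mechanism; a rough sketch (illustrative variable names,
 * kernel context):
 *
 *        skein_param_t sp = { .sp_digest_bitlen = 160 };
 *        crypto_mechanism_t mech;
 *
 *        mech.cm_type = SKEIN_512_MECH_INFO_TYPE;
 *        mech.cm_param = (char *)&sp;
 *        mech.cm_param_len = sizeof (sp);
 *
 * skein_get_digest_bitlen() above accepts any nonzero sp_digest_bitlen
 * and rejects a cm_param whose length does not match
 * sizeof (skein_param_t).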
*/ static int skein_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_req_handle_t req) { int error = CRYPTO_SUCCESS; if (!VALID_SKEIN_DIGEST_MECH(mechanism->cm_type)) return (CRYPTO_MECHANISM_INVALID); SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)), crypto_kmflag(req)); if (SKEIN_CTX(ctx) == NULL) return (CRYPTO_HOST_MEMORY); SKEIN_CTX(ctx)->sc_mech_type = mechanism->cm_type; error = skein_get_digest_bitlen(mechanism, &SKEIN_CTX(ctx)->sc_digest_bitlen); if (error != CRYPTO_SUCCESS) goto errout; SKEIN_OP(SKEIN_CTX(ctx), Init, SKEIN_CTX(ctx)->sc_digest_bitlen); return (CRYPTO_SUCCESS); errout: bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); SKEIN_CTX_LVALUE(ctx) = NULL; return (error); } /* * Executes a skein_update and skein_digest on a pre-initialized crypto * context in a single step. See the documentation to these functions to * see what to pass here. */ static int skein_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req) { int error = CRYPTO_SUCCESS; ASSERT(SKEIN_CTX(ctx) != NULL); if (digest->cd_length < CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen)) { digest->cd_length = CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen); return (CRYPTO_BUFFER_TOO_SMALL); } error = skein_update(ctx, data, req); if (error != CRYPTO_SUCCESS) { bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); SKEIN_CTX_LVALUE(ctx) = NULL; digest->cd_length = 0; return (error); } error = skein_final(ctx, digest, req); return (error); } /* * Performs a skein Update with the input message in `data' (successive calls * can push more data). This is used both for digest and MAC operation. * Supported input data formats are raw, uio and mblk. */ -/*ARGSUSED*/ static int skein_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) { + (void) req; int error = CRYPTO_SUCCESS; ASSERT(SKEIN_CTX(ctx) != NULL); switch (data->cd_format) { case CRYPTO_DATA_RAW: SKEIN_OP(SKEIN_CTX(ctx), Update, (uint8_t *)data->cd_raw.iov_base + data->cd_offset, data->cd_length); break; case CRYPTO_DATA_UIO: error = skein_digest_update_uio(SKEIN_CTX(ctx), data); break; default: error = CRYPTO_ARGUMENTS_BAD; } return (error); } /* * Performs a skein Final, writing the output to `digest'. This is used both * for digest and MAC operation. * Supported output digest formats are raw, uio and mblk. 
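 *
 * Note the output-sizing convention used here (and in skein_digest()
 * above): if digest->cd_length is too small, it is overwritten with the
 * required byte count and CRYPTO_BUFFER_TOO_SMALL is returned without
 * consuming the context, so a caller could size its buffer in two passes.
 * A rough caller-side sketch (do_final is a hypothetical wrapper name,
 * illustrative only):
 *
 *        out.cd_length = 0;
 *        if (do_final(ctx, &out) == CRYPTO_BUFFER_TOO_SMALL) {
 *                buf = kmem_alloc(out.cd_length, KM_SLEEP);
 *                out.cd_raw.iov_base = (char *)buf;
 *                out.cd_raw.iov_len = out.cd_length;
 *                ... retry the final ...
 *        }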
*/ -/*ARGSUSED*/ static int skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req) { int error = CRYPTO_SUCCESS; ASSERT(SKEIN_CTX(ctx) != NULL); if (digest->cd_length < CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen)) { digest->cd_length = CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen); return (CRYPTO_BUFFER_TOO_SMALL); } switch (digest->cd_format) { case CRYPTO_DATA_RAW: SKEIN_OP(SKEIN_CTX(ctx), Final, (uint8_t *)digest->cd_raw.iov_base + digest->cd_offset); break; case CRYPTO_DATA_UIO: error = skein_digest_final_uio(SKEIN_CTX(ctx), digest, req); break; default: error = CRYPTO_ARGUMENTS_BAD; } if (error == CRYPTO_SUCCESS) digest->cd_length = CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen); else digest->cd_length = 0; bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*(SKEIN_CTX(ctx)))); SKEIN_CTX_LVALUE(ctx) = NULL; return (error); } /* * Performs a full skein digest computation in a single call, configuring the * algorithm according to `mechanism', reading the input to be digested from * `data' and writing the output to `digest'. * Supported input/output formats are raw, uio and mblk. */ -/*ARGSUSED*/ static int skein_digest_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req) { - int error; - skein_ctx_t skein_ctx; - crypto_ctx_t ctx; + (void) provider, (void) session_id, (void) req; + int error; + skein_ctx_t skein_ctx; + crypto_ctx_t ctx; SKEIN_CTX_LVALUE(&ctx) = &skein_ctx; /* Init */ if (!VALID_SKEIN_DIGEST_MECH(mechanism->cm_type)) return (CRYPTO_MECHANISM_INVALID); skein_ctx.sc_mech_type = mechanism->cm_type; error = skein_get_digest_bitlen(mechanism, &skein_ctx.sc_digest_bitlen); if (error != CRYPTO_SUCCESS) goto out; SKEIN_OP(&skein_ctx, Init, skein_ctx.sc_digest_bitlen); if ((error = skein_update(&ctx, data, digest)) != CRYPTO_SUCCESS) goto out; if ((error = skein_final(&ctx, digest, digest)) != CRYPTO_SUCCESS) goto out; out: if (error == CRYPTO_SUCCESS) digest->cd_length = CRYPTO_BITS2BYTES(skein_ctx.sc_digest_bitlen); else digest->cd_length = 0; bzero(&skein_ctx, sizeof (skein_ctx)); return (error); } /* * Helper function that builds a Skein MAC context from the provided * mechanism and key. */ static int skein_mac_ctx_build(skein_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key) { int error; if (!VALID_SKEIN_MAC_MECH(mechanism->cm_type)) return (CRYPTO_MECHANISM_INVALID); if (key->ck_format != CRYPTO_KEY_RAW) return (CRYPTO_ARGUMENTS_BAD); ctx->sc_mech_type = mechanism->cm_type; error = skein_get_digest_bitlen(mechanism, &ctx->sc_digest_bitlen); if (error != CRYPTO_SUCCESS) return (error); SKEIN_OP(ctx, InitExt, ctx->sc_digest_bitlen, 0, key->ck_data, CRYPTO_BITS2BYTES(key->ck_length)); return (CRYPTO_SUCCESS); } /* * KCF software provider mac entry points. */ /* * Initializes a skein MAC context. You may pass a ctx_template, in which * case the template will be reused to make initialization more efficient. * Otherwise a new context will be constructed. The mechanism cm_type must * be one of SKEIN_*_MAC_MECH_INFO_TYPE. Same as in skein_digest_init, you * may pass a skein_param_t in cm_param to configure the length of the * digest. The key must be in raw format.
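 *
 * "Raw format" means a crypto_key_t that carries the key bytes directly;
 * a rough sketch of building one (illustrative, kernel context; note that
 * ck_length is expressed in bits, hence the CRYPTO_BITS2BYTES()
 * conversion in skein_mac_ctx_build() above):
 *
 *        uint8_t keybuf[32];                filled in by the caller
 *        crypto_key_t key;
 *
 *        key.ck_format = CRYPTO_KEY_RAW;
 *        key.ck_data = keybuf;
 *        key.ck_length = 32 * 8;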
*/ static int skein_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { int error; SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)), crypto_kmflag(req)); if (SKEIN_CTX(ctx) == NULL) return (CRYPTO_HOST_MEMORY); if (ctx_template != NULL) { bcopy(ctx_template, SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); } else { error = skein_mac_ctx_build(SKEIN_CTX(ctx), mechanism, key); if (error != CRYPTO_SUCCESS) goto errout; } return (CRYPTO_SUCCESS); errout: bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); return (error); } /* * The MAC update and final calls are reused from the regular digest code. */ -/*ARGSUSED*/ /* * Same as skein_digest_atomic, performs an atomic Skein MAC operation in * one step. All the same properties apply to the arguments of this * function as to those of the partial operations above. */ static int skein_mac_atomic(crypto_provider_handle_t provider, crypto_session_id_t session_id, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) { /* faux crypto context just for skein_digest_{update,final} */ - int error; - crypto_ctx_t ctx; - skein_ctx_t skein_ctx; + (void) provider, (void) session_id; + int error; + crypto_ctx_t ctx; + skein_ctx_t skein_ctx; SKEIN_CTX_LVALUE(&ctx) = &skein_ctx; if (ctx_template != NULL) { bcopy(ctx_template, &skein_ctx, sizeof (skein_ctx)); } else { error = skein_mac_ctx_build(&skein_ctx, mechanism, key); if (error != CRYPTO_SUCCESS) goto errout; } if ((error = skein_update(&ctx, data, req)) != CRYPTO_SUCCESS) goto errout; if ((error = skein_final(&ctx, mac, req)) != CRYPTO_SUCCESS) goto errout; return (CRYPTO_SUCCESS); errout: bzero(&skein_ctx, sizeof (skein_ctx)); return (error); } /* * KCF software provider context management entry points. */ /* * Constructs a context template for the Skein MAC algorithm. The same * properties apply to the arguments of this function as to those of * skein_mac_init. */ -/*ARGSUSED*/ static int skein_create_ctx_template(crypto_provider_handle_t provider, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size, crypto_req_handle_t req) { - int error; - skein_ctx_t *ctx_tmpl; + (void) provider; + int error; + skein_ctx_t *ctx_tmpl; ctx_tmpl = kmem_alloc(sizeof (*ctx_tmpl), crypto_kmflag(req)); if (ctx_tmpl == NULL) return (CRYPTO_HOST_MEMORY); error = skein_mac_ctx_build(ctx_tmpl, mechanism, key); if (error != CRYPTO_SUCCESS) goto errout; *ctx_template = ctx_tmpl; *ctx_template_size = sizeof (*ctx_tmpl); return (CRYPTO_SUCCESS); errout: bzero(ctx_tmpl, sizeof (*ctx_tmpl)); kmem_free(ctx_tmpl, sizeof (*ctx_tmpl)); return (error); } /* * Frees a skein context in a parent crypto context. */ static int skein_free_context(crypto_ctx_t *ctx) { if (SKEIN_CTX(ctx) != NULL) { bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); SKEIN_CTX_LVALUE(ctx) = NULL; } return (CRYPTO_SUCCESS); } diff --git a/module/icp/os/modconf.c b/module/icp/os/modconf.c index 3743416ed951..f1822af4e266 100644 --- a/module/icp/os/modconf.c +++ b/module/icp/os/modconf.c @@ -1,173 +1,175 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). 
* You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include /* * Null operations; used for uninitialized and "misc" modules. */ static int mod_null(struct modlmisc *, struct modlinkage *); static int mod_infonull(void *, struct modlinkage *, int *); /* * Cryptographic Modules */ struct mod_ops mod_cryptoops = { .modm_install = mod_null, .modm_remove = mod_null, .modm_info = mod_infonull }; /* * Null operation; return 0. */ static int mod_null(struct modlmisc *modl, struct modlinkage *modlp) { + (void) modl, (void) modlp; return (0); } /* * Status for User modules. */ static int mod_infonull(void *modl, struct modlinkage *modlp, int *p0) { + (void) modl, (void) modlp; *p0 = -1; /* for modinfo display */ return (0); } /* * Install a module. * (This routine is in the Solaris SPARC DDI/DKI) */ int mod_install(struct modlinkage *modlp) { int retval = -1; /* No linkage structures */ struct modlmisc **linkpp; struct modlmisc **linkpp1; if (modlp->ml_rev != MODREV_1) { cmn_err(CE_WARN, "mod_install: " "modlinkage structure is not MODREV_1\n"); return (EINVAL); } linkpp = (struct modlmisc **)&modlp->ml_linkage[0]; while (*linkpp != NULL) { if ((retval = MODL_INSTALL(*linkpp, modlp)) != 0) { linkpp1 = (struct modlmisc **)&modlp->ml_linkage[0]; while (linkpp1 != linkpp) { MODL_REMOVE(*linkpp1, modlp); /* clean up */ linkpp1++; } break; } linkpp++; } return (retval); } static char *reins_err = "Could not reinstall %s\nReboot to correct the problem"; /* * Remove a module. This is called by the module wrapper routine. * (This routine is in the Solaris SPARC DDI/DKI) */ int mod_remove(struct modlinkage *modlp) { int retval = 0; struct modlmisc **linkpp, *last_linkp; linkpp = (struct modlmisc **)&modlp->ml_linkage[0]; while (*linkpp != NULL) { if ((retval = MODL_REMOVE(*linkpp, modlp)) != 0) { last_linkp = *linkpp; linkpp = (struct modlmisc **)&modlp->ml_linkage[0]; while (*linkpp != last_linkp) { if (MODL_INSTALL(*linkpp, modlp) != 0) { cmn_err(CE_WARN, reins_err, (*linkpp)->misc_linkinfo); break; } linkpp++; } break; } linkpp++; } return (retval); } /* * Get module status. 
* (This routine is in the Solaris SPARC DDI/DKI) */ int mod_info(struct modlinkage *modlp, struct modinfo *modinfop) { int i; int retval = 0; struct modspecific_info *msip; struct modlmisc **linkpp; modinfop->mi_rev = modlp->ml_rev; linkpp = (struct modlmisc **)modlp->ml_linkage; msip = &modinfop->mi_msinfo[0]; for (i = 0; i < MODMAXLINK; i++) { if (*linkpp == NULL) { msip->msi_linkinfo[0] = '\0'; } else { (void) strlcpy(msip->msi_linkinfo, (*linkpp)->misc_linkinfo, MODMAXLINKINFOLEN); retval = MODL_INFO(*linkpp, modlp, &msip->msi_p0); if (retval != 0) break; linkpp++; } msip++; } if (modinfop->mi_info == MI_INFO_LINKAGE) { /* * Slight kludge used to extract the address of the * modlinkage structure from the module (just after * loading a module for the very first time) */ modinfop->mi_base = (void *)modlp; } if (retval == 0) return (1); return (0); } diff --git a/module/icp/os/modhash.c b/module/icp/os/modhash.c index a897871001ce..8bd06973eff1 100644 --- a/module/icp/os/modhash.c +++ b/module/icp/os/modhash.c @@ -1,927 +1,927 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * mod_hash: flexible hash table implementation. * * This is a reasonably fast, reasonably flexible hash table implementation * which features pluggable hash algorithms to support storing arbitrary keys * and values. It is designed to handle small (< 100,000 items) amounts of * data. The hash uses chaining to resolve collisions, and does not feature a * mechanism to grow the hash. Care must be taken to pick nchains to be large * enough for the application at hand, or lots of time will be wasted searching * hash chains. * * The client of the hash is required to supply a number of items to support * the various hash functions: * * - Destructor functions for the key and value being hashed. * A destructor is responsible for freeing an object when the hash * table is no longer storing it. Since keys and values can be of * arbitrary type, separate destructors for keys & values are used. * These may be mod_hash_null_keydtor and mod_hash_null_valdtor if no * destructor is needed for either a key or value. * * - A hashing algorithm which returns a uint_t representing a hash index * The number returned need _not_ be between 0 and nchains. The mod_hash * code will take care of doing that. The second argument (after the * key) to the hashing function is a void * that represents * hash_alg_data-- this is provided so that the hashing algorithm can * maintain some state across calls, or keep algorithm-specific * constants associated with the hash table. * * A pointer-hashing and a string-hashing algorithm are supplied in * this file. * * - A key comparator (a la qsort). 
* This is used when searching the hash chain. The key comparator * determines if two keys match. It should follow the return value * semantics of strcmp. * * string and pointer comparators are supplied in this file. * * mod_hash_create_strhash() and mod_hash_create_ptrhash() provide good * examples of how to create a customized hash table. * * Basic hash operations: * * mod_hash_create_strhash(name, nchains, dtor), * create a hash using strings as keys. * NOTE: This create a hash which automatically cleans up the string * values it is given for keys. * * mod_hash_create_ptrhash(name, nchains, dtor, key_elem_size): * create a hash using pointers as keys. * * mod_hash_create_extended(name, nchains, kdtor, vdtor, * hash_alg, hash_alg_data, * keycmp, sleep) * create a customized hash table. * * mod_hash_destroy_hash(hash): * destroy the given hash table, calling the key and value destructors * on each key-value pair stored in the hash. * * mod_hash_insert(hash, key, val): * place a key, value pair into the given hash. * duplicate keys are rejected. * * mod_hash_insert_reserve(hash, key, val, handle): * place a key, value pair into the given hash, using handle to indicate * the reserved storage for the pair. (no memory allocation is needed * during a mod_hash_insert_reserve.) duplicate keys are rejected. * * mod_hash_reserve(hash, *handle): * reserve storage for a key-value pair using the memory allocation * policy of 'hash', returning the storage handle in 'handle'. * * mod_hash_reserve_nosleep(hash, *handle): reserve storage for a key-value * pair ignoring the memory allocation policy of 'hash' and always without * sleep, returning the storage handle in 'handle'. * * mod_hash_remove(hash, key, *val): * remove a key-value pair with key 'key' from 'hash', destroying the * stored key, and returning the value in val. * * mod_hash_replace(hash, key, val) * atomically remove an existing key-value pair from a hash, and replace * the key and value with the ones supplied. The removed key and value * (if any) are destroyed. * * mod_hash_destroy(hash, key): * remove a key-value pair with key 'key' from 'hash', destroying both * stored key and stored value. * * mod_hash_find(hash, key, val): * find a value in the hash table corresponding to the given key. * * mod_hash_find_cb(hash, key, val, found_callback) * find a value in the hash table corresponding to the given key. * If a value is found, call specified callback passing key and val to it. * The callback is called with the hash lock held. * It is intended to be used in situations where the act of locating the * data must also modify it - such as in reference counting schemes. * * mod_hash_walk(hash, callback(key, elem, arg), arg) * walks all the elements in the hashtable and invokes the callback * function with the key/value pair for each element. the hashtable * is locked for readers so the callback function should not attempt * to do any updates to the hashable. the callback function should * return MH_WALK_CONTINUE to continue walking the hashtable or * MH_WALK_TERMINATE to abort the walk of the hashtable. * * mod_hash_clear(hash): * clears the given hash table of entries, calling the key and value * destructors for every element in the hash. */ #include #include #include #include /* * MH_KEY_DESTROY() * Invoke the key destructor. */ #define MH_KEY_DESTROY(hash, key) ((hash->mh_kdtor)(key)) /* * MH_VAL_DESTROY() * Invoke the value destructor. 
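 *
 * The key and value destructors are supplied by the client when the hash
 * is created and are invoked from the remove/destroy/clear paths.  A
 * rough usage sketch (my_val_t and my_val_dtor are illustrative names,
 * not part of this file; the string keys handed to a strhash must be
 * kmem-allocated since the hash frees them):
 *
 *        static void
 *        my_val_dtor(mod_hash_val_t val)
 *        {
 *                kmem_free(val, sizeof (my_val_t));
 *        }
 *
 *        hash = mod_hash_create_strhash("demo hash", 64, my_val_dtor);
 *        (void) mod_hash_insert(hash, (mod_hash_key_t)namestr,
 *            (mod_hash_val_t)valp);
 *        ...
 *        mod_hash_destroy_strhash(hash);        destroys keys and values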
*/ #define MH_VAL_DESTROY(hash, val) ((hash->mh_vdtor)(val)) /* * MH_KEYCMP() * Call the key comparator for the given hash keys. */ #define MH_KEYCMP(hash, key1, key2) ((hash->mh_keycmp)(key1, key2)) /* * Cache for struct mod_hash_entry */ kmem_cache_t *mh_e_cache = NULL; mod_hash_t *mh_head = NULL; kmutex_t mh_head_lock; /* * mod_hash_null_keydtor() * mod_hash_null_valdtor() * no-op key and value destructors. */ -/*ARGSUSED*/ void mod_hash_null_keydtor(mod_hash_key_t key) { + (void) key; } -/*ARGSUSED*/ void mod_hash_null_valdtor(mod_hash_val_t val) { + (void) val; } /* * mod_hash_bystr() * mod_hash_strkey_cmp() * mod_hash_strkey_dtor() * mod_hash_strval_dtor() * Hash and key comparison routines for hashes with string keys. * * mod_hash_create_strhash() * Create a hash using strings as keys * * The string hashing algorithm is from the "Dragon Book" -- * "Compilers: Principles, Tools & Techniques", by Aho, Sethi, Ullman */ -/*ARGSUSED*/ uint_t mod_hash_bystr(void *hash_data, mod_hash_key_t key) { + (void) hash_data; uint_t hash = 0; uint_t g; char *p, *k = (char *)key; ASSERT(k); for (p = k; *p != '\0'; p++) { hash = (hash << 4) + *p; if ((g = (hash & 0xf0000000)) != 0) { hash ^= (g >> 24); hash ^= g; } } return (hash); } int mod_hash_strkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2) { return (strcmp((char *)key1, (char *)key2)); } void mod_hash_strkey_dtor(mod_hash_key_t key) { char *c = (char *)key; kmem_free(c, strlen(c) + 1); } void mod_hash_strval_dtor(mod_hash_val_t val) { char *c = (char *)val; kmem_free(c, strlen(c) + 1); } mod_hash_t * mod_hash_create_strhash_nodtr(char *name, size_t nchains, void (*val_dtor)(mod_hash_val_t)) { return mod_hash_create_extended(name, nchains, mod_hash_null_keydtor, val_dtor, mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); } mod_hash_t * mod_hash_create_strhash(char *name, size_t nchains, void (*val_dtor)(mod_hash_val_t)) { return mod_hash_create_extended(name, nchains, mod_hash_strkey_dtor, val_dtor, mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); } void mod_hash_destroy_strhash(mod_hash_t *strhash) { ASSERT(strhash); mod_hash_destroy_hash(strhash); } /* * mod_hash_byptr() * mod_hash_ptrkey_cmp() * Hash and key comparison routines for hashes with pointer keys. * * mod_hash_create_ptrhash() * mod_hash_destroy_ptrhash() * Create a hash that uses pointers as keys. This hash algorithm * picks an appropriate set of middle bits in the address to hash on * based on the size of the hash table and a hint about the size of * the items pointed at. */ uint_t mod_hash_byptr(void *hash_data, mod_hash_key_t key) { uintptr_t k = (uintptr_t)key; k >>= (int)(uintptr_t)hash_data; return ((uint_t)k); } int mod_hash_ptrkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2) { uintptr_t k1 = (uintptr_t)key1; uintptr_t k2 = (uintptr_t)key2; if (k1 > k2) return (-1); else if (k1 < k2) return (1); else return (0); } mod_hash_t * mod_hash_create_ptrhash(char *name, size_t nchains, void (*val_dtor)(mod_hash_val_t), size_t key_elem_size) { size_t rshift; /* * We want to hash on the bits in the middle of the address word * Bits far to the right in the word have little significance, and * are likely to all look the same (for example, an array of * 256-byte structures will have the bottom 8 bits of address * words the same). So we want to right-shift each address to * ignore the bottom bits. * * The high bits, which are also unused, will get taken out when * mod_hash takes hashkey % nchains. 
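 *
 * Concretely (illustrative numbers): for key_elem_size == 256,
 * highbit64(256) == 9, so mod_hash_byptr() computes
 *
 *        hash = (uint_t)((uintptr_t)key >> 9)
 *
 * and i_mod_hash() (below) reduces that modulo the chain count.  The
 * shift amount is passed to mod_hash_byptr() through the hash_alg_data
 * argument of mod_hash_create_extended(), cast to and from a pointer.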
*/ rshift = highbit64(key_elem_size); return mod_hash_create_extended(name, nchains, mod_hash_null_keydtor, val_dtor, mod_hash_byptr, (void *)rshift, mod_hash_ptrkey_cmp, KM_SLEEP); } void mod_hash_destroy_ptrhash(mod_hash_t *hash) { ASSERT(hash); mod_hash_destroy_hash(hash); } /* * mod_hash_byid() * mod_hash_idkey_cmp() * Hash and key comparison routines for hashes with 32-bit unsigned keys. * * mod_hash_create_idhash() * mod_hash_destroy_idhash() * mod_hash_iddata_gen() * Create a hash that uses numeric keys. * * The hash algorithm is documented in "Introduction to Algorithms" * (Cormen, Leiserson, Rivest); when the hash table is created, it * attempts to find the next largest prime above the number of hash * slots. The hash index is then this number times the key modulo * the hash size, or (key * prime) % nchains. */ uint_t mod_hash_byid(void *hash_data, mod_hash_key_t key) { uint_t kval = (uint_t)(uintptr_t)hash_data; return ((uint_t)(uintptr_t)key * (uint_t)kval); } int mod_hash_idkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2) { return ((uint_t)(uintptr_t)key1 - (uint_t)(uintptr_t)key2); } /* * Generate the next largest prime number greater than nchains; this value * is intended to be later passed in to mod_hash_create_extended() as the * hash_data. */ uint_t mod_hash_iddata_gen(size_t nchains) { uint_t kval, i, prime; /* * Pick the first (odd) prime greater than nchains. Make sure kval is * odd (so start with nchains +1 or +2 as appropriate). */ kval = (nchains % 2 == 0) ? nchains + 1 : nchains + 2; for (;;) { prime = 1; for (i = 3; i * i <= kval; i += 2) { if (kval % i == 0) prime = 0; } if (prime == 1) break; kval += 2; } return (kval); } mod_hash_t * mod_hash_create_idhash(char *name, size_t nchains, void (*val_dtor)(mod_hash_val_t)) { uint_t kval = mod_hash_iddata_gen(nchains); return (mod_hash_create_extended(name, nchains, mod_hash_null_keydtor, val_dtor, mod_hash_byid, (void *)(uintptr_t)kval, mod_hash_idkey_cmp, KM_SLEEP)); } void mod_hash_destroy_idhash(mod_hash_t *hash) { ASSERT(hash); mod_hash_destroy_hash(hash); } void mod_hash_fini(void) { mutex_destroy(&mh_head_lock); if (mh_e_cache) { kmem_cache_destroy(mh_e_cache); mh_e_cache = NULL; } } /* * mod_hash_init() * sets up globals, etc for mod_hash_* */ void mod_hash_init(void) { ASSERT(mh_e_cache == NULL); mh_e_cache = kmem_cache_create("mod_hash_entries", sizeof (struct mod_hash_entry), 0, NULL, NULL, NULL, NULL, NULL, 0); mutex_init(&mh_head_lock, NULL, MUTEX_DEFAULT, NULL); } /* * mod_hash_create_extended() * The full-blown hash creation function. * * notes: * nchains - how many hash slots to create. More hash slots will * result in shorter hash chains, but will consume * slightly more memory up front. * sleep - should be KM_SLEEP or KM_NOSLEEP, to indicate whether * to sleep for memory, or fail in low-memory conditions. * * Fails only if KM_NOSLEEP was specified, and no memory was available. 
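 *
 * A rough usage sketch (my_hash, my_cmp, my_kdtor and my_vdtor are
 * illustrative names for client-supplied functions with the signatures
 * shown in the definition below):
 *
 *        mod_hash_t *h;
 *
 *        h = mod_hash_create_extended("my hash", 128,
 *            my_kdtor, my_vdtor,        key and value destructors
 *            my_hash, NULL,             hash algorithm + pass-thru arg
 *            my_cmp, KM_SLEEP);
 *        if (h == NULL)
 *                return (ENOMEM);       only possible with KM_NOSLEEP
 *
 * The mod_hash_create_strhash(), _ptrhash() and _idhash() constructors
 * above are canned wrappers around this call.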
*/ mod_hash_t * mod_hash_create_extended( char *hname, /* descriptive name for hash */ size_t nchains, /* number of hash slots */ void (*kdtor)(mod_hash_key_t), /* key destructor */ void (*vdtor)(mod_hash_val_t), /* value destructor */ uint_t (*hash_alg)(void *, mod_hash_key_t), /* hash algorithm */ void *hash_alg_data, /* pass-thru arg for hash_alg */ int (*keycmp)(mod_hash_key_t, mod_hash_key_t), /* key comparator */ int sleep) /* whether to sleep for mem */ { mod_hash_t *mod_hash; size_t size; ASSERT(hname && keycmp && hash_alg && vdtor && kdtor); if ((mod_hash = kmem_zalloc(MH_SIZE(nchains), sleep)) == NULL) return (NULL); size = strlen(hname) + 1; mod_hash->mh_name = kmem_alloc(size, sleep); if (mod_hash->mh_name == NULL) { kmem_free(mod_hash, MH_SIZE(nchains)); return (NULL); } (void) strlcpy(mod_hash->mh_name, hname, size); rw_init(&mod_hash->mh_contents, NULL, RW_DEFAULT, NULL); mod_hash->mh_sleep = sleep; mod_hash->mh_nchains = nchains; mod_hash->mh_kdtor = kdtor; mod_hash->mh_vdtor = vdtor; mod_hash->mh_hashalg = hash_alg; mod_hash->mh_hashalg_data = hash_alg_data; mod_hash->mh_keycmp = keycmp; /* * Link the hash up on the list of hashes */ mutex_enter(&mh_head_lock); mod_hash->mh_next = mh_head; mh_head = mod_hash; mutex_exit(&mh_head_lock); return (mod_hash); } /* * mod_hash_destroy_hash() * destroy a hash table, destroying all of its stored keys and values * as well. */ void mod_hash_destroy_hash(mod_hash_t *hash) { mod_hash_t *mhp, *mhpp; mutex_enter(&mh_head_lock); /* * Remove the hash from the hash list */ if (hash == mh_head) { /* removing 1st list elem */ mh_head = mh_head->mh_next; } else { /* * mhpp can start out NULL since we know the 1st elem isn't the * droid we're looking for. */ mhpp = NULL; for (mhp = mh_head; mhp != NULL; mhp = mhp->mh_next) { if (mhp == hash) { mhpp->mh_next = mhp->mh_next; break; } mhpp = mhp; } } mutex_exit(&mh_head_lock); /* * Clean out keys and values. */ mod_hash_clear(hash); rw_destroy(&hash->mh_contents); kmem_free(hash->mh_name, strlen(hash->mh_name) + 1); kmem_free(hash, MH_SIZE(hash->mh_nchains)); } /* * i_mod_hash() * Call the hashing algorithm for this hash table, with the given key. */ uint_t i_mod_hash(mod_hash_t *hash, mod_hash_key_t key) { uint_t h; /* * Prevent div by 0 problems; * Also a nice shortcut when using a hash as a list */ if (hash->mh_nchains == 1) return (0); h = (hash->mh_hashalg)(hash->mh_hashalg_data, key); return (h % (hash->mh_nchains - 1)); } /* * i_mod_hash_insert_nosync() * mod_hash_insert() * mod_hash_insert_reserve() * insert 'val' into the hash table, using 'key' as its key. If 'key' is * already a key in the hash, an error will be returned, and the key-val * pair will not be inserted. i_mod_hash_insert_nosync() supports a simple * handle abstraction, allowing hash entry allocation to be separated from * the hash insertion. this abstraction allows simple use of the mod_hash * structure in situations where mod_hash_insert() with a KM_SLEEP * allocation policy would otherwise be unsafe. */ int i_mod_hash_insert_nosync(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t val, mod_hash_hndl_t handle) { uint_t hashidx; struct mod_hash_entry *entry; ASSERT(hash); /* * If we've not been given reserved storage, allocate storage directly, * using the hash's allocation policy. 
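 *
 * The handle path exists so a caller can pre-allocate the entry while it
 * is still safe (or permitted) to allocate, and perform the insert later,
 * e.g. from a context that must not sleep.  A rough usage sketch
 * (illustrative):
 *
 *        mod_hash_hndl_t hndl;
 *
 *        if (mod_hash_reserve_nosleep(hash, &hndl) != 0)
 *                return (ENOMEM);                could not pre-allocate
 *        ...
 *        if (mod_hash_insert_reserve(hash, key, val, hndl) != 0)
 *                mod_hash_cancel(hash, &hndl);   e.g. duplicate key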
*/ if (handle == (mod_hash_hndl_t)0) { entry = kmem_cache_alloc(mh_e_cache, hash->mh_sleep); if (entry == NULL) { hash->mh_stat.mhs_nomem++; return (MH_ERR_NOMEM); } } else { entry = (struct mod_hash_entry *)handle; } hashidx = i_mod_hash(hash, key); entry->mhe_key = key; entry->mhe_val = val; entry->mhe_next = hash->mh_entries[hashidx]; hash->mh_entries[hashidx] = entry; hash->mh_stat.mhs_nelems++; return (0); } int mod_hash_insert(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t val) { int res; mod_hash_val_t v; rw_enter(&hash->mh_contents, RW_WRITER); /* * Disallow duplicate keys in the hash */ if (i_mod_hash_find_nosync(hash, key, &v) == 0) { rw_exit(&hash->mh_contents); hash->mh_stat.mhs_coll++; return (MH_ERR_DUPLICATE); } res = i_mod_hash_insert_nosync(hash, key, val, (mod_hash_hndl_t)0); rw_exit(&hash->mh_contents); return (res); } int mod_hash_insert_reserve(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t val, mod_hash_hndl_t handle) { int res; mod_hash_val_t v; rw_enter(&hash->mh_contents, RW_WRITER); /* * Disallow duplicate keys in the hash */ if (i_mod_hash_find_nosync(hash, key, &v) == 0) { rw_exit(&hash->mh_contents); hash->mh_stat.mhs_coll++; return (MH_ERR_DUPLICATE); } res = i_mod_hash_insert_nosync(hash, key, val, handle); rw_exit(&hash->mh_contents); return (res); } /* * mod_hash_reserve() * mod_hash_reserve_nosleep() * mod_hash_cancel() * Make or cancel a mod_hash_entry_t reservation. Reservations are used in * mod_hash_insert_reserve() above. */ int mod_hash_reserve(mod_hash_t *hash, mod_hash_hndl_t *handlep) { *handlep = kmem_cache_alloc(mh_e_cache, hash->mh_sleep); if (*handlep == NULL) { hash->mh_stat.mhs_nomem++; return (MH_ERR_NOMEM); } return (0); } int mod_hash_reserve_nosleep(mod_hash_t *hash, mod_hash_hndl_t *handlep) { *handlep = kmem_cache_alloc(mh_e_cache, KM_NOSLEEP); if (*handlep == NULL) { hash->mh_stat.mhs_nomem++; return (MH_ERR_NOMEM); } return (0); } -/*ARGSUSED*/ void mod_hash_cancel(mod_hash_t *hash, mod_hash_hndl_t *handlep) { + (void) hash; kmem_cache_free(mh_e_cache, *handlep); *handlep = (mod_hash_hndl_t)0; } /* * i_mod_hash_remove_nosync() * mod_hash_remove() * Remove an element from the hash table. */ int i_mod_hash_remove_nosync(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val) { int hashidx; struct mod_hash_entry *e, *ep; hashidx = i_mod_hash(hash, key); ep = NULL; /* e's parent */ for (e = hash->mh_entries[hashidx]; e != NULL; e = e->mhe_next) { if (MH_KEYCMP(hash, e->mhe_key, key) == 0) break; ep = e; } if (e == NULL) { /* not found */ return (MH_ERR_NOTFOUND); } if (ep == NULL) /* special case 1st element in bucket */ hash->mh_entries[hashidx] = e->mhe_next; else ep->mhe_next = e->mhe_next; /* * Clean up resources used by the node's key. */ MH_KEY_DESTROY(hash, e->mhe_key); *val = e->mhe_val; kmem_cache_free(mh_e_cache, e); hash->mh_stat.mhs_nelems--; return (0); } int mod_hash_remove(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val) { int res; rw_enter(&hash->mh_contents, RW_WRITER); res = i_mod_hash_remove_nosync(hash, key, val); rw_exit(&hash->mh_contents); return (res); } /* * mod_hash_replace() * atomically remove an existing key-value pair from a hash, and replace * the key and value with the ones supplied. The removed key and value * (if any) are destroyed. 
*/ int mod_hash_replace(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t val) { int res; mod_hash_val_t v; rw_enter(&hash->mh_contents, RW_WRITER); if (i_mod_hash_remove_nosync(hash, key, &v) == 0) { /* * mod_hash_remove() takes care of freeing up the key resources. */ MH_VAL_DESTROY(hash, v); } res = i_mod_hash_insert_nosync(hash, key, val, (mod_hash_hndl_t)0); rw_exit(&hash->mh_contents); return (res); } /* * mod_hash_destroy() * Remove an element from the hash table matching 'key', and destroy it. */ int mod_hash_destroy(mod_hash_t *hash, mod_hash_key_t key) { mod_hash_val_t val; int rv; rw_enter(&hash->mh_contents, RW_WRITER); if ((rv = i_mod_hash_remove_nosync(hash, key, &val)) == 0) { /* * mod_hash_remove() takes care of freeing up the key resources. */ MH_VAL_DESTROY(hash, val); } rw_exit(&hash->mh_contents); return (rv); } /* * i_mod_hash_find_nosync() * mod_hash_find() * Find a value in the hash table corresponding to the given key. */ int i_mod_hash_find_nosync(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val) { uint_t hashidx; struct mod_hash_entry *e; hashidx = i_mod_hash(hash, key); for (e = hash->mh_entries[hashidx]; e != NULL; e = e->mhe_next) { if (MH_KEYCMP(hash, e->mhe_key, key) == 0) { *val = e->mhe_val; hash->mh_stat.mhs_hit++; return (0); } } hash->mh_stat.mhs_miss++; return (MH_ERR_NOTFOUND); } int mod_hash_find(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val) { int res; rw_enter(&hash->mh_contents, RW_READER); res = i_mod_hash_find_nosync(hash, key, val); rw_exit(&hash->mh_contents); return (res); } int mod_hash_find_cb(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val, void (*find_cb)(mod_hash_key_t, mod_hash_val_t)) { int res; rw_enter(&hash->mh_contents, RW_READER); res = i_mod_hash_find_nosync(hash, key, val); if (res == 0) { find_cb(key, *val); } rw_exit(&hash->mh_contents); return (res); } int mod_hash_find_cb_rval(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val, int (*find_cb)(mod_hash_key_t, mod_hash_val_t), int *cb_rval) { int res; rw_enter(&hash->mh_contents, RW_READER); res = i_mod_hash_find_nosync(hash, key, val); if (res == 0) { *cb_rval = find_cb(key, *val); } rw_exit(&hash->mh_contents); return (res); } void i_mod_hash_walk_nosync(mod_hash_t *hash, uint_t (*callback)(mod_hash_key_t, mod_hash_val_t *, void *), void *arg) { struct mod_hash_entry *e; uint_t hashidx; int res = MH_WALK_CONTINUE; for (hashidx = 0; (hashidx < (hash->mh_nchains - 1)) && (res == MH_WALK_CONTINUE); hashidx++) { e = hash->mh_entries[hashidx]; while ((e != NULL) && (res == MH_WALK_CONTINUE)) { res = callback(e->mhe_key, e->mhe_val, arg); e = e->mhe_next; } } } /* * mod_hash_walk() * Walks all the elements in the hashtable and invokes the callback * function with the key/value pair for each element. The hashtable * is locked for readers so the callback function should not attempt * to do any updates to the hashable. The callback function should * return MH_WALK_CONTINUE to continue walking the hashtable or * MH_WALK_TERMINATE to abort the walk of the hashtable. */ void mod_hash_walk(mod_hash_t *hash, uint_t (*callback)(mod_hash_key_t, mod_hash_val_t *, void *), void *arg) { rw_enter(&hash->mh_contents, RW_READER); i_mod_hash_walk_nosync(hash, callback, arg); rw_exit(&hash->mh_contents); } /* * i_mod_hash_clear_nosync() * mod_hash_clear() * Clears the given hash table by calling the destructor of every hash * element and freeing up all mod_hash_entry's. 
*/ void i_mod_hash_clear_nosync(mod_hash_t *hash) { int i; struct mod_hash_entry *e, *old_e; for (i = 0; i < hash->mh_nchains; i++) { e = hash->mh_entries[i]; while (e != NULL) { MH_KEY_DESTROY(hash, e->mhe_key); MH_VAL_DESTROY(hash, e->mhe_val); old_e = e; e = e->mhe_next; kmem_cache_free(mh_e_cache, old_e); } hash->mh_entries[i] = NULL; } hash->mh_stat.mhs_nelems = 0; } void mod_hash_clear(mod_hash_t *hash) { ASSERT(hash); rw_enter(&hash->mh_contents, RW_WRITER); i_mod_hash_clear_nosync(hash); rw_exit(&hash->mh_contents); }
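For completeness, a short, hedged sketch of how a client might drive the read-side walk interface documented above (kernel context; count_cb and count_entries are illustrative names, not part of the patched files):

#include <sys/modhash.h>

/*
 * Hypothetical walk callback: counts entries.  Returning MH_WALK_TERMINATE
 * instead would stop the walk early.
 */
static uint_t
count_cb(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	(void) key, (void) val;
	uint_t *countp = arg;

	(*countp)++;
	return (MH_WALK_CONTINUE);
}

static uint_t
count_entries(mod_hash_t *hash)
{
	uint_t nelems = 0;

	mod_hash_walk(hash, count_cb, &nelems);
	return (nelems);
}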