diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -108,7 +108,8 @@ crypto/openssl/amd64/sha1-x86_64.S optional ossl crypto/openssl/amd64/sha256-x86_64.S optional ossl crypto/openssl/amd64/sha512-x86_64.S optional ossl -crypto/openssl/amd64/ossl_aes_gcm.c optional ossl +crypto/openssl/amd64/ossl_aes_gcm_avx512.c optional ossl +crypto/openssl/ossl_aes_gcm.c optional ossl dev/acpi_support/acpi_wmi_if.m standard dev/agp/agp_amd64.c optional agp dev/agp/agp_i810.c optional agp diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc --- a/sys/conf/files.powerpc +++ b/sys/conf/files.powerpc @@ -22,9 +22,11 @@ # openssl ppc common files crypto/openssl/ossl_ppc.c optional ossl powerpc64 | ossl powerpc64le +crypto/openssl/ossl_aes_gcm.c optional ossl powerpc64 | ossl powerpc64le # openssl assembly files (powerpc64le) crypto/openssl/powerpc64le/aes-ppc.S optional ossl powerpc64le +crypto/openssl/powerpc64le/aes-gcm-ppc.S optional ossl powerpc64le crypto/openssl/powerpc64le/aesp8-ppc.S optional ossl powerpc64le crypto/openssl/powerpc64le/chacha-ppc.S optional ossl powerpc64le crypto/openssl/powerpc64le/ecp_nistz256-ppc64.S optional ossl powerpc64le @@ -45,6 +47,7 @@ # openssl assembly files (powerpc64) crypto/openssl/powerpc64/aes-ppc.S optional ossl powerpc64 +crypto/openssl/powerpc64/aes-gcm-ppc.S optional ossl powerpc64 crypto/openssl/powerpc64/aesp8-ppc.S optional ossl powerpc64 crypto/openssl/powerpc64/chacha-ppc.S optional ossl powerpc64 crypto/openssl/powerpc64/ecp_nistz256-ppc64.S optional ossl powerpc64 diff --git a/sys/crypto/openssl/amd64/ossl_aes_gcm_avx512.c b/sys/crypto/openssl/amd64/ossl_aes_gcm_avx512.c new file mode 100644 --- /dev/null +++ b/sys/crypto/openssl/amd64/ossl_aes_gcm_avx512.c @@ -0,0 +1,232 @@ +/* + * Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved. + * Copyright (c) 2021, Intel Corporation. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ + +/* + * This file contains an AES-GCM wrapper implementation from OpenSSL, using + * VAES extensions. It was ported from cipher_aes_gcm_hw_vaes_avx512.inc. 
+ */ + +#include <sys/endian.h> +#include <sys/systm.h> + +#include <crypto/openssl/ossl.h> +#include <crypto/openssl/ossl_aes_gcm.h> +#include <crypto/openssl/ossl_cipher.h> + +#include <opencrypto/cryptodev.h> + +_Static_assert( + sizeof(struct ossl_gcm_context) <= sizeof(struct ossl_cipher_context), + "ossl_gcm_context too large"); + +void aesni_set_encrypt_key(const void *key, int bits, void *ctx); + +static void +gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen) +{ + KASSERT(keylen == 128 || keylen == 192 || keylen == 256, + ("%s: invalid key length %zu", __func__, keylen)); + + memset(&ctx->gcm, 0, sizeof(ctx->gcm)); + memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks)); + aesni_set_encrypt_key(key, keylen, &ctx->aes_ks); + ctx->ops->init(ctx, key, keylen); +} + +static void +gcm_tag(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len) +{ + (void)ctx->ops->finish(ctx, NULL, 0); + memcpy(tag, ctx->gcm.Xi.c, len); +} + +void ossl_gcm_gmult_avx512(uint64_t Xi[2], void *gcm128ctx); +void ossl_aes_gcm_init_avx512(const void *ks, void *gcm128ctx); +void ossl_aes_gcm_setiv_avx512(const void *ks, void *gcm128ctx, + const unsigned char *iv, size_t ivlen); +void ossl_aes_gcm_update_aad_avx512(void *gcm128ctx, const unsigned char *aad, + size_t len); +void ossl_aes_gcm_encrypt_avx512(const void *ks, void *gcm128ctx, + unsigned int *pblocklen, const unsigned char *in, size_t len, + unsigned char *out); +void ossl_aes_gcm_decrypt_avx512(const void *ks, void *gcm128ctx, + unsigned int *pblocklen, const unsigned char *in, size_t len, + unsigned char *out); +void ossl_aes_gcm_finalize_avx512(void *gcm128ctx, unsigned int pblocklen); + +static void +gcm_init_avx512(struct ossl_gcm_context *ctx, const void *key, size_t keylen) +{ + ossl_aes_gcm_init_avx512(&ctx->aes_ks, &ctx->gcm); +} + +static void +gcm_setiv_avx512(struct ossl_gcm_context *ctx, const unsigned char *iv, + size_t len) +{ + KASSERT(len == AES_GCM_IV_LEN, + ("%s: invalid IV length %zu", __func__, len)); + + ctx->gcm.Yi.u[0] = 0; /* Current counter */ + ctx->gcm.Yi.u[1] = 0; + ctx->gcm.Xi.u[0] = 0; /* AAD hash */ + ctx->gcm.Xi.u[1] = 0; + ctx->gcm.len.u[0] = 0; /* AAD length */ + ctx->gcm.len.u[1] = 0; /* Message length */ + ctx->gcm.ares = 0; + ctx->gcm.mres = 0; + + ossl_aes_gcm_setiv_avx512(&ctx->aes_ks, ctx, iv, len); +} + +static int +gcm_aad_avx512(struct ossl_gcm_context *ctx, const unsigned char *aad, + size_t len) +{ + uint64_t alen = ctx->gcm.len.u[0]; + size_t lenblks; + unsigned int ares; + + /* Bad sequence: call of AAD update after message processing */ + if (ctx->gcm.len.u[1]) + return -2; + + alen += len; + /* AAD is limited by 2^64 bits, thus 2^61 bytes */ + if (alen > (1ull << 61) || (sizeof(len) == 8 && alen < len)) + return -1; + ctx->gcm.len.u[0] = alen; + + ares = ctx->gcm.ares; + /* Partial AAD block left from previous AAD update calls */ + if (ares > 0) { + /* + * Fill partial block buffer till full block + * (note, the hash is stored reflected) + */ + while (ares > 0 && len > 0) { + ctx->gcm.Xi.c[15 - ares] ^= *(aad++); + --len; + ares = (ares + 1) % AES_BLOCK_LEN; + } + /* Full block gathered */ + if (ares == 0) { + ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx); + } else { /* no more AAD */ + ctx->gcm.ares = ares; + return 0; + } + } + + /* Bulk AAD processing */ + lenblks = len & ((size_t)(-AES_BLOCK_LEN)); + if (lenblks > 0) { + ossl_aes_gcm_update_aad_avx512(ctx, aad, lenblks); + aad += lenblks; + len -= lenblks; + } + + /* Add remaining AAD to the hash (note, the hash is stored reflected) */ + if (len > 0) { + ares = (unsigned int)len; + for (size_t i = 0; i < len; ++i) + ctx->gcm.Xi.c[15 - i] ^= aad[i]; + } + + ctx->gcm.ares = 
ares; + + return 0; +} + +static int +_gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in, + unsigned char *out, size_t len, bool encrypt) +{ + uint64_t mlen = ctx->gcm.len.u[1]; + + mlen += len; + if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len)) + return -1; + + ctx->gcm.len.u[1] = mlen; + + /* Finalize GHASH(AAD) if AAD partial blocks left unprocessed */ + if (ctx->gcm.ares > 0) { + ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx); + ctx->gcm.ares = 0; + } + + if (encrypt) { + ossl_aes_gcm_encrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres, + in, len, out); + } else { + ossl_aes_gcm_decrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres, + in, len, out); + } + + return 0; +} + +static int +gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in, + unsigned char *out, size_t len) +{ + return _gcm_encrypt_avx512(ctx, in, out, len, true); +} + +static int +gcm_decrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in, + unsigned char *out, size_t len) +{ + return _gcm_encrypt_avx512(ctx, in, out, len, false); +} + +static int +gcm_finish_avx512(struct ossl_gcm_context *ctx, const unsigned char *tag, + size_t len) +{ + unsigned int *res = &ctx->gcm.mres; + + /* Finalize AAD processing */ + if (ctx->gcm.ares > 0) + res = &ctx->gcm.ares; + + ossl_aes_gcm_finalize_avx512(ctx, *res); + + ctx->gcm.ares = ctx->gcm.mres = 0; + + if (tag != NULL) + return timingsafe_bcmp(ctx->gcm.Xi.c, tag, len); + return 0; +} + +static const struct ossl_aes_gcm_ops gcm_ops_avx512 = { + .init = gcm_init_avx512, + .setiv = gcm_setiv_avx512, + .aad = gcm_aad_avx512, + .encrypt = gcm_encrypt_avx512, + .decrypt = gcm_decrypt_avx512, + .finish = gcm_finish_avx512, + .tag = gcm_tag, +}; + +int ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen, void *_ctx); + +int +ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen, + void *_ctx) +{ + struct ossl_gcm_context *ctx; + + ctx = _ctx; + ctx->ops = &gcm_ops_avx512; + gcm_init(ctx, key, klen); + return (0); +} diff --git a/sys/crypto/openssl/amd64/ossl_aes_gcm.c b/sys/crypto/openssl/ossl_aes_gcm.c rename from sys/crypto/openssl/amd64/ossl_aes_gcm.c rename to sys/crypto/openssl/ossl_aes_gcm.c --- a/sys/crypto/openssl/amd64/ossl_aes_gcm.c +++ b/sys/crypto/openssl/ossl_aes_gcm.c @@ -1,6 +1,7 @@ /* * Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved. * Copyright (c) 2021, Intel Corporation. All Rights Reserved. + * Copyright (c) 2023, Raptor Engineering, LLC. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -9,11 +10,10 @@ */ /* - * This file contains 2 AES-GCM wrapper implementations from OpenSSL, using - * AES-NI and VAES extensions respectively. These were ported from - * cipher_aes_gcm_hw_aesni.inc and cipher_aes_gcm_hw_vaes_avx512.inc. The - * AES-NI implementation makes use of a generic C implementation for partial - * blocks, ported from gcm128.c with OPENSSL_SMALL_FOOTPRINT defined. + * This file contains an AES-GCM wrapper implementation from OpenSSL, using + * AES-NI (x86) or POWER8 Crypto Extensions (ppc). It was ported from + * cipher_aes_gcm_hw_aesni.inc and it makes use of a generic C implementation + * for partial blocks, ported from gcm128.c with OPENSSL_SMALL_FOOTPRINT defined. 
*/ #include @@ -29,225 +29,151 @@ sizeof(struct ossl_gcm_context) <= sizeof(struct ossl_cipher_context), "ossl_gcm_context too large"); -void aesni_set_encrypt_key(const void *key, int bits, void *ctx); - -static void -gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen) -{ - KASSERT(keylen == 128 || keylen == 192 || keylen == 256, - ("%s: invalid key length %zu", __func__, keylen)); - - memset(&ctx->gcm, 0, sizeof(ctx->gcm)); - memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks)); - aesni_set_encrypt_key(key, keylen, &ctx->aes_ks); - ctx->ops->init(ctx, key, keylen); -} - -static void -gcm_tag(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len) -{ - (void)ctx->ops->finish(ctx, NULL, 0); - memcpy(tag, ctx->gcm.Xi.c, len); -} +#if defined(__amd64__) || defined(__i386__) +#define AES_set_encrypt_key aesni_set_encrypt_key +#define AES_gcm_encrypt aesni_gcm_encrypt +#define AES_gcm_decrypt aesni_gcm_decrypt +#define AES_encrypt aesni_encrypt +#define AES_ctr32_encrypt_blocks aesni_ctr32_encrypt_blocks +#define GCM_init gcm_init_avx +#define GCM_gmult gcm_gmult_avx +#define GCM_ghash gcm_ghash_avx + +void AES_set_encrypt_key(const void *key, int bits, void *ctx); +size_t AES_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len, + const void *key, unsigned char ivec[16], uint64_t *Xi); +size_t AES_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len, + const void *key, unsigned char ivec[16], uint64_t *Xi); +void AES_encrypt(const unsigned char *in, unsigned char *out, void *ks); +void AES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, + size_t blocks, void *ks, const unsigned char *iv); + +void GCM_init(__uint128_t Htable[16], uint64_t Xi[2]); +void GCM_gmult(uint64_t Xi[2], const __uint128_t Htable[16]); +void GCM_ghash(uint64_t Xi[2], const __uint128_t Htable[16], const void *in, + size_t len); + +#elif defined(__powerpc64__) +#define AES_set_encrypt_key aes_p8_set_encrypt_key +#define AES_gcm_encrypt(i,o,l,k,v,x) ppc_aes_gcm_crypt(i,o,l,k,v,x,1) +#define AES_gcm_decrypt(i,o,l,k,v,x) ppc_aes_gcm_crypt(i,o,l,k,v,x,0) +#define AES_encrypt aes_p8_encrypt +#define AES_ctr32_encrypt_blocks aes_p8_ctr32_encrypt_blocks +#define GCM_init gcm_init_p8 +#define GCM_gmult gcm_gmult_p8 +#define GCM_ghash gcm_ghash_p8 + +size_t ppc_aes_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len, + const void *key, unsigned char ivec[16], uint64_t *Xi); +size_t ppc_aes_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len, + const void *key, unsigned char ivec[16], uint64_t *Xi); + +void AES_set_encrypt_key(const void *key, int bits, void *ctx); +void AES_encrypt(const unsigned char *in, unsigned char *out, void *ks); +void AES_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, + size_t blocks, void *ks, const unsigned char *iv); -void ossl_gcm_gmult_avx512(uint64_t Xi[2], void *gcm128ctx); -void ossl_aes_gcm_init_avx512(const void *ks, void *gcm128ctx); -void ossl_aes_gcm_setiv_avx512(const void *ks, void *gcm128ctx, - const unsigned char *iv, size_t ivlen); -void ossl_aes_gcm_update_aad_avx512(void *gcm128ctx, const unsigned char *aad, +void GCM_init(__uint128_t Htable[16], uint64_t Xi[2]); +void GCM_gmult(uint64_t Xi[2], const __uint128_t Htable[16]); +void GCM_ghash(uint64_t Xi[2], const __uint128_t Htable[16], const void *in, size_t len); -void ossl_aes_gcm_encrypt_avx512(const void *ks, void *gcm128ctx, - unsigned int *pblocklen, const unsigned char *in, size_t len, - unsigned char *out); -void 
ossl_aes_gcm_decrypt_avx512(const void *ks, void *gcm128ctx, - unsigned int *pblocklen, const unsigned char *in, size_t len, - unsigned char *out); -void ossl_aes_gcm_finalize_avx512(void *gcm128ctx, unsigned int pblocklen); - -static void -gcm_init_avx512(struct ossl_gcm_context *ctx, const void *key, size_t keylen) -{ - ossl_aes_gcm_init_avx512(&ctx->aes_ks, &ctx->gcm); -} - -static void -gcm_setiv_avx512(struct ossl_gcm_context *ctx, const unsigned char *iv, - size_t len) -{ - KASSERT(len == AES_GCM_IV_LEN, - ("%s: invalid IV length %zu", __func__, len)); - - ctx->gcm.Yi.u[0] = 0; /* Current counter */ - ctx->gcm.Yi.u[1] = 0; - ctx->gcm.Xi.u[0] = 0; /* AAD hash */ - ctx->gcm.Xi.u[1] = 0; - ctx->gcm.len.u[0] = 0; /* AAD length */ - ctx->gcm.len.u[1] = 0; /* Message length */ - ctx->gcm.ares = 0; - ctx->gcm.mres = 0; - ossl_aes_gcm_setiv_avx512(&ctx->aes_ks, ctx, iv, len); -} - -static int -gcm_aad_avx512(struct ossl_gcm_context *ctx, const unsigned char *aad, - size_t len) +static size_t ppc_aes_gcm_crypt(const unsigned char *in, unsigned char *out, + size_t len, const void *key, unsigned char ivec_[16], uint64_t *Xi, + int encrypt) { - uint64_t alen = ctx->gcm.len.u[0]; - size_t lenblks; - unsigned int ares; - - /* Bad sequence: call of AAD update after message processing */ - if (ctx->gcm.len.u[1]) - return -2; - - alen += len; - /* AAD is limited by 2^64 bits, thus 2^61 bytes */ - if (alen > (1ull << 61) || (sizeof(len) == 8 && alen < len)) - return -1; - ctx->gcm.len.u[0] = alen; + union { + uint32_t d[4]; + uint8_t c[16]; + } *ivec = (void *)ivec_; + int s = 0; + int ndone = 0; + int ctr_reset = 0; + uint32_t ivec_val; + uint64_t blocks_unused; + uint64_t nb = len / 16; + uint64_t next_ctr = 0; + unsigned char ctr_saved[12]; + + memcpy(ctr_saved, ivec, 12); + + while (nb) { + ivec_val = ivec->d[3]; +#if BYTE_ORDER == LITTLE_ENDIAN + ivec_val = bswap32(ivec_val); +#endif - ares = ctx->gcm.ares; - /* Partial AAD block left from previous AAD update calls */ - if (ares > 0) { - /* - * Fill partial block buffer till full block - * (note, the hash is stored reflected) - */ - while (ares > 0 && len > 0) { - ctx->gcm.Xi.c[15 - ares] ^= *(aad++); - --len; - ares = (ares + 1) % AES_BLOCK_LEN; - } - /* Full block gathered */ - if (ares == 0) { - ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx); - } else { /* no more AAD */ - ctx->gcm.ares = ares; - return 0; + blocks_unused = (uint64_t) 0xffffffffU + 1 - (uint64_t)ivec_val; + if (nb > blocks_unused) { + len = blocks_unused * 16; + nb -= blocks_unused; + next_ctr = blocks_unused; + ctr_reset = 1; + } else { + len = nb * 16; + next_ctr = nb; + nb = 0; } - } - /* Bulk AAD processing */ - lenblks = len & ((size_t)(-AES_BLOCK_LEN)); - if (lenblks > 0) { - ossl_aes_gcm_update_aad_avx512(ctx, aad, lenblks); - aad += lenblks; - len -= lenblks; - } + s = encrypt ? 
ppc_aes_gcm_encrypt(in, out, len, key, ivec->c, Xi) + : ppc_aes_gcm_decrypt(in, out, len, key, ivec->c, Xi); - /* Add remaining AAD to the hash (note, the hash is stored reflected) */ - if (len > 0) { - ares = (unsigned int)len; - for (size_t i = 0; i < len; ++i) - ctx->gcm.Xi.c[15 - i] ^= aad[i]; + /* add counter to ivec */ +#if BYTE_ORDER == LITTLE_ENDIAN + ivec->d[3] = bswap32(ivec_val + next_ctr); +#else + ivec->d[3] += next_ctr; +#endif + if (ctr_reset) { + ctr_reset = 0; + in += len; + out += len; + } + memcpy(ivec, ctr_saved, 12); + ndone += s; } - ctx->gcm.ares = ares; - - return 0; + return ndone; } -static int -_gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in, - unsigned char *out, size_t len, bool encrypt) -{ - uint64_t mlen = ctx->gcm.len.u[1]; - - mlen += len; - if (mlen > ((1ull << 36) - 32) || (sizeof(len) == 8 && mlen < len)) - return -1; - - ctx->gcm.len.u[1] = mlen; - - /* Finalize GHASH(AAD) if AAD partial blocks left unprocessed */ - if (ctx->gcm.ares > 0) { - ossl_gcm_gmult_avx512(ctx->gcm.Xi.u, ctx); - ctx->gcm.ares = 0; - } - - if (encrypt) { - ossl_aes_gcm_encrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres, - in, len, out); - } else { - ossl_aes_gcm_decrypt_avx512(&ctx->aes_ks, ctx, &ctx->gcm.mres, - in, len, out); - } - - return 0; -} +#else +#error "Unsupported architecture!" +#endif -static int -gcm_encrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in, - unsigned char *out, size_t len) +static void +gcm_init(struct ossl_gcm_context *ctx, const void *key, size_t keylen) { - return _gcm_encrypt_avx512(ctx, in, out, len, true); -} + KASSERT(keylen == 128 || keylen == 192 || keylen == 256, + ("%s: invalid key length %zu", __func__, keylen)); -static int -gcm_decrypt_avx512(struct ossl_gcm_context *ctx, const unsigned char *in, - unsigned char *out, size_t len) -{ - return _gcm_encrypt_avx512(ctx, in, out, len, false); + memset(&ctx->gcm, 0, sizeof(ctx->gcm)); + memset(&ctx->aes_ks, 0, sizeof(ctx->aes_ks)); + AES_set_encrypt_key(key, keylen, &ctx->aes_ks); + ctx->ops->init(ctx, key, keylen); } -static int -gcm_finish_avx512(struct ossl_gcm_context *ctx, const unsigned char *tag, - size_t len) +static void +gcm_tag_op(struct ossl_gcm_context *ctx, unsigned char *tag, size_t len) { - unsigned int *res = &ctx->gcm.mres; - - /* Finalize AAD processing */ - if (ctx->gcm.ares > 0) - res = &ctx->gcm.ares; - - ossl_aes_gcm_finalize_avx512(ctx, *res); - - ctx->gcm.ares = ctx->gcm.mres = 0; - - if (tag != NULL) - return timingsafe_bcmp(ctx->gcm.Xi.c, tag, len); - return 0; + (void)ctx->ops->finish(ctx, NULL, 0); + memcpy(tag, ctx->gcm.Xi.c, len); } -static const struct ossl_aes_gcm_ops gcm_ops_avx512 = { - .init = gcm_init_avx512, - .setiv = gcm_setiv_avx512, - .aad = gcm_aad_avx512, - .encrypt = gcm_encrypt_avx512, - .decrypt = gcm_decrypt_avx512, - .finish = gcm_finish_avx512, - .tag = gcm_tag, -}; - -size_t aesni_gcm_encrypt(const unsigned char *in, unsigned char *out, size_t len, - const void *key, unsigned char ivec[16], uint64_t *Xi); -size_t aesni_gcm_decrypt(const unsigned char *in, unsigned char *out, size_t len, - const void *key, unsigned char ivec[16], uint64_t *Xi); -void aesni_encrypt(const unsigned char *in, unsigned char *out, void *ks); -void aesni_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out, - size_t blocks, void *ks, const unsigned char *iv); - -void gcm_init_avx(__uint128_t Htable[16], uint64_t Xi[2]); -void gcm_gmult_avx(uint64_t Xi[2], const __uint128_t Htable[16]); -void gcm_ghash_avx(uint64_t 
Xi[2], const __uint128_t Htable[16], const void *in, - size_t len); - static void -gcm_init_aesni(struct ossl_gcm_context *ctx, const void *key, size_t keylen) +gcm_init_op(struct ossl_gcm_context *ctx, const void *key, size_t keylen) { - aesni_encrypt(ctx->gcm.H.c, ctx->gcm.H.c, &ctx->aes_ks); + AES_encrypt(ctx->gcm.H.c, ctx->gcm.H.c, &ctx->aes_ks); #if BYTE_ORDER == LITTLE_ENDIAN ctx->gcm.H.u[0] = bswap64(ctx->gcm.H.u[0]); ctx->gcm.H.u[1] = bswap64(ctx->gcm.H.u[1]); #endif - gcm_init_avx(ctx->gcm.Htable, ctx->gcm.H.u); + GCM_init(ctx->gcm.Htable, ctx->gcm.H.u); } static void -gcm_setiv_aesni(struct ossl_gcm_context *ctx, const unsigned char *iv, +gcm_setiv_op(struct ossl_gcm_context *ctx, const unsigned char *iv, size_t len) { uint32_t ctr; @@ -269,7 +195,7 @@ ctx->gcm.Xi.u[0] = 0; ctx->gcm.Xi.u[1] = 0; - aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c, &ctx->aes_ks); + AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EK0.c, &ctx->aes_ks); ctr++; #if BYTE_ORDER == LITTLE_ENDIAN @@ -280,7 +206,7 @@ } static int -gcm_aad_aesni(struct ossl_gcm_context *ctx, const unsigned char *aad, +gcm_aad_op(struct ossl_gcm_context *ctx, const unsigned char *aad, size_t len) { size_t i; @@ -303,14 +229,14 @@ n = (n + 1) % 16; } if (n == 0) - gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable); + GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable); else { ctx->gcm.ares = n; return 0; } } if ((i = (len & (size_t)-AES_BLOCK_LEN))) { - gcm_ghash_avx(ctx->gcm.Xi.u, ctx->gcm.Htable, aad, i); + GCM_ghash(ctx->gcm.Xi.u, ctx->gcm.Htable, aad, i); aad += i; len -= i; } @@ -341,7 +267,7 @@ if (ctx->gcm.ares) { /* First call to encrypt finalizes GHASH(AAD) */ - gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable); + GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable); ctx->gcm.ares = 0; } @@ -354,7 +280,7 @@ n = mres % 16; for (i = 0; i < len; ++i) { if (n == 0) { - aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, + AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks); ++ctr; #if BYTE_ORDER == LITTLE_ENDIAN @@ -366,7 +292,7 @@ ctx->gcm.Xi.c[n] ^= out[i] = in[i] ^ ctx->gcm.EKi.c[n]; mres = n = (n + 1) % 16; if (n == 0) - gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable); + GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable); } ctx->gcm.mres = mres; @@ -390,7 +316,7 @@ if (ctx->gcm.ares) { /* First call to encrypt finalizes GHASH(AAD) */ - gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable); + GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable); ctx->gcm.ares = 0; } @@ -408,7 +334,7 @@ n = (n + 1) % 16; } if (n == 0) { - gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable); + GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable); mres = 0; } else { ctx->gcm.mres = n; @@ -418,7 +344,7 @@ if ((i = (len & (size_t)-16))) { size_t j = i / 16; - aesni_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c); + AES_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c); ctr += (unsigned int)j; #if BYTE_ORDER == LITTLE_ENDIAN ctx->gcm.Yi.d[3] = bswap32(ctr); @@ -430,12 +356,12 @@ while (j--) { for (i = 0; i < 16; ++i) ctx->gcm.Xi.c[i] ^= out[i]; - gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable); + GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable); out += 16; } } if (len) { - aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks); + AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks); ++ctr; #if BYTE_ORDER == LITTLE_ENDIAN ctx->gcm.Yi.d[3] = bswap32(ctr); @@ -453,7 +379,7 @@ } static int -gcm_encrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in, +gcm_encrypt_op(struct ossl_gcm_context *ctx, const unsigned char *in, unsigned char *out, size_t len) { size_t bulk = 0, res; @@ -463,7 +389,7 @@ if 
((error = gcm_encrypt(ctx, in, out, res)) != 0) return error; - bulk = aesni_gcm_encrypt(in + res, out + res, len - res, + bulk = AES_gcm_encrypt(in + res, out + res, len - res, &ctx->aes_ks, ctx->gcm.Yi.c, ctx->gcm.Xi.u); ctx->gcm.len.u[1] += bulk; bulk += res; @@ -492,7 +418,7 @@ if (ctx->gcm.ares) { /* First call to encrypt finalizes GHASH(AAD) */ - gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable); + GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable); ctx->gcm.ares = 0; } @@ -506,7 +432,7 @@ for (i = 0; i < len; ++i) { uint8_t c; if (n == 0) { - aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, + AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks); ++ctr; #if BYTE_ORDER == LITTLE_ENDIAN @@ -520,7 +446,7 @@ ctx->gcm.Xi.c[n] ^= c; mres = n = (n + 1) % 16; if (n == 0) - gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable); + GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable); } ctx->gcm.mres = mres; @@ -544,7 +470,7 @@ if (ctx->gcm.ares) { /* First call to decrypt finalizes GHASH(AAD) */ - gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable); + GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable); ctx->gcm.ares = 0; } @@ -564,7 +490,7 @@ n = (n + 1) % 16; } if (n == 0) { - gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable); + GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable); mres = 0; } else { ctx->gcm.mres = n; @@ -578,12 +504,12 @@ size_t k; for (k = 0; k < 16; ++k) ctx->gcm.Xi.c[k] ^= in[k]; - gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable); + GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable); in += 16; } j = i / 16; in -= i; - aesni_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c); + AES_ctr32_encrypt_blocks(in, out, j, &ctx->aes_ks, ctx->gcm.Yi.c); ctr += (unsigned int)j; #if BYTE_ORDER == LITTLE_ENDIAN ctx->gcm.Yi.d[3] = bswap32(ctr); @@ -595,7 +521,7 @@ len -= i; } if (len) { - aesni_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks); + AES_encrypt(ctx->gcm.Yi.c, ctx->gcm.EKi.c, &ctx->aes_ks); ++ctr; #if BYTE_ORDER == LITTLE_ENDIAN ctx->gcm.Yi.d[3] = bswap32(ctr); @@ -615,7 +541,7 @@ } static int -gcm_decrypt_aesni(struct ossl_gcm_context *ctx, const unsigned char *in, +gcm_decrypt_op(struct ossl_gcm_context *ctx, const unsigned char *in, unsigned char *out, size_t len) { size_t bulk = 0, res; @@ -625,8 +551,8 @@ if ((error = gcm_decrypt(ctx, in, out, res)) != 0) return error; - bulk = aesni_gcm_decrypt(in + res, out + res, len - res, &ctx->aes_ks, - ctx->gcm.Yi.c, ctx->gcm.Xi.u); + bulk = AES_gcm_decrypt(in + res, out + res, len - res, &ctx->aes_ks, + ctx->gcm.Yi.c, ctx->gcm.Xi.u); ctx->gcm.len.u[1] += bulk; bulk += res; @@ -637,14 +563,14 @@ } static int -gcm_finish_aesni(struct ossl_gcm_context *ctx, const unsigned char *tag, +gcm_finish_op(struct ossl_gcm_context *ctx, const unsigned char *tag, size_t len) { uint64_t alen = ctx->gcm.len.u[0] << 3; uint64_t clen = ctx->gcm.len.u[1] << 3; if (ctx->gcm.mres || ctx->gcm.ares) - gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable); + GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable); #if BYTE_ORDER == LITTLE_ENDIAN alen = bswap64(alen); @@ -653,7 +579,7 @@ ctx->gcm.Xi.u[0] ^= alen; ctx->gcm.Xi.u[1] ^= clen; - gcm_gmult_avx(ctx->gcm.Xi.u, ctx->gcm.Htable); + GCM_gmult(ctx->gcm.Xi.u, ctx->gcm.Htable); ctx->gcm.Xi.u[0] ^= ctx->gcm.EK0.u[0]; ctx->gcm.Xi.u[1] ^= ctx->gcm.EK0.u[1]; @@ -663,40 +589,26 @@ return 0; } -static const struct ossl_aes_gcm_ops gcm_ops_aesni = { - .init = gcm_init_aesni, - .setiv = gcm_setiv_aesni, - .aad = gcm_aad_aesni, - .encrypt = gcm_encrypt_aesni, - .decrypt = gcm_decrypt_aesni, - .finish = gcm_finish_aesni, - .tag = gcm_tag, +static const struct ossl_aes_gcm_ops gcm_ops = { 
+ .init = gcm_init_op, + .setiv = gcm_setiv_op, + .aad = gcm_aad_op, + .encrypt = gcm_encrypt_op, + .decrypt = gcm_decrypt_op, + .finish = gcm_finish_op, + .tag = gcm_tag_op, }; -int ossl_aes_gcm_setkey_aesni(const unsigned char *key, int klen, void *_ctx); - -int -ossl_aes_gcm_setkey_aesni(const unsigned char *key, int klen, - void *_ctx) -{ - struct ossl_gcm_context *ctx; - - ctx = _ctx; - ctx->ops = &gcm_ops_aesni; - gcm_init(ctx, key, klen); - return (0); -} - -int ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen, void *_ctx); +int ossl_aes_gcm_setkey(const unsigned char *key, int klen, void *_ctx); int -ossl_aes_gcm_setkey_avx512(const unsigned char *key, int klen, +ossl_aes_gcm_setkey(const unsigned char *key, int klen, void *_ctx) { struct ossl_gcm_context *ctx; ctx = _ctx; - ctx->ops = &gcm_ops_avx512; + ctx->ops = &gcm_ops; gcm_init(ctx, key, klen); return (0); } diff --git a/sys/crypto/openssl/ossl_ppc.c b/sys/crypto/openssl/ossl_ppc.c --- a/sys/crypto/openssl/ossl_ppc.c +++ b/sys/crypto/openssl/ossl_ppc.c @@ -38,9 +38,12 @@ ossl_cipher_setkey_t aes_p8_set_encrypt_key; ossl_cipher_setkey_t aes_p8_set_decrypt_key; + ossl_cipher_setkey_t vpaes_set_encrypt_key; ossl_cipher_setkey_t vpaes_set_decrypt_key; +ossl_cipher_setkey_t ossl_aes_gcm_setkey; + void ossl_cpuid(struct ossl_softc *sc) { @@ -75,7 +78,11 @@ ossl_cipher_aes_cbc.set_encrypt_key = aes_p8_set_encrypt_key; ossl_cipher_aes_cbc.set_decrypt_key = aes_p8_set_decrypt_key; sc->has_aes = true; - } else if (OPENSSL_ppccap_P & PPC_ALTIVEC) { + + ossl_cipher_aes_gcm.set_encrypt_key = ossl_aes_gcm_setkey; + ossl_cipher_aes_gcm.set_decrypt_key = ossl_aes_gcm_setkey; + sc->has_aes_gcm = true; + } else if (OPENSSL_ppccap_P & PPC_ALTIVEC) { ossl_cipher_aes_cbc.set_encrypt_key = vpaes_set_encrypt_key; ossl_cipher_aes_cbc.set_decrypt_key = vpaes_set_decrypt_key; sc->has_aes = true; diff --git a/sys/crypto/openssl/ossl_x86.c b/sys/crypto/openssl/ossl_x86.c --- a/sys/crypto/openssl/ossl_x86.c +++ b/sys/crypto/openssl/ossl_x86.c @@ -56,7 +56,7 @@ #ifdef __amd64__ int ossl_vaes_vpclmulqdq_capable(void); -ossl_cipher_setkey_t ossl_aes_gcm_setkey_aesni; +ossl_cipher_setkey_t ossl_aes_gcm_setkey; ossl_cipher_setkey_t ossl_aes_gcm_setkey_avx512; #endif @@ -141,8 +141,8 @@ } else if ((cpu_feature2 & (CPUID2_AVX | CPUID2_PCLMULQDQ | CPUID2_MOVBE)) == (CPUID2_AVX | CPUID2_PCLMULQDQ | CPUID2_MOVBE)) { - ossl_cipher_aes_gcm.set_encrypt_key = ossl_aes_gcm_setkey_aesni; - ossl_cipher_aes_gcm.set_decrypt_key = ossl_aes_gcm_setkey_aesni; + ossl_cipher_aes_gcm.set_encrypt_key = ossl_aes_gcm_setkey; + ossl_cipher_aes_gcm.set_decrypt_key = ossl_aes_gcm_setkey; sc->has_aes_gcm = true; } else { sc->has_aes_gcm = false; diff --git a/sys/crypto/openssl/powerpc64/aes-gcm-ppc.S b/sys/crypto/openssl/powerpc64/aes-gcm-ppc.S new file mode 100644 --- /dev/null +++ b/sys/crypto/openssl/powerpc64/aes-gcm-ppc.S @@ -0,0 +1,1338 @@ +.machine "any" +.text + + + + + +.macro .Loop_aes_middle4x + xxlor 19+32, 1, 1 + xxlor 20+32, 2, 2 + xxlor 21+32, 3, 3 + xxlor 22+32, 4, 4 + + .long 0x11EF9D08 + .long 0x12109D08 + .long 0x12319D08 + .long 0x12529D08 + + .long 0x11EFA508 + .long 0x1210A508 + .long 0x1231A508 + .long 0x1252A508 + + .long 0x11EFAD08 + .long 0x1210AD08 + .long 0x1231AD08 + .long 0x1252AD08 + + .long 0x11EFB508 + .long 0x1210B508 + .long 0x1231B508 + .long 0x1252B508 + + xxlor 19+32, 5, 5 + xxlor 20+32, 6, 6 + xxlor 21+32, 7, 7 + xxlor 22+32, 8, 8 + + .long 0x11EF9D08 + .long 0x12109D08 + .long 0x12319D08 + .long 0x12529D08 + + .long 0x11EFA508 + 
.long 0x1210A508 + .long 0x1231A508 + .long 0x1252A508 + + .long 0x11EFAD08 + .long 0x1210AD08 + .long 0x1231AD08 + .long 0x1252AD08 + + .long 0x11EFB508 + .long 0x1210B508 + .long 0x1231B508 + .long 0x1252B508 + + xxlor 23+32, 9, 9 + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 +.endm + + + + + +.macro .Loop_aes_middle8x + xxlor 23+32, 1, 1 + xxlor 24+32, 2, 2 + xxlor 25+32, 3, 3 + xxlor 26+32, 4, 4 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + + .long 0x11EFD508 + .long 0x1210D508 + .long 0x1231D508 + .long 0x1252D508 + .long 0x1273D508 + .long 0x1294D508 + .long 0x12B5D508 + .long 0x12D6D508 + + xxlor 23+32, 5, 5 + xxlor 24+32, 6, 6 + xxlor 25+32, 7, 7 + xxlor 26+32, 8, 8 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + + .long 0x11EFD508 + .long 0x1210D508 + .long 0x1231D508 + .long 0x1252D508 + .long 0x1273D508 + .long 0x1294D508 + .long 0x12B5D508 + .long 0x12D6D508 + + xxlor 23+32, 9, 9 + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 +.endm + + + + +ppc_aes_gcm_ghash: + vxor 15, 15, 0 + + xxlxor 29, 29, 29 + + .long 0x12EC7CC8 + .long 0x130984C8 + .long 0x13268CC8 + .long 0x134394C8 + + vxor 23, 23, 24 + vxor 23, 23, 25 + vxor 23, 23, 26 + + .long 0x130D7CC8 + .long 0x132A84C8 + .long 0x13478CC8 + .long 0x136494C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + + .long 0x139714C8 + + xxlor 29+32, 29, 29 + vsldoi 26, 24, 29, 8 + vsldoi 29, 29, 24, 8 + vxor 23, 23, 26 + + vsldoi 23, 23, 23, 8 + vxor 23, 23, 28 + + .long 0x130E7CC8 + .long 0x132B84C8 + .long 0x13488CC8 + .long 0x136594C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + vxor 24, 24, 29 + + + vsldoi 27, 23, 23, 8 + .long 0x12F714C8 + vxor 27, 27, 24 + vxor 23, 23, 27 + + xxlor 32, 23+32, 23+32 + + blr + + + + + +.macro ppc_aes_gcm_ghash2_4x + + vxor 15, 15, 0 + + xxlxor 29, 29, 29 + + .long 0x12EC7CC8 + .long 0x130984C8 + .long 0x13268CC8 + .long 0x134394C8 + + vxor 23, 23, 24 + vxor 23, 23, 25 + vxor 23, 23, 26 + + .long 0x130D7CC8 + .long 0x132A84C8 + .long 0x13478CC8 + .long 0x136494C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + + + .long 0x139714C8 + + xxlor 29+32, 29, 29 + + vxor 24, 24, 27 + vsldoi 26, 24, 29, 8 + vsldoi 29, 29, 24, 8 + vxor 23, 23, 26 + + vsldoi 23, 23, 23, 8 + vxor 23, 23, 28 + + .long 0x130E7CC8 + .long 0x132B84C8 + .long 0x13488CC8 + .long 0x136594C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + vxor 24, 24, 29 + + + vsldoi 27, 23, 23, 8 + .long 0x12F714C8 + vxor 27, 27, 24 + vxor 27, 23, 27 + + + .long 0x1309A4C8 + .long 0x1326ACC8 + .long 0x1343B4C8 + vxor 19, 19, 27 + .long 0x12EC9CC8 + + vxor 23, 23, 24 + vxor 
23, 23, 25 + vxor 23, 23, 26 + + .long 0x130D9CC8 + .long 0x132AA4C8 + .long 0x1347ACC8 + .long 0x1364B4C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + + + .long 0x139714C8 + + xxlor 29+32, 29, 29 + + vxor 24, 24, 27 + vsldoi 26, 24, 29, 8 + vsldoi 29, 29, 24, 8 + vxor 23, 23, 26 + + vsldoi 23, 23, 23, 8 + vxor 23, 23, 28 + + .long 0x130E9CC8 + .long 0x132BA4C8 + .long 0x1348ACC8 + .long 0x1365B4C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + vxor 24, 24, 29 + + + vsldoi 27, 23, 23, 8 + .long 0x12F714C8 + vxor 27, 27, 24 + vxor 23, 23, 27 + + xxlor 32, 23+32, 23+32 + +.endm + + + + +.macro ppc_update_hash_1x + vxor 28, 28, 0 + + vxor 19, 19, 19 + + .long 0x12C3E4C8 + .long 0x12E4E4C8 + .long 0x1305E4C8 + + .long 0x137614C8 + + vsldoi 25, 23, 19, 8 + vsldoi 26, 19, 23, 8 + vxor 22, 22, 25 + vxor 24, 24, 26 + + vsldoi 22, 22, 22, 8 + vxor 22, 22, 27 + + vsldoi 20, 22, 22, 8 + .long 0x12D614C8 + vxor 20, 20, 24 + vxor 22, 22, 20 + + vor 0,22,22 + +.endm + + + + + + + + + + + + + +.global ppc_aes_gcm_encrypt +.align 5 +ppc_aes_gcm_encrypt: +_ppc_aes_gcm_encrypt: + + stdu 1,-512(1) + mflr 0 + + std 14,112(1) + std 15,120(1) + std 16,128(1) + std 17,136(1) + std 18,144(1) + std 19,152(1) + std 20,160(1) + std 21,168(1) + li 9, 256 + stvx 20, 9, 1 + addi 9, 9, 16 + stvx 21, 9, 1 + addi 9, 9, 16 + stvx 22, 9, 1 + addi 9, 9, 16 + stvx 23, 9, 1 + addi 9, 9, 16 + stvx 24, 9, 1 + addi 9, 9, 16 + stvx 25, 9, 1 + addi 9, 9, 16 + stvx 26, 9, 1 + addi 9, 9, 16 + stvx 27, 9, 1 + addi 9, 9, 16 + stvx 28, 9, 1 + addi 9, 9, 16 + stvx 29, 9, 1 + addi 9, 9, 16 + stvx 30, 9, 1 + addi 9, 9, 16 + stvx 31, 9, 1 + std 0, 528(1) + + + lxvb16x 32, 0, 8 + + + li 10, 32 + lxvd2x 2+32, 10, 8 + li 10, 48 + lxvd2x 3+32, 10, 8 + li 10, 64 + lxvd2x 4+32, 10, 8 + li 10, 80 + lxvd2x 5+32, 10, 8 + + li 10, 96 + lxvd2x 6+32, 10, 8 + li 10, 112 + lxvd2x 7+32, 10, 8 + li 10, 128 + lxvd2x 8+32, 10, 8 + + li 10, 144 + lxvd2x 9+32, 10, 8 + li 10, 160 + lxvd2x 10+32, 10, 8 + li 10, 176 + lxvd2x 11+32, 10, 8 + + li 10, 192 + lxvd2x 12+32, 10, 8 + li 10, 208 + lxvd2x 13+32, 10, 8 + li 10, 224 + lxvd2x 14+32, 10, 8 + + + lxvb16x 30+32, 0, 7 + + mr 12, 5 + li 11, 0 + + + vxor 31, 31, 31 + vspltisb 22,1 + vsldoi 31, 31, 22,1 + + + lxv 0, 0(6) + lxv 1, 0x10(6) + lxv 2, 0x20(6) + lxv 3, 0x30(6) + lxv 4, 0x40(6) + lxv 5, 0x50(6) + lxv 6, 0x60(6) + lxv 7, 0x70(6) + lxv 8, 0x80(6) + lxv 9, 0x90(6) + lxv 10, 0xa0(6) + + + lwz 9,240(6) + + + + xxlor 32+29, 0, 0 + vxor 15, 30, 29 + + cmpdi 9, 10 + beq .Loop_aes_gcm_8x + + + lxv 11, 0xb0(6) + lxv 12, 0xc0(6) + + cmpdi 9, 12 + beq .Loop_aes_gcm_8x + + + lxv 13, 0xd0(6) + lxv 14, 0xe0(6) + cmpdi 9, 14 + beq .Loop_aes_gcm_8x + + b aes_gcm_out + +.align 5 +.Loop_aes_gcm_8x: + mr 14, 3 + mr 9, 4 + + + li 10, 128 + divdu 10, 5, 10 + cmpdi 10, 0 + beq .Loop_last_block + + .long 0x13DEF8C0 + vxor 16, 30, 29 + .long 0x13DEF8C0 + vxor 17, 30, 29 + .long 0x13DEF8C0 + vxor 18, 30, 29 + .long 0x13DEF8C0 + vxor 19, 30, 29 + .long 0x13DEF8C0 + vxor 20, 30, 29 + .long 0x13DEF8C0 + vxor 21, 30, 29 + .long 0x13DEF8C0 + vxor 22, 30, 29 + + mtctr 10 + + li 15, 16 + li 16, 32 + li 17, 48 + li 18, 64 + li 19, 80 + li 20, 96 + li 21, 112 + + lwz 10, 240(6) + +.Loop_8x_block: + + lxvb16x 15, 0, 14 + lxvb16x 16, 15, 14 + lxvb16x 17, 16, 14 + lxvb16x 18, 17, 14 + lxvb16x 19, 18, 14 + lxvb16x 20, 19, 14 + lxvb16x 21, 20, 14 + lxvb16x 22, 21, 14 + addi 14, 14, 128 + +.Loop_aes_middle8x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_next_ghash + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x1210BD08 + 
.long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_next_ghash + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_next_ghash + b aes_gcm_out + +Do_next_ghash: + + + + .long 0x11EFBD09 + .long 0x1210BD09 + + xxlxor 47, 47, 15 + stxvb16x 47, 0, 9 + xxlxor 48, 48, 16 + stxvb16x 48, 15, 9 + + .long 0x1231BD09 + .long 0x1252BD09 + + xxlxor 49, 49, 17 + stxvb16x 49, 16, 9 + xxlxor 50, 50, 18 + stxvb16x 50, 17, 9 + + .long 0x1273BD09 + .long 0x1294BD09 + + xxlxor 51, 51, 19 + stxvb16x 51, 18, 9 + xxlxor 52, 52, 20 + stxvb16x 52, 19, 9 + + .long 0x12B5BD09 + .long 0x12D6BD09 + + xxlxor 53, 53, 21 + stxvb16x 53, 20, 9 + xxlxor 54, 54, 22 + stxvb16x 54, 21, 9 + + addi 9, 9, 128 + + + ppc_aes_gcm_ghash2_4x + + xxlor 27+32, 0, 0 + .long 0x13DEF8C0 + vor 29,30,30 + vxor 15, 30, 27 + .long 0x13DEF8C0 + vxor 16, 30, 27 + .long 0x13DEF8C0 + vxor 17, 30, 27 + .long 0x13DEF8C0 + vxor 18, 30, 27 + .long 0x13DEF8C0 + vxor 19, 30, 27 + .long 0x13DEF8C0 + vxor 20, 30, 27 + .long 0x13DEF8C0 + vxor 21, 30, 27 + .long 0x13DEF8C0 + vxor 22, 30, 27 + + addi 12, 12, -128 + addi 11, 11, 128 + + bdnz .Loop_8x_block + + vor 30,29,29 + +.Loop_last_block: + cmpdi 12, 0 + beq aes_gcm_out + + + li 10, 16 + divdu 10, 12, 10 + + mtctr 10 + + lwz 10, 240(6) + + cmpdi 12, 16 + blt Final_block + +.macro .Loop_aes_middle_1x + xxlor 19+32, 1, 1 + xxlor 20+32, 2, 2 + xxlor 21+32, 3, 3 + xxlor 22+32, 4, 4 + + .long 0x11EF9D08 + .long 0x11EFA508 + .long 0x11EFAD08 + .long 0x11EFB508 + + xxlor 19+32, 5, 5 + xxlor 20+32, 6, 6 + xxlor 21+32, 7, 7 + xxlor 22+32, 8, 8 + + .long 0x11EF9D08 + .long 0x11EFA508 + .long 0x11EFAD08 + .long 0x11EFB508 + + xxlor 19+32, 9, 9 + .long 0x11EF9D08 +.endm + +Next_rem_block: + lxvb16x 15, 0, 14 + +.Loop_aes_middle_1x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_next_1x + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_next_1x + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_next_1x + +Do_next_1x: + .long 0x11EFBD09 + + xxlxor 47, 47, 15 + stxvb16x 47, 0, 9 + addi 14, 14, 16 + addi 9, 9, 16 + + vor 28,15,15 + ppc_update_hash_1x + + addi 12, 12, -16 + addi 11, 11, 16 + xxlor 19+32, 0, 0 + .long 0x13DEF8C0 + vxor 15, 30, 19 + + bdnz Next_rem_block + + cmpdi 12, 0 + beq aes_gcm_out + +Final_block: +.Loop_aes_middle_1x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_final_1x + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_final_1x + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_final_1x + +Do_final_1x: + .long 0x11EFBD09 + + lxvb16x 15, 0, 14 + xxlxor 47, 47, 15 + + + li 15, 16 + sub 15, 15, 12 + + vspltisb 16,-1 + vspltisb 17,0 + li 10, 192 + stvx 16, 10, 1 + addi 10, 10, 16 + stvx 17, 10, 1 + + addi 10, 1, 192 + lxvb16x 16, 15, 10 + xxland 47, 47, 16 
+ + vor 28,15,15 + ppc_update_hash_1x + + + bl Write_partial_block + + b aes_gcm_out + + + + + + + +Write_partial_block: + li 10, 192 + stxvb16x 15+32, 10, 1 + + + addi 10, 9, -1 + addi 16, 1, 191 + + mtctr 12 + li 15, 0 + +Write_last_byte: + lbzu 14, 1(16) + stbu 14, 1(10) + bdnz Write_last_byte + blr + +aes_gcm_out: + + stxvb16x 32, 0, 8 + add 3, 11, 12 + + li 9, 256 + lvx 20, 9, 1 + addi 9, 9, 16 + lvx 21, 9, 1 + addi 9, 9, 16 + lvx 22, 9, 1 + addi 9, 9, 16 + lvx 23, 9, 1 + addi 9, 9, 16 + lvx 24, 9, 1 + addi 9, 9, 16 + lvx 25, 9, 1 + addi 9, 9, 16 + lvx 26, 9, 1 + addi 9, 9, 16 + lvx 27, 9, 1 + addi 9, 9, 16 + lvx 28, 9, 1 + addi 9, 9, 16 + lvx 29, 9, 1 + addi 9, 9, 16 + lvx 30, 9, 1 + addi 9, 9, 16 + lvx 31, 9, 1 + + ld 0, 528(1) + ld 14,112(1) + ld 15,120(1) + ld 16,128(1) + ld 17,136(1) + ld 18,144(1) + ld 19,152(1) + ld 20,160(1) + ld 21,168(1) + + mtlr 0 + addi 1, 1, 512 + blr + + + + +.global ppc_aes_gcm_decrypt +.align 5 +ppc_aes_gcm_decrypt: +_ppc_aes_gcm_decrypt: + + stdu 1,-512(1) + mflr 0 + + std 14,112(1) + std 15,120(1) + std 16,128(1) + std 17,136(1) + std 18,144(1) + std 19,152(1) + std 20,160(1) + std 21,168(1) + li 9, 256 + stvx 20, 9, 1 + addi 9, 9, 16 + stvx 21, 9, 1 + addi 9, 9, 16 + stvx 22, 9, 1 + addi 9, 9, 16 + stvx 23, 9, 1 + addi 9, 9, 16 + stvx 24, 9, 1 + addi 9, 9, 16 + stvx 25, 9, 1 + addi 9, 9, 16 + stvx 26, 9, 1 + addi 9, 9, 16 + stvx 27, 9, 1 + addi 9, 9, 16 + stvx 28, 9, 1 + addi 9, 9, 16 + stvx 29, 9, 1 + addi 9, 9, 16 + stvx 30, 9, 1 + addi 9, 9, 16 + stvx 31, 9, 1 + std 0, 528(1) + + + lxvb16x 32, 0, 8 + + + li 10, 32 + lxvd2x 2+32, 10, 8 + li 10, 48 + lxvd2x 3+32, 10, 8 + li 10, 64 + lxvd2x 4+32, 10, 8 + li 10, 80 + lxvd2x 5+32, 10, 8 + + li 10, 96 + lxvd2x 6+32, 10, 8 + li 10, 112 + lxvd2x 7+32, 10, 8 + li 10, 128 + lxvd2x 8+32, 10, 8 + + li 10, 144 + lxvd2x 9+32, 10, 8 + li 10, 160 + lxvd2x 10+32, 10, 8 + li 10, 176 + lxvd2x 11+32, 10, 8 + + li 10, 192 + lxvd2x 12+32, 10, 8 + li 10, 208 + lxvd2x 13+32, 10, 8 + li 10, 224 + lxvd2x 14+32, 10, 8 + + + lxvb16x 30+32, 0, 7 + + mr 12, 5 + li 11, 0 + + + vxor 31, 31, 31 + vspltisb 22,1 + vsldoi 31, 31, 22,1 + + + lxv 0, 0(6) + lxv 1, 0x10(6) + lxv 2, 0x20(6) + lxv 3, 0x30(6) + lxv 4, 0x40(6) + lxv 5, 0x50(6) + lxv 6, 0x60(6) + lxv 7, 0x70(6) + lxv 8, 0x80(6) + lxv 9, 0x90(6) + lxv 10, 0xa0(6) + + + lwz 9,240(6) + + + + xxlor 32+29, 0, 0 + vxor 15, 30, 29 + + cmpdi 9, 10 + beq .Loop_aes_gcm_8x_dec + + + lxv 11, 0xb0(6) + lxv 12, 0xc0(6) + + cmpdi 9, 12 + beq .Loop_aes_gcm_8x_dec + + + lxv 13, 0xd0(6) + lxv 14, 0xe0(6) + cmpdi 9, 14 + beq .Loop_aes_gcm_8x_dec + + b aes_gcm_out + +.align 5 +.Loop_aes_gcm_8x_dec: + mr 14, 3 + mr 9, 4 + + + li 10, 128 + divdu 10, 5, 10 + cmpdi 10, 0 + beq .Loop_last_block_dec + + .long 0x13DEF8C0 + vxor 16, 30, 29 + .long 0x13DEF8C0 + vxor 17, 30, 29 + .long 0x13DEF8C0 + vxor 18, 30, 29 + .long 0x13DEF8C0 + vxor 19, 30, 29 + .long 0x13DEF8C0 + vxor 20, 30, 29 + .long 0x13DEF8C0 + vxor 21, 30, 29 + .long 0x13DEF8C0 + vxor 22, 30, 29 + + mtctr 10 + + li 15, 16 + li 16, 32 + li 17, 48 + li 18, 64 + li 19, 80 + li 20, 96 + li 21, 112 + + lwz 10, 240(6) + +.Loop_8x_block_dec: + + lxvb16x 15, 0, 14 + lxvb16x 16, 15, 14 + lxvb16x 17, 16, 14 + lxvb16x 18, 17, 14 + lxvb16x 19, 18, 14 + lxvb16x 20, 19, 14 + lxvb16x 21, 20, 14 + lxvb16x 22, 21, 14 + addi 14, 14, 128 + +.Loop_aes_middle8x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_last_aes_dec + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 
0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_last_aes_dec + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_last_aes_dec + b aes_gcm_out + +Do_last_aes_dec: + + + + .long 0x11EFBD09 + .long 0x1210BD09 + + xxlxor 47, 47, 15 + stxvb16x 47, 0, 9 + xxlxor 48, 48, 16 + stxvb16x 48, 15, 9 + + .long 0x1231BD09 + .long 0x1252BD09 + + xxlxor 49, 49, 17 + stxvb16x 49, 16, 9 + xxlxor 50, 50, 18 + stxvb16x 50, 17, 9 + + .long 0x1273BD09 + .long 0x1294BD09 + + xxlxor 51, 51, 19 + stxvb16x 51, 18, 9 + xxlxor 52, 52, 20 + stxvb16x 52, 19, 9 + + .long 0x12B5BD09 + .long 0x12D6BD09 + + xxlxor 53, 53, 21 + stxvb16x 53, 20, 9 + xxlxor 54, 54, 22 + stxvb16x 54, 21, 9 + + addi 9, 9, 128 + + xxlor 15+32, 15, 15 + xxlor 16+32, 16, 16 + xxlor 17+32, 17, 17 + xxlor 18+32, 18, 18 + xxlor 19+32, 19, 19 + xxlor 20+32, 20, 20 + xxlor 21+32, 21, 21 + xxlor 22+32, 22, 22 + + + ppc_aes_gcm_ghash2_4x + + xxlor 27+32, 0, 0 + .long 0x13DEF8C0 + vor 29,30,30 + vxor 15, 30, 27 + .long 0x13DEF8C0 + vxor 16, 30, 27 + .long 0x13DEF8C0 + vxor 17, 30, 27 + .long 0x13DEF8C0 + vxor 18, 30, 27 + .long 0x13DEF8C0 + vxor 19, 30, 27 + .long 0x13DEF8C0 + vxor 20, 30, 27 + .long 0x13DEF8C0 + vxor 21, 30, 27 + .long 0x13DEF8C0 + vxor 22, 30, 27 + addi 12, 12, -128 + addi 11, 11, 128 + + bdnz .Loop_8x_block_dec + + vor 30,29,29 + +.Loop_last_block_dec: + cmpdi 12, 0 + beq aes_gcm_out + + + li 10, 16 + divdu 10, 12, 10 + + mtctr 10 + + lwz 10,240(6) + + cmpdi 12, 16 + blt Final_block_dec + +Next_rem_block_dec: + lxvb16x 15, 0, 14 + +.Loop_aes_middle_1x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_next_1x_dec + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_next_1x_dec + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_next_1x_dec + +Do_next_1x_dec: + .long 0x11EFBD09 + + xxlxor 47, 47, 15 + stxvb16x 47, 0, 9 + addi 14, 14, 16 + addi 9, 9, 16 + + xxlor 28+32, 15, 15 + ppc_update_hash_1x + + addi 12, 12, -16 + addi 11, 11, 16 + xxlor 19+32, 0, 0 + .long 0x13DEF8C0 + vxor 15, 30, 19 + + bdnz Next_rem_block_dec + + cmpdi 12, 0 + beq aes_gcm_out + +Final_block_dec: +.Loop_aes_middle_1x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_final_1x_dec + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_final_1x_dec + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_final_1x_dec + +Do_final_1x_dec: + .long 0x11EFBD09 + + lxvb16x 15, 0, 14 + xxlxor 47, 47, 15 + + + li 15, 16 + sub 15, 15, 12 + + vspltisb 16,-1 + vspltisb 17,0 + li 10, 192 + stvx 16, 10, 1 + addi 10, 10, 16 + stvx 17, 10, 1 + + addi 10, 1, 192 + lxvb16x 16, 15, 10 + xxland 47, 47, 16 + + xxlor 28+32, 15, 15 + ppc_update_hash_1x + + + bl Write_partial_block + + b aes_gcm_out diff --git a/sys/crypto/openssl/powerpc64le/aes-gcm-ppc.S b/sys/crypto/openssl/powerpc64le/aes-gcm-ppc.S new file mode 
100644 --- /dev/null +++ b/sys/crypto/openssl/powerpc64le/aes-gcm-ppc.S @@ -0,0 +1,1340 @@ +/* Do not modify. This file is auto-generated from aes-ppc.pl. */ +.machine "any" +.abiversion 2 +.text + + + + + +.macro .Loop_aes_middle4x + xxlor 19+32, 1, 1 + xxlor 20+32, 2, 2 + xxlor 21+32, 3, 3 + xxlor 22+32, 4, 4 + + .long 0x11EF9D08 + .long 0x12109D08 + .long 0x12319D08 + .long 0x12529D08 + + .long 0x11EFA508 + .long 0x1210A508 + .long 0x1231A508 + .long 0x1252A508 + + .long 0x11EFAD08 + .long 0x1210AD08 + .long 0x1231AD08 + .long 0x1252AD08 + + .long 0x11EFB508 + .long 0x1210B508 + .long 0x1231B508 + .long 0x1252B508 + + xxlor 19+32, 5, 5 + xxlor 20+32, 6, 6 + xxlor 21+32, 7, 7 + xxlor 22+32, 8, 8 + + .long 0x11EF9D08 + .long 0x12109D08 + .long 0x12319D08 + .long 0x12529D08 + + .long 0x11EFA508 + .long 0x1210A508 + .long 0x1231A508 + .long 0x1252A508 + + .long 0x11EFAD08 + .long 0x1210AD08 + .long 0x1231AD08 + .long 0x1252AD08 + + .long 0x11EFB508 + .long 0x1210B508 + .long 0x1231B508 + .long 0x1252B508 + + xxlor 23+32, 9, 9 + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 +.endm + + + + + +.macro .Loop_aes_middle8x + xxlor 23+32, 1, 1 + xxlor 24+32, 2, 2 + xxlor 25+32, 3, 3 + xxlor 26+32, 4, 4 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + + .long 0x11EFD508 + .long 0x1210D508 + .long 0x1231D508 + .long 0x1252D508 + .long 0x1273D508 + .long 0x1294D508 + .long 0x12B5D508 + .long 0x12D6D508 + + xxlor 23+32, 5, 5 + xxlor 24+32, 6, 6 + xxlor 25+32, 7, 7 + xxlor 26+32, 8, 8 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + + .long 0x11EFD508 + .long 0x1210D508 + .long 0x1231D508 + .long 0x1252D508 + .long 0x1273D508 + .long 0x1294D508 + .long 0x12B5D508 + .long 0x12D6D508 + + xxlor 23+32, 9, 9 + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 +.endm + + + + +ppc_aes_gcm_ghash: + vxor 15, 15, 0 + + xxlxor 29, 29, 29 + + .long 0x12EC7CC8 + .long 0x130984C8 + .long 0x13268CC8 + .long 0x134394C8 + + vxor 23, 23, 24 + vxor 23, 23, 25 + vxor 23, 23, 26 + + .long 0x130D7CC8 + .long 0x132A84C8 + .long 0x13478CC8 + .long 0x136494C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + + .long 0x139714C8 + + xxlor 29+32, 29, 29 + vsldoi 26, 24, 29, 8 + vsldoi 29, 29, 24, 8 + vxor 23, 23, 26 + + vsldoi 23, 23, 23, 8 + vxor 23, 23, 28 + + .long 0x130E7CC8 + .long 0x132B84C8 + .long 0x13488CC8 + .long 0x136594C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + vxor 24, 24, 29 + + + vsldoi 27, 23, 23, 8 + .long 0x12F714C8 + vxor 27, 27, 24 + vxor 23, 23, 27 + + xxlor 32, 23+32, 23+32 + + blr + + + + + +.macro ppc_aes_gcm_ghash2_4x + + vxor 15, 15, 0 
+ + xxlxor 29, 29, 29 + + .long 0x12EC7CC8 + .long 0x130984C8 + .long 0x13268CC8 + .long 0x134394C8 + + vxor 23, 23, 24 + vxor 23, 23, 25 + vxor 23, 23, 26 + + .long 0x130D7CC8 + .long 0x132A84C8 + .long 0x13478CC8 + .long 0x136494C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + + + .long 0x139714C8 + + xxlor 29+32, 29, 29 + + vxor 24, 24, 27 + vsldoi 26, 24, 29, 8 + vsldoi 29, 29, 24, 8 + vxor 23, 23, 26 + + vsldoi 23, 23, 23, 8 + vxor 23, 23, 28 + + .long 0x130E7CC8 + .long 0x132B84C8 + .long 0x13488CC8 + .long 0x136594C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + vxor 24, 24, 29 + + + vsldoi 27, 23, 23, 8 + .long 0x12F714C8 + vxor 27, 27, 24 + vxor 27, 23, 27 + + + .long 0x1309A4C8 + .long 0x1326ACC8 + .long 0x1343B4C8 + vxor 19, 19, 27 + .long 0x12EC9CC8 + + vxor 23, 23, 24 + vxor 23, 23, 25 + vxor 23, 23, 26 + + .long 0x130D9CC8 + .long 0x132AA4C8 + .long 0x1347ACC8 + .long 0x1364B4C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + + + .long 0x139714C8 + + xxlor 29+32, 29, 29 + + vxor 24, 24, 27 + vsldoi 26, 24, 29, 8 + vsldoi 29, 29, 24, 8 + vxor 23, 23, 26 + + vsldoi 23, 23, 23, 8 + vxor 23, 23, 28 + + .long 0x130E9CC8 + .long 0x132BA4C8 + .long 0x1348ACC8 + .long 0x1365B4C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + vxor 24, 24, 29 + + + vsldoi 27, 23, 23, 8 + .long 0x12F714C8 + vxor 27, 27, 24 + vxor 23, 23, 27 + + xxlor 32, 23+32, 23+32 + +.endm + + + + +.macro ppc_update_hash_1x + vxor 28, 28, 0 + + vxor 19, 19, 19 + + .long 0x12C3E4C8 + .long 0x12E4E4C8 + .long 0x1305E4C8 + + .long 0x137614C8 + + vsldoi 25, 23, 19, 8 + vsldoi 26, 19, 23, 8 + vxor 22, 22, 25 + vxor 24, 24, 26 + + vsldoi 22, 22, 22, 8 + vxor 22, 22, 27 + + vsldoi 20, 22, 22, 8 + .long 0x12D614C8 + vxor 20, 20, 24 + vxor 22, 22, 20 + + vor 0,22,22 + +.endm + + + + + + + + + + + + + +.global ppc_aes_gcm_encrypt +.align 5 +ppc_aes_gcm_encrypt: +_ppc_aes_gcm_encrypt: + + stdu 1,-512(1) + mflr 0 + + std 14,112(1) + std 15,120(1) + std 16,128(1) + std 17,136(1) + std 18,144(1) + std 19,152(1) + std 20,160(1) + std 21,168(1) + li 9, 256 + stvx 20, 9, 1 + addi 9, 9, 16 + stvx 21, 9, 1 + addi 9, 9, 16 + stvx 22, 9, 1 + addi 9, 9, 16 + stvx 23, 9, 1 + addi 9, 9, 16 + stvx 24, 9, 1 + addi 9, 9, 16 + stvx 25, 9, 1 + addi 9, 9, 16 + stvx 26, 9, 1 + addi 9, 9, 16 + stvx 27, 9, 1 + addi 9, 9, 16 + stvx 28, 9, 1 + addi 9, 9, 16 + stvx 29, 9, 1 + addi 9, 9, 16 + stvx 30, 9, 1 + addi 9, 9, 16 + stvx 31, 9, 1 + std 0, 528(1) + + + lxvb16x 32, 0, 8 + + + li 10, 32 + lxvd2x 2+32, 10, 8 + li 10, 48 + lxvd2x 3+32, 10, 8 + li 10, 64 + lxvd2x 4+32, 10, 8 + li 10, 80 + lxvd2x 5+32, 10, 8 + + li 10, 96 + lxvd2x 6+32, 10, 8 + li 10, 112 + lxvd2x 7+32, 10, 8 + li 10, 128 + lxvd2x 8+32, 10, 8 + + li 10, 144 + lxvd2x 9+32, 10, 8 + li 10, 160 + lxvd2x 10+32, 10, 8 + li 10, 176 + lxvd2x 11+32, 10, 8 + + li 10, 192 + lxvd2x 12+32, 10, 8 + li 10, 208 + lxvd2x 13+32, 10, 8 + li 10, 224 + lxvd2x 14+32, 10, 8 + + + lxvb16x 30+32, 0, 7 + + mr 12, 5 + li 11, 0 + + + vxor 31, 31, 31 + vspltisb 22,1 + vsldoi 31, 31, 22,1 + + + lxv 0, 0(6) + lxv 1, 0x10(6) + lxv 2, 0x20(6) + lxv 3, 0x30(6) + lxv 4, 0x40(6) + lxv 5, 0x50(6) + lxv 6, 0x60(6) + lxv 7, 0x70(6) + lxv 8, 0x80(6) + lxv 9, 0x90(6) + lxv 10, 0xa0(6) + + + lwz 9,240(6) + + + + xxlor 32+29, 0, 0 + vxor 15, 30, 29 + + cmpdi 9, 10 + beq .Loop_aes_gcm_8x + + + lxv 11, 0xb0(6) + lxv 12, 0xc0(6) + + cmpdi 9, 12 + beq .Loop_aes_gcm_8x + + + lxv 13, 0xd0(6) + lxv 14, 0xe0(6) + cmpdi 9, 14 + beq .Loop_aes_gcm_8x + + b aes_gcm_out + +.align 5 +.Loop_aes_gcm_8x: + mr 14, 3 + mr 9, 4 + + + 
+	li	10, 128
+	divdu	10, 5, 10
+	cmpdi	10, 0
+	beq	.Loop_last_block
+
+	.long	0x13DEF8C0
+	vxor	16, 30, 29
+	.long	0x13DEF8C0
+	vxor	17, 30, 29
+	.long	0x13DEF8C0
+	vxor	18, 30, 29
+	.long	0x13DEF8C0
+	vxor	19, 30, 29
+	.long	0x13DEF8C0
+	vxor	20, 30, 29
+	.long	0x13DEF8C0
+	vxor	21, 30, 29
+	.long	0x13DEF8C0
+	vxor	22, 30, 29
+
+	mtctr	10
+
+	li	15, 16
+	li	16, 32
+	li	17, 48
+	li	18, 64
+	li	19, 80
+	li	20, 96
+	li	21, 112
+
+	lwz	10, 240(6)
+
+.Loop_8x_block:
+
+	lxvb16x	15, 0, 14
+	lxvb16x	16, 15, 14
+	lxvb16x	17, 16, 14
+	lxvb16x	18, 17, 14
+	lxvb16x	19, 18, 14
+	lxvb16x	20, 19, 14
+	lxvb16x	21, 20, 14
+	lxvb16x	22, 21, 14
+	addi	14, 14, 128
+
+.Loop_aes_middle8x
+
+	xxlor	23+32, 10, 10
+
+	cmpdi	10, 10
+	beq	Do_next_ghash
+
+
+	xxlor	24+32, 11, 11
+
+	.long	0x11EFBD08
+	.long	0x1210BD08
+	.long	0x1231BD08
+	.long	0x1252BD08
+	.long	0x1273BD08
+	.long	0x1294BD08
+	.long	0x12B5BD08
+	.long	0x12D6BD08
+
+	.long	0x11EFC508
+	.long	0x1210C508
+	.long	0x1231C508
+	.long	0x1252C508
+	.long	0x1273C508
+	.long	0x1294C508
+	.long	0x12B5C508
+	.long	0x12D6C508
+
+	xxlor	23+32, 12, 12
+
+	cmpdi	10, 12
+	beq	Do_next_ghash
+
+
+	xxlor	24+32, 13, 13
+
+	.long	0x11EFBD08
+	.long	0x1210BD08
+	.long	0x1231BD08
+	.long	0x1252BD08
+	.long	0x1273BD08
+	.long	0x1294BD08
+	.long	0x12B5BD08
+	.long	0x12D6BD08
+
+	.long	0x11EFC508
+	.long	0x1210C508
+	.long	0x1231C508
+	.long	0x1252C508
+	.long	0x1273C508
+	.long	0x1294C508
+	.long	0x12B5C508
+	.long	0x12D6C508
+
+	xxlor	23+32, 14, 14
+
+	cmpdi	10, 14
+	beq	Do_next_ghash
+	b	aes_gcm_out
+
+Do_next_ghash:
+
+
+
+	.long	0x11EFBD09
+	.long	0x1210BD09
+
+	xxlxor	47, 47, 15
+	stxvb16x	47, 0, 9
+	xxlxor	48, 48, 16
+	stxvb16x	48, 15, 9
+
+	.long	0x1231BD09
+	.long	0x1252BD09
+
+	xxlxor	49, 49, 17
+	stxvb16x	49, 16, 9
+	xxlxor	50, 50, 18
+	stxvb16x	50, 17, 9
+
+	.long	0x1273BD09
+	.long	0x1294BD09
+
+	xxlxor	51, 51, 19
+	stxvb16x	51, 18, 9
+	xxlxor	52, 52, 20
+	stxvb16x	52, 19, 9
+
+	.long	0x12B5BD09
+	.long	0x12D6BD09
+
+	xxlxor	53, 53, 21
+	stxvb16x	53, 20, 9
+	xxlxor	54, 54, 22
+	stxvb16x	54, 21, 9
+
+	addi	9, 9, 128
+
+
+	ppc_aes_gcm_ghash2_4x
+
+	xxlor	27+32, 0, 0
+	.long	0x13DEF8C0
+	vor	29,30,30
+	vxor	15, 30, 27
+	.long	0x13DEF8C0
+	vxor	16, 30, 27
+	.long	0x13DEF8C0
+	vxor	17, 30, 27
+	.long	0x13DEF8C0
+	vxor	18, 30, 27
+	.long	0x13DEF8C0
+	vxor	19, 30, 27
+	.long	0x13DEF8C0
+	vxor	20, 30, 27
+	.long	0x13DEF8C0
+	vxor	21, 30, 27
+	.long	0x13DEF8C0
+	vxor	22, 30, 27
+
+	addi	12, 12, -128
+	addi	11, 11, 128
+
+	bdnz	.Loop_8x_block
+
+	vor	30,29,29
+
+.Loop_last_block:
+	cmpdi	12, 0
+	beq	aes_gcm_out
+
+
+	li	10, 16
+	divdu	10, 12, 10
+
+	mtctr	10
+
+	lwz	10, 240(6)
+
+	cmpdi	12, 16
+	blt	Final_block
+
+.macro .Loop_aes_middle_1x
+	xxlor	19+32, 1, 1
+	xxlor	20+32, 2, 2
+	xxlor	21+32, 3, 3
+	xxlor	22+32, 4, 4
+
+	.long	0x11EF9D08
+	.long	0x11EFA508
+	.long	0x11EFAD08
+	.long	0x11EFB508
+
+	xxlor	19+32, 5, 5
+	xxlor	20+32, 6, 6
+	xxlor	21+32, 7, 7
+	xxlor	22+32, 8, 8
+
+	.long	0x11EF9D08
+	.long	0x11EFA508
+	.long	0x11EFAD08
+	.long	0x11EFB508
+
+	xxlor	19+32, 9, 9
+	.long	0x11EF9D08
+.endm
+
+Next_rem_block:
+	lxvb16x	15, 0, 14
+
+.Loop_aes_middle_1x
+
+	xxlor	23+32, 10, 10
+
+	cmpdi	10, 10
+	beq	Do_next_1x
+
+
+	xxlor	24+32, 11, 11
+
+	.long	0x11EFBD08
+	.long	0x11EFC508
+
+	xxlor	23+32, 12, 12
+
+	cmpdi	10, 12
+	beq	Do_next_1x
+
+
+	xxlor	24+32, 13, 13
+
+	.long	0x11EFBD08
+	.long	0x11EFC508
+
+	xxlor	23+32, 14, 14
+
+	cmpdi	10, 14
+	beq	Do_next_1x
+
+Do_next_1x:
+	.long	0x11EFBD09
+
+	xxlxor	47, 47, 15
+	stxvb16x	47, 0, 9
+	addi	14, 14, 16
+	addi	9, 9, 16
+
+	vor	28,15,15
+	ppc_update_hash_1x
+
+	addi	12, 12, -16
+	addi	11, 11, 16
+	xxlor	19+32, 0, 0
+	.long	0x13DEF8C0
+	vxor	15, 30, 19
+
+	bdnz	Next_rem_block
+
+	cmpdi	12, 0
+	beq	aes_gcm_out
+
+Final_block:
+.Loop_aes_middle_1x
+
+	xxlor	23+32, 10, 10
+
+	cmpdi	10, 10
+	beq	Do_final_1x
+
+
+	xxlor	24+32, 11, 11
+
+	.long	0x11EFBD08
+	.long	0x11EFC508
+
+	xxlor	23+32, 12, 12
+
+	cmpdi	10, 12
+	beq	Do_final_1x
+
+
+	xxlor	24+32, 13, 13
+
+	.long	0x11EFBD08
+	.long	0x11EFC508
+
+	xxlor	23+32, 14, 14
+
+	cmpdi	10, 14
+	beq	Do_final_1x
+
+Do_final_1x:
+	.long	0x11EFBD09
+
+	lxvb16x	15, 0, 14
+	xxlxor	47, 47, 15
+
+
+	li	15, 16
+	sub	15, 15, 12
+
+	vspltisb	16,-1
+	vspltisb	17,0
+	li	10, 192
+	stvx	16, 10, 1
+	addi	10, 10, 16
+	stvx	17, 10, 1
+
+	addi	10, 1, 192
+	lxvb16x	16, 15, 10
+	xxland	47, 47, 16
+
+	vor	28,15,15
+	ppc_update_hash_1x
+
+
+	bl	Write_partial_block
+
+	b	aes_gcm_out
+
+
+
+
+
+
+
+Write_partial_block:
+	li	10, 192
+	stxvb16x	15+32, 10, 1
+
+
+	addi	10, 9, -1
+	addi	16, 1, 191
+
+	mtctr	12
+	li	15, 0
+
+Write_last_byte:
+	lbzu	14, 1(16)
+	stbu	14, 1(10)
+	bdnz	Write_last_byte
+	blr
+
+aes_gcm_out:
+
+	stxvb16x	32, 0, 8
+	add	3, 11, 12
+
+	li	9, 256
+	lvx	20, 9, 1
+	addi	9, 9, 16
+	lvx	21, 9, 1
+	addi	9, 9, 16
+	lvx	22, 9, 1
+	addi	9, 9, 16
+	lvx	23, 9, 1
+	addi	9, 9, 16
+	lvx	24, 9, 1
+	addi	9, 9, 16
+	lvx	25, 9, 1
+	addi	9, 9, 16
+	lvx	26, 9, 1
+	addi	9, 9, 16
+	lvx	27, 9, 1
+	addi	9, 9, 16
+	lvx	28, 9, 1
+	addi	9, 9, 16
+	lvx	29, 9, 1
+	addi	9, 9, 16
+	lvx	30, 9, 1
+	addi	9, 9, 16
+	lvx	31, 9, 1
+
+	ld	0, 528(1)
+	ld	14,112(1)
+	ld	15,120(1)
+	ld	16,128(1)
+	ld	17,136(1)
+	ld	18,144(1)
+	ld	19,152(1)
+	ld	20,160(1)
+	ld	21,168(1)
+
+	mtlr	0
+	addi	1, 1, 512
+	blr
+
+
+
+.global ppc_aes_gcm_decrypt
+.align 5
+ppc_aes_gcm_decrypt:
+_ppc_aes_gcm_decrypt:
+
+	stdu	1,-512(1)
+	mflr	0
+
+	std	14,112(1)
+	std	15,120(1)
+	std	16,128(1)
+	std	17,136(1)
+	std	18,144(1)
+	std	19,152(1)
+	std	20,160(1)
+	std	21,168(1)
+	li	9, 256
+	stvx	20, 9, 1
+	addi	9, 9, 16
+	stvx	21, 9, 1
+	addi	9, 9, 16
+	stvx	22, 9, 1
+	addi	9, 9, 16
+	stvx	23, 9, 1
+	addi	9, 9, 16
+	stvx	24, 9, 1
+	addi	9, 9, 16
+	stvx	25, 9, 1
+	addi	9, 9, 16
+	stvx	26, 9, 1
+	addi	9, 9, 16
+	stvx	27, 9, 1
+	addi	9, 9, 16
+	stvx	28, 9, 1
+	addi	9, 9, 16
+	stvx	29, 9, 1
+	addi	9, 9, 16
+	stvx	30, 9, 1
+	addi	9, 9, 16
+	stvx	31, 9, 1
+	std	0, 528(1)
+
+
+	lxvb16x	32, 0, 8
+
+
+	li	10, 32
+	lxvd2x	2+32, 10, 8
+	li	10, 48
+	lxvd2x	3+32, 10, 8
+	li	10, 64
+	lxvd2x	4+32, 10, 8
+	li	10, 80
+	lxvd2x	5+32, 10, 8
+
+	li	10, 96
+	lxvd2x	6+32, 10, 8
+	li	10, 112
+	lxvd2x	7+32, 10, 8
+	li	10, 128
+	lxvd2x	8+32, 10, 8
+
+	li	10, 144
+	lxvd2x	9+32, 10, 8
+	li	10, 160
+	lxvd2x	10+32, 10, 8
+	li	10, 176
+	lxvd2x	11+32, 10, 8
+
+	li	10, 192
+	lxvd2x	12+32, 10, 8
+	li	10, 208
+	lxvd2x	13+32, 10, 8
+	li	10, 224
+	lxvd2x	14+32, 10, 8
+
+
+	lxvb16x	30+32, 0, 7
+
+	mr	12, 5
+	li	11, 0
+
+
+	vxor	31, 31, 31
+	vspltisb	22,1
+	vsldoi	31, 31, 22,1
+
+
+	lxv	0, 0(6)
+	lxv	1, 0x10(6)
+	lxv	2, 0x20(6)
+	lxv	3, 0x30(6)
+	lxv	4, 0x40(6)
+	lxv	5, 0x50(6)
+	lxv	6, 0x60(6)
+	lxv	7, 0x70(6)
+	lxv	8, 0x80(6)
+	lxv	9, 0x90(6)
+	lxv	10, 0xa0(6)
+
+
+	lwz	9,240(6)
+
+
+
+	xxlor	32+29, 0, 0
+	vxor	15, 30, 29
+
+	cmpdi	9, 10
+	beq	.Loop_aes_gcm_8x_dec
+
+
+	lxv	11, 0xb0(6)
+	lxv	12, 0xc0(6)
+
+	cmpdi	9, 12
+	beq	.Loop_aes_gcm_8x_dec
+
+
+	lxv	13, 0xd0(6)
+	lxv	14, 0xe0(6)
+	cmpdi	9, 14
+	beq	.Loop_aes_gcm_8x_dec
+
+	b	aes_gcm_out
+
+.align 5
+.Loop_aes_gcm_8x_dec:
+	mr	14, 3
+	mr	9, 4
+
+
+	li	10, 128
+	divdu	10, 5, 10
+	cmpdi	10, 0
+	beq	.Loop_last_block_dec
+
+	.long	0x13DEF8C0
+	vxor	16, 30, 29
+	.long	0x13DEF8C0
+	vxor	17, 30, 29
+	.long	0x13DEF8C0
+	vxor	18, 30, 29
+	.long	0x13DEF8C0
+	vxor	19, 30, 29
+	.long	0x13DEF8C0
+	vxor	20, 30, 29
+	.long	0x13DEF8C0
+	vxor	21, 30, 29
+	.long	0x13DEF8C0
+	vxor	22, 30, 29
+
+	mtctr	10
+
+	li	15, 16
+	li	16, 32
+	li	17, 48
+	li	18, 64
+	li	19, 80
+	li	20, 96
+	li	21, 112
+
+	lwz	10, 240(6)
+
+.Loop_8x_block_dec:
+
+	lxvb16x	15, 0, 14
+	lxvb16x	16, 15, 14
+	lxvb16x	17, 16, 14
+	lxvb16x	18, 17, 14
+	lxvb16x	19, 18, 14
+	lxvb16x	20, 19, 14
+	lxvb16x	21, 20, 14
+	lxvb16x	22, 21, 14
+	addi	14, 14, 128
+
+.Loop_aes_middle8x
+
+	xxlor	23+32, 10, 10
+
+	cmpdi	10, 10
+	beq	Do_last_aes_dec
+
+
+	xxlor	24+32, 11, 11
+
+	.long	0x11EFBD08
+	.long	0x1210BD08
+	.long	0x1231BD08
+	.long	0x1252BD08
+	.long	0x1273BD08
+	.long	0x1294BD08
+	.long	0x12B5BD08
+	.long	0x12D6BD08
+
+	.long	0x11EFC508
+	.long	0x1210C508
+	.long	0x1231C508
+	.long	0x1252C508
+	.long	0x1273C508
+	.long	0x1294C508
+	.long	0x12B5C508
+	.long	0x12D6C508
+
+	xxlor	23+32, 12, 12
+
+	cmpdi	10, 12
+	beq	Do_last_aes_dec
+
+
+	xxlor	24+32, 13, 13
+
+	.long	0x11EFBD08
+	.long	0x1210BD08
+	.long	0x1231BD08
+	.long	0x1252BD08
+	.long	0x1273BD08
+	.long	0x1294BD08
+	.long	0x12B5BD08
+	.long	0x12D6BD08
+
+	.long	0x11EFC508
+	.long	0x1210C508
+	.long	0x1231C508
+	.long	0x1252C508
+	.long	0x1273C508
+	.long	0x1294C508
+	.long	0x12B5C508
+	.long	0x12D6C508
+
+	xxlor	23+32, 14, 14
+
+	cmpdi	10, 14
+	beq	Do_last_aes_dec
+	b	aes_gcm_out
+
+Do_last_aes_dec:
+
+
+
+	.long	0x11EFBD09
+	.long	0x1210BD09
+
+	xxlxor	47, 47, 15
+	stxvb16x	47, 0, 9
+	xxlxor	48, 48, 16
+	stxvb16x	48, 15, 9
+
+	.long	0x1231BD09
+	.long	0x1252BD09
+
+	xxlxor	49, 49, 17
+	stxvb16x	49, 16, 9
+	xxlxor	50, 50, 18
+	stxvb16x	50, 17, 9
+
+	.long	0x1273BD09
+	.long	0x1294BD09
+
+	xxlxor	51, 51, 19
+	stxvb16x	51, 18, 9
+	xxlxor	52, 52, 20
+	stxvb16x	52, 19, 9
+
+	.long	0x12B5BD09
+	.long	0x12D6BD09
+
+	xxlxor	53, 53, 21
+	stxvb16x	53, 20, 9
+	xxlxor	54, 54, 22
+	stxvb16x	54, 21, 9
+
+	addi	9, 9, 128
+
+	xxlor	15+32, 15, 15
+	xxlor	16+32, 16, 16
+	xxlor	17+32, 17, 17
+	xxlor	18+32, 18, 18
+	xxlor	19+32, 19, 19
+	xxlor	20+32, 20, 20
+	xxlor	21+32, 21, 21
+	xxlor	22+32, 22, 22
+
+
+	ppc_aes_gcm_ghash2_4x
+
+	xxlor	27+32, 0, 0
+	.long	0x13DEF8C0
+	vor	29,30,30
+	vxor	15, 30, 27
+	.long	0x13DEF8C0
+	vxor	16, 30, 27
+	.long	0x13DEF8C0
+	vxor	17, 30, 27
+	.long	0x13DEF8C0
+	vxor	18, 30, 27
+	.long	0x13DEF8C0
+	vxor	19, 30, 27
+	.long	0x13DEF8C0
+	vxor	20, 30, 27
+	.long	0x13DEF8C0
+	vxor	21, 30, 27
+	.long	0x13DEF8C0
+	vxor	22, 30, 27
+	addi	12, 12, -128
+	addi	11, 11, 128
+
+	bdnz	.Loop_8x_block_dec
+
+	vor	30,29,29
+
+.Loop_last_block_dec:
+	cmpdi	12, 0
+	beq	aes_gcm_out
+
+
+	li	10, 16
+	divdu	10, 12, 10
+
+	mtctr	10
+
+	lwz	10,240(6)
+
+	cmpdi	12, 16
+	blt	Final_block_dec
+
+Next_rem_block_dec:
+	lxvb16x	15, 0, 14
+
+.Loop_aes_middle_1x
+
+	xxlor	23+32, 10, 10
+
+	cmpdi	10, 10
+	beq	Do_next_1x_dec
+
+
+	xxlor	24+32, 11, 11
+
+	.long	0x11EFBD08
+	.long	0x11EFC508
+
+	xxlor	23+32, 12, 12
+
+	cmpdi	10, 12
+	beq	Do_next_1x_dec
+
+
+	xxlor	24+32, 13, 13
+
+	.long	0x11EFBD08
+	.long	0x11EFC508
+
+	xxlor	23+32, 14, 14
+
+	cmpdi	10, 14
+	beq	Do_next_1x_dec
+
+Do_next_1x_dec:
+	.long	0x11EFBD09
+
+	xxlxor	47, 47, 15
+	stxvb16x	47, 0, 9
+	addi	14, 14, 16
+	addi	9, 9, 16
+
+	xxlor	28+32, 15, 15
+	ppc_update_hash_1x
+
+	addi	12, 12, -16
+	addi	11, 11, 16
+	xxlor	19+32, 0, 0
+	.long	0x13DEF8C0
+	vxor	15, 30, 19
+
+	bdnz	Next_rem_block_dec
+
+	cmpdi	12, 0
+	beq	aes_gcm_out
+
+Final_block_dec:
+.Loop_aes_middle_1x
+
+	xxlor	23+32, 10, 10
+
+	cmpdi	10, 10
+	beq	Do_final_1x_dec
+
+
+	xxlor	24+32, 11, 11
+
+	.long	0x11EFBD08
+	.long	0x11EFC508
+
+	xxlor	23+32, 12, 12
+
+	cmpdi	10, 12
+	beq	Do_final_1x_dec
+
+
+	xxlor	24+32, 13, 13
+
+	.long	0x11EFBD08
+	.long	0x11EFC508
+
+	xxlor	23+32, 14, 14
+
+	cmpdi	10, 14
+	beq	Do_final_1x_dec
+
+Do_final_1x_dec:
+	.long	0x11EFBD09
+
+	lxvb16x	15, 0, 14
+	xxlxor	47, 47, 15
+
+
+	li	15, 16
+	sub	15, 15, 12
+
+	vspltisb	16,-1
+	vspltisb	17,0
+	li	10, 192
+	stvx	16, 10, 1
+	addi	10, 10, 16
+	stvx	17, 10, 1
+
+	addi	10, 1, 192
+	lxvb16x	16, 15, 10
+	xxland	47, 47, 16
+
+	xxlor	28+32, 15, 15
+	ppc_update_hash_1x
+
+
+	bl	Write_partial_block
+
+	b	aes_gcm_out
diff --git a/sys/modules/ossl/Makefile b/sys/modules/ossl/Makefile
--- a/sys/modules/ossl/Makefile
+++ b/sys/modules/ossl/Makefile
@@ -48,6 +48,7 @@
 	sha256-x86_64.S \
 	sha512-x86_64.S \
 	ossl_aes_gcm.c \
+	ossl_aes_gcm_avx512.c \
 	ossl_x86.c
 
 SRCS.i386= \
@@ -60,6 +61,8 @@
 	ossl_x86.c
 
 SRCS.powerpc64le= \
+	aes-gcm-ppc.S \
+	ossl_aes_gcm.c \
 	ossl_ppccap.c \
 	aes-ppc.S \
 	aesp8-ppc.S \
@@ -81,6 +84,8 @@
 	x25519-ppc64.S
 
 SRCS.powerpc64= \
+	aes-gcm-ppc.S \
+	ossl_aes_gcm.c \
 	ossl_ppccap.c \
 	aes-ppc.S \
 	aesp8-ppc.S \