diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -279,6 +279,16 @@ compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc:N-mgeneral-regs-only} -I$S/crypto/armv8/ ${WERROR} ${NO_WCAST_QUAL} ${PROF} -march=armv8-a+crypto ${.IMPSRC}" \ no-implicit-rule \ clean "armv8_crypto_wrap.o" +aesv8-armx.o optional armv8crypto \ + dependency "$S/crypto/openssl/aarch64/aesv8-armx.S" \ + compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc:N-mgeneral-regs-only} -I$S/crypto/armv8/ -I$S/crypto/openssl/crypto ${WERROR} ${NO_WCAST_QUAL} ${PROF} -march=armv8-a+crypto ${.IMPSRC}" \ + no-implicit-rule \ + clean "aesv8-armx.o" +ghashv8-armx.o optional armv8crypto \ + dependency "$S/crypto/openssl/aarch64/ghashv8-armx.S" \ + compile-with "${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc:N-mgeneral-regs-only} -I$S/crypto/armv8/ -I$S/crypto/openssl/crypto ${WERROR} ${NO_WCAST_QUAL} ${PROF} -march=armv8-a+crypto ${.IMPSRC}" \ + no-implicit-rule \ + clean "ghashv8-armx.o" crypto/des/des_enc.c optional netsmb crypto/openssl/ossl_aarch64.c optional ossl crypto/openssl/aarch64/sha1-armv8.S optional ossl \ diff --git a/sys/crypto/armv8/armv8_crypto.h b/sys/crypto/armv8/armv8_crypto.h --- a/sys/crypto/armv8/armv8_crypto.h +++ b/sys/crypto/armv8/armv8_crypto.h @@ -32,27 +32,56 @@ #ifndef _ARMV8_CRYPTO_H_ #define _ARMV8_CRYPTO_H_ -#define AES128_ROUNDS 10 -#define AES192_ROUNDS 12 #define AES256_ROUNDS 14 #define AES_SCHED_LEN ((AES256_ROUNDS + 1) * AES_BLOCK_LEN) +typedef struct { + uint32_t aes_key[AES_SCHED_LEN/4]; + int aes_rounds; +} AES_key_t; + +typedef union { + uint64_t u[2]; + uint32_t d[4]; + uint8_t c[16]; + size_t t[16 / sizeof(size_t)]; +} __uint128_val_t; + struct armv8_crypto_session { - uint32_t enc_schedule[AES_SCHED_LEN/4]; - uint32_t dec_schedule[AES_SCHED_LEN/4]; - uint32_t xts_schedule[AES_SCHED_LEN/4]; - int algo; - int rounds; + AES_key_t enc_schedule; + AES_key_t dec_schedule; + AES_key_t xts_schedule; + __uint128_val_t Htable[16]; }; -void armv8_aes_encrypt_cbc(int, const void *, size_t, const uint8_t *, +/* Prototypes for aesv8-armx.S */ +void aes_v8_encrypt(uint8_t *in, uint8_t *out, const AES_key_t *key); +int aes_v8_set_encrypt_key(const unsigned char *userKey, const int bits, const AES_key_t *key); +int aes_v8_set_decrypt_key(const unsigned char *userKey, const int bits, const AES_key_t *key); + +/* Prototypes for ghashv8-armx.S */ +void gcm_init_v8(__uint128_val_t Htable[16], const uint64_t Xi[2]); +void gcm_gmult_v8(uint64_t Xi[2], const __uint128_val_t Htable[16]); +void gcm_ghash_v8(uint64_t Xi[2], const __uint128_val_t Htable[16], const uint8_t *inp, size_t len); + +void armv8_aes_encrypt_cbc(const AES_key_t *, size_t, const uint8_t *, uint8_t *, const uint8_t[static AES_BLOCK_LEN]); -void armv8_aes_decrypt_cbc(int, const void *, size_t, uint8_t *, +void armv8_aes_decrypt_cbc(const AES_key_t *, size_t, uint8_t *, const uint8_t[static AES_BLOCK_LEN]); +void armv8_aes_encrypt_gcm(AES_key_t *, size_t, const uint8_t *, + uint8_t *, size_t, const uint8_t*, + uint8_t tag[static GMAC_DIGEST_LEN], + const uint8_t[static AES_BLOCK_LEN], + const __uint128_val_t *); +int armv8_aes_decrypt_gcm(AES_key_t *, size_t, const uint8_t *, + uint8_t *, size_t, const uint8_t*, + const uint8_t tag[static GMAC_DIGEST_LEN], + const uint8_t[static AES_BLOCK_LEN], + const __uint128_val_t *); -void armv8_aes_encrypt_xts(int, const void *, const void *, size_t, +void armv8_aes_encrypt_xts(AES_key_t *, const void *, size_t, const uint8_t *, uint8_t *, const uint8_t[AES_BLOCK_LEN]); -void armv8_aes_decrypt_xts(int, const void *, const void *, size_t, +void armv8_aes_decrypt_xts(AES_key_t *, const void *, size_t, const uint8_t *, uint8_t *, const uint8_t[AES_BLOCK_LEN]); #endif /* _ARMV8_CRYPTO_H_ */ diff --git a/sys/crypto/armv8/armv8_crypto.c b/sys/crypto/armv8/armv8_crypto.c --- a/sys/crypto/armv8/armv8_crypto.c +++ b/sys/crypto/armv8/armv8_crypto.c @@ -2,6 +2,7 @@ * Copyright (c) 2005-2008 Pawel Jakub Dawidek * Copyright (c) 2010 Konstantin Belousov * Copyright (c) 2014,2016 The FreeBSD Foundation + * Copyright (c) 2020 Ampere Computing * All rights reserved. * * Portions of this software were developed by John-Mark Gurney @@ -58,6 +59,7 @@ #include #include +#include #include #include #include @@ -66,6 +68,7 @@ int dieing; int32_t cid; struct rwlock lock; + bool has_pmul; }; static struct mtx *ctx_mtx; @@ -109,16 +112,20 @@ switch (ID_AA64ISAR0_AES_VAL(reg)) { case ID_AA64ISAR0_AES_BASE: + ret = 0; + device_set_desc(dev, "AES-CBC,AES-XTS"); + break; case ID_AA64ISAR0_AES_PMULL: ret = 0; + device_set_desc(dev, "AES-CBC,AES-XTS,AES-GCM"); + break; + default: break; case ID_AA64ISAR0_AES_NONE: device_printf(dev, "CPU lacks AES instructions"); break; } - device_set_desc_copy(dev, "AES-CBC,AES-XTS"); - /* TODO: Check more fields as we support more features */ return (ret); @@ -128,11 +135,17 @@ armv8_crypto_attach(device_t dev) { struct armv8_crypto_softc *sc; + uint64_t reg; int i; sc = device_get_softc(dev); sc->dieing = 0; + reg = READ_SPECIALREG(id_aa64isar0_el1); + + if (ID_AA64ISAR0_AES_VAL(reg) == ID_AA64ISAR0_AES_PMULL) + sc->has_pmul = true; + sc->cid = crypto_get_driverid(dev, sizeof(struct armv8_crypto_session), CRYPTOCAP_F_SOFTWARE | CRYPTOCAP_F_SYNC | CRYPTOCAP_F_ACCEL_SOFTWARE); if (sc->cid < 0) { @@ -185,14 +198,43 @@ return (0); } +#define SUPPORTED_SES (CSP_F_SEPARATE_OUTPUT | CSP_F_SEPARATE_AAD) + static int armv8_crypto_probesession(device_t dev, const struct crypto_session_params *csp) { + struct armv8_crypto_softc *sc; + + sc = device_get_softc(dev); - if (csp->csp_flags != 0) + if ((csp->csp_flags & ~(SUPPORTED_SES)) != 0) return (EINVAL); + switch (csp->csp_mode) { + case CSP_MODE_AEAD: + switch (csp->csp_cipher_alg) { + case CRYPTO_AES_NIST_GCM_16: + if (!sc->has_pmul) + return (EINVAL); + if (csp->csp_ivlen != AES_GCM_IV_LEN) + return (EINVAL); + if (csp->csp_auth_mlen != 0 && + csp->csp_auth_mlen != GMAC_DIGEST_LEN) + return (EINVAL); + switch (csp->csp_cipher_klen * 8) { + case 128: + case 192: + case 256: + break; + default: + return (EINVAL); + } + break; + default: + return (EINVAL); + } + break; case CSP_MODE_CIPHER: switch (csp->csp_cipher_alg) { case CRYPTO_AES_CBC: @@ -228,40 +270,55 @@ return (CRYPTODEV_PROBE_ACCEL_SOFTWARE); } -static void +static int armv8_crypto_cipher_setup(struct armv8_crypto_session *ses, const struct crypto_session_params *csp, const uint8_t *key, int keylen) { - int i; + __uint128_val_t H; + struct fpu_kern_ctx *ctx; + int kt, i; if (csp->csp_cipher_alg == CRYPTO_AES_XTS) keylen /= 2; switch (keylen * 8) { case 128: - ses->rounds = AES128_ROUNDS; - break; case 192: - ses->rounds = AES192_ROUNDS; - break; case 256: - ses->rounds = AES256_ROUNDS; break; default: - panic("invalid AES key length"); + return (EINVAL); } - rijndaelKeySetupEnc(ses->enc_schedule, key, keylen * 8); - rijndaelKeySetupDec(ses->dec_schedule, key, keylen * 8); - if (csp->csp_cipher_alg == CRYPTO_AES_XTS) - rijndaelKeySetupEnc(ses->xts_schedule, key + keylen, keylen * 8); + kt = is_fpu_kern_thread(0); + if (!kt) { + AQUIRE_CTX(i, ctx); + fpu_kern_enter(curthread, ctx, + FPU_KERN_NORMAL | FPU_KERN_KTHR); + } + + aes_v8_set_encrypt_key(key, + keylen * 8, &ses->enc_schedule); + + if ((csp->csp_cipher_alg == CRYPTO_AES_XTS) || + (csp->csp_cipher_alg == CRYPTO_AES_CBC)) + aes_v8_set_decrypt_key(key, + keylen * 8, &ses->dec_schedule); + + if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16) { + memset(H.c, 0, sizeof(H.c)); + aes_v8_encrypt(H.c, H.c, &ses->enc_schedule); + H.u[0] = bswap64(H.u[0]); + H.u[1] = bswap64(H.u[1]); + gcm_init_v8(ses->Htable, H.u); + } - for (i = 0; i < nitems(ses->enc_schedule); i++) { - ses->enc_schedule[i] = bswap32(ses->enc_schedule[i]); - ses->dec_schedule[i] = bswap32(ses->dec_schedule[i]); - if (csp->csp_cipher_alg == CRYPTO_AES_XTS) - ses->xts_schedule[i] = bswap32(ses->xts_schedule[i]); + if (!kt) { + fpu_kern_leave(curthread, ctx); + RELEASE_CTX(i, ctx); } + + return (0); } static int @@ -270,6 +327,7 @@ { struct armv8_crypto_softc *sc; struct armv8_crypto_session *ses; + int error; sc = device_get_softc(dev); rw_wlock(&sc->lock); @@ -279,40 +337,29 @@ } ses = crypto_get_driver_session(cses); - armv8_crypto_cipher_setup(ses, csp, csp->csp_cipher_key, + error = armv8_crypto_cipher_setup(ses, csp, csp->csp_cipher_key, csp->csp_cipher_klen); rw_wunlock(&sc->lock); - return (0); + return (error); } static int armv8_crypto_process(device_t dev, struct cryptop *crp, int hint __unused) { struct armv8_crypto_session *ses; - int error; - - /* We can only handle full blocks for now */ - if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) { - error = EINVAL; - goto out; - } ses = crypto_get_driver_session(crp->crp_session); - error = armv8_crypto_cipher_process(ses, crp); - -out: - crp->crp_etype = error; + crp->crp_etype = armv8_crypto_cipher_process(ses, crp); crypto_done(crp); return (0); } static uint8_t * -armv8_crypto_cipher_alloc(struct cryptop *crp, int *allocated) +armv8_crypto_cipher_alloc(struct cryptop *crp, int start, int length, int *allocated) { uint8_t *addr; - addr = crypto_contiguous_subsegment(crp, crp->crp_payload_start, - crp->crp_payload_length); + addr = crypto_contiguous_subsegment(crp, start, length); if (addr != NULL) { *allocated = 0; return (addr); @@ -320,8 +367,7 @@ addr = malloc(crp->crp_payload_length, M_ARMV8_CRYPTO, M_NOWAIT); if (addr != NULL) { *allocated = 1; - crypto_copydata(crp, crp->crp_payload_start, - crp->crp_payload_length, addr); + crypto_copydata(crp, start, length, addr); } else *allocated = 0; return (addr); @@ -333,19 +379,63 @@ { const struct crypto_session_params *csp; struct fpu_kern_ctx *ctx; - uint8_t *buf; - uint8_t iv[AES_BLOCK_LEN]; - int allocated, i; + uint8_t *buf, *authbuf, *outbuf; + uint8_t iv[AES_BLOCK_LEN], tag[GMAC_DIGEST_LEN]; + int allocated, authallocated, outallocated, i; int encflag; int kt; + int error; + bool outcopy; csp = crypto_get_params(crp->crp_session); encflag = CRYPTO_OP_IS_ENCRYPT(crp->crp_op); - buf = armv8_crypto_cipher_alloc(crp, &allocated); + allocated = 0; + outallocated = 0; + authallocated = 0; + authbuf = NULL; + kt = 1; + + buf = armv8_crypto_cipher_alloc(crp, crp->crp_payload_start, + crp->crp_payload_length, &allocated); if (buf == NULL) return (ENOMEM); + if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16) { + if (crp->crp_aad != NULL) + authbuf = crp->crp_aad; + else + authbuf = armv8_crypto_cipher_alloc(crp, crp->crp_aad_start, + crp->crp_aad_length, &authallocated); + if (authbuf == NULL) { + error = ENOMEM; + goto out; + } + } + + if (CRYPTO_HAS_OUTPUT_BUFFER(crp)) { + outbuf = crypto_buffer_contiguous_subsegment(&crp->crp_obuf, + crp->crp_payload_output_start, crp->crp_payload_length); + if (outbuf == NULL) { + outcopy = true; + if (allocated) + outbuf = buf; + else { + outbuf = malloc(crp->crp_payload_length, + M_ARMV8_CRYPTO, M_NOWAIT); + if (outbuf == NULL) { + error = ENOMEM; + goto out; + } + outallocated = true; + } + } else + outcopy = false; + } else { + outbuf = buf; + outcopy = allocated; + } + kt = is_fpu_kern_thread(0); if (!kt) { AQUIRE_CTX(i, ctx); @@ -363,36 +453,74 @@ /* Do work */ switch (csp->csp_cipher_alg) { case CRYPTO_AES_CBC: + if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) { + error = EINVAL; + goto out; + } if (encflag) - armv8_aes_encrypt_cbc(ses->rounds, ses->enc_schedule, + armv8_aes_encrypt_cbc(&ses->enc_schedule, crp->crp_payload_length, buf, buf, iv); else - armv8_aes_decrypt_cbc(ses->rounds, ses->dec_schedule, + armv8_aes_decrypt_cbc(&ses->dec_schedule, crp->crp_payload_length, buf, iv); break; case CRYPTO_AES_XTS: if (encflag) - armv8_aes_encrypt_xts(ses->rounds, ses->enc_schedule, - ses->xts_schedule, crp->crp_payload_length, buf, + armv8_aes_encrypt_xts(&ses->enc_schedule, + &ses->xts_schedule.aes_key, crp->crp_payload_length, buf, buf, iv); else - armv8_aes_decrypt_xts(ses->rounds, ses->dec_schedule, - ses->xts_schedule, crp->crp_payload_length, buf, + armv8_aes_decrypt_xts(&ses->dec_schedule, + &ses->xts_schedule.aes_key, crp->crp_payload_length, buf, buf, iv); break; + case CRYPTO_AES_NIST_GCM_16: + if (encflag) { + memset(tag, 0, sizeof(tag)); + armv8_aes_encrypt_gcm(&ses->enc_schedule, + crp->crp_payload_length, + buf, outbuf, + crp->crp_aad_length, authbuf, + tag, iv, ses->Htable); + crypto_copyback(crp, crp->crp_digest_start, sizeof(tag), + tag); + } else { + crypto_copydata(crp, crp->crp_digest_start, sizeof(tag), + tag); + if (armv8_aes_decrypt_gcm(&ses->enc_schedule, + crp->crp_payload_length, + buf, outbuf, + crp->crp_aad_length, authbuf, + tag, iv, ses->Htable) != 0) { + error = EBADMSG; + goto out; + } + } + break; } - if (allocated) - crypto_copyback(crp, crp->crp_payload_start, - crp->crp_payload_length, buf); + if (outcopy) + crypto_copyback(crp, CRYPTO_HAS_OUTPUT_BUFFER(crp) ? + crp->crp_payload_output_start : crp->crp_payload_start, + crp->crp_payload_length, outbuf); + error = 0; +out: if (!kt) { fpu_kern_leave(curthread, ctx); RELEASE_CTX(i, ctx); } + if (allocated) zfree(buf, M_ARMV8_CRYPTO); - return (0); + if (authallocated) + zfree(authbuf, M_ARMV8_CRYPTO); + if (outallocated) + zfree(outbuf, M_ARMV8_CRYPTO); + explicit_bzero(iv, sizeof(iv)); + explicit_bzero(tag, sizeof(tag)); + + return (error); } static device_method_t armv8_crypto_methods[] = { diff --git a/sys/crypto/armv8/armv8_crypto_wrap.c b/sys/crypto/armv8/armv8_crypto_wrap.c --- a/sys/crypto/armv8/armv8_crypto_wrap.c +++ b/sys/crypto/armv8/armv8_crypto_wrap.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 2016 The FreeBSD Foundation + * Copyright (c) 2020 Ampere Computing * All rights reserved. * * This software was developed by Andrew Turner under @@ -41,6 +42,8 @@ #include #include +#include +#include #include #include @@ -90,7 +93,7 @@ } void -armv8_aes_encrypt_cbc(int rounds, const void *key_schedule, size_t len, +armv8_aes_encrypt_cbc(const AES_key_t *key, size_t len, const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN]) { uint8x16_t tot, ivreg, tmp; @@ -100,8 +103,8 @@ ivreg = vld1q_u8(iv); for (i = 0; i < len; i++) { tmp = vld1q_u8(from); - tot = armv8_aes_enc(rounds - 1, key_schedule, - veorq_u8(tmp, ivreg)); + tot = armv8_aes_enc(key->aes_rounds - 1, + (const void*)key->aes_key, veorq_u8(tmp, ivreg)); ivreg = tot; vst1q_u8(to, tot); from += AES_BLOCK_LEN; @@ -110,7 +113,7 @@ } void -armv8_aes_decrypt_cbc(int rounds, const void *key_schedule, size_t len, +armv8_aes_decrypt_cbc(const AES_key_t *key, size_t len, uint8_t *buf, const uint8_t iv[static AES_BLOCK_LEN]) { uint8x16_t ivreg, nextiv, tmp; @@ -120,7 +123,8 @@ ivreg = vld1q_u8(iv); for (i = 0; i < len; i++) { nextiv = vld1q_u8(buf); - tmp = armv8_aes_dec(rounds - 1, key_schedule, nextiv); + tmp = armv8_aes_dec(key->aes_rounds - 1, + (const void*)key->aes_key, nextiv); vst1q_u8(buf, veorq_u8(tmp, ivreg)); ivreg = nextiv; buf += AES_BLOCK_LEN; @@ -200,21 +204,203 @@ } void -armv8_aes_encrypt_xts(int rounds, const void *data_schedule, +armv8_aes_encrypt_xts(AES_key_t *data_schedule, const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN]) { - armv8_aes_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to, - iv, 1); + armv8_aes_crypt_xts(data_schedule->aes_rounds, + (const void *)&data_schedule->aes_key, tweak_schedule, len, from, + to, iv, 1); } void -armv8_aes_decrypt_xts(int rounds, const void *data_schedule, +armv8_aes_decrypt_xts(AES_key_t *data_schedule, const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN]) { - armv8_aes_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to, - iv, 0); + armv8_aes_crypt_xts(data_schedule->aes_rounds, + (const void *)&data_schedule->aes_key, tweak_schedule, len, from, + to,iv, 0); + +} + +#define AES_INC_COUNTER(counter) \ + do { \ + for (int pos = AES_BLOCK_LEN - 1; \ + pos >= 0; pos--) \ + if (++(counter)[pos]) \ + break; \ + } while (0) + +void +armv8_aes_encrypt_gcm(AES_key_t *aes_key, size_t len, + const uint8_t *from, uint8_t *to, + size_t authdatalen, const uint8_t *authdata, + uint8_t tag[static GMAC_DIGEST_LEN], + const uint8_t iv[static AES_GCM_IV_LEN], + const __uint128_val_t *Htable) +{ + size_t i; + const uint64_t *from64; + uint64_t *to64; + uint8_t aes_counter[AES_BLOCK_LEN]; + uint8_t block[AES_BLOCK_LEN]; + size_t trailer; + __uint128_val_t EK0, EKi, Xi, lenblock; + + bzero(&aes_counter, AES_BLOCK_LEN); + memcpy(aes_counter, iv, AES_GCM_IV_LEN); + + /* Setup the counter */ + aes_counter[AES_BLOCK_LEN - 1] = 1; + + /* EK0 for a final GMAC round */ + aes_v8_encrypt(aes_counter, EK0.c, aes_key); + + /* GCM starts with 2 as counter, 1 is used for final xor of tag. */ + aes_counter[AES_BLOCK_LEN - 1] = 2; + + memset(Xi.c, 0, sizeof(Xi.c)); + memset(block, 0, sizeof(block)); + memcpy(block, authdata, min(authdatalen, sizeof(block))); + gcm_ghash_v8(Xi.u, Htable, block, AES_BLOCK_LEN); + + from64 = (const uint64_t*)from; + to64 = (uint64_t*)to; + trailer = len % AES_BLOCK_LEN; + + for (i = 0; i < (len - trailer); i += AES_BLOCK_LEN) { + aes_v8_encrypt(aes_counter, EKi.c, aes_key); + AES_INC_COUNTER(aes_counter); + to64[0] = from64[0] ^ EKi.u[0]; + to64[1] = from64[1] ^ EKi.u[1]; + gcm_ghash_v8(Xi.u, Htable, (uint8_t*)to64, AES_BLOCK_LEN); + + to64 += 2; + from64 += 2; + } + + to += (len - trailer); + from += (len - trailer); + + if (trailer) { + aes_v8_encrypt(aes_counter, EKi.c, aes_key); + AES_INC_COUNTER(aes_counter); + for (i = 0; i < trailer; i++) { + block[i] = to[i] = from[i] ^ EKi.c[i % AES_BLOCK_LEN]; + } + + for (; i < AES_BLOCK_LEN; i++) + block[i] = 0; + + gcm_ghash_v8(Xi.u, Htable, block, AES_BLOCK_LEN); + } + + /* Lengths block */ + lenblock.u[0] = lenblock.u[1] = 0; + lenblock.d[1] = htobe32(authdatalen * 8); + lenblock.d[3] = htobe32(len * 8); + gcm_ghash_v8(Xi.u, Htable, lenblock.c, AES_BLOCK_LEN); + + Xi.u[0] ^= EK0.u[0]; + Xi.u[1] ^= EK0.u[1]; + memcpy(tag, Xi.c, GMAC_DIGEST_LEN); + + explicit_bzero(aes_counter, sizeof(aes_counter)); + explicit_bzero(Xi.c, sizeof(Xi.c)); + explicit_bzero(EK0.c, sizeof(EK0.c)); + explicit_bzero(EKi.c, sizeof(EKi.c)); + explicit_bzero(lenblock.c, sizeof(lenblock.c)); +} + +int +armv8_aes_decrypt_gcm(AES_key_t *aes_key, size_t len, + const uint8_t *from, uint8_t *to, + size_t authdatalen, const uint8_t *authdata, + const uint8_t tag[static GMAC_DIGEST_LEN], + const uint8_t iv[static AES_GCM_IV_LEN], + const __uint128_val_t *Htable) +{ + size_t i; + const uint64_t *from64; + uint64_t *to64; + uint8_t aes_counter[AES_BLOCK_LEN]; + uint8_t block[AES_BLOCK_LEN]; + size_t trailer; + __uint128_val_t EK0, EKi, Xi, lenblock; + int error; + + error = 0; + bzero(&aes_counter, AES_BLOCK_LEN); + memcpy(aes_counter, iv, AES_GCM_IV_LEN); + + /* Setup the counter */ + aes_counter[AES_BLOCK_LEN - 1] = 1; + + /* EK0 for a final GMAC round */ + aes_v8_encrypt(aes_counter, EK0.c, aes_key); + + memset(Xi.c, 0, sizeof(Xi.c)); + memset(block, 0, sizeof(block)); + memcpy(block, authdata, min(authdatalen, sizeof(block))); + gcm_ghash_v8(Xi.u, Htable, block, AES_BLOCK_LEN); + trailer = len % AES_BLOCK_LEN; + gcm_ghash_v8(Xi.u, Htable, from, len - trailer); + + if (trailer) { + for (i = 0; i < trailer; i++) + block[i] = from[len - trailer + i]; + for (; i < AES_BLOCK_LEN; i++) + block[i] = 0; + gcm_ghash_v8(Xi.u, Htable, block, AES_BLOCK_LEN); + } + + /* Lengths block */ + lenblock.u[0] = lenblock.u[1] = 0; + lenblock.d[1] = htobe32(authdatalen * 8); + lenblock.d[3] = htobe32(len * 8); + gcm_ghash_v8(Xi.u, Htable, lenblock.c, AES_BLOCK_LEN); + + Xi.u[0] ^= EK0.u[0]; + Xi.u[1] ^= EK0.u[1]; + if (timingsafe_bcmp(tag, Xi.c, GMAC_DIGEST_LEN) != 0) { + error = EBADMSG; + goto out; + } + + /* GCM starts with 2 as counter, 1 is used for final xor of tag. */ + aes_counter[AES_BLOCK_LEN - 1] = 2; + + from64 = (const uint64_t*)from; + to64 = (uint64_t*)to; + + for (i = 0; i < (len - trailer); i += AES_BLOCK_LEN) { + aes_v8_encrypt(aes_counter, EKi.c, aes_key); + AES_INC_COUNTER(aes_counter); + to64[0] = from64[0] ^ EKi.u[0]; + to64[1] = from64[1] ^ EKi.u[1]; + to64 += 2; + from64 += 2; + } + + to += (len - trailer); + from += (len - trailer); + + if (trailer) { + aes_v8_encrypt(aes_counter, EKi.c, aes_key); + AES_INC_COUNTER(aes_counter); + for (i = 0; i < trailer; i++) + to[i] = from[i] ^ EKi.c[i % AES_BLOCK_LEN]; + } + +out: + explicit_bzero(aes_counter, sizeof(aes_counter)); + explicit_bzero(Xi.c, sizeof(Xi.c)); + explicit_bzero(EK0.c, sizeof(EK0.c)); + explicit_bzero(EKi.c, sizeof(EKi.c)); + explicit_bzero(lenblock.c, sizeof(lenblock.c)); + + return (error); } diff --git a/sys/modules/armv8crypto/Makefile b/sys/modules/armv8crypto/Makefile --- a/sys/modules/armv8crypto/Makefile +++ b/sys/modules/armv8crypto/Makefile @@ -1,12 +1,13 @@ # $FreeBSD$ .PATH: ${SRCTOP}/sys/crypto/armv8 +.PATH: ${SRCTOP}/sys/crypto/openssl/aarch64 KMOD= armv8crypto SRCS= armv8_crypto.c SRCS+= device_if.h bus_if.h opt_bus.h cryptodev_if.h -OBJS+= armv8_crypto_wrap.o +OBJS+= armv8_crypto_wrap.o aesv8-armx.o ghashv8-armx.o # Remove -nostdinc so we can get the intrinsics. armv8_crypto_wrap.o: armv8_crypto_wrap.c @@ -16,6 +17,22 @@ -march=armv8-a+crypto ${.IMPSRC} ${CTFCONVERT_CMD} +aesv8-armx.o: aesv8-armx.S + ${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc:N-mgeneral-regs-only} \ + -I${SRCTOP}/sys/crypto/armv8 \ + -I${SRCTOP}/sys/crypto/openssl/crypto \ + ${WERROR} ${PROF} \ + -march=armv8-a+crypto ${.IMPSRC} + ${CTFCONVERT_CMD} + +ghashv8-armx.o: ghashv8-armx.S + ${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc:N-mgeneral-regs-only} \ + -I${SRCTOP}/sys/crypto/armv8 \ + -I${SRCTOP}/sys/crypto/openssl/crypto \ + ${WERROR} ${PROF} \ + -march=armv8-a+crypto ${.IMPSRC} + ${CTFCONVERT_CMD} + armv8_crypto_wrap.o: armv8_crypto.h .include