diff --git a/sys/crypto/armv8/armv8_crypto.c b/sys/crypto/armv8/armv8_crypto.c index 0811a1c03390..18b0870f380b 100644 --- a/sys/crypto/armv8/armv8_crypto.c +++ b/sys/crypto/armv8/armv8_crypto.c @@ -1,545 +1,508 @@ /*- * Copyright (c) 2005-2008 Pawel Jakub Dawidek * Copyright (c) 2010 Konstantin Belousov * Copyright (c) 2014,2016 The FreeBSD Foundation * Copyright (c) 2020 Ampere Computing * All rights reserved. * * Portions of this software were developed by John-Mark Gurney * under sponsorship of the FreeBSD Foundation and * Rubicon Communications, LLC (Netgate). * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This is based on the aesni code. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct armv8_crypto_softc { int dieing; int32_t cid; struct rwlock lock; bool has_pmul; }; static struct mtx *ctx_mtx; static struct fpu_kern_ctx **ctx_vfp; #define AQUIRE_CTX(i, ctx) \ do { \ (i) = PCPU_GET(cpuid); \ mtx_lock(&ctx_mtx[(i)]); \ (ctx) = ctx_vfp[(i)]; \ } while (0) #define RELEASE_CTX(i, ctx) \ do { \ mtx_unlock(&ctx_mtx[(i)]); \ (i) = -1; \ (ctx) = NULL; \ } while (0) static int armv8_crypto_cipher_process(struct armv8_crypto_session *, struct cryptop *); MALLOC_DEFINE(M_ARMV8_CRYPTO, "armv8_crypto", "ARMv8 Crypto Data"); static void armv8_crypto_identify(driver_t *drv, device_t parent) { /* NB: order 10 is so we get attached after h/w devices */ if (device_find_child(parent, "armv8crypto", -1) == NULL && BUS_ADD_CHILD(parent, 10, "armv8crypto", -1) == 0) panic("ARMv8 crypto: could not attach"); } static int armv8_crypto_probe(device_t dev) { uint64_t reg; int ret = ENXIO; reg = READ_SPECIALREG(id_aa64isar0_el1); switch (ID_AA64ISAR0_AES_VAL(reg)) { case ID_AA64ISAR0_AES_BASE: ret = 0; device_set_desc(dev, "AES-CBC,AES-XTS"); break; case ID_AA64ISAR0_AES_PMULL: ret = 0; device_set_desc(dev, "AES-CBC,AES-XTS,AES-GCM"); break; default: break; case ID_AA64ISAR0_AES_NONE: device_printf(dev, "CPU lacks AES instructions\n"); break; } /* TODO: Check more fields as we support more features */ return (ret); } static int armv8_crypto_attach(device_t dev) { struct armv8_crypto_softc *sc; uint64_t reg; int i; sc = device_get_softc(dev); sc->dieing = 0; reg = READ_SPECIALREG(id_aa64isar0_el1); if (ID_AA64ISAR0_AES_VAL(reg) == ID_AA64ISAR0_AES_PMULL) sc->has_pmul = true; sc->cid = crypto_get_driverid(dev, sizeof(struct armv8_crypto_session), CRYPTOCAP_F_SOFTWARE | CRYPTOCAP_F_SYNC | CRYPTOCAP_F_ACCEL_SOFTWARE); if (sc->cid < 0) { device_printf(dev, "Could not get crypto driver id.\n"); return (ENOMEM); } rw_init(&sc->lock, "armv8crypto"); ctx_mtx = malloc(sizeof(*ctx_mtx) * (mp_maxid + 1), M_ARMV8_CRYPTO, M_WAITOK|M_ZERO); ctx_vfp = malloc(sizeof(*ctx_vfp) * (mp_maxid + 1), M_ARMV8_CRYPTO, M_WAITOK|M_ZERO); CPU_FOREACH(i) { ctx_vfp[i] = fpu_kern_alloc_ctx(0); mtx_init(&ctx_mtx[i], "armv8cryptoctx", NULL, MTX_DEF|MTX_NEW); } return (0); } static int armv8_crypto_detach(device_t dev) { struct armv8_crypto_softc *sc; int i; sc = device_get_softc(dev); rw_wlock(&sc->lock); sc->dieing = 1; rw_wunlock(&sc->lock); crypto_unregister_all(sc->cid); rw_destroy(&sc->lock); CPU_FOREACH(i) { if (ctx_vfp[i] != NULL) { mtx_destroy(&ctx_mtx[i]); fpu_kern_free_ctx(ctx_vfp[i]); } ctx_vfp[i] = NULL; } free(ctx_mtx, M_ARMV8_CRYPTO); ctx_mtx = NULL; free(ctx_vfp, M_ARMV8_CRYPTO); ctx_vfp = NULL; return (0); } #define SUPPORTED_SES (CSP_F_SEPARATE_OUTPUT | CSP_F_SEPARATE_AAD) static int armv8_crypto_probesession(device_t dev, const struct crypto_session_params *csp) { struct armv8_crypto_softc *sc; sc = device_get_softc(dev); if ((csp->csp_flags & ~(SUPPORTED_SES)) != 0) return (EINVAL); switch (csp->csp_mode) { case CSP_MODE_AEAD: switch (csp->csp_cipher_alg) { case CRYPTO_AES_NIST_GCM_16: if (!sc->has_pmul) return (EINVAL); if (csp->csp_auth_mlen != 0 && csp->csp_auth_mlen != GMAC_DIGEST_LEN) return (EINVAL); switch (csp->csp_cipher_klen * 8) { case 128: case 192: case 256: break; default: return (EINVAL); } break; default: return (EINVAL); } break; case CSP_MODE_CIPHER: switch 
(csp->csp_cipher_alg) { case CRYPTO_AES_CBC: if (csp->csp_ivlen != AES_BLOCK_LEN) return (EINVAL); switch (csp->csp_cipher_klen * 8) { case 128: case 192: case 256: break; default: return (EINVAL); } break; case CRYPTO_AES_XTS: if (csp->csp_ivlen != AES_XTS_IV_LEN) return (EINVAL); switch (csp->csp_cipher_klen * 8) { case 256: case 512: break; default: return (EINVAL); } break; default: return (EINVAL); } break; default: return (EINVAL); } return (CRYPTODEV_PROBE_ACCEL_SOFTWARE); } static int armv8_crypto_cipher_setup(struct armv8_crypto_session *ses, const struct crypto_session_params *csp, const uint8_t *key, int keylen) { __uint128_val_t H; struct fpu_kern_ctx *ctx; int kt, i; if (csp->csp_cipher_alg == CRYPTO_AES_XTS) keylen /= 2; switch (keylen * 8) { case 128: case 192: case 256: break; default: return (EINVAL); } kt = is_fpu_kern_thread(0); if (!kt) { AQUIRE_CTX(i, ctx); fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR); } aes_v8_set_encrypt_key(key, keylen * 8, &ses->enc_schedule); if ((csp->csp_cipher_alg == CRYPTO_AES_XTS) || (csp->csp_cipher_alg == CRYPTO_AES_CBC)) aes_v8_set_decrypt_key(key, keylen * 8, &ses->dec_schedule); if (csp->csp_cipher_alg == CRYPTO_AES_XTS) aes_v8_set_encrypt_key(key + keylen, keylen * 8, &ses->xts_schedule); if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16) { memset(H.c, 0, sizeof(H.c)); aes_v8_encrypt(H.c, H.c, &ses->enc_schedule); H.u[0] = bswap64(H.u[0]); H.u[1] = bswap64(H.u[1]); gcm_init_v8(ses->Htable, H.u); } if (!kt) { fpu_kern_leave(curthread, ctx); RELEASE_CTX(i, ctx); } return (0); } static int armv8_crypto_newsession(device_t dev, crypto_session_t cses, const struct crypto_session_params *csp) { struct armv8_crypto_softc *sc; struct armv8_crypto_session *ses; int error; sc = device_get_softc(dev); rw_wlock(&sc->lock); if (sc->dieing) { rw_wunlock(&sc->lock); return (EINVAL); } ses = crypto_get_driver_session(cses); error = armv8_crypto_cipher_setup(ses, csp, csp->csp_cipher_key, csp->csp_cipher_klen); rw_wunlock(&sc->lock); return (error); } static int armv8_crypto_process(device_t dev, struct cryptop *crp, int hint __unused) { struct armv8_crypto_session *ses; ses = crypto_get_driver_session(crp->crp_session); crp->crp_etype = armv8_crypto_cipher_process(ses, crp); crypto_done(crp); return (0); } static uint8_t * armv8_crypto_cipher_alloc(struct cryptop *crp, int start, int length, int *allocated) { uint8_t *addr; addr = crypto_contiguous_subsegment(crp, start, length); if (addr != NULL) { *allocated = 0; return (addr); } addr = malloc(crp->crp_payload_length, M_ARMV8_CRYPTO, M_NOWAIT); if (addr != NULL) { *allocated = 1; crypto_copydata(crp, start, length, addr); } else *allocated = 0; return (addr); } static int armv8_crypto_cipher_process(struct armv8_crypto_session *ses, struct cryptop *crp) { + struct crypto_buffer_cursor fromc, toc; const struct crypto_session_params *csp; struct fpu_kern_ctx *ctx; - uint8_t *buf, *authbuf, *outbuf; + uint8_t *authbuf; uint8_t iv[AES_BLOCK_LEN], tag[GMAC_DIGEST_LEN]; - int allocated, authallocated, outallocated, i; + int authallocated, i; int encflag; int kt; int error; - bool outcopy; csp = crypto_get_params(crp->crp_session); encflag = CRYPTO_OP_IS_ENCRYPT(crp->crp_op); - allocated = 0; - outallocated = 0; authallocated = 0; authbuf = NULL; kt = 1; - buf = armv8_crypto_cipher_alloc(crp, crp->crp_payload_start, - crp->crp_payload_length, &allocated); - if (buf == NULL) - return (ENOMEM); - if (csp->csp_cipher_alg == CRYPTO_AES_NIST_GCM_16) { if (crp->crp_aad != NULL) authbuf = 
crp->crp_aad; else authbuf = armv8_crypto_cipher_alloc(crp, crp->crp_aad_start, crp->crp_aad_length, &authallocated); if (authbuf == NULL) { error = ENOMEM; goto out; } } - + crypto_cursor_init(&fromc, &crp->crp_buf); + crypto_cursor_advance(&fromc, crp->crp_payload_start); if (CRYPTO_HAS_OUTPUT_BUFFER(crp)) { - outbuf = crypto_buffer_contiguous_subsegment(&crp->crp_obuf, - crp->crp_payload_output_start, crp->crp_payload_length); - if (outbuf == NULL) { - outcopy = true; - if (allocated) - outbuf = buf; - else { - outbuf = malloc(crp->crp_payload_length, - M_ARMV8_CRYPTO, M_NOWAIT); - if (outbuf == NULL) { - error = ENOMEM; - goto out; - } - outallocated = true; - } - } else - outcopy = false; + crypto_cursor_init(&toc, &crp->crp_obuf); + crypto_cursor_advance(&toc, crp->crp_payload_output_start); } else { - outbuf = buf; - outcopy = allocated; + crypto_cursor_copy(&fromc, &toc); } kt = is_fpu_kern_thread(0); if (!kt) { AQUIRE_CTX(i, ctx); fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL | FPU_KERN_KTHR); } if (crp->crp_cipher_key != NULL) { armv8_crypto_cipher_setup(ses, csp, crp->crp_cipher_key, csp->csp_cipher_klen); } crypto_read_iv(crp, iv); - /* Do work */ switch (csp->csp_cipher_alg) { case CRYPTO_AES_CBC: if ((crp->crp_payload_length % AES_BLOCK_LEN) != 0) { error = EINVAL; goto out; } if (encflag) armv8_aes_encrypt_cbc(&ses->enc_schedule, - crp->crp_payload_length, buf, buf, iv); + crp->crp_payload_length, &fromc, &toc, iv); else armv8_aes_decrypt_cbc(&ses->dec_schedule, - crp->crp_payload_length, buf, iv); + crp->crp_payload_length, &fromc, &toc, iv); break; case CRYPTO_AES_XTS: if (encflag) armv8_aes_encrypt_xts(&ses->enc_schedule, - &ses->xts_schedule.aes_key, crp->crp_payload_length, buf, - buf, iv); + &ses->xts_schedule.aes_key, crp->crp_payload_length, + &fromc, &toc, iv); else armv8_aes_decrypt_xts(&ses->dec_schedule, - &ses->xts_schedule.aes_key, crp->crp_payload_length, buf, - buf, iv); + &ses->xts_schedule.aes_key, crp->crp_payload_length, + &fromc, &toc, iv); break; case CRYPTO_AES_NIST_GCM_16: if (encflag) { memset(tag, 0, sizeof(tag)); armv8_aes_encrypt_gcm(&ses->enc_schedule, - crp->crp_payload_length, - buf, outbuf, - crp->crp_aad_length, authbuf, - tag, iv, ses->Htable); + crp->crp_payload_length, &fromc, &toc, + crp->crp_aad_length, authbuf, tag, iv, ses->Htable); crypto_copyback(crp, crp->crp_digest_start, sizeof(tag), tag); } else { crypto_copydata(crp, crp->crp_digest_start, sizeof(tag), tag); - if (armv8_aes_decrypt_gcm(&ses->enc_schedule, - crp->crp_payload_length, - buf, outbuf, - crp->crp_aad_length, authbuf, - tag, iv, ses->Htable) != 0) { - error = EBADMSG; + error = armv8_aes_decrypt_gcm(&ses->enc_schedule, + crp->crp_payload_length, &fromc, &toc, + crp->crp_aad_length, authbuf, tag, iv, ses->Htable); + if (error != 0) goto out; - } } break; } - if (outcopy) - crypto_copyback(crp, CRYPTO_HAS_OUTPUT_BUFFER(crp) ? 
- crp->crp_payload_output_start : crp->crp_payload_start, - crp->crp_payload_length, outbuf); - error = 0; out: if (!kt) { fpu_kern_leave(curthread, ctx); RELEASE_CTX(i, ctx); } - if (allocated) - zfree(buf, M_ARMV8_CRYPTO); if (authallocated) zfree(authbuf, M_ARMV8_CRYPTO); - if (outallocated) - zfree(outbuf, M_ARMV8_CRYPTO); explicit_bzero(iv, sizeof(iv)); explicit_bzero(tag, sizeof(tag)); return (error); } static device_method_t armv8_crypto_methods[] = { DEVMETHOD(device_identify, armv8_crypto_identify), DEVMETHOD(device_probe, armv8_crypto_probe), DEVMETHOD(device_attach, armv8_crypto_attach), DEVMETHOD(device_detach, armv8_crypto_detach), DEVMETHOD(cryptodev_probesession, armv8_crypto_probesession), DEVMETHOD(cryptodev_newsession, armv8_crypto_newsession), DEVMETHOD(cryptodev_process, armv8_crypto_process), DEVMETHOD_END, }; static DEFINE_CLASS_0(armv8crypto, armv8_crypto_driver, armv8_crypto_methods, sizeof(struct armv8_crypto_softc)); static devclass_t armv8_crypto_devclass; DRIVER_MODULE(armv8crypto, nexus, armv8_crypto_driver, armv8_crypto_devclass, 0, 0); diff --git a/sys/crypto/armv8/armv8_crypto.h b/sys/crypto/armv8/armv8_crypto.h index 855aabd8bac3..a978248703fe 100644 --- a/sys/crypto/armv8/armv8_crypto.h +++ b/sys/crypto/armv8/armv8_crypto.h @@ -1,86 +1,91 @@ /*- * Copyright (c) 2016 The FreeBSD Foundation * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _ARMV8_CRYPTO_H_ #define _ARMV8_CRYPTO_H_ #define AES256_ROUNDS 14 #define AES_SCHED_LEN ((AES256_ROUNDS + 1) * AES_BLOCK_LEN) typedef struct { uint32_t aes_key[AES_SCHED_LEN/4]; int aes_rounds; } AES_key_t; typedef union { uint64_t u[2]; uint32_t d[4]; uint8_t c[16]; size_t t[16 / sizeof(size_t)]; } __uint128_val_t; struct armv8_crypto_session { AES_key_t enc_schedule; AES_key_t dec_schedule; AES_key_t xts_schedule; __uint128_val_t Htable[16]; }; /* Prototypes for aesv8-armx.S */ void aes_v8_encrypt(uint8_t *in, uint8_t *out, const AES_key_t *key); int aes_v8_set_encrypt_key(const unsigned char *userKey, const int bits, const AES_key_t *key); int aes_v8_set_decrypt_key(const unsigned char *userKey, const int bits, const AES_key_t *key); /* Prototypes for ghashv8-armx.S */ void gcm_init_v8(__uint128_val_t Htable[16], const uint64_t Xi[2]); void gcm_gmult_v8(uint64_t Xi[2], const __uint128_val_t Htable[16]); void gcm_ghash_v8(uint64_t Xi[2], const __uint128_val_t Htable[16], const uint8_t *inp, size_t len); -void armv8_aes_encrypt_cbc(const AES_key_t *, size_t, const uint8_t *, - uint8_t *, const uint8_t[static AES_BLOCK_LEN]); -void armv8_aes_decrypt_cbc(const AES_key_t *, size_t, uint8_t *, +void armv8_aes_encrypt_cbc(const AES_key_t *key, size_t len, + struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc, + const uint8_t iv[static AES_BLOCK_LEN]); +void armv8_aes_decrypt_cbc(const AES_key_t *, size_t, + struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc, const uint8_t[static AES_BLOCK_LEN]); -void armv8_aes_encrypt_gcm(AES_key_t *, size_t, const uint8_t *, - uint8_t *, size_t, const uint8_t*, +void armv8_aes_encrypt_gcm(AES_key_t *, size_t, + struct crypto_buffer_cursor *, struct crypto_buffer_cursor *, + size_t, const uint8_t *, uint8_t tag[static GMAC_DIGEST_LEN], const uint8_t[static AES_BLOCK_LEN], const __uint128_val_t *); -int armv8_aes_decrypt_gcm(AES_key_t *, size_t, const uint8_t *, - uint8_t *, size_t, const uint8_t*, - const uint8_t tag[static GMAC_DIGEST_LEN], +int armv8_aes_decrypt_gcm(AES_key_t *, size_t, + struct crypto_buffer_cursor *, struct crypto_buffer_cursor *, + size_t, const uint8_t *, const uint8_t tag[static GMAC_DIGEST_LEN], const uint8_t[static AES_BLOCK_LEN], const __uint128_val_t *); void armv8_aes_encrypt_xts(AES_key_t *, const void *, size_t, - const uint8_t *, uint8_t *, const uint8_t[AES_BLOCK_LEN]); + struct crypto_buffer_cursor *, struct crypto_buffer_cursor *, + const uint8_t[AES_BLOCK_LEN]); void armv8_aes_decrypt_xts(AES_key_t *, const void *, size_t, - const uint8_t *, uint8_t *, const uint8_t[AES_BLOCK_LEN]); + struct crypto_buffer_cursor *, struct crypto_buffer_cursor *, + const uint8_t[AES_BLOCK_LEN]); #endif /* _ARMV8_CRYPTO_H_ */ diff --git a/sys/crypto/armv8/armv8_crypto_wrap.c b/sys/crypto/armv8/armv8_crypto_wrap.c index b5aee0cc1cf6..8c489978eaaa 100644 --- a/sys/crypto/armv8/armv8_crypto_wrap.c +++ b/sys/crypto/armv8/armv8_crypto_wrap.c @@ -1,410 +1,537 @@ /*- * Copyright (c) 2016 The FreeBSD Foundation * Copyright (c) 2020 Ampere Computing * All rights reserved. * * This software was developed by Andrew Turner under * sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * This file is derived from aesni_wrap.c: * Copyright (C) 2008 Damien Miller * Copyright (c) 2010 Konstantin Belousov * Copyright (c) 2010-2011 Pawel Jakub Dawidek * Copyright 2012-2013 John-Mark Gurney * Copyright (c) 2014 The FreeBSD Foundation */ /* * This code is built with floating-point enabled. Make sure to have entered * into floating-point context before calling any of these functions. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include static uint8x16_t armv8_aes_enc(int rounds, const uint8x16_t *keysched, const uint8x16_t from) { uint8x16_t tmp; int i; tmp = from; for (i = 0; i < rounds - 1; i += 2) { tmp = vaeseq_u8(tmp, keysched[i]); tmp = vaesmcq_u8(tmp); tmp = vaeseq_u8(tmp, keysched[i + 1]); tmp = vaesmcq_u8(tmp); } tmp = vaeseq_u8(tmp, keysched[rounds - 1]); tmp = vaesmcq_u8(tmp); tmp = vaeseq_u8(tmp, keysched[rounds]); tmp = veorq_u8(tmp, keysched[rounds + 1]); return (tmp); } static uint8x16_t armv8_aes_dec(int rounds, const uint8x16_t *keysched, const uint8x16_t from) { uint8x16_t tmp; int i; tmp = from; for (i = 0; i < rounds - 1; i += 2) { tmp = vaesdq_u8(tmp, keysched[i]); tmp = vaesimcq_u8(tmp); tmp = vaesdq_u8(tmp, keysched[i+1]); tmp = vaesimcq_u8(tmp); } tmp = vaesdq_u8(tmp, keysched[rounds - 1]); tmp = vaesimcq_u8(tmp); tmp = vaesdq_u8(tmp, keysched[rounds]); tmp = veorq_u8(tmp, keysched[rounds + 1]); return (tmp); } void armv8_aes_encrypt_cbc(const AES_key_t *key, size_t len, - const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN]) + struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc, + const uint8_t iv[static AES_BLOCK_LEN]) { uint8x16_t tot, ivreg, tmp; - size_t i; + uint8_t block[AES_BLOCK_LEN], *from, *to; + size_t fromseglen, oseglen, seglen, toseglen; + + KASSERT(len % AES_BLOCK_LEN == 0, + ("%s: length %zu not a multiple of the block size", __func__, len)); - len /= AES_BLOCK_LEN; ivreg = vld1q_u8(iv); - for (i = 0; i < len; i++) { - tmp = vld1q_u8(from); - tot = armv8_aes_enc(key->aes_rounds - 1, - (const void*)key->aes_key, veorq_u8(tmp, ivreg)); - ivreg = tot; - vst1q_u8(to, tot); - from += AES_BLOCK_LEN; - to += AES_BLOCK_LEN; + for (; len > 0; len -= seglen) { + from = crypto_cursor_segment(fromc, &fromseglen); + to = crypto_cursor_segment(toc, &toseglen); + + seglen = ulmin(len, ulmin(fromseglen, toseglen)); + if (seglen < AES_BLOCK_LEN) { + crypto_cursor_copydata(fromc, AES_BLOCK_LEN, block); + tmp = vld1q_u8(block); + tot = armv8_aes_enc(key->aes_rounds - 1, + (const void *)key->aes_key, 
veorq_u8(tmp, ivreg)); + ivreg = tot; + vst1q_u8(block, tot); + crypto_cursor_copyback(toc, AES_BLOCK_LEN, block); + seglen = AES_BLOCK_LEN; + } else { + for (oseglen = seglen; seglen >= AES_BLOCK_LEN; + seglen -= AES_BLOCK_LEN) { + tmp = vld1q_u8(from); + tot = armv8_aes_enc(key->aes_rounds - 1, + (const void *)key->aes_key, + veorq_u8(tmp, ivreg)); + ivreg = tot; + vst1q_u8(to, tot); + from += AES_BLOCK_LEN; + to += AES_BLOCK_LEN; + } + seglen = oseglen - seglen; + crypto_cursor_advance(fromc, seglen); + crypto_cursor_advance(toc, seglen); + } } + + explicit_bzero(block, sizeof(block)); } void armv8_aes_decrypt_cbc(const AES_key_t *key, size_t len, - uint8_t *buf, const uint8_t iv[static AES_BLOCK_LEN]) + struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc, + const uint8_t iv[static AES_BLOCK_LEN]) { uint8x16_t ivreg, nextiv, tmp; - size_t i; + uint8_t block[AES_BLOCK_LEN], *from, *to; + size_t fromseglen, oseglen, seglen, toseglen; + + KASSERT(len % AES_BLOCK_LEN == 0, + ("%s: length %zu not a multiple of the block size", __func__, len)); - len /= AES_BLOCK_LEN; ivreg = vld1q_u8(iv); - for (i = 0; i < len; i++) { - nextiv = vld1q_u8(buf); - tmp = armv8_aes_dec(key->aes_rounds - 1, - (const void*)key->aes_key, nextiv); - vst1q_u8(buf, veorq_u8(tmp, ivreg)); - ivreg = nextiv; - buf += AES_BLOCK_LEN; + for (; len > 0; len -= seglen) { + from = crypto_cursor_segment(fromc, &fromseglen); + to = crypto_cursor_segment(toc, &toseglen); + + seglen = ulmin(len, ulmin(fromseglen, toseglen)); + if (seglen < AES_BLOCK_LEN) { + crypto_cursor_copydata(fromc, AES_BLOCK_LEN, block); + nextiv = vld1q_u8(block); + tmp = armv8_aes_dec(key->aes_rounds - 1, + (const void *)key->aes_key, nextiv); + vst1q_u8(block, veorq_u8(tmp, ivreg)); + ivreg = nextiv; + crypto_cursor_copyback(toc, AES_BLOCK_LEN, block); + seglen = AES_BLOCK_LEN; + } else { + for (oseglen = seglen; seglen >= AES_BLOCK_LEN; + seglen -= AES_BLOCK_LEN) { + nextiv = vld1q_u8(from); + tmp = armv8_aes_dec(key->aes_rounds - 1, + (const void *)key->aes_key, nextiv); + vst1q_u8(to, veorq_u8(tmp, ivreg)); + ivreg = nextiv; + from += AES_BLOCK_LEN; + to += AES_BLOCK_LEN; + } + crypto_cursor_advance(fromc, oseglen - seglen); + crypto_cursor_advance(toc, oseglen - seglen); + seglen = oseglen - seglen; + } } + + explicit_bzero(block, sizeof(block)); } #define AES_XTS_BLOCKSIZE 16 #define AES_XTS_IVSIZE 8 #define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */ static inline int32x4_t xts_crank_lfsr(int32x4_t inp) { const int32x4_t alphamask = {AES_XTS_ALPHA, 1, 1, 1}; int32x4_t xtweak, ret; /* set up xor mask */ xtweak = vextq_s32(inp, inp, 3); xtweak = vshrq_n_s32(xtweak, 31); xtweak &= alphamask; /* next term */ ret = vshlq_n_s32(inp, 1); ret ^= xtweak; return ret; } static void armv8_aes_crypt_xts_block(int rounds, const uint8x16_t *key_schedule, uint8x16_t *tweak, const uint8_t *from, uint8_t *to, int do_encrypt) { uint8x16_t block; block = vld1q_u8(from) ^ *tweak; if (do_encrypt) block = armv8_aes_enc(rounds - 1, key_schedule, block); else block = armv8_aes_dec(rounds - 1, key_schedule, block); vst1q_u8(to, block ^ *tweak); *tweak = vreinterpretq_u8_s32(xts_crank_lfsr(vreinterpretq_s32_u8(*tweak))); } static void armv8_aes_crypt_xts(int rounds, const uint8x16_t *data_schedule, - const uint8x16_t *tweak_schedule, size_t len, const uint8_t *from, - uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt) + const uint8x16_t *tweak_schedule, size_t len, + struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor 
*toc, + const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt) { uint8x16_t tweakreg; + uint8_t block[AES_XTS_BLOCKSIZE] __aligned(16); uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16); - size_t i, cnt; + uint8_t *from, *to; + size_t fromseglen, oseglen, seglen, toseglen; + + KASSERT(len % AES_XTS_BLOCKSIZE == 0, + ("%s: length %zu not a multiple of the block size", __func__, len)); /* * Prepare tweak as E_k2(IV). IV is specified as LE representation * of a 64-bit block number which we allow to be passed in directly. */ #if BYTE_ORDER == LITTLE_ENDIAN bcopy(iv, tweak, AES_XTS_IVSIZE); /* Last 64 bits of IV are always zero. */ bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE); #else #error Only LITTLE_ENDIAN architectures are supported. #endif tweakreg = vld1q_u8(tweak); tweakreg = armv8_aes_enc(rounds - 1, tweak_schedule, tweakreg); - cnt = len / AES_XTS_BLOCKSIZE; - for (i = 0; i < cnt; i++) { - armv8_aes_crypt_xts_block(rounds, data_schedule, &tweakreg, - from, to, do_encrypt); - from += AES_XTS_BLOCKSIZE; - to += AES_XTS_BLOCKSIZE; + for (; len > 0; len -= seglen) { + from = crypto_cursor_segment(fromc, &fromseglen); + to = crypto_cursor_segment(toc, &toseglen); + + seglen = ulmin(len, ulmin(fromseglen, toseglen)); + if (seglen < AES_XTS_BLOCKSIZE) { + crypto_cursor_copydata(fromc, AES_XTS_BLOCKSIZE, block); + armv8_aes_crypt_xts_block(rounds, data_schedule, + &tweakreg, block, block, do_encrypt); + crypto_cursor_copyback(toc, AES_XTS_BLOCKSIZE, block); + seglen = AES_XTS_BLOCKSIZE; + } else { + for (oseglen = seglen; seglen >= AES_XTS_BLOCKSIZE; + seglen -= AES_XTS_BLOCKSIZE) { + armv8_aes_crypt_xts_block(rounds, data_schedule, + &tweakreg, from, to, do_encrypt); + from += AES_XTS_BLOCKSIZE; + to += AES_XTS_BLOCKSIZE; + } + seglen = oseglen - seglen; + crypto_cursor_advance(fromc, seglen); + crypto_cursor_advance(toc, seglen); + } } + + explicit_bzero(block, sizeof(block)); } void armv8_aes_encrypt_xts(AES_key_t *data_schedule, - const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to, - const uint8_t iv[static AES_BLOCK_LEN]) + const void *tweak_schedule, size_t len, struct crypto_buffer_cursor *fromc, + struct crypto_buffer_cursor *toc, const uint8_t iv[static AES_BLOCK_LEN]) { - armv8_aes_crypt_xts(data_schedule->aes_rounds, - (const void *)&data_schedule->aes_key, tweak_schedule, len, from, - to, iv, 1); + (const void *)&data_schedule->aes_key, tweak_schedule, len, fromc, + toc, iv, 1); } void armv8_aes_decrypt_xts(AES_key_t *data_schedule, - const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to, + const void *tweak_schedule, size_t len, + struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc, const uint8_t iv[static AES_BLOCK_LEN]) { - armv8_aes_crypt_xts(data_schedule->aes_rounds, - (const void *)&data_schedule->aes_key, tweak_schedule, len, from, - to,iv, 0); + (const void *)&data_schedule->aes_key, tweak_schedule, len, fromc, + toc, iv, 0); } - #define AES_INC_COUNTER(counter) \ do { \ for (int pos = AES_BLOCK_LEN - 1; \ pos >= 0; pos--) \ if (++(counter)[pos]) \ break; \ } while (0) struct armv8_gcm_state { __uint128_val_t EK0; __uint128_val_t EKi; __uint128_val_t Xi; __uint128_val_t lenblock; uint8_t aes_counter[AES_BLOCK_LEN]; }; static void armv8_aes_gmac_setup(struct armv8_gcm_state *s, AES_key_t *aes_key, const uint8_t *authdata, size_t authdatalen, const uint8_t iv[static AES_GCM_IV_LEN], const __uint128_val_t *Htable) { uint8_t
block[AES_BLOCK_LEN]; size_t trailer; bzero(s->aes_counter, AES_BLOCK_LEN); memcpy(s->aes_counter, iv, AES_GCM_IV_LEN); /* Setup the counter */ s->aes_counter[AES_BLOCK_LEN - 1] = 1; /* EK0 for a final GMAC round */ aes_v8_encrypt(s->aes_counter, s->EK0.c, aes_key); /* GCM starts with 2 as counter, 1 is used for final xor of tag. */ s->aes_counter[AES_BLOCK_LEN - 1] = 2; memset(s->Xi.c, 0, sizeof(s->Xi.c)); trailer = authdatalen % AES_BLOCK_LEN; if (authdatalen - trailer > 0) { gcm_ghash_v8(s->Xi.u, Htable, authdata, authdatalen - trailer); authdata += authdatalen - trailer; } if (trailer > 0 || authdatalen == 0) { memset(block, 0, sizeof(block)); memcpy(block, authdata, trailer); gcm_ghash_v8(s->Xi.u, Htable, block, AES_BLOCK_LEN); } } static void armv8_aes_gmac_finish(struct armv8_gcm_state *s, size_t len, size_t authdatalen, const __uint128_val_t *Htable) { /* Lengths block */ s->lenblock.u[0] = s->lenblock.u[1] = 0; s->lenblock.d[1] = htobe32(authdatalen * 8); s->lenblock.d[3] = htobe32(len * 8); gcm_ghash_v8(s->Xi.u, Htable, s->lenblock.c, AES_BLOCK_LEN); s->Xi.u[0] ^= s->EK0.u[0]; s->Xi.u[1] ^= s->EK0.u[1]; } +static void +armv8_aes_encrypt_gcm_block(struct armv8_gcm_state *s, AES_key_t *aes_key, + const uint64_t *from, uint64_t *to) +{ + aes_v8_encrypt(s->aes_counter, s->EKi.c, aes_key); + AES_INC_COUNTER(s->aes_counter); + to[0] = from[0] ^ s->EKi.u[0]; + to[1] = from[1] ^ s->EKi.u[1]; +} + +static void +armv8_aes_decrypt_gcm_block(struct armv8_gcm_state *s, AES_key_t *aes_key, + const uint64_t *from, uint64_t *to) +{ + armv8_aes_encrypt_gcm_block(s, aes_key, from, to); +} + void armv8_aes_encrypt_gcm(AES_key_t *aes_key, size_t len, - const uint8_t *from, uint8_t *to, + struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc, size_t authdatalen, const uint8_t *authdata, uint8_t tag[static GMAC_DIGEST_LEN], const uint8_t iv[static AES_GCM_IV_LEN], const __uint128_val_t *Htable) { struct armv8_gcm_state s; - const uint64_t *from64; - uint64_t *to64; - uint8_t block[AES_BLOCK_LEN]; - size_t i, trailer; + uint8_t block[AES_BLOCK_LEN] __aligned(AES_BLOCK_LEN); + uint64_t *from64, *to64; + size_t fromseglen, i, olen, oseglen, seglen, toseglen; armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable); - from64 = (const uint64_t *)from; - to64 = (uint64_t *)to; - trailer = len % AES_BLOCK_LEN; - - for (i = 0; i < (len - trailer); i += AES_BLOCK_LEN) { - aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key); - AES_INC_COUNTER(s.aes_counter); - to64[0] = from64[0] ^ s.EKi.u[0]; - to64[1] = from64[1] ^ s.EKi.u[1]; - gcm_ghash_v8(s.Xi.u, Htable, (uint8_t*)to64, AES_BLOCK_LEN); - - to64 += 2; - from64 += 2; - } - - to += (len - trailer); - from += (len - trailer); - - if (trailer) { - aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key); - AES_INC_COUNTER(s.aes_counter); - memset(block, 0, sizeof(block)); - for (i = 0; i < trailer; i++) { - block[i] = to[i] = from[i] ^ s.EKi.c[i]; + for (olen = len; len > 0; len -= seglen) { + from64 = crypto_cursor_segment(fromc, &fromseglen); + to64 = crypto_cursor_segment(toc, &toseglen); + + seglen = ulmin(len, ulmin(fromseglen, toseglen)); + if (seglen < AES_BLOCK_LEN) { + seglen = ulmin(len, AES_BLOCK_LEN); + + memset(block, 0, sizeof(block)); + crypto_cursor_copydata(fromc, (int)seglen, block); + + if (seglen == AES_BLOCK_LEN) { + armv8_aes_encrypt_gcm_block(&s, aes_key, + (uint64_t *)block, (uint64_t *)block); + } else { + aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key); + AES_INC_COUNTER(s.aes_counter); + for (i = 0; i < seglen; i++) + 
block[i] ^= s.EKi.c[i]; + } + gcm_ghash_v8(s.Xi.u, Htable, block, seglen); + + crypto_cursor_copyback(toc, (int)seglen, block); + } else { + for (oseglen = seglen; seglen >= AES_BLOCK_LEN; + seglen -= AES_BLOCK_LEN) { + armv8_aes_encrypt_gcm_block(&s, aes_key, from64, + to64); + gcm_ghash_v8(s.Xi.u, Htable, (uint8_t *)to64, + AES_BLOCK_LEN); + + from64 += 2; + to64 += 2; + } + + seglen = oseglen - seglen; + crypto_cursor_advance(fromc, seglen); + crypto_cursor_advance(toc, seglen); } - - gcm_ghash_v8(s.Xi.u, Htable, block, AES_BLOCK_LEN); } - armv8_aes_gmac_finish(&s, len, authdatalen, Htable); + armv8_aes_gmac_finish(&s, olen, authdatalen, Htable); memcpy(tag, s.Xi.c, GMAC_DIGEST_LEN); + explicit_bzero(block, sizeof(block)); explicit_bzero(&s, sizeof(s)); } int armv8_aes_decrypt_gcm(AES_key_t *aes_key, size_t len, - const uint8_t *from, uint8_t *to, + struct crypto_buffer_cursor *fromc, struct crypto_buffer_cursor *toc, size_t authdatalen, const uint8_t *authdata, const uint8_t tag[static GMAC_DIGEST_LEN], const uint8_t iv[static AES_GCM_IV_LEN], const __uint128_val_t *Htable) { struct armv8_gcm_state s; - const uint64_t *from64; - uint64_t *to64; - uint8_t block[AES_BLOCK_LEN]; - size_t i, trailer; + struct crypto_buffer_cursor fromcc; + uint8_t block[AES_BLOCK_LEN] __aligned(AES_BLOCK_LEN), *from; + uint64_t *block64, *from64, *to64; + size_t fromseglen, olen, oseglen, seglen, toseglen; int error; - error = 0; - armv8_aes_gmac_setup(&s, aes_key, authdata, authdatalen, iv, Htable); - trailer = len % AES_BLOCK_LEN; - if (len - trailer > 0) - gcm_ghash_v8(s.Xi.u, Htable, from, len - trailer); - if (trailer > 0) { - memset(block, 0, sizeof(block)); - memcpy(block, from + len - trailer, trailer); - gcm_ghash_v8(s.Xi.u, Htable, block, AES_BLOCK_LEN); + crypto_cursor_copy(fromc, &fromcc); + for (olen = len; len > 0; len -= seglen) { + from = crypto_cursor_segment(&fromcc, &fromseglen); + seglen = ulmin(len, fromseglen); + seglen -= seglen % AES_BLOCK_LEN; + if (seglen > 0) { + gcm_ghash_v8(s.Xi.u, Htable, from, seglen); + crypto_cursor_advance(&fromcc, seglen); + } else { + memset(block, 0, sizeof(block)); + seglen = ulmin(len, AES_BLOCK_LEN); + crypto_cursor_copydata(&fromcc, seglen, block); + gcm_ghash_v8(s.Xi.u, Htable, block, seglen); + } } - armv8_aes_gmac_finish(&s, len, authdatalen, Htable); + armv8_aes_gmac_finish(&s, olen, authdatalen, Htable); if (timingsafe_bcmp(tag, s.Xi.c, GMAC_DIGEST_LEN) != 0) { error = EBADMSG; goto out; } - from64 = (const uint64_t *)from; - to64 = (uint64_t *)to; + block64 = (uint64_t *)block; + for (len = olen; len > 0; len -= seglen) { + from64 = crypto_cursor_segment(fromc, &fromseglen); + to64 = crypto_cursor_segment(toc, &toseglen); - for (i = 0; i < (len - trailer); i += AES_BLOCK_LEN) { - aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key); - AES_INC_COUNTER(s.aes_counter); - to64[0] = from64[0] ^ s.EKi.u[0]; - to64[1] = from64[1] ^ s.EKi.u[1]; - to64 += 2; - from64 += 2; - } + seglen = ulmin(len, ulmin(fromseglen, toseglen)); + if (seglen < AES_BLOCK_LEN) { + seglen = ulmin(len, AES_BLOCK_LEN); - to += (len - trailer); - from += (len - trailer); + memset(block, 0, sizeof(block)); + crypto_cursor_copydata(fromc, seglen, block); - if (trailer) { - aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key); - AES_INC_COUNTER(s.aes_counter); - for (i = 0; i < trailer; i++) - to[i] = from[i] ^ s.EKi.c[i]; + armv8_aes_decrypt_gcm_block(&s, aes_key, block64, + block64); + + crypto_cursor_copyback(toc, (int)seglen, block); + } else { + for (oseglen = seglen; seglen >= 
AES_BLOCK_LEN; + seglen -= AES_BLOCK_LEN) { + armv8_aes_decrypt_gcm_block(&s, aes_key, from64, + to64); + + from64 += 2; + to64 += 2; + } + + seglen = oseglen - seglen; + crypto_cursor_advance(fromc, seglen); + crypto_cursor_advance(toc, seglen); + } } + error = 0; out: + explicit_bzero(block, sizeof(block)); explicit_bzero(&s, sizeof(s)); return (error); }
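
For reference, below is a minimal, userland-only sketch of the segment-walking pattern the converted routines share: clamp the work unit to the smaller of the two current cursor segments (and the remaining length), transform whole blocks in place, and bounce a block that straddles a segment boundary through a small stack buffer. The struct cursor type, its helpers, and the one-byte XOR "cipher" are hypothetical stand-ins invented for illustration; they are not the kernel's crypto_buffer_cursor API or the driver's AES routines, only an approximation of their shape.

/* Hypothetical stand-ins for the kernel's cursor API; illustration only. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLK 16	/* plays the role of AES_BLOCK_LEN */

struct seg { uint8_t *base; size_t len; };
struct cursor { struct seg *segs; size_t idx, off; };	/* mock crypto_buffer_cursor */

static uint8_t *
cursor_segment(struct cursor *c, size_t *seglen)
{
	*seglen = c->segs[c->idx].len - c->off;
	return (c->segs[c->idx].base + c->off);
}

static void
cursor_advance(struct cursor *c, size_t amt)
{
	while (amt >= c->segs[c->idx].len - c->off) {
		amt -= c->segs[c->idx].len - c->off;
		c->idx++;
		c->off = 0;
		if (amt == 0)
			return;
	}
	c->off += amt;
}

/* out == 0: copy data out of the buffer; out != 0: copy it back in. */
static void
cursor_copy(struct cursor *c, size_t len, uint8_t *buf, int out)
{
	uint8_t *p;
	size_t n, seglen;

	while (len > 0) {
		p = cursor_segment(c, &seglen);
		n = len < seglen ? len : seglen;
		memcpy(out ? p : buf, out ? buf : p, n);
		cursor_advance(c, n);
		buf += n;
		len -= n;
	}
}

/* Same loop shape as the converted routines; XOR with 'k' replaces AES. */
static void
xor_blocks(uint8_t k, size_t len, struct cursor *fromc, struct cursor *toc)
{
	uint8_t block[BLK], *from, *to;
	size_t fromseglen, i, oseglen, seglen, toseglen;

	assert(len % BLK == 0);
	for (; len > 0; len -= seglen) {
		from = cursor_segment(fromc, &fromseglen);
		to = cursor_segment(toc, &toseglen);
		seglen = fromseglen < toseglen ? fromseglen : toseglen;
		if (len < seglen)	/* seglen = ulmin(len, ulmin(fromseglen, toseglen)) */
			seglen = len;
		if (seglen < BLK) {
			/* Block straddles a segment boundary: bounce it. */
			cursor_copy(fromc, BLK, block, 0);
			for (i = 0; i < BLK; i++)
				block[i] ^= k;
			cursor_copy(toc, BLK, block, 1);
			seglen = BLK;
		} else {
			/* Whole blocks fit in both segments: work in place. */
			for (oseglen = seglen; seglen >= BLK; seglen -= BLK) {
				for (i = 0; i < BLK; i++)
					to[i] = from[i] ^ k;
				from += BLK;
				to += BLK;
			}
			seglen = oseglen - seglen;
			cursor_advance(fromc, seglen);
			cursor_advance(toc, seglen);
		}
	}
}

int
main(void)
{
	uint8_t in0[10], in1[22], out0[32];
	struct seg insegs[] = { { in0, sizeof(in0) }, { in1, sizeof(in1) } };
	struct seg outsegs[] = { { out0, sizeof(out0) } };
	struct cursor fromc = { insegs, 0, 0 }, toc = { outsegs, 0, 0 };
	size_t i;

	memset(in0, 0x11, sizeof(in0));
	memset(in1, 0x11, sizeof(in1));
	xor_blocks(0x5a, sizeof(out0), &fromc, &toc);
	for (i = 0; i < sizeof(out0); i++)
		assert(out0[i] == (0x11 ^ 0x5a));
	printf("ok\n");
	return (0);
}

Built as an ordinary C program, main() splits a 32-byte payload across two input segments so that one block straddles the boundary, runs the walker, and checks the output. The same control flow is what the patched armv8_aes_encrypt_cbc(), armv8_aes_crypt_xts(), and GCM loops implement with the real crypto_cursor_segment()/copydata()/copyback()/advance() primitives, which is what lets the driver consume non-contiguous crypto buffers without the old bounce-allocate-the-whole-payload path.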