Index: sys/crypto/ccp/ccp.h =================================================================== --- /dev/null +++ sys/crypto/ccp/ccp.h @@ -0,0 +1,234 @@ +/*- + * Copyright (c) 2017 Conrad Meyer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#pragma once + +/* + * Keccak SHAKE128 (if supported by the device?) uses a 1344 bit block. + * SHA3-224 is the next largest block size, at 1152 bits. However, crypto(4) + * doesn't support any SHA3 hash, so SHA2 is the constraint: + */ +#define CCP_HASH_MAX_BLOCK_SIZE (SHA2_512_HMAC_BLOCK_LEN) + +#define CCP_AES_MAX_KEY_LEN (AES_XTS_MAX_KEY) +#define CCP_MAX_CRYPTO_IV_LEN 32 /* GCM IV + GHASH context */ + +#define MAX_HW_QUEUES 5 +#define MAX_LSB_REGIONS 8 + +#ifndef __must_check +#define __must_check __attribute__((__warn_unused_result__)) +#endif + +/* + * Internal data structures. + */ +enum sha_version { + SHA1, +#if 0 + SHA2_224, +#endif + SHA2_256, SHA2_384, SHA2_512 +}; + +struct ccp_session_hmac { + struct auth_hash *auth_hash; + int hash_len; + unsigned int partial_digest_len; + unsigned int auth_mode; + unsigned int mk_size; + char ipad[CCP_HASH_MAX_BLOCK_SIZE]; + char opad[CCP_HASH_MAX_BLOCK_SIZE]; +}; + +struct ccp_session_gmac { + int hash_len; + char final_block[GMAC_BLOCK_LEN]; +}; + +struct ccp_session_blkcipher { + unsigned cipher_mode; + unsigned cipher_type; + unsigned key_len; + unsigned iv_len; + char enckey[CCP_AES_MAX_KEY_LEN]; + char deckey[CCP_AES_MAX_KEY_LEN]; + char iv[CCP_MAX_CRYPTO_IV_LEN]; +}; + +struct ccp_session { + bool active : 1; + bool cipher_first : 1; + int pending; + enum { HMAC, BLKCIPHER, AUTHENC, GCM } mode; + unsigned queue; + union { + struct ccp_session_hmac hmac; + struct ccp_session_gmac gmac; + }; + struct ccp_session_blkcipher blkcipher; +}; + +struct ccp_softc; +struct ccp_queue { + struct mtx cq_lock; + unsigned cq_qindex; + struct ccp_softc *cq_softc; + + /* Host memory and tracking structures for descriptor ring. */ + bus_dma_tag_t ring_desc_tag; + bus_dmamap_t ring_desc_map; + struct ccp_desc *desc_ring; + bus_addr_t desc_ring_bus_addr; + /* Callbacks and arguments ring; indices correspond to above ring. 
*/ + struct ccp_completion_ctx *completions_ring; + + uint32_t qcontrol; /* Cached register value */ + unsigned lsb_mask; /* LSBs available to queue */ + int private_lsb; /* Reserved LSB #, or -1 */ + + unsigned cq_head; + unsigned cq_tail; + unsigned cq_acq_tail; + + bool cq_waiting; /* Thread waiting for space */ + + struct sglist *cq_sg_crp; + struct sglist *cq_sg_ulptx; + struct sglist *cq_sg_dst; +}; + +struct ccp_completion_ctx { + void (*callback_fn)(struct ccp_queue *qp, struct ccp_session *s, + void *arg, int error); + void *callback_arg; + struct ccp_session *session; +}; + +struct ccp_softc { + device_t dev; + int32_t cid; + struct ccp_session *sessions; + int nsessions; + struct mtx lock; + bool detaching; + + unsigned ring_size_order; + + /* + * Each command queue is either public or private. "Private" + * (PSP-only) by default. PSP grants access to some queues to host via + * QMR (Queue Mask Register). Set bits are host accessible. + */ + uint8_t valid_queues; + + uint8_t hw_version; + uint8_t num_queues; + uint16_t hw_features; + uint16_t num_lsb_entries; + + /* Primary BAR (RID 2) used for register access */ + bus_space_tag_t pci_bus_tag; + bus_space_handle_t pci_bus_handle; + int pci_resource_id; + struct resource *pci_resource; + + /* Secondary BAR (RID 5) apparently used for MSI-X */ + int pci_resource_id_msix; + struct resource *pci_resource_msix; + + /* Interrupt resources */ + void *intr_tag[2]; + struct resource *intr_res[2]; + unsigned intr_count; + + struct ccp_queue queues[MAX_HW_QUEUES]; +}; + +/* Internal globals */ +SYSCTL_DECL(_hw_ccp); +MALLOC_DECLARE(M_CCP); +extern struct ccp_softc *g_ccp_softc; + +/* + * Internal hardware manipulation routines. + */ +int ccp_hw_attach(device_t dev); +void ccp_hw_detach(device_t dev); + +void ccp_queue_write_tail(struct ccp_queue *qp); + +#ifdef DDB +void db_ccp_show_hw(struct ccp_softc *sc); +void db_ccp_show_queue_hw(struct ccp_queue *qp); +#endif + +/* + * Internal hardware crypt-op submission routines. + */ +int ccp_authenc(struct ccp_queue *sc, struct ccp_session *s, + struct cryptop *crp, struct cryptodesc *crda, struct cryptodesc *crde) + __must_check; +int ccp_blkcipher(struct ccp_queue *sc, struct ccp_session *s, + struct cryptop *crp) __must_check; +int ccp_gcm(struct ccp_queue *sc, struct ccp_session *s, struct cryptop *crp, + struct cryptodesc *crda, struct cryptodesc *crde) __must_check; +int ccp_hmac(struct ccp_queue *sc, struct ccp_session *s, struct cryptop *crp) + __must_check; + +/* + * Internal hardware TRNG read routine. + */ +u_int random_ccp_read(void *v, u_int c); + +/* XXX */ +int ccp_queue_acquire_reserve(struct ccp_queue *qp, unsigned n, int mflags) + __must_check; +void ccp_queue_abort(struct ccp_queue *qp); +void ccp_queue_release(struct ccp_queue *qp); + +/* + * Internal inline routines. + */ +static inline unsigned +ccp_queue_get_active(struct ccp_queue *qp) +{ + struct ccp_softc *sc; + + sc = qp->cq_softc; + return ((qp->cq_tail - qp->cq_head) & ((1 << sc->ring_size_order) - 1)); +} + +static inline unsigned +ccp_queue_get_ring_space(struct ccp_queue *qp) +{ + struct ccp_softc *sc; + + sc = qp->cq_softc; + return ((1 << sc->ring_size_order) - ccp_queue_get_active(qp) - 1); +} Index: sys/crypto/ccp/ccp.c =================================================================== --- /dev/null +++ sys/crypto/ccp/ccp.c @@ -0,0 +1,1002 @@ +/*- + * Copyright (c) 2017 Chelsio Communications, Inc. + * Copyright (c) 2017 Conrad Meyer + * All rights reserved. 
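/*
 * A minimal userspace sketch (not driver code; hypothetical head/tail
 * values) of the wrap-around arithmetic used by ccp_queue_get_active()
 * and ccp_queue_get_ring_space() in ccp.h, with the default ring order
 * of 11:
 */
#include <assert.h>
#include <stdio.h>

#define RING_ORDER	11u
#define RING_SIZE	(1u << RING_ORDER)

static unsigned
ring_active(unsigned head, unsigned tail)
{
	/* Descriptors submitted (tail) but not yet completed (head). */
	return ((tail - head) & (RING_SIZE - 1));
}

static unsigned
ring_space(unsigned head, unsigned tail)
{
	/* One slot stays free so that head == tail always means "empty". */
	return (RING_SIZE - ring_active(head, tail) - 1);
}

int
main(void)
{
	/* Tail has wrapped past the end of the ring; the mask handles it. */
	assert(ring_active(2040, 4) == 12);
	assert(ring_space(2040, 4) == RING_SIZE - 13);
	printf("ok\n");
	return (0);
}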
+ * Largely borrowed from ccr(4), Written by: John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_ddb.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DDB +#include +#endif + +#include + +#include + +#include +#include + +#include "cryptodev_if.h" + +#include "ccp.h" +#include "ccp_hardware.h" + +MALLOC_DEFINE(M_CCP, "ccp", "AMD CCP crypto"); + +/* + * Need a global softc available for garbage random_source API, which lacks any + * context pointer. It's also handy for debugging. + */ +struct ccp_softc *g_ccp_softc; + +static struct pciid { + uint32_t devid; + const char *desc; +} ccp_ids[] = { + { 0x14561022, "AMD CCP-5a" }, + { 0x14681022, "AMD CCP-5b" }, +}; +MODULE_PNP_INFO("W32:vendor/device", pci, ccp, ccp_ids, sizeof(ccp_ids[0]), + nitems(ccp_ids)); + +static struct random_source random_ccp = { + .rs_ident = "AMD CCP TRNG", + .rs_source = RANDOM_PURE_CCP, + .rs_read = random_ccp_read, +}; + +/* + * ccp_populate_sglist() generates a scatter/gather list that covers the entire + * crypto operation buffer. + */ +static int +ccp_populate_sglist(struct sglist *sg, struct cryptop *crp) +{ + int error; + + sglist_reset(sg); + if (crp->crp_flags & CRYPTO_F_IMBUF) + error = sglist_append_mbuf(sg, (struct mbuf *)crp->crp_buf); + else if (crp->crp_flags & CRYPTO_F_IOV) + error = sglist_append_uio(sg, (struct uio *)crp->crp_buf); + else + error = sglist_append(sg, crp->crp_buf, crp->crp_ilen); + return (error); +} + +/* + * Handle a GCM request with an empty payload by performing the + * operation in software. Derived from swcr_authenc(). + */ +static void +ccp_gcm_soft(struct ccp_session *s, struct cryptop *crp, + struct cryptodesc *crda, struct cryptodesc *crde) +{ + struct aes_gmac_ctx gmac_ctx; + char block[GMAC_BLOCK_LEN]; + char digest[GMAC_DIGEST_LEN]; + char iv[AES_BLOCK_LEN]; + int i, len; + + /* + * This assumes a 12-byte IV from the crp. See longer comment + * above in ccp_gcm() for more details. 
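/*
 * Sketch only (hypothetical buffers, not part of the patch): with a 96-bit
 * GCM nonce the pre-counter block J0 is simply IV || 0x00000001 in
 * big-endian form, which is what the htobe32(1) store below constructs in
 * place.
 */
#include <stdint.h>
#include <string.h>

static void
gcm_build_j0(uint8_t j0[16], const uint8_t iv[12])
{
	/* J0 = 96-bit IV followed by a 32-bit counter initialized to 1. */
	memcpy(j0, iv, 12);
	j0[12] = 0;
	j0[13] = 0;
	j0[14] = 0;
	j0[15] = 1;
}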
+ */ + if (crde->crd_flags & CRD_F_ENCRYPT) { + if (crde->crd_flags & CRD_F_IV_EXPLICIT) + memcpy(iv, crde->crd_iv, 12); + else + arc4rand(iv, 12, 0); + } else { + if (crde->crd_flags & CRD_F_IV_EXPLICIT) + memcpy(iv, crde->crd_iv, 12); + else + crypto_copydata(crp->crp_flags, crp->crp_buf, + crde->crd_inject, 12, iv); + } + *(uint32_t *)&iv[12] = htobe32(1); + + /* Initialize the MAC. */ + AES_GMAC_Init(&gmac_ctx); + AES_GMAC_Setkey(&gmac_ctx, s->blkcipher.enckey, s->blkcipher.key_len); + AES_GMAC_Reinit(&gmac_ctx, iv, sizeof(iv)); + + /* MAC the AAD. */ + for (i = 0; i < crda->crd_len; i += sizeof(block)) { + len = imin(crda->crd_len - i, sizeof(block)); + crypto_copydata(crp->crp_flags, crp->crp_buf, crda->crd_skip + + i, len, block); + bzero(block + len, sizeof(block) - len); + AES_GMAC_Update(&gmac_ctx, block, sizeof(block)); + } + + /* Length block. */ + bzero(block, sizeof(block)); + ((uint32_t *)block)[1] = htobe32(crda->crd_len * 8); + AES_GMAC_Update(&gmac_ctx, block, sizeof(block)); + AES_GMAC_Final(digest, &gmac_ctx); + + if (crde->crd_flags & CRD_F_ENCRYPT) { + crypto_copyback(crp->crp_flags, crp->crp_buf, crda->crd_inject, + sizeof(digest), digest); + crp->crp_etype = 0; + } else { + char digest2[GMAC_DIGEST_LEN]; + + crypto_copydata(crp->crp_flags, crp->crp_buf, crda->crd_inject, + sizeof(digest2), digest2); + if (timingsafe_bcmp(digest, digest2, sizeof(digest)) == 0) + crp->crp_etype = 0; + else + crp->crp_etype = EBADMSG; + } + crypto_done(crp); +} + +static int +ccp_probe(device_t dev) +{ + struct pciid *ip; + uint32_t id; + + id = pci_get_devid(dev); + for (ip = ccp_ids; ip < &ccp_ids[nitems(ccp_ids)]; ip++) { + if (id == ip->devid) { + device_set_desc(dev, ip->desc); + return (0); + } + } + return (ENXIO); +} + +static void +ccp_initialize_queues(struct ccp_softc *sc) +{ + struct ccp_queue *qp; + size_t i; + + for (i = 0; i < nitems(sc->queues); i++) { + qp = &sc->queues[i]; + + qp->cq_softc = sc; + qp->cq_qindex = i; + mtx_init(&qp->cq_lock, "ccp queue", NULL, MTX_DEF); + /* XXX - arbitrarily chosen sizes */ + qp->cq_sg_crp = sglist_alloc(32, M_WAITOK); + /* Two more SGEs than sg_crp to accommodate ipad. 
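/*
 * The length block that ccp_gcm_soft() above feeds to GHASH is the AAD
 * length in bits as a 64-bit big-endian value followed by the ciphertext
 * length in bits; with an empty payload only the AAD half is non-zero.
 * A self-contained sketch of the general construction (hypothetical
 * lengths, not driver code):
 */
#include <stdint.h>

static void
ghash_length_block(uint8_t block[16], uint64_t aad_bytes, uint64_t ct_bytes)
{
	uint64_t abits = aad_bytes * 8, cbits = ct_bytes * 8;
	unsigned i;

	/* Serialize both bit counts big-endian, most significant byte first. */
	for (i = 0; i < 8; i++) {
		block[i] = abits >> (56 - 8 * i);
		block[8 + i] = cbits >> (56 - 8 * i);
	}
}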
*/ + qp->cq_sg_ulptx = sglist_alloc(34, M_WAITOK); + qp->cq_sg_dst = sglist_alloc(2, M_WAITOK); + } +} + +static void +ccp_free_queues(struct ccp_softc *sc) +{ + struct ccp_queue *qp; + size_t i; + + for (i = 0; i < nitems(sc->queues); i++) { + qp = &sc->queues[i]; + + mtx_destroy(&qp->cq_lock); + sglist_free(qp->cq_sg_crp); + sglist_free(qp->cq_sg_ulptx); + sglist_free(qp->cq_sg_dst); + } +} + +static int +ccp_attach(device_t dev) +{ + struct ccp_softc *sc; + int error; + + sc = device_get_softc(dev); + sc->dev = dev; + + device_printf(dev, "XXX%s: sc=%p\n", __func__, sc); + + sc->cid = crypto_get_driverid(dev, CRYPTOCAP_F_HARDWARE); + if (sc->cid < 0) { + device_printf(dev, "could not get crypto driver id\n"); + return (ENXIO); + } + + error = ccp_hw_attach(dev); + if (error != 0) + return (error); + + mtx_init(&sc->lock, "ccp", NULL, MTX_DEF); + + ccp_initialize_queues(sc); + + if (g_ccp_softc == NULL) { + g_ccp_softc = sc; + if ((sc->hw_features & VERSION_CAP_TRNG) != 0) + random_source_register(&random_ccp); + } + + if ((sc->hw_features & VERSION_CAP_AES) != 0) { + crypto_register(sc->cid, CRYPTO_AES_CBC, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_ICM, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_NIST_GCM_16, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_128_NIST_GMAC, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_192_NIST_GMAC, 0, 0); + crypto_register(sc->cid, CRYPTO_AES_256_NIST_GMAC, 0, 0); +#if 0 + crypto_register(sc->cid, CRYPTO_AES_XTS, 0, 0); +#endif + } + if ((sc->hw_features & VERSION_CAP_SHA) != 0) { + crypto_register(sc->cid, CRYPTO_SHA1_HMAC, 0, 0); + crypto_register(sc->cid, CRYPTO_SHA2_256_HMAC, 0, 0); + crypto_register(sc->cid, CRYPTO_SHA2_384_HMAC, 0, 0); + crypto_register(sc->cid, CRYPTO_SHA2_512_HMAC, 0, 0); + } + + return (0); +} + +static int +ccp_detach(device_t dev) +{ + struct ccp_softc *sc; + int i; + + sc = device_get_softc(dev); + + mtx_lock(&sc->lock); + for (i = 0; i < sc->nsessions; i++) { + if (sc->sessions[i].active || sc->sessions[i].pending != 0) { + mtx_unlock(&sc->lock); + return (EBUSY); + } + } + sc->detaching = true; + mtx_unlock(&sc->lock); + + crypto_unregister_all(sc->cid); + if (g_ccp_softc == sc && (sc->hw_features & VERSION_CAP_TRNG) != 0) + random_source_deregister(&random_ccp); + + ccp_hw_detach(dev); + ccp_free_queues(sc); + + if (g_ccp_softc == sc) + g_ccp_softc = NULL; + + free(sc->sessions, M_CCP); + mtx_destroy(&sc->lock); + return (0); +} + +static void +ccp_init_hmac_digest(struct ccp_session *s, int cri_alg, char *key, + int klen) +{ + union authctx auth_ctx; + struct auth_hash *axf; + u_int i; + + /* + * If the key is larger than the block size, use the digest of + * the key as the key instead. 
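/*
 * A userspace sketch of the RFC 2104 pad derivation performed by
 * ccp_init_hmac_digest() below, assuming a hypothetical 64-byte block (the
 * SHA-1/SHA-256 size) and a key that has already been shortened to at most
 * one block, as that code arranges:
 */
#include <stdint.h>
#include <string.h>

#define BLOCK_LEN	64
#define IPAD_BYTE	0x36
#define OPAD_BYTE	0x5c

static void
hmac_derive_pads(uint8_t ipad[BLOCK_LEN], uint8_t opad[BLOCK_LEN],
    const uint8_t *key, size_t klen)
{
	size_t i;

	/* Zero-pad the key to a full block, then XOR in the two constants. */
	memset(ipad, 0, BLOCK_LEN);
	memcpy(ipad, key, klen);	/* caller guarantees klen <= BLOCK_LEN */
	memcpy(opad, ipad, BLOCK_LEN);
	for (i = 0; i < BLOCK_LEN; i++) {
		ipad[i] ^= IPAD_BYTE;
		opad[i] ^= OPAD_BYTE;
	}
}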
+ */ + axf = s->hmac.auth_hash; + klen /= 8; + if (klen > axf->blocksize) { + axf->Init(&auth_ctx); + axf->Update(&auth_ctx, key, klen); + axf->Final(s->hmac.ipad, &auth_ctx); + explicit_bzero(&auth_ctx, sizeof(auth_ctx)); + klen = axf->hashsize; + } else + memcpy(s->hmac.ipad, key, klen); + + memset(s->hmac.ipad + klen, 0, axf->blocksize - klen); + memcpy(s->hmac.opad, s->hmac.ipad, axf->blocksize); + + for (i = 0; i < axf->blocksize; i++) { + s->hmac.ipad[i] ^= HMAC_IPAD_VAL; + s->hmac.opad[i] ^= HMAC_OPAD_VAL; + } +} + +static int +ccp_aes_check_keylen(int alg, int klen) +{ + + switch (klen) { + case 128: + case 192: + if (alg == CRYPTO_AES_XTS) + return (EINVAL); + break; + case 256: + break; + case 512: + if (alg != CRYPTO_AES_XTS) + return (EINVAL); + break; + default: + return (EINVAL); + } + return (0); +} + +/* + * Borrowed from cesa_prep_aes_key(). We should perhaps have a public + * function to generate this instead. + * + * NB: The crypto engine wants the words in the decryption key in reverse + * order. + */ +static void +ccp_aes_getdeckey(void *dec_key, const void *enc_key, unsigned int kbits) +{ + uint32_t ek[4 * (RIJNDAEL_MAXNR + 1)]; + uint32_t *dkey; + int i; + + rijndaelKeySetupEnc(ek, enc_key, kbits); + dkey = dec_key; + dkey += (kbits / 8) / 4; + + switch (kbits) { + case 128: + for (i = 0; i < 4; i++) + *--dkey = htobe32(ek[4 * 10 + i]); + break; + case 192: + for (i = 0; i < 2; i++) + *--dkey = htobe32(ek[4 * 11 + 2 + i]); + for (i = 0; i < 4; i++) + *--dkey = htobe32(ek[4 * 12 + i]); + break; + case 256: + for (i = 0; i < 4; i++) + *--dkey = htobe32(ek[4 * 13 + i]); + for (i = 0; i < 4; i++) + *--dkey = htobe32(ek[4 * 14 + i]); + break; + } + MPASS(dkey == dec_key); +} + +static void +ccp_aes_setkey(struct ccp_session *s, int alg, const void *key, int klen) +{ + unsigned kbits; + + if (alg == CRYPTO_AES_XTS) + kbits = klen / 2; + else + kbits = klen; + + switch (kbits) { + case 128: + s->blkcipher.cipher_type = CCP_AES_TYPE_128; + break; + case 192: + s->blkcipher.cipher_type = CCP_AES_TYPE_192; + break; + case 256: + s->blkcipher.cipher_type = CCP_AES_TYPE_256; + break; + default: + panic("should not get here"); + } + + s->blkcipher.key_len = klen / 8; + memcpy(s->blkcipher.enckey, key, s->blkcipher.key_len); + switch (alg) { + case CRYPTO_AES_XTS: + ccp_aes_getdeckey(s->blkcipher.deckey, key, kbits); + break; + } +} + +static int +ccp_newsession(device_t dev, uint32_t *sidp, struct cryptoini *cri) +{ + struct ccp_softc *sc; + struct ccp_session *s; + struct auth_hash *auth_hash; + struct cryptoini *c, *hash, *cipher; + enum ccp_aes_mode cipher_mode; + unsigned auth_mode, iv_len; + unsigned partial_digest_len; + unsigned q; + int error, i, sess; + bool gcm_hash; + + if (sidp == NULL || cri == NULL) + return (EINVAL); + + gcm_hash = false; + cipher = NULL; + hash = NULL; + auth_hash = NULL; + /* XXX reconcile auth_mode with use by ccp_sha */ + auth_mode = 0; + cipher_mode = CCP_AES_MODE_ECB; + iv_len = 0; + partial_digest_len = 0; + for (c = cri; c != NULL; c = c->cri_next) { + switch (c->cri_alg) { + case CRYPTO_SHA1_HMAC: + case CRYPTO_SHA2_256_HMAC: + case CRYPTO_SHA2_384_HMAC: + case CRYPTO_SHA2_512_HMAC: + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + if (hash) + return (EINVAL); + hash = c; + switch (c->cri_alg) { + case CRYPTO_SHA1_HMAC: + auth_hash = &auth_hash_hmac_sha1; + auth_mode = SHA1; + partial_digest_len = SHA1_HASH_LEN; + break; + case CRYPTO_SHA2_256_HMAC: + auth_hash = &auth_hash_hmac_sha2_256; 
+ auth_mode = SHA2_256; + partial_digest_len = SHA2_256_HASH_LEN; + break; + case CRYPTO_SHA2_384_HMAC: + auth_hash = &auth_hash_hmac_sha2_384; + auth_mode = SHA2_384; + partial_digest_len = SHA2_512_HASH_LEN; + break; + case CRYPTO_SHA2_512_HMAC: + auth_hash = &auth_hash_hmac_sha2_512; + auth_mode = SHA2_512; + partial_digest_len = SHA2_512_HASH_LEN; + break; + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + gcm_hash = true; +#if 0 + auth_mode = CHCR_SCMD_AUTH_MODE_GHASH; +#endif + break; + } + break; + case CRYPTO_AES_CBC: + case CRYPTO_AES_ICM: + case CRYPTO_AES_NIST_GCM_16: + case CRYPTO_AES_XTS: + if (cipher) + return (EINVAL); + cipher = c; + switch (c->cri_alg) { + case CRYPTO_AES_CBC: + cipher_mode = CCP_AES_MODE_CBC; + iv_len = AES_BLOCK_LEN; + break; + case CRYPTO_AES_ICM: + cipher_mode = CCP_AES_MODE_CTR; + iv_len = AES_BLOCK_LEN; + break; + case CRYPTO_AES_NIST_GCM_16: + cipher_mode = CCP_AES_MODE_GCTR; + iv_len = AES_GCM_IV_LEN; + break; + case CRYPTO_AES_XTS: +#if 0 + cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS; +#endif + iv_len = AES_BLOCK_LEN; + break; + } + if (c->cri_key != NULL) { + error = ccp_aes_check_keylen(c->cri_alg, + c->cri_klen); + if (error != 0) + return (error); + } + break; + default: + return (EINVAL); + } + } + if (gcm_hash != (cipher_mode == CCP_AES_MODE_GCTR)) + return (EINVAL); + if (hash == NULL && cipher == NULL) + return (EINVAL); + if (hash != NULL && hash->cri_key == NULL) + return (EINVAL); + + sc = device_get_softc(dev); + mtx_lock(&sc->lock); + if (sc->detaching) { + mtx_unlock(&sc->lock); + return (ENXIO); + } + sess = -1; + for (i = 0; i < sc->nsessions; i++) { + if (!sc->sessions[i].active && sc->sessions[i].pending == 0) { + sess = i; + break; + } + } + if (sess == -1) { + s = malloc(sizeof(*s) * (sc->nsessions + 1), M_CCP, + M_NOWAIT | M_ZERO); + if (s == NULL) { + mtx_unlock(&sc->lock); + return (ENOMEM); + } + if (sc->sessions != NULL) + memcpy(s, sc->sessions, sizeof(*s) * sc->nsessions); + sess = sc->nsessions; + free(sc->sessions, M_CCP); + sc->sessions = s; + sc->nsessions++; + } + + s = &sc->sessions[sess]; + + /* Just grab the first usable queue for now. 
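/*
 * The loop below scans valid_queues for the lowest host-accessible queue;
 * ffs(3) expresses the same search directly.  Sketch only (hypothetical
 * helper, not a proposed change to the patch):
 */
#include <stdint.h>
#include <strings.h>

static int
first_valid_queue(uint8_t valid_queues)
{
	int bit = ffs(valid_queues);	/* 1-based index of lowest set bit, 0 if none */

	return (bit == 0 ? -1 : bit - 1);
}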
*/ + for (q = 0; q < nitems(sc->queues); q++) + if ((sc->valid_queues & (1 << q)) != 0) + break; + if (q == nitems(sc->queues)) { + mtx_unlock(&sc->lock); + return (ENXIO); + } + s->queue = q; + + if (gcm_hash) + s->mode = GCM; + else if (hash != NULL && cipher != NULL) + s->mode = AUTHENC; + else if (hash != NULL) + s->mode = HMAC; + else { + MPASS(cipher != NULL); + s->mode = BLKCIPHER; + } + if (gcm_hash) { + if (hash->cri_mlen == 0) + s->gmac.hash_len = AES_GMAC_HASH_LEN; + else + s->gmac.hash_len = hash->cri_mlen; + } else if (hash != NULL) { + s->hmac.auth_hash = auth_hash; + s->hmac.auth_mode = auth_mode; + s->hmac.partial_digest_len = partial_digest_len; + if (hash->cri_mlen == 0) + s->hmac.hash_len = auth_hash->hashsize; + else + s->hmac.hash_len = hash->cri_mlen; + ccp_init_hmac_digest(s, hash->cri_alg, hash->cri_key, + hash->cri_klen); + } + if (cipher != NULL) { + s->blkcipher.cipher_mode = cipher_mode; + s->blkcipher.iv_len = iv_len; + if (cipher->cri_key != NULL) + ccp_aes_setkey(s, cipher->cri_alg, cipher->cri_key, + cipher->cri_klen); + } + + s->active = true; + mtx_unlock(&sc->lock); + + *sidp = sess; + return (0); +} + +static int +ccp_freesession(device_t dev, uint64_t tid) +{ + struct ccp_softc *sc; + uint32_t sid; + int error; + + sc = device_get_softc(dev); + sid = CRYPTO_SESID2LID(tid); + mtx_lock(&sc->lock); + if (sid >= sc->nsessions || !sc->sessions[sid].active) + error = EINVAL; + else { + if (sc->sessions[sid].pending != 0) + device_printf(dev, + "session %d freed with %d pending requests\n", sid, + sc->sessions[sid].pending); + sc->sessions[sid].active = false; + error = 0; + } + mtx_unlock(&sc->lock); + return (error); +} + +static int +ccp_process(device_t dev, struct cryptop *crp, int hint) +{ + struct ccp_softc *sc; + struct ccp_queue *qp; + struct ccp_session *s; + struct cryptodesc *crd, *crda, *crde; + uint32_t sid; + int error; + bool qpheld; + + qpheld = false; + qp = NULL; + if (crp == NULL) + return (EINVAL); + + crd = crp->crp_desc; + sid = CRYPTO_SESID2LID(crp->crp_sid); + sc = device_get_softc(dev); + mtx_lock(&sc->lock); + if (sid >= sc->nsessions || !sc->sessions[sid].active) { + mtx_unlock(&sc->lock); + error = EINVAL; + goto out; + } + + s = &sc->sessions[sid]; + qp = &sc->queues[s->queue]; + mtx_unlock(&sc->lock); + error = ccp_queue_acquire_reserve(qp, 1 /* placeholder */, M_NOWAIT); + if (error != 0) + goto out; + qpheld = true; + + error = ccp_populate_sglist(qp->cq_sg_crp, crp); + if (error != 0) + goto out; + + switch (s->mode) { + case HMAC: + if (crd->crd_flags & CRD_F_KEY_EXPLICIT) + ccp_init_hmac_digest(s, crd->crd_alg, crd->crd_key, + crd->crd_klen); + error = ccp_hmac(qp, s, crp); + break; + case BLKCIPHER: + if (crd->crd_flags & CRD_F_KEY_EXPLICIT) { + error = ccp_aes_check_keylen(crd->crd_alg, + crd->crd_klen); + if (error != 0) + break; + ccp_aes_setkey(s, crd->crd_alg, crd->crd_key, + crd->crd_klen); + } + error = ccp_blkcipher(qp, s, crp); + break; + case AUTHENC: + error = 0; + switch (crd->crd_alg) { + case CRYPTO_AES_CBC: + case CRYPTO_AES_ICM: + case CRYPTO_AES_XTS: + /* Only encrypt-then-authenticate supported. 
*/ + crde = crd; + crda = crd->crd_next; + if (!(crde->crd_flags & CRD_F_ENCRYPT)) { + error = EINVAL; + break; + } + s->cipher_first = true; + break; + default: + crda = crd; + crde = crd->crd_next; + if (crde->crd_flags & CRD_F_ENCRYPT) { + error = EINVAL; + break; + } + s->cipher_first = false; + break; + } + if (error != 0) + break; + if (crda->crd_flags & CRD_F_KEY_EXPLICIT) + ccp_init_hmac_digest(s, crda->crd_alg, crda->crd_key, + crda->crd_klen); + if (crde->crd_flags & CRD_F_KEY_EXPLICIT) { + error = ccp_aes_check_keylen(crde->crd_alg, + crde->crd_klen); + if (error != 0) + break; + ccp_aes_setkey(s, crde->crd_alg, crde->crd_key, + crde->crd_klen); + } + error = ccp_authenc(qp, s, crp, crda, crde); + break; + case GCM: + error = 0; + if (crd->crd_alg == CRYPTO_AES_NIST_GCM_16) { + crde = crd; + crda = crd->crd_next; + s->cipher_first = true; + } else { + crda = crd; + crde = crd->crd_next; + s->cipher_first = false; + } + if (crde->crd_flags & CRD_F_KEY_EXPLICIT) { + error = ccp_aes_check_keylen(crde->crd_alg, + crde->crd_klen); + if (error != 0) + break; + ccp_aes_setkey(s, crde->crd_alg, crde->crd_key, + crde->crd_klen); + } + if (crde->crd_len == 0) { + mtx_unlock(&qp->cq_lock); + ccp_gcm_soft(s, crp, crda, crde); + return (0); + } + error = ccp_gcm(qp, s, crp, crda, crde); + break; + } + + if (error == 0) + s->pending++; + +out: + if (qpheld) { + if (error != 0) { + /* + * Squash EAGAIN so callers don't uselessly and + * expensively retry if the ring was full. + */ + if (error == EAGAIN) + error = ENOMEM; + ccp_queue_abort(qp); + } else + ccp_queue_release(qp); + } + + if (error != 0) { + device_printf(dev, "XXX %s: early error:%d\n", __func__, + error); + crp->crp_etype = error; + crypto_done(crp); + } + return (0); +} + +static device_method_t ccp_methods[] = { + DEVMETHOD(device_probe, ccp_probe), + DEVMETHOD(device_attach, ccp_attach), + DEVMETHOD(device_detach, ccp_detach), + + DEVMETHOD(cryptodev_newsession, ccp_newsession), + DEVMETHOD(cryptodev_freesession, ccp_freesession), + DEVMETHOD(cryptodev_process, ccp_process), + + DEVMETHOD_END +}; + +static driver_t ccp_driver = { + "ccp", + ccp_methods, + sizeof(struct ccp_softc) +}; + +static devclass_t ccp_devclass; +DRIVER_MODULE(ccp, pci, ccp_driver, ccp_devclass, NULL, NULL); +MODULE_VERSION(ccp, 1); +MODULE_DEPEND(ccp, crypto, 1, 1, 1); +MODULE_DEPEND(ccp, random_device, 1, 1, 1); + +static int +ccp_queue_reserve_space(struct ccp_queue *qp, unsigned n, int mflags) +{ + struct ccp_softc *sc; + + mtx_assert(&qp->cq_lock, MA_OWNED); + sc = qp->cq_softc; + + if (n < 1 || n >= (1 << sc->ring_size_order)) + return (EINVAL); + + while (true) { + if (ccp_queue_get_ring_space(qp) >= n) + return (0); + if ((mflags & M_WAITOK) == 0) + return (EAGAIN); + qp->cq_waiting = true; + msleep(&qp->cq_tail, &qp->cq_lock, 0, "ccpqfull", 0); + } +} + +int +ccp_queue_acquire_reserve(struct ccp_queue *qp, unsigned n, int mflags) +{ + int error; + + mtx_lock(&qp->cq_lock); + qp->cq_acq_tail = qp->cq_tail; + error = ccp_queue_reserve_space(qp, n, mflags); + if (error != 0) + mtx_unlock(&qp->cq_lock); + return (error); +} + +void +ccp_queue_release(struct ccp_queue *qp) +{ + + mtx_assert(&qp->cq_lock, MA_OWNED); + if (qp->cq_tail != qp->cq_acq_tail) { + wmb(); + ccp_queue_write_tail(qp); + } + mtx_unlock(&qp->cq_lock); +} + +void +ccp_queue_abort(struct ccp_queue *qp) +{ + unsigned i; + + mtx_assert(&qp->cq_lock, MA_OWNED); + + /* Wipe out any descriptors associated with this aborted txn. 
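/*
 * Sketch of the reservation protocol implemented by the queue routines
 * below, from a caller's point of view.  fill_one_descriptor() is a
 * hypothetical stand-in for the real submission code, which also advances
 * qp->cq_tail as it writes descriptors:
 */
static int
ccp_submit_example(struct ccp_queue *qp)
{
	int error;

	/* On success the queue lock is held and one slot is guaranteed. */
	error = ccp_queue_acquire_reserve(qp, 1, M_NOWAIT);
	if (error != 0)
		return (error);		/* EAGAIN when the ring is full */

	error = fill_one_descriptor(qp);	/* hypothetical helper */
	if (error != 0) {
		/* Zeroes the reserved slots, rolls cq_tail back, unlocks. */
		ccp_queue_abort(qp);
		return (error);
	}

	/* Writes the tail doorbell if cq_tail moved, then unlocks. */
	ccp_queue_release(qp);
	return (0);
}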
*/ + for (i = qp->cq_acq_tail; i != qp->cq_tail; + i = (i + 1) % (1 << qp->cq_softc->ring_size_order)) { + memset(&qp->desc_ring[i], 0, sizeof(qp->desc_ring[i])); + } + qp->cq_tail = qp->cq_acq_tail; + + mtx_unlock(&qp->cq_lock); +} + +#ifdef DDB +#define _db_show_lock(lo) LOCK_CLASS(lo)->lc_ddb_show(lo) +#define db_show_lock(lk) _db_show_lock(&(lk)->lock_object) +static void +db_show_ccp_sc(struct ccp_softc *sc) +{ + + db_printf("ccp softc at %p\n", sc); + db_printf(" cid: %d\n", (int)sc->cid); + db_printf(" nsessions: %d\n", sc->nsessions); + + db_printf(" lock: "); + db_show_lock(&sc->lock); + + db_printf(" detaching: %d\n", (int)sc->detaching); + db_printf(" ring_size_order: %u\n", sc->ring_size_order); + + db_printf(" hw_version: %d\n", (int)sc->hw_version); + db_printf(" hw_features: %b\n", (int)sc->hw_features, + "\20\24ELFC\23TRNG\22Zip_Compress\16Zip_Decompress\13ECC\12RSA" + "\11SHA\0103DES\07AES"); + + db_printf(" hw status:\n"); + db_ccp_show_hw(sc); +} + +static void +db_show_ccp_qp(struct ccp_queue *qp) +{ + + db_printf(" lock: "); + db_show_lock(&qp->cq_lock); + +#if 0 + db_printf(" head: %u\n", sc->head); + db_printf(" tail: %u\n", sc->tail); + db_printf(" hw_head: %u\n", sc->hw_head); + db_printf(" last_seen: 0x%lx\n", sc->last_seen); + db_printf(" ring: %p\n", sc->ring); + db_printf(" descriptors: %p\n", sc->hw_desc_ring); + db_printf(" descriptors (phys): 0x%jx\n", + (uintmax_t)sc->hw_desc_bus_addr); + + db_printf(" ring[%u] (tail):\n", sc->tail % + (1 << sc->ring_size_order)); + db_printf(" id: %u\n", ioat_get_ring_entry(sc, sc->tail)->id); + db_printf(" addr: 0x%lx\n", + RING_PHYS_ADDR(sc, sc->tail)); + db_printf(" next: 0x%lx\n", + ioat_get_descriptor(sc, sc->tail)->generic.next); + + db_printf(" ring[%u] (head - 1):\n", (sc->head - 1) % + (1 << sc->ring_size_order)); + db_printf(" id: %u\n", ioat_get_ring_entry(sc, sc->head - 1)->id); + db_printf(" addr: 0x%lx\n", + RING_PHYS_ADDR(sc, sc->head - 1)); + db_printf(" next: 0x%lx\n", + ioat_get_descriptor(sc, sc->head - 1)->generic.next); + + db_printf(" ring[%u] (head):\n", (sc->head) % + (1 << sc->ring_size_order)); + db_printf(" id: %u\n", ioat_get_ring_entry(sc, sc->head)->id); + db_printf(" addr: 0x%lx\n", + RING_PHYS_ADDR(sc, sc->head)); + db_printf(" next: 0x%lx\n", + ioat_get_descriptor(sc, sc->head)->generic.next); + + for (idx = 0; idx < (1 << sc->ring_size_order); idx++) + if ((*sc->comp_update & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_MASK) + == RING_PHYS_ADDR(sc, idx)) + db_printf(" ring[%u] == hardware tail\n", idx); +#endif + + db_printf(" hw status:\n"); + db_ccp_show_queue_hw(qp); +} + +DB_SHOW_COMMAND(ccp, db_show_ccp) +{ + struct ccp_softc *sc; + unsigned unit, qindex; + + if (!have_addr) + goto usage; + + unit = (unsigned)addr; + + sc = devclass_get_softc(ccp_devclass, unit); + if (sc == NULL) { + db_printf("No such device ccp%u\n", unit); + goto usage; + } + + if (count == -1) { + db_show_ccp_sc(sc); + return; + } + + qindex = (unsigned)count; + if (qindex >= nitems(sc->queues)) { + db_printf("No such queue %u\n", qindex); + goto usage; + } + db_show_ccp_qp(&sc->queues[qindex]); + return; + +usage: + db_printf("usage: show ccp [,]\n"); + return; +} +#endif /* DDB */ Index: sys/crypto/ccp/ccp_hardware.h =================================================================== --- /dev/null +++ sys/crypto/ccp/ccp_hardware.h @@ -0,0 +1,417 @@ +/*- + * Copyright (c) 2017 Conrad Meyer + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#pragma once + +#define CMD_QUEUE_MASK_OFFSET 0x000 +#define CMD_QUEUE_PRIO_OFFSET 0x004 +#define CMD_REQID_CONFIG_OFFSET 0x008 +#define TRNG_OUT_OFFSET 0x00C +#define CMD_CMD_TIMEOUT_OFFSET 0x010 +#define LSB_PUBLIC_MASK_LO_OFFSET 0x018 +#define LSB_PUBLIC_MASK_HI_OFFSET 0x01C +#define LSB_PRIVATE_MASK_LO_OFFSET 0x020 +#define LSB_PRIVATE_MASK_HI_OFFSET 0x024 + +#define VERSION_REG 0x100 +#define VERSION_NUM_MASK 0x3F +#define VERSION_CAP_MASK 0x7FC0 +#define VERSION_CAP_AES (1 << 6) +#define VERSION_CAP_3DES (1 << 7) +#define VERSION_CAP_SHA (1 << 8) +#define VERSION_CAP_RSA (1 << 9) +#define VERSION_CAP_ECC (1 << 10) +#define VERSION_CAP_ZDE (1 << 11) +#define VERSION_CAP_ZCE (1 << 12) +#define VERSION_CAP_TRNG (1 << 13) +#define VERSION_CAP_ELFC (1 << 14) +#define VERSION_NUMVQM_SHIFT 15 +#define VERSION_NUMVQM_MASK 0xF +#define VERSION_LSBSIZE_SHIFT 19 +#define VERSION_LSBSIZE_MASK 0x3FF + +#define CMD_Q_CONTROL_BASE 0x000 +#define CMD_Q_TAIL_LO_BASE 0x004 +#define CMD_Q_HEAD_LO_BASE 0x008 +#define CMD_Q_INT_ENABLE_BASE 0x00C +#define CMD_Q_INTERRUPT_STATUS_BASE 0x010 + +#define CMD_Q_STATUS_BASE 0x100 +#define CMD_Q_INT_STATUS_BASE 0x104 + +#define CMD_Q_STATUS_INCR 0x1000 + +/* Don't think there's much point in keeping these -- OS can't access: */ +#define CMD_CONFIG_0_OFFSET 0x6000 +#define CMD_TRNG_CTL_OFFSET 0x6008 +#define CMD_AES_MASK_OFFSET 0x6010 +#define CMD_CLK_GATE_CTL_OFFSET 0x603C + +/* CMD_Q_CONTROL_BASE bits */ +#define CMD_Q_RUN (1 << 0) +#define CMD_Q_HALTED (1 << 1) +#define CMD_Q_MEM_LOCATION (1 << 2) +#define CMD_Q_SIZE_SHIFT 3 +#define CMD_Q_SIZE_MASK 0x1F +#define CMD_Q_PTR_HI_SHIFT 16 +#define CMD_Q_PTR_HI_MASK 0xFFFF + +/* + * The following bits are used for both CMD_Q_INT_ENABLE_BASE and + * CMD_Q_INTERRUPT_STATUS_BASE. 
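/*
 * Userspace sketch of decoding the VERSION register with the masks defined
 * above (hypothetical register value; illustration only, not part of the
 * patch):
 */
#include <stdint.h>
#include <stdio.h>

static void
ccp_decode_version_example(uint32_t version)
{
	unsigned num, vqm, lsbsize;

	num = version & VERSION_NUM_MASK;
	vqm = (version >> VERSION_NUMVQM_SHIFT) & VERSION_NUMVQM_MASK;
	lsbsize = (version >> VERSION_LSBSIZE_SHIFT) & VERSION_LSBSIZE_MASK;
	printf("CCP version %u, %u queues, %u LSB entries, AES=%d SHA=%d TRNG=%d\n",
	    num, vqm, lsbsize,
	    (version & VERSION_CAP_AES) != 0,
	    (version & VERSION_CAP_SHA) != 0,
	    (version & VERSION_CAP_TRNG) != 0);
}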
+ */ +#define INT_COMPLETION (1 << 0) +#define INT_ERROR (1 << 1) +#define INT_QUEUE_STOPPED (1 << 2) +#define INT_QUEUE_EMPTY (1 << 3) +#define ALL_INTERRUPTS (INT_COMPLETION | \ + INT_ERROR | \ + INT_QUEUE_STOPPED | \ + INT_QUEUE_EMPTY) + +#define STATUS_ERROR_MASK 0x3F +#define STATUS_JOBSTATUS_SHIFT 7 +#define STATUS_JOBSTATUS_MASK 0x7 +#define STATUS_ERRORSOURCE_SHIFT 10 +#define STATUS_ERRORSOURCE_MASK 0x3 +#define STATUS_VLSB_FAULTBLOCK_SHIFT 12 +#define STATUS_VLSB_FAULTBLOCK_MASK 0x7 + +/* From JOBSTATUS field in STATUS register above */ +#define JOBSTATUS_IDLE 0 +#define JOBSTATUS_ACTIVE_WAITING 1 +#define JOBSTATUS_ACTIVE 2 +#define JOBSTATUS_WAIT_ABORT 3 +#define JOBSTATUS_DYN_ERROR 4 +#define JOBSTATUS_PREPARE_HALT 5 + +/* From ERRORSOURCE field in STATUS register */ +#define ERRORSOURCE_INPUT_MEMORY 0 +#define ERRORSOURCE_CMD_DESCRIPTOR 1 +#define ERRORSOURCE_INPUT_DATA 2 +#define ERRORSOURCE_KEY_DATA 3 + +#define Q_DESC_SIZE sizeof(struct ccp_desc) + +enum ccp_aes_mode { + CCP_AES_MODE_ECB = 0, + CCP_AES_MODE_CBC, + CCP_AES_MODE_OFB, + CCP_AES_MODE_CFB, + CCP_AES_MODE_CTR, + CCP_AES_MODE_CMAC, + CCP_AES_MODE_GHASH, + CCP_AES_MODE_GCTR, +}; + +enum ccp_aes_ghash_mode { + CCP_AES_MODE_GHASH_AAD = 0, + CCP_AES_MODE_GHASH_FINAL, +}; + +enum ccp_aes_type { + CCP_AES_TYPE_128 = 0, + CCP_AES_TYPE_192, + CCP_AES_TYPE_256, +}; + +enum ccp_des_mode { + CCP_DES_MODE_ECB = 0, + CCP_DES_MODE_CBC, + CCP_DES_MODE_CFB, +}; + +enum ccp_des_type { + CCP_DES_TYPE_128 = 0, /* 112 + 16 parity */ + CCP_DES_TYPE_192, /* 168 + 24 parity */ +}; + +enum ccp_sha_type { + CCP_SHA_TYPE_1 = 1, + CCP_SHA_TYPE_224, + CCP_SHA_TYPE_256, + CCP_SHA_TYPE_384, + CCP_SHA_TYPE_512, + CCP_SHA_TYPE_RSVD1, + CCP_SHA_TYPE_RSVD2, + CCP_SHA3_TYPE_224, + CCP_SHA3_TYPE_256, + CCP_SHA3_TYPE_384, + CCP_SHA3_TYPE_512, +}; + +enum ccp_cipher_algo { + CCP_CIPHER_ALGO_AES_CBC = 0, + CCP_CIPHER_ALGO_AES_ECB, + CCP_CIPHER_ALGO_AES_CTR, + CCP_CIPHER_ALGO_AES_GCM, + CCP_CIPHER_ALGO_3DES_CBC, +}; + +enum ccp_cipher_dir { + CCP_CIPHER_DIR_DECRYPT = 0, + CCP_CIPHER_DIR_ENCRYPT = 1, +}; + +enum ccp_hash_algo { + CCP_AUTH_ALGO_SHA1 = 0, + CCP_AUTH_ALGO_SHA1_HMAC, + CCP_AUTH_ALGO_SHA224, + CCP_AUTH_ALGO_SHA224_HMAC, + CCP_AUTH_ALGO_SHA3_224, + CCP_AUTH_ALGO_SHA3_224_HMAC, + CCP_AUTH_ALGO_SHA256, + CCP_AUTH_ALGO_SHA256_HMAC, + CCP_AUTH_ALGO_SHA3_256, + CCP_AUTH_ALGO_SHA3_256_HMAC, + CCP_AUTH_ALGO_SHA384, + CCP_AUTH_ALGO_SHA384_HMAC, + CCP_AUTH_ALGO_SHA3_384, + CCP_AUTH_ALGO_SHA3_384_HMAC, + CCP_AUTH_ALGO_SHA512, + CCP_AUTH_ALGO_SHA512_HMAC, + CCP_AUTH_ALGO_SHA3_512, + CCP_AUTH_ALGO_SHA3_512_HMAC, + CCP_AUTH_ALGO_AES_CMAC, + CCP_AUTH_ALGO_AES_GCM, +}; + +enum ccp_hash_op { + CCP_AUTH_OP_GENERATE = 0, + CCP_AUTH_OP_VERIFY = 1, +}; + +enum ccp_engine { + CCP_ENGINE_AES = 0, + CCP_ENGINE_XTS_AES_128, + CCP_ENGINE_3DES, + CCP_ENGINE_SHA, + CCP_ENGINE_RSA, + CCP_ENGINE_PASSTHRU, + CCP_ENGINE_ZLIB_DECOMPRESS, + CCP_ENGINE_ECC, +}; + +enum ccp_passthru_bitwise { + CCP_PASSTHRU_BITWISE_NOOP = 0, + CCP_PASSTHRU_BITWISE_AND, + CCP_PASSTHRU_BITWISE_OR, + CCP_PASSTHRU_BITWISE_XOR, + CCP_PASSTHRU_BITWISE_MASK, +}; + +enum ccp_passthru_byteswap { + CCP_PASSTHRU_BYTESWAP_NOOP = 0, + CCP_PASSTHRU_BYTESWAP_32BIT, + CCP_PASSTHRU_BYTESWAP_256BIT, +}; + +/** + * descriptor for version 5 CPP commands + * 8 32-bit words: + * word 0: function; engine; control bits + * word 1: length of source data + * word 2: low 32 bits of source pointer + * word 3: upper 16 bits of source pointer; source memory type + * word 4: low 32 bits of destination pointer + * word 5: 
upper 16 bits of destination pointer; destination memory + * type + * word 6: low 32 bits of key pointer + * word 7: upper 16 bits of key pointer; key memory type + */ + +struct ccp_desc { + union dword0 { + struct { + uint32_t hoc:1; /* Halt on completion */ + uint32_t ioc:1; /* Intr. on completion */ + uint32_t reserved_1:1; + uint32_t som:1; /* Start of message */ + uint32_t eom:1; /* End " */ + uint32_t size:7; + uint32_t encrypt:1; + uint32_t mode:5; + uint32_t type:2; + uint32_t engine:4; + uint32_t prot:1; + uint32_t reserved_2:7; + } aes; + struct { + uint32_t hoc:1; /* Halt on completion */ + uint32_t ioc:1; /* Intr. on completion */ + uint32_t reserved_1:1; + uint32_t som:1; /* Start of message */ + uint32_t eom:1; /* End " */ + uint32_t size:7; + uint32_t encrypt:1; + uint32_t mode:5; + uint32_t type:2; + uint32_t engine:4; + uint32_t prot:1; + uint32_t reserved_2:7; + } des; + struct { + uint32_t hoc:1; /* Halt on completion */ + uint32_t ioc:1; /* Intr. on completion */ + uint32_t reserved_1:1; + uint32_t som:1; /* Start of message */ + uint32_t eom:1; /* End " */ + uint32_t size:7; + uint32_t encrypt:1; + uint32_t reserved_2:5; + uint32_t type:2; + uint32_t engine:4; + uint32_t prot:1; + uint32_t reserved_3:7; + } aes_xts; + struct { + uint32_t hoc:1; /* Halt on completion */ + uint32_t ioc:1; /* Intr. on completion */ + uint32_t reserved_1:1; + uint32_t som:1; /* Start of message */ + uint32_t eom:1; /* End " */ + uint32_t reserved_2:10; + uint32_t type:4; + uint32_t reserved_3:1; + uint32_t engine:4; + uint32_t prot:1; + uint32_t reserved_4:7; + } sha; + struct { + uint32_t hoc:1; /* Halt on completion */ + uint32_t ioc:1; /* Intr. on completion */ + uint32_t reserved_1:1; + uint32_t som:1; /* Start of message */ + uint32_t eom:1; /* End " */ + uint32_t mode:3; + uint32_t size:12; + uint32_t engine:4; + uint32_t prot:1; + uint32_t reserved_2:7; + } rsa; + struct { + uint32_t hoc:1; /* Halt on completion */ + uint32_t ioc:1; /* Intr. on completion */ + uint32_t reserved_1:1; + uint32_t som:1; /* Start of message */ + uint32_t eom:1; /* End " */ + uint32_t byteswap:2; + uint32_t bitwise:3; + uint32_t reflect:2; + uint32_t reserved_2:8; + uint32_t engine:4; + uint32_t prot:1; + uint32_t reserved_3:7; + } pt; + struct { + uint32_t hoc:1; /* Halt on completion */ + uint32_t ioc:1; /* Intr. on completion */ + uint32_t reserved_1:1; + uint32_t som:1; /* Start of message */ + uint32_t eom:1; /* End " */ + uint32_t reserved_2:13; + uint32_t reserved_3:2; + uint32_t engine:4; + uint32_t prot:1; + uint32_t reserved_4:7; + } zlib; + struct { + uint32_t hoc:1; /* Halt on completion */ + uint32_t ioc:1; /* Intr. on completion */ + uint32_t reserved_1:1; + uint32_t som:1; /* Start of message */ + uint32_t eom:1; /* End " */ + uint32_t size:10; + uint32_t type:2; + uint32_t mode:3; + uint32_t engine:4; + uint32_t prot:1; + uint32_t reserved_2:7; + } ecc; + struct { + uint32_t hoc:1; /* Halt on completion */ + uint32_t ioc:1; /* Intr. 
on completion */ + uint32_t reserved_1:1; + uint32_t som:1; /* Start of message */ + uint32_t eom:1; /* End " */ + uint32_t function:15; + uint32_t engine:4; + uint32_t prot:1; + uint32_t reserved_2:7; + } /* generic */; + }; + + uint32_t length; + uint32_t src_lo; + + struct dword3 { + uint32_t src_hi:16; + uint32_t src_mem:2; + uint32_t lsb_ctx_id:8; + uint32_t reserved_3:5; + uint32_t src_fixed:1; + }; + + union dword4 { + uint32_t dst_lo; /* NON-SHA */ + uint32_t sha_len_lo; /* SHA */ + }; + + union dword5 { + struct { + uint32_t dst_hi:16; + uint32_t dst_mem:2; + uint32_t reserved_4:13; + uint32_t dst_fixed:1; + }; + uint32_t sha_len_hi; + }; + + uint32_t key_lo; + + struct dword7 { + uint32_t key_hi:16; + uint32_t key_mem:2; + uint32_t reserved_5:14; + }; +}; + +enum ccp_memtype { + CCP_MEMTYPE_SYSTEM = 0, + CCP_MEMTYPE_SB, + CCP_MEMTYPE_LOCAL, +}; + +enum ccp_cmd_order { + CCP_CMD_CIPHER = 0, + CCP_CMD_AUTH, + CCP_CMD_CIPHER_HASH, + CCP_CMD_HASH_CIPHER, + CCP_CMD_COMBINED, + CCP_CMD_NOT_SUPPORTED, +}; Index: sys/crypto/ccp/ccp_hardware.c =================================================================== --- /dev/null +++ sys/crypto/ccp/ccp_hardware.c @@ -0,0 +1,2000 @@ +/*- + * Copyright (c) 2017 Chelsio Communications, Inc. + * Copyright (c) 2017 Conrad Meyer + * All rights reserved. + * Largely borrowed from ccr(4), Written by: John Baldwin + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_ddb.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef DDB +#include +#endif + +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include + +#include "cryptodev_if.h" + +#include "ccp.h" +#include "ccp_hardware.h" +#include "ccp_lsb.h" + +CTASSERT(sizeof(struct ccp_desc) == 32); + +SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD, 0, "ccp node"); + +unsigned g_ccp_ring_order = 11; +SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order, + 0, "Set CCP ring order. (1 << this) == ring size. 
Min: 6, Max: 16"); + +static inline uint32_t +ccp_read_4(struct ccp_softc *sc, uint32_t offset) +{ + return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset)); +} + +static inline void +ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value) +{ + bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value); +} + +static inline uint32_t +ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset) +{ + /* + * Each queue gets its own 4kB register space. Queue 0 is at 0x1000. + */ + return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset)); +} + +static inline void +ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset, + uint32_t value) +{ + ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value); +} + +void +ccp_queue_write_tail(struct ccp_queue *qp) +{ + ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE, + ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail)); +} + +/* + * Given a queue and a reserved LSB entry index, compute the LSB *entry id* of + * that entry for the queue's private LSB region. + */ +static inline uint8_t +ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry) +{ + return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry)); +} + +/* + * Given a queue and a reserved LSB entry index, compute the LSB *address* of + * that entry for the queue's private LSB region. + */ +static inline uint32_t +ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry) +{ + return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE); +} + +/* + * Some terminology: + * + * LSB - Local Storage Block + * ========================= + * + * 8 segments/regions, each containing 16 entries. + * + * Each entry contains 256 bits (32 bytes). + * + * Segments are virtually addressed in commands, but accesses cannot cross + * segment boundaries. Virtual map uses an identity mapping by default + * (virtual segment N corresponds to physical segment N). + * + * Access to a physical region can be restricted to any subset of all five + * queues. + * + * "Pass-through" mode + * =================== + * + * Pass-through is a generic DMA engine, much like ioat(4). Some nice + * features: + * + * - Supports byte-swapping for endian conversion (32- or 256-bit words) + * - AND, OR, XOR with fixed 256-bit mask + * - CRC32 of data (may be used in tandem with bswap, but not bit operations) + * - Read/write of LSB + * - Memset + * + * If bit manipulation mode is enabled, input must be a multiple of 256 bits + * (32 bytes). + * + * If byte-swapping is enabled, input must be a multiple of the word size. + * + * Zlib mode -- only usable from one queue at a time, single job at a time. + * ======================================================================== + * + * Only usable from private host, aka PSP? Not host processor? + * + * RNG. + * ==== + * + * Raw bits are conditioned with AES and fed through CTR_DRBG. Output goes in + * a ring buffer readable by software. + * + * NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are + * implemented on the raw input stream and may be enabled to verify min-entropy + * of 0.5 bits per bit. 
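/*
 * Userspace sketch of the two address computations used below: each
 * queue's registers occupy their own 4 KB window starting at 0x1000, and a
 * private LSB region holds 16 entries of 32 bytes each.  The constants
 * mirror CMD_Q_STATUS_INCR and the LSB_REGION_LENGTH/LSB_ENTRY_SIZE values
 * assumed from ccp_lsb.h (not shown in this diff):
 */
#include <stdint.h>

#define QUEUE_WINDOW	0x1000u		/* per-queue register stride */
#define LSB_ENTRIES	16u		/* entries per LSB region */
#define LSB_ENTRY_LEN	32u		/* bytes per entry (256 bits) */

static uint32_t
queue_reg_offset(unsigned queue, uint32_t offset)
{
	/* Queue 0 lives at 0x1000, queue 1 at 0x2000, and so on. */
	return (QUEUE_WINDOW * (1 + queue) + offset);
}

static uint32_t
lsb_entry_address(unsigned region, unsigned entry)
{
	/* LSB address = entry id scaled by the assumed 32-byte entry size. */
	return ((region * LSB_ENTRIES + entry) * LSB_ENTRY_LEN);
}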
+ */ + +static void +ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) +{ + bus_addr_t *baddr; + + KASSERT(error == 0, ("%s: error:%d", __func__, error)); + baddr = arg; + *baddr = segs->ds_addr; +} + +static int +ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue) +{ + struct ccp_softc *sc; + struct ccp_queue *qp; + void *desc; + size_t ringsz, num_descriptors; + int error; + + desc = NULL; + sc = device_get_softc(dev); + qp = &sc->queues[queue]; + + /* + * Don't bother allocating a ring for queues the host isn't allowed to + * drive. + */ + if ((sc->valid_queues & (1 << queue)) == 0) + return (0); + + ccp_queue_decode_lsb_regions(sc, lsbmask, queue); + + /* Ignore queues that do not have any LSB access. */ + if (qp->lsb_mask == 0) { + device_printf(dev, "Ignoring queue %u with no LSB access\n", + queue); + sc->valid_queues &= ~(1 << queue); + return (0); + } + + num_descriptors = 1 << sc->ring_size_order; + ringsz = sizeof(struct ccp_desc) * num_descriptors; + + /* + * "Queue_Size" is order - 1. + * + * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits. + */ + error = bus_dma_tag_create(bus_get_dma_tag(dev), + 1 << (5 + sc->ring_size_order), +#if defined(__i386__) && !defined(PAE) + 0, BUS_SPACE_MAXADDR, +#else + (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT, +#endif + BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1, + ringsz, 0, NULL, NULL, &qp->ring_desc_tag); + if (error != 0) + goto out; + + error = bus_dmamem_alloc(qp->ring_desc_tag, &desc, + BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map); + if (error != 0) + goto out; + + error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc, + ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK); + if (error != 0) + goto out; + + qp->desc_ring = desc; + qp->completions_ring = malloc(num_descriptors * + sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK); + + /* Zero control register; among other things, clears the RUN flag. */ + qp->qcontrol = 0; + ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol); + ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0); + + /* Clear any leftover interrupt status flags */ + ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE, + ALL_INTERRUPTS); + + qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT; + + ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE, + (uint32_t)qp->desc_ring_bus_addr); + ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE, + (uint32_t)qp->desc_ring_bus_addr); + + /* + * Enable completion interrupts, as well as error or administrative + * halt interrupts. We don't use administrative halts, but they + * shouldn't trip unless we do, so it ought to be harmless. + */ + ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, + INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED); + + qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT; + qp->qcontrol |= CMD_Q_RUN; + ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol); + +out: + if (error != 0) { + if (qp->desc_ring != NULL) + bus_dmamap_unload(qp->ring_desc_tag, + qp->ring_desc_map); + if (desc != NULL) + bus_dmamem_free(qp->ring_desc_tag, desc, + qp->ring_desc_map); + if (qp->ring_desc_tag != NULL) + bus_dma_tag_destroy(qp->ring_desc_tag); + } + return (error); +} + +static void +ccp_hw_detach_queue(device_t dev, unsigned queue) +{ + struct ccp_softc *sc; + struct ccp_queue *qp; + + sc = device_get_softc(dev); + qp = &sc->queues[queue]; + + /* + * Don't bother allocating a ring for queues the host isn't allowed to + * drive. 
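/*
 * Sketch of the ring geometry constraint described in ccp_hw_attach_queue()
 * above: with 32-byte descriptors and 2^order entries, the ring's total
 * size equals its required alignment of 1 << (5 + order) bytes
 * (hypothetical standalone check, not driver code):
 */
#include <assert.h>
#include <stddef.h>

static void
check_ring_geometry(unsigned order)
{
	size_t ringsz = (size_t)32 << order;	/* sizeof(struct ccp_desc) == 32 */
	size_t align = (size_t)1 << (5 + order);

	assert(ringsz == align);
}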
+ */ + if ((sc->valid_queues & (1 << queue)) == 0) + return; + + free(qp->completions_ring, M_CCP); + bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map); + bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map); + bus_dma_tag_destroy(qp->ring_desc_tag); +} + +static int +ccp_map_pci_bar(device_t dev) +{ + struct ccp_softc *sc; + + sc = device_get_softc(dev); + + sc->pci_resource_id = PCIR_BAR(2); + sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &sc->pci_resource_id, RF_ACTIVE); + if (sc->pci_resource == NULL) { + device_printf(dev, "unable to allocate pci resource\n"); + return (ENODEV); + } + + sc->pci_resource_id_msix = PCIR_BAR(5); + sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &sc->pci_resource_id_msix, RF_ACTIVE); + if (sc->pci_resource_msix == NULL) { + device_printf(dev, "unable to allocate pci resource msix\n"); + bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id, + sc->pci_resource); + return (ENODEV); + } + + sc->pci_bus_tag = rman_get_bustag(sc->pci_resource); + sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource); + return (0); +} + +static void +ccp_unmap_pci_bar(device_t dev) +{ + struct ccp_softc *sc; + + sc = device_get_softc(dev); + + bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix, + sc->pci_resource_msix); + bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id, + sc->pci_resource); +} + +const static struct ccp_error_code { + uint8_t ce_code; + const char *ce_name; + int ce_errno; + const char *ce_desc; +} ccp_error_codes[] = { + { 0x01, "ILLEGAL_ENGINE", EIO, "Requested engine was invalid" }, + { 0x03, "ILLEGAL_FUNCTION_TYPE", EIO, + "A non-supported function type was specified" }, + { 0x04, "ILLEGAL_FUNCTION_MODE", EIO, + "A non-supported function mode was specified" }, + { 0x05, "ILLEGAL_FUNCTION_ENCRYPT", EIO, + "A CMAC type was specified when ENCRYPT was not specified" }, + { 0x06, "ILLEGAL_FUNCTION_SIZE", EIO, + "A non-supported function size was specified.\n" + "AES-CFB: Size was not 127 or 7;\n" + "3DES-CFB: Size was not 7;\n" + "RSA: See supported size table (7.4.2);\n" + "ECC: Size was greater than 576 bits." }, + { 0x07, "Zlib_MISSING_INIT_EOM", EIO, + "Zlib command does not have INIT and EOM set" }, + { 0x08, "ILLEGAL_FUNCTION_RSVD", EIO, + "Reserved bits in a function specification were not 0" }, + { 0x09, "ILLEGAL_BUFFER_LENGTH", EIO, + "The buffer length specified was not correct for the selected engine" + }, + { 0x0A, "VLSB_FAULT", EIO, "Illegal VLSB segment mapping:\n" + "Undefined VLSB segment mapping or\n" + "mapping to unsupported LSB segment id" }, + { 0x0B, "ILLEGAL_MEM_ADDR", EFAULT, + "The specified source/destination buffer access was illegal:\n" + "Data buffer located in a LSB location disallowed by the LSB protection masks; or\n" + "Data buffer not completely contained within a single segment; or\n" + "Pointer with Fixed=1 is not 32-bit aligned; or\n" + "Pointer with Fixed=1 attempted to reference non-AXI1 (local) memory." + }, + { 0x0C, "ILLEGAL_MEM_SEL", EIO, + "A src_mem, dst_mem, or key_mem field was illegal:\n" + "A field was set to a reserved value; or\n" + "A public command attempted to reference AXI1 (local) or GART memory; or\n" + "A Zlib command attmpted to use the LSB." }, + { 0x0D, "ILLEGAL_CONTEXT_ADDR", EIO, + "The specified context location was illegal:\n" + "Context located in a LSB location disallowed by the LSB protection masks; or\n" + "Context not completely contained within a single segment." 
}, + { 0x0E, "ILLEGAL_KEY_ADDR", EIO, + "The specified key location was illegal:\n" + "Key located in a LSB location disallowed by the LSB protection masks; or\n" + "Key not completely contained within a single segment." }, + { 0x12, "CMD_TIMEOUT", EIO, "A command timeout violation occurred" }, + /* XXX Could fill out these descriptions too */ + { 0x13, "IDMA0_AXI_SLVERR", EIO, "" }, + { 0x14, "IDMA0_AXI_DECERR", EIO, "" }, + { 0x16, "IDMA1_AXI_SLVERR", EIO, "" }, + { 0x17, "IDMA1_AXI_DECERR", EIO, "" }, + { 0x19, "ZLIBVHB_AXI_SLVERR", EIO, "" }, + { 0x1A, "ZLIBVHB_AXI_DECERR", EIO, "" }, + { 0x1C, "ZLIB_UNEXPECTED_EOM", EIO, "" }, + { 0x1D, "ZLIB_EXTRA_DATA", EIO, "" }, + { 0x1E, "ZLIB_BTYPE", EIO, "" }, + { 0x20, "ZLIB_UNDEFINED_DISTANCE_SYMBOL", EIO, "" }, + { 0x21, "ZLIB_CODE_LENGTH_SYMBOL", EIO, "" }, + { 0x22, "ZLIB_VHB_ILLEGAL_FETCH", EIO, "" }, + { 0x23, "ZLIB_UNCOMPRESSED_LEN", EIO, "" }, + { 0x24, "ZLIB_LIMIT_REACHED", EIO, "" }, + { 0x25, "ZLIB_CHECKSUM_MISMATCH", EIO, "" }, + { 0x26, "ODMA0_AXI_SLVERR", EIO, "" }, + { 0x27, "ODMA0_AXI_DECERR", EIO, "" }, + { 0x29, "ODMA1_AXI_SLVERR", EIO, "" }, + { 0x2A, "ODMA1_AXI_DECERR", EIO, "" }, + { 0x2B, "LSB_PARITY_ERR", EIO, + "A read from the LSB encountered a parity error" }, +}; + +static void +ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc) +{ + struct ccp_completion_ctx *cctx; + const struct ccp_error_code *ec; + struct ccp_softc *sc; + uint32_t status, error, esource, faultblock; + unsigned q, idx; + int errno; + + sc = qp->cq_softc; + q = qp->cq_qindex; + + status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE); + + error = status & STATUS_ERROR_MASK; + + /* Decode error status */ + ec = NULL; + for (idx = 0; idx < nitems(ccp_error_codes); idx++) + if (ccp_error_codes[idx].ce_code == error) { + ec = &ccp_error_codes[idx]; + break; + } + + esource = (status >> STATUS_ERRORSOURCE_SHIFT) & + STATUS_ERRORSOURCE_MASK; + faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) & + STATUS_VLSB_FAULTBLOCK_MASK; + device_printf(sc->dev, "Error: %s (%u) Source: %u Faulting LSB block: %u\n", + (ec != NULL) ? ec->ce_name : "(reserved)", error, esource, + faultblock); + if (ec != NULL) + device_printf(sc->dev, "Error description: %s\n", ec->ce_desc); + + /* TODO Could format the desc nicely here */ + idx = desc - qp->desc_ring; + device_printf(sc->dev, "Bad descriptor index: %u contents: %32D\n", + idx, (const void *)desc, " "); + + /* + * TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status, + * Zlib Decompress status may be interesting. + */ + + while (true) { + /* Keep unused descriptors zero for next use. */ + memset(&qp->desc_ring[idx], 0, sizeof(qp->desc_ring[idx])); + + cctx = &qp->completions_ring[idx]; + + /* + * Restart procedure described in § 14.2.5. Could be used by HoC if we + * used that. + * + * Advance HEAD_LO past bad descriptor + any remaining in + * transaction manually, then restart queue. 
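/*
 * CMD_Q_HEAD_LO/TAIL_LO hold the low 32 bits of descriptor bus addresses
 * rather than ring indices; the error-recovery and completion paths below
 * convert between the two using the ring's base address and the 32-byte
 * descriptor size.  Sketch of that mapping (hypothetical values, not
 * driver code):
 */
#include <stdint.h>

#define DESC_LEN	32u		/* Q_DESC_SIZE */

static uint32_t
ring_index_to_reg(uint32_t ring_base_lo, unsigned idx)
{
	return (ring_base_lo + idx * DESC_LEN);
}

static unsigned
ring_reg_to_index(uint32_t ring_base_lo, uint32_t headlo)
{
	return ((headlo - ring_base_lo) / DESC_LEN);
}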
+ */ + idx = (idx + 1) % (1 << sc->ring_size_order); + + /* Callback function signals end of transaction */ + if (cctx->callback_fn != NULL) { + if (ec == NULL) + errno = EIO; + else + errno = ec->ce_errno; + /* TODO More specific error code */ + cctx->callback_fn(qp, cctx->session, cctx->callback_arg, errno); + cctx->callback_fn = NULL; + break; + } + } + + qp->cq_head = idx; + qp->cq_waiting = false; + wakeup(&qp->cq_tail); + device_printf(sc->dev, "%s: wrote sw head:%u\n", __func__, + qp->cq_head); + ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE, + (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE)); + ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol); + device_printf(sc->dev, "%s: Restarted queue\n", __func__); +} + +static void +ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints) +{ + struct ccp_completion_ctx *cctx; + struct ccp_softc *sc; + const struct ccp_desc *desc; + uint32_t headlo, idx; + unsigned q, completed; + + sc = qp->cq_softc; + q = qp->cq_qindex; + + mtx_lock(&qp->cq_lock); + + /* + * Hardware HEAD_LO points to the first incomplete descriptor. Process + * any submitted and completed descriptors, up to but not including + * HEAD_LO. + */ + headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE); + idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE; + + device_printf(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx, + qp->cq_head); + completed = 0; + while (qp->cq_head != idx) { + device_printf(sc->dev, "%s: completing:%u\n", __func__, + qp->cq_head); + + cctx = &qp->completions_ring[qp->cq_head]; + if (cctx->callback_fn != NULL) { + cctx->callback_fn(qp, cctx->session, + cctx->callback_arg, 0); + cctx->callback_fn = NULL; + } + + /* Keep unused descriptors zero for next use. */ + memset(&qp->desc_ring[qp->cq_head], 0, + sizeof(qp->desc_ring[qp->cq_head])); + + qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order); + completed++; + } + if (completed > 0) { + qp->cq_waiting = false; + wakeup(&qp->cq_tail); + } + + device_printf(sc->dev, "%s: wrote sw head:%u\n", __func__, + qp->cq_head); + + /* + * Desc points to the first incomplete descriptor, at the time we read + * HEAD_LO. If there was an error flagged in interrupt status, the HW + * will not proceed past the erroneous descriptor by itself. + */ + desc = &qp->desc_ring[idx]; + if ((ints & INT_ERROR) != 0) + ccp_intr_handle_error(qp, desc); + + mtx_unlock(&qp->cq_lock); +} + +static void +ccp_intr_handler(void *arg) +{ + struct ccp_softc *sc = arg; + size_t i; + uint32_t ints; + + device_printf(sc->dev, "%s: interrupt\n", __func__); + + /* + * We get one global interrupt per PCI device, shared over all of + * its queues. Scan each valid queue on interrupt for flags indicating + * activity. + */ + for (i = 0; i < nitems(sc->queues); i++) { + if ((sc->valid_queues & (1 << i)) == 0) + continue; + + ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE); + if (ints == 0) + continue; + +#if 0 + device_printf(sc->dev, "%s: %x interrupts on queue %zu\n", + __func__, (unsigned)ints, i); +#endif + /* Write back 1s to clear interrupt status bits. */ + ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints); + + /* + * If there was an error, we still need to run completions on + * any descriptors prior to the error. The completions handler + * invoked below will also handle the error descriptor. 
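+		 * ccp_intr_run_completions() reads HEAD_LO itself, stops at
+		 * the faulted descriptor, and hands it to
+		 * ccp_intr_handle_error().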
+ */ + if ((ints & (INT_COMPLETION | INT_ERROR)) != 0) + ccp_intr_run_completions(&sc->queues[i], ints); + + if ((ints & INT_QUEUE_STOPPED) != 0) + device_printf(sc->dev, "%s: queue %zu stopped\n", + __func__, i); + } + + /* Re-enable interrupts after processing */ + for (i = 0; i < nitems(sc->queues); i++) { + if ((sc->valid_queues & (1 << i)) == 0) + continue; + ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, + INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED); + } +} + +static int +ccp_intr_filter(void *arg) +{ + struct ccp_softc *sc = arg; + size_t i; + + /* TODO: Split individual queues into separate taskqueues? */ + for (i = 0; i < nitems(sc->queues); i++) { + if ((sc->valid_queues & (1 << i)) == 0) + continue; + + /* Mask interrupt until task completes */ + ccp_write_queue_4(sc, i, CMD_Q_INT_ENABLE_BASE, 0); + } + + return (FILTER_SCHEDULE_THREAD); +} + +static int +ccp_setup_interrupts(struct ccp_softc *sc) +{ + uint32_t nvec; + int rid, error, n, ridcopy; + + n = pci_msix_count(sc->dev); + if (n < 1) { + device_printf(sc->dev, "XXX %s: msix_count: %d\n", __func__, n); + return (ENXIO); + } + + nvec = n; + error = pci_alloc_msix(sc->dev, &nvec); + if (error != 0) { + device_printf(sc->dev, "XXX %s: alloc_msix error: %d\n", + __func__, error); + return (error); + } + if (nvec < 1) { + device_printf(sc->dev, "XXX %s: alloc_msix: 0 vectors\n", + __func__); + return (ENXIO); + } + if (nvec > nitems(sc->intr_res)) { + device_printf(sc->dev, "XXX %s: too many vectors: %u\n", + __func__, nvec); + nvec = nitems(sc->intr_res); + } + + for (rid = 1; rid < 1 + nvec; rid++) { + ridcopy = rid; + sc->intr_res[rid - 1] = bus_alloc_resource_any(sc->dev, + SYS_RES_IRQ, &ridcopy, RF_ACTIVE); + if (sc->intr_res[rid - 1] == NULL) { + device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n", + __func__); + return (ENXIO); + } + + sc->intr_tag[rid - 1] = NULL; + error = bus_setup_intr(sc->dev, sc->intr_res[rid - 1], + INTR_MPSAFE | INTR_TYPE_MISC, ccp_intr_filter, + ccp_intr_handler, sc, &sc->intr_tag[rid - 1]); + if (error != 0) + device_printf(sc->dev, "%s: setup_intr: %d\n", + __func__, error); + } + sc->intr_count = nvec; + + return (error); +} + +static void +ccp_release_interrupts(struct ccp_softc *sc) +{ + unsigned i; + + for (i = 0; i < sc->intr_count; i++) { + if (sc->intr_tag[i] != NULL) + bus_teardown_intr(sc->dev, sc->intr_res[i], + sc->intr_tag[i]); + if (sc->intr_res[i] != NULL) + bus_release_resource(sc->dev, SYS_RES_IRQ, + rman_get_rid(sc->intr_res[i]), sc->intr_res[i]); + } + + pci_release_msi(sc->dev); +} + +int +ccp_hw_attach(device_t dev) +{ + struct ccp_softc *sc; + uint64_t lsbmask; + uint32_t version, lsbmasklo, lsbmaskhi; + unsigned queue_idx, j; + int error; + bool bars_mapped, interrupts_setup; + + queue_idx = 0; + bars_mapped = interrupts_setup = false; + sc = device_get_softc(dev); + + error = ccp_map_pci_bar(dev); + if (error != 0) { + device_printf(dev, "XXX%s: couldn't map BAR(s)\n", __func__); + goto out; + } + bars_mapped = true; + + error = pci_enable_busmaster(dev); + if (error != 0) { + device_printf(dev, "XXX%s: couldn't enable busmaster\n", + __func__); + goto out; + } + + sc->ring_size_order = g_ccp_ring_order; + if (sc->ring_size_order < 6 || sc->ring_size_order > 16) { + device_printf(dev, "bogus hw.ccp.ring_order\n"); + error = EINVAL; + goto out; + } + sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET); + + version = ccp_read_4(sc, VERSION_REG); + if ((version & VERSION_NUM_MASK) < 5) { + device_printf(dev, + "driver supports version 5 and later 
hardware\n"); + error = ENXIO; + goto out; + } + + error = ccp_setup_interrupts(sc); + if (error != 0) + goto out; + interrupts_setup = true; + + sc->hw_version = version & VERSION_NUM_MASK; + sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) & + VERSION_NUMVQM_MASK; + sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) & + VERSION_LSBSIZE_MASK; + sc->hw_features = version & VERSION_CAP_MASK; + + /* + * Copy private LSB mask to public registers to enable access to LSB + * from all queues allowed by BIOS. + */ + lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET); + lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET); + ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo); + ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi); + + lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo; + device_printf(dev, "XXX%s: 2\n", __func__); + + for (; queue_idx < nitems(sc->queues); queue_idx++) { + error = ccp_hw_attach_queue(dev, lsbmask, queue_idx); + if (error != 0) { + device_printf(dev, "XXX%s: couldn't attach queue %u\n", + __func__, queue_idx); + goto out; + } + } + ccp_assign_lsb_regions(sc, lsbmask); + device_printf(dev, "XXX%s: 3\n", __func__); + +out: + if (error != 0) { + if (interrupts_setup) + ccp_release_interrupts(sc); + for (j = 0; j < queue_idx; j++) + ccp_hw_detach_queue(dev, j); + if (sc->ring_size_order != 0) + pci_disable_busmaster(dev); + if (bars_mapped) + ccp_unmap_pci_bar(dev); + } + return (error); +} + +void +ccp_hw_detach(device_t dev) +{ + struct ccp_softc *sc; + unsigned i; + + sc = device_get_softc(dev); + + for (i = 0; i < nitems(sc->queues); i++) + ccp_hw_detach_queue(dev, i); + + ccp_release_interrupts(sc); + pci_disable_busmaster(dev); + ccp_unmap_pci_bar(dev); +} + +static int __must_check +ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst, + enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type, + bus_size_t len, enum ccp_passthru_byteswap swapmode, + enum ccp_passthru_bitwise bitmode, bool interrupt, + const struct ccp_completion_ctx *cctx) +{ + struct ccp_desc *desc; + + if (ccp_queue_get_ring_space(qp) == 0) + return (EAGAIN); + + desc = &qp->desc_ring[qp->cq_tail]; + + memset(desc, 0, sizeof(*desc)); + desc->engine = CCP_ENGINE_PASSTHRU; + + desc->pt.ioc = interrupt; + desc->pt.byteswap = swapmode; + desc->pt.bitwise = bitmode; + desc->length = len; + + desc->src_lo = (uint32_t)src; + desc->src_hi = src >> 32; + desc->src_mem = src_type; + + desc->dst_lo = (uint32_t)dst; + desc->dst_hi = dst >> 32; + desc->dst_mem = dst_type; + + if (bitmode != CCP_PASSTHRU_BITWISE_NOOP) + desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY); + + if (cctx != NULL) + memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx)); + + qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order); + return (0); +} + +static int __must_check +ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb, + struct sglist *sgl, bus_size_t len, bool interrupt, + const struct ccp_completion_ctx *cctx) +{ + struct sglist_seg *seg; + size_t i, remain, nb; + int error; + + remain = len; + for (i = 0; i < sgl->sg_nseg && remain != 0; i++) { + seg = &sgl->sg_segs[i]; + /* crd_len is int, so 32-bit min() is ok. 
*/ + nb = min(remain, seg->ss_len); + + if (tolsb) + error = ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB, + seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb, + CCP_PASSTHRU_BYTESWAP_NOOP, + CCP_PASSTHRU_BITWISE_NOOP, + (nb == remain) && interrupt, cctx); + else + error = ccp_passthrough(qp, seg->ss_paddr, + CCP_MEMTYPE_SYSTEM, lsb_addr, CCP_MEMTYPE_SB, nb, + CCP_PASSTHRU_BYTESWAP_NOOP, + CCP_PASSTHRU_BITWISE_NOOP, + (nb == remain) && interrupt, cctx); + if (error != 0) + return (error); + + remain -= nb; + } + return (0); +} + +/* + * Note that these vectors are in reverse of the usual order. + */ +const struct SHA_vectors { + uint32_t SHA1[8]; + uint32_t SHA224[8]; + uint32_t SHA256[8]; + uint64_t SHA384[8]; + uint64_t SHA512[8]; +} SHA_H __aligned(PAGE_SIZE) = { + .SHA1 = { + 0xc3d2e1f0ul, + 0x10325476ul, + 0x98badcfeul, + 0xefcdab89ul, + 0x67452301ul, + 0, + 0, + 0, + }, + .SHA224 = { + 0xbefa4fa4ul, + 0x64f98fa7ul, + 0x68581511ul, + 0xffc00b31ul, + 0xf70e5939ul, + 0x3070dd17ul, + 0x367cd507ul, + 0xc1059ed8ul, + }, + .SHA256 = { + 0x5be0cd19ul, + 0x1f83d9abul, + 0x9b05688cul, + 0x510e527ful, + 0xa54ff53aul, + 0x3c6ef372ul, + 0xbb67ae85ul, + 0x6a09e667ul, + }, + .SHA384 = { + 0x47b5481dbefa4fa4ull, + 0xdb0c2e0d64f98fa7ull, + 0x8eb44a8768581511ull, + 0x67332667ffc00b31ull, + 0x152fecd8f70e5939ull, + 0x9159015a3070dd17ull, + 0x629a292a367cd507ull, + 0xcbbb9d5dc1059ed8ull, + }, + .SHA512 = { + 0x5be0cd19137e2179ull, + 0x1f83d9abfb41bd6bull, + 0x9b05688c2b3e6c1full, + 0x510e527fade682d1ull, + 0xa54ff53a5f1d36f1ull, + 0x3c6ef372fe94f82bull, + 0xbb67ae8584caa73bull, + 0x6a09e667f3bcc908ull, + }, +}; +/* Ensure vectors do not cross a page boundary. */ +CTASSERT(PAGE_SIZE - ((uintptr_t)&SHA_H % PAGE_SIZE) >= sizeof(SHA_H)); + +const struct SHA_Defn { + enum sha_version version; + const void *H_vectors; + size_t H_size; + struct auth_hash *axf; + enum ccp_sha_type engine_type; +} SHA_definitions[] = { + { + .version = SHA1, + .H_vectors = SHA_H.SHA1, + .H_size = sizeof(SHA_H.SHA1), + .axf = &auth_hash_hmac_sha1, + .engine_type = CCP_SHA_TYPE_1, + }, +#if 0 + { + .version = SHA2_224, + .H_vectors = SHA_H.SHA224, + .H_size = sizeof(SHA_H.SHA224), + .axf = &auth_hash_hmac_sha2_224, + .engine_type = CCP_SHA_TYPE_224, + }, +#endif + { + .version = SHA2_256, + .H_vectors = SHA_H.SHA256, + .H_size = sizeof(SHA_H.SHA256), + .axf = &auth_hash_hmac_sha2_256, + .engine_type = CCP_SHA_TYPE_256, + }, + { + .version = SHA2_384, + .H_vectors = SHA_H.SHA384, + .H_size = sizeof(SHA_H.SHA384), + .axf = &auth_hash_hmac_sha2_384, + .engine_type = CCP_SHA_TYPE_384, + }, + { + .version = SHA2_512, + .H_vectors = SHA_H.SHA512, + .H_size = sizeof(SHA_H.SHA512), + .axf = &auth_hash_hmac_sha2_512, + .engine_type = CCP_SHA_TYPE_512, + }, +}; + +static int __must_check +ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn, + vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits) +{ + struct ccp_desc *desc; + + if (ccp_queue_get_ring_space(qp) == 0) + return (EAGAIN); + + desc = &qp->desc_ring[qp->cq_tail]; + + memset(desc, 0, sizeof(*desc)); + desc->engine = CCP_ENGINE_SHA; + desc->som = start; + desc->eom = end; + + desc->sha.type = defn->engine_type; + desc->length = len; + + if (end) { + desc->sha_len_lo = (uint32_t)msgbits; + desc->sha_len_hi = msgbits >> 32; + } + + desc->src_lo = (uint32_t)addr; + desc->src_hi = addr >> 32; + desc->src_mem = CCP_MEMTYPE_SYSTEM; + + desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA); + + qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order); 
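+
+	/*
+	 * The descriptor is only staged in host memory here; the hardware
+	 * tail pointer is written separately once the whole transaction has
+	 * been queued.
+	 */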
+	return (0);
+}
+
+static int __must_check
+ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src,
+    struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx)
+{
+	const struct SHA_Defn *defn;
+	struct sglist_seg *seg;
+	size_t i, msgsize, remaining, nb;
+	uint32_t lsbaddr;
+	int error;
+
+	for (i = 0; i < nitems(SHA_definitions); i++)
+		if (SHA_definitions[i].version == version)
+			break;
+	if (i == nitems(SHA_definitions))
+		return (EINVAL);
+	defn = &SHA_definitions[i];
+
+	/* XXX validate input ??? */
+
+	/* Load initial SHA state into LSB */
+	/* XXX ensure H_vectors don't span page boundaries */
+	error = ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA),
+	    CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors),
+	    CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE),
+	    CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false,
+	    NULL);
+	if (error != 0)
+		return (error);
+
+	/* Execute series of SHA updates on correctly sized buffers */
+	msgsize = 0;
+	for (i = 0; i < sgl_src->sg_nseg; i++) {
+		seg = &sgl_src->sg_segs[i];
+		msgsize += seg->ss_len;
+		error = ccp_sha_single_desc(qp, defn, seg->ss_paddr,
+		    seg->ss_len, i == 0, i == sgl_src->sg_nseg - 1,
+		    msgsize << 3);
+		if (error != 0)
+			return (error);
+	}
+
+	/* Copy result out to sgl_dst */
+	remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE);
+	lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA);
+	for (i = 0; i < sgl_dst->sg_nseg; i++) {
+		seg = &sgl_dst->sg_segs[i];
+		/* crd_len is int, so 32-bit min() is ok. */
+		nb = min(remaining, seg->ss_len);
+
+		error = ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM,
+		    lsbaddr, CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP,
+		    CCP_PASSTHRU_BITWISE_NOOP,
+		    (cctx != NULL) ? (nb == remaining) : false,
+		    (nb == remaining) ? cctx : NULL);
+		if (error != 0)
+			return (error);
+
+		remaining -= nb;
+		lsbaddr += nb;
+		if (remaining == 0)
+			break;
+	}
+
+	return (0);
+}
+
+static void
+byteswap256(uint64_t *buffer)
+{
+	uint64_t t;
+
+	t = bswap64(buffer[3]);
+	buffer[3] = bswap64(buffer[0]);
+	buffer[0] = t;
+
+	t = bswap64(buffer[2]);
+	buffer[2] = bswap64(buffer[1]);
+	buffer[1] = t;
+}
+
+/*
+ * Translate CCP internal LSB hash format into a standard hash output.
+ *
+ * Manipulates input buffer with byteswap256 operation.
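+ *
+ * The engine keeps the hash state words in the reverse of their usual order
+ * (see SHA_H above), so the 256-bit halves are byte-swapped and then copied
+ * out at per-algorithm offsets below.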
+ */ +static void +ccp_sha_copy_result(char *output, char *buffer, enum sha_version version) +{ + const struct SHA_Defn *defn; + size_t i; + + for (i = 0; i < nitems(SHA_definitions); i++) + if (SHA_definitions[i].version == version) + break; + if (i == nitems(SHA_definitions)) + panic("bogus sha version auth_mode %u\n", (unsigned)version); + + defn = &SHA_definitions[i]; + + /* Swap 256bit manually -- DMA engine can, but with limitations */ + byteswap256((void *)buffer); + if (defn->axf->hashsize > LSB_ENTRY_SIZE) + byteswap256((void *)(buffer + LSB_ENTRY_SIZE)); + + switch (defn->version) { + case SHA1: + memcpy(output, buffer + 12, defn->axf->hashsize); + break; +#if 0 + case SHA2_224: + memcpy(output, buffer + XXX, defn->axf->hashsize); + break; +#endif + case SHA2_256: + memcpy(output, buffer, defn->axf->hashsize); + break; + case SHA2_384: + memcpy(output, + buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize, + defn->axf->hashsize - LSB_ENTRY_SIZE); + memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer, + LSB_ENTRY_SIZE); + break; + case SHA2_512: + memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE); + memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE); + break; + } +} + +static void +ccp_do_hmac_done(struct ccp_queue *qp, struct ccp_session *s, + struct cryptop *crp, struct cryptodesc *crd, int error) +{ + char ihash[SHA2_512_HASH_LEN /* max hash len */]; + union authctx auth_ctx; + struct auth_hash *axf; + + axf = s->hmac.auth_hash; + + s->pending--; + + if (error != 0) { + crp->crp_etype = error; + goto out; + } + + /* Do remaining outer hash over small inner hash in software */ + axf->Init(&auth_ctx); + axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize); + ccp_sha_copy_result(ihash, s->hmac.ipad, s->hmac.auth_mode); +#if 0 + device_printf(dev, "%s sha intermediate=%64D\n", __func__, + (u_char *)ihash, " "); +#endif + axf->Update(&auth_ctx, ihash, axf->hashsize); + axf->Final(s->hmac.ipad, &auth_ctx); + + crypto_copyback(crp->crp_flags, crp->crp_buf, crd->crd_inject, + s->hmac.hash_len, s->hmac.ipad); + + /* Avoid leaking key material */ + explicit_bzero(&auth_ctx, sizeof(auth_ctx)); + explicit_bzero(s->hmac.ipad, sizeof(s->hmac.ipad)); + explicit_bzero(s->hmac.opad, sizeof(s->hmac.opad)); + +out: + crypto_done(crp); +} + +static void +ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, + int error) +{ + struct cryptodesc *crd; + struct cryptop *crp; + + crp = vcrp; + crd = crp->crp_desc; + ccp_do_hmac_done(qp, s, crp, crd, error); +} + +static int __must_check +ccp_do_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, + struct cryptodesc *crd, const struct ccp_completion_ctx *cctx) +{ + device_t dev; + struct auth_hash *axf; + int error; + + dev = qp->cq_softc->dev; + axf = s->hmac.auth_hash; + + /* + * Populate the SGL describing inside hash contents. We want to hash + * the ipad (key XOR fixed bit pattern) concatenated with the user + * data. + */ + sglist_reset(qp->cq_sg_ulptx); + error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize); + if (error != 0) + return (error); + error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, + crd->crd_skip, crd->crd_len); + if (error != 0) { + device_printf(dev, "%s: sglist too short\n", __func__); + return (error); + } + /* Populate SGL for output -- just reuse hmac.ipad buffer. 
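+	 * The engine writes back the raw LSB-format digest; the completion
+	 * path (ccp_do_hmac_done()) converts it with ccp_sha_copy_result()
+	 * and finishes the outer hash in software.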
*/ + sglist_reset(qp->cq_sg_dst); + error = sglist_append(qp->cq_sg_dst, s->hmac.ipad, + roundup2(axf->hashsize, LSB_ENTRY_SIZE)); + if (error != 0) + return (error); + + error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst, + cctx); + if (error != 0) { + device_printf(dev, "%s: ccp_sha error\n", __func__); + return (error); + } + return (0); +} + +int __must_check +ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) +{ + struct ccp_completion_ctx ctx; + struct cryptodesc *crd; + + crd = crp->crp_desc; + + ctx.callback_fn = ccp_hmac_done; + ctx.callback_arg = crp; + ctx.session = s; + + return (ccp_do_hmac(qp, s, crp, crd, &ctx)); +} + +static void +ccp_byteswap(char *data, size_t len) +{ + size_t i; + char t; + + len--; + for (i = 0; i < len; i++, len--) { + t = data[i]; + data[i] = data[len]; + data[len] = t; + } +} + +static void +ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, + int error) +{ + struct cryptop *crp; + + explicit_bzero(&s->blkcipher, sizeof(s->blkcipher)); + + crp = vcrp; + + s->pending--; + + if (error != 0) + crp->crp_etype = error; + + device_printf(qp->cq_softc->dev, "XXX %s: qp=%p crp=%p\n", __func__, + qp, crp); + crypto_done(crp); +} + +static void +ccp_collect_iv(struct ccp_session *s, struct cryptop *crp, + struct cryptodesc *crd) +{ + + if (crd->crd_flags & CRD_F_ENCRYPT) { + if (crd->crd_flags & CRD_F_IV_EXPLICIT) + memcpy(s->blkcipher.iv, crd->crd_iv, + s->blkcipher.iv_len); + else + arc4rand(s->blkcipher.iv, s->blkcipher.iv_len, 0); + if ((crd->crd_flags & CRD_F_IV_PRESENT) == 0) + crypto_copyback(crp->crp_flags, crp->crp_buf, + crd->crd_inject, s->blkcipher.iv_len, + s->blkcipher.iv); + } else { + if (crd->crd_flags & CRD_F_IV_EXPLICIT) + memcpy(s->blkcipher.iv, crd->crd_iv, + s->blkcipher.iv_len); + else + crypto_copydata(crp->crp_flags, crp->crp_buf, + crd->crd_inject, s->blkcipher.iv_len, + s->blkcipher.iv); + } + + /* + * If the input IV is 12 bytes, append an explicit counter of 1. + */ + if (crd->crd_alg == CRYPTO_AES_NIST_GCM_16 && + s->blkcipher.iv_len == 12) { + *(uint32_t *)&s->blkcipher.iv[12] = htobe32(1); + s->blkcipher.iv_len = AES_BLOCK_LEN; + } + + /* Reverse order of IV material for HW */ + printf("ccpX: YYY %s: IV: %16D len: %u\n", __func__, s->blkcipher.iv, + " ", s->blkcipher.iv_len); + ccp_byteswap(s->blkcipher.iv, s->blkcipher.iv_len); +} + +static int __must_check +ccp_do_pst_to_lsb(struct ccp_queue *qp, uint32_t lsbaddr, const void *src, + size_t len) +{ + int error; + + sglist_reset(qp->cq_sg_ulptx); + error = sglist_append(qp->cq_sg_ulptx, __DECONST(void *, src), len); + if (error != 0) + return (error); + + error = ccp_passthrough_sgl(qp, lsbaddr, true, qp->cq_sg_ulptx, len, + false, NULL); + return (error); +} + +static int __must_check +ccp_do_blkcipher(struct ccp_queue *qp, struct ccp_session *s, + struct cryptop *crp, struct cryptodesc *crd, + const struct ccp_completion_ctx *cctx) +{ + struct ccp_desc *desc; + char *keydata; + device_t dev; + enum ccp_cipher_dir dir; + int error; + size_t keydata_len; + unsigned i; + + dev = qp->cq_softc->dev; + + if (s->blkcipher.key_len == 0 || crd->crd_len == 0) { + device_printf(dev, "%s: empty\n", __func__); + return (EINVAL); + } + if ((crd->crd_len % AES_BLOCK_LEN) != 0) { + device_printf(dev, "%s: len modulo: %d\n", __func__, + crd->crd_len); + return (EINVAL); + } + + /* + * Individual segments must be multiples of AES block size for the HW + * to process it. 
Non-compliant inputs aren't bogus, just not doable + * on this hardware. + */ + for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++) + if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) { + device_printf(dev, "%s: seg modulo: %zu\n", __func__, + qp->cq_sg_crp->sg_segs[i].ss_len); + return (EINVAL); + } + + /* Gather IV/nonce data */ + ccp_collect_iv(s, crp, crd); + + if ((crd->crd_flags & CRD_F_ENCRYPT) != 0) + dir = CCP_CIPHER_DIR_ENCRYPT; + else + dir = CCP_CIPHER_DIR_DECRYPT; + + /* Set up passthrough op(s) to copy IV into LSB */ + error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), + s->blkcipher.iv, s->blkcipher.iv_len); + if (error != 0) + return (error); + + /* + * Initialize keydata and keydata_len for GCC. The default case of the + * following switch is impossible to reach, but GCC doesn't know that. + */ + keydata_len = 0; + keydata = NULL; + + switch (crd->crd_alg) { + case CRYPTO_AES_CBC: + case CRYPTO_AES_ICM: + keydata = s->blkcipher.enckey; + keydata_len = s->blkcipher.key_len; + break; + /* XXX deal with XTS */ +#if 0 + case CRYPTO_AES_XTS: + key_half = s->blkcipher.key_len / 2; + memcpy(crwr->key_ctx.key, s->blkcipher.enckey + key_half, + key_half); + if (crd->crd_flags & CRD_F_ENCRYPT) + memcpy(crwr->key_ctx.key + key_half, + s->blkcipher.enckey, key_half); + else + memcpy(crwr->key_ctx.key + key_half, + s->blkcipher.deckey, key_half); + break; +#endif + } + + /* Reverse order of key material for HW */ + device_printf(dev, "YYY %s: KEY: %16D\n", __func__, keydata, " "); + ccp_byteswap(keydata, keydata_len); + + /* Store key material into LSB to avoid page boundaries */ + error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), + keydata, keydata_len); + if (error != 0) + return (error); + + /* + * Point SGLs at the subset of cryptop buffer contents representing the + * data. + */ + sglist_reset(qp->cq_sg_ulptx); + error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, + crd->crd_skip, crd->crd_len); + if (error != 0) + return (error); + + device_printf(dev, "YYY %s: Contents: %16D\n", __func__, + (void *)PHYS_TO_DMAP(qp->cq_sg_ulptx->sg_segs[0].ss_paddr), " "); + + device_printf(dev, "XXX %s: starting AES ops @ %u\n", __func__, + qp->cq_tail); + + if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg) + return (EAGAIN); + + for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { + struct sglist_seg *seg; + + seg = &qp->cq_sg_ulptx->sg_segs[i]; + + desc = &qp->desc_ring[qp->cq_tail]; + desc->engine = CCP_ENGINE_AES; + desc->som = (i == 0); + desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1); + desc->ioc = (desc->eom && cctx != NULL); + device_printf(dev, "XXX %s: AES %u: som:%d eom:%d ioc:%d dir:%d\n", __func__, + qp->cq_tail, (int)desc->som, (int)desc->eom, (int)desc->ioc, (int)dir); + + if (desc->ioc) + memcpy(&qp->completions_ring[qp->cq_tail], cctx, + sizeof(*cctx)); + + desc->aes.encrypt = dir; + desc->aes.mode = s->blkcipher.cipher_mode; + desc->aes.type = s->blkcipher.cipher_type; + if (crd->crd_alg == CRYPTO_AES_ICM) + /* + * Size of CTR value in bits, - 1. ICM mode uses all + * 128 bits as counter. 
+ */ + desc->aes.size = 127; + + device_printf(dev, "XXX %s: AES %u: mode:%u type:%u size:%u\n", __func__, + qp->cq_tail, (unsigned)desc->aes.mode, (unsigned)desc->aes.type, (unsigned)desc->aes.size); + + desc->length = seg->ss_len; + desc->src_lo = (uint32_t)seg->ss_paddr; + desc->src_hi = (seg->ss_paddr >> 32); + desc->src_mem = CCP_MEMTYPE_SYSTEM; + + /* Crypt in-place */ + desc->dst_lo = desc->src_lo; + desc->dst_hi = desc->src_hi; + desc->dst_mem = desc->src_mem; + + desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); + desc->key_hi = 0; + desc->key_mem = CCP_MEMTYPE_SB; + + desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); + + qp->cq_tail = (qp->cq_tail + 1) % + (1 << qp->cq_softc->ring_size_order); + } + return (0); +} + +int __must_check +ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) +{ + struct ccp_completion_ctx ctx; + struct cryptodesc *crd; + + crd = crp->crp_desc; + + ctx.callback_fn = ccp_blkcipher_done; + ctx.session = s; + ctx.callback_arg = crp; + + return (ccp_do_blkcipher(qp, s, crp, crd, &ctx)); +} + +static void +ccp_authenc_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, + int error) +{ + struct cryptodesc *crda; + struct cryptop *crp; + + explicit_bzero(&s->blkcipher, sizeof(s->blkcipher)); + + crp = vcrp; + if (s->cipher_first) + crda = crp->crp_desc->crd_next; + else + crda = crp->crp_desc; + + ccp_do_hmac_done(qp, s, crp, crda, error); +} + +int __must_check +ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, + struct cryptodesc *crda, struct cryptodesc *crde) +{ + struct ccp_completion_ctx ctx; + int error; + + ctx.callback_fn = ccp_authenc_done; + ctx.session = s; + ctx.callback_arg = crp; + + /* Perform first operation */ + if (s->cipher_first) + error = ccp_do_blkcipher(qp, s, crp, crde, NULL); + else + error = ccp_do_hmac(qp, s, crp, crda, NULL); + if (error != 0) + return (error); + + /* Perform second operation */ + if (s->cipher_first) + error = ccp_do_hmac(qp, s, crp, crda, &ctx); + else + error = ccp_do_blkcipher(qp, s, crp, crde, &ctx); + return (error); +} + +static int __must_check +ccp_do_ghash_aad(struct ccp_queue *qp, struct ccp_session *s) +{ + struct ccp_desc *desc; + struct sglist_seg *seg; + unsigned i; + + if (ccp_queue_get_ring_space(qp) < qp->cq_sg_ulptx->sg_nseg) + return (EAGAIN); + + for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { + seg = &qp->cq_sg_ulptx->sg_segs[i]; + + desc = &qp->desc_ring[qp->cq_tail]; + + desc->engine = CCP_ENGINE_AES; + desc->aes.mode = CCP_AES_MODE_GHASH; + desc->aes.type = s->blkcipher.cipher_type; + desc->aes.encrypt = CCP_AES_MODE_GHASH_AAD; + + desc->som = (i == 0); + desc->length = seg->ss_len; + + desc->src_lo = (uint32_t)seg->ss_paddr; + desc->src_hi = (seg->ss_paddr >> 32); + desc->src_mem = CCP_MEMTYPE_SYSTEM; + + desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); + + desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); + desc->key_mem = CCP_MEMTYPE_SB; + + qp->cq_tail = (qp->cq_tail + 1) % + (1 << qp->cq_softc->ring_size_order); + } + return (0); +} + +static int __must_check +ccp_do_gctr(struct ccp_queue *qp, struct ccp_session *s, + enum ccp_cipher_dir dir, struct sglist_seg *seg, bool som, bool eom) +{ + struct ccp_desc *desc; + + if (ccp_queue_get_ring_space(qp) == 0) + return (EAGAIN); + + desc = &qp->desc_ring[qp->cq_tail]; + + desc->engine = CCP_ENGINE_AES; + desc->aes.mode = CCP_AES_MODE_GCTR; + desc->aes.type = s->blkcipher.cipher_type; + desc->aes.encrypt = dir; + desc->aes.size = 8 * 
(seg->ss_len % GMAC_BLOCK_LEN) - 1; + + desc->som = som; + desc->eom = eom; + + /* Trailing bytes will be masked off by aes.size above. */ + desc->length = roundup2(seg->ss_len, GMAC_BLOCK_LEN); + + desc->dst_lo = desc->src_lo = (uint32_t)seg->ss_paddr; + desc->dst_hi = desc->src_hi = seg->ss_paddr >> 32; + desc->dst_mem = desc->src_mem = CCP_MEMTYPE_SYSTEM; + + desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); + + desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); + desc->key_mem = CCP_MEMTYPE_SB; + + qp->cq_tail = (qp->cq_tail + 1) % + (1 << qp->cq_softc->ring_size_order); + return (0); +} + +static int __must_check +ccp_do_ghash_final(struct ccp_queue *qp, struct ccp_session *s) +{ + struct ccp_desc *desc; + + if (ccp_queue_get_ring_space(qp) == 0) + return (EAGAIN); + + desc = &qp->desc_ring[qp->cq_tail]; + + desc->engine = CCP_ENGINE_AES; + desc->aes.mode = CCP_AES_MODE_GHASH; + desc->aes.type = s->blkcipher.cipher_type; + desc->aes.encrypt = CCP_AES_MODE_GHASH_FINAL; + + desc->length = GMAC_BLOCK_LEN; + + desc->src_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN); + desc->src_mem = CCP_MEMTYPE_SB; + + desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); + + desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); + desc->key_mem = CCP_MEMTYPE_SB; + + desc->dst_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH); + desc->dst_mem = CCP_MEMTYPE_SB; + + qp->cq_tail = (qp->cq_tail + 1) % + (1 << qp->cq_softc->ring_size_order); + return (0); +} + +static void +ccp_gcm_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, + int error) +{ + char tag[GMAC_DIGEST_LEN]; + struct cryptodesc *crde, *crda; + struct cryptop *crp; + + crp = vcrp; + if (s->cipher_first) { + crde = crp->crp_desc; + crda = crp->crp_desc->crd_next; + } else { + crde = crp->crp_desc->crd_next; + crda = crp->crp_desc; + } + + s->pending--; + + if (error != 0) { + crp->crp_etype = error; + goto out; + } + + /* Encrypt is done. Decrypt needs to verify tag. */ + if ((crde->crd_flags & CRD_F_ENCRYPT) != 0) + goto out; + + /* Copy in message tag. */ + crypto_copydata(crp->crp_flags, crp->crp_buf, crda->crd_inject, + sizeof(tag), tag); + + /* Verify tag against computed GMAC */ + if (timingsafe_bcmp(tag, s->gmac.final_block, s->gmac.hash_len) != 0) + crp->crp_etype = EBADMSG; + +out: + explicit_bzero(&s->blkcipher, sizeof(s->blkcipher)); + explicit_bzero(&s->gmac, sizeof(s->gmac)); + crypto_done(crp); +} + +int __must_check +ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, + struct cryptodesc *crda, struct cryptodesc *crde) +{ + struct ccp_completion_ctx ctx; + enum ccp_cipher_dir dir; + device_t dev; + unsigned i; + int error; + + if (s->blkcipher.key_len == 0) + return (EINVAL); + + /* + * AAD is only permitted before the cipher/plain text, not + * after. 
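+	 * The check below enforces this by requiring the AAD region to end
+	 * at or before the end of the cipher region.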
+ */ + if (crda->crd_len + crda->crd_skip > crde->crd_len + crde->crd_skip) + return (EINVAL); + + dev = qp->cq_softc->dev; + + if ((crde->crd_flags & CRD_F_ENCRYPT) != 0) + dir = CCP_CIPHER_DIR_ENCRYPT; + else + dir = CCP_CIPHER_DIR_DECRYPT; + + /* Zero initial GHASH portion of context */ + memset(s->blkcipher.iv, 0, sizeof(s->blkcipher.iv)); + + /* Gather IV data */ + ccp_collect_iv(s, crp, crde); + + /* Reverse order of key material for HW */ + ccp_byteswap(s->blkcipher.enckey, s->blkcipher.key_len); + + /* Prepare input buffer of concatenated lengths for final GHASH */ + be64enc(s->gmac.final_block, (uint64_t)crda->crd_len * 8); + be64enc(&s->gmac.final_block[8], (uint64_t)crde->crd_len * 8); + + /* Send IV + initial zero GHASH, key data, and lengths buffer to LSB */ + error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), + s->blkcipher.iv, 32); + if (error != 0) + return (error); + error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), + s->blkcipher.enckey, s->blkcipher.key_len); + if (error != 0) + return (error); + error = ccp_do_pst_to_lsb(qp, + ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH_IN), s->gmac.final_block, + GMAC_BLOCK_LEN); + if (error != 0) + return (error); + + /* First step - compute GHASH over AAD */ + if (crda->crd_len != 0) { + sglist_reset(qp->cq_sg_ulptx); + error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, + crda->crd_skip, crda->crd_len); + if (error != 0) + return (error); + + /* This engine cannot process non-block multiple AAD data. */ + for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) + if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % + GMAC_BLOCK_LEN) != 0) { + device_printf(dev, "%s: AD seg modulo: %zu\n", + __func__, + qp->cq_sg_ulptx->sg_segs[i].ss_len); + return (EINVAL); + } + + error = ccp_do_ghash_aad(qp, s); + if (error != 0) + return (error); + } + + /* Feed data piece by piece into GCTR */ + sglist_reset(qp->cq_sg_ulptx); + error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, + crde->crd_skip, crde->crd_len); + if (error != 0) + return (error); + + /* + * All segments except the last must be even multiples of AES block + * size for the HW to process it. Non-compliant inputs aren't bogus, + * just not doable on this hardware. + * + * XXX: Well, the hardware will produce a valid tag for shorter final + * segment inputs, but it will still write out a block-sized plaintext + * or ciphertext chunk. For a typical CRP this tramples trailing data, + * including the provided message tag. So, reject such inputs for now. + */ + for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) + if ((qp->cq_sg_ulptx->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) { + device_printf(dev, "%s: seg modulo: %zu\n", __func__, + qp->cq_sg_ulptx->sg_segs[i].ss_len); + return (EINVAL); + } + + for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { + struct sglist_seg *seg; + + seg = &qp->cq_sg_ulptx->sg_segs[i]; + error = ccp_do_gctr(qp, s, dir, seg, + (i == 0 && crda->crd_len == 0), + i == (qp->cq_sg_ulptx->sg_nseg - 1)); + if (error != 0) + return (error); + } + + /* Send just initial IV (not GHASH!) to LSB again */ + error = ccp_do_pst_to_lsb(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), + s->blkcipher.iv, s->blkcipher.iv_len); + if (error != 0) + return (error); + + ctx.callback_fn = ccp_gcm_done; + ctx.session = s; + ctx.callback_arg = crp; + + /* Compute final hash and copy result back */ + error = ccp_do_ghash_final(qp, s); + if (error != 0) + return (error); + + /* When encrypting, copy computed tag out to caller buffer. 
*/ + sglist_reset(qp->cq_sg_ulptx); + if (dir == CCP_CIPHER_DIR_ENCRYPT) + error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, + crda->crd_inject, s->gmac.hash_len); + else + /* + * For decrypting, copy the computed tag out to our session + * buffer to verify in our callback. + */ + error = sglist_append(qp->cq_sg_ulptx, s->gmac.final_block, + s->gmac.hash_len); + if (error != 0) + return (error); + error = ccp_passthrough_sgl(qp, + ccp_queue_lsb_address(qp, LSB_ENTRY_GHASH), false, qp->cq_sg_ulptx, + s->gmac.hash_len, true, &ctx); + return (error); +} + +#define MAX_TRNG_RETRIES 10 +u_int +random_ccp_read(void *v, u_int c) +{ + uint32_t *buf; + u_int i, j; + + KASSERT(c % sizeof(*buf) == 0, ("%u not multiple of u_long", c)); + + buf = v; + for (i = c; i > 0; i -= sizeof(*buf)) { + for (j = 0; j < MAX_TRNG_RETRIES; j++) { + *buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET); + if (*buf != 0) + break; + } + if (j == MAX_TRNG_RETRIES) + return (0); + buf++; + } + return (c); + +} + +#ifdef DDB +void +db_ccp_show_hw(struct ccp_softc *sc) +{ + + db_printf(" queue mask: 0x%x\n", + ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET)); + db_printf(" queue prio: 0x%x\n", + ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET)); + db_printf(" reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET)); + db_printf(" trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET)); + db_printf(" cmd timeout: 0x%x\n", + ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET)); + db_printf(" lsb public mask lo: 0x%x\n", + ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET)); + db_printf(" lsb public mask hi: 0x%x\n", + ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET)); + db_printf(" lsb private mask lo: 0x%x\n", + ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET)); + db_printf(" lsb private mask hi: 0x%x\n", + ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET)); + db_printf(" version: 0x%x\n", ccp_read_4(sc, VERSION_REG)); +} + +void +db_ccp_show_queue_hw(struct ccp_queue *qp) +{ + const struct ccp_error_code *ec; + struct ccp_softc *sc; + uint32_t status, error, esource, faultblock, headlo, qcontrol; + unsigned q, i; + + sc = qp->cq_softc; + q = qp->cq_qindex; + + qcontrol = ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE); + db_printf(" qcontrol: 0x%x%s%s\n", qcontrol, + (qcontrol & CMD_Q_RUN) ? " RUN" : "", + (qcontrol & CMD_Q_HALTED) ? " HALTED" : ""); + db_printf(" tail_lo: 0x%x\n", + ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE)); + headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE); + db_printf(" head_lo: 0x%x\n", headlo); + db_printf(" int enable: 0x%x\n", + ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE)); + db_printf(" interrupt status: 0x%x\n", + ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE)); + status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE); + db_printf(" status: 0x%x\n", status); + db_printf(" int stats: 0x%x\n", + ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE)); + + error = status & STATUS_ERROR_MASK; + if (error == 0) + return; + + esource = (status >> STATUS_ERRORSOURCE_SHIFT) & + STATUS_ERRORSOURCE_MASK; + faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) & + STATUS_VLSB_FAULTBLOCK_MASK; + + ec = NULL; + for (i = 0; i < nitems(ccp_error_codes); i++) + if (ccp_error_codes[i].ce_code == error) + break; + if (i < nitems(ccp_error_codes)) + ec = &ccp_error_codes[i]; + + db_printf(" Error: %s (%u) Source: %u Faulting LSB block: %u\n", + (ec != NULL) ? 
ec->ce_name : "(reserved)", error, esource, + faultblock); + if (ec != NULL) + db_printf(" Error description: %s\n", ec->ce_desc); + + i = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE; + db_printf(" Bad descriptor idx: %u contents:\n %32D\n", i, + (void *)&qp->desc_ring[i], " "); +} +#endif Index: sys/crypto/ccp/ccp_lsb.h =================================================================== --- /dev/null +++ sys/crypto/ccp/ccp_lsb.h @@ -0,0 +1,43 @@ +/*- + * Copyright (c) 2017 Conrad Meyer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#pragma once + +#define LSB_ENTRY_SIZE 32 /* bytes, or 256 bits */ +#define LSB_REGION_LENGTH 16 /* entries */ + +/* For now, just statically allocate some LSB entries for specific purposes. */ +#define LSB_ENTRY_KEY 0 +#define LSB_ENTRY_IV 2 +#define LSB_ENTRY_SHA 4 +#define LSB_ENTRY_GHASH 6 +#define LSB_ENTRY_GHASH_IN 7 + +void ccp_queue_decode_lsb_regions(struct ccp_softc *sc, uint64_t lsbmask, + unsigned queue); +void ccp_assign_lsb_regions(struct ccp_softc *sc, uint64_t lsbmask); Index: sys/crypto/ccp/ccp_lsb.c =================================================================== --- /dev/null +++ sys/crypto/ccp/ccp_lsb.c @@ -0,0 +1,97 @@ +/*- + * Copyright (c) 2017 Conrad Meyer + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include + +#include "ccp.h" +#include "ccp_lsb.h" + +void +ccp_queue_decode_lsb_regions(struct ccp_softc *sc, uint64_t lsbmask, + unsigned queue) +{ + struct ccp_queue *qp; + unsigned i; + + qp = &sc->queues[queue]; + + qp->lsb_mask = 0; + + for (i = 0; i < MAX_LSB_REGIONS; i++) { + if (((1 << queue) & lsbmask) != 0) + qp->lsb_mask |= (1 << i); + lsbmask >>= MAX_HW_QUEUES; + } + + /* + * Ignore region 0, which has special entries that cannot be used + * generally. + */ + qp->lsb_mask &= ~(1 << 0); +} + +/* + * Look for a private LSB for each queue. There are 7 general purpose LSBs + * total and 5 queues. PSP will reserve some of both. Firmware limits some + * queues' access to some LSBs; we hope it is fairly sane and just use a dumb + * greedy algorithm to assign LSBs to queues. + */ +void +ccp_assign_lsb_regions(struct ccp_softc *sc, uint64_t lsbmask) +{ + unsigned q, i; + + for (q = 0; q < nitems(sc->queues); q++) { + if (((1 << q) & sc->valid_queues) == 0) + continue; + + sc->queues[q].private_lsb = -1; + + /* Intentionally skip specialized 0th LSB */ + for (i = 1; i < MAX_LSB_REGIONS; i++) { + if ((lsbmask & + (1ull << (q + (MAX_HW_QUEUES * i)))) != 0) { + sc->queues[q].private_lsb = i; + lsbmask &= ~(0x1Full << (MAX_HW_QUEUES * i)); + break; + } + } + + if (i == MAX_LSB_REGIONS) { + device_printf(sc->dev, + "Ignoring queue %u with no private LSB\n", q); + sc->valid_queues &= ~(1 << q); + } + } +} Index: sys/dev/random/random_harvestq.c =================================================================== --- sys/dev/random/random_harvestq.c +++ sys/dev/random/random_harvestq.c @@ -304,6 +304,7 @@ [RANDOM_PURE_RNDTEST] = "PURE_RNDTEST", [RANDOM_PURE_VIRTIO] = "PURE_VIRTIO", [RANDOM_PURE_BROADCOM] = "PURE_BROADCOM", + [RANDOM_PURE_CCP] = "PURE_CCP", /* "ENTROPYSOURCE" */ }; Index: sys/modules/Makefile =================================================================== --- sys/modules/Makefile +++ sys/modules/Makefile @@ -79,6 +79,7 @@ cas \ ${_cbb} \ cc \ + ${_ccp} \ cd9660 \ cd9660_iconv \ ${_ce} \ @@ -575,6 +576,7 @@ .endif _cardbus= cardbus _cbb= cbb +_ccp= ccp _cpuctl= cpuctl _cpufreq= cpufreq _cs= cs Index: sys/modules/ccp/Makefile =================================================================== --- /dev/null +++ sys/modules/ccp/Makefile @@ -0,0 +1,21 @@ +# $FreeBSD$ + +.PATH: ${SRCTOP}/sys/crypto/ccp + +KMOD= ccp + +SRCS= ccp.c ccp_hardware.c ccp_lsb.c +SRCS+= ccp.h ccp_hardware.h ccp_lsb.h +SRCS+= opt_ddb.h +SRCS+= bus_if.h +SRCS+= device_if.h +SRCS+= cryptodev_if.h +SRCS+= pci_if.h + +CFLAGS+= -fms-extensions +CFLAGS.clang+= -Wno-microsoft-anon-tag + +MFILES= kern/bus_if.m kern/device_if.m opencrypto/cryptodev_if.m \ + dev/pci/pci_if.m + +.include Index: sys/sys/random.h =================================================================== --- sys/sys/random.h +++ sys/sys/random.h @@ -94,6 +94,7 @@ RANDOM_PURE_RNDTEST, RANDOM_PURE_VIRTIO, RANDOM_PURE_BROADCOM, + 
RANDOM_PURE_CCP, ENTROPYSOURCE }; Index: sys/x86/include/bus.h =================================================================== --- sys/x86/include/bus.h +++ sys/x86/include/bus.h @@ -118,6 +118,7 @@ #define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF #define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF #if defined(__amd64__) || defined(PAE) +#define BUS_SPACE_MAXADDR_48BIT 0xFFFFFFFFFFFFULL #define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL #else #define BUS_SPACE_MAXADDR 0xFFFFFFFF Index: tests/sys/opencrypto/cryptotest.py =================================================================== --- tests/sys/opencrypto/cryptotest.py +++ tests/sys/opencrypto/cryptotest.py @@ -45,9 +45,9 @@ assert os.path.exists(os.path.join(katdir, base)), "Please 'pkg install nist-kat'" return iglob(os.path.join(katdir, base, glob)) -aesmodules = [ 'cryptosoft0', 'aesni0', 'ccr0' ] +aesmodules = [ 'cryptosoft0', 'aesni0', 'ccr0', 'ccp0' ] desmodules = [ 'cryptosoft0', ] -shamodules = [ 'cryptosoft0', 'aesni0', 'ccr0' ] +shamodules = [ 'cryptosoft0', 'aesni0', 'ccr0', 'ccp0' ] def GenTestCase(cname): try: @@ -108,13 +108,25 @@ # XXX - isn't supported continue - c = Crypto(cryptodev.CRYPTO_AES_NIST_GCM_16, - cipherkey, - mac=self._gmacsizes[len(cipherkey)], - mackey=cipherkey, crid=crid) + try: + c = Crypto(cryptodev.CRYPTO_AES_NIST_GCM_16, + cipherkey, + mac=self._gmacsizes[len(cipherkey)], + mackey=cipherkey, crid=crid) + except EnvironmentError, e: + # Can't test algorithms the driver does not support. + if e.errno != errno.EOPNOTSUPP: + raise + continue if mode == 'ENCRYPT': - rct, rtag = c.encrypt(pt, iv, aad) + try: + rct, rtag = c.encrypt(pt, iv, aad) + except EnvironmentError, e: + # Can't test inputs the driver does not support. + if e.errno != errno.EINVAL: + raise + continue rtag = rtag[:len(tag)] data['rct'] = rct.encode('hex') data['rtag'] = rtag.encode('hex') @@ -128,7 +140,13 @@ self.assertRaises(IOError, c.decrypt, *args) else: - rpt, rtag = c.decrypt(*args) + try: + rpt, rtag = c.decrypt(*args) + except EnvironmentError, e: + # Can't test inputs the driver does not support. + if e.errno != errno.EINVAL: + raise + continue data['rpt'] = rpt.encode('hex') data['rtag'] = rtag.encode('hex') self.assertEqual(rpt, pt, @@ -189,7 +207,13 @@ if swapptct: pt, ct = ct, pt # run the fun - c = Crypto(meth, cipherkey, crid=crid) + try: + c = Crypto(meth, cipherkey, crid=crid) + except EnvironmentError, e: + # Can't test hashes the driver does not support. + if e.errno != errno.EOPNOTSUPP: + raise + continue r = curfun(c, pt, iv) self.assertEqual(r, ct) @@ -309,6 +333,7 @@ cryptosoft = GenTestCase('cryptosoft0') aesni = GenTestCase('aesni0') ccr = GenTestCase('ccr0') +ccp = GenTestCase('ccp0') if __name__ == '__main__': unittest.main()