/* sys/crypto/ccp/ccp_hardware.c */
/*- | |||||
* Copyright (c) 2017 Chelsio Communications, Inc. | |||||
* Copyright (c) 2017 Conrad Meyer <cem@FreeBSD.org> | |||||
* All rights reserved. | |||||
* Largely borrowed from ccr(4), Written by: John Baldwin <jhb@FreeBSD.org> | |||||
* | |||||
* Redistribution and use in source and binary forms, with or without | |||||
* modification, are permitted provided that the following conditions | |||||
* are met: | |||||
* 1. Redistributions of source code must retain the above copyright | |||||
* notice, this list of conditions and the following disclaimer. | |||||
* 2. Redistributions in binary form must reproduce the above copyright | |||||
* notice, this list of conditions and the following disclaimer in the | |||||
* documentation and/or other materials provided with the distribution. | |||||
* | |||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||||
* SUCH DAMAGE. | |||||
*/ | |||||
#include <sys/cdefs.h> | |||||
__FBSDID("$FreeBSD$"); | |||||
#include "opt_ddb.h" | |||||
#include <sys/types.h> | |||||
#include <sys/bus.h> | |||||
#include <sys/lock.h> | |||||
#include <sys/kernel.h> | |||||
#include <sys/malloc.h> | |||||
#include <sys/mutex.h> | |||||
#include <sys/module.h> | |||||
#include <sys/rman.h> | |||||
#include <sys/sglist.h> | |||||
#include <sys/sysctl.h> | |||||
#ifdef DDB | |||||
#include <ddb/ddb.h> | |||||
#endif | |||||
#include <dev/pci/pcireg.h> | |||||
#include <dev/pci/pcivar.h> | |||||
#include <machine/bus.h> | |||||
#include <machine/resource.h> | |||||
#include <machine/vmparam.h> | |||||
#include <opencrypto/cryptodev.h> | |||||
#include <opencrypto/xform.h> | |||||
#include <vm/vm.h> | |||||
#include <vm/pmap.h> | |||||
#include "cryptodev_if.h" | |||||
#include "ccp.h" | |||||
#include "ccp_hardware.h" | |||||
#include "ccp_lsb.h" | |||||
/* The hardware consumes fixed 32-byte descriptors; catch layout drift. */
CTASSERT(sizeof(struct ccp_desc) == 32);

SYSCTL_NODE(_hw, OID_AUTO, ccp, CTLFLAG_RD, 0, "ccp node");

/* Descriptor ring size tunable: each queue ring has (1 << order) entries. */
unsigned g_ccp_ring_order = 11;
SYSCTL_UINT(_hw_ccp, OID_AUTO, ring_order, CTLFLAG_RDTUN, &g_ccp_ring_order,
    0, "Set CCP ring order. (1 << this) == ring size. Min: 1, Max: 16");
/* Read a 32-bit global (non-queue) CCP register at byte offset 'offset'. */
static inline uint32_t
ccp_read_4(struct ccp_softc *sc, uint32_t offset)
{
	return (bus_space_read_4(sc->pci_bus_tag, sc->pci_bus_handle, offset));
}
/* Write a 32-bit global (non-queue) CCP register at byte offset 'offset'. */
static inline void
ccp_write_4(struct ccp_softc *sc, uint32_t offset, uint32_t value)
{
	bus_space_write_4(sc->pci_bus_tag, sc->pci_bus_handle, offset, value);
}
/* Read a 32-bit register from the given queue's register window. */
static inline uint32_t
ccp_read_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset)
{
	/*
	 * Each queue gets its own 4kB register space. Queue 0 is at 0x1000.
	 */
	return (ccp_read_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset));
}
static inline void | |||||
ccp_write_queue_4(struct ccp_softc *sc, unsigned queue, uint32_t offset, | |||||
uint32_t value) | |||||
{ | |||||
ccp_write_4(sc, (CMD_Q_STATUS_INCR * (1 + queue)) + offset, value); | |||||
} | |||||
/*
 * Publish the software tail index to the hardware TAIL_LO register (as a
 * bus address into the descriptor ring), telling the engine that any newly
 * written descriptors are ready to run.
 */
void
ccp_queue_write_tail(struct ccp_queue *qp)
{
	ccp_write_queue_4(qp->cq_softc, qp->cq_qindex, CMD_Q_TAIL_LO_BASE,
	    ((uint32_t)qp->desc_ring_bus_addr) + (Q_DESC_SIZE * qp->cq_tail));
}
/* | |||||
* Given a queue and a reserved LSB entry index, compute the LSB *entry id* of | |||||
* that entry for the queue's private LSB region. | |||||
*/ | |||||
static inline uint8_t | |||||
ccp_queue_lsb_entry(struct ccp_queue *qp, unsigned lsb_entry) | |||||
{ | |||||
return ((qp->private_lsb * LSB_REGION_LENGTH + lsb_entry)); | |||||
} | |||||
/* | |||||
* Given a queue and a reserved LSB entry index, compute the LSB *address* of | |||||
* that entry for the queue's private LSB region. | |||||
*/ | |||||
static inline uint32_t | |||||
ccp_queue_lsb_address(struct ccp_queue *qp, unsigned lsb_entry) | |||||
{ | |||||
return (ccp_queue_lsb_entry(qp, lsb_entry) * LSB_ENTRY_SIZE); | |||||
} | |||||
/* | |||||
* Some terminology: | |||||
* | |||||
* LSB - Local Storage Block | |||||
* ========================= | |||||
* | |||||
* 8 segments/regions, each containing 16 entries. | |||||
* | |||||
* Each entry contains 256 bits (32 bytes). | |||||
* | |||||
* Segments are virtually addressed in commands, but accesses cannot cross | |||||
* segment boundaries. Virtual map uses an identity mapping by default | |||||
* (virtual segment N corresponds to physical segment N). | |||||
* | |||||
* Access to a physical region can be restricted to any subset of all five | |||||
* queues. | |||||
* | |||||
* "Pass-through" mode | |||||
* =================== | |||||
* | |||||
* Pass-through is a generic DMA engine, much like ioat(4). Some nice | |||||
* features: | |||||
* | |||||
* - Supports byte-swapping for endian conversion (32- or 256-bit words) | |||||
* - AND, OR, XOR with fixed 256-bit mask | |||||
* - CRC32 of data (may be used in tandem with bswap, but not bit operations) | |||||
* - Read/write of LSB | |||||
* - Memset | |||||
* | |||||
* If bit manipulation mode is enabled, input must be a multiple of 256 bits | |||||
* (32 bytes). | |||||
* | |||||
* If byte-swapping is enabled, input must be a multiple of the word size. | |||||
* | |||||
* Zlib mode -- only usable from one queue at a time, single job at a time. | |||||
* ======================================================================== | |||||
* | |||||
* Only usable from private host, aka PSP? Not host processor? | |||||
* | |||||
* RNG. | |||||
* ==== | |||||
* | |||||
* Raw bits are conditioned with AES and fed through CTR_DRBG. Output goes in | |||||
* a ring buffer readable by software. | |||||
* | |||||
* NIST SP 800-90B Repetition Count and Adaptive Proportion health checks are | |||||
* implemented on the raw input stream and may be enabled to verify min-entropy | |||||
* of 0.5 bits per bit. | |||||
*/ | |||||
/*
 * bus_dmamap_load(9) callback: record the loaded segment's bus address in
 * the caller-supplied bus_addr_t.  The ring tag is created with a single
 * segment, so only segs[0] is meaningful here.
 */
static void
ccp_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	bus_addr_t *baddr;

	KASSERT(error == 0, ("%s: error:%d", __func__, error));
	baddr = arg;
	*baddr = segs->ds_addr;
}
/*
 * Allocate the DMA descriptor ring and completion-context ring for one
 * hardware queue and bring the queue into the RUN state.
 *
 * 'lsbmask' is the BIOS-provided private LSB assignment; queues with no LSB
 * access are removed from sc->valid_queues and skipped.  Returns 0 on
 * success (or for skipped queues) or an errno from the busdma path, in
 * which case partial allocations are unwound before returning.
 */
static int
ccp_hw_attach_queue(device_t dev, uint64_t lsbmask, unsigned queue)
{
	struct ccp_softc *sc;
	struct ccp_queue *qp;
	void *desc;
	size_t ringsz, num_descriptors;
	int error;

	desc = NULL;
	sc = device_get_softc(dev);
	qp = &sc->queues[queue];

	/*
	 * Don't bother allocating a ring for queues the host isn't allowed to
	 * drive.
	 */
	if ((sc->valid_queues & (1 << queue)) == 0)
		return (0);

	ccp_queue_decode_lsb_regions(sc, lsbmask, queue);

	/* Ignore queues that do not have any LSB access. */
	if (qp->lsb_mask == 0) {
		device_printf(dev, "Ignoring queue %u with no LSB access\n",
		    queue);
		sc->valid_queues &= ~(1 << queue);
		return (0);
	}

	num_descriptors = 1 << sc->ring_size_order;
	ringsz = sizeof(struct ccp_desc) * num_descriptors;

	/*
	 * "Queue_Size" is order - 1.
	 *
	 * Queue must be aligned to 5+Queue_Size+1 == 5 + order bits.
	 */
	error = bus_dma_tag_create(bus_get_dma_tag(dev),
	    1 << (5 + sc->ring_size_order),
#if defined(__i386__) && !defined(PAE)
	    0, BUS_SPACE_MAXADDR,
#else
	    (bus_addr_t)1 << 32, BUS_SPACE_MAXADDR_48BIT,
#endif
	    BUS_SPACE_MAXADDR, NULL, NULL, ringsz, 1,
	    ringsz, 0, NULL, NULL, &qp->ring_desc_tag);
	if (error != 0)
		goto out;

	error = bus_dmamem_alloc(qp->ring_desc_tag, &desc,
	    BUS_DMA_ZERO | BUS_DMA_WAITOK, &qp->ring_desc_map);
	if (error != 0)
		goto out;
	error = bus_dmamap_load(qp->ring_desc_tag, qp->ring_desc_map, desc,
	    ringsz, ccp_dmamap_cb, &qp->desc_ring_bus_addr, BUS_DMA_WAITOK);
	if (error != 0)
		goto out;

	qp->desc_ring = desc;
	/* One completion context slot per descriptor, same indexing. */
	qp->completions_ring = malloc(num_descriptors *
	    sizeof(*qp->completions_ring), M_CCP, M_ZERO | M_WAITOK);

	/* Zero control register; among other things, clears the RUN flag. */
	qp->qcontrol = 0;
	ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);
	ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE, 0);

	/* Clear any leftover interrupt status flags */
	ccp_write_queue_4(sc, queue, CMD_Q_INTERRUPT_STATUS_BASE,
	    ALL_INTERRUPTS);

	qp->qcontrol |= (sc->ring_size_order - 1) << CMD_Q_SIZE_SHIFT;

	/* Head == tail == ring start: the ring begins empty. */
	ccp_write_queue_4(sc, queue, CMD_Q_TAIL_LO_BASE,
	    (uint32_t)qp->desc_ring_bus_addr);
	ccp_write_queue_4(sc, queue, CMD_Q_HEAD_LO_BASE,
	    (uint32_t)qp->desc_ring_bus_addr);

	/*
	 * Enable completion interrupts, as well as error or administrative
	 * halt interrupts.  We don't use administrative halts, but they
	 * shouldn't trip unless we do, so it ought to be harmless.  We also
	 * are not prepared to actually handle errors at this time.
	 */
	ccp_write_queue_4(sc, queue, CMD_Q_INT_ENABLE_BASE,
	    INT_COMPLETION | INT_ERROR | INT_QUEUE_STOPPED);

	qp->qcontrol |= (qp->desc_ring_bus_addr >> 32) << CMD_Q_PTR_HI_SHIFT;
	qp->qcontrol |= CMD_Q_RUN;
	ccp_write_queue_4(sc, queue, CMD_Q_CONTROL_BASE, qp->qcontrol);

out:
	if (error != 0) {
		/* Unwind in reverse order; 'desc' is NULL if never alloc'd. */
		if (qp->desc_ring != NULL)
			bus_dmamap_unload(qp->ring_desc_tag,
			    qp->ring_desc_map);
		if (desc != NULL)
			bus_dmamem_free(qp->ring_desc_tag, desc,
			    qp->ring_desc_map);
		if (qp->ring_desc_tag != NULL)
			bus_dma_tag_destroy(qp->ring_desc_tag);
	}
	return (error);
}
/*
 * Release the descriptor ring and completion ring allocated for one queue
 * by ccp_hw_attach_queue().
 */
static void
ccp_hw_detach_queue(device_t dev, unsigned queue)
{
	struct ccp_softc *sc;
	struct ccp_queue *qp;

	sc = device_get_softc(dev);
	qp = &sc->queues[queue];

	/*
	 * Nothing to free for queues the host isn't allowed to drive;
	 * ccp_hw_attach_queue() never allocated a ring for them.
	 */
	if ((sc->valid_queues & (1 << queue)) == 0)
		return;

	free(qp->completions_ring, M_CCP);
	bus_dmamap_unload(qp->ring_desc_tag, qp->ring_desc_map);
	bus_dmamem_free(qp->ring_desc_tag, qp->desc_ring, qp->ring_desc_map);
	bus_dma_tag_destroy(qp->ring_desc_tag);
}
/*
 * Map the PCI BARs this driver uses: BAR2 is the CCP MMIO register file
 * (bus tag/handle are cached in the softc for ccp_read_4/ccp_write_4), and
 * BAR5 holds the MSI-X table.  Returns ENODEV on failure, releasing any
 * resource already allocated.
 */
static int
ccp_map_pci_bar(device_t dev)
{
	struct ccp_softc *sc;

	sc = device_get_softc(dev);

	sc->pci_resource_id = PCIR_BAR(2);
	sc->pci_resource = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->pci_resource_id, RF_ACTIVE);
	if (sc->pci_resource == NULL) {
		device_printf(dev, "unable to allocate pci resource\n");
		return (ENODEV);
	}

	sc->pci_resource_id_msix = PCIR_BAR(5);
	sc->pci_resource_msix = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &sc->pci_resource_id_msix, RF_ACTIVE);
	if (sc->pci_resource_msix == NULL) {
		device_printf(dev, "unable to allocate pci resource msix\n");
		bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
		    sc->pci_resource);
		return (ENODEV);
	}

	sc->pci_bus_tag = rman_get_bustag(sc->pci_resource);
	sc->pci_bus_handle = rman_get_bushandle(sc->pci_resource);
	return (0);
}
/* Release the BAR resources allocated by ccp_map_pci_bar(). */
static void
ccp_unmap_pci_bar(device_t dev)
{
	struct ccp_softc *sc;

	sc = device_get_softc(dev);

	bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id_msix,
	    sc->pci_resource_msix);
	bus_release_resource(dev, SYS_RES_MEMORY, sc->pci_resource_id,
	    sc->pci_resource);
}
/*
 * Recover from an error interrupt on a queue: decode and log the status
 * register, fail the faulting descriptor's completion callback with EIO,
 * then manually advance HEAD_LO past the bad descriptor and restart the
 * queue (restart procedure per § 14.2.5).
 *
 * Called from ccp_intr_run_completions() with the queue lock held.
 */
static void
ccp_intr_handle_error(struct ccp_queue *qp, const struct ccp_desc *desc)
{
	struct ccp_completion_ctx *cctx;
	struct ccp_softc *sc;
	uint32_t status, error, esource, faultblock;
	unsigned q, idx;

	sc = qp->cq_softc;
	q = qp->cq_qindex;

	status = ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE);

	/* TODO: Decode error status from table on pg. 106 */
	error = status & STATUS_ERROR_MASK;
	esource = (status >> STATUS_ERRORSOURCE_SHIFT) &
	    STATUS_ERRORSOURCE_MASK;
	faultblock = (status >> STATUS_VLSB_FAULTBLOCK_SHIFT) &
	    STATUS_VLSB_FAULTBLOCK_MASK;
	device_printf(sc->dev, "Error: %u Source: %u Faulting LSB block: %u\n",
	    error, esource, faultblock);

	/* TODO Could format the desc nicely here */
	idx = desc - qp->desc_ring;
	device_printf(sc->dev, "Bad descriptor index: %u contents: %32D\n",
	    idx, (const void *)desc, " ");

	/*
	 * TODO Per § 14.4 "Error Handling," DMA_Status, DMA_Read/Write_Status,
	 * Zlib Decompress status may be interesting.
	 */

	/* Fail the faulting descriptor's completion, if one was registered. */
	cctx = &qp->completions_ring[idx];
	if (cctx->callback_fn != NULL) {
		/* TODO More specific error code */
		cctx->callback_fn(qp, cctx->session, cctx->callback_arg, EIO);
		cctx->callback_fn = NULL;
	}

	/*
	 * Restart procedure described in § 14.2.5.  Could be used by HoC if we
	 * used that.
	 *
	 * Advance HEAD_LO past bad descriptor manually, then restart queue.
	 */
	idx = (idx + 1) % (1 << sc->ring_size_order);
	qp->cq_head = idx;
	device_printf(sc->dev, "%s: wrote sw head:%u\n", __func__,
	    qp->cq_head);
	ccp_write_queue_4(sc, q, CMD_Q_HEAD_LO_BASE,
	    (uint32_t)qp->desc_ring_bus_addr + (idx * Q_DESC_SIZE));
	ccp_write_queue_4(sc, q, CMD_Q_CONTROL_BASE, qp->qcontrol);
	device_printf(sc->dev, "%s: Restarted queue\n", __func__);
}
/*
 * Run completion callbacks for every descriptor the hardware has finished:
 * everything from the software head up to (but excluding) the hardware
 * HEAD_LO pointer.  If INT_ERROR was flagged, the descriptor at HEAD_LO is
 * stalled and is handed to ccp_intr_handle_error() for recovery.
 */
static void
ccp_intr_run_completions(struct ccp_queue *qp, uint32_t ints)
{
	struct ccp_completion_ctx *cctx;
	struct ccp_softc *sc;
	const struct ccp_desc *desc;
	uint32_t headlo, idx;
	unsigned q;

	sc = qp->cq_softc;
	q = qp->cq_qindex;

	mtx_lock(&qp->cq_lock);

	/*
	 * Hardware HEAD_LO points to the first incomplete descriptor.  Process
	 * any submitted and completed descriptors, up to but not including
	 * HEAD_LO.
	 */
	headlo = ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE);
	idx = (headlo - (uint32_t)qp->desc_ring_bus_addr) / Q_DESC_SIZE;

	device_printf(sc->dev, "%s: hw head:%u sw head:%u\n", __func__, idx,
	    qp->cq_head);
	while (qp->cq_head != idx) {
		device_printf(sc->dev, "%s: completing:%u\n", __func__,
		    qp->cq_head);

		cctx = &qp->completions_ring[qp->cq_head];
		if (cctx->callback_fn != NULL) {
			cctx->callback_fn(qp, cctx->session,
			    cctx->callback_arg, 0);
			/* One-shot: clear so it cannot fire twice. */
			cctx->callback_fn = NULL;
		}

		qp->cq_head = (qp->cq_head + 1) % (1 << sc->ring_size_order);
	}
	device_printf(sc->dev, "%s: wrote sw head:%u\n", __func__,
	    qp->cq_head);

	/*
	 * Desc points to the first incomplete descriptor, at the time we read
	 * HEAD_LO.  If there was an error flagged in interrupt status, the HW
	 * will not proceed past the erroneous descriptor by itself.
	 */
	desc = &qp->desc_ring[idx];
	if ((ints & INT_ERROR) != 0)
		ccp_intr_handle_error(qp, desc);

	mtx_unlock(&qp->cq_lock);
}
/*
 * Device interrupt handler.  One interrupt line covers every queue, so poll
 * each valid queue's interrupt-status register, acknowledge whatever is set
 * (write-1-to-clear), and dispatch completion / queue-stopped handling.
 */
static void
ccp_intr_handler(void *arg)
{
	struct ccp_softc *sc = arg;
	size_t i;
	uint32_t ints;

	device_printf(sc->dev, "%s: interrupt\n", __func__);

	/*
	 * We get one global interrupt per PCI device, shared over all of
	 * its queues.  Scan each valid queue on interrupt for flags indicating
	 * activity.
	 */
	for (i = 0; i < nitems(sc->queues); i++) {
		if ((sc->valid_queues & (1 << i)) == 0)
			continue;

		ints = ccp_read_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE);
		if (ints == 0)
			continue;

#if 0
		device_printf(sc->dev, "%s: %x interrupts on queue %zu\n",
		    __func__, (unsigned)ints, i);
#endif

		/* Write back 1s to clear interrupt status bits. */
		ccp_write_queue_4(sc, i, CMD_Q_INTERRUPT_STATUS_BASE, ints);

		if ((ints & INT_COMPLETION) != 0)
			ccp_intr_run_completions(&sc->queues[i], ints);

		if ((ints & INT_QUEUE_STOPPED) != 0)
			device_printf(sc->dev, "%s: queue %zu stopped\n",
			    __func__, i);
	}
}
/*
 * Allocate and wire up the device interrupt, routing it to
 * ccp_intr_handler().  The MSI-X path is currently disabled (#if 0); a
 * single shared INTx interrupt is used instead.  Returns 0 or an errno.
 */
static int
ccp_setup_interrupts(struct ccp_softc *sc)
{
	int rid, error;

#if 0
	/* MSIX code */
	uint32_t nvec;
	int n;

	n = pci_msix_count(sc->dev);
	device_printf(sc->dev, "XXX %s: msix_count: %d\n", __func__, n);
	if (n < 1) {
		return (ENXIO);
	}

	nvec = n;
	n = pci_alloc_msix(sc->dev, &nvec);
	device_printf(sc->dev, "XXX %s: alloc_msix: %d nvec=%u\n",
	    __func__, n, nvec);
	if (n != 0) {
		return (n);
	}
	if (nvec < 1) {
		return (ENXIO);
	}

	rid = 1;
	sc->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
	    RF_ACTIVE);
#else
	/* INTx code */
	/*
	 * NOTE(review): bus_alloc_resource_any() may modify 'rid'; the
	 * release path below uses rman_get_rid() rather than this local, so
	 * that appears benign — confirm against bus_alloc_resource(9).
	 */
	rid = 0;
	sc->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
	    RF_ACTIVE | RF_SHAREABLE);
#endif
	if (sc->intr_res == NULL) {
		device_printf(sc->dev, "%s: Failed to alloc IRQ resource\n",
		    __func__);
		return (ENXIO);
	}

	sc->intr_tag = NULL;
	error = bus_setup_intr(sc->dev, sc->intr_res,
	    INTR_MPSAFE | INTR_TYPE_MISC, NULL, ccp_intr_handler, sc,
	    &sc->intr_tag);
	if (error != 0)
		device_printf(sc->dev, "%s: setup_intr: %d\n", __func__, error);

	return (error);
}
/*
 * Tear down interrupt state established by ccp_setup_interrupts().  Safe to
 * call with partially initialized state: NULL tag/resource are skipped.
 */
static void
ccp_release_interrupts(struct ccp_softc *sc)
{
	if (sc->intr_tag != NULL)
		bus_teardown_intr(sc->dev, sc->intr_res, sc->intr_tag);
	if (sc->intr_res != NULL)
		bus_release_resource(sc->dev, SYS_RES_IRQ,
		    rman_get_rid(sc->intr_res), sc->intr_res);

	pci_release_msi(sc->dev);
}
int | |||||
ccp_hw_attach(device_t dev) | |||||
{ | |||||
struct ccp_softc *sc; | |||||
uint64_t lsbmask; | |||||
uint32_t version, lsbmasklo, lsbmaskhi; | |||||
unsigned i, j; | |||||
int error; | |||||
bool bars_mapped, interrupts_setup; | |||||
i = 0; | |||||
bars_mapped = interrupts_setup = false; | |||||
sc = device_get_softc(dev); | |||||
error = ccp_map_pci_bar(dev); | |||||
if (error != 0) { | |||||
device_printf(dev, "XXX%s: couldn't map BAR(s)\n", __func__); | |||||
goto out; | |||||
} | |||||
bars_mapped = true; | |||||
error = pci_enable_busmaster(dev); | |||||
if (error != 0) { | |||||
device_printf(dev, "XXX%s: couldn't enable busmaster\n", | |||||
__func__); | |||||
goto out; | |||||
} | |||||
sc->ring_size_order = g_ccp_ring_order; | |||||
sc->valid_queues = ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET); | |||||
version = ccp_read_4(sc, VERSION_REG); | |||||
if ((version & VERSION_NUM_MASK) < 5) { | |||||
device_printf(dev, | |||||
"driver supports version 5 and later hardware\n"); | |||||
error = ENXIO; | |||||
goto out; | |||||
} | |||||
error = ccp_setup_interrupts(sc); | |||||
if (error != 0) | |||||
goto out; | |||||
interrupts_setup = true; | |||||
sc->hw_version = version & VERSION_NUM_MASK; | |||||
sc->num_queues = (version >> VERSION_NUMVQM_SHIFT) & | |||||
VERSION_NUMVQM_MASK; | |||||
sc->num_lsb_entries = (version >> VERSION_LSBSIZE_SHIFT) & | |||||
VERSION_LSBSIZE_MASK; | |||||
sc->hw_features = version & VERSION_CAP_MASK; | |||||
/* | |||||
* Copy private LSB mask to public registers to enable access to LSB | |||||
* from all queues allowed by BIOS. | |||||
*/ | |||||
lsbmasklo = ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET); | |||||
lsbmaskhi = ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET); | |||||
ccp_write_4(sc, LSB_PUBLIC_MASK_LO_OFFSET, lsbmasklo); | |||||
ccp_write_4(sc, LSB_PUBLIC_MASK_HI_OFFSET, lsbmaskhi); | |||||
lsbmask = ((uint64_t)lsbmaskhi << 30) | lsbmasklo; | |||||
device_printf(dev, "XXX%s: 2\n", __func__); | |||||
for (; i < nitems(sc->queues); i++) { | |||||
error = ccp_hw_attach_queue(dev, lsbmask, i); | |||||
if (error != 0) { | |||||
device_printf(dev, "XXX%s: couldn't attach queue %u\n", | |||||
__func__, i); | |||||
goto out; | |||||
} | |||||
} | |||||
ccp_assign_lsb_regions(sc, lsbmask); | |||||
device_printf(dev, "XXX%s: 3\n", __func__); | |||||
out: | |||||
if (error != 0) { | |||||
if (interrupts_setup) | |||||
ccp_release_interrupts(sc); | |||||
for (j = 0; j < i; j++) | |||||
ccp_hw_detach_queue(dev, j); | |||||
if (sc->ring_size_order != 0) | |||||
pci_disable_busmaster(dev); | |||||
if (bars_mapped) | |||||
ccp_unmap_pci_bar(dev); | |||||
} | |||||
return (error); | |||||
} | |||||
Done Inline ActionsThis looks like it should be assigning to error instead. There's also an extra newline. markj: This looks like it should be assigning to error instead. There's also an extra newline. | |||||
void | |||||
ccp_hw_detach(device_t dev) | |||||
{ | |||||
struct ccp_softc *sc; | |||||
unsigned i; | |||||
sc = device_get_softc(dev); | |||||
for (i = 0; i < nitems(sc->queues); i++) | |||||
ccp_hw_detach_queue(dev, i); | |||||
ccp_release_interrupts(sc); | |||||
pci_disable_busmaster(dev); | |||||
ccp_unmap_pci_bar(dev); | |||||
} | |||||
/*
 * Fill in one pass-through (DMA copy) descriptor at the software tail and
 * advance the tail index.  Does not ring the doorbell — the caller must
 * later invoke ccp_queue_write_tail().  'interrupt' requests a completion
 * interrupt for this descriptor; 'cctx', if non-NULL, is copied into the
 * matching completions_ring slot.  Ring space must have been reserved by
 * the caller (asserted).
 */
static void
ccp_passthrough(struct ccp_queue *qp, bus_addr_t dst,
    enum ccp_memtype dst_type, bus_addr_t src, enum ccp_memtype src_type,
    bus_size_t len, enum ccp_passthru_byteswap swapmode,
    enum ccp_passthru_bitwise bitmode, bool interrupt,
    const struct ccp_completion_ctx *cctx)
{
	struct ccp_desc *desc;

	KASSERT(ccp_queue_get_ring_space(qp) > 0,
	    ("ccp_passthrough on full ring"));

	desc = &qp->desc_ring[qp->cq_tail];
	memset(desc, 0, sizeof(*desc));
	desc->engine = CCP_ENGINE_PASSTHRU;

	desc->pt.ioc = interrupt;
	desc->pt.byteswap = swapmode;
	desc->pt.bitwise = bitmode;
	desc->length = len;

	desc->src_lo = (uint32_t)src;
	desc->src_hi = src >> 32;
	desc->src_mem = src_type;

	desc->dst_lo = (uint32_t)dst;
	desc->dst_hi = dst >> 32;
	desc->dst_mem = dst_type;

	/* Bitwise ops take their mask from the queue's key LSB entry. */
	if (bitmode != CCP_PASSTHRU_BITWISE_NOOP)
		desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_KEY);

	if (cctx != NULL)
		memcpy(&qp->completions_ring[qp->cq_tail], cctx, sizeof(*cctx));

	qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order);
}
/*
 * Copy 'len' bytes between system memory (described by 'sgl') and an LSB
 * location, emitting one pass-through descriptor per SG segment.  'tolsb'
 * selects the direction; the interrupt flag is only applied to the last
 * descriptor of the transfer.
 *
 * NOTE(review): lsb_addr is not advanced between segments, so a multi-
 * segment transfer to the LSB would overwrite the same entry — confirm
 * callers only use this with transfers that fit the addressing scheme.
 */
static void
ccp_passthrough_sgl(struct ccp_queue *qp, bus_addr_t lsb_addr, bool tolsb,
    struct sglist *sgl, bus_size_t len, bool interrupt,
    const struct ccp_completion_ctx *cctx)
{
	struct sglist_seg *seg;
	size_t i, remain, nb;

	remain = len;
	for (i = 0; i < sgl->sg_nseg && remain != 0; i++) {
		seg = &sgl->sg_segs[i];
		/* Never transfer more than the caller requested. */
		nb = min(remain, seg->ss_len);

		if (tolsb)
			ccp_passthrough(qp, lsb_addr, CCP_MEMTYPE_SB,
			    seg->ss_paddr, CCP_MEMTYPE_SYSTEM, nb,
			    CCP_PASSTHRU_BYTESWAP_NOOP,
			    CCP_PASSTHRU_BITWISE_NOOP,
			    (nb == remain) && interrupt, cctx);
		else
			ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM,
			    lsb_addr, CCP_MEMTYPE_SB, nb,
			    CCP_PASSTHRU_BYTESWAP_NOOP,
			    CCP_PASSTHRU_BITWISE_NOOP,
			    (nb == remain) && interrupt, cctx);

		remain -= nb;
	}
}
/*
 * Initial hash values (H0..H7) for each SHA variant, stored in the layout
 * the CCP engine expects.
 *
 * Note that these vectors are in reverse of the usual order.
 */
const uint32_t SHA1_H[] = {
	0xc3d2e1f0ul,
	0x10325476ul,
	0x98badcfeul,
	0xefcdab89ul,
	0x67452301ul,
	0,
	0,
	0,
};
const uint32_t SHA224_H[] = {
	0xbefa4fa4ul,
	0x64f98fa7ul,
	0x68581511ul,
	0xffc00b31ul,
	0xf70e5939ul,
	0x3070dd17ul,
	0x367cd507ul,
	0xc1059ed8ul,
};
const uint32_t SHA256_H[] = {
	0x5be0cd19ul,
	0x1f83d9abul,
	0x9b05688cul,
	0x510e527ful,
	0xa54ff53aul,
	0x3c6ef372ul,
	0xbb67ae85ul,
	0x6a09e667ul,
};
const uint64_t SHA384_H[] = {
	0x47b5481dbefa4fa4ull,
	0xdb0c2e0d64f98fa7ull,
	0x8eb44a8768581511ull,
	0x67332667ffc00b31ull,
	0x152fecd8f70e5939ull,
	0x9159015a3070dd17ull,
	0x629a292a367cd507ull,
	0xcbbb9d5dc1059ed8ull,
};
const uint64_t SHA512_H[] = {
	0x5be0cd19137e2179ull,
	0x1f83d9abfb41bd6bull,
	0x9b05688c2b3e6c1full,
	0x510e527fade682d1ull,
	0xa54ff53a5f1d36f1ull,
	0x3c6ef372fe94f82bull,
	0xbb67ae8584caa73bull,
	0x6a09e667f3bcc908ull,
};
/*
 * Per-algorithm dispatch table: initial H vectors (in the reversed layout
 * above), the opencrypto auth transform, and the CCP engine type code.
 * SHA-224 is stubbed out (#if 0) — the output extraction offset in
 * ccp_sha_copy_result() is still a placeholder.
 */
const struct SHA_Defn {
	enum sha_version version;
	const void *H_vectors;
	size_t H_size;
	struct auth_hash *axf;
	enum ccp_sha_type engine_type;
} SHA_definitions[] = {
	{
		.version = SHA1,
		.H_vectors = SHA1_H,
		.H_size = sizeof(SHA1_H),
		.axf = &auth_hash_hmac_sha1,
		.engine_type = CCP_SHA_TYPE_1,
	},
#if 0
	{
		.version = SHA2_224,
		.H_vectors = SHA224_H,
		.H_size = sizeof(SHA224_H),
		.axf = &auth_hash_hmac_sha2_224,
		.engine_type = CCP_SHA_TYPE_224,
	},
#endif
	{
		.version = SHA2_256,
		.H_vectors = SHA256_H,
		.H_size = sizeof(SHA256_H),
		.axf = &auth_hash_hmac_sha2_256,
		.engine_type = CCP_SHA_TYPE_256,
	},
	{
		.version = SHA2_384,
		.H_vectors = SHA384_H,
		.H_size = sizeof(SHA384_H),
		.axf = &auth_hash_hmac_sha2_384,
		.engine_type = CCP_SHA_TYPE_384,
	},
	{
		.version = SHA2_512,
		.H_vectors = SHA512_H,
		.H_size = sizeof(SHA512_H),
		.axf = &auth_hash_hmac_sha2_512,
		.engine_type = CCP_SHA_TYPE_512,
	},
};
static void | |||||
ccp_sha_single_desc(struct ccp_queue *qp, const struct SHA_Defn *defn, | |||||
vm_paddr_t addr, size_t len, bool start, bool end, uint64_t msgbits) | |||||
{ | |||||
struct ccp_desc *desc; | |||||
KASSERT(ccp_queue_get_ring_space(qp) > 0, | |||||
("ccp_passthrough on full ring")); | |||||
desc = &qp->desc_ring[qp->cq_tail]; | |||||
memset(desc, 0, sizeof(*desc)); | |||||
desc->engine = CCP_ENGINE_SHA; | |||||
desc->som = start; | |||||
desc->eom = end; | |||||
desc->sha.type = defn->engine_type; | |||||
desc->length = len; | |||||
if (end) { | |||||
desc->sha_len_lo = (uint32_t)msgbits; | |||||
desc->sha_len_hi = msgbits >> 32; | |||||
} | |||||
desc->src_lo = (uint32_t)addr; | |||||
desc->src_hi = addr >> 32; | |||||
desc->src_mem = CCP_MEMTYPE_SYSTEM; | |||||
desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_SHA); | |||||
qp->cq_tail = (qp->cq_tail + 1) % (1 << qp->cq_softc->ring_size_order); | |||||
} | |||||
/*
 * Submit a complete SHA computation over 'sgl_src', writing the raw
 * (LSB-formatted) digest state out to 'sgl_dst'.  Sequence: load the
 * initial H vectors into the queue's SHA LSB entry, emit one SHA descriptor
 * per source segment (the last carries EOM and the total bit length), then
 * copy the LSB state out with pass-through descriptors — 'cctx' and the
 * completion interrupt ride on the final copy.
 *
 * Returns EINVAL for an unrecognized 'version', 0 otherwise.
 *
 * NOTE(review): 'mflags' is currently unused, and ring space for all
 * emitted descriptors is assumed to have been reserved by the caller —
 * confirm.
 */
static int
ccp_sha(struct ccp_queue *qp, enum sha_version version, struct sglist *sgl_src,
    struct sglist *sgl_dst, const struct ccp_completion_ctx *cctx, int mflags)
{
	const struct SHA_Defn *defn;
	struct sglist_seg *seg;
	size_t i, msgsize, remaining, nb;
	uint32_t lsbaddr;

	for (i = 0; i < nitems(SHA_definitions); i++)
		if (SHA_definitions[i].version == version)
			break;
	if (i == nitems(SHA_definitions))
		return (EINVAL);
	defn = &SHA_definitions[i];

	/* XXX validate input ??? */

	/* Load initial SHA state into LSB */
	ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA),
	    CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors),
	    CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE),
	    CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false,
	    NULL);

	/* Execute series of SHA updates on correctly sized buffers */
	msgsize = 0;
	for (i = 0; i < sgl_src->sg_nseg; i++) {
		seg = &sgl_src->sg_segs[i];
		msgsize += seg->ss_len;
		ccp_sha_single_desc(qp, defn, seg->ss_paddr, seg->ss_len,
		    i == 0, i == sgl_src->sg_nseg - 1, msgsize << 3);
	}

	/* Copy result out to sgl_dst */
	remaining = roundup2(defn->H_size, LSB_ENTRY_SIZE);
	lsbaddr = ccp_queue_lsb_address(qp, LSB_ENTRY_SHA);
	for (i = 0; i < sgl_dst->sg_nseg; i++) {
		seg = &sgl_dst->sg_segs[i];
		nb = min(remaining, seg->ss_len);

		ccp_passthrough(qp, seg->ss_paddr, CCP_MEMTYPE_SYSTEM, lsbaddr,
		    CCP_MEMTYPE_SB, nb, CCP_PASSTHRU_BYTESWAP_NOOP,
		    CCP_PASSTHRU_BITWISE_NOOP, nb == remaining,
		    (nb == remaining) ? cctx : NULL);

		remaining -= nb;
		lsbaddr += nb;
		if (remaining == 0)
			break;
	}

	return (0);
}
static void | |||||
byteswap256(uint64_t *buffer) | |||||
{ | |||||
uint64_t t; | |||||
t = bswap64(buffer[3]); | |||||
buffer[3] = bswap64(buffer[0]); | |||||
buffer[0] = t; | |||||
t = bswap64(buffer[2]); | |||||
buffer[2] = bswap64(buffer[1]); | |||||
buffer[1] = t; | |||||
} | |||||
/* | |||||
* Translate CCP internal LSB hash format into a standard hash ouput. | |||||
* | |||||
* Manipulates input buffer with byteswap256 operation. | |||||
*/ | |||||
/*
 * Copy a digest out of the CCP's internal representation.
 *
 * 'buffer' holds one or two 32-byte LSB entries fetched from the device and
 * is byte-swapped in place; 'output' receives defn->axf->hashsize bytes in
 * standard digest order.  Panics on an unknown 'version' (caller bug).
 */
static void
ccp_sha_copy_result(char *output, char *buffer, enum sha_version version)
{
	const struct SHA_Defn *defn;
	size_t i;

	/* Locate the definition table entry for this SHA variant. */
	for (i = 0; i < nitems(SHA_definitions); i++)
		if (SHA_definitions[i].version == version)
			break;
	if (i == nitems(SHA_definitions))
		panic("bogus sha version auth_mode %u\n", (unsigned)version);
	defn = &SHA_definitions[i];

	/* Swap 256bit manually -- DMA engine can, but with limitations */
	byteswap256((void *)buffer);
	if (defn->axf->hashsize > LSB_ENTRY_SIZE)
		byteswap256((void *)(buffer + LSB_ENTRY_SIZE));

	switch (defn->version) {
	case SHA1:
		/*
		 * NOTE(review): the 20-byte SHA-1 digest appears to occupy
		 * the tail of the swapped 32-byte entry (offset 12) --
		 * confirm against hardware output.
		 */
		memcpy(output, buffer + 12, defn->axf->hashsize);
		break;
#if 0
	case SHA2_224:
		memcpy(output, buffer + XXX, defn->axf->hashsize);
		break;
#endif
	case SHA2_256:
		/* Digest fills a single swapped entry from its start. */
		memcpy(output, buffer, defn->axf->hashsize);
		break;
	case SHA2_384:
		/*
		 * The 48-byte digest straddles both swapped entries:
		 * the first 16 output bytes come from the tail of the
		 * second entry, the remaining 32 from the first entry.
		 */
		memcpy(output,
		    buffer + LSB_ENTRY_SIZE * 3 - defn->axf->hashsize,
		    defn->axf->hashsize - LSB_ENTRY_SIZE);
		memcpy(output + defn->axf->hashsize - LSB_ENTRY_SIZE, buffer,
		    LSB_ENTRY_SIZE);
		break;
	case SHA2_512:
		/* Two full entries, emitted in reversed order. */
		memcpy(output, buffer + LSB_ENTRY_SIZE, LSB_ENTRY_SIZE);
		memcpy(output + LSB_ENTRY_SIZE, buffer, LSB_ENTRY_SIZE);
		break;
	}
}
void | |||||
XXX_ccp_test(struct ccp_softc *sc) | |||||
{ | |||||
const struct SHA_Defn *defn; | |||||
uint64_t var1, var2; | |||||
uint32_t res[32] __aligned(128) = { 0 }; | |||||
struct ccp_queue *qp; | |||||
unsigned q; | |||||
int error; | |||||
const char *msg = "a"; | |||||
device_printf(sc->dev, "%s enter\n", __func__); | |||||
var1 = 0; | |||||
var2 = 0xdeadbeef; | |||||
device_printf(sc->dev, "%s var1=%lx var2=%lx\n", __func__, var1, var2); | |||||
for (q = 0; q < nitems(sc->queues); q++) | |||||
if ((sc->valid_queues & (1 << q)) != 0) | |||||
break; | |||||
if (q == nitems(sc->queues)) { | |||||
device_printf(sc->dev, "%s: no valid queues\n", __func__); | |||||
return; | |||||
} | |||||
qp = &sc->queues[q]; | |||||
struct sglist_seg sgl_segs[] = { | |||||
{ | |||||
Done Inline Actionsmflags is unused. markj: mflags is unused. | |||||
Done Inline ActionsAt this time, yes, but the intent is to eventually pass M_WAITOK / M_NOWAIT. cem: At this time, yes, but the intent is to eventually pass M_WAITOK / M_NOWAIT. | |||||
.ss_paddr = pmap_kextract((vm_offset_t)msg), | |||||
.ss_len = 1, | |||||
}, | |||||
}; | |||||
struct sglist sgl = { | |||||
.sg_segs = sgl_segs, | |||||
.sg_nseg = 1, | |||||
.sg_maxseg = 1, | |||||
.sg_refs = 1, | |||||
}; | |||||
struct sglist_seg sgl2_segs[] = { | |||||
{ | |||||
.ss_paddr = pmap_kextract((vm_offset_t)res), | |||||
Not Done Inline ActionsShould this be addressed? :) markj: Should this be addressed? :) | |||||
Not Done Inline ActionsProbably :( cem: Probably :( | |||||
.ss_len = sizeof(res), | |||||
}, | |||||
}; | |||||
struct sglist sgl_dst = { | |||||
.sg_segs = sgl2_segs, | |||||
Done Inline ActionsIt would be a good idea to verify this too. Perhaps also cache the paddrs, but the lookup is pretty cheap, so it may not be worth the effort. markj: It would be a good idea to verify this too. Perhaps also cache the paddrs, but the lookup is… | |||||
Done Inline ActionsSure. Note that the H_vectors here are local to this driver (since they're in reverse order). Can you think of an annotation short of __align(PAGE_SIZE) to make sure they don't cross a page boundary at compile time? I suppose some clever CTASSERTs might work. cem: Sure. Note that the H_vectors here are local to this driver (since they're in reverse order). | |||||
Done Inline ActionsI can't think of another annotation that might help, but I think this CTASSERT might do it: CTASSERT(PAGE_SIZE - ((uintptr_t)&vec[0] % PAGE_SIZE) >= sizeof(vec)) I think all of the vectors would together fit in less of a page, so you could maybe put them in a single array and align that. markj: I can't think of another annotation that might help, but I think this CTASSERT might do it… | |||||
Done Inline ActionsThanks, I'll do something like that. cem: Thanks, I'll do something like that. | |||||
.sg_nseg = 1, | |||||
.sg_maxseg = 1, | |||||
.sg_refs = 1, | |||||
}; | |||||
device_printf(sc->dev, "%s ccp_sha\n", __func__); | |||||
error = ccp_sha(qp, SHA2_384, &sgl, &sgl_dst, NULL, M_WAITOK); | |||||
if (error != 0) { | |||||
device_printf(sc->dev, "%s: ccp_sha error: %d\n", __func__, | |||||
error); | |||||
return; | |||||
} | |||||
wmb(); | |||||
device_printf(sc->dev, "%s sending to HW\n", __func__); | |||||
ccp_queue_write_tail(qp); | |||||
device_printf(sc->dev, "%s sleeping\n", __func__); | |||||
DELAY(500 * 1000); | |||||
Done Inline ActionsNote that min() will truncate to 32 bits. markj: Note that min() will truncate to 32 bits. | |||||
Done Inline ActionsThe other consideration is that crd_len is int, so we're dealing with at most 31 bit lengths. cem: The other consideration is that `crd_len` is `int`, so we're dealing with at most 31 bit… | |||||
device_printf(sc->dev, "%s hardware head is 0x%x (base=0x%x)\n", | |||||
__func__, ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE), | |||||
(uint32_t)qp->desc_ring_bus_addr); | |||||
device_printf(sc->dev, "%s qcontrol=0x%x\n", __func__, | |||||
ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE)); | |||||
device_printf(sc->dev, "%s sha1 res=%20D\n", __func__, | |||||
(u_char*)&res[3], " "); | |||||
device_printf(sc->dev, "%s sha256 res=%32D\n", __func__, | |||||
(u_char*)res, " "); | |||||
device_printf(sc->dev, "%s sha384 res=%16D %32D\n", __func__, | |||||
(u_char*)&res[12], " ", (u_char*)res, " "); | |||||
device_printf(sc->dev, "%s sha512 res=%32D %32D\n", __func__, | |||||
(u_char*)&res[8], " ", (u_char*)res, " "); | |||||
device_printf(sc->dev, "%s leave\n", __func__); | |||||
/* XXX Below: basic single-descriptor SHA test */ | |||||
return; | |||||
/* Load initial SHA state into LSB */ | |||||
defn = &SHA_definitions[0]; | |||||
device_printf(sc->dev, "%s writing PST desc to load hash init values\n", __func__); | |||||
ccp_passthrough(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA), | |||||
CCP_MEMTYPE_SB, pmap_kextract((vm_offset_t)defn->H_vectors), | |||||
CCP_MEMTYPE_SYSTEM, roundup2(defn->H_size, LSB_ENTRY_SIZE), | |||||
CCP_PASSTHRU_BYTESWAP_NOOP, CCP_PASSTHRU_BITWISE_NOOP, false, | |||||
NULL); | |||||
/* Queue single SHA desc of empty vector. */ | |||||
device_printf(sc->dev, "%s writing SHA desc\n", __func__); | |||||
ccp_sha_single_desc(qp, defn, pmap_kextract((vm_offset_t)msg), 1, true, true, 8); | |||||
/* Copy result out */ | |||||
device_printf(sc->dev, "%s writing PST desc to fetch result\n", __func__); | |||||
ccp_passthrough(qp, pmap_kextract((vm_offset_t)res), | |||||
CCP_MEMTYPE_SYSTEM, ccp_queue_lsb_address(qp, LSB_ENTRY_SHA), | |||||
CCP_MEMTYPE_SB, sizeof(res), CCP_PASSTHRU_BYTESWAP_256BIT, | |||||
CCP_PASSTHRU_BITWISE_NOOP, false, NULL); | |||||
wmb(); | |||||
device_printf(sc->dev, "%s sending to HW\n", __func__); | |||||
ccp_queue_write_tail(qp); | |||||
/* XXX Below: basic PST test */ | |||||
return; | |||||
ccp_passthrough(qp, pmap_kextract((vm_offset_t)&var1), | |||||
CCP_MEMTYPE_SYSTEM, pmap_kextract((vm_offset_t)&var2), | |||||
CCP_MEMTYPE_SYSTEM, sizeof(var1), CCP_PASSTHRU_BYTESWAP_NOOP, | |||||
CCP_PASSTHRU_BITWISE_NOOP, false, NULL); | |||||
device_printf(sc->dev, "%s incrementing tail\n", __func__); | |||||
wmb(); | |||||
ccp_queue_write_tail(qp); | |||||
device_printf(sc->dev, "%s tail incremented; writing control word RUN & sleeping 0.5s\n", __func__); | |||||
DELAY(500 * 1000); | |||||
device_printf(sc->dev, "%s var1=%lx var2=%lx\n", __func__, var1, var2); | |||||
} | |||||
static void | |||||
ccp_hmac_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, | |||||
int error) | |||||
{ | |||||
char ihash[SHA2_512_HASH_LEN /* max hash len */]; | |||||
union authctx auth_ctx; | |||||
struct cryptodesc *crd; | |||||
struct auth_hash *axf; | |||||
struct cryptop *crp; | |||||
crp = vcrp; | |||||
crd = crp->crp_desc; | |||||
axf = s->hmac.auth_hash; | |||||
s->pending--; | |||||
if (error != 0) { | |||||
crp->crp_etype = error; | |||||
goto out; | |||||
} | |||||
/* Do remaining outer hash over small inner hash in software */ | |||||
axf->Init(&auth_ctx); | |||||
axf->Update(&auth_ctx, s->hmac.opad, axf->blocksize); | |||||
ccp_sha_copy_result(ihash, s->hmac.ipad, s->hmac.auth_mode); | |||||
#if 0 | |||||
device_printf(dev, "%s sha intermediate=%64D\n", __func__, | |||||
(u_char *)ihash, " "); | |||||
#endif | |||||
axf->Update(&auth_ctx, ihash, axf->hashsize); | |||||
axf->Final(s->hmac.ipad, &auth_ctx); | |||||
crypto_copyback(crp->crp_flags, crp->crp_buf, crd->crd_inject, | |||||
s->hmac.hash_len, s->hmac.ipad); | |||||
/* Avoid leaking key material */ | |||||
explicit_bzero(&auth_ctx, sizeof(auth_ctx)); | |||||
explicit_bzero(s->hmac.ipad, sizeof(s->hmac.ipad)); | |||||
explicit_bzero(s->hmac.opad, sizeof(s->hmac.opad)); | |||||
out: | |||||
crypto_done(crp); | |||||
return; | |||||
} | |||||
int | |||||
ccp_hmac(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) | |||||
{ | |||||
struct ccp_completion_ctx ctx; | |||||
device_t dev; | |||||
struct auth_hash *axf; | |||||
struct cryptodesc *crd; | |||||
int error, sgl_nsegs; | |||||
dev = qp->cq_softc->dev; | |||||
crd = crp->crp_desc; | |||||
axf = s->hmac.auth_hash; | |||||
/* | |||||
* Populate the SGL describing inside hash contents. We want to hash | |||||
* the ipad (key XOR fixed bit pattern) concatenated with the user | |||||
* data. | |||||
*/ | |||||
sglist_reset(qp->cq_sg_ulptx); | |||||
error = sglist_append(qp->cq_sg_ulptx, s->hmac.ipad, axf->blocksize); | |||||
if (error != 0) | |||||
return (error); | |||||
error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, | |||||
crd->crd_skip, crd->crd_len); | |||||
if (error != 0) { | |||||
device_printf(dev, "%s: sglist too short\n", __func__); | |||||
return (error); | |||||
} | |||||
/* Populate SGL for output -- just reuse hmac.ipad buffer. */ | |||||
sglist_reset(qp->cq_sg_dst); | |||||
error = sglist_append(qp->cq_sg_dst, s->hmac.ipad, | |||||
roundup2(axf->hashsize, LSB_ENTRY_SIZE)); | |||||
if (error != 0) | |||||
return (error); | |||||
/* XXX Determine # of ops required here and ensure we have enough. */ | |||||
sgl_nsegs = qp->cq_sg_ulptx->sg_nseg; | |||||
//sgl_len = ccp_ulptx_sgl_len(sgl_nsegs); | |||||
ctx.callback_fn = ccp_hmac_done; | |||||
ctx.callback_arg = crp; | |||||
ctx.session = s; | |||||
error = ccp_sha(qp, s->hmac.auth_mode, qp->cq_sg_ulptx, qp->cq_sg_dst, | |||||
&ctx, M_NOWAIT); | |||||
if (error != 0) { | |||||
device_printf(dev, "%s: ccp_sha error\n", __func__); | |||||
return (error); | |||||
} | |||||
wmb(); | |||||
ccp_queue_write_tail(qp); | |||||
return (0); | |||||
} | |||||
/*
 * Reverse a byte buffer in place (used to flip key/IV material into the
 * ordering the CCP hardware expects).
 *
 * Fix: guard len < 2.  Previously a zero 'len' was decremented first,
 * wrapping the unsigned size_t to SIZE_MAX and causing out-of-bounds
 * accesses; len == 1 is a harmless no-op either way but is skipped too.
 */
static void
ccp_byteswap(char *data, size_t len)
{
	size_t i;
	char t;

	if (len < 2)
		return;

	/* Walk inward from both ends, swapping pairs. */
	len--;
	for (i = 0; i < len; i++, len--) {
		t = data[i];
		data[i] = data[len];
		data[len] = t;
	}
}
static void | |||||
ccp_blkcipher_done(struct ccp_queue *qp, struct ccp_session *s, void *vcrp, | |||||
int error) | |||||
{ | |||||
struct cryptop *crp; | |||||
crp = vcrp; | |||||
s->pending--; | |||||
if (error != 0) | |||||
crp->crp_etype = error; | |||||
device_printf(qp->cq_softc->dev, "XXX %s: qp=%p crp=%p\n", __func__, | |||||
qp, crp); | |||||
crypto_done(crp); | |||||
return; | |||||
} | |||||
int | |||||
ccp_blkcipher(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp) | |||||
{ | |||||
struct ccp_completion_ctx ctx; | |||||
struct cryptodesc *crd; | |||||
struct ccp_desc *desc; | |||||
device_t dev; | |||||
char *keydata; | |||||
enum ccp_cipher_dir dir; | |||||
int sgl_nsegs, error; | |||||
size_t keydata_len; | |||||
unsigned i; | |||||
/* XXX Determine # of ops required here and ensure we have enough. */ | |||||
crd = crp->crp_desc; | |||||
dev = qp->cq_softc->dev; | |||||
if (s->blkcipher.key_len == 0 || crd->crd_len == 0) | |||||
return (EINVAL); | |||||
if (crd->crd_alg == CRYPTO_AES_CBC && | |||||
(crd->crd_len % AES_BLOCK_LEN) != 0) | |||||
return (EINVAL); | |||||
/* | |||||
* Individual segments must be multiples of AES block size for the HW | |||||
* to process it. Non-compliant inputs aren't bogus, just not doable | |||||
* on this hardware. | |||||
*/ | |||||
for (i = 0; i < qp->cq_sg_crp->sg_nseg; i++) | |||||
if ((qp->cq_sg_crp->sg_segs[i].ss_len % AES_BLOCK_LEN) != 0) | |||||
return (EAGAIN); | |||||
/* Gather IV/nonce data */ | |||||
if (crd->crd_flags & CRD_F_ENCRYPT) { | |||||
dir = CCP_CIPHER_DIR_ENCRYPT; | |||||
if (crd->crd_flags & CRD_F_IV_EXPLICIT) | |||||
memcpy(s->blkcipher.iv, crd->crd_iv, | |||||
s->blkcipher.iv_len); | |||||
else | |||||
arc4rand(s->blkcipher.iv, s->blkcipher.iv_len, 0); | |||||
if ((crd->crd_flags & CRD_F_IV_PRESENT) == 0) | |||||
crypto_copyback(crp->crp_flags, crp->crp_buf, | |||||
crd->crd_inject, s->blkcipher.iv_len, | |||||
s->blkcipher.iv); | |||||
} else { | |||||
dir = CCP_CIPHER_DIR_DECRYPT; | |||||
if (crd->crd_flags & CRD_F_IV_EXPLICIT) | |||||
memcpy(s->blkcipher.iv, crd->crd_iv, | |||||
s->blkcipher.iv_len); | |||||
else | |||||
crypto_copydata(crp->crp_flags, crp->crp_buf, | |||||
crd->crd_inject, s->blkcipher.iv_len, | |||||
s->blkcipher.iv); | |||||
} | |||||
/* Reverse order of IV material for HW */ | |||||
device_printf(dev, "YYY %s: IV: %16D len: %u\n", __func__, | |||||
s->blkcipher.iv, " ", s->blkcipher.iv_len); | |||||
ccp_byteswap(s->blkcipher.iv, s->blkcipher.iv_len); | |||||
/* Set up passthrough op(s) to copy IV into LSB */ | |||||
sglist_reset(qp->cq_sg_ulptx); | |||||
error = sglist_append(qp->cq_sg_ulptx, s->blkcipher.iv, | |||||
s->blkcipher.iv_len); | |||||
if (error != 0) | |||||
return (error); | |||||
device_printf(dev, "XXX %s: starting IV pst @ %u\n", __func__, | |||||
qp->cq_tail); | |||||
ccp_passthrough_sgl(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_IV), true, | |||||
qp->cq_sg_ulptx, s->blkcipher.iv_len, false, NULL); | |||||
switch (crd->crd_alg) { | |||||
case CRYPTO_AES_CBC: | |||||
keydata = s->blkcipher.enckey; | |||||
keydata_len = s->blkcipher.key_len; | |||||
break; | |||||
case CRYPTO_AES_ICM: | |||||
keydata = s->blkcipher.enckey; | |||||
keydata_len = s->blkcipher.key_len; | |||||
break; | |||||
/* XXX deal with XTS */ | |||||
#if 0 | |||||
case CRYPTO_AES_XTS: | |||||
key_half = s->blkcipher.key_len / 2; | |||||
memcpy(crwr->key_ctx.key, s->blkcipher.enckey + key_half, | |||||
key_half); | |||||
if (crd->crd_flags & CRD_F_ENCRYPT) | |||||
memcpy(crwr->key_ctx.key + key_half, | |||||
s->blkcipher.enckey, key_half); | |||||
else | |||||
memcpy(crwr->key_ctx.key + key_half, | |||||
s->blkcipher.deckey, key_half); | |||||
break; | |||||
#endif | |||||
} | |||||
/* Reverse order of key material for HW */ | |||||
ccp_byteswap(keydata, keydata_len); | |||||
/* Store key material into LSB to avoid page boundaries */ | |||||
sglist_reset(qp->cq_sg_ulptx); | |||||
error = sglist_append(qp->cq_sg_ulptx, keydata, keydata_len); | |||||
if (error != 0) | |||||
return (error); | |||||
device_printf(dev, "XXX %s: starting KEY pst @ %u\n", __func__, | |||||
qp->cq_tail); | |||||
ccp_passthrough_sgl(qp, ccp_queue_lsb_address(qp, LSB_ENTRY_KEY), true, | |||||
qp->cq_sg_ulptx, keydata_len, false, NULL); | |||||
/* | |||||
* Point SGLs at the subset of cryptop buffer contents representing the | |||||
* data. | |||||
*/ | |||||
sglist_reset(qp->cq_sg_ulptx); | |||||
error = sglist_append_sglist(qp->cq_sg_ulptx, qp->cq_sg_crp, | |||||
crd->crd_skip, crd->crd_len); | |||||
if (error != 0) | |||||
return (error); | |||||
sgl_nsegs = qp->cq_sg_ulptx->sg_nseg; | |||||
//sgl_len = ccp_ulptx_sgl_len(sgl_nsegs); | |||||
ctx.callback_fn = ccp_blkcipher_done; | |||||
ctx.session = s; | |||||
ctx.callback_arg = crp; | |||||
device_printf(dev, "XXX %s: starting AES ops @ %u\n", __func__, | |||||
qp->cq_tail); | |||||
for (i = 0; i < qp->cq_sg_ulptx->sg_nseg; i++) { | |||||
struct sglist_seg *seg; | |||||
seg = &qp->cq_sg_ulptx->sg_segs[i]; | |||||
desc = &qp->desc_ring[qp->cq_tail]; | |||||
desc->engine = CCP_ENGINE_AES; | |||||
desc->som = (i == 0); | |||||
desc->eom = (i == qp->cq_sg_ulptx->sg_nseg - 1); | |||||
Done Inline Actionskeydata and keydata_len are left uninitialized if we go through the default case. markj: keydata and keydata_len are left uninitialized if we go through the default case. | |||||
Done Inline ActionsThe default case here is impossible to reach. This routine is invoked from ccp_process with a session mode of BLKCIPHER. That requires ccp_newsession with one of CBC, ICM, or XTS. And newsession shouldn't be called with XTS at this time as it isn't registered. cem: The default case here is impossible to reach. This routine is invoked from ccp_process with a… | |||||
Done Inline ActionsThe compiler doesn't know that. :) markj: The compiler doesn't know that. :) | |||||
Done Inline ActionsIt doesn't produce an error. I don't recall it producing a warning, either, thought it may be. Do you think the compiler is assuming UB and generating bad code? The CBC and ICM tests pass, so I think that is unlikely. cem: It doesn't produce an error. I don't recall it producing a warning, either, thought it may be. | |||||
Done Inline ActionsAt least gcc 6 emits an error for this. markj: At least gcc 6 emits an error for this. | |||||
Done Inline ActionsAh, sure. I have been building with Clang. Is keydata / keydata_len all I need to initialize to silence the GCC warning? cem: Ah, sure. I have been building with Clang. Is keydata / keydata_len all I need to initialize… | |||||
Done Inline ActionsYep, looks like it. gcc is also complaining about sgl_nsegs being write-only. With those two things fixed, ccp builds. markj: Yep, looks like it. gcc is also complaining about sgl_nsegs being write-only. With those two… | |||||
desc->ioc = desc->eom; | |||||
device_printf(dev, "XXX %s: AES %u: som:%d eom:%d ioc:%d dir:%d\n", __func__, | |||||
qp->cq_tail, (int)desc->som, (int)desc->eom, (int)desc->ioc, (int)dir); | |||||
if (desc->ioc) | |||||
memcpy(&qp->completions_ring[qp->cq_tail], &ctx, | |||||
Done Inline ActionsIt'd be nice to clean this up. markj: It'd be nice to clean this up. | |||||
Done Inline ActionsAs in, actually implement XTS? :-) Yes. cem: As in, actually implement XTS? :-) Yes. | |||||
sizeof(ctx)); | |||||
desc->aes.encrypt = dir; | |||||
desc->aes.mode = s->blkcipher.cipher_mode; | |||||
desc->aes.type = s->blkcipher.cipher_type; | |||||
if (crd->crd_alg == CRYPTO_AES_ICM) | |||||
/* | |||||
* Size of CTR value in bits, - 1. Hardcode 32 bits | |||||
* for now. | |||||
*/ | |||||
desc->aes.size = 0x1f; | |||||
device_printf(dev, "XXX %s: AES %u: mode:%u type:%u size:%u\n", __func__, | |||||
qp->cq_tail, (unsigned)desc->aes.mode, (unsigned)desc->aes.type, (unsigned)desc->aes.size); | |||||
desc->length = seg->ss_len; | |||||
desc->src_lo = (uint32_t)seg->ss_paddr; | |||||
desc->src_hi = (seg->ss_paddr >> 32); | |||||
desc->src_mem = CCP_MEMTYPE_SYSTEM; | |||||
/* Crypt in-place */ | |||||
desc->dst_lo = desc->src_lo; | |||||
desc->dst_hi = desc->src_hi; | |||||
desc->dst_mem = desc->src_mem; | |||||
desc->key_lo = ccp_queue_lsb_address(qp, LSB_ENTRY_KEY); | |||||
desc->key_hi = 0; | |||||
desc->key_mem = CCP_MEMTYPE_SB; | |||||
desc->lsb_ctx_id = ccp_queue_lsb_entry(qp, LSB_ENTRY_IV); | |||||
qp->cq_tail = (qp->cq_tail + 1) % | |||||
(1 << qp->cq_softc->ring_size_order); | |||||
} | |||||
wmb(); | |||||
ccp_queue_write_tail(qp); | |||||
Done Inline ActionsThis doesn't seem to be used either? markj: This doesn't seem to be used either? | |||||
Done Inline ActionsVestigial from the ccr driver. cem: Vestigial from the ccr driver. | |||||
return (0); | |||||
} | |||||
#if 0
/*
 * NOTE(review): appears to be vestigial ccr(4) code (this driver was
 * largely borrowed from ccr(4)); the CHCR_* constants are Chelsio's and
 * do not exist here.  Kept under '#if 0' for reference only.
 */
/*
 * 'hashsize' is the length of a full digest. 'authsize' is the
 * requested digest length for this operation which may be less
 * than 'hashsize'.
 */
static int
ccp_hmac_ctrl(unsigned int hashsize, unsigned int authsize)
{
	if (authsize == 10)
		return (CHCR_SCMD_HMAC_CTRL_TRUNC_RFC4366);
	if (authsize == 12)
		return (CHCR_SCMD_HMAC_CTRL_IPSEC_96BIT);
	if (authsize == hashsize / 2)
		return (CHCR_SCMD_HMAC_CTRL_DIV2);
	return (CHCR_SCMD_HMAC_CTRL_NO_TRUNC);
}
#endif
int | |||||
ccp_authenc(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp, | |||||
struct cryptodesc *crda, struct cryptodesc *crde) | |||||
{ | |||||
device_printf(qp->cq_softc->dev, "%s not supported\n", __func__); | |||||
#if 0 | |||||
char iv[CHCR_MAX_CRYPTO_IV_LEN]; | |||||
struct chcr_wr *crwr; | |||||
struct wrqe *wr; | |||||
struct auth_hash *axf; | |||||
char *dst; | |||||
u_int iv_loc, kctx_len, key_half, op_type, transhdr_len, wr_len; | |||||
u_int hash_size_in_response, imm_len, iopad_size; | |||||
u_int aad_start, aad_len, aad_stop; | |||||
u_int auth_start, auth_stop, auth_insert; | |||||
u_int cipher_start, cipher_stop; | |||||
Done Inline ActionsDoes C actually guarantee that this cast will give you the low 32 bits? It would be a bit cleaner to explicitly mask the bits you want. markj: Does C actually guarantee that this cast will give you the low 32 bits? It would be a bit… | |||||
Done Inline ActionsI believe so. Not sure how else it could be interpreted. Note that vm_paddr_t is an integer rather than pointer type. https://stackoverflow.com/questions/6752567/casting-a-large-number-type-to-a-smaller-type claims that the standard says:
cem: I believe so. Not sure how else it could be interpreted. Note that vm_paddr_t is an integer… | |||||
Done Inline ActionsI see, thanks. markj: I see, thanks. | |||||
u_int hmac_ctrl, input_len; | |||||
int dsgl_nsegs, dsgl_len; | |||||
int sgl_nsegs, sgl_len; | |||||
int error; | |||||
/* | |||||
* If there is a need in the future, requests with an empty | |||||
* payload could be supported as HMAC-only requests. | |||||
*/ | |||||
if (s->blkcipher.key_len == 0 || crde->crd_len == 0) | |||||
return (EINVAL); | |||||
if (crde->crd_alg == CRYPTO_AES_CBC && | |||||
(crde->crd_len % AES_BLOCK_LEN) != 0) | |||||
return (EINVAL); | |||||
/* | |||||
* AAD is only permitted before the cipher/plain text, not | |||||
* after. | |||||
*/ | |||||
if (crda->crd_len + crda->crd_skip > crde->crd_len + crde->crd_skip) | |||||
return (EINVAL); | |||||
axf = s->hmac.auth_hash; | |||||
hash_size_in_response = s->hmac.hash_len; | |||||
/* | |||||
* The IV is always stored at the start of the buffer even | |||||
* though it may be duplicated in the payload. The crypto | |||||
* engine doesn't work properly if the IV offset points inside | |||||
* of the AAD region, so a second copy is always required. | |||||
*/ | |||||
iv_loc = IV_IMMEDIATE; | |||||
if (crde->crd_flags & CRD_F_ENCRYPT) { | |||||
op_type = CHCR_ENCRYPT_OP; | |||||
if (crde->crd_flags & CRD_F_IV_EXPLICIT) | |||||
memcpy(iv, crde->crd_iv, s->blkcipher.iv_len); | |||||
else | |||||
arc4rand(iv, s->blkcipher.iv_len, 0); | |||||
if ((crde->crd_flags & CRD_F_IV_PRESENT) == 0) | |||||
crypto_copyback(crp->crp_flags, crp->crp_buf, | |||||
crde->crd_inject, s->blkcipher.iv_len, iv); | |||||
} else { | |||||
op_type = CHCR_DECRYPT_OP; | |||||
if (crde->crd_flags & CRD_F_IV_EXPLICIT) | |||||
memcpy(iv, crde->crd_iv, s->blkcipher.iv_len); | |||||
else | |||||
crypto_copydata(crp->crp_flags, crp->crp_buf, | |||||
crde->crd_inject, s->blkcipher.iv_len, iv); | |||||
} | |||||
/* | |||||
* The output buffer consists of the cipher text followed by | |||||
* the hash when encrypting. For decryption it only contains | |||||
* the plain text. | |||||
*/ | |||||
if (op_type == CHCR_ENCRYPT_OP) { | |||||
if (crde->crd_len + hash_size_in_response > MAX_REQUEST_SIZE) | |||||
return (EFBIG); | |||||
} else { | |||||
if (crde->crd_len > MAX_REQUEST_SIZE) | |||||
return (EFBIG); | |||||
} | |||||
sglist_reset(qp->cq_sg_dst); | |||||
error = sglist_append_sglist(qp->cq_sg_dst, qp->cq_sg_crp, crde->crd_skip, | |||||
crde->crd_len); | |||||
if (error != 0) | |||||
return (error); | |||||
if (op_type == CHCR_ENCRYPT_OP) { | |||||
error = sglist_append_sglist(qp->cq_sg_dst, qp->cq_sg_crp, | |||||
crda->crd_inject, hash_size_in_response); | |||||
if (error != 0) | |||||
return (error); | |||||
} | |||||
dsgl_nsegs = ccp_count_sgl(qp->cq_sg_dst, DSGL_SGE_MAXLEN); | |||||
if (dsgl_nsegs > MAX_RX_PHYS_DSGL_SGE) | |||||
return (EFBIG); | |||||
dsgl_len = ccp_phys_dsgl_len(dsgl_nsegs); | |||||
/* PADs must be 128-bit aligned. */ | |||||
iopad_size = roundup2(s->hmac.partial_digest_len, 16); | |||||
/* | |||||
* The 'key' part of the key context consists of the key followed | |||||
* by the IPAD and OPAD. | |||||
*/ | |||||
kctx_len = roundup2(s->blkcipher.key_len, 16) + iopad_size * 2; | |||||
transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dsgl_len); | |||||
/* | |||||
* The input buffer consists of the IV, any AAD, and then the | |||||
* cipher/plain text. For decryption requests the hash is | |||||
* appended after the cipher text. | |||||
*/ | |||||
if (crda->crd_skip < crde->crd_skip) { | |||||
if (crda->crd_skip + crda->crd_len > crde->crd_skip) | |||||
aad_len = (crde->crd_skip - crda->crd_skip); | |||||
else | |||||
aad_len = crda->crd_len; | |||||
} else | |||||
aad_len = 0; | |||||
input_len = aad_len + crde->crd_len; | |||||
/* | |||||
* The firmware hangs if sent a request which is a | |||||
* bit smaller than MAX_REQUEST_SIZE. In particular, the | |||||
* firmware appears to require 512 - 16 bytes of spare room | |||||
* along with the size of the hash even if the hash isn't | |||||
* included in the input buffer. | |||||
*/ | |||||
if (input_len + roundup2(axf->hashsize, 16) + (512 - 16) > | |||||
MAX_REQUEST_SIZE) | |||||
return (EFBIG); | |||||
if (op_type == CHCR_DECRYPT_OP) | |||||
input_len += hash_size_in_response; | |||||
if (ccp_use_imm_data(transhdr_len, s->blkcipher.iv_len + input_len)) { | |||||
imm_len = input_len; | |||||
sgl_nsegs = 0; | |||||
sgl_len = 0; | |||||
} else { | |||||
imm_len = 0; | |||||
sglist_reset(sc->sg_ulptx); | |||||
if (aad_len != 0) { | |||||
error = sglist_append_sglist(sc->sg_ulptx, qp->cq_sg_crp, | |||||
crda->crd_skip, aad_len); | |||||
if (error != 0) | |||||
return (error); | |||||
} | |||||
error = sglist_append_sglist(sc->sg_ulptx, qp->cq_sg_crp, | |||||
crde->crd_skip, crde->crd_len); | |||||
if (error != 0) | |||||
return (error); | |||||
if (op_type == CHCR_DECRYPT_OP) { | |||||
error = sglist_append_sglist(sc->sg_ulptx, qp->cq_sg_crp, | |||||
crda->crd_inject, hash_size_in_response); | |||||
if (error != 0) | |||||
return (error); | |||||
} | |||||
sgl_nsegs = sc->sg_ulptx->sg_nseg; | |||||
sgl_len = ccp_ulptx_sgl_len(sgl_nsegs); | |||||
} | |||||
/* | |||||
* Any auth-only data before the cipher region is marked as AAD. | |||||
* Auth-data that overlaps with the cipher region is placed in | |||||
* the auth section. | |||||
*/ | |||||
if (aad_len != 0) { | |||||
aad_start = s->blkcipher.iv_len + 1; | |||||
aad_stop = aad_start + aad_len - 1; | |||||
} else { | |||||
aad_start = 0; | |||||
aad_stop = 0; | |||||
} | |||||
cipher_start = s->blkcipher.iv_len + aad_len + 1; | |||||
if (op_type == CHCR_DECRYPT_OP) | |||||
cipher_stop = hash_size_in_response; | |||||
else | |||||
cipher_stop = 0; | |||||
if (aad_len == crda->crd_len) { | |||||
auth_start = 0; | |||||
auth_stop = 0; | |||||
} else { | |||||
if (aad_len != 0) | |||||
auth_start = cipher_start; | |||||
else | |||||
auth_start = s->blkcipher.iv_len + crda->crd_skip - | |||||
crde->crd_skip + 1; | |||||
auth_stop = (crde->crd_skip + crde->crd_len) - | |||||
(crda->crd_skip + crda->crd_len) + cipher_stop; | |||||
} | |||||
if (op_type == CHCR_DECRYPT_OP) | |||||
auth_insert = hash_size_in_response; | |||||
else | |||||
auth_insert = 0; | |||||
wr_len = roundup2(transhdr_len, 16) + roundup2(imm_len, 16) + sgl_len; | |||||
if (iv_loc == IV_IMMEDIATE) | |||||
wr_len += s->blkcipher.iv_len; | |||||
wr = alloc_wrqe(wr_len, sc->txq); | |||||
if (wr == NULL) { | |||||
sc->stats_wr_nomem++; | |||||
return (ENOMEM); | |||||
} | |||||
crwr = wrtod(wr); | |||||
memset(crwr, 0, wr_len); | |||||
ccp_populate_wreq(sc, crwr, kctx_len, wr_len, sid, imm_len, sgl_len, | |||||
op_type == CHCR_DECRYPT_OP ? hash_size_in_response : 0, iv_loc, | |||||
crp); | |||||
/* XXX: Hardcodes SGE loopback channel of 0. */ | |||||
crwr->sec_cpl.op_ivinsrtofst = htobe32( | |||||
V_CPL_TX_SEC_PDU_OPCODE(CPL_TX_SEC_PDU) | | |||||
V_CPL_TX_SEC_PDU_RXCHID(sc->tx_channel_id) | | |||||
V_CPL_TX_SEC_PDU_ACKFOLLOWS(0) | V_CPL_TX_SEC_PDU_ULPTXLPBK(1) | | |||||
V_CPL_TX_SEC_PDU_CPLLEN(2) | V_CPL_TX_SEC_PDU_PLACEHOLDER(0) | | |||||
V_CPL_TX_SEC_PDU_IVINSRTOFST(1)); | |||||
crwr->sec_cpl.pldlen = htobe32(s->blkcipher.iv_len + input_len); | |||||
crwr->sec_cpl.aadstart_cipherstop_hi = htobe32( | |||||
V_CPL_TX_SEC_PDU_AADSTART(aad_start) | | |||||
V_CPL_TX_SEC_PDU_AADSTOP(aad_stop) | | |||||
V_CPL_TX_SEC_PDU_CIPHERSTART(cipher_start) | | |||||
V_CPL_TX_SEC_PDU_CIPHERSTOP_HI(cipher_stop >> 4)); | |||||
crwr->sec_cpl.cipherstop_lo_authinsert = htobe32( | |||||
V_CPL_TX_SEC_PDU_CIPHERSTOP_LO(cipher_stop & 0xf) | | |||||
V_CPL_TX_SEC_PDU_AUTHSTART(auth_start) | | |||||
V_CPL_TX_SEC_PDU_AUTHSTOP(auth_stop) | | |||||
V_CPL_TX_SEC_PDU_AUTHINSERT(auth_insert)); | |||||
/* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */ | |||||
hmac_ctrl = ccp_hmac_ctrl(axf->hashsize, hash_size_in_response); | |||||
crwr->sec_cpl.seqno_numivs = htobe32( | |||||
V_SCMD_SEQ_NO_CTRL(0) | | |||||
V_SCMD_PROTO_VERSION(CHCR_SCMD_PROTO_VERSION_GENERIC) | | |||||
V_SCMD_ENC_DEC_CTRL(op_type) | | |||||
V_SCMD_CIPH_AUTH_SEQ_CTRL(op_type == CHCR_ENCRYPT_OP ? 1 : 0) | | |||||
V_SCMD_CIPH_MODE(s->blkcipher.cipher_mode) | | |||||
V_SCMD_AUTH_MODE(s->hmac.auth_mode) | | |||||
V_SCMD_HMAC_CTRL(hmac_ctrl) | | |||||
V_SCMD_IV_SIZE(s->blkcipher.iv_len / 2) | | |||||
V_SCMD_NUM_IVS(0)); | |||||
crwr->sec_cpl.ivgen_hdrlen = htobe32( | |||||
V_SCMD_IV_GEN_CTRL(0) | | |||||
V_SCMD_MORE_FRAGS(0) | V_SCMD_LAST_FRAG(0) | V_SCMD_MAC_ONLY(0) | | |||||
V_SCMD_AADIVDROP(1) | V_SCMD_HDR_LEN(dsgl_len)); | |||||
crwr->key_ctx.ctx_hdr = s->blkcipher.key_ctx_hdr; | |||||
switch (crde->crd_alg) { | |||||
case CRYPTO_AES_CBC: | |||||
if (crde->crd_flags & CRD_F_ENCRYPT) | |||||
memcpy(crwr->key_ctx.key, s->blkcipher.enckey, | |||||
s->blkcipher.key_len); | |||||
else | |||||
memcpy(crwr->key_ctx.key, s->blkcipher.deckey, | |||||
s->blkcipher.key_len); | |||||
break; | |||||
case CRYPTO_AES_ICM: | |||||
memcpy(crwr->key_ctx.key, s->blkcipher.enckey, | |||||
s->blkcipher.key_len); | |||||
break; | |||||
case CRYPTO_AES_XTS: | |||||
key_half = s->blkcipher.key_len / 2; | |||||
memcpy(crwr->key_ctx.key, s->blkcipher.enckey + key_half, | |||||
key_half); | |||||
if (crde->crd_flags & CRD_F_ENCRYPT) | |||||
memcpy(crwr->key_ctx.key + key_half, | |||||
s->blkcipher.enckey, key_half); | |||||
else | |||||
memcpy(crwr->key_ctx.key + key_half, | |||||
s->blkcipher.deckey, key_half); | |||||
break; | |||||
} | |||||
dst = crwr->key_ctx.key + roundup2(s->blkcipher.key_len, 16); | |||||
memcpy(dst, s->hmac.ipad, s->hmac.partial_digest_len); | |||||
memcpy(dst + iopad_size, s->hmac.opad, s->hmac.partial_digest_len); | |||||
dst = (char *)(crwr + 1) + kctx_len; | |||||
ccp_write_phys_dsgl(sc, dst, dsgl_nsegs); | |||||
dst += sizeof(struct cpl_rx_phys_dsgl) + dsgl_len; | |||||
if (iv_loc == IV_IMMEDIATE) { | |||||
memcpy(dst, iv, s->blkcipher.iv_len); | |||||
dst += s->blkcipher.iv_len; | |||||
} | |||||
if (imm_len != 0) { | |||||
if (aad_len != 0) { | |||||
crypto_copydata(crp->crp_flags, crp->crp_buf, | |||||
crda->crd_skip, aad_len, dst); | |||||
dst += aad_len; | |||||
} | |||||
crypto_copydata(crp->crp_flags, crp->crp_buf, crde->crd_skip, | |||||
crde->crd_len, dst); | |||||
dst += crde->crd_len; | |||||
if (op_type == CHCR_DECRYPT_OP) | |||||
crypto_copydata(crp->crp_flags, crp->crp_buf, | |||||
crda->crd_inject, hash_size_in_response, dst); | |||||
} else | |||||
ccp_write_ulptx_sgl(sc, dst, sgl_nsegs); | |||||
/* XXX: TODO backpressure */ | |||||
t4_wrq_tx(sc->adapter, wr); | |||||
#endif | |||||
return (ENXIO); | |||||
} | |||||
#if 0
/*
 * Completion handler for combined cipher+HMAC (AUTHENC) requests,
 * inherited from ccr(4).  Disabled (#if 0) and kept as reference code
 * for a future CCP implementation of the AUTHENC path.
 */
static int
ccp_authenc_done(struct ccp_softc *sc, struct ccp_session *s,
    struct cryptop *crp, const struct cpl_fw6_pld *cpl, int error)
{
	struct cryptodesc *crd;

	/*
	 * The updated IV to permit chained requests is at
	 * cpl->data[2], but OCF doesn't permit chained requests.
	 *
	 * For a decryption request, the hardware may do a verification
	 * of the HMAC which will fail if the existing HMAC isn't in the
	 * buffer.  If that happens, clear the error and copy the HMAC
	 * from the CPL reply into the buffer.
	 *
	 * For encryption requests, crd should be the cipher request
	 * which will have CRD_F_ENCRYPT set.  For decryption
	 * requests, crp_desc will be the HMAC request which should
	 * not have this flag set.
	 */
	crd = crp->crp_desc;
	/* Decrypt-side MAC mismatch: patch the computed MAC into the buffer. */
	if (error == EBADMSG && !CHK_PAD_ERR_BIT(be64toh(cpl->data[0])) &&
	    !(crd->crd_flags & CRD_F_ENCRYPT)) {
		crypto_copyback(crp->crp_flags, crp->crp_buf, crd->crd_inject,
		    s->hmac.hash_len, (c_caddr_t)(cpl + 1));
		error = 0;
	}
	return (error);
}
#endif
/*
 * Submit an AES-GCM (AEAD) request described by 'crda' (hash descriptor)
 * and 'crde' (cipher descriptor) for session 's'.  GCM is not implemented
 * for the CCP yet: the function logs a message and fails with ENXIO.
 *
 * The '#if 0' body below is the Chelsio ccr(4) submission path this driver
 * was largely borrowed from (see the file header), kept as reference for a
 * future CCP implementation.  It is not compilable as-is — it references
 * ccr symbols that are not declared here ('sc', 'sid', alloc_wrqe(),
 * t4_wrq_tx(), ...) — and must be rewritten in terms of CCP descriptors
 * before it can be enabled.
 */
int
ccp_gcm(struct ccp_queue *qp, struct ccp_session *s, struct cryptop *crp,
    struct cryptodesc *crda, struct cryptodesc *crde)
{
	device_printf(qp->cq_softc->dev, "%s not supported\n", __func__);
#if 0
	char iv[CHCR_MAX_CRYPTO_IV_LEN];
	struct chcr_wr *crwr;
	struct wrqe *wr;
	char *dst;
	u_int iv_len, iv_loc, kctx_len, op_type, transhdr_len, wr_len;
	u_int hash_size_in_response, imm_len;
	u_int aad_start, aad_stop, cipher_start, cipher_stop, auth_insert;
	u_int hmac_ctrl, input_len;
	int dsgl_nsegs, dsgl_len;
	int sgl_nsegs, sgl_len;
	int error;

	if (s->blkcipher.key_len == 0)
		return (EINVAL);
	/*
	 * AAD is only permitted before the cipher/plain text, not
	 * after.
	 */
	if (crda->crd_len + crda->crd_skip > crde->crd_len + crde->crd_skip)
		return (EINVAL);

	hash_size_in_response = s->gmac.hash_len;

	/*
	 * The IV is always stored at the start of the buffer even
	 * though it may be duplicated in the payload.  The crypto
	 * engine doesn't work properly if the IV offset points inside
	 * of the AAD region, so a second copy is always required.
	 *
	 * The IV for GCM is further complicated in that IPSec
	 * provides a full 16-byte IV (including the counter), whereas
	 * the /dev/crypto interface sometimes provides a full 16-byte
	 * IV (if no IV is provided in the ioctl) and sometimes a
	 * 12-byte IV (if the IV was explicit).  For now the driver
	 * always assumes a 12-byte IV and initializes the low 4 byte
	 * counter to 1.
	 */
	iv_loc = IV_IMMEDIATE;
	if (crde->crd_flags & CRD_F_ENCRYPT) {
		op_type = CHCR_ENCRYPT_OP;
		if (crde->crd_flags & CRD_F_IV_EXPLICIT)
			memcpy(iv, crde->crd_iv, s->blkcipher.iv_len);
		else
			arc4rand(iv, s->blkcipher.iv_len, 0);
		if ((crde->crd_flags & CRD_F_IV_PRESENT) == 0)
			crypto_copyback(crp->crp_flags, crp->crp_buf,
			    crde->crd_inject, s->blkcipher.iv_len, iv);
	} else {
		op_type = CHCR_DECRYPT_OP;
		if (crde->crd_flags & CRD_F_IV_EXPLICIT)
			memcpy(iv, crde->crd_iv, s->blkcipher.iv_len);
		else
			crypto_copydata(crp->crp_flags, crp->crp_buf,
			    crde->crd_inject, s->blkcipher.iv_len, iv);
	}

	/*
	 * If the input IV is 12 bytes, append an explicit counter of
	 * 1.
	 */
	if (s->blkcipher.iv_len == 12) {
		*(uint32_t *)&iv[12] = htobe32(1);
		iv_len = AES_BLOCK_LEN;
	} else
		iv_len = s->blkcipher.iv_len;

	/*
	 * The output buffer consists of the cipher text followed by
	 * the tag when encrypting.  For decryption it only contains
	 * the plain text.
	 */
	if (op_type == CHCR_ENCRYPT_OP) {
		if (crde->crd_len + hash_size_in_response > MAX_REQUEST_SIZE)
			return (EFBIG);
	} else {
		if (crde->crd_len > MAX_REQUEST_SIZE)
			return (EFBIG);
	}
	sglist_reset(qp->cq_sg_dst);
	error = sglist_append_sglist(qp->cq_sg_dst, qp->cq_sg_crp, crde->crd_skip,
	    crde->crd_len);
	if (error != 0)
		return (error);
	if (op_type == CHCR_ENCRYPT_OP) {
		/* On encrypt the tag is written after the cipher text. */
		error = sglist_append_sglist(qp->cq_sg_dst, qp->cq_sg_crp,
		    crda->crd_inject, hash_size_in_response);
		if (error != 0)
			return (error);
	}
	dsgl_nsegs = ccp_count_sgl(qp->cq_sg_dst, DSGL_SGE_MAXLEN);
	if (dsgl_nsegs > MAX_RX_PHYS_DSGL_SGE)
		return (EFBIG);
	dsgl_len = ccp_phys_dsgl_len(dsgl_nsegs);

	/*
	 * The 'key' part of the key context consists of the key followed
	 * by the Galois hash key.
	 */
	kctx_len = roundup2(s->blkcipher.key_len, 16) + GMAC_BLOCK_LEN;
	transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, dsgl_len);

	/*
	 * The input buffer consists of the IV, any AAD, and then the
	 * cipher/plain text.  For decryption requests the hash is
	 * appended after the cipher text.
	 */
	input_len = crda->crd_len + crde->crd_len;
	if (op_type == CHCR_DECRYPT_OP)
		input_len += hash_size_in_response;
	if (input_len > MAX_REQUEST_SIZE)
		return (EFBIG);

	if (ccp_use_imm_data(transhdr_len, iv_len + input_len)) {
		imm_len = input_len;
		sgl_nsegs = 0;
		sgl_len = 0;
	} else {
		/* NOTE(review): 'sc' is undeclared in this function — ccr leftovers. */
		imm_len = 0;
		sglist_reset(sc->sg_ulptx);
		if (crda->crd_len != 0) {
			error = sglist_append_sglist(sc->sg_ulptx, qp->cq_sg_crp,
			    crda->crd_skip, crda->crd_len);
			if (error != 0)
				return (error);
		}
		error = sglist_append_sglist(sc->sg_ulptx, qp->cq_sg_crp,
		    crde->crd_skip, crde->crd_len);
		if (error != 0)
			return (error);
		if (op_type == CHCR_DECRYPT_OP) {
			error = sglist_append_sglist(sc->sg_ulptx, qp->cq_sg_crp,
			    crda->crd_inject, hash_size_in_response);
			if (error != 0)
				return (error);
		}
		sgl_nsegs = sc->sg_ulptx->sg_nseg;
		sgl_len = ccp_ulptx_sgl_len(sgl_nsegs);
	}

	/* Region offsets follow the IV; 0/0 marks "no AAD region". */
	if (crda->crd_len != 0) {
		aad_start = iv_len + 1;
		aad_stop = aad_start + crda->crd_len - 1;
	} else {
		aad_start = 0;
		aad_stop = 0;
	}
	cipher_start = iv_len + crda->crd_len + 1;
	if (op_type == CHCR_DECRYPT_OP)
		cipher_stop = hash_size_in_response;
	else
		cipher_stop = 0;
	if (op_type == CHCR_DECRYPT_OP)
		auth_insert = hash_size_in_response;
	else
		auth_insert = 0;

	wr_len = roundup2(transhdr_len, 16) + roundup2(imm_len, 16) + sgl_len;
	if (iv_loc == IV_IMMEDIATE)
		wr_len += iv_len;
	wr = alloc_wrqe(wr_len, sc->txq);
	if (wr == NULL) {
		sc->stats_wr_nomem++;
		return (ENOMEM);
	}
	crwr = wrtod(wr);
	memset(crwr, 0, wr_len);

	/* NOTE(review): 'sid' is also undeclared here — ccr leftovers. */
	ccp_populate_wreq(sc, crwr, kctx_len, wr_len, sid, imm_len, sgl_len,
	    0, iv_loc, crp);

	/* XXX: Hardcodes SGE loopback channel of 0. */
	crwr->sec_cpl.op_ivinsrtofst = htobe32(
	    V_CPL_TX_SEC_PDU_OPCODE(CPL_TX_SEC_PDU) |
	    V_CPL_TX_SEC_PDU_RXCHID(sc->tx_channel_id) |
	    V_CPL_TX_SEC_PDU_ACKFOLLOWS(0) | V_CPL_TX_SEC_PDU_ULPTXLPBK(1) |
	    V_CPL_TX_SEC_PDU_CPLLEN(2) | V_CPL_TX_SEC_PDU_PLACEHOLDER(0) |
	    V_CPL_TX_SEC_PDU_IVINSRTOFST(1));

	crwr->sec_cpl.pldlen = htobe32(iv_len + input_len);

	/*
	 * NB: cipherstop is explicitly set to 0.  On encrypt it
	 * should normally be set to 0 anyway (as the encrypt crd ends
	 * at the end of the input).  However, for decrypt the cipher
	 * ends before the tag in the AUTHENC case (and authstop is
	 * set to stop before the tag), but for GCM the cipher still
	 * runs to the end of the buffer.  Not sure if this is
	 * intentional or a firmware quirk, but it is required for
	 * working tag validation with GCM decryption.
	 */
	crwr->sec_cpl.aadstart_cipherstop_hi = htobe32(
	    V_CPL_TX_SEC_PDU_AADSTART(aad_start) |
	    V_CPL_TX_SEC_PDU_AADSTOP(aad_stop) |
	    V_CPL_TX_SEC_PDU_CIPHERSTART(cipher_start) |
	    V_CPL_TX_SEC_PDU_CIPHERSTOP_HI(0));
	crwr->sec_cpl.cipherstop_lo_authinsert = htobe32(
	    V_CPL_TX_SEC_PDU_CIPHERSTOP_LO(0) |
	    V_CPL_TX_SEC_PDU_AUTHSTART(cipher_start) |
	    V_CPL_TX_SEC_PDU_AUTHSTOP(cipher_stop) |
	    V_CPL_TX_SEC_PDU_AUTHINSERT(auth_insert));

	/* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */
	hmac_ctrl = ccp_hmac_ctrl(AES_GMAC_HASH_LEN, hash_size_in_response);
	crwr->sec_cpl.seqno_numivs = htobe32(
	    V_SCMD_SEQ_NO_CTRL(0) |
	    V_SCMD_PROTO_VERSION(CHCR_SCMD_PROTO_VERSION_GENERIC) |
	    V_SCMD_ENC_DEC_CTRL(op_type) |
	    V_SCMD_CIPH_AUTH_SEQ_CTRL(op_type == CHCR_ENCRYPT_OP ? 1 : 0) |
	    V_SCMD_CIPH_MODE(CHCR_SCMD_CIPHER_MODE_AES_GCM) |
	    V_SCMD_AUTH_MODE(CHCR_SCMD_AUTH_MODE_GHASH) |
	    V_SCMD_HMAC_CTRL(hmac_ctrl) |
	    V_SCMD_IV_SIZE(iv_len / 2) |
	    V_SCMD_NUM_IVS(0));
	crwr->sec_cpl.ivgen_hdrlen = htobe32(
	    V_SCMD_IV_GEN_CTRL(0) |
	    V_SCMD_MORE_FRAGS(0) | V_SCMD_LAST_FRAG(0) | V_SCMD_MAC_ONLY(0) |
	    V_SCMD_AADIVDROP(1) | V_SCMD_HDR_LEN(dsgl_len));

	/* Key context: cipher key followed by the Galois hash key. */
	crwr->key_ctx.ctx_hdr = s->blkcipher.key_ctx_hdr;
	memcpy(crwr->key_ctx.key, s->blkcipher.enckey, s->blkcipher.key_len);
	dst = crwr->key_ctx.key + roundup2(s->blkcipher.key_len, 16);
	memcpy(dst, s->gmac.ghash_h, GMAC_BLOCK_LEN);

	dst = (char *)(crwr + 1) + kctx_len;
	ccp_write_phys_dsgl(sc, dst, dsgl_nsegs);
	dst += sizeof(struct cpl_rx_phys_dsgl) + dsgl_len;
	if (iv_loc == IV_IMMEDIATE) {
		memcpy(dst, iv, iv_len);
		dst += iv_len;
	}
	if (imm_len != 0) {
		if (crda->crd_len != 0) {
			crypto_copydata(crp->crp_flags, crp->crp_buf,
			    crda->crd_skip, crda->crd_len, dst);
			dst += crda->crd_len;
		}
		crypto_copydata(crp->crp_flags, crp->crp_buf, crde->crd_skip,
		    crde->crd_len, dst);
		dst += crde->crd_len;
		if (op_type == CHCR_DECRYPT_OP)
			crypto_copydata(crp->crp_flags, crp->crp_buf,
			    crda->crd_inject, hash_size_in_response, dst);
	} else
		ccp_write_ulptx_sgl(sc, dst, sgl_nsegs);

	/* XXX: TODO backpressure */
	t4_wrq_tx(sc->adapter, wr);
#endif
	return (ENXIO);
}
#if 0
/*
 * Completion handler for AES-GCM requests, inherited from ccr(4).
 * Disabled (#if 0) along with the corresponding submission reference
 * code in ccp_gcm().
 */
static int
ccp_gcm_done(struct ccp_softc *sc, struct ccp_session *s,
    struct cryptop *crp, const struct cpl_fw6_pld *cpl, int error)
{
	/*
	 * The updated IV to permit chained requests is at
	 * cpl->data[2], but OCF doesn't permit chained requests.
	 *
	 * Note that the hardware should always verify the GMAC hash.
	 */
	return (error);
}
#endif
#define MAX_TRNG_RETRIES 10 | |||||
u_int | |||||
random_ccp_read(void *v, u_int c) | |||||
{ | |||||
uint32_t *buf; | |||||
u_int i, j; | |||||
KASSERT(c % sizeof(*buf) == 0, ("%u not multiple of u_long", c)); | |||||
buf = v; | |||||
for (i = c; i > 0; i -= sizeof(*buf)) { | |||||
for (j = 0; j < MAX_TRNG_RETRIES; j++) { | |||||
*buf = ccp_read_4(g_ccp_softc, TRNG_OUT_OFFSET); | |||||
if (*buf != 0) | |||||
break; | |||||
} | |||||
if (j == MAX_TRNG_RETRIES) | |||||
return (0); | |||||
buf++; | |||||
} | |||||
return (c); | |||||
} | |||||
#ifdef DDB | |||||
void | |||||
db_ccp_show_hw(struct ccp_softc *sc) | |||||
{ | |||||
db_printf(" queue mask: 0x%x\n", | |||||
ccp_read_4(sc, CMD_QUEUE_MASK_OFFSET)); | |||||
db_printf(" queue prio: 0x%x\n", | |||||
ccp_read_4(sc, CMD_QUEUE_PRIO_OFFSET)); | |||||
db_printf(" reqid: 0x%x\n", ccp_read_4(sc, CMD_REQID_CONFIG_OFFSET)); | |||||
db_printf(" trng output: 0x%x\n", ccp_read_4(sc, TRNG_OUT_OFFSET)); | |||||
db_printf(" cmd timeout: 0x%x\n", | |||||
ccp_read_4(sc, CMD_CMD_TIMEOUT_OFFSET)); | |||||
db_printf(" lsb public mask lo: 0x%x\n", | |||||
ccp_read_4(sc, LSB_PUBLIC_MASK_LO_OFFSET)); | |||||
db_printf(" lsb public mask hi: 0x%x\n", | |||||
ccp_read_4(sc, LSB_PUBLIC_MASK_HI_OFFSET)); | |||||
db_printf(" lsb private mask lo: 0x%x\n", | |||||
ccp_read_4(sc, LSB_PRIVATE_MASK_LO_OFFSET)); | |||||
db_printf(" lsb private mask hi: 0x%x\n", | |||||
ccp_read_4(sc, LSB_PRIVATE_MASK_HI_OFFSET)); | |||||
db_printf(" version: 0x%x\n", ccp_read_4(sc, VERSION_REG)); | |||||
} | |||||
void | |||||
db_ccp_show_queue_hw(struct ccp_queue *qp) | |||||
{ | |||||
struct ccp_softc *sc; | |||||
unsigned q; | |||||
sc = qp->cq_softc; | |||||
q = qp->cq_qindex; | |||||
db_printf(" qcontrol: 0x%x\n", | |||||
ccp_read_queue_4(sc, q, CMD_Q_CONTROL_BASE)); | |||||
db_printf(" tail_lo: 0x%x\n", | |||||
ccp_read_queue_4(sc, q, CMD_Q_TAIL_LO_BASE)); | |||||
db_printf(" head_lo: 0x%x\n", | |||||
ccp_read_queue_4(sc, q, CMD_Q_HEAD_LO_BASE)); | |||||
db_printf(" int enable: 0x%x\n", | |||||
ccp_read_queue_4(sc, q, CMD_Q_INT_ENABLE_BASE)); | |||||
db_printf(" interrupt status: 0x%x\n", | |||||
ccp_read_queue_4(sc, q, CMD_Q_INTERRUPT_STATUS_BASE)); | |||||
db_printf(" status: 0x%x\n", | |||||
ccp_read_queue_4(sc, q, CMD_Q_STATUS_BASE)); | |||||
db_printf(" int stats: 0x%x\n", | |||||
ccp_read_queue_4(sc, q, CMD_Q_INT_STATUS_BASE)); | |||||
} | |||||
#endif |
/* XXX(review): stray Phabricator review remark ("This line is misindented.") fused into the file — not source code; remove. */