diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c --- a/sys/kern/uipc_ktls.c +++ b/sys/kern/uipc_ktls.c @@ -82,6 +82,7 @@ STAILQ_HEAD(, mbuf) m_head; STAILQ_HEAD(, socket) so_head; bool running; + int lastallocfail; } __aligned(CACHE_LINE_SIZE); struct ktls_domain_info { @@ -95,6 +96,7 @@ LIST_HEAD(, ktls_crypto_backend) ktls_backends; static struct rmlock ktls_backends_lock; static uma_zone_t ktls_session_zone; +static uma_zone_t ktls_buffer_zone; static uint16_t ktls_cpuid_lookup[MAXCPU]; SYSCTL_NODE(_kern_ipc, OID_AUTO, tls, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, @@ -116,7 +118,7 @@ "Bind crypto threads to cores (1) or cores and domains (2) at boot"); static u_int ktls_maxlen = 16384; -SYSCTL_UINT(_kern_ipc_tls, OID_AUTO, maxlen, CTLFLAG_RWTUN, +SYSCTL_UINT(_kern_ipc_tls, OID_AUTO, maxlen, CTLFLAG_RDTUN, &ktls_maxlen, 0, "Maximum TLS record size"); static int ktls_number_threads; @@ -134,6 +136,11 @@ &ktls_cbc_enable, 1, "Enable Support of AES-CBC crypto for kernel TLS"); +static bool ktls_sw_buffer_cache = true; +SYSCTL_BOOL(_kern_ipc_tls, OID_AUTO, sw_buffer_cache, CTLFLAG_RDTUN, + &ktls_sw_buffer_cache, 1, + "Enable caching of output buffers for SW encryption"); + static COUNTER_U64_DEFINE_EARLY(ktls_tasks_active); SYSCTL_COUNTER_U64(_kern_ipc_tls, OID_AUTO, tasks_active, CTLFLAG_RD, &ktls_tasks_active, "Number of active tasks"); @@ -366,6 +373,51 @@ } #endif +static int +ktls_buffer_import(void *arg, void **store, int count, int domain, int flags) +{ + vm_page_t m; + int i; + + KASSERT((ktls_maxlen & PAGE_MASK) == 0, + ("%s: ktls max length %d is not page size-aligned", + __func__, ktls_maxlen)); + + for (i = 0; i < count; i++) { + m = vm_page_alloc_contig_domain(NULL, 0, domain, + VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | + VM_ALLOC_NODUMP | malloc2vm_flags(flags), + atop(ktls_maxlen), 0, ~0ul, PAGE_SIZE, 0, + VM_MEMATTR_DEFAULT); + if (m == NULL) + break; + store[i] = (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); + } + return (i); +} + +static void +ktls_buffer_release(void *arg __unused, void **store, int count) +{ + vm_page_t m; + int i, j; + + for (i = 0; i < count; i++) { + m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)store[i])); + for (j = 0; j < atop(ktls_maxlen); j++) { + (void)vm_page_unwire_noq(m + j); + vm_page_free(m + j); + } + } +} + +static void +ktls_free_mext_contig(struct mbuf *m) +{ + M_ASSERTEXTPG(m); + uma_zfree(ktls_buffer_zone, (void *)PHYS_TO_DMAP(m->m_epg_pa[0])); +} + static void ktls_init(void *dummy __unused) { @@ -385,6 +437,13 @@ NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); + if (ktls_sw_buffer_cache) { + ktls_buffer_zone = uma_zcache_create("ktls_buffers", + roundup2(ktls_maxlen, PAGE_SIZE), NULL, NULL, NULL, NULL, + ktls_buffer_import, ktls_buffer_release, NULL, + UMA_ZONE_FIRSTTOUCH); + } + /* * Initialize the workqueues to run the TLS work. We create a * work queue for each CPU. @@ -1974,6 +2033,30 @@ wakeup(wq); } +static void * +ktls_buffer_alloc(struct ktls_wq *wq, struct mbuf *m) +{ + void *buf; + + if (m->m_epg_npgs <= 2) + return (NULL); + if (ktls_buffer_zone == NULL) + return (NULL); + if ((u_int)(ticks - wq->lastallocfail) < hz) { + /* + * Rate-limit allocation attempts after a failure. + * ktls_buffer_import() will acquire a per-domain mutex to check + * the free page queues and may fail consistently if memory is + * fragmented. 
+ */ + return (NULL); + } + buf = uma_zalloc(ktls_buffer_zone, M_NOWAIT | M_NORECLAIM); + if (buf == NULL) + wq->lastallocfail = ticks; + return (buf); +} + void ktls_enqueue(struct mbuf *m, struct socket *so, int page_count) { @@ -2006,7 +2089,7 @@ } static __noinline void -ktls_encrypt(struct mbuf *top) +ktls_encrypt(struct ktls_wq *wq, struct mbuf *top) { struct ktls_session *tls; struct socket *so; @@ -2015,6 +2098,7 @@ struct iovec src_iov[1 + btoc(TLS_MAX_MSG_SIZE_V10_2)]; struct iovec dst_iov[1 + btoc(TLS_MAX_MSG_SIZE_V10_2)]; vm_page_t pg; + void *cbuf; int error, i, len, npages, off, total_pages; bool is_anon; @@ -2056,6 +2140,9 @@ KASSERT(npages + m->m_epg_npgs <= total_pages, ("page count mismatch: top %p, total_pages %d, m %p", top, total_pages, m)); + KASSERT(ptoa(m->m_epg_npgs) <= ktls_maxlen, + ("page count %d larger than maximum frame length %d", + m->m_epg_npgs, ktls_maxlen)); /* * Generate source and destination ivoecs to pass to @@ -2072,37 +2159,50 @@ len = m_epg_pagelen(m, i, off); src_iov[i].iov_len = len; src_iov[i].iov_base = - (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[i]) + - off; + (char *)(void *)PHYS_TO_DMAP(m->m_epg_pa[i]) + off; + } - if (is_anon) { - dst_iov[i].iov_base = src_iov[i].iov_base; - dst_iov[i].iov_len = src_iov[i].iov_len; - continue; - } -retry_page: - pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | - VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP | VM_ALLOC_WIRED); - if (pg == NULL) { - vm_wait(NULL); - goto retry_page; + if (is_anon) { + memcpy(dst_iov, src_iov, i * sizeof(struct iovec)); + } else if ((cbuf = ktls_buffer_alloc(wq, m)) != NULL) { + len = ptoa(m->m_epg_npgs - 1) + m->m_epg_last_len - + m->m_epg_1st_off; + dst_iov[0].iov_base = (char *)cbuf + m->m_epg_1st_off; + dst_iov[0].iov_len = len; + parray[0] = DMAP_TO_PHYS((vm_offset_t)cbuf); + i = 1; + } else { + cbuf = NULL; + off = m->m_epg_1st_off; + for (i = 0; i < m->m_epg_npgs; i++, off = 0) { + do { + pg = vm_page_alloc(NULL, 0, + VM_ALLOC_NORMAL | + VM_ALLOC_NOOBJ | + VM_ALLOC_NODUMP | + VM_ALLOC_WIRED | + VM_ALLOC_WAITFAIL); + } while (pg == NULL); + + len = m_epg_pagelen(m, i, off); + parray[i] = VM_PAGE_TO_PHYS(pg); + dst_iov[i].iov_base = + (char *)(void *)PHYS_TO_DMAP( + parray[i]) + off; + dst_iov[i].iov_len = len; } - parray[i] = VM_PAGE_TO_PHYS(pg); - dst_iov[i].iov_base = - (char *)(void *)PHYS_TO_DMAP(parray[i]) + off; - dst_iov[i].iov_len = len; } if (__predict_false(m->m_epg_npgs == 0)) { /* TLS 1.0 empty fragment. */ npages++; } else - npages += i; + npages += m->m_epg_npgs; error = (*tls->sw_encrypt)(tls, (const struct tls_record_layer *)m->m_epg_hdr, - m->m_epg_trail, src_iov, dst_iov, i, m->m_epg_seqno, - m->m_epg_record_type); + m->m_epg_trail, src_iov, dst_iov, m->m_epg_npgs, i, + m->m_epg_seqno, m->m_epg_record_type); if (error) { counter_u64_add(ktls_offload_failed_crypto, 1); break; @@ -2118,11 +2218,19 @@ m->m_ext.ext_free(m); /* Replace them with the new pages. */ - for (i = 0; i < m->m_epg_npgs; i++) - m->m_epg_pa[i] = parray[i]; + if (cbuf != NULL) { + for (i = 0; i < m->m_epg_npgs; i++) + m->m_epg_pa[i] = parray[0] + ptoa(i); + + /* Contig pages should go back to the cache. */ + m->m_ext.ext_free = ktls_free_mext_contig; + } else { + for (i = 0; i < m->m_epg_npgs; i++) + m->m_epg_pa[i] = parray[i]; - /* Use the basic free routine. */ - m->m_ext.ext_free = mb_free_mext_pgs; + /* Use the basic free routine. */ + m->m_ext.ext_free = mb_free_mext_pgs; + } /* Pages are now writable. 
*/ m->m_epg_flags |= EPG_FLAG_ANON; @@ -2189,7 +2297,7 @@ ktls_free(m->m_epg_tls); uma_zfree(zone_mbuf, m); } else { - ktls_encrypt(m); + ktls_encrypt(wq, m); counter_u64_add(ktls_cnt_tx_queued, -1); } } diff --git a/sys/opencrypto/ktls_ocf.c b/sys/opencrypto/ktls_ocf.c --- a/sys/opencrypto/ktls_ocf.c +++ b/sys/opencrypto/ktls_ocf.c @@ -178,15 +178,15 @@ static int ktls_ocf_tls_cbc_encrypt(struct ktls_session *tls, const struct tls_record_layer *hdr, uint8_t *trailer, struct iovec *iniov, - struct iovec *outiov, int iovcnt, uint64_t seqno, + struct iovec *outiov, int iniovcnt, int outiovcnt, uint64_t seqno, uint8_t record_type __unused) { struct uio uio, out_uio; struct tls_mac_data ad; struct cryptop crp; struct ocf_session *os; - struct iovec iov[iovcnt + 2]; - struct iovec out_iov[iovcnt + 1]; + struct iovec iov[iniovcnt + 2]; + struct iovec out_iov[outiovcnt + 1]; int i, error; uint16_t tls_comp_len; uint8_t pad; @@ -219,10 +219,11 @@ * at least compute inplace as well while we are here. */ tls_comp_len = 0; - inplace = true; - for (i = 0; i < iovcnt; i++) { + inplace = iniovcnt == outiovcnt; + for (i = 0; i < iniovcnt; i++) { tls_comp_len += iniov[i].iov_len; - if (iniov[i].iov_base != outiov[i].iov_base) + if (inplace && + (i >= outiovcnt || iniov[i].iov_base != outiov[i].iov_base)) inplace = false; } @@ -236,11 +237,11 @@ /* First, compute the MAC. */ iov[0].iov_base = &ad; iov[0].iov_len = sizeof(ad); - memcpy(&iov[1], iniov, sizeof(*iniov) * iovcnt); - iov[iovcnt + 1].iov_base = trailer; - iov[iovcnt + 1].iov_len = os->mac_len; + memcpy(&iov[1], iniov, sizeof(*iniov) * iniovcnt); + iov[iniovcnt + 1].iov_base = trailer; + iov[iniovcnt + 1].iov_len = os->mac_len; uio.uio_iov = iov; - uio.uio_iovcnt = iovcnt + 2; + uio.uio_iovcnt = iniovcnt + 2; uio.uio_offset = 0; uio.uio_segflg = UIO_SYSSPACE; uio.uio_td = curthread; @@ -279,10 +280,10 @@ * Don't recopy the input iovec, instead just adjust the * trailer length and skip over the AAD vector in the uio. 
 	 */
-	iov[iovcnt + 1].iov_len += pad + 1;
+	iov[iniovcnt + 1].iov_len += pad + 1;
 	uio.uio_iov = iov + 1;
-	uio.uio_iovcnt = iovcnt + 1;
-	uio.uio_resid = tls_comp_len + iov[iovcnt + 1].iov_len;
+	uio.uio_iovcnt = iniovcnt + 1;
+	uio.uio_resid = tls_comp_len + iov[iniovcnt + 1].iov_len;
 	KASSERT(uio.uio_resid % AES_BLOCK_LEN == 0,
 	    ("invalid encryption size"));
@@ -297,10 +298,10 @@
 	memcpy(crp.crp_iv, hdr + 1, AES_BLOCK_LEN);
 	crypto_use_uio(&crp, &uio);
 	if (!inplace) {
-		memcpy(out_iov, outiov, sizeof(*iniov) * iovcnt);
-		out_iov[iovcnt] = iov[iovcnt + 1];
+		memcpy(out_iov, outiov, sizeof(*iniov) * outiovcnt);
+		out_iov[outiovcnt] = iov[iniovcnt + 1];
 		out_uio.uio_iov = out_iov;
-		out_uio.uio_iovcnt = iovcnt + 1;
+		out_uio.uio_iovcnt = outiovcnt + 1;
 		out_uio.uio_offset = 0;
 		out_uio.uio_segflg = UIO_SYSSPACE;
 		out_uio.uio_td = curthread;
@@ -338,14 +339,14 @@
 static int
 ktls_ocf_tls12_aead_encrypt(struct ktls_session *tls,
     const struct tls_record_layer *hdr, uint8_t *trailer, struct iovec *iniov,
-    struct iovec *outiov, int iovcnt, uint64_t seqno,
+    struct iovec *outiov, int iniovcnt, int outiovcnt, uint64_t seqno,
     uint8_t record_type __unused)
 {
 	struct uio uio, out_uio, *tag_uio;
 	struct tls_aead_data ad;
 	struct cryptop crp;
 	struct ocf_session *os;
-	struct iovec iov[iovcnt + 1];
+	struct iovec iov[outiovcnt + 1];
 	int i, error;
 	uint16_t tls_comp_len;
 	bool inplace;
@@ -353,13 +354,13 @@
 	os = tls->cipher;
 	uio.uio_iov = iniov;
-	uio.uio_iovcnt = iovcnt;
+	uio.uio_iovcnt = iniovcnt;
 	uio.uio_offset = 0;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_td = curthread;
 	out_uio.uio_iov = outiov;
-	out_uio.uio_iovcnt = iovcnt;
+	out_uio.uio_iovcnt = outiovcnt;
 	out_uio.uio_offset = 0;
 	out_uio.uio_segflg = UIO_SYSSPACE;
 	out_uio.uio_td = curthread;
@@ -396,10 +397,11 @@
 	crp.crp_aad_length = sizeof(ad);
 	/* Compute payload length and determine if encryption is in place. */
-	inplace = true;
+	inplace = iniovcnt == outiovcnt;
 	crp.crp_payload_start = 0;
-	for (i = 0; i < iovcnt; i++) {
-		if (iniov[i].iov_base != outiov[i].iov_base)
+	for (i = 0; i < iniovcnt; i++) {
+		if (inplace &&
+		    (i >= outiovcnt || iniov[i].iov_base != outiov[i].iov_base))
 			inplace = false;
 		crp.crp_payload_length += iniov[i].iov_len;
 	}
@@ -412,9 +414,9 @@
 	tag_uio = &out_uio;
 	/* Duplicate iovec and append vector for tag. */
-	memcpy(iov, tag_uio->uio_iov, iovcnt * sizeof(struct iovec));
-	iov[iovcnt].iov_base = trailer;
-	iov[iovcnt].iov_len = AES_GMAC_HASH_LEN;
+	memcpy(iov, tag_uio->uio_iov, outiovcnt * sizeof(struct iovec));
+	iov[outiovcnt].iov_base = trailer;
+	iov[outiovcnt].iov_len = AES_GMAC_HASH_LEN;
 	tag_uio->uio_iov = iov;
 	tag_uio->uio_iovcnt++;
 	crp.crp_digest_start = tag_uio->uio_resid;
@@ -510,14 +512,15 @@
 static int
 ktls_ocf_tls13_aead_encrypt(struct ktls_session *tls,
     const struct tls_record_layer *hdr, uint8_t *trailer, struct iovec *iniov,
-    struct iovec *outiov, int iovcnt, uint64_t seqno, uint8_t record_type)
+    struct iovec *outiov, int iniovcnt, int outiovcnt, uint64_t seqno,
+    uint8_t record_type)
 {
 	struct uio uio, out_uio;
 	struct tls_aead_data_13 ad;
 	char nonce[12];
 	struct cryptop crp;
 	struct ocf_session *os;
-	struct iovec iov[iovcnt + 1], out_iov[iovcnt + 1];
+	struct iovec iov[iniovcnt + 1], out_iov[outiovcnt + 1];
 	int i, error;
 	bool inplace;
@@ -538,10 +541,11 @@
 	crp.crp_aad_length = sizeof(ad);
 	/* Compute payload length and determine if encryption is in place. */
-	inplace = true;
+	inplace = iniovcnt == outiovcnt;
 	crp.crp_payload_start = 0;
-	for (i = 0; i < iovcnt; i++) {
-		if (iniov[i].iov_base != outiov[i].iov_base)
+	for (i = 0; i < iniovcnt; i++) {
+		if (inplace && (i >= outiovcnt ||
+		    iniov[i].iov_base != outiov[i].iov_base))
 			inplace = false;
 		crp.crp_payload_length += iniov[i].iov_len;
 	}
@@ -556,11 +560,11 @@
 	 * include the full trailer as input to get the record_type
 	 * even if only the first byte is used.
 	 */
-	memcpy(iov, iniov, iovcnt * sizeof(*iov));
-	iov[iovcnt].iov_base = trailer;
-	iov[iovcnt].iov_len = tls->params.tls_tlen;
+	memcpy(iov, iniov, iniovcnt * sizeof(*iov));
+	iov[iniovcnt].iov_base = trailer;
+	iov[iniovcnt].iov_len = tls->params.tls_tlen;
 	uio.uio_iov = iov;
-	uio.uio_iovcnt = iovcnt + 1;
+	uio.uio_iovcnt = iniovcnt + 1;
 	uio.uio_offset = 0;
 	uio.uio_resid = crp.crp_payload_length + tls->params.tls_tlen - 1;
 	uio.uio_segflg = UIO_SYSSPACE;
@@ -569,11 +573,11 @@
 	if (!inplace) {
 		/* Duplicate the output iov to append the trailer. */
-		memcpy(out_iov, outiov, iovcnt * sizeof(*out_iov));
-		out_iov[iovcnt] = iov[iovcnt];
+		memcpy(out_iov, outiov, outiovcnt * sizeof(*out_iov));
+		out_iov[outiovcnt] = iov[iniovcnt];
 		out_uio.uio_iov = out_iov;
-		out_uio.uio_iovcnt = iovcnt + 1;
+		out_uio.uio_iovcnt = outiovcnt + 1;
 		out_uio.uio_offset = 0;
 		out_uio.uio_resid = crp.crp_payload_length +
 		    tls->params.tls_tlen - 1;
diff --git a/sys/sys/ktls.h b/sys/sys/ktls.h
--- a/sys/sys/ktls.h
+++ b/sys/sys/ktls.h
@@ -164,7 +164,7 @@
 #define KTLS_TX 1
 #define KTLS_RX 2
-#define KTLS_API_VERSION 7
+#define KTLS_API_VERSION 8
 struct iovec;
 struct ktls_session;
@@ -186,8 +186,8 @@
 	union {
 		int (*sw_encrypt)(struct ktls_session *tls,
 		    const struct tls_record_layer *hdr, uint8_t *trailer,
-		    struct iovec *src, struct iovec *dst, int iovcnt,
-		    uint64_t seqno, uint8_t record_type);
+		    struct iovec *src, struct iovec *dst, int srciovcnt,
+		    int dstiovcnt, uint64_t seqno, uint8_t record_type);
 		int (*sw_decrypt)(struct ktls_session *tls,
 		    const struct tls_record_layer *hdr, struct mbuf *m,
 		    uint64_t seqno, int *trailer_len);
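
Note on the wq->lastallocfail logic added above: it is a plain time-based back-off. Once uma_zalloc() of a contiguous buffer fails, ktls_encrypt() falls straight back to per-page allocation for roughly one second instead of taking the ktls_buffer_import() path (and its per-domain free-queue lock) on every record. The userland sketch below illustrates only that pattern; buf_cache_alloc() and try_contig_alloc() are hypothetical names, and it uses time(3) where the kernel uses the ticks/hz counters.

/*
 * Simplified stand-in for the lastallocfail back-off in ktls_buffer_alloc().
 */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

struct alloc_state {
	time_t	lastallocfail;		/* time of the last failed attempt */
};

/* Stand-in for uma_zalloc(ktls_buffer_zone, M_NOWAIT | M_NORECLAIM). */
static void *
try_contig_alloc(size_t len)
{

	/* Fail half of the time to exercise the back-off path. */
	if (rand() % 2 == 0)
		return (NULL);
	return (malloc(len));
}

static void *
buf_cache_alloc(struct alloc_state *st, size_t len)
{
	void *buf;
	time_t now;

	now = time(NULL);
	if (now - st->lastallocfail < 1) {
		/* Recent failure: skip the attempt, caller falls back. */
		return (NULL);
	}
	buf = try_contig_alloc(len);
	if (buf == NULL)
		st->lastallocfail = now;
	return (buf);
}

int
main(void)
{
	struct alloc_state st = { .lastallocfail = 0 };
	void *p;
	int i;

	for (i = 0; i < 5; i++) {
		p = buf_cache_alloc(&st, 16384);
		printf("attempt %d: %s\n", i,
		    p != NULL ? "contiguous buffer" : "per-page fallback");
		free(p);
	}
	return (0);
}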
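Note on the sw_encrypt signature change: with a cached contiguous output buffer the destination collapses to a single iovec, while the unmapped source mbuf still contributes one iovec per page, so the callbacks need separate input and output counts and can no longer assume matching arrays when testing for in-place encryption. The minimal userland sketch below mirrors that shape check; the helper name encrypt_is_inplace() and the page-sized buffers are illustrative only.

/*
 * Sketch of the in/out iovec-count split used by the sw_encrypt callbacks.
 */
#include <sys/uio.h>
#include <stdbool.h>
#include <stdio.h>

#define	PAGE_SZ	4096

static bool
encrypt_is_inplace(const struct iovec *in, int incnt,
    const struct iovec *out, int outcnt)
{
	bool inplace;
	int i;

	/* Mirrors the updated inplace test in ktls_ocf.c. */
	inplace = incnt == outcnt;
	for (i = 0; i < incnt; i++) {
		if (inplace &&
		    (i >= outcnt || in[i].iov_base != out[i].iov_base))
			inplace = false;
	}
	return (inplace);
}

int
main(void)
{
	static char pages[3][PAGE_SZ];	/* stand-ins for source pages */
	static char contig[3 * PAGE_SZ];/* stand-in for a cached buffer */
	struct iovec src[3], dst[1];
	int i;

	/* One source iovec per page, as built in ktls_encrypt(). */
	for (i = 0; i < 3; i++) {
		src[i].iov_base = pages[i];
		src[i].iov_len = PAGE_SZ;
	}

	/* A single destination iovec covering the contiguous buffer. */
	dst[0].iov_base = contig;
	dst[0].iov_len = sizeof(contig);

	printf("in place (src vs src): %d\n",
	    encrypt_is_inplace(src, 3, src, 3));
	printf("in place (src vs contig): %d\n",
	    encrypt_is_inplace(src, 3, dst, 1));
	return (0);
}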