Page MenuHomeFreeBSD

D24628.diff
No OneTemporary

D24628.diff

Index: head/share/man/man4/tcp.4
===================================================================
--- head/share/man/man4/tcp.4
+++ head/share/man/man4/tcp.4
@@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd April 27, 2020
+.Dd July 23, 2020
.Dt TCP 4
.Os
.Sh NAME
@@ -356,10 +356,22 @@
The control message contains a
.Vt struct tls_get_record
which includes fields from the TLS record header.
-If a corrupted TLS record is received,
+If an invalid or corrupted TLS record is received,
.Xr recvmsg 2
-will fail with
-.Dv EBADMSG .
+will fail with one of the following errors:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The version fields in a TLS record's header did not match the version required
+by the
+.Vt struct tls_so_enable
+structure used to enable in-kernel TLS.
+.It Bq Er EMSGSIZE
+A TLS record's length was either too small or too large.
+.It Bq Er EMSGSIZE
+The connection was closed after sending a truncated TLS record.
+.It Bq Er EBADMSG
+The TLS record failed to match the included authentication tag.
+.El
.Pp
At present, only a single receive key may be set on a socket.
As such, users of this option must disable rekeying.
Index: head/sys/kern/uipc_ktls.c
===================================================================
--- head/sys/kern/uipc_ktls.c
+++ head/sys/kern/uipc_ktls.c
@@ -78,7 +78,8 @@
struct ktls_wq {
struct mtx mtx;
- STAILQ_HEAD(, mbuf) head;
+ STAILQ_HEAD(, mbuf) m_head;
+ STAILQ_HEAD(, socket) so_head;
bool running;
} __aligned(CACHE_LINE_SIZE);
@@ -130,10 +131,16 @@
SYSCTL_COUNTER_U64(_kern_ipc_tls, OID_AUTO, tasks_active, CTLFLAG_RD,
&ktls_tasks_active, "Number of active tasks");
-static counter_u64_t ktls_cnt_on;
-SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, so_inqueue, CTLFLAG_RD,
- &ktls_cnt_on, "Number of TLS records in queue to tasks for SW crypto");
+static counter_u64_t ktls_cnt_tx_queued;
+SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, sw_tx_inqueue, CTLFLAG_RD,
+ &ktls_cnt_tx_queued,
+ "Number of TLS records in queue to tasks for SW encryption");
+static counter_u64_t ktls_cnt_rx_queued;
+SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, sw_rx_inqueue, CTLFLAG_RD,
+ &ktls_cnt_rx_queued,
+ "Number of TLS sockets in queue to tasks for SW decryption");
+
static counter_u64_t ktls_offload_total;
SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, offload_total,
CTLFLAG_RD, &ktls_offload_total,
@@ -148,6 +155,10 @@
SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, active, CTLFLAG_RD,
&ktls_offload_active, "Total Active TLS sessions");
+static counter_u64_t ktls_offload_corrupted_records;
+SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, corrupted_records, CTLFLAG_RD,
+ &ktls_offload_corrupted_records, "Total corrupted TLS records received");
+
static counter_u64_t ktls_offload_failed_crypto;
SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, failed_crypto, CTLFLAG_RD,
&ktls_offload_failed_crypto, "Total TLS crypto failures");
@@ -333,10 +344,12 @@
int error, i;
ktls_tasks_active = counter_u64_alloc(M_WAITOK);
- ktls_cnt_on = counter_u64_alloc(M_WAITOK);
+ ktls_cnt_tx_queued = counter_u64_alloc(M_WAITOK);
+ ktls_cnt_rx_queued = counter_u64_alloc(M_WAITOK);
ktls_offload_total = counter_u64_alloc(M_WAITOK);
ktls_offload_enable_calls = counter_u64_alloc(M_WAITOK);
ktls_offload_active = counter_u64_alloc(M_WAITOK);
+ ktls_offload_corrupted_records = counter_u64_alloc(M_WAITOK);
ktls_offload_failed_crypto = counter_u64_alloc(M_WAITOK);
ktls_switch_to_ifnet = counter_u64_alloc(M_WAITOK);
ktls_switch_to_sw = counter_u64_alloc(M_WAITOK);
@@ -369,7 +382,8 @@
* work queue for each CPU.
*/
CPU_FOREACH(i) {
- STAILQ_INIT(&ktls_wq[i].head);
+ STAILQ_INIT(&ktls_wq[i].m_head);
+ STAILQ_INIT(&ktls_wq[i].so_head);
mtx_init(&ktls_wq[i].mtx, "ktls work queue", NULL, MTX_DEF);
error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i],
&ktls_proc, &td, 0, 0, "KTLS", "thr_%d", i);
@@ -855,7 +869,7 @@
}
static int
-ktls_try_sw(struct socket *so, struct ktls_session *tls)
+ktls_try_sw(struct socket *so, struct ktls_session *tls, int direction)
{
struct rm_priotracker prio;
struct ktls_crypto_backend *be;
@@ -870,7 +884,7 @@
if (ktls_allow_unload)
rm_rlock(&ktls_backends_lock, &prio);
LIST_FOREACH(be, &ktls_backends, next) {
- if (be->try(so, tls) == 0)
+ if (be->try(so, tls, direction) == 0)
break;
KASSERT(tls->cipher == NULL,
("ktls backend leaked a cipher pointer"));
@@ -896,6 +910,61 @@
return (0);
}
+/*
+ * KTLS RX stores data in the socket buffer as a list of TLS records,
+ * where each record is stored as a control message containing the TLS
+ * header followed by data mbufs containing the decrypted data. This
+ * is different from KTLS TX which always uses an mb_ext_pgs mbuf for
+ * both encrypted and decrypted data. TLS records decrypted by a NIC
+ * should be queued to the socket buffer as records, but encrypted
+ * data which needs to be decrypted by software arrives as a stream of
+ * regular mbufs which need to be converted. In addition, there may
+ * already be pending encrypted data in the socket buffer when KTLS RX
+ * is enabled.
+ *
+ * To manage not-yet-decrypted data for KTLS RX, the following scheme
+ * is used:
+ *
+ * - A single chain of NOTREADY mbufs is hung off of sb_mtls.
+ *
+ * - ktls_check_rx checks this chain of mbufs reading the TLS header
+ * from the first mbuf. Once all of the data for that TLS record is
+ * queued, the socket is queued to a worker thread.
+ *
+ * - The worker thread calls ktls_decrypt to decrypt TLS records in
+ * the TLS chain. Each TLS record is detached from the TLS chain,
+ * decrypted, and inserted into the regular socket buffer chain as
+ * record starting with a control message holding the TLS header and
+ * a chain of mbufs holding the encrypted data.
+ */
+
+/*
+ * Move all currently queued data from the regular receive chain
+ * (sb_mb) onto the not-yet-decrypted TLS chain (sb_mtls), marking
+ * each mbuf M_NOTREADY and transferring the byte accounting from
+ * sb_acc to sb_tlscc.  Used when KTLS RX is enabled on a socket that
+ * already holds (encrypted) data in its receive buffer.
+ * Caller must hold the sockbuf lock.
+ */
+static void
+sb_mark_notready(struct sockbuf *sb)
+{
+ struct mbuf *m;
+
+ m = sb->sb_mb;
+ sb->sb_mtls = m;
+ sb->sb_mb = NULL;
+ sb->sb_mbtail = NULL;
+ sb->sb_lastrecord = NULL;
+ for (; m != NULL; m = m->m_next) {
+ KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt != NULL",
+ __func__));
+ KASSERT((m->m_flags & M_NOTAVAIL) == 0, ("%s: mbuf not avail",
+ __func__));
+ KASSERT(sb->sb_acc >= m->m_len, ("%s: sb_acc < m->m_len",
+ __func__));
+ m->m_flags |= M_NOTREADY;
+ sb->sb_acc -= m->m_len;
+ sb->sb_tlscc += m->m_len;
+ sb->sb_mtlstail = m;
+ }
+ /* Every queued byte must now be accounted to the TLS chain. */
+ KASSERT(sb->sb_acc == 0 && sb->sb_tlscc == sb->sb_ccc,
+ ("%s: acc %u tlscc %u ccc %u", __func__, sb->sb_acc, sb->sb_tlscc,
+ sb->sb_ccc));
+}
+
int
ktls_enable_rx(struct socket *so, struct tls_enable *en)
{
@@ -924,16 +993,20 @@
if (en->cipher_algorithm == CRYPTO_AES_CBC && !ktls_cbc_enable)
return (ENOTSUP);
+ /* TLS 1.3 is not yet supported. */
+ if (en->tls_vmajor == TLS_MAJOR_VER_ONE &&
+ en->tls_vminor == TLS_MINOR_VER_THREE)
+ return (ENOTSUP);
+
error = ktls_create_session(so, en, &tls);
if (error)
return (error);
- /* TLS RX offload is only supported on TOE currently. */
#ifdef TCP_OFFLOAD
error = ktls_try_toe(so, tls, KTLS_RX);
-#else
- error = EOPNOTSUPP;
+ if (error)
#endif
+ error = ktls_try_sw(so, tls, KTLS_RX);
if (error) {
ktls_cleanup(tls);
@@ -942,7 +1015,13 @@
/* Mark the socket as using TLS offload. */
SOCKBUF_LOCK(&so->so_rcv);
+ so->so_rcv.sb_tls_seqno = be64dec(en->rec_seq);
so->so_rcv.sb_tls_info = tls;
+ so->so_rcv.sb_flags |= SB_TLS_RX;
+
+ /* Mark existing data as not ready until it can be decrypted. */
+ sb_mark_notready(&so->so_rcv);
+ ktls_check_rx(&so->so_rcv);
SOCKBUF_UNLOCK(&so->so_rcv);
counter_u64_add(ktls_offload_total, 1);
@@ -993,7 +1072,7 @@
#endif
error = ktls_try_ifnet(so, tls, false);
if (error)
- error = ktls_try_sw(so, tls);
+ error = ktls_try_sw(so, tls, KTLS_TX);
if (error) {
ktls_cleanup(tls);
@@ -1098,7 +1177,7 @@
if (mode == TCP_TLS_MODE_IFNET)
error = ktls_try_ifnet(so, tls_new, true);
else
- error = ktls_try_sw(so, tls_new);
+ error = ktls_try_sw(so, tls_new, KTLS_TX);
if (error) {
counter_u64_add(ktls_switch_failed, 1);
ktls_free(tls_new);
@@ -1422,6 +1501,345 @@
}
+/*
+ * Decide whether a complete TLS record is queued on the sb_mtls chain
+ * and, if so, schedule the socket for decryption by a KTLS worker
+ * thread.  Sets SB_TLS_RX_RUNNING (so only one worker is scheduled at
+ * a time) and takes a socket reference that ktls_decrypt() releases.
+ * If the peer closed the connection mid-record, latch EMSGSIZE so the
+ * reader sees an error for the truncated record.
+ * Caller must hold the receive sockbuf lock (asserted below).
+ */
void
+ktls_check_rx(struct sockbuf *sb)
+{
+ struct tls_record_layer hdr;
+ struct ktls_wq *wq;
+ struct socket *so;
+ bool running;
+
+ SOCKBUF_LOCK_ASSERT(sb);
+ KASSERT(sb->sb_flags & SB_TLS_RX, ("%s: sockbuf %p isn't TLS RX",
+ __func__, sb));
+ so = __containerof(sb, struct socket, so_rcv);
+
+ /* A worker thread is already handling this socket. */
+ if (sb->sb_flags & SB_TLS_RX_RUNNING)
+ return;
+
+ /* Is there enough queued for a TLS header? */
+ if (sb->sb_tlscc < sizeof(hdr)) {
+ if ((sb->sb_state & SBS_CANTRCVMORE) != 0 && sb->sb_tlscc != 0)
+ so->so_error = EMSGSIZE;
+ return;
+ }
+
+ m_copydata(sb->sb_mtls, 0, sizeof(hdr), (void *)&hdr);
+
+ /* Is the entire record queued? */
+ if (sb->sb_tlscc < sizeof(hdr) + ntohs(hdr.tls_length)) {
+ if ((sb->sb_state & SBS_CANTRCVMORE) != 0)
+ so->so_error = EMSGSIZE;
+ return;
+ }
+
+ sb->sb_flags |= SB_TLS_RX_RUNNING;
+
+ /* Reference is dropped by ktls_decrypt() when it finishes. */
+ soref(so);
+ wq = &ktls_wq[so->so_rcv.sb_tls_info->wq_index];
+ mtx_lock(&wq->mtx);
+ STAILQ_INSERT_TAIL(&wq->so_head, so, so_ktls_rx_list);
+ running = wq->running;
+ mtx_unlock(&wq->mtx);
+ if (!running)
+ wakeup(wq);
+ counter_u64_add(ktls_cnt_rx_queued, 1);
+}
+
+/*
+ * Detach the leading 'len' bytes (one full TLS record) from the
+ * sb_mtls chain and return them as a standalone mbuf chain.  The
+ * record's bytes move from the sb_tlscc accounting into sb_tlsdcc
+ * ("being decrypted") while staying counted in sb_ccc, so sbcut/
+ * sbflush can still discard them while the lock is dropped.
+ *
+ * Returns NULL only if the sockbuf lock had to be released to
+ * allocate an mbuf and the chain changed meanwhile; the caller must
+ * re-examine the chain and retry.
+ */
+static struct mbuf *
+ktls_detach_record(struct sockbuf *sb, int len)
+{
+ struct mbuf *m, *n, *top;
+ int remain;
+
+ SOCKBUF_LOCK_ASSERT(sb);
+ MPASS(len <= sb->sb_tlscc);
+
+ /*
+ * If TLS chain is the exact size of the record,
+ * just grab the whole record.
+ */
+ top = sb->sb_mtls;
+ if (sb->sb_tlscc == len) {
+ sb->sb_mtls = NULL;
+ sb->sb_mtlstail = NULL;
+ goto out;
+ }
+
+ /*
+ * While it would be nice to use m_split() here, we need
+ * to know exactly what m_split() allocates to update the
+ * accounting, so do it inline instead.
+ */
+ remain = len;
+ for (m = top; remain > m->m_len; m = m->m_next)
+ remain -= m->m_len;
+
+ /* Easy case: don't have to split 'm'. */
+ if (remain == m->m_len) {
+ sb->sb_mtls = m->m_next;
+ if (sb->sb_mtls == NULL)
+ sb->sb_mtlstail = NULL;
+ m->m_next = NULL;
+ goto out;
+ }
+
+ /*
+ * Need to allocate an mbuf to hold the remainder of 'm'. Try
+ * with M_NOWAIT first.
+ */
+ n = m_get(M_NOWAIT, MT_DATA);
+ if (n == NULL) {
+ /*
+ * Use M_WAITOK with socket buffer unlocked. If
+ * 'sb_mtls' changes while the lock is dropped, return
+ * NULL to force the caller to retry.
+ */
+ SOCKBUF_UNLOCK(sb);
+
+ n = m_get(M_WAITOK, MT_DATA);
+
+ SOCKBUF_LOCK(sb);
+ if (sb->sb_mtls != top) {
+ m_free(n);
+ return (NULL);
+ }
+ }
+ n->m_flags |= M_NOTREADY;
+
+ /* Store remainder in 'n'. */
+ n->m_len = m->m_len - remain;
+ if (m->m_flags & M_EXT) {
+ /* Share the external storage rather than copying. */
+ n->m_data = m->m_data + remain;
+ mb_dupcl(n, m);
+ } else {
+ bcopy(mtod(m, caddr_t) + remain, mtod(n, caddr_t), n->m_len);
+ }
+
+ /* Trim 'm' and update accounting. */
+ m->m_len -= n->m_len;
+ sb->sb_tlscc -= n->m_len;
+ sb->sb_ccc -= n->m_len;
+
+ /* Account for 'n'. */
+ sballoc_ktls_rx(sb, n);
+
+ /* Insert 'n' into the TLS chain. */
+ sb->sb_mtls = n;
+ n->m_next = m->m_next;
+ if (sb->sb_mtlstail == m)
+ sb->sb_mtlstail = n;
+
+ /* Detach the record from the TLS chain. */
+ m->m_next = NULL;
+
+out:
+ /* Move the record's bytes from tlscc into the tlsdcc count. */
+ MPASS(m_length(top, NULL) == len);
+ for (m = top; m != NULL; m = m->m_next)
+ sbfree_ktls_rx(sb, m);
+ sb->sb_tlsdcc = len;
+ sb->sb_ccc += len;
+ SBCHECK(sb);
+ return (top);
+}
+
+/*
+ * Count the mbufs in a chain after skipping over the first 'skip'
+ * bytes.  The caller must guarantee 'skip' is strictly less than the
+ * chain's total length; otherwise the skip loop runs past the end of
+ * the chain (NULL dereference).
+ * NOTE(review): no caller is visible in this diff — confirm this
+ * helper is actually used before keeping it.
+ */
+static int
+m_segments(struct mbuf *m, int skip)
+{
+ int count;
+
+ while (skip >= m->m_len) {
+ skip -= m->m_len;
+ m = m->m_next;
+ }
+
+ for (count = 0; m != NULL; count++)
+ m = m->m_next;
+ return (count);
+}
+
+/*
+ * Worker-thread side of KTLS RX: decrypt every complete TLS record
+ * queued on the socket's sb_mtls chain.  For each record: validate
+ * the header, detach the record, decrypt it with the sockbuf lock
+ * dropped, then re-queue the plaintext as a socket-buffer record
+ * consisting of a TLS_GET_RECORD control message plus data mbufs.
+ * Header-validation failures abort the connection (EINVAL/EMSGSIZE);
+ * authentication failures drop the record and latch EBADMSG but keep
+ * the connection alive.  Drops the socket reference taken by
+ * ktls_check_rx() before returning.
+ */
+static void
+ktls_decrypt(struct socket *so)
+{
+ char tls_header[MBUF_PEXT_HDR_LEN];
+ struct ktls_session *tls;
+ struct sockbuf *sb;
+ struct tls_record_layer *hdr;
+ struct tls_get_record tgr;
+ struct mbuf *control, *data, *m;
+ uint64_t seqno;
+ int error, remain, tls_len, trail_len;
+
+ hdr = (struct tls_record_layer *)tls_header;
+ sb = &so->so_rcv;
+ SOCKBUF_LOCK(sb);
+ KASSERT(sb->sb_flags & SB_TLS_RX_RUNNING,
+ ("%s: socket %p not running", __func__, so));
+
+ tls = sb->sb_tls_info;
+ MPASS(tls != NULL);
+
+ for (;;) {
+ /* Is there enough queued for a TLS header? */
+ if (sb->sb_tlscc < tls->params.tls_hlen)
+ break;
+
+ m_copydata(sb->sb_mtls, 0, tls->params.tls_hlen, tls_header);
+ tls_len = sizeof(*hdr) + ntohs(hdr->tls_length);
+
+ /* Sanity-check the record header before trusting its length. */
+ if (hdr->tls_vmajor != tls->params.tls_vmajor ||
+ hdr->tls_vminor != tls->params.tls_vminor)
+ error = EINVAL;
+ else if (tls_len < tls->params.tls_hlen || tls_len >
+ tls->params.tls_hlen + TLS_MAX_MSG_SIZE_V10_2 +
+ tls->params.tls_tlen)
+ error = EMSGSIZE;
+ else
+ error = 0;
+ if (__predict_false(error != 0)) {
+ /*
+ * We have a corrupted record and are likely
+ * out of sync. The connection isn't
+ * recoverable at this point, so abort it.
+ */
+ SOCKBUF_UNLOCK(sb);
+ counter_u64_add(ktls_offload_corrupted_records, 1);
+
+ CURVNET_SET(so->so_vnet);
+ so->so_proto->pr_usrreqs->pru_abort(so);
+ so->so_error = error;
+ CURVNET_RESTORE();
+ goto deref;
+ }
+
+ /* Is the entire record queued? */
+ if (sb->sb_tlscc < tls_len)
+ break;
+
+ /*
+ * Split out the portion of the mbuf chain containing
+ * this TLS record.
+ */
+ data = ktls_detach_record(sb, tls_len);
+ if (data == NULL)
+ continue;
+ MPASS(sb->sb_tlsdcc == tls_len);
+
+ seqno = sb->sb_tls_seqno;
+ sb->sb_tls_seqno++;
+ SBCHECK(sb);
+ SOCKBUF_UNLOCK(sb);
+
+ /* Decrypt in place with the sockbuf lock dropped. */
+ error = tls->sw_decrypt(tls, hdr, data, seqno, &trail_len);
+ if (error) {
+ counter_u64_add(ktls_offload_failed_crypto, 1);
+
+ SOCKBUF_LOCK(sb);
+ if (sb->sb_tlsdcc == 0) {
+ /*
+ * sbcut/drop/flush discarded these
+ * mbufs.
+ */
+ m_freem(data);
+ break;
+ }
+
+ /*
+ * Drop this TLS record's data, but keep
+ * decrypting subsequent records.
+ */
+ sb->sb_ccc -= tls_len;
+ sb->sb_tlsdcc = 0;
+
+ CURVNET_SET(so->so_vnet);
+ so->so_error = EBADMSG;
+ sorwakeup_locked(so);
+ CURVNET_RESTORE();
+
+ m_freem(data);
+
+ SOCKBUF_LOCK(sb);
+ continue;
+ }
+
+ /* Allocate the control mbuf. */
+ tgr.tls_type = hdr->tls_type;
+ tgr.tls_vmajor = hdr->tls_vmajor;
+ tgr.tls_vminor = hdr->tls_vminor;
+ tgr.tls_length = htobe16(tls_len - tls->params.tls_hlen -
+ trail_len);
+ control = sbcreatecontrol_how(&tgr, sizeof(tgr),
+ TLS_GET_RECORD, IPPROTO_TCP, M_WAITOK);
+
+ SOCKBUF_LOCK(sb);
+ if (sb->sb_tlsdcc == 0) {
+ /* sbcut/drop/flush discarded these mbufs. */
+ MPASS(sb->sb_tlscc == 0);
+ m_freem(data);
+ m_freem(control);
+ break;
+ }
+
+ /*
+ * Clear the 'dcc' accounting in preparation for
+ * adding the decrypted record.
+ */
+ sb->sb_ccc -= tls_len;
+ sb->sb_tlsdcc = 0;
+ SBCHECK(sb);
+
+ /* If there is no payload, drop all of the data. */
+ if (tgr.tls_length == htobe16(0)) {
+ m_freem(data);
+ data = NULL;
+ } else {
+ /* Trim header. */
+ remain = tls->params.tls_hlen;
+ while (remain > 0) {
+ if (data->m_len > remain) {
+ data->m_data += remain;
+ data->m_len -= remain;
+ break;
+ }
+ remain -= data->m_len;
+ data = m_free(data);
+ }
+
+ /* Trim trailer and clear M_NOTREADY. */
+ remain = be16toh(tgr.tls_length);
+ for (m = data; remain > m->m_len; m = m->m_next) {
+ m->m_flags &= ~M_NOTREADY;
+ remain -= m->m_len;
+ }
+ m->m_len = remain;
+ m_freem(m->m_next);
+ m->m_next = NULL;
+ m->m_flags &= ~M_NOTREADY;
+
+ /* Set EOR on the final mbuf. */
+ m->m_flags |= M_EOR;
+ }
+
+ sbappendcontrol_locked(sb, data, control, 0);
+ }
+
+ sb->sb_flags &= ~SB_TLS_RX_RUNNING;
+
+ /* Leftover partial record at EOF means a truncated stream. */
+ if ((sb->sb_state & SBS_CANTRCVMORE) != 0 && sb->sb_tlscc > 0)
+ so->so_error = EMSGSIZE;
+
+ sorwakeup_locked(so);
+
+deref:
+ SOCKBUF_UNLOCK_ASSERT(sb);
+
+ CURVNET_SET(so->so_vnet);
+ SOCK_LOCK(so);
+ sorele(so);
+ CURVNET_RESTORE();
+}
+
+void
ktls_enqueue_to_free(struct mbuf *m)
{
struct ktls_wq *wq;
@@ -1431,7 +1849,7 @@
m->m_epg_flags |= EPG_FLAG_2FREE;
wq = &ktls_wq[m->m_epg_tls->wq_index];
mtx_lock(&wq->mtx);
- STAILQ_INSERT_TAIL(&wq->head, m, m_epg_stailq);
+ STAILQ_INSERT_TAIL(&wq->m_head, m, m_epg_stailq);
running = wq->running;
mtx_unlock(&wq->mtx);
if (!running)
@@ -1461,12 +1879,12 @@
wq = &ktls_wq[m->m_epg_tls->wq_index];
mtx_lock(&wq->mtx);
- STAILQ_INSERT_TAIL(&wq->head, m, m_epg_stailq);
+ STAILQ_INSERT_TAIL(&wq->m_head, m, m_epg_stailq);
running = wq->running;
mtx_unlock(&wq->mtx);
if (!running)
wakeup(wq);
- counter_u64_add(ktls_cnt_on, 1);
+ counter_u64_add(ktls_cnt_tx_queued, 1);
}
static __noinline void
@@ -1618,31 +2036,41 @@
{
struct ktls_wq *wq = ctx;
struct mbuf *m, *n;
- STAILQ_HEAD(, mbuf) local_head;
+ struct socket *so, *son;
+ STAILQ_HEAD(, mbuf) local_m_head;
+ STAILQ_HEAD(, socket) local_so_head;
#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
fpu_kern_thread(0);
#endif
for (;;) {
mtx_lock(&wq->mtx);
- while (STAILQ_EMPTY(&wq->head)) {
+ while (STAILQ_EMPTY(&wq->m_head) &&
+ STAILQ_EMPTY(&wq->so_head)) {
wq->running = false;
mtx_sleep(wq, &wq->mtx, 0, "-", 0);
wq->running = true;
}
- STAILQ_INIT(&local_head);
- STAILQ_CONCAT(&local_head, &wq->head);
+ STAILQ_INIT(&local_m_head);
+ STAILQ_CONCAT(&local_m_head, &wq->m_head);
+ STAILQ_INIT(&local_so_head);
+ STAILQ_CONCAT(&local_so_head, &wq->so_head);
mtx_unlock(&wq->mtx);
- STAILQ_FOREACH_SAFE(m, &local_head, m_epg_stailq, n) {
+ STAILQ_FOREACH_SAFE(m, &local_m_head, m_epg_stailq, n) {
if (m->m_epg_flags & EPG_FLAG_2FREE) {
ktls_free(m->m_epg_tls);
uma_zfree(zone_mbuf, m);
} else {
ktls_encrypt(m);
- counter_u64_add(ktls_cnt_on, -1);
+ counter_u64_add(ktls_cnt_tx_queued, -1);
}
+ }
+
+ STAILQ_FOREACH_SAFE(so, &local_so_head, so_ktls_rx_list, son) {
+ ktls_decrypt(so);
+ counter_u64_add(ktls_cnt_rx_queued, -1);
}
}
}
Index: head/sys/kern/uipc_sockbuf.c
===================================================================
--- head/sys/kern/uipc_sockbuf.c
+++ head/sys/kern/uipc_sockbuf.c
@@ -70,6 +70,8 @@
static u_long sb_efficiency = 8; /* parameter for sbreserve() */
+static void sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m,
+ struct mbuf *n);
static struct mbuf *sbcut_internal(struct sockbuf *sb, int len);
static void sbflush_internal(struct sockbuf *sb);
@@ -334,7 +336,52 @@
sb->sb_sndptroff -= m->m_len;
}
+#ifdef KERN_TLS
/*
+ * Similar to sballoc/sbfree but does not adjust state associated with
+ * the sb_mb chain such as sb_fnrdy or sb_sndptr*. Also assumes mbufs
+ * are not ready.
+ */
+/* Account for an mbuf appended to the not-yet-decrypted TLS RX chain. */
+void
+sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m)
+{
+
+ SOCKBUF_LOCK_ASSERT(sb);
+
+ /* Bytes count toward ccc and tlscc, never toward sb_acc. */
+ sb->sb_ccc += m->m_len;
+ sb->sb_tlscc += m->m_len;
+
+ sb->sb_mbcnt += MSIZE;
+ sb->sb_mcnt += 1;
+
+ if (m->m_flags & M_EXT) {
+ sb->sb_mbcnt += m->m_ext.ext_size;
+ sb->sb_ccnt += 1;
+ }
+}
+
+/* Undo sballoc_ktls_rx() accounting for an mbuf leaving the TLS chain. */
+void
+sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m)
+{
+
+#if 0 /* XXX: not yet: soclose() call path comes here w/o lock. */
+ SOCKBUF_LOCK_ASSERT(sb);
+#endif
+
+ sb->sb_ccc -= m->m_len;
+ sb->sb_tlscc -= m->m_len;
+
+ sb->sb_mbcnt -= MSIZE;
+ sb->sb_mcnt -= 1;
+
+ if (m->m_flags & M_EXT) {
+ sb->sb_mbcnt -= m->m_ext.ext_size;
+ sb->sb_ccnt -= 1;
+ }
+}
+#endif
+
+/*
* Socantsendmore indicates that no more data will be sent on the socket; it
* would normally be applied to a socket when the user informs the system
* that no more data is to be sent, by the protocol code (in case
@@ -370,6 +417,10 @@
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
so->so_rcv.sb_state |= SBS_CANTRCVMORE;
+#ifdef KERN_TLS
+ if (so->so_rcv.sb_flags & SB_TLS_RX)
+ ktls_check_rx(&so->so_rcv);
+#endif
sorwakeup_locked(so);
mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
@@ -770,6 +821,24 @@
}
panic("%s from %s:%u", __func__, file, line);
}
+
+#ifdef KERN_TLS
+ m = sb->sb_mtls;
+ while (m && m->m_next)
+ m = m->m_next;
+
+ if (m != sb->sb_mtlstail) {
+ printf("%s: sb_mtls %p sb_mtlstail %p last %p\n",
+ __func__, sb->sb_mtls, sb->sb_mtlstail, m);
+ printf("TLS packet tree:\n");
+ printf("\t");
+ for (m = sb->sb_mtls; m != NULL; m = m->m_next) {
+ printf("%p ", m);
+ }
+ printf("\n");
+ panic("%s from %s:%u", __func__, file, line);
+ }
+#endif
}
#endif /* SOCKBUF_DEBUG */
@@ -847,7 +916,30 @@
SOCKBUF_UNLOCK(sb);
}
+#ifdef KERN_TLS
/*
+ * Append an mbuf containing encrypted TLS data. The data
+ * is marked M_NOTREADY until it has been decrypted and
+ * stored as a TLS record.
+ */
+static void
+sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m)
+{
+ struct mbuf *n;
+
+ SBLASTMBUFCHK(sb);
+
+ /* Remove all packet headers and mbuf tags to get a pure data chain. */
+ m_demote(m, 1, 0);
+
+ /* Mark the chain not-ready; cleared once the record is decrypted. */
+ for (n = m; n != NULL; n = n->m_next)
+ n->m_flags |= M_NOTREADY;
+ sbcompress_ktls_rx(sb, m, sb->sb_mtlstail);
+ /* See if a full record is now queued for a worker thread. */
+ ktls_check_rx(sb);
+}
+#endif
+
+/*
* This version of sbappend() should only be used when the caller absolutely
* knows that there will never be more than one record in the socket buffer,
* that is, a stream protocol (such as TCP).
@@ -858,6 +950,19 @@
SOCKBUF_LOCK_ASSERT(sb);
KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
+
+#ifdef KERN_TLS
+ /*
+ * Decrypted TLS records are appended as records via
+ * sbappendrecord(). TCP passes encrypted TLS records to this
+ * function which must be scheduled for decryption.
+ */
+ if (sb->sb_flags & SB_TLS_RX) {
+ sbappend_ktls_rx(sb, m);
+ return;
+ }
+#endif
+
KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
SBLASTMBUFCHK(sb);
@@ -896,6 +1001,9 @@
{
struct mbuf *m, *n, *fnrdy;
u_long acc, ccc, mbcnt;
+#ifdef KERN_TLS
+ u_long tlscc;
+#endif
SOCKBUF_LOCK_ASSERT(sb);
@@ -931,9 +1039,46 @@
mbcnt += m->m_ext.ext_size;
}
}
+#ifdef KERN_TLS
+ /*
+ * Account for mbufs "detached" by ktls_detach_record() while
+ * they are decrypted by ktls_decrypt(). tlsdcc gives a count
+ * of the detached bytes that are included in ccc. The mbufs
+ * and clusters are not included in the socket buffer
+ * accounting.
+ */
+ ccc += sb->sb_tlsdcc;
+
+ tlscc = 0;
+ for (m = sb->sb_mtls; m; m = m->m_next) {
+ if (m->m_nextpkt != NULL) {
+ printf("sb %p TLS mbuf %p with nextpkt\n", sb, m);
+ goto fail;
+ }
+ if ((m->m_flags & M_NOTREADY) == 0) {
+ printf("sb %p TLS mbuf %p ready\n", sb, m);
+ goto fail;
+ }
+ tlscc += m->m_len;
+ ccc += m->m_len;
+ mbcnt += MSIZE;
+ if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
+ mbcnt += m->m_ext.ext_size;
+ }
+
+ if (sb->sb_tlscc != tlscc) {
+ printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
+ sb->sb_tlsdcc);
+ goto fail;
+ }
+#endif
if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n",
acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
+#ifdef KERN_TLS
+ printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
+ sb->sb_tlsdcc);
+#endif
goto fail;
}
return;
@@ -1209,14 +1354,72 @@
SBLASTMBUFCHK(sb);
}
+#ifdef KERN_TLS
/*
+ * A version of sbcompress() for encrypted TLS RX mbufs. These mbufs
+ * are appended to the 'sb_mtls' chain instead of 'sb_mb' and are also
+ * a bit simpler (no EOR markers, always MT_DATA, etc.).
+ */
+static void
+sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
+{
+
+ SOCKBUF_LOCK_ASSERT(sb);
+
+ /* 'n' is the current tail of the sb_mtls chain (may be NULL). */
+ while (m) {
+ KASSERT((m->m_flags & M_EOR) == 0,
+ ("TLS RX mbuf %p with EOR", m));
+ KASSERT(m->m_type == MT_DATA,
+ ("TLS RX mbuf %p is not MT_DATA", m));
+ KASSERT((m->m_flags & M_NOTREADY) != 0,
+ ("TLS RX mbuf %p ready", m));
+ KASSERT((m->m_flags & M_EXTPG) == 0,
+ ("TLS RX mbuf %p unmapped", m));
+
+ /* Discard empty mbufs rather than queueing them. */
+ if (m->m_len == 0) {
+ m = m_free(m);
+ continue;
+ }
+
+ /*
+ * Even though both 'n' and 'm' are NOTREADY, it's ok
+ * to coalesce the data.
+ */
+ if (n &&
+ M_WRITABLE(n) &&
+ ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
+ !(n->m_flags & (M_EXTPG)) &&
+ m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
+ m->m_len <= M_TRAILINGSPACE(n)) {
+ m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
+ n->m_len += m->m_len;
+ sb->sb_ccc += m->m_len;
+ sb->sb_tlscc += m->m_len;
+ m = m_free(m);
+ continue;
+ }
+ /* Could not coalesce: link 'm' in as the new tail. */
+ if (n)
+ n->m_next = m;
+ else
+ sb->sb_mtls = m;
+ sb->sb_mtlstail = m;
+ sballoc_ktls_rx(sb, m);
+ n = m;
+ m = m->m_next;
+ n->m_next = NULL;
+ }
+ SBLASTMBUFCHK(sb);
+}
+#endif
+
+/*
* Free all mbufs in a sockbuf. Check that all resources are reclaimed.
*/
static void
sbflush_internal(struct sockbuf *sb)
{
- while (sb->sb_mbcnt) {
+ while (sb->sb_mbcnt || sb->sb_tlsdcc) {
/*
* Don't call sbcut(sb, 0) if the leading mbuf is non-empty:
* we would loop forever. Panic instead.
@@ -1254,6 +1457,7 @@
sbcut_internal(struct sockbuf *sb, int len)
{
struct mbuf *m, *next, *mfree;
+ bool is_tls;
KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0",
__func__, len));
@@ -1261,10 +1465,25 @@
__func__, len, sb->sb_ccc));
next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
+ is_tls = false;
mfree = NULL;
while (len > 0) {
if (m == NULL) {
+#ifdef KERN_TLS
+ if (next == NULL && !is_tls) {
+ if (sb->sb_tlsdcc != 0) {
+ MPASS(len >= sb->sb_tlsdcc);
+ len -= sb->sb_tlsdcc;
+ sb->sb_ccc -= sb->sb_tlsdcc;
+ sb->sb_tlsdcc = 0;
+ if (len == 0)
+ break;
+ }
+ next = sb->sb_mtls;
+ is_tls = true;
+ }
+#endif
KASSERT(next, ("%s: no next, len %d", __func__, len));
m = next;
next = m->m_nextpkt;
@@ -1283,12 +1502,17 @@
break;
}
len -= m->m_len;
- sbfree(sb, m);
+#ifdef KERN_TLS
+ if (is_tls)
+ sbfree_ktls_rx(sb, m);
+ else
+#endif
+ sbfree(sb, m);
/*
* Do not put M_NOTREADY buffers to the free list, they
* are referenced from outside.
*/
- if (m->m_flags & M_NOTREADY)
+ if (m->m_flags & M_NOTREADY && !is_tls)
m = m->m_next;
else {
struct mbuf *n;
@@ -1314,6 +1538,14 @@
mfree = m;
m = n;
}
+#ifdef KERN_TLS
+ if (is_tls) {
+ sb->sb_mb = NULL;
+ sb->sb_mtls = m;
+ if (m == NULL)
+ sb->sb_mtlstail = NULL;
+ } else
+#endif
if (m) {
sb->sb_mb = m;
m->m_nextpkt = next;
@@ -1489,17 +1721,18 @@
* type for presentation on a socket buffer.
*/
struct mbuf *
-sbcreatecontrol(caddr_t p, int size, int type, int level)
+sbcreatecontrol_how(void *p, int size, int type, int level, int wait)
{
struct cmsghdr *cp;
struct mbuf *m;
+ MBUF_CHECKSLEEP(wait);
if (CMSG_SPACE((u_int)size) > MCLBYTES)
return ((struct mbuf *) NULL);
if (CMSG_SPACE((u_int)size) > MLEN)
- m = m_getcl(M_NOWAIT, MT_CONTROL, 0);
+ m = m_getcl(wait, MT_CONTROL, 0);
else
- m = m_get(M_NOWAIT, MT_CONTROL);
+ m = m_get(wait, MT_CONTROL);
if (m == NULL)
return ((struct mbuf *) NULL);
cp = mtod(m, struct cmsghdr *);
@@ -1518,6 +1751,13 @@
cp->cmsg_level = level;
cp->cmsg_type = type;
return (m);
+}
+
+/*
+ * Legacy wrapper: build a control message without sleeping.  Callers
+ * that can block should use sbcreatecontrol_how() with M_WAITOK.
+ */
+struct mbuf *
+sbcreatecontrol(caddr_t p, int size, int type, int level)
+{
+
+ return (sbcreatecontrol_how(p, size, type, level, M_NOWAIT));
+}
/*
Index: head/sys/kern/uipc_socket.c
===================================================================
--- head/sys/kern/uipc_socket.c
+++ head/sys/kern/uipc_socket.c
@@ -1965,7 +1965,8 @@
}
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
- if (m == NULL) {
+ if (m == NULL && so->so_rcv.sb_tlsdcc == 0 &&
+ so->so_rcv.sb_tlscc == 0) {
SOCKBUF_UNLOCK(&so->so_rcv);
goto release;
} else
Index: head/sys/opencrypto/ktls_ocf.c
===================================================================
--- head/sys/opencrypto/ktls_ocf.c
+++ head/sys/opencrypto/ktls_ocf.c
@@ -223,6 +223,56 @@
}
+/*
+ * Decrypt and verify one TLS 1.2 AES-GCM record held in mbuf chain
+ * 'm' (header, ciphertext, and trailing GCM tag).  On success the
+ * payload is decrypted in place and *trailer_len is set to the tag
+ * length so the caller can trim it.  Returns an OpenCrypto error
+ * (e.g. a digest-verification failure) otherwise.
+ */
static int
+ktls_ocf_tls12_gcm_decrypt(struct ktls_session *tls,
+ const struct tls_record_layer *hdr, struct mbuf *m, uint64_t seqno,
+ int *trailer_len)
+{
+ struct tls_aead_data ad;
+ struct cryptop crp;
+ struct ocf_session *os;
+ struct ocf_operation oo;
+ int error;
+ uint16_t tls_comp_len;
+
+ os = tls->cipher;
+
+ /* NOTE(review): 'oo' is initialized but not referenced again in
+ * this function — confirm it is needed (compare the encrypt paths). */
+ oo.os = os;
+ oo.done = false;
+
+ crypto_initreq(&crp, os->sid);
+
+ /* Setup the IV: 4-byte salt from the session + 8-byte explicit
+ * nonce taken from the bytes immediately after the record header. */
+ memcpy(crp.crp_iv, tls->params.iv, TLS_AEAD_GCM_LEN);
+ memcpy(crp.crp_iv + TLS_AEAD_GCM_LEN, hdr + 1, sizeof(uint64_t));
+
+ /* Setup the AAD: plaintext length excludes explicit nonce + tag. */
+ tls_comp_len = ntohs(hdr->tls_length) -
+ (AES_GMAC_HASH_LEN + sizeof(uint64_t));
+ ad.seq = htobe64(seqno);
+ ad.type = hdr->tls_type;
+ ad.tls_vmajor = hdr->tls_vmajor;
+ ad.tls_vminor = hdr->tls_vminor;
+ ad.tls_length = htons(tls_comp_len);
+ crp.crp_aad = &ad;
+ crp.crp_aad_length = sizeof(ad);
+
+ crp.crp_payload_start = tls->params.tls_hlen;
+ crp.crp_payload_length = tls_comp_len;
+ crp.crp_digest_start = crp.crp_payload_start + crp.crp_payload_length;
+
+ crp.crp_op = CRYPTO_OP_DECRYPT | CRYPTO_OP_VERIFY_DIGEST;
+ crp.crp_flags = CRYPTO_F_CBIMM | CRYPTO_F_IV_SEPARATE;
+ crypto_use_mbuf(&crp, m);
+
+ counter_u64_add(ocf_tls12_gcm_crypts, 1);
+ error = ktls_ocf_dispatch(os, &crp);
+
+ crypto_destroyreq(&crp);
+ *trailer_len = AES_GMAC_HASH_LEN;
+ return (error);
+}
+
+static int
ktls_ocf_tls13_gcm_encrypt(struct ktls_session *tls,
const struct tls_record_layer *hdr, uint8_t *trailer, struct iovec *iniov,
struct iovec *outiov, int iovcnt, uint64_t seqno, uint8_t record_type)
@@ -325,7 +375,7 @@
}
static int
-ktls_ocf_try(struct socket *so, struct ktls_session *tls)
+ktls_ocf_try(struct socket *so, struct ktls_session *tls, int direction)
{
struct crypto_session_params csp;
struct ocf_session *os;
@@ -359,6 +409,11 @@
tls->params.tls_vminor > TLS_MINOR_VER_THREE)
return (EPROTONOSUPPORT);
+ /* TLS 1.3 is not yet supported for receive. */
+ if (direction == KTLS_RX &&
+ tls->params.tls_vminor == TLS_MINOR_VER_THREE)
+ return (EPROTONOSUPPORT);
+
os = malloc(sizeof(*os), M_KTLS_OCF, M_NOWAIT | M_ZERO);
if (os == NULL)
return (ENOMEM);
@@ -372,10 +427,14 @@
mtx_init(&os->lock, "ktls_ocf", NULL, MTX_DEF);
tls->cipher = os;
- if (tls->params.tls_vminor == TLS_MINOR_VER_THREE)
- tls->sw_encrypt = ktls_ocf_tls13_gcm_encrypt;
- else
- tls->sw_encrypt = ktls_ocf_tls12_gcm_encrypt;
+ if (direction == KTLS_TX) {
+ if (tls->params.tls_vminor == TLS_MINOR_VER_THREE)
+ tls->sw_encrypt = ktls_ocf_tls13_gcm_encrypt;
+ else
+ tls->sw_encrypt = ktls_ocf_tls12_gcm_encrypt;
+ } else {
+ tls->sw_decrypt = ktls_ocf_tls12_gcm_decrypt;
+ }
tls->free = ktls_ocf_free;
return (0);
}
Index: head/sys/sys/ktls.h
===================================================================
--- head/sys/sys/ktls.h
+++ head/sys/sys/ktls.h
@@ -163,7 +163,7 @@
#define KTLS_TX 1
#define KTLS_RX 2
-#define KTLS_API_VERSION 6
+#define KTLS_API_VERSION 7
struct iovec;
struct ktls_session;
@@ -174,7 +174,7 @@
struct ktls_crypto_backend {
LIST_ENTRY(ktls_crypto_backend) next;
- int (*try)(struct socket *so, struct ktls_session *tls);
+ int (*try)(struct socket *so, struct ktls_session *tls, int direction);
int prio;
int api_version;
int use_count;
@@ -182,11 +182,16 @@
};
struct ktls_session {
- int (*sw_encrypt)(struct ktls_session *tls,
- const struct tls_record_layer *hdr, uint8_t *trailer,
- struct iovec *src, struct iovec *dst, int iovcnt,
- uint64_t seqno, uint8_t record_type);
union {
+ int (*sw_encrypt)(struct ktls_session *tls,
+ const struct tls_record_layer *hdr, uint8_t *trailer,
+ struct iovec *src, struct iovec *dst, int iovcnt,
+ uint64_t seqno, uint8_t record_type);
+ int (*sw_decrypt)(struct ktls_session *tls,
+ const struct tls_record_layer *hdr, struct mbuf *m,
+ uint64_t seqno, int *trailer_len);
+ };
+ union {
void *cipher;
struct m_snd_tag *snd_tag;
};
@@ -202,6 +207,7 @@
bool reset_pending;
} __aligned(CACHE_LINE_SIZE);
+void ktls_check_rx(struct sockbuf *sb);
int ktls_crypto_backend_register(struct ktls_crypto_backend *be);
int ktls_crypto_backend_deregister(struct ktls_crypto_backend *be);
int ktls_enable_rx(struct socket *so, struct tls_enable *en);
Index: head/sys/sys/sockbuf.h
===================================================================
--- head/sys/sys/sockbuf.h
+++ head/sys/sys/sockbuf.h
@@ -38,6 +38,8 @@
/*
* Constants for sb_flags field of struct sockbuf/xsockbuf.
*/
+#define SB_TLS_RX 0x01 /* using KTLS on RX */
+#define SB_TLS_RX_RUNNING 0x02 /* KTLS RX operation running */
#define SB_WAIT 0x04 /* someone is waiting for data/space */
#define SB_SEL 0x08 /* someone is selecting */
#define SB_ASYNC 0x10 /* ASYNC I/O, need signals */
@@ -99,10 +101,14 @@
u_int sb_ccnt; /* (a) number of clusters in buffer */
u_int sb_mbmax; /* (a) max chars of mbufs to use */
u_int sb_ctl; /* (a) non-data chars in buffer */
+ u_int sb_tlscc; /* (a) TLS chain characters */
+ u_int sb_tlsdcc; /* (a) TLS characters being decrypted */
int sb_lowat; /* (a) low water mark */
sbintime_t sb_timeo; /* (a) timeout for read/write */
uint64_t sb_tls_seqno; /* (a) TLS seqno */
struct ktls_session *sb_tls_info; /* (a + b) TLS state */
+ struct mbuf *sb_mtls; /* (a) TLS mbuf chain */
+ struct mbuf *sb_mtlstail; /* (a) last mbuf in TLS chain */
short sb_flags; /* (a) flags, see above */
int (*sb_upcall)(struct socket *, void *, int); /* (a) */
void *sb_upcallarg; /* (a) */
@@ -153,6 +159,9 @@
void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
struct mbuf *
sbcreatecontrol(caddr_t p, int size, int type, int level);
+struct mbuf *
+ sbcreatecontrol_how(void *p, int size, int type, int level,
+ int wait);
void sbdestroy(struct sockbuf *sb, struct socket *so);
void sbdrop(struct sockbuf *sb, int len);
void sbdrop_locked(struct sockbuf *sb, int len);
@@ -178,6 +187,8 @@
void sbunlock(struct sockbuf *sb);
void sballoc(struct sockbuf *, struct mbuf *);
void sbfree(struct sockbuf *, struct mbuf *);
+void sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m);
+void sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m);
int sbready(struct sockbuf *, struct mbuf *, int);
/*
Index: head/sys/sys/socketvar.h
===================================================================
--- head/sys/sys/socketvar.h
+++ head/sys/sys/socketvar.h
@@ -83,6 +83,7 @@
* (f) not locked since integer reads/writes are atomic.
* (g) used only as a sleep/wakeup address, no value.
* (h) locked by global mutex so_global_mtx.
+ * (k) locked by KTLS workqueue mutex
*/
TAILQ_HEAD(accept_queue, socket);
struct socket {
@@ -132,6 +133,9 @@
/* (b) cached MAC label for peer */
struct label *so_peerlabel;
u_long so_oobmark; /* chars to oob mark */
+
+ /* (k) Our place on KTLS RX work queue. */
+ STAILQ_ENTRY(socket) so_ktls_rx_list;
};
/*
* Listening socket, where accepts occur, is so_listen in all

File Metadata

Mime Type
text/plain
Expires
Sat, Jan 18, 2:12 AM (17 h, 46 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15848886
Default Alt Text
D24628.diff (34 KB)

Event Timeline