Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F107644917
D24628.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
34 KB
Referenced Files
None
Subscribers
None
D24628.diff
View Options
Index: head/share/man/man4/tcp.4
===================================================================
--- head/share/man/man4/tcp.4
+++ head/share/man/man4/tcp.4
@@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd April 27, 2020
+.Dd July 23, 2020
.Dt TCP 4
.Os
.Sh NAME
@@ -356,10 +356,22 @@
The control message contains a
.Vt struct tls_get_record
which includes fields from the TLS record header.
-If a corrupted TLS record is received,
+If an invalid or corrupted TLS record is received,
.Xr recvmsg 2
-will fail with
-.Dv EBADMSG .
+will fail with one of the following errors:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The version fields in a TLS record's header did not match the version required
+by the
+.Vt struct tls_so_enable
+structure used to enable in-kernel TLS.
+.It Bq Er EMSGSIZE
+A TLS record's length was either too small or too large.
+.It Bq Er EMSGSIZE
+The connection was closed after sending a truncated TLS record.
+.It Bq Er EBADMSG
+The TLS record failed to match the included authentication tag.
+.El
.Pp
At present, only a single receive key may be set on a socket.
As such, users of this option must disable rekeying.
Index: head/sys/kern/uipc_ktls.c
===================================================================
--- head/sys/kern/uipc_ktls.c
+++ head/sys/kern/uipc_ktls.c
@@ -78,7 +78,8 @@
struct ktls_wq {
struct mtx mtx;
- STAILQ_HEAD(, mbuf) head;
+ STAILQ_HEAD(, mbuf) m_head;
+ STAILQ_HEAD(, socket) so_head;
bool running;
} __aligned(CACHE_LINE_SIZE);
@@ -130,10 +131,16 @@
SYSCTL_COUNTER_U64(_kern_ipc_tls, OID_AUTO, tasks_active, CTLFLAG_RD,
&ktls_tasks_active, "Number of active tasks");
-static counter_u64_t ktls_cnt_on;
-SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, so_inqueue, CTLFLAG_RD,
- &ktls_cnt_on, "Number of TLS records in queue to tasks for SW crypto");
+static counter_u64_t ktls_cnt_tx_queued;
+SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, sw_tx_inqueue, CTLFLAG_RD,
+ &ktls_cnt_tx_queued,
+ "Number of TLS records in queue to tasks for SW encryption");
+static counter_u64_t ktls_cnt_rx_queued;
+SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, sw_rx_inqueue, CTLFLAG_RD,
+ &ktls_cnt_rx_queued,
+ "Number of TLS sockets in queue to tasks for SW decryption");
+
static counter_u64_t ktls_offload_total;
SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, offload_total,
CTLFLAG_RD, &ktls_offload_total,
@@ -148,6 +155,10 @@
SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, active, CTLFLAG_RD,
&ktls_offload_active, "Total Active TLS sessions");
+static counter_u64_t ktls_offload_corrupted_records;
+SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, corrupted_records, CTLFLAG_RD,
+ &ktls_offload_corrupted_records, "Total corrupted TLS records received");
+
static counter_u64_t ktls_offload_failed_crypto;
SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, failed_crypto, CTLFLAG_RD,
&ktls_offload_failed_crypto, "Total TLS crypto failures");
@@ -333,10 +344,12 @@
int error, i;
ktls_tasks_active = counter_u64_alloc(M_WAITOK);
- ktls_cnt_on = counter_u64_alloc(M_WAITOK);
+ ktls_cnt_tx_queued = counter_u64_alloc(M_WAITOK);
+ ktls_cnt_rx_queued = counter_u64_alloc(M_WAITOK);
ktls_offload_total = counter_u64_alloc(M_WAITOK);
ktls_offload_enable_calls = counter_u64_alloc(M_WAITOK);
ktls_offload_active = counter_u64_alloc(M_WAITOK);
+ ktls_offload_corrupted_records = counter_u64_alloc(M_WAITOK);
ktls_offload_failed_crypto = counter_u64_alloc(M_WAITOK);
ktls_switch_to_ifnet = counter_u64_alloc(M_WAITOK);
ktls_switch_to_sw = counter_u64_alloc(M_WAITOK);
@@ -369,7 +382,8 @@
* work queue for each CPU.
*/
CPU_FOREACH(i) {
- STAILQ_INIT(&ktls_wq[i].head);
+ STAILQ_INIT(&ktls_wq[i].m_head);
+ STAILQ_INIT(&ktls_wq[i].so_head);
mtx_init(&ktls_wq[i].mtx, "ktls work queue", NULL, MTX_DEF);
error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i],
&ktls_proc, &td, 0, 0, "KTLS", "thr_%d", i);
@@ -855,7 +869,7 @@
}
static int
-ktls_try_sw(struct socket *so, struct ktls_session *tls)
+ktls_try_sw(struct socket *so, struct ktls_session *tls, int direction)
{
struct rm_priotracker prio;
struct ktls_crypto_backend *be;
@@ -870,7 +884,7 @@
if (ktls_allow_unload)
rm_rlock(&ktls_backends_lock, &prio);
LIST_FOREACH(be, &ktls_backends, next) {
- if (be->try(so, tls) == 0)
+ if (be->try(so, tls, direction) == 0)
break;
KASSERT(tls->cipher == NULL,
("ktls backend leaked a cipher pointer"));
@@ -896,6 +910,61 @@
return (0);
}
+/*
+ * KTLS RX stores data in the socket buffer as a list of TLS records,
+ * where each record is stored as a control message containing the TLS
+ * header followed by data mbufs containing the decrypted data. This
+ * is different from KTLS TX which always uses an mb_ext_pgs mbuf for
+ * both encrypted and decrypted data. TLS records decrypted by a NIC
+ * should be queued to the socket buffer as records, but encrypted
+ * data which needs to be decrypted by software arrives as a stream of
+ * regular mbufs which need to be converted. In addition, there may
+ * already be pending encrypted data in the socket buffer when KTLS RX
+ * is enabled.
+ *
+ * To manage not-yet-decrypted data for KTLS RX, the following scheme
+ * is used:
+ *
+ * - A single chain of NOTREADY mbufs is hung off of sb_mtls.
+ *
+ * - ktls_check_rx checks this chain of mbufs reading the TLS header
+ * from the first mbuf. Once all of the data for that TLS record is
+ * queued, the socket is queued to a worker thread.
+ *
+ * - The worker thread calls ktls_decrypt to decrypt TLS records in
+ * the TLS chain. Each TLS record is detached from the TLS chain,
+ * decrypted, and inserted into the regular socket buffer chain as
+ * record starting with a control message holding the TLS header and
+ * a chain of mbufs holding the encrypted data.
+ */
+
+static void
+sb_mark_notready(struct sockbuf *sb)
+{
+ struct mbuf *m;
+
+ m = sb->sb_mb;
+ sb->sb_mtls = m;
+ sb->sb_mb = NULL;
+ sb->sb_mbtail = NULL;
+ sb->sb_lastrecord = NULL;
+ for (; m != NULL; m = m->m_next) {
+ KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt != NULL",
+ __func__));
+ KASSERT((m->m_flags & M_NOTAVAIL) == 0, ("%s: mbuf not avail",
+ __func__));
+ KASSERT(sb->sb_acc >= m->m_len, ("%s: sb_acc < m->m_len",
+ __func__));
+ m->m_flags |= M_NOTREADY;
+ sb->sb_acc -= m->m_len;
+ sb->sb_tlscc += m->m_len;
+ sb->sb_mtlstail = m;
+ }
+ KASSERT(sb->sb_acc == 0 && sb->sb_tlscc == sb->sb_ccc,
+ ("%s: acc %u tlscc %u ccc %u", __func__, sb->sb_acc, sb->sb_tlscc,
+ sb->sb_ccc));
+}
+
int
ktls_enable_rx(struct socket *so, struct tls_enable *en)
{
@@ -924,16 +993,20 @@
if (en->cipher_algorithm == CRYPTO_AES_CBC && !ktls_cbc_enable)
return (ENOTSUP);
+ /* TLS 1.3 is not yet supported. */
+ if (en->tls_vmajor == TLS_MAJOR_VER_ONE &&
+ en->tls_vminor == TLS_MINOR_VER_THREE)
+ return (ENOTSUP);
+
error = ktls_create_session(so, en, &tls);
if (error)
return (error);
- /* TLS RX offload is only supported on TOE currently. */
#ifdef TCP_OFFLOAD
error = ktls_try_toe(so, tls, KTLS_RX);
-#else
- error = EOPNOTSUPP;
+ if (error)
#endif
+ error = ktls_try_sw(so, tls, KTLS_RX);
if (error) {
ktls_cleanup(tls);
@@ -942,7 +1015,13 @@
/* Mark the socket as using TLS offload. */
SOCKBUF_LOCK(&so->so_rcv);
+ so->so_rcv.sb_tls_seqno = be64dec(en->rec_seq);
so->so_rcv.sb_tls_info = tls;
+ so->so_rcv.sb_flags |= SB_TLS_RX;
+
+ /* Mark existing data as not ready until it can be decrypted. */
+ sb_mark_notready(&so->so_rcv);
+ ktls_check_rx(&so->so_rcv);
SOCKBUF_UNLOCK(&so->so_rcv);
counter_u64_add(ktls_offload_total, 1);
@@ -993,7 +1072,7 @@
#endif
error = ktls_try_ifnet(so, tls, false);
if (error)
- error = ktls_try_sw(so, tls);
+ error = ktls_try_sw(so, tls, KTLS_TX);
if (error) {
ktls_cleanup(tls);
@@ -1098,7 +1177,7 @@
if (mode == TCP_TLS_MODE_IFNET)
error = ktls_try_ifnet(so, tls_new, true);
else
- error = ktls_try_sw(so, tls_new);
+ error = ktls_try_sw(so, tls_new, KTLS_TX);
if (error) {
counter_u64_add(ktls_switch_failed, 1);
ktls_free(tls_new);
@@ -1422,6 +1501,345 @@
}
void
+ktls_check_rx(struct sockbuf *sb)
+{
+ struct tls_record_layer hdr;
+ struct ktls_wq *wq;
+ struct socket *so;
+ bool running;
+
+ SOCKBUF_LOCK_ASSERT(sb);
+ KASSERT(sb->sb_flags & SB_TLS_RX, ("%s: sockbuf %p isn't TLS RX",
+ __func__, sb));
+ so = __containerof(sb, struct socket, so_rcv);
+
+ if (sb->sb_flags & SB_TLS_RX_RUNNING)
+ return;
+
+ /* Is there enough queued for a TLS header? */
+ if (sb->sb_tlscc < sizeof(hdr)) {
+ if ((sb->sb_state & SBS_CANTRCVMORE) != 0 && sb->sb_tlscc != 0)
+ so->so_error = EMSGSIZE;
+ return;
+ }
+
+ m_copydata(sb->sb_mtls, 0, sizeof(hdr), (void *)&hdr);
+
+ /* Is the entire record queued? */
+ if (sb->sb_tlscc < sizeof(hdr) + ntohs(hdr.tls_length)) {
+ if ((sb->sb_state & SBS_CANTRCVMORE) != 0)
+ so->so_error = EMSGSIZE;
+ return;
+ }
+
+ sb->sb_flags |= SB_TLS_RX_RUNNING;
+
+ soref(so);
+ wq = &ktls_wq[so->so_rcv.sb_tls_info->wq_index];
+ mtx_lock(&wq->mtx);
+ STAILQ_INSERT_TAIL(&wq->so_head, so, so_ktls_rx_list);
+ running = wq->running;
+ mtx_unlock(&wq->mtx);
+ if (!running)
+ wakeup(wq);
+ counter_u64_add(ktls_cnt_rx_queued, 1);
+}
+
+static struct mbuf *
+ktls_detach_record(struct sockbuf *sb, int len)
+{
+ struct mbuf *m, *n, *top;
+ int remain;
+
+ SOCKBUF_LOCK_ASSERT(sb);
+ MPASS(len <= sb->sb_tlscc);
+
+ /*
+ * If TLS chain is the exact size of the record,
+ * just grab the whole record.
+ */
+ top = sb->sb_mtls;
+ if (sb->sb_tlscc == len) {
+ sb->sb_mtls = NULL;
+ sb->sb_mtlstail = NULL;
+ goto out;
+ }
+
+ /*
+ * While it would be nice to use m_split() here, we need
+ * to know exactly what m_split() allocates to update the
+ * accounting, so do it inline instead.
+ */
+ remain = len;
+ for (m = top; remain > m->m_len; m = m->m_next)
+ remain -= m->m_len;
+
+ /* Easy case: don't have to split 'm'. */
+ if (remain == m->m_len) {
+ sb->sb_mtls = m->m_next;
+ if (sb->sb_mtls == NULL)
+ sb->sb_mtlstail = NULL;
+ m->m_next = NULL;
+ goto out;
+ }
+
+ /*
+ * Need to allocate an mbuf to hold the remainder of 'm'. Try
+ * with M_NOWAIT first.
+ */
+ n = m_get(M_NOWAIT, MT_DATA);
+ if (n == NULL) {
+ /*
+ * Use M_WAITOK with socket buffer unlocked. If
+ * 'sb_mtls' changes while the lock is dropped, return
+ * NULL to force the caller to retry.
+ */
+ SOCKBUF_UNLOCK(sb);
+
+ n = m_get(M_WAITOK, MT_DATA);
+
+ SOCKBUF_LOCK(sb);
+ if (sb->sb_mtls != top) {
+ m_free(n);
+ return (NULL);
+ }
+ }
+ n->m_flags |= M_NOTREADY;
+
+ /* Store remainder in 'n'. */
+ n->m_len = m->m_len - remain;
+ if (m->m_flags & M_EXT) {
+ n->m_data = m->m_data + remain;
+ mb_dupcl(n, m);
+ } else {
+ bcopy(mtod(m, caddr_t) + remain, mtod(n, caddr_t), n->m_len);
+ }
+
+ /* Trim 'm' and update accounting. */
+ m->m_len -= n->m_len;
+ sb->sb_tlscc -= n->m_len;
+ sb->sb_ccc -= n->m_len;
+
+ /* Account for 'n'. */
+ sballoc_ktls_rx(sb, n);
+
+ /* Insert 'n' into the TLS chain. */
+ sb->sb_mtls = n;
+ n->m_next = m->m_next;
+ if (sb->sb_mtlstail == m)
+ sb->sb_mtlstail = n;
+
+ /* Detach the record from the TLS chain. */
+ m->m_next = NULL;
+
+out:
+ MPASS(m_length(top, NULL) == len);
+ for (m = top; m != NULL; m = m->m_next)
+ sbfree_ktls_rx(sb, m);
+ sb->sb_tlsdcc = len;
+ sb->sb_ccc += len;
+ SBCHECK(sb);
+ return (top);
+}
+
+static int
+m_segments(struct mbuf *m, int skip)
+{
+ int count;
+
+ while (skip >= m->m_len) {
+ skip -= m->m_len;
+ m = m->m_next;
+ }
+
+ for (count = 0; m != NULL; count++)
+ m = m->m_next;
+ return (count);
+}
+
+static void
+ktls_decrypt(struct socket *so)
+{
+ char tls_header[MBUF_PEXT_HDR_LEN];
+ struct ktls_session *tls;
+ struct sockbuf *sb;
+ struct tls_record_layer *hdr;
+ struct tls_get_record tgr;
+ struct mbuf *control, *data, *m;
+ uint64_t seqno;
+ int error, remain, tls_len, trail_len;
+
+ hdr = (struct tls_record_layer *)tls_header;
+ sb = &so->so_rcv;
+ SOCKBUF_LOCK(sb);
+ KASSERT(sb->sb_flags & SB_TLS_RX_RUNNING,
+ ("%s: socket %p not running", __func__, so));
+
+ tls = sb->sb_tls_info;
+ MPASS(tls != NULL);
+
+ for (;;) {
+ /* Is there enough queued for a TLS header? */
+ if (sb->sb_tlscc < tls->params.tls_hlen)
+ break;
+
+ m_copydata(sb->sb_mtls, 0, tls->params.tls_hlen, tls_header);
+ tls_len = sizeof(*hdr) + ntohs(hdr->tls_length);
+
+ if (hdr->tls_vmajor != tls->params.tls_vmajor ||
+ hdr->tls_vminor != tls->params.tls_vminor)
+ error = EINVAL;
+ else if (tls_len < tls->params.tls_hlen || tls_len >
+ tls->params.tls_hlen + TLS_MAX_MSG_SIZE_V10_2 +
+ tls->params.tls_tlen)
+ error = EMSGSIZE;
+ else
+ error = 0;
+ if (__predict_false(error != 0)) {
+ /*
+ * We have a corrupted record and are likely
+ * out of sync. The connection isn't
+ * recoverable at this point, so abort it.
+ */
+ SOCKBUF_UNLOCK(sb);
+ counter_u64_add(ktls_offload_corrupted_records, 1);
+
+ CURVNET_SET(so->so_vnet);
+ so->so_proto->pr_usrreqs->pru_abort(so);
+ so->so_error = error;
+ CURVNET_RESTORE();
+ goto deref;
+ }
+
+ /* Is the entire record queued? */
+ if (sb->sb_tlscc < tls_len)
+ break;
+
+ /*
+ * Split out the portion of the mbuf chain containing
+ * this TLS record.
+ */
+ data = ktls_detach_record(sb, tls_len);
+ if (data == NULL)
+ continue;
+ MPASS(sb->sb_tlsdcc == tls_len);
+
+ seqno = sb->sb_tls_seqno;
+ sb->sb_tls_seqno++;
+ SBCHECK(sb);
+ SOCKBUF_UNLOCK(sb);
+
+ error = tls->sw_decrypt(tls, hdr, data, seqno, &trail_len);
+ if (error) {
+ counter_u64_add(ktls_offload_failed_crypto, 1);
+
+ SOCKBUF_LOCK(sb);
+ if (sb->sb_tlsdcc == 0) {
+ /*
+ * sbcut/drop/flush discarded these
+ * mbufs.
+ */
+ m_freem(data);
+ break;
+ }
+
+ /*
+ * Drop this TLS record's data, but keep
+ * decrypting subsequent records.
+ */
+ sb->sb_ccc -= tls_len;
+ sb->sb_tlsdcc = 0;
+
+ CURVNET_SET(so->so_vnet);
+ so->so_error = EBADMSG;
+ sorwakeup_locked(so);
+ CURVNET_RESTORE();
+
+ m_freem(data);
+
+ SOCKBUF_LOCK(sb);
+ continue;
+ }
+
+ /* Allocate the control mbuf. */
+ tgr.tls_type = hdr->tls_type;
+ tgr.tls_vmajor = hdr->tls_vmajor;
+ tgr.tls_vminor = hdr->tls_vminor;
+ tgr.tls_length = htobe16(tls_len - tls->params.tls_hlen -
+ trail_len);
+ control = sbcreatecontrol_how(&tgr, sizeof(tgr),
+ TLS_GET_RECORD, IPPROTO_TCP, M_WAITOK);
+
+ SOCKBUF_LOCK(sb);
+ if (sb->sb_tlsdcc == 0) {
+ /* sbcut/drop/flush discarded these mbufs. */
+ MPASS(sb->sb_tlscc == 0);
+ m_freem(data);
+ m_freem(control);
+ break;
+ }
+
+ /*
+ * Clear the 'dcc' accounting in preparation for
+ * adding the decrypted record.
+ */
+ sb->sb_ccc -= tls_len;
+ sb->sb_tlsdcc = 0;
+ SBCHECK(sb);
+
+ /* If there is no payload, drop all of the data. */
+ if (tgr.tls_length == htobe16(0)) {
+ m_freem(data);
+ data = NULL;
+ } else {
+ /* Trim header. */
+ remain = tls->params.tls_hlen;
+ while (remain > 0) {
+ if (data->m_len > remain) {
+ data->m_data += remain;
+ data->m_len -= remain;
+ break;
+ }
+ remain -= data->m_len;
+ data = m_free(data);
+ }
+
+ /* Trim trailer and clear M_NOTREADY. */
+ remain = be16toh(tgr.tls_length);
+ m = data;
+ for (m = data; remain > m->m_len; m = m->m_next) {
+ m->m_flags &= ~M_NOTREADY;
+ remain -= m->m_len;
+ }
+ m->m_len = remain;
+ m_freem(m->m_next);
+ m->m_next = NULL;
+ m->m_flags &= ~M_NOTREADY;
+
+ /* Set EOR on the final mbuf. */
+ m->m_flags |= M_EOR;
+ }
+
+ sbappendcontrol_locked(sb, data, control, 0);
+ }
+
+ sb->sb_flags &= ~SB_TLS_RX_RUNNING;
+
+ if ((sb->sb_state & SBS_CANTRCVMORE) != 0 && sb->sb_tlscc > 0)
+ so->so_error = EMSGSIZE;
+
+ sorwakeup_locked(so);
+
+deref:
+ SOCKBUF_UNLOCK_ASSERT(sb);
+
+ CURVNET_SET(so->so_vnet);
+ SOCK_LOCK(so);
+ sorele(so);
+ CURVNET_RESTORE();
+}
+
+void
ktls_enqueue_to_free(struct mbuf *m)
{
struct ktls_wq *wq;
@@ -1431,7 +1849,7 @@
m->m_epg_flags |= EPG_FLAG_2FREE;
wq = &ktls_wq[m->m_epg_tls->wq_index];
mtx_lock(&wq->mtx);
- STAILQ_INSERT_TAIL(&wq->head, m, m_epg_stailq);
+ STAILQ_INSERT_TAIL(&wq->m_head, m, m_epg_stailq);
running = wq->running;
mtx_unlock(&wq->mtx);
if (!running)
@@ -1461,12 +1879,12 @@
wq = &ktls_wq[m->m_epg_tls->wq_index];
mtx_lock(&wq->mtx);
- STAILQ_INSERT_TAIL(&wq->head, m, m_epg_stailq);
+ STAILQ_INSERT_TAIL(&wq->m_head, m, m_epg_stailq);
running = wq->running;
mtx_unlock(&wq->mtx);
if (!running)
wakeup(wq);
- counter_u64_add(ktls_cnt_on, 1);
+ counter_u64_add(ktls_cnt_tx_queued, 1);
}
static __noinline void
@@ -1618,31 +2036,41 @@
{
struct ktls_wq *wq = ctx;
struct mbuf *m, *n;
- STAILQ_HEAD(, mbuf) local_head;
+ struct socket *so, *son;
+ STAILQ_HEAD(, mbuf) local_m_head;
+ STAILQ_HEAD(, socket) local_so_head;
#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__)
fpu_kern_thread(0);
#endif
for (;;) {
mtx_lock(&wq->mtx);
- while (STAILQ_EMPTY(&wq->head)) {
+ while (STAILQ_EMPTY(&wq->m_head) &&
+ STAILQ_EMPTY(&wq->so_head)) {
wq->running = false;
mtx_sleep(wq, &wq->mtx, 0, "-", 0);
wq->running = true;
}
- STAILQ_INIT(&local_head);
- STAILQ_CONCAT(&local_head, &wq->head);
+ STAILQ_INIT(&local_m_head);
+ STAILQ_CONCAT(&local_m_head, &wq->m_head);
+ STAILQ_INIT(&local_so_head);
+ STAILQ_CONCAT(&local_so_head, &wq->so_head);
mtx_unlock(&wq->mtx);
- STAILQ_FOREACH_SAFE(m, &local_head, m_epg_stailq, n) {
+ STAILQ_FOREACH_SAFE(m, &local_m_head, m_epg_stailq, n) {
if (m->m_epg_flags & EPG_FLAG_2FREE) {
ktls_free(m->m_epg_tls);
uma_zfree(zone_mbuf, m);
} else {
ktls_encrypt(m);
- counter_u64_add(ktls_cnt_on, -1);
+ counter_u64_add(ktls_cnt_tx_queued, -1);
}
+ }
+
+ STAILQ_FOREACH_SAFE(so, &local_so_head, so_ktls_rx_list, son) {
+ ktls_decrypt(so);
+ counter_u64_add(ktls_cnt_rx_queued, -1);
}
}
}
Index: head/sys/kern/uipc_sockbuf.c
===================================================================
--- head/sys/kern/uipc_sockbuf.c
+++ head/sys/kern/uipc_sockbuf.c
@@ -70,6 +70,8 @@
static u_long sb_efficiency = 8; /* parameter for sbreserve() */
+static void sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m,
+ struct mbuf *n);
static struct mbuf *sbcut_internal(struct sockbuf *sb, int len);
static void sbflush_internal(struct sockbuf *sb);
@@ -334,7 +336,52 @@
sb->sb_sndptroff -= m->m_len;
}
+#ifdef KERN_TLS
/*
+ * Similar to sballoc/sbfree but does not adjust state associated with
+ * the sb_mb chain such as sb_fnrdy or sb_sndptr*. Also assumes mbufs
+ * are not ready.
+ */
+void
+sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m)
+{
+
+ SOCKBUF_LOCK_ASSERT(sb);
+
+ sb->sb_ccc += m->m_len;
+ sb->sb_tlscc += m->m_len;
+
+ sb->sb_mbcnt += MSIZE;
+ sb->sb_mcnt += 1;
+
+ if (m->m_flags & M_EXT) {
+ sb->sb_mbcnt += m->m_ext.ext_size;
+ sb->sb_ccnt += 1;
+ }
+}
+
+void
+sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m)
+{
+
+#if 0 /* XXX: not yet: soclose() call path comes here w/o lock. */
+ SOCKBUF_LOCK_ASSERT(sb);
+#endif
+
+ sb->sb_ccc -= m->m_len;
+ sb->sb_tlscc -= m->m_len;
+
+ sb->sb_mbcnt -= MSIZE;
+ sb->sb_mcnt -= 1;
+
+ if (m->m_flags & M_EXT) {
+ sb->sb_mbcnt -= m->m_ext.ext_size;
+ sb->sb_ccnt -= 1;
+ }
+}
+#endif
+
+/*
* Socantsendmore indicates that no more data will be sent on the socket; it
* would normally be applied to a socket when the user informs the system
* that no more data is to be sent, by the protocol code (in case
@@ -370,6 +417,10 @@
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
so->so_rcv.sb_state |= SBS_CANTRCVMORE;
+#ifdef KERN_TLS
+ if (so->so_rcv.sb_flags & SB_TLS_RX)
+ ktls_check_rx(&so->so_rcv);
+#endif
sorwakeup_locked(so);
mtx_assert(SOCKBUF_MTX(&so->so_rcv), MA_NOTOWNED);
}
@@ -770,6 +821,24 @@
}
panic("%s from %s:%u", __func__, file, line);
}
+
+#ifdef KERN_TLS
+ m = sb->sb_mtls;
+ while (m && m->m_next)
+ m = m->m_next;
+
+ if (m != sb->sb_mtlstail) {
+ printf("%s: sb_mtls %p sb_mtlstail %p last %p\n",
+ __func__, sb->sb_mtls, sb->sb_mtlstail, m);
+ printf("TLS packet tree:\n");
+ printf("\t");
+ for (m = sb->sb_mtls; m != NULL; m = m->m_next) {
+ printf("%p ", m);
+ }
+ printf("\n");
+ panic("%s from %s:%u", __func__, file, line);
+ }
+#endif
}
#endif /* SOCKBUF_DEBUG */
@@ -847,7 +916,30 @@
SOCKBUF_UNLOCK(sb);
}
+#ifdef KERN_TLS
/*
+ * Append an mbuf containing encrypted TLS data. The data
+ * is marked M_NOTREADY until it has been decrypted and
+ * stored as a TLS record.
+ */
+static void
+sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m)
+{
+ struct mbuf *n;
+
+ SBLASTMBUFCHK(sb);
+
+ /* Remove all packet headers and mbuf tags to get a pure data chain. */
+ m_demote(m, 1, 0);
+
+ for (n = m; n != NULL; n = n->m_next)
+ n->m_flags |= M_NOTREADY;
+ sbcompress_ktls_rx(sb, m, sb->sb_mtlstail);
+ ktls_check_rx(sb);
+}
+#endif
+
+/*
* This version of sbappend() should only be used when the caller absolutely
* knows that there will never be more than one record in the socket buffer,
* that is, a stream protocol (such as TCP).
@@ -858,6 +950,19 @@
SOCKBUF_LOCK_ASSERT(sb);
KASSERT(m->m_nextpkt == NULL,("sbappendstream 0"));
+
+#ifdef KERN_TLS
+ /*
+ * Decrypted TLS records are appended as records via
+ * sbappendrecord(). TCP passes encrypted TLS records to this
+ * function which must be scheduled for decryption.
+ */
+ if (sb->sb_flags & SB_TLS_RX) {
+ sbappend_ktls_rx(sb, m);
+ return;
+ }
+#endif
+
KASSERT(sb->sb_mb == sb->sb_lastrecord,("sbappendstream 1"));
SBLASTMBUFCHK(sb);
@@ -896,6 +1001,9 @@
{
struct mbuf *m, *n, *fnrdy;
u_long acc, ccc, mbcnt;
+#ifdef KERN_TLS
+ u_long tlscc;
+#endif
SOCKBUF_LOCK_ASSERT(sb);
@@ -931,9 +1039,46 @@
mbcnt += m->m_ext.ext_size;
}
}
+#ifdef KERN_TLS
+ /*
+ * Account for mbufs "detached" by ktls_detach_record() while
+ * they are decrypted by ktls_decrypt(). tlsdcc gives a count
+ * of the detached bytes that are included in ccc. The mbufs
+ * and clusters are not included in the socket buffer
+ * accounting.
+ */
+ ccc += sb->sb_tlsdcc;
+
+ tlscc = 0;
+ for (m = sb->sb_mtls; m; m = m->m_next) {
+ if (m->m_nextpkt != NULL) {
+ printf("sb %p TLS mbuf %p with nextpkt\n", sb, m);
+ goto fail;
+ }
+ if ((m->m_flags & M_NOTREADY) == 0) {
+ printf("sb %p TLS mbuf %p ready\n", sb, m);
+ goto fail;
+ }
+ tlscc += m->m_len;
+ ccc += m->m_len;
+ mbcnt += MSIZE;
+ if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
+ mbcnt += m->m_ext.ext_size;
+ }
+
+ if (sb->sb_tlscc != tlscc) {
+ printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
+ sb->sb_tlsdcc);
+ goto fail;
+ }
+#endif
if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n",
acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
+#ifdef KERN_TLS
+ printf("tlscc %ld/%u dcc %u\n", tlscc, sb->sb_tlscc,
+ sb->sb_tlsdcc);
+#endif
goto fail;
}
return;
@@ -1209,14 +1354,72 @@
SBLASTMBUFCHK(sb);
}
+#ifdef KERN_TLS
/*
+ * A version of sbcompress() for encrypted TLS RX mbufs. These mbufs
+ * are appended to the 'sb_mtls' chain instead of 'sb_mb' and are also
+ * a bit simpler (no EOR markers, always MT_DATA, etc.).
+ */
+static void
+sbcompress_ktls_rx(struct sockbuf *sb, struct mbuf *m, struct mbuf *n)
+{
+
+ SOCKBUF_LOCK_ASSERT(sb);
+
+ while (m) {
+ KASSERT((m->m_flags & M_EOR) == 0,
+ ("TLS RX mbuf %p with EOR", m));
+ KASSERT(m->m_type == MT_DATA,
+ ("TLS RX mbuf %p is not MT_DATA", m));
+ KASSERT((m->m_flags & M_NOTREADY) != 0,
+ ("TLS RX mbuf %p ready", m));
+ KASSERT((m->m_flags & M_EXTPG) == 0,
+ ("TLS RX mbuf %p unmapped", m));
+
+ if (m->m_len == 0) {
+ m = m_free(m);
+ continue;
+ }
+
+ /*
+ * Even though both 'n' and 'm' are NOTREADY, it's ok
+ * to coalesce the data.
+ */
+ if (n &&
+ M_WRITABLE(n) &&
+ ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
+ !(n->m_flags & (M_EXTPG)) &&
+ m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
+ m->m_len <= M_TRAILINGSPACE(n)) {
+ m_copydata(m, 0, m->m_len, mtodo(n, n->m_len));
+ n->m_len += m->m_len;
+ sb->sb_ccc += m->m_len;
+ sb->sb_tlscc += m->m_len;
+ m = m_free(m);
+ continue;
+ }
+ if (n)
+ n->m_next = m;
+ else
+ sb->sb_mtls = m;
+ sb->sb_mtlstail = m;
+ sballoc_ktls_rx(sb, m);
+ n = m;
+ m = m->m_next;
+ n->m_next = NULL;
+ }
+ SBLASTMBUFCHK(sb);
+}
+#endif
+
+/*
* Free all mbufs in a sockbuf. Check that all resources are reclaimed.
*/
static void
sbflush_internal(struct sockbuf *sb)
{
- while (sb->sb_mbcnt) {
+ while (sb->sb_mbcnt || sb->sb_tlsdcc) {
/*
* Don't call sbcut(sb, 0) if the leading mbuf is non-empty:
* we would loop forever. Panic instead.
@@ -1254,6 +1457,7 @@
sbcut_internal(struct sockbuf *sb, int len)
{
struct mbuf *m, *next, *mfree;
+ bool is_tls;
KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0",
__func__, len));
@@ -1261,10 +1465,25 @@
__func__, len, sb->sb_ccc));
next = (m = sb->sb_mb) ? m->m_nextpkt : 0;
+ is_tls = false;
mfree = NULL;
while (len > 0) {
if (m == NULL) {
+#ifdef KERN_TLS
+ if (next == NULL && !is_tls) {
+ if (sb->sb_tlsdcc != 0) {
+ MPASS(len >= sb->sb_tlsdcc);
+ len -= sb->sb_tlsdcc;
+ sb->sb_ccc -= sb->sb_tlsdcc;
+ sb->sb_tlsdcc = 0;
+ if (len == 0)
+ break;
+ }
+ next = sb->sb_mtls;
+ is_tls = true;
+ }
+#endif
KASSERT(next, ("%s: no next, len %d", __func__, len));
m = next;
next = m->m_nextpkt;
@@ -1283,12 +1502,17 @@
break;
}
len -= m->m_len;
- sbfree(sb, m);
+#ifdef KERN_TLS
+ if (is_tls)
+ sbfree_ktls_rx(sb, m);
+ else
+#endif
+ sbfree(sb, m);
/*
* Do not put M_NOTREADY buffers to the free list, they
* are referenced from outside.
*/
- if (m->m_flags & M_NOTREADY)
+ if (m->m_flags & M_NOTREADY && !is_tls)
m = m->m_next;
else {
struct mbuf *n;
@@ -1314,6 +1538,14 @@
mfree = m;
m = n;
}
+#ifdef KERN_TLS
+ if (is_tls) {
+ sb->sb_mb = NULL;
+ sb->sb_mtls = m;
+ if (m == NULL)
+ sb->sb_mtlstail = NULL;
+ } else
+#endif
if (m) {
sb->sb_mb = m;
m->m_nextpkt = next;
@@ -1489,17 +1721,18 @@
* type for presentation on a socket buffer.
*/
struct mbuf *
-sbcreatecontrol(caddr_t p, int size, int type, int level)
+sbcreatecontrol_how(void *p, int size, int type, int level, int wait)
{
struct cmsghdr *cp;
struct mbuf *m;
+ MBUF_CHECKSLEEP(wait);
if (CMSG_SPACE((u_int)size) > MCLBYTES)
return ((struct mbuf *) NULL);
if (CMSG_SPACE((u_int)size) > MLEN)
- m = m_getcl(M_NOWAIT, MT_CONTROL, 0);
+ m = m_getcl(wait, MT_CONTROL, 0);
else
- m = m_get(M_NOWAIT, MT_CONTROL);
+ m = m_get(wait, MT_CONTROL);
if (m == NULL)
return ((struct mbuf *) NULL);
cp = mtod(m, struct cmsghdr *);
@@ -1518,6 +1751,13 @@
cp->cmsg_level = level;
cp->cmsg_type = type;
return (m);
+}
+
+struct mbuf *
+sbcreatecontrol(caddr_t p, int size, int type, int level)
+{
+
+ return (sbcreatecontrol_how(p, size, type, level, M_NOWAIT));
}
/*
Index: head/sys/kern/uipc_socket.c
===================================================================
--- head/sys/kern/uipc_socket.c
+++ head/sys/kern/uipc_socket.c
@@ -1965,7 +1965,8 @@
}
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
- if (m == NULL) {
+ if (m == NULL && so->so_rcv.sb_tlsdcc == 0 &&
+ so->so_rcv.sb_tlscc == 0) {
SOCKBUF_UNLOCK(&so->so_rcv);
goto release;
} else
Index: head/sys/opencrypto/ktls_ocf.c
===================================================================
--- head/sys/opencrypto/ktls_ocf.c
+++ head/sys/opencrypto/ktls_ocf.c
@@ -223,6 +223,56 @@
}
static int
+ktls_ocf_tls12_gcm_decrypt(struct ktls_session *tls,
+ const struct tls_record_layer *hdr, struct mbuf *m, uint64_t seqno,
+ int *trailer_len)
+{
+ struct tls_aead_data ad;
+ struct cryptop crp;
+ struct ocf_session *os;
+ struct ocf_operation oo;
+ int error;
+ uint16_t tls_comp_len;
+
+ os = tls->cipher;
+
+ oo.os = os;
+ oo.done = false;
+
+ crypto_initreq(&crp, os->sid);
+
+ /* Setup the IV. */
+ memcpy(crp.crp_iv, tls->params.iv, TLS_AEAD_GCM_LEN);
+ memcpy(crp.crp_iv + TLS_AEAD_GCM_LEN, hdr + 1, sizeof(uint64_t));
+
+ /* Setup the AAD. */
+ tls_comp_len = ntohs(hdr->tls_length) -
+ (AES_GMAC_HASH_LEN + sizeof(uint64_t));
+ ad.seq = htobe64(seqno);
+ ad.type = hdr->tls_type;
+ ad.tls_vmajor = hdr->tls_vmajor;
+ ad.tls_vminor = hdr->tls_vminor;
+ ad.tls_length = htons(tls_comp_len);
+ crp.crp_aad = &ad;
+ crp.crp_aad_length = sizeof(ad);
+
+ crp.crp_payload_start = tls->params.tls_hlen;
+ crp.crp_payload_length = tls_comp_len;
+ crp.crp_digest_start = crp.crp_payload_start + crp.crp_payload_length;
+
+ crp.crp_op = CRYPTO_OP_DECRYPT | CRYPTO_OP_VERIFY_DIGEST;
+ crp.crp_flags = CRYPTO_F_CBIMM | CRYPTO_F_IV_SEPARATE;
+ crypto_use_mbuf(&crp, m);
+
+ counter_u64_add(ocf_tls12_gcm_crypts, 1);
+ error = ktls_ocf_dispatch(os, &crp);
+
+ crypto_destroyreq(&crp);
+ *trailer_len = AES_GMAC_HASH_LEN;
+ return (error);
+}
+
+static int
ktls_ocf_tls13_gcm_encrypt(struct ktls_session *tls,
const struct tls_record_layer *hdr, uint8_t *trailer, struct iovec *iniov,
struct iovec *outiov, int iovcnt, uint64_t seqno, uint8_t record_type)
@@ -325,7 +375,7 @@
}
static int
-ktls_ocf_try(struct socket *so, struct ktls_session *tls)
+ktls_ocf_try(struct socket *so, struct ktls_session *tls, int direction)
{
struct crypto_session_params csp;
struct ocf_session *os;
@@ -359,6 +409,11 @@
tls->params.tls_vminor > TLS_MINOR_VER_THREE)
return (EPROTONOSUPPORT);
+ /* TLS 1.3 is not yet supported for receive. */
+ if (direction == KTLS_RX &&
+ tls->params.tls_vminor == TLS_MINOR_VER_THREE)
+ return (EPROTONOSUPPORT);
+
os = malloc(sizeof(*os), M_KTLS_OCF, M_NOWAIT | M_ZERO);
if (os == NULL)
return (ENOMEM);
@@ -372,10 +427,14 @@
mtx_init(&os->lock, "ktls_ocf", NULL, MTX_DEF);
tls->cipher = os;
- if (tls->params.tls_vminor == TLS_MINOR_VER_THREE)
- tls->sw_encrypt = ktls_ocf_tls13_gcm_encrypt;
- else
- tls->sw_encrypt = ktls_ocf_tls12_gcm_encrypt;
+ if (direction == KTLS_TX) {
+ if (tls->params.tls_vminor == TLS_MINOR_VER_THREE)
+ tls->sw_encrypt = ktls_ocf_tls13_gcm_encrypt;
+ else
+ tls->sw_encrypt = ktls_ocf_tls12_gcm_encrypt;
+ } else {
+ tls->sw_decrypt = ktls_ocf_tls12_gcm_decrypt;
+ }
tls->free = ktls_ocf_free;
return (0);
}
Index: head/sys/sys/ktls.h
===================================================================
--- head/sys/sys/ktls.h
+++ head/sys/sys/ktls.h
@@ -163,7 +163,7 @@
#define KTLS_TX 1
#define KTLS_RX 2
-#define KTLS_API_VERSION 6
+#define KTLS_API_VERSION 7
struct iovec;
struct ktls_session;
@@ -174,7 +174,7 @@
struct ktls_crypto_backend {
LIST_ENTRY(ktls_crypto_backend) next;
- int (*try)(struct socket *so, struct ktls_session *tls);
+ int (*try)(struct socket *so, struct ktls_session *tls, int direction);
int prio;
int api_version;
int use_count;
@@ -182,11 +182,16 @@
};
struct ktls_session {
- int (*sw_encrypt)(struct ktls_session *tls,
- const struct tls_record_layer *hdr, uint8_t *trailer,
- struct iovec *src, struct iovec *dst, int iovcnt,
- uint64_t seqno, uint8_t record_type);
union {
+ int (*sw_encrypt)(struct ktls_session *tls,
+ const struct tls_record_layer *hdr, uint8_t *trailer,
+ struct iovec *src, struct iovec *dst, int iovcnt,
+ uint64_t seqno, uint8_t record_type);
+ int (*sw_decrypt)(struct ktls_session *tls,
+ const struct tls_record_layer *hdr, struct mbuf *m,
+ uint64_t seqno, int *trailer_len);
+ };
+ union {
void *cipher;
struct m_snd_tag *snd_tag;
};
@@ -202,6 +207,7 @@
bool reset_pending;
} __aligned(CACHE_LINE_SIZE);
+void ktls_check_rx(struct sockbuf *sb);
int ktls_crypto_backend_register(struct ktls_crypto_backend *be);
int ktls_crypto_backend_deregister(struct ktls_crypto_backend *be);
int ktls_enable_rx(struct socket *so, struct tls_enable *en);
Index: head/sys/sys/sockbuf.h
===================================================================
--- head/sys/sys/sockbuf.h
+++ head/sys/sys/sockbuf.h
@@ -38,6 +38,8 @@
/*
* Constants for sb_flags field of struct sockbuf/xsockbuf.
*/
+#define SB_TLS_RX 0x01 /* using KTLS on RX */
+#define SB_TLS_RX_RUNNING 0x02 /* KTLS RX operation running */
#define SB_WAIT 0x04 /* someone is waiting for data/space */
#define SB_SEL 0x08 /* someone is selecting */
#define SB_ASYNC 0x10 /* ASYNC I/O, need signals */
@@ -99,10 +101,14 @@
u_int sb_ccnt; /* (a) number of clusters in buffer */
u_int sb_mbmax; /* (a) max chars of mbufs to use */
u_int sb_ctl; /* (a) non-data chars in buffer */
+ u_int sb_tlscc; /* (a) TLS chain characters */
+ u_int sb_tlsdcc; /* (a) TLS characters being decrypted */
int sb_lowat; /* (a) low water mark */
sbintime_t sb_timeo; /* (a) timeout for read/write */
uint64_t sb_tls_seqno; /* (a) TLS seqno */
struct ktls_session *sb_tls_info; /* (a + b) TLS state */
+ struct mbuf *sb_mtls; /* (a) TLS mbuf chain */
+ struct mbuf *sb_mtlstail; /* (a) last mbuf in TLS chain */
short sb_flags; /* (a) flags, see above */
int (*sb_upcall)(struct socket *, void *, int); /* (a) */
void *sb_upcallarg; /* (a) */
@@ -153,6 +159,9 @@
void sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
struct mbuf *
sbcreatecontrol(caddr_t p, int size, int type, int level);
+struct mbuf *
+ sbcreatecontrol_how(void *p, int size, int type, int level,
+ int wait);
void sbdestroy(struct sockbuf *sb, struct socket *so);
void sbdrop(struct sockbuf *sb, int len);
void sbdrop_locked(struct sockbuf *sb, int len);
@@ -178,6 +187,8 @@
void sbunlock(struct sockbuf *sb);
void sballoc(struct sockbuf *, struct mbuf *);
void sbfree(struct sockbuf *, struct mbuf *);
+void sballoc_ktls_rx(struct sockbuf *sb, struct mbuf *m);
+void sbfree_ktls_rx(struct sockbuf *sb, struct mbuf *m);
int sbready(struct sockbuf *, struct mbuf *, int);
/*
Index: head/sys/sys/socketvar.h
===================================================================
--- head/sys/sys/socketvar.h
+++ head/sys/sys/socketvar.h
@@ -83,6 +83,7 @@
* (f) not locked since integer reads/writes are atomic.
* (g) used only as a sleep/wakeup address, no value.
* (h) locked by global mutex so_global_mtx.
+ * (k) locked by KTLS workqueue mutex
*/
TAILQ_HEAD(accept_queue, socket);
struct socket {
@@ -132,6 +133,9 @@
/* (b) cached MAC label for peer */
struct label *so_peerlabel;
u_long so_oobmark; /* chars to oob mark */
+
+ /* (k) Our place on KTLS RX work queue. */
+ STAILQ_ENTRY(socket) so_ktls_rx_list;
};
/*
* Listening socket, where accepts occur, is so_listen in all
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Jan 18, 2:12 AM (17 h, 46 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15848886
Default Alt Text
D24628.diff (34 KB)
Attached To
Mode
D24628: Add support for KTLS RX via software decryption.
Attached
Detach File
Event Timeline
Log In to Comment