diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c --- a/sys/kern/uipc_ktls.c +++ b/sys/kern/uipc_ktls.c @@ -299,7 +299,7 @@ static void ktls_cleanup(struct ktls_session *tls); #if defined(INET) || defined(INET6) -static void ktls_reset_send_tag(void *context, int pending); +static void ktls_reset_send_receive_tag(void *context, int pending); #endif static void ktls_work_thread(void *ctx); static void ktls_alloc_thread(void *ctx); @@ -503,7 +503,7 @@ #if defined(INET) || defined(INET6) static int ktls_create_session(struct socket *so, struct tls_enable *en, - struct ktls_session **tlsp) + struct ktls_session **tlsp, int direction) { struct ktls_session *tls; int error; @@ -608,9 +608,10 @@ counter_u64_add(ktls_offload_active, 1); refcount_init(&tls->refcount, 1); - TASK_INIT(&tls->reset_tag_task, 0, ktls_reset_send_tag, tls); + TASK_INIT(&tls->reset_tag_task, 0, ktls_reset_send_receive_tag, tls); tls->wq_index = ktls_get_cpu(so); + tls->direction = direction; tls->params.cipher_algorithm = en->cipher_algorithm; tls->params.auth_algorithm = en->auth_algorithm; @@ -743,11 +744,12 @@ counter_u64_add(ktls_offload_active, 1); refcount_init(&tls_new->refcount, 1); - TASK_INIT(&tls_new->reset_tag_task, 0, ktls_reset_send_tag, tls_new); + TASK_INIT(&tls_new->reset_tag_task, 0, ktls_reset_send_receive_tag, tls_new); /* Copy fields from existing session. */ tls_new->params = tls->params; tls_new->wq_index = tls->wq_index; + tls_new->direction = tls->direction; /* Deep copy keys. */ if (tls_new->params.auth_key != NULL) { @@ -797,8 +799,8 @@ counter_u64_add(ktls_ifnet_chacha20, -1); break; } - if (tls->snd_tag != NULL) - m_snd_tag_rele(tls->snd_tag); + if (tls->snd_rcv_tag != NULL) + m_snd_tag_rele(tls->snd_rcv_tag); break; #ifdef TCP_OFFLOAD case TCP_TLS_MODE_TOE: @@ -980,28 +982,135 @@ return (error); } +/* + * Common code for allocating a TLS receive tag for doing HW + * decryption of TLS data. 
+ * + * This function allocates a new TLS receive tag on whatever interface + * the connection is currently routed over. + */ static int -ktls_try_ifnet(struct socket *so, struct ktls_session *tls, bool force) +ktls_alloc_rcv_tag(struct inpcb *inp, struct ktls_session *tls, bool force, + struct m_snd_tag **mstp) { - struct m_snd_tag *mst; + union if_snd_tag_alloc_params params; + struct ifnet *ifp; + struct nhop_object *nh; + struct tcpcb *tp; int error; - error = ktls_alloc_snd_tag(so->so_pcb, tls, force, &mst); - if (error == 0) { - tls->mode = TCP_TLS_MODE_IFNET; - tls->snd_tag = mst; - switch (tls->params.cipher_algorithm) { - case CRYPTO_AES_CBC: - counter_u64_add(ktls_ifnet_cbc, 1); - break; - case CRYPTO_AES_NIST_GCM_16: - counter_u64_add(ktls_ifnet_gcm, 1); - break; - case CRYPTO_CHACHA20_POLY1305: - counter_u64_add(ktls_ifnet_chacha20, 1); - break; + INP_RLOCK(inp); + if (inp->inp_flags2 & INP_FREED) { + INP_RUNLOCK(inp); + return (ECONNRESET); + } + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_RUNLOCK(inp); + return (ECONNRESET); + } + if (inp->inp_socket == NULL) { + INP_RUNLOCK(inp); + return (ECONNRESET); + } + tp = intotcpcb(inp); + + /* + * Check administrative controls on ifnet TLS to determine if + * ifnet TLS should be denied. + * + * - Always permit 'force' requests. + * - ktls_ifnet_permitted == 0: always deny. + */ + if (!force && ktls_ifnet_permitted == 0) { + INP_RUNLOCK(inp); + return (ENXIO); + } + + /* + * XXX: Use the cached route in the inpcb to find the + * interface. This should perhaps instead use + * rtalloc1_fib(dst, 0, 0, fibnum). Since KTLS is only + * enabled after a connection has completed key negotiation in + * userland, the cached route will be present in practice. 
+ */ + nh = inp->inp_route.ro_nh; + if (nh == NULL) { + INP_RUNLOCK(inp); + return (ENXIO); + } + ifp = nh->nh_ifp; + if_ref(ifp); + + params.hdr.type = IF_SND_TAG_TYPE_TLS_RX; + params.hdr.flowid = inp->inp_flowid; + params.hdr.flowtype = inp->inp_flowtype; + params.hdr.numa_domain = inp->inp_numa_domain; + params.tls_rx.inp = inp; + params.tls_rx.tls = tls; + + INP_RUNLOCK(inp); + + if ((ifp->if_capenable & IFCAP_MEXTPG) == 0) { + error = EOPNOTSUPP; + goto out; + } + + /* XXX reusing TXTLS flags */ + if (inp->inp_vflag & INP_IPV6) { + if ((ifp->if_capenable & IFCAP_TXTLS6) == 0) { + error = EOPNOTSUPP; + goto out; } + } else { + if ((ifp->if_capenable & IFCAP_TXTLS4) == 0) { + error = EOPNOTSUPP; + goto out; + } + } + error = m_snd_tag_alloc(ifp, &params, mstp); +out: + if_rele(ifp); + return (error); +} + +static int +ktls_try_ifnet(struct socket *so, struct ktls_session *tls, int direction, bool force) +{ + struct m_snd_tag *mst; + int error; + + switch (direction) { + case KTLS_TX: + error = ktls_alloc_snd_tag(so->so_pcb, tls, force, &mst); + if (__predict_false(error != 0)) + goto done; + break; + case KTLS_RX: + error = ktls_alloc_rcv_tag(so->so_pcb, tls, force, &mst); + if (__predict_false(error != 0)) + goto done; + break; + default: + return (EINVAL); + } + + tls->mode = TCP_TLS_MODE_IFNET; + tls->snd_rcv_tag = mst; + + switch (tls->params.cipher_algorithm) { + case CRYPTO_AES_CBC: + counter_u64_add(ktls_ifnet_cbc, 1); + break; + case CRYPTO_AES_NIST_GCM_16: + counter_u64_add(ktls_ifnet_gcm, 1); + break; + case CRYPTO_CHACHA20_POLY1305: + counter_u64_add(ktls_ifnet_chacha20, 1); + break; + default: + break; } +done: return (error); } @@ -1187,7 +1296,7 @@ en->tls_vminor == TLS_MINOR_VER_THREE) return (ENOTSUP); - error = ktls_create_session(so, en, &tls); + error = ktls_create_session(so, en, &tls, KTLS_RX); if (error) return (error); @@ -1197,12 +1306,6 @@ return (error); } -#ifdef TCP_OFFLOAD - error = ktls_try_toe(so, tls, KTLS_RX); - if (error) -#endif - 
ktls_use_sw(tls); - /* Mark the socket as using TLS offload. */ SOCKBUF_LOCK(&so->so_rcv); so->so_rcv.sb_tls_seqno = be64dec(en->rec_seq); @@ -1216,6 +1319,15 @@ } SOCKBUF_UNLOCK(&so->so_rcv); + /* Prefer TOE -> ifnet TLS -> software TLS. */ +#ifdef TCP_OFFLOAD + error = ktls_try_toe(so, tls, KTLS_RX); + if (error) +#endif + error = ktls_try_ifnet(so, tls, KTLS_RX, false); + if (error) + ktls_use_sw(tls); + counter_u64_add(ktls_offload_total, 1); return (0); @@ -1256,7 +1368,7 @@ if (mb_use_ext_pgs == 0) return (ENXIO); - error = ktls_create_session(so, en, &tls); + error = ktls_create_session(so, en, &tls, KTLS_TX); if (error) return (error); @@ -1265,7 +1377,7 @@ error = ktls_try_toe(so, tls, KTLS_TX); if (error) #endif - error = ktls_try_ifnet(so, tls, false); + error = ktls_try_ifnet(so, tls, KTLS_TX, false); if (error) error = ktls_try_sw(so, tls, KTLS_TX); @@ -1321,6 +1433,39 @@ return (0); } +int +ktls_get_rx_sequence(struct inpcb *inp, uint32_t *tcpseq, uint64_t *tlsseq) +{ + struct socket *so; + struct tcpcb *tp; + + so = inp->inp_socket; + if (__predict_false(so == NULL)) + return (EINVAL); + + INP_RLOCK(inp); + if (inp->inp_flags2 & INP_FREED) { + INP_RUNLOCK(inp); + return (ECONNRESET); + } + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_RUNLOCK(inp); + return (ECONNRESET); + } + + tp = intotcpcb(inp); + MPASS(tp != NULL); + + SOCKBUF_LOCK(&so->so_rcv); + *tcpseq = tp->rcv_nxt - so->so_rcv.sb_tlscc; + *tlsseq = so->so_rcv.sb_tls_seqno; + SOCKBUF_UNLOCK(&so->so_rcv); + + INP_RUNLOCK(inp); + + return (0); +} + int ktls_get_tx_mode(struct socket *so, int *modep) { @@ -1382,7 +1527,7 @@ tls_new = ktls_clone_session(tls); if (mode == TCP_TLS_MODE_IFNET) - error = ktls_try_ifnet(so, tls_new, true); + error = ktls_try_ifnet(so, tls_new, KTLS_TX, true); else error = ktls_try_sw(so, tls_new, KTLS_TX); if (error) { @@ -1441,19 +1586,21 @@ } /* - * Try to allocate a new TLS send tag. 
This task is scheduled when - * ip_output detects a route change while trying to transmit a packet - * holding a TLS record. If a new tag is allocated, replace the tag - * in the TLS session. Subsequent packets on the connection will use - * the new tag. If a new tag cannot be allocated, drop the - * connection. + * ktls_reset_send_receive_tag - try to allocate a new TLS send or receive tag. + * + * This task is scheduled when ip_output detects a route change while + * trying to transmit a packet holding a TLS record. If a new tag is + * allocated, replace the tag in the TLS session. Subsequent packets + * on the connection will use the new tag. If a new tag cannot be + * allocated, drop the connection. */ static void -ktls_reset_send_tag(void *context, int pending) +ktls_reset_send_receive_tag(void *context, int pending) { struct epoch_tracker et; struct ktls_session *tls; - struct m_snd_tag *old, *new; + struct m_snd_tag *snd_rcv_old; + struct m_snd_tag *snd_rcv_new; struct inpcb *inp; struct tcpcb *tp; int error; @@ -1469,72 +1616,81 @@ * an ifp mismatch and drop packets until a new tag is * allocated. * - * Write-lock the INP when changing tls->snd_tag since + * Write-lock the INP when changing tls->snd_rcv_tag since * ip[6]_output_send() holds a read-lock when reading the * pointer. 
*/ INP_WLOCK(inp); - old = tls->snd_tag; - tls->snd_tag = NULL; + snd_rcv_old = tls->snd_rcv_tag; + tls->snd_rcv_tag = NULL; INP_WUNLOCK(inp); - if (old != NULL) - m_snd_tag_rele(old); - error = ktls_alloc_snd_tag(inp, tls, true, &new); + if (snd_rcv_old != NULL) + m_snd_tag_rele(snd_rcv_old); - if (error == 0) { - INP_WLOCK(inp); - tls->snd_tag = new; - mtx_pool_lock(mtxpool_sleep, tls); - tls->reset_pending = false; - mtx_pool_unlock(mtxpool_sleep, tls); - if (!in_pcbrele_wlocked(inp)) - INP_WUNLOCK(inp); + switch (tls->direction) { + case KTLS_TX: + error = ktls_alloc_snd_tag(inp, tls, true, &snd_rcv_new); + break; + case KTLS_RX: + error = ktls_alloc_rcv_tag(inp, tls, true, &snd_rcv_new); + break; + default: + goto drop_connection; + } + if (error != 0) + goto drop_connection; - counter_u64_add(ktls_ifnet_reset, 1); + INP_WLOCK(inp); + tls->snd_rcv_tag = snd_rcv_new; - /* - * XXX: Should we kick tcp_output explicitly now that - * the send tag is fixed or just rely on timers? - */ - } else { - NET_EPOCH_ENTER(et); - INP_WLOCK(inp); - if (!in_pcbrele_wlocked(inp)) { - if (!(inp->inp_flags & INP_TIMEWAIT) && - !(inp->inp_flags & INP_DROPPED)) { - tp = intotcpcb(inp); - CURVNET_SET(tp->t_vnet); - tp = tcp_drop(tp, ECONNABORTED); - CURVNET_RESTORE(); - if (tp != NULL) - INP_WUNLOCK(inp); - counter_u64_add(ktls_ifnet_reset_dropped, 1); - } else - INP_WUNLOCK(inp); - } - NET_EPOCH_EXIT(et); + mtx_pool_lock(mtxpool_sleep, tls); + tls->reset_pending = false; + mtx_pool_unlock(mtxpool_sleep, tls); - counter_u64_add(ktls_ifnet_reset_failed, 1); + if (!in_pcbrele_wlocked(inp)) + INP_WUNLOCK(inp); - /* - * Leave reset_pending true to avoid future tasks while - * the socket goes away. - */ - } + counter_u64_add(ktls_ifnet_reset, 1); ktls_free(tls); -} -int -ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls) -{ + /* + * XXX: Should we kick tcp_output explicitly now that + * the send tag is fixed or just rely on timers? 
+ */ + return; - if (inp == NULL) - return (ENOBUFS); +drop_connection: + NET_EPOCH_ENTER(et); + INP_WLOCK(inp); + if (!in_pcbrele_wlocked(inp)) { + if (!(inp->inp_flags & INP_TIMEWAIT) && + !(inp->inp_flags & INP_DROPPED)) { + tp = intotcpcb(inp); + CURVNET_SET(tp->t_vnet); + tp = tcp_drop(tp, ECONNABORTED); + CURVNET_RESTORE(); + if (tp != NULL) + INP_WUNLOCK(inp); + counter_u64_add(ktls_ifnet_reset_dropped, 1); + } else + INP_WUNLOCK(inp); + } + NET_EPOCH_EXIT(et); - INP_LOCK_ASSERT(inp); + counter_u64_add(ktls_ifnet_reset_failed, 1); + + /* + * Leave reset_pending true to avoid future tasks while + * the socket goes away. + */ + ktls_free(tls); +} +static void +ktls_output_eagain_tls(struct inpcb *inp, struct ktls_session *tls) +{ /* * See if we should schedule a task to update the send tag for * this session. @@ -1548,6 +1704,30 @@ taskqueue_enqueue(taskqueue_thread, &tls->reset_tag_task); } mtx_pool_unlock(mtxpool_sleep, tls); +} + +int +ktls_output_eagain(struct inpcb *inp) +{ + struct socket *so; + struct ktls_session *tls; + + if (__predict_false(inp == NULL)) + goto done; + INP_LOCK_ASSERT(inp); + + so = inp->inp_socket; + if (__predict_false(so == NULL)) + goto done; + + tls = so->so_rcv.sb_tls_info; + if (__predict_true(tls != NULL)) + ktls_output_eagain_tls(inp, tls); + + tls = so->so_snd.sb_tls_info; + if (__predict_true(tls != NULL)) + ktls_output_eagain_tls(inp, tls); +done: return (ENOBUFS); } @@ -1566,7 +1746,7 @@ MPASS(tls->mode == TCP_TLS_MODE_IFNET); - if (tls->snd_tag == NULL) { + if (tls->snd_rcv_tag == NULL) { /* * Resetting send tag, ignore this change. 
The * pending reset may or may not see this updated rate @@ -1576,10 +1756,11 @@ return (0); } - MPASS(tls->snd_tag != NULL); - MPASS(tls->snd_tag->sw->type == IF_SND_TAG_TYPE_TLS_RATE_LIMIT); + mst = tls->snd_rcv_tag; + + MPASS(mst != NULL); + MPASS(mst->sw->type == IF_SND_TAG_TYPE_TLS_RATE_LIMIT); - mst = tls->snd_tag; return (mst->sw->snd_tag_modify(mst, &params)); } #endif @@ -1862,7 +2043,7 @@ return (NULL); } } - n->m_flags |= M_NOTREADY; + n->m_flags |= (m->m_flags & (M_NOTREADY | M_DECRYPTED)); /* Store remainder in 'n'. */ n->m_len = m->m_len - remain; @@ -1900,10 +2081,114 @@ return (top); } +/* + * Check if a mbuf chain is fully decrypted at the given offset and + * length. Returns -1 if all data is decrypted. 0 if there is a mix of + * encrypted and decrypted data. Else 1 if all data is encrypted. + */ +int +ktls_mbuf_crypto_state(struct mbuf *mb, int offset, int len) +{ + int m_flags_ored = 0; + int m_flags_anded = -1; + + for (; mb != NULL; mb = mb->m_next) { + if (offset < mb->m_len) + break; + offset -= mb->m_len; + } + offset += len; + + for (; mb != NULL; mb = mb->m_next) { + m_flags_ored |= mb->m_flags; + m_flags_anded &= mb->m_flags; + + if (offset <= mb->m_len) + break; + offset -= mb->m_len; + } + MPASS(mb != NULL || offset == 0); + + if ((m_flags_ored ^ m_flags_anded) & M_DECRYPTED) + return (0); /* mixed */ + else + return ((m_flags_ored & M_DECRYPTED) ? 
-1 : 1); +} + +/* + * ktls_resync_ifnet - get HW TLS RX back on track after packet loss + */ +static int +ktls_resync_ifnet(struct socket *so, uint32_t tls_len, uint64_t tls_rcd_num) +{ + union if_snd_tag_modify_params params; + struct m_snd_tag *mst; + struct inpcb *inp; + struct tcpcb *tp; + + mst = so->so_rcv.sb_tls_info->snd_rcv_tag; + if (__predict_false(mst == NULL)) + return (EINVAL); + + inp = sotoinpcb(so); + if (__predict_false(inp == NULL)) + return (EINVAL); + + INP_RLOCK(inp); + if (inp->inp_flags2 & INP_FREED) { + INP_RUNLOCK(inp); + return (ECONNRESET); + } + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_RUNLOCK(inp); + return (ECONNRESET); + } + + tp = intotcpcb(inp); + MPASS(tp != NULL); + + /* Get the TCP sequence number of the next valid TLS header. */ + SOCKBUF_LOCK(&so->so_rcv); + params.tls_rx.tls_hdr_tcp_sn = + tp->rcv_nxt - so->so_rcv.sb_tlscc - tls_len; + params.tls_rx.tls_rec_length = tls_len; + params.tls_rx.tls_rec_number = tls_rcd_num; + SOCKBUF_UNLOCK(&so->so_rcv); + + INP_RUNLOCK(inp); + + MPASS(mst->sw->type == IF_SND_TAG_TYPE_TLS_RX); + return (mst->sw->snd_tag_modify(mst, &params)); +} + +static struct mbuf * +ktls_dup_rx_chain(struct mbuf *m) +{ + struct mbuf *top; + struct mbuf **pp; + + MPASS(m != NULL); + + pp = &top; + + do { + *pp = m_get2(m->m_len, M_WAITOK, MT_DATA, M_NOTREADY); + if (m->m_flags & M_DECRYPTED) + m_copydata(m, 0, ((*pp)->m_len = m->m_len), mtod(*pp, caddr_t)); + else + memset(mtod(*pp, caddr_t), 0, ((*pp)->m_len = m->m_len)); + pp = &(*pp)->m_next; + } while ((m = m->m_next) != NULL); + + *pp = NULL; + + return (top); +} + static void ktls_decrypt(struct socket *so) { - char tls_header[MBUF_PEXT_HDR_LEN]; + char tls_header[MBUF_PEXT_HDR_LEN] __aligned(8); struct ktls_session *tls; struct sockbuf *sb; struct tls_record_layer *hdr; @@ -1911,6 +2196,7 @@ struct mbuf *control, *data, *m; uint64_t seqno; int error, remain, tls_len, trail_len; + int state; hdr = (struct tls_record_layer *)tls_header; sb = 
&so->so_rcv; @@ -1972,7 +2258,53 @@ SBCHECK(sb); SOCKBUF_UNLOCK(sb); - error = tls->sw_decrypt(tls, hdr, data, seqno, &trail_len); + /* get crypto state for this TLS record */ + state = ktls_mbuf_crypto_state(data, 0, tls_len); + + switch (state) { + struct mbuf **pp; + struct mbuf *m0; + struct mbuf *m1; + case 0: + pp = &data; + m0 = data; + m1 = ktls_dup_rx_chain(data); + + /* Perform XOR of crypto sequence. */ + error = tls->sw_recrypt(tls, hdr, m1, seqno); + if (__predict_false(error != 0)) { + m_freem(m1); + break; + } + + /* Reconstruct encrypted mbuf data. */ + while (m0 != NULL) { + if (m0->m_flags & M_DECRYPTED) { + *pp = m1; + pp = &(*pp)->m_next; + m0 = m_free(m0); + m1 = m1->m_next; + } else { + *pp = m0; + pp = &(*pp)->m_next; + m0 = m0->m_next; + m1 = m_free(m1); + } + } + *pp = NULL; + + MPASS(m1 == NULL); + + /* FALLTHROUGH */ + case 1: + error = tls->sw_decrypt(tls, hdr, data, seqno, &trail_len); + break; + default: + error = 0; + trail_len = tls->params.tls_tlen; /* XXX */ + break; + } + if (error) { counter_u64_add(ktls_offload_failed_crypto, 1); @@ -2051,19 +2383,31 @@ remain = be16toh(tgr.tls_length); m = data; for (m = data; remain > m->m_len; m = m->m_next) { - m->m_flags &= ~M_NOTREADY; + m->m_flags &= ~(M_NOTREADY | M_DECRYPTED); remain -= m->m_len; } m->m_len = remain; m_freem(m->m_next); m->m_next = NULL; - m->m_flags &= ~M_NOTREADY; + m->m_flags &= ~(M_NOTREADY | M_DECRYPTED); /* Set EOR on the final mbuf. 
*/ m->m_flags |= M_EOR; } sbappendcontrol_locked(sb, data, control, 0); + + if (__predict_false(state != -1)) { + sb->sb_flags |= SB_TLS_RX_RESYNC; + SOCKBUF_UNLOCK(sb); + ktls_resync_ifnet(so, tls_len, seqno); + SOCKBUF_LOCK(sb); + } else if (__predict_false(sb->sb_flags & SB_TLS_RX_RESYNC)) { + sb->sb_flags &= ~SB_TLS_RX_RESYNC; + SOCKBUF_UNLOCK(sb); + ktls_resync_ifnet(so, 0, seqno); + SOCKBUF_LOCK(sb); + } } sb->sb_flags &= ~SB_TLS_RX_RUNNING; diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c --- a/sys/kern/uipc_mbuf.c +++ b/sys/kern/uipc_mbuf.c @@ -278,13 +278,14 @@ { struct mbuf *m; + flags |= M_DEMOTEFLAGS; + for (m = all ? m0 : m0->m_next; m != NULL; m = m->m_next) { KASSERT(m->m_nextpkt == NULL, ("%s: m_nextpkt in m %p, m0 %p", __func__, m, m0)); if (m->m_flags & M_PKTHDR) m_demote_pkthdr(m); - m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | - M_EXTPG | flags); + m->m_flags &= flags; } } diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c --- a/sys/kern/uipc_sockbuf.c +++ b/sys/kern/uipc_sockbuf.c @@ -54,6 +54,8 @@ #include #include +#include + /* * Function pointer set by the AIO routines so that the socket buffer code * can call back into the AIO module if it is loaded. @@ -924,14 +926,33 @@ sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m) { struct mbuf *n; + int flags = 0; SBLASTMBUFCHK(sb); + MPASS((m->m_flags & M_PKTHDR) != 0); + /* Remove all packet headers and mbuf tags to get a pure data chain. 
*/ - m_demote(m, 1, 0); + for (n = m; n != NULL; n = n->m_next) { + if (n->m_flags & M_PKTHDR) { + switch (n->m_pkthdr.csum_flags & CSUM_TLS_MASK) { + case CSUM_TLS_DECRYPTED: + m_demote_pkthdr(n); + /* mark all subsequent packets decrypted */ + flags = M_NOTREADY | M_DECRYPTED; + break; + default: + m_demote_pkthdr(n); + /* mark all subsequent packets not ready */ + flags = M_NOTREADY; + break; + } + } + + n->m_flags &= M_DEMOTEFLAGS; + n->m_flags |= flags; + } - for (n = m; n != NULL; n = n->m_next) - n->m_flags |= M_NOTREADY; sbcompress_ktls_rx(sb, m, sb->sb_mtlstail); ktls_check_rx(sb); } @@ -1386,7 +1407,8 @@ if (n && M_WRITABLE(n) && ((sb->sb_flags & SB_NOCOALESCE) == 0) && - !(n->m_flags & (M_EXTPG)) && + !((m->m_flags ^ n->m_flags) & M_DECRYPTED) && + !(n->m_flags & M_EXTPG) && m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ m->m_len <= M_TRAILINGSPACE(n)) { m_copydata(m, 0, m->m_len, mtodo(n, n->m_len)); diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c --- a/sys/net/if_lagg.c +++ b/sys/net/if_lagg.c @@ -1764,6 +1764,14 @@ .type = IF_SND_TAG_TYPE_TLS }; +static const struct if_snd_tag_sw lagg_snd_tag_tls_rx_sw = { + .snd_tag_modify = lagg_snd_tag_modify, + .snd_tag_query = lagg_snd_tag_query, + .snd_tag_free = lagg_snd_tag_free, + .next_snd_tag = lagg_next_snd_tag, + .type = IF_SND_TAG_TYPE_TLS_RX +}; + #ifdef RATELIMIT static const struct if_snd_tag_sw lagg_snd_tag_tls_rl_sw = { .snd_tag_modify = lagg_snd_tag_modify, @@ -1852,6 +1860,9 @@ case IF_SND_TAG_TYPE_TLS: sw = &lagg_snd_tag_tls_sw; break; + case IF_SND_TAG_TYPE_TLS_RX: + sw = &lagg_snd_tag_tls_rx_sw; + break; #ifdef RATELIMIT case IF_SND_TAG_TYPE_TLS_RATE_LIMIT: sw = &lagg_snd_tag_tls_rl_sw; diff --git a/sys/net/if_var.h b/sys/net/if_var.h --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -192,7 +192,8 @@ #define IF_SND_TAG_TYPE_UNLIMITED 1 #define IF_SND_TAG_TYPE_TLS 2 #define IF_SND_TAG_TYPE_TLS_RATE_LIMIT 3 -#define IF_SND_TAG_TYPE_MAX 4 +#define IF_SND_TAG_TYPE_TLS_RX 4 +#define 
IF_SND_TAG_TYPE_MAX 5 struct if_snd_tag_alloc_header { uint32_t type; /* send tag type, see IF_SND_TAG_XXX */ @@ -229,11 +230,26 @@ uint32_t flags; /* M_NOWAIT or M_WAITOK */ }; +struct if_snd_tag_modify_tls_rx { + /* TCP sequence number of TLS header in host endian format */ + uint32_t tls_hdr_tcp_sn; + + /* + * TLS record length, including all headers, data and trailers. + * If the tls_rec_length is zero, it means HW decryption resumed. + */ + uint32_t tls_rec_length; + + /* TLS record number in host endian format */ + uint64_t tls_rec_number; +}; + union if_snd_tag_alloc_params { struct if_snd_tag_alloc_header hdr; struct if_snd_tag_alloc_rate_limit rate_limit; struct if_snd_tag_alloc_rate_limit unlimited; struct if_snd_tag_alloc_tls tls; + struct if_snd_tag_alloc_tls tls_rx; struct if_snd_tag_alloc_tls_rate_limit tls_rate_limit; }; @@ -241,6 +257,7 @@ struct if_snd_tag_rate_limit_params rate_limit; struct if_snd_tag_rate_limit_params unlimited; struct if_snd_tag_rate_limit_params tls_rate_limit; + struct if_snd_tag_modify_tls_rx tls_rx; }; union if_snd_tag_query_params { diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -231,7 +231,7 @@ */ if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) { tls = ktls_hold(m->m_next->m_epg_tls); - mst = tls->snd_tag; + mst = tls->snd_rcv_tag; /* * If a TLS session doesn't have a valid tag, it must @@ -279,7 +279,7 @@ #ifdef KERN_TLS if (tls != NULL) { if (error == EAGAIN) - error = ktls_output_eagain(inp, tls); + error = ktls_output_eagain(inp); ktls_free(tls); } #endif diff --git a/sys/netinet/tcp_lro.c b/sys/netinet/tcp_lro.c --- a/sys/netinet/tcp_lro.c +++ b/sys/netinet/tcp_lro.c @@ -395,7 +395,8 @@ htons(m->m_pkthdr.ether_vtag) & htons(EVL_VLID_MASK); } /* Store decrypted flag, if any. 
*/ - if (__predict_false(m->m_flags & M_DECRYPTED)) + if (__predict_false((m->m_pkthdr.csum_flags & + CSUM_TLS_MASK) == CSUM_TLS_DECRYPTED)) po->data.lro_flags |= LRO_FLAG_DECRYPTED; } @@ -833,6 +834,8 @@ le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID; le->m_head->m_pkthdr.csum_data = 0xffff; + if (__predict_false(le->outer.data.lro_flags & LRO_FLAG_DECRYPTED)) + le->m_head->m_pkthdr.csum_flags |= CSUM_TLS_DECRYPTED; break; case LRO_TYPE_IPV6_TCP: csum = tcp_lro_update_checksum(&le->inner, le, @@ -844,6 +847,8 @@ le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR; le->m_head->m_pkthdr.csum_data = 0xffff; + if (__predict_false(le->outer.data.lro_flags & LRO_FLAG_DECRYPTED)) + le->m_head->m_pkthdr.csum_flags |= CSUM_TLS_DECRYPTED; break; case LRO_TYPE_NONE: switch (le->outer.data.lro_type) { @@ -854,6 +859,8 @@ le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR | CSUM_IP_CHECKED | CSUM_IP_VALID; le->m_head->m_pkthdr.csum_data = 0xffff; + if (__predict_false(le->outer.data.lro_flags & LRO_FLAG_DECRYPTED)) + le->m_head->m_pkthdr.csum_flags |= CSUM_TLS_DECRYPTED; break; case LRO_TYPE_IPV6_TCP: csum = tcp_lro_update_checksum(&le->outer, le, @@ -862,6 +869,8 @@ le->m_head->m_pkthdr.csum_flags = CSUM_DATA_VALID | CSUM_PSEUDO_HDR; le->m_head->m_pkthdr.csum_data = 0xffff; + if (__predict_false(le->outer.data.lro_flags & LRO_FLAG_DECRYPTED)) + le->m_head->m_pkthdr.csum_flags |= CSUM_TLS_DECRYPTED; break; default: break; diff --git a/sys/netinet/tcp_ratelimit.c b/sys/netinet/tcp_ratelimit.c --- a/sys/netinet/tcp_ratelimit.c +++ b/sys/netinet/tcp_ratelimit.c @@ -1354,14 +1354,14 @@ if (rte) rl_increment_using(rte); #ifdef KERN_TLS - if (rte != NULL && tls != NULL && tls->snd_tag != NULL) { + if (rte != NULL && tls != NULL && tls->snd_rcv_tag != NULL) { /* * Fake a route change error to reset the TLS * send tag. This will convert the existing * tag to a TLS ratelimit tag. 
*/ - MPASS(tls->snd_tag->sw->type == IF_SND_TAG_TYPE_TLS); - ktls_output_eagain(tp->t_inpcb, tls); + MPASS(tls->snd_rcv_tag->sw->type == IF_SND_TAG_TYPE_TLS); + ktls_output_eagain(tp->t_inpcb); } #endif } else { @@ -1404,8 +1404,8 @@ if (tp->t_inpcb->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) { tls = tp->t_inpcb->inp_socket->so_snd.sb_tls_info; MPASS(tls->mode == TCP_TLS_MODE_IFNET); - if (tls->snd_tag != NULL && - tls->snd_tag->sw->type != IF_SND_TAG_TYPE_TLS_RATE_LIMIT) { + if (tls->snd_rcv_tag != NULL && + tls->snd_rcv_tag->sw->type != IF_SND_TAG_TYPE_TLS_RATE_LIMIT) { /* * NIC probably doesn't support ratelimit TLS * tags if it didn't allocate one when an diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -344,7 +344,7 @@ */ if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) { tls = ktls_hold(m->m_next->m_epg_tls); - mst = tls->snd_tag; + mst = tls->snd_rcv_tag; /* * If a TLS session doesn't have a valid tag, it must @@ -392,7 +392,7 @@ #ifdef KERN_TLS if (tls != NULL) { if (error == EAGAIN) - error = ktls_output_eagain(inp, tls); + error = ktls_output_eagain(inp); ktls_free(tls); } #endif diff --git a/sys/opencrypto/ktls_ocf.c b/sys/opencrypto/ktls_ocf.c --- a/sys/opencrypto/ktls_ocf.c +++ b/sys/opencrypto/ktls_ocf.c @@ -458,15 +458,11 @@ struct tls_aead_data ad; struct cryptop crp; struct ktls_ocf_session *os; - struct ocf_operation oo; int error; uint16_t tls_comp_len; os = tls->ocf_session; - oo.os = os; - oo.done = false; - crypto_initreq(&crp, os->sid); /* Setup the IV. 
*/ @@ -517,6 +513,79 @@ return (error); } +static int +ktls_ocf_tls12_aead_recrypt(struct ktls_session *tls, + const struct tls_record_layer *hdr, struct mbuf *m, uint64_t seqno) +{ + char tag[MAX(AES_GMAC_HASH_LEN, POLY1305_HASH_LEN)] __aligned(8); + struct tls_aead_data ad; + struct cryptop crp; + struct ktls_ocf_session *os; + int error; + uint16_t tls_comp_len; + + if (tls->params.tls_tlen > sizeof(tag)) + return (ENOMEM); + + os = tls->ocf_session; + + crypto_initreq(&crp, os->sid); + + /* Setup the IV. */ + if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16) { + memcpy(crp.crp_iv, tls->params.iv, TLS_AEAD_GCM_LEN); + memcpy(crp.crp_iv + TLS_AEAD_GCM_LEN, hdr + 1, + sizeof(uint64_t)); + } else { + /* + * Chacha20-Poly1305 constructs the IV for TLS 1.2 + * identically to constructing the IV for AEAD in TLS + * 1.3. + */ + memcpy(crp.crp_iv, tls->params.iv, tls->params.iv_len); + *(uint64_t *)(crp.crp_iv + 4) ^= htobe64(seqno); + } + + /* Setup the AAD. */ + if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16) + tls_comp_len = ntohs(hdr->tls_length) - + (AES_GMAC_HASH_LEN + sizeof(uint64_t)); + else + tls_comp_len = ntohs(hdr->tls_length) - POLY1305_HASH_LEN; + ad.seq = htobe64(seqno); + ad.type = hdr->tls_type; + ad.tls_vmajor = hdr->tls_vmajor; + ad.tls_vminor = hdr->tls_vminor; + ad.tls_length = htons(tls_comp_len); + crp.crp_aad = &ad; + crp.crp_aad_length = sizeof(ad); + + crp.crp_payload_start = tls->params.tls_hlen; + crp.crp_payload_length = tls_comp_len; + crp.crp_digest_start = crp.crp_payload_start + crp.crp_payload_length; + + crp.crp_op = CRYPTO_OP_ENCRYPT | CRYPTO_OP_COMPUTE_DIGEST; + crp.crp_flags = CRYPTO_F_CBIMM | CRYPTO_F_IV_SEPARATE; + crypto_use_mbuf(&crp, m); + + if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16) + counter_u64_add(ocf_tls12_gcm_crypts, 1); + else + counter_u64_add(ocf_tls12_chacha20_crypts, 1); + + counter_u64_add(ocf_inplace, 1); + + m_copydata(m, tls->params.tls_hlen + tls_comp_len, 
tls->params.tls_tlen, tag); + + error = ktls_ocf_dispatch(os, &crp); + + m_copyback(m, tls->params.tls_hlen + tls_comp_len, tls->params.tls_tlen, tag); + + crypto_destroyreq(&crp); + + return (error); +} + static int ktls_ocf_tls13_aead_encrypt(struct ktls_ocf_encrypt_state *state, struct ktls_session *tls, struct mbuf *m, struct iovec *outiov, @@ -755,6 +824,7 @@ tls->sw_encrypt = ktls_ocf_tls12_aead_encrypt; } else { tls->sw_decrypt = ktls_ocf_tls12_aead_decrypt; + tls->sw_recrypt = ktls_ocf_tls12_aead_recrypt; } } else { tls->sw_encrypt = ktls_ocf_tls_cbc_encrypt; diff --git a/sys/sys/ktls.h b/sys/sys/ktls.h --- a/sys/sys/ktls.h +++ b/sys/sys/ktls.h @@ -180,16 +180,22 @@ int (*sw_encrypt)(struct ktls_ocf_encrypt_state *state, struct ktls_session *tls, struct mbuf *m, struct iovec *outiov, int outiovcnt); - int (*sw_decrypt)(struct ktls_session *tls, - const struct tls_record_layer *hdr, struct mbuf *m, - uint64_t seqno, int *trailer_len); + struct { + int (*sw_decrypt)(struct ktls_session *tls, + const struct tls_record_layer *hdr, struct mbuf *m, + uint64_t seqno, int *trailer_len); + int (*sw_recrypt)(struct ktls_session *tls, + const struct tls_record_layer *hdr, struct mbuf *m, + uint64_t seqno); + }; }; struct ktls_ocf_session *ocf_session; - struct m_snd_tag *snd_tag; + struct m_snd_tag *snd_rcv_tag; struct tls_session_params params; u_int wq_index; volatile u_int refcount; int mode; + int direction; struct task reset_tag_task; struct task disable_ifnet_task; @@ -207,6 +213,7 @@ extern unsigned int ktls_ifnet_max_rexmit_pct; void ktls_check_rx(struct sockbuf *sb); +int ktls_mbuf_crypto_state(struct mbuf *mb, int offset, int len); void ktls_disable_ifnet(void *arg); int ktls_enable_rx(struct socket *so, struct tls_enable *en); int ktls_enable_tx(struct socket *so, struct tls_enable *en); @@ -219,7 +226,8 @@ int ktls_get_rx_mode(struct socket *so, int *modep); int ktls_set_tx_mode(struct socket *so, int mode); int ktls_get_tx_mode(struct socket *so, int 
*modep); -int ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls); +int ktls_get_rx_sequence(struct inpcb *inp, uint32_t *tcpseq, uint64_t *tlsseq); +int ktls_output_eagain(struct inpcb *inp); #ifdef RATELIMIT int ktls_modify_txrtlmt(struct ktls_session *tls, uint64_t max_pacing_rate); #endif diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -496,6 +496,12 @@ (M_PKTHDR|M_EOR|M_RDONLY|M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG|M_TSTMP| \ M_TSTMP_HPREC|M_TSTMP_LRO|M_PROTOFLAGS) +/* + * Flags preserved during demote. + */ +#define M_DEMOTEFLAGS \ + (M_EXT | M_RDONLY | M_NOFREE | M_EXTPG) + /* * Mbuf flag description for use with printf(9) %b identifier. */ @@ -721,6 +727,8 @@ #define CSUM_UDP_IPV6 CSUM_IP6_UDP #define CSUM_TCP_IPV6 CSUM_IP6_TCP #define CSUM_SCTP_IPV6 CSUM_IP6_SCTP +#define CSUM_TLS_MASK (CSUM_L5_CALC|CSUM_L5_VALID) +#define CSUM_TLS_DECRYPTED CSUM_L5_CALC /* * mbuf types describing the content of the mbuf (including external storage). diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h --- a/sys/sys/sockbuf.h +++ b/sys/sys/sockbuf.h @@ -53,6 +53,7 @@ #define SB_STOP 0x1000 /* backpressure indicator */ #define SB_AIO_RUNNING 0x2000 /* AIO operation running */ #define SB_TLS_IFNET 0x4000 /* has used / is using ifnet KTLS */ +#define SB_TLS_RX_RESYNC 0x8000 /* KTLS RX lost HW sync */ #define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ #define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */