diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c
--- a/sys/kern/uipc_ktls.c
+++ b/sys/kern/uipc_ktls.c
@@ -503,7 +503,7 @@
 #if defined(INET) || defined(INET6)
 static int
 ktls_create_session(struct socket *so, struct tls_enable *en,
-    struct ktls_session **tlsp)
+    struct ktls_session **tlsp, int direction)
 {
     struct ktls_session *tls;
     int error;
@@ -622,6 +622,7 @@
     TASK_INIT(&tls->reset_tag_task, 0, ktls_reset_send_tag, tls);
     tls->wq_index = ktls_get_cpu(so);
+    tls->direction = direction;
 
     tls->params.cipher_algorithm = en->cipher_algorithm;
     tls->params.auth_algorithm = en->auth_algorithm;
@@ -759,6 +760,7 @@
     /* Copy fields from existing session. */
     tls_new->params = tls->params;
     tls_new->wq_index = tls->wq_index;
+    tls_new->direction = tls->direction;
 
     /* Deep copy keys. */
     if (tls_new->params.auth_key != NULL) {
@@ -983,28 +985,132 @@
     return (error);
 }
 
+/*
+ * Common code for allocating a TLS receive tag for doing HW
+ * decryption of TLS data.
+ *
+ * This function allocates a new TLS receive tag on whatever interface
+ * the connection is currently routed over.
+ */
 static int
-ktls_try_ifnet(struct socket *so, struct ktls_session *tls, bool force)
+ktls_alloc_rcv_tag(struct inpcb *inp, struct ktls_session *tls, bool force,
+    struct m_snd_tag **mstp)
 {
-    struct m_snd_tag *mst;
+    union if_snd_tag_alloc_params params;
+    struct ifnet *ifp;
+    struct nhop_object *nh;
     int error;
 
-    error = ktls_alloc_snd_tag(so->so_pcb, tls, force, &mst);
-    if (error == 0) {
-        tls->mode = TCP_TLS_MODE_IFNET;
-        tls->snd_tag = mst;
-        switch (tls->params.cipher_algorithm) {
-        case CRYPTO_AES_CBC:
-            counter_u64_add(ktls_ifnet_cbc, 1);
-            break;
-        case CRYPTO_AES_NIST_GCM_16:
-            counter_u64_add(ktls_ifnet_gcm, 1);
-            break;
-        case CRYPTO_CHACHA20_POLY1305:
-            counter_u64_add(ktls_ifnet_chacha20, 1);
-            break;
-        }
-    }
+    INP_RLOCK(inp);
+    if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+        INP_RUNLOCK(inp);
+        return (ECONNRESET);
+    }
+    if (inp->inp_socket == NULL) {
+        INP_RUNLOCK(inp);
+        return (ECONNRESET);
+    }
+
+    /*
+     * Check administrative controls on ifnet TLS to determine if
+     * ifnet TLS should be denied.
+     *
+     * - Always permit 'force' requests.
+     * - ktls_ifnet_permitted == 0: always deny.
+     */
+    if (!force && ktls_ifnet_permitted == 0) {
+        INP_RUNLOCK(inp);
+        return (ENXIO);
+    }
+
+    /*
+     * XXX: Use the cached route in the inpcb to find the
+     * interface.  This should perhaps instead use
+     * rtalloc1_fib(dst, 0, 0, fibnum).  Since KTLS is only
+     * enabled after a connection has completed key negotiation in
+     * userland, the cached route will be present in practice.
+     */
+    nh = inp->inp_route.ro_nh;
+    if (nh == NULL) {
+        INP_RUNLOCK(inp);
+        return (ENXIO);
+    }
+    ifp = nh->nh_ifp;
+    if_ref(ifp);
+
+    params.hdr.type = IF_SND_TAG_TYPE_TLS_RX;
+    params.hdr.flowid = inp->inp_flowid;
+    params.hdr.flowtype = inp->inp_flowtype;
+    params.hdr.numa_domain = inp->inp_numa_domain;
+    params.tls_rx.inp = inp;
+    params.tls_rx.tls = tls;
+    params.tls_rx.vlan_id = 0;
+
+    INP_RUNLOCK(inp);
+
+    if ((ifp->if_capenable & IFCAP_MEXTPG) == 0) {
+        error = EOPNOTSUPP;
+        goto out;
+    }
+
+    /* XXX reusing TXTLS flags */
+    if (inp->inp_vflag & INP_IPV6) {
+        if ((ifp->if_capenable & IFCAP_TXTLS6) == 0) {
+            error = EOPNOTSUPP;
+            goto out;
+        }
+    } else {
+        if ((ifp->if_capenable & IFCAP_TXTLS4) == 0) {
+            error = EOPNOTSUPP;
+            goto out;
+        }
+    }
+    error = m_snd_tag_alloc(ifp, &params, mstp);
+out:
+    if_rele(ifp);
+    return (error);
+}
+
+static int
+ktls_try_ifnet(struct socket *so, struct ktls_session *tls, int direction,
+    bool force)
+{
+    struct m_snd_tag *mst;
+    int error;
+
+    switch (direction) {
+    case KTLS_TX:
+        error = ktls_alloc_snd_tag(so->so_pcb, tls, force, &mst);
+        if (__predict_false(error != 0))
+            goto done;
+        break;
+    case KTLS_RX:
+        error = ktls_alloc_rcv_tag(so->so_pcb, tls, force, &mst);
+        if (__predict_false(error != 0))
+            goto done;
+        break;
+    default:
+        return (EINVAL);
+    }
+
+    tls->mode = TCP_TLS_MODE_IFNET;
+    tls->snd_tag = mst;
+
+    switch (tls->params.cipher_algorithm) {
+    case CRYPTO_AES_CBC:
+        counter_u64_add(ktls_ifnet_cbc, 1);
+        break;
+    case CRYPTO_AES_NIST_GCM_16:
+        counter_u64_add(ktls_ifnet_gcm, 1);
+        break;
+    case CRYPTO_CHACHA20_POLY1305:
+        counter_u64_add(ktls_ifnet_chacha20, 1);
+        break;
+    default:
+        break;
+    }
+done:
     return (error);
 }
 
@@ -1185,7 +1291,7 @@
     if (en->cipher_algorithm == CRYPTO_AES_CBC && !ktls_cbc_enable)
         return (ENOTSUP);
 
-    error = ktls_create_session(so, en, &tls);
+    error = ktls_create_session(so, en, &tls, KTLS_RX);
     if (error)
         return (error);
 
@@ -1206,10 +1312,13 @@
     ktls_check_rx(&so->so_rcv);
     SOCKBUF_UNLOCK(&so->so_rcv);
 
+    /* Prefer TOE -> ifnet TLS -> software TLS. */
 #ifdef TCP_OFFLOAD
     error = ktls_try_toe(so, tls, KTLS_RX);
     if (error)
 #endif
+        error = ktls_try_ifnet(so, tls, KTLS_RX, false);
+    if (error)
         ktls_use_sw(tls);
 
     counter_u64_add(ktls_offload_total, 1);
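
For context, ktls_enable_rx() above is reached via the TCP_RXTLS_ENABLE socket option once userland has finished the TLS handshake. A minimal userland sketch follows, assuming an established TLS 1.3 AES-128-GCM session whose traffic key and implicit IV come from the TLS library; enable_ktls_rx() and its parameters are invented for illustration, while the field and macro names are those of <sys/ktls.h> and <netinet/tcp.h>:

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <crypto/cryptodev.h>   /* CRYPTO_AES_NIST_GCM_16 */
    #include <sys/ktls.h>           /* struct tls_enable */
    #include <string.h>

    static int
    enable_ktls_rx(int fd, const uint8_t *key, size_t keylen,
        const uint8_t *iv, size_t ivlen)
    {
            struct tls_enable en;

            memset(&en, 0, sizeof(en));
            en.cipher_algorithm = CRYPTO_AES_NIST_GCM_16;
            en.cipher_key = key;            /* AEAD traffic key */
            en.cipher_key_len = keylen;
            en.iv = iv;                     /* implicit IV */
            en.iv_len = ivlen;
            en.tls_vmajor = TLS_MAJOR_VER_ONE;
            en.tls_vminor = TLS_MINOR_VER_THREE;

            /* On success the kernel decrypts incoming records. */
            return (setsockopt(fd, IPPROTO_TCP, TCP_RXTLS_ENABLE,
                &en, sizeof(en)));
    }
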
@@ -1252,7 +1361,7 @@
     if (mb_use_ext_pgs == 0)
         return (ENXIO);
 
-    error = ktls_create_session(so, en, &tls);
+    error = ktls_create_session(so, en, &tls, KTLS_TX);
     if (error)
         return (error);
 
@@ -1261,7 +1370,7 @@
     error = ktls_try_toe(so, tls, KTLS_TX);
     if (error)
 #endif
-        error = ktls_try_ifnet(so, tls, false);
+        error = ktls_try_ifnet(so, tls, KTLS_TX, false);
     if (error)
         error = ktls_try_sw(so, tls, KTLS_TX);
 
@@ -1360,6 +1469,36 @@
     return (0);
 }
 
+int
+ktls_get_rx_sequence(struct inpcb *inp, uint32_t *tcpseq, uint64_t *tlsseq)
+{
+    struct socket *so;
+    struct tcpcb *tp;
+
+    INP_RLOCK(inp);
+    so = inp->inp_socket;
+    if (__predict_false(so == NULL)) {
+        INP_RUNLOCK(inp);
+        return (EINVAL);
+    }
+    if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+        INP_RUNLOCK(inp);
+        return (ECONNRESET);
+    }
+
+    tp = intotcpcb(inp);
+    MPASS(tp != NULL);
+
+    SOCKBUF_LOCK(&so->so_rcv);
+    *tcpseq = tp->rcv_nxt - so->so_rcv.sb_tlscc;
+    *tlsseq = so->so_rcv.sb_tls_seqno;
+    SOCKBUF_UNLOCK(&so->so_rcv);
+
+    INP_RUNLOCK(inp);
+
+    return (0);
+}
+
 int
 ktls_get_tx_mode(struct socket *so, int *modep)
 {
@@ -1421,7 +1560,7 @@
     tls_new = ktls_clone_session(tls);
 
     if (mode == TCP_TLS_MODE_IFNET)
-        error = ktls_try_ifnet(so, tls_new, true);
+        error = ktls_try_ifnet(so, tls_new, KTLS_TX, true);
     else
         error = ktls_try_sw(so, tls_new, KTLS_TX);
     if (error) {
@@ -1480,19 +1619,21 @@
 }
 
 /*
- * Try to allocate a new TLS send tag.  This task is scheduled when
- * ip_output detects a route change while trying to transmit a packet
- * holding a TLS record.  If a new tag is allocated, replace the tag
- * in the TLS session.  Subsequent packets on the connection will use
- * the new tag.  If a new tag cannot be allocated, drop the
- * connection.
+ * ktls_reset_send_tag - try to allocate a new TLS send or receive tag.
+ *
+ * This task is scheduled when ip_output detects a route change while
+ * trying to transmit a packet holding a TLS record.  If a new tag is
+ * allocated, replace the tag in the TLS session.  Subsequent packets
+ * on the connection will use the new tag.  If a new tag cannot be
+ * allocated, drop the connection.
  */
 static void
 ktls_reset_send_tag(void *context, int pending)
 {
     struct epoch_tracker et;
     struct ktls_session *tls;
-    struct m_snd_tag *old, *new;
+    struct m_snd_tag *snd_old;
+    struct m_snd_tag *snd_new;
     struct inpcb *inp;
     struct tcpcb *tp;
     int error;
@@ -1513,67 +1654,76 @@
      * pointer.
      */
     INP_WLOCK(inp);
-    old = tls->snd_tag;
+    snd_old = tls->snd_tag;
     tls->snd_tag = NULL;
     INP_WUNLOCK(inp);
-    if (old != NULL)
-        m_snd_tag_rele(old);
 
-    error = ktls_alloc_snd_tag(inp, tls, true, &new);
+    if (snd_old != NULL)
+        m_snd_tag_rele(snd_old);
 
-    if (error == 0) {
-        INP_WLOCK(inp);
-        tls->snd_tag = new;
-        mtx_pool_lock(mtxpool_sleep, tls);
-        tls->reset_pending = false;
-        mtx_pool_unlock(mtxpool_sleep, tls);
-        if (!in_pcbrele_wlocked(inp))
-            INP_WUNLOCK(inp);
-
-        counter_u64_add(ktls_ifnet_reset, 1);
-
-        /*
-         * XXX: Should we kick tcp_output explicitly now that
-         * the send tag is fixed or just rely on timers?
-         */
-    } else {
-        NET_EPOCH_ENTER(et);
-        INP_WLOCK(inp);
-        if (!in_pcbrele_wlocked(inp)) {
-            if (!(inp->inp_flags & INP_TIMEWAIT) &&
-                !(inp->inp_flags & INP_DROPPED)) {
-                tp = intotcpcb(inp);
-                CURVNET_SET(tp->t_vnet);
-                tp = tcp_drop(tp, ECONNABORTED);
-                CURVNET_RESTORE();
-                if (tp != NULL)
-                    INP_WUNLOCK(inp);
-                counter_u64_add(ktls_ifnet_reset_dropped, 1);
-            } else
-                INP_WUNLOCK(inp);
-        }
-        NET_EPOCH_EXIT(et);
-
-        counter_u64_add(ktls_ifnet_reset_failed, 1);
-
-        /*
-         * Leave reset_pending true to avoid future tasks while
-         * the socket goes away.
-         */
-    }
+    switch (tls->direction) {
+    case KTLS_TX:
+        error = ktls_alloc_snd_tag(inp, tls, true, &snd_new);
+        break;
+    case KTLS_RX:
+        error = ktls_alloc_rcv_tag(inp, tls, true, &snd_new);
+        break;
+    default:
+        goto drop_connection;
+    }
+    if (error != 0)
+        goto drop_connection;
+
+    INP_WLOCK(inp);
+    tls->snd_tag = snd_new;
+
+    mtx_pool_lock(mtxpool_sleep, tls);
+    tls->reset_pending = false;
+    mtx_pool_unlock(mtxpool_sleep, tls);
+
+    if (!in_pcbrele_wlocked(inp))
+        INP_WUNLOCK(inp);
+
+    counter_u64_add(ktls_ifnet_reset, 1);
 
     ktls_free(tls);
-}
 
-int
-ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls)
-{
+    /*
+     * XXX: Should we kick tcp_output explicitly now that
+     * the send tag is fixed or just rely on timers?
+     */
+    return;
 
-    if (inp == NULL)
-        return (ENOBUFS);
+drop_connection:
+    NET_EPOCH_ENTER(et);
+    INP_WLOCK(inp);
+    if (!in_pcbrele_wlocked(inp)) {
+        if (!(inp->inp_flags & INP_TIMEWAIT) &&
+            !(inp->inp_flags & INP_DROPPED)) {
+            tp = intotcpcb(inp);
+            CURVNET_SET(tp->t_vnet);
+            tp = tcp_drop(tp, ECONNABORTED);
+            CURVNET_RESTORE();
+            if (tp != NULL)
+                INP_WUNLOCK(inp);
+            counter_u64_add(ktls_ifnet_reset_dropped, 1);
+        } else
+            INP_WUNLOCK(inp);
+    }
+    NET_EPOCH_EXIT(et);
 
-    INP_LOCK_ASSERT(inp);
+    counter_u64_add(ktls_ifnet_reset_failed, 1);
 
+    /*
+     * Leave reset_pending true to avoid future tasks while
+     * the socket goes away.
+     */
+    ktls_free(tls);
+}
+
+static void
+ktls_output_eagain_tls(struct inpcb *inp, struct ktls_session *tls)
+{
     /*
      * See if we should schedule a task to update the send tag for
      * this session.
@@ -1587,6 +1737,30 @@
         taskqueue_enqueue(taskqueue_thread, &tls->reset_tag_task);
     }
     mtx_pool_unlock(mtxpool_sleep, tls);
+}
+
+int
+ktls_output_eagain(struct inpcb *inp)
+{
+    struct socket *so;
+    struct ktls_session *tls;
+
+    if (__predict_false(inp == NULL))
+        goto done;
+    INP_LOCK_ASSERT(inp);
+
+    so = inp->inp_socket;
+    if (__predict_false(so == NULL))
+        goto done;
+
+    tls = so->so_rcv.sb_tls_info;
+    if (__predict_true(tls != NULL))
+        ktls_output_eagain_tls(inp, tls);
+
+    tls = so->so_snd.sb_tls_info;
+    if (__predict_true(tls != NULL))
+        ktls_output_eagain_tls(inp, tls);
+done:
     return (ENOBUFS);
 }
 
@@ -1615,10 +1789,11 @@
         return (0);
     }
 
-    MPASS(tls->snd_tag != NULL);
-    MPASS(tls->snd_tag->sw->type == IF_SND_TAG_TYPE_TLS_RATE_LIMIT);
-
     mst = tls->snd_tag;
+
+    MPASS(mst != NULL);
+    MPASS(mst->sw->type == IF_SND_TAG_TYPE_TLS_RATE_LIMIT);
+
     return (mst->sw->snd_tag_modify(mst, &params));
 }
 #endif
- */ - } else { - NET_EPOCH_ENTER(et); - INP_WLOCK(inp); - if (!in_pcbrele_wlocked(inp)) { - if (!(inp->inp_flags & INP_TIMEWAIT) && - !(inp->inp_flags & INP_DROPPED)) { - tp = intotcpcb(inp); - CURVNET_SET(tp->t_vnet); - tp = tcp_drop(tp, ECONNABORTED); - CURVNET_RESTORE(); - if (tp != NULL) - INP_WUNLOCK(inp); - counter_u64_add(ktls_ifnet_reset_dropped, 1); - } else - INP_WUNLOCK(inp); - } - NET_EPOCH_EXIT(et); + mtx_pool_lock(mtxpool_sleep, tls); + tls->reset_pending = false; + mtx_pool_unlock(mtxpool_sleep, tls); - counter_u64_add(ktls_ifnet_reset_failed, 1); + if (!in_pcbrele_wlocked(inp)) + INP_WUNLOCK(inp); - /* - * Leave reset_pending true to avoid future tasks while - * the socket goes away. - */ - } + counter_u64_add(ktls_ifnet_reset, 1); ktls_free(tls); -} -int -ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls) -{ + /* + * XXX: Should we kick tcp_output explicitly now that + * the send tag is fixed or just rely on timers? + */ + return; - if (inp == NULL) - return (ENOBUFS); +drop_connection: + NET_EPOCH_ENTER(et); + INP_WLOCK(inp); + if (!in_pcbrele_wlocked(inp)) { + if (!(inp->inp_flags & INP_TIMEWAIT) && + !(inp->inp_flags & INP_DROPPED)) { + tp = intotcpcb(inp); + CURVNET_SET(tp->t_vnet); + tp = tcp_drop(tp, ECONNABORTED); + CURVNET_RESTORE(); + if (tp != NULL) + INP_WUNLOCK(inp); + counter_u64_add(ktls_ifnet_reset_dropped, 1); + } else + INP_WUNLOCK(inp); + } + NET_EPOCH_EXIT(et); - INP_LOCK_ASSERT(inp); + counter_u64_add(ktls_ifnet_reset_failed, 1); + /* + * Leave reset_pending true to avoid future tasks while + * the socket goes away. + */ + ktls_free(tls); +} + +static void +ktls_output_eagain_tls(struct inpcb *inp, struct ktls_session *tls) +{ /* * See if we should schedule a task to update the send tag for * this session. @@ -1587,6 +1737,30 @@ taskqueue_enqueue(taskqueue_thread, &tls->reset_tag_task); } mtx_pool_unlock(mtxpool_sleep, tls); +} + +int +ktls_output_eagain(struct inpcb *inp) +{ + struct socket *so; + struct ktls_session *tls; + + if (__predict_false(inp == NULL)) + goto done; + INP_LOCK_ASSERT(inp); + + so = inp->inp_socket; + if (__predict_false(so == NULL)) + goto done; + + tls = so->so_rcv.sb_tls_info; + if (__predict_true(tls != NULL)) + ktls_output_eagain_tls(inp, tls); + + tls = so->so_snd.sb_tls_info; + if (__predict_true(tls != NULL)) + ktls_output_eagain_tls(inp, tls); +done: return (ENOBUFS); } @@ -1615,10 +1789,11 @@ return (0); } - MPASS(tls->snd_tag != NULL); - MPASS(tls->snd_tag->sw->type == IF_SND_TAG_TYPE_TLS_RATE_LIMIT); - mst = tls->snd_tag; + + MPASS(mst != NULL); + MPASS(mst->sw->type == IF_SND_TAG_TYPE_TLS_RATE_LIMIT); + return (mst->sw->snd_tag_modify(mst, ¶ms)); } #endif @@ -1901,7 +2076,7 @@ return (NULL); } } - n->m_flags |= M_NOTREADY; + n->m_flags |= (m->m_flags & (M_NOTREADY | M_DECRYPTED)); /* Store remainder in 'n'. */ n->m_len = m->m_len - remain; @@ -1986,6 +2161,86 @@ return (0); } +/* + * Check if a mbuf chain is fully decrypted at the given offset and + * length. Returns KTLS_MBUF_CRYPTO_ST_DECRYPTED if all data is + * decrypted. KTLS_MBUF_CRYPTO_ST_MIXED if there is a mix of encrypted + * and decrypted data. Else KTLS_MBUF_CRYPTO_ST_ENCRYPTED if all data + * is encrypted. 
+ */ +int +ktls_mbuf_crypto_state(struct mbuf *mb, int offset, int len) +{ + int m_flags_ored = 0; + int m_flags_anded = -1; + + for (; mb != NULL; mb = mb->m_next) { + if (offset < mb->m_len) + break; + offset -= mb->m_len; + } + offset += len; + + for (; mb != NULL; mb = mb->m_next) { + m_flags_ored |= mb->m_flags; + m_flags_anded &= mb->m_flags; + + if (offset <= mb->m_len) + break; + offset -= mb->m_len; + } + MPASS(mb != NULL || offset == 0); + + if ((m_flags_ored ^ m_flags_anded) & M_DECRYPTED) + return (KTLS_MBUF_CRYPTO_ST_MIXED); + else + return ((m_flags_ored & M_DECRYPTED) ? + KTLS_MBUF_CRYPTO_ST_DECRYPTED : + KTLS_MBUF_CRYPTO_ST_ENCRYPTED); +} + +/* + * ktls_resync_ifnet - get HW TLS RX back on track after packet loss + */ +static int +ktls_resync_ifnet(struct socket *so, uint32_t tls_len, uint64_t tls_rcd_num) +{ + union if_snd_tag_modify_params params; + struct m_snd_tag *mst; + struct inpcb *inp; + struct tcpcb *tp; + + mst = so->so_rcv.sb_tls_info->snd_tag; + if (__predict_false(mst == NULL)) + return (EINVAL); + + inp = sotoinpcb(so); + if (__predict_false(inp == NULL)) + return (EINVAL); + + INP_RLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_RUNLOCK(inp); + return (ECONNRESET); + } + + tp = intotcpcb(inp); + MPASS(tp != NULL); + + /* Get the TCP sequence number of the next valid TLS header. */ + SOCKBUF_LOCK(&so->so_rcv); + params.tls_rx.tls_hdr_tcp_sn = + tp->rcv_nxt - so->so_rcv.sb_tlscc - tls_len; + params.tls_rx.tls_rec_length = tls_len; + params.tls_rx.tls_seq_number = tls_rcd_num; + SOCKBUF_UNLOCK(&so->so_rcv); + + INP_RUNLOCK(inp); + + MPASS(mst->sw->type == IF_SND_TAG_TYPE_TLS_RX); + return (mst->sw->snd_tag_modify(mst, ¶ms)); +} + static void ktls_decrypt(struct socket *so) { @@ -1996,7 +2251,7 @@ struct tls_get_record tgr; struct mbuf *control, *data, *m; uint64_t seqno; - int error, remain, tls_len, trail_len; + int error, remain, tls_len, trail_len, state; bool tls13; uint8_t vminor, record_type; @@ -2067,13 +2322,51 @@ SBCHECK(sb); SOCKBUF_UNLOCK(sb); - error = tls->sw_decrypt(tls, hdr, data, seqno, &trail_len); - if (error == 0) { - if (tls13) + /* get crypto state for this TLS record */ + state = ktls_mbuf_crypto_state(data, 0, tls_len); + + switch (state) { + case KTLS_MBUF_CRYPTO_ST_MIXED: + error = tls->sw_recrypt(tls, hdr, data, seqno, &trail_len); + if (__predict_true(error == 0)) { + if (tls13) { + error = tls13_find_record_type(tls, data, + tls_len, &trail_len, &record_type); + } else { + record_type = hdr->tls_type; + } + } + break; + case KTLS_MBUF_CRYPTO_ST_ENCRYPTED: + error = tls->sw_decrypt(tls, hdr, data, seqno, &trail_len); + if (__predict_true(error == 0)) { + if (tls13) { + error = tls13_find_record_type(tls, data, + tls_len, &trail_len, &record_type); + } else { + record_type = hdr->tls_type; + } + } + break; + case KTLS_MBUF_CRYPTO_ST_DECRYPTED: + /* + * NIC TLS is only supported for AEAD + * ciphersuites which used a fixed sized + * trailer. 
+ */ + if (tls13) { + trail_len = tls->params.tls_tlen - 1; error = tls13_find_record_type(tls, data, tls_len, &trail_len, &record_type); - else + } else { + trail_len = tls->params.tls_tlen; + error = 0; record_type = hdr->tls_type; + } + break; + default: + error = EINVAL; + break; } if (error) { counter_u64_add(ktls_offload_failed_crypto, 1); @@ -2154,19 +2447,31 @@ remain = be16toh(tgr.tls_length); m = data; for (m = data; remain > m->m_len; m = m->m_next) { - m->m_flags &= ~M_NOTREADY; + m->m_flags &= ~(M_NOTREADY | M_DECRYPTED); remain -= m->m_len; } m->m_len = remain; m_freem(m->m_next); m->m_next = NULL; - m->m_flags &= ~M_NOTREADY; + m->m_flags &= ~(M_NOTREADY | M_DECRYPTED); /* Set EOR on the final mbuf. */ m->m_flags |= M_EOR; } sbappendcontrol_locked(sb, data, control, 0); + + if (__predict_false(state != KTLS_MBUF_CRYPTO_ST_DECRYPTED)) { + sb->sb_flags |= SB_TLS_RX_RESYNC; + SOCKBUF_UNLOCK(sb); + ktls_resync_ifnet(so, tls_len, seqno); + SOCKBUF_LOCK(sb); + } else if (__predict_false(sb->sb_flags & SB_TLS_RX_RESYNC)) { + sb->sb_flags &= ~SB_TLS_RX_RESYNC; + SOCKBUF_UNLOCK(sb); + ktls_resync_ifnet(so, 0, seqno); + SOCKBUF_LOCK(sb); + } } sb->sb_flags &= ~SB_TLS_RX_RUNNING; diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c --- a/sys/kern/uipc_sockbuf.c +++ b/sys/kern/uipc_sockbuf.c @@ -54,6 +54,8 @@ #include #include +#include + /* * Function pointer set by the AIO routines so that the socket buffer code * can call back into the AIO module if it is loaded. @@ -924,14 +926,30 @@ sbappend_ktls_rx(struct sockbuf *sb, struct mbuf *m) { struct mbuf *n; + int flags = 0; SBLASTMBUFCHK(sb); + /* Mbuf chain must start with a packet header. */ + MPASS((m->m_flags & M_PKTHDR) != 0); + /* Remove all packet headers and mbuf tags to get a pure data chain. 
diff --git a/sys/net/if_lagg.c b/sys/net/if_lagg.c
--- a/sys/net/if_lagg.c
+++ b/sys/net/if_lagg.c
@@ -1766,6 +1766,14 @@
     .type = IF_SND_TAG_TYPE_TLS
 };
 
+static const struct if_snd_tag_sw lagg_snd_tag_tls_rx_sw = {
+    .snd_tag_modify = lagg_snd_tag_modify,
+    .snd_tag_query = lagg_snd_tag_query,
+    .snd_tag_free = lagg_snd_tag_free,
+    .next_snd_tag = lagg_next_snd_tag,
+    .type = IF_SND_TAG_TYPE_TLS_RX
+};
+
 #ifdef RATELIMIT
 static const struct if_snd_tag_sw lagg_snd_tag_tls_rl_sw = {
     .snd_tag_modify = lagg_snd_tag_modify,
@@ -1851,6 +1859,9 @@
     case IF_SND_TAG_TYPE_TLS:
         sw = &lagg_snd_tag_tls_sw;
         break;
+    case IF_SND_TAG_TYPE_TLS_RX:
+        sw = &lagg_snd_tag_tls_rx_sw;
+        break;
 #ifdef RATELIMIT
     case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
         sw = &lagg_snd_tag_tls_rl_sw;
diff --git a/sys/net/if_vlan.c b/sys/net/if_vlan.c
--- a/sys/net/if_vlan.c
+++ b/sys/net/if_vlan.c
@@ -363,6 +363,14 @@
     .type = IF_SND_TAG_TYPE_TLS
 };
 
+static const struct if_snd_tag_sw vlan_snd_tag_tls_rx_sw = {
+    .snd_tag_modify = vlan_snd_tag_modify,
+    .snd_tag_query = vlan_snd_tag_query,
+    .snd_tag_free = vlan_snd_tag_free,
+    .next_snd_tag = vlan_next_snd_tag,
+    .type = IF_SND_TAG_TYPE_TLS_RX
+};
+
 #ifdef RATELIMIT
 static const struct if_snd_tag_sw vlan_snd_tag_tls_rl_sw = {
     .snd_tag_modify = vlan_snd_tag_modify,
@@ -2166,6 +2174,9 @@
     struct ifnet *parent;
     int error;
 
+    NET_EPOCH_ENTER(et);
+    ifv = ifp->if_softc;
+
     switch (params->hdr.type) {
 #ifdef RATELIMIT
     case IF_SND_TAG_TYPE_UNLIMITED:
@@ -2179,6 +2190,12 @@
     case IF_SND_TAG_TYPE_TLS:
         sw = &vlan_snd_tag_tls_sw;
         break;
+    case IF_SND_TAG_TYPE_TLS_RX:
+        sw = &vlan_snd_tag_tls_rx_sw;
+        if (params->tls_rx.vlan_id != 0)
+            goto failure;
+        params->tls_rx.vlan_id = ifv->ifv_vid;
+        break;
 #ifdef RATELIMIT
     case IF_SND_TAG_TYPE_TLS_RATE_LIMIT:
         sw = &vlan_snd_tag_tls_rl_sw;
@@ -2186,19 +2203,15 @@
 #endif
 #endif
     default:
-        return (EOPNOTSUPP);
+        goto failure;
     }
 
-    NET_EPOCH_ENTER(et);
-    ifv = ifp->if_softc;
     if (ifv->ifv_trunk != NULL)
         parent = PARENT(ifv);
     else
         parent = NULL;
-    if (parent == NULL) {
-        NET_EPOCH_EXIT(et);
-        return (EOPNOTSUPP);
-    }
+    if (parent == NULL)
+        goto failure;
     if_ref(parent);
     NET_EPOCH_EXIT(et);
 
@@ -2219,6 +2232,9 @@
 
     *ppmt = &vst->com;
     return (0);
+
+failure:
+    NET_EPOCH_EXIT(et);
+    return (EOPNOTSUPP);
 }
 
 static struct m_snd_tag *
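
The lagg(4) and vlan(4) changes above only forward a TLS RX tag toward the physical interface; vlan(4) additionally records its VLAN ID in the allocation parameters so the NIC can match the decrypted flow. A hypothetical leaf-driver fragment terminating such an allocation might look as follows; all mydrv_* names and the key-programming step are assumptions for illustration, not part of this patch:

    /* Hypothetical NIC-driver fragment (mydrv_* names are invented). */
    struct mydrv_tls_rx_tag {
            struct m_snd_tag com;           /* must be first */
            uint16_t vlan_id;
    };

    static const struct if_snd_tag_sw mydrv_tls_rx_sw = {
            .snd_tag_modify = mydrv_tls_rx_modify,  /* resync requests */
            .snd_tag_free = mydrv_tls_rx_free,
            .type = IF_SND_TAG_TYPE_TLS_RX
    };

    static int
    mydrv_snd_tag_alloc(struct ifnet *ifp,
        union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt)
    {
            struct mydrv_tls_rx_tag *tag;

            if (params->hdr.type != IF_SND_TAG_TYPE_TLS_RX)
                    return (EOPNOTSUPP);

            tag = malloc(sizeof(*tag), M_DEVBUF, M_NOWAIT | M_ZERO);
            if (tag == NULL)
                    return (ENOMEM);

            /* Non-zero when the session runs over a vlan(4) interface. */
            tag->vlan_id = params->tls_rx.vlan_id;

            /*
             * A real driver would program RX flow steering for the
             * connection (params->tls_rx.inp) and load the session
             * keys from params->tls_rx.tls here.
             */
            m_snd_tag_init(&tag->com, ifp, &mydrv_tls_rx_sw);
            *ppmt = &tag->com;
            return (0);
    }
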
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -280,7 +280,7 @@
 #ifdef KERN_TLS
     if (tls != NULL) {
         if (error == EAGAIN)
-            error = ktls_output_eagain(inp, tls);
+            error = ktls_output_eagain(inp);
         ktls_free(tls);
     }
 #endif
diff --git a/sys/netinet/tcp_ratelimit.c b/sys/netinet/tcp_ratelimit.c
--- a/sys/netinet/tcp_ratelimit.c
+++ b/sys/netinet/tcp_ratelimit.c
@@ -1361,7 +1361,7 @@
              * tag to a TLS ratelimit tag.
              */
             MPASS(tls->snd_tag->sw->type == IF_SND_TAG_TYPE_TLS);
-            ktls_output_eagain(tp->t_inpcb, tls);
+            ktls_output_eagain(tp->t_inpcb);
         }
 #endif
     } else {
diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c
--- a/sys/netinet6/ip6_output.c
+++ b/sys/netinet6/ip6_output.c
@@ -378,7 +378,7 @@
 #ifdef KERN_TLS
     if (tls != NULL) {
         if (error == EAGAIN)
-            error = ktls_output_eagain(inp, tls);
+            error = ktls_output_eagain(inp);
         ktls_free(tls);
     }
 #endif
diff --git a/sys/opencrypto/ktls_ocf.c b/sys/opencrypto/ktls_ocf.c
--- a/sys/opencrypto/ktls_ocf.c
+++ b/sys/opencrypto/ktls_ocf.c
@@ -39,11 +39,13 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -470,9 +472,9 @@
 }
 
 static int
-ktls_ocf_tls12_aead_decrypt(struct ktls_session *tls,
+ktls_ocf_tls12_aead_xxcrypt(struct ktls_session *tls,
     const struct tls_record_layer *hdr, struct mbuf *m, uint64_t seqno,
-    int *trailer_len)
+    int crp_op)
 {
     struct tls_aead_data ad;
     struct cryptop crp;
@@ -517,7 +519,7 @@
     crp.crp_payload_length = tls_comp_len;
     crp.crp_digest_start = crp.crp_payload_start + crp.crp_payload_length;
 
-    crp.crp_op = CRYPTO_OP_DECRYPT | CRYPTO_OP_VERIFY_DIGEST;
+    crp.crp_op = crp_op;
     crp.crp_flags = CRYPTO_F_CBIMM | CRYPTO_F_IV_SEPARATE;
     crypto_use_mbuf(&crp, m);
+ */ +static void +ktls_ocf_recrypt_fixup(struct mbuf *m0, struct mbuf *m1) +{ + while (m0 != NULL) { + MPASS(m1 != NULL); + MPASS(m0->m_len == m1->m_len); + + if (m0->m_flags & M_DECRYPTED) { + uintptr_t align = mtod(m0, uintptr_t) | mtod(m1, uintptr_t); + + /* check if we can do 32-bit XORs */ + if ((align & 3) == 0) { + for (int off = 0; off != howmany(m0->m_len, 4); off++) + mtod(m0, uint32_t *)[off] ^= mtod(m1, uint32_t *)[off]; + } else { + for (int off = 0; off != m0->m_len; off++) + mtod(m0, uint8_t *)[off] ^= mtod(m1, uint8_t *)[off]; + } + } + m0 = m0->m_next; + m1 = m1->m_next; + } + + MPASS(m0 == NULL); + MPASS(m1 == NULL); +} + +static const uint8_t ktls_ocf_zero_hash[MAX(AES_GMAC_HASH_LEN, POLY1305_HASH_LEN)]; + +static int +ktls_ocf_tls12_aead_recrypt(struct ktls_session *tls, + const struct tls_record_layer *hdr, struct mbuf *m, + uint64_t seqno, int *trailer_len) +{ + struct mbuf *mzero; + int error; + int size; + *trailer_len = tls->params.tls_tlen; + + mzero = ktls_ocf_zero_dup(m, &size); + + error = ktls_ocf_tls12_aead_xxcrypt(tls, hdr, mzero, seqno, + CRYPTO_OP_ENCRYPT | CRYPTO_OP_COMPUTE_DIGEST); + + if (__predict_true(error == 0)) { + /* Keep the hash tag intact. */ + m_copyback(mzero, size - *trailer_len, *trailer_len, ktls_ocf_zero_hash); + + ktls_ocf_recrypt_fixup(m, mzero); + + error = ktls_ocf_tls12_aead_xxcrypt(tls, hdr, m, seqno, + CRYPTO_OP_DECRYPT | CRYPTO_OP_VERIFY_DIGEST); + } + + m_freem(mzero); + return (error); } @@ -611,9 +713,9 @@ } static int -ktls_ocf_tls13_aead_decrypt(struct ktls_session *tls, +ktls_ocf_tls13_aead_xxcrypt(struct ktls_session *tls, const struct tls_record_layer *hdr, struct mbuf *m, uint64_t seqno, - int *trailer_len) + int crp_op) { struct tls_aead_data_13 ad; struct cryptop crp; @@ -647,7 +749,7 @@ crp.crp_payload_length = ntohs(hdr->tls_length) - tag_len; crp.crp_digest_start = crp.crp_payload_start + crp.crp_payload_length; - crp.crp_op = CRYPTO_OP_DECRYPT | CRYPTO_OP_VERIFY_DIGEST; + crp.crp_op = crp_op; crp.crp_flags = CRYPTO_F_CBIMM | CRYPTO_F_IV_SEPARATE; crypto_use_mbuf(&crp, m); @@ -658,7 +760,48 @@ error = ktls_ocf_dispatch(os, &crp); crypto_destroyreq(&crp); - *trailer_len = tag_len; + return (error); +} + +static int +ktls_ocf_tls13_aead_decrypt(struct ktls_session *tls, + const struct tls_record_layer *hdr, struct mbuf *m, uint64_t seqno, + int *trailer_len) +{ + *trailer_len = tls->params.tls_tlen - 1; + + return (ktls_ocf_tls13_aead_xxcrypt(tls, hdr, m, seqno, + CRYPTO_OP_DECRYPT | CRYPTO_OP_VERIFY_DIGEST)); +} + +static int +ktls_ocf_tls13_aead_recrypt(struct ktls_session *tls, + const struct tls_record_layer *hdr, struct mbuf *m, + uint64_t seqno, int *trailer_len) +{ + struct mbuf *mzero; + int error; + int size; + + *trailer_len = tls->params.tls_tlen - 1; + + mzero = ktls_ocf_zero_dup(m, &size); + + error = ktls_ocf_tls13_aead_xxcrypt(tls, hdr, mzero, seqno, + CRYPTO_OP_ENCRYPT | CRYPTO_OP_COMPUTE_DIGEST); + + if (__predict_true(error == 0)) { + /* Keep the hash tag intact. 
@@ -611,9 +713,9 @@
 }
 
 static int
-ktls_ocf_tls13_aead_decrypt(struct ktls_session *tls,
+ktls_ocf_tls13_aead_xxcrypt(struct ktls_session *tls,
     const struct tls_record_layer *hdr, struct mbuf *m, uint64_t seqno,
-    int *trailer_len)
+    int crp_op)
 {
     struct tls_aead_data_13 ad;
     struct cryptop crp;
@@ -647,7 +749,7 @@
     crp.crp_payload_length = ntohs(hdr->tls_length) - tag_len;
     crp.crp_digest_start = crp.crp_payload_start + crp.crp_payload_length;
 
-    crp.crp_op = CRYPTO_OP_DECRYPT | CRYPTO_OP_VERIFY_DIGEST;
+    crp.crp_op = crp_op;
     crp.crp_flags = CRYPTO_F_CBIMM | CRYPTO_F_IV_SEPARATE;
     crypto_use_mbuf(&crp, m);
@@ -658,7 +760,48 @@
     error = ktls_ocf_dispatch(os, &crp);
 
     crypto_destroyreq(&crp);
-    *trailer_len = tag_len;
+    return (error);
+}
+
+static int
+ktls_ocf_tls13_aead_decrypt(struct ktls_session *tls,
+    const struct tls_record_layer *hdr, struct mbuf *m, uint64_t seqno,
+    int *trailer_len)
+{
+    *trailer_len = tls->params.tls_tlen - 1;
+
+    return (ktls_ocf_tls13_aead_xxcrypt(tls, hdr, m, seqno,
+        CRYPTO_OP_DECRYPT | CRYPTO_OP_VERIFY_DIGEST));
+}
+
+static int
+ktls_ocf_tls13_aead_recrypt(struct ktls_session *tls,
+    const struct tls_record_layer *hdr, struct mbuf *m,
+    uint64_t seqno, int *trailer_len)
+{
+    struct mbuf *mzero;
+    int error;
+    int size;
+
+    *trailer_len = tls->params.tls_tlen - 1;
+
+    mzero = ktls_ocf_zero_dup(m, &size);
+
+    error = ktls_ocf_tls13_aead_xxcrypt(tls, hdr, mzero, seqno,
+        CRYPTO_OP_ENCRYPT | CRYPTO_OP_COMPUTE_DIGEST);
+    if (__predict_true(error == 0)) {
+        /* Keep the hash tag intact. */
+        m_copyback(mzero, size - *trailer_len, *trailer_len,
+            ktls_ocf_zero_hash);
+
+        ktls_ocf_recrypt_fixup(m, mzero);
+
+        error = ktls_ocf_tls13_aead_xxcrypt(tls, hdr, m, seqno,
+            CRYPTO_OP_DECRYPT | CRYPTO_OP_VERIFY_DIGEST);
+    }
+
+    m_freem(mzero);
+
     return (error);
 }
 
@@ -806,15 +949,21 @@
     if (tls->params.cipher_algorithm == CRYPTO_AES_NIST_GCM_16 ||
         tls->params.cipher_algorithm == CRYPTO_CHACHA20_POLY1305) {
         if (direction == KTLS_TX) {
-            if (tls->params.tls_vminor == TLS_MINOR_VER_THREE)
+            if (tls->params.tls_vminor == TLS_MINOR_VER_THREE) {
+                tls->sw_decrypt = NULL;
                 tls->sw_encrypt = ktls_ocf_tls13_aead_encrypt;
-            else
+            } else {
+                tls->sw_decrypt = NULL;
                 tls->sw_encrypt = ktls_ocf_tls12_aead_encrypt;
+            }
         } else {
-            if (tls->params.tls_vminor == TLS_MINOR_VER_THREE)
+            if (tls->params.tls_vminor == TLS_MINOR_VER_THREE) {
                 tls->sw_decrypt = ktls_ocf_tls13_aead_decrypt;
-            else
+                tls->sw_recrypt = ktls_ocf_tls13_aead_recrypt;
+            } else {
                 tls->sw_decrypt = ktls_ocf_tls12_aead_decrypt;
+                tls->sw_recrypt = ktls_ocf_tls12_aead_recrypt;
+            }
         }
     } else {
         tls->sw_encrypt = ktls_ocf_tls_cbc_encrypt;
diff --git a/sys/sys/ktls.h b/sys/sys/ktls.h
--- a/sys/sys/ktls.h
+++ b/sys/sys/ktls.h
@@ -180,16 +180,20 @@
     int (*sw_encrypt)(struct ktls_ocf_encrypt_state *state,
         struct ktls_session *tls, struct mbuf *m,
         struct iovec *outiov, int outiovcnt);
-    int (*sw_decrypt)(struct ktls_session *tls,
+    int (*sw_recrypt)(struct ktls_session *tls,
         const struct tls_record_layer *hdr, struct mbuf *m,
         uint64_t seqno, int *trailer_len);
     };
+    int (*sw_decrypt)(struct ktls_session *tls,
+        const struct tls_record_layer *hdr, struct mbuf *m,
+        uint64_t seqno, int *trailer_len);
     struct ktls_ocf_session *ocf_session;
     struct m_snd_tag *snd_tag;
     struct tls_session_params params;
     u_int wq_index;
     volatile u_int refcount;
     int mode;
+    int direction;
 
     struct task reset_tag_task;
     struct task disable_ifnet_task;
@@ -207,6 +211,10 @@
 extern unsigned int ktls_ifnet_max_rexmit_pct;
 
 void ktls_check_rx(struct sockbuf *sb);
+int ktls_mbuf_crypto_state(struct mbuf *mb, int offset, int len);
+#define KTLS_MBUF_CRYPTO_ST_MIXED      0
+#define KTLS_MBUF_CRYPTO_ST_ENCRYPTED  1
+#define KTLS_MBUF_CRYPTO_ST_DECRYPTED  -1
 void ktls_disable_ifnet(void *arg);
 int ktls_enable_rx(struct socket *so, struct tls_enable *en);
 int ktls_enable_tx(struct socket *so, struct tls_enable *en);
@@ -220,7 +228,7 @@
 int ktls_set_tx_mode(struct socket *so, int mode);
 int ktls_get_tx_mode(struct socket *so, int *modep);
 int ktls_get_rx_sequence(struct inpcb *inp, uint32_t *tcpseq, uint64_t *tlsseq);
-int ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls);
+int ktls_output_eagain(struct inpcb *inp);
 #ifdef RATELIMIT
 int ktls_modify_txrtlmt(struct ktls_session *tls, uint64_t max_pacing_rate);
 #endif
diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h
--- a/sys/sys/sockbuf.h
+++ b/sys/sys/sockbuf.h
@@ -53,6 +53,7 @@
 #define SB_STOP         0x1000  /* backpressure indicator */
 #define SB_AIO_RUNNING  0x2000  /* AIO operation running */
 #define SB_TLS_IFNET    0x4000  /* has used / is using ifnet KTLS */
+#define SB_TLS_RX_RESYNC 0x8000 /* KTLS RX lost HW sync */
 
 #define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */
 #define SBS_CANTRCVMORE  0x0020 /* can't receive more data from peer */
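
Finally, the read side: each record queued by ktls_decrypt() above carries its reconstructed record header in a TLS_GET_RECORD control message (the tgr/sbappendcontrol_locked() logic). A hedged userland sketch of consuming one record; read_tls_record() is invented for illustration, while the cmsg level and type match the kernel code above:

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/ktls.h>           /* struct tls_get_record */
    #include <stdint.h>
    #include <string.h>

    static ssize_t
    read_tls_record(int fd, void *buf, size_t len, uint8_t *typep)
    {
            struct tls_get_record tgr;
            char cbuf[CMSG_SPACE(sizeof(tgr))];
            struct iovec iov = { .iov_base = buf, .iov_len = len };
            struct msghdr msg = {
                    .msg_iov = &iov,
                    .msg_iovlen = 1,
                    .msg_control = cbuf,
                    .msg_controllen = sizeof(cbuf),
            };
            struct cmsghdr *cmsg;
            ssize_t n;

            n = recvmsg(fd, &msg, 0);
            if (n <= 0)
                    return (n);
            for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
                cmsg = CMSG_NXTHDR(&msg, cmsg)) {
                    if (cmsg->cmsg_level == IPPROTO_TCP &&
                        cmsg->cmsg_type == TLS_GET_RECORD) {
                            memcpy(&tgr, CMSG_DATA(cmsg), sizeof(tgr));
                            *typep = tgr.tls_type;  /* record type */
                            break;
                    }
            }
            return (n);
    }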