diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c
--- a/sys/kern/uipc_ktls.c
+++ b/sys/kern/uipc_ktls.c
@@ -291,7 +291,7 @@
 static void ktls_cleanup(struct ktls_session *tls);
 #if defined(INET) || defined(INET6)
-static void ktls_reset_send_tag(void *context, int pending);
+static void ktls_reset_send_receive_tag(void *context, int pending);
 #endif
 static void ktls_work_thread(void *ctx);
 static void ktls_alloc_thread(void *ctx);
@@ -482,7 +482,7 @@
 #if defined(INET) || defined(INET6)
 static int
 ktls_create_session(struct socket *so, struct tls_enable *en,
-    struct ktls_session **tlsp)
+    struct ktls_session **tlsp, int direction)
 {
     struct ktls_session *tls;
     int error;
@@ -579,9 +579,10 @@
     counter_u64_add(ktls_offload_active, 1);
 
     refcount_init(&tls->refcount, 1);
-    TASK_INIT(&tls->reset_tag_task, 0, ktls_reset_send_tag, tls);
+    TASK_INIT(&tls->reset_tag_task, 0, ktls_reset_send_receive_tag, tls);
 
     tls->wq_index = ktls_get_cpu(so);
+    tls->direction = direction;
 
     tls->params.cipher_algorithm = en->cipher_algorithm;
     tls->params.auth_algorithm = en->auth_algorithm;
@@ -711,11 +712,12 @@
     counter_u64_add(ktls_offload_active, 1);
 
     refcount_init(&tls_new->refcount, 1);
-    TASK_INIT(&tls_new->reset_tag_task, 0, ktls_reset_send_tag, tls_new);
+    TASK_INIT(&tls_new->reset_tag_task, 0, ktls_reset_send_receive_tag, tls_new);
 
     /* Copy fields from existing session. */
     tls_new->params = tls->params;
     tls_new->wq_index = tls->wq_index;
+    tls_new->direction = tls->direction;
 
     /* Deep copy keys. */
     if (tls_new->params.auth_key != NULL) {
@@ -765,8 +767,8 @@
             counter_u64_add(ktls_ifnet_chacha20, -1);
             break;
         }
-        if (tls->snd_tag != NULL)
-            m_snd_tag_rele(tls->snd_tag);
+        if (tls->snd_rcv_tag != NULL)
+            m_snd_tag_rele(tls->snd_rcv_tag);
         break;
 #ifdef TCP_OFFLOAD
     case TCP_TLS_MODE_TOE:
@@ -948,28 +950,137 @@
     return (error);
 }
 
+/*
+ * Common code for allocating a TLS receive tag for doing HW
+ * decryption of TLS data.
+ *
+ * This function allocates a new TLS receive tag on whatever interface
+ * the connection is currently routed over.
+ */
 static int
-ktls_try_ifnet(struct socket *so, struct ktls_session *tls, bool force)
+ktls_alloc_rcv_tag(struct inpcb *inp, struct ktls_session *tls, bool force,
+    struct m_snd_tag **mstp)
 {
-    struct m_snd_tag *mst;
+    union if_snd_tag_alloc_params params;
+    struct ifnet *ifp;
+    struct nhop_object *nh;
+    struct tcpcb *tp;
     int error;
 
-    error = ktls_alloc_snd_tag(so->so_pcb, tls, force, &mst);
-    if (error == 0) {
-        tls->mode = TCP_TLS_MODE_IFNET;
-        tls->snd_tag = mst;
-        switch (tls->params.cipher_algorithm) {
-        case CRYPTO_AES_CBC:
-            counter_u64_add(ktls_ifnet_cbc, 1);
-            break;
-        case CRYPTO_AES_NIST_GCM_16:
-            counter_u64_add(ktls_ifnet_gcm, 1);
-            break;
-        case CRYPTO_CHACHA20_POLY1305:
-            counter_u64_add(ktls_ifnet_chacha20, 1);
-            break;
-        }
+    INP_RLOCK(inp);
+    if (inp->inp_flags2 & INP_FREED) {
+        INP_RUNLOCK(inp);
+        return (ECONNRESET);
+    }
+    if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+        INP_RUNLOCK(inp);
+        return (ECONNRESET);
+    }
+    if (inp->inp_socket == NULL) {
+        INP_RUNLOCK(inp);
+        return (ECONNRESET);
+    }
+    tp = intotcpcb(inp);
+
+    /*
+     * Check administrative controls on ifnet TLS to determine if
+     * ifnet TLS should be denied.
+     *
+     * - Always permit 'force' requests.
+     * - ktls_ifnet_permitted == 0: always deny.
+     */
+    if (!force && ktls_ifnet_permitted == 0) {
+        INP_RUNLOCK(inp);
+        return (ENXIO);
+    }
+
+    /*
+     * XXX: Use the cached route in the inpcb to find the
+     * interface.  This should perhaps instead use
+     * rtalloc1_fib(dst, 0, 0, fibnum).  Since KTLS is only
+     * enabled after a connection has completed key negotiation in
+     * userland, the cached route will be present in practice.
+     */
+    nh = inp->inp_route.ro_nh;
+    if (nh == NULL) {
+        INP_RUNLOCK(inp);
+        return (ENXIO);
+    }
+    ifp = nh->nh_ifp;
+    if_ref(ifp);
+
+    params.hdr.type = IF_SND_TAG_TYPE_TLS_RX;
+    params.hdr.flowid = inp->inp_flowid;
+    params.hdr.flowtype = inp->inp_flowtype;
+    params.hdr.numa_domain = inp->inp_numa_domain;
+    params.tls_rx.inp = inp;
+    params.tls_rx.tls = tls;
+    params.tls_rx.next_tls_hdr_tcp_sn =
+        tp->rcv_nxt - sbavail(&inp->inp_socket->so_rcv);
+
+    INP_RUNLOCK(inp);
+
+    if ((ifp->if_capenable & IFCAP_MEXTPG) == 0) {
+        error = EOPNOTSUPP;
+        goto out;
+    }
+
+    /* XXX reusing TXTLS flags */
+    if (inp->inp_vflag & INP_IPV6) {
+        if ((ifp->if_capenable & IFCAP_TXTLS6) == 0) {
+            error = EOPNOTSUPP;
+            goto out;
+        }
+    } else {
+        if ((ifp->if_capenable & IFCAP_TXTLS4) == 0) {
+            error = EOPNOTSUPP;
+            goto out;
+        }
+    }
+    error = m_snd_tag_alloc(ifp, &params, mstp);
+out:
+    if_rele(ifp);
+    return (error);
+}
+
+static int
+ktls_try_ifnet(struct socket *so, struct ktls_session *tls, int direction, bool force)
+{
+    struct m_snd_tag *mst;
+    int error;
+
+    switch (direction) {
+    case KTLS_TX:
+        error = ktls_alloc_snd_tag(so->so_pcb, tls, force, &mst);
+        if (__predict_false(error != 0))
+            goto done;
+        break;
+    case KTLS_RX:
+        error = ktls_alloc_rcv_tag(so->so_pcb, tls, force, &mst);
+        if (__predict_false(error != 0))
+            goto done;
+        break;
+    default:
+        return (EINVAL);
+    }
+
+    tls->mode = TCP_TLS_MODE_IFNET;
+    tls->snd_rcv_tag = mst;
+
+    switch (tls->params.cipher_algorithm) {
+    case CRYPTO_AES_CBC:
+        counter_u64_add(ktls_ifnet_cbc, 1);
+        break;
+    case CRYPTO_AES_NIST_GCM_16:
+        counter_u64_add(ktls_ifnet_gcm, 1);
+        break;
+    case CRYPTO_CHACHA20_POLY1305:
+        counter_u64_add(ktls_ifnet_chacha20, 1);
+        break;
+    default:
+        break;
     }
+done:
     return (error);
 }
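Note (not part of the patch): the next_tls_hdr_tcp_sn value handed to the driver above is plain 32-bit serial-number arithmetic: the next TLS header starts at rcv_nxt minus whatever is still queued unparsed in the receive socket buffer. The standalone C model below, with hypothetical helper names, shows that computation and its behaviour across a sequence-number wrap.

/* Standalone model (not kernel code) of the next_tls_hdr_tcp_sn computation
 * used by ktls_alloc_rcv_tag(): the next TLS header begins at the TCP
 * sequence number of the first byte still sitting in the receive buffer,
 * i.e. rcv_nxt minus the buffered byte count.  uint32_t arithmetic wraps
 * modulo 2^32 just like TCP sequence numbers do. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t
next_tls_hdr_tcp_sn(uint32_t rcv_nxt, uint32_t sb_avail)
{
	return (rcv_nxt - sb_avail);
}

int
main(void)
{
	/* 100 bytes received in total, 40 still queued in so_rcv. */
	assert(next_tls_hdr_tcp_sn(100, 40) == 60);
	/* The same computation stays well defined across a wrap. */
	assert(next_tls_hdr_tcp_sn(0x00000010u, 0x40u) == 0xffffffd0u);
	printf("next TLS header sequence numbers check out\n");
	return (0);
}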
@@ -1155,7 +1266,7 @@
         en->tls_vminor == TLS_MINOR_VER_THREE)
         return (ENOTSUP);
 
-    error = ktls_create_session(so, en, &tls);
+    error = ktls_create_session(so, en, &tls, KTLS_RX);
     if (error)
         return (error);
 
@@ -1176,10 +1287,13 @@
     ktls_check_rx(&so->so_rcv);
     SOCKBUF_UNLOCK(&so->so_rcv);
 
+    /* Prefer TOE -> ifnet TLS -> software TLS. */
 #ifdef TCP_OFFLOAD
     error = ktls_try_toe(so, tls, KTLS_RX);
     if (error)
 #endif
+        error = ktls_try_ifnet(so, tls, KTLS_RX, false);
+    if (error)
         ktls_use_sw(tls);
 
     counter_u64_add(ktls_offload_total, 1);
@@ -1222,7 +1336,7 @@
     if (mb_use_ext_pgs == 0)
         return (ENXIO);
 
-    error = ktls_create_session(so, en, &tls);
+    error = ktls_create_session(so, en, &tls, KTLS_TX);
     if (error)
         return (error);
 
@@ -1231,7 +1345,7 @@
     error = ktls_try_toe(so, tls, KTLS_TX);
     if (error)
 #endif
-        error = ktls_try_ifnet(so, tls, false);
+        error = ktls_try_ifnet(so, tls, KTLS_TX, false);
     if (error)
         error = ktls_try_sw(so, tls, KTLS_TX);
 
@@ -1348,7 +1462,7 @@
     tls_new = ktls_clone_session(tls);
 
     if (mode == TCP_TLS_MODE_IFNET)
-        error = ktls_try_ifnet(so, tls_new, true);
+        error = ktls_try_ifnet(so, tls_new, KTLS_TX, true);
     else
         error = ktls_try_sw(so, tls_new, KTLS_TX);
     if (error) {
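Note (not part of the patch): with ktls_enable_rx() now trying TOE, then ifnet, then software, an application still only issues the existing TCP_RXTLS_ENABLE socket option; which backend was picked can be read back via TCP_RXTLS_MODE. The sketch below is a hedged userspace illustration: enable_tls_rx() is a hypothetical helper, the key material is a placeholder taken from a completed handshake, and the exact struct tls_enable layout should be checked against sys/ktls.h.

/* Hedged userspace sketch: enable kernel TLS receive offload on a connected
 * TCP socket, then ask which mode (TOE, ifnet or software) the kernel chose.
 * Keys and IV below are dummies; a real caller fills them from its TLS 1.2
 * handshake state. */
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <crypto/cryptodev.h>
#include <sys/ktls.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int
enable_tls_rx(int fd, const uint8_t *key, size_t keylen,
    const uint8_t *iv, size_t ivlen)
{
	struct tls_enable en;
	socklen_t len;
	int mode;

	memset(&en, 0, sizeof(en));
	en.cipher_algorithm = CRYPTO_AES_NIST_GCM_16;
	en.cipher_key = key;
	en.cipher_key_len = keylen;
	en.iv = iv;
	en.iv_len = ivlen;
	en.tls_vmajor = TLS_MAJOR_VER_ONE;
	en.tls_vminor = TLS_MINOR_VER_TWO;

	if (setsockopt(fd, IPPROTO_TCP, TCP_RXTLS_ENABLE, &en,
	    sizeof(en)) == -1) {
		perror("TCP_RXTLS_ENABLE");
		return (-1);
	}

	/* With this patch the kernel may report TCP_TLS_MODE_IFNET here. */
	len = sizeof(mode);
	if (getsockopt(fd, IPPROTO_TCP, TCP_RXTLS_MODE, &mode, &len) == 0)
		printf("TLS RX mode: %d\n", mode);
	return (0);
}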
@@ -1407,19 +1521,21 @@
 }
 
 /*
- * Try to allocate a new TLS send tag.  This task is scheduled when
- * ip_output detects a route change while trying to transmit a packet
- * holding a TLS record.  If a new tag is allocated, replace the tag
- * in the TLS session.  Subsequent packets on the connection will use
- * the new tag.  If a new tag cannot be allocated, drop the
- * connection.
+ * ktls_reset_send_receive_tag - try to allocate a new TLS send or receive tag.
+ *
+ * This task is scheduled when ip_output detects a route change while
+ * trying to transmit a packet holding a TLS record.  If a new tag is
+ * allocated, replace the tag in the TLS session.  Subsequent packets
+ * on the connection will use the new tag.  If a new tag cannot be
+ * allocated, drop the connection.
 */
 static void
-ktls_reset_send_tag(void *context, int pending)
+ktls_reset_send_receive_tag(void *context, int pending)
 {
     struct epoch_tracker et;
     struct ktls_session *tls;
-    struct m_snd_tag *old, *new;
+    struct m_snd_tag *snd_rcv_old;
+    struct m_snd_tag *snd_rcv_new;
     struct inpcb *inp;
     struct tcpcb *tp;
     int error;
@@ -1435,72 +1551,81 @@
      * an ifp mismatch and drop packets until a new tag is
      * allocated.
      *
-     * Write-lock the INP when changing tls->snd_tag since
+     * Write-lock the INP when changing tls->snd_rcv_tag since
      * ip[6]_output_send() holds a read-lock when reading the
      * pointer.
      */
     INP_WLOCK(inp);
-    old = tls->snd_tag;
-    tls->snd_tag = NULL;
+    snd_rcv_old = tls->snd_rcv_tag;
+    tls->snd_rcv_tag = NULL;
     INP_WUNLOCK(inp);
-    if (old != NULL)
-        m_snd_tag_rele(old);
-    error = ktls_alloc_snd_tag(inp, tls, true, &new);
+    if (snd_rcv_old != NULL)
+        m_snd_tag_rele(snd_rcv_old);
 
-    if (error == 0) {
-        INP_WLOCK(inp);
-        tls->snd_tag = new;
-        mtx_pool_lock(mtxpool_sleep, tls);
-        tls->reset_pending = false;
-        mtx_pool_unlock(mtxpool_sleep, tls);
-        if (!in_pcbrele_wlocked(inp))
-            INP_WUNLOCK(inp);
+    switch (tls->direction) {
+    case KTLS_TX:
+        error = ktls_alloc_snd_tag(inp, tls, true, &snd_rcv_new);
+        break;
+    case KTLS_RX:
+        error = ktls_alloc_rcv_tag(inp, tls, true, &snd_rcv_new);
+        break;
+    default:
+        goto drop_connection;
+    }
+    if (error != 0)
+        goto drop_connection;
 
-        counter_u64_add(ktls_ifnet_reset, 1);
+    INP_WLOCK(inp);
+    tls->snd_rcv_tag = snd_rcv_new;
 
-        /*
-         * XXX: Should we kick tcp_output explicitly now that
-         * the send tag is fixed or just rely on timers?
-         */
-    } else {
-        NET_EPOCH_ENTER(et);
-        INP_WLOCK(inp);
-        if (!in_pcbrele_wlocked(inp)) {
-            if (!(inp->inp_flags & INP_TIMEWAIT) &&
-                !(inp->inp_flags & INP_DROPPED)) {
-                tp = intotcpcb(inp);
-                CURVNET_SET(tp->t_vnet);
-                tp = tcp_drop(tp, ECONNABORTED);
-                CURVNET_RESTORE();
-                if (tp != NULL)
-                    INP_WUNLOCK(inp);
-                counter_u64_add(ktls_ifnet_reset_dropped, 1);
-            } else
-                INP_WUNLOCK(inp);
-        }
-        NET_EPOCH_EXIT(et);
+    mtx_pool_lock(mtxpool_sleep, tls);
+    tls->reset_pending = false;
+    mtx_pool_unlock(mtxpool_sleep, tls);
 
-        counter_u64_add(ktls_ifnet_reset_failed, 1);
+    if (!in_pcbrele_wlocked(inp))
+        INP_WUNLOCK(inp);
 
-        /*
-         * Leave reset_pending true to avoid future tasks while
-         * the socket goes away.
-         */
-    }
+    counter_u64_add(ktls_ifnet_reset, 1);
 
     ktls_free(tls);
-}
 
-int
-ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls)
-{
+    /*
+     * XXX: Should we kick tcp_output explicitly now that
+     * the send tag is fixed or just rely on timers?
+     */
+    return;
 
-    if (inp == NULL)
-        return (ENOBUFS);
+drop_connection:
+    NET_EPOCH_ENTER(et);
+    INP_WLOCK(inp);
+    if (!in_pcbrele_wlocked(inp)) {
+        if (!(inp->inp_flags & INP_TIMEWAIT) &&
+            !(inp->inp_flags & INP_DROPPED)) {
+            tp = intotcpcb(inp);
+            CURVNET_SET(tp->t_vnet);
+            tp = tcp_drop(tp, ECONNABORTED);
+            CURVNET_RESTORE();
+            if (tp != NULL)
+                INP_WUNLOCK(inp);
+            counter_u64_add(ktls_ifnet_reset_dropped, 1);
+        } else
+            INP_WUNLOCK(inp);
+    }
+    NET_EPOCH_EXIT(et);
 
-    INP_LOCK_ASSERT(inp);
+    counter_u64_add(ktls_ifnet_reset_failed, 1);
+    /*
+     * Leave reset_pending true to avoid future tasks while
+     * the socket goes away.
+     */
+    ktls_free(tls);
+}
+
+static void
+ktls_output_eagain_tls(struct inpcb *inp, struct ktls_session *tls)
+{
     /*
      * See if we should schedule a task to update the send tag for
      * this session.
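Note (not part of the patch): the reset task above must be scheduled at most once per tag outage; tls->reset_pending guarded by the pool mutex is what coalesces concurrent EAGAIN callers from the output path. The standalone model below uses hypothetical names and a pthread mutex in place of mtx_pool_lock()/taskqueue(9) to show that handshake.

/* Minimal userspace model (hypothetical, not kernel code) of the
 * reset_pending handshake: many output-path callers may notice a stale
 * tag concurrently, but only the first one schedules the reset work; the
 * worker clears the flag once a new tag has been installed. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct tls_session_model {
	pthread_mutex_t lock;		/* stands in for mtx_pool_lock() */
	bool reset_pending;
	int resets_scheduled;
};

/* Called from the (modeled) output path on every EAGAIN. */
static void
output_eagain(struct tls_session_model *tls)
{
	pthread_mutex_lock(&tls->lock);
	if (!tls->reset_pending) {
		tls->reset_pending = true;
		tls->resets_scheduled++;	/* taskqueue_enqueue() here */
	}
	pthread_mutex_unlock(&tls->lock);
}

/* Called from the (modeled) reset task once the new tag is in place. */
static void
reset_done(struct tls_session_model *tls)
{
	pthread_mutex_lock(&tls->lock);
	tls->reset_pending = false;
	pthread_mutex_unlock(&tls->lock);
}

int
main(void)
{
	struct tls_session_model tls = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
	};

	output_eagain(&tls);	/* schedules */
	output_eagain(&tls);	/* coalesced */
	reset_done(&tls);
	output_eagain(&tls);	/* schedules again */
	printf("resets scheduled: %d\n", tls.resets_scheduled);	/* 2 */
	return (0);
}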
@@ -1514,6 +1639,30 @@
         taskqueue_enqueue(taskqueue_thread, &tls->reset_tag_task);
     }
     mtx_pool_unlock(mtxpool_sleep, tls);
+}
+
+int
+ktls_output_eagain(struct inpcb *inp)
+{
+    struct socket *so;
+    struct ktls_session *tls;
+
+    if (__predict_false(inp == NULL))
+        goto done;
+    INP_LOCK_ASSERT(inp);
+
+    so = inp->inp_socket;
+    if (__predict_false(so == NULL))
+        goto done;
+
+    tls = so->so_rcv.sb_tls_info;
+    if (__predict_true(tls != NULL))
+        ktls_output_eagain_tls(inp, tls);
+
+    tls = so->so_snd.sb_tls_info;
+    if (__predict_true(tls != NULL))
+        ktls_output_eagain_tls(inp, tls);
+done:
     return (ENOBUFS);
 }
@@ -1532,7 +1681,7 @@
 
     MPASS(tls->mode == TCP_TLS_MODE_IFNET);
 
-    if (tls->snd_tag == NULL) {
+    if (tls->snd_rcv_tag == NULL) {
         /*
          * Resetting send tag, ignore this change.  The
          * pending reset may or may not see this updated rate
@@ -1542,10 +1691,11 @@
         return (0);
     }
 
-    MPASS(tls->snd_tag != NULL);
-    MPASS(tls->snd_tag->sw->type == IF_SND_TAG_TYPE_TLS_RATE_LIMIT);
+    mst = tls->snd_rcv_tag;
+
+    MPASS(mst != NULL);
+    MPASS(mst->sw->type == IF_SND_TAG_TYPE_TLS_RATE_LIMIT);
 
-    mst = tls->snd_tag;
     return (mst->sw->snd_tag_modify(mst, &params));
 }
 #endif
@@ -1814,7 +1964,7 @@
             return (NULL);
         }
     }
-    n->m_flags |= M_NOTREADY;
+    n->m_flags |= M_NOTREADY | (m->m_flags & M_DECRYPTED);
 
     /* Store remainder in 'n'. */
     n->m_len = m->m_len - remain;
@@ -1852,6 +2002,76 @@
     return (top);
 }
 
+/*
+ * Check if a mbuf chain is fully decrypted at the given offset and
+ * length. Returns true if all data is decrypted. Else false.
+ */
+bool
+ktls_mbuf_is_decrypted(struct mbuf *mb, int offset, int len)
+{
+    /* Skip mbufs located entirely before 'offset'. */
+    for (; mb != NULL; mb = mb->m_next) {
+        if (mb->m_len <= offset) {
+            offset -= mb->m_len;
+            continue;
+        }
+        /* 'offset' now marks the end of the range within 'mb'. */
+        offset += len;
+        break;
+    }
+
+    /* Every mbuf covering the range must be decrypted. */
+    for (; mb != NULL; mb = mb->m_next) {
+        if (!(mb->m_flags & M_DECRYPTED))
+            return (false);
+        if (mb->m_len < offset) {
+            offset -= mb->m_len;
+            continue;
+        }
+        break;
+    }
+    MPASS(mb != NULL || offset == 0);
+    return (true);
+}
+
+/*
+ * ktls_resync_ifnet - get TLS RX back on track after packet loss
+ */
+static int
+ktls_resync_ifnet(struct socket *so)
+{
+    union if_snd_tag_modify_params params;
+    struct m_snd_tag *mst;
+    struct inpcb *inp;
+    struct tcpcb *tp;
+
+    mst = so->so_rcv.sb_tls_info->snd_rcv_tag;
+    if (__predict_false(mst == NULL))
+        return (EINVAL);
+
+    inp = sotoinpcb(so);
+    if (__predict_false(inp == NULL))
+        return (EINVAL);
+
+    INP_RLOCK(inp);
+    if (inp->inp_flags2 & INP_FREED) {
+        INP_RUNLOCK(inp);
+        return (ECONNRESET);
+    }
+    if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+        INP_RUNLOCK(inp);
+        return (ECONNRESET);
+    }
+
+    tp = intotcpcb(inp);
+    MPASS(tp != NULL);
+
+    /* Get the TCP sequence number of the next TLS header. */
+    params.tls_rx.next_tls_hdr_tcp_sn =
+        tp->rcv_nxt - sbavail(&so->so_rcv);
+    INP_RUNLOCK(inp);
+
+    MPASS(mst->sw->type == IF_SND_TAG_TYPE_TLS_RX);
+    return (mst->sw->snd_tag_modify(mst, &params));
+}
+
 static void
 ktls_decrypt(struct socket *so)
 {
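Note (not part of the patch): the two-pass walk in ktls_mbuf_is_decrypted() is easy to exercise outside the kernel. The sketch below re-implements the same offset/length bookkeeping over a toy singly linked buffer list (hypothetical types, not struct mbuf) so the boundary cases can be unit-tested.

/* Userspace model of the decrypted-range check: first skip whole buffers
 * that lie in front of 'offset', then require the "decrypted" flag on every
 * buffer touched by the [offset, offset + len) range. */
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct buf {
	struct buf *next;
	int len;
	bool decrypted;
};

static bool
range_is_decrypted(const struct buf *b, int offset, int len)
{
	/* Skip buffers entirely before the range. */
	for (; b != NULL; b = b->next) {
		if (b->len <= offset) {
			offset -= b->len;
			continue;
		}
		offset += len;	/* end of the range within 'b' */
		break;
	}
	/* Every buffer covering the range must be decrypted. */
	for (; b != NULL; b = b->next) {
		if (!b->decrypted)
			return (false);
		if (b->len >= offset)
			break;
		offset -= b->len;
	}
	return (true);
}

int
main(void)
{
	struct buf b2 = { NULL, 80, false };	/* still encrypted */
	struct buf b1 = { &b2, 20, true };	/* already decrypted */

	assert(range_is_decrypted(&b1, 5, 10));		/* fits inside b1 */
	assert(!range_is_decrypted(&b1, 15, 10));	/* spills into b2 */
	b2.decrypted = true;
	assert(range_is_decrypted(&b1, 15, 10));
	printf("decrypted-range checks pass\n");
	return (0);
}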
@@ -2025,6 +2245,7 @@
 
     sorwakeup_locked(so);
 
+    ktls_resync_ifnet(so);
 deref:
     SOCKBUF_UNLOCK_ASSERT(sb);
 
diff --git a/sys/net/if_var.h b/sys/net/if_var.h
--- a/sys/net/if_var.h
+++ b/sys/net/if_var.h
@@ -192,7 +192,8 @@
 #define IF_SND_TAG_TYPE_UNLIMITED	1
 #define IF_SND_TAG_TYPE_TLS		2
 #define IF_SND_TAG_TYPE_TLS_RATE_LIMIT	3
-#define IF_SND_TAG_TYPE_MAX		4
+#define IF_SND_TAG_TYPE_TLS_RX		4
+#define IF_SND_TAG_TYPE_MAX		5
 
 struct if_snd_tag_alloc_header {
     uint32_t type;		/* send tag type, see IF_SND_TAG_XXX */
@@ -214,6 +215,15 @@
     const struct ktls_session *tls;
 };
 
+struct if_snd_tag_alloc_tls_rx {
+    struct if_snd_tag_alloc_header hdr;
+    struct inpcb *inp;
+    const struct ktls_session *tls;
+
+    /* TCP sequence number in host endian format */
+    uint32_t next_tls_hdr_tcp_sn;
+};
+
 struct if_snd_tag_alloc_tls_rate_limit {
     struct if_snd_tag_alloc_header hdr;
     struct inpcb *inp;
@@ -229,11 +239,17 @@
     uint32_t flags;		/* M_NOWAIT or M_WAITOK */
 };
 
+struct if_snd_tag_modify_tls_rx {
+    /* TCP sequence number in host endian format */
+    uint32_t next_tls_hdr_tcp_sn;
+};
+
 union if_snd_tag_alloc_params {
     struct if_snd_tag_alloc_header hdr;
     struct if_snd_tag_alloc_rate_limit rate_limit;
     struct if_snd_tag_alloc_rate_limit unlimited;
     struct if_snd_tag_alloc_tls tls;
+    struct if_snd_tag_alloc_tls_rx tls_rx;
     struct if_snd_tag_alloc_tls_rate_limit tls_rate_limit;
 };
 
@@ -241,6 +257,7 @@
     struct if_snd_tag_rate_limit_params rate_limit;
     struct if_snd_tag_rate_limit_params unlimited;
     struct if_snd_tag_rate_limit_params tls_rate_limit;
+    struct if_snd_tag_modify_tls_rx tls_rx;
 };
 
 union if_snd_tag_query_params {
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -280,7 +280,7 @@
 #ifdef KERN_TLS
     if (tls != NULL) {
         if (error == EAGAIN)
-            error = ktls_output_eagain(inp, tls);
+            error = ktls_output_eagain(inp);
         ktls_free(tls);
     }
 #endif
diff --git a/sys/netinet/tcp_ratelimit.c b/sys/netinet/tcp_ratelimit.c
--- a/sys/netinet/tcp_ratelimit.c
+++ b/sys/netinet/tcp_ratelimit.c
@@ -1361,7 +1361,7 @@
          * tag to a TLS ratelimit tag.
          */
         MPASS(tls->snd_tag->sw->type == IF_SND_TAG_TYPE_TLS);
-        ktls_output_eagain(tp->t_inpcb, tls);
+        ktls_output_eagain(tp->t_inpcb);
     }
 #endif
     } else {
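Note (not part of the patch): the if_var.h additions follow the existing send-tag convention of a type field in the shared header plus one union member per tag type, which is why IF_SND_TAG_TYPE_TLS_RX needs an entry in both the alloc and the modify unions. The toy program below uses invented stand-in types, not the real if_var.h definitions, to show that dispatch pattern.

/* Toy model of the "type + union" send-tag parameter convention: the caller
 * sets hdr.type and fills exactly one union member; the driver switches on
 * the type to know which member is valid. */
#include <stdint.h>
#include <stdio.h>

enum tag_type { TAG_RATE_LIMIT, TAG_TLS, TAG_TLS_RX };

struct tag_hdr {
	enum tag_type type;
	uint32_t flowid;
};

union tag_alloc_params {
	struct tag_hdr hdr;
	struct {
		struct tag_hdr hdr;
		uint64_t max_rate;
	} rate_limit;
	struct {
		struct tag_hdr hdr;
		uint32_t next_tls_hdr_tcp_sn;	/* mirrors tls_rx above */
	} tls_rx;
};

static void
driver_alloc(const union tag_alloc_params *p)
{
	switch (p->hdr.type) {
	case TAG_TLS_RX:
		printf("TLS RX tag, first header at sn %u\n",
		    (unsigned)p->tls_rx.next_tls_hdr_tcp_sn);
		break;
	default:
		printf("some other tag type\n");
		break;
	}
}

int
main(void)
{
	union tag_alloc_params p = {
		.tls_rx = { .hdr = { TAG_TLS_RX, 7 },
		    .next_tls_hdr_tcp_sn = 1000 },
	};

	driver_alloc(&p);
	return (0);
}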
diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c
--- a/sys/netinet6/ip6_output.c
+++ b/sys/netinet6/ip6_output.c
@@ -392,7 +392,7 @@
 #ifdef KERN_TLS
     if (tls != NULL) {
         if (error == EAGAIN)
-            error = ktls_output_eagain(inp, tls);
+            error = ktls_output_eagain(inp);
         ktls_free(tls);
     }
 #endif
diff --git a/sys/sys/ktls.h b/sys/sys/ktls.h
--- a/sys/sys/ktls.h
+++ b/sys/sys/ktls.h
@@ -185,11 +185,12 @@
             uint64_t seqno, int *trailer_len);
     };
     struct ktls_ocf_session *ocf_session;
-    struct m_snd_tag *snd_tag;
+    struct m_snd_tag *snd_rcv_tag;
     struct tls_session_params params;
     u_int wq_index;
     volatile u_int refcount;
     int mode;
+    int direction;
 
     struct task reset_tag_task;
     struct task disable_ifnet_task;
@@ -202,6 +203,7 @@
 extern unsigned int ktls_ifnet_max_rexmit_pct;
 
 void ktls_check_rx(struct sockbuf *sb);
+bool ktls_mbuf_is_decrypted(struct mbuf *mb, int offset, int len);
 void ktls_disable_ifnet(void *arg);
 int ktls_enable_rx(struct socket *so, struct tls_enable *en);
 int ktls_enable_tx(struct socket *so, struct tls_enable *en);
@@ -214,7 +216,7 @@
 int ktls_get_rx_mode(struct socket *so, int *modep);
 int ktls_set_tx_mode(struct socket *so, int mode);
 int ktls_get_tx_mode(struct socket *so, int *modep);
-int ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls);
+int ktls_output_eagain(struct inpcb *inp);
 #ifdef RATELIMIT
 int ktls_modify_txrtlmt(struct ktls_session *tls, uint64_t max_pacing_rate);
 #endif
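Note (not part of the patch): regardless of whether records were decrypted in software or by the NIC via the receive tag added here, userspace consumes them through the existing KTLS receive interface: each record arrives with a TLS_GET_RECORD control message carrying struct tls_get_record. The sketch below is a hedged illustration; read_tls_record() is a hypothetical helper and error handling is abbreviated.

/* Hedged userspace sketch: read one decrypted TLS record from a socket with
 * TLS RX offload enabled and print the record header reported by the kernel
 * in the accompanying control message. */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/ktls.h>
#include <stdio.h>
#include <string.h>

static ssize_t
read_tls_record(int fd, void *buf, size_t buflen)
{
	struct tls_get_record tgr;
	char cbuf[CMSG_SPACE(sizeof(tgr))];
	struct iovec iov = { .iov_base = buf, .iov_len = buflen };
	struct msghdr msg;
	struct cmsghdr *cmsg;
	ssize_t n;

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = sizeof(cbuf);

	n = recvmsg(fd, &msg, 0);
	if (n <= 0)
		return (n);

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
	    cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level == IPPROTO_TCP &&
		    cmsg->cmsg_type == TLS_GET_RECORD) {
			memcpy(&tgr, CMSG_DATA(cmsg), sizeof(tgr));
			printf("record type %d, length %d\n",
			    tgr.tls_type, tgr.tls_length);
		}
	}
	return (n);
}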