Index: sbin/ifconfig/ifconfig.8 =================================================================== --- sbin/ifconfig/ifconfig.8 +++ sbin/ifconfig/ifconfig.8 @@ -538,6 +538,28 @@ If the driver supports .Xr tcp 4 large receive offloading, disable LRO on the interface. +.It Cm txtls +Transmit TLS offload encrypts Transport Layer Security (TLS) records and +segments the encrypted record into one or more +.Xr tcp 4 +segments over either +.Xr ip 4 +or +.Xr ip6 4 . +If the driver supports transmit TLS offload, +enable transmit TLS offload on the interface. +Some drivers may not be able to support transmit TLS offload for +.Xr ip 4 +and +.Xr ip6 4 +packets, so they may enable only one of them. +.It Fl txtls +If the driver supports transmit TLS offload, +disable transmit TLS offload on the interface. +It will always disable TLS for +.Xr ip 4 +and +.Xr ip6 4 . .It Cm nomap If the driver supports unmapped network buffers, enable them on the interface. Index: sbin/ifconfig/ifconfig.c =================================================================== --- sbin/ifconfig/ifconfig.c +++ sbin/ifconfig/ifconfig.c @@ -1257,7 +1257,7 @@ "\020\1RXCSUM\2TXCSUM\3NETCONS\4VLAN_MTU\5VLAN_HWTAGGING\6JUMBO_MTU\7POLLING" \ "\10VLAN_HWCSUM\11TSO4\12TSO6\13LRO\14WOL_UCAST\15WOL_MCAST\16WOL_MAGIC" \ "\17TOE4\20TOE6\21VLAN_HWFILTER\23VLAN_HWTSO\24LINKSTATE\25NETMAP" \ -"\26RXCSUM_IPV6\27TXCSUM_IPV6\31TXRTLMT\32HWRXTSTMP\33NOMAP" +"\26RXCSUM_IPV6\27TXCSUM_IPV6\31TXRTLMT\32HWRXTSTMP\33NOMAP\34TXTLS4\35TXTLS6" /* * Print the status of the interface. 
If an address family was @@ -1585,6 +1585,8 @@ DEF_CMD("-toe", -IFCAP_TOE, setifcap), DEF_CMD("lro", IFCAP_LRO, setifcap), DEF_CMD("-lro", -IFCAP_LRO, setifcap), + DEF_CMD("txtls", IFCAP_TXTLS, setifcap), + DEF_CMD("-txtls", -IFCAP_TXTLS, setifcap), DEF_CMD("wol", IFCAP_WOL, setifcap), DEF_CMD("-wol", -IFCAP_WOL, setifcap), DEF_CMD("wol_ucast", IFCAP_WOL_UCAST, setifcap), Index: share/man/man4/tcp.4 =================================================================== --- share/man/man4/tcp.4 +++ share/man/man4/tcp.4 @@ -293,6 +293,51 @@ the system does not send any outgoing segments and drops any inbound segments. .Pp Each dropped segment is taken into account in the TCP protocol statistics. +.It Dv TCP_TXTLS_ENABLE +Enable in-kernel Transport Layer Security (TLS) for data written to this +socket. +The +.Vt struct tls_so_enable +argument defines the encryption and authentication algorithms and keys +used to encrypt the socket data as well as the maximum TLS record +payload size. +.Pp +All data written to this socket will be encapsulated in TLS records +and subsequently encrypted. +By default all data written to this socket is treated as application data. +Individual TLS records with a type other than application data +(for example, handshake messages), +may be transmitted by invoking +.Xr sendmsg 2 +with a custom TLS record type set in a +.Dv TLS_SET_RECORD_TYPE +control message. +The payload of this control message is a single byte holding the desired +TLS record type. +.Pp +Data read from this socket will still be encrypted and must be parsed by +a TLS-aware consumer. +.Pp +At present, only a single key may be set on a socket. +As such, users of this option must disable rekeying. +.It Dv TCP_TXTLS_MODE +The integer argument can be used to get or set the current TLS mode of a +socket. +Setting the mode can only be used to toggle between software and NIC TLS after +TLS has been initially enabled via the +.Dv TCP_TXTLS_ENABLE +option. 
+The available modes are: +.Bl -tag -width "Dv TCP_TLS_MODE_IFNET" +.It Dv TCP_TLS_MODE_NONE +In-kernel TLS framing and encryption is not enabled for this socket. +.It Dv TCP_TLS_MODE_SW +TLS records are encrypted by the kernel prior to placing the data in the +socket buffer. +Typically this encryption is performed in software. +.It Dv TCP_TLS_MODE_IFNET +TLS records are encrypted by the network interface card (NIC). +.El .El .Pp The option level for the Index: sys/conf/NOTES =================================================================== --- sys/conf/NOTES +++ sys/conf/NOTES @@ -668,6 +668,10 @@ options IPSEC_SUPPORT #options IPSEC_DEBUG #debug for IP security + +# TLS framing and encryption of data transmitted over TCP sockets. +options KERN_TLS # TLS transmit offload + # # SMB/CIFS requester # NETSMB enables support for SMB protocol, it requires LIBMCHAIN and LIBICONV Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -3868,6 +3868,7 @@ kern/uipc_accf.c standard kern/uipc_debug.c optional ddb kern/uipc_domain.c standard +kern/uipc_ktls.c optional kern_tls kern/uipc_mbuf.c standard kern/uipc_mbuf2.c standard kern/uipc_mbufhash.c standard Index: sys/conf/makeLINT.mk =================================================================== --- sys/conf/makeLINT.mk +++ sys/conf/makeLINT.mk @@ -48,8 +48,10 @@ echo "nodevice netmap" >> ${.TARGET}-NOIP .endif .if ${TARGET} == "arm" - cat ${.TARGET} ${.CURDIR}/NOTES.armv5 > ${.TARGET}-V5 - cat ${.TARGET} ${.CURDIR}/NOTES.armv7 > ${.TARGET}-V7 + cat ${NOTES} ${.CURDIR}/NOTES.armv5 | sed -E -n -f ${MAKELINT_SED} > \ + ${.TARGET}-V5 + cat ${NOTES} ${.CURDIR}/NOTES.armv7 | sed -E -n -f ${MAKELINT_SED} > \ + ${.TARGET}-V7 rm ${.TARGET} .endif .if ${TARGET} == "mips" Index: sys/conf/options =================================================================== --- sys/conf/options +++ sys/conf/options @@ -440,6 +440,7 @@ IPSEC_DEBUG opt_ipsec.h 
IPSEC_SUPPORT opt_ipsec.h IPSTEALTH +KERN_TLS KRPC LIBALIAS LIBMCHAIN Index: sys/kern/kern_mbuf.c =================================================================== --- sys/kern/kern_mbuf.c +++ sys/kern/kern_mbuf.c @@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$"); #include "opt_param.h" +#include "opt_kern_tls.h" #include #include @@ -41,10 +42,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -112,10 +115,10 @@ int nmbjumbo9; /* limits number of 9k jumbo clusters */ int nmbjumbo16; /* limits number of 16k jumbo clusters */ -bool mb_use_ext_pgs; /* use EXT_PGS mbufs for sendfile */ +bool mb_use_ext_pgs; /* use EXT_PGS mbufs for sendfile & TLS */ SYSCTL_BOOL(_kern_ipc, OID_AUTO, mb_use_ext_pgs, CTLFLAG_RWTUN, &mb_use_ext_pgs, 0, - "Use unmapped mbufs for sendfile(2)"); + "Use unmapped mbufs for sendfile(2) and TLS offload"); static quad_t maxmbufmem; /* overall real memory limit for all mbufs */ @@ -1281,13 +1284,27 @@ uma_zfree(zone_jumbo16, m->m_ext.ext_buf); uma_zfree(zone_mbuf, mref); break; - case EXT_PGS: + case EXT_PGS: { +#ifdef KERN_TLS + struct mbuf_ext_pgs *pgs; + struct ktls_session *tls; +#endif + KASSERT(mref->m_ext.ext_free != NULL, ("%s: ext_free not set", __func__)); mref->m_ext.ext_free(mref); - uma_zfree(zone_extpgs, mref->m_ext.ext_pgs); +#ifdef KERN_TLS + pgs = mref->m_ext.ext_pgs; + tls = pgs->tls; + if (tls != NULL && + !refcount_release_if_not_last(&tls->refcount)) + ktls_enqueue_to_free(pgs); + else +#endif + uma_zfree(zone_extpgs, mref->m_ext.ext_pgs); uma_zfree(zone_mbuf, mref); break; + } case EXT_SFBUF: case EXT_NET_DRV: case EXT_MOD_TYPE: Index: sys/kern/kern_sendfile.c =================================================================== --- sys/kern/kern_sendfile.c +++ sys/kern/kern_sendfile.c @@ -30,12 +30,15 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_kern_tls.h" + #include #include #include #include #include #include +#include #include #include #include @@ -85,6 +88,7 
@@ int npages; struct socket *so; struct mbuf *m; + struct ktls_session *tls; vm_page_t pa[]; }; @@ -316,6 +320,15 @@ if (!refcount_release(&sfio->nios)) return; +#ifdef INVARIANTS + if ((sfio->m->m_flags & M_EXT) != 0 && + sfio->m->m_ext.ext_type == EXT_PGS) + KASSERT(sfio->tls == sfio->m->m_ext.ext_pgs->tls, + ("TLS session mismatch")); + else + KASSERT(sfio->tls == NULL, + ("non-ext_pgs mbuf with TLS session")); +#endif CURVNET_SET(so->so_vnet); if (sfio->error) { /* @@ -333,12 +346,29 @@ so->so_error = EIO; mb_free_notready(sfio->m, sfio->npages); +#ifdef KERN_TLS + } else if (sfio->tls != NULL && sfio->tls->sw_encrypt != NULL) { + /* + * I/O operation is complete, but we still need to + * encrypt. We cannot do this in the interrupt thread + * of the disk controller, so forward the mbufs to a + * different thread. + * + * Donate the socket reference from sfio rather + * than explicitly invoking soref(). + */ + ktls_enqueue(sfio->m, so, sfio->npages); + goto out_with_ref; +#endif } else (void)(so->so_proto->pr_usrreqs->pru_ready)(so, sfio->m, sfio->npages); SOCK_LOCK(so); sorele(so); +#ifdef KERN_TLS +out_with_ref: +#endif CURVNET_RESTORE(); free(sfio, M_TEMP); } @@ -571,6 +601,9 @@ struct vnode *vp; struct vm_object *obj; struct socket *so; +#ifdef KERN_TLS + struct ktls_session *tls; +#endif struct mbuf_ext_pgs *ext_pgs; struct mbuf *m, *mh, *mhtail; struct sf_buf *sf; @@ -579,12 +612,18 @@ struct vattr va; off_t off, sbytes, rem, obj_size; int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr; +#ifdef KERN_TLS + int tls_enq_cnt; +#endif bool use_ext_pgs; obj = NULL; so = NULL; m = mh = NULL; sfs = NULL; +#ifdef KERN_TLS + tls = NULL; +#endif hdrlen = sbytes = 0; softerr = 0; use_ext_pgs = false; @@ -621,6 +660,9 @@ * we implement that, but possibly shouldn't. 
*/ (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); +#ifdef KERN_TLS + tls = ktls_hold(so->so_snd.sb_tls_info); +#endif /* * Loop through the pages of the file, starting with the requested @@ -714,7 +756,14 @@ if (hdr_uio != NULL && hdr_uio->uio_resid > 0) { hdr_uio->uio_td = td; hdr_uio->uio_rw = UIO_WRITE; - mh = m_uiotombuf(hdr_uio, M_WAITOK, space, 0, 0); +#ifdef KERN_TLS + if (tls != NULL) + mh = m_uiotombuf(hdr_uio, M_WAITOK, space, + tls->params.max_frame_len, M_NOMAP); + else +#endif + mh = m_uiotombuf(hdr_uio, M_WAITOK, + space, 0, 0); hdrlen = m_length(mh, &mhtail); space -= hdrlen; /* @@ -788,6 +837,15 @@ sfio->so = so; sfio->error = 0; +#ifdef KERN_TLS + /* + * This doesn't use ktls_hold() because sfio->m will + * also have a reference on 'tls' that will be valid + * for all of sfio's lifetime. + */ + sfio->tls = tls; +#endif + nios = sendfile_swapin(obj, sfio, off, space, npages, rhpages, flags); @@ -802,11 +860,22 @@ * bufs are restricted to TCP as that is what has been * tested. In particular, unmapped mbufs have not * been tested with UNIX-domain sockets. + * + * TLS frames always require unmapped mbufs. */ - if (mb_use_ext_pgs && - so->so_proto->pr_protocol == IPPROTO_TCP) { + if ((mb_use_ext_pgs && + so->so_proto->pr_protocol == IPPROTO_TCP) +#ifdef KERN_TLS + || tls != NULL +#endif + ) { use_ext_pgs = true; - max_pgs = MBUF_PEXT_MAX_PGS; +#ifdef KERN_TLS + if (tls != NULL) + max_pgs = num_pages(tls->params.max_frame_len); + else +#endif + max_pgs = MBUF_PEXT_MAX_PGS; /* Start at last index, to wrap on first use. */ ext_pgs_idx = max_pgs - 1; @@ -985,6 +1054,14 @@ __func__, m_length(m, NULL), space, hdrlen)); CURVNET_SET(so->so_vnet); +#ifdef KERN_TLS + if (tls != NULL) { + error = ktls_frame(m, tls, &tls_enq_cnt, + TLS_RLTYPE_APP); + if (error != 0) + goto done; + } +#endif if (nios == 0) { /* * If sendfile_swapin() didn't initiate any I/Os, @@ -993,8 +1070,16 @@ * PRUS_NOTREADY flag. 
*/ free(sfio, M_TEMP); - error = (*so->so_proto->pr_usrreqs->pru_send) - (so, 0, m, NULL, NULL, td); +#ifdef KERN_TLS + if (tls != NULL && tls->sw_encrypt != NULL) { + error = (*so->so_proto->pr_usrreqs->pru_send) + (so, PRUS_NOTREADY, m, NULL, NULL, td); + soref(so); + ktls_enqueue(m, so, tls_enq_cnt); + } else +#endif + error = (*so->so_proto->pr_usrreqs->pru_send) + (so, 0, m, NULL, NULL, td); } else { sfio->npages = npages; soref(so); @@ -1058,6 +1143,10 @@ mtx_destroy(&sfs->mtx); free(sfs, M_TEMP); } +#ifdef KERN_TLS + if (tls != NULL) + ktls_free(tls); +#endif if (error == ERESTART) error = EINTR; Index: sys/kern/uipc_ktls.c =================================================================== --- /dev/null +++ sys/kern/uipc_ktls.c @@ -0,0 +1,1457 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2014-2019 Netflix Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_rss.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) +#include +#endif +#include +#ifdef RSS +#include +#include +#endif +#if defined(INET) || defined(INET6) +#include +#include +#endif +#include +#include +#include +#include +#include +#include + +struct ktls_wq { + struct mtx mtx; + STAILQ_HEAD(, mbuf_ext_pgs) head; + bool running; +} __aligned(CACHE_LINE_SIZE); + +static struct ktls_wq *ktls_wq; +static struct proc *ktls_proc; +LIST_HEAD(, ktls_crypto_backend) ktls_backends; +static struct rmlock ktls_backends_lock; +static uma_zone_t ktls_session_zone; +static uint16_t ktls_cpuid_lookup[MAXCPU]; + +SYSCTL_NODE(_kern_ipc, OID_AUTO, tls, CTLFLAG_RW, 0, + "Kernel TLS offload"); +SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, stats, CTLFLAG_RW, 0, + "Kernel TLS offload stats"); + +static int ktls_allow_unload; +SYSCTL_INT(_kern_ipc_tls, OID_AUTO, allow_unload, CTLFLAG_RDTUN, + &ktls_allow_unload, 0, "Allow software crypto modules to unload"); + +#ifdef RSS +static int ktls_bind_threads = 1; +#else +static int ktls_bind_threads; +#endif +SYSCTL_INT(_kern_ipc_tls, OID_AUTO, bind_threads, CTLFLAG_RDTUN, + &ktls_bind_threads, 0, + "Bind crypto threads 
to cores or domains at boot"); + +static u_int ktls_maxlen = 16384; +SYSCTL_UINT(_kern_ipc_tls, OID_AUTO, maxlen, CTLFLAG_RWTUN, + &ktls_maxlen, 0, "Maximum TLS record size"); + +static int ktls_number_threads; +SYSCTL_INT(_kern_ipc_tls_stats, OID_AUTO, threads, CTLFLAG_RD, + &ktls_number_threads, 0, + "Number of TLS threads in thread-pool"); + +static bool ktls_offload_enable; +SYSCTL_BOOL(_kern_ipc_tls, OID_AUTO, enable, CTLFLAG_RW, + &ktls_offload_enable, 0, + "Enable support for kernel TLS offload"); + +static bool ktls_cbc_enable = true; +SYSCTL_BOOL(_kern_ipc_tls, OID_AUTO, cbc_enable, CTLFLAG_RW, + &ktls_cbc_enable, 1, + "Enable Support of AES-CBC crypto for kernel TLS"); + +static counter_u64_t ktls_tasks_active; +SYSCTL_COUNTER_U64(_kern_ipc_tls, OID_AUTO, tasks_active, CTLFLAG_RD, + &ktls_tasks_active, "Number of active tasks"); + +static counter_u64_t ktls_cnt_on; +SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, so_inqueue, CTLFLAG_RD, + &ktls_cnt_on, "Number of TLS records in queue to tasks for SW crypto"); + +static counter_u64_t ktls_offload_total; +SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, offload_total, + CTLFLAG_RD, &ktls_offload_total, + "Total successful TLS setups (parameters set)"); + +static counter_u64_t ktls_offload_enable_calls; +SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, enable_calls, + CTLFLAG_RD, &ktls_offload_enable_calls, + "Total number of TLS enable calls made"); + +static counter_u64_t ktls_offload_active; +SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, active, CTLFLAG_RD, + &ktls_offload_active, "Total Active TLS sessions"); + +static counter_u64_t ktls_offload_failed_crypto; +SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, failed_crypto, CTLFLAG_RD, + &ktls_offload_failed_crypto, "Total TLS crypto failures"); + +static counter_u64_t ktls_switch_to_ifnet; +SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_to_ifnet, CTLFLAG_RD, + &ktls_switch_to_ifnet, "TLS sessions switched from SW to ifnet"); + +static 
counter_u64_t ktls_switch_to_sw; +SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_to_sw, CTLFLAG_RD, + &ktls_switch_to_sw, "TLS sessions switched from ifnet to SW"); + +static counter_u64_t ktls_switch_failed; +SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, switch_failed, CTLFLAG_RD, + &ktls_switch_failed, "TLS sessions unable to switch between SW and ifnet"); + +SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, sw, CTLFLAG_RD, 0, + "Software TLS session stats"); +SYSCTL_NODE(_kern_ipc_tls, OID_AUTO, ifnet, CTLFLAG_RD, 0, + "Hardware (ifnet) TLS session stats"); + +static counter_u64_t ktls_sw_cbc; +SYSCTL_COUNTER_U64(_kern_ipc_tls_sw, OID_AUTO, cbc, CTLFLAG_RD, &ktls_sw_cbc, + "Active number of software TLS sessions using AES-CBC"); + +static counter_u64_t ktls_sw_gcm; +SYSCTL_COUNTER_U64(_kern_ipc_tls_sw, OID_AUTO, gcm, CTLFLAG_RD, &ktls_sw_gcm, + "Active number of software TLS sessions using AES-GCM"); + +static counter_u64_t ktls_ifnet_cbc; +SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, cbc, CTLFLAG_RD, + &ktls_ifnet_cbc, + "Active number of ifnet TLS sessions using AES-CBC"); + +static counter_u64_t ktls_ifnet_gcm; +SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, gcm, CTLFLAG_RD, + &ktls_ifnet_gcm, + "Active number of ifnet TLS sessions using AES-GCM"); + +static counter_u64_t ktls_ifnet_reset; +SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, reset, CTLFLAG_RD, + &ktls_ifnet_reset, "TLS sessions updated to a new ifnet send tag"); + +static counter_u64_t ktls_ifnet_reset_dropped; +SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, reset_dropped, CTLFLAG_RD, + &ktls_ifnet_reset_dropped, + "TLS sessions dropped after failing to update ifnet send tag"); + +static counter_u64_t ktls_ifnet_reset_failed; +SYSCTL_COUNTER_U64(_kern_ipc_tls_ifnet, OID_AUTO, reset_failed, CTLFLAG_RD, + &ktls_ifnet_reset_failed, + "TLS sessions that failed to allocate a new ifnet send tag"); + +static int ktls_ifnet_permitted; +SYSCTL_UINT(_kern_ipc_tls_ifnet, OID_AUTO, permitted, 
CTLFLAG_RWTUN, + &ktls_ifnet_permitted, 1, + "Whether to permit hardware (ifnet) TLS sessions"); + +static MALLOC_DEFINE(M_KTLS, "ktls", "Kernel TLS"); + +static void ktls_cleanup(struct ktls_session *tls); +#if defined(INET) || defined(INET6) +static void ktls_reset_send_tag(void *context, int pending); +#endif +static void ktls_work_thread(void *ctx); + +int +ktls_crypto_backend_register(struct ktls_crypto_backend *be) +{ + struct ktls_crypto_backend *curr_be, *tmp; + + if (be->api_version != KTLS_API_VERSION) { + printf("KTLS: API version mismatch (%d vs %d) for %s\n", + be->api_version, KTLS_API_VERSION, + be->name); + return (EINVAL); + } + + rm_wlock(&ktls_backends_lock); + printf("KTLS: Registering crypto method %s with prio %d\n", + be->name, be->prio); + if (LIST_EMPTY(&ktls_backends)) { + LIST_INSERT_HEAD(&ktls_backends, be, next); + } else { + LIST_FOREACH_SAFE(curr_be, &ktls_backends, next, tmp) { + if (curr_be->prio < be->prio) { + LIST_INSERT_BEFORE(curr_be, be, next); + break; + } + if (LIST_NEXT(curr_be, next) == NULL) { + LIST_INSERT_AFTER(curr_be, be, next); + break; + } + } + } + rm_wunlock(&ktls_backends_lock); + return (0); +} + +int +ktls_crypto_backend_deregister(struct ktls_crypto_backend *be) +{ + struct ktls_crypto_backend *tmp; + + /* + * Don't error if the backend isn't registered. This permits + * MOD_UNLOAD handlers to use this function unconditionally. 
+ */ + rm_wlock(&ktls_backends_lock); + LIST_FOREACH(tmp, &ktls_backends, next) { + if (tmp == be) + break; + } + if (tmp == NULL) { + rm_wunlock(&ktls_backends_lock); + return (0); + } + + if (!ktls_allow_unload) { + rm_wunlock(&ktls_backends_lock); + printf( + "KTLS: Deregistering crypto method %s is not supported\n", + be->name); + return (EBUSY); + } + + if (be->use_count) { + rm_wunlock(&ktls_backends_lock); + return (EBUSY); + } + + LIST_REMOVE(be, next); + rm_wunlock(&ktls_backends_lock); + return (0); +} + +#if defined(INET) || defined(INET6) +static uint16_t +ktls_get_cpu(struct socket *so) +{ + struct inpcb *inp; + uint16_t cpuid; + + inp = sotoinpcb(so); +#ifdef RSS + cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype); + if (cpuid != NETISR_CPUID_NONE) + return (cpuid); +#endif + /* + * Just use the flowid to shard connections in a repeatable + * fashion. Note that some crypto backends rely on the + * serialization provided by having the same connection use + * the same queue. 
+ */ + cpuid = ktls_cpuid_lookup[inp->inp_flowid % ktls_number_threads]; + return (cpuid); +} +#endif + +static void +ktls_init(void *dummy __unused) +{ + struct thread *td; + struct pcpu *pc; + cpuset_t mask; + int error, i; + + ktls_tasks_active = counter_u64_alloc(M_WAITOK); + ktls_cnt_on = counter_u64_alloc(M_WAITOK); + ktls_offload_total = counter_u64_alloc(M_WAITOK); + ktls_offload_enable_calls = counter_u64_alloc(M_WAITOK); + ktls_offload_active = counter_u64_alloc(M_WAITOK); + ktls_offload_failed_crypto = counter_u64_alloc(M_WAITOK); + ktls_switch_to_ifnet = counter_u64_alloc(M_WAITOK); + ktls_switch_to_sw = counter_u64_alloc(M_WAITOK); + ktls_switch_failed = counter_u64_alloc(M_WAITOK); + ktls_sw_cbc = counter_u64_alloc(M_WAITOK); + ktls_sw_gcm = counter_u64_alloc(M_WAITOK); + ktls_ifnet_cbc = counter_u64_alloc(M_WAITOK); + ktls_ifnet_gcm = counter_u64_alloc(M_WAITOK); + ktls_ifnet_reset = counter_u64_alloc(M_WAITOK); + ktls_ifnet_reset_dropped = counter_u64_alloc(M_WAITOK); + ktls_ifnet_reset_failed = counter_u64_alloc(M_WAITOK); + + rm_init(&ktls_backends_lock, "ktls backends"); + LIST_INIT(&ktls_backends); + + ktls_wq = malloc(sizeof(*ktls_wq) * (mp_maxid + 1), M_KTLS, + M_WAITOK | M_ZERO); + + ktls_session_zone = uma_zcreate("ktls_session", + sizeof(struct ktls_session), +#ifdef INVARIANTS + trash_ctor, trash_dtor, trash_init, trash_fini, +#else + NULL, NULL, NULL, NULL, +#endif + UMA_ALIGN_CACHE, 0); + + /* + * Initialize the workqueues to run the TLS work. We create a + * work queue for each CPU. + */ + CPU_FOREACH(i) { + STAILQ_INIT(&ktls_wq[i].head); + mtx_init(&ktls_wq[i].mtx, "ktls work queue", NULL, MTX_DEF); + error = kproc_kthread_add(ktls_work_thread, &ktls_wq[i], + &ktls_proc, &td, 0, 0, "KTLS", "ktls_thr_%d", i); + if (error) + panic("Can't add KTLS thread %d error %d", i, error); + + /* + * Bind threads to cores. If ktls_bind_threads is > + * 1, then we bind to the NUMA domain. 
+ */ + if (ktls_bind_threads) { + if (ktls_bind_threads > 1) { + pc = pcpu_find(i); + CPU_COPY(&cpuset_domain[pc->pc_domain], &mask); + } else { + CPU_SETOF(i, &mask); + } + error = cpuset_setthread(td->td_tid, &mask); + if (error) + panic( + "Unable to bind KTLS thread for CPU %d error %d", + i, error); + } + ktls_cpuid_lookup[ktls_number_threads] = i; + ktls_number_threads++; + } + printf("KTLS: Initialized %d threads\n", ktls_number_threads); +} +SYSINIT(ktls, SI_SUB_SMP + 1, SI_ORDER_ANY, ktls_init, NULL); + +#if defined(INET) || defined(INET6) +static int +ktls_create_session(struct socket *so, struct tls_enable *en, + struct ktls_session **tlsp) +{ + struct ktls_session *tls; + int error; + + /* Only TLS 1.0 - 1.2 are supported. */ + if (en->tls_vmajor != TLS_MAJOR_VER_ONE) + return (EINVAL); + if (en->tls_vminor < TLS_MINOR_VER_ZERO || + en->tls_vminor > TLS_MINOR_VER_TWO) + return (EINVAL); + + if (en->auth_key_len < 0 || en->auth_key_len > TLS_MAX_PARAM_SIZE) + return (EINVAL); + if (en->cipher_key_len < 0 || en->cipher_key_len > TLS_MAX_PARAM_SIZE) + return (EINVAL); + if (en->iv_len < 0 || en->iv_len > TLS_MAX_PARAM_SIZE) + return (EINVAL); + + /* All supported algorithms require a cipher key. */ + if (en->cipher_key_len == 0) + return (EINVAL); + + /* No flags are currently supported. */ + if (en->flags != 0) + return (EINVAL); + + /* Common checks for supported algorithms. */ + switch (en->cipher_algorithm) { + case CRYPTO_AES_NIST_GCM_16: + /* + * auth_algorithm isn't used, but permit GMAC values + * for compatibility. + */ + switch (en->auth_algorithm) { + case 0: + case CRYPTO_AES_128_NIST_GMAC: + case CRYPTO_AES_192_NIST_GMAC: + case CRYPTO_AES_256_NIST_GMAC: + break; + default: + return (EINVAL); + } + if (en->auth_key_len != 0) + return (EINVAL); + if (en->iv_len != TLS_AEAD_GCM_LEN) + return (EINVAL); + break; + case CRYPTO_AES_CBC: + switch (en->auth_algorithm) { + case CRYPTO_SHA1_HMAC: + /* + * TLS 1.0 requires an implicit IV. 
TLS 1.1+ + * all use explicit IVs. + */ + if (en->tls_vminor == TLS_MINOR_VER_ZERO) { + if (en->iv_len != TLS_CBC_IMPLICIT_IV_LEN) + return (EINVAL); + break; + } + + /* FALLTHROUGH */ + case CRYPTO_SHA2_256_HMAC: + case CRYPTO_SHA2_384_HMAC: + /* Ignore any supplied IV. */ + en->iv_len = 0; + break; + default: + return (EINVAL); + } + if (en->auth_key_len == 0) + return (EINVAL); + break; + default: + return (EINVAL); + } + + tls = uma_zalloc(ktls_session_zone, M_WAITOK | M_ZERO); + + counter_u64_add(ktls_offload_active, 1); + + refcount_init(&tls->refcount, 1); + TASK_INIT(&tls->reset_tag_task, 0, ktls_reset_send_tag, tls); + + tls->wq_index = ktls_get_cpu(so); + + tls->params.cipher_algorithm = en->cipher_algorithm; + tls->params.auth_algorithm = en->auth_algorithm; + tls->params.tls_vmajor = en->tls_vmajor; + tls->params.tls_vminor = en->tls_vminor; + tls->params.flags = en->flags; + tls->params.max_frame_len = min(TLS_MAX_MSG_SIZE_V10_2, ktls_maxlen); + + /* Set the header and trailer lengths. */ + tls->params.tls_hlen = sizeof(struct tls_record_layer); + switch (en->cipher_algorithm) { + case CRYPTO_AES_NIST_GCM_16: + tls->params.tls_hlen += 8; + tls->params.tls_tlen = AES_GMAC_HASH_LEN; + tls->params.tls_bs = 1; + break; + case CRYPTO_AES_CBC: + switch (en->auth_algorithm) { + case CRYPTO_SHA1_HMAC: + if (en->tls_vminor == TLS_MINOR_VER_ZERO) { + /* Implicit IV, no nonce. 
*/ + } else { + tls->params.tls_hlen += AES_BLOCK_LEN; + } + tls->params.tls_tlen = AES_BLOCK_LEN + + SHA1_HASH_LEN; + break; + case CRYPTO_SHA2_256_HMAC: + tls->params.tls_hlen += AES_BLOCK_LEN; + tls->params.tls_tlen = AES_BLOCK_LEN + + SHA2_256_HASH_LEN; + break; + case CRYPTO_SHA2_384_HMAC: + tls->params.tls_hlen += AES_BLOCK_LEN; + tls->params.tls_tlen = AES_BLOCK_LEN + + SHA2_384_HASH_LEN; + break; + default: + panic("invalid hmac"); + } + tls->params.tls_bs = AES_BLOCK_LEN; + break; + default: + panic("invalid cipher"); + } + + KASSERT(tls->params.tls_hlen <= MBUF_PEXT_HDR_LEN, + ("TLS header length too long: %d", tls->params.tls_hlen)); + KASSERT(tls->params.tls_tlen <= MBUF_PEXT_TRAIL_LEN, + ("TLS trailer length too long: %d", tls->params.tls_tlen)); + + if (en->auth_key_len != 0) { + tls->params.auth_key_len = en->auth_key_len; + tls->params.auth_key = malloc(en->auth_key_len, M_KTLS, + M_WAITOK); + error = copyin(en->auth_key, tls->params.auth_key, + en->auth_key_len); + if (error) + goto out; + } + + tls->params.cipher_key_len = en->cipher_key_len; + tls->params.cipher_key = malloc(en->cipher_key_len, M_KTLS, M_WAITOK); + error = copyin(en->cipher_key, tls->params.cipher_key, + en->cipher_key_len); + if (error) + goto out; + + /* + * This holds the implicit portion of the nonce for GCM and + * the initial implicit IV for TLS 1.0. The explicit portions + * of the IV are generated in ktls_frame() and ktls_seq(). 
+ */ + if (en->iv_len != 0) { + MPASS(en->iv_len <= sizeof(tls->params.iv)); + tls->params.iv_len = en->iv_len; + error = copyin(en->iv, tls->params.iv, en->iv_len); + if (error) + goto out; + } + + *tlsp = tls; + return (0); + +out: + ktls_cleanup(tls); + return (error); +} + +static struct ktls_session * +ktls_clone_session(struct ktls_session *tls) +{ + struct ktls_session *tls_new; + + tls_new = uma_zalloc(ktls_session_zone, M_WAITOK | M_ZERO); + + counter_u64_add(ktls_offload_active, 1); + + refcount_init(&tls_new->refcount, 1); + + /* Copy fields from existing session. */ + tls_new->params = tls->params; + tls_new->wq_index = tls->wq_index; + + /* Deep copy keys. */ + if (tls_new->params.auth_key != NULL) { + tls_new->params.auth_key = malloc(tls->params.auth_key_len, + M_KTLS, M_WAITOK); + memcpy(tls_new->params.auth_key, tls->params.auth_key, + tls->params.auth_key_len); + } + + tls_new->params.cipher_key = malloc(tls->params.cipher_key_len, M_KTLS, + M_WAITOK); + memcpy(tls_new->params.cipher_key, tls->params.cipher_key, + tls->params.cipher_key_len); + + return (tls_new); +} +#endif + +static void +ktls_cleanup(struct ktls_session *tls) +{ + + counter_u64_add(ktls_offload_active, -1); + if (tls->free != NULL) { + MPASS(tls->be != NULL); + switch (tls->params.cipher_algorithm) { + case CRYPTO_AES_CBC: + counter_u64_add(ktls_sw_cbc, -1); + break; + case CRYPTO_AES_NIST_GCM_16: + counter_u64_add(ktls_sw_gcm, -1); + break; + } + tls->free(tls); + } else if (tls->snd_tag != NULL) { + switch (tls->params.cipher_algorithm) { + case CRYPTO_AES_CBC: + counter_u64_add(ktls_ifnet_cbc, -1); + break; + case CRYPTO_AES_NIST_GCM_16: + counter_u64_add(ktls_ifnet_gcm, -1); + break; + } + m_snd_tag_rele(tls->snd_tag); + } + if (tls->params.auth_key != NULL) { + explicit_bzero(tls->params.auth_key, tls->params.auth_key_len); + free(tls->params.auth_key, M_KTLS); + tls->params.auth_key = NULL; + tls->params.auth_key_len = 0; + } + if (tls->params.cipher_key != NULL) { + 
explicit_bzero(tls->params.cipher_key, + tls->params.cipher_key_len); + free(tls->params.cipher_key, M_KTLS); + tls->params.cipher_key = NULL; + tls->params.cipher_key_len = 0; + } + explicit_bzero(tls->params.iv, sizeof(tls->params.iv)); +} + +#if defined(INET) || defined(INET6) +/* + * Common code used when first enabling ifnet TLS on a connection or + * when allocating a new ifnet TLS session due to a routing change. + * This function allocates a new TLS send tag on whatever interface + * the connection is currently routed over. + */ +static int +ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_session *tls, bool force, + struct m_snd_tag **mstp) +{ + union if_snd_tag_alloc_params params; + struct ifnet *ifp; + struct rtentry *rt; + struct tcpcb *tp; + int error; + + INP_RLOCK(inp); + if (inp->inp_flags2 & INP_FREED) { + INP_RUNLOCK(inp); + return (ECONNRESET); + } + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + INP_RUNLOCK(inp); + return (ECONNRESET); + } + if (inp->inp_socket == NULL) { + INP_RUNLOCK(inp); + return (ECONNRESET); + } + tp = intotcpcb(inp); + + /* + * Check administrative controls on ifnet TLS to determine if + * ifnet TLS should be denied. + * + * - Always permit 'force' requests. + * - ktls_ifnet_permitted == 0: always deny. + */ + if (!force && ktls_ifnet_permitted == 0) { + INP_RUNLOCK(inp); + return (ENXIO); + } + + /* + * XXX: Use the cached route in the inpcb to find the + * interface. This should perhaps instead use + * rtalloc1_fib(dst, 0, 0, fibnum). Since KTLS is only + * enabled after a connection has completed key negotiation in + * userland, the cached route will be present in practice. 
+ */ + rt = inp->inp_route.ro_rt; + if (rt == NULL || rt->rt_ifp == NULL) { + INP_RUNLOCK(inp); + return (ENXIO); + } + ifp = rt->rt_ifp; + if_ref(ifp); + + params.hdr.type = IF_SND_TAG_TYPE_TLS; + params.hdr.flowid = inp->inp_flowid; + params.hdr.flowtype = inp->inp_flowtype; + params.tls.inp = inp; + params.tls.tls = tls; + INP_RUNLOCK(inp); + + if (ifp->if_snd_tag_alloc == NULL) { + error = EOPNOTSUPP; + goto out; + } + if ((ifp->if_capenable & IFCAP_NOMAP) == 0) { + error = EOPNOTSUPP; + goto out; + } + if (inp->inp_vflag & INP_IPV6) { + if ((ifp->if_capenable & IFCAP_TXTLS6) == 0) { + error = EOPNOTSUPP; + goto out; + } + } else { + if ((ifp->if_capenable & IFCAP_TXTLS4) == 0) { + error = EOPNOTSUPP; + goto out; + } + } + error = ifp->if_snd_tag_alloc(ifp, &params, mstp); +out: + if_rele(ifp); + return (error); +} + +static int +ktls_try_ifnet(struct socket *so, struct ktls_session *tls, bool force) +{ + struct m_snd_tag *mst; + int error; + + error = ktls_alloc_snd_tag(so->so_pcb, tls, force, &mst); + if (error == 0) { + tls->snd_tag = mst; + switch (tls->params.cipher_algorithm) { + case CRYPTO_AES_CBC: + counter_u64_add(ktls_ifnet_cbc, 1); + break; + case CRYPTO_AES_NIST_GCM_16: + counter_u64_add(ktls_ifnet_gcm, 1); + break; + } + } + return (error); +} + +static int +ktls_try_sw(struct socket *so, struct ktls_session *tls) +{ + struct rm_priotracker prio; + struct ktls_crypto_backend *be; + + /* + * Choose the best software crypto backend. Backends are + * stored in sorted priority order (largest value == most + * important at the head of the list), so this just stops on + * the first backend that claims the session by returning + * success. 
+ */ + if (ktls_allow_unload) + rm_rlock(&ktls_backends_lock, &prio); + LIST_FOREACH(be, &ktls_backends, next) { + if (be->try(so, tls) == 0) + break; + KASSERT(tls->cipher == NULL, + ("ktls backend leaked a cipher pointer")); + } + if (be != NULL) { + if (ktls_allow_unload) + be->use_count++; + tls->be = be; + } + if (ktls_allow_unload) + rm_runlock(&ktls_backends_lock, &prio); + if (be == NULL) + return (EOPNOTSUPP); + switch (tls->params.cipher_algorithm) { + case CRYPTO_AES_CBC: + counter_u64_add(ktls_sw_cbc, 1); + break; + case CRYPTO_AES_NIST_GCM_16: + counter_u64_add(ktls_sw_gcm, 1); + break; + } + return (0); +} + +int +ktls_enable_tx(struct socket *so, struct tls_enable *en) +{ + struct ktls_session *tls; + int error; + + if (!ktls_offload_enable) + return (ENOTSUP); + + counter_u64_add(ktls_offload_enable_calls, 1); + + /* + * This should always be true since only the TCP socket option + * invokes this function. + */ + if (so->so_proto->pr_protocol != IPPROTO_TCP) + return (EINVAL); + + /* + * XXX: Don't overwrite existing sessions. We should permit + * this to support rekeying in the future. + */ + if (so->so_snd.sb_tls_info != NULL) + return (EALREADY); + + if (en->cipher_algorithm == CRYPTO_AES_CBC && !ktls_cbc_enable) + return (ENOTSUP); + + /* TLS requires ext pgs */ + if (mb_use_ext_pgs == 0) + return (ENXIO); + + error = ktls_create_session(so, en, &tls); + if (error) + return (error); + + /* Prefer ifnet TLS over software TLS. 
*/ + error = ktls_try_ifnet(so, tls, false); + if (error) + error = ktls_try_sw(so, tls); + + if (error) { + ktls_cleanup(tls); + return (error); + } + + error = sblock(&so->so_snd, SBL_WAIT); + if (error) { + ktls_cleanup(tls); + return (error); + } + + SOCKBUF_LOCK(&so->so_snd); + so->so_snd.sb_tls_info = tls; + if (tls->sw_encrypt == NULL) + so->so_snd.sb_flags |= SB_TLS_IFNET; + SOCKBUF_UNLOCK(&so->so_snd); + sbunlock(&so->so_snd); + + counter_u64_add(ktls_offload_total, 1); + + return (0); +} + +int +ktls_get_tx_mode(struct socket *so) +{ + struct ktls_session *tls; + struct inpcb *inp; + int mode; + + inp = so->so_pcb; + INP_WLOCK_ASSERT(inp); + SOCKBUF_LOCK(&so->so_snd); + tls = so->so_snd.sb_tls_info; + if (tls == NULL) + mode = TCP_TLS_MODE_NONE; + else if (tls->sw_encrypt != NULL) + mode = TCP_TLS_MODE_SW; + else + mode = TCP_TLS_MODE_IFNET; + SOCKBUF_UNLOCK(&so->so_snd); + return (mode); +} + +/* + * Switch between SW and ifnet TLS sessions as requested. + */ +int +ktls_set_tx_mode(struct socket *so, int mode) +{ + struct ktls_session *tls, *tls_new; + struct inpcb *inp; + int error; + + MPASS(mode == TCP_TLS_MODE_SW || mode == TCP_TLS_MODE_IFNET); + + inp = so->so_pcb; + INP_WLOCK_ASSERT(inp); + SOCKBUF_LOCK(&so->so_snd); + tls = so->so_snd.sb_tls_info; + if (tls == NULL) { + SOCKBUF_UNLOCK(&so->so_snd); + return (0); + } + + if ((tls->sw_encrypt != NULL && mode == TCP_TLS_MODE_SW) || + (tls->sw_encrypt == NULL && mode == TCP_TLS_MODE_IFNET)) { + SOCKBUF_UNLOCK(&so->so_snd); + return (0); + } + + tls = ktls_hold(tls); + SOCKBUF_UNLOCK(&so->so_snd); + INP_WUNLOCK(inp); + + tls_new = ktls_clone_session(tls); + + if (mode == TCP_TLS_MODE_IFNET) + error = ktls_try_ifnet(so, tls_new, true); + else + error = ktls_try_sw(so, tls_new); + if (error) { + counter_u64_add(ktls_switch_failed, 1); + ktls_free(tls_new); + ktls_free(tls); + INP_WLOCK(inp); + return (error); + } + + error = sblock(&so->so_snd, SBL_WAIT); + if (error) { + 
counter_u64_add(ktls_switch_failed, 1); + ktls_free(tls_new); + ktls_free(tls); + INP_WLOCK(inp); + return (error); + } + + /* + * If we raced with another session change, keep the existing + * session. + */ + if (tls != so->so_snd.sb_tls_info) { + counter_u64_add(ktls_switch_failed, 1); + sbunlock(&so->so_snd); + ktls_free(tls_new); + ktls_free(tls); + INP_WLOCK(inp); + return (EBUSY); + } + + SOCKBUF_LOCK(&so->so_snd); + so->so_snd.sb_tls_info = tls_new; + if (tls_new->sw_encrypt == NULL) + so->so_snd.sb_flags |= SB_TLS_IFNET; + SOCKBUF_UNLOCK(&so->so_snd); + sbunlock(&so->so_snd); + + /* + * Drop two references on 'tls'. The first is for the + * ktls_hold() above. The second drops the reference from the + * socket buffer. + */ + KASSERT(tls->refcount >= 2, ("too few references on old session")); + ktls_free(tls); + ktls_free(tls); + + if (mode == TCP_TLS_MODE_IFNET) + counter_u64_add(ktls_switch_to_ifnet, 1); + else + counter_u64_add(ktls_switch_to_sw, 1); + + INP_WLOCK(inp); + return (0); +} + +/* + * Try to allocate a new TLS send tag. This task is scheduled when + * ip_output detects a route change while trying to transmit a packet + * holding a TLS record. If a new tag is allocated, replace the tag + * in the TLS session. Subsequent packets on the connection will use + * the new tag. If a new tag cannot be allocated, drop the + * connection. + */ +static void +ktls_reset_send_tag(void *context, int pending) +{ + struct epoch_tracker et; + struct ktls_session *tls; + struct m_snd_tag *old, *new; + struct inpcb *inp; + struct tcpcb *tp; + int error; + + MPASS(pending == 1); + + tls = context; + inp = tls->inp; + + /* + * Free the old tag first before allocating a new one. + * ip[6]_output_send() will treat a NULL send tag the same as + * an ifp mismatch and drop packets until a new tag is + * allocated. + * + * Write-lock the INP when changing tls->snd_tag since + * ip[6]_output_send() holds a read-lock when reading the + * pointer. 
+ */ + INP_WLOCK(inp); + old = tls->snd_tag; + tls->snd_tag = NULL; + INP_WUNLOCK(inp); + if (old != NULL) + m_snd_tag_rele(old); + + error = ktls_alloc_snd_tag(inp, tls, true, &new); + + if (error == 0) { + INP_WLOCK(inp); + tls->snd_tag = new; + mtx_pool_lock(mtxpool_sleep, tls); + tls->reset_pending = false; + mtx_pool_unlock(mtxpool_sleep, tls); + if (!in_pcbrele_wlocked(inp)) + INP_WUNLOCK(inp); + + counter_u64_add(ktls_ifnet_reset, 1); + + /* + * XXX: Should we kick tcp_output explicitly now that + * the send tag is fixed or just rely on timers? + */ + } else { + INP_INFO_RLOCK_ET(&V_tcbinfo, et); + INP_WLOCK(inp); + if (!in_pcbrele_wlocked(inp)) { + if (!(inp->inp_flags & INP_TIMEWAIT) && + !(inp->inp_flags & INP_DROPPED)) { + tp = intotcpcb(inp); + tp = tcp_drop(tp, ECONNABORTED); + if (tp != NULL) + INP_WUNLOCK(inp); + counter_u64_add(ktls_ifnet_reset_dropped, 1); + } else + INP_WUNLOCK(inp); + } + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); + + counter_u64_add(ktls_ifnet_reset_failed, 1); + + /* + * Leave reset_pending true to avoid future tasks while + * the socket goes away. + */ + } + + ktls_free(tls); +} + +int +ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls) +{ + + if (inp == NULL) + return (ENOBUFS); + + INP_LOCK_ASSERT(inp); + + /* + * See if we should schedule a task to update the send tag for + * this session. 
+ */ + mtx_pool_lock(mtxpool_sleep, tls); + if (!tls->reset_pending) { + (void) ktls_hold(tls); + in_pcbref(inp); + tls->inp = inp; + tls->reset_pending = true; + taskqueue_enqueue(taskqueue_thread, &tls->reset_tag_task); + } + mtx_pool_unlock(mtxpool_sleep, tls); + return (ENOBUFS); +} +#endif + +void +ktls_destroy(struct ktls_session *tls) +{ + struct rm_priotracker prio; + + ktls_cleanup(tls); + if (tls->be != NULL && ktls_allow_unload) { + rm_rlock(&ktls_backends_lock, &prio); + tls->be->use_count--; + rm_runlock(&ktls_backends_lock, &prio); + } + uma_zfree(ktls_session_zone, tls); +} + +void +ktls_seq(struct sockbuf *sb, struct mbuf *m) +{ + struct mbuf_ext_pgs *pgs; + struct tls_record_layer *tlshdr; + uint64_t seqno; + + for (; m != NULL; m = m->m_next) { + KASSERT((m->m_flags & M_NOMAP) != 0, + ("ktls_seq: mapped mbuf %p", m)); + + pgs = m->m_ext.ext_pgs; + pgs->seqno = sb->sb_tls_seqno; + + /* + * Store the sequence number in the TLS header as the + * explicit part of the IV for GCM. + */ + if (pgs->tls->params.cipher_algorithm == + CRYPTO_AES_NIST_GCM_16) { + tlshdr = (void *)pgs->hdr; + seqno = htobe64(pgs->seqno); + memcpy(tlshdr + 1, &seqno, sizeof(seqno)); + } + sb->sb_tls_seqno++; + } +} + +/* + * Add TLS framing (headers and trailers) to a chain of mbufs. Each + * mbuf in the chain must be an unmapped mbuf. The payload of the + * mbuf must be populated with the payload of each TLS record. + * + * The record_type argument specifies the TLS record type used when + * populating the TLS header. + * + * The enq_count argument on return is set to the number of pages of + * payload data for this entire chain that need to be encrypted via SW + * encryption. The returned value should be passed to ktls_enqueue + * when scheduling encryption of this chain of mbufs. 
+ */ +int +ktls_frame(struct mbuf *top, struct ktls_session *tls, int *enq_cnt, + uint8_t record_type) +{ + struct tls_record_layer *tlshdr; + struct mbuf *m; + struct mbuf_ext_pgs *pgs; + uint16_t tls_len; + int maxlen; + + maxlen = tls->params.max_frame_len; + *enq_cnt = 0; + for (m = top; m != NULL; m = m->m_next) { + /* + * All mbufs in the chain should be non-empty TLS + * records whose payload does not exceed the maximum + * frame length. + */ + if (m->m_len > maxlen || m->m_len == 0) + return (EINVAL); + tls_len = m->m_len; + + /* + * TLS frames require unmapped mbufs to store session + * info. + */ + KASSERT((m->m_flags & M_NOMAP) != 0, + ("ktls_frame: mapped mbuf %p (top = %p)\n", m, top)); + + pgs = m->m_ext.ext_pgs; + + /* Save a reference to the session. */ + pgs->tls = ktls_hold(tls); + + pgs->hdr_len = tls->params.tls_hlen; + pgs->trail_len = tls->params.tls_tlen; + if (tls->params.cipher_algorithm == CRYPTO_AES_CBC) { + int bs, delta; + + /* + * AES-CBC pads messages to a multiple of the + * block size. Note that the padding is + * applied after the digest and the encryption + * is done on the "plaintext || mac || padding". + * At least one byte of padding is always + * present. + * + * Compute the final trailer length assuming + * at most one block of padding. + * tls->params.sb_tls_tlen is the maximum + * possible trailer length (padding + digest). + * delta holds the number of excess padding + * bytes if the maximum were used. Those + * extra bytes are removed. + */ + bs = tls->params.tls_bs; + delta = (tls_len + tls->params.tls_tlen) & (bs - 1); + pgs->trail_len -= delta; + } + m->m_len += pgs->hdr_len + pgs->trail_len; + + /* Populate the TLS header. */ + tlshdr = (void *)pgs->hdr; + tlshdr->tls_vmajor = tls->params.tls_vmajor; + tlshdr->tls_vminor = tls->params.tls_vminor; + tlshdr->tls_type = record_type; + tlshdr->tls_length = htons(m->m_len - sizeof(*tlshdr)); + + /* + * For GCM, the sequence number is stored in the + * header by ktls_seq(). 
For CBC, a random nonce is + * inserted for TLS 1.1+. + */ + if (tls->params.cipher_algorithm == CRYPTO_AES_CBC && + tls->params.tls_vminor >= TLS_MINOR_VER_ONE) + arc4rand(tlshdr + 1, AES_BLOCK_LEN, 0); + + /* + * When using SW encryption, mark the mbuf not ready. + * It will be marked ready via sbready() after the + * record has been encrypted. + * + * When using ifnet TLS, unencrypted TLS records are + * sent down the stack to the NIC. + */ + if (tls->sw_encrypt != NULL) { + m->m_flags |= M_NOTREADY; + pgs->nrdy = pgs->npgs; + *enq_cnt += pgs->npgs; + } + } + return (0); +} + +void +ktls_enqueue_to_free(struct mbuf_ext_pgs *pgs) +{ + struct ktls_wq *wq; + bool running; + + /* Mark it for freeing. */ + pgs->mbuf = NULL; + wq = &ktls_wq[pgs->tls->wq_index]; + mtx_lock(&wq->mtx); + STAILQ_INSERT_TAIL(&wq->head, pgs, stailq); + running = wq->running; + mtx_unlock(&wq->mtx); + if (!running) + wakeup(wq); +} + +void +ktls_enqueue(struct mbuf *m, struct socket *so, int page_count) +{ + struct mbuf_ext_pgs *pgs; + struct ktls_wq *wq; + bool running; + + KASSERT(((m->m_flags & (M_NOMAP | M_NOTREADY)) == + (M_NOMAP | M_NOTREADY)), + ("ktls_enqueue: %p not unready & nomap mbuf\n", m)); + KASSERT(page_count != 0, ("enqueueing TLS mbuf with zero page count")); + + pgs = m->m_ext.ext_pgs; + + KASSERT(pgs->tls->sw_encrypt != NULL, ("ifnet TLS mbuf")); + + pgs->enc_cnt = page_count; + pgs->mbuf = m; + + /* + * Save a pointer to the socket. The caller is responsible + * for taking an additional reference via soref(). 
+ */ + pgs->so = so; + + wq = &ktls_wq[pgs->tls->wq_index]; + mtx_lock(&wq->mtx); + STAILQ_INSERT_TAIL(&wq->head, pgs, stailq); + running = wq->running; + mtx_unlock(&wq->mtx); + if (!running) + wakeup(wq); + counter_u64_add(ktls_cnt_on, 1); +} + +static __noinline void +ktls_encrypt(struct mbuf_ext_pgs *pgs) +{ + struct ktls_session *tls; + struct socket *so; + struct mbuf *m, *top; + vm_paddr_t parray[1 + btoc(TLS_MAX_MSG_SIZE_V10_2)]; + struct iovec src_iov[1 + btoc(TLS_MAX_MSG_SIZE_V10_2)]; + struct iovec dst_iov[1 + btoc(TLS_MAX_MSG_SIZE_V10_2)]; + vm_page_t pg; + int error, i, len, npages, off, total_pages, wire_adj; + bool is_anon; + + so = pgs->so; + tls = pgs->tls; + top = pgs->mbuf; + KASSERT(tls != NULL, ("tls = NULL, top = %p, pgs = %p\n", top, pgs)); + KASSERT(so != NULL, ("so = NULL, top = %p, pgs = %p\n", top, pgs)); +#ifdef INVARIANTS + pgs->so = NULL; + pgs->mbuf = NULL; +#endif + total_pages = pgs->enc_cnt; + npages = 0; + + /* + * Encrypt the TLS records in the chain of mbufs starting with + * 'top'. 'total_pages' gives us a total count of pages and is + * used to know when we have finished encrypting the TLS + * records originally queued with 'top'. + * + * NB: These mbufs are queued in the socket buffer and + * 'm_next' is traversing the mbufs in the socket buffer. The + * socket buffer lock is not held while traversing this chain. + * Since the mbufs are all marked M_NOTREADY their 'm_next' + * pointers should be stable. However, the 'm_next' of the + * last mbuf encrypted is not necessarily NULL. It can point + * to other mbufs appended while 'top' was on the TLS work + * queue. + * + * Each mbuf holds an entire TLS record. 
+ */ + error = 0; + for (m = top; npages != total_pages; m = m->m_next) { + pgs = m->m_ext.ext_pgs; + + KASSERT(pgs->tls == tls, + ("different TLS sessions in a single mbuf chain: %p vs %p", + tls, pgs->tls)); + KASSERT((m->m_flags & (M_NOMAP | M_NOTREADY)) == + (M_NOMAP | M_NOTREADY), + ("%p not unready & nomap mbuf (top = %p)\n", m, top)); + KASSERT(npages + pgs->npgs <= total_pages, + ("page count mismatch: top %p, total_pages %d, m %p", top, + total_pages, m)); + + /* + * Generate source and destination ivoecs to pass to + * the SW encryption backend. For writable mbufs, the + * destination iovec is a copy of the source and + * encryption is done in place. For file-backed mbufs + * (from sendfile), anonymous wired pages are + * allocated and assigned to the destination iovec. + */ + is_anon = M_WRITABLE(m); + + off = pgs->first_pg_off; + wire_adj = 0; + for (i = 0; i < pgs->npgs; i++, off = 0) { + len = mbuf_ext_pg_len(pgs, i, off); + src_iov[i].iov_len = len; + src_iov[i].iov_base = + (char *)(void *)PHYS_TO_DMAP(pgs->pa[i]) + off; + + if (is_anon) { + dst_iov[i].iov_base = src_iov[i].iov_base; + dst_iov[i].iov_len = src_iov[i].iov_len; + continue; + } +retry_page: + pg = vm_page_alloc(NULL, 0, VM_ALLOC_SYSTEM | + VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP); + if (pg == NULL) { + if (wire_adj) + vm_wire_add(wire_adj); + wire_adj = 0; + vm_wait(NULL); + goto retry_page; + } + wire_adj++; + parray[i] = VM_PAGE_TO_PHYS(pg); + dst_iov[i].iov_base = + (char *)(void *)PHYS_TO_DMAP(parray[i]) + off; + dst_iov[i].iov_len = len; + } + + npages += i; + if (wire_adj) + vm_wire_add(wire_adj); + + error = (*tls->sw_encrypt)(tls, + (const struct tls_record_layer *)pgs->hdr, + pgs->trail, src_iov, dst_iov, i, pgs->seqno); + if (error) { + counter_u64_add(ktls_offload_failed_crypto, 1); + break; + } + + /* + * For file-backed mbufs, release the file-backed + * pages and replace them in the ext_pgs array with + * the anonymous wired pages allocated above. 
+ */ + if (!is_anon) { + /* Free the old pages. */ + m->m_ext.ext_free(m); + + /* Replace them with the new pages. */ + for (i = 0; i < pgs->npgs; i++) + pgs->pa[i] = parray[i]; + + /* Use the basic free routine. */ + m->m_ext.ext_free = mb_free_mext_pgs; + } + + /* + * Drop a reference to the session now that it is no + * longer needed. Existing code depends on encrypted + * records having no associated session vs + * yet-to-be-encrypted records having an associated + * session. + */ + pgs->tls = NULL; + ktls_free(tls); + } + + CURVNET_SET(so->so_vnet); + if (error == 0) { + (void)(*so->so_proto->pr_usrreqs->pru_ready)(so, top, npages); + } else { + so->so_proto->pr_usrreqs->pru_abort(so); + so->so_error = EIO; + mb_free_notready(top, total_pages); + } + + SOCK_LOCK(so); + sorele(so); + CURVNET_RESTORE(); +} + +static void +ktls_work_thread(void *ctx) +{ + struct ktls_wq *wq = ctx; + struct mbuf_ext_pgs *p, *n; + struct ktls_session *tls; + STAILQ_HEAD(, mbuf_ext_pgs) local_head; + +#if defined(__aarch64__) || defined(__amd64__) || defined(__i386__) + fpu_kern_thread(0); +#endif + for (;;) { + mtx_lock(&wq->mtx); + while (STAILQ_EMPTY(&wq->head)) { + wq->running = false; + mtx_sleep(wq, &wq->mtx, 0, "-", 0); + wq->running = true; + } + + STAILQ_INIT(&local_head); + STAILQ_CONCAT(&local_head, &wq->head); + mtx_unlock(&wq->mtx); + + STAILQ_FOREACH_SAFE(p, &local_head, stailq, n) { + if (p->mbuf != NULL) { + ktls_encrypt(p); + counter_u64_add(ktls_cnt_on, -1); + } else { + tls = p->tls; + ktls_free(tls); + uma_zfree(zone_extpgs, p); + } + } + } +} Index: sys/kern/uipc_sockbuf.c =================================================================== --- sys/kern/uipc_sockbuf.c +++ sys/kern/uipc_sockbuf.c @@ -34,11 +34,13 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_kern_tls.h" #include "opt_param.h" #include #include /* for aio_swake proto */ #include +#include #include #include #include @@ -112,7 +114,8 @@ MPASS((m->m_flags & M_NOTREADY) == 0); /* Compress small 
unmapped mbufs into plain mbufs. */ - if ((m->m_flags & M_NOMAP) && m->m_len <= MLEN) { + if ((m->m_flags & M_NOMAP) && m->m_len <= MLEN && + !mbuf_has_tls_session(m)) { MPASS(m->m_flags & M_EXT); ext_size = m->m_ext.ext_size; if (mb_unmapped_compress(m) == 0) { @@ -133,6 +136,8 @@ while ((n != NULL) && (n != end) && (m->m_flags & M_EOR) == 0 && M_WRITABLE(m) && (m->m_flags & M_NOMAP) == 0 && + !mbuf_has_tls_session(n) && + !mbuf_has_tls_session(m) && n->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ n->m_len <= M_TRAILINGSPACE(m) && m->m_type == n->m_type) { @@ -668,6 +673,11 @@ { sbrelease_internal(sb, so); +#ifdef KERN_TLS + if (sb->sb_tls_info != NULL) + ktls_free(sb->sb_tls_info); + sb->sb_tls_info = NULL; +#endif } /* @@ -831,6 +841,11 @@ SBLASTMBUFCHK(sb); +#ifdef KERN_TLS + if (sb->sb_tls_info != NULL) + ktls_seq(sb, m); +#endif + /* Remove all packet headers and mbuf tags to get a pure data chain. */ m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0); @@ -1134,6 +1149,8 @@ ((sb->sb_flags & SB_NOCOALESCE) == 0) && !(m->m_flags & M_NOTREADY) && !(n->m_flags & (M_NOTREADY | M_NOMAP)) && + !mbuf_has_tls_session(m) && + !mbuf_has_tls_session(n) && m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */ m->m_len <= M_TRAILINGSPACE(n) && n->m_type == m->m_type) { @@ -1149,7 +1166,8 @@ continue; } if (m->m_len <= MLEN && (m->m_flags & M_NOMAP) && - (m->m_flags & M_NOTREADY) == 0) + (m->m_flags & M_NOTREADY) == 0 && + !mbuf_has_tls_session(m)) (void)mb_unmapped_compress(m); if (n) n->m_next = m; Index: sys/kern/uipc_socket.c =================================================================== --- sys/kern/uipc_socket.c +++ sys/kern/uipc_socket.c @@ -107,6 +107,7 @@ #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_kern_tls.h" #include "opt_sctp.h" #include @@ -123,6 +124,7 @@ #include #include #include +#include #include #include #include @@ -141,6 +143,7 @@ #include #include #include +#include #include @@ -1442,7 +1445,15 @@ ssize_t resid; 
int clen = 0, error, dontroute; int atomic = sosendallatonce(so) || top; + int pru_flag; +#ifdef KERN_TLS + struct ktls_session *tls; + int tls_enq_cnt, tls_pruflag; + uint8_t tls_rtype; + tls = NULL; + tls_rtype = TLS_RLTYPE_APP; +#endif if (uio != NULL) resid = uio->uio_resid; else @@ -1474,6 +1485,28 @@ if (error) goto out; +#ifdef KERN_TLS + tls_pruflag = 0; + tls = ktls_hold(so->so_snd.sb_tls_info); + if (tls != NULL) { + if (tls->sw_encrypt != NULL) + tls_pruflag = PRUS_NOTREADY; + + if (control != NULL) { + struct cmsghdr *cm = mtod(control, struct cmsghdr *); + + if (clen >= sizeof(*cm) && + cm->cmsg_type == TLS_SET_RECORD_TYPE) { + tls_rtype = *((uint8_t *)CMSG_DATA(cm)); + clen = 0; + m_freem(control); + control = NULL; + atomic = 1; + } + } + } +#endif + restart: do { SOCKBUF_LOCK(&so->so_snd); @@ -1551,10 +1584,27 @@ * is a workaround to prevent protocol send * methods to panic. */ - top = m_uiotombuf(uio, M_WAITOK, space, - (atomic ? max_hdr : 0), - (atomic ? M_PKTHDR : 0) | - ((flags & MSG_EOR) ? M_EOR : 0)); +#ifdef KERN_TLS + if (tls != NULL) { + top = m_uiotombuf(uio, M_WAITOK, space, + tls->params.max_frame_len, + M_NOMAP | + ((flags & MSG_EOR) ? M_EOR : 0)); + if (top != NULL) { + error = ktls_frame(top, tls, + &tls_enq_cnt, tls_rtype); + if (error) { + m_freem(top); + goto release; + } + } + tls_rtype = TLS_RLTYPE_APP; + } else +#endif + top = m_uiotombuf(uio, M_WAITOK, space, + (atomic ? max_hdr : 0), + (atomic ? M_PKTHDR : 0) | + ((flags & MSG_EOR) ? M_EOR : 0)); if (top == NULL) { error = EFAULT; /* only possible error */ goto release; @@ -1578,8 +1628,8 @@ * this. */ VNET_SO_ASSERT(so); - error = (*so->so_proto->pr_usrreqs->pru_send)(so, - (flags & MSG_OOB) ? PRUS_OOB : + + pru_flag = (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol understands * this flag and nothing left to send then use @@ -1591,13 +1641,37 @@ PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME. 
*/ (flags & MSG_MORETOCOME) || - (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, - top, addr, control, td); + (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0; + +#ifdef KERN_TLS + pru_flag |= tls_pruflag; +#endif + + error = (*so->so_proto->pr_usrreqs->pru_send)(so, + pru_flag, top, addr, control, td); + if (dontroute) { SOCK_LOCK(so); so->so_options &= ~SO_DONTROUTE; SOCK_UNLOCK(so); } + +#ifdef KERN_TLS + if (tls != NULL && tls->sw_encrypt != NULL) { + /* + * Note that error is intentionally + * ignored. + * + * Like sendfile(), we rely on the + * completion routine (pru_ready()) + * to free the mbufs in the event that + * pru_send() encountered an error and + * did not append them to the sockbuf. + */ + soref(so); + ktls_enqueue(top, so, tls_enq_cnt); + } +#endif clen = 0; control = NULL; top = NULL; @@ -1609,6 +1683,10 @@ release: sbunlock(&so->so_snd); out: +#ifdef KERN_TLS + if (tls != NULL) + ktls_free(tls); +#endif if (top != NULL) m_freem(top); if (control != NULL) Index: sys/modules/Makefile =================================================================== --- sys/modules/Makefile +++ sys/modules/Makefile @@ -200,6 +200,7 @@ khelp \ krpc \ ksyms \ + ${_ktls_ocf} \ le \ lge \ libalias \ @@ -412,6 +413,7 @@ _cryptodev= cryptodev _random_fortuna=random_fortuna _random_other= random_other +_ktls_ocf= ktls_ocf .endif .endif Index: sys/modules/ktls_ocf/Makefile =================================================================== --- /dev/null +++ sys/modules/ktls_ocf/Makefile @@ -0,0 +1,8 @@ +# $FreeBSD$ + +.PATH: ${SRCTOP}/sys/opencrypto + +KMOD= ktls_ocf +SRCS= ktls_ocf.c + +.include Index: sys/net/ieee8023ad_lacp.h =================================================================== --- sys/net/ieee8023ad_lacp.h +++ sys/net/ieee8023ad_lacp.h @@ -293,7 +293,7 @@ struct mbuf *lacp_input(struct lagg_port *, struct mbuf *); struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *); -#ifdef RATELIMIT +#if defined(RATELIMIT) || defined(KERN_TLS) 
struct lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t); #endif void lacp_attach(struct lagg_softc *); Index: sys/net/ieee8023ad_lacp.c =================================================================== --- sys/net/ieee8023ad_lacp.c +++ sys/net/ieee8023ad_lacp.c @@ -32,6 +32,7 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_kern_tls.h" #include "opt_ratelimit.h" #include @@ -882,7 +883,7 @@ return (lp->lp_lagg); } -#ifdef RATELIMIT +#if defined(RATELIMIT) || defined(KERN_TLS) struct lagg_port * lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t flowid) { Index: sys/net/if.h =================================================================== --- sys/net/if.h +++ sys/net/if.h @@ -247,6 +247,8 @@ #define IFCAP_TXRTLMT 0x1000000 /* hardware supports TX rate limiting */ #define IFCAP_HWRXTSTMP 0x2000000 /* hardware rx timestamping */ #define IFCAP_NOMAP 0x4000000 /* can TX unmapped mbufs */ +#define IFCAP_TXTLS4 0x8000000 /* can do TLS encryption and segmentation for TCP */ +#define IFCAP_TXTLS6 0x10000000 /* can do TLS encryption and segmentation for TCP6 */ #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) @@ -254,6 +256,7 @@ #define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) #define IFCAP_WOL (IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC) #define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6) +#define IFCAP_TXTLS (IFCAP_TXTLS4 | IFCAP_TXTLS6) #define IFCAP_CANTCHANGE (IFCAP_NETMAP) Index: sys/net/if_lagg.c =================================================================== --- sys/net/if_lagg.c +++ sys/net/if_lagg.c @@ -23,6 +23,7 @@ #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_kern_tls.h" #include "opt_ratelimit.h" #include @@ -135,7 +136,7 @@ static void lagg_init(void *); static void lagg_stop(struct lagg_softc *); static int lagg_ioctl(struct ifnet *, u_long, caddr_t); -#ifdef RATELIMIT +#if defined(KERN_TLS) || defined(RATELIMIT) static int lagg_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, 
struct m_snd_tag **); @@ -532,7 +533,7 @@ ifp->if_ioctl = lagg_ioctl; ifp->if_get_counter = lagg_get_counter; ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; -#ifdef RATELIMIT +#if defined(KERN_TLS) || defined(RATELIMIT) ifp->if_snd_tag_alloc = lagg_snd_tag_alloc; ifp->if_snd_tag_modify = lagg_snd_tag_modify; ifp->if_snd_tag_query = lagg_snd_tag_query; @@ -1547,7 +1548,7 @@ return (error); } -#ifdef RATELIMIT +#if defined(KERN_TLS) || defined(RATELIMIT) static inline struct lagg_snd_tag * mst_to_lst(struct m_snd_tag *mst) { @@ -1794,7 +1795,7 @@ struct lagg_softc *sc = (struct lagg_softc *)ifp->if_softc; int error; -#ifdef RATELIMIT +#if defined(KERN_TLS) || defined(RATELIMIT) if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) MPASS(m->m_pkthdr.snd_tag->ifp == ifp); #endif @@ -1990,7 +1991,7 @@ lagg_enqueue(struct ifnet *ifp, struct mbuf *m) { -#ifdef RATELIMIT +#if defined(KERN_TLS) || defined(RATELIMIT) if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { struct lagg_snd_tag *lst; struct m_snd_tag *mst; Index: sys/net/if_var.h =================================================================== --- sys/net/if_var.h +++ sys/net/if_var.h @@ -188,11 +188,13 @@ * m_snd_tag" comes from the network driver and it is free to allocate * as much additional space as it wants for its own use. */ +struct ktls_session; struct m_snd_tag; #define IF_SND_TAG_TYPE_RATE_LIMIT 0 #define IF_SND_TAG_TYPE_UNLIMITED 1 -#define IF_SND_TAG_TYPE_MAX 2 +#define IF_SND_TAG_TYPE_TLS 2 +#define IF_SND_TAG_TYPE_MAX 3 struct if_snd_tag_alloc_header { uint32_t type; /* send tag type, see IF_SND_TAG_XXX */ @@ -205,6 +207,12 @@ uint64_t max_rate; /* in bytes/s */ }; +struct if_snd_tag_alloc_tls { + struct if_snd_tag_alloc_header hdr; + struct inpcb *inp; + const struct ktls_session *tls; +}; + struct if_snd_tag_rate_limit_params { uint64_t max_rate; /* in bytes/s */ uint32_t queue_level; /* 0 (empty) .. 
65535 (full) */ @@ -217,6 +225,7 @@ struct if_snd_tag_alloc_header hdr; struct if_snd_tag_alloc_rate_limit rate_limit; struct if_snd_tag_alloc_rate_limit unlimited; + struct if_snd_tag_alloc_tls tls; }; union if_snd_tag_modify_params { Index: sys/net/if_vlan.c =================================================================== --- sys/net/if_vlan.c +++ sys/net/if_vlan.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" +#include "opt_kern_tls.h" #include "opt_vlan.h" #include "opt_ratelimit.h" @@ -103,7 +104,7 @@ int refcnt; }; -#ifdef RATELIMIT +#if defined(KERN_TLS) || defined(RATELIMIT) struct vlan_snd_tag { struct m_snd_tag com; struct m_snd_tag *tag; @@ -278,7 +279,7 @@ static void vlan_init(void *foo); static void vlan_input(struct ifnet *ifp, struct mbuf *m); static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); -#ifdef RATELIMIT +#if defined(KERN_TLS) || defined(RATELIMIT) static int vlan_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); static int vlan_snd_tag_modify(struct m_snd_tag *, @@ -1064,7 +1065,7 @@ ifp->if_transmit = vlan_transmit; ifp->if_qflush = vlan_qflush; ifp->if_ioctl = vlan_ioctl; -#ifdef RATELIMIT +#if defined(KERN_TLS) || defined(RATELIMIT) ifp->if_snd_tag_alloc = vlan_snd_tag_alloc; ifp->if_snd_tag_modify = vlan_snd_tag_modify; ifp->if_snd_tag_query = vlan_snd_tag_query; @@ -1157,7 +1158,7 @@ BPF_MTAP(ifp, m); -#ifdef RATELIMIT +#if defined(KERN_TLS) || defined(RATELIMIT) if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) { struct vlan_snd_tag *vst; struct m_snd_tag *mst; @@ -1741,6 +1742,20 @@ cap |= (p->if_capabilities & IFCAP_NOMAP); ena |= (mena & IFCAP_NOMAP); + /* + * If the parent interface can offload encryption and segmentation + * of TLS records over TCP, propagate it's capability to the VLAN + * interface. + * + * All TLS drivers in the tree today can deal with VLANs. If + * this ever changes, then a new IFCAP_VLAN_TXTLS can be + * defined. 
+ */ + if (p->if_capabilities & IFCAP_TXTLS) + cap |= p->if_capabilities & IFCAP_TXTLS; + if (p->if_capenable & IFCAP_TXTLS) + ena |= mena & IFCAP_TXTLS; + ifp->if_capabilities = cap; ifp->if_capenable = ena; ifp->if_hwassist = hwa; @@ -1972,7 +1987,7 @@ return (error); } -#ifdef RATELIMIT +#if defined(KERN_TLS) || defined(RATELIMIT) static int vlan_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, Index: sys/netinet/ip_output.c =================================================================== --- sys/netinet/ip_output.c +++ sys/netinet/ip_output.c @@ -36,6 +36,7 @@ #include "opt_inet.h" #include "opt_ipsec.h" +#include "opt_kern_tls.h" #include "opt_mbuf_stress_test.h" #include "opt_mpath.h" #include "opt_ratelimit.h" @@ -46,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -212,14 +214,39 @@ ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m, const struct sockaddr_in *gw, struct route *ro) { +#ifdef KERN_TLS + struct ktls_session *tls = NULL; +#endif struct m_snd_tag *mst; int error; MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); mst = NULL; +#ifdef KERN_TLS + /* + * If this is an unencrypted TLS record, save a reference to + * the record. This local reference is used to call + * ktls_output_eagain after the mbuf has been freed (thus + * dropping the mbuf's reference) in if_output. + */ + if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) { + tls = ktls_hold(m->m_next->m_ext.ext_pgs->tls); + mst = tls->snd_tag; + + /* + * If a TLS session doesn't have a valid tag, it must + * have had an earlier ifp mismatch, so drop this + * packet. + */ + if (mst == NULL) { + error = EAGAIN; + goto done; + } + } +#endif #ifdef RATELIMIT - if (inp != NULL) { + if (inp != NULL && mst == NULL) { if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 || (inp->inp_snd_tag != NULL && inp->inp_snd_tag->ifp != ifp)) @@ -246,6 +273,13 @@ done: /* Check for route change invalidating send tags. 
*/ +#ifdef KERN_TLS + if (tls != NULL) { + if (error == EAGAIN) + error = ktls_output_eagain(inp, tls); + ktls_free(tls); + } +#endif #ifdef RATELIMIT if (error == EAGAIN) in_pcboutput_eagain(inp); Index: sys/netinet/tcp.h =================================================================== --- sys/netinet/tcp.h +++ sys/netinet/tcp.h @@ -174,6 +174,8 @@ #define TCP_LOGDUMP 37 /* dump connection log events to device */ #define TCP_LOGDUMPID 38 /* dump events from connections with same ID to device */ +#define TCP_TXTLS_ENABLE 39 /* TLS framing and encryption for transmit */ +#define TCP_TXTLS_MODE 40 /* Transmit TLS mode */ #define TCP_CONGESTION 64 /* get/set congestion control algorithm */ #define TCP_CCALGOOPT 65 /* get/set cc algorithm specific options */ #define TCP_DELACK 72 /* socket option for delayed ack */ @@ -337,4 +339,14 @@ uint32_t pcbcnt; }; +/* TLS modes for TCP_TXTLS_MODE */ +#define TCP_TLS_MODE_NONE 0 +#define TCP_TLS_MODE_SW 1 +#define TCP_TLS_MODE_IFNET 2 + +/* + * TCP Control message types + */ +#define TLS_SET_RECORD_TYPE 1 + #endif /* !_NETINET_TCP_H_ */ Index: sys/netinet/tcp_output.c =================================================================== --- sys/netinet/tcp_output.c +++ sys/netinet/tcp_output.c @@ -37,6 +37,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_kern_tls.h" #include "opt_tcpdebug.h" #include @@ -46,6 +47,9 @@ #include #endif #include +#ifdef KERN_TLS +#include +#endif #include #include #include @@ -219,6 +223,14 @@ isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0; #endif + bool hw_tls; + +#ifdef KERN_TLS + if (so->so_snd.sb_flags & SB_TLS_IFNET) + hw_tls = true; + else +#endif + hw_tls = false; INP_WLOCK_ASSERT(tp->t_inpcb); @@ -1000,7 +1012,7 @@ * to the offset in the socket buffer chain. 
*/ mb = sbsndptr_noadv(&so->so_snd, off, &moff); - if (len <= MHLEN - hdrlen - max_linkhdr) { + if (len <= MHLEN - hdrlen - max_linkhdr && !hw_tls) { m_copydata(mb, moff, len, mtod(m, caddr_t) + hdrlen); if (SEQ_LT(tp->snd_nxt, tp->snd_max)) @@ -1013,7 +1025,7 @@ msb = &so->so_snd; m->m_next = tcp_m_copym(mb, moff, &len, if_hw_tsomaxsegcount, - if_hw_tsomaxsegsize, msb); + if_hw_tsomaxsegsize, msb, hw_tls); if (len <= (tp->t_maxseg - optlen)) { /* * Must have ran out of mbufs for the copy @@ -1810,8 +1822,12 @@ */ struct mbuf * tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen, - int32_t seglimit, int32_t segsize, struct sockbuf *sb) + int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls) { +#ifdef KERN_TLS + struct ktls_session *tls, *ntls; + struct mbuf *start; +#endif struct mbuf *n, **np; struct mbuf *top; int32_t off = off0; @@ -1843,6 +1859,13 @@ np = ⊤ top = NULL; pkthdrlen = NULL; +#ifdef KERN_TLS + if (m->m_flags & M_NOMAP) + tls = m->m_ext.ext_pgs->tls; + else + tls = NULL; + start = m; +#endif while (len > 0) { if (m == NULL) { KASSERT(len == M_COPYALL, @@ -1852,6 +1875,38 @@ *pkthdrlen = len_cp; break; } +#ifdef KERN_TLS + if (hw_tls) { + if (m->m_flags & M_NOMAP) + ntls = m->m_ext.ext_pgs->tls; + else + ntls = NULL; + + /* + * Avoid mixing TLS records with handshake + * data or TLS records from different + * sessions. + */ + if (tls != ntls) { + MPASS(m != start); + *plen = len_cp; + if (pkthdrlen != NULL) + *pkthdrlen = len_cp; + break; + } + + /* + * Don't end a send in the middle of a TLS + * record if it spans multiple TLS records. 
+ */ + if (tls != NULL && (m != start) && len < m->m_len) { + *plen = len_cp; + if (pkthdrlen != NULL) + *pkthdrlen = len_cp; + break; + } + } +#endif mlen = min(len, m->m_len - off); if (seglimit) { /* Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -6991,6 +6991,8 @@ struct ip6_hdr *ip6 = NULL; int32_t isipv6; #endif + bool hw_tls; + /* setup and take the cache hits here */ rack = (struct tcp_rack *)tp->t_fb_ptr; inp = rack->rc_inp; @@ -6999,6 +7001,13 @@ kern_prefetch(sb, &do_a_prefetch); do_a_prefetch = 1; +#ifdef KERN_TLS + if (so->so_snd.sb_flags & SB_TLS_IFNET) + hw_tls = true; + else +#endif + hw_tls = false; + INP_WLOCK_ASSERT(inp); #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) @@ -7983,7 +7992,7 @@ * sb_offset in the socket buffer chain. */ mb = sbsndptr_noadv(sb, sb_offset, &moff); - if (len <= MHLEN - hdrlen - max_linkhdr) { + if (len <= MHLEN - hdrlen - max_linkhdr && !hw_tls) { m_copydata(mb, moff, (int)len, mtod(m, caddr_t)+hdrlen); if (SEQ_LT(tp->snd_nxt, tp->snd_max)) @@ -7997,7 +8006,8 @@ else msb = sb; m->m_next = tcp_m_copym(mb, moff, &len, - if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb); + if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb, + hw_tls); if (len <= (tp->t_maxseg - optlen)) { /* * Must have ran out of mbufs for the copy Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -37,6 +37,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_kern_tls.h" #include "opt_tcpdebug.h" #include @@ -50,6 +51,9 @@ #ifdef TCP_HHOOK #include #endif +#ifdef KERN_TLS +#include +#endif #include #include #include @@ -3061,6 +3065,120 @@ CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL, 0, sysctl_drop, "", "Drop TCP connection"); +#ifdef KERN_TLS +static int 
+sysctl_switch_tls(SYSCTL_HANDLER_ARGS) +{ + /* addrs[0] is a foreign socket, addrs[1] is a local one. */ + struct sockaddr_storage addrs[2]; + struct inpcb *inp; + struct sockaddr_in *fin, *lin; + struct epoch_tracker et; +#ifdef INET6 + struct sockaddr_in6 *fin6, *lin6; +#endif + int error; + + inp = NULL; + fin = lin = NULL; +#ifdef INET6 + fin6 = lin6 = NULL; +#endif + error = 0; + + if (req->oldptr != NULL || req->oldlen != 0) + return (EINVAL); + if (req->newptr == NULL) + return (EPERM); + if (req->newlen < sizeof(addrs)) + return (ENOMEM); + error = SYSCTL_IN(req, &addrs, sizeof(addrs)); + if (error) + return (error); + + switch (addrs[0].ss_family) { +#ifdef INET6 + case AF_INET6: + fin6 = (struct sockaddr_in6 *)&addrs[0]; + lin6 = (struct sockaddr_in6 *)&addrs[1]; + if (fin6->sin6_len != sizeof(struct sockaddr_in6) || + lin6->sin6_len != sizeof(struct sockaddr_in6)) + return (EINVAL); + if (IN6_IS_ADDR_V4MAPPED(&fin6->sin6_addr)) { + if (!IN6_IS_ADDR_V4MAPPED(&lin6->sin6_addr)) + return (EINVAL); + in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[0]); + in6_sin6_2_sin_in_sock((struct sockaddr *)&addrs[1]); + fin = (struct sockaddr_in *)&addrs[0]; + lin = (struct sockaddr_in *)&addrs[1]; + break; + } + error = sa6_embedscope(fin6, V_ip6_use_defzone); + if (error) + return (error); + error = sa6_embedscope(lin6, V_ip6_use_defzone); + if (error) + return (error); + break; +#endif +#ifdef INET + case AF_INET: + fin = (struct sockaddr_in *)&addrs[0]; + lin = (struct sockaddr_in *)&addrs[1]; + if (fin->sin_len != sizeof(struct sockaddr_in) || + lin->sin_len != sizeof(struct sockaddr_in)) + return (EINVAL); + break; +#endif + default: + return (EINVAL); + } + INP_INFO_RLOCK_ET(&V_tcbinfo, et); + switch (addrs[0].ss_family) { +#ifdef INET6 + case AF_INET6: + inp = in6_pcblookup(&V_tcbinfo, &fin6->sin6_addr, + fin6->sin6_port, &lin6->sin6_addr, lin6->sin6_port, + INPLOOKUP_WLOCKPCB, NULL); + break; +#endif +#ifdef INET + case AF_INET: + inp = 
in_pcblookup(&V_tcbinfo, fin->sin_addr, fin->sin_port, + lin->sin_addr, lin->sin_port, INPLOOKUP_WLOCKPCB, NULL); + break; +#endif + } + INP_INFO_RUNLOCK_ET(&V_tcbinfo, et); + if (inp != NULL) { + if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0 || + inp->inp_socket == NULL) { + error = ECONNRESET; + INP_WUNLOCK(inp); + } else { + struct socket *so; + + so = inp->inp_socket; + soref(so); + error = ktls_set_tx_mode(so, + arg2 == 0 ? TCP_TLS_MODE_SW : TCP_TLS_MODE_IFNET); + INP_WUNLOCK(inp); + SOCK_LOCK(so); + sorele(so); + } + } else + error = ESRCH; + return (error); +} + +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, switch_to_sw_tls, + CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL, + 0, sysctl_switch_tls, "", "Switch TCP connection to SW TLS"); +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, switch_to_ifnet_tls, + CTLFLAG_VNET | CTLTYPE_STRUCT | CTLFLAG_WR | CTLFLAG_SKIP, NULL, + 1, sysctl_switch_tls, "", "Switch TCP connection to ifnet TLS"); +#endif + /* * Generate a standardized TCP log line for use throughout the * tcp subsystem. 
Memory allocation is done with M_NOWAIT to Index: sys/netinet/tcp_usrreq.c =================================================================== --- sys/netinet/tcp_usrreq.c +++ sys/netinet/tcp_usrreq.c @@ -44,6 +44,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_kern_tls.h" #include "opt_tcpdebug.h" #include @@ -52,6 +53,7 @@ #include #include #include +#include #include #include #ifdef INET6 @@ -1755,6 +1757,9 @@ int error, opt, optval; u_int ui; struct tcp_info ti; +#ifdef KERN_TLS + struct tls_enable tls; +#endif struct cc_algo *algo; char *pbuf, buf[TCP_LOG_ID_LEN]; size_t len; @@ -1917,6 +1922,29 @@ INP_WUNLOCK(inp); break; +#ifdef KERN_TLS + case TCP_TXTLS_ENABLE: + INP_WUNLOCK(inp); + error = sooptcopyin(sopt, &tls, sizeof(tls), + sizeof(tls)); + if (error) + break; + error = ktls_enable_tx(so, &tls); + break; + case TCP_TXTLS_MODE: + INP_WUNLOCK(inp); + error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui)); + if (error) + return (error); + if (ui != TCP_TLS_MODE_SW && ui != TCP_TLS_MODE_IFNET) + return (EINVAL); + + INP_WLOCK_RECHECK(inp); + error = ktls_set_tx_mode(so, ui); + INP_WUNLOCK(inp); + break; +#endif + case TCP_KEEPIDLE: case TCP_KEEPINTVL: case TCP_KEEPINIT: @@ -2197,6 +2225,13 @@ INP_WUNLOCK(inp); error = EINVAL; break; +#endif +#ifdef KERN_TLS + case TCP_TXTLS_MODE: + optval = ktls_get_tx_mode(so); + INP_WUNLOCK(inp); + error = sooptcopyout(sopt, &optval, sizeof(optval)); + break; #endif default: INP_WUNLOCK(inp); Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -944,7 +944,7 @@ void tcp_sndbuf_autoscale(struct tcpcb *, struct socket *, uint32_t); struct mbuf * tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen, - int32_t seglimit, int32_t segsize, struct sockbuf *sb); + int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls); static inline void Index: sys/netinet6/ip6_output.c 
=================================================================== --- sys/netinet6/ip6_output.c +++ sys/netinet6/ip6_output.c @@ -68,6 +68,7 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_kern_tls.h" #include "opt_ratelimit.h" #include "opt_route.h" #include "opt_rss.h" @@ -75,6 +76,7 @@ #include #include +#include #include #include #include @@ -280,14 +282,39 @@ ip6_output_send(struct inpcb *inp, struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m, struct sockaddr_in6 *dst, struct route_in6 *ro) { +#ifdef KERN_TLS + struct ktls_session *tls = NULL; +#endif struct m_snd_tag *mst; int error; MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0); mst = NULL; +#ifdef KERN_TLS + /* + * If this is an unencrypted TLS record, save a reference to + * the record. This local reference is used to call + * ktls_output_eagain after the mbuf has been freed (thus + * dropping the mbuf's reference) in if_output. + */ + if (m->m_next != NULL && mbuf_has_tls_session(m->m_next)) { + tls = ktls_hold(m->m_next->m_ext.ext_pgs->tls); + mst = tls->snd_tag; + + /* + * If a TLS session doesn't have a valid tag, it must + * have had an earlier ifp mismatch, so drop this + * packet. + */ + if (mst == NULL) { + error = EAGAIN; + goto done; + } + } +#endif #ifdef RATELIMIT - if (inp != NULL) { + if (inp != NULL && mst == NULL) { if ((inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) != 0 || (inp->inp_snd_tag != NULL && inp->inp_snd_tag->ifp != ifp)) @@ -314,6 +341,13 @@ done: /* Check for route change invalidating send tags. 
*/ +#ifdef KERN_TLS + if (tls != NULL) { + if (error == EAGAIN) + error = ktls_output_eagain(inp, tls); + ktls_free(tls); + } +#endif #ifdef RATELIMIT if (error == EAGAIN) in_pcboutput_eagain(inp); Index: sys/opencrypto/ktls_ocf.c =================================================================== --- /dev/null +++ sys/opencrypto/ktls_ocf.c @@ -0,0 +1,308 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2019 Netflix Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct ocf_session { + crypto_session_t sid; + int crda_alg; + struct mtx lock; +}; + +struct ocf_operation { + struct ocf_session *os; + bool done; + struct iovec iov[0]; +}; + +static MALLOC_DEFINE(M_KTLS_OCF, "ktls_ocf", "OCF KTLS"); + +SYSCTL_DECL(_kern_ipc_tls); +SYSCTL_DECL(_kern_ipc_tls_stats); + +static counter_u64_t ocf_gcm_crypts; +SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, ocf_gcm_crypts, CTLFLAG_RD, + &ocf_gcm_crypts, + "Total number of OCF GCM encryption operations"); + +static counter_u64_t ocf_retries; +SYSCTL_COUNTER_U64(_kern_ipc_tls_stats, OID_AUTO, ocf_retries, CTLFLAG_RD, + &ocf_retries, + "Number of OCF encryption operation retries"); + +static int +ktls_ocf_callback(struct cryptop *crp) +{ + struct ocf_operation *oo; + + oo = crp->crp_opaque; + mtx_lock(&oo->os->lock); + oo->done = true; + mtx_unlock(&oo->os->lock); + wakeup(oo); + return (0); +} + +static int +ktls_ocf_encrypt(struct ktls_session *tls, const struct tls_record_layer *hdr, + uint8_t *trailer, struct iovec *iniov, struct iovec *outiov, int iovcnt, + uint64_t seqno) +{ + struct uio uio; + struct tls_aead_data ad; + struct tls_nonce_data nd; + struct cryptodesc *crde, *crda; + struct cryptop *crp; + struct ocf_session *os; + struct ocf_operation *oo; + struct iovec *iov; + int i, error; + uint16_t tls_comp_len; + + os = tls->cipher; + + oo = malloc(sizeof(*oo) + (iovcnt + 2) * sizeof(*iov), M_KTLS_OCF, + M_WAITOK | M_ZERO); + oo->os = os; + iov = oo->iov; + + crp = crypto_getreq(2); + if (crp == NULL) { + free(oo, M_KTLS_OCF); + return (ENOMEM); + } + + /* Setup the IV. */ + memcpy(nd.fixed, tls->params.iv, TLS_AEAD_GCM_LEN); + memcpy(&nd.seq, hdr + 1, sizeof(nd.seq)); + + /* Setup the AAD. 
*/ + tls_comp_len = ntohs(hdr->tls_length) - + (AES_GMAC_HASH_LEN + sizeof(nd.seq)); + ad.seq = htobe64(seqno); + ad.type = hdr->tls_type; + ad.tls_vmajor = hdr->tls_vmajor; + ad.tls_vminor = hdr->tls_vminor; + ad.tls_length = htons(tls_comp_len); + iov[0].iov_base = &ad; + iov[0].iov_len = sizeof(ad); + uio.uio_resid = sizeof(ad); + + /* + * OCF always does encryption in place, so copy the data if + * needed. Ugh. + */ + for (i = 0; i < iovcnt; i++) { + iov[i + 1] = outiov[i]; + if (iniov[i].iov_base != outiov[i].iov_base) + memcpy(outiov[i].iov_base, iniov[i].iov_base, + outiov[i].iov_len); + uio.uio_resid += outiov[i].iov_len; + } + + iov[iovcnt + 1].iov_base = trailer; + iov[iovcnt + 1].iov_len = AES_GMAC_HASH_LEN; + uio.uio_resid += AES_GMAC_HASH_LEN; + + uio.uio_iov = iov; + uio.uio_iovcnt = iovcnt + 2; + uio.uio_offset = 0; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_td = curthread; + + crp->crp_session = os->sid; + crp->crp_flags = CRYPTO_F_IOV | CRYPTO_F_CBIMM; + crp->crp_uio = &uio; + crp->crp_ilen = uio.uio_resid; + crp->crp_opaque = oo; + crp->crp_callback = ktls_ocf_callback; + + crde = crp->crp_desc; + crda = crde->crd_next; + + crda->crd_alg = os->crda_alg; + crda->crd_skip = 0; + crda->crd_len = sizeof(ad); + crda->crd_inject = crp->crp_ilen - AES_GMAC_HASH_LEN; + + crde->crd_alg = CRYPTO_AES_NIST_GCM_16; + crde->crd_skip = sizeof(ad); + crde->crd_len = crp->crp_ilen - (sizeof(ad) + AES_GMAC_HASH_LEN); + crde->crd_flags = CRD_F_ENCRYPT | CRD_F_IV_EXPLICIT | CRD_F_IV_PRESENT; + memcpy(crde->crd_iv, &nd, sizeof(nd)); + + counter_u64_add(ocf_gcm_crypts, 1); + for (;;) { + error = crypto_dispatch(crp); + if (error) + break; + + mtx_lock(&os->lock); + while (!oo->done) + mtx_sleep(oo, &os->lock, 0, "ocfktls", 0); + mtx_unlock(&os->lock); + + if (crp->crp_etype != EAGAIN) { + error = crp->crp_etype; + break; + } + + crp->crp_etype = 0; + crp->crp_flags &= ~CRYPTO_F_DONE; + oo->done = false; + counter_u64_add(ocf_retries, 1); + } + + crypto_freereq(crp); + 
free(oo, M_KTLS_OCF); + return (error); +} + +static void +ktls_ocf_free(struct ktls_session *tls) +{ + struct ocf_session *os; + + os = tls->cipher; + mtx_destroy(&os->lock); + explicit_bzero(os, sizeof(*os)); + free(os, M_KTLS_OCF); +} + +static int +ktls_ocf_try(struct socket *so, struct ktls_session *tls) +{ + struct cryptoini cria, crie; + struct ocf_session *os; + int error; + + memset(&cria, 0, sizeof(cria)); + memset(&crie, 0, sizeof(crie)); + + switch (tls->params.cipher_algorithm) { + case CRYPTO_AES_NIST_GCM_16: + if (tls->params.iv_len != TLS_AEAD_GCM_LEN) + return (EINVAL); + switch (tls->params.cipher_key_len) { + case 128 / 8: + cria.cri_alg = CRYPTO_AES_128_NIST_GMAC; + break; + case 256 / 8: + cria.cri_alg = CRYPTO_AES_256_NIST_GMAC; + break; + default: + return (EINVAL); + } + cria.cri_key = tls->params.cipher_key; + cria.cri_klen = tls->params.cipher_key_len * 8; + break; + default: + return (EPROTONOSUPPORT); + } + + /* Only TLS 1.1 and TLS 1.2 are currently supported. 
*/ + if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE || + tls->params.tls_vminor < TLS_MINOR_VER_ONE || + tls->params.tls_vminor > TLS_MINOR_VER_TWO) + return (EPROTONOSUPPORT); + + os = malloc(sizeof(*os), M_KTLS_OCF, M_NOWAIT | M_ZERO); + if (os == NULL) + return (ENOMEM); + + crie.cri_alg = tls->params.cipher_algorithm; + crie.cri_key = tls->params.cipher_key; + crie.cri_klen = tls->params.cipher_key_len * 8; + + crie.cri_next = &cria; + error = crypto_newsession(&os->sid, &crie, + CRYPTO_FLAG_HARDWARE | CRYPTO_FLAG_SOFTWARE); + if (error) { + free(os, M_KTLS_OCF); + return (error); + } + + os->crda_alg = cria.cri_alg; + mtx_init(&os->lock, "ktls_ocf", NULL, MTX_DEF); + tls->cipher = os; + tls->sw_encrypt = ktls_ocf_encrypt; + tls->free = ktls_ocf_free; + return (0); +} + +struct ktls_crypto_backend ocf_backend = { + .name = "OCF", + .prio = 5, + .api_version = KTLS_API_VERSION, + .try = ktls_ocf_try, +}; + +static int +ktls_ocf_modevent(module_t mod, int what, void *arg) +{ + int error; + + switch (what) { + case MOD_LOAD: + ocf_gcm_crypts = counter_u64_alloc(M_WAITOK); + ocf_retries = counter_u64_alloc(M_WAITOK); + return (ktls_crypto_backend_register(&ocf_backend)); + case MOD_UNLOAD: + error = ktls_crypto_backend_deregister(&ocf_backend); + if (error) + return (error); + counter_u64_free(ocf_gcm_crypts); + counter_u64_free(ocf_retries); + return (0); + default: + return (EOPNOTSUPP); + } +} + +static moduledata_t ktls_ocf_moduledata = { + "ktls_ocf", + ktls_ocf_modevent, + NULL +}; + +DECLARE_MODULE(ktls_ocf, ktls_ocf_moduledata, SI_SUB_PROTO_END, SI_ORDER_ANY); Index: sys/sys/ktls.h =================================================================== --- /dev/null +++ sys/sys/ktls.h @@ -0,0 +1,194 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2014-2019 Netflix Inc. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +#ifndef _SYS_KTLS_H_ +#define _SYS_KTLS_H_ + +#include +#include + +struct tls_record_layer { + uint8_t tls_type; + uint8_t tls_vmajor; + uint8_t tls_vminor; + uint16_t tls_length; + uint8_t tls_data[0]; +} __attribute__ ((packed)); + +#define TLS_MAX_MSG_SIZE_V10_2 16384 +#define TLS_MAX_PARAM_SIZE 1024 /* Max key/mac/iv in sockopt */ +#define TLS_AEAD_GCM_LEN 4 +#define TLS_CBC_IMPLICIT_IV_LEN 16 + +/* Type values for the record layer */ +#define TLS_RLTYPE_APP 23 + +/* + * Nonce for GCM. + */ +struct tls_nonce_data { + uint8_t fixed[TLS_AEAD_GCM_LEN]; + uint64_t seq; +} __packed; + +/* + * AEAD additional data format per RFC. 
+ */ +struct tls_aead_data { + uint64_t seq; /* In network order */ + uint8_t type; + uint8_t tls_vmajor; + uint8_t tls_vminor; + uint16_t tls_length; +} __packed; + +/* + * Stream Cipher MAC additional data input. This does not match the + * exact data on the wire (the sequence number is not placed on the + * wire, and any explicit IV after the record header is not covered by + * the MAC). + */ +struct tls_mac_data { + uint64_t seq; + uint8_t type; + uint8_t tls_vmajor; + uint8_t tls_vminor; + uint16_t tls_length; +} __packed; + +#define TLS_MAJOR_VER_ONE 3 +#define TLS_MINOR_VER_ZERO 1 /* 3, 1 */ +#define TLS_MINOR_VER_ONE 2 /* 3, 2 */ +#define TLS_MINOR_VER_TWO 3 /* 3, 3 */ + +/* For TCP_TXTLS_ENABLE */ +struct tls_enable { + const uint8_t *cipher_key; + const uint8_t *iv; /* Implicit IV. */ + const uint8_t *auth_key; + int cipher_algorithm; /* e.g. CRYPTO_AES_CBC */ + int cipher_key_len; + int iv_len; + int auth_algorithm; /* e.g. CRYPTO_SHA2_256_HMAC */ + int auth_key_len; + int flags; + uint8_t tls_vmajor; + uint8_t tls_vminor; +}; + +struct tls_session_params { + uint8_t *cipher_key; + uint8_t *auth_key; + uint8_t iv[TLS_CBC_IMPLICIT_IV_LEN]; + int cipher_algorithm; + int auth_algorithm; + uint16_t cipher_key_len; + uint16_t iv_len; + uint16_t auth_key_len; + uint16_t max_frame_len; + uint8_t tls_vmajor; + uint8_t tls_vminor; + uint8_t tls_hlen; + uint8_t tls_tlen; + uint8_t tls_bs; + uint8_t flags; +}; + +#ifdef _KERNEL + +#define KTLS_API_VERSION 5 + +struct iovec; +struct ktls_session; +struct m_snd_tag; +struct mbuf; +struct mbuf_ext_pgs; +struct sockbuf; +struct socket; + +struct ktls_crypto_backend { + LIST_ENTRY(ktls_crypto_backend) next; + int (*try)(struct socket *so, struct ktls_session *tls); + int prio; + int api_version; + int use_count; + const char *name; +}; + +struct ktls_session { + int (*sw_encrypt)(struct ktls_session *tls, + const struct tls_record_layer *hdr, uint8_t *trailer, + struct iovec *src, struct iovec *dst, int iovcnt, + 
uint64_t seqno); + union { + void *cipher; + struct m_snd_tag *snd_tag; + }; + struct ktls_crypto_backend *be; + void (*free)(struct ktls_session *tls); + struct tls_session_params params; + u_int wq_index; + volatile u_int refcount; + + struct task reset_tag_task; + struct inpcb *inp; + bool reset_pending; +} __aligned(CACHE_LINE_SIZE); + +int ktls_crypto_backend_register(struct ktls_crypto_backend *be); +int ktls_crypto_backend_deregister(struct ktls_crypto_backend *be); +int ktls_enable_tx(struct socket *so, struct tls_enable *en); +void ktls_destroy(struct ktls_session *tls); +int ktls_frame(struct mbuf *m, struct ktls_session *tls, int *enqueue_cnt, + uint8_t record_type); +void ktls_seq(struct sockbuf *sb, struct mbuf *m); +void ktls_enqueue(struct mbuf *m, struct socket *so, int page_count); +void ktls_enqueue_to_free(struct mbuf_ext_pgs *pgs); +int ktls_set_tx_mode(struct socket *so, int mode); +int ktls_get_tx_mode(struct socket *so); +int ktls_output_eagain(struct inpcb *inp, struct ktls_session *tls); + +static inline struct ktls_session * +ktls_hold(struct ktls_session *tls) +{ + + if (tls != NULL) + refcount_acquire(&tls->refcount); + return (tls); +} + +static inline void +ktls_free(struct ktls_session *tls) +{ + + if (refcount_release(&tls->refcount)) + ktls_destroy(tls); +} + +#endif /* !_KERNEL */ +#endif /* !_SYS_KTLS_H_ */ Index: sys/sys/mbuf.h =================================================================== --- sys/sys/mbuf.h +++ sys/sys/mbuf.h @@ -301,6 +301,7 @@ }; }; +struct ktls_session; struct socket; /* @@ -344,7 +345,7 @@ uint16_t last_pg_len; /* Length of last page */ vm_paddr_t pa[MBUF_PEXT_MAX_PGS]; /* phys addrs of pages */ char hdr[MBUF_PEXT_HDR_LEN]; /* TLS header */ - void *tls; /* TLS session */ + struct ktls_session *tls; /* TLS session */ #if defined(__i386__) || \ (defined(__powerpc__) && !defined(__powerpc64__) && defined(BOOKE)) /* @@ -357,9 +358,10 @@ char trail[MBUF_PEXT_TRAIL_LEN]; /* TLS trailer */ struct { struct 
socket *so; - void *mbuf; + struct mbuf *mbuf; uint64_t seqno; STAILQ_ENTRY(mbuf_ext_pgs) stailq; + int enc_cnt; }; }; }; @@ -1505,5 +1507,18 @@ void netdump_mbuf_reinit(int nmbuf, int nclust, int clsize); #endif +static inline bool +mbuf_has_tls_session(struct mbuf *m) +{ + + if (m->m_flags & M_NOMAP) { + MBUF_EXT_PGS_ASSERT(m); + if (m->m_ext.ext_pgs->tls != NULL) { + return (true); + } + } + return (false); +} + #endif /* _KERNEL */ #endif /* !_SYS_MBUF_H_ */ Index: sys/sys/sockbuf.h =================================================================== --- sys/sys/sockbuf.h +++ sys/sys/sockbuf.h @@ -50,6 +50,7 @@ #define SB_AUTOSIZE 0x800 /* automatically size socket buffer */ #define SB_STOP 0x1000 /* backpressure indicator */ #define SB_AIO_RUNNING 0x2000 /* AIO operation running */ +#define SB_TLS_IFNET 0x4000 /* has used / is using ifnet KTLS */ #define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ #define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ @@ -63,6 +64,7 @@ #define SB_MAX (2*1024*1024) /* default for max chars in sockbuf */ +struct ktls_session; struct mbuf; struct sockaddr; struct socket; @@ -74,6 +76,7 @@ * * Locking key to struct sockbuf: * (a) locked by SOCKBUF_LOCK(). 
+ * (b) locked by sblock() */ struct sockbuf { struct mtx sb_mtx; /* sockbuf lock */ @@ -98,6 +101,8 @@ u_int sb_ctl; /* (a) non-data chars in buffer */ int sb_lowat; /* (a) low water mark */ sbintime_t sb_timeo; /* (a) timeout for read/write */ + uint64_t sb_tls_seqno; /* (a) TLS seqno */ + struct ktls_session *sb_tls_info; /* (a + b) TLS state */ short sb_flags; /* (a) flags, see above */ int (*sb_upcall)(struct socket *, void *, int); /* (a) */ void *sb_upcallarg; /* (a) */ Index: tools/tools/switch_tls/Makefile =================================================================== --- /dev/null +++ tools/tools/switch_tls/Makefile @@ -0,0 +1,6 @@ +# $FreeBSD$ + +PROG= switch_tls +MAN= + +.include Index: tools/tools/switch_tls/switch_tls.c =================================================================== --- /dev/null +++ tools/tools/switch_tls/switch_tls.c @@ -0,0 +1,381 @@ +/* $OpenBSD: tcpdrop.c,v 1.4 2004/05/22 23:55:22 deraadt Exp $ */ + +/*- + * Copyright (c) 2009 Juli Mallett + * Copyright (c) 2004 Markus Friedl + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include +#define TCPSTATES +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#define TCPDROP_FOREIGN 0 +#define TCPDROP_LOCAL 1 + +#define SW_TLS 0 +#define IFNET_TLS 1 + +struct host_service { + char hs_host[NI_MAXHOST]; + char hs_service[NI_MAXSERV]; +}; + +static bool tcpswitch_list_commands = false; + +static char *findport(const char *); +static struct xinpgen *getxpcblist(const char *); +static void sockinfo(const struct sockaddr *, struct host_service *); +static bool tcpswitch(const struct sockaddr *, const struct sockaddr *, int); +static bool tcpswitchall(const char *, int); +static bool tcpswitchbyname(const char *, const char *, const char *, + const char *, int); +static bool tcpswitchconn(const struct in_conninfo *, int); +static void usage(void); + +/* + * Switch a tcp connection. + */ +int +main(int argc, char *argv[]) +{ + char stack[TCP_FUNCTION_NAME_LEN_MAX]; + char *lport, *fport; + bool switchall, switchallstack; + int ch, mode; + + switchall = false; + switchallstack = false; + stack[0] = '\0'; + mode = SW_TLS; + + while ((ch = getopt(argc, argv, "ailS:s")) != -1) { + switch (ch) { + case 'a': + switchall = true; + break; + case 'i': + mode = IFNET_TLS; + break; + case 'l': + tcpswitch_list_commands = true; + break; + case 'S': + switchallstack = true; + strlcpy(stack, optarg, sizeof(stack)); + break; + case 's': + mode = SW_TLS; + break; + default: + usage(); + } + } + argc -= optind; + argv += optind; + + if (switchall && switchallstack) + usage(); + if (switchall || switchallstack) { + if (argc != 0) + usage(); + if (!tcpswitchall(stack, mode)) + exit(1); + exit(0); + } + + if ((argc != 2 && argc != 4) || tcpswitch_list_commands) + usage(); + + if (argc == 2) { + lport = findport(argv[0]); + fport = findport(argv[1]); + if (lport == NULL || lport[1] == '\0' || fport == NULL || + fport[1] == 
'\0')
+		usage();
+	*lport++ = '\0';
+	*fport++ = '\0';
+	if (!tcpswitchbyname(argv[0], lport, argv[1], fport, mode))
+		exit(1);
+	} else if (!tcpswitchbyname(argv[0], argv[1], argv[2], argv[3], mode))
+		exit(1);
+
+	exit(0);
+}
+
+/*
+ * Return a pointer to the last '.' or ':' in arg (whichever occurs
+ * later), i.e. the host/port separator, or NULL if neither is present.
+ */
+static char *
+findport(const char *arg)
+{
+	char *dot, *colon;
+
+	/* A strrspn() or strrpbrk() would be nice. */
+	dot = strrchr(arg, '.');
+	colon = strrchr(arg, ':');
+	if (dot == NULL)
+		return (colon);
+	if (colon == NULL)
+		return (dot);
+	if (dot < colon)
+		return (colon);
+	else
+		return (dot);
+}
+
+/*
+ * Fetch the named sysctl PCB list into a malloc()ed buffer; exits on
+ * any failure.  Caller frees the returned buffer.
+ * NOTE(review): the PCB list can grow between the sizing call and the
+ * fetching call, so the second sysctlbyname() may fail spuriously --
+ * confirm whether a retry loop (as in netstat) is wanted here.
+ */
+static struct xinpgen *
+getxpcblist(const char *name)
+{
+	struct xinpgen *xinp;
+	size_t len;
+	int rv;
+
+	len = 0;
+	rv = sysctlbyname(name, NULL, &len, NULL, 0);
+	if (rv == -1)
+		err(1, "sysctlbyname %s", name);
+
+	if (len == 0)
+		errx(1, "%s is empty", name);
+
+	xinp = malloc(len);
+	if (xinp == NULL)
+		errx(1, "malloc failed");
+
+	rv = sysctlbyname(name, xinp, &len, NULL, 0);
+	if (rv == -1)
+		err(1, "sysctlbyname %s", name);
+
+	return (xinp);
+}
+
+/*
+ * Format sa as numeric host and service strings into hs.
+ * NOTE(review): getnameinfo() signals failure with a nonzero EAI_*
+ * code, not -1/errno, so the rv == -1 test below can miss errors, and
+ * err(3) would print a stale errno -- consider testing rv != 0 and
+ * reporting via gai_strerror(rv) instead.
+ */
+static void
+sockinfo(const struct sockaddr *sa, struct host_service *hs)
+{
+	static const int flags = NI_NUMERICHOST | NI_NUMERICSERV;
+	int rv;
+
+	rv = getnameinfo(sa, sa->sa_len, hs->hs_host, sizeof hs->hs_host,
+	    hs->hs_service, sizeof hs->hs_service, flags);
+	if (rv == -1)
+		err(1, "getnameinfo");
+}
+
+/*
+ * Switch a single connection, identified by its local/foreign sockaddr
+ * pair, to software TLS (SW_TLS) or ifnet TLS by writing the pair as
+ * the new value of the corresponding net.inet.tcp.switch_to_*_tls
+ * sysctl.  In list mode (-l) just print the equivalent command line.
+ * Returns true on success, false if the sysctl set failed.
+ */
+static bool
+tcpswitch(const struct sockaddr *lsa, const struct sockaddr *fsa, int mode)
+{
+	struct host_service local, foreign;
+	struct sockaddr_storage addrs[2];
+	int rv;
+
+	memcpy(&addrs[TCPDROP_FOREIGN], fsa, fsa->sa_len);
+	memcpy(&addrs[TCPDROP_LOCAL], lsa, lsa->sa_len);
+
+	sockinfo(lsa, &local);
+	sockinfo(fsa, &foreign);
+
+	if (tcpswitch_list_commands) {
+		printf("switch_tls %s %s %s %s %s\n",
+		    mode == SW_TLS ? "-s" : "-i",
+		    local.hs_host, local.hs_service,
+		    foreign.hs_host, foreign.hs_service);
+		return (true);
+	}
+
+	/* Set-only sysctl: the two sockaddrs are the new value. */
+	rv = sysctlbyname(mode == SW_TLS ? "net.inet.tcp.switch_to_sw_tls" :
+	    "net.inet.tcp.switch_to_ifnet_tls", NULL, NULL, &addrs,
+	    sizeof addrs);
+	if (rv == -1) {
+		warn("%s %s %s %s", local.hs_host, local.hs_service,
+		    foreign.hs_host, foreign.hs_service);
+		return (false);
+	}
+	printf("%s %s %s %s: switched\n", local.hs_host, local.hs_service,
+	    foreign.hs_host, foreign.hs_service);
+	return (true);
+}
+
+/*
+ * Walk net.inet.tcp.pcblist and switch every non-listening connection,
+ * optionally restricted to sockets using the named TCP function stack
+ * (empty stack name matches all).  Returns false if any switch failed.
+ */
+static bool
+tcpswitchall(const char *stack, int mode)
+{
+	struct xinpgen *head, *xinp;
+	struct xtcpcb *xtp;
+	struct xinpcb *xip;
+	bool ok;
+
+	ok = true;
+
+	head = getxpcblist("net.inet.tcp.pcblist");
+
+/* Each list record carries its own length; step forward by xig_len. */
+#define	XINP_NEXT(xinp)							\
+	((struct xinpgen *)(uintptr_t)((uintptr_t)(xinp) + (xinp)->xig_len))
+
+	for (xinp = XINP_NEXT(head); xinp->xig_len > sizeof *xinp;
+	    xinp = XINP_NEXT(xinp)) {
+		xtp = (struct xtcpcb *)xinp;
+		xip = &xtp->xt_inp;
+
+		/*
+		 * XXX
+		 * Check protocol, support just v4 or v6, etc.
+		 */
+
+		/* Ignore PCBs which were freed during copyout. */
+		if (xip->inp_gencnt > head->xig_gen)
+			continue;
+
+		/* Skip listening sockets. */
+		if (xtp->t_state == TCPS_LISTEN)
+			continue;
+
+		/* If requested, skip sockets not having the requested stack. */
+		if (stack[0] != '\0' &&
+		    strncmp(xtp->xt_stack, stack, TCP_FUNCTION_NAME_LEN_MAX))
+			continue;
+
+		if (!tcpswitchconn(&xip->inp_inc, mode))
+			ok = false;
+	}
+	free(head);
+
+	return (ok);
+}
+
+/*
+ * Resolve the local and foreign host/port arguments with getaddrinfo()
+ * and attempt to switch every matching same-family address pair.
+ * Returns false if any pair failed or no pair shared a family.
+ */
+static bool
+tcpswitchbyname(const char *lhost, const char *lport, const char *fhost,
+    const char *fport, int mode)
+{
+	static const struct addrinfo hints = {
+		/*
+		 * Look for streams in all domains.
+		 */
+		.ai_family = AF_UNSPEC,
+		.ai_socktype = SOCK_STREAM,
+	};
+	struct addrinfo *ail, *local, *aif, *foreign;
+	int error;
+	bool ok, infamily;
+
+	error = getaddrinfo(lhost, lport, &hints, &local);
+	if (error != 0)
+		errx(1, "getaddrinfo: %s port %s: %s", lhost, lport,
+		    gai_strerror(error));
+
+	error = getaddrinfo(fhost, fport, &hints, &foreign);
+	if (error != 0) {
+		freeaddrinfo(local);	/* XXX gratuitous */
+		errx(1, "getaddrinfo: %s port %s: %s", fhost, fport,
+		    gai_strerror(error));
+	}
+
+	ok = true;
+	infamily = false;
+
+	/*
+	 * Try every combination of local and foreign address pairs.
+	 */
+	for (ail = local; ail != NULL; ail = ail->ai_next) {
+		for (aif = foreign; aif != NULL; aif = aif->ai_next) {
+			if (ail->ai_family != aif->ai_family)
+				continue;
+			infamily = true;
+			if (!tcpswitch(ail->ai_addr, aif->ai_addr, mode))
+				ok = false;
+		}
+	}
+
+	if (!infamily) {
+		warnx("%s %s %s %s: different address families", lhost, lport,
+		    fhost, fport);
+		ok = false;
+	}
+
+	freeaddrinfo(local);
+	freeaddrinfo(foreign);
+
+	return (ok);
+}
+
+/*
+ * Build local/foreign sockaddrs from a kernel in_conninfo (IPv4 or
+ * IPv6, per INC_ISIPV6) and hand them to tcpswitch().
+ */
+static bool
+tcpswitchconn(const struct in_conninfo *inc, int mode)
+{
+	struct sockaddr *local, *foreign;
+	struct sockaddr_in6 sin6[2];
+	struct sockaddr_in sin4[2];
+
+	if ((inc->inc_flags & INC_ISIPV6) != 0) {
+		memset(sin6, 0, sizeof sin6);
+
+		sin6[TCPDROP_LOCAL].sin6_len = sizeof sin6[TCPDROP_LOCAL];
+		sin6[TCPDROP_LOCAL].sin6_family = AF_INET6;
+		sin6[TCPDROP_LOCAL].sin6_port = inc->inc_lport;
+		memcpy(&sin6[TCPDROP_LOCAL].sin6_addr, &inc->inc6_laddr,
+		    sizeof inc->inc6_laddr);
+		local = (struct sockaddr *)&sin6[TCPDROP_LOCAL];
+
+		sin6[TCPDROP_FOREIGN].sin6_len = sizeof sin6[TCPDROP_FOREIGN];
+		sin6[TCPDROP_FOREIGN].sin6_family = AF_INET6;
+		sin6[TCPDROP_FOREIGN].sin6_port = inc->inc_fport;
+		memcpy(&sin6[TCPDROP_FOREIGN].sin6_addr, &inc->inc6_faddr,
+		    sizeof inc->inc6_faddr);
+		foreign = (struct sockaddr *)&sin6[TCPDROP_FOREIGN];
+	} else {
+		memset(sin4, 0, sizeof sin4);
+
+		sin4[TCPDROP_LOCAL].sin_len = sizeof sin4[TCPDROP_LOCAL];
+		sin4[TCPDROP_LOCAL].sin_family = AF_INET;
+		sin4[TCPDROP_LOCAL].sin_port = inc->inc_lport;
+		memcpy(&sin4[TCPDROP_LOCAL].sin_addr, &inc->inc_laddr,
+		    sizeof inc->inc_laddr);
+		local = (struct sockaddr *)&sin4[TCPDROP_LOCAL];
+
+		sin4[TCPDROP_FOREIGN].sin_len = sizeof sin4[TCPDROP_FOREIGN];
+		sin4[TCPDROP_FOREIGN].sin_family = AF_INET;
+		sin4[TCPDROP_FOREIGN].sin_port = inc->inc_fport;
+		memcpy(&sin4[TCPDROP_FOREIGN].sin_addr, &inc->inc_faddr,
+		    sizeof inc->inc_faddr);
+		foreign = (struct sockaddr *)&sin4[TCPDROP_FOREIGN];
+	}
+
+	return (tcpswitch(local, foreign, mode));
+}
+
+/*
+ * Print the usage summary and exit with status 1.
+ */
+static void
+usage(void)
+{
+	fprintf(stderr,
+"usage: switch_tls [-i | -s] local-address local-port foreign-address foreign-port\n"
+"       switch_tls [-i | -s] local-address:local-port foreign-address:foreign-port\n"
+"       switch_tls [-i | -s] local-address.local-port foreign-address.foreign-port\n"
+"       switch_tls [-l | -i | -s] -a\n"
+"       switch_tls [-l | -i | -s] -S stack\n");
+	exit(1);
+}