Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -4270,7 +4270,7 @@ netinet/sctputil.c optional inet sctp | inet6 sctp netinet/siftr.c optional inet siftr alq | inet6 siftr alq netinet/tcp_debug.c optional tcpdebug -netinet/tcp_fastopen.c optional inet tcp_rfc7413 | inet6 tcp_rfc7413 +netinet/tcp_fastopen.c optional inet | inet6 netinet/tcp_hostcache.c optional inet | inet6 netinet/tcp_input.c optional inet | inet6 netinet/tcp_lro.c optional inet | inet6 Index: sys/conf/options =================================================================== --- sys/conf/options +++ sys/conf/options @@ -450,8 +450,8 @@ SIFTR TCP_HHOOK opt_inet.h TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading -TCP_RFC7413 opt_inet.h -TCP_RFC7413_MAX_KEYS opt_inet.h +TCP_FASTOPEN_MAX_KEYS opt_inet.h +TCP_FASTOPEN_MAX_PSKS opt_inet.h TCP_SIGNATURE opt_ipsec.h VLAN_ARRAY opt_vlan.h XBONEHACK Index: sys/netinet/tcp.h =================================================================== --- sys/netinet/tcp.h +++ sys/netinet/tcp.h @@ -152,6 +152,9 @@ #define TCP_MAXHLEN (0xf<<2) /* max length of header in bytes */ #define TCP_MAXOLEN (TCP_MAXHLEN - sizeof(struct tcphdr)) /* max space left for options */ + +#define TCP_FASTOPEN_MAX_COOKIE_LEN 16 /* Per RFC7413 */ +#define TCP_FASTOPEN_PSK_LEN 16 /* Same as TCP_FASTOPEN_KEY_LEN */ #endif /* __BSD_VISIBLE */ /* @@ -252,6 +255,16 @@ /* Padding to grow without breaking ABI. */ u_int32_t __tcpi_pad[26]; /* Padding. */ }; + +/* + * If this structure is provided when setting the TCP_FASTOPEN socket + * option, and the enable member is non-zero, a subsequent connect will use + * pre-shared key (PSK) mode using the provided key. + */ +struct tcp_fastopen { + int enable; + uint8_t psk[TCP_FASTOPEN_PSK_LEN]; +}; #endif #define TCP_FUNCTION_NAME_LEN_MAX 32 Index: sys/netinet/tcp_fastopen.h =================================================================== --- sys/netinet/tcp_fastopen.h +++ sys/netinet/tcp_fastopen.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2015 Patrick Kelsey + * Copyright (c) 2015-2017 Patrick Kelsey * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -31,17 +31,59 @@ #ifdef _KERNEL -#define TCP_FASTOPEN_COOKIE_LEN 8 /* tied to SipHash24 64-bit output */ +#define TCP_FASTOPEN_COOKIE_LEN 8 /* SipHash24 64-bit output */ -VNET_DECLARE(unsigned int, tcp_fastopen_enabled); -#define V_tcp_fastopen_enabled VNET(tcp_fastopen_enabled) +VNET_DECLARE(unsigned int, tcp_fastopen_client_enabled); +#define V_tcp_fastopen_client_enabled VNET(tcp_fastopen_client_enabled) + +VNET_DECLARE(unsigned int, tcp_fastopen_server_enabled); +#define V_tcp_fastopen_server_enabled VNET(tcp_fastopen_server_enabled) + +union tcp_fastopen_ip_addr { + struct in_addr v4; + struct in6_addr v6; +}; + +struct tcp_fastopen_ccache_entry { + TAILQ_ENTRY(tcp_fastopen_ccache_entry) cce_link; + union tcp_fastopen_ip_addr cce_client_ip; /* network byte order */ + union tcp_fastopen_ip_addr cce_server_ip; /* network byte order */ + uint16_t server_port; /* network byte order */ + uint16_t server_mss; /* host byte order */ + uint8_t af; + uint8_t cookie_len; + uint8_t cookie[TCP_FASTOPEN_MAX_COOKIE_LEN]; + sbintime_t disable_time; /* non-zero value means path is disabled */ +}; + +struct tcp_fastopen_ccache; + +struct tcp_fastopen_ccache_bucket { + struct mtx ccb_mtx; + TAILQ_HEAD(bucket_entries, tcp_fastopen_ccache_entry) ccb_entries; + int ccb_num_entries; + struct tcp_fastopen_ccache *ccb_ccache; +}; + +struct tcp_fastopen_ccache { + uma_zone_t zone; + struct tcp_fastopen_ccache_bucket *base; + unsigned int bucket_limit; + unsigned int buckets; + unsigned int mask; + uint32_t secret; +}; void tcp_fastopen_init(void); void tcp_fastopen_destroy(void); unsigned int *tcp_fastopen_alloc_counter(void); -void tcp_fastopen_decrement_counter(unsigned int *counter); -int tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie, - unsigned int len, uint64_t *latest_cookie); +void tcp_fastopen_decrement_counter(unsigned int *); +int tcp_fastopen_check_cookie(struct in_conninfo *, uint8_t *, unsigned int, + uint64_t *); +void tcp_fastopen_connect(struct tcpcb *); +void tcp_fastopen_disable_path(struct tcpcb *); +void tcp_fastopen_update_cache(struct tcpcb *, uint16_t, uint8_t, + uint8_t *); #endif /* _KERNEL */ #endif /* _TCP_FASTOPEN_H_ */ Index: sys/netinet/tcp_fastopen.c =================================================================== --- sys/netinet/tcp_fastopen.c +++ sys/netinet/tcp_fastopen.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2015 Patrick Kelsey + * Copyright (c) 2015-2017 Patrick Kelsey * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,21 +25,38 @@ */ /* - * This is a server-side implementation of TCP Fast Open (TFO) [RFC7413]. + * This is an implementation of TCP Fast Open (TFO) [RFC7413]. * - * This implementation is currently considered to be experimental and is not - * included in kernel builds by default. To include this code, add the - * following line to your kernel config: + * The generated TFO cookies are the 64-bit output of + * SipHash24(key=<16-byte-key>, msg=<client-ip>). Multiple concurrent valid + * keys are supported so that time-based rolling cookie invalidation + * policies can be implemented in the system. The default number of + * concurrent keys is 2. This can be adjusted in the kernel config as + * follows: * - * options TCP_RFC7413 + * options TCP_FASTOPEN_MAX_KEYS=<number-of-keys> * - * The generated TFO cookies are the 64-bit output of - * SipHash24(<16-byte-key><client-ip>).
Multiple concurrent valid keys are - supported so that time-based rolling cookie invalidation policies can be - implemented in the system. The default number of concurrent keys is 2. - * This can be adjusted in the kernel config as follows: * - * options TCP_RFC7413_MAX_KEYS=<number-of-keys> + * + * In addition to the facilities defined in RFC7413, this implementation + * supports a pre-shared key (PSK) mode of operation in which the TFO server + * requires the client to be in possession of a shared secret in order for + * the client to be able to successfully open TFO connections with the + * server. This is useful, for example, in environments where TFO servers + * are exposed to both internal and external clients and only wish to allow + * TFO connections from internal clients. + * + * In the PSK mode of operation, the server generates and sends TFO cookies + * to requesting clients as usual. However, when validating cookies + * received in TFO SYNs from clients, the server requires the + * client-supplied cookie to equal SipHash24(key=<16-byte-psk>, + * msg=<cookie-sent-to-client>). + * + * Multiple concurrent valid pre-shared keys are supported so that + * time-based rolling PSK invalidation policies can be implemented in the + * system. The default number of concurrent pre-shared keys is 2. This can + * be adjusted in the kernel config as follows: + * + * options TCP_FASTOPEN_MAX_PSKS=<number-of-psks> * * * The following TFO-specific sysctls are defined: @@ -49,31 +66,72 @@ * be valid. * * net.inet.tcp.fastopen.autokey (RW, default 120) - * When this and net.inet.tcp.fastopen.enabled are non-zero, a new key - * will be automatically generated after this many seconds. + * When this and net.inet.tcp.fastopen.server_enabled are non-zero, a + * new key will be automatically generated after this many seconds. + * + * net.inet.tcp.fastopen.ccache_bucket_limit + * (RWTUN, default TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT) + * The maximum number of entries in a client cookie cache bucket. + * + * net.inet.tcp.fastopen.ccache_buckets + * (RDTUN, default TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT) + * The number of client cookie cache buckets. * - * net.inet.tcp.fastopen.enabled (RW, default 0) - * When zero, no new TFO connections can be created. On the transition - * from enabled to disabled, all installed keys are removed. On the - * transition from disabled to enabled, if net.inet.tcp.fastopen.autokey - * is non-zero and there are no keys installed, a new key will be - * generated immediately. The transition from enabled to disabled does - * not affect any TFO connections in progress; it only prevents new ones - * from being made. + * net.inet.tcp.fastopen.client_enabled (RW, default 0) + * When zero, no new active (i.e., client) TFO connections can be + * created. On the transition from enabled to disabled, the client + * cookie cache is cleared and disabled. The transition from enabled to + * disabled does not affect any active TFO connections in progress; it + * only prevents new ones from being made. * - * net.inet.tcp.fastopen.keylen (RO) + * net.inet.tcp.fastopen.keylen (RD) * The key length in bytes. * - * net.inet.tcp.fastopen.maxkeys (RO) + * net.inet.tcp.fastopen.maxkeys (RD) * The maximum number of keys supported. * - * net.inet.tcp.fastopen.numkeys (RO) + * net.inet.tcp.fastopen.maxpsks (RD) + * The maximum number of pre-shared keys supported. + * + * net.inet.tcp.fastopen.numkeys (RD) * The current number of keys installed.
* - * net.inet.tcp.fastopen.setkey (WO) - * Install a new key by writing net.inet.tcp.fastopen.keylen bytes to this - * sysctl. + * net.inet.tcp.fastopen.numpsks (RD) + * The current number of pre-shared keys installed. + * + * net.inet.tcp.fastopen.path_disable_time + * (RW, default TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT) + * When a failure occurs while trying to create a new active (i.e., + * client) TFO connection, new active connections on the same path, as + * determined by the tuple {client_ip, server_ip, server_port}, will be + * forced to be non-TFO for this many seconds. Note that the path + * disable mechanism relies on state stored in client cookie cache + * entries, so it is possible for the disable time for a given path to + * be reduced if the corresponding client cookie cache entry is reused + * due to resource pressure before the disable period has elapsed. + * + * net.inet.tcp.fastopen.psk_enabled (RW, default 0) + * When non-zero, pre-shared key (PSK) mode is enabled for all TFO + * servers. On the transition from enabled to disabled, all installed + * pre-shared keys are removed. * + * net.inet.tcp.fastopen.server_enabled (RW, default 0) + * When zero, no new passive (i.e., server) TFO connections can be + * created. On the transition from enabled to disabled, all installed + * keys and pre-shared keys are removed. On the transition from + * disabled to enabled, if net.inet.tcp.fastopen.autokey is non-zero and + * there are no keys installed, a new key will be generated immediately. + * The transition from enabled to disabled does not affect any passive + * TFO connections in progress; it only prevents new ones from being + * made. + * + * net.inet.tcp.fastopen.setkey (WR) + * Install a new key by writing net.inet.tcp.fastopen.keylen bytes to + * this sysctl. + * + * net.inet.tcp.fastopen.setpsk (WR) + * Install a new pre-shared key by writing net.inet.tcp.fastopen.keylen + * bytes to this sysctl. * * In order for TFO connections to be created via a listen socket, that * socket must have the TCP_FASTOPEN socket option set on it. This option @@ -105,6 +163,7 @@ #include #include +#include #include #include #include @@ -119,21 +178,54 @@ #include #include -#include #include +#include #define TCP_FASTOPEN_KEY_LEN SIPHASH_KEY_LENGTH -#if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1) +#if TCP_FASTOPEN_PSK_LEN != TCP_FASTOPEN_KEY_LEN +#error TCP_FASTOPEN_PSK_LEN must be equal to TCP_FASTOPEN_KEY_LEN +#endif + +/* + * A PSK-mode setsockopt() uses tcpcb.t_tfo_cookie.client to hold the PSK + * until the connect occurs, so the client cookie buffer must be large + * enough to hold a full PSK.
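As a minimal sketch of the administrative interface described above (assuming a FreeBSD userland with sysctlbyname(3) and arc4random_buf(3), root privileges, and the sysctl names introduced by this patch), a cookie key and a pre-shared key could be installed and the server side enabled roughly as follows; error handling is trimmed and the program is purely illustrative, not part of the patch:

/* Sketch: install a TFO cookie key and a PSK, then enable PSK mode and the server side. */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdint.h>
#include <stdlib.h>

int
main(void)
{
        uint8_t key[16], psk[16];       /* net.inet.tcp.fastopen.keylen is 16 */
        unsigned int one = 1;

        arc4random_buf(key, sizeof(key));
        arc4random_buf(psk, sizeof(psk));
        if (sysctlbyname("net.inet.tcp.fastopen.setkey", NULL, NULL,
            key, sizeof(key)) == -1)
                err(1, "setkey");
        if (sysctlbyname("net.inet.tcp.fastopen.setpsk", NULL, NULL,
            psk, sizeof(psk)) == -1)
                err(1, "setpsk");
        if (sysctlbyname("net.inet.tcp.fastopen.psk_enabled", NULL, NULL,
            &one, sizeof(one)) == -1)
                err(1, "psk_enabled");
        if (sysctlbyname("net.inet.tcp.fastopen.server_enabled", NULL, NULL,
            &one, sizeof(one)) == -1)
                err(1, "server_enabled");
        return (0);
}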
+ */ +#if TCP_FASTOPEN_MAX_COOKIE_LEN < TCP_FASTOPEN_PSK_LEN +#error TCP_FASTOPEN_MAX_COOKIE_LEN must be >= TCP_FASTOPEN_PSK_LEN +#endif + +#define TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT 16 +#define TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT 2048 /* must be power of 2 */ + +#define TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT 900 /* seconds */ + +#if !defined(TCP_FASTOPEN_MAX_KEYS) || (TCP_FASTOPEN_MAX_KEYS < 1) +#undef TCP_FASTOPEN_MAX_KEYS #define TCP_FASTOPEN_MAX_KEYS 2 -#else -#define TCP_FASTOPEN_MAX_KEYS TCP_RFC7413_MAX_KEYS +#endif + +#if TCP_FASTOPEN_MAX_KEYS > 10 +#undef TCP_FASTOPEN_MAX_KEYS +#define TCP_FASTOPEN_MAX_KEYS 10 +#endif + +#if !defined(TCP_FASTOPEN_MAX_PSKS) || (TCP_FASTOPEN_MAX_PSKS < 1) +#undef TCP_FASTOPEN_MAX_PSKS +#define TCP_FASTOPEN_MAX_PSKS 2 +#endif + +#if TCP_FASTOPEN_MAX_PSKS > 10 +#undef TCP_FASTOPEN_MAX_PSKS +#define TCP_FASTOPEN_MAX_PSKS 10 #endif struct tcp_fastopen_keylist { unsigned int newest; + unsigned int newest_psk; uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN]; + uint8_t psk[TCP_FASTOPEN_MAX_PSKS][TCP_FASTOPEN_KEY_LEN]; }; struct tcp_fastopen_callout { @@ -141,6 +233,16 @@ struct vnet *v; }; +static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_lookup( + struct in_conninfo *, struct tcp_fastopen_ccache_bucket **); +static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_create( + struct tcp_fastopen_ccache_bucket *, struct in_conninfo *, uint16_t, uint8_t, + uint8_t *); +static void tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *, + unsigned int); +static void tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *, + struct tcp_fastopen_ccache_bucket *); + SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW, 0, "TCP Fast Open"); static VNET_DEFINE(int, tcp_fastopen_acceptany) = 0; @@ -157,12 +259,25 @@ &sysctl_net_inet_tcp_fastopen_autokey, "IU", "Number of seconds between auto-generation of a new key; zero disables"); -VNET_DEFINE(unsigned int, tcp_fastopen_enabled) = 0; -static int sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS); -SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, enabled, +static int sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_bucket_limit, + CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RWTUN, NULL, 0, + &sysctl_net_inet_tcp_fastopen_ccache_bucket_limit, "IU", + "Max entries per bucket in client cookie cache"); + +static VNET_DEFINE(unsigned int, tcp_fastopen_ccache_buckets) = + TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT; +#define V_tcp_fastopen_ccache_buckets VNET(tcp_fastopen_ccache_buckets) +SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, ccache_buckets, + CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(tcp_fastopen_ccache_buckets), 0, + "Client cookie cache number of buckets (power of 2)"); + +VNET_DEFINE(unsigned int, tcp_fastopen_client_enabled) = 0; +static int sysctl_net_inet_tcp_fastopen_client_enabled(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, client_enabled, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, - &sysctl_net_inet_tcp_fastopen_enabled, "IU", - "Enable/disable TCP Fast Open processing"); + &sysctl_net_inet_tcp_fastopen_client_enabled, "IU", + "Enable/disable TCP Fast Open client functionality"); SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN, @@ -172,18 +287,56 @@ CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS, "Maximum number of keys supported"); +SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, 
maxpsks, + CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_PSKS, + "Maximum number of pre-shared keys supported"); + static VNET_DEFINE(unsigned int, tcp_fastopen_numkeys) = 0; #define V_tcp_fastopen_numkeys VNET(tcp_fastopen_numkeys) SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0, "Number of keys installed"); +static VNET_DEFINE(unsigned int, tcp_fastopen_numpsks) = 0; +#define V_tcp_fastopen_numpsks VNET(tcp_fastopen_numpsks) +SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numpsks, + CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numpsks), 0, + "Number of pre-shared keys installed"); + +static VNET_DEFINE(unsigned int, tcp_fastopen_path_disable_time) = + TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT; +#define V_tcp_fastopen_path_disable_time VNET(tcp_fastopen_path_disable_time) +SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, path_disable_time, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_path_disable_time), 0, + "Seconds a TFO failure disables a {client_ip, server_ip, server_port} path"); + +static VNET_DEFINE(unsigned int, tcp_fastopen_psk_enabled) = 0; +#define V_tcp_fastopen_psk_enabled VNET(tcp_fastopen_psk_enabled) +static int sysctl_net_inet_tcp_fastopen_psk_enabled(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, psk_enabled, + CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, + &sysctl_net_inet_tcp_fastopen_psk_enabled, "IU", + "Enable/disable TCP Fast Open server pre-shared key mode"); + +VNET_DEFINE(unsigned int, tcp_fastopen_server_enabled) = 0; +static int sysctl_net_inet_tcp_fastopen_server_enabled(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, server_enabled, + CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, + &sysctl_net_inet_tcp_fastopen_server_enabled, "IU", + "Enable/disable TCP Fast Open server functionality"); + static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey, CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0, &sysctl_net_inet_tcp_fastopen_setkey, "", "Install a new key"); +static int sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setpsk, + CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0, + &sysctl_net_inet_tcp_fastopen_setpsk, "", + "Install a new pre-shared key"); + static VNET_DEFINE(struct rmlock, tcp_fastopen_keylock); #define V_tcp_fastopen_keylock VNET(tcp_fastopen_keylock) @@ -201,9 +354,21 @@ static VNET_DEFINE(uma_zone_t, counter_zone); #define V_counter_zone VNET(counter_zone) +static MALLOC_DEFINE(M_TCP_FASTOPEN_CCACHE, "tfo_ccache", "TFO client cookie cache buckets"); + +static VNET_DEFINE(struct tcp_fastopen_ccache, tcp_fastopen_ccache); +#define V_tcp_fastopen_ccache VNET(tcp_fastopen_ccache) + +#define CCB_LOCK(ccb) mtx_lock(&(ccb)->ccb_mtx) +#define CCB_UNLOCK(ccb) mtx_unlock(&(ccb)->ccb_mtx) +#define CCB_LOCK_ASSERT(ccb) mtx_assert(&(ccb)->ccb_mtx, MA_OWNED) + + void tcp_fastopen_init(void) { + unsigned int i; + V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); rm_init(&V_tcp_fastopen_keylock, "tfo_keylock"); @@ -211,11 +376,67 @@ &V_tcp_fastopen_keylock, 0); V_tcp_fastopen_autokey_ctx.v = curvnet; V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1; + V_tcp_fastopen_keys.newest_psk = TCP_FASTOPEN_MAX_PSKS - 1; + + /* May already be non-zero if kernel tunable was set */ + if (V_tcp_fastopen_ccache.bucket_limit == 0) + 
V_tcp_fastopen_ccache.bucket_limit = + TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT; + + /* May already be non-zero if kernel tunable was set */ + if ((V_tcp_fastopen_ccache_buckets == 0) || + !powerof2(V_tcp_fastopen_ccache_buckets)) + V_tcp_fastopen_ccache.buckets = + TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT; + else + V_tcp_fastopen_ccache.buckets = V_tcp_fastopen_ccache_buckets; + + V_tcp_fastopen_ccache.mask = V_tcp_fastopen_ccache.buckets - 1; + V_tcp_fastopen_ccache.secret = arc4random(); + + V_tcp_fastopen_ccache.base = malloc(V_tcp_fastopen_ccache.buckets * + sizeof(struct tcp_fastopen_ccache_bucket), M_TCP_FASTOPEN_CCACHE, + M_WAITOK | M_ZERO); + + for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) { + TAILQ_INIT(&V_tcp_fastopen_ccache.base[i].ccb_entries); + mtx_init(&V_tcp_fastopen_ccache.base[i].ccb_mtx, "tfo_ccache_bucket", + NULL, MTX_DEF); + V_tcp_fastopen_ccache.base[i].ccb_num_entries = -1; /* bucket disabled */ + V_tcp_fastopen_ccache.base[i].ccb_ccache = &V_tcp_fastopen_ccache; + } + + /* + * Note that while the total number of entries in the cookie cache + * is limited by the table management logic to + * V_tcp_fastopen_ccache.buckets * + * V_tcp_fastopen_ccache.bucket_limit, the total number of items in + * this zone can exceed that amount by the number of CPUs in the + * system times the maximum number of unallocated items that can be + * present in each UMA per-CPU cache for this zone. + */ + V_tcp_fastopen_ccache.zone = uma_zcreate("tfo_ccache_entries", + sizeof(struct tcp_fastopen_ccache_entry), NULL, NULL, NULL, NULL, + UMA_ALIGN_CACHE, 0); } void tcp_fastopen_destroy(void) { + struct tcp_fastopen_ccache_bucket *ccb; + unsigned int i; + + for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) { + ccb = &V_tcp_fastopen_ccache.base[i]; + tcp_fastopen_ccache_bucket_trim(ccb, 0); + mtx_destroy(&ccb->ccb_mtx); + } + + KASSERT(uma_zone_get_cur(V_tcp_fastopen_ccache.zone) == 0, + ("%s: TFO ccache zone allocation count not 0", __func__)); + uma_zdestroy(V_tcp_fastopen_ccache.zone); + free(V_tcp_fastopen_ccache.base, M_TCP_FASTOPEN_CCACHE); + callout_drain(&V_tcp_fastopen_autokey_ctx.c); rm_destroy(&V_tcp_fastopen_keylock); uma_zdestroy(V_counter_zone); @@ -254,6 +475,19 @@ } static void +tcp_fastopen_addpsk_locked(uint8_t *psk) +{ + + V_tcp_fastopen_keys.newest_psk++; + if (V_tcp_fastopen_keys.newest_psk == TCP_FASTOPEN_MAX_PSKS) + V_tcp_fastopen_keys.newest_psk = 0; + memcpy(V_tcp_fastopen_keys.psk[V_tcp_fastopen_keys.newest_psk], psk, + TCP_FASTOPEN_KEY_LEN); + if (V_tcp_fastopen_numpsks < TCP_FASTOPEN_MAX_PSKS) + V_tcp_fastopen_numpsks++; +} + +static void tcp_fastopen_autokey_locked(void) { uint8_t newkey[TCP_FASTOPEN_KEY_LEN]; @@ -300,6 +534,49 @@ return (siphash); } +static uint64_t +tcp_fastopen_make_psk_cookie(uint8_t *psk, uint8_t *cookie, uint8_t cookie_len) +{ + SIPHASH_CTX ctx; + uint64_t psk_cookie; + + SipHash24_Init(&ctx); + SipHash_SetKey(&ctx, psk); + SipHash_Update(&ctx, cookie, cookie_len); + SipHash_Final((u_int8_t *)&psk_cookie, &ctx); + + return (psk_cookie); +} + +static int +tcp_fastopen_find_cookie_match_locked(uint8_t *wire_cookie, uint64_t *cur_cookie) +{ + unsigned int i, psk_index; + uint64_t psk_cookie; + + if (V_tcp_fastopen_psk_enabled) { + psk_index = V_tcp_fastopen_keys.newest_psk; + for (i = 0; i < V_tcp_fastopen_numpsks; i++) { + psk_cookie = + tcp_fastopen_make_psk_cookie( + V_tcp_fastopen_keys.psk[psk_index], + (uint8_t *)cur_cookie, + TCP_FASTOPEN_COOKIE_LEN); + + if (memcmp(wire_cookie, &psk_cookie, + TCP_FASTOPEN_COOKIE_LEN) == 0) + return 
(1); + + if (psk_index == 0) + psk_index = TCP_FASTOPEN_MAX_PSKS - 1; + else + psk_index--; + } + } else if (memcmp(wire_cookie, cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0) + return (1); + + return (0); +} /* * Return values: @@ -313,6 +590,7 @@ { struct rm_priotracker tracker; unsigned int i, key_index; + int rv; uint64_t cur_cookie; if (V_tcp_fastopen_acceptany) { @@ -320,21 +598,22 @@ return (1); } + TCP_FASTOPEN_KEYS_RLOCK(&tracker); if (len != TCP_FASTOPEN_COOKIE_LEN) { if (V_tcp_fastopen_numkeys > 0) { *latest_cookie = tcp_fastopen_make_cookie( V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], inc); - return (0); - } - return (-1); + rv = 0; + } else + rv = -1; + goto out; } /* * Check against each available key, from newest to oldest. */ - TCP_FASTOPEN_KEYS_RLOCK(&tracker); key_index = V_tcp_fastopen_keys.newest; for (i = 0; i < V_tcp_fastopen_numkeys; i++) { cur_cookie = @@ -342,18 +621,19 @@ inc); if (i == 0) *latest_cookie = cur_cookie; - if (memcmp(cookie, &cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0) { - TCP_FASTOPEN_KEYS_RUNLOCK(&tracker); - return (1); - } + rv = tcp_fastopen_find_cookie_match_locked(cookie, &cur_cookie); + if (rv) + goto out; if (key_index == 0) key_index = TCP_FASTOPEN_MAX_KEYS - 1; else key_index--; } - TCP_FASTOPEN_KEYS_RUNLOCK(&tracker); + rv = 0; - return (0); + out: + TCP_FASTOPEN_KEYS_RUNLOCK(&tracker); + return (rv); } static int @@ -369,7 +649,7 @@ return (EINVAL); TCP_FASTOPEN_KEYS_WLOCK(); - if (V_tcp_fastopen_enabled) { + if (V_tcp_fastopen_server_enabled) { if (V_tcp_fastopen_autokey && !new) callout_stop(&V_tcp_fastopen_autokey_ctx.c); else if (new) @@ -385,24 +665,54 @@ } static int -sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS) +sysctl_net_inet_tcp_fastopen_psk_enabled(SYSCTL_HANDLER_ARGS) +{ + int error; + unsigned int new; + + new = V_tcp_fastopen_psk_enabled; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr) { + if (V_tcp_fastopen_psk_enabled && !new) { + /* enabled -> disabled */ + TCP_FASTOPEN_KEYS_WLOCK(); + V_tcp_fastopen_numpsks = 0; + V_tcp_fastopen_keys.newest_psk = + TCP_FASTOPEN_MAX_PSKS - 1; + V_tcp_fastopen_psk_enabled = 0; + TCP_FASTOPEN_KEYS_WUNLOCK(); + } else if (!V_tcp_fastopen_psk_enabled && new) { + /* disabled -> enabled */ + TCP_FASTOPEN_KEYS_WLOCK(); + V_tcp_fastopen_psk_enabled = 1; + TCP_FASTOPEN_KEYS_WUNLOCK(); + } + } + return (error); +} + +static int +sysctl_net_inet_tcp_fastopen_server_enabled(SYSCTL_HANDLER_ARGS) { int error; unsigned int new; - new = V_tcp_fastopen_enabled; + new = V_tcp_fastopen_server_enabled; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { - if (V_tcp_fastopen_enabled && !new) { + if (V_tcp_fastopen_server_enabled && !new) { /* enabled -> disabled */ TCP_FASTOPEN_KEYS_WLOCK(); V_tcp_fastopen_numkeys = 0; V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1; if (V_tcp_fastopen_autokey) callout_stop(&V_tcp_fastopen_autokey_ctx.c); - V_tcp_fastopen_enabled = 0; + V_tcp_fastopen_numpsks = 0; + V_tcp_fastopen_keys.newest_psk = + TCP_FASTOPEN_MAX_PSKS - 1; + V_tcp_fastopen_server_enabled = 0; TCP_FASTOPEN_KEYS_WUNLOCK(); - } else if (!V_tcp_fastopen_enabled && new) { + } else if (!V_tcp_fastopen_server_enabled && new) { /* disabled -> enabled */ TCP_FASTOPEN_KEYS_WLOCK(); if (V_tcp_fastopen_autokey && @@ -413,7 +723,7 @@ tcp_fastopen_autokey_callout, &V_tcp_fastopen_autokey_ctx); } - V_tcp_fastopen_enabled = 1; + V_tcp_fastopen_server_enabled = 1; TCP_FASTOPEN_KEYS_WUNLOCK(); } } @@ -442,3 +752,368 @@ return (0); } + 
+static int +sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS) +{ + int error; + uint8_t newpsk[TCP_FASTOPEN_KEY_LEN]; + + if (req->oldptr != NULL || req->oldlen != 0) + return (EINVAL); + if (req->newptr == NULL) + return (EPERM); + if (req->newlen != sizeof(newpsk)) + return (EINVAL); + error = SYSCTL_IN(req, newpsk, sizeof(newpsk)); + if (error) + return (error); + + TCP_FASTOPEN_KEYS_WLOCK(); + tcp_fastopen_addpsk_locked(newpsk); + TCP_FASTOPEN_KEYS_WUNLOCK(); + + return (0); +} + +static int +sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS) +{ + struct tcp_fastopen_ccache_bucket *ccb; + int error; + unsigned int new; + unsigned int i; + + new = V_tcp_fastopen_ccache.bucket_limit; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr) { + if ((new == 0) || (new > INT_MAX)) + error = EINVAL; + else { + if (new < V_tcp_fastopen_ccache.bucket_limit) { + for (i = 0; i < V_tcp_fastopen_ccache.buckets; + i++) { + ccb = &V_tcp_fastopen_ccache.base[i]; + tcp_fastopen_ccache_bucket_trim(ccb, new); + } + } + V_tcp_fastopen_ccache.bucket_limit = new; + } + + } + return (error); +} + +static int +sysctl_net_inet_tcp_fastopen_client_enabled(SYSCTL_HANDLER_ARGS) +{ + struct tcp_fastopen_ccache_bucket *ccb; + int error; + unsigned int new, i; + + new = V_tcp_fastopen_client_enabled; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr) { + if (V_tcp_fastopen_client_enabled && !new) { + /* enabled -> disabled */ + for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) { + ccb = &V_tcp_fastopen_ccache.base[i]; + tcp_fastopen_ccache_bucket_trim(ccb, 0); + } + V_tcp_fastopen_client_enabled = 0; + } else if (!V_tcp_fastopen_client_enabled && new) { + /* disabled -> enabled */ + for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) { + ccb = &V_tcp_fastopen_ccache.base[i]; + CCB_LOCK(ccb); + KASSERT(TAILQ_EMPTY(&ccb->ccb_entries), + ("%s: ccb->ccb_entries not empty", __func__)); + KASSERT(ccb->ccb_num_entries == -1, + ("%s: ccb->ccb_num_entries %d not -1", __func__, + ccb->ccb_num_entries)); + ccb->ccb_num_entries = 0; /* enable bucket */ + CCB_UNLOCK(ccb); + } + V_tcp_fastopen_client_enabled = 1; + } + } + return (error); +} + +void +tcp_fastopen_connect(struct tcpcb *tp) +{ + struct inpcb *inp; + struct tcp_fastopen_ccache_bucket *ccb; + struct tcp_fastopen_ccache_entry *cce; + sbintime_t now; + uint16_t server_mss; + uint64_t psk_cookie; + + inp = tp->t_inpcb; + cce = tcp_fastopen_ccache_lookup(&inp->inp_inc, &ccb); + if (cce) { + if (cce->disable_time == 0) { + if (tp->t_tfo_client_cookie_len == + TCP_FASTOPEN_PSK_LEN) { + psk_cookie = + tcp_fastopen_make_psk_cookie( + tp->t_tfo_cookie.client, + cce->cookie, cce->cookie_len); + } else { + tp->t_tfo_client_cookie_len = cce->cookie_len; + memcpy(tp->t_tfo_cookie.client, cce->cookie, + cce->cookie_len); + } + server_mss = cce->server_mss; + CCB_UNLOCK(ccb); + if (tp->t_tfo_client_cookie_len == + TCP_FASTOPEN_PSK_LEN) { + tp->t_tfo_client_cookie_len = + TCP_FASTOPEN_COOKIE_LEN; + memcpy(tp->t_tfo_cookie.client, &psk_cookie, + TCP_FASTOPEN_COOKIE_LEN); + } + tcp_mss(tp, server_mss ? server_mss : -1); + tp->snd_wnd = tp->t_maxseg; + } else { + /* + * The path is disabled. Check the time and + * possibly reenable. + */ + now = getsbinuptime(); + if (now - cce->disable_time > + ((sbintime_t)V_tcp_fastopen_path_disable_time << 32)) { + /* + * Re-enable path. Force a TFO cookie + * request. Forget the old MSS as it may be + * bogus now, and we will rediscover it in + * the SYN|ACK. 
+ */ + cce->disable_time = 0; + cce->server_mss = 0; + cce->cookie_len = 0; + /* + * tp->t_tfo... cookie details are already + * zero from the tcpcb init. + */ + } else { + /* + * Path is disabled, so disable TFO on this + * connection. + */ + tp->t_flags &= ~TF_FASTOPEN; + } + CCB_UNLOCK(ccb); + tcp_mss(tp, -1); + /* + * snd_wnd is irrelevant since we are either forcing + * a TFO cookie request or disabling TFO - either + * way, no data with the SYN. + */ + } + } else { + /* + * Create a new entry for this path. This is done now, + * instead of lazily on the SYN|ACK, so that there is a + * cache entry for this path to disable if this TFO attempt + * fails. + */ + cce = tcp_fastopen_ccache_create(ccb, &inp->inp_inc, 0, 0, NULL); + CCB_UNLOCK(ccb); + tcp_mss(tp, -1); + /* + * snd_wnd is irrelevant since we are forcing a TFO cookie + * request. + */ + } +} + +void +tcp_fastopen_disable_path(struct tcpcb *tp) +{ + struct in_conninfo *inc = &tp->t_inpcb->inp_inc; + struct tcp_fastopen_ccache_bucket *ccb; + struct tcp_fastopen_ccache_entry *cce; + + cce = tcp_fastopen_ccache_lookup(inc, &ccb); + if (cce) { + cce->server_mss = 0; + cce->cookie_len = 0; + /* + * Preserve the existing disable time if it is already + * disabled. + */ + if (cce->disable_time == 0) + cce->disable_time = getsbinuptime(); + } else /* use invalid cookie len to create disabled entry */ + tcp_fastopen_ccache_create(ccb, inc, 0, + TCP_FASTOPEN_MAX_COOKIE_LEN + 1, NULL); + + CCB_UNLOCK(ccb); + tp->t_flags &= ~TF_FASTOPEN; +} + +void +tcp_fastopen_update_cache(struct tcpcb *tp, uint16_t mss, + uint8_t cookie_len, uint8_t *cookie) +{ + struct in_conninfo *inc = &tp->t_inpcb->inp_inc; + struct tcp_fastopen_ccache_bucket *ccb; + struct tcp_fastopen_ccache_entry *cce; + + cce = tcp_fastopen_ccache_lookup(inc, &ccb); + if (cce) { + cce->server_mss = mss; + if ((cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) && + ((cookie_len & 0x1) == 0)) { + cce->server_mss = mss; + cce->cookie_len = cookie_len; + memcpy(cce->cookie, cookie, cookie_len); + cce->disable_time = 0; + } else { + /* invalid cookie length, disable entry */ + cce->server_mss = 0; + cce->cookie_len = 0; + /* + * Preserve the existing disable time if it is + * already disabled. + */ + if (cce->disable_time == 0) + cce->disable_time = getsbinuptime(); + } + } else + tcp_fastopen_ccache_create(ccb, inc, mss, cookie_len, cookie); + + CCB_UNLOCK(ccb); +} + +static struct tcp_fastopen_ccache_entry * +tcp_fastopen_ccache_lookup(struct in_conninfo *inc, + struct tcp_fastopen_ccache_bucket **ccbp) +{ + struct tcp_fastopen_ccache_bucket *ccb; + struct tcp_fastopen_ccache_entry *cce; + uint32_t last_word; + uint32_t hash; + + hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependladdr, 4, + V_tcp_fastopen_ccache.secret); + hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependfaddr, 4, + hash); + last_word = inc->inc_fport; + hash = jenkins_hash32(&last_word, 1, hash); + ccb = &V_tcp_fastopen_ccache.base[hash & V_tcp_fastopen_ccache.mask]; + *ccbp = ccb; + CCB_LOCK(ccb); + + /* + * Always returns with locked bucket. 
+ */ + TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link) + if ((!(cce->af == AF_INET6) == !(inc->inc_flags & INC_ISIPV6)) && + (cce->server_port == inc->inc_ie.ie_fport) && + (((cce->af == AF_INET) && + (cce->cce_client_ip.v4.s_addr == inc->inc_laddr.s_addr) && + (cce->cce_server_ip.v4.s_addr == inc->inc_faddr.s_addr)) || + ((cce->af == AF_INET6) && + IN6_ARE_ADDR_EQUAL(&cce->cce_client_ip.v6, &inc->inc6_laddr) && + IN6_ARE_ADDR_EQUAL(&cce->cce_server_ip.v6, &inc->inc6_faddr)))) + break; + + return (cce); +} + +static struct tcp_fastopen_ccache_entry * +tcp_fastopen_ccache_create(struct tcp_fastopen_ccache_bucket *ccb, + struct in_conninfo *inc, uint16_t mss, uint8_t cookie_len, uint8_t *cookie) +{ + struct tcp_fastopen_ccache_entry *cce; + + /* + * 1. Create a new entry, or + * 2. Reclaim an existing entry, or + * 3. Fail + */ + + CCB_LOCK_ASSERT(ccb); + + cce = NULL; + if (ccb->ccb_num_entries < V_tcp_fastopen_ccache.bucket_limit) + cce = uma_zalloc(V_tcp_fastopen_ccache.zone, M_NOWAIT); + + if (cce == NULL) { + /* + * At bucket limit, or out of memory - reclaim last + * entry in bucket. + */ + cce = TAILQ_LAST(&ccb->ccb_entries, bucket_entries); + if (cce == NULL) { + /* XXX count this event */ + return (NULL); + } + } + + TAILQ_INSERT_HEAD(&ccb->ccb_entries, cce, cce_link); + cce->af = (inc->inc_flags & INC_ISIPV6) ? AF_INET6 : AF_INET; + if (cce->af == AF_INET) { + cce->cce_client_ip.v4 = inc->inc_laddr; + cce->cce_server_ip.v4 = inc->inc_faddr; + } else { + cce->cce_client_ip.v6 = inc->inc6_laddr; + cce->cce_server_ip.v6 = inc->inc6_faddr; + } + cce->server_port = inc->inc_fport; + if ((cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) && + ((cookie_len & 0x1) == 0)) { + cce->server_mss = mss; + cce->cookie_len = cookie_len; + memcpy(cce->cookie, cookie, cookie_len); + cce->disable_time = 0; + } else { + /* invalid cookie length, disable cce */ + cce->server_mss = 0; + cce->cookie_len = 0; + cce->disable_time = getsbinuptime(); + } + + return (cce); +} + +static void +tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *ccb, + unsigned int limit) +{ + struct tcp_fastopen_ccache_entry *cce, *cce_tmp; + unsigned int entries; + + CCB_LOCK(ccb); + entries = 0; + TAILQ_FOREACH_SAFE(cce, &ccb->ccb_entries, cce_link, cce_tmp) { + entries++; + if (entries > limit) + tcp_fastopen_ccache_entry_drop(cce, ccb); + } + KASSERT(ccb->ccb_num_entries == limit, + ("%s: ccb->ccb_num_entries %d not %d", __func__, + ccb->ccb_num_entries, limit)); + if (limit == 0) { + KASSERT(TAILQ_EMPTY(&ccb->ccb_entries), + ("%s: ccb->ccb_entries not empty", __func__)); + ccb->ccb_num_entries = -1; /* disable bucket */ + } + CCB_UNLOCK(ccb); +} + +static void +tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *cce, + struct tcp_fastopen_ccache_bucket *ccb) +{ + + CCB_LOCK_ASSERT(ccb); + + TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link); + ccb->ccb_num_entries--; + uma_zfree(V_tcp_fastopen_ccache.zone, cce); +} + Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c +++ sys/netinet/tcp_input.c @@ -100,9 +100,6 @@ #include #include #include -#ifdef TCP_RFC7413 -#include -#endif #include #include #include @@ -111,6 +108,7 @@ #include #include #include +#include #ifdef TCPPCAP #include #endif @@ -1129,9 +1127,7 @@ rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } -#ifdef TCP_RFC7413 tfo_socket_result: -#endif if (so == NULL) { /* * We completed the 3-way handshake @@ -1374,12 +1370,9 @@ #endif TCP_PROBE3(debug__input, tp, th, 
m); tcp_dooptions(&to, optp, optlen, TO_SYN); -#ifdef TCP_RFC7413 if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL)) goto tfo_socket_result; -#else - syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL); -#endif + /* * Entry added to syncache and mbuf consumed. * Only the listen socket is unlocked by syncache_add(). @@ -1549,9 +1542,7 @@ struct in_conninfo *inc; struct mbuf *mfree; struct tcpopt to; -#ifdef TCP_RFC7413 int tfo_syn; -#endif #ifdef TCPDEBUG /* @@ -1716,6 +1707,13 @@ if ((tp->t_flags & TF_SACK_PERMIT) && (to.to_flags & TOF_SACKPERM) == 0) tp->t_flags &= ~TF_SACK_PERMIT; + if (IS_FASTOPEN(tp->t_flags)) { + if (to.to_flags & TOF_FASTOPEN) + tcp_fastopen_update_cache(tp, to.to_mss, + to.to_tfo_len, to.to_tfo_cookie); + else + tcp_fastopen_disable_path(tp); + } } /* @@ -1973,7 +1971,6 @@ rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } -#ifdef TCP_RFC7413 if (IS_FASTOPEN(tp->t_flags)) { /* * When a TFO connection is in SYN_RECEIVED, the @@ -1994,7 +1991,6 @@ goto drop; } } -#endif break; /* @@ -2026,6 +2022,8 @@ tp->irs = th->th_seq; tcp_rcvseqinit(tp); if (thflags & TH_ACK) { + int tfo_partial_ack = 0; + TCPSTAT_INC(tcps_connects); soisconnected(so); #ifdef MAC @@ -2040,10 +2038,19 @@ TCP_MAXWIN << tp->rcv_scale); tp->snd_una++; /* SYN is acked */ /* + * If not all the data that was sent in the TFO SYN + * has been acked, resend the remainder right away. + */ + if (IS_FASTOPEN(tp->t_flags) && + (tp->snd_una != tp->snd_max)) { + tp->snd_nxt = th->th_ack; + tfo_partial_ack = 1; + } + /* * If there's data, delay ACK; if there's also a FIN * ACKNOW will be turned on later. */ - if (DELAY_ACK(tp, tlen) && tlen != 0) + if (DELAY_ACK(tp, tlen) && tlen != 0 && !tfo_partial_ack) tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); else @@ -2402,13 +2409,11 @@ if ((thflags & TH_ACK) == 0) { if (tp->t_state == TCPS_SYN_RECEIVED || (tp->t_flags & TF_NEEDSYN)) { -#ifdef TCP_RFC7413 if (tp->t_state == TCPS_SYN_RECEIVED && IS_FASTOPEN(tp->t_flags)) { tp->snd_wnd = tiwin; cc_conn_init(tp); } -#endif goto step6; } else if (tp->t_flags & TF_ACKNOW) goto dropafterack; @@ -2449,7 +2454,6 @@ tcp_state_change(tp, TCPS_ESTABLISHED); TCP_PROBE5(accept__established, NULL, tp, m, tp, th); -#ifdef TCP_RFC7413 if (tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; @@ -2468,7 +2472,6 @@ * is retransmitted. */ if (!IS_FASTOPEN(tp->t_flags)) -#endif cc_conn_init(tp); tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); } @@ -3034,12 +3037,8 @@ * case PRU_RCVD). If a FIN has already been received on this * connection then we just ignore the text. */ -#ifdef TCP_RFC7413 tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) && IS_FASTOPEN(tp->t_flags)); -#else -#define tfo_syn (false) -#endif if ((tlen || (thflags & TH_FIN) || tfo_syn) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { tcp_seq save_start = th->th_seq; @@ -3263,9 +3262,6 @@ if (tp != NULL) INP_WUNLOCK(tp->t_inpcb); m_freem(m); -#ifndef TCP_RFC7413 -#undef tfo_syn -#endif } /* @@ -3419,7 +3415,6 @@ to->to_sacks = cp + 2; TCPSTAT_INC(tcps_sack_rcv_blocks); break; -#ifdef TCP_RFC7413 case TCPOPT_FAST_OPEN: if ((optlen != TCPOLEN_FAST_OPEN_EMPTY) && (optlen < TCPOLEN_FAST_OPEN_MIN) && (optlen > TCPOLEN_FAST_OPEN_MAX)) continue; if (!(flags & TO_SYN)) continue; - if (!V_tcp_fastopen_enabled) + if (!V_tcp_fastopen_client_enabled && + !V_tcp_fastopen_server_enabled) continue; to->to_flags |= TOF_FASTOPEN; to->to_tfo_len = optlen - 2; to->to_tfo_cookie = to->to_tfo_len ?
cp + 2 : NULL; break; -#endif default: continue; } Index: sys/netinet/tcp_output.c =================================================================== --- sys/netinet/tcp_output.c +++ sys/netinet/tcp_output.c @@ -71,9 +71,6 @@ #include #include #endif -#ifdef TCP_RFC7413 -#include -#endif #include #define TCPOUTFLAGS #include @@ -82,6 +79,7 @@ #include #include #include +#include #ifdef TCPPCAP #include #endif @@ -212,6 +210,8 @@ struct sackhole *p; int tso, mtu; struct tcpopt to; + unsigned int wanted_cookie = 0; + unsigned int dont_sendalot = 0; #if 0 int maxburst = TCP_MAXBURST; #endif @@ -229,7 +229,6 @@ return (tcp_offload_output(tp)); #endif -#ifdef TCP_RFC7413 /* * For TFO connections in SYN_RECEIVED, only allow the initial * SYN|ACK and those sent by the retransmit timer. @@ -237,9 +236,9 @@ if (IS_FASTOPEN(tp->t_flags) && (tp->t_state == TCPS_SYN_RECEIVED) && SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN|ACK sent */ - (tp->snd_nxt != tp->snd_una)) /* not a retransmit */ + (tp->snd_nxt != tp->snd_una)) /* not a retransmit */ return (0); -#endif + /* * Determine length of data that should be transmitted, * and flags that will be used. @@ -425,7 +424,6 @@ if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) { if (tp->t_state != TCPS_SYN_RECEIVED) flags &= ~TH_SYN; -#ifdef TCP_RFC7413 /* * When sending additional segments following a TFO SYN|ACK, * do not include the SYN bit. @@ -433,7 +431,6 @@ if (IS_FASTOPEN(tp->t_flags) && (tp->t_state == TCPS_SYN_RECEIVED)) flags &= ~TH_SYN; -#endif off--, len++; } @@ -447,17 +444,24 @@ flags &= ~TH_FIN; } -#ifdef TCP_RFC7413 /* - * When retransmitting SYN|ACK on a passively-created TFO socket, - * don't include data, as the presence of data may have caused the - * original SYN|ACK to have been dropped by a middlebox. + * On TFO sockets, ensure no data is sent in the following cases: + * + * - When retransmitting SYN|ACK on a passively-created socket + * + * - When retransmitting SYN on an actively created socket + * + * - When sending a zero-length cookie (cookie request) on an + * actively created socket + * + * - When the socket is in the CLOSED state (RST is being sent) */ if (IS_FASTOPEN(tp->t_flags) && - (((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)) || + (((flags & TH_SYN) && (tp->t_rxtshift > 0)) || + ((tp->t_state == TCPS_SYN_SENT) && + (tp->t_tfo_client_cookie_len == 0)) || (flags & TH_RST))) len = 0; -#endif if (len <= 0) { /* * If FIN has been sent but not acked, @@ -761,22 +765,38 @@ tp->snd_nxt = tp->iss; to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc); to.to_flags |= TOF_MSS; -#ifdef TCP_RFC7413 /* - * Only include the TFO option on the first - * transmission of the SYN|ACK on a - * passively-created TFO socket, as the presence of - * the TFO option may have caused the original - * SYN|ACK to have been dropped by a middlebox. + * On SYN or SYN|ACK transmits on TFO connections, + * only include the TFO option if it is not a + * retransmit, as the presence of the TFO option may + * have caused the original SYN or SYN|ACK to have + * been dropped by a middlebox. 
*/ if (IS_FASTOPEN(tp->t_flags) && - (tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift == 0)) { - to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN; - to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie; - to.to_flags |= TOF_FASTOPEN; + if (tp->t_state == TCPS_SYN_RECEIVED) { + to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN; + to.to_tfo_cookie = + (u_int8_t *)&tp->t_tfo_cookie.server; + to.to_flags |= TOF_FASTOPEN; + wanted_cookie = 1; + } else if (tp->t_state == TCPS_SYN_SENT) { + to.to_tfo_len = + tp->t_tfo_client_cookie_len; + to.to_tfo_cookie = + tp->t_tfo_cookie.client; + to.to_flags |= TOF_FASTOPEN; + wanted_cookie = 1; + /* + * If we wind up having more data to + * send with the SYN than can fit in + * one segment, don't send any more + * until the SYN|ACK comes back from + * the other end. + */ + dont_sendalot = 1; + } } -#endif } /* Window scaling. */ if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) { @@ -820,6 +840,14 @@ /* Processing the options. */ hdrlen += optlen = tcp_addoptions(&to, opt); + + /* + * If we wanted a TFO option to be added, but it was unable + * to fit, ensure no data is sent. + */ + if (IS_FASTOPEN(tp->t_flags) && wanted_cookie && + !(to.to_flags & TOF_FASTOPEN)) + len = 0; } /* @@ -964,6 +992,8 @@ } else { len = tp->t_maxseg - optlen - ipoptlen; sendalot = 1; + if (dont_sendalot) + sendalot = 0; } } else tso = 0; @@ -1767,15 +1797,15 @@ TCPSTAT_INC(tcps_sack_send_blocks); break; } -#ifdef TCP_RFC7413 case TOF_FASTOPEN: { int total_len; - /* XXX is there any point to aligning this option? */ total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len; - if (TCP_MAXOLEN - optlen < total_len) + if (TCP_MAXOLEN - optlen < total_len) { + to->to_flags &= ~TOF_FASTOPEN; continue; + } *optp++ = TCPOPT_FAST_OPEN; *optp++ = total_len; if (to->to_tfo_len > 0) { @@ -1785,7 +1815,6 @@ optlen += total_len; break; } -#endif default: panic("%s: unknown TCP option type", __func__); break; Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -93,9 +93,6 @@ #include #endif -#ifdef TCP_RFC7413 -#include -#endif #include #include #include @@ -107,6 +104,7 @@ #include #endif #include +#include #ifdef TCPPCAP #include #endif @@ -755,9 +753,7 @@ V_sack_hole_zone = uma_zcreate("sackhole", sizeof(struct sackhole), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); -#ifdef TCP_RFC7413 tcp_fastopen_init(); -#endif /* Skip initialization of globals for non-default instances. */ if (!IS_DEFAULT_VNET(curvnet)) @@ -844,13 +840,11 @@ uma_zdestroy(V_sack_hole_zone); uma_zdestroy(V_tcpcb_zone); -#ifdef TCP_RFC7413 /* * Cannot free the zone until all tcpcbs are released as we attach * the allocations to them. 
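For reference, the TOF_FASTOPEN case added to tcp_addoptions() above writes the option as kind, total length, then the cookie bytes, where a zero-length cookie is a cookie request. The standalone sketch below (not part of the patch) shows the same wire encoding; the EX_* constants are local stand-ins for the kernel's TCPOPT_FAST_OPEN (34, the kind assigned by RFC 7413) and TCPOLEN_FAST_OPEN_EMPTY (2):

#include <stdint.h>
#include <string.h>

#define EX_TCPOPT_FAST_OPEN        34   /* option kind assigned by RFC 7413 */
#define EX_TCPOLEN_FAST_OPEN_EMPTY  2   /* kind + length octets, no cookie */

/* Append a TFO option to a TCP option buffer; returns bytes written. */
static unsigned int
example_encode_tfo_option(uint8_t *optp, const uint8_t *cookie,
    unsigned int cookie_len)
{
        unsigned int total_len = EX_TCPOLEN_FAST_OPEN_EMPTY + cookie_len;

        *optp++ = EX_TCPOPT_FAST_OPEN;
        *optp++ = total_len;
        if (cookie_len > 0)
                memcpy(optp, cookie, cookie_len);   /* empty = cookie request */
        return (total_len);
}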
*/ tcp_fastopen_destroy(); -#endif #ifdef TCP_HHOOK error = hhook_head_deregister(V_tcp_hhh[HHOOK_TCP_EST_IN]); @@ -1647,7 +1641,6 @@ if (tp->t_state == TCPS_LISTEN) tcp_offload_listen_stop(tp); #endif -#ifdef TCP_RFC7413 /* * This releases the TFO pending counter resource for TFO listen * sockets as well as passively-created TFO sockets that transition @@ -1657,7 +1650,6 @@ tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; } -#endif in_pcbdrop(inp); TCPSTAT_INC(tcps_closed); if (tp->t_state != TCPS_CLOSED) @@ -2407,6 +2399,9 @@ if (tp->t_state != TCPS_SYN_SENT) return (inp); + if (IS_FASTOPEN(tp->t_flags)) + tcp_fastopen_disable_path(tp); + tp = tcp_drop(tp, errno); if (tp != NULL) return (inp); Index: sys/netinet/tcp_syncache.h =================================================================== --- sys/netinet/tcp_syncache.h +++ sys/netinet/tcp_syncache.h @@ -75,9 +75,7 @@ #endif struct label *sc_label; /* MAC label reference */ struct ucred *sc_cred; /* cred cache for jail checks */ -#ifdef TCP_RFC7413 void *sc_tfo_cookie; /* for TCP Fast Open response */ -#endif void *sc_pspare; /* TCP_SIGNATURE */ u_int32_t sc_spare[2]; /* UTO */ }; Index: sys/netinet/tcp_syncache.c =================================================================== --- sys/netinet/tcp_syncache.c +++ sys/netinet/tcp_syncache.c @@ -83,9 +83,7 @@ #include #endif #include -#ifdef TCP_RFC7413 #include -#endif #include #include #include @@ -1176,7 +1174,6 @@ return (0); } -#ifdef TCP_RFC7413 static void syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m, uint64_t response_cookie) @@ -1201,14 +1198,13 @@ inp = sotoinpcb(*lsop); tp = intotcpcb(inp); tp->t_flags |= TF_FASTOPEN; - tp->t_tfo_cookie = response_cookie; + tp->t_tfo_cookie.server = response_cookie; tp->snd_max = tp->iss; tp->snd_nxt = tp->iss; tp->t_tfo_pending = pending_counter; TCPSTAT_INC(tcps_sc_completed); } } -#endif /* TCP_RFC7413 */ /* * Given a LISTEN socket and an inbound SYN request, add @@ -1251,12 +1247,10 @@ #endif struct syncache scs; struct ucred *cred; -#ifdef TCP_RFC7413 uint64_t tfo_response_cookie; unsigned int *tfo_pending = NULL; int tfo_cookie_valid = 0; int tfo_response_cookie_valid = 0; -#endif INP_WLOCK_ASSERT(inp); /* listen socket */ KASSERT((th->th_flags & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN, @@ -1281,9 +1275,9 @@ win = so->sol_sbrcv_hiwat; ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE)); -#ifdef TCP_RFC7413 - if (V_tcp_fastopen_enabled && IS_FASTOPEN(tp->t_flags) && - (tp->t_tfo_pending != NULL) && (to->to_flags & TOF_FASTOPEN)) { + if (V_tcp_fastopen_server_enabled && IS_FASTOPEN(tp->t_flags) && + (tp->t_tfo_pending != NULL) && + (to->to_flags & TOF_FASTOPEN)) { /* * Limit the number of pending TFO connections to * approximately half of the queue limit. This prevents TFO @@ -1307,7 +1301,6 @@ */ tfo_pending = tp->t_tfo_pending; } -#endif /* By the time we drop the lock these should no longer be used. 
*/ so = NULL; @@ -1320,9 +1313,7 @@ } else mac_syncache_create(maclabel, inp); #endif -#ifdef TCP_RFC7413 if (!tfo_cookie_valid) -#endif INP_WUNLOCK(inp); /* @@ -1368,10 +1359,8 @@ sc = syncache_lookup(inc, &sch); /* returns locked entry */ SCH_LOCK_ASSERT(sch); if (sc != NULL) { -#ifdef TCP_RFC7413 if (tfo_cookie_valid) INP_WUNLOCK(inp); -#endif TCPSTAT_INC(tcps_sc_dupsyn); if (ipopts) { /* @@ -1414,13 +1403,11 @@ goto done; } -#ifdef TCP_RFC7413 if (tfo_cookie_valid) { bzero(&scs, sizeof(scs)); sc = &scs; goto skip_alloc; } -#endif sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO); if (sc == NULL) { @@ -1448,11 +1435,9 @@ } } -#ifdef TCP_RFC7413 skip_alloc: if (!tfo_cookie_valid && tfo_response_cookie_valid) sc->sc_tfo_cookie = &tfo_response_cookie; -#endif /* * Fill in the syncache values. @@ -1561,14 +1546,12 @@ #endif SCH_UNLOCK(sch); -#ifdef TCP_RFC7413 if (tfo_cookie_valid) { syncache_tfo_expand(sc, lsop, m, tfo_response_cookie); /* INP_WUNLOCK(inp) will be performed by the caller */ rv = 1; goto tfo_expanded; } -#endif /* * Do a standard 3-way handshake. @@ -1591,7 +1574,7 @@ *lsop = NULL; m_freem(m); } -#ifdef TCP_RFC7413 + /* * If tfo_pending is not NULL here, then a TFO SYN that did not * result in a new socket was processed and the associated pending @@ -1602,7 +1585,6 @@ tcp_fastopen_decrement_counter(tfo_pending); tfo_expanded: -#endif if (cred != NULL) crfree(cred); #ifdef MAC @@ -1739,7 +1721,6 @@ if (sc->sc_flags & SCF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif -#ifdef TCP_RFC7413 if (sc->sc_tfo_cookie) { to.to_flags |= TOF_FASTOPEN; to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN; @@ -1747,7 +1728,6 @@ /* don't send cookie again when retransmitting response */ sc->sc_tfo_cookie = NULL; } -#endif optlen = tcp_addoptions(&to, (u_char *)(th + 1)); /* Adjust headers by option size. */ Index: sys/netinet/tcp_usrreq.c =================================================================== --- sys/netinet/tcp_usrreq.c +++ sys/netinet/tcp_usrreq.c @@ -85,9 +85,6 @@ #include #include #endif -#ifdef TCP_RFC7413 -#include -#endif #include #include #include @@ -95,6 +92,7 @@ #include #include #include +#include #ifdef TCPPCAP #include #endif @@ -429,11 +427,9 @@ #endif } SOCK_UNLOCK(so); - -#ifdef TCP_RFC7413 if (IS_FASTOPEN(tp->t_flags)) tp->t_tfo_pending = tcp_fastopen_alloc_counter(); -#endif + out: TCPDEBUG2(PRU_LISTEN); TCP_PROBE2(debug__user, tp, PRU_LISTEN); @@ -479,11 +475,9 @@ #endif } SOCK_UNLOCK(so); - -#ifdef TCP_RFC7413 if (IS_FASTOPEN(tp->t_flags)) tp->t_tfo_pending = tcp_fastopen_alloc_counter(); -#endif + out: TCPDEBUG2(PRU_LISTEN); TCP_PROBE2(debug__user, tp, PRU_LISTEN); @@ -848,7 +842,7 @@ } tp = intotcpcb(inp); TCPDEBUG1(); -#ifdef TCP_RFC7413 + /* * For passively-created TFO connections, don't attempt a window * update while still in SYN_RECEIVED as this may trigger an early @@ -859,7 +853,6 @@ if (IS_FASTOPEN(tp->t_flags) && (tp->t_state == TCPS_SYN_RECEIVED)) goto out; -#endif #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) tcp_offload_rcvd(tp); @@ -950,8 +943,13 @@ #endif if (error) goto out; - tp->snd_wnd = TTCP_CLIENT_SND_WND; - tcp_mss(tp, -1); + if (IS_FASTOPEN(tp->t_flags)) + tcp_fastopen_connect(tp); + else + { + tp->snd_wnd = TTCP_CLIENT_SND_WND; + tcp_mss(tp, -1); + } } if (flags & PRUS_EOF) { /* @@ -997,6 +995,12 @@ * initialize window to default value, and * initialize maxseg using peer's cached MSS. 
*/ + + /* + * Not going to contemplate SYN|URG + */ + if (IS_FASTOPEN(tp->t_flags)) + tp->t_flags &= ~TF_FASTOPEN; #ifdef INET6 if (isipv6) error = tcp6_connect(tp, nam, td); @@ -1768,28 +1772,52 @@ goto unlock_and_done; #endif -#ifdef TCP_RFC7413 - case TCP_FASTOPEN: + case TCP_FASTOPEN: { + struct tcp_fastopen tfo_optval; + INP_WUNLOCK(inp); - if (!V_tcp_fastopen_enabled) + if (!V_tcp_fastopen_client_enabled && + !V_tcp_fastopen_server_enabled) return (EPERM); - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); + error = sooptcopyin(sopt, &tfo_optval, + sizeof(tfo_optval), sizeof(int)); if (error) return (error); INP_WLOCK_RECHECK(inp); - if (optval) { - tp->t_flags |= TF_FASTOPEN; - if ((tp->t_state == TCPS_LISTEN) && - (tp->t_tfo_pending == NULL)) - tp->t_tfo_pending = - tcp_fastopen_alloc_counter(); + if (tfo_optval.enable) { + if (tp->t_state == TCPS_LISTEN) { + if (!V_tcp_fastopen_server_enabled) { + error = EPERM; + goto unlock_and_done; + } + + tp->t_flags |= TF_FASTOPEN; + if (tp->t_tfo_pending == NULL) + tp->t_tfo_pending = + tcp_fastopen_alloc_counter(); + } else { + /* + * If a pre-shared key was provided, + * stash it in the client cookie + * field of the tcpcb for use during + * connect. + */ + if (sopt->sopt_valsize == + sizeof(tfo_optval)) { + memcpy(tp->t_tfo_cookie.client, + tfo_optval.psk, + TCP_FASTOPEN_PSK_LEN); + tp->t_tfo_client_cookie_len = + TCP_FASTOPEN_PSK_LEN; + } + tp->t_flags |= TF_FASTOPEN; + } } else tp->t_flags &= ~TF_FASTOPEN; goto unlock_and_done; -#endif + } default: INP_WUNLOCK(inp); @@ -1871,14 +1899,11 @@ error = sooptcopyout(sopt, &optval, sizeof optval); break; #endif - -#ifdef TCP_RFC7413 case TCP_FASTOPEN: optval = tp->t_flags & TF_FASTOPEN; INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; -#endif default: INP_WUNLOCK(inp); error = ENOPROTOOPT; Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -191,10 +191,12 @@ u_int t_flags2; /* More tcpcb flags storage */ struct tcp_function_block *t_fb;/* TCP function call block */ void *t_fb_ptr; /* Pointer to t_fb specific data */ -#ifdef TCP_RFC7413 - uint64_t t_tfo_cookie; /* TCP Fast Open cookie */ - unsigned int *t_tfo_pending; /* TCP Fast Open pending counter */ -#endif + uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */ + unsigned int *t_tfo_pending; /* TCP Fast Open server pending counter */ + union { + uint8_t client[TCP_FASTOPEN_MAX_COOKIE_LEN]; + uint64_t server; + } t_tfo_cookie; /* TCP Fast Open cookie to send */ #ifdef TCPPCAP struct mbufq t_inpkts; /* List of saved input packets. */ struct mbufq t_outpkts; /* List of saved output packets. 
*/ @@ -322,11 +324,7 @@ #define ENTER_RECOVERY(t_flags) t_flags |= (TF_CONGRECOVERY | TF_FASTRECOVERY) #define EXIT_RECOVERY(t_flags) t_flags &= ~(TF_CONGRECOVERY | TF_FASTRECOVERY) -#if defined(_KERNEL) && !defined(TCP_RFC7413) -#define IS_FASTOPEN(t_flags) (false) -#else #define IS_FASTOPEN(t_flags) (t_flags & TF_FASTOPEN) -#endif #define BYTES_THIS_ACK(tp, th) (th->th_ack - tp->snd_una) @@ -365,7 +363,7 @@ u_int32_t to_tsecr; /* reflected timestamp */ u_char *to_sacks; /* pointer to the first SACK blocks */ u_char *to_signature; /* pointer to the TCP-MD5 signature */ - u_char *to_tfo_cookie; /* pointer to the TFO cookie */ + u_int8_t *to_tfo_cookie; /* pointer to the TFO cookie */ u_int16_t to_mss; /* maximum segment size */ u_int8_t to_wscale; /* window scaling */ u_int8_t to_nsacks; /* number of SACK blocks */
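To show how the pieces above might fit together from userspace, here is a hedged sketch based on the struct tcp_fastopen option payload added to tcp.h and the TCP_FASTOPEN handling added to tcp_usrreq.c: the server marks its listen socket with the option, and the client supplies the 16-byte PSK and then uses sendto(2) with an implied connect so that data can ride on the SYN once a cookie has been cached. The addresses, PSK value, and omitted error handling are placeholders, and the relevant net.inet.tcp.fastopen.*_enabled sysctls are assumed to be set; this is an illustration, not part of the patch.

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdint.h>
#include <string.h>

/* Server side: allow passive TFO connections on a listen socket. */
static int
tfo_listen(const struct sockaddr_in *sin)
{
        struct tcp_fastopen tfo = { .enable = 1 };
        int s = socket(AF_INET, SOCK_STREAM, 0);

        bind(s, (const struct sockaddr *)sin, sizeof(*sin));
        listen(s, 128);
        setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &tfo, sizeof(tfo));
        return (s);
}

/* Client side: enable TFO with a PSK and send data on the SYN. */
static int
tfo_send(const struct sockaddr_in *sin,
    const uint8_t psk[TCP_FASTOPEN_PSK_LEN], const char *msg)
{
        struct tcp_fastopen tfo = { .enable = 1 };
        int s = socket(AF_INET, SOCK_STREAM, 0);

        memcpy(tfo.psk, psk, TCP_FASTOPEN_PSK_LEN);
        setsockopt(s, IPPROTO_TCP, TCP_FASTOPEN, &tfo, sizeof(tfo));
        /*
         * sendto(2) performs an implied connect; with TF_FASTOPEN set,
         * tcp_fastopen_connect() supplies the cached cookie (or a cookie
         * request) for the SYN.
         */
        sendto(s, msg, strlen(msg), 0, (const struct sockaddr *)sin,
            sizeof(*sin));
        return (s);
}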