Index: sys/conf/options =================================================================== --- sys/conf/options +++ sys/conf/options @@ -451,6 +451,7 @@ TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading TCP_RFC7413 opt_inet.h TCP_RFC7413_MAX_KEYS opt_inet.h +TCP_RFC7413_MAX_PSKS opt_inet.h TCP_SIGNATURE opt_ipsec.h VLAN_ARRAY opt_vlan.h XBONEHACK Index: sys/netinet/tcp.h =================================================================== --- sys/netinet/tcp.h +++ sys/netinet/tcp.h @@ -101,8 +101,6 @@ #define TCPOLEN_SIGNATURE 18 #define TCPOPT_FAST_OPEN 34 #define TCPOLEN_FAST_OPEN_EMPTY 2 -#define TCPOLEN_FAST_OPEN_MIN 6 -#define TCPOLEN_FAST_OPEN_MAX 18 /* Miscellaneous constants */ #define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */ @@ -152,6 +150,10 @@ #define TCP_MAXHLEN (0xf<<2) /* max length of header in bytes */ #define TCP_MAXOLEN (TCP_MAXHLEN - sizeof(struct tcphdr)) /* max space left for options */ + +#define TCP_FASTOPEN_MIN_COOKIE_LEN 4 /* Per RFC7413 */ +#define TCP_FASTOPEN_MAX_COOKIE_LEN 16 /* Per RFC7413 */ +#define TCP_FASTOPEN_PSK_LEN 16 /* Same as TCP_FASTOPEN_KEY_LEN */ #endif /* __BSD_VISIBLE */ /* @@ -252,6 +254,16 @@ /* Padding to grow without breaking ABI. */ u_int32_t __tcpi_pad[26]; /* Padding. */ }; + +/* + * If this structure is provided when setting the TCP_FASTOPEN socket + * option, and the enable member is non-zero, a subsequent connect will use + * pre-shared key (PSK) mode using the provided key. + */ +struct tcp_fastopen { + int enable; + uint8_t psk[TCP_FASTOPEN_PSK_LEN]; +}; #endif #define TCP_FUNCTION_NAME_LEN_MAX 32 Index: sys/netinet/tcp_fastopen.h =================================================================== --- sys/netinet/tcp_fastopen.h +++ sys/netinet/tcp_fastopen.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2015 Patrick Kelsey + * Copyright (c) 2015-2017 Patrick Kelsey * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -31,17 +31,59 @@ #ifdef _KERNEL -#define TCP_FASTOPEN_COOKIE_LEN 8 /* tied to SipHash24 64-bit output */ +#define TCP_FASTOPEN_COOKIE_LEN 8 /* SipHash24 64-bit output */ -VNET_DECLARE(unsigned int, tcp_fastopen_enabled); -#define V_tcp_fastopen_enabled VNET(tcp_fastopen_enabled) +VNET_DECLARE(unsigned int, tcp_fastopen_client_enable); +#define V_tcp_fastopen_client_enable VNET(tcp_fastopen_client_enable) + +VNET_DECLARE(unsigned int, tcp_fastopen_server_enable); +#define V_tcp_fastopen_server_enable VNET(tcp_fastopen_server_enable) + +union tcp_fastopen_ip_addr { + struct in_addr v4; + struct in6_addr v6; +}; + +struct tcp_fastopen_ccache_entry { + TAILQ_ENTRY(tcp_fastopen_ccache_entry) cce_link; + union tcp_fastopen_ip_addr cce_client_ip; /* network byte order */ + union tcp_fastopen_ip_addr cce_server_ip; /* network byte order */ + uint16_t server_port; /* network byte order */ + uint16_t server_mss; /* host byte order */ + uint8_t af; + uint8_t cookie_len; + uint8_t cookie[TCP_FASTOPEN_MAX_COOKIE_LEN]; + sbintime_t disable_time; /* non-zero value means path is disabled */ +}; + +struct tcp_fastopen_ccache; + +struct tcp_fastopen_ccache_bucket { + struct mtx ccb_mtx; + TAILQ_HEAD(bucket_entries, tcp_fastopen_ccache_entry) ccb_entries; + int ccb_num_entries; + struct tcp_fastopen_ccache *ccb_ccache; +}; + +struct tcp_fastopen_ccache { + uma_zone_t zone; + struct tcp_fastopen_ccache_bucket *base; + unsigned int bucket_limit; + unsigned int buckets; + unsigned int mask; + uint32_t secret; +}; void tcp_fastopen_init(void); void tcp_fastopen_destroy(void); unsigned int *tcp_fastopen_alloc_counter(void); -void tcp_fastopen_decrement_counter(unsigned int *counter); -int tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie, - unsigned int len, uint64_t *latest_cookie); +void tcp_fastopen_decrement_counter(unsigned int *); +int tcp_fastopen_check_cookie(struct in_conninfo *, uint8_t *, 
unsigned int, + uint64_t *); +void tcp_fastopen_connect(struct tcpcb *); +void tcp_fastopen_disable_path(struct tcpcb *); +void tcp_fastopen_update_cache(struct tcpcb *, uint16_t, uint8_t, + uint8_t *); #endif /* _KERNEL */ #endif /* _TCP_FASTOPEN_H_ */ Index: sys/netinet/tcp_fastopen.c =================================================================== --- sys/netinet/tcp_fastopen.c +++ sys/netinet/tcp_fastopen.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2015 Patrick Kelsey + * Copyright (c) 2015-2017 Patrick Kelsey * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,23 +25,44 @@ */ /* - * This is a server-side implementation of TCP Fast Open (TFO) [RFC7413]. - * - * This implementation is currently considered to be experimental and is not - * included in kernel builds by default. To include this code, add the - * following line to your kernel config: + * This is an implementation of TCP Fast Open (TFO) [RFC7413]. To include + * this code, add the following line to your kernel config: * * options TCP_RFC7413 * + * * The generated TFO cookies are the 64-bit output of - * SipHash24(<16-byte-key><client-ip>). Multiple concurrent valid keys are - * supported so that time-based rolling cookie invalidation policies can be - * implemented in the system. The default number of concurrent keys is 2. - * This can be adjusted in the kernel config as follows: + * SipHash24(key=<16-byte-key>, msg=<client-ip>). Multiple concurrent valid + * keys are supported so that time-based rolling cookie invalidation + * policies can be implemented in the system. The default number of + * concurrent keys is 2. 
This can be adjusted in the kernel config as + * follows: * * options TCP_RFC7413_MAX_KEYS=<num-keys> * * + * In addition to the facilities defined in RFC7413, this implementation + * supports a pre-shared key (PSK) mode of operation in which the TFO server + * requires the client to be in possession of a shared secret in order for + * the client to be able to successfully open TFO connections with the + * server. This is useful, for example, in environments where TFO servers + * are exposed to both internal and external clients and only wish to allow + * TFO connections from internal clients. + * + * In the PSK mode of operation, the server generates and sends TFO cookies + * to requesting clients as usual. However, when validating cookies + * received in TFO SYNs from clients, the server requires the + * client-supplied cookie to equal SipHash24(key=<16-byte-psk>, + * msg=<cookie-sent-to-client>). + * + * Multiple concurrent valid pre-shared keys are supported so that + * time-based rolling PSK invalidation policies can be implemented in the + * system. The default number of concurrent pre-shared keys is 2. This can + * be adjusted in the kernel config as follows: + * + * options TCP_RFC7413_MAX_PSKS=<num-psks> + * + * * The following TFO-specific sysctls are defined: * * net.inet.tcp.fastopen.acceptany (RW, default 0) @@ -49,31 +70,72 @@ * be valid. * * net.inet.tcp.fastopen.autokey (RW, default 120) - * When this and net.inet.tcp.fastopen.enabled are non-zero, a new key - * will be automatically generated after this many seconds. - * - * net.inet.tcp.fastopen.enabled (RW, default 0) - * When zero, no new TFO connections can be created. On the transition - * from enabled to disabled, all installed keys are removed. On the - * transition from disabled to enabled, if net.inet.tcp.fastopen.autokey - * is non-zero and there are no keys installed, a new key will be - * generated immediately. 
The transition from enabled to disabled does - * not affect any TFO connections in progress; it only prevents new ones - * from being made. - * - * net.inet.tcp.fastopen.keylen (RO) + * When this and net.inet.tcp.fastopen.server_enable are non-zero, a new + * key will be automatically generated after this many seconds. + * + * net.inet.tcp.fastopen.ccache_bucket_limit + * (RWTUN, default TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT) + * The maximum number of entries in a client cookie cache bucket. + * + * net.inet.tcp.fastopen.ccache_buckets + * (RDTUN, default TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT) + * The number of client cookie cache buckets. + * + * net.inet.tcp.fastopen.client_enable (RW, default 0) + * When zero, no new active (i.e., client) TFO connections can be + * created. On the transition from enabled to disabled, the client + * cookie cache is cleared and disabled. The transition from enabled to + * disabled does not affect any active TFO connections in progress; it + * only prevents new ones from being made. + * + * net.inet.tcp.fastopen.keylen (RD) * The key length in bytes. * - * net.inet.tcp.fastopen.maxkeys (RO) + * net.inet.tcp.fastopen.maxkeys (RD) * The maximum number of keys supported. * - * net.inet.tcp.fastopen.numkeys (RO) + * net.inet.tcp.fastopen.maxpsks (RD) + * The maximum number of pre-shared keys supported. + * + * net.inet.tcp.fastopen.numkeys (RD) * The current number of keys installed. * - * net.inet.tcp.fastopen.setkey (WO) - * Install a new key by writing net.inet.tcp.fastopen.keylen bytes to this - * sysctl. + * net.inet.tcp.fastopen.numpsks (RD) + * The current number of pre-shared keys installed. 
+ * + * net.inet.tcp.fastopen.path_disable_time + * (RW, default TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT) + * When a failure occurs while trying to create a new active (i.e., + * client) TFO connection, new active connections on the same path, as + * determined by the tuple {client_ip, server_ip, server_port}, will be + * forced to be non-TFO for this many seconds. Note that the path + * disable mechanism relies on state stored in client cookie cache + * entries, so it is possible for the disable time for a given path to + * be reduced if the corresponding client cookie cache entry is reused + * due to resource pressure before the disable period has elapsed. * + * net.inet.tcp.fastopen.psk_enable (RW, default 0) + * When non-zero, pre-shared key (PSK) mode is enabled for all TFO + * servers. On the transition from enabled to disabled, all installed + * pre-shared keys are removed. + * + * net.inet.tcp.fastopen.server_enable (RW, default 0) + * When zero, no new passive (i.e., server) TFO connections can be + * created. On the transition from enabled to disabled, all installed + * keys and pre-shared keys are removed. On the transition from + * disabled to enabled, if net.inet.tcp.fastopen.autokey is non-zero and + * there are no keys installed, a new key will be generated immediately. + * The transition from enabled to disabled does not affect any passive + * TFO connections in progress; it only prevents new ones from being + * made. + * + * net.inet.tcp.fastopen.setkey (WR) + * Install a new key by writing net.inet.tcp.fastopen.keylen bytes to + * this sysctl. + * + * net.inet.tcp.fastopen.setpsk (WR) + * Install a new pre-shared key by writing net.inet.tcp.fastopen.keylen + * bytes to this sysctl. * * In order for TFO connections to be created via a listen socket, that * socket must have the TCP_FASTOPEN socket option set on it. 
This option @@ -105,6 +167,7 @@ #include #include +#include #include #include #include @@ -119,21 +182,56 @@ #include #include -#include #include +#include #define TCP_FASTOPEN_KEY_LEN SIPHASH_KEY_LENGTH +#if TCP_FASTOPEN_PSK_LEN != TCP_FASTOPEN_KEY_LEN +#error TCP_FASTOPEN_PSK_LEN must be equal to TCP_FASTOPEN_KEY_LEN +#endif + +/* + * Because a PSK-mode setsockopt() uses tcpcb.t_tfo_cookie.client to hold + * the PSK until the connect occurs. + */ +#if TCP_FASTOPEN_MAX_COOKIE_LEN < TCP_FASTOPEN_PSK_LEN +#error TCP_FASTOPEN_MAX_COOKIE_LEN must be >= TCP_FASTOPEN_PSK_LEN +#endif + +#define TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT 16 +#define TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT 2048 /* must be power of 2 */ + +#define TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT 900 /* seconds */ + #if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1) #define TCP_FASTOPEN_MAX_KEYS 2 #else #define TCP_FASTOPEN_MAX_KEYS TCP_RFC7413_MAX_KEYS #endif +#if TCP_FASTOPEN_MAX_KEYS > 10 +#undef TCP_FASTOPEN_MAX_KEYS +#define TCP_FASTOPEN_MAX_KEYS 10 +#endif + +#if !defined(TCP_RFC7413_MAX_PSKS) || (TCP_RFC7413_MAX_PSKS < 1) +#define TCP_FASTOPEN_MAX_PSKS 2 +#else +#define TCP_FASTOPEN_MAX_PSKS TCP_RFC7413_MAX_PSKS +#endif + +#if TCP_FASTOPEN_MAX_PSKS > 10 +#undef TCP_FASTOPEN_MAX_PSKS +#define TCP_FASTOPEN_MAX_PSKS 10 +#endif + struct tcp_fastopen_keylist { unsigned int newest; + unsigned int newest_psk; uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN]; + uint8_t psk[TCP_FASTOPEN_MAX_PSKS][TCP_FASTOPEN_KEY_LEN]; }; struct tcp_fastopen_callout { @@ -141,6 +239,16 @@ struct vnet *v; }; +static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_lookup( + struct in_conninfo *, struct tcp_fastopen_ccache_bucket **); +static struct tcp_fastopen_ccache_entry *tcp_fastopen_ccache_create( + struct tcp_fastopen_ccache_bucket *, struct in_conninfo *, uint16_t, uint8_t, + uint8_t *); +static void tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *, + unsigned int); +static 
void tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *, + struct tcp_fastopen_ccache_bucket *); + SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW, 0, "TCP Fast Open"); static VNET_DEFINE(int, tcp_fastopen_acceptany) = 0; @@ -157,12 +265,25 @@ &sysctl_net_inet_tcp_fastopen_autokey, "IU", "Number of seconds between auto-generation of a new key; zero disables"); -VNET_DEFINE(unsigned int, tcp_fastopen_enabled) = 0; -static int sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS); -SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, enabled, +static int sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, ccache_bucket_limit, + CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RWTUN, NULL, 0, + &sysctl_net_inet_tcp_fastopen_ccache_bucket_limit, "IU", + "Max entries per bucket in client cookie cache"); + +static VNET_DEFINE(unsigned int, tcp_fastopen_ccache_buckets) = + TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT; +#define V_tcp_fastopen_ccache_buckets VNET(tcp_fastopen_ccache_buckets) +SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, ccache_buckets, + CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(tcp_fastopen_ccache_buckets), 0, + "Client cookie cache number of buckets (power of 2)"); + +VNET_DEFINE(unsigned int, tcp_fastopen_client_enable) = 0; +static int sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, client_enable, CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, - &sysctl_net_inet_tcp_fastopen_enabled, "IU", - "Enable/disable TCP Fast Open processing"); + &sysctl_net_inet_tcp_fastopen_client_enable, "IU", + "Enable/disable TCP Fast Open client functionality"); SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN, @@ -172,18 +293,56 @@ CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_KEYS, "Maximum number of keys supported"); +SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxpsks, + 
CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_MAX_PSKS, + "Maximum number of pre-shared keys supported"); + static VNET_DEFINE(unsigned int, tcp_fastopen_numkeys) = 0; #define V_tcp_fastopen_numkeys VNET(tcp_fastopen_numkeys) SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys, CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0, "Number of keys installed"); +static VNET_DEFINE(unsigned int, tcp_fastopen_numpsks) = 0; +#define V_tcp_fastopen_numpsks VNET(tcp_fastopen_numpsks) +SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numpsks, + CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numpsks), 0, + "Number of pre-shared keys installed"); + +static VNET_DEFINE(unsigned int, tcp_fastopen_path_disable_time) = + TCP_FASTOPEN_PATH_DISABLE_TIME_DEFAULT; +#define V_tcp_fastopen_path_disable_time VNET(tcp_fastopen_path_disable_time) +SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, path_disable_time, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_path_disable_time), 0, + "Seconds a TFO failure disables a {client_ip, server_ip, server_port} path"); + +static VNET_DEFINE(unsigned int, tcp_fastopen_psk_enable) = 0; +#define V_tcp_fastopen_psk_enable VNET(tcp_fastopen_psk_enable) +static int sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, psk_enable, + CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, + &sysctl_net_inet_tcp_fastopen_psk_enable, "IU", + "Enable/disable TCP Fast Open server pre-shared key mode"); + +VNET_DEFINE(unsigned int, tcp_fastopen_server_enable) = 0; +static int sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, server_enable, + CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, + &sysctl_net_inet_tcp_fastopen_server_enable, "IU", + "Enable/disable TCP Fast Open server functionality"); + static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey, 
CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0, &sysctl_net_inet_tcp_fastopen_setkey, "", "Install a new key"); +static int sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setpsk, + CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0, + &sysctl_net_inet_tcp_fastopen_setpsk, "", + "Install a new pre-shared key"); + static VNET_DEFINE(struct rmlock, tcp_fastopen_keylock); #define V_tcp_fastopen_keylock VNET(tcp_fastopen_keylock) @@ -201,9 +360,21 @@ static VNET_DEFINE(uma_zone_t, counter_zone); #define V_counter_zone VNET(counter_zone) +static MALLOC_DEFINE(M_TCP_FASTOPEN_CCACHE, "tfo_ccache", "TFO client cookie cache buckets"); + +static VNET_DEFINE(struct tcp_fastopen_ccache, tcp_fastopen_ccache); +#define V_tcp_fastopen_ccache VNET(tcp_fastopen_ccache) + +#define CCB_LOCK(ccb) mtx_lock(&(ccb)->ccb_mtx) +#define CCB_UNLOCK(ccb) mtx_unlock(&(ccb)->ccb_mtx) +#define CCB_LOCK_ASSERT(ccb) mtx_assert(&(ccb)->ccb_mtx, MA_OWNED) + + void tcp_fastopen_init(void) { + unsigned int i; + V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); rm_init(&V_tcp_fastopen_keylock, "tfo_keylock"); @@ -211,11 +382,67 @@ &V_tcp_fastopen_keylock, 0); V_tcp_fastopen_autokey_ctx.v = curvnet; V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1; + V_tcp_fastopen_keys.newest_psk = TCP_FASTOPEN_MAX_PSKS - 1; + + /* May already be non-zero if kernel tunable was set */ + if (V_tcp_fastopen_ccache.bucket_limit == 0) + V_tcp_fastopen_ccache.bucket_limit = + TCP_FASTOPEN_CCACHE_BUCKET_LIMIT_DEFAULT; + + /* May already be non-zero if kernel tunable was set */ + if ((V_tcp_fastopen_ccache_buckets == 0) || + !powerof2(V_tcp_fastopen_ccache_buckets)) + V_tcp_fastopen_ccache.buckets = + TCP_FASTOPEN_CCACHE_BUCKETS_DEFAULT; + else + V_tcp_fastopen_ccache.buckets = V_tcp_fastopen_ccache_buckets; + + V_tcp_fastopen_ccache.mask = V_tcp_fastopen_ccache.buckets - 1; + V_tcp_fastopen_ccache.secret = 
arc4random(); + + V_tcp_fastopen_ccache.base = malloc(V_tcp_fastopen_ccache.buckets * + sizeof(struct tcp_fastopen_ccache_bucket), M_TCP_FASTOPEN_CCACHE, + M_WAITOK | M_ZERO); + + for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) { + TAILQ_INIT(&V_tcp_fastopen_ccache.base[i].ccb_entries); + mtx_init(&V_tcp_fastopen_ccache.base[i].ccb_mtx, "tfo_ccache_bucket", + NULL, MTX_DEF); + V_tcp_fastopen_ccache.base[i].ccb_num_entries = -1; /* bucket disabled */ + V_tcp_fastopen_ccache.base[i].ccb_ccache = &V_tcp_fastopen_ccache; + } + + /* + * Note that while the total number of entries in the cookie cache + * is limited by the table management logic to + * V_tcp_fastopen_ccache.buckets * + * V_tcp_fastopen_ccache.bucket_limit, the total number of items in + * this zone can exceed that amount by the number of CPUs in the + * system times the maximum number of unallocated items that can be + * present in each UMA per-CPU cache for this zone. + */ + V_tcp_fastopen_ccache.zone = uma_zcreate("tfo_ccache_entries", + sizeof(struct tcp_fastopen_ccache_entry), NULL, NULL, NULL, NULL, + UMA_ALIGN_CACHE, 0); } void tcp_fastopen_destroy(void) { + struct tcp_fastopen_ccache_bucket *ccb; + unsigned int i; + + for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) { + ccb = &V_tcp_fastopen_ccache.base[i]; + tcp_fastopen_ccache_bucket_trim(ccb, 0); + mtx_destroy(&ccb->ccb_mtx); + } + + KASSERT(uma_zone_get_cur(V_tcp_fastopen_ccache.zone) == 0, + ("%s: TFO ccache zone allocation count not 0", __func__)); + uma_zdestroy(V_tcp_fastopen_ccache.zone); + free(V_tcp_fastopen_ccache.base, M_TCP_FASTOPEN_CCACHE); + callout_drain(&V_tcp_fastopen_autokey_ctx.c); rm_destroy(&V_tcp_fastopen_keylock); uma_zdestroy(V_counter_zone); @@ -254,6 +481,19 @@ } static void +tcp_fastopen_addpsk_locked(uint8_t *psk) +{ + + V_tcp_fastopen_keys.newest_psk++; + if (V_tcp_fastopen_keys.newest_psk == TCP_FASTOPEN_MAX_PSKS) + V_tcp_fastopen_keys.newest_psk = 0; + 
memcpy(V_tcp_fastopen_keys.psk[V_tcp_fastopen_keys.newest_psk], psk, + TCP_FASTOPEN_KEY_LEN); + if (V_tcp_fastopen_numpsks < TCP_FASTOPEN_MAX_PSKS) + V_tcp_fastopen_numpsks++; +} + +static void tcp_fastopen_autokey_locked(void) { uint8_t newkey[TCP_FASTOPEN_KEY_LEN]; @@ -300,6 +540,49 @@ return (siphash); } +static uint64_t +tcp_fastopen_make_psk_cookie(uint8_t *psk, uint8_t *cookie, uint8_t cookie_len) +{ + SIPHASH_CTX ctx; + uint64_t psk_cookie; + + SipHash24_Init(&ctx); + SipHash_SetKey(&ctx, psk); + SipHash_Update(&ctx, cookie, cookie_len); + SipHash_Final((u_int8_t *)&psk_cookie, &ctx); + + return (psk_cookie); +} + +static int +tcp_fastopen_find_cookie_match_locked(uint8_t *wire_cookie, uint64_t *cur_cookie) +{ + unsigned int i, psk_index; + uint64_t psk_cookie; + + if (V_tcp_fastopen_psk_enable) { + psk_index = V_tcp_fastopen_keys.newest_psk; + for (i = 0; i < V_tcp_fastopen_numpsks; i++) { + psk_cookie = + tcp_fastopen_make_psk_cookie( + V_tcp_fastopen_keys.psk[psk_index], + (uint8_t *)cur_cookie, + TCP_FASTOPEN_COOKIE_LEN); + + if (memcmp(wire_cookie, &psk_cookie, + TCP_FASTOPEN_COOKIE_LEN) == 0) + return (1); + + if (psk_index == 0) + psk_index = TCP_FASTOPEN_MAX_PSKS - 1; + else + psk_index--; + } + } else if (memcmp(wire_cookie, cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0) + return (1); + + return (0); +} /* * Return values: @@ -344,10 +627,9 @@ inc); if (i == 0) *latest_cookie = cur_cookie; - if (memcmp(cookie, &cur_cookie, TCP_FASTOPEN_COOKIE_LEN) == 0) { - rv = 1; + rv = tcp_fastopen_find_cookie_match_locked(cookie, &cur_cookie); + if (rv) goto out; - } if (key_index == 0) key_index = TCP_FASTOPEN_MAX_KEYS - 1; else @@ -355,7 +637,7 @@ } rv = 0; -out: + out: TCP_FASTOPEN_KEYS_RUNLOCK(&tracker); return (rv); } @@ -373,7 +655,7 @@ return (EINVAL); TCP_FASTOPEN_KEYS_WLOCK(); - if (V_tcp_fastopen_enabled) { + if (V_tcp_fastopen_server_enable) { if (V_tcp_fastopen_autokey && !new) callout_stop(&V_tcp_fastopen_autokey_ctx.c); else if (new) @@ -389,24 +671,54 
@@ } static int -sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS) +sysctl_net_inet_tcp_fastopen_psk_enable(SYSCTL_HANDLER_ARGS) +{ + int error; + unsigned int new; + + new = V_tcp_fastopen_psk_enable; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr) { + if (V_tcp_fastopen_psk_enable && !new) { + /* enabled -> disabled */ + TCP_FASTOPEN_KEYS_WLOCK(); + V_tcp_fastopen_numpsks = 0; + V_tcp_fastopen_keys.newest_psk = + TCP_FASTOPEN_MAX_PSKS - 1; + V_tcp_fastopen_psk_enable = 0; + TCP_FASTOPEN_KEYS_WUNLOCK(); + } else if (!V_tcp_fastopen_psk_enable && new) { + /* disabled -> enabled */ + TCP_FASTOPEN_KEYS_WLOCK(); + V_tcp_fastopen_psk_enable = 1; + TCP_FASTOPEN_KEYS_WUNLOCK(); + } + } + return (error); +} + +static int +sysctl_net_inet_tcp_fastopen_server_enable(SYSCTL_HANDLER_ARGS) { int error; unsigned int new; - new = V_tcp_fastopen_enabled; + new = V_tcp_fastopen_server_enable; error = sysctl_handle_int(oidp, &new, 0, req); if (error == 0 && req->newptr) { - if (V_tcp_fastopen_enabled && !new) { + if (V_tcp_fastopen_server_enable && !new) { /* enabled -> disabled */ TCP_FASTOPEN_KEYS_WLOCK(); V_tcp_fastopen_numkeys = 0; V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1; if (V_tcp_fastopen_autokey) callout_stop(&V_tcp_fastopen_autokey_ctx.c); - V_tcp_fastopen_enabled = 0; + V_tcp_fastopen_numpsks = 0; + V_tcp_fastopen_keys.newest_psk = + TCP_FASTOPEN_MAX_PSKS - 1; + V_tcp_fastopen_server_enable = 0; TCP_FASTOPEN_KEYS_WUNLOCK(); - } else if (!V_tcp_fastopen_enabled && new) { + } else if (!V_tcp_fastopen_server_enable && new) { /* disabled -> enabled */ TCP_FASTOPEN_KEYS_WLOCK(); if (V_tcp_fastopen_autokey && @@ -417,7 +729,7 @@ tcp_fastopen_autokey_callout, &V_tcp_fastopen_autokey_ctx); } - V_tcp_fastopen_enabled = 1; + V_tcp_fastopen_server_enable = 1; TCP_FASTOPEN_KEYS_WUNLOCK(); } } @@ -446,3 +758,369 @@ return (0); } + +static int +sysctl_net_inet_tcp_fastopen_setpsk(SYSCTL_HANDLER_ARGS) +{ + int error; + uint8_t 
newpsk[TCP_FASTOPEN_KEY_LEN]; + + if (req->oldptr != NULL || req->oldlen != 0) + return (EINVAL); + if (req->newptr == NULL) + return (EPERM); + if (req->newlen != sizeof(newpsk)) + return (EINVAL); + error = SYSCTL_IN(req, newpsk, sizeof(newpsk)); + if (error) + return (error); + + TCP_FASTOPEN_KEYS_WLOCK(); + tcp_fastopen_addpsk_locked(newpsk); + TCP_FASTOPEN_KEYS_WUNLOCK(); + + return (0); +} + +static int +sysctl_net_inet_tcp_fastopen_ccache_bucket_limit(SYSCTL_HANDLER_ARGS) +{ + struct tcp_fastopen_ccache_bucket *ccb; + int error; + unsigned int new; + unsigned int i; + + new = V_tcp_fastopen_ccache.bucket_limit; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr) { + if ((new == 0) || (new > INT_MAX)) + error = EINVAL; + else { + if (new < V_tcp_fastopen_ccache.bucket_limit) { + for (i = 0; i < V_tcp_fastopen_ccache.buckets; + i++) { + ccb = &V_tcp_fastopen_ccache.base[i]; + tcp_fastopen_ccache_bucket_trim(ccb, new); + } + } + V_tcp_fastopen_ccache.bucket_limit = new; + } + + } + return (error); +} + +static int +sysctl_net_inet_tcp_fastopen_client_enable(SYSCTL_HANDLER_ARGS) +{ + struct tcp_fastopen_ccache_bucket *ccb; + int error; + unsigned int new, i; + + new = V_tcp_fastopen_client_enable; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr) { + if (V_tcp_fastopen_client_enable && !new) { + /* enabled -> disabled */ + for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) { + ccb = &V_tcp_fastopen_ccache.base[i]; + tcp_fastopen_ccache_bucket_trim(ccb, 0); + } + V_tcp_fastopen_client_enable = 0; + } else if (!V_tcp_fastopen_client_enable && new) { + /* disabled -> enabled */ + for (i = 0; i < V_tcp_fastopen_ccache.buckets; i++) { + ccb = &V_tcp_fastopen_ccache.base[i]; + CCB_LOCK(ccb); + KASSERT(TAILQ_EMPTY(&ccb->ccb_entries), + ("%s: ccb->ccb_entries not empty", __func__)); + KASSERT(ccb->ccb_num_entries == -1, + ("%s: ccb->ccb_num_entries %d not -1", __func__, + ccb->ccb_num_entries)); + 
ccb->ccb_num_entries = 0; /* enable bucket */ + CCB_UNLOCK(ccb); + } + V_tcp_fastopen_client_enable = 1; + } + } + return (error); +} + +void +tcp_fastopen_connect(struct tcpcb *tp) +{ + struct inpcb *inp; + struct tcp_fastopen_ccache_bucket *ccb; + struct tcp_fastopen_ccache_entry *cce; + sbintime_t now; + uint16_t server_mss; + uint64_t psk_cookie; + + inp = tp->t_inpcb; + cce = tcp_fastopen_ccache_lookup(&inp->inp_inc, &ccb); + if (cce) { + if (cce->disable_time == 0) { + if ((cce->cookie_len > 0) && + (tp->t_tfo_client_cookie_len == + TCP_FASTOPEN_PSK_LEN)) { + psk_cookie = + tcp_fastopen_make_psk_cookie( + tp->t_tfo_cookie.client, + cce->cookie, cce->cookie_len); + } else { + tp->t_tfo_client_cookie_len = cce->cookie_len; + memcpy(tp->t_tfo_cookie.client, cce->cookie, + cce->cookie_len); + } + server_mss = cce->server_mss; + CCB_UNLOCK(ccb); + if (tp->t_tfo_client_cookie_len == + TCP_FASTOPEN_PSK_LEN) { + tp->t_tfo_client_cookie_len = + TCP_FASTOPEN_COOKIE_LEN; + memcpy(tp->t_tfo_cookie.client, &psk_cookie, + TCP_FASTOPEN_COOKIE_LEN); + } + tcp_mss(tp, server_mss ? server_mss : -1); + tp->snd_wnd = tp->t_maxseg; + } else { + /* + * The path is disabled. Check the time and + * possibly re-enable. + */ + now = getsbinuptime(); + if (now - cce->disable_time > + ((sbintime_t)V_tcp_fastopen_path_disable_time << 32)) { + /* + * Re-enable path. Force a TFO cookie + * request. Forget the old MSS as it may be + * bogus now, and we will rediscover it in + * the SYN|ACK. + */ + cce->disable_time = 0; + cce->server_mss = 0; + cce->cookie_len = 0; + /* + * tp->t_tfo... cookie details are already + * zero from the tcpcb init. + */ + } else { + /* + * Path is disabled, so disable TFO on this + * connection. + */ + tp->t_flags &= ~TF_FASTOPEN; + } + CCB_UNLOCK(ccb); + tcp_mss(tp, -1); + /* + * snd_wnd is irrelevant since we are either forcing + * a TFO cookie request or disabling TFO - either + * way, no data with the SYN. 
+ */ + } + } else { + /* + * A new entry for this path will be created when a SYN|ACK + * comes back, or the attempt otherwise fails. + */ + CCB_UNLOCK(ccb); + tcp_mss(tp, -1); + /* + * snd_wnd is irrelevant since we are forcing a TFO cookie + * request. + */ + } +} + +void +tcp_fastopen_disable_path(struct tcpcb *tp) +{ + struct in_conninfo *inc = &tp->t_inpcb->inp_inc; + struct tcp_fastopen_ccache_bucket *ccb; + struct tcp_fastopen_ccache_entry *cce; + + cce = tcp_fastopen_ccache_lookup(inc, &ccb); + if (cce) { + cce->server_mss = 0; + cce->cookie_len = 0; + /* + * Preserve the existing disable time if it is already + * disabled. + */ + if (cce->disable_time == 0) + cce->disable_time = getsbinuptime(); + } else /* use invalid cookie len to create disabled entry */ + tcp_fastopen_ccache_create(ccb, inc, 0, + TCP_FASTOPEN_MAX_COOKIE_LEN + 1, NULL); + + CCB_UNLOCK(ccb); + tp->t_flags &= ~TF_FASTOPEN; +} + +void +tcp_fastopen_update_cache(struct tcpcb *tp, uint16_t mss, + uint8_t cookie_len, uint8_t *cookie) +{ + struct in_conninfo *inc = &tp->t_inpcb->inp_inc; + struct tcp_fastopen_ccache_bucket *ccb; + struct tcp_fastopen_ccache_entry *cce; + + cce = tcp_fastopen_ccache_lookup(inc, &ccb); + if (cce) { + if ((cookie_len >= TCP_FASTOPEN_MIN_COOKIE_LEN) && + (cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) && + ((cookie_len & 0x1) == 0)) { + cce->server_mss = mss; + cce->cookie_len = cookie_len; + memcpy(cce->cookie, cookie, cookie_len); + cce->disable_time = 0; + } else { + /* invalid cookie length, disable entry */ + cce->server_mss = 0; + cce->cookie_len = 0; + /* + * Preserve the existing disable time if it is + * already disabled. 
+ */ + if (cce->disable_time == 0) + cce->disable_time = getsbinuptime(); + } + } else + tcp_fastopen_ccache_create(ccb, inc, mss, cookie_len, cookie); + + CCB_UNLOCK(ccb); +} + +static struct tcp_fastopen_ccache_entry * +tcp_fastopen_ccache_lookup(struct in_conninfo *inc, + struct tcp_fastopen_ccache_bucket **ccbp) +{ + struct tcp_fastopen_ccache_bucket *ccb; + struct tcp_fastopen_ccache_entry *cce; + uint32_t last_word; + uint32_t hash; + + hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependladdr, 4, + V_tcp_fastopen_ccache.secret); + hash = jenkins_hash32((uint32_t *)&inc->inc_ie.ie_dependfaddr, 4, + hash); + last_word = inc->inc_fport; + hash = jenkins_hash32(&last_word, 1, hash); + ccb = &V_tcp_fastopen_ccache.base[hash & V_tcp_fastopen_ccache.mask]; + *ccbp = ccb; + CCB_LOCK(ccb); + + /* + * Always returns with locked bucket. + */ + TAILQ_FOREACH(cce, &ccb->ccb_entries, cce_link) + if ((!(cce->af == AF_INET6) == !(inc->inc_flags & INC_ISIPV6)) && + (cce->server_port == inc->inc_ie.ie_fport) && + (((cce->af == AF_INET) && + (cce->cce_client_ip.v4.s_addr == inc->inc_laddr.s_addr) && + (cce->cce_server_ip.v4.s_addr == inc->inc_faddr.s_addr)) || + ((cce->af == AF_INET6) && + IN6_ARE_ADDR_EQUAL(&cce->cce_client_ip.v6, &inc->inc6_laddr) && + IN6_ARE_ADDR_EQUAL(&cce->cce_server_ip.v6, &inc->inc6_faddr)))) + break; + + return (cce); +} + +static struct tcp_fastopen_ccache_entry * +tcp_fastopen_ccache_create(struct tcp_fastopen_ccache_bucket *ccb, + struct in_conninfo *inc, uint16_t mss, uint8_t cookie_len, uint8_t *cookie) +{ + struct tcp_fastopen_ccache_entry *cce; + + /* + * 1. Create a new entry, or + * 2. Reclaim an existing entry, or + * 3. Fail + */ + + CCB_LOCK_ASSERT(ccb); + + cce = NULL; + if (ccb->ccb_num_entries < V_tcp_fastopen_ccache.bucket_limit) + cce = uma_zalloc(V_tcp_fastopen_ccache.zone, M_NOWAIT); + + if (cce == NULL) { + /* + * At bucket limit, or out of memory - reclaim last + * entry in bucket. 
+ */ + cce = TAILQ_LAST(&ccb->ccb_entries, bucket_entries); + if (cce == NULL) { + /* XXX count this event */ + return (NULL); + } + + TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link); + } else + ccb->ccb_num_entries++; + + TAILQ_INSERT_HEAD(&ccb->ccb_entries, cce, cce_link); + cce->af = (inc->inc_flags & INC_ISIPV6) ? AF_INET6 : AF_INET; + if (cce->af == AF_INET) { + cce->cce_client_ip.v4 = inc->inc_laddr; + cce->cce_server_ip.v4 = inc->inc_faddr; + } else { + cce->cce_client_ip.v6 = inc->inc6_laddr; + cce->cce_server_ip.v6 = inc->inc6_faddr; + } + cce->server_port = inc->inc_fport; + if ((cookie_len <= TCP_FASTOPEN_MAX_COOKIE_LEN) && + ((cookie_len & 0x1) == 0)) { + cce->server_mss = mss; + cce->cookie_len = cookie_len; + memcpy(cce->cookie, cookie, cookie_len); + cce->disable_time = 0; + } else { + /* invalid cookie length, disable cce */ + cce->server_mss = 0; + cce->cookie_len = 0; + cce->disable_time = getsbinuptime(); + } + + return (cce); +}
+
+/*
+ * Reduce a bucket to at most 'limit' entries, keeping the entries nearest
+ * the head of the list and dropping the rest.  A limit of zero empties
+ * the bucket and marks it disabled by setting ccb_num_entries to -1.
+ *
+ * Takes and releases the bucket lock itself.
+ */
+static void
+tcp_fastopen_ccache_bucket_trim(struct tcp_fastopen_ccache_bucket *ccb,
+    unsigned int limit)
+{
+	struct tcp_fastopen_ccache_entry *cce, *cce_tmp;
+	unsigned int entries;
+
+	CCB_LOCK(ccb);
+	entries = 0;
+	TAILQ_FOREACH_SAFE(cce, &ccb->ccb_entries, cce_link, cce_tmp) {
+		entries++;
+		if (entries > limit)
+			tcp_fastopen_ccache_entry_drop(cce, ccb);
+	}
+	/*
+	 * ccb_num_entries is a signed int and is -1 for a disabled bucket.
+	 * Comparing it directly against the unsigned limit would convert
+	 * -1 to UINT_MAX and fire this assertion when a disabled bucket is
+	 * trimmed again, so the negative case is accepted explicitly.
+	 */
+	KASSERT((ccb->ccb_num_entries < 0) ||
+	    ((unsigned int)ccb->ccb_num_entries <= limit),
+	    ("%s: ccb->ccb_num_entries %d exceeds limit %u", __func__,
+	    ccb->ccb_num_entries, limit));
+	if (limit == 0) {
+		KASSERT(TAILQ_EMPTY(&ccb->ccb_entries),
+		    ("%s: ccb->ccb_entries not empty", __func__));
+		ccb->ccb_num_entries = -1; /* disable bucket */
+	}
+	CCB_UNLOCK(ccb);
+}
+
+/*
+ * Unlink an entry from its bucket, decrement the bucket's entry count and
+ * return the entry to the cache zone.  The bucket lock must be held.
+ */
+static void
+tcp_fastopen_ccache_entry_drop(struct tcp_fastopen_ccache_entry *cce,
+    struct tcp_fastopen_ccache_bucket *ccb)
+{
+
+	CCB_LOCK_ASSERT(ccb);
+
+	TAILQ_REMOVE(&ccb->ccb_entries, cce, cce_link);
+	ccb->ccb_num_entries--;
+	uma_zfree(V_tcp_fastopen_ccache.zone, cce);
+}
+
Index: sys/netinet/tcp_input.c
=================================================================== --- sys/netinet/tcp_input.c +++ sys/netinet/tcp_input.c @@ -100,9 +100,6 @@ #include #include #include -#ifdef TCP_RFC7413 -#include -#endif #include #include #include @@ -111,6 +108,9 @@ #include #include #include +#ifdef TCP_RFC7413 +#include +#endif #ifdef TCPPCAP #include #endif @@ -1716,6 +1716,15 @@ if ((tp->t_flags & TF_SACK_PERMIT) && (to.to_flags & TOF_SACKPERM) == 0) tp->t_flags &= ~TF_SACK_PERMIT; +#ifdef TCP_RFC7413 + if (IS_FASTOPEN(tp->t_flags)) { + if (to.to_flags & TOF_FASTOPEN) + tcp_fastopen_update_cache(tp, to.to_mss, + to.to_tfo_len, to.to_tfo_cookie); + else + tcp_fastopen_disable_path(tp); + } +#endif } /* @@ -2026,6 +2035,8 @@ tp->irs = th->th_seq; tcp_rcvseqinit(tp); if (thflags & TH_ACK) { + int tfo_partial_ack = 0; + TCPSTAT_INC(tcps_connects); soisconnected(so); #ifdef MAC @@ -2040,10 +2051,19 @@ TCP_MAXWIN << tp->rcv_scale); tp->snd_una++; /* SYN is acked */ /* + * If not all the data that was sent in the TFO SYN + * has been acked, resend the remainder right away. + */ + if (IS_FASTOPEN(tp->t_flags) && + (tp->snd_una != tp->snd_max)) { + tp->snd_nxt = th->th_ack; + tfo_partial_ack = 1; + } + /* * If there's data, delay ACK; if there's also a FIN * ACKNOW will be turned on later. */ - if (DELAY_ACK(tp, tlen) && tlen != 0) + if (DELAY_ACK(tp, tlen) && tlen != 0 && !tfo_partial_ack) tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); else @@ -3421,13 +3441,15 @@ break; #ifdef TCP_RFC7413 case TCPOPT_FAST_OPEN: - if ((optlen != TCPOLEN_FAST_OPEN_EMPTY) && - (optlen < TCPOLEN_FAST_OPEN_MIN) && - (optlen > TCPOLEN_FAST_OPEN_MAX)) - continue; + /* + * Cookie length validation is performed by the + * server side cookie checking code or the client + * side cookie cache update code. 
+ */ if (!(flags & TO_SYN)) continue; - if (!V_tcp_fastopen_enabled) + if (!V_tcp_fastopen_client_enable && + !V_tcp_fastopen_server_enable) continue; to->to_flags |= TOF_FASTOPEN; to->to_tfo_len = optlen - 2; Index: sys/netinet/tcp_output.c =================================================================== --- sys/netinet/tcp_output.c +++ sys/netinet/tcp_output.c @@ -71,9 +71,6 @@ #include #include #endif -#ifdef TCP_RFC7413 -#include -#endif #include #define TCPOUTFLAGS #include @@ -82,6 +79,9 @@ #include #include #include +#ifdef TCP_RFC7413 +#include +#endif #ifdef TCPPCAP #include #endif @@ -212,6 +212,10 @@ struct sackhole *p; int tso, mtu; struct tcpopt to; +#ifdef TCP_RFC7413 + unsigned int wanted_cookie = 0; + unsigned int dont_sendalot = 0; +#endif #if 0 int maxburst = TCP_MAXBURST; #endif @@ -237,7 +241,7 @@ if (IS_FASTOPEN(tp->t_flags) && (tp->t_state == TCPS_SYN_RECEIVED) && SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN|ACK sent */ - (tp->snd_nxt != tp->snd_una)) /* not a retransmit */ + (tp->snd_nxt != tp->snd_una)) /* not a retransmit */ return (0); #endif /* @@ -449,12 +453,21 @@ #ifdef TCP_RFC7413 /* - * When retransmitting SYN|ACK on a passively-created TFO socket, - * don't include data, as the presence of data may have caused the - * original SYN|ACK to have been dropped by a middlebox. 
+ * On TFO sockets, ensure no data is sent in the following cases: + * + * - When retransmitting SYN|ACK on a passively-created socket + * + * - When retransmitting SYN on an actively created socket + * + * - When sending a zero-length cookie (cookie request) on an + * actively created socket + * + * - When the socket is in the CLOSED state (RST is being sent) */ if (IS_FASTOPEN(tp->t_flags) && - (((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)) || + (((flags & TH_SYN) && (tp->t_rxtshift > 0)) || + ((tp->t_state == TCPS_SYN_SENT) && + (tp->t_tfo_client_cookie_len == 0)) || (flags & TH_RST))) len = 0; #endif @@ -541,7 +554,7 @@ if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg && ((tp->t_flags & TF_SIGNATURE) == 0) && tp->rcv_numsacks == 0 && sack_rxmit == 0 && - ipoptlen == 0) + ipoptlen == 0 && !(flags & TH_SYN)) tso = 1; if (sack_rxmit) { @@ -763,18 +776,36 @@ to.to_flags |= TOF_MSS; #ifdef TCP_RFC7413 /* - * Only include the TFO option on the first - * transmission of the SYN|ACK on a - * passively-created TFO socket, as the presence of - * the TFO option may have caused the original - * SYN|ACK to have been dropped by a middlebox. + * On SYN or SYN|ACK transmits on TFO connections, + * only include the TFO option if it is not a + * retransmit, as the presence of the TFO option may + * have caused the original SYN or SYN|ACK to have + * been dropped by a middlebox. 
*/ if (IS_FASTOPEN(tp->t_flags) && - (tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift == 0)) { - to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN; - to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie; - to.to_flags |= TOF_FASTOPEN; + if (tp->t_state == TCPS_SYN_RECEIVED) { + to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN; + to.to_tfo_cookie = + (u_int8_t *)&tp->t_tfo_cookie.server; + to.to_flags |= TOF_FASTOPEN; + wanted_cookie = 1; + } else if (tp->t_state == TCPS_SYN_SENT) { + to.to_tfo_len = + tp->t_tfo_client_cookie_len; + to.to_tfo_cookie = + tp->t_tfo_cookie.client; + to.to_flags |= TOF_FASTOPEN; + wanted_cookie = 1; + /* + * If we wind up having more data to + * send with the SYN than can fit in + * one segment, don't send any more + * until the SYN|ACK comes back from + * the other end. + */ + dont_sendalot = 1; + } } #endif } @@ -820,6 +851,15 @@ /* Processing the options. */ hdrlen += optlen = tcp_addoptions(&to, opt); +#ifdef TCP_RFC7413 + /* + * If we wanted a TFO option to be added, but it was unable + * to fit, ensure no data is sent. + */ + if (IS_FASTOPEN(tp->t_flags) && wanted_cookie && + !(to.to_flags & TOF_FASTOPEN)) + len = 0; +#endif } /* @@ -964,6 +1004,10 @@ } else { len = tp->t_maxseg - optlen - ipoptlen; sendalot = 1; +#ifdef TCP_RFC7413 + if (dont_sendalot) + sendalot = 0; +#endif } } else tso = 0; @@ -1774,8 +1818,10 @@ /* XXX is there any point to aligning this option? 
*/ total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len; - if (TCP_MAXOLEN - optlen < total_len) + if (TCP_MAXOLEN - optlen < total_len) { + to->to_flags &= ~TOF_FASTOPEN; continue; + } *optp++ = TCPOPT_FAST_OPEN; *optp++ = total_len; if (to->to_tfo_len > 0) { Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -93,9 +93,6 @@ #include #endif -#ifdef TCP_RFC7413 -#include -#endif #include #include #include @@ -107,6 +104,9 @@ #include #endif #include +#ifdef TCP_RFC7413 +#include +#endif #ifdef TCPPCAP #include #endif @@ -2407,6 +2407,11 @@ if (tp->t_state != TCPS_SYN_SENT) return (inp); +#ifdef TCP_RFC7413 + if (IS_FASTOPEN(tp->t_flags)) + tcp_fastopen_disable_path(tp); +#endif + tp = tcp_drop(tp, errno); if (tp != NULL) return (inp); Index: sys/netinet/tcp_syncache.c =================================================================== --- sys/netinet/tcp_syncache.c +++ sys/netinet/tcp_syncache.c @@ -1201,7 +1201,7 @@ inp = sotoinpcb(*lsop); tp = intotcpcb(inp); tp->t_flags |= TF_FASTOPEN; - tp->t_tfo_cookie = response_cookie; + tp->t_tfo_cookie.server = response_cookie; tp->snd_max = tp->iss; tp->snd_nxt = tp->iss; tp->t_tfo_pending = pending_counter; @@ -1282,8 +1282,9 @@ ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE)); #ifdef TCP_RFC7413 - if (V_tcp_fastopen_enabled && IS_FASTOPEN(tp->t_flags) && - (tp->t_tfo_pending != NULL) && (to->to_flags & TOF_FASTOPEN)) { + if (V_tcp_fastopen_server_enable && IS_FASTOPEN(tp->t_flags) && + (tp->t_tfo_pending != NULL) && + (to->to_flags & TOF_FASTOPEN)) { /* * Limit the number of pending TFO connections to * approximately half of the queue limit. 
This prevents TFO Index: sys/netinet/tcp_usrreq.c =================================================================== --- sys/netinet/tcp_usrreq.c +++ sys/netinet/tcp_usrreq.c @@ -85,9 +85,6 @@ #include #include #endif -#ifdef TCP_RFC7413 -#include -#endif #include #include #include @@ -95,6 +92,9 @@ #include #include #include +#ifdef TCP_RFC7413 +#include +#endif #ifdef TCPPCAP #include #endif @@ -950,8 +950,15 @@ #endif if (error) goto out; - tp->snd_wnd = TTCP_CLIENT_SND_WND; - tcp_mss(tp, -1); +#ifdef TCP_RFC7413 + if (IS_FASTOPEN(tp->t_flags)) + tcp_fastopen_connect(tp); + else +#endif + { + tp->snd_wnd = TTCP_CLIENT_SND_WND; + tcp_mss(tp, -1); + } } if (flags & PRUS_EOF) { /* @@ -997,6 +1004,13 @@ * initialize window to default value, and * initialize maxseg using peer's cached MSS. */ +#ifdef TCP_RFC7413 + /* + * Not going to contemplate SYN|URG + */ + if (IS_FASTOPEN(tp->t_flags)) + tp->t_flags &= ~TF_FASTOPEN; +#endif #ifdef INET6 if (isipv6) error = tcp6_connect(tp, nam, td); @@ -1769,26 +1783,52 @@ #endif #ifdef TCP_RFC7413 - case TCP_FASTOPEN: + case TCP_FASTOPEN: { + struct tcp_fastopen tfo_optval; + INP_WUNLOCK(inp); - if (!V_tcp_fastopen_enabled) + if (!V_tcp_fastopen_client_enable && + !V_tcp_fastopen_server_enable) return (EPERM); - error = sooptcopyin(sopt, &optval, sizeof optval, - sizeof optval); + error = sooptcopyin(sopt, &tfo_optval, + sizeof(tfo_optval), sizeof(int)); if (error) return (error); INP_WLOCK_RECHECK(inp); - if (optval) { - tp->t_flags |= TF_FASTOPEN; - if ((tp->t_state == TCPS_LISTEN) && - (tp->t_tfo_pending == NULL)) - tp->t_tfo_pending = - tcp_fastopen_alloc_counter(); + if (tfo_optval.enable) { + if (tp->t_state == TCPS_LISTEN) { + if (!V_tcp_fastopen_server_enable) { + error = EPERM; + goto unlock_and_done; + } + + tp->t_flags |= TF_FASTOPEN; + if (tp->t_tfo_pending == NULL) + tp->t_tfo_pending = + tcp_fastopen_alloc_counter(); + } else { + /* + * If a pre-shared key was provided, + * stash it in the client cookie + * field 
of the tcpcb for use during + * connect. + */ + if (sopt->sopt_valsize == + sizeof(tfo_optval)) { + memcpy(tp->t_tfo_cookie.client, + tfo_optval.psk, + TCP_FASTOPEN_PSK_LEN); + tp->t_tfo_client_cookie_len = + TCP_FASTOPEN_PSK_LEN; + } + tp->t_flags |= TF_FASTOPEN; + } } else tp->t_flags &= ~TF_FASTOPEN; goto unlock_and_done; + } #endif default: Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -192,8 +192,12 @@ struct tcp_function_block *t_fb;/* TCP function call block */ void *t_fb_ptr; /* Pointer to t_fb specific data */ #ifdef TCP_RFC7413 - uint64_t t_tfo_cookie; /* TCP Fast Open cookie */ - unsigned int *t_tfo_pending; /* TCP Fast Open pending counter */ + uint8_t t_tfo_client_cookie_len; /* TCP Fast Open client cookie length */ + unsigned int *t_tfo_pending; /* TCP Fast Open server pending counter */ + union { + uint8_t client[TCP_FASTOPEN_MAX_COOKIE_LEN]; + uint64_t server; + } t_tfo_cookie; /* TCP Fast Open cookie to send */ #endif #ifdef TCPPCAP struct mbufq t_inpkts; /* List of saved input packets. */ @@ -365,7 +369,7 @@ u_int32_t to_tsecr; /* reflected timestamp */ u_char *to_sacks; /* pointer to the first SACK blocks */ u_char *to_signature; /* pointer to the TCP-MD5 signature */ - u_char *to_tfo_cookie; /* pointer to the TFO cookie */ + u_int8_t *to_tfo_cookie; /* pointer to the TFO cookie */ u_int16_t to_mss; /* maximum segment size */ u_int8_t to_wscale; /* window scaling */ u_int8_t to_nsacks; /* number of SACK blocks */