Index: share/man/man4/tcp.4 =================================================================== --- share/man/man4/tcp.4 +++ share/man/man4/tcp.4 @@ -34,7 +34,7 @@ .\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd February 13, 2021 +.Dd February 25, 2021 .Dt TCP 4 .Os .Sh NAME @@ -644,6 +644,23 @@ specific connection. This is needed to help with connection establishment when a broken firewall is in the network path. +.It Va ecn.generalized +Enable sending all segments as ECN capable transport, +including SYN, SYN/ACK, and retransmissions. +This may only be enabled when ECN support itself is also active. +Disabling ECN support will disable this feature automatically. +Settings: +.Bl -tag -compact +.It 0 +Regular RFC 3168 operation. +Send only new data segments as ECN capable transport. +(default) +.It 1 +Support generalized ECN (ECN++), and send all segments of an ECN-enabled +session as ECN capable transport. +Also control packets to non-established and non-listening ports are +identically marked. +.El .It Va pmtud_blackhole_detection Enable automatic path MTU blackhole detection. 
In case of retransmits of MSS sized segments, Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c +++ sys/netinet/tcp_input.c @@ -203,8 +203,10 @@ "TCP ECN"); VNET_DEFINE(int, tcp_do_ecn) = 2; -SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(tcp_do_ecn), 0, +static int sysctl_net_inet_tcp_ecn_enable_check(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_ecn, OID_AUTO, enable, + CTLFLAG_VNET | CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_NEEDGIANT, + &VNET_NAME(tcp_do_ecn), 0, &sysctl_net_inet_tcp_ecn_enable_check, "IU", "TCP ECN support"); VNET_DEFINE(int, tcp_ecn_maxretries) = 1; @@ -212,6 +214,13 @@ &VNET_NAME(tcp_ecn_maxretries), 0, "Max retries before giving up on ECN"); +VNET_DEFINE(int, tcp_ecn_generalized) = 0; +static int sysctl_net_inet_tcp_ecn_generalized_check(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_ecn, OID_AUTO, generalized, + CTLFLAG_VNET | CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_NEEDGIANT, + &VNET_NAME(tcp_ecn_generalized), 0, &sysctl_net_inet_tcp_ecn_generalized_check, "IU", + "Send all packets as ECT"); + VNET_DEFINE(int, tcp_insecure_syn) = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_syn, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_insecure_syn), 0, @@ -4065,3 +4074,45 @@ return (4 * maxseg); } } +/* Sysctl handler registered above for the "enable" node under _net_inet_tcp_ecn. Reads report the current V_tcp_do_ecn. A write is accepted only for values 0..2 (larger values, including negative inputs seen as unsigned, return EINVAL with no state change). An accepted write stores the value in V_tcp_do_ecn, and a write of 0 additionally clears V_tcp_ecn_generalized so generalized ECN never stays on while ECN is off. Returns 0 or the error from sysctl_handle_int(). */ +static int +sysctl_net_inet_tcp_ecn_enable_check(SYSCTL_HANDLER_ARGS) +{ + uint32_t new; + int error; + + new = V_tcp_do_ecn; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr != NULL) { + if (new > 2) + error = EINVAL; + else { + V_tcp_do_ecn = new; + if (new == 0) + V_tcp_ecn_generalized = new; + } + } + + return (error); +} +/* Sysctl handler registered above for the "generalized" node under _net_inet_tcp_ecn. Reads report the current V_tcp_ecn_generalized. A write is accepted only for 0 or 1; a write of 1 is additionally refused with EINVAL while V_tcp_do_ecn is 0, i.e. generalized ECN cannot be switched on unless ECN itself is enabled. Rejected writes leave V_tcp_ecn_generalized untouched. Returns 0 or the error from sysctl_handle_int(). */ +static int +sysctl_net_inet_tcp_ecn_generalized_check(SYSCTL_HANDLER_ARGS) +{ + uint32_t new; + int error; + + new = V_tcp_ecn_generalized; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr != NULL) { + if (new > 1) + error = EINVAL; + else + if 
(!V_tcp_do_ecn && new == 1) + error = EINVAL; + else + V_tcp_ecn_generalized = new; + } + + return (error); +} Index: sys/netinet/tcp_output.c =================================================================== --- sys/netinet/tcp_output.c +++ sys/netinet/tcp_output.c @@ -1181,17 +1181,32 @@ tp->t_flags2 &= ~TF2_ECN_SND_ECE; } - if (tp->t_state == TCPS_ESTABLISHED && - (tp->t_flags2 & TF2_ECN_PERMIT)) { + if (((tp->t_state == TCPS_ESTABLISHED) && + (tp->t_flags2 & TF2_ECN_PERMIT)) || + ((tp->t_state > TCPS_ESTABLISHED) && + (tp->t_flags2 & TF2_ECN_PERMIT) && + V_tcp_ecn_generalized) || + /* + * Note that a passive open SYN,ACK + * is actually sent from tcp_syncache + */ + (((flags & (TH_SYN|TH_ACK)) == (TH_SYN)) && + ((flags & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) && + V_tcp_ecn_generalized) || + (((flags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) && + (flags & (TH_CWR|TH_ECE)) && + V_tcp_ecn_generalized)) { /* - * If the peer has ECN, mark data packets with - * ECN capable transmission (ECT). - * Ignore pure ack packets, retransmissions and window probes. + * If the peer has ECN, mark new data packets + * with ECN capable transmission (ECT). + * Ignore pure ack packets, retransmissions and + * window probes unless doing generalized ECN. */ - if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && - (sack_rxmit == 0) && + if (V_tcp_ecn_generalized || + (len > 0 && (sack_rxmit == 0) && + SEQ_GEQ(tp->snd_nxt, tp->snd_max) && !((tp->t_flags & TF_FORCEDATA) && len == 1 && - SEQ_LT(tp->snd_una, tp->snd_max))) { + SEQ_LT(tp->snd_una, tp->snd_max)))) { #ifdef INET6 if (isipv6) ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); @@ -1203,7 +1218,11 @@ * Reply with proper ECN notifications. * Only set CWR on new data segments. 
*/ - if (tp->t_flags2 & TF2_ECN_SND_CWR) { + if (tp->t_flags2 & TF2_ECN_SND_CWR && + (len > 0 && (sack_rxmit == 0) && + SEQ_GEQ(tp->snd_nxt, tp->snd_max) && + !((tp->t_flags & TF_FORCEDATA) && len == 1 && + SEQ_LT(tp->snd_una, tp->snd_max)))) { flags |= TH_CWR; tp->t_flags2 &= ~TF2_ECN_SND_CWR; } Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -13435,15 +13435,29 @@ flags |= TH_ECE; tp->t_flags2 &= ~TF2_ECN_SND_ECE; } - if (tp->t_state == TCPS_ESTABLISHED && - (tp->t_flags2 & TF2_ECN_PERMIT)) { + if ((tp->t_state == TCPS_ESTABLISHED && + (tp->t_flags2 & TF2_ECN_PERMIT)) || + ((tp->t_state > TCPS_ESTABLISHED) && + (tp->t_flags2 & TF2_ECN_PERMIT) && + V_tcp_ecn_generalized) || + /* + * Note that a passive open SYN,ACK + * is actually sent from tcp_syncache + */ + (((flags & (TH_SYN|TH_ACK)) == (TH_SYN)) && + ((flags & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) && + V_tcp_ecn_generalized) || + (((flags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) && + (flags & (TH_CWR|TH_ECE)) && + V_tcp_ecn_generalized)) { /* * If the peer has ECN, mark data packets with ECN capable * transmission (ECT). Ignore pure ack packets, - * retransmissions. + * retransmissions unless doing generalized ECN. */ - if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && - (sack_rxmit == 0)) { + if (V_tcp_ecn_generalized || + ((len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && + (sack_rxmit == 0)))) { #ifdef INET6 if (isipv6) ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); @@ -13455,7 +13469,9 @@ * Reply with proper ECN notifications. * Only set CWR on new data segments. 
*/ - if (tp->t_flags2 & TF2_ECN_SND_CWR) { + if ((tp->t_flags2 & TF2_ECN_SND_CWR) && + (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && + (sack_rxmit == 0))) { flags |= TH_CWR; tp->t_flags2 &= ~TF2_ECN_SND_CWR; } Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -1643,6 +1643,24 @@ } #endif + /* + * Send out control packets with same IP ECN header + * bits, as when an established or listening socket + * would exist. + */ + if ((V_tcp_do_ecn == 1) && V_tcp_ecn_generalized) { +#ifdef INET6 + if (isipv6) + ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); +#endif /* INET6 */ +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET + ip->ip_tos |= IPTOS_ECN_ECT0; +#endif /* INET */ + } + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { Index: sys/netinet/tcp_syncache.c =================================================================== --- sys/netinet/tcp_syncache.c +++ sys/netinet/tcp_syncache.c @@ -1894,6 +1894,21 @@ if ((flags & TH_SYN) && (sc->sc_flags & SCF_ECN)) { th->th_flags |= TH_ECE; TCPSTAT_INC(tcps_ecn_shs); + + if ((V_tcp_ecn_generalized && + (flags & TH_ACK))) { +#ifdef INET6 + if (sc->sc_inc.inc_flags & INC_ISIPV6) + ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); +#endif +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET + ip->ip_tos |= IPTOS_ECN_ECT0; +#endif + TCPSTAT_INC(tcps_ecn_ect0); + } } /* Tack on the TCP options. 
*/ Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -848,6 +848,7 @@ VNET_DECLARE(int, tcp_do_sack); VNET_DECLARE(int, tcp_do_tso); VNET_DECLARE(int, tcp_ecn_maxretries); +VNET_DECLARE(int, tcp_ecn_generalized); VNET_DECLARE(int, tcp_initcwnd_segments); VNET_DECLARE(int, tcp_insecure_rst); VNET_DECLARE(int, tcp_insecure_syn); @@ -893,6 +894,7 @@ #define V_tcp_do_sack VNET(tcp_do_sack) #define V_tcp_do_tso VNET(tcp_do_tso) #define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries) +#define V_tcp_ecn_generalized VNET(tcp_ecn_generalized) #define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments) #define V_tcp_insecure_rst VNET(tcp_insecure_rst) #define V_tcp_insecure_syn VNET(tcp_insecure_syn)