Index: share/man/man4/tcp.4 =================================================================== --- share/man/man4/tcp.4 +++ share/man/man4/tcp.4 @@ -34,7 +34,7 @@ .\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd January 8, 2022 +.Dd February 25, 2022 .Dt TCP 4 .Os .Sh NAME @@ -711,6 +711,23 @@ specific connection. This is needed to help with connection establishment when a broken firewall is in the network path. +.It Va ecn.generalized +Enable sending all segments as ECN capable transport, +including SYN, SYN/ACK, and retransmissions. +This may only be enabled when ECN support itself is also active. +Disabling ECN support will disable this feature automatically. +Settings: +.Bl -tag -compact +.It 0 +Regular RFC3168 operation. +Send only new data segments as ECN capable transport. +(default) +.It 1 +Support generalized ECN (ECN++), and send all segments of an ECN-enabled +session as ECN capable transport. +Also control packets to non-established and non-listening ports are +identically marked, if outgoing sessions would request ECN. +.El .It Va pmtud_blackhole_detection Enable automatic path MTU blackhole detection. 
In case of retransmits of MSS sized segments, Index: sys/netinet/tcp_ecn.h =================================================================== --- sys/netinet/tcp_ecn.h +++ sys/netinet/tcp_ecn.h @@ -48,7 +48,7 @@ int tcp_ecn_output_established(struct tcpcb *, uint16_t *, int); void tcp_ecn_syncache_socket(struct tcpcb *, struct syncache *); int tcp_ecn_syncache_add(uint16_t, int); -uint16_t tcp_ecn_syncache_respond(uint16_t, struct syncache *); +int tcp_ecn_syncache_respond(uint16_t *, struct syncache *); #endif /* _KERNEL */ Index: sys/netinet/tcp_ecn.c =================================================================== --- sys/netinet/tcp_ecn.c +++ sys/netinet/tcp_ecn.c @@ -111,9 +111,11 @@ { thflags &= (TH_CWR|TH_ECE); - if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) && + if ((thflags == TH_ECE) && V_tcp_do_ecn) { tp->t_flags2 |= TF2_ECN_PERMIT; + if (V_tcp_ecn_generalized) + tp->t_flags2 |= TF2_ECN_PLUSPLUS; TCPSTAT_INC(tcps_ecn_shs); } } @@ -133,6 +135,8 @@ if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) { tp->t_flags2 |= TF2_ECN_PERMIT; tp->t_flags2 |= TF2_ECN_SND_ECE; + if (V_tcp_ecn_generalized) + tp->t_flags2 |= TF2_ECN_PLUSPLUS; TCPSTAT_INC(tcps_ecn_shs); } } @@ -214,7 +218,20 @@ */ newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && !((tp->t_flags & TF_FORCEDATA) && len == 1)); - if (newdata) { + if (newdata || + /* + * Send ECN SYN segments as ECN-capable transport + * when ecn.generalized is set. This can not be + * further simplified, as a fall-back to non-ECN + * may occur. 
+ */ + ((tp->t_flags2 & TF2_ECN_PLUSPLUS) && + (((*thflags & (TH_SYN|TH_ACK|TH_ECE|TH_CWR)) == + (TH_SYN| TH_ECE|TH_CWR)) || + ((*thflags & (TH_SYN|TH_ACK|TH_ECE|TH_CWR)) == + (TH_SYN|TH_ACK| TH_CWR)) || + ((*thflags & (TH_SYN|TH_ACK|TH_ECE|TH_CWR)) == + (TH_SYN|TH_ACK|TH_ECE ))))) { ipecn = IPTOS_ECN_ECT0; TCPSTAT_INC(tcps_ecn_ect0); } @@ -243,6 +260,8 @@ switch (sc->sc_flags & SCF_ECN_MASK) { case SCF_ECN: tp->t_flags2 |= TF2_ECN_PERMIT; + if (V_tcp_ecn_generalized) + tp->t_flags2 |= TF2_ECN_PLUSPLUS; break; /* undefined SCF codepoint */ default: @@ -278,20 +297,25 @@ * Set up the ECN information for the <SYN,ACK> from * syncache information. */ -uint16_t -tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc) +int +tcp_ecn_syncache_respond(uint16_t *thflags, struct syncache *sc) { - if ((thflags & TH_SYN) && + int ipecn = IPTOS_ECN_NOTECT; + + if ((*thflags & TH_SYN) && (sc->sc_flags & SCF_ECN_MASK)) { switch (sc->sc_flags & SCF_ECN_MASK) { case SCF_ECN: - thflags |= (0 | TH_ECE); + *thflags |= (0 | TH_ECE); TCPSTAT_INC(tcps_ecn_shs); + if ((V_tcp_ecn_generalized && + (*thflags & TH_ACK))) + ipecn = IPTOS_ECN_ECT0; break; /* undefined SCF codepoint */ default: break; } } - return thflags; + return ipecn; } Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c +++ sys/netinet/tcp_input.c @@ -211,8 +211,10 @@ "TCP ECN"); VNET_DEFINE(int, tcp_do_ecn) = 2; -SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(tcp_do_ecn), 0, +static int sysctl_net_inet_tcp_ecn_enable_check(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_ecn, OID_AUTO, enable, + CTLFLAG_VNET | CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_NEEDGIANT, + &VNET_NAME(tcp_do_ecn), 0, &sysctl_net_inet_tcp_ecn_enable_check, "IU", "TCP ECN support"); VNET_DEFINE(int, tcp_ecn_maxretries) = 1; @@ -220,6 +222,13 @@ &VNET_NAME(tcp_ecn_maxretries), 0, "Max retries before giving up on ECN"); +VNET_DEFINE(int, 
tcp_ecn_generalized) = 0; +static int sysctl_net_inet_tcp_ecn_generalized_check(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_ecn, OID_AUTO, generalized, + CTLFLAG_VNET | CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_NEEDGIANT, + &VNET_NAME(tcp_ecn_generalized), 0, &sysctl_net_inet_tcp_ecn_generalized_check, "IU", + "Send all packets as ECT"); + VNET_DEFINE(int, tcp_insecure_syn) = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_syn, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_insecure_syn), 0, @@ -4071,3 +4080,45 @@ return (4 * maxseg); } } + +static int +sysctl_net_inet_tcp_ecn_enable_check(SYSCTL_HANDLER_ARGS) +{ + uint32_t new; + int error; + + new = V_tcp_do_ecn; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr != NULL) { + if (new > 2) + error = EINVAL; + else { + V_tcp_do_ecn = new; + if (new == 0) + V_tcp_ecn_generalized = new; + } + } + + return (error); +} + +static int +sysctl_net_inet_tcp_ecn_generalized_check(SYSCTL_HANDLER_ARGS) +{ + uint32_t new; + int error; + + new = V_tcp_ecn_generalized; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr != NULL) { + if (new > 1) + error = EINVAL; + else + if (!V_tcp_do_ecn && new == 1) + error = EINVAL; + else + V_tcp_ecn_generalized = new; + } + + return (error); +} Index: sys/netinet/tcp_output.c =================================================================== --- sys/netinet/tcp_output.c +++ sys/netinet/tcp_output.c @@ -1204,8 +1204,9 @@ flags |= tcp_ecn_output_syn_sent(tp); } /* Also handle parallel SYN for ECN */ - if ((TCPS_HAVERCVDSYN(tp->t_state)) && - (tp->t_flags2 & TF2_ECN_PERMIT)) { + if ((tp->t_flags2 & TF2_ECN_PLUSPLUS) || + (TCPS_HAVERCVDSYN(tp->t_state) && + (tp->t_flags2 & TF2_ECN_PERMIT))) { int ect = tcp_ecn_output_established(tp, &flags, len); if ((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_flags2 & TF2_ECN_SND_ECE)) Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- 
sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -18276,8 +18276,9 @@ flags |= tcp_ecn_output_syn_sent(tp); } /* Also handle parallel SYN for ECN */ - if (TCPS_HAVERCVDSYN(tp->t_state) && - (tp->t_flags2 & TF2_ECN_PERMIT)) { + if ((tp->t_flags2 & TF2_ECN_PLUSPLUS) || + (TCPS_HAVERCVDSYN(tp->t_state) && + (tp->t_flags2 & TF2_ECN_PERMIT))) { int ect = tcp_ecn_output_established(tp, &flags, len); if ((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_flags2 & TF2_ECN_SND_ECE)) Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -2033,6 +2033,26 @@ } #endif + /* + * Send out control packets with same IP ECN header + * bits, as when an established or listening socket + * would exist. + */ + if (V_tcp_ecn_generalized && ((V_tcp_do_ecn == 1) || + ((tp != NULL) && (tp->t_flags2 & TF2_ECN_PERMIT)))) { +#ifdef INET6 + if (isipv6) + ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); +#endif /* INET6 */ +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET + ip->ip_tos |= IPTOS_ECN_ECT0; +#endif /* INET */ + } + + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { if (port) { Index: sys/netinet/tcp_syncache.c =================================================================== --- sys/netinet/tcp_syncache.c +++ sys/netinet/tcp_syncache.c @@ -132,7 +132,7 @@ static void syncache_drop(struct syncache *, struct syncache_head *); static void syncache_free(struct syncache *); static void syncache_insert(struct syncache *, struct syncache_head *); -static int syncache_respond(struct syncache *, const struct mbuf *, int); +static int syncache_respond(struct syncache *, const struct mbuf *, uint16_t); static struct socket *syncache_socket(struct syncache *, struct socket *, struct mbuf *m); static void syncache_timeout(struct syncache *sc, struct syncache_head *sch, @@ -1816,14 +1816,14 @@ * i.e. 
m0 != NULL, or upon 3WHS ACK timeout, i.e. m0 == NULL. */ static int -syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) +syncache_respond(struct syncache *sc, const struct mbuf *m0, uint16_t flags) { struct ip *ip = NULL; struct mbuf *m; struct tcphdr *th = NULL; struct udphdr *udp = NULL; int optlen, error = 0; /* Make compiler happy */ - u_int16_t hlen, tlen, mssopt, ulen; + uint16_t hlen, tlen, mssopt, ulen; struct tcpopt to; #ifdef INET6 struct ip6_hdr *ip6 = NULL; @@ -1940,7 +1940,17 @@ th->th_win = htons(sc->sc_wnd); th->th_urp = 0; - flags = tcp_ecn_syncache_respond(flags, sc); + int ect = tcp_ecn_syncache_respond(&flags, sc); +#ifdef INET6 + if (sc->sc_inc.inc_flags & INC_ISIPV6) + ip6->ip6_flow |= htonl(ect << 20); +#endif +#if defined(INET6) && defined(INET) + else +#endif +#ifdef INET + ip->ip_tos |= ect; +#endif tcp_set_flags(th, flags); /* Tack on the TCP options. */ Index: sys/netinet/tcp_usrreq.c =================================================================== --- sys/netinet/tcp_usrreq.c +++ sys/netinet/tcp_usrreq.c @@ -3068,6 +3068,10 @@ db_printf("%sTF2_ACE_PERMIT", comma ? ", " : ""); comma = 1; } + if (t_flags2 & TF2_ECN_PLUSPLUS) { + db_printf("%sTF2_ECN_PLUSPLUS", comma ? ", " : ""); + comma = 1; + } if (t_flags2 & TF2_FBYTES_COMPLETE) { db_printf("%sTF2_FBYTES_COMPLETE", comma ? 
", " : ""); comma = 1; Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -569,6 +569,7 @@ #define TF2_ECN_SND_CWR 0x00000040 /* ECN CWR in queue */ #define TF2_ECN_SND_ECE 0x00000080 /* ECN ECE in queue */ #define TF2_ACE_PERMIT 0x00000100 /* Accurate ECN mode */ +#define TF2_ECN_PLUSPLUS 0x00000200 /* ECN++ session */ #define TF2_FBYTES_COMPLETE 0x00000400 /* We have first bytes in and out */ /* * Structure to hold TCP options that are only used during segment @@ -1004,6 +1005,7 @@ VNET_DECLARE(int, tcp_do_sack); VNET_DECLARE(int, tcp_do_tso); VNET_DECLARE(int, tcp_ecn_maxretries); +VNET_DECLARE(int, tcp_ecn_generalized); VNET_DECLARE(int, tcp_initcwnd_segments); VNET_DECLARE(int, tcp_insecure_rst); VNET_DECLARE(int, tcp_insecure_syn); @@ -1050,6 +1052,7 @@ #define V_tcp_do_sack VNET(tcp_do_sack) #define V_tcp_do_tso VNET(tcp_do_tso) #define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries) +#define V_tcp_ecn_generalized VNET(tcp_ecn_generalized) #define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments) #define V_tcp_insecure_rst VNET(tcp_insecure_rst) #define V_tcp_insecure_syn VNET(tcp_insecure_syn)