Page MenuHomeFreeBSD

D23230.id103223.diff
No OneTemporary

D23230.id103223.diff

Index: share/man/man4/tcp.4
===================================================================
--- share/man/man4/tcp.4
+++ share/man/man4/tcp.4
@@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd January 8, 2022
+.Dd February 25, 2022
.Dt TCP 4
.Os
.Sh NAME
@@ -711,6 +711,23 @@
specific connection.
This is needed to help with connection establishment
when a broken firewall is in the network path.
+.It Va ecn.generalized
+Enable sending all segments as ECN capable transport,
+including SYN, SYN/ACK, and retransmissions.
+This may only be enabled when ECN support itself is also active.
+Disabling ECN support will disable this feature automatically.
+Settings:
+.Bl -tag -compact
+.It 0
+Regular RFC3168 operation.
+Send only new data segments as ECN capable transport.
+(default)
+.It 1
+Support generalized ECN (ECN++), and send all segments of an ECN-enabled
+session as ECN capable transport.
+Control packets for non-established and non-listening ports are also
+marked identically, if outgoing sessions would request ECN.
+.El
.It Va pmtud_blackhole_detection
Enable automatic path MTU blackhole detection.
In case of retransmits of MSS sized segments,
Index: sys/netinet/tcp_ecn.h
===================================================================
--- sys/netinet/tcp_ecn.h
+++ sys/netinet/tcp_ecn.h
@@ -48,7 +48,7 @@
int tcp_ecn_output_established(struct tcpcb *, uint16_t *, int);
void tcp_ecn_syncache_socket(struct tcpcb *, struct syncache *);
int tcp_ecn_syncache_add(uint16_t, int);
-uint16_t tcp_ecn_syncache_respond(uint16_t, struct syncache *);
+int tcp_ecn_syncache_respond(uint16_t *, struct syncache *);
#endif /* _KERNEL */
Index: sys/netinet/tcp_ecn.c
===================================================================
--- sys/netinet/tcp_ecn.c
+++ sys/netinet/tcp_ecn.c
@@ -111,9 +111,11 @@
{
thflags &= (TH_CWR|TH_ECE);
- if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
+ if ((thflags == TH_ECE) &&
V_tcp_do_ecn) {
tp->t_flags2 |= TF2_ECN_PERMIT;
+ if (V_tcp_ecn_generalized)
+ tp->t_flags2 |= TF2_ECN_PLUSPLUS;
TCPSTAT_INC(tcps_ecn_shs);
}
}
@@ -133,6 +135,8 @@
if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) {
tp->t_flags2 |= TF2_ECN_PERMIT;
tp->t_flags2 |= TF2_ECN_SND_ECE;
+ if (V_tcp_ecn_generalized)
+ tp->t_flags2 |= TF2_ECN_PLUSPLUS;
TCPSTAT_INC(tcps_ecn_shs);
}
}
@@ -214,7 +218,20 @@
*/
newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) &&
!((tp->t_flags & TF_FORCEDATA) && len == 1));
- if (newdata) {
+ if (newdata ||
+ /*
+ * Send ECN SYN segments as ECN-capable transport
+ * when ecn.generalized is set. This cannot be
+ * further simplified, as a fall-back to non-ECN
+ * may occur.
+ */
+ ((tp->t_flags2 & TF2_ECN_PLUSPLUS) &&
+ (((*thflags & (TH_SYN|TH_ACK|TH_ECE|TH_CWR)) ==
+ (TH_SYN| TH_ECE|TH_CWR)) ||
+ ((*thflags & (TH_SYN|TH_ACK|TH_ECE|TH_CWR)) ==
+ (TH_SYN|TH_ACK| TH_CWR)) ||
+ ((*thflags & (TH_SYN|TH_ACK|TH_ECE|TH_CWR)) ==
+ (TH_SYN|TH_ACK|TH_ECE ))))) {
ipecn = IPTOS_ECN_ECT0;
TCPSTAT_INC(tcps_ecn_ect0);
}
@@ -243,6 +260,8 @@
switch (sc->sc_flags & SCF_ECN_MASK) {
case SCF_ECN:
tp->t_flags2 |= TF2_ECN_PERMIT;
+ if (V_tcp_ecn_generalized)
+ tp->t_flags2 |= TF2_ECN_PLUSPLUS;
break;
/* undefined SCF codepoint */
default:
@@ -278,20 +297,25 @@
* Set up the ECN information for the <SYN,ACK> from
* syncache information.
*/
-uint16_t
-tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
+int
+tcp_ecn_syncache_respond(uint16_t *thflags, struct syncache *sc)
{
- if ((thflags & TH_SYN) &&
+ int ipecn = IPTOS_ECN_NOTECT;
+
+ if ((*thflags & TH_SYN) &&
(sc->sc_flags & SCF_ECN_MASK)) {
switch (sc->sc_flags & SCF_ECN_MASK) {
case SCF_ECN:
- thflags |= (0 | TH_ECE);
+ *thflags |= (0 | TH_ECE);
TCPSTAT_INC(tcps_ecn_shs);
+ if ((V_tcp_ecn_generalized &&
+ (*thflags & TH_ACK)))
+ ipecn = IPTOS_ECN_ECT0;
break;
/* undefined SCF codepoint */
default:
break;
}
}
- return thflags;
+ return ipecn;
}
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -211,8 +211,10 @@
"TCP ECN");
VNET_DEFINE(int, tcp_do_ecn) = 2;
-SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(tcp_do_ecn), 0,
+static int sysctl_net_inet_tcp_ecn_enable_check(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_ecn, OID_AUTO, enable,
+ CTLFLAG_VNET | CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_NEEDGIANT,
+ &VNET_NAME(tcp_do_ecn), 0, &sysctl_net_inet_tcp_ecn_enable_check, "IU",
"TCP ECN support");
VNET_DEFINE(int, tcp_ecn_maxretries) = 1;
@@ -220,6 +222,13 @@
&VNET_NAME(tcp_ecn_maxretries), 0,
"Max retries before giving up on ECN");
+VNET_DEFINE(int, tcp_ecn_generalized) = 0;
+static int sysctl_net_inet_tcp_ecn_generalized_check(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_net_inet_tcp_ecn, OID_AUTO, generalized,
+ CTLFLAG_VNET | CTLFLAG_RW | CTLTYPE_UINT | CTLFLAG_NEEDGIANT,
+ &VNET_NAME(tcp_ecn_generalized), 0, &sysctl_net_inet_tcp_ecn_generalized_check, "IU",
+ "Send all packets as ECT");
+
VNET_DEFINE(int, tcp_insecure_syn) = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_syn, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_insecure_syn), 0,
@@ -4071,3 +4080,45 @@
return (4 * maxseg);
}
}
+
+static int
+sysctl_net_inet_tcp_ecn_enable_check(SYSCTL_HANDLER_ARGS)
+{
+ uint32_t new;
+ int error;
+
+ new = V_tcp_do_ecn;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr != NULL) {
+ if (new > 2)
+ error = EINVAL;
+ else {
+ V_tcp_do_ecn = new;
+ if (new == 0)
+ V_tcp_ecn_generalized = new;
+ }
+ }
+
+ return (error);
+}
+
+static int
+sysctl_net_inet_tcp_ecn_generalized_check(SYSCTL_HANDLER_ARGS)
+{
+ uint32_t new;
+ int error;
+
+ new = V_tcp_ecn_generalized;
+ error = sysctl_handle_int(oidp, &new, 0, req);
+ if (error == 0 && req->newptr != NULL) {
+ if (new > 1)
+ error = EINVAL;
+ else
+ if (!V_tcp_do_ecn && new == 1)
+ error = EINVAL;
+ else
+ V_tcp_ecn_generalized = new;
+ }
+
+ return (error);
+}
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -1204,8 +1204,9 @@
flags |= tcp_ecn_output_syn_sent(tp);
}
/* Also handle parallel SYN for ECN */
- if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
- (tp->t_flags2 & TF2_ECN_PERMIT)) {
+ if ((tp->t_flags2 & TF2_ECN_PLUSPLUS) ||
+ (TCPS_HAVERCVDSYN(tp->t_state) &&
+ (tp->t_flags2 & TF2_ECN_PERMIT))) {
int ect = tcp_ecn_output_established(tp, &flags, len);
if ((tp->t_state == TCPS_SYN_RECEIVED) &&
(tp->t_flags2 & TF2_ECN_SND_ECE))
Index: sys/netinet/tcp_stacks/rack.c
===================================================================
--- sys/netinet/tcp_stacks/rack.c
+++ sys/netinet/tcp_stacks/rack.c
@@ -18276,8 +18276,9 @@
flags |= tcp_ecn_output_syn_sent(tp);
}
/* Also handle parallel SYN for ECN */
- if (TCPS_HAVERCVDSYN(tp->t_state) &&
- (tp->t_flags2 & TF2_ECN_PERMIT)) {
+ if ((tp->t_flags2 & TF2_ECN_PLUSPLUS) ||
+ (TCPS_HAVERCVDSYN(tp->t_state) &&
+ (tp->t_flags2 & TF2_ECN_PERMIT))) {
int ect = tcp_ecn_output_established(tp, &flags, len);
if ((tp->t_state == TCPS_SYN_RECEIVED) &&
(tp->t_flags2 & TF2_ECN_SND_ECE))
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -2033,6 +2033,26 @@
}
#endif
+ /*
+ * Send out control packets with the same IP ECN
+ * header bits as if an established or listening
+ * socket existed.
+ */
+ if (V_tcp_ecn_generalized && ((V_tcp_do_ecn == 1) ||
+ ((tp != NULL) && (tp->t_flags2 & TF2_ECN_PERMIT)))) {
+#ifdef INET6
+ if (isipv6)
+ ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20);
+#endif /* INET6 */
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
+ ip->ip_tos |= IPTOS_ECN_ECT0;
+#endif /* INET */
+ }
+
+ m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
#ifdef INET6
if (isipv6) {
if (port) {
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -132,7 +132,7 @@
static void syncache_drop(struct syncache *, struct syncache_head *);
static void syncache_free(struct syncache *);
static void syncache_insert(struct syncache *, struct syncache_head *);
-static int syncache_respond(struct syncache *, const struct mbuf *, int);
+static int syncache_respond(struct syncache *, const struct mbuf *, uint16_t);
static struct socket *syncache_socket(struct syncache *, struct socket *,
struct mbuf *m);
static void syncache_timeout(struct syncache *sc, struct syncache_head *sch,
@@ -1816,14 +1816,14 @@
* i.e. m0 != NULL, or upon 3WHS ACK timeout, i.e. m0 == NULL.
*/
static int
-syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags)
+syncache_respond(struct syncache *sc, const struct mbuf *m0, uint16_t flags)
{
struct ip *ip = NULL;
struct mbuf *m;
struct tcphdr *th = NULL;
struct udphdr *udp = NULL;
int optlen, error = 0; /* Make compiler happy */
- u_int16_t hlen, tlen, mssopt, ulen;
+ uint16_t hlen, tlen, mssopt, ulen;
struct tcpopt to;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
@@ -1940,7 +1940,17 @@
th->th_win = htons(sc->sc_wnd);
th->th_urp = 0;
- flags = tcp_ecn_syncache_respond(flags, sc);
+ int ect = tcp_ecn_syncache_respond(&flags, sc);
+#ifdef INET6
+ if (sc->sc_inc.inc_flags & INC_ISIPV6)
+ ip6->ip6_flow |= htonl(ect << 20);
+#endif
+#if defined(INET6) && defined(INET)
+ else
+#endif
+#ifdef INET
+ ip->ip_tos |= ect;
+#endif
tcp_set_flags(th, flags);
/* Tack on the TCP options. */
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -3068,6 +3068,10 @@
db_printf("%sTF2_ACE_PERMIT", comma ? ", " : "");
comma = 1;
}
+ if (t_flags2 & TF2_ECN_PLUSPLUS) {
+ db_printf("%sTF2_ECN_PLUSPLUS", comma ? ", " : "");
+ comma = 1;
+ }
if (t_flags2 & TF2_FBYTES_COMPLETE) {
db_printf("%sTF2_FBYTES_COMPLETE", comma ? ", " : "");
comma = 1;
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -569,6 +569,7 @@
#define TF2_ECN_SND_CWR 0x00000040 /* ECN CWR in queue */
#define TF2_ECN_SND_ECE 0x00000080 /* ECN ECE in queue */
#define TF2_ACE_PERMIT 0x00000100 /* Accurate ECN mode */
+#define TF2_ECN_PLUSPLUS 0x00000200 /* ECN++ session */
#define TF2_FBYTES_COMPLETE 0x00000400 /* We have first bytes in and out */
/*
* Structure to hold TCP options that are only used during segment
@@ -1004,6 +1005,7 @@
VNET_DECLARE(int, tcp_do_sack);
VNET_DECLARE(int, tcp_do_tso);
VNET_DECLARE(int, tcp_ecn_maxretries);
+VNET_DECLARE(int, tcp_ecn_generalized);
VNET_DECLARE(int, tcp_initcwnd_segments);
VNET_DECLARE(int, tcp_insecure_rst);
VNET_DECLARE(int, tcp_insecure_syn);
@@ -1050,6 +1052,7 @@
#define V_tcp_do_sack VNET(tcp_do_sack)
#define V_tcp_do_tso VNET(tcp_do_tso)
#define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries)
+#define V_tcp_ecn_generalized VNET(tcp_ecn_generalized)
#define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments)
#define V_tcp_insecure_rst VNET(tcp_insecure_rst)
#define V_tcp_insecure_syn VNET(tcp_insecure_syn)

File Metadata

Mime Type
text/plain
Expires
Sun, Jan 25, 11:27 PM (13 h, 29 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27979473
Default Alt Text
D23230.id103223.diff (11 KB)

Event Timeline