Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F160248422
D36303.id111278.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
15 KB
Referenced Files
None
Subscribers
None
D36303.id111278.diff
View Options
diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
--- a/share/man/man4/tcp.4
+++ b/share/man/man4/tcp.4
@@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd August 1, 2022
+.Dd September 30, 2022
.Dt TCP 4
.Os
.Sh NAME
@@ -520,6 +520,9 @@
specific connection.
This is needed to help with connection establishment
when a broken firewall is in the network path.
+.It Va ecn.option
+Report the number of received bytes per ECN marking (ECT(0), ECT(1), CE)
+back to the peer using the Accurate ECN TCP option on outgoing packets.
.It Va fast_finwait2_recycle
Recycle
.Tn TCP
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -104,6 +104,10 @@
#define TCPOLEN_SIGNATURE 18
#define TCPOPT_FAST_OPEN 34
#define TCPOLEN_FAST_OPEN_EMPTY 2
+#define TCPOPT_ACCECN0 0xAC
+#define TCPOPT_ACCECN1 0xAE
+#define TCPOLEN_ACCECN_EMPTY 2
+#define TCPOLEN_ACCECN_COUNTER 3
#define MAX_TCPOPTLEN 40 /* Absolute maximum TCP options len */
diff --git a/sys/netinet/tcp_ecn.h b/sys/netinet/tcp_ecn.h
--- a/sys/netinet/tcp_ecn.h
+++ b/sys/netinet/tcp_ecn.h
@@ -43,7 +43,7 @@
void tcp_ecn_input_syn_sent(struct tcpcb *, uint16_t, int);
void tcp_ecn_input_parallel_syn(struct tcpcb *, uint16_t, int);
-int tcp_ecn_input_segment(struct tcpcb *, uint16_t, int);
+int tcp_ecn_input_segment(struct tcpcb *, uint16_t, int, int);
uint16_t tcp_ecn_output_syn_sent(struct tcpcb *);
int tcp_ecn_output_established(struct tcpcb *, uint16_t *, int, bool);
void tcp_ecn_syncache_socket(struct tcpcb *, struct syncache *);
diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c
--- a/sys/netinet/tcp_ecn.c
+++ b/sys/netinet/tcp_ecn.c
@@ -102,6 +102,24 @@
#include <netinet/tcpip.h>
#include <netinet/tcp_ecn.h>
+static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn,
+ CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "TCP ECN");
+
+VNET_DEFINE(int, tcp_do_ecn) = 2;
+SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_ecn), 0,
+ "TCP ECN support");
+
+VNET_DEFINE(int, tcp_ecn_maxretries) = 1;
+SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_ecn_maxretries), 0,
+ "Max retries before giving up on ECN");
+
+VNET_DEFINE(int, tcp_ecn_option) = 0;
+SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, option,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_ecn_option), 0,
+ "Use AccECN TCP option");
/*
* Process incoming SYN,ACK packet
@@ -109,7 +127,6 @@
void
tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
{
-
if (V_tcp_do_ecn == 0)
return;
if ((V_tcp_do_ecn == 1) ||
@@ -261,19 +278,25 @@
* TCP ECN processing.
*/
int
-tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos)
+tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int tlen, int iptos)
{
int delta_ace = 0;
if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) {
switch (iptos & IPTOS_ECN_MASK) {
case IPTOS_ECN_CE:
+ tp->t_flags2 |= TF2_ACO_CE;
+ tp->t_rceb += tlen;
TCPSTAT_INC(tcps_ecn_ce);
break;
case IPTOS_ECN_ECT0:
+ tp->t_flags2 |= TF2_ACO_E0;
+ tp->t_re0b += tlen;
TCPSTAT_INC(tcps_ecn_ect0);
break;
case IPTOS_ECN_ECT1:
+ tp->t_flags2 |= TF2_ACO_E1;
+ tp->t_re1b += tlen;
TCPSTAT_INC(tcps_ecn_ect1);
break;
}
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -204,20 +204,6 @@
&VNET_NAME(tcp_abc_l_var), 2,
"Cap the max cwnd increment during slow-start to this number of segments");
-static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn,
- CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
- "TCP ECN");
-
-VNET_DEFINE(int, tcp_do_ecn) = 2;
-SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(tcp_do_ecn), 0,
- "TCP ECN support");
-
-VNET_DEFINE(int, tcp_ecn_maxretries) = 1;
-SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(tcp_ecn_maxretries), 0,
- "Max retries before giving up on ECN");
-
VNET_DEFINE(int, tcp_insecure_syn) = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_syn, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_insecure_syn), 0,
@@ -1630,7 +1616,7 @@
/*
* TCP ECN processing.
*/
- if (tcp_ecn_input_segment(tp, thflags, iptos))
+ if (tcp_ecn_input_segment(tp, thflags, tlen, iptos))
cc_cong_signal(tp, th, CC_ECN);
/*
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -890,9 +890,37 @@
if (tp->t_flags & TF_SIGNATURE)
to.to_flags |= TOF_SIGNATURE;
#endif /* TCP_SIGNATURE */
-
+ /*
+ * AccECN option
+ * Don't send on <SYN>, only on <SYN,ACK> or
+ * when doing an AccECN session
+ */
+ if (V_tcp_ecn_option &&
+ ((V_tcp_do_ecn == 3) || (V_tcp_do_ecn == 4)) &&
+ ((tp->t_flags2 & TF2_ACE_PERMIT) ||
+ ((flags & TH_SYN) && (flags & TH_ACK)))) {
+ to.to_flags |= TOF_ACCECNOPT;
+ to.to_ee0b = tp->t_re0b;
+ to.to_ee1b = tp->t_re1b;
+ to.to_eceb = tp->t_rceb;
+ to.to_flags |= ((tp->t_flags2 & TF2_ACO_E0) ? TOF_ACCE_E0 : 0) |
+ ((tp->t_flags2 & TF2_ACO_E1) ? TOF_ACCE_E1 : 0) |
+ ((tp->t_flags2 & TF2_ACO_CE) ? TOF_ACCE_CE : 0);
+ if (flags & TH_SYN)
+ to.to_flags |= TOF_ACCE_SYN;
+ if (tp->t_flags & TF_ACKNOW)
+ to.to_flags |= TOF_ACCE_ACKNOW;
+ }
/* Processing the options. */
hdrlen += optlen = tcp_addoptions(&to, opt);
+ if (to.to_flags & TOF_ACCECNOPT) {
+ if ((to.to_flags & TOF_ACCE_E0) == 0)
+ tp->t_flags2 &= ~TF2_ACO_E0;
+ if ((to.to_flags & TOF_ACCE_E1) == 0)
+ tp->t_flags2 &= ~TF2_ACO_E1;
+ if ((to.to_flags & TOF_ACCE_CE) == 0)
+ tp->t_flags2 &= ~TF2_ACO_CE;
+ }
/*
* If we wanted a TFO option to be added, but it was unable
* to fit, ensure no data is sent.
@@ -1955,6 +1983,128 @@
optlen += total_len;
break;
}
+ case TOF_ACCECNOPT:
+ {
+ int max_len = TCP_MAXOLEN - optlen;
+ if (max_len < TCPOLEN_ACCECN_EMPTY) {
+ to->to_flags &= ~TOF_ACCECNOPT;
+ continue;
+ }
+ if (max_len < (TCPOLEN_ACCECN_EMPTY +
+ 1 * TCPOLEN_ACCECN_COUNTER)) {
+ if (to->to_flags & TOF_ACCE_SYN) {
+ *optp++ = TCPOPT_ACCECN0;
+ optlen += TCPOLEN_ACCECN_EMPTY;
+ *optp++ = TCPOLEN_ACCECN_EMPTY;
+ continue;
+ } else {
+ to->to_flags &= ~TOF_ACCECNOPT;
+ continue;
+ }
+ }
+ if (max_len < (TCPOLEN_ACCECN_EMPTY +
+ 2 * TCPOLEN_ACCECN_COUNTER)) {
+ if (to->to_flags & TOF_ACCE_E1) {
+ *optp++ = TCPOPT_ACCECN1;
+ *optp++ = TCPOLEN_ACCECN_EMPTY +
+ TCPOLEN_ACCECN_COUNTER;
+ optlen += TCPOLEN_ACCECN_EMPTY +
+ TCPOLEN_ACCECN_COUNTER;
+ *optp++ = (char)(to->to_ee1b >> 16);
+ *optp++ = (char)(to->to_ee1b >> 8);
+ *optp++ = (char)(to->to_ee1b);
+ to->to_flags &= ~TOF_ACCE_E1;
+ continue;
+ }
+ *optp++ = TCPOPT_ACCECN0;
+ *optp++ = TCPOLEN_ACCECN_EMPTY +
+ TCPOLEN_ACCECN_COUNTER;
+ optlen += TCPOLEN_ACCECN_EMPTY +
+ TCPOLEN_ACCECN_COUNTER;
+ *optp++ = (char)(to->to_ee0b >> 16);
+ *optp++ = (char)(to->to_ee0b >> 8);
+ *optp++ = (char)(to->to_ee0b);
+ to->to_flags &= ~TOF_ACCE_E0;
+ continue;
+ }
+ if (max_len < (TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER)) {
+ if (to->to_flags & TOF_ACCE_E1) {
+ *optp++ = TCPOPT_ACCECN1;
+ *optp++ = TCPOLEN_ACCECN_EMPTY +
+ 2 * TCPOLEN_ACCECN_COUNTER;
+ optlen += TCPOLEN_ACCECN_EMPTY +
+ 2 * TCPOLEN_ACCECN_COUNTER;
+ *optp++ = (char)(to->to_ee1b >> 16);
+ *optp++ = (char)(to->to_ee1b >> 8);
+ *optp++ = (char)(to->to_ee1b);
+ to->to_flags &= ~TOF_ACCE_E1;
+ *optp++ = (char)(to->to_eceb >> 16);
+ *optp++ = (char)(to->to_eceb >> 8);
+ *optp++ = (char)(to->to_eceb);
+ to->to_flags &= ~TOF_ACCE_CE;
+ continue;
+ }
+ *optp++ = TCPOPT_ACCECN0;
+ *optp++ = TCPOLEN_ACCECN_EMPTY +
+ 2 * TCPOLEN_ACCECN_COUNTER;
+ optlen += TCPOLEN_ACCECN_EMPTY +
+ 2 * TCPOLEN_ACCECN_COUNTER;
+ *optp++ = (char)(to->to_ee0b >> 16);
+ *optp++ = (char)(to->to_ee0b >> 8);
+ *optp++ = (char)(to->to_ee0b);
+ to->to_flags &= ~TOF_ACCE_E0;
+ *optp++ = (char)(to->to_eceb >> 16);
+ *optp++ = (char)(to->to_eceb >> 8);
+ *optp++ = (char)(to->to_eceb);
+ to->to_flags &= ~TOF_ACCE_CE;
+ continue;
+ }
+ /*
+ * Remaining option space can hold the full AccECN option.
+ * Intent: send only changed counters normally, all on ACKNOW.
+ * XXX: as written, all three counters are always sent.
+ */
+ if (to->to_flags & TOF_ACCE_E1) {
+ *optp++ = TCPOPT_ACCECN1;
+ *optp++ = TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER;
+ optlen += TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER;
+ *optp++ = (char)(to->to_ee1b >> 16);
+ *optp++ = (char)(to->to_ee1b >> 8);
+ *optp++ = (char)(to->to_ee1b);
+ to->to_flags &= ~TOF_ACCE_E1;
+ *optp++ = (char)(to->to_eceb >> 16);
+ *optp++ = (char)(to->to_eceb >> 8);
+ *optp++ = (char)(to->to_eceb);
+ to->to_flags &= ~TOF_ACCE_CE;
+ *optp++ = (char)(to->to_ee0b >> 16);
+ *optp++ = (char)(to->to_ee0b >> 8);
+ *optp++ = (char)(to->to_ee0b);
+ to->to_flags &= ~TOF_ACCE_E0;
+ continue;
+ } else {
+ *optp++ = TCPOPT_ACCECN0;
+ *optp++ = TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER;
+ optlen += TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER;
+ *optp++ = (char)(to->to_ee0b >> 16);
+ *optp++ = (char)(to->to_ee0b >> 8);
+ *optp++ = (char)(to->to_ee0b);
+ to->to_flags &= ~TOF_ACCE_E0;
+ *optp++ = (char)(to->to_eceb >> 16);
+ *optp++ = (char)(to->to_eceb >> 8);
+ *optp++ = (char)(to->to_eceb);
+ to->to_flags &= ~TOF_ACCE_CE;
+ *optp++ = (char)(to->to_ee1b >> 16);
+ *optp++ = (char)(to->to_ee1b >> 8);
+ *optp++ = (char)(to->to_ee1b);
+ to->to_flags &= ~TOF_ACCE_E1;
+ continue;
+ }
+ }
default:
panic("%s: unknown TCP option type", __func__);
break;
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -13528,8 +13528,8 @@
rack_cc_after_idle(rack, tp);
}
tp->t_rcvtime = ticks;
- /* Now what about ECN? */
- if (tcp_ecn_input_segment(tp, ae->flags, ae->codepoint))
+ /* Now what about ECN of a chain of pure ACKs? */
+ if (tcp_ecn_input_segment(tp, ae->flags, 0, ae->codepoint))
rack_cong_signal(tp, CC_ECN, ae->ack, __LINE__);
#ifdef TCP_ACCOUNTING
/* Count for the specific type of ack in */
@@ -14319,7 +14319,7 @@
* TCP ECN processing. XXXJTL: If we ever use ECN, we need to move
* this to occur after we've validated the segment.
*/
- if (tcp_ecn_input_segment(tp, thflags, iptos))
+ if (tcp_ecn_input_segment(tp, thflags, tlen, iptos))
rack_cong_signal(tp, CC_ECN, th->th_ack, __LINE__);
/*
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -2316,6 +2316,11 @@
tcp_log_tcpcbinit(tp);
#endif
tp->t_pacing_rate = -1;
+ if (V_tcp_do_lrd)
+ tp->t_flags |= TF_LRD;
+ tp->t_re0b = 1;
+ tp->t_re1b = 1;
+ tp->t_rceb = 0;
if (tp->t_fb->tfb_tcp_fb_init) {
if ((*tp->t_fb->tfb_tcp_fb_init)(tp)) {
refcount_release(&tp->t_fb->tfb_refcnt);
@@ -2328,8 +2333,6 @@
if (V_tcp_perconn_stats_enable == 1)
tp->t_stats = stats_blob_alloc(V_tcp_perconn_stats_dflt_tpl, 0);
#endif
- if (V_tcp_do_lrd)
- tp->t_flags |= TF_LRD;
return (tp); /* XXX */
}
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -1958,6 +1958,19 @@
/* don't send cookie again when retransmitting response */
sc->sc_tfo_cookie = NULL;
}
+ if (V_tcp_ecn_option)
+ to.to_flags |= TOF_ACCE_SYN;
+ }
+ if (V_tcp_ecn_option &&
+ (sc->sc_flags & SCF_ECN_MASK) &&
+ ((sc->sc_flags & SCF_ECN_MASK) != SCF_ECN)) {
+ to.to_flags |= TOF_ACCECNOPT;
+ to.to_flags |= TOF_ACCE_E0 |
+ TOF_ACCE_E1 |
+ TOF_ACCE_CE;
+ to.to_ee0b = 1;
+ to.to_ee1b = 1;
+ to.to_eceb = 0;
}
if (sc->sc_flags & SCF_TIMESTAMP) {
to.to_tsval = sc->sc_tsoff + tcp_ts_getticks();
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -251,6 +251,9 @@
int t_loglimit; /* Maximum number of log entries */
uint32_t t_rcep; /* Number of received CE marked pkts */
uint32_t t_scep; /* Synced number of delivered CE pkts */
+ uint32_t t_re0b; /* Number of ECT0 marked data bytes */
+ uint32_t t_re1b; /* Number of ECT1 marked data bytes */
+ uint32_t t_rceb; /* Number of CE marked data bytes */
int64_t t_pacing_rate; /* bytes / sec, -1 => unlimited */
struct tcp_log_stailq t_logs; /* Log buffer */
struct tcp_log_id_node *t_lin;
@@ -570,7 +573,10 @@
#define TF2_ECN_SND_CWR 0x00000040 /* ECN CWR in queue */
#define TF2_ECN_SND_ECE 0x00000080 /* ECN ECE in queue */
#define TF2_ACE_PERMIT 0x00000100 /* Accurate ECN mode */
-#define TF2_FBYTES_COMPLETE 0x00000400 /* We have first bytes in and out */
+#define TF2_ACO_E0 0x00000200 /* EE0 counter changed */
+#define TF2_ACO_E1 0x00000400 /* EE1 counter changed */
+#define TF2_ACO_CE 0x00000800 /* ECE counter changed */
+#define TF2_FBYTES_COMPLETE 0x00001000 /* We have first bytes in and out */
/*
* Structure to hold TCP options that are only used during segment
* processing (in tcp_input), but not held in the tcpcb.
@@ -581,14 +587,21 @@
*/
struct tcpopt {
u_int32_t to_flags; /* which options are present */
-#define TOF_MSS 0x0001 /* maximum segment size */
-#define TOF_SCALE 0x0002 /* window scaling */
-#define TOF_SACKPERM 0x0004 /* SACK permitted */
-#define TOF_TS 0x0010 /* timestamp */
-#define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */
-#define TOF_SACK 0x0080 /* Peer sent SACK option */
-#define TOF_FASTOPEN 0x0100 /* TCP Fast Open (TFO) cookie */
-#define TOF_MAXOPT 0x0200
+#define TOF_MSS 0x00000001 /* maximum segment size */
+#define TOF_SCALE 0x00000002 /* window scaling */
+#define TOF_SACKPERM 0x00000004 /* SACK permitted */
+#define TOF_TS 0x00000010 /* timestamp */
+#define TOF_SIGNATURE 0x00000040 /* TCP-MD5 signature option (RFC2385) */
+#define TOF_SACK 0x00000080 /* Peer sent SACK option */
+#define TOF_FASTOPEN 0x00000100 /* TCP Fast Open (TFO) cookie */
+#define TOF_ACCECNOPT 0x00000200 /* AccECN Option */
+#define TOF_MAXOPT 0x00000400
+ /* Keep internal flags above TOF_MAXOPT */
+#define TOF_ACCE_SYN 0x80000000 /* send empty option */
+#define TOF_ACCE_CE 0x40000000 /* CE counter changed */
+#define TOF_ACCE_E0 0x20000000 /* E0 counter changed */
+#define TOF_ACCE_E1 0x10000000 /* E1 counter changed */
+#define TOF_ACCE_ACKNOW 0x08000000 /* send full option */
u_int32_t to_tsval; /* new timestamp */
u_int32_t to_tsecr; /* reflected timestamp */
u_char *to_sacks; /* pointer to the first SACK blocks */
@@ -598,7 +611,10 @@
u_int8_t to_wscale; /* window scaling */
u_int8_t to_nsacks; /* number of SACK blocks */
u_int8_t to_tfo_len; /* TFO cookie length */
- u_int32_t to_spare; /* UTO */
+ u_int32_t to_ee0b; /* AccECN E0 marked bytes */
+ u_int32_t to_ee1b; /* AccECN E1 marked bytes */
+ u_int32_t to_eceb; /* AccECN CE marked bytes */
+ u_int32_t to_spare; /* UTO */
};
/*
@@ -1006,6 +1022,7 @@
VNET_DECLARE(int, tcp_do_sack);
VNET_DECLARE(int, tcp_do_tso);
VNET_DECLARE(int, tcp_ecn_maxretries);
+VNET_DECLARE(int, tcp_ecn_option);
VNET_DECLARE(int, tcp_initcwnd_segments);
VNET_DECLARE(int, tcp_insecure_rst);
VNET_DECLARE(int, tcp_insecure_syn);
@@ -1052,6 +1069,7 @@
#define V_tcp_do_sack VNET(tcp_do_sack)
#define V_tcp_do_tso VNET(tcp_do_tso)
#define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries)
+#define V_tcp_ecn_option VNET(tcp_ecn_option)
#define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments)
#define V_tcp_insecure_rst VNET(tcp_insecure_rst)
#define V_tcp_insecure_syn VNET(tcp_insecure_syn)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Jun 23, 1:31 PM (4 h, 41 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
34248387
Default Alt Text
D36303.id111278.diff (15 KB)
Attached To
Mode
D36303: Implement AccECN option (w/ early assigned option number)
Attached
Detach File
Event Timeline
Log In to Comment