Page MenuHomeFreeBSD

D36303.id111807.diff
No OneTemporary

D36303.id111807.diff

diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
--- a/share/man/man4/tcp.4
+++ b/share/man/man4/tcp.4
@@ -34,7 +34,7 @@
.\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93
.\" $FreeBSD$
.\"
-.Dd October 7, 2022
+.Dd October 11, 2022
.Dt TCP 4
.Os
.Sh NAME
@@ -520,6 +520,9 @@
specific connection.
This is needed to help with connection establishment
when a broken firewall is in the network path.
+.It Va ecn.option
+Report to the peer the number of received bytes carrying each ECN marking
+by including the Accurate ECN TCP option in each outgoing segment.
.It Va fast_finwait2_recycle
Recycle
.Tn TCP
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -104,6 +104,10 @@
#define TCPOLEN_SIGNATURE 18
#define TCPOPT_FAST_OPEN 34
#define TCPOLEN_FAST_OPEN_EMPTY 2
+#define TCPOPT_ACCECN0 0xAC
+#define TCPOPT_ACCECN1 0XAE
+#define TCPOLEN_ACCECN_EMPTY 2
+#define TCPOLEN_ACCECN_COUNTER 3
#define MAX_TCPOPTLEN 40 /* Absolute maximum TCP options len */
diff --git a/sys/netinet/tcp_ecn.h b/sys/netinet/tcp_ecn.h
--- a/sys/netinet/tcp_ecn.h
+++ b/sys/netinet/tcp_ecn.h
@@ -43,7 +43,7 @@
void tcp_ecn_input_syn_sent(struct tcpcb *, uint16_t, int);
void tcp_ecn_input_parallel_syn(struct tcpcb *, uint16_t, int);
-int tcp_ecn_input_segment(struct tcpcb *, uint16_t, int);
+int tcp_ecn_input_segment(struct tcpcb *, uint16_t, int, int);
uint16_t tcp_ecn_output_syn_sent(struct tcpcb *);
int tcp_ecn_output_established(struct tcpcb *, uint16_t *, int, bool);
void tcp_ecn_syncache_socket(struct tcpcb *, struct syncache *);
diff --git a/sys/netinet/tcp_ecn.c b/sys/netinet/tcp_ecn.c
--- a/sys/netinet/tcp_ecn.c
+++ b/sys/netinet/tcp_ecn.c
@@ -101,6 +101,24 @@
#include <netinet/tcpip.h>
#include <netinet/tcp_ecn.h>
+static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn,
+ CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+ "TCP ECN");
+
+VNET_DEFINE(int, tcp_do_ecn) = 2;
+SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_ecn), 0,
+ "TCP ECN support");
+
+VNET_DEFINE(int, tcp_ecn_maxretries) = 1;
+SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_ecn_maxretries), 0,
+ "Max retries before giving up on ECN");
+
+VNET_DEFINE(int, tcp_ecn_option) = 0;
+SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, option,
+ CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_ecn_option), 0,
+ "Use AccECN TCP option");
/*
* Process incoming SYN,ACK packet
@@ -108,7 +126,7 @@
void
tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos)
{
-
+log(2, "%s:%d\t th:%03x ecn:%02x\n", __func__, __LINE__, thflags, iptos);
if (V_tcp_do_ecn == 0)
return;
if ((V_tcp_do_ecn == 1) ||
@@ -209,6 +227,7 @@
void
tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos)
{
+log(2, "%s:%d\t th:%03x ecn:%02x\n", __func__, __LINE__, thflags, iptos);
if (thflags & TH_ACK)
return;
if (V_tcp_do_ecn == 0)
@@ -271,19 +290,26 @@
* TCP ECN processing.
*/
int
-tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos)
+tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int tlen, int iptos)
{
+log(2, "%s:%d\t th:%03x ecn:%02x\n", __func__, __LINE__, thflags, iptos);
int delta_ace = 0;
if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) {
switch (iptos & IPTOS_ECN_MASK) {
case IPTOS_ECN_CE:
+ tp->t_flags2 |= TF2_ACO_CE;
+ tp->t_rceb += tlen;
TCPSTAT_INC(tcps_ecn_ce);
break;
case IPTOS_ECN_ECT0:
+ tp->t_flags2 |= TF2_ACO_E0;
+ tp->t_re0b += tlen;
TCPSTAT_INC(tcps_ecn_ect0);
break;
case IPTOS_ECN_ECT1:
+ tp->t_flags2 |= TF2_ACO_E1;
+ tp->t_re1b += tlen;
TCPSTAT_INC(tcps_ecn_ect1);
break;
}
@@ -348,6 +374,7 @@
uint16_t
tcp_ecn_output_syn_sent(struct tcpcb *tp)
{
+log(2, "%s:%d\t\n", __func__, __LINE__);
uint16_t thflags = 0;
if (V_tcp_do_ecn == 0)
@@ -379,6 +406,8 @@
int
tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len, bool rxmit)
{
+log(2, "%s:%d\t th:%03x\n", __func__, __LINE__, *thflags);
+
int ipecn = IPTOS_ECN_NOTECT;
bool newdata;
@@ -400,6 +429,11 @@
* Reply with proper ECN notifications.
*/
if (tp->t_flags2 & TF2_ACE_PERMIT) {
+ log(2, "%s:%d\t ace:%d ecn:%d rcep:%d\n",
+ __func__, __LINE__,
+ (tp->t_flags2 & TF2_ACE_PERMIT) ? 1 : 0,
+ (tp->t_flags2 & TF2_ECN_PERMIT) ? 1:0,
+ tp->t_rcep);
*thflags &= ~(TH_AE|TH_CWR|TH_ECE);
if (tp->t_rcep & 0x01)
*thflags |= TH_ECE;
@@ -439,6 +473,8 @@
void
tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc)
{
+log(2, "%s:%d\t tf2:%03x sc:%02x\n", __func__, __LINE__, tp->t_flags2, sc->sc_flags);
+
if (sc->sc_flags & SCF_ECN_MASK) {
switch (sc->sc_flags & SCF_ECN_MASK) {
case SCF_ECN:
@@ -472,6 +508,8 @@
int
tcp_ecn_syncache_add(uint16_t thflags, int iptos)
{
+log(2, "%s:%d\t th:%03x ecn:%02x\n", __func__, __LINE__, thflags, iptos);
+
int scflags = 0;
switch (thflags & (TH_AE|TH_CWR|TH_ECE)) {
@@ -534,6 +572,7 @@
uint16_t
tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc)
{
+log(2, "%s:%d\t th:%03x sc:%02x\n", __func__, __LINE__, thflags, sc->sc_flags);
if ((thflags & TH_SYN) &&
(sc->sc_flags & SCF_ECN_MASK)) {
switch (sc->sc_flags & SCF_ECN_MASK) {
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -204,20 +204,6 @@
&VNET_NAME(tcp_abc_l_var), 2,
"Cap the max cwnd increment during slow-start to this number of segments");
-static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, ecn,
- CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
- "TCP ECN");
-
-VNET_DEFINE(int, tcp_do_ecn) = 2;
-SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(tcp_do_ecn), 0,
- "TCP ECN support");
-
-VNET_DEFINE(int, tcp_ecn_maxretries) = 1;
-SYSCTL_INT(_net_inet_tcp_ecn, OID_AUTO, maxretries, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(tcp_ecn_maxretries), 0,
- "Max retries before giving up on ECN");
-
VNET_DEFINE(int, tcp_insecure_syn) = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, insecure_syn, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_insecure_syn), 0,
@@ -1634,7 +1620,7 @@
/*
* TCP ECN processing.
*/
- if (tcp_ecn_input_segment(tp, thflags, iptos))
+ if (tcp_ecn_input_segment(tp, thflags, tlen, iptos))
cc_cong_signal(tp, th, CC_ECN);
/*
@@ -2235,6 +2221,7 @@
*/
if ((thflags & TH_SYN) && tp->t_state != TCPS_SYN_SENT &&
tp->t_state != TCPS_SYN_RECEIVED) {
+ log(2, "%s:%d\n", __func__, __LINE__);
TCPSTAT_INC(tcps_badsyn);
if (V_tcp_insecure_syn &&
SEQ_GEQ(th->th_seq, tp->last_ack_sent) &&
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -888,9 +888,37 @@
if (tp->t_flags & TF_SIGNATURE)
to.to_flags |= TOF_SIGNATURE;
#endif /* TCP_SIGNATURE */
-
+ /*
+ * AccECN option
+ * Don't send on <SYN>, only on <SYN,ACK> or
+ * when doing an AccECN session
+ */
+ if (V_tcp_ecn_option &&
+ ((V_tcp_do_ecn == 3) || (V_tcp_do_ecn == 4)) &&
+ ((tp->t_flags2 & TF2_ACE_PERMIT) ||
+ ((flags & TH_SYN) && (flags & TH_ACK)))) {
+ to.to_flags |= TOF_ACCECNOPT;
+ to.to_ee0b = tp->t_re0b;
+ to.to_ee1b = tp->t_re1b;
+ to.to_eceb = tp->t_rceb;
+ to.to_flags |= ((tp->t_flags2 & TF2_ACO_E0) ? TOF_ACCE_E0 : 0) |
+ ((tp->t_flags2 & TF2_ACO_E1) ? TOF_ACCE_E1 : 0) |
+ ((tp->t_flags2 & TF2_ACO_CE) ? TOF_ACCE_CE : 0);
+ if (flags & TH_SYN)
+ to.to_flags |= TOF_ACCE_SYN;
+ if (tp->t_flags & TF_ACKNOW)
+ to.to_flags |= TOF_ACCE_ACKNOW;
+ }
/* Processing the options. */
hdrlen += optlen = tcp_addoptions(&to, opt);
+ if (to.to_flags & TOF_ACCECNOPT) {
+ if ((to.to_flags & TOF_ACCE_E0) == 0)
+ tp->t_flags2 &= ~TF2_ACO_E0;
+ if ((to.to_flags & TOF_ACCE_E1) == 0)
+ tp->t_flags2 &= ~TF2_ACO_E1;
+ if ((to.to_flags & TOF_ACCE_CE) == 0)
+ tp->t_flags2 &= ~TF2_ACO_CE;
+ }
/*
* If we wanted a TFO option to be added, but it was unable
* to fit, ensure no data is sent.
@@ -1776,6 +1804,8 @@
int tt;
int maxunacktime;
+ log(2, "%s:%d\n", __func__, __LINE__);
+
tp->t_flags &= ~TF_PREVVALID;
if (tcp_timer_active(tp, TT_REXMT))
panic("tcp_setpersist: retransmit pending");
@@ -1953,6 +1983,128 @@
optlen += total_len;
break;
}
+ case TOF_ACCECNOPT:
+ {
+ int max_len = TCP_MAXOLEN - optlen;
+ if (max_len < TCPOLEN_ACCECN_EMPTY) {
+ to->to_flags &= ~TOF_ACCECNOPT;
+ continue;
+ }
+ if (max_len < (TCPOLEN_ACCECN_EMPTY +
+ 1 * TCPOLEN_ACCECN_COUNTER)) {
+ if (to->to_flags & TOF_ACCE_SYN) {
+ *optp++ = TCPOPT_ACCECN0;
+ optlen += TCPOLEN_ACCECN_EMPTY;
+ *optp++ = TCPOLEN_ACCECN_EMPTY;
+ continue;
+ } else {
+ to->to_flags &= ~TOF_ACCECNOPT;
+ continue;
+ }
+ }
+ if (max_len < (TCPOLEN_ACCECN_EMPTY +
+ 2 * TCPOLEN_ACCECN_COUNTER)) {
+ if (to->to_flags & TOF_ACCE_E1) {
+ *optp++ = TCPOPT_ACCECN1;
+ *optp++ = TCPOLEN_ACCECN_EMPTY +
+ TCPOLEN_ACCECN_COUNTER;
+ optlen += TCPOLEN_ACCECN_EMPTY +
+ TCPOLEN_ACCECN_COUNTER;
+ *optp++ = (char)(to->to_ee1b >> 16);
+ *optp++ = (char)(to->to_ee1b >> 8);
+ *optp++ = (char)(to->to_ee1b);
+ to->to_flags &= ~TOF_ACCE_E1;
+ continue;
+ }
+ *optp++ = TCPOPT_ACCECN0;
+ *optp++ = TCPOLEN_ACCECN_EMPTY +
+ TCPOLEN_ACCECN_COUNTER;
+ optlen += TCPOLEN_ACCECN_EMPTY +
+ TCPOLEN_ACCECN_COUNTER;
+ *optp++ = (char)(to->to_ee0b >> 16);
+ *optp++ = (char)(to->to_ee0b >> 8);
+ *optp++ = (char)(to->to_ee0b);
+ to->to_flags &= ~TOF_ACCE_E0;
+ continue;
+ }
+ if (max_len < (TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER)) {
+ if (to->to_flags & TOF_ACCE_E1) {
+ *optp++ = TCPOPT_ACCECN1;
+ *optp++ = TCPOLEN_ACCECN_EMPTY +
+ 2 * TCPOLEN_ACCECN_COUNTER;
+ optlen += TCPOLEN_ACCECN_EMPTY +
+ 2 * TCPOLEN_ACCECN_COUNTER;
+ *optp++ = (char)(to->to_ee1b >> 16);
+ *optp++ = (char)(to->to_ee1b >> 8);
+ *optp++ = (char)(to->to_ee1b);
+ to->to_flags &= ~TOF_ACCE_E1;
+ *optp++ = (char)(to->to_eceb >> 16);
+ *optp++ = (char)(to->to_eceb >> 8);
+ *optp++ = (char)(to->to_eceb);
+ to->to_flags &= ~TOF_ACCE_CE;
+ continue;
+ }
+ *optp++ = TCPOPT_ACCECN0;
+ *optp++ = TCPOLEN_ACCECN_EMPTY +
+ 2 * TCPOLEN_ACCECN_COUNTER;
+ optlen += TCPOLEN_ACCECN_EMPTY +
+ 2 * TCPOLEN_ACCECN_COUNTER;
+ *optp++ = (char)(to->to_ee0b >> 16);
+ *optp++ = (char)(to->to_ee0b >> 8);
+ *optp++ = (char)(to->to_ee0b);
+ to->to_flags &= ~TOF_ACCE_E0;
+ *optp++ = (char)(to->to_eceb >> 16);
+ *optp++ = (char)(to->to_eceb >> 8);
+ *optp++ = (char)(to->to_eceb);
+ to->to_flags &= ~TOF_ACCE_CE;
+ continue;
+ }
+ /*
+ * Enough option space remains to hold the full AccECN option.
+ * All three counters are always sent here; sending only changed
+ * counters (full set on TOF_ACCE_ACKNOW) is not done in this case.
+ */
+ if (to->to_flags & TOF_ACCE_E1) {
+ *optp++ = TCPOPT_ACCECN1;
+ *optp++ = TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER;
+ optlen += TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER;
+ *optp++ = (char)(to->to_ee1b >> 16);
+ *optp++ = (char)(to->to_ee1b >> 8);
+ *optp++ = (char)(to->to_ee1b);
+ to->to_flags &= ~TOF_ACCE_E1;
+ *optp++ = (char)(to->to_eceb >> 16);
+ *optp++ = (char)(to->to_eceb >> 8);
+ *optp++ = (char)(to->to_eceb);
+ to->to_flags &= ~TOF_ACCE_CE;
+ *optp++ = (char)(to->to_ee0b >> 16);
+ *optp++ = (char)(to->to_ee0b >> 8);
+ *optp++ = (char)(to->to_ee0b);
+ to->to_flags &= ~TOF_ACCE_E0;
+ continue;
+ } else {
+ *optp++ = TCPOPT_ACCECN0;
+ *optp++ = TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER;
+ optlen += TCPOLEN_ACCECN_EMPTY +
+ 3 * TCPOLEN_ACCECN_COUNTER;
+ *optp++ = (char)(to->to_ee0b >> 16);
+ *optp++ = (char)(to->to_ee0b >> 8);
+ *optp++ = (char)(to->to_ee0b);
+ to->to_flags &= ~TOF_ACCE_E0;
+ *optp++ = (char)(to->to_eceb >> 16);
+ *optp++ = (char)(to->to_eceb >> 8);
+ *optp++ = (char)(to->to_eceb);
+ to->to_flags &= ~TOF_ACCE_CE;
+ *optp++ = (char)(to->to_ee1b >> 16);
+ *optp++ = (char)(to->to_ee1b >> 8);
+ *optp++ = (char)(to->to_ee1b);
+ to->to_flags &= ~TOF_ACCE_E1;
+ continue;
+ }
+ }
default:
panic("%s: unknown TCP option type", __func__);
break;
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -13528,8 +13528,8 @@
rack_cc_after_idle(rack, tp);
}
tp->t_rcvtime = ticks;
- /* Now what about ECN? */
- if (tcp_ecn_input_segment(tp, ae->flags, ae->codepoint))
+ /* Now what about ECN of a chain of pure ACKs? */
+ if (tcp_ecn_input_segment(tp, ae->flags, 0, ae->codepoint))
rack_cong_signal(tp, CC_ECN, ae->ack, __LINE__);
#ifdef TCP_ACCOUNTING
/* Count for the specific type of ack in */
@@ -14318,7 +14318,7 @@
* TCP ECN processing. XXXJTL: If we ever use ECN, we need to move
* this to occur after we've validated the segment.
*/
- if (tcp_ecn_input_segment(tp, thflags, iptos))
+ if (tcp_ecn_input_segment(tp, thflags, tlen, iptos))
rack_cong_signal(tp, CC_ECN, th->th_ack, __LINE__);
/*
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -1801,6 +1801,7 @@
#ifdef INVARIANTS
int thflags = tcp_get_flags(th);
#endif
+ log(2, "%s:%d\n", __func__, __LINE__);
KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL"));
NET_EPOCH_ASSERT();
@@ -2008,9 +2009,28 @@
if (tp->t_flags & TF_SIGNATURE)
to.to_flags |= TOF_SIGNATURE;
#endif
+ /* AccECN option */
+ if (V_tcp_ecn_option &&
+ ((V_tcp_do_ecn == 3) || (V_tcp_do_ecn == 4)) &&
+ (tp->t_flags2 & TF2_ACE_PERMIT)) {
+ to.to_flags |= TOF_ACCECNOPT;
+ to.to_ee0b = tp->t_re0b;
+ to.to_ee1b = tp->t_re1b;
+ to.to_eceb = tp->t_rceb;
+ to.to_flags |= ((tp->t_flags2 & TF2_ACO_E0) ? TOF_ACCE_E0 : 0) |
+ ((tp->t_flags2 & TF2_ACO_E1) ? TOF_ACCE_E1 : 0) |
+ ((tp->t_flags2 & TF2_ACO_CE) ? TOF_ACCE_CE : 0);
+ }
/* Add the options. */
tlen += optlen = tcp_addoptions(&to, optp);
-
+ if (to.to_flags & TOF_ACCECNOPT) {
+ if ((to.to_flags & TOF_ACCE_E0) == 0)
+ tp->t_flags2 &= ~TF2_ACO_E0;
+ if ((to.to_flags & TOF_ACCE_E1) == 0)
+ tp->t_flags2 &= ~TF2_ACO_E1;
+ if ((to.to_flags & TOF_ACCE_CE) == 0)
+ tp->t_flags2 &= ~TF2_ACO_CE;
+ }
/* Update m_len in the correct mbuf. */
optm->m_len += optlen;
} else
@@ -2331,6 +2351,11 @@
tcp_log_tcpcbinit(tp);
#endif
tp->t_pacing_rate = -1;
+ if (V_tcp_do_lrd)
+ tp->t_flags |= TF_LRD;
+ tp->t_re0b = 1;
+ tp->t_re1b = 1;
+ tp->t_rceb = 0;
if (tp->t_fb->tfb_tcp_fb_init) {
if ((*tp->t_fb->tfb_tcp_fb_init)(tp)) {
refcount_release(&tp->t_fb->tfb_refcnt);
@@ -2343,8 +2368,6 @@
if (V_tcp_perconn_stats_enable == 1)
tp->t_stats = stats_blob_alloc(V_tcp_perconn_stats_dflt_tpl, 0);
#endif
- if (V_tcp_do_lrd)
- tp->t_flags |= TF_LRD;
return (tp); /* XXX */
}
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -475,6 +475,8 @@
int tick = ticks;
char *s;
bool paused;
+
+ log(2, "%s:%d\t\n", __func__, __LINE__);
CURVNET_SET(sch->sch_sc->vnet);
@@ -615,6 +617,7 @@
struct syncache *sc;
struct syncache_head *sch;
char *s = NULL;
+ log(2, "%s:%d\t\n", __func__, __LINE__);
if (syncache_cookiesonly())
return;
@@ -1412,6 +1415,8 @@
int tfo_cookie_valid = 0;
int tfo_response_cookie_valid = 0;
bool locked;
+ log(2, "%s:%d\t\n", __func__, __LINE__);
+
INP_RLOCK_ASSERT(inp); /* listen socket */
KASSERT((tcp_get_flags(th) & (TH_RST|TH_ACK|TH_SYN)) == TH_SYN,
@@ -1817,6 +1822,7 @@
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
#endif
+ log(2, "%s.%d\n", __func__, __LINE__);
NET_EPOCH_ASSERT();
@@ -1956,6 +1962,19 @@
/* don't send cookie again when retransmitting response */
sc->sc_tfo_cookie = NULL;
}
+ if (V_tcp_ecn_option)
+ to.to_flags |= TOF_ACCE_SYN;
+ }
+ if (V_tcp_ecn_option &&
+ (sc->sc_flags & SCF_ECN_MASK) &&
+ ((sc->sc_flags & SCF_ECN_MASK) != SCF_ECN)) {
+ to.to_flags |= TOF_ACCECNOPT;
+ to.to_flags |= TOF_ACCE_E0 |
+ TOF_ACCE_E1 |
+ TOF_ACCE_CE;
+ to.to_ee0b = 1;
+ to.to_ee1b = 1;
+ to.to_eceb = 0;
}
if (sc->sc_flags & SCF_TIMESTAMP) {
to.to_tsval = sc->sc_tsoff + tcp_ts_getticks();
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -321,6 +321,8 @@
#ifdef TCPDEBUG
int ostate;
+ log(2, "%s:%d\n", __func__, __LINE__);
+
ostate = tp->t_state;
#endif
inp = tp->t_inpcb;
@@ -396,6 +398,8 @@
#ifdef TCPDEBUG
int ostate;
+ log(2, "%s:%d\n", __func__, __LINE__);
+
ostate = tp->t_state;
#endif
inp = tp->t_inpcb;
@@ -540,6 +544,8 @@
#ifdef TCPDEBUG
int ostate;
+ log(2, "%s:%d\n", __func__, __LINE__);
+
ostate = tp->t_state;
#endif
inp = tp->t_inpcb;
@@ -624,6 +630,8 @@
#ifdef TCPDEBUG
int ostate;
+ log(2, "%s:%d\n", __func__, __LINE__);
+
ostate = tp->t_state;
#endif
inp = tp->t_inpcb;
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -251,6 +251,9 @@
int t_loglimit; /* Maximum number of log entries */
uint32_t t_rcep; /* Number of received CE marked pkts */
uint32_t t_scep; /* Synced number of delivered CE pkts */
+ uint32_t t_re0b; /* Number of ECT0 marked data bytes */
+ uint32_t t_re1b; /* Number of ECT1 marked data bytes */
+ uint32_t t_rceb; /* Number of CE marked data bytes */
int64_t t_pacing_rate; /* bytes / sec, -1 => unlimited */
struct tcp_log_stailq t_logs; /* Log buffer */
struct tcp_log_id_node *t_lin;
@@ -570,7 +573,10 @@
#define TF2_ECN_SND_CWR 0x00000040 /* ECN CWR in queue */
#define TF2_ECN_SND_ECE 0x00000080 /* ECN ECE in queue */
#define TF2_ACE_PERMIT 0x00000100 /* Accurate ECN mode */
-#define TF2_FBYTES_COMPLETE 0x00000400 /* We have first bytes in and out */
+#define TF2_ACO_E0 0x00000200 /* EE0 counter changed */
+#define TF2_ACO_E1 0x00000400 /* EE1 counter changed */
+#define TF2_ACO_CE 0x00000800 /* ECE counter changed */
+#define TF2_FBYTES_COMPLETE 0x00001000 /* We have first bytes in and out */
/*
* Structure to hold TCP options that are only used during segment
* processing (in tcp_input), but not held in the tcpcb.
@@ -581,14 +587,21 @@
*/
struct tcpopt {
u_int32_t to_flags; /* which options are present */
-#define TOF_MSS 0x0001 /* maximum segment size */
-#define TOF_SCALE 0x0002 /* window scaling */
-#define TOF_SACKPERM 0x0004 /* SACK permitted */
-#define TOF_TS 0x0010 /* timestamp */
-#define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */
-#define TOF_SACK 0x0080 /* Peer sent SACK option */
-#define TOF_FASTOPEN 0x0100 /* TCP Fast Open (TFO) cookie */
-#define TOF_MAXOPT 0x0200
+#define TOF_MSS 0x00000001 /* maximum segment size */
+#define TOF_SCALE 0x00000002 /* window scaling */
+#define TOF_SACKPERM 0x00000004 /* SACK permitted */
+#define TOF_TS 0x00000010 /* timestamp */
+#define TOF_SIGNATURE 0x00000040 /* TCP-MD5 signature option (RFC2385) */
+#define TOF_SACK 0x00000080 /* Peer sent SACK option */
+#define TOF_FASTOPEN 0x00000100 /* TCP Fast Open (TFO) cookie */
+#define TOF_ACCECNOPT 0x00000200 /* AccECN Option */
+#define TOF_MAXOPT 0x00000400
+ /* Keep internal flags above TOF_MAXOPT */
+#define TOF_ACCE_SYN 0x80000000 /* send empty option */
+#define TOF_ACCE_CE 0x40000000 /* CE counter changed */
+#define TOF_ACCE_E0 0x20000000 /* E0 counter changed */
+#define TOF_ACCE_E1 0x10000000 /* E1 counter changed */
+#define TOF_ACCE_ACKNOW 0x08000000 /* send full option */
u_int32_t to_tsval; /* new timestamp */
u_int32_t to_tsecr; /* reflected timestamp */
u_char *to_sacks; /* pointer to the first SACK blocks */
@@ -598,7 +611,10 @@
u_int8_t to_wscale; /* window scaling */
u_int8_t to_nsacks; /* number of SACK blocks */
u_int8_t to_tfo_len; /* TFO cookie length */
- u_int32_t to_spare; /* UTO */
+ u_int32_t to_ee0b; /* AccECN E0 marked bytes */
+ u_int32_t to_ee1b; /* AccECN E1 marked bytes */
+ u_int32_t to_eceb; /* AccECN CE marked bytes */
+ u_int32_t to_spare; /* UTO */
};
/*
@@ -989,6 +1005,7 @@
VNET_DECLARE(int, tcp_do_sack);
VNET_DECLARE(int, tcp_do_tso);
VNET_DECLARE(int, tcp_ecn_maxretries);
+VNET_DECLARE(int, tcp_ecn_option);
VNET_DECLARE(int, tcp_initcwnd_segments);
VNET_DECLARE(int, tcp_insecure_rst);
VNET_DECLARE(int, tcp_insecure_syn);
@@ -1035,6 +1052,7 @@
#define V_tcp_do_sack VNET(tcp_do_sack)
#define V_tcp_do_tso VNET(tcp_do_tso)
#define V_tcp_ecn_maxretries VNET(tcp_ecn_maxretries)
+#define V_tcp_ecn_option VNET(tcp_ecn_option)
#define V_tcp_initcwnd_segments VNET(tcp_initcwnd_segments)
#define V_tcp_insecure_rst VNET(tcp_insecure_rst)
#define V_tcp_insecure_syn VNET(tcp_insecure_syn)

File Metadata

Mime Type
text/plain
Expires
Thu, Jun 25, 3:13 PM (15 h, 18 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
34324259
Default Alt Text
D36303.id111807.diff (20 KB)

Event Timeline