Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F140645615
D21011.id60155.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
27 KB
Referenced Files
None
Subscribers
None
D21011.id60155.diff
View Options
Index: sys/dev/cxgbe/tom/t4_listen.c
===================================================================
--- sys/dev/cxgbe/tom/t4_listen.c
+++ sys/dev/cxgbe/tom/t4_listen.c
@@ -1097,7 +1097,7 @@
static void
pass_accept_req_to_protohdrs(struct adapter *sc, const struct mbuf *m,
- struct in_conninfo *inc, struct tcphdr *th)
+ struct in_conninfo *inc, struct tcphdr *th, uint8_t *iptos)
{
const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
const struct ether_header *eh;
@@ -1114,6 +1114,21 @@
tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
}
+ /* extract TOS (DiffServ + ECN) byte for AccECN */
+ if (iptos) {
+ if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
+ const struct ip *ip = (const void *)l3hdr;
+ *iptos = ip->ip_tos;
+ }
+#ifdef INET6
+ else
+ if (((struct ip *)l3hdr)->ip_v == (IPV6_VERSION >> 4)) {
+ const struct ip6_hdr *ip6 = (const void *)l3hdr;
+ *iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+ }
+#endif /* INET6 */
+ }
+
if (inc) {
bzero(inc, sizeof(*inc));
inc->inc_fport = tcp->th_sport;
@@ -1254,6 +1269,7 @@
struct synq_entry *synqe = NULL;
int reject_reason, v, ntids;
uint16_t vid, l2info;
+ uint8_t iptos;
struct epoch_tracker et;
#ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
@@ -1317,7 +1333,7 @@
if (lctx->vnet != ifp->if_vnet)
REJECT_PASS_ACCEPT_REQ(true);
- pass_accept_req_to_protohdrs(sc, m, &inc, &th);
+ pass_accept_req_to_protohdrs(sc, m, &inc, &th, &iptos);
if (inc.inc_flags & INC_ISIPV6) {
/* Don't offload if the ifcap isn't enabled */
@@ -1390,7 +1406,7 @@
* syncache_add. Note that syncache_add releases the pcb lock.
*/
t4opt_to_tcpopt(&cpl->tcpopt, &to);
- toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
+ toe_syncache_add(&inc, &to, &th, inp, tod, synqe, iptos);
if (atomic_load_int(&synqe->ok_to_respond) > 0) {
uint64_t opt0;
@@ -1471,9 +1487,10 @@
struct tcphdr *th, struct tcpopt *to)
{
uint16_t tcp_opt = be16toh(cpl->tcp_opt);
+ uint8_t iptos;
/* start off with the original SYN */
- pass_accept_req_to_protohdrs(sc, synqe->syn, inc, th);
+ pass_accept_req_to_protohdrs(sc, synqe->syn, inc, th, &iptos);
/* modify parts to make it look like the ACK to our SYN|ACK */
th->th_flags = TH_ACK;
Index: sys/netinet/cc/cc_dctcp.c
===================================================================
--- sys/netinet/cc/cc_dctcp.c
+++ sys/netinet/cc/cc_dctcp.c
@@ -108,7 +108,7 @@
dctcp_data = ccv->cc_data;
- if (CCV(ccv, t_flags) & TF_ECN_PERMIT) {
+ if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT) {
/*
* DCTCP doesn't treat receipt of ECN marked packet as a
* congestion event. Thus, DCTCP always executes the ACK
@@ -276,8 +276,8 @@
dctcp_data->ece_curr = 1;
break;
case CC_RTO:
- if (CCV(ccv, t_flags) & TF_ECN_PERMIT) {
- CCV(ccv, t_flags) |= TF_ECN_SND_CWR;
+ if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT) {
+ CCV(ccv, t_flags2) |= TF2_ECN_SND_CWR;
dctcp_update_alpha(ccv);
dctcp_data->save_sndnxt += CCV(ccv, t_maxseg);
dctcp_data->num_cong_events++;
@@ -293,7 +293,7 @@
dctcp_data = ccv->cc_data;
- if (CCV(ccv, t_flags) & TF_ECN_PERMIT)
+ if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT)
dctcp_data->save_sndnxt = CCV(ccv, snd_nxt);
}
@@ -305,7 +305,7 @@
{
dctcp_cc_algo.post_recovery = newreno_cc_algo.post_recovery;
- if (CCV(ccv, t_flags) & TF_ECN_PERMIT)
+ if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT)
dctcp_update_alpha(ccv);
}
@@ -336,12 +336,12 @@
if (!dctcp_data->ce_prev && (ccflag & CCF_DELACK))
delay_ack = 0;
dctcp_data->ce_prev = 1;
- CCV(ccv, t_flags) |= TF_ECN_SND_ECE;
+ CCV(ccv, t_flags2) |= TF2_ECN_SND_ECE;
} else {
if (dctcp_data->ce_prev && (ccflag & CCF_DELACK))
delay_ack = 0;
dctcp_data->ce_prev = 0;
- CCV(ccv, t_flags) &= ~TF_ECN_SND_ECE;
+ CCV(ccv, t_flags2) &= ~TF2_ECN_SND_ECE;
}
/* DCTCP sets delayed ack when this segment sets the CWR flag. */
Index: sys/netinet/tcp.h
===================================================================
--- sys/netinet/tcp.h
+++ sys/netinet/tcp.h
@@ -71,8 +71,10 @@
#define TH_URG 0x20
#define TH_ECE 0x40
#define TH_CWR 0x80
+#define TH_AE 0x100 /* maps into th_x2 */
+
#define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECE|TH_CWR)
-#define PRINT_TH_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR"
+#define PRINT_TH_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR\11AE"
u_short th_win; /* window */
u_short th_sum; /* checksum */
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -390,16 +390,16 @@
case CC_NDUPACK:
if (!IN_FASTRECOVERY(tp->t_flags)) {
tp->snd_recover = tp->snd_max;
- if (tp->t_flags & TF_ECN_PERMIT)
- tp->t_flags |= TF_ECN_SND_CWR;
+ if (tp->t_flags2 & TF2_ECN_PERMIT)
+ tp->t_flags2 |= TF2_ECN_SND_CWR;
}
break;
case CC_ECN:
if (!IN_CONGRECOVERY(tp->t_flags)) {
TCPSTAT_INC(tcps_ecn_rcwnd);
tp->snd_recover = tp->snd_max;
- if (tp->t_flags & TF_ECN_PERMIT)
- tp->t_flags |= TF_ECN_SND_CWR;
+ if (tp->t_flags2 & TF2_ECN_PERMIT)
+ tp->t_flags2 |= TF2_ECN_SND_CWR;
}
break;
case CC_RTO:
@@ -1325,7 +1325,7 @@
#endif
TCP_PROBE3(debug__input, tp, th, m);
tcp_dooptions(&to, optp, optlen, TO_SYN);
- if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL))
+ if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL, iptos))
goto tfo_socket_result;
/*
@@ -1575,12 +1575,9 @@
/*
* TCP ECN processing.
*/
- if (tp->t_flags & TF_ECN_PERMIT) {
- if (thflags & TH_CWR)
- tp->t_flags &= ~TF_ECN_SND_ECE;
+ if (tp->t_flags2 & TF2_ECN_PERMIT) {
switch (iptos & IPTOS_ECN_MASK) {
case IPTOS_ECN_CE:
- tp->t_flags |= TF_ECN_SND_ECE;
TCPSTAT_INC(tcps_ecn_ce);
break;
case IPTOS_ECN_ECT0:
@@ -1591,11 +1588,29 @@
break;
}
+ char d_ace;
+
+ if (tp->t_flags2 & TF2_ACE_PERMIT) {
+ d_ace = (tcp_get_ace(th) + 8 - (tp->s_cep & 0x07)) & 0x07;
+ tp->s_cep += d_ace;
+ if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ tp->r_cep += 1;
+ } else {
+ if (thflags & TH_CWR)
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ tp->t_flags2 |= TF2_ECN_SND_ECE;
+ }
+
/* Process a packet differently from RFC3168. */
cc_ecnpkt_handler(tp, th, iptos);
- /* Congestion experienced. */
- if (thflags & TH_ECE) {
+ /*
+ * Congestion experienced. With ACE, signal congestion when the
+ * received ACE field has changed; with legacy ECN, whenever ECE is set.
+ */
+ if ((!(tp->t_flags2 & TF2_ACE_PERMIT) && (thflags & TH_ECE)) ||
+ ((tp->t_flags2 & TF2_ACE_PERMIT) && (d_ace != 0))) {
cc_cong_signal(tp, th, CC_ECN);
}
}
@@ -2009,10 +2024,70 @@
if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
V_tcp_do_ecn) {
- tp->t_flags |= TF_ECN_PERMIT;
+ tp->t_flags2 |= TF2_ECN_PERMIT;
TCPSTAT_INC(tcps_ecn_shs);
}
+ /* Decode the Accurate ECN handshake flags per the table in section 3.1.1. */
+ if ((V_tcp_do_ecn == 3) ||
+ (V_tcp_do_ecn == 4)) {
+ int xflags;
+ xflags = ((th->th_x2 << 8) | thflags) & (TH_AE|TH_CWR|TH_ECE); /* AE is kept in th_x2; fold it in as bit 8 */
+ switch (xflags) {
+ /* non-ECT SYN */
+ case (0|TH_CWR|0):
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ tp->s_cep = 5;
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_nect);
+ break;
+ /* ECT1 SYN */
+ case (0|TH_CWR|TH_ECE):
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ tp->s_cep = 5;
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ect1);
+ break;
+ /* ECT0 SYN */
+ case (TH_AE|0|0):
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ tp->s_cep = 5;
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ect0);
+ break;
+ /* CE SYN */
+ case (TH_AE|TH_CWR|0):
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ tp->s_cep = 6; /* one above the 5 used by the other cases */
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ce); /* count under the CE statistic, not non-ECT */
+ break;
+ default: /* any other flag combination: AccECN is not enabled here */
+ break;
+ }
+ /*
+ * Set the AccECN Codepoints on
+ * the outgoing ACK to the SYN,ACK
+ * according to table 3 in the
+ * AccECN draft
+ */
+ switch (iptos & IPTOS_ECN_MASK) {
+ /* non-ECT SYN,ACK */
+ case (IPTOS_ECN_NOTECT):
+ tp->r_cep = 0b010;
+ break;
+ case (IPTOS_ECN_ECT0):
+ tp->r_cep = 0b100;
+ break;
+ case (IPTOS_ECN_ECT1):
+ tp->r_cep = 0b011;
+ break;
+ case (IPTOS_ECN_CE):
+ tp->r_cep = 0b110;
+ break;
+ }
+ }
+
/*
* Received <SYN,ACK> in SYN_SENT[*] state.
* Transitions:
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -1110,9 +1110,20 @@
} else
flags |= TH_ECE|TH_CWR;
}
-
+ /*
+ * Send an Accurate ECN setup SYN packet
+ */
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 3) {
+ if (tp->t_rxtshift >= 1) {
+ if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
+ flags |= TH_ECE|TH_CWR|TH_AE;
+ } else
+ flags |= TH_ECE|TH_CWR|TH_AE;
+ }
+
if (tp->t_state == TCPS_ESTABLISHED &&
- (tp->t_flags & TF_ECN_PERMIT)) {
+ ((tp->t_flags2 & TF2_ECN_PERMIT) ||
+ (tp->t_flags2 & TF2_ACE_PERMIT))) {
/*
* If the peer has ECN, mark data packets with
* ECN capable transmission (ECT).
@@ -1128,18 +1139,45 @@
ip->ip_tos |= IPTOS_ECN_ECT0;
TCPSTAT_INC(tcps_ecn_ect0);
}
-
+
/*
* Reply with proper ECN notifications.
*/
- if (tp->t_flags & TF_ECN_SND_CWR) {
- flags |= TH_CWR;
- tp->t_flags &= ~TF_ECN_SND_CWR;
- }
- if (tp->t_flags & TF_ECN_SND_ECE)
- flags |= TH_ECE;
+ if (tp->t_flags2 & TF2_ACE_PERMIT) {
+ if (tp->r_cep & 0x01)
+ flags |= TH_ECE;
+ else
+ flags &= ~TH_ECE;
+ if (tp->r_cep & 0x02)
+ flags |= TH_CWR;
+ else
+ flags &= ~TH_CWR;
+ if (tp->r_cep & 0x04)
+ flags |= TH_AE;
+ else
+ flags &= ~TH_AE;
+ if (!(tp->t_flags2 & TF2_ECN_PERMIT)) {
+ /*
+ * here we process the final
+ * ACK of the 3WHS
+ */
+ if (tp->r_cep == 0b110) {
+ tp->r_cep = 6;
+ } else {
+ tp->r_cep = 5;
+ }
+ tp->t_flags2 |= TF2_ECN_PERMIT;
+ }
+ } else {
+ if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+ flags |= TH_CWR;
+ tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+ }
+ if (tp->t_flags2 & TF2_ECN_SND_ECE)
+ flags |= TH_ECE;
+ }
}
-
+
/*
* If we are doing retransmissions, then snd_nxt will
* not reflect the first unsent octet. For ACK only
@@ -1169,7 +1207,9 @@
bcopy(opt, th + 1, optlen);
th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
}
- th->th_flags = flags;
+ th->th_flags = (flags & (TH_CWR|TH_ECE|TH_URG|TH_ACK|
+ TH_PUSH|TH_RST|TH_SYN|TH_FIN));
+ th->th_x2 = (flags & (TH_AE)) >> 8;
/*
* Calculate receive window. Don't shrink window,
* but avoid silly window syndrome.
Index: sys/netinet/tcp_stacks/rack.c
===================================================================
--- sys/netinet/tcp_stacks/rack.c
+++ sys/netinet/tcp_stacks/rack.c
@@ -1353,16 +1353,16 @@
rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg;
rack->r_ctl.rc_prr_recovery_fs = tp->snd_max - tp->snd_una;
tp->snd_recover = tp->snd_max;
- if (tp->t_flags & TF_ECN_PERMIT)
- tp->t_flags |= TF_ECN_SND_CWR;
+ if (tp->t_flags2 & TF2_ECN_PERMIT)
+ tp->t_flags2 |= TF2_ECN_SND_CWR;
}
break;
case CC_ECN:
if (!IN_CONGRECOVERY(tp->t_flags)) {
TCPSTAT_INC(tcps_ecn_rcwnd);
tp->snd_recover = tp->snd_max;
- if (tp->t_flags & TF_ECN_PERMIT)
- tp->t_flags |= TF_ECN_SND_CWR;
+ if (tp->t_flags2 & TF2_ECN_PERMIT)
+ tp->t_flags2 |= TF2_ECN_SND_CWR;
}
break;
case CC_RTO:
@@ -5265,7 +5265,7 @@
if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
V_tcp_do_ecn) {
- tp->t_flags |= TF_ECN_PERMIT;
+ tp->t_flags2 |= TF2_ECN_PERMIT;
TCPSTAT_INC(tcps_ecn_shs);
}
if (SEQ_GT(th->th_ack, tp->snd_una)) {
@@ -6602,12 +6602,12 @@
* TCP ECN processing. XXXJTL: If we ever use ECN, we need to move
* this to occur after we've validated the segment.
*/
- if (tp->t_flags & TF_ECN_PERMIT) {
+ if (tp->t_flags2 & TF2_ECN_PERMIT) {
if (thflags & TH_CWR)
- tp->t_flags &= ~TF_ECN_SND_ECE;
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
switch (iptos & IPTOS_ECN_MASK) {
case IPTOS_ECN_CE:
- tp->t_flags |= TF_ECN_SND_ECE;
+ tp->t_flags2 |= TF2_ECN_SND_ECE;
TCPSTAT_INC(tcps_ecn_ce);
break;
case IPTOS_ECN_ECT0:
@@ -8152,7 +8152,7 @@
flags |= TH_ECE | TH_CWR;
}
if (tp->t_state == TCPS_ESTABLISHED &&
- (tp->t_flags & TF_ECN_PERMIT)) {
+ (tp->t_flags2 & TF2_ECN_PERMIT)) {
/*
* If the peer has ECN, mark data packets with ECN capable
* transmission (ECT). Ignore pure ack packets,
@@ -8171,11 +8171,11 @@
/*
* Reply with proper ECN notifications.
*/
- if (tp->t_flags & TF_ECN_SND_CWR) {
+ if (tp->t_flags2 & TF2_ECN_SND_CWR) {
flags |= TH_CWR;
- tp->t_flags &= ~TF_ECN_SND_CWR;
+ tp->t_flags2 &= ~TF2_ECN_SND_CWR;
}
- if (tp->t_flags & TF_ECN_SND_ECE)
+ if (tp->t_flags2 & TF2_ECN_SND_ECE)
flags |= TH_ECE;
}
/*
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -3167,7 +3167,7 @@
}
sp = s + strlen(s);
if (th)
- sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS);
+ sprintf(sp, " tcpflags 0x%b", (th->th_x2 << 8) | th->th_flags, PRINT_TH_FLAGS);
if (*(s + size - 1) != '\0')
panic("%s: string too long", __func__);
return (s);
@@ -3244,3 +3244,16 @@
if (inp->inp_socket == NULL)
xt->xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
}
+/* Return the ACE value (0..7) encoded by a TCP header's AE, CWR and ECE flags. */
+int
+tcp_get_ace(struct tcphdr *th)
+{
+ int ace = 0; /* accumulated 3-bit value */
+ if (th->th_flags & TH_ECE)
+ ace += 1; /* ECE is the least significant bit */
+ if (th->th_flags & TH_CWR)
+ ace += 2; /* CWR is the middle bit */
+ if (th->th_x2 & (TH_AE >> 8)) /* TH_AE is 0x100 and maps into th_x2, hence the shift */
+ ace += 4; /* AE is the most significant bit */
+ return ace;
+}
\ No newline at end of file
Index: sys/netinet/tcp_syncache.h
===================================================================
--- sys/netinet/tcp_syncache.h
+++ sys/netinet/tcp_syncache.h
@@ -45,7 +45,7 @@
struct tcphdr *, struct socket **, struct mbuf *);
int syncache_add(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *,
- void *, void *);
+ void *, void *, uint8_t);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *);
void syncache_badack(struct in_conninfo *);
int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported);
@@ -90,6 +90,10 @@
#define SCF_SIGNATURE 0x20 /* send MD5 digests */
#define SCF_SACK 0x80 /* send SACK option */
#define SCF_ECN 0x100 /* send ECN setup packet */
+#define SCF_ACE_N 0x200 /* send ACE non-ECT setup */
+#define SCF_ACE_0 0x400 /* send ACE ECT0 setup */
+#define SCF_ACE_1 0x800 /* send ACE ECT1 setup */
+#define SCF_ACE_CE 0x1000 /* send ACE CE setup */
struct syncache_head {
struct mtx sch_mtx;
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -964,7 +964,20 @@
}
if (sc->sc_flags & SCF_ECN)
- tp->t_flags |= TF_ECN_PERMIT;
+ tp->t_flags2 |= TF2_ECN_PERMIT;
+
+ if ((sc->sc_flags & SCF_ACE_N) ||
+ (sc->sc_flags & SCF_ACE_0) ||
+ (sc->sc_flags & SCF_ACE_1) ||
+ (sc->sc_flags & SCF_ACE_CE)) {
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ tp->s_cep = 5;
+ tp->r_cep = 5;
+ if (sc->sc_flags & SCF_ACE_CE) {
+ tp->s_cep=6;
+ tp->r_cep=6;
+ }
+ }
/*
* Set up MSS and get cached values from tcp_hostcache.
@@ -1309,7 +1322,7 @@
int
syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
- void *todctx)
+ void *todctx, uint8_t tos)
{
struct tcpcb *tp;
struct socket *so;
@@ -1612,8 +1625,62 @@
sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */
if (ltflags & TF_NOOPT)
sc->sc_flags |= SCF_NOOPT;
- if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn)
- sc->sc_flags |= SCF_ECN;
+ /* ECN Handshake */
+ if (V_tcp_do_ecn) {
+ int xflags;
+ xflags = ((th->th_x2 << 8) | th->th_flags) & (TH_AE|TH_CWR|TH_ECE);
+ switch (xflags) {
+ /* no ECN */
+ case (0|0|0):
+ break;
+ /* legacy ECN */
+ case (0|TH_CWR|TH_ECE):
+ sc->sc_flags |= SCF_ECN;
+ break;
+ /* Accurate ECN */
+ case (TH_AE|TH_CWR|TH_ECE):
+ if ((V_tcp_do_ecn == 3) ||
+ (V_tcp_do_ecn == 4)) {
+
+ switch (tos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ sc->sc_flags |= SCF_ACE_CE;
+ break;
+ case IPTOS_ECN_ECT0:
+ sc->sc_flags |= SCF_ACE_0;
+ break;
+ case IPTOS_ECN_ECT1:
+ sc->sc_flags |= SCF_ACE_1;
+ break;
+ case IPTOS_ECN_NOTECT:
+ sc->sc_flags |= SCF_ACE_N;
+ break;
+ }
+ } else
+ sc->sc_flags |= SCF_ECN;
+ break;
+ /* Default Case (section 3.1.2) */
+ default:
+ if ((V_tcp_do_ecn == 3) ||
+ (V_tcp_do_ecn == 4)) {
+ switch (tos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ sc->sc_flags |= SCF_ACE_CE;
+ break;
+ case IPTOS_ECN_ECT0:
+ sc->sc_flags |= SCF_ACE_0;
+ break;
+ case IPTOS_ECN_ECT1:
+ sc->sc_flags |= SCF_ACE_1;
+ break;
+ case IPTOS_ECN_NOTECT:
+ sc->sc_flags |= SCF_ACE_N;
+ break;
+ }
+ }
+ break;
+ }
+ }
if (V_tcp_syncookies)
sc->sc_iss = syncookie_generate(sch, sc);
@@ -1787,6 +1854,28 @@
TCPSTAT_INC(tcps_ecn_shs);
}
+ if ((flags & TH_SYN) && (sc->sc_flags & SCF_ACE_N)) {
+ th->th_flags |= TH_CWR;
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_nect);
+ }
+ if ((flags & TH_SYN) && (sc->sc_flags & SCF_ACE_0)) {
+ th->th_x2 |= (TH_AE >> 8);
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ect0);
+ }
+ if ((flags & TH_SYN) && (sc->sc_flags & SCF_ACE_1)) {
+ th->th_flags |= (TH_ECE | TH_CWR);
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ect1);
+ }
+ if ((flags & TH_SYN) && (sc->sc_flags & SCF_ACE_CE)) {
+ th->th_flags |= TH_CWR;
+ th->th_x2 |= (TH_AE >> 8);
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ce);
+ }
+
/* Tack on the TCP options. */
if ((sc->sc_flags & SCF_NOOPT) == 0) {
to.to_flags = 0;
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -1517,7 +1517,7 @@
ti->tcpi_snd_wscale = tp->snd_scale;
ti->tcpi_rcv_wscale = tp->rcv_scale;
}
- if (tp->t_flags & TF_ECN_PERMIT)
+ if (tp->t_flags2 & TF2_ECN_PERMIT)
ti->tcpi_options |= TCPI_OPT_ECN;
ti->tcpi_rto = tp->t_rxtcur * tick;
@@ -2484,6 +2484,10 @@
db_printf("%sTF_NOPUSH", comma ? ", " : "");
comma = 1;
}
+ if (t_flags & TF_PREVVALID) {
+ db_printf("%sTF_PREVVALID", comma ? ", " : "");
+ comma = 1;
+ }
if (t_flags & TF_MORETOCOME) {
db_printf("%sTF_MORETOCOME", comma ? ", " : "");
comma = 1;
@@ -2512,6 +2516,10 @@
db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
comma = 1;
}
+ if (t_flags & TF_WASCRECOVERY) {
+ db_printf("%sTF_WASCRECOVERY", comma ? ", " : "");
+ comma = 1;
+ }
if (t_flags & TF_SIGNATURE) {
db_printf("%sTF_SIGNATURE", comma ? ", " : "");
comma = 1;
@@ -2524,8 +2532,8 @@
db_printf("%sTF_TSO", comma ? ", " : "");
comma = 1;
}
- if (t_flags & TF_ECN_PERMIT) {
- db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
+ if (t_flags & TF_TOE) {
+ db_printf("%sTF_TOE", comma ? ", " : "");
comma = 1;
}
if (t_flags & TF_FASTOPEN) {
@@ -2534,6 +2542,50 @@
}
}
+static void
+db_print_tflags2(u_int t_flags2)
+{
+ int comma; /* nonzero once a name has been printed; selects the ", " separator */
+ /* Print the name of each TF2_* bit tested below that is set in t_flags2. */
+ comma = 0;
+ if (t_flags2 & TF2_PLPMTU_BLACKHOLE) {
+ db_printf("%sTF2_PLPMTU_BLACKHOLE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_PLPMTU_PMTUD) {
+ db_printf("%sTF2_PLPMTU_PMTUD", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_PLPMTU_MAXSEGSNT) {
+ db_printf("%sTF2_PLPMTU_MAXSEGSNT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_LOG_AUTO) {
+ db_printf("%sTF2_LOG_AUTO", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_DROP_AF_DATA) {
+ db_printf("%sTF2_DROP_AF_DATA", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_ECN_PERMIT) {
+ db_printf("%sTF2_ECN_PERMIT", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_ECN_SND_CWR) {
+ db_printf("%sTF2_ECN_SND_CWR", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_ECN_SND_ECE) {
+ db_printf("%sTF2_ECN_SND_ECE", comma ? ", " : "");
+ comma = 1;
+ }
+ if (t_flags2 & TF2_ACE_PERMIT) {
+ db_printf("%sTF2_ACE_PERMIT", comma ? ", " : "");
+ comma = 1; /* kept for symmetry; no later flag reads it */
+ }
+}
+
static void
db_print_toobflags(char t_oobflags)
{
@@ -2581,6 +2633,12 @@
db_print_tflags(tp->t_flags);
db_printf(")\n");
+ db_print_indent(indent);
+ db_printf("t_flags2: 0x%x (", tp->t_flags2);
+ db_print_tflags2(tp->t_flags2);
+ db_printf(")\n");
+
+
db_print_indent(indent);
db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: x0%08x\n",
tp->snd_una, tp->snd_max, tp->snd_nxt);
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -205,6 +205,8 @@
u_int t_keepcnt; /* number of keepalives before close */
int t_dupacks; /* consecutive dup acks recd */
int t_lognum; /* Number of log entries */
+ uint32_t r_cep; /* Number of received CE marked packets */
+ uint32_t s_cep; /* Synced number of delivered CE packets */
struct tcp_log_stailq t_logs; /* Log buffer */
struct tcp_log_id_node *t_lin;
struct tcp_log_id_bucket *t_lib;
@@ -306,33 +308,30 @@
/*
* Flags and utility macros for the t_flags field.
*/
-#define TF_ACKNOW 0x000001 /* ack peer immediately */
-#define TF_DELACK 0x000002 /* ack, but try to delay it */
-#define TF_NODELAY 0x000004 /* don't delay packets to coalesce */
-#define TF_NOOPT 0x000008 /* don't use tcp options */
-#define TF_SENTFIN 0x000010 /* have sent FIN */
-#define TF_REQ_SCALE 0x000020 /* have/will request window scaling */
-#define TF_RCVD_SCALE 0x000040 /* other side has requested scaling */
-#define TF_REQ_TSTMP 0x000080 /* have/will request timestamps */
-#define TF_RCVD_TSTMP 0x000100 /* a timestamp was received in SYN */
-#define TF_SACK_PERMIT 0x000200 /* other side said I could SACK */
-#define TF_NEEDSYN 0x000400 /* send SYN (implicit state) */
-#define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */
-#define TF_NOPUSH 0x001000 /* don't push */
-#define TF_PREVVALID 0x002000 /* saved values for bad rxmit valid */
-#define TF_MORETOCOME 0x010000 /* More data to be appended to sock */
-#define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */
-#define TF_LASTIDLE 0x040000 /* connection was previously idle */
-#define TF_RXWIN0SENT 0x080000 /* sent a receiver win 0 in response */
-#define TF_FASTRECOVERY 0x100000 /* in NewReno Fast Recovery */
-#define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */
-#define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */
-#define TF_FORCEDATA 0x800000 /* force out a byte */
-#define TF_TSO 0x1000000 /* TSO enabled on this connection */
-#define TF_TOE 0x2000000 /* this connection is offloaded */
-#define TF_ECN_PERMIT 0x4000000 /* connection ECN-ready */
-#define TF_ECN_SND_CWR 0x8000000 /* ECN CWR in queue */
-#define TF_ECN_SND_ECE 0x10000000 /* ECN ECE in queue */
+#define TF_ACKNOW 0x00000001 /* ack peer immediately */
+#define TF_DELACK 0x00000002 /* ack, but try to delay it */
+#define TF_NODELAY 0x00000004 /* don't delay packets to coalesce */
+#define TF_NOOPT 0x00000008 /* don't use tcp options */
+#define TF_SENTFIN 0x00000010 /* have sent FIN */
+#define TF_REQ_SCALE 0x00000020 /* have/will request window scaling */
+#define TF_RCVD_SCALE 0x00000040 /* other side has requested scaling */
+#define TF_REQ_TSTMP 0x00000080 /* have/will request timestamps */
+#define TF_RCVD_TSTMP 0x00000100 /* a timestamp was received in SYN */
+#define TF_SACK_PERMIT 0x00000200 /* other side said I could SACK */
+#define TF_NEEDSYN 0x00000400 /* send SYN (implicit state) */
+#define TF_NEEDFIN 0x00000800 /* send FIN (implicit state) */
+#define TF_NOPUSH 0x00001000 /* don't push */
+#define TF_PREVVALID 0x00002000 /* saved values for bad rxmit valid */
+#define TF_MORETOCOME 0x00010000 /* More data to be appended to sock */
+#define TF_LQ_OVERFLOW 0x00020000 /* listen queue overflow */
+#define TF_LASTIDLE 0x00040000 /* connection was previously idle */
+#define TF_RXWIN0SENT 0x00080000 /* sent a receiver win 0 in response */
+#define TF_FASTRECOVERY 0x00100000 /* in NewReno Fast Recovery */
+#define TF_WASFRECOVERY 0x00200000 /* was in NewReno Fast Recovery */
+#define TF_SIGNATURE 0x00400000 /* require MD5 digests (RFC2385) */
+#define TF_FORCEDATA 0x00800000 /* force out a byte */
+#define TF_TSO 0x01000000 /* TSO enabled on this connection */
+#define TF_TOE 0x02000000 /* this connection is offloaded */
#define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */
#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */
#define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */
@@ -370,7 +369,11 @@
#define TF2_PLPMTU_PMTUD 0x00000002 /* Allowed to attempt PLPMTUD. */
#define TF2_PLPMTU_MAXSEGSNT 0x00000004 /* Last seg sent was full seg. */
#define TF2_LOG_AUTO 0x00000008 /* Session is auto-logging. */
-#define TF2_DROP_AF_DATA 0x00000010 /* Drop after all data ack'd */
+#define TF2_DROP_AF_DATA 0x00000010 /* Drop after all data ack'd */
+#define TF2_ECN_PERMIT 0x00000020 /* connection ECN-ready */
+#define TF2_ECN_SND_CWR 0x00000040 /* ECN CWR in queue */
+#define TF2_ECN_SND_ECE 0x00000080 /* ECN ECE in queue */
+#define TF2_ACE_PERMIT 0x00000100 /* Accurate ECN mode */
/*
* Structure to hold TCP options that are only used during segment
@@ -610,7 +613,12 @@
uint64_t tcps_pmtud_blackhole_activated_min_mss; /* BH at min MSS Count */
uint64_t tcps_pmtud_blackhole_failed; /* Black Hole Failure Count */
- uint64_t _pad[12]; /* 6 UTO, 6 TBD */
+ /* Accurate ECN Handshake stats */
+ uint64_t tcps_ace_nect; /* ACE SYN packet with Non-ECT */
+ uint64_t tcps_ace_ect1; /* ACE SYN packet with ECT1 */
+ uint64_t tcps_ace_ect0; /* ACE SYN packet with ECT0 */
+ uint64_t tcps_ace_ce; /* ACE SYN packet with CE */
+ uint64_t _pad[8]; /* 6 UTO, 2 TBD */
};
#define tcps_rcvmemdrop tcps_rcvreassfull /* compat */
@@ -946,6 +954,7 @@
tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
int32_t seglimit, int32_t segsize, struct sockbuf *sb);
+int tcp_get_ace(struct tcphdr *th);
static inline void
tcp_fields_to_host(struct tcphdr *th)
Index: sys/netinet/toecore.h
===================================================================
--- sys/netinet/toecore.h
+++ sys/netinet/toecore.h
@@ -130,7 +130,7 @@
void toe_connect_failed(struct toedev *, struct inpcb *, int);
void toe_syncache_add(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
- struct inpcb *, void *, void *);
+ struct inpcb *, void *, void *, uint8_t);
int toe_syncache_expand(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
struct socket **);
Index: sys/netinet/toecore.c
===================================================================
--- sys/netinet/toecore.c
+++ sys/netinet/toecore.c
@@ -337,13 +337,13 @@
void
toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, void *tod, void *todctx)
+ struct inpcb *inp, void *tod, void *todctx, uint8_t tos)
{
struct socket *lso = inp->inp_socket;
INP_WLOCK_ASSERT(inp);
- syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx);
+ syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx, tos);
}
int
Index: usr.bin/netstat/inet.c
===================================================================
--- usr.bin/netstat/inet.c
+++ usr.bin/netstat/inet.c
@@ -763,6 +763,15 @@
p(tcps_ecn_rcwnd, "\t{:congestion-reductions/%ju} "
"{N:/time%s ECN reduced the congestion window}\n");
+ p(tcps_ace_nect, "\t{:ace-nonect-syn/%ju} "
+ "{N:/ACE SYN packet%s with Non-ECT}\n");
+ p(tcps_ace_ect0, "\t{:ace-ect0-syn/%ju} "
+ "{N:/ACE SYN packet%s with ECT0}\n");
+ p(tcps_ace_ect1, "\t{:ace-ect1-syn/%ju} "
+ "{N:/ACE SYN packet%s with ECT1}\n");
+ p(tcps_ace_ce, "\t{:ace-ce-syn/%ju} "
+ "{N:/ACE SYN packet%s with CE}\n");
+
xo_close_container("ecn");
xo_open_container("tcp-signature");
p(tcps_sig_rcvgoodsig, "\t{:received-good-signature/%ju} "
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, Dec 27, 9:00 AM (17 h, 22 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27301649
Default Alt Text
D21011.id60155.diff (27 KB)
Attached To
Mode
D21011: Functional implementation of Accurate ECN in FreeBSD
Attached
Detach File
Event Timeline
Log In to Comment