Page MenuHomeFreeBSD

D21011.id60155.diff
No OneTemporary

D21011.id60155.diff

Index: sys/dev/cxgbe/tom/t4_listen.c
===================================================================
--- sys/dev/cxgbe/tom/t4_listen.c
+++ sys/dev/cxgbe/tom/t4_listen.c
@@ -1097,7 +1097,7 @@
static void
pass_accept_req_to_protohdrs(struct adapter *sc, const struct mbuf *m,
- struct in_conninfo *inc, struct tcphdr *th)
+ struct in_conninfo *inc, struct tcphdr *th, uint8_t *iptos)
{
const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
const struct ether_header *eh;
@@ -1114,6 +1114,21 @@
tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
}
+	/*
+	 * Extract the TOS / traffic class byte (DiffServ + ECN) for AccECN.
+	 * Default to 0 so the caller never reads an unset value when the IP
+	 * version matches neither branch below (e.g. INET6 compiled out).
+	 */
+	if (iptos != NULL) {
+		const struct ip *ip = (const void *)l3hdr;
+
+		*iptos = 0;
+		if (ip->ip_v == IPVERSION)
+			*iptos = ip->ip_tos;
+#ifdef INET6
+		else if (ip->ip_v == (IPV6_VERSION >> 4)) {
+			const struct ip6_hdr *ip6 = (const void *)l3hdr;
+
+			/* Traffic class is bits 20-27 of the host-order flow word. */
+			*iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+		}
+#endif /* INET6 */
+	}
+
if (inc) {
bzero(inc, sizeof(*inc));
inc->inc_fport = tcp->th_sport;
@@ -1254,6 +1269,7 @@
struct synq_entry *synqe = NULL;
int reject_reason, v, ntids;
uint16_t vid, l2info;
+ uint8_t iptos;
struct epoch_tracker et;
#ifdef INVARIANTS
unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
@@ -1317,7 +1333,7 @@
if (lctx->vnet != ifp->if_vnet)
REJECT_PASS_ACCEPT_REQ(true);
- pass_accept_req_to_protohdrs(sc, m, &inc, &th);
+ pass_accept_req_to_protohdrs(sc, m, &inc, &th, &iptos);
if (inc.inc_flags & INC_ISIPV6) {
/* Don't offload if the ifcap isn't enabled */
@@ -1390,7 +1406,7 @@
* syncache_add. Note that syncache_add releases the pcb lock.
*/
t4opt_to_tcpopt(&cpl->tcpopt, &to);
- toe_syncache_add(&inc, &to, &th, inp, tod, synqe);
+ toe_syncache_add(&inc, &to, &th, inp, tod, synqe, iptos);
if (atomic_load_int(&synqe->ok_to_respond) > 0) {
uint64_t opt0;
@@ -1471,9 +1487,10 @@
struct tcphdr *th, struct tcpopt *to)
{
uint16_t tcp_opt = be16toh(cpl->tcp_opt);
+ uint8_t iptos;
/* start off with the original SYN */
- pass_accept_req_to_protohdrs(sc, synqe->syn, inc, th);
+ pass_accept_req_to_protohdrs(sc, synqe->syn, inc, th, &iptos);
/* modify parts to make it look like the ACK to our SYN|ACK */
th->th_flags = TH_ACK;
Index: sys/netinet/cc/cc_dctcp.c
===================================================================
--- sys/netinet/cc/cc_dctcp.c
+++ sys/netinet/cc/cc_dctcp.c
@@ -108,7 +108,7 @@
dctcp_data = ccv->cc_data;
- if (CCV(ccv, t_flags) & TF_ECN_PERMIT) {
+ if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT) {
/*
* DCTCP doesn't treat receipt of ECN marked packet as a
* congestion event. Thus, DCTCP always executes the ACK
@@ -276,8 +276,8 @@
dctcp_data->ece_curr = 1;
break;
case CC_RTO:
- if (CCV(ccv, t_flags) & TF_ECN_PERMIT) {
- CCV(ccv, t_flags) |= TF_ECN_SND_CWR;
+ if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT) {
+ CCV(ccv, t_flags2) |= TF2_ECN_SND_CWR;
dctcp_update_alpha(ccv);
dctcp_data->save_sndnxt += CCV(ccv, t_maxseg);
dctcp_data->num_cong_events++;
@@ -293,7 +293,7 @@
dctcp_data = ccv->cc_data;
- if (CCV(ccv, t_flags) & TF_ECN_PERMIT)
+ if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT)
dctcp_data->save_sndnxt = CCV(ccv, snd_nxt);
}
@@ -305,7 +305,7 @@
{
dctcp_cc_algo.post_recovery = newreno_cc_algo.post_recovery;
- if (CCV(ccv, t_flags) & TF_ECN_PERMIT)
+ if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT)
dctcp_update_alpha(ccv);
}
@@ -336,12 +336,12 @@
if (!dctcp_data->ce_prev && (ccflag & CCF_DELACK))
delay_ack = 0;
dctcp_data->ce_prev = 1;
- CCV(ccv, t_flags) |= TF_ECN_SND_ECE;
+ CCV(ccv, t_flags2) |= TF2_ECN_SND_ECE;
} else {
if (dctcp_data->ce_prev && (ccflag & CCF_DELACK))
delay_ack = 0;
dctcp_data->ce_prev = 0;
- CCV(ccv, t_flags) &= ~TF_ECN_SND_ECE;
+ CCV(ccv, t_flags2) &= ~TF2_ECN_SND_ECE;
}
/* DCTCP sets delayed ack when this segment sets the CWR flag. */
Index: sys/netinet/tcp.h
===================================================================
--- sys/netinet/tcp.h
+++ sys/netinet/tcp.h
@@ -71,8 +71,10 @@
#define TH_URG 0x20
#define TH_ECE 0x40
#define TH_CWR 0x80
+#define TH_AE 0x100 /* maps into th_x2 */
+
#define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECE|TH_CWR)
-#define PRINT_TH_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR"
+#define PRINT_TH_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR\11AE"
u_short th_win; /* window */
u_short th_sum; /* checksum */
Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -390,16 +390,16 @@
case CC_NDUPACK:
if (!IN_FASTRECOVERY(tp->t_flags)) {
tp->snd_recover = tp->snd_max;
- if (tp->t_flags & TF_ECN_PERMIT)
- tp->t_flags |= TF_ECN_SND_CWR;
+ if (tp->t_flags2 & TF2_ECN_PERMIT)
+ tp->t_flags2 |= TF2_ECN_SND_CWR;
}
break;
case CC_ECN:
if (!IN_CONGRECOVERY(tp->t_flags)) {
TCPSTAT_INC(tcps_ecn_rcwnd);
tp->snd_recover = tp->snd_max;
- if (tp->t_flags & TF_ECN_PERMIT)
- tp->t_flags |= TF_ECN_SND_CWR;
+ if (tp->t_flags2 & TF2_ECN_PERMIT)
+ tp->t_flags2 |= TF2_ECN_SND_CWR;
}
break;
case CC_RTO:
@@ -1325,7 +1325,7 @@
#endif
TCP_PROBE3(debug__input, tp, th, m);
tcp_dooptions(&to, optp, optlen, TO_SYN);
- if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL))
+ if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL, iptos))
goto tfo_socket_result;
/*
@@ -1575,12 +1575,9 @@
/*
* TCP ECN processing.
*/
- if (tp->t_flags & TF_ECN_PERMIT) {
- if (thflags & TH_CWR)
- tp->t_flags &= ~TF_ECN_SND_ECE;
+ if (tp->t_flags2 & TF2_ECN_PERMIT) {
switch (iptos & IPTOS_ECN_MASK) {
case IPTOS_ECN_CE:
- tp->t_flags |= TF_ECN_SND_ECE;
TCPSTAT_INC(tcps_ecn_ce);
break;
case IPTOS_ECN_ECT0:
@@ -1591,11 +1588,29 @@
break;
}
+ char d_ace;
+
+ if (tp->t_flags2 & TF2_ACE_PERMIT) {
+ d_ace = (tcp_get_ace(th) + 8 - (tp->s_cep & 0x07)) & 0x07;
+ tp->s_cep += d_ace;
+ if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ tp->r_cep += 1;
+ } else {
+ if (thflags & TH_CWR)
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
+ if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
+ tp->t_flags2 |= TF2_ECN_SND_ECE;
+ }
+
/* Process a packet differently from RFC3168. */
cc_ecnpkt_handler(tp, th, iptos);
- /* Congestion experienced. */
- if (thflags & TH_ECE) {
+ /* Congestion experienced.
+ * With ACE, signal congestion only when the ACE counter changed;
+ * with legacy ECN, signal it whenever ECE is received.
+ */
+ if ((!(tp->t_flags2 & TF2_ACE_PERMIT) && (thflags & TH_ECE)) ||
+ ((tp->t_flags2 & TF2_ACE_PERMIT) && (d_ace != 0))) {
cc_cong_signal(tp, th, CC_ECN);
}
}
@@ -2009,10 +2024,70 @@
if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
V_tcp_do_ecn) {
- tp->t_flags |= TF_ECN_PERMIT;
+ tp->t_flags2 |= TF2_ECN_PERMIT;
TCPSTAT_INC(tcps_ecn_shs);
}
+ /* decoding Accurate ECN according to table in section 3.1.1 */
+ if ((V_tcp_do_ecn == 3) ||
+ (V_tcp_do_ecn == 4)) {
+ int xflags;
+ xflags = ((th->th_x2 << 8) | thflags) & (TH_AE|TH_CWR|TH_ECE);
+ switch (xflags) {
+ /* non-ECT SYN */
+ case (0|TH_CWR|0):
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ tp->s_cep = 5;
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_nect);
+ break;
+ /* ECT1 SYN */
+ case (0|TH_CWR|TH_ECE):
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ tp->s_cep = 5;
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ect1);
+ break;
+ /* ECT0 SYN */
+ case (TH_AE|0|0):
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ tp->s_cep = 5;
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ect0);
+ break;
+ /* CE SYN: account it under the CE statistic, not the non-ECT one */
+ case (TH_AE|TH_CWR|0):
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ tp->s_cep = 6;
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ce);
+ break;
+ default:
+ break;
+ }
+ /*
+ * Set the AccECN Codepoints on
+ * the outgoing ACK to the SYN,ACK
+ * according to table 3 in the
+ * AccECN draft
+ */
+ switch (iptos & IPTOS_ECN_MASK) {
+ /* non-ECT SYN,ACK */
+ case (IPTOS_ECN_NOTECT):
+ tp->r_cep = 0b010;
+ break;
+ case (IPTOS_ECN_ECT0):
+ tp->r_cep = 0b100;
+ break;
+ case (IPTOS_ECN_ECT1):
+ tp->r_cep = 0b011;
+ break;
+ case (IPTOS_ECN_CE):
+ tp->r_cep = 0b110;
+ break;
+ }
+ }
+
/*
* Received <SYN,ACK> in SYN_SENT[*] state.
* Transitions:
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -1110,9 +1110,20 @@
} else
flags |= TH_ECE|TH_CWR;
}
-
+ /*
+ * Send an Accurate ECN setup SYN packet
+ */
+ if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 3) {
+ if (tp->t_rxtshift >= 1) {
+ if (tp->t_rxtshift <= V_tcp_ecn_maxretries)
+ flags |= TH_ECE|TH_CWR|TH_AE;
+ } else
+ flags |= TH_ECE|TH_CWR|TH_AE;
+ }
+
if (tp->t_state == TCPS_ESTABLISHED &&
- (tp->t_flags & TF_ECN_PERMIT)) {
+ ((tp->t_flags2 & TF2_ECN_PERMIT) ||
+ (tp->t_flags2 & TF2_ACE_PERMIT))) {
/*
* If the peer has ECN, mark data packets with
* ECN capable transmission (ECT).
@@ -1128,18 +1139,45 @@
ip->ip_tos |= IPTOS_ECN_ECT0;
TCPSTAT_INC(tcps_ecn_ect0);
}
-
+
/*
* Reply with proper ECN notifications.
*/
- if (tp->t_flags & TF_ECN_SND_CWR) {
- flags |= TH_CWR;
- tp->t_flags &= ~TF_ECN_SND_CWR;
- }
- if (tp->t_flags & TF_ECN_SND_ECE)
- flags |= TH_ECE;
+ if (tp->t_flags2 & TF2_ACE_PERMIT) {
+ if (tp->r_cep & 0x01)
+ flags |= TH_ECE;
+ else
+ flags &= ~TH_ECE;
+ if (tp->r_cep & 0x02)
+ flags |= TH_CWR;
+ else
+ flags &= ~TH_CWR;
+ if (tp->r_cep & 0x04)
+ flags |= TH_AE;
+ else
+ flags &= ~TH_AE;
+ if (!(tp->t_flags2 & TF2_ECN_PERMIT)) {
+ /*
+ * here we process the final
+ * ACK of the 3WHS
+ */
+ if (tp->r_cep == 0b110) {
+ tp->r_cep = 6;
+ } else {
+ tp->r_cep = 5;
+ }
+ tp->t_flags2 |= TF2_ECN_PERMIT;
+ }
+ } else {
+ if (tp->t_flags2 & TF2_ECN_SND_CWR) {
+ flags |= TH_CWR;
+ tp->t_flags2 &= ~TF2_ECN_SND_CWR;
+ }
+ if (tp->t_flags2 & TF2_ECN_SND_ECE)
+ flags |= TH_ECE;
+ }
}
-
+
/*
* If we are doing retransmissions, then snd_nxt will
* not reflect the first unsent octet. For ACK only
@@ -1169,7 +1207,9 @@
bcopy(opt, th + 1, optlen);
th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
}
- th->th_flags = flags;
+ th->th_flags = (flags & (TH_CWR|TH_ECE|TH_URG|TH_ACK|
+ TH_PUSH|TH_RST|TH_SYN|TH_FIN));
+ th->th_x2 = (flags & (TH_AE)) >> 8;
/*
* Calculate receive window. Don't shrink window,
* but avoid silly window syndrome.
Index: sys/netinet/tcp_stacks/rack.c
===================================================================
--- sys/netinet/tcp_stacks/rack.c
+++ sys/netinet/tcp_stacks/rack.c
@@ -1353,16 +1353,16 @@
rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg;
rack->r_ctl.rc_prr_recovery_fs = tp->snd_max - tp->snd_una;
tp->snd_recover = tp->snd_max;
- if (tp->t_flags & TF_ECN_PERMIT)
- tp->t_flags |= TF_ECN_SND_CWR;
+ if (tp->t_flags2 & TF2_ECN_PERMIT)
+ tp->t_flags2 |= TF2_ECN_SND_CWR;
}
break;
case CC_ECN:
if (!IN_CONGRECOVERY(tp->t_flags)) {
TCPSTAT_INC(tcps_ecn_rcwnd);
tp->snd_recover = tp->snd_max;
- if (tp->t_flags & TF_ECN_PERMIT)
- tp->t_flags |= TF_ECN_SND_CWR;
+ if (tp->t_flags2 & TF2_ECN_PERMIT)
+ tp->t_flags2 |= TF2_ECN_SND_CWR;
}
break;
case CC_RTO:
@@ -5265,7 +5265,7 @@
if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
V_tcp_do_ecn) {
- tp->t_flags |= TF_ECN_PERMIT;
+ tp->t_flags2 |= TF2_ECN_PERMIT;
TCPSTAT_INC(tcps_ecn_shs);
}
if (SEQ_GT(th->th_ack, tp->snd_una)) {
@@ -6602,12 +6602,12 @@
* TCP ECN processing. XXXJTL: If we ever use ECN, we need to move
* this to occur after we've validated the segment.
*/
- if (tp->t_flags & TF_ECN_PERMIT) {
+ if (tp->t_flags2 & TF2_ECN_PERMIT) {
if (thflags & TH_CWR)
- tp->t_flags &= ~TF_ECN_SND_ECE;
+ tp->t_flags2 &= ~TF2_ECN_SND_ECE;
switch (iptos & IPTOS_ECN_MASK) {
case IPTOS_ECN_CE:
- tp->t_flags |= TF_ECN_SND_ECE;
+ tp->t_flags2 |= TF2_ECN_SND_ECE;
TCPSTAT_INC(tcps_ecn_ce);
break;
case IPTOS_ECN_ECT0:
@@ -8152,7 +8152,7 @@
flags |= TH_ECE | TH_CWR;
}
if (tp->t_state == TCPS_ESTABLISHED &&
- (tp->t_flags & TF_ECN_PERMIT)) {
+ (tp->t_flags2 & TF2_ECN_PERMIT)) {
/*
* If the peer has ECN, mark data packets with ECN capable
* transmission (ECT). Ignore pure ack packets,
@@ -8171,11 +8171,11 @@
/*
* Reply with proper ECN notifications.
*/
- if (tp->t_flags & TF_ECN_SND_CWR) {
+ if (tp->t_flags2 & TF2_ECN_SND_CWR) {
flags |= TH_CWR;
- tp->t_flags &= ~TF_ECN_SND_CWR;
+ tp->t_flags2 &= ~TF2_ECN_SND_CWR;
}
- if (tp->t_flags & TF_ECN_SND_ECE)
+ if (tp->t_flags2 & TF2_ECN_SND_ECE)
flags |= TH_ECE;
}
/*
Index: sys/netinet/tcp_subr.c
===================================================================
--- sys/netinet/tcp_subr.c
+++ sys/netinet/tcp_subr.c
@@ -3167,7 +3167,7 @@
}
sp = s + strlen(s);
if (th)
- sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS);
+ sprintf(sp, " tcpflags 0x%b", (th->th_x2 << 8) | th->th_flags, PRINT_TH_FLAGS);
if (*(s + size - 1) != '\0')
panic("%s: string too long", __func__);
return (s);
@@ -3244,3 +3244,16 @@
if (inp->inp_socket == NULL)
xt->xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
}
+
+/*
+ * Return the 3-bit ACE value carried in a TCP header: ECE contributes 1,
+ * CWR contributes 2 and AE contributes 4.  TH_AE is 0x100 and maps into
+ * th_x2, so it is shifted down by 8 before being tested there.
+ */
+int
+tcp_get_ace(struct tcphdr *th)
+{
+	int ae, cwr, ece;
+
+	ece = (th->th_flags & TH_ECE) != 0 ? 1 : 0;
+	cwr = (th->th_flags & TH_CWR) != 0 ? 2 : 0;
+	ae = (th->th_x2 & (TH_AE >> 8)) != 0 ? 4 : 0;
+
+	return (ece + cwr + ae);
+}
\ No newline at end of file
Index: sys/netinet/tcp_syncache.h
===================================================================
--- sys/netinet/tcp_syncache.h
+++ sys/netinet/tcp_syncache.h
@@ -45,7 +45,7 @@
struct tcphdr *, struct socket **, struct mbuf *);
int syncache_add(struct in_conninfo *, struct tcpopt *,
struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *,
- void *, void *);
+ void *, void *, uint8_t);
void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *);
void syncache_badack(struct in_conninfo *);
int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported);
@@ -90,6 +90,10 @@
#define SCF_SIGNATURE 0x20 /* send MD5 digests */
#define SCF_SACK 0x80 /* send SACK option */
#define SCF_ECN 0x100 /* send ECN setup packet */
+#define SCF_ACE_N 0x200 /* send ACE non-ECT setup */
+#define SCF_ACE_0 0x400 /* send ACE ECT0 setup */
+#define SCF_ACE_1 0x800 /* send ACE ECT1 setup */
+#define SCF_ACE_CE 0x1000 /* send ACE CE setup */
struct syncache_head {
struct mtx sch_mtx;
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -964,7 +964,20 @@
}
if (sc->sc_flags & SCF_ECN)
- tp->t_flags |= TF_ECN_PERMIT;
+ tp->t_flags2 |= TF2_ECN_PERMIT;
+
+ if ((sc->sc_flags & SCF_ACE_N) ||
+ (sc->sc_flags & SCF_ACE_0) ||
+ (sc->sc_flags & SCF_ACE_1) ||
+ (sc->sc_flags & SCF_ACE_CE)) {
+ tp->t_flags2 |= TF2_ACE_PERMIT;
+ tp->s_cep = 5;
+ tp->r_cep = 5;
+ if (sc->sc_flags & SCF_ACE_CE) {
+ tp->s_cep=6;
+ tp->r_cep=6;
+ }
+ }
/*
* Set up MSS and get cached values from tcp_hostcache.
@@ -1309,7 +1322,7 @@
int
syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod,
- void *todctx)
+ void *todctx, uint8_t tos)
{
struct tcpcb *tp;
struct socket *so;
@@ -1612,8 +1625,62 @@
sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */
if (ltflags & TF_NOOPT)
sc->sc_flags |= SCF_NOOPT;
- if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn)
- sc->sc_flags |= SCF_ECN;
+ /* ECN Handshake */
+ if (V_tcp_do_ecn) {
+ int xflags;
+ xflags = ((th->th_x2 << 8) | th->th_flags) & (TH_AE|TH_CWR|TH_ECE);
+ switch (xflags) {
+ /* no ECN */
+ case (0|0|0):
+ break;
+ /* legacy ECN */
+ case (0|TH_CWR|TH_ECE):
+ sc->sc_flags |= SCF_ECN;
+ break;
+ /* Accurate ECN */
+ case (TH_AE|TH_CWR|TH_ECE):
+ if ((V_tcp_do_ecn == 3) ||
+ (V_tcp_do_ecn == 4)) {
+
+ switch (tos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ sc->sc_flags |= SCF_ACE_CE;
+ break;
+ case IPTOS_ECN_ECT0:
+ sc->sc_flags |= SCF_ACE_0;
+ break;
+ case IPTOS_ECN_ECT1:
+ sc->sc_flags |= SCF_ACE_1;
+ break;
+ case IPTOS_ECN_NOTECT:
+ sc->sc_flags |= SCF_ACE_N;
+ break;
+ }
+ } else
+ sc->sc_flags |= SCF_ECN;
+ break;
+ /* Default Case (section 3.1.2) */
+ default:
+ if ((V_tcp_do_ecn == 3) ||
+ (V_tcp_do_ecn == 4)) {
+ switch (tos & IPTOS_ECN_MASK) {
+ case IPTOS_ECN_CE:
+ sc->sc_flags |= SCF_ACE_CE;
+ break;
+ case IPTOS_ECN_ECT0:
+ sc->sc_flags |= SCF_ACE_0;
+ break;
+ case IPTOS_ECN_ECT1:
+ sc->sc_flags |= SCF_ACE_1;
+ break;
+ case IPTOS_ECN_NOTECT:
+ sc->sc_flags |= SCF_ACE_N;
+ break;
+ }
+ }
+ break;
+ }
+ }
if (V_tcp_syncookies)
sc->sc_iss = syncookie_generate(sch, sc);
@@ -1787,6 +1854,28 @@
TCPSTAT_INC(tcps_ecn_shs);
}
+ if ((flags & TH_SYN) && (sc->sc_flags & SCF_ACE_N)) {
+ th->th_flags |= TH_CWR;
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_nect);
+ }
+ if ((flags & TH_SYN) && (sc->sc_flags & SCF_ACE_0)) {
+ th->th_x2 |= (TH_AE >> 8);
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ect0);
+ }
+ if ((flags & TH_SYN) && (sc->sc_flags & SCF_ACE_1)) {
+ th->th_flags |= (TH_ECE | TH_CWR);
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ect1);
+ }
+ if ((flags & TH_SYN) && (sc->sc_flags & SCF_ACE_CE)) {
+ th->th_flags |= TH_CWR;
+ th->th_x2 |= (TH_AE >> 8);
+ TCPSTAT_INC(tcps_ecn_shs);
+ TCPSTAT_INC(tcps_ace_ce);
+ }
+
/* Tack on the TCP options. */
if ((sc->sc_flags & SCF_NOOPT) == 0) {
to.to_flags = 0;
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -1517,7 +1517,7 @@
ti->tcpi_snd_wscale = tp->snd_scale;
ti->tcpi_rcv_wscale = tp->rcv_scale;
}
- if (tp->t_flags & TF_ECN_PERMIT)
+ if (tp->t_flags2 & TF2_ECN_PERMIT)
ti->tcpi_options |= TCPI_OPT_ECN;
ti->tcpi_rto = tp->t_rxtcur * tick;
@@ -2484,6 +2484,10 @@
db_printf("%sTF_NOPUSH", comma ? ", " : "");
comma = 1;
}
+ if (t_flags & TF_PREVVALID) {
+ db_printf("%sTF_PREVVALID", comma ? ", " : "");
+ comma = 1;
+ }
if (t_flags & TF_MORETOCOME) {
db_printf("%sTF_MORETOCOME", comma ? ", " : "");
comma = 1;
@@ -2512,6 +2516,10 @@
db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
comma = 1;
}
+ if (t_flags & TF_WASCRECOVERY) {
+ db_printf("%sTF_WASCRECOVERY", comma ? ", " : "");
+ comma = 1;
+ }
if (t_flags & TF_SIGNATURE) {
db_printf("%sTF_SIGNATURE", comma ? ", " : "");
comma = 1;
@@ -2524,8 +2532,8 @@
db_printf("%sTF_TSO", comma ? ", " : "");
comma = 1;
}
- if (t_flags & TF_ECN_PERMIT) {
- db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
+ if (t_flags & TF_TOE) {
+ db_printf("%sTF_TOE", comma ? ", " : "");
comma = 1;
}
if (t_flags & TF_FASTOPEN) {
@@ -2534,6 +2542,50 @@
}
}
+/*
+ * Print, via db_printf(), the symbolic name of every TF2_* bit that is set
+ * in t_flags2, separating consecutive names with ", ".
+ */
+static void
+db_print_tflags2(u_int t_flags2)
+{
+	const char *sep;
+
+	/* Empty before the first name, ", " before every later one. */
+	sep = "";
+	if (t_flags2 & TF2_PLPMTU_BLACKHOLE) {
+		db_printf("%sTF2_PLPMTU_BLACKHOLE", sep);
+		sep = ", ";
+	}
+	if (t_flags2 & TF2_PLPMTU_PMTUD) {
+		db_printf("%sTF2_PLPMTU_PMTUD", sep);
+		sep = ", ";
+	}
+	if (t_flags2 & TF2_PLPMTU_MAXSEGSNT) {
+		db_printf("%sTF2_PLPMTU_MAXSEGSNT", sep);
+		sep = ", ";
+	}
+	if (t_flags2 & TF2_LOG_AUTO) {
+		db_printf("%sTF2_LOG_AUTO", sep);
+		sep = ", ";
+	}
+	if (t_flags2 & TF2_DROP_AF_DATA) {
+		db_printf("%sTF2_DROP_AF_DATA", sep);
+		sep = ", ";
+	}
+	if (t_flags2 & TF2_ECN_PERMIT) {
+		db_printf("%sTF2_ECN_PERMIT", sep);
+		sep = ", ";
+	}
+	if (t_flags2 & TF2_ECN_SND_CWR) {
+		db_printf("%sTF2_ECN_SND_CWR", sep);
+		sep = ", ";
+	}
+	if (t_flags2 & TF2_ECN_SND_ECE) {
+		db_printf("%sTF2_ECN_SND_ECE", sep);
+		sep = ", ";
+	}
+	if (t_flags2 & TF2_ACE_PERMIT) {
+		db_printf("%sTF2_ACE_PERMIT", sep);
+		/* Kept so a flag appended below needs no change here. */
+		sep = ", ";
+	}
+}
+
static void
db_print_toobflags(char t_oobflags)
{
@@ -2581,6 +2633,12 @@
db_print_tflags(tp->t_flags);
db_printf(")\n");
+ db_print_indent(indent);
+ db_printf("t_flags2: 0x%x (", tp->t_flags2);
+ db_print_tflags2(tp->t_flags2);
+ db_printf(")\n");
+
+
db_print_indent(indent);
db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: x0%08x\n",
tp->snd_una, tp->snd_max, tp->snd_nxt);
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -205,6 +205,8 @@
u_int t_keepcnt; /* number of keepalives before close */
int t_dupacks; /* consecutive dup acks recd */
int t_lognum; /* Number of log entries */
+ uint32_t r_cep; /* Number of received CE marked packets */
+ uint32_t s_cep; /* Synced number of delivered CE packets */
struct tcp_log_stailq t_logs; /* Log buffer */
struct tcp_log_id_node *t_lin;
struct tcp_log_id_bucket *t_lib;
@@ -306,33 +308,30 @@
/*
* Flags and utility macros for the t_flags field.
*/
-#define TF_ACKNOW 0x000001 /* ack peer immediately */
-#define TF_DELACK 0x000002 /* ack, but try to delay it */
-#define TF_NODELAY 0x000004 /* don't delay packets to coalesce */
-#define TF_NOOPT 0x000008 /* don't use tcp options */
-#define TF_SENTFIN 0x000010 /* have sent FIN */
-#define TF_REQ_SCALE 0x000020 /* have/will request window scaling */
-#define TF_RCVD_SCALE 0x000040 /* other side has requested scaling */
-#define TF_REQ_TSTMP 0x000080 /* have/will request timestamps */
-#define TF_RCVD_TSTMP 0x000100 /* a timestamp was received in SYN */
-#define TF_SACK_PERMIT 0x000200 /* other side said I could SACK */
-#define TF_NEEDSYN 0x000400 /* send SYN (implicit state) */
-#define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */
-#define TF_NOPUSH 0x001000 /* don't push */
-#define TF_PREVVALID 0x002000 /* saved values for bad rxmit valid */
-#define TF_MORETOCOME 0x010000 /* More data to be appended to sock */
-#define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */
-#define TF_LASTIDLE 0x040000 /* connection was previously idle */
-#define TF_RXWIN0SENT 0x080000 /* sent a receiver win 0 in response */
-#define TF_FASTRECOVERY 0x100000 /* in NewReno Fast Recovery */
-#define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */
-#define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */
-#define TF_FORCEDATA 0x800000 /* force out a byte */
-#define TF_TSO 0x1000000 /* TSO enabled on this connection */
-#define TF_TOE 0x2000000 /* this connection is offloaded */
-#define TF_ECN_PERMIT 0x4000000 /* connection ECN-ready */
-#define TF_ECN_SND_CWR 0x8000000 /* ECN CWR in queue */
-#define TF_ECN_SND_ECE 0x10000000 /* ECN ECE in queue */
+#define TF_ACKNOW 0x00000001 /* ack peer immediately */
+#define TF_DELACK 0x00000002 /* ack, but try to delay it */
+#define TF_NODELAY 0x00000004 /* don't delay packets to coalesce */
+#define TF_NOOPT 0x00000008 /* don't use tcp options */
+#define TF_SENTFIN 0x00000010 /* have sent FIN */
+#define TF_REQ_SCALE 0x00000020 /* have/will request window scaling */
+#define TF_RCVD_SCALE 0x00000040 /* other side has requested scaling */
+#define TF_REQ_TSTMP 0x00000080 /* have/will request timestamps */
+#define TF_RCVD_TSTMP 0x00000100 /* a timestamp was received in SYN */
+#define TF_SACK_PERMIT 0x00000200 /* other side said I could SACK */
+#define TF_NEEDSYN 0x00000400 /* send SYN (implicit state) */
+#define TF_NEEDFIN 0x00000800 /* send FIN (implicit state) */
+#define TF_NOPUSH 0x00001000 /* don't push */
+#define TF_PREVVALID 0x00002000 /* saved values for bad rxmit valid */
+#define TF_MORETOCOME 0x00010000 /* More data to be appended to sock */
+#define TF_LQ_OVERFLOW 0x00020000 /* listen queue overflow */
+#define TF_LASTIDLE 0x00040000 /* connection was previously idle */
+#define TF_RXWIN0SENT 0x00080000 /* sent a receiver win 0 in response */
+#define TF_FASTRECOVERY 0x00100000 /* in NewReno Fast Recovery */
+#define TF_WASFRECOVERY 0x00200000 /* was in NewReno Fast Recovery */
+#define TF_SIGNATURE 0x00400000 /* require MD5 digests (RFC2385) */
+#define TF_FORCEDATA 0x00800000 /* force out a byte */
+#define TF_TSO 0x01000000 /* TSO enabled on this connection */
+#define TF_TOE 0x02000000 /* this connection is offloaded */
#define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */
#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */
#define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */
@@ -370,7 +369,11 @@
#define TF2_PLPMTU_PMTUD 0x00000002 /* Allowed to attempt PLPMTUD. */
#define TF2_PLPMTU_MAXSEGSNT 0x00000004 /* Last seg sent was full seg. */
#define TF2_LOG_AUTO 0x00000008 /* Session is auto-logging. */
-#define TF2_DROP_AF_DATA 0x00000010 /* Drop after all data ack'd */
+#define TF2_DROP_AF_DATA 0x00000010 /* Drop after all data ack'd */
+#define TF2_ECN_PERMIT 0x00000020 /* connection ECN-ready */
+#define TF2_ECN_SND_CWR 0x00000040 /* ECN CWR in queue */
+#define TF2_ECN_SND_ECE 0x00000080 /* ECN ECE in queue */
+#define TF2_ACE_PERMIT 0x00000100 /* Accurate ECN mode */
/*
* Structure to hold TCP options that are only used during segment
@@ -610,7 +613,12 @@
uint64_t tcps_pmtud_blackhole_activated_min_mss; /* BH at min MSS Count */
uint64_t tcps_pmtud_blackhole_failed; /* Black Hole Failure Count */
- uint64_t _pad[12]; /* 6 UTO, 6 TBD */
+ /* Accurate ECN Handshake stats */
+ uint64_t tcps_ace_nect; /* ACE SYN packet with Non-ECT */
+ uint64_t tcps_ace_ect1; /* ACE SYN packet with ECT1 */
+ uint64_t tcps_ace_ect0; /* ACE SYN packet with ECT0 */
+ uint64_t tcps_ace_ce; /* ACE SYN packet with CE */
+ uint64_t _pad[8]; /* was 12 (6 UTO, 6 TBD); 4 slots now hold the ACE stats */
};
#define tcps_rcvmemdrop tcps_rcvreassfull /* compat */
@@ -946,6 +954,7 @@
tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
int32_t seglimit, int32_t segsize, struct sockbuf *sb);
+int tcp_get_ace(struct tcphdr *th);
static inline void
tcp_fields_to_host(struct tcphdr *th)
Index: sys/netinet/toecore.h
===================================================================
--- sys/netinet/toecore.h
+++ sys/netinet/toecore.h
@@ -130,7 +130,7 @@
void toe_connect_failed(struct toedev *, struct inpcb *, int);
void toe_syncache_add(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
- struct inpcb *, void *, void *);
+ struct inpcb *, void *, void *, uint8_t);
int toe_syncache_expand(struct in_conninfo *, struct tcpopt *, struct tcphdr *,
struct socket **);
Index: sys/netinet/toecore.c
===================================================================
--- sys/netinet/toecore.c
+++ sys/netinet/toecore.c
@@ -337,13 +337,13 @@
void
toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th,
- struct inpcb *inp, void *tod, void *todctx)
+ struct inpcb *inp, void *tod, void *todctx, uint8_t tos)
{
struct socket *lso = inp->inp_socket;
INP_WLOCK_ASSERT(inp);
- syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx);
+ syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx, tos);
}
int
Index: usr.bin/netstat/inet.c
===================================================================
--- usr.bin/netstat/inet.c
+++ usr.bin/netstat/inet.c
@@ -763,6 +763,15 @@
p(tcps_ecn_rcwnd, "\t{:congestion-reductions/%ju} "
"{N:/time%s ECN reduced the congestion window}\n");
+ p(tcps_ace_nect, "\t{:ace-nonect-syn/%ju} "
+ "{N:/ACE SYN packet%s with Non-ECT}\n");
+ p(tcps_ace_ect0, "\t{:ace-ect0-syn/%ju} "
+ "{N:/ACE SYN packet%s with ECT0}\n");
+ p(tcps_ace_ect1, "\t{:ace-ect1-syn/%ju} "
+ "{N:/ACE SYN packet%s with ECT1}\n");
+ p(tcps_ace_ce, "\t{:ace-ce-syn/%ju} "
+ "{N:/ACE SYN packet%s with CE}\n");
+
xo_close_container("ecn");
xo_open_container("tcp-signature");
p(tcps_sig_rcvgoodsig, "\t{:received-good-signature/%ju} "

File Metadata

Mime Type
text/plain
Expires
Sat, Dec 27, 9:00 AM (17 h, 22 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27301649
Default Alt Text
D21011.id60155.diff (27 KB)

Event Timeline