Index: sys/dev/cxgbe/tom/t4_listen.c =================================================================== --- sys/dev/cxgbe/tom/t4_listen.c +++ sys/dev/cxgbe/tom/t4_listen.c @@ -1097,7 +1097,7 @@ static void pass_accept_req_to_protohdrs(struct adapter *sc, const struct mbuf *m, - struct in_conninfo *inc, struct tcphdr *th) + struct in_conninfo *inc, struct tcphdr *th, uint8_t *iptos) { const struct cpl_pass_accept_req *cpl = mtod(m, const void *); const struct ether_header *eh; @@ -1114,6 +1114,21 @@ tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen)); } + /* extract TOS (DiffServ + ECN) byte for AccECN */ + if (iptos) { + if (((struct ip *)l3hdr)->ip_v == IPVERSION) { + const struct ip *ip = (const void *)l3hdr; + *iptos = ip->ip_tos; + } +#ifdef INET6 + else + if (((struct ip *)l3hdr)->ip_v == (IPV6_VERSION >> 4)) { + const struct ip6_hdr *ip6 = (const void *)l3hdr; + *iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff; + } +#endif /* INET6 */ + } + if (inc) { bzero(inc, sizeof(*inc)); inc->inc_fport = tcp->th_sport; @@ -1254,6 +1269,7 @@ struct synq_entry *synqe = NULL; int reject_reason, v, ntids; uint16_t vid, l2info; + uint8_t iptos = 0; struct epoch_tracker et; #ifdef INVARIANTS unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); @@ -1317,7 +1333,7 @@ if (lctx->vnet != ifp->if_vnet) REJECT_PASS_ACCEPT_REQ(true); - pass_accept_req_to_protohdrs(sc, m, &inc, &th); + pass_accept_req_to_protohdrs(sc, m, &inc, &th, &iptos); if (inc.inc_flags & INC_ISIPV6) { /* Don't offload if the ifcap isn't enabled */ @@ -1390,7 +1406,7 @@ * syncache_add. Note that syncache_add releases the pcb lock. 
*/ t4opt_to_tcpopt(&cpl->tcpopt, &to); - toe_syncache_add(&inc, &to, &th, inp, tod, synqe); + toe_syncache_add(&inc, &to, &th, inp, tod, synqe, iptos); if (atomic_load_int(&synqe->ok_to_respond) > 0) { uint64_t opt0; @@ -1471,9 +1487,10 @@ struct tcphdr *th, struct tcpopt *to) { uint16_t tcp_opt = be16toh(cpl->tcp_opt); + uint8_t iptos; /* start off with the original SYN */ - pass_accept_req_to_protohdrs(sc, synqe->syn, inc, th); + pass_accept_req_to_protohdrs(sc, synqe->syn, inc, th, &iptos); /* modify parts to make it look like the ACK to our SYN|ACK */ th->th_flags = TH_ACK; Index: sys/netinet/cc/cc_dctcp.c =================================================================== --- sys/netinet/cc/cc_dctcp.c +++ sys/netinet/cc/cc_dctcp.c @@ -108,7 +108,7 @@ dctcp_data = ccv->cc_data; - if (CCV(ccv, t_flags) & TF_ECN_PERMIT) { + if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT) { /* * DCTCP doesn't treat receipt of ECN marked packet as a * congestion event. Thus, DCTCP always executes the ACK @@ -276,8 +276,8 @@ dctcp_data->ece_curr = 1; break; case CC_RTO: - if (CCV(ccv, t_flags) & TF_ECN_PERMIT) { - CCV(ccv, t_flags) |= TF_ECN_SND_CWR; + if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT) { + CCV(ccv, t_flags2) |= TF2_ECN_SND_CWR; dctcp_update_alpha(ccv); dctcp_data->save_sndnxt += CCV(ccv, t_maxseg); dctcp_data->num_cong_events++; @@ -293,7 +293,7 @@ dctcp_data = ccv->cc_data; - if (CCV(ccv, t_flags) & TF_ECN_PERMIT) + if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT) dctcp_data->save_sndnxt = CCV(ccv, snd_nxt); } @@ -305,7 +305,7 @@ { dctcp_cc_algo.post_recovery = newreno_cc_algo.post_recovery; - if (CCV(ccv, t_flags) & TF_ECN_PERMIT) + if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT) dctcp_update_alpha(ccv); } @@ -336,12 +336,12 @@ if (!dctcp_data->ce_prev && (ccflag & CCF_DELACK)) delay_ack = 0; dctcp_data->ce_prev = 1; - CCV(ccv, t_flags) |= TF_ECN_SND_ECE; + CCV(ccv, t_flags2) |= TF2_ECN_SND_ECE; } else { if (dctcp_data->ce_prev && (ccflag & CCF_DELACK)) delay_ack = 0; dctcp_data->ce_prev = 0; - 
CCV(ccv, t_flags) &= ~TF_ECN_SND_ECE; + CCV(ccv, t_flags2) &= ~TF2_ECN_SND_ECE; } /* DCTCP sets delayed ack when this segment sets the CWR flag. */ Index: sys/netinet/tcp.h =================================================================== --- sys/netinet/tcp.h +++ sys/netinet/tcp.h @@ -71,8 +71,10 @@ #define TH_URG 0x20 #define TH_ECE 0x40 #define TH_CWR 0x80 +#define TH_AE 0x100 /* maps into th_x2 */ + #define TH_FLAGS (TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG|TH_ECE|TH_CWR) -#define PRINT_TH_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR" +#define PRINT_TH_FLAGS "\20\1FIN\2SYN\3RST\4PUSH\5ACK\6URG\7ECE\10CWR\11AE" u_short th_win; /* window */ u_short th_sum; /* checksum */ Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c +++ sys/netinet/tcp_input.c @@ -390,16 +390,16 @@ case CC_NDUPACK: if (!IN_FASTRECOVERY(tp->t_flags)) { tp->snd_recover = tp->snd_max; - if (tp->t_flags & TF_ECN_PERMIT) - tp->t_flags |= TF_ECN_SND_CWR; + if (tp->t_flags2 & TF2_ECN_PERMIT) + tp->t_flags2 |= TF2_ECN_SND_CWR; } break; case CC_ECN: if (!IN_CONGRECOVERY(tp->t_flags)) { TCPSTAT_INC(tcps_ecn_rcwnd); tp->snd_recover = tp->snd_max; - if (tp->t_flags & TF_ECN_PERMIT) - tp->t_flags |= TF_ECN_SND_CWR; + if (tp->t_flags2 & TF2_ECN_PERMIT) + tp->t_flags2 |= TF2_ECN_SND_CWR; } break; case CC_RTO: @@ -1325,7 +1325,7 @@ #endif TCP_PROBE3(debug__input, tp, th, m); tcp_dooptions(&to, optp, optlen, TO_SYN); - if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL)) + if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL, iptos)) goto tfo_socket_result; /* @@ -1575,12 +1575,9 @@ /* * TCP ECN processing. 
*/ - if (tp->t_flags & TF_ECN_PERMIT) { - if (thflags & TH_CWR) - tp->t_flags &= ~TF_ECN_SND_ECE; + if (tp->t_flags2 & TF2_ECN_PERMIT) { switch (iptos & IPTOS_ECN_MASK) { case IPTOS_ECN_CE: - tp->t_flags |= TF_ECN_SND_ECE; TCPSTAT_INC(tcps_ecn_ce); break; case IPTOS_ECN_ECT0: @@ -1591,11 +1588,29 @@ break; } + char d_ace; + + if (tp->t_flags2 & TF2_ACE_PERMIT) { + d_ace = (tcp_get_ace(th) + 8 - (tp->s_cep & 0x07)) & 0x07; + tp->s_cep += d_ace; + if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) + tp->r_cep += 1; + } else { + if (thflags & TH_CWR) + tp->t_flags2 &= ~TF2_ECN_SND_ECE; + if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) + tp->t_flags2 |= TF2_ECN_SND_ECE; + } + /* Process a packet differently from RFC3168. */ cc_ecnpkt_handler(tp, th, iptos); - /* Congestion experienced. */ - if (thflags & TH_ECE) { + /* Congestion experienced. + * With ACE, process a cong signal with ACE changed, + * for legacy ECN, whenever ECE is received + */ + if ((!(tp->t_flags2 & TF2_ACE_PERMIT) && (thflags & TH_ECE)) || + ((tp->t_flags2 & TF2_ACE_PERMIT) && (d_ace != 0))) { cc_cong_signal(tp, th, CC_ECN); } } @@ -2009,10 +2024,70 @@ if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) && V_tcp_do_ecn) { - tp->t_flags |= TF_ECN_PERMIT; + tp->t_flags2 |= TF2_ECN_PERMIT; TCPSTAT_INC(tcps_ecn_shs); } + /* decoding Accurate ECN according to table in section 3.1.1 */ + if ((V_tcp_do_ecn == 3) || + (V_tcp_do_ecn == 4)) { + int xflags; + xflags = ((th->th_x2 << 8) | thflags) & (TH_AE|TH_CWR|TH_ECE); + switch (xflags) { + /* non-ECT SYN */ + case (0|TH_CWR|0): + tp->t_flags2 |= TF2_ACE_PERMIT; + tp->s_cep = 5; + TCPSTAT_INC(tcps_ecn_shs); + TCPSTAT_INC(tcps_ace_nect); + break; + /* ECT1 SYN */ + case (0|TH_CWR|TH_ECE): + tp->t_flags2 |= TF2_ACE_PERMIT; + tp->s_cep = 5; + TCPSTAT_INC(tcps_ecn_shs); + TCPSTAT_INC(tcps_ace_ect1); + break; + /* ECT0 SYN */ + case (TH_AE|0|0): + tp->t_flags2 |= TF2_ACE_PERMIT; + tp->s_cep = 5; + TCPSTAT_INC(tcps_ecn_shs); + TCPSTAT_INC(tcps_ace_ect0); + break; + /* CE SYN 
*/ + case (TH_AE|TH_CWR|0): + tp->t_flags2 |= TF2_ACE_PERMIT; + tp->s_cep = 6; + TCPSTAT_INC(tcps_ecn_shs); + TCPSTAT_INC(tcps_ace_ce); + break; + default: + break; + } + /* + * Set the AccECN Codepoints on + * the outgoing ACK to the SYN,ACK + * according to table 3 in the + * AccECN draft + */ + switch (iptos & IPTOS_ECN_MASK) { + /* non-ECT SYN,ACK */ + case (IPTOS_ECN_NOTECT): + tp->r_cep = 0b010; + break; + case (IPTOS_ECN_ECT0): + tp->r_cep = 0b100; + break; + case (IPTOS_ECN_ECT1): + tp->r_cep = 0b011; + break; + case (IPTOS_ECN_CE): + tp->r_cep = 0b110; + break; + } + } + /* * Received in SYN_SENT[*] state. * Transitions: Index: sys/netinet/tcp_output.c =================================================================== --- sys/netinet/tcp_output.c +++ sys/netinet/tcp_output.c @@ -1110,9 +1110,20 @@ } else flags |= TH_ECE|TH_CWR; } - + /* + * Send an Accurate ECN setup SYN packet + */ + if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 3) { + if (tp->t_rxtshift >= 1) { + if (tp->t_rxtshift <= V_tcp_ecn_maxretries) + flags |= TH_ECE|TH_CWR|TH_AE; + } else + flags |= TH_ECE|TH_CWR|TH_AE; + } + if (tp->t_state == TCPS_ESTABLISHED && - (tp->t_flags & TF_ECN_PERMIT)) { + ((tp->t_flags2 & TF2_ECN_PERMIT) || + (tp->t_flags2 & TF2_ACE_PERMIT))) { /* * If the peer has ECN, mark data packets with * ECN capable transmission (ECT). @@ -1128,18 +1139,45 @@ ip->ip_tos |= IPTOS_ECN_ECT0; TCPSTAT_INC(tcps_ecn_ect0); } - + /* * Reply with proper ECN notifications. 
*/ - if (tp->t_flags & TF_ECN_SND_CWR) { - flags |= TH_CWR; - tp->t_flags &= ~TF_ECN_SND_CWR; - } - if (tp->t_flags & TF_ECN_SND_ECE) - flags |= TH_ECE; + if (tp->t_flags2 & TF2_ACE_PERMIT) { + if (tp->r_cep & 0x01) + flags |= TH_ECE; + else + flags &= ~TH_ECE; + if (tp->r_cep & 0x02) + flags |= TH_CWR; + else + flags &= ~TH_CWR; + if (tp->r_cep & 0x04) + flags |= TH_AE; + else + flags &= ~TH_AE; + if (!(tp->t_flags2 & TF2_ECN_PERMIT)) { + /* + * here we process the final + * ACK of the 3WHS + */ + if (tp->r_cep == 0b110) { + tp->r_cep = 6; + } else { + tp->r_cep = 5; + } + tp->t_flags2 |= TF2_ECN_PERMIT; + } + } else { + if (tp->t_flags2 & TF2_ECN_SND_CWR) { + flags |= TH_CWR; + tp->t_flags2 &= ~TF2_ECN_SND_CWR; + } + if (tp->t_flags2 & TF2_ECN_SND_ECE) + flags |= TH_ECE; + } } - + /* * If we are doing retransmissions, then snd_nxt will * not reflect the first unsent octet. For ACK only @@ -1169,7 +1207,9 @@ bcopy(opt, th + 1, optlen); th->th_off = (sizeof (struct tcphdr) + optlen) >> 2; } - th->th_flags = flags; + th->th_flags = (flags & (TH_CWR|TH_ECE|TH_URG|TH_ACK| + TH_PUSH|TH_RST|TH_SYN|TH_FIN)); + th->th_x2 = (flags & (TH_AE)) >> 8; /* * Calculate receive window. Don't shrink window, * but avoid silly window syndrome. 
Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -1353,16 +1353,16 @@ rack->r_ctl.rc_prr_sndcnt = tp->t_maxseg; rack->r_ctl.rc_prr_recovery_fs = tp->snd_max - tp->snd_una; tp->snd_recover = tp->snd_max; - if (tp->t_flags & TF_ECN_PERMIT) - tp->t_flags |= TF_ECN_SND_CWR; + if (tp->t_flags2 & TF2_ECN_PERMIT) + tp->t_flags2 |= TF2_ECN_SND_CWR; } break; case CC_ECN: if (!IN_CONGRECOVERY(tp->t_flags)) { TCPSTAT_INC(tcps_ecn_rcwnd); tp->snd_recover = tp->snd_max; - if (tp->t_flags & TF_ECN_PERMIT) - tp->t_flags |= TF_ECN_SND_CWR; + if (tp->t_flags2 & TF2_ECN_PERMIT) + tp->t_flags2 |= TF2_ECN_SND_CWR; } break; case CC_RTO: @@ -5265,7 +5265,7 @@ if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) && V_tcp_do_ecn) { - tp->t_flags |= TF_ECN_PERMIT; + tp->t_flags2 |= TF2_ECN_PERMIT; TCPSTAT_INC(tcps_ecn_shs); } if (SEQ_GT(th->th_ack, tp->snd_una)) { @@ -6602,12 +6602,12 @@ * TCP ECN processing. XXXJTL: If we ever use ECN, we need to move * this to occur after we've validated the segment. */ - if (tp->t_flags & TF_ECN_PERMIT) { + if (tp->t_flags2 & TF2_ECN_PERMIT) { if (thflags & TH_CWR) - tp->t_flags &= ~TF_ECN_SND_ECE; + tp->t_flags2 &= ~TF2_ECN_SND_ECE; switch (iptos & IPTOS_ECN_MASK) { case IPTOS_ECN_CE: - tp->t_flags |= TF_ECN_SND_ECE; + tp->t_flags2 |= TF2_ECN_SND_ECE; TCPSTAT_INC(tcps_ecn_ce); break; case IPTOS_ECN_ECT0: @@ -8152,7 +8152,7 @@ flags |= TH_ECE | TH_CWR; } if (tp->t_state == TCPS_ESTABLISHED && - (tp->t_flags & TF_ECN_PERMIT)) { + (tp->t_flags2 & TF2_ECN_PERMIT)) { /* * If the peer has ECN, mark data packets with ECN capable * transmission (ECT). Ignore pure ack packets, @@ -8171,11 +8171,11 @@ /* * Reply with proper ECN notifications. 
*/ - if (tp->t_flags & TF_ECN_SND_CWR) { + if (tp->t_flags2 & TF2_ECN_SND_CWR) { flags |= TH_CWR; - tp->t_flags &= ~TF_ECN_SND_CWR; + tp->t_flags2 &= ~TF2_ECN_SND_CWR; } - if (tp->t_flags & TF_ECN_SND_ECE) + if (tp->t_flags2 & TF2_ECN_SND_ECE) flags |= TH_ECE; } /* Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -3167,7 +3167,7 @@ } sp = s + strlen(s); if (th) - sprintf(sp, " tcpflags 0x%b", th->th_flags, PRINT_TH_FLAGS); + sprintf(sp, " tcpflags 0x%b", (th->th_x2 << 8) | th->th_flags, PRINT_TH_FLAGS); if (*(s + size - 1) != '\0') panic("%s: string too long", __func__); return (s); @@ -3244,3 +3244,20 @@ if (inp->inp_socket == NULL) xt->xt_inp.xi_socket.xso_protocol = IPPROTO_TCP; } + +/* + * Build the 3-bit AccECN ACE value from a TCP header: + * ECE is bit 0, CWR is bit 1, AE (carried in th_x2) is bit 2. + */ +int +tcp_get_ace(struct tcphdr *th) +{ + int ace = 0; + if (th->th_flags & TH_ECE) + ace += 1; + if (th->th_flags & TH_CWR) + ace += 2; + if (th->th_x2 & (TH_AE >> 8)) + ace += 4; + return (ace); +} Index: sys/netinet/tcp_syncache.h =================================================================== --- sys/netinet/tcp_syncache.h +++ sys/netinet/tcp_syncache.h @@ -45,7 +45,7 @@ struct tcphdr *, struct socket **, struct mbuf *); int syncache_add(struct in_conninfo *, struct tcpopt *, struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *, - void *, void *); + void *, void *, uint8_t); void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *); void syncache_badack(struct in_conninfo *); int syncache_pcblist(struct sysctl_req *req, int max_pcbs, int *pcbs_exported); @@ -90,6 +90,10 @@ #define SCF_SIGNATURE 0x20 /* send MD5 digests */ #define SCF_SACK 0x80 /* send SACK option */ #define SCF_ECN 0x100 /* send ECN setup packet */ +#define SCF_ACE_N 0x200 /* send ACE non-ECT setup */ +#define SCF_ACE_0 0x400 /* send ACE ECT0 setup */ +#define SCF_ACE_1 0x800 /* send ACE ECT1 setup */ +#define SCF_ACE_CE 0x1000 /* send ACE CE setup */ 
struct syncache_head { struct mtx sch_mtx; Index: sys/netinet/tcp_syncache.c =================================================================== --- sys/netinet/tcp_syncache.c +++ sys/netinet/tcp_syncache.c @@ -964,7 +964,20 @@ } if (sc->sc_flags & SCF_ECN) - tp->t_flags |= TF_ECN_PERMIT; + tp->t_flags2 |= TF2_ECN_PERMIT; + + if ((sc->sc_flags & SCF_ACE_N) || + (sc->sc_flags & SCF_ACE_0) || + (sc->sc_flags & SCF_ACE_1) || + (sc->sc_flags & SCF_ACE_CE)) { + tp->t_flags2 |= TF2_ACE_PERMIT; + tp->s_cep = 5; + tp->r_cep = 5; + if (sc->sc_flags & SCF_ACE_CE) { + tp->s_cep=6; + tp->r_cep=6; + } + } /* * Set up MSS and get cached values from tcp_hostcache. @@ -1309,7 +1322,7 @@ int syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod, - void *todctx) + void *todctx, uint8_t tos) { struct tcpcb *tp; struct socket *so; @@ -1612,8 +1625,62 @@ sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */ if (ltflags & TF_NOOPT) sc->sc_flags |= SCF_NOOPT; - if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn) - sc->sc_flags |= SCF_ECN; + /* ECN Handshake */ + if (V_tcp_do_ecn) { + int xflags; + xflags = ((th->th_x2 << 8) | th->th_flags) & (TH_AE|TH_CWR|TH_ECE); + switch (xflags) { + /* no ECN */ + case (0|0|0): + break; + /* legacy ECN */ + case (0|TH_CWR|TH_ECE): + sc->sc_flags |= SCF_ECN; + break; + /* Accurate ECN */ + case (TH_AE|TH_CWR|TH_ECE): + if ((V_tcp_do_ecn == 3) || + (V_tcp_do_ecn == 4)) { + + switch (tos & IPTOS_ECN_MASK) { + case IPTOS_ECN_CE: + sc->sc_flags |= SCF_ACE_CE; + break; + case IPTOS_ECN_ECT0: + sc->sc_flags |= SCF_ACE_0; + break; + case IPTOS_ECN_ECT1: + sc->sc_flags |= SCF_ACE_1; + break; + case IPTOS_ECN_NOTECT: + sc->sc_flags |= SCF_ACE_N; + break; + } + } else + sc->sc_flags |= SCF_ECN; + break; + /* Default Case (section 3.1.2) */ + default: + if ((V_tcp_do_ecn == 3) || + (V_tcp_do_ecn == 4)) { + switch (tos & IPTOS_ECN_MASK) { + case IPTOS_ECN_CE: + 
sc->sc_flags |= SCF_ACE_CE; + break; + case IPTOS_ECN_ECT0: + sc->sc_flags |= SCF_ACE_0; + break; + case IPTOS_ECN_ECT1: + sc->sc_flags |= SCF_ACE_1; + break; + case IPTOS_ECN_NOTECT: + sc->sc_flags |= SCF_ACE_N; + break; + } + } + break; + } + } if (V_tcp_syncookies) sc->sc_iss = syncookie_generate(sch, sc); @@ -1787,6 +1854,28 @@ TCPSTAT_INC(tcps_ecn_shs); } + if ((flags & TH_SYN) && (sc->sc_flags & SCF_ACE_N)) { + th->th_flags |= TH_CWR; + TCPSTAT_INC(tcps_ecn_shs); + TCPSTAT_INC(tcps_ace_nect); + } + if ((flags & TH_SYN) && (sc->sc_flags & SCF_ACE_0)) { + th->th_x2 |= (TH_AE >> 8); + TCPSTAT_INC(tcps_ecn_shs); + TCPSTAT_INC(tcps_ace_ect0); + } + if ((flags & TH_SYN) && (sc->sc_flags & SCF_ACE_1)) { + th->th_flags |= (TH_ECE | TH_CWR); + TCPSTAT_INC(tcps_ecn_shs); + TCPSTAT_INC(tcps_ace_ect1); + } + if ((flags & TH_SYN) && (sc->sc_flags & SCF_ACE_CE)) { + th->th_flags |= TH_CWR; + th->th_x2 |= (TH_AE >> 8); + TCPSTAT_INC(tcps_ecn_shs); + TCPSTAT_INC(tcps_ace_ce); + } + /* Tack on the TCP options. */ if ((sc->sc_flags & SCF_NOOPT) == 0) { to.to_flags = 0; Index: sys/netinet/tcp_usrreq.c =================================================================== --- sys/netinet/tcp_usrreq.c +++ sys/netinet/tcp_usrreq.c @@ -1517,7 +1517,7 @@ ti->tcpi_snd_wscale = tp->snd_scale; ti->tcpi_rcv_wscale = tp->rcv_scale; } - if (tp->t_flags & TF_ECN_PERMIT) + if (tp->t_flags2 & TF2_ECN_PERMIT) ti->tcpi_options |= TCPI_OPT_ECN; ti->tcpi_rto = tp->t_rxtcur * tick; @@ -2484,6 +2484,10 @@ db_printf("%sTF_NOPUSH", comma ? ", " : ""); comma = 1; } + if (t_flags & TF_PREVVALID) { + db_printf("%sTF_PREVVALID", comma ? ", " : ""); + comma = 1; + } if (t_flags & TF_MORETOCOME) { db_printf("%sTF_MORETOCOME", comma ? ", " : ""); comma = 1; @@ -2512,6 +2516,10 @@ db_printf("%sTF_WASFRECOVERY", comma ? ", " : ""); comma = 1; } + if (t_flags & TF_WASCRECOVERY) { + db_printf("%sTF_WASCRECOVERY", comma ? 
", " : ""); + comma = 1; + } if (t_flags & TF_SIGNATURE) { db_printf("%sTF_SIGNATURE", comma ? ", " : ""); comma = 1; @@ -2524,8 +2532,8 @@ db_printf("%sTF_TSO", comma ? ", " : ""); comma = 1; } - if (t_flags & TF_ECN_PERMIT) { - db_printf("%sTF_ECN_PERMIT", comma ? ", " : ""); + if (t_flags & TF_TOE) { + db_printf("%sTF_TOE", comma ? ", " : ""); comma = 1; } if (t_flags & TF_FASTOPEN) { @@ -2534,6 +2542,50 @@ } } +static void +db_print_tflags2(u_int t_flags2) +{ + int comma; + + comma = 0; + if (t_flags2 & TF2_PLPMTU_BLACKHOLE) { + db_printf("%sTF2_PLPMTU_BLACKHOLE", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_PLPMTU_PMTUD) { + db_printf("%sTF2_PLPMTU_PMTUD", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_PLPMTU_MAXSEGSNT) { + db_printf("%sTF2_PLPMTU_MAXSEGSNT", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_LOG_AUTO) { + db_printf("%sTF2_LOG_AUTO", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_DROP_AF_DATA) { + db_printf("%sTF2_DROP_AF_DATA", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_ECN_PERMIT) { + db_printf("%sTF2_ECN_PERMIT", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_ECN_SND_CWR) { + db_printf("%sTF2_ECN_SND_CWR", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_ECN_SND_ECE) { + db_printf("%sTF2_ECN_SND_ECE", comma ? ", " : ""); + comma = 1; + } + if (t_flags2 & TF2_ACE_PERMIT) { + db_printf("%sTF2_ACE_PERMIT", comma ? 
", " : ""); + comma = 1; + } +} + static void db_print_toobflags(char t_oobflags) { @@ -2581,6 +2633,12 @@ db_print_tflags(tp->t_flags); db_printf(")\n"); + db_print_indent(indent); + db_printf("t_flags2: 0x%x (", tp->t_flags2); + db_print_tflags2(tp->t_flags2); + db_printf(")\n"); + + db_print_indent(indent); db_printf("snd_una: 0x%08x snd_max: 0x%08x snd_nxt: x0%08x\n", tp->snd_una, tp->snd_max, tp->snd_nxt); Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -205,6 +205,8 @@ u_int t_keepcnt; /* number of keepalives before close */ int t_dupacks; /* consecutive dup acks recd */ int t_lognum; /* Number of log entries */ + uint32_t r_cep; /* Number of received CE marked packets */ + uint32_t s_cep; /* Synced number of delivered CE packets */ struct tcp_log_stailq t_logs; /* Log buffer */ struct tcp_log_id_node *t_lin; struct tcp_log_id_bucket *t_lib; @@ -306,33 +308,30 @@ /* * Flags and utility macros for the t_flags field. 
*/ -#define TF_ACKNOW 0x000001 /* ack peer immediately */ -#define TF_DELACK 0x000002 /* ack, but try to delay it */ -#define TF_NODELAY 0x000004 /* don't delay packets to coalesce */ -#define TF_NOOPT 0x000008 /* don't use tcp options */ -#define TF_SENTFIN 0x000010 /* have sent FIN */ -#define TF_REQ_SCALE 0x000020 /* have/will request window scaling */ -#define TF_RCVD_SCALE 0x000040 /* other side has requested scaling */ -#define TF_REQ_TSTMP 0x000080 /* have/will request timestamps */ -#define TF_RCVD_TSTMP 0x000100 /* a timestamp was received in SYN */ -#define TF_SACK_PERMIT 0x000200 /* other side said I could SACK */ -#define TF_NEEDSYN 0x000400 /* send SYN (implicit state) */ -#define TF_NEEDFIN 0x000800 /* send FIN (implicit state) */ -#define TF_NOPUSH 0x001000 /* don't push */ -#define TF_PREVVALID 0x002000 /* saved values for bad rxmit valid */ -#define TF_MORETOCOME 0x010000 /* More data to be appended to sock */ -#define TF_LQ_OVERFLOW 0x020000 /* listen queue overflow */ -#define TF_LASTIDLE 0x040000 /* connection was previously idle */ -#define TF_RXWIN0SENT 0x080000 /* sent a receiver win 0 in response */ -#define TF_FASTRECOVERY 0x100000 /* in NewReno Fast Recovery */ -#define TF_WASFRECOVERY 0x200000 /* was in NewReno Fast Recovery */ -#define TF_SIGNATURE 0x400000 /* require MD5 digests (RFC2385) */ -#define TF_FORCEDATA 0x800000 /* force out a byte */ -#define TF_TSO 0x1000000 /* TSO enabled on this connection */ -#define TF_TOE 0x2000000 /* this connection is offloaded */ -#define TF_ECN_PERMIT 0x4000000 /* connection ECN-ready */ -#define TF_ECN_SND_CWR 0x8000000 /* ECN CWR in queue */ -#define TF_ECN_SND_ECE 0x10000000 /* ECN ECE in queue */ +#define TF_ACKNOW 0x00000001 /* ack peer immediately */ +#define TF_DELACK 0x00000002 /* ack, but try to delay it */ +#define TF_NODELAY 0x00000004 /* don't delay packets to coalesce */ +#define TF_NOOPT 0x00000008 /* don't use tcp options */ +#define TF_SENTFIN 0x00000010 /* have sent FIN */ +#define 
TF_REQ_SCALE 0x00000020 /* have/will request window scaling */ +#define TF_RCVD_SCALE 0x00000040 /* other side has requested scaling */ +#define TF_REQ_TSTMP 0x00000080 /* have/will request timestamps */ +#define TF_RCVD_TSTMP 0x00000100 /* a timestamp was received in SYN */ +#define TF_SACK_PERMIT 0x00000200 /* other side said I could SACK */ +#define TF_NEEDSYN 0x00000400 /* send SYN (implicit state) */ +#define TF_NEEDFIN 0x00000800 /* send FIN (implicit state) */ +#define TF_NOPUSH 0x00001000 /* don't push */ +#define TF_PREVVALID 0x00002000 /* saved values for bad rxmit valid */ +#define TF_MORETOCOME 0x00010000 /* More data to be appended to sock */ +#define TF_LQ_OVERFLOW 0x00020000 /* listen queue overflow */ +#define TF_LASTIDLE 0x00040000 /* connection was previously idle */ +#define TF_RXWIN0SENT 0x00080000 /* sent a receiver win 0 in response */ +#define TF_FASTRECOVERY 0x00100000 /* in NewReno Fast Recovery */ +#define TF_WASFRECOVERY 0x00200000 /* was in NewReno Fast Recovery */ +#define TF_SIGNATURE 0x00400000 /* require MD5 digests (RFC2385) */ +#define TF_FORCEDATA 0x00800000 /* force out a byte */ +#define TF_TSO 0x01000000 /* TSO enabled on this connection */ +#define TF_TOE 0x02000000 /* this connection is offloaded */ #define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */ #define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */ #define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */ @@ -370,7 +369,11 @@ #define TF2_PLPMTU_PMTUD 0x00000002 /* Allowed to attempt PLPMTUD. */ #define TF2_PLPMTU_MAXSEGSNT 0x00000004 /* Last seg sent was full seg. */ #define TF2_LOG_AUTO 0x00000008 /* Session is auto-logging. 
*/ -#define TF2_DROP_AF_DATA 0x00000010 /* Drop after all data ack'd */ +#define TF2_DROP_AF_DATA 0x00000010 /* Drop after all data ack'd */ +#define TF2_ECN_PERMIT 0x00000020 /* connection ECN-ready */ +#define TF2_ECN_SND_CWR 0x00000040 /* ECN CWR in queue */ +#define TF2_ECN_SND_ECE 0x00000080 /* ECN ECE in queue */ +#define TF2_ACE_PERMIT 0x00000100 /* Accurate ECN mode */ /* * Structure to hold TCP options that are only used during segment @@ -610,7 +613,12 @@ uint64_t tcps_pmtud_blackhole_activated_min_mss; /* BH at min MSS Count */ uint64_t tcps_pmtud_blackhole_failed; /* Black Hole Failure Count */ - uint64_t _pad[12]; /* 6 UTO, 6 TBD */ + /* Accurate ECN Handshake stats */ + uint64_t tcps_ace_nect; /* ACE SYN packet with Non-ECT */ + uint64_t tcps_ace_ect1; /* ACE SYN packet with ECT1 */ + uint64_t tcps_ace_ect0; /* ACE SYN packet with ECT0 */ + uint64_t tcps_ace_ce; /* ACE SYN packet with CE */ + uint64_t _pad[8]; /* 6 UTO, 6 TBD */ }; #define tcps_rcvmemdrop tcps_rcvreassfull /* compat */ @@ -946,6 +954,7 @@ tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen, int32_t seglimit, int32_t segsize, struct sockbuf *sb); +int tcp_get_ace(struct tcphdr *th); static inline void tcp_fields_to_host(struct tcphdr *th) Index: sys/netinet/toecore.h =================================================================== --- sys/netinet/toecore.h +++ sys/netinet/toecore.h @@ -130,7 +130,7 @@ void toe_connect_failed(struct toedev *, struct inpcb *, int); void toe_syncache_add(struct in_conninfo *, struct tcpopt *, struct tcphdr *, - struct inpcb *, void *, void *); + struct inpcb *, void *, void *, uint8_t); int toe_syncache_expand(struct in_conninfo *, struct tcpopt *, struct tcphdr *, struct socket **); Index: sys/netinet/toecore.c =================================================================== --- sys/netinet/toecore.c +++ sys/netinet/toecore.c @@ -337,13 +337,13 @@ void toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, - struct 
inpcb *inp, void *tod, void *todctx) + struct inpcb *inp, void *tod, void *todctx, uint8_t tos) { struct socket *lso = inp->inp_socket; INP_WLOCK_ASSERT(inp); - syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx); + syncache_add(inc, to, th, inp, &lso, NULL, tod, todctx, tos); } int Index: usr.bin/netstat/inet.c =================================================================== --- usr.bin/netstat/inet.c +++ usr.bin/netstat/inet.c @@ -763,6 +763,15 @@ p(tcps_ecn_rcwnd, "\t{:congestion-reductions/%ju} " "{N:/time%s ECN reduced the congestion window}\n"); + p(tcps_ace_nect, "\t{:ace-nonect-syn/%ju} " + "{N:/ACE SYN packet%s with Non-ECT}\n"); + p(tcps_ace_ect0, "\t{:ace-ect0-syn/%ju} " + "{N:/ACE SYN packet%s with ECT0}\n"); + p(tcps_ace_ect1, "\t{:ace-ect1-syn/%ju} " + "{N:/ACE SYN packet%s with ECT1}\n"); + p(tcps_ace_ce, "\t{:ace-ce-syn/%ju} " + "{N:/ACE SYN packet%s with CE}\n"); + xo_close_container("ecn"); xo_open_container("tcp-signature"); p(tcps_sig_rcvgoodsig, "\t{:received-good-signature/%ju} "