Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -4364,6 +4364,7 @@ netinet/sctputil.c optional inet sctp | inet6 sctp netinet/siftr.c optional inet siftr alq | inet6 siftr alq netinet/tcp_debug.c optional tcpdebug +netinet/tcp_ecn.c optional inet | inet6 netinet/tcp_fastopen.c optional inet tcp_rfc7413 | inet6 tcp_rfc7413 netinet/tcp_hostcache.c optional inet | inet6 netinet/tcp_input.c optional inet | inet6 Index: sys/netinet/tcp_ecn.h =================================================================== --- /dev/null +++ sys/netinet/tcp_ecn.h @@ -0,0 +1,55 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1982, 1986, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_ecn.h 8.4 (Berkeley) 5/24/95 + * $FreeBSD$ + */ + +#ifndef _NETINET_TCP_ECN_H_ +#define _NETINET_TCP_ECN_H_ + +#include +#include +#include + +#ifdef _KERNEL + +void tcp_ecn_input_syn_sent(struct tcpcb *, uint16_t, int); +void tcp_ecn_input_parallel_syn(struct tcpcb *, uint16_t, int); +int tcp_ecn_input_segment(struct tcpcb *, uint16_t, int); +uint16_t tcp_ecn_output_syn_sent(struct tcpcb *); +int tcp_ecn_output_established(struct tcpcb *, uint16_t *, int); +void tcp_ecn_syncache_socket(struct tcpcb *, struct syncache *); +int tcp_ecn_syncache_add(uint16_t, int); +uint16_t tcp_ecn_syncache_respond(uint16_t, struct syncache *); + +#endif /* _KERNEL */ + +#endif /* _NETINET_TCP_ECN_H_ */ Index: sys/netinet/tcp_ecn.c =================================================================== --- /dev/null +++ sys/netinet/tcp_ecn.c @@ -0,0 +1,296 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 2007-2008,2010 + * Swinburne University of Technology, Melbourne, Australia. + * Copyright (c) 2009-2010 Lawrence Stewart + * Copyright (c) 2010 The FreeBSD Foundation + * Copyright (c) 2010-2011 Juniper Networks, Inc. + * Copyright (c) 2019 Richard Scheffenegger + * All rights reserved. 
+ * + * Portions of this software were developed at the Centre for Advanced Internet + * Architectures, Swinburne University of Technology, by Lawrence Stewart, + * James Healy and David Hayes, made possible in part by a grant from the Cisco + * University Research Program Fund at Community Foundation Silicon Valley. + * + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by David Hayes under sponsorship from the FreeBSD Foundation. + * + * Portions of this software were developed by Robert N. M. Watson under + * contract to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)tcp_ecn.c 8.12 (Berkeley) 5/24/95 + */ + +/* + * Utility functions to deal with Explicit Congestion Notification in TCP + * implementing the essential parts of the Accurate ECN extension + * https://tools.ietf.org/html/draft-ietf-tcpm-accurate-ecn-09 + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_tcpdebug.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * Process incoming SYN,ACK packet + */ +void +tcp_ecn_input_syn_sent(struct tcpcb *tp, uint16_t thflags, int iptos) +{ + thflags &= (TH_CWR|TH_ECE); + + if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) && + V_tcp_do_ecn) { + tp->t_flags2 |= TF2_ECN_PERMIT; + KMOD_TCPSTAT_INC(tcps_ecn_shs); + } +} + +/* + * Handle parallel SYN for ECN + */ +void +tcp_ecn_input_parallel_syn(struct tcpcb *tp, uint16_t thflags, int iptos) +{ + if (thflags & TH_ACK) + return; + if (V_tcp_do_ecn == 0) + return; + if ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2)) { + /* RFC3168 ECN handling */ + if ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) { + tp->t_flags2 |= TF2_ECN_PERMIT; + tp->t_flags2 |= TF2_ECN_SND_ECE; + KMOD_TCPSTAT_INC(tcps_ecn_shs); 
+ } + } +} + +/* + * TCP ECN processing. + */ +int +tcp_ecn_input_segment(struct tcpcb *tp, uint16_t thflags, int iptos) +{ + int delta_ace = 0; + + if (tp->t_flags2 & TF2_ECN_PERMIT) { + switch (iptos & IPTOS_ECN_MASK) { + case IPTOS_ECN_CE: + KMOD_TCPSTAT_INC(tcps_ecn_ce); + break; + case IPTOS_ECN_ECT0: + KMOD_TCPSTAT_INC(tcps_ecn_ect0); + break; + case IPTOS_ECN_ECT1: + KMOD_TCPSTAT_INC(tcps_ecn_ect1); + break; + } + + /* RFC3168 ECN handling */ + if (thflags & TH_ECE) + delta_ace = 1; + if (thflags & TH_CWR) { + tp->t_flags2 &= ~TF2_ECN_SND_ECE; + tp->t_flags |= TF_ACKNOW; + } + if ((iptos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) + tp->t_flags2 |= TF2_ECN_SND_ECE; + + /* Process a packet differently from RFC3168. */ + cc_ecnpkt_handler_flags(tp, thflags, iptos); + } + + return delta_ace; +} + +/* + * Send ECN setup packet header flags + */ +uint16_t +tcp_ecn_output_syn_sent(struct tcpcb *tp) +{ + uint16_t thflags = 0; + + if (V_tcp_do_ecn == 1) { + /* Send a RFC3168 ECN setup packet */ + if (tp->t_rxtshift >= 1) { + if (tp->t_rxtshift <= V_tcp_ecn_maxretries) + thflags = TH_ECE|TH_CWR; + } else + thflags = TH_ECE|TH_CWR; + } + + return thflags; +} + +/* + * Output processing of the ECN feature, + * returning the IP ECN header codepoint. + */ +int +tcp_ecn_output_established(struct tcpcb *tp, uint16_t *thflags, int len) +{ + int ipecn = IPTOS_ECN_NOTECT; + bool newdata; + + /* + * If the peer has ECN, mark data packets with + * ECN capable transmission (ECT). + * Ignore pure control packets, retransmissions + * and window probes. + */ + newdata = (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && + !((tp->t_flags & TF_FORCEDATA) && len == 1)); + if (newdata) { + ipecn = IPTOS_ECN_ECT0; + KMOD_TCPSTAT_INC(tcps_ecn_ect0); + } + /* + * Reply with proper ECN notifications.
+ */ + if (newdata && + (tp->t_flags2 & TF2_ECN_SND_CWR)) { + *thflags |= TH_CWR; + tp->t_flags2 &= ~TF2_ECN_SND_CWR; + } + if (tp->t_flags2 & TF2_ECN_SND_ECE) + *thflags |= TH_ECE; + + return ipecn; +} + +/* + * Set up the ECN related tcpcb fields from + * a syncache entry + */ +void +tcp_ecn_syncache_socket(struct tcpcb *tp, struct syncache *sc) +{ + if (sc->sc_flags & SCF_ECN) { + switch (sc->sc_flags & SCF_ECN) { + case SCF_ECN: + tp->t_flags2 |= TF2_ECN_PERMIT; + break; + /* undefined SCF codepoint */ + default: + break; + } + } +} + +/* + * Process a packet's ECN information, and provide the + * syncache with the relevant information. + */ +int +tcp_ecn_syncache_add(uint16_t thflags, int iptos) +{ + int scflags = 0; + + switch (thflags & (TH_CWR|TH_ECE)) { + /* no ECN */ + case (0|0): + break; + /* legacy ECN */ + case (TH_CWR|TH_ECE): + scflags = SCF_ECN; + break; + default: + break; + } + return scflags; +} + +/* + * Set up the ECN flags of the response from the + * syncache information.
+ */ +uint16_t +tcp_ecn_syncache_respond(uint16_t thflags, struct syncache *sc) +{ + if ((thflags & TH_SYN) && + (sc->sc_flags & SCF_ECN)) { + switch (sc->sc_flags & SCF_ECN) { + case SCF_ECN: + thflags |= (0 | TH_ECE); + KMOD_TCPSTAT_INC(tcps_ecn_shs); + break; + /* undefined SCF codepoint */ + default: + break; + } + } + return thflags; +} Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c +++ sys/netinet/tcp_input.c @@ -104,6 +104,7 @@ #include #include #include +#include #include #include #include @@ -1517,7 +1518,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos) { - int thflags, acked, ourfinisacked, needoutput = 0, sack_changed; + uint16_t thflags; + int acked, ourfinisacked, needoutput = 0, sack_changed; int rstreason, todrop, win, incforsyn = 0; uint32_t tiwin; uint16_t nsegs; @@ -1597,32 +1599,8 @@ /* * TCP ECN processing. */ - if (tp->t_flags2 & TF2_ECN_PERMIT) { - if (thflags & TH_CWR) { - tp->t_flags2 &= ~TF2_ECN_SND_ECE; - tp->t_flags |= TF_ACKNOW; - } - switch (iptos & IPTOS_ECN_MASK) { - case IPTOS_ECN_CE: - tp->t_flags2 |= TF2_ECN_SND_ECE; - TCPSTAT_INC(tcps_ecn_ce); - break; - case IPTOS_ECN_ECT0: - TCPSTAT_INC(tcps_ecn_ect0); - break; - case IPTOS_ECN_ECT1: - TCPSTAT_INC(tcps_ecn_ect1); - break; - } - - /* Process a packet differently from RFC3168. */ - cc_ecnpkt_handler(tp, th, iptos); - - /* Congestion experienced. */ - if (thflags & TH_ECE) { - cc_cong_signal(tp, th, CC_ECN); - } - } + if (tcp_ecn_input_segment(tp, thflags, iptos)) + cc_cong_signal(tp, th, CC_ECN); /* * Parse options on any incoming segment. 
@@ -1663,13 +1641,7 @@ */ if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { /* Handle parallel SYN for ECN */ - if (!(thflags & TH_ACK) && - ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) && - ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) { - tp->t_flags2 |= TF2_ECN_PERMIT; - tp->t_flags2 |= TF2_ECN_SND_ECE; - TCPSTAT_INC(tcps_ecn_shs); - } + tcp_ecn_input_parallel_syn(tp, thflags, iptos); if ((to.to_flags & TOF_SCALE) && (tp->t_flags & TF_REQ_SCALE) && !(tp->t_flags & TF_NOOPT)) { @@ -2075,11 +2047,7 @@ else tp->t_flags |= TF_ACKNOW; - if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) && - (V_tcp_do_ecn == 1)) { - tp->t_flags2 |= TF2_ECN_PERMIT; - TCPSTAT_INC(tcps_ecn_shs); - } + tcp_ecn_input_syn_sent(tp, thflags, iptos); /* * Received in SYN_SENT[*] state. Index: sys/netinet/tcp_output.c =================================================================== --- sys/netinet/tcp_output.c +++ sys/netinet/tcp_output.c @@ -98,6 +98,7 @@ #ifdef TCP_OFFLOAD #include #endif +#include #include @@ -199,7 +200,8 @@ struct socket *so = tp->t_inpcb->inp_socket; int32_t len; uint32_t recwin, sendwin; - int off, flags, error = 0; /* Keep compiler happy */ + uint16_t flags; + int off, error = 0; /* Keep compiler happy */ u_int if_hw_tsomaxsegcount = 0; u_int if_hw_tsomaxsegsize = 0; struct mbuf *m; @@ -1197,54 +1199,27 @@ * resend those bits a number of times as per * RFC 3168. 
*/ - if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) { - if (tp->t_rxtshift >= 1) { - if (tp->t_rxtshift <= V_tcp_ecn_maxretries) - flags |= TH_ECE|TH_CWR; - } else - flags |= TH_ECE|TH_CWR; - } - /* Handle parallel SYN for ECN */ - if ((tp->t_state == TCPS_SYN_RECEIVED) && - (tp->t_flags2 & TF2_ECN_SND_ECE)) { - flags |= TH_ECE; - tp->t_flags2 &= ~TF2_ECN_SND_ECE; + if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) { + flags |= tcp_ecn_output_syn_sent(tp); } - - if (TCPS_HAVEESTABLISHED(tp->t_state) && + /* Also handle parallel SYN for ECN */ + if ((TCPS_HAVERCVDSYN(tp->t_state)) && (tp->t_flags2 & TF2_ECN_PERMIT)) { - /* - * If the peer has ECN, mark data packets with - * ECN capable transmission (ECT). - * Ignore pure ack packets, retransmissions and window probes. - */ - if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && - (sack_rxmit == 0) && - !((tp->t_flags & TF_FORCEDATA) && len == 1 && - SEQ_LT(tp->snd_una, tp->snd_max))) { + int ect = tcp_ecn_output_established(tp, &flags, len); + if ((tp->t_state == TCPS_SYN_RECEIVED) && + (tp->t_flags2 & TF2_ECN_SND_ECE)) + tp->t_flags2 &= ~TF2_ECN_SND_ECE; #ifdef INET6 - if (isipv6) { - ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20); - ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); - } - else + if (isipv6) { + ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20); + ip6->ip6_flow |= htonl(ect << 20); + } + else #endif - { - ip->ip_tos &= ~IPTOS_ECN_MASK; - ip->ip_tos |= IPTOS_ECN_ECT0; - } - TCPSTAT_INC(tcps_ecn_ect0); - /* - * Reply with proper ECN notifications. - * Only set CWR on new data segments. 
- */ - if (tp->t_flags2 & TF2_ECN_SND_CWR) { - flags |= TH_CWR; - tp->t_flags2 &= ~TF2_ECN_SND_CWR; - } + { + ip->ip_tos &= ~IPTOS_ECN_MASK; + ip->ip_tos |= ect; } - if (tp->t_flags2 & TF2_ECN_SND_ECE) - flags |= TH_ECE; } /* Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -113,6 +113,7 @@ #ifdef INET6 #include #endif +#include #include @@ -11406,11 +11407,9 @@ tp->t_flags |= TF_ACKNOW; rack->rc_dack_toggle = 0; } - if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) && - (V_tcp_do_ecn == 1)) { - tp->t_flags2 |= TF2_ECN_PERMIT; - KMOD_TCPSTAT_INC(tcps_ecn_shs); - } + + tcp_ecn_input_syn_sent(tp, thflags, iptos); + if (SEQ_GT(th->th_ack, tp->snd_una)) { /* * We advance snd_una for the @@ -13683,31 +13682,8 @@ } tp->t_rcvtime = ticks; /* Now what about ECN? */ - if (tp->t_flags2 & TF2_ECN_PERMIT) { - if (ae->flags & TH_CWR) { - tp->t_flags2 &= ~TF2_ECN_SND_ECE; - tp->t_flags |= TF_ACKNOW; - } - switch (ae->codepoint & IPTOS_ECN_MASK) { - case IPTOS_ECN_CE: - tp->t_flags2 |= TF2_ECN_SND_ECE; - KMOD_TCPSTAT_INC(tcps_ecn_ce); - break; - case IPTOS_ECN_ECT0: - KMOD_TCPSTAT_INC(tcps_ecn_ect0); - break; - case IPTOS_ECN_ECT1: - KMOD_TCPSTAT_INC(tcps_ecn_ect1); - break; - } - - /* Process a packet differently from RFC3168. */ - cc_ecnpkt_handler_flags(tp, ae->flags, ae->codepoint); - /* Congestion experienced. */ - if (ae->flags & TH_ECE) { - rack_cong_signal(tp, CC_ECN, ae->ack); - } - } + if (tcp_ecn_input_segment(tp, ae->flags, ae->codepoint)) + rack_cong_signal(tp, CC_ECN, ae->ack); #ifdef TCP_ACCOUNTING /* Count for the specific type of ack in */ counter_u64_add(tcp_cnt_counters[ae->ack_val_set], 1); @@ -14457,32 +14433,8 @@ * TCP ECN processing. XXXJTL: If we ever use ECN, we need to move * this to occur after we've validated the segment. 
*/ - if (tp->t_flags2 & TF2_ECN_PERMIT) { - if (thflags & TH_CWR) { - tp->t_flags2 &= ~TF2_ECN_SND_ECE; - tp->t_flags |= TF_ACKNOW; - } - switch (iptos & IPTOS_ECN_MASK) { - case IPTOS_ECN_CE: - tp->t_flags2 |= TF2_ECN_SND_ECE; - KMOD_TCPSTAT_INC(tcps_ecn_ce); - break; - case IPTOS_ECN_ECT0: - KMOD_TCPSTAT_INC(tcps_ecn_ect0); - break; - case IPTOS_ECN_ECT1: - KMOD_TCPSTAT_INC(tcps_ecn_ect1); - break; - } - - /* Process a packet differently from RFC3168. */ - cc_ecnpkt_handler(tp, th, iptos); - - /* Congestion experienced. */ - if (thflags & TH_ECE) { - rack_cong_signal(tp, CC_ECN, th->th_ack); - } - } + if (tcp_ecn_input_segment(tp, thflags, iptos)) + rack_cong_signal(tp, CC_ECN, th->th_ack); /* * If echoed timestamp is later than the current time, fall back to @@ -14516,13 +14468,7 @@ */ if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { /* Handle parallel SYN for ECN */ - if (!(thflags & TH_ACK) && - ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) && - ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) { - tp->t_flags2 |= TF2_ECN_PERMIT; - tp->t_flags2 |= TF2_ECN_SND_ECE; - TCPSTAT_INC(tcps_ecn_shs); - } + tcp_ecn_input_parallel_syn(tp, thflags, iptos); if ((to.to_flags & TOF_SCALE) && (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; @@ -15879,7 +15825,8 @@ struct tcpopt to; u_char opt[TCP_MAXOLEN]; uint32_t hdrlen, optlen; - int32_t slot, segsiz, max_val, tso = 0, error, flags, ulen = 0; + int32_t slot, segsiz, max_val, tso = 0, error, ulen = 0; + uint16_t flags; uint32_t if_hw_tsomaxsegcount = 0, startseq; uint32_t if_hw_tsomaxsegsize; @@ -16056,6 +16003,24 @@ udp->uh_ulen = htons(ulen); } m->m_pkthdr.rcvif = (struct ifnet *)0; + if (TCPS_HAVERCVDSYN(tp->t_state) && + (tp->t_flags2 & TF2_ECN_PERMIT)) { + int ect = tcp_ecn_output_established(tp, &flags, len); + if ((tp->t_state == TCPS_SYN_RECEIVED) && + (tp->t_flags2 & TF2_ECN_SND_ECE)) + tp->t_flags2 &= ~TF2_ECN_SND_ECE; +#ifdef INET6 + if (rack->r_is_v6) { + ip6->ip6_flow &= 
~htonl(IPTOS_ECN_MASK << 20); + ip6->ip6_flow |= htonl(ect << 20); + } + else +#endif + { + ip->ip_tos &= ~IPTOS_ECN_MASK; + ip->ip_tos |= ect; + } + } m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */ #ifdef INET6 if (rack->r_is_v6) { @@ -16379,7 +16344,8 @@ u_char opt[TCP_MAXOLEN]; uint32_t hdrlen, optlen; int cnt_thru = 1; - int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, flags, ulen = 0; + int32_t slot, segsiz, len, max_val, tso = 0, sb_offset, error, ulen = 0; + uint16_t flags; uint32_t s_soff; uint32_t if_hw_tsomaxsegcount = 0, startseq; uint32_t if_hw_tsomaxsegsize; @@ -16528,37 +16494,23 @@ udp->uh_ulen = htons(ulen); } m->m_pkthdr.rcvif = (struct ifnet *)0; - if (tp->t_state == TCPS_ESTABLISHED && + if (TCPS_HAVERCVDSYN(tp->t_state) && (tp->t_flags2 & TF2_ECN_PERMIT)) { - /* - * If the peer has ECN, mark data packets with ECN capable - * transmission (ECT). Ignore pure ack packets, - * retransmissions. - */ - if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max)) { + int ect = tcp_ecn_output_established(tp, &flags, len); + if ((tp->t_state == TCPS_SYN_RECEIVED) && + (tp->t_flags2 & TF2_ECN_SND_ECE)) + tp->t_flags2 &= ~TF2_ECN_SND_ECE; #ifdef INET6 - if (rack->r_is_v6) { - ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20); - ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); - } - else + if (rack->r_is_v6) { + ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20); + ip6->ip6_flow |= htonl(ect << 20); + } + else #endif - { - ip->ip_tos &= ~IPTOS_ECN_MASK; - ip->ip_tos |= IPTOS_ECN_ECT0; - } - KMOD_TCPSTAT_INC(tcps_ecn_ect0); - /* - * Reply with proper ECN notifications. - * Only set CWR on new data segments. 
- */ - if (tp->t_flags2 & TF2_ECN_SND_CWR) { - flags |= TH_CWR; - tp->t_flags2 &= ~TF2_ECN_SND_CWR; - } + { + ip->ip_tos &= ~IPTOS_ECN_MASK; + ip->ip_tos |= ect; } - if (tp->t_flags2 & TF2_ECN_SND_ECE) - flags |= TH_ECE; } m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */ #ifdef INET6 @@ -16786,7 +16738,8 @@ struct socket *so; uint32_t recwin; uint32_t sb_offset, s_moff = 0; - int32_t len, flags, error = 0; + int32_t len, error = 0; + uint16_t flags; struct mbuf *m, *s_mb = NULL; struct mbuf *mb; uint32_t if_hw_tsomaxsegcount = 0; @@ -18596,51 +18549,27 @@ * are on a retransmit, we may resend those bits a number of times * as per RFC 3168. */ - if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn == 1) { - if (tp->t_rxtshift >= 1) { - if (tp->t_rxtshift <= V_tcp_ecn_maxretries) - flags |= TH_ECE | TH_CWR; - } else - flags |= TH_ECE | TH_CWR; + if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) { + flags |= tcp_ecn_output_syn_sent(tp); } - /* Handle parallel SYN for ECN */ - if ((tp->t_state == TCPS_SYN_RECEIVED) && - (tp->t_flags2 & TF2_ECN_SND_ECE)) { - flags |= TH_ECE; - tp->t_flags2 &= ~TF2_ECN_SND_ECE; - } - if (TCPS_HAVEESTABLISHED(tp->t_state) && + /* Also handle parallel SYN for ECN */ + if (TCPS_HAVERCVDSYN(tp->t_state) && (tp->t_flags2 & TF2_ECN_PERMIT)) { - /* - * If the peer has ECN, mark data packets with ECN capable - * transmission (ECT). Ignore pure ack packets, - * retransmissions. 
- */ - if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && - (sack_rxmit == 0)) { + int ect = tcp_ecn_output_established(tp, &flags, len); + if ((tp->t_state == TCPS_SYN_RECEIVED) && + (tp->t_flags2 & TF2_ECN_SND_ECE)) + tp->t_flags2 &= ~TF2_ECN_SND_ECE; #ifdef INET6 - if (isipv6) { - ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20); - ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); - } - else + if (isipv6) { + ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << 20); + ip6->ip6_flow |= htonl(ect << 20); + } + else #endif - { - ip->ip_tos &= ~IPTOS_ECN_MASK; - ip->ip_tos |= IPTOS_ECN_ECT0; - } - KMOD_TCPSTAT_INC(tcps_ecn_ect0); - /* - * Reply with proper ECN notifications. - * Only set CWR on new data segments. - */ - if (tp->t_flags2 & TF2_ECN_SND_CWR) { - flags |= TH_CWR; - tp->t_flags2 &= ~TF2_ECN_SND_CWR; - } + { + ip->ip_tos &= ~IPTOS_ECN_MASK; + ip->ip_tos |= ect; } - if (tp->t_flags2 & TF2_ECN_SND_ECE) - flags |= TH_ECE; } /* * If we are doing retransmissions, then snd_nxt will not reflect Index: sys/netinet/tcp_syncache.c =================================================================== --- sys/netinet/tcp_syncache.c +++ sys/netinet/tcp_syncache.c @@ -89,6 +89,7 @@ #include #include #include +#include #ifdef INET6 #include #endif @@ -1027,8 +1028,7 @@ tp->t_flags |= TF_SACK_PERMIT; } - if (sc->sc_flags & SCF_ECN) - tp->t_flags2 |= TF2_ECN_PERMIT; + tcp_ecn_syncache_socket(tp, sc); /* * Set up MSS and get cached values from tcp_hostcache. 
@@ -1743,9 +1743,9 @@ sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */ if (ltflags & TF_NOOPT) sc->sc_flags |= SCF_NOOPT; - if (((tcp_get_flags(th) & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) && - V_tcp_do_ecn) - sc->sc_flags |= SCF_ECN; + /* ECN Handshake */ + if (V_tcp_do_ecn) + sc->sc_flags |= tcp_ecn_syncache_add(tcp_get_flags(th), iptos); if (V_tcp_syncookies) sc->sc_iss = syncookie_generate(sch, sc); @@ -1938,10 +1938,7 @@ th->th_win = htons(sc->sc_wnd); th->th_urp = 0; - if ((flags & TH_SYN) && (sc->sc_flags & SCF_ECN)) { - flags |= TH_ECE; - TCPSTAT_INC(tcps_ecn_shs); - } + flags = tcp_ecn_syncache_respond(flags, sc); tcp_set_flags(th, flags); /* Tack on the TCP options. */