Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_stacks/rack.c
- This file is larger than 256 KB, so syntax highlighting is disabled by default.
Show First 20 Lines • Show All 4,088 Lines • ▼ Show 20 Lines | if (!IN_FASTRECOVERY(tp->t_flags)) { | ||||
} | } | ||||
rack->r_ctl.rc_prr_recovery_fs = tp->snd_max - tp->snd_una; | rack->r_ctl.rc_prr_recovery_fs = tp->snd_max - tp->snd_una; | ||||
tp->snd_recover = tp->snd_max; | tp->snd_recover = tp->snd_max; | ||||
if (tp->t_flags2 & TF2_ECN_PERMIT) | if (tp->t_flags2 & TF2_ECN_PERMIT) | ||||
tp->t_flags2 |= TF2_ECN_SND_CWR; | tp->t_flags2 |= TF2_ECN_SND_CWR; | ||||
} | } | ||||
break; | break; | ||||
case CC_ECN: | case CC_ECN: | ||||
if (!IN_CONGRECOVERY(tp->t_flags)) { | if (!IN_CONGRECOVERY(tp->t_flags) || | ||||
/* | |||||
* Allow ECN reaction on ACK to CWR, if | |||||
* that data segment was also CE marked. | |||||
*/ | |||||
SEQ_GEQ(th->th_ack, tp->snd_recover)) { | |||||
rscheff: At this stage, the congestion episode was not yet finished on an ACK covering snd_recover (both… | |||||
EXIT_CONGRECOVERY(tp->t_flags); | |||||
KMOD_TCPSTAT_INC(tcps_ecn_rcwnd); | KMOD_TCPSTAT_INC(tcps_ecn_rcwnd); | ||||
tp->snd_recover = tp->snd_max; | tp->snd_recover = tp->snd_max + 1; | ||||
Done Inline Actions: As CWR will be sent only on the next new data segment, the congestion epoch is over at snd_max+1 with ECN, rather than when snd_max is cumulatively ACKed as in loss recovery. rscheff: As CWR will be sent only on the next new data segment, the congestion epoch is over at… | |||||
if (tp->t_flags2 & TF2_ECN_PERMIT) | if (tp->t_flags2 & TF2_ECN_PERMIT) | ||||
tp->t_flags2 |= TF2_ECN_SND_CWR; | tp->t_flags2 |= TF2_ECN_SND_CWR; | ||||
} | } | ||||
break; | break; | ||||
case CC_RTO: | case CC_RTO: | ||||
tp->t_dupacks = 0; | tp->t_dupacks = 0; | ||||
tp->t_bytes_acked = 0; | tp->t_bytes_acked = 0; | ||||
EXIT_RECOVERY(tp->t_flags); | EXIT_RECOVERY(tp->t_flags); | ||||
▲ Show 20 Lines • Show All 9,400 Lines • ▼ Show 20 Lines | #endif | ||||
if (tp->t_state == TCPS_ESTABLISHED && | if (tp->t_state == TCPS_ESTABLISHED && | ||||
(tp->t_flags2 & TF2_ECN_PERMIT)) { | (tp->t_flags2 & TF2_ECN_PERMIT)) { | ||||
/* | /* | ||||
* If the peer has ECN, mark data packets with ECN capable | * If the peer has ECN, mark data packets with ECN capable | ||||
* transmission (ECT). Ignore pure ack packets, | * transmission (ECT). Ignore pure ack packets, | ||||
* retransmissions. | * retransmissions. | ||||
*/ | */ | ||||
if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && | if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && | ||||
(sack_rxmit == 0)) { | (sack_rxmit == 0)) { | ||||
#ifdef INET6 | #ifdef INET6 | ||||
Done Inline Actions: This is to address the case where persist window probes are done using new 1-octet data segments. rscheff: this is to address the case, where persist window probes are done using new 1-octet data… | |||||
Done Inline Actions: rscheff: rS360639 had removed the simple check for window probe packets (snd_nxt == snd_max, len==1, but… | |||||
if (isipv6) | if (isipv6) | ||||
ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); | ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); | ||||
else | else | ||||
#endif | #endif | ||||
ip->ip_tos |= IPTOS_ECN_ECT0; | ip->ip_tos |= IPTOS_ECN_ECT0; | ||||
KMOD_TCPSTAT_INC(tcps_ecn_ect0); | KMOD_TCPSTAT_INC(tcps_ecn_ect0); | ||||
} | |||||
/* | /* | ||||
* Reply with proper ECN notifications. | * Reply with proper ECN notifications. | ||||
* Only set CWR on new data segments. | |||||
*/ | */ | ||||
if (tp->t_flags2 & TF2_ECN_SND_CWR) { | if (tp->t_flags2 & TF2_ECN_SND_CWR) { | ||||
flags |= TH_CWR; | flags |= TH_CWR; | ||||
tp->t_flags2 &= ~TF2_ECN_SND_CWR; | tp->t_flags2 &= ~TF2_ECN_SND_CWR; | ||||
} | |||||
} | } | ||||
if (tp->t_flags2 & TF2_ECN_SND_ECE) | if (tp->t_flags2 & TF2_ECN_SND_ECE) | ||||
flags |= TH_ECE; | flags |= TH_ECE; | ||||
} | } | ||||
/* | /* | ||||
* If we are doing retransmissions, then snd_nxt will not reflect | * If we are doing retransmissions, then snd_nxt will not reflect | ||||
* the first unsent octet. For ACK only packets, we do not want the | * the first unsent octet. For ACK only packets, we do not want the | ||||
* sequence number of the retransmitted packet, we want the sequence | * sequence number of the retransmitted packet, we want the sequence | ||||
▲ Show 20 Lines • Show All 1,538 Lines • Show Last 20 Lines |
At this stage, the congestion episode was not yet finished on an ACK covering snd_recover (for either a loss or an ECN CC reaction).
As per RFC guidance, only a single CC reaction per window is required. Since ECN and loss are tracked by two separate flags, we may enter each state (and reduce cwnd) independently.
However, exit is done only when th_ack >= snd_recover - and snd_recover is pulled forward by whatever happened last.
Most problematic may be non-SACK NewReno recovery, where an ECN signal after a loss could prolong the fast recovery episode (a subsequent loss episode may not result in another CC reaction if it is overlapped by an ECN episode; however, there were already two CC reactions earlier).