diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -439,10 +439,7 @@ case CC_RTO: tp->t_dupacks = 0; tp->t_bytes_acked = 0; - if ((tp->t_rxtshift > 1) || - !((tp->t_flags & TF_SACK_PERMIT) && - (!TAILQ_EMPTY(&tp->snd_holes)))) - EXIT_RECOVERY(tp->t_flags); + EXIT_RECOVERY(tp->t_flags); if (tp->t_flags2 & TF2_ECN_PERMIT) tp->t_flags2 |= TF2_ECN_SND_CWR; break; @@ -2700,7 +2697,6 @@ } if (tcp_is_sack_recovery(tp, &to)) { TCPSTAT_INC(tcps_sack_recovery_episode); - tp->snd_recover = tp->snd_nxt; tp->snd_cwnd = maxseg; (void) tcp_output(tp); if (SEQ_GT(th->th_ack, tp->snd_una)) { @@ -2743,8 +2739,12 @@ __func__)); if (tp->t_dupacks == 1) tp->snd_limited = 0; - tp->snd_cwnd = - (tp->snd_nxt - tp->snd_una) + + if ((tp->snd_nxt == tp->snd_max) && + (tp->t_rxtshift == 0)) + tp->snd_cwnd = + SEQ_SUB(tp->snd_nxt, + tp->snd_una); + tp->snd_cwnd += (tp->t_dupacks - tp->snd_limited) * maxseg; /* @@ -2793,7 +2793,9 @@ * counted as dupacks here. */ if (tcp_is_sack_recovery(tp, &to) && - (sack_changed != SACK_NOCHANGE)) { + (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) || + ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) && + (tp->snd_nxt == tp->snd_max)) { tp->t_dupacks++; /* limit overhead by setting maxseg last */ if (!IN_FASTRECOVERY(tp->t_flags) && diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -264,6 +264,7 @@ } } again: + sendwin = 0; /* extra window returned by tcp_sack_adjust() */ /* * If we've recently taken a timeout, snd_max will be greater than * snd_nxt. 
There may be SACK information that allows us to avoid @@ -271,12 +272,12 @@ */ if ((tp->t_flags & TF_SACK_PERMIT) && SEQ_LT(tp->snd_nxt, tp->snd_max)) - tcp_sack_adjust(tp); + sendwin = tcp_sack_adjust(tp); sendalot = 0; tso = 0; mtu = 0; off = tp->snd_nxt - tp->snd_una; - sendwin = min(tp->snd_wnd, tp->snd_cwnd); + sendwin = min(tp->snd_wnd, tp->snd_cwnd + sendwin); flags = tcp_outflags[tp->t_state]; /* @@ -293,7 +294,8 @@ sack_bytes_rxmt = 0; len = 0; p = NULL; - if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) && + if ((tp->t_flags & TF_SACK_PERMIT) && + (IN_FASTRECOVERY(tp->t_flags) || SEQ_LT(tp->snd_nxt, tp->snd_max)) && (p = tcp_sack_output(tp, &sack_bytes_rxmt))) { uint32_t cwin; @@ -392,7 +394,7 @@ * in which case len is already set. */ if (sack_rxmit == 0) { - if (sack_bytes_rxmt == 0) { + if ((sack_bytes_rxmt == 0) || SEQ_LT(tp->snd_nxt, tp->snd_max)) { len = ((int32_t)min(sbavail(&so->so_snd), sendwin) - off); } else { @@ -1622,12 +1624,17 @@ if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max)) tp->snd_max = tp->snd_nxt + xlen; } - if ((error == 0) && - (TCPS_HAVEESTABLISHED(tp->t_state) && - (tp->t_flags & TF_SACK_PERMIT) && - tp->rcv_numsacks > 0)) { - /* Clean up any DSACK's sent */ - tcp_clean_dsack_blocks(tp); + if ((tp->rcv_numsacks > 0) && + (error == 0) && + TCPS_HAVEESTABLISHED(tp->t_state) && + (tp->t_flags & TF_SACK_PERMIT)) { + /* Clean up any DSACK's sent */ + tcp_clean_dsack_blocks(tp); + } + if (sack_rxmit && + (error == 0) && + SEQ_LT(tp->snd_nxt, SEQ_MIN(p->rxmit, p->end))) { + tp->snd_nxt = SEQ_MIN(p->rxmit, p->end); } if (error) { /* diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c --- a/sys/netinet/tcp_sack.c +++ b/sys/netinet/tcp_sack.c @@ -989,7 +989,7 @@ highdata--; highdata = SEQ_MIN(highdata, tp->snd_recover); if (SEQ_LT(th->th_ack, highdata)) { - tp->snd_fack = th->th_ack; + tp->snd_fack = SEQ_MAX(th->th_ack, tp->snd_fack); if ((temp = tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack, highdata - maxseg), 
highdata, NULL)) != NULL) { tp->sackhint.hole_bytes += @@ -1059,41 +1059,47 @@ * After a timeout, the SACK list may be rebuilt. This SACK information * should be used to avoid retransmitting SACKed data. This function * traverses the SACK list to see if snd_nxt should be moved forward. + * In addition, the sacked bytes traversed are returned so that the caller + * can inflate cwnd by them. This prevents a traffic burst after the final + * full ACK, and also keeps ACKs coming back. */ -void +int tcp_sack_adjust(struct tcpcb *tp) { + int sacked = 0; struct sackhole *p, *cur = TAILQ_FIRST(&tp->snd_holes); INP_WLOCK_ASSERT(tptoinpcb(tp)); if (cur == NULL) { /* No holes */ - return; + return (0); } if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack)) { /* We're already beyond any SACKed blocks */ - return; + return (tp->sackhint.sacked_bytes); } - /*- + /* * Two cases for which we want to advance snd_nxt: * i) snd_nxt lies between end of one hole and beginning of another * ii) snd_nxt lies between end of last hole and snd_fack */ while ((p = TAILQ_NEXT(cur, scblink)) != NULL) { if (SEQ_LT(tp->snd_nxt, cur->end)) { - return; + return (sacked); } + sacked += p->start - cur->end; /* gap between holes */ if (SEQ_GEQ(tp->snd_nxt, p->start)) { cur = p; } else { tp->snd_nxt = p->start; - return; + return (sacked); } } if (SEQ_LT(tp->snd_nxt, cur->end)) { - return; + return (sacked); } tp->snd_nxt = tp->snd_fack; + return (tp->sackhint.sacked_bytes); } /* diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1485,7 +1485,7 @@ tcp_seq rcv_lastend); void tcp_clean_dsack_blocks(struct tcpcb *tp); void tcp_clean_sackreport(struct tcpcb *tp); -void tcp_sack_adjust(struct tcpcb *tp); +int tcp_sack_adjust(struct tcpcb *tp); struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *, sackstatus_t, u_int *);