diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -439,10 +439,7 @@ case CC_RTO: tp->t_dupacks = 0; tp->t_bytes_acked = 0; - if ((tp->t_rxtshift > 1) || - !((tp->t_flags & TF_SACK_PERMIT) && - (!TAILQ_EMPTY(&tp->snd_holes)))) - EXIT_RECOVERY(tp->t_flags); + EXIT_RECOVERY(tp->t_flags); if (tp->t_flags2 & TF2_ECN_PERMIT) tp->t_flags2 |= TF2_ECN_SND_CWR; break; @@ -2689,7 +2686,6 @@ if (tcp_is_sack_recovery(tp, &to)) { TCPSTAT_INC( tcps_sack_recovery_episode); - tp->snd_recover = tp->snd_nxt; tp->snd_cwnd = maxseg; (void) tcp_output(tp); if (SEQ_GT(th->th_ack, tp->snd_una)) @@ -2779,7 +2775,10 @@ * counted as dupacks here. */ if (tcp_is_sack_recovery(tp, &to) && - (sack_changed != SACK_NOCHANGE)) { + (sack_changed != SACK_NOCHANGE) && + (((tp->t_rxtshift == 0) && (sack_changed != SACK_NOCHANGE)) || + ((tp->t_rxtshift > 0) && (sack_changed == SACK_NEWLOSS))) && + (tp->snd_nxt == tp->snd_max)) { tp->t_dupacks++; /* limit overhead by setting maxseg last */ if (!IN_FASTRECOVERY(tp->t_flags) && diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -264,6 +264,7 @@ } } again: + sendwin = 0; /* * If we've recently taken a timeout, snd_max will be greater than * snd_nxt. There may be SACK information that allows us to avoid @@ -271,12 +272,12 @@ */ if ((tp->t_flags & TF_SACK_PERMIT) && SEQ_LT(tp->snd_nxt, tp->snd_max)) - tcp_sack_adjust(tp); + sendwin = tcp_sack_adjust(tp); sendalot = 0; tso = 0; mtu = 0; off = tp->snd_nxt - tp->snd_una; - sendwin = min(tp->snd_wnd, tp->snd_cwnd); + sendwin = min(tp->snd_wnd, tp->snd_cwnd + sendwin); flags = tcp_outflags[tp->t_state]; /* @@ -293,7 +294,8 @@ sack_bytes_rxmt = 0; len = 0; p = NULL; - if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) && + if ((tp->t_flags & TF_SACK_PERMIT) && + (IN_FASTRECOVERY(tp->t_flags) || SEQ_LT(tp->snd_nxt, tp->snd_max)) && (p = tcp_sack_output(tp, &sack_bytes_rxmt))) { uint32_t cwin; @@ -562,11 +564,12 @@ tso = 1; if (sack_rxmit) { - if (SEQ_LT(p->rxmit + len, tp->snd_una + sbused(&so->so_snd))) + if (SEQ_LT(p->rxmit + len, + tp->snd_una + sbused(&so->so_snd))) flags &= ~TH_FIN; } else { - if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + - sbused(&so->so_snd))) + if (SEQ_LT(tp->snd_nxt + len, + tp->snd_una + sbused(&so->so_snd))) flags &= ~TH_FIN; } @@ -1626,12 +1629,17 @@ if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max)) tp->snd_max = tp->snd_nxt + xlen; } - if ((error == 0) && - (TCPS_HAVEESTABLISHED(tp->t_state) && - (tp->t_flags & TF_SACK_PERMIT) && - tp->rcv_numsacks > 0)) { - /* Clean up any DSACK's sent */ - tcp_clean_dsack_blocks(tp); + if ((tp->rcv_numsacks > 0) && + (error == 0) && + TCPS_HAVEESTABLISHED(tp->t_state) && + (tp->t_flags & TF_SACK_PERMIT)) { + /* Clean up any DSACK's sent */ + tcp_clean_dsack_blocks(tp); + } + if (sack_rxmit && + (error == 0) && + SEQ_LT(tp->snd_nxt, SEQ_MIN(p->rxmit, p->end))) { + tp->snd_nxt = SEQ_MIN(p->rxmit, p->end); } if (error) { /* diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c --- a/sys/netinet/tcp_sack.c +++ b/sys/netinet/tcp_sack.c @@ -983,12 +983,13 @@ if (tp->t_flags & TF_SENTFIN) highdata--; highdata = SEQ_MIN(highdata, tp->snd_recover); - if (th->th_ack != highdata) { + if (SEQ_LT(th->th_ack, highdata)) { tp->snd_fack = th->th_ack; if ((temp = tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack, - highdata - maxseg), highdata, NULL)) != NULL) - tp->sackhint.hole_bytes += temp->end - - temp->start; + highdata - maxseg), highdata, NULL)) != NULL) { + tp->sackhint.hole_bytes += + temp->end - temp->start; + } } } (void) tcp_output(tp); @@ -1079,35 +1080,45 @@ * After a timeout, the SACK list may be rebuilt. This SACK information * should be used to avoid retransmitting SACKed data. This function * traverses the SACK list to see if snd_nxt should be moved forward. + * In addition, cwnd will be inflated by the sacked bytes traversed when + * moving snd_nxt forward. This prevents a traffic burst after the final + * full ACK, and also keeps ACKs coming back. */ -void +int tcp_sack_adjust(struct tcpcb *tp) { + int sacked = 0; struct sackhole *p, *cur = TAILQ_FIRST(&tp->snd_holes); INP_WLOCK_ASSERT(tptoinpcb(tp)); - if (cur == NULL) - return; /* No holes */ - if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack)) - return; /* We're already beyond any SACKed blocks */ - /*- + if (cur == NULL) { + /* No holes */ + return (0); + } + if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack)) { + /* We're already beyond any SACKed blocks */ + return (tp->sackhint.sacked_bytes); + } + /* * Two cases for which we want to advance snd_nxt: * i) snd_nxt lies between end of one hole and beginning of another * ii) snd_nxt lies between end of last hole and snd_fack */ while ((p = TAILQ_NEXT(cur, scblink)) != NULL) { if (SEQ_LT(tp->snd_nxt, cur->end)) - return; - if (SEQ_GEQ(tp->snd_nxt, p->start)) + return (sacked); + sacked += p->start - cur->end; + if (SEQ_GEQ(tp->snd_nxt, p->start)) { cur = p; - else { + } else { tp->snd_nxt = p->start; - return; + return (sacked); } } if (SEQ_LT(tp->snd_nxt, cur->end)) - return; + return (sacked); tp->snd_nxt = tp->snd_fack; + return (tp->sackhint.sacked_bytes); } /* diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -790,7 +790,7 @@ in_losing(inp); } tp->snd_nxt = tp->snd_una; - tp->snd_recover = tp->snd_max; + tp->snd_recover = tp->snd_fack; /* * Force a segment to be sent. */ diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -1494,7 +1494,7 @@ void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend); void tcp_clean_dsack_blocks(struct tcpcb *tp); void tcp_clean_sackreport(struct tcpcb *tp); -void tcp_sack_adjust(struct tcpcb *tp); +int tcp_sack_adjust(struct tcpcb *tp); struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *, sackstatus_t); void tcp_lost_retransmission(struct tcpcb *, struct tcphdr *);