Page MenuHomeFreeBSD

D43470.id133394.diff
No OneTemporary

D43470.id133394.diff

diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -439,10 +439,7 @@
case CC_RTO:
tp->t_dupacks = 0;
tp->t_bytes_acked = 0;
- if ((tp->t_rxtshift > 1) ||
- !((tp->t_flags & TF_SACK_PERMIT) &&
- (!TAILQ_EMPTY(&tp->snd_holes))))
- EXIT_RECOVERY(tp->t_flags);
+ EXIT_RECOVERY(tp->t_flags);
if (tp->t_flags2 & TF2_ECN_PERMIT)
tp->t_flags2 |= TF2_ECN_SND_CWR;
break;
@@ -474,13 +471,12 @@
{
INP_WLOCK_ASSERT(tptoinpcb(tp));
- /* XXXLAS: KASSERT that we're in recovery? */
-
if (CC_ALGO(tp)->post_recovery != NULL) {
tp->t_ccv.curack = th->th_ack;
CC_ALGO(tp)->post_recovery(&tp->t_ccv);
}
- /* XXXLAS: EXIT_RECOVERY ? */
+ EXIT_RECOVERY(tp->t_flags);
+
tp->t_bytes_acked = 0;
tp->sackhint.delivered_data = 0;
tp->sackhint.prr_delivered = 0;
@@ -2609,26 +2605,36 @@
tcp_do_prr_ack(tp, th, &to,
sack_changed, &maxseg);
} else if (tcp_is_sack_recovery(tp, &to) &&
- IN_FASTRECOVERY(tp->t_flags)) {
+ IN_FASTRECOVERY(tp->t_flags) &&
+ (tp->snd_nxt == tp->snd_max)) {
int awnd;
/*
- * Compute the amount of data in flight first.
- * We can inject new data into the pipe iff
- * we have less than 1/2 the original window's
- * worth of data in flight.
+ * Compute the amount of data in flight
+ * first. We can inject new data into
+ * the pipe iff we have less than
+ * ssthresh worth of data in flight.
*/
if (V_tcp_do_newsack) {
awnd = tcp_compute_pipe(tp);
} else {
- awnd = (tp->snd_nxt - tp->snd_fack) +
- tp->sackhint.sack_bytes_rexmit;
+ awnd = tp->snd_nxt - tp->snd_fack +
+ tp->sackhint.sack_bytes_rexmit;
}
if (awnd < tp->snd_ssthresh) {
- tp->snd_cwnd += maxseg;
+ tp->snd_cwnd += imax(maxseg,
+ imin(2 * maxseg,
+ tp->sackhint.delivered_data));
if (tp->snd_cwnd > tp->snd_ssthresh)
- tp->snd_cwnd = tp->snd_ssthresh;
+ tp->snd_cwnd =
+ tp->snd_ssthresh;
}
+ } else if (tcp_is_sack_recovery(tp, &to) &&
+ IN_FASTRECOVERY(tp->t_flags) &&
+ SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+ tp->snd_cwnd += imax(maxseg,
+ imin(2 * maxseg,
+ tp->sackhint.delivered_data));
} else {
tp->snd_cwnd += maxseg;
}
@@ -2652,14 +2658,13 @@
tcp_seq onxt = tp->snd_nxt;
/*
- * If we're doing sack, or prr, check
- * to see if we're already in sack
+ * If we're doing sack, check to
+ * see if we're already in sack
* recovery. If we're not doing sack,
* check to see if we're in newreno
* recovery.
*/
- if (V_tcp_do_prr ||
- (tp->t_flags & TF_SACK_PERMIT)) {
+ if (tcp_is_sack_recovery(tp, &to)) {
if (IN_FASTRECOVERY(tp->t_flags)) {
tp->t_dupacks = 0;
break;
@@ -2673,37 +2678,46 @@
}
/* Congestion signal before ack. */
cc_cong_signal(tp, th, CC_NDUPACK);
- cc_ack_received(tp, th, nsegs,
- CC_DUPACK);
+ cc_ack_received(tp, th, nsegs, CC_DUPACK);
tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
if (V_tcp_do_prr) {
/*
- * snd_ssthresh is already updated by
- * cc_cong_signal.
+ * snd_ssthresh and snd_recover are
+ * already updated by cc_cong_signal.
*/
if (tcp_is_sack_recovery(tp, &to)) {
/*
- * Exclude Limited Transmit
+ * Include Limited Transmit
* segments here
*/
tp->sackhint.prr_delivered =
- maxseg;
+ imin(tp->snd_max - th->th_ack,
+ (tp->snd_limited + 1) * maxseg);
} else {
tp->sackhint.prr_delivered =
- imin(tp->snd_max - tp->snd_una,
- imin(INT_MAX / 65536,
- tp->t_dupacks) * maxseg);
+ maxseg;
}
tp->sackhint.recover_fs = max(1,
tp->snd_nxt - tp->snd_una);
}
+ tp->snd_limited = 0;
if (tcp_is_sack_recovery(tp, &to)) {
TCPSTAT_INC(tcps_sack_recovery_episode);
- tp->snd_recover = tp->snd_nxt;
- tp->snd_cwnd = maxseg;
+ /*
+ * When entering LR after RTO due to
+ * Duplicate ACKs, retransmit existing
+ * holes from the scoreboard.
+ */
+ tcp_resend_sackholes(tp);
+ /* Avoid inflating cwnd in tcp_output */
+ tp->snd_nxt = tp->snd_max;
+ tp->snd_cwnd = tcp_compute_pipe(tp) +
+ maxseg;
(void) tcp_output(tp);
- if (SEQ_GT(th->th_ack, tp->snd_una)) {
+ /* Set cwnd to the expected flightsize */
+ tp->snd_cwnd = tp->snd_ssthresh;
+ if (SEQ_GT(th->th_ack, tp->snd_una))
goto resume_partialack;
}
goto drop;
@@ -2744,7 +2758,8 @@
if (tp->t_dupacks == 1)
tp->snd_limited = 0;
tp->snd_cwnd =
- (tp->snd_nxt - tp->snd_una) +
+ SEQ_SUB(tp->snd_nxt, tp->snd_una) -
+ tcp_sack_adjust(tp) +
(tp->t_dupacks - tp->snd_limited) *
maxseg;
/*
@@ -2793,7 +2808,10 @@
* counted as dupacks here.
*/
if (tcp_is_sack_recovery(tp, &to) &&
- (sack_changed != SACK_NOCHANGE)) {
+ ((sack_changed == SACK_NEWLOSS) ||
+ ((sack_changed != SACK_NOCHANGE) &&
+ ((tp->t_rxtshift == 0) ||
+ (tp->snd_nxt == tp->snd_max))))) {
tp->t_dupacks++;
/* limit overhead by setting maxseg last */
if (!IN_FASTRECOVERY(tp->t_flags) &&
@@ -2813,11 +2831,13 @@
* If the congestion window was inflated to account
* for the other side's cached packets, retract it.
*/
- if (IN_FASTRECOVERY(tp->t_flags)) {
- if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+ if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+ if (IN_FASTRECOVERY(tp->t_flags)) {
if (tp->t_flags & TF_SACK_PERMIT) {
- if (V_tcp_do_prr && to.to_flags & TOF_SACK) {
- tcp_timer_activate(tp, TT_REXMT, 0);
+ if (V_tcp_do_prr &&
+ (to.to_flags & TOF_SACK)) {
+ tcp_timer_activate(tp,
+ TT_REXMT, 0);
tp->t_rtttime = 0;
tcp_do_prr_ack(tp, th, &to,
sack_changed, &maxseg);
@@ -2830,24 +2850,18 @@
} else {
tcp_newreno_partial_ack(tp, th);
}
- } else {
- cc_post_recovery(tp, th);
- }
- } else if (IN_CONGRECOVERY(tp->t_flags)) {
- if (SEQ_LT(th->th_ack, tp->snd_recover)) {
- if (V_tcp_do_prr) {
- tp->sackhint.delivered_data = BYTES_THIS_ACK(tp, th);
- tp->snd_fack = th->th_ack;
- /*
- * During ECN cwnd reduction
- * always use PRR-SSRB
- */
- tcp_do_prr_ack(tp, th, &to, SACK_CHANGE,
- &maxseg);
- (void) tcp_output(tp);
- }
- } else {
- cc_post_recovery(tp, th);
+ } else if (IN_CONGRECOVERY(tp->t_flags) &&
+ (V_tcp_do_prr)) {
+ tp->sackhint.delivered_data =
+ BYTES_THIS_ACK(tp, th);
+ tp->snd_fack = th->th_ack;
+ /*
+ * During ECN cwnd reduction
+ * always use PRR-SSRB
+ */
+ tcp_do_prr_ack(tp, th, &to, SACK_CHANGE,
+ &maxseg);
+ (void) tcp_output(tp);
}
}
/*
@@ -2999,15 +3013,13 @@
SEQ_GT(tp->snd_una, tp->snd_recover) &&
SEQ_LEQ(th->th_ack, tp->snd_recover))
tp->snd_recover = th->th_ack - 1;
- /* XXXLAS: Can this be moved up into cc_post_recovery? */
+ tp->snd_una = th->th_ack;
if (IN_RECOVERY(tp->t_flags) &&
SEQ_GEQ(th->th_ack, tp->snd_recover)) {
- EXIT_RECOVERY(tp->t_flags);
+ cc_post_recovery(tp, th);
}
- tp->snd_una = th->th_ack;
- if (tp->t_flags & TF_SACK_PERMIT) {
- if (SEQ_GT(tp->snd_una, tp->snd_recover))
- tp->snd_recover = tp->snd_una;
+ if (SEQ_GT(tp->snd_una, tp->snd_recover)) {
+ tp->snd_recover = tp->snd_una;
}
if (SEQ_LT(tp->snd_nxt, tp->snd_una))
tp->snd_nxt = tp->snd_una;
@@ -4041,9 +4053,7 @@
*/
if (IN_FASTRECOVERY(tp->t_flags)) {
if (tcp_is_sack_recovery(tp, to)) {
- tp->snd_cwnd = tp->snd_nxt - tp->snd_recover +
- tp->sackhint.sack_bytes_rexmit +
- (snd_cnt * maxseg);
+ tp->snd_cwnd = pipe - del_data + (snd_cnt * maxseg);
} else {
tp->snd_cwnd = (tp->snd_max - tp->snd_una) +
(snd_cnt * maxseg);
@@ -4071,17 +4081,19 @@
tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
- tp->snd_nxt = th->th_ack;
- /*
- * Set snd_cwnd to one segment beyond acknowledged offset.
- * (tp->snd_una has not yet been updated when this function is called.)
- */
- tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th);
- tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
- tp->snd_cwnd = ocwnd;
- if (SEQ_GT(onxt, tp->snd_nxt))
- tp->snd_nxt = onxt;
+ if (IN_FASTRECOVERY(tp->t_flags)) {
+ tp->snd_nxt = th->th_ack;
+ /*
+ * Set snd_cwnd to one segment beyond acknowledged offset.
+ * (tp->snd_una has not yet been updated when this function is called.)
+ */
+ tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th);
+ tp->t_flags |= TF_ACKNOW;
+ (void) tcp_output(tp);
+ tp->snd_cwnd = ocwnd;
+ if (SEQ_GT(onxt, tp->snd_nxt))
+ tp->snd_nxt = onxt;
+ }
/*
* Partial window deflation. Relies on fact that tp->snd_una
* not updated yet.
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -264,19 +264,22 @@
}
}
again:
+ sendwin = 0;
/*
* If we've recently taken a timeout, snd_max will be greater than
* snd_nxt. There may be SACK information that allows us to avoid
* resending already delivered data. Adjust snd_nxt accordingly.
*/
if ((tp->t_flags & TF_SACK_PERMIT) &&
- SEQ_LT(tp->snd_nxt, tp->snd_max))
- tcp_sack_adjust(tp);
+ (tp->sackhint.nexthole != NULL) &&
+ !IN_FASTRECOVERY(tp->t_flags)) {
+ sendwin = tcp_sack_adjust(tp);
+ }
sendalot = 0;
tso = 0;
mtu = 0;
off = tp->snd_nxt - tp->snd_una;
- sendwin = min(tp->snd_wnd, tp->snd_cwnd);
+ sendwin = min(tp->snd_wnd, tp->snd_cwnd + sendwin);
flags = tcp_outflags[tp->t_state];
/*
@@ -293,12 +296,16 @@
sack_bytes_rxmt = 0;
len = 0;
p = NULL;
- if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) &&
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ (IN_FASTRECOVERY(tp->t_flags) || SEQ_LT(tp->snd_nxt, tp->snd_max)) &&
(p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
uint32_t cwin;
- cwin =
- imax(min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt, 0);
+ if (IN_FASTRECOVERY(tp->t_flags)) {
+ cwin = imax(sendwin - tcp_compute_pipe(tp), 0);
+ } else {
+ cwin = imax(sendwin - off, 0);
+ }
/* Do not retransmit SACK segments beyond snd_recover */
if (SEQ_GT(p->end, tp->snd_recover)) {
/*
@@ -317,19 +324,34 @@
goto after_sack_rexmit;
} else {
/* Can rexmit part of the current hole */
- len = ((int32_t)ulmin(cwin,
- SEQ_SUB(tp->snd_recover, p->rxmit)));
+ if (cwin <= (len = SEQ_SUB(tp->snd_recover, p->rxmit))) {
+ len = cwin;
+ } else {
+ sendalot = 1;
+ }
}
} else {
- len = ((int32_t)ulmin(cwin,
- SEQ_SUB(p->end, p->rxmit)));
+ if (cwin <= (len = SEQ_SUB(p->end, p->rxmit))) {
+ len = cwin;
+ } else {
+ sendalot = 1;
+ }
}
if (len > 0) {
off = SEQ_SUB(p->rxmit, tp->snd_una);
KASSERT(off >= 0,("%s: sack block to the left of una : %d",
__func__, off));
sack_rxmit = 1;
- sendalot = 1;
+ } else {
+ /* We could have transmitted from the scoreboard,
+ * but sendwin (expected flightsize) - pipe didn't
+ * allow any transmission.
+ * Bypass recalculating the possible transmission
+ * length further down by setting sack_rxmit.
+ * We wouldn't be here if there had been nothing
+ * in the scoreboard to transmit.
+ */
+ sack_rxmit = 1;
}
}
after_sack_rexmit:
@@ -392,35 +414,17 @@
* in which case len is already set.
*/
if (sack_rxmit == 0) {
- if (sack_bytes_rxmt == 0) {
- len = ((int32_t)min(sbavail(&so->so_snd), sendwin) -
- off);
+ if ((sack_bytes_rxmt == 0) ||
+ SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+ len = min(sbavail(&so->so_snd), sendwin) - off;
} else {
- int32_t cwin;
-
/*
* We are inside of a SACK recovery episode and are
* sending new data, having retransmitted all the
* data possible in the scoreboard.
*/
- len = ((int32_t)min(sbavail(&so->so_snd), tp->snd_wnd) -
- off);
- /*
- * Don't remove this (len > 0) check !
- * We explicitly check for len > 0 here (although it
- * isn't really necessary), to work around a gcc
- * optimization issue - to force gcc to compute
- * len above. Without this check, the computation
- * of len is bungled by the optimizer.
- */
- if (len > 0) {
- cwin = tp->snd_cwnd - imax(0, (int32_t)
- (tp->snd_nxt - tp->snd_recover)) -
- sack_bytes_rxmt;
- if (cwin < 0)
- cwin = 0;
- len = imin(len, cwin);
- }
+ len = imin(sbavail(&so->so_snd) - off,
+ sendwin - tcp_compute_pipe(tp));
}
}
@@ -551,16 +555,19 @@
ipoptlen += ipsec_optlen;
#endif
- if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg &&
+ if ((len > tp->t_maxseg) &&
+ (tp->t_flags & TF_TSO) &&
+ V_tcp_do_tso &&
(tp->t_port == 0) &&
- ((tp->t_flags & TF_SIGNATURE) == 0) &&
- tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
- ipoptlen == 0 && !(flags & TH_SYN))
+ !(tp->t_flags & TF_SIGNATURE) &&
+ (tp->rcv_numsacks == 0) &&
+ (ipoptlen == 0) &&
+ !(flags & TH_SYN))
tso = 1;
if (SEQ_LT((sack_rxmit ? p->rxmit : tp->snd_nxt) + len,
tp->snd_una + sbused(&so->so_snd))) {
- flags &= ~TH_FIN;
+ flags &= ~TH_FIN;
}
recwin = lmin(lmax(sbspace(&so->so_rcv), 0),
@@ -605,9 +612,8 @@
if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */
(idle || (tp->t_flags & TF_NODELAY)) &&
(uint32_t)len + (uint32_t)off >= sbavail(&so->so_snd) &&
- (tp->t_flags & TF_NOPUSH) == 0) {
+ (tp->t_flags & TF_NOPUSH) == 0)
goto send;
- }
if (tp->t_flags & TF_FORCEDATA) /* typ. timeout case */
goto send;
if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
@@ -1629,6 +1635,15 @@
/* Clean up any DSACK's sent */
tcp_clean_dsack_blocks(tp);
}
+ if ((error == 0) &&
+ sack_rxmit &&
+ SEQ_LT(tp->snd_nxt, SEQ_MIN(p->rxmit, p->end))) {
+ /*
+ * When transmitting from SACK scoreboard
+ * after an RTO, pull snd_nxt along.
+ */
+ tp->snd_nxt = SEQ_MIN(p->rxmit, p->end);
+ }
if (error) {
/*
* We know that the packet was lost, so back out the
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -953,8 +953,17 @@
/* Send one or 2 segments based on how much new data was acked. */
if ((BYTES_THIS_ACK(tp, th) / maxseg) >= 2)
num_segs = 2;
- tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit +
- (tp->snd_nxt - tp->snd_recover) + num_segs * maxseg);
+ if (tp->snd_nxt == tp->snd_max) {
+ tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit +
+ (tp->snd_nxt - tp->snd_recover) + num_segs * maxseg);
+ } else {
+ /*
+ * Since cwnd is not the expected flightsize during
+ * SACK LR, not deflating cwnd allows the partial
+ * ACKed amount to be sent.
+ */
+ tp->snd_cwnd += 0;
+ }
if (tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_flags |= TF_ACKNOW;
@@ -989,7 +998,7 @@
highdata--;
highdata = SEQ_MIN(highdata, tp->snd_recover);
if (SEQ_LT(th->th_ack, highdata)) {
- tp->snd_fack = th->th_ack;
+ tp->snd_fack = SEQ_MAX(th->th_ack, tp->snd_fack);
if ((temp = tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack,
highdata - maxseg), highdata, NULL)) != NULL) {
tp->sackhint.hole_bytes +=
@@ -1060,40 +1069,43 @@
* should be used to avoid retransmitting SACKed data. This function
* traverses the SACK list to see if snd_nxt should be moved forward.
*/
-void
+int
tcp_sack_adjust(struct tcpcb *tp)
{
+ int sacked = 0;
struct sackhole *p, *cur = TAILQ_FIRST(&tp->snd_holes);
INP_WLOCK_ASSERT(tptoinpcb(tp));
if (cur == NULL) {
/* No holes */
- return;
+ return (0);
}
if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack)) {
/* We're already beyond any SACKed blocks */
- return;
+ return (tp->sackhint.sacked_bytes);
}
- /*-
+ /*
* Two cases for which we want to advance snd_nxt:
* i) snd_nxt lies between end of one hole and beginning of another
* ii) snd_nxt lies between end of last hole and snd_fack
*/
while ((p = TAILQ_NEXT(cur, scblink)) != NULL) {
if (SEQ_LT(tp->snd_nxt, cur->end)) {
- return;
+ return (sacked);
}
+ sacked += p->start - cur->end;
if (SEQ_GEQ(tp->snd_nxt, p->start)) {
cur = p;
} else {
tp->snd_nxt = p->start;
- return;
+ return (sacked);
}
}
if (SEQ_LT(tp->snd_nxt, cur->end)) {
- return;
+ return (sacked);
}
tp->snd_nxt = tp->snd_fack;
+ return (tp->sackhint.sacked_bytes);
}
/*
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -1485,7 +1485,7 @@
tcp_seq rcv_lastend);
void tcp_clean_dsack_blocks(struct tcpcb *tp);
void tcp_clean_sackreport(struct tcpcb *tp);
-void tcp_sack_adjust(struct tcpcb *tp);
+int tcp_sack_adjust(struct tcpcb *tp);
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *,
sackstatus_t, u_int *);

File Metadata

Mime Type
text/plain
Expires
Fri, Nov 28, 1:36 PM (12 h, 15 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
26285126
Default Alt Text
D43470.id133394.diff (17 KB)

Event Timeline