Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F137946240
D43470.id133394.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
17 KB
Referenced Files
None
Subscribers
None
D43470.id133394.diff
View Options
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -439,10 +439,7 @@
case CC_RTO:
tp->t_dupacks = 0;
tp->t_bytes_acked = 0;
- if ((tp->t_rxtshift > 1) ||
- !((tp->t_flags & TF_SACK_PERMIT) &&
- (!TAILQ_EMPTY(&tp->snd_holes))))
- EXIT_RECOVERY(tp->t_flags);
+ EXIT_RECOVERY(tp->t_flags);
if (tp->t_flags2 & TF2_ECN_PERMIT)
tp->t_flags2 |= TF2_ECN_SND_CWR;
break;
@@ -474,13 +471,12 @@
{
INP_WLOCK_ASSERT(tptoinpcb(tp));
- /* XXXLAS: KASSERT that we're in recovery? */
-
if (CC_ALGO(tp)->post_recovery != NULL) {
tp->t_ccv.curack = th->th_ack;
CC_ALGO(tp)->post_recovery(&tp->t_ccv);
}
- /* XXXLAS: EXIT_RECOVERY ? */
+ EXIT_RECOVERY(tp->t_flags);
+
tp->t_bytes_acked = 0;
tp->sackhint.delivered_data = 0;
tp->sackhint.prr_delivered = 0;
@@ -2609,26 +2605,36 @@
tcp_do_prr_ack(tp, th, &to,
sack_changed, &maxseg);
} else if (tcp_is_sack_recovery(tp, &to) &&
- IN_FASTRECOVERY(tp->t_flags)) {
+ IN_FASTRECOVERY(tp->t_flags) &&
+ (tp->snd_nxt == tp->snd_max)) {
int awnd;
/*
- * Compute the amount of data in flight first.
- * We can inject new data into the pipe iff
- * we have less than 1/2 the original window's
- * worth of data in flight.
+ * Compute the amount of data in flight
+ * first. We can inject new data into
+ * the pipe iff we have less than
+ * ssthresh worth of data in flight.
*/
if (V_tcp_do_newsack) {
awnd = tcp_compute_pipe(tp);
} else {
- awnd = (tp->snd_nxt - tp->snd_fack) +
- tp->sackhint.sack_bytes_rexmit;
+ awnd = tp->snd_nxt - tp->snd_fack +
+ tp->sackhint.sack_bytes_rexmit;
}
if (awnd < tp->snd_ssthresh) {
- tp->snd_cwnd += maxseg;
+ tp->snd_cwnd += imax(maxseg,
+ imin(2 * maxseg,
+ tp->sackhint.delivered_data));
if (tp->snd_cwnd > tp->snd_ssthresh)
- tp->snd_cwnd = tp->snd_ssthresh;
+ tp->snd_cwnd =
+ tp->snd_ssthresh;
}
+ } else if (tcp_is_sack_recovery(tp, &to) &&
+ IN_FASTRECOVERY(tp->t_flags) &&
+ SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+ tp->snd_cwnd += imax(maxseg,
+ imin(2 * maxseg,
+ tp->sackhint.delivered_data));
} else {
tp->snd_cwnd += maxseg;
}
@@ -2652,14 +2658,13 @@
tcp_seq onxt = tp->snd_nxt;
/*
- * If we're doing sack, or prr, check
- * to see if we're already in sack
+ * If we're doing sack, check to
+ * see if we're already in sack
* recovery. If we're not doing sack,
* check to see if we're in newreno
* recovery.
*/
- if (V_tcp_do_prr ||
- (tp->t_flags & TF_SACK_PERMIT)) {
+ if (tcp_is_sack_recovery(tp, &to)) {
if (IN_FASTRECOVERY(tp->t_flags)) {
tp->t_dupacks = 0;
break;
@@ -2673,37 +2678,46 @@
}
/* Congestion signal before ack. */
cc_cong_signal(tp, th, CC_NDUPACK);
- cc_ack_received(tp, th, nsegs,
- CC_DUPACK);
+ cc_ack_received(tp, th, nsegs, CC_DUPACK);
tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
if (V_tcp_do_prr) {
/*
- * snd_ssthresh is already updated by
- * cc_cong_signal.
+ * snd_ssthresh and snd_recover are
+ * already updated by cc_cong_signal.
*/
if (tcp_is_sack_recovery(tp, &to)) {
/*
- * Exclude Limited Transmit
+ * Include Limited Transmit
* segments here
*/
tp->sackhint.prr_delivered =
- maxseg;
+ imin(tp->snd_max - th->th_ack,
+ (tp->snd_limited + 1) * maxseg);
} else {
tp->sackhint.prr_delivered =
- imin(tp->snd_max - tp->snd_una,
- imin(INT_MAX / 65536,
- tp->t_dupacks) * maxseg);
+ maxseg;
}
tp->sackhint.recover_fs = max(1,
tp->snd_nxt - tp->snd_una);
}
+ tp->snd_limited = 0;
if (tcp_is_sack_recovery(tp, &to)) {
TCPSTAT_INC(tcps_sack_recovery_episode);
- tp->snd_recover = tp->snd_nxt;
- tp->snd_cwnd = maxseg;
+ /*
+ * When entering LR after RTO due to
+ * Duplicate ACKs, retransmit existing
+ * holes from the scoreboard.
+ */
+ tcp_resend_sackholes(tp);
+ /* Avoid inflating cwnd in tcp_output */
+ tp->snd_nxt = tp->snd_max;
+ tp->snd_cwnd = tcp_compute_pipe(tp) +
+ maxseg;
(void) tcp_output(tp);
- if (SEQ_GT(th->th_ack, tp->snd_una)) {
+ /* Set cwnd to the expected flightsize */
+ tp->snd_cwnd = tp->snd_ssthresh;
+ if (SEQ_GT(th->th_ack, tp->snd_una))
goto resume_partialack;
}
goto drop;
@@ -2744,7 +2758,8 @@
if (tp->t_dupacks == 1)
tp->snd_limited = 0;
tp->snd_cwnd =
- (tp->snd_nxt - tp->snd_una) +
+ SEQ_SUB(tp->snd_nxt, tp->snd_una) -
+ tcp_sack_adjust(tp) +
(tp->t_dupacks - tp->snd_limited) *
maxseg;
/*
@@ -2793,7 +2808,10 @@
* counted as dupacks here.
*/
if (tcp_is_sack_recovery(tp, &to) &&
- (sack_changed != SACK_NOCHANGE)) {
+ ((sack_changed == SACK_NEWLOSS) ||
+ ((sack_changed != SACK_NOCHANGE) &&
+ ((tp->t_rxtshift == 0) ||
+ (tp->snd_nxt == tp->snd_max))))) {
tp->t_dupacks++;
/* limit overhead by setting maxseg last */
if (!IN_FASTRECOVERY(tp->t_flags) &&
@@ -2813,11 +2831,13 @@
* If the congestion window was inflated to account
* for the other side's cached packets, retract it.
*/
- if (IN_FASTRECOVERY(tp->t_flags)) {
- if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+ if (SEQ_LT(th->th_ack, tp->snd_recover)) {
+ if (IN_FASTRECOVERY(tp->t_flags)) {
if (tp->t_flags & TF_SACK_PERMIT) {
- if (V_tcp_do_prr && to.to_flags & TOF_SACK) {
- tcp_timer_activate(tp, TT_REXMT, 0);
+ if (V_tcp_do_prr &&
+ (to.to_flags & TOF_SACK)) {
+ tcp_timer_activate(tp,
+ TT_REXMT, 0);
tp->t_rtttime = 0;
tcp_do_prr_ack(tp, th, &to,
sack_changed, &maxseg);
@@ -2830,24 +2850,18 @@
} else {
tcp_newreno_partial_ack(tp, th);
}
- } else {
- cc_post_recovery(tp, th);
- }
- } else if (IN_CONGRECOVERY(tp->t_flags)) {
- if (SEQ_LT(th->th_ack, tp->snd_recover)) {
- if (V_tcp_do_prr) {
- tp->sackhint.delivered_data = BYTES_THIS_ACK(tp, th);
- tp->snd_fack = th->th_ack;
- /*
- * During ECN cwnd reduction
- * always use PRR-SSRB
- */
- tcp_do_prr_ack(tp, th, &to, SACK_CHANGE,
- &maxseg);
- (void) tcp_output(tp);
- }
- } else {
- cc_post_recovery(tp, th);
+ } else if (IN_CONGRECOVERY(tp->t_flags) &&
+ (V_tcp_do_prr)) {
+ tp->sackhint.delivered_data =
+ BYTES_THIS_ACK(tp, th);
+ tp->snd_fack = th->th_ack;
+ /*
+ * During ECN cwnd reduction
+ * always use PRR-SSRB
+ */
+ tcp_do_prr_ack(tp, th, &to, SACK_CHANGE,
+ &maxseg);
+ (void) tcp_output(tp);
}
}
/*
@@ -2999,15 +3013,13 @@
SEQ_GT(tp->snd_una, tp->snd_recover) &&
SEQ_LEQ(th->th_ack, tp->snd_recover))
tp->snd_recover = th->th_ack - 1;
- /* XXXLAS: Can this be moved up into cc_post_recovery? */
+ tp->snd_una = th->th_ack;
if (IN_RECOVERY(tp->t_flags) &&
SEQ_GEQ(th->th_ack, tp->snd_recover)) {
- EXIT_RECOVERY(tp->t_flags);
+ cc_post_recovery(tp, th);
}
- tp->snd_una = th->th_ack;
- if (tp->t_flags & TF_SACK_PERMIT) {
- if (SEQ_GT(tp->snd_una, tp->snd_recover))
- tp->snd_recover = tp->snd_una;
+ if (SEQ_GT(tp->snd_una, tp->snd_recover)) {
+ tp->snd_recover = tp->snd_una;
}
if (SEQ_LT(tp->snd_nxt, tp->snd_una))
tp->snd_nxt = tp->snd_una;
@@ -4041,9 +4053,7 @@
*/
if (IN_FASTRECOVERY(tp->t_flags)) {
if (tcp_is_sack_recovery(tp, to)) {
- tp->snd_cwnd = tp->snd_nxt - tp->snd_recover +
- tp->sackhint.sack_bytes_rexmit +
- (snd_cnt * maxseg);
+ tp->snd_cwnd = pipe - del_data + (snd_cnt * maxseg);
} else {
tp->snd_cwnd = (tp->snd_max - tp->snd_una) +
(snd_cnt * maxseg);
@@ -4071,17 +4081,19 @@
tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
- tp->snd_nxt = th->th_ack;
- /*
- * Set snd_cwnd to one segment beyond acknowledged offset.
- * (tp->snd_una has not yet been updated when this function is called.)
- */
- tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th);
- tp->t_flags |= TF_ACKNOW;
- (void) tcp_output(tp);
- tp->snd_cwnd = ocwnd;
- if (SEQ_GT(onxt, tp->snd_nxt))
- tp->snd_nxt = onxt;
+ if (IN_FASTRECOVERY(tp->t_flags)) {
+ tp->snd_nxt = th->th_ack;
+ /*
+ * Set snd_cwnd to one segment beyond acknowledged offset.
+ * (tp->snd_una has not yet been updated when this function is called.)
+ */
+ tp->snd_cwnd = maxseg + BYTES_THIS_ACK(tp, th);
+ tp->t_flags |= TF_ACKNOW;
+ (void) tcp_output(tp);
+ tp->snd_cwnd = ocwnd;
+ if (SEQ_GT(onxt, tp->snd_nxt))
+ tp->snd_nxt = onxt;
+ }
/*
* Partial window deflation. Relies on fact that tp->snd_una
* not updated yet.
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -264,19 +264,22 @@
}
}
again:
+ sendwin = 0;
/*
* If we've recently taken a timeout, snd_max will be greater than
* snd_nxt. There may be SACK information that allows us to avoid
* resending already delivered data. Adjust snd_nxt accordingly.
*/
if ((tp->t_flags & TF_SACK_PERMIT) &&
- SEQ_LT(tp->snd_nxt, tp->snd_max))
- tcp_sack_adjust(tp);
+ (tp->sackhint.nexthole != NULL) &&
+ !IN_FASTRECOVERY(tp->t_flags)) {
+ sendwin = tcp_sack_adjust(tp);
+ }
sendalot = 0;
tso = 0;
mtu = 0;
off = tp->snd_nxt - tp->snd_una;
- sendwin = min(tp->snd_wnd, tp->snd_cwnd);
+ sendwin = min(tp->snd_wnd, tp->snd_cwnd + sendwin);
flags = tcp_outflags[tp->t_state];
/*
@@ -293,12 +296,16 @@
sack_bytes_rxmt = 0;
len = 0;
p = NULL;
- if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) &&
+ if ((tp->t_flags & TF_SACK_PERMIT) &&
+ (IN_FASTRECOVERY(tp->t_flags) || SEQ_LT(tp->snd_nxt, tp->snd_max)) &&
(p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
uint32_t cwin;
- cwin =
- imax(min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt, 0);
+ if (IN_FASTRECOVERY(tp->t_flags)) {
+ cwin = imax(sendwin - tcp_compute_pipe(tp), 0);
+ } else {
+ cwin = imax(sendwin - off, 0);
+ }
/* Do not retransmit SACK segments beyond snd_recover */
if (SEQ_GT(p->end, tp->snd_recover)) {
/*
@@ -317,19 +324,34 @@
goto after_sack_rexmit;
} else {
/* Can rexmit part of the current hole */
- len = ((int32_t)ulmin(cwin,
- SEQ_SUB(tp->snd_recover, p->rxmit)));
+ if (cwin <= (len = SEQ_SUB(tp->snd_recover, p->rxmit))) {
+ len = cwin;
+ } else {
+ sendalot = 1;
+ }
}
} else {
- len = ((int32_t)ulmin(cwin,
- SEQ_SUB(p->end, p->rxmit)));
+ if (cwin <= (len = SEQ_SUB(p->end, p->rxmit))) {
+ len = cwin;
+ } else {
+ sendalot = 1;
+ }
}
if (len > 0) {
off = SEQ_SUB(p->rxmit, tp->snd_una);
KASSERT(off >= 0,("%s: sack block to the left of una : %d",
__func__, off));
sack_rxmit = 1;
- sendalot = 1;
+ } else {
+ /* We could have transmitted from the scoreboard,
+ * but sendwin (expected flightsize) - pipe did not
+ * allow any transmission.
+ * Bypass recalculating the possible transmission
+ * length further down by setting sack_rxmit.
+ * We would not be here if there had been
+ * nothing in the scoreboard to transmit.
+ */
+ sack_rxmit = 1;
}
}
after_sack_rexmit:
@@ -392,35 +414,17 @@
* in which case len is already set.
*/
if (sack_rxmit == 0) {
- if (sack_bytes_rxmt == 0) {
- len = ((int32_t)min(sbavail(&so->so_snd), sendwin) -
- off);
+ if ((sack_bytes_rxmt == 0) ||
+ SEQ_LT(tp->snd_nxt, tp->snd_max)) {
+ len = min(sbavail(&so->so_snd), sendwin) - off;
} else {
- int32_t cwin;
-
/*
* We are inside of a SACK recovery episode and are
* sending new data, having retransmitted all the
* data possible in the scoreboard.
*/
- len = ((int32_t)min(sbavail(&so->so_snd), tp->snd_wnd) -
- off);
- /*
- * Don't remove this (len > 0) check !
- * We explicitly check for len > 0 here (although it
- * isn't really necessary), to work around a gcc
- * optimization issue - to force gcc to compute
- * len above. Without this check, the computation
- * of len is bungled by the optimizer.
- */
- if (len > 0) {
- cwin = tp->snd_cwnd - imax(0, (int32_t)
- (tp->snd_nxt - tp->snd_recover)) -
- sack_bytes_rxmt;
- if (cwin < 0)
- cwin = 0;
- len = imin(len, cwin);
- }
+ len = imin(sbavail(&so->so_snd) - off,
+ sendwin - tcp_compute_pipe(tp));
}
}
@@ -551,16 +555,19 @@
ipoptlen += ipsec_optlen;
#endif
- if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg &&
+ if ((len > tp->t_maxseg) &&
+ (tp->t_flags & TF_TSO) &&
+ V_tcp_do_tso &&
(tp->t_port == 0) &&
- ((tp->t_flags & TF_SIGNATURE) == 0) &&
- tp->rcv_numsacks == 0 && sack_rxmit == 0 &&
- ipoptlen == 0 && !(flags & TH_SYN))
+ !(tp->t_flags & TF_SIGNATURE) &&
+ (tp->rcv_numsacks == 0) &&
+ (ipoptlen == 0) &&
+ !(flags & TH_SYN))
tso = 1;
if (SEQ_LT((sack_rxmit ? p->rxmit : tp->snd_nxt) + len,
tp->snd_una + sbused(&so->so_snd))) {
- flags &= ~TH_FIN;
+ flags &= ~TH_FIN;
}
recwin = lmin(lmax(sbspace(&so->so_rcv), 0),
@@ -605,9 +612,8 @@
if (!(tp->t_flags & TF_MORETOCOME) && /* normal case */
(idle || (tp->t_flags & TF_NODELAY)) &&
(uint32_t)len + (uint32_t)off >= sbavail(&so->so_snd) &&
- (tp->t_flags & TF_NOPUSH) == 0) {
+ (tp->t_flags & TF_NOPUSH) == 0)
goto send;
- }
if (tp->t_flags & TF_FORCEDATA) /* typ. timeout case */
goto send;
if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
@@ -1629,6 +1635,15 @@
/* Clean up any DSACK's sent */
tcp_clean_dsack_blocks(tp);
}
+ if ((error == 0) &&
+ sack_rxmit &&
+ SEQ_LT(tp->snd_nxt, SEQ_MIN(p->rxmit, p->end))) {
+ /*
+ * When transmitting from SACK scoreboard
+ * after an RTO, pull snd_nxt along.
+ */
+ tp->snd_nxt = SEQ_MIN(p->rxmit, p->end);
+ }
if (error) {
/*
* We know that the packet was lost, so back out the
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -953,8 +953,17 @@
/* Send one or 2 segments based on how much new data was acked. */
if ((BYTES_THIS_ACK(tp, th) / maxseg) >= 2)
num_segs = 2;
- tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit +
- (tp->snd_nxt - tp->snd_recover) + num_segs * maxseg);
+ if (tp->snd_nxt == tp->snd_max) {
+ tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit +
+ (tp->snd_nxt - tp->snd_recover) + num_segs * maxseg);
+ } else {
+ /*
+ * Since cwnd is not the expected flightsize during
+ * SACK LR, not deflating cwnd allows the partial
+ * ACKed amount to be sent.
+ */
+ tp->snd_cwnd += 0;
+ }
if (tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_flags |= TF_ACKNOW;
@@ -989,7 +998,7 @@
highdata--;
highdata = SEQ_MIN(highdata, tp->snd_recover);
if (SEQ_LT(th->th_ack, highdata)) {
- tp->snd_fack = th->th_ack;
+ tp->snd_fack = SEQ_MAX(th->th_ack, tp->snd_fack);
if ((temp = tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack,
highdata - maxseg), highdata, NULL)) != NULL) {
tp->sackhint.hole_bytes +=
@@ -1060,40 +1069,43 @@
* should be used to avoid retransmitting SACKed data. This function
* traverses the SACK list to see if snd_nxt should be moved forward.
*/
-void
+int
tcp_sack_adjust(struct tcpcb *tp)
{
+ int sacked = 0;
struct sackhole *p, *cur = TAILQ_FIRST(&tp->snd_holes);
INP_WLOCK_ASSERT(tptoinpcb(tp));
if (cur == NULL) {
/* No holes */
- return;
+ return (0);
}
if (SEQ_GEQ(tp->snd_nxt, tp->snd_fack)) {
/* We're already beyond any SACKed blocks */
- return;
+ return (tp->sackhint.sacked_bytes);
}
- /*-
+ /*
* Two cases for which we want to advance snd_nxt:
* i) snd_nxt lies between end of one hole and beginning of another
* ii) snd_nxt lies between end of last hole and snd_fack
*/
while ((p = TAILQ_NEXT(cur, scblink)) != NULL) {
if (SEQ_LT(tp->snd_nxt, cur->end)) {
- return;
+ return (sacked);
}
+ sacked += p->start - cur->end;
if (SEQ_GEQ(tp->snd_nxt, p->start)) {
cur = p;
} else {
tp->snd_nxt = p->start;
- return;
+ return (sacked);
}
}
if (SEQ_LT(tp->snd_nxt, cur->end)) {
- return;
+ return (sacked);
}
tp->snd_nxt = tp->snd_fack;
+ return (tp->sackhint.sacked_bytes);
}
/*
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -1485,7 +1485,7 @@
tcp_seq rcv_lastend);
void tcp_clean_dsack_blocks(struct tcpcb *tp);
void tcp_clean_sackreport(struct tcpcb *tp);
-void tcp_sack_adjust(struct tcpcb *tp);
+int tcp_sack_adjust(struct tcpcb *tp);
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *,
sackstatus_t, u_int *);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Nov 28, 1:36 PM (12 h, 15 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
26285126
Default Alt Text
D43470.id133394.diff (17 KB)
Attached To
Mode
D43470: tcp: refactor cwnd during SACK transmissions and enable TSO
Attached
Detach File
Event Timeline
Log In to Comment