Page MenuHomeFreeBSD

D18985.id53258.diff
No OneTemporary

D18985.id53258.diff

Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -2467,23 +2467,21 @@
}
if ((tp->t_flags & TF_SACK_PERMIT) &&
((to.to_flags & TOF_SACK) ||
- !TAILQ_EMPTY(&tp->snd_holes)))
+ !TAILQ_EMPTY(&tp->snd_holes))) {
sack_changed = tcp_sack_doack(tp, &to, th->th_ack);
- else
+ } else {
/*
* Reset the value so that previous (valid) value
* from the last ack with SACK doesn't get used.
*/
tp->sackhint.sacked_bytes = 0;
-
+ }
#ifdef TCP_HHOOK
/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
hhook_run_tcp_est_in(tp, th, &to);
#endif
-
+ u_int maxseg;
if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
- u_int maxseg;
-
maxseg = tcp_maxseg(tp);
if (tlen == 0 &&
(tiwin == tp->snd_wnd ||
@@ -2573,9 +2571,22 @@
tp->snd_cwnd += maxseg;
(void) tp->t_fb->tfb_tcp_output(tp);
goto drop;
- } else if (tp->t_dupacks == tcprexmtthresh) {
+ } else if ((tp->t_dupacks == tcprexmtthresh) ||
+ /*
+ * Add RFC6675 trigger condition of more
+ * than (dupthresh-1)*mss sacked data.
+ * If the count of holes in the
+ * scoreboard is >= dupthresh, we could
+ * also enter loss recovery, but don't
+ * have that value readily available.
+ */
+ ((tp->t_flags & TF_SACK_PERMIT) &&
+ (V_tcp_do_rfc6675_pipe) &&
+ (tp->sackhint.sacked_bytes >
+ (tcprexmtthresh - 1) * maxseg))) {
+enter_recovery:
+ tp->t_dupacks = tcprexmtthresh;
tcp_seq onxt = tp->snd_nxt;
-
/*
* If we're doing sack, check to
* see if we're already in sack
@@ -2607,6 +2618,8 @@
tp->sack_newdata = tp->snd_nxt;
tp->snd_cwnd = maxseg;
(void) tp->t_fb->tfb_tcp_output(tp);
+ if (SEQ_GT(th->th_ack, tp->snd_una))
+ goto resume_partialack;
goto drop;
}
tp->snd_nxt = th->th_ack;
@@ -2684,12 +2697,23 @@
tp->t_dupacks = 0;
/*
* If this ack also has new SACK info, increment the
- * counter as per rfc6675.
+ * counter as per rfc6675. Start FastRecovery if
+ * sufficient SACKed bytes were part of this
+ * partial ACK.
*/
- if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed)
+ if ((V_tcp_do_rfc6675_pipe) &&
+ (tp->t_flags & TF_SACK_PERMIT) && sack_changed) {
tp->t_dupacks++;
+ /* limit overhead by setting maxseg last */
+ if (!IN_FASTRECOVERY(tp->t_flags) &&
+ (tp->sackhint.sacked_bytes >
+ ((tcprexmtthresh - 1) *
+ (maxseg = tcp_maxseg(tp))))) {
+ goto enter_recovery;
+ }
+ }
}
-
+resume_partialack:
KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
("%s: th_ack <= snd_una", __func__));
Index: sys/netinet/tcp_sack.c
===================================================================
--- sys/netinet/tcp_sack.c
+++ sys/netinet/tcp_sack.c
@@ -345,9 +345,7 @@
* tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
* the sequence space).
* Returns 1 if incoming ACK has previously unknown SACK information,
- * 0 otherwise. Note: We treat (snd_una, th_ack) as a sack block so any changes
- * to that (i.e. left edge moving) would also be considered a change in SACK
- * information which is slightly different than rfc6675.
+ * 0 otherwise.
*/
int
tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
@@ -355,16 +353,21 @@
struct sackhole *cur, *temp;
struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
int i, j, num_sack_blks, sack_changed;
+ int delivered_data, left_edge_delta;
INP_WLOCK_ASSERT(tp->t_inpcb);
num_sack_blks = 0;
sack_changed = 0;
+ delivered_data = 0;
+ left_edge_delta = 0;
/*
* If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
* treat [SND.UNA, SEG.ACK) as if it is a SACK block.
+ * Account changes to SND.UNA always in delivered data.
*/
if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) {
+ left_edge_delta = th_ack - tp->snd_una;
sack_blocks[num_sack_blks].start = tp->snd_una;
sack_blocks[num_sack_blks++].end = th_ack;
}
@@ -373,7 +376,6 @@
* received new blocks from the other side.
*/
if (to->to_flags & TOF_SACK) {
- tp->sackhint.sacked_bytes = 0; /* reset */
for (i = 0; i < to->to_nsacks; i++) {
bcopy((to->to_sacks + i * TCPOLEN_SACK),
&sack, sizeof(sack));
@@ -386,8 +388,6 @@
SEQ_GT(sack.end, tp->snd_una) &&
SEQ_LEQ(sack.end, tp->snd_max)) {
sack_blocks[num_sack_blks++] = sack;
- tp->sackhint.sacked_bytes +=
- (sack.end-sack.start);
}
}
}
@@ -412,7 +412,7 @@
}
}
}
- if (TAILQ_EMPTY(&tp->snd_holes))
+ if (TAILQ_EMPTY(&tp->snd_holes)) {
/*
* Empty scoreboard. Need to initialize snd_fack (it may be
* uninitialized or have a bogus value). Scoreboard holes
@@ -421,6 +421,8 @@
* scoreboard).
*/
tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
+ tp->sackhint.sacked_bytes = 0; /* reset */
+ }
/*
* In the while-loop below, incoming SACK blocks (sack_blocks[]) and
* SACK holes (snd_holes) are traversed from their tails with just
@@ -444,6 +446,7 @@
*/
temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL);
if (temp != NULL) {
+ delivered_data += sblkp->end - sblkp->start;
tp->snd_fack = sblkp->end;
/* Go to the previous sack block. */
sblkp--;
@@ -462,10 +465,12 @@
sblkp--;
if (sblkp >= sack_blocks &&
SEQ_LT(tp->snd_fack, sblkp->end))
+ delivered_data += sblkp->end - tp->snd_fack;
tp->snd_fack = sblkp->end;
}
} else if (SEQ_LT(tp->snd_fack, sblkp->end)) {
/* fack is advanced. */
+ delivered_data += sblkp->end - tp->snd_fack;
tp->snd_fack = sblkp->end;
sack_changed = 1;
}
@@ -499,6 +504,7 @@
/* Data acks at least the beginning of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
/* Acks entire hole, so delete hole. */
+ delivered_data += (cur->end - cur->start);
temp = cur;
cur = TAILQ_PREV(cur, sackhole_head, scblink);
tcp_sackhole_remove(tp, temp);
@@ -510,6 +516,7 @@
continue;
} else {
/* Move start of hole forward. */
+ delivered_data += (sblkp->end - cur->start);
cur->start = sblkp->end;
cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
}
@@ -517,6 +524,7 @@
/* Data acks at least the end of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
/* Move end of hole backward. */
+ delivered_data += (cur->end - sblkp->start);
cur->end = sblkp->start;
cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
} else {
@@ -536,6 +544,7 @@
cur->end = sblkp->start;
cur->rxmit = SEQ_MIN(cur->rxmit,
cur->end);
+ delivered_data += (sblkp->end - sblkp->start);
}
}
}
@@ -550,6 +559,18 @@
else
sblkp--;
}
+ tp->sackhint.delivered_data = delivered_data;
+ tp->sackhint.sacked_bytes += delivered_data - left_edge_delta;
+ if (!(to->to_flags & TOF_SACK))
+ /*
+ * If this ACK did not contain any
+ * SACK blocks, and only moved the
+ * left edge right, it is a pure
+ * cumulative ACK. Do not count
+ * DupAck for this. Also required
+ * for RFC6675 rescue retransmission.
+ */
+ sack_changed = 0;
return (sack_changed);
}
@@ -595,6 +616,31 @@
if (tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_flags |= TF_ACKNOW;
+ /*
+ * RFC6675 rescue retransmission
+ * Add a hole between th_ack (snd_una is not yet set) and snd_max,
+ * if this was a pure cumulative ACK and no data was sent beyond
+ * recovery point. Since the data in the socket has not been freed
+ * at this point, we check if the scoreboard is empty, and the ACK
+ * delivered some new data, indicating a full ACK. Also, if the
+ * recovery point is still at snd_max, we are probably application
+ * limited. However, this inference might not always be true. The
+ * rescue retransmission may rarely be slightly premature
+ * compared to RFC6675.
+ * The corresponding ACK+SACK will cause any further outstanding
+ * segments to be retransmitted. This addresses a corner case, when
+ * the trailing packets of a window are lost and no further data
+ * is available for sending.
+ */
+ if ((V_tcp_do_rfc6675_pipe) &&
+ SEQ_LT(th->th_ack, tp->snd_recover) &&
+ (tp->snd_recover == tp->snd_max) &&
+ TAILQ_EMPTY(&tp->snd_holes) &&
+ (tp->sackhint.delivered_data > 0)) {
+ struct sackhole *hole;
+ int maxseg = tcp_maxseg(tp);
+ hole = tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack, tp->snd_max - maxseg), tp->snd_max, NULL);
+ }
(void) tp->t_fb->tfb_tcp_output(tp);
}
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -70,15 +70,15 @@
struct sackhint {
struct sackhole *nexthole;
- int sack_bytes_rexmit;
+ int32_t sack_bytes_rexmit;
tcp_seq last_sack_ack; /* Most recent/largest sacked ack */
- int ispare; /* explicit pad for 64bit alignment */
- int sacked_bytes; /*
- * Total sacked bytes reported by the
+ int32_t delivered_data; /* Newly acked data from last SACK */
+
+ int32_t sacked_bytes; /* Total sacked bytes reported by the
* receiver via sack option
*/
- uint32_t _pad1[1]; /* TBD */
+ int32_t sacked_bytes_old; /* just for demonstration */
uint64_t _pad[1]; /* TBD */
};

File Metadata

Mime Type
text/plain
Expires
Mon, May 18, 3:48 PM (3 h, 55 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33254794
Default Alt Text
D18985.id53258.diff (9 KB)

Event Timeline