Page MenuHomeFreeBSD

D18624.id52245.diff
No OneTemporary

D18624.id52245.diff

Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -2467,15 +2467,25 @@
}
if ((tp->t_flags & TF_SACK_PERMIT) &&
((to.to_flags & TOF_SACK) ||
- !TAILQ_EMPTY(&tp->snd_holes)))
+ !TAILQ_EMPTY(&tp->snd_holes))) {
sack_changed = tcp_sack_doack(tp, &to, th->th_ack);
- else
+ LOGTCPCBSTATE;
+ if (TAILQ_EMPTY(&tp->snd_holes) &&
+ !(to.to_flags & TOF_SACK) &&
+ ((tp->snd_max - th->th_ack) == sbavail(&so->so_snd)) &&
+ SEQ_LT(th->th_ack, tp->snd_recover)) {
+ if (so->so_options & SO_DEBUG)
+ log(LOG_DEBUG,"rfc6675 rescue retransmission\n");
+ }
+ } else {
/*
* Reset the value so that previous (valid) value
* from the last ack with SACK doesn't get used.
*/
tp->sackhint.sacked_bytes = 0;
-
+ tp->sackhint.sacked_bytes_old = 0;
+ LOGTCPCBSTATE;
+ }
#ifdef TCP_HHOOK
/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
hhook_run_tcp_est_in(tp, th, &to);
@@ -2518,7 +2528,7 @@
* window size so do congestion avoidance
* (set ssthresh to half the current window
* and pull our congestion window back to
- * the new ssthresh).
+ * the new ssthresh).
*
* Dup acks mean that packets have left the
* network (they're now cached at the receiver)
@@ -2540,9 +2550,17 @@
*/
if (th->th_ack != tp->snd_una ||
((tp->t_flags & TF_SACK_PERMIT) &&
- !sack_changed))
+ !sack_changed)) {
+// log(LOG_DEBUG,"tcp_input:2569 falling through here %u\n", tp->snd_fack);
+// if (SEQ_LT(th->th_ack, tp->snd_recover) &&
+// (tp->t_flags & TF_SACK_PERMIT) &&
+// TAILQ_EMPTY(&tp->snd_holes) &&
+// ((tp->snd_max - tp->snd_una) == sbavail(&so->so_snd))) {
+// log(LOG_DEBUG,"adding hole for rescue rexmit\n");
+// tcp_sackhole_insert(tp, tp->snd_una, tp->snd_max, NULL);
+// }
break;
- else if (!tcp_timer_active(tp, TT_REXMT))
+ } else if (!tcp_timer_active(tp, TT_REXMT))
tp->t_dupacks = 0;
else if (++tp->t_dupacks > tcprexmtthresh ||
IN_FASTRECOVERY(tp->t_flags)) {
@@ -2571,9 +2589,23 @@
}
} else
tp->snd_cwnd += maxseg;
+
(void) tp->t_fb->tfb_tcp_output(tp);
goto drop;
- } else if (tp->t_dupacks == tcprexmtthresh) {
+ } else if ((tp->t_dupacks == tcprexmtthresh) ||
+ /*
+ * Add RFC6675 trigger condition of more
+ * than (dupthresh-1)*mss sacked data.
+ * If the count of holes in the
+ * scoreboard is >= dupthresh, we could
+ * also enter loss recovery, but don't
+ * have that value readily available.
+ */
+ ((tp->t_flags & TF_SACK_PERMIT) &&
+ (V_tcp_do_rfc6675_pipe) &&
+ (tp->sackhint.sacked_bytes >
+ (tcprexmtthresh - 1) * maxseg))) {
+ tp->t_dupacks = tcprexmtthresh;
tcp_seq onxt = tp->snd_nxt;
/*
@@ -2686,8 +2718,10 @@
* If this ack also has new SACK info, increment the
* counter as per rfc6675.
*/
- if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed)
+ if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed) {
tp->t_dupacks++;
+ }
+ LOGTCPCBSTATE;
}
KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -54,6 +54,7 @@
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
+#include <sys/syslog.h>
#include <net/if.h>
#include <net/route.h>
@@ -285,43 +286,71 @@
sack_bytes_rxmt = 0;
len = 0;
p = NULL;
- if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags) &&
- (p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
+
+ LOGTCPCBSTATE2;
+
+ if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags)) {
+ p = tcp_sack_output(tp, &sack_bytes_rxmt);
+ /*
+ * RFC6675 Rescue Retransmission
+ * when no new data is available, and
+ * all Scoreboard Holes were retransmitted,
+ * resend 1 MSS just beneath snd_max
+ */
uint32_t cwin;
cwin =
imax(min(tp->snd_wnd, tp->snd_cwnd) - sack_bytes_rxmt, 0);
- /* Do not retransmit SACK segments beyond snd_recover */
- if (SEQ_GT(p->end, tp->snd_recover)) {
- /*
- * (At least) part of sack hole extends beyond
- * snd_recover. Check to see if we can rexmit data
- * for this hole.
- */
- if (SEQ_GEQ(p->rxmit, tp->snd_recover)) {
+/* if (V_tcp_do_rfc6675_pipe && (p == NULL) &&
+ SEQ_GT(tp->snd_max, tp->snd_una) &&
+ ((tp->snd_max - tp->snd_una) == sbavail(&so->so_snd))) {
+// if (so->so_options & SO_DEBUG) {
+// log(LOG_DEBUG,"rfc6675 rescue retransmission");
+// }
+ len = ((int32_t)ulmin(tp->t_maxseg, cwin));
+ tp->snd_nxt = tp->snd_max - len - tp->snd_una;
+ sendalot = 1;
+ TCPSTAT_INC(tcps_sack_rescxmits);
+ TCPSTAT_ADD(tcps_sack_rescxmit_bytes, len);
+ }
+*/ if (p != NULL) {
+ /* Do not retransmit SACK segments beyond snd_recover */
+ if (SEQ_GT(p->end, tp->snd_recover)) {
/*
- * Can't rexmit any more data for this hole.
- * That data will be rexmitted in the next
- * sack recovery episode, when snd_recover
- * moves past p->rxmit.
+ * (At least) part of sack hole extends beyond
+ * snd_recover. Check to see if we can rexmit data
+ * for this hole.
*/
- p = NULL;
- goto after_sack_rexmit;
- } else
- /* Can rexmit part of the current hole */
- len = ((int32_t)ulmin(cwin,
+ if (SEQ_GEQ(p->rxmit, tp->snd_recover)) {
+ /*
+ * Can't rexmit any more data for this hole.
+ * That data will be rexmitted in the next
+ * sack recovery episode, when snd_recover
+ * moves past p->rxmit.
+ */
+ p = NULL;
+ goto after_sack_rexmit;
+ } else
+ /* Can rexmit part of the current hole */
+ len = ((int32_t)ulmin(cwin,
tp->snd_recover - p->rxmit));
- } else
- len = ((int32_t)ulmin(cwin, p->end - p->rxmit));
- off = p->rxmit - tp->snd_una;
- KASSERT(off >= 0,("%s: sack block to the left of una : %d",
- __func__, off));
- if (len > 0) {
- sack_rxmit = 1;
- sendalot = 1;
- TCPSTAT_INC(tcps_sack_rexmits);
- TCPSTAT_ADD(tcps_sack_rexmit_bytes,
- min(len, tp->t_maxseg));
+ } else
+ len = ((int32_t)ulmin(cwin, p->end - p->rxmit));
+ off = p->rxmit - tp->snd_una;
+ if (off < 0) {
+ log(LOG_DEBUG,"near panic: una: %u, rxmit: %u, start: %u, end:%u, len: %i\n",
+ tp->snd_una - tp->iss, p->rxmit - tp->iss, p->start - tp->iss, p->end - tp->iss, len);
+ off = 0;
+ }
+ KASSERT(off >= 0,("%s: sack block to the left of una : %d",
+ __func__, off));
+ if (len > 0) {
+ sack_rxmit = 1;
+ sendalot = 1;
+ TCPSTAT_INC(tcps_sack_rexmits);
+ TCPSTAT_ADD(tcps_sack_rexmit_bytes,
+ min(len, tp->t_maxseg));
+ }
}
}
after_sack_rexmit:
@@ -1405,6 +1434,8 @@
TCP_PROBE5(connect__request, NULL, tp, ip, tp, th);
TCP_PROBE5(send, NULL, tp, ip, tp, th);
+ if (so->so_options & SO_DEBUG)
+ log(LOG_DEBUG, "tcp_output:1444 hand off to IP\n");
#ifdef TCPPCAP
/* Save packet, if requested. */
Index: sys/netinet/tcp_sack.c
===================================================================
--- sys/netinet/tcp_sack.c
+++ sys/netinet/tcp_sack.c
@@ -355,25 +355,33 @@
struct sackhole *cur, *temp;
struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
int i, j, num_sack_blks, sack_changed;
+ int delivered_data, left_edge_delta;
INP_WLOCK_ASSERT(tp->t_inpcb);
num_sack_blks = 0;
sack_changed = 0;
+ delivered_data = 0;
+ left_edge_delta = 0;
/*
* If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
* treat [SND.UNA, SEG.ACK) as if it is a SACK block.
+ * The SND.UNA advance itself is always recorded in left_edge_delta.
*/
- if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) {
- sack_blocks[num_sack_blks].start = tp->snd_una;
- sack_blocks[num_sack_blks++].end = th_ack;
+ if (SEQ_LT(tp->snd_una, th_ack)) {
+ /* Bytes by which this ACK advances SND.UNA. */
+ left_edge_delta = th_ack - tp->snd_una;
+ if (!TAILQ_EMPTY(&tp->snd_holes)) {
+ sack_blocks[num_sack_blks].start = tp->snd_una;
+ sack_blocks[num_sack_blks++].end = th_ack;
+ }
}
/*
* Append received valid SACK blocks to sack_blocks[], but only if we
* received new blocks from the other side.
*/
if (to->to_flags & TOF_SACK) {
- tp->sackhint.sacked_bytes = 0; /* reset */
+ tp->sackhint.sacked_bytes_old = 0; /* reset */
for (i = 0; i < to->to_nsacks; i++) {
bcopy((to->to_sacks + i * TCPOLEN_SACK),
&sack, sizeof(sack));
@@ -386,8 +394,8 @@
SEQ_GT(sack.end, tp->snd_una) &&
SEQ_LEQ(sack.end, tp->snd_max)) {
sack_blocks[num_sack_blks++] = sack;
- tp->sackhint.sacked_bytes +=
- (sack.end-sack.start);
+ tp->sackhint.sacked_bytes_old +=
+ (sack.end - sack.start);
}
}
}
@@ -412,7 +420,7 @@
}
}
}
- if (TAILQ_EMPTY(&tp->snd_holes))
+ if (TAILQ_EMPTY(&tp->snd_holes)) {
/*
* Empty scoreboard. Need to initialize snd_fack (it may be
* uninitialized or have a bogus value). Scoreboard holes
@@ -421,6 +429,8 @@
* scoreboard).
*/
tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
+ tp->sackhint.sacked_bytes = 0; /* reset */
+ }
/*
* In the while-loop below, incoming SACK blocks (sack_blocks[]) and
* SACK holes (snd_holes) are traversed from their tails with just
@@ -444,6 +454,7 @@
*/
temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL);
if (temp != NULL) {
+ delivered_data += sblkp->end - sblkp->start;
tp->snd_fack = sblkp->end;
/* Go to the previous sack block. */
sblkp--;
@@ -462,10 +473,14 @@
sblkp--;
+ /* Advance fack only if a previous SACK block exists and ends beyond it. */
if (sblkp >= sack_blocks &&
- SEQ_LT(tp->snd_fack, sblkp->end))
+ SEQ_LT(tp->snd_fack, sblkp->end)) {
+ delivered_data += sblkp->end - tp->snd_fack;
tp->snd_fack = sblkp->end;
+ }
}
} else if (SEQ_LT(tp->snd_fack, sblkp->end)) {
/* fack is advanced. */
+ delivered_data += sblkp->end - tp->snd_fack;
tp->snd_fack = sblkp->end;
sack_changed = 1;
}
@@ -499,6 +512,7 @@
/* Data acks at least the beginning of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
/* Acks entire hole, so delete hole. */
+ delivered_data += (cur->end - cur->start);
temp = cur;
cur = TAILQ_PREV(cur, sackhole_head, scblink);
tcp_sackhole_remove(tp, temp);
@@ -510,6 +524,7 @@
continue;
} else {
/* Move start of hole forward. */
+ delivered_data += (sblkp->end - cur->start);
cur->start = sblkp->end;
cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
}
@@ -517,6 +532,7 @@
/* Data acks at least the end of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
/* Move end of hole backward. */
+ delivered_data += (cur->end - sblkp->start);
cur->end = sblkp->start;
cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
} else {
@@ -536,6 +552,7 @@
cur->end = sblkp->start;
cur->rxmit = SEQ_MIN(cur->rxmit,
cur->end);
+ delivered_data += (sblkp->end - sblkp->start);
}
}
}
@@ -550,6 +567,18 @@
else
sblkp--;
}
+ tp->sackhint.delivered_data = delivered_data;
+ tp->sackhint.sacked_bytes += delivered_data - left_edge_delta;
+ if (!(to->to_flags & TOF_SACK))
+ /*
+ * If this ACK did not contain any
+ * SACK blocks, and only moved the
+ * left edge right, it is a pure
+ * cumulative ACK. Do not count
+ * DupAck for this. Also required
+ * for RFC6675 rescue retransmission.
+ */
+ sack_changed = 0;
return (sack_changed);
}
@@ -595,7 +624,45 @@
if (tp->snd_cwnd > tp->snd_ssthresh)
tp->snd_cwnd = tp->snd_ssthresh;
tp->t_flags |= TF_ACKNOW;
+ /*
+ * RFC6675 rescue retransmission
+ * Add a hole between th_ack (una is not yet set) and snd_max,
+ * if this was a pure cumulative ACK and no data was sent beyond
+ * recovery point. Since the data in the socket has not been freed
+ * at this point, this may still happen when more new data is ready to
+ * send. The rescue retransmission may be slightly premature
+ * compared to RFC6675.
+ */
+ if ((V_tcp_do_rfc6675_pipe) &&
+ SEQ_LT(th->th_ack, tp->snd_recover) &&
+ (tp->snd_recover == tp->snd_max) &&
+ TAILQ_EMPTY(&tp->snd_holes) &&
+ (tp->sackhint.delivered_data > 0)) {
+ struct sackhole *hole;
+ int maxseg = tcp_maxseg(tp);
+ hole = tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack, tp->snd_max - maxseg), tp->snd_max, NULL);
+// if ((tp->snd_max - th->th_ack) > maxseg) { // do this with PRR to avoid bursts.
+ /*
+ * have to insert lower hole after
+ * rescue retransmission, for
+ * sackhint updates to pick this up
+ */
+// hole = tcp_sackhole_insert(tp, th->th_ack, tp->snd_max - maxseg, NULL);
+// log(LOG_DEBUG,"low hole %u - %u <- %u\n", hole->start - tp->iss, hole->end - tp->iss, hole->rxmit - tp->iss);
+// }
+ /* tcp_sackhole_insert() can return NULL; never dereference a missing hole. */
+ if (hole != NULL && tp->sackhint.nexthole != NULL) {
+ log(LOG_DEBUG,"high hole %u - %u <- %u\n", tp->sackhint.nexthole->start - tp->iss, tp->sackhint.nexthole->end - tp->iss, tp->sackhint.nexthole->rxmit - tp->iss);
+ log(LOG_DEBUG,"nexthole: %p (%u) hole: %p (%u)\n",
+ (void *)tp->sackhint.nexthole, tp->sackhint.nexthole->start - tp->iss,
+ (void *)hole, hole->start - tp->iss);
+ }
+ }
+
(void) tp->t_fb->tfb_tcp_output(tp);
+
+ struct socket *so = tp->t_inpcb->inp_socket;
+ LOGTCPCBSTATE2;
}
#if 0
@@ -649,6 +713,8 @@
INP_WLOCK_ASSERT(tp->t_inpcb);
*sack_bytes_rexmt = tp->sackhint.sack_bytes_rexmit;
hole = tp->sackhint.nexthole;
+ struct socket *so = tp->t_inpcb->inp_socket;
+ LOGTCPCBSTATE2;
if (hole == NULL || SEQ_LT(hole->rxmit, hole->end))
goto out;
while ((hole = TAILQ_NEXT(hole, scblink)) != NULL) {
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -43,6 +43,89 @@
#include <sys/mbuf.h>
#endif
+#define STRIPPATH(s)\
+ (sizeof(s) > 2 && (s)[sizeof(s)-2] == '/' ? (s) + sizeof(s) - 1 : \
+ sizeof(s) > 3 && (s)[sizeof(s)-3] == '/' ? (s) + sizeof(s) - 2 : \
+ sizeof(s) > 4 && (s)[sizeof(s)-4] == '/' ? (s) + sizeof(s) - 3 : \
+ sizeof(s) > 5 && (s)[sizeof(s)-5] == '/' ? (s) + sizeof(s) - 4 : \
+ sizeof(s) > 6 && (s)[sizeof(s)-6] == '/' ? (s) + sizeof(s) - 5 : \
+ sizeof(s) > 7 && (s)[sizeof(s)-7] == '/' ? (s) + sizeof(s) - 6 : \
+ sizeof(s) > 8 && (s)[sizeof(s)-8] == '/' ? (s) + sizeof(s) - 7 : \
+ sizeof(s) > 9 && (s)[sizeof(s)-9] == '/' ? (s) + sizeof(s) - 8 : \
+ sizeof(s) > 10 && (s)[sizeof(s)-10] == '/' ? (s) + sizeof(s) - 9 : \
+ sizeof(s) > 11 && (s)[sizeof(s)-11] == '/' ? (s) + sizeof(s) - 10 : \
+ sizeof(s) > 12 && (s)[sizeof(s)-12] == '/' ? (s) + sizeof(s) - 11 : \
+ sizeof(s) > 13 && (s)[sizeof(s)-13] == '/' ? (s) + sizeof(s) - 12 : \
+ sizeof(s) > 14 && (s)[sizeof(s)-14] == '/' ? (s) + sizeof(s) - 13 : \
+ sizeof(s) > 15 && (s)[sizeof(s)-15] == '/' ? (s) + sizeof(s) - 14 : (s))
+/* __JUSTFILE__ is __FILE__ minus its directory part; STRIPPATH needs an array (it uses sizeof), searches only the last 14 characters for a '/', and otherwise yields the string unchanged. */
+#define __JUSTFILE__ STRIPPATH(__FILE__)
+/* Debug aid: if SO_DEBUG is set on `so`, log one line of send-side sequence/SACK state (sequence numbers relative to tp->iss); expects `so`, `tp`, `th` and a struct (not pointer) `to` in scope at the expansion site. */
+#define LOGTCPCBSTATE do { \
+ if (so->so_options & SO_DEBUG) { \
+ log(LOG_DEBUG,"%12s:%-4d una:%5u ack:%5u fack:%5u rp:%5u new:%5u max:%5u nxt:%5u " \
+ "cwnd:%5u dup:%2d pipe ori:%5u old:%5u new:%5u sack re:%5u old:%5u by:%5u dd:%5u avail:%5u %s %s %s\n", \
+ __JUSTFILE__, \
+ __LINE__, \
+ tp->snd_una - tp->iss, \
+ th->th_ack - tp->iss, \
+ (tp->snd_fack == 0) ? 0 : tp->snd_fack - tp->iss, \
+ tp->snd_recover - tp->iss, \
+ (tp->sack_newdata == 0) ? 0 : tp->sack_newdata - tp->iss, \
+ tp->snd_max - tp->iss, \
+ tp->snd_nxt - tp->iss, \
+ tp->snd_cwnd, \
+ tp->t_dupacks, \
+ tp->snd_nxt - tp->snd_fack + tp->sackhint.sack_bytes_rexmit, \
+ tp->snd_max - tp->snd_una + tp->sackhint.sack_bytes_rexmit - tp->sackhint.sacked_bytes_old, \
+ tp->snd_max - tp->snd_una + tp->sackhint.sack_bytes_rexmit - tp->sackhint.sacked_bytes, \
+ \
+ tp->sackhint.sack_bytes_rexmit, \
+ tp->sackhint.sacked_bytes_old, \
+ tp->sackhint.sacked_bytes, \
+ tp->sackhint.delivered_data, \
+ \
+ sbavail(&so->so_snd), \
+ IN_RECOVERY(tp->t_flags) ? "LR" : " ", \
+ (to.to_flags & TOF_SACK) ? "sack" : " ", \
+ TAILQ_EMPTY(&tp->snd_holes) ? "empty":" " \
+ ); \
+ } \
+ } while (0)
+/* Variant of LOGTCPCBSTATE for call sites without `th`/`to`: prints fixed placeholders for the ACK number and the SACK flag, so only `so` and `tp` must be in scope. */
+#define LOGTCPCBSTATE2 do { \
+ if (so->so_options & SO_DEBUG) { \
+ log(LOG_DEBUG,"%12s:%-4d una:%5u ack:----- fack:%5u rp:%5u new:%5u max:%5u nxt:%5u " \
+ "cwnd:%5u dup:%2d pipe ori:%5u old:%5u new:%5u sack re:%5u old:%5u by:%5u dd:%5u avail:%5u %s %s %s\n", \
+ __JUSTFILE__, \
+ __LINE__, \
+ tp->snd_una - tp->iss, \
+ \
+ (tp->snd_fack == 0) ? 0 : tp->snd_fack - tp->iss, \
+ tp->snd_recover - tp->iss, \
+ (tp->sack_newdata == 0) ? 0 : tp->sack_newdata - tp->iss, \
+ tp->snd_max - tp->iss, \
+ tp->snd_nxt - tp->iss, \
+ tp->snd_cwnd, \
+ tp->t_dupacks, \
+ tp->snd_nxt - tp->snd_fack + tp->sackhint.sack_bytes_rexmit, \
+ tp->snd_max - tp->snd_una + tp->sackhint.sack_bytes_rexmit - tp->sackhint.sacked_bytes_old, \
+ tp->snd_max - tp->snd_una + tp->sackhint.sack_bytes_rexmit - tp->sackhint.sacked_bytes, \
+ \
+ tp->sackhint.sack_bytes_rexmit, \
+ tp->sackhint.sacked_bytes_old, \
+ tp->sackhint.sacked_bytes, \
+ tp->sackhint.delivered_data, \
+ \
+ sbavail(&so->so_snd), \
+ IN_RECOVERY(tp->t_flags) ? "LR" : " ", \
+ "----", \
+ TAILQ_EMPTY(&tp->snd_holes) ? "empty":" " \
+ ); \
+ } \
+ } while (0)
+
+
#if defined(_KERNEL) || defined(_WANT_TCPCB)
/* TCP segment queue entry */
struct tseg_qent {
@@ -70,15 +153,16 @@
struct sackhint {
struct sackhole *nexthole;
- int sack_bytes_rexmit;
+ int32_t sack_bytes_rexmit;
tcp_seq last_sack_ack; /* Most recent/largest sacked ack */
- int ispare; /* explicit pad for 64bit alignment */
- int sacked_bytes; /*
- * Total sacked bytes reported by the
+ int32_t delivered_data; /* Newly acked data from last SACK */
+
+ int32_t sacked_bytes; /* Total sacked bytes reported by the
* receiver via sack option
*/
- uint32_t _pad1[1]; /* TBD */
+ int32_t sacked_bytes_old; /* just for demonstration */
+// uint32_t _pad1[1]; /* TBD */
uint64_t _pad[1]; /* TBD */
};

File Metadata

Mime Type
text/plain
Expires
Tue, Mar 17, 1:12 AM (15 h, 43 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
29799793
Default Alt Text
D18624.id52245.diff (17 KB)

Event Timeline