Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F156733343
D28931.id88968.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
10 KB
Referenced Files
None
Subscribers
None
D28931.id88968.diff
View Options
diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4
--- a/share/man/man4/tcp.4
+++ b/share/man/man4/tcp.4
@@ -547,6 +547,13 @@
application limited and the network bandwidth is not utilized completely.
That prevents self-inflicted packet losses once the application starts to
transmit data at a higher speed.
+.It Va do_lrd
+Enable Lost Retransmission Detection for SACK-enabled sessions, disabled by
+default.
+Under severe congestion, a retransmission can itself be lost, which then leads
+to a mandatory Retransmission Timeout (RTO), followed by slow-start.
+LRD will try to resend the repeatedly lost packet, preventing the time-consuming
+RTO and the performance-reducing slow-start.
.It Va do_prr
Perform SACK loss recovery using the Proportional Rate Reduction (PRR) algorithm
described in RFC6937.
diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -199,6 +199,7 @@
#define TCP_PROC_ACCOUNTING 76 /* Do accounting on tcp cpu usage and counts */
#define TCP_USE_CMP_ACKS 77 /* The transport can handle the Compressed mbuf acks */
#define TCP_PERF_INFO 78 /* retrieve accounting counters */
+#define TCP_LRD 79 /* toggle Lost Retransmission Detection for A/B testing */
#define TCP_KEEPINIT 128 /* N, time to establish connection */
#define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */
#define TCP_KEEPINTVL 512 /* L,N interval between keepalives */
diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -164,6 +164,11 @@
&VNET_NAME(tcp_do_prr), 1,
"Enable Proportional Rate Reduction per RFC 6937");
+VNET_DEFINE(int, tcp_do_lrd) = 0;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_lrd, CTLFLAG_VNET | CTLFLAG_RW,
+ &VNET_NAME(tcp_do_lrd), 1,
+ "Perform Lost Retransmission Detection");
+
VNET_DEFINE(int, tcp_do_newcwv) = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_newcwv), 0,
@@ -2523,9 +2528,12 @@
}
if ((tp->t_flags & TF_SACK_PERMIT) &&
((to.to_flags & TOF_SACK) ||
- !TAILQ_EMPTY(&tp->snd_holes)))
- sack_changed = tcp_sack_doack(tp, &to, th->th_ack);
- else
+ !TAILQ_EMPTY(&tp->snd_holes))) {
+ if (((sack_changed = tcp_sack_doack(tp, &to, th->th_ack)) != 0) &&
+ (tp->t_flags & TF_LRD)) {
+ tcp_sack_lost_retransmission(tp, th);
+ }
+ } else
/*
* Reset the value so that previous (valid) value
* from the last ack with SACK doesn't get used.
diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c
--- a/sys/netinet/tcp_output.c
+++ b/sys/netinet/tcp_output.c
@@ -1264,6 +1264,14 @@
} else {
th->th_seq = htonl(p->rxmit);
p->rxmit += len;
+ /*
+ * Lost Retransmission Detection:
+ * trigger resending of a (then
+ * still existing) hole, once
+ * FACK passes the recovery point.
+ */
+ if ((tp->t_flags & TF_LRD) && SEQ_GEQ(p->rxmit, p->end))
+ p->rxmit = tp->snd_recover;
tp->sackhint.sack_bytes_rexmit += len;
}
if (IN_RECOVERY(tp->t_flags)) {
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -119,6 +119,7 @@
#include <netinet/tcp_var.h>
#include <netinet6/tcp6_var.h>
#include <netinet/tcpip.h>
+#include <netinet/cc/cc.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif /* TCPDEBUG */
@@ -730,7 +731,8 @@
cur = TAILQ_PREV(cur, sackhole_head, scblink);
continue;
}
- tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
+ tp->sackhint.sack_bytes_rexmit -=
+ (SEQ_MIN(cur->rxmit, cur->end) - cur->start);
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
("sackhint bytes rtx >= 0"));
sack_changed = 1;
@@ -761,6 +763,8 @@
delivered_data += (cur->end - sblkp->start);
cur->end = sblkp->start;
cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
+ if ((tp->t_flags & TF_LRD) && SEQ_GEQ(cur->rxmit, cur->end))
+ cur->rxmit = tp->snd_recover;
} else {
/*
* ACKs some data in middle of a hole; need
@@ -771,18 +775,21 @@
if (temp != NULL) {
if (SEQ_GT(cur->rxmit, temp->rxmit)) {
temp->rxmit = cur->rxmit;
- tp->sackhint.sack_bytes_rexmit
- += (temp->rxmit
- - temp->start);
+ tp->sackhint.sack_bytes_rexmit +=
+ (SEQ_MIN(temp->rxmit,
+ temp->end) - temp->start);
}
cur->end = sblkp->start;
cur->rxmit = SEQ_MIN(cur->rxmit,
cur->end);
+ if ((tp->t_flags & TF_LRD) && SEQ_GEQ(cur->rxmit, cur->end))
+ cur->rxmit = tp->snd_recover;
delivered_data += (sblkp->end - sblkp->start);
}
}
}
- tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start);
+ tp->sackhint.sack_bytes_rexmit +=
+ (SEQ_MIN(cur->rxmit, cur->end) - cur->start);
/*
* Testing sblkp->start against cur->start tells us whether
* we're done with the sack block or the sack hole.
@@ -912,7 +919,7 @@
*sack_bytes_rexmt += (p->rxmit - p->start);
break;
}
- *sack_bytes_rexmt += (p->rxmit - p->start);
+ *sack_bytes_rexmt += (SEQ_MIN(p->rxmit, p->end) - p->start);
}
return (p);
}
@@ -989,3 +996,57 @@
return;
tp->snd_nxt = tp->snd_fack;
}
+
+/*
+ * Lost Retransmission Detection
+ * Check if FACK is beyond the recovery point while the leftmost hole
+ * has already been retransmitted completely.
+ * If yes, we restart sending from still existing holes,
+ * and adjust cwnd via the congestion control module.
+ * th is the header of the ACK being processed, handed to the CC module.
+ */
+void
+tcp_sack_lost_retransmission(struct tcpcb *tp, struct tcphdr *th)
+{
+	struct sackhole *temp;
+
+	if (IN_RECOVERY(tp->t_flags) &&
+	    SEQ_GT(tp->snd_fack, tp->snd_recover) &&
+	    ((temp = TAILQ_FIRST(&tp->snd_holes)) != NULL) &&
+	    SEQ_GEQ(temp->rxmit, temp->end) &&
+	    SEQ_GEQ(tp->snd_fack, temp->rxmit)) {
+		TCPSTAT_INC(tcps_sack_lostrexmt);
+		/*
+		 * Start retransmissions from the first hole, and
+		 * subsequently all other remaining holes, including
+		 * those, which had been sent completely before.
+		 */
+		tp->sackhint.nexthole = temp;
+		TAILQ_FOREACH(temp, &tp->snd_holes, scblink) {
+			if (SEQ_GEQ(tp->snd_fack, temp->rxmit) &&
+			    SEQ_GEQ(temp->rxmit, temp->end))
+				temp->rxmit = temp->start;
+		}
+		/*
+		 * Set cwnd to ssthresh just prior to invoking another
+		 * cwnd reduction by the CC module, to not shrink it
+		 * excessively.
+		 */
+		tp->snd_cwnd = tp->snd_ssthresh;
+		/*
+		 * Formally exit recovery, and let the CC module adjust
+		 * ssthresh as intended.
+		 */
+		EXIT_RECOVERY(tp->t_flags);
+		cc_cong_signal(tp, th, CC_NDUPACK);
+		/*
+		 * For PRR, adjust recover_fs as if this new reduction
+		 * initialized this variable.
+		 * cwnd will be adjusted by SACK or PRR processing
+		 * subsequently, only set it to a safe value here.
+		 */
+		tp->snd_cwnd = tcp_maxseg(tp);
+		tp->sackhint.recover_fs = (tp->snd_max - tp->snd_una) -
+		    tp->sackhint.recover_fs;
+	}
+}
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -2171,6 +2171,8 @@
if (V_tcp_perconn_stats_enable == 1)
tp->t_stats = stats_blob_alloc(V_tcp_perconn_stats_dflt_tpl, 0);
#endif
+ if (V_tcp_do_lrd)
+ tp->t_flags |= TF_LRD;
return (tp); /* XXX */
}
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -2001,6 +2001,7 @@
case TCP_NODELAY:
case TCP_NOOPT:
+ case TCP_LRD:
INP_WUNLOCK(inp);
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
@@ -2015,6 +2016,9 @@
case TCP_NOOPT:
opt = TF_NOOPT;
break;
+ case TCP_LRD:
+ opt = TF_LRD;
+ break;
default:
opt = 0; /* dead code to fool gcc */
break;
@@ -2562,6 +2566,11 @@
error = sooptcopyout(sopt, &optval, sizeof(optval));
break;
#endif
+ case TCP_LRD:
+ optval = tp->t_flags & TF_LRD;
+ INP_WUNLOCK(inp);
+ error = sooptcopyout(sopt, &optval, sizeof optval);
+ break;
default:
INP_WUNLOCK(inp);
error = ENOPROTOOPT;
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -410,7 +410,7 @@
#define TF_TOE 0x02000000 /* this connection is offloaded */
#define TF_WAKESOW 0x04000000 /* wake up send socket */
#define TF_UNUSED1 0x08000000 /* unused */
-#define TF_UNUSED2 0x10000000 /* unused */
+#define TF_LRD 0x10000000 /* Lost Retransmission Detection */
#define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */
#define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */
#define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */
@@ -673,6 +673,7 @@
uint64_t tcps_sack_rexmit_bytes; /* SACK rexmit bytes */
uint64_t tcps_sack_rcv_blocks; /* SACK blocks (options) received */
uint64_t tcps_sack_send_blocks; /* SACK blocks (options) sent */
+ uint64_t tcps_sack_lostrexmt; /* SACK lost retransmission recovered */
uint64_t tcps_sack_sboverflow; /* times scoreboard overflowed */
/* ECN related stats */
@@ -697,7 +698,7 @@
uint64_t tcps_tunneled_pkts; /* Packets encap's in UDP received */
uint64_t tcps_tunneled_errs; /* Packets that had errors that were UDP encaped */
- uint64_t _pad[10]; /* 6 UTO, 6 TBD */
+ uint64_t _pad[9]; /* 6 UTO, 3 TBD */
};
#define tcps_rcvmemdrop tcps_rcvreassfull /* compat */
@@ -859,6 +860,7 @@
VNET_DECLARE(int, tcp_do_autorcvbuf);
VNET_DECLARE(int, tcp_do_autosndbuf);
VNET_DECLARE(int, tcp_do_ecn);
+VNET_DECLARE(int, tcp_do_lrd);
VNET_DECLARE(int, tcp_do_prr);
VNET_DECLARE(int, tcp_do_prr_conservative);
VNET_DECLARE(int, tcp_do_newcwv);
@@ -893,6 +895,7 @@
VNET_DECLARE(struct inpcbhead, tcb);
VNET_DECLARE(struct inpcbinfo, tcbinfo);
+#define V_tcp_do_lrd VNET(tcp_do_lrd)
#define V_tcp_do_prr VNET(tcp_do_prr)
#define V_tcp_do_prr_conservative VNET(tcp_do_prr_conservative)
#define V_tcp_do_newcwv VNET(tcp_do_newcwv)
@@ -1091,8 +1094,10 @@
void tcp_sack_adjust(struct tcpcb *tp);
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *);
+void tcp_lost_retransmission(struct tcpcb *, struct tcphdr *);
void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
void tcp_free_sackholes(struct tcpcb *tp);
+void tcp_sack_lost_retransmission(struct tcpcb *, struct tcphdr *);
int tcp_newreno(struct tcpcb *, struct tcphdr *);
int tcp_compute_pipe(struct tcpcb *);
uint32_t tcp_compute_initwnd(uint32_t);
diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c
--- a/usr.bin/netstat/inet.c
+++ b/usr.bin/netstat/inet.c
@@ -809,6 +809,8 @@
"{N:/SACK option%s (SACK blocks) received}\n");
p(tcps_sack_send_blocks, "\t{:sent-option-blocks/%ju} "
"{N:/SACK option%s (SACK blocks) sent}\n");
+	p(tcps_sack_lostrexmt, "\t{:lost-retransmissions/%ju} "
+	    "{N:/SACK retransmission%s lost}\n");
p1a(tcps_sack_sboverflow, "\t{:scoreboard-overflows/%ju} "
"{N:/SACK scoreboard overflow}\n");
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sat, May 16, 11:58 PM (8 h, 9 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33160645
Default Alt Text
D28931.id88968.diff (10 KB)
Attached To
Mode
D28931: Basic Lost Retransmission Detection (LRD)
Attached
Detach File
Event Timeline
Log In to Comment