Page MenuHomeFreeBSD

D36716.id111063.diff
No OneTemporary

D36716.id111063.diff

Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -1888,11 +1888,21 @@
&tcp_savetcp, 0);
#endif
TCP_PROBE3(debug__input, tp, th, m);
+ /*
+ * Clear t_acktime if remote side has ACKd
+ * all data in the socket buffer.
+ * Otherwise, update t_acktime if we received
+ * a sufficiently large ACK.
+ */
+ if (sbavail(&so->so_snd) == 0)
+ tp->t_acktime = 0;
+ else if (acked > 1)
+ tp->t_acktime = ticks;
if (tp->snd_una == tp->snd_max)
tcp_timer_activate(tp, TT_REXMT, 0);
else if (!tcp_timer_active(tp, TT_PERSIST))
tcp_timer_activate(tp, TT_REXMT,
- tp->t_rxtcur);
+ TP_RXTCUR(tp));
sowwakeup(so);
if (sbavail(&so->so_snd))
(void) tcp_output(tp);
@@ -2091,6 +2101,7 @@
*/
tp->t_starttime = ticks;
if (tp->t_flags & TF_NEEDFIN) {
+ tp->t_acktime = ticks;
tcp_state_change(tp, TCPS_FIN_WAIT_1);
tp->t_flags &= ~TF_NEEDFIN;
thflags &= ~TH_SYN;
@@ -2475,6 +2486,7 @@
tp->t_tfo_pending = NULL;
}
if (tp->t_flags & TF_NEEDFIN) {
+ tp->t_acktime = ticks;
tcp_state_change(tp, TCPS_FIN_WAIT_1);
tp->t_flags &= ~TF_NEEDFIN;
} else {
@@ -2921,6 +2933,20 @@
tcp_xmit_timer(tp, ticks - tp->t_rtttime);
}
+ SOCKBUF_LOCK(&so->so_snd);
+ /*
+ * Clear t_acktime if remote side has ACKd all data in the
+ * socket buffer and FIN (if applicable).
+ * Otherwise, update t_acktime if we received a sufficiently
+ * large ACK.
+ */
+ if ((tp->t_state <= TCPS_CLOSE_WAIT &&
+ acked == sbavail(&so->so_snd)) ||
+ acked > sbavail(&so->so_snd))
+ tp->t_acktime = 0;
+ else if (acked > 1)
+ tp->t_acktime = ticks;
+
/*
* If all outstanding data is acked, stop retransmit
* timer and remember to restart (more output or persist).
@@ -2931,14 +2957,16 @@
tcp_timer_activate(tp, TT_REXMT, 0);
needoutput = 1;
} else if (!tcp_timer_active(tp, TT_PERSIST))
- tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+ tcp_timer_activate(tp, TT_REXMT, TP_RXTCUR(tp));
/*
* If no data (only SYN) was ACK'd,
* skip rest of ACK processing.
*/
- if (acked == 0)
+ if (acked == 0) {
+ SOCKBUF_UNLOCK(&so->so_snd);
goto step6;
+ }
/*
* Let the congestion control algorithm update congestion
@@ -2947,7 +2975,6 @@
*/
cc_ack_received(tp, th, nsegs, CC_ACK);
- SOCKBUF_LOCK(&so->so_snd);
if (acked > sbavail(&so->so_snd)) {
if (tp->snd_wnd >= sbavail(&so->so_snd))
tp->snd_wnd -= sbavail(&so->so_snd);
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -506,7 +506,8 @@
*/
len = 0;
if ((sendwin == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) &&
- (off < (int) sbavail(&so->so_snd))) {
+ (off < (int) sbavail(&so->so_snd)) &&
+ !tcp_timer_active(tp, TT_PERSIST)) {
tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rxtshift = 0;
tp->snd_nxt = tp->snd_una;
@@ -734,7 +735,7 @@
SEQ_GT(tp->snd_max, tp->snd_una) &&
!tcp_timer_active(tp, TT_REXMT) &&
!tcp_timer_active(tp, TT_PERSIST)) {
- tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+ tcp_timer_activate(tp, TT_REXMT, TP_RXTCUR(tp));
goto just_return;
}
/*
@@ -1578,6 +1579,12 @@
goto timer;
tp->snd_nxt += len;
if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
+ /*
+ * Update "made progress" indication if we just
+ * added new data to an empty socket buffer.
+ */
+ if (tp->snd_una == tp->snd_max)
+ tp->t_acktime = ticks;
tp->snd_max = tp->snd_nxt;
/*
* Time this transmission if not a retransmission and
@@ -1616,7 +1623,7 @@
tcp_timer_activate(tp, TT_PERSIST, 0);
tp->t_rxtshift = 0;
}
- tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur);
+ tcp_timer_activate(tp, TT_REXMT, TP_RXTCUR(tp));
} else if (len == 0 && sbavail(&so->so_snd) &&
!tcp_timer_active(tp, TT_REXMT) &&
!tcp_timer_active(tp, TT_PERSIST)) {
@@ -1769,15 +1776,29 @@
{
int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;
int tt;
+ int maxunacktime;
tp->t_flags &= ~TF_PREVVALID;
if (tcp_timer_active(tp, TT_REXMT))
panic("tcp_setpersist: retransmit pending");
+ /*
+ * If the state is already closed, don't bother.
+ */
+ if (tp->t_state == TCPS_CLOSED)
+ return;
+
/*
* Start/restart persistence timer.
*/
TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift],
tcp_persmin, tcp_persmax);
+ if (TP_MAXUNACKTIME(tp) && tp->t_acktime) {
+ maxunacktime = tp->t_acktime + TP_MAXUNACKTIME(tp) - ticks;
+ if (maxunacktime < 1)
+ maxunacktime = 1;
+ if (maxunacktime < tt)
+ tt = maxunacktime;
+ }
tcp_timer_activate(tp, TT_PERSIST, tt);
if (tp->t_rxtshift < TCP_MAXRXTSHIFT)
tp->t_rxtshift++;
Index: sys/netinet/tcp_syncache.c
===================================================================
--- sys/netinet/tcp_syncache.c
+++ sys/netinet/tcp_syncache.c
@@ -1030,6 +1030,7 @@
/*
* Copy and activate timers.
*/
+ tp->t_maxunacktime = sototcpcb(lso)->t_maxunacktime;
tp->t_keepinit = sototcpcb(lso)->t_keepinit;
tp->t_keepidle = sototcpcb(lso)->t_keepidle;
tp->t_keepintvl = sototcpcb(lso)->t_keepintvl;
Index: sys/netinet/tcp_timer.h
===================================================================
--- sys/netinet/tcp_timer.h
+++ sys/netinet/tcp_timer.h
@@ -86,6 +86,7 @@
#define TCPTV_KEEP_IDLE (120*60*hz) /* dflt time before probing */
#define TCPTV_KEEPINTVL ( 75*hz) /* default probe interval */
#define TCPTV_KEEPCNT 8 /* max probes before drop */
+#define TCPTV_MAXUNACKTIME 0 /* max time without making progress */
#define TCPTV_FINWAIT2_TIMEOUT (60*hz) /* FIN_WAIT_2 timeout if no receiver */
@@ -183,6 +184,17 @@
#define TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl)
#define TP_KEEPCNT(tp) ((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt)
#define TP_MAXIDLE(tp) (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp))
+#define TP_MAXUNACKTIME(tp) \
+ ((tp)->t_maxunacktime ? (tp)->t_maxunacktime : tcp_maxunacktime)
+
+/*
+ * Obtain the time until the retransmit timer should fire: the earlier of
+ * the t_rxtcur value or the time the maxunacktime would be exceeded.
+ */
+#define TP_RXTCUR(tp) \
+    ((TP_MAXUNACKTIME(tp) == 0 || (tp)->t_acktime == 0) ? (tp)->t_rxtcur : \
+    max(1, min((tp)->t_rxtcur, \
+    (tp)->t_acktime + TP_MAXUNACKTIME(tp) - ticks)))
extern int tcp_persmin; /* minimum persist interval */
extern int tcp_persmax; /* maximum persist interval */
@@ -191,6 +203,7 @@
extern int tcp_keepintvl; /* time between keepalive probes */
extern int tcp_keepcnt; /* number of keepalives */
extern int tcp_delacktime; /* time before sending a delayed ACK */
+extern int tcp_maxunacktime; /* max time without making progress */
extern int tcp_maxpersistidle;
extern int tcp_rexmit_initial;
extern int tcp_rexmit_min;
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -167,6 +167,12 @@
&tcp_rexmit_drop_options, 0,
"Drop TCP options from 3rd and later retransmitted SYN");
+int tcp_maxunacktime = TCPTV_MAXUNACKTIME;
+SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxunacktime,
+ CTLTYPE_INT|CTLFLAG_RW | CTLFLAG_NEEDGIANT,
+ &tcp_maxunacktime, 0, sysctl_msec_to_ticks, "I",
+ "Maximum time (in ms) that a session can linger without making progress");
+
VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
SYSCTL_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
CTLFLAG_RW|CTLFLAG_VNET,
@@ -505,12 +511,38 @@
CURVNET_RESTORE();
}
+/*
+ * Decide whether this session has gone longer than its maximum allowed
+ * time without a substantive acknowledgement.  Returns true, and counts
+ * the event in tcps_progdrops, when the limit was exceeded; otherwise
+ * returns false.
+ */
+static bool
+tcp_maxunacktime_check(struct tcpcb *tp)
+{
+	bool exceeded;
+
+	/*
+	 * The limit counts as exceeded only when it is enabled for this
+	 * session, a measurement is in progress, and we are no longer
+	 * within the acceptable range.
+	 */
+	exceeded = TP_MAXUNACKTIME(tp) != 0 && tp->t_acktime != 0 &&
+	    !TSTMP_GT(TP_MAXUNACKTIME(tp) + tp->t_acktime, (u_int)ticks);
+
+	/* We exceeded the timer: record it in the statistics. */
+	if (exceeded)
+		TCPSTAT_INC(tcps_progdrops);
+	return exceeded;
+}
+
void
tcp_timer_persist(void *xtp)
{
struct tcpcb *tp = xtp;
struct inpcb *inp;
struct epoch_tracker et;
+ bool progdrop;
int outrv;
CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
@@ -546,11 +578,15 @@
* backoff, drop the connection if the idle time
* (no responses to probes) reaches the maximum
* backoff that we would use if retransmitting.
+ * Also, drop the connection if we haven't been making
+ * progress.
*/
- if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
+ progdrop = tcp_maxunacktime_check(tp);
+ if (progdrop || (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
- ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
- TCPSTAT_INC(tcps_persistdrop);
+ ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff))) {
+ if (!progdrop)
+ TCPSTAT_INC(tcps_persistdrop);
NET_EPOCH_ENTER(et);
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
tp = tcp_drop(tp, ETIMEDOUT);
@@ -630,10 +666,15 @@
* Retransmission timer went off. Message has not
* been acked within retransmit interval. Back off
* to a longer retransmit interval and retransmit one segment.
+ *
+ * If we've either exceeded the maximum number of retransmissions,
+ * or we've gone long enough without making progress, then drop
+ * the session.
*/
- if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
+ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT || tcp_maxunacktime_check(tp)) {
+ if (tp->t_rxtshift > TCP_MAXRXTSHIFT)
+ TCPSTAT_INC(tcps_timeoutdrop);
tp->t_rxtshift = TCP_MAXRXTSHIFT;
- TCPSTAT_INC(tcps_timeoutdrop);
NET_EPOCH_ENTER(et);
tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
tp = tcp_drop(tp, ETIMEDOUT);
Index: sys/netinet/tcp_usrreq.c
===================================================================
--- sys/netinet/tcp_usrreq.c
+++ sys/netinet/tcp_usrreq.c
@@ -1116,6 +1116,8 @@
}
}
if (!(flags & PRUS_OOB)) {
+ if (tp->t_acktime == 0)
+ tp->t_acktime = ticks;
sbappendstream(&so->so_snd, m, flags);
m = NULL;
if (nam && tp->t_state < TCPS_SYN_SENT) {
@@ -1202,6 +1204,8 @@
* of data past the urgent section.
* Otherwise, snd_up should be one lower.
*/
+ if (tp->t_acktime == 0)
+ tp->t_acktime = ticks;
sbappendstream_locked(&so->so_snd, m, flags);
SOCKBUF_UNLOCK(&so->so_snd);
m = NULL;
@@ -2375,7 +2379,7 @@
error = ktls_enable_rx(so, &tls);
break;
#endif
-
+ case TCP_MAXUNACKTIME:
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
case TCP_KEEPINIT:
@@ -2392,6 +2396,10 @@
INP_WLOCK_RECHECK(inp);
switch (sopt->sopt_name) {
+ case TCP_MAXUNACKTIME:
+ tp->t_maxunacktime = ui;
+ break;
+
case TCP_KEEPIDLE:
tp->t_keepidle = ui;
/*
@@ -2658,11 +2666,15 @@
INP_WUNLOCK(inp);
error = sooptcopyout(sopt, buf, len + 1);
break;
+ case TCP_MAXUNACKTIME:
case TCP_KEEPIDLE:
case TCP_KEEPINTVL:
case TCP_KEEPINIT:
case TCP_KEEPCNT:
switch (sopt->sopt_name) {
+ case TCP_MAXUNACKTIME:
+ ui = TP_MAXUNACKTIME(tp) / hz;
+ break;
case TCP_KEEPIDLE:
ui = TP_KEEPIDLE(tp) / hz;
break;
@@ -2834,6 +2846,8 @@
tcp_state_change(tp, TCPS_LAST_ACK);
break;
}
+ if (tp->t_acktime == 0)
+ tp->t_acktime = ticks;
if (tp->t_state >= TCPS_FIN_WAIT_2) {
soisdisconnected(tp->t_inpcb->inp_socket);
/* Prevent the connection hanging in FIN_WAIT_2 forever. */
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -708,6 +708,7 @@
uint64_t tcps_keeptimeo; /* keepalive timeouts */
uint64_t tcps_keepprobe; /* keepalive probes sent */
uint64_t tcps_keepdrops; /* connections dropped in keepalive */
+ uint64_t tcps_progdrops; /* drops due to no progress */
uint64_t tcps_sndtotal; /* total packets sent */
uint64_t tcps_sndpack; /* data packets sent */
Index: usr.bin/netstat/inet.c
===================================================================
--- usr.bin/netstat/inet.c
+++ usr.bin/netstat/inet.c
@@ -757,6 +757,8 @@
"{N:/keepalive probe%s sent}\n");
p(tcps_keepdrops, "\t\t{:connections-dropped-by-keepalives/%ju} "
"{N:/connection%s dropped by keepalive}\n");
+ p(tcps_progdrops, "\t{:connections-dropped-due-to-progress-time/%ju} "
+ "{N:/connection%s dropped due to exceeding progress time}\n");
p(tcps_predack, "\t{:ack-header-predictions/%ju} "
"{N:/correct ACK header prediction%s}\n");
p(tcps_preddat, "\t{:data-packet-header-predictions/%ju} "
Index: usr.bin/systat/tcp.c
===================================================================
--- usr.bin/systat/tcp.c
+++ usr.bin/systat/tcp.c
@@ -125,8 +125,8 @@
L(5, "- in embryonic state"); R(5, "- ack-only");
L(6, "- on retransmit timeout"); R(6, "- window probes");
L(7, "- by keepalive"); R(7, "- window updates");
- L(8, "- from listen queue"); R(8, "- urgent data only");
- R(9, "- control");
+ L(8, "- exceeded progress time"); R(8, "- urgent data only");
+ L(9, "- from listen queue"); R(9, "- control");
R(10, "- resends by PMTU discovery");
L(11, "TCP Timers"); R(11, "total packets received");
L(12, "potential rtt updates"); R(12, "- in sequence");
@@ -179,6 +179,7 @@
DO(tcps_keeptimeo);
DO(tcps_keepprobe);
DO(tcps_keepdrops);
+ DO(tcps_progdrops);
DO(tcps_sndtotal);
DO(tcps_sndpack);
@@ -248,8 +249,8 @@
L(5, tcps_conndrops); R(5, tcps_sndacks);
L(6, tcps_timeoutdrop); R(6, tcps_sndprobe);
L(7, tcps_keepdrops); R(7, tcps_sndwinup);
- L(8, tcps_listendrop); R(8, tcps_sndurg);
- R(9, tcps_sndctrl);
+ L(8, tcps_progdrops); R(8, tcps_sndurg);
+ L(9, tcps_listendrop); R(9, tcps_sndctrl);
R(10, tcps_mturesent);
R(11, tcps_rcvtotal);
L(12, tcps_segstimed); R(12, tcps_rcvpack);

File Metadata

Mime Type
text/plain
Expires
Sat, Dec 28, 7:16 AM (2 h, 26 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
15621035
Default Alt Text
D36716.id111063.diff (13 KB)

Event Timeline