Page MenuHomeFreeBSD

D28822.id88940.diff
No OneTemporary

D28822.id88940.diff

Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -154,11 +154,6 @@
&VNET_NAME(drop_synfin), 0,
"Drop TCP packets with SYN+FIN set");
-VNET_DEFINE(int, tcp_do_prr_conservative) = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(tcp_do_prr_conservative), 0,
- "Do conservative Proportional Rate Reduction");
-
VNET_DEFINE(int, tcp_do_prr) = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_prr), 1,
@@ -2595,7 +2590,7 @@
if (th->th_ack != tp->snd_una ||
((tp->t_flags & TF_SACK_PERMIT) &&
(to.to_flags & TOF_SACK) &&
- !sack_changed))
+ (sack_changed == SACK_NOCHANGE)))
break;
else if (!tcp_timer_active(tp, TT_REXMT))
tp->t_dupacks = 0;
@@ -2606,7 +2601,11 @@
if (V_tcp_do_prr &&
IN_FASTRECOVERY(tp->t_flags) &&
(tp->t_flags & TF_SACK_PERMIT)) {
- tcp_do_prr_ack(tp, th, &to);
+ /*
+ * While dealing with DupAcks,
+ * always use PRR-CRB
+ */
+ tcp_do_prr_ack(tp, th, &to, SACK_NEWLOSS);
} else if ((tp->t_flags & TF_SACK_PERMIT) &&
(to.to_flags & TOF_SACK) &&
IN_FASTRECOVERY(tp->t_flags)) {
@@ -2781,7 +2780,7 @@
*/
if ((tp->t_flags & TF_SACK_PERMIT) &&
(to.to_flags & TOF_SACK) &&
- sack_changed) {
+ !(sack_changed == SACK_NOCHANGE)) {
tp->t_dupacks++;
/* limit overhead by setting maxseg last */
if (!IN_FASTRECOVERY(tp->t_flags) &&
@@ -2807,7 +2806,7 @@
if (V_tcp_do_prr && to.to_flags & TOF_SACK) {
tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
- tcp_do_prr_ack(tp, th, &to);
+ tcp_do_prr_ack(tp, th, &to, sack_changed);
tp->t_flags |= TF_ACKNOW;
(void) tcp_output(tp);
} else
@@ -2821,7 +2820,11 @@
if (V_tcp_do_prr) {
tp->sackhint.delivered_data = BYTES_THIS_ACK(tp, th);
tp->snd_fack = th->th_ack;
- tcp_do_prr_ack(tp, th, &to);
+ /*
+ * During ECN cwnd reduction
+ * always use PRR-SSRB
+ */
+ tcp_do_prr_ack(tp, th, &to, SACK_CHANGE);
(void) tcp_output(tp);
}
} else
@@ -3952,7 +3955,7 @@
}
void
-tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
+tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, int sack_changed)
{
int snd_cnt = 0, limit = 0, del_data = 0, pipe = 0;
int maxseg = tcp_maxseg(tp);
@@ -3968,7 +3971,8 @@
if (V_tcp_do_newsack)
pipe = tcp_compute_pipe(tp);
else
- pipe = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit;
+ pipe = (tp->snd_nxt - tp->snd_fack) +
+ tp->sackhint.sack_bytes_rexmit;
tp->sackhint.prr_delivered += del_data;
/*
* Proportional Rate Reduction
@@ -3981,9 +3985,19 @@
tp->snd_ssthresh, tp->sackhint.recover_fs) -
tp->sackhint.prr_out;
} else {
- if (V_tcp_do_prr_conservative)
+ /*
+ * PRR 6937bis heuristic:
+ * - A partial ACK without a SACK block beneath snd_recover
+ * indicates further loss.
+ * - A SACK scoreboard update adding a new hole indicates
+ * further loss, so be conservative and send at most one
+ * segment.
+ * - Prevent ACK-splitting attacks by being conservative
+ * when no new data is acked.
+ */
+ if ((sack_changed == SACK_NEWLOSS) || (del_data == 0))
limit = tp->sackhint.prr_delivered -
- tp->sackhint.prr_out;
+ tp->sackhint.prr_out;
else
limit = imax(tp->sackhint.prr_delivered -
tp->sackhint.prr_out, del_data) +
@@ -3997,11 +4011,12 @@
* accordingly.
*/
if (IN_FASTRECOVERY(tp->t_flags)) {
- tp->snd_cwnd = imax(maxseg, tp->snd_nxt - tp->snd_recover +
- tp->sackhint.sack_bytes_rexmit + (snd_cnt * maxseg));
+ tp->snd_cwnd = tp->snd_nxt - tp->snd_recover +
+ tp->sackhint.sack_bytes_rexmit +
+ (snd_cnt * maxseg);
} else if (IN_CONGRECOVERY(tp->t_flags))
- tp->snd_cwnd = imax(maxseg, pipe - del_data +
- (snd_cnt * maxseg));
+ tp->snd_cwnd = pipe - del_data + (snd_cnt * maxseg);
+ tp->snd_cwnd = imax(maxseg, tp->snd_cwnd);
}
/*
Index: sys/netinet/tcp_sack.c
===================================================================
--- sys/netinet/tcp_sack.c
+++ sys/netinet/tcp_sack.c
@@ -550,8 +550,9 @@
* Process cumulative ACK and the TCP SACK option to update the scoreboard.
* tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
* the sequence space).
- * Returns 1 if incoming ACK has previously unknown SACK information,
- * 0 otherwise.
+ * Returns SACK_NEWLOSS if incoming ACK indicates ongoing loss (hole split, new hole),
+ * SACK_CHANGE if incoming ACK has previously unknown SACK information,
+ * SACK_NOCHANGE otherwise.
*/
int
tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
@@ -564,7 +565,7 @@
INP_WLOCK_ASSERT(tp->t_inpcb);
num_sack_blks = 0;
- sack_changed = 0;
+ sack_changed = SACK_NOCHANGE;
delivered_data = 0;
left_edge_delta = 0;
/*
@@ -583,7 +584,7 @@
if (SEQ_LT(tp->snd_fack, th_ack)) {
delivered_data += th_ack - tp->snd_una;
tp->snd_fack = th_ack;
- sack_changed = 1;
+ sack_changed = SACK_CHANGE;
}
}
/*
@@ -667,7 +668,7 @@
delivered_data += sblkp->end - sblkp->start;
tp->snd_fack = sblkp->end;
sblkp--;
- sack_changed = 1;
+ sack_changed = SACK_NEWLOSS;
} else {
/*
* Append a new SACK hole at the tail. If the
@@ -681,7 +682,7 @@
tp->snd_fack = sblkp->end;
/* Go to the previous sack block. */
sblkp--;
- sack_changed = 1;
+ sack_changed = SACK_CHANGE;
} else {
/*
* We failed to add a new hole based on the current
@@ -698,7 +699,12 @@
SEQ_LT(tp->snd_fack, sblkp->end)) {
delivered_data += sblkp->end - tp->snd_fack;
tp->snd_fack = sblkp->end;
- sack_changed = 1;
+ /*
+ * While the Scoreboard didn't change in
+ * size, we only ended up here because
+ * some SACK data had to be dismissed.
+ */
+ sack_changed = SACK_NEWLOSS;
}
}
}
@@ -706,7 +712,7 @@
/* fack is advanced. */
delivered_data += sblkp->end - tp->snd_fack;
tp->snd_fack = sblkp->end;
- sack_changed = 1;
+ sack_changed = SACK_CHANGE;
}
cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */
/*
@@ -733,7 +739,7 @@
tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
("sackhint bytes rtx >= 0"));
- sack_changed = 1;
+ sack_changed = SACK_CHANGE;
if (SEQ_LEQ(sblkp->start, cur->start)) {
/* Data acks at least the beginning of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
@@ -768,6 +774,7 @@
*/
temp = tcp_sackhole_insert(tp, sblkp->end,
cur->end, cur);
+ sack_changed = SACK_NEWLOSS;
if (temp != NULL) {
if (SEQ_GT(cur->rxmit, temp->rxmit)) {
temp->rxmit = cur->rxmit;
@@ -802,7 +809,7 @@
* DupAck for this. Also required
* for RFC6675 rescue retransmission.
*/
- sack_changed = 0;
+ sack_changed = SACK_NOCHANGE;
tp->sackhint.delivered_data = delivered_data;
tp->sackhint.sacked_bytes += delivered_data - left_edge_delta;
KASSERT((delivered_data >= 0), ("delivered_data < 0"));
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -286,6 +286,11 @@
struct tcphdr tt_t;
};
+/* SACK scoreboard update status */
+#define SACK_NOCHANGE 0
+#define SACK_CHANGE 1
+#define SACK_NEWLOSS 2
+
/* Enable TCP/UDP tunneling port */
#define TCP_TUNNELING_PORT_MIN 0
#define TCP_TUNNELING_PORT_MAX 65535
@@ -894,7 +899,6 @@
VNET_DECLARE(struct inpcbinfo, tcbinfo);
#define V_tcp_do_prr VNET(tcp_do_prr)
-#define V_tcp_do_prr_conservative VNET(tcp_do_prr_conservative)
#define V_tcp_do_newcwv VNET(tcp_do_newcwv)
#define V_drop_synfin VNET(drop_synfin)
#define V_path_mtu_discovery VNET(path_mtu_discovery)
@@ -1090,7 +1094,7 @@
void tcp_clean_sackreport(struct tcpcb *tp);
void tcp_sack_adjust(struct tcpcb *tp);
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
-void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *);
+void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *, int);
void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
void tcp_free_sackholes(struct tcpcb *tp);
int tcp_newreno(struct tcpcb *, struct tcphdr *);

File Metadata

Mime Type
text/plain
Expires
Fri, Jun 12, 8:25 PM (22 h, 29 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33914467
Default Alt Text
D28822.id88940.diff (8 KB)

Event Timeline