Page MenuHomeFreeBSD

D28822.id116062.diff
No OneTemporary

D28822.id116062.diff

diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -157,11 +157,6 @@
&VNET_NAME(drop_synfin), 0,
"Drop TCP packets with SYN+FIN set");
-VNET_DEFINE(int, tcp_do_prr_conservative) = 0;
-SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | CTLFLAG_RW,
- &VNET_NAME(tcp_do_prr_conservative), 0,
- "Do conservative Proportional Rate Reduction");
-
VNET_DEFINE(int, tcp_do_prr) = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_prr), 1,
@@ -1489,7 +1484,8 @@
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
{
uint16_t thflags;
- int acked, ourfinisacked, needoutput = 0, sack_changed;
+ int acked, ourfinisacked, needoutput = 0;
+ sackstatus_t sack_changed;
int rstreason, todrop, win, incforsyn = 0;
uint32_t tiwin;
uint16_t nsegs;
@@ -1503,7 +1499,7 @@
thflags = tcp_get_flags(th);
tp->sackhint.last_sack_ack = 0;
- sack_changed = 0;
+ sack_changed = SACK_NOCHANGE;
nsegs = max(1, m->m_pkthdr.lro_nsegs);
NET_EPOCH_ASSERT();
@@ -2542,7 +2538,7 @@
*/
if (th->th_ack != tp->snd_una ||
(tcp_is_sack_recovery(tp, &to) &&
- !sack_changed))
+ (sack_changed == SACK_NOCHANGE)))
break;
else if (!tcp_timer_active(tp, TT_REXMT))
tp->t_dupacks = 0;
@@ -2551,8 +2547,13 @@
cc_ack_received(tp, th, nsegs,
CC_DUPACK);
if (V_tcp_do_prr &&
- IN_FASTRECOVERY(tp->t_flags)) {
- tcp_do_prr_ack(tp, th, &to);
+ IN_FASTRECOVERY(tp->t_flags) &&
+ (tp->t_flags & TF_SACK_PERMIT)) {
+ /*
+ * While dealing with DupAcks,
+ * always use PRR-CRB
+ */
+ tcp_do_prr_ack(tp, th, &to, SACK_NEWLOSS);
} else if (tcp_is_sack_recovery(tp, &to) &&
IN_FASTRECOVERY(tp->t_flags)) {
int awnd;
@@ -2731,7 +2732,7 @@
* counted as dupacks here.
*/
if (tcp_is_sack_recovery(tp, &to) &&
- sack_changed) {
+ (sack_changed != SACK_NOCHANGE)) {
tp->t_dupacks++;
/* limit overhead by setting maxseg last */
if (!IN_FASTRECOVERY(tp->t_flags) &&
@@ -2757,7 +2758,7 @@
if (V_tcp_do_prr && to.to_flags & TOF_SACK) {
tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rtttime = 0;
- tcp_do_prr_ack(tp, th, &to);
+ tcp_do_prr_ack(tp, th, &to, sack_changed);
tp->t_flags |= TF_ACKNOW;
(void) tcp_output(tp);
} else
@@ -2771,7 +2772,11 @@
if (V_tcp_do_prr) {
tp->sackhint.delivered_data = BYTES_THIS_ACK(tp, th);
tp->snd_fack = th->th_ack;
- tcp_do_prr_ack(tp, th, &to);
+ /*
+ * During ECN cwnd reduction
+ * always use PRR-SSRB
+ */
+ tcp_do_prr_ack(tp, th, &to, SACK_CHANGE);
(void) tcp_output(tp);
}
} else
@@ -3894,7 +3899,7 @@
}
void
-tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to)
+tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, sackstatus_t sack_changed)
{
int snd_cnt = 0, limit = 0, del_data = 0, pipe = 0;
int maxseg = tcp_maxseg(tp);
@@ -3934,7 +3939,17 @@
tp->snd_ssthresh, tp->sackhint.recover_fs) -
tp->sackhint.prr_out;
} else {
- if (V_tcp_do_prr_conservative || (del_data == 0))
+ /*
+ * PRR 6937bis heuristic:
+ * - A partial ack without SACK block beneath snd_recover
+ * indicates further loss.
+ * - A SACK scoreboard update adding a new hole indicates
+ * further loss, so be conservative and send at most one
+ * segment.
+ * - Prevent ACK splitting attacks by being conservative
+ * when no new data is acked.
+ */
+ if ((sack_changed == SACK_NEWLOSS) || (del_data == 0))
limit = tp->sackhint.prr_delivered -
tp->sackhint.prr_out;
else
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -546,21 +546,23 @@
* Process cumulative ACK and the TCP SACK option to update the scoreboard.
* tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
* the sequence space).
- * Returns 1 if incoming ACK has previously unknown SACK information,
- * 0 otherwise.
+ * Returns SACK_NEWLOSS if incoming ACK indicates ongoing loss (hole split, new hole),
+ * SACK_CHANGE if incoming ACK has previously unknown SACK information,
+ * SACK_NOCHANGE otherwise.
*/
-int
+sackstatus_t
tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
{
struct sackhole *cur, *temp;
struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
- int i, j, num_sack_blks, sack_changed;
+ int i, j, num_sack_blks;
+ sackstatus_t sack_changed;
int delivered_data, left_edge_delta;
INP_WLOCK_ASSERT(tptoinpcb(tp));
num_sack_blks = 0;
- sack_changed = 0;
+ sack_changed = SACK_NOCHANGE;
delivered_data = 0;
left_edge_delta = 0;
/*
@@ -579,7 +581,7 @@
if (SEQ_LT(tp->snd_fack, th_ack)) {
delivered_data += th_ack - tp->snd_una;
tp->snd_fack = th_ack;
- sack_changed = 1;
+ sack_changed = SACK_CHANGE;
}
}
/*
@@ -669,7 +671,7 @@
delivered_data += sblkp->end - sblkp->start;
tp->snd_fack = sblkp->end;
sblkp--;
- sack_changed = 1;
+ sack_changed = SACK_NEWLOSS;
} else {
/*
* Append a new SACK hole at the tail. If the
@@ -683,7 +685,7 @@
tp->snd_fack = sblkp->end;
/* Go to the previous sack block. */
sblkp--;
- sack_changed = 1;
+ sack_changed = SACK_CHANGE;
} else {
/*
* We failed to add a new hole based on the current
@@ -700,7 +702,12 @@
SEQ_LT(tp->snd_fack, sblkp->end)) {
delivered_data += sblkp->end - tp->snd_fack;
tp->snd_fack = sblkp->end;
- sack_changed = 1;
+ /*
+ * While the Scoreboard didn't change in
+ * size, we only ended up here because
+ * some SACK data had to be dismissed.
+ */
+ sack_changed = SACK_NEWLOSS;
}
}
}
@@ -708,7 +715,7 @@
/* fack is advanced. */
delivered_data += sblkp->end - tp->snd_fack;
tp->snd_fack = sblkp->end;
- sack_changed = 1;
+ sack_changed = SACK_CHANGE;
}
cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */
/*
@@ -736,7 +743,7 @@
(SEQ_MIN(cur->rxmit, cur->end) - cur->start);
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
("sackhint bytes rtx >= 0"));
- sack_changed = 1;
+ sack_changed = SACK_CHANGE;
if (SEQ_LEQ(sblkp->start, cur->start)) {
/* Data acks at least the beginning of hole. */
if (SEQ_GEQ(sblkp->end, cur->end)) {
@@ -773,6 +780,7 @@
*/
temp = tcp_sackhole_insert(tp, sblkp->end,
cur->end, cur);
+ sack_changed = SACK_NEWLOSS;
if (temp != NULL) {
if (SEQ_GT(cur->rxmit, temp->rxmit)) {
temp->rxmit = cur->rxmit;
@@ -810,7 +818,7 @@
* DupAck for this. Also required
* for RFC6675 rescue retransmission.
*/
- sack_changed = 0;
+ sack_changed = SACK_NOCHANGE;
tp->sackhint.delivered_data = delivered_data;
tp->sackhint.sacked_bytes += delivered_data - left_edge_delta;
KASSERT((delivered_data >= 0), ("delivered_data < 0"));
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -304,6 +304,13 @@
struct tcphdr tt_t;
};
+/* SACK scoreboard update status, as returned by tcp_sack_doack() */
+typedef enum {
+ SACK_NOCHANGE = 0, /* neither of the conditions below applies */
+ SACK_CHANGE, /* incoming ACK has previously unknown SACK information */
+ SACK_NEWLOSS /* incoming ACK indicates ongoing loss (hole split, new hole) */
+} sackstatus_t;
+
/* Enable TCP/UDP tunneling port */
#define TCP_TUNNELING_PORT_MIN 0
#define TCP_TUNNELING_PORT_MAX 65535
@@ -1035,7 +1042,6 @@
#define V_tcp_do_lrd VNET(tcp_do_lrd)
#define V_tcp_do_prr VNET(tcp_do_prr)
-#define V_tcp_do_prr_conservative VNET(tcp_do_prr_conservative)
#define V_tcp_do_newcwv VNET(tcp_do_newcwv)
#define V_drop_synfin VNET(drop_synfin)
#define V_path_mtu_discovery VNET(path_mtu_discovery)
@@ -1210,7 +1216,8 @@
uint32_t tcp_new_ts_offset(struct in_conninfo *);
tcp_seq tcp_new_isn(struct in_conninfo *);
-int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
+sackstatus_t
+ tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
int tcp_dsack_block_exists(struct tcpcb *);
void tcp_update_dsack_list(struct tcpcb *, tcp_seq, tcp_seq);
void tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend);
@@ -1218,7 +1225,7 @@
void tcp_clean_sackreport(struct tcpcb *tp);
void tcp_sack_adjust(struct tcpcb *tp);
struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
-void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *);
+void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *, sackstatus_t);
void tcp_lost_retransmission(struct tcpcb *, struct tcphdr *);
void tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
void tcp_free_sackholes(struct tcpcb *tp);

File Metadata

Mime Type
text/plain
Expires
Tue, Jan 13, 3:31 AM (12 m, 41 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
27624517
Default Alt Text
D28822.id116062.diff (8 KB)

Event Timeline