Index: sys/netinet/tcp_input.c =================================================================== --- sys/netinet/tcp_input.c +++ sys/netinet/tcp_input.c @@ -154,11 +154,6 @@ &VNET_NAME(drop_synfin), 0, "Drop TCP packets with SYN+FIN set"); -VNET_DEFINE(int, tcp_do_prr_conservative) = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | CTLFLAG_RW, - &VNET_NAME(tcp_do_prr_conservative), 0, - "Do conservative Proportional Rate Reduction"); - VNET_DEFINE(int, tcp_do_prr) = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_prr), 1, @@ -2595,7 +2590,7 @@ if (th->th_ack != tp->snd_una || ((tp->t_flags & TF_SACK_PERMIT) && (to.to_flags & TOF_SACK) && - !sack_changed)) + (sack_changed == SACK_NOCHANGE))) break; else if (!tcp_timer_active(tp, TT_REXMT)) tp->t_dupacks = 0; @@ -2606,7 +2601,11 @@ if (V_tcp_do_prr && IN_FASTRECOVERY(tp->t_flags) && (tp->t_flags & TF_SACK_PERMIT)) { - tcp_do_prr_ack(tp, th, &to); + /* + * While dealing with DupAcks, + * always use PRR-CRB + */ + tcp_do_prr_ack(tp, th, &to, SACK_NEWLOSS); } else if ((tp->t_flags & TF_SACK_PERMIT) && (to.to_flags & TOF_SACK) && IN_FASTRECOVERY(tp->t_flags)) { @@ -2781,7 +2780,7 @@ */ if ((tp->t_flags & TF_SACK_PERMIT) && (to.to_flags & TOF_SACK) && - sack_changed) { + !(sack_changed == SACK_NOCHANGE)) { tp->t_dupacks++; /* limit overhead by setting maxseg last */ if (!IN_FASTRECOVERY(tp->t_flags) && @@ -2807,7 +2806,7 @@ if (V_tcp_do_prr && to.to_flags & TOF_SACK) { tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rtttime = 0; - tcp_do_prr_ack(tp, th, &to); + tcp_do_prr_ack(tp, th, &to, sack_changed); tp->t_flags |= TF_ACKNOW; (void) tcp_output(tp); } else @@ -2821,7 +2820,11 @@ if (V_tcp_do_prr) { tp->sackhint.delivered_data = BYTES_THIS_ACK(tp, th); tp->snd_fack = th->th_ack; - tcp_do_prr_ack(tp, th, &to); + /* + * During ECN cwnd reduction + * always use PRR-SSRB + */ + tcp_do_prr_ack(tp, th, &to, SACK_CHANGE); (void) tcp_output(tp); } } else @@ -3952,7 +3955,7 @@ } void -tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to) +tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, int sack_changed) { int snd_cnt = 0, limit = 0, del_data = 0, pipe = 0; int maxseg = tcp_maxseg(tp); @@ -3968,7 +3971,8 @@ if (V_tcp_do_newsack) pipe = tcp_compute_pipe(tp); else - pipe = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit; + pipe = (tp->snd_nxt - tp->snd_fack) + + tp->sackhint.sack_bytes_rexmit; tp->sackhint.prr_delivered += del_data; /* * Proportional Rate Reduction @@ -3981,9 +3985,19 @@ tp->snd_ssthresh, tp->sackhint.recover_fs) - tp->sackhint.prr_out; } else { - if (V_tcp_do_prr_conservative) + /* + * PRR 6937bis heuristic: + * - A partial ack without SACK block beneath snd_recover + * indicates further loss. + * - An SACK scoreboard update adding a new hole indicates + * further loss, so be conservative and send at most one + * segment. + * - Prevent ACK splitting attacks, by being conservative + * when no new data is acked. + */ + if ((sack_changed == SACK_NEWLOSS) || (del_data == 0)) limit = tp->sackhint.prr_delivered - - tp->sackhint.prr_out; + tp->sackhint.prr_out; else limit = imax(tp->sackhint.prr_delivered - tp->sackhint.prr_out, del_data) + @@ -3997,11 +4011,12 @@ * accordingly. */ if (IN_FASTRECOVERY(tp->t_flags)) { - tp->snd_cwnd = imax(maxseg, tp->snd_nxt - tp->snd_recover + - tp->sackhint.sack_bytes_rexmit + (snd_cnt * maxseg)); + tp->snd_cwnd = tp->snd_nxt - tp->snd_recover + + tp->sackhint.sack_bytes_rexmit + + (snd_cnt * maxseg); } else if (IN_CONGRECOVERY(tp->t_flags)) - tp->snd_cwnd = imax(maxseg, pipe - del_data + - (snd_cnt * maxseg)); + tp->snd_cwnd = pipe - del_data + (snd_cnt * maxseg); + tp->snd_cwnd = imax(maxseg, tp->snd_cwnd); } /* Index: sys/netinet/tcp_sack.c =================================================================== --- sys/netinet/tcp_sack.c +++ sys/netinet/tcp_sack.c @@ -550,8 +550,9 @@ * Process cumulative ACK and the TCP SACK option to update the scoreboard. * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of * the sequence space). - * Returns 1 if incoming ACK has previously unknown SACK information, - * 0 otherwise. + * Returns SACK_NEWLOSS if incoming ACK indicates ongoing loss (hole split, new hole), + * SACK_CHANGE if incoming ACK has previously unknown SACK information, + * SACK_NOCHANGE otherwise. */ int tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) @@ -564,7 +565,7 @@ INP_WLOCK_ASSERT(tp->t_inpcb); num_sack_blks = 0; - sack_changed = 0; + sack_changed = SACK_NOCHANGE; delivered_data = 0; left_edge_delta = 0; /* @@ -583,7 +584,7 @@ if (SEQ_LT(tp->snd_fack, th_ack)) { delivered_data += th_ack - tp->snd_una; tp->snd_fack = th_ack; - sack_changed = 1; + sack_changed = SACK_CHANGE; } } /* @@ -667,7 +668,7 @@ delivered_data += sblkp->end - sblkp->start; tp->snd_fack = sblkp->end; sblkp--; - sack_changed = 1; + sack_changed = SACK_NEWLOSS; } else { /* * Append a new SACK hole at the tail. If the @@ -681,7 +682,7 @@ tp->snd_fack = sblkp->end; /* Go to the previous sack block. */ sblkp--; - sack_changed = 1; + sack_changed = SACK_CHANGE; } else { /* * We failed to add a new hole based on the current @@ -698,7 +699,12 @@ SEQ_LT(tp->snd_fack, sblkp->end)) { delivered_data += sblkp->end - tp->snd_fack; tp->snd_fack = sblkp->end; - sack_changed = 1; + /* + * While the Scoreboard didn't change in + * size, we only ended up here because + * some SACK data had to be dismissed. + */ + sack_changed = SACK_NEWLOSS; } } } @@ -706,7 +712,7 @@ /* fack is advanced. */ delivered_data += sblkp->end - tp->snd_fack; tp->snd_fack = sblkp->end; - sack_changed = 1; + sack_changed = SACK_CHANGE; } cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */ /* @@ -733,7 +739,7 @@ tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start); KASSERT(tp->sackhint.sack_bytes_rexmit >= 0, ("sackhint bytes rtx >= 0")); - sack_changed = 1; + sack_changed = SACK_CHANGE; if (SEQ_LEQ(sblkp->start, cur->start)) { /* Data acks at least the beginning of hole. */ if (SEQ_GEQ(sblkp->end, cur->end)) { @@ -768,6 +774,7 @@ */ temp = tcp_sackhole_insert(tp, sblkp->end, cur->end, cur); + sack_changed = SACK_NEWLOSS; if (temp != NULL) { if (SEQ_GT(cur->rxmit, temp->rxmit)) { temp->rxmit = cur->rxmit; @@ -802,7 +809,7 @@ * DupAck for this. Also required * for RFC6675 rescue retransmission. */ - sack_changed = 0; + sack_changed = SACK_NOCHANGE; tp->sackhint.delivered_data = delivered_data; tp->sackhint.sacked_bytes += delivered_data - left_edge_delta; KASSERT((delivered_data >= 0), ("delivered_data < 0")); Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -286,6 +286,11 @@ struct tcphdr tt_t; }; +/* SACK scoreboard update status */ +#define SACK_NOCHANGE 0 +#define SACK_CHANGE 1 +#define SACK_NEWLOSS 2 + /* Enable TCP/UDP tunneling port */ #define TCP_TUNNELING_PORT_MIN 0 #define TCP_TUNNELING_PORT_MAX 65535 @@ -894,7 +899,6 @@ VNET_DECLARE(struct inpcbinfo, tcbinfo); #define V_tcp_do_prr VNET(tcp_do_prr) -#define V_tcp_do_prr_conservative VNET(tcp_do_prr_conservative) #define V_tcp_do_newcwv VNET(tcp_do_newcwv) #define V_drop_synfin VNET(drop_synfin) #define V_path_mtu_discovery VNET(path_mtu_discovery) @@ -1090,7 +1094,7 @@ void tcp_clean_sackreport(struct tcpcb *tp); void tcp_sack_adjust(struct tcpcb *tp); struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt); -void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *); +void tcp_do_prr_ack(struct tcpcb *, struct tcphdr *, struct tcpopt *, int); void tcp_sack_partialack(struct tcpcb *, struct tcphdr *); void tcp_free_sackholes(struct tcpcb *tp); int tcp_newreno(struct tcpcb *, struct tcphdr *);