Index: sys/netinet/tcp_input.c
===================================================================
--- sys/netinet/tcp_input.c
+++ sys/netinet/tcp_input.c
@@ -163,6 +163,11 @@
     &VNET_NAME(tcp_do_prr), 1,
     "Enable Proportional Rate Reduction per RFC 6937");
 
+VNET_DEFINE(int, tcp_do_lrd) = 1;
+SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_lrd, CTLFLAG_VNET | CTLFLAG_RW,
+    &VNET_NAME(tcp_do_lrd), 1,
+    "Perform Lost Retransmission Detection");
+
 VNET_DEFINE(int, tcp_do_newcwv) = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW,
     &VNET_NAME(tcp_do_newcwv), 0,
@@ -2492,9 +2497,12 @@
 		}
 		if ((tp->t_flags & TF_SACK_PERMIT) &&
 		    ((to.to_flags & TOF_SACK) ||
-		    !TAILQ_EMPTY(&tp->snd_holes)))
-			sack_changed = tcp_sack_doack(tp, &to, th->th_ack);
-		else
+		    !TAILQ_EMPTY(&tp->snd_holes))) {
+			if (((sack_changed = tcp_sack_doack(tp, &to, th->th_ack)) != 0) &&
+			    (V_tcp_do_lrd)) {
+				tcp_lost_retransmission(tp, th);
+			}
+		} else
 			/*
 			 * Reset the value so that previous (valid) value
 			 * from the last ack with SACK doesn't get used.
@@ -2589,14 +2597,15 @@
 					pipe = (tp->snd_nxt - tp->snd_fack) +
 						tp->sackhint.sack_bytes_rexmit;
 					tp->sackhint.prr_delivered += del_data;
-					if (pipe > tp->snd_ssthresh) {
+					if (pipe >= tp->snd_ssthresh) {
 						if (tp->sackhint.recover_fs == 0)
 							tp->sackhint.recover_fs =
 							    imax(1, tp->snd_nxt - tp->snd_una);
 						snd_cnt = howmany((long)tp->sackhint.prr_delivered *
 						    tp->snd_ssthresh, tp->sackhint.recover_fs) -
 						    (tp->sackhint.sack_bytes_rexmit +
-						    (tp->snd_nxt - tp->snd_recover));
+						    (tp->snd_nxt - tp->snd_recover) +
+						    tp->sackhint.prr_out);
 					} else {
 						if (V_tcp_do_prr_conservative)
 							limit = tp->sackhint.prr_delivered -
@@ -2689,7 +2698,9 @@
 						 * snd_ssthresh is already updated by
 						 * cc_cong_signal.
 						 */
-						tp->sackhint.prr_delivered = 0;
+						tp->sackhint.prr_delivered =
+						    tp->sackhint.sacked_bytes;
+						tp->sackhint.prr_out = 0;
 						tp->sackhint.sack_bytes_rexmit = 0;
 						tp->sackhint.recover_fs = max(1,
 						    tp->snd_nxt - tp->snd_una);
@@ -3964,14 +3975,15 @@
 	/*
 	 * Proportional Rate Reduction
 	 */
-	if (pipe > tp->snd_ssthresh) {
+	if (pipe >= tp->snd_ssthresh) {
 		if (tp->sackhint.recover_fs == 0)
 			tp->sackhint.recover_fs =
 			    imax(1, tp->snd_nxt - tp->snd_una);
 		snd_cnt = howmany((long)tp->sackhint.prr_delivered *
 			    tp->snd_ssthresh, tp->sackhint.recover_fs) -
 			    (tp->sackhint.sack_bytes_rexmit +
-			    (tp->snd_nxt - tp->snd_recover));
+			    (tp->snd_nxt - tp->snd_recover) +
+			    tp->sackhint.prr_out);
 	} else {
 		if (V_tcp_do_prr_conservative)
 			limit = tp->sackhint.prr_delivered -
@@ -4067,3 +4079,58 @@
 		return (4 * maxseg);
 	}
 }
+
+/*
+ * Lost Retransmission Detection
+ * In recovery, check if FACK is >= the recovery point and >= rxmit of
+ * the leftmost, completely sent hole. If yes, restart sending from the
+ * still existing holes, and adjust cwnd via the CC module.
+ */
+void
+tcp_lost_retransmission(struct tcpcb *tp, struct tcphdr *th)
+{
+	struct sackhole *temp;
+	uint32_t prev_ssthresh;
+	if (IN_RECOVERY(tp->t_flags) &&
+	    ((temp = TAILQ_FIRST(&tp->snd_holes)) != NULL) &&
+	    SEQ_GEQ(tp->snd_fack, tp->snd_recover) &&
+	    SEQ_GEQ(temp->rxmit, temp->end) &&
+	    SEQ_GEQ(tp->snd_fack, temp->rxmit)) {
+		/*
+		 * Start retransmissions from the first hole, and
+		 * subsequently all other remaining holes, including
+		 * those which had been sent completely before.
+		 */
+		tp->sackhint.nexthole = temp;
+		TAILQ_FOREACH(temp, &tp->snd_holes, scblink) {
+			if (SEQ_GEQ(tp->snd_fack, temp->rxmit) &&
+			    SEQ_GEQ(temp->rxmit, temp->end))
+				temp->rxmit = temp->start;
+		}
+		/*
+		 * Remember the amount of new data sent in the last window
+		 * and the old ssthresh, to derive the beta factor used
+		 * by the CC module. Finally, set cwnd to ssthresh just
+		 * prior to invoking another cwnd reduction by the CC
+		 * module, to not shrink it excessively.
+		 */
+		tp->sackhint.prr_out += tp->snd_max - tp->snd_recover;
+		prev_ssthresh = tp->snd_ssthresh;
+		tp->snd_cwnd = tp->snd_ssthresh;
+		/*
+		 * Formally exit recovery, and let the CC module adjust
+		 * ssthresh as intended.
+		 */
+		EXIT_RECOVERY(tp->t_flags);
+		cc_cong_signal(tp, th, CC_NDUPACK);
+		/*
+		 * Some magic: The ACKs received in the 2nd window already
+		 * arrive at the PRR reduced rate. Thus, on a per-ACK basis,
+		 * keep the same fraction of new segments vs. ACKs.
+		 * This ratio is maintained by scaling recover_fs by the
+		 * same factor that the CC module applied to ssthresh.
+		 */
+		tp->sackhint.recover_fs = ((long)tp->sackhint.recover_fs *
+		    tp->snd_ssthresh) / prev_ssthresh;
+	}
+}
Index: sys/netinet/tcp_output.c
===================================================================
--- sys/netinet/tcp_output.c
+++ sys/netinet/tcp_output.c
@@ -1234,6 +1234,14 @@
 		} else {
 			th->th_seq = htonl(p->rxmit);
 			p->rxmit += len;
+			/*
+			 * Lost Retransmission Detection:
+			 * mark a completely sent hole, to
+			 * trigger resending it (if it still
+			 * exists) once fack acks snd_recover.
+			 */
+			if (SEQ_GEQ(p->rxmit, p->end))
+				p->rxmit = tp->snd_recover;
 			tp->sackhint.sack_bytes_rexmit += len;
 		}
 		th->th_ack = htonl(tp->rcv_nxt);
Index: sys/netinet/tcp_sack.c
===================================================================
--- sys/netinet/tcp_sack.c
+++ sys/netinet/tcp_sack.c
@@ -687,7 +687,8 @@
 			cur = TAILQ_PREV(cur, sackhole_head, scblink);
 			continue;
 		}
-		tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
+		tp->sackhint.sack_bytes_rexmit -=
+		    (SEQ_MIN(cur->rxmit, cur->end) - cur->start);
 		KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
 		    ("sackhint bytes rtx >= 0"));
 		sack_changed = 1;
@@ -717,7 +718,8 @@
 				/* Move end of hole backward. */
 				delivered_data += (cur->end - sblkp->start);
 				cur->end = sblkp->start;
-				cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
+				if (SEQ_GEQ(cur->rxmit, cur->end))
+					cur->rxmit = tp->snd_recover;
 			} else {
 				/*
 				 * ACKs some data in middle of a hole; need
@@ -729,17 +731,18 @@
 					if (SEQ_GT(cur->rxmit, temp->rxmit)) {
 						temp->rxmit = cur->rxmit;
 						tp->sackhint.sack_bytes_rexmit
-						    += (temp->rxmit
-						    - temp->start);
+						    += (SEQ_MIN(temp->rxmit,
+						    temp->end) - temp->start);
 					}
 					cur->end = sblkp->start;
-					cur->rxmit = SEQ_MIN(cur->rxmit,
-					    cur->end);
+					if (SEQ_GEQ(cur->rxmit, cur->end))
+						cur->rxmit = tp->snd_recover;
 					delivered_data += (sblkp->end - sblkp->start);
 				}
 			}
 		}
-		tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start);
+		tp->sackhint.sack_bytes_rexmit +=
+		    (SEQ_MIN(cur->rxmit, cur->end) - cur->start);
 		/*
 		 * Testing sblkp->start against cur->start tells us whether
 		 * we're done with the sack block or the sack hole.
@@ -858,7 +861,7 @@
 			*sack_bytes_rexmt += (p->rxmit - p->start);
 			break;
 		}
-		*sack_bytes_rexmt += (p->rxmit - p->start);
+		*sack_bytes_rexmt += (SEQ_MIN(p->rxmit, p->end) - p->start);
 	}
 	return (p);
 }
Index: sys/netinet/tcp_var.h
===================================================================
--- sys/netinet/tcp_var.h
+++ sys/netinet/tcp_var.h
@@ -115,7 +115,7 @@
 	 */
 	uint32_t	recover_fs;	/* Flight Size at the start of Loss recovery */
 	uint32_t	prr_delivered;	/* Total bytes delivered using PRR */
-	uint32_t	_pad[1];	/* TBD */
+	uint32_t	prr_out;	/* Bytes sent using PRR */
 };
 
 #define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)
@@ -868,6 +868,7 @@
 VNET_DECLARE(struct inpcbhead, tcb);
 VNET_DECLARE(struct inpcbinfo, tcbinfo);
 
+#define	V_tcp_do_lrd		VNET(tcp_do_lrd)
 #define	V_tcp_do_prr		VNET(tcp_do_prr)
 #define	V_tcp_do_prr_conservative	VNET(tcp_do_prr_conservative)
 #define	V_tcp_do_newcwv		VNET(tcp_do_newcwv)
@@ -1064,6 +1065,7 @@
 void	 tcp_sack_adjust(struct tcpcb *tp);
 struct sackhole *tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt);
 void	 tcp_prr_partialack(struct tcpcb *, struct tcphdr *);
+void	 tcp_lost_retransmission(struct tcpcb *, struct tcphdr *);
 void	 tcp_sack_partialack(struct tcpcb *, struct tcphdr *);
 void	 tcp_free_sackholes(struct tcpcb *tp);
 int	 tcp_newreno(struct tcpcb *, struct tcphdr *);