Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_input.c
Show First 20 Lines • Show All 157 Lines • ▼ Show 20 Lines | SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(tcp_do_prr_conservative), 0, | &VNET_NAME(tcp_do_prr_conservative), 0, | ||||
"Do conservative Proportional Rate Reduction"); | "Do conservative Proportional Rate Reduction"); | ||||
VNET_DEFINE(int, tcp_do_prr) = 1; | VNET_DEFINE(int, tcp_do_prr) = 1; | ||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW, | SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(tcp_do_prr), 1, | &VNET_NAME(tcp_do_prr), 1, | ||||
"Enable Proportional Rate Reduction per RFC 6937"); | "Enable Proportional Rate Reduction per RFC 6937"); | ||||
VNET_DEFINE(int, tcp_do_lrd) = 1; | |||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_lrd, CTLFLAG_VNET | CTLFLAG_RW, | |||||
&VNET_NAME(tcp_do_lrd), 1, | |||||
"Perform Lost Retransmission Detection"); | |||||
VNET_DEFINE(int, tcp_do_newcwv) = 0; | VNET_DEFINE(int, tcp_do_newcwv) = 0; | ||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW, | SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(tcp_do_newcwv), 0, | &VNET_NAME(tcp_do_newcwv), 0, | ||||
"Enable New Congestion Window Validation per RFC7661"); | "Enable New Congestion Window Validation per RFC7661"); | ||||
VNET_DEFINE(int, tcp_do_rfc6675_pipe) = 0; | VNET_DEFINE(int, tcp_do_rfc6675_pipe) = 0; | ||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc6675_pipe, CTLFLAG_VNET | CTLFLAG_RW, | SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc6675_pipe, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(tcp_do_rfc6675_pipe), 0, | &VNET_NAME(tcp_do_rfc6675_pipe), 0, | ||||
▲ Show 20 Lines • Show All 2,313 Lines • ▼ Show 20 Lines | #endif | ||||
case TCPS_CLOSING: | case TCPS_CLOSING: | ||||
case TCPS_LAST_ACK: | case TCPS_LAST_ACK: | ||||
if (SEQ_GT(th->th_ack, tp->snd_max)) { | if (SEQ_GT(th->th_ack, tp->snd_max)) { | ||||
TCPSTAT_INC(tcps_rcvacktoomuch); | TCPSTAT_INC(tcps_rcvacktoomuch); | ||||
goto dropafterack; | goto dropafterack; | ||||
} | } | ||||
if ((tp->t_flags & TF_SACK_PERMIT) && | if ((tp->t_flags & TF_SACK_PERMIT) && | ||||
((to.to_flags & TOF_SACK) || | ((to.to_flags & TOF_SACK) || | ||||
!TAILQ_EMPTY(&tp->snd_holes))) | !TAILQ_EMPTY(&tp->snd_holes))) { | ||||
sack_changed = tcp_sack_doack(tp, &to, th->th_ack); | if (((sack_changed = tcp_sack_doack(tp, &to, th->th_ack)) != 0) && | ||||
else | (V_tcp_do_lrd)) { | ||||
tcp_lost_retransmission(tp, th); | |||||
} | |||||
} else | |||||
/* | /* | ||||
* Reset the value so that previous (valid) value | * Reset the value so that previous (valid) value | ||||
* from the last ack with SACK doesn't get used. | * from the last ack with SACK doesn't get used. | ||||
*/ | */ | ||||
tp->sackhint.sacked_bytes = 0; | tp->sackhint.sacked_bytes = 0; | ||||
#ifdef TCP_HHOOK | #ifdef TCP_HHOOK | ||||
/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ | /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ | ||||
▲ Show 20 Lines • Show All 78 Lines • ▼ Show 20 Lines | if (SEQ_LEQ(th->th_ack, tp->snd_una)) { | ||||
*/ | */ | ||||
del_data = tp->sackhint.delivered_data; | del_data = tp->sackhint.delivered_data; | ||||
if (V_tcp_do_rfc6675_pipe) | if (V_tcp_do_rfc6675_pipe) | ||||
pipe = tcp_compute_pipe(tp); | pipe = tcp_compute_pipe(tp); | ||||
else | else | ||||
pipe = (tp->snd_nxt - tp->snd_fack) + | pipe = (tp->snd_nxt - tp->snd_fack) + | ||||
tp->sackhint.sack_bytes_rexmit; | tp->sackhint.sack_bytes_rexmit; | ||||
tp->sackhint.prr_delivered += del_data; | tp->sackhint.prr_delivered += del_data; | ||||
if (pipe > tp->snd_ssthresh) { | if (pipe >= tp->snd_ssthresh) { | ||||
if (tp->sackhint.recover_fs == 0) | if (tp->sackhint.recover_fs == 0) | ||||
tp->sackhint.recover_fs = | tp->sackhint.recover_fs = | ||||
imax(1, tp->snd_nxt - tp->snd_una); | imax(1, tp->snd_nxt - tp->snd_una); | ||||
snd_cnt = howmany((long)tp->sackhint.prr_delivered * | snd_cnt = howmany((long)tp->sackhint.prr_delivered * | ||||
tp->snd_ssthresh, tp->sackhint.recover_fs) - | tp->snd_ssthresh, tp->sackhint.recover_fs) - | ||||
(tp->sackhint.sack_bytes_rexmit + | (tp->sackhint.sack_bytes_rexmit + | ||||
(tp->snd_nxt - tp->snd_recover)); | (tp->snd_nxt - tp->snd_recover) + | ||||
tp->sackhint.prr_out); | |||||
} else { | } else { | ||||
if (V_tcp_do_prr_conservative) | if (V_tcp_do_prr_conservative) | ||||
limit = tp->sackhint.prr_delivered - | limit = tp->sackhint.prr_delivered - | ||||
tp->sackhint.sack_bytes_rexmit; | tp->sackhint.sack_bytes_rexmit; | ||||
else | else | ||||
limit = imax(tp->sackhint.prr_delivered - | limit = imax(tp->sackhint.prr_delivered - | ||||
tp->sackhint.sack_bytes_rexmit, | tp->sackhint.sack_bytes_rexmit, | ||||
del_data) + maxseg; | del_data) + maxseg; | ||||
▲ Show 20 Lines • Show All 76 Lines • ▼ Show 20 Lines | enter_recovery: | ||||
CC_DUPACK); | CC_DUPACK); | ||||
tcp_timer_activate(tp, TT_REXMT, 0); | tcp_timer_activate(tp, TT_REXMT, 0); | ||||
tp->t_rtttime = 0; | tp->t_rtttime = 0; | ||||
if (V_tcp_do_prr) { | if (V_tcp_do_prr) { | ||||
/* | /* | ||||
* snd_ssthresh is already updated by | * snd_ssthresh is already updated by | ||||
* cc_cong_signal. | * cc_cong_signal. | ||||
*/ | */ | ||||
tp->sackhint.prr_delivered = 0; | tp->sackhint.prr_delivered = | ||||
tp->sackhint.sacked_bytes; | |||||
tp->sackhint.prr_out = 0; | |||||
tp->sackhint.sack_bytes_rexmit = 0; | tp->sackhint.sack_bytes_rexmit = 0; | ||||
tp->sackhint.recover_fs = max(1, | tp->sackhint.recover_fs = max(1, | ||||
tp->snd_nxt - tp->snd_una); | tp->snd_nxt - tp->snd_una); | ||||
} | } | ||||
if (tp->t_flags & TF_SACK_PERMIT) { | if (tp->t_flags & TF_SACK_PERMIT) { | ||||
TCPSTAT_INC( | TCPSTAT_INC( | ||||
tcps_sack_recovery_episode); | tcps_sack_recovery_episode); | ||||
tp->snd_recover = tp->snd_nxt; | tp->snd_recover = tp->snd_nxt; | ||||
▲ Show 20 Lines • Show All 1,258 Lines • ▼ Show 20 Lines | tcp_prr_partialack(struct tcpcb *tp, struct tcphdr *th) | ||||
if (V_tcp_do_rfc6675_pipe) | if (V_tcp_do_rfc6675_pipe) | ||||
pipe = tcp_compute_pipe(tp); | pipe = tcp_compute_pipe(tp); | ||||
else | else | ||||
pipe = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit; | pipe = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit; | ||||
tp->sackhint.prr_delivered += del_data; | tp->sackhint.prr_delivered += del_data; | ||||
/* | /* | ||||
* Proportional Rate Reduction | * Proportional Rate Reduction | ||||
*/ | */ | ||||
if (pipe > tp->snd_ssthresh) { | if (pipe >= tp->snd_ssthresh) { | ||||
if (tp->sackhint.recover_fs == 0) | if (tp->sackhint.recover_fs == 0) | ||||
tp->sackhint.recover_fs = | tp->sackhint.recover_fs = | ||||
imax(1, tp->snd_nxt - tp->snd_una); | imax(1, tp->snd_nxt - tp->snd_una); | ||||
snd_cnt = howmany((long)tp->sackhint.prr_delivered * | snd_cnt = howmany((long)tp->sackhint.prr_delivered * | ||||
tp->snd_ssthresh, tp->sackhint.recover_fs) - | tp->snd_ssthresh, tp->sackhint.recover_fs) - | ||||
(tp->sackhint.sack_bytes_rexmit + | (tp->sackhint.sack_bytes_rexmit + | ||||
(tp->snd_nxt - tp->snd_recover)); | (tp->snd_nxt - tp->snd_recover) + | ||||
tp->sackhint.prr_out); | |||||
} else { | } else { | ||||
if (V_tcp_do_prr_conservative) | if (V_tcp_do_prr_conservative) | ||||
limit = tp->sackhint.prr_delivered - | limit = tp->sackhint.prr_delivered - | ||||
tp->sackhint.sack_bytes_rexmit; | tp->sackhint.sack_bytes_rexmit; | ||||
else | else | ||||
limit = imax(tp->sackhint.prr_delivered - | limit = imax(tp->sackhint.prr_delivered - | ||||
tp->sackhint.sack_bytes_rexmit, | tp->sackhint.sack_bytes_rexmit, | ||||
del_data) + maxseg; | del_data) + maxseg; | ||||
▲ Show 20 Lines • Show All 77 Lines • ▼ Show 20 Lines | tcp_compute_initwnd(uint32_t maxseg) | ||||
else { | else { | ||||
/* Per RFC5681 Section 3.1 */ | /* Per RFC5681 Section 3.1 */ | ||||
if (maxseg > 2190) | if (maxseg > 2190) | ||||
return (2 * maxseg); | return (2 * maxseg); | ||||
else if (maxseg > 1095) | else if (maxseg > 1095) | ||||
return (3 * maxseg); | return (3 * maxseg); | ||||
else | else | ||||
return (4 * maxseg); | return (4 * maxseg); | ||||
} | |||||
} | |||||
/* | |||||
* Lost Retransmission Detection | |||||
* Check is FACK is >= than the end of the leftmost hole. | |||||
* If yes, we restart sending from still existing holes, | |||||
* and adjust cwnd via the congestion control module. | |||||
*/ | |||||
void | |||||
tcp_lost_retransmission(struct tcpcb *tp, struct tcphdr *th) | |||||
{ | |||||
struct sackhole *temp; | |||||
uint32_t prev_ssthresh; | |||||
if (IN_RECOVERY(tp->t_flags) && | |||||
((temp = TAILQ_FIRST(&tp->snd_holes)) != NULL) && | |||||
SEQ_GEQ(tp->snd_fack, tp->snd_recover) && | |||||
SEQ_GEQ(temp->rxmit, temp->end) && | |||||
SEQ_GEQ(tp->snd_fack, temp->rxmit)) { | |||||
/* | |||||
* Start retransmissions from the first hole, and | |||||
* subsequently all other remaining holes, including | |||||
* those, which had been sent completely before. | |||||
*/ | |||||
tp->sackhint.nexthole = temp; | |||||
TAILQ_FOREACH(temp, &tp->snd_holes, scblink) { | |||||
if (SEQ_GEQ(tp->snd_fack, temp->rxmit) && | |||||
SEQ_GEQ(temp->rxmit, temp->end)) | |||||
temp->rxmit = temp->start; | |||||
} | |||||
/* | |||||
* Remember the amount of new data sent in the last window | |||||
* and the old ssthresh, to deduct the beta factor used | |||||
* by the CC module. Finally, set cwnd to ssthresh just | |||||
* prior to invoking another cwnd reduction by the CC | |||||
* module, to not shrink it excessively. | |||||
*/ | |||||
tp->sackhint.prr_out += tp->snd_max - tp->snd_recover; | |||||
prev_ssthresh = tp->snd_ssthresh; | |||||
tp->snd_cwnd = tp->snd_ssthresh; | |||||
/* | |||||
* Formally exit recovery, and let the CC module adjust | |||||
* ssthresh as intended. | |||||
*/ | |||||
EXIT_RECOVERY(tp->t_flags); | |||||
cc_cong_signal(tp, th, CC_NDUPACK); | |||||
/* | |||||
* Some magic: The ACKs received in the 2nd window already | |||||
* arrive at the PRR reduced rate. Thus, on a per-ACK basis, | |||||
* keep the same fraction of new segments vs. ACKs. | |||||
* This ratio is maintained, by reducing recover_fs by the | |||||
* same amount, that got applied to ssthresh by the CC module. | |||||
*/ | |||||
tp->sackhint.recover_fs = ((long)tp->sackhint.recover_fs * | |||||
tp->snd_ssthresh) / prev_ssthresh; | |||||
} | } | ||||
} | } |