Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_input.c
Show First 20 Lines • Show All 147 Lines • ▼ Show 20 Lines | SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(tcp_delack_enabled), 0, | &VNET_NAME(tcp_delack_enabled), 0, | ||||
"Delay ACK to try and piggyback it onto a data packet"); | "Delay ACK to try and piggyback it onto a data packet"); | ||||
VNET_DEFINE(int, drop_synfin) = 0; | VNET_DEFINE(int, drop_synfin) = 0; | ||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_VNET | CTLFLAG_RW, | SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(drop_synfin), 0, | &VNET_NAME(drop_synfin), 0, | ||||
"Drop TCP packets with SYN+FIN set"); | "Drop TCP packets with SYN+FIN set"); | ||||
VNET_DEFINE(int, tcp_do_prr_conservative) = 0; | |||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | CTLFLAG_RW, | |||||
&VNET_NAME(tcp_do_prr_conservative), 0, | |||||
"Do conservative Proportional Rate Reduction"); | |||||
VNET_DEFINE(int, tcp_do_prr) = 1; | |||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW, | |||||
&VNET_NAME(tcp_do_prr), 1, | |||||
"Enable Proportional Rate Reduction per RFC 6937"); | |||||
VNET_DEFINE(int, tcp_do_newcwv) = 0; | VNET_DEFINE(int, tcp_do_newcwv) = 0; | ||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW, | SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(tcp_do_newcwv), 0, | &VNET_NAME(tcp_do_newcwv), 0, | ||||
"Enable New Congestion Window Validation per RFC7661"); | "Enable New Congestion Window Validation per RFC7661"); | ||||
VNET_DEFINE(int, tcp_do_rfc6675_pipe) = 0; | VNET_DEFINE(int, tcp_do_rfc6675_pipe) = 0; | ||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc6675_pipe, CTLFLAG_VNET | CTLFLAG_RW, | SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc6675_pipe, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(tcp_do_rfc6675_pipe), 0, | &VNET_NAME(tcp_do_rfc6675_pipe), 0, | ||||
▲ Show 20 Lines • Show All 2,337 Lines • ▼ Show 20 Lines | if (SEQ_LEQ(th->th_ack, tp->snd_una)) { | ||||
!sack_changed)) | !sack_changed)) | ||||
break; | break; | ||||
else if (!tcp_timer_active(tp, TT_REXMT)) | else if (!tcp_timer_active(tp, TT_REXMT)) | ||||
tp->t_dupacks = 0; | tp->t_dupacks = 0; | ||||
else if (++tp->t_dupacks > tcprexmtthresh || | else if (++tp->t_dupacks > tcprexmtthresh || | ||||
IN_FASTRECOVERY(tp->t_flags)) { | IN_FASTRECOVERY(tp->t_flags)) { | ||||
cc_ack_received(tp, th, nsegs, | cc_ack_received(tp, th, nsegs, | ||||
CC_DUPACK); | CC_DUPACK); | ||||
if ((tp->t_flags & TF_SACK_PERMIT) && | if (V_tcp_do_prr && | ||||
IN_FASTRECOVERY(tp->t_flags) && | |||||
(tp->t_flags & TF_SACK_PERMIT)) { | |||||
long snd_cnt = 0, limit = 0; | |||||
long del_data = 0, pipe = 0; | |||||
/* | |||||
* In a duplicate ACK del_data is only the | |||||
* diff_in_sack. If no SACK is used del_data | |||||
* will be 0. Pipe is the amount of data we | |||||
* estimate to be in the network. | |||||
*/ | |||||
del_data = tp->sackhint.delivered_data; | |||||
pipe = (tp->snd_nxt - tp->snd_fack) + | |||||
tp->sackhint.sack_bytes_rexmit; | |||||
tp->sackhint.prr_delivered += del_data; | |||||
if (pipe > tp->snd_ssthresh) { | |||||
snd_cnt = (tp->sackhint.prr_delivered * | |||||
tp->snd_ssthresh / tp->sackhint.recover_fs) + | |||||
1 - tp->sackhint.sack_bytes_rexmit; | |||||
} | |||||
else { | |||||
if (V_tcp_do_prr_conservative) | |||||
limit = tp->sackhint.prr_delivered - tp->sackhint.sack_bytes_rexmit; | |||||
else | |||||
if ((tp->sackhint.prr_delivered - tp->sackhint.sack_bytes_rexmit) > del_data) | |||||
limit = tp->sackhint.prr_delivered - tp->sackhint.sack_bytes_rexmit + | |||||
tp->t_maxseg; | |||||
else | |||||
limit = del_data + tp->t_maxseg; | |||||
if ((tp->snd_ssthresh - pipe) < limit) | |||||
snd_cnt = tp->snd_ssthresh - pipe; | |||||
else | |||||
snd_cnt = limit; | |||||
} | |||||
snd_cnt = max((snd_cnt / tp->t_maxseg), 0); | |||||
/* | |||||
* Send snd_cnt new data into the network in | |||||
* response to this ACK. If there is a going | |||||
* to be a SACK retransmission, adjust snd_cwnd | |||||
* accordingly. | |||||
*/ | |||||
tp->snd_cwnd = tp->snd_nxt - tp->snd_recover + | |||||
tp->sackhint.sack_bytes_rexmit + | |||||
(snd_cnt * tp->t_maxseg); | |||||
} | |||||
else if ((tp->t_flags & TF_SACK_PERMIT) && | |||||
IN_FASTRECOVERY(tp->t_flags)) { | IN_FASTRECOVERY(tp->t_flags)) { | ||||
int awnd; | int awnd; | ||||
/* | /* | ||||
* Compute the amount of data in flight first. | * Compute the amount of data in flight first. | ||||
* We can inject new data into the pipe iff | * We can inject new data into the pipe iff | ||||
rgrimes: Mixing white space fixes makes more lines of code to review. White space fixes should just be… | |||||
* we have less than 1/2 the original window's | * we have less than 1/2 the original window's | ||||
* worth of data in flight. | * worth of data in flight. | ||||
*/ | */ | ||||
if (V_tcp_do_rfc6675_pipe) | if (V_tcp_do_rfc6675_pipe) | ||||
awnd = tcp_compute_pipe(tp); | awnd = tcp_compute_pipe(tp); | ||||
else | else | ||||
awnd = (tp->snd_nxt - tp->snd_fack) + | awnd = (tp->snd_nxt - tp->snd_fack) + | ||||
tp->sackhint.sack_bytes_rexmit; | tp->sackhint.sack_bytes_rexmit; | ||||
if (awnd < tp->snd_ssthresh) { | if (awnd < tp->snd_ssthresh) { | ||||
tp->snd_cwnd += maxseg; | tp->snd_cwnd += maxseg; | ||||
if (tp->snd_cwnd > tp->snd_ssthresh) | if (tp->snd_cwnd > tp->snd_ssthresh) | ||||
tp->snd_cwnd = tp->snd_ssthresh; | tp->snd_cwnd = tp->snd_ssthresh; | ||||
} | } | ||||
} else | } else | ||||
tp->snd_cwnd += maxseg; | tp->snd_cwnd += maxseg; | ||||
(void) tp->t_fb->tfb_tcp_output(tp); | (void) tp->t_fb->tfb_tcp_output(tp); | ||||
goto drop; | goto drop; | ||||
} else if (tp->t_dupacks == tcprexmtthresh) { | } else if (tp->t_dupacks == tcprexmtthresh) { | ||||
tcp_seq onxt = tp->snd_nxt; | tcp_seq onxt = tp->snd_nxt; | ||||
/* | /* | ||||
* If we're doing sack, check to | * If we're doing sack, or prr, check | ||||
* see if we're already in sack | * to see if we're already in sack | ||||
* recovery. If we're not doing sack, | * recovery. If we're not doing sack, | ||||
* check to see if we're in newreno | * check to see if we're in newreno | ||||
* recovery. | * recovery. | ||||
*/ | */ | ||||
if (tp->t_flags & TF_SACK_PERMIT) { | if (V_tcp_do_prr || | ||||
(tp->t_flags & TF_SACK_PERMIT)) { | |||||
if (IN_FASTRECOVERY(tp->t_flags)) { | if (IN_FASTRECOVERY(tp->t_flags)) { | ||||
tp->t_dupacks = 0; | tp->t_dupacks = 0; | ||||
break; | break; | ||||
} | } | ||||
Done Inline Actions: Duplicated code from below copied into above should be reducible by use of a more complicated if expression, which would then more closely match the comment above: if (V_tcp_do_prr || (tp->t_flags & TF_SACK_PERMIT)) { rgrimes: Duplicated code from below copied into above should be reducible by use of a more complicated if… | |||||
} else { | } else { | ||||
if (SEQ_LEQ(th->th_ack, | if (SEQ_LEQ(th->th_ack, | ||||
tp->snd_recover)) { | tp->snd_recover)) { | ||||
tp->t_dupacks = 0; | tp->t_dupacks = 0; | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
/* Congestion signal before ack. */ | /* Congestion signal before ack. */ | ||||
cc_cong_signal(tp, th, CC_NDUPACK); | cc_cong_signal(tp, th, CC_NDUPACK); | ||||
cc_ack_received(tp, th, nsegs, | cc_ack_received(tp, th, nsegs, | ||||
CC_DUPACK); | CC_DUPACK); | ||||
tcp_timer_activate(tp, TT_REXMT, 0); | tcp_timer_activate(tp, TT_REXMT, 0); | ||||
tp->t_rtttime = 0; | tp->t_rtttime = 0; | ||||
if (V_tcp_do_prr) { | |||||
/* | |||||
* snd_ssthresh is already updated by | |||||
* cc_cong_signal. | |||||
*/ | |||||
tp->sackhint.prr_delivered = 0; | |||||
tp->sackhint.sack_bytes_rexmit = 0; | |||||
if (!(tp->sackhint.recover_fs = tp->snd_nxt - tp->snd_una)) | |||||
tp->sackhint.recover_fs = 1; | |||||
} | |||||
if (tp->t_flags & TF_SACK_PERMIT) { | if (tp->t_flags & TF_SACK_PERMIT) { | ||||
TCPSTAT_INC( | TCPSTAT_INC( | ||||
tcps_sack_recovery_episode); | tcps_sack_recovery_episode); | ||||
tp->sack_newdata = tp->snd_nxt; | tp->sack_newdata = tp->snd_nxt; | ||||
tp->snd_cwnd = maxseg; | tp->snd_cwnd = maxseg; | ||||
(void) tp->t_fb->tfb_tcp_output(tp); | (void) tp->t_fb->tfb_tcp_output(tp); | ||||
goto drop; | goto drop; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 83 Lines • ▼ Show 20 Lines | #endif | ||||
/* | /* | ||||
* If the congestion window was inflated to account | * If the congestion window was inflated to account | ||||
* for the other side's cached packets, retract it. | * for the other side's cached packets, retract it. | ||||
*/ | */ | ||||
if (IN_FASTRECOVERY(tp->t_flags)) { | if (IN_FASTRECOVERY(tp->t_flags)) { | ||||
if (SEQ_LT(th->th_ack, tp->snd_recover)) { | if (SEQ_LT(th->th_ack, tp->snd_recover)) { | ||||
if (tp->t_flags & TF_SACK_PERMIT) | if (tp->t_flags & TF_SACK_PERMIT) | ||||
if (V_tcp_do_prr) | |||||
tcp_prr_partialack(tp, th); | |||||
else | |||||
tcp_sack_partialack(tp, th); | tcp_sack_partialack(tp, th); | ||||
else | else | ||||
tcp_newreno_partial_ack(tp, th); | tcp_newreno_partial_ack(tp, th); | ||||
} else | } else | ||||
cc_post_recovery(tp, th); | cc_post_recovery(tp, th); | ||||
} | } | ||||
/* | /* | ||||
* If we reach this point, ACK is not a duplicate, | * If we reach this point, ACK is not a duplicate, | ||||
* i.e., it ACKs something we sent. | * i.e., it ACKs something we sent. | ||||
▲ Show 20 Lines • Show All 1,086 Lines • ▼ Show 20 Lines | #endif | ||||
if (maxmtu && thcmtu) | if (maxmtu && thcmtu) | ||||
mss = min(maxmtu, thcmtu) - min_protoh; | mss = min(maxmtu, thcmtu) - min_protoh; | ||||
else if (maxmtu || thcmtu) | else if (maxmtu || thcmtu) | ||||
mss = max(maxmtu, thcmtu) - min_protoh; | mss = max(maxmtu, thcmtu) - min_protoh; | ||||
return (mss); | return (mss); | ||||
} | } | ||||
void | |||||
tcp_prr_partialack(struct tcpcb *tp, struct tcphdr *th) | |||||
{ | |||||
long snd_cnt = 0, limit = 0, del_data = 0, pipe = 0; | |||||
INP_WLOCK_ASSERT(tp->t_inpcb); | |||||
tcp_timer_activate(tp, TT_REXMT, 0); | |||||
tp->t_rtttime = 0; | |||||
/* | |||||
* Compute the amount of data that this ACK is indicating | |||||
* (del_data) and an estimate of how many bytes are in the | |||||
* network. | |||||
*/ | |||||
if (SEQ_GEQ(th->th_ack, tp->snd_una)) | |||||
del_data = BYTES_THIS_ACK(tp, th); | |||||
del_data += tp->sackhint.delivered_data; | |||||
pipe = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit; | |||||
tp->sackhint.prr_delivered += del_data; | |||||
/* | |||||
* Proportional Rate Reduction | |||||
*/ | |||||
if (pipe > tp->snd_ssthresh) | |||||
snd_cnt = (tp->sackhint.prr_delivered * tp->snd_ssthresh / tp->sackhint.recover_fs) - | |||||
tp->sackhint.sack_bytes_rexmit; | |||||
else { | |||||
if (V_tcp_do_prr_conservative) | |||||
limit = tp->sackhint.prr_delivered - tp->sackhint.sack_bytes_rexmit; | |||||
else | |||||
if ((tp->sackhint.prr_delivered - tp->sackhint.sack_bytes_rexmit) > del_data) | |||||
limit = tp->sackhint.prr_delivered - tp->sackhint.sack_bytes_rexmit + tp->t_maxseg; | |||||
else | |||||
limit = del_data + tp->t_maxseg; | |||||
snd_cnt = min((tp->snd_ssthresh - pipe), limit); | |||||
} | |||||
snd_cnt = max((snd_cnt / tp->t_maxseg), 0); | |||||
/* | |||||
* Send snd_cnt new data into the network in response to this ack. | |||||
* If there is going to be a SACK retransmission, adjust snd_cwnd | |||||
* accordingly. | |||||
*/ | |||||
tp->snd_cwnd = tp->snd_nxt - tp->snd_recover + | |||||
tp->sackhint.sack_bytes_rexmit + (snd_cnt * tp->t_maxseg); | |||||
tp->t_flags |= TF_ACKNOW; | |||||
(void) tcp_output(tp); | |||||
} | |||||
/* | /* | ||||
* On a partial ack arrives, force the retransmission of the | * On a partial ack arrives, force the retransmission of the | ||||
* next unacknowledged segment. Do not clear tp->t_dupacks. | * next unacknowledged segment. Do not clear tp->t_dupacks. | ||||
* By setting snd_nxt to ti_ack, this forces retransmission timer to | * By setting snd_nxt to ti_ack, this forces retransmission timer to | ||||
* be started again. | * be started again. | ||||
*/ | */ | ||||
void | void | ||||
▲ Show 20 Lines • Show All 66 Lines • Show Last 20 Lines |
Mixing whitespace fixes into a functional change adds more lines of code to review. Whitespace fixes should be committed separately, and as soon as possible.