Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_input.c
Show First 20 Lines • Show All 148 Lines • ▼ Show 20 Lines | SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(tcp_delack_enabled), 0, | &VNET_NAME(tcp_delack_enabled), 0, | ||||
"Delay ACK to try and piggyback it onto a data packet"); | "Delay ACK to try and piggyback it onto a data packet"); | ||||
/*
 * Integer knobs defined with VNET_DEFINE and registered under
 * _net_inet_tcp with CTLFLAG_VNET | CTLFLAG_RW:
 *   drop_synfin          - initialised to 0
 *   do_prr_conservative  - initialised to 0 (backing variable
 *                          tcp_do_prr_conservative)
 *   do_prr               - initialised to 1 (backing variable tcp_do_prr)
 *
 * NOTE(review): the rows below are a two-column diff rendering; each row
 * appears to hold the old text, then the new text, separated by '|'.  The
 * four do_prr_conservative rows carry a single cell, so they exist on one
 * side of the change only -- presumably the old side, i.e. this knob is
 * being removed; confirm against the changeset.
 */
VNET_DEFINE(int, drop_synfin) = 0; | VNET_DEFINE(int, drop_synfin) = 0; | ||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_VNET | CTLFLAG_RW, | SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(drop_synfin), 0, | &VNET_NAME(drop_synfin), 0, | ||||
"Drop TCP packets with SYN+FIN set"); | "Drop TCP packets with SYN+FIN set"); | ||||
VNET_DEFINE(int, tcp_do_prr_conservative) = 0; | |||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | CTLFLAG_RW, | |||||
&VNET_NAME(tcp_do_prr_conservative), 0, | |||||
"Do conservative Proportional Rate Reduction"); | |||||
VNET_DEFINE(int, tcp_do_prr) = 1; | VNET_DEFINE(int, tcp_do_prr) = 1; | ||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW, | SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(tcp_do_prr), 1, | &VNET_NAME(tcp_do_prr), 1, | ||||
"Enable Proportional Rate Reduction per RFC 6937"); | "Enable Proportional Rate Reduction per RFC 6937"); | ||||
VNET_DEFINE(int, tcp_do_newcwv) = 0; | VNET_DEFINE(int, tcp_do_newcwv) = 0; | ||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW, | SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(tcp_do_newcwv), 0, | &VNET_NAME(tcp_do_newcwv), 0, | ||||
▲ Show 20 Lines • Show All 2,420 Lines • ▼ Show 20 Lines | if (SEQ_LEQ(th->th_ack, tp->snd_una)) { | ||||
* 2) Acks with SACK but without any new SACK | * 2) Acks with SACK but without any new SACK | ||||
* information in them. These could result from | * information in them. These could result from | ||||
* any anomaly in the network like a switch | * any anomaly in the network like a switch | ||||
* duplicating packets or a possible DoS attack. | * duplicating packets or a possible DoS attack. | ||||
*/ | */ | ||||
if (th->th_ack != tp->snd_una || | if (th->th_ack != tp->snd_una || | ||||
((tp->t_flags & TF_SACK_PERMIT) && | ((tp->t_flags & TF_SACK_PERMIT) && | ||||
(to.to_flags & TOF_SACK) && | (to.to_flags & TOF_SACK) && | ||||
!sack_changed)) | (sack_changed == SACK_NOCHANGE))) | ||||
break; | break; | ||||
else if (!tcp_timer_active(tp, TT_REXMT)) | else if (!tcp_timer_active(tp, TT_REXMT)) | ||||
tp->t_dupacks = 0; | tp->t_dupacks = 0; | ||||
else if (++tp->t_dupacks > tcprexmtthresh || | else if (++tp->t_dupacks > tcprexmtthresh || | ||||
IN_FASTRECOVERY(tp->t_flags)) { | IN_FASTRECOVERY(tp->t_flags)) { | ||||
cc_ack_received(tp, th, nsegs, | cc_ack_received(tp, th, nsegs, | ||||
CC_DUPACK); | CC_DUPACK); | ||||
if (V_tcp_do_prr && | if (V_tcp_do_prr && | ||||
IN_FASTRECOVERY(tp->t_flags) && | IN_FASTRECOVERY(tp->t_flags) && | ||||
(tp->t_flags & TF_SACK_PERMIT)) { | (tp->t_flags & TF_SACK_PERMIT)) { | ||||
tcp_do_prr_ack(tp, th, &to); | /* | ||||
* While dealing with DupAcks, | |||||
* always use PRR-CRB | |||||
*/ | |||||
tcp_do_prr_ack(tp, th, &to, SACK_NEWLOSS); | |||||
} else if ((tp->t_flags & TF_SACK_PERMIT) && | } else if ((tp->t_flags & TF_SACK_PERMIT) && | ||||
(to.to_flags & TOF_SACK) && | (to.to_flags & TOF_SACK) && | ||||
IN_FASTRECOVERY(tp->t_flags)) { | IN_FASTRECOVERY(tp->t_flags)) { | ||||
int awnd; | int awnd; | ||||
/* | /* | ||||
* Compute the amount of data in flight first. | * Compute the amount of data in flight first. | ||||
* We can inject new data into the pipe iff | * We can inject new data into the pipe iff | ||||
▲ Show 20 Lines • Show All 158 Lines • ▼ Show 20 Lines | enter_recovery: | ||||
* sack_changed tracks all changes to the SACK | * sack_changed tracks all changes to the SACK | ||||
* scoreboard, including when partial ACKs without | * scoreboard, including when partial ACKs without | ||||
* SACK options are received, and clear the scoreboard | * SACK options are received, and clear the scoreboard | ||||
* from the left side. Such partial ACKs should not be | * from the left side. Such partial ACKs should not be | ||||
* counted as dupacks here. | * counted as dupacks here. | ||||
*/ | */ | ||||
if ((tp->t_flags & TF_SACK_PERMIT) && | if ((tp->t_flags & TF_SACK_PERMIT) && | ||||
(to.to_flags & TOF_SACK) && | (to.to_flags & TOF_SACK) && | ||||
sack_changed) { | !(sack_changed == SACK_NOCHANGE)) { | ||||
tp->t_dupacks++; | tp->t_dupacks++; | ||||
/* limit overhead by setting maxseg last */ | /* limit overhead by setting maxseg last */ | ||||
if (!IN_FASTRECOVERY(tp->t_flags) && | if (!IN_FASTRECOVERY(tp->t_flags) && | ||||
(tp->sackhint.sacked_bytes > | (tp->sackhint.sacked_bytes > | ||||
((tcprexmtthresh - 1) * | ((tcprexmtthresh - 1) * | ||||
(maxseg = tcp_maxseg(tp))))) { | (maxseg = tcp_maxseg(tp))))) { | ||||
goto enter_recovery; | goto enter_recovery; | ||||
} | } | ||||
Show All 9 Lines | resume_partialack: | ||||
* for the other side's cached packets, retract it. | * for the other side's cached packets, retract it. | ||||
*/ | */ | ||||
if (IN_FASTRECOVERY(tp->t_flags)) { | if (IN_FASTRECOVERY(tp->t_flags)) { | ||||
if (SEQ_LT(th->th_ack, tp->snd_recover)) { | if (SEQ_LT(th->th_ack, tp->snd_recover)) { | ||||
if (tp->t_flags & TF_SACK_PERMIT) | if (tp->t_flags & TF_SACK_PERMIT) | ||||
if (V_tcp_do_prr && to.to_flags & TOF_SACK) { | if (V_tcp_do_prr && to.to_flags & TOF_SACK) { | ||||
tcp_timer_activate(tp, TT_REXMT, 0); | tcp_timer_activate(tp, TT_REXMT, 0); | ||||
tp->t_rtttime = 0; | tp->t_rtttime = 0; | ||||
tcp_do_prr_ack(tp, th, &to); | tcp_do_prr_ack(tp, th, &to, sack_changed); | ||||
tp->t_flags |= TF_ACKNOW; | tp->t_flags |= TF_ACKNOW; | ||||
(void) tcp_output(tp); | (void) tcp_output(tp); | ||||
} else | } else | ||||
tcp_sack_partialack(tp, th); | tcp_sack_partialack(tp, th); | ||||
else | else | ||||
tcp_newreno_partial_ack(tp, th); | tcp_newreno_partial_ack(tp, th); | ||||
} else | } else | ||||
cc_post_recovery(tp, th); | cc_post_recovery(tp, th); | ||||
} else if (IN_CONGRECOVERY(tp->t_flags)) { | } else if (IN_CONGRECOVERY(tp->t_flags)) { | ||||
if (SEQ_LT(th->th_ack, tp->snd_recover)) { | if (SEQ_LT(th->th_ack, tp->snd_recover)) { | ||||
if (V_tcp_do_prr) { | if (V_tcp_do_prr) { | ||||
tp->sackhint.delivered_data = BYTES_THIS_ACK(tp, th); | tp->sackhint.delivered_data = BYTES_THIS_ACK(tp, th); | ||||
tp->snd_fack = th->th_ack; | tp->snd_fack = th->th_ack; | ||||
tcp_do_prr_ack(tp, th, &to); | /* | ||||
* During ECN cwnd reduction | |||||
* always use PRR-SSRB | |||||
*/ | |||||
tcp_do_prr_ack(tp, th, &to, SACK_CHANGE); | |||||
(void) tcp_output(tp); | (void) tcp_output(tp); | ||||
} | } | ||||
} else | } else | ||||
cc_post_recovery(tp, th); | cc_post_recovery(tp, th); | ||||
} | } | ||||
/* | /* | ||||
* If we reach this point, ACK is not a duplicate, | * If we reach this point, ACK is not a duplicate, | ||||
* i.e., it ACKs something we sent. | * i.e., it ACKs something we sent. | ||||
▲ Show 20 Lines • Show All 1,114 Lines • ▼ Show 20 Lines | if (maxmtu && thcmtu) | ||||
mss = min(maxmtu, thcmtu) - min_protoh; | mss = min(maxmtu, thcmtu) - min_protoh; | ||||
else if (maxmtu || thcmtu) | else if (maxmtu || thcmtu) | ||||
mss = max(maxmtu, thcmtu) - min_protoh; | mss = max(maxmtu, thcmtu) - min_protoh; | ||||
return (mss); | return (mss); | ||||
} | } | ||||
/*
 * tcp_do_prr_ack(): recompute tp->snd_cwnd in response to one ACK, using
 * the Proportional Rate Reduction bookkeeping kept in tp->sackhint.
 *
 * NOTE(review): this block is a two-column diff rendering.  Each row
 * appears to hold the old text, then the new text, separated by '|'; rows
 * with a single cell exist on one side only.  The signature row shows the
 * change appending a fourth parameter, sack_changed.
 *
 * Parameters:
 *   tp           - connection state.  tp->sackhint.* and tp->snd_* are
 *                  read; tp->sackhint.prr_delivered,
 *                  tp->sackhint.recover_fs and tp->snd_cwnd are written.
 *   th, to       - not referenced anywhere in the visible body.
 *   sack_changed - (longer signature only) compared with SACK_NEWLOSS to
 *                  pick the conservative send limit.
 * Returns nothing.
 *
 * INP_WLOCK_ASSERT(tp->t_inpcb) runs before any state is touched;
 * presumably the caller must hold the inpcb write lock -- confirm.
 *
 * Steps, in the order written below:
 *   1. del_data = tp->sackhint.delivered_data.  pipe comes from
 *      tcp_compute_pipe(tp) when V_tcp_do_newsack is set, otherwise from
 *      (snd_nxt - snd_fack) + sackhint.sack_bytes_rexmit.
 *      sackhint.prr_delivered is then increased by del_data.
 *   2. If pipe >= snd_ssthresh: recover_fs, when still zero, is set to
 *      imax(1, snd_nxt - snd_una), and
 *      snd_cnt = howmany(prr_delivered * snd_ssthresh, recover_fs)
 *                - prr_out.
 *   3. Otherwise a byte limit is chosen and
 *      snd_cnt = imin(snd_ssthresh - pipe, limit).
 *      The conservative limit is prr_delivered - prr_out; the other is
 *      imax(prr_delivered - prr_out, del_data) + maxseg.
 *      One column selects the conservative limit on
 *      V_tcp_do_prr_conservative, the other on
 *      (sack_changed == SACK_NEWLOSS) || (del_data == 0).
 *   4. snd_cnt = imax(snd_cnt, 0) / maxseg: negative values are clamped
 *      to zero and the byte count is turned into a whole-segment count by
 *      integer division.
 *   5. snd_cwnd is rebuilt:
 *        IN_FASTRECOVERY: snd_nxt - snd_recover + sack_bytes_rexmit
 *                         + snd_cnt * maxseg
 *        IN_CONGRECOVERY: pipe - del_data + snd_cnt * maxseg
 *      One column wraps each assignment in imax(maxseg, ...).  The other
 *      stores the raw value and applies a single
 *      tp->snd_cwnd = imax(maxseg, tp->snd_cwnd) after the if/else, so
 *      that floor is applied even when neither flag is set.
 *
 * NOTE(review): the product in step 2 is evaluated as long; whether that
 * is wide enough depends on the field types and on the platform's long,
 * neither of which is visible here -- confirm.
 * NOTE(review): step 4 divides by maxseg with no visible zero check; this
 * assumes tcp_maxseg(tp) never returns 0 -- confirm.
 */
void | void | ||||
tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to) | tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, int sack_changed) | ||||
{ | { | ||||
int snd_cnt = 0, limit = 0, del_data = 0, pipe = 0; | int snd_cnt = 0, limit = 0, del_data = 0, pipe = 0; | ||||
int maxseg = tcp_maxseg(tp); | int maxseg = tcp_maxseg(tp); | ||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
/* | /* | ||||
* Compute the amount of data that this ACK is indicating | * Compute the amount of data that this ACK is indicating | ||||
* (del_data) and an estimate of how many bytes are in the | * (del_data) and an estimate of how many bytes are in the | ||||
* network. | * network. | ||||
*/ | */ | ||||
del_data = tp->sackhint.delivered_data; | del_data = tp->sackhint.delivered_data; | ||||
if (V_tcp_do_newsack) | if (V_tcp_do_newsack) | ||||
pipe = tcp_compute_pipe(tp); | pipe = tcp_compute_pipe(tp); | ||||
else | else | ||||
pipe = (tp->snd_nxt - tp->snd_fack) + tp->sackhint.sack_bytes_rexmit; | pipe = (tp->snd_nxt - tp->snd_fack) + | ||||
tp->sackhint.sack_bytes_rexmit; | |||||
tp->sackhint.prr_delivered += del_data; | tp->sackhint.prr_delivered += del_data; | ||||
/* | /* | ||||
* Proportional Rate Reduction | * Proportional Rate Reduction | ||||
*/ | */ | ||||
if (pipe >= tp->snd_ssthresh) { | if (pipe >= tp->snd_ssthresh) { | ||||
if (tp->sackhint.recover_fs == 0) | if (tp->sackhint.recover_fs == 0) | ||||
tp->sackhint.recover_fs = | tp->sackhint.recover_fs = | ||||
imax(1, tp->snd_nxt - tp->snd_una); | imax(1, tp->snd_nxt - tp->snd_una); | ||||
snd_cnt = howmany((long)tp->sackhint.prr_delivered * | snd_cnt = howmany((long)tp->sackhint.prr_delivered * | ||||
tp->snd_ssthresh, tp->sackhint.recover_fs) - | tp->snd_ssthresh, tp->sackhint.recover_fs) - | ||||
tp->sackhint.prr_out; | tp->sackhint.prr_out; | ||||
} else { | } else { | ||||
if (V_tcp_do_prr_conservative) | /* | ||||
* PRR 6937bis heuristic: | |||||
* - A partial ack without SACK block beneath snd_recover | |||||
* indicates further loss. | |||||
* - An SACK scoreboard update adding a new hole indicates | |||||
* further loss, so be conservative and send at most one | |||||
* segment. | |||||
* - Prevent ACK splitting attacks, by being conservative | |||||
* when no new data is acked. | |||||
*/ | |||||
if ((sack_changed == SACK_NEWLOSS) || (del_data == 0)) | |||||
limit = tp->sackhint.prr_delivered - | limit = tp->sackhint.prr_delivered - | ||||
tp->sackhint.prr_out; | tp->sackhint.prr_out; | ||||
else | else | ||||
limit = imax(tp->sackhint.prr_delivered - | limit = imax(tp->sackhint.prr_delivered - | ||||
tp->sackhint.prr_out, del_data) + | tp->sackhint.prr_out, del_data) + | ||||
maxseg; | maxseg; | ||||
snd_cnt = imin((tp->snd_ssthresh - pipe), limit); | snd_cnt = imin((tp->snd_ssthresh - pipe), limit); | ||||
} | } | ||||
snd_cnt = imax(snd_cnt, 0) / maxseg; | snd_cnt = imax(snd_cnt, 0) / maxseg; | ||||
/* | /* | ||||
* Send snd_cnt new data into the network in response to this ack. | * Send snd_cnt new data into the network in response to this ack. | ||||
* If there is going to be a SACK retransmission, adjust snd_cwnd | * If there is going to be a SACK retransmission, adjust snd_cwnd | ||||
* accordingly. | * accordingly. | ||||
*/ | */ | ||||
if (IN_FASTRECOVERY(tp->t_flags)) { | if (IN_FASTRECOVERY(tp->t_flags)) { | ||||
tp->snd_cwnd = imax(maxseg, tp->snd_nxt - tp->snd_recover + | tp->snd_cwnd = tp->snd_nxt - tp->snd_recover + | ||||
tp->sackhint.sack_bytes_rexmit + (snd_cnt * maxseg)); | tp->sackhint.sack_bytes_rexmit + | ||||
(snd_cnt * maxseg); | |||||
} else if (IN_CONGRECOVERY(tp->t_flags)) | } else if (IN_CONGRECOVERY(tp->t_flags)) | ||||
tp->snd_cwnd = imax(maxseg, pipe - del_data + | tp->snd_cwnd = pipe - del_data + (snd_cnt * maxseg); | ||||
(snd_cnt * maxseg)); | tp->snd_cwnd = imax(maxseg, tp->snd_cwnd); | ||||
} | } | ||||
/* | /* | ||||
* When a partial ack arrives, force the retransmission of the | * When a partial ack arrives, force the retransmission of the | ||||
* next unacknowledged segment. Do not clear tp->t_dupacks. | * next unacknowledged segment. Do not clear tp->t_dupacks. | ||||
* By setting snd_nxt to ti_ack, this forces retransmission timer to | * By setting snd_nxt to ti_ack, this forces retransmission timer to | ||||
* be started again. | * be started again. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 67 Lines • Show Last 20 Lines |