Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_input.c
Show First 20 Lines • Show All 151 Lines • ▼ Show 20 Lines | SYSCTL_INT(_net_inet_tcp, OID_AUTO, delayed_ack, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(tcp_delack_enabled), 0, | &VNET_NAME(tcp_delack_enabled), 0, | ||||
"Delay ACK to try and piggyback it onto a data packet"); | "Delay ACK to try and piggyback it onto a data packet"); | ||||
VNET_DEFINE(int, drop_synfin) = 0; | VNET_DEFINE(int, drop_synfin) = 0; | ||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_VNET | CTLFLAG_RW, | SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(drop_synfin), 0, | &VNET_NAME(drop_synfin), 0, | ||||
"Drop TCP packets with SYN+FIN set"); | "Drop TCP packets with SYN+FIN set"); | ||||
VNET_DEFINE(int, tcp_do_prr_conservative) = 0; | |||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | CTLFLAG_RW, | |||||
&VNET_NAME(tcp_do_prr_conservative), 0, | |||||
"Do conservative Proportional Rate Reduction"); | |||||
VNET_DEFINE(int, tcp_do_prr) = 1; | VNET_DEFINE(int, tcp_do_prr) = 1; | ||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW, | SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(tcp_do_prr), 1, | &VNET_NAME(tcp_do_prr), 1, | ||||
"Enable Proportional Rate Reduction per RFC 6937"); | "Enable Proportional Rate Reduction per RFC 6937"); | ||||
VNET_DEFINE(int, tcp_do_lrd) = 0; | VNET_DEFINE(int, tcp_do_lrd) = 0; | ||||
SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_lrd, CTLFLAG_VNET | CTLFLAG_RW, | SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_lrd, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(tcp_do_lrd), 1, | &VNET_NAME(tcp_do_lrd), 1, | ||||
▲ Show 20 Lines • Show All 1,311 Lines • ▼ Show 20 Lines | tcp_handle_wakeup(struct tcpcb *tp) | ||||
} | } | ||||
} | } | ||||
void | void | ||||
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, | tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, | ||||
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos) | struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos) | ||||
{ | { | ||||
uint16_t thflags; | uint16_t thflags; | ||||
int acked, ourfinisacked, needoutput = 0, sack_changed; | int acked, ourfinisacked, needoutput = 0; | ||||
sackstatus_t sack_changed; | |||||
int rstreason, todrop, win, incforsyn = 0; | int rstreason, todrop, win, incforsyn = 0; | ||||
uint32_t tiwin; | uint32_t tiwin; | ||||
uint16_t nsegs; | uint16_t nsegs; | ||||
char *s; | char *s; | ||||
struct inpcb *inp = tptoinpcb(tp); | struct inpcb *inp = tptoinpcb(tp); | ||||
struct in_conninfo *inc = &inp->inp_inc; | struct in_conninfo *inc = &inp->inp_inc; | ||||
struct mbuf *mfree; | struct mbuf *mfree; | ||||
struct tcpopt to; | struct tcpopt to; | ||||
int tfo_syn; | int tfo_syn; | ||||
u_int maxseg; | u_int maxseg; | ||||
thflags = tcp_get_flags(th); | thflags = tcp_get_flags(th); | ||||
tp->sackhint.last_sack_ack = 0; | tp->sackhint.last_sack_ack = 0; | ||||
sack_changed = 0; | sack_changed = SACK_NOCHANGE; | ||||
nsegs = max(1, m->m_pkthdr.lro_nsegs); | nsegs = max(1, m->m_pkthdr.lro_nsegs); | ||||
NET_EPOCH_ASSERT(); | NET_EPOCH_ASSERT(); | ||||
INP_WLOCK_ASSERT(inp); | INP_WLOCK_ASSERT(inp); | ||||
KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", | KASSERT(tp->t_state > TCPS_LISTEN, ("%s: TCPS_LISTEN", | ||||
__func__)); | __func__)); | ||||
KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", | KASSERT(tp->t_state != TCPS_TIME_WAIT, ("%s: TCPS_TIME_WAIT", | ||||
__func__)); | __func__)); | ||||
▲ Show 20 Lines • Show All 1,022 Lines • ▼ Show 20 Lines | if (SEQ_LEQ(th->th_ack, tp->snd_una)) { | ||||
* 1) Old acks | * 1) Old acks | ||||
* 2) Acks with SACK but without any new SACK | * 2) Acks with SACK but without any new SACK | ||||
* information in them. These could result from | * information in them. These could result from | ||||
* any anomaly in the network like a switch | * any anomaly in the network like a switch | ||||
* duplicating packets or a possible DoS attack. | * duplicating packets or a possible DoS attack. | ||||
*/ | */ | ||||
if (th->th_ack != tp->snd_una || | if (th->th_ack != tp->snd_una || | ||||
(tcp_is_sack_recovery(tp, &to) && | (tcp_is_sack_recovery(tp, &to) && | ||||
!sack_changed)) | (sack_changed == SACK_NOCHANGE))) | ||||
break; | break; | ||||
else if (!tcp_timer_active(tp, TT_REXMT)) | else if (!tcp_timer_active(tp, TT_REXMT)) | ||||
tp->t_dupacks = 0; | tp->t_dupacks = 0; | ||||
else if (++tp->t_dupacks > tcprexmtthresh || | else if (++tp->t_dupacks > tcprexmtthresh || | ||||
IN_FASTRECOVERY(tp->t_flags)) { | IN_FASTRECOVERY(tp->t_flags)) { | ||||
cc_ack_received(tp, th, nsegs, | cc_ack_received(tp, th, nsegs, | ||||
CC_DUPACK); | CC_DUPACK); | ||||
if (V_tcp_do_prr && | if (V_tcp_do_prr && | ||||
IN_FASTRECOVERY(tp->t_flags)) { | IN_FASTRECOVERY(tp->t_flags) && | ||||
tcp_do_prr_ack(tp, th, &to); | (tp->t_flags & TF_SACK_PERMIT)) { | ||||
/* | |||||
* While dealing with DupAcks, always use PRR-CRB | |||||
* (the Conservative Reduction Bound of RFC 6937). | |||||
*/ | |||||
tcp_do_prr_ack(tp, th, &to, SACK_NEWLOSS); | |||||
} else if (tcp_is_sack_recovery(tp, &to) && | } else if (tcp_is_sack_recovery(tp, &to) && | ||||
IN_FASTRECOVERY(tp->t_flags)) { | IN_FASTRECOVERY(tp->t_flags)) { | ||||
int awnd; | int awnd; | ||||
/* | /* | ||||
* Compute the amount of data in flight first. | * Compute the amount of data in flight first. | ||||
* We can inject new data into the pipe iff | * We can inject new data into the pipe iff | ||||
* we have less than 1/2 the original window's | * we have less than 1/2 the original window's | ||||
▲ Show 20 Lines • Show All 162 Lines • ▼ Show 20 Lines | enter_recovery: | ||||
* counter as per rfc6675. The variable | * counter as per rfc6675. The variable | ||||
* sack_changed tracks all changes to the SACK | * sack_changed tracks all changes to the SACK | ||||
* scoreboard, including when partial ACKs without | * scoreboard, including when partial ACKs without | ||||
* SACK options are received, and clear the scoreboard | * SACK options are received, and clear the scoreboard | ||||
* from the left side. Such partial ACKs should not be | * from the left side. Such partial ACKs should not be | ||||
* counted as dupacks here. | * counted as dupacks here. | ||||
*/ | */ | ||||
if (tcp_is_sack_recovery(tp, &to) && | if (tcp_is_sack_recovery(tp, &to) && | ||||
sack_changed) { | (sack_changed != SACK_NOCHANGE)) { | ||||
tp->t_dupacks++; | tp->t_dupacks++; | ||||
/* limit overhead by setting maxseg last */ | /* limit overhead by setting maxseg last */ | ||||
if (!IN_FASTRECOVERY(tp->t_flags) && | if (!IN_FASTRECOVERY(tp->t_flags) && | ||||
(tp->sackhint.sacked_bytes > | (tp->sackhint.sacked_bytes > | ||||
((tcprexmtthresh - 1) * | ((tcprexmtthresh - 1) * | ||||
(maxseg = tcp_maxseg(tp))))) { | (maxseg = tcp_maxseg(tp))))) { | ||||
goto enter_recovery; | goto enter_recovery; | ||||
} | } | ||||
Show All 9 Lines | resume_partialack: | ||||
* for the other side's cached packets, retract it. | * for the other side's cached packets, retract it. | ||||
*/ | */ | ||||
if (IN_FASTRECOVERY(tp->t_flags)) { | if (IN_FASTRECOVERY(tp->t_flags)) { | ||||
if (SEQ_LT(th->th_ack, tp->snd_recover)) { | if (SEQ_LT(th->th_ack, tp->snd_recover)) { | ||||
if (tp->t_flags & TF_SACK_PERMIT) | if (tp->t_flags & TF_SACK_PERMIT) | ||||
if (V_tcp_do_prr && to.to_flags & TOF_SACK) { | if (V_tcp_do_prr && to.to_flags & TOF_SACK) { | ||||
tcp_timer_activate(tp, TT_REXMT, 0); | tcp_timer_activate(tp, TT_REXMT, 0); | ||||
tp->t_rtttime = 0; | tp->t_rtttime = 0; | ||||
tcp_do_prr_ack(tp, th, &to); | tcp_do_prr_ack(tp, th, &to, sack_changed); | ||||
tp->t_flags |= TF_ACKNOW; | tp->t_flags |= TF_ACKNOW; | ||||
(void) tcp_output(tp); | (void) tcp_output(tp); | ||||
} else | } else | ||||
tcp_sack_partialack(tp, th); | tcp_sack_partialack(tp, th); | ||||
else | else | ||||
tcp_newreno_partial_ack(tp, th); | tcp_newreno_partial_ack(tp, th); | ||||
} else | } else | ||||
cc_post_recovery(tp, th); | cc_post_recovery(tp, th); | ||||
} else if (IN_CONGRECOVERY(tp->t_flags)) { | } else if (IN_CONGRECOVERY(tp->t_flags)) { | ||||
if (SEQ_LT(th->th_ack, tp->snd_recover)) { | if (SEQ_LT(th->th_ack, tp->snd_recover)) { | ||||
if (V_tcp_do_prr) { | if (V_tcp_do_prr) { | ||||
tp->sackhint.delivered_data = BYTES_THIS_ACK(tp, th); | tp->sackhint.delivered_data = BYTES_THIS_ACK(tp, th); | ||||
tp->snd_fack = th->th_ack; | tp->snd_fack = th->th_ack; | ||||
tcp_do_prr_ack(tp, th, &to); | /* | ||||
* During ECN cwnd reduction, always use PRR-SSRB | |||||
* (the Slow Start Reduction Bound of RFC 6937). | |||||
*/ | |||||
tcp_do_prr_ack(tp, th, &to, SACK_CHANGE); | |||||
(void) tcp_output(tp); | (void) tcp_output(tp); | ||||
} | } | ||||
} else | } else | ||||
cc_post_recovery(tp, th); | cc_post_recovery(tp, th); | ||||
} | } | ||||
/* | /* | ||||
* If we reach this point, ACK is not a duplicate, | * If we reach this point, ACK is not a duplicate, | ||||
* i.e., it ACKs something we sent. | * i.e., it ACKs something we sent. | ||||
▲ Show 20 Lines • Show All 1,106 Lines • ▼ Show 20 Lines | if (maxmtu && thcmtu) | ||||
mss = min(maxmtu, thcmtu) - min_protoh; | mss = min(maxmtu, thcmtu) - min_protoh; | ||||
else if (maxmtu || thcmtu) | else if (maxmtu || thcmtu) | ||||
mss = max(maxmtu, thcmtu) - min_protoh; | mss = max(maxmtu, thcmtu) - min_protoh; | ||||
return (mss); | return (mss); | ||||
} | } | ||||
void | void | ||||
tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to) | tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, sackstatus_t sack_changed) | ||||
{ | { | ||||
int snd_cnt = 0, limit = 0, del_data = 0, pipe = 0; | int snd_cnt = 0, limit = 0, del_data = 0, pipe = 0; | ||||
int maxseg = tcp_maxseg(tp); | int maxseg = tcp_maxseg(tp); | ||||
INP_WLOCK_ASSERT(tptoinpcb(tp)); | INP_WLOCK_ASSERT(tptoinpcb(tp)); | ||||
/* | /* | ||||
* Compute the amount of data that this ACK is indicating | * Compute the amount of data that this ACK is indicating | ||||
Show All 23 Lines | tcp_do_prr_ack(struct tcpcb *tp, struct tcphdr *th, struct tcpopt *to, sackstatus_t sack_changed) | ||||
if (pipe >= tp->snd_ssthresh) { | if (pipe >= tp->snd_ssthresh) { | ||||
if (tp->sackhint.recover_fs == 0) | if (tp->sackhint.recover_fs == 0) | ||||
tp->sackhint.recover_fs = | tp->sackhint.recover_fs = | ||||
imax(1, tp->snd_nxt - tp->snd_una); | imax(1, tp->snd_nxt - tp->snd_una); | ||||
snd_cnt = howmany((long)tp->sackhint.prr_delivered * | snd_cnt = howmany((long)tp->sackhint.prr_delivered * | ||||
tp->snd_ssthresh, tp->sackhint.recover_fs) - | tp->snd_ssthresh, tp->sackhint.recover_fs) - | ||||
tp->sackhint.prr_out; | tp->sackhint.prr_out; | ||||
} else { | } else { | ||||
if (V_tcp_do_prr_conservative || (del_data == 0)) | /* | ||||
* PRR 6937bis heuristic: | |||||
* - A partial ACK without a SACK block beneath snd_recover | |||||
* indicates further loss. | |||||
* - A SACK scoreboard update adding a new hole indicates | |||||
* further loss, so be conservative and send at most one | |||||
* segment. | |||||
* - Prevent ACK splitting attacks by being conservative | |||||
* when no new data is ACKed. | |||||
*/ | |||||
if ((sack_changed == SACK_NEWLOSS) || (del_data == 0)) | |||||
limit = tp->sackhint.prr_delivered - | limit = tp->sackhint.prr_delivered - | ||||
tp->sackhint.prr_out; | tp->sackhint.prr_out; | ||||
else | else | ||||
limit = imax(tp->sackhint.prr_delivered - | limit = imax(tp->sackhint.prr_delivered - | ||||
tp->sackhint.prr_out, del_data) + | tp->sackhint.prr_out, del_data) + | ||||
maxseg; | maxseg; | ||||
snd_cnt = imin((tp->snd_ssthresh - pipe), limit); | snd_cnt = imin((tp->snd_ssthresh - pipe), limit); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 97 Lines • Show Last 20 Lines |