Changeset View
Standalone View
sys/netinet/tcp_input.c
Show First 20 Lines • Show All 1,475 Lines • ▼ Show 20 Lines | drop: | ||||
return (IPPROTO_DONE); | return (IPPROTO_DONE); | ||||
} | } | ||||
static void | static void | ||||
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, | tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, | ||||
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, | struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, | ||||
int ti_locked) | int ti_locked) | ||||
{ | { | ||||
int thflags, acked, ourfinisacked, needoutput = 0; | int thflags, acked, ourfinisacked, needoutput = 0, sack_changed; | ||||
int rstreason, todrop, win; | int rstreason, todrop, win; | ||||
u_long tiwin; | u_long tiwin; | ||||
char *s; | char *s; | ||||
struct in_conninfo *inc; | struct in_conninfo *inc; | ||||
struct mbuf *mfree; | struct mbuf *mfree; | ||||
struct tcpopt to; | struct tcpopt to; | ||||
#ifdef TCPDEBUG | #ifdef TCPDEBUG | ||||
/* | /* | ||||
* The size of tcp_saveipgen must be the size of the max ip header, | * The size of tcp_saveipgen must be the size of the max ip header, | ||||
* now IPv6. | * now IPv6. | ||||
*/ | */ | ||||
u_char tcp_saveipgen[IP6_HDR_LEN]; | u_char tcp_saveipgen[IP6_HDR_LEN]; | ||||
struct tcphdr tcp_savetcp; | struct tcphdr tcp_savetcp; | ||||
short ostate = 0; | short ostate = 0; | ||||
#endif | #endif | ||||
thflags = th->th_flags; | thflags = th->th_flags; | ||||
inc = &tp->t_inpcb->inp_inc; | inc = &tp->t_inpcb->inp_inc; | ||||
tp->sackhint.last_sack_ack = 0; | tp->sackhint.last_sack_ack = 0; | ||||
sack_changed = 0; | |||||
/* | /* | ||||
* If this is either a state-changing packet or current state isn't | * If this is either a state-changing packet or current state isn't | ||||
* established, we require a write lock on tcbinfo. Otherwise, we | * established, we require a write lock on tcbinfo. Otherwise, we | ||||
* allow the tcbinfo to be in either alocked or unlocked, as the | * allow the tcbinfo to be in either alocked or unlocked, as the | ||||
* caller may have unnecessarily acquired a write lock due to a race. | * caller may have unnecessarily acquired a write lock due to a race. | ||||
*/ | */ | ||||
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || | if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || | ||||
▲ Show 20 Lines • Show All 907 Lines • ▼ Show 20 Lines | #endif | ||||
case TCPS_LAST_ACK: | case TCPS_LAST_ACK: | ||||
if (SEQ_GT(th->th_ack, tp->snd_max)) { | if (SEQ_GT(th->th_ack, tp->snd_max)) { | ||||
TCPSTAT_INC(tcps_rcvacktoomuch); | TCPSTAT_INC(tcps_rcvacktoomuch); | ||||
goto dropafterack; | goto dropafterack; | ||||
} | } | ||||
if ((tp->t_flags & TF_SACK_PERMIT) && | if ((tp->t_flags & TF_SACK_PERMIT) && | ||||
((to.to_flags & TOF_SACK) || | ((to.to_flags & TOF_SACK) || | ||||
!TAILQ_EMPTY(&tp->snd_holes))) | !TAILQ_EMPTY(&tp->snd_holes))) | ||||
tcp_sack_doack(tp, &to, th->th_ack); | sack_changed = tcp_sack_doack(tp, &to, th->th_ack); | ||||
else | else | ||||
/* | /* | ||||
* Reset the value so that previous (valid) value | * Reset the value so that previous (valid) value | ||||
* from the last ack with SACK doesn't get used. | * from the last ack with SACK doesn't get used. | ||||
*/ | */ | ||||
tp->sackhint.sacked_bytes = 0; | tp->sackhint.sacked_bytes = 0; | ||||
/* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ | /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ | ||||
hhook_run_tcp_est_in(tp, th, &to); | hhook_run_tcp_est_in(tp, th, &to); | ||||
if (SEQ_LEQ(th->th_ack, tp->snd_una)) { | if (SEQ_LEQ(th->th_ack, tp->snd_una)) { | ||||
if (tlen == 0 && tiwin == tp->snd_wnd) { | if (tlen == 0 && | ||||
(tiwin == tp->snd_wnd || | |||||
(tp->t_flags & TF_SACK_PERMIT))) { | |||||
/* | /* | ||||
* If this is the first time we've seen a | * If this is the first time we've seen a | ||||
* FIN from the remote, this is not a | * FIN from the remote, this is not a | ||||
* duplicate and it needs to be processed | * duplicate and it needs to be processed | ||||
* normally. This happens during a | * normally. This happens during a | ||||
* simultaneous close. | * simultaneous close. | ||||
*/ | */ | ||||
if ((thflags & TH_FIN) && | if ((thflags & TH_FIN) && | ||||
Show All 25 Lines | if (SEQ_LEQ(th->th_ack, tp->snd_una)) { | ||||
* network (they're now cached at the receiver) | * network (they're now cached at the receiver) | ||||
* so bump cwnd by the amount in the receiver | * so bump cwnd by the amount in the receiver | ||||
* to keep a constant cwnd packets in the | * to keep a constant cwnd packets in the | ||||
* network. | * network. | ||||
* | * | ||||
* When using TCP ECN, notify the peer that | * When using TCP ECN, notify the peer that | ||||
* we reduced the cwnd. | * we reduced the cwnd. | ||||
*/ | */ | ||||
if (!tcp_timer_active(tp, TT_REXMT) || | /* | ||||
th->th_ack != tp->snd_una) | * Old acks should not be affecting t_dupacks | ||||
* counting so ignore them. | |||||
*/ | |||||
if (th->th_ack != tp->snd_una || | |||||
(tp->t_flags & TF_SACK_PERMIT) && !sack_changed) | |||||
hiren: Hum, I need to update the comment about this and also should update the condition to say:
(tp… | |||||
hirenAuthorUnsubmitted Not Done Inline ActionsI think I should add that condition. We don't want to skip striking the counter if window has changed. hiren: I think I should add that condition. We don't want to skip striking the counter if window has… | |||||
Not Done Inline ActionsWhat's the rationale for checking the window information on a SACK session? On a non-SACK session, it is a clue that this a packet might have been lost (i.e. this is a duplicate ACK). On a SACK session, the updated SACK information is a clue that a packet might have been lost. jtl: What's the rationale for checking the window information on a SACK session? On a non-SACK… | |||||
Not Done Inline ActionsYou are right. If SACK in present and sack_changed is FALSE, there is no point in checking window information. As at that point, it cannot be a dupack and we'd just want to skip it. I'll revert back to the prev condition and remove ' tiwin == tp->snd_wnd' check. hiren: You are right. If SACK in present and sack_changed is FALSE, there is no point in checking… | |||||
break; | |||||
else if (!tcp_timer_active(tp, TT_REXMT)) | |||||
tp->t_dupacks = 0; | tp->t_dupacks = 0; | ||||
else if (++tp->t_dupacks > tcprexmtthresh || | else if (++tp->t_dupacks > tcprexmtthresh || | ||||
IN_FASTRECOVERY(tp->t_flags)) { | IN_FASTRECOVERY(tp->t_flags)) { | ||||
cc_ack_received(tp, th, CC_DUPACK); | cc_ack_received(tp, th, CC_DUPACK); | ||||
if ((tp->t_flags & TF_SACK_PERMIT) && | if ((tp->t_flags & TF_SACK_PERMIT) && | ||||
IN_FASTRECOVERY(tp->t_flags)) { | IN_FASTRECOVERY(tp->t_flags)) { | ||||
int awnd; | int awnd; | ||||
▲ Show 20 Lines • Show All 112 Lines • ▼ Show 20 Lines | if (SEQ_LEQ(th->th_ack, tp->snd_una)) { | ||||
("%s: sent too much", | ("%s: sent too much", | ||||
__func__)); | __func__)); | ||||
tp->snd_limited = 2; | tp->snd_limited = 2; | ||||
} else if (sent > 0) | } else if (sent > 0) | ||||
++tp->snd_limited; | ++tp->snd_limited; | ||||
tp->snd_cwnd = oldcwnd; | tp->snd_cwnd = oldcwnd; | ||||
goto drop; | goto drop; | ||||
} | } | ||||
} else | } | ||||
tp->t_dupacks = 0; | |||||
break; | break; | ||||
} else { | |||||
tp->t_dupacks = 0; | |||||
if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed) | |||||
tp->t_dupacks = 1; | |||||
} | } | ||||
KASSERT(SEQ_GT(th->th_ack, tp->snd_una), | KASSERT(SEQ_GT(th->th_ack, tp->snd_una), | ||||
("%s: th_ack <= snd_una", __func__)); | ("%s: th_ack <= snd_una", __func__)); | ||||
/* | /* | ||||
* If the congestion window was inflated to account | * If the congestion window was inflated to account | ||||
Not Done Inline ActionsThis portion looks incorrect and unnecessary. This causes a wrong dupack increment event when an ACK comes which moves left edge and also is the first ack with SACK info in it. This code would detect that as a duplicate ack which is not correct. An example below: 00:00:00.046409 IP 10.10.10.10.56022 > 10.10.11.12.http: Flags [.], ack 1680, win 1018, options [nop,nop,TS val 1056985310 ecr 195535656], length 0 00:00:00.048354 IP 10.10.10.10.56022 > 10.10.11.12.http: Flags [.], ack 3128, win 1040, options [nop,nop,TS val 1056985313 ecr 195535657,nop,nop,sack 1 {4576:6024}], length 0 00:00:00.050344 IP 10.10.10.10.56022 > 10.10.11.12.http: Flags [.], ack 3128, win 1040, options [nop,nop,TS val 1056985315 ecr 195535657,nop,nop,sack 1 {4576:7472}], length 0 00:00:00.050580 IP 10.10.10.10.56022 > 10.10.11.12.http: Flags [.], ack 3128, win 1040, options [nop,nop,TS val 1056985315 ecr 195535657,nop,nop,sack 1 {4576:8920}], length 0 00:00:00.050738 IP 10.10.11.12.http > 10.10.10.10.56022: Flags [.], seq 3128:4576, ack 141, win 1040, options [nop,nop,TS val 195535684 ecr 1056985315], length 1448: HTTP With this code, first 'ack 3128' is considered first dupack, which is wrong. I'll have to think a bit more about this and update. hiren: This portion looks incorrect and unnecessary. This causes a wrong dupack increment event when… | |||||
Not Done Inline ActionsHuh. Now that I think about it, this *is* the correct way to identify a "dupack" if we want to follow rfc6675. hiren: Huh. Now that I think about it, this *is* the correct way to identify a "dupack" if we want to… | |||||
Not Done Inline Actions
Agreed. However, I think this code will produce false positives. Namely, I think sack_changed will always be true when snd_una advances. I think this check would be closer to the behavior envisioned by RFC6675 if sack_changed only indicated whether there was a new SACK block (as distinct from advancing snd_una). jtl: > Huh. Now that I think about it, this *is* the correct way to identify a "dupack" if we want… | |||||
Not Done Inline Actionsrfc6675 says "a segment that arrives carrying a SACK block that identifies previously unacknowledged and un-SACKed octets between HighACK and HighData." And not strictly a *new* sack block. And as you rightly said, we count snd_una-th_ack a hole, any change in that should be considered a new info. hiren: rfc6675 says "a segment that arrives carrying a SACK block that identifies previously… | |||||
Not Done Inline ActionsHmmm... I am looking at the same words as you, but reading them differently. :-) To be a "duplicate acknowledgement", the ACK needs to contain "a SACK block that identifies previously unacknowledged and un-SACKed octets". Even though we internally treat an ACK that advances snd_una as a SACK for (snd_una, th_ack), that is merely an implementation choice. It doesn't make the ACK actually become a SACK block. And, I think there is good reason to treat these differently. A new SACK block is an indication that packets with lower sequence numbers may have been lost. However, receiving an ACK that advances snd_una does not provide such an indication. jtl: Hmmm... I am looking at the same words as you, but reading them differently. :-)
To be a… | |||||
* for the other side's cached packets, retract it. | * for the other side's cached packets, retract it. | ||||
*/ | */ | ||||
if (IN_FASTRECOVERY(tp->t_flags)) { | if (IN_FASTRECOVERY(tp->t_flags)) { | ||||
if (SEQ_LT(th->th_ack, tp->snd_recover)) { | if (SEQ_LT(th->th_ack, tp->snd_recover)) { | ||||
if (tp->t_flags & TF_SACK_PERMIT) | if (tp->t_flags & TF_SACK_PERMIT) | ||||
tcp_sack_partialack(tp, th); | tcp_sack_partialack(tp, th); | ||||
else | else | ||||
tcp_newreno_partial_ack(tp, th); | tcp_newreno_partial_ack(tp, th); | ||||
} else | } else | ||||
cc_post_recovery(tp, th); | cc_post_recovery(tp, th); | ||||
} | } | ||||
tp->t_dupacks = 0; | |||||
/* | /* | ||||
* If we reach this point, ACK is not a duplicate, | * If we reach this point, ACK is not a duplicate, | ||||
* i.e., it ACKs something we sent. | * i.e., it ACKs something we sent. | ||||
*/ | */ | ||||
if (tp->t_flags & TF_NEEDSYN) { | if (tp->t_flags & TF_NEEDSYN) { | ||||
/* | /* | ||||
* T/TCP: Connection was half-synchronized, and our | * T/TCP: Connection was half-synchronized, and our | ||||
* SYN has been ACK'd (so connection is now fully | * SYN has been ACK'd (so connection is now fully | ||||
▲ Show 20 Lines • Show All 1,114 Lines • Show Last 20 Lines |
Hum, I need to update the comment about this and also should update the condition to say:
(tp->t_flags &TF_SACK_PERMIT) && tiwin == tp->snd_wnd && !sack_changed
Do we care about window being the same here in this check?