Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_output.c
Show First 20 Lines • Show All 185 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
/* | /* | ||||
* Tcp output routine: figure out what should be sent and send it. | * Tcp output routine: figure out what should be sent and send it. | ||||
*/ | */ | ||||
int | int | ||||
tcp_output(struct tcpcb *tp) | tcp_output(struct tcpcb *tp) | ||||
{ | { | ||||
struct socket *so = tp->t_inpcb->inp_socket; | struct inpcb *inp = tp->t_inpcb; | ||||
struct socket *so = inp->inp_socket; | |||||
int32_t len; | int32_t len; | ||||
uint32_t recwin, sendwin; | uint32_t recwin, sendwin; | ||||
int off, flags, error = 0; /* Keep compiler happy */ | int off, flags, error = 0; /* Keep compiler happy */ | ||||
struct mbuf *m; | struct mbuf *m; | ||||
struct ip *ip = NULL; | struct ip *ip = NULL; | ||||
#ifdef TCPDEBUG | #ifdef TCPDEBUG | ||||
struct ipovly *ipov = NULL; | struct ipovly *ipov = NULL; | ||||
#endif | #endif | ||||
struct tcphdr *th; | struct tcphdr *th; | ||||
u_char opt[TCP_MAXOLEN]; | u_char opt[TCP_MAXOLEN]; | ||||
unsigned ipoptlen, optlen, hdrlen; | unsigned ipoptlen, optlen, hdrlen; | ||||
#if defined(IPSEC) || defined(IPSEC_SUPPORT) | #if defined(IPSEC) || defined(IPSEC_SUPPORT) | ||||
unsigned ipsec_optlen = 0; | unsigned ipsec_optlen = 0; | ||||
#endif | #endif | ||||
int idle, sendalot; | int idle, sendalot; | ||||
int sack_rxmit, sack_bytes_rxmt; | int sack_rxmit, sack_bytes_rxmt; | ||||
struct sackhole *p; | struct sackhole *p; | ||||
int tso, mtu; | int tso, mtu; | ||||
struct tcpopt to; | struct tcpopt to; | ||||
unsigned int wanted_cookie = 0; | unsigned int wanted_cookie = 0; | ||||
unsigned int dont_sendalot = 0; | unsigned int dont_sendalot = 0; | ||||
sbintime_t t; | |||||
#if 0 | #if 0 | ||||
int maxburst = TCP_MAXBURST; | int maxburst = TCP_MAXBURST; | ||||
#endif | #endif | ||||
#ifdef INET6 | #ifdef INET6 | ||||
struct ip6_hdr *ip6 = NULL; | struct ip6_hdr *ip6 = NULL; | ||||
int isipv6; | int isipv6; | ||||
isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0; | isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) != 0; | ||||
#endif | #endif | ||||
t = tcp_ts_getsbintime(); | |||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(inp); | ||||
#ifdef TCP_OFFLOAD | #ifdef TCP_OFFLOAD | ||||
if (tp->t_flags & TF_TOE) | if (tp->t_flags & TF_TOE) | ||||
return (tcp_offload_output(tp)); | return (tcp_offload_output(tp)); | ||||
#endif | #endif | ||||
/* | /* | ||||
* For TFO connections in SYN_RECEIVED, only allow the initial | * For TFO connections in SYN_RECEIVED, only allow the initial | ||||
* SYN|ACK and those sent by the retransmit timer. | * SYN|ACK and those sent by the retransmit timer. | ||||
*/ | */ | ||||
if (IS_FASTOPEN(tp->t_flags) && | if (IS_FASTOPEN(tp->t_flags) && | ||||
(tp->t_state == TCPS_SYN_RECEIVED) && | (tp->t_state == TCPS_SYN_RECEIVED) && | ||||
SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN|ACK sent */ | SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN|ACK sent */ | ||||
(tp->snd_nxt != tp->snd_una)) /* not a retransmit */ | (tp->snd_nxt != tp->snd_una)) /* not a retransmit */ | ||||
return (0); | return (0); | ||||
/* | /* | ||||
* Determine length of data that should be transmitted, | * Determine length of data that should be transmitted, | ||||
* and flags that will be used. | * and flags that will be used. | ||||
* If there is some data or critical controls (SYN, RST) | * If there is some data or critical controls (SYN, RST) | ||||
* to send, then transmit; otherwise, investigate further. | * to send, then transmit; otherwise, investigate further. | ||||
*/ | */ | ||||
idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una); | idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una); | ||||
if (idle && ticks - tp->t_rcvtime >= tp->t_rxtcur) | if (idle && (t - tp->t_rcvtime) >= tp->t_rxtcur) | ||||
cc_after_idle(tp); | cc_after_idle(tp); | ||||
tp->t_flags &= ~TF_LASTIDLE; | tp->t_flags &= ~TF_LASTIDLE; | ||||
if (idle) { | if (idle) { | ||||
if (tp->t_flags & TF_MORETOCOME) { | if (tp->t_flags & TF_MORETOCOME) { | ||||
tp->t_flags |= TF_LASTIDLE; | tp->t_flags |= TF_LASTIDLE; | ||||
idle = 0; | idle = 0; | ||||
} | } | ||||
} | } | ||||
again: | again: | ||||
t = tcp_ts_getsbintime(); | |||||
/* | /* | ||||
* If we've recently taken a timeout, snd_max will be greater than | * If we've recently taken a timeout, snd_max will be greater than | ||||
* snd_nxt. There may be SACK information that allows us to avoid | * snd_nxt. There may be SACK information that allows us to avoid | ||||
* resending already delivered data. Adjust snd_nxt accordingly. | * resending already delivered data. Adjust snd_nxt accordingly. | ||||
*/ | */ | ||||
if ((tp->t_flags & TF_SACK_PERMIT) && | if ((tp->t_flags & TF_SACK_PERMIT) && | ||||
SEQ_LT(tp->snd_nxt, tp->snd_max)) | SEQ_LT(tp->snd_nxt, tp->snd_max)) | ||||
tcp_sack_adjust(tp); | tcp_sack_adjust(tp); | ||||
▲ Show 20 Lines • Show All 535 Lines • ▼ Show 20 Lines | if ((tp->t_flags & TF_NOOPT) == 0) { | ||||
/* Window scaling. */ | /* Window scaling. */ | ||||
if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) { | if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) { | ||||
to.to_wscale = tp->request_r_scale; | to.to_wscale = tp->request_r_scale; | ||||
to.to_flags |= TOF_SCALE; | to.to_flags |= TOF_SCALE; | ||||
} | } | ||||
/* Timestamps. */ | /* Timestamps. */ | ||||
if ((tp->t_flags & TF_RCVD_TSTMP) || | if ((tp->t_flags & TF_RCVD_TSTMP) || | ||||
((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) { | ((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) { | ||||
to.to_tsval = tcp_ts_getticks() + tp->ts_offset; | |||||
/* | |||||
* This next part is subtle and extremely critical. | |||||
* If we've been idle long enough with respect to | |||||
* the peer we have to lie about our timestamp so | |||||
* that the peer doesn't see our timestamp as being | |||||
* "before" the last one that we sent out. The TCP | |||||
* standard gives no mention to high resolution | |||||
* timestamp interoperability. | |||||
*/ | |||||
if (SEQ_GT(tp->t_lasttsecr, TCP_SBT_TO_TS(t))) | |||||
to.to_tsval = (uint32_t)(tp->t_lasttsecr + MAX_TS_STEP); | |||||
else | |||||
to.to_tsval = TCP_SBT_TO_TS(t); | |||||
tp->t_lasttsval = to.to_tsval; | |||||
bdrewery: We ended up reverting this (our use case has keepalives, so we ignored the problem), as the timestamp will still wrap on the next packet. | | | | |
to.to_tsecr = tp->ts_recent; | to.to_tsecr = tp->ts_recent; | ||||
to.to_flags |= TOF_TS; | to.to_flags |= TOF_TS; | ||||
} | } | ||||
/* Set receive buffer autosizing timestamp. */ | /* Set receive buffer autosizing timestamp. */ | ||||
if (tp->rfbuf_ts == 0 && | if (tp->rfbuf_ts == 0 && | ||||
(so->so_rcv.sb_flags & SB_AUTOSIZE)) | (so->so_rcv.sb_flags & SB_AUTOSIZE)) | ||||
tp->rfbuf_ts = tcp_ts_getticks(); | tp->rfbuf_ts = TCP_SBT_TO_TS(t); | ||||
/* Selective ACK's. */ | /* Selective ACK's. */ | ||||
if (tp->t_flags & TF_SACK_PERMIT) { | if (tp->t_flags & TF_SACK_PERMIT) { | ||||
if (flags & TH_SYN) | if (flags & TH_SYN) | ||||
to.to_flags |= TOF_SACKPERM; | to.to_flags |= TOF_SACKPERM; | ||||
else if (TCPS_HAVEESTABLISHED(tp->t_state) && | else if (TCPS_HAVEESTABLISHED(tp->t_state) && | ||||
(tp->t_flags & TF_SACK_PERMIT) && | (tp->t_flags & TF_SACK_PERMIT) && | ||||
tp->rcv_numsacks > 0) { | tp->rcv_numsacks > 0) { | ||||
▲ Show 20 Lines • Show All 656 Lines • ▼ Show 20 Lines | if ((tp->t_flags & TF_FORCEDATA) == 0 || | ||||
tp->snd_nxt += len; | tp->snd_nxt += len; | ||||
if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { | if (SEQ_GT(tp->snd_nxt, tp->snd_max)) { | ||||
tp->snd_max = tp->snd_nxt; | tp->snd_max = tp->snd_nxt; | ||||
/* | /* | ||||
* Time this transmission if not a retransmission and | * Time this transmission if not a retransmission and | ||||
* not currently timing anything. | * not currently timing anything. | ||||
*/ | */ | ||||
if (tp->t_rtttime == 0) { | if (tp->t_rtttime == 0) { | ||||
tp->t_rtttime = ticks; | tp->t_rtttime = t; | ||||
tp->t_rtseq = startseq; | tp->t_rtseq = startseq; | ||||
TCPSTAT_INC(tcps_segstimed); | TCPSTAT_INC(tcps_segstimed); | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Set retransmit timer if not currently set, | * Set retransmit timer if not currently set, | ||||
* and not doing a pure ack or a keep-alive probe. | * and not doing a pure ack or a keep-alive probe. | ||||
▲ Show 20 Lines • Show All 149 Lines • ▼ Show 20 Lines | #endif | ||||
if (sendalot) | if (sendalot) | ||||
goto again; | goto again; | ||||
return (0); | return (0); | ||||
} | } | ||||
void | void | ||||
tcp_setpersist(struct tcpcb *tp) | tcp_setpersist(struct tcpcb *tp) | ||||
{ | { | ||||
int t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; | uint64_t tt, t = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1; | ||||
int tt; | |||||
tp->t_flags &= ~TF_PREVVALID; | tp->t_flags &= ~TF_PREVVALID; | ||||
if (tcp_timer_active(tp, TT_REXMT)) | if (tcp_timer_active(tp, TT_REXMT)) | ||||
panic("tcp_setpersist: retransmit pending"); | panic("tcp_setpersist: retransmit pending"); | ||||
/* | /* | ||||
* Start/restart persistence timer. | * Start/restart persistence timer. | ||||
*/ | */ | ||||
TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift], | TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift], | ||||
tcp_persmin, tcp_persmax); | tcp_persmin*tick_sbt, tcp_persmax*tick_sbt); | ||||
tcp_timer_activate(tp, TT_PERSIST, tt); | tcp_timer_activate(tp, TT_PERSIST, tt); | ||||
if (tp->t_rxtshift < TCP_MAXRXTSHIFT) | if (tp->t_rxtshift < TCP_MAXRXTSHIFT) | ||||
tp->t_rxtshift++; | tp->t_rxtshift++; | ||||
} | } | ||||
/* | /* | ||||
* Insert TCP options according to the supplied parameters to the place | * Insert TCP options according to the supplied parameters to the place | ||||
* optp in a consistent way. Can handle unaligned destinations. | * optp in a consistent way. Can handle unaligned destinations. | ||||
▲ Show 20 Lines • Show All 232 Lines • Show Last 20 Lines |
We ended up reverting this (our use case has keepalives, so we ignored the problem), as the timestamp will still wrap on the next packet.