Changeset View
Changeset View
Standalone View
Standalone View
head/sys/netinet/tcp_input.c
Show First 20 Lines • Show All 1,480 Lines • ▼ Show 20 Lines | drop: | ||||
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); | INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); | ||||
if (s != NULL) | if (s != NULL) | ||||
free(s, M_TCPLOG); | free(s, M_TCPLOG); | ||||
if (m != NULL) | if (m != NULL) | ||||
m_freem(m); | m_freem(m); | ||||
return (IPPROTO_DONE); | return (IPPROTO_DONE); | ||||
} | } | ||||
/* | |||||
* Automatic sizing of receive socket buffer. Often the send | |||||
* buffer size is not optimally adjusted to the actual network | |||||
* conditions at hand (delay bandwidth product). Setting the | |||||
* buffer size too small limits throughput on links with high | |||||
* bandwidth and high delay (eg. trans-continental/oceanic links). | |||||
* | |||||
* On the receive side the socket buffer memory is only rarely | |||||
* used to any significant extent. This allows us to be much | |||||
* more aggressive in scaling the receive socket buffer. For | |||||
* the case that the buffer space is actually used to a large | |||||
* extent and we run out of kernel memory we can simply drop | |||||
* the new segments; TCP on the sender will just retransmit it | |||||
* later. Setting the buffer size too big may only consume too | |||||
* much kernel memory if the application doesn't read() from | |||||
* the socket or packet loss or reordering makes use of the | |||||
* reassembly queue. | |||||
* | |||||
* The criteria to step up the receive buffer one notch are: | |||||
* 1. Automatic sizing is enabled (V_tcp_do_autorcvbuf) and the | |||||
* application has not set receive buffer size with | |||||
* SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE. | |||||
* 2. a measurement is in progress (tp->rfbuf_ts != 0), an RTT | |||||
* estimate exists (tp->t_srtt != 0), and the time elapsed | |||||
* since tp->rfbuf_ts exceeds tp->t_srtt >> TCP_RTT_SHIFT | |||||
* (presumably the smoothed RTT in ticks; TODO confirm); | |||||
* 3. the bytes counted in tp->rfbuf_cnt during that interval | |||||
* exceed seven eighths of the current socket buffer size | |||||
* (so_rcv.sb_hiwat); | |||||
* 4. receive buffer size has not hit maximal automatic size | |||||
* (V_tcp_autorcvbuf_max); | |||||
* | |||||
* This algorithm does one step per RTT at most and only if | |||||
* we receive a bulk stream w/o packet losses or reorderings. | |||||
* Shrinking the buffer during idle times is not necessary as | |||||
* it doesn't consume any memory when idle. | |||||
* | |||||
* Returns the proposed new receive buffer size, i.e. the current | |||||
* size plus V_tcp_autorcvbuf_inc capped at V_tcp_autorcvbuf_max, | |||||
* or 0 when no step is proposed. The buffer is not resized here; | |||||
* acting on the returned value is left to the caller. | |||||
* | |||||
* Whenever criteria 1 and 2 hold, TCP_PROBE6(receive__autoresize) | |||||
* is invoked with the result and tp->rfbuf_ts / tp->rfbuf_cnt are | |||||
* zeroed, whether or not a step was proposed. In every other | |||||
* case, including when automatic sizing is disabled, tlen is | |||||
* added to tp->rfbuf_cnt. | |||||
* | |||||
* TODO: Only step up if the application is actually serving | |||||
* the buffer to better manage the socket buffer resources. | |||||
*/ | |||||
int | |||||
tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so, | |||||
struct tcpcb *tp, int tlen) | |||||
{ | |||||
int newsize = 0; | |||||
if (V_tcp_do_autorcvbuf && (so->so_rcv.sb_flags & SB_AUTOSIZE) && | |||||
tp->t_srtt != 0 && tp->rfbuf_ts != 0 && | |||||
TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) > | |||||
(tp->t_srtt >> TCP_RTT_SHIFT)) { | |||||
if (tp->rfbuf_cnt > (so->so_rcv.sb_hiwat / 8 * 7) && | |||||
so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) { | |||||
newsize = min(so->so_rcv.sb_hiwat + | |||||
V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max); | |||||
} | |||||
TCP_PROBE6(receive__autoresize, NULL, tp, m, tp, th, newsize); | |||||
/* Interval complete: clear timestamp and byte count so the next RTT starts over. */ | |||||
tp->rfbuf_ts = 0; | |||||
tp->rfbuf_cnt = 0; | |||||
} else { | |||||
tp->rfbuf_cnt += tlen; /* interval not complete (or autosizing off): accumulate segment bytes */ | |||||
} | |||||
return (newsize); | |||||
} | |||||
void | void | ||||
tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, | tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, | ||||
struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, | struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, | ||||
int ti_locked) | int ti_locked) | ||||
{ | { | ||||
int thflags, acked, ourfinisacked, needoutput = 0, sack_changed; | int thflags, acked, ourfinisacked, needoutput = 0, sack_changed; | ||||
int rstreason, todrop, win; | int rstreason, todrop, win; | ||||
uint32_t tiwin; | uint32_t tiwin; | ||||
▲ Show 20 Lines • Show All 347 Lines • ▼ Show 20 Lines | #endif | ||||
TCPSTAT_ADD(tcps_rcvbyte, tlen); | TCPSTAT_ADD(tcps_rcvbyte, tlen); | ||||
#ifdef TCPDEBUG | #ifdef TCPDEBUG | ||||
if (so->so_options & SO_DEBUG) | if (so->so_options & SO_DEBUG) | ||||
tcp_trace(TA_INPUT, ostate, tp, | tcp_trace(TA_INPUT, ostate, tp, | ||||
(void *)tcp_saveipgen, &tcp_savetcp, 0); | (void *)tcp_saveipgen, &tcp_savetcp, 0); | ||||
#endif | #endif | ||||
TCP_PROBE3(debug__input, tp, th, m); | TCP_PROBE3(debug__input, tp, th, m); | ||||
/* | newsize = tcp_autorcvbuf(m, th, so, tp, tlen); | ||||
* Automatic sizing of receive socket buffer. Often the send | |||||
* buffer size is not optimally adjusted to the actual network | |||||
* conditions at hand (delay bandwidth product). Setting the | |||||
* buffer size too small limits throughput on links with high | |||||
* bandwidth and high delay (eg. trans-continental/oceanic links). | |||||
* | |||||
* On the receive side the socket buffer memory is only rarely | |||||
* used to any significant extent. This allows us to be much | |||||
* more aggressive in scaling the receive socket buffer. For | |||||
* the case that the buffer space is actually used to a large | |||||
* extent and we run out of kernel memory we can simply drop | |||||
* the new segments; TCP on the sender will just retransmit it | |||||
* later. Setting the buffer size too big may only consume too | |||||
* much kernel memory if the application doesn't read() from | |||||
* the socket or packet loss or reordering makes use of the | |||||
* reassembly queue. | |||||
* | |||||
* The criteria to step up the receive buffer one notch are: | |||||
* 1. Application has not set receive buffer size with | |||||
* SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE. | |||||
* 2. the number of bytes received during the time it takes | |||||
* one timestamp to be reflected back to us (the RTT); | |||||
* 3. received bytes per RTT is within seven eighth of the | |||||
* current socket buffer size; | |||||
* 4. receive buffer size has not hit maximal automatic size; | |||||
* | |||||
* This algorithm does one step per RTT at most and only if | |||||
* we receive a bulk stream w/o packet losses or reorderings. | |||||
* Shrinking the buffer during idle times is not necessary as | |||||
* it doesn't consume any memory when idle. | |||||
* | |||||
* TODO: Only step up if the application is actually serving | |||||
* the buffer to better manage the socket buffer resources. | |||||
*/ | |||||
if (V_tcp_do_autorcvbuf && | |||||
(to.to_flags & TOF_TS) && | |||||
to.to_tsecr && | |||||
(so->so_rcv.sb_flags & SB_AUTOSIZE)) { | |||||
if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) && | |||||
to.to_tsecr - tp->rfbuf_ts < hz) { | |||||
if (tp->rfbuf_cnt > | |||||
(so->so_rcv.sb_hiwat / 8 * 7) && | |||||
so->so_rcv.sb_hiwat < | |||||
V_tcp_autorcvbuf_max) { | |||||
newsize = | |||||
min(so->so_rcv.sb_hiwat + | |||||
V_tcp_autorcvbuf_inc, | |||||
V_tcp_autorcvbuf_max); | |||||
} | |||||
/* Start over with next RTT. */ | |||||
tp->rfbuf_ts = 0; | |||||
tp->rfbuf_cnt = 0; | |||||
} else | |||||
tp->rfbuf_cnt += tlen; /* add up */ | |||||
} | |||||
/* Add data to socket buffer. */ | /* Add data to socket buffer. */ | ||||
SOCKBUF_LOCK(&so->so_rcv); | SOCKBUF_LOCK(&so->so_rcv); | ||||
if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { | if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { | ||||
m_freem(m); | m_freem(m); | ||||
} else { | } else { | ||||
/* | /* | ||||
* Set new socket buffer size. | * Set new socket buffer size. | ||||
Show All 23 Lines | #endif | ||||
* and then do TCP input processing. | * and then do TCP input processing. | ||||
* Receive window is amount of space in rcv queue, | * Receive window is amount of space in rcv queue, | ||||
* but not less than advertised window. | * but not less than advertised window. | ||||
*/ | */ | ||||
win = sbspace(&so->so_rcv); | win = sbspace(&so->so_rcv); | ||||
if (win < 0) | if (win < 0) | ||||
win = 0; | win = 0; | ||||
tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); | tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); | ||||
/* Reset receive buffer auto scaling when not in bulk receive mode. */ | |||||
tp->rfbuf_ts = 0; | |||||
tp->rfbuf_cnt = 0; | |||||
switch (tp->t_state) { | switch (tp->t_state) { | ||||
/* | /* | ||||
* If the state is SYN_RECEIVED: | * If the state is SYN_RECEIVED: | ||||
* if seg contains an ACK, but not for our SYN/ACK, send a RST. | * if seg contains an ACK, but not for our SYN/ACK, send a RST. | ||||
*/ | */ | ||||
case TCPS_SYN_RECEIVED: | case TCPS_SYN_RECEIVED: | ||||
▲ Show 20 Lines • Show All 1,937 Lines • Show Last 20 Lines |