Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_input.c
Show First 20 Lines • Show All 1,523 Lines • ▼ Show 20 Lines | #endif | ||||
* allow the tcbinfo to be either locked or unlocked, as the | * allow the tcbinfo to be either locked or unlocked, as the | ||||
* caller may have unnecessarily acquired a write lock due to a race. | * caller may have unnecessarily acquired a write lock due to a race. | ||||
*/ | */ | ||||
if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || | if ((thflags & (TH_SYN | TH_FIN | TH_RST)) != 0 || | ||||
tp->t_state != TCPS_ESTABLISHED) { | tp->t_state != TCPS_ESTABLISHED) { | ||||
KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for " | KASSERT(ti_locked == TI_RLOCKED, ("%s ti_locked %d for " | ||||
"SYN/FIN/RST/!EST", __func__, ti_locked)); | "SYN/FIN/RST/!EST", __func__, ti_locked)); | ||||
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); | INP_INFO_RLOCK_ASSERT(&V_tcbinfo); | ||||
} else { | } else { | ||||
lstewart: Note that t_srtt is in units of hz ticks scaled by TCP_RTT_SHIFT, and tcp_ts_getticks() returns… | |||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
if (ti_locked == TI_RLOCKED) | if (ti_locked == TI_RLOCKED) | ||||
INP_INFO_RLOCK_ASSERT(&V_tcbinfo); | INP_INFO_RLOCK_ASSERT(&V_tcbinfo); | ||||
else { | else { | ||||
KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " | KASSERT(ti_locked == TI_UNLOCKED, ("%s: EST " | ||||
"ti_locked: %d", __func__, ti_locked)); | "ti_locked: %d", __func__, ti_locked)); | ||||
INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); | INP_INFO_UNLOCK_ASSERT(&V_tcbinfo); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 339 Lines • ▼ Show 20 Lines | #endif | ||||
* we receive a bulk stream w/o packet losses or reorderings. | * we receive a bulk stream w/o packet losses or reorderings. | ||||
* Shrinking the buffer during idle times is not necessary as | * Shrinking the buffer during idle times is not necessary as | ||||
* it doesn't consume any memory when idle. | * it doesn't consume any memory when idle. | ||||
* | * | ||||
* TODO: Only step up if the application is actually serving | * TODO: Only step up if the application is actually serving | ||||
* the buffer to better manage the socket buffer resources. | * the buffer to better manage the socket buffer resources. | ||||
*/ | */ | ||||
if (V_tcp_do_autorcvbuf && | if (V_tcp_do_autorcvbuf && | ||||
(to.to_flags & TOF_TS) && | |||||
to.to_tsecr && | |||||
(so->so_rcv.sb_flags & SB_AUTOSIZE)) { | (so->so_rcv.sb_flags & SB_AUTOSIZE)) { | ||||
if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) && | int rtt_check = 0; | ||||
to.to_tsecr - tp->rfbuf_ts < hz) { | uint32_t t = 0; | ||||
smh (Author, Unsubmitted, Done; Inline Actions): Given that timestamps or estimates are used to calculate srtt, is there any need for separate code paths here, or is it adding complexity with no real gain? | |||||
if ((to.to_flags & TOF_TS) != 0 && | |||||
to.to_tsecr) { | |||||
/* Timestamp based RTT. */ | |||||
if (TSTMP_GT(to.to_tsecr, | |||||
tp->rfbuf_ts) && | |||||
to.to_tsecr - tp->rfbuf_ts < hz) | |||||
rtt_check = 3; | |||||
else | |||||
rtt_check = 1; | |||||
} else { | |||||
/* Estimated RTT. */ | |||||
t = tcp_ts_getticks(); | |||||
if (tp->t_srtt != 0 && | |||||
tp->rfbuf_ts != 0 && | |||||
t - tp->rfbuf_ts > | |||||
(tp->t_srtt >> TCP_RTT_SHIFT)) | |||||
rtt_check = 4; | |||||
else | |||||
rtt_check = 2; | |||||
} | |||||
TCP_PROBE6(receive__autoresize, NULL, | |||||
tp, t, tp, th, rtt_check); | |||||
if (rtt_check > 2) { | |||||
if (tp->rfbuf_cnt > | if (tp->rfbuf_cnt > | ||||
(so->so_rcv.sb_hiwat / 8 * 7) && | (so->so_rcv.sb_hiwat / 8 * 7) && | ||||
so->so_rcv.sb_hiwat < | so->so_rcv.sb_hiwat < | ||||
V_tcp_autorcvbuf_max) { | V_tcp_autorcvbuf_max) { | ||||
newsize = | newsize = min( | ||||
min(so->so_rcv.sb_hiwat + | so->so_rcv.sb_hiwat + | ||||
V_tcp_autorcvbuf_inc, | V_tcp_autorcvbuf_inc, | ||||
V_tcp_autorcvbuf_max); | V_tcp_autorcvbuf_max); | ||||
TCP_PROBE6(receive__autoresize, | |||||
newsize, tp, t, tp, th, | |||||
rtt_check); | |||||
} | } | ||||
/* Start over with next RTT. */ | /* Start over with next RTT. */ | ||||
tp->rfbuf_ts = 0; | tp->rfbuf_ts = 0; | ||||
tp->rfbuf_cnt = 0; | tp->rfbuf_cnt = 0; | ||||
} else | } else if (rtt_check != 0) | ||||
tp->rfbuf_cnt += tlen; /* add up */ | tp->rfbuf_cnt += tlen; /* add up */ | ||||
} | } | ||||
/* Add data to socket buffer. */ | /* Add data to socket buffer. */ | ||||
SOCKBUF_LOCK(&so->so_rcv); | SOCKBUF_LOCK(&so->so_rcv); | ||||
if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { | if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { | ||||
m_freem(m); | m_freem(m); | ||||
} else { | } else { | ||||
Show All 26 Lines | #endif | ||||
* Receive window is amount of space in rcv queue, | * Receive window is amount of space in rcv queue, | ||||
* but not less than advertised window. | * but not less than advertised window. | ||||
*/ | */ | ||||
win = sbspace(&so->so_rcv); | win = sbspace(&so->so_rcv); | ||||
if (win < 0) | if (win < 0) | ||||
win = 0; | win = 0; | ||||
tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); | tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt)); | ||||
/* | |||||
* Disabling the reset below gives an extra 20% performance increase | |||||
* when testing with 17ms RTT on a 1Gbps line, 80MB/s -> 100MB/s. | |||||
* | |||||
* So the question is why was this done and is it being triggered | |||||
* incorrectly? | |||||
*/ | |||||
TCP_PROBE5(receive__autoreset, NULL, tp, t, tp, th); | |||||
/* Reset receive buffer auto scaling when not in bulk receive mode. */ | /* Reset receive buffer auto scaling when not in bulk receive mode. */ | ||||
tp->rfbuf_ts = 0; | //tp->rfbuf_ts = 0; | ||||
tp->rfbuf_cnt = 0; | //tp->rfbuf_cnt = 0; | ||||
switch (tp->t_state) { | switch (tp->t_state) { | ||||
/* | /* | ||||
* If the state is SYN_RECEIVED: | * If the state is SYN_RECEIVED: | ||||
* if seg contains an ACK, but not for our SYN/ACK, send a RST. | * if seg contains an ACK, but not for our SYN/ACK, send a RST. | ||||
*/ | */ | ||||
case TCPS_SYN_RECEIVED: | case TCPS_SYN_RECEIVED: | ||||
▲ Show 20 Lines • Show All 1,527 Lines • ▼ Show 20 Lines | |||||
* and update averages and current timeout. | * and update averages and current timeout. | ||||
*/ | */ | ||||
void | void | ||||
tcp_xmit_timer(struct tcpcb *tp, int rtt) | tcp_xmit_timer(struct tcpcb *tp, int rtt) | ||||
{ | { | ||||
int delta; | int delta; | ||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
TCP_PROBE6(xmit__timer, NULL, tp, NULL, tp, NULL, rtt); | |||||
TCPSTAT_INC(tcps_rttupdated); | TCPSTAT_INC(tcps_rttupdated); | ||||
tp->t_rttupdated++; | tp->t_rttupdated++; | ||||
if ((tp->t_srtt != 0) && (tp->t_rxtshift <= TCP_RTT_INVALIDATE)) { | if ((tp->t_srtt != 0) && (tp->t_rxtshift <= TCP_RTT_INVALIDATE)) { | ||||
/* | /* | ||||
* srtt is stored as fixed point with 5 bits after the | * srtt is stored as fixed point with 5 bits after the | ||||
* binary point (i.e., scaled by 32). The following magic | * binary point (i.e., scaled by 32). The following magic | ||||
* is equivalent to the smoothing algorithm in rfc793 with | * is equivalent to the smoothing algorithm in rfc793 with | ||||
▲ Show 20 Lines • Show All 394 Lines • Show Last 20 Lines |
Note that t_srtt is in units of hz ticks scaled by TCP_RTT_SHIFT, whereas tcp_ts_getticks() (confusingly) returns "timestamp ticks", which are fixed at 1 ms. So if hz != 1000, the left- and right-hand sides of the comparison are in different units; you need to convert one of the sides accordingly.