diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -6563,13 +6563,65 @@ rack->r_ctl.rc_snd_max_at_rto = tp->snd_max; } +static void +rack_convert_rtts(struct tcpcb *tp) +{ + if (tp->t_srtt > 1) { + uint32_t val, frac; + + val = tp->t_srtt >> TCP_RTT_SHIFT; + frac = tp->t_srtt & 0x1f; + tp->t_srtt = TICKS_2_USEC(val); + /* + * frac is the fractional part of the srtt (if any) + * but its in ticks and every bit represents + * 1/32nd of a hz. + */ + if (frac) { + if (hz == 1000) { + frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_MSEC) / (uint64_t)TCP_RTT_SCALE); + } else { + frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_SEC) / ((uint64_t)(hz) * (uint64_t)TCP_RTT_SCALE)); + } + tp->t_srtt += frac; + } + } + if (tp->t_rttvar) { + uint32_t val, frac; + + val = tp->t_rttvar >> TCP_RTTVAR_SHIFT; + frac = tp->t_rttvar & 0x1f; + tp->t_rttvar = TICKS_2_USEC(val); + /* + * frac is the fractional part of the srtt (if any) + * but its in ticks and every bit represents + * 1/32nd of a hz. + */ + if (frac) { + if (hz == 1000) { + frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_MSEC) / (uint64_t)TCP_RTT_SCALE); + } else { + frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_SEC) / ((uint64_t)(hz) * (uint64_t)TCP_RTT_SCALE)); + } + tp->t_rttvar += frac; + } + } + RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp), + rack_rto_min, rack_rto_max); +} + static void rack_cc_conn_init(struct tcpcb *tp) { struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; + cc_conn_init(tp); + /* + * Now convert to rack's internal format. + */ + rack_convert_rtts(tp); /* * We want a chance to stay in slowstart as * we create a connection. TCP spec says that @@ -11916,9 +11968,9 @@ static int rack_init_fsb(struct tcpcb *tp, struct tcp_rack *rack) { - /* - * Allocate the larger of spaces V6 if available else just - * V4 and include udphdr (overbook) + /* + * Allocate the larger of spaces V6 if available else just + * V4 and include udphdr (overbook) */ #ifdef INET6 rack->r_ctl.fsb.tcp_ip_hdr_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr) + sizeof(struct udphdr); @@ -12147,47 +12199,7 @@ * bit decimal so we have to carefully convert * these to get the full precision. */ - if (tp->t_srtt > 1) { - uint32_t val, frac; - - val = tp->t_srtt >> TCP_RTT_SHIFT; - frac = tp->t_srtt & 0x1f; - tp->t_srtt = TICKS_2_USEC(val); - /* - * frac is the fractional part of the srtt (if any) - * but its in ticks and every bit represents - * 1/32nd of a hz. - */ - if (frac) { - if (hz == 1000) { - frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_MSEC) / (uint64_t)TCP_RTT_SCALE); - } else { - frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_SEC) / ((uint64_t)(hz) * (uint64_t)TCP_RTT_SCALE)); - } - tp->t_srtt += frac; - } - } - if (tp->t_rttvar) { - uint32_t val, frac; - - val = tp->t_rttvar >> TCP_RTTVAR_SHIFT; - frac = tp->t_rttvar & 0x1f; - tp->t_rttvar = TICKS_2_USEC(val); - /* - * frac is the fractional part of the srtt (if any) - * but its in ticks and every bit represents - * 1/32nd of a hz. - */ - if (frac) { - if (hz == 1000) { - frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_MSEC) / (uint64_t)TCP_RTT_SCALE); - } else { - frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_SEC) / ((uint64_t)(hz) * (uint64_t)TCP_RTT_SCALE)); - } - tp->t_rttvar += frac; - } - } - tp->t_rxtcur = TICKS_2_USEC(tp->t_rxtcur); + rack_convert_rtts(tp); tp->t_rttlow = TICKS_2_USEC(tp->t_rttlow); if (rack_def_profile) rack_set_profile(rack, rack_def_profile); @@ -12230,7 +12242,7 @@ rack_stop_all_timers(tp); /* Lets setup the fsb block */ rack_start_hpts_timer(rack, tp, tcp_get_usecs(NULL), 0, 0, 0); - rack_log_rtt_shrinks(rack, us_cts, 0, + rack_log_rtt_shrinks(rack, us_cts, tp->t_rxtcur, __LINE__, RACK_RTTS_INIT); return (0); } diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -2309,62 +2309,6 @@ tp->t_fb->tfb_tcp_timer_stop_all(tp); } - /* - * If we got enough samples through the srtt filter, - * save the rtt and rttvar in the routing entry. - * 'Enough' is arbitrarily defined as 4 rtt samples. - * 4 samples is enough for the srtt filter to converge - * to within enough % of the correct value; fewer samples - * and we could save a bogus rtt. The danger is not high - * as tcp quickly recovers from everything. - * XXX: Works very well but needs some more statistics! - */ - if (tp->t_rttupdated >= 4) { - struct hc_metrics_lite metrics; - uint32_t ssthresh; - - bzero(&metrics, sizeof(metrics)); - /* - * Update the ssthresh always when the conditions below - * are satisfied. This gives us better new start value - * for the congestion avoidance for new connections. - * ssthresh is only set if packet loss occurred on a session. - * - * XXXRW: 'so' may be NULL here, and/or socket buffer may be - * being torn down. Ideally this code would not use 'so'. - */ - ssthresh = tp->snd_ssthresh; - if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) { - /* - * convert the limit from user data bytes to - * packets then to packet data bytes. - */ - ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg; - if (ssthresh < 2) - ssthresh = 2; - ssthresh *= (tp->t_maxseg + -#ifdef INET6 - (isipv6 ? sizeof (struct ip6_hdr) + - sizeof (struct tcphdr) : -#endif - sizeof (struct tcpiphdr) -#ifdef INET6 - ) -#endif - ); - } else - ssthresh = 0; - metrics.rmx_ssthresh = ssthresh; - - metrics.rmx_rtt = tp->t_srtt; - metrics.rmx_rttvar = tp->t_rttvar; - metrics.rmx_cwnd = tp->snd_cwnd; - metrics.rmx_sendpipe = 0; - metrics.rmx_recvpipe = 0; - - tcp_hc_update(&inp->inp_inc, &metrics); - } - /* free the reassembly queue, if any */ tcp_reass_flush(tp); @@ -2404,6 +2348,68 @@ TCPSTATES_DEC(tp->t_state); if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); + + /* + * If we got enough samples through the srtt filter, + * save the rtt and rttvar in the routing entry. + * 'Enough' is arbitrarily defined as 4 rtt samples. + * 4 samples is enough for the srtt filter to converge + * to within enough % of the correct value; fewer samples + * and we could save a bogus rtt. The danger is not high + * as tcp quickly recovers from everything. + * XXX: Works very well but needs some more statistics! + * + * XXXRRS: Updating must be after the stack fini() since + * that may be converting some internal representation of + * say srtt etc into the general one used by other stacks. + * Lets also at least protect against the so being NULL + * as RW stated below. + */ + if ((tp->t_rttupdated >= 4) && (so != NULL)) { + struct hc_metrics_lite metrics; + uint32_t ssthresh; + + bzero(&metrics, sizeof(metrics)); + /* + * Update the ssthresh always when the conditions below + * are satisfied. This gives us better new start value + * for the congestion avoidance for new connections. + * ssthresh is only set if packet loss occurred on a session. + * + * XXXRW: 'so' may be NULL here, and/or socket buffer may be + * being torn down. Ideally this code would not use 'so'. + */ + ssthresh = tp->snd_ssthresh; + if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) { + /* + * convert the limit from user data bytes to + * packets then to packet data bytes. + */ + ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg; + if (ssthresh < 2) + ssthresh = 2; + ssthresh *= (tp->t_maxseg + +#ifdef INET6 + (isipv6 ? sizeof (struct ip6_hdr) + + sizeof (struct tcphdr) : +#endif + sizeof (struct tcpiphdr) +#ifdef INET6 + ) +#endif + ); + } else + ssthresh = 0; + metrics.rmx_ssthresh = ssthresh; + + metrics.rmx_rtt = tp->t_srtt; + metrics.rmx_rttvar = tp->t_rttvar; + metrics.rmx_cwnd = tp->snd_cwnd; + metrics.rmx_sendpipe = 0; + metrics.rmx_recvpipe = 0; + + tcp_hc_update(&inp->inp_inc, &metrics); + } refcount_release(&tp->t_fb->tfb_refcnt); tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp);