Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -6567,9 +6567,86 @@ rack_cc_conn_init(struct tcpcb *tp) { struct tcp_rack *rack; + struct hc_metrics_lite metrics; + u_int maxseg; + int rtt; rack = (struct tcp_rack *)tp->t_fb_ptr; - cc_conn_init(tp); + + maxseg = tcp_maxseg(tp); + if (tp->t_srtt == 0 && (rtt = metrics.rmx_rtt)) { + /* First pull it out of the HC */ + tp->t_srtt = rtt; + tp->t_rttbest = tp->t_srtt + TCP_RTT_SCALE; + TCPSTAT_INC(tcps_usedrtt); + if (metrics.rmx_rttvar) { + tp->t_rttvar = metrics.rmx_rttvar; + TCPSTAT_INC(tcps_usedrttvar); + } else { + /* default variation is +- 1 rtt */ + tp->t_rttvar = + tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE; + } + /* + * Now convert to rack's internal format. + */ + if (tp->t_srtt > 1) { + uint32_t val, frac; + + val = tp->t_srtt >> TCP_RTT_SHIFT; + frac = tp->t_srtt & 0x1f; + tp->t_srtt = TICKS_2_USEC(val); + /* + * frac is the fractional part of the srtt (if any) + * but its in ticks and every bit represents + * 1/32nd of a hz. + */ + if (frac) { + if (hz == 1000) { + frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_MSEC) / (uint64_t)TCP_RTT_SCALE); + } else { + frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_SEC) / ((uint64_t)(hz) * (uint64_t)TCP_RTT_SCALE)); + } + tp->t_srtt += frac; + } + } + if (tp->t_rttvar) { + uint32_t val, frac; + + val = tp->t_rttvar >> TCP_RTTVAR_SHIFT; + frac = tp->t_rttvar & 0x1f; + tp->t_rttvar = TICKS_2_USEC(val); + /* + * frac is the fractional part of the srtt (if any) + * but its in ticks and every bit represents + * 1/32nd of a hz. + */ + if (frac) { + if (hz == 1000) { + frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_MSEC) / (uint64_t)TCP_RTT_SCALE); + } else { + frac = (((uint64_t)frac * (uint64_t)HPTS_USEC_IN_SEC) / ((uint64_t)(hz) * (uint64_t)TCP_RTT_SCALE)); + } + tp->t_rttvar += frac; + } + } + RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp), + rack_rto_min, rack_rto_max); + } + /* + * Set the initial slow-start flight size. + * + * If a SYN or SYN/ACK was lost and retransmitted, we have to + * reduce the initial CWND to one segment as congestion is likely + * requiring us to be cautious. + */ + if (tp->snd_cwnd == 1) + tp->snd_cwnd = maxseg; /* SYN(-ACK) lost */ + else + tp->snd_cwnd = tcp_compute_initwnd(maxseg); + + if (CC_ALGO(tp)->conn_init != NULL) + CC_ALGO(tp)->conn_init(tp->ccv); /* * We want a chance to stay in slowstart as * we create a connection. TCP spec says that @@ -12187,7 +12264,8 @@ tp->t_rttvar += frac; } } - tp->t_rxtcur = TICKS_2_USEC(tp->t_rxtcur); + RACK_TCPT_RANGESET(tp->t_rxtcur, RACK_REXMTVAL(tp), + rack_rto_min, rack_rto_max); tp->t_rttlow = TICKS_2_USEC(tp->t_rttlow); if (rack_def_profile) rack_set_profile(rack, rack_def_profile); @@ -12230,7 +12308,7 @@ rack_stop_all_timers(tp); /* Lets setup the fsb block */ rack_start_hpts_timer(rack, tp, tcp_get_usecs(NULL), 0, 0, 0); - rack_log_rtt_shrinks(rack, us_cts, 0, + rack_log_rtt_shrinks(rack, us_cts, tp->t_rxtcur, __LINE__, RACK_RTTS_INIT); return (0); } Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -2309,62 +2309,6 @@ tp->t_fb->tfb_tcp_timer_stop_all(tp); } - /* - * If we got enough samples through the srtt filter, - * save the rtt and rttvar in the routing entry. - * 'Enough' is arbitrarily defined as 4 rtt samples. - * 4 samples is enough for the srtt filter to converge - * to within enough % of the correct value; fewer samples - * and we could save a bogus rtt. The danger is not high - * as tcp quickly recovers from everything. - * XXX: Works very well but needs some more statistics! - */ - if (tp->t_rttupdated >= 4) { - struct hc_metrics_lite metrics; - uint32_t ssthresh; - - bzero(&metrics, sizeof(metrics)); - /* - * Update the ssthresh always when the conditions below - * are satisfied. This gives us better new start value - * for the congestion avoidance for new connections. - * ssthresh is only set if packet loss occurred on a session. - * - * XXXRW: 'so' may be NULL here, and/or socket buffer may be - * being torn down. Ideally this code would not use 'so'. - */ - ssthresh = tp->snd_ssthresh; - if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) { - /* - * convert the limit from user data bytes to - * packets then to packet data bytes. - */ - ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg; - if (ssthresh < 2) - ssthresh = 2; - ssthresh *= (tp->t_maxseg + -#ifdef INET6 - (isipv6 ? sizeof (struct ip6_hdr) + - sizeof (struct tcphdr) : -#endif - sizeof (struct tcpiphdr) -#ifdef INET6 - ) -#endif - ); - } else - ssthresh = 0; - metrics.rmx_ssthresh = ssthresh; - - metrics.rmx_rtt = tp->t_srtt; - metrics.rmx_rttvar = tp->t_rttvar; - metrics.rmx_cwnd = tp->snd_cwnd; - metrics.rmx_sendpipe = 0; - metrics.rmx_recvpipe = 0; - - tcp_hc_update(&inp->inp_inc, &metrics); - } - /* free the reassembly queue, if any */ tcp_reass_flush(tp); @@ -2404,6 +2348,68 @@ TCPSTATES_DEC(tp->t_state); if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); + + /* + * If we got enough samples through the srtt filter, + * save the rtt and rttvar in the routing entry. + * 'Enough' is arbitrarily defined as 4 rtt samples. + * 4 samples is enough for the srtt filter to converge + * to within enough % of the correct value; fewer samples + * and we could save a bogus rtt. The danger is not high + * as tcp quickly recovers from everything. + * XXX: Works very well but needs some more statistics! + * + * XXXRRS: Updating must be after the stack fini() since + * that may be converting some internal representation of + * say srtt etc into the general one used by other stacks. + * Lets also at least protect against the so being NULL + * as RW stated below. + */ + if ((tp->t_rttupdated >= 4) && (so != NULL)) { + struct hc_metrics_lite metrics; + uint32_t ssthresh; + + bzero(&metrics, sizeof(metrics)); + /* + * Update the ssthresh always when the conditions below + * are satisfied. This gives us better new start value + * for the congestion avoidance for new connections. + * ssthresh is only set if packet loss occurred on a session. + * + * XXXRW: 'so' may be NULL here, and/or socket buffer may be + * being torn down. Ideally this code would not use 'so'. + */ + ssthresh = tp->snd_ssthresh; + if (ssthresh != 0 && ssthresh < so->so_snd.sb_hiwat / 2) { + /* + * convert the limit from user data bytes to + * packets then to packet data bytes. + */ + ssthresh = (ssthresh + tp->t_maxseg / 2) / tp->t_maxseg; + if (ssthresh < 2) + ssthresh = 2; + ssthresh *= (tp->t_maxseg + +#ifdef INET6 + (isipv6 ? sizeof (struct ip6_hdr) + + sizeof (struct tcphdr) : +#endif + sizeof (struct tcpiphdr) +#ifdef INET6 + ) +#endif + ); + } else + ssthresh = 0; + metrics.rmx_ssthresh = ssthresh; + + metrics.rmx_rtt = tp->t_srtt; + metrics.rmx_rttvar = tp->t_rttvar; + metrics.rmx_cwnd = tp->snd_cwnd; + metrics.rmx_sendpipe = 0; + metrics.rmx_recvpipe = 0; + + tcp_hc_update(&inp->inp_inc, &metrics); + } refcount_release(&tp->t_fb->tfb_refcnt); tp->t_inpcb = NULL; uma_zfree(V_tcpcb_zone, tp);