Index: sys/netinet/tcp_hpts.c =================================================================== --- sys/netinet/tcp_hpts.c +++ sys/netinet/tcp_hpts.c @@ -1330,6 +1330,17 @@ kern_prefetch(tp->t_fb_ptr, &did_prefetch); did_prefetch = 1; } + /* + * We set inp_hpts_calls to 1 before any possible output. + * The contract with the transport is that if it cares about + * hpts calling it should clear the flag. That way next time + * it is called it will know it is hpts. + * + * We also only call tfb_do_queued_segments() tcp_output() + * it is expected that if segments are queued and come in that + * the final input mbuf will cause a call to output if it is needed. + */ + inp->inp_hpts_calls = 1; if ((inp->inp_flags2 & INP_SUPPORTS_MBUFQ) && !STAILQ_EMPTY(&tp->t_inqueue)) { error = (*tp->t_fb->tfb_do_queued_segments)(tp, 0); @@ -1337,12 +1348,11 @@ /* The input killed the connection */ goto skip_pacing; } + } else { + error = tcp_output(tp); + if (error < 0) + goto skip_pacing; } - inp->inp_hpts_calls = 1; - error = tcp_output(tp); - if (error < 0) - goto skip_pacing; - inp->inp_hpts_calls = 0; if (ninp) { /* * If we have a nxt inp, see if we can Index: sys/netinet/tcp_stacks/bbr.c =================================================================== --- sys/netinet/tcp_stacks/bbr.c +++ sys/netinet/tcp_stacks/bbr.c @@ -11581,6 +11581,9 @@ /* Do we have the correct timer running? */ bbr_timer_audit(tp, bbr, lcts, &so->so_snd); } + /* Clear the flag, it may have been cleared by output but we may not have */ + if ((nxt_pkt == 0) && (inp->inp_hpts_calls)) + inp->inp_hpts_calls = 0; /* Do we have a new state */ if (bbr->r_state != tp->t_state) bbr_set_state(tp, bbr, tiwin); @@ -11850,6 +11853,8 @@ memcpy(&bbr->rc_tv, tv, sizeof(struct timeval)); cts = tcp_tv_to_usectick(&bbr->rc_tv); inp = bbr->rc_inp; + hpts_calling = inp->inp_hpts_calls; + inp->inp_hpts_calls = 0; so = inp->inp_socket; sb = &so->so_snd; if (tp->t_nic_ktls_xmit) @@ -11985,7 +11990,7 @@ merged_val = bbr->rc_pacer_started; merged_val <<= 32; merged_val |= bbr->r_ctl.rc_last_delay_val; - bbr_log_pacing_delay_calc(bbr, inp->inp_hpts_calls, + bbr_log_pacing_delay_calc(bbr, hpts_calling, bbr->r_ctl.rc_agg_early, cts, delay_calc, merged_val, bbr->r_agg_early_set, 3); bbr->r_ctl.rc_last_delay_val = 0; @@ -12028,8 +12033,6 @@ * c) We had room to send something. * */ - hpts_calling = inp->inp_hpts_calls; - inp->inp_hpts_calls = 0; if (bbr->r_ctl.rc_hpts_flags & PACE_TMR_MASK) { int retval; Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -16427,6 +16427,8 @@ } did_out = 1; } + if (rack->rc_inp->inp_hpts_calls) + rack->rc_inp->inp_hpts_calls = 0; rack_free_trim(rack); #ifdef TCP_ACCOUNTING sched_unpin(); @@ -16525,6 +16527,17 @@ (*ts_ptr == TCP_LRO_TS_OPTION))) no_output = 1; } + if ((no_output == 1) && (slot_remaining < tcp_min_hptsi_time)) { + /* + * It is unrealistic to think we can pace in less than + * the minimum granularity of the pacer (def:250usec). So + * if we have less than that time remaining we should go + * ahead and allow output to be "early". We will attempt to + * make up for it in any pacing time we try to apply on + * the outbound packet. + */ + no_output = 0; + } } if (m->m_flags & M_ACKCMP) { /* @@ -16984,6 +16997,9 @@ rack_start_hpts_timer(rack, tp, cts, slot_remaining, 0, 0); rack_free_trim(rack); } + /* Clear the flag, it may have been cleared by output but we may not have */ + if ((nxt_pkt == 0) && (inp->inp_hpts_calls)) + inp->inp_hpts_calls = 0; /* Update any rounds needed */ if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) rack_log_hystart_event(rack, high_seq, 8); @@ -19637,6 +19653,7 @@ ts_val = get_cyclecount(); #endif hpts_calling = inp->inp_hpts_calls; + rack->rc_inp->inp_hpts_calls = 0; #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) { #ifdef TCP_ACCOUNTING @@ -19759,7 +19776,6 @@ counter_u64_add(rack_out_size[TCP_MSS_ACCT_INPACE], 1); return (0); } - rack->rc_inp->inp_hpts_calls = 0; /* Finish out both pacing early and late accounting */ if ((rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) && TSTMP_GT(rack->r_ctl.rc_last_output_to, cts)) { @@ -19876,7 +19892,7 @@ while (rack->rc_free_cnt < rack_free_cache) { rsm = rack_alloc(rack); if (rsm == NULL) { - if (inp->inp_hpts_calls) + if (hpts_calling) /* Retry in a ms */ slot = (1 * HPTS_USEC_IN_MSEC); so = inp->inp_socket; @@ -19887,8 +19903,6 @@ rack->rc_free_cnt++; rsm = NULL; } - if (inp->inp_hpts_calls) - inp->inp_hpts_calls = 0; sack_rxmit = 0; len = 0; rsm = NULL;