diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -16433,6 +16433,9 @@ return (0); } +#define TCP_LRO_TS_OPTION \ + ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | \ + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP) static int rack_do_segment_nounlock(struct tcpcb *tp, struct mbuf *m, struct tcphdr *th, @@ -16458,6 +16461,8 @@ struct tcp_rack *rack; struct rack_sendmap *rsm; int32_t prev_state = 0; + int no_output = 0; + int slot_remaining = 0; #ifdef TCP_ACCOUNTING int ack_val_set = 0xf; #endif @@ -16480,6 +16485,43 @@ */ rack_deferred_init(tp, rack); } + /* + * Check to see if we need to skip any output plans. This + * can happen in the non-LRO path where we are pacing and + * must process the ack coming in but need to defer sending + * anything becase a pacing timer is running. + */ + us_cts = tcp_tv_to_usectick(tv); + if ((rack->rc_always_pace == 1) && + (rack->r_ctl.rc_hpts_flags & PACE_PKT_OUTPUT) && + (TSTMP_LT(us_cts, rack->r_ctl.rc_last_output_to))) { + /* + * Ok conditions are right for queuing the packets + * but we do have to check the flags in the inp, it + * could be, if a sack is present, we want to be awoken and + * so should process the packets. + */ + slot_remaining = rack->r_ctl.rc_last_output_to - us_cts; + if (rack->rc_inp->inp_flags2 & INP_DONT_SACK_QUEUE) { + no_output = 1; + } else { + /* + * If there is no options, or just a + * timestamp option, we will want to queue + * the packets. This is the same that LRO does + * and will need to change with accurate ECN. + */ + uint32_t *ts_ptr; + int optlen; + + optlen = (th->th_off << 2) - sizeof(struct tcphdr); + ts_ptr = (uint32_t *)(th + 1); + if ((optlen == 0) || + ((optlen == TCPOLEN_TSTAMP_APPA) && + (*ts_ptr == TCP_LRO_TS_OPTION))) + no_output = 1; + } + } if (m->m_flags & M_ACKCMP) { /* * All compressed ack's are ack's by definition so @@ -16914,7 +16956,7 @@ } } #endif - if (nxt_pkt == 0) { + if ((nxt_pkt == 0) && (no_output == 0)) { if ((rack->r_wanted_output != 0) || (rack->r_fast_output != 0)) { do_output_now: if (tcp_output(tp) < 0) { @@ -16927,6 +16969,16 @@ } rack_start_hpts_timer(rack, tp, cts, 0, 0, 0); rack_free_trim(rack); + } else if ((no_output == 1) && + (nxt_pkt == 0) && + (tcp_in_hpts(rack->rc_inp) == 0)) { + /* + * We are not in hpts and we had a pacing timer up. Use + * the remaining time (slot_remaining) to restart the timer. + */ + KASSERT ((slot_remaining != 0), ("slot remaining is zero for rack:%p tp:%p", rack, tp)); + rack_start_hpts_timer(rack, tp, cts, slot_remaining, 0, 0); + rack_free_trim(rack); } /* Update any rounds needed */ if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp))