diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -2309,7 +2309,8 @@ tp->t_hpts_cpu = HPTS_CPU_NONE; tp->t_lro_cpu = HPTS_CPU_NONE; - callout_init_rw(&tp->t_callout, &inp->inp_lock, CALLOUT_RETURNUNLOCKED); + callout_init_rw(&tp->t_callout, &inp->inp_lock, + CALLOUT_TRYLOCK | CALLOUT_RETURNUNLOCKED); for (int i = 0; i < TT_N; i++) tp->t_timers[i] = SBT_MAX; diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -949,35 +949,26 @@ /* * Stop all timers associated with tcpcb. - * * Called when tcpcb moves to TCPS_CLOSED. - * - * XXXGL: unfortunately our callout(9) is not able to fully stop a locked - * callout even when only two threads are involved: the callout itself and the - * thread that does callout_stop(). See where softclock_call_cc() swaps the - * callwheel lock to callout lock and then checks cc_exec_cancel(). This is - * the race window. If it happens, the tcp_timer_enter() won't be executed, - * however pcb lock will be locked and released, hence we can't free memory. - * Until callout(9) is improved, just keep retrying. In my profiling I've seen - * such event happening less than 1 time per hour with 20-30 Gbit/s of traffic. */ void tcp_timer_stop(struct tcpcb *tp) { - struct inpcb *inp = tptoinpcb(tp); - INP_WLOCK_ASSERT(inp); - - if (curthread->td_pflags & TDP_INTCPCALLOUT) { - int stopped __diagused; + INP_WLOCK_ASSERT(tptoinpcb(tp)); - stopped = callout_stop(&tp->t_callout); - MPASS(stopped == 0); - for (tt_which i = 0; i < TT_N; i++) - tp->t_timers[i] = SBT_MAX; - } else while(__predict_false(callout_stop(&tp->t_callout) == 0)) { - INP_WUNLOCK(inp); - kern_yield(PRI_UNCHANGED); - INP_WLOCK(inp); - } + /* + * We don't check return value from callout_stop(). There are two + * reasons why it can return 0. First, a legitimate one: we could have + * been called from the callout itself. Second, callout(9) has a bug. + * It can race internally in softclock_call_cc(), when callout has + * already completed, but cc_exec_curr still points at the callout. + */ + (void )callout_stop(&tp->t_callout); + /* + * In case of being called from callout itself, we must make sure that + * we don't reschedule. + */ + for (tt_which i = 0; i < TT_N; i++) + tp->t_timers[i] = SBT_MAX; }