diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -5910,9 +5910,6 @@ */ struct rack_sendmap *rsm; - if (tp->tt_flags & TT_STOPPED) { - return (1); - } counter_u64_add(rack_to_tot, 1); if (rack->r_state && (rack->r_state != tp->t_state)) rack_set_state(tp, rack); @@ -6123,9 +6120,6 @@ uint32_t out, avail; int collapsed_win = 0; - if (tp->tt_flags & TT_STOPPED) { - return (1); - } if (TSTMP_LT(cts, rack->r_ctl.rc_timer_exp)) { /* Its not time yet */ return (0); @@ -6312,9 +6306,7 @@ static int rack_timeout_delack(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts) { - if (tp->tt_flags & TT_STOPPED) { - return (1); - } + rack_log_to_event(rack, RACK_TO_FRM_DELACK, NULL); tp->t_flags &= ~TF_DELACK; tp->t_flags |= TF_ACKNOW; @@ -6337,9 +6329,6 @@ struct tcptemp *t_template; int32_t retval = 1; - if (tp->tt_flags & TT_STOPPED) { - return (1); - } if (rack->rc_in_persist == 0) return (0); if (ctf_progress_timeout_check(tp, false)) { @@ -6425,9 +6414,6 @@ struct tcptemp *t_template; struct inpcb *inp = tptoinpcb(tp); - if (tp->tt_flags & TT_STOPPED) { - return (1); - } rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_KEEP; rack_log_to_event(rack, RACK_TO_FRM_KEEP, NULL); /* @@ -6654,9 +6640,6 @@ int32_t retval = 0; bool isipv6; - if (tp->tt_flags & TT_STOPPED) { - return (1); - } if ((tp->t_flags & TF_GPUTINPROG) && (tp->t_rxtshift)) { /* diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -2228,11 +2228,14 @@ V_tcp_mssdflt; /* Set up our timeouts. */ - callout_init(&tp->tt_rexmt, 1); - callout_init(&tp->tt_persist, 1); - callout_init(&tp->tt_keep, 1); - callout_init(&tp->tt_2msl, 1); - callout_init(&tp->tt_delack, 1); +#define CALLOUT_INIT(tt_) \ + callout_init_rw(&tp->tt_, &inp->inp_lock, CALLOUT_RETURNUNLOCKED) + CALLOUT_INIT(tt_rexmt); + CALLOUT_INIT(tt_persist); + CALLOUT_INIT(tt_keep); + CALLOUT_INIT(tt_2msl); + CALLOUT_INIT(tt_delack); +#undef CALLOUT_INIT switch (V_tcp_do_rfc1323) { case 0: @@ -2301,13 +2304,6 @@ if (V_tcp_do_lrd) tp->t_flags |= TF_LRD; - /* - * XXXGL: this self-reference might be pointless. It will go away - * when the TCP timers are properly locked and could never fire after - * tcp_discardcb(). - */ - in_pcbref(inp); - return (tp); } @@ -2341,18 +2337,16 @@ tcp_discardcb(struct tcpcb *tp) { struct inpcb *inp = tptoinpcb(tp); + struct socket *so = tptosocket(tp); +#ifdef INET6 + bool isipv6 = (inp->inp_vflag & INP_IPV6) != 0; +#endif INP_WLOCK_ASSERT(inp); - /* - * Make sure that all of our timers are stopped before we delete the - * PCB. - * - * If stopping a timer fails, we schedule a discard function in same - * callout, and the last discard function called will take care of - * deleting the tcpcb. - */ - tp->tt_draincnt = 0; +#ifdef INVARIANTS + tp->t_flags |= TF_DYING; +#endif tcp_timer_stop(tp, TT_REXMT); tcp_timer_stop(tp, TT_PERSIST); tcp_timer_stop(tp, TT_KEEP); @@ -2402,23 +2396,7 @@ #endif CC_ALGO(tp) = NULL; - if (tp->tt_draincnt == 0) - tcp_freecb(tp); -} -bool -tcp_freecb(struct tcpcb *tp) -{ - struct inpcb *inp = tptoinpcb(tp); - struct socket *so = tptosocket(tp); -#ifdef INET6 - bool isipv6 = (inp->inp_vflag & INP_IPV6) != 0; -#endif - - INP_WLOCK_ASSERT(inp); - MPASS(tp->tt_draincnt == 0); - - /* We own the last reference on tcpcb, let's free it. */ #ifdef TCP_BLACKBOX tcp_log_tcpcbfini(tp); #endif @@ -2489,8 +2467,6 @@ } refcount_release(&tp->t_fb->tfb_refcnt); - - return (in_pcbrele_wlocked(inp)); } /* diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h --- a/sys/netinet/tcp_timer.h +++ b/sys/netinet/tcp_timer.h @@ -145,15 +145,13 @@ #ifdef _KERNEL -/* - * Flags for the tcpcb's tt_flags field. - */ -#define TT_DELACK 0x0001 -#define TT_REXMT 0x0002 -#define TT_PERSIST 0x0004 -#define TT_KEEP 0x0008 -#define TT_2MSL 0x0010 -#define TT_MASK (TT_DELACK|TT_REXMT|TT_PERSIST|TT_KEEP|TT_2MSL) +typedef enum { + TT_DELACK = 0, + TT_REXMT, + TT_PERSIST, + TT_KEEP, + TT_2MSL, +} tt_which; /* * Suspend flags - used when suspending a timer @@ -165,8 +163,6 @@ #define TT_KEEP_SUS 0x0800 #define TT_2MSL_SUS 0x1000 -#define TT_STOPPED 0x00010000 - #define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit) #define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle) #define TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl) diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -254,21 +254,9 @@ struct inpcb *inp = tptoinpcb(tp); #endif - INP_WLOCK(inp); - CURVNET_SET(inp->inp_vnet); + INP_WLOCK_ASSERT(inp); - if (callout_pending(&tp->tt_delack) || - !callout_active(&tp->tt_delack)) { - INP_WUNLOCK(inp); - CURVNET_RESTORE(); - return; - } - callout_deactivate(&tp->tt_delack); - if ((inp->inp_flags & INP_DROPPED) != 0) { - INP_WUNLOCK(inp); - CURVNET_RESTORE(); - return; - } + CURVNET_SET(inp->inp_vnet); tp->t_flags |= TF_ACKNOW; TCPSTAT_INC(tcps_delack); NET_EPOCH_ENTER(et); @@ -277,24 +265,6 @@ CURVNET_RESTORE(); } -/* - * Call tcp_close() from a callout context. - */ -static void -tcp_timer_close(struct tcpcb *tp) -{ - struct epoch_tracker et; - struct inpcb *inp = tptoinpcb(tp); - - INP_WLOCK_ASSERT(inp); - - NET_EPOCH_ENTER(et); - tp = tcp_close(tp); - NET_EPOCH_EXIT(et); - if (tp != NULL) - INP_WUNLOCK(inp); -} - /* * Call tcp_drop() from a callout context. */ @@ -318,31 +288,18 @@ { struct tcpcb *tp = xtp; struct inpcb *inp = tptoinpcb(tp); + bool close = false; #ifdef TCPDEBUG int ostate; ostate = tp->t_state; #endif - INP_WLOCK(inp); - CURVNET_SET(inp->inp_vnet); + INP_WLOCK_ASSERT(inp); + CURVNET_SET(inp->inp_vnet); tcp_log_end_status(tp, TCP_EI_STATUS_2MSL); tcp_free_sackholes(tp); - if (callout_pending(&tp->tt_2msl) || - !callout_active(&tp->tt_2msl)) { - INP_WUNLOCK(inp); - CURVNET_RESTORE(); - return; - } - callout_deactivate(&tp->tt_2msl); - if (inp->inp_flags & INP_DROPPED) { - INP_WUNLOCK(inp); - CURVNET_RESTORE(); - return; - } - KASSERT((tp->tt_flags & TT_STOPPED) == 0, - ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); /* * 2 MSL timeout in shutdown went off. If we're closed but * still waiting for peer to close and connection has been idle @@ -355,26 +312,19 @@ * * XXXGL: check if inp_socket shall always be !NULL here? */ - if (tp->t_state == TCPS_TIME_WAIT) { - tcp_timer_close(tp); - CURVNET_RESTORE(); - return; - } else if (tp->t_state == TCPS_FIN_WAIT_2 && + if (tp->t_state == TCPS_TIME_WAIT) + close = true; + else if (tp->t_state == TCPS_FIN_WAIT_2 && tcp_fast_finwait2_recycle && inp->inp_socket && (inp->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { TCPSTAT_INC(tcps_finwait2_drops); - tcp_timer_close(tp); - CURVNET_RESTORE(); - return; + close = true; } else { if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) { callout_reset(&tp->tt_2msl, TP_KEEPINTVL(tp), tcp_timer_2msl, tp); - } else { - tcp_timer_close(tp); - CURVNET_RESTORE(); - return; - } + } else + close = true; } #ifdef TCPDEBUG @@ -384,7 +334,15 @@ #endif TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); - INP_WUNLOCK(inp); + if (close) { + struct epoch_tracker et; + + NET_EPOCH_ENTER(et); + tp = tcp_close(tp); + NET_EPOCH_EXIT(et); + } + if (tp != NULL) + INP_WUNLOCK(inp); CURVNET_RESTORE(); } @@ -401,24 +359,9 @@ ostate = tp->t_state; #endif - INP_WLOCK(inp); - CURVNET_SET(inp->inp_vnet); - - if (callout_pending(&tp->tt_keep) || - !callout_active(&tp->tt_keep)) { - INP_WUNLOCK(inp); - CURVNET_RESTORE(); - return; - } - callout_deactivate(&tp->tt_keep); - if (inp->inp_flags & INP_DROPPED) { - INP_WUNLOCK(inp); - CURVNET_RESTORE(); - return; - } - KASSERT((tp->tt_flags & TT_STOPPED) == 0, - ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); + INP_WLOCK_ASSERT(inp); + CURVNET_SET(inp->inp_vnet); /* * Because we don't regularly reset the keepalive callout in * the ESTABLISHED state, it may be that we don't actually need @@ -547,23 +490,9 @@ ostate = tp->t_state; #endif - INP_WLOCK(inp); - CURVNET_SET(inp->inp_vnet); + INP_WLOCK_ASSERT(inp); - if (callout_pending(&tp->tt_persist) || - !callout_active(&tp->tt_persist)) { - INP_WUNLOCK(inp); - CURVNET_RESTORE(); - return; - } - callout_deactivate(&tp->tt_persist); - if (inp->inp_flags & INP_DROPPED) { - INP_WUNLOCK(inp); - CURVNET_RESTORE(); - return; - } - KASSERT((tp->tt_flags & TT_STOPPED) == 0, - ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); + CURVNET_SET(inp->inp_vnet); /* * Persistence timer into zero window. * Force a byte to be output, if possible. @@ -631,23 +560,9 @@ ostate = tp->t_state; #endif - INP_WLOCK(inp); - CURVNET_SET(inp->inp_vnet); + INP_WLOCK_ASSERT(inp); - if (callout_pending(&tp->tt_rexmt) || - !callout_active(&tp->tt_rexmt)) { - INP_WUNLOCK(inp); - CURVNET_RESTORE(); - return; - } - callout_deactivate(&tp->tt_rexmt); - if (inp->inp_flags & INP_DROPPED) { - INP_WUNLOCK(inp); - CURVNET_RESTORE(); - return; - } - KASSERT((tp->tt_flags & TT_STOPPED) == 0, - ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); + CURVNET_SET(inp->inp_vnet); tcp_free_sackholes(tp); TCP_LOG_EVENT(tp, NULL, NULL, NULL, TCP_LOG_RTO, 0, 0, NULL, false); if (tp->t_fb->tfb_tcp_rexmit_tmr) { @@ -900,21 +815,20 @@ } void -tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta) +tcp_timer_activate(struct tcpcb *tp, tt_which timer_type, u_int delta) { struct callout *t_callout; callout_func_t *f_callout; struct inpcb *inp = tptoinpcb(tp); int cpu = inp_to_cpuid(inp); + INP_WLOCK_ASSERT(inp); + #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) return; #endif - if (tp->tt_flags & TT_STOPPED) - return; - switch (timer_type) { case TT_DELACK: t_callout = &tp->tt_delack; @@ -951,7 +865,7 @@ } int -tcp_timer_active(struct tcpcb *tp, uint32_t timer_type) +tcp_timer_active(struct tcpcb *tp, tt_which timer_type) { struct callout *t_callout; @@ -1088,32 +1002,15 @@ } } -static void -tcp_timer_discard(void *ptp) -{ - struct epoch_tracker et; - struct tcpcb *tp = (struct tcpcb *)ptp; - struct inpcb *inp = tptoinpcb(tp); - - INP_WLOCK(inp); - CURVNET_SET(inp->inp_vnet); - NET_EPOCH_ENTER(et); - - KASSERT((tp->tt_flags & TT_STOPPED) != 0, - ("%s: tcpcb has to be stopped here", __func__)); - if (--tp->tt_draincnt > 0 || - tcp_freecb(tp) == false) - INP_WUNLOCK(inp); - NET_EPOCH_EXIT(et); - CURVNET_RESTORE(); -} - void -tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type) +tcp_timer_stop(struct tcpcb *tp, tt_which timer_type) { struct callout *t_callout; + bool dying __diagused; + int stopped __diagused; + + INP_WLOCK_ASSERT(tptoinpcb(tp)); - tp->tt_flags |= TT_STOPPED; switch (timer_type) { case TT_DELACK: t_callout = &tp->tt_delack; @@ -1129,6 +1026,9 @@ break; case TT_2MSL: t_callout = &tp->tt_2msl; +#ifdef INVARIANTS + dying = tp->t_flags & TF_DYING; +#endif break; default: if (tp->t_fb->tfb_tcp_timer_stop) { @@ -1140,14 +1040,14 @@ return; } panic("tp %p bad timer_type %#x", tp, timer_type); - } - - if (callout_async_drain(t_callout, tcp_timer_discard) == 0) { - /* - * Can't stop the callout, defer tcpcb actual deletion - * to the last one. We do this using the async drain - * function and incrementing the count in - */ - tp->tt_draincnt++; } + + stopped = callout_stop(t_callout); + /* + * The only legit case to fail to stop the callout is that we are + * already in the callout. This happens only when connection is + * closed via 2MSL timeout. + */ + KASSERT((timer_type == TT_2MSL && dying) || stopped != 0, + ("%s: failed to stop %u at %p", __func__, timer_type, tp)); } diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -146,7 +146,6 @@ struct callout tt_2msl; /* 2*msl TIME_WAIT timer */ struct callout tt_delack; /* delayed ACK timer */ uint32_t tt_flags; /* Timers flags */ - uint32_t tt_draincnt; /* Count being drained */ uint32_t t_maxseg:24, /* maximum segment size */ t_logstate:8; /* State of "black box" logging */ @@ -548,7 +547,7 @@ #define TF_TSO 0x01000000 /* TSO enabled on this connection */ #define TF_TOE 0x02000000 /* this connection is offloaded */ #define TF_CLOSED 0x04000000 /* close(2) called on socket */ -#define TF_UNUSED1 0x08000000 /* unused */ +#define TF_DYING 0x08000000 /* used with INVARIANTS only */ #define TF_LRD 0x10000000 /* Lost Retransmission Detection */ #define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */ #define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */ @@ -1083,7 +1082,6 @@ struct tcpcb * tcp_close(struct tcpcb *); void tcp_discardcb(struct tcpcb *); -bool tcp_freecb(struct tcpcb *); void tcp_twstart(struct tcpcb *); int tcp_ctloutput(struct socket *, struct sockopt *); void tcp_fini(void *);