Index: head/sys/netinet/tcp_timer.h =================================================================== --- head/sys/netinet/tcp_timer.h +++ head/sys/netinet/tcp_timer.h @@ -191,6 +191,9 @@ extern int tcp_finwait2_timeout; extern int tcp_fast_finwait2_recycle; +int tcp_inpinfo_lock_add(struct inpcb *inp); +void tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp); + void tcp_timer_init(void); void tcp_timer_2msl(void *xtp); void tcp_timer_discard(void *); Index: head/sys/netinet/tcp_timer.c =================================================================== --- head/sys/netinet/tcp_timer.c +++ head/sys/netinet/tcp_timer.c @@ -294,11 +294,6 @@ CURVNET_RESTORE(); return; } - KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, - ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); - KASSERT((tp->t_timers->tt_flags & TT_DELACK) != 0, - ("%s: tp %p delack callout should be running", __func__, tp)); - tp->t_flags |= TF_ACKNOW; TCPSTAT_INC(tcps_delack); (void) tp->t_fb->tfb_tcp_output(tp); @@ -306,6 +301,39 @@ CURVNET_RESTORE(); } +int +tcp_inpinfo_lock_add(struct inpcb *inp) +{ + in_pcbref(inp); + INP_WUNLOCK(inp); + INP_INFO_RLOCK(&V_tcbinfo); + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + return(1); + } + return(0); + +} + +void +tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp) +{ + INP_INFO_RUNLOCK(&V_tcbinfo); + if (inp && (tp == NULL)) { + /* + * If tcp_close/drop() gets called and tp + * returns NULL, then the function dropped + * the inp lock, we hold a reference keeping + * this around, so we must re-aquire the + * INP_WLOCK() in order to proceed with + * our dropping the inp reference. + */ + INP_WLOCK(inp); + } + if (inp && in_pcbrele_wlocked(inp) == 0) + INP_WUNLOCK(inp); +} + void tcp_timer_2msl(void *xtp) { @@ -317,7 +345,6 @@ ostate = tp->t_state; #endif - INP_INFO_RLOCK(&V_tcbinfo); inp = tp->t_inpcb; KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); INP_WLOCK(inp); @@ -325,21 +352,17 @@ if (callout_pending(&tp->t_timers->tt_2msl) || !callout_active(&tp->t_timers->tt_2msl)) { INP_WUNLOCK(tp->t_inpcb); - INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_2msl); if ((inp->inp_flags & INP_DROPPED) != 0) { INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); - KASSERT((tp->t_timers->tt_flags & TT_2MSL) != 0, - ("%s: tp %p 2msl callout should be running", __func__, tp)); /* * 2 MSL timeout in shutdown went off. If we're closed but * still waiting for peer to close and connection has been idle @@ -355,7 +378,6 @@ */ if ((inp->inp_flags & INP_TIMEWAIT) != 0) { INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } @@ -363,15 +385,26 @@ tp->t_inpcb && tp->t_inpcb->inp_socket && (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { TCPSTAT_INC(tcps_finwait2_drops); + if (tcp_inpinfo_lock_add(inp)) { + tcp_inpinfo_lock_del(inp, tp); + goto out; + } tp = tcp_close(tp); + tcp_inpinfo_lock_del(inp, tp); + goto out; } else { if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) { - if (!callout_reset(&tp->t_timers->tt_2msl, - TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) { - tp->t_timers->tt_flags &= ~TT_2MSL_RST; + callout_reset(&tp->t_timers->tt_2msl, + TP_KEEPINTVL(tp), tcp_timer_2msl, tp); + } else { + if (tcp_inpinfo_lock_add(inp)) { + tcp_inpinfo_lock_del(inp, tp); + goto out; } - } else - tp = tcp_close(tp); + tp = tcp_close(tp); + tcp_inpinfo_lock_del(inp, tp); + goto out; + } } #ifdef TCPDEBUG @@ -383,7 +416,7 @@ if (tp != NULL) INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); +out: CURVNET_RESTORE(); } @@ -399,28 +432,23 @@ ostate = tp->t_state; #endif - INP_INFO_RLOCK(&V_tcbinfo); inp = tp->t_inpcb; KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); INP_WLOCK(inp); if (callout_pending(&tp->t_timers->tt_keep) || !callout_active(&tp->t_timers->tt_keep)) { INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_keep); if ((inp->inp_flags & INP_DROPPED) != 0) { INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); - KASSERT((tp->t_timers->tt_flags & TT_KEEP) != 0, - ("%s: tp %p keep callout should be running", __func__, tp)); /* * Keep-alive timer went off; send something * or drop connection if idle for too long. @@ -452,14 +480,11 @@ tp->rcv_nxt, tp->snd_una - 1, 0); free(t_template, M_TEMP); } - if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), - tcp_timer_keep, tp)) { - tp->t_timers->tt_flags &= ~TT_KEEP_RST; - } - } else if (!callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), - tcp_timer_keep, tp)) { - tp->t_timers->tt_flags &= ~TT_KEEP_RST; - } + callout_reset(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp), + tcp_timer_keep, tp); + } else + callout_reset(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp), + tcp_timer_keep, tp); #ifdef TCPDEBUG if (inp->inp_socket->so_options & SO_DEBUG) @@ -468,12 +493,16 @@ #endif TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; dropit: TCPSTAT_INC(tcps_keepdrops); + + if (tcp_inpinfo_lock_add(inp)) { + tcp_inpinfo_lock_del(inp, tp); + goto out; + } tp = tcp_drop(tp, ETIMEDOUT); #ifdef TCPDEBUG @@ -482,9 +511,8 @@ PRU_SLOWTIMO); #endif TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); - if (tp != NULL) - INP_WUNLOCK(tp->t_inpcb); - INP_INFO_RUNLOCK(&V_tcbinfo); + tcp_inpinfo_lock_del(inp, tp); +out: CURVNET_RESTORE(); } @@ -499,28 +527,23 @@ ostate = tp->t_state; #endif - INP_INFO_RLOCK(&V_tcbinfo); inp = tp->t_inpcb; KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); INP_WLOCK(inp); if (callout_pending(&tp->t_timers->tt_persist) || !callout_active(&tp->t_timers->tt_persist)) { INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_persist); if ((inp->inp_flags & INP_DROPPED) != 0) { INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); - KASSERT((tp->t_timers->tt_flags & TT_PERSIST) != 0, - ("%s: tp %p persist callout should be running", __func__, tp)); /* * Persistence timer into zero window. * Force a byte to be output, if possible. @@ -537,7 +560,12 @@ (ticks - tp->t_rcvtime >= tcp_maxpersistidle || ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { TCPSTAT_INC(tcps_persistdrop); + if (tcp_inpinfo_lock_add(inp)) { + tcp_inpinfo_lock_del(inp, tp); + goto out; + } tp = tcp_drop(tp, ETIMEDOUT); + tcp_inpinfo_lock_del(inp, tp); goto out; } /* @@ -547,7 +575,12 @@ if (tp->t_state > TCPS_CLOSE_WAIT && (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { TCPSTAT_INC(tcps_persistdrop); + if (tcp_inpinfo_lock_add(inp)) { + tcp_inpinfo_lock_del(inp, tp); + goto out; + } tp = tcp_drop(tp, ETIMEDOUT); + tcp_inpinfo_lock_del(inp, tp); goto out; } tcp_setpersist(tp); @@ -555,15 +588,13 @@ (void) tp->t_fb->tfb_tcp_output(tp); tp->t_flags &= ~TF_FORCEDATA; -out: #ifdef TCPDEBUG if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG) tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); #endif TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); - if (tp != NULL) - INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_WUNLOCK(inp); +out: CURVNET_RESTORE(); } @@ -573,36 +604,29 @@ struct tcpcb *tp = xtp; CURVNET_SET(tp->t_vnet); int rexmt; - int headlocked; struct inpcb *inp; #ifdef TCPDEBUG int ostate; ostate = tp->t_state; #endif - - INP_INFO_RLOCK(&V_tcbinfo); inp = tp->t_inpcb; KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); INP_WLOCK(inp); if (callout_pending(&tp->t_timers->tt_rexmt) || !callout_active(&tp->t_timers->tt_rexmt)) { INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(&tp->t_timers->tt_rexmt); if ((inp->inp_flags & INP_DROPPED) != 0) { INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(&V_tcbinfo); CURVNET_RESTORE(); return; } KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); - KASSERT((tp->t_timers->tt_flags & TT_REXMT) != 0, - ("%s: tp %p rexmt callout should be running", __func__, tp)); tcp_free_sackholes(tp); if (tp->t_fb->tfb_tcp_rexmit_tmr) { /* The stack has a timer action too. */ @@ -616,14 +640,15 @@ if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) { tp->t_rxtshift = TCP_MAXRXTSHIFT; TCPSTAT_INC(tcps_timeoutdrop); - + if (tcp_inpinfo_lock_add(inp)) { + tcp_inpinfo_lock_del(inp, tp); + goto out; + } tp = tcp_drop(tp, tp->t_softerror ? tp->t_softerror : ETIMEDOUT); - headlocked = 1; + tcp_inpinfo_lock_del(inp, tp); goto out; } - INP_INFO_RUNLOCK(&V_tcbinfo); - headlocked = 0; if (tp->t_state == TCPS_SYN_SENT) { /* * If the SYN was retransmitted, indicate CWND to be @@ -811,17 +836,14 @@ (void) tp->t_fb->tfb_tcp_output(tp); -out: #ifdef TCPDEBUG if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, PRU_SLOWTIMO); #endif TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); - if (tp != NULL) - INP_WUNLOCK(inp); - if (headlocked) - INP_INFO_RUNLOCK(&V_tcbinfo); + INP_WUNLOCK(inp); +out: CURVNET_RESTORE(); } @@ -832,7 +854,6 @@ timeout_t *f_callout; struct inpcb *inp = tp->t_inpcb; int cpu = inp_to_cpuid(inp); - uint32_t f_reset; #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) @@ -846,27 +867,22 @@ case TT_DELACK: t_callout = &tp->t_timers->tt_delack; f_callout = tcp_timer_delack; - f_reset = TT_DELACK_RST; break; case TT_REXMT: t_callout = &tp->t_timers->tt_rexmt; f_callout = tcp_timer_rexmt; - f_reset = TT_REXMT_RST; break; case TT_PERSIST: t_callout = &tp->t_timers->tt_persist; f_callout = tcp_timer_persist; - f_reset = TT_PERSIST_RST; break; case TT_KEEP: t_callout = &tp->t_timers->tt_keep; f_callout = tcp_timer_keep; - f_reset = TT_KEEP_RST; break; case TT_2MSL: t_callout = &tp->t_timers->tt_2msl; f_callout = tcp_timer_2msl; - f_reset = TT_2MSL_RST; break; default: if (tp->t_fb->tfb_tcp_timer_activate) { @@ -876,24 +892,9 @@ panic("tp %p bad timer_type %#x", tp, timer_type); } if (delta == 0) { - if ((tp->t_timers->tt_flags & timer_type) && - (callout_stop(t_callout) > 0) && - (tp->t_timers->tt_flags & f_reset)) { - tp->t_timers->tt_flags &= ~(timer_type | f_reset); - } + callout_stop(t_callout); } else { - if ((tp->t_timers->tt_flags & timer_type) == 0) { - tp->t_timers->tt_flags |= (timer_type | f_reset); - callout_reset_on(t_callout, delta, f_callout, tp, cpu); - } else { - /* Reset already running callout on the same CPU. */ - if (!callout_reset(t_callout, delta, f_callout, tp)) { - /* - * Callout not cancelled, consider it as not - * properly restarted. */ - tp->t_timers->tt_flags &= ~f_reset; - } - } + callout_reset_on(t_callout, delta, f_callout, tp, cpu); } } @@ -931,30 +932,23 @@ tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type) { struct callout *t_callout; - uint32_t f_reset; tp->t_timers->tt_flags |= TT_STOPPED; - switch (timer_type) { case TT_DELACK: t_callout = &tp->t_timers->tt_delack; - f_reset = TT_DELACK_RST; break; case TT_REXMT: t_callout = &tp->t_timers->tt_rexmt; - f_reset = TT_REXMT_RST; break; case TT_PERSIST: t_callout = &tp->t_timers->tt_persist; - f_reset = TT_PERSIST_RST; break; case TT_KEEP: t_callout = &tp->t_timers->tt_keep; - f_reset = TT_KEEP_RST; break; case TT_2MSL: t_callout = &tp->t_timers->tt_2msl; - f_reset = TT_2MSL_RST; break; default: if (tp->t_fb->tfb_tcp_timer_stop) { @@ -968,15 +962,13 @@ panic("tp %p bad timer_type %#x", tp, timer_type); } - if (tp->t_timers->tt_flags & timer_type) { - if (callout_async_drain(t_callout, tcp_timer_discard) == 0) { - /* - * Can't stop the callout, defer tcpcb actual deletion - * to the last one. We do this using the async drain - * function and incrementing the count in - */ - tp->t_timers->tt_draincnt++; - } + if (callout_async_drain(t_callout, tcp_timer_discard) == 0) { + /* + * Can't stop the callout, defer tcpcb actual deletion + * to the last one. We do this using the async drain + * function and incrementing the count in + */ + tp->t_timers->tt_draincnt++; } }