Index: sys/dev/cxgbe/tom/t4_cpl_io.c =================================================================== --- sys/dev/cxgbe/tom/t4_cpl_io.c +++ sys/dev/cxgbe/tom/t4_cpl_io.c @@ -2300,6 +2300,12 @@ if (moretocome) tp->t_flags |= TF_MORETOCOME; error = tcp_output(tp); + if (error < 0) { + INP_UNLOCK_ASSERT(inp); + SOCK_IO_SEND_UNLOCK(so); + error = -error; + goto out; + } if (moretocome) tp->t_flags &= ~TF_MORETOCOME; } Index: sys/netinet/tcp_hpts.c =================================================================== --- sys/netinet/tcp_hpts.c +++ sys/netinet/tcp_hpts.c @@ -1550,6 +1550,8 @@ } inp->inp_hpts_calls = 1; error = tcp_output(tp); + if (error < 0) + goto skip_pacing; inp->inp_hpts_calls = 0; if (ninp && ninp->inp_ppcb) { /* Index: sys/netinet/tcp_stacks/bbr.c =================================================================== --- sys/netinet/tcp_stacks/bbr.c +++ sys/netinet/tcp_stacks/bbr.c @@ -4580,8 +4580,7 @@ } if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); - return (1); + return (-ETIMEDOUT); /* tcp_drop() */ } /* Did we somehow get into persists? 
*/ if (bbr->rc_in_persist) { @@ -4773,8 +4772,7 @@ */ if (ctf_progress_timeout_check(tp, true)) { bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); - goto out; + return (-ETIMEDOUT); /* tcp_drop() */ } /* * Hack: if the peer is dead/unreachable, we do not time out if the @@ -4787,8 +4785,7 @@ ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { KMOD_TCPSTAT_INC(tcps_persistdrop); tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); - tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); - goto out; + return (-ETIMEDOUT); /* tcp_drop() */ } if ((sbavail(&bbr->rc_inp->inp_socket->so_snd) == 0) && tp->snd_una == tp->snd_max) { @@ -4804,8 +4801,7 @@ (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { KMOD_TCPSTAT_INC(tcps_persistdrop); tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); - tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); - goto out; + return (-ETIMEDOUT); /* tcp_drop() */ } t_template = tcpip_maketemplate(bbr->rc_inp); if (t_template) { @@ -4877,8 +4873,7 @@ dropit: KMOD_TCPSTAT_INC(tcps_keepdrops); tcp_log_end_status(tp, TCP_EI_STATUS_KEEP_MAX); - tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); - return (1); + return (-ETIMEDOUT); /* tcp_drop() */ } /* @@ -4998,10 +4993,8 @@ * and retransmit one segment. */ if (ctf_progress_timeout_check(tp, true)) { - retval = 1; bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); - tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); - goto out; + return (-ETIMEDOUT); /* tcp_drop() */ } bbr_remxt_tmr(tp); if ((bbr->r_ctl.rc_resend == NULL) || @@ -5017,11 +5010,11 @@ if (tp->t_rxtshift > TCP_MAXRXTSHIFT) { tp->t_rxtshift = TCP_MAXRXTSHIFT; KMOD_TCPSTAT_INC(tcps_timeoutdrop); - retval = 1; tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN); - tcp_set_inp_to_drop(bbr->rc_inp, - (tp->t_softerror ? (uint16_t) tp->t_softerror : ETIMEDOUT)); - goto out; + /* XXXGL: previously t_softerror was casted to uint16_t */ + MPASS(tp->t_softerror >= 0); + retval = tp->t_softerror ? 
-tp->t_softerror : -ETIMEDOUT; + return (retval); /* tcp_drop() */ } if (tp->t_state == TCPS_SYN_SENT) { /* @@ -5194,7 +5187,7 @@ tp->snd_recover = tp->snd_max; tp->t_flags |= TF_ACKNOW; tp->t_rtttime = 0; -out: + return (retval); } @@ -11637,7 +11630,8 @@ if (bbr->r_wanted_output != 0) { bbr->rc_output_starts_timer = 0; did_out = 1; - (void)tcp_output(tp); + if (tcp_output(tp) < 0) + return (1); } else bbr_start_hpts_timer(bbr, tp, cts, 6, 0, 0); } @@ -11676,7 +11670,8 @@ /* We are late */ bbr->r_ctl.rc_last_delay_val = 0; BBR_STAT_INC(bbr_force_output); - (void)tcp_output(tp); + if (tcp_output(tp) < 0) + return (1); } } } @@ -12163,9 +12158,16 @@ hpts_calling = inp->inp_hpts_calls; inp->inp_hpts_calls = 0; if (bbr->r_ctl.rc_hpts_flags & PACE_TMR_MASK) { - if (bbr_process_timers(tp, bbr, cts, hpts_calling)) { + int retval; + + retval = bbr_process_timers(tp, bbr, cts, hpts_calling); + if (retval != 0) { counter_u64_add(bbr_out_size[TCP_MSS_ACCT_ATIMER], 1); - return (0); + /* + * If timers want tcp_drop(), then pass error out, + * otherwise suppress it. + */ + return (retval < 0 ? retval : 0); } } bbr->rc_inp->inp_flags2 &= ~INP_MBUF_QUEUE_READY; @@ -13234,10 +13236,9 @@ * is the only thing to do. 
*/ BBR_STAT_INC(bbr_offset_drop); - tcp_set_inp_to_drop(inp, EFAULT); SOCKBUF_UNLOCK(sb); (void)m_free(m); - return (0); + return (-EFAULT); /* tcp_drop() */ } len = rsm->r_end - rsm->r_start; } @@ -13891,7 +13892,7 @@ bbr->oerror_cnt++; if (bbr_max_net_error_cnt && (bbr->oerror_cnt >= bbr_max_net_error_cnt)) { /* drop the session */ - tcp_set_inp_to_drop(inp, ENETDOWN); + return (-ENETDOWN); } switch (error) { case ENOBUFS: @@ -14238,6 +14239,7 @@ .tfb_tcp_handoff_ok = bbr_handoff_ok, .tfb_tcp_mtu_chg = bbr_mtu_chg, .tfb_pru_options = bbr_pru_options, + .tfb_flags = TCP_FUNC_OUTPUT_CANDROP, }; /* Index: sys/netinet/tcp_stacks/rack.c =================================================================== --- sys/netinet/tcp_stacks/rack.c +++ sys/netinet/tcp_stacks/rack.c @@ -6319,8 +6319,7 @@ } if (ctf_progress_timeout_check(tp, true)) { rack_log_progress_event(rack, tp, tick, PROGRESS_DROP, __LINE__); - tcp_set_inp_to_drop(tp->t_inpcb, ETIMEDOUT); - return (1); + return (-ETIMEDOUT); /* tcp_drop() */ } /* * A TLP timer has expired. We have been idle for 2 rtts. 
So we now @@ -6538,9 +6537,8 @@ if (ctf_progress_timeout_check(tp, false)) { tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); rack_log_progress_event(rack, tp, tick, PROGRESS_DROP, __LINE__); - tcp_set_inp_to_drop(inp, ETIMEDOUT); counter_u64_add(rack_persists_lost_ends, rack->r_ctl.persist_lost_ends); - return (1); + return (-ETIMEDOUT); /* tcp_drop() */ } KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); /* @@ -6558,10 +6556,9 @@ (ticks - tp->t_rcvtime >= tcp_maxpersistidle || TICKS_2_USEC(ticks - tp->t_rcvtime) >= RACK_REXMTVAL(tp) * tcp_totbackoff)) { KMOD_TCPSTAT_INC(tcps_persistdrop); - retval = 1; tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); - tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT); counter_u64_add(rack_persists_lost_ends, rack->r_ctl.persist_lost_ends); + retval = -ETIMEDOUT; /* tcp_drop() */ goto out; } if ((sbavail(&rack->rc_inp->inp_socket->so_snd) == 0) && @@ -6574,11 +6571,10 @@ */ if (tp->t_state > TCPS_CLOSE_WAIT && (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { - retval = 1; KMOD_TCPSTAT_INC(tcps_persistdrop); tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); - tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT); counter_u64_add(rack_persists_lost_ends, rack->r_ctl.persist_lost_ends); + retval = -ETIMEDOUT; /* tcp_drop() */ goto out; } t_template = tcpip_maketemplate(rack->rc_inp); @@ -6669,8 +6665,7 @@ dropit: KMOD_TCPSTAT_INC(tcps_keepdrops); tcp_log_end_status(tp, TCP_EI_STATUS_KEEP_MAX); - tcp_set_inp_to_drop(rack->rc_inp, ETIMEDOUT); - return (1); + return (-ETIMEDOUT); /* tcp_drop() */ } /* @@ -6874,8 +6869,7 @@ if (ctf_progress_timeout_check(tp, false)) { tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN); rack_log_progress_event(rack, tp, tick, PROGRESS_DROP, __LINE__); - tcp_set_inp_to_drop(inp, ETIMEDOUT); - return (1); + return (-ETIMEDOUT); /* tcp_drop() */ } rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_RXT; rack->r_ctl.retran_during_recovery = 0; @@ -6944,10 +6938,10 @@ drop_it: tp->t_rxtshift = TCP_MAXRXTSHIFT; 
KMOD_TCPSTAT_INC(tcps_timeoutdrop); - retval = 1; - tcp_set_inp_to_drop(rack->rc_inp, - (tp->t_softerror ? (uint16_t) tp->t_softerror : ETIMEDOUT)); - goto out; + /* XXXGL: previously t_softerror was casted to uint16_t */ + MPASS(tp->t_softerror >= 0); + retval = tp->t_softerror ? -tp->t_softerror : -ETIMEDOUT; + goto out; /* tcp_drop() */ } if (tp->t_state == TCPS_SYN_SENT) { /* @@ -14164,7 +14158,12 @@ ctf_calc_rwin(so, tp); if ((rack->r_wanted_output != 0) || (rack->r_fast_output != 0)) { send_out_a_rst: - (void)tcp_output(tp); + if (tcp_output(tp) < 0) { +#ifdef TCP_ACCOUNTING + sched_unpin(); +#endif + return (1); + } did_out = 1; } rack_free_trim(rack); @@ -14649,8 +14648,9 @@ if (nxt_pkt == 0) { if ((rack->r_wanted_output != 0) || (rack->r_fast_output != 0)) { do_output_now: + if (tcp_output(tp) < 0) + return (1); did_out = 1; - (void)tcp_output(tp); } rack_start_hpts_timer(rack, tp, cts, 0, 0, 0); rack_free_trim(rack); @@ -16877,12 +16877,20 @@ } /* Do the timers, which may override the pacer */ if (rack->r_ctl.rc_hpts_flags & PACE_TMR_MASK) { - if (rack_process_timers(tp, rack, cts, hpts_calling, &doing_tlp)) { + int retval; + + retval = rack_process_timers(tp, rack, cts, hpts_calling, + &doing_tlp); + if (retval != 0) { counter_u64_add(rack_out_size[TCP_MSS_ACCT_ATIMER], 1); #ifdef TCP_ACCOUNTING sched_unpin(); #endif - return (0); + /* + * If timers want tcp_drop(), then pass error out, + * otherwise suppress it. + */ + return (retval < 0 ? 
retval : 0); } } if (rack->rc_in_persist) { @@ -20396,6 +20404,7 @@ .tfb_tcp_mtu_chg = rack_mtu_change, .tfb_pru_options = rack_pru_options, .tfb_hwtls_change = rack_hw_tls_change, + .tfb_flags = TCP_FUNC_OUTPUT_CANDROP, }; /* Index: sys/netinet/tcp_subr.c =================================================================== --- sys/netinet/tcp_subr.c +++ sys/netinet/tcp_subr.c @@ -375,7 +375,7 @@ static int tcp_default_handoff_ok(struct tcpcb *tp); static struct inpcb *tcp_notify(struct inpcb *, int); static struct inpcb *tcp_mtudisc_notify(struct inpcb *, int); -static void tcp_mtudisc(struct inpcb *, int); +static struct inpcb *tcp_mtudisc(struct inpcb *, int); static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr); @@ -2391,7 +2391,8 @@ if (TCPS_HAVERCVDSYN(tp->t_state)) { tcp_state_change(tp, TCPS_CLOSED); - (void) tcp_output(tp); + /* Don't use tcp_output() here due to possible recursion. */ + (void)tcp_output_nodrop(tp); TCPSTAT_INC(tcps_drops); } else TCPSTAT_INC(tcps_conndrops); @@ -3019,7 +3020,7 @@ inc.inc_fibnum = inp->inp_inc.inc_fibnum; tcp_hc_updatemtu(&inc, mtu); - tcp_mtudisc(inp, mtu); + inp = tcp_mtudisc(inp, mtu); } } else inp = (*notify)(inp, @@ -3467,11 +3468,10 @@ tcp_mtudisc_notify(struct inpcb *inp, int error) { - tcp_mtudisc(inp, -1); - return (inp); + return (tcp_mtudisc(inp, -1)); } -static void +static struct inpcb * tcp_mtudisc(struct inpcb *inp, int mtuoffer) { struct tcpcb *tp; @@ -3480,7 +3480,7 @@ INP_WLOCK_ASSERT(inp); if ((inp->inp_flags & INP_TIMEWAIT) || (inp->inp_flags & INP_DROPPED)) - return; + return (inp); tp = intotcpcb(inp); KASSERT(tp != NULL, ("tcp_mtudisc: tp == NULL")); @@ -3510,7 +3510,10 @@ */ tp->t_fb->tfb_tcp_mtu_chg(tp); } - tcp_output(tp); + if (tcp_output(tp) < 0) + return (NULL); + else + return (inp); } #ifdef INET Index: sys/netinet/tcp_timer.c =================================================================== --- sys/netinet/tcp_timer.c +++ 
sys/netinet/tcp_timer.c @@ -292,8 +292,7 @@ tp->t_flags |= TF_ACKNOW; TCPSTAT_INC(tcps_delack); NET_EPOCH_ENTER(et); - (void) tcp_output(tp); - INP_WUNLOCK(inp); + (void) tcp_output_unlock(tp); NET_EPOCH_EXIT(et); CURVNET_RESTORE(); } @@ -502,6 +501,7 @@ struct tcpcb *tp = xtp; struct inpcb *inp; struct epoch_tracker et; + int outrv; CURVNET_SET(tp->t_vnet); #ifdef TCPDEBUG int ostate; @@ -563,8 +563,7 @@ tcp_setpersist(tp); tp->t_flags |= TF_FORCEDATA; NET_EPOCH_ENTER(et); - (void) tcp_output(tp); - NET_EPOCH_EXIT(et); + outrv = tcp_output_nodrop(tp); tp->t_flags &= ~TF_FORCEDATA; #ifdef TCPDEBUG @@ -572,7 +571,8 @@ tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO); #endif TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); - INP_WUNLOCK(inp); + (void) tcp_unlock_or_drop(tp, outrv); + NET_EPOCH_EXIT(et); out: CURVNET_RESTORE(); } @@ -582,7 +582,7 @@ { struct tcpcb *tp = xtp; CURVNET_SET(tp->t_vnet); - int rexmt; + int rexmt, outrv; struct inpcb *inp; struct epoch_tracker et; bool isipv6; @@ -843,15 +843,15 @@ cc_cong_signal(tp, NULL, CC_RTO); NET_EPOCH_ENTER(et); - (void) tcp_output(tp); - NET_EPOCH_EXIT(et); + outrv = tcp_output_nodrop(tp); #ifdef TCPDEBUG if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG)) tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0, PRU_SLOWTIMO); #endif TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO); - INP_WUNLOCK(inp); + (void) tcp_unlock_or_drop(tp, outrv); + NET_EPOCH_EXIT(et); out: CURVNET_RESTORE(); } Index: sys/netinet/tcp_usrreq.c =================================================================== --- sys/netinet/tcp_usrreq.c +++ sys/netinet/tcp_usrreq.c @@ -594,6 +594,8 @@ #endif tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); error = tcp_output(tp); + KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()", + tp->t_fb->tfb_tcp_block_name, tp)); out_in_epoch: NET_EPOCH_EXIT(et); out: @@ -720,6 +722,8 @@ #endif NET_EPOCH_EXIT(et); out: + KASSERT(error >= 0, ("TCP stack %s requested 
tcp_drop(%p) at connect()", + tp->t_fb->tfb_tcp_block_name, tp)); /* * If the implicit bind in the connect call fails, restore * the flags we modified. @@ -896,21 +900,20 @@ inp = sotoinpcb(so); KASSERT(inp != NULL, ("inp == NULL")); INP_WLOCK(inp); + tp = intotcpcb(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { error = ECONNRESET; goto out; } - tp = intotcpcb(inp); TCPDEBUG1(); socantsendmore(so); tcp_usrclosed(tp); if (!(inp->inp_flags & INP_DROPPED)) - error = tcp_output(tp); - + error = tcp_output_nodrop(tp); out: TCPDEBUG2(PRU_SHUTDOWN); TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN); - INP_WUNLOCK(inp); + error = tcp_unlock_or_drop(tp, error); NET_EPOCH_EXIT(et); return (error); @@ -925,17 +928,18 @@ struct epoch_tracker et; struct inpcb *inp; struct tcpcb *tp = NULL; - int error = 0; + int outrv = 0, error = 0; TCPDEBUG0; inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL")); INP_WLOCK(inp); + NET_EPOCH_ENTER(et); + tp = intotcpcb(inp); if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { error = ECONNRESET; goto out; } - tp = intotcpcb(inp); TCPDEBUG1(); /* * For passively-created TFO connections, don't attempt a window @@ -947,18 +951,17 @@ if (IS_FASTOPEN(tp->t_flags) && (tp->t_state == TCPS_SYN_RECEIVED)) goto out; - NET_EPOCH_ENTER(et); #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) tcp_offload_rcvd(tp); else #endif - tcp_output(tp); - NET_EPOCH_EXIT(et); + outrv = tcp_output_nodrop(tp); out: TCPDEBUG2(PRU_RCVD); TCP_PROBE2(debug__user, tp, PRU_RCVD); - INP_WUNLOCK(inp); + (void) tcp_unlock_or_drop(tp, outrv); + NET_EPOCH_EXIT(et); return (error); } @@ -999,6 +1002,7 @@ inp = sotoinpcb(so); KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL")); INP_WLOCK(inp); + tp = intotcpcb(inp); vflagsav = inp->inp_vflag; incflagsav = inp->inp_inc.inc_flags; restoreflags = false; @@ -1018,7 +1022,6 @@ m_freem(control); /* empty control, just free it */ control = NULL; } - tp = intotcpcb(inp); if ((flags & PRUS_OOB) != 0 && (error = 
tcp_pru_options_support(tp, PRUS_OOB)) != 0) goto out; @@ -1188,7 +1191,7 @@ !(flags & PRUS_NOTREADY)) { if (flags & PRUS_MORETOCOME) tp->t_flags |= TF_MORETOCOME; - error = tcp_output(tp); + error = tcp_output_nodrop(tp); if (flags & PRUS_MORETOCOME) tp->t_flags &= ~TF_MORETOCOME; } @@ -1255,7 +1258,7 @@ tp->snd_up = tp->snd_una + sbavail(&so->so_snd); if ((flags & PRUS_NOTREADY) == 0) { tp->t_flags |= TF_FORCEDATA; - error = tcp_output(tp); + error = tcp_output_nodrop(tp); tp->t_flags &= ~TF_FORCEDATA; } } @@ -1285,7 +1288,7 @@ ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB : ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND)); - INP_WUNLOCK(inp); + error = tcp_unlock_or_drop(tp, error); NET_EPOCH_EXIT(et); return (error); } @@ -1310,12 +1313,13 @@ SOCKBUF_LOCK(&so->so_snd); error = sbready(&so->so_snd, m, count); SOCKBUF_UNLOCK(&so->so_snd); - if (error == 0) { - NET_EPOCH_ENTER(et); - error = tcp_output(tp); - NET_EPOCH_EXIT(et); + if (error) { + INP_WUNLOCK(inp); + return (error); } - INP_WUNLOCK(inp); + NET_EPOCH_ENTER(et); + error = tcp_output_unlock(tp); + NET_EPOCH_EXIT(et); return (error); } @@ -2238,7 +2242,7 @@ struct epoch_tracker et; NET_EPOCH_ENTER(et); - error = tcp_output(tp); + error = tcp_output_nodrop(tp); NET_EPOCH_EXIT(et); } } @@ -2767,7 +2771,8 @@ sbflush(&so->so_rcv); tcp_usrclosed(tp); if (!(inp->inp_flags & INP_DROPPED)) - tcp_output(tp); + /* Ignore stack's drop request, we're already at it. */ + (void)tcp_output_nodrop(tp); } } Index: sys/netinet/tcp_var.h =================================================================== --- sys/netinet/tcp_var.h +++ sys/netinet/tcp_var.h @@ -316,7 +316,8 @@ * function below. 
*/ /* Flags for tcp functions */ -#define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */ +#define TCP_FUNC_BEING_REMOVED 0x01 /* Can no longer be referenced */ +#define TCP_FUNC_OUTPUT_CANDROP 0x02 /* tfb_tcp_output may ask tcp_drop */ /* * If defining the optional tcp_timers, in the @@ -386,12 +387,107 @@ TAILQ_HEAD(tcp_funchead, tcp_function); +struct tcpcb * tcp_drop(struct tcpcb *, int); + +#ifdef _NETINET_IN_PCB_H_ +/* + * tcp_output() + * Handles tcp_drop request from advanced stacks and reports that inpcb is + * gone with negative return code. + * Drop-in replacement for the default stack. + */ static inline int tcp_output(struct tcpcb *tp) { + int rv; + + INP_WLOCK_ASSERT(tp->t_inpcb); + + rv = tp->t_fb->tfb_tcp_output(tp); + if (rv < 0) { + KASSERT(tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP, + ("TCP stack %s requested tcp_drop(%p)", + tp->t_fb->tfb_tcp_block_name, tp)); + tp = tcp_drop(tp, -rv); + if (tp) + INP_WUNLOCK(tp->t_inpcb); + } - return (tp->t_fb->tfb_tcp_output(tp)); + return (rv); } + +/* + * tcp_output_unlock() + * Always returns unlocked, handles drop request from advanced stacks. + * Never returns a negative error code. + */ +static inline int +tcp_output_unlock(struct tcpcb *tp) +{ + int rv; + + INP_WLOCK_ASSERT(tp->t_inpcb); + + rv = tp->t_fb->tfb_tcp_output(tp); + if (rv < 0) { + KASSERT(tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP, + ("TCP stack %s requested tcp_drop(%p)", + tp->t_fb->tfb_tcp_block_name, tp)); + rv = -rv; + tp = tcp_drop(tp, rv); + if (tp) + INP_WUNLOCK(tp->t_inpcb); + } else + INP_WUNLOCK(tp->t_inpcb); + + return (rv); +} + +/* + * tcp_output_nodrop() + * Always returns locked. It is the caller's responsibility to run tcp_drop()! + * Useful in syscall implementations, when we want to perform some logging + * and/or tracing with tcpcb before calling tcp_drop(). To be used with + * tcp_unlock_or_drop() later. + * + * XXXGL: maybe don't allow stacks to return a drop request at certain + * TCP states? 
Why would it do so in connect(2)? In recv(2)? + */ +static inline int +tcp_output_nodrop(struct tcpcb *tp) +{ + int rv; + + INP_WLOCK_ASSERT(tp->t_inpcb); + + rv = tp->t_fb->tfb_tcp_output(tp); + KASSERT(rv >= 0 || tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP, + ("TCP stack %s requested tcp_drop(%p)", + tp->t_fb->tfb_tcp_block_name, tp)); + return (rv); +} + +/* + * tcp_unlock_or_drop() + * Handle return code from tfb_tcp_output() after we have logged/traced, + * to be used with tcp_output_nodrop(). Returns a non-negative error code. + */ +static inline int +tcp_unlock_or_drop(struct tcpcb *tp, int tcp_output_retval) +{ + + INP_WLOCK_ASSERT(tp->t_inpcb); + + if (tcp_output_retval < 0) { + tcp_output_retval = -tcp_output_retval; + if (tcp_drop(tp, tcp_output_retval) != NULL) + INP_WUNLOCK(tp->t_inpcb); + } else + INP_WUNLOCK(tp->t_inpcb); + + return (tcp_output_retval); +} +#endif /* _NETINET_IN_PCB_H_ */ #endif /* _KERNEL */ /* @@ -978,8 +1074,6 @@ void tcp_ctlinput(int, struct sockaddr *, void *); int tcp_ctloutput(struct socket *, struct sockopt *); void tcp_ctlinput_viaudp(int, struct sockaddr *, void *, void *); -struct tcpcb * - tcp_drop(struct tcpcb *, int); void tcp_drain(void); void tcp_init(void); void tcp_fini(void *); Index: sys/netinet/toecore.c =================================================================== --- sys/netinet/toecore.c +++ sys/netinet/toecore.c @@ -532,7 +532,8 @@ KASSERT(!(tp->t_flags & TF_TOE), ("%s: tp %p still offloaded.", __func__, tp)); tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp)); - (void) tcp_output(tp); + if (tcp_output(tp) < 0) + INP_WLOCK(inp); /* re-acquire */ } else { tp = tcp_drop(tp, err); if (tp == NULL)