Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F103971263
D37321.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
29 KB
Referenced Files
None
Subscribers
None
D37321.diff
View Options
diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c
--- a/sys/netinet/tcp_stacks/bbr.c
+++ b/sys/netinet/tcp_stacks/bbr.c
@@ -5285,37 +5285,13 @@
}
}
-static void
-bbr_timer_stop(struct tcpcb *tp, uint32_t timer_type)
+static int
+bbr_stopall(struct tcpcb *tp)
{
struct tcp_bbr *bbr;
bbr = (struct tcp_bbr *)tp->t_fb_ptr;
bbr->rc_all_timers_stopped = 1;
- return;
-}
-
-/*
- * stop all timers always returning 0.
- */
-static int
-bbr_stopall(struct tcpcb *tp)
-{
- return (0);
-}
-
-static void
-bbr_timer_activate(struct tcpcb *tp, uint32_t timer_type, uint32_t delta)
-{
- return;
-}
-
-/*
- * return true if a bbr timer (rack or tlp) is active.
- */
-static int
-bbr_timer_active(struct tcpcb *tp, uint32_t timer_type)
-{
return (0);
}
@@ -14168,9 +14144,6 @@
.tfb_tcp_fb_init = bbr_init,
.tfb_tcp_fb_fini = bbr_fini,
.tfb_tcp_timer_stop_all = bbr_stopall,
- .tfb_tcp_timer_activate = bbr_timer_activate,
- .tfb_tcp_timer_active = bbr_timer_active,
- .tfb_tcp_timer_stop = bbr_timer_stop,
.tfb_tcp_rexmit_tmr = bbr_remxt_tmr,
.tfb_tcp_handoff_ok = bbr_handoff_ok,
.tfb_tcp_mtu_chg = bbr_mtu_chg,
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -489,10 +489,6 @@
static int rack_set_sockopt(struct inpcb *inp, struct sockopt *sopt);
static void rack_set_state(struct tcpcb *tp, struct tcp_rack *rack);
static int32_t rack_stopall(struct tcpcb *tp);
-static void
-rack_timer_activate(struct tcpcb *tp, uint32_t timer_type,
- uint32_t delta);
-static int32_t rack_timer_active(struct tcpcb *tp, uint32_t timer_type);
static void rack_timer_cancel(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts, int line);
static void rack_timer_stop(struct tcpcb *tp, uint32_t timer_type);
static uint32_t
@@ -5910,9 +5906,6 @@
*/
struct rack_sendmap *rsm;
- if (tp->tt_flags & TT_STOPPED) {
- return (1);
- }
counter_u64_add(rack_to_tot, 1);
if (rack->r_state && (rack->r_state != tp->t_state))
rack_set_state(tp, rack);
@@ -6123,9 +6116,6 @@
uint32_t out, avail;
int collapsed_win = 0;
- if (tp->tt_flags & TT_STOPPED) {
- return (1);
- }
if (TSTMP_LT(cts, rack->r_ctl.rc_timer_exp)) {
/* Its not time yet */
return (0);
@@ -6312,9 +6302,7 @@
static int
rack_timeout_delack(struct tcpcb *tp, struct tcp_rack *rack, uint32_t cts)
{
- if (tp->tt_flags & TT_STOPPED) {
- return (1);
- }
+
rack_log_to_event(rack, RACK_TO_FRM_DELACK, NULL);
tp->t_flags &= ~TF_DELACK;
tp->t_flags |= TF_ACKNOW;
@@ -6337,9 +6325,6 @@
struct tcptemp *t_template;
int32_t retval = 1;
- if (tp->tt_flags & TT_STOPPED) {
- return (1);
- }
if (rack->rc_in_persist == 0)
return (0);
if (ctf_progress_timeout_check(tp, false)) {
@@ -6425,9 +6410,6 @@
struct tcptemp *t_template;
struct inpcb *inp = tptoinpcb(tp);
- if (tp->tt_flags & TT_STOPPED) {
- return (1);
- }
rack->r_ctl.rc_hpts_flags &= ~PACE_TMR_KEEP;
rack_log_to_event(rack, RACK_TO_FRM_KEEP, NULL);
/*
@@ -6654,9 +6636,6 @@
int32_t retval = 0;
bool isipv6;
- if (tp->tt_flags & TT_STOPPED) {
- return (1);
- }
if ((tp->t_flags & TF_GPUTINPROG) &&
(tp->t_rxtshift)) {
/*
@@ -7060,12 +7039,6 @@
rack_log_to_cancel(rack, hpts_removed, line, us_cts, &tv, flags_on_entry);
}
-static void
-rack_timer_stop(struct tcpcb *tp, uint32_t timer_type)
-{
- return;
-}
-
static int
rack_stopall(struct tcpcb *tp)
{
@@ -7075,18 +7048,6 @@
return (0);
}
-static void
-rack_timer_activate(struct tcpcb *tp, uint32_t timer_type, uint32_t delta)
-{
- return;
-}
-
-static int
-rack_timer_active(struct tcpcb *tp, uint32_t timer_type)
-{
- return (0);
-}
-
static void
rack_stop_all_timers(struct tcpcb *tp)
{
@@ -20307,9 +20268,6 @@
.tfb_tcp_fb_init = rack_init,
.tfb_tcp_fb_fini = rack_fini,
.tfb_tcp_timer_stop_all = rack_stopall,
- .tfb_tcp_timer_activate = rack_timer_activate,
- .tfb_tcp_timer_active = rack_timer_active,
- .tfb_tcp_timer_stop = rack_timer_stop,
.tfb_tcp_rexmit_tmr = rack_remxt_tmr,
.tfb_tcp_handoff_ok = rack_handoff_ok,
.tfb_tcp_mtu_chg = rack_mtu_change,
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -1194,22 +1194,6 @@
*num_names = 0;
return (EINVAL);
}
- if (blk->tfb_tcp_timer_stop_all ||
- blk->tfb_tcp_timer_activate ||
- blk->tfb_tcp_timer_active ||
- blk->tfb_tcp_timer_stop) {
- /*
- * If you define one timer function you
- * must have them all.
- */
- if ((blk->tfb_tcp_timer_stop_all == NULL) ||
- (blk->tfb_tcp_timer_activate == NULL) ||
- (blk->tfb_tcp_timer_active == NULL) ||
- (blk->tfb_tcp_timer_stop == NULL)) {
- *num_names = 0;
- return (EINVAL);
- }
- }
if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
*num_names = 0;
@@ -2227,12 +2211,9 @@
#endif /* INET6 */
V_tcp_mssdflt;
- /* Set up our timeouts. */
- callout_init(&tp->tt_rexmt, 1);
- callout_init(&tp->tt_persist, 1);
- callout_init(&tp->tt_keep, 1);
- callout_init(&tp->tt_2msl, 1);
- callout_init(&tp->tt_delack, 1);
+ callout_init_rw(&tp->t_callout, &inp->inp_lock, CALLOUT_RETURNUNLOCKED);
+ for (int i = 0; i < TT_N; i++)
+ tp->t_timers[i] = SBT_MAX;
switch (V_tcp_do_rfc1323) {
case 0:
@@ -2301,13 +2282,6 @@
if (V_tcp_do_lrd)
tp->t_flags |= TF_LRD;
- /*
- * XXXGL: this self-reference might be pointless. It will go away
- * when the TCP timers are properly locked and could never fire after
- * tcp_discardcb().
- */
- in_pcbref(inp);
-
return (tp);
}
@@ -2341,32 +2315,15 @@
tcp_discardcb(struct tcpcb *tp)
{
struct inpcb *inp = tptoinpcb(tp);
+ struct socket *so = tptosocket(tp);
+#ifdef INET6
+ bool isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
+#endif
INP_WLOCK_ASSERT(inp);
- /*
- * Make sure that all of our timers are stopped before we delete the
- * PCB.
- *
- * If stopping a timer fails, we schedule a discard function in same
- * callout, and the last discard function called will take care of
- * deleting the tcpcb.
- */
- tp->tt_draincnt = 0;
- tcp_timer_stop(tp, TT_REXMT);
- tcp_timer_stop(tp, TT_PERSIST);
- tcp_timer_stop(tp, TT_KEEP);
- tcp_timer_stop(tp, TT_2MSL);
- tcp_timer_stop(tp, TT_DELACK);
+ tcp_timer_stop(tp);
if (tp->t_fb->tfb_tcp_timer_stop_all) {
- /*
- * Call the stop-all function of the methods,
- * this function should call the tcp_timer_stop()
- * method with each of the function specific timeouts.
- * That stop will be called via the tfb_tcp_timer_stop()
- * which should use the async drain function of the
- * callout system (see tcp_var.h).
- */
tp->t_fb->tfb_tcp_timer_stop_all(tp);
}
@@ -2402,23 +2359,7 @@
#endif
CC_ALGO(tp) = NULL;
- if (tp->tt_draincnt == 0)
- tcp_freecb(tp);
-}
-bool
-tcp_freecb(struct tcpcb *tp)
-{
- struct inpcb *inp = tptoinpcb(tp);
- struct socket *so = tptosocket(tp);
-#ifdef INET6
- bool isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
-#endif
-
- INP_WLOCK_ASSERT(inp);
- MPASS(tp->tt_draincnt == 0);
-
- /* We own the last reference on tcpcb, let's free it. */
#ifdef TCP_BLACKBOX
tcp_log_tcpcbfini(tp);
#endif
@@ -2489,8 +2430,6 @@
}
refcount_release(&tp->t_fb->tfb_refcnt);
-
- return (in_pcbrele_wlocked(inp));
}
/*
@@ -3940,17 +3879,17 @@
(tp->t_flags2 & TF2_ACE_PERMIT) ? 2 : 0;
now = getsbinuptime();
-#define COPYTIMER(ttt) do { \
- if (callout_active(&tp->ttt)) \
- xt->ttt = (tp->ttt.c_time - now) / SBT_1MS; \
- else \
- xt->ttt = 0; \
+#define COPYTIMER(which,where) do { \
+ if (tp->t_timers[which] != SBT_MAX) \
+ xt->where = (tp->t_timers[which] - now) / SBT_1MS; \
+ else \
+ xt->where = 0; \
} while (0)
- COPYTIMER(tt_delack);
- COPYTIMER(tt_rexmt);
- COPYTIMER(tt_persist);
- COPYTIMER(tt_keep);
- COPYTIMER(tt_2msl);
+ COPYTIMER(TT_DELACK, tt_delack);
+ COPYTIMER(TT_REXMT, tt_rexmt);
+ COPYTIMER(TT_PERSIST, tt_persist);
+ COPYTIMER(TT_KEEP, tt_keep);
+ COPYTIMER(TT_2MSL, tt_2msl);
#undef COPYTIMER
xt->t_rcvtime = 1000 * (ticks - tp->t_rcvtime) / hz;
diff --git a/sys/netinet/tcp_timer.h b/sys/netinet/tcp_timer.h
--- a/sys/netinet/tcp_timer.h
+++ b/sys/netinet/tcp_timer.h
@@ -145,18 +145,6 @@
#ifdef _KERNEL
-/*
- * Flags for the tcpcb's tt_flags field.
- */
-#define TT_DELACK 0x0001
-#define TT_REXMT 0x0002
-#define TT_PERSIST 0x0004
-#define TT_KEEP 0x0008
-#define TT_2MSL 0x0010
-#define TT_MASK (TT_DELACK|TT_REXMT|TT_PERSIST|TT_KEEP|TT_2MSL)
-
-#define TT_STOPPED 0x00010000
-
#define TP_KEEPINIT(tp) ((tp)->t_keepinit ? (tp)->t_keepinit : tcp_keepinit)
#define TP_KEEPIDLE(tp) ((tp)->t_keepidle ? (tp)->t_keepidle : tcp_keepidle)
#define TP_KEEPINTVL(tp) ((tp)->t_keepintvl ? (tp)->t_keepintvl : tcp_keepintvl)
@@ -205,13 +193,6 @@
VNET_DECLARE(int, tcp_msl);
#define V_tcp_msl VNET(tcp_msl)
-void tcp_timer_init(void);
-void tcp_timer_2msl(void *xtp);
-void tcp_timer_keep(void *xtp);
-void tcp_timer_persist(void *xtp);
-void tcp_timer_rexmt(void *xtp);
-void tcp_timer_delack(void *xtp);
-
#endif /* _KERNEL */
#endif /* !_NETINET_TCP_TIMER_H_ */
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -243,104 +243,86 @@
/*
* TCP timer processing.
+ *
+ * Each connection has 5 timers associated with it, which can be scheduled
+ * simultaneously. They all are serviced by one callout tcp_timer_enter().
+ * This function executes the next timer via tcp_timersw[] vector. Each
+ * timer is supposed to return 'true' unless the connection was destroyed.
+ * When it returns 'true', tcp_timer_enter() schedules the callout for the next timer.
*/
-void
-tcp_timer_delack(void *xtp)
-{
- struct epoch_tracker et;
- struct tcpcb *tp = xtp;
- struct inpcb *inp = tptoinpcb(tp);
-
- INP_WLOCK(inp);
- CURVNET_SET(inp->inp_vnet);
-
- if (callout_pending(&tp->tt_delack) ||
- !callout_active(&tp->tt_delack)) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- callout_deactivate(&tp->tt_delack);
- if ((inp->inp_flags & INP_DROPPED) != 0) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- tp->t_flags |= TF_ACKNOW;
- TCPSTAT_INC(tcps_delack);
- NET_EPOCH_ENTER(et);
- (void) tcp_output_unlock(tp);
- NET_EPOCH_EXIT(et);
- CURVNET_RESTORE();
-}
+typedef bool tcp_timer_t(struct tcpcb *);
+static tcp_timer_t tcp_timer_delack;
+static tcp_timer_t tcp_timer_2msl;
+static tcp_timer_t tcp_timer_keep;
+static tcp_timer_t tcp_timer_persist;
+static tcp_timer_t tcp_timer_rexmt;
+
+static tcp_timer_t * const tcp_timersw[TT_N] = {
+ [TT_DELACK] = tcp_timer_delack,
+ [TT_REXMT] = tcp_timer_rexmt,
+ [TT_PERSIST] = tcp_timer_persist,
+ [TT_KEEP] = tcp_timer_keep,
+ [TT_2MSL] = tcp_timer_2msl,
+};
/*
- * Call tcp_close() from a callout context.
+ * tcp_output_locked() is a timer-specific variation of a call to tcp_output(),
+ * see tcp_var.h for the rest. It handles drop request from advanced stacks,
+ * but keeps tcpcb locked unless tcp_drop() destroyed it.
+ * Returns true if tcpcb is valid and locked.
*/
-static void
-tcp_timer_close(struct tcpcb *tp)
+static inline bool
+tcp_output_locked(struct tcpcb *tp)
{
- struct epoch_tracker et;
- struct inpcb *inp = tptoinpcb(tp);
+ int rv;
- INP_WLOCK_ASSERT(inp);
+ INP_WLOCK_ASSERT(tptoinpcb(tp));
- NET_EPOCH_ENTER(et);
- tp = tcp_close(tp);
- NET_EPOCH_EXIT(et);
- if (tp != NULL)
- INP_WUNLOCK(inp);
+ if ((rv = tp->t_fb->tfb_tcp_output(tp)) < 0) {
+ KASSERT(tp->t_fb->tfb_flags & TCP_FUNC_OUTPUT_CANDROP,
+ ("TCP stack %s requested tcp_drop(%p)",
+ tp->t_fb->tfb_tcp_block_name, tp));
+ tp = tcp_drop(tp, rv);
+ }
+
+ return (tp != NULL);
}
-/*
- * Call tcp_drop() from a callout context.
- */
-static void
-tcp_timer_drop(struct tcpcb *tp)
+static bool
+tcp_timer_delack(struct tcpcb *tp)
{
struct epoch_tracker et;
+#if defined(INVARIANTS) || defined(VIMAGE)
struct inpcb *inp = tptoinpcb(tp);
+#endif
+ bool rv;
INP_WLOCK_ASSERT(inp);
+ CURVNET_SET(inp->inp_vnet);
+ tp->t_flags |= TF_ACKNOW;
+ TCPSTAT_INC(tcps_delack);
NET_EPOCH_ENTER(et);
- tp = tcp_drop(tp, ETIMEDOUT);
+ rv = tcp_output_locked(tp);
NET_EPOCH_EXIT(et);
- if (tp != NULL)
- INP_WUNLOCK(inp);
+ CURVNET_RESTORE();
+
+ return (rv);
}
-void
-tcp_timer_2msl(void *xtp)
+static bool
+tcp_timer_2msl(struct tcpcb *tp)
{
- struct tcpcb *tp = xtp;
struct inpcb *inp = tptoinpcb(tp);
-#ifdef TCPDEBUG
- int ostate;
+ bool close = false;
- ostate = tp->t_state;
-#endif
+ INP_WLOCK_ASSERT(inp);
- INP_WLOCK(inp);
+ TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
CURVNET_SET(inp->inp_vnet);
-
tcp_log_end_status(tp, TCP_EI_STATUS_2MSL);
tcp_free_sackholes(tp);
- if (callout_pending(&tp->tt_2msl) ||
- !callout_active(&tp->tt_2msl)) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- callout_deactivate(&tp->tt_2msl);
- if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- KASSERT((tp->tt_flags & TT_STOPPED) == 0,
- ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
/*
* 2 MSL timeout in shutdown went off. If we're closed but
* still waiting for peer to close and connection has been idle
@@ -354,69 +336,41 @@
* XXXGL: check if inp_socket shall always be !NULL here?
*/
if (tp->t_state == TCPS_TIME_WAIT) {
- tcp_timer_close(tp);
- CURVNET_RESTORE();
- return;
+ close = true;
} else if (tp->t_state == TCPS_FIN_WAIT_2 &&
tcp_fast_finwait2_recycle && inp->inp_socket &&
(inp->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
TCPSTAT_INC(tcps_finwait2_drops);
- tcp_timer_close(tp);
- CURVNET_RESTORE();
- return;
+ close = true;
} else {
- if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
- callout_reset(&tp->tt_2msl,
- TP_KEEPINTVL(tp), tcp_timer_2msl, tp);
- } else {
- tcp_timer_close(tp);
- CURVNET_RESTORE();
- return;
- }
+ if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
+ tcp_timer_activate(tp, TT_2MSL, TP_KEEPINTVL(tp));
+ else
+ close = true;
}
+ if (close) {
+ struct epoch_tracker et;
-#ifdef TCPDEBUG
- if (tptosocket(tp)->so_options & SO_DEBUG)
- tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
- PRU_SLOWTIMO);
-#endif
- TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
-
- INP_WUNLOCK(inp);
+ NET_EPOCH_ENTER(et);
+ tp = tcp_close(tp);
+ NET_EPOCH_EXIT(et);
+ }
CURVNET_RESTORE();
+
+ return (tp != NULL);
}
-void
-tcp_timer_keep(void *xtp)
+static bool
+tcp_timer_keep(struct tcpcb *tp)
{
struct epoch_tracker et;
- struct tcpcb *tp = xtp;
struct inpcb *inp = tptoinpcb(tp);
struct tcptemp *t_template;
-#ifdef TCPDEBUG
- int ostate;
- ostate = tp->t_state;
-#endif
+ INP_WLOCK_ASSERT(inp);
- INP_WLOCK(inp);
+ TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
CURVNET_SET(inp->inp_vnet);
-
- if (callout_pending(&tp->tt_keep) ||
- !callout_active(&tp->tt_keep)) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- callout_deactivate(&tp->tt_keep);
- if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- KASSERT((tp->tt_flags & TT_STOPPED) == 0,
- ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
-
/*
* Because we don't regularly reset the keepalive callout in
* the ESTABLISHED state, it may be that we don't actually need
@@ -428,11 +382,10 @@
idletime = ticks - tp->t_rcvtime;
if (idletime < TP_KEEPIDLE(tp)) {
- callout_reset(&tp->tt_keep,
- TP_KEEPIDLE(tp) - idletime, tcp_timer_keep, tp);
- INP_WUNLOCK(inp);
+ tcp_timer_activate(tp, TT_KEEP,
+ TP_KEEPIDLE(tp) - idletime);
CURVNET_RESTORE();
- return;
+ return (true);
}
}
@@ -470,38 +423,22 @@
NET_EPOCH_EXIT(et);
free(t_template, M_TEMP);
}
- callout_reset(&tp->tt_keep, TP_KEEPINTVL(tp),
- tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPINTVL(tp));
} else
- callout_reset(&tp->tt_keep, TP_KEEPIDLE(tp),
- tcp_timer_keep, tp);
+ tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp));
-#ifdef TCPDEBUG
- if (inp->inp_socket->so_options & SO_DEBUG)
- tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
- PRU_SLOWTIMO);
-#endif
- TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
- INP_WUNLOCK(inp);
CURVNET_RESTORE();
- return;
+ return (true);
dropit:
TCPSTAT_INC(tcps_keepdrops);
NET_EPOCH_ENTER(et);
tcp_log_end_status(tp, TCP_EI_STATUS_KEEP_MAX);
tp = tcp_drop(tp, ETIMEDOUT);
-
-#ifdef TCPDEBUG
- if (tp != NULL && (tptosocket(tp)->so_options & SO_DEBUG))
- tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
- PRU_SLOWTIMO);
-#endif
- TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
NET_EPOCH_EXIT(et);
- if (tp != NULL)
- INP_WUNLOCK(inp);
CURVNET_RESTORE();
+
+ return (tp != NULL);
}
/*
@@ -529,37 +466,19 @@
return true;
}
-void
-tcp_timer_persist(void *xtp)
+static bool
+tcp_timer_persist(struct tcpcb *tp)
{
struct epoch_tracker et;
- struct tcpcb *tp = xtp;
+#if defined(INVARIANTS) || defined(VIMAGE)
struct inpcb *inp = tptoinpcb(tp);
- bool progdrop;
- int outrv;
-#ifdef TCPDEBUG
- int ostate;
-
- ostate = tp->t_state;
#endif
+ bool progdrop, rv;
- INP_WLOCK(inp);
- CURVNET_SET(inp->inp_vnet);
+ INP_WLOCK_ASSERT(inp);
- if (callout_pending(&tp->tt_persist) ||
- !callout_active(&tp->tt_persist)) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- callout_deactivate(&tp->tt_persist);
- if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- KASSERT((tp->tt_flags & TT_STOPPED) == 0,
- ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
+ TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
+ CURVNET_SET(inp->inp_vnet);
/*
* Persistence timer into zero window.
* Force a byte to be output, if possible.
@@ -581,9 +500,7 @@
if (!progdrop)
TCPSTAT_INC(tcps_persistdrop);
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
- tcp_timer_drop(tp);
- CURVNET_RESTORE();
- return;
+ goto dropit;
}
/*
* If the user has closed the socket then drop a persisting
@@ -593,57 +510,39 @@
(ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
TCPSTAT_INC(tcps_persistdrop);
tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
- tcp_timer_drop(tp);
- CURVNET_RESTORE();
- return;
+ goto dropit;
}
tcp_setpersist(tp);
tp->t_flags |= TF_FORCEDATA;
NET_EPOCH_ENTER(et);
- outrv = tcp_output_nodrop(tp);
- tp->t_flags &= ~TF_FORCEDATA;
+ if ((rv = tcp_output_locked(tp)))
+ tp->t_flags &= ~TF_FORCEDATA;
+ NET_EPOCH_EXIT(et);
+ CURVNET_RESTORE();
-#ifdef TCPDEBUG
- if (tp != NULL && tptosocket(tp)->so_options & SO_DEBUG)
- tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
-#endif
- TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
- (void) tcp_unlock_or_drop(tp, outrv);
+ return (rv);
+
+dropit:
+ NET_EPOCH_ENTER(et);
+ tp = tcp_drop(tp, ETIMEDOUT);
NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
+
+ return (tp != NULL);
}
-void
-tcp_timer_rexmt(void * xtp)
+static bool
+tcp_timer_rexmt(struct tcpcb *tp)
{
struct epoch_tracker et;
- struct tcpcb *tp = xtp;
struct inpcb *inp = tptoinpcb(tp);
- int rexmt, outrv;
- bool isipv6;
-#ifdef TCPDEBUG
- int ostate;
+ int rexmt;
+ bool isipv6, rv;
- ostate = tp->t_state;
-#endif
+ INP_WLOCK_ASSERT(inp);
- INP_WLOCK(inp);
+ TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
CURVNET_SET(inp->inp_vnet);
-
- if (callout_pending(&tp->tt_rexmt) ||
- !callout_active(&tp->tt_rexmt)) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- callout_deactivate(&tp->tt_rexmt);
- if (inp->inp_flags & INP_DROPPED) {
- INP_WUNLOCK(inp);
- CURVNET_RESTORE();
- return;
- }
- KASSERT((tp->tt_flags & TT_STOPPED) == 0,
- ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
tcp_free_sackholes(tp);
TCP_LOG_EVENT(tp, NULL, NULL, NULL, TCP_LOG_RTO, 0, 0, NULL, false);
if (tp->t_fb->tfb_tcp_rexmit_tmr) {
@@ -664,9 +563,12 @@
TCPSTAT_INC(tcps_timeoutdrop);
tp->t_rxtshift = TCP_MAXRXTSHIFT;
tcp_log_end_status(tp, TCP_EI_STATUS_RETRAN);
- tcp_timer_drop(tp);
+ NET_EPOCH_ENTER(et);
+ tp = tcp_drop(tp, ETIMEDOUT);
+ NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
- return;
+
+ return (tp != NULL);
}
if (tp->t_state == TCPS_SYN_SENT) {
/*
@@ -883,159 +785,131 @@
cc_cong_signal(tp, NULL, CC_RTO);
NET_EPOCH_ENTER(et);
- outrv = tcp_output_nodrop(tp);
-#ifdef TCPDEBUG
- if (tp != NULL && (tptosocket(tp)->so_options & SO_DEBUG))
- tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
- PRU_SLOWTIMO);
-#endif
- TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
- (void) tcp_unlock_or_drop(tp, outrv);
+ rv = tcp_output_locked(tp);
NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
+
+ return (rv);
+}
+
+static inline tt_which
+tcp_timer_next(struct tcpcb *tp, sbintime_t *precision)
+{
+ tt_which i, rv;
+ sbintime_t after, before;
+
+ for (i = 0, rv = TT_N, after = before = SBT_MAX; i < TT_N; i++) {
+ if (tp->t_timers[i] < after) {
+ after = tp->t_timers[i];
+ rv = i;
+ }
+ before = MIN(before, tp->t_timers[i] + tp->t_precisions[i]);
+ }
+ if (precision != NULL)
+ *precision = before - after;
+
+ return (rv);
+}
+
+static void
+tcp_timer_enter(void *xtp)
+{
+ struct tcpcb *tp = xtp;
+ struct inpcb *inp = tptoinpcb(tp);
+ sbintime_t precision;
+ tt_which which;
+
+ INP_WLOCK_ASSERT(inp);
+ MPASS((curthread->td_pflags & TDP_INTCPCALLOUT) == 0);
+
+ curthread->td_pflags |= TDP_INTCPCALLOUT;
+
+ which = tcp_timer_next(tp, NULL);
+ MPASS(which < TT_N);
+ tp->t_timers[which] = SBT_MAX;
+ tp->t_precisions[which] = 0;
+
+ if (tcp_timersw[which](tp)) {
+ if ((which = tcp_timer_next(tp, &precision)) != TT_N) {
+ callout_reset_sbt_on(&tp->t_callout,
+ tp->t_timers[which], precision, tcp_timer_enter,
+ tp, inp_to_cpuid(inp), C_ABSOLUTE);
+ }
+ INP_WUNLOCK(inp);
+ }
+
+ curthread->td_pflags &= ~TDP_INTCPCALLOUT;
}
+/*
+ * Activate or stop (delta == 0) a TCP timer.
+ */
void
-tcp_timer_activate(struct tcpcb *tp, uint32_t timer_type, u_int delta)
+tcp_timer_activate(struct tcpcb *tp, tt_which which, u_int delta)
{
- struct callout *t_callout;
- callout_func_t *f_callout;
struct inpcb *inp = tptoinpcb(tp);
- int cpu = inp_to_cpuid(inp);
+ sbintime_t precision;
#ifdef TCP_OFFLOAD
if (tp->t_flags & TF_TOE)
return;
#endif
- if (tp->tt_flags & TT_STOPPED)
- return;
+ INP_WLOCK_ASSERT(inp);
- switch (timer_type) {
- case TT_DELACK:
- t_callout = &tp->tt_delack;
- f_callout = tcp_timer_delack;
- break;
- case TT_REXMT:
- t_callout = &tp->tt_rexmt;
- f_callout = tcp_timer_rexmt;
- break;
- case TT_PERSIST:
- t_callout = &tp->tt_persist;
- f_callout = tcp_timer_persist;
- break;
- case TT_KEEP:
- t_callout = &tp->tt_keep;
- f_callout = tcp_timer_keep;
- break;
- case TT_2MSL:
- t_callout = &tp->tt_2msl;
- f_callout = tcp_timer_2msl;
- break;
- default:
- if (tp->t_fb->tfb_tcp_timer_activate) {
- tp->t_fb->tfb_tcp_timer_activate(tp, timer_type, delta);
- return;
- }
- panic("tp %p bad timer_type %#x", tp, timer_type);
- }
- if (delta == 0) {
- callout_stop(t_callout);
- } else {
- callout_reset_on(t_callout, delta, f_callout, tp, cpu);
- }
-}
+ if (delta > 0)
+ callout_when(tick_sbt * delta, 0, C_HARDCLOCK,
+ &tp->t_timers[which], &tp->t_precisions[which]);
+ else
+ tp->t_timers[which] = SBT_MAX;
-int
-tcp_timer_active(struct tcpcb *tp, uint32_t timer_type)
-{
- struct callout *t_callout;
-
- switch (timer_type) {
- case TT_DELACK:
- t_callout = &tp->tt_delack;
- break;
- case TT_REXMT:
- t_callout = &tp->tt_rexmt;
- break;
- case TT_PERSIST:
- t_callout = &tp->tt_persist;
- break;
- case TT_KEEP:
- t_callout = &tp->tt_keep;
- break;
- case TT_2MSL:
- t_callout = &tp->tt_2msl;
- break;
- default:
- if (tp->t_fb->tfb_tcp_timer_active) {
- return(tp->t_fb->tfb_tcp_timer_active(tp, timer_type));
- }
- panic("tp %p bad timer_type %#x", tp, timer_type);
- }
- return callout_active(t_callout);
+ if ((which = tcp_timer_next(tp, &precision)) != TT_N)
+ callout_reset_sbt_on(&tp->t_callout, tp->t_timers[which],
+ precision, tcp_timer_enter, tp, inp_to_cpuid(inp),
+ C_ABSOLUTE);
+ else
+ callout_stop(&tp->t_callout);
}
-static void
-tcp_timer_discard(void *ptp)
+bool
+tcp_timer_active(struct tcpcb *tp, tt_which which)
{
- struct epoch_tracker et;
- struct tcpcb *tp = (struct tcpcb *)ptp;
- struct inpcb *inp = tptoinpcb(tp);
- INP_WLOCK(inp);
- CURVNET_SET(inp->inp_vnet);
- NET_EPOCH_ENTER(et);
+ INP_WLOCK_ASSERT(tptoinpcb(tp));
- KASSERT((tp->tt_flags & TT_STOPPED) != 0,
- ("%s: tcpcb has to be stopped here", __func__));
- if (--tp->tt_draincnt > 0 ||
- tcp_freecb(tp) == false)
- INP_WUNLOCK(inp);
- NET_EPOCH_EXIT(et);
- CURVNET_RESTORE();
+ return (tp->t_timers[which] != SBT_MAX);
}
+/*
+ * Stop all timers associated with tcpcb.
+ *
+ * Called only on tcpcb destruction. The tcpcb shall already be dropped from
+ * the pcb lookup database and socket is not losing the last reference.
+ *
+ * XXXGL: unfortunately our callout(9) is not able to fully stop a locked
+ * callout even when only two threads are involved: the callout itself and the
+ * thread that does callout_stop(). See where softclock_call_cc() swaps the
+ * callwheel lock to callout lock and then checks cc_exec_cancel(). This is
+ * the race window. If it happens, the tcp_timer_enter() won't be executed,
+ * however pcb lock will be locked and released, hence we can't free memory.
+ * Until callout(9) is improved, just keep retrying. In my profiling I've seen
+ * such an event less than once per hour with 20-30 Gbit/s of traffic.
+ */
void
-tcp_timer_stop(struct tcpcb *tp, uint32_t timer_type)
+tcp_timer_stop(struct tcpcb *tp)
{
- struct callout *t_callout;
-
- tp->tt_flags |= TT_STOPPED;
- switch (timer_type) {
- case TT_DELACK:
- t_callout = &tp->tt_delack;
- break;
- case TT_REXMT:
- t_callout = &tp->tt_rexmt;
- break;
- case TT_PERSIST:
- t_callout = &tp->tt_persist;
- break;
- case TT_KEEP:
- t_callout = &tp->tt_keep;
- break;
- case TT_2MSL:
- t_callout = &tp->tt_2msl;
- break;
- default:
- if (tp->t_fb->tfb_tcp_timer_stop) {
- /*
- * XXXrrs we need to look at this with the
- * stop case below (flags).
- */
- tp->t_fb->tfb_tcp_timer_stop(tp, timer_type);
- return;
- }
- panic("tp %p bad timer_type %#x", tp, timer_type);
- }
+ struct inpcb *inp = tptoinpcb(tp);
- if (callout_async_drain(t_callout, tcp_timer_discard) == 0) {
- /*
- * Can't stop the callout, defer tcpcb actual deletion
- * to the last one. We do this using the async drain
- * function and incrementing the count in
- */
- tp->tt_draincnt++;
+ INP_WLOCK_ASSERT(inp);
+
+ if (curthread->td_pflags & TDP_INTCPCALLOUT) {
+ int stopped __diagused;
+
+ stopped = callout_stop(&tp->t_callout);
+ MPASS(stopped == 0);
+ } else while(__predict_false(callout_stop(&tp->t_callout) == 0)) {
+ INP_WUNLOCK(inp);
+ kern_yield(PRI_UNCHANGED);
+ INP_WLOCK(inp);
}
}
diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c
--- a/sys/netinet/tcp_usrreq.c
+++ b/sys/netinet/tcp_usrreq.c
@@ -3072,10 +3072,8 @@
TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
db_print_indent(indent);
- db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n",
- &tp->tt_rexmt, &tp->tt_persist, &tp->tt_keep);
- db_printf("tt_2msl: %p tt_delack: %p\n", &tp->tt_2msl,
- &tp->tt_delack);
+ db_printf("t_callout: %p t_timers: %p\n",
+ &tp->t_callout, &tp->t_timers);
db_print_indent(indent);
db_printf("t_state: %d (", tp->t_state);
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -126,6 +126,15 @@
STAILQ_HEAD(tcp_log_stailq, tcp_log_mem);
+typedef enum {
+ TT_DELACK = 0,
+ TT_REXMT,
+ TT_PERSIST,
+ TT_KEEP,
+ TT_2MSL,
+ TT_N,
+} tt_which;
+
/*
* Tcp control block, one per tcp connection.
*/
@@ -137,13 +146,9 @@
struct tcp_function_block *t_fb;/* TCP function call block */
void *t_fb_ptr; /* Pointer to t_fb specific data */
- struct callout tt_rexmt; /* retransmit timer */
- struct callout tt_persist; /* retransmit persistence */
- struct callout tt_keep; /* keepalive */
- struct callout tt_2msl; /* 2*msl TIME_WAIT timer */
- struct callout tt_delack; /* delayed ACK timer */
- uint32_t tt_flags; /* Timers flags */
- uint32_t tt_draincnt; /* Count being drained */
+ struct callout t_callout;
+ sbintime_t t_timers[TT_N];
+ sbintime_t t_precisions[TT_N];
uint32_t t_maxseg:24, /* maximum segment size */
t_logstate:8; /* State of "black box" logging */
@@ -370,10 +375,6 @@
void (*tfb_tcp_fb_fini)(struct tcpcb *, int);
/* Optional timers, must define all if you define one */
int (*tfb_tcp_timer_stop_all)(struct tcpcb *);
- void (*tfb_tcp_timer_activate)(struct tcpcb *,
- uint32_t, u_int);
- int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
- void (*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
void (*tfb_tcp_rexmit_tmr)(struct tcpcb *);
int (*tfb_tcp_handoff_ok)(struct tcpcb *);
void (*tfb_tcp_mtu_chg)(struct tcpcb *);
@@ -1086,7 +1087,6 @@
struct tcpcb *
tcp_close(struct tcpcb *);
void tcp_discardcb(struct tcpcb *);
-bool tcp_freecb(struct tcpcb *);
void tcp_twstart(struct tcpcb *);
int tcp_ctloutput(struct socket *, struct sockopt *);
void tcp_fini(void *);
@@ -1186,9 +1186,9 @@
struct tcptemp *
tcpip_maketemplate(struct inpcb *);
void tcpip_fillheaders(struct inpcb *, uint16_t, void *, void *);
-void tcp_timer_activate(struct tcpcb *, uint32_t, u_int);
-int tcp_timer_active(struct tcpcb *, uint32_t);
-void tcp_timer_stop(struct tcpcb *, uint32_t);
+void tcp_timer_activate(struct tcpcb *, tt_which, u_int);
+bool tcp_timer_active(struct tcpcb *, tt_which);
+void tcp_timer_stop(struct tcpcb *);
void tcp_trace(short, short, struct tcpcb *, void *, struct tcphdr *, int);
int inp_to_cpuid(struct inpcb *inp);
/*
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -557,7 +557,7 @@
#define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */
#define TDP_NERRNO 0x08000000 /* Last errno is already in td_errno */
#define TDP_UIOHELD 0x10000000 /* Current uio has pages held in td_ma */
-#define TDP_UNUSED0 0x20000000 /* UNUSED */
+#define TDP_INTCPCALLOUT 0x20000000 /* used by netinet/tcp_timer.c */
#define TDP_EXECVMSPC 0x40000000 /* Execve destroyed old vmspace */
#define TDP_SIGFASTPENDING 0x80000000 /* Pending signal due to sigfastblock */
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, Dec 2, 9:33 PM (20 h, 55 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14992272
Default Alt Text
D37321.diff (29 KB)
Attached To
Mode
D37321: tcp: use single locked callout per tcpcb for the TCP timers
Attached
Detach File
Event Timeline
Log In to Comment