Page MenuHomeFreeBSD

D33025.diff
No OneTemporary

D33025.diff

Index: sys/netinet/in_pcb.h
===================================================================
--- sys/netinet/in_pcb.h
+++ sys/netinet/in_pcb.h
@@ -234,22 +234,21 @@
* fields can *not* be collapsed into a signal bit field.
*/
#if defined(__amd64__) || defined(__i386__)
- volatile uint8_t inp_in_hpts; /* on output hpts (lock b) */
- volatile uint8_t inp_in_input; /* on input hpts (lock b) */
+ uint8_t inp_in_hpts; /* on output hpts (lock b) */
+ uint8_t inp_in_dropq; /* on hpts drop queue (lock b) */
#else
- volatile uint32_t inp_in_hpts; /* on output hpts (lock b) */
- volatile uint32_t inp_in_input; /* on input hpts (lock b) */
+ uint32_t inp_in_hpts; /* on output hpts (lock b) */
+ uint32_t inp_in_dropq; /* on hpts drop queue (lock b) */
#endif
volatile uint16_t inp_hpts_cpu; /* Lock (i) */
volatile uint16_t inp_irq_cpu; /* Set by LRO in behalf of or the driver */
u_int inp_refcount; /* (i) refcount */
int inp_flags; /* (i) generic IP/datagram flags */
int inp_flags2; /* (i) generic IP/datagram flags #2*/
- volatile uint16_t inp_input_cpu; /* Lock (i) */
- volatile uint8_t inp_hpts_cpu_set :1, /* on output hpts (i) */
- inp_input_cpu_set : 1, /* on input hpts (i) */
+ uint16_t inp_dropq_cpu; /* Lock (i) */
+ uint8_t inp_hpts_cpu_set :1, /* on output hpts (i) */
+ inp_dropq_cpu_set : 1, /* inp_dropq_cpu assigned (i) */
inp_hpts_calls :1, /* (i) from output hpts */
- inp_input_calls :1, /* (i) from input hpts */
inp_irq_cpu_set :1, /* (i) from LRO/Driver */
inp_spare_bits2 : 3;
uint8_t inp_numa_domain; /* numa domain */
@@ -257,7 +256,8 @@
struct socket *inp_socket; /* (i) back pointer to socket */
uint32_t inp_hptsslot; /* Hpts wheel slot this tcb is Lock(i&b) */
uint32_t inp_hpts_drop_reas; /* reason we are dropping the PCB (lock i&b) */
- TAILQ_ENTRY(inpcb) inp_input; /* pacing in queue next lock(b) */
+ uint32_t inp_dropq_gencnt;
+ TAILQ_ENTRY(inpcb) inp_dropq; /* hpts drop queue next lock(b) */
struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */
struct ucred *inp_cred; /* (c) cache of socket cred */
u_int32_t inp_flow; /* (i) IPv6 flow information */
Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -629,7 +629,7 @@
* If using hpts lets drop a random number in so
* not all new connections fall on the same CPU.
*/
- inp->inp_hpts_cpu = inp->inp_input_cpu = hpts_random_cpu(inp);
+ inp->inp_hpts_cpu = inp->inp_dropq_cpu = hpts_random_cpu(inp);
#endif
refcount_init(&inp->inp_refcount, 1); /* Reference from socket. */
INP_WLOCK(inp);
@@ -1760,7 +1760,7 @@
MPASS(inp->inp_flags & INP_FREED);
MPASS(inp->inp_socket == NULL);
MPASS(inp->inp_in_hpts == 0);
- MPASS(inp->inp_in_input == 0);
+ MPASS(inp->inp_in_dropq == 0);
INP_RUNLOCK(inp);
uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
return (true);
@@ -1778,7 +1778,7 @@
MPASS(inp->inp_flags & INP_FREED);
MPASS(inp->inp_socket == NULL);
MPASS(inp->inp_in_hpts == 0);
- MPASS(inp->inp_in_input == 0);
+ MPASS(inp->inp_in_dropq == 0);
INP_WUNLOCK(inp);
uma_zfree_smr(inp->inp_pcbinfo->ipi_zone, inp);
return (true);
Index: sys/netinet/tcp_hpts.h
===================================================================
--- sys/netinet/tcp_hpts.h
+++ sys/netinet/tcp_hpts.h
@@ -116,9 +116,9 @@
#ifdef _KERNEL
#define tcp_hpts_remove(a, b) __tcp_hpts_remove(a, b, __LINE__)
void __tcp_hpts_remove(struct inpcb *inp, int32_t flags, int32_t line);
-#define HPTS_REMOVE_INPUT 0x01
+#define HPTS_REMOVE_DROPQ 0x01
#define HPTS_REMOVE_OUTPUT 0x02
-#define HPTS_REMOVE_ALL (HPTS_REMOVE_INPUT | HPTS_REMOVE_OUTPUT)
+#define HPTS_REMOVE_ALL (HPTS_REMOVE_DROPQ | HPTS_REMOVE_OUTPUT)
static inline bool
tcp_in_hpts(struct inpcb *inp)
@@ -160,8 +160,7 @@
void __tcp_set_hpts(struct inpcb *inp, int32_t line);
#define tcp_set_hpts(a) __tcp_set_hpts(a, __LINE__)
-void __tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason, int32_t line);
-#define tcp_set_inp_to_drop(a, b) __tcp_set_inp_to_drop(a, b, __LINE__)
+void tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason);
void tcp_run_hpts(void);
Index: sys/netinet/tcp_hpts.c
===================================================================
--- sys/netinet/tcp_hpts.c
+++ sys/netinet/tcp_hpts.c
@@ -62,15 +62,7 @@
* Of course this is a bare bones example and the stack will probably
* have more consideration then just the above.
*
- * Now the second function (actually two functions I guess :D)
- * the tcp_hpts system provides is the ability to either abort
- * a connection (later) or process input on a connection.
- * Why would you want to do this? To keep processor locality
- * and or not have to worry about untangling any recursive
- * locks. The input function now is hooked to the new LRO
- * system as well.
- *
- * In order to use the input redirection function the
+ * In order to run input queued segments from the HPTS context the
* tcp stack must define an input function for
* tfb_do_queued_segments(). This function understands
* how to dequeue a array of packets that were input and
@@ -109,6 +101,10 @@
* you have defined the tfb_do_segment_nounlock() as
* described above.
*
+ * Now the second function the tcp_hpts system provides is the ability
+ * to abort a connection later. Why would you want to do this?
+ * To not have to worry about untangling any recursive locks.
+ *
* The second feature of the input side of hpts is the
* dropping of a connection. This is due to the way that
* locking may have occured on the INP_WLOCK. So if
@@ -202,6 +198,8 @@
/* Each hpts has its own p_mtx which is used for locking */
#define HPTS_MTX_ASSERT(hpts) mtx_assert(&(hpts)->p_mtx, MA_OWNED)
+#define HPTS_LOCK(hpts) mtx_lock(&(hpts)->p_mtx)
+#define HPTS_UNLOCK(hpts) mtx_unlock(&(hpts)->p_mtx)
TAILQ_HEAD(hptsh, inpcb);
struct tcp_hpts_entry {
/* Cache line 0x00 */
@@ -226,10 +224,11 @@
uint8_t p_fill[3]; /* Fill to 32 bits */
/* Cache line 0x40 */
void *p_inp;
- struct hptsh p_input; /* For the tcp-input runner */
+ TAILQ_HEAD(, inpcb) p_dropq; /* Delayed drop queue */
/* Hptsi wheel */
struct hptsh *p_hptss;
- int32_t p_on_inqueue_cnt; /* Count on input queue in this hpts */
+ uint32_t p_dropq_cnt; /* Count on drop queue */
+ uint32_t p_dropq_gencnt;
uint32_t p_hpts_sleep_time; /* Current sleep interval having a max
* of 255ms */
uint32_t overidden_sleep; /* what was overrided by min-sleep for logging */
@@ -270,7 +269,6 @@
static int hpts_use_assigned_cpu = 1;
static int32_t hpts_uses_oldest = OLDEST_THRESHOLD;
-static void tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv);
static int32_t tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout);
static void tcp_hpts_thread(void *ctx);
static void tcp_init_hptsi(void *st);
@@ -558,41 +556,6 @@
}
}
-static inline void
-hpts_sane_input_remove(struct tcp_hpts_entry *hpts, struct inpcb *inp, int clear)
-{
- HPTS_MTX_ASSERT(hpts);
- KASSERT(hpts->p_cpu == inp->inp_hpts_cpu,
- ("%s: hpts:%p inp:%p incorrect CPU", __FUNCTION__, hpts, inp));
- KASSERT(inp->inp_in_input != 0,
- ("%s: hpts:%p inp:%p not on the input hpts?", __FUNCTION__, hpts, inp));
- TAILQ_REMOVE(&hpts->p_input, inp, inp_input);
- hpts->p_on_inqueue_cnt--;
- KASSERT(hpts->p_on_inqueue_cnt >= 0,
- ("Hpts in goes negative inp:%p hpts:%p",
- inp, hpts));
- KASSERT((((TAILQ_EMPTY(&hpts->p_input) != 0) && (hpts->p_on_inqueue_cnt == 0)) ||
- ((TAILQ_EMPTY(&hpts->p_input) == 0) && (hpts->p_on_inqueue_cnt > 0))),
- ("%s hpts:%p input cnt (p_on_inqueue):%d and queue state mismatch",
- __FUNCTION__, hpts, hpts->p_on_inqueue_cnt));
- if (clear)
- inp->inp_in_input = 0;
-}
-
-static inline void
-hpts_sane_input_insert(struct tcp_hpts_entry *hpts, struct inpcb *inp, int line)
-{
- HPTS_MTX_ASSERT(hpts);
- KASSERT(hpts->p_cpu == inp->inp_hpts_cpu,
- ("%s: hpts:%p inp:%p incorrect CPU", __FUNCTION__, hpts, inp));
- KASSERT(inp->inp_in_input == 0,
- ("%s: hpts:%p inp:%p already on the input hpts?", __FUNCTION__, hpts, inp));
- TAILQ_INSERT_TAIL(&hpts->p_input, inp, inp_input);
- inp->inp_in_input = 1;
- hpts->p_on_inqueue_cnt++;
- in_pcbref(inp);
-}
-
static struct tcp_hpts_entry *
tcp_hpts_lock(struct inpcb *inp)
{
@@ -614,19 +577,19 @@
}
static struct tcp_hpts_entry *
-tcp_input_lock(struct inpcb *inp)
+tcp_dropq_lock(struct inpcb *inp)
{
struct tcp_hpts_entry *hpts;
int32_t hpts_num;
again:
- hpts_num = inp->inp_input_cpu;
+ hpts_num = inp->inp_dropq_cpu;
hpts = tcp_pace.rp_ent[hpts_num];
KASSERT(mtx_owned(&hpts->p_mtx) == 0,
("Hpts:%p owns mtx prior-to lock line:%d",
hpts, __LINE__));
mtx_lock(&hpts->p_mtx);
- if (hpts_num != inp->inp_input_cpu) {
+ if (hpts_num != inp->inp_dropq_cpu) {
mtx_unlock(&hpts->p_mtx);
goto again;
}
@@ -652,13 +615,38 @@
}
static void
-tcp_hpts_remove_locked_input(struct tcp_hpts_entry *hpts, struct inpcb *inp, int32_t flags, int32_t line)
+tcp_dropq_remove(struct tcp_hpts_entry *hpts, struct inpcb *inp)
{
+ bool released __diagused;
+
HPTS_MTX_ASSERT(hpts);
- if (inp->inp_in_input) {
- hpts_sane_input_remove(hpts, inp, 1);
- tcp_remove_hpts_ref(inp, hpts, line);
+ INP_WLOCK_ASSERT(inp);
+
+ if (inp->inp_in_dropq != IHPTS_ONQUEUE)
+ return;
+
+ MPASS(hpts->p_cpu == inp->inp_dropq_cpu);
+ if (__predict_true(inp->inp_dropq_gencnt == hpts->p_dropq_gencnt)) {
+ TAILQ_REMOVE(&hpts->p_dropq, inp, inp_dropq);
+ MPASS(hpts->p_dropq_cnt > 0);
+ hpts->p_dropq_cnt--;
+ inp->inp_in_dropq = IHPTS_NONE;
+ released = in_pcbrele_wlocked(inp);
+ MPASS(released == false);
+ } else {
+ /*
+ * tcp_delayed_drop() now owns the TAILQ head of this inp.
+ * Can't TAILQ_REMOVE, just mark it.
+ */
+#ifdef INVARIANTS
+ struct inpcb *tmp;
+
+ TAILQ_FOREACH(tmp, &hpts->p_dropq, inp_dropq)
+ MPASS(tmp != inp);
+#endif
+ inp->inp_in_dropq = IHPTS_MOVING;
}
+
}
/*
@@ -669,7 +657,7 @@
*
* Valid values in the flags are
* HPTS_REMOVE_OUTPUT - remove from the output of the hpts.
- * HPTS_REMOVE_INPUT - remove from the input of the hpts.
+ * HPTS_REMOVE_DROPQ - remove from the drop queue of the hpts.
* Note that you can use one or both values together
* and get two actions.
*/
@@ -684,9 +672,9 @@
tcp_hpts_remove_locked_output(hpts, inp, flags, line);
mtx_unlock(&hpts->p_mtx);
}
- if (flags & HPTS_REMOVE_INPUT) {
- hpts = tcp_input_lock(inp);
- tcp_hpts_remove_locked_input(hpts, inp, flags, line);
+ if (flags & HPTS_REMOVE_DROPQ) {
+ hpts = tcp_dropq_lock(inp);
+ tcp_dropq_remove(hpts, inp);
mtx_unlock(&hpts->p_mtx);
}
}
@@ -1097,31 +1085,29 @@
}
void
-__tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason, int32_t line)
+tcp_set_inp_to_drop(struct inpcb *inp, uint16_t reason)
{
struct tcp_hpts_entry *hpts;
- struct tcpcb *tp;
+ struct tcpcb *tp = intotcpcb(inp);
- tp = intotcpcb(inp);
- hpts = tcp_input_lock(tp->t_inpcb);
- if (inp->inp_in_input == 0) {
- /* Ok we need to set it on the hpts in the current slot */
- hpts_sane_input_insert(hpts, inp, line);
- if ((hpts->p_hpts_active == 0) &&
- (hpts->p_on_min_sleep == 0)){
- /*
- * Activate the hpts if it is sleeping.
- */
- hpts->p_direct_wake = 1;
- tcp_wakehpts(hpts);
- }
- } else if ((hpts->p_hpts_active == 0) &&
- (hpts->p_on_min_sleep == 0)){
+ INP_WLOCK_ASSERT(inp);
+ inp->inp_hpts_drop_reas = reason;
+ if (inp->inp_in_dropq != IHPTS_NONE)
+ return;
+ hpts = tcp_dropq_lock(tp->t_inpcb);
+ MPASS(hpts->p_cpu == inp->inp_dropq_cpu);
+
+ TAILQ_INSERT_TAIL(&hpts->p_dropq, inp, inp_dropq);
+ inp->inp_in_dropq = IHPTS_ONQUEUE;
+ inp->inp_dropq_gencnt = hpts->p_dropq_gencnt;
+ hpts->p_dropq_cnt++;
+ in_pcbref(inp);
+
+ if ((hpts->p_hpts_active == 0) && (hpts->p_on_min_sleep == 0)){
hpts->p_direct_wake = 1;
tcp_wakehpts(hpts);
}
- inp->inp_hpts_drop_reas = reason;
- mtx_unlock(&hpts->p_mtx);
+ HPTS_UNLOCK(hpts);
}
static uint16_t
@@ -1136,8 +1122,8 @@
* If one has been set use it i.e. we want both in and out on the
* same hpts.
*/
- if (inp->inp_input_cpu_set) {
- return (inp->inp_input_cpu);
+ if (inp->inp_dropq_cpu_set) {
+ return (inp->inp_dropq_cpu);
} else if (inp->inp_hpts_cpu_set) {
return (inp->inp_hpts_cpu);
}
@@ -1160,8 +1146,8 @@
* If one has been set use it i.e. we want both in and out on the
* same hpts.
*/
- if (inp->inp_input_cpu_set) {
- return (inp->inp_input_cpu);
+ if (inp->inp_dropq_cpu_set) {
+ return (inp->inp_dropq_cpu);
} else if (inp->inp_hpts_cpu_set) {
return (inp->inp_hpts_cpu);
}
@@ -1249,117 +1235,50 @@
* list.
*/
static void
-tcp_input_data(struct tcp_hpts_entry *hpts, struct timeval *tv)
+tcp_delayed_drop(struct tcp_hpts_entry *hpts)
{
+ TAILQ_HEAD(, inpcb) head = TAILQ_HEAD_INITIALIZER(head);
+ struct inpcb *inp, *tmp;
struct tcpcb *tp;
- struct inpcb *inp;
- uint16_t drop_reason;
- int16_t set_cpu;
- uint32_t did_prefetch = 0;
- int dropped;
HPTS_MTX_ASSERT(hpts);
NET_EPOCH_ASSERT();
- while ((inp = TAILQ_FIRST(&hpts->p_input)) != NULL) {
- HPTS_MTX_ASSERT(hpts);
- hpts_sane_input_remove(hpts, inp, 0);
- if (inp->inp_input_cpu_set == 0) {
- set_cpu = 1;
- } else {
- set_cpu = 0;
- }
- hpts->p_inp = inp;
- drop_reason = inp->inp_hpts_drop_reas;
- inp->inp_in_input = 0;
- mtx_unlock(&hpts->p_mtx);
+ TAILQ_SWAP(&head, &hpts->p_dropq, inpcb, inp_dropq);
+ hpts->p_dropq_cnt = 0;
+ hpts->p_dropq_gencnt++;
+ HPTS_UNLOCK(hpts);
+
+ TAILQ_FOREACH_SAFE(inp, &head, inp_dropq, tmp) {
INP_WLOCK(inp);
-#ifdef VIMAGE
- CURVNET_SET(inp->inp_vnet);
-#endif
- if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED))) {
-out:
- hpts->p_inp = NULL;
- if (in_pcbrele_wlocked(inp) == 0) {
+ MPASS(inp->inp_hpts_drop_reas != 0);
+ if (__predict_false(inp->inp_in_dropq == IHPTS_MOVING)) {
+ inp->inp_in_dropq = IHPTS_NONE;
+ if (in_pcbrele_wlocked(inp) == false)
INP_WUNLOCK(inp);
- }
-#ifdef VIMAGE
- CURVNET_RESTORE();
-#endif
- mtx_lock(&hpts->p_mtx);
continue;
}
- tp = intotcpcb(inp);
- if ((tp == NULL) || (tp->t_inpcb == NULL)) {
- goto out;
- }
- if (drop_reason) {
- /* This tcb is being destroyed for drop_reason */
- tcp_drop_in_pkts(tp);
- tp = tcp_drop(tp, drop_reason);
- if (tp == NULL) {
- INP_WLOCK(inp);
- }
- if (in_pcbrele_wlocked(inp) == 0)
+ MPASS(inp->inp_in_dropq == IHPTS_ONQUEUE);
+ inp->inp_in_dropq = IHPTS_NONE;
+ if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED))) {
+ if (in_pcbrele_wlocked(inp) == false)
INP_WUNLOCK(inp);
-#ifdef VIMAGE
- CURVNET_RESTORE();
-#endif
- mtx_lock(&hpts->p_mtx);
continue;
}
- if (set_cpu) {
- /*
- * Setup so the next time we will move to the right
- * CPU. This should be a rare event. It will
- * sometimes happens when we are the client side
- * (usually not the server). Somehow tcp_output()
- * gets called before the tcp_do_segment() sets the
- * intial state. This means the r_cpu and r_hpts_cpu
- * is 0. We get on the hpts, and then tcp_input()
- * gets called setting up the r_cpu to the correct
- * value. The hpts goes off and sees the mis-match.
- * We simply correct it here and the CPU will switch
- * to the new hpts nextime the tcb gets added to the
- * the hpts (not this time) :-)
- */
- tcp_set_hpts(inp);
- }
- if (tp->t_fb_ptr != NULL) {
- kern_prefetch(tp->t_fb_ptr, &did_prefetch);
- did_prefetch = 1;
- }
- if ((tp->t_fb->tfb_do_queued_segments != NULL) && tp->t_in_pkt) {
- if (inp->inp_in_input)
- tcp_hpts_remove(inp, HPTS_REMOVE_INPUT);
- dropped = (*tp->t_fb->tfb_do_queued_segments)(inp->inp_socket, tp, 0);
- if (dropped) {
- /* Re-acquire the wlock so we can release the reference */
- INP_WLOCK(inp);
- }
- } else if (tp->t_in_pkt) {
- /*
- * We reach here only if we had a
- * stack that supported INP_SUPPORTS_MBUFQ
- * and then somehow switched to a stack that
- * does not. The packets are basically stranded
- * and would hang with the connection until
- * cleanup without this code. Its not the
- * best way but I know of no other way to
- * handle it since the stack needs functions
- * it does not have to handle queued packets.
- */
+ CURVNET_SET(inp->inp_vnet);
+ if (__predict_true((tp = intotcpcb(inp)) != NULL)) {
+ MPASS(tp->t_inpcb == inp);
tcp_drop_in_pkts(tp);
+ tp = tcp_drop(tp, inp->inp_hpts_drop_reas);
+ if (tp == NULL)
+ INP_WLOCK(inp);
}
- if (in_pcbrele_wlocked(inp) == 0)
+ if (in_pcbrele_wlocked(inp) == false)
INP_WUNLOCK(inp);
- INP_UNLOCK_ASSERT(inp);
-#ifdef VIMAGE
CURVNET_RESTORE();
-#endif
- mtx_lock(&hpts->p_mtx);
- hpts->p_inp = NULL;
}
+
+ mtx_lock(&hpts->p_mtx); /* XXXGL */
}
static void
@@ -1489,10 +1408,10 @@
hpts->p_nxt_slot = hpts->p_prev_slot;
hpts->p_runningslot = hpts_slot(hpts->p_prev_slot, 1);
}
- KASSERT((((TAILQ_EMPTY(&hpts->p_input) != 0) && (hpts->p_on_inqueue_cnt == 0)) ||
- ((TAILQ_EMPTY(&hpts->p_input) == 0) && (hpts->p_on_inqueue_cnt > 0))),
+ KASSERT((((TAILQ_EMPTY(&hpts->p_dropq) != 0) && (hpts->p_dropq_cnt == 0)) ||
+ ((TAILQ_EMPTY(&hpts->p_dropq) == 0) && (hpts->p_dropq_cnt > 0))),
("%s hpts:%p in_hpts cnt:%d and queue state mismatch",
- __FUNCTION__, hpts, hpts->p_on_inqueue_cnt));
+ __FUNCTION__, hpts, hpts->p_dropq_cnt));
HPTS_MTX_ASSERT(hpts);
if (hpts->p_on_queue_cnt == 0) {
goto no_one;
@@ -1716,10 +1635,10 @@
* Check to see if we took an excess amount of time and need to run
* more ticks (if we did not hit eno-bufs).
*/
- KASSERT((((TAILQ_EMPTY(&hpts->p_input) != 0) && (hpts->p_on_inqueue_cnt == 0)) ||
- ((TAILQ_EMPTY(&hpts->p_input) == 0) && (hpts->p_on_inqueue_cnt > 0))),
+ KASSERT((((TAILQ_EMPTY(&hpts->p_dropq) != 0) && (hpts->p_dropq_cnt == 0)) ||
+ ((TAILQ_EMPTY(&hpts->p_dropq) == 0) && (hpts->p_dropq_cnt > 0))),
("%s hpts:%p in_hpts cnt:%d queue state mismatch",
- __FUNCTION__, hpts, hpts->p_on_inqueue_cnt));
+ __FUNCTION__, hpts, hpts->p_dropq_cnt));
hpts->p_prev_slot = hpts->p_cur_slot;
hpts->p_lasttick = hpts->p_curtick;
if ((from_callout == 0) || (loop_cnt > max_pacer_loops)) {
@@ -1765,31 +1684,30 @@
* Run any input that may be there not covered
* in running data.
*/
- if (!TAILQ_EMPTY(&hpts->p_input)) {
- tcp_input_data(hpts, &tv);
- /*
- * Now did we spend too long running input and need to run more ticks?
- * Note that if wrap_loop_cnt < 2 then we should have the conditions
- * in the KASSERT's true. But if the wheel is behind i.e. wrap_loop_cnt
- * is greater than 2, then the condtion most likely are *not* true. Also
- * if we are called not from the callout, we don't run the wheel multiple
- * times so the slots may not align either.
- */
- KASSERT(((hpts->p_prev_slot == hpts->p_cur_slot) ||
- (wrap_loop_cnt >= 2) || (from_callout == 0)),
- ("H:%p p_prev_slot:%u not equal to p_cur_slot:%u", hpts,
- hpts->p_prev_slot, hpts->p_cur_slot));
- KASSERT(((hpts->p_lasttick == hpts->p_curtick)
- || (wrap_loop_cnt >= 2) || (from_callout == 0)),
- ("H:%p p_lasttick:%u not equal to p_curtick:%u", hpts,
- hpts->p_lasttick, hpts->p_curtick));
- if (from_callout && (hpts->p_lasttick != hpts->p_curtick)) {
- hpts->p_curtick = tcp_gethptstick(&tv);
- counter_u64_add(hpts_loops, 1);
- hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
- goto again;
- }
+ tcp_delayed_drop(hpts);
+ /*
+ * Now did we spend too long running delayed drops and need to run more
+ * ticks? Note that if wrap_loop_cnt < 2 then the conditions in the
+ * KASSERTs should be true. But if the wheel is behind, i.e. wrap_loop_cnt
+ * is 2 or more, then the conditions most likely are *not* true.
+ * Also, if we are not called from the callout, we don't run the wheel
+ * multiple times, so the slots may not align either.
+ */
+ KASSERT(((hpts->p_prev_slot == hpts->p_cur_slot) ||
+ (wrap_loop_cnt >= 2) || (from_callout == 0)),
+ ("H:%p p_prev_slot:%u not equal to p_cur_slot:%u", hpts,
+ hpts->p_prev_slot, hpts->p_cur_slot));
+ KASSERT(((hpts->p_lasttick == hpts->p_curtick)
+ || (wrap_loop_cnt >= 2) || (from_callout == 0)),
+ ("H:%p p_lasttick:%u not equal to p_curtick:%u", hpts,
+ hpts->p_lasttick, hpts->p_curtick));
+ if (from_callout && (hpts->p_lasttick != hpts->p_curtick)) {
+ hpts->p_curtick = tcp_gethptstick(&tv);
+ counter_u64_add(hpts_loops, 1);
+ hpts->p_cur_slot = tick_to_wheel(hpts->p_curtick);
+ goto again;
}
+
if (from_callout){
tcp_hpts_set_max_sleep(hpts, wrap_loop_cnt);
}
@@ -1814,12 +1732,12 @@
inp->inp_hpts_cpu_set = 1;
}
mtx_unlock(&hpts->p_mtx);
- hpts = tcp_input_lock(inp);
- if ((inp->inp_input_cpu_set == 0) &&
- (inp->inp_in_input == 0)) {
- inp->inp_input_cpu = hpts_cpuid(inp, &failed);
+ hpts = tcp_dropq_lock(inp);
+ if ((inp->inp_dropq_cpu_set == 0) &&
+ (inp->inp_in_dropq == 0)) {
+ inp->inp_dropq_cpu = hpts_cpuid(inp, &failed);
if (failed == 0)
- inp->inp_input_cpu_set = 1;
+ inp->inp_dropq_cpu_set = 1;
}
mtx_unlock(&hpts->p_mtx);
}
@@ -2140,7 +2058,7 @@
*/
mtx_init(&hpts->p_mtx, "tcp_hpts_lck",
"hpts", MTX_DEF | MTX_DUPOK);
- TAILQ_INIT(&hpts->p_input);
+ TAILQ_INIT(&hpts->p_dropq);
for (j = 0; j < NUM_OF_HPTSI_SLOTS; j++) {
TAILQ_INIT(&hpts->p_hptss[j]);
}
@@ -2155,8 +2073,8 @@
SYSCTL_ADD_INT(&hpts->hpts_ctx,
SYSCTL_CHILDREN(hpts->hpts_root),
OID_AUTO, "in_qcnt", CTLFLAG_RD,
- &hpts->p_on_inqueue_cnt, 0,
- "Count TCB's awaiting input processing");
+ &hpts->p_dropq_cnt, 0,
+ "Count TCB's awaiting delayed drop");
SYSCTL_ADD_INT(&hpts->hpts_ctx,
SYSCTL_CHILDREN(hpts->hpts_root),
OID_AUTO, "out_qcnt", CTLFLAG_RD,
Index: sys/netinet/tcp_lro.c
===================================================================
--- sys/netinet/tcp_lro.c
+++ sys/netinet/tcp_lro.c
@@ -1354,7 +1354,7 @@
if (le->m_head != NULL) {
counter_u64_add(tcp_inp_lro_direct_queue, 1);
tcp_lro_log(tp, lc, le, NULL, 22, 1,
- inp->inp_flags2, inp->inp_in_input, 1);
+ inp->inp_flags2, inp->inp_in_dropq, 1);
tcp_queue_pkts(inp, tp, le);
}
if (should_wake) {
Index: sys/netinet/tcp_stacks/bbr.c
===================================================================
--- sys/netinet/tcp_stacks/bbr.c
+++ sys/netinet/tcp_stacks/bbr.c
@@ -1884,7 +1884,7 @@
l->pacing_gain = bbr->r_ctl.rc_bbr_hptsi_gain;
l->cwnd_gain = bbr->r_ctl.rc_bbr_cwnd_gain;
l->inhpts = tcp_in_hpts(bbr->rc_inp);
- l->ininput = bbr->rc_inp->inp_in_input;
+ l->ininput = bbr->rc_inp->inp_in_dropq;
l->use_lt_bw = bbr->rc_lt_use_bw;
l->pkts_out = bbr->r_ctl.rc_flight_at_input;
l->pkt_epoch = bbr->r_ctl.rc_pkt_epoch;
Index: sys/netinet/tcp_stacks/rack.c
===================================================================
--- sys/netinet/tcp_stacks/rack.c
+++ sys/netinet/tcp_stacks/rack.c
@@ -2295,7 +2295,7 @@
log.u_bbr.flex6 = rsm->r_end;
log.u_bbr.flex8 = mod;
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
@@ -2330,7 +2330,7 @@
else
log.u_bbr.pkts_out = rack->r_ctl.rc_prr_sndcnt;
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.timeStamp = tcp_get_usecs(&tv);
log.u_bbr.inflight = ctf_flight_size(rack->rc_tp, rack->r_ctl.rc_sacked);
log.u_bbr.pkts_out = rack->r_ctl.rc_out_at_rto;
@@ -2355,7 +2355,7 @@
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.flex8 = to_num;
log.u_bbr.flex1 = rack->r_ctl.rc_rack_min_rtt;
log.u_bbr.flex2 = rack->rc_rack_rtt;
@@ -2394,7 +2394,7 @@
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.flex8 = flag;
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.cur_del_rate = (uint64_t)prev;
log.u_bbr.delRate = (uint64_t)rsm;
log.u_bbr.rttProp = (uint64_t)next;
@@ -2439,7 +2439,7 @@
struct timeval tv;
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.flex1 = t;
log.u_bbr.flex2 = len;
log.u_bbr.flex3 = rack->r_ctl.rc_rack_min_rtt;
@@ -2589,7 +2589,7 @@
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.flex1 = line;
log.u_bbr.flex2 = tick;
log.u_bbr.flex3 = tp->t_maxunacktime;
@@ -2616,7 +2616,7 @@
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.flex1 = slot;
if (rack->rack_no_prr)
log.u_bbr.flex2 = 0;
@@ -2718,7 +2718,7 @@
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.flex1 = slot;
log.u_bbr.flex2 = rack->r_ctl.rc_hpts_flags;
log.u_bbr.flex4 = reason;
@@ -2751,7 +2751,7 @@
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.flex1 = line;
log.u_bbr.flex2 = rack->r_ctl.rc_last_output_to;
log.u_bbr.flex3 = flags_on_entry;
@@ -13329,7 +13329,7 @@
#endif
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
if (rack->rack_no_prr == 0)
log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt;
else
@@ -14321,7 +14321,7 @@
#endif
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
if (rack->rack_no_prr == 0)
log.u_bbr.flex1 = rack->r_ctl.rc_prr_sndcnt;
else
@@ -15612,7 +15612,7 @@
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
log.u_bbr.flex1 = error;
log.u_bbr.flex2 = flags;
log.u_bbr.flex3 = rsm_is_null;
@@ -16128,7 +16128,7 @@
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
if (rack->rack_no_prr)
log.u_bbr.flex1 = 0;
else
@@ -16629,7 +16629,7 @@
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
if (rack->rack_no_prr)
log.u_bbr.flex1 = 0;
else
@@ -18801,7 +18801,7 @@
memset(&log.u_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.inhpts = tcp_in_hpts(rack->rc_inp);
- log.u_bbr.ininput = rack->rc_inp->inp_in_input;
+ log.u_bbr.ininput = rack->rc_inp->inp_in_dropq;
if (rack->rack_no_prr)
log.u_bbr.flex1 = 0;
else

File Metadata

Mime Type
text/plain
Expires
Sun, Oct 26, 4:11 PM (2 h, 24 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
24220216
Default Alt Text
D33025.diff (27 KB)

Event Timeline