Page MenuHomeFreeBSD

D33026.id99369.diff
No OneTemporary

D33026.id99369.diff

diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -221,7 +221,7 @@
#define inp_zero_size (sizeof(struct inpcb) - \
offsetof(struct inpcb, inp_start_zero))
TAILQ_ENTRY(inpcb) inp_hpts; /* pacing out queue next lock(b) */
-
+ uint32_t inp_hpts_gencnt; /* gencnt of the hpts wheel slot when this inp was queued on it, lock(i&b) */
uint32_t inp_hpts_request; /* Current hpts request, zero if
* fits in the pacing window (i&b). */
/*
@@ -254,7 +254,7 @@
uint8_t inp_numa_domain; /* numa domain */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct socket *inp_socket; /* (i) back pointer to socket */
- uint32_t inp_hptsslot; /* Hpts wheel slot this tcb is Lock(i&b) */
+ int32_t inp_hptsslot; /* Hpts wheel slot this tcb is Lock(i&b) */
uint32_t inp_hpts_drop_reas; /* reason we are dropping the PCB (lock i&b) */
uint32_t inp_dropq_gencnt;
TAILQ_ENTRY(inpcb) inp_dropq; /* hpts drop queue next lock(b) */
diff --git a/sys/netinet/tcp_hpts.h b/sys/netinet/tcp_hpts.h
--- a/sys/netinet/tcp_hpts.h
+++ b/sys/netinet/tcp_hpts.h
@@ -119,13 +119,7 @@
#define HPTS_REMOVE_DROPQ 0x01
#define HPTS_REMOVE_OUTPUT 0x02
#define HPTS_REMOVE_ALL (HPTS_REMOVE_DROPQ | HPTS_REMOVE_OUTPUT)
-
-static inline bool
-tcp_in_hpts(struct inpcb *inp)
-{
-
- return (inp->inp_in_hpts > 0);
-}
+bool tcp_in_hpts(struct inpcb *);
/*
* To insert a TCB on the hpts you *must* be holding the
@@ -151,11 +145,10 @@
* that INP_WLOCK() or from destroying your TCB where again
* you should already have the INP_WLOCK().
*/
-uint32_t __tcp_hpts_insert(struct inpcb *inp, uint32_t slot, int32_t line);
-#define tcp_hpts_insert(a, b) __tcp_hpts_insert(a, b, __LINE__)
-
-uint32_t
-tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line, struct hpts_diag *diag);
+uint32_t tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line,
+ struct hpts_diag *diag);
+#define tcp_hpts_insert(inp, slot) \
+ tcp_hpts_insert_diag((inp), (slot), __LINE__, NULL)
void __tcp_set_hpts(struct inpcb *inp, int32_t line);
#define tcp_set_hpts(a) __tcp_set_hpts(a, __LINE__)
@@ -164,6 +157,8 @@
void tcp_run_hpts(void);
+uint16_t hpts_random_cpu(struct inpcb *inp);
+
extern int32_t tcp_min_hptsi_time;
#endif /* _KERNEL */
diff --git a/sys/netinet/tcp_hpts.c b/sys/netinet/tcp_hpts.c
--- a/sys/netinet/tcp_hpts.c
+++ b/sys/netinet/tcp_hpts.c
@@ -200,7 +200,6 @@
#define HPTS_MTX_ASSERT(hpts) mtx_assert(&(hpts)->p_mtx, MA_OWNED)
#define HPTS_LOCK(hpts) mtx_lock(&(hpts)->p_mtx)
#define HPTS_UNLOCK(hpts) mtx_unlock(&(hpts)->p_mtx)
-TAILQ_HEAD(hptsh, inpcb);
struct tcp_hpts_entry {
/* Cache line 0x00 */
struct mtx p_mtx; /* Mutex for hpts */
@@ -223,10 +222,12 @@
p_avail:5;
uint8_t p_fill[3]; /* Fill to 32 bits */
/* Cache line 0x40 */
- void *p_inp;
TAILQ_HEAD(, inpcb) p_dropq; /* Delayed drop queue */
- /* Hptsi wheel */
- struct hptsh *p_hptss;
+ struct hptsh {
+ TAILQ_HEAD(, inpcb) head;
+ uint32_t count;
+ uint32_t gencnt;
+ } *p_hptss; /* Hptsi wheel */
uint32_t p_dropq_cnt; /* Count on drop queue */
uint32_t p_dropq_gencnt;
uint32_t p_hpts_sleep_time; /* Current sleep interval having a max
@@ -249,12 +250,11 @@
struct callout co __aligned(CACHE_LINE_SIZE);
} __aligned(CACHE_LINE_SIZE);
-struct tcp_hptsi {
- struct proc *rp_proc; /* Process structure for hpts */
+static struct tcp_hptsi {
struct tcp_hpts_entry **rp_ent; /* Array of hptss */
uint32_t *cts_last_ran;
uint32_t rp_num_hptss; /* Number of hpts threads */
-};
+} tcp_pace;
MALLOC_DEFINE(M_TCPHPTS, "tcp_hpts", "TCP hpts");
#ifdef RSS
@@ -263,7 +263,6 @@
static int tcp_bind_threads = 2;
#endif
static int tcp_use_irq_cpu = 0;
-static struct tcp_hptsi tcp_pace;
static uint32_t *cts_last_ran;
static int hpts_does_tp_logging = 0;
static int hpts_use_assigned_cpu = 1;
@@ -302,6 +301,12 @@
int cpu[MAXCPU];
} hpts_domains[MAXMEMDOM];
+enum {
+ IHPTS_NONE = 0,
+ IHPTS_ONQUEUE,
+ IHPTS_MOVING,
+};
+
counter_u64_t hpts_hopelessly_behind;
SYSCTL_COUNTER_U64(_net_inet_tcp_hpts_stats, OID_AUTO, hopeless, CTLFLAG_RD,
@@ -521,58 +526,42 @@
swi_sched(hpts->ie_cookie, 0);
}
-static inline void
-hpts_sane_pace_remove(struct tcp_hpts_entry *hpts, struct inpcb *inp, struct hptsh *head, int clear)
+static void
+inp_hpts_insert(struct inpcb *inp, struct tcp_hpts_entry *hpts)
{
- HPTS_MTX_ASSERT(hpts);
- KASSERT(hpts->p_cpu == inp->inp_hpts_cpu, ("%s: hpts:%p inp:%p incorrect CPU", __FUNCTION__, hpts, inp));
- KASSERT(inp->inp_in_hpts != 0, ("%s: hpts:%p inp:%p not on the hpts?", __FUNCTION__, hpts, inp));
- TAILQ_REMOVE(head, inp, inp_hpts);
- hpts->p_on_queue_cnt--;
- KASSERT(hpts->p_on_queue_cnt >= 0,
- ("Hpts goes negative inp:%p hpts:%p",
- inp, hpts));
- if (clear) {
- inp->inp_hpts_request = 0;
- inp->inp_in_hpts = 0;
- }
-}
+ struct hptsh *hptsh;
-static inline void
-hpts_sane_pace_insert(struct tcp_hpts_entry *hpts, struct inpcb *inp, struct hptsh *head, int line, int noref)
-{
+ INP_WLOCK_ASSERT(inp);
HPTS_MTX_ASSERT(hpts);
- KASSERT(hpts->p_cpu == inp->inp_hpts_cpu,
- ("%s: hpts:%p inp:%p incorrect CPU", __FUNCTION__, hpts, inp));
- KASSERT(((noref == 1) && (inp->inp_in_hpts == 1)) ||
- ((noref == 0) && (inp->inp_in_hpts == 0)),
- ("%s: hpts:%p inp:%p already on the hpts?",
- __FUNCTION__, hpts, inp));
- TAILQ_INSERT_TAIL(head, inp, inp_hpts);
- inp->inp_in_hpts = 1;
- hpts->p_on_queue_cnt++;
- if (noref == 0) {
+ MPASS(hpts->p_cpu == inp->inp_hpts_cpu);
+ MPASS(!(inp->inp_flags & (INP_DROPPED|INP_TIMEWAIT)));
+
+ hptsh = &hpts->p_hptss[inp->inp_hptsslot];
+
+ if (inp->inp_in_hpts == IHPTS_NONE) {
+ inp->inp_in_hpts = IHPTS_ONQUEUE;
in_pcbref(inp);
- }
+ } else if (inp->inp_in_hpts == IHPTS_MOVING) {
+ inp->inp_in_hpts = IHPTS_ONQUEUE;
+ } else
+ MPASS(inp->inp_in_hpts == IHPTS_ONQUEUE);
+ inp->inp_hpts_gencnt = hptsh->gencnt;
+
+ TAILQ_INSERT_TAIL(&hptsh->head, inp, inp_hpts);
+ hptsh->count++;
+ hpts->p_on_queue_cnt++;
}
static struct tcp_hpts_entry *
tcp_hpts_lock(struct inpcb *inp)
{
struct tcp_hpts_entry *hpts;
- int32_t hpts_num;
-again:
- hpts_num = inp->inp_hpts_cpu;
- hpts = tcp_pace.rp_ent[hpts_num];
- KASSERT(mtx_owned(&hpts->p_mtx) == 0,
- ("Hpts:%p owns mtx prior-to lock line:%d",
- hpts, __LINE__));
- mtx_lock(&hpts->p_mtx);
- if (hpts_num != inp->inp_hpts_cpu) {
- mtx_unlock(&hpts->p_mtx);
- goto again;
- }
+ INP_LOCK_ASSERT(inp);
+
+ hpts = tcp_pace.rp_ent[inp->inp_hpts_cpu];
+ HPTS_LOCK(hpts);
+
return (hpts);
}
@@ -580,38 +569,23 @@
tcp_dropq_lock(struct inpcb *inp)
{
struct tcp_hpts_entry *hpts;
- int32_t hpts_num;
-again:
- hpts_num = inp->inp_dropq_cpu;
- hpts = tcp_pace.rp_ent[hpts_num];
- KASSERT(mtx_owned(&hpts->p_mtx) == 0,
- ("Hpts:%p owns mtx prior-to lock line:%d",
- hpts, __LINE__));
- mtx_lock(&hpts->p_mtx);
- if (hpts_num != inp->inp_dropq_cpu) {
- mtx_unlock(&hpts->p_mtx);
- goto again;
- }
- return (hpts);
-}
+ INP_LOCK_ASSERT(inp);
-static void
-tcp_remove_hpts_ref(struct inpcb *inp, struct tcp_hpts_entry *hpts, int line)
-{
- int32_t ret;
+ hpts = tcp_pace.rp_ent[inp->inp_dropq_cpu];
+ HPTS_LOCK(hpts);
- ret = in_pcbrele_wlocked(inp);
- KASSERT(ret != 1, ("inpcb:%p release ret 1", inp));
+ return (hpts);
}
static void
-tcp_hpts_remove_locked_output(struct tcp_hpts_entry *hpts, struct inpcb *inp, int32_t flags, int32_t line)
+inp_hpts_release(struct inpcb *inp)
{
- if (inp->inp_in_hpts) {
- hpts_sane_pace_remove(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], 1);
- tcp_remove_hpts_ref(inp, hpts, line);
- }
+ bool released __diagused;
+
+ inp->inp_in_hpts = IHPTS_NONE;
+ released = in_pcbrele_wlocked(inp);
+ MPASS(released == false);
}
static void
@@ -665,18 +639,62 @@
__tcp_hpts_remove(struct inpcb *inp, int32_t flags, int32_t line)
{
struct tcp_hpts_entry *hpts;
+ struct hptsh *hptsh;
INP_WLOCK_ASSERT(inp);
- if (flags & HPTS_REMOVE_OUTPUT) {
- hpts = tcp_hpts_lock(inp);
- tcp_hpts_remove_locked_output(hpts, inp, flags, line);
- mtx_unlock(&hpts->p_mtx);
- }
+
if (flags & HPTS_REMOVE_DROPQ) {
hpts = tcp_dropq_lock(inp);
tcp_dropq_remove(hpts, inp);
mtx_unlock(&hpts->p_mtx);
}
+
+ MPASS(flags & HPTS_REMOVE_OUTPUT);
+
+ hpts = tcp_hpts_lock(inp);
+ if (inp->inp_in_hpts == IHPTS_ONQUEUE) {
+ hptsh = &hpts->p_hptss[inp->inp_hptsslot];
+ inp->inp_hpts_request = 0;
+ if (__predict_true(inp->inp_hpts_gencnt == hptsh->gencnt)) {
+ TAILQ_REMOVE(&hptsh->head, inp, inp_hpts);
+ MPASS(hptsh->count > 0);
+ hptsh->count--;
+ MPASS(hpts->p_on_queue_cnt > 0);
+ hpts->p_on_queue_cnt--;
+ inp_hpts_release(inp);
+ } else {
+ /*
+ * tcp_hptsi() now owns the TAILQ head of this inp.
+ * Can't TAILQ_REMOVE, just mark it.
+ */
+#ifdef INVARIANTS
+ struct inpcb *tmp;
+
+ TAILQ_FOREACH(tmp, &hptsh->head, inp_hpts)
+ MPASS(tmp != inp);
+#endif
+ inp->inp_in_hpts = IHPTS_MOVING;
+ inp->inp_hptsslot = -1;
+ }
+ } else if (inp->inp_in_hpts == IHPTS_MOVING) {
+ /*
+ * Handle a special race condition:
+ * tcp_hptsi() moves inpcb to detached tailq
+ * tcp_hpts_remove() marks as IHPTS_MOVING, slot = -1
+ * tcp_hpts_insert() sets slot to a meaningful value
+ * tcp_hpts_remove() again (we are here!), then in_pcbdrop()
+ * tcp_hptsi() finds pcb with meaningful slot and INP_DROPPED
+ */
+ inp->inp_hptsslot = -1;
+ }
+ HPTS_UNLOCK(hpts);
+}
+
+bool
+tcp_in_hpts(struct inpcb *inp)
+{
+
+ return (inp->inp_in_hpts == IHPTS_ONQUEUE);
}
static inline int
@@ -841,46 +859,6 @@
}
}
-static int
-tcp_queue_to_hpts_immediate_locked(struct inpcb *inp, struct tcp_hpts_entry *hpts, int32_t line, int32_t noref)
-{
- uint32_t need_wake = 0;
-
- HPTS_MTX_ASSERT(hpts);
- if (inp->inp_in_hpts == 0) {
- /* Ok we need to set it on the hpts in the current slot */
- inp->inp_hpts_request = 0;
- if ((hpts->p_hpts_active == 0) ||
- (hpts->p_wheel_complete)) {
- /*
- * A sleeping hpts we want in next slot to run
- * note that in this state p_prev_slot == p_cur_slot
- */
- inp->inp_hptsslot = hpts_slot(hpts->p_prev_slot, 1);
- if ((hpts->p_on_min_sleep == 0) && (hpts->p_hpts_active == 0))
- need_wake = 1;
- } else if ((void *)inp == hpts->p_inp) {
- /*
- * The hpts system is running and the caller
- * was awoken by the hpts system.
- * We can't allow you to go into the same slot we
- * are in (we don't want a loop :-D).
- */
- inp->inp_hptsslot = hpts->p_nxt_slot;
- } else
- inp->inp_hptsslot = hpts->p_runningslot;
- hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], line, noref);
- if (need_wake) {
- /*
- * Activate the hpts if it is sleeping and its
- * timeout is not 1.
- */
- hpts->p_direct_wake = 1;
- tcp_wakehpts(hpts);
- }
- }
- return (need_wake);
-}
#ifdef INVARIANTS
static void
@@ -917,17 +895,27 @@
}
#endif
-static void
-tcp_hpts_insert_locked(struct tcp_hpts_entry *hpts, struct inpcb *inp, uint32_t slot, int32_t line,
- struct hpts_diag *diag, struct timeval *tv)
+uint32_t
+tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line, struct hpts_diag *diag)
{
- uint32_t need_new_to = 0;
- uint32_t wheel_cts;
- int32_t wheel_slot, maxslots, last_slot;
+ struct tcp_hpts_entry *hpts;
+ struct timeval tv;
+ uint32_t slot_on, wheel_cts, last_slot, need_new_to = 0;
+ int32_t wheel_slot, maxslots;
int cpu;
- int8_t need_wakeup = 0;
+ bool need_wakeup = false;
- HPTS_MTX_ASSERT(hpts);
+ INP_WLOCK_ASSERT(inp);
+ MPASS(!tcp_in_hpts(inp));
+ MPASS(!(inp->inp_flags & (INP_DROPPED|INP_TIMEWAIT)));
+
+ /*
+ * We now return the next-slot the hpts will be on, beyond its
+ * current run (if up) or where it was when it stopped if it is
+ * sleeping.
+ */
+ hpts = tcp_hpts_lock(inp);
+ microuptime(&tv);
if (diag) {
memset(diag, 0, sizeof(struct hpts_diag));
diag->p_hpts_active = hpts->p_hpts_active;
@@ -941,14 +929,37 @@
diag->p_on_min_sleep = hpts->p_on_min_sleep;
diag->hpts_sleep_time = hpts->p_hpts_sleep_time;
}
- KASSERT(inp->inp_in_hpts == 0, ("Hpts:%p tp:%p already on hpts and add?", hpts, inp));
if (slot == 0) {
- /* Immediate */
- tcp_queue_to_hpts_immediate_locked(inp, hpts, line, 0);
- return;
+ /* Ok we need to set it on the hpts in the current slot */
+ inp->inp_hpts_request = 0;
+ if ((hpts->p_hpts_active == 0) || (hpts->p_wheel_complete)) {
+ /*
+ * For a sleeping hpts we want the inp in the next slot
+ * to run. Note that in this state p_prev_slot == p_cur_slot.
+ */
+ inp->inp_hptsslot = hpts_slot(hpts->p_prev_slot, 1);
+ if ((hpts->p_on_min_sleep == 0) &&
+ (hpts->p_hpts_active == 0))
+ need_wakeup = true;
+ } else
+ inp->inp_hptsslot = hpts->p_runningslot;
+ if (__predict_true(inp->inp_in_hpts != IHPTS_MOVING))
+ inp_hpts_insert(inp, hpts);
+ if (need_wakeup) {
+ /*
+ * Activate the hpts if it is sleeping and its
+ * timeout is not 1.
+ */
+ hpts->p_direct_wake = 1;
+ tcp_wakehpts(hpts);
+ }
+ slot_on = hpts->p_nxt_slot;
+ HPTS_UNLOCK(hpts);
+
+ return (slot_on);
}
/* Get the current time relative to the wheel */
- wheel_cts = tcp_tv_to_hptstick(tv);
+ wheel_cts = tcp_tv_to_hptstick(&tv);
/* Map it onto the wheel */
wheel_slot = tick_to_wheel(wheel_cts);
/* Now what's the max we can place it at? */
@@ -988,7 +999,8 @@
#ifdef INVARIANTS
check_if_slot_would_be_wrong(hpts, inp, inp->inp_hptsslot, line);
#endif
- hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], line, 0);
+ if (__predict_true(inp->inp_in_hpts != IHPTS_MOVING))
+ inp_hpts_insert(inp, hpts);
if ((hpts->p_hpts_active == 0) &&
(inp->inp_hpts_request == 0) &&
(hpts->p_on_min_sleep == 0)) {
@@ -1056,32 +1068,10 @@
diag->co_ret = co_ret;
}
}
-}
-
-uint32_t
-tcp_hpts_insert_diag(struct inpcb *inp, uint32_t slot, int32_t line, struct hpts_diag *diag)
-{
- struct tcp_hpts_entry *hpts;
- uint32_t slot_on;
- struct timeval tv;
-
- /*
- * We now return the next-slot the hpts will be on, beyond its
- * current run (if up) or where it was when it stopped if it is
- * sleeping.
- */
- INP_WLOCK_ASSERT(inp);
- hpts = tcp_hpts_lock(inp);
- microuptime(&tv);
- tcp_hpts_insert_locked(hpts, inp, slot, line, diag, &tv);
slot_on = hpts->p_nxt_slot;
- mtx_unlock(&hpts->p_mtx);
- return (slot_on);
-}
+ HPTS_UNLOCK(hpts);
-uint32_t
-__tcp_hpts_insert(struct inpcb *inp, uint32_t slot, int32_t line){
- return (tcp_hpts_insert_diag(inp, slot, line, NULL));
+ return (slot_on);
}
void
@@ -1110,7 +1100,7 @@
HPTS_UNLOCK(hpts);
}
-static uint16_t
+uint16_t
hpts_random_cpu(struct inpcb *inp){
/*
* No flow type set distribute the load randomly.
@@ -1215,24 +1205,8 @@
}
/*
- * Do NOT try to optimize the processing of inp's
- * by first pulling off all the inp's into a temporary
- * list (e.g. TAILQ_CONCAT). If you do that the subtle
- * interactions of switching CPU's will kill because of
- * problems in the linked list manipulation. Basically
- * you would switch cpu's with the hpts mutex locked
- * but then while you were processing one of the inp's
- * some other one that you switch will get a new
- * packet on the different CPU. It will insert it
- * on the new hpts's input list. Creating a temporary
- * link in the inp will not fix it either, since
- * the other hpts will be doing the same thing and
- * you will both end up using the temporary link.
- *
- * You will die in an ASSERT for tailq corruption if you
- * run INVARIANTS or you will die horribly without
- * INVARIANTS in some unknown way with a corrupt linked
- * list.
+ * Delayed drop functionality is factored out into a separate function,
+ * but its logic is similar to that of tcp_hptsi().
*/
static void
tcp_delayed_drop(struct tcp_hpts_entry *hpts)
@@ -1292,7 +1266,7 @@
* be the sleep time.
*/
for (i = 0, t = hpts_slot(hpts->p_cur_slot, 1); i < NUM_OF_HPTSI_SLOTS; i++) {
- if (TAILQ_EMPTY(&hpts->p_hptss[t]) == 0) {
+ if (TAILQ_EMPTY(&hpts->p_hptss[t].head) == 0) {
fnd = 1;
break;
}
@@ -1310,7 +1284,7 @@
tcp_hptsi(struct tcp_hpts_entry *hpts, int from_callout)
{
struct tcpcb *tp;
- struct inpcb *inp = NULL, *ninp;
+ struct inpcb *inp;
struct timeval tv;
uint64_t total_slots_processed = 0;
int32_t slots_to_run, i, error;
@@ -1322,7 +1296,6 @@
int32_t wrap_loop_cnt = 0;
int32_t slot_pos_of_endpoint = 0;
int32_t orig_exit_slot;
- int16_t set_cpu;
int8_t completed_measure = 0, seen_endpoint = 0;
HPTS_MTX_ASSERT(hpts);
@@ -1386,18 +1359,29 @@
* run them, the extra 10usecs of late (by being
* put behind) does not really matter in this situation.
*/
-#ifdef INVARIANTS
- /*
- * To prevent a panic we need to update the inpslot to the
- * new location. This is safe since it takes both the
- * INP lock and the pacer mutex to change the inp_hptsslot.
- */
- TAILQ_FOREACH(inp, &hpts->p_hptss[hpts->p_nxt_slot], inp_hpts) {
+ TAILQ_FOREACH(inp, &hpts->p_hptss[hpts->p_nxt_slot].head,
+ inp_hpts) {
+ MPASS(inp->inp_hptsslot == hpts->p_nxt_slot);
+ MPASS(inp->inp_hpts_gencnt ==
+ hpts->p_hptss[hpts->p_nxt_slot].gencnt);
+ MPASS(inp->inp_in_hpts == IHPTS_ONQUEUE);
+
+ /*
+ * Update the gencnt and the slot to match the new
+ * location. This is safe since it takes both the
+ * INP lock and the pacer mutex to change
+ * inp_hptsslot and inp_hpts_gencnt.
+ */
+ inp->inp_hpts_gencnt =
+ hpts->p_hptss[hpts->p_runningslot].gencnt;
inp->inp_hptsslot = hpts->p_runningslot;
}
-#endif
- TAILQ_CONCAT(&hpts->p_hptss[hpts->p_runningslot],
- &hpts->p_hptss[hpts->p_nxt_slot], inp_hpts);
+ TAILQ_CONCAT(&hpts->p_hptss[hpts->p_runningslot].head,
+ &hpts->p_hptss[hpts->p_nxt_slot].head, inp_hpts);
+ hpts->p_hptss[hpts->p_runningslot].count +=
+ hpts->p_hptss[hpts->p_nxt_slot].count;
+ hpts->p_hptss[hpts->p_nxt_slot].count = 0;
+ hpts->p_hptss[hpts->p_nxt_slot].gencnt++;
slots_to_run = NUM_OF_HPTSI_SLOTS - 1;
counter_u64_add(wheel_wrap, 1);
} else {
@@ -1412,46 +1396,79 @@
((TAILQ_EMPTY(&hpts->p_dropq) == 0) && (hpts->p_dropq_cnt > 0))),
("%s hpts:%p in_hpts cnt:%d and queue state mismatch",
__FUNCTION__, hpts, hpts->p_dropq_cnt));
- HPTS_MTX_ASSERT(hpts);
if (hpts->p_on_queue_cnt == 0) {
goto no_one;
}
- HPTS_MTX_ASSERT(hpts);
for (i = 0; i < slots_to_run; i++) {
+ struct inpcb *inp, *ninp;
+ TAILQ_HEAD(, inpcb) head = TAILQ_HEAD_INITIALIZER(head);
+ struct hptsh *hptsh;
+ uint32_t runningslot, gencnt;
+
/*
* Calculate our delay, if there are no extra ticks there
* was not any (i.e. if slots_to_run == 1, no delay).
*/
- hpts->p_delayed_by = (slots_to_run - (i + 1)) * HPTS_TICKS_PER_SLOT;
- HPTS_MTX_ASSERT(hpts);
- while ((inp = TAILQ_FIRST(&hpts->p_hptss[hpts->p_runningslot])) != NULL) {
- HPTS_MTX_ASSERT(hpts);
+ hpts->p_delayed_by = (slots_to_run - (i + 1)) *
+ HPTS_TICKS_PER_SLOT;
+
+ runningslot = hpts->p_runningslot;
+ hptsh = &hpts->p_hptss[runningslot];
+ TAILQ_SWAP(&head, &hptsh->head, inpcb, inp_hpts);
+ hpts->p_on_queue_cnt -= hptsh->count;
+ hptsh->count = 0;
+ gencnt = hptsh->gencnt++;
+
+ HPTS_UNLOCK(hpts);
+
+ TAILQ_FOREACH_SAFE(inp, &head, inp_hpts, ninp) {
+ bool set_cpu;
+
+ if (ninp != NULL) {
+ /* We prefetch the next inp if possible */
+ kern_prefetch(ninp, &prefetch_ninp);
+ prefetch_ninp = 1;
+ }
+
/* For debugging */
if (seen_endpoint == 0) {
seen_endpoint = 1;
- orig_exit_slot = slot_pos_of_endpoint = hpts->p_runningslot;
+ orig_exit_slot = slot_pos_of_endpoint =
+ runningslot;
} else if (completed_measure == 0) {
/* Record the new position */
- orig_exit_slot = hpts->p_runningslot;
+ orig_exit_slot = runningslot;
}
total_slots_processed++;
- hpts->p_inp = inp;
paced_cnt++;
- KASSERT(hpts->p_runningslot == inp->inp_hptsslot,
- ("Hpts:%p inp:%p slot mis-aligned %u vs %u",
- hpts, inp, hpts->p_runningslot, inp->inp_hptsslot));
- /* Now pull it */
+
+ INP_WLOCK(inp);
if (inp->inp_hpts_cpu_set == 0) {
- set_cpu = 1;
+ set_cpu = true;
} else {
- set_cpu = 0;
+ set_cpu = false;
}
- hpts_sane_pace_remove(hpts, inp, &hpts->p_hptss[hpts->p_runningslot], 0);
- if ((ninp = TAILQ_FIRST(&hpts->p_hptss[hpts->p_runningslot])) != NULL) {
- /* We prefetch the next inp if possible */
- kern_prefetch(ninp, &prefetch_ninp);
- prefetch_ninp = 1;
+
+ if (__predict_false(inp->inp_in_hpts == IHPTS_MOVING)) {
+ if (inp->inp_hptsslot == -1) {
+ inp->inp_in_hpts = IHPTS_NONE;
+ if (in_pcbrele_wlocked(inp) == false)
+ INP_WUNLOCK(inp);
+ } else {
+ HPTS_LOCK(hpts);
+ inp_hpts_insert(inp, hpts);
+ HPTS_UNLOCK(hpts);
+ INP_WUNLOCK(inp);
+ }
+ continue;
}
+
+ MPASS(inp->inp_in_hpts == IHPTS_ONQUEUE);
+ MPASS(!(inp->inp_flags & (INP_DROPPED|INP_TIMEWAIT)));
+ KASSERT(runningslot == inp->inp_hptsslot,
+ ("Hpts:%p inp:%p slot mis-aligned %u vs %u",
+ hpts, inp, runningslot, inp->inp_hptsslot));
+
if (inp->inp_hpts_request) {
/*
* This guy is deferred out further in time
@@ -1463,54 +1480,37 @@
remaining_slots = slots_to_run - (i + 1);
if (inp->inp_hpts_request > remaining_slots) {
+ HPTS_LOCK(hpts);
/*
* How far out can we go?
*/
- maxslots = max_slots_available(hpts, hpts->p_cur_slot, &last_slot);
+ maxslots = max_slots_available(hpts,
+ hpts->p_cur_slot, &last_slot);
if (maxslots >= inp->inp_hpts_request) {
- /* we can place it finally to be processed */
- inp->inp_hptsslot = hpts_slot(hpts->p_runningslot, inp->inp_hpts_request);
+ /* We can finally place it to
+ * be processed. */
+ inp->inp_hptsslot = hpts_slot(
+ hpts->p_runningslot,
+ inp->inp_hpts_request);
inp->inp_hpts_request = 0;
} else {
/* Work off some more time */
inp->inp_hptsslot = last_slot;
- inp->inp_hpts_request-= maxslots;
+ inp->inp_hpts_request -=
+ maxslots;
}
- hpts_sane_pace_insert(hpts, inp, &hpts->p_hptss[inp->inp_hptsslot], __LINE__, 1);
- hpts->p_inp = NULL;
+ inp_hpts_insert(inp, hpts);
+ HPTS_UNLOCK(hpts);
+ INP_WUNLOCK(inp);
continue;
}
inp->inp_hpts_request = 0;
/* Fall through we will so do it now */
}
- /*
- * We clear the hpts flag here after dealing with
- * remaining slots. This way anyone looking with the
- * TCB lock will see its on the hpts until just
- * before we unlock.
- */
- inp->inp_in_hpts = 0;
- mtx_unlock(&hpts->p_mtx);
- INP_WLOCK(inp);
- if (in_pcbrele_wlocked(inp)) {
- mtx_lock(&hpts->p_mtx);
- hpts->p_inp = NULL;
- continue;
- }
- if ((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED))) {
- out_now:
- KASSERT(mtx_owned(&hpts->p_mtx) == 0,
- ("Hpts:%p owns mtx prior-to lock line:%d",
- hpts, __LINE__));
- INP_WUNLOCK(inp);
- mtx_lock(&hpts->p_mtx);
- hpts->p_inp = NULL;
- continue;
- }
+
+ inp_hpts_release(inp);
tp = intotcpcb(inp);
- if ((tp == NULL) || (tp->t_inpcb == NULL)) {
- goto out_now;
- }
+ MPASS(tp);
if (set_cpu) {
/*
* Setup so the next time we will move to
@@ -1531,24 +1531,11 @@
*/
tcp_set_hpts(inp);
}
-#ifdef VIMAGE
CURVNET_SET(inp->inp_vnet);
-#endif
/* Lets do any logging that we might want to */
if (hpts_does_tp_logging && (tp->t_logstate != TCP_LOG_STATE_OFF)) {
tcp_hpts_log(hpts, tp, &tv, slots_to_run, i, from_callout);
}
- /*
- * There is a hole here, we get the refcnt on the
- * inp so it will still be preserved but to make
- * sure we can get the INP we need to hold the p_mtx
- * above while we pull out the tp/inp, as long as
- * fini gets the lock first we are assured of having
- * a sane INP we can lock and test.
- */
- KASSERT(mtx_owned(&hpts->p_mtx) == 0,
- ("Hpts:%p owns mtx prior-to tcp_output call line:%d",
- hpts, __LINE__));
if (tp->t_fb_ptr != NULL) {
kern_prefetch(tp->t_fb_ptr, &did_prefetch);
@@ -1601,15 +1588,7 @@
}
INP_WUNLOCK(inp);
skip_pacing:
-#ifdef VIMAGE
CURVNET_RESTORE();
-#endif
- INP_UNLOCK_ASSERT(inp);
- KASSERT(mtx_owned(&hpts->p_mtx) == 0,
- ("Hpts:%p owns mtx prior-to lock line:%d",
- hpts, __LINE__));
- mtx_lock(&hpts->p_mtx);
- hpts->p_inp = NULL;
}
if (seen_endpoint) {
/*
@@ -1621,8 +1600,7 @@
*/
completed_measure = 1;
}
- HPTS_MTX_ASSERT(hpts);
- hpts->p_inp = NULL;
+ HPTS_LOCK(hpts);
hpts->p_runningslot++;
if (hpts->p_runningslot >= NUM_OF_HPTSI_SLOTS) {
hpts->p_runningslot = 0;
@@ -2025,7 +2003,6 @@
uint32_t ncpus = mp_ncpus ? mp_ncpus : MAXCPU;
int count, domain, cpu;
- tcp_pace.rp_proc = NULL;
tcp_pace.rp_num_hptss = ncpus;
hpts_hopelessly_behind = counter_u64_alloc(M_WAITOK);
hpts_loops = counter_u64_alloc(M_WAITOK);
@@ -2060,7 +2037,9 @@
"hpts", MTX_DEF | MTX_DUPOK);
TAILQ_INIT(&hpts->p_dropq);
for (j = 0; j < NUM_OF_HPTSI_SLOTS; j++) {
- TAILQ_INIT(&hpts->p_hptss[j]);
+ TAILQ_INIT(&hpts->p_hptss[j].head);
+ hpts->p_hptss[j].count = 0;
+ hpts->p_hptss[j].gencnt = 0;
}
sysctl_ctx_init(&hpts->hpts_ctx);
sprintf(unit, "%d", i);
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -2587,6 +2587,9 @@
tcp_fastopen_decrement_counter(tp->t_tfo_pending);
tp->t_tfo_pending = NULL;
}
+#ifdef TCPHPTS
+ tcp_hpts_remove(inp, HPTS_REMOVE_ALL);
+#endif
in_pcbdrop(inp);
TCPSTAT_INC(tcps_closed);
if (tp->t_state != TCPS_CLOSED)
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
--- a/sys/netinet/tcp_timewait.c
+++ b/sys/netinet/tcp_timewait.c
@@ -82,6 +82,7 @@
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
+#include <netinet/tcp_hpts.h>
#ifdef INET6
#include <netinet6/tcp6_var.h>
#endif
@@ -343,6 +344,9 @@
* Note: soisdisconnected() call used to be made in tcp_discardcb(),
* and might not be needed here any longer.
*/
+#ifdef TCPHPTS
+ tcp_hpts_remove(inp, HPTS_REMOVE_ALL);
+#endif
tcp_discardcb(tp);
soisdisconnected(so);
tw->tw_so_options = so->so_options;

File Metadata

Mime Type
text/plain
Expires
Sat, Mar 28, 10:57 AM (19 h, 44 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
30469151
Default Alt Text
D33026.id99369.diff (25 KB)

Event Timeline