D12130: Greatly reduce interrupt latency caused by steal_idle
D12130.diff
Index: head/sys/kern/sched_ule.c
===================================================================
--- head/sys/kern/sched_ule.c
+++ head/sys/kern/sched_ule.c
@@ -240,9 +240,9 @@
volatile int tdq_load; /* Aggregate load. */
volatile int tdq_cpu_idle; /* cpu_idle() is active. */
int tdq_sysload; /* For loadavg, !ITHD load. */
- int tdq_transferable; /* Transferable thread count. */
- short tdq_switchcnt; /* Switches this tick. */
- short tdq_oldswitchcnt; /* Switches last tick. */
+ volatile int tdq_transferable; /* Transferable thread count. */
+ volatile short tdq_switchcnt; /* Switches this tick. */
+ volatile short tdq_oldswitchcnt; /* Switches last tick. */
u_char tdq_lowpri; /* Lowest priority thread. */
u_char tdq_ipipending; /* IPI pending. */
u_char tdq_idx; /* Current insert index. */
@@ -274,6 +274,8 @@
static int affinity;
static int steal_idle = 1;
static int steal_thresh = 2;
+static int always_steal = 0;
+static int trysteal_limit = 2;
/*
* One thread queue per processor.
@@ -319,7 +321,7 @@
static void runq_print(struct runq *rq);
static void tdq_add(struct tdq *, struct thread *, int);
#ifdef SMP
-static int tdq_move(struct tdq *, struct tdq *);
+static struct thread *tdq_move(struct tdq *, struct tdq *);
static int tdq_idled(struct tdq *);
static void tdq_notify(struct tdq *, struct thread *);
static struct thread *tdq_steal(struct tdq *, int);
@@ -841,7 +843,7 @@
CPU_FILL(&hmask);
for (;;) {
- high = sched_highest(cg, hmask, 1);
+ high = sched_highest(cg, hmask, 2);
/* Stop if there is no more CPU with transferrable threads. */
if (high == -1)
break;
@@ -924,33 +926,32 @@
static int
sched_balance_pair(struct tdq *high, struct tdq *low)
{
- int moved;
+ struct thread *td;
int cpu;
tdq_lock_pair(high, low);
- moved = 0;
+ td = NULL;
/*
- * Determine what the imbalance is and then adjust that to how many
- * threads we actually have to give up (transferable).
+ * Transfer a thread from high to low.
*/
if (high->tdq_transferable != 0 && high->tdq_load > low->tdq_load &&
- (moved = tdq_move(high, low)) > 0) {
+ (td = tdq_move(high, low)) != NULL) {
/*
- * In case the target isn't the current cpu IPI it to force a
- * reschedule with the new workload.
+ * In case the target isn't the current cpu notify it of the
+ * new load, possibly sending an IPI to force it to reschedule.
*/
cpu = TDQ_ID(low);
if (cpu != PCPU_GET(cpuid))
- ipi_cpu(cpu, IPI_PREEMPT);
+ tdq_notify(low, td);
}
tdq_unlock_pair(high, low);
- return (moved);
+ return (td != NULL);
}
/*
* Move a thread from one thread queue to another.
*/
-static int
+static struct thread *
tdq_move(struct tdq *from, struct tdq *to)
{
struct td_sched *ts;
@@ -965,7 +966,7 @@
cpu = TDQ_ID(to);
td = tdq_steal(tdq, cpu);
if (td == NULL)
- return (0);
+ return (NULL);
ts = td_get_sched(td);
/*
* Although the run queue is locked the thread may be blocked. Lock
@@ -978,7 +979,7 @@
ts->ts_cpu = cpu;
td->td_lock = TDQ_LOCKPTR(to);
tdq_add(to, td, SRQ_YIELDING);
- return (1);
+ return (td);
}
/*
@@ -991,51 +992,80 @@
struct cpu_group *cg;
struct tdq *steal;
cpuset_t mask;
- int thresh;
- int cpu;
+ int cpu, switchcnt;
- if (smp_started == 0 || steal_idle == 0)
+ if (smp_started == 0 || steal_idle == 0 || tdq->tdq_cg == NULL)
return (1);
CPU_FILL(&mask);
CPU_CLR(PCPU_GET(cpuid), &mask);
- /* We don't want to be preempted while we're iterating. */
- spinlock_enter();
- for (cg = tdq->tdq_cg; cg != NULL; ) {
- if ((cg->cg_flags & CG_FLAG_THREAD) == 0)
- thresh = steal_thresh;
- else
- thresh = 1;
- cpu = sched_highest(cg, mask, thresh);
+ restart:
+ switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
+ for (cg = tdq->tdq_cg; ; ) {
+ cpu = sched_highest(cg, mask, steal_thresh);
+ /*
+ * We were assigned a thread but not preempted. Returning
+ * 0 here will cause our caller to switch to it.
+ */
+ if (tdq->tdq_load)
+ return (0);
if (cpu == -1) {
cg = cg->cg_parent;
+ if (cg == NULL)
+ return (1);
continue;
}
steal = TDQ_CPU(cpu);
- CPU_CLR(cpu, &mask);
+ /*
+ * The data returned by sched_highest() is stale and
+ * the chosen CPU no longer has an eligible thread.
+ *
+ * Testing this ahead of tdq_lock_pair() only catches
+ * this situation about 20% of the time on an 8 core
+ * 16 thread Ryzen 7, but it still helps performance.
+ */
+ if (steal->tdq_load < steal_thresh ||
+ steal->tdq_transferable == 0)
+ goto restart;
tdq_lock_pair(tdq, steal);
- if (steal->tdq_load < thresh || steal->tdq_transferable == 0) {
- tdq_unlock_pair(tdq, steal);
- continue;
- }
/*
- * If a thread was added while interrupts were disabled don't
- * steal one here. If we fail to acquire one due to affinity
- * restrictions loop again with this cpu removed from the
- * set.
+ * We were assigned a thread while waiting for the locks.
+ * Switch to it now instead of stealing a thread.
*/
- if (tdq->tdq_load == 0 && tdq_move(steal, tdq) == 0) {
+ if (tdq->tdq_load)
+ break;
+ /*
+ * The data returned by sched_highest() is stale and
+ * the chosen CPU no longer has an eligible thread, or
+ * we were preempted and the CPU loading info may be out
+ * of date. The latter is rare. In either case restart
+ * the search.
+ */
+ if (steal->tdq_load < steal_thresh ||
+ steal->tdq_transferable == 0 ||
+ switchcnt != tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt) {
tdq_unlock_pair(tdq, steal);
- continue;
+ goto restart;
}
- spinlock_exit();
- TDQ_UNLOCK(steal);
- mi_switch(SW_VOL | SWT_IDLE, NULL);
- thread_unlock(curthread);
-
- return (0);
+ /*
+ * Steal the thread and switch to it.
+ */
+ if (tdq_move(steal, tdq) != NULL)
+ break;
+ /*
+ * We failed to acquire a thread even though it looked
+ * like one was available. This could be due to affinity
+ * restrictions or for other reasons. Loop again after
+ * removing this CPU from the set. The restart logic
+ * above does not restore this CPU to the set due to the
+ * likelihood of failing here again.
+ */
+ CPU_CLR(cpu, &mask);
+ tdq_unlock_pair(tdq, steal);
}
- spinlock_exit();
- return (1);
+ TDQ_UNLOCK(steal);
+ mi_switch(SW_VOL | SWT_IDLE, NULL);
+ thread_unlock(curthread);
+ return (0);
}
/*
@@ -1835,6 +1865,88 @@
}
/*
+ * This tdq is about to idle. Try to steal a thread from another CPU before
+ * choosing the idle thread.
+ */
+static void
+tdq_trysteal(struct tdq *tdq)
+{
+ struct cpu_group *cg;
+ struct tdq *steal;
+ cpuset_t mask;
+ int cpu, i;
+
+ if (smp_started == 0 || trysteal_limit == 0 || tdq->tdq_cg == NULL)
+ return;
+ CPU_FILL(&mask);
+ CPU_CLR(PCPU_GET(cpuid), &mask);
+ /* We don't want to be preempted while we're iterating. */
+ spinlock_enter();
+ TDQ_UNLOCK(tdq);
+ for (i = 1, cg = tdq->tdq_cg; ; ) {
+ cpu = sched_highest(cg, mask, steal_thresh);
+ /*
+ * If a thread was added while interrupts were disabled don't
+ * steal one here.
+ */
+ if (tdq->tdq_load > 0) {
+ TDQ_LOCK(tdq);
+ break;
+ }
+ if (cpu == -1) {
+ i++;
+ cg = cg->cg_parent;
+ if (cg == NULL || i > trysteal_limit) {
+ TDQ_LOCK(tdq);
+ break;
+ }
+ continue;
+ }
+ steal = TDQ_CPU(cpu);
+ /*
+ * The data returned by sched_highest() is stale and
+ * the chosen CPU no longer has an eligible thread.
+ */
+ if (steal->tdq_load < steal_thresh ||
+ steal->tdq_transferable == 0)
+ continue;
+ tdq_lock_pair(tdq, steal);
+ /*
+ * If we get to this point, unconditionally exit the loop
+ * to bound the time spent in the critical section.
+ *
+ * If a thread was added while interrupts were disabled don't
+ * steal one here.
+ */
+ if (tdq->tdq_load > 0) {
+ TDQ_UNLOCK(steal);
+ break;
+ }
+ /*
+ * The data returned by sched_highest() is stale and
+ * the chosen CPU no longer has an eligible thread.
+ */
+ if (steal->tdq_load < steal_thresh ||
+ steal->tdq_transferable == 0) {
+ TDQ_UNLOCK(steal);
+ break;
+ }
+ /*
+ * If we fail to acquire one due to affinity restrictions,
+ * bail out and let the idle thread do a more complete search
+ * outside of a critical section.
+ */
+ if (tdq_move(steal, tdq) == NULL) {
+ TDQ_UNLOCK(steal);
+ break;
+ }
+ TDQ_UNLOCK(steal);
+ break;
+ }
+ spinlock_exit();
+}
+
+/*
* Handle migration from sched_switch(). This happens only for
* cpu binding.
*/
@@ -1946,6 +2058,8 @@
TDQ_LOCK(tdq);
mtx = thread_lock_block(td);
tdq_load_rem(tdq, td);
+ if (tdq->tdq_load == 0)
+ tdq_trysteal(tdq);
}
#if (KTR_COMPILE & KTR_SCHED) != 0
@@ -2677,7 +2791,7 @@
}
switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
#ifdef SMP
- if (switchcnt != oldswitchcnt) {
+ if (always_steal || switchcnt != oldswitchcnt) {
oldswitchcnt = switchcnt;
if (tdq_idled(tdq) == 0)
continue;
@@ -2714,6 +2828,15 @@
* to avoid race with tdq_notify.
*/
atomic_thread_fence_seq_cst();
+ /*
+ * Checking again after the fence picks up assigned
+ * threads often enough to make it worthwhile to do so in
+ * order to avoid calling cpu_idle().
+ */
+ if (tdq->tdq_load != 0) {
+ tdq->tdq_cpu_idle = 0;
+ continue;
+ }
cpu_idle(switchcnt * 4 > sched_idlespinthresh);
tdq->tdq_cpu_idle = 0;
@@ -2948,6 +3071,10 @@
"Attempts to steal work from other cores before idling");
SYSCTL_INT(_kern_sched, OID_AUTO, steal_thresh, CTLFLAG_RW, &steal_thresh, 0,
"Minimum load on remote CPU before we'll steal");
+SYSCTL_INT(_kern_sched, OID_AUTO, trysteal_limit, CTLFLAG_RW, &trysteal_limit,
+ 0, "Topological distance limit for stealing threads in sched_switch()");
+SYSCTL_INT(_kern_sched, OID_AUTO, always_steal, CTLFLAG_RW, &always_steal, 0,
+ "Always run the stealer from the idle thread");
SYSCTL_PROC(_kern_sched, OID_AUTO, topology_spec, CTLTYPE_STRING |
CTLFLAG_MPSAFE | CTLFLAG_RD, NULL, 0, sysctl_kern_sched_topology_spec, "A",
"XML dump of detected CPU topology");