D35737: ule: Use explicit atomic accesses for tdq fields
D35737.diff (12 KB)
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -226,9 +226,16 @@
static int __read_mostly sched_idlespinthresh = -1;
/*
- * tdq - per processor runqs and statistics. All fields are protected by the
- * tdq_lock. The load and lowpri may be accessed without to avoid excess
- * locking in sched_pickcpu();
+ * tdq - per processor runqs and statistics. A mutex synchronizes access to
+ * most fields. Some fields are loaded or modified without the mutex.
+ *
+ * Locking protocols:
+ * (c) constant after initialization
+ * (f) flag, set with the tdq lock held, cleared on local CPU
+ * (l) all accesses are CPU-local
+ * (ls) stores are performed by the local CPU, loads may be lockless
+ * (t) all accesses are protected by the tdq mutex
+ * (ts) stores are serialized by the tdq mutex, loads may be lockless
*/
struct tdq {
/*
@@ -236,33 +243,41 @@
* tdq_lock is padded to avoid false sharing with tdq_load and
* tdq_cpu_idle.
*/
- struct mtx_padalign tdq_lock; /* run queue lock. */
- struct cpu_group *tdq_cg; /* Pointer to cpu topology. */
- struct thread *tdq_curthread; /* Current executing thread. */
- volatile int tdq_load; /* Aggregate load. */
- volatile int tdq_cpu_idle; /* cpu_idle() is active. */
- int tdq_sysload; /* For loadavg, !ITHD load. */
- volatile int tdq_transferable; /* Transferable thread count. */
- volatile short tdq_switchcnt; /* Switches this tick. */
- volatile short tdq_oldswitchcnt; /* Switches last tick. */
- u_char tdq_lowpri; /* Lowest priority thread. */
- u_char tdq_owepreempt; /* Remote preemption pending. */
- u_char tdq_idx; /* Current insert index. */
- u_char tdq_ridx; /* Current removal index. */
- int tdq_id; /* cpuid. */
- struct runq tdq_realtime; /* real-time run queue. */
- struct runq tdq_timeshare; /* timeshare run queue. */
- struct runq tdq_idle; /* Queue of IDLE threads. */
+ struct mtx_padalign tdq_lock; /* run queue lock. */
+ struct cpu_group *tdq_cg; /* (c) Pointer to cpu topology. */
+ struct thread *tdq_curthread; /* (t) Current executing thread. */
+ int tdq_load; /* (ts) Aggregate load. */
+ int tdq_sysload; /* (ts) For loadavg, !ITHD load. */
+ int tdq_cpu_idle; /* (ls) cpu_idle() is active. */
+ int tdq_transferable; /* (ts) Transferable thread count. */
+ short tdq_switchcnt; /* (l) Switches this tick. */
+ short tdq_oldswitchcnt; /* (l) Switches last tick. */
+ u_char tdq_lowpri; /* (ts) Lowest priority thread. */
+ u_char tdq_owepreempt; /* (f) Remote preemption pending. */
+ u_char tdq_idx; /* (t) Current insert index. */
+ u_char tdq_ridx; /* (t) Current removal index. */
+ int tdq_id; /* (c) cpuid. */
+ struct runq tdq_realtime; /* (t) real-time run queue. */
+ struct runq tdq_timeshare; /* (t) timeshare run queue. */
+ struct runq tdq_idle; /* (t) Queue of IDLE threads. */
char tdq_name[TDQ_NAME_LEN];
#ifdef KTR
char tdq_loadname[TDQ_LOADNAME_LEN];
#endif
-} __aligned(64);
+};
/* Idle thread states and config. */
#define TDQ_RUNNING 1
#define TDQ_IDLE 2
+/* Lockless accessors. */
+#define TDQ_LOAD(tdq) atomic_load_int(&(tdq)->tdq_load)
+#define TDQ_TRANSFERABLE(tdq) atomic_load_int(&(tdq)->tdq_transferable)
+#define TDQ_SWITCHCNT(tdq) (atomic_load_short(&(tdq)->tdq_switchcnt) + \
+ atomic_load_short(&(tdq)->tdq_oldswitchcnt))
+#define TDQ_SWITCHCNT_INC(tdq) (atomic_store_short(&(tdq)->tdq_switchcnt, \
+ atomic_load_short(&(tdq)->tdq_switchcnt) + 1))
+
#ifdef SMP
struct cpu_group __read_mostly *cpu_top; /* CPU topology */
@@ -323,7 +338,7 @@
static __inline void tdq_runq_add(struct tdq *, struct thread *, int);
static __inline void tdq_runq_rem(struct tdq *, struct thread *);
static inline int sched_shouldpreempt(int, int, int);
-void tdq_print(int cpu);
+static void tdq_print(int cpu);
static void runq_print(struct runq *rq);
static int tdq_add(struct tdq *, struct thread *, int);
#ifdef SMP
@@ -398,7 +413,7 @@
/*
* Print the status of a per-cpu thread queue. Should be a ddb show cmd.
*/
-void
+static void __unused
tdq_print(int cpu)
{
struct tdq *tdq;
@@ -608,7 +623,7 @@
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
if (ctd == NULL)
- ctd = atomic_load_ptr(&tdq->tdq_curthread);
+ ctd = tdq->tdq_curthread;
td = tdq_choose(tdq);
if (td == NULL || td->td_priority > ctd->td_priority)
tdq->tdq_lowpri = ctd->td_priority;
@@ -699,7 +714,7 @@
if (!CPU_ISSET(c, &cg->cg_mask))
continue;
tdq = TDQ_CPU(c);
- l = tdq->tdq_load;
+ l = TDQ_LOAD(tdq);
if (c == s->cs_prefer) {
if (__predict_false(s->cs_running))
l--;
@@ -714,7 +729,8 @@
* If the threads is already on the CPU, don't look on the TDQ
* priority, since it can be the priority of the thread itself.
*/
- if (l > s->cs_load || (tdq->tdq_lowpri <= s->cs_pri &&
+ if (l > s->cs_load ||
+ (atomic_load_char(&tdq->tdq_lowpri) <= s->cs_pri &&
(!s->cs_running || c != s->cs_prefer)) ||
!CPU_ISSET(c, s->cs_mask))
continue;
@@ -769,14 +785,14 @@
if (!CPU_ISSET(c, &cg->cg_mask))
continue;
tdq = TDQ_CPU(c);
- l = tdq->tdq_load;
+ l = TDQ_LOAD(tdq);
load = l * 256;
total += load;
/*
* Check this CPU is acceptable.
*/
- if (l < s->cs_load || (tdq->tdq_transferable < s->cs_trans) ||
+ if (l < s->cs_load || TDQ_TRANSFERABLE(tdq) < s->cs_trans ||
!CPU_ISSET(c, s->cs_mask))
continue;
@@ -848,13 +864,13 @@
if (CPU_EMPTY(&lmask))
break;
tdq = TDQ_CPU(high);
- if (tdq->tdq_load == 1) {
+ if (TDQ_LOAD(tdq) == 1) {
/*
* There is only one running thread. We can't move
* it from here, so tell it to pick new CPU by itself.
*/
TDQ_LOCK(tdq);
- td = atomic_load_ptr(&tdq->tdq_curthread);
+ td = tdq->tdq_curthread;
if ((td->td_flags & TDF_IDLETD) == 0 &&
THREAD_CAN_MIGRATE(td)) {
td->td_flags |= TDF_NEEDRESCHED | TDF_PICKCPU;
@@ -866,9 +882,9 @@
}
anylow = 1;
nextlow:
- if (tdq->tdq_transferable == 0)
+ if (TDQ_TRANSFERABLE(tdq) == 0)
continue;
- low = sched_lowest(cg, &lmask, -1, tdq->tdq_load - 1, high, 1);
+ low = sched_lowest(cg, &lmask, -1, TDQ_LOAD(tdq) - 1, high, 1);
/* Stop if we looked well and found no less loaded CPU. */
if (anylow && low == -1)
break;
@@ -1015,15 +1031,15 @@
return (1);
CPU_FILL(&mask);
CPU_CLR(PCPU_GET(cpuid), &mask);
- restart:
- switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
+restart:
+ switchcnt = TDQ_SWITCHCNT(tdq);
for (cg = tdq->tdq_cg, goup = 0; ; ) {
cpu = sched_highest(cg, &mask, steal_thresh, 1);
/*
* We were assigned a thread but not preempted. Returning
* 0 here will cause our caller to switch to it.
*/
- if (tdq->tdq_load)
+ if (TDQ_LOAD(tdq))
return (0);
/*
@@ -1059,8 +1075,8 @@
* this situation about 20% of the time on an 8 core
* 16 thread Ryzen 7, but it still helps performance.
*/
- if (steal->tdq_load < steal_thresh ||
- steal->tdq_transferable == 0)
+ if (TDQ_LOAD(steal) < steal_thresh ||
+ TDQ_TRANSFERABLE(steal) == 0)
goto restart;
/*
* Try to lock both queues. If we are assigned a thread while
@@ -1085,9 +1101,9 @@
* of date. The latter is rare. In either case restart
* the search.
*/
- if (steal->tdq_load < steal_thresh ||
- steal->tdq_transferable == 0 ||
- switchcnt != tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt) {
+ if (TDQ_LOAD(steal) < steal_thresh ||
+ TDQ_TRANSFERABLE(steal) == 0 ||
+ switchcnt != TDQ_SWITCHCNT(tdq)) {
tdq_unlock_pair(tdq, steal);
goto restart;
}
@@ -1151,7 +1167,7 @@
*/
cpu = TDQ_ID(tdq);
if (TD_IS_IDLETHREAD(tdq->tdq_curthread) &&
- (tdq->tdq_cpu_idle == 0 || cpu_idle_wakeup(cpu)))
+ (atomic_load_int(&tdq->tdq_cpu_idle) == 0 || cpu_idle_wakeup(cpu)))
return;
/*
@@ -1344,13 +1360,15 @@
* expired and it is idle, run it there.
*/
if (THREAD_CAN_SCHED(td, ts->ts_cpu) &&
- tdq->tdq_lowpri >= PRI_MIN_IDLE &&
+ atomic_load_int(&tdq->tdq_lowpri) >= PRI_MIN_IDLE &&
SCHED_AFFINITY(ts, CG_SHARE_L2)) {
if (cg->cg_flags & CG_FLAG_THREAD) {
/* Check all SMT threads for being idle. */
for (cpu = cg->cg_first; cpu <= cg->cg_last; cpu++) {
+ pri =
+ atomic_load_char(&TDQ_CPU(cpu)->tdq_lowpri);
if (CPU_ISSET(cpu, &cg->cg_mask) &&
- TDQ_CPU(cpu)->tdq_lowpri < PRI_MIN_IDLE)
+ pri < PRI_MIN_IDLE)
break;
}
if (cpu > cg->cg_last) {
@@ -1421,8 +1439,8 @@
*/
tdq = TDQ_CPU(cpu);
if (THREAD_CAN_SCHED(td, self) && TDQ_SELF()->tdq_lowpri > pri &&
- tdq->tdq_lowpri < PRI_MIN_IDLE &&
- TDQ_SELF()->tdq_load <= tdq->tdq_load + 1) {
+ atomic_load_char(&tdq->tdq_lowpri) < PRI_MIN_IDLE &&
+ TDQ_LOAD(TDQ_SELF()) <= TDQ_LOAD(tdq) + 1) {
SCHED_STAT_INC(pickcpu_local);
cpu = self;
}
@@ -2018,7 +2036,7 @@
* If a thread was added while interrupts were disabled don't
* steal one here.
*/
- if (tdq->tdq_load > 0) {
+ if (TDQ_LOAD(tdq) > 0) {
TDQ_LOCK(tdq);
break;
}
@@ -2060,8 +2078,8 @@
* At this point unconditionally exit the loop to bound
* the time spent in the critcal section.
*/
- if (steal->tdq_load < steal_thresh ||
- steal->tdq_transferable == 0)
+ if (TDQ_LOAD(steal) < steal_thresh ||
+ TDQ_TRANSFERABLE(steal) == 0)
continue;
/*
* Try to lock both queues. If we are assigned a thread while
@@ -2078,8 +2096,8 @@
* The data returned by sched_highest() is stale and
* the chosen CPU no longer has an eligible thread.
*/
- if (steal->tdq_load < steal_thresh ||
- steal->tdq_transferable == 0) {
+ if (TDQ_LOAD(steal) < steal_thresh ||
+ TDQ_TRANSFERABLE(steal) == 0) {
TDQ_UNLOCK(steal);
break;
}
@@ -2180,9 +2198,9 @@
(flags & SW_PREEMPT) != 0;
td->td_flags &= ~(TDF_NEEDRESCHED | TDF_PICKCPU | TDF_SLICEEND);
td->td_owepreempt = 0;
- tdq->tdq_owepreempt = 0;
+ atomic_store_char(&tdq->tdq_owepreempt, 0);
if (!TD_IS_IDLETHREAD(td))
- tdq->tdq_switchcnt++;
+ TDQ_SWITCHCNT_INC(tdq);
/*
* Always block the thread lock so we can drop the tdq lock early.
@@ -2542,6 +2560,7 @@
*/
tdq->tdq_oldswitchcnt = tdq->tdq_switchcnt;
tdq->tdq_switchcnt = tdq->tdq_load;
+
/*
* Advance the insert index once for each tick to ensure that all
* threads get a chance to run.
@@ -2598,10 +2617,10 @@
tdq = TDQ_SELF();
if ((curthread->td_flags & TDF_IDLETD) != 0) {
- if (tdq->tdq_load > 0)
+ if (TDQ_LOAD(tdq) > 0)
goto out;
} else
- if (tdq->tdq_load - 1 > 0)
+ if (TDQ_LOAD(tdq) - 1 > 0)
goto out;
load = 0;
out:
@@ -2896,10 +2915,10 @@
total = 0;
CPU_FOREACH(i)
- total += TDQ_CPU(i)->tdq_sysload;
+ total += atomic_load_int(&TDQ_CPU(i)->tdq_sysload);
return (total);
#else
- return (TDQ_SELF()->tdq_sysload);
+ return (atomic_load_int(&TDQ_SELF()->tdq_sysload));
#endif
}
@@ -2939,18 +2958,18 @@
THREAD_NO_SLEEPING();
oldswitchcnt = -1;
for (;;) {
- if (tdq->tdq_load) {
+ if (TDQ_LOAD(tdq)) {
thread_lock(td);
mi_switch(SW_VOL | SWT_IDLE);
}
- switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
+ switchcnt = TDQ_SWITCHCNT(tdq);
#ifdef SMP
if (always_steal || switchcnt != oldswitchcnt) {
oldswitchcnt = switchcnt;
if (tdq_idled(tdq) == 0)
continue;
}
- switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
+ switchcnt = TDQ_SWITCHCNT(tdq);
#else
oldswitchcnt = switchcnt;
#endif
@@ -2963,19 +2982,19 @@
*/
if (TDQ_IDLESPIN(tdq) && switchcnt > sched_idlespinthresh) {
for (i = 0; i < sched_idlespins; i++) {
- if (tdq->tdq_load)
+ if (TDQ_LOAD(tdq))
break;
cpu_spinwait();
}
}
/* If there was context switch during spin, restart it. */
- switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
- if (tdq->tdq_load != 0 || switchcnt != oldswitchcnt)
+ switchcnt = TDQ_SWITCHCNT(tdq);
+ if (TDQ_LOAD(tdq) != 0 || switchcnt != oldswitchcnt)
continue;
/* Run main MD idle handler. */
- tdq->tdq_cpu_idle = 1;
+ atomic_store_int(&tdq->tdq_cpu_idle, 1);
/*
* Make sure that the tdq_cpu_idle update is globally visible
* before cpu_idle() reads tdq_load. The order is important
@@ -2987,21 +3006,21 @@
* threads often enough to make it worthwhile to do so in
* order to avoid calling cpu_idle().
*/
- if (tdq->tdq_load != 0) {
- tdq->tdq_cpu_idle = 0;
+ if (TDQ_LOAD(tdq) != 0) {
+ atomic_store_int(&tdq->tdq_cpu_idle, 0);
continue;
}
cpu_idle(switchcnt * 4 > sched_idlespinthresh);
- tdq->tdq_cpu_idle = 0;
+ atomic_store_int(&tdq->tdq_cpu_idle, 0);
/*
* Account thread-less hardware interrupts and
* other wakeup reasons equal to context switches.
*/
- switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt;
+ switchcnt = TDQ_SWITCHCNT(tdq);
if (switchcnt != oldswitchcnt)
continue;
- tdq->tdq_switchcnt++;
+ TDQ_SWITCHCNT_INC(tdq);
oldswitchcnt++;
}
}
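
For readers skimming the patch, below is a rough userland sketch of the two access conventions it documents: the "(ts)" protocol (stores serialized by the tdq mutex, lockless loads such as TDQ_LOAD()) and the idle-flag publish-then-recheck done in sched_idletd() around tdq_cpu_idle. The names queue, QUEUE_LOAD() and queue_idle_enter() are hypothetical stand-ins, and C11 atomics plus a pthread mutex replace the kernel's atomic_load_int()/atomic_store_int() and the tdq lock; this is an illustration of the idea only, not FreeBSD code.

/*
 * Minimal sketch of the tdq access patterns, using hypothetical names.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct queue {
	pthread_mutex_t	q_lock;		/* serializes all writers */
	atomic_int	q_load;		/* (ts) stores under q_lock, lockless loads */
	atomic_int	q_cpu_idle;	/* (ls) written only by the owning thread */
};

/* Lockless accessor, analogous to TDQ_LOAD(). */
#define	QUEUE_LOAD(q)							\
	atomic_load_explicit(&(q)->q_load, memory_order_relaxed)

/*
 * (ts): the read-modify-write is not atomic by itself, but q_lock keeps
 * writers serialized, so a relaxed load/store pair is sufficient.
 */
void
queue_load_add(struct queue *q, int delta)
{
	pthread_mutex_lock(&q->q_lock);
	atomic_store_explicit(&q->q_load, QUEUE_LOAD(q) + delta,
	    memory_order_relaxed);
	pthread_mutex_unlock(&q->q_lock);
}

/*
 * Idle entry, mirroring the tdq_cpu_idle handling in sched_idletd():
 * publish the idle flag, make the store visible before re-reading the
 * load, and back out if work arrived in the meantime.
 */
bool
queue_idle_enter(struct queue *q)
{
	atomic_store_explicit(&q->q_cpu_idle, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);
	if (QUEUE_LOAD(q) != 0) {
		atomic_store_explicit(&q->q_cpu_idle, 0,
		    memory_order_relaxed);
		return (false);		/* caller should pick up the work */
	}
	return (true);			/* safe to halt until woken */
}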