D24622.diff
D24622: Use the regular turnstile interface to lend prio to preempted readers.
Index: sys/kern/subr_epoch.c
===================================================================
--- sys/kern/subr_epoch.c
+++ sys/kern/subr_epoch.c
@@ -66,16 +66,18 @@
#define EPOCH_ALIGN CACHE_LINE_SIZE
#endif
-TAILQ_HEAD (epoch_tdlist, epoch_tracker);
+TAILQ_HEAD(epoch_tdlist, epoch_tracker);
typedef struct epoch_record {
ck_epoch_record_t er_record;
- struct epoch_context er_drain_ctx;
struct epoch *er_parent;
- volatile struct epoch_tdlist er_tdlist;
- volatile uint32_t er_gen;
+ struct epoch_tdlist er_tdlist;
+ struct thread *er_firsttd;
+ struct thread *er_blockedtd;
+ struct mtx er_lock;
+ struct lock_object er_lo;
uint32_t er_cpuid;
int er_drain_state;
-} __aligned(EPOCH_ALIGN) *epoch_record_t;
+} __aligned(EPOCH_ALIGN) *epoch_record_t;
#define EPOCH_DRAIN_START 2
#define EPOCH_DRAIN_RUNNING 1
@@ -91,8 +93,6 @@
const char *e_name;
};
-/* arbitrary --- needs benchmarking */
-#define MAX_ADAPTIVE_SPIN 100
#define MAX_EPOCHS 64
CTASSERT(sizeof(ck_epoch_entry_t) == sizeof(struct epoch_context));
@@ -101,33 +101,22 @@
SYSCTL_NODE(_kern_epoch, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"epoch stats");
-/* Stats. */
-static counter_u64_t block_count;
-
-SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, nblocked, CTLFLAG_RW,
- &block_count, "# of times a thread was in an epoch when epoch_wait was called");
-static counter_u64_t migrate_count;
-
-SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, migrations, CTLFLAG_RW,
- &migrate_count, "# of times thread was migrated to another CPU in epoch_wait");
-static counter_u64_t turnstile_count;
-
-SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, ncontended, CTLFLAG_RW,
- &turnstile_count, "# of times a thread was blocked on a lock in an epoch during an epoch_wait");
-static counter_u64_t switch_count;
-
-SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, switches, CTLFLAG_RW,
- &switch_count, "# of times a thread voluntarily context switched in epoch_wait");
-static counter_u64_t epoch_call_count;
-
-SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_calls, CTLFLAG_RW,
- &epoch_call_count, "# of times a callback was deferred");
-static counter_u64_t epoch_call_task_count;
-
-SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_call_tasks, CTLFLAG_RW,
- &epoch_call_task_count, "# of times a callback task was run");
-
-TAILQ_HEAD (threadlist, thread);
+static COUNTER_U64_DEFINE_EARLY(block_count);
+SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, blocked, CTLFLAG_RW,
+ &block_count,
+ "Number of times a thread was in an epoch when epoch_wait was called");
+static COUNTER_U64_DEFINE_EARLY(turnstile_count);
+SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, contended, CTLFLAG_RW,
+ &turnstile_count,
+ "Number of times a thread was blocked on a lock in an epoch during an epoch_wait");
+static COUNTER_U64_DEFINE_EARLY(call_count);
+SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, calls, CTLFLAG_RW,
+ &call_count,
+ "Number of times a callback was deferred");
+static COUNTER_U64_DEFINE_EARLY(call_task_count);
+SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, call_tasks, CTLFLAG_RW,
+ &call_task_count,
+ "Number of times a callback task was run");
CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
ck_epoch_entry_container)
@@ -186,7 +175,6 @@
va_list ap;
struct stackentry se, *new;
- stack_zero(&se.se_stack); /* XXX: is it really needed? */
stack_save(&se.se_stack);
/* Tree is never reduced - go lockless. */
@@ -265,13 +253,6 @@
{
int cpu;
- block_count = counter_u64_alloc(M_WAITOK);
- migrate_count = counter_u64_alloc(M_WAITOK);
- turnstile_count = counter_u64_alloc(M_WAITOK);
- switch_count = counter_u64_alloc(M_WAITOK);
- epoch_call_count = counter_u64_alloc(M_WAITOK);
- epoch_call_task_count = counter_u64_alloc(M_WAITOK);
-
pcpu_zone_record = uma_zcreate("epoch_record pcpu",
sizeof(struct epoch_record), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, UMA_ZONE_PCPU);
@@ -306,24 +287,39 @@
epoch_record_t er;
int cpu;
- epoch->e_pcpu_record = uma_zalloc_pcpu(pcpu_zone_record, M_WAITOK);
+ epoch->e_pcpu_record = uma_zalloc_pcpu(pcpu_zone_record, M_WAITOK |
+ M_ZERO);
CPU_FOREACH(cpu) {
er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
- bzero(er, sizeof(*er));
ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
- TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
+ mtx_init(&er->er_lock, "epoch wait", NULL, MTX_DEF);
+ er->er_lo.lo_name = epoch->e_name;
+ TAILQ_INIT(&er->er_tdlist);
er->er_cpuid = cpu;
er->er_parent = epoch;
}
}
+/*
+ * Slow path for epoch_exit_preempt(): wake up blocked threads that have
+ * propagated their scheduling priority to us.
+ */
static void
-epoch_adjust_prio(struct thread *td, u_char prio)
+epoch_unblock(epoch_record_t er)
{
+ struct turnstile *ts;
- thread_lock(td);
- sched_prio(td, prio);
- thread_unlock(td);
+ KASSERT(er->er_blockedtd == curthread,
+ ("%s: unblocking from wrong thread", __func__));
+
+ mtx_lock(&er->er_lock);
+ turnstile_chain_lock(&er->er_lo);
+ ts = turnstile_lookup(&er->er_lo);
+ turnstile_broadcast(ts, TS_EXCLUSIVE_QUEUE);
+ turnstile_unpend(ts);
+ turnstile_chain_unlock(&er->er_lo);
+ er->er_blockedtd = NULL;
+ mtx_unlock(&er->er_lock);
}
epoch_t
@@ -391,9 +387,9 @@
THREAD_NO_SLEEPING();
critical_enter();
sched_pin();
- td->td_pre_epoch_prio = td->td_priority;
er = epoch_currecord(epoch);
TAILQ_INSERT_TAIL(&er->er_tdlist, et, et_link);
+ er->er_firsttd = TAILQ_FIRST(&er->er_tdlist)->et_td;
ck_epoch_begin(&er->er_record, &et->et_section);
critical_exit();
}
@@ -414,26 +410,27 @@
_epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et EPOCH_FILE_LINE)
{
struct epoch_record *er;
+ struct epoch_tracker *fet;
struct thread *td;
INIT_CHECK(epoch);
td = curthread;
+ THREAD_SLEEPING_OK();
+
critical_enter();
sched_unpin();
- THREAD_SLEEPING_OK();
er = epoch_currecord(epoch);
+
MPASS(epoch->e_flags & EPOCH_PREEMPT);
- MPASS(et != NULL);
MPASS(et->et_td == td);
-#ifdef INVARIANTS
- et->et_td = (void*)0xDEADBEEF;
-#endif
+
ck_epoch_end(&er->er_record, &et->et_section);
TAILQ_REMOVE(&er->er_tdlist, et, et_link);
- er->er_gen++;
- if (__predict_false(td->td_pre_epoch_prio != td->td_priority))
- epoch_adjust_prio(td, td->td_pre_epoch_prio);
+ fet = TAILQ_FIRST(&er->er_tdlist);
+ er->er_firsttd = fet != NULL ? fet->et_td : NULL;
critical_exit();
+ if (__predict_false(er->er_blockedtd == td))
+ epoch_unblock(er);
#ifdef EPOCH_TRACE
epoch_trace_exit(td, epoch, et, file, line);
#endif
@@ -458,148 +455,61 @@
epoch_block_handler_preempt(struct ck_epoch *global __unused,
ck_epoch_record_t *cr, void *arg __unused)
{
- epoch_record_t record;
- struct thread *td, *owner, *curwaittd;
- struct epoch_tracker *tdwait;
+ struct epoch_record *er;
+ struct thread *td;
struct turnstile *ts;
- struct lock_object *lock;
- int spincount, gen;
- int locksheld __unused;
- record = __containerof(cr, struct epoch_record, er_record);
- td = curthread;
- locksheld = td->td_locks;
- spincount = 0;
counter_u64_add(block_count, 1);
- /*
- * We lost a race and there's no longer any threads
- * on the CPU in an epoch section.
- */
- if (TAILQ_EMPTY(&record->er_tdlist))
- return;
- if (record->er_cpuid != curcpu) {
+ er = __containerof(cr, struct epoch_record, er_record);
+
+ td = er->er_firsttd;
+ if (td == NULL)
+ return;
+ if (TD_IS_RUNNING(td)) {
/*
- * If the head of the list is running, we can wait for it
- * to remove itself from the list and thus save us the
- * overhead of a migration
+ * There is nothing useful we can do until this thread exits the
+ * epoch.
*/
- gen = record->er_gen;
- thread_unlock(td);
+ cpu_spinwait();
+ return;
+ }
+
+ mtx_lock(&er->er_lock);
+ if (er->er_blockedtd == NULL) {
/*
- * We can't actually check if the waiting thread is running
- * so we simply poll for it to exit before giving up and
- * migrating.
+ * A thread in the target epoch is off-CPU. Prepare to make it
+ * the owner of this CPU's turnstile so that we can lend
+ * priority. Ensure that it will wake us up upon exiting the
+ * section, using the thread lock to ensure that it doesn't get
+ * scheduled and exit the section before we're ready.
*/
- do {
- cpu_spinwait();
- } while (!TAILQ_EMPTY(&record->er_tdlist) &&
- gen == record->er_gen &&
- spincount++ < MAX_ADAPTIVE_SPIN);
thread_lock(td);
- /*
- * If the generation has changed we can poll again
- * otherwise we need to migrate.
- */
- if (gen != record->er_gen)
+ if (TD_IS_RUNNING(td) || td != er->er_firsttd) {
+ thread_unlock(td);
+ mtx_unlock(&er->er_lock);
return;
+ }
+ er->er_blockedtd = td;
+ thread_unlock(td);
+ } else {
/*
- * Being on the same CPU as that of the record on which
- * we need to wait allows us access to the thread
- * list associated with that CPU. We can then examine the
- * oldest thread in the queue and wait on its turnstile
- * until it resumes and so on until a grace period
- * elapses.
- *
- */
- counter_u64_add(migrate_count, 1);
- sched_bind(td, record->er_cpuid);
- /*
- * At this point we need to return to the ck code
- * to scan to see if a grace period has elapsed.
- * We can't move on to check the thread list, because
- * in the meantime new threads may have arrived that
- * in fact belong to a different epoch.
+ * At least one other thread is blocked waiting for a thread to
+ * exit the target epoch. Join it.
*/
- return;
+ td = er->er_blockedtd;
}
- /*
- * Try to find a thread in an epoch section on this CPU
- * waiting on a turnstile. Otherwise find the lowest
- * priority thread (highest prio value) and drop our priority
- * to match to allow it to run.
- */
- TAILQ_FOREACH(tdwait, &record->er_tdlist, et_link) {
- /*
- * Propagate our priority to any other waiters to prevent us
- * from starving them. They will have their original priority
- * restore on exit from epoch_wait().
- */
- curwaittd = tdwait->et_td;
- if (!TD_IS_INHIBITED(curwaittd) && curwaittd->td_priority > td->td_priority) {
- critical_enter();
- thread_unlock(td);
- thread_lock(curwaittd);
- sched_prio(curwaittd, td->td_priority);
- thread_unlock(curwaittd);
- thread_lock(td);
- critical_exit();
- }
- if (TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd) &&
- ((ts = curwaittd->td_blocked) != NULL)) {
- /*
- * We unlock td to allow turnstile_wait to reacquire
- * the thread lock. Before unlocking it we enter a
- * critical section to prevent preemption after we
- * reenable interrupts by dropping the thread lock in
- * order to prevent curwaittd from getting to run.
- */
- critical_enter();
- thread_unlock(td);
+ ts = turnstile_trywait(&er->er_lo);
+ mtx_unlock(&er->er_lock);
- if (turnstile_lock(ts, &lock, &owner)) {
- if (ts == curwaittd->td_blocked) {
- MPASS(TD_IS_INHIBITED(curwaittd) &&
- TD_ON_LOCK(curwaittd));
- critical_exit();
- turnstile_wait(ts, owner,
- curwaittd->td_tsqueue);
- counter_u64_add(turnstile_count, 1);
- thread_lock(td);
- return;
- }
- turnstile_unlock(ts, lock);
- }
- thread_lock(td);
- critical_exit();
- KASSERT(td->td_locks == locksheld,
- ("%d extra locks held", td->td_locks - locksheld));
- }
- }
- /*
- * We didn't find any threads actually blocked on a lock
- * so we have nothing to do except context switch away.
- */
- counter_u64_add(switch_count, 1);
- mi_switch(SW_VOL | SWT_RELINQUISH);
- /*
- * It is important the thread lock is dropped while yielding
- * to allow other threads to acquire the lock pointed to by
- * TDQ_LOCKPTR(td). Currently mi_switch() will unlock the
- * thread lock before returning. Else a deadlock like
- * situation might happen.
- */
- thread_lock(td);
+ counter_u64_add(turnstile_count, 1);
+ turnstile_wait(ts, td, TS_EXCLUSIVE_QUEUE);
}
void
epoch_wait_preempt(epoch_t epoch)
{
struct thread *td;
- int was_bound;
- int old_cpu;
- int old_pinned;
- u_char old_prio;
int locks __unused;
MPASS(cold || epoch != NULL);
@@ -615,34 +525,10 @@
"of an epoch section of the same epoch"));
#endif
DROP_GIANT();
- thread_lock(td);
-
- old_cpu = PCPU_GET(cpuid);
- old_pinned = td->td_pinned;
- old_prio = td->td_priority;
- was_bound = sched_is_bound(td);
- sched_unbind(td);
- td->td_pinned = 0;
- sched_bind(td, old_cpu);
ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler_preempt,
NULL);
- /* restore CPU binding, if any */
- if (was_bound != 0) {
- sched_bind(td, old_cpu);
- } else {
- /* get thread back to initial CPU, if any */
- if (old_pinned != 0)
- sched_bind(td, old_cpu);
- sched_unbind(td);
- }
- /* restore pinned after bind */
- td->td_pinned = old_pinned;
-
- /* restore thread priority */
- sched_prio(td, old_prio);
- thread_unlock(td);
PICKUP_GIANT();
KASSERT(td->td_locks == locks,
("%d residual locks held", td->td_locks - locks));
@@ -731,8 +617,8 @@
*DPCPU_PTR(epoch_cb_count) -= total;
critical_exit();
- counter_u64_add(epoch_call_count, total);
- counter_u64_add(epoch_call_task_count, 1);
+ counter_u64_add(call_count, total);
+ counter_u64_add(call_task_count, 1);
head = ck_stack_batch_pop_npsc(&cb_stack);
for (cursor = head; cursor != NULL; cursor = next) {
Index: sys/kern/subr_turnstile.c
===================================================================
--- sys/kern/subr_turnstile.c
+++ sys/kern/subr_turnstile.c
@@ -590,41 +590,6 @@
return (ts);
}
-bool
-turnstile_lock(struct turnstile *ts, struct lock_object **lockp,
- struct thread **tdp)
-{
- struct turnstile_chain *tc;
- struct lock_object *lock;
-
- if ((lock = ts->ts_lockobj) == NULL)
- return (false);
- tc = TC_LOOKUP(lock);
- mtx_lock_spin(&tc->tc_lock);
- mtx_lock_spin(&ts->ts_lock);
- if (__predict_false(lock != ts->ts_lockobj)) {
- mtx_unlock_spin(&tc->tc_lock);
- mtx_unlock_spin(&ts->ts_lock);
- return (false);
- }
- *lockp = lock;
- *tdp = ts->ts_owner;
- return (true);
-}
-
-void
-turnstile_unlock(struct turnstile *ts, struct lock_object *lock)
-{
- struct turnstile_chain *tc;
-
- mtx_assert(&ts->ts_lock, MA_OWNED);
- mtx_unlock_spin(&ts->ts_lock);
- if (ts == curthread->td_turnstile)
- ts->ts_lockobj = NULL;
- tc = TC_LOOKUP(lock);
- mtx_unlock_spin(&tc->tc_lock);
-}
-
void
turnstile_assert(struct turnstile *ts)
{
Index: sys/sys/epoch.h
===================================================================
--- sys/sys/epoch.h
+++ sys/sys/epoch.h
@@ -61,7 +61,7 @@
const char *et_file;
int et_line;
#endif
-} __aligned(sizeof(void *));
+};
typedef struct epoch_tracker *epoch_tracker_t;
epoch_t epoch_alloc(const char *name, int flags);
Index: sys/sys/proc.h
===================================================================
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -316,7 +316,6 @@
u_char td_pri_class; /* (t) Scheduling class. */
u_char td_user_pri; /* (t) User pri from estcpu and nice. */
u_char td_base_user_pri; /* (t) Base user pri */
- u_char td_pre_epoch_prio; /* (k) User pri on entry to epoch */
uintptr_t td_rb_list; /* (k) Robust list head. */
uintptr_t td_rbp_list; /* (k) Robust priv list head. */
uintptr_t td_rb_inact; /* (k) Current in-action mutex loc. */
Index: sys/sys/turnstile.h
===================================================================
--- sys/sys/turnstile.h
+++ sys/sys/turnstile.h
@@ -99,9 +99,7 @@
struct turnstile *turnstile_trywait(struct lock_object *);
void turnstile_unpend(struct turnstile *);
void turnstile_wait(struct turnstile *, struct thread *, int);
-bool turnstile_lock(struct turnstile *, struct lock_object **,
- struct thread **);
-void turnstile_unlock(struct turnstile *, struct lock_object *);
void turnstile_assert(struct turnstile *);
+
#endif /* _KERNEL */
#endif /* _SYS_TURNSTILE_H_ */
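To illustrate the hand-off that the new epoch_block_handler_preempt()/epoch_unblock() pair implements above, here is a minimal user-space sketch written against POSIX threads rather than turnstiles. It models only the "waiter blocks on the record, the reader wakes it when leaving the section" step; it cannot model the priority lending that turnstile_wait() provides, and every name in it (epoch_record_sim, sim_epoch_wait(), and so on) is hypothetical and not part of this patch.

/*
 * Hypothetical user-space analogue of the blocking protocol in this patch.
 * A pthread mutex and condition variable stand in for er_lock and the
 * turnstile, so priority lending is NOT modelled -- only the hand-off.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct epoch_record_sim {
	pthread_mutex_t	er_lock;	/* plays the role of er_lock */
	pthread_cond_t	er_cv;		/* plays the role of the turnstile */
	bool		er_in_section;	/* a reader is inside the section */
	bool		er_blocked;	/* a waiter blocked on that reader */
};

static struct epoch_record_sim er = {
	.er_lock = PTHREAD_MUTEX_INITIALIZER,
	.er_cv = PTHREAD_COND_INITIALIZER,
};

/* Reader side: epoch_enter_preempt() analogue. */
static void
sim_epoch_enter(void)
{
	pthread_mutex_lock(&er.er_lock);
	er.er_in_section = true;
	pthread_mutex_unlock(&er.er_lock);
}

/* Reader side: epoch_exit_preempt() analogue with the epoch_unblock() slow path. */
static void
sim_epoch_exit(void)
{
	pthread_mutex_lock(&er.er_lock);
	er.er_in_section = false;
	if (er.er_blocked) {
		/* turnstile_broadcast()/turnstile_unpend() analogue. */
		er.er_blocked = false;
		pthread_cond_broadcast(&er.er_cv);
	}
	pthread_mutex_unlock(&er.er_lock);
}

/* Waiter side: epoch_block_handler_preempt() analogue for an off-CPU reader. */
static void
sim_epoch_wait(void)
{
	pthread_mutex_lock(&er.er_lock);
	while (er.er_in_section) {
		/* turnstile_trywait()/turnstile_wait() would lend priority here. */
		er.er_blocked = true;
		pthread_cond_wait(&er.er_cv, &er.er_lock);
	}
	pthread_mutex_unlock(&er.er_lock);
}

static void *
reader(void *arg)
{
	(void)arg;
	sim_epoch_enter();
	sleep(1);		/* pretend the reader was preempted here */
	sim_epoch_exit();
	return (NULL);
}

int
main(void)
{
	pthread_t td;

	pthread_create(&td, NULL, reader, NULL);
	usleep(100 * 1000);	/* give the reader a head start; an empty
				   section would make the wait trivially done */
	sim_epoch_wait();	/* blocks until the reader exits */
	pthread_join(td, NULL);
	printf("grace period elapsed\n");
	return (0);
}

The design point of the diff matches the waiter path of this sketch: rather than the removed turnstile_lock()/turnstile_unlock() special case, the waiter goes through the stock turnstile_trywait()/turnstile_wait() path on the per-record lock object, so priority propagation comes from the existing turnstile code.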