D1438.id2992.diff

Index: sys/kern/init_main.c
===================================================================
--- sys/kern/init_main.c
+++ sys/kern/init_main.c
@@ -504,7 +504,8 @@
callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0);
callout_init_mtx(&p->p_limco, &p->p_mtx, 0);
- callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
+ mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN);
+ callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0);
/* Create credentials. */
p->p_ucred = crget();
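
The hunk above switches thread0's sleep callout from a lock-less callout to one backed by a new per-thread spin mutex. A minimal sketch of the resulting pattern, assuming this patch is applied (the pre-patch KASSERT in _callout_init_lock() rejected spin locks; the new CALLOUT_LC_SPIN class in kern_timeout.c is what makes this legal):

	struct mtx slpmutex;
	struct callout slpcallout;

	/*
	 * Associate the callout with a spin mutex.  The callout code
	 * acquires "slpmutex" around handler execution, which is what
	 * lets sleepq_stop_timeout() later stop the timeout
	 * synchronously while holding the same mutex.
	 */
	mtx_init(&slpmutex, "td_slpmutex", NULL, MTX_SPIN);
	callout_init_mtx(&slpcallout, &slpmutex, 0);
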
Index: sys/kern/kern_condvar.c
===================================================================
--- sys/kern/kern_condvar.c
+++ sys/kern/kern_condvar.c
@@ -313,15 +313,13 @@
DROP_GIANT();
sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR, 0);
+ sleepq_release(cvp);
sleepq_set_timeout_sbt(cvp, sbt, pr, flags);
if (lock != &Giant.lock_object) {
- if (class->lc_flags & LC_SLEEPABLE)
- sleepq_release(cvp);
WITNESS_SAVE(lock, lock_witness);
lock_state = class->lc_unlock(lock);
- if (class->lc_flags & LC_SLEEPABLE)
- sleepq_lock(cvp);
}
+ sleepq_lock(cvp);
rval = sleepq_timedwait(cvp, 0);
#ifdef KTRACE
@@ -383,15 +381,13 @@
sleepq_add(cvp, lock, cvp->cv_description, SLEEPQ_CONDVAR |
SLEEPQ_INTERRUPTIBLE, 0);
+ sleepq_release(cvp);
sleepq_set_timeout_sbt(cvp, sbt, pr, flags);
if (lock != &Giant.lock_object) {
- if (class->lc_flags & LC_SLEEPABLE)
- sleepq_release(cvp);
WITNESS_SAVE(lock, lock_witness);
lock_state = class->lc_unlock(lock);
- if (class->lc_flags & LC_SLEEPABLE)
- sleepq_lock(cvp);
}
+ sleepq_lock(cvp);
rval = sleepq_timedwait_sig(cvp, 0);
#ifdef KTRACE
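
Both cv_timedwait paths above (and the lockmgr, msleep and OFED completion paths changed below) converge on the same calling sequence: the sleepqueue chain lock is dropped while the timeout is armed, so the callout machinery can take its own spin locks (td_slpmutex, cc_lock) without nesting them under the chain lock, and is re-acquired before blocking. A condensed, illustrative sketch of that sequence, with names as in sleepqueue(9); "wchan" is the wait channel and "lock" the caller's interlock:

	sleepq_lock(wchan);
	sleepq_add(wchan, lock, wmesg, SLEEPQ_CONDVAR, 0);
	sleepq_release(wchan);				/* drop the chain spin lock */
	sleepq_set_timeout_sbt(wchan, sbt, pr, flags);	/* arms td_slpcallout under td_slpmutex */
	sleepq_lock(wchan);				/* re-acquire before sleeping */
	rval = sleepq_timedwait(wchan, 0);
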
Index: sys/kern/kern_lock.c
===================================================================
--- sys/kern/kern_lock.c
+++ sys/kern/kern_lock.c
@@ -210,9 +210,11 @@
GIANT_SAVE();
sleepq_add(&lk->lock_object, NULL, wmesg, SLEEPQ_LK | (catch ?
SLEEPQ_INTERRUPTIBLE : 0), queue);
- if ((flags & LK_TIMELOCK) && timo)
+ if ((flags & LK_TIMELOCK) && timo) {
+ sleepq_release(&lk->lock_object);
sleepq_set_timeout(&lk->lock_object, timo);
-
+ sleepq_lock(&lk->lock_object);
+ }
/*
* Decisional switch for real sleeping.
*/
Index: sys/kern/kern_switch.c
===================================================================
--- sys/kern/kern_switch.c
+++ sys/kern/kern_switch.c
@@ -93,8 +93,6 @@
&DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
SCHED_STAT_DEFINE_VAR(sleepq,
&DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
-SCHED_STAT_DEFINE_VAR(sleepqtimo,
- &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQTIMO]), "");
SCHED_STAT_DEFINE_VAR(relinquish,
&DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
SCHED_STAT_DEFINE_VAR(needresched,
Index: sys/kern/kern_synch.c
===================================================================
--- sys/kern/kern_synch.c
+++ sys/kern/kern_synch.c
@@ -236,13 +236,17 @@
* return from cursig().
*/
sleepq_add(ident, lock, wmesg, sleepq_flags, 0);
- if (sbt != 0)
- sleepq_set_timeout_sbt(ident, sbt, pr, flags);
if (lock != NULL && class->lc_flags & LC_SLEEPABLE) {
sleepq_release(ident);
WITNESS_SAVE(lock, lock_witness);
lock_state = class->lc_unlock(lock);
+ if (sbt != 0)
+ sleepq_set_timeout_sbt(ident, sbt, pr, flags);
sleepq_lock(ident);
+ } else if (sbt != 0) {
+ sleepq_release(ident);
+ sleepq_set_timeout_sbt(ident, sbt, pr, flags);
+ sleepq_lock(ident);
}
if (sbt != 0 && catch)
rval = sleepq_timedwait_sig(ident, pri);
@@ -306,8 +310,11 @@
* We put ourselves on the sleep queue and start our timeout.
*/
sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0);
- if (sbt != 0)
+ if (sbt != 0) {
+ sleepq_release(ident);
sleepq_set_timeout_sbt(ident, sbt, pr, flags);
+ sleepq_lock(ident);
+ }
/*
* Can't call ktrace with any spin locks held so it can lock the
Index: sys/kern/kern_thread.c
===================================================================
--- sys/kern/kern_thread.c
+++ sys/kern/kern_thread.c
@@ -149,6 +149,9 @@
audit_thread_alloc(td);
#endif
umtx_thread_alloc(td);
+
+ mtx_init(&td->td_slpmutex, "td_slpmutex", NULL, MTX_SPIN);
+ callout_init_mtx(&td->td_slpcallout, &td->td_slpmutex, 0);
return (0);
}
@@ -162,6 +165,10 @@
td = (struct thread *)mem;
+ /* make sure to drain any use of the "td->td_slpcallout" */
+ callout_drain(&td->td_slpcallout);
+ mtx_destroy(&td->td_slpmutex);
+
#ifdef INVARIANTS
/* Verify that this thread is in a safe state to free. */
switch (td->td_state) {
@@ -544,7 +551,6 @@
LIST_INIT(&td->td_lprof[0]);
LIST_INIT(&td->td_lprof[1]);
sigqueue_init(&td->td_sigqueue, p);
- callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
TAILQ_INSERT_TAIL(&p->p_threads, td, td_plist);
p->p_numthreads++;
}
Index: sys/kern/kern_timeout.c
===================================================================
--- sys/kern/kern_timeout.c
+++ sys/kern/kern_timeout.c
@@ -54,6 +54,8 @@
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
+#include <sys/rmlock.h>
+#include <sys/rwlock.h>
#include <sys/proc.h>
#include <sys/sdt.h>
#include <sys/sleepqueue.h>
@@ -124,37 +126,216 @@
*/
u_int callwheelsize, callwheelmask;
+typedef void callout_mutex_op_t(struct lock_object *);
+typedef int callout_owned_op_t(struct lock_object *);
+
+struct callout_mutex_ops {
+ callout_mutex_op_t *lock;
+ callout_mutex_op_t *unlock;
+ callout_owned_op_t *owned;
+};
+
+enum {
+ CALLOUT_LC_UNUSED_0,
+ CALLOUT_LC_UNUSED_1,
+ CALLOUT_LC_UNUSED_2,
+ CALLOUT_LC_UNUSED_3,
+ CALLOUT_LC_SPIN,
+ CALLOUT_LC_MUTEX,
+ CALLOUT_LC_RW,
+ CALLOUT_LC_RM,
+};
+
+static void
+callout_mutex_op_none(struct lock_object *lock)
+{
+}
+
+static int
+callout_owned_op_none(struct lock_object *lock)
+{
+ return (0);
+}
+
+static void
+callout_mutex_lock(struct lock_object *lock)
+{
+ mtx_lock((struct mtx *)lock);
+}
+
+static void
+callout_mutex_unlock(struct lock_object *lock)
+{
+ mtx_unlock((struct mtx *)lock);
+}
+
+static void
+callout_mutex_lock_spin(struct lock_object *lock)
+{
+ mtx_lock_spin((struct mtx *)lock);
+}
+
+static void
+callout_mutex_unlock_spin(struct lock_object *lock)
+{
+ mtx_unlock_spin((struct mtx *)lock);
+}
+
+static int
+callout_mutex_owned(struct lock_object *lock)
+{
+ return (mtx_owned((struct mtx *)lock));
+}
+
+static void
+callout_rm_wlock(struct lock_object *lock)
+{
+ rm_wlock((struct rmlock *)lock);
+}
+
+static void
+callout_rm_wunlock(struct lock_object *lock)
+{
+ rm_wunlock((struct rmlock *)lock);
+}
+
+static int
+callout_rm_owned(struct lock_object *lock)
+{
+ return (rm_wowned((struct rmlock *)lock));
+}
+
+static void
+callout_rw_wlock(struct lock_object *lock)
+{
+ rw_wlock((struct rwlock *)lock);
+}
+
+static void
+callout_rw_wunlock(struct lock_object *lock)
+{
+ rw_wunlock((struct rwlock *)lock);
+}
+
+static int
+callout_rw_owned(struct lock_object *lock)
+{
+ return (rw_wowned((struct rwlock *)lock));
+}
+
+static const struct callout_mutex_ops callout_mutex_ops[8] = {
+ [CALLOUT_LC_UNUSED_0] = {
+ .lock = callout_mutex_op_none,
+ .unlock = callout_mutex_op_none,
+ .owned = callout_owned_op_none,
+ },
+ [CALLOUT_LC_UNUSED_1] = {
+ .lock = callout_mutex_op_none,
+ .unlock = callout_mutex_op_none,
+ .owned = callout_owned_op_none,
+ },
+ [CALLOUT_LC_UNUSED_2] = {
+ .lock = callout_mutex_op_none,
+ .unlock = callout_mutex_op_none,
+ .owned = callout_owned_op_none,
+ },
+ [CALLOUT_LC_UNUSED_3] = {
+ .lock = callout_mutex_op_none,
+ .unlock = callout_mutex_op_none,
+ .owned = callout_owned_op_none,
+ },
+ [CALLOUT_LC_SPIN] = {
+ .lock = callout_mutex_lock_spin,
+ .unlock = callout_mutex_unlock_spin,
+ .owned = callout_mutex_owned,
+ },
+ [CALLOUT_LC_MUTEX] = {
+ .lock = callout_mutex_lock,
+ .unlock = callout_mutex_unlock,
+ .owned = callout_mutex_owned,
+ },
+ [CALLOUT_LC_RW] = {
+ .lock = callout_rw_wlock,
+ .unlock = callout_rw_wunlock,
+ .owned = callout_rw_owned,
+ },
+ [CALLOUT_LC_RM] = {
+ .lock = callout_rm_wlock,
+ .unlock = callout_rm_wunlock,
+ .owned = callout_rm_owned,
+ },
+};
+
+static void
+callout_lock_client(int c_flags, struct lock_object *c_lock)
+{
+ callout_mutex_ops[CALLOUT_GET_LC(c_flags)].lock(c_lock);
+}
+
+static void
+callout_unlock_client(int c_flags, struct lock_object *c_lock)
+{
+ callout_mutex_ops[CALLOUT_GET_LC(c_flags)].unlock(c_lock);
+}
+
+#ifdef SMP
+static int
+callout_lock_owned_client(int c_flags, struct lock_object *c_lock)
+{
+ return (callout_mutex_ops[CALLOUT_GET_LC(c_flags)].owned(c_lock));
+}
+#endif
+
/*
- * The callout cpu exec entities represent informations necessary for
- * describing the state of callouts currently running on the CPU and the ones
- * necessary for migrating callouts to the new callout cpu. In particular,
- * the first entry of the array cc_exec_entity holds informations for callout
- * running in SWI thread context, while the second one holds informations
- * for callout running directly from hardware interrupt context.
- * The cached informations are very important for deferring migration when
- * the migrating callout is already running.
+ * The callout CPU exec structure represent information necessary for
+ * describing the state of callouts currently running on the CPU and
+ * for handling deferred callout restarts.
+ *
+ * In particular, the first entry of the array cc_exec_entity holds
+ * information for callouts running from the SWI thread context, while
+ * the second one holds information for callouts running directly from
+ * the hardware interrupt context.
*/
struct cc_exec {
- struct callout *cc_next;
+ /*
+ * The "cc_curr" points to the currently executing callout and
+ * is protected by the "cc_lock" spinlock. If no callback is
+ * currently executing it is equal to "NULL".
+ */
struct callout *cc_curr;
-#ifdef SMP
- void (*ce_migration_func)(void *);
- void *ce_migration_arg;
- int ce_migration_cpu;
- sbintime_t ce_migration_time;
- sbintime_t ce_migration_prec;
-#endif
- bool cc_cancel;
- bool cc_waiting;
+ /*
+ * The "cc_restart_args" structure holds the argument for a
+ * deferred callback restart and is protected by the "cc_lock"
+ * spinlock. The structure is only valid if "cc_restart" is
+ * "true". If "cc_restart" is "false" the information in the
+ * "cc_restart_args" structure shall be ignored.
+ */
+ struct callout_args cc_restart_args;
+ bool cc_restart;
+ /*
+ * The "cc_cancel" variable allows the currently pending
+ * callback to be atomically cancelled. This field is write
+ * protected by the "cc_lock" spinlock.
+ */
+ bool cc_cancel;
+ /*
+ * The "cc_drain_fn" points to a function which shall be
+ * called with the argument stored in "cc_drain_arg" when an
+ * asynchronous drain is performed. This field is write
+ * protected by the "cc_lock" spinlock.
+ */
+ callout_func_t *cc_drain_fn;
+ void *cc_drain_arg;
};
/*
- * There is one struct callout_cpu per cpu, holding all relevant
+ * There is one "struct callout_cpu" per CPU, holding all relevant
* state for the callout processing thread on the individual CPU.
*/
struct callout_cpu {
struct mtx_padalign cc_lock;
struct cc_exec cc_exec_entity[2];
+ struct callout *cc_exec_next_dir;
struct callout *cc_callout;
struct callout_list *cc_callwheel;
struct callout_tailq cc_expireq;
@@ -166,27 +347,7 @@
char cc_ktr_event_name[20];
};
-#define cc_exec_curr cc_exec_entity[0].cc_curr
-#define cc_exec_next cc_exec_entity[0].cc_next
-#define cc_exec_cancel cc_exec_entity[0].cc_cancel
-#define cc_exec_waiting cc_exec_entity[0].cc_waiting
-#define cc_exec_curr_dir cc_exec_entity[1].cc_curr
-#define cc_exec_next_dir cc_exec_entity[1].cc_next
-#define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel
-#define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting
-
#ifdef SMP
-#define cc_migration_func cc_exec_entity[0].ce_migration_func
-#define cc_migration_arg cc_exec_entity[0].ce_migration_arg
-#define cc_migration_cpu cc_exec_entity[0].ce_migration_cpu
-#define cc_migration_time cc_exec_entity[0].ce_migration_time
-#define cc_migration_prec cc_exec_entity[0].ce_migration_prec
-#define cc_migration_func_dir cc_exec_entity[1].ce_migration_func
-#define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg
-#define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu
-#define cc_migration_time_dir cc_exec_entity[1].ce_migration_time
-#define cc_migration_prec_dir cc_exec_entity[1].ce_migration_prec
-
struct callout_cpu cc_cpu[MAXCPU];
#define CPUBLOCK MAXCPU
#define CC_CPU(cpu) (&cc_cpu[(cpu)])
@@ -211,62 +372,11 @@
static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
-/**
- * Locked by cc_lock:
- * cc_curr - If a callout is in progress, it is cc_curr.
- * If cc_curr is non-NULL, threads waiting in
- * callout_drain() will be woken up as soon as the
- * relevant callout completes.
- * cc_cancel - Changing to 1 with both callout_lock and cc_lock held
- * guarantees that the current callout will not run.
- * The softclock() function sets this to 0 before it
- * drops callout_lock to acquire c_lock, and it calls
- * the handler only if curr_cancelled is still 0 after
- * cc_lock is successfully acquired.
- * cc_waiting - If a thread is waiting in callout_drain(), then
- * callout_wait is nonzero. Set only when
- * cc_curr is non-NULL.
- */
-
/*
- * Resets the execution entity tied to a specific callout cpu.
+ * Kernel low level callwheel initialization called from cpu0 during
+ * kernel startup:
*/
static void
-cc_cce_cleanup(struct callout_cpu *cc, int direct)
-{
-
- cc->cc_exec_entity[direct].cc_curr = NULL;
- cc->cc_exec_entity[direct].cc_next = NULL;
- cc->cc_exec_entity[direct].cc_cancel = false;
- cc->cc_exec_entity[direct].cc_waiting = false;
-#ifdef SMP
- cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK;
- cc->cc_exec_entity[direct].ce_migration_time = 0;
- cc->cc_exec_entity[direct].ce_migration_prec = 0;
- cc->cc_exec_entity[direct].ce_migration_func = NULL;
- cc->cc_exec_entity[direct].ce_migration_arg = NULL;
-#endif
-}
-
-/*
- * Checks if migration is requested by a specific callout cpu.
- */
-static int
-cc_cce_migrating(struct callout_cpu *cc, int direct)
-{
-
-#ifdef SMP
- return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK);
-#else
- return (0);
-#endif
-}
-
-/*
- * Kernel low level callwheel initialization
- * called on cpu0 during kernel startup.
- */
-static void
callout_callwheel_init(void *dummy)
{
struct callout_cpu *cc;
@@ -324,8 +434,6 @@
LIST_INIT(&cc->cc_callwheel[i]);
TAILQ_INIT(&cc->cc_expireq);
cc->cc_firstevent = SBT_MAX;
- for (i = 0; i < 2; i++)
- cc_cce_cleanup(cc, i);
snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name),
"callwheel cpu %d", cpu);
if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */
@@ -333,42 +441,12 @@
for (i = 0; i < ncallout; i++) {
c = &cc->cc_callout[i];
callout_init(c, 0);
- c->c_flags = CALLOUT_LOCAL_ALLOC;
+ c->c_flags |= CALLOUT_LOCAL_ALLOC;
SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}
}
-#ifdef SMP
/*
- * Switches the cpu tied to a specific callout.
- * The function expects a locked incoming callout cpu and returns with
- * locked outcoming callout cpu.
- */
-static struct callout_cpu *
-callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
-{
- struct callout_cpu *new_cc;
-
- MPASS(c != NULL && cc != NULL);
- CC_LOCK_ASSERT(cc);
-
- /*
- * Avoid interrupts and preemption firing after the callout cpu
- * is blocked in order to avoid deadlocks as the new thread
- * may be willing to acquire the callout cpu lock.
- */
- c->c_cpu = CPUBLOCK;
- spinlock_enter();
- CC_UNLOCK(cc);
- new_cc = CC_CPU(new_cpu);
- CC_LOCK(new_cc);
- spinlock_exit();
- c->c_cpu = new_cpu;
- return (new_cc);
-}
-#endif
-
-/*
* Start standard softclock thread.
*/
static void
@@ -444,9 +522,8 @@
#ifdef CALLOUT_PROFILING
int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
#endif
-
cc = CC_SELF();
- mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
+ CC_LOCK(cc);
/* Compute the buckets of the last scan and present times. */
firstb = callout_hash(cc->cc_lastscan);
@@ -549,7 +626,7 @@
avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
#endif
- mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
+ CC_UNLOCK(cc);
/*
* swi_sched acquires the thread lock, so we don't want to call it
* with cc_lock held; incorrect locking order.
@@ -562,49 +639,55 @@
callout_lock(struct callout *c)
{
struct callout_cpu *cc;
- int cpu;
-
- for (;;) {
- cpu = c->c_cpu;
-#ifdef SMP
- if (cpu == CPUBLOCK) {
- while (c->c_cpu == CPUBLOCK)
- cpu_spinwait();
- continue;
- }
-#endif
- cc = CC_CPU(cpu);
- CC_LOCK(cc);
- if (cpu == c->c_cpu)
- break;
- CC_UNLOCK(cc);
- }
+ cc = CC_CPU(c->c_cpu);
+ CC_LOCK(cc);
return (cc);
}
-static void
-callout_cc_add(struct callout *c, struct callout_cpu *cc,
- sbintime_t sbt, sbintime_t precision, void (*func)(void *),
- void *arg, int cpu, int flags)
+static struct callout_cpu *
+callout_cc_add_locked(struct callout *c, struct callout_cpu *cc,
+ struct callout_args *coa, bool can_swap_cpu)
{
+#ifndef NO_EVENTTIMERS
+ sbintime_t sbt;
+#endif
int bucket;
CC_LOCK_ASSERT(cc);
- if (sbt < cc->cc_lastscan)
- sbt = cc->cc_lastscan;
- c->c_arg = arg;
- c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
- if (flags & C_DIRECT_EXEC)
- c->c_flags |= CALLOUT_DIRECT;
- c->c_flags &= ~CALLOUT_PROCESSED;
- c->c_func = func;
- c->c_time = sbt;
- c->c_precision = precision;
+
+ /* update flags before swapping locks, if any */
+ c->c_flags &= ~(CALLOUT_PROCESSED | CALLOUT_DIRECT | CALLOUT_DEFRESTART);
+ if (coa->flags & C_DIRECT_EXEC)
+ c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | CALLOUT_DIRECT);
+ else
+ c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
+
+#ifdef SMP
+ /*
+ * Check if we are changing the CPU on which the callback
+ * should be executed and if we have a lock protecting us:
+ */
+ if (can_swap_cpu != false && coa->cpu != c->c_cpu &&
+ callout_lock_owned_client(c->c_flags, c->c_lock) != 0) {
+ CC_UNLOCK(cc);
+ c->c_cpu = coa->cpu;
+ cc = callout_lock(c);
+ }
+#endif
+ if (coa->time < cc->cc_lastscan)
+ coa->time = cc->cc_lastscan;
+ c->c_arg = coa->arg;
+ c->c_func = coa->func;
+ c->c_time = coa->time;
+ c->c_precision = coa->precision;
+
bucket = callout_get_bucket(c->c_time);
CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
c, (int)(c->c_precision >> 32),
(u_int)(c->c_precision & 0xffffffff));
LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
+
+ /* Ensure we are first to be scanned, if called via a callback */
if (cc->cc_bucket == bucket)
cc->cc_exec_next_dir = c;
#ifndef NO_EVENTTIMERS
@@ -617,9 +700,10 @@
sbt = c->c_time + c->c_precision;
if (sbt < cc->cc_firstevent) {
cc->cc_firstevent = sbt;
- cpu_new_callout(cpu, sbt, c->c_time);
+ cpu_new_callout(coa->cpu, sbt, c->c_time);
}
#endif
+ return (cc);
}
static void
@@ -626,8 +710,6 @@
callout_cc_del(struct callout *c, struct callout_cpu *cc)
{
- if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0)
- return;
c->c_func = NULL;
SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}
@@ -639,20 +721,10 @@
#endif
int direct)
{
- struct rm_priotracker tracker;
- void (*c_func)(void *);
+ callout_func_t *c_func;
void *c_arg;
- struct lock_class *class;
struct lock_object *c_lock;
- uintptr_t lock_status;
int c_flags;
-#ifdef SMP
- struct callout_cpu *new_cc;
- void (*new_func)(void *);
- void *new_arg;
- int flags, new_cpu;
- sbintime_t new_prec, new_time;
-#endif
#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
sbintime_t sbt1, sbt2;
struct timespec ts2;
@@ -663,37 +735,43 @@
KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) ==
(CALLOUT_PENDING | CALLOUT_ACTIVE),
("softclock_call_cc: pend|act %p %x", c, c->c_flags));
- class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
- lock_status = 0;
- if (c->c_flags & CALLOUT_SHAREDLOCK) {
- if (class == &lock_class_rm)
- lock_status = (uintptr_t)&tracker;
- else
- lock_status = 1;
- }
c_lock = c->c_lock;
c_func = c->c_func;
c_arg = c->c_arg;
c_flags = c->c_flags;
- if (c->c_flags & CALLOUT_LOCAL_ALLOC)
- c->c_flags = CALLOUT_LOCAL_ALLOC;
- else
- c->c_flags &= ~CALLOUT_PENDING;
+
+ /* remove pending bit */
+ c->c_flags &= ~CALLOUT_PENDING;
+
+ /* reset our local state */
cc->cc_exec_entity[direct].cc_curr = c;
cc->cc_exec_entity[direct].cc_cancel = false;
- CC_UNLOCK(cc);
+ cc->cc_exec_entity[direct].cc_restart = false;
+ cc->cc_exec_entity[direct].cc_drain_fn = NULL;
+ cc->cc_exec_entity[direct].cc_drain_arg = NULL;
+
if (c_lock != NULL) {
- class->lc_lock(c_lock, lock_status);
+ CC_UNLOCK(cc);
+
+ /* unlocked region for switching locks */
+
+ callout_lock_client(c_flags, c_lock);
+
/*
- * The callout may have been cancelled
- * while we switched locks.
+ * Check if the callout may have been cancelled while
+ * we were switching locks. Even though the callout is
+ * specifying a lock, it might not be certain this
+ * lock is locked when starting and stopping callouts.
*/
+ CC_LOCK(cc);
if (cc->cc_exec_entity[direct].cc_cancel) {
- class->lc_unlock(c_lock);
- goto skip;
+ callout_unlock_client(c_flags, c_lock);
+ goto skip_cc_locked;
}
- /* The callout cannot be stopped now. */
+ /* The callout cannot be stopped now! */
cc->cc_exec_entity[direct].cc_cancel = true;
+ CC_UNLOCK(cc);
+
if (c_lock == &Giant.lock_object) {
#ifdef CALLOUT_PROFILING
(*gcalls)++;
@@ -708,6 +786,8 @@
c, c_func, c_arg);
}
} else {
+ CC_UNLOCK(cc);
+ /* unlocked region */
#ifdef CALLOUT_PROFILING
(*mpcalls)++;
#endif
@@ -740,85 +820,40 @@
#endif
KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
CTR1(KTR_CALLOUT, "callout %p finished", c);
+
+ /*
+ * At this point the callback structure might have been freed,
+ * so we need to check the previously copied value of
+ * "c->c_flags":
+ */
if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
- class->lc_unlock(c_lock);
-skip:
+ callout_unlock_client(c_flags, c_lock);
+
CC_LOCK(cc);
+
+skip_cc_locked:
KASSERT(cc->cc_exec_entity[direct].cc_curr == c, ("mishandled cc_curr"));
cc->cc_exec_entity[direct].cc_curr = NULL;
- if (cc->cc_exec_entity[direct].cc_waiting) {
+
+ /* Check if there is anything which needs draining */
+ if (cc->cc_exec_entity[direct].cc_drain_fn != NULL) {
/*
- * There is someone waiting for the
- * callout to complete.
- * If the callout was scheduled for
- * migration just cancel it.
+ * Unlock the CPU callout last, so that any use of
+ * structures belonging to the callout are complete:
*/
- if (cc_cce_migrating(cc, direct)) {
- cc_cce_cleanup(cc, direct);
-
- /*
- * It should be assert here that the callout is not
- * destroyed but that is not easy.
- */
- c->c_flags &= ~CALLOUT_DFRMIGRATION;
- }
- cc->cc_exec_entity[direct].cc_waiting = false;
CC_UNLOCK(cc);
- wakeup(&cc->cc_exec_entity[direct].cc_waiting);
+ /* call drain function unlocked */
+ cc->cc_exec_entity[direct].cc_drain_fn(
+ cc->cc_exec_entity[direct].cc_drain_arg);
CC_LOCK(cc);
- } else if (cc_cce_migrating(cc, direct)) {
- KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0,
- ("Migrating legacy callout %p", c));
-#ifdef SMP
- /*
- * If the callout was scheduled for
- * migration just perform it now.
- */
- new_cpu = cc->cc_exec_entity[direct].ce_migration_cpu;
- new_time = cc->cc_exec_entity[direct].ce_migration_time;
- new_prec = cc->cc_exec_entity[direct].ce_migration_prec;
- new_func = cc->cc_exec_entity[direct].ce_migration_func;
- new_arg = cc->cc_exec_entity[direct].ce_migration_arg;
- cc_cce_cleanup(cc, direct);
-
- /*
- * It should be assert here that the callout is not destroyed
- * but that is not easy.
- *
- * As first thing, handle deferred callout stops.
- */
- if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) {
- CTR3(KTR_CALLOUT,
- "deferred cancelled %p func %p arg %p",
- c, new_func, new_arg);
- callout_cc_del(c, cc);
- return;
- }
- c->c_flags &= ~CALLOUT_DFRMIGRATION;
-
- new_cc = callout_cpu_switch(c, cc, new_cpu);
- flags = (direct) ? C_DIRECT_EXEC : 0;
- callout_cc_add(c, new_cc, new_time, new_prec, new_func,
- new_arg, new_cpu, flags);
- CC_UNLOCK(new_cc);
- CC_LOCK(cc);
-#else
- panic("migration should not happen");
-#endif
+ } else if (c_flags & CALLOUT_LOCAL_ALLOC) {
+ /* return callout back to freelist */
+ callout_cc_del(c, cc);
+ } else if (cc->cc_exec_entity[direct].cc_restart) {
+ /* [re-]schedule callout, if any */
+ cc = callout_cc_add_locked(c, cc,
+ &cc->cc_exec_entity[direct].cc_restart_args, false);
}
- /*
- * If the current callout is locally allocated (from
- * timeout(9)) then put it on the freelist.
- *
- * Note: we need to check the cached copy of c_flags because
- * if it was not local, then it's not safe to deref the
- * callout pointer.
- */
- KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 ||
- c->c_flags == CALLOUT_LOCAL_ALLOC,
- ("corrupted callout"));
- if (c_flags & CALLOUT_LOCAL_ALLOC)
- callout_cc_del(c, cc);
}
/*
@@ -899,10 +934,11 @@
/* XXX Attempt to malloc first */
panic("timeout table full");
SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle);
- callout_reset(new, to_ticks, ftn, arg);
handle.callout = new;
CC_UNLOCK(cc);
+ callout_reset(new, to_ticks, ftn, arg);
+
return (handle);
}
@@ -910,6 +946,7 @@
untimeout(timeout_t *ftn, void *arg, struct callout_handle handle)
{
struct callout_cpu *cc;
+ bool match;
/*
* Check for a handle that was initialized
@@ -920,9 +957,11 @@
return;
cc = callout_lock(handle.callout);
- if (handle.callout->c_func == ftn && handle.callout->c_arg == arg)
+ match = (handle.callout->c_func == ftn && handle.callout->c_arg == arg);
+ CC_UNLOCK(cc);
+
+ if (match)
callout_stop(handle.callout);
- CC_UNLOCK(cc);
}
void
@@ -931,6 +970,119 @@
handle->callout = NULL;
}
+static int
+callout_restart_async(struct callout *c, struct callout_args *coa,
+ callout_func_t *drain_fn, void *drain_arg)
+{
+ struct callout_cpu *cc;
+ int cancelled;
+ int direct;
+
+ cc = callout_lock(c);
+
+ /* Figure out if the callout is direct or not */
+ direct = ((c->c_flags & CALLOUT_DIRECT) != 0);
+
+ /*
+ * Check if the callback is currently scheduled for
+ * completion:
+ */
+ if (cc->cc_exec_entity[direct].cc_curr == c) {
+ /*
+ * Try to prevent the callback from running by setting
+ * the "cc_cancel" variable to "true". Also check if
+ * the callout was previously subject to a deferred
+ * callout restart:
+ */
+ if (cc->cc_exec_entity[direct].cc_cancel == false ||
+ (c->c_flags & CALLOUT_DEFRESTART) != 0) {
+ cc->cc_exec_entity[direct].cc_cancel = true;
+ cancelled = 1;
+ } else {
+ cancelled = 0;
+ }
+
+ /*
+ * Prevent callback restart if "callout_drain_xxx()"
+ * is being called or we are stopping the callout or
+ * the callback was preallocated by us:
+ */
+ if (cc->cc_exec_entity[direct].cc_drain_fn != NULL ||
+ coa == NULL || (c->c_flags & CALLOUT_LOCAL_ALLOC) != 0) {
+ CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
+ cancelled ? "cancelled and draining" : "draining",
+ c, c->c_func, c->c_arg);
+
+ /* clear old flags, if any */
+ c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING |
+ CALLOUT_DEFRESTART | CALLOUT_PROCESSED);
+
+ /* clear restart flag, if any */
+ cc->cc_exec_entity[direct].cc_restart = false;
+
+ /* set drain function, if any */
+ if (drain_fn != NULL) {
+ cc->cc_exec_entity[direct].cc_drain_fn = drain_fn;
+ cc->cc_exec_entity[direct].cc_drain_arg = drain_arg;
+ cancelled |= 2; /* XXX define the value */
+ }
+ } else {
+ CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
+ cancelled ? "cancelled and restarting" : "restarting",
+ c, c->c_func, c->c_arg);
+
+ /* get us back into the game */
+ c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING |
+ CALLOUT_DEFRESTART);
+ c->c_flags &= ~CALLOUT_PROCESSED;
+
+ /* enable deferred restart */
+ cc->cc_exec_entity[direct].cc_restart = true;
+
+ /* store arguments for the deferred restart, if any */
+ cc->cc_exec_entity[direct].cc_restart_args = *coa;
+ }
+ } else {
+ /* stop callout */
+ if (c->c_flags & CALLOUT_PENDING) {
+ /*
+ * The callback has not yet been executed, and
+ * we simply just need to unlink it:
+ */
+ if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
+ if (cc->cc_exec_next_dir == c)
+ cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le);
+ LIST_REMOVE(c, c_links.le);
+ } else {
+ TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+ }
+ cancelled = 1;
+ } else {
+ cancelled = 0;
+ }
+
+ /* [re-]schedule callout, if any */
+ if (coa != NULL) {
+ cc = callout_cc_add_locked(c, cc, coa, true);
+ } else {
+ /* clear old flags, if any */
+ c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING |
+ CALLOUT_DEFRESTART | CALLOUT_PROCESSED);
+
+ /* return callback to pre-allocated list, if any */
+ if ((c->c_flags & CALLOUT_LOCAL_ALLOC) && cancelled != 0) {
+ callout_cc_del(c, cc);
+ }
+ }
+
+ CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
+ cancelled ? "rescheduled" : "scheduled",
+ c, c->c_func, c->c_arg);
+ }
+ CC_UNLOCK(cc);
+ return (cancelled);
+}
+
/*
* New interface; clients allocate their own callout structures.
*
@@ -949,25 +1101,32 @@
*/
int
callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
- void (*ftn)(void *), void *arg, int cpu, int flags)
+ callout_func_t *ftn, void *arg, int cpu, int flags)
{
- sbintime_t to_sbt, pr;
- struct callout_cpu *cc;
- int cancelled, direct;
+ struct callout_args coa;
- cancelled = 0;
- if (flags & C_ABSOLUTE) {
- to_sbt = sbt;
+ /* store arguments for callout add function */
+ coa.func = ftn;
+ coa.arg = arg;
+ coa.precision = precision;
+ coa.flags = flags;
+ coa.cpu = cpu;
+
+ /* compute the rest of the arguments needed */
+ if (coa.flags & C_ABSOLUTE) {
+ coa.time = sbt;
} else {
- if ((flags & C_HARDCLOCK) && (sbt < tick_sbt))
+ sbintime_t pr;
+
+ if ((coa.flags & C_HARDCLOCK) && (sbt < tick_sbt))
sbt = tick_sbt;
- if ((flags & C_HARDCLOCK) ||
+ if ((coa.flags & C_HARDCLOCK) ||
#ifdef NO_EVENTTIMERS
sbt >= sbt_timethreshold) {
- to_sbt = getsbinuptime();
+ coa.time = getsbinuptime();
/* Add safety belt for the case of hz > 1000. */
- to_sbt += tc_tick_sbt - tick_sbt;
+ coa.time += tc_tick_sbt - tick_sbt;
#else
sbt >= sbt_tickthreshold) {
/*
@@ -977,101 +1136,29 @@
* active ones.
*/
#ifdef __LP64__
- to_sbt = DPCPU_GET(hardclocktime);
+ coa.time = DPCPU_GET(hardclocktime);
#else
spinlock_enter();
- to_sbt = DPCPU_GET(hardclocktime);
+ coa.time = DPCPU_GET(hardclocktime);
spinlock_exit();
#endif
#endif
- if ((flags & C_HARDCLOCK) == 0)
- to_sbt += tick_sbt;
+ if ((coa.flags & C_HARDCLOCK) == 0)
+ coa.time += tick_sbt;
} else
- to_sbt = sbinuptime();
- if (SBT_MAX - to_sbt < sbt)
- to_sbt = SBT_MAX;
+ coa.time = sbinuptime();
+ if (SBT_MAX - coa.time < sbt)
+ coa.time = SBT_MAX;
else
- to_sbt += sbt;
- pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
- sbt >> C_PRELGET(flags));
- if (pr > precision)
- precision = pr;
+ coa.time += sbt;
+ pr = ((C_PRELGET(coa.flags) < 0) ? sbt >> tc_precexp :
+ sbt >> C_PRELGET(coa.flags));
+ if (pr > coa.precision)
+ coa.precision = pr;
}
- /*
- * Don't allow migration of pre-allocated callouts lest they
- * become unbalanced.
- */
- if (c->c_flags & CALLOUT_LOCAL_ALLOC)
- cpu = c->c_cpu;
- direct = (c->c_flags & CALLOUT_DIRECT) != 0;
- KASSERT(!direct || c->c_lock == NULL,
- ("%s: direct callout %p has lock", __func__, c));
- cc = callout_lock(c);
- if (cc->cc_exec_entity[direct].cc_curr == c) {
- /*
- * We're being asked to reschedule a callout which is
- * currently in progress. If there is a lock then we
- * can cancel the callout if it has not really started.
- */
- if (c->c_lock != NULL && !cc->cc_exec_entity[direct].cc_cancel)
- cancelled = cc->cc_exec_entity[direct].cc_cancel = true;
- if (cc->cc_exec_entity[direct].cc_waiting) {
- /*
- * Someone has called callout_drain to kill this
- * callout. Don't reschedule.
- */
- CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
- cancelled ? "cancelled" : "failed to cancel",
- c, c->c_func, c->c_arg);
- CC_UNLOCK(cc);
- return (cancelled);
- }
- }
- if (c->c_flags & CALLOUT_PENDING) {
- if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
- if (cc->cc_exec_next_dir == c)
- cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le);
- LIST_REMOVE(c, c_links.le);
- } else
- TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
- cancelled = 1;
- c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
- }
-#ifdef SMP
- /*
- * If the callout must migrate try to perform it immediately.
- * If the callout is currently running, just defer the migration
- * to a more appropriate moment.
- */
- if (c->c_cpu != cpu) {
- if (cc->cc_exec_entity[direct].cc_curr == c) {
- cc->cc_exec_entity[direct].ce_migration_cpu = cpu;
- cc->cc_exec_entity[direct].ce_migration_time
- = to_sbt;
- cc->cc_exec_entity[direct].ce_migration_prec
- = precision;
- cc->cc_exec_entity[direct].ce_migration_func = ftn;
- cc->cc_exec_entity[direct].ce_migration_arg = arg;
- c->c_flags |= CALLOUT_DFRMIGRATION;
- CTR6(KTR_CALLOUT,
- "migration of %p func %p arg %p in %d.%08x to %u deferred",
- c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
- (u_int)(to_sbt & 0xffffffff), cpu);
- CC_UNLOCK(cc);
- return (cancelled);
- }
- cc = callout_cpu_switch(c, cc, cpu);
- }
-#endif
-
- callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
- CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
- cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
- (u_int)(to_sbt & 0xffffffff));
- CC_UNLOCK(cc);
-
- return (cancelled);
+ /* get callback started, if any */
+ return (callout_restart_async(c, &coa, NULL, NULL));
}
/*
@@ -1090,189 +1177,79 @@
}
int
-_callout_stop_safe(struct callout *c, int safe)
+callout_stop(struct callout *c)
{
- struct callout_cpu *cc, *old_cc;
- struct lock_class *class;
- int direct, sq_locked, use_lock;
+ /* get callback stopped, if any */
+ return (callout_restart_async(c, NULL, NULL, NULL));
+}
- /*
- * Some old subsystems don't hold Giant while running a callout_stop(),
- * so just discard this check for the moment.
- */
- if (!safe && c->c_lock != NULL) {
- if (c->c_lock == &Giant.lock_object)
- use_lock = mtx_owned(&Giant);
- else {
- use_lock = 1;
- class = LOCK_CLASS(c->c_lock);
- class->lc_assert(c->c_lock, LA_XLOCKED);
- }
- } else
- use_lock = 0;
- direct = (c->c_flags & CALLOUT_DIRECT) != 0;
- sq_locked = 0;
- old_cc = NULL;
-again:
- cc = callout_lock(c);
+static void
+callout_drain_function(void *arg)
+{
+ wakeup(arg);
+}
- /*
- * If the callout was migrating while the callout cpu lock was
- * dropped, just drop the sleepqueue lock and check the states
- * again.
- */
- if (sq_locked != 0 && cc != old_cc) {
-#ifdef SMP
- CC_UNLOCK(cc);
- sleepq_release(&old_cc->cc_exec_entity[direct].cc_waiting);
- sq_locked = 0;
- old_cc = NULL;
- goto again;
-#else
- panic("migration should not happen");
-#endif
- }
+int
+callout_drain_async(struct callout *c, callout_func_t *fn, void *arg)
+{
+ /* get callback stopped, if any */
+ return (callout_restart_async(c, NULL, fn, arg) & 2);
+}
- /*
- * If the callout isn't pending, it's not on the queue, so
- * don't attempt to remove it from the queue. We can try to
- * stop it by other means however.
- */
- if (!(c->c_flags & CALLOUT_PENDING)) {
- c->c_flags &= ~CALLOUT_ACTIVE;
+int
+callout_drain(struct callout *c)
+{
+ int cancelled;
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
+ "Draining callout");
+
+ callout_lock_client(c->c_flags, c->c_lock);
+
+ /* at this point the "c->c_cpu" field is not changing */
+
+ cancelled = callout_drain_async(c, &callout_drain_function, c);
+
+ if (cancelled != 0) {
+ struct callout_cpu *cc;
+ int direct;
+
+ CTR3(KTR_CALLOUT, "need to drain %p func %p arg %p",
+ c, c->c_func, c->c_arg);
+
+ cc = callout_lock(c);
+ direct = ((c->c_flags & CALLOUT_DIRECT) != 0);
+
/*
- * If it wasn't on the queue and it isn't the current
- * callout, then we can't stop it, so just bail.
+ * We've gotten our callout CPU lock, it is safe to
+ * drop the initial lock:
*/
- if (cc->cc_exec_entity[direct].cc_curr != c) {
- CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
- c, c->c_func, c->c_arg);
- CC_UNLOCK(cc);
- if (sq_locked)
- sleepq_release(
- &cc->cc_exec_entity[direct].cc_waiting);
- return (0);
- }
+ callout_unlock_client(c->c_flags, c->c_lock);
- if (safe) {
- /*
- * The current callout is running (or just
- * about to run) and blocking is allowed, so
- * just wait for the current invocation to
- * finish.
- */
- while (cc->cc_exec_entity[direct].cc_curr == c) {
- /*
- * Use direct calls to sleepqueue interface
- * instead of cv/msleep in order to avoid
- * a LOR between cc_lock and sleepqueue
- * chain spinlocks. This piece of code
- * emulates a msleep_spin() call actually.
- *
- * If we already have the sleepqueue chain
- * locked, then we can safely block. If we
- * don't already have it locked, however,
- * we have to drop the cc_lock to lock
- * it. This opens several races, so we
- * restart at the beginning once we have
- * both locks. If nothing has changed, then
- * we will end up back here with sq_locked
- * set.
- */
- if (!sq_locked) {
- CC_UNLOCK(cc);
- sleepq_lock(
- &cc->cc_exec_entity[direct].cc_waiting);
- sq_locked = 1;
- old_cc = cc;
- goto again;
- }
+ /* Wait for drain to complete */
- /*
- * Migration could be cancelled here, but
- * as long as it is still not sure when it
- * will be packed up, just let softclock()
- * take care of it.
- */
- cc->cc_exec_entity[direct].cc_waiting = true;
- DROP_GIANT();
- CC_UNLOCK(cc);
- sleepq_add(
- &cc->cc_exec_entity[direct].cc_waiting,
- &cc->cc_lock.lock_object, "codrain",
- SLEEPQ_SLEEP, 0);
- sleepq_wait(
- &cc->cc_exec_entity[direct].cc_waiting,
- 0);
- sq_locked = 0;
- old_cc = NULL;
+ while (cc->cc_exec_entity[direct].cc_curr == c)
+ msleep_spin(c, (struct mtx *)&cc->cc_lock, "codrain", 0);
- /* Reacquire locks previously released. */
- PICKUP_GIANT();
- CC_LOCK(cc);
- }
- } else if (use_lock &&
- !cc->cc_exec_entity[direct].cc_cancel) {
- /*
- * The current callout is waiting for its
- * lock which we hold. Cancel the callout
- * and return. After our caller drops the
- * lock, the callout will be skipped in
- * softclock().
- */
- cc->cc_exec_entity[direct].cc_cancel = true;
- CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
- c, c->c_func, c->c_arg);
- KASSERT(!cc_cce_migrating(cc, direct),
- ("callout wrongly scheduled for migration"));
- CC_UNLOCK(cc);
- KASSERT(!sq_locked, ("sleepqueue chain locked"));
- return (1);
- } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) {
- c->c_flags &= ~CALLOUT_DFRMIGRATION;
- CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
- c, c->c_func, c->c_arg);
- CC_UNLOCK(cc);
- return (1);
- }
- CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
- c, c->c_func, c->c_arg);
CC_UNLOCK(cc);
- KASSERT(!sq_locked, ("sleepqueue chain still locked"));
- return (0);
+ } else {
+ callout_unlock_client(c->c_flags, c->c_lock);
}
- if (sq_locked)
- sleepq_release(&cc->cc_exec_entity[direct].cc_waiting);
- c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
-
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
- if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
- if (cc->cc_exec_next_dir == c)
- cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le);
- LIST_REMOVE(c, c_links.le);
- } else
- TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
- callout_cc_del(c, cc);
- CC_UNLOCK(cc);
- return (1);
+ return (cancelled & 1);
}
void
callout_init(struct callout *c, int mpsafe)
{
- bzero(c, sizeof *c);
if (mpsafe) {
- c->c_lock = NULL;
- c->c_flags = CALLOUT_RETURNUNLOCKED;
+ _callout_init_lock(c, NULL, CALLOUT_RETURNUNLOCKED);
} else {
- c->c_lock = &Giant.lock_object;
- c->c_flags = 0;
+ _callout_init_lock(c, &Giant.lock_object, 0);
}
- c->c_cpu = timeout_cpu;
}
void
@@ -1279,15 +1256,26 @@
_callout_init_lock(struct callout *c, struct lock_object *lock, int flags)
{
bzero(c, sizeof *c);
+ KASSERT((flags & ~CALLOUT_RETURNUNLOCKED) == 0,
+ ("callout_init_lock: bad flags 0x%08x", flags));
+ flags &= CALLOUT_RETURNUNLOCKED;
+ if (lock != NULL) {
+ struct lock_class *class = LOCK_CLASS(lock);
+ if (class == &lock_class_mtx_sleep)
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_MUTEX);
+ else if (class == &lock_class_mtx_spin)
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_SPIN);
+ else if (class == &lock_class_rm)
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_RM);
+ else if (class == &lock_class_rw)
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_RW);
+ else
+ panic("callout_init_lock: Unsupported lock class '%s'\n", class->lc_name);
+ } else {
+ flags |= CALLOUT_SET_LC(CALLOUT_LC_UNUSED_0);
+ }
c->c_lock = lock;
- KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0,
- ("callout_init_lock: bad flags %d", flags));
- KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0,
- ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock"));
- KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
- (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class",
- __func__));
- c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
+ c->c_flags = flags;
c->c_cpu = timeout_cpu;
}
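
With this rework, kern_timeout.c funnels both start and stop through callout_restart_async() and exposes callout_drain_async(), which registers a function to be invoked once a currently running handler has finished instead of sleeping for it. A hedged usage sketch (the foo_* names are hypothetical; the wakeup-based wait mirrors what the patched callout_drain() itself does):

	static void
	foo_drained(void *arg)
	{
		/* invoked after the last handler invocation has returned */
		wakeup(arg);
	}

	/* somewhere in a hypothetical detach path */
	if (callout_drain_async(&sc->foo_callout, &foo_drained, sc) != 0) {
		/* a handler was still running; foo_drained(sc) fires later */
	} else {
		/* callout already stopped; no drain callback will follow */
	}
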
Index: sys/kern/subr_sleepqueue.c
===================================================================
--- sys/kern/subr_sleepqueue.c
+++ sys/kern/subr_sleepqueue.c
@@ -152,7 +152,8 @@
*/
static int sleepq_catch_signals(void *wchan, int pri);
static int sleepq_check_signals(void);
-static int sleepq_check_timeout(void);
+static int sleepq_check_timeout(struct thread *);
+static void sleepq_stop_timeout(struct thread *);
#ifdef INVARIANTS
static void sleepq_dtor(void *mem, int size, void *arg);
#endif
@@ -373,17 +374,14 @@
sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
int flags)
{
- struct sleepqueue_chain *sc;
struct thread *td;
td = curthread;
- sc = SC_LOOKUP(wchan);
- mtx_assert(&sc->sc_lock, MA_OWNED);
- MPASS(TD_ON_SLEEPQ(td));
- MPASS(td->td_sleepqueue == NULL);
- MPASS(wchan != NULL);
+
+ mtx_lock_spin(&td->td_slpmutex);
callout_reset_sbt_on(&td->td_slpcallout, sbt, pr,
sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC);
+ mtx_unlock_spin(&td->td_slpmutex);
}
/*
@@ -559,11 +557,8 @@
* Check to see if we timed out.
*/
static int
-sleepq_check_timeout(void)
+sleepq_check_timeout(struct thread *td)
{
- struct thread *td;
-
- td = curthread;
THREAD_LOCK_ASSERT(td, MA_OWNED);
/*
@@ -573,28 +568,21 @@
td->td_flags &= ~TDF_TIMEOUT;
return (EWOULDBLOCK);
}
-
- /*
- * If TDF_TIMOFAIL is set, the timeout ran after we had
- * already been woken up.
- */
- if (td->td_flags & TDF_TIMOFAIL)
- td->td_flags &= ~TDF_TIMOFAIL;
-
- /*
- * If callout_stop() fails, then the timeout is running on
- * another CPU, so synchronize with it to avoid having it
- * accidentally wake up a subsequent sleep.
- */
- else if (callout_stop(&td->td_slpcallout) == 0) {
- td->td_flags |= TDF_TIMEOUT;
- TD_SET_SLEEPING(td);
- mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
- }
return (0);
}
/*
+ * Atomically stop the timeout by using a mutex.
+ */
+static void
+sleepq_stop_timeout(struct thread *td)
+{
+ mtx_lock_spin(&td->td_slpmutex);
+ callout_stop(&td->td_slpcallout);
+ mtx_unlock_spin(&td->td_slpmutex);
+}
+
+/*
* Check to see if we were awoken by a signal.
*/
static int
@@ -664,9 +652,11 @@
MPASS(!(td->td_flags & TDF_SINTR));
thread_lock(td);
sleepq_switch(wchan, pri);
- rval = sleepq_check_timeout();
+ rval = sleepq_check_timeout(td);
thread_unlock(td);
+ sleepq_stop_timeout(td);
+
return (rval);
}
@@ -677,12 +667,18 @@
int
sleepq_timedwait_sig(void *wchan, int pri)
{
+ struct thread *td;
int rcatch, rvalt, rvals;
+ td = curthread;
+
rcatch = sleepq_catch_signals(wchan, pri);
- rvalt = sleepq_check_timeout();
+ rvalt = sleepq_check_timeout(td);
rvals = sleepq_check_signals();
- thread_unlock(curthread);
+ thread_unlock(td);
+
+ sleepq_stop_timeout(td);
+
if (rcatch)
return (rcatch);
if (rvals)
@@ -889,64 +885,49 @@
static void
sleepq_timeout(void *arg)
{
- struct sleepqueue_chain *sc;
- struct sleepqueue *sq;
- struct thread *td;
- void *wchan;
- int wakeup_swapper;
+ struct thread *td = arg;
+ int wakeup_swapper = 0;
- td = arg;
- wakeup_swapper = 0;
CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
- /*
- * First, see if the thread is asleep and get the wait channel if
- * it is.
- */
+ /* Handle the three cases which can happen */
+
thread_lock(td);
- if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
- wchan = td->td_wchan;
- sc = SC_LOOKUP(wchan);
- THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
- sq = sleepq_lookup(wchan);
- MPASS(sq != NULL);
- td->td_flags |= TDF_TIMEOUT;
- wakeup_swapper = sleepq_resume_thread(sq, td, 0);
- thread_unlock(td);
- if (wakeup_swapper)
- kick_proc0();
- return;
- }
+ if (TD_ON_SLEEPQ(td)) {
+ if (TD_IS_SLEEPING(td)) {
+ struct sleepqueue_chain *sc;
+ struct sleepqueue *sq;
+ void *wchan;
- /*
- * If the thread is on the SLEEPQ but isn't sleeping yet, it
- * can either be on another CPU in between sleepq_add() and
- * one of the sleepq_*wait*() routines or it can be in
- * sleepq_catch_signals().
- */
- if (TD_ON_SLEEPQ(td)) {
- td->td_flags |= TDF_TIMEOUT;
- thread_unlock(td);
- return;
+ /*
+ * Case I - thread is asleep and needs to be
+ * awoken:
+ */
+ wchan = td->td_wchan;
+ sc = SC_LOOKUP(wchan);
+ THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
+ sq = sleepq_lookup(wchan);
+ MPASS(sq != NULL);
+ td->td_flags |= TDF_TIMEOUT;
+ wakeup_swapper = sleepq_resume_thread(sq, td, 0);
+ } else {
+ /*
+ * Case II - cancel going to sleep by setting
+ * the timeout flag because the target thread
+ * is not asleep yet. It can be on another CPU
+ * in between sleepq_add() and one of the
+ * sleepq_*wait*() routines or it can be in
+ * sleepq_catch_signals().
+ */
+ td->td_flags |= TDF_TIMEOUT;
+ }
+ } else {
+ /*
+ * Case III - thread is already woken up by a wakeup
+ * call and should not timeout. Nothing to do!
+ */
}
-
- /*
- * Now check for the edge cases. First, if TDF_TIMEOUT is set,
- * then the other thread has already yielded to us, so clear
- * the flag and resume it. If TDF_TIMEOUT is not set, then the
- * we know that the other thread is not on a sleep queue, but it
- * hasn't resumed execution yet. In that case, set TDF_TIMOFAIL
- * to let it know that the timeout has already run and doesn't
- * need to be canceled.
- */
- if (td->td_flags & TDF_TIMEOUT) {
- MPASS(TD_IS_SLEEPING(td));
- td->td_flags &= ~TDF_TIMEOUT;
- TD_CLR_SLEEPING(td);
- wakeup_swapper = setrunnable(td);
- } else
- td->td_flags |= TDF_TIMOFAIL;
thread_unlock(td);
if (wakeup_swapper)
kick_proc0();
Index: sys/ofed/include/linux/completion.h
===================================================================
--- sys/ofed/include/linux/completion.h
+++ sys/ofed/include/linux/completion.h
@@ -105,7 +105,9 @@
if (c->done)
break;
sleepq_add(c, NULL, "completion", flags, 0);
+ sleepq_release(c);
sleepq_set_timeout(c, end - ticks);
+ sleepq_lock(c);
if (flags & SLEEPQ_INTERRUPTIBLE) {
if (sleepq_timedwait_sig(c, 0) != 0)
return (-ERESTARTSYS);
Index: sys/sys/_callout.h
===================================================================
--- sys/sys/_callout.h
+++ sys/sys/_callout.h
@@ -46,6 +46,17 @@
SLIST_HEAD(callout_slist, callout);
TAILQ_HEAD(callout_tailq, callout);
+typedef void callout_func_t(void *);
+
+struct callout_args {
+ sbintime_t time; /* absolute time for the event */
+ sbintime_t precision; /* delta allowed wrt opt */
+ void *arg; /* function argument */
+ callout_func_t *func; /* function to call */
+ int flags; /* flags passed to callout_reset() */
+ int cpu; /* CPU we're scheduled on */
+};
+
struct callout {
union {
LIST_ENTRY(callout) le;
@@ -52,13 +63,13 @@
SLIST_ENTRY(callout) sle;
TAILQ_ENTRY(callout) tqe;
} c_links;
- sbintime_t c_time; /* ticks to the event */
+ sbintime_t c_time; /* absolute time for the event */
sbintime_t c_precision; /* delta allowed wrt opt */
void *c_arg; /* function argument */
- void (*c_func)(void *); /* function to call */
- struct lock_object *c_lock; /* lock to handle */
+ callout_func_t *c_func; /* function to call */
+ struct lock_object *c_lock; /* callback lock */
int c_flags; /* state of this entry */
- volatile int c_cpu; /* CPU we're scheduled on */
+ int c_cpu; /* CPU we're scheduled on */
};
#endif
Index: sys/sys/callout.h
===================================================================
--- sys/sys/callout.h
+++ sys/sys/callout.h
@@ -45,10 +45,12 @@
#define CALLOUT_PENDING 0x0004 /* callout is waiting for timeout */
#define CALLOUT_MPSAFE 0x0008 /* callout handler is mp safe */
#define CALLOUT_RETURNUNLOCKED 0x0010 /* handler returns with mtx unlocked */
-#define CALLOUT_SHAREDLOCK 0x0020 /* callout lock held in shared mode */
-#define CALLOUT_DFRMIGRATION 0x0040 /* callout in deferred migration mode */
+#define CALLOUT_UNUSED_5 0x0020 /* --available-- */
+#define CALLOUT_DEFRESTART 0x0040 /* callout restart is deferred */
#define CALLOUT_PROCESSED 0x0080 /* callout in wheel or processing list? */
#define CALLOUT_DIRECT 0x0100 /* allow exec from hw int context */
+#define CALLOUT_SET_LC(x) (((x) & 7) << 16) /* set lock class */
+#define CALLOUT_GET_LC(x) (((x) >> 16) & 7) /* get lock class */
#define C_DIRECT_EXEC 0x0001 /* direct execution of callout */
#define C_PRELBITS 7
@@ -65,7 +67,8 @@
#ifdef _KERNEL
#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE)
#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE)
-#define callout_drain(c) _callout_stop_safe(c, 1)
+int callout_drain(struct callout *);
+int callout_drain_async(struct callout *, callout_func_t *, void *);
void callout_init(struct callout *, int);
void _callout_init_lock(struct callout *, struct lock_object *, int);
#define callout_init_mtx(c, mtx, flags) \
@@ -79,7 +82,7 @@
NULL, (flags))
#define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING)
int callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t,
- void (*)(void *), void *, int, int);
+ callout_func_t *, void *, int, int);
#define callout_reset_sbt(c, sbt, pr, fn, arg, flags) \
callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), (c)->c_cpu, (flags))
#define callout_reset_sbt_curcpu(c, sbt, pr, fn, arg, flags) \
@@ -103,8 +106,7 @@
int callout_schedule_on(struct callout *, int, int);
#define callout_schedule_curcpu(c, on_tick) \
callout_schedule_on((c), (on_tick), PCPU_GET(cpuid))
-#define callout_stop(c) _callout_stop_safe(c, 0)
-int _callout_stop_safe(struct callout *, int);
+int callout_stop(struct callout *);
void callout_process(sbintime_t now);
#endif
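
The callout.h changes above reserve three bits of c_flags for the class of the callback lock: _callout_init_lock() stores it with CALLOUT_SET_LC() and kern_timeout.c indexes its callout_mutex_ops table with CALLOUT_GET_LC(). A small worked example of the encoding (the CALLOUT_LC_* values are private to kern_timeout.c and are shown here only to illustrate the bit layout):

	/* CALLOUT_LC_MUTEX is 5 in the kern_timeout.c enum */
	int flags = CALLOUT_RETURNUNLOCKED | CALLOUT_SET_LC(5);

	/* (5 & 7) << 16 == 0x50000, so bits 16..18 carry the class */
	KASSERT(CALLOUT_GET_LC(flags) == 5, ("lock class mismatch"));
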
Index: sys/sys/proc.h
===================================================================
--- sys/sys/proc.h
+++ sys/sys/proc.h
@@ -308,6 +308,7 @@
} td_uretoff; /* (k) Syscall aux returns. */
#define td_retval td_uretoff.tdu_retval
struct callout td_slpcallout; /* (h) Callout for sleep. */
+ struct mtx td_slpmutex; /* (h) Mutex for sleep callout */
struct trapframe *td_frame; /* (k) */
struct vm_object *td_kstack_obj;/* (a) Kstack object. */
vm_offset_t td_kstack; /* (a) Kernel VA of kstack. */
@@ -364,7 +365,7 @@
#define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */
#define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */
#define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */
-#define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */
+#define TDF_UNUSED12 0x00001000 /* --available-- */
#define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */
#define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */
#define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */
@@ -704,7 +705,7 @@
#define SWT_OWEPREEMPT 2 /* Switching due to opepreempt. */
#define SWT_TURNSTILE 3 /* Turnstile contention. */
#define SWT_SLEEPQ 4 /* Sleepq wait. */
-#define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */
+#define SWT_UNUSED5 5 /* --available-- */
#define SWT_RELINQUISH 6 /* yield call. */
#define SWT_NEEDRESCHED 7 /* NEEDRESCHED was set. */
#define SWT_IDLE 8 /* Switching from the idle thread. */
