/*
 * Patch overview (reviewer notes; this text sits ahead of the first
 * "diff --git" header and is not part of any hunk).
 *
 * Files touched: sys/kern/kern_timeout.c and sys/sys/systm.h.
 *
 * kern_timeout.c:
 *  - Adds a forward declaration for the new static softclock_thread() and
 *    three additional #include lines.  NOTE(review): the header names on
 *    the #include lines are missing in this copy of the patch and the line
 *    breaks inside the hunks have been flattened; restore both from the
 *    original change before attempting to apply it.
 *  - struct callout_cpu: the "void *cc_cookie" member is replaced by
 *    "struct thread *cc_thread".
 *  - cc_lock is now initialised with MTX_SPIN only; MTX_RECURSE is dropped.
 *  - start_softclock(): instead of swi_add() plus intr_event_bind(), each
 *    CPU gets a thread created with kproc_kthread_add(softclock_thread, cc,
 *    ..., RFSTOPPED, ...).  A creation failure panics.  With CC_LOCK(cc)
 *    and the thread lock held, the thread is given sched_class PRI_ITHD and
 *    priority PI_SWI(SWI_CLOCK), is marked with TD_SET_IWAIT, and has its
 *    thread lock switched to cc_lock via thread_lock_set().  Pinning, when
 *    requested through pin_default_swi / pin_pcpu_swi, now uses
 *    cpuset_setithread(); a failure is only reported with printf(), which
 *    now also prints the error code.
 *  - callout_process(): when cc_expireq is non-empty and the thread is
 *    TD_AWAITING_INTR, the new code clears IWAIT and calls
 *    sched_add(td, SRQ_INTR) in place of swi_sched(); that path contains no
 *    explicit mtx_unlock_spin_flags() call.  NOTE(review): presumably
 *    cc_lock, acting as the thread lock, is released inside sched_add() -
 *    confirm against the scheduler implementation.  The other paths unlock
 *    cc_lock explicitly.
 *  - softclock() (non-static, a single pass over cc_expireq) becomes the
 *    static softclock_thread(), an endless loop: while cc_expireq is empty
 *    it sets cc_lock as its thread lock, marks itself IWAIT and calls
 *    mi_switch(SW_VOL | SWT_IWAIT), taking CC_LOCK(cc) again afterwards; it
 *    then drains cc_expireq through softclock_call_cc().  Under
 *    CALLOUT_PROFILING the counters are reset before each drain and the
 *    averages are updated after it.
 *  - The cc_cancel comment now names softclock_call_cc() rather than
 *    softclock().
 *
 * systm.h:
 *  - Removes the "void softclock(void *);" prototype.
 */
diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -52,14 +52,17 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include #include +#include #ifdef DDB #include @@ -77,6 +80,8 @@ SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *"); SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *"); +static void softclock_thread(void *arg); + #ifdef CALLOUT_PROFILING static int avg_depth; SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, @@ -166,7 +171,7 @@ struct callout_tailq cc_expireq; sbintime_t cc_firstevent; sbintime_t cc_lastscan; - void *cc_cookie; + struct thread *cc_thread; u_int cc_bucket; u_int cc_inited; #ifdef KTR @@ -222,7 +227,7 @@ * relevant callout completes. * cc_cancel - Changing to 1 with both callout_lock and cc_lock held * guarantees that the current callout will not run. - * The softclock() function sets this to 0 before it + * The softclock_call_cc() function sets this to 0 before it * drops callout_lock to acquire c_lock, and it calls * the handler only if curr_cancelled is still 0 after * cc_lock is successfully acquired. 
@@ -316,7 +321,7 @@ { int i; - mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); + mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN); cc->cc_inited = 1; cc->cc_callwheel = malloc_domainset(sizeof(struct callout_list) * callwheelsize, M_CALLOUT, @@ -369,28 +374,38 @@ static void start_softclock(void *dummy) { + struct proc *p; + struct thread *td; struct callout_cpu *cc; - char name[MAXCOMLEN]; - int cpu; + int cpu, error; bool pin_swi; - struct intr_event *ie; + p = NULL; CPU_FOREACH(cpu) { cc = CC_CPU(cpu); - snprintf(name, sizeof(name), "clock (%d)", cpu); - ie = NULL; - if (swi_add(&ie, name, softclock, cc, SWI_CLOCK, - INTR_MPSAFE, &cc->cc_cookie)) - panic("died while creating standard software ithreads"); + error = kproc_kthread_add(softclock_thread, cc, &p, &td, + RFSTOPPED, 0, "clock", "clock (%d)", cpu); + if (error != 0) + panic("failed to create softclock thread for cpu %d: %d", + cpu, error); + CC_LOCK(cc); + cc->cc_thread = td; + thread_lock(td); + sched_class(td, PRI_ITHD); + sched_prio(td, PI_SWI(SWI_CLOCK)); + TD_SET_IWAIT(td); + thread_lock_set(td, (struct mtx *)&cc->cc_lock); + thread_unlock(td); if (cpu == cc_default_cpu) pin_swi = pin_default_swi; else pin_swi = pin_pcpu_swi; - if (pin_swi && (intr_event_bind(ie, cpu) != 0)) { - printf("%s: %s clock couldn't be pinned to cpu %d\n", - __func__, - cpu == cc_default_cpu ? "default" : "per-cpu", - cpu); + if (pin_swi) { + error = cpuset_setithread(td->td_tid, cpu); + if (error != 0) + printf("%s: %s clock couldn't be pinned to cpu %d: %d\n", + __func__, cpu == cc_default_cpu ? 
+ "default" : "per-cpu", cpu, error); } } } @@ -418,6 +433,7 @@ struct callout *tmp, *tmpn; struct callout_cpu *cc; struct callout_list *sc; + struct thread *td; sbintime_t first, last, max, tmp_max; uint32_t lookahead; u_int firstb, lastb, nowb; @@ -529,13 +545,15 @@ avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; #endif - mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); - /* - * swi_sched acquires the thread lock, so we don't want to call it - * with cc_lock held; incorrect locking order. - */ - if (!TAILQ_EMPTY(&cc->cc_expireq)) - swi_sched(cc->cc_cookie, 0); + if (!TAILQ_EMPTY(&cc->cc_expireq)) { + td = cc->cc_thread; + if (TD_AWAITING_INTR(td)) { + TD_CLR_IWAIT(td); + sched_add(td, SRQ_INTR); + } else + mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); + } else + mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); } static struct callout_cpu * @@ -797,38 +815,57 @@ */ /* - * Software (low priority) clock interrupt. + * Software (low priority) clock interrupt thread handler. * Run periodic events from timeout queue. */ -void -softclock(void *arg) +static void +softclock_thread(void *arg) { + struct thread *td = curthread; struct callout_cpu *cc; struct callout *c; #ifdef CALLOUT_PROFILING - int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0; + int depth, gcalls, lockcalls, mpcalls; #endif cc = (struct callout_cpu *)arg; CC_LOCK(cc); - while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); - softclock_call_cc(c, cc, + for (;;) { + while (TAILQ_EMPTY(&cc->cc_expireq)) { + /* + * Use CC_LOCK(cc) as the thread_lock while + * idle. + */ + thread_lock(td); + thread_lock_set(td, (struct mtx *)&cc->cc_lock); + TD_SET_IWAIT(td); + mi_switch(SW_VOL | SWT_IWAIT); + + /* mi_switch() drops thread_lock(). 
*/ + CC_LOCK(cc); + } + #ifdef CALLOUT_PROFILING - &mpcalls, &lockcalls, &gcalls, + depth = gcalls = lockcalls = mpcalls = 0; #endif - 0); + while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + softclock_call_cc(c, cc, #ifdef CALLOUT_PROFILING - ++depth; + &mpcalls, &lockcalls, &gcalls, #endif - } + 0); #ifdef CALLOUT_PROFILING - avg_depth += (depth * 1000 - avg_depth) >> 8; - avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; - avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; - avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; + ++depth; #endif - CC_UNLOCK(cc); + } +#ifdef CALLOUT_PROFILING + avg_depth += (depth * 1000 - avg_depth) >> 8; + avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; + avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; + avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; +#endif + } } void diff --git a/sys/sys/systm.h b/sys/sys/systm.h --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -471,7 +471,6 @@ void hardclock(int cnt, int usermode); void hardclock_sync(int cpu); -void softclock(void *); void statclock(int cnt, int usermode); void profclock(int cnt, int usermode, uintfptr_t pc);