diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -35,7 +35,6 @@ #include #include "assym.inc" -#include "opt_sched.h" /*****************************************************************************/ /* Scheduling */ @@ -136,13 +135,11 @@ movq %r15,TD_LOCK(%r13) /* Release the old thread */ sw1: leaq TD_MD_PCB(%r12),%r8 -#if defined(SCHED_ULE) movq $blocked_lock, %rdx movq TD_LOCK(%r12),%rcx cmpq %rcx, %rdx je sw1wait sw1cont: -#endif /* * At this point, we've switched address spaces and are ready * to load up the rest of the next context. @@ -492,7 +489,6 @@ END(resumectx) /* Wait for the new thread to become unblocked */ -#if defined(SCHED_ULE) sw1wait: 1: pause @@ -500,4 +496,3 @@ cmpq %rcx, %rdx je 1b jmp sw1cont -#endif diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -322,7 +322,6 @@ } SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL); - void cpu_setregs(void) { @@ -1353,6 +1352,8 @@ TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); } + sched_instance_select(); + link_elf_ireloc(); /* diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -24,6 +24,7 @@ makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support options SCHED_ULE # ULE scheduler +options SCHED_4BSD # Original 4.xBSD scheduler options NUMA # Non-Uniform Memory Architecture support options PREEMPTION # Enable kernel thread preemption options EXTERR_STRINGS diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c --- a/sys/arm/arm/machdep.c +++ b/sys/arm/arm/machdep.c @@ -523,6 +523,9 @@ /* Do basic tuning, hz etc */ init_param1(); + sched_instance_select(); + /* link_elf_ireloc(); */ + /* * Allocate a page for the system page mapped to 0xffff0000 * This page will just contain the system vectors and can be diff --git a/sys/arm/arm/swtch-v6.S b/sys/arm/arm/swtch-v6.S --- a/sys/arm/arm/swtch-v6.S +++ b/sys/arm/arm/swtch-v6.S @@ -79,7 +79,6 @@ */ #include "assym.inc" -#include "opt_sched.h" #include #include @@ -432,11 +431,7 @@ * r11 = newtd */ -#if defined(SMP) && defined(SCHED_ULE) - /* - * 386 and amd64 do the blocked lock test only for SMP and SCHED_ULE - * QQQ: What does it mean in reality and why is it done? - */ +#if defined(SMP) ldr r6, =blocked_lock 1: ldr r3, [r11, #TD_LOCK] /* atomic write regular read */ diff --git a/sys/arm/include/ifunc.h b/sys/arm/include/ifunc.h new file mode 100644 --- /dev/null +++ b/sys/arm/include/ifunc.h @@ -0,0 +1,10 @@ +/* + * This file is in the public domain. + */ + +#ifndef __ARM_IFUNC_H +#define __ARM_IFUNC_H + +#define __DO_NOT_HAVE_SYS_IFUNCS 1 + +#endif diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c --- a/sys/arm64/arm64/machdep.c +++ b/sys/arm64/arm64/machdep.c @@ -825,6 +825,7 @@ PCPU_SET(curthread, &thread0); PCPU_SET(midr, get_midr()); + sched_instance_select(); link_elf_ireloc(); #ifdef FDT try_load_dtb(); diff --git a/sys/arm64/arm64/swtch.S b/sys/arm64/arm64/swtch.S --- a/sys/arm64/arm64/swtch.S +++ b/sys/arm64/arm64/swtch.S @@ -31,7 +31,6 @@ #include "assym.inc" #include "opt_kstack_pages.h" -#include "opt_sched.h" #include @@ -197,7 +196,7 @@ * Release the old thread. 
*/ stlr x2, [x0, #TD_LOCK] -#if defined(SCHED_ULE) && defined(SMP) +#if defined(SMP) /* Spin if TD_LOCK points to a blocked_lock */ ldr x2, =_C_LABEL(blocked_lock) 1: diff --git a/sys/conf/NOTES b/sys/conf/NOTES --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -210,7 +210,7 @@ # options SCHED_4BSD options SCHED_STATS -#options SCHED_ULE +options SCHED_ULE ##################################################################### # SMP OPTIONS: diff --git a/sys/conf/files b/sys/conf/files --- a/sys/conf/files +++ b/sys/conf/files @@ -3921,6 +3921,7 @@ kern/p1003_1b.c standard kern/posix4_mib.c standard kern/sched_4bsd.c optional sched_4bsd +kern/sched_shim.c standard kern/sched_ule.c optional sched_ule kern/serdev_if.m standard kern/stack_protector.c standard \ diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1544,6 +1544,7 @@ /* Initialize preload_kmdp */ preload_initkmdp(!metadata_missing); + sched_instance_select(); link_elf_ireloc(); vm86_initialize(); diff --git a/sys/i386/i386/swtch.S b/sys/i386/i386/swtch.S --- a/sys/i386/i386/swtch.S +++ b/sys/i386/i386/swtch.S @@ -30,27 +30,11 @@ * SUCH DAMAGE. */ -#include "opt_sched.h" - #include #include "assym.inc" -#if defined(SMP) && defined(SCHED_ULE) -#define SETOP xchgl #define BLOCK_SPIN(reg) \ - movl $blocked_lock,%eax ; \ - 100: ; \ - lock ; \ - cmpxchgl %eax,TD_LOCK(reg) ; \ - jne 101f ; \ - pause ; \ - jmp 100b ; \ - 101: -#else -#define SETOP movl -#define BLOCK_SPIN(reg) -#endif /*****************************************************************************/ /* Scheduling */ @@ -162,7 +146,7 @@ /* Switchout td_lock */ movl %esi,%eax movl PCPU(CPUID),%esi - SETOP %eax,TD_LOCK(%edi) + xchgl %eax,TD_LOCK(%edi) /* Release bit from old pmap->pm_active */ movl PCPU(CURPMAP), %ebx @@ -181,7 +165,18 @@ #endif btsl %esi, PM_ACTIVE(%ebx) /* set new */ sw1: - BLOCK_SPIN(%ecx) +#ifdef SMP + movl $blocked_lock,%eax +100: + + lock + cmpxchgl %eax,TD_LOCK(reg) + jne 101f + pause + jmp 100b +101: +#endif + /* * At this point, we have managed thread locks and are ready * to load up the rest of the next context. diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -34,12 +34,10 @@ * SUCH DAMAGE. */ -#include #include "opt_hwpmc_hooks.h" #include "opt_hwt_hooks.h" #include "opt_sched.h" -#include #include #include #include @@ -68,12 +66,6 @@ #include #endif -#ifdef KDTRACE_HOOKS -#include -int __read_mostly dtrace_vtime_active; -dtrace_vtime_switch_func_t dtrace_vtime_switch_func; -#endif - /* * INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in * the range 100-256 Hz (approximately). @@ -139,7 +131,6 @@ static void schedcpu(void); static void schedcpu_thread(void); static void sched_priority(struct thread *td, u_char prio); -static void sched_setup(void *dummy); static void maybe_resched(struct thread *td); static void updatepri(struct thread *td); static void resetpriority(struct thread *td); @@ -155,13 +146,12 @@ schedcpu_thread, NULL }; -SYSINIT(schedcpu, SI_SUB_LAST, SI_ORDER_FIRST, kproc_start, - &sched_kp); -SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL); -static void sched_initticks(void *dummy); -SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, - NULL); +static void +sched_4bsd_schedcpu(void) +{ + kproc_start(&sched_kp); +} /* * Global run queue. 
@@ -198,7 +188,7 @@ } static int -sysctl_kern_quantum(SYSCTL_HANDLER_ARGS) +sysctl_kern_4bsd_quantum(SYSCTL_HANDLER_ARGS) { int error, new_val, period; @@ -215,77 +205,58 @@ return (0); } -SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, - "Scheduler"); +SYSCTL_NODE(_kern_sched, OID_AUTO, 4bsd, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "4BSD Scheduler"); -SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "4BSD", 0, - "Scheduler name"); -SYSCTL_PROC(_kern_sched, OID_AUTO, quantum, +SYSCTL_PROC(_kern_sched_4bsd, OID_AUTO, quantum, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, - sysctl_kern_quantum, "I", + sysctl_kern_4bsd_quantum, "I", "Quantum for timeshare threads in microseconds"); -SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0, +SYSCTL_INT(_kern_sched_4bsd, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0, "Quantum for timeshare threads in stathz ticks"); #ifdef SMP /* Enable forwarding of wakeups to all other cpus */ -static SYSCTL_NODE(_kern_sched, OID_AUTO, ipiwakeup, +static SYSCTL_NODE(_kern_sched_4bsd, OID_AUTO, ipiwakeup, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Kernel SMP"); static int runq_fuzz = 1; -SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, ""); +SYSCTL_INT(_kern_sched_4bsd, OID_AUTO, runq_fuzz, CTLFLAG_RW, + &runq_fuzz, 0, ""); static int forward_wakeup_enabled = 1; -SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW, +SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW, &forward_wakeup_enabled, 0, "Forwarding of wakeup to idle CPUs"); static int forward_wakeups_requested = 0; -SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD, +SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD, &forward_wakeups_requested, 0, "Requests for Forwarding of wakeup to idle CPUs"); static int forward_wakeups_delivered = 0; -SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD, +SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD, &forward_wakeups_delivered, 0, "Completed Forwarding of wakeup to idle CPUs"); static int forward_wakeup_use_mask = 1; -SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW, +SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW, &forward_wakeup_use_mask, 0, "Use the mask of idle cpus"); static int forward_wakeup_use_loop = 0; -SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW, +SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW, &forward_wakeup_use_loop, 0, "Use a loop to find idle cpus"); #endif #if 0 static int sched_followon = 0; -SYSCTL_INT(_kern_sched, OID_AUTO, followon, CTLFLAG_RW, +SYSCTL_INT(_kern_sched_4bsd, OID_AUTO, followon, CTLFLAG_RW, &sched_followon, 0, "allow threads to share a quantum"); #endif -SDT_PROVIDER_DEFINE(sched); - -SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *", - "struct proc *", "uint8_t"); -SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *", - "struct proc *", "void *"); -SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *", - "struct proc *", "void *", "int"); -SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *", - "struct proc *", "uint8_t", "struct thread *"); -SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int"); -SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *", - "struct proc *"); -SDT_PROBE_DEFINE(sched, , , on__cpu); -SDT_PROBE_DEFINE(sched, , , remain__cpu); -SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *", - "struct proc *"); - static __inline void 
sched_load_add(void) { @@ -322,7 +293,7 @@ * determines if the new thread should preempt the current thread. If so, * it sets td_owepreempt to request a preemption. */ -int +static int maybe_preempt(struct thread *td) { #ifdef PREEMPTION @@ -441,10 +412,7 @@ #define loadfactor(loadav) (2 * (loadav)) #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) -/* decay 95% of `ts_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ -static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ -SYSCTL_UINT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, - "Decay factor used for updating %CPU"); +extern fixpt_t ccpu; /* * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the @@ -640,10 +608,14 @@ sched_prio(td, td->td_user_pri); } -/* ARGSUSED */ static void -sched_setup(void *dummy) +sched_4bsd_setup(void) { + /* + * Decay 95% of `ts_pctcpu' in 60 seconds; see CCPU_SHIFT + * before changing. + */ + ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ setup_runqs(); @@ -655,7 +627,7 @@ * This routine determines time constants after stathz and hz are setup. */ static void -sched_initticks(void *dummy) +sched_4bsd_initticks(void) { realstathz = stathz ? stathz : hz; @@ -672,8 +644,8 @@ * Called from: * proc0_init() */ -void -schedinit(void) +static void +sched_4bsd_init(void) { /* @@ -684,15 +656,15 @@ mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN); } -void -schedinit_ap(void) +static void +sched_4bsd_init_ap(void) { /* Nothing needed. */ } -bool -sched_runnable(void) +static bool +sched_4bsd_runnable(void) { #ifdef SMP return (runq_not_empty(&runq) || @@ -702,18 +674,14 @@ #endif } -int -sched_rr_interval(void) +static int +sched_4bsd_rr_interval(void) { /* Convert sched_slice from stathz to hz. */ return (imax(1, (sched_slice * hz + realstathz / 2) / realstathz)); } -SCHED_STAT_DEFINE(ithread_demotions, "Interrupt thread priority demotions"); -SCHED_STAT_DEFINE(ithread_preemptions, - "Interrupt thread preemptions due to time-sharing"); - /* * We adjust the priority of the current process. The priority of a * process gets worse as it accumulates CPU time. The cpu usage @@ -773,8 +741,8 @@ stat->idlecalls = 0; } -void -sched_clock(struct thread *td, int cnt) +static void +sched_4bsd_clock(struct thread *td, int cnt) { for ( ; cnt > 0; cnt--) @@ -784,8 +752,8 @@ /* * Charge child's scheduling CPU usage to parent. 
*/ -void -sched_exit(struct proc *p, struct thread *td) +static void +sched_4bsd_exit(struct proc *p, struct thread *td) { KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "proc exit", @@ -795,8 +763,8 @@ sched_exit_thread(FIRST_THREAD_IN_PROC(p), td); } -void -sched_exit_thread(struct thread *td, struct thread *child) +static void +sched_4bsd_exit_thread(struct thread *td, struct thread *child) { KTR_STATE1(KTR_SCHED, "thread", sched_tdname(child), "exit", @@ -811,14 +779,14 @@ thread_unlock(child); } -void -sched_fork(struct thread *td, struct thread *childtd) +static void +sched_4bsd_fork(struct thread *td, struct thread *childtd) { sched_fork_thread(td, childtd); } -void -sched_fork_thread(struct thread *td, struct thread *childtd) +static void +sched_4bsd_fork_thread(struct thread *td, struct thread *childtd) { struct td_sched *ts, *tsc; @@ -836,8 +804,8 @@ ts->ts_slice = 1; } -void -sched_nice(struct proc *p, int nice) +static void +sched_4bsd_nice(struct proc *p, int nice) { struct thread *td; @@ -851,8 +819,8 @@ } } -void -sched_class(struct thread *td, int class) +static void +sched_4bsd_class(struct thread *td, int class) { THREAD_LOCK_ASSERT(td, MA_OWNED); td->td_pri_class = class; @@ -890,8 +858,8 @@ * Update a thread's priority when it is lent another thread's * priority. */ -void -sched_lend_prio(struct thread *td, u_char prio) +static void +sched_4bsd_lend_prio(struct thread *td, u_char prio) { td->td_flags |= TDF_BORROWING; @@ -906,8 +874,8 @@ * important than prio the thread will keep a priority boost * of prio. */ -void -sched_unlend_prio(struct thread *td, u_char prio) +static void +sched_4bsd_unlend_prio(struct thread *td, u_char prio) { u_char base_pri; @@ -923,8 +891,8 @@ sched_lend_prio(td, prio); } -void -sched_prio(struct thread *td, u_char prio) +static void +sched_4bsd_prio(struct thread *td, u_char prio) { u_char oldprio; @@ -950,8 +918,8 @@ turnstile_adjust(td, oldprio); } -void -sched_ithread_prio(struct thread *td, u_char prio) +static void +sched_4bsd_ithread_prio(struct thread *td, u_char prio) { THREAD_LOCK_ASSERT(td, MA_OWNED); MPASS(td->td_pri_class == PRI_ITHD); @@ -959,8 +927,8 @@ sched_prio(td, prio); } -void -sched_user_prio(struct thread *td, u_char prio) +static void +sched_4bsd_user_prio(struct thread *td, u_char prio) { THREAD_LOCK_ASSERT(td, MA_OWNED); @@ -970,8 +938,8 @@ td->td_user_pri = prio; } -void -sched_lend_user_prio(struct thread *td, u_char prio) +static void +sched_4bsd_lend_user_prio(struct thread *td, u_char prio) { THREAD_LOCK_ASSERT(td, MA_OWNED); @@ -986,8 +954,8 @@ /* * Like the above but first check if there is anything to do. 
*/ -void -sched_lend_user_prio_cond(struct thread *td, u_char prio) +static void +sched_4bsd_lend_user_prio_cond(struct thread *td, u_char prio) { if (td->td_lend_user_pri == prio) @@ -998,8 +966,8 @@ thread_unlock(td); } -void -sched_sleep(struct thread *td, int pri) +static void +sched_4bsd_sleep(struct thread *td, int pri) { THREAD_LOCK_ASSERT(td, MA_OWNED); @@ -1009,8 +977,8 @@ sched_prio(td, pri); } -void -sched_switch(struct thread *td, int flags) +static void +sched_4bsd_sswitch(struct thread *td, int flags) { struct thread *newtd; struct mtx *tmtx; @@ -1142,8 +1110,8 @@ mtx_unlock_spin(&sched_lock); } -void -sched_wakeup(struct thread *td, int srqflags) +static void +sched_4bsd_wakeup(struct thread *td, int srqflags) { struct td_sched *ts; @@ -1318,8 +1286,8 @@ } #endif -void -sched_add(struct thread *td, int flags) +static void +sched_4bsd_add(struct thread *td, int flags) #ifdef SMP { cpuset_t tidlemsk; @@ -1466,8 +1434,8 @@ } #endif /* SMP */ -void -sched_rem(struct thread *td) +static void +sched_4bsd_rem(struct thread *td) { struct td_sched *ts; @@ -1496,8 +1464,8 @@ * Select threads to run. Note that running threads still consume a * slot. */ -struct thread * -sched_choose(void) +static struct thread * +sched_4bsd_choose(void) { struct thread *td; struct runq *rq; @@ -1541,8 +1509,8 @@ return (PCPU_GET(idlethread)); } -void -sched_preempt(struct thread *td) +static void +sched_4bsd_preempt(struct thread *td) { int flags; @@ -1558,8 +1526,8 @@ } } -void -sched_userret_slowpath(struct thread *td) +static void +sched_4bsd_userret_slowpath(struct thread *td) { thread_lock(td); @@ -1568,8 +1536,8 @@ thread_unlock(td); } -void -sched_bind(struct thread *td, int cpu) +static void +sched_4bsd_bind(struct thread *td, int cpu) { #ifdef SMP struct td_sched *ts = td_get_sched(td); @@ -1589,48 +1557,48 @@ #endif } -void -sched_unbind(struct thread* td) +static void +sched_4bsd_unbind(struct thread* td) { THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(td == curthread, ("sched_unbind: can only bind curthread")); td->td_flags &= ~TDF_BOUND; } -int -sched_is_bound(struct thread *td) +static int +sched_4bsd_is_bound(struct thread *td) { THREAD_LOCK_ASSERT(td, MA_OWNED); return (td->td_flags & TDF_BOUND); } -void -sched_relinquish(struct thread *td) +static void +sched_4bsd_relinquish(struct thread *td) { thread_lock(td); mi_switch(SW_VOL | SWT_RELINQUISH); } -int -sched_load(void) +static int +sched_4bsd_load(void) { return (sched_tdcnt); } -int -sched_sizeof_proc(void) +static int +sched_4bsd_sizeof_proc(void) { return (sizeof(struct proc)); } -int -sched_sizeof_thread(void) +static int +sched_4bsd_sizeof_thread(void) { return (sizeof(struct thread) + sizeof(struct td_sched)); } -fixpt_t -sched_pctcpu(struct thread *td) +static fixpt_t +sched_4bsd_pctcpu(struct thread *td) { struct td_sched *ts; @@ -1639,42 +1607,8 @@ return (ts->ts_pctcpu); } -#ifdef RACCT -/* - * Calculates the contribution to the thread cpu usage for the latest - * (unfinished) second. - */ -fixpt_t -sched_pctcpu_delta(struct thread *td) -{ - struct td_sched *ts; - fixpt_t delta; - int realstathz; - - THREAD_LOCK_ASSERT(td, MA_OWNED); - ts = td_get_sched(td); - delta = 0; - realstathz = stathz ? stathz : hz; - if (ts->ts_cpticks != 0) { -#if (FSHIFT >= CCPU_SHIFT) - delta = (realstathz == 100) - ? 
((fixpt_t) ts->ts_cpticks) << - (FSHIFT - CCPU_SHIFT) : - 100 * (((fixpt_t) ts->ts_cpticks) - << (FSHIFT - CCPU_SHIFT)) / realstathz; -#else - delta = ((FSCALE - ccpu) * - (ts->ts_cpticks * - FSCALE / realstathz)) >> FSHIFT; -#endif - } - - return (delta); -} -#endif - -u_int -sched_estcpu(struct thread *td) +static u_int +sched_4bsd_estcpu(struct thread *td) { return (td_get_sched(td)->ts_estcpu); @@ -1683,8 +1617,8 @@ /* * The actual idle process. */ -void -sched_idletd(void *dummy) +static void +sched_4bsd_idletd(void *dummy) { struct pcpuidlestat *stat; @@ -1725,8 +1659,8 @@ /* * A CPU is entering for the first time. */ -void -sched_ap_entry(void) +static void +sched_4bsd_ap_entry(void) { /* @@ -1749,8 +1683,8 @@ /* * A thread is exiting. */ -void -sched_throw(struct thread *td) +static void +sched_4bsd_throw(struct thread *td) { MPASS(td != NULL); @@ -1763,8 +1697,8 @@ sched_throw_tail(td); } -void -sched_fork_exit(struct thread *td) +static void +sched_4bsd_fork_exit(struct thread *td) { /* @@ -1782,8 +1716,8 @@ SDT_PROBE0(sched, , , on__cpu); } -char * -sched_tdname(struct thread *td) +static char * +sched_4bsd_tdname(struct thread *td) { #ifdef KTR struct td_sched *ts; @@ -1798,19 +1732,19 @@ #endif } -#ifdef KTR -void -sched_clear_tdname(struct thread *td) +static void +sched_4bsd_clear_tdname(struct thread *td) { +#ifdef KTR struct td_sched *ts; ts = td_get_sched(td); ts->ts_name[0] = '\0'; -} #endif +} -void -sched_affinity(struct thread *td) +static void +sched_4bsd_affinity(struct thread *td) { #ifdef SMP struct td_sched *ts; @@ -1872,3 +1806,83 @@ } #endif } + +static bool +sched_4bsd_do_timer_accounting(void) +{ +#ifdef SMP + /* + * Don't do any accounting for the disabled HTT cores, since it + * will provide misleading numbers for the userland. + * + * No locking is necessary here, since even if we lose the race + * when hlt_cpus_mask changes it is not a big deal, really. + * + * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask + * and unlike other schedulers it actually schedules threads to + * those CPUs. 
+ */ + return (!CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)); +#else + return (true); +#endif +} + +static int +sched_4bsd_find_l2_neighbor(int cpu) +{ + return (-1); +} + +struct sched_instance sched_4bsd_instance = { +#define SLOT(name) .name = sched_4bsd_##name + SLOT(load), + SLOT(rr_interval), + SLOT(runnable), + SLOT(exit), + SLOT(fork), + SLOT(fork_exit), + SLOT(class), + SLOT(nice), + SLOT(ap_entry), + SLOT(exit_thread), + SLOT(estcpu), + SLOT(fork_thread), + SLOT(ithread_prio), + SLOT(lend_prio), + SLOT(lend_user_prio), + SLOT(lend_user_prio_cond), + SLOT(pctcpu), + SLOT(prio), + SLOT(sleep), + SLOT(sswitch), + SLOT(throw), + SLOT(unlend_prio), + SLOT(user_prio), + SLOT(userret_slowpath), + SLOT(add), + SLOT(choose), + SLOT(clock), + SLOT(idletd), + SLOT(preempt), + SLOT(relinquish), + SLOT(rem), + SLOT(wakeup), + SLOT(bind), + SLOT(unbind), + SLOT(is_bound), + SLOT(affinity), + SLOT(sizeof_proc), + SLOT(sizeof_thread), + SLOT(tdname), + SLOT(clear_tdname), + SLOT(do_timer_accounting), + SLOT(find_l2_neighbor), + SLOT(init), + SLOT(init_ap), + SLOT(setup), + SLOT(initticks), + SLOT(schedcpu), +#undef SLOT +}; +DECLARE_SCHEDULER(fourbsd_sched_selector, "4BSD", &sched_4bsd_instance); diff --git a/sys/kern/sched_shim.c b/sys/kern/sched_shim.c new file mode 100644 --- /dev/null +++ b/sys/kern/sched_shim.c @@ -0,0 +1,234 @@ +/* + * Copyright 2026 The FreeBSD Foundation + * + * SPDX-License-Identifier: BSD-2-Clause + * + * This software was developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + */ + +#include "opt_sched.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const struct sched_instance *active_sched; + +#ifndef __DO_NOT_HAVE_SYS_IFUNCS +#define __DEFINE_SHIM(__m, __r, __n, __p, __a) \ + DEFINE_IFUNC(, __r, __n, __p) \ + { \ + return (active_sched->__m); \ + } +#else +#define __DEFINE_SHIM(__m, __r, __n, __p, __a) \ + __r \ + __n __p \ + { \ + return (active_sched->__m __a); \ + } +#endif +#define DEFINE_SHIM0(__m, __r, __n) \ + __DEFINE_SHIM(__m, __r, __n, (void), ()) +#define DEFINE_SHIM1(__m, __r, __n, __t1, __a1) \ + __DEFINE_SHIM(__m, __r, __n, (__t1 __a1), (__a1)) +#define DEFINE_SHIM2(__m, __r, __n, __t1, __a1, __t2, __a2) \ + __DEFINE_SHIM(__m, __r, __n, (__t1 __a1, __t2 __a2), (__a1, __a2)) + +DEFINE_SHIM0(load, int, sched_load) +DEFINE_SHIM0(rr_interval, int, sched_rr_interval) +DEFINE_SHIM0(runnable, bool, sched_runnable) +DEFINE_SHIM2(exit, void, sched_exit, struct proc *, p, + struct thread *, childtd) +DEFINE_SHIM2(fork, void, sched_fork, struct thread *, td, + struct thread *, childtd) +DEFINE_SHIM1(fork_exit, void, sched_fork_exit, struct thread *, td) +DEFINE_SHIM2(class, void, sched_class, struct thread *, td, int, class) +DEFINE_SHIM2(nice, void, sched_nice, struct proc *, p, int, nice) +DEFINE_SHIM0(ap_entry, void, sched_ap_entry) +DEFINE_SHIM2(exit_thread, void, sched_exit_thread, struct thread *, td, + struct thread *, child) +DEFINE_SHIM1(estcpu, u_int, sched_estcpu, struct thread *, td) +DEFINE_SHIM2(fork_thread, void, sched_fork_thread, struct thread *, td, + struct thread *, child) +DEFINE_SHIM2(ithread_prio, void, sched_ithread_prio, struct thread *, td, + u_char, prio) +DEFINE_SHIM2(lend_prio, void, sched_lend_prio, struct thread *, td, + u_char, prio) +DEFINE_SHIM2(lend_user_prio, void, sched_lend_user_prio, struct thread *, td, + u_char, pri) +DEFINE_SHIM2(lend_user_prio_cond, void, sched_lend_user_prio_cond, + struct thread *, td, u_char, pri) +DEFINE_SHIM1(pctcpu, 
fixpt_t, sched_pctcpu, struct thread *, td) +DEFINE_SHIM2(prio, void, sched_prio, struct thread *, td, u_char, prio) +DEFINE_SHIM2(sleep, void, sched_sleep, struct thread *, td, int, prio) +DEFINE_SHIM2(sswitch, void, sched_switch, struct thread *, td, int, flags) +DEFINE_SHIM1(throw, void, sched_throw, struct thread *, td) +DEFINE_SHIM2(unlend_prio, void, sched_unlend_prio, struct thread *, td, + u_char, prio) +DEFINE_SHIM2(user_prio, void, sched_user_prio, struct thread *, td, + u_char, prio) +DEFINE_SHIM1(userret_slowpath, void, sched_userret_slowpath, + struct thread *, td) +DEFINE_SHIM2(add, void, sched_add, struct thread *, td, int, flags) +DEFINE_SHIM0(choose, struct thread *, sched_choose) +DEFINE_SHIM2(clock, void, sched_clock, struct thread *, td, int, cnt) +DEFINE_SHIM1(idletd, void, sched_idletd, void *, dummy) +DEFINE_SHIM1(preempt, void, sched_preempt, struct thread *, td) +DEFINE_SHIM1(relinquish, void, sched_relinquish, struct thread *, td) +DEFINE_SHIM1(rem, void, sched_rem, struct thread *, td) +DEFINE_SHIM2(wakeup, void, sched_wakeup, struct thread *, td, int, srqflags) +DEFINE_SHIM2(bind, void, sched_bind, struct thread *, td, int, cpu) +DEFINE_SHIM1(unbind, void, sched_unbind, struct thread *, td) +DEFINE_SHIM1(is_bound, int, sched_is_bound, struct thread *, td) +DEFINE_SHIM1(affinity, void, sched_affinity, struct thread *, td) +DEFINE_SHIM0(sizeof_proc, int, sched_sizeof_proc) +DEFINE_SHIM0(sizeof_thread, int, sched_sizeof_thread) +DEFINE_SHIM1(tdname, char *, sched_tdname, struct thread *, td) +DEFINE_SHIM1(clear_tdname, void, sched_clear_tdname, struct thread *, td) +DEFINE_SHIM0(do_timer_accounting, bool, sched_do_timer_accounting) +DEFINE_SHIM1(find_l2_neighbor, int, sched_find_l2_neighbor, int, cpu) +DEFINE_SHIM0(init_ap, void, schedinit_ap) + + +SCHED_STAT_DEFINE(ithread_demotions, "Interrupt thread priority demotions"); +SCHED_STAT_DEFINE(ithread_preemptions, + "Interrupt thread preemptions due to time-sharing"); + +SDT_PROVIDER_DEFINE(sched); + +SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *", + "struct proc *", "uint8_t"); +SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *", + "struct proc *", "void *"); +SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *", + "struct proc *", "void *", "int"); +SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *", + "struct proc *", "uint8_t", "struct thread *"); +SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int"); +SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *", + "struct proc *"); +SDT_PROBE_DEFINE(sched, , , on__cpu); +SDT_PROBE_DEFINE(sched, , , remain__cpu); +SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *", + "struct proc *"); + +#ifdef KDTRACE_HOOKS +#include +int __read_mostly dtrace_vtime_active; +dtrace_vtime_switch_func_t dtrace_vtime_switch_func; +#endif + +static char sched_name[32] = "ULE"; + +SET_DECLARE(sched_instance_set, struct sched_selection); + +void +sched_instance_select(void) +{ + struct sched_selection *s, **ss; + int i; + + TUNABLE_STR_FETCH("kern.sched.name", sched_name, sizeof(sched_name)); + SET_FOREACH(ss, sched_instance_set) { + s = *ss; + for (i = 0; s->name[i] == sched_name[i]; i++) { + if (s->name[i] == '\0') { + active_sched = s->instance; + return; + } + } + } + + /* + * No scheduler matching the configuration was found. If + * there is any scheduler compiled in, at all, use the first + * scheduler from the linker set. 
+ */ + if (SET_BEGIN(sched_instance_set) < SET_LIMIT(sched_instance_set)) { + s = *SET_BEGIN(sched_instance_set); + active_sched = s->instance; + for (i = 0;; i++) { + sched_name[i] = s->name[i]; + if (s->name[i] == '\0') + break; + } + } +} + +void +schedinit(void) +{ + if (active_sched == NULL) + panic("Cannot find scheduler %s", sched_name); + active_sched->init(); +} + +static void +sched_setup(void *dummy) +{ + active_sched->setup(); +} +SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL); + +static void +sched_initticks(void *dummy) +{ + active_sched->initticks(); +} +SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, + NULL); + +static void +sched_schedcpu(void) +{ + active_sched->schedcpu(); +} +SYSINIT(schedcpu, SI_SUB_LAST, SI_ORDER_FIRST, sched_schedcpu, NULL); + +SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "Scheduler"); + +SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, sched_name, 0, + "Scheduler name"); + +static int +sysctl_kern_sched_available(SYSCTL_HANDLER_ARGS) +{ + struct sched_selection *s, **ss; + struct sbuf *sb, sm; + int error; + bool first; + + sb = sbuf_new_for_sysctl(&sm, NULL, 0, req); + if (sb == NULL) + return (ENOMEM); + first = true; + SET_FOREACH(ss, sched_instance_set) { + s = *ss; + if (first) + first = false; + else + sbuf_cat(sb, ","); + sbuf_cat(sb, s->name); + } + error = sbuf_finish(sb); + sbuf_delete(sb); + return (error); +} + +SYSCTL_PROC(_kern_sched, OID_AUTO, available, + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, + NULL, 0, sysctl_kern_sched_available, "A", + "List of available schedulers"); + +fixpt_t ccpu; +SYSCTL_UINT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, + "Decay factor used for updating %CPU"); diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -37,12 +37,10 @@ * Isilon Systems and a general lack of creativity on the part of the author. 
*/ -#include #include "opt_hwpmc_hooks.h" #include "opt_hwt_hooks.h" #include "opt_sched.h" -#include #include #include #include @@ -74,12 +72,6 @@ #include #endif -#ifdef KDTRACE_HOOKS -#include -int __read_mostly dtrace_vtime_active; -dtrace_vtime_switch_func_t dtrace_vtime_switch_func; -#endif - #include #include @@ -406,36 +398,11 @@ static bool sched_balance_pair(struct tdq *, struct tdq *); static inline struct tdq *sched_setcpu(struct thread *, int, int); static inline void thread_unblock_switch(struct thread *, struct mtx *); -static int sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS); -static int sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, +static int sysctl_kern_sched_ule_topology_spec(SYSCTL_HANDLER_ARGS); +static int sysctl_kern_sched_ule_topology_spec_internal(struct sbuf *sb, struct cpu_group *cg, int indent); #endif -static void sched_setup(void *dummy); -SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL); - -static void sched_initticks(void *dummy); -SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, - NULL); - -SDT_PROVIDER_DEFINE(sched); - -SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *", - "struct proc *", "uint8_t"); -SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *", - "struct proc *", "void *"); -SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *", - "struct proc *", "void *", "int"); -SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *", - "struct proc *", "uint8_t", "struct thread *"); -SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int"); -SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *", - "struct proc *"); -SDT_PROBE_DEFINE(sched, , , on__cpu); -SDT_PROBE_DEFINE(sched, , , remain__cpu); -SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *", - "struct proc *"); - /* * Print the threads waiting on a run-queue. */ @@ -1642,7 +1609,7 @@ * information. */ static void -sched_setup(void *dummy) +sched_ule_setup(void) { struct tdq *tdq; @@ -1667,7 +1634,7 @@ */ /* ARGSUSED */ static void -sched_initticks(void *dummy) +sched_ule_initticks(void) { int incr; @@ -1891,8 +1858,8 @@ /* * Called from proc0_init() to setup the scheduler fields. */ -void -schedinit(void) +static void +sched_ule_init(void) { struct td_sched *ts0; @@ -1916,8 +1883,8 @@ * TDQ_SELF() relies on the below sched pcpu setting; it may be used only * after schedinit_ap(). */ -void -schedinit_ap(void) +static void +sched_ule_init_ap(void) { #ifdef SMP @@ -1931,8 +1898,8 @@ * priority they will switch when their slices run out, which will be * at most sched_slice stathz ticks. */ -int -sched_rr_interval(void) +static int +sched_ule_rr_interval(void) { /* Convert sched_slice from stathz to hz. */ @@ -2051,8 +2018,8 @@ * Update a thread's priority when it is lent another thread's * priority. */ -void -sched_lend_prio(struct thread *td, u_char prio) +static void +sched_ule_lend_prio(struct thread *td, u_char prio) { td->td_flags |= TDF_BORROWING; @@ -2067,8 +2034,8 @@ * important than prio, the thread will keep a priority boost * of prio. */ -void -sched_unlend_prio(struct thread *td, u_char prio) +static void +sched_ule_unlend_prio(struct thread *td, u_char prio) { u_char base_pri; @@ -2087,8 +2054,8 @@ /* * Standard entry for setting the priority to an absolute value. */ -void -sched_prio(struct thread *td, u_char prio) +static void +sched_ule_prio(struct thread *td, u_char prio) { u_char oldprio; @@ -2117,8 +2084,8 @@ /* * Set the base interrupt thread priority. 
*/ -void -sched_ithread_prio(struct thread *td, u_char prio) +static void +sched_ule_ithread_prio(struct thread *td, u_char prio) { THREAD_LOCK_ASSERT(td, MA_OWNED); MPASS(td->td_pri_class == PRI_ITHD); @@ -2129,8 +2096,8 @@ /* * Set the base user priority, does not effect current running priority. */ -void -sched_user_prio(struct thread *td, u_char prio) +static void +sched_ule_user_prio(struct thread *td, u_char prio) { td->td_base_user_pri = prio; @@ -2139,8 +2106,8 @@ td->td_user_pri = prio; } -void -sched_lend_user_prio(struct thread *td, u_char prio) +static void +sched_ule_lend_user_prio(struct thread *td, u_char prio) { THREAD_LOCK_ASSERT(td, MA_OWNED); @@ -2155,8 +2122,8 @@ /* * Like the above but first check if there is anything to do. */ -void -sched_lend_user_prio_cond(struct thread *td, u_char prio) +static void +sched_ule_lend_user_prio_cond(struct thread *td, u_char prio) { if (td->td_lend_user_pri == prio) @@ -2327,8 +2294,8 @@ * migrating a thread from one queue to another as running threads may * be assigned elsewhere via binding. */ -void -sched_switch(struct thread *td, int flags) +static void +sched_ule_sswitch(struct thread *td, int flags) { struct thread *newtd; struct tdq *tdq; @@ -2466,8 +2433,8 @@ /* * Adjust thread priorities as a result of a nice request. */ -void -sched_nice(struct proc *p, int nice) +static void +sched_ule_nice(struct proc *p, int nice) { struct thread *td; @@ -2485,8 +2452,8 @@ /* * Record the sleep time for the interactivity scorer. */ -void -sched_sleep(struct thread *td, int prio) +static void +sched_ule_sleep(struct thread *td, int prio) { THREAD_LOCK_ASSERT(td, MA_OWNED); @@ -2506,8 +2473,8 @@ * * Requires the thread lock on entry, drops on exit. */ -void -sched_wakeup(struct thread *td, int srqflags) +static void +sched_ule_wakeup(struct thread *td, int srqflags) { struct td_sched *ts; int slptick; @@ -2546,8 +2513,8 @@ * Penalize the parent for creating a new child and initialize the child's * priority. */ -void -sched_fork(struct thread *td, struct thread *child) +static void +sched_ule_fork(struct thread *td, struct thread *child) { THREAD_LOCK_ASSERT(td, MA_OWNED); sched_pctcpu_update(td_get_sched(td), 1); @@ -2565,8 +2532,8 @@ /* * Fork a new thread, may be within the same process. */ -void -sched_fork_thread(struct thread *td, struct thread *child) +static void +sched_ule_fork_thread(struct thread *td, struct thread *child) { struct td_sched *ts; struct td_sched *ts2; @@ -2611,8 +2578,8 @@ /* * Adjust the priority class of a thread. */ -void -sched_class(struct thread *td, int class) +static void +sched_ule_class(struct thread *td, int class) { THREAD_LOCK_ASSERT(td, MA_OWNED); @@ -2624,8 +2591,8 @@ /* * Return some of the child's priority and interactivity to the parent. */ -void -sched_exit(struct proc *p, struct thread *child) +static void +sched_ule_exit(struct proc *p, struct thread *child) { struct thread *td; @@ -2642,8 +2609,8 @@ * jobs such as make. This has little effect on the make process itself but * causes new processes spawned by it to receive worse scores immediately. */ -void -sched_exit_thread(struct thread *td, struct thread *child) +static void +sched_ule_exit_thread(struct thread *td, struct thread *child) { KTR_STATE1(KTR_SCHED, "thread", sched_tdname(child), "thread exit", @@ -2660,8 +2627,8 @@ thread_unlock(td); } -void -sched_preempt(struct thread *td) +static void +sched_ule_preempt(struct thread *td) { struct tdq *tdq; int flags; @@ -2691,8 +2658,8 @@ * Fix priorities on return to user-space. 
Priorities may be elevated due * to static priorities in msleep() or similar. */ -void -sched_userret_slowpath(struct thread *td) +static void +sched_ule_userret_slowpath(struct thread *td) { thread_lock(td); @@ -2702,10 +2669,6 @@ thread_unlock(td); } -SCHED_STAT_DEFINE(ithread_demotions, "Interrupt thread priority demotions"); -SCHED_STAT_DEFINE(ithread_preemptions, - "Interrupt thread preemptions due to time-sharing"); - /* * Return time slice for a given thread. For ithreads this is * sched_slice. For other threads it is tdq_slice(tdq). @@ -2722,8 +2685,8 @@ * Handle a stathz tick. This is really only relevant for timeshare * and interrupt threads. */ -void -sched_clock(struct thread *td, int cnt) +static void +sched_ule_clock(struct thread *td, int cnt) { struct tdq *tdq; struct td_sched *ts; @@ -2808,8 +2771,8 @@ } } -u_int -sched_estcpu(struct thread *td __unused) +static u_int +sched_ule_estcpu(struct thread *td __unused) { return (0); @@ -2819,8 +2782,8 @@ * Return whether the current CPU has runnable tasks. Used for in-kernel * cooperative idle threads. */ -bool -sched_runnable(void) +static bool +sched_ule_runnable(void) { struct tdq *tdq; @@ -2832,8 +2795,8 @@ * Choose the highest priority thread to run. The thread is removed from * the run-queue while running however the load remains. */ -struct thread * -sched_choose(void) +static struct thread * +sched_ule_choose(void) { struct thread *td; struct tdq *tdq; @@ -2909,8 +2872,8 @@ * * Requires the thread lock on entry, drops on exit. */ -void -sched_add(struct thread *td, int flags) +static void +sched_ule_add(struct thread *td, int flags) { struct tdq *tdq; #ifdef SMP @@ -2969,8 +2932,8 @@ * when we're stealing a thread from a remote queue. Otherwise all threads * exit by calling sched_exit_thread() and sched_throw() themselves. */ -void -sched_rem(struct thread *td) +static void +sched_ule_rem(struct thread *td) { struct tdq *tdq; @@ -2992,8 +2955,8 @@ /* * Fetch cpu utilization information. Updates on demand. */ -fixpt_t -sched_pctcpu(struct thread *td) +static fixpt_t +sched_ule_pctcpu(struct thread *td) { struct td_sched *ts; u_int len; @@ -3014,8 +2977,8 @@ * Enforce affinity settings for a thread. Called after adjustments to * cpumask. */ -void -sched_affinity(struct thread *td) +static void +sched_ule_affinity(struct thread *td) { #ifdef SMP struct td_sched *ts; @@ -3045,8 +3008,8 @@ /* * Bind a thread to a target cpu. */ -void -sched_bind(struct thread *td, int cpu) +static void +sched_ule_bind(struct thread *td, int cpu) { struct td_sched *ts; @@ -3069,8 +3032,8 @@ /* * Release a bound thread. */ -void -sched_unbind(struct thread *td) +static void +sched_ule_unbind(struct thread *td) { struct td_sched *ts; @@ -3083,8 +3046,8 @@ sched_unpin(); } -int -sched_is_bound(struct thread *td) +static int +sched_ule_is_bound(struct thread *td) { THREAD_LOCK_ASSERT(td, MA_OWNED); return (td_get_sched(td)->ts_flags & TSF_BOUND); @@ -3093,8 +3056,8 @@ /* * Basic yield call. */ -void -sched_relinquish(struct thread *td) +static void +sched_ule_relinquish(struct thread *td) { thread_lock(td); mi_switch(SW_VOL | SWT_RELINQUISH); @@ -3103,8 +3066,8 @@ /* * Return the total system load. 
*/ -int -sched_load(void) +static int +sched_ule_load(void) { #ifdef SMP int total; @@ -3119,14 +3082,14 @@ #endif } -int -sched_sizeof_proc(void) +static int +sched_ule_sizeof_proc(void) { return (sizeof(struct proc)); } -int -sched_sizeof_thread(void) +static int +sched_ule_sizeof_thread(void) { return (sizeof(struct thread) + sizeof(struct td_sched)); } @@ -3141,8 +3104,8 @@ /* * The actual idle process. */ -void -sched_idletd(void *dummy) +static void +sched_ule_idletd(void *dummy) { struct thread *td; struct tdq *tdq; @@ -3244,8 +3207,8 @@ /* * A CPU is entering for the first time. */ -void -sched_ap_entry(void) +static void +sched_ule_ap_entry(void) { struct thread *newtd; struct tdq *tdq; @@ -3274,8 +3237,8 @@ /* * A thread is exiting. */ -void -sched_throw(struct thread *td) +static void +sched_ule_throw(struct thread *td) { struct thread *newtd; struct tdq *tdq; @@ -3305,8 +3268,8 @@ * This is called from fork_exit(). Just acquire the correct locks and * let fork do the rest of the work. */ -void -sched_fork_exit(struct thread *td) +static void +sched_ule_fork_exit(struct thread *td) { struct tdq *tdq; int cpuid; @@ -3331,8 +3294,8 @@ /* * Create on first use to catch odd startup conditions. */ -char * -sched_tdname(struct thread *td) +static char * +sched_ule_tdname(struct thread *td) { #ifdef KTR struct td_sched *ts; @@ -3347,17 +3310,148 @@ #endif } -#ifdef KTR -void -sched_clear_tdname(struct thread *td) +static void +sched_ule_clear_tdname(struct thread *td) { +#ifdef KTR struct td_sched *ts; ts = td_get_sched(td); ts->ts_name[0] = '\0'; +#endif +} + +static void +sched_ule_schedcpu(void) +{ +} + +static bool +sched_ule_do_timer_accounting(void) +{ + return (true); +} + +#ifdef SMP +static int +sched_ule_find_child_with_core(int cpu, struct cpu_group *grp) +{ + int i; + + if (grp->cg_children == 0) + return (-1); + + MPASS(grp->cg_child); + for (i = 0; i < grp->cg_children; i++) { + if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) + return (i); + } + + return (-1); +} + +static int +sched_ule_find_l2_neighbor(int cpu) +{ + struct cpu_group *grp; + int i; + + grp = cpu_top; + if (grp == NULL) + return (-1); + + /* + * Find the smallest CPU group that contains the given core. + */ + i = 0; + while ((i = sched_ule_find_child_with_core(cpu, grp)) != -1) { + /* + * If the smallest group containing the given CPU has less + * than two members, we conclude the given CPU has no + * L2 neighbor. + */ + if (grp->cg_child[i].cg_count <= 1) + return (-1); + grp = &grp->cg_child[i]; + } + + /* Must share L2. */ + if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) + return (-1); + + /* + * Select the first member of the set that isn't the reference + * CPU, which at this point is guaranteed to exist. 
+ */ + for (i = 0; i < CPU_SETSIZE; i++) { + if (CPU_ISSET(i, &grp->cg_mask) && i != cpu) + return (i); + } + + /* Should never be reached */ + return (-1); +} +#else +static int +sched_ule_find_l2_neighbor(int cpu) +{ + return (-1); } #endif +struct sched_instance sched_ule_instance = { +#define SLOT(name) .name = sched_ule_##name + SLOT(load), + SLOT(rr_interval), + SLOT(runnable), + SLOT(exit), + SLOT(fork), + SLOT(fork_exit), + SLOT(class), + SLOT(nice), + SLOT(ap_entry), + SLOT(exit_thread), + SLOT(estcpu), + SLOT(fork_thread), + SLOT(ithread_prio), + SLOT(lend_prio), + SLOT(lend_user_prio), + SLOT(lend_user_prio_cond), + SLOT(pctcpu), + SLOT(prio), + SLOT(sleep), + SLOT(sswitch), + SLOT(throw), + SLOT(unlend_prio), + SLOT(user_prio), + SLOT(userret_slowpath), + SLOT(add), + SLOT(choose), + SLOT(clock), + SLOT(idletd), + SLOT(preempt), + SLOT(relinquish), + SLOT(rem), + SLOT(wakeup), + SLOT(bind), + SLOT(unbind), + SLOT(is_bound), + SLOT(affinity), + SLOT(sizeof_proc), + SLOT(sizeof_thread), + SLOT(tdname), + SLOT(clear_tdname), + SLOT(do_timer_accounting), + SLOT(find_l2_neighbor), + SLOT(init), + SLOT(init_ap), + SLOT(setup), + SLOT(initticks), + SLOT(schedcpu), +#undef SLOT +}; +DECLARE_SCHEDULER(ule_sched_selector, "ULE", &sched_ule_instance); + #ifdef SMP /* @@ -3365,8 +3459,8 @@ * the topology tree. */ static int -sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, struct cpu_group *cg, - int indent) +sysctl_kern_sched_ule_topology_spec_internal(struct sbuf *sb, + struct cpu_group *cg, int indent) { char cpusetbuf[CPUSETBUFSIZ]; int i, first; @@ -3403,7 +3497,7 @@ if (cg->cg_children > 0) { sbuf_printf(sb, "%*s \n", indent, ""); for (i = 0; i < cg->cg_children; i++) - sysctl_kern_sched_topology_spec_internal(sb, + sysctl_kern_sched_ule_topology_spec_internal(sb, &cg->cg_child[i], indent+2); sbuf_printf(sb, "%*s \n", indent, ""); } @@ -3416,19 +3510,20 @@ * the recursive sysctl_kern_smp_topology_spec_internal(). 
*/ static int -sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS) +sysctl_kern_sched_ule_topology_spec(SYSCTL_HANDLER_ARGS) { struct sbuf *topo; int err; - KASSERT(cpu_top != NULL, ("cpu_top isn't initialized")); + if (cpu_top == NULL) + return (ENOTTY); topo = sbuf_new_for_sysctl(NULL, NULL, 512, req); if (topo == NULL) return (ENOMEM); sbuf_cat(topo, "\n"); - err = sysctl_kern_sched_topology_spec_internal(topo, cpu_top, 1); + err = sysctl_kern_sched_ule_topology_spec_internal(topo, cpu_top, 1); sbuf_cat(topo, "\n"); if (err == 0) { @@ -3459,51 +3554,51 @@ return (0); } -SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, - "Scheduler"); -SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ULE", 0, - "Scheduler name"); -SYSCTL_PROC(_kern_sched, OID_AUTO, quantum, +SYSCTL_NODE(_kern_sched, OID_AUTO, ule, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, + "ULE Scheduler"); + +SYSCTL_PROC(_kern_sched_ule, OID_AUTO, quantum, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_kern_quantum, "I", "Quantum for timeshare threads in microseconds"); -SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0, +SYSCTL_INT(_kern_sched_ule, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0, "Quantum for timeshare threads in stathz ticks"); -SYSCTL_UINT(_kern_sched, OID_AUTO, interact, CTLFLAG_RWTUN, &sched_interact, 0, +SYSCTL_UINT(_kern_sched_ule, OID_AUTO, interact, CTLFLAG_RWTUN, &sched_interact, 0, "Interactivity score threshold"); -SYSCTL_INT(_kern_sched, OID_AUTO, preempt_thresh, CTLFLAG_RWTUN, +SYSCTL_INT(_kern_sched_ule, OID_AUTO, preempt_thresh, CTLFLAG_RWTUN, &preempt_thresh, 0, "Maximal (lowest) priority for preemption"); -SYSCTL_INT(_kern_sched, OID_AUTO, static_boost, CTLFLAG_RWTUN, &static_boost, 0, +SYSCTL_INT(_kern_sched_ule, OID_AUTO, static_boost, CTLFLAG_RWTUN, + &static_boost, 0, "Assign static kernel priorities to sleeping threads"); -SYSCTL_INT(_kern_sched, OID_AUTO, idlespins, CTLFLAG_RWTUN, &sched_idlespins, 0, +SYSCTL_INT(_kern_sched_ule, OID_AUTO, idlespins, CTLFLAG_RWTUN, + &sched_idlespins, 0, "Number of times idle thread will spin waiting for new work"); -SYSCTL_INT(_kern_sched, OID_AUTO, idlespinthresh, CTLFLAG_RW, +SYSCTL_INT(_kern_sched_ule, OID_AUTO, idlespinthresh, CTLFLAG_RW, &sched_idlespinthresh, 0, "Threshold before we will permit idle thread spinning"); #ifdef SMP -SYSCTL_INT(_kern_sched, OID_AUTO, affinity, CTLFLAG_RW, &affinity, 0, +SYSCTL_INT(_kern_sched_ule, OID_AUTO, affinity, CTLFLAG_RW, &affinity, 0, "Number of hz ticks to keep thread affinity for"); -SYSCTL_INT(_kern_sched, OID_AUTO, balance, CTLFLAG_RWTUN, &rebalance, 0, +SYSCTL_INT(_kern_sched_ule, OID_AUTO, balance, CTLFLAG_RWTUN, &rebalance, 0, "Enables the long-term load balancer"); -SYSCTL_INT(_kern_sched, OID_AUTO, balance_interval, CTLFLAG_RW, +SYSCTL_INT(_kern_sched_ule, OID_AUTO, balance_interval, CTLFLAG_RW, &balance_interval, 0, "Average period in stathz ticks to run the long-term balancer"); -SYSCTL_INT(_kern_sched, OID_AUTO, steal_idle, CTLFLAG_RWTUN, &steal_idle, 0, +SYSCTL_INT(_kern_sched_ule, OID_AUTO, steal_idle, CTLFLAG_RWTUN, + &steal_idle, 0, "Attempts to steal work from other cores before idling"); -SYSCTL_INT(_kern_sched, OID_AUTO, steal_thresh, CTLFLAG_RWTUN, &steal_thresh, 0, +SYSCTL_INT(_kern_sched_ule, OID_AUTO, steal_thresh, CTLFLAG_RWTUN, + &steal_thresh, 0, "Minimum load on remote CPU before we'll steal"); -SYSCTL_INT(_kern_sched, OID_AUTO, trysteal_limit, CTLFLAG_RWTUN, +SYSCTL_INT(_kern_sched_ule, OID_AUTO, trysteal_limit, CTLFLAG_RWTUN, &trysteal_limit, 0, 
"Topological distance limit for stealing threads in sched_switch()"); -SYSCTL_INT(_kern_sched, OID_AUTO, always_steal, CTLFLAG_RWTUN, &always_steal, 0, +SYSCTL_INT(_kern_sched_ule, OID_AUTO, always_steal, CTLFLAG_RWTUN, + &always_steal, 0, "Always run the stealer from the idle thread"); -SYSCTL_PROC(_kern_sched, OID_AUTO, topology_spec, CTLTYPE_STRING | - CTLFLAG_MPSAFE | CTLFLAG_RD, NULL, 0, sysctl_kern_sched_topology_spec, "A", +SYSCTL_PROC(_kern_sched_ule, OID_AUTO, topology_spec, CTLTYPE_STRING | + CTLFLAG_MPSAFE | CTLFLAG_RD, NULL, 0, + sysctl_kern_sched_ule_topology_spec, "A", "XML dump of detected CPU topology"); #endif - -/* ps compat. All cpu percentages from ULE are weighted. */ -static int ccpu = 0; -SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, - "Decay factor used for updating %CPU in 4BSD scheduler"); diff --git a/sys/net/iflib.c b/sys/net/iflib.c --- a/sys/net/iflib.c +++ b/sys/net/iflib.c @@ -29,7 +29,6 @@ #include "opt_inet.h" #include "opt_inet6.h" #include "opt_acpi.h" -#include "opt_sched.h" #include #include @@ -40,8 +39,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -4813,83 +4814,6 @@ return (cpuid); } -#if defined(SMP) && defined(SCHED_ULE) -extern struct cpu_group *cpu_top; /* CPU topology */ - -static int -find_child_with_core(int cpu, struct cpu_group *grp) -{ - int i; - - if (grp->cg_children == 0) - return (-1); - - MPASS(grp->cg_child); - for (i = 0; i < grp->cg_children; i++) { - if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) - return (i); - } - - return (-1); -} - - -/* - * Find an L2 neighbor of the given CPU or return -1 if none found. This - * does not distinguish among multiple L2 neighbors if the given CPU has - * more than one (it will always return the same result in that case). - */ -static int -find_l2_neighbor(int cpu) -{ - struct cpu_group *grp; - int i; - - grp = cpu_top; - if (grp == NULL) - return (-1); - - /* - * Find the smallest CPU group that contains the given core. - */ - i = 0; - while ((i = find_child_with_core(cpu, grp)) != -1) { - /* - * If the smallest group containing the given CPU has less - * than two members, we conclude the given CPU has no - * L2 neighbor. - */ - if (grp->cg_child[i].cg_count <= 1) - return (-1); - grp = &grp->cg_child[i]; - } - - /* Must share L2. */ - if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) - return (-1); - - /* - * Select the first member of the set that isn't the reference - * CPU, which at this point is guaranteed to exist. 
- */ - for (i = 0; i < CPU_SETSIZE; i++) { - if (CPU_ISSET(i, &grp->cg_mask) && i != cpu) - return (i); - } - - /* Should never be reached */ - return (-1); -} - -#else -static int -find_l2_neighbor(int cpu) -{ - - return (-1); -} -#endif - /* * CPU mapping behaviors * --------------------- @@ -4942,7 +4866,7 @@ unsigned int rx_cpuid; rx_cpuid = cpuid_advance(ctx, base_cpuid, qid); - l2_neighbor = find_l2_neighbor(rx_cpuid); + l2_neighbor = sched_find_l2_neighbor(rx_cpuid); if (l2_neighbor != -1) { return (l2_neighbor); } diff --git a/sys/powerpc/powerpc/machdep.c b/sys/powerpc/powerpc/machdep.c --- a/sys/powerpc/powerpc/machdep.c +++ b/sys/powerpc/powerpc/machdep.c @@ -83,6 +83,7 @@ #include #include #include +#include #include #include #include @@ -467,6 +468,7 @@ * Bring up MMU */ pmap_mmu_init(); + sched_instance_select(); link_elf_ireloc(); pmap_bootstrap(startkernel, endkernel); mtmsr(psl_kernset & ~PSL_EE); diff --git a/sys/powerpc/powerpc/swtch32.S b/sys/powerpc/powerpc/swtch32.S --- a/sys/powerpc/powerpc/swtch32.S +++ b/sys/powerpc/powerpc/swtch32.S @@ -56,7 +56,6 @@ */ #include "assym.inc" -#include "opt_sched.h" #include @@ -125,7 +124,7 @@ sync /* Make sure all of that finished */ cpu_switchin: -#if defined(SMP) && defined(SCHED_ULE) +#if defined(SMP) /* Wait for the new thread to become unblocked */ bl 1f 1: diff --git a/sys/powerpc/powerpc/swtch64.S b/sys/powerpc/powerpc/swtch64.S --- a/sys/powerpc/powerpc/swtch64.S +++ b/sys/powerpc/powerpc/swtch64.S @@ -56,7 +56,6 @@ */ #include "assym.inc" -#include "opt_sched.h" #include @@ -187,7 +186,7 @@ sync /* Make sure all of that finished */ cpu_switchin: -#if defined(SMP) && defined(SCHED_ULE) +#if defined(SMP) /* Wait for the new thread to become unblocked */ addis %r6,%r2,TOC_REF(blocked_lock)@ha ld %r6,TOC_REF(blocked_lock)@l(%r6) diff --git a/sys/riscv/include/ifunc.h b/sys/riscv/include/ifunc.h --- a/sys/riscv/include/ifunc.h +++ b/sys/riscv/include/ifunc.h @@ -30,6 +30,8 @@ #ifndef __RISCV_IFUNC_H #define __RISCV_IFUNC_H +#define __DO_NOT_HAVE_SYS_IFUNCS 1 + #define DEFINE_IFUNC(qual, ret_type, name, args) \ static ret_type (*name##_resolver(void))args __used; \ qual ret_type name args __attribute__((ifunc(#name "_resolver"))); \ diff --git a/sys/riscv/riscv/machdep.c b/sys/riscv/riscv/machdep.c --- a/sys/riscv/riscv/machdep.c +++ b/sys/riscv/riscv/machdep.c @@ -479,6 +479,8 @@ /* Initialize preload_kmdp */ preload_initkmdp(true); + sched_instance_select(); + /* link_elf_ireloc(); */ /* Read the boot metadata */ boothowto = MD_FETCH(preload_kmdp, MODINFOMD_HOWTO, int); diff --git a/sys/riscv/riscv/swtch.S b/sys/riscv/riscv/swtch.S --- a/sys/riscv/riscv/swtch.S +++ b/sys/riscv/riscv/swtch.S @@ -33,7 +33,6 @@ */ #include "assym.inc" -#include "opt_sched.h" #include #include @@ -315,7 +314,7 @@ /* Release the old thread */ sd s2, TD_LOCK(s0) -#if defined(SCHED_ULE) && defined(SMP) +#if defined(SMP) /* Spin if TD_LOCK points to a blocked_lock */ la s2, _C_LABEL(blocked_lock) 1: diff --git a/sys/sys/proc.h b/sys/sys/proc.h --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -1173,7 +1173,6 @@ void kern_yield(int); void killjobc(void); int leavepgrp(struct proc *p); -int maybe_preempt(struct thread *td); void maybe_yield(void); void mi_switch(int flags); int p_candebug(struct thread *td, struct proc *p); diff --git a/sys/sys/sched.h b/sys/sys/sched.h --- a/sys/sys/sched.h +++ b/sys/sys/sched.h @@ -68,6 +68,8 @@ #ifdef SCHED_STATS #include #endif +#include +#include struct proc; struct thread; @@ -114,11 +116,6 @@ void 
sched_unlend_prio(struct thread *td, u_char prio); void sched_user_prio(struct thread *td, u_char prio); void sched_userret_slowpath(struct thread *td); -#ifdef RACCT -#ifdef SCHED_4BSD -fixpt_t sched_pctcpu_delta(struct thread *td); -#endif -#endif static inline void sched_userret(struct thread *td) @@ -174,9 +171,7 @@ * functions. */ char *sched_tdname(struct thread *td); -#ifdef KTR void sched_clear_tdname(struct thread *td); -#endif static __inline void sched_pin(void) @@ -221,6 +216,10 @@ #define SCHED_STAT_DEFINE(name, descr) \ DPCPU_DEFINE(unsigned long, name); \ SCHED_STAT_DEFINE_VAR(name, &DPCPU_NAME(name), descr) + +#define SCHED_STAT_DECLARE(name) \ + DPCPU_DECLARE(unsigned long, name); + /* * Sched stats are always incremented in critical sections so no atomic * is necessary to increment them. @@ -229,9 +228,29 @@ #else #define SCHED_STAT_DEFINE_VAR(name, descr, ptr) #define SCHED_STAT_DEFINE(name, descr) +#define SCHED_STAT_DECLARE(name) #define SCHED_STAT_INC(var) (void)0 #endif +SCHED_STAT_DECLARE(ithread_demotions); +SCHED_STAT_DECLARE(ithread_preemptions); + +SDT_PROBE_DECLARE(sched, , , change__pri); +SDT_PROBE_DECLARE(sched, , , dequeue); +SDT_PROBE_DECLARE(sched, , , enqueue); +SDT_PROBE_DECLARE(sched, , , lend__pri); +SDT_PROBE_DECLARE(sched, , , load__change); +SDT_PROBE_DECLARE(sched, , , off__cpu); +SDT_PROBE_DECLARE(sched, , , on__cpu); +SDT_PROBE_DECLARE(sched, , , remain__cpu); +SDT_PROBE_DECLARE(sched, , , surrender); + +#ifdef KDTRACE_HOOKS +#include +extern int dtrace_vtime_active; +extern dtrace_vtime_switch_func_t dtrace_vtime_switch_func; +#endif + /* * Fixup scheduler state for proc0 and thread0 */ @@ -241,6 +260,81 @@ * Fixup scheduler state for secondary APs */ void schedinit_ap(void); + +bool sched_do_timer_accounting(void); + +/* + * Find an L2 neighbor of the given CPU or return -1 if none found. This + * does not distinguish among multiple L2 neighbors if the given CPU has + * more than one (it will always return the same result in that case). 
+ */ +int sched_find_l2_neighbor(int cpu); + +struct sched_instance { + int (*load)(void); + int (*rr_interval)(void); + bool (*runnable)(void); + void (*exit)(struct proc *p, struct thread *childtd); + void (*fork)(struct thread *td, struct thread *childtd); + void (*fork_exit)(struct thread *td); + void (*class)(struct thread *td, int class); + void (*nice)(struct proc *p, int nice); + void (*ap_entry)(void); + void (*exit_thread)(struct thread *td, struct thread *child); + u_int (*estcpu)(struct thread *td); + void (*fork_thread)(struct thread *td, struct thread *child); + void (*ithread_prio)(struct thread *td, u_char prio); + void (*lend_prio)(struct thread *td, u_char prio); + void (*lend_user_prio)(struct thread *td, u_char pri); + void (*lend_user_prio_cond)(struct thread *td, u_char pri); + fixpt_t (*pctcpu)(struct thread *td); + void (*prio)(struct thread *td, u_char prio); + void (*sleep)(struct thread *td, int prio); + void (*sswitch)(struct thread *td, int flags); + void (*throw)(struct thread *td); + void (*unlend_prio)(struct thread *td, u_char prio); + void (*user_prio)(struct thread *td, u_char prio); + void (*userret_slowpath)(struct thread *td); + void (*add)(struct thread *td, int flags); + struct thread *(*choose)(void); + void (*clock)(struct thread *td, int cnt); + void (*idletd)(void *); + void (*preempt)(struct thread *td); + void (*relinquish)(struct thread *td); + void (*rem)(struct thread *td); + void (*wakeup)(struct thread *td, int srqflags); + void (*bind)(struct thread *td, int cpu); + void (*unbind)(struct thread *td); + int (*is_bound)(struct thread *td); + void (*affinity)(struct thread *td); + int (*sizeof_proc)(void); + int (*sizeof_thread)(void); + char *(*tdname)(struct thread *td); + void (*clear_tdname)(struct thread *td); + bool (*do_timer_accounting)(void); + int (*find_l2_neighbor)(int cpuid); + void (*init)(void); + void (*init_ap)(void); + void (*setup)(void); + void (*initticks)(void); + void (*schedcpu)(void); +}; + +extern const struct sched_instance *active_sched; + +struct sched_selection { + const char *name; + const struct sched_instance *instance; +}; +#define DECLARE_SCHEDULER(xsel_name, xsched_name, xsched_instance) \ + static struct sched_selection xsel_name = { \ + .name = xsched_name, \ + .instance = xsched_instance, \ + }; \ + DATA_SET(sched_instance_set, xsel_name); + +void sched_instance_select(void); + #endif /* _KERNEL */ /* POSIX 1003.1b Process Scheduling */ diff --git a/sys/x86/x86/cpu_machdep.c b/sys/x86/x86/cpu_machdep.c --- a/sys/x86/x86/cpu_machdep.c +++ b/sys/x86/x86/cpu_machdep.c @@ -47,7 +47,6 @@ #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_platform.h" -#include "opt_sched.h" #ifdef __i386__ #include "opt_apic.h" #endif @@ -543,9 +542,7 @@ * is visible before calling cpu_idle_wakeup(). */ atomic_store_int(statep, newstate); -#if defined(SCHED_ULE) && defined(SMP) atomic_thread_fence_seq_cst(); -#endif /* * Since we may be in a critical section from cpu_idle(), if diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -1443,21 +1443,8 @@ kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED); trap_check_kstack(); -#if defined(SMP) && !defined(SCHED_ULE) - /* - * Don't do any accounting for the disabled HTT cores, since it - * will provide misleading numbers for the userland. - * - * No locking is necessary here, since even if we lose the race - * when hlt_cpus_mask changes it is not a big deal, really. 
- * - * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask - * and unlike other schedulers it actually schedules threads to - * those CPUs. - */ - if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) + if (!sched_do_timer_accounting()) return; -#endif /* Look up our local APIC structure for the tick counters. */ la = &lapics[PCPU_GET(apic_id)];
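
Reviewer note (illustration only, not part of the patch): the dispatch in sched_shim.c is easiest to read by expanding one representative shim by hand. Taking the DEFINE_SHIM0(load, int, sched_load) line above, the macro emits approximately the code below. On ifunc-capable platforms the resolver runs when link_elf_ireloc() applies the kernel's ifunc relocations, so later callers reach the selected scheduler's function directly; platforms that define __DO_NOT_HAVE_SYS_IFUNCS (arm and riscv in this patch) instead get a plain wrapper that indirects through active_sched on every call.

/* ifunc variant: the resolver returns the chosen instance's slot once. */
DEFINE_IFUNC(, int, sched_load, (void))
{
	return (active_sched->load);
}

/* __DO_NOT_HAVE_SYS_IFUNCS variant: ordinary call-time indirection. */
int
sched_load(void)
{
	return (active_sched->load());
}

This is also why the machdep.c hunks insert sched_instance_select() ahead of link_elf_ireloc(): active_sched must already point at the chosen instance when the resolvers fire. The instance is picked from the kern.sched.name loader tunable (default "ULE"), falling back to the first entry in the sched_instance_set linker set when the named scheduler is not compiled in; the kern.sched.name and kern.sched.available sysctls report the selection at runtime.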