D54831: Make ULE and 4BSD coexists
D54831.diff (62 KB)

diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -35,7 +35,6 @@
#include <machine/specialreg.h>
#include "assym.inc"
-#include "opt_sched.h"
/*****************************************************************************/
/* Scheduling */
@@ -136,13 +135,11 @@
movq %r15,TD_LOCK(%r13) /* Release the old thread */
sw1:
leaq TD_MD_PCB(%r12),%r8
-#if defined(SCHED_ULE)
movq $blocked_lock, %rdx
movq TD_LOCK(%r12),%rcx
cmpq %rcx, %rdx
je sw1wait
sw1cont:
-#endif
/*
* At this point, we've switched address spaces and are ready
* to load up the rest of the next context.
@@ -492,7 +489,6 @@
END(resumectx)
/* Wait for the new thread to become unblocked */
-#if defined(SCHED_ULE)
sw1wait:
1:
pause
@@ -500,4 +496,3 @@
cmpq %rcx, %rdx
je 1b
jmp sw1cont
-#endif
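
With both schedulers now always compiled in, the blocked-lock handshake in cpu_switch() can no longer be compiled out, so the SCHED_ULE guards around sw1wait are dropped here and on every other architecture touched below. In C terms the assembly implements roughly the following wait (a sketch only; blocked_lock and cpu_spinwait() are the existing kernel primitives, and the real code uses an acquire load of td_lock):

    /*
     * The outgoing CPU parks the thread's lock at &blocked_lock until it
     * has finished switching out; the incoming CPU must spin until the
     * lock is handed over before touching the new thread's context.
     */
    while (newtd->td_lock == &blocked_lock)
            cpu_spinwait();         /* the "pause" in the loop above */
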
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -322,7 +322,6 @@
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);
-
void
cpu_setregs(void)
{
@@ -1353,6 +1352,8 @@
TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
}
+ sched_instance_select();
+
link_elf_ireloc();
/*
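
Ordering matters here: sched_instance_select() must run before link_elf_ireloc(), because on ifunc-capable platforms every sched_*() entry point is resolved exactly once, during ireloc, by reading the freshly chosen active_sched. A sketch of one generated resolver (this is the __DEFINE_SHIM expansion from sched_shim.c below, shown for sched_load):

    /* Resolved once at link_elf_ireloc() time; no per-call indirection. */
    DEFINE_IFUNC(, int, sched_load, (void))
    {
            return (active_sched->load);
    }
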
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -24,6 +24,7 @@
makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support
options SCHED_ULE # ULE scheduler
+options SCHED_4BSD # Original 4.xBSD scheduler
options NUMA # Non-Uniform Memory Architecture support
options PREEMPTION # Enable kernel thread preemption
options EXTERR_STRINGS
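
GENERIC now carries both schedulers; the one that actually runs is picked at boot from the kern.sched.name loader tunable (see sched_instance_select() in sched_shim.c below). A usage sketch, assuming the default remains ULE:

    # /boot/loader.conf
    kern.sched.name="4BSD"          # select 4BSD instead of the default ULE
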
diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c
--- a/sys/arm/arm/machdep.c
+++ b/sys/arm/arm/machdep.c
@@ -523,6 +523,9 @@
/* Do basic tuning, hz etc */
init_param1();
+ sched_instance_select();
+ /* link_elf_ireloc(); */
+
/*
* Allocate a page for the system page mapped to 0xffff0000
* This page will just contain the system vectors and can be
diff --git a/sys/arm/arm/swtch-v6.S b/sys/arm/arm/swtch-v6.S
--- a/sys/arm/arm/swtch-v6.S
+++ b/sys/arm/arm/swtch-v6.S
@@ -79,7 +79,6 @@
*/
#include "assym.inc"
-#include "opt_sched.h"
#include <machine/asm.h>
#include <machine/asmacros.h>
@@ -432,11 +431,7 @@
* r11 = newtd
*/
-#if defined(SMP) && defined(SCHED_ULE)
- /*
- * 386 and amd64 do the blocked lock test only for SMP and SCHED_ULE
- * QQQ: What does it mean in reality and why is it done?
- */
+#if defined(SMP)
ldr r6, =blocked_lock
1:
ldr r3, [r11, #TD_LOCK] /* atomic write regular read */
diff --git a/sys/arm/include/ifunc.h b/sys/arm/include/ifunc.h
new file mode 100644
--- /dev/null
+++ b/sys/arm/include/ifunc.h
@@ -0,0 +1,10 @@
+/*
+ * This file is in the public domain.
+ */
+
+#ifndef __ARM_IFUNC_H
+#define __ARM_IFUNC_H
+
+#define __DO_NOT_HAVE_SYS_IFUNCS 1
+
+#endif
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -825,6 +825,7 @@
PCPU_SET(curthread, &thread0);
PCPU_SET(midr, get_midr());
+ sched_instance_select();
link_elf_ireloc();
#ifdef FDT
try_load_dtb();
diff --git a/sys/arm64/arm64/swtch.S b/sys/arm64/arm64/swtch.S
--- a/sys/arm64/arm64/swtch.S
+++ b/sys/arm64/arm64/swtch.S
@@ -31,7 +31,6 @@
#include "assym.inc"
#include "opt_kstack_pages.h"
-#include "opt_sched.h"
#include <sys/elf_common.h>
@@ -197,7 +196,7 @@
* Release the old thread.
*/
stlr x2, [x0, #TD_LOCK]
-#if defined(SCHED_ULE) && defined(SMP)
+#if defined(SMP)
/* Spin if TD_LOCK points to a blocked_lock */
ldr x2, =_C_LABEL(blocked_lock)
1:
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -210,7 +210,7 @@
#
options SCHED_4BSD
options SCHED_STATS
-#options SCHED_ULE
+options SCHED_ULE
#####################################################################
# SMP OPTIONS:
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3921,6 +3921,7 @@
kern/p1003_1b.c standard
kern/posix4_mib.c standard
kern/sched_4bsd.c optional sched_4bsd
+kern/sched_shim.c standard
kern/sched_ule.c optional sched_ule
kern/serdev_if.m standard
kern/stack_protector.c standard \
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -1544,6 +1544,7 @@
/* Initialize preload_kmdp */
preload_initkmdp(!metadata_missing);
+ sched_instance_select();
link_elf_ireloc();
vm86_initialize();
diff --git a/sys/i386/i386/swtch.S b/sys/i386/i386/swtch.S
--- a/sys/i386/i386/swtch.S
+++ b/sys/i386/i386/swtch.S
@@ -30,27 +30,11 @@
* SUCH DAMAGE.
*/
-#include "opt_sched.h"
-
#include <machine/asmacros.h>
#include "assym.inc"
-#if defined(SMP) && defined(SCHED_ULE)
-#define SETOP xchgl
#define BLOCK_SPIN(reg) \
- movl $blocked_lock,%eax ; \
- 100: ; \
- lock ; \
- cmpxchgl %eax,TD_LOCK(reg) ; \
- jne 101f ; \
- pause ; \
- jmp 100b ; \
- 101:
-#else
-#define SETOP movl
-#define BLOCK_SPIN(reg)
-#endif
/*****************************************************************************/
/* Scheduling */
@@ -162,7 +146,7 @@
/* Switchout td_lock */
movl %esi,%eax
movl PCPU(CPUID),%esi
- SETOP %eax,TD_LOCK(%edi)
+ xchgl %eax,TD_LOCK(%edi)
/* Release bit from old pmap->pm_active */
movl PCPU(CURPMAP), %ebx
@@ -181,7 +165,18 @@
#endif
btsl %esi, PM_ACTIVE(%ebx) /* set new */
sw1:
- BLOCK_SPIN(%ecx)
+#ifdef SMP
+ movl $blocked_lock,%eax
+100:
+
+ lock
+ cmpxchgl %eax,TD_LOCK(%ecx)
+ jne 101f
+ pause
+ jmp 100b
+101:
+#endif
+
/*
* At this point, we have managed thread locks and are ready
* to load up the rest of the next context.
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -34,12 +34,10 @@
* SUCH DAMAGE.
*/
-#include <sys/cdefs.h>
#include "opt_hwpmc_hooks.h"
#include "opt_hwt_hooks.h"
#include "opt_sched.h"
-#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
@@ -68,12 +66,6 @@
#include <dev/hwt/hwt_hook.h>
#endif
-#ifdef KDTRACE_HOOKS
-#include <sys/dtrace_bsd.h>
-int __read_mostly dtrace_vtime_active;
-dtrace_vtime_switch_func_t dtrace_vtime_switch_func;
-#endif
-
/*
* INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in
* the range 100-256 Hz (approximately).
@@ -139,7 +131,6 @@
static void schedcpu(void);
static void schedcpu_thread(void);
static void sched_priority(struct thread *td, u_char prio);
-static void sched_setup(void *dummy);
static void maybe_resched(struct thread *td);
static void updatepri(struct thread *td);
static void resetpriority(struct thread *td);
@@ -155,13 +146,12 @@
schedcpu_thread,
NULL
};
-SYSINIT(schedcpu, SI_SUB_LAST, SI_ORDER_FIRST, kproc_start,
- &sched_kp);
-SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);
-static void sched_initticks(void *dummy);
-SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks,
- NULL);
+static void
+sched_4bsd_schedcpu(void)
+{
+ kproc_start(&sched_kp);
+}
/*
* Global run queue.
@@ -198,7 +188,7 @@
}
static int
-sysctl_kern_quantum(SYSCTL_HANDLER_ARGS)
+sysctl_kern_4bsd_quantum(SYSCTL_HANDLER_ARGS)
{
int error, new_val, period;
@@ -215,77 +205,58 @@
return (0);
}
-SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
- "Scheduler");
+SYSCTL_NODE(_kern_sched, OID_AUTO, 4bsd, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "4BSD Scheduler");
-SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "4BSD", 0,
- "Scheduler name");
-SYSCTL_PROC(_kern_sched, OID_AUTO, quantum,
+SYSCTL_PROC(_kern_sched_4bsd, OID_AUTO, quantum,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
- sysctl_kern_quantum, "I",
+ sysctl_kern_4bsd_quantum, "I",
"Quantum for timeshare threads in microseconds");
-SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0,
+SYSCTL_INT(_kern_sched_4bsd, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0,
"Quantum for timeshare threads in stathz ticks");
#ifdef SMP
/* Enable forwarding of wakeups to all other cpus */
-static SYSCTL_NODE(_kern_sched, OID_AUTO, ipiwakeup,
+static SYSCTL_NODE(_kern_sched_4bsd, OID_AUTO, ipiwakeup,
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
"Kernel SMP");
static int runq_fuzz = 1;
-SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
+SYSCTL_INT(_kern_sched_4bsd, OID_AUTO, runq_fuzz, CTLFLAG_RW,
+ &runq_fuzz, 0, "");
static int forward_wakeup_enabled = 1;
-SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
+SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
&forward_wakeup_enabled, 0,
"Forwarding of wakeup to idle CPUs");
static int forward_wakeups_requested = 0;
-SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
+SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
&forward_wakeups_requested, 0,
"Requests for Forwarding of wakeup to idle CPUs");
static int forward_wakeups_delivered = 0;
-SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
+SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
&forward_wakeups_delivered, 0,
"Completed Forwarding of wakeup to idle CPUs");
static int forward_wakeup_use_mask = 1;
-SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
+SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
&forward_wakeup_use_mask, 0,
"Use the mask of idle cpus");
static int forward_wakeup_use_loop = 0;
-SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
+SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
&forward_wakeup_use_loop, 0,
"Use a loop to find idle cpus");
#endif
#if 0
static int sched_followon = 0;
-SYSCTL_INT(_kern_sched, OID_AUTO, followon, CTLFLAG_RW,
+SYSCTL_INT(_kern_sched_4bsd, OID_AUTO, followon, CTLFLAG_RW,
&sched_followon, 0,
"allow threads to share a quantum");
#endif
-SDT_PROVIDER_DEFINE(sched);
-
-SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *",
- "struct proc *", "uint8_t");
-SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *",
- "struct proc *", "void *");
-SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *",
- "struct proc *", "void *", "int");
-SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *",
- "struct proc *", "uint8_t", "struct thread *");
-SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int");
-SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *",
- "struct proc *");
-SDT_PROBE_DEFINE(sched, , , on__cpu);
-SDT_PROBE_DEFINE(sched, , , remain__cpu);
-SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *",
- "struct proc *");
-
static __inline void
sched_load_add(void)
{
@@ -322,7 +293,7 @@
* determines if the new thread should preempt the current thread. If so,
* it sets td_owepreempt to request a preemption.
*/
-int
+static int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
@@ -441,10 +412,7 @@
#define loadfactor(loadav) (2 * (loadav))
#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
-/* decay 95% of `ts_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
-static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
-SYSCTL_UINT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0,
- "Decay factor used for updating %CPU");
+extern fixpt_t ccpu;
/*
* If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
@@ -640,10 +608,14 @@
sched_prio(td, td->td_user_pri);
}
-/* ARGSUSED */
static void
-sched_setup(void *dummy)
+sched_4bsd_setup(void)
{
+ /*
+ * Decay 95% of `ts_pctcpu' in 60 seconds; see CCPU_SHIFT
+ * before changing.
+ */
+ ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
setup_runqs();
@@ -655,7 +627,7 @@
* This routine determines time constants after stathz and hz are setup.
*/
static void
-sched_initticks(void *dummy)
+sched_4bsd_initticks(void)
{
realstathz = stathz ? stathz : hz;
@@ -672,8 +644,8 @@
* Called from:
* proc0_init()
*/
-void
-schedinit(void)
+static void
+sched_4bsd_init(void)
{
/*
@@ -684,15 +656,15 @@
mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN);
}
-void
-schedinit_ap(void)
+static void
+sched_4bsd_init_ap(void)
{
/* Nothing needed. */
}
-bool
-sched_runnable(void)
+static bool
+sched_4bsd_runnable(void)
{
#ifdef SMP
return (runq_not_empty(&runq) ||
@@ -702,18 +674,14 @@
#endif
}
-int
-sched_rr_interval(void)
+static int
+sched_4bsd_rr_interval(void)
{
/* Convert sched_slice from stathz to hz. */
return (imax(1, (sched_slice * hz + realstathz / 2) / realstathz));
}
-SCHED_STAT_DEFINE(ithread_demotions, "Interrupt thread priority demotions");
-SCHED_STAT_DEFINE(ithread_preemptions,
- "Interrupt thread preemptions due to time-sharing");
-
/*
* We adjust the priority of the current process. The priority of a
* process gets worse as it accumulates CPU time. The cpu usage
@@ -773,8 +741,8 @@
stat->idlecalls = 0;
}
-void
-sched_clock(struct thread *td, int cnt)
+static void
+sched_4bsd_clock(struct thread *td, int cnt)
{
for ( ; cnt > 0; cnt--)
@@ -784,8 +752,8 @@
/*
* Charge child's scheduling CPU usage to parent.
*/
-void
-sched_exit(struct proc *p, struct thread *td)
+static void
+sched_4bsd_exit(struct proc *p, struct thread *td)
{
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "proc exit",
@@ -795,8 +763,8 @@
sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
}
-void
-sched_exit_thread(struct thread *td, struct thread *child)
+static void
+sched_4bsd_exit_thread(struct thread *td, struct thread *child)
{
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(child), "exit",
@@ -811,14 +779,14 @@
thread_unlock(child);
}
-void
-sched_fork(struct thread *td, struct thread *childtd)
+static void
+sched_4bsd_fork(struct thread *td, struct thread *childtd)
{
sched_fork_thread(td, childtd);
}
-void
-sched_fork_thread(struct thread *td, struct thread *childtd)
+static void
+sched_4bsd_fork_thread(struct thread *td, struct thread *childtd)
{
struct td_sched *ts, *tsc;
@@ -836,8 +804,8 @@
ts->ts_slice = 1;
}
-void
-sched_nice(struct proc *p, int nice)
+static void
+sched_4bsd_nice(struct proc *p, int nice)
{
struct thread *td;
@@ -851,8 +819,8 @@
}
}
-void
-sched_class(struct thread *td, int class)
+static void
+sched_4bsd_class(struct thread *td, int class)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
td->td_pri_class = class;
@@ -890,8 +858,8 @@
* Update a thread's priority when it is lent another thread's
* priority.
*/
-void
-sched_lend_prio(struct thread *td, u_char prio)
+static void
+sched_4bsd_lend_prio(struct thread *td, u_char prio)
{
td->td_flags |= TDF_BORROWING;
@@ -906,8 +874,8 @@
* important than prio the thread will keep a priority boost
* of prio.
*/
-void
-sched_unlend_prio(struct thread *td, u_char prio)
+static void
+sched_4bsd_unlend_prio(struct thread *td, u_char prio)
{
u_char base_pri;
@@ -923,8 +891,8 @@
sched_lend_prio(td, prio);
}
-void
-sched_prio(struct thread *td, u_char prio)
+static void
+sched_4bsd_prio(struct thread *td, u_char prio)
{
u_char oldprio;
@@ -950,8 +918,8 @@
turnstile_adjust(td, oldprio);
}
-void
-sched_ithread_prio(struct thread *td, u_char prio)
+static void
+sched_4bsd_ithread_prio(struct thread *td, u_char prio)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
MPASS(td->td_pri_class == PRI_ITHD);
@@ -959,8 +927,8 @@
sched_prio(td, prio);
}
-void
-sched_user_prio(struct thread *td, u_char prio)
+static void
+sched_4bsd_user_prio(struct thread *td, u_char prio)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -970,8 +938,8 @@
td->td_user_pri = prio;
}
-void
-sched_lend_user_prio(struct thread *td, u_char prio)
+static void
+sched_4bsd_lend_user_prio(struct thread *td, u_char prio)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -986,8 +954,8 @@
/*
* Like the above but first check if there is anything to do.
*/
-void
-sched_lend_user_prio_cond(struct thread *td, u_char prio)
+static void
+sched_4bsd_lend_user_prio_cond(struct thread *td, u_char prio)
{
if (td->td_lend_user_pri == prio)
@@ -998,8 +966,8 @@
thread_unlock(td);
}
-void
-sched_sleep(struct thread *td, int pri)
+static void
+sched_4bsd_sleep(struct thread *td, int pri)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -1009,8 +977,8 @@
sched_prio(td, pri);
}
-void
-sched_switch(struct thread *td, int flags)
+static void
+sched_4bsd_sswitch(struct thread *td, int flags)
{
struct thread *newtd;
struct mtx *tmtx;
@@ -1142,8 +1110,8 @@
mtx_unlock_spin(&sched_lock);
}
-void
-sched_wakeup(struct thread *td, int srqflags)
+static void
+sched_4bsd_wakeup(struct thread *td, int srqflags)
{
struct td_sched *ts;
@@ -1318,8 +1286,8 @@
}
#endif
-void
-sched_add(struct thread *td, int flags)
+static void
+sched_4bsd_add(struct thread *td, int flags)
#ifdef SMP
{
cpuset_t tidlemsk;
@@ -1466,8 +1434,8 @@
}
#endif /* SMP */
-void
-sched_rem(struct thread *td)
+static void
+sched_4bsd_rem(struct thread *td)
{
struct td_sched *ts;
@@ -1496,8 +1464,8 @@
* Select threads to run. Note that running threads still consume a
* slot.
*/
-struct thread *
-sched_choose(void)
+static struct thread *
+sched_4bsd_choose(void)
{
struct thread *td;
struct runq *rq;
@@ -1541,8 +1509,8 @@
return (PCPU_GET(idlethread));
}
-void
-sched_preempt(struct thread *td)
+static void
+sched_4bsd_preempt(struct thread *td)
{
int flags;
@@ -1558,8 +1526,8 @@
}
}
-void
-sched_userret_slowpath(struct thread *td)
+static void
+sched_4bsd_userret_slowpath(struct thread *td)
{
thread_lock(td);
@@ -1568,8 +1536,8 @@
thread_unlock(td);
}
-void
-sched_bind(struct thread *td, int cpu)
+static void
+sched_4bsd_bind(struct thread *td, int cpu)
{
#ifdef SMP
struct td_sched *ts = td_get_sched(td);
@@ -1589,48 +1557,48 @@
#endif
}
-void
-sched_unbind(struct thread* td)
+static void
+sched_4bsd_unbind(struct thread* td)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(td == curthread, ("sched_unbind: can only bind curthread"));
td->td_flags &= ~TDF_BOUND;
}
-int
-sched_is_bound(struct thread *td)
+static int
+sched_4bsd_is_bound(struct thread *td)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
return (td->td_flags & TDF_BOUND);
}
-void
-sched_relinquish(struct thread *td)
+static void
+sched_4bsd_relinquish(struct thread *td)
{
thread_lock(td);
mi_switch(SW_VOL | SWT_RELINQUISH);
}
-int
-sched_load(void)
+static int
+sched_4bsd_load(void)
{
return (sched_tdcnt);
}
-int
-sched_sizeof_proc(void)
+static int
+sched_4bsd_sizeof_proc(void)
{
return (sizeof(struct proc));
}
-int
-sched_sizeof_thread(void)
+static int
+sched_4bsd_sizeof_thread(void)
{
return (sizeof(struct thread) + sizeof(struct td_sched));
}
-fixpt_t
-sched_pctcpu(struct thread *td)
+static fixpt_t
+sched_4bsd_pctcpu(struct thread *td)
{
struct td_sched *ts;
@@ -1639,42 +1607,8 @@
return (ts->ts_pctcpu);
}
-#ifdef RACCT
-/*
- * Calculates the contribution to the thread cpu usage for the latest
- * (unfinished) second.
- */
-fixpt_t
-sched_pctcpu_delta(struct thread *td)
-{
- struct td_sched *ts;
- fixpt_t delta;
- int realstathz;
-
- THREAD_LOCK_ASSERT(td, MA_OWNED);
- ts = td_get_sched(td);
- delta = 0;
- realstathz = stathz ? stathz : hz;
- if (ts->ts_cpticks != 0) {
-#if (FSHIFT >= CCPU_SHIFT)
- delta = (realstathz == 100)
- ? ((fixpt_t) ts->ts_cpticks) <<
- (FSHIFT - CCPU_SHIFT) :
- 100 * (((fixpt_t) ts->ts_cpticks)
- << (FSHIFT - CCPU_SHIFT)) / realstathz;
-#else
- delta = ((FSCALE - ccpu) *
- (ts->ts_cpticks *
- FSCALE / realstathz)) >> FSHIFT;
-#endif
- }
-
- return (delta);
-}
-#endif
-
-u_int
-sched_estcpu(struct thread *td)
+static u_int
+sched_4bsd_estcpu(struct thread *td)
{
return (td_get_sched(td)->ts_estcpu);
@@ -1683,8 +1617,8 @@
/*
* The actual idle process.
*/
-void
-sched_idletd(void *dummy)
+static void
+sched_4bsd_idletd(void *dummy)
{
struct pcpuidlestat *stat;
@@ -1725,8 +1659,8 @@
/*
* A CPU is entering for the first time.
*/
-void
-sched_ap_entry(void)
+static void
+sched_4bsd_ap_entry(void)
{
/*
@@ -1749,8 +1683,8 @@
/*
* A thread is exiting.
*/
-void
-sched_throw(struct thread *td)
+static void
+sched_4bsd_throw(struct thread *td)
{
MPASS(td != NULL);
@@ -1763,8 +1697,8 @@
sched_throw_tail(td);
}
-void
-sched_fork_exit(struct thread *td)
+static void
+sched_4bsd_fork_exit(struct thread *td)
{
/*
@@ -1782,8 +1716,8 @@
SDT_PROBE0(sched, , , on__cpu);
}
-char *
-sched_tdname(struct thread *td)
+static char *
+sched_4bsd_tdname(struct thread *td)
{
#ifdef KTR
struct td_sched *ts;
@@ -1798,19 +1732,19 @@
#endif
}
-#ifdef KTR
-void
-sched_clear_tdname(struct thread *td)
+static void
+sched_4bsd_clear_tdname(struct thread *td)
{
+#ifdef KTR
struct td_sched *ts;
ts = td_get_sched(td);
ts->ts_name[0] = '\0';
-}
#endif
+}
-void
-sched_affinity(struct thread *td)
+static void
+sched_4bsd_affinity(struct thread *td)
{
#ifdef SMP
struct td_sched *ts;
@@ -1872,3 +1806,83 @@
}
#endif
}
+
+static bool
+sched_4bsd_do_timer_accounting(void)
+{
+#ifdef SMP
+ /*
+ * Don't do any accounting for the disabled HTT cores, since it
+ * will provide misleading numbers for the userland.
+ *
+ * No locking is necessary here, since even if we lose the race
+ * when hlt_cpus_mask changes it is not a big deal, really.
+ *
+ * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
+ * and unlike other schedulers it actually schedules threads to
+ * those CPUs.
+ */
+ return (!CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask));
+#else
+ return (true);
+#endif
+}
+
+static int
+sched_4bsd_find_l2_neighbor(int cpu)
+{
+ return (-1);
+}
+
+struct sched_instance sched_4bsd_instance = {
+#define SLOT(name) .name = sched_4bsd_##name
+ SLOT(load),
+ SLOT(rr_interval),
+ SLOT(runnable),
+ SLOT(exit),
+ SLOT(fork),
+ SLOT(fork_exit),
+ SLOT(class),
+ SLOT(nice),
+ SLOT(ap_entry),
+ SLOT(exit_thread),
+ SLOT(estcpu),
+ SLOT(fork_thread),
+ SLOT(ithread_prio),
+ SLOT(lend_prio),
+ SLOT(lend_user_prio),
+ SLOT(lend_user_prio_cond),
+ SLOT(pctcpu),
+ SLOT(prio),
+ SLOT(sleep),
+ SLOT(sswitch),
+ SLOT(throw),
+ SLOT(unlend_prio),
+ SLOT(user_prio),
+ SLOT(userret_slowpath),
+ SLOT(add),
+ SLOT(choose),
+ SLOT(clock),
+ SLOT(idletd),
+ SLOT(preempt),
+ SLOT(relinquish),
+ SLOT(rem),
+ SLOT(wakeup),
+ SLOT(bind),
+ SLOT(unbind),
+ SLOT(is_bound),
+ SLOT(affinity),
+ SLOT(sizeof_proc),
+ SLOT(sizeof_thread),
+ SLOT(tdname),
+ SLOT(clear_tdname),
+ SLOT(do_timer_accounting),
+ SLOT(find_l2_neighbor),
+ SLOT(init),
+ SLOT(init_ap),
+ SLOT(setup),
+ SLOT(initticks),
+ SLOT(schedcpu),
+#undef SLOT
+};
+DECLARE_SCHEDULER(fourbsd_sched_selector, "4BSD", &sched_4bsd_instance);
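
The decay constant that used to be a file-scope initializer is now set in sched_4bsd_setup(), but its meaning is unchanged: ccpu = exp(-1/20) scaled by FSCALE, applied by schedcpu() roughly once per second, so after 60 seconds a thread retains

    (exp(-1/20))^60 = exp(-3) ~= 0.0498

of its old ts_pctcpu, i.e. about 95% decays away, exactly as the comment says. Because the shared ccpu now lives in sched_shim.c and starts at zero, kern.ccpu still reads 0 when ULE is selected, preserving the "ps compat" behavior that sched_ule.c used to provide explicitly.
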
diff --git a/sys/kern/sched_shim.c b/sys/kern/sched_shim.c
new file mode 100644
--- /dev/null
+++ b/sys/kern/sched_shim.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright 2026 The FreeBSD Foundation
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+
+#include "opt_sched.h"
+
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/proc.h>
+#include <sys/runq.h>
+#include <sys/sbuf.h>
+#include <sys/sched.h>
+#include <sys/sysctl.h>
+#include <machine/ifunc.h>
+
+const struct sched_instance *active_sched;
+
+#ifndef __DO_NOT_HAVE_SYS_IFUNCS
+#define __DEFINE_SHIM(__m, __r, __n, __p, __a) \
+ DEFINE_IFUNC(, __r, __n, __p) \
+ { \
+ return (active_sched->__m); \
+ }
+#else
+#define __DEFINE_SHIM(__m, __r, __n, __p, __a) \
+ __r \
+ __n __p \
+ { \
+ return (active_sched->__m __a); \
+ }
+#endif
+#define DEFINE_SHIM0(__m, __r, __n) \
+ __DEFINE_SHIM(__m, __r, __n, (void), ())
+#define DEFINE_SHIM1(__m, __r, __n, __t1, __a1) \
+ __DEFINE_SHIM(__m, __r, __n, (__t1 __a1), (__a1))
+#define DEFINE_SHIM2(__m, __r, __n, __t1, __a1, __t2, __a2) \
+ __DEFINE_SHIM(__m, __r, __n, (__t1 __a1, __t2 __a2), (__a1, __a2))
+
+DEFINE_SHIM0(load, int, sched_load)
+DEFINE_SHIM0(rr_interval, int, sched_rr_interval)
+DEFINE_SHIM0(runnable, bool, sched_runnable)
+DEFINE_SHIM2(exit, void, sched_exit, struct proc *, p,
+ struct thread *, childtd)
+DEFINE_SHIM2(fork, void, sched_fork, struct thread *, td,
+ struct thread *, childtd)
+DEFINE_SHIM1(fork_exit, void, sched_fork_exit, struct thread *, td)
+DEFINE_SHIM2(class, void, sched_class, struct thread *, td, int, class)
+DEFINE_SHIM2(nice, void, sched_nice, struct proc *, p, int, nice)
+DEFINE_SHIM0(ap_entry, void, sched_ap_entry)
+DEFINE_SHIM2(exit_thread, void, sched_exit_thread, struct thread *, td,
+ struct thread *, child)
+DEFINE_SHIM1(estcpu, u_int, sched_estcpu, struct thread *, td)
+DEFINE_SHIM2(fork_thread, void, sched_fork_thread, struct thread *, td,
+ struct thread *, child)
+DEFINE_SHIM2(ithread_prio, void, sched_ithread_prio, struct thread *, td,
+ u_char, prio)
+DEFINE_SHIM2(lend_prio, void, sched_lend_prio, struct thread *, td,
+ u_char, prio)
+DEFINE_SHIM2(lend_user_prio, void, sched_lend_user_prio, struct thread *, td,
+ u_char, pri)
+DEFINE_SHIM2(lend_user_prio_cond, void, sched_lend_user_prio_cond,
+ struct thread *, td, u_char, pri)
+DEFINE_SHIM1(pctcpu, fixpt_t, sched_pctcpu, struct thread *, td)
+DEFINE_SHIM2(prio, void, sched_prio, struct thread *, td, u_char, prio)
+DEFINE_SHIM2(sleep, void, sched_sleep, struct thread *, td, int, prio)
+DEFINE_SHIM2(sswitch, void, sched_switch, struct thread *, td, int, flags)
+DEFINE_SHIM1(throw, void, sched_throw, struct thread *, td)
+DEFINE_SHIM2(unlend_prio, void, sched_unlend_prio, struct thread *, td,
+ u_char, prio)
+DEFINE_SHIM2(user_prio, void, sched_user_prio, struct thread *, td,
+ u_char, prio)
+DEFINE_SHIM1(userret_slowpath, void, sched_userret_slowpath,
+ struct thread *, td)
+DEFINE_SHIM2(add, void, sched_add, struct thread *, td, int, flags)
+DEFINE_SHIM0(choose, struct thread *, sched_choose)
+DEFINE_SHIM2(clock, void, sched_clock, struct thread *, td, int, cnt)
+DEFINE_SHIM1(idletd, void, sched_idletd, void *, dummy)
+DEFINE_SHIM1(preempt, void, sched_preempt, struct thread *, td)
+DEFINE_SHIM1(relinquish, void, sched_relinquish, struct thread *, td)
+DEFINE_SHIM1(rem, void, sched_rem, struct thread *, td)
+DEFINE_SHIM2(wakeup, void, sched_wakeup, struct thread *, td, int, srqflags)
+DEFINE_SHIM2(bind, void, sched_bind, struct thread *, td, int, cpu)
+DEFINE_SHIM1(unbind, void, sched_unbind, struct thread *, td)
+DEFINE_SHIM1(is_bound, int, sched_is_bound, struct thread *, td)
+DEFINE_SHIM1(affinity, void, sched_affinity, struct thread *, td)
+DEFINE_SHIM0(sizeof_proc, int, sched_sizeof_proc)
+DEFINE_SHIM0(sizeof_thread, int, sched_sizeof_thread)
+DEFINE_SHIM1(tdname, char *, sched_tdname, struct thread *, td)
+DEFINE_SHIM1(clear_tdname, void, sched_clear_tdname, struct thread *, td)
+DEFINE_SHIM0(do_timer_accounting, bool, sched_do_timer_accounting)
+DEFINE_SHIM1(find_l2_neighbor, int, sched_find_l2_neighbor, int, cpu)
+DEFINE_SHIM0(init_ap, void, schedinit_ap)
+
+
+SCHED_STAT_DEFINE(ithread_demotions, "Interrupt thread priority demotions");
+SCHED_STAT_DEFINE(ithread_preemptions,
+ "Interrupt thread preemptions due to time-sharing");
+
+SDT_PROVIDER_DEFINE(sched);
+
+SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *",
+ "struct proc *", "uint8_t");
+SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *",
+ "struct proc *", "void *");
+SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *",
+ "struct proc *", "void *", "int");
+SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *",
+ "struct proc *", "uint8_t", "struct thread *");
+SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int");
+SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *",
+ "struct proc *");
+SDT_PROBE_DEFINE(sched, , , on__cpu);
+SDT_PROBE_DEFINE(sched, , , remain__cpu);
+SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *",
+ "struct proc *");
+
+#ifdef KDTRACE_HOOKS
+#include <sys/dtrace_bsd.h>
+int __read_mostly dtrace_vtime_active;
+dtrace_vtime_switch_func_t dtrace_vtime_switch_func;
+#endif
+
+static char sched_name[32] = "ULE";
+
+SET_DECLARE(sched_instance_set, struct sched_selection);
+
+void
+sched_instance_select(void)
+{
+ struct sched_selection *s, **ss;
+ int i;
+
+ TUNABLE_STR_FETCH("kern.sched.name", sched_name, sizeof(sched_name));
+ SET_FOREACH(ss, sched_instance_set) {
+ s = *ss;
+ for (i = 0; s->name[i] == sched_name[i]; i++) {
+ if (s->name[i] == '\0') {
+ active_sched = s->instance;
+ return;
+ }
+ }
+ }
+
+ /*
+ * No scheduler matching the configuration was found. If
+ * there is any scheduler compiled in, at all, use the first
+ * scheduler from the linker set.
+ */
+ if (SET_BEGIN(sched_instance_set) < SET_LIMIT(sched_instance_set)) {
+ s = *SET_BEGIN(sched_instance_set);
+ active_sched = s->instance;
+ for (i = 0;; i++) {
+ sched_name[i] = s->name[i];
+ if (s->name[i] == '\0')
+ break;
+ }
+ }
+}
+
+void
+schedinit(void)
+{
+ if (active_sched == NULL)
+ panic("Cannot find scheduler %s", sched_name);
+ active_sched->init();
+}
+
+static void
+sched_setup(void *dummy)
+{
+ active_sched->setup();
+}
+SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);
+
+static void
+sched_initticks(void *dummy)
+{
+ active_sched->initticks();
+}
+SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks,
+ NULL);
+
+static void
+sched_schedcpu(void)
+{
+ active_sched->schedcpu();
+}
+SYSINIT(schedcpu, SI_SUB_LAST, SI_ORDER_FIRST, sched_schedcpu, NULL);
+
+SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "Scheduler");
+
+SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, sched_name, 0,
+ "Scheduler name");
+
+static int
+sysctl_kern_sched_available(SYSCTL_HANDLER_ARGS)
+{
+ struct sched_selection *s, **ss;
+ struct sbuf *sb, sm;
+ int error;
+ bool first;
+
+ sb = sbuf_new_for_sysctl(&sm, NULL, 0, req);
+ if (sb == NULL)
+ return (ENOMEM);
+ first = true;
+ SET_FOREACH(ss, sched_instance_set) {
+ s = *ss;
+ if (first)
+ first = false;
+ else
+ sbuf_cat(sb, ",");
+ sbuf_cat(sb, s->name);
+ }
+ error = sbuf_finish(sb);
+ sbuf_delete(sb);
+ return (error);
+}
+
+SYSCTL_PROC(_kern_sched, OID_AUTO, available,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_kern_sched_available, "A",
+ "List of available schedulers");
+
+fixpt_t ccpu;
+SYSCTL_UINT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0,
+ "Decay factor used for updating %CPU");
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -37,12 +37,10 @@
* Isilon Systems and a general lack of creativity on the part of the author.
*/
-#include <sys/cdefs.h>
#include "opt_hwpmc_hooks.h"
#include "opt_hwt_hooks.h"
#include "opt_sched.h"
-#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
@@ -74,12 +72,6 @@
#include <dev/hwt/hwt_hook.h>
#endif
-#ifdef KDTRACE_HOOKS
-#include <sys/dtrace_bsd.h>
-int __read_mostly dtrace_vtime_active;
-dtrace_vtime_switch_func_t dtrace_vtime_switch_func;
-#endif
-
#include <machine/cpu.h>
#include <machine/smp.h>
@@ -406,36 +398,11 @@
static bool sched_balance_pair(struct tdq *, struct tdq *);
static inline struct tdq *sched_setcpu(struct thread *, int, int);
static inline void thread_unblock_switch(struct thread *, struct mtx *);
-static int sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS);
-static int sysctl_kern_sched_topology_spec_internal(struct sbuf *sb,
+static int sysctl_kern_sched_ule_topology_spec(SYSCTL_HANDLER_ARGS);
+static int sysctl_kern_sched_ule_topology_spec_internal(struct sbuf *sb,
struct cpu_group *cg, int indent);
#endif
-static void sched_setup(void *dummy);
-SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);
-
-static void sched_initticks(void *dummy);
-SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks,
- NULL);
-
-SDT_PROVIDER_DEFINE(sched);
-
-SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *",
- "struct proc *", "uint8_t");
-SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *",
- "struct proc *", "void *");
-SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *",
- "struct proc *", "void *", "int");
-SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *",
- "struct proc *", "uint8_t", "struct thread *");
-SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int");
-SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *",
- "struct proc *");
-SDT_PROBE_DEFINE(sched, , , on__cpu);
-SDT_PROBE_DEFINE(sched, , , remain__cpu);
-SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *",
- "struct proc *");
-
/*
* Print the threads waiting on a run-queue.
*/
@@ -1642,7 +1609,7 @@
* information.
*/
static void
-sched_setup(void *dummy)
+sched_ule_setup(void)
{
struct tdq *tdq;
@@ -1667,7 +1634,7 @@
*/
/* ARGSUSED */
static void
-sched_initticks(void *dummy)
+sched_ule_initticks(void)
{
int incr;
@@ -1891,8 +1858,8 @@
/*
* Called from proc0_init() to setup the scheduler fields.
*/
-void
-schedinit(void)
+static void
+sched_ule_init(void)
{
struct td_sched *ts0;
@@ -1916,8 +1883,8 @@
* TDQ_SELF() relies on the below sched pcpu setting; it may be used only
* after schedinit_ap().
*/
-void
-schedinit_ap(void)
+static void
+sched_ule_init_ap(void)
{
#ifdef SMP
@@ -1931,8 +1898,8 @@
* priority they will switch when their slices run out, which will be
* at most sched_slice stathz ticks.
*/
-int
-sched_rr_interval(void)
+static int
+sched_ule_rr_interval(void)
{
/* Convert sched_slice from stathz to hz. */
@@ -2051,8 +2018,8 @@
* Update a thread's priority when it is lent another thread's
* priority.
*/
-void
-sched_lend_prio(struct thread *td, u_char prio)
+static void
+sched_ule_lend_prio(struct thread *td, u_char prio)
{
td->td_flags |= TDF_BORROWING;
@@ -2067,8 +2034,8 @@
* important than prio, the thread will keep a priority boost
* of prio.
*/
-void
-sched_unlend_prio(struct thread *td, u_char prio)
+static void
+sched_ule_unlend_prio(struct thread *td, u_char prio)
{
u_char base_pri;
@@ -2087,8 +2054,8 @@
/*
* Standard entry for setting the priority to an absolute value.
*/
-void
-sched_prio(struct thread *td, u_char prio)
+static void
+sched_ule_prio(struct thread *td, u_char prio)
{
u_char oldprio;
@@ -2117,8 +2084,8 @@
/*
* Set the base interrupt thread priority.
*/
-void
-sched_ithread_prio(struct thread *td, u_char prio)
+static void
+sched_ule_ithread_prio(struct thread *td, u_char prio)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
MPASS(td->td_pri_class == PRI_ITHD);
@@ -2129,8 +2096,8 @@
/*
* Set the base user priority, does not effect current running priority.
*/
-void
-sched_user_prio(struct thread *td, u_char prio)
+static void
+sched_ule_user_prio(struct thread *td, u_char prio)
{
td->td_base_user_pri = prio;
@@ -2139,8 +2106,8 @@
td->td_user_pri = prio;
}
-void
-sched_lend_user_prio(struct thread *td, u_char prio)
+static void
+sched_ule_lend_user_prio(struct thread *td, u_char prio)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -2155,8 +2122,8 @@
/*
* Like the above but first check if there is anything to do.
*/
-void
-sched_lend_user_prio_cond(struct thread *td, u_char prio)
+static void
+sched_ule_lend_user_prio_cond(struct thread *td, u_char prio)
{
if (td->td_lend_user_pri == prio)
@@ -2327,8 +2294,8 @@
* migrating a thread from one queue to another as running threads may
* be assigned elsewhere via binding.
*/
-void
-sched_switch(struct thread *td, int flags)
+static void
+sched_ule_sswitch(struct thread *td, int flags)
{
struct thread *newtd;
struct tdq *tdq;
@@ -2466,8 +2433,8 @@
/*
* Adjust thread priorities as a result of a nice request.
*/
-void
-sched_nice(struct proc *p, int nice)
+static void
+sched_ule_nice(struct proc *p, int nice)
{
struct thread *td;
@@ -2485,8 +2452,8 @@
/*
* Record the sleep time for the interactivity scorer.
*/
-void
-sched_sleep(struct thread *td, int prio)
+static void
+sched_ule_sleep(struct thread *td, int prio)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -2506,8 +2473,8 @@
*
* Requires the thread lock on entry, drops on exit.
*/
-void
-sched_wakeup(struct thread *td, int srqflags)
+static void
+sched_ule_wakeup(struct thread *td, int srqflags)
{
struct td_sched *ts;
int slptick;
@@ -2546,8 +2513,8 @@
* Penalize the parent for creating a new child and initialize the child's
* priority.
*/
-void
-sched_fork(struct thread *td, struct thread *child)
+static void
+sched_ule_fork(struct thread *td, struct thread *child)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
sched_pctcpu_update(td_get_sched(td), 1);
@@ -2565,8 +2532,8 @@
/*
* Fork a new thread, may be within the same process.
*/
-void
-sched_fork_thread(struct thread *td, struct thread *child)
+static void
+sched_ule_fork_thread(struct thread *td, struct thread *child)
{
struct td_sched *ts;
struct td_sched *ts2;
@@ -2611,8 +2578,8 @@
/*
* Adjust the priority class of a thread.
*/
-void
-sched_class(struct thread *td, int class)
+static void
+sched_ule_class(struct thread *td, int class)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -2624,8 +2591,8 @@
/*
* Return some of the child's priority and interactivity to the parent.
*/
-void
-sched_exit(struct proc *p, struct thread *child)
+static void
+sched_ule_exit(struct proc *p, struct thread *child)
{
struct thread *td;
@@ -2642,8 +2609,8 @@
* jobs such as make. This has little effect on the make process itself but
* causes new processes spawned by it to receive worse scores immediately.
*/
-void
-sched_exit_thread(struct thread *td, struct thread *child)
+static void
+sched_ule_exit_thread(struct thread *td, struct thread *child)
{
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(child), "thread exit",
@@ -2660,8 +2627,8 @@
thread_unlock(td);
}
-void
-sched_preempt(struct thread *td)
+static void
+sched_ule_preempt(struct thread *td)
{
struct tdq *tdq;
int flags;
@@ -2691,8 +2658,8 @@
* Fix priorities on return to user-space. Priorities may be elevated due
* to static priorities in msleep() or similar.
*/
-void
-sched_userret_slowpath(struct thread *td)
+static void
+sched_ule_userret_slowpath(struct thread *td)
{
thread_lock(td);
@@ -2702,10 +2669,6 @@
thread_unlock(td);
}
-SCHED_STAT_DEFINE(ithread_demotions, "Interrupt thread priority demotions");
-SCHED_STAT_DEFINE(ithread_preemptions,
- "Interrupt thread preemptions due to time-sharing");
-
/*
* Return time slice for a given thread. For ithreads this is
* sched_slice. For other threads it is tdq_slice(tdq).
@@ -2722,8 +2685,8 @@
* Handle a stathz tick. This is really only relevant for timeshare
* and interrupt threads.
*/
-void
-sched_clock(struct thread *td, int cnt)
+static void
+sched_ule_clock(struct thread *td, int cnt)
{
struct tdq *tdq;
struct td_sched *ts;
@@ -2808,8 +2771,8 @@
}
}
-u_int
-sched_estcpu(struct thread *td __unused)
+static u_int
+sched_ule_estcpu(struct thread *td __unused)
{
return (0);
@@ -2819,8 +2782,8 @@
* Return whether the current CPU has runnable tasks. Used for in-kernel
* cooperative idle threads.
*/
-bool
-sched_runnable(void)
+static bool
+sched_ule_runnable(void)
{
struct tdq *tdq;
@@ -2832,8 +2795,8 @@
* Choose the highest priority thread to run. The thread is removed from
* the run-queue while running however the load remains.
*/
-struct thread *
-sched_choose(void)
+static struct thread *
+sched_ule_choose(void)
{
struct thread *td;
struct tdq *tdq;
@@ -2909,8 +2872,8 @@
*
* Requires the thread lock on entry, drops on exit.
*/
-void
-sched_add(struct thread *td, int flags)
+static void
+sched_ule_add(struct thread *td, int flags)
{
struct tdq *tdq;
#ifdef SMP
@@ -2969,8 +2932,8 @@
* when we're stealing a thread from a remote queue. Otherwise all threads
* exit by calling sched_exit_thread() and sched_throw() themselves.
*/
-void
-sched_rem(struct thread *td)
+static void
+sched_ule_rem(struct thread *td)
{
struct tdq *tdq;
@@ -2992,8 +2955,8 @@
/*
* Fetch cpu utilization information. Updates on demand.
*/
-fixpt_t
-sched_pctcpu(struct thread *td)
+static fixpt_t
+sched_ule_pctcpu(struct thread *td)
{
struct td_sched *ts;
u_int len;
@@ -3014,8 +2977,8 @@
* Enforce affinity settings for a thread. Called after adjustments to
* cpumask.
*/
-void
-sched_affinity(struct thread *td)
+static void
+sched_ule_affinity(struct thread *td)
{
#ifdef SMP
struct td_sched *ts;
@@ -3045,8 +3008,8 @@
/*
* Bind a thread to a target cpu.
*/
-void
-sched_bind(struct thread *td, int cpu)
+static void
+sched_ule_bind(struct thread *td, int cpu)
{
struct td_sched *ts;
@@ -3069,8 +3032,8 @@
/*
* Release a bound thread.
*/
-void
-sched_unbind(struct thread *td)
+static void
+sched_ule_unbind(struct thread *td)
{
struct td_sched *ts;
@@ -3083,8 +3046,8 @@
sched_unpin();
}
-int
-sched_is_bound(struct thread *td)
+static int
+sched_ule_is_bound(struct thread *td)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
return (td_get_sched(td)->ts_flags & TSF_BOUND);
@@ -3093,8 +3056,8 @@
/*
* Basic yield call.
*/
-void
-sched_relinquish(struct thread *td)
+static void
+sched_ule_relinquish(struct thread *td)
{
thread_lock(td);
mi_switch(SW_VOL | SWT_RELINQUISH);
@@ -3103,8 +3066,8 @@
/*
* Return the total system load.
*/
-int
-sched_load(void)
+static int
+sched_ule_load(void)
{
#ifdef SMP
int total;
@@ -3119,14 +3082,14 @@
#endif
}
-int
-sched_sizeof_proc(void)
+static int
+sched_ule_sizeof_proc(void)
{
return (sizeof(struct proc));
}
-int
-sched_sizeof_thread(void)
+static int
+sched_ule_sizeof_thread(void)
{
return (sizeof(struct thread) + sizeof(struct td_sched));
}
@@ -3141,8 +3104,8 @@
/*
* The actual idle process.
*/
-void
-sched_idletd(void *dummy)
+static void
+sched_ule_idletd(void *dummy)
{
struct thread *td;
struct tdq *tdq;
@@ -3244,8 +3207,8 @@
/*
* A CPU is entering for the first time.
*/
-void
-sched_ap_entry(void)
+static void
+sched_ule_ap_entry(void)
{
struct thread *newtd;
struct tdq *tdq;
@@ -3274,8 +3237,8 @@
/*
* A thread is exiting.
*/
-void
-sched_throw(struct thread *td)
+static void
+sched_ule_throw(struct thread *td)
{
struct thread *newtd;
struct tdq *tdq;
@@ -3305,8 +3268,8 @@
* This is called from fork_exit(). Just acquire the correct locks and
* let fork do the rest of the work.
*/
-void
-sched_fork_exit(struct thread *td)
+static void
+sched_ule_fork_exit(struct thread *td)
{
struct tdq *tdq;
int cpuid;
@@ -3331,8 +3294,8 @@
/*
* Create on first use to catch odd startup conditions.
*/
-char *
-sched_tdname(struct thread *td)
+static char *
+sched_ule_tdname(struct thread *td)
{
#ifdef KTR
struct td_sched *ts;
@@ -3347,17 +3310,148 @@
#endif
}
-#ifdef KTR
-void
-sched_clear_tdname(struct thread *td)
+static void
+sched_ule_clear_tdname(struct thread *td)
{
+#ifdef KTR
struct td_sched *ts;
ts = td_get_sched(td);
ts->ts_name[0] = '\0';
+#endif
+}
+
+static void
+sched_ule_schedcpu(void)
+{
+}
+
+static bool
+sched_ule_do_timer_accounting(void)
+{
+ return (true);
+}
+
+#ifdef SMP
+static int
+sched_ule_find_child_with_core(int cpu, struct cpu_group *grp)
+{
+ int i;
+
+ if (grp->cg_children == 0)
+ return (-1);
+
+ MPASS(grp->cg_child);
+ for (i = 0; i < grp->cg_children; i++) {
+ if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask))
+ return (i);
+ }
+
+ return (-1);
+}
+
+static int
+sched_ule_find_l2_neighbor(int cpu)
+{
+ struct cpu_group *grp;
+ int i;
+
+ grp = cpu_top;
+ if (grp == NULL)
+ return (-1);
+
+ /*
+ * Find the smallest CPU group that contains the given core.
+ */
+ i = 0;
+ while ((i = sched_ule_find_child_with_core(cpu, grp)) != -1) {
+ /*
+ * If the smallest group containing the given CPU has less
+ * than two members, we conclude the given CPU has no
+ * L2 neighbor.
+ */
+ if (grp->cg_child[i].cg_count <= 1)
+ return (-1);
+ grp = &grp->cg_child[i];
+ }
+
+ /* Must share L2. */
+ if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE)
+ return (-1);
+
+ /*
+ * Select the first member of the set that isn't the reference
+ * CPU, which at this point is guaranteed to exist.
+ */
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (CPU_ISSET(i, &grp->cg_mask) && i != cpu)
+ return (i);
+ }
+
+ /* Should never be reached */
+ return (-1);
+}
+#else
+static int
+sched_ule_find_l2_neighbor(int cpu)
+{
+ return (-1);
}
#endif
+struct sched_instance sched_ule_instance = {
+#define SLOT(name) .name = sched_ule_##name
+ SLOT(load),
+ SLOT(rr_interval),
+ SLOT(runnable),
+ SLOT(exit),
+ SLOT(fork),
+ SLOT(fork_exit),
+ SLOT(class),
+ SLOT(nice),
+ SLOT(ap_entry),
+ SLOT(exit_thread),
+ SLOT(estcpu),
+ SLOT(fork_thread),
+ SLOT(ithread_prio),
+ SLOT(lend_prio),
+ SLOT(lend_user_prio),
+ SLOT(lend_user_prio_cond),
+ SLOT(pctcpu),
+ SLOT(prio),
+ SLOT(sleep),
+ SLOT(sswitch),
+ SLOT(throw),
+ SLOT(unlend_prio),
+ SLOT(user_prio),
+ SLOT(userret_slowpath),
+ SLOT(add),
+ SLOT(choose),
+ SLOT(clock),
+ SLOT(idletd),
+ SLOT(preempt),
+ SLOT(relinquish),
+ SLOT(rem),
+ SLOT(wakeup),
+ SLOT(bind),
+ SLOT(unbind),
+ SLOT(is_bound),
+ SLOT(affinity),
+ SLOT(sizeof_proc),
+ SLOT(sizeof_thread),
+ SLOT(tdname),
+ SLOT(clear_tdname),
+ SLOT(do_timer_accounting),
+ SLOT(find_l2_neighbor),
+ SLOT(init),
+ SLOT(init_ap),
+ SLOT(setup),
+ SLOT(initticks),
+ SLOT(schedcpu),
+#undef SLOT
+};
+DECLARE_SCHEDULER(ule_sched_selector, "ULE", &sched_ule_instance);
+
#ifdef SMP
/*
@@ -3365,8 +3459,8 @@
* the topology tree.
*/
static int
-sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, struct cpu_group *cg,
- int indent)
+sysctl_kern_sched_ule_topology_spec_internal(struct sbuf *sb,
+ struct cpu_group *cg, int indent)
{
char cpusetbuf[CPUSETBUFSIZ];
int i, first;
@@ -3403,7 +3497,7 @@
if (cg->cg_children > 0) {
sbuf_printf(sb, "%*s <children>\n", indent, "");
for (i = 0; i < cg->cg_children; i++)
- sysctl_kern_sched_topology_spec_internal(sb,
+ sysctl_kern_sched_ule_topology_spec_internal(sb,
&cg->cg_child[i], indent+2);
sbuf_printf(sb, "%*s </children>\n", indent, "");
}
@@ -3416,19 +3510,20 @@
* the recursive sysctl_kern_smp_topology_spec_internal().
*/
static int
-sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS)
+sysctl_kern_sched_ule_topology_spec(SYSCTL_HANDLER_ARGS)
{
struct sbuf *topo;
int err;
- KASSERT(cpu_top != NULL, ("cpu_top isn't initialized"));
+ if (cpu_top == NULL)
+ return (ENOTTY);
topo = sbuf_new_for_sysctl(NULL, NULL, 512, req);
if (topo == NULL)
return (ENOMEM);
sbuf_cat(topo, "<groups>\n");
- err = sysctl_kern_sched_topology_spec_internal(topo, cpu_top, 1);
+ err = sysctl_kern_sched_ule_topology_spec_internal(topo, cpu_top, 1);
sbuf_cat(topo, "</groups>\n");
if (err == 0) {
@@ -3459,51 +3554,51 @@
return (0);
}
-SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
- "Scheduler");
-SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ULE", 0,
- "Scheduler name");
-SYSCTL_PROC(_kern_sched, OID_AUTO, quantum,
+SYSCTL_NODE(_kern_sched, OID_AUTO, ule, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "ULE Scheduler");
+
+SYSCTL_PROC(_kern_sched_ule, OID_AUTO, quantum,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
sysctl_kern_quantum, "I",
"Quantum for timeshare threads in microseconds");
-SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0,
"Quantum for timeshare threads in stathz ticks");
-SYSCTL_UINT(_kern_sched, OID_AUTO, interact, CTLFLAG_RWTUN, &sched_interact, 0,
+SYSCTL_UINT(_kern_sched_ule, OID_AUTO, interact, CTLFLAG_RWTUN, &sched_interact, 0,
"Interactivity score threshold");
-SYSCTL_INT(_kern_sched, OID_AUTO, preempt_thresh, CTLFLAG_RWTUN,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, preempt_thresh, CTLFLAG_RWTUN,
&preempt_thresh, 0,
"Maximal (lowest) priority for preemption");
-SYSCTL_INT(_kern_sched, OID_AUTO, static_boost, CTLFLAG_RWTUN, &static_boost, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, static_boost, CTLFLAG_RWTUN,
+ &static_boost, 0,
"Assign static kernel priorities to sleeping threads");
-SYSCTL_INT(_kern_sched, OID_AUTO, idlespins, CTLFLAG_RWTUN, &sched_idlespins, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, idlespins, CTLFLAG_RWTUN,
+ &sched_idlespins, 0,
"Number of times idle thread will spin waiting for new work");
-SYSCTL_INT(_kern_sched, OID_AUTO, idlespinthresh, CTLFLAG_RW,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, idlespinthresh, CTLFLAG_RW,
&sched_idlespinthresh, 0,
"Threshold before we will permit idle thread spinning");
#ifdef SMP
-SYSCTL_INT(_kern_sched, OID_AUTO, affinity, CTLFLAG_RW, &affinity, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, affinity, CTLFLAG_RW, &affinity, 0,
"Number of hz ticks to keep thread affinity for");
-SYSCTL_INT(_kern_sched, OID_AUTO, balance, CTLFLAG_RWTUN, &rebalance, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, balance, CTLFLAG_RWTUN, &rebalance, 0,
"Enables the long-term load balancer");
-SYSCTL_INT(_kern_sched, OID_AUTO, balance_interval, CTLFLAG_RW,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, balance_interval, CTLFLAG_RW,
&balance_interval, 0,
"Average period in stathz ticks to run the long-term balancer");
-SYSCTL_INT(_kern_sched, OID_AUTO, steal_idle, CTLFLAG_RWTUN, &steal_idle, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, steal_idle, CTLFLAG_RWTUN,
+ &steal_idle, 0,
"Attempts to steal work from other cores before idling");
-SYSCTL_INT(_kern_sched, OID_AUTO, steal_thresh, CTLFLAG_RWTUN, &steal_thresh, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, steal_thresh, CTLFLAG_RWTUN,
+ &steal_thresh, 0,
"Minimum load on remote CPU before we'll steal");
-SYSCTL_INT(_kern_sched, OID_AUTO, trysteal_limit, CTLFLAG_RWTUN,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, trysteal_limit, CTLFLAG_RWTUN,
&trysteal_limit, 0,
"Topological distance limit for stealing threads in sched_switch()");
-SYSCTL_INT(_kern_sched, OID_AUTO, always_steal, CTLFLAG_RWTUN, &always_steal, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, always_steal, CTLFLAG_RWTUN,
+ &always_steal, 0,
"Always run the stealer from the idle thread");
-SYSCTL_PROC(_kern_sched, OID_AUTO, topology_spec, CTLTYPE_STRING |
- CTLFLAG_MPSAFE | CTLFLAG_RD, NULL, 0, sysctl_kern_sched_topology_spec, "A",
+SYSCTL_PROC(_kern_sched_ule, OID_AUTO, topology_spec, CTLTYPE_STRING |
+ CTLFLAG_MPSAFE | CTLFLAG_RD, NULL, 0,
+ sysctl_kern_sched_ule_topology_spec, "A",
"XML dump of detected CPU topology");
#endif
-
-/* ps compat. All cpu percentages from ULE are weighted. */
-static int ccpu = 0;
-SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0,
- "Decay factor used for updating %CPU in 4BSD scheduler");
diff --git a/sys/net/iflib.c b/sys/net/iflib.c
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -29,7 +29,6 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_acpi.h"
-#include "opt_sched.h"
#include <sys/param.h>
#include <sys/types.h>
@@ -40,8 +39,10 @@
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/kobj.h>
+#include <sys/proc.h>
#include <sys/rman.h>
#include <sys/sbuf.h>
+#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
@@ -4813,83 +4814,6 @@
return (cpuid);
}
-#if defined(SMP) && defined(SCHED_ULE)
-extern struct cpu_group *cpu_top; /* CPU topology */
-
-static int
-find_child_with_core(int cpu, struct cpu_group *grp)
-{
- int i;
-
- if (grp->cg_children == 0)
- return (-1);
-
- MPASS(grp->cg_child);
- for (i = 0; i < grp->cg_children; i++) {
- if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask))
- return (i);
- }
-
- return (-1);
-}
-
-
-/*
- * Find an L2 neighbor of the given CPU or return -1 if none found. This
- * does not distinguish among multiple L2 neighbors if the given CPU has
- * more than one (it will always return the same result in that case).
- */
-static int
-find_l2_neighbor(int cpu)
-{
- struct cpu_group *grp;
- int i;
-
- grp = cpu_top;
- if (grp == NULL)
- return (-1);
-
- /*
- * Find the smallest CPU group that contains the given core.
- */
- i = 0;
- while ((i = find_child_with_core(cpu, grp)) != -1) {
- /*
- * If the smallest group containing the given CPU has less
- * than two members, we conclude the given CPU has no
- * L2 neighbor.
- */
- if (grp->cg_child[i].cg_count <= 1)
- return (-1);
- grp = &grp->cg_child[i];
- }
-
- /* Must share L2. */
- if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE)
- return (-1);
-
- /*
- * Select the first member of the set that isn't the reference
- * CPU, which at this point is guaranteed to exist.
- */
- for (i = 0; i < CPU_SETSIZE; i++) {
- if (CPU_ISSET(i, &grp->cg_mask) && i != cpu)
- return (i);
- }
-
- /* Should never be reached */
- return (-1);
-}
-
-#else
-static int
-find_l2_neighbor(int cpu)
-{
-
- return (-1);
-}
-#endif
-
/*
* CPU mapping behaviors
* ---------------------
@@ -4942,7 +4866,7 @@
unsigned int rx_cpuid;
rx_cpuid = cpuid_advance(ctx, base_cpuid, qid);
- l2_neighbor = find_l2_neighbor(rx_cpuid);
+ l2_neighbor = sched_find_l2_neighbor(rx_cpuid);
if (l2_neighbor != -1) {
return (l2_neighbor);
}
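
iflib's private copy of the topology walk is deleted outright: the L2-neighbor search now lives with the scheduler that owns the cpu_group topology, and iflib reaches it through the new sched_find_l2_neighbor() shim. 4BSD simply answers -1 (no neighbor known), which the caller already treats as "use the default mapping", as in this sketch of the call-site pattern:

    l2_neighbor = sched_find_l2_neighbor(rx_cpuid);
    if (l2_neighbor != -1)
            return (l2_neighbor);   /* pin next to the RX CPU's L2 sibling */
    /* otherwise fall through to the default CPU mapping */
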
diff --git a/sys/powerpc/powerpc/machdep.c b/sys/powerpc/powerpc/machdep.c
--- a/sys/powerpc/powerpc/machdep.c
+++ b/sys/powerpc/powerpc/machdep.c
@@ -83,6 +83,7 @@
#include <sys/reboot.h>
#include <sys/reg.h>
#include <sys/rwlock.h>
+#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
@@ -467,6 +468,7 @@
* Bring up MMU
*/
pmap_mmu_init();
+ sched_instance_select();
link_elf_ireloc();
pmap_bootstrap(startkernel, endkernel);
mtmsr(psl_kernset & ~PSL_EE);
diff --git a/sys/powerpc/powerpc/swtch32.S b/sys/powerpc/powerpc/swtch32.S
--- a/sys/powerpc/powerpc/swtch32.S
+++ b/sys/powerpc/powerpc/swtch32.S
@@ -56,7 +56,6 @@
*/
#include "assym.inc"
-#include "opt_sched.h"
#include <sys/syscall.h>
@@ -125,7 +124,7 @@
sync /* Make sure all of that finished */
cpu_switchin:
-#if defined(SMP) && defined(SCHED_ULE)
+#if defined(SMP)
/* Wait for the new thread to become unblocked */
bl 1f
1:
diff --git a/sys/powerpc/powerpc/swtch64.S b/sys/powerpc/powerpc/swtch64.S
--- a/sys/powerpc/powerpc/swtch64.S
+++ b/sys/powerpc/powerpc/swtch64.S
@@ -56,7 +56,6 @@
*/
#include "assym.inc"
-#include "opt_sched.h"
#include <sys/syscall.h>
@@ -187,7 +186,7 @@
sync /* Make sure all of that finished */
cpu_switchin:
-#if defined(SMP) && defined(SCHED_ULE)
+#if defined(SMP)
/* Wait for the new thread to become unblocked */
addis %r6,%r2,TOC_REF(blocked_lock)@ha
ld %r6,TOC_REF(blocked_lock)@l(%r6)
diff --git a/sys/riscv/include/ifunc.h b/sys/riscv/include/ifunc.h
--- a/sys/riscv/include/ifunc.h
+++ b/sys/riscv/include/ifunc.h
@@ -30,6 +30,8 @@
#ifndef __RISCV_IFUNC_H
#define __RISCV_IFUNC_H
+#define __DO_NOT_HAVE_SYS_IFUNCS 1
+
#define DEFINE_IFUNC(qual, ret_type, name, args) \
static ret_type (*name##_resolver(void))args __used; \
qual ret_type name args __attribute__((ifunc(#name "_resolver"))); \
diff --git a/sys/riscv/riscv/machdep.c b/sys/riscv/riscv/machdep.c
--- a/sys/riscv/riscv/machdep.c
+++ b/sys/riscv/riscv/machdep.c
@@ -479,6 +479,8 @@
/* Initialize preload_kmdp */
preload_initkmdp(true);
+ sched_instance_select();
+ /* link_elf_ireloc(); */
/* Read the boot metadata */
boothowto = MD_FETCH(preload_kmdp, MODINFOMD_HOWTO, int);
diff --git a/sys/riscv/riscv/swtch.S b/sys/riscv/riscv/swtch.S
--- a/sys/riscv/riscv/swtch.S
+++ b/sys/riscv/riscv/swtch.S
@@ -33,7 +33,6 @@
*/
#include "assym.inc"
-#include "opt_sched.h"
#include <machine/param.h>
#include <machine/asm.h>
@@ -315,7 +314,7 @@
/* Release the old thread */
sd s2, TD_LOCK(s0)
-#if defined(SCHED_ULE) && defined(SMP)
+#if defined(SMP)
/* Spin if TD_LOCK points to a blocked_lock */
la s2, _C_LABEL(blocked_lock)
1:
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -1173,7 +1173,6 @@
void kern_yield(int);
void killjobc(void);
int leavepgrp(struct proc *p);
-int maybe_preempt(struct thread *td);
void maybe_yield(void);
void mi_switch(int flags);
int p_candebug(struct thread *td, struct proc *p);
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -68,6 +68,8 @@
#ifdef SCHED_STATS
#include <sys/pcpu.h>
#endif
+#include <sys/linker_set.h>
+#include <sys/sdt.h>
struct proc;
struct thread;
@@ -114,11 +116,6 @@
void sched_unlend_prio(struct thread *td, u_char prio);
void sched_user_prio(struct thread *td, u_char prio);
void sched_userret_slowpath(struct thread *td);
-#ifdef RACCT
-#ifdef SCHED_4BSD
-fixpt_t sched_pctcpu_delta(struct thread *td);
-#endif
-#endif
static inline void
sched_userret(struct thread *td)
@@ -174,9 +171,7 @@
* functions.
*/
char *sched_tdname(struct thread *td);
-#ifdef KTR
void sched_clear_tdname(struct thread *td);
-#endif
static __inline void
sched_pin(void)
@@ -221,6 +216,10 @@
#define SCHED_STAT_DEFINE(name, descr) \
DPCPU_DEFINE(unsigned long, name); \
SCHED_STAT_DEFINE_VAR(name, &DPCPU_NAME(name), descr)
+
+#define SCHED_STAT_DECLARE(name) \
+ DPCPU_DECLARE(unsigned long, name);
+
/*
* Sched stats are always incremented in critical sections so no atomic
* is necessary to increment them.
@@ -229,9 +228,29 @@
#else
#define SCHED_STAT_DEFINE_VAR(name, descr, ptr)
#define SCHED_STAT_DEFINE(name, descr)
+#define SCHED_STAT_DECLARE(name)
#define SCHED_STAT_INC(var) (void)0
#endif
+SCHED_STAT_DECLARE(ithread_demotions);
+SCHED_STAT_DECLARE(ithread_preemptions);
+
+SDT_PROBE_DECLARE(sched, , , change__pri);
+SDT_PROBE_DECLARE(sched, , , dequeue);
+SDT_PROBE_DECLARE(sched, , , enqueue);
+SDT_PROBE_DECLARE(sched, , , lend__pri);
+SDT_PROBE_DECLARE(sched, , , load__change);
+SDT_PROBE_DECLARE(sched, , , off__cpu);
+SDT_PROBE_DECLARE(sched, , , on__cpu);
+SDT_PROBE_DECLARE(sched, , , remain__cpu);
+SDT_PROBE_DECLARE(sched, , , surrender);
+
+#ifdef KDTRACE_HOOKS
+#include <sys/dtrace_bsd.h>
+extern int dtrace_vtime_active;
+extern dtrace_vtime_switch_func_t dtrace_vtime_switch_func;
+#endif
+
/*
* Fixup scheduler state for proc0 and thread0
*/
@@ -241,6 +260,81 @@
* Fixup scheduler state for secondary APs
*/
void schedinit_ap(void);
+
+bool sched_do_timer_accounting(void);
+
+/*
+ * Find an L2 neighbor of the given CPU or return -1 if none found. This
+ * does not distinguish among multiple L2 neighbors if the given CPU has
+ * more than one (it will always return the same result in that case).
+ */
+int sched_find_l2_neighbor(int cpu);
+
+struct sched_instance {
+ int (*load)(void);
+ int (*rr_interval)(void);
+ bool (*runnable)(void);
+ void (*exit)(struct proc *p, struct thread *childtd);
+ void (*fork)(struct thread *td, struct thread *childtd);
+ void (*fork_exit)(struct thread *td);
+ void (*class)(struct thread *td, int class);
+ void (*nice)(struct proc *p, int nice);
+ void (*ap_entry)(void);
+ void (*exit_thread)(struct thread *td, struct thread *child);
+ u_int (*estcpu)(struct thread *td);
+ void (*fork_thread)(struct thread *td, struct thread *child);
+ void (*ithread_prio)(struct thread *td, u_char prio);
+ void (*lend_prio)(struct thread *td, u_char prio);
+ void (*lend_user_prio)(struct thread *td, u_char pri);
+ void (*lend_user_prio_cond)(struct thread *td, u_char pri);
+ fixpt_t (*pctcpu)(struct thread *td);
+ void (*prio)(struct thread *td, u_char prio);
+ void (*sleep)(struct thread *td, int prio);
+ void (*sswitch)(struct thread *td, int flags);
+ void (*throw)(struct thread *td);
+ void (*unlend_prio)(struct thread *td, u_char prio);
+ void (*user_prio)(struct thread *td, u_char prio);
+ void (*userret_slowpath)(struct thread *td);
+ void (*add)(struct thread *td, int flags);
+ struct thread *(*choose)(void);
+ void (*clock)(struct thread *td, int cnt);
+ void (*idletd)(void *);
+ void (*preempt)(struct thread *td);
+ void (*relinquish)(struct thread *td);
+ void (*rem)(struct thread *td);
+ void (*wakeup)(struct thread *td, int srqflags);
+ void (*bind)(struct thread *td, int cpu);
+ void (*unbind)(struct thread *td);
+ int (*is_bound)(struct thread *td);
+ void (*affinity)(struct thread *td);
+ int (*sizeof_proc)(void);
+ int (*sizeof_thread)(void);
+ char *(*tdname)(struct thread *td);
+ void (*clear_tdname)(struct thread *td);
+ bool (*do_timer_accounting)(void);
+ int (*find_l2_neighbor)(int cpuid);
+ void (*init)(void);
+ void (*init_ap)(void);
+ void (*setup)(void);
+ void (*initticks)(void);
+ void (*schedcpu)(void);
+};
+
+extern const struct sched_instance *active_sched;
+
+struct sched_selection {
+ const char *name;
+ const struct sched_instance *instance;
+};
+#define DECLARE_SCHEDULER(xsel_name, xsched_name, xsched_instance) \
+ static struct sched_selection xsel_name = { \
+ .name = xsched_name, \
+ .instance = xsched_instance, \
+ }; \
+ DATA_SET(sched_instance_set, xsel_name);
+
+void sched_instance_select(void);
+
#endif /* _KERNEL */
/* POSIX 1003.1b Process Scheduling */
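
DECLARE_SCHEDULER() is the whole registration interface: it drops a sched_selection record into the sched_instance_set linker set, which sched_instance_select() scans by name at boot. A hypothetical third scheduler (names here are illustrative, not part of the patch) would plug in like this:

    /* Hypothetical out-of-tree scheduler registering itself as "DEMO". */
    static const struct sched_instance sched_demo_instance = {
            .load = sched_demo_load,
            /* ... every slot of struct sched_instance must be filled ... */
    };
    DECLARE_SCHEDULER(demo_sched_selector, "DEMO", &sched_demo_instance);
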
diff --git a/sys/x86/x86/cpu_machdep.c b/sys/x86/x86/cpu_machdep.c
--- a/sys/x86/x86/cpu_machdep.c
+++ b/sys/x86/x86/cpu_machdep.c
@@ -47,7 +47,6 @@
#include "opt_kstack_pages.h"
#include "opt_maxmem.h"
#include "opt_platform.h"
-#include "opt_sched.h"
#ifdef __i386__
#include "opt_apic.h"
#endif
@@ -543,9 +542,7 @@
* is visible before calling cpu_idle_wakeup().
*/
atomic_store_int(statep, newstate);
-#if defined(SCHED_ULE) && defined(SMP)
atomic_thread_fence_seq_cst();
-#endif
/*
* Since we may be in a critical section from cpu_idle(), if
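
The fence in the idle loop is likewise made unconditional: per the comment above, it keeps the store of the new idle state globally visible before the idling CPU proceeds, so that a remote CPU calling cpu_idle_wakeup() observes it; with any scheduler now able to rely on SMP-style wakeups, the SCHED_ULE guard no longer makes sense. A sketch of the store/fence pair:

    atomic_store_int(statep, newstate);     /* publish "this CPU is idling" */
    atomic_thread_fence_seq_cst();          /* make the store visible before
                                               proceeding, per the comment */
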
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -1443,21 +1443,8 @@
kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED);
trap_check_kstack();
-#if defined(SMP) && !defined(SCHED_ULE)
- /*
- * Don't do any accounting for the disabled HTT cores, since it
- * will provide misleading numbers for the userland.
- *
- * No locking is necessary here, since even if we lose the race
- * when hlt_cpus_mask changes it is not a big deal, really.
- *
- * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
- * and unlike other schedulers it actually schedules threads to
- * those CPUs.
- */
- if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
+ if (!sched_do_timer_accounting())
return;
-#endif
/* Look up our local APIC structure for the tick counters. */
la = &lapics[PCPU_GET(apic_id)];