D54831: Make ULE and 4BSD coexists
D54831.diff (62 KB)

diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -35,7 +35,6 @@
#include <machine/specialreg.h>
#include "assym.inc"
-#include "opt_sched.h"
/*****************************************************************************/
/* Scheduling */
@@ -136,13 +135,11 @@
movq %r15,TD_LOCK(%r13) /* Release the old thread */
sw1:
leaq TD_MD_PCB(%r12),%r8
-#if defined(SCHED_ULE)
movq $blocked_lock, %rdx
movq TD_LOCK(%r12),%rcx
cmpq %rcx, %rdx
je sw1wait
sw1cont:
-#endif
/*
* At this point, we've switched address spaces and are ready
* to load up the rest of the next context.
@@ -492,7 +489,6 @@
END(resumectx)
/* Wait for the new thread to become unblocked */
-#if defined(SCHED_ULE)
sw1wait:
1:
pause
@@ -500,4 +496,3 @@
cmpq %rcx, %rdx
je 1b
jmp sw1cont
-#endif
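
With both schedulers now always compiled in, the blocked-lock handshake in cpu_switch() can no longer be compiled out, so the SCHED_ULE guards around sw1wait are dropped here and on every other architecture touched below. In C terms the assembly implements roughly the following wait (a sketch only; blocked_lock and cpu_spinwait() are the existing kernel primitives, and the real code uses an acquire load of td_lock):

    /*
     * The outgoing CPU parks the thread's lock at &blocked_lock until it
     * has finished switching out; the incoming CPU must spin until the
     * lock is handed over before touching the new thread's context.
     */
    while (newtd->td_lock == &blocked_lock)
            cpu_spinwait();         /* the "pause" in the loop above */
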
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -322,7 +322,6 @@
}
SYSINIT(late_ifunc_resolve, SI_SUB_CPU, SI_ORDER_ANY, late_ifunc_resolve, NULL);
-
void
cpu_setregs(void)
{
@@ -1353,6 +1352,8 @@
TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave);
}
+ sched_instance_select();
+
link_elf_ireloc();
/*
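
Ordering matters here: sched_instance_select() must run before link_elf_ireloc(), because on ifunc-capable platforms every sched_*() entry point is resolved exactly once, during ireloc, by reading the freshly chosen active_sched. A sketch of one generated resolver (this is the __DEFINE_SHIM expansion from sched_shim.c below, shown for sched_load):

    /* Resolved once at link_elf_ireloc() time; no per-call indirection. */
    DEFINE_IFUNC(, int, sched_load, (void))
    {
            return (active_sched->load);
    }
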
diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC
--- a/sys/amd64/conf/GENERIC
+++ b/sys/amd64/conf/GENERIC
@@ -24,6 +24,7 @@
makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support
options SCHED_ULE # ULE scheduler
+options SCHED_4BSD # Original 4.xBSD scheduler
options NUMA # Non-Uniform Memory Architecture support
options PREEMPTION # Enable kernel thread preemption
options EXTERR_STRINGS
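
GENERIC now carries both schedulers; the one that actually runs is picked at boot from the kern.sched.name loader tunable (see sched_instance_select() in sched_shim.c below). A usage sketch, assuming the default remains ULE:

    # /boot/loader.conf
    kern.sched.name="4BSD"          # select 4BSD instead of the default ULE
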
diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c
--- a/sys/arm/arm/machdep.c
+++ b/sys/arm/arm/machdep.c
@@ -523,6 +523,9 @@
/* Do basic tuning, hz etc */
init_param1();
+ sched_instance_select();
+ /* link_elf_ireloc(); */
+
/*
* Allocate a page for the system page mapped to 0xffff0000
* This page will just contain the system vectors and can be
diff --git a/sys/arm/arm/swtch-v6.S b/sys/arm/arm/swtch-v6.S
--- a/sys/arm/arm/swtch-v6.S
+++ b/sys/arm/arm/swtch-v6.S
@@ -79,7 +79,6 @@
*/
#include "assym.inc"
-#include "opt_sched.h"
#include <machine/asm.h>
#include <machine/asmacros.h>
@@ -432,11 +431,7 @@
* r11 = newtd
*/
-#if defined(SMP) && defined(SCHED_ULE)
- /*
- * 386 and amd64 do the blocked lock test only for SMP and SCHED_ULE
- * QQQ: What does it mean in reality and why is it done?
- */
+#if defined(SMP)
ldr r6, =blocked_lock
1:
ldr r3, [r11, #TD_LOCK] /* atomic write regular read */
diff --git a/sys/arm/include/ifunc.h b/sys/arm/include/ifunc.h
new file mode 100644
--- /dev/null
+++ b/sys/arm/include/ifunc.h
@@ -0,0 +1,10 @@
+/*
+ * This file is in the public domain.
+ */
+
+#ifndef __ARM_IFUNC_H
+#define __ARM_IFUNC_H
+
+#define __DO_NOT_HAVE_SYS_IFUNCS 1
+
+#endif
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -825,6 +825,7 @@
PCPU_SET(curthread, &thread0);
PCPU_SET(midr, get_midr());
+ sched_instance_select();
link_elf_ireloc();
#ifdef FDT
try_load_dtb();
diff --git a/sys/arm64/arm64/swtch.S b/sys/arm64/arm64/swtch.S
--- a/sys/arm64/arm64/swtch.S
+++ b/sys/arm64/arm64/swtch.S
@@ -31,7 +31,6 @@
#include "assym.inc"
#include "opt_kstack_pages.h"
-#include "opt_sched.h"
#include <sys/elf_common.h>
@@ -197,7 +196,7 @@
* Release the old thread.
*/
stlr x2, [x0, #TD_LOCK]
-#if defined(SCHED_ULE) && defined(SMP)
+#if defined(SMP)
/* Spin if TD_LOCK points to a blocked_lock */
ldr x2, =_C_LABEL(blocked_lock)
1:
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -210,7 +210,7 @@
#
options SCHED_4BSD
options SCHED_STATS
-#options SCHED_ULE
+options SCHED_ULE
#####################################################################
# SMP OPTIONS:
diff --git a/sys/conf/files b/sys/conf/files
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3921,6 +3921,7 @@
kern/p1003_1b.c standard
kern/posix4_mib.c standard
kern/sched_4bsd.c optional sched_4bsd
+kern/sched_shim.c standard
kern/sched_ule.c optional sched_ule
kern/serdev_if.m standard
kern/stack_protector.c standard \
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -1544,6 +1544,7 @@
/* Initialize preload_kmdp */
preload_initkmdp(!metadata_missing);
+ sched_instance_select();
link_elf_ireloc();
vm86_initialize();
diff --git a/sys/i386/i386/swtch.S b/sys/i386/i386/swtch.S
--- a/sys/i386/i386/swtch.S
+++ b/sys/i386/i386/swtch.S
@@ -30,27 +30,11 @@
* SUCH DAMAGE.
*/
-#include "opt_sched.h"
-
#include <machine/asmacros.h>
#include "assym.inc"
-#if defined(SMP) && defined(SCHED_ULE)
-#define SETOP xchgl
#define BLOCK_SPIN(reg) \
- movl $blocked_lock,%eax ; \
- 100: ; \
- lock ; \
- cmpxchgl %eax,TD_LOCK(reg) ; \
- jne 101f ; \
- pause ; \
- jmp 100b ; \
- 101:
-#else
-#define SETOP movl
-#define BLOCK_SPIN(reg)
-#endif
/*****************************************************************************/
/* Scheduling */
@@ -162,7 +146,7 @@
/* Switchout td_lock */
movl %esi,%eax
movl PCPU(CPUID),%esi
- SETOP %eax,TD_LOCK(%edi)
+ xchgl %eax,TD_LOCK(%edi)
/* Release bit from old pmap->pm_active */
movl PCPU(CURPMAP), %ebx
@@ -181,7 +165,18 @@
#endif
btsl %esi, PM_ACTIVE(%ebx) /* set new */
sw1:
- BLOCK_SPIN(%ecx)
+#ifdef SMP
+ movl $blocked_lock,%eax
+100:
+
+ lock
+ cmpxchgl %eax,TD_LOCK(%ecx)
+ jne 101f
+ pause
+ jmp 100b
+101:
+#endif
+
/*
* At this point, we have managed thread locks and are ready
* to load up the rest of the next context.
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -34,12 +34,10 @@
* SUCH DAMAGE.
*/
-#include <sys/cdefs.h>
#include "opt_hwpmc_hooks.h"
#include "opt_hwt_hooks.h"
#include "opt_sched.h"
-#include <sys/param.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
@@ -68,12 +66,6 @@
#include <dev/hwt/hwt_hook.h>
#endif
-#ifdef KDTRACE_HOOKS
-#include <sys/dtrace_bsd.h>
-int __read_mostly dtrace_vtime_active;
-dtrace_vtime_switch_func_t dtrace_vtime_switch_func;
-#endif
-
/*
* INVERSE_ESTCPU_WEIGHT is only suitable for statclock() frequencies in
* the range 100-256 Hz (approximately).
@@ -139,7 +131,6 @@
static void schedcpu(void);
static void schedcpu_thread(void);
static void sched_priority(struct thread *td, u_char prio);
-static void sched_setup(void *dummy);
static void maybe_resched(struct thread *td);
static void updatepri(struct thread *td);
static void resetpriority(struct thread *td);
@@ -155,13 +146,12 @@
schedcpu_thread,
NULL
};
-SYSINIT(schedcpu, SI_SUB_LAST, SI_ORDER_FIRST, kproc_start,
- &sched_kp);
-SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);
-static void sched_initticks(void *dummy);
-SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks,
- NULL);
+static void
+sched_4bsd_schedcpu(void)
+{
+ kproc_start(&sched_kp);
+}
/*
* Global run queue.
@@ -198,7 +188,7 @@
}
static int
-sysctl_kern_quantum(SYSCTL_HANDLER_ARGS)
+sysctl_kern_4bsd_quantum(SYSCTL_HANDLER_ARGS)
{
int error, new_val, period;
@@ -215,77 +205,58 @@
return (0);
}
-SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
- "Scheduler");
+SYSCTL_NODE(_kern_sched, OID_AUTO, 4bsd, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "4BSD Scheduler");
-SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "4BSD", 0,
- "Scheduler name");
-SYSCTL_PROC(_kern_sched, OID_AUTO, quantum,
+SYSCTL_PROC(_kern_sched_4bsd, OID_AUTO, quantum,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
- sysctl_kern_quantum, "I",
+ sysctl_kern_4bsd_quantum, "I",
"Quantum for timeshare threads in microseconds");
-SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0,
+SYSCTL_INT(_kern_sched_4bsd, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0,
"Quantum for timeshare threads in stathz ticks");
#ifdef SMP
/* Enable forwarding of wakeups to all other cpus */
-static SYSCTL_NODE(_kern_sched, OID_AUTO, ipiwakeup,
+static SYSCTL_NODE(_kern_sched_4bsd, OID_AUTO, ipiwakeup,
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
"Kernel SMP");
static int runq_fuzz = 1;
-SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
+SYSCTL_INT(_kern_sched_4bsd, OID_AUTO, runq_fuzz, CTLFLAG_RW,
+ &runq_fuzz, 0, "");
static int forward_wakeup_enabled = 1;
-SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
+SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
&forward_wakeup_enabled, 0,
"Forwarding of wakeup to idle CPUs");
static int forward_wakeups_requested = 0;
-SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
+SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
&forward_wakeups_requested, 0,
"Requests for Forwarding of wakeup to idle CPUs");
static int forward_wakeups_delivered = 0;
-SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
+SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
&forward_wakeups_delivered, 0,
"Completed Forwarding of wakeup to idle CPUs");
static int forward_wakeup_use_mask = 1;
-SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
+SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
&forward_wakeup_use_mask, 0,
"Use the mask of idle cpus");
static int forward_wakeup_use_loop = 0;
-SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
+SYSCTL_INT(_kern_sched_4bsd_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
&forward_wakeup_use_loop, 0,
"Use a loop to find idle cpus");
#endif
#if 0
static int sched_followon = 0;
-SYSCTL_INT(_kern_sched, OID_AUTO, followon, CTLFLAG_RW,
+SYSCTL_INT(_kern_sched_4bsd, OID_AUTO, followon, CTLFLAG_RW,
&sched_followon, 0,
"allow threads to share a quantum");
#endif
-SDT_PROVIDER_DEFINE(sched);
-
-SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *",
- "struct proc *", "uint8_t");
-SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *",
- "struct proc *", "void *");
-SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *",
- "struct proc *", "void *", "int");
-SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *",
- "struct proc *", "uint8_t", "struct thread *");
-SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int");
-SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *",
- "struct proc *");
-SDT_PROBE_DEFINE(sched, , , on__cpu);
-SDT_PROBE_DEFINE(sched, , , remain__cpu);
-SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *",
- "struct proc *");
-
static __inline void
sched_load_add(void)
{
@@ -322,7 +293,7 @@
* determines if the new thread should preempt the current thread. If so,
* it sets td_owepreempt to request a preemption.
*/
-int
+static int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
@@ -441,10 +412,7 @@
#define loadfactor(loadav) (2 * (loadav))
#define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
-/* decay 95% of `ts_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
-static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
-SYSCTL_UINT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0,
- "Decay factor used for updating %CPU");
+extern fixpt_t ccpu;
/*
* If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
@@ -640,10 +608,14 @@
sched_prio(td, td->td_user_pri);
}
-/* ARGSUSED */
static void
-sched_setup(void *dummy)
+sched_4bsd_setup(void)
{
+ /*
+ * Decay 95% of `ts_pctcpu' in 60 seconds; see CCPU_SHIFT
+ * before changing.
+ */
+ ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
setup_runqs();
@@ -655,7 +627,7 @@
* This routine determines time constants after stathz and hz are setup.
*/
static void
-sched_initticks(void *dummy)
+sched_4bsd_initticks(void)
{
realstathz = stathz ? stathz : hz;
@@ -672,8 +644,8 @@
* Called from:
* proc0_init()
*/
-void
-schedinit(void)
+static void
+sched_4bsd_init(void)
{
/*
@@ -684,15 +656,15 @@
mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN);
}
-void
-schedinit_ap(void)
+static void
+sched_4bsd_init_ap(void)
{
/* Nothing needed. */
}
-bool
-sched_runnable(void)
+static bool
+sched_4bsd_runnable(void)
{
#ifdef SMP
return (runq_not_empty(&runq) ||
@@ -702,18 +674,14 @@
#endif
}
-int
-sched_rr_interval(void)
+static int
+sched_4bsd_rr_interval(void)
{
/* Convert sched_slice from stathz to hz. */
return (imax(1, (sched_slice * hz + realstathz / 2) / realstathz));
}
-SCHED_STAT_DEFINE(ithread_demotions, "Interrupt thread priority demotions");
-SCHED_STAT_DEFINE(ithread_preemptions,
- "Interrupt thread preemptions due to time-sharing");
-
/*
* We adjust the priority of the current process. The priority of a
* process gets worse as it accumulates CPU time. The cpu usage
@@ -773,8 +741,8 @@
stat->idlecalls = 0;
}
-void
-sched_clock(struct thread *td, int cnt)
+static void
+sched_4bsd_clock(struct thread *td, int cnt)
{
for ( ; cnt > 0; cnt--)
@@ -784,8 +752,8 @@
/*
* Charge child's scheduling CPU usage to parent.
*/
-void
-sched_exit(struct proc *p, struct thread *td)
+static void
+sched_4bsd_exit(struct proc *p, struct thread *td)
{
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "proc exit",
@@ -795,8 +763,8 @@
sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
}
-void
-sched_exit_thread(struct thread *td, struct thread *child)
+static void
+sched_4bsd_exit_thread(struct thread *td, struct thread *child)
{
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(child), "exit",
@@ -811,14 +779,14 @@
thread_unlock(child);
}
-void
-sched_fork(struct thread *td, struct thread *childtd)
+static void
+sched_4bsd_fork(struct thread *td, struct thread *childtd)
{
sched_fork_thread(td, childtd);
}
-void
-sched_fork_thread(struct thread *td, struct thread *childtd)
+static void
+sched_4bsd_fork_thread(struct thread *td, struct thread *childtd)
{
struct td_sched *ts, *tsc;
@@ -836,8 +804,8 @@
ts->ts_slice = 1;
}
-void
-sched_nice(struct proc *p, int nice)
+static void
+sched_4bsd_nice(struct proc *p, int nice)
{
struct thread *td;
@@ -851,8 +819,8 @@
}
}
-void
-sched_class(struct thread *td, int class)
+static void
+sched_4bsd_class(struct thread *td, int class)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
td->td_pri_class = class;
@@ -890,8 +858,8 @@
* Update a thread's priority when it is lent another thread's
* priority.
*/
-void
-sched_lend_prio(struct thread *td, u_char prio)
+static void
+sched_4bsd_lend_prio(struct thread *td, u_char prio)
{
td->td_flags |= TDF_BORROWING;
@@ -906,8 +874,8 @@
* important than prio the thread will keep a priority boost
* of prio.
*/
-void
-sched_unlend_prio(struct thread *td, u_char prio)
+static void
+sched_4bsd_unlend_prio(struct thread *td, u_char prio)
{
u_char base_pri;
@@ -923,8 +891,8 @@
sched_lend_prio(td, prio);
}
-void
-sched_prio(struct thread *td, u_char prio)
+static void
+sched_4bsd_prio(struct thread *td, u_char prio)
{
u_char oldprio;
@@ -950,8 +918,8 @@
turnstile_adjust(td, oldprio);
}
-void
-sched_ithread_prio(struct thread *td, u_char prio)
+static void
+sched_4bsd_ithread_prio(struct thread *td, u_char prio)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
MPASS(td->td_pri_class == PRI_ITHD);
@@ -959,8 +927,8 @@
sched_prio(td, prio);
}
-void
-sched_user_prio(struct thread *td, u_char prio)
+static void
+sched_4bsd_user_prio(struct thread *td, u_char prio)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -970,8 +938,8 @@
td->td_user_pri = prio;
}
-void
-sched_lend_user_prio(struct thread *td, u_char prio)
+static void
+sched_4bsd_lend_user_prio(struct thread *td, u_char prio)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -986,8 +954,8 @@
/*
* Like the above but first check if there is anything to do.
*/
-void
-sched_lend_user_prio_cond(struct thread *td, u_char prio)
+static void
+sched_4bsd_lend_user_prio_cond(struct thread *td, u_char prio)
{
if (td->td_lend_user_pri == prio)
@@ -998,8 +966,8 @@
thread_unlock(td);
}
-void
-sched_sleep(struct thread *td, int pri)
+static void
+sched_4bsd_sleep(struct thread *td, int pri)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -1009,8 +977,8 @@
sched_prio(td, pri);
}
-void
-sched_switch(struct thread *td, int flags)
+static void
+sched_4bsd_sswitch(struct thread *td, int flags)
{
struct thread *newtd;
struct mtx *tmtx;
@@ -1142,8 +1110,8 @@
mtx_unlock_spin(&sched_lock);
}
-void
-sched_wakeup(struct thread *td, int srqflags)
+static void
+sched_4bsd_wakeup(struct thread *td, int srqflags)
{
struct td_sched *ts;
@@ -1318,8 +1286,8 @@
}
#endif
-void
-sched_add(struct thread *td, int flags)
+static void
+sched_4bsd_add(struct thread *td, int flags)
#ifdef SMP
{
cpuset_t tidlemsk;
@@ -1466,8 +1434,8 @@
}
#endif /* SMP */
-void
-sched_rem(struct thread *td)
+static void
+sched_4bsd_rem(struct thread *td)
{
struct td_sched *ts;
@@ -1496,8 +1464,8 @@
* Select threads to run. Note that running threads still consume a
* slot.
*/
-struct thread *
-sched_choose(void)
+static struct thread *
+sched_4bsd_choose(void)
{
struct thread *td;
struct runq *rq;
@@ -1541,8 +1509,8 @@
return (PCPU_GET(idlethread));
}
-void
-sched_preempt(struct thread *td)
+static void
+sched_4bsd_preempt(struct thread *td)
{
int flags;
@@ -1558,8 +1526,8 @@
}
}
-void
-sched_userret_slowpath(struct thread *td)
+static void
+sched_4bsd_userret_slowpath(struct thread *td)
{
thread_lock(td);
@@ -1568,8 +1536,8 @@
thread_unlock(td);
}
-void
-sched_bind(struct thread *td, int cpu)
+static void
+sched_4bsd_bind(struct thread *td, int cpu)
{
#ifdef SMP
struct td_sched *ts = td_get_sched(td);
@@ -1589,48 +1557,48 @@
#endif
}
-void
-sched_unbind(struct thread* td)
+static void
+sched_4bsd_unbind(struct thread* td)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(td == curthread, ("sched_unbind: can only bind curthread"));
td->td_flags &= ~TDF_BOUND;
}
-int
-sched_is_bound(struct thread *td)
+static int
+sched_4bsd_is_bound(struct thread *td)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
return (td->td_flags & TDF_BOUND);
}
-void
-sched_relinquish(struct thread *td)
+static void
+sched_4bsd_relinquish(struct thread *td)
{
thread_lock(td);
mi_switch(SW_VOL | SWT_RELINQUISH);
}
-int
-sched_load(void)
+static int
+sched_4bsd_load(void)
{
return (sched_tdcnt);
}
-int
-sched_sizeof_proc(void)
+static int
+sched_4bsd_sizeof_proc(void)
{
return (sizeof(struct proc));
}
-int
-sched_sizeof_thread(void)
+static int
+sched_4bsd_sizeof_thread(void)
{
return (sizeof(struct thread) + sizeof(struct td_sched));
}
-fixpt_t
-sched_pctcpu(struct thread *td)
+static fixpt_t
+sched_4bsd_pctcpu(struct thread *td)
{
struct td_sched *ts;
@@ -1639,42 +1607,8 @@
return (ts->ts_pctcpu);
}
-#ifdef RACCT
-/*
- * Calculates the contribution to the thread cpu usage for the latest
- * (unfinished) second.
- */
-fixpt_t
-sched_pctcpu_delta(struct thread *td)
-{
- struct td_sched *ts;
- fixpt_t delta;
- int realstathz;
-
- THREAD_LOCK_ASSERT(td, MA_OWNED);
- ts = td_get_sched(td);
- delta = 0;
- realstathz = stathz ? stathz : hz;
- if (ts->ts_cpticks != 0) {
-#if (FSHIFT >= CCPU_SHIFT)
- delta = (realstathz == 100)
- ? ((fixpt_t) ts->ts_cpticks) <<
- (FSHIFT - CCPU_SHIFT) :
- 100 * (((fixpt_t) ts->ts_cpticks)
- << (FSHIFT - CCPU_SHIFT)) / realstathz;
-#else
- delta = ((FSCALE - ccpu) *
- (ts->ts_cpticks *
- FSCALE / realstathz)) >> FSHIFT;
-#endif
- }
-
- return (delta);
-}
-#endif
-
-u_int
-sched_estcpu(struct thread *td)
+static u_int
+sched_4bsd_estcpu(struct thread *td)
{
return (td_get_sched(td)->ts_estcpu);
@@ -1683,8 +1617,8 @@
/*
* The actual idle process.
*/
-void
-sched_idletd(void *dummy)
+static void
+sched_4bsd_idletd(void *dummy)
{
struct pcpuidlestat *stat;
@@ -1725,8 +1659,8 @@
/*
* A CPU is entering for the first time.
*/
-void
-sched_ap_entry(void)
+static void
+sched_4bsd_ap_entry(void)
{
/*
@@ -1749,8 +1683,8 @@
/*
* A thread is exiting.
*/
-void
-sched_throw(struct thread *td)
+static void
+sched_4bsd_throw(struct thread *td)
{
MPASS(td != NULL);
@@ -1763,8 +1697,8 @@
sched_throw_tail(td);
}
-void
-sched_fork_exit(struct thread *td)
+static void
+sched_4bsd_fork_exit(struct thread *td)
{
/*
@@ -1782,8 +1716,8 @@
SDT_PROBE0(sched, , , on__cpu);
}
-char *
-sched_tdname(struct thread *td)
+static char *
+sched_4bsd_tdname(struct thread *td)
{
#ifdef KTR
struct td_sched *ts;
@@ -1798,19 +1732,19 @@
#endif
}
-#ifdef KTR
-void
-sched_clear_tdname(struct thread *td)
+static void
+sched_4bsd_clear_tdname(struct thread *td)
{
+#ifdef KTR
struct td_sched *ts;
ts = td_get_sched(td);
ts->ts_name[0] = '\0';
-}
#endif
+}
-void
-sched_affinity(struct thread *td)
+static void
+sched_4bsd_affinity(struct thread *td)
{
#ifdef SMP
struct td_sched *ts;
@@ -1872,3 +1806,83 @@
}
#endif
}
+
+static bool
+sched_4bsd_do_timer_accounting(void)
+{
+#ifdef SMP
+ /*
+ * Don't do any accounting for the disabled HTT cores, since it
+ * will provide misleading numbers for the userland.
+ *
+ * No locking is necessary here, since even if we lose the race
+ * when hlt_cpus_mask changes it is not a big deal, really.
+ *
+ * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
+ * and unlike other schedulers it actually schedules threads to
+ * those CPUs.
+ */
+ return (!CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask));
+#else
+ return (true);
+#endif
+}
+
+static int
+sched_4bsd_find_l2_neighbor(int cpu)
+{
+ return (-1);
+}
+
+struct sched_instance sched_4bsd_instance = {
+#define SLOT(name) .name = sched_4bsd_##name
+ SLOT(load),
+ SLOT(rr_interval),
+ SLOT(runnable),
+ SLOT(exit),
+ SLOT(fork),
+ SLOT(fork_exit),
+ SLOT(class),
+ SLOT(nice),
+ SLOT(ap_entry),
+ SLOT(exit_thread),
+ SLOT(estcpu),
+ SLOT(fork_thread),
+ SLOT(ithread_prio),
+ SLOT(lend_prio),
+ SLOT(lend_user_prio),
+ SLOT(lend_user_prio_cond),
+ SLOT(pctcpu),
+ SLOT(prio),
+ SLOT(sleep),
+ SLOT(sswitch),
+ SLOT(throw),
+ SLOT(unlend_prio),
+ SLOT(user_prio),
+ SLOT(userret_slowpath),
+ SLOT(add),
+ SLOT(choose),
+ SLOT(clock),
+ SLOT(idletd),
+ SLOT(preempt),
+ SLOT(relinquish),
+ SLOT(rem),
+ SLOT(wakeup),
+ SLOT(bind),
+ SLOT(unbind),
+ SLOT(is_bound),
+ SLOT(affinity),
+ SLOT(sizeof_proc),
+ SLOT(sizeof_thread),
+ SLOT(tdname),
+ SLOT(clear_tdname),
+ SLOT(do_timer_accounting),
+ SLOT(find_l2_neighbor),
+ SLOT(init),
+ SLOT(init_ap),
+ SLOT(setup),
+ SLOT(initticks),
+ SLOT(schedcpu),
+#undef SLOT
+};
+DECLARE_SCHEDULER(fourbsd_sched_selector, "4BSD", &sched_4bsd_instance);
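
The decay constant that used to be a file-scope initializer is now set in sched_4bsd_setup(), but its meaning is unchanged: ccpu = exp(-1/20) scaled by FSCALE, applied by schedcpu() roughly once per second, so after 60 seconds a thread retains

    (exp(-1/20))^60 = exp(-3) ~= 0.0498

of its old ts_pctcpu, i.e. about 95% decays away, exactly as the comment says. Because the shared ccpu now lives in sched_shim.c and starts at zero, kern.ccpu still reads 0 when ULE is selected, preserving the "ps compat" behavior that sched_ule.c used to provide explicitly.
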
diff --git a/sys/kern/sched_shim.c b/sys/kern/sched_shim.c
new file mode 100644
--- /dev/null
+++ b/sys/kern/sched_shim.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright 2026 The FreeBSD Foundation
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * This software was developed by Konstantin Belousov <kib@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ */
+
+#include "opt_sched.h"
+
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/proc.h>
+#include <sys/runq.h>
+#include <sys/sbuf.h>
+#include <sys/sched.h>
+#include <sys/sysctl.h>
+#include <machine/ifunc.h>
+
+const struct sched_instance *active_sched;
+
+#ifndef __DO_NOT_HAVE_SYS_IFUNCS
+#define __DEFINE_SHIM(__m, __r, __n, __p, __a) \
+ DEFINE_IFUNC(, __r, __n, __p) \
+ { \
+ return (active_sched->__m); \
+ }
+#else
+#define __DEFINE_SHIM(__m, __r, __n, __p, __a) \
+ __r \
+ __n __p \
+ { \
+ return (active_sched->__m __a); \
+ }
+#endif
+#define DEFINE_SHIM0(__m, __r, __n) \
+ __DEFINE_SHIM(__m, __r, __n, (void), ())
+#define DEFINE_SHIM1(__m, __r, __n, __t1, __a1) \
+ __DEFINE_SHIM(__m, __r, __n, (__t1 __a1), (__a1))
+#define DEFINE_SHIM2(__m, __r, __n, __t1, __a1, __t2, __a2) \
+ __DEFINE_SHIM(__m, __r, __n, (__t1 __a1, __t2 __a2), (__a1, __a2))
+
+DEFINE_SHIM0(load, int, sched_load)
+DEFINE_SHIM0(rr_interval, int, sched_rr_interval)
+DEFINE_SHIM0(runnable, bool, sched_runnable)
+DEFINE_SHIM2(exit, void, sched_exit, struct proc *, p,
+ struct thread *, childtd)
+DEFINE_SHIM2(fork, void, sched_fork, struct thread *, td,
+ struct thread *, childtd)
+DEFINE_SHIM1(fork_exit, void, sched_fork_exit, struct thread *, td)
+DEFINE_SHIM2(class, void, sched_class, struct thread *, td, int, class)
+DEFINE_SHIM2(nice, void, sched_nice, struct proc *, p, int, nice)
+DEFINE_SHIM0(ap_entry, void, sched_ap_entry)
+DEFINE_SHIM2(exit_thread, void, sched_exit_thread, struct thread *, td,
+ struct thread *, child)
+DEFINE_SHIM1(estcpu, u_int, sched_estcpu, struct thread *, td)
+DEFINE_SHIM2(fork_thread, void, sched_fork_thread, struct thread *, td,
+ struct thread *, child)
+DEFINE_SHIM2(ithread_prio, void, sched_ithread_prio, struct thread *, td,
+ u_char, prio)
+DEFINE_SHIM2(lend_prio, void, sched_lend_prio, struct thread *, td,
+ u_char, prio)
+DEFINE_SHIM2(lend_user_prio, void, sched_lend_user_prio, struct thread *, td,
+ u_char, pri)
+DEFINE_SHIM2(lend_user_prio_cond, void, sched_lend_user_prio_cond,
+ struct thread *, td, u_char, pri)
+DEFINE_SHIM1(pctcpu, fixpt_t, sched_pctcpu, struct thread *, td)
+DEFINE_SHIM2(prio, void, sched_prio, struct thread *, td, u_char, prio)
+DEFINE_SHIM2(sleep, void, sched_sleep, struct thread *, td, int, prio)
+DEFINE_SHIM2(sswitch, void, sched_switch, struct thread *, td, int, flags)
+DEFINE_SHIM1(throw, void, sched_throw, struct thread *, td)
+DEFINE_SHIM2(unlend_prio, void, sched_unlend_prio, struct thread *, td,
+ u_char, prio)
+DEFINE_SHIM2(user_prio, void, sched_user_prio, struct thread *, td,
+ u_char, prio)
+DEFINE_SHIM1(userret_slowpath, void, sched_userret_slowpath,
+ struct thread *, td)
+DEFINE_SHIM2(add, void, sched_add, struct thread *, td, int, flags)
+DEFINE_SHIM0(choose, struct thread *, sched_choose)
+DEFINE_SHIM2(clock, void, sched_clock, struct thread *, td, int, cnt)
+DEFINE_SHIM1(idletd, void, sched_idletd, void *, dummy)
+DEFINE_SHIM1(preempt, void, sched_preempt, struct thread *, td)
+DEFINE_SHIM1(relinquish, void, sched_relinquish, struct thread *, td)
+DEFINE_SHIM1(rem, void, sched_rem, struct thread *, td)
+DEFINE_SHIM2(wakeup, void, sched_wakeup, struct thread *, td, int, srqflags)
+DEFINE_SHIM2(bind, void, sched_bind, struct thread *, td, int, cpu)
+DEFINE_SHIM1(unbind, void, sched_unbind, struct thread *, td)
+DEFINE_SHIM1(is_bound, int, sched_is_bound, struct thread *, td)
+DEFINE_SHIM1(affinity, void, sched_affinity, struct thread *, td)
+DEFINE_SHIM0(sizeof_proc, int, sched_sizeof_proc)
+DEFINE_SHIM0(sizeof_thread, int, sched_sizeof_thread)
+DEFINE_SHIM1(tdname, char *, sched_tdname, struct thread *, td)
+DEFINE_SHIM1(clear_tdname, void, sched_clear_tdname, struct thread *, td)
+DEFINE_SHIM0(do_timer_accounting, bool, sched_do_timer_accounting)
+DEFINE_SHIM1(find_l2_neighbor, int, sched_find_l2_neighbor, int, cpu)
+DEFINE_SHIM0(init_ap, void, schedinit_ap)
+
+
+SCHED_STAT_DEFINE(ithread_demotions, "Interrupt thread priority demotions");
+SCHED_STAT_DEFINE(ithread_preemptions,
+ "Interrupt thread preemptions due to time-sharing");
+
+SDT_PROVIDER_DEFINE(sched);
+
+SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *",
+ "struct proc *", "uint8_t");
+SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *",
+ "struct proc *", "void *");
+SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *",
+ "struct proc *", "void *", "int");
+SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *",
+ "struct proc *", "uint8_t", "struct thread *");
+SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int");
+SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *",
+ "struct proc *");
+SDT_PROBE_DEFINE(sched, , , on__cpu);
+SDT_PROBE_DEFINE(sched, , , remain__cpu);
+SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *",
+ "struct proc *");
+
+#ifdef KDTRACE_HOOKS
+#include <sys/dtrace_bsd.h>
+int __read_mostly dtrace_vtime_active;
+dtrace_vtime_switch_func_t dtrace_vtime_switch_func;
+#endif
+
+static char sched_name[32] = "ULE";
+
+SET_DECLARE(sched_instance_set, struct sched_selection);
+
+void
+sched_instance_select(void)
+{
+ struct sched_selection *s, **ss;
+ int i;
+
+ TUNABLE_STR_FETCH("kern.sched.name", sched_name, sizeof(sched_name));
+ SET_FOREACH(ss, sched_instance_set) {
+ s = *ss;
+ for (i = 0; s->name[i] == sched_name[i]; i++) {
+ if (s->name[i] == '\0') {
+ active_sched = s->instance;
+ return;
+ }
+ }
+ }
+
+ /*
+ * No scheduler matching the configuration was found. If
+ * there is any scheduler compiled in, at all, use the first
+ * scheduler from the linker set.
+ */
+ if (SET_BEGIN(sched_instance_set) < SET_LIMIT(sched_instance_set)) {
+ s = *SET_BEGIN(sched_instance_set);
+ active_sched = s->instance;
+ for (i = 0;; i++) {
+ sched_name[i] = s->name[i];
+ if (s->name[i] == '\0')
+ break;
+ }
+ }
+}
+
+void
+schedinit(void)
+{
+ if (active_sched == NULL)
+ panic("Cannot find scheduler %s", sched_name);
+ active_sched->init();
+}
+
+static void
+sched_setup(void *dummy)
+{
+ active_sched->setup();
+}
+SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);
+
+static void
+sched_initticks(void *dummy)
+{
+ active_sched->initticks();
+}
+SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks,
+ NULL);
+
+static void
+sched_schedcpu(void)
+{
+ active_sched->schedcpu();
+}
+SYSINIT(schedcpu, SI_SUB_LAST, SI_ORDER_FIRST, sched_schedcpu, NULL);
+
+SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "Scheduler");
+
+SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, sched_name, 0,
+ "Scheduler name");
+
+static int
+sysctl_kern_sched_available(SYSCTL_HANDLER_ARGS)
+{
+ struct sched_selection *s, **ss;
+ struct sbuf *sb, sm;
+ int error;
+ bool first;
+
+ sb = sbuf_new_for_sysctl(&sm, NULL, 0, req);
+ if (sb == NULL)
+ return (ENOMEM);
+ first = true;
+ SET_FOREACH(ss, sched_instance_set) {
+ s = *ss;
+ if (first)
+ first = false;
+ else
+ sbuf_cat(sb, ",");
+ sbuf_cat(sb, s->name);
+ }
+ error = sbuf_finish(sb);
+ sbuf_delete(sb);
+ return (error);
+}
+
+SYSCTL_PROC(_kern_sched, OID_AUTO, available,
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
+ NULL, 0, sysctl_kern_sched_available, "A",
+ "List of available schedulers");
+
+fixpt_t ccpu;
+SYSCTL_UINT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0,
+ "Decay factor used for updating %CPU");
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -37,12 +37,10 @@
* Isilon Systems and a general lack of creativity on the part of the author.
*/
-#include <sys/cdefs.h>
#include "opt_hwpmc_hooks.h"
#include "opt_hwt_hooks.h"
#include "opt_sched.h"
-#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
@@ -74,12 +72,6 @@
#include <dev/hwt/hwt_hook.h>
#endif
-#ifdef KDTRACE_HOOKS
-#include <sys/dtrace_bsd.h>
-int __read_mostly dtrace_vtime_active;
-dtrace_vtime_switch_func_t dtrace_vtime_switch_func;
-#endif
-
#include <machine/cpu.h>
#include <machine/smp.h>
@@ -406,36 +398,11 @@
static bool sched_balance_pair(struct tdq *, struct tdq *);
static inline struct tdq *sched_setcpu(struct thread *, int, int);
static inline void thread_unblock_switch(struct thread *, struct mtx *);
-static int sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS);
-static int sysctl_kern_sched_topology_spec_internal(struct sbuf *sb,
+static int sysctl_kern_sched_ule_topology_spec(SYSCTL_HANDLER_ARGS);
+static int sysctl_kern_sched_ule_topology_spec_internal(struct sbuf *sb,
struct cpu_group *cg, int indent);
#endif
-static void sched_setup(void *dummy);
-SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);
-
-static void sched_initticks(void *dummy);
-SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks,
- NULL);
-
-SDT_PROVIDER_DEFINE(sched);
-
-SDT_PROBE_DEFINE3(sched, , , change__pri, "struct thread *",
- "struct proc *", "uint8_t");
-SDT_PROBE_DEFINE3(sched, , , dequeue, "struct thread *",
- "struct proc *", "void *");
-SDT_PROBE_DEFINE4(sched, , , enqueue, "struct thread *",
- "struct proc *", "void *", "int");
-SDT_PROBE_DEFINE4(sched, , , lend__pri, "struct thread *",
- "struct proc *", "uint8_t", "struct thread *");
-SDT_PROBE_DEFINE2(sched, , , load__change, "int", "int");
-SDT_PROBE_DEFINE2(sched, , , off__cpu, "struct thread *",
- "struct proc *");
-SDT_PROBE_DEFINE(sched, , , on__cpu);
-SDT_PROBE_DEFINE(sched, , , remain__cpu);
-SDT_PROBE_DEFINE2(sched, , , surrender, "struct thread *",
- "struct proc *");
-
/*
* Print the threads waiting on a run-queue.
*/
@@ -1642,7 +1609,7 @@
* information.
*/
static void
-sched_setup(void *dummy)
+sched_ule_setup(void)
{
struct tdq *tdq;
@@ -1667,7 +1634,7 @@
*/
/* ARGSUSED */
static void
-sched_initticks(void *dummy)
+sched_ule_initticks(void)
{
int incr;
@@ -1891,8 +1858,8 @@
/*
* Called from proc0_init() to setup the scheduler fields.
*/
-void
-schedinit(void)
+static void
+sched_ule_init(void)
{
struct td_sched *ts0;
@@ -1916,8 +1883,8 @@
* TDQ_SELF() relies on the below sched pcpu setting; it may be used only
* after schedinit_ap().
*/
-void
-schedinit_ap(void)
+static void
+sched_ule_init_ap(void)
{
#ifdef SMP
@@ -1931,8 +1898,8 @@
* priority they will switch when their slices run out, which will be
* at most sched_slice stathz ticks.
*/
-int
-sched_rr_interval(void)
+static int
+sched_ule_rr_interval(void)
{
/* Convert sched_slice from stathz to hz. */
@@ -2051,8 +2018,8 @@
* Update a thread's priority when it is lent another thread's
* priority.
*/
-void
-sched_lend_prio(struct thread *td, u_char prio)
+static void
+sched_ule_lend_prio(struct thread *td, u_char prio)
{
td->td_flags |= TDF_BORROWING;
@@ -2067,8 +2034,8 @@
* important than prio, the thread will keep a priority boost
* of prio.
*/
-void
-sched_unlend_prio(struct thread *td, u_char prio)
+static void
+sched_ule_unlend_prio(struct thread *td, u_char prio)
{
u_char base_pri;
@@ -2087,8 +2054,8 @@
/*
* Standard entry for setting the priority to an absolute value.
*/
-void
-sched_prio(struct thread *td, u_char prio)
+static void
+sched_ule_prio(struct thread *td, u_char prio)
{
u_char oldprio;
@@ -2117,8 +2084,8 @@
/*
* Set the base interrupt thread priority.
*/
-void
-sched_ithread_prio(struct thread *td, u_char prio)
+static void
+sched_ule_ithread_prio(struct thread *td, u_char prio)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
MPASS(td->td_pri_class == PRI_ITHD);
@@ -2129,8 +2096,8 @@
/*
* Set the base user priority, does not effect current running priority.
*/
-void
-sched_user_prio(struct thread *td, u_char prio)
+static void
+sched_ule_user_prio(struct thread *td, u_char prio)
{
td->td_base_user_pri = prio;
@@ -2139,8 +2106,8 @@
td->td_user_pri = prio;
}
-void
-sched_lend_user_prio(struct thread *td, u_char prio)
+static void
+sched_ule_lend_user_prio(struct thread *td, u_char prio)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -2155,8 +2122,8 @@
/*
* Like the above but first check if there is anything to do.
*/
-void
-sched_lend_user_prio_cond(struct thread *td, u_char prio)
+static void
+sched_ule_lend_user_prio_cond(struct thread *td, u_char prio)
{
if (td->td_lend_user_pri == prio)
@@ -2327,8 +2294,8 @@
* migrating a thread from one queue to another as running threads may
* be assigned elsewhere via binding.
*/
-void
-sched_switch(struct thread *td, int flags)
+static void
+sched_ule_sswitch(struct thread *td, int flags)
{
struct thread *newtd;
struct tdq *tdq;
@@ -2466,8 +2433,8 @@
/*
* Adjust thread priorities as a result of a nice request.
*/
-void
-sched_nice(struct proc *p, int nice)
+static void
+sched_ule_nice(struct proc *p, int nice)
{
struct thread *td;
@@ -2485,8 +2452,8 @@
/*
* Record the sleep time for the interactivity scorer.
*/
-void
-sched_sleep(struct thread *td, int prio)
+static void
+sched_ule_sleep(struct thread *td, int prio)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -2506,8 +2473,8 @@
*
* Requires the thread lock on entry, drops on exit.
*/
-void
-sched_wakeup(struct thread *td, int srqflags)
+static void
+sched_ule_wakeup(struct thread *td, int srqflags)
{
struct td_sched *ts;
int slptick;
@@ -2546,8 +2513,8 @@
* Penalize the parent for creating a new child and initialize the child's
* priority.
*/
-void
-sched_fork(struct thread *td, struct thread *child)
+static void
+sched_ule_fork(struct thread *td, struct thread *child)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
sched_pctcpu_update(td_get_sched(td), 1);
@@ -2565,8 +2532,8 @@
/*
* Fork a new thread, may be within the same process.
*/
-void
-sched_fork_thread(struct thread *td, struct thread *child)
+static void
+sched_ule_fork_thread(struct thread *td, struct thread *child)
{
struct td_sched *ts;
struct td_sched *ts2;
@@ -2611,8 +2578,8 @@
/*
* Adjust the priority class of a thread.
*/
-void
-sched_class(struct thread *td, int class)
+static void
+sched_ule_class(struct thread *td, int class)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
@@ -2624,8 +2591,8 @@
/*
* Return some of the child's priority and interactivity to the parent.
*/
-void
-sched_exit(struct proc *p, struct thread *child)
+static void
+sched_ule_exit(struct proc *p, struct thread *child)
{
struct thread *td;
@@ -2642,8 +2609,8 @@
* jobs such as make. This has little effect on the make process itself but
* causes new processes spawned by it to receive worse scores immediately.
*/
-void
-sched_exit_thread(struct thread *td, struct thread *child)
+static void
+sched_ule_exit_thread(struct thread *td, struct thread *child)
{
KTR_STATE1(KTR_SCHED, "thread", sched_tdname(child), "thread exit",
@@ -2660,8 +2627,8 @@
thread_unlock(td);
}
-void
-sched_preempt(struct thread *td)
+static void
+sched_ule_preempt(struct thread *td)
{
struct tdq *tdq;
int flags;
@@ -2691,8 +2658,8 @@
* Fix priorities on return to user-space. Priorities may be elevated due
* to static priorities in msleep() or similar.
*/
-void
-sched_userret_slowpath(struct thread *td)
+static void
+sched_ule_userret_slowpath(struct thread *td)
{
thread_lock(td);
@@ -2702,10 +2669,6 @@
thread_unlock(td);
}
-SCHED_STAT_DEFINE(ithread_demotions, "Interrupt thread priority demotions");
-SCHED_STAT_DEFINE(ithread_preemptions,
- "Interrupt thread preemptions due to time-sharing");
-
/*
* Return time slice for a given thread. For ithreads this is
* sched_slice. For other threads it is tdq_slice(tdq).
@@ -2722,8 +2685,8 @@
* Handle a stathz tick. This is really only relevant for timeshare
* and interrupt threads.
*/
-void
-sched_clock(struct thread *td, int cnt)
+static void
+sched_ule_clock(struct thread *td, int cnt)
{
struct tdq *tdq;
struct td_sched *ts;
@@ -2808,8 +2771,8 @@
}
}
-u_int
-sched_estcpu(struct thread *td __unused)
+static u_int
+sched_ule_estcpu(struct thread *td __unused)
{
return (0);
@@ -2819,8 +2782,8 @@
* Return whether the current CPU has runnable tasks. Used for in-kernel
* cooperative idle threads.
*/
-bool
-sched_runnable(void)
+static bool
+sched_ule_runnable(void)
{
struct tdq *tdq;
@@ -2832,8 +2795,8 @@
* Choose the highest priority thread to run. The thread is removed from
* the run-queue while running however the load remains.
*/
-struct thread *
-sched_choose(void)
+static struct thread *
+sched_ule_choose(void)
{
struct thread *td;
struct tdq *tdq;
@@ -2909,8 +2872,8 @@
*
* Requires the thread lock on entry, drops on exit.
*/
-void
-sched_add(struct thread *td, int flags)
+static void
+sched_ule_add(struct thread *td, int flags)
{
struct tdq *tdq;
#ifdef SMP
@@ -2969,8 +2932,8 @@
* when we're stealing a thread from a remote queue. Otherwise all threads
* exit by calling sched_exit_thread() and sched_throw() themselves.
*/
-void
-sched_rem(struct thread *td)
+static void
+sched_ule_rem(struct thread *td)
{
struct tdq *tdq;
@@ -2992,8 +2955,8 @@
/*
* Fetch cpu utilization information. Updates on demand.
*/
-fixpt_t
-sched_pctcpu(struct thread *td)
+static fixpt_t
+sched_ule_pctcpu(struct thread *td)
{
struct td_sched *ts;
u_int len;
@@ -3014,8 +2977,8 @@
* Enforce affinity settings for a thread. Called after adjustments to
* cpumask.
*/
-void
-sched_affinity(struct thread *td)
+static void
+sched_ule_affinity(struct thread *td)
{
#ifdef SMP
struct td_sched *ts;
@@ -3045,8 +3008,8 @@
/*
* Bind a thread to a target cpu.
*/
-void
-sched_bind(struct thread *td, int cpu)
+static void
+sched_ule_bind(struct thread *td, int cpu)
{
struct td_sched *ts;
@@ -3069,8 +3032,8 @@
/*
* Release a bound thread.
*/
-void
-sched_unbind(struct thread *td)
+static void
+sched_ule_unbind(struct thread *td)
{
struct td_sched *ts;
@@ -3083,8 +3046,8 @@
sched_unpin();
}
-int
-sched_is_bound(struct thread *td)
+static int
+sched_ule_is_bound(struct thread *td)
{
THREAD_LOCK_ASSERT(td, MA_OWNED);
return (td_get_sched(td)->ts_flags & TSF_BOUND);
@@ -3093,8 +3056,8 @@
/*
* Basic yield call.
*/
-void
-sched_relinquish(struct thread *td)
+static void
+sched_ule_relinquish(struct thread *td)
{
thread_lock(td);
mi_switch(SW_VOL | SWT_RELINQUISH);
@@ -3103,8 +3066,8 @@
/*
* Return the total system load.
*/
-int
-sched_load(void)
+static int
+sched_ule_load(void)
{
#ifdef SMP
int total;
@@ -3119,14 +3082,14 @@
#endif
}
-int
-sched_sizeof_proc(void)
+static int
+sched_ule_sizeof_proc(void)
{
return (sizeof(struct proc));
}
-int
-sched_sizeof_thread(void)
+static int
+sched_ule_sizeof_thread(void)
{
return (sizeof(struct thread) + sizeof(struct td_sched));
}
@@ -3141,8 +3104,8 @@
/*
* The actual idle process.
*/
-void
-sched_idletd(void *dummy)
+static void
+sched_ule_idletd(void *dummy)
{
struct thread *td;
struct tdq *tdq;
@@ -3244,8 +3207,8 @@
/*
* A CPU is entering for the first time.
*/
-void
-sched_ap_entry(void)
+static void
+sched_ule_ap_entry(void)
{
struct thread *newtd;
struct tdq *tdq;
@@ -3274,8 +3237,8 @@
/*
* A thread is exiting.
*/
-void
-sched_throw(struct thread *td)
+static void
+sched_ule_throw(struct thread *td)
{
struct thread *newtd;
struct tdq *tdq;
@@ -3305,8 +3268,8 @@
* This is called from fork_exit(). Just acquire the correct locks and
* let fork do the rest of the work.
*/
-void
-sched_fork_exit(struct thread *td)
+static void
+sched_ule_fork_exit(struct thread *td)
{
struct tdq *tdq;
int cpuid;
@@ -3331,8 +3294,8 @@
/*
* Create on first use to catch odd startup conditions.
*/
-char *
-sched_tdname(struct thread *td)
+static char *
+sched_ule_tdname(struct thread *td)
{
#ifdef KTR
struct td_sched *ts;
@@ -3347,17 +3310,148 @@
#endif
}
-#ifdef KTR
-void
-sched_clear_tdname(struct thread *td)
+static void
+sched_ule_clear_tdname(struct thread *td)
{
+#ifdef KTR
struct td_sched *ts;
ts = td_get_sched(td);
ts->ts_name[0] = '\0';
+#endif
+}
+
+static void
+sched_ule_schedcpu(void)
+{
+}
+
+static bool
+sched_ule_do_timer_accounting(void)
+{
+ return (true);
+}
+
+#ifdef SMP
+static int
+sched_ule_find_child_with_core(int cpu, struct cpu_group *grp)
+{
+ int i;
+
+ if (grp->cg_children == 0)
+ return (-1);
+
+ MPASS(grp->cg_child);
+ for (i = 0; i < grp->cg_children; i++) {
+ if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask))
+ return (i);
+ }
+
+ return (-1);
+}
+
+static int
+sched_ule_find_l2_neighbor(int cpu)
+{
+ struct cpu_group *grp;
+ int i;
+
+ grp = cpu_top;
+ if (grp == NULL)
+ return (-1);
+
+ /*
+ * Find the smallest CPU group that contains the given core.
+ */
+ i = 0;
+ while ((i = sched_ule_find_child_with_core(cpu, grp)) != -1) {
+ /*
+ * If the smallest group containing the given CPU has less
+ * than two members, we conclude the given CPU has no
+ * L2 neighbor.
+ */
+ if (grp->cg_child[i].cg_count <= 1)
+ return (-1);
+ grp = &grp->cg_child[i];
+ }
+
+ /* Must share L2. */
+ if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE)
+ return (-1);
+
+ /*
+ * Select the first member of the set that isn't the reference
+ * CPU, which at this point is guaranteed to exist.
+ */
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (CPU_ISSET(i, &grp->cg_mask) && i != cpu)
+ return (i);
+ }
+
+ /* Should never be reached */
+ return (-1);
+}
+#else
+static int
+sched_ule_find_l2_neighbor(int cpu)
+{
+ return (-1);
}
#endif
+struct sched_instance sched_ule_instance = {
+#define SLOT(name) .name = sched_ule_##name
+ SLOT(load),
+ SLOT(rr_interval),
+ SLOT(runnable),
+ SLOT(exit),
+ SLOT(fork),
+ SLOT(fork_exit),
+ SLOT(class),
+ SLOT(nice),
+ SLOT(ap_entry),
+ SLOT(exit_thread),
+ SLOT(estcpu),
+ SLOT(fork_thread),
+ SLOT(ithread_prio),
+ SLOT(lend_prio),
+ SLOT(lend_user_prio),
+ SLOT(lend_user_prio_cond),
+ SLOT(pctcpu),
+ SLOT(prio),
+ SLOT(sleep),
+ SLOT(sswitch),
+ SLOT(throw),
+ SLOT(unlend_prio),
+ SLOT(user_prio),
+ SLOT(userret_slowpath),
+ SLOT(add),
+ SLOT(choose),
+ SLOT(clock),
+ SLOT(idletd),
+ SLOT(preempt),
+ SLOT(relinquish),
+ SLOT(rem),
+ SLOT(wakeup),
+ SLOT(bind),
+ SLOT(unbind),
+ SLOT(is_bound),
+ SLOT(affinity),
+ SLOT(sizeof_proc),
+ SLOT(sizeof_thread),
+ SLOT(tdname),
+ SLOT(clear_tdname),
+ SLOT(do_timer_accounting),
+ SLOT(find_l2_neighbor),
+ SLOT(init),
+ SLOT(init_ap),
+ SLOT(setup),
+ SLOT(initticks),
+ SLOT(schedcpu),
+#undef SLOT
+};
+DECLARE_SCHEDULER(ule_sched_selector, "ULE", &sched_ule_instance);
+
#ifdef SMP
/*
@@ -3365,8 +3459,8 @@
* the topology tree.
*/
static int
-sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, struct cpu_group *cg,
- int indent)
+sysctl_kern_sched_ule_topology_spec_internal(struct sbuf *sb,
+ struct cpu_group *cg, int indent)
{
char cpusetbuf[CPUSETBUFSIZ];
int i, first;
@@ -3403,7 +3497,7 @@
if (cg->cg_children > 0) {
sbuf_printf(sb, "%*s <children>\n", indent, "");
for (i = 0; i < cg->cg_children; i++)
- sysctl_kern_sched_topology_spec_internal(sb,
+ sysctl_kern_sched_ule_topology_spec_internal(sb,
&cg->cg_child[i], indent+2);
sbuf_printf(sb, "%*s </children>\n", indent, "");
}
@@ -3416,19 +3510,20 @@
* the recursive sysctl_kern_smp_topology_spec_internal().
*/
static int
-sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS)
+sysctl_kern_sched_ule_topology_spec(SYSCTL_HANDLER_ARGS)
{
struct sbuf *topo;
int err;
- KASSERT(cpu_top != NULL, ("cpu_top isn't initialized"));
+ if (cpu_top == NULL)
+ return (ENOTTY);
topo = sbuf_new_for_sysctl(NULL, NULL, 512, req);
if (topo == NULL)
return (ENOMEM);
sbuf_cat(topo, "<groups>\n");
- err = sysctl_kern_sched_topology_spec_internal(topo, cpu_top, 1);
+ err = sysctl_kern_sched_ule_topology_spec_internal(topo, cpu_top, 1);
sbuf_cat(topo, "</groups>\n");
if (err == 0) {
@@ -3459,51 +3554,51 @@
return (0);
}
-SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
- "Scheduler");
-SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ULE", 0,
- "Scheduler name");
-SYSCTL_PROC(_kern_sched, OID_AUTO, quantum,
+SYSCTL_NODE(_kern_sched, OID_AUTO, ule, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
+ "ULE Scheduler");
+
+SYSCTL_PROC(_kern_sched_ule, OID_AUTO, quantum,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0,
sysctl_kern_quantum, "I",
"Quantum for timeshare threads in microseconds");
-SYSCTL_INT(_kern_sched, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, slice, CTLFLAG_RW, &sched_slice, 0,
"Quantum for timeshare threads in stathz ticks");
-SYSCTL_UINT(_kern_sched, OID_AUTO, interact, CTLFLAG_RWTUN, &sched_interact, 0,
+SYSCTL_UINT(_kern_sched_ule, OID_AUTO, interact, CTLFLAG_RWTUN, &sched_interact, 0,
"Interactivity score threshold");
-SYSCTL_INT(_kern_sched, OID_AUTO, preempt_thresh, CTLFLAG_RWTUN,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, preempt_thresh, CTLFLAG_RWTUN,
&preempt_thresh, 0,
"Maximal (lowest) priority for preemption");
-SYSCTL_INT(_kern_sched, OID_AUTO, static_boost, CTLFLAG_RWTUN, &static_boost, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, static_boost, CTLFLAG_RWTUN,
+ &static_boost, 0,
"Assign static kernel priorities to sleeping threads");
-SYSCTL_INT(_kern_sched, OID_AUTO, idlespins, CTLFLAG_RWTUN, &sched_idlespins, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, idlespins, CTLFLAG_RWTUN,
+ &sched_idlespins, 0,
"Number of times idle thread will spin waiting for new work");
-SYSCTL_INT(_kern_sched, OID_AUTO, idlespinthresh, CTLFLAG_RW,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, idlespinthresh, CTLFLAG_RW,
&sched_idlespinthresh, 0,
"Threshold before we will permit idle thread spinning");
#ifdef SMP
-SYSCTL_INT(_kern_sched, OID_AUTO, affinity, CTLFLAG_RW, &affinity, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, affinity, CTLFLAG_RW, &affinity, 0,
"Number of hz ticks to keep thread affinity for");
-SYSCTL_INT(_kern_sched, OID_AUTO, balance, CTLFLAG_RWTUN, &rebalance, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, balance, CTLFLAG_RWTUN, &rebalance, 0,
"Enables the long-term load balancer");
-SYSCTL_INT(_kern_sched, OID_AUTO, balance_interval, CTLFLAG_RW,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, balance_interval, CTLFLAG_RW,
&balance_interval, 0,
"Average period in stathz ticks to run the long-term balancer");
-SYSCTL_INT(_kern_sched, OID_AUTO, steal_idle, CTLFLAG_RWTUN, &steal_idle, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, steal_idle, CTLFLAG_RWTUN,
+ &steal_idle, 0,
"Attempts to steal work from other cores before idling");
-SYSCTL_INT(_kern_sched, OID_AUTO, steal_thresh, CTLFLAG_RWTUN, &steal_thresh, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, steal_thresh, CTLFLAG_RWTUN,
+ &steal_thresh, 0,
"Minimum load on remote CPU before we'll steal");
-SYSCTL_INT(_kern_sched, OID_AUTO, trysteal_limit, CTLFLAG_RWTUN,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, trysteal_limit, CTLFLAG_RWTUN,
&trysteal_limit, 0,
"Topological distance limit for stealing threads in sched_switch()");
-SYSCTL_INT(_kern_sched, OID_AUTO, always_steal, CTLFLAG_RWTUN, &always_steal, 0,
+SYSCTL_INT(_kern_sched_ule, OID_AUTO, always_steal, CTLFLAG_RWTUN,
+ &always_steal, 0,
"Always run the stealer from the idle thread");
-SYSCTL_PROC(_kern_sched, OID_AUTO, topology_spec, CTLTYPE_STRING |
- CTLFLAG_MPSAFE | CTLFLAG_RD, NULL, 0, sysctl_kern_sched_topology_spec, "A",
+SYSCTL_PROC(_kern_sched_ule, OID_AUTO, topology_spec, CTLTYPE_STRING |
+ CTLFLAG_MPSAFE | CTLFLAG_RD, NULL, 0,
+ sysctl_kern_sched_ule_topology_spec, "A",
"XML dump of detected CPU topology");
#endif
-
-/* ps compat. All cpu percentages from ULE are weighted. */
-static int ccpu = 0;
-SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0,
- "Decay factor used for updating %CPU in 4BSD scheduler");
diff --git a/sys/net/iflib.c b/sys/net/iflib.c
--- a/sys/net/iflib.c
+++ b/sys/net/iflib.c
@@ -29,7 +29,6 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_acpi.h"
-#include "opt_sched.h"
#include <sys/param.h>
#include <sys/types.h>
@@ -40,8 +39,10 @@
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/kobj.h>
+#include <sys/proc.h>
#include <sys/rman.h>
#include <sys/sbuf.h>
+#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
@@ -4813,83 +4814,6 @@
return (cpuid);
}
-#if defined(SMP) && defined(SCHED_ULE)
-extern struct cpu_group *cpu_top; /* CPU topology */
-
-static int
-find_child_with_core(int cpu, struct cpu_group *grp)
-{
- int i;
-
- if (grp->cg_children == 0)
- return (-1);
-
- MPASS(grp->cg_child);
- for (i = 0; i < grp->cg_children; i++) {
- if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask))
- return (i);
- }
-
- return (-1);
-}
-
-
-/*
- * Find an L2 neighbor of the given CPU or return -1 if none found. This
- * does not distinguish among multiple L2 neighbors if the given CPU has
- * more than one (it will always return the same result in that case).
- */
-static int
-find_l2_neighbor(int cpu)
-{
- struct cpu_group *grp;
- int i;
-
- grp = cpu_top;
- if (grp == NULL)
- return (-1);
-
- /*
- * Find the smallest CPU group that contains the given core.
- */
- i = 0;
- while ((i = find_child_with_core(cpu, grp)) != -1) {
- /*
- * If the smallest group containing the given CPU has less
- * than two members, we conclude the given CPU has no
- * L2 neighbor.
- */
- if (grp->cg_child[i].cg_count <= 1)
- return (-1);
- grp = &grp->cg_child[i];
- }
-
- /* Must share L2. */
- if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE)
- return (-1);
-
- /*
- * Select the first member of the set that isn't the reference
- * CPU, which at this point is guaranteed to exist.
- */
- for (i = 0; i < CPU_SETSIZE; i++) {
- if (CPU_ISSET(i, &grp->cg_mask) && i != cpu)
- return (i);
- }
-
- /* Should never be reached */
- return (-1);
-}
-
-#else
-static int
-find_l2_neighbor(int cpu)
-{
-
- return (-1);
-}
-#endif
-
/*
* CPU mapping behaviors
* ---------------------
@@ -4942,7 +4866,7 @@
unsigned int rx_cpuid;
rx_cpuid = cpuid_advance(ctx, base_cpuid, qid);
- l2_neighbor = find_l2_neighbor(rx_cpuid);
+ l2_neighbor = sched_find_l2_neighbor(rx_cpuid);
if (l2_neighbor != -1) {
return (l2_neighbor);
}
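
iflib's private copy of the topology walk is deleted outright: the L2-neighbor search now lives with the scheduler that owns the cpu_group topology, and iflib reaches it through the new sched_find_l2_neighbor() shim. 4BSD simply answers -1 (no neighbor known), which the caller already treats as "use the default mapping", as in this sketch of the call-site pattern:

    l2_neighbor = sched_find_l2_neighbor(rx_cpuid);
    if (l2_neighbor != -1)
            return (l2_neighbor);   /* pin next to the RX CPU's L2 sibling */
    /* otherwise fall through to the default CPU mapping */
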
diff --git a/sys/powerpc/powerpc/machdep.c b/sys/powerpc/powerpc/machdep.c
--- a/sys/powerpc/powerpc/machdep.c
+++ b/sys/powerpc/powerpc/machdep.c
@@ -83,6 +83,7 @@
#include <sys/reboot.h>
#include <sys/reg.h>
#include <sys/rwlock.h>
+#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
@@ -467,6 +468,7 @@
* Bring up MMU
*/
pmap_mmu_init();
+ sched_instance_select();
link_elf_ireloc();
pmap_bootstrap(startkernel, endkernel);
mtmsr(psl_kernset & ~PSL_EE);
diff --git a/sys/powerpc/powerpc/swtch32.S b/sys/powerpc/powerpc/swtch32.S
--- a/sys/powerpc/powerpc/swtch32.S
+++ b/sys/powerpc/powerpc/swtch32.S
@@ -56,7 +56,6 @@
*/
#include "assym.inc"
-#include "opt_sched.h"
#include <sys/syscall.h>
@@ -125,7 +124,7 @@
sync /* Make sure all of that finished */
cpu_switchin:
-#if defined(SMP) && defined(SCHED_ULE)
+#if defined(SMP)
/* Wait for the new thread to become unblocked */
bl 1f
1:
diff --git a/sys/powerpc/powerpc/swtch64.S b/sys/powerpc/powerpc/swtch64.S
--- a/sys/powerpc/powerpc/swtch64.S
+++ b/sys/powerpc/powerpc/swtch64.S
@@ -56,7 +56,6 @@
*/
#include "assym.inc"
-#include "opt_sched.h"
#include <sys/syscall.h>
@@ -187,7 +186,7 @@
sync /* Make sure all of that finished */
cpu_switchin:
-#if defined(SMP) && defined(SCHED_ULE)
+#if defined(SMP)
/* Wait for the new thread to become unblocked */
addis %r6,%r2,TOC_REF(blocked_lock)@ha
ld %r6,TOC_REF(blocked_lock)@l(%r6)
diff --git a/sys/riscv/include/ifunc.h b/sys/riscv/include/ifunc.h
--- a/sys/riscv/include/ifunc.h
+++ b/sys/riscv/include/ifunc.h
@@ -30,6 +30,8 @@
#ifndef __RISCV_IFUNC_H
#define __RISCV_IFUNC_H
+#define __DO_NOT_HAVE_SYS_IFUNCS 1
+
#define DEFINE_IFUNC(qual, ret_type, name, args) \
static ret_type (*name##_resolver(void))args __used; \
qual ret_type name args __attribute__((ifunc(#name "_resolver"))); \
diff --git a/sys/riscv/riscv/machdep.c b/sys/riscv/riscv/machdep.c
--- a/sys/riscv/riscv/machdep.c
+++ b/sys/riscv/riscv/machdep.c
@@ -479,6 +479,8 @@
/* Initialize preload_kmdp */
preload_initkmdp(true);
+ sched_instance_select();
+ /* link_elf_ireloc(); */
/* Read the boot metadata */
boothowto = MD_FETCH(preload_kmdp, MODINFOMD_HOWTO, int);
diff --git a/sys/riscv/riscv/swtch.S b/sys/riscv/riscv/swtch.S
--- a/sys/riscv/riscv/swtch.S
+++ b/sys/riscv/riscv/swtch.S
@@ -33,7 +33,6 @@
*/
#include "assym.inc"
-#include "opt_sched.h"
#include <machine/param.h>
#include <machine/asm.h>
@@ -315,7 +314,7 @@
/* Release the old thread */
sd s2, TD_LOCK(s0)
-#if defined(SCHED_ULE) && defined(SMP)
+#if defined(SMP)
/* Spin if TD_LOCK points to a blocked_lock */
la s2, _C_LABEL(blocked_lock)
1:
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -1173,7 +1173,6 @@
void kern_yield(int);
void killjobc(void);
int leavepgrp(struct proc *p);
-int maybe_preempt(struct thread *td);
void maybe_yield(void);
void mi_switch(int flags);
int p_candebug(struct thread *td, struct proc *p);
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -68,6 +68,8 @@
#ifdef SCHED_STATS
#include <sys/pcpu.h>
#endif
+#include <sys/linker_set.h>
+#include <sys/sdt.h>
struct proc;
struct thread;
@@ -114,11 +116,6 @@
void sched_unlend_prio(struct thread *td, u_char prio);
void sched_user_prio(struct thread *td, u_char prio);
void sched_userret_slowpath(struct thread *td);
-#ifdef RACCT
-#ifdef SCHED_4BSD
-fixpt_t sched_pctcpu_delta(struct thread *td);
-#endif
-#endif
static inline void
sched_userret(struct thread *td)
@@ -174,9 +171,7 @@
* functions.
*/
char *sched_tdname(struct thread *td);
-#ifdef KTR
void sched_clear_tdname(struct thread *td);
-#endif
static __inline void
sched_pin(void)
@@ -221,6 +216,10 @@
#define SCHED_STAT_DEFINE(name, descr) \
DPCPU_DEFINE(unsigned long, name); \
SCHED_STAT_DEFINE_VAR(name, &DPCPU_NAME(name), descr)
+
+#define SCHED_STAT_DECLARE(name) \
+ DPCPU_DECLARE(unsigned long, name);
+
/*
* Sched stats are always incremented in critical sections so no atomic
* is necessary to increment them.
@@ -229,9 +228,29 @@
#else
#define SCHED_STAT_DEFINE_VAR(name, descr, ptr)
#define SCHED_STAT_DEFINE(name, descr)
+#define SCHED_STAT_DECLARE(name)
#define SCHED_STAT_INC(var) (void)0
#endif
+SCHED_STAT_DECLARE(ithread_demotions);
+SCHED_STAT_DECLARE(ithread_preemptions);
+
+SDT_PROBE_DECLARE(sched, , , change__pri);
+SDT_PROBE_DECLARE(sched, , , dequeue);
+SDT_PROBE_DECLARE(sched, , , enqueue);
+SDT_PROBE_DECLARE(sched, , , lend__pri);
+SDT_PROBE_DECLARE(sched, , , load__change);
+SDT_PROBE_DECLARE(sched, , , off__cpu);
+SDT_PROBE_DECLARE(sched, , , on__cpu);
+SDT_PROBE_DECLARE(sched, , , remain__cpu);
+SDT_PROBE_DECLARE(sched, , , surrender);
+
+#ifdef KDTRACE_HOOKS
+#include <sys/dtrace_bsd.h>
+extern int dtrace_vtime_active;
+extern dtrace_vtime_switch_func_t dtrace_vtime_switch_func;
+#endif
+
/*
* Fixup scheduler state for proc0 and thread0
*/
@@ -241,6 +260,81 @@
* Fixup scheduler state for secondary APs
*/
void schedinit_ap(void);
+
+bool sched_do_timer_accounting(void);
+
+/*
+ * Find an L2 neighbor of the given CPU or return -1 if none found. This
+ * does not distinguish among multiple L2 neighbors if the given CPU has
+ * more than one (it will always return the same result in that case).
+ */
+int sched_find_l2_neighbor(int cpu);
+
+struct sched_instance {
+ int (*load)(void);
+ int (*rr_interval)(void);
+ bool (*runnable)(void);
+ void (*exit)(struct proc *p, struct thread *childtd);
+ void (*fork)(struct thread *td, struct thread *childtd);
+ void (*fork_exit)(struct thread *td);
+ void (*class)(struct thread *td, int class);
+ void (*nice)(struct proc *p, int nice);
+ void (*ap_entry)(void);
+ void (*exit_thread)(struct thread *td, struct thread *child);
+ u_int (*estcpu)(struct thread *td);
+ void (*fork_thread)(struct thread *td, struct thread *child);
+ void (*ithread_prio)(struct thread *td, u_char prio);
+ void (*lend_prio)(struct thread *td, u_char prio);
+ void (*lend_user_prio)(struct thread *td, u_char pri);
+ void (*lend_user_prio_cond)(struct thread *td, u_char pri);
+ fixpt_t (*pctcpu)(struct thread *td);
+ void (*prio)(struct thread *td, u_char prio);
+ void (*sleep)(struct thread *td, int prio);
+ void (*sswitch)(struct thread *td, int flags);
+ void (*throw)(struct thread *td);
+ void (*unlend_prio)(struct thread *td, u_char prio);
+ void (*user_prio)(struct thread *td, u_char prio);
+ void (*userret_slowpath)(struct thread *td);
+ void (*add)(struct thread *td, int flags);
+ struct thread *(*choose)(void);
+ void (*clock)(struct thread *td, int cnt);
+ void (*idletd)(void *);
+ void (*preempt)(struct thread *td);
+ void (*relinquish)(struct thread *td);
+ void (*rem)(struct thread *td);
+ void (*wakeup)(struct thread *td, int srqflags);
+ void (*bind)(struct thread *td, int cpu);
+ void (*unbind)(struct thread *td);
+ int (*is_bound)(struct thread *td);
+ void (*affinity)(struct thread *td);
+ int (*sizeof_proc)(void);
+ int (*sizeof_thread)(void);
+ char *(*tdname)(struct thread *td);
+ void (*clear_tdname)(struct thread *td);
+ bool (*do_timer_accounting)(void);
+ int (*find_l2_neighbor)(int cpuid);
+ void (*init)(void);
+ void (*init_ap)(void);
+ void (*setup)(void);
+ void (*initticks)(void);
+ void (*schedcpu)(void);
+};
+
+extern const struct sched_instance *active_sched;
+
+struct sched_selection {
+ const char *name;
+ const struct sched_instance *instance;
+};
+#define DECLARE_SCHEDULER(xsel_name, xsched_name, xsched_instance) \
+ static struct sched_selection xsel_name = { \
+ .name = xsched_name, \
+ .instance = xsched_instance, \
+ }; \
+ DATA_SET(sched_instance_set, xsel_name);
+
+void sched_instance_select(void);
+
#endif /* _KERNEL */
/* POSIX 1003.1b Process Scheduling */
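
DECLARE_SCHEDULER() is the whole registration interface: it drops a sched_selection record into the sched_instance_set linker set, which sched_instance_select() scans by name at boot. A hypothetical third scheduler (names here are illustrative, not part of the patch) would plug in like this:

    /* Hypothetical out-of-tree scheduler registering itself as "DEMO". */
    static const struct sched_instance sched_demo_instance = {
            .load = sched_demo_load,
            /* ... every slot of struct sched_instance must be filled ... */
    };
    DECLARE_SCHEDULER(demo_sched_selector, "DEMO", &sched_demo_instance);
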
diff --git a/sys/x86/x86/cpu_machdep.c b/sys/x86/x86/cpu_machdep.c
--- a/sys/x86/x86/cpu_machdep.c
+++ b/sys/x86/x86/cpu_machdep.c
@@ -47,7 +47,6 @@
#include "opt_kstack_pages.h"
#include "opt_maxmem.h"
#include "opt_platform.h"
-#include "opt_sched.h"
#ifdef __i386__
#include "opt_apic.h"
#endif
@@ -543,9 +542,7 @@
* is visible before calling cpu_idle_wakeup().
*/
atomic_store_int(statep, newstate);
-#if defined(SCHED_ULE) && defined(SMP)
atomic_thread_fence_seq_cst();
-#endif
/*
* Since we may be in a critical section from cpu_idle(), if
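
The fence in the idle loop is likewise made unconditional: per the comment above, it keeps the store of the new idle state globally visible before the idling CPU proceeds, so that a remote CPU calling cpu_idle_wakeup() observes it; with any scheduler now able to rely on SMP-style wakeups, the SCHED_ULE guard no longer makes sense. A sketch of the store/fence pair:

    atomic_store_int(statep, newstate);     /* publish "this CPU is idling" */
    atomic_thread_fence_seq_cst();          /* make the store visible before
                                               proceeding, per the comment */
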
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -1443,21 +1443,8 @@
kmsan_mark(frame, sizeof(*frame), KMSAN_STATE_INITED);
trap_check_kstack();
-#if defined(SMP) && !defined(SCHED_ULE)
- /*
- * Don't do any accounting for the disabled HTT cores, since it
- * will provide misleading numbers for the userland.
- *
- * No locking is necessary here, since even if we lose the race
- * when hlt_cpus_mask changes it is not a big deal, really.
- *
- * Don't do that for ULE, since ULE doesn't consider hlt_cpus_mask
- * and unlike other schedulers it actually schedules threads to
- * those CPUs.
- */
- if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
+ if (!sched_do_timer_accounting())
return;
-#endif
/* Look up our local APIC structure for the tick counters. */
la = &lapics[PCPU_GET(apic_id)];