diff --git a/sys/alpha/include/param.h b/sys/alpha/include/param.h
index 6e36e70db9ab..1a9c9af545a5 100644
--- a/sys/alpha/include/param.h
+++ b/sys/alpha/include/param.h
@@ -1,143 +1,138 @@
 /* $FreeBSD$ */
 /* From: NetBSD: param.h,v 1.20 1997/09/19 13:52:53 leo Exp */
 
 /*
  * Copyright (c) 1988 University of Utah.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and Ralph Campbell.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * from: Utah $Hdr: machparam.h 1.11 89/08/14$
  *
  *	@(#)param.h	8.1 (Berkeley) 6/10/93
  */
 
 /*
  * Machine dependent constants for the Alpha.
  */
 
 /*
  * Round p (pointer or byte index) up to a correctly-aligned value for all
  * data types (int, long, ...).   The result is u_long and must be cast to
  * any desired pointer type.
  *
  * ALIGNED_POINTER is a boolean macro that checks whether an address
  * is valid to fetch data elements of type t from on this architecture.
  * This does not reflect the optimal alignment, just the possibility
  * (within reasonable limits). 
  *
  */
 #ifndef _ALIGNBYTES
 #define	_ALIGNBYTES		7
 #endif
 #ifndef _ALIGN
 #define	_ALIGN(p)		(((u_long)(p) + _ALIGNBYTES) &~ _ALIGNBYTES)
 #endif
 #ifndef _ALIGNED_POINTER
 #define _ALIGNED_POINTER(p,t)	((((u_long)(p)) & (sizeof(t)-1)) == 0)
 #endif
 
 #ifndef _MACHINE
 #define	_MACHINE	alpha
 #endif
 #ifndef _MACHINE_ARCH
 #define	_MACHINE_ARCH	alpha
 #endif
 
 #ifndef _NO_NAMESPACE_POLLUTION
 
 #ifndef _MACHINE_PARAM_H_
 #define _MACHINE_PARAM_H_
 
 #ifndef MACHINE
 #define	MACHINE		"alpha"
 #endif
 #ifndef MACHINE_ARCH
 #define	MACHINE_ARCH	"alpha"
 #endif
 #define	MID_MACHINE	MID_ALPHA
 
 #include <machine/alpha_cpu.h>
 #include <machine/cpu.h>
 
 #define MAXSMPCPU	8
 #ifdef SMP
 #define MAXCPU		MAXSMPCPU
 #else
 #define MAXCPU		1
 #endif
 
 #define	ALIGNBYTES		_ALIGNBYTES
 #define	ALIGN(p)		_ALIGN(p)
 #define ALIGNED_POINTER(p,t)	_ALIGNED_POINTER(p,t)
 
 #define	PAGE_SIZE	(1 << ALPHA_PGSHIFT)		/* bytes/page */
 #define PAGE_SHIFT	ALPHA_PGSHIFT
 #define PAGE_MASK	(PAGE_SIZE-1)
 #define NPTEPG		(PAGE_SIZE/(sizeof (pt_entry_t)))
 
 #define	KERNBASE	0xfffffc0000300000LL	/* start of kernel virtual */
 #define	BTOPKERNBASE	((u_long)KERNBASE >> PGSHIFT)
 
 #define	CLSIZE		1
 #define	CLSIZELOG2	0
 
 /* NOTE: SSIZE, SINCR and UPAGES must be multiples of CLSIZE */
 #define	SSIZE		1		/* initial stack size/NBPG */
 #define	SINCR		1		/* increment of stack/NBPG */
 
-/* PREEMPTION exposes scheduler bugs that need to be fixed. */
-#if 0
-#define	PREEMPTION
-#endif
-
 #ifndef	KSTACK_PAGES
 #define	KSTACK_PAGES	2		/* pages of kstack (with pcb) */
 #endif
 #define	KSTACK_GUARD_PAGES	1	/* pages of kstack guard; 0 disables */
 #define	UAREA_PAGES	1		/* pages of u-area */
 
 
 /*
  * Mach derived conversion macros
  */
 #define	round_page(x)	((((unsigned long)(x)) + PAGE_MASK) & ~(PAGE_MASK))
 #define	trunc_page(x)	((unsigned long)(x) & ~(PAGE_MASK))
 
 #define atop(x)			((unsigned long)(x) >> PAGE_SHIFT)
 #define ptoa(x)			((unsigned long)(x) << PAGE_SHIFT)
 
 #define	alpha_btop(x)		((unsigned long)(x) >> PAGE_SHIFT)
 #define	alpha_ptob(x)		((unsigned long)(x) << PAGE_SHIFT)
 
 #define pgtok(x)                ((x) * (PAGE_SIZE / 1024)) 
 
 #endif /* !_MACHINE_PARAM_H_ */
 #endif /* !_NO_NAMESPACE_POLLUTION */
diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h
index aa8c29e95c1c..5216c55a28dc 100644
--- a/sys/amd64/include/param.h
+++ b/sys/amd64/include/param.h
@@ -1,170 +1,165 @@
 /*
  * Copyright (c) 2002 David E. O'Brien.  All rights reserved.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * the Systems Programming Group of the University of Utah Computer
  * Science Department and Ralph Campbell.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)param.h	8.1 (Berkeley) 6/10/93
  * $FreeBSD$
  */
 
 /*
  * Machine dependent constants for AMD64.
  */
 
 /*
  * Round p (pointer or byte index) up to a correctly-aligned value
  * for all data types (int, long, ...).   The result is u_long and
  * must be cast to any desired pointer type.
  *
  * ALIGNED_POINTER is a boolean macro that checks whether an address
  * is valid to fetch data elements of type t from on this architecture.
  * This does not reflect the optimal alignment, just the possibility
  * (within reasonable limits). 
  *
  */
 #ifndef _ALIGNBYTES
 #define	_ALIGNBYTES	(sizeof(long) - 1)
 #endif
 #ifndef _ALIGN
 #define	_ALIGN(p)	(((u_long)(p) + _ALIGNBYTES) &~ _ALIGNBYTES)
 #endif
 #ifndef _ALIGNED_POINTER
 #define	_ALIGNED_POINTER(p,t)	((((u_long)(p)) & (sizeof(t)-1)) == 0)
 #endif
 
 #ifndef _MACHINE
 #define	_MACHINE	amd64
 #endif
 #ifndef _MACHINE_ARCH
 #define	_MACHINE_ARCH	amd64
 #endif
 
 #ifndef _NO_NAMESPACE_POLLUTION
 
 #ifndef _MACHINE_PARAM_H_
 #define	_MACHINE_PARAM_H_
 
 #ifndef MACHINE
 #define	MACHINE		"amd64"
 #endif
 #ifndef MACHINE_ARCH
 #define	MACHINE_ARCH	"amd64"
 #endif
 
 #ifdef SMP
 #define MAXCPU		8
 #else
 #define MAXCPU		1
 #endif
 
 #define	ALIGNBYTES		_ALIGNBYTES
 #define	ALIGN(p)		_ALIGN(p)
 #define	ALIGNED_POINTER(p,t)	_ALIGNED_POINTER(p,t)
 
 
 /* Size of the level 1 page table units */
 #define NPTEPG		(PAGE_SIZE/(sizeof (pt_entry_t)))
 #define	NPTEPGSHIFT	9		/* LOG2(NPTEPG) */
 #define PAGE_SHIFT	12		/* LOG2(PAGE_SIZE) */
 #define PAGE_SIZE	(1<<PAGE_SHIFT)	/* bytes/page */
 #define PAGE_MASK	(PAGE_SIZE-1)
 /* Size of the level 2 page directory units */
 #define	NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
 #define	NPDEPGSHIFT	9		/* LOG2(NPDEPG) */
 #define	PDRSHIFT	21              /* LOG2(NBPDR) */
 #define	NBPDR		(1<<PDRSHIFT)   /* bytes/page dir */
 #define	PDRMASK		(NBPDR-1)
 /* Size of the level 3 page directory pointer table units */
 #define	NPDPEPG		(PAGE_SIZE/(sizeof (pdp_entry_t)))
 #define	NPDPEPGSHIFT	9		/* LOG2(NPDPEPG) */
 #define	PDPSHIFT	30		/* LOG2(NBPDP) */
 #define	NBPDP		(1<<PDPSHIFT)	/* bytes/page dir ptr table */
 #define	PDPMASK		(NBPDP-1)
 /* Size of the level 4 page-map level-4 table units */
 #define	NPML4EPG	(PAGE_SIZE/(sizeof (pml4_entry_t)))
 #define	NPML4EPGSHIFT	9		/* LOG2(NPML4EPG) */
 #define	PML4SHIFT	39		/* LOG2(NBPML4) */
 #define	NBPML4		(1ul<<PML4SHIFT)/* bytes/page map lev4 table */
 #define	PML4MASK	(NBPML4-1)
 
-/* PREEMPTION exposes scheduler bugs that need to be fixed. */
-#if 0
-#define	PREEMPTION
-#endif
-
 #define IOPAGES	2		/* pages of i/o permission bitmap */
 
 #ifndef	KSTACK_PAGES
 #define	KSTACK_PAGES	4	/* pages of kstack (with pcb) */
 #endif
 #define	KSTACK_GUARD_PAGES 1	/* pages of kstack guard; 0 disables */
 #define UAREA_PAGES	1	/* holds struct user WITHOUT PCB (see def.) */
 
 
 /*
  * Ceiling on amount of swblock kva space, can be changed via
  * the kern.maxswzone /boot/loader.conf variable.
  */
 #ifndef VM_SWZONE_SIZE_MAX
 #define	VM_SWZONE_SIZE_MAX	(32 * 1024 * 1024)
 #endif
 
 /*
  * Ceiling on size of buffer cache (really only effects write queueing,
  * the VM page cache is not effected), can be changed via
  * the kern.maxbcache /boot/loader.conf variable.
  */
 #ifndef VM_BCACHE_SIZE_MAX
 #define	VM_BCACHE_SIZE_MAX	(400 * 1024 * 1024)
 #endif
 
 /*
  * Mach derived conversion macros
  */
 #define	round_page(x)	((((unsigned long)(x)) + PAGE_MASK) & ~(PAGE_MASK))
 #define	trunc_page(x)	((unsigned long)(x) & ~(PAGE_MASK))
 #define trunc_2mpage(x)	((unsigned long)(x) & ~PDRMASK)
 #define round_2mpage(x)	((((unsigned long)(x)) + PDRMASK) & ~PDRMASK)
 
 #define	atop(x)		((unsigned long)(x) >> PAGE_SHIFT)
 #define	ptoa(x)		((unsigned long)(x) << PAGE_SHIFT)
 
 #define	amd64_btop(x)	((unsigned long)(x) >> PAGE_SHIFT)
 #define	amd64_ptob(x)	((unsigned long)(x) << PAGE_SHIFT)
 
 #define	pgtok(x)	((unsigned long)(x) * (PAGE_SIZE / 1024)) 
 
 #endif /* !_MACHINE_PARAM_H_ */
 #endif /* !_NO_NAMESPACE_POLLUTION */
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 60300f5ba927..43a870906202 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -1,2495 +1,2500 @@
 # $FreeBSD$
 #
 # NOTES -- Lines that can be cut/pasted into kernel and hints configs.
 #
 # Lines that begin with 'device', 'options', 'machine', 'ident', 'maxusers',
 # 'makeoptions', 'hints', etc. go into the kernel configuration that you
 # run config(8) with.
 #
 # Lines that begin with 'hint.' are NOT for config(8), they go into your
 # hints file.  See /boot/device.hints and/or the 'hints' config(8) directive.
 #
 # Please use ``make LINT'' to create an old-style LINT file if you want to
 # do kernel test-builds.
 #
 # This file contains machine independent kernel configuration notes.  For
 # machine dependent notes, look in /sys/<arch>/conf/NOTES.
 #
 
 #
 # NOTES conventions and style guide:
 #
 # Large block comments should begin and end with a line containing only a
 # comment character.
 #
 # To describe a particular object, a block comment (if it exists) should
 # come first.  Next should come device, options, and hints lines in that
 # order.  All device and option lines must be described by a comment that
 # doesn't just expand the device or option name.  Use only a concise
 # comment on the same line if possible.  Very detailed descriptions of
 # devices and subsystems belong in man pages.
 #
 # A space followed by a tab separates 'options' from an option name.  Two
 # spaces followed by a tab separate 'device' from a device name.  Comments
 # after an option or device should use one space after the comment character.
 # To comment out a negative option that disables code and thus should not be
 # enabled for LINT builds, precede 'options' with "#!".
 #
 
 #
 # This is the ``identification'' of the kernel.  Usually this should
 # be the same as the name of your kernel.
 #
 ident		LINT
 
 #
 # The `maxusers' parameter controls the static sizing of a number of
 # internal system tables by a formula defined in subr_param.c.
 # Omitting this parameter or setting it to 0 will cause the system to
 # auto-size based on physical memory.
 #
 maxusers	10
 
 #
 # The `makeoptions' parameter allows variables to be passed to the
 # generated Makefile in the build area.
 #
 # CONF_CFLAGS gives some extra compiler flags that are added to ${CFLAGS}
 # after most other flags.  Here we use it to inhibit use of non-optimal
 # gcc builtin functions (e.g., memcmp).
 #
 # DEBUG happens to be magic.
 # The following is equivalent to 'config -g KERNELNAME' and creates
 # 'kernel.debug' compiled with -g debugging as well as a normal
 # 'kernel'.  Use 'make install.debug' to install the debug kernel
 # but that isn't normally necessary as the debug symbols are not loaded
 # by the kernel and are not useful there anyway.
 #
 # KERNEL can be overridden so that you can change the default name of your
 # kernel.
 #
 # MODULES_OVERRIDE can be used to limit modules built to a specific list.
 #
 makeoptions	CONF_CFLAGS=-fno-builtin  #Don't allow use of memcmp, etc.
 #makeoptions	DEBUG=-g		#Build kernel with gdb(1) debug symbols
 #makeoptions	KERNEL=foo		#Build kernel "foo" and install "/foo"
 # Only build Linux API modules and plus those parts of the sound system I need.
 #makeoptions	MODULES_OVERRIDE="linux sound/sound sound/driver/maestro3"
 makeoptions	DESTDIR=/tmp
 
 
 #
 # Certain applications can grow to be larger than the 512M limit
 # that FreeBSD initially imposes.  Below are some options to
 # allow that limit to grow to 1GB, and can be increased further
 # with changing the parameters.  MAXDSIZ is the maximum that the
 # limit can be set to, and the DFLDSIZ is the default value for
 # the limit.  MAXSSIZ is the maximum that the stack limit can be
 # set to.  You might want to set the default lower than the max,
 # and explicitly set the maximum with a shell command for processes
 # that regularly exceed the limit like INND.
 #
 options 	MAXDSIZ=(1024UL*1024*1024)
 options 	MAXSSIZ=(128UL*1024*1024)
 options 	DFLDSIZ=(1024UL*1024*1024)
 
 #
 # BLKDEV_IOSIZE sets the default block size used in user block
 # device I/O.  Note that this value will be overridden by the label
 # when specifying a block device from a label with a non-0
 # partition blocksize.  The default is PAGE_SIZE.
 #
 options 	BLKDEV_IOSIZE=8192
 
 # Options for the VM subsystem
 # L2 cache size (in KB) can be specified in PQ_CACHESIZE
 options 	PQ_CACHESIZE=512	# color for 512k cache
 # Deprecated options supported for backwards compatibility
 #options 	PQ_NOOPT		# No coloring
 #options 	PQ_LARGECACHE		# color for 512k cache
 #options 	PQ_HUGECACHE		# color for 1024k cache
 #options 	PQ_MEDIUMCACHE		# color for 256k cache
 #options 	PQ_NORMALCACHE		# color for 64k cache
 
 # This allows you to actually store this configuration file into
 # the kernel binary itself, where it may be later read by saying:
 #    strings -n 3 /boot/kernel/kernel | sed -n 's/^___//p' > MYKERNEL
 #
 options 	INCLUDE_CONFIG_FILE     # Include this file in kernel
 
 options 	GEOM_AES		# Don't use, use GEOM_BDE
 options 	GEOM_APPLE		# Apple partitioning
 options 	GEOM_BDE		# Disk encryption.
 options 	GEOM_BSD		# BSD disklabels
 options 	GEOM_CONCAT		# Disk concatenation.
 options 	GEOM_FOX		# Redundant path mitigation
 options 	GEOM_GATE		# Userland services.
 options 	GEOM_GPT		# GPT partitioning
 options 	GEOM_LABEL		# Providers labelization.
 options 	GEOM_MBR		# DOS/MBR partitioning
 options 	GEOM_MIRROR		# Disk mirroring.
 options 	GEOM_NOP		# Test class.
 options 	GEOM_PC98		# NEC PC9800 partitioning
 options 	GEOM_RAID3		# RAID3 functionality.
 options 	GEOM_STRIPE		# Disk striping.
 options 	GEOM_SUNLABEL		# Sun/Solaris partitioning
 options 	GEOM_UZIP		# Read-only compressed disks
 options 	GEOM_VOL		# Volume names from UFS superblock
 
 #
 # The root device and filesystem type can be compiled in;
 # this provides a fallback option if the root device cannot
 # be correctly guessed by the bootstrap code, or an override if
 # the RB_DFLTROOT flag (-r) is specified when booting the kernel.
 #
 options 	ROOTDEVNAME=\"ufs:da0s2e\"
 
 
 #####################################################################
 # Scheduler options:
 #
 # Specifying one of SCHED_4BSD or SCHED_ULE is mandatory.  These options
 # select which scheduler is compiled in.
 #
 # SCHED_4BSD is the historical, proven, BSD scheduler.  It has a global run
 # queue and no cpu affinity which makes it suboptimal for SMP.  It has very
 # good interactivity and priority selection.
 #
 # SCHED_ULE is a new scheduler that has been designed for SMP and has some
 # advantages for UP as well.  It is intended to replace the 4BSD scheduler
 # over time.
 #
 options 	SCHED_4BSD
 #options 	SCHED_ULE
 
 #####################################################################
 # SMP OPTIONS:
 #
 # SMP enables building of a Symmetric MultiProcessor Kernel.
 
 # Mandatory:
 options 	SMP			# Symmetric MultiProcessor Kernel
 
 # ADAPTIVE_MUTEXES changes the behavior of blocking mutexes to spin
 # if the thread that currently owns the mutex is executing on another
 # CPU.  This behaviour is enabled by default, so this option can be used
 # to disable it.
 options 	NO_ADAPTIVE_MUTEXES
 
 # ADAPTIVE_GIANT causes the Giant lock to also be made adaptive when
 # running without NO_ADAPTIVE_MUTEXES.  Normally, because Giant is assumed
 # to be held for extended periods, contention on Giant will cause a thread
 # to sleep rather than spinning.
 options 	ADAPTIVE_GIANT
 
 # MUTEX_NOINLINE forces mutex operations to call functions to perform each
 # operation rather than inlining the simple cases.  This can be used to
 # shrink the size of the kernel text segment.  Note that this behavior is
 # already implied by the INVARIANT_SUPPORT, INVARIANTS, MUTEX_PROFILING,
 # and WITNESS options.
 options 	MUTEX_NOINLINE
 
 # MUTEX_WAKE_ALL changes the mutex unlock algorithm to wake all waiters
 # when a contested mutex is released rather than just awaking the highest
 # priority waiter.
 options 	MUTEX_WAKE_ALL
 
 # SMP Debugging Options:
 #
+# PREEMPTION allows threads that are executing in the kernel to be
+#	  preempted by higher priority threads.  It improves interactivity
+#	  and lets interrupt threads run sooner instead of waiting.
+#	  WARNING! Only tested on alpha, amd64, and i386.
 # FULL_PREEMPTION instructs the kernel to preempt non-realtime kernel
 #	  threads.  It sole use is to expose race conditions and other
 #	  bugs during development.  Enabling this option will reduce
 #	  performance and increase the frequency of kernel panics by
 #	  design.  If you aren't sure that you need it then you don't.
-#         DON'T TURN THIS ON.
+#	  Relies on the PREEMPTION option.  DON'T TURN THIS ON.
 # MUTEX_DEBUG enables various extra assertions in the mutex code.
 # SLEEPQUEUE_PROFILING enables rudimentary profiling of the hash table
 #	  used to hold active sleep queues.
 # TURNSTILE_PROFILING enables rudimentary profiling of the hash table
 #	  used to hold active lock queues.
 # WITNESS enables the witness code which detects deadlocks and cycles
 #         during locking operations.
 # WITNESS_KDB causes the witness code to drop into the kernel debugger if
 #	  a lock hierarchy violation occurs or if locks are held when going to
 #	  sleep.
 # WITNESS_SKIPSPIN disables the witness checks on spin mutexes.
+options 	PREEMPTION
 options 	FULL_PREEMPTION
 options 	MUTEX_DEBUG
 options 	WITNESS
 options 	WITNESS_KDB
 options 	WITNESS_SKIPSPIN
 
 # MUTEX_PROFILING - Profiling mutual exclusion locks (mutexes).  See
 # MUTEX_PROFILING(9) for details.
 options 	MUTEX_PROFILING
 # Set the number of buffers and the hash size.  The hash size MUST be larger
 # than the number of buffers.  Hash size should be prime.
 options		MPROF_BUFFERS="1536"
 options		MPROF_HASH_SIZE="1543"
 
 # Profiling for internal hash tables.
 options 	SLEEPQUEUE_PROFILING
 options 	TURNSTILE_PROFILING
 
 
 #####################################################################
 # COMPATIBILITY OPTIONS
 
 #
 # Implement system calls compatible with 4.3BSD and older versions of
 # FreeBSD.  You probably do NOT want to remove this as much current code
 # still relies on the 4.3 emulation.  Note that some architectures that
 # are supported by FreeBSD do not include support for certain important
 # aspects of this compatibility option, namely those related to the
 # signal delivery mechanism.
 #
 options 	COMPAT_43
 
 # Enable FreeBSD4 compatibility syscalls
 options 	COMPAT_FREEBSD4
 
 #
 # These three options provide support for System V Interface
 # Definition-style interprocess communication, in the form of shared
 # memory, semaphores, and message queues, respectively.
 #
 options 	SYSVSHM
 options 	SYSVSEM
 options 	SYSVMSG
 
 
 #####################################################################
 # DEBUGGING OPTIONS
 
 #
 # Compile with kernel debugger related code.
 #
 options 	KDB
 
 #
 # Print a stack trace of the current thread on the console for a panic.
 #
 options 	KDB_TRACE
 
 #
 # Don't enter the debugger for a panic. Intended for unattended operation
 # where you may want to enter the debugger from the console, but still want
 # the machine to recover from a panic.
 #
 options 	KDB_UNATTENDED
 
 #
 # Enable the ddb debugger backend.
 #
 options 	DDB
 
 #
 # Print the numerical value of symbols in addition to the symbolic
 # representation.
 #
 options 	DDB_NUMSYM
 
 #
 # Enable the remote gdb debugger backend.
 #
 options 	GDB
 
 #
 # KTRACE enables the system-call tracing facility ktrace(2).  To be more
 # SMP-friendly, KTRACE uses a worker thread to process most trace events
 # asynchronously to the thread generating the event.  This requires a
 # pre-allocated store of objects representing trace events.  The
 # KTRACE_REQUEST_POOL option specifies the initial size of this store.
 # The size of the pool can be adjusted both at boottime and runtime via
 # the kern.ktrace_request_pool tunable and sysctl.
 #
 options 	KTRACE			#kernel tracing
 options 	KTRACE_REQUEST_POOL=101
 
 #
 # KTR is a kernel tracing mechanism imported from BSD/OS.  Currently it
 # has no userland interface aside from a few sysctl's.  It is enabled with
 # the KTR option.  KTR_ENTRIES defines the number of entries in the circular
 # trace buffer.  KTR_COMPILE defines the mask of events to compile into the
 # kernel as defined by the KTR_* constants in <sys/ktr.h>.  KTR_MASK defines the
 # initial value of the ktr_mask variable which determines at runtime what
 # events to trace.  KTR_CPUMASK determines which CPU's log events, with
 # bit X corresponding to cpu X.  KTR_VERBOSE enables dumping of KTR events
 # to the console by default.  This functionality can be toggled via the
 # debug.ktr_verbose sysctl and defaults to off if KTR_VERBOSE is not defined.
 #
 options 	KTR
 options 	KTR_ENTRIES=1024
 options 	KTR_COMPILE=(KTR_INTR|KTR_PROC)
 options 	KTR_MASK=KTR_INTR
 options 	KTR_CPUMASK=0x3
 options 	KTR_VERBOSE
 
 #
 # The INVARIANTS option is used in a number of source files to enable
 # extra sanity checking of internal structures.  This support is not
 # enabled by default because of the extra time it would take to check
 # for these conditions, which can only occur as a result of
 # programming errors.
 #
 options 	INVARIANTS
 
 #
 # The INVARIANT_SUPPORT option makes us compile in support for
 # verifying some of the internal structures.  It is a prerequisite for
 # 'INVARIANTS', as enabling 'INVARIANTS' will make these functions be
 # called.  The intent is that you can set 'INVARIANTS' for single
 # source files (by changing the source file or specifying it on the
 # command line) if you have 'INVARIANT_SUPPORT' enabled.  Also, if you
 # wish to build a kernel module with 'INVARIANTS', then adding
 # 'INVARIANT_SUPPORT' to your kernel will provide all the necessary
 # infrastructure without the added overhead.
 #
 options 	INVARIANT_SUPPORT
 
 #
 # The DIAGNOSTIC option is used to enable extra debugging information
 # from some parts of the kernel.  As this makes everything more noisy,
 # it is disabled by default.
 #
 options 	DIAGNOSTIC
 
 #
 # REGRESSION causes optional kernel interfaces necessary only for regression
 # testing to be enabled.  These interfaces may constitute security risks
 # when enabled, as they permit processes to easily modify aspects of the
 # run-time environment to reproduce unlikely or unusual (possibly normally
 # impossible) scenarios.
 #
 options 	REGRESSION
 
 #
 # RESTARTABLE_PANICS allows one to continue from a panic as if it were
 # a call to the debugger via the Debugger() function instead.  It is only
 # useful if a kernel debugger is present.  To restart from a panic, reset
 # the panicstr variable to NULL and continue execution.  This option is
 # for development use only and should NOT be used in production systems
 # to "workaround" a panic.
 #
 #options 	RESTARTABLE_PANICS
 
 #
 # This option let some drivers co-exist that can't co-exist in a running
 # system.  This is used to be able to compile all kernel code in one go for
 # quality assurance purposes (like this file, which the option takes it name
 # from.)
 #
 options 	COMPILING_LINT
 
 
 #####################################################################
 # NETWORKING OPTIONS
 
 #
 # Protocol families:
 #  Only the INET (Internet) family is officially supported in FreeBSD.
 #
 options 	INET			#Internet communications protocols
 options 	INET6			#IPv6 communications protocols
 options 	IPSEC			#IP security
 options 	IPSEC_ESP		#IP security (crypto; define w/ IPSEC)
 options 	IPSEC_DEBUG		#debug for IP security
 #
 # Set IPSEC_FILTERGIF to force packets coming through a gif tunnel
 # to be processed by any configured packet filtering (ipfw, ipf).
 # The default is that packets coming from a tunnel are _not_ processed;
 # they are assumed trusted.
 #
 # IPSEC history is preserved for such packets, and can be filtered
 # using ipfw(8)'s 'ipsec' keyword, when this option is enabled.
 #
 #options 	IPSEC_FILTERGIF		#filter ipsec packets from a tunnel
 
 #options 	FAST_IPSEC		#new IPsec (cannot define w/ IPSEC)
 
 options 	IPX			#IPX/SPX communications protocols
 options 	IPXIP			#IPX in IP encapsulation (not available)
 
 #options 	NCP			#NetWare Core protocol
 
 options 	NETATALK		#Appletalk communications protocols
 options 	NETATALKDEBUG		#Appletalk debugging
 
 #
 # SMB/CIFS requester
 # NETSMB enables support for SMB protocol, it requires LIBMCHAIN and LIBICONV
 # options.
 # NETSMBCRYPTO enables support for encrypted passwords.
 options 	NETSMB			#SMB/CIFS requester
 options 	NETSMBCRYPTO		#encrypted password support for SMB
 
 # mchain library. It can be either loaded as KLD or compiled into kernel
 options 	LIBMCHAIN
 
 # altq(9). Enable the base part of the hooks with the ALTQ option.
 # Individual disciplines must be built into the base system and can not be
 # loaded as modules at this point. In order to build a SMP kernel you must
 # also have the ALTQ_NOPCC option.
 options 	ALTQ
 options 	ALTQ_CBQ	# Class Bases Queueing
 options 	ALTQ_RED	# Random Early Drop
 options 	ALTQ_RIO	# RED In/Out
 options 	ALTQ_HFSC	# Hierarchical Packet Scheduler
 options 	ALTQ_CDNR	# Traffic conditioner
 options 	ALTQ_PRIQ	# Priority Queueing
 options 	ALTQ_NOPCC	# Required for SMP build
 options 	ALTQ_DEBUG
 
 # netgraph(4). Enable the base netgraph code with the NETGRAPH option.
 # Individual node types can be enabled with the corresponding option
 # listed below; however, this is not strictly necessary as netgraph
 # will automatically load the corresponding KLD module if the node type
 # is not already compiled into the kernel. Each type below has a
 # corresponding man page, e.g., ng_async(8).
 options 	NETGRAPH		#netgraph(4) system
 options 	NETGRAPH_ASYNC
 options 	NETGRAPH_ATMLLC
 options 	NETGRAPH_ATM_ATMPIF
 options 	NETGRAPH_BLUETOOTH		# ng_bluetooth(4)
 options 	NETGRAPH_BLUETOOTH_BT3C		# ng_bt3c(4)
 options 	NETGRAPH_BLUETOOTH_H4		# ng_h4(4)
 options 	NETGRAPH_BLUETOOTH_HCI		# ng_hci(4)
 options 	NETGRAPH_BLUETOOTH_L2CAP	# ng_l2cap(4)
 options 	NETGRAPH_BLUETOOTH_SOCKET	# ng_btsocket(4)
 options 	NETGRAPH_BLUETOOTH_UBT		# ng_ubt(4)
 options 	NETGRAPH_BLUETOOTH_UBTBCMFW	# ubtbcmfw(4)
 options 	NETGRAPH_BPF
 options 	NETGRAPH_BRIDGE
 options 	NETGRAPH_CISCO
 options 	NETGRAPH_DEVICE
 options 	NETGRAPH_ECHO
 options 	NETGRAPH_EIFACE
 options 	NETGRAPH_ETHER
 options 	NETGRAPH_FEC
 options 	NETGRAPH_FRAME_RELAY
 options 	NETGRAPH_GIF
 options 	NETGRAPH_GIF_DEMUX
 options 	NETGRAPH_HOLE
 options 	NETGRAPH_IFACE
 options 	NETGRAPH_IP_INPUT
 options 	NETGRAPH_KSOCKET
 options 	NETGRAPH_L2TP
 options 	NETGRAPH_LMI
 # MPPC compression requires proprietary files (not included)
 #options 	NETGRAPH_MPPC_COMPRESSION
 options 	NETGRAPH_MPPC_ENCRYPTION
 options 	NETGRAPH_ONE2MANY
 options 	NETGRAPH_PPP
 options 	NETGRAPH_PPPOE
 options 	NETGRAPH_PPTPGRE
 options 	NETGRAPH_RFC1490
 options 	NETGRAPH_SOCKET
 options 	NETGRAPH_SPLIT
 options 	NETGRAPH_SPPP
 options 	NETGRAPH_TEE
 options 	NETGRAPH_TTY
 options 	NETGRAPH_UI
 options 	NETGRAPH_VJC
 
 # NgATM - Netgraph ATM
 options 	NGATM_ATM
 options 	NGATM_ATMBASE
 options 	NGATM_SSCOP
 options 	NGATM_SSCFU
 options 	NGATM_UNI
 options 	NGATM_CCATM
 
 device		mn	# Munich32x/Falc54 Nx64kbit/sec cards.
 device		musycc	# LMC/SBE LMC1504 quad T1/E1
 
 #
 # Network interfaces:
 #  The `loop' device is MANDATORY when networking is enabled.
 #  The `ether' device provides generic code to handle
 #  Ethernets; it is MANDATORY when an Ethernet device driver is
 #  configured or token-ring is enabled.
 #  The `wlan' device provides generic code to support 802.11
 #  drivers, including host AP mode; it is MANDATORY for the wi
 #  driver and will eventually be required by all 802.11 drivers.
 #  The `fddi' device provides generic code to support FDDI.
 #  The `arcnet' device provides generic code to support Arcnet.
 #  The `sppp' device serves a similar role for certain types
 #  of synchronous PPP links (like `cx', `ar').
 #  The `sl' device implements the Serial Line IP (SLIP) service.
 #  The `ppp' device implements the Point-to-Point Protocol.
 #  The `bpf' device enables the Berkeley Packet Filter.  Be
 #  aware of the legal and administrative consequences of enabling this
 #  option.  The number of devices determines the maximum number of
 #  simultaneous BPF clients programs runnable.
 #  The `disc' device implements a minimal network interface,
 #  which throws away all packets sent and never receives any.  It is
 #  included for testing purposes.  This shows up as the `ds' interface.
 #  The `tap' device is a pty-like virtual Ethernet interface
 #  The `tun' device implements (user-)ppp and nos-tun
 #  The `gif' device implements IPv6 over IPv4 tunneling,
 #  IPv4 over IPv6 tunneling, IPv4 over IPv4 tunneling and
 #  IPv6 over IPv6 tunneling.
 #  The `gre' device implements two types of IPv4 over IPv4 tunneling:
 #  GRE and MOBILE, as specified in the RFC1701 and RFC2004.
 #  The XBONEHACK option allows the same pair of addresses to be configured on
 #  multiple gif interfaces.
 #  The `faith' device captures packets sent to it and diverts them
 #  to the IPv4/IPv6 translation daemon.
 #  The `stf' device implements 6to4 encapsulation.
 #  The `ef' device provides support for multiple ethernet frame types
 #  specified via ETHER_* options. See ef(4) for details.
 #
 # The pf packet filter consists of three devices:
 #  The `pf' device provides /dev/pf and the firewall code itself.
 #  The `pflog' device provides the pflog0 interface which logs packets.
 #  The `pfsync' device provides the pfsync0 interface used for
 #   synchronization of firewall state tables (over the net).
 #
 # The PPP_BSDCOMP option enables support for compress(1) style entire
 # packet compression, the PPP_DEFLATE is for zlib/gzip style compression.
 # PPP_FILTER enables code for filtering the ppp data stream and selecting
 # events for resetting the demand dial activity timer - requires bpf.
 # See pppd(8) for more details.
 #
 device		ether			#Generic Ethernet
 device		vlan			#VLAN support
 device		wlan			#802.11 support
 device		token			#Generic TokenRing
 device		fddi			#Generic FDDI
 device		arcnet			#Generic Arcnet
 device		sppp			#Generic Synchronous PPP
 device		loop			#Network loopback device
 device		bpf			#Berkeley packet filter
 device		disc			#Discard device (ds0, ds1, etc)
 device		tap			#Virtual Ethernet driver
 device		tun			#Tunnel driver (ppp(8), nos-tun(8))
 device		sl			#Serial Line IP
 device		gre			#IP over IP tunneling
 device		pf			#PF OpenBSD packet-filter firewall
 device		pflog			#logging support interface for PF
 device		pfsync			#synchronization interface for PF
 device		ppp			#Point-to-point protocol
 options 	PPP_BSDCOMP		#PPP BSD-compress support
 options 	PPP_DEFLATE		#PPP zlib/deflate/gzip support
 options 	PPP_FILTER		#enable bpf filtering (needs bpf)
 
 device		ef			# Multiple ethernet frames support
 options 	ETHER_II		# enable Ethernet_II frame
 options 	ETHER_8023		# enable Ethernet_802.3 (Novell) frame
 options 	ETHER_8022		# enable Ethernet_802.2 frame
 options 	ETHER_SNAP		# enable Ethernet_802.2/SNAP frame
 
 # for IPv6
 device		gif			#IPv6 and IPv4 tunneling
 options 	XBONEHACK
 device		faith			#for IPv6 and IPv4 translation
 device		stf			#6to4 IPv6 over IPv4 encapsulation
 
 #
 # Internet family options:
 #
 # MROUTING enables the kernel multicast packet forwarder, which works
 # with mrouted(8).
 #
 # PIM enables Protocol Independent Multicast in the kernel.
 # Requires MROUTING enabled.
 #
 # IPFIREWALL enables support for IP firewall construction, in
 # conjunction with the `ipfw' program.  IPFIREWALL_VERBOSE sends
 # logged packets to the system logger.  IPFIREWALL_VERBOSE_LIMIT
 # limits the number of times a matching entry can be logged.
 #
 # WARNING:  IPFIREWALL defaults to a policy of "deny ip from any to any"
 # and if you do not add other rules during startup to allow access,
 # YOU WILL LOCK YOURSELF OUT.  It is suggested that you set firewall_type=open
 # in /etc/rc.conf when first enabling this feature, then refine the
 # firewall rules in /etc/rc.firewall after you've tested that the new kernel
 # feature works properly.
 #
 # IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to
 # allow everything.  Use with care: if a cracker can crash your
 # firewall machine, they can get to your protected machines.  However,
 # if you are using it as an as-needed filter for specific problems as
 # they arise, then this may be for you.  Changing the default to 'allow'
 # means that you won't get stuck if the kernel and /sbin/ipfw binary get
 # out of sync.
 #
 # IPDIVERT enables the divert IP sockets, used by ``ipfw divert''
 #
 # IPFIREWALL_FORWARD enables changing of the packet destination either
 # to do some sort of policy routing or transparent proxying.  Used by
 # ``ipfw forward''.
 #
 # IPSTEALTH enables code to support stealth forwarding (i.e., forwarding
 # packets without touching the ttl).  This can be useful to hide firewalls
 # from traceroute and similar tools.
 #
 # TCPDEBUG enables code which keeps traces of the TCP state machine
 # for sockets with the SO_DEBUG option set, which can then be examined
 # using the trpt(8) utility.
 #
 options 	MROUTING		# Multicast routing
 options 	PIM			# Protocol Independent Multicast
 options 	IPFIREWALL		#firewall
 options 	IPFIREWALL_VERBOSE	#enable logging to syslogd(8)
 options 	IPFIREWALL_VERBOSE_LIMIT=100	#limit verbosity
 options 	IPFIREWALL_DEFAULT_TO_ACCEPT	#allow everything by default
 options		IPFIREWALL_FORWARD	#packet destination changes
 options 	IPV6FIREWALL		#firewall for IPv6
 options 	IPV6FIREWALL_VERBOSE
 options 	IPV6FIREWALL_VERBOSE_LIMIT=100
 options 	IPV6FIREWALL_DEFAULT_TO_ACCEPT
 options 	IPDIVERT		#divert sockets
 options 	IPFILTER		#ipfilter support
 options 	IPFILTER_LOG		#ipfilter logging
 options 	IPFILTER_DEFAULT_BLOCK	#block all packets by default
 options 	IPSTEALTH		#support for stealth forwarding
 options 	TCPDEBUG
 
 # The MBUF_STRESS_TEST option enables options which create
 # various random failures / extreme cases related to mbuf
 # functions.  See mbuf(9) for a list of available test cases.
 options 	MBUF_STRESS_TEST
 
 # Statically Link in accept filters
 options 	ACCEPT_FILTER_DATA
 options 	ACCEPT_FILTER_HTTP
 
 # TCP_DROP_SYNFIN adds support for ignoring TCP packets with SYN+FIN. This
 # prevents nmap et al. from identifying the TCP/IP stack, but breaks support
 # for RFC1644 extensions and is not recommended for web servers.
 #
 options 	TCP_DROP_SYNFIN		#drop TCP packets with SYN+FIN
 
 # TCP_SIGNATURE adds support for RFC 2385 (TCP-MD5) digests. These are
 # carried in TCP option 19. This option is commonly used to protect
 # TCP sessions (e.g. BGP) where IPSEC is not available nor desirable.
 # This is enabled on a per-socket basis using the TCP_MD5SIG socket option.
 # This requires the use of 'device crypto', 'options FAST_IPSEC', and
 # 'device cryptodev' as it depends on the non-KAME IPSEC SADB code.
 #options 	TCP_SIGNATURE		#include support for RFC 2385
 
 # DUMMYNET enables the "dummynet" bandwidth limiter.  You need IPFIREWALL
 # as well.  See dummynet(4) and ipfw(8) for more info.  When you run
 # DUMMYNET it is advisable to also have "options HZ=1000" to achieve a
 # smoother scheduling of the traffic.
 #
 # BRIDGE enables bridging between ethernet cards -- see bridge(4).
 # You can use IPFIREWALL and DUMMYNET together with bridging.
 #
 options 	DUMMYNET
 options 	BRIDGE
 
 # Zero copy sockets support.  This enables "zero copy" for sending and
 # receiving data via a socket.  The send side works for any type of NIC,
 # the receive side only works for NICs that support MTUs greater than the
 # page size of your architecture and that support header splitting.  See
 # zero_copy(9) for more details.
 options 	ZERO_COPY_SOCKETS
 
 #
 # ATM (HARP version) options
 #
 # ATM_CORE includes the base ATM functionality code.  This must be included
 #	for ATM support.
 #
 # ATM_IP includes support for running IP over ATM.
 #
 # At least one (and usually only one) of the following signalling managers
 # must be included (note that all signalling managers include PVC support):
 # ATM_SIGPVC includes support for the PVC-only signalling manager `sigpvc'.
 # ATM_SPANS includes support for the `spans' signalling manager, which runs
 #	FORE Systems' proprietary SPANS signalling protocol.
 # ATM_UNI includes support for the `uni30' and `uni31' signalling managers,
 #	which run the ATM Forum UNI 3.x signalling protocols.
 #
 # The `hfa' driver provides support for the FORE Systems, Inc.
 # PCA-200E ATM PCI Adapter.
 #
 # The `harp' pseudo-driver makes all NATM interface drivers available to HARP.
 #
 options 	ATM_CORE		#core ATM protocol family
 options 	ATM_IP			#IP over ATM support
 options 	ATM_SIGPVC		#SIGPVC signalling manager
 options 	ATM_SPANS		#SPANS signalling manager
 options 	ATM_UNI			#UNI signalling manager
 
 device		hfa			#FORE PCA-200E ATM PCI
 device		harp			#Pseudo-interface for NATM
 
 
 #####################################################################
 # FILESYSTEM OPTIONS
 
 #
 # Only the root, /usr, and /tmp filesystems need be statically
 # compiled; everything else will be automatically loaded at mount
 # time.  (Exception: the UFS family--- FFS --- cannot
 # currently be demand-loaded.)  Some people still prefer to statically
 # compile other filesystems as well.
 #
 # NB: The NULL, PORTAL, UMAP and UNION filesystems are known to be
 # buggy, and WILL panic your system if you attempt to do anything with
 # them.  They are included here as an incentive for some enterprising
 # soul to sit down and fix them.
 #
 
 # One of these is mandatory:
 options 	FFS			#Fast filesystem
 options 	NFSCLIENT		#Network File System client
 
 # The rest are optional:
 options 	CD9660			#ISO 9660 filesystem
 options 	FDESCFS			#File descriptor filesystem
 options 	HPFS			#OS/2 File system
 options 	MSDOSFS			#MS DOS File System (FAT, FAT32)
 options 	NFSSERVER		#Network File System server
 options 	NTFS			#NT File System
 options 	NULLFS			#NULL filesystem
 # Broken (depends on NCP):
 #options 	NWFS			#NetWare filesystem
 options 	PORTALFS		#Portal filesystem
 options 	PROCFS			#Process filesystem (requires PSEUDOFS)
 options 	PSEUDOFS		#Pseudo-filesystem framework
 options 	SMBFS			#SMB/CIFS filesystem
 options 	UDF			#Universal Disk Format
 # Broken (seriously (functionally) broken):
 #options 	UMAPFS			#UID map filesystem
 options 	UNIONFS			#Union filesystem
 # The xFS_ROOT options REQUIRE the associated ``options xFS''
 options 	NFS_ROOT		#NFS usable as root device
 
 # Soft updates is a technique for improving filesystem speed and
 # making abrupt shutdown less risky.
 #
 options 	SOFTUPDATES
 
 # Extended attributes allow additional data to be associated with files,
 # and are used for ACLs, Capabilities, and MAC labels.
 # See src/sys/ufs/ufs/README.extattr for more information.
 options 	UFS_EXTATTR
 options 	UFS_EXTATTR_AUTOSTART
 
 # Access Control List support for UFS filesystems.  The current ACL
 # implementation requires extended attribute support, UFS_EXTATTR,
 # for the underlying filesystem.
 # See src/sys/ufs/ufs/README.acls for more information.
 options 	UFS_ACL
 
 # Directory hashing improves the speed of operations on very large
 # directories at the expense of some memory.
 options 	UFS_DIRHASH
 
 # Make space in the kernel for a root filesystem on a md device.
 # Define to the number of kilobytes to reserve for the filesystem.
 options 	MD_ROOT_SIZE=10
 
 # Make the md device a potential root device, either with preloaded
 # images of type mfs_root or md_root.
 options 	MD_ROOT
 
 # Disk quotas are supported when this option is enabled.
 options 	QUOTA			#enable disk quotas
 
 # If you are running a machine just as a fileserver for PC and MAC
 # users, using SAMBA or Netatalk, you may consider setting this option
 # and keeping all those users' directories on a filesystem that is
 # mounted with the suiddir option. This gives new files the same
 # ownership as the directory (similar to group). It's a security hole
 # if you let these users run programs, so confine it to file-servers
 # (but it'll save you lots of headaches in those cases). Root owned
 # directories are exempt and X bits are cleared. The suid bit must be
 # set on the directory as well; see chmod(1).  PC owners can't see/set
 # ownerships so they keep getting their toes trodden on. This saves
 # you all the support calls as the filesystem it's used on will act as
 # they expect: "It's my dir so it must be my file".
 #
 options 	SUIDDIR
 
 # NFS options:
 options 	NFS_MINATTRTIMO=3	# VREG attrib cache timeout in sec
 options 	NFS_MAXATTRTIMO=60
 options 	NFS_MINDIRATTRTIMO=30	# VDIR attrib cache timeout in sec
 options 	NFS_MAXDIRATTRTIMO=60
 options 	NFS_GATHERDELAY=10	# Default write gather delay (msec)
 options 	NFS_WDELAYHASHSIZ=16	# and with this
 options 	NFS_DEBUG		# Enable NFS Debugging
 
 # Coda stuff:
 options 	CODA			#CODA filesystem.
 device		vcoda			#coda minicache <-> venus comm.
 # Use the old Coda 5.x venus<->kernel interface instead of the new
 # realms-aware 6.x protocol.
 #options 	CODA_COMPAT_5
 
 #
 # Add support for the EXT2FS filesystem of Linux fame.  Be a bit
 # careful with this - the ext2fs code has a tendency to lag behind
 # changes and not be exercised very much, so mounting read/write could
 # be dangerous (and even mounting read only could result in panics.)
 #
 options 	EXT2FS
 
 # Use real implementations of the aio_* system calls.  There are numerous
 # stability and security issues in the current aio code that make it
 # unsuitable for inclusion on machines with untrusted local users.
 options 	VFS_AIO
 
 # Cryptographically secure random number generator; /dev/random
 device		random
 
 # The system memory devices; /dev/mem, /dev/kmem
 device		mem
 
 # Optional character code conversion support with LIBICONV.
 # Each option requires its base file system and LIBICONV.
 options 	CD9660_ICONV
 options 	MSDOSFS_ICONV
 options 	NTFS_ICONV
 options 	UDF_ICONV
 
 # Experimental support for large MS-DOS filesystems.
 #
 # WARNING: This uses at least 32 bytes of kernel memory (which is not
 # reclaimed until the FS is unmounted) for each file on disk to map
 # between the 32-bit inode numbers used by VFS and the 64-bit pseudo-inode
 # numbers used internally by msdosfs. This is only safe to use in certain
 # controlled situations (e.g. read-only FS with less than 1 million files).
 # Since the mappings do not persist across unmounts (or reboots), these
 # filesystems are not suitable for exporting through NFS, or any other
 # application that requires fixed inode numbers.
 options 	MSDOSFS_LARGE
 
 
 #####################################################################
 # POSIX P1003.1B
 
 # Real-time extensions added in the 1993 POSIX standard (P1003.1b):
 # _KPOSIX_PRIORITY_SCHEDULING: Build in _POSIX_PRIORITY_SCHEDULING
 
 options 	_KPOSIX_PRIORITY_SCHEDULING
 # p1003_1b_semaphores are very experimental,
 # user should be ready to assist in debugging if problems arise.
 options 	P1003_1B_SEMAPHORES
 
 
 #####################################################################
 # SECURITY POLICY PARAMETERS
 
 # Support for Mandatory Access Control (MAC):
 options 	MAC
 options 	MAC_BIBA
 options 	MAC_BSDEXTENDED
 options 	MAC_DEBUG
 options 	MAC_IFOFF
 options 	MAC_LOMAC
 options 	MAC_MLS
 options 	MAC_NONE
 options 	MAC_PARTITION
 options 	MAC_PORTACL
 options 	MAC_SEEOTHERUIDS
 options 	MAC_STUB
 options 	MAC_TEST
 
 
 #####################################################################
 # CLOCK OPTIONS
 
 # The granularity of operation is controlled by the kernel option HZ whose
 # default value (100) means a granularity of 10ms (1s/HZ).
 # Some subsystems, such as DUMMYNET, might benefit from a smaller
 # granularity such as 1ms or less, for a smoother scheduling of packets.
 # Consider, however, that reducing the granularity too much might
 # cause excessive overhead in clock interrupt processing,
 # potentially causing ticks to be missed and thus actually reducing
 # the accuracy of operation.
 
 options 	HZ=100
 
 # Enable support for the kernel PLL to use an external PPS signal,
 # under supervision of [x]ntpd(8)
 # More info in ntpd documentation: http://www.eecis.udel.edu/~ntp
 
 options 	PPS_SYNC
 
 
 #####################################################################
 # SCSI DEVICES
 
 # SCSI DEVICE CONFIGURATION
 
 # The SCSI subsystem consists of the `base' SCSI code, a number of
 # high-level SCSI device `type' drivers, and the low-level host-adapter
 # device drivers.  The host adapters are listed in the ISA and PCI
 # device configuration sections below.
 #
 # It is possible to wire down your SCSI devices so that a given bus,
 # target, and LUN always come on line as the same device unit.  In
 # earlier versions the unit numbers were assigned in the order that
 # the devices were probed on the SCSI bus.  This means that if you
 # removed a disk drive, you may have had to rewrite your /etc/fstab
 # file, and also that you had to be careful when adding a new disk
 # as it may have been probed earlier and moved your device configuration
 # around.  (See also option GEOM_VOL for a different solution to this
 # problem.)
 
 # This old behavior is maintained as the default behavior.  The unit
 # assignment begins with the first non-wired down unit for a device
 # type.  For example, if you wire a disk as "da3" then the first
 # non-wired disk will be assigned da4.
 
 # The syntax for wiring down devices is:
 
 hint.scbus.0.at="ahc0"
 hint.scbus.1.at="ahc1"
 hint.scbus.1.bus="0"
 hint.scbus.3.at="ahc2"
 hint.scbus.3.bus="0"
 hint.scbus.2.at="ahc2"
 hint.scbus.2.bus="1"
 hint.da.0.at="scbus0"
 hint.da.0.target="0"
 hint.da.0.unit="0"
 hint.da.1.at="scbus3"
 hint.da.1.target="1"
 hint.da.2.at="scbus2"
 hint.da.2.target="3"
 hint.sa.1.at="scbus1"
 hint.sa.1.target="6"
 
 # "units" (SCSI logical unit number) that are not specified are
 # treated as if specified as LUN 0.
 
 # All SCSI devices allocate as many units as are required.
 
 # The ch driver drives SCSI Media Changer ("jukebox") devices.
 #
 # The da driver drives SCSI Direct Access ("disk") and Optical Media
 # ("WORM") devices.
 #
 # The sa driver drives SCSI Sequential Access ("tape") devices.
 #
 # The cd driver drives SCSI Read Only Direct Access ("cd") devices.
 #
 # The ses driver drives SCSI Environment Services ("ses") and
 # SAF-TE ("SCSI Accessible Fault-Tolerant Enclosure") devices.
 #
 # The pt driver drives SCSI Processor devices.
 #
 #
 # Target Mode support is provided here but also requires that a SIM
 # (SCSI Host Adapter Driver) provide support as well.
 #
 # The targ driver provides target mode support as a Processor type device.
 # It exists to give the minimal context necessary to respond to Inquiry
 # commands. There is a sample user application that shows how the rest
 # of the command support might be done in /usr/share/examples/scsi_target.
 #
 # The targbh driver provides target mode support and exists to respond
 # to incoming commands that do not otherwise have a logical unit assigned
 # to them.
 #
 # The "unknown" device (uk? in pre-2.0.5) is now part of the base SCSI
 # configuration as the "pass" driver.
 
 device		scbus		#base SCSI code
 device		ch		#SCSI media changers
 device		da		#SCSI direct access devices (aka disks)
 device		sa		#SCSI tapes
 device		cd		#SCSI CD-ROMs
 device		ses		#SCSI Environmental Services (and SAF-TE)
 device		pt		#SCSI processor
 device		targ		#SCSI Target Mode Code
 device		targbh		#SCSI Target Mode Blackhole Device
 device		pass		#CAM passthrough driver
 
 # CAM OPTIONS:
 # debugging options:
 # -- NOTE --  If you specify one of the bus/target/lun options, you must
 #             specify them all!
 # CAMDEBUG: When defined enables debugging macros
 # CAM_DEBUG_BUS:  Debug the given bus.  Use -1 to debug all busses.
 # CAM_DEBUG_TARGET:  Debug the given target.  Use -1 to debug all targets.
 # CAM_DEBUG_LUN:  Debug the given lun.  Use -1 to debug all luns.
 # CAM_DEBUG_FLAGS:  OR together CAM_DEBUG_INFO, CAM_DEBUG_TRACE,
 #                   CAM_DEBUG_SUBTRACE, and CAM_DEBUG_CDB
 #
 # CAM_MAX_HIGHPOWER: Maximum number of concurrent high power (start unit) cmds
 # CAM_NEW_TRAN_CODE: this is the new transport layer code that will be switched
 #			to soon
 # SCSI_NO_SENSE_STRINGS: When defined disables sense descriptions
 # SCSI_NO_OP_STRINGS: When defined disables opcode descriptions
 # SCSI_DELAY: The number of MILLISECONDS to freeze the SIM (scsi adapter)
 #             queue after a bus reset, and the number of milliseconds to
 #             freeze the device queue after a bus device reset.  This
 #             can be changed at boot and runtime with the
 #             kern.cam.scsi_delay tunable/sysctl.
 options 	CAMDEBUG
 options 	CAM_DEBUG_BUS=-1
 options 	CAM_DEBUG_TARGET=-1
 options 	CAM_DEBUG_LUN=-1
 options 	CAM_DEBUG_FLAGS=(CAM_DEBUG_INFO|CAM_DEBUG_TRACE|CAM_DEBUG_CDB)
 options 	CAM_MAX_HIGHPOWER=4
 options 	SCSI_NO_SENSE_STRINGS
 options 	SCSI_NO_OP_STRINGS
 options 	SCSI_DELAY=8000	# Be pessimistic about Joe SCSI device
 
 # Options for the CAM CDROM driver:
 # CHANGER_MIN_BUSY_SECONDS: Guaranteed minimum time quantum for a changer LUN
 # CHANGER_MAX_BUSY_SECONDS: Maximum time quantum per changer LUN, only
 #                           enforced if there is I/O waiting for another LUN
 # The compiled in defaults for these variables are 2 and 10 seconds,
 # respectively.
 #
 # These can also be changed on the fly with the following sysctl variables:
 # kern.cam.cd.changer.min_busy_seconds
 # kern.cam.cd.changer.max_busy_seconds
 #
 options 	CHANGER_MIN_BUSY_SECONDS=2
 options 	CHANGER_MAX_BUSY_SECONDS=10
 
 # Options for the CAM sequential access driver:
 # SA_IO_TIMEOUT: Timeout for read/write/wfm  operations, in minutes
 # SA_SPACE_TIMEOUT: Timeout for space operations, in minutes
 # SA_REWIND_TIMEOUT: Timeout for rewind operations, in minutes
 # SA_ERASE_TIMEOUT: Timeout for erase operations, in minutes
 # SA_1FM_AT_EOD: Default to model which only has a default one filemark at EOT.
 options 	SA_IO_TIMEOUT=4
 options 	SA_SPACE_TIMEOUT=60
 options 	SA_REWIND_TIMEOUT=(2*60)
 options 	SA_ERASE_TIMEOUT=(4*60)
 options 	SA_1FM_AT_EOD
 
 # Optional timeout for the CAM processor target (pt) device
 # This is specified in seconds.  The default is 60 seconds.
 options 	SCSI_PT_DEFAULT_TIMEOUT=60
 
 # Optional enable of doing SES passthrough on other devices (e.g., disks)
 #
 # Normally disabled because a lot of newer SCSI disks report themselves
 # as having SES capabilities, but this can then clot up attempts to build
 # a topology with the SES device that's on the box these drives
 # are in....
 options 	SES_ENABLE_PASSTHROUGH
 
 
 #####################################################################
 # MISCELLANEOUS DEVICES AND OPTIONS
 
 # The `pty' device usually turns out to be ``effectively mandatory'',
 # as it is required for `telnetd', `rlogind', `screen', `emacs', and
 # `xterm', among others.
 
 device		pty		#Pseudo ttys
 device		nmdm		#back-to-back tty devices
 device		md		#Memory/malloc disk
 device		snp		#Snoop device - to look at pty/vty/etc..
 device		ccd		#Concatenated disk driver
 
 # Configuring Vinum into the kernel is not necessary, since the kld
 # module gets started automatically when vinum(8) starts.  This
 # device is also untested.  Use at your own risk.
 #
 # The option VINUMDEBUG must match the value set in CFLAGS
 # in src/sbin/vinum/Makefile.  Failure to do so will result in
 # the following message from vinum(8):
 #
 # Can't get vinum config: Invalid argument
 #
 # See vinum(4) for more reasons not to use these options.
 device		vinum		#Vinum concat/mirror/raid driver
 options 	VINUMDEBUG	#enable Vinum debugging hooks
 
 # Kernel side iconv library
 options 	LIBICONV
 
 # Size of the kernel message buffer.  Should be N * pagesize.
 options 	MSGBUF_SIZE=40960
 
 # Maximum size of a tty or pty input buffer.
 options 	TTYHOG=8193
 
 
 #####################################################################
 # HARDWARE DEVICE CONFIGURATION
 
 # For ISA the required hints are listed.
 # EISA, MCA, PCI and pccard are self identifying buses, so no hints
 # are needed.
 
 #
 # Mandatory devices:
 #
 
 # The keyboard controller; it controls the keyboard and the PS/2 mouse.
 device		atkbdc
 hint.atkbdc.0.at="isa"
 hint.atkbdc.0.port="0x060"
 
 # The AT keyboard
 device		atkbd
 hint.atkbd.0.at="atkbdc"
 hint.atkbd.0.irq="1"
 
 # Options for atkbd:
 options 	ATKBD_DFLT_KEYMAP	# specify the built-in keymap
 makeoptions	ATKBD_DFLT_KEYMAP=jp.106
 
 # These options are valid for other keyboard drivers as well.
 options 	KBD_DISABLE_KEYMAP_LOAD	# refuse to load a keymap
 options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
 
 # `flags' for atkbd:
 #       0x01    Force detection of keyboard, else we always assume a keyboard
 #       0x02    Don't reset keyboard, useful for some newer ThinkPads
 #	0x03	Force detection and avoid reset, might help with certain
 #		dockingstations
 #       0x04    Old-style (XT) keyboard support, useful for older ThinkPads
 
 # PS/2 mouse
 device		psm
 hint.psm.0.at="atkbdc"
 hint.psm.0.irq="12"
 
 # Options for psm:
 options 	PSM_HOOKRESUME		#hook the system resume event, useful
 					#for some laptops
 options 	PSM_RESETAFTERSUSPEND	#reset the device at the resume event
 
 # Video card driver for VGA adapters.
 device		vga
 hint.vga.0.at="isa"
 
 # Options for vga:
 # Try the following option if the mouse pointer is not drawn correctly
 # or font does not seem to be loaded properly.  May cause flicker on
 # some systems.
 options 	VGA_ALT_SEQACCESS
 
 # If you can dispense with some vga driver features, you may want to
 # use the following options to save some memory.
 #options 	VGA_NO_FONT_LOADING	# don't save/load font
 #options 	VGA_NO_MODE_CHANGE	# don't change video modes
 
 # Older video cards may require this option for proper operation.
 options 	VGA_SLOW_IOACCESS	# do byte-wide i/o's to TS and GDC regs
 
 # The following option probably won't work with the LCD displays.
 options 	VGA_WIDTH90		# support 90 column modes
 
 options 	FB_DEBUG		# Frame buffer debugging
 
 device		splash			# Splash screen and screen saver support
 
 # Various screen savers.
 device		blank_saver
 device		daemon_saver
 device		fade_saver
 device		fire_saver
 device		green_saver
 device		logo_saver
 device		rain_saver
 device		star_saver
 device		warp_saver
 
 # The syscons console driver (sco color console compatible).
 device		sc
 hint.sc.0.at="isa"
 options 	MAXCONS=16		# number of virtual consoles
 options 	SC_ALT_MOUSE_IMAGE	# simplified mouse cursor in text mode
 options 	SC_DFLT_FONT		# compile font in
 makeoptions	SC_DFLT_FONT=cp850
 options 	SC_DISABLE_KDBKEY	# disable `debug' key
 options 	SC_DISABLE_REBOOT	# disable reboot key sequence
 options 	SC_HISTORY_SIZE=200	# number of history buffer lines
 options 	SC_MOUSE_CHAR=0x3	# char code for text mode mouse cursor
 options 	SC_PIXEL_MODE		# add support for the raster text mode
 
 # The following options will let you change the default colors of syscons.
 options 	SC_NORM_ATTR=(FG_GREEN|BG_BLACK)
 options 	SC_NORM_REV_ATTR=(FG_YELLOW|BG_GREEN)
 options 	SC_KERNEL_CONS_ATTR=(FG_RED|BG_BLACK)
 options 	SC_KERNEL_CONS_REV_ATTR=(FG_BLACK|BG_RED)
 
 # The following options will let you change the default behaviour of
 # cut-n-paste feature
 options 	SC_CUT_SPACES2TABS	# convert leading spaces into tabs
 options 	SC_CUT_SEPCHARS=\"x09\"	# set of characters that delimit words
 					# (default is single space - \"x20\")
 
 # If you have a two button mouse, you may want to add the following option
 # to use the right button of the mouse to paste text.
 options 	SC_TWOBUTTON_MOUSE
 
 # You can selectively disable features in syscons.
 options 	SC_NO_CUTPASTE
 options 	SC_NO_FONT_LOADING
 options 	SC_NO_HISTORY
 options 	SC_NO_SYSMOUSE
 options 	SC_NO_SUSPEND_VTYSWITCH
 
 # `flags' for sc
 #	0x80	Put the video card in the VESA 800x600 dots, 16 color mode
 #	0x100	Probe for a keyboard device periodically if one is not present
 
 #
 # Optional devices:
 #
 
 #
 # SCSI host adapters:
 #
 # adv: All Narrow SCSI bus AdvanSys controllers.
 # adw: Second Generation AdvanSys controllers including the ADV940UW.
 # aha: Adaptec 154x/1535/1640
 # ahb: Adaptec 174x EISA controllers
 # ahc: Adaptec 274x/284x/2910/293x/294x/394x/3950x/3960x/398X/4944/
 #      19160x/29160x, aic7770/aic78xx
 # ahd: Adaptec 29320/39320 Controllers.
 # aic: Adaptec 6260/6360, APA-1460 (PC Card), NEC PC9801-100 (C-BUS)
 # amd: Support for the AMD 53C974 SCSI host adapter chip as found on devices
 #      such as the Tekram DC-390(T).
 # bt:  Most Buslogic controllers: including BT-445, BT-54x, BT-64x, BT-74x,
 #      BT-75x, BT-946, BT-948, BT-956, BT-958, SDC3211B, SDC3211F, SDC3222F
 # esp: NCR53c9x.  Only for SBUS hardware right now.
 # isp: Qlogic ISP 1020, 1040 and 1040B PCI SCSI host adapters,
 #      ISP 1240 Dual Ultra SCSI, ISP 1080 and 1280 (Dual) Ultra2,
 #      ISP 12160 Ultra3 SCSI,
 #      Qlogic ISP 2100 and ISP 2200 1Gb Fibre Channel host adapters.
 #      Qlogic ISP 2300 and ISP 2312 2Gb Fibre Channel host adapters.
 # ispfw: Firmware module for Qlogic host adapters
 # mpt: LSI-Logic MPT/Fusion 53c1020 or 53c1030 Ultra4
 #      or FC9x9 Fibre Channel host adapters.
 # ncr: NCR 53C810, 53C825 self-contained SCSI host adapters.
 # sym: Symbios/Logic 53C8XX family of PCI-SCSI I/O processors:
 #      53C810, 53C810A, 53C815, 53C825,  53C825A, 53C860, 53C875,
 #      53C876, 53C885,  53C895, 53C895A, 53C896,  53C897, 53C1510D,
 #      53C1010-33, 53C1010-66.
 # trm: Tekram DC395U/UW/F DC315U adapters.
 # wds: WD7000
 
 #
 # Note that the order is important in order for Buslogic ISA/EISA cards to be
 # probed correctly.
 #
 device		bt
 hint.bt.0.at="isa"
 hint.bt.0.port="0x330"
 device		adv
 hint.adv.0.at="isa"
 device		adw
 device		aha
 hint.aha.0.at="isa"
 device		aic
 hint.aic.0.at="isa"
 device		ahb
 device		ahc
 device		ahd
 device		amd
 device		esp
 device		isp
 hint.isp.0.disable="1"
 hint.isp.0.role="3"
 hint.isp.0.prefer_iomap="1"
 hint.isp.0.prefer_memmap="1"
 hint.isp.0.fwload_disable="1"
 hint.isp.0.ignore_nvram="1"
 hint.isp.0.fullduplex="1"
 hint.isp.0.topology="lport"
 hint.isp.0.topology="nport"
 hint.isp.0.topology="lport-only"
 hint.isp.0.topology="nport-only"
 # we can't get u_int64_t types, nor can we get strings if it's got
 # a leading 0x, hence this silly dodge.
 hint.isp.0.portwnn="w50000000aaaa0000"
 hint.isp.0.nodewnn="w50000000aaaa0001"
 device		ispfw
 device		mpt
 device		ncr
 device		sym
 device		trm
 device		wds
 hint.wds.0.at="isa"
 hint.wds.0.port="0x350"
 hint.wds.0.irq="11"
 hint.wds.0.drq="6"
 
 # The aic7xxx driver will attempt to use memory mapped I/O for all PCI
 # controllers that have it configured only if this option is set. Unfortunately,
 # this doesn't work on some motherboards, which prevents it from being the
 # default.
 options 	AHC_ALLOW_MEMIO
 
 # Dump the contents of the ahc controller configuration PROM.
 options 	AHC_DUMP_EEPROM
 
 # Bitmap of units to enable targetmode operations.
 options 	AHC_TMODE_ENABLE
 
 # Compile in Aic7xxx Debugging code.
 options		AHC_DEBUG
 
 # Aic7xxx driver debugging options. See sys/dev/aic7xxx/aic7xxx.h
 options		AHC_DEBUG_OPTS
 
 # Print register bitfields in debug output.  Adds ~128k to driver
 # See ahc(4).
 options		AHC_REG_PRETTY_PRINT
 
 # Compile in aic79xx debugging code.
 options 	AHD_DEBUG
 
 # Aic79xx driver debugging options.  Adds ~215k to driver.  See ahd(4).
 options 	AHD_DEBUG_OPTS=0xFFFFFFFF
 
 # Print human-readable register definitions when debugging
 options 	AHD_REG_PRETTY_PRINT
 
 # Bitmap of units to enable targetmode operations.
 options		AHD_TMODE_ENABLE
 
 # The adw driver will attempt to use memory mapped I/O for all PCI
 # controllers that have it configured only if this option is set.
 options 	ADW_ALLOW_MEMIO
 
 # Options used in dev/isp/ (Qlogic SCSI/FC driver).
 #
 #	ISP_TARGET_MODE		-	enable target mode operation
 #
 options 	ISP_TARGET_MODE=1
 
 # Options used in dev/sym/ (Symbios SCSI driver).
 #options 	SYM_SETUP_LP_PROBE_MAP	#-Low Priority Probe Map (bits)
 					# Allows the ncr to take precedence
 					# 1 (1<<0) -> 810a, 860
 					# 2 (1<<1) -> 825a, 875, 885, 895
 					# 4 (1<<2) -> 895a, 896, 1510d
 #options 	SYM_SETUP_SCSI_DIFF	#-HVD support for 825a, 875, 885
 					# disabled:0 (default), enabled:1
 #options 	SYM_SETUP_PCI_PARITY	#-PCI parity checking
 					# disabled:0, enabled:1 (default)
 #options 	SYM_SETUP_MAX_LUN	#-Number of LUNs supported
 					# default:8, range:[1..64]
 
 # The 'asr' driver provides support for current DPT/Adaptec SCSI RAID
 # controllers (SmartRAID V and VI and later).
 # These controllers require the CAM infrastructure.
 #
 device		asr
 
 # The 'dpt' driver provides support for old DPT controllers (http://www.dpt.com/).
 # These have hardware RAID-{0,1,5} support, and do multi-initiator I/O.
 # The DPT controllers are commonly re-licensed under other brand-names -
 # some controllers by Olivetti, Dec, HP, AT&T, SNI, AST, Alphatronic, NEC and
 # Compaq are actually DPT controllers.
 #
 # See src/sys/dev/dpt for debugging and other subtle options.
 #   DPT_MEASURE_PERFORMANCE Enables a set of (semi)invasive metrics. Various
 #                           instruments are enabled.  The tools in
 #                           /usr/sbin/dpt_* assume these to be enabled.
 #   DPT_HANDLE_TIMEOUTS     Normally device timeouts are handled by the DPT.
 #                           If you want the driver to handle timeouts, enable
 #                           this option.  If your system is very busy, this
 #                           option will create more trouble than it solves.
 #   DPT_TIMEOUT_FACTOR      Used to compute the excessive amount of time to
 #                           wait when timing out with the above option.
 #  DPT_DEBUG_xxxx           These are controllable from sys/dev/dpt/dpt.h
 #  DPT_LOST_IRQ             When enabled, will try, once per second, to catch
 #                           any interrupt that got lost.  Seems to help in some
 #                           DPT-firmware/Motherboard combinations.  Minimal
 #                           cost, great benefit.
 #  DPT_RESET_HBA            Make "reset" actually reset the controller
 #                           instead of fudging it.  Only enable this if you
 #			    are 100% certain you need it.
 
 device		dpt
 
 # DPT options
 #!CAM# options 	DPT_MEASURE_PERFORMANCE
 #!CAM# options 	DPT_HANDLE_TIMEOUTS
 options 	DPT_TIMEOUT_FACTOR=4
 options 	DPT_LOST_IRQ
 options 	DPT_RESET_HBA
 
 #
 # Compaq "CISS" RAID controllers (SmartRAID 5* series)
 # These controllers have a SCSI-like interface, and require the
 # CAM infrastructure.
 #
 device		ciss
 
 #
 # Intel Integrated RAID controllers.
 # This driver was developed and is maintained by Intel.  Contacts
 # at Intel for this driver are
 # "Kannanthanam, Boji T" <boji.t.kannanthanam@intel.com> and
 # "Leubner, Achim" <achim.leubner@intel.com>.
 #
 device		iir
 
 #
 # Mylex AcceleRAID and eXtremeRAID controllers with v6 and later
 # firmware.  These controllers have a SCSI-like interface, and require
 # the CAM infrastructure.
 #
 device		mly
 
 #
 # Compaq Smart RAID, Mylex DAC960 and AMI MegaRAID controllers.  Only
 # one entry is needed; the code will find and configure all supported
 # controllers.
 #
 device		ida		# Compaq Smart RAID
 device		mlx		# Mylex DAC960
 device		amr		# AMI MegaRAID
 
 #
 # 3ware ATA RAID
 #
 device		twe		# 3ware ATA RAID
 
 #
 # The 'ATA' driver supports all ATA and ATAPI devices, including PC Card
 # devices. You only need one "device ata" for it to find all
 # PCI and PC Card ATA/ATAPI devices on modern machines.
 device		ata
 device		atadisk		# ATA disk drives
 device		ataraid		# ATA RAID drives
 device		atapicd		# ATAPI CDROM drives
 device		atapifd		# ATAPI floppy drives
 device		atapist		# ATAPI tape drives
 device		atapicam	# emulate ATAPI devices as SCSI ditto via CAM
 				# needs CAM to be present (scbus & pass)
 #
 # For older non-PCI, non-PnPBIOS systems, these are the hints lines to add:
 hint.ata.0.at="isa"
 hint.ata.0.port="0x1f0"
 hint.ata.0.irq="14"
 hint.ata.1.at="isa"
 hint.ata.1.port="0x170"
 hint.ata.1.irq="15"
 
 #
 # The following options are valid on the ATA driver:
 #
 # ATA_STATIC_ID:	controller numbering is static, i.e. depends on location;
 #			otherwise the device numbers are dynamically allocated.
 
 options 	ATA_STATIC_ID
 
 #
 # Standard floppy disk controllers and floppy tapes, supports
 # the Y-E DATA External FDD (PC Card)
 #
 device		fdc
 hint.fdc.0.at="isa"
 hint.fdc.0.port="0x3F0"
 hint.fdc.0.irq="6"
 hint.fdc.0.drq="2"
 #
 # FDC_DEBUG enables floppy debugging.  Since the debug output is huge, you
 # also have to actually turn it on by setting the variable fd_debug with
 # DDB.
 options 	FDC_DEBUG
 #
 # Activate this line if you happen to have an Insight floppy tape.
 # Probing them proved to be dangerous for people with floppy disks only,
 # so it's "hidden" behind a flag:
 #hint.fdc.0.flags="1"
 
 # Specify floppy devices
 hint.fd.0.at="fdc0"
 hint.fd.0.drive="0"
 hint.fd.1.at="fdc0"
 hint.fd.1.drive="1"
 
 #
 # sio: serial ports (see sio(4)), including support for various
 #      PC Card devices, such as Modem and NICs (see etc/defaults/pccard.conf)
 #
 device		sio
 hint.sio.0.at="isa"
 hint.sio.0.port="0x3F8"
 hint.sio.0.flags="0x10"
 hint.sio.0.irq="4"
 
 # Options for sio:
 options 	COM_ESP			# Code for Hayes ESP.
 options 	COM_MULTIPORT		# Code for some cards with shared IRQs.
 options 	CONSPEED=115200		# Speed for serial console
 					# (default 9600).
 
 # `flags' specific to sio(4).  See below for flags used by both sio(4) and
 # uart(4).
 #	0x20	force this unit to be the console (unless there is another
 #		higher priority console).  This replaces the COMCONSOLE option.
 #	0x40	reserve this unit for low level console operations.  Do not
 #		access the device in any normal way.
 # PnP `flags'
 #	0x1	disable probing of this device.  Used to prevent your modem
 #		from being attached as a PnP modem.
 # Other flags for sio that aren't documented in the man page.
 #	0x20000	enable hardware RTS/CTS and larger FIFOs.  Only works for
 #		ST16650A-compatible UARTs.
 
 #
 # uart: newbusified driver for serial interfaces.  It consolidates the sio(4),
 #	sab(4) and zs(4) drivers.
 #
 device		uart
 
 # Options for uart(4)
 options 	UART_PPS_ON_CTS		# Do time pulse capturing using CTS
 					# instead of DCD.
 
 # The following hint should only be used for pure ISA devices.  It is not
 # needed otherwise.  Use of hints is strongly discouraged.
 hint.uart.0.at="isa"
 
 # The following 3 hints are used when the UART is a system device (i.e., a
 # console or debug port), but only on platforms that don't have any other
 # means to pass the information to the kernel.  The unit number of the hint
 # is only used to bundle the hints together.  There is no relation to the
 # unit number of the probed UART.
 hint.uart.0.port="0x3f8"
 hint.uart.0.flags="0x10"
 hint.uart.0.baud="115200"
 
 # `flags' for serial drivers that support consoles like sio(4) and uart(4):
 #	0x10	enable console support for this unit.  Other console flags
 #		(if applicable) are ignored unless this is set.  Enabling
 #		console support does not make the unit the preferred console.
 #		Boot with -h or set boot_serial=YES in the loader.  For sio(4)
 #		specifically, the 0x20 flag can also be set (see above).
 #		Currently, at most one unit can have console support; the
 #		first one (in config file order) with this flag set is
 #		preferred.  Setting this flag for sio0 gives the old behaviour.
 #	0x80	use this port for serial line gdb support in ddb.  Also known
 #		as debug port.
 #
 
 # Options for serial drivers that support consoles:
 options 	BREAK_TO_DEBUGGER	# A BREAK on a serial console goes to
 					# ddb, if available.
 
 # Solaris implements a new BREAK which is initiated by a character
 # sequence CR ~ ^b which is similar to a familiar pattern used on
 # Sun servers by the Remote Console.
 options 	ALT_BREAK_TO_DEBUGGER
 
 # PCI Universal Communications driver
 # Supports various single and multi port PCI serial cards. Maybe later
 # also the parallel ports on combination serial/parallel cards. New cards
 # can be added in src/sys/dev/puc/pucdata.c.
 #
 # If the PUC_FASTINTR option is used the driver will try to use fast
 # interrupts. The card must then be the only user of that interrupt.
 # Interrupts cannot be shared when using PUC_FASTINTR.
 device		puc
 options 	PUC_FASTINTR
 
 #
 # Network interfaces:
 #
 # MII bus support is required for some PCI 10/100 ethernet NICs,
 # namely those which use MII-compliant transceivers or implement
 # transceiver control interfaces that operate like an MII. Adding
 # "device miibus" to the kernel config pulls in support for
 # the generic miibus API and all of the PHY drivers, including a
 # generic one for PHYs that aren't specifically handled by an
 # individual driver.
 device		miibus
 
 # an:   Aironet 4500/4800 802.11 wireless adapters. Supports the PCMCIA,
 #       PCI and ISA varieties.
 # awi:  Support for IEEE 802.11 PC Card devices using the AMD Am79C930 and
 #       Harris (Intersil) Chipset with PCnetMobile firmware by AMD.
 # bge:	Support for gigabit ethernet adapters based on the Broadcom
 #	BCM570x family of controllers, including the 3Com 3c996-T,
 #	the Netgear GA302T, the SysKonnect SK-9D21 and SK-9D41, and
 #	the embedded gigE NICs on Dell PowerEdge 2550 servers.
 # cm:	Arcnet SMC COM90c26 / SMC COM90c56
 #	(and SMC COM90c66 in '56 compatibility mode) adapters.
 # cnw:  Xircom CNW/Netware Airsurfer PC Card adapter
 # cs:   IBM Etherjet and other Crystal Semi CS89x0-based adapters
 # dc:   Support for PCI fast ethernet adapters based on the DEC/Intel 21143
 #       and various workalikes including:
 #       the ADMtek AL981 Comet and AN985 Centaur, the ASIX Electronics
 #       AX88140A and AX88141, the Davicom DM9100 and DM9102, the Lite-On
 #       82c168 and 82c169 PNIC, the Lite-On/Macronix LC82C115 PNIC II
 #       and the Macronix 98713/98713A/98715/98715A/98725 PMAC. This driver
 #       replaces the old al, ax, dm, pn and mx drivers.  List of brands:
 #       Digital DE500-BA, Kingston KNE100TX, D-Link DFE-570TX, SOHOware SFA110,
 #       SVEC PN102-TX, CNet Pro110B, 120A, and 120B, Compex RL100-TX,
 #       LinkSys LNE100TX, LNE100TX V2.0, Jaton XpressNet, Alfa Inc GFC2204,
 #       KNE110TX.
 # de:   Digital Equipment DC21040
 # em:   Intel Pro/1000 Gigabit Ethernet 82542, 82543, 82544 based adapters.
 # ep:   3Com 3C509, 3C529, 3C556, 3C562D, 3C563D, 3C572, 3C574X, 3C579, 3C589
 #       and PC Card devices using these chipsets.
 # ex:   Intel EtherExpress Pro/10 and other i82595-based adapters,
 #       Olicom Ethernet PC Card devices.
 # fe:   Fujitsu MB86960A/MB86965A Ethernet
 # fea:  DEC DEFEA EISA FDDI adapter
 # fpa:  Support for the Digital DEFPA PCI FDDI. `device fddi' is also needed.
 # fxp:  Intel EtherExpress Pro/100B
 #	(hint of prefer_iomap can be done to prefer I/O instead of Mem mapping)
 # gx:   Intel Pro/1000 Gigabit Ethernet (82542, 82543-F, 82543-T)
 # hme:  Sun HME (Happy Meal Ethernet)
 # lge:	Support for PCI gigabit ethernet adapters based on the Level 1
 #	LXT1001 NetCellerator chipset. This includes the D-Link DGE-500SX,
 #	SMC TigerCard 1000 (SMC9462SX), and some Addtron cards.
 # my:	Myson Fast Ethernet (MTD80X, MTD89X)
 # nge:	Support for PCI gigabit ethernet adapters based on the National
 #	Semiconductor DP83820 and DP83821 chipset. This includes the
 #	SMC EZ Card 1000 (SMC9462TX), D-Link DGE-500T, Asante FriendlyNet
 #	GigaNIX 1000TA and 1000TPC, the Addtron AEG320T, the LinkSys
 #	EG1032 and EG1064, the Surecom EP-320G-TX and the Netgear GA622T.
 # pcn:	Support for PCI fast ethernet adapters based on the AMD Am79c97x
 #	chipsets, including the PCnet/FAST, PCnet/FAST+, PCnet/PRO and
 #	PCnet/Home. These were previously handled by the lnc driver (and
 #	still will be if you leave this driver out of the kernel).
 # rl:   Support for PCI fast ethernet adapters based on the RealTek 8129/8139
 #       chipset.  Note that the RealTek driver defaults to using programmed
 #       I/O to do register accesses because memory mapped mode seems to cause
 #       severe lockups on SMP hardware.  This driver also supports the
 #       Accton EN1207D `Cheetah' adapter, which uses a chip called
 #       the MPX 5030/5038, which is either a RealTek in disguise or a
 #       RealTek workalike.  Note that the D-Link DFE-530TX+ uses the RealTek
 #       chipset and is supported by this driver, not the 'vr' driver.
 # sf:   Support for Adaptec Duralink PCI fast ethernet adapters based on the
 #       Adaptec AIC-6915 "starfire" controller.
 #       This includes dual and quad port cards, as well as one 100baseFX card.
 #       Most of these are 64-bit PCI devices, except for one single port
 #       card which is 32-bit.
 # sis:  Support for NICs based on the Silicon Integrated Systems SiS 900,
 #       SiS 7016 and NS DP83815 PCI fast ethernet controller chips.
 # sbsh:	Support for Granch SBNI16 SHDSL modem PCI adapters
 # sk:   Support for the SysKonnect SK-984x series PCI gigabit ethernet NICs.
 #       This includes the SK-9841 and SK-9842 single port cards (single mode
 #       and multimode fiber) and the SK-9843 and SK-9844 dual port cards
 #       (also single mode and multimode).
 #       The driver will autodetect the number of ports on the card and
 #       attach each one as a separate network interface.
 # sn:   Support for ISA and PC Card Ethernet devices using the
 #       SMC91C90/92/94/95 chips.
 # ste:  Sundance Technologies ST201 PCI fast ethernet controller, includes
 #       the D-Link DFE-550TX.
 # ti:   Support for PCI gigabit ethernet NICs based on the Alteon Networks
 #       Tigon 1 and Tigon 2 chipsets.  This includes the Alteon AceNIC, the
 #       3Com 3c985, the Netgear GA620 and various others.  Note that you will
 #       probably want to bump up NMBCLUSTERS a lot to use this driver.
 # tl:   Support for the Texas Instruments TNETE100 series 'ThunderLAN'
 #       cards and integrated ethernet controllers.  This includes several
 #       Compaq Netelligent 10/100 cards and the built-in ethernet controllers
 #       in several Compaq Prosignia, Proliant and Deskpro systems.  It also
 #       supports several Olicom 10Mbps and 10/100 boards.
 # tx:   SMC 9432 TX, BTX and FTX cards. (SMC EtherPower II series)
 # txp:	Support for 3Com 3cR990 cards with the "Typhoon" chipset
 # vr:   Support for various fast ethernet adapters based on the VIA
 #       Technologies VT3043 `Rhine I' and VT86C100A `Rhine II' chips,
 #       including the D-Link DFE530TX (see 'rl' for DFE530TX+), the Hawking
 #       Technologies PN102TX, and the AOpen/Acer ALN-320.
 # vx:   3Com 3C590 and 3C595
 # wb:   Support for fast ethernet adapters based on the Winbond W89C840F chip.
 #       Note: this is not the same as the Winbond W89C940F, which is a
 #       NE2000 clone.
 # wi:   Lucent WaveLAN/IEEE 802.11 PCMCIA adapters. Note: this supports both
 #       the PCMCIA and ISA cards: the ISA card is really a PCMCIA to ISA
 #       bridge with a PCMCIA adapter plugged into it.
 # xe:   Xircom/Intel EtherExpress Pro100/16 PC Card ethernet controller,
 #       Accton Fast EtherCard-16, Compaq Netelligent 10/100 PC Card,
 #       Toshiba 10/100 Ethernet PC Card, Xircom 16-bit Ethernet + Modem 56
 # xl:   Support for the 3Com 3c900, 3c905, 3c905B and 3c905C (Fast)
 #       Etherlink XL cards and integrated controllers.  This includes the
 #       integrated 3c905B-TX chips in certain Dell Optiplex and Dell
 #       Precision desktop machines and the integrated 3c905-TX chips
 #       in Dell Latitude laptop docking stations.
 #       Also supported: 3Com 3c980(C)-TX, 3Com 3cSOHO100-TX, 3Com 3c450-TX
 
 # Order for ISA/EISA devices is important here
 
 device		cm
 hint.cm.0.at="isa"
 hint.cm.0.port="0x2e0"
 hint.cm.0.irq="9"
 hint.cm.0.maddr="0xdc000"
 device		cs
 hint.cs.0.at="isa"
 hint.cs.0.port="0x300"
 device		ep
 device		ex
 device		fe
 hint.fe.0.at="isa"
 hint.fe.0.port="0x300"
 device		fea
 device		sn
 hint.sn.0.at="isa"
 hint.sn.0.port="0x300"
 hint.sn.0.irq="10"
 device		an
 device		awi
 device		cnw
 device		wi
 device		xe
 
 # PCI Ethernet NICs that use the common MII bus controller code.
 device		dc		# DEC/Intel 21143 and various workalikes
 device		fxp		# Intel EtherExpress PRO/100B (82557, 82558)
 hint.fxp.0.prefer_iomap="0"
 device		hme		# Sun HME (Happy Meal Ethernet)
 device		my		# Myson Fast Ethernet (MTD80X, MTD89X)
 device		rl		# RealTek 8129/8139
 device		pcn		# AMD Am79C97x PCI 10/100 NICs
 device		sf		# Adaptec AIC-6915 (``Starfire'')
 device		sbsh		# Granch SBNI16 SHDSL modem
 device		sis		# Silicon Integrated Systems SiS 900/SiS 7016
 device		ste		# Sundance ST201 (D-Link DFE-550TX)
 device		tl		# Texas Instruments ThunderLAN
 device		tx		# SMC EtherPower II (83c170 ``EPIC'')
 device		vr		# VIA Rhine, Rhine II
 device		wb		# Winbond W89C840F
 device		xl		# 3Com 3c90x (``Boomerang'', ``Cyclone'')
 
 # PCI Ethernet NICs.
 device		de		# DEC/Intel DC21x4x (``Tulip'')
 device		txp		# 3Com 3cR990 (``Typhoon'')
 device		vx		# 3Com 3c590, 3c595 (``Vortex'')
 
 # PCI Gigabit & FDDI NICs.
 device		bge
 device		gx
 device		lge
 device		nge
 device		sk
 device		ti
 device		fpa
 
 # Use "private" jumbo buffers allocated exclusively for the ti(4) driver.
 # This option is incompatible with the TI_JUMBO_HDRSPLIT option below.
 #options 	TI_PRIVATE_JUMBOS
 # Turn on the header splitting option for the ti(4) driver firmware.  This
 # only works for Tigon II chips, and has no effect for Tigon I chips.
 options 	TI_JUMBO_HDRSPLIT
 
 # These two options allow manipulating the mbuf cluster size and mbuf size,
 # respectively.  Be very careful with NIC driver modules when changing
 # these from their default values, because that can potentially cause a
 # mismatch between the mbuf size assumed by the kernel and the mbuf size
 # assumed by a module.  The only driver that currently has the ability to
 # detect a mismatch is ti(4).
 options 	MCLSHIFT=12	# mbuf cluster shift in bits, 12 == 4KB
 options 	MSIZE=512	# mbuf size in bytes
 
 #
 # ATM related options (Cranor version)
 # (note: this driver cannot be used with the HARP ATM stack)
 #
 # The `en' device provides support for Efficient Networks (ENI)
 # ENI-155 PCI midway cards, and the Adaptec 155Mbps PCI ATM cards (ANA-59x0).
 #
 # The `hatm' device provides support for Fore/Marconi HE155 and HE622
 # ATM PCI cards.
 #
 # The `fatm' device provides support for Fore PCA200E ATM PCI cards.
 #
 # The `patm' device provides support for IDT77252 based cards like
 # ProSum's ProATM-155 and ProATM-25 and IDT's evaluation boards.
 #
 # atm device provides generic atm functions and is required for
 # atm devices.
 # NATM enables the netnatm protocol family that can be used to
 # bypass TCP/IP.
 #
 # utopia provides the access to the ATM PHY chips and is required for en,
 # hatm and fatm.
 #
 # The current driver supports only PVC operations (no atm-arp, no multicast).
 # For more details, please read the original documents at
 # http://www.ccrc.wustl.edu/pub/chuck/tech/bsdatm/bsdatm.html
 #
 device		atm
 device		en
 device		fatm			#Fore PCA200E
 device		hatm			#Fore/Marconi HE155/622
 device		patm			#IDT77252 cards (ProATM and IDT)
 device		utopia			#ATM PHY driver
 options 	NATM			#native ATM
 
 options 	LIBMBPOOL		#needed by patm, iatm
 
 #
 # Sound drivers
 #
 # sound: The generic sound driver.
 #
 
 device		sound
 
 #
 # snd_*: Device-specific drivers.
 #
 # The flags of the device tells the device a bit more info about the
 # device that normally is obtained through the PnP interface.
 #	bit  2..0   secondary DMA channel;
 #	bit  4      set if the board uses two dma channels;
 #	bit 15..8   board type, overrides autodetection; leave it
 #		    zero if you don't know what to put in (and you don't,
 #		    since this is unsupported at the moment...).
 #
 # snd_als4000:		Avance Logic ALS4000 PCI.
 # snd_ad1816:		Analog Devices AD1816 ISA PnP/non-PnP.
 # snd_cmi:		CMedia CMI8338/CMI8738 PCI.
 # snd_cs4281:		Crystal Semiconductor CS4281 PCI.
 # snd_csa:		Crystal Semiconductor CS461x/428x PCI. (except
 #			4281)
 # snd_ds1:		Yamaha DS-1 PCI.
 # snd_emu10k1:		Creative EMU10K1 PCI and EMU10K2 (Audigy) PCI.
 # snd_es137x:		Ensoniq AudioPCI ES137x PCI.
 # snd_ess:		Ensoniq ESS ISA PnP/non-PnP.
 # snd_fm801:		Forte Media FM801 PCI.
 # snd_gusc:		Gravis UltraSound ISA PnP/non-PnP.
 # snd_ich:		Intel ICH PCI and some more audio controllers
 #			embedded in a chipset.
 # snd_maestro:		ESS Technology Maestro-1/2x PCI.
 # snd_maestro3:		ESS Technology Maestro-3/Allegro PCI.
 # snd_mss:		Microsoft Sound System ISA PnP/non-PnP.
 # snd_neomagic:		Neomagic 256 AV/ZX PCI.
 # snd_sb16:		Creative SoundBlaster16, to be used in
 #			conjunction with snd_sbc.
 # snd_sb8:		Creative SoundBlaster (pre-16), to be used in
 #			conjunction with snd_sbc.
 # snd_sbc:		Creative SoundBlaster ISA PnP/non-PnP.
 #			Supports ESS and Avance ISA chips as well.
 # snd_solo:		ESS Solo-1x PCI.
 # snd_t4dwave:		Trident 4DWave PCI, SiS 7018 PCI and Acer Labs
 #			M5451 PCI.
 # snd_via8233:		VIA VT8233x PCI.
 # snd_via82c686:	VIA VT82C686A PCI.
 # snd_vibes:		S3 Sonicvibes PCI.
 # snd_uaudio:		USB audio.
 
 device		snd_ad1816
 device		snd_als4000
 #device		snd_au88x0
 device		snd_cmi
 device		snd_cs4281
 device		snd_csa
 device		snd_ds1
 device		snd_emu10k1
 device		snd_es137x
 device		snd_ess
 device		snd_fm801
 device		snd_gusc
 device		snd_ich
 device		snd_maestro
 device		snd_maestro3
 device		snd_mss
 device		snd_neomagic
 device		snd_sb16
 device		snd_sb8
 device		snd_sbc
 device		snd_solo
 device		snd_t4dwave
 device		snd_via8233
 device		snd_via82c686
 device		snd_vibes
 #device		snd_vortex1
 device		snd_uaudio
 
 # For non-pnp sound cards:
 hint.snd_mss.0.at="isa"
 hint.snd_mss.0.irq="10"
 hint.snd_mss.0.drq="1"
 hint.snd_mss.0.flags="0x0"
 hint.snd_sbc.0.at="isa"
 hint.snd_sbc.0.port="0x220"
 hint.snd_sbc.0.irq="5"
 hint.snd_sbc.0.drq="1"
 hint.snd_sbc.0.flags="0x15"
 hint.snd_gusc.0.at="isa"
 hint.snd_gusc.0.port="0x220"
 hint.snd_gusc.0.irq="5"
 hint.snd_gusc.0.drq="1"
 hint.snd_gusc.0.flags="0x13"
 
 #
 # Miscellaneous hardware:
 #
 # scd: Sony CD-ROM using proprietary (non-ATAPI) interface
 # mcd: Mitsumi CD-ROM using proprietary (non-ATAPI) interface
 # bktr: Brooktree bt848/848a/849a/878/879 video capture and TV Tuner board
 # cy: Cyclades serial driver
 # joy: joystick (including IO DATA PCJOY PC Card joystick)
 # rc: RISCom/8 multiport card
 # rp: Comtrol Rocketport(ISA/PCI) - single card
 # si: Specialix SI/XIO 4-32 port terminal multiplexor
 # nmdm: nullmodem terminal driver (see nmdm(4))
 
 # Notes on the Comtrol Rocketport driver:
 #
 # The exact values used for rp0 depend on how many boards you have
 # in the system.  The manufacturer's sample configs are listed as:
 #
 #               device  rp	# core driver support
 #
 #   Comtrol Rocketport ISA single card
 #		hint.rp.0.at="isa"
 #		hint.rp.0.port="0x280"
 #
 #   If instead you have two ISA cards, one installed at 0x100 and the
 #   second installed at 0x180, then you should add the following to
 #   your kernel probe hints:
 #		hint.rp.0.at="isa"
 #		hint.rp.0.port="0x100"
 #		hint.rp.1.at="isa"
 #		hint.rp.1.port="0x180"
 #
 #   For 4 ISA cards, it might be something like this:
 #		hint.rp.0.at="isa"
 #		hint.rp.0.port="0x180"
 #		hint.rp.1.at="isa"
 #		hint.rp.1.port="0x100"
 #		hint.rp.2.at="isa"
 #		hint.rp.2.port="0x340"
 #		hint.rp.3.at="isa"
 #		hint.rp.3.port="0x240"
 #
 #   For PCI cards, you need no hints.
 
 # Mitsumi CD-ROM
 device		mcd
 hint.mcd.0.at="isa"
 hint.mcd.0.port="0x300"
 # for the Sony CDU31/33A CDROM
 device		scd
 hint.scd.0.at="isa"
 hint.scd.0.port="0x230"
 device		joy			# PnP aware, hints for nonpnp only
 hint.joy.0.at="isa"
 hint.joy.0.port="0x201"
 device		rc
 hint.rc.0.at="isa"
 hint.rc.0.port="0x220"
 hint.rc.0.irq="12"
 device		rp
 hint.rp.0.at="isa"
 hint.rp.0.port="0x280"
 device		si
 options 	SI_DEBUG
 hint.si.0.at="isa"
 hint.si.0.maddr="0xd0000"
 hint.si.0.irq="12"
 device		nmdm
 
 #
 # The 'bktr' device is a PCI video capture device using the Brooktree
 # bt848/bt848a/bt849a/bt878/bt879 chipset. When used with a TV Tuner it forms a
 # TV card, e.g. Miro PC/TV, Hauppauge WinCast/TV WinTV, VideoLogic Captivator,
 # Intel Smart Video III, AverMedia, IMS Turbo, FlyVideo.
 #
 # options 	OVERRIDE_CARD=xxx
 # options 	OVERRIDE_TUNER=xxx
 # options 	OVERRIDE_MSP=1
 # options 	OVERRIDE_DBX=1
 # These options can be used to override the auto detection.
 # The current values for xxx are found in src/sys/dev/bktr/bktr_card.h.
 # Using sysctl(8), run-time overrides can be made on a per-card basis.
 #
 # options 	BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_PAL
 # or
 # options 	BROOKTREE_SYSTEM_DEFAULT=BROOKTREE_NTSC
 # Specifies the default video capture mode.
 # This is required for Dual Crystal (28&35Mhz) boards where PAL is used
 # to prevent hangs during initialisation, e.g. VideoLogic Captivator PCI.
 #
 # options 	BKTR_USE_PLL
 # This is required for PAL or SECAM boards with a 28Mhz crystal and no 35Mhz
 # crystal, e.g. some new Bt878 cards.
 #
 # options 	BKTR_GPIO_ACCESS
 # This enables IOCTLs which give user level access to the GPIO port.
 #
 # options 	BKTR_NO_MSP_RESET
 # Prevents the MSP34xx reset. Good if you initialise the MSP in another OS first
 #
 # options 	BKTR_430_FX_MODE
 # Switch Bt878/879 cards into Intel 430FX chipset compatibility mode.
 #
 # options 	BKTR_SIS_VIA_MODE
 # Switch Bt878/879 cards into SIS/VIA chipset compatibility mode which is
 # needed for some old SiS and VIA chipset motherboards.
 # This also allows Bt878/879 chips to work on old OPTi (<1997) chipset
 # motherboards and motherboards with bad or incomplete PCI 2.1 support.
 # As a rough guess, old = before 1998
 #
 # options 	BKTR_NEW_MSP34XX_DRIVER
 # Use new, more complete initialization scheme for the msp34* soundchip.
 # Should fix stereo autodetection if the old driver only outputs
 # mono sound.
 
 #
 # options	BKTR_USE_FREEBSD_SMBUS
 # Compile with FreeBSD SMBus implementation
 #
 # Brooktree driver has been ported to the new I2C framework. Thus,
 # you'll need to have the following 4 lines in the kernel config.
 #     device smbus
 #     device iicbus
 #     device iicbb
 #     device iicsmb
 # The iic and smb devices are only needed if you want to control other
 # I2C slaves connected to the external connector of some cards.
 #
 device		bktr
 
 #
 # PC Card/PCMCIA
 # (OLDCARD)
 #
 # card: pccard slots
 # pcic: isa/pccard bridge
 #device		pcic
 #hint.pcic.0.at="isa"
 #hint.pcic.1.at="isa"
 #device		card	1
 
 #
 # PC Card/PCMCIA and Cardbus
 # (NEWCARD)
 #
 # Note that NEWCARD and OLDCARD are incompatible.  Do not use both at the same
 # time.
 #
 # pccbb: pci/cardbus bridge implementing YENTA interface
 # pccard: pccard slots
 # cardbus: cardbus slots
 device		cbb
 device		pccard
 device		cardbus
 
 #
 # SMB bus
 #
 # System Management Bus support is provided by the 'smbus' device.
 # Access to the SMBus device is via the 'smb' device (/dev/smb*),
 # which is a child of the 'smbus' device.
 #
 # Supported devices:
 # smb		standard io through /dev/smb*
 #
 # Supported SMB interfaces:
 # iicsmb	I2C to SMB bridge with any iicbus interface
 # bktr		brooktree848 I2C hardware interface
 # intpm		Intel PIIX4 (82371AB, 82443MX) Power Management Unit
 # alpm		Acer Aladdin-IV/V/Pro2 Power Management Unit
 # ichsmb	Intel ICH SMBus controller chips (82801AA, 82801AB, 82801BA)
 # viapm		VIA VT82C586B/596B/686A and VT8233 Power Management Unit
 # amdpm		AMD 756 Power Management Unit
 # nfpm		NVIDIA nForce Power Management Unit
 #
 device		smbus		# Bus support, required for smb below.
 
 device		intpm
 device		alpm
 device		ichsmb
 device		viapm
 device		amdpm
 device		nfpm
 
 device		smb
 
 #
 # I2C Bus
 #
 # Philips i2c bus support is provided by the `iicbus' device.
 #
 # Supported devices:
 # ic	i2c network interface
 # iic	i2c standard io
 # iicsmb i2c to smb bridge. Allow i2c i/o with smb commands.
 #
 # Supported interfaces:
 # bktr	brooktree848 I2C software interface
 #
 # Other:
 # iicbb	generic I2C bit-banging code (needed by lpbb, bktr)
 #
 device		iicbus		# Bus support, required for ic/iic/iicsmb below.
 device		iicbb
 
 device		ic
 device		iic
 device		iicsmb		# smb over i2c bridge
 
 # Parallel-Port Bus
 #
 # Parallel port bus support is provided by the `ppbus' device.
 # Multiple devices may be attached to the parallel port, devices
 # are automatically probed and attached when found.
 #
 # Supported devices:
 # vpo	Iomega Zip Drive
 #	Requires SCSI disk support ('scbus' and 'da'), best
 #	performance is achieved with ports in EPP 1.9 mode.
 # lpt	Parallel Printer
 # plip	Parallel network interface
 # ppi	General-purpose I/O ("Geek Port") + IEEE1284 I/O
 # pps	Pulse per second Timing Interface
 # lpbb	Philips official parallel port I2C bit-banging interface
 #
 # Supported interfaces:
 # ppc	ISA-bus parallel port interfaces.
 #
 
 options 	PPC_PROBE_CHIPSET # Enable chipset specific detection
 				  # (see flags in ppc(4))
 options 	DEBUG_1284	# IEEE1284 signaling protocol debug
 options 	PERIPH_1284	# Makes your computer act as an IEEE1284
 				# compliant peripheral
 options 	DONTPROBE_1284	# Avoid boot detection of PnP parallel devices
 options 	VP0_DEBUG	# ZIP/ZIP+ debug
 options 	LPT_DEBUG	# Printer driver debug
 options 	PPC_DEBUG	# Parallel chipset level debug
 options 	PLIP_DEBUG	# Parallel network IP interface debug
 options 	PCFCLOCK_VERBOSE         # Verbose pcfclock driver
 options 	PCFCLOCK_MAX_RETRIES=5   # Maximum read tries (default 10)
 
 device		ppc
 hint.ppc.0.at="isa"
 hint.ppc.0.irq="7"
 device		ppbus
 device		vpo
 device		lpt
 device		plip
 device		ppi
 device		pps
 device		lpbb
 device		pcfclock
 
 # Kernel BOOTP support
 
 options 	BOOTP		# Use BOOTP to obtain IP address/hostname
 				# Requires NFSCLIENT and NFS_ROOT
 options 	BOOTP_NFSROOT	# NFS mount root filesystem using BOOTP info
 options 	BOOTP_NFSV3	# Use NFS v3 to NFS mount root
 options 	BOOTP_COMPAT	# Workaround for broken bootp daemons.
 options 	BOOTP_WIRED_TO=fxp0 # Use interface fxp0 for BOOTP
 
 #
 # Add software watchdog routines.
 #
 options 	SW_WATCHDOG
 
 #
 # Disable swapping of upages and stack pages.  This option removes all
 # code which actually performs swapping, so it's not possible to turn
 # it back on at run-time.
 #
 # This is sometimes usable for systems which don't have any swap space
 # (see also sysctls "vm.defer_swapspace_pageouts" and
 # "vm.disable_swapspace_pageouts")
 #
 #options 	NO_SWAPPING
 
 # Set the number of sf_bufs to allocate. sf_bufs are virtual buffers
 # for sendfile(2) that are used to map file VM pages, and normally
 # default to a quantity that is roughly 16*MAXUSERS+512. You would
 # typically want about 4 of these for each simultaneous file send.
 #
 options 	NSFBUFS=1024
 
 #
 # Enable extra debugging code for locks.  This stores the filename and
 # line of whatever acquired the lock in the lock itself, and changes a
 # number of function calls to pass around the relevant data.  This is
 # not at all useful unless you are debugging lock code.  Also note
 # that it is likely to break e.g. fstat(1) unless you recompile your
 # userland with -DDEBUG_LOCKS as well.
 #
 options 	DEBUG_LOCKS
 
 
 #####################################################################
 # USB support
 # UHCI controller
 device		uhci
 # OHCI controller
 device		ohci
 # EHCI controller
 device		ehci
 # General USB code (mandatory for USB)
 device		usb
 #
 # USB Double Bulk Pipe devices
 device		udbp
 # USB Fm Radio
 device		ufm
 # Generic USB device driver
 device		ugen
 # Human Interface Device (anything with buttons and dials)
 device		uhid
 # USB keyboard
 device		ukbd
 # USB printer
 device		ulpt
 # USB Iomega Zip 100 Drive (Requires scbus and da)
 device		umass
 # USB support for Belkin F5U109 and Magic Control Technology serial adapters
 device		umct
 # USB modem support
 device		umodem
 # USB mouse
 device		ums
 # Diamond Rio 500 Mp3 player
 device		urio
 # USB scanners
 device		uscanner
 #
 # USB serial support
 device		ucom
 # USB support for Belkin F5U103 and compatible serial adapters
 device		ubsa
 # USB support for BWCT console serial adapters
 device		ubser
 # USB support for serial adapters based on the FT8U100AX and FT8U232AM
 device		uftdi
 # USB support for Prolific PL-2303 serial adapters
 device		uplcom
 # USB Visor and Palm devices
 device		uvisor
 # USB serial support for DDI pocket's PHS
 device		uvscom
 #
 # ADMtek USB ethernet. Supports the LinkSys USB100TX,
 # the Billionton USB100, the Melco LU-ATX, the D-Link DSB-650TX
 # and the SMC 2202USB. Also works with the ADMtek AN986 Pegasus
 # eval board.
 device		aue
 
 # ASIX Electronics AX88172 USB 2.0 ethernet driver. Used in the
 # LinkSys USB200M and various other adapters.
 
 device		axe
 
 #
 # CATC USB-EL1201A USB ethernet. Supports the CATC Netmate
 # and Netmate II, and the Belkin F5U111.
 device		cue
 #
 # Kawasaki LSI ethernet. Supports the LinkSys USB10T,
 # Entrega USB-NET-E45, Peracom Ethernet Adapter, the
 # 3Com 3c19250, the ADS Technologies USB-10BT, the ATen UC10T,
 # the Netgear EA101, the D-Link DSB-650, the SMC 2102USB
 # and 2104USB, and the Corega USB-T.
 device		kue
 #
 # RealTek RTL8150 USB to fast ethernet. Supports the Melco LUA-KTX
 # and the GREEN HOUSE GH-USB100B.
 device		rue
 #
 # Davicom DM9601E USB to fast ethernet. Supports the Corega FEther USB-TXC.
 device		udav
 
 
 # debugging options for the USB subsystem
 #
 options 	USB_DEBUG
 
 # options for ukbd:
 options 	UKBD_DFLT_KEYMAP	# specify the built-in keymap
 makeoptions	UKBD_DFLT_KEYMAP=it.iso
 
 # options for uplcom:
 options 	UPLCOM_INTR_INTERVAL=100	# interrupt pipe interval
 						# in milliseconds
 
 # options for uvscom:
 options 	UVSCOM_DEFAULT_OPKTSIZE=8	# default output packet size
 options 	UVSCOM_INTR_INTERVAL=100	# interrupt pipe interval
 						# in milliseconds
 
 #####################################################################
 # FireWire support
 
 device		firewire	# FireWire bus code
 device		sbp		# SCSI over Firewire (Requires scbus and da)
 device		sbp_targ	# SBP-2 Target mode  (Requires scbus and targ)
 device		fwe		# Ethernet over FireWire (non-standard!)
 device		fwip		# IP over FireWire (rfc2734 and rfc3146)
 
 #####################################################################
 # dcons support (Dumb Console Device)
 
 device		dcons			# dumb console driver
 device		dcons_crom		# FireWire attachment
 options 	DCONS_BUF_SIZE=16384	# buffer size
 options 	DCONS_POLL_HZ=100	# polling rate
 options 	DCONS_FORCE_CONSOLE=0	# force to be the primary console
 options 	DCONS_FORCE_GDB=1	# force to be the gdb device
 
 #####################################################################
 # crypto subsystem
 #
 # This is a port of the openbsd crypto framework.  Include this when
 # configuring FAST_IPSEC and when you have a h/w crypto device to accelerate
 # user applications that link to openssl.
 #
 # Drivers are ports from openbsd with some simple enhancements that have
 # been fed back to openbsd.
 
 device		crypto		# core crypto support
 device		cryptodev	# /dev/crypto for access to h/w
 
 device		rndtest		# FIPS 140-2 entropy tester
 
 device		hifn		# Hifn 7951, 7781, etc.
 options 	HIFN_DEBUG	# enable debugging support: hw.hifn.debug
 options 	HIFN_RNDTEST	# enable rndtest support
 
 device		ubsec		# Broadcom 5501, 5601, 58xx
 options 	UBSEC_DEBUG	# enable debugging support: hw.ubsec.debug
 options 	UBSEC_RNDTEST	# enable rndtest support
 
 #####################################################################
 
 
 #
 # Embedded system options:
 #
 # An embedded system might want to run something other than init.
 options 	INIT_PATH=/sbin/init:/stand/sysinstall
 
 # Debug options
 options 	BUS_DEBUG	# enable newbus debugging
 options 	DEBUG_VFS_LOCKS	# enable vfs lock debugging
 options 	SOCKBUF_DEBUG	# enable sockbuf last record/mb tail checking
 
 #####################################################################
 # SYSV IPC KERNEL PARAMETERS
 #
 # Maximum number of entries in a semaphore map.
 options 	SEMMAP=31
 
 # Maximum number of System V semaphores that can be used on the system at
 # one time.
 options 	SEMMNI=11
 
 # Total number of semaphores system wide
 options 	SEMMNS=61
 
 # Total number of undo structures in system
 options 	SEMMNU=31
 
 # Maximum number of System V semaphores that can be used by a single process
 # at one time.
 options 	SEMMSL=61
 
 # Maximum number of operations that can be outstanding on a single System V
 # semaphore at one time.
 options 	SEMOPM=101
 
 # Maximum number of undo operations that can be outstanding on a single
 # System V semaphore at one time.
 options 	SEMUME=11
 
 # Maximum number of shared memory pages system wide.
 options 	SHMALL=1025
 
 # Maximum size, in bytes, of a single System V shared memory region.
 options 	SHMMAX=(SHMMAXPGS*PAGE_SIZE+1)
 options 	SHMMAXPGS=1025
 
 # Minimum size, in bytes, of a single System V shared memory region.
 options 	SHMMIN=2
 
 # Maximum number of shared memory regions that can be used on the system
 # at one time.
 options 	SHMMNI=33
 
 # Maximum number of System V shared memory regions that can be attached to
 # a single process at one time.
 options 	SHMSEG=9
 
 # Set the amount of time (in seconds) the system will wait before
 # rebooting automatically when a kernel panic occurs.  If set to (-1),
 # the system will wait indefinitely until a key is pressed on the
 # console.
 options 	PANIC_REBOOT_WAIT_TIME=16
 
 # Attempt to bypass the buffer cache and put data directly into the
 # userland buffer for read operation when O_DIRECT flag is set on the
 # file.  Both offset and length of the read operation must be
 # multiples of the physical media sector size.
 #
 #options 	DIRECTIO
 
 # Specify a lower limit for the number of swap I/O buffers.  They are
 # (among other things) used when bypassing the buffer cache due to
 # DIRECTIO kernel option enabled and O_DIRECT flag set on file.
 #
 #options 	NSWBUF_MIN=120
 
 #####################################################################
 
 # More undocumented options for linting.
 # Note that documenting these is not considered an affront.
 
 options 	CAM_DEBUG_DELAY
 
 # VFS cluster debugging.
 options 	CLUSTERDEBUG
 
 options 	DEBUG
 
 # Kernel filelock debugging.
 options 	LOCKF_DEBUG
 
 # System V compatible message queues
 # Please note that the values provided here are used to test kernel
 # building.  The defaults in the sources provide almost the same numbers.
 # MSGSSZ must be a power of 2 between 8 and 1024.
 options 	MSGMNB=2049	# Max number of chars in queue
 options 	MSGMNI=41	# Max number of message queue identifiers
 options 	MSGSEG=2049	# Max number of message segments
 options 	MSGSSZ=16	# Size of a message segment
 options 	MSGTQL=41	# Max number of messages in system
 
 options 	NBUF=512	# Number of buffer headers
 
 options 	NMBCLUSTERS=1024	# Number of mbuf clusters
 
 options 	SCSI_NCR_DEBUG
 options 	SCSI_NCR_MAX_SYNC=10000
 options 	SCSI_NCR_MAX_WIDE=1
 options 	SCSI_NCR_MYADDR=7
 
 options 	SC_DEBUG_LEVEL=5	# Syscons debug level
 options 	SC_RENDER_DEBUG	# syscons rendering debugging
 
 options 	SHOW_BUSYBUFS	# List buffers that prevent root unmount
 options 	SLIP_IFF_OPTS
 options 	VFS_BIO_DEBUG	# VFS buffer I/O debugging
 
 options 	KSTACK_MAX_PAGES=32 # Maximum pages to give the kernel stack
 
 # Adaptec Array Controller driver options
 options		AAC_DEBUG	# Debugging levels:
 				# 0 - quiet, only emit warnings
 				# 1 - noisy, emit major function
 				#     points and things done
 				# 2 - extremely noisy, emit trace
 				#     items in loops, etc.
 
 # Yet more undocumented options for linting.
 # BKTR_ALLOC_PAGES has no effect except to cause warnings, and
 # BROOKTREE_ALLOC_PAGES hasn't actually been superseded by it, since the
 # driver still mostly spells this option BROOKTREE_ALLOC_PAGES.
 ##options 	BKTR_ALLOC_PAGES=(217*4+1)
 options 	BROOKTREE_ALLOC_PAGES=(217*4+1)
 options 	MAXFILES=999
 options 	NDEVFSINO=1025
 options 	NDEVFSOVERFLOW=32769
 
 # Yet more undocumented options for linting.
 options 	VGA_DEBUG
diff --git a/sys/conf/options b/sys/conf/options
index eee12c97b2ea..f7ac6d25b5a2 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -1,689 +1,690 @@
 # $FreeBSD$
 #
 #        On the handling of kernel options
 #
 # All kernel options should be listed in NOTES, with suitable
 # descriptions.  Negative options (options that make some code not
 # compile) should be commented out; LINT (generated from NOTES) should
 # compile as much code as possible.  Try to structure option-using
 # code so that a single option only switches code on, or only switches
 # code off, to make it possible to have a full compile-test.  If
 # necessary, you can check for COMPILING_LINT to get maximum code
 # coverage.
 #
 # All new options shall also be listed in either "conf/options" or
 # "conf/options.<machine>".  Options that affect a single source-file
 # <xxx>.[c|s] should be directed into "opt_<xxx>.h", while options
 # that affect multiple files should either go in "opt_global.h" if
 # this is a kernel-wide option (used just about everywhere), or in
 # "opt_<option-name-in-lower-case>.h" if it affects only some files.
 # Note that the effect of listing only an option without a
 # header-file-name in conf/options (and cousins) is that the last
 # convention is followed.
 #
 # This handling scheme is not yet fully implemented.
 #
 #
 # Format of this file:
 # Option name	filename
 #
 # If filename is missing, the default is
 # opt_<name-of-option-in-lower-case>.h
 
 AAC_DEBUG		opt_aac.h
 AHC_ALLOW_MEMIO		opt_aic7xxx.h
 AHC_TMODE_ENABLE	opt_aic7xxx.h
 AHC_DUMP_EEPROM		opt_aic7xxx.h
 AHC_DEBUG		opt_aic7xxx.h
 AHC_DEBUG_OPTS		opt_aic7xxx.h
 AHC_REG_PRETTY_PRINT    opt_aic7xxx.h
 AHD_DEBUG		opt_aic79xx.h
 AHD_DEBUG_OPTS		opt_aic79xx.h
 AHD_TMODE_ENABLE	opt_aic79xx.h	
 AHD_REG_PRETTY_PRINT	opt_aic79xx.h
 ADW_ALLOW_MEMIO		opt_adw.h
 
 TWA_DEBUG		opt_twa.h
 TWA_FLASH_FIRMWARE	opt_twa.h
 
 # Debugging options.
 DDB
 DDB_NUMSYM	opt_ddb.h
 GDB
 GDBSPEED	opt_gdb.h
 KDB		opt_global.h
 KDB_TRACE	opt_kdb.h
 KDB_UNATTENDED	opt_kdb.h
 
 # Miscellaneous options.
 ADAPTIVE_GIANT	opt_adaptive_mutexes.h
 NO_ADAPTIVE_MUTEXES	opt_adaptive_mutexes.h
 ALQ
 CODA_COMPAT_5	opt_coda.h
 COMPAT_43	opt_compat.h
 COMPAT_FREEBSD4	opt_compat.h
 COMPILING_LINT	opt_global.h
 CONSPEED	opt_comconsole.h
 CY_PCI_FASTINTR
 DIRECTIO	opt_directio.h
-FULL_PREEMPTION
+FULL_PREEMPTION	opt_sched.h
+PREEMPTION	opt_sched.h
 GEOM_AES	opt_geom.h
 GEOM_APPLE	opt_geom.h
 GEOM_BDE	opt_geom.h
 GEOM_BSD	opt_geom.h
 GEOM_CONCAT	opt_geom.h
 GEOM_FOX	opt_geom.h
 GEOM_GATE	opt_geom.h
 GEOM_GPT	opt_geom.h
 GEOM_LABEL	opt_geom.h
 GEOM_MBR	opt_geom.h
 GEOM_MIRROR	opt_geom.h
 GEOM_NOP	opt_geom.h
 GEOM_PC98	opt_geom.h
 GEOM_RAID3	opt_geom.h
 GEOM_STRIPE	opt_geom.h
 GEOM_SUNLABEL	opt_geom.h
 GEOM_UZIP	opt_geom.h
 GEOM_VOL	opt_geom.h
 KSTACK_MAX_PAGES
 KSTACK_PAGES
 KTRACE
 KTRACE_REQUEST_POOL	opt_ktrace.h
 LIBICONV
 MAC
 MAC_ALWAYS_LABEL_MBUF	opt_mac.h
 MAC_BIBA	opt_dontuse.h
 MAC_BSDEXTENDED	opt_dontuse.h
 MAC_DEBUG	opt_mac.h
 MAC_IFOFF	opt_dontuse.h
 MAC_LOMAC	opt_dontuse.h
 MAC_MLS		opt_dontuse.h
 MAC_NONE	opt_dontuse.h
 MAC_PARTITION	opt_dontuse.h
 MAC_PORTACL	opt_dontuse.h
 MAC_SEEOTHERUIDS	opt_dontuse.h
 MAC_STATIC	opt_mac.h
 MAC_STUB	opt_dontuse.h
 MAC_TEST	opt_dontuse.h
 MD_ROOT		opt_md.h
 MD_ROOT_SIZE	opt_md.h
 MPROF_BUFFERS	opt_mprof.h
 MPROF_HASH_SIZE	opt_mprof.h
 MUTEX_WAKE_ALL
 NSWBUF_MIN	opt_swap.h
 PANIC_REBOOT_WAIT_TIME	opt_panic.h
 PPS_SYNC	opt_ntp.h
 PUC_FASTINTR	opt_puc.h
 QUOTA
 SCHED_4BSD	opt_sched.h
 SCHED_ULE	opt_sched.h
 SHOW_BUSYBUFS
 SLEEPQUEUE_PROFILING
 SPX_HACK
 SUIDDIR		opt_suiddir.h
 MSGMNB		opt_sysvipc.h
 MSGMNI		opt_sysvipc.h
 MSGSEG		opt_sysvipc.h
 MSGSSZ		opt_sysvipc.h
 MSGTQL		opt_sysvipc.h
 SEMMAP		opt_sysvipc.h
 SEMMNI		opt_sysvipc.h
 SEMMNS		opt_sysvipc.h
 SEMMNU		opt_sysvipc.h
 SEMMSL		opt_sysvipc.h
 SEMOPM		opt_sysvipc.h
 SEMUME		opt_sysvipc.h
 SHMALL		opt_sysvipc.h
 SHMMAX		opt_sysvipc.h
 SHMMAXPGS	opt_sysvipc.h
 SHMMIN		opt_sysvipc.h
 SHMMNI		opt_sysvipc.h
 SHMSEG		opt_sysvipc.h
 SYSVMSG		opt_sysvipc.h
 SYSVSEM		opt_sysvipc.h
 SYSVSHM		opt_sysvipc.h
 SW_WATCHDOG	opt_watchdog.h
 TURNSTILE_PROFILING
 TTYHOG		opt_tty.h
 VFS_AIO
 WLCACHE		opt_wavelan.h
 WLDEBUG		opt_wavelan.h
 
 # POSIX kernel options
 P1003_1B_SEMAPHORES		opt_posix.h
 _KPOSIX_PRIORITY_SCHEDULING	opt_posix.h
 
 # Do we want the config file compiled into the kernel?
 INCLUDE_CONFIG_FILE	opt_config.h
 
 # Options for static filesystems.  These should only be used at config
 # time, since the corresponding lkms cannot work if there are any static
 # dependencies.  Unusability is enforced by hiding the defines for the
 # options in a never-included header.
 CD9660		opt_dontuse.h
 CODA		opt_dontuse.h
 EXT2FS		opt_dontuse.h
 FDESCFS		opt_dontuse.h
 HPFS		opt_dontuse.h
 LINPROCFS	opt_dontuse.h
 MSDOSFS		opt_dontuse.h
 NTFS		opt_dontuse.h
 NULLFS		opt_dontuse.h
 NWFS		opt_dontuse.h
 PORTALFS	opt_dontuse.h
 PROCFS		opt_dontuse.h
 PSEUDOFS	opt_dontuse.h
 SMBFS		opt_dontuse.h
 UDF		opt_dontuse.h
 UMAPFS		opt_dontuse.h
 UNIONFS		opt_dontuse.h
 
 # Broken - ffs_snapshot() dependency from ufs_lookup() :-(
 FFS		opt_ffs_broken_fixme.h
 
 # These static filesystems have one slightly bogus static dependency in
 # sys/i386/i386/autoconf.c.  If any of these filesystems are
 # statically compiled into the kernel, code for mounting them as root
 # filesystems will be enabled - but look below.
 NFSCLIENT	opt_nfs.h
 NFSSERVER	opt_nfs.h
 
 # filesystems and libiconv bridge
 CD9660_ICONV	opt_dontuse.h
 MSDOSFS_ICONV	opt_dontuse.h
 NTFS_ICONV	opt_dontuse.h
 UDF_ICONV	opt_dontuse.h
 
 # If you are following the conditions in the copyright,
 # you can enable soft-updates which will speed up a lot of things
 # and make the system safer from crashes at the same time.
 # otherwise a STUB module will be compiled in.
 SOFTUPDATES	opt_ffs.h
 
 # Enabling this option turns on support for Access Control Lists in UFS,
 # which can be used to support high security configurations.  Depends on
 # UFS_EXTATTR.
 UFS_ACL		opt_ufs.h
 
 # Enabling this option turns on support for extended attributes in UFS-based
 # filesystems, which can be used to support high security configurations
 # as well as new filesystem features.
 UFS_EXTATTR	opt_ufs.h
 UFS_EXTATTR_AUTOSTART	opt_ufs.h
 
 # Enable fast hash lookups for large directories on UFS-based filesystems.
 UFS_DIRHASH	opt_ufs.h
 
 # The below sentence is not in English, and neither is this one.
 # We plan to remove the static dependences above, with a
 # <filesystem>_ROOT option to control if it usable as root.  This list
 # allows these options to be present in config files already (though
 # they won't make any difference yet).
 NFS_ROOT	opt_nfsroot.h
 
 # SMB/CIFS requester
 NETSMB			opt_netsmb.h
 NETSMBCRYPTO		opt_netsmb.h
 
 # Experimental support for large MS-DOS filesystems; SEE WARNING IN "NOTES"!
 MSDOSFS_LARGE	opt_msdosfs.h
 
 # Options used only in subr_param.c.
 HZ		opt_param.h
 MAXFILES	opt_param.h
 NBUF		opt_param.h
 NMBCLUSTERS	opt_param.h
 NSFBUFS		opt_param.h
 VM_BCACHE_SIZE_MAX	opt_param.h
 VM_SWZONE_SIZE_MAX	opt_param.h
 MAXUSERS
 DFLDSIZ		opt_param.h
 MAXDSIZ		opt_param.h
 MAXSSIZ		opt_param.h
 
 # Generic SCSI options.
 CAM_MAX_HIGHPOWER	opt_cam.h
 CAMDEBUG		opt_cam.h
 CAM_DEBUG_DELAY		opt_cam.h
 CAM_DEBUG_BUS		opt_cam.h
 CAM_DEBUG_TARGET	opt_cam.h
 CAM_DEBUG_LUN		opt_cam.h
 CAM_DEBUG_FLAGS		opt_cam.h
 CAM_NEW_TRAN_CODE	opt_cam.h
 SCSI_DELAY		opt_scsi.h
 SCSI_NO_SENSE_STRINGS	opt_scsi.h
 SCSI_NO_OP_STRINGS	opt_scsi.h
 
 # Options used only in cam/scsi/scsi_cd.c
 CHANGER_MIN_BUSY_SECONDS	opt_cd.h
 CHANGER_MAX_BUSY_SECONDS	opt_cd.h
 
 # Options used only in cam/scsi/scsi_sa.c.
 SA_IO_TIMEOUT		opt_sa.h
 SA_SPACE_TIMEOUT	opt_sa.h
 SA_REWIND_TIMEOUT	opt_sa.h
 SA_ERASE_TIMEOUT	opt_sa.h
 SA_1FM_AT_EOD		opt_sa.h
 
 # Options used only in cam/scsi/scsi_pt.c
 SCSI_PT_DEFAULT_TIMEOUT	opt_pt.h
 
 # Options used only in cam/scsi/scsi_ses.c
 SES_ENABLE_PASSTHROUGH	opt_ses.h
 
 # Options used in dev/sym/ (Symbios SCSI driver).
 SYM_SETUP_LP_PROBE_MAP	opt_sym.h	#-Low Priority Probe Map (bits)
 					# Allows the ncr to take precedence
 					# 1 (1<<0) -> 810a, 860
 					# 2 (1<<1) -> 825a, 875, 885, 895
 					# 4 (1<<2) -> 895a, 896, 1510d 
 SYM_SETUP_SCSI_DIFF	opt_sym.h	#-HVD support for 825a, 875, 885
 					# disabled:0 (default), enabled:1
 SYM_SETUP_PCI_PARITY	opt_sym.h	#-PCI parity checking
 					# disabled:0, enabled:1 (default)
 SYM_SETUP_MAX_LUN	opt_sym.h	#-Number of LUNs supported
 					# default:8, range:[1..64]
 
 # Options used only in pci/ncr.c
 SCSI_NCR_DEBUG		opt_ncr.h
 SCSI_NCR_MAX_SYNC	opt_ncr.h
 SCSI_NCR_MAX_WIDE	opt_ncr.h
 SCSI_NCR_MYADDR		opt_ncr.h
 
 # Options used only in dev/isp/*
 ISP_TARGET_MODE		opt_isp.h
 ISP_FW_CRASH_DUMP	opt_isp.h
 
 # Options used in the 'ata' ATA/ATAPI driver
 ATA_STATIC_ID		opt_ata.h
 ATA_NOPCI		opt_ata.h
 DEV_ATADISK		opt_ata.h
 DEV_ATAPICD		opt_ata.h
 DEV_ATAPIST		opt_ata.h
 DEV_ATAPIFD		opt_ata.h
 DEV_ATAPICAM		opt_ata.h
 DEV_ATARAID		opt_ata.h
 
 # Net stuff.
 ACCEPT_FILTER_DATA
 ACCEPT_FILTER_HTTP
 ALTQ			opt_global.h
 ALTQ_CBQ		opt_altq.h
 ALTQ_RED		opt_altq.h
 ALTQ_RIO		opt_altq.h
 ALTQ_HFSC		opt_altq.h
 ALTQ_CDNR		opt_altq.h
 ALTQ_PRIQ		opt_altq.h
 ALTQ_NOPCC		opt_altq.h
 ALTQ_DEBUG		opt_altq.h
 BOOTP			opt_bootp.h
 BOOTP_COMPAT		opt_bootp.h
 BOOTP_NFSROOT		opt_bootp.h
 BOOTP_NFSV3		opt_bootp.h
 BOOTP_WIRED_TO		opt_bootp.h
 BRIDGE			opt_bdg.h
 DEV_PF			opt_pf.h
 DEV_PFLOG		opt_pf.h
 DEV_PFSYNC		opt_pf.h
 ETHER_II		opt_ef.h
 ETHER_8023		opt_ef.h
 ETHER_8022		opt_ef.h
 ETHER_SNAP		opt_ef.h
 MROUTING		opt_mrouting.h
 PIM			opt_mrouting.h
 INET			opt_inet.h
 INET6			opt_inet6.h
 IPSEC			opt_ipsec.h
 IPSEC_ESP		opt_ipsec.h
 IPSEC_DEBUG		opt_ipsec.h
 IPSEC_FILTERGIF		opt_ipsec.h
 FAST_IPSEC		opt_ipsec.h
 IPDIVERT
 DUMMYNET		opt_ipdn.h
 IPFILTER		opt_ipfilter.h
 IPFILTER_LOG		opt_ipfilter.h
 IPFILTER_DEFAULT_BLOCK	opt_ipfilter.h
 IPFIREWALL		opt_ipfw.h
 IPFIREWALL_VERBOSE	opt_ipfw.h
 IPFIREWALL_VERBOSE_LIMIT	opt_ipfw.h
 IPFIREWALL_DEFAULT_TO_ACCEPT	opt_ipfw.h
 IPFIREWALL_FORWARD	opt_ipfw.h
 IPV6FIREWALL		opt_ip6fw.h
 IPV6FIREWALL_VERBOSE	opt_ip6fw.h
 IPV6FIREWALL_VERBOSE_LIMIT	opt_ip6fw.h
 IPV6FIREWALL_DEFAULT_TO_ACCEPT	opt_ip6fw.h
 IPSTEALTH
 IPX
 IPXIP			opt_ipx.h
 LIBMBPOOL
 LIBMCHAIN
 MBUF_STRESS_TEST	opt_mbuf_stress_test.h
 NCP
 NETATALK		opt_atalk.h
 NET_WITH_GIANT		opt_net.h
 PPP_BSDCOMP		opt_ppp.h
 PPP_DEFLATE		opt_ppp.h
 PPP_FILTER		opt_ppp.h
 SLIP_IFF_OPTS		opt_slip.h
 TCPDEBUG
 TCP_SIGNATURE		opt_inet.h
 TCP_SACK_DEBUG		opt_tcp_sack.h 
 TCP_DROP_SYNFIN		opt_tcp_input.h
 XBONEHACK
 
 # Netgraph(4). Use option NETGRAPH to enable the base netgraph code.
 # Each netgraph node type can either be compiled into the kernel
 # or loaded dynamically. To get the former, include the corresponding
 # option below. Each type has its own man page, e.g. ng_async(4).
 NETGRAPH
 NETGRAPH_ASYNC		opt_netgraph.h
 NETGRAPH_ATMLLC		opt_netgraph.h
 NETGRAPH_ATM_ATMPIF	opt_netgraph.h
 NETGRAPH_BLUETOOTH	opt_netgraph.h
 NETGRAPH_BLUETOOTH_BT3C	opt_netgraph.h
 NETGRAPH_BLUETOOTH_H4	opt_netgraph.h
 NETGRAPH_BLUETOOTH_HCI	opt_netgraph.h
 NETGRAPH_BLUETOOTH_L2CAP	opt_netgraph.h
 NETGRAPH_BLUETOOTH_SOCKET	opt_netgraph.h
 NETGRAPH_BLUETOOTH_UBT	opt_netgraph.h
 NETGRAPH_BLUETOOTH_UBTBCMFW	opt_netgraph.h
 NETGRAPH_BPF		opt_netgraph.h
 NETGRAPH_BRIDGE		opt_netgraph.h
 NETGRAPH_CISCO		opt_netgraph.h
 NETGRAPH_DEVICE		opt_netgraph.h
 NETGRAPH_ECHO		opt_netgraph.h
 NETGRAPH_EIFACE		opt_netgraph.h
 NETGRAPH_ETHER		opt_netgraph.h
 NETGRAPH_FEC		opt_netgraph.h
 NETGRAPH_FRAME_RELAY	opt_netgraph.h
 NETGRAPH_GIF		opt_netgraph.h
 NETGRAPH_GIF_DEMUX	opt_netgraph.h
 NETGRAPH_HOLE		opt_netgraph.h
 NETGRAPH_IFACE		opt_netgraph.h
 NETGRAPH_IP_INPUT	opt_netgraph.h
 NETGRAPH_KSOCKET	opt_netgraph.h
 NETGRAPH_L2TP		opt_netgraph.h
 NETGRAPH_LMI		opt_netgraph.h
 # MPPC compression requires proprietary files (not included)
 NETGRAPH_MPPC_COMPRESSION	opt_netgraph.h
 NETGRAPH_MPPC_ENCRYPTION	opt_netgraph.h
 NETGRAPH_ONE2MANY	opt_netgraph.h
 NETGRAPH_PPP		opt_netgraph.h
 NETGRAPH_PPPOE		opt_netgraph.h
 NETGRAPH_PPTPGRE	opt_netgraph.h
 NETGRAPH_RFC1490	opt_netgraph.h
 NETGRAPH_SOCKET		opt_netgraph.h
 NETGRAPH_SPLIT		opt_netgraph.h
 NETGRAPH_SPPP		opt_netgraph.h
 NETGRAPH_TEE		opt_netgraph.h
 NETGRAPH_TTY		opt_netgraph.h
 NETGRAPH_UI		opt_netgraph.h
 NETGRAPH_VJC		opt_netgraph.h
 
 # NgATM options
 NGATM_ATM		opt_netgraph.h
 NGATM_ATMBASE		opt_netgraph.h
 NGATM_SSCOP		opt_netgraph.h
 NGATM_SSCFU		opt_netgraph.h
 NGATM_UNI		opt_netgraph.h
 NGATM_CCATM		opt_netgraph.h
 
 # DRM options
 DRM_DEBUG		opt_drm.h
 
 ZERO_COPY_SOCKETS	opt_zero.h
 TI_PRIVATE_JUMBOS	opt_ti.h
 TI_JUMBO_HDRSPLIT	opt_ti.h
 
 # ATM (HARP version)
 ATM_CORE		opt_atm.h
 ATM_IP			opt_atm.h
 ATM_SIGPVC		opt_atm.h
 ATM_SPANS		opt_atm.h
 ATM_UNI			opt_atm.h
 
 # XXX Conflict: # of devices vs network protocol (Native ATM).
 # This makes "atm.h" unusable.
 NATM
 
 # DPT driver debug flags
 DPT_MEASURE_PERFORMANCE	opt_dpt.h
 DPT_HANDLE_TIMEOUTS	opt_dpt.h
 DPT_TIMEOUT_FACTOR	opt_dpt.h
 DPT_LOST_IRQ		opt_dpt.h
 DPT_RESET_HBA		opt_dpt.h
 
 # Misc debug flags.  Most of these should probably be replaced with
 # 'DEBUG', and then let people recompile just the interesting modules
 # with 'make CC="cc -DDEBUG"'.
 CLUSTERDEBUG		opt_debug_cluster.h
 DEBUG_1284		opt_ppb_1284.h
 VP0_DEBUG		opt_vpo.h
 LPT_DEBUG		opt_lpt.h
 PLIP_DEBUG		opt_plip.h
 LOCKF_DEBUG		opt_debug_lockf.h
 NPX_DEBUG		opt_debug_npx.h
 NETATALKDEBUG		opt_atalk.h
 SI_DEBUG		opt_debug_si.h
 SX_DEBUG		opt_debug_sx.h
 
 # Fb options
 FB_DEBUG		opt_fb.h
 FB_INSTALL_CDEV		opt_fb.h
 
 # ppbus related options
 PERIPH_1284		opt_ppb_1284.h
 DONTPROBE_1284		opt_ppb_1284.h
 
 # smbus related options
 ENABLE_ALART		opt_intpm.h
 
 # These cause changes all over the kernel
 BLKDEV_IOSIZE		opt_global.h
 BURN_BRIDGES		opt_global.h
 DEBUG			opt_global.h
 DEBUG_LOCKS		opt_global.h
 DEBUG_VFS_LOCKS		opt_global.h
 DIAGNOSTIC		opt_global.h
 INVARIANT_SUPPORT	opt_global.h
 INVARIANTS		opt_global.h
 LOOKUP_SHARED		opt_global.h
 MCLSHIFT		opt_global.h
 MUTEX_DEBUG		opt_global.h
 MUTEX_NOINLINE		opt_global.h
 MUTEX_PROFILING		opt_global.h
 MSIZE			opt_global.h
 REGRESSION		opt_global.h
 RESTARTABLE_PANICS	opt_global.h
 VFS_BIO_DEBUG		opt_global.h
 
 # These are VM related options
 VM_KMEM_SIZE		opt_vm.h
 VM_KMEM_SIZE_SCALE	opt_vm.h
 VM_KMEM_SIZE_MAX	opt_vm.h
 NO_SWAPPING		opt_vm.h
 MALLOC_MAKE_FAILURES	opt_vm.h
 MALLOC_PROFILE		opt_vm.h
 PQ_NOOPT		opt_vmpage.h
 PQ_NORMALCACHE		opt_vmpage.h
 PQ_MEDIUMCACHE		opt_vmpage.h
 PQ_LARGECACHE		opt_vmpage.h
 PQ_HUGECACHE		opt_vmpage.h
 PQ_CACHESIZE		opt_vmpage.h
 
 # Standard SMP options
 SMP			opt_global.h
 
 # Size of the kernel message buffer
 MSGBUF_SIZE		opt_msgbuf.h
 
 # NFS options
 NFS_MINATTRTIMO		opt_nfs.h
 NFS_MAXATTRTIMO		opt_nfs.h
 NFS_MINDIRATTRTIMO	opt_nfs.h
 NFS_MAXDIRATTRTIMO	opt_nfs.h
 NFS_GATHERDELAY		opt_nfs.h
 NFS_WDELAYHASHSIZ	opt_nfs.h
 NFS_DEBUG		opt_nfs.h
 
 # For the Bt848/Bt848A/Bt849/Bt878/Bt879 driver
 OVERRIDE_CARD			opt_bktr.h
 OVERRIDE_TUNER			opt_bktr.h
 OVERRIDE_DBX			opt_bktr.h
 OVERRIDE_MSP			opt_bktr.h
 BROOKTREE_SYSTEM_DEFAULT	opt_bktr.h
 BROOKTREE_ALLOC_PAGES		opt_bktr.h
 BKTR_OVERRIDE_CARD		opt_bktr.h
 BKTR_OVERRIDE_TUNER		opt_bktr.h
 BKTR_OVERRIDE_DBX		opt_bktr.h
 BKTR_OVERRIDE_MSP		opt_bktr.h
 BKTR_SYSTEM_DEFAULT		opt_bktr.h
 BKTR_ALLOC_PAGES		opt_bktr.h
 BKTR_USE_PLL			opt_bktr.h	
 BKTR_GPIO_ACCESS		opt_bktr.h
 BKTR_NO_MSP_RESET		opt_bktr.h
 BKTR_430_FX_MODE		opt_bktr.h
 BKTR_SIS_VIA_MODE		opt_bktr.h
 BKTR_USE_FREEBSD_SMBUS		opt_bktr.h
 BKTR_NEW_MSP34XX_DRIVER		opt_bktr.h
 
 # options for serial support
 COM_ESP			opt_sio.h
 COM_MULTIPORT		opt_sio.h
 BREAK_TO_DEBUGGER	opt_comconsole.h
 ALT_BREAK_TO_DEBUGGER	opt_comconsole.h
 
 # Options to support PPS
 UART_PPS_ON_CTS		opt_uart.h
 
 # options for bus/device framework
 BUS_DEBUG		opt_bus.h
 
 # options for USB support
 USB_DEBUG		opt_usb.h
 USBVERBOSE		opt_usb.h
 UKBD_DFLT_KEYMAP	opt_ukbd.h
 UPLCOM_INTR_INTERVAL	opt_uplcom.h
 UVSCOM_DEFAULT_OPKTSIZE	opt_uvscom.h
 UVSCOM_INTR_INTERVAL	opt_uvscom.h
 
 # Vinum options
 VINUMDEBUG		opt_vinum.h
 
 # Embedded system options
 INIT_PATH		opt_init_path.h
 
 ROOTDEVNAME		opt_rootdevname.h
 
 FDC_DEBUG		opt_fdc.h
 PCFCLOCK_VERBOSE	opt_pcfclock.h
 PCFCLOCK_MAX_RETRIES	opt_pcfclock.h
 TDFX_LINUX		opt_tdfx.h
 
 KTR			opt_global.h
 KTR_ALQ			opt_ktr.h
 KTR_MASK		opt_ktr.h
 KTR_CPUMASK		opt_ktr.h
 KTR_COMPILE		opt_global.h
 KTR_ENTRIES		opt_global.h
 KTR_VERBOSE		opt_ktr.h
 WITNESS			opt_global.h
 WITNESS_KDB		opt_witness.h
 WITNESS_SKIPSPIN	opt_witness.h
 
 # options for ACPI support
 ACPI_DEBUG		opt_acpi.h
 ACPI_MAX_THREADS	opt_acpi.h
 ACPI_NO_SEMAPHORES	opt_acpi.h
 ACPICA_PEDANTIC		opt_acpi.h
 
 # options for DEVFS, see sys/fs/devfs/devfs.h
 NDEVFSINO		opt_devfs.h
 NDEVFSOVERFLOW		opt_devfs.h
 
 # various 'device presence' options.
 DEV_BPF			opt_bpf.h
 DEV_ISA			opt_isa.h
 DEV_MCA			opt_mca.h
 DEV_SPLASH		opt_splash.h
 
 EISA_SLOTS		opt_eisa.h
 
 # ed driver
 ED_NO_MIIBUS		opt_ed.h
 
 # wi driver
 WI_SYMBOL_FIRMWARE	opt_wi.h
 
 # XXX bogusly global.
 DEVICE_POLLING		opt_global.h
 
 SOCKBUF_DEBUG		opt_global.h
 
 # options for ubsec driver
 UBSEC_DEBUG		opt_ubsec.h
 UBSEC_RNDTEST		opt_ubsec.h
 UBSEC_NO_RNG		opt_ubsec.h
 
 # options for hifn driver
 HIFN_DEBUG		opt_hifn.h
 HIFN_RNDTEST		opt_hifn.h
 
 # options for safenet driver
 SAFE_DEBUG		opt_safe.h
 SAFE_NO_RNG		opt_safe.h
 SAFE_RNDTEST		opt_safe.h
 
 # syscons options
 MAXCONS			opt_syscons.h
 SC_ALT_MOUSE_IMAGE	opt_syscons.h
 SC_CUT_SPACES2TABS	opt_syscons.h
 SC_CUT_SEPCHARS		opt_syscons.h
 SC_DEBUG_LEVEL		opt_syscons.h
 SC_DFLT_FONT		opt_syscons.h
 SC_DISABLE_KDBKEY	opt_syscons.h
 SC_DISABLE_REBOOT	opt_syscons.h
 SC_HISTORY_SIZE		opt_syscons.h
 SC_KERNEL_CONS_ATTR	opt_syscons.h
 SC_KERNEL_CONS_REV_ATTR	opt_syscons.h
 SC_MOUSE_CHAR		opt_syscons.h
 SC_NO_CUTPASTE		opt_syscons.h
 SC_NO_FONT_LOADING	opt_syscons.h
 SC_NO_HISTORY		opt_syscons.h
 SC_NO_SUSPEND_VTYSWITCH	opt_syscons.h
 SC_NO_SYSMOUSE		opt_syscons.h
 SC_NORM_ATTR		opt_syscons.h
 SC_NORM_REV_ATTR	opt_syscons.h
 SC_PIXEL_MODE		opt_syscons.h
 SC_RENDER_DEBUG		opt_syscons.h
 SC_TWOBUTTON_MOUSE	opt_syscons.h
 
 # kbd options
 KBD_DISABLE_KEYMAP_LOAD	opt_kbd.h
 KBD_INSTALL_CDEV	opt_kbd.h
 KBD_MAXRETRY		opt_kbd.h
 KBD_MAXWAIT		opt_kbd.h
 KBD_RESETDELAY		opt_kbd.h
 KBDIO_DEBUG		opt_kbd.h
 
 # options for the Atheros HAL (only useful with source code)
 AH_SUPPORT_AR5210	opt_ah.h
 AH_SUPPORT_AR5211	opt_ah.h
 AH_SUPPORT_AR5212	opt_ah.h
 AH_DEBUG		opt_ah.h
 AH_DEBUG_ALQ		opt_ah.h
 AH_ASSERT		opt_ah.h
 
 # dcons options 
 DCONS_BUF_SIZE		opt_dcons.h
 DCONS_POLL_HZ		opt_dcons.h
 DCONS_FORCE_CONSOLE	opt_dcons.h
 DCONS_FORCE_GDB		opt_dcons.h
 
 # Static unit counts
 NI4BTRC			opt_i4b.h
 NI4BRBCH		opt_i4b.h
 NI4BTEL			opt_i4b.h
 NI4BIPR			opt_i4b.h
 NI4BING			opt_i4b.h
 NI4BISPPP		opt_i4b.h
diff --git a/sys/i386/include/param.h b/sys/i386/include/param.h
index bb9c7a7df750..a4064e3d9d4c 100644
--- a/sys/i386/include/param.h
+++ b/sys/i386/include/param.h
@@ -1,147 +1,142 @@
 /*-
  * Copyright (c) 1990 The Regents of the University of California.
  * All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * William Jolitz.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)param.h	5.8 (Berkeley) 6/28/91
  * $FreeBSD$
  */
 
 /*
  * Machine dependent constants for Intel 386.
  */
 
 /*
  * Round p (pointer or byte index) up to a correctly-aligned value
  * for all data types (int, long, ...).   The result is unsigned int
  * and must be cast to any desired pointer type.
  */
 #ifndef _ALIGNBYTES
 #define _ALIGNBYTES	(sizeof(int) - 1)
 #endif
 #ifndef _ALIGN
 #define _ALIGN(p)	(((unsigned)(p) + _ALIGNBYTES) & ~_ALIGNBYTES)
 #endif
 
 #ifndef _MACHINE
 #define	_MACHINE	i386
 #endif
 #ifndef _MACHINE_ARCH
 #define	_MACHINE_ARCH	i386
 #endif
 
 #ifndef _NO_NAMESPACE_POLLUTION
 
 #ifndef _MACHINE_PARAM_H_
 #define	_MACHINE_PARAM_H_
 
 #ifndef MACHINE
 #define MACHINE		"i386"
 #endif
 #ifndef MACHINE_ARCH
 #define	MACHINE_ARCH	"i386"
 #endif
 #define MID_MACHINE	MID_I386
 
 #ifdef SMP
 #define MAXCPU		16
 #else
 #define MAXCPU		1
 #endif /* SMP */
 
 #define ALIGNBYTES	_ALIGNBYTES
 #define ALIGN(p)	_ALIGN(p)
 
 #define PAGE_SHIFT	12		/* LOG2(PAGE_SIZE) */
 #define PAGE_SIZE	(1<<PAGE_SHIFT)	/* bytes/page */
 #define PAGE_MASK	(PAGE_SIZE-1)
 #define NPTEPG		(PAGE_SIZE/(sizeof (pt_entry_t)))
 
 #ifdef PAE
 #define NPGPTD		4
 #define PDRSHIFT	21		/* LOG2(NBPDR) */
 #else
 #define NPGPTD		1
 #define PDRSHIFT	22		/* LOG2(NBPDR) */
 #endif
 
 #define NBPTD		(NPGPTD<<PAGE_SHIFT)
 #define NPDEPTD		(NBPTD/(sizeof (pd_entry_t)))
 #define NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
 #define NBPDR		(1<<PDRSHIFT)	/* bytes/page dir */
 #define PDRMASK		(NBPDR-1)
 
-/* PREEMPTION exposes scheduler bugs that need to be fixed. */
-#if 0
-#define	PREEMPTION
-#endif
-
 #define IOPAGES	2		/* pages of i/o permission bitmap */
 
 #ifndef KSTACK_PAGES
 #define KSTACK_PAGES 2		/* Includes pcb! */
 #endif
 #define KSTACK_GUARD_PAGES 1	/* pages of kstack guard; 0 disables */
 #define UAREA_PAGES 1		/* holds struct user WITHOUT PCB (see def.) */
 
 /*
  * Ceiling on amount of swblock kva space, can be changed via
  * the kern.maxswzone /boot/loader.conf variable.
  */
 #ifndef VM_SWZONE_SIZE_MAX
 #define VM_SWZONE_SIZE_MAX	(32 * 1024 * 1024)
 #endif
 
 /*
  * Ceiling on size of buffer cache (really only affects write queueing,
  * the VM page cache is not affected), can be changed via
  * the kern.maxbcache /boot/loader.conf variable.
  */
 #ifndef VM_BCACHE_SIZE_MAX
 #define VM_BCACHE_SIZE_MAX	(200 * 1024 * 1024)
 #endif
 
 /*
  * Mach derived conversion macros
  */
 #define trunc_page(x)		((x) & ~PAGE_MASK)
 #define round_page(x)		(((x) + PAGE_MASK) & ~PAGE_MASK)
 #define trunc_4mpage(x)		((x) & ~PDRMASK)
 #define round_4mpage(x)		((((x)) + PDRMASK) & ~PDRMASK)
 
 #define atop(x)			((x) >> PAGE_SHIFT)
 #define ptoa(x)			((x) << PAGE_SHIFT)
 
 #define i386_btop(x)		((x) >> PAGE_SHIFT)
 #define i386_ptob(x)		((x) << PAGE_SHIFT)
 
 #define	pgtok(x)		((x) * (PAGE_SIZE / 1024))
 
 #endif /* !_MACHINE_PARAM_H_ */
 #endif /* !_NO_NAMESPACE_POLLUTION */
diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index c041886663ea..eb345bff2d3f 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -1,898 +1,899 @@
 /*-
  * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Berkeley Software Design Inc's name may not be used to endorse or
  *    promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $
  *	and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $
  */
 
 /*
  * Machine independent bits of mutex implementation.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_adaptive_mutexes.h"
 #include "opt_ddb.h"
 #include "opt_mprof.h"
 #include "opt_mutex_wake_all.h"
+#include "opt_sched.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include <sys/turnstile.h>
 #include <sys/vmmeter.h>
 
 #include <machine/atomic.h>
 #include <machine/bus.h>
 #include <machine/clock.h>
 #include <machine/cpu.h>
 
 #include <ddb/ddb.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 /*
  * Internal utility macros.
  */
 /* True when the lock word of `m' is exactly MTX_UNOWNED. */
 #define mtx_unowned(m)	((m)->mtx_lock == MTX_UNOWNED)
 
 /*
  * Owning thread of `m', or NULL when the mutex is unowned.  The thread
  * pointer is the lock word masked with MTX_FLAGMASK.  Note that `m' is
  * evaluated more than once.
  */
 #define mtx_owner(m)	(mtx_unowned((m)) ? NULL \
 	: (struct thread *)((m)->mtx_lock & MTX_FLAGMASK))
 
 /*
  * Lock classes for sleep and spin mutexes.
  */
 /*
  * Positional initializers: a descriptive class name followed by the LC_*
  * flag set (field names are declared with struct lock_class, outside this
  * file).  mtx_init() points lo_class at one of these two objects depending
  * on MTX_SPIN, and the lock/unlock entry points assert against them.
  */
 struct lock_class lock_class_mtx_sleep = {
 	"sleep mutex",
 	LC_SLEEPLOCK | LC_RECURSABLE
 };
 struct lock_class lock_class_mtx_spin = {
 	"spin mutex",
 	LC_SPINLOCK | LC_RECURSABLE
 };
 
 /*
  * System-wide mutexes
  */
 /* Initialized in mutex_init() as a recursive spin mutex ("sched lock"). */
 struct mtx sched_lock;
 /* Initialized in mutex_init() as a recursive MTX_DEF mutex and locked there. */
 struct mtx Giant;
 
 #ifdef MUTEX_PROFILING
 SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
 SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
 static int mutex_prof_enable = 0;
 SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW,
     &mutex_prof_enable, 0, "Enable tracing of mutex holdtime");
 
 /*
  * One profiling record per acquisition site (file and line).  Records are
  * filled in by _mtx_unlock_flags() and printed by dump_mutex_prof_stats().
  * Times are in nanoseconds as returned by nanoseconds().
  */
 struct mutex_prof {
 	const char	*name;		/* mtx_name() of the mutex that created the record */
 	const char	*file;		/* acquisition file, leading "../" stripped */
 	int		line;		/* acquisition line number */
 	uintmax_t	cnt_max;	/* longest single hold time */
 	uintmax_t	cnt_tot;	/* sum of all hold times */
 	uintmax_t	cnt_cur;	/* number of hold intervals recorded */
 	uintmax_t	cnt_contest_holding;	/* accumulated mtx_contest_holding */
 	uintmax_t	cnt_contest_locking;	/* accumulated mtx_contest_locking */
 	struct mutex_prof *next;	/* next record on the same hash chain */
 };
 
 /*
  * mprof_buf is a static pool of profiling records to avoid possible
  * reentrance of the memory allocation functions.
  *
  * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE.
  */
 #ifdef MPROF_BUFFERS
 #define NUM_MPROF_BUFFERS	MPROF_BUFFERS
 #else
 #define	NUM_MPROF_BUFFERS	1000
 #endif
 static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS];
 static int first_free_mprof_buf;
 /* Number of hash chains; may be overridden at build time. */
 #ifndef MPROF_HASH_SIZE
 #define	MPROF_HASH_SIZE		1009
 #endif
 /* Enforce the documented invariant: fewer buffers than hash chains. */
 #if NUM_MPROF_BUFFERS >= MPROF_HASH_SIZE
 #error MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE
 #endif
 static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE];
 /*
  * SWAG: sbuf size = avg stat. line size * number of locks.
  * Parenthesized so the expansion is a single operand in any expression.
  */
 #define MPROF_SBUF_SIZE		(256 * 400)
 
 /* Incremented by _mtx_lock_flags(); exported read-only through sysctl. */
 static int mutex_prof_acquisitions;
 SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
     &mutex_prof_acquisitions, 0, "Number of mutex acquisitions recorded");
 static int mutex_prof_records;
 SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD,
     &mutex_prof_records, 0, "Number of profiling records");
 static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS;
 SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
     &mutex_prof_maxrecords, 0, "Maximum number of profiling records");
 static int mutex_prof_rejected;
 SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD,
     &mutex_prof_rejected, 0, "Number of rejected profiling records");
 static int mutex_prof_hashsize = MPROF_HASH_SIZE;
 SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD,
     &mutex_prof_hashsize, 0, "Hash size");
 static int mutex_prof_collisions = 0;
 SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD,
     &mutex_prof_collisions, 0, "Number of hash collisions");
 
 /*
  * mprof_mtx protects the profiling buffers and the hash.
  */
 static struct mtx mprof_mtx;
 MTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET);
 
 /*
  * Return the current nanotime() reading flattened into a single
  * 64-bit count of nanoseconds.
  */
 static u_int64_t
 nanoseconds(void)
 {
 	struct timespec now;
 	u_int64_t nsec;
 
 	nanotime(&now);
 	nsec = now.tv_sec * (u_int64_t)1000000000;
 	nsec += now.tv_nsec;
 	return (nsec);
 }
 
 /*
  * Sysctl handler that reports the collected mutex profiling records as text.
  *
  * A header line and one line per used mprof_buf[] entry are formatted into
  * a fixed-length sbuf while mprof_mtx is held, then handed to SYSCTL_OUT().
  * If the sbuf overflows it is thrown away and the dump restarts with a
  * larger buffer.  Returns the SYSCTL_OUT() result.
  */
 static int
 dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf *sb;
 	int error, i;
 	/* Buffer size multiplier; static, so any growth persists across calls. */
 	static int multiplier = 1;
 
 	/* No records in use: emit a fixed message instead of a table. */
 	if (first_free_mprof_buf == 0)
 		return (SYSCTL_OUT(req, "No locking recorded",
 		    sizeof("No locking recorded")));
 
 retry_sbufops:
 	sb = sbuf_new(NULL, NULL, MPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN);
 	sbuf_printf(sb, "%6s %12s %11s %5s %12s %12s %s\n",
 	    "max", "total", "count", "avg", "cnt_hold", "cnt_lock", "name");
 	/*
 	 * XXX this spinlock seems to be by far the largest perpetrator
 	 * of spinlock latency (1.6 msec on an Athlon1600 was recorded
 	 * even before I pessimized it further by moving the average
 	 * computation here).
 	 */
 	mtx_lock_spin(&mprof_mtx);
 	for (i = 0; i < first_free_mprof_buf; ++i) {
 		/*
 		 * The stored times are nanoseconds; max, total and avg are
 		 * divided by 1000 for display.  avg is guarded against a
 		 * zero count.
 		 */
 		sbuf_printf(sb, "%6ju %12ju %11ju %5ju %12ju %12ju %s:%d (%s)\n",
 		    mprof_buf[i].cnt_max / 1000,
 		    mprof_buf[i].cnt_tot / 1000,
 		    mprof_buf[i].cnt_cur,
 		    mprof_buf[i].cnt_cur == 0 ? (uintmax_t)0 :
 			mprof_buf[i].cnt_tot / (mprof_buf[i].cnt_cur * 1000),
 		    mprof_buf[i].cnt_contest_holding,
 		    mprof_buf[i].cnt_contest_locking,
 		    mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name);
 		if (sbuf_overflowed(sb)) {
 			/* Too small: drop the lock, grow the buffer, start over. */
 			mtx_unlock_spin(&mprof_mtx);
 			sbuf_delete(sb);
 			multiplier++;
 			goto retry_sbufops;
 		}
 	}
 	mtx_unlock_spin(&mprof_mtx);
 	sbuf_finish(sb);
 	/* The extra byte passed to SYSCTL_OUT() is one past sbuf_len(). */
 	error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
 	sbuf_delete(sb);
 	return (error);
 }
 SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
     NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics");
 
 /*
  * Sysctl handler for the debug.mutex.prof "reset" node.
  *
  * Writing a non-zero integer clears the used part of mprof_buf[] and the
  * whole mprof_hash[] table under mprof_mtx and marks all records free.
  * Reads, writes of zero, and calls made while no record is in use change
  * nothing.  Returns 0 or the error from sysctl_handle_int().
  */
 static int
 reset_mutex_prof_stats(SYSCTL_HANDLER_ARGS)
 {
 	int error, v;
 
 	/* Nothing recorded, nothing to reset. */
 	if (first_free_mprof_buf == 0)
 		return (0);
 
 	v = 0;
 	error = sysctl_handle_int(oidp, &v, 0, req);
 	if (error)
 		return (error);
 	/* No new value was supplied, i.e. this was only a read. */
 	if (req->newptr == NULL)
 		return (error);
 	if (v == 0)
 		return (0);
 
 	mtx_lock_spin(&mprof_mtx);
 	bzero(mprof_buf, sizeof(*mprof_buf) * first_free_mprof_buf);
 	/* Size the clear from the array itself, not from an unrelated type. */
 	bzero(mprof_hash, sizeof(mprof_hash));
 	first_free_mprof_buf = 0;
 	mtx_unlock_spin(&mprof_mtx);
 	return (0);
 }
 SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
     NULL, 0, reset_mutex_prof_stats, "I", "Reset mutex profiling statistics");
 #endif
 
 /*
  * Function versions of the inlined __mtx_* macros.  These are used by
  * modules and can also be called from assembly language if needed.
  */
 /*
  * Out-of-line mtx_lock(): acquire sleep mutex `m' for curthread.
  *
  * `opts' is passed through to the witness, logging and acquire macros;
  * `file' and `line' identify the caller for diagnostics and profiling.
  * Asserts that `m' belongs to the sleep mutex class.
  */
 void
 _mtx_lock_flags(struct mtx *m, int opts, const char *file, int line)
 {
 
 	MPASS(curthread != NULL);
 	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
 	    ("mtx_lock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
 	    file, line));
 	WITNESS_CHECKORDER(&m->mtx_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
 	    file, line);
 	_get_sleep_lock(m, curthread, opts, file, line);
 	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
 	    line);
 	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
 #ifdef MUTEX_PROFILING
 	/* don't reset the timer when/if recursing */
 	if (m->mtx_acqtime == 0) {
 		/*
 		 * Remember the acquisition site for _mtx_unlock_flags().
 		 * mtx_acqtime stays 0 while profiling is disabled, so this
 		 * branch (and the counter below) is also taken in that case.
 		 */
 		m->mtx_filename = file;
 		m->mtx_lineno = line;
 		m->mtx_acqtime = mutex_prof_enable ? nanoseconds() : 0;
 		++mutex_prof_acquisitions;
 	}
 #endif
 }
 
 /*
  * Out-of-line mtx_unlock(): release sleep mutex `m' held by curthread.
  *
  * With MUTEX_PROFILING, a non-zero mtx_acqtime means a hold interval is
  * being timed; its duration and the per-mutex contention counters are
  * folded into the record for the acquisition site before the lock is
  * actually released.
  */
 void
 _mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
 {
 
 	MPASS(curthread != NULL);
 	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep,
 	    ("mtx_unlock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name,
 	    file, line));
 	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
 	    line);
 	mtx_assert(m, MA_OWNED);
 #ifdef MUTEX_PROFILING
 	if (m->mtx_acqtime != 0) {
 		static const char *unknown = "(unknown)";
 		struct mutex_prof *mpp;
 		u_int64_t acqtime, now;
 		const char *p, *q;
 		volatile u_int hash;
 
 		now = nanoseconds();
 		acqtime = m->mtx_acqtime;
 		m->mtx_acqtime = 0;
 		/* Ignore intervals that are empty or appear to run backwards. */
 		if (now <= acqtime)
 			goto out;
 		/* Strip any leading "../" components from the file name. */
 		for (p = m->mtx_filename;
 		    p != NULL && strncmp(p, "../", 3) == 0; p += 3)
 			/* nothing */ ;
 		if (p == NULL || *p == '\0')
 			p = unknown;
 		/* Hash the line number and file name into a chain index. */
 		for (hash = m->mtx_lineno, q = p; *q != '\0'; ++q)
 			hash = (hash * 2 + *q) % MPROF_HASH_SIZE;
 		mtx_lock_spin(&mprof_mtx);
 		/* Look for an existing record for this file:line. */
 		for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next)
 			if (mpp->line == m->mtx_lineno &&
 			    strcmp(mpp->file, p) == 0)
 				break;
 		if (mpp == NULL) {
 			/* Just exit if we cannot get a trace buffer */
 			if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) {
 				++mutex_prof_rejected;
 				goto unlock;
 			}
 			/* Take the next free record and push it onto its chain. */
 			mpp = &mprof_buf[first_free_mprof_buf++];
 			mpp->name = mtx_name(m);
 			mpp->file = p;
 			mpp->line = m->mtx_lineno;
 			mpp->next = mprof_hash[hash];
 			if (mprof_hash[hash] != NULL)
 				++mutex_prof_collisions;
 			mprof_hash[hash] = mpp;
 			++mutex_prof_records;
 		}
 		/*
 		 * Record if the mutex has been held longer now than ever
 		 * before.
 		 */
 		if (now - acqtime > mpp->cnt_max)
 			mpp->cnt_max = now - acqtime;
 		mpp->cnt_tot += now - acqtime;
 		mpp->cnt_cur++;
 		/*
 		 * There's a small race, really we should cmpxchg
 		 * 0 with the current value, but that would bill
 		 * the contention to the wrong lock instance if
 		 * it followed this also.
 		 */
 		mpp->cnt_contest_holding += m->mtx_contest_holding;
 		m->mtx_contest_holding = 0;
 		mpp->cnt_contest_locking += m->mtx_contest_locking;
 		m->mtx_contest_locking = 0;
 unlock:
 		mtx_unlock_spin(&mprof_mtx);
 	}
 out:
 #endif
 	/* Profiling bookkeeping is done; now actually release the mutex. */
 	_rel_sleep_lock(m, curthread, opts, file, line);
 }
 
 /*
  * Out-of-line mtx_lock_spin(): acquire spin mutex `m' for curthread.
  * Asserts that `m' belongs to the spin mutex class.
  */
 void
 _mtx_lock_spin_flags(struct mtx *m, int opts, const char *file, int line)
 {
 
 	MPASS(curthread != NULL);
 	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
 	    ("mtx_lock_spin() of sleep mutex %s @ %s:%d",
 	    m->mtx_object.lo_name, file, line));
 	WITNESS_CHECKORDER(&m->mtx_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE,
 	    file, line);
 	/*
 	 * The trailing "|| 1" makes this condition always true, so the
 	 * critical_enter()-only branch is never compiled.
 	 */
 #if defined(SMP) || LOCK_DEBUG > 0 || 1
 	_get_spin_lock(m, curthread, opts, file, line);
 #else
 	critical_enter();
 #endif
 	LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file,
 	    line);
 	WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
 }
 
 /*
  * Out-of-line mtx_unlock_spin(): release spin mutex `m' held by curthread.
  * Asserts that `m' belongs to the spin mutex class and is owned.
  */
 void
 _mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
 {
 
 	MPASS(curthread != NULL);
 	KASSERT(m->mtx_object.lo_class == &lock_class_mtx_spin,
 	    ("mtx_unlock_spin() of sleep mutex %s @ %s:%d",
 	    m->mtx_object.lo_name, file, line));
 	WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line);
 	LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
 	    line);
 	mtx_assert(m, MA_OWNED);
 	/*
 	 * The trailing "|| 1" makes this condition always true, so the
 	 * critical_exit()-only branch is never compiled.
 	 */
 #if defined(SMP) || LOCK_DEBUG > 0 || 1
 	_rel_spin_lock(m);
 #else
 	critical_exit();
 #endif
 }
 
 /*
  * The important part of mtx_trylock{,_flags}()
  * Tries to acquire lock `m.'  If this function is called on a mutex that
  * is already owned, it will recursively acquire the lock.
  */
 /*
  * Try to acquire `m' without blocking.
  *
  * Returns non-zero if the lock was obtained (or recursed on) and zero
  * otherwise.  Witness is only told about the lock on success.
  */
 int
 _mtx_trylock(struct mtx *m, int opts, const char *file, int line)
 {
 	int rval;
 
 	MPASS(curthread != NULL);
 
 	/* Already ours and recursion is allowed: bump the recursion count. */
 	if (mtx_owned(m) && (m->mtx_object.lo_flags & LO_RECURSABLE) != 0) {
 		m->mtx_recurse++;
 		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
 		rval = 1;
 	} else
 		rval = _obtain_lock(m, curthread);
 
 	LOCK_LOG_TRY("LOCK", &m->mtx_object, opts, rval, file, line);
 	if (rval)
 		WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE | LOP_TRYLOCK,
 		    file, line);
 
 	return (rval);
 }
 
 /*
  * _mtx_lock_sleep: the tougher part of acquiring an MTX_DEF lock.
  *
  * We call this if the lock is either contested (i.e. we need to go to
  * sleep waiting for it), or if we need to recurse on it.
  */
 /*
  * Slow path of acquiring sleep mutex `m' on behalf of thread `td'.
  *
  * Handles recursion directly; otherwise loops until _obtain_lock()
  * succeeds, marking the mutex contested and either spinning (adaptive
  * mutexes on SMP) or blocking on the mutex's turnstile.  `opts', `file'
  * and `line' are used for logging only.
  */
 void
 _mtx_lock_sleep(struct mtx *m, struct thread *td, int opts, const char *file,
     int line)
 {
 	struct turnstile *ts;
 #if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
 	struct thread *owner;
 #endif
 	uintptr_t v;
 #ifdef KTR
 	int cont_logged = 0;
 #endif
 #ifdef MUTEX_PROFILING
 	int contested;
 #endif
 
 	/* We already own the mutex: this is a recursive acquisition. */
 	if (mtx_owned(m)) {
 		KASSERT((m->mtx_object.lo_flags & LO_RECURSABLE) != 0,
 	    ("_mtx_lock_sleep: recursed on non-recursive mutex %s @ %s:%d\n",
 		    m->mtx_object.lo_name, file, line));
 		m->mtx_recurse++;
 		atomic_set_ptr(&m->mtx_lock, MTX_RECURSED);
 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
 			CTR1(KTR_LOCK, "_mtx_lock_sleep: %p recursing", m);
 		return;
 	}
 
 	if (LOCK_LOG_TEST(&m->mtx_object, opts))
 		CTR4(KTR_LOCK,
 		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
 		    m->mtx_object.lo_name, (void *)m->mtx_lock, file, line);
 
 #ifdef MUTEX_PROFILING
 	contested = 0;
 #endif
 	while (!_obtain_lock(m, td)) {
 #ifdef MUTEX_PROFILING
 		contested = 1;
 		atomic_add_int(&m->mtx_contest_holding, 1);
 #endif
 		/* Get the turnstile, then take a snapshot of the lock word. */
 		ts = turnstile_lookup(&m->mtx_object);
 		v = m->mtx_lock;
 
 		/*
 		 * Check if the lock has been released while spinning for
 		 * the turnstile chain lock.
 		 */
 		if (v == MTX_UNOWNED) {
 			turnstile_release(&m->mtx_object);
 			cpu_spinwait();
 			continue;
 		}
 
 #ifdef MUTEX_WAKE_ALL
 		MPASS(v != MTX_CONTESTED);
 #else
 		/*
 		 * The mutex was marked contested on release. This means that
 		 * there are other threads blocked on it.  Grab ownership of
 		 * it and propagate its priority to the current thread if
 		 * necessary.
 		 */
 		if (v == MTX_CONTESTED) {
 			MPASS(ts != NULL);
 			m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;
 			turnstile_claim(ts);
 			break;
 		}
 #endif
 
 		/*
 		 * If the mutex isn't already contested and a failure occurs
 		 * setting the contested bit, the mutex was either released
 		 * or the state of the MTX_RECURSED bit changed.
 		 */
 		if ((v & MTX_CONTESTED) == 0 &&
 		    !atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
 			(void *)(v | MTX_CONTESTED))) {
 			turnstile_release(&m->mtx_object);
 			cpu_spinwait();
 			continue;
 		}
 
 #if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
 		/*
 		 * If the current owner of the lock is executing on another
 		 * CPU, spin instead of blocking.
 		 */
 		owner = (struct thread *)(v & MTX_FLAGMASK);
 		/* Giant is only spun on when ADAPTIVE_GIANT is defined. */
 #ifdef ADAPTIVE_GIANT
 		if (TD_IS_RUNNING(owner)) {
 #else
 		if (m != &Giant && TD_IS_RUNNING(owner)) {
 #endif
 			turnstile_release(&m->mtx_object);
 			while (mtx_owner(m) == owner && TD_IS_RUNNING(owner)) {
 				cpu_spinwait();
 			}
 			/* Owner changed or stopped running: retry from the top. */
 			continue;
 		}
 #endif	/* SMP && !NO_ADAPTIVE_MUTEXES */
 
 		/*
 		 * We definitely must sleep for this lock.
 		 */
 		mtx_assert(m, MA_NOTOWNED);
 
 #ifdef KTR
 		/* Log the start of contention only once per acquisition. */
 		if (!cont_logged) {
 			CTR6(KTR_CONTENTION,
 			    "contention: %p at %s:%d wants %s, taken by %s:%d",
 			    td, file, line, m->mtx_object.lo_name,
 			    WITNESS_FILE(&m->mtx_object),
 			    WITNESS_LINE(&m->mtx_object));
 			cont_logged = 1;
 		}
 #endif
 
 		/*
 		 * Block on the turnstile.
 		 */
 		turnstile_wait(ts, &m->mtx_object, mtx_owner(m));
 	}
 
 #ifdef KTR
 	if (cont_logged) {
 		CTR4(KTR_CONTENTION,
 		    "contention end: %s acquired by %p at %s:%d",
 		    m->mtx_object.lo_name, td, file, line);
 	}
 #endif
 #ifdef MUTEX_PROFILING
 	/* Count one contested acquisition if any attempt above failed. */
 	if (contested)
 		m->mtx_contest_locking++;
 	m->mtx_contest_holding = 0;
 #endif
 	return;
 }
 
 /*
  * _mtx_lock_spin: the tougher part of acquiring an MTX_SPIN lock.
  *
  * This is only called if we need to actually spin for the lock. Recursion
  * is handled inline.
  */
 /*
  * Slow path of acquiring spin mutex `m' on behalf of thread `td'.
  *
  * Repeatedly tries _obtain_lock(); between attempts it leaves the critical
  * section and busy-waits until the lock word reads MTX_UNOWNED.  Panics if
  * the wait goes on too long and the kernel debugger is not active.
  */
 void
 _mtx_lock_spin(struct mtx *m, struct thread *td, int opts, const char *file,
     int line)
 {
 	/* Iteration counter; never reset, so it spans all retry rounds. */
 	int i = 0;
 
 	if (LOCK_LOG_TEST(&m->mtx_object, opts))
 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
 
 	for (;;) {
 		if (_obtain_lock(m, td))
 			break;
 
 		/* Give interrupts a chance while we spin. */
 		critical_exit();
 		while (m->mtx_lock != MTX_UNOWNED) {
 			/* First 10,000,000 iterations: plain spinning. */
 			if (i++ < 10000000) {
 				cpu_spinwait();
 				continue;
 			}
 			/* Up to 60,000,000: add a DELAY(1) per iteration. */
 			if (i < 60000000)
 				DELAY(1);
 			else if (!kdb_active) {
 				printf("spin lock %s held by %p for > 5 seconds\n",
 				    m->mtx_object.lo_name, (void *)m->mtx_lock);
 #ifdef WITNESS
 				witness_display_spinlock(&m->mtx_object,
 				    mtx_owner(m));
 #endif
 				panic("spin lock held too long");
 			}
 			cpu_spinwait();
 		}
 		/* Re-enter the critical section before the next attempt. */
 		critical_enter();
 	}
 
 	if (LOCK_LOG_TEST(&m->mtx_object, opts))
 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m);
 
 	return;
 }
 
 /*
  * _mtx_unlock_sleep: the tougher part of releasing an MTX_DEF lock.
  *
  * We are only called here if the lock is recursed or contested (i.e. we
  * need to wake up a blocked thread).
  */
 /*
  * Slow path of releasing sleep mutex `m'.
  *
  * Undoes one level of recursion if the mutex is recursed; otherwise
  * releases the lock and wakes waiter(s) through the mutex's turnstile.
  * Without PREEMPTION it may then switch away from the current thread (see
  * the XXX comment below).  `opts' is used for logging only; `file' and
  * `line' are unused in this function.
  */
 void
 _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
 {
 	struct turnstile *ts;
 #ifndef PREEMPTION
 	struct thread *td, *td1;
 #endif
 
 	if (mtx_recursed(m)) {
 		/* Clear MTX_RECURSED only when the last level is undone. */
 		if (--(m->mtx_recurse) == 0)
 			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
 			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p unrecurse", m);
 		return;
 	}
 
 	ts = turnstile_lookup(&m->mtx_object);
 	if (LOCK_LOG_TEST(&m->mtx_object, opts))
 		CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
 
 #if defined(SMP) && !defined(NO_ADAPTIVE_MUTEXES)
 	/*
 	 * With adaptive mutexes a missing turnstile is tolerated: just
 	 * release the lock.  Otherwise a turnstile is asserted to exist.
 	 */
 	if (ts == NULL) {
 		_release_lock_quick(m);
 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
 			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m);
 		turnstile_release(&m->mtx_object);
 		return;
 	}
 #else
 	MPASS(ts != NULL);
 #endif
 #ifndef PREEMPTION
 	/* XXX */
 	/* Saved for the priority comparison at the end of this function. */
 	td1 = turnstile_head(ts);
 #endif
 #ifdef MUTEX_WAKE_ALL
 	turnstile_broadcast(ts);
 	_release_lock_quick(m);
 #else
 	/*
 	 * Depending on turnstile_signal()'s result the lock word is either
 	 * fully released or left as MTX_CONTESTED with no owner.
 	 */
 	if (turnstile_signal(ts)) {
 		_release_lock_quick(m);
 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
 			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
 	} else {
 		m->mtx_lock = MTX_CONTESTED;
 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
 			CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p still contested",
 			    m);
 	}
 #endif
 	turnstile_unpend(ts);
 
 #ifndef PREEMPTION
 	/*
 	 * XXX: This is just a hack until preemption is done.  However,
 	 * once preemption is done we need to either wrap the
 	 * turnstile_signal() and release of the actual lock in an
 	 * extra critical section or change the preemption code to
 	 * always just set a flag and never do instant-preempts.
 	 */
 	td = curthread;
 	/*
 	 * Stay on the CPU if we are inside a critical section or if td1's
 	 * td_priority value is not numerically lower than ours.
 	 */
 	if (td->td_critnest > 0 || td1->td_priority >= td->td_priority)
 		return;
 	mtx_lock_spin(&sched_lock);
 	if (!TD_IS_RUNNING(td1)) {
 #ifdef notyet
 		if (td->td_ithd != NULL) {
 			struct ithd *it = td->td_ithd;
 
 			if (it->it_interrupted) {
 				if (LOCK_LOG_TEST(&m->mtx_object, opts))
 					CTR2(KTR_LOCK,
 				    "_mtx_unlock_sleep: %p interrupted %p",
 					    it, it->it_interrupted);
 				intr_thd_fixup(it);
 			}
 		}
 #endif
 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
 			CTR2(KTR_LOCK,
 			    "_mtx_unlock_sleep: %p switching out lock=%p", m,
 			    (void *)m->mtx_lock);
 
 		/* Involuntary context switch, made with sched_lock held. */
 		mi_switch(SW_INVOL, NULL);
 		if (LOCK_LOG_TEST(&m->mtx_object, opts))
 			CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
 			    m, (void *)m->mtx_lock);
 	}
 	mtx_unlock_spin(&sched_lock);
 #endif
 
 	return;
 }
 
 /*
  * All the unlocking of MTX_SPIN locks is done inline.
  * See the _rel_spin_lock() macro for the details.
  */
 
 /*
  * The backing function for the INVARIANTS-enabled mtx_assert()
  */
 #ifdef INVARIANT_SUPPORT
 /*
  * Verify an ownership/recursion assertion on mutex `m' and panic with a
  * message naming the mutex and the caller's `file':`line' when it does
  * not hold.  `what' is one of the MA_* combinations handled below; any
  * other value is itself a panic.  Nothing is checked once the system has
  * already panicked.
  */
 void
 _mtx_assert(struct mtx *m, int what, const char *file, int line)
 {
 
 	if (panicstr != NULL)
 		return;
 
 	switch (what) {
 	case MA_NOTOWNED:
 		if (mtx_owned(m))
 			panic("mutex %s owned at %s:%d",
 			    m->mtx_object.lo_name, file, line);
 		break;
 	case MA_OWNED:
 	case MA_OWNED | MA_RECURSED:
 	case MA_OWNED | MA_NOTRECURSED:
 		/* Ownership first, then the optional recursion requirement. */
 		if (!mtx_owned(m))
 			panic("mutex %s not owned at %s:%d",
 			    m->mtx_object.lo_name, file, line);
 		if (!mtx_recursed(m)) {
 			if ((what & MA_RECURSED) != 0)
 				panic("mutex %s unrecursed at %s:%d",
 				    m->mtx_object.lo_name, file, line);
 		} else if ((what & MA_NOTRECURSED) != 0) {
 			panic("mutex %s recursed at %s:%d",
 			    m->mtx_object.lo_name, file, line);
 		}
 		break;
 	default:
 		panic("unknown mtx_assert at %s:%d", file, line);
 	}
 }
 #endif
 
 /*
  * The MUTEX_DEBUG-enabled mtx_validate()
  *
  * Most of these checks have been moved off into the LO_INITIALIZED flag
  * maintained by the witness code.
  */
 #ifdef MUTEX_DEBUG
 
 void	mtx_validate(struct mtx *);
 
 /*
  * Sanity-check the memory backing mutex `m' before it is initialized.
  *
  * The only check, a kernacc() read/write probe of the mutex, is currently
  * compiled out ("notyet"), so this function does nothing at present.
  */
 void
 mtx_validate(struct mtx *m)
 {
 
 /*
  * XXX: When kernacc() does not require Giant we can reenable this check
  */
 #ifdef notyet
 /*
  * XXX - When kernacc() is fixed on the alpha to handle K0_SEG memory properly
  * we can re-enable the kernacc() checks.
  */
 #ifndef __alpha__
 	/*
 	 * Can't call kernacc() from early init386(), especially when
 	 * initializing Giant mutex, because some stuff in kernacc()
 	 * requires Giant itself.
 	 */
 	if (!cold)
 		/* Probe the whole mutex, not just pointer-size bytes of it. */
 		if (!kernacc((caddr_t)m, sizeof(*m),
 		    VM_PROT_READ | VM_PROT_WRITE))
 			panic("Can't read and write to mutex %p", m);
 #endif
 #endif
 }
 #endif
 
 /*
  * General init routine used by the MTX_SYSINIT() macro.
  */
 /*
  * SYSINIT trampoline: `arg' points to a struct mtx_args describing the
  * mutex to set up; the lock type is left NULL so mtx_init() falls back
  * to the description.
  */
 void
 mtx_sysinit(void *arg)
 {
 	struct mtx_args *args;
 
 	args = arg;
 	mtx_init(args->ma_mtx, args->ma_desc, NULL, args->ma_opts);
 }
 
 /*
  * Mutex initialization routine; initialize lock `m' of type contained in
  * `opts' with options contained in `opts' and name `name.'  The optional
  * lock type `type' is used as a general lock category name for use with
  * witness.
  */
 /*
  * Prepare mutex `m' for use.
  *
  * `name' becomes the lock name, `type' (or `name' when `type' is NULL) the
  * witness lock type, and `opts' selects spin vs. sleep class plus the
  * quiet/recursable/witness/dupok lock flags.  The mutex must not already
  * carry LO_INITIALIZED; its storage is zeroed before being filled in.
  */
 void
 mtx_init(struct mtx *m, const char *name, const char *type, int opts)
 {
 	struct lock_object *lo;
 
 	MPASS((opts & ~(MTX_SPIN | MTX_QUIET | MTX_RECURSE |
 	    MTX_NOWITNESS | MTX_DUPOK)) == 0);
 
 #ifdef MUTEX_DEBUG
 	/* Diagnostic and error correction */
 	mtx_validate(m);
 #endif
 
 	lo = &m->mtx_object;
 	KASSERT((lo->lo_flags & LO_INITIALIZED) == 0,
 	    ("mutex \"%s\" %p already initialized", name, m));
 	bzero(m, sizeof(*m));
 
 	/* Lock class and naming. */
 	lo->lo_class = (opts & MTX_SPIN) ? &lock_class_mtx_spin :
 	    &lock_class_mtx_sleep;
 	lo->lo_name = name;
 	if (type == NULL)
 		type = name;
 	lo->lo_type = type;
 
 	/* Translate MTX_* options into LO_* lock flags. */
 	lo->lo_flags = (opts & MTX_QUIET) ? LO_QUIET : 0;
 	if (opts & MTX_RECURSE)
 		lo->lo_flags |= LO_RECURSABLE;
 	if (!(opts & MTX_NOWITNESS))
 		lo->lo_flags |= LO_WITNESS;
 	if (opts & MTX_DUPOK)
 		lo->lo_flags |= LO_DUPOK;
 
 	m->mtx_lock = MTX_UNOWNED;
 
 	LOCK_LOG_INIT(lo, opts);
 
 	WITNESS_INIT(lo);
 }
 
 /*
  * Remove lock `m' from all_mtx queue.  We don't allow MTX_QUIET to be
  * passed in as a flag here because if the corresponding mtx_init() was
  * called with MTX_QUIET set, then it will already be set in the mutex's
  * flags.
  */
 /*
  * Tear down mutex `m'.  The mutex must be either unowned, or owned by the
  * caller without being recursed or contested; in the latter case witness
  * is told the lock has been dropped before the lock object is destroyed.
  */
 void
 mtx_destroy(struct mtx *m)
 {
 
 	LOCK_LOG_DESTROY(&m->mtx_object, 0);
 
 	if (mtx_owned(m)) {
 		MPASS((m->mtx_lock & (MTX_RECURSED|MTX_CONTESTED)) == 0);
 
 		/* Tell witness this isn't locked to make it happy. */
 		WITNESS_UNLOCK(&m->mtx_object, LOP_EXCLUSIVE, __FILE__,
 		    __LINE__);
 	} else
 		MPASS(mtx_unowned(m));
 
 	WITNESS_DESTROY(&m->mtx_object);
 }
 
 /*
  * Initialize the mutex code and system mutexes.  This is called from the MD
  * startup code prior to mi_startup().  The per-CPU data space needs to be
  * setup before this is called.
  */
 /*
  * Set up thread0's contested-lock list and the turnstiles, initialize the
  * Giant, sched_lock and proc0 process mutexes, and return with Giant held.
  */
 void
 mutex_init(void)
 {
 
 	/* Setup thread0 so that mutexes work. */
 	LIST_INIT(&thread0.td_contested);
 
 	/* Setup turnstiles so that sleep mutexes work. */
 	init_turnstiles();
 
 	/*
 	 * Initialize mutexes.
 	 */
 	mtx_init(&Giant, "Giant", NULL, MTX_DEF | MTX_RECURSE);
 	mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE);
 	mtx_init(&proc0.p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
 	/* Giant is left locked for the caller. */
 	mtx_lock(&Giant);
 }
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index a720becb7e13..25e81c4e21fe 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -1,618 +1,619 @@
 /*-
  * Copyright (c) 1986, 1988, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)kern_shutdown.c	8.3 (Berkeley) 1/21/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_kdb.h"
 #include "opt_mac.h"
 #include "opt_panic.h"
 #include "opt_show_busybufs.h"
+#include "opt_sched.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/cons.h>
 #include <sys/eventhandler.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/mac.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/proc.h>
 #include <sys/reboot.h>
 #include <sys/resourcevar.h>
 #include <sys/smp.h>		/* smp_active */
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/vnode.h>
 
 #include <machine/cpu.h>
 #include <machine/pcb.h>
 #include <machine/smp.h>
 
 #include <sys/signalvar.h>
 
 #ifndef PANIC_REBOOT_WAIT_TIME
 #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */
 #endif
 
 /*
  * Note that stdarg.h and the ANSI style va_start macro is used for both
  * ANSI and traditional C compilers.
  */
 #include <machine/stdarg.h>
 
 #ifdef KDB
 #ifdef KDB_UNATTENDED
 int debugger_on_panic = 0;
 #else
 int debugger_on_panic = 1;
 #endif
 SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW,
 	&debugger_on_panic, 0, "Run debugger on kernel panic");
 
 #ifdef KDB_TRACE
 int trace_on_panic = 1;
 #else
 int trace_on_panic = 0;
 #endif
 SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, CTLFLAG_RW,
 	&trace_on_panic, 0, "Print stack trace on kernel panic");
 #endif /* KDB */
 
 int sync_on_panic = 0;
 SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RW,
 	&sync_on_panic, 0, "Do a sync before rebooting from a panic");
 
 SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment");
 
 /*
  * Variable panicstr contains argument to first call to panic; used as flag
  * to indicate that the kernel has already called panic.
  */
 const char *panicstr;
 
 int dumping;				/* system is dumping */
 static struct dumperinfo dumper;	/* our selected dumper */
 
 /* Context information for dump-debuggers. */
 static struct pcb dumppcb;		/* Registers. */
 static lwpid_t dumptid;			/* Thread ID. */
 
 static void boot(int) __dead2;
 static void poweroff_wait(void *, int);
 static void shutdown_halt(void *junk, int howto);
 static void shutdown_panic(void *junk, int howto);
 static void shutdown_reset(void *junk, int howto);
 
 /* register various local shutdown events */
 /*
  * Hook this file's shutdown handlers onto the shutdown_final event.
  * poweroff_wait is registered at SHUTDOWN_PRI_FIRST; shutdown_halt and
  * shutdown_panic share SHUTDOWN_PRI_LAST + 100; shutdown_reset is placed
  * at SHUTDOWN_PRI_LAST + 200.  The argument is unused.
  */
 static void
 shutdown_conf(void *unused)
 {
 
 	EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL,
 	    SHUTDOWN_PRI_FIRST);
 	EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL,
 	    SHUTDOWN_PRI_LAST + 100);
 	EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL,
 	    SHUTDOWN_PRI_LAST + 100);
 	EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL,
 	    SHUTDOWN_PRI_LAST + 200);
 }
 
 SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL)
 
 /*
  * The system call that results in a reboot
  *
  * MPSAFE
  */
 /* ARGSUSED */
 /*
  * reboot(2) entry point.  The request must pass the MAC policy check
  * (when MAC is configured) and the superuser check; the first failing
  * check's error is returned.  Otherwise boot() is called with the
  * requested options while holding Giant.
  */
 int
 reboot(struct thread *td, struct reboot_args *uap)
 {
 	int error;
 
 #ifdef MAC
 	error = mac_check_system_reboot(td->td_ucred, uap->opt);
 	if (error != 0)
 		return (error);
 #endif
 	error = suser(td);
 	if (error != 0)
 		return (error);
 
 	mtx_lock(&Giant);
 	boot(uap->opt);
 	mtx_unlock(&Giant);
 	return (0);
 }
 
 /*
  * Called by events that want to shut down.. e.g  <CTL><ALT><DEL> on a PC
  */
 static int shutdown_howto = 0;
 
 /*
  * Request an orderly shutdown.  `howto' is remembered in shutdown_howto so
  * that boot() can merge it in later.  When init(8) exists it is sent
  * SIGINT; when it does not, the system is rebooted directly without
  * syncing.
  */
 void
 shutdown_nice(int howto)
 {
 
 	shutdown_howto = howto;
 
 	if (initproc == NULL) {
 		/* No init(8) running, so simply reboot */
 		boot(RB_NOSYNC);
 		return;
 	}
 
 	/* Send a signal to init(8) and have it shutdown the world */
 	PROC_LOCK(initproc);
 	psignal(initproc, SIGINT);
 	PROC_UNLOCK(initproc);
 }
 static int	waittime = -1;
 
 /*
  * Print the system uptime as "Uptime: [Nd][Nh][Nm]Ns".  A unit is shown
  * when its value is non-zero or when any larger unit was already shown.
  */
 static void
 print_uptime(void)
 {
 	struct timespec up;
 	int shown;	/* set once any larger unit has been printed */
 
 	getnanouptime(&up);
 	printf("Uptime: ");
 	shown = 0;
 
 	/* Days. */
 	if (up.tv_sec >= 86400) {
 		printf("%ldd", (long)up.tv_sec / 86400);
 		up.tv_sec %= 86400;
 		shown = 1;
 	}
 	/* Hours. */
 	if (shown || up.tv_sec >= 3600) {
 		printf("%ldh", (long)up.tv_sec / 3600);
 		up.tv_sec %= 3600;
 		shown = 1;
 	}
 	/* Minutes. */
 	if (shown || up.tv_sec >= 60) {
 		printf("%ldm", (long)up.tv_sec / 60);
 		up.tv_sec %= 60;
 		shown = 1;
 	}
 	/* Seconds are always printed. */
 	printf("%lds\n", (long)up.tv_sec);
 }
 
 /*
  * Write a crash dump using the selected dumper.  The current register
  * context and thread ID are saved first for dump-debuggers.
  */
 static void
 doadump(void)
 {
 
 	if (dumper.dumper != NULL) {
 		savectx(&dumppcb);
 		dumptid = curthread->td_tid;
 		dumping++;
 		dumpsys(&dumper);
 		return;
 	}
 
 	/*
 	 * No dump device is configured.  People sometimes call this from
 	 * the kernel debugger (when 'panic' can not dump), so tell them
 	 * why nothing happens.
 	 */
 	printf("Cannot dump. No dump device defined.\n");
 }
 
 /*
  * Go through the rigmarole of shutting down.
  * This used to be in machdep.c, but nothing in it turned out to be
  * machine dependent.
  *
  * howto: RB_* flags for this shutdown; flags stored earlier in
  *     shutdown_howto are OR'ed in.
  *
  * In order: runs the shutdown_pre_sync handlers, syncs the filesystems
  * (unless suppressed), prints the uptime, runs the shutdown_post_sync
  * handlers, optionally takes a dump, and runs the shutdown_final
  * handlers.  Ends in an infinite loop, so it does not return.
  */
 static void
 boot(int howto)
 {
 	/* Cleared once the "Syncing disks" banner has been printed. */
 	static int first_buf_printf = 1;
 
 	/* collect extra flags that shutdown_nice might have set */
 	howto |= shutdown_howto;
 
 	/* We are out of the debugger now. */
 	kdb_active = 0;
 
 #ifdef SMP
 	if (smp_active)
 		printf("boot() called on cpu#%d\n", PCPU_GET(cpuid));
 #endif
 	/*
 	 * Do any callouts that should be done BEFORE syncing the filesystems.
 	 */
 	EVENTHANDLER_INVOKE(shutdown_pre_sync, howto);
 
 	/*
 	 * Now sync filesystems.  waittime is set to 0 inside, so this
 	 * block is entered at most once.
 	 */
 	if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) {
 		register struct buf *bp;
 		int iter, nbusy, pbusy;
 #ifndef PREEMPTION
 		int subiter;
 #endif
 
 		waittime = 0;
 
 		sync(&thread0, NULL);
 
 		/*
 		 * With soft updates, some buffers that are
 		 * written will be remarked as dirty until other
 		 * buffers are written.
 		 */
 		for (iter = pbusy = 0; iter < 20; iter++) {
 			/* Count buffers that are still referenced or dirty. */
 			nbusy = 0;
 			for (bp = &buf[nbuf]; --bp >= buf; ) {
 				if ((bp->b_flags & B_INVAL) == 0 &&
 				    BUF_REFCNT(bp) > 0) {
 					nbusy++;
 				} else if ((bp->b_flags & (B_DELWRI | B_INVAL))
 						== B_DELWRI) {
 					/* bawrite(bp);*/
 					nbusy++;
 				}
 			}
 			if (nbusy == 0) {
 				if (first_buf_printf)
 					printf("No buffers busy after final sync");
 				break;
 			}
 			if (first_buf_printf) {
 				printf("Syncing disks, buffers remaining... ");
 				first_buf_printf = 0;
 			}
 			printf("%d ", nbusy);
 			/* Fewer busy buffers than last pass: restart the retry count. */
 			if (nbusy < pbusy)
 				iter = 0;
 			pbusy = nbusy;
 			sync(&thread0, NULL);
 
 #ifdef PREEMPTION
 			/*
 			 * Drop Giant and spin for a while to allow
 			 * interrupt threads to run.
 			 */
 			DROP_GIANT();
 			DELAY(50000 * iter);
 			PICKUP_GIANT();
 #else
 			/*
 			 * Drop Giant and context switch several times to
 			 * allow interrupt threads to run.
 			 */
 			DROP_GIANT();
 			for (subiter = 0; subiter < 50 * iter; subiter++) {
 				mtx_lock_spin(&sched_lock);
 				mi_switch(SW_VOL, NULL);
 				mtx_unlock_spin(&sched_lock);
 				DELAY(1000);
 			}
 			PICKUP_GIANT();
 #endif
 		}
 		printf("\n");
 		/*
 		 * Count only busy local buffers to prevent forcing
 		 * a fsck if we're just a client of a wedged NFS server
 		 */
 		nbusy = 0;
 		for (bp = &buf[nbuf]; --bp >= buf; ) {
 			if (((bp->b_flags&B_INVAL) == 0 && BUF_REFCNT(bp)) ||
 			    ((bp->b_flags & (B_DELWRI|B_INVAL)) == B_DELWRI)) {
 				/*
 				 * No device behind this buffer: take its
 				 * mount off mountlist and do not count it.
 				 */
 				if (bp->b_dev == NULL) {
 					TAILQ_REMOVE(&mountlist,
 					    bp->b_vp->v_mount, mnt_list);
 					continue;
 				}
 				nbusy++;
 #if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC)
 				printf(
 			    "%d: dev:%s, flags:%0x, blkno:%ld, lblkno:%ld\n",
 				    nbusy, devtoname(bp->b_dev),
 				    bp->b_flags, (long)bp->b_blkno,
 				    (long)bp->b_lblkno);
 #endif
 			}
 		}
 		if (nbusy) {
 			/*
 			 * Failed to sync all blocks. Indicate this and don't
 			 * unmount filesystems (thus forcing an fsck on reboot).
 			 */
 			printf("Giving up on %d buffers\n", nbusy);
 			DELAY(5000000);	/* 5 seconds */
 		} else {
 			if (!first_buf_printf)
 				printf("Final sync complete\n");
 			/*
 			 * Unmount filesystems, but not while panicking.
 			 */
 			if (panicstr == 0)
 				vfs_unmountall();
 		}
 		DELAY(100000);		/* wait for console output to finish */
 	}
 
 	print_uptime();
 
 	/*
 	 * Ok, now do things that assume all filesystem activity has
 	 * been completed.
 	 */
 	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
 	splhigh();
 	/* Dump only when RB_DUMP is set and RB_HALT is not. */
 	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 
 		doadump();
 
 	/* Now that we're going to really halt the system... */
 	EVENTHANDLER_INVOKE(shutdown_final, howto);
 
 	for(;;) ;	/* safety against shutdown_reset not working */
 	/* NOTREACHED */
 }
 
 /*
  * shutdown_final handler: if the shutdown was a clean halt (RB_HALT),
  * announce it and wait for a key press on the console.  With no console
  * (cngetc() returns -1) the CPU is halted instead.
  */
 static void
 shutdown_halt(void *junk, int howto)
 {
 
 	if ((howto & RB_HALT) == 0)
 		return;
 
 	printf("\n");
 	printf("The operating system has halted.\n");
 	printf("Please press any key to reboot.\n\n");
 	if (cngetc() == -1) {
 		/* No console, just die */
 		cpu_halt();
 		/* NOTREACHED */
 	}
 
 	/* howto is passed by value, so this only changes the local copy. */
 	howto &= ~RB_HALT;
 }
 
 /*
  * shutdown_final handler: check whether the system panicked (RB_DUMP set),
  * pause, and then let the reboot proceed according to
  * PANIC_REBOOT_WAIT_TIME:
  *   0   return immediately (reboot NOW);
  *   -1  skip the countdown and wait for a key press;
  *   n   count down n seconds; a key press aborts the countdown, after
  *       which a second key press is awaited.
  */
 static void
 shutdown_panic(void *junk, int howto)
 {
 	int ticks_left;
 
 	if ((howto & RB_DUMP) == 0)
 		return;
 
 	/* zero time specified - reboot NOW */
 	if (PANIC_REBOOT_WAIT_TIME == 0)
 		return;
 
 	if (PANIC_REBOOT_WAIT_TIME != -1) {
 		printf("Automatic reboot in %d seconds - "
 		       "press a key on the console to abort\n",
 			PANIC_REBOOT_WAIT_TIME);
 		ticks_left = PANIC_REBOOT_WAIT_TIME * 10;
 		while (ticks_left > 0) {
 			DELAY(1000 * 100); /* 1/10th second */
 			/* Did user type a key? */
 			if (cncheckc() != -1)
 				break;
 			--ticks_left;
 		}
 		/* Countdown ran out without a key press. */
 		if (ticks_left == 0)
 			return;
 	}
 
 	printf("--> Press a key on the console to reboot,\n");
 	printf("--> or switch off the system now.\n");
 	cngetc();
 }
 
 /*
  * Everything done, now reset.
  *
  * shutdown_final handler: prints a message, waits one second and calls
  * cpu_reset().  Both arguments are unused.
  */
 static void
 shutdown_reset(void *junk, int howto)
 {
 
 	printf("Rebooting...\n");
 	DELAY(1000000);	/* wait 1 sec for printf's to complete and be read */
 	/* cpu_boot(howto); */ /* doesn't do anything at the moment */
 	cpu_reset();
 	/* NOTREACHED */ /* assuming reset worked */
 }
 
 #ifdef SMP
 /* Id of the CPU that currently owns the panic, or NOCPU if none does. */
 static u_int panic_cpu = NOCPU;
 #endif
 
 /*
  * Panic is called on unresolvable fatal errors.  It prints "panic: mesg",
  * and then reboots.  If we are called twice, then we avoid trying to sync
  * the disks as this often leads to recursive panics.
  *
  * fmt, ...: printf-style message describing the failure.
  *
  * Returns only when built with KDB and RESTARTABLE_PANICS and panicstr
  * has been reset to NULL while in the debugger; otherwise ends in boot().
  *
  * MPSAFE
  */
 void
 panic(const char *fmt, ...)
 {
 	struct thread *td = curthread;
 	int bootopt, newpanic;
 	va_list ap;
 	/* Static because panicstr keeps pointing at it (see below). */
 	static char buf[256];
 
 #ifdef SMP
 	/*
 	 * We don't want multiple CPU's to panic at the same time, so we
 	 * use panic_cpu as a simple spinlock.  We have to keep checking
 	 * panic_cpu if we are spinning in case the panic on the first
 	 * CPU is canceled.
 	 */
 	if (panic_cpu != PCPU_GET(cpuid))
 		while (atomic_cmpset_int(&panic_cpu, NOCPU,
 		    PCPU_GET(cpuid)) == 0)
 			while (panic_cpu != NOCPU)
 				; /* nothing */
 #endif
 
 	bootopt = RB_AUTOBOOT | RB_DUMP;
 	newpanic = 0;
 	/* A non-NULL panicstr means a panic is already in progress. */
 	if (panicstr)
 		bootopt |= RB_NOSYNC;
 	else {
 		panicstr = fmt;
 		newpanic = 1;
 	}
 
 	va_start(ap, fmt);
 	if (newpanic) {
 		/* First panic: format into buf and make panicstr point at it. */
 		(void)vsnprintf(buf, sizeof(buf), fmt, ap);
 		panicstr = buf;
 		printf("panic: %s\n", buf);
 	} else {
 		/* Nested panic: print directly and leave buf/panicstr alone. */
 		printf("panic: ");
 		vprintf(fmt, ap);
 		printf("\n");
 	}
 	va_end(ap);
 #ifdef SMP
 	printf("cpuid = %d\n", PCPU_GET(cpuid));
 #endif
 
 #ifdef KDB
 	if (newpanic && trace_on_panic)
 		kdb_backtrace();
 	if (debugger_on_panic)
 		kdb_enter("panic");
 #ifdef RESTARTABLE_PANICS
 	/* See if the user aborted the panic, in which case we continue. */
 	if (panicstr == NULL) {
 #ifdef SMP
 		/* Give up ownership so another CPU may panic later. */
 		atomic_store_rel_int(&panic_cpu, NOCPU);
 #endif
 		return;
 	}
 #endif
 #endif
 	/* Mark this thread; choosethread() tests TDF_INPANIC during a panic. */
 	mtx_lock_spin(&sched_lock);
 	td->td_flags |= TDF_INPANIC;
 	mtx_unlock_spin(&sched_lock);
 	if (!sync_on_panic)
 		bootopt |= RB_NOSYNC;
 	boot(bootopt);
 }
 
 /*
  * Support for poweroff delay.  poweroff_delay is multiplied by 1000
  * before being handed to DELAY(); it can be changed through the
  * kern.shutdown.poweroff_delay sysctl.
  */
 #ifndef POWEROFF_DELAY
 # define POWEROFF_DELAY 5000
 #endif
 static int poweroff_delay = POWEROFF_DELAY;
 
 SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW,
 	&poweroff_delay, 0, "");
 
 /*
  * Shutdown handler: pause before a power off.  Does nothing unless
  * RB_POWEROFF is set in howto and the configured delay is positive.
  */
 static void
 poweroff_wait(void *junk, int howto)
 {
 
 	if ((howto & RB_POWEROFF) != 0 && poweroff_delay > 0)
 		DELAY(poweroff_delay * 1000);
 }
 
 /*
  * Some system processes (e.g. syncer) need to be stopped at appropriate
  * points in their main loops prior to a system shutdown, so that they
  * won't interfere with the shutdown process (e.g. by holding a disk buf
  * to cause sync to fail).  For each of these system processes, register
  * kproc_shutdown() as a handler for one of the shutdown events, passing
  * the process as the handler argument.
  */
 static int kproc_shutdown_wait = 60;
 SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW,
     &kproc_shutdown_wait, 0, "");
 
 /*
  * Suspend the system process given by arg, waiting at most
  * kproc_shutdown_wait seconds, and report the outcome on the console.
  * Does nothing during a panic.
  */
 void
 kproc_shutdown(void *arg, int howto)
 {
 	char name[MAXCOMLEN + 1];
 	struct proc *kp;
 	int rv;
 
 	if (panicstr)
 		return;
 
 	kp = (struct proc *)arg;
 	/* Print from a private copy of the process name. */
 	strlcpy(name, kp->p_comm, sizeof(name));
 	printf("Waiting (max %d seconds) for system process `%s' to stop...",
 	    kproc_shutdown_wait, name);
 
 	rv = kthread_suspend(kp, kproc_shutdown_wait * hz);
 	if (rv != EWOULDBLOCK)
 		printf("done\n");
 	else
 		printf("timed out\n");
 }
 
 /*
  * Registration of dumpers.
  *
  * A NULL di clears the current dumper.  Otherwise *di is copied into the
  * global dumper, unless one is already registered, in which case EBUSY
  * is returned.  Returns 0 on success.
  */
 int
 set_dumper(struct dumperinfo *di)
 {
 
 	if (di != NULL) {
 		if (dumper.dumper != NULL)
 			return (EBUSY);
 		dumper = *di;
 	} else {
 		bzero(&dumper, sizeof(dumper));
 	}
 	return (0);
 }
 
 #if defined(__powerpc__)
 /*
  * Stub dumpsys() for powerpc: ignores di and only reports that kernel
  * dumps are not implemented.
  */
 void
 dumpsys(struct dumperinfo *di __unused)
 {
 
 	printf("Kernel dumps not implemented on this architecture\n");
 }
 #endif
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index e36128f702c9..69cfb5f5bae1 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -1,873 +1,877 @@
 /*
  * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /***
 Here is the logic..
 
 If there are N processors, then there are at most N KSEs (kernel
 schedulable entities) working to process threads that belong to a
 KSEGROUP (kg). If there are X of these KSEs actually running at the
 moment in question, then there are at most M (N-X) of these KSEs on
 the run queue, as running KSEs are not on the queue.
 
 Runnable threads are queued off the KSEGROUP in priority order.
 If there are M or more threads runnable, the top M threads
 (by priority) are 'preassigned' to the M KSEs not running. The KSEs take
 their priority from those threads and are put on the run queue.
 
 The last thread that had a priority high enough to have a KSE associated
 with it, AND IS ON THE RUN QUEUE is pointed to by
 kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
 assigned as all the available KSEs are actively running, or because there
 are no threads queued, that pointer is NULL.
 
 When a KSE is removed from the run queue to become runnable, we know
 it was associated with the highest priority thread in the queue (at the head
 of the queue). If it is also the last assigned we know M was 1 and must
 now be 0. Since the thread is no longer queued that pointer must be
 removed from it. Since we know there were no more KSEs available,
 (M was 1 and is now 0) and since we are not FREEING our KSE
 but using it, we know there are STILL no more KSEs available, we can prove
 that the next thread in the ksegrp list will not have a KSE to assign to
 it, so we can show that the pointer must be made 'invalid' (NULL).
 
 The pointer exists so that when a new thread is made runnable, it can
 have its priority compared with the last assigned thread to see if
 it should 'steal' its KSE or not.. i.e. is it 'earlier'
 on the list than that thread or later.. If it's earlier, then the KSE is
 removed from the last assigned (which is now not assigned a KSE)
 and reassigned to the new thread, which is placed earlier in the list.
 The pointer is then backed up to the previous thread (which may or may not
 be the new thread).
 
 When a thread sleeps or is removed, the KSE becomes available and if there
 are queued threads that are not assigned KSEs, the highest priority one of
 them is assigned the KSE, which is then placed back on the run queue at
 the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
 to point to it.
 
 The following diagram shows 2 KSEs and 3 threads from a single process.
 
  RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)
               \    \____   
                \        \
     KSEGROUP---thread--thread--thread    (queued in priority order)
         \                 / 
          \_______________/
           (last_assigned)
 
 The result of this scheme is that the M available KSEs are always
 queued at the priorities they have inherited from the M highest priority
 threads for that KSEGROUP. If this situation changes, the KSEs are
 reassigned to keep this true.
 ***/
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#include "opt_full_preemption.h"
 #include "opt_sched.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/sched.h>
 #if defined(SMP) && (defined(__i386__) || defined(__amd64__))
 #include <sys/smp.h>
 #endif
 #include <machine/critical.h>
 #if defined(SMP) && defined(SCHED_4BSD)
 #include <sys/sysctl.h>
 #endif
 
+#ifdef FULL_PREEMPTION
+#ifndef PREEMPTION
+#error "The FULL_PREEMPTION option requires the PREEMPTION option"
+#endif
+#endif
 
 CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
 
 void panc(char *string1, char *string2);
 
 #if 0
 static void runq_readjust(struct runq *rq, struct kse *ke);
 #endif
 /************************************************************************
  * Functions that manipulate runnability from a thread perspective.	*
  ************************************************************************/
 /*
  * Select the KSE that will be run next.  From that find the thread, and
  * remove it from the KSEGRP's run queue.  If there is thread clustering,
  * this will be what does it.
  *
  * Returns the chosen thread, already marked running.  When sched_choose()
  * has nothing to offer, this CPU's idle thread is returned instead.
  */
 struct thread *
 choosethread(void)
 {
 	struct kse *ke;
 	struct thread *td;
 	struct ksegrp *kg;
 
 #if defined(SMP) && (defined(__i386__) || defined(__amd64__))
 	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
 		/* Shutting down, run idlethread on AP's */
 		td = PCPU_GET(idlethread);
 		ke = td->td_kse;
 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
 		ke->ke_flags |= KEF_DIDRUN;
 		TD_SET_RUNNING(td);
 		return (td);
 	}
 #endif
 
 retry:
 	ke = sched_choose();
 	if (ke) {
 		td = ke->ke_thread;
 		KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
 		kg = ke->ke_ksegrp;
 		/* P_SA processes also keep the thread on the ksegrp queue. */
 		if (td->td_proc->p_flag & P_SA) {
 			/* Back last_assigned up if it pointed at this thread. */
 			if (kg->kg_last_assigned == td) {
 				kg->kg_last_assigned = TAILQ_PREV(td,
 				    threadqueue, td_runq);
 			}
 			TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
 			kg->kg_runnable--;
 		}
 		CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
 		    td, td->td_priority);
 	} else {
 		/* Simulate runq_choose() having returned the idle thread */
 		td = PCPU_GET(idlethread);
 		ke = td->td_kse;
 		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
 	}
 	ke->ke_flags |= KEF_DIDRUN;
 
 	/*
 	 * If we are in panic, only allow system threads,
 	 * plus the one we are running in, to be run.
 	 */
 	if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
 	    (td->td_flags & TDF_INPANIC) == 0)) {
 		/* note that it is no longer on the run queue */
 		TD_SET_CAN_RUN(td);
 		goto retry;
 	}
 
 	TD_SET_RUNNING(td);
 	return (td);
 }
 
 /*
  * Given a surplus KSE, either hand it to the first runnable thread of its
  * ksegrp that has no KSE yet (and put it on the run queue), or park it on
  * the ksegrp's idle KSE list.
  * Assumes that the thread the KSE was attached to is not runnable.
  * The caller must hold sched_lock.
  */
 void
 kse_reassign(struct kse *ke)
 {
 	struct ksegrp *kg;
 	struct thread *cand;
 	struct thread *prev;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	prev = ke->ke_thread;
 	KASSERT(prev == NULL || TD_IS_INHIBITED(prev),
 	    ("reassigning KSE with runnable thread"));
 	kg = ke->ke_ksegrp;
 	/* Detach the KSE from its previous thread, if it had one. */
 	if (prev != NULL)
 		prev->td_kse = NULL;
 
 	/*
 	 * The first unassigned thread is the one after last_assigned,
 	 * or the head of the queue when nothing is assigned.
 	 */
 	cand = kg->kg_last_assigned;
 	if (cand == NULL)
 		cand = TAILQ_FIRST(&kg->kg_runq);
 	else
 		cand = TAILQ_NEXT(cand, td_runq);
 
 	if (cand == NULL) {
 		/* Nobody is waiting for a KSE: idle it. */
 		ke->ke_state = KES_IDLE;
 		ke->ke_thread = NULL;
 		TAILQ_INSERT_TAIL(&kg->kg_iq, ke, ke_kgrlist);
 		kg->kg_idle_kses++;
 		CTR1(KTR_RUNQ, "kse_reassign: ke%p on idle queue", ke);
 		return;
 	}
 
 	/* Give the KSE to the waiting thread and queue it. */
 	kg->kg_last_assigned = cand;
 	cand->td_kse = ke;
 	ke->ke_thread = cand;
 	CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, cand);
 	sched_add(cand, SRQ_BORING);
 }
 
 #if 0
 /*
  * Remove a thread from its KSEGRP's run queue.
  * This in turn may remove it from a KSE if it was already assigned
  * to one, possibly causing a new thread to be assigned to the KSE
  * and the KSE getting a new priority.
  *
  * NOTE: this function sits inside an "#if 0" region and is not compiled.
  */
 static void
 remrunqueue(struct thread *td)
 {
 	struct thread *td2, *td3;
 	struct ksegrp *kg;
 	struct kse *ke;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
 	kg = td->td_ksegrp;
 	ke = td->td_kse;
 	CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
 	TD_SET_CAN_RUN(td);
 	/*
 	 * If it is not a threaded process, take the shortcut.
 	 */
 	if ((td->td_proc->p_flag & P_SA) == 0) {
 		/* Bring its kse with it, leave the thread attached */
 		sched_rem(td);
 		ke->ke_state = KES_THREAD; 
 		return;
 	}
 	/* Remember the predecessor before td is unlinked. */
    	td3 = TAILQ_PREV(td, threadqueue, td_runq);
 	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
 	kg->kg_runnable--;
 	if (ke) {
 		/*
 		 * This thread has been assigned to a KSE.
 		 * We need to dissociate it and try assign the
 		 * KSE to the next available thread. Then, we should
 		 * see if we need to move the KSE in the run queues.
 		 */
 		sched_rem(td);
 		ke->ke_state = KES_THREAD; 
 		td2 = kg->kg_last_assigned;
 		KASSERT((td2 != NULL), ("last assigned has wrong value"));
 		if (td2 == td) 
 			kg->kg_last_assigned = td3;
 		kse_reassign(ke);
 	}
 }
 #endif
 
 /*
  * Change the priority of a thread that is on the run queue.
  *
  * td:     thread whose priority changes; must be on a run queue.
  * newpri: the new priority.
  *
  * The caller must hold sched_lock.
  */
 void
 adjustrunqueue( struct thread *td, int newpri) 
 {
 	struct ksegrp *kg;
 	struct kse *ke;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));
 
 	ke = td->td_kse;
 	CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
 	/*
 	 * If it is not a threaded process, take the shortcut.
 	 */
 	if ((td->td_proc->p_flag & P_SA) == 0) {
 		/* We only care about the kse in the run queue. */
 		td->td_priority = newpri;
 		/* Requeue only if the new priority maps to another queue. */
 		if (ke->ke_rqindex != (newpri / RQ_PPQ)) {
 			sched_rem(td);
 			sched_add(td, SRQ_BORING);
 		}
 		return;
 	}
 
 	/* It is a threaded process */
 	kg = td->td_ksegrp;
 	TD_SET_CAN_RUN(td);
 	if (ke) {
 		/* Back last_assigned up if it pointed at this thread. */
 		if (kg->kg_last_assigned == td) {
 			kg->kg_last_assigned =
 			    TAILQ_PREV(td, threadqueue, td_runq);
 		}
 		sched_rem(td);
 	}
 	/* Unlink, change the priority, then requeue via setrunqueue(). */
 	TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
 	kg->kg_runnable--;
 	td->td_priority = newpri;
 	setrunqueue(td, SRQ_BORING);
 }
 
 /*
  * Make a thread runnable.
  *
  * td:    thread to queue; must be in the can-run or running state.
  * flags: passed through unchanged to sched_add().
  *
  * For a process without P_SA the thread goes straight to sched_add().
  * Otherwise the thread is inserted into its ksegrp's run queue in
  * priority order and, if a KSE can be found (its own, an idle one, or
  * one taken from the last assigned thread), that KSE is attached to the
  * appropriate thread and put on the system run queue.
  *
  * The caller must hold sched_lock.
  */
 void
 setrunqueue(struct thread *td, int flags)
 {
 	struct kse *ke;
 	struct ksegrp *kg;
 	struct thread *td2;
 	struct thread *tda;
 	int count;
 
 	CTR4(KTR_RUNQ, "setrunqueue: td:%p ke:%p kg:%p pid:%d",
 	    td, td->td_kse, td->td_ksegrp, td->td_proc->p_pid);
 	mtx_assert(&sched_lock, MA_OWNED);
 	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
 	    ("setrunqueue: bad thread state"));
 	TD_SET_RUNQ(td);
 	kg = td->td_ksegrp;
 	if ((td->td_proc->p_flag & P_SA) == 0) {
 		/*
 		 * Common path optimisation: Only one of everything
 		 * and the KSE is always already attached.
 		 * Totally ignore the ksegrp run queue.
 		 */
 		sched_add(td, flags);
 		return;
 	}
 
 	/* tda tracks the last thread in the ksegrp queue that has a KSE. */
 	tda = kg->kg_last_assigned;
 	if ((ke = td->td_kse) == NULL) {
 		if (kg->kg_idle_kses) {
 			/*
 			 * There is a free one so it's ours for the asking..
 			 */
 			ke = TAILQ_FIRST(&kg->kg_iq);
 			CTR2(KTR_RUNQ, "setrunqueue: kg:%p: Use free ke:%p",
 			    kg, ke);
 			TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist);
 			ke->ke_state = KES_THREAD;
 			kg->kg_idle_kses--;
 		} else if (tda && (tda->td_priority > td->td_priority)) {
 			/*
 			 * None free, but there is one we can commandeer.
 			 */
 			ke = tda->td_kse;
 			CTR3(KTR_RUNQ,
 			    "setrunqueue: kg:%p: take ke:%p from td: %p",
 			    kg, ke, tda);
 			sched_rem(tda);
 			tda->td_kse = NULL;
 			ke->ke_thread = NULL;
 			tda = kg->kg_last_assigned =
 		    	    TAILQ_PREV(tda, threadqueue, td_runq);
 		}
 	} else {
 		/*
 		 * Temporarily disassociate so it looks like the other cases.
 		 */
 		ke->ke_thread = NULL;
 		td->td_kse = NULL;
 	}
 
 	/*
 	 * Add the thread to the ksegrp's run queue at
 	 * the appropriate place.
 	 */
 	count = 0;
 	TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
 		if (td2->td_priority > td->td_priority) {
 			kg->kg_runnable++;
 			TAILQ_INSERT_BEFORE(td2, td, td_runq);
 			break;
 		}
 		/* XXX Debugging hack */
 		if (++count > 10000) {
 			printf("setrunqueue(): corrupt kq_runq, td= %p\n", td);
 			panic("deadlock in setrunqueue");
 		}
 	}
 	if (td2 == NULL) {
 		/* We ran off the end of the TAILQ or it was empty. */
 		kg->kg_runnable++;
 		TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
 	}
 
 	/*
 	 * If we have a ke to use, then put it on the run queue and
 	 * If needed, readjust the last_assigned pointer.
 	 */
 	if (ke) {
 		if (tda == NULL) {
 			/*
 			 * No pre-existing last assigned so whoever is first
 			 * gets the KSE we brought in.. (maybe us)
 			 */
 			td2 = TAILQ_FIRST(&kg->kg_runq);
 			KASSERT((td2->td_kse == NULL),
 			    ("unexpected ke present"));
 			td2->td_kse = ke;
 			ke->ke_thread = td2;
 			kg->kg_last_assigned = td2;
 		} else if (tda->td_priority > td->td_priority) {
 			/*
 			 * It's ours, grab it, but last_assigned is past us
 			 * so don't change it.
 			 */
 			td->td_kse = ke;
 			ke->ke_thread = td;
 		} else {
 			/*
 			 * We are past last_assigned, so
 			 * put the new kse on whatever is next,
 			 * which may or may not be us.
 			 */
 			td2 = TAILQ_NEXT(tda, td_runq);
 			kg->kg_last_assigned = td2;
 			td2->td_kse = ke;
 			ke->ke_thread = td2;
 		}
 		sched_add(ke->ke_thread, flags);
 	} else {
 		/* No KSE available: the thread waits on the ksegrp queue only. */
 		CTR3(KTR_RUNQ, "setrunqueue: held: td%p kg%p pid%d",
 			td, td->td_ksegrp, td->td_proc->p_pid);
 	}
 }
 
 /*
  * Kernel thread preemption implementation.  Critical sections mark
  * regions of code in which preemptions are not allowed.
  *
  * critical_enter() increases the current thread's nesting count; the
  * machine-dependent cpu_critical_enter() hook is invoked only when the
  * outermost section is entered.
  */
 void
 critical_enter(void)
 {
 	struct thread *self = curthread;
 
 	/* Only the outermost entry calls the MD hook. */
 	if (self->td_critnest == 0)
 		cpu_critical_enter(self);
 	self->td_critnest++;
 }
 
 /*
  * Leave a critical section.  Nested exits only decrement the nesting
  * count.  The outermost exit first performs any preemption that was
  * deferred with TDP_OWEPREEMPT (PREEMPTION kernels only), then resets the
  * count and calls the machine-dependent cpu_critical_exit() hook.
  */
 void
 critical_exit(void)
 {
 	struct thread *self;
 
 	self = curthread;
 	KASSERT(self->td_critnest != 0,
 	    ("critical_exit: td_critnest == 0"));
 
 	/* Not the outermost section: just drop one level. */
 	if (self->td_critnest != 1) {
 		self->td_critnest--;
 		return;
 	}
 
 #ifdef PREEMPTION
 	/* The switch happens while td_critnest is still 1. */
 	mtx_assert(&sched_lock, MA_NOTOWNED);
 	if (self->td_pflags & TDP_OWEPREEMPT) {
 		mtx_lock_spin(&sched_lock);
 		mi_switch(SW_INVOL, NULL);
 		mtx_unlock_spin(&sched_lock);
 	}
 #endif
 	self->td_critnest = 0;
 	cpu_critical_exit(self);
 }
 
 /*
  * This function is called when a thread is about to be put on run queue
  * because it has been made runnable or its priority has been adjusted.  It
  * determines if the new thread should be immediately preempted to.  If so,
  * it switches to it and eventually returns true.  If not, it returns false
  * so that the caller may place the thread on an appropriate run queue.
  *
  * td: the thread that is about to be queued.
  *
  * Always returns 0 when the kernel is built without PREEMPTION.
  * The caller must hold sched_lock.
  */
 int
 maybe_preempt(struct thread *td)
 {
 #ifdef PREEMPTION
 	struct thread *ctd;
 	int cpri, pri;
 #endif
 
 	mtx_assert(&sched_lock, MA_OWNED);
 #ifdef PREEMPTION
 	/*
 	 * The new thread should not preempt the current thread if any of the
 	 * following conditions are true:
 	 *
 	 *  - The current thread has a higher (numerically lower) or
 	 *    equivalent priority.  Note that this prevents curthread from
 	 *    trying to preempt to itself.
 	 *  - It is too early in the boot for context switches (cold is set).
 	 *  - The current thread has an inhibitor set or is in the process of
 	 *    exiting.  In this case, the current thread is about to switch
 	 *    out anyways, so there's no point in preempting.  If we did,
 	 *    the current thread would not be properly resumed as well, so
 	 *    just avoid that whole landmine.
 	 *  - The new thread's KSE is not in the KES_THREAD state.
 	 *  - If the new thread's priority is not in the interrupt thread
 	 *    range (PRI_MIN_ITHD..PRI_MAX_ITHD) and the current thread's
 	 *    priority is not an idle priority and FULL_PREEMPTION is
 	 *    disabled.
 	 *
 	 * If all of these conditions are false, but the current thread is in
 	 * a nested critical section, then we have to defer the preemption
 	 * until we exit the critical section.  Otherwise, switch immediately
 	 * to the new thread.
 	 */
 	ctd = curthread;
 	/* Bail out if curthread and its KSE do not point at each other. */
 	if (ctd->td_kse == NULL || ctd->td_kse->ke_thread != ctd)
 		return (0);
 	pri = td->td_priority;
 	cpri = ctd->td_priority;
 	if (pri >= cpri || cold /* || dumping */ || TD_IS_INHIBITED(ctd) ||
 	    td->td_kse->ke_state != KES_THREAD)
 		return (0);
 #ifndef FULL_PREEMPTION
 	if (!(pri >= PRI_MIN_ITHD && pri <= PRI_MAX_ITHD) &&
 	    !(cpri >= PRI_MIN_IDLE))
 		return (0);
 #endif
 	if (ctd->td_critnest > 1) {
 		CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
 		    ctd->td_critnest);
 		/* critical_exit() performs the switch once nesting unwinds. */
 		ctd->td_pflags |= TDP_OWEPREEMPT;
 		return (0);
 	}
 
 	/*
 	 * Our thread state says that we are already on a run queue, so
 	 * update our state as if we had been dequeued by choosethread().
 	 */
 	MPASS(TD_ON_RUNQ(td));
 	TD_SET_RUNNING(td);
 	CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
 	    td->td_proc->p_pid, td->td_proc->p_comm);
 	mi_switch(SW_INVOL, td);
 	return (1);
 #else
 	return (0);
 #endif
 }
 
 #if 0
 #ifndef PREEMPTION
 /*
  * Disabled code: registers a SYSINIT that prints preempt_warning on
  * kernels built without PREEMPTION.
  */
 /* XXX: There should be a non-static version of this. */
 static void
 printf_caddr_t(void *data)
 {
 	/* data is the NUL-terminated string handed over by SYSINIT. */
 	printf("%s", (char *)data);
 }
 static char preempt_warning[] =
     "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
 SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
     preempt_warning)
 #endif
 #endif
 
 /************************************************************************
  * SYSTEM RUN QUEUE manipulations and tests				*
  ************************************************************************/
 /*
  * Initialize a run structure: zero the whole thing, then set up each of
  * the RQ_NQS queue heads as an empty tail queue.
  */
 void
 runq_init(struct runq *rq)
 {
 	int qi;
 
 	bzero(rq, sizeof(*rq));
 	qi = 0;
 	while (qi < RQ_NQS) {
 		TAILQ_INIT(&rq->rq_queues[qi]);
 		qi++;
 	}
 }
 
 /*
  * Mark the queue for priority level pri as empty by clearing its bit in
  * the run queue's status bitmap.
  */
 static __inline void
 runq_clrbit(struct runq *rq, int pri)
 {
 
 	CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
 	    rq->rq_status.rqb_bits[RQB_WORD(pri)],
 	    rq->rq_status.rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
 	    RQB_BIT(pri), RQB_WORD(pri));
 	rq->rq_status.rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
 }
 
 /*
  * Find the index of the first non-empty run queue by scanning the status
  * words in order; a set bit indicates a non-empty queue.  Returns -1
  * when no bit is set.
  */
 static __inline int
 runq_findbit(struct runq *rq)
 {
 	struct rqbits *status;
 	int word;
 	int idx;
 
 	status = &rq->rq_status;
 	for (word = 0; word < RQB_LEN; word++) {
 		if (!status->rqb_bits[word])
 			continue;
 		/* Bit position within the word plus the word's base index. */
 		idx = RQB_FFS(status->rqb_bits[word]) + (word << RQB_L2BPW);
 		CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
 		    status->rqb_bits[word], word, idx);
 		return (idx);
 	}
 
 	return (-1);
 }
 
 /*
  * Mark the queue for priority level pri as non-empty by setting its bit
  * in the run queue's status bitmap.
  */
 static __inline void
 runq_setbit(struct runq *rq, int pri)
 {
 
 	CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
 	    rq->rq_status.rqb_bits[RQB_WORD(pri)],
 	    rq->rq_status.rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
 	    RQB_BIT(pri), RQB_WORD(pri));
 	rq->rq_status.rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
 }
 
 /*
  * Append the KSE to the queue selected by its thread's priority
  * (td_priority / RQ_PPQ), remember that queue index in the KSE, and set
  * the corresponding status bit.
  */
 void
 runq_add(struct runq *rq, struct kse *ke)
 {
 	struct rqhead *head;
 	int qidx;
 
 	qidx = ke->ke_thread->td_priority / RQ_PPQ;
 	head = &rq->rq_queues[qidx];
 	ke->ke_rqindex = qidx;
 	runq_setbit(rq, qidx);
 	CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
 	    ke->ke_thread, ke, ke->ke_thread->td_priority, qidx, head);
 	TAILQ_INSERT_TAIL(head, ke, ke_procq);
 }
 
 /*
  * Return 1 if any status word of the run queue has a bit set, i.e. there
  * is something runnable at some priority, and 0 otherwise.  Only reads
  * the run queue structure.
  */
 int
 runq_check(struct runq *rq)
 {
 	struct rqbits *status;
 	int word;
 
 	status = &rq->rq_status;
 	for (word = 0; word < RQB_LEN; word++) {
 		if (!status->rqb_bits[word])
 			continue;
 		CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
 		    status->rqb_bits[word], word);
 		return (1);
 	}
 	CTR0(KTR_RUNQ, "runq_check: empty");
 
 	return (0);
 }
 
 #if defined(SMP) && defined(SCHED_4BSD)
 /*
  * Number of entries at the head of a queue that runq_choose() inspects
  * when looking for a KSE that last ran on the current CPU.
  */
 int runq_fuzz = 1;
 SYSCTL_DECL(_kern_sched);
 SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
 #endif
 
 /*
  * Find the highest priority process on the run queue.
  *
  * Returns the KSE at the head of the first non-empty queue, or NULL when
  * every queue is empty.  On SMP kernels with SCHED_4BSD and runq_fuzz > 1,
  * the first runq_fuzz entries of that queue are searched for a KSE whose
  * thread last ran on this CPU; such an entry is preferred over the head.
  * The KSE is not removed from the queue.
  *
  * The caller must hold sched_lock.
  */
 struct kse *
 runq_choose(struct runq *rq)
 {
 	struct rqhead *rqh;
 	struct kse *ke;
 	int pri;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	while ((pri = runq_findbit(rq)) != -1) {
 		rqh = &rq->rq_queues[pri];
 #if defined(SMP) && defined(SCHED_4BSD)
 		/* fuzz == 1 is normal.. 0 or less are ignored */
 		if (runq_fuzz > 1) {
 			/*
 			 * In the first couple of entries, check if
 			 * there is one for our CPU as a preference.
 			 */
 			int count = runq_fuzz;
 			int cpu = PCPU_GET(cpuid);
 			struct kse *ke2;
 			ke2 = ke = TAILQ_FIRST(rqh);
 
 			while (count-- && ke2) {
 				/*
 				 * Test the entry being visited (ke2), not
 				 * the head (ke); otherwise later entries
 				 * can never be selected.
 				 */
 				if (ke2->ke_thread->td_lastcpu == cpu) {
 					ke = ke2;
 					break;
 				}
 				ke2 = TAILQ_NEXT(ke2, ke_procq);
 			}
 		} else
 #endif
 			ke = TAILQ_FIRST(rqh);
 		KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
 		CTR3(KTR_RUNQ,
 		    "runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
 		return (ke);
 	}
 	CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
 
 	return (NULL);
 }
 
 /*
  * Remove the KSE from the queue specified by its priority, and clear the
  * corresponding status bit if the queue becomes empty.
  * Caller must set ke->ke_state afterwards.
  *
  * The queue is selected by ke->ke_rqindex, the index recorded by
  * runq_add().
  */
 void
 runq_remove(struct runq *rq, struct kse *ke)
 {
 	struct rqhead *rqh;
 	int pri;
 
 	/* Check the pointer before anything below dereferences it. */
 	KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
 	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
 		("runq_remove: process swapped out"));
 	pri = ke->ke_rqindex;
 	rqh = &rq->rq_queues[pri];
 	CTR5(KTR_RUNQ, "runq_remove: td=%p, ke=%p pri=%d %d rqh=%p",
 	    ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
 	TAILQ_REMOVE(rqh, ke, ke_procq);
 	if (TAILQ_EMPTY(rqh)) {
 		CTR0(KTR_RUNQ, "runq_remove: empty");
 		runq_clrbit(rq, pri);
 	}
 }
 
 #if 0
 /*
  * Debugging helper (inside "#if 0", not compiled): print string1, then
  * enter the kernel debugger with string2 as the message.
  */
 void
 panc(char *string1, char *string2)
 {
 	printf("%s", string1);
 	kdb_enter(string2);
 }
 
 /*
  * Debugging helper (inside "#if 0", not compiled): check the consistency
  * of a thread's KSE linkage and, for P_SA processes, of its ksegrp run
  * queue.  Every violation is reported through panc().
  *
  * td:     thread to check.
  * string: text that panc() prints before entering the debugger.
  */
 void
 thread_sanity_check(struct thread *td, char *string)
 {
 	struct proc *p;
 	struct ksegrp *kg;
 	struct kse *ke;
 	struct thread *td2 = NULL;
 	unsigned int prevpri;
 	int	saw_lastassigned = 0;
 	int unassigned = 0;
 	int assigned = 0;
 
 	p = td->td_proc;
 	kg = td->td_ksegrp;
 	ke = td->td_kse;
 
 
 	/* An attached KSE must point back at this process and thread. */
 	if (ke) {
 		if (p != ke->ke_proc) {
 			panc(string, "wrong proc");
 		}
 		if (ke->ke_thread != td) {
 			panc(string, "wrong thread");
 		}
 	}
 	
 	if ((p->p_flag & P_SA) == 0) {
 		if (ke == NULL) {
 			panc(string, "non KSE thread lost kse");
 		}
 	} else {
 		/*
 		 * Walk the ksegrp run queue: priorities must not decrease,
 		 * threads with a KSE must come before those without, and
 		 * kg_last_assigned must be the last thread that has one.
 		 */
 		prevpri = 0;
 		saw_lastassigned = 0;
 		unassigned = 0;
 		assigned = 0;
 		TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
 			if (td2->td_priority < prevpri) {
 				panc(string, "thread runqueue unosorted");
 			}
 			if ((td2->td_state == TDS_RUNQ) &&
 			    td2->td_kse &&
 			    (td2->td_kse->ke_state != KES_ONRUNQ)) {
 				panc(string, "KSE wrong state");
 			}
 			prevpri = td2->td_priority;
 			if (td2->td_kse) {
 				assigned++;
 				if (unassigned) {
 					panc(string, "unassigned before assigned");
 				}
  				if  (kg->kg_last_assigned == NULL) {
 					panc(string, "lastassigned corrupt");
 				}
 				if (saw_lastassigned) {
 					panc(string, "last assigned not last");
 				}
 				if (td2->td_kse->ke_thread != td2) {
 					panc(string, "mismatched kse/thread");
 				}
 			} else {
 				unassigned++;
 			}
 			if (td2 == kg->kg_last_assigned) {
 				saw_lastassigned = 1;
 				if (td2->td_kse == NULL) {
 					panc(string, "last assigned not assigned");
 				}
 			}
 		}
 		if (kg->kg_last_assigned && (saw_lastassigned == 0)) {
 			panc(string, "where on earth does lastassigned point?");
 		}
 #if 0
 		FOREACH_THREAD_IN_GROUP(kg, td2) {
 			if (((td2->td_flags & TDF_UNBOUND) == 0) && 
 			    (TD_ON_RUNQ(td2))) {
 				assigned++;
 				if (td2->td_kse == NULL) {
 					panc(string, "BOUND thread with no KSE");
 				}
 			}
 		}
 #endif
 #if 0
 		if ((unassigned + assigned) != kg->kg_runnable) {
 			panc(string, "wrong number in runnable");
 		}
 #endif
 	}
 	/* NOTE(review): 12345 looks like a sentinel that merely keeps the locals referenced - confirm. */
 	if (assigned == 12345) {
 		printf("%p %p %p %p %p %d, %d",
 		    td, td2, ke, kg, p, assigned, saw_lastassigned);
 	}
 }
 #endif
 
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index 5582a40f0d90..e235c1f9b752 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -1,1837 +1,1851 @@
 /*-
  * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice unmodified, this list of conditions, and the following
  *    disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include <opt_sched.h>
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kdb.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/resource.h>
 #include <sys/resourcevar.h>
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/sysproto.h>
 #include <sys/vmmeter.h>
 #ifdef KTRACE
 #include <sys/uio.h>
 #include <sys/ktrace.h>
 #endif
 
 #include <machine/cpu.h>
 #include <machine/smp.h>
 
 /* KTR trace class used by the CTR6() scheduler trace points in this file. */
 #define KTR_ULE	KTR_NFS
 
 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
 /* XXX This is bogus compatibility crap for ps */
 static fixpt_t  ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
 SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");
 
 /* Scheduler initialization routine; registered with SYSINIT just below. */
 static void sched_setup(void *dummy);
 SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL)
 
 /* Parent sysctl node for the scheduler knobs declared below. */
 static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler");
 
 SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "ule", 0,
     "Scheduler name");
 
 /*
  * Bounds on the time slice, read through SCHED_SLICE_MIN and
  * SCHED_SLICE_MAX.  The initializers are only placeholders: sched_setup()
  * recomputes both values from hz.
  */
 static int slice_min = 1;
 SYSCTL_INT(_kern_sched, OID_AUTO, slice_min, CTLFLAG_RW, &slice_min, 0, "");
 
 static int slice_max = 10;
 SYSCTL_INT(_kern_sched, OID_AUTO, slice_max, CTLFLAG_RW, &slice_max, 0, "");
 
 /* NOTE(review): the users of these two globals are not visible in this part of the file. */
 int realstathz;
 int tickincr = 1;
 
+#ifdef PREEMPTION
+/*
+ * SYSINIT callback that prints the NUL-terminated string passed as its
+ * argument.
+ */
+static void
+printf_caddr_t(void *data)
+{
+	printf("%s", (char *)data);
+}
+/* Warning emitted through the SYSINIT below when PREEMPTION is compiled in. */
+static char preempt_warning[] =
+    "WARNING: Kernel PREEMPTION is unstable under SCHED_ULE.\n"; 
+SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
+    preempt_warning)
+#endif
+
 /*
  * These datastructures are allocated within their parent datastructure but
  * are scheduler specific.
  */
 
 /* Per-KSE scheduler state, reached through the ke_* accessor macros below. */
 struct ke_sched {
 	int		ske_slice;	/* Time slice; assigned by sched_slice(). */
 	struct runq	*ske_runq;	/* Run queue used by kseq_runq_add/rem. */
 	/* The following variables are only used for pctcpu calculation */
 	int		ske_ltick;	/* Last tick that we were running on */
 	int		ske_ftick;	/* First tick that we were running on */
 	int		ske_ticks;	/* Tick count */
 	/* CPU that we have affinity for. */
 	u_char		ske_cpu;
 };
 #define	ke_slice	ke_sched->ske_slice
 #define	ke_runq		ke_sched->ske_runq
 #define	ke_ltick	ke_sched->ske_ltick
 #define	ke_ftick	ke_sched->ske_ftick
 #define	ke_ticks	ke_sched->ske_ticks
 #define	ke_cpu		ke_sched->ske_cpu
 /*
  * The run queue linkage doubles as the "next" pointer of the singly linked
  * ksq_assigned list built by kseq_notify() and drained by kseq_assign().
  */
 #define	ke_assign	ke_procq.tqe_next
 
 #define	KEF_ASSIGNED	KEF_SCHED0	/* KSE is being migrated. */
 #define	KEF_BOUND	KEF_SCHED1	/* KSE can not migrate. */
 #define	KEF_XFERABLE	KEF_SCHED2	/* KSE was added as transferable. */
 #define	KEF_HOLD	KEF_SCHED3	/* KSE is temporarily bound. */
 
 /* Per-ksegrp scheduler state: the inputs to sched_interact_score(). */
 struct kg_sched {
 	int	skg_slptime;		/* Number of ticks we vol. slept */
 	int	skg_runtime;		/* Number of ticks we were running */
 };
 #define	kg_slptime	kg_sched->skg_slptime
 #define	kg_runtime	kg_sched->skg_runtime
 
 /* Per-thread scheduler state. */
 struct td_sched {
 	int	std_slptime;	/* NOTE(review): not used in the visible code. */
 };
 #define	td_slptime	td_sched->std_slptime
 
 /*
  * Statically allocated scheduler state, exported through the *0_sched
  * pointers below (presumably for the first thread/kse/ksegrp -- confirm
  * against the code that consumes these pointers).
  */
 struct td_sched td_sched;
 struct ke_sched ke_sched;
 struct kg_sched kg_sched;
 
 struct ke_sched *kse0_sched = &ke_sched;
 struct kg_sched *ksegrp0_sched = &kg_sched;
 struct p_sched *proc0_sched = NULL;
 struct td_sched *thread0_sched = &td_sched;
 
 /*
  * The priority is primarily determined by the interactivity score.  Thus, we
  * give lower(better) priorities to kse groups that use less CPU.  The nice
  * value is then directly added to this to allow nice to have some effect
  * on latency.
  *
  * PRI_RANGE:	Total priority range for timeshare threads.
  * PRI_NRESV:	Number of nice values.
  * PRI_NHALF:	Offset added to a nice value to index the ksq_nice[] bins.
  * PRI_BASE:	The start of the dynamic range.
  * PRI_INTERACT:	Scales an interactivity score onto PRI_RANGE.
  */
 #define	SCHED_PRI_RANGE		(PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE + 1)
 #define	SCHED_PRI_NRESV		((PRIO_MAX - PRIO_MIN) + 1)
 #define	SCHED_PRI_NHALF		(SCHED_PRI_NRESV / 2)
 #define	SCHED_PRI_BASE		(PRI_MIN_TIMESHARE)
 #define	SCHED_PRI_INTERACT(score)					\
     ((score) * SCHED_PRI_RANGE / SCHED_INTERACT_MAX)
 
 /*
  * These determine the interactivity of a process.
  *
  * SLP_RUN_MAX:	Maximum amount of sleep time + run time we'll accumulate
  *		before throttling back.
  * SLP_RUN_FORK:	Maximum slp+run time to inherit at fork time.
  * INTERACT_MAX:	Maximum interactivity value.  Smaller is better.
  * INTERACT_HALF:	Half of INTERACT_MAX; used by sched_interact_score().
  * INTERACT_THRESH:	Threshold for placement on the current runq.
  */
 #define	SCHED_SLP_RUN_MAX	((hz * 5) << 10)
 #define	SCHED_SLP_RUN_FORK	((hz / 2) << 10)
 #define	SCHED_INTERACT_MAX	(100)
 #define	SCHED_INTERACT_HALF	(SCHED_INTERACT_MAX / 2)
 #define	SCHED_INTERACT_THRESH	(30)
 
 /*
  * These parameters and macros determine the size of the time slice that is
  * granted to each thread.
  *
  * SLICE_MIN:	Minimum time slice granted, in units of ticks.
  * SLICE_MAX:	Maximum time slice granted.
  * SLICE_INTERACTIVE:	Slice granted to KSEs of interactive ksegrps.
  * SLICE_RANGE:	Range of available time slices scaled by hz.
  * SLICE_SCALE:	The number slices granted per val in the range of [0, max].
  * SLICE_NICE:  Determine the amount of slice granted to a scaled nice.
  * SLICE_NTHRESH:	The nice cutoff point for slice assignment.
  */
 #define	SCHED_SLICE_MIN			(slice_min)
 #define	SCHED_SLICE_MAX			(slice_max)
 #define	SCHED_SLICE_INTERACTIVE		(slice_max)
 #define	SCHED_SLICE_NTHRESH	(SCHED_PRI_NHALF - 1)
 #define	SCHED_SLICE_RANGE		(SCHED_SLICE_MAX - SCHED_SLICE_MIN + 1)
 #define	SCHED_SLICE_SCALE(val, max)	(((val) * SCHED_SLICE_RANGE) / (max))
 #define	SCHED_SLICE_NICE(nice)						\
     (SCHED_SLICE_MAX - SCHED_SLICE_SCALE((nice), SCHED_SLICE_NTHRESH))
 
 /*
  * This macro determines whether or not the kse belongs on the current or
  * next run queue.
  *
  * SCHED_INTERACTIVE:	True when the ksegrp's interactivity score is below
  *			SCHED_INTERACT_THRESH.
  * SCHED_CURR:		True when the KSE's thread currently has a priority
  *			better than the ksegrp's user priority, or when the
  *			ksegrp is interactive.
  */
 #define	SCHED_INTERACTIVE(kg)						\
     (sched_interact_score(kg) < SCHED_INTERACT_THRESH)
 #define	SCHED_CURR(kg, ke)						\
     (ke->ke_thread->td_priority < kg->kg_user_pri ||			\
     SCHED_INTERACTIVE(kg))
 
 /*
  * Cpu percentage computation macros and defines.
  *
  * SCHED_CPU_TIME:	Number of seconds to average the cpu usage across.
  * SCHED_CPU_TICKS:	Number of hz ticks to average the cpu usage across.
  */
 
 #define	SCHED_CPU_TIME	10
 #define	SCHED_CPU_TICKS	(hz * SCHED_CPU_TIME)
 
 /*
  * kseq - per processor runqs and statistics.
  */
 struct kseq {
 	struct runq	ksq_idle;		/* Queue of IDLE threads. */
 	struct runq	ksq_timeshare[2];	/* Run queues for !IDLE. */
 	struct runq	*ksq_next;		/* Next timeshare queue. */
 	struct runq	*ksq_curr;		/* Current queue. */
 	int		ksq_load_timeshare;	/* Load for timeshare. */
 	int		ksq_load;		/* Aggregate load. */
 	short		ksq_nice[SCHED_PRI_NRESV]; /* KSEs in each nice bin. */
 	short		ksq_nicemin;		/* Least nice. */
 #ifdef SMP
 	int			ksq_transferable; /* Queued KSEs that may migrate. */
 	LIST_ENTRY(kseq)	ksq_siblings;	/* Next in kseq group. */
 	struct kseq_group	*ksq_group;	/* Our processor group. */
 	volatile struct kse	*ksq_assigned;	/* assigned by another CPU. */
 #else
 	int		ksq_sysload;		/* For loadavg, !ITHD load. */
 #endif
 };
 
 #ifdef SMP
 /*
  * kseq groups are groups of processors which can cheaply share threads.  When
  * one processor in the group goes idle it will check the runqs of the other
  * processors in its group prior to halting and waiting for an interrupt.
  * These groups are suitable for SMT (Symmetric Multi-Threading) and not NUMA.
  * In a numa environment we'd want an idle bitmap per group and a two tiered
  * load balancer.
  */
 struct kseq_group {
 	int	ksg_cpus;		/* Count of CPUs in this kseq group. */
 	cpumask_t ksg_cpumask;		/* Mask of cpus in this group. */
 	cpumask_t ksg_idlemask;		/* Idle cpus in this group. */
 	cpumask_t ksg_mask;		/* Bit mask for first cpu. */
 	int	ksg_load;		/* Total load of this group. */
 	int	ksg_transferable;	/* Transferable load of this group. */
 	LIST_HEAD(, kseq)	ksg_members; /* Linked list of all members. */
 };
 #endif
 
 /*
  * One kse queue per processor.
  */
 #ifdef SMP
 /* Groups whose cpus are all idle; bits are set by kseq_idled(). */
 static cpumask_t kseq_idle;
 /* Highest kseq_groups[] index visited by the load balancers. */
 static int ksg_maxid;
 static struct kseq	kseq_cpu[MAXCPU];
 static struct kseq_group kseq_groups[MAXCPU];
 /*
  * Tick values computed by sched_balance() and sched_balance_groups() for
  * their next run (presumably compared against `ticks' by the caller).
  */
 static int bal_tick;
 static int gbal_tick;
 
 /* Accessors: this cpu's kseq, cpu id -> kseq, kseq -> cpu id, id -> group. */
 #define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
 #define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
 #define	KSEQ_ID(x)	((x) - kseq_cpu)
 #define	KSEQ_GROUP(x)	(&kseq_groups[(x)])
 #else	/* !SMP */
 static struct kseq	kseq_cpu;
 
 /* On UP there is a single kseq; the cpu argument is ignored. */
 #define	KSEQ_SELF()	(&kseq_cpu)
 #define	KSEQ_CPU(x)	(&kseq_cpu)
 #endif
 
 /* Scheduler-internal helpers. */
 static void sched_add_internal(struct thread *td, int preemptive);
 static void sched_slice(struct kse *ke);
 static void sched_priority(struct ksegrp *kg);
 static int sched_interact_score(struct ksegrp *kg);
 static void sched_interact_update(struct ksegrp *kg);
 static void sched_interact_fork(struct ksegrp *kg);
 static void sched_pctcpu_update(struct kse *ke);
 
 /* Operations on per processor queues */
 static struct kse * kseq_choose(struct kseq *kseq);
 static void kseq_setup(struct kseq *kseq);
 static void kseq_load_add(struct kseq *kseq, struct kse *ke);
 static void kseq_load_rem(struct kseq *kseq, struct kse *ke);
 static __inline void kseq_runq_add(struct kseq *kseq, struct kse *ke);
 static __inline void kseq_runq_rem(struct kseq *kseq, struct kse *ke);
 static void kseq_nice_add(struct kseq *kseq, int nice);
 static void kseq_nice_rem(struct kseq *kseq, int nice);
 void kseq_print(int cpu);
 #ifdef SMP
 /* Load balancing and cross-cpu KSE hand-off; SMP kernels only. */
 static int kseq_transfer(struct kseq *ksq, struct kse *ke, int class);
 static struct kse *runq_steal(struct runq *rq);
 static void sched_balance(void);
 static void sched_balance_groups(void);
 static void sched_balance_group(struct kseq_group *ksg);
 static void sched_balance_pair(struct kseq *high, struct kseq *low);
 static void kseq_move(struct kseq *from, int cpu);
 static int kseq_idled(struct kseq *kseq);
 static void kseq_notify(struct kse *ke, int cpu);
 static void kseq_assign(struct kseq *);
 static struct kse *kseq_steal(struct kseq *kseq, int stealidle);
 /*
  * On P4 Xeons the round-robin interrupt delivery is broken.  As a result of
  * this, we can't pin interrupts to the cpu that they were delivered to, 
  * otherwise all ithreads only run on CPU 0.
  *
  * KSE_CAN_MIGRATE therefore ignores the priority class on i386; elsewhere
  * it additionally refuses to migrate PRI_ITHD KSEs.  In both variants a KSE
  * whose thread is pinned or which has KEF_BOUND set may not migrate.
  */
 #ifdef __i386__
 #define	KSE_CAN_MIGRATE(ke, class)					\
     ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0)
 #else /* !__i386__ */
 #define	KSE_CAN_MIGRATE(ke, class)					\
     ((class) != PRI_ITHD && (ke)->ke_thread->td_pinned == 0 &&		\
     ((ke)->ke_flags & KEF_BOUND) == 0)
 #endif /* !__i386__ */
 #endif
 
 /*
  * Print the load counters and the non-empty nice bins of the kseq that
  * belongs to the given cpu.
  */
 void
 kseq_print(int cpu)
 {
 	struct kseq *ksq;
 	int bin;
 
 	ksq = KSEQ_CPU(cpu);
 
 	printf("kseq:\n");
 	printf("\tload:           %d\n", ksq->ksq_load);
 	printf("\tload TIMESHARE: %d\n", ksq->ksq_load_timeshare);
 #ifdef SMP
 	printf("\tload transferable: %d\n", ksq->ksq_transferable);
 #endif
 	printf("\tnicemin:\t%d\n", ksq->ksq_nicemin);
 	printf("\tnice counts:\n");
 	for (bin = 0; bin < SCHED_PRI_NRESV; bin++) {
 		if (ksq->ksq_nice[bin] == 0)
 			continue;
 		/* Bins are offset by SCHED_PRI_NHALF; show the real nice. */
 		printf("\t\t%d = %d\n",
 		    bin - SCHED_PRI_NHALF, ksq->ksq_nice[bin]);
 	}
 }
 
 /*
  * Put a KSE on the run queue recorded in ke->ke_runq.  On SMP, a KSE that
  * is allowed to migrate is also counted as transferable on both the kseq
  * and its group, and is tagged KEF_XFERABLE so that kseq_runq_rem() can
  * undo the accounting.
  */
 static __inline void
 kseq_runq_add(struct kseq *kseq, struct kse *ke)
 {
 #ifdef SMP
 	if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) {
 		ke->ke_flags |= KEF_XFERABLE;
 		kseq->ksq_group->ksg_transferable++;
 		kseq->ksq_transferable++;
 	}
 #endif
 	runq_add(ke->ke_runq, ke);
 }
 
 /*
  * Take a KSE off the run queue recorded in ke->ke_runq.  On SMP, a KSE that
  * was counted as transferable when it was queued is removed from the kseq
  * and group transferable counts and loses its KEF_XFERABLE tag.
  */
 static __inline void
 kseq_runq_rem(struct kseq *kseq, struct kse *ke)
 {
 #ifdef SMP
 	if ((ke->ke_flags & KEF_XFERABLE) != 0) {
 		ke->ke_flags &= ~KEF_XFERABLE;
 		kseq->ksq_group->ksg_transferable--;
 		kseq->ksq_transferable--;
 	}
 #endif
 	runq_remove(ke->ke_runq, ke);
 }
 
 /*
  * Account for a KSE in the load statistics of a kseq.  Must be called with
  * sched_lock held.
  *
  * The aggregate load always grows; the timeshare load grows for
  * PRI_TIMESHARE KSEs; the group load (SMP) or system load (UP) grows unless
  * the KSE is an interrupt thread or its process is flagged P_NOLOAD.
  * Timeshare KSEs are additionally entered into the nice bins.
  */
 static void
 kseq_load_add(struct kseq *kseq, struct kse *ke)
 {
 	int class;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
 	if (class == PRI_TIMESHARE)
 		kseq->ksq_load_timeshare++;
 	kseq->ksq_load++;
 	if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) {
 #ifdef SMP
 		kseq->ksq_group->ksg_load++;
 #else
 		kseq->ksq_sysload++;
 #endif
 	}
 	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
 		/* Trace first so that the old nicemin is what gets logged. */
 		CTR6(KTR_ULE,
 		    "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))",
 		    ke, ke->ke_runq, ke->ke_slice, ke->ke_thread->td_priority,
 		    ke->ke_proc->p_nice, kseq->ksq_nicemin);
 		kseq_nice_add(kseq, ke->ke_proc->p_nice);
 	}
 }
 
 /*
  * Remove a KSE from the load statistics of a kseq and forget which run
  * queue it was on.  Must be called with sched_lock held.  This mirrors
  * kseq_load_add().
  */
 static void
 kseq_load_rem(struct kseq *kseq, struct kse *ke)
 {
 	int class;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
 	ke->ke_runq = NULL;
 	kseq->ksq_load--;
 	if (class == PRI_TIMESHARE)
 		kseq->ksq_load_timeshare--;
 	if (class != PRI_ITHD && (ke->ke_proc->p_flag & P_NOLOAD) == 0) {
 #ifdef SMP
 		kseq->ksq_group->ksg_load--;
 #else
 		kseq->ksq_sysload--;
 #endif
 	}
 	/* kseq_nice_rem() looks at the already decremented timeshare load. */
 	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
 		kseq_nice_rem(kseq, ke->ke_proc->p_nice);
 }
 
 /*
  * Count one more KSE with the given nice value on this kseq and lower
  * ksq_nicemin if needed.  Must be called with sched_lock held.
  */
 static void
 kseq_nice_add(struct kseq *kseq, int nice)
 {
 	int bin;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	/* Shift the nice value so that it can index the bin array. */
 	bin = nice + SCHED_PRI_NHALF;
 	kseq->ksq_nice[bin]++;
 	/* A sole timeshare KSE defines the minimum by itself. */
 	if (kseq->ksq_load_timeshare == 1 || nice < kseq->ksq_nicemin)
 		kseq->ksq_nicemin = nice;
 }
 
 /*
  * Count one fewer KSE with the given nice value on this kseq and, if that
  * emptied the bin of the least nice value, find the new minimum.  Must be
  * called with sched_lock held.
  */
 static void
 kseq_nice_rem(struct kseq *kseq, int nice) 
 {
 	int bin;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	/* Shift the nice value so that it can index the bin array. */
 	bin = nice + SCHED_PRI_NHALF;
 	kseq->ksq_nice[bin]--;
 	KASSERT(kseq->ksq_nice[bin] >= 0, ("Negative nice count."));
 
 	/*
 	 * ksq_nicemin only needs recomputing when the last KSE of the
 	 * current minimum went away and timeshare KSEs remain.
 	 */
 	if (kseq->ksq_nice[bin] != 0 || nice != kseq->ksq_nicemin ||
 	    kseq->ksq_load_timeshare == 0)
 		return;
 
 	/* Scan upwards from the emptied bin for the next occupied one. */
 	while (bin < SCHED_PRI_NRESV) {
 		if (kseq->ksq_nice[bin] != 0) {
 			kseq->ksq_nicemin = bin - SCHED_PRI_NHALF;
 			return;
 		}
 		bin++;
 	}
 }
 
 #ifdef SMP
 /*
  * sched_balance is a simple CPU load balancing algorithm.  It operates by
  * finding the least loaded and most loaded cpu and equalizing their load
  * by migrating some processes.
  *
  * Dealing only with two CPUs at a time has two advantages.  Firstly, most
  * installations will only have 2 cpus.  Secondly, load balancing too much at
  * once can have an unpleasant effect on the system.  The scheduler rarely has
  * enough information to make perfect decisions.  So this algorithm chooses
  * algorithm simplicity and more gradual effects on load in larger systems.
  *
  * It could be improved by considering the priorities and slices assigned to
  * each task prior to balancing them.  There are many pathological cases with
  * any approach and so the semi random algorithm below may work as well as any.
  *
  */
 static void
 sched_balance(void)
 {
 	struct kseq_group *busiest;
 	struct kseq_group *idlest;
 	struct kseq_group *ksg;
 	int start;
 	int cnt;
 
 	if (smp_started != 0) {
 		busiest = idlest = NULL;
 		/* Visit every group once, starting at a random one. */
 		start = random() % (ksg_maxid + 1);
 		for (cnt = 0; cnt <= ksg_maxid; cnt++) {
 			ksg = KSEQ_GROUP((start + cnt) % (ksg_maxid + 1));
 			/*
 			 * The donor is the most loaded group that actually
 			 * has threads it can give away.
 			 */
 			if (ksg->ksg_transferable &&
 			    (busiest == NULL ||
 			    ksg->ksg_load > busiest->ksg_load))
 				busiest = ksg;
 			if (idlest == NULL || ksg->ksg_load < idlest->ksg_load)
 				idlest = ksg;
 		}
 		if (busiest != NULL && idlest != NULL && busiest != idlest)
 			sched_balance_pair(LIST_FIRST(&busiest->ksg_members),
 			    LIST_FIRST(&idlest->ksg_members));
 	}
 	/* Pick the tick of the next run, whether or not we balanced. */
 	bal_tick = ticks + (random() % (hz * 2));
 }
 
 /*
  * Balance the cpus inside every kseq group once SMP has started, then pick
  * the tick of the next run.  Must be called with sched_lock held.
  */
 static void
 sched_balance_groups(void)
 {
 	int grp;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	if (smp_started) {
 		for (grp = 0; grp <= ksg_maxid; grp++) {
 			sched_balance_group(KSEQ_GROUP(grp));
 		}
 	}
 	gbal_tick = ticks + (random() % (hz * 2));
 }
 
 /*
  * Balance the most and the least loaded kseq within one group.  Nothing is
  * done when the group has no transferable KSEs.
  */
 static void
 sched_balance_group(struct kseq_group *ksg)
 {
 	struct kseq *busiest;
 	struct kseq *idlest;
 	struct kseq *member;
 
 	if (ksg->ksg_transferable == 0)
 		return;
 	busiest = idlest = NULL;
 	LIST_FOREACH(member, &ksg->ksg_members, ksq_siblings) {
 		if (busiest == NULL || member->ksq_load > busiest->ksq_load)
 			busiest = member;
 		if (idlest == NULL || member->ksq_load < idlest->ksq_load)
 			idlest = member;
 	}
 	if (busiest != NULL && idlest != NULL && busiest != idlest)
 		sched_balance_pair(busiest, idlest);
 }
 
 /*
  * Move KSEs from `high' towards `low' until roughly half of the load
  * difference has been transferred, limited by the number of KSEs that are
  * transferable.
  */
 static void
 sched_balance_pair(struct kseq *high, struct kseq *low)
 {
 	int transferable;
 	int high_load;
 	int low_load;
 	int move;
 	int diff;
 
 	/*
 	 * Across groups any member of the donor group may give up a KSE, so
 	 * the group totals apply.  When transferring within a single group
 	 * only this specific kseq's own counts are relevant.
 	 */
 	if (high->ksq_group != low->ksq_group) {
 		transferable = high->ksq_group->ksg_transferable;
 		high_load = high->ksq_group->ksg_load;
 		low_load = low->ksq_group->ksg_load;
 	} else {
 		transferable = high->ksq_transferable;
 		high_load = high->ksq_load;
 		low_load = low->ksq_load;
 	}
 	if (transferable == 0)
 		return;
 	/*
 	 * Half of the imbalance, rounded up for odd differences, capped by
 	 * what can actually be given away.
 	 */
 	diff = high_load - low_load;
 	move = diff / 2;
 	if ((diff & 0x1) != 0)
 		move++;
 	move = min(move, transferable);
 	while (move-- > 0)
 		kseq_move(high, KSEQ_ID(low));
 }
 
 /*
  * Take one KSE away from `from' (or, failing that, from the first other
  * member of its group that has transferable KSEs) and hand it to `cpu'.
  * Panics when no KSE can be found.
  */
 static void
 kseq_move(struct kseq *from, int cpu)
 {
 	struct kseq_group *ksg;
 	struct kseq *src;
 	struct kseq *dst;
 	struct kse *ke;
 
 	dst = KSEQ_CPU(cpu);
 	src = from;
 	ke = kseq_steal(src, 1);
 	if (ke == NULL) {
 		ksg = src->ksq_group;
 		LIST_FOREACH(src, &ksg->ksg_members, ksq_siblings) {
 			if (src != from && src->ksq_transferable != 0) {
 				/* Only the first candidate is tried. */
 				ke = kseq_steal(src, 1);
 				break;
 			}
 		}
 		if (ke == NULL)
 			panic("kseq_move: No KSEs available with a "
 			    "transferable count of %d\n", 
 			    ksg->ksg_transferable);
 	}
 	/* The KSE already sits on the destination; leave it there. */
 	if (src == dst)
 		return;
 	ke->ke_state = KES_THREAD;
 	kseq_runq_rem(src, ke);
 	kseq_load_rem(src, ke);
 	kseq_notify(ke, cpu);
 }
 
 /*
  * Called for a kseq that has run out of work.  Returns 0 if a KSE was
  * stolen from another member of the group and queued on this cpu, or 1 if
  * nothing was found and the cpu has been marked idle.
  */
 static int
 kseq_idled(struct kseq *kseq)
 {
 	struct kseq_group *ksg;
 	struct kseq *victim;
 	struct kse *ke;
 
 	ksg = kseq->ksq_group;
 	/*
 	 * With more than one cpu in the group, look for a KSE to take over
 	 * from a sibling before going idle.
 	 */
 	if (ksg->ksg_cpus > 1 && ksg->ksg_transferable) {
 		LIST_FOREACH(victim, &ksg->ksg_members, ksq_siblings) {
 			if (victim == kseq || victim->ksq_transferable == 0)
 				continue;
 			ke = kseq_steal(victim, 0);
 			if (ke != NULL) {
 				ke->ke_state = KES_THREAD;
 				kseq_runq_rem(victim, ke);
 				kseq_load_rem(victim, ke);
 				ke->ke_cpu = PCPU_GET(cpuid);
 				sched_add_internal(ke->ke_thread, 0);
 				return (0);
 			}
 		}
 	}
 	/*
 	 * The group is only advertised as idle once every cpu in it is
 	 * idle.  Otherwise a KSE could bounce back and forth between two
 	 * idle cores on separate physical CPUs.
 	 */
 	ksg->ksg_idlemask |= PCPU_GET(cpumask);
 	if (ksg->ksg_idlemask == ksg->ksg_cpumask)
 		atomic_set_int(&kseq_idle, ksg->ksg_mask);
 	return (1);
 }
 
 /*
  * Drain the list of KSEs that were handed to this kseq through
  * kseq_notify() and queue each of them with sched_add_internal().
  */
 static void
 kseq_assign(struct kseq *kseq)
 {
 	struct kse *nke;
 	struct kse *ke;
 
 	/*
 	 * Detach the whole list at once: retry until the head that was read
 	 * is still the head at the moment it is swapped for NULL.
 	 */
 	do {
 		*(volatile struct kse **)&ke = kseq->ksq_assigned;
 	} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke, NULL));
 	for (; ke != NULL; ke = nke) {
 		/*
 		 * Fetch the link before queueing: ke_assign is an alias for
 		 * ke_procq.tqe_next, the field run queues link through.
 		 */
 		nke = ke->ke_assign;
 		ke->ke_flags &= ~KEF_ASSIGNED;
 		sched_add_internal(ke->ke_thread, 0);
 	}
 }
 
 /*
  * Hand a KSE to another cpu: record the target cpu, mark the KSE
  * KEF_ASSIGNED, push it onto the target kseq's ksq_assigned list and, if it
  * should preempt what the target is running, request a reschedule there.
  *
  * ke:  the KSE to hand over.
  * cpu: id of the destination cpu.
  */
 static void
 kseq_notify(struct kse *ke, int cpu)
 {
 	struct kseq *kseq;
 	struct thread *td;
 	struct pcpu *pcpu;
 	int prio;
 
 	ke->ke_cpu = cpu;
 	ke->ke_flags |= KEF_ASSIGNED;
 	/*
 	 * Take the priority before the KSE is published below so that the
 	 * preemption test does not have to go back through `ke'.
 	 */
 	prio = ke->ke_thread->td_priority;
 
 	kseq = KSEQ_CPU(cpu);
 
 	/*
 	 * Place a KSE on another cpu's queue and force a resched.  The push
 	 * is retried until the head we linked to is still the head when the
 	 * compare-and-set installs us in front of it.
 	 */
 	do {
 		*(volatile struct kse **)&ke->ke_assign = kseq->ksq_assigned;
 	} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke));
 	/*
 	 * Without sched_lock we could lose a race where we set NEEDRESCHED
 	 * on a thread that is switched out before the IPI is delivered.  This
 	 * would lead us to miss the resched.  This will be a problem once
 	 * sched_lock is pushed down.
 	 */
 	pcpu = pcpu_find(cpu);
 	td = pcpu->pc_curthread;
 	if (prio < td->td_priority || td == pcpu->pc_idlethread) {
 		td->td_flags |= TDF_NEEDRESCHED;
 		ipi_selected(1 << cpu, IPI_AST);
 	}
 }
 
 /*
  * Walk a run queue in status-bit order and return the first KSE that is
  * allowed to migrate, or NULL if there is none.  The KSE is not removed
  * from the queue.  Must be called with sched_lock held.
  */
 static struct kse *
 runq_steal(struct runq *rq)
 {
 	struct rqhead *head;
 	struct rqbits *status;
 	struct kse *ke;
 	int word;
 	int bit;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	status = &rq->rq_status;
 	for (word = 0; word < RQB_LEN; word++) {
 		/* Nothing is queued under an all-zero status word. */
 		if (status->rqb_bits[word] == 0)
 			continue;
 		for (bit = 0; bit < RQB_BPW; bit++) {
 			if ((status->rqb_bits[word] & (1ul << bit)) != 0) {
 				head = &rq->rq_queues[bit +
 				    (word << RQB_L2BPW)];
 				TAILQ_FOREACH(ke, head, ke_procq) {
 					if (KSE_CAN_MIGRATE(ke, PRI_BASE(
 					    ke->ke_ksegrp->kg_pri_class)))
 						return (ke);
 				}
 			}
 		}
 	}
 	return (NULL);
 }
 
 /*
  * Find a KSE on this kseq that may migrate.  The next queue is searched
  * first, then the current queue and, only when `stealidle' is set, the idle
  * queue.  Returns NULL when nothing suitable is queued.
  */
 static struct kse *
 kseq_steal(struct kseq *kseq, int stealidle)
 {
 	struct kse *ke;
 
 	/*
 	 * Starting with the next queue favours a non-interactive task that
 	 * may not have run for a while.
 	 */
 	ke = runq_steal(kseq->ksq_next);
 	if (ke == NULL)
 		ke = runq_steal(kseq->ksq_curr);
 	if (ke == NULL && stealidle)
 		ke = runq_steal(&kseq->ksq_idle);
 	return (ke);
 }
 
 /*
  * Try to hand a KSE to an idle cpu instead of running it here.
  *
  * kseq:  the kseq of the cpu that is currently responsible for the KSE.
  * ke:    the KSE to place.
  * class: priority class of the KSE; not consulted by this function.
  *
  * Returns 1 after the KSE has been passed to another cpu with
  * kseq_notify(), or 0 if SMP has not started or no idle cpu was found.
  *
  * Declared static to agree with the forward declaration earlier in the
  * file.
  */
 static int
 kseq_transfer(struct kseq *kseq, struct kse *ke, int class)
 {
 	struct kseq_group *ksg;
 	int cpu;
 
 	if (smp_started == 0)
 		return (0);
 	cpu = 0;
 	/*
 	 * If our load exceeds a certain threshold we should attempt to
 	 * reassign this thread.  The first candidate is the cpu that
 	 * originally ran the thread.  If it is idle, assign it there, 
 	 * otherwise, pick an idle cpu.
 	 *
 	 * The threshold at which we start to reassign kses has a large impact
 	 * on the overall performance of the system.  Tuned too high and
 	 * some CPUs may idle.  Too low and there will be excess migration
 	 * and context switches.
 	 */
 	ksg = kseq->ksq_group;
 	if (ksg->ksg_load > ksg->ksg_cpus && kseq_idle) {
 		ksg = KSEQ_CPU(ke->ke_cpu)->ksq_group;
 		if (kseq_idle & ksg->ksg_mask) {
 			cpu = ffs(ksg->ksg_idlemask);
 			if (cpu)
 				goto migrate;
 		}
 		/*
 		 * Multiple cpus could find this bit simultaneously
 		 * but the race shouldn't be terrible.
 		 */
 		cpu = ffs(kseq_idle);
 		if (cpu)
 			goto migrate;
 	}
 	/*
 	 * If another cpu in this group has idled, assign a thread over
 	 * to them after checking to see if there are idled groups.
 	 */
 	ksg = kseq->ksq_group;
 	if (ksg->ksg_idlemask) {
 		cpu = ffs(ksg->ksg_idlemask);
 		if (cpu)
 			goto migrate;
 	}
 	/*
 	 * No new CPU was found.
 	 */
 	return (0);
 migrate:
 	/*
 	 * Now that we've found an idle CPU, migrate the thread.  ffs()
 	 * numbers bits from 1, so convert the result to a cpu id first.
 	 */
 	cpu--;
 	ke->ke_runq = NULL;
 	kseq_notify(ke, cpu);
 
 	return (1);
 }
 
 #endif	/* SMP */
 
 /*
  * Return the KSE that should run next on this kseq, or NULL if none is
  * queued.  The current timeshare queue is tried first; when it is empty the
  * current and next queues are exchanged once.  If both are empty the idle
  * queue is consulted.  Must be called with sched_lock held.
  */
 
 static struct kse *
 kseq_choose(struct kseq *kseq)
 {
 	struct runq *tmp;
 	struct kse *ke;
 	int swapped;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	swapped = 0;
 
 	for (;;) {
 		ke = runq_choose(kseq->ksq_curr);
 		if (ke != NULL) {
 			if (ke->ke_slice != 0)
 				return (ke);
 			/*
 			 * A slice of 0 means the kse is in a TIMESHARE kse
 			 * group and its nice was too far out of the range
 			 * that receives slices.  Compute a fresh slice and
 			 * park the kse on the next queue.
 			 */
 			runq_remove(ke->ke_runq, ke);
 			sched_slice(ke);
 			ke->ke_runq = kseq->ksq_next;
 			runq_add(ke->ke_runq, ke);
 			continue;
 		}
 		/* Both timeshare queues have been tried without success. */
 		if (swapped)
 			break;
 		tmp = kseq->ksq_curr;
 		kseq->ksq_curr = kseq->ksq_next;
 		kseq->ksq_next = tmp;
 		swapped = 1;
 	}
 
 	return (runq_choose(&kseq->ksq_idle));
 }
 
 /*
  * Bring a kseq into its initial state: empty run queues, timeshare queue 0
  * as the current queue and queue 1 as the next one, and zero load.
  */
 static void
 kseq_setup(struct kseq *kseq)
 {
 	kseq->ksq_load = 0;
 	kseq->ksq_load_timeshare = 0;
 
 	runq_init(&kseq->ksq_idle);
 	runq_init(&kseq->ksq_timeshare[0]);
 	runq_init(&kseq->ksq_timeshare[1]);
 
 	kseq->ksq_curr = &kseq->ksq_timeshare[0];
 	kseq->ksq_next = &kseq->ksq_timeshare[1];
 }
 
 /*
  * Scheduler initialization, run through SYSINIT.  Derives the slice bounds
  * from hz, initializes the per-cpu kseqs and (on SMP) the kseq groups, and
  * accounts kse0 on the current cpu's kseq.
  *
  * dummy: unused SYSINIT argument.
  */
 static void
 sched_setup(void *dummy)
 {
 #ifdef SMP
 	int balance_groups;
 	int i;
 #endif
 
 	slice_min = (hz/100);	/* 10ms */
 	slice_max = (hz/7);	/* ~140ms */
 
 #ifdef SMP
 	balance_groups = 0;
 	/*
 	 * Initialize the kseqs.
 	 */
 	for (i = 0; i < MAXCPU; i++) {
 		struct kseq *ksq;
 
 		ksq = &kseq_cpu[i];
 		ksq->ksq_assigned = NULL;
 		kseq_setup(&kseq_cpu[i]);
 	}
 	if (smp_topology == NULL) {
 		struct kseq_group *ksg;
 		struct kseq *ksq;
 
 		/*
 		 * No topology information: every cpu slot becomes a group
 		 * of its own.
 		 *
 		 * NOTE(review): ksg_maxid is not assigned on this path and
 		 * so keeps its static initial value; confirm that the
 		 * balancers, which iterate up to ksg_maxid, are meant to
 		 * see only group 0 here.
 		 */
 		for (i = 0; i < MAXCPU; i++) {
 			ksq = &kseq_cpu[i];
 			ksg = &kseq_groups[i];
 			/*
 			 * Setup a kseq group with one member.
 			 */
 			ksq->ksq_transferable = 0;
 			ksq->ksq_group = ksg;
 			ksg->ksg_cpus = 1;
 			ksg->ksg_idlemask = 0;
 			ksg->ksg_cpumask = ksg->ksg_mask = 1 << i;
 			ksg->ksg_load = 0;
 			ksg->ksg_transferable = 0;
 			LIST_INIT(&ksg->ksg_members);
 			LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings);
 		}
 	} else {
 		struct kseq_group *ksg;
 		struct cpu_group *cg;
 		int j;
 
 		/* One kseq group per cpu group reported by the topology. */
 		for (i = 0; i < smp_topology->ct_count; i++) {
 			cg = &smp_topology->ct_group[i];
 			ksg = &kseq_groups[i];
 			/*
 			 * Initialize the group.
 			 */
 			ksg->ksg_idlemask = 0;
 			ksg->ksg_load = 0;
 			ksg->ksg_transferable = 0;
 			ksg->ksg_cpus = cg->cg_count;
 			ksg->ksg_cpumask = cg->cg_mask;
 			LIST_INIT(&ksg->ksg_members);
 			/*
 			 * Find all of the group members and add them.
 			 */
 			for (j = 0; j < MAXCPU; j++) {
 				if ((cg->cg_mask & (1 << j)) != 0) {
 					/*
 					 * ksg_mask is taken from the first
 					 * member found; this relies on the
 					 * static kseq_groups[] starting out
 					 * zeroed.
 					 */
 					if (ksg->ksg_mask == 0)
 						ksg->ksg_mask = 1 << j;
 					kseq_cpu[j].ksq_transferable = 0;
 					kseq_cpu[j].ksq_group = ksg;
 					LIST_INSERT_HEAD(&ksg->ksg_members,
 					    &kseq_cpu[j], ksq_siblings);
 				}
 			}
 			/* Group balancing only matters for multi-cpu groups. */
 			if (ksg->ksg_cpus > 1)
 				balance_groups = 1;
 		}
 		ksg_maxid = smp_topology->ct_count - 1;
 	}
 	/*
 	 * Stagger the group and global load balancer so they do not
 	 * interfere with each other.
 	 */
 	bal_tick = ticks + hz;
 	if (balance_groups)
 		gbal_tick = ticks + (hz / 2);
 #else
 	kseq_setup(KSEQ_SELF());
 #endif
 	/* kseq_load_add() asserts that sched_lock is held. */
 	mtx_lock_spin(&sched_lock);
 	kseq_load_add(KSEQ_SELF(), &kse0);
 	mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Recompute the user priority of a timeshare ksegrp.  The priority is the
  * base of the timeshare range plus the scaled interactivity score plus the
  * nice value of the process, clamped to the timeshare range.  Ksegrps of
  * any other priority class are left untouched.
  */
 static void
 sched_priority(struct ksegrp *kg)
 {
 	int pri;
 
 	if (kg->kg_pri_class == PRI_TIMESHARE) {
 		pri = SCHED_PRI_BASE + kg->kg_proc->p_nice +
 		    SCHED_PRI_INTERACT(sched_interact_score(kg));
 
 		if (pri < PRI_MIN_TIMESHARE)
 			pri = PRI_MIN_TIMESHARE;
 		else if (pri > PRI_MAX_TIMESHARE)
 			pri = PRI_MAX_TIMESHARE;
 
 		kg->kg_user_pri = pri;
 	}
 }
 
 /*
  * Assign a time slice to a KSE based on the properties of its ksegrp and
  * of the kseq of the cpu it is assigned to.  This is only for
  * PRI_TIMESHARE ksegrps.
  *
  * KSEs of interactive ksegrps always receive SCHED_SLICE_INTERACTIVE.
  *
  * KSEs of non-interactive ksegrps receive a slice that depends on how far
  * the nice value of their process is from the least nice value on this
  * kseq (ksq_nicemin):
  *
  *  - With no timeshare load, or when the KSE is less nice than everything
  *    else, it gets the maximum slice; other KSEs adjust relative to it
  *    when their slices are next computed.
  *  - Within a window of SCHED_SLICE_NTHRESH nice points above the minimum
  *    the slice shrinks with the distance from the minimum.
  *  - Outside of the window the KSE gets no slice and is reevaluated each
  *    time it is selected on the run queue.  The exception is nice 0, which
  *    is always granted the minimum slice.
  */
 static void
 sched_slice(struct kse *ke)
 {
 	struct ksegrp *kg;
 	struct kseq *kseq;
 	int distance;
 
 	kseq = KSEQ_CPU(ke->ke_cpu);
 	kg = ke->ke_ksegrp;
 
 	if (SCHED_INTERACTIVE(kg)) {
 		ke->ke_slice = SCHED_SLICE_INTERACTIVE;
 	} else {
 		/* How many nice points above the least nice KSE we are. */
 		distance = kg->kg_proc->p_nice - kseq->ksq_nicemin;
 		if (kseq->ksq_load_timeshare == 0 ||
 		    kg->kg_proc->p_nice < kseq->ksq_nicemin) {
 			ke->ke_slice = SCHED_SLICE_MAX;
 		} else if (distance <= SCHED_SLICE_NTHRESH) {
 			ke->ke_slice = SCHED_SLICE_NICE(distance);
 		} else if (kg->kg_proc->p_nice == 0) {
 			ke->ke_slice = SCHED_SLICE_MIN;
 		} else {
 			ke->ke_slice = 0;
 		}
 	}
 
 	CTR6(KTR_ULE,
 	    "Sliced %p(%d) (nice: %d, nicemin: %d, load: %d, interactive: %d)",
 	    ke, ke->ke_slice, kg->kg_proc->p_nice, kseq->ksq_nicemin,
 	    kseq->ksq_load_timeshare, SCHED_INTERACTIVE(kg));
 }
 
 /*
  * Keep the amount of scheduling history (run time plus sleep time) of a
  * ksegrp bounded by SCHED_SLP_RUN_MAX.  Call this after either kg_slptime
  * or kg_runtime has been adjusted.  The routine does not cope with a
  * history that has grown beyond double the limit.
  */
 static void
 sched_interact_update(struct ksegrp *kg)
 {
 	int sum;
 
 	sum = kg->kg_runtime + kg->kg_slptime;
 	if (sum >= SCHED_SLP_RUN_MAX) {
 		if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
 			/*
 			 * More than 1/5th over the limit: scaling by 4/5
 			 * would not get us back under it, so halve both
 			 * values instead.
 			 */
 			kg->kg_runtime /= 2;
 			kg->kg_slptime /= 2;
 		} else {
 			/* Slightly over: scale both values by 4/5. */
 			kg->kg_runtime = (kg->kg_runtime / 5) * 4;
 			kg->kg_slptime = (kg->kg_slptime / 5) * 4;
 		}
 	}
 }
 
 /*
  * Scale down the run/sleep history of a ksegrp when the sum of the two
  * exceeds SCHED_SLP_RUN_FORK.  Both values are divided by the same integer
  * ratio, so their proportion is roughly preserved.
  */
 static void
 sched_interact_fork(struct ksegrp *kg)
 {
 	int ratio;
 	int sum;
 
 	sum = kg->kg_runtime + kg->kg_slptime;
 	if (sum <= SCHED_SLP_RUN_FORK)
 		return;
 
 	ratio = sum / SCHED_SLP_RUN_FORK;
 	kg->kg_runtime /= ratio;
 	kg->kg_slptime /= ratio;
 }
 
 /*
  * Compute the interactivity score of a ksegrp from its run and sleep time.
  *
  * A ksegrp that slept more than it ran scores in the lower half of the
  * range, one that ran more than it slept scores in the upper half.  Equal
  * times score 0.
  */
 static int
 sched_interact_score(struct ksegrp *kg)
 {
 	int div;
 
 	/* This also covers slptime and runtime both being 0. */
 	if (kg->kg_runtime == kg->kg_slptime)
 		return (0);
 
 	if (kg->kg_slptime > kg->kg_runtime) {
 		div = max(1, kg->kg_slptime / SCHED_INTERACT_HALF);
 		return (kg->kg_runtime / div);
 	}
 
 	div = max(1, kg->kg_runtime / SCHED_INTERACT_HALF);
 	return (SCHED_INTERACT_HALF +
 	    (SCHED_INTERACT_HALF - (kg->kg_slptime / div)));
 }
 
 /*
  * Report the round-robin interval as the maximum time slice.  This is only
  * an approximation: many processes of the same priority switch whenever
  * their individual slices run out, and a slice is at most SCHED_SLICE_MAX.
  */
 int
 sched_rr_interval(void)
 {
 	return (SCHED_SLICE_MAX);
 }
 
 /*
  * Bring the tick counters used for the pctcpu calculation of a KSE up to
  * date.  The tick count is rescaled to a window of SCHED_CPU_TICKS ticks
  * that ends at the current tick.
  */
 static void
 sched_pctcpu_update(struct kse *ke)
 {
 	/*
 	 * Adjust counters and watermark for pctcpu calc.
 	 */
 	if (ke->ke_ltick > ticks - SCHED_CPU_TICKS) {
 		/*
 		 * Shift the tick count out so that the divide doesn't
 		 * round away our results.
 		 *
 		 * NOTE(review): this divides by (ticks - ke_ftick); confirm
 		 * that callers never get here with ke_ftick == ticks.
 		 */
 		ke->ke_ticks <<= 10;
 		ke->ke_ticks = (ke->ke_ticks / (ticks - ke->ke_ftick)) *
 			    SCHED_CPU_TICKS;
 		ke->ke_ticks >>= 10;
 	} else
 		/* The last recorded tick is older than the window. */
 		ke->ke_ticks = 0;
 	/* Restart the window so that it ends at the current tick. */
 	ke->ke_ltick = ticks;
 	ke->ke_ftick = ke->ke_ltick - SCHED_CPU_TICKS;
 }
 
 /*
  * Change the priority of thread td to prio.  sched_lock must be held.
  *
  * A thread that is on a run queue is requeued through adjustrunqueue();
  * before that, if the new priority value is lower than the current one
  * and its kse is not already on its cpu's current queue, the kse is moved
  * there.  A thread that is not on a run queue just has td_priority set.
  */
 void
 sched_prio(struct thread *td, u_char prio)
 {
 	struct kse *ke;
 
 	ke = td->td_kse;
 	mtx_assert(&sched_lock, MA_OWNED);
 	if (TD_ON_RUNQ(td)) {
 		/*
 		 * If the priority has been elevated due to priority
 		 * propagation, we may have to move ourselves to a new
 		 * queue.  We still call adjustrunqueue below in case kse
 		 * needs to fix things up.
 		 */
 		if (prio < td->td_priority && ke &&
 		    (ke->ke_flags & KEF_ASSIGNED) == 0 &&
 		    ke->ke_runq != KSEQ_CPU(ke->ke_cpu)->ksq_curr) {
 			runq_remove(ke->ke_runq, ke);
 			ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr;
 			runq_add(ke->ke_runq, ke);
 		}
 		/*
 		 * Hold this kse on this cpu so that sched_prio() doesn't
 		 * cause excessive migration.  We only want migration to
 		 * happen as the result of a wakeup.
 		 *
 		 * The test above allows for td having no kse, so apply the
 		 * same guard here rather than dereferencing a NULL pointer.
 		 */
 		if (ke != NULL)
 			ke->ke_flags |= KEF_HOLD;
 		adjustrunqueue(td, prio);
 	} else
 		td->td_priority = prio;
 }
 
 /*
  * Switch the cpu away from thread td.  If newtd is non-NULL it is the
  * thread to run next and its kse is added to this cpu's load; otherwise
  * the next thread is obtained from choosethread().  sched_lock must be
  * held.
  */
 void
 sched_switch(struct thread *td, struct thread *newtd)
 {
 	struct kse *ke;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	ke = td->td_kse;
 
 	/* Remember where td last ran and clear its reschedule requests. */
 	td->td_last_kse = ke;
 	td->td_lastcpu = td->td_oncpu;
 	td->td_oncpu = NOCPU;
 	td->td_flags &= ~TDF_NEEDRESCHED;
 	td->td_pflags &= ~TDP_OWEPREEMPT;
 
 	/*
 	 * If the KSE has been assigned it may be in the process of switching
 	 * to the new cpu.  This is the case in sched_bind().
 	 */
 	if ((ke->ke_flags & KEF_ASSIGNED) == 0) {
 		if (td == PCPU_GET(idlethread)) {
 			/* The idle thread is only marked as able to run. */
 			TD_SET_CAN_RUN(td);
 		} else if (TD_IS_RUNNING(td)) {
 			/* Still running: drop its load and put it back. */
 			kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
 			/*
 			 * Don't allow the kse to migrate from a preemption.
 			 */
 			ke->ke_flags |= KEF_HOLD;
 			setrunqueue(td, SRQ_OURSELF|SRQ_YIELDING);
 		} else {
 			if (ke->ke_runq) {
 				kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
 			} else if ((td->td_flags & TDF_IDLETD) == 0)
 				/* Non-idle thread without a run queue. */
 				kdb_backtrace();
 			/*
 			 * We will not be on the run queue. So we must be
 			 * sleeping or similar.
 			 */
 			if (td->td_proc->p_flag & P_SA)
 				kse_reassign(ke);
 		}
 	}
 	if (newtd != NULL)
 		kseq_load_add(KSEQ_SELF(), newtd->td_kse);
 	else
 		newtd = choosethread();
 	/* Skip the context switch when the chosen thread is td itself. */
 	if (td != newtd)
 		cpu_switch(td, newtd);
 	/*
 	 * NOTE(review): presumably re-marks td as the holder of sched_lock
 	 * once it is running again -- confirm against the mutex code.
 	 */
 	sched_lock.mtx_lock = (uintptr_t)td;
 
 	td->td_oncpu = PCPU_GET(cpuid);
 }
 
 /*
  * Set the nice value of process p.  Both the process lock and sched_lock
  * must be held.
  *
  * For every kse of a timeshare ksegrp that currently has a run queue, the
  * per-cpu nice accounting is moved from the old value to the new one.
  * After p_nice is stored, every ksegrp has its priority recomputed and
  * every thread is flagged with TDF_NEEDRESCHED.
  */
 void
 sched_nice(struct proc *p, int nice)
 {
 	struct kseq *cpuq;
 	struct ksegrp *kg;
 	struct thread *td;
 	struct kse *ke;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	/* Fix up the nice counts while p->p_nice still holds the old value. */
 	FOREACH_KSEGRP_IN_PROC(p, kg) {
 		if (kg->kg_pri_class != PRI_TIMESHARE)
 			continue;
 		FOREACH_KSE_IN_GROUP(kg, ke) {
 			if (ke->ke_runq != NULL) {
 				cpuq = KSEQ_CPU(ke->ke_cpu);
 				kseq_nice_rem(cpuq, p->p_nice);
 				kseq_nice_add(cpuq, nice);
 			}
 		}
 	}
 
 	p->p_nice = nice;
 
 	FOREACH_KSEGRP_IN_PROC(p, kg) {
 		sched_priority(kg);
 		FOREACH_THREAD_IN_GROUP(kg, td) {
 			td->td_flags |= TDF_NEEDRESCHED;
 		}
 	}
 }
 
 /*
  * Note that thread td is going to sleep: latch its current priority as
  * the base priority and record the current tick as the start of the
  * sleep.  sched_lock must be held.
  */
 void
 sched_sleep(struct thread *td)
 {
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	td->td_base_pri = td->td_priority;
 	td->td_slptime = ticks;
 
 	CTR2(KTR_ULE, "sleep kse %p (tick: %d)",
 	    td->td_kse, td->td_slptime);
 }
 
 void
 sched_wakeup(struct thread *td)
 {
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	/*
 	 * Let the kseg know how long we slept for.  This is because process
 	 * interactivity behavior is modeled in the kseg.
 	 */
 	if (td->td_slptime) {
 		struct ksegrp *kg;
 		int hzticks;
 
 		kg = td->td_ksegrp;
 		hzticks = (ticks - td->td_slptime) << 10;
 		if (hzticks >= SCHED_SLP_RUN_MAX) {
 			kg->kg_slptime = SCHED_SLP_RUN_MAX;
 			kg->kg_runtime = 1;
 		} else {
 			kg->kg_slptime += hzticks;
 			sched_interact_update(kg);
 		}
 		sched_priority(kg);
 		if (td->td_kse)
 			sched_slice(td->td_kse);
 		CTR2(KTR_ULE, "wakeup kse %p (%d ticks)",
 		    td->td_kse, hzticks);
 		td->td_slptime = 0;
 	}
 	setrunqueue(td, SRQ_BORING);
 }
 
 /*
  * Penalize the parent for creating a new child and initialize the child's
  * priority.
  *
  * td is the forking thread and p1 the new process.  The nice value is
  * copied across, then the fork hooks are run on p1's first ksegrp, kse
  * and thread.  sched_lock must be held.
  */
 void
 sched_fork(struct thread *td, struct proc *p1)
 {
 	struct proc *parent;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 
 	parent = td->td_proc;
 	p1->p_nice = parent->p_nice;
 	sched_fork_ksegrp(td, FIRST_KSEGRP_IN_PROC(p1));
 	sched_fork_kse(td, FIRST_KSE_IN_PROC(p1));
 	sched_fork_thread(td, FIRST_THREAD_IN_PROC(p1));
 }
 
 /*
  * Initialize the scheduler fields of a child kse from the kse of the
  * forking thread td.
  */
 void
 sched_fork_kse(struct thread *td, struct kse *child)
 {
 	struct kse *parent;
 
 	parent = td->td_kse;
 
 	/* Grab our parents cpu estimation information. */
 	child->ke_ticks = parent->ke_ticks;
 	child->ke_ltick = parent->ke_ltick;
 	child->ke_ftick = parent->ke_ftick;
 
 	/* Start on the parent's cpu, not yet on any run queue. */
 	child->ke_cpu = parent->ke_cpu;
 	child->ke_runq = NULL;
 
 	/* Attempt to quickly learn interactivity. */
 	child->ke_slice = 1;
 }
 
 /*
  * Initialize a child ksegrp from the ksegrp of the forking thread td and
  * charge the parent for the fork.  The child's process lock must be held.
  *
  * The child copies the parent's user priority and run/sleep history, which
  * is then reduced by sched_interact_fork().  The parent has tickincr << 10
  * added to its run time and its history re-bounded.
  */
 void
 sched_fork_ksegrp(struct thread *td, struct ksegrp *child)
 {
 	struct ksegrp *parent;
 
 	parent = td->td_ksegrp;
 	PROC_LOCK_ASSERT(child->kg_proc, MA_OWNED);
 
 	child->kg_user_pri = parent->kg_user_pri;
 	child->kg_runtime = parent->kg_runtime;
 	child->kg_slptime = parent->kg_slptime;
 	sched_interact_fork(child);
 
 	parent->kg_runtime += tickincr << 10;
 	sched_interact_update(parent);
 
 	CTR6(KTR_ULE, "sched_fork_ksegrp: %d(%d, %d) - %d(%d, %d)",
 	    parent->kg_proc->p_pid, parent->kg_slptime, parent->kg_runtime,
 	    child->kg_proc->p_pid, child->kg_slptime, child->kg_runtime);
 }
 
 /*
  * Per-thread counterpart of sched_fork_kse() and sched_fork_ksegrp(),
  * called last from sched_fork().  The body is empty, so it currently
  * does nothing with either thread.
  */
 void
 sched_fork_thread(struct thread *td, struct thread *child)
 {
 }
 
 /*
  * Change the priority class of ksegrp kg to class.  sched_lock must be
  * held.
  *
  * Nothing is done when the class is unchanged.  Otherwise, for every kse
  * of the group in state KES_ONRUNQ or KES_THREAD, the accounting of the
  * kseq for that kse's cpu is moved from the old base class to the new
  * one: the timeshare load and nice counts, and on SMP the transferable
  * counts for kses that are on a run queue.
  */
 void
 sched_class(struct ksegrp *kg, int class)
 {
 	struct kseq *kseq;
 	struct kse *ke;
 	int nclass;
 	int oclass;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	if (kg->kg_pri_class == class)
 		return;
 
 	/* Compare base classes: nclass is the new one, oclass the old. */
 	nclass = PRI_BASE(class);
 	oclass = PRI_BASE(kg->kg_pri_class);
 	FOREACH_KSE_IN_GROUP(kg, ke) {
 		if (ke->ke_state != KES_ONRUNQ &&
 		    ke->ke_state != KES_THREAD)
 			continue;
 		kseq = KSEQ_CPU(ke->ke_cpu);
 
 #ifdef SMP
 		/*
 		 * On SMP if we're on the RUNQ we must adjust the transferable
 		 * count because could be changing to or from an interrupt
 		 * class.
 		 */
 		if (ke->ke_state == KES_ONRUNQ) {
 			if (KSE_CAN_MIGRATE(ke, oclass)) {
 				kseq->ksq_transferable--;
 				kseq->ksq_group->ksg_transferable--;
 			}
 			if (KSE_CAN_MIGRATE(ke, nclass)) {
 				kseq->ksq_transferable++;
 				kseq->ksq_group->ksg_transferable++;
 			}
 		}
 #endif
 		/* Leave the timeshare accounting of the old class ... */
 		if (oclass == PRI_TIMESHARE) {
 			kseq->ksq_load_timeshare--;
 			kseq_nice_rem(kseq, kg->kg_proc->p_nice);
 		}
 		/* ... and join that of the new class. */
 		if (nclass == PRI_TIMESHARE) {
 			kseq->ksq_load_timeshare++;
 			kseq_nice_add(kseq, kg->kg_proc->p_nice);
 		}
 	}
 
 	kg->kg_pri_class = class;
 }
 
 /*
  * Return some of the child's priority and interactivity to the parent.
  *
  * Runs the kse and ksegrp exit hooks on the first kse and first ksegrp of
  * process p, passing td through to each.  sched_lock must be held.
  */
 void
 sched_exit(struct proc *p, struct thread *td)
 {
 	mtx_assert(&sched_lock, MA_OWNED);
 	sched_exit_kse(FIRST_KSE_IN_PROC(p), td);
 	sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), td);
 }
 
 void
 sched_exit_kse(struct kse *ke, struct thread *td)
 {
 	kseq_load_rem(KSEQ_CPU(td->td_kse->ke_cpu), td->td_kse);
 }
 
 /*
  * Fold the run time accumulated by td's ksegrp into kg and re-bound kg's
  * history.  Only run time is transferred; sleep time is not added.
  */
 void
 sched_exit_ksegrp(struct ksegrp *kg, struct thread *td)
 {
 	struct ksegrp *exiting;
 
 	exiting = td->td_ksegrp;
 	kg->kg_runtime += exiting->kg_runtime;
 	sched_interact_update(kg);
 }
 
 /*
  * Per-thread exit hook.  The body is empty, so it currently does nothing
  * with either thread.
  */
 void
 sched_exit_thread(struct thread *td, struct thread *child)
 {
 }
 
 /*
  * Charge one clock tick to thread td.  sched_lock must be held.
  *
  * On SMP this first runs the balancers when their tick comes up and picks
  * up any kses assigned to this cpu.  It then updates the pctcpu tick
  * accounting for td's kse.  For non-idle threads in a timeshare ksegrp it
  * also charges run time to the ksegrp and consumes one unit of the kse's
  * slice; when the slice is used up the priority and slice are recomputed,
  * the kse is given a new run queue and a reschedule is requested.
  */
 void
 sched_clock(struct thread *td)
 {
 	struct kseq *kseq;
 	struct ksegrp *kg;
 	struct kse *ke;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	kseq = KSEQ_SELF();
 #ifdef SMP
 	if (ticks == bal_tick)
 		sched_balance();
 	if (ticks == gbal_tick)
 		sched_balance_groups();
 	/*
 	 * We could have been assigned a non real-time thread without an
 	 * IPI.
 	 */
 	if (kseq->ksq_assigned)
 		kseq_assign(kseq);	/* Potentially sets NEEDRESCHED */
 #endif
 	/*
 	 * sched_setup() apparently happens prior to stathz being set.  We
 	 * need to resolve the timers earlier in the boot so we can avoid
 	 * calculating this here.
 	 */
 	if (realstathz == 0) {
 		/* Fall back to hz when stathz is still 0. */
 		realstathz = stathz ? stathz : hz;
 		tickincr = hz / realstathz;
 		/*
 		 * XXX This does not work for values of stathz that are much
 		 * larger than hz.
 		 */
 		if (tickincr == 0)
 			tickincr = 1;
 	}
 
 	ke = td->td_kse;
 	kg = ke->ke_ksegrp;
 
 	/* Adjust ticks for pctcpu */
 	ke->ke_ticks++;
 	ke->ke_ltick = ticks;
 
 	/* Go up to one second beyond our max and then trim back down */
 	if (ke->ke_ftick + SCHED_CPU_TICKS + hz < ke->ke_ltick)
 		sched_pctcpu_update(ke);
 
 	/* Idle threads get no slice or interactivity accounting. */
 	if (td->td_flags & TDF_IDLETD)
 		return;
 
 	CTR4(KTR_ULE, "Tick kse %p (slice: %d, slptime: %d, runtime: %d)",
 	    ke, ke->ke_slice, kg->kg_slptime >> 10, kg->kg_runtime >> 10);
 	/*
 	 * We only do slicing code for TIMESHARE ksegrps.
 	 */
 	if (kg->kg_pri_class != PRI_TIMESHARE)
 		return;
 	/*
 	 * We used a tick charge it to the ksegrp so that we can compute our
 	 * interactivity.
 	 */
 	kg->kg_runtime += tickincr << 10;
 	sched_interact_update(kg);
 
 	/*
 	 * We used up one time slice.
 	 */
 	if (--ke->ke_slice > 0)
 		return;
 	/*
 	 * We're out of time, recompute priorities and requeue.
 	 */
 	kseq_load_rem(kseq, ke);
 	sched_priority(kg);
 	sched_slice(ke);
 	/* SCHED_CURR() selects between the current and the next queue. */
 	if (SCHED_CURR(kg, ke))
 		ke->ke_runq = kseq->ksq_curr;
 	else
 		ke->ke_runq = kseq->ksq_next;
 	kseq_load_add(kseq, ke);
 	td->td_flags |= TDF_NEEDRESCHED;
 }
 
 /*
  * Report whether this cpu's kseq has load beyond the current thread.
  * Returns 1 if so and 0 otherwise.
  *
  * When called from the idle thread any load counts; otherwise the load
  * must still be positive after subtracting one.  On SMP, pending
  * assignments are processed under sched_lock before the load is read.
  */
 int
 sched_runnable(void)
 {
 	struct kseq *kseq;
 
 	kseq = KSEQ_SELF();
 #ifdef SMP
 	if (kseq->ksq_assigned) {
 		mtx_lock_spin(&sched_lock);
 		kseq_assign(kseq);
 		mtx_unlock_spin(&sched_lock);
 	}
 #endif
 	if ((curthread->td_flags & TDF_IDLETD) != 0)
 		return (kseq->ksq_load > 0 ? 1 : 0);
 	return (kseq->ksq_load - 1 > 0 ? 1 : 0);
 }
 
 /*
  * Reset thread td's priority to its ksegrp's user priority.  The values
  * are compared without the lock first; sched_lock is taken only when the
  * priority actually has to be written.
  */
 void
 sched_userret(struct thread *td)
 {
 	struct ksegrp *grp;
 
 	grp = td->td_ksegrp;
 
 	if (td->td_priority == grp->kg_user_pri)
 		return;
 	mtx_lock_spin(&sched_lock);
 	td->td_priority = grp->kg_user_pri;
 	mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Pick the next kse to run on this cpu.  sched_lock must be held.
  *
  * The chosen kse is removed from its run queue, marked KES_THREAD and
  * returned; NULL is returned when kseq_choose() finds nothing.  On SMP,
  * pending assignments are processed first, and the selection is restarted
  * whenever kseq_idled() returns 0, both when only an idle-class kse was
  * found and when nothing was found at all.
  */
 struct kse *
 sched_choose(void)
 {
 	struct kseq *kseq;
 	struct kse *ke;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	kseq = KSEQ_SELF();
 #ifdef SMP
 restart:
 	if (kseq->ksq_assigned)
 		kseq_assign(kseq);
 #endif
 	ke = kseq_choose(kseq);
 	if (ke) {
 #ifdef SMP
 		/* Only idle-class work here: try kseq_idled() first. */
 		if (ke->ke_ksegrp->kg_pri_class == PRI_IDLE)
 			if (kseq_idled(kseq) == 0)
 				goto restart;
 #endif
 		kseq_runq_rem(kseq, ke);
 		ke->ke_state = KES_THREAD;
 
 		/* Trace only; no state is changed for timeshare kses here. */
 		if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE) {
 			CTR4(KTR_ULE, "Run kse %p from %p (slice: %d, pri: %d)",
 			    ke, ke->ke_runq, ke->ke_slice,
 			    ke->ke_thread->td_priority);
 		}
 		return (ke);
 	}
 #ifdef SMP
 	/* Nothing local: retry if kseq_idled() returned 0. */
 	if (kseq_idled(kseq) == 0)
 		goto restart;
 #endif
 	return (NULL);
 }
 
 /*
  * Add thread td to a run queue.  The SRQ_* flags are reduced to a single
  * preemption decision for sched_add_internal(): preemption is not
  * attempted when SRQ_YIELDING is set, because preempting during a switch
  * can be bad, especially for KSE processes.
  *
  * XXX The mapping from flags to that decision is provisional.
  */
 void
 sched_add(struct thread *td, int flags)
 {
 	int preemptive;
 
 	preemptive = (flags & SRQ_YIELDING) ? 0 : 1;
 	sched_add_internal(td, preemptive);
 }
 
 /*
  * Place thread td's kse on a run queue.  sched_lock must be held.
  *
  * Nothing is done for a kse that has KEF_ASSIGNED set.  Otherwise a run
  * queue is selected from the base priority class of the ksegrp.  On SMP
  * the kse may instead be handed to another cpu, either because it cannot
  * migrate and belongs elsewhere or because kseq_transfer() moved it.  If
  * preemptive is non-zero and maybe_preempt() returns non-zero, the
  * function returns without queueing the kse.
  */
 static void
 sched_add_internal(struct thread *td, int preemptive)
 {
 	struct kseq *kseq;
 	struct ksegrp *kg;
 	struct kse *ke;
 #ifdef SMP
 	int canmigrate;
 #endif
 	int class;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	ke = td->td_kse;
 	kg = td->td_ksegrp;
 	if (ke->ke_flags & KEF_ASSIGNED)
 		return;
 	kseq = KSEQ_SELF();
 	KASSERT((ke->ke_thread != NULL),
 	    ("sched_add: No thread on KSE"));
 	KASSERT((ke->ke_thread->td_kse != NULL),
 	    ("sched_add: No KSE on thread"));
 	KASSERT(ke->ke_state != KES_ONRUNQ,
 	    ("sched_add: kse %p (%s) already in run queue", ke,
 	    ke->ke_proc->p_comm));
 	KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
 	    ("sched_add: process swapped out"));
 	KASSERT(ke->ke_runq == NULL,
 	    ("sched_add: KSE %p is still assigned to a run queue", ke));
 
 	/* Pick the run queue (and for some classes the slice) by class. */
 	class = PRI_BASE(kg->kg_pri_class);
 	switch (class) {
 	case PRI_ITHD:
 	case PRI_REALTIME:
 		ke->ke_runq = kseq->ksq_curr;
 		ke->ke_slice = SCHED_SLICE_MAX;
 		ke->ke_cpu = PCPU_GET(cpuid);
 		break;
 	case PRI_TIMESHARE:
 		if (SCHED_CURR(kg, ke))
 			ke->ke_runq = kseq->ksq_curr;
 		else
 			ke->ke_runq = kseq->ksq_next;
 		break;
 	case PRI_IDLE:
 		/*
 		 * This is for priority prop.
 		 */
 		if (ke->ke_thread->td_priority < PRI_MIN_IDLE)
 			ke->ke_runq = kseq->ksq_curr;
 		else
 			ke->ke_runq = &kseq->ksq_idle;
 		ke->ke_slice = SCHED_SLICE_MIN;
 		break;
 	default:
 		panic("Unknown pri class.");
 		break;
 	}
 #ifdef SMP
 	/*
 	 * Don't migrate running threads here.  Force the long term balancer
 	 * to do it.
 	 */
 	canmigrate = KSE_CAN_MIGRATE(ke, class);
 	/* KEF_HOLD is a one-shot request: consume it and forbid migration. */
 	if (ke->ke_flags & KEF_HOLD) {
 		ke->ke_flags &= ~KEF_HOLD;
 		canmigrate = 0;
 	}
 	/*
 	 * If this thread is pinned or bound, notify the target cpu.
 	 */
 	if (!canmigrate && ke->ke_cpu != PCPU_GET(cpuid) ) {
 		ke->ke_runq = NULL;
 		kseq_notify(ke, ke->ke_cpu);
 		return;
 	}
 	/*
 	 * If we had been idle, clear our bit in the group and potentially
 	 * the global bitmap.  If not, see if we should transfer this thread.
 	 */
 	if ((class == PRI_TIMESHARE || class == PRI_REALTIME) &&
 	    (kseq->ksq_group->ksg_idlemask & PCPU_GET(cpumask)) != 0) {
 		/*
 		 * Check to see if our group is unidling, and if so, remove it
 		 * from the global idle mask.
 		 */
 		if (kseq->ksq_group->ksg_idlemask ==
 		    kseq->ksq_group->ksg_cpumask)
 			atomic_clear_int(&kseq_idle, kseq->ksq_group->ksg_mask);
 		/*
 		 * Now remove ourselves from the group specific idle mask.
 		 */
 		kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask);
 	} else if (kseq->ksq_load > 1 && canmigrate)
 		if (kseq_transfer(kseq, ke, class))
 			return;
 	/* The kse stays here: record this cpu as its cpu. */
 	ke->ke_cpu = PCPU_GET(cpuid);
 #endif
 	/*
 	 * XXX With preemption this is not necessary.
 	 */
 	if (td->td_priority < curthread->td_priority &&
 	    ke->ke_runq == kseq->ksq_curr)
 		curthread->td_flags |= TDF_NEEDRESCHED;
 	if (preemptive && maybe_preempt(td))
 		return;
 	/* Queue the kse and account for it in the group and cpu load. */
 	ke->ke_ksegrp->kg_runq_kses++;
 	ke->ke_state = KES_ONRUNQ;
 
 	kseq_runq_add(kseq, ke);
 	kseq_load_add(kseq, ke);
 }
 
 /*
  * Take thread td's kse off its run queue.  sched_lock must be held.
  *
  * The kse is marked KES_THREAD, the ksegrp's count of queued kses is
  * decremented, and the kse is removed from the run queue and the load of
  * the kseq for its cpu.  A kse with KEF_ASSIGNED set is left untouched.
  */
 void
 sched_rem(struct thread *td)
 {
 	struct kseq *kseq;
 	struct kse *ke;
 
 	ke = td->td_kse;
 	/*
 	 * It is safe to just return here because sched_rem() is only ever
 	 * used in places where we're immediately going to add the
 	 * kse back on again.  In that case it'll be added with the correct
 	 * thread and priority when the caller drops the sched_lock.
 	 */
 	if (ke->ke_flags & KEF_ASSIGNED)
 		return;
 	mtx_assert(&sched_lock, MA_OWNED);
 	KASSERT((ke->ke_state == KES_ONRUNQ),
 	    ("sched_rem: KSE not on run queue"));
 
 	ke->ke_state = KES_THREAD;
 	ke->ke_ksegrp->kg_runq_kses--;
 	/* Use the kseq of the cpu recorded in the kse. */
 	kseq = KSEQ_CPU(ke->ke_cpu);
 	kseq_runq_rem(kseq, ke);
 	kseq_load_rem(kseq, ke);
 }
 
 /*
  * Return the cpu usage estimate of thread td as a fixed-point value.
  * Returns 0 when the thread has no kse or its kse has no ticks recorded.
  * As a side effect the owning process's p_swtime is set to the width of
  * the kse's tick window.  sched_lock is taken internally.
  */
 fixpt_t
 sched_pctcpu(struct thread *td)
 {
 	struct kse *ke;
 	fixpt_t usage;
 	int persec;
 
 	ke = td->td_kse;
 	if (ke == NULL)
 		return (0);
 
 	usage = 0;
 	mtx_lock_spin(&sched_lock);
 	if (ke->ke_ticks != 0) {
 		/*
 		 * Don't update more frequently than twice a second.  Allowing
 		 * this causes the cpu usage to decay away too quickly due to
 		 * rounding errors.
 		 */
 		if (ke->ke_ftick + SCHED_CPU_TICKS < ke->ke_ltick ||
 		    ke->ke_ltick < (ticks - (hz / 2)))
 			sched_pctcpu_update(ke);
 		/* Ticks per second, capped at SCHED_CPU_TICKS. */
 		persec = min(ke->ke_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS);
 		usage = (FSCALE * ((FSCALE * persec)/realstathz)) >> FSHIFT;
 	}
 
 	ke->ke_proc->p_swtime = ke->ke_ltick - ke->ke_ftick;
 	mtx_unlock_spin(&sched_lock);
 
 	return (usage);
 }
 
 /*
  * Bind thread td to cpu by setting KEF_BOUND on its kse.  sched_lock must
  * be held.
  *
  * On SMP, if the calling cpu is not the target, the kse's accounting is
  * removed from its current cpu, the kse is handed to the target with
  * kseq_notify(), and the caller switches away.  Without SMP only the
  * flag is set.
  */
 void
 sched_bind(struct thread *td, int cpu)
 {
 	struct kse *ke;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 	ke = td->td_kse;
 	ke->ke_flags |= KEF_BOUND;
 #ifdef SMP
 	/* Already on the requested cpu: nothing more to do. */
 	if (PCPU_GET(cpuid) == cpu)
 		return;
 	/* sched_rem without the runq_remove */
 	ke->ke_state = KES_THREAD;
 	ke->ke_ksegrp->kg_runq_kses--;
 	kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
 	kseq_notify(ke, cpu);
 	/* When we return from mi_switch we'll be on the correct cpu. */
 	mi_switch(SW_VOL, NULL);
 #endif
 }
 
 void
 sched_unbind(struct thread *td)
 {
 	mtx_assert(&sched_lock, MA_OWNED);
 	td->td_kse->ke_flags &= ~KEF_BOUND;
 }
 
 /*
  * Return the scheduler's load figure.  On SMP this is the sum of ksg_load
  * over kseq groups 0 through ksg_maxid; otherwise it is the ksq_sysload
  * of this cpu's kseq.
  */
 int
 sched_load(void)
 {
 #ifdef SMP
 	int group;
 	int sum;
 
 	sum = 0;
 	group = 0;
 	while (group <= ksg_maxid) {
 		sum += KSEQ_GROUP(group)->ksg_load;
 		group++;
 	}
 	return (sum);
 #else
 	return (KSEQ_SELF()->ksq_sysload);
 #endif
 }
 
 /*
  * Size in bytes of a struct kse plus this scheduler's struct ke_sched.
  */
 int
 sched_sizeof_kse(void)
 {
 	int bytes;
 
 	bytes = sizeof(struct kse) + sizeof(struct ke_sched);
 	return (bytes);
 }
 
 /*
  * Size in bytes of a struct ksegrp plus this scheduler's struct kg_sched.
  */
 int
 sched_sizeof_ksegrp(void)
 {
 	int bytes;
 
 	bytes = sizeof(struct ksegrp) + sizeof(struct kg_sched);
 	return (bytes);
 }
 
 /*
  * Size in bytes of a struct proc; no scheduler-specific part is added.
  */
 int
 sched_sizeof_proc(void)
 {
 	int bytes;
 
 	bytes = sizeof(struct proc);
 	return (bytes);
 }
 
 /*
  * Size in bytes of a struct thread plus this scheduler's struct td_sched.
  */
 int
 sched_sizeof_thread(void)
 {
 	int bytes;
 
 	bytes = sizeof(struct thread) + sizeof(struct td_sched);
 	return (bytes);
 }
diff --git a/sys/vm/vm_zeroidle.c b/sys/vm/vm_zeroidle.c
index 6e50b6b581b1..9d779de097f2 100644
--- a/sys/vm/vm_zeroidle.c
+++ b/sys/vm/vm_zeroidle.c
@@ -1,193 +1,195 @@
 /*-
  * Copyright (c) 1994 John Dyson
  * Copyright (c) 2001 Matt Dillon
  *
  * All Rights Reserved.
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  *	from: @(#)vm_machdep.c	7.3 (Berkeley) 5/13/91
  *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
  * from: FreeBSD: .../i386/vm_machdep.c,v 1.165 2001/07/04 23:27:04 dillon
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include <opt_sched.h>
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/resourcevar.h>
 #include <sys/vmmeter.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/sched.h>
 #include <sys/sysctl.h>
 #include <sys/kthread.h>
 #include <sys/unistd.h>
 
 #include <vm/vm.h>
 #include <vm/vm_page.h>
 
 SYSCTL_DECL(_vm_stats_misc);
 
 /* Count of pages zeroed by vm_page_zero_idle(); exported read-only. */
 static int cnt_prezero;
 SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
     &cnt_prezero, 0, "");
 
 /* Value of the vm.idlezero_enable tunable, applied by vm_pagezero(). */
 static int idlezero_enable_default = 1;
 TUNABLE_INT("vm.idlezero_enable", &idlezero_enable_default);
 /* Defer setting the enable flag until the kthread is running. */
 static int idlezero_enable = 0;
 SYSCTL_INT(_vm, OID_AUTO, idlezero_enable, CTLFLAG_RW, &idlezero_enable, 0, "");
 
 /*
  * Number of vm_page_zero_idle() calls vm_pagezero() makes before it
  * switches away voluntarily (only when PREEMPTION is not defined).
  */
 static int idlezero_maxrun = 16;
 SYSCTL_INT(_vm, OID_AUTO, idlezero_maxrun, CTLFLAG_RW, &idlezero_maxrun, 0, "");
 TUNABLE_INT("vm.idlezero_maxrun", &idlezero_maxrun);
 
 /*
  * Implement the pre-zeroed page mechanism.
  */
 
 /* Low (2/3) and high (4/5) thresholds applied to the free page count. */
 #define ZIDLE_LO(v)	((v) * 2 / 3)
 #define ZIDLE_HI(v)	((v) * 4 / 5)
 
 /*
  * Hysteresis flag: set by vm_page_zero_idle() when the zeroed-page count
  * reaches ZIDLE_HI, cleared at the start of each vm_page_zero_idle() call.
  * Its address is also the sleep/wakeup channel of the pagezero thread.
  */
 static int zero_state;
 
 /*
  * Decide whether another free page should be zeroed.  Returns 1 if so and
  * 0 otherwise.
  *
  * Zeroing is declined when it is disabled, when the zeroed-page count has
  * reached ZIDLE_HI (4/5) of the free page count, or, while zero_state is
  * set, when the count is still at or above ZIDLE_LO (2/3) of it.  The gap
  * between the two thresholds adds hysteresis so that pages are not
  * generally zeroed when the system is near steady state; that avoids
  * 'flutter' during disk I/O, IPC or fast sleeps, and avoids flushing the
  * L1 and L2 caches too much by zeroing continuously.
  */
 static int
 vm_page_zero_check(void)
 {
 	int hold;
 
 	hold = !idlezero_enable ||
 	    (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count)) ||
 	    vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count);
 	return (hold ? 0 : 1);
 }
 
 /*
  * Try to zero one page from the free queue.  Always returns 1, whether or
  * not a page was actually zeroed.
  *
  * A candidate is looked up starting at a rotating queue index.  If it is
  * not already PG_ZERO it is taken off the queue, zeroed with the free
  * queue mutex dropped, and then put back flagged PG_ZERO with the counters
  * updated.  zero_state is cleared on entry and set again once the
  * zeroed-page count reaches ZIDLE_HI of the free page count.
  */
 static int
 vm_page_zero_idle(void)
 {
 	/* Rotating start index; it persists across calls. */
 	static int free_rover;
 	vm_page_t m;
 
 	mtx_lock_spin(&vm_page_queue_free_mtx);
 	zero_state = 0;
 	m = vm_pageq_find(PQ_FREE, free_rover, FALSE);
 	if (m != NULL && (m->flags & PG_ZERO) == 0) {
 		vm_pageq_remove_nowakeup(m);
 		/* Zero the page without holding the spin mutex. */
 		mtx_unlock_spin(&vm_page_queue_free_mtx);
 		pmap_zero_page_idle(m);
 		mtx_lock_spin(&vm_page_queue_free_mtx);
 		m->flags |= PG_ZERO;
 		vm_pageq_enqueue(PQ_FREE + m->pc, m);
 		++vm_page_zero_count;
 		++cnt_prezero;
 		if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count))
 			zero_state = 1;
 	}
 	/* Advance the start index for the next call, wrapping by mask. */
 	free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
 	mtx_unlock_spin(&vm_page_queue_free_mtx);
 	return (1);
 }
 
 /*
  * Called by vm_page_free to hint that a new page is available.  Wakes any
  * sleeper on &zero_state when idle zeroing is enabled and
  * vm_page_zero_check() says there is a page worth zeroing.
  */
 void
 vm_page_zero_idle_wakeup(void)
 {
 
 	if (!idlezero_enable)
 		return;
 	if (vm_page_zero_check())
 		wakeup(&zero_state);
 }
 
 /*
  * Body of the "pagezero" kernel thread; it never returns.
  *
  * The thread first applies an rtprio of type RTP_PRIO_IDLE with prio
  * RTP_PRIO_MAX to its own ksegrp and remembers the resulting priority for
  * later sleeps.  It then sets idlezero_enable from the tunable default and
  * loops: while vm_page_zero_check() asks for work it zeroes pages,
  * otherwise it sleeps on &zero_state for up to 300 seconds.
  *
  * arg is unused.
  */
 static void
 vm_pagezero(void __unused *arg)
 {
 	struct rtprio rtp;
 	struct thread *td;
 	int pages, pri;
 
 	td = curthread;
 	rtp.prio = RTP_PRIO_MAX;
 	rtp.type = RTP_PRIO_IDLE;
 	pages = 0;
 	mtx_lock_spin(&sched_lock);
 	rtp_to_pri(&rtp, td->td_ksegrp);
 	pri = td->td_priority;
 	mtx_unlock_spin(&sched_lock);
 	/* Only now, with the thread running, turn idle zeroing on. */
 	idlezero_enable = idlezero_enable_default;
 
 	for (;;) {
 		if (vm_page_zero_check()) {
 			pages += vm_page_zero_idle();
 #ifndef PREEMPTION
 			/*
 			 * Without kernel preemption, give up the cpu after
 			 * more than idlezero_maxrun passes or as soon as
 			 * something else is runnable.
 			 */
 			if (pages > idlezero_maxrun || sched_runnable()) {
 				mtx_lock_spin(&sched_lock);
 				mi_switch(SW_VOL, NULL);
 				mtx_unlock_spin(&sched_lock);
 				pages = 0;
 			}
 #endif
 		} else {
 			tsleep(&zero_state, pri, "pgzero", hz * 300);
 			pages = 0;
 		}
 	}
 }
 
 /* Process of the "pagezero" kernel thread, filled in by pagezero_start(). */
 static struct proc *pagezero_proc;
 
 /*
  * Create the "pagezero" kernel thread running vm_pagezero().  The thread
  * is created stopped (RFSTOPPED) so that P_NOLOAD can be set on its
  * process before it is placed on a run queue.  Panics if the thread
  * cannot be created.  arg is unused.
  */
 static void
 pagezero_start(void __unused *arg)
 {
 	int error;
 
 	error = kthread_create(vm_pagezero, NULL, &pagezero_proc, RFSTOPPED, 0,
 	    "pagezero");
 	if (error)
 		panic("pagezero_start: error %d\n", error);
 	/*
 	 * We're an idle task, don't count us in the load.
 	 */
 	PROC_LOCK(pagezero_proc);
 	pagezero_proc->p_flag |= P_NOLOAD;
 	PROC_UNLOCK(pagezero_proc);
 	/* Now make the thread runnable. */
 	mtx_lock_spin(&sched_lock);
 	setrunqueue(FIRST_THREAD_IN_PROC(pagezero_proc), SRQ_BORING);
 	mtx_unlock_spin(&sched_lock);
 }
 /* Register pagezero_start() to run at SI_SUB_KTHREAD_VM. */
 SYSINIT(pagezero, SI_SUB_KTHREAD_VM, SI_ORDER_ANY, pagezero_start, NULL)